def create_admin_bdys_for_analysis(): # Step 3 of 3 : create admin bdy tables optimised for spatial analysis start_time = datetime.now() if settings.st_subdivide_supported: template_sql = geoscape.open_sql_file( "02-03-create-admin-bdy-analysis-tables_template.sql") sql_list = list() for table in settings.admin_bdy_list: sql = template_sql.format(table[0], table[1]) if table[ 0] == "locality_bdys": # special case, need to change schema name # sql = sql.replace(settings.raw_admin_bdys_schema, settings.admin_bdys_schema) sql = sql.replace("name", "locality_name") # add postcodes sql = sql.replace( "locality_name text NOT NULL,", "old_locality_pid text NULL, locality_name text NOT NULL, postcode text NULL," ) sql = sql.replace( "locality_name,", "old_locality_pid, locality_name, postcode,") sql_list.append(sql) geoscape.multiprocess_list("sql", sql_list, logger) logger.info( "\t- Step 3 of 3 : admin boundaries for analysis created : {0}". format(datetime.now() - start_time)) else: logger.warning( "\t- Step 3 of 3 : admin boundaries for analysis NOT created - " "requires PostGIS 2.2+ with GEOS 3.5.0+")
def populate_raw_gnaf(pg_cur): # Step 4 of 7 : load raw gnaf authority & state tables start_time = datetime.now() # authority code file list sql_list = get_raw_gnaf_files("authority_code") # add state file lists for state in settings.states_to_load: logger.info(f"\t\t- Loading state {state}") sql_list.extend(get_raw_gnaf_files(state)) # are there any files to load? if len(sql_list) == 0: logger.fatal( "No raw GNAF PSV files found\nACTION: Check your 'gnaf_network_directory' path" ) logger.fatal("\t- Step 4 of 7 : table populate FAILED!") else: # load all PSV files using multiprocessing geoscape.multiprocess_list("sql", sql_list, logger) # fix missing geocodes (added due to missing data in 202202 release) sql = geoscape.open_sql_file("01-04-raw-gnaf-fix-missing-geocodes.sql") pg_cur.execute(sql) logger.info( f"\t- Step 4 of 7 : tables populated : {datetime.now() - start_time}" ) logger.info("\t\t- fixed missing geocodes")
def get_locality_state_border_gaps(pg_cur, settings): start_time = datetime.now() sql = geoscape.open_sql_file("04-create-holes-along-borders.sql", settings) sql_list = geoscape.split_sql_into_list( pg_cur, sql, settings['admin_bdys_schema'], "temp_state_border_buffers_subdivided", "ste", "new_gid", settings, logger) geoscape.multiprocess_list("sql", sql_list, settings, logger) logger.info("\t- Step 4 of 8 : locality holes created : {0}".format( datetime.now() - start_time))
def get_split_localities(pg_cur, settings): start_time = datetime.now() sql = geoscape.open_sql_file("02-split-localities-by-state-borders.sql", settings) sql_list = geoscape.split_sql_into_list(pg_cur, sql, settings['admin_bdys_schema'], "temp_localities", "loc", "gid", settings, logger) geoscape.multiprocess_list("sql", sql_list, settings, logger) logger.info("\t- Step 2 of 8 : localities split by state : {0}".format( datetime.now() - start_time))
def create_states_and_prep_localities(settings): start_time = datetime.now() sql_list = [ geoscape.open_sql_file("01a-create-states-from-sa4s.sql", settings).format(settings['srid']), geoscape.open_sql_file("01b-prep-locality-boundaries.sql", settings).format(settings['srid']) ] geoscape.multiprocess_list("sql", sql_list, settings, logger) logger.info( "\t- Step 1 of 8 : state table created & localities prepped : {0}". format(datetime.now() - start_time))
def prep_admin_bdys(pg_cur): # Step 3 of 4 : create admin bdy tables read to be used start_time = datetime.now() # create tables using multiprocessing - using flag in file to split file up into sets of statements sql_list = geoscape.open_sql_file( "02-02a-prep-admin-bdys-tables.sql").split("-- # --") sql_list = sql_list + geoscape.open_sql_file( "02-02b-prep-census-2011-bdys-tables.sql").split("-- # --") sql_list = sql_list + geoscape.open_sql_file( "02-02c-prep-census-2016-bdys-tables.sql").split("-- # --") sql_list = sql_list + geoscape.open_sql_file( "02-02d-prep-census-2021-bdys-tables.sql").split("-- # --") # # Account for bdys that are not in states to load - not yet working # for sql in sql_list: # if settings.states_to_load == ["OT"] and ".commonwealth_electorates " in sql: # sql_list.remove(sql) # # if settings.states_to_load == ["ACT"] and ".local_government_areas " in sql: # sql_list.remove(sql) # # logger.info(settings.states_to_load # # if not ("NT" in settings.states_to_load or "SA" in settings.states_to_load # or "VIC" in settings.states_to_load or "WA" in settings.states_to_load) \ # and ".local_government_wards " in sql: # sql_list.remove(sql) # # if settings.states_to_load == ["OT"] and ".state_lower_house_electorates " in sql: # sql_list.remove(sql) # # if not ("TAS" in settings.states_to_load or "VIC" in settings.states_to_load # or "WA" in settings.states_to_load) and ".state_upper_house_electorates " in sql: # sql_list.remove(sql) geoscape.multiprocess_list("sql", sql_list, logger) # Special case - remove custom outback bdy if South Australia not requested if "SA" not in settings.states_to_load: pg_cur.execute( geoscape.prep_sql( "DELETE FROM admin_bdys.locality_bdys WHERE locality_pid = 'SA999999'" )) pg_cur.execute( geoscape.prep_sql("VACUUM ANALYZE admin_bdys.locality_bdys")) logger.info("\t- Step 2 of 3 : admin boundaries prepped : {0}".format( datetime.now() - start_time))
def index_raw_gnaf(pg_cur): # Step 5 of 7 : create indexes start_time = datetime.now() raw_sql_list = geoscape.open_sql_file( "01-05-raw-gnaf-create-indexes.sql").split("\n") sql_list = [] for sql in raw_sql_list: if sql[0:2] != "--" and sql[0:2] != "": sql_list.append(sql) geoscape.multiprocess_list("sql", sql_list, logger) # # create distinct new & old locality pid lookup table # pg_cur.execute(geoscape.open_sql_file("01-05b-create-distinct-locality-pid-linkage-table.sql")) logger.info( f"\t- Step 5 of 7 : indexes created : {datetime.now() - start_time}")
def analyse_raw_gnaf_tables(pg_cur): start_time = datetime.now() # get list of tables that haven't been analysed (i.e. that have no real row count) sql = f"""SELECT nspname|| '.' || relname AS table_name FROM pg_class C LEFT JOIN pg_namespace N ON (N.oid = C.relnamespace) WHERE nspname = '{settings.raw_gnaf_schema}' AND relkind='r' AND reltuples = 0""" pg_cur.execute(sql) sql_list = [] for pg_row in pg_cur: sql_list.append(f"ANALYZE {pg_row[0]}") # run queries in separate processes geoscape.multiprocess_list("sql", sql_list, logger) logger.info( f"\t- Step 7 of 7 : tables analysed : {datetime.now() - start_time}")
def create_primary_foreign_keys(): start_time = datetime.now() key_sql = geoscape.open_sql_file( "01-06-raw-gnaf-create-primary-foreign-keys.sql") key_sql_list = key_sql.split("--") sql_list = [] for sql in key_sql_list: sql = sql.strip() if sql[0:6] == "ALTER ": # add schema to tables names, in case raw gnaf schema not the default sql = sql.replace(f"ALTER TABLE ONLY ", "ALTER TABLE ONLY {settings.raw_gnaf_schema}.") sql_list.append(sql) # run queries in separate processes geoscape.multiprocess_list("sql", sql_list, logger) logger.info( f"\t- Step 6 of 7 : primary & foreign keys created : {datetime.now() - start_time}" )
def populate_raw_gnaf(): # Step 4 of 7 : load raw gnaf authority & state tables start_time = datetime.now() # authority code file list sql_list = get_raw_gnaf_files("authority_code") # add state file lists for state in settings.states_to_load: logger.info("\t\t- Loading state {}".format(state)) sql_list.extend(get_raw_gnaf_files(state)) # are there any files to load? if len(sql_list) == 0: logger.fatal( "No raw GNAF PSV files found\nACTION: Check your 'gnaf_network_directory' path" ) logger.fatal("\t- Step 4 of 7 : table populate FAILED!") else: # load all PSV files using multiprocessing geoscape.multiprocess_list("sql", sql_list, logger) logger.info( "\t- Step 4 of 7 : tables populated : {0}".format(datetime.now() - start_time))
def boundary_tag_gnaf(pg_cur): # create bdy table list # remove localities, postcodes and states as these IDs are already assigned to GNAF addresses table_list = list() for table in settings.admin_bdy_list: if table[0] not in ["locality_bdys", "postcode_bdys", "state_bdys"]: # if no analysis tables created - use the full tables instead of the subdivided ones # WARNING: this can add hours to the processing if settings.st_subdivide_supported: table_name = "{}_analysis".format(table[0], ) else: table_name = table[0] table_list.append([table_name, table[1]]) # create bdy tagged address tables for address_table in ["address_principal", "address_alias"]: pg_cur.execute( "DROP TABLE IF EXISTS {}.{}_admin_boundaries CASCADE".format( settings.gnaf_schema, address_table)) create_table_list = list() create_table_list.append( "CREATE TABLE {}.{}_admin_boundaries (gid serial NOT NULL," "gnaf_pid text NOT NULL," # "alias_principal character(1) NOT NULL," "locality_pid text NOT NULL," "old_locality_pid text NULL," "locality_name text NOT NULL," "postcode text," "state text NOT NULL".format(settings.gnaf_schema, address_table)) for table in table_list: pid_field = table[1] name_field = pid_field.replace("_pid", "_name") create_table_list.append(", {} text, {} text".format( pid_field, name_field)) create_table_list.append( ") WITH (OIDS=FALSE);ALTER TABLE {}.{}_admin_boundaries OWNER TO {}" .format(settings.gnaf_schema, address_table, settings.pg_user)) pg_cur.execute("".join(create_table_list)) # Step 1 of 7 : tag gnaf addresses with admin boundary IDs, using multiprocessing start_time = datetime.now() # create temp tables template_sql = geoscape.open_sql_file( "04-01a-bdy-tag-create-table-template.sql") for table in table_list: pg_cur.execute(template_sql.format(table[0], )) # create temp tables of bdy tagged gnaf_pids template_sql = geoscape.open_sql_file("04-01b-bdy-tag-template.sql") sql_list = list() for table in table_list: sql = template_sql.format(table[0], table[1]) short_sql_list = geoscape.split_sql_into_list( pg_cur, sql, settings.admin_bdys_schema, table[0], "bdys", "gid", logger) if short_sql_list is not None: sql_list.extend(short_sql_list) # logger.info("\n".join(sql_list)) if sql_list is not None: geoscape.multiprocess_list("sql", sql_list, logger) logger.info( "\t- Step 1 of 7 : principal addresses tagged with admin boundary IDs: {}" .format(datetime.now() - start_time, )) start_time = datetime.now() # Step 2 of 7 : delete invalid matches, create indexes and analyse tables sql_list = list() for table in table_list: sql = "DELETE FROM {0}.temp_{1}_tags WHERE gnaf_state <> bdy_state AND gnaf_state <> 'OT';" \ "CREATE INDEX temp_{1}_tags_gnaf_pid_idx ON {0}.temp_{1}_tags USING btree(gnaf_pid);" \ "ANALYZE {0}.temp_{1}_tags".format(settings.gnaf_schema, table[0]) sql_list.append(sql) geoscape.multiprocess_list("sql", sql_list, logger) logger.info( "\t- Step 2 of 7 : principal addresses - invalid matches deleted & bdy tag indexes created : {}" .format(datetime.now() - start_time, )) start_time = datetime.now() # Step 3 of 7 : insert boundary tagged addresses # create insert statement for multiprocessing insert_field_list = list() insert_field_list.append( "(gnaf_pid, locality_pid, old_locality_pid, locality_name, postcode, state" ) insert_join_list = list() insert_join_list.append("FROM {}.address_principals AS pnts ".format( settings.gnaf_schema, )) select_field_list = list() select_field_list.append( "SELECT pnts.gnaf_pid, pnts.locality_pid, pnts.old_locality_pid, " "pnts.locality_name, pnts.postcode, pnts.state") drop_table_list = list() for table in table_list: pid_field = table[1] name_field = pid_field.replace("_pid", "_name") insert_field_list.append(", {0}, {1}".format(pid_field, name_field)) select_field_list.append( ", temp_{0}_tags.bdy_pid, temp_{0}_tags.bdy_name ".format( table[0])) insert_join_list.append( "LEFT OUTER JOIN {0}.temp_{1}_tags ON pnts.gnaf_pid = temp_{1}_tags.gnaf_pid " .format(settings.gnaf_schema, table[0])) drop_table_list.append( "DROP TABLE IF EXISTS {0}.temp_{1}_tags;".format( settings.gnaf_schema, table[0])) insert_field_list.append(") ") insert_statement_list = list() insert_statement_list.append( "INSERT INTO {0}.address_principal_admin_boundaries ".format( settings.gnaf_schema, )) insert_statement_list.append("".join(insert_field_list)) insert_statement_list.append("".join(select_field_list)) insert_statement_list.append("".join(insert_join_list)) sql = "".join(insert_statement_list) + ";" sql_list = geoscape.split_sql_into_list(pg_cur, sql, settings.gnaf_schema, "address_principals", "pnts", "gid", logger) # logger.info("\n".join(sql_list) if sql_list is not None: geoscape.multiprocess_list("sql", sql_list, logger) # drop temp tables pg_cur.execute("".join(drop_table_list)) # get stats pg_cur.execute("ANALYZE {0}.address_principal_admin_boundaries ".format( settings.gnaf_schema)) logger.info( "\t- Step 3 of 7 : principal addresses - bdy tags added to output table : {}" .format(datetime.now() - start_time, )) start_time = datetime.now() # Step 4 of 7 : add index to output table sql = "CREATE INDEX address_principal_admin_boundaries_gnaf_pid_idx " \ "ON {0}.address_principal_admin_boundaries USING btree (gnaf_pid)"\ .format(settings.gnaf_schema) pg_cur.execute(sql) logger.info( "\t- Step 4 of 7 : created index on bdy tagged address table : {}". format(datetime.now() - start_time, )) start_time = datetime.now() # Step 5 of 7 : log duplicates - happens when 2 boundaries overlap by a very small amount # (can be ignored if there's a small number of records affected) sql = "SELECT gnaf_pid FROM (SELECT Count(*) AS cnt, gnaf_pid FROM {0}.address_principal_admin_boundaries " \ "GROUP BY gnaf_pid) AS sqt WHERE cnt > 1".format(settings.gnaf_schema) pg_cur.execute(sql) # get cursor description to test if any rows returned safely columns = pg_cur.description # log gnaf_pids that got duplicate results if columns is not None: duplicates = pg_cur.fetchall() gnaf_pids = list() for duplicate in duplicates: gnaf_pids.append("\t\t" + duplicate[0]) if len(gnaf_pids) > 0: logger.warning( "\t- Step 5 of 7 : found boundary tag duplicates : {}".format( datetime.now() - start_time, )) logger.warning("\n".join(gnaf_pids)) else: logger.info( "\t- Step 5 of 7 : no boundary tag duplicates : {}".format( datetime.now() - start_time, )) else: logger.info("\t- Step 5 of 7 : no boundary tag duplicates : {}".format( datetime.now() - start_time, )) # Step 6 of 7 : Copy principal boundary tags to alias addresses pg_cur.execute( geoscape.open_sql_file("04-06-bdy-tags-for-alias-addresses.sql")) logger.info( "\t- Step 6 of 7 : alias addresses boundary tagged : {}".format( datetime.now() - start_time, )) start_time = datetime.now() # Step 7 of 7 : Create view of all bdy tags pg_cur.execute(geoscape.open_sql_file("04-07-create-bdy-tag-view.sql")) logger.info( "\t- Step 7 of 7 : boundary tagged address view created : {}".format( datetime.now() - start_time, ))
def create_reference_tables(pg_cur): # set postgres search path back to the default pg_cur.execute("SET search_path = public, pg_catalog") # Step 1 of 14 : create reference tables start_time = datetime.now() pg_cur.execute(geoscape.open_sql_file("03-01-reference-create-tables.sql")) logger.info("\t- Step 1 of 14 : create reference tables : {0}".format( datetime.now() - start_time)) # Step 2 of 14 : populate localities start_time = datetime.now() pg_cur.execute( geoscape.open_sql_file("03-02-reference-populate-localities.sql")) logger.info("\t- Step 2 of 14 : localities populated : {0}".format( datetime.now() - start_time)) # Step 3 of 14 : populate locality aliases start_time = datetime.now() pg_cur.execute( geoscape.open_sql_file( "03-03-reference-populate-locality-aliases.sql")) logger.info("\t- Step 3 of 14 : locality aliases populated : {0}".format( datetime.now() - start_time)) # Step 4 of 14 : populate locality neighbours start_time = datetime.now() pg_cur.execute( geoscape.open_sql_file( "03-04-reference-populate-locality-neighbours.sql")) logger.info( "\t- Step 4 of 14 : locality neighbours populated : {0}".format( datetime.now() - start_time)) # Step 5 of 14 : populate streets start_time = datetime.now() pg_cur.execute( geoscape.open_sql_file("03-05-reference-populate-streets.sql")) logger.info( "\t- Step 5 of 14 : streets populated : {0}".format(datetime.now() - start_time)) # Step 6 of 14 : populate street aliases start_time = datetime.now() pg_cur.execute( geoscape.open_sql_file("03-06-reference-populate-street-aliases.sql")) logger.info("\t- Step 6 of 14 : street aliases populated : {0}".format( datetime.now() - start_time)) # Step 7 of 14 : populate addresses, using multiprocessing start_time = datetime.now() sql = geoscape.open_sql_file("03-07-reference-populate-addresses-1.sql") sql_list = geoscape.split_sql_into_list(pg_cur, sql, settings.gnaf_schema, "streets", "str", "gid", logger) if sql_list is not None: geoscape.multiprocess_list("sql", sql_list, logger) pg_cur.execute(geoscape.prep_sql("ANALYZE gnaf.temp_addresses;")) logger.info( "\t- Step 7 of 14 : addresses populated : {0}".format(datetime.now() - start_time)) # Step 8 of 14 : populate principal alias lookup start_time = datetime.now() pg_cur.execute( geoscape.open_sql_file( "03-08-reference-populate-address-alias-lookup.sql")) logger.info( "\t- Step 8 of 14 : principal alias lookup populated : {0}".format( datetime.now() - start_time)) # Step 9 of 14 : populate primary secondary lookup start_time = datetime.now() pg_cur.execute( geoscape.open_sql_file( "03-09-reference-populate-address-secondary-lookup.sql")) pg_cur.execute( geoscape.prep_sql("VACUUM ANALYSE gnaf.address_secondary_lookup")) logger.info( "\t- Step 9 of 14 : primary secondary lookup populated : {0}".format( datetime.now() - start_time)) # Step 10 of 14 : split the Melbourne locality into its 2 postcodes (3000, 3004) start_time = datetime.now() pg_cur.execute( geoscape.open_sql_file("03-10-reference-split-melbourne.sql")) logger.info( "\t- Step 10 of 14 : Melbourne split : {0}".format(datetime.now() - start_time)) # Step 11 of 14 : finalise localities assigned to streets and addresses start_time = datetime.now() pg_cur.execute( geoscape.open_sql_file("03-11-reference-finalise-localities.sql")) logger.info("\t- Step 11 of 14 : localities finalised : {0}".format( datetime.now() - start_time)) # Step 12 of 14 : finalise addresses, using multiprocessing start_time = datetime.now() sql = geoscape.open_sql_file("03-12-reference-populate-addresses-2.sql") sql_list = geoscape.split_sql_into_list(pg_cur, sql, settings.gnaf_schema, "localities", "loc", "gid", logger) if sql_list is not None: geoscape.multiprocess_list("sql", sql_list, logger) # turf the temp address table pg_cur.execute( geoscape.prep_sql("DROP TABLE IF EXISTS gnaf.temp_addresses")) logger.info( "\t- Step 12 of 14 : addresses finalised : {0}".format(datetime.now() - start_time)) # Step 13 of 14 : create almost correct postcode boundaries by aggregating localities, using multiprocessing start_time = datetime.now() sql = geoscape.open_sql_file("03-13-reference-derived-postcode-bdys.sql") sql_list = [] for state in settings.states_to_load: state_sql = sql.replace("GROUP BY ", "WHERE state = '{0}' GROUP BY ".format(state)) sql_list.append(state_sql) geoscape.multiprocess_list("sql", sql_list, logger) # create analysis table? if settings.st_subdivide_supported: pg_cur.execute( geoscape.open_sql_file( "03-13a-create-postcode-analysis-table.sql")) logger.info("\t- Step 13 of 14 : postcode boundaries created : {0}".format( datetime.now() - start_time)) # Step 14 of 14 : create indexes, primary and foreign keys, using multiprocessing start_time = datetime.now() raw_sql_list = geoscape.open_sql_file( "03-14-reference-create-indexes.sql").split("\n") sql_list = [] for sql in raw_sql_list: if sql[0:2] != "--" and sql[0:2] != "": sql_list.append(sql) geoscape.multiprocess_list("sql", sql_list, logger) logger.info( "\t- Step 14 of 14 : create primary & foreign keys and indexes : {0}". format(datetime.now() - start_time))