def create_states_and_prep_localities(settings):
    """Step 1 of 8: build the state table from SA4s and prep locality boundaries.

    Both SQL scripts are SRID-parameterised templates, so they are loaded,
    formatted with the working SRID and run concurrently.
    """
    step_start = datetime.now()

    script_names = (
        "01a-create-states-from-sa4s.sql",
        "01b-prep-locality-boundaries.sql",
    )
    sql_list = [geoscape.open_sql_file(name, settings).format(settings['srid'])
                for name in script_names]

    geoscape.multiprocess_list("sql", sql_list, settings, logger)

    logger.info("\t- Step 1 of 8 : state table created & localities prepped : {0}"
                .format(datetime.now() - step_start))
def prep_admin_bdys(pg_cur):
    """Step 2 of 3: create the admin boundary tables ready to be used.

    Each SQL file contains a "-- # --" marker splitting it into independent
    statement sets, which are then run in parallel worker processes.
    """
    step_start = datetime.now()

    script_names = (
        "02-02a-prep-admin-bdys-tables.sql",
        "02-02b-prep-census-2011-bdys-tables.sql",
        "02-02c-prep-census-2016-bdys-tables.sql",
        "02-02d-prep-census-2021-bdys-tables.sql",
    )

    sql_list = []
    for script_name in script_names:
        sql_list += geoscape.open_sql_file(script_name).split("-- # --")

    geoscape.multiprocess_list("sql", sql_list, logger)

    # Special case - remove custom outback bdy if South Australia not requested
    if "SA" not in settings.states_to_load:
        pg_cur.execute(geoscape.prep_sql(
            "DELETE FROM admin_bdys.locality_bdys WHERE locality_pid = 'SA999999'"))
        pg_cur.execute(geoscape.prep_sql("VACUUM ANALYZE admin_bdys.locality_bdys"))

    logger.info("\t- Step 2 of 3 : admin boundaries prepped : {0}".format(
        datetime.now() - step_start))
def create_display_postcodes(pg_cur, settings):
    """Step 6 of 8: create the display-ready postcode boundaries."""
    step_start = datetime.now()

    template = geoscape.open_sql_file("06-create-display-postcodes.sql", settings)
    pg_cur.execute(template.format(settings['srid']))

    logger.info("\t- Step 6 of 8 : display postcodes created : {0}".format(
        datetime.now() - step_start))
def create_admin_bdys_for_analysis():
    """Step 3 of 3: create admin boundary tables optimised for spatial analysis.

    Requires ST_Subdivide support (PostGIS 2.2+ with GEOS 3.5.0+); when the
    server lacks it, the step is skipped with a warning.
    """
    step_start = datetime.now()

    # guard clause: nothing to do without ST_Subdivide support
    if not settings.st_subdivide_supported:
        logger.warning(
            "\t- Step 3 of 3 : admin boundaries for analysis NOT created - "
            "requires PostGIS 2.2+ with GEOS 3.5.0+")
        return

    template_sql = geoscape.open_sql_file(
        "02-03-create-admin-bdy-analysis-tables_template.sql")

    sql_list = []
    for table in settings.admin_bdy_list:
        sql = template_sql.format(table[0], table[1])

        if table[0] == "locality_bdys":  # special case, need to change schema name
            sql = sql.replace("name", "locality_name")
            # add old locality pid and postcode columns
            sql = sql.replace(
                "locality_name text NOT NULL,",
                "old_locality_pid text NULL, locality_name text NOT NULL, postcode text NULL,")
            sql = sql.replace(
                "locality_name,",
                "old_locality_pid, locality_name, postcode,")

        sql_list.append(sql)

    geoscape.multiprocess_list("sql", sql_list, logger)

    logger.info("\t- Step 3 of 3 : admin boundaries for analysis created : {0}".format(
        datetime.now() - step_start))
def populate_raw_gnaf(pg_cur):
    """Step 4 of 7: load the raw GNAF authority code & state PSV files."""
    step_start = datetime.now()

    # start with the authority code file list
    sql_list = get_raw_gnaf_files("authority_code")

    # then append each requested state's file list
    for state in settings.states_to_load:
        logger.info(f"\t\t- Loading state {state}")
        sql_list.extend(get_raw_gnaf_files(state))

    # bail out early if nothing was found to load
    if not sql_list:
        logger.fatal(
            "No raw GNAF PSV files found\nACTION: Check your 'gnaf_network_directory' path")
        logger.fatal("\t- Step 4 of 7 : table populate FAILED!")
        return

    # load all PSV files using multiprocessing
    geoscape.multiprocess_list("sql", sql_list, logger)

    # fix missing geocodes (added due to missing data in 202202 release)
    pg_cur.execute(geoscape.open_sql_file("01-04-raw-gnaf-fix-missing-geocodes.sql"))

    logger.info(f"\t- Step 4 of 7 : tables populated : {datetime.now() - step_start}")
    logger.info("\t\t- fixed missing geocodes")
def create_raw_gnaf_tables(pg_cur):
    """Step 3 of 7: create the raw GNAF tables in the chosen schema.

    The create-tables script carries no schema prefixes, so the search path
    is pointed at the target schema instead (both on the session and inside
    the script text).
    """
    step_start = datetime.now()

    # prep create table sql scripts (note: file doesn't contain any schema prefixes on table names)
    sql = geoscape.open_sql_file("01-03-raw-gnaf-create-tables.sql")

    # point the session at the target schema when it isn't the default
    if settings.raw_gnaf_schema != "public":
        pg_cur.execute("SET search_path = {0}".format(settings.raw_gnaf_schema, ))

    # alter create table script to run on chosen schema
    # (a no-op when the schema is "public")
    sql = sql.replace(
        "SET search_path = public",
        "SET search_path = {0}".format(settings.raw_gnaf_schema, ))

    # set tables to unlogged to speed up the load? (if requested)
    # -- they'll have to be rebuilt using this script again after a system crash --
    unlogged_string = ""
    if settings.unlogged_tables:
        sql = sql.replace("CREATE TABLE ", "CREATE UNLOGGED TABLE ")
        unlogged_string = "UNLOGGED "

    # create raw gnaf tables
    pg_cur.execute(sql)

    logger.info("\t- Step 3 of 7 : {1}tables created : {0}".format(
        datetime.now() - step_start, unlogged_string))
def qa_display_localities(pg_cur, settings):
    """Step 8 of 8: QA the display localities.

    Reports invalid geometries, empty geometries, and localities dropped
    from the display set.
    """
    logger.info("\t- Step 8 of 8 : Start QA")
    step_start = datetime.now()

    # both geometry checks share the same select list and table
    select_prefix = (
        "SELECT locality_pid, locality_name, coalesce(postcode, '') as postcode, state, "
        "address_count, street_count "
        "FROM admin_bdys.locality_bdys_display WHERE ")

    pg_cur.execute(geoscape.prep_sql(select_prefix + "NOT ST_IsValid(geom);", settings))
    display_qa_results("Invalid Geometries", pg_cur)

    pg_cur.execute(geoscape.prep_sql(select_prefix + "ST_IsEmpty(geom);", settings))
    display_qa_results("Empty Geometries", pg_cur)

    pg_cur.execute(geoscape.open_sql_file("08-qa-display-localities.sql", settings))
    display_qa_results("Dropped Localities", pg_cur)

    logger.info("\t- Step 8 of 8 : display localities qa'd : {0}".format(
        datetime.now() - step_start))
def finalise_display_localities(pg_cur, settings):
    """Step 5 of 8: finalise the display-ready locality boundaries."""
    step_start = datetime.now()

    template = geoscape.open_sql_file("05-finalise-display-localities.sql", settings)
    pg_cur.execute(template.format(settings['srid']))

    logger.info("\t- Step 5 of 8 : display localities finalised : {0}".format(
        datetime.now() - step_start))
def verify_locality_polygons(pg_cur, settings):
    """Step 3 of 8: verify the split locality polygons & load messy centroids."""
    step_start = datetime.now()

    pg_cur.execute(
        geoscape.open_sql_file("03a-verify-split-polygons.sql", settings)
        .format(settings['srid']))
    pg_cur.execute(geoscape.open_sql_file("03b-load-messy-centroids.sql", settings))

    # convert messy centroids to GDA2020 if required
    if settings['srid'] == 7844:
        pg_cur.execute(
            geoscape.open_sql_file("03c-load-messy-centroids-gda2020.sql", settings))

    logger.info("\t- Step 3 of 8 : messy locality polygons verified : {0}".format(
        datetime.now() - step_start))
def get_locality_state_border_gaps(pg_cur, settings):
    """Step 4 of 8: find the holes (gaps) in localities along state borders.

    The driving SQL is split into gid-range chunks over the subdivided
    state border buffers so the work can be multiprocessed.
    """
    step_start = datetime.now()

    border_sql = geoscape.open_sql_file("04-create-holes-along-borders.sql", settings)
    sql_list = geoscape.split_sql_into_list(
        pg_cur, border_sql, settings['admin_bdys_schema'],
        "temp_state_border_buffers_subdivided", "ste", "new_gid", settings, logger)
    geoscape.multiprocess_list("sql", sql_list, settings, logger)

    logger.info("\t- Step 4 of 8 : locality holes created : {0}".format(
        datetime.now() - step_start))
def get_split_localities(pg_cur, settings):
    """Step 2 of 8: split localities along state borders.

    The driving SQL is chunked by gid over the temp localities table so the
    splitting can be multiprocessed.
    """
    step_start = datetime.now()

    split_sql = geoscape.open_sql_file(
        "02-split-localities-by-state-borders.sql", settings)
    sql_list = geoscape.split_sql_into_list(
        pg_cur, split_sql, settings['admin_bdys_schema'],
        "temp_localities", "loc", "gid", settings, logger)
    geoscape.multiprocess_list("sql", sql_list, settings, logger)

    logger.info("\t- Step 2 of 8 : localities split by state : {0}".format(
        datetime.now() - step_start))
def index_raw_gnaf(pg_cur):
    """Step 5 of 7: create indexes on the raw GNAF tables.

    The index script holds one statement per line; blank lines and SQL
    comment lines are skipped and the rest run in parallel.
    """
    step_start = datetime.now()

    raw_lines = geoscape.open_sql_file("01-05-raw-gnaf-create-indexes.sql").split("\n")
    sql_list = [line for line in raw_lines
                if line and not line.startswith("--")]

    geoscape.multiprocess_list("sql", sql_list, logger)

    # create distinct new & old locality pid lookup table
    pg_cur.execute(geoscape.open_sql_file(
        "01-05b-create-distinct-locality-pid-linkage-table.sql"))

    logger.info("\t- Step 5 of 7 : indexes created: {0}".format(
        datetime.now() - step_start))
def drop_tables_and_vacuum_db(pg_cur):
    """Steps 1 & 2 of 7: drop existing tables, then optionally VACUUM the DB."""
    # Step 1 of 7 : drop tables
    step_start = datetime.now()
    pg_cur.execute(geoscape.open_sql_file("01-01-drop-tables.sql"))
    logger.info(f"\t- Step 1 of 7 : tables dropped : {datetime.now() - step_start}")

    # Step 2 of 7 : vacuum database (if requested)
    step_start = datetime.now()
    if not settings.vacuum_db:
        logger.info("\t- Step 2 of 7 : database NOT vacuumed")
    else:
        pg_cur.execute("VACUUM")
        logger.info(f"\t- Step 2 of 7 : database vacuumed : {datetime.now() - step_start}")
def create_primary_foreign_keys():
    """Step 6 of 7: create primary & foreign keys on the raw GNAF tables.

    The key-creation SQL file is split on "--" comment markers into
    individual ALTER statements; each gets the raw GNAF schema prefixed to
    its table name and they are run in parallel worker processes.
    """
    start_time = datetime.now()

    key_sql = geoscape.open_sql_file("01-06-raw-gnaf-create-primary-foreign-keys.sql")
    key_sql_list = key_sql.split("--")
    sql_list = []

    for sql in key_sql_list:
        sql = sql.strip()
        if sql.startswith("ALTER "):
            # add schema to tables names, in case raw gnaf schema not the default.
            # BUG FIX: the f-prefix was on the search string instead of the
            # replacement, so the literal text "{settings.raw_gnaf_schema}."
            # was being injected into the SQL rather than the schema name.
            sql = sql.replace("ALTER TABLE ONLY ",
                              f"ALTER TABLE ONLY {settings.raw_gnaf_schema}.")
            sql_list.append(sql)

    # run queries in separate processes
    geoscape.multiprocess_list("sql", sql_list, logger)

    logger.info(
        f"\t- Step 6 of 7 : primary & foreign keys created : {datetime.now() - start_time}"
    )
def load_raw_admin_boundaries(pg_cur):
    """Step 1 of 3: load the raw admin boundary Shapefiles/DBFs into Postgres.

    Walks the admin bdys directory per requested state, builds a create list
    (first occurrence of each target table) and an append list (subsequent
    states' files), then loads creates in parallel and appends serially.
    """
    start_time = datetime.now()

    # drop existing views
    pg_cur.execute(geoscape.open_sql_file("02-01-drop-admin-bdy-views.sql"))

    # add authority code tables
    # NOTE(review): this mutates settings.states_to_load in place — callers
    # after this point will see "authority_code" appended; confirm intended.
    settings.states_to_load.extend(["authority_code"])

    # get file list
    table_list = list()    # target Postgres table names seen so far
    create_list = list()   # first file per table -> CREATE (delete_table=True)
    append_list = list()   # later states' files -> INSERT append
    for state in settings.states_to_load:
        state = state.lower()
        # get a dictionary of Shapefiles and DBFs matching the state
        for root, dirs, files in os.walk(settings.admin_bdys_local_directory):
            for file_name in files:
                if file_name.lower().startswith(state + "_"):
                    if file_name.lower().endswith(".shp") or file_name.lower().endswith("_shp.dbf"):
                        file_dict = dict()

                        # list .shp files and standalone .dbf files - ignore the rest
                        if file_name.lower().endswith(".shp"):
                            file_dict["spatial"] = True
                            file_dict["file_path"] = os.path.join(root, file_name)
                        elif file_name.lower().endswith(".dbf") and not file_name.lower().endswith(
                                "_polygon_shp.dbf") and not file_name.lower().endswith("_point_shp.dbf"):
                            file_dict["spatial"] = False
                            file_dict["file_path"] = os.path.join(root, file_name)

                        if file_dict.get("file_path") is not None:
                            # target table: state prefix becomes "aus_", extensions stripped
                            file_dict["pg_table"] = file_name.lower().replace(state + "_", "aus_", 1)\
                                .replace(".dbf", "").replace(".shp", "").replace("_shp", "")
                            file_dict["pg_schema"] = settings.raw_admin_bdys_schema

                            # set command line parameters depending on whether this is the 1st state
                            table_list_add = False
                            if file_dict["pg_table"] not in table_list:
                                table_list_add = True
                                file_dict["delete_table"] = True
                            else:
                                file_dict["delete_table"] = False

                            # if locality file from Towns folder: don't add - it's a duplicate
                            if "town points" not in file_dict["file_path"].lower():
                                if table_list_add:
                                    table_list.append(file_dict["pg_table"])
                                    create_list.append(file_dict)
                                else:
                                    # # don't add duplicates if more than one Authority Code file per boundary type
                                    # if "_aut_" not in file_name.lower():
                                    append_list.append(file_dict)
                            else:
                                # Towns folder: only skip the duplicate locality DBF
                                if not file_dict["file_path"].lower().endswith("_locality_shp.dbf"):
                                    if table_list_add:
                                        table_list.append(file_dict["pg_table"])
                                        create_list.append(file_dict)
                                    else:
                                        # # don't add duplicates if more than one Authority Code file per boundary type
                                        # if "_aut_" not in file_name.lower():
                                        append_list.append(file_dict)

    # [print(table) for table in create_list]
    # print("---------------------------------------------------------------------------------------")
    # [print(table) for table in append_list]

    # are there any files to load?
    if len(create_list) == 0:
        logger.fatal("No admin boundary files found\nACTION: Check your 'admin-bdys-path' argument")
    else:
        # load files in separate processes
        geoscape.multiprocess_shapefile_load(create_list, logger)

        # Run the appends one at a time (Can't multiprocess as sets of parallel INSERTs can cause database deadlocks)
        for shp in append_list:
            result = geoscape.import_shapefile_to_postgres(shp["file_path"], shp["pg_table"], shp["pg_schema"],
                                                           shp["delete_table"], shp["spatial"])
            if result != "SUCCESS":
                logger.warning(result)

        logger.info("\t- Step 1 of 3 : raw admin boundaries loaded : {0}".format(datetime.now() - start_time))
def main():
    """Entry point: connect to Postgres, detect the locality-boundary SRID,
    then run the 8-step clean-locality build pipeline.

    Returns True on success, False if the database connection fails; exits
    the process on an unsupported SRID.
    """
    full_start_time = datetime.now()

    # parse command line arguments into a settings dict
    args = set_arguments()
    settings = get_settings(args)

    # connect to Postgres
    try:
        pg_conn = psycopg2.connect(settings['pg_connect_string'])
    except psycopg2.Error:
        logger.fatal("Unable to connect to database\nACTION: Check your Postgres parameters and/or database security")
        return False

    pg_conn.autocommit = True
    pg_cur = pg_conn.cursor()

    # log postgres/postgis versions being used
    geoscape.check_postgis_version(pg_cur, settings, logger)
    logger.info("")

    # get SRID of locality boundaries
    srid_sql = geoscape.prep_sql(
        f"select Find_SRID('{settings['admin_bdys_schema']}', 'locality_bdys', 'geom')",
        settings)
    pg_cur.execute(srid_sql)
    settings['srid'] = int(pg_cur.fetchone()[0])

    # only GDA94 and GDA2020 are supported
    if settings['srid'] == 4283:
        logger.info(f"Locality boundary coordinate system is EPSG:{settings['srid']} (GDA94)")
    elif settings['srid'] == 7844:
        logger.info(f"Locality boundary coordinate system is EPSG:{settings['srid']} (GDA2020)")
    else:
        logger.fatal("Invalid coordinate system (SRID) - EXITING!\nValid values are 4283 (GDA94) and 7844 (GDA2020)")
        exit()

    # add Postgres functions to clean out non-polygon geometries from GeometryCollections
    pg_cur.execute(
        geoscape.open_sql_file("create-polygon-intersection-function.sql", settings)
        .format(settings['srid']))
    pg_cur.execute(
        geoscape.open_sql_file("create-multi-linestring-split-function.sql", settings))

    # let's build some clean localities!
    logger.info("")
    create_states_and_prep_localities(settings)
    get_split_localities(pg_cur, settings)
    verify_locality_polygons(pg_cur, settings)
    get_locality_state_border_gaps(pg_cur, settings)
    finalise_display_localities(pg_cur, settings)
    create_display_postcodes(pg_cur, settings)
    export_display_localities(pg_cur, settings)
    qa_display_localities(pg_cur, settings)

    pg_cur.close()
    pg_conn.close()

    logger.info("Total time : {0}".format(datetime.now() - full_start_time))
    return True
def create_reference_tables(pg_cur):
    """Build the 14-step reference (gnaf) schema: localities, streets,
    addresses, lookups, derived postcode boundaries and their indexes/keys.
    """
    # set postgres search path back to the default
    pg_cur.execute("SET search_path = public, pg_catalog")

    # Step 1 of 14 : create reference tables
    start_time = datetime.now()
    pg_cur.execute(geoscape.open_sql_file("03-01-reference-create-tables.sql"))
    logger.info("\t- Step 1 of 14 : create reference tables : {0}".format(datetime.now() - start_time))

    # Step 2 of 14 : populate localities
    start_time = datetime.now()
    pg_cur.execute(geoscape.open_sql_file("03-02-reference-populate-localities.sql"))
    logger.info("\t- Step 2 of 14 : localities populated : {0}".format(datetime.now() - start_time))

    # Step 3 of 14 : populate locality aliases
    start_time = datetime.now()
    pg_cur.execute(geoscape.open_sql_file("03-03-reference-populate-locality-aliases.sql"))
    logger.info("\t- Step 3 of 14 : locality aliases populated : {0}".format(datetime.now() - start_time))

    # Step 4 of 14 : populate locality neighbours
    start_time = datetime.now()
    pg_cur.execute(geoscape.open_sql_file("03-04-reference-populate-locality-neighbours.sql"))
    logger.info("\t- Step 4 of 14 : locality neighbours populated : {0}".format(datetime.now() - start_time))

    # Step 5 of 14 : populate streets
    start_time = datetime.now()
    pg_cur.execute(geoscape.open_sql_file("03-05-reference-populate-streets.sql"))
    logger.info("\t- Step 5 of 14 : streets populated : {0}".format(datetime.now() - start_time))

    # Step 6 of 14 : populate street aliases
    start_time = datetime.now()
    pg_cur.execute(geoscape.open_sql_file("03-06-reference-populate-street-aliases.sql"))
    logger.info("\t- Step 6 of 14 : street aliases populated : {0}".format(datetime.now() - start_time))

    # Step 7 of 14 : populate addresses, using multiprocessing
    # (SQL is chunked by gid over the streets table for parallel runs)
    start_time = datetime.now()
    sql = geoscape.open_sql_file("03-07-reference-populate-addresses-1.sql")
    sql_list = geoscape.split_sql_into_list(pg_cur, sql, settings.gnaf_schema, "streets", "str", "gid", logger)
    if sql_list is not None:
        geoscape.multiprocess_list("sql", sql_list, logger)
    pg_cur.execute(geoscape.prep_sql("ANALYZE gnaf.temp_addresses;"))
    logger.info("\t- Step 7 of 14 : addresses populated : {0}".format(datetime.now() - start_time))

    # Step 8 of 14 : populate principal alias lookup
    start_time = datetime.now()
    pg_cur.execute(geoscape.open_sql_file("03-08-reference-populate-address-alias-lookup.sql"))
    logger.info("\t- Step 8 of 14 : principal alias lookup populated : {0}".format(datetime.now() - start_time))

    # Step 9 of 14 : populate primary secondary lookup
    start_time = datetime.now()
    pg_cur.execute(geoscape.open_sql_file("03-09-reference-populate-address-secondary-lookup.sql"))
    pg_cur.execute(geoscape.prep_sql("VACUUM ANALYSE gnaf.address_secondary_lookup"))
    logger.info("\t- Step 9 of 14 : primary secondary lookup populated : {0}".format(datetime.now() - start_time))

    # Step 10 of 14 : split the Melbourne locality into its 2 postcodes (3000, 3004)
    start_time = datetime.now()
    pg_cur.execute(geoscape.open_sql_file("03-10-reference-split-melbourne.sql"))
    logger.info("\t- Step 10 of 14 : Melbourne split : {0}".format(datetime.now() - start_time))

    # Step 11 of 14 : finalise localities assigned to streets and addresses
    start_time = datetime.now()
    pg_cur.execute(geoscape.open_sql_file("03-11-reference-finalise-localities.sql"))
    logger.info("\t- Step 11 of 14 : localities finalised : {0}".format(datetime.now() - start_time))

    # Step 12 of 14 : finalise addresses, using multiprocessing
    # (chunked by gid over the localities table)
    start_time = datetime.now()
    sql = geoscape.open_sql_file("03-12-reference-populate-addresses-2.sql")
    sql_list = geoscape.split_sql_into_list(pg_cur, sql, settings.gnaf_schema, "localities", "loc", "gid", logger)
    if sql_list is not None:
        geoscape.multiprocess_list("sql", sql_list, logger)

    # turf the temp address table
    pg_cur.execute(geoscape.prep_sql("DROP TABLE IF EXISTS gnaf.temp_addresses"))
    logger.info("\t- Step 12 of 14 : addresses finalised : {0}".format(datetime.now() - start_time))

    # Step 13 of 14 : create almost correct postcode boundaries by aggregating localities,
    # using multiprocessing - one statement per state, via an injected WHERE clause
    start_time = datetime.now()
    sql = geoscape.open_sql_file("03-13-reference-derived-postcode-bdys.sql")
    sql_list = []
    for state in settings.states_to_load:
        state_sql = sql.replace("GROUP BY ", "WHERE state = '{0}' GROUP BY ".format(state))
        sql_list.append(state_sql)
    geoscape.multiprocess_list("sql", sql_list, logger)

    # create analysis table?
    if settings.st_subdivide_supported:
        pg_cur.execute(geoscape.open_sql_file("03-13a-create-postcode-analysis-table.sql"))
    logger.info("\t- Step 13 of 14 : postcode boundaries created : {0}".format(datetime.now() - start_time))

    # Step 14 of 14 : create indexes, primary and foreign keys, using multiprocessing
    # (one statement per non-blank, non-comment line of the index script)
    start_time = datetime.now()
    raw_sql_list = geoscape.open_sql_file("03-14-reference-create-indexes.sql").split("\n")
    sql_list = []
    for sql in raw_sql_list:
        if sql[0:2] != "--" and sql[0:2] != "":
            sql_list.append(sql)
    geoscape.multiprocess_list("sql", sql_list, logger)
    logger.info("\t- Step 14 of 14 : create primary & foreign keys and indexes : {0}".format(
        datetime.now() - start_time))
def boundary_tag_gnaf(pg_cur):
    """Tag every GNAF address with the admin boundary it falls inside (7 steps).

    Builds wide output tables (one pid/name column pair per boundary type),
    spatially tags addresses into per-boundary temp tables, then joins the
    temp tables into the principal output, copies tags to alias addresses
    and exposes the result as a view.
    """
    # create bdy table list
    # remove localities, postcodes and states as these IDs are already assigned to GNAF addresses
    table_list = list()
    for table in settings.admin_bdy_list:
        if table[0] not in ["locality_bdys", "postcode_bdys", "state_bdys"]:
            # if no analysis tables created - use the full tables instead of the subdivided ones
            # WARNING: this can add hours to the processing
            if settings.st_subdivide_supported:
                table_name = "{}_analysis".format(table[0], )
            else:
                table_name = table[0]

            table_list.append([table_name, table[1]])

    # create bdy tagged address tables
    for address_table in ["address_principal", "address_alias"]:
        pg_cur.execute("DROP TABLE IF EXISTS {}.{}_admin_boundaries CASCADE".format(
            settings.gnaf_schema, address_table))
        create_table_list = list()
        create_table_list.append(
            "CREATE TABLE {}.{}_admin_boundaries (gid serial NOT NULL,"
            "gnaf_pid text NOT NULL,"
            # "alias_principal character(1) NOT NULL,"
            "locality_pid text NOT NULL,"
            "old_locality_pid text NULL,"
            "locality_name text NOT NULL,"
            "postcode text,"
            "state text NOT NULL".format(settings.gnaf_schema, address_table))

        # one pid/name text column pair per boundary type
        for table in table_list:
            pid_field = table[1]
            name_field = pid_field.replace("_pid", "_name")
            create_table_list.append(", {} text, {} text".format(pid_field, name_field))
        create_table_list.append(
            ") WITH (OIDS=FALSE);ALTER TABLE {}.{}_admin_boundaries OWNER TO {}"
            .format(settings.gnaf_schema, address_table, settings.pg_user))
        pg_cur.execute("".join(create_table_list))

    # Step 1 of 7 : tag gnaf addresses with admin boundary IDs, using multiprocessing
    start_time = datetime.now()

    # create temp tables
    template_sql = geoscape.open_sql_file("04-01a-bdy-tag-create-table-template.sql")
    for table in table_list:
        pg_cur.execute(template_sql.format(table[0], ))

    # create temp tables of bdy tagged gnaf_pids
    template_sql = geoscape.open_sql_file("04-01b-bdy-tag-template.sql")
    sql_list = list()
    for table in table_list:
        sql = template_sql.format(table[0], table[1])
        # chunk each boundary type's tagging SQL by gid for parallel runs
        short_sql_list = geoscape.split_sql_into_list(
            pg_cur, sql, settings.admin_bdys_schema, table[0], "bdys", "gid", logger)

        if short_sql_list is not None:
            sql_list.extend(short_sql_list)

    # logger.info("\n".join(sql_list))

    if sql_list is not None:
        geoscape.multiprocess_list("sql", sql_list, logger)

    logger.info("\t- Step 1 of 7 : principal addresses tagged with admin boundary IDs: {}"
                .format(datetime.now() - start_time, ))
    start_time = datetime.now()

    # Step 2 of 7 : delete invalid matches, create indexes and analyse tables
    # (an address tagged into a different state's boundary is a false hit,
    # except for the Other Territories pseudo-state)
    sql_list = list()
    for table in table_list:
        sql = "DELETE FROM {0}.temp_{1}_tags WHERE gnaf_state <> bdy_state AND gnaf_state <> 'OT';" \
              "CREATE INDEX temp_{1}_tags_gnaf_pid_idx ON {0}.temp_{1}_tags USING btree(gnaf_pid);" \
              "ANALYZE {0}.temp_{1}_tags".format(settings.gnaf_schema, table[0])
        sql_list.append(sql)
    geoscape.multiprocess_list("sql", sql_list, logger)

    logger.info("\t- Step 2 of 7 : principal addresses - invalid matches deleted & bdy tag indexes created : {}"
                .format(datetime.now() - start_time, ))
    start_time = datetime.now()

    # Step 3 of 7 : insert boundary tagged addresses

    # create insert statement for multiprocessing
    # (INSERT column list, SELECT list and LEFT JOINs are built in lockstep,
    # one entry per boundary type)
    insert_field_list = list()
    insert_field_list.append("(gnaf_pid, locality_pid, old_locality_pid, locality_name, postcode, state")

    insert_join_list = list()
    insert_join_list.append("FROM {}.address_principals AS pnts ".format(settings.gnaf_schema, ))

    select_field_list = list()
    select_field_list.append("SELECT pnts.gnaf_pid, pnts.locality_pid, pnts.old_locality_pid, "
                             "pnts.locality_name, pnts.postcode, pnts.state")

    drop_table_list = list()

    for table in table_list:
        pid_field = table[1]
        name_field = pid_field.replace("_pid", "_name")
        insert_field_list.append(", {0}, {1}".format(pid_field, name_field))
        select_field_list.append(", temp_{0}_tags.bdy_pid, temp_{0}_tags.bdy_name ".format(table[0]))
        insert_join_list.append(
            "LEFT OUTER JOIN {0}.temp_{1}_tags ON pnts.gnaf_pid = temp_{1}_tags.gnaf_pid "
            .format(settings.gnaf_schema, table[0]))
        drop_table_list.append(
            "DROP TABLE IF EXISTS {0}.temp_{1}_tags;".format(settings.gnaf_schema, table[0]))

    insert_field_list.append(") ")

    insert_statement_list = list()
    insert_statement_list.append(
        "INSERT INTO {0}.address_principal_admin_boundaries ".format(settings.gnaf_schema, ))
    insert_statement_list.append("".join(insert_field_list))
    insert_statement_list.append("".join(select_field_list))
    insert_statement_list.append("".join(insert_join_list))

    sql = "".join(insert_statement_list) + ";"
    sql_list = geoscape.split_sql_into_list(pg_cur, sql, settings.gnaf_schema,
                                            "address_principals", "pnts", "gid", logger)
    # logger.info("\n".join(sql_list)

    if sql_list is not None:
        geoscape.multiprocess_list("sql", sql_list, logger)

    # drop temp tables
    pg_cur.execute("".join(drop_table_list))

    # get stats
    pg_cur.execute("ANALYZE {0}.address_principal_admin_boundaries ".format(settings.gnaf_schema))

    logger.info("\t- Step 3 of 7 : principal addresses - bdy tags added to output table : {}"
                .format(datetime.now() - start_time, ))
    start_time = datetime.now()

    # Step 4 of 7 : add index to output table
    sql = "CREATE INDEX address_principal_admin_boundaries_gnaf_pid_idx " \
          "ON {0}.address_principal_admin_boundaries USING btree (gnaf_pid)"\
        .format(settings.gnaf_schema)
    pg_cur.execute(sql)

    logger.info("\t- Step 4 of 7 : created index on bdy tagged address table : {}".format(
        datetime.now() - start_time, ))
    start_time = datetime.now()

    # Step 5 of 7 : log duplicates - happens when 2 boundaries overlap by a very small amount
    # (can be ignored if there's a small number of records affected)
    sql = "SELECT gnaf_pid FROM (SELECT Count(*) AS cnt, gnaf_pid FROM {0}.address_principal_admin_boundaries " \
          "GROUP BY gnaf_pid) AS sqt WHERE cnt > 1".format(settings.gnaf_schema)
    pg_cur.execute(sql)

    # get cursor description to test if any rows returned safely
    columns = pg_cur.description

    # log gnaf_pids that got duplicate results
    if columns is not None:
        duplicates = pg_cur.fetchall()
        gnaf_pids = list()

        for duplicate in duplicates:
            gnaf_pids.append("\t\t" + duplicate[0])

        if len(gnaf_pids) > 0:
            logger.warning("\t- Step 5 of 7 : found boundary tag duplicates : {}".format(
                datetime.now() - start_time, ))
            logger.warning("\n".join(gnaf_pids))
        else:
            logger.info("\t- Step 5 of 7 : no boundary tag duplicates : {}".format(
                datetime.now() - start_time, ))
    else:
        logger.info("\t- Step 5 of 7 : no boundary tag duplicates : {}".format(
            datetime.now() - start_time, ))

    # Step 6 of 7 : Copy principal boundary tags to alias addresses
    # NOTE(review): start_time is not reset between steps 5 and 6, so this
    # elapsed time also includes step 5 — confirm whether intentional.
    pg_cur.execute(geoscape.open_sql_file("04-06-bdy-tags-for-alias-addresses.sql"))
    logger.info("\t- Step 6 of 7 : alias addresses boundary tagged : {}".format(
        datetime.now() - start_time, ))
    start_time = datetime.now()

    # Step 7 of 7 : Create view of all bdy tags
    pg_cur.execute(geoscape.open_sql_file("04-07-create-bdy-tag-view.sql"))
    logger.info("\t- Step 7 of 7 : boundary tagged address view created : {}".format(
        datetime.now() - start_time, ))
def export_display_localities(pg_cur, settings):
    """Step 7 of 8: export the display localities as a zipped Shapefile and a
    zipped GeoJSON FeatureCollection.

    The Shapefile export shells out to pgsql2shp (with PGPASSWORD set via
    the platform's env-var syntax); the GeoJSON is built row-by-row from a
    ST_AsGeoJSON query.
    """
    start_time = datetime.now()

    # create export path
    pathlib.Path(settings['output_path']).mkdir(parents=True, exist_ok=True)

    sql = geoscape.open_sql_file("07-export-display-localities.sql", settings)

    # choose the env-var assignment keyword for the current platform
    if platform.system() == "Windows":
        password_str = "SET"
    else:
        password_str = "export"

    # NOTE(review): the password is embedded in a shell command line —
    # it may be visible to other local users via the process list.
    password_str += " PGPASSWORD={0}&&".format(settings['pg_password'])

    cmd = password_str + "pgsql2shp -f \"{0}\" -u {1} -h {2} -p {3} {4} \"{5}\""\
        .format(settings['shapefile_export_path'], settings['pg_user'], settings['pg_host'],
                settings['pg_port'], settings['pg_db'], sql)

    # logger.info(cmd
    geoscape.run_command_line(cmd)

    # zip shapefile - GDA2020 output gets a distinguishing zip name
    if settings['srid'] == 4283:
        shp_zip_path = settings['shapefile_name'] + "-shapefile.zip"
    else:
        shp_zip_path = settings['shapefile_name'] + "-gda2020-shapefile.zip"

    output_zipfile = os.path.join(settings['output_path'], shp_zip_path)

    zf = zipfile.ZipFile(output_zipfile, mode="w")
    for ext in settings['shapefile_extensions']:
        file_name = settings['shapefile_name'] + ext
        file_path = os.path.join(settings['output_path'], file_name)
        zf.write(file_path, file_name, compress_type=zipfile.ZIP_DEFLATED)
    zf.close()

    time_elapsed = datetime.now() - start_time

    logger.info("\t- Step 7 of 8 : display localities exported to SHP : {0}".format(time_elapsed))
    # pgsql2shp failures don't raise here, so a suspiciously quick run is
    # the only available failure hint
    if time_elapsed.seconds < 2:
        logger.warning("\t\t- This step took < 2 seconds - it may have failed silently. "
                       "Check your output directory!")

    start_time = datetime.now()

    # Export as GeoJSON FeatureCollection
    sql = geoscape.prep_sql(
        "SELECT gid, locality_pid, locality_name, COALESCE(postcode, '') AS postcode, state, "
        "locality_class, address_count, street_count, ST_AsGeoJSON(geom, 5, 0) AS geom "
        "FROM {0}.locality_bdys_display".format(settings['admin_bdys_schema']), settings)
    pg_cur.execute(sql)

    # Create the GeoJSON output with an array of dictionaries containing the field names and values
    # get column names from cursor
    column_names = [desc[0] for desc in pg_cur.description]

    json_dicts = []
    row = pg_cur.fetchone()

    if row is not None:
        while row is not None:
            rec = {}
            props = {}
            i = 0
            rec["type"] = "Feature"

            # geom column becomes the Feature geometry; all others are properties
            for column in column_names:
                if column == "geometry" or column == "geom":
                    rec["geometry"] = row[i]
                else:
                    props[column] = row[i]
                i += 1

            rec["properties"] = props
            json_dicts.append(rec)
            row = pg_cur.fetchone()

    # un-escape the geometry strings so they are embedded as raw JSON objects
    gj = json.dumps(json_dicts).replace("\\", "").replace('"{', '{').replace('}"', '}')
    geojson = ''.join(['{"type":"FeatureCollection","features":', gj, '}'])

    text_file = open(settings['geojson_export_path'], "w")
    text_file.write(geojson)
    text_file.close()

    # compress GeoJSON - GDA2020 output gets a distinguishing zip name
    if settings['srid'] == 4283:
        geojson_zip_path = settings['geojson_export_path'].replace(".geojson", "-geojson.zip")
    else:
        geojson_zip_path = settings['geojson_export_path'].replace(".geojson", "-gda2020-geojson.zip")

    zipfile.ZipFile(geojson_zip_path, mode="w")\
        .write(settings['geojson_export_path'], compress_type=zipfile.ZIP_DEFLATED)

    logger.info("\t- Step 7 of 8 : display localities exported to GeoJSON : {0}".format(
        datetime.now() - start_time))