Example #1
def create_states_and_prep_localities(settings):
    start_time = datetime.now()
    sql_list = [
        geoscape.open_sql_file("01a-create-states-from-sa4s.sql",
                               settings).format(settings['srid']),
        geoscape.open_sql_file("01b-prep-locality-boundaries.sql",
                               settings).format(settings['srid'])
    ]
    geoscape.multiprocess_list("sql", sql_list, settings, logger)
    logger.info(
        "\t- Step 1 of 8 : state table created & localities prepped : {0}".
        format(datetime.now() - start_time))
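
Several of these steps hand a list of SQL statements to geoscape.multiprocess_list, whose implementation isn't shown here. A minimal sketch of the assumed pattern (each worker opens its own psycopg2 connection and runs one statement; the function and parameter names below are assumptions, not the geoscape API):

import multiprocessing

import psycopg2


def run_sql(args):
    # worker: open a dedicated connection, run one statement, close
    connect_string, sql = args
    conn = psycopg2.connect(connect_string)
    conn.autocommit = True
    cur = conn.cursor()
    cur.execute(sql)
    cur.close()
    conn.close()


def multiprocess_sql_list(sql_list, connect_string, processes=4):
    # hypothetical stand-in for geoscape.multiprocess_list("sql", sql_list, settings, logger)
    with multiprocessing.Pool(processes) as pool:
        pool.map(run_sql, [(connect_string, sql) for sql in sql_list])
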
Example #2
def prep_admin_bdys(pg_cur):
    # Step 2 of 3 : create admin bdy tables ready to be used
    start_time = datetime.now()

    # create tables using multiprocessing - the "-- # --" flag in each file splits it into sets of statements
    sql_list = geoscape.open_sql_file(
        "02-02a-prep-admin-bdys-tables.sql").split("-- # --")
    sql_list = sql_list + geoscape.open_sql_file(
        "02-02b-prep-census-2011-bdys-tables.sql").split("-- # --")
    sql_list = sql_list + geoscape.open_sql_file(
        "02-02c-prep-census-2016-bdys-tables.sql").split("-- # --")
    sql_list = sql_list + geoscape.open_sql_file(
        "02-02d-prep-census-2021-bdys-tables.sql").split("-- # --")

    # # Account for bdys that are not in states to load - not yet working
    # for sql in sql_list:
    #     if settings.states_to_load == ["OT"] and ".commonwealth_electorates " in sql:
    #         sql_list.remove(sql)
    #
    #     if settings.states_to_load == ["ACT"] and ".local_government_areas " in sql:
    #         sql_list.remove(sql)
    #
    #     logger.info(settings.states_to_load)
    #
    #     if not ("NT" in settings.states_to_load or "SA" in settings.states_to_load
    #             or "VIC" in settings.states_to_load or "WA" in settings.states_to_load) \
    #             and ".local_government_wards " in sql:
    #         sql_list.remove(sql)
    #
    #     if settings.states_to_load == ["OT"] and ".state_lower_house_electorates " in sql:
    #         sql_list.remove(sql)
    #
    #     if not ("TAS" in settings.states_to_load or "VIC" in settings.states_to_load
    #             or "WA" in settings.states_to_load) and ".state_upper_house_electorates " in sql:
    #         sql_list.remove(sql)

    geoscape.multiprocess_list("sql", sql_list, logger)

    # Special case - remove custom outback bdy if South Australia not requested
    if "SA" not in settings.states_to_load:
        pg_cur.execute(
            geoscape.prep_sql(
                "DELETE FROM admin_bdys.locality_bdys WHERE locality_pid = 'SA999999'"
            ))
        pg_cur.execute(
            geoscape.prep_sql("VACUUM ANALYZE admin_bdys.locality_bdys"))

    logger.info("\t- Step 2 of 3 : admin boundaries prepped : {0}".format(
        datetime.now() - start_time))
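
The disabled filter above removes items from sql_list while iterating over it, which silently skips elements. If it is ever revived, building a new list avoids that; a sketch using the same conditions (assumes settings.states_to_load is available, as elsewhere in this script):

def filter_admin_bdy_sql(sql_list, states_to_load):
    # reworks the commented-out filter above: keep only the statements relevant
    # to the requested states, without mutating the list being iterated over
    filtered = []
    for sql in sql_list:
        if states_to_load == ["OT"] and ".commonwealth_electorates " in sql:
            continue
        if states_to_load == ["ACT"] and ".local_government_areas " in sql:
            continue
        if (not any(state in states_to_load for state in ("NT", "SA", "VIC", "WA"))
                and ".local_government_wards " in sql):
            continue
        if states_to_load == ["OT"] and ".state_lower_house_electorates " in sql:
            continue
        if (not any(state in states_to_load for state in ("TAS", "VIC", "WA"))
                and ".state_upper_house_electorates " in sql):
            continue
        filtered.append(sql)
    return filtered
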
Example #3
def create_display_postcodes(pg_cur, settings):
    start_time = datetime.now()
    pg_cur.execute(
        geoscape.open_sql_file("06-create-display-postcodes.sql",
                               settings).format(settings['srid']))
    logger.info("\t- Step 6 of 8 : display postcodes created : {0}".format(
        datetime.now() - start_time))
Example #4
def create_admin_bdys_for_analysis():
    # Step 3 of 3 : create admin bdy tables optimised for spatial analysis
    start_time = datetime.now()

    if settings.st_subdivide_supported:
        template_sql = geoscape.open_sql_file(
            "02-03-create-admin-bdy-analysis-tables_template.sql")
        sql_list = list()

        for table in settings.admin_bdy_list:
            sql = template_sql.format(table[0], table[1])
            if table[0] == "locality_bdys":  # special case, need to change schema name
                # sql = sql.replace(settings.raw_admin_bdys_schema, settings.admin_bdys_schema)
                sql = sql.replace("name", "locality_name")
                # add postcodes
                sql = sql.replace(
                    "locality_name text NOT NULL,",
                    "old_locality_pid text NULL, locality_name text NOT NULL, postcode text NULL,"
                )
                sql = sql.replace(
                    "locality_name,",
                    "old_locality_pid, locality_name, postcode,")

            sql_list.append(sql)
        geoscape.multiprocess_list("sql", sql_list, logger)
        logger.info(
            "\t- Step 3 of 3 : admin boundaries for analysis created : {0}".
            format(datetime.now() - start_time))
    else:
        logger.warning(
            "\t- Step 3 of 3 : admin boundaries for analysis NOT created - "
            "requires PostGIS 2.2+ with GEOS 3.5.0+")
Example #5
def populate_raw_gnaf(pg_cur):
    # Step 4 of 7 : load raw gnaf authority & state tables
    start_time = datetime.now()

    # authority code file list
    sql_list = get_raw_gnaf_files("authority_code")

    # add state file lists
    for state in settings.states_to_load:
        logger.info(f"\t\t- Loading state {state}")
        sql_list.extend(get_raw_gnaf_files(state))

    # are there any files to load?
    if len(sql_list) == 0:
        logger.fatal(
            "No raw GNAF PSV files found\nACTION: Check your 'gnaf_network_directory' path"
        )
        logger.fatal("\t- Step 4 of 7 : table populate FAILED!")
    else:
        # load all PSV files using multiprocessing
        geoscape.multiprocess_list("sql", sql_list, logger)

        # fix missing geocodes (added due to missing data in 202202 release)
        sql = geoscape.open_sql_file("01-04-raw-gnaf-fix-missing-geocodes.sql")
        pg_cur.execute(sql)

        logger.info(
            f"\t- Step 4 of 7 : tables populated : {datetime.now() - start_time}"
        )
        logger.info("\t\t- fixed missing geocodes")
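
get_raw_gnaf_files isn't one of these examples; its assumed job is to turn GNAF PSV files whose names start with a given prefix into COPY statements. A purely hypothetical sketch (the table-naming rule and COPY options are assumptions; only the setting names come from the surrounding code):

import os


def get_raw_gnaf_files(prefix):
    # hypothetical sketch: build one COPY statement per matching GNAF PSV file
    sql_list = []
    for root, dirs, files in os.walk(settings.gnaf_network_directory):
        for file_name in files:
            if file_name.lower().startswith(prefix.lower()) and file_name.lower().endswith(".psv"):
                table_name = os.path.splitext(file_name)[0].lower()  # assumed naming rule
                sql_list.append("COPY {0}.{1} FROM '{2}' DELIMITER '|' CSV HEADER".format(
                    settings.raw_gnaf_schema, table_name, os.path.join(root, file_name)))
    return sql_list
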
Example #6
def create_raw_gnaf_tables(pg_cur):
    # Step 3 of 7 : create tables
    start_time = datetime.now()

    # prep create table sql scripts (note: file doesn't contain any schema prefixes on table names)
    sql = geoscape.open_sql_file("01-03-raw-gnaf-create-tables.sql")

    # set search path
    if settings.raw_gnaf_schema != "public":
        pg_cur.execute("SET search_path = {0}".format(
            settings.raw_gnaf_schema, ))

        # alter create table script to run on chosen schema
        sql = sql.replace(
            "SET search_path = public",
            "SET search_path = {0}".format(settings.raw_gnaf_schema, ))

    # set tables to unlogged to speed up the load? (if requested)
    # -- they'll have to be rebuilt using this script again after a system crash --
    if settings.unlogged_tables:
        sql = sql.replace("CREATE TABLE ", "CREATE UNLOGGED TABLE ")
        unlogged_string = "UNLOGGED "
    else:
        unlogged_string = ""

    # create raw gnaf tables
    pg_cur.execute(sql)

    logger.info("\t- Step 3 of 7 : {1}tables created : {0}".format(
        datetime.now() - start_time, unlogged_string))
Example #7
def qa_display_localities(pg_cur, settings):
    logger.info("\t- Step 8 of 8 : Start QA")
    start_time = datetime.now()

    pg_cur.execute(
        geoscape.prep_sql(
            "SELECT locality_pid, locality_name, coalesce(postcode, '') as postcode, state, "
            "address_count, street_count "
            "FROM admin_bdys.locality_bdys_display WHERE NOT ST_IsValid(geom);",
            settings))
    display_qa_results("Invalid Geometries", pg_cur)

    pg_cur.execute(
        geoscape.prep_sql(
            "SELECT locality_pid, locality_name, coalesce(postcode, '') as postcode, state, "
            "address_count, street_count "
            "FROM admin_bdys.locality_bdys_display WHERE ST_IsEmpty(geom);",
            settings))
    display_qa_results("Empty Geometries", pg_cur)

    pg_cur.execute(
        geoscape.open_sql_file("08-qa-display-localities.sql", settings))
    display_qa_results("Dropped Localities", pg_cur)

    logger.info("\t- Step 8 of 8 : display localities qa'd : {0}".format(
        datetime.now() - start_time))
Example #8
def finalise_display_localities(pg_cur, settings):
    start_time = datetime.now()
    pg_cur.execute(
        geoscape.open_sql_file("05-finalise-display-localities.sql",
                               settings).format(settings['srid']))
    logger.info("\t- Step 5 of 8 : display localities finalised : {0}".format(
        datetime.now() - start_time))
Example #9
def verify_locality_polygons(pg_cur, settings):
    start_time = datetime.now()
    pg_cur.execute(
        geoscape.open_sql_file("03a-verify-split-polygons.sql",
                               settings).format(settings['srid']))
    pg_cur.execute(
        geoscape.open_sql_file("03b-load-messy-centroids.sql", settings))

    # convert messy centroids to GDA2020 if required
    if settings['srid'] == 7844:
        pg_cur.execute(
            geoscape.open_sql_file("03c-load-messy-centroids-gda2020.sql",
                                   settings))

    logger.info(
        "\t- Step 3 of 8 : messy locality polygons verified : {0}".format(
            datetime.now() - start_time))
Example #10
def get_locality_state_border_gaps(pg_cur, settings):
    start_time = datetime.now()
    sql = geoscape.open_sql_file("04-create-holes-along-borders.sql", settings)
    sql_list = geoscape.split_sql_into_list(
        pg_cur, sql, settings['admin_bdys_schema'],
        "temp_state_border_buffers_subdivided", "ste", "new_gid", settings,
        logger)
    geoscape.multiprocess_list("sql", sql_list, settings, logger)
    logger.info("\t- Step 4 of 8 : locality holes created : {0}".format(
        datetime.now() - start_time))
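
geoscape.split_sql_into_list is used here and in several later steps to parallelise one statement across workers. A rough sketch of the assumed idea, not the geoscape implementation: read the min/max of the nominated id column and emit one id-banded copy of the statement per chunk (this assumes the statement can simply take a trailing WHERE clause):

def split_sql_into_list_sketch(pg_cur, the_sql, schema, table, alias, id_field, chunks=4):
    # sketch only: band the statement into chunks by ranges of the id column
    pg_cur.execute("SELECT MIN({2}), MAX({2}) FROM {0}.{1}".format(schema, table, id_field))
    min_id, max_id = pg_cur.fetchone()
    if min_id is None:
        return None  # nothing to split

    step = (max_id - min_id) // chunks + 1
    sql_list = []
    for start in range(min_id, max_id + 1, step):
        where = " WHERE {0}.{1} BETWEEN {2} AND {3}".format(alias, id_field, start, start + step - 1)
        sql_list.append(the_sql.rstrip().rstrip(";") + where + ";")
    return sql_list
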
Example #11
def get_split_localities(pg_cur, settings):
    start_time = datetime.now()
    sql = geoscape.open_sql_file("02-split-localities-by-state-borders.sql",
                                 settings)
    sql_list = geoscape.split_sql_into_list(pg_cur, sql,
                                            settings['admin_bdys_schema'],
                                            "temp_localities", "loc", "gid",
                                            settings, logger)
    geoscape.multiprocess_list("sql", sql_list, settings, logger)
    logger.info("\t- Step 2 of 8 : localities split by state : {0}".format(
        datetime.now() - start_time))
Example #12
def index_raw_gnaf(pg_cur):
    # Step 5 of 7 : create indexes
    start_time = datetime.now()

    raw_sql_list = geoscape.open_sql_file(
        "01-05-raw-gnaf-create-indexes.sql").split("\n")
    sql_list = []
    for sql in raw_sql_list:
        if sql.strip() and not sql.startswith("--"):
            sql_list.append(sql)

    geoscape.multiprocess_list("sql", sql_list, logger)

    # create distinct new & old locality pid lookup table
    pg_cur.execute(
        geoscape.open_sql_file(
            "01-05b-create-distinct-locality-pid-linkage-table.sql"))

    logger.info(
        "\t- Step 5 of 7 : indexes created: {0}".format(datetime.now() -
                                                        start_time))
Example #13
def drop_tables_and_vacuum_db(pg_cur):
    # Step 1 of 7 : drop tables
    start_time = datetime.now()
    pg_cur.execute(geoscape.open_sql_file("01-01-drop-tables.sql"))
    logger.info(
        f"\t- Step 1 of 7 : tables dropped : {datetime.now() - start_time}")

    # Step 2 of 7 : vacuum database (if requested)
    start_time = datetime.now()
    if settings.vacuum_db:
        pg_cur.execute("VACUUM")
        logger.info(
            f"\t- Step 2 of 7 : database vacuumed : {datetime.now() - start_time}"
        )
    else:
        logger.info("\t- Step 2 of 7 : database NOT vacuumed")
Example #14
def create_primary_foreign_keys():
    start_time = datetime.now()

    key_sql = geoscape.open_sql_file(
        "01-06-raw-gnaf-create-primary-foreign-keys.sql")
    key_sql_list = key_sql.split("--")
    sql_list = []

    for sql in key_sql_list:
        sql = sql.strip()
        if sql.startswith("ALTER "):
            # add schema to table names, in case the raw gnaf schema isn't the default
            sql = sql.replace("ALTER TABLE ONLY ",
                              f"ALTER TABLE ONLY {settings.raw_gnaf_schema}.")
            sql_list.append(sql)

    # run queries in separate processes
    geoscape.multiprocess_list("sql", sql_list, logger)

    logger.info(
        f"\t- Step 6 of 7 : primary & foreign keys created : {datetime.now() - start_time}"
    )
Example #15
def load_raw_admin_boundaries(pg_cur):
    start_time = datetime.now()

    # drop existing views
    pg_cur.execute(geoscape.open_sql_file("02-01-drop-admin-bdy-views.sql"))

    # add authority code tables
    settings.states_to_load.extend(["authority_code"])

    # get file list
    table_list = list()
    create_list = list()
    append_list = list()

    for state in settings.states_to_load:
        state = state.lower()
        # get a dictionary of Shapefiles and DBFs matching the state
        for root, dirs, files in os.walk(settings.admin_bdys_local_directory):
            for file_name in files:
                if file_name.lower().startswith(state + "_"):
                    if file_name.lower().endswith(".shp") or file_name.lower().endswith("_shp.dbf"):
                        file_dict = dict()

                        # list .shp files and standalone .dbf files - ignore the rest
                        if file_name.lower().endswith(".shp"):
                            file_dict["spatial"] = True
                            file_dict["file_path"] = os.path.join(root, file_name)
                        elif (file_name.lower().endswith(".dbf")
                              and not file_name.lower().endswith("_polygon_shp.dbf")
                              and not file_name.lower().endswith("_point_shp.dbf")):
                            file_dict["spatial"] = False
                            file_dict["file_path"] = os.path.join(root, file_name)

                        if file_dict.get("file_path") is not None:
                            file_dict["pg_table"] = file_name.lower().replace(state + "_", "aus_", 1) \
                                .replace(".dbf", "").replace(".shp", "").replace("_shp", "")

                            file_dict["pg_schema"] = settings.raw_admin_bdys_schema

                            # set command line parameters depending on whether this is the 1st state
                            table_list_add = False

                            if file_dict["pg_table"] not in table_list:
                                table_list_add = True

                                file_dict["delete_table"] = True
                            else:
                                file_dict["delete_table"] = False

                            # if the locality DBF comes from the Towns folder: don't add - it's a duplicate
                            file_path_lower = file_dict["file_path"].lower()
                            if ("town points" not in file_path_lower
                                    or not file_path_lower.endswith("_locality_shp.dbf")):
                                if table_list_add:
                                    table_list.append(file_dict["pg_table"])
                                    create_list.append(file_dict)
                                else:
                                    # # don't add duplicates if more than one Authority Code file per boundary type
                                    # if "_aut_" not in file_name.lower():
                                    append_list.append(file_dict)

    # [print(table) for table in create_list]
    # print("---------------------------------------------------------------------------------------")
    # [print(table) for table in append_list]

    # are there any files to load?
    if len(create_list) == 0:
        logger.fatal(
            "No admin boundary files found\nACTION: Check your 'admin-bdys-path' argument"
        )
    else:
        # load files in separate processes
        geoscape.multiprocess_shapefile_load(create_list, logger)

        # Run the appends one at a time (Can't multiprocess as sets of parallel INSERTs can cause database deadlocks)
        for shp in append_list:
            result = geoscape.import_shapefile_to_postgres(
                shp["file_path"], shp["pg_table"], shp["pg_schema"],
                shp["delete_table"], shp["spatial"])

            if result != "SUCCESS":
                logger.warning(result)

        logger.info(
            "\t- Step 1 of 3 : raw admin boundaries loaded : {0}".format(
                datetime.now() - start_time))
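
For reference, each entry in create_list and append_list built above is a plain dictionary; an illustrative example (file path and values are made up):

# illustrative only - one boundary file as collected by load_raw_admin_boundaries()
example_file_dict = {
    "spatial": True,                                 # .shp file (False for a standalone .dbf)
    "file_path": "/data/admin-bdys/act_locality_polygon_shp.shp",
    "pg_table": "aus_locality_polygon",              # state prefix replaced with "aus_"
    "pg_schema": "raw_admin_bdys",                   # settings.raw_admin_bdys_schema
    "delete_table": True,                            # first state creates the table, later states append
}
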
Example #16
def main():
    full_start_time = datetime.now()

    # set command line arguments
    args = set_arguments()
    # get settings from arguments
    settings = get_settings(args)
    # connect to Postgres
    try:
        pg_conn = psycopg2.connect(settings['pg_connect_string'])
    except psycopg2.Error:
        logger.fatal(
            "Unable to connect to database\nACTION: Check your Postgres parameters and/or database security"
        )
        return False

    pg_conn.autocommit = True
    pg_cur = pg_conn.cursor()

    # log postgres/postgis versions being used
    geoscape.check_postgis_version(pg_cur, settings, logger)

    logger.info("")

    # get SRID of locality boundaries
    sql = geoscape.prep_sql(
        f"select Find_SRID('{settings['admin_bdys_schema']}', 'locality_bdys', 'geom')",
        settings)
    pg_cur.execute(sql)
    settings['srid'] = int(pg_cur.fetchone()[0])
    if settings['srid'] == 4283:
        logger.info(
            f"Locality boundary coordinate system is EPSG:{settings['srid']} (GDA94)"
        )
    elif settings['srid'] == 7844:
        logger.info(
            f"Locality boundary coordinate system is EPSG:{settings['srid']} (GDA2020)"
        )
    else:
        logger.fatal(
            "Invalid coordinate system (SRID) - EXITING!\nValid values are 4283 (GDA94) and 7844 (GDA2020)"
        )
        exit()

    # add Postgres functions to clean out non-polygon geometries from GeometryCollections
    pg_cur.execute(
        geoscape.open_sql_file("create-polygon-intersection-function.sql",
                               settings).format(settings['srid']))
    pg_cur.execute(
        geoscape.open_sql_file("create-multi-linestring-split-function.sql",
                               settings))

    # let's build some clean localities!
    logger.info("")
    create_states_and_prep_localities(settings)
    get_split_localities(pg_cur, settings)
    verify_locality_polygons(pg_cur, settings)
    get_locality_state_border_gaps(pg_cur, settings)
    finalise_display_localities(pg_cur, settings)
    create_display_postcodes(pg_cur, settings)
    export_display_localities(pg_cur, settings)
    qa_display_localities(pg_cur, settings)

    pg_cur.close()
    pg_conn.close()

    logger.info("Total time : {0}".format(datetime.now() - full_start_time))

    return True
Example #17
def create_reference_tables(pg_cur):
    # set postgres search path back to the default
    pg_cur.execute("SET search_path = public, pg_catalog")

    # Step 1 of 14 : create reference tables
    start_time = datetime.now()
    pg_cur.execute(geoscape.open_sql_file("03-01-reference-create-tables.sql"))
    logger.info("\t- Step  1 of 14 : create reference tables : {0}".format(
        datetime.now() - start_time))

    # Step 2 of 14 : populate localities
    start_time = datetime.now()
    pg_cur.execute(
        geoscape.open_sql_file("03-02-reference-populate-localities.sql"))
    logger.info("\t- Step  2 of 14 : localities populated : {0}".format(
        datetime.now() - start_time))

    # Step 3 of 14 : populate locality aliases
    start_time = datetime.now()
    pg_cur.execute(
        geoscape.open_sql_file(
            "03-03-reference-populate-locality-aliases.sql"))
    logger.info("\t- Step  3 of 14 : locality aliases populated : {0}".format(
        datetime.now() - start_time))

    # Step 4 of 14 : populate locality neighbours
    start_time = datetime.now()
    pg_cur.execute(
        geoscape.open_sql_file(
            "03-04-reference-populate-locality-neighbours.sql"))
    logger.info(
        "\t- Step  4 of 14 : locality neighbours populated : {0}".format(
            datetime.now() - start_time))

    # Step 5 of 14 : populate streets
    start_time = datetime.now()
    pg_cur.execute(
        geoscape.open_sql_file("03-05-reference-populate-streets.sql"))
    logger.info(
        "\t- Step  5 of 14 : streets populated : {0}".format(datetime.now() -
                                                             start_time))

    # Step 6 of 14 : populate street aliases
    start_time = datetime.now()
    pg_cur.execute(
        geoscape.open_sql_file("03-06-reference-populate-street-aliases.sql"))
    logger.info("\t- Step  6 of 14 : street aliases populated : {0}".format(
        datetime.now() - start_time))

    # Step 7 of 14 : populate addresses, using multiprocessing
    start_time = datetime.now()
    sql = geoscape.open_sql_file("03-07-reference-populate-addresses-1.sql")
    sql_list = geoscape.split_sql_into_list(pg_cur, sql, settings.gnaf_schema,
                                            "streets", "str", "gid", logger)
    if sql_list is not None:
        geoscape.multiprocess_list("sql", sql_list, logger)
    pg_cur.execute(geoscape.prep_sql("ANALYZE gnaf.temp_addresses;"))
    logger.info(
        "\t- Step  7 of 14 : addresses populated : {0}".format(datetime.now() -
                                                               start_time))

    # Step 8 of 14 : populate principal alias lookup
    start_time = datetime.now()
    pg_cur.execute(
        geoscape.open_sql_file(
            "03-08-reference-populate-address-alias-lookup.sql"))
    logger.info(
        "\t- Step  8 of 14 : principal alias lookup populated : {0}".format(
            datetime.now() - start_time))

    # Step 9 of 14 : populate primary secondary lookup
    start_time = datetime.now()
    pg_cur.execute(
        geoscape.open_sql_file(
            "03-09-reference-populate-address-secondary-lookup.sql"))
    pg_cur.execute(
        geoscape.prep_sql("VACUUM ANALYSE gnaf.address_secondary_lookup"))
    logger.info(
        "\t- Step  9 of 14 : primary secondary lookup populated : {0}".format(
            datetime.now() - start_time))

    # Step 10 of 14 : split the Melbourne locality into its 2 postcodes (3000, 3004)
    start_time = datetime.now()
    pg_cur.execute(
        geoscape.open_sql_file("03-10-reference-split-melbourne.sql"))
    logger.info(
        "\t- Step 10 of 14 : Melbourne split : {0}".format(datetime.now() -
                                                           start_time))

    # Step 11 of 14 : finalise localities assigned to streets and addresses
    start_time = datetime.now()
    pg_cur.execute(
        geoscape.open_sql_file("03-11-reference-finalise-localities.sql"))
    logger.info("\t- Step 11 of 14 : localities finalised : {0}".format(
        datetime.now() - start_time))

    # Step 12 of 14 : finalise addresses, using multiprocessing
    start_time = datetime.now()
    sql = geoscape.open_sql_file("03-12-reference-populate-addresses-2.sql")
    sql_list = geoscape.split_sql_into_list(pg_cur, sql, settings.gnaf_schema,
                                            "localities", "loc", "gid", logger)
    if sql_list is not None:
        geoscape.multiprocess_list("sql", sql_list, logger)

    # turf the temp address table
    pg_cur.execute(
        geoscape.prep_sql("DROP TABLE IF EXISTS gnaf.temp_addresses"))
    logger.info(
        "\t- Step 12 of 14 : addresses finalised : {0}".format(datetime.now() -
                                                               start_time))

    # Step 13 of 14 : create almost correct postcode boundaries by aggregating localities, using multiprocessing
    start_time = datetime.now()
    sql = geoscape.open_sql_file("03-13-reference-derived-postcode-bdys.sql")
    sql_list = []
    for state in settings.states_to_load:
        state_sql = sql.replace("GROUP BY ",
                                "WHERE state = '{0}' GROUP BY ".format(state))
        sql_list.append(state_sql)
    geoscape.multiprocess_list("sql", sql_list, logger)

    # create analysis table?
    if settings.st_subdivide_supported:
        pg_cur.execute(
            geoscape.open_sql_file(
                "03-13a-create-postcode-analysis-table.sql"))

    logger.info("\t- Step 13 of 14 : postcode boundaries created : {0}".format(
        datetime.now() - start_time))

    # Step 14 of 14 : create indexes, primary and foreign keys, using multiprocessing
    start_time = datetime.now()
    raw_sql_list = geoscape.open_sql_file(
        "03-14-reference-create-indexes.sql").split("\n")
    sql_list = []
    for sql in raw_sql_list:
        if sql.strip() and not sql.startswith("--"):
            sql_list.append(sql)
    geoscape.multiprocess_list("sql", sql_list, logger)
    logger.info(
        "\t- Step 14 of 14 : create primary & foreign keys and indexes : {0}".
        format(datetime.now() - start_time))
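
Step 13 above parallelises the postcode-boundary build per state by injecting a WHERE clause immediately in front of the GROUP BY. A tiny illustration (the statement is simplified and is not the contents of the real SQL file):

# simplified stand-in for 03-13-reference-derived-postcode-bdys.sql
sql = "INSERT INTO gnaf.postcode_bdys SELECT postcode, ST_Union(geom) FROM gnaf.localities GROUP BY postcode"

state_sql = sql.replace("GROUP BY ", "WHERE state = '{0}' GROUP BY ".format("NSW"))
# -> "... FROM gnaf.localities WHERE state = 'NSW' GROUP BY postcode"
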
Example #18
def boundary_tag_gnaf(pg_cur):

    # create bdy table list
    # remove localities, postcodes and states as these IDs are already assigned to GNAF addresses
    table_list = list()
    for table in settings.admin_bdy_list:
        if table[0] not in ["locality_bdys", "postcode_bdys", "state_bdys"]:
            # use the subdivided analysis tables if available, otherwise fall back to the full tables
            # WARNING: using the full tables can add hours to the processing
            if settings.st_subdivide_supported:
                table_name = "{}_analysis".format(table[0], )
            else:
                table_name = table[0]

            table_list.append([table_name, table[1]])

    # create bdy tagged address tables
    for address_table in ["address_principal", "address_alias"]:
        pg_cur.execute(
            "DROP TABLE IF EXISTS {}.{}_admin_boundaries CASCADE".format(
                settings.gnaf_schema, address_table))
        create_table_list = list()
        create_table_list.append(
            "CREATE TABLE {}.{}_admin_boundaries (gid serial NOT NULL,"
            "gnaf_pid text NOT NULL,"
            # "alias_principal character(1) NOT NULL,"
            "locality_pid text NOT NULL,"
            "old_locality_pid text NULL,"
            "locality_name text NOT NULL,"
            "postcode text,"
            "state text NOT NULL".format(settings.gnaf_schema, address_table))

        for table in table_list:
            pid_field = table[1]
            name_field = pid_field.replace("_pid", "_name")
            create_table_list.append(", {} text, {} text".format(
                pid_field, name_field))
        create_table_list.append(
            ") WITH (OIDS=FALSE);ALTER TABLE {}.{}_admin_boundaries OWNER TO {}"
            .format(settings.gnaf_schema, address_table, settings.pg_user))
        pg_cur.execute("".join(create_table_list))

    # Step 1 of 7 : tag gnaf addresses with admin boundary IDs, using multiprocessing
    start_time = datetime.now()

    # create temp tables
    template_sql = geoscape.open_sql_file(
        "04-01a-bdy-tag-create-table-template.sql")
    for table in table_list:
        pg_cur.execute(template_sql.format(table[0], ))

    # create temp tables of bdy tagged gnaf_pids
    template_sql = geoscape.open_sql_file("04-01b-bdy-tag-template.sql")
    sql_list = list()
    for table in table_list:
        sql = template_sql.format(table[0], table[1])

        short_sql_list = geoscape.split_sql_into_list(
            pg_cur, sql, settings.admin_bdys_schema, table[0], "bdys", "gid",
            logger)

        if short_sql_list is not None:
            sql_list.extend(short_sql_list)

    # logger.info("\n".join(sql_list))

    if sql_list:
        geoscape.multiprocess_list("sql", sql_list, logger)

    logger.info(
        "\t- Step 1 of 7 : principal addresses tagged with admin boundary IDs: {}"
        .format(datetime.now() - start_time, ))
    start_time = datetime.now()

    # Step 2 of 7 : delete invalid matches, create indexes and analyse tables
    sql_list = list()
    for table in table_list:
        sql = "DELETE FROM {0}.temp_{1}_tags WHERE gnaf_state <> bdy_state AND gnaf_state <> 'OT';" \
              "CREATE INDEX temp_{1}_tags_gnaf_pid_idx ON {0}.temp_{1}_tags USING btree(gnaf_pid);" \
              "ANALYZE {0}.temp_{1}_tags".format(settings.gnaf_schema, table[0])
        sql_list.append(sql)
    geoscape.multiprocess_list("sql", sql_list, logger)

    logger.info(
        "\t- Step 2 of 7 : principal addresses - invalid matches deleted & bdy tag indexes created : {}"
        .format(datetime.now() - start_time, ))
    start_time = datetime.now()

    # Step 3 of 7 : insert boundary tagged addresses

    # create insert statement for multiprocessing
    insert_field_list = list()
    insert_field_list.append(
        "(gnaf_pid, locality_pid, old_locality_pid, locality_name, postcode, state"
    )

    insert_join_list = list()
    insert_join_list.append("FROM {}.address_principals AS pnts ".format(
        settings.gnaf_schema, ))

    select_field_list = list()
    select_field_list.append(
        "SELECT pnts.gnaf_pid, pnts.locality_pid, pnts.old_locality_pid, "
        "pnts.locality_name, pnts.postcode, pnts.state")

    drop_table_list = list()

    for table in table_list:
        pid_field = table[1]
        name_field = pid_field.replace("_pid", "_name")
        insert_field_list.append(", {0}, {1}".format(pid_field, name_field))
        select_field_list.append(
            ", temp_{0}_tags.bdy_pid, temp_{0}_tags.bdy_name ".format(
                table[0]))
        insert_join_list.append(
            "LEFT OUTER JOIN {0}.temp_{1}_tags ON pnts.gnaf_pid = temp_{1}_tags.gnaf_pid "
            .format(settings.gnaf_schema, table[0]))
        drop_table_list.append(
            "DROP TABLE IF EXISTS {0}.temp_{1}_tags;".format(
                settings.gnaf_schema, table[0]))

    insert_field_list.append(") ")

    insert_statement_list = list()
    insert_statement_list.append(
        "INSERT INTO {0}.address_principal_admin_boundaries ".format(
            settings.gnaf_schema, ))
    insert_statement_list.append("".join(insert_field_list))
    insert_statement_list.append("".join(select_field_list))
    insert_statement_list.append("".join(insert_join_list))

    sql = "".join(insert_statement_list) + ";"
    sql_list = geoscape.split_sql_into_list(pg_cur, sql, settings.gnaf_schema,
                                            "address_principals", "pnts",
                                            "gid", logger)
    # logger.info("\n".join(sql_list))

    if sql_list is not None:
        geoscape.multiprocess_list("sql", sql_list, logger)

    # drop temp tables
    pg_cur.execute("".join(drop_table_list))

    # get stats
    pg_cur.execute("ANALYZE {0}.address_principal_admin_boundaries ".format(
        settings.gnaf_schema))

    logger.info(
        "\t- Step 3 of 7 : principal addresses - bdy tags added to output table : {}"
        .format(datetime.now() - start_time, ))

    start_time = datetime.now()

    # Step 4 of 7 : add index to output table
    sql = "CREATE INDEX address_principal_admin_boundaries_gnaf_pid_idx " \
          "ON {0}.address_principal_admin_boundaries USING btree (gnaf_pid)"\
        .format(settings.gnaf_schema)
    pg_cur.execute(sql)

    logger.info(
        "\t- Step 4 of 7 : created index on bdy tagged address table : {}".
        format(datetime.now() - start_time, ))
    start_time = datetime.now()

    # Step 5 of 7 : log duplicates - happens when 2 boundaries overlap by a very small amount
    # (can be ignored if there's a small number of records affected)
    sql = "SELECT gnaf_pid FROM (SELECT Count(*) AS cnt, gnaf_pid FROM {0}.address_principal_admin_boundaries " \
          "GROUP BY gnaf_pid) AS sqt WHERE cnt > 1".format(settings.gnaf_schema)
    pg_cur.execute(sql)

    # get cursor description to test if any rows returned safely
    columns = pg_cur.description

    # log gnaf_pids that got duplicate results
    if columns is not None:
        duplicates = pg_cur.fetchall()
        gnaf_pids = list()

        for duplicate in duplicates:
            gnaf_pids.append("\t\t" + duplicate[0])

        if len(gnaf_pids) > 0:
            logger.warning(
                "\t- Step 5 of 7 : found boundary tag duplicates : {}".format(
                    datetime.now() - start_time, ))
            logger.warning("\n".join(gnaf_pids))
        else:
            logger.info(
                "\t- Step 5 of 7 : no boundary tag duplicates : {}".format(
                    datetime.now() - start_time, ))
    else:
        logger.info("\t- Step 5 of 7 : no boundary tag duplicates : {}".format(
            datetime.now() - start_time, ))

    start_time = datetime.now()

    # Step 6 of 7 : Copy principal boundary tags to alias addresses
    pg_cur.execute(
        geoscape.open_sql_file("04-06-bdy-tags-for-alias-addresses.sql"))
    logger.info(
        "\t- Step 6 of 7 : alias addresses boundary tagged : {}".format(
            datetime.now() - start_time, ))
    start_time = datetime.now()

    # Step 7 of 7 : Create view of all bdy tags
    pg_cur.execute(geoscape.open_sql_file("04-07-create-bdy-tag-view.sql"))
    logger.info(
        "\t- Step 7 of 7 : boundary tagged address view created : {}".format(
            datetime.now() - start_time, ))
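
To make Step 3 above easier to follow, this is roughly the INSERT that gets assembled for a single boundary table (illustrative only: assumes gnaf_schema is "gnaf", no _analysis suffix, and table_list == [["commonwealth_electorates", "ce_pid"]]):

# roughly what "".join(insert_statement_list) + ";" produces in Step 3 (illustrative)
sql = (
    "INSERT INTO gnaf.address_principal_admin_boundaries "
    "(gnaf_pid, locality_pid, old_locality_pid, locality_name, postcode, state, ce_pid, ce_name) "
    "SELECT pnts.gnaf_pid, pnts.locality_pid, pnts.old_locality_pid, "
    "pnts.locality_name, pnts.postcode, pnts.state"
    ", temp_commonwealth_electorates_tags.bdy_pid, temp_commonwealth_electorates_tags.bdy_name "
    "FROM gnaf.address_principals AS pnts "
    "LEFT OUTER JOIN gnaf.temp_commonwealth_electorates_tags "
    "ON pnts.gnaf_pid = temp_commonwealth_electorates_tags.gnaf_pid;"
)
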
Example #19
def export_display_localities(pg_cur, settings):
    start_time = datetime.now()

    # create export path
    pathlib.Path(settings['output_path']).mkdir(parents=True, exist_ok=True)

    sql = geoscape.open_sql_file("07-export-display-localities.sql", settings)

    if platform.system() == "Windows":
        password_str = "SET"
    else:
        password_str = "export"

    password_str += " PGPASSWORD={0}&&".format(settings['pg_password'])

    cmd = password_str + "pgsql2shp -f \"{0}\" -u {1} -h {2} -p {3} {4} \"{5}\""\
        .format(settings['shapefile_export_path'], settings['pg_user'], settings['pg_host'],
                settings['pg_port'], settings['pg_db'], sql)

    # logger.info(cmd)
    geoscape.run_command_line(cmd)

    # zip shapefile
    if settings['srid'] == 4283:
        shp_zip_path = settings['shapefile_name'] + "-shapefile.zip"
    else:
        shp_zip_path = settings['shapefile_name'] + "-gda2020-shapefile.zip"

    output_zipfile = os.path.join(settings['output_path'], shp_zip_path)
    zf = zipfile.ZipFile(output_zipfile, mode="w")

    for ext in settings['shapefile_extensions']:
        file_name = settings['shapefile_name'] + ext
        file_path = os.path.join(settings['output_path'], file_name)
        zf.write(file_path, file_name, compress_type=zipfile.ZIP_DEFLATED)

    zf.close()

    time_elapsed = datetime.now() - start_time

    logger.info(
        "\t- Step 7 of 8 : display localities exported to SHP : {0}".format(
            time_elapsed))
    if time_elapsed.seconds < 2:
        logger.warning(
            "\t\t- This step took < 2 seconds - it may have failed silently. "
            "Check your output directory!")

    start_time = datetime.now()

    # Export as GeoJSON FeatureCollection
    sql = geoscape.prep_sql(
        "SELECT gid, locality_pid, locality_name, COALESCE(postcode, '') AS postcode, state, "
        "locality_class, address_count, street_count, ST_AsGeoJSON(geom, 5, 0) AS geom "
        "FROM {0}.locality_bdys_display".format(settings['admin_bdys_schema']),
        settings)
    pg_cur.execute(sql)

    # Create the GeoJSON output with an array of dictionaries containing the field names and values

    # get column names from cursor
    column_names = [desc[0] for desc in pg_cur.description]

    json_dicts = []
    row = pg_cur.fetchone()

    if row is not None:
        while row is not None:
            rec = {"type": "Feature"}
            props = {}

            for column, value in zip(column_names, row):
                if column in ("geometry", "geom"):
                    rec["geometry"] = value
                else:
                    props[column] = value

            rec["properties"] = props
            json_dicts.append(rec)
            row = pg_cur.fetchone()

    gj = json.dumps(json_dicts).replace("\\",
                                        "").replace('"{',
                                                    '{').replace('}"', '}')

    geojson = ''.join(['{"type":"FeatureCollection","features":', gj, '}'])

    with open(settings['geojson_export_path'], "w") as text_file:
        text_file.write(geojson)

    # compress GeoJSON
    if settings['srid'] == 4283:
        geojson_zip_path = settings['geojson_export_path'].replace(
            ".geojson", "-geojson.zip")
    else:
        geojson_zip_path = settings['geojson_export_path'].replace(
            ".geojson", "-gda2020-geojson.zip")

    with zipfile.ZipFile(geojson_zip_path, mode="w") as geojson_zip:
        geojson_zip.write(settings['geojson_export_path'], compress_type=zipfile.ZIP_DEFLATED)

    logger.info(
        "\t- Step 7 of 8 : display localities exported to GeoJSON : {0}".
        format(datetime.now() - start_time))
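
The backslash and quote replaces on the json.dumps output above exist because ST_AsGeoJSON returns the geometry as a JSON string rather than a dict. An illustrative record (values made up and truncated) showing what is being fixed up:

# before json.dumps: the geometry is still the string returned by ST_AsGeoJSON
rec = {
    "type": "Feature",
    "geometry": '{"type":"MultiPolygon","coordinates":[[[[151.2,-33.8],[151.3,-33.8],[151.3,-33.9],[151.2,-33.8]]]]}',
    "properties": {"locality_pid": "example123", "locality_name": "EXAMPLEVILLE", "postcode": "", "state": "NSW"},
}
# after the .replace('"{', '{') and .replace('}"', '}') calls, the quoted geometry string
# becomes nested JSON inside the final FeatureCollection
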