예제 #1
0
def create_admin_bdys_for_analysis():
    # Step 3 of 3 : create admin bdy tables optimised for spatial analysis
    start_time = datetime.now()

    if settings.st_subdivide_supported:
        template_sql = geoscape.open_sql_file(
            "02-03-create-admin-bdy-analysis-tables_template.sql")
        sql_list = list()

        for table in settings.admin_bdy_list:
            sql = template_sql.format(table[0], table[1])
            if table[
                    0] == "locality_bdys":  # special case, need to change schema name
                # sql = sql.replace(settings.raw_admin_bdys_schema, settings.admin_bdys_schema)
                sql = sql.replace("name", "locality_name")
                # add postcodes
                sql = sql.replace(
                    "locality_name text NOT NULL,",
                    "old_locality_pid text NULL, locality_name text NOT NULL, postcode text NULL,"
                )
                sql = sql.replace(
                    "locality_name,",
                    "old_locality_pid, locality_name, postcode,")

            sql_list.append(sql)
        geoscape.multiprocess_list("sql", sql_list, logger)
        logger.info(
            "\t- Step 3 of 3 : admin boundaries for analysis created : {0}".
            format(datetime.now() - start_time))
    else:
        logger.warning(
            "\t- Step 3 of 3 : admin boundaries for analysis NOT created - "
            "requires PostGIS 2.2+ with GEOS 3.5.0+")
예제 #2
0
def populate_raw_gnaf(pg_cur):
    # Step 4 of 7 : load raw gnaf authority & state tables
    start_time = datetime.now()

    # authority code file list
    sql_list = get_raw_gnaf_files("authority_code")

    # add state file lists
    for state in settings.states_to_load:
        logger.info(f"\t\t- Loading state {state}")
        sql_list.extend(get_raw_gnaf_files(state))

    # are there any files to load?
    if len(sql_list) == 0:
        logger.fatal(
            "No raw GNAF PSV files found\nACTION: Check your 'gnaf_network_directory' path"
        )
        logger.fatal("\t- Step 4 of 7 : table populate FAILED!")
    else:
        # load all PSV files using multiprocessing
        geoscape.multiprocess_list("sql", sql_list, logger)

        # fix missing geocodes (added due to missing data in 202202 release)
        sql = geoscape.open_sql_file("01-04-raw-gnaf-fix-missing-geocodes.sql")
        pg_cur.execute(sql)

        logger.info(
            f"\t- Step 4 of 7 : tables populated : {datetime.now() - start_time}"
        )
        logger.info("\t\t- fixed missing geocodes")
예제 #3
0
def get_locality_state_border_gaps(pg_cur, settings):
    start_time = datetime.now()
    sql = geoscape.open_sql_file("04-create-holes-along-borders.sql", settings)
    sql_list = geoscape.split_sql_into_list(
        pg_cur, sql, settings['admin_bdys_schema'],
        "temp_state_border_buffers_subdivided", "ste", "new_gid", settings,
        logger)
    geoscape.multiprocess_list("sql", sql_list, settings, logger)
    logger.info("\t- Step 4 of 8 : locality holes created : {0}".format(
        datetime.now() - start_time))
예제 #4
0
def get_split_localities(pg_cur, settings):
    start_time = datetime.now()
    sql = geoscape.open_sql_file("02-split-localities-by-state-borders.sql",
                                 settings)
    sql_list = geoscape.split_sql_into_list(pg_cur, sql,
                                            settings['admin_bdys_schema'],
                                            "temp_localities", "loc", "gid",
                                            settings, logger)
    geoscape.multiprocess_list("sql", sql_list, settings, logger)
    logger.info("\t- Step 2 of 8 : localities split by state : {0}".format(
        datetime.now() - start_time))
예제 #5
0
def create_states_and_prep_localities(settings):
    start_time = datetime.now()
    sql_list = [
        geoscape.open_sql_file("01a-create-states-from-sa4s.sql",
                               settings).format(settings['srid']),
        geoscape.open_sql_file("01b-prep-locality-boundaries.sql",
                               settings).format(settings['srid'])
    ]
    geoscape.multiprocess_list("sql", sql_list, settings, logger)
    logger.info(
        "\t- Step 1 of 8 : state table created & localities prepped : {0}".
        format(datetime.now() - start_time))
예제 #6
0
def prep_admin_bdys(pg_cur):
    # Step 3 of 4 : create admin bdy tables read to be used
    start_time = datetime.now()

    # create tables using multiprocessing - using flag in file to split file up into sets of statements
    sql_list = geoscape.open_sql_file(
        "02-02a-prep-admin-bdys-tables.sql").split("-- # --")
    sql_list = sql_list + geoscape.open_sql_file(
        "02-02b-prep-census-2011-bdys-tables.sql").split("-- # --")
    sql_list = sql_list + geoscape.open_sql_file(
        "02-02c-prep-census-2016-bdys-tables.sql").split("-- # --")
    sql_list = sql_list + geoscape.open_sql_file(
        "02-02d-prep-census-2021-bdys-tables.sql").split("-- # --")

    # # Account for bdys that are not in states to load - not yet working
    # for sql in sql_list:
    #     if settings.states_to_load == ["OT"] and ".commonwealth_electorates " in sql:
    #         sql_list.remove(sql)
    #
    #     if settings.states_to_load == ["ACT"] and ".local_government_areas " in sql:
    #         sql_list.remove(sql)
    #
    #     logger.info(settings.states_to_load
    #
    #     if not ("NT" in settings.states_to_load or "SA" in settings.states_to_load
    #             or "VIC" in settings.states_to_load or "WA" in settings.states_to_load) \
    #             and ".local_government_wards " in sql:
    #         sql_list.remove(sql)
    #
    #     if settings.states_to_load == ["OT"] and ".state_lower_house_electorates " in sql:
    #         sql_list.remove(sql)
    #
    #     if not ("TAS" in settings.states_to_load or "VIC" in settings.states_to_load
    #             or "WA" in settings.states_to_load) and ".state_upper_house_electorates " in sql:
    #         sql_list.remove(sql)

    geoscape.multiprocess_list("sql", sql_list, logger)

    # Special case - remove custom outback bdy if South Australia not requested
    if "SA" not in settings.states_to_load:
        pg_cur.execute(
            geoscape.prep_sql(
                "DELETE FROM admin_bdys.locality_bdys WHERE locality_pid = 'SA999999'"
            ))
        pg_cur.execute(
            geoscape.prep_sql("VACUUM ANALYZE admin_bdys.locality_bdys"))

    logger.info("\t- Step 2 of 3 : admin boundaries prepped : {0}".format(
        datetime.now() - start_time))
예제 #7
0
def index_raw_gnaf(pg_cur):
    # Step 5 of 7 : create indexes
    start_time = datetime.now()

    raw_sql_list = geoscape.open_sql_file(
        "01-05-raw-gnaf-create-indexes.sql").split("\n")
    sql_list = []
    for sql in raw_sql_list:
        if sql[0:2] != "--" and sql[0:2] != "":
            sql_list.append(sql)

    geoscape.multiprocess_list("sql", sql_list, logger)

    # # create distinct new & old locality pid lookup table
    # pg_cur.execute(geoscape.open_sql_file("01-05b-create-distinct-locality-pid-linkage-table.sql"))

    logger.info(
        f"\t- Step 5 of 7 : indexes created : {datetime.now() - start_time}")
예제 #8
0
def analyse_raw_gnaf_tables(pg_cur):
    start_time = datetime.now()

    # get list of tables that haven't been analysed (i.e. that have no real row count)
    sql = f"""SELECT nspname|| '.' || relname AS table_name 
              FROM pg_class C LEFT JOIN pg_namespace N ON (N.oid = C.relnamespace)
              WHERE nspname = '{settings.raw_gnaf_schema}' AND relkind='r' AND reltuples = 0"""
    pg_cur.execute(sql)

    sql_list = []

    for pg_row in pg_cur:
        sql_list.append(f"ANALYZE {pg_row[0]}")

    # run queries in separate processes
    geoscape.multiprocess_list("sql", sql_list, logger)

    logger.info(
        f"\t- Step 7 of 7 : tables analysed : {datetime.now() - start_time}")
예제 #9
0
def create_primary_foreign_keys():
    start_time = datetime.now()

    key_sql = geoscape.open_sql_file(
        "01-06-raw-gnaf-create-primary-foreign-keys.sql")
    key_sql_list = key_sql.split("--")
    sql_list = []

    for sql in key_sql_list:
        sql = sql.strip()
        if sql[0:6] == "ALTER ":
            # add schema to tables names, in case raw gnaf schema not the default
            sql = sql.replace(f"ALTER TABLE ONLY ",
                              "ALTER TABLE ONLY {settings.raw_gnaf_schema}.")
            sql_list.append(sql)

    # run queries in separate processes
    geoscape.multiprocess_list("sql", sql_list, logger)

    logger.info(
        f"\t- Step 6 of 7 : primary & foreign keys created : {datetime.now() - start_time}"
    )
예제 #10
0
def populate_raw_gnaf():
    # Step 4 of 7 : load raw gnaf authority & state tables
    start_time = datetime.now()

    # authority code file list
    sql_list = get_raw_gnaf_files("authority_code")

    # add state file lists
    for state in settings.states_to_load:
        logger.info("\t\t- Loading state {}".format(state))
        sql_list.extend(get_raw_gnaf_files(state))

    # are there any files to load?
    if len(sql_list) == 0:
        logger.fatal(
            "No raw GNAF PSV files found\nACTION: Check your 'gnaf_network_directory' path"
        )
        logger.fatal("\t- Step 4 of 7 : table populate FAILED!")
    else:
        # load all PSV files using multiprocessing
        geoscape.multiprocess_list("sql", sql_list, logger)
        logger.info(
            "\t- Step 4 of 7 : tables populated : {0}".format(datetime.now() -
                                                              start_time))
예제 #11
0
def boundary_tag_gnaf(pg_cur):

    # create bdy table list
    # remove localities, postcodes and states as these IDs are already assigned to GNAF addresses
    table_list = list()
    for table in settings.admin_bdy_list:
        if table[0] not in ["locality_bdys", "postcode_bdys", "state_bdys"]:
            # if no analysis tables created - use the full tables instead of the subdivided ones
            # WARNING: this can add hours to the processing
            if settings.st_subdivide_supported:
                table_name = "{}_analysis".format(table[0], )
            else:
                table_name = table[0]

            table_list.append([table_name, table[1]])

    # create bdy tagged address tables
    for address_table in ["address_principal", "address_alias"]:
        pg_cur.execute(
            "DROP TABLE IF EXISTS {}.{}_admin_boundaries CASCADE".format(
                settings.gnaf_schema, address_table))
        create_table_list = list()
        create_table_list.append(
            "CREATE TABLE {}.{}_admin_boundaries (gid serial NOT NULL,"
            "gnaf_pid text NOT NULL,"
            # "alias_principal character(1) NOT NULL,"
            "locality_pid text NOT NULL,"
            "old_locality_pid text NULL,"
            "locality_name text NOT NULL,"
            "postcode text,"
            "state text NOT NULL".format(settings.gnaf_schema, address_table))

        for table in table_list:
            pid_field = table[1]
            name_field = pid_field.replace("_pid", "_name")
            create_table_list.append(", {} text, {} text".format(
                pid_field, name_field))
        create_table_list.append(
            ") WITH (OIDS=FALSE);ALTER TABLE {}.{}_admin_boundaries OWNER TO {}"
            .format(settings.gnaf_schema, address_table, settings.pg_user))
        pg_cur.execute("".join(create_table_list))

    # Step 1 of 7 : tag gnaf addresses with admin boundary IDs, using multiprocessing
    start_time = datetime.now()

    # create temp tables
    template_sql = geoscape.open_sql_file(
        "04-01a-bdy-tag-create-table-template.sql")
    for table in table_list:
        pg_cur.execute(template_sql.format(table[0], ))

    # create temp tables of bdy tagged gnaf_pids
    template_sql = geoscape.open_sql_file("04-01b-bdy-tag-template.sql")
    sql_list = list()
    for table in table_list:
        sql = template_sql.format(table[0], table[1])

        short_sql_list = geoscape.split_sql_into_list(
            pg_cur, sql, settings.admin_bdys_schema, table[0], "bdys", "gid",
            logger)

        if short_sql_list is not None:
            sql_list.extend(short_sql_list)

    # logger.info("\n".join(sql_list))

    if sql_list is not None:
        geoscape.multiprocess_list("sql", sql_list, logger)

    logger.info(
        "\t- Step 1 of 7 : principal addresses tagged with admin boundary IDs: {}"
        .format(datetime.now() - start_time, ))
    start_time = datetime.now()

    # Step 2 of 7 : delete invalid matches, create indexes and analyse tables
    sql_list = list()
    for table in table_list:
        sql = "DELETE FROM {0}.temp_{1}_tags WHERE gnaf_state <> bdy_state AND gnaf_state <> 'OT';" \
              "CREATE INDEX temp_{1}_tags_gnaf_pid_idx ON {0}.temp_{1}_tags USING btree(gnaf_pid);" \
              "ANALYZE {0}.temp_{1}_tags".format(settings.gnaf_schema, table[0])
        sql_list.append(sql)
    geoscape.multiprocess_list("sql", sql_list, logger)

    logger.info(
        "\t- Step 2 of 7 : principal addresses - invalid matches deleted & bdy tag indexes created : {}"
        .format(datetime.now() - start_time, ))
    start_time = datetime.now()

    # Step 3 of 7 : insert boundary tagged addresses

    # create insert statement for multiprocessing
    insert_field_list = list()
    insert_field_list.append(
        "(gnaf_pid, locality_pid, old_locality_pid, locality_name, postcode, state"
    )

    insert_join_list = list()
    insert_join_list.append("FROM {}.address_principals AS pnts ".format(
        settings.gnaf_schema, ))

    select_field_list = list()
    select_field_list.append(
        "SELECT pnts.gnaf_pid, pnts.locality_pid, pnts.old_locality_pid, "
        "pnts.locality_name, pnts.postcode, pnts.state")

    drop_table_list = list()

    for table in table_list:
        pid_field = table[1]
        name_field = pid_field.replace("_pid", "_name")
        insert_field_list.append(", {0}, {1}".format(pid_field, name_field))
        select_field_list.append(
            ", temp_{0}_tags.bdy_pid, temp_{0}_tags.bdy_name ".format(
                table[0]))
        insert_join_list.append(
            "LEFT OUTER JOIN {0}.temp_{1}_tags ON pnts.gnaf_pid = temp_{1}_tags.gnaf_pid "
            .format(settings.gnaf_schema, table[0]))
        drop_table_list.append(
            "DROP TABLE IF EXISTS {0}.temp_{1}_tags;".format(
                settings.gnaf_schema, table[0]))

    insert_field_list.append(") ")

    insert_statement_list = list()
    insert_statement_list.append(
        "INSERT INTO {0}.address_principal_admin_boundaries ".format(
            settings.gnaf_schema, ))
    insert_statement_list.append("".join(insert_field_list))
    insert_statement_list.append("".join(select_field_list))
    insert_statement_list.append("".join(insert_join_list))

    sql = "".join(insert_statement_list) + ";"
    sql_list = geoscape.split_sql_into_list(pg_cur, sql, settings.gnaf_schema,
                                            "address_principals", "pnts",
                                            "gid", logger)
    # logger.info("\n".join(sql_list)

    if sql_list is not None:
        geoscape.multiprocess_list("sql", sql_list, logger)

    # drop temp tables
    pg_cur.execute("".join(drop_table_list))

    # get stats
    pg_cur.execute("ANALYZE {0}.address_principal_admin_boundaries ".format(
        settings.gnaf_schema))

    logger.info(
        "\t- Step 3 of 7 : principal addresses - bdy tags added to output table : {}"
        .format(datetime.now() - start_time, ))

    start_time = datetime.now()

    # Step 4 of 7 : add index to output table
    sql = "CREATE INDEX address_principal_admin_boundaries_gnaf_pid_idx " \
          "ON {0}.address_principal_admin_boundaries USING btree (gnaf_pid)"\
        .format(settings.gnaf_schema)
    pg_cur.execute(sql)

    logger.info(
        "\t- Step 4 of 7 : created index on bdy tagged address table : {}".
        format(datetime.now() - start_time, ))
    start_time = datetime.now()

    # Step 5 of 7 : log duplicates - happens when 2 boundaries overlap by a very small amount
    # (can be ignored if there's a small number of records affected)
    sql = "SELECT gnaf_pid FROM (SELECT Count(*) AS cnt, gnaf_pid FROM {0}.address_principal_admin_boundaries " \
          "GROUP BY gnaf_pid) AS sqt WHERE cnt > 1".format(settings.gnaf_schema)
    pg_cur.execute(sql)

    # get cursor description to test if any rows returned safely
    columns = pg_cur.description

    # log gnaf_pids that got duplicate results
    if columns is not None:
        duplicates = pg_cur.fetchall()
        gnaf_pids = list()

        for duplicate in duplicates:
            gnaf_pids.append("\t\t" + duplicate[0])

        if len(gnaf_pids) > 0:
            logger.warning(
                "\t- Step 5 of 7 : found boundary tag duplicates : {}".format(
                    datetime.now() - start_time, ))
            logger.warning("\n".join(gnaf_pids))
        else:
            logger.info(
                "\t- Step 5 of 7 : no boundary tag duplicates : {}".format(
                    datetime.now() - start_time, ))
    else:
        logger.info("\t- Step 5 of 7 : no boundary tag duplicates : {}".format(
            datetime.now() - start_time, ))

    # Step 6 of 7 : Copy principal boundary tags to alias addresses
    pg_cur.execute(
        geoscape.open_sql_file("04-06-bdy-tags-for-alias-addresses.sql"))
    logger.info(
        "\t- Step 6 of 7 : alias addresses boundary tagged : {}".format(
            datetime.now() - start_time, ))
    start_time = datetime.now()

    # Step 7 of 7 : Create view of all bdy tags
    pg_cur.execute(geoscape.open_sql_file("04-07-create-bdy-tag-view.sql"))
    logger.info(
        "\t- Step 7 of 7 : boundary tagged address view created : {}".format(
            datetime.now() - start_time, ))
예제 #12
0
def create_reference_tables(pg_cur):
    # set postgres search path back to the default
    pg_cur.execute("SET search_path = public, pg_catalog")

    # Step 1 of 14 : create reference tables
    start_time = datetime.now()
    pg_cur.execute(geoscape.open_sql_file("03-01-reference-create-tables.sql"))
    logger.info("\t- Step  1 of 14 : create reference tables : {0}".format(
        datetime.now() - start_time))

    # Step 2 of 14 : populate localities
    start_time = datetime.now()
    pg_cur.execute(
        geoscape.open_sql_file("03-02-reference-populate-localities.sql"))
    logger.info("\t- Step  2 of 14 : localities populated : {0}".format(
        datetime.now() - start_time))

    # Step 3 of 14 : populate locality aliases
    start_time = datetime.now()
    pg_cur.execute(
        geoscape.open_sql_file(
            "03-03-reference-populate-locality-aliases.sql"))
    logger.info("\t- Step  3 of 14 : locality aliases populated : {0}".format(
        datetime.now() - start_time))

    # Step 4 of 14 : populate locality neighbours
    start_time = datetime.now()
    pg_cur.execute(
        geoscape.open_sql_file(
            "03-04-reference-populate-locality-neighbours.sql"))
    logger.info(
        "\t- Step  4 of 14 : locality neighbours populated : {0}".format(
            datetime.now() - start_time))

    # Step 5 of 14 : populate streets
    start_time = datetime.now()
    pg_cur.execute(
        geoscape.open_sql_file("03-05-reference-populate-streets.sql"))
    logger.info(
        "\t- Step  5 of 14 : streets populated : {0}".format(datetime.now() -
                                                             start_time))

    # Step 6 of 14 : populate street aliases
    start_time = datetime.now()
    pg_cur.execute(
        geoscape.open_sql_file("03-06-reference-populate-street-aliases.sql"))
    logger.info("\t- Step  6 of 14 : street aliases populated : {0}".format(
        datetime.now() - start_time))

    # Step 7 of 14 : populate addresses, using multiprocessing
    start_time = datetime.now()
    sql = geoscape.open_sql_file("03-07-reference-populate-addresses-1.sql")
    sql_list = geoscape.split_sql_into_list(pg_cur, sql, settings.gnaf_schema,
                                            "streets", "str", "gid", logger)
    if sql_list is not None:
        geoscape.multiprocess_list("sql", sql_list, logger)
    pg_cur.execute(geoscape.prep_sql("ANALYZE gnaf.temp_addresses;"))
    logger.info(
        "\t- Step  7 of 14 : addresses populated : {0}".format(datetime.now() -
                                                               start_time))

    # Step 8 of 14 : populate principal alias lookup
    start_time = datetime.now()
    pg_cur.execute(
        geoscape.open_sql_file(
            "03-08-reference-populate-address-alias-lookup.sql"))
    logger.info(
        "\t- Step  8 of 14 : principal alias lookup populated : {0}".format(
            datetime.now() - start_time))

    # Step 9 of 14 : populate primary secondary lookup
    start_time = datetime.now()
    pg_cur.execute(
        geoscape.open_sql_file(
            "03-09-reference-populate-address-secondary-lookup.sql"))
    pg_cur.execute(
        geoscape.prep_sql("VACUUM ANALYSE gnaf.address_secondary_lookup"))
    logger.info(
        "\t- Step  9 of 14 : primary secondary lookup populated : {0}".format(
            datetime.now() - start_time))

    # Step 10 of 14 : split the Melbourne locality into its 2 postcodes (3000, 3004)
    start_time = datetime.now()
    pg_cur.execute(
        geoscape.open_sql_file("03-10-reference-split-melbourne.sql"))
    logger.info(
        "\t- Step 10 of 14 : Melbourne split : {0}".format(datetime.now() -
                                                           start_time))

    # Step 11 of 14 : finalise localities assigned to streets and addresses
    start_time = datetime.now()
    pg_cur.execute(
        geoscape.open_sql_file("03-11-reference-finalise-localities.sql"))
    logger.info("\t- Step 11 of 14 : localities finalised : {0}".format(
        datetime.now() - start_time))

    # Step 12 of 14 : finalise addresses, using multiprocessing
    start_time = datetime.now()
    sql = geoscape.open_sql_file("03-12-reference-populate-addresses-2.sql")
    sql_list = geoscape.split_sql_into_list(pg_cur, sql, settings.gnaf_schema,
                                            "localities", "loc", "gid", logger)
    if sql_list is not None:
        geoscape.multiprocess_list("sql", sql_list, logger)

    # turf the temp address table
    pg_cur.execute(
        geoscape.prep_sql("DROP TABLE IF EXISTS gnaf.temp_addresses"))
    logger.info(
        "\t- Step 12 of 14 : addresses finalised : {0}".format(datetime.now() -
                                                               start_time))

    # Step 13 of 14 : create almost correct postcode boundaries by aggregating localities, using multiprocessing
    start_time = datetime.now()
    sql = geoscape.open_sql_file("03-13-reference-derived-postcode-bdys.sql")
    sql_list = []
    for state in settings.states_to_load:
        state_sql = sql.replace("GROUP BY ",
                                "WHERE state = '{0}' GROUP BY ".format(state))
        sql_list.append(state_sql)
    geoscape.multiprocess_list("sql", sql_list, logger)

    # create analysis table?
    if settings.st_subdivide_supported:
        pg_cur.execute(
            geoscape.open_sql_file(
                "03-13a-create-postcode-analysis-table.sql"))

    logger.info("\t- Step 13 of 14 : postcode boundaries created : {0}".format(
        datetime.now() - start_time))

    # Step 14 of 14 : create indexes, primary and foreign keys, using multiprocessing
    start_time = datetime.now()
    raw_sql_list = geoscape.open_sql_file(
        "03-14-reference-create-indexes.sql").split("\n")
    sql_list = []
    for sql in raw_sql_list:
        if sql[0:2] != "--" and sql[0:2] != "":
            sql_list.append(sql)
    geoscape.multiprocess_list("sql", sql_list, logger)
    logger.info(
        "\t- Step 14 of 14 : create primary & foreign keys and indexes : {0}".
        format(datetime.now() - start_time))