def taxlot_zoning_etl():
    """Run ETL for taxlot zoning cross-reference.

    Builds the taxlot/zoning overlay dataset: identities taxlot geometry
    against published zoning, dissolves per lot + zoning attributes, then
    derives names, acreage ratios, and QA flags before loading the output.
    """
    # Field-name groupings reused throughout: taxlot identifiers vs. zoning
    # attributes carried over from the zoning dataset.
    keys = {
        "taxlot": ["maptaxlot", "maptaxlot_hyphen", "map", "taxlot"],
        "zoning": [
            "zonejuris",
            "zonecode",
            "zonename",
            "subarea",
            "subareaname",
            "alloverlays",
            "overlay1",
            "overlay2",
            "overlay3",
            "overlay4",
        ],
    }
    keys["all"] = keys["taxlot"] + keys["zoning"]
    with arcetl.ArcETL("Taxlot Zoning") as etl:
        etl.init_schema(dataset.TAXLOT_ZONING.path())
        # zoning_id is a temp work field used to carry the identity result.
        etl.transform(arcetl.dataset.add_field, field_name="zoning_id", field_type="long")
        # To avoid memory/topoengine errors when processing, run ETL on subsets.
        subsets = taxlot_subset_temp_copies(REAL_LOT_SQL, field_names=keys["taxlot"])
        for subset in subsets:
            with subset:
                arcetl.dataset.add_field(subset.path, field_name="zoning_id", field_type="long")
                # Split taxlot features by overlapping zoning areas; tag each
                # resulting piece with the zoning feature's ID.
                arcetl.geoset.identity(
                    dataset_path=subset.path,
                    field_name="zoning_id",
                    identity_dataset_path=dataset.ZONING.path("pub"),
                    identity_field_name="zoning_id",
                )
                # Drop pieces that fell outside all zoning areas.
                arcetl.features.delete(subset.path, dataset_where_sql="zoning_id is null")
                # Must join these in the subset copy, before the dissolve: Final order
                # attribute on zoning means zoning ID can be different for matching
                # juris + zone + subarea + overlays.
                for key in keys["zoning"]:
                    arcetl.dataset.join_field(
                        dataset_path=subset.path,
                        join_dataset_path=dataset.ZONING.path("pub"),
                        join_field_name=key,
                        on_field_name="zoning_id",
                        on_join_field_name="zoning_id",
                    )
                # Dissolve on lot & overlay, for proper area representation.
                # Currently get topoengine error with tolerance on certain lot,
                # so that single lot is dissolved separately without tolerance.
                for where_sql, tolerance in [
                    ("maptaxlot = '2135093001000'", None),
                    ("maptaxlot <> '2135093001000'", TOLERANCE["xy"]),
                ]:
                    arcetl.features.dissolve(
                        subset.path,
                        dissolve_field_names=keys["all"],
                        dataset_where_sql=where_sql,
                        tolerance=tolerance,
                    )
                etl.transform(
                    arcetl.features.insert_from_path,
                    insert_dataset_path=subset.path,
                    field_names=keys["all"],
                )
        # Add descriptions/names.
        etl.transform(
            arcetl.attributes.update_by_mapping,
            field_name="zonejuris_name",
            mapping=zone_jurisdiction_name_map,
            key_field_names="zonejuris",
        )
        for i in [1, 2, 3, 4]:
            etl.transform(
                arcetl.attributes.update_by_joined_value,
                field_name="overlay{}_name".format(i),
                join_dataset_path=dataset.ZONING_OVERLAY.path(),
                join_field_name="overlay_name",
                on_field_pairs=[
                    ("zonejuris", "zoning_juris"),
                    ("overlay{}".format(i), "overlay_code"),
                ],
            )
        # Yeah, the -9999 is weird. However, trying to delete null-geometry features
        # that arise from every-so-tiny slivers in identity results: null shape@area
        # maps to the negative sentinel, which the next delete removes.
        etl.transform(
            arcetl.attributes.update_by_function,
            field_name="approx_acres",
            function=(lambda x: x / 43560.0 if x is not None else -9999),
            field_as_first_arg=False,
            arg_field_names=["shape@area"],
        )
        etl.transform(arcetl.features.delete, dataset_where_sql="approx_acres <= 0")
        # NOTE: taxlot_area_map() is evaluated once here (default-argument
        # binding), not per-row.
        etl.transform(
            arcetl.attributes.update_by_function,
            field_name="approx_taxlot_acres",
            function=(lambda lot, sqft=taxlot_area_map(): sqft[lot] / 43560.0),
            field_as_first_arg=False,
            arg_field_names=["maptaxlot"],
        )
        # Share of the whole taxlot covered by this zoning piece.
        etl.transform(
            arcetl.attributes.update_by_function,
            field_name="taxlot_area_ratio",
            function=(lambda acres, taxlot_acres: acres / taxlot_acres),
            field_as_first_arg=False,
            arg_field_names=["approx_acres", "approx_taxlot_acres"],
        )
        etl.transform(
            arcetl.attributes.update_by_function,
            field_name="compactness_ratio",
            function=arcetl.geometry.compactness_ratio,
            field_as_first_arg=False,
            arg_field_names=["shape@"],
        )
        # Remove minimal overlays.
        etl.transform(arcetl.features.delete, dataset_where_sql="taxlot_area_ratio <= 0.01")
        # Must be done after filtering minimal/bad overlays.
        etl.transform(
            arcetl.attributes.update_by_feature_match,
            field_name="taxlot_zoning_count",
            id_field_names=["maptaxlot"],
            update_type="match_count",
        )
        ##TODO: Still useful after allowing auto-splits?
        etl.transform(
            arcetl.attributes.update_by_function,
            field_name="at_taxlot_centroid",
            function=functools.partial(
                zoning_matches_taxlot_yn, taxlot_zoning_map=taxlot_centroid_zoning_map()),
            field_as_first_arg=False,
            kwarg_field_names=keys["all"],
        )
        ##TODO: Remove after auto-splits.
        # NOTE: split_zoned_taxlots() evaluated once via default binding.
        etl.transform(
            arcetl.attributes.update_by_function,
            field_name="in_split_table",
            function=(lambda x, split_lots=split_zoned_taxlots(): "Y"
                      if x in split_lots else "N"),
            field_as_first_arg=False,
            arg_field_names=["maptaxlot"],
        )
        # ##TODO: Repoint output & delete test-output.
        # etl.load(
        #     os.path.join(
        #         path.CPA_WORK_SHARE, "Boundary\\Zoning\\Taxlot_Zoning.gdb\\TaxlotZoning"
        #     )
        # )
        etl.load(dataset.TAXLOT_ZONING.path())
def taxlot_fire_protection_etl():
    """Run ETL for taxlot fire protection cross-reference.

    Identities taxlots against fire protection areas, keeping unprotected
    pieces, then joins provider attributes and computes acreage ratios.
    """
    keys = {"taxlot": ["maptaxlot", "maptaxlot_hyphen", "map", "taxlot"]}
    with arcetl.ArcETL("Taxlot Fire Protection") as etl:
        etl.init_schema(dataset.TAXLOT_FIRE_PROTECTION.path())
        # To avoid memory/topoengine errors when processing, run ETL on subsets.
        subsets = taxlot_subset_temp_copies(REAL_LOT_SQL, field_names=keys["taxlot"])
        for subset in subsets:
            with subset:
                arcetl.dataset.add_field(subset.path, field_name="provider_code", field_type="text")
                # Split taxlots by protection areas; tag with provider code.
                arcetl.geoset.identity(
                    dataset_path=subset.path,
                    field_name="provider_code",
                    identity_dataset_path=dataset.FIRE_PROTECTION_AREA.path(
                        "pub"),
                    identity_field_name="fireprotprov",
                )
                # DO NOT remove features without overlay; unprotected areas important.
                # Dissolve on lot & overlay, for proper area representation.
                arcetl.features.dissolve(
                    dataset_path=subset.path,
                    dissolve_field_names=keys["taxlot"] + ["provider_code"],
                    tolerance=TOLERANCE["xy"],
                )
                etl.transform(
                    arcetl.features.insert_from_path,
                    insert_dataset_path=subset.path,
                    field_names=keys["taxlot"] + ["provider_code"],
                )
        # Join fire protection attributes.
        # Maps output field -> source field on the fire protection dataset.
        attr_join_key = {
            "provider_name": "fpprovname",
            "protection_type_code": "fireprottype",
            "protection_type_description": "fptypename",
            "tax_district": "taxdist",
            "contact_phone": "contact_phone",
            "contact_email": "contact_email",
            "contact_mailing_address": "contact_mailing_address",
            "website_link": "website_link",
        }
        for key, join_key in attr_join_key.items():
            etl.transform(
                arcetl.attributes.update_by_joined_value,
                field_name=key,
                join_dataset_path=dataset.FIRE_PROTECTION_AREA.path("pub"),
                join_field_name=join_key,
                on_field_pairs=[("provider_code", "fireprotprov")],
            )
        # Yeah, the -9999 is weird. However, trying to delete null-geometry features
        # that arise from every-so-tiny slivers in identity results, this was best way:
        # null shape@area maps to the negative sentinel, deleted just below.
        etl.transform(
            arcetl.attributes.update_by_function,
            field_name="approx_acres",
            function=(lambda x: x / 43560.0 if x is not None else -9999),
            field_as_first_arg=False,
            arg_field_names=["shape@area"],
        )
        etl.transform(arcetl.features.delete, dataset_where_sql="approx_acres < 0")
        # NOTE: taxlot_area_map() evaluated once via default-argument binding.
        etl.transform(
            arcetl.attributes.update_by_function,
            field_name="approx_taxlot_acres",
            function=(lambda lot, sqft=taxlot_area_map(): sqft[lot] / 43560.0),
            field_as_first_arg=False,
            arg_field_names=["maptaxlot"],
        )
        # Share of the whole taxlot covered by this protection piece.
        etl.transform(
            arcetl.attributes.update_by_function,
            field_name="taxlot_area_ratio",
            function=(lambda acres, taxlot_acres: acres / taxlot_acres),
            field_as_first_arg=False,
            arg_field_names=["approx_acres", "approx_taxlot_acres"],
        )
        # Remove minimal overlays.
        etl.transform(arcetl.features.delete, dataset_where_sql="taxlot_area_ratio <= 0.001")
        etl.load(dataset.TAXLOT_FIRE_PROTECTION.path())
def taxlot_soil_etl():
    """Run ETL for taxlot soil cross-reference.

    Identities taxlots against SSURGO soil map units, dissolves per lot +
    map unit, joins component & rating attributes, and computes acreage
    ratios. Minimal overlays are deliberately kept in this output.
    """
    keys = {"taxlot": ["maptaxlot", "maptaxlot_hyphen", "map", "taxlot"]}
    with arcetl.ArcETL("Taxlot Soil") as etl:
        etl.init_schema(dataset.TAXLOT_SOIL.path())
        # To avoid memory/topoengine errors when processing, run ETL on subsets.
        subsets = taxlot_subset_temp_copies(REAL_LOT_SQL, field_names=keys["taxlot"])
        for subset in subsets:
            with subset:
                arcetl.dataset.add_field(subset.path, field_name="mukey", field_type="text")
                # Split taxlots by soil map units; tag with map unit key.
                arcetl.geoset.identity(
                    dataset_path=subset.path,
                    field_name="mukey",
                    identity_dataset_path=PATH["soil"],
                    identity_field_name="mukey",
                )
                # Remove features without overlay.
                arcetl.features.delete(dataset_path=subset.path, dataset_where_sql="mukey is null")
                # Dissolve on lot & overlay, for proper area representation.
                arcetl.features.dissolve(
                    dataset_path=subset.path,
                    dissolve_field_names=keys["taxlot"] + ["mukey"],
                    tolerance=TOLERANCE["xy"],
                )
                etl.transform(
                    arcetl.features.insert_from_path,
                    insert_dataset_path=subset.path,
                    field_names=keys["taxlot"] + ["mukey"],
                )
        # Map unit key -> ordered component names (evaluated once, bound as
        # lambda defaults below so it is not re-fetched per row).
        key_components = soil_key_components_map()
        etl.transform(
            arcetl.attributes.update_by_function,
            field_name="all_components",
            function=(lambda mukey, components=key_components: ",".join(
                components[mukey])),
            field_as_first_arg=False,
            arg_field_names=["mukey"],
        )
        for i in [1, 2, 3]:
            etl.transform(
                arcetl.attributes.update_by_function,
                field_name="compname{}".format(i),
                # i=i default binds the loop value at definition time,
                # avoiding the late-binding closure pitfall.
                function=(lambda mukey, components=key_components, i=i:
                          components[mukey][i - 1]
                          if len(components[mukey]) >= i else None),
                field_as_first_arg=False,
                arg_field_names=["mukey"],
            )
        # Assign joinable attributes.
        join_kwargs = [
            {
                "field_name": "musym",
                "join_dataset_path": PATH["soil"],
                "join_field_name": "musym",
                "on_field_pairs": [("mukey", "mukey")],
            },
            {
                "field_name": "muname",
                "join_dataset_path": PATH["soil_map_unit_aggregate"],
                "join_field_name": "muname",
                "on_field_pairs": [("mukey", "mukey")],
            },
            {
                "field_name": "land_capability_class",
                "join_dataset_path": PATH["soil_map_unit_aggregate"],
                "join_field_name": "niccdcd",
                "on_field_pairs": [("mukey", "mukey")],
            },
            {
                "field_name": "hydric_presence",
                "join_dataset_path": PATH["soil_map_unit_aggregate"],
                "join_field_name": "hydclprs",
                "on_field_pairs": [("mukey", "mukey")],
            },
            {
                "field_name": "farm_high_value",
                "join_dataset_path": PATH["soil_agriculture_ratings"],
                "join_field_name": "farm_high_value",
                "on_field_pairs": [("musym", "musym")],
            },
            {
                "field_name": "farm_high_value_if_drained",
                "join_dataset_path": PATH["soil_agriculture_ratings"],
                "join_field_name": "farm_high_value_if_drained",
                "on_field_pairs": [("musym", "musym")],
            },
            {
                "field_name": "farm_high_value_if_protected",
                "join_dataset_path": PATH["soil_agriculture_ratings"],
                "join_field_name": "farm_high_value_if_protected",
                "on_field_pairs": [("musym", "musym")],
            },
            {
                "field_name": "farm_potential_high_value",
                "join_dataset_path": PATH["soil_agriculture_ratings"],
                "join_field_name": "farm_potential_high_value",
                "on_field_pairs": [("musym", "musym")],
            },
        ]
        # Non-irrigated capability class for each of the top-3 components.
        join_kwargs += [{
            "field_name": "nirrcapcl" + str(i),
            "join_dataset_path": PATH["soil_component"],
            "join_field_name": "nirrcapcl",
            "on_field_pairs": [
                ("mukey", "mukey"),
                ("compname{}".format(i), "compname"),
            ],
        } for i in [1, 2, 3]]
        transform.update_attributes_by_joined_values(etl, join_kwargs)
        # Assign the legacy hydric flag using hydric_presence.
        etl.transform(
            arcetl.attributes.update_by_function,
            field_name="hydric",
            function=(lambda x: "Y" if x and int(x) >= 50 else "N"),
            field_as_first_arg=False,
            arg_field_names=["hydric_presence"],
        )
        # Convert Lane soil rating nulls to "N".
        transform.force_yn(
            etl,
            field_names=[
                "farm_high_value",
                "farm_high_value_if_drained",
                "farm_high_value_if_protected",
                "farm_potential_high_value",
            ],
            default="N",
        )
        # Identity can yield null-geometry sliver features (see zoning & fire
        # protection ETLs): guard against None shape@area with the -9999
        # sentinel & delete those rows, instead of raising TypeError mid-run.
        etl.transform(
            arcetl.attributes.update_by_function,
            field_name="approx_acres",
            function=(lambda x: x / 43560.0 if x is not None else -9999),
            field_as_first_arg=False,
            arg_field_names=["shape@area"],
        )
        etl.transform(arcetl.features.delete, dataset_where_sql="approx_acres < 0")
        # NOTE: taxlot_area_map() evaluated once via default-argument binding.
        etl.transform(
            arcetl.attributes.update_by_function,
            field_name="approx_taxlot_acres",
            function=(lambda lot, sqft=taxlot_area_map(): sqft[lot] / 43560.0),
            field_as_first_arg=False,
            arg_field_names=["maptaxlot"],
        )
        # Share of the whole taxlot covered by this soil piece.
        etl.transform(
            arcetl.attributes.update_by_function,
            field_name="taxlot_area_ratio",
            function=(lambda acres, taxlot_acres: acres / taxlot_acres),
            field_as_first_arg=False,
            arg_field_names=["approx_acres", "approx_taxlot_acres"],
        )
        # KEEP minimal overlays.
        etl.load(dataset.TAXLOT_SOIL.path())
def taxlot_plat_document_etl():
    """Run ETL for taxlot/plat document cross-reference.

    Overlays taxlots with plat polygons, fans each plat-tagged taxlot out to
    one row per associated document (or a single document-less row), then
    builds RLID document/image URLs and loads the output.
    """
    keys = {"taxlot": ["maptaxlot", "maptaxlot_hyphen", "map", "taxlot"]}
    with arcetl.ArcETL("Taxlot Plat Documents") as etl:
        etl.init_schema(dataset.TAXLOT_PLAT_DOCUMENT.path())
        # Process in subsets: the full run hits memory/topoengine errors.
        subsets = taxlot_subset_temp_copies(REAL_LOT_SQL, field_names=keys["taxlot"])
        plat_documents = plat_documents_map()
        for subset in subsets:
            with subset:
                arcetl.dataset.add_field(
                    subset.path, field_name="document_number", field_type="long"
                )
                # Tag each taxlot with the plat whose area covers its center.
                arcetl.geoset.overlay(
                    dataset_path=subset.path,
                    field_name="document_number",
                    overlay_dataset_path=PATH["plat"],
                    overlay_field_name="AGENCYDOCN",
                    overlay_central_coincident=True,
                )
                # Taxlots with no (or a zero) plat number are not wanted.
                arcetl.features.delete(
                    dataset_path=subset.path,
                    dataset_where_sql=
                    "document_number is null or document_number = 0",
                )
                out_rows = []
                tagged_lots = arcetl.attributes.as_dicts(
                    dataset_path=subset.path,
                    field_names=keys["taxlot"] + ["document_number"],
                )
                for lot_row in tagged_lots:
                    # Pre-seed document attributes; real values overwrite below.
                    for attr in ("plat_name", "document_name", "document_type"):
                        lot_row[attr] = None
                    # A plat with no documents still gets a document-less row.
                    if lot_row["document_number"] not in plat_documents:
                        out_rows.append(lot_row)
                        continue
                    for document in plat_documents[lot_row["document_number"]]:
                        doc_row = copy(lot_row)
                        doc_row.update(document)
                        out_rows.append(doc_row)
                if out_rows:
                    etl.transform(
                        arcetl.features.insert_from_dicts,
                        insert_features=out_rows,
                        field_names=out_rows[0].keys(),
                    )
        # Build URL values from the document name (skip nameless rows).
        for url_field, extension in (
            ("rlid_document_url", ".pdf"),
            ("rlid_image_url", ".tif"),
        ):
            template = url.RLID_IMAGE_SHARE + "/plats/" + "{}" + extension
            etl.transform(
                arcetl.attributes.update_by_function,
                field_name=url_field,
                function=template.format,
                field_as_first_arg=False,
                arg_field_names=["document_name"],
                dataset_where_sql="document_name is not null",
            )
        etl.load(dataset.TAXLOT_PLAT_DOCUMENT.path())
def taxlot_petition_document_etl():
    """Run ETL for taxlot/petition document cross-reference.

    Overlays taxlots with petition areas, fans each petition-tagged taxlot
    out to one row per associated document, joins petition attributes, and
    normalizes flag/date values before loading the output.
    """
    keys = {"taxlot": ["maptaxlot", "maptaxlot_hyphen", "map", "taxlot"]}
    with arcetl.ArcETL("Taxlot Petition Documents") as etl:
        etl.init_schema(dataset.TAXLOT_PETITION_DOCUMENT.path())
        # To avoid memory/topoengine errors when processing, run ETL on subsets.
        subsets = taxlot_subset_temp_copies(REAL_LOT_SQL, field_names=keys["taxlot"])
        petition_documents = petition_documents_map()
        for subset in subsets:
            with subset:
                arcetl.dataset.add_field(subset.path, field_name="petition_id", field_type="text")
                # Tag each taxlot with the petition covering its center.
                arcetl.geoset.overlay(
                    dataset_path=subset.path,
                    field_name="petition_id",
                    overlay_dataset_path=PATH["petition"],
                    overlay_field_name="ID_NUM",
                    overlay_central_coincident=True,
                )
                # Source IDs have stray whitespace; normalize before matching.
                arcetl.attributes.update_by_function(
                    dataset_path=subset.path,
                    field_name="petition_id",
                    function=clean_whitespace,
                )
                # Remove features without overlay.
                arcetl.features.delete(dataset_path=subset.path,
                                       dataset_where_sql="petition_id is null")
                petition_document_rows = []
                for petition in arcetl.attributes.as_dicts(
                        dataset_path=subset.path,
                        field_names=keys["taxlot"] + ["petition_id"],
                ):
                    petition.update({
                        "document_name": None,
                        "document_type": None
                    })
                    # If petition has no documents, add a document-less row.
                    if petition["petition_id"] not in petition_documents:
                        petition_document_rows.append(petition)
                        continue
                    # One output row per petition document.
                    for document in petition_documents[
                            petition["petition_id"]]:
                        row = copy(petition)
                        row.update(document)
                        petition_document_rows.append(row)
                if petition_document_rows:
                    etl.transform(
                        arcetl.features.insert_from_dicts,
                        insert_features=petition_document_rows,
                        field_names=petition_document_rows[0].keys(),
                    )
        # Set petition jurisdiction (only Eugene petitions at the moment).
        etl.transform(
            arcetl.attributes.update_by_value,
            field_name="petition_jurisdiction_code",
            value="EUG",
        )
        # Add temp field for convertable string values from petition lots;
        # parsed into petition_date & dropped further below.
        etl.transform(
            arcetl.dataset.add_field,
            field_name="petition_date_string",
            field_type="text",
            field_length=32,
        )
        # Assign joinable attributes.
        # Each entry maps output field -> source field on the petition dataset.
        join_kwargs = [
            {
                "field_name": "petition_number",
                "join_field_name": "PETNUM"
            },
            {
                "field_name": "petition_type_code",
                "join_field_name": "PET_TYPE"
            },
            {
                "field_name": "petition_date_string",
                "join_field_name": "DATE"
            },
            {
                "field_name": "is_active",
                "join_field_name": "ACTIVE"
            },
            {
                "field_name": "alley_petition",
                "join_field_name": "ALY"
            },
            {
                "field_name": "bikepath_petition",
                "join_field_name": "BP"
            },
            {
                "field_name": "paving_petition",
                "join_field_name": "PAV"
            },
            {
                "field_name": "pedway_petition",
                "join_field_name": "PED"
            },
            {
                "field_name": "rehab_petition",
                "join_field_name": "RHB"
            },
            {
                "field_name": "sanitary_petition",
                "join_field_name": "SAN"
            },
            {
                "field_name": "sidewalk_petition",
                "join_field_name": "CW"
            },
            {
                "field_name": "storm_petition",
                "join_field_name": "STM"
            },
            {
                "field_name": "streetlight_petition",
                "join_field_name": "SL"
            },
        ]
        for kwargs in join_kwargs:
            etl.transform(arcetl.attributes.update_by_joined_value,
                          join_dataset_path=PATH["petition"],
                          on_field_pairs=[("petition_id", "ID_NUM")],
                          **kwargs)
        petition_fields = [
            "alley_petition",
            "bikepath_petition",
            "paving_petition",
            "pedway_petition",
            "rehab_petition",
            "sanitary_petition",
            "sidewalk_petition",
            "storm_petition",
            "streetlight_petition",
        ]
        # Clean added values from sources of unknown maintenance.
        transform.clean_whitespace(
            etl,
            field_names=petition_fields + [
                "petition_number",
                "petition_type_code",
                "petition_date_string",
                "is_active",
            ],
        )
        # RLID uses Y/N flags, convert these Yes/No ones.
        # (Field value is passed as the function's first argument here.)
        for field_name in petition_fields + ["is_active"]:
            etl.transform(
                arcetl.attributes.update_by_function,
                field_name=field_name,
                function=(lambda x: "Y"
                          if x and x.upper() in ["Y", "YES"] else "N"),
            )
        # Update petition_date from the string value version.
        etl.transform(
            arcetl.attributes.update_by_function,
            field_name="petition_date",
            function=datetime_from_string,
            field_as_first_arg=False,
            arg_field_names=["petition_date_string"],
        )
        etl.transform(arcetl.dataset.delete_field, field_name="petition_date_string")
        # Add values derived from other values.
        petition_type = {
            "A": "Prepaid Assessment",
            "I": "Irrevocable Petition",
            "P": "Poll",
            "S": "Survey",
            "V": "Voluntary Petition",
            "X": "Adjacent to Unimproved Street or Alley",
        }
        # dict.get -> None description for unknown type codes.
        etl.transform(
            arcetl.attributes.update_by_function,
            field_name="petition_type_description",
            function=petition_type.get,
            field_as_first_arg=False,
            arg_field_names=["petition_type_code"],
        )
        # Build URL values.
        for field_name, ext in [
            ("rlid_document_url", ".pdf"),
            ("rlid_image_url", ".tif"),
        ]:
            etl.transform(
                arcetl.attributes.update_by_function,
                field_name=field_name,
                function=(url.RLID_IMAGE_SHARE + "/petitions/" + "{}" + ext).format,
                field_as_first_arg=False,
                arg_field_names=["document_name"],
                dataset_where_sql="document_name is not null",
            )
        etl.load(dataset.TAXLOT_PETITION_DOCUMENT.path())
def taxlot_flood_hazard_etl():
    """Run ETL for taxlot/flood hazard cross-reference.

    Identities taxlots against FEMA flood hazard areas, dissolves per lot +
    flood area, joins zone attributes, and computes acreage ratios. Minimal
    overlays are deliberately kept in this output.
    """
    keys = {"taxlot": ["maptaxlot", "maptaxlot_hyphen", "map", "taxlot"]}
    with arcetl.ArcETL("Taxlot Flood Hazard") as etl:
        etl.init_schema(dataset.TAXLOT_FLOOD_HAZARD.path())
        # To avoid memory/topoengine errors when processing, run ETL on subsets.
        subsets = taxlot_subset_temp_copies(REAL_LOT_SQL, field_names=keys["taxlot"])
        for subset in subsets:
            with subset:
                arcetl.dataset.add_field(subset.path, field_name="flood_area_id", field_type="text")
                # Split taxlots by flood hazard areas; tag with area ID.
                arcetl.geoset.identity(
                    dataset_path=subset.path,
                    field_name="flood_area_id",
                    identity_dataset_path=PATH["flood_hazard_area"],
                    identity_field_name="fld_ar_id",
                )
                # Remove features without overlay.
                arcetl.features.delete(
                    dataset_path=subset.path,
                    dataset_where_sql="flood_area_id is null")
                # Dissolve on lot & overlay, for proper area representation.
                arcetl.features.dissolve(
                    dataset_path=subset.path,
                    dissolve_field_names=keys["taxlot"] + ["flood_area_id"],
                    tolerance=TOLERANCE["xy"],
                )
                etl.transform(
                    arcetl.features.insert_from_path,
                    insert_dataset_path=subset.path,
                    field_names=keys["taxlot"] + ["flood_area_id"],
                )
        # Assign joinable attributes.
        join_kwargs = [
            {
                "field_name": "flood_zone_code",
                "join_dataset_path": PATH["flood_hazard_area"],
                "join_field_name": "fld_zone",
                "on_field_pairs": [("flood_area_id", "fld_ar_id")],
            },
            {
                "field_name": "flood_zone_subtype",
                "join_dataset_path": PATH["flood_hazard_area"],
                "join_field_name": "zone_subty",
                "on_field_pairs": [("flood_area_id", "fld_ar_id")],
            },
            {
                "field_name": "old_flood_zone_code",
                "join_dataset_path": PATH["flood_zone_code"],
                "join_field_name": "old_flood_zone_code",
                "on_field_pairs": [
                    ("flood_zone_code", "flood_zone_code"),
                    ("flood_zone_subtype", "flood_zone_subtype"),
                ],
            },
            {
                "field_name": "flood_zone_description",
                "join_dataset_path": PATH["flood_zone_code"],
                "join_field_name": "flood_zone_description",
                "on_field_pairs": [
                    ("flood_zone_code", "flood_zone_code"),
                    ("flood_zone_subtype", "flood_zone_subtype"),
                ],
            },
        ]
        transform.update_attributes_by_joined_values(etl, join_kwargs)
        # Identity can yield null-geometry sliver features (see zoning & fire
        # protection ETLs): guard against None shape@area with the -9999
        # sentinel & delete those rows, instead of raising TypeError mid-run.
        etl.transform(
            arcetl.attributes.update_by_function,
            field_name="approx_acres",
            function=(lambda x: x / 43560.0 if x is not None else -9999),
            arg_field_names=["shape@area"],
            field_as_first_arg=False,
        )
        etl.transform(arcetl.features.delete, dataset_where_sql="approx_acres < 0")
        # NOTE: taxlot_area_map() evaluated once via default-argument binding.
        etl.transform(
            arcetl.attributes.update_by_function,
            field_name="approx_taxlot_acres",
            function=(lambda lot, sqft=taxlot_area_map(): sqft[lot] / 43560.0),
            field_as_first_arg=False,
            arg_field_names=["maptaxlot"],
        )
        # Share of the whole taxlot covered by this flood hazard piece.
        etl.transform(
            arcetl.attributes.update_by_function,
            field_name="taxlot_area_ratio",
            function=(lambda acres, taxlot_acres: acres / taxlot_acres),
            field_as_first_arg=False,
            arg_field_names=["approx_acres", "approx_taxlot_acres"],
        )
        # KEEP minimal overlays.
        etl.load(dataset.TAXLOT_FLOOD_HAZARD.path())