Example #1
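These snippets are excerpts from larger ETL modules and all assume the same module-level context. A minimal sketch of the assumed setup (names inferred from usage; the real modules, paths, and values may differ):

import os

import arcetl  # The ETL framework all snippets drive.

# Assumed project-local helper modules; the package name here is hypothetical.
from etl_helpers import database, dataset, path, transform

# Dissolve/identity tolerance lookup keyed by axis group; value is a placeholder.
TOLERANCE = {"xy": 0.003}

Other module-level names used below (LANE_ZONING_STAGING_PATH, OVERRIDE_ATTRS, CCLASS_ST_CLASS, and helper functions such as precinct_ward and concatenate_zoning_overlays) are likewise defined elsewhere in their source modules.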
def incorporated_city_limits_etl():
    """Run ETL for incorporated city limits."""
    with arcetl.ArcETL("Incorporated City Limits") as etl:
        etl.extract(dataset.ANNEXATION_HISTORY.path("pub"))
        etl.transform(
            arcetl.features.dissolve,
            dissolve_field_names=["annexcity"],
            tolerance=TOLERANCE["xy"],
        )
        transform.add_missing_fields(etl, dataset.INCORPORATED_CITY_LIMITS)
        etl.transform(
            arcetl.attributes.update_by_function,
            field_name="inccityabbr",
            function=(lambda x: x),
            field_as_first_arg=False,
            arg_field_names=["annexcity"],
        )
        etl.transform(
            arcetl.attributes.update_by_joined_value,
            field_name="inccityname",
            join_field_name="CityName",
            join_dataset_path=dataset.CITY.path(),
            on_field_pairs=[("inccityabbr", "CityNameAbbr")],
        )
        etl.load(dataset.INCORPORATED_CITY_LIMITS.path())
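The (lambda x: x) transform above is a field copy: with field_as_first_arg=False, update_by_function passes only the arg_field_names values to the function, so the identity lambda writes each feature's annexcity value into inccityabbr. A plain-Python sketch of the per-row behavior, assuming that reading of the keyword arguments:

def apply_update(row, field_name, function, arg_field_names):
    """Mimic update_by_function for one row (illustrative only)."""
    args = [row[name] for name in arg_field_names]
    row[field_name] = function(*args)
    return row

row = {"annexcity": "EUG", "inccityabbr": None}
apply_update(row, "inccityabbr", lambda x: x, ["annexcity"])
# row["inccityabbr"] is now "EUG".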
def plan_designation_county_etl():
    """Run ETL for county plan designations."""
    with arcetl.ArcETL("County Plan Designations") as etl:
        etl.extract(dataset.PLAN_DESIGNATION_COUNTY.path("maint"))
        transform.add_missing_fields(etl,
                                     dataset.PLAN_DESIGNATION_COUNTY,
                                     tags=["pub"])
        etl.transform(arcetl.attributes.update_by_value,
                      field_name="planjuris",
                      value="LC")
        for new_name, old_name in [("plandes", "ZONE_"),
                                   ("plandesnam", "ZONE_NAME")]:
            etl.transform(
                arcetl.attributes.update_by_function,
                field_name=new_name,
                function=(lambda x: x),
                field_as_first_arg=False,
                arg_field_names=[old_name],
            )
        # Remove county designations where city ones exist.
        etl.transform(
            arcetl.features.erase,
            erase_dataset_path=dataset.PLAN_DESIGNATION_CITY.path("pub"),
        )
        transform.clean_whitespace(
            etl,
            field_names=["planjuris", "plandes", "plandesnam", "finalorder"])
        etl.transform(arcetl.features.delete,
                      dataset_where_sql="plandes is null")
        etl.transform(
            arcetl.features.dissolve,
            dissolve_field_names=dataset.PLAN_DESIGNATION_COUNTY.field_names,
            tolerance=TOLERANCE["xy"],
        )
        etl.load(dataset.PLAN_DESIGNATION_COUNTY.path("pub"))
def plss_township_etl():
    """Run ETL for PLSS townships."""
    with arcetl.ArcETL("PLSS Townships") as etl:
        etl.extract(dataset.PLSS_TOWNSHIP.path("maint"))
        etl.transform(
            arcetl.features.dissolve,
            dissolve_field_names=["tr"],
            tolerance=TOLERANCE["xy"],
        )
        transform.add_missing_fields(etl, dataset.PLSS_TOWNSHIP, tags=["pub"])
        etl.transform(
            arcetl.attributes.update_by_function,
            field_name="tnum",
            function=(lambda tr: tr // 100),
            field_as_first_arg=False,
            arg_field_names=["tr"],
        )
        etl.transform(
            arcetl.attributes.update_by_function,
            field_name="rnum",
            function=(lambda tr: tr % 100),
            field_as_first_arg=False,
            arg_field_names=["tr"],
        )
        etl.load(dataset.PLSS_TOWNSHIP.path("pub"))
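The tr value packs township and range into a single integer, so the two lambdas simply split it apart: floor division by 100 yields the township number and the remainder yields the range number. For example, tr = 1703 gives tnum = 1703 // 100 = 17 and rnum = 1703 % 100 = 3.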
def emergency_service_zone_etl():
    """Run ETL for emergency service zones."""
    with arcetl.ArcETL("Emergency Service Zones") as etl:
        etl.extract(dataset.TILLAMOOK_EMS.path())
        transform.add_missing_fields(etl,
                                     dataset.TILLAMOOK_EMERGENCY_SERVICE_ZONE)
        identity_kwargs = [
            {
                "field_name": "ems",
                "identity_field_name": "district",
                "identity_dataset_path": dataset.TILLAMOOK_EMS.path(),
            },
            {
                "field_name": "fire",
                "identity_field_name": "district",
                "identity_dataset_path": dataset.TILLAMOOK_FIRE.path(),
            },
            {
                "field_name": "police",
                "identity_field_name": "district",
                "identity_dataset_path": dataset.TILLAMOOK_POLICE.path(),
            },
        ]
        for kwargs in identity_kwargs:
            etl.transform(arcetl.geoset.identity, tolerance=2, **kwargs)
        # Drop features that lack all of EMS, fire, & police coverage.
        etl.transform(
            arcetl.features.delete,
            dataset_where_sql=" and ".join(
                ["ems is null", "fire is null", "police is null"]),
        )
        join_kwargs = [{
            "field_name": "esn",
            "join_field_name": "esn",
            "join_dataset_path":
            dataset.TILLAMOOK_EMERGENCY_SERVICE_NUMBER.path(),
            "on_field_pairs": [
                ("police", "police"),
                ("fire", "fire"),
                ("ems", "ems"),
            ],
        }]
        for kwargs in join_kwargs:
            etl.transform(arcetl.attributes.update_by_joined_value, **kwargs)
        etl.transform(
            arcetl.features.dissolve,
            dissolve_field_names=[
                field["name"]
                for field in dataset.TILLAMOOK_EMERGENCY_SERVICE_NUMBER.fields
            ],
            tolerance=TOLERANCE["xy"],
        )
        etl.load(dataset.TILLAMOOK_EMERGENCY_SERVICE_ZONE.path())
Example #5
def facility_etl():
    """Run ETL for facilities.

    Currently only undertaken for other ETL purposes--not publication.
    """
    with arcetl.ArcETL("Facilities") as etl:
        etl.extract(dataset.FACILITY.path("maint"))
        etl.transform(
            arcetl.dataset.rename_field,
            field_name="geofeat_id",
            new_field_name="address_intid",
        )
        # Clean maintenance values.
        transform.clear_nonpositive(etl, field_names=["address_intid"])
        transform.clean_whitespace(
            etl, field_names=["label", "label_full", "type", "type_full"])
        transform.force_lowercase(etl, field_names=["type"])
        transform.force_uppercase(etl, field_names=["label"])
        transform.add_missing_fields(etl, dataset.FACILITY, tags=["pub"])
        # Assign geometry attributes.
        coordinate_system_xy_keys = {
            2914: {"x": "x_coordinate", "y": "y_coordinate"},
            4326: {"x": "longitude", "y": "latitude"},
        }
        for spatial_reference_id, xy_key in coordinate_system_xy_keys.items():
            for axis, key in xy_key.items():
                etl.transform(
                    arcetl.attributes.update_by_geometry,
                    field_name=key,
                    spatial_reference_item=spatial_reference_id,
                    geometry_properties=["centroid", axis],
                )
        etl.transform(
            arcetl.attributes.update_by_mapping,
            field_name="address_uuid",
            mapping=address_intid_to_uuid_map,
            key_field_names=["address_intid"],
        )
        etl.load(dataset.FACILITY.path("pub"))
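The nested loop expands coordinate_system_xy_keys into one update_by_geometry transform per (spatial reference, axis) pair, four in all. A quick illustration of what gets scheduled, reusing the same dict:

for srid, xy_key in coordinate_system_xy_keys.items():
    for axis, field in xy_key.items():
        print(srid, field, ["centroid", axis])
# 2914 x_coordinate ['centroid', 'x']
# 2914 y_coordinate ['centroid', 'y']
# 4326 longitude ['centroid', 'x']
# 4326 latitude ['centroid', 'y']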
Example #6
def epud_subdistrict_etl():
    """Run ETL for EPUD subdistricts."""
    with arcetl.ArcETL("EPUD Subdistricts") as etl:
        etl.extract(dataset.EPUD_SUBDISTRICT.path("source"))
        transform.add_missing_fields(etl,
                                     dataset.EPUD_SUBDISTRICT,
                                     tags=["pub"])
        etl.transform(
            arcetl.attributes.update_by_function,
            field_name="boardid",
            function=(lambda x: int(x) if x.isdigit() else None),
            field_as_first_arg=False,
            arg_field_names=["boardid_"],
        )
        etl.transform(
            arcetl.features.dissolve,
            dissolve_field_names=["boardid", "boardmbr"],
            tolerance=TOLERANCE["xy"],
        )
        etl.load(dataset.EPUD_SUBDISTRICT.path("pub"))
def plss_section_etl():
    """Run ETL for PLSS sections."""
    with arcetl.ArcETL("PLSS Sections") as etl:
        etl.extract(dataset.PLSS_SECTION.path("maint"))
        transform.add_missing_fields(etl, dataset.PLSS_SECTION, tags=["pub"])
        etl.transform(
            arcetl.features.dissolve,
            dissolve_field_names=[
                field["name"] for field in dataset.PLSS_SECTION.fields
                if "pub" in field["tags"]
            ],
            tolerance=TOLERANCE["xy"],
        )
        etl.transform(
            arcetl.attributes.update_by_function,
            field_name="trsalt",
            function=convert_lane_trs_to_common,
            field_as_first_arg=False,
            arg_field_names=["tnum", "rnum", "sec"],
        )
        etl.load(dataset.PLSS_SECTION.path("pub"))
Example #8
def city_ward_etl():
    """Run ETL for city wards."""
    with arcetl.ArcETL("City Wards") as etl:
        etl.extract(
            dataset.ELECTION_PRECINCT.path("maint"),
            extract_where_sql="precntnum like '1%' or precntnum like '2%'",
        )
        transform.add_missing_fields(etl, dataset.CITY_WARD)
        etl.transform(
            arcetl.attributes.update_by_function,
            field_name="ward",
            function=precinct_ward,
            field_as_first_arg=False,
            arg_field_names=["precntnum"],
        )
        # Add Cottage Grove wards.
        etl.transform(
            arcetl.features.insert_from_path,
            insert_dataset_path=dataset.COT_CITY_WARD.path(),
        )
        etl.transform(
            arcetl.features.dissolve,
            dissolve_field_names=["ward"],
            tolerance=TOLERANCE["xy"],
        )
        etl.transform(
            arcetl.attributes.update_by_function,
            field_name="wardcity",
            function=ward_city_code,
            field_as_first_arg=False,
            arg_field_names=["ward"],
        )
        etl.transform(
            arcetl.attributes.update_by_joined_value,
            field_name="councilor",
            join_dataset_path=dataset.CITY_COUNCILOR.path(),
            join_field_name="Councilor",
            on_field_pairs=[("ward", "Ward")],
        )
        etl.load(dataset.CITY_WARD.path())
def plss_dlc_etl():
    """Run ETL for PLSS donation land claims."""
    with arcetl.ArcETL("PLSS Donation Land Claims") as etl:
        etl.extract(dataset.PLSS_DLC.path("maint"))
        transform.clean_whitespace(etl, field_names=["name", "trs"])
        transform.add_missing_fields(etl, dataset.PLSS_DLC, tags=["pub"])
        etl.transform(
            arcetl.attributes.update_by_function,
            field_name="dlcname",
            function=(lambda x: x),
            field_as_first_arg=False,
            arg_field_names=["NAME"],
        )
        etl.transform(
            arcetl.features.dissolve,
            dissolve_field_names=[
                field["name"] for field in dataset.PLSS_DLC.fields
                if "pub" in field["tags"]
            ],
            tolerance=TOLERANCE["xy"],
        )
        etl.load(dataset.PLSS_DLC.path("pub"))
def zoning_county_etl():
    """Run ETL for county zoning."""
    overlay_field_names = [
        name for name in dataset.ZONING_COUNTY.field_names
        if name.lower().startswith("over")
    ]
    with arcetl.ArcETL("County Zoning") as etl:
        etl.extract(dataset.ZONING_COUNTY.path("maint"))
        etl.transform(
            arcetl.features.insert_from_path,
            insert_dataset_path=dataset.ZONING_COUNTY.path("insert"),
        )
        transform.add_missing_fields(etl, dataset.ZONING_COUNTY, tags=["pub"])
        for new_name, old_name in [("zonecode", "ZONE_"),
                                   ("zonename", "ZONE_NAME")]:
            etl.transform(
                arcetl.attributes.update_by_function,
                field_name=new_name,
                function=(lambda x: x),
                field_as_first_arg=False,
                arg_field_names=[old_name],
            )
        # UGB zoning has slightly different names. We want to standardize on the main
        # zoning dataset names.
        etl.transform(
            arcetl.attributes.update_by_mapping,
            field_name="zonename",
            mapping=county_zone_name_map,
            key_field_names=["zonecode"],
        )
        # Clean maintenance values.
        transform.clean_whitespace(etl, field_names=["zonecode", "zonename"])
        etl.transform(arcetl.features.delete,
                      dataset_where_sql="zonecode is null")
        # Remove county zoning where city ones exist.
        etl.transform(arcetl.features.erase,
                      erase_dataset_path=dataset.ZONING_CITY.path("pub"))
        # Assign zoning overlays.
        identity_kwargs = [
            {
                "field_name": "coastalzonecode",
                "identity_field_name": "TYPE",
                "identity_dataset_path":
                os.path.join(LANE_ZONING_STAGING_PATH, "coastal_zones.shp"),
            },
            {
                "field_name": "overas",
                "identity_field_name": "AIRPORT",
                "identity_dataset_path":
                os.path.join(LANE_ZONING_STAGING_PATH, "aszone.shp"),
                "replacement_value": "Y",
            },
            {
                "field_name": "overcas",
                "identity_field_name": "AIRPORT",
                "identity_dataset_path":
                os.path.join(LANE_ZONING_STAGING_PATH, "caszone.shp"),
                "replacement_value": "Y",
            },
            {
                "field_name": "overdms",
                "identity_field_name": "TYPE",
                "identity_dataset_path":
                os.path.join(LANE_ZONING_STAGING_PATH, "dredge_sites.shp"),
                "replacement_value": "Y",
            },
            {
                "field_name": "overbd",
                "identity_field_name": "Shape_Leng",
                "identity_dataset_path":
                os.path.join(LANE_ZONING_STAGING_PATH, "beach_dune.shp"),
                "replacement_value": "Y",
            },
            {
                "field_name": "overu",
                "identity_field_name": "urban",
                "identity_dataset_path":
                os.path.join(LANE_ZONING_STAGING_PATH, "interim_urban.shp"),
                "replacement_value": "Y",
            },
        ]
        for kwargs in identity_kwargs:
            etl.transform(arcetl.geoset.identity, **kwargs)
        # Clean identity values.
        transform.clean_whitespace(etl, field_names=["coastalzonecode"])
        etl.transform(arcetl.attributes.update_by_value,
                      field_name="zonejuris",
                      value="LC")
        etl.transform(
            arcetl.features.dissolve,
            dissolve_field_names=[
                field["name"] for field in dataset.ZONING_COUNTY.fields
                if "pub" in field["tags"]
            ],
            tolerance=TOLERANCE["xy"],
        )
        # Assign the overlay flags that depend on the coastal zone code.
        for code in ["CE", "DE", "MD", "NE", "NRC", "PW", "RD", "SN"]:
            etl.transform(
                arcetl.attributes.update_by_function,
                field_name="over{}".format(code.lower()),
                function=(lambda czc, c=code: "Y" if czc == c else "N"),
                field_as_first_arg=False,
                arg_field_names=["coastalzonecode"],
            )
        transform.force_uppercase(etl, overlay_field_names)
        transform.force_yn(etl, overlay_field_names, default="N")
        etl.transform(
            arcetl.attributes.update_by_function,
            field_name="alloverlays",
            function=concatenate_zoning_overlays,
            field_as_first_arg=False,
            kwarg_field_names=overlay_field_names,
        )
        etl.load(dataset.ZONING_COUNTY.path("pub"))
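The c=code default argument in the overlay-flag lambdas above is deliberate: Python closures bind loop variables late, so without it every lambda created in the loop would compare against the final code ("SN"). A default argument freezes the current value at each iteration:

codes = ["CE", "DE"]
late = [lambda czc: "Y" if czc == code else "N" for code in codes]
bound = [lambda czc, c=code: "Y" if czc == c else "N" for code in codes]
print([f("CE") for f in late])   # ['N', 'N']: both closures see code == "DE".
print([f("CE") for f in bound])  # ['Y', 'N']: each lambda froze its own code.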
def road_centerline_etl():
    """Run ETL for road centerlines."""
    with arcetl.ArcETL("Road Centerlines") as etl:
        etl.extract(dataset.TILLAMOOK_ROAD_CENTERLINE.path("maint"))
        transform.add_missing_fields(etl,
                                     dataset.TILLAMOOK_ROAD_CENTERLINE,
                                     tags=["pub"])
        # Assign overlays.
        overlay_kwargs = [
            {
                "field_name": "esn_L",
                "overlay_field_name": "esn",
                "overlay_dataset_path":
                dataset.TILLAMOOK_EMERGENCY_SERVICE_ZONE.path(),
            },
            {
                "field_name": "esn_R",
                "overlay_field_name": "esn",
                "overlay_dataset_path":
                dataset.TILLAMOOK_EMERGENCY_SERVICE_ZONE.path(),
            },
        ]
        for kwargs in overlay_kwargs:
            etl.transform(arcetl.attributes.update_by_overlay,
                          overlay_central_coincident=True,
                          **kwargs)
        # Assign joined values.
        etl.transform(
            arcetl.attributes.update_by_joined_value,
            field_name="join_id",
            join_dataset_path=dataset.TILLAMOOK_ALTERNATE_STREET_NAME.path(),
            join_field_name="join_id",
            on_field_pairs=[
                ("predir", "prime_predir"),
                ("name", "prime_name"),
                ("type", "prime_type"),
                ("sufdir", "prime_sufdir"),
            ],
        )
        # Build values: Translations.
        etl.transform(
            arcetl.attributes.update_by_function,
            field_name="st_class",
            function=(lambda cc, cc_st_map=CCLASS_ST_CLASS: cc_st_map.get(cc)),
            field_as_first_arg=False,
            arg_field_names=["cclass"],
        )
        # Build values: Constants.
        value_kwargs = [
            {"field_name": "state_L", "value": "OR"},
            {"field_name": "state_R", "value": "OR"},
        ]
        transform.update_attributes_by_values(etl, value_kwargs)
        # Build values: Concatenations.
        etl.transform(
            arcetl.attributes.update_by_function,
            field_name="full_name",
            function=concatenate_arguments,
            field_as_first_arg=False,
            arg_field_names=["predir", "name", "type", "sufdir"],
        )
        etl.load(dataset.TILLAMOOK_ROAD_CENTERLINE.path("pub"))
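The st_class lambda uses the same default-argument binding to capture CCLASS_ST_CLASS (a mapping defined elsewhere in the module), and dict.get returns None for unmapped cclass codes rather than raising. Equivalent behavior with a stand-in mapping:

CCLASS_ST_CLASS = {100: "ARTERIAL", 200: "COLLECTOR"}  # hypothetical contents
translate = lambda cc, cc_st_map=CCLASS_ST_CLASS: cc_st_map.get(cc)
print(translate(100), translate(999))  # ARTERIAL None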
def address_point_etl():
    """Run ETL for address points."""
    with arcetl.ArcETL("Address Points") as etl:
        etl.extract(dataset.TILLAMOOK_ADDRESS_POINT.path("maint"))
        # Remove addresses flagged in validation as "not OK to publish".
        etl.transform(
            arcetl.dataset.join_field,
            join_dataset_path=dataset.TILLAMOOK_ADDRESS_POINT_ISSUES.path(),
            join_field_name="ok_to_publish",
            on_field_name="address_id",
            on_join_field_name="address_id",
        )
        etl.transform(arcetl.features.delete,
                      dataset_where_sql="ok_to_publish = 0")
        etl.transform(arcetl.dataset.delete_field, field_name="ok_to_publish")
        # Clean maintenance values.
        transform.clear_nonpositive(etl, field_names=["stnum"])
        transform.clean_whitespace(
            etl,
            field_names=[
                "stnumsuf",
                "predir",
                "name",
                "type",
                "sufdir",
                "unit_type",
                "unit",
                "postcomm",
                "zip",
                "county",
            ],
        )
        transform.force_uppercase(
            etl,
            field_names=[
                "stnumsuf",
                "predir",
                "name",
                "type",
                "unit_type",
                "unit",
                "postcomm",
                "county",
                "valid",
                "archived",
                "confidence",
            ],
        )
        transform.clear_non_numeric_text(etl, field_names=["zip"])
        transform.force_yn(etl, field_names=["archived"], default="N")
        transform.force_yn(etl, field_names=["valid"], default="Y")
        transform.add_missing_fields(etl,
                                     dataset.TILLAMOOK_ADDRESS_POINT,
                                     tags=["pub"])
        # Assign geometry attributes.
        for x_name, y_name, srid in [("lon", "lat", 4326)]:
            for name, axis in [(x_name, "x"), (y_name, "y")]:
                etl.transform(
                    arcetl.attributes.update_by_geometry,
                    field_name=name,
                    spatial_reference_item=srid,
                    geometry_properties=["centroid", axis],
                )
        # Assign joined values.
        etl.transform(
            arcetl.attributes.update_by_joined_value,
            field_name="join_id",
            join_dataset_path=dataset.TILLAMOOK_ALTERNATE_STREET_NAME.path(),
            join_field_name="join_id",
            on_field_pairs=[
                ("predir", "prime_predir"),
                ("name", "prime_name"),
                ("type", "prime_type"),
                ("sufdir", "prime_sufdir"),
            ],
        )
        # Assign overlays.
        overlay_kwargs = [
            {
                "field_name": "city_limit",
                "overlay_field_name": "city",
                "overlay_dataset_path": dataset.TILLAMOOK_CITY_LIMITS.path(),
            },
            {
                "field_name": "ems",
                "overlay_field_name": "district",
                "overlay_dataset_path": dataset.TILLAMOOK_EMS.path(),
            },
            {
                "field_name": "esn",
                "overlay_field_name": "esn",
                "overlay_dataset_path":
                dataset.TILLAMOOK_EMERGENCY_SERVICE_ZONE.path(),
            },
            {
                "field_name": "fire",
                "overlay_field_name": "district",
                "overlay_dataset_path": dataset.TILLAMOOK_FIRE.path(),
            },
            {
                "field_name": "police",
                "overlay_field_name": "district",
                "overlay_dataset_path": dataset.TILLAMOOK_POLICE.path(),
            },
        ]
        for kwargs in overlay_kwargs:
            etl.transform(arcetl.attributes.update_by_overlay,
                          overlay_central_coincident=True,
                          **kwargs)
        # Build values: Constants.
        value_kwargs = [{"field_name": "state", "value": "OR"}]
        transform.update_attributes_by_values(etl, value_kwargs)
        # Build values: Concatenations.
        etl.transform(
            arcetl.attributes.update_by_function,
            field_name="address",
            function=concatenate_arguments,
            field_as_first_arg=False,
            arg_field_names=[
                "stnum",
                "stnumsuf",
                "predir",
                "name",
                "type",
                "sufdir",
                "unit_type",
                "unit",
            ],
        )
        etl.load(dataset.TILLAMOOK_ADDRESS_POINT.path("pub"))
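concatenate_arguments is defined elsewhere in the module; from its use here and in road_centerline_etl it evidently joins the non-empty address parts into a single string. A plausible sketch of that behavior (assumed, not the actual implementation):

def concatenate_arguments(*args, separator=" "):
    """Join non-empty arguments into one string; None when all are empty."""
    parts = [str(arg) for arg in args if arg not in (None, "")]
    return separator.join(parts) if parts else None

concatenate_arguments(208, None, "N", "MAIN", "ST", None, None, None)
# Returns "208 N MAIN ST".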
def closest_hydrant_etl():
    """Run ETL for closest hydrant for each site address."""
    etl = arcetl.ArcETL("Closest hydrants for addresses")
    hydrants_copy = arcetl.TempDatasetCopy(dataset.HYDRANT.path(), field_names=[])
    with etl, hydrants_copy:
        # Add unique ID, longitude, latitude to temp hydrants.
        arcetl.dataset.add_field(
            hydrants_copy.path, field_name="hydrant_id", field_type="long"
        )
        arcetl.attributes.update_by_unique_id(hydrants_copy.path, "hydrant_id")
        for field_name, axis in [("longitude", "x"), ("latitude", "y")]:
            arcetl.dataset.add_field(
                hydrants_copy.path, field_name, field_type="double"
            )
            arcetl.attributes.update_by_geometry(
                hydrants_copy.path,
                field_name,
                spatial_reference_item=4326,
                geometry_properties=["centroid", axis],
            )
        etl.extract(dataset.SITE_ADDRESS.path("pub"))
        field_name_change = {
            "site_address_gfid": "site_address_uuid",
            "geofeat_id": "site_address_intid",
        }
        transform.rename_fields(etl, field_name_change)
        transform.add_missing_fields(etl, dataset.SITE_ADDRESS_CLOSEST_HYDRANT)
        id_near_info = arcetl.proximity.id_near_info_map(
            dataset_path=etl.transform_path,
            dataset_id_field_name="site_address_intid",
            near_dataset_path=hydrants_copy.path,
            near_id_field_name="hydrant_id",
            near_rank=1,
        )
        near_key_field_name = {
            "near_id": "facility_intid",
            "distance": "facility_distance_feet",
            "near_x": "facility_x_coordinate",
            "near_y": "facility_y_coordinate",
        }
        for near_key, field_name in near_key_field_name.items():
            etl.transform(
                arcetl.attributes.update_by_function,
                field_name=field_name,
                function=(lambda id_, key=near_key: id_near_info[id_][key]),
                field_as_first_arg=False,
                arg_field_names=["site_address_intid"],
            )
        # Add longitude/latitude.
        for name in ["longitude", "latitude"]:
            etl.transform(
                arcetl.attributes.update_by_joined_value,
                field_name="facility_" + name,
                join_dataset_path=hydrants_copy.path,
                join_field_name=name,
                on_field_pairs=[("facility_intid", "hydrant_id")],
            )
        # Remove features without a near-hydrant (should not happen).
        etl.transform(
            arcetl.features.delete, dataset_where_sql="facility_intid is null"
        )
        etl.load(dataset.SITE_ADDRESS_CLOSEST_HYDRANT.path())
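Two details worth noting here: `with etl, hydrants_copy:` enters the ArcETL job and the temporary hydrant copy as one combined context, so both are cleaned up even if a transform raises; and the key=near_key default in the update lambdas is the same late-binding guard shown under zoning_county_etl, freezing each near-info key per loop iteration.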
Example #14
def emergency_service_zone_etl():
    """Run ETL for emergency service zones."""
    with arcetl.ArcETL("Emergency Service Zones") as etl:
        # Use the county boundary to get a blank slate for the identity overlays.
        etl.extract(dataset.COUNTY_BOUNDARY.path())
        transform.add_missing_fields(etl, dataset.EMERGENCY_SERVICE_ZONE)
        identity_kwargs = [
            {
                "temporary_field": True,
                "field_name": "inccityabbr",
                "identity_dataset_path":
                dataset.INCORPORATED_CITY_LIMITS.path(),
                "identity_field_name": "inccityabbr",
            },
            {
                "temporary_field": True,
                "field_name": "fireprotprov",
                "identity_dataset_path":
                dataset.FIRE_PROTECTION_AREA.path("pub"),
                "identity_field_name": "fireprotprov",
            },
            {
                "field_name": "asa_code",
                "identity_dataset_path":
                dataset.AMBULANCE_SERVICE_AREA.path("pub"),
                "identity_field_name": "asacode",
            },
            {
                "field_name": "psap_code",
                "identity_dataset_path": dataset.PSAP_AREA.path("pub"),
                "identity_field_name": "psap_code",
            },
        ]
        for kwargs in identity_kwargs:
            if kwargs.get("temporary_field"):
                etl.transform(
                    arcetl.dataset.add_field,
                    field_name=kwargs["field_name"],
                    field_type="text",
                )
            etl.transform(arcetl.geoset.identity, tolerance=2.0, **kwargs)
        # Drop features that lack all city, fire, & ambulance coverage.
        etl.transform(
            arcetl.features.delete,
            dataset_where_sql=(
                "inccityabbr is null and fireprotprov is null"
                " and asa_code is null"
            ),
        )
        etl.transform(
            arcetl.features.dissolve,
            dissolve_field_names=[
                "inccityabbr",
                "fireprotprov",
                "asa_code",
                "psap_code",
            ],
            tolerance=TOLERANCE["xy"],
        )
        etl.transform(
            arcetl.attributes.update_by_joined_value,
            field_name="emergency_service_number",
            join_field_name="emergency_service_number",
            join_dataset_path=dataset.EMERGENCY_SERVICE_NUMBER.path(),
            on_field_pairs=[
                ("inccityabbr", "city_limits"),
                ("fireprotprov", "fire_district"),
                ("asa_code", "asa_code"),
                ("psap_code", "psap_code"),
            ],
        )
        join_kwargs = [
            {
                "field_name": "law_provider",
                "join_field_name": "law_provider",
                "join_dataset_path": dataset.EMERGENCY_SERVICE_NUMBER.path(),
                "on_field_pairs":
                [("emergency_service_number", "emergency_service_number")],
            },
            {
                "field_name": "fire_coverage_description",
                "join_field_name": "fire_coverage_description",
                "join_dataset_path": dataset.EMERGENCY_SERVICE_NUMBER.path(),
                "on_field_pairs":
                [("emergency_service_number", "emergency_service_number")],
            },
            {
                "field_name": "asa_code",
                "join_field_name": "asa_code",
                "join_dataset_path": dataset.EMERGENCY_SERVICE_NUMBER.path(),
                "on_field_pairs":
                [("emergency_service_number", "emergency_service_number")],
            },
            {
                "field_name": "psap_code",
                "join_field_name": "psap_code",
                "join_dataset_path": dataset.EMERGENCY_SERVICE_NUMBER.path(),
                "on_field_pairs":
                [("emergency_service_number", "emergency_service_number")],
            },
            {
                "field_name": "asa_name",
                "join_field_name": "asa",
                "join_dataset_path":
                dataset.AMBULANCE_SERVICE_AREA.path("pub"),
                "on_field_pairs": [("asa_code", "asacode")],
            },
            {
                "field_name": "psap_name",
                "join_field_name": "psap_name",
                "join_dataset_path": dataset.PSAP_AREA.path("pub"),
                "on_field_pairs": [("psap_code", "psap_code")],
            },
        ]
        for kwargs in join_kwargs:
            etl.transform(arcetl.attributes.update_by_joined_value, **kwargs)
        etl.load(dataset.EMERGENCY_SERVICE_ZONE.path())
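One caveat in the identity loop above: **kwargs forwards every key, including the bookkeeping temporary_field flag, to arcetl.geoset.identity. I have not verified that identity accepts that keyword; if it does not, popping the flag first keeps the call clean:

for kwargs in identity_kwargs:
    if kwargs.pop("temporary_field", False):
        etl.transform(arcetl.dataset.add_field,
                      field_name=kwargs["field_name"], field_type="text")
    etl.transform(arcetl.geoset.identity, tolerance=2.0, **kwargs)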
Example #15
def fire_protection_area_etl():
    """Run ETL for fire protection areas."""
    city_provider_code = {
        "EUG": "EGF",
        "OAK": "OKF",
        "SPR": "SPF",
        "WEF": "WEF"
    }
    city_fire_protection_sql = "inccityabbr in ({})".format(", ".join(
        repr(city) for city in city_provider_code))
    with arcetl.ArcETL("Fire Protection Areas") as etl:
        etl.extract(dataset.FIRE_PROTECTION_AREA.path("maint"))
        etl.transform(
            arcetl.features.dissolve,
            dissolve_field_names=[
                "fireprotprov",
                "fireprottype",
                "dateformed",
                "taxdist",
            ],
            tolerance=TOLERANCE["xy"],
        )
        transform.add_missing_fields(etl,
                                     dataset.FIRE_PROTECTION_AREA,
                                     tags=["pub"])
        # Erase fire protection areas that overlap city limits having their own
        # fire protection. City incorporation often leads to removal of a property
        # from an FPA after a variable amount of time, so we assume the city fire
        # protection takes precedence.
        etl.transform(
            arcetl.features.erase,
            erase_dataset_path=dataset.INCORPORATED_CITY_LIMITS.path(),
            erase_where_sql=city_fire_protection_sql,
        )
        # Temporarily add inccityabbr field, so we can transfer city name.
        etl.transform(arcetl.dataset.add_field,
                      field_name="inccityabbr",
                      field_type="text")
        # Append cities with fire protection.
        etl.transform(
            arcetl.features.insert_from_path,
            insert_dataset_path=dataset.INCORPORATED_CITY_LIMITS.path(),
            insert_where_sql=city_fire_protection_sql,
        )
        # Set the values for in-city fire protection areas.
        etl.transform(
            arcetl.attributes.update_by_value,
            field_name="fireprottype",
            value="CITY",
            dataset_where_sql="inccityabbr is not null",
        )
        etl.transform(
            arcetl.attributes.update_by_function,
            field_name="fireprotprov",
            function=city_provider_code.get,
            field_as_first_arg=False,
            arg_field_names=["inccityabbr"],
            dataset_where_sql="inccityabbr is not null",
        )
        etl.transform(
            arcetl.attributes.update_by_value,
            field_name="taxdist",
            value="Y",
            dataset_where_sql="inccityabbr is not null",
        )
        etl.transform(
            arcetl.attributes.update_by_domain_code,
            field_name="fptypename",
            code_field_name="fireprottype",
            domain_name="FireProtectionType",
            domain_workspace_path=database.LCOGGEO.path,
        )
        # Assign joinable field values for city overlays/additions.
        attr_join_key = {
            "fpprovname": "provider_name",
            "contact_phone": "contact_phone",
            "contact_email": "contact_email",
            "contact_mailing_address": "contact_mailing_address",
            "website_link": "website_link",
        }
        for key, join_key in attr_join_key.items():
            etl.transform(
                arcetl.attributes.update_by_joined_value,
                field_name=key,
                join_dataset_path=dataset.FIRE_PROTECTION_PROVIDER.path(),
                join_field_name=join_key,
                on_field_pairs=[("fireprotprov", "provider_code")],
            )
        etl.load(dataset.FIRE_PROTECTION_AREA.path("pub"))
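The city_fire_protection_sql expression iterates the keys of city_provider_code (dicts preserve insertion order in Python 3.7+), with repr() supplying the quoting, so the generated WHERE clause reads:

city_provider_code = {"EUG": "EGF", "OAK": "OKF", "SPR": "SPF", "WEF": "WEF"}
sql = "inccityabbr in ({})".format(
    ", ".join(repr(city) for city in city_provider_code))
print(sql)  # inccityabbr in ('EUG', 'OAK', 'SPR', 'WEF')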
Example #16
def site_address_etl():
    """Run ETL for site addresses."""
    with arcetl.ArcETL("Site Addresses") as etl:
        etl.extract(dataset.SITE_ADDRESS.path("maint"))
        # Clean maintenance values.
        transform.clear_nonpositive(etl, field_names=["house_nbr"])
        transform.clean_whitespace(
            etl,
            field_names=[
                "house_suffix_code",
                "pre_direction_code",
                "street_name",
                "street_type_code",
                "unit_type_code",
                "unit_id",
                "city_name",
                "landuse",
                "maptaxlot",
                "account",
            ],
        )
        transform.force_uppercase(
            etl,
            field_names=[
                "house_suffix_code",
                "pre_direction_code",
                "street_name",
                "street_type_code",
                "unit_type_code",
                "unit_id",
                "maptaxlot",
                "valid",
                "archived",
            ],
        )
        transform.clear_non_numeric_text(etl, field_names=["account"])
        etl.transform(
            arcetl.attributes.update_by_function,
            field_name="landuse",
            function=(lambda x: x if is_numeric(x) else "0"),
        )
        transform.force_yn(etl, field_names=["archived"], default="N")
        transform.force_yn(etl, field_names=["valid"], default="Y")
        transform.add_missing_fields(etl, dataset.SITE_ADDRESS, tags=["pub"])
        # Assign geometry attributes.
        coordinate_system_xy_keys = {
            2914: {"x": "x_coordinate", "y": "y_coordinate"},
            4326: {"x": "longitude", "y": "latitude"},
        }
        for spatial_reference_id, xy_key in coordinate_system_xy_keys.items():
            for axis, key in xy_key.items():
                etl.transform(
                    arcetl.attributes.update_by_geometry,
                    field_name=key,
                    spatial_reference_item=spatial_reference_id,
                    geometry_properties=["centroid", axis],
                )
        # Assign overlays.
        overlay_kwargs = [
            # City attributes.
            {
                "field_name": "geocity",
                "overlay_field_name": "inccityabbr",
                "overlay_dataset_path":
                dataset.INCORPORATED_CITY_LIMITS.path(),
            },
            {
                "field_name": "annexhist",
                "overlay_field_name": "annexnum",
                "overlay_dataset_path": dataset.ANNEXATION_HISTORY.path("pub"),
            },
            # Have to do overlay rather than join because some lack codes.
            {
                "field_name": "yearanx",
                "overlay_field_name": "annexyear",
                "overlay_dataset_path": dataset.ANNEXATION_HISTORY.path("pub"),
            },
            {
                "field_name": "ugb",
                "overlay_field_name": "ugbcity",
                "overlay_dataset_path": dataset.UGB.path("pub"),
            },
            # Planning & zoning attributes.
            {
                "field_name": "greenwy",
                "overlay_field_name": "greenway",
                "overlay_dataset_path":
                dataset.WILLAMETTE_RIVER_GREENWAY.path("pub"),
            },
            {
                "field_name": "nodaldev",
                "overlay_field_name": "nodearea",
                "overlay_dataset_path":
                dataset.NODAL_DEVELOPMENT_AREA.path("pub"),
            },
            {
                "field_name": "plandes_id",
                "overlay_field_name": "plandes_id",
                "overlay_dataset_path": dataset.PLAN_DESIGNATION.path("pub"),
            },
            {
                "field_name": "sprsvcbndy",
                "overlay_field_name": "is_inside",
                "overlay_dataset_path":
                dataset.SPRINGFIELD_HANSEN_EXTENT.path(),
            },
            # Public safety attributes.
            {
                "field_name": "ambulance_district",
                "overlay_field_name": "asacode",
                "overlay_dataset_path":
                dataset.AMBULANCE_SERVICE_AREA.path("pub"),
            },
            {
                "field_name": "firedist",
                "overlay_field_name": "fireprotprov",
                "overlay_dataset_path":
                dataset.FIRE_PROTECTION_AREA.path("pub"),
            },
            {
                "field_name": "police_beat",
                "overlay_field_name": "CAD",
                "overlay_dataset_path":
                os.path.join(
                    path.LCOG_GIS_PROJECTS,
                    "Public_Safety\\PSAPS\\CLPSAP\\SunGard_CAD\\Maintained_Layers",
                    "Maintained_Layers.gdb\\Fire_Law_Tow\\law_beat",
                ),
            },
            {
                "field_name": "psap_code",
                "overlay_field_name": "psap_code",
                "overlay_dataset_path": dataset.PSAP_AREA.path("pub"),
            },
            # Election attributes.
            {
                "field_name": "electionpr",
                "overlay_field_name": "precntnum",
                "overlay_dataset_path": dataset.ELECTION_PRECINCT.path("pub"),
            },
            {
                "field_name": "ccward",
                "overlay_field_name": "ward",
                "overlay_dataset_path": dataset.CITY_WARD.path(),
            },
            {
                "field_name": "clpud_subdivision",
                "overlay_field_name": "SUBDIVISIO",
                "overlay_dataset_path":
                os.path.join(
                    path.LCOG_GIS_PROJECTS,
                    "UtilityDistricts\\CentralLincolnPUD\\Redistricting2012",
                    "CLPUD_Subdivisions.shp",
                ),
            },
            {
                "field_name": "cocommdist",
                "overlay_field_name": "commrdist",
                "overlay_dataset_path":
                dataset.COUNTY_COMMISSIONER_DISTRICT.path("pub"),
            },
            {
                "field_name": "epud",
                "overlay_field_name": "boardid",
                "overlay_dataset_path": dataset.EPUD_SUBDISTRICT.path("pub"),
            },
            {
                "field_name": "hwpud_subdivision",
                "overlay_field_name": "BoardZone",
                "overlay_dataset_path":
                os.path.join(
                    path.LCOG_GIS_PROJECTS,
                    "UtilityDistricts\\HecetaWaterPUD\\NewBoardSubzones",
                    "HecetaData.gdb",
                    "ScenarioB",
                ),
            },
            {
                "field_name": "lcczone",
                "overlay_field_name": "lccbrdzone",
                "overlay_dataset_path": dataset.LCC_BOARD_ZONE.path("pub"),
            },
            {
                "field_name": "senatedist",
                "overlay_field_name": "sendist",
                "overlay_dataset_path":
                dataset.STATE_SENATOR_DISTRICT.path("pub"),
            },
            {
                "field_name": "strepdist",
                "overlay_field_name": "repdist",
                "overlay_dataset_path":
                dataset.STATE_REPRESENTATIVE_DISTRICT.path("pub"),
            },
            {
                "field_name": "swcd",
                "overlay_field_name": "swcdist",
                "overlay_dataset_path":
                dataset.SOIL_WATER_CONSERVATION_DISTRICT.path("pub"),
            },
            {
                "field_name": "swcdzone",
                "overlay_field_name": "swczone",
                "overlay_dataset_path":
                dataset.SOIL_WATER_CONSERVATION_DISTRICT.path("pub"),
            },
            # Education attributes.
            {
                "field_name": "schooldist",
                "overlay_field_name": "district",
                "overlay_dataset_path": dataset.SCHOOL_DISTRICT.path("pub"),
            },
            {
                "field_name": "elem",
                "overlay_field_name": "attend",
                "overlay_dataset_path":
                dataset.ELEMENTARY_SCHOOL_AREA.path("pub"),
            },
            {
                "field_name": "middle",
                "overlay_field_name": "attend",
                "overlay_dataset_path": dataset.MIDDLE_SCHOOL_AREA.path("pub"),
            },
            {
                "field_name": "high",
                "overlay_field_name": "attend",
                "overlay_dataset_path": dataset.HIGH_SCHOOL_AREA.path("pub"),
            },
            # Transportation attributes.
            {
                "field_name": "ltddist",
                "overlay_field_name": "LTD",
                "overlay_dataset_path":
                os.path.join(path.REGIONAL_DATA,
                             "transport\\ltd\\2012 LTD Boundary.shp"),
            },
            {
                "field_name": "ltdridesrc",
                "overlay_field_name": "LTD",
                "overlay_dataset_path":
                os.path.join(path.REGIONAL_DATA,
                             "transport\\ltd\\2015 RideSource Boundary.shp"),
            },
            {
                "field_name": "cats",
                "overlay_field_name": "CATSBNDY",
                "overlay_dataset_path":
                os.path.join(path.REGIONAL_DATA,
                             "transport\\eug\\catsbndy.shp"),
            },
            {
                "field_name": "trans_analysis_zone",
                "overlay_field_name": "TAZ_NUM",
                "overlay_dataset_path":
                os.path.join(path.REGIONAL_DATA, "transport\\MTAZ16.shp"),
            },
            # Natural attributes.
            {
                "field_name": "firmnumber",
                "overlay_field_name": "firm_pan",
                "overlay_dataset_path":
                os.path.join(path.REGIONAL_DATA,
                             "natural\\flood\\Flood.gdb\\FIRMPanel"),
            },
            {
                "field_name": "soilkey",
                "overlay_field_name": "mukey",
                "overlay_dataset_path":
                os.path.join(path.REGIONAL_DATA,
                             "natural\\soils\\Soils.gdb\\Soil"),
            },
            {
                "field_name": "wetland",
                "overlay_field_name": "WET_TYPE",
                "overlay_dataset_path":
                os.path.join(path.REGIONAL_DATA,
                             "natural\\eug\\Wetland\\wetlands.shp"),
            },
            # Census attributes.
            {
                "field_name": "ctract",
                "overlay_field_name": "TRACT",
                "overlay_dataset_path":
                os.path.join(
                    path.REGIONAL_DATA,
                    "federal\\census\\lane\\2010",
                    "lc_census2010.gdb\\lc_tracts2010",
                ),
            },
            {
                "field_name": "blockgr",
                "overlay_field_name": "BlockGroup",
                "overlay_dataset_path":
                os.path.join(
                    path.REGIONAL_DATA,
                    "federal\\census\\lane\\2010",
                    "lc_census2010.gdb\\lc_blockgroups2010",
                ),
            },
            # Other district attributes.
            {
                "field_name": "neighbor",
                "overlay_field_name": "NEIBORHD",
                "overlay_dataset_path":
                os.path.join(
                    path.REGIONAL_DATA,
                    "boundary\\districts\\eug",
                    "Boundary.gdb\\EugNeighborhoods",
                ),
            },
        ]
        for kwargs in overlay_kwargs:
            etl.transform(arcetl.attributes.update_by_overlay,
                          overlay_central_coincident=True,
                          **kwargs)
        # Override overlays for special cases.
        for override in OVERRIDE_ATTRS:
            for kwargs in OVERRIDE_ATTRS[override].get("overlay_kwargs", []):
                etl.transform(arcetl.attributes.update_by_value,
                              dataset_where_sql=OVERRIDE_ATTRS[override].get(
                                  "where_sql"),
                              **kwargs)
        # Clean overlay values.
        transform.clean_whitespace(etl,
                                   field_names=[
                                       "police_beat", "wetland", "ctract",
                                       "blockgr", "neighbor"
                                   ])
        transform.force_uppercase(
            etl, field_names=["cats", "ltddist", "ltdridesrc"])
        # Set default overlay values where missing.
        transform.force_yn(
            etl,
            field_names=[
                "greenwy", "sprsvcbndy", "cats", "ltddist", "ltdridesrc"
            ],
            default="N",
        )
        # Remove invalid overlay values.
        transform.clear_nonpositive(etl, field_names=["ctract", "blockgr"])
        etl.transform(
            arcetl.attributes.update_by_function,
            field_name="neighbor",
            function=(lambda x: x if x and int(x) != 99 else None),
        )
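        # Note: the lambda above keeps a neighborhood value only when it is
        # non-empty and not the placeholder code 99.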
        # Assign joinable field values after overlays.
        join_kwargs = [
            # Core attributes.
            {
                "field_name": "pre_direction",
                "join_field_name": "description",
                "join_dataset_path": dataset.STREET_DIRECTION.path(),
                "on_field_pairs": [("pre_direction_code", "code")],
            },
            {
                "field_name": "street_type",
                "join_field_name": "description",
                "join_dataset_path": dataset.STREET_TYPE.path(),
                "on_field_pairs": [("street_type_code", "code")],
            },
            {
                "field_name": "unit_type",
                "join_field_name": "description",
                "join_dataset_path": dataset.UNIT_TYPE.path(),
                "on_field_pairs": [("unit_type_code", "code")],
            },
            {
                "field_name": "city_name_abbr",
                "join_field_name": "CityNameAbbr",
                "join_dataset_path": dataset.CITY.path(),
                "on_field_pairs": [("city_name", "CityName")],
            },
            # Extended attributes.
            {
                "field_name": "five_digit_zip_code",
                "join_field_name": "zip_code",
                "join_dataset_path": dataset.ADDRESS_POSTAL_INFO.path(),
                "on_field_pairs": [("geofeat_id", "geofeat_id")],
            },
            # Any address not assigned a ZIP from USPS gets an overlay ZIP.
            {
                "field_name": "five_digit_zip_code",
                "dataset_where_sql": "five_digit_zip_code is null",
                "join_field_name": "zip_code_overlay",
                "join_dataset_path": dataset.ADDRESS_POSTAL_INFO.path(),
                "on_field_pairs": [("geofeat_id", "geofeat_id")],
            },
            {
                "field_name": "four_digit_zip_code",
                "join_field_name": "plus_four_code",
                "join_dataset_path": dataset.ADDRESS_POSTAL_INFO.path(),
                "on_field_pairs": [("geofeat_id", "geofeat_id")],
            },
            {
                "field_name": "usps_delivery_point_code",
                "join_field_name": "delivery_point_code",
                "join_dataset_path": dataset.ADDRESS_POSTAL_INFO.path(),
                "on_field_pairs": [("geofeat_id", "geofeat_id")],
            },
            {
                "field_name": "postal_carrier_route",
                "join_field_name": "carrier_route",
                "join_dataset_path": dataset.ADDRESS_POSTAL_INFO.path(),
                "on_field_pairs": [("geofeat_id", "geofeat_id")],
            },
            {
                "field_name": "usps_is_cmra",
                "join_field_name": "is_cmra",
                "join_dataset_path": dataset.ADDRESS_POSTAL_INFO.path(),
                "on_field_pairs": [("geofeat_id", "geofeat_id")],
            },
            {
                "field_name": "usps_is_vacant",
                "join_field_name": "is_vacant",
                "join_dataset_path": dataset.ADDRESS_POSTAL_INFO.path(),
                "on_field_pairs": [("geofeat_id", "geofeat_id")],
            },
            {
                "field_name": "usps_has_mail_service",
                "join_field_name": "has_mail_service",
                "join_dataset_path": dataset.ADDRESS_POSTAL_INFO.path(),
                "on_field_pairs": [("geofeat_id", "geofeat_id")],
            },
            {
                "field_name": "landuse_desc",
                "join_field_name": "ludesc",
                "join_dataset_path":
                dataset.LAND_USE_CODES_DETAILED.path("pub"),
                "on_field_pairs": [("landuse", "landusec")],
            },
            {
                "field_name": "usecode",
                "join_field_name": "usecode",
                "join_dataset_path":
                dataset.LAND_USE_CODES_DETAILED.path("pub"),
                "on_field_pairs": [("landuse", "landusec")],
            },
            {
                "field_name": "usedesc",
                "join_field_name": "ucname",
                "join_dataset_path":
                dataset.LAND_USE_CODES_USE_CODES.path("pub"),
                "on_field_pairs": [("usecode", "usecode")],
            },
            # A&T attributes.
            {
                "field_name": "tca",
                "join_field_name": "tax_code_overlay",
                "join_dataset_path": dataset.ADDRESS_ASSESS_TAX_INFO.path(),
                "on_field_pairs": [("geofeat_id", "geofeat_id")],
            },
            # City attributes.
            {
                "field_name": "geocity_name",
                "join_field_name": "inccityname",
                "join_dataset_path": dataset.INCORPORATED_CITY_LIMITS.path(),
                "on_field_pairs": [("geocity", "inccityabbr")],
            },
            {
                "field_name": "ugb_city_name",
                "join_field_name": "ugbcityname",
                "join_dataset_path": dataset.UGB.path("pub"),
                "on_field_pairs": [("ugb", "ugbcity")],
            },
            # Planning & zoning attributes.
            {
                "field_name": "nodaldev_name",
                "join_field_name": "nodename",
                "join_dataset_path":
                dataset.NODAL_DEVELOPMENT_AREA.path("pub"),
                "on_field_pairs": [("nodaldev", "nodearea")],
            },
            {
                "field_name": "plandesjuris",
                "join_field_name": "planjuris",
                "join_dataset_path": dataset.PLAN_DESIGNATION.path("pub"),
                "on_field_pairs": [("plandes_id", "plandes_id")],
            },
            {
                "field_name": "plandes",
                "join_field_name": "plandes",
                "join_dataset_path": dataset.PLAN_DESIGNATION.path("pub"),
                "on_field_pairs": [("plandes_id", "plandes_id")],
            },
            {
                "field_name": "plandesdesc",
                "join_field_name": "plandesnam",
                "join_dataset_path": dataset.PLAN_DESIGNATION.path("pub"),
                "on_field_pairs": [("plandes_id", "plandes_id")],
            },
            # Public safety attributes.
            {
                "field_name": "ambulance_service_area",
                "join_field_name": "asa",
                "join_dataset_path":
                dataset.AMBULANCE_SERVICE_AREA.path("pub"),
                "on_field_pairs": [("ambulance_district", "asacode")],
            },
            {
                "field_name": "ambulance_service_provider",
                "join_field_name": "provider",
                "join_dataset_path":
                dataset.AMBULANCE_SERVICE_AREA.path("pub"),
                "on_field_pairs": [("ambulance_district", "asacode")],
            },
            {
                "field_name": "fire_protection_provider",
                "join_field_name": "fpprovname",
                "join_dataset_path": dataset.FIRE_PROTECTION_AREA.path("pub"),
                "on_field_pairs": [("firedist", "fireprotprov")],
            },
            {
                "field_name": "psap_name",
                "join_field_name": "psap_name",
                "join_dataset_path": dataset.PSAP_AREA.path("pub"),
                "on_field_pairs": [("psap_code", "psap_code")],
            },
            {
                "field_name": "emergency_service_number",
                "join_field_name": "emergency_service_number",
                "join_dataset_path": dataset.EMERGENCY_SERVICE_NUMBER.path(),
                "on_field_pairs": [
                    # City used as proxy for police.
                    ("geocity", "city_limits"),
                    ("ambulance_district", "asa_code"),
                    ("firedist", "fire_district"),
                    ("psap_code", "psap_code"),
                ],
            },
            {
                "field_name": "emergency_service_number",
                "join_field_name": "emergency_service_number",
                "join_dataset_path": dataset.EMERGENCY_SERVICE_NUMBER.path(),
                "on_field_pairs": [
                    # City used as proxy for police.
                    ("geocity", "city_limits"),
                    ("ambulance_district", "asa_code"),
                    ("firedist", "fire_district"),
                ],
                "dataset_where_sql": "emergency_service_number is null",
            },
            # Election attributes.
            {
                "field_name": "city_councilor",
                "join_field_name": "councilor",
                "join_dataset_path": dataset.CITY_WARD.path(),
                "on_field_pairs": [("ccward", "ward")],
            },
            {
                "field_name":
                "cocommdist_name",
                "join_field_name":
                "cmdistname",
                "join_dataset_path":
                dataset.COUNTY_COMMISSIONER_DISTRICT.path("pub"),
                "on_field_pairs": [("cocommdist", "commrdist")],
            },
            {
                "field_name":
                "county_commissioner",
                "join_field_name":
                "commrname",
                "join_dataset_path":
                dataset.COUNTY_COMMISSIONER_DISTRICT.path("pub"),
                "on_field_pairs": [("cocommdist", "commrdist")],
            },
            {
                "field_name": "eweb_commissioner_name",
                "join_field_name": "eweb_commissioner_name",
                "join_dataset_path": dataset.EWEB_COMMISSIONER.path("pub"),
                "on_field_pairs": [("ccward", "city_council_ward")],
            },
            {
                "field_name":
                "state_representative",
                "join_field_name":
                "repname",
                "join_dataset_path":
                dataset.STATE_REPRESENTATIVE_DISTRICT.path("pub"),
                "on_field_pairs": [("strepdist", "repdist")],
            },
            {
                "field_name": "state_senator",
                "join_field_name": "senname",
                "join_dataset_path":
                dataset.STATE_SENATOR_DISTRICT.path("pub"),
                "on_field_pairs": [("senatedist", "sendist")],
            },
            # Education attributes.
            {
                "field_name": "schooldist_name",
                "join_field_name": "names",
                "join_dataset_path": dataset.SCHOOL_DISTRICT.path("pub"),
                "on_field_pairs": [("schooldist", "district")],
            },
            {
                "field_name": "elem_name",
                "join_field_name": "elem_school",
                "join_dataset_path":
                dataset.ELEMENTARY_SCHOOL_AREA.path("pub"),
                "on_field_pairs": [("elem", "attend")],
            },
            {
                "field_name": "middle_name",
                "join_field_name": "middle_school",
                "join_dataset_path": dataset.MIDDLE_SCHOOL_AREA.path("pub"),
                "on_field_pairs": [("middle", "attend")],
            },
            {
                "field_name": "high_name",
                "join_field_name": "high_school",
                "join_dataset_path": dataset.HIGH_SCHOOL_AREA.path("pub"),
                "on_field_pairs": [("high", "attend")],
            },
            # Natural attributes.
            {
                "field_name":
                "firmprinted",
                "join_field_name":
                "panel_printed",
                "join_dataset_path":
                os.path.join(path.REGIONAL_DATA,
                             "natural\\flood\\Flood.gdb\\FIRMPanel"),
                "on_field_pairs": [("firmnumber", "firm_pan")],
            },
            {
                "field_name":
                "firm_community_id",
                "join_field_name":
                "com_nfo_id",
                "join_dataset_path":
                os.path.join(path.REGIONAL_DATA,
                             "natural\\flood\\Flood.gdb\\CommunityInfo"),
                "on_field_pairs": [("geocity", "community_code")],
            },
            {
                "field_name":
                "firm_community_post_firm_date",
                "join_field_name":
                "in_frm_dat",
                "join_dataset_path":
                os.path.join(path.REGIONAL_DATA,
                             "natural\\flood\\Flood.gdb\\CommunityInfo"),
                "on_field_pairs": [("geocity", "community_code")],
            },
            {
                "field_name":
                "soiltype",
                "join_field_name":
                "musym",
                "join_dataset_path":
                os.path.join(path.REGIONAL_DATA,
                             "natural\\soils\\Soils.gdb\\MUAggAtt"),
                "on_field_pairs": [("soilkey", "mukey")],
            },
            # Other district attributes.
            {
                "field_name":
                "neighborhood_name",
                "join_field_name":
                "NAME",
                "join_dataset_path":
                os.path.join(
                    path.REGIONAL_DATA,
                    "boundary\\districts\\eug\\Boundary.gdb\\EugNeighborhoods",
                ),
                "on_field_pairs": [("neighbor", "NEIBORHD")],
            },
        ]
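        # Each entry drives an attribute join: copy the join_field_name value
        # from the first join-dataset row whose on_field_pairs values match
        # the feature's. Order matters: the ESN fallback entry relies on
        # running after the four-way ESN join.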
        for kwargs in join_kwargs:
            etl.transform(arcetl.attributes.update_by_joined_value, **kwargs)
        # Clean join values.
        transform.clean_whitespace(etl, field_names=["neighborhood_name"])
        # Remove Metro Plan designations, per City of Eugene request.
        transform.clear_all_values(
            etl,
            field_names=["plandes", "plandesdesc"],
            dataset_where_sql="plandesjuris = 'MTP'",
        )
        # Remove +4 ZIP where initial ZIP is missing.
        transform.clear_all_values(
            etl,
            field_names=["four_digit_zip_code"],
            dataset_where_sql="five_digit_zip_code is null",
        )
        # Assign constants.
        constant_kwargs = [
            {"field_name": "state_code", "value": "OR"},
            {"field_name": "state_name", "value": "Oregon"},
            {"field_name": "county_name", "value": "Lane"},
        ]
        for kwargs in constant_kwargs:
            etl.transform(arcetl.attributes.update_by_value, **kwargs)
        # Override constants for special cases.
        for override in OVERRIDE_ATTRS.values():
            for kwargs in override.get("constant_kwargs", []):
                etl.transform(
                    arcetl.attributes.update_by_value,
                    dataset_where_sql=override.get("where_sql"),
                    **kwargs
                )
        # Build values from functions.
        function_kwargs = [
            {
                "field_name":
                "street_name_full",
                "function":
                concatenate_arguments,
                "arg_field_names": [
                    "pre_direction_code",
                    "street_name",
                    "street_type_code",
                ],
            },
            {
                "field_name":
                "city_state_zip",
                "function":
                city_state_zip,
                "kwarg_field_names":
                ["city_name", "state_code", "five_digit_zip_code"],
            },
            {
                "field_name":
                "concat_address_no_unit",
                "function":
                concatenate_arguments,
                "arg_field_names": [
                    "house_nbr",
                    "house_suffix_code",
                    "street_name_full",
                ],
            },
            {
                "field_name":
                "concat_address",
                "function":
                concatenate_arguments,
                "arg_field_names": [
                    "concat_address_no_unit",
                    "unit_type_code",
                    "unit_id",
                ],
            },
            {
                "field_name":
                "concat_address_no_direction",
                "function":
                concatenate_arguments,
                "arg_field_names": [
                    "house_nbr",
                    "house_suffix_code",
                    "street_name",
                    "street_type_code",
                    "unit_type_code",
                    "unit_id",
                ],
            },
            {
                "field_name":
                "concat_address_full",
                "function":
                concat_address_full,
                "kwarg_field_names": [
                    "concat_address",
                    "city_name",
                    "state_code",
                    "five_digit_zip_code",
                    "four_digit_zip_code",
                ],
            },
            {
                "field_name": "mapnumber",
                "function": (lambda x: x[:8] if x else None),
                "arg_field_names": ["maptaxlot"],
            },
            {
                "field_name": "taxlot",
                "function": (lambda x: x[-5:] if x else None),
                "arg_field_names": ["maptaxlot"],
            },
            {
                "field_name": "maptaxlot_hyphen",
                "function": maptaxlot_separated,
                "arg_field_names": ["maptaxlot"],
            },
        ]
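        # field_as_first_arg=False means each function receives only the
        # listed arg/kwarg field values, not the value of the field being
        # updated.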
        for kwargs in function_kwargs:
            etl.transform(arcetl.attributes.update_by_function,
                          field_as_first_arg=False,
                          **kwargs)
        # Handle addresses flagged to be skipped for publication updates.
        ids = {}
        id_set_kwargs = {
            "in_publication": {
                "dataset_path": dataset.SITE_ADDRESS.path("pub")
            },
            "in_transform": {
                "dataset_path": etl.transform_path
            },
            "no_update": {
                "dataset_path": dataset.ADDRESS_ISSUES.path(),
                "dataset_where_sql": "update_publication = 0",
            },
        }
        for key, kwargs in id_set_kwargs.items():
            # as_iters yields 1-tuples here; `_id,` unpacks the bare ID.
            ids[key] = set(_id for _id, in arcetl.attributes.as_iters(
                field_names="site_address_gfid", **kwargs))
        ids["rollback"] = ids["no_update"] & ids["in_transform"] & ids[
            "in_publication"]
        ids["hold"] = ids["no_update"] & (ids["in_transform"] -
                                          ids["in_publication"])
        rollback_features = [
            feat for feat in arcetl.attributes.as_dicts(
                dataset.SITE_ADDRESS.path("pub"))
            if feat["site_address_gfid"] in ids["rollback"]
        ]
        # Strip OIDs (not part of update).
        for feat in rollback_features:
            del feat["oid@"]
        if rollback_features:
            etl.transform(
                arcetl.features.update_from_dicts,
                update_features=rollback_features,
                id_field_names="site_address_gfid",
                field_names=rollback_features[0].keys(),
                delete_missing_features=False,
            )
        etl.transform(
            arcetl.features.delete_by_id,
            delete_ids=ids["hold"],
            id_field_names="site_address_gfid",
        )
        LOG.info("%s addresses held from publication", len(ids["hold"]))
        LOG.info("%s addresses rolled-back from publication",
                 len(ids["rollback"]))
        if any([ids["hold"], ids["rollback"]]):
            send_publication_issues_message()
        etl.load(dataset.SITE_ADDRESS.path("pub"))
    send_new_lincom_address_message()
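The helper callables wired into function_kwargs above (concatenate_arguments, city_state_zip, concat_address_full, maptaxlot_separated) are defined elsewhere in the module and are not shown in this example. Below is a minimal sketch of plausible implementations, inferred from how the kwargs invoke them and from the mapnumber/taxlot slicing; the bodies and the maptaxlot grouping are assumptions, not the module's actual code.
def concatenate_arguments(*args):
    """Join the non-empty arguments with single spaces (sketch)."""
    joined = " ".join(str(arg) for arg in args if arg not in (None, ""))
    return joined if joined else None


def city_state_zip(city_name=None, state_code=None, five_digit_zip_code=None):
    """Format a 'City, OR 97401'-style line from its parts (sketch)."""
    if not city_name:
        return None
    line = city_name
    if state_code:
        line = "{}, {}".format(line, state_code)
    if five_digit_zip_code:
        line = "{} {}".format(line, five_digit_zip_code)
    return line


def concat_address_full(concat_address=None, city_name=None, state_code=None,
                        five_digit_zip_code=None, four_digit_zip_code=None):
    """Combine street address and city/state/ZIP into one line (sketch)."""
    zip_code = five_digit_zip_code
    if zip_code and four_digit_zip_code:
        zip_code = "{}-{}".format(zip_code, four_digit_zip_code)
    parts = [concat_address, city_state_zip(city_name, state_code, zip_code)]
    return ", ".join(part for part in parts if part) or None


def maptaxlot_separated(maptaxlot, separator="-"):
    """Hyphenate a 13-digit maptaxlot into its map-grouping parts (sketch).

    Assumed grouping: 2-2-2-2-5, consistent with mapnumber = maptaxlot[:8]
    and taxlot = maptaxlot[-5:] above.
    """
    if not maptaxlot:
        return None
    parts = [maptaxlot[:2], maptaxlot[2:4], maptaxlot[4:6], maptaxlot[6:8],
             maptaxlot[8:]]
    return separator.join(parts)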
Ejemplo n.º 17
0
def land_use_area_etl():
    """Run ETL for land use areas."""
    with arcetl.ArcETL("Land Use Areas") as etl:
        etl.extract(dataset.LAND_USE_AREA.path("maint"))
        # Clean maintenance values.
        transform.clean_whitespace(etl, field_names=["maptaxlot"])
        transform.clear_non_numeric_text(etl, field_names=["maptaxlot"])
        etl.transform(
            arcetl.attributes.update_by_function,
            field_name="landuse",
            function=(lambda x: 0 if x is None or x < 0 else x),
        )
        # Remove features with missing core identifiers.
        for name in dataset.LAND_USE_AREA.id_field_names:
            etl.transform(arcetl.features.delete,
                          dataset_where_sql="{} is null".format(name))
        # Dissolve on core maintenance fields that are used in publication.
        etl.transform(
            arcetl.features.dissolve,
            dissolve_field_names=dataset.LAND_USE_AREA.id_field_names,
            tolerance=TOLERANCE["xy"],
        )
        transform.add_missing_fields(etl, dataset.LAND_USE_AREA, tags=["pub"])
        # Assign geometry attributes.
        coordinate_system_xy_keys = {
            2914: {"x": "xcoord", "y": "ycoord"},
            4326: {"x": "longitude", "y": "latitude"},
        }
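        # SRID 2914 is (presumably) NAD83(HARN) / Oregon South in feet, the
        # local state-plane system; 4326 is WGS84 longitude/latitude.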
        for spatial_reference_id, xy_key in coordinate_system_xy_keys.items():
            for axis, key in xy_key.items():
                etl.transform(
                    arcetl.attributes.update_by_geometry,
                    field_name=key,
                    spatial_reference_item=spatial_reference_id,
                    geometry_properties=["centroid", axis],
                )
        # Assign overlays.
        overlay_kwargs = [
            # City attributes.
            {
                "field_name": "geocity",
                "overlay_field_name": "inccityabbr",
                "overlay_dataset_path":
                dataset.INCORPORATED_CITY_LIMITS.path(),
            },
            {
                "field_name": "yearanx",
                "overlay_field_name": "annexyear",
                "overlay_dataset_path": dataset.ANNEXATION_HISTORY.path("pub"),
            },
            {
                "field_name": "ugb",
                "overlay_field_name": "ugbcity",
                "overlay_dataset_path": dataset.UGB.path("pub"),
            },
            # Planning & zoning attributes.
            {
                "field_name":
                "greenwy",
                "overlay_field_name":
                "greenway",
                "overlay_dataset_path":
                dataset.WILLAMETTE_RIVER_GREENWAY.path("pub"),
            },
            # Public safety attributes.
            {
                "field_name": "firedist",
                "overlay_field_name": "fireprotprov",
                "overlay_dataset_path":
                dataset.FIRE_PROTECTION_AREA.path("pub"),
            },
            # Election attributes.
            {
                "field_name": "lcczone",
                "overlay_field_name": "lccbrdzone",
                "overlay_dataset_path": dataset.LCC_BOARD_ZONE.path("pub"),
            },
            # Education attributes.
            {
                "field_name": "elem",
                "overlay_field_name": "attend",
                "overlay_dataset_path":
                dataset.ELEMENTARY_SCHOOL_AREA.path("pub"),
            },
            {
                "field_name": "middle",
                "overlay_field_name": "attend",
                "overlay_dataset_path": dataset.MIDDLE_SCHOOL_AREA.path("pub"),
            },
            {
                "field_name": "high",
                "overlay_field_name": "attend",
                "overlay_dataset_path": dataset.HIGH_SCHOOL_AREA.path("pub"),
            },
            # Transportation attributes.
            {
                "field_name":
                "ltddist",
                "overlay_field_name":
                "LTD",
                "overlay_dataset_path":
                os.path.join(path.REGIONAL_DATA,
                             "transport\\ltd\\2012 LTD Boundary.shp"),
            },
            # Natural attributes.
            {
                "field_name":
                "flood",
                "overlay_field_name":
                "fld_zone",
                "overlay_dataset_path":
                os.path.join(path.REGIONAL_DATA,
                             "natural\\flood\\Flood.gdb\\FloodHazardArea"),
            },
            # Census attributes.
            {
                "field_name":
                "ctract",
                "overlay_field_name":
                "TRACT",
                "overlay_dataset_path":
                os.path.join(
                    path.REGIONAL_DATA,
                    "federal\\census\\lane\\2010\\lc_census2010.gdb\\lc_tracts2010",
                ),
            },
            {
                "field_name":
                "blockgr",
                "overlay_field_name":
                "BlockGroup",
                "overlay_dataset_path":
                os.path.join(
                    path.REGIONAL_DATA,
                    "federal\\census\\lane\\2010\\lc_census2010.gdb",
                    "lc_blockgroups2010",
                ),
            },
            # Other district attributes.
            {
                "field_name":
                "neighbor",
                "overlay_field_name":
                "NEIBORHD",
                "overlay_dataset_path":
                os.path.join(
                    path.REGIONAL_DATA,
                    "boundary\\districts\\eug\\Boundary.gdb\\EugNeighborhoods",
                ),
            },
        ]
        for kwargs in overlay_kwargs:
            # Assign each feature the overlay value at its center.
            etl.transform(arcetl.attributes.update_by_overlay,
                          overlay_central_coincident=True,
                          **kwargs)
        # Clean overlay values.
        transform.clean_whitespace(
            etl, field_names=["ctract", "blockgr", "neighbor"])
        transform.force_uppercase(etl, field_names=["ltddist"])
        # Set default overlay values where missing.
        transform.force_yn(etl,
                           field_names=["greenwy", "ltddist"],
                           default="N")
        # Remove invalid overlay values.
        transform.clear_nonpositive(etl, field_names=["ctract", "blockgr"])
        etl.transform(
            arcetl.attributes.update_by_function,
            field_name="neighbor",
            function=(lambda x: x if x and int(x) != 99 else None),
        )
        # Assign joinable field values after overlays.
        join_kwargs = [
            # Core attributes.
            {
                "field_name": "landusedes",
                "join_field_name": "ludesc",
                "join_dataset_path":
                dataset.LAND_USE_CODES_DETAILED.path("pub"),
                "on_field_pairs": [("landuse", "landuse")],
            },
            {
                "field_name": "usecode",
                "join_field_name": "usecode",
                "join_dataset_path":
                dataset.LAND_USE_CODES_DETAILED.path("pub"),
                "on_field_pairs": [("landuse", "landuse")],
            },
            {
                "field_name": "usecodedes",
                "join_field_name": "ucname",
                "join_dataset_path":
                dataset.LAND_USE_CODES_USE_CODES.path("pub"),
                "on_field_pairs": [("usecode", "usecode")],
            },
        ]
        for kwargs in join_kwargs:
            etl.transform(arcetl.attributes.update_by_joined_value, **kwargs)
        # Build values from functions.
        function_kwargs = [
            {
                "field_name": "mapnumber",
                "function": (lambda x: x[:8] if x else None),
                "arg_field_names": ["maptaxlot"],
            },
            {
                "field_name": "taxlot",
                "function": (lambda x: x[-5:] if x else None),
                "arg_field_names": ["maptaxlot"],
            },
            {
                "field_name": "maptaxlot_hyphen",
                "function": maptaxlot_separated,
                "arg_field_names": ["maptaxlot"],
            },
        ]
        for kwargs in function_kwargs:
            etl.transform(arcetl.attributes.update_by_function,
                          field_as_first_arg=False,
                          **kwargs)
        # Build values from mappings.
        mapping_kwargs = [
            {
                "field_name": "units",
                "mapping": total_units,
                "key_field_names": ["maptaxlot", "landuse"],
            },
            {
                "field_name": "acres",
                "mapping": total_acres,
                "key_field_names": ["maptaxlot", "landuse"],
            },
        ]
        for kwargs in mapping_kwargs:
            etl.transform(arcetl.attributes.update_by_mapping, **kwargs)
        etl.transform(
            arcetl.attributes.update_by_feature_match,
            field_name="landusecount",
            id_field_names=["maptaxlot"],
            update_type="match_count",
        )
        etl.load(dataset.LAND_USE_AREA.path("pub"))
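update_by_mapping fills a field by looking up each feature's key-field tuple in a plain mapping object. The total_units and total_acres mappings used above are built elsewhere in the module; below is a minimal sketch of how such a mapping could be assembled, assuming a per-(maptaxlot, landuse) summation over rows of a source table. The helper name, source dataset, and value field names here are assumptions, not the module's actual code.
from collections import defaultdict


def sum_by_key(dataset_path, key_field_names, value_field_name):
    """Build {key-tuple: summed value} from a dataset's rows (sketch)."""
    totals = defaultdict(float)
    field_names = list(key_field_names) + [value_field_name]
    rows = arcetl.attributes.as_iters(dataset_path=dataset_path,
                                      field_names=field_names)
    for row in rows:
        key, value = tuple(row[:-1]), row[-1]
        if value is not None:
            totals[key] += value
    return dict(totals)


# Hypothetical construction; the real sources are not shown in this example.
total_units = sum_by_key(dataset.LAND_USE_AREA.path("maint"),
                         ["maptaxlot", "landuse"], "units")
total_acres = sum_by_key(dataset.LAND_USE_AREA.path("maint"),
                         ["maptaxlot", "landuse"], "acres")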