def plan_designation_county_etl():
    """Run ETL for county plan designations."""
    with arcetl.ArcETL("County Plan Designations") as etl:
        etl.extract(dataset.PLAN_DESIGNATION_COUNTY.path("maint"))
        transform.add_missing_fields(
            etl, dataset.PLAN_DESIGNATION_COUNTY, tags=["pub"]
        )
        etl.transform(
            arcetl.attributes.update_by_value, field_name="planjuris", value="LC"
        )
        for new_name, old_name in [("plandes", "ZONE_"), ("plandesnam", "ZONE_NAME")]:
            etl.transform(
                arcetl.attributes.update_by_function,
                field_name=new_name,
                function=(lambda x: x),
                field_as_first_arg=False,
                arg_field_names=[old_name],
            )
        # Remove county designations where city ones exist.
        etl.transform(
            arcetl.features.erase,
            erase_dataset_path=dataset.PLAN_DESIGNATION_CITY.path("pub"),
        )
        transform.clean_whitespace(
            etl, field_names=["planjuris", "plandes", "plandesnam", "finalorder"]
        )
        etl.transform(arcetl.features.delete, dataset_where_sql="plandes is null")
        etl.transform(
            arcetl.features.dissolve,
            dissolve_field_names=dataset.PLAN_DESIGNATION_COUNTY.field_names,
            tolerance=TOLERANCE["xy"],
        )
        etl.load(dataset.PLAN_DESIGNATION_COUNTY.path("pub"))


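# A minimal sketch (illustrative only; never called by the ETL) of the
# identity-copy pattern above: with `field_as_first_arg=False`, the values of
# `arg_field_names` are passed as the function's arguments, so `lambda x: x`
# copies the old maintenance field into the new publication field. The row
# dicts and default field names here are assumptions for demonstration.
def _example_identity_copy(rows, new_name="plandes", old_name="ZONE_"):
    """Copy `old_name` values into `new_name`, as update_by_function would."""
    for row in rows:
        row[new_name] = row[old_name]
    return rows

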
def tax_code_area_etl():
    """Run ETL for tax code areas."""
    with arcetl.ArcETL("Tax Code Areas") as etl:
        etl.extract(dataset.TAX_CODE_AREA.path("maint"))
        transform.clean_whitespace(
            etl, field_names=["taxcode", "source", "ordinance", "schooldist"]
        )
        etl.transform(
            arcetl.features.dissolve,
            dissolve_field_names=dataset.TAX_CODE_AREA.field_names,
            tolerance=TOLERANCE["xy"],
        )
        etl.load(dataset.TAX_CODE_AREA.path("pub"))


def facility_etl():
    """Run ETL for facilities.

    Currently only undertaken for other ETL purposes--not publication.
    """
    with arcetl.ArcETL("Facilities") as etl:
        etl.extract(dataset.FACILITY.path("maint"))
        etl.transform(
            arcetl.dataset.rename_field,
            field_name="geofeat_id",
            new_field_name="address_intid",
        )
        # Clean maintenance values.
        transform.clear_nonpositive(etl, field_names=["address_intid"])
        transform.clean_whitespace(
            etl, field_names=["label", "label_full", "type", "type_full"]
        )
        transform.force_lowercase(etl, field_names=["type"])
        transform.force_uppercase(etl, field_names=["label"])
        transform.add_missing_fields(etl, dataset.FACILITY, tags=["pub"])
        # Assign geometry attributes.
        coordinate_system_xy_keys = {
            2914: {"x": "x_coordinate", "y": "y_coordinate"},
            4326: {"x": "longitude", "y": "latitude"},
        }
        for spatial_reference_id, xy_key in coordinate_system_xy_keys.items():
            for axis, key in xy_key.items():
                etl.transform(
                    arcetl.attributes.update_by_geometry,
                    field_name=key,
                    spatial_reference_item=spatial_reference_id,
                    geometry_properties=["centroid", axis],
                )
        etl.transform(
            arcetl.attributes.update_by_mapping,
            field_name="address_uuid",
            mapping=address_intid_to_uuid_map,
            key_field_names=["address_intid"],
        )
        etl.load(dataset.FACILITY.path("pub"))


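# The SRID keys above are EPSG codes: 2914 is an Oregon state plane system
# (feet) and 4326 is WGS84 longitude/latitude. A rough sketch of the same
# centroid-attribute idea using plain arcpy (hypothetical helper, not part of
# the ETL run):
def _example_centroid_coords(geometry, spatial_reference_id):
    """Return (x, y) of a geometry's centroid projected to the given EPSG SRID."""
    import arcpy

    projected = geometry.projectAs(arcpy.SpatialReference(spatial_reference_id))
    return projected.centroid.X, projected.centroid.Y

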
def plat_etl():
    """Run ETL for plats."""
    with arcetl.ArcETL("Plats") as etl:
        etl.extract(dataset.PLAT.path("maint"))
        transform.clean_whitespace(etl, field_names=["platname", "docnumber"])
        transform.force_uppercase(etl, field_names=["platname"])
        transform.clear_nonpositive(etl, field_names=["agencydocn"])
        pub_field_names = {
            field["name"] for field in dataset.PLAT.fields if "pub" in field["tags"]
        }
        # Delete features where every publication field is null.
        etl.transform(
            arcetl.features.delete,
            dataset_where_sql=" and ".join(
                "{} is null".format(name) for name in pub_field_names
            ),
        )
        etl.transform(
            arcetl.features.dissolve,
            dissolve_field_names=pub_field_names,
            tolerance=TOLERANCE["xy"],
        )
        etl.load(dataset.PLAT.path("pub"))


def plss_dlc_etl():
    """Run ETL for PLSS donation land claims."""
    with arcetl.ArcETL("PLSS Donation Land Claims") as etl:
        etl.extract(dataset.PLSS_DLC.path("maint"))
        transform.clean_whitespace(etl, field_names=["name", "trs"])
        transform.add_missing_fields(etl, dataset.PLSS_DLC, tags=["pub"])
        etl.transform(
            arcetl.attributes.update_by_function,
            field_name="dlcname",
            function=(lambda x: x),
            field_as_first_arg=False,
            arg_field_names=["NAME"],
        )
        etl.transform(
            arcetl.features.dissolve,
            dissolve_field_names=[
                field["name"]
                for field in dataset.PLSS_DLC.fields
                if "pub" in field["tags"]
            ],
            tolerance=TOLERANCE["xy"],
        )
        etl.load(dataset.PLSS_DLC.path("pub"))


def zoning_county_etl():
    """Run ETL for county zoning."""
    overlay_field_names = [
        name
        for name in dataset.ZONING_COUNTY.field_names
        if name.lower().startswith("over")
    ]
    with arcetl.ArcETL("County Zoning") as etl:
        etl.extract(dataset.ZONING_COUNTY.path("maint"))
        etl.transform(
            arcetl.features.insert_from_path,
            insert_dataset_path=dataset.ZONING_COUNTY.path("insert"),
        )
        transform.add_missing_fields(etl, dataset.ZONING_COUNTY, tags=["pub"])
        for new_name, old_name in [("zonecode", "ZONE_"), ("zonename", "ZONE_NAME")]:
            etl.transform(
                arcetl.attributes.update_by_function,
                field_name=new_name,
                function=(lambda x: x),
                field_as_first_arg=False,
                arg_field_names=[old_name],
            )
        # UGB zoning has slightly different names. We want to standardize on the
        # main zoning dataset names.
        etl.transform(
            arcetl.attributes.update_by_mapping,
            field_name="zonename",
            mapping=county_zone_name_map,
            key_field_names="zonecode",
        )
        # Clean maintenance values.
        transform.clean_whitespace(etl, field_names=["zonecode", "zonename"])
        etl.transform(arcetl.features.delete, dataset_where_sql="zonecode is null")
        # Remove county zoning where city zoning exists.
        etl.transform(
            arcetl.features.erase, erase_dataset_path=dataset.ZONING_CITY.path("pub")
        )
        # Assign zoning overlays.
        identity_kwargs = [
            {
                "field_name": "coastalzonecode",
                "identity_field_name": "TYPE",
                "identity_dataset_path": os.path.join(
                    LANE_ZONING_STAGING_PATH, "coastal_zones.shp"
                ),
            },
            {
                "field_name": "overas",
                "identity_field_name": "AIRPORT",
                "identity_dataset_path": os.path.join(
                    LANE_ZONING_STAGING_PATH, "aszone.shp"
                ),
                "replacement_value": "Y",
            },
            {
                "field_name": "overcas",
                "identity_field_name": "AIRPORT",
                "identity_dataset_path": os.path.join(
                    LANE_ZONING_STAGING_PATH, "caszone.shp"
                ),
                "replacement_value": "Y",
            },
            {
                "field_name": "overdms",
                "identity_field_name": "TYPE",
                "identity_dataset_path": os.path.join(
                    LANE_ZONING_STAGING_PATH, "dredge_sites.shp"
                ),
                "replacement_value": "Y",
            },
            {
                "field_name": "overbd",
                "identity_field_name": "Shape_Leng",
                "identity_dataset_path": os.path.join(
                    LANE_ZONING_STAGING_PATH, "beach_dune.shp"
                ),
                "replacement_value": "Y",
            },
            {
                "field_name": "overu",
                "identity_field_name": "urban",
                "identity_dataset_path": os.path.join(
                    LANE_ZONING_STAGING_PATH, "interim_urban.shp"
                ),
                "replacement_value": "Y",
            },
        ]
        for kwargs in identity_kwargs:
            etl.transform(arcetl.geoset.identity, **kwargs)
        # Clean identity values.
        transform.clean_whitespace(etl, field_names=["coastalzonecode"])
        etl.transform(
            arcetl.attributes.update_by_value, field_name="zonejuris", value="LC"
        )
        etl.transform(
            arcetl.features.dissolve,
            dissolve_field_names=[
                field["name"]
                for field in dataset.ZONING_COUNTY.fields
                if "pub" in field["tags"]
            ],
            tolerance=TOLERANCE["xy"],
        )
        # Assign the overlay flags dependent on coastal zone code.
        for code in ["CE", "DE", "MD", "NE", "NRC", "PW", "RD", "SN"]:
            etl.transform(
                arcetl.attributes.update_by_function,
                field_name="over{}".format(code.lower()),
                # Bind the loop's current code as a default argument, so each
                # lambda keeps its own value rather than the final loop value.
                function=(lambda czc, c=code: "Y" if czc == c else "N"),
                field_as_first_arg=False,
                arg_field_names=["coastalzonecode"],
            )
        transform.force_uppercase(etl, overlay_field_names)
        transform.force_yn(etl, overlay_field_names, default="N")
        etl.transform(
            arcetl.attributes.update_by_function,
            field_name="alloverlays",
            function=concatenate_zoning_overlays,
            field_as_first_arg=False,
            kwarg_field_names=overlay_field_names,
        )
        etl.load(dataset.ZONING_COUNTY.path("pub"))


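# A small self-contained illustration (not used by the ETL) of why the default
# argument above matters: closures created in a loop capture the variable, not
# the value it held when the lambda was defined.
def _example_late_binding():
    """Show loop-variable late binding versus default-argument capture."""
    late = [lambda: code for code in ["CE", "DE"]]
    bound = [lambda c=code: c for code in ["CE", "DE"]]
    assert [f() for f in late] == ["DE", "DE"]  # every closure sees the last value
    assert [f() for f in bound] == ["CE", "DE"]  # defaults freeze each value

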
def zoning_city_etl():
    """Run ETL for city zoning."""
    overlay_field_names = [
        name
        for name in dataset.ZONING_CITY.field_names
        if name.lower().startswith("over")
    ]
    with arcetl.ArcETL("City Zoning") as etl:
        etl.init_schema(dataset.ZONING_CITY.path("pub"))
        for _path in dataset.ZONING_CITY.path("inserts"):
            etl.transform(arcetl.features.insert_from_path, insert_dataset_path=_path)
        # Clean maintenance values.
        transform.force_uppercase(etl, overlay_field_names)
        transform.force_yn(etl, overlay_field_names, default="N")
        etl.transform(arcetl.features.delete, dataset_where_sql="zonecode is null")
        etl.transform(
            arcetl.features.dissolve,
            dissolve_field_names=dataset.ZONING_CITY.field_names,
            tolerance=TOLERANCE["xy"],
        )
        juris_domain = {
            "COB": "CoburgZoning",
            "COT": "CottageGroveZoning",
            "CRE": "CreswellZoning",
            "DUN": "DunesCityZoning",
            "EUG": "EugeneZoning",
            "FLO": "FlorenceZoning",
            "JUN": "JunctionCityZoning",
            "LOW": "LowellZoning",
            "OAK": "OakridgeZoning",
            "SPR": "SpringfieldZoning",
            "VEN": "VenetaZoning",
            "WES": "WestfirZoning",
        }
        for juris_code, domain_name in juris_domain.items():
            etl.transform(
                arcetl.attributes.update_by_domain_code,
                field_name="zonename",
                code_field_name="zonecode",
                domain_name=domain_name,
                domain_workspace_path=database.LCOGGEO.path,
                dataset_where_sql="zonejuris = '{}'".format(juris_code),
            )
        etl.transform(
            arcetl.attributes.update_by_domain_code,
            field_name="subareaname",
            code_field_name="subarea",
            domain_name="EugeneZoningSubarea",
            domain_workspace_path=database.LCOGGEO.path,
        )
        # Clean domain-derived values.
        transform.clean_whitespace(etl, field_names=["zonename", "subareaname"])
        etl.transform(
            arcetl.attributes.update_by_function,
            field_name="alloverlays",
            function=concatenate_zoning_overlays,
            field_as_first_arg=False,
            kwarg_field_names=overlay_field_names,
        )
        etl.load(dataset.ZONING_CITY.path("pub"))


def address_point_etl():
    """Run ETL for address points."""
    with arcetl.ArcETL("Address Points") as etl:
        etl.extract(dataset.TILLAMOOK_ADDRESS_POINT.path("maint"))
        # Remove addresses flagged in validation as "not OK to publish".
        etl.transform(
            arcetl.dataset.join_field,
            join_dataset_path=dataset.TILLAMOOK_ADDRESS_POINT_ISSUES.path(),
            join_field_name="ok_to_publish",
            on_field_name="address_id",
            on_join_field_name="address_id",
        )
        etl.transform(arcetl.features.delete, dataset_where_sql="ok_to_publish = 0")
        etl.transform(arcetl.dataset.delete_field, field_name="ok_to_publish")
        # Clean maintenance values.
        transform.clear_nonpositive(etl, field_names=["stnum"])
        transform.clean_whitespace(
            etl,
            field_names=[
                "stnumsuf",
                "predir",
                "name",
                "type",
                "sufdir",
                "unit_type",
                "unit",
                "postcomm",
                "zip",
                "county",
            ],
        )
        transform.force_uppercase(
            etl,
            field_names=[
                "stnumsuf",
                "predir",
                "name",
                "type",
                "unit_type",
                "unit",
                "postcomm",
                "county",
                "valid",
                "archived",
                "confidence",
            ],
        )
        transform.clear_non_numeric_text(etl, field_names=["zip"])
        transform.force_yn(etl, field_names=["archived"], default="N")
        transform.force_yn(etl, field_names=["valid"], default="Y")
        transform.add_missing_fields(
            etl, dataset.TILLAMOOK_ADDRESS_POINT, tags=["pub"]
        )
        # Assign geometry attributes.
        for x_name, y_name, srid in [("lon", "lat", 4326)]:
            for name, axis in [(x_name, "x"), (y_name, "y")]:
                etl.transform(
                    arcetl.attributes.update_by_geometry,
                    field_name=name,
                    spatial_reference_item=srid,
                    geometry_properties=["centroid", axis],
                )
        # Assign joined values.
        etl.transform(
            arcetl.attributes.update_by_joined_value,
            field_name="join_id",
            join_dataset_path=dataset.TILLAMOOK_ALTERNATE_STREET_NAME.path(),
            join_field_name="join_id",
            on_field_pairs=[
                ("predir", "prime_predir"),
                ("name", "prime_name"),
                ("type", "prime_type"),
                ("sufdir", "prime_sufdir"),
            ],
        )
        # Assign overlays.
        overlay_kwargs = [
            {
                "field_name": "city_limit",
                "overlay_field_name": "city",
                "overlay_dataset_path": dataset.TILLAMOOK_CITY_LIMITS.path(),
            },
            {
                "field_name": "ems",
                "overlay_field_name": "district",
                "overlay_dataset_path": dataset.TILLAMOOK_EMS.path(),
            },
            {
                "field_name": "esn",
                "overlay_field_name": "esn",
                "overlay_dataset_path": dataset.TILLAMOOK_EMERGENCY_SERVICE_ZONE.path(),
            },
            {
                "field_name": "fire",
                "overlay_field_name": "district",
                "overlay_dataset_path": dataset.TILLAMOOK_FIRE.path(),
            },
            {
                "field_name": "police",
                "overlay_field_name": "district",
                "overlay_dataset_path": dataset.TILLAMOOK_POLICE.path(),
            },
        ]
        for kwargs in overlay_kwargs:
            etl.transform(
                arcetl.attributes.update_by_overlay,
                overlay_central_coincident=True,
                **kwargs
            )
        # Build values: Constants.
        value_kwargs = [{"field_name": "state", "value": "OR"}]
        transform.update_attributes_by_values(etl, value_kwargs)
        # Build values: Concatenations.
        etl.transform(
            arcetl.attributes.update_by_function,
            field_name="address",
            function=concatenate_arguments,
            field_as_first_arg=False,
            arg_field_names=[
                "stnum",
                "stnumsuf",
                "predir",
                "name",
                "type",
                "sufdir",
                "unit_type",
                "unit",
            ],
        )
        etl.load(dataset.TILLAMOOK_ADDRESS_POINT.path("pub"))


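# `concatenate_arguments` is defined elsewhere in this module's helpers; a
# minimal sketch of the assumed behavior (skip empty parts, join with spaces):
def _example_concatenate_arguments(*args):
    """Concatenate non-empty argument values into a single spaced string."""
    return " ".join(str(arg).strip() for arg in args if arg not in (None, "")) or None

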
def site_address_etl():
    """Run ETL for site addresses."""
    with arcetl.ArcETL("Site Addresses") as etl:
        etl.extract(dataset.SITE_ADDRESS.path("maint"))
        # Clean maintenance values.
        transform.clear_nonpositive(etl, field_names=["house_nbr"])
        transform.clean_whitespace(
            etl,
            field_names=[
                "house_suffix_code",
                "pre_direction_code",
                "street_name",
                "street_type_code",
                "unit_type_code",
                "unit_id",
                "city_name",
                "landuse",
                "maptaxlot",
                "account",
            ],
        )
        transform.force_uppercase(
            etl,
            field_names=[
                "house_suffix_code",
                "pre_direction_code",
                "street_name",
                "street_type_code",
                "unit_type_code",
                "unit_id",
                "maptaxlot",
                "valid",
                "archived",
            ],
        )
        transform.clear_non_numeric_text(etl, field_names=["account"])
        etl.transform(
            arcetl.attributes.update_by_function,
            field_name="landuse",
            function=(lambda x: x if is_numeric(x) else "0"),
        )
        transform.force_yn(etl, field_names=["archived"], default="N")
        transform.force_yn(etl, field_names=["valid"], default="Y")
        transform.add_missing_fields(etl, dataset.SITE_ADDRESS, tags=["pub"])
        # Assign geometry attributes.
        coordinate_system_xy_keys = {
            2914: {"x": "x_coordinate", "y": "y_coordinate"},
            4326: {"x": "longitude", "y": "latitude"},
        }
        for spatial_reference_id, xy_key in coordinate_system_xy_keys.items():
            for axis, key in xy_key.items():
                etl.transform(
                    arcetl.attributes.update_by_geometry,
                    field_name=key,
                    spatial_reference_item=spatial_reference_id,
                    geometry_properties=["centroid", axis],
                )
        # Assign overlays.
        overlay_kwargs = [
            # City attributes.
            {
                "field_name": "geocity",
                "overlay_field_name": "inccityabbr",
                "overlay_dataset_path": dataset.INCORPORATED_CITY_LIMITS.path(),
            },
            {
                "field_name": "annexhist",
                "overlay_field_name": "annexnum",
                "overlay_dataset_path": dataset.ANNEXATION_HISTORY.path("pub"),
            },
            # Have to do overlay rather than join because some lack codes.
            {
                "field_name": "yearanx",
                "overlay_field_name": "annexyear",
                "overlay_dataset_path": dataset.ANNEXATION_HISTORY.path("pub"),
            },
            {
                "field_name": "ugb",
                "overlay_field_name": "ugbcity",
                "overlay_dataset_path": dataset.UGB.path("pub"),
            },
            # Planning & zoning attributes.
            {
                "field_name": "greenwy",
                "overlay_field_name": "greenway",
                "overlay_dataset_path": dataset.WILLAMETTE_RIVER_GREENWAY.path("pub"),
            },
            {
                "field_name": "nodaldev",
                "overlay_field_name": "nodearea",
                "overlay_dataset_path": dataset.NODAL_DEVELOPMENT_AREA.path("pub"),
            },
            {
                "field_name": "plandes_id",
                "overlay_field_name": "plandes_id",
                "overlay_dataset_path": dataset.PLAN_DESIGNATION.path("pub"),
            },
            {
                "field_name": "sprsvcbndy",
                "overlay_field_name": "is_inside",
                "overlay_dataset_path": dataset.SPRINGFIELD_HANSEN_EXTENT.path(),
            },
            # Public safety attributes.
            {
                "field_name": "ambulance_district",
                "overlay_field_name": "asacode",
                "overlay_dataset_path": dataset.AMBULANCE_SERVICE_AREA.path("pub"),
            },
            {
                "field_name": "firedist",
                "overlay_field_name": "fireprotprov",
                "overlay_dataset_path": dataset.FIRE_PROTECTION_AREA.path("pub"),
            },
            {
                "field_name": "police_beat",
                "overlay_field_name": "CAD",
                "overlay_dataset_path": os.path.join(
                    path.LCOG_GIS_PROJECTS,
                    "Public_Safety\\PSAPS\\CLPSAP\\SunGard_CAD\\Maintained_Layers",
                    "Maintained_Layers.gdb\\Fire_Law_Tow\\law_beat",
                ),
            },
            {
                "field_name": "psap_code",
                "overlay_field_name": "psap_code",
                "overlay_dataset_path": dataset.PSAP_AREA.path("pub"),
            },
            # Election attributes.
            {
                "field_name": "electionpr",
                "overlay_field_name": "precntnum",
                "overlay_dataset_path": dataset.ELECTION_PRECINCT.path("pub"),
            },
            {
                "field_name": "ccward",
                "overlay_field_name": "ward",
                "overlay_dataset_path": dataset.CITY_WARD.path(),
            },
            {
                "field_name": "clpud_subdivision",
                "overlay_field_name": "SUBDIVISIO",
                "overlay_dataset_path": os.path.join(
                    path.LCOG_GIS_PROJECTS,
                    "UtilityDistricts\\CentralLincolnPUD\\Redistricting2012",
                    "CLPUD_Subdivisions.shp",
                ),
            },
            {
                "field_name": "cocommdist",
                "overlay_field_name": "commrdist",
                "overlay_dataset_path": (
                    dataset.COUNTY_COMMISSIONER_DISTRICT.path("pub")
                ),
            },
            {
                "field_name": "epud",
                "overlay_field_name": "boardid",
                "overlay_dataset_path": dataset.EPUD_SUBDISTRICT.path("pub"),
            },
            {
                "field_name": "hwpud_subdivision",
                "overlay_field_name": "BoardZone",
                "overlay_dataset_path": os.path.join(
                    path.LCOG_GIS_PROJECTS,
                    "UtilityDistricts\\HecetaWaterPUD\\NewBoardSubzones",
                    "HecetaData.gdb",
                    "ScenarioB",
                ),
            },
            {
                "field_name": "lcczone",
                "overlay_field_name": "lccbrdzone",
                "overlay_dataset_path": dataset.LCC_BOARD_ZONE.path("pub"),
            },
            {
                "field_name": "senatedist",
                "overlay_field_name": "sendist",
                "overlay_dataset_path": dataset.STATE_SENATOR_DISTRICT.path("pub"),
            },
            {
                "field_name": "strepdist",
                "overlay_field_name": "repdist",
                "overlay_dataset_path": (
                    dataset.STATE_REPRESENTATIVE_DISTRICT.path("pub")
                ),
            },
            {
                "field_name": "swcd",
                "overlay_field_name": "swcdist",
                "overlay_dataset_path": (
                    dataset.SOIL_WATER_CONSERVATION_DISTRICT.path("pub")
                ),
            },
            {
                "field_name": "swcdzone",
                "overlay_field_name": "swczone",
                "overlay_dataset_path": (
                    dataset.SOIL_WATER_CONSERVATION_DISTRICT.path("pub")
                ),
            },
            # Education attributes.
            {
                "field_name": "schooldist",
                "overlay_field_name": "district",
                "overlay_dataset_path": dataset.SCHOOL_DISTRICT.path("pub"),
            },
            {
                "field_name": "elem",
                "overlay_field_name": "attend",
                "overlay_dataset_path": dataset.ELEMENTARY_SCHOOL_AREA.path("pub"),
            },
            {
                "field_name": "middle",
                "overlay_field_name": "attend",
                "overlay_dataset_path": dataset.MIDDLE_SCHOOL_AREA.path("pub"),
            },
            {
                "field_name": "high",
                "overlay_field_name": "attend",
                "overlay_dataset_path": dataset.HIGH_SCHOOL_AREA.path("pub"),
            },
            # Transportation attributes.
            {
                "field_name": "ltddist",
                "overlay_field_name": "LTD",
                "overlay_dataset_path": os.path.join(
                    path.REGIONAL_DATA, "transport\\ltd\\2012 LTD Boundary.shp"
                ),
            },
            {
                "field_name": "ltdridesrc",
                "overlay_field_name": "LTD",
                "overlay_dataset_path": os.path.join(
                    path.REGIONAL_DATA, "transport\\ltd\\2015 RideSource Boundary.shp"
                ),
            },
            {
                "field_name": "cats",
                "overlay_field_name": "CATSBNDY",
                "overlay_dataset_path": os.path.join(
                    path.REGIONAL_DATA, "transport\\eug\\catsbndy.shp"
                ),
            },
            {
                "field_name": "trans_analysis_zone",
                "overlay_field_name": "TAZ_NUM",
                "overlay_dataset_path": os.path.join(
                    path.REGIONAL_DATA, "transport\\MTAZ16.shp"
                ),
            },
            # Natural attributes.
            {
                "field_name": "firmnumber",
                "overlay_field_name": "firm_pan",
                "overlay_dataset_path": os.path.join(
                    path.REGIONAL_DATA, "natural\\flood\\Flood.gdb\\FIRMPanel"
                ),
            },
            {
                "field_name": "soilkey",
                "overlay_field_name": "mukey",
                "overlay_dataset_path": os.path.join(
                    path.REGIONAL_DATA, "natural\\soils\\Soils.gdb\\Soil"
                ),
            },
            {
                "field_name": "wetland",
                "overlay_field_name": "WET_TYPE",
                "overlay_dataset_path": os.path.join(
                    path.REGIONAL_DATA, "natural\\eug\\Wetland\\wetlands.shp"
                ),
            },
            # Census attributes.
            {
                "field_name": "ctract",
                "overlay_field_name": "TRACT",
                "overlay_dataset_path": os.path.join(
                    path.REGIONAL_DATA,
                    "federal\\census\\lane\\2010",
                    "lc_census2010.gdb\\lc_tracts2010",
                ),
            },
            {
                "field_name": "blockgr",
                "overlay_field_name": "BlockGroup",
                "overlay_dataset_path": os.path.join(
                    path.REGIONAL_DATA,
                    "federal\\census\\lane\\2010",
                    "lc_census2010.gdb\\lc_blockgroups2010",
                ),
            },
            # Other district attributes.
            {
                "field_name": "neighbor",
                "overlay_field_name": "NEIBORHD",
                "overlay_dataset_path": os.path.join(
                    path.REGIONAL_DATA,
                    "boundary\\districts\\eug",
                    "Boundary.gdb\\EugNeighborhoods",
                ),
            },
        ]
        for kwargs in overlay_kwargs:
            etl.transform(
                arcetl.attributes.update_by_overlay,
                overlay_central_coincident=True,
                **kwargs
            )
        # Override overlays for special cases.
        for override in OVERRIDE_ATTRS:
            for kwargs in OVERRIDE_ATTRS[override].get("overlay_kwargs", []):
                etl.transform(
                    arcetl.attributes.update_by_value,
                    dataset_where_sql=OVERRIDE_ATTRS[override].get("where_sql"),
                    **kwargs
                )
        # Clean overlay values.
        transform.clean_whitespace(
            etl,
            field_names=["police_beat", "wetland", "ctract", "blockgr", "neighbor"],
        )
        transform.force_uppercase(etl, field_names=["cats", "ltddist", "ltdridesrc"])
        # Set default overlay values where missing.
        transform.force_yn(
            etl,
            field_names=["greenwy", "sprsvcbndy", "cats", "ltddist", "ltdridesrc"],
            default="N",
        )
        # Remove invalid overlay values.
        transform.clear_nonpositive(etl, field_names=["ctract", "blockgr"])
        etl.transform(
            arcetl.attributes.update_by_function,
            field_name="neighbor",
            function=(lambda x: x if x and int(x) != 99 else None),
        )
        # Assign joinable field values after overlays.
        join_kwargs = [
            # Core attributes.
            {
                "field_name": "pre_direction",
                "join_field_name": "description",
                "join_dataset_path": dataset.STREET_DIRECTION.path(),
                "on_field_pairs": [("pre_direction_code", "code")],
            },
            {
                "field_name": "street_type",
                "join_field_name": "description",
                "join_dataset_path": dataset.STREET_TYPE.path(),
                "on_field_pairs": [("street_type_code", "code")],
            },
            {
                "field_name": "unit_type",
                "join_field_name": "description",
                "join_dataset_path": dataset.UNIT_TYPE.path(),
                "on_field_pairs": [("unit_type_code", "code")],
            },
            {
                "field_name": "city_name_abbr",
                "join_field_name": "CityNameAbbr",
                "join_dataset_path": dataset.CITY.path(),
                "on_field_pairs": [("city_name", "CityName")],
            },
            # Extended attributes.
            {
                "field_name": "five_digit_zip_code",
                "join_field_name": "zip_code",
                "join_dataset_path": dataset.ADDRESS_POSTAL_INFO.path(),
                "on_field_pairs": [("geofeat_id", "geofeat_id")],
            },
            # Any address not assigned a ZIP from USPS gets an overlay ZIP.
            {
                "field_name": "five_digit_zip_code",
                "dataset_where_sql": "five_digit_zip_code is null",
                "join_field_name": "zip_code_overlay",
                "join_dataset_path": dataset.ADDRESS_POSTAL_INFO.path(),
                "on_field_pairs": [("geofeat_id", "geofeat_id")],
            },
            {
                "field_name": "four_digit_zip_code",
                "join_field_name": "plus_four_code",
                "join_dataset_path": dataset.ADDRESS_POSTAL_INFO.path(),
                "on_field_pairs": [("geofeat_id", "geofeat_id")],
            },
            {
                "field_name": "usps_delivery_point_code",
                "join_field_name": "delivery_point_code",
                "join_dataset_path": dataset.ADDRESS_POSTAL_INFO.path(),
                "on_field_pairs": [("geofeat_id", "geofeat_id")],
            },
            {
                "field_name": "postal_carrier_route",
                "join_field_name": "carrier_route",
                "join_dataset_path": dataset.ADDRESS_POSTAL_INFO.path(),
                "on_field_pairs": [("geofeat_id", "geofeat_id")],
            },
            {
                "field_name": "usps_is_cmra",
                "join_field_name": "is_cmra",
                "join_dataset_path": dataset.ADDRESS_POSTAL_INFO.path(),
                "on_field_pairs": [("geofeat_id", "geofeat_id")],
            },
            {
                "field_name": "usps_is_vacant",
                "join_field_name": "is_vacant",
                "join_dataset_path": dataset.ADDRESS_POSTAL_INFO.path(),
                "on_field_pairs": [("geofeat_id", "geofeat_id")],
            },
            {
                "field_name": "usps_has_mail_service",
                "join_field_name": "has_mail_service",
                "join_dataset_path": dataset.ADDRESS_POSTAL_INFO.path(),
                "on_field_pairs": [("geofeat_id", "geofeat_id")],
            },
            {
                "field_name": "landuse_desc",
                "join_field_name": "ludesc",
                "join_dataset_path": dataset.LAND_USE_CODES_DETAILED.path("pub"),
                "on_field_pairs": [("landuse", "landusec")],
            },
            {
                "field_name": "usecode",
                "join_field_name": "usecode",
                "join_dataset_path": dataset.LAND_USE_CODES_DETAILED.path("pub"),
                "on_field_pairs": [("landuse", "landusec")],
            },
            {
                "field_name": "usedesc",
                "join_field_name": "ucname",
                "join_dataset_path": dataset.LAND_USE_CODES_USE_CODES.path("pub"),
                "on_field_pairs": [("usecode", "usecode")],
            },
            # A&T attributes.
            {
                "field_name": "tca",
                "join_field_name": "tax_code_overlay",
                "join_dataset_path": dataset.ADDRESS_ASSESS_TAX_INFO.path(),
                "on_field_pairs": [("geofeat_id", "geofeat_id")],
            },
            # City attributes.
            {
                "field_name": "geocity_name",
                "join_field_name": "inccityname",
                "join_dataset_path": dataset.INCORPORATED_CITY_LIMITS.path(),
                "on_field_pairs": [("geocity", "inccityabbr")],
            },
            {
                "field_name": "ugb_city_name",
                "join_field_name": "ugbcityname",
                "join_dataset_path": dataset.UGB.path("pub"),
                "on_field_pairs": [("ugb", "ugbcity")],
            },
            # Planning & zoning attributes.
            {
                "field_name": "nodaldev_name",
                "join_field_name": "nodename",
                "join_dataset_path": dataset.NODAL_DEVELOPMENT_AREA.path("pub"),
                "on_field_pairs": [("nodaldev", "nodearea")],
            },
            {
                "field_name": "plandesjuris",
                "join_field_name": "planjuris",
                "join_dataset_path": dataset.PLAN_DESIGNATION.path("pub"),
                "on_field_pairs": [("plandes_id", "plandes_id")],
            },
            {
                "field_name": "plandes",
                "join_field_name": "plandes",
                "join_dataset_path": dataset.PLAN_DESIGNATION.path("pub"),
                "on_field_pairs": [("plandes_id", "plandes_id")],
            },
            {
                "field_name": "plandesdesc",
                "join_field_name": "plandesnam",
                "join_dataset_path": dataset.PLAN_DESIGNATION.path("pub"),
                "on_field_pairs": [("plandes_id", "plandes_id")],
            },
            # Public safety attributes.
            {
                "field_name": "ambulance_service_area",
                "join_field_name": "asa",
                "join_dataset_path": dataset.AMBULANCE_SERVICE_AREA.path("pub"),
                "on_field_pairs": [("ambulance_district", "asacode")],
            },
            {
                "field_name": "ambulance_service_provider",
                "join_field_name": "provider",
                "join_dataset_path": dataset.AMBULANCE_SERVICE_AREA.path("pub"),
                "on_field_pairs": [("ambulance_district", "asacode")],
            },
            {
                "field_name": "fire_protection_provider",
                "join_field_name": "fpprovname",
                "join_dataset_path": dataset.FIRE_PROTECTION_AREA.path("pub"),
                "on_field_pairs": [("firedist", "fireprotprov")],
            },
            {
                "field_name": "psap_name",
                "join_field_name": "psap_name",
                "join_dataset_path": dataset.PSAP_AREA.path("pub"),
                "on_field_pairs": [("psap_code", "psap_code")],
            },
            {
                "field_name": "emergency_service_number",
                "join_field_name": "emergency_service_number",
                "join_dataset_path": dataset.EMERGENCY_SERVICE_NUMBER.path(),
                "on_field_pairs": [
                    # City used as proxy for police.
                    ("geocity", "city_limits"),
                    ("ambulance_district", "asa_code"),
                    ("firedist", "fire_district"),
                    ("psap_code", "psap_code"),
                ],
            },
            {
                "field_name": "emergency_service_number",
                "join_field_name": "emergency_service_number",
                "join_dataset_path": dataset.EMERGENCY_SERVICE_NUMBER.path(),
                "on_field_pairs": [
                    # City used as proxy for police.
                    ("geocity", "city_limits"),
                    ("ambulance_district", "asa_code"),
                    ("firedist", "fire_district"),
                ],
                "dataset_where_sql": "emergency_service_number is null",
            },
            # Election attributes.
            {
                "field_name": "city_councilor",
                "join_field_name": "councilor",
                "join_dataset_path": dataset.CITY_WARD.path(),
                "on_field_pairs": [("ccward", "ward")],
            },
            {
                "field_name": "cocommdist_name",
                "join_field_name": "cmdistname",
                "join_dataset_path": dataset.COUNTY_COMMISSIONER_DISTRICT.path("pub"),
                "on_field_pairs": [("cocommdist", "commrdist")],
            },
            {
                "field_name": "county_commissioner",
                "join_field_name": "commrname",
                "join_dataset_path": dataset.COUNTY_COMMISSIONER_DISTRICT.path("pub"),
                "on_field_pairs": [("cocommdist", "commrdist")],
            },
            {
                "field_name": "eweb_commissioner_name",
                "join_field_name": "eweb_commissioner_name",
                "join_dataset_path": dataset.EWEB_COMMISSIONER.path("pub"),
                "on_field_pairs": [("ccward", "city_council_ward")],
            },
            {
                "field_name": "state_representative",
                "join_field_name": "repname",
                "join_dataset_path": dataset.STATE_REPRESENTATIVE_DISTRICT.path("pub"),
                "on_field_pairs": [("strepdist", "repdist")],
            },
            {
                "field_name": "state_senator",
                "join_field_name": "senname",
                "join_dataset_path": dataset.STATE_SENATOR_DISTRICT.path("pub"),
                "on_field_pairs": [("senatedist", "sendist")],
            },
            # Education attributes.
            {
                "field_name": "schooldist_name",
                "join_field_name": "names",
                "join_dataset_path": dataset.SCHOOL_DISTRICT.path("pub"),
                "on_field_pairs": [("schooldist", "district")],
            },
            {
                "field_name": "elem_name",
                "join_field_name": "elem_school",
                "join_dataset_path": dataset.ELEMENTARY_SCHOOL_AREA.path("pub"),
                "on_field_pairs": [("elem", "attend")],
            },
            {
                "field_name": "middle_name",
                "join_field_name": "middle_school",
                "join_dataset_path": dataset.MIDDLE_SCHOOL_AREA.path("pub"),
                "on_field_pairs": [("middle", "attend")],
            },
            {
                "field_name": "high_name",
                "join_field_name": "high_school",
                "join_dataset_path": dataset.HIGH_SCHOOL_AREA.path("pub"),
                "on_field_pairs": [("high", "attend")],
            },
            # Natural attributes.
            {
                "field_name": "firmprinted",
                "join_field_name": "panel_printed",
                "join_dataset_path": os.path.join(
                    path.REGIONAL_DATA, "natural\\flood\\Flood.gdb\\FIRMPanel"
                ),
                "on_field_pairs": [("firmnumber", "firm_pan")],
            },
            {
                "field_name": "firm_community_id",
                "join_field_name": "com_nfo_id",
                "join_dataset_path": os.path.join(
                    path.REGIONAL_DATA, "natural\\flood\\Flood.gdb\\CommunityInfo"
                ),
                "on_field_pairs": [("geocity", "community_code")],
            },
            {
                "field_name": "firm_community_post_firm_date",
                "join_field_name": "in_frm_dat",
                "join_dataset_path": os.path.join(
                    path.REGIONAL_DATA, "natural\\flood\\Flood.gdb\\CommunityInfo"
                ),
                "on_field_pairs": [("geocity", "community_code")],
            },
            {
                "field_name": "soiltype",
                "join_field_name": "musym",
                "join_dataset_path": os.path.join(
                    path.REGIONAL_DATA, "natural\\soils\\Soils.gdb\\MUAggAtt"
                ),
                "on_field_pairs": [("soilkey", "mukey")],
            },
            # Other district attributes.
            {
                "field_name": "neighborhood_name",
                "join_field_name": "NAME",
                "join_dataset_path": os.path.join(
                    path.REGIONAL_DATA,
                    "boundary\\districts\\eug\\Boundary.gdb\\EugNeighborhoods",
                ),
                "on_field_pairs": [("neighbor", "NEIBORHD")],
            },
        ]
        for kwargs in join_kwargs:
            etl.transform(arcetl.attributes.update_by_joined_value, **kwargs)
        # Clean join values.
        transform.clean_whitespace(etl, field_names=["neighborhood_name"])
        # Remove Metro Plan designations, per City of Eugene request.
        transform.clear_all_values(
            etl,
            field_names=["plandes", "plandesdesc"],
            dataset_where_sql="plandesjuris = 'MTP'",
        )
        # Remove +4 ZIP where initial ZIP is missing.
        transform.clear_all_values(
            etl,
            field_names=["four_digit_zip_code"],
            dataset_where_sql="five_digit_zip_code is null",
        )
        # Assign constants.
        constant_kwargs = [
            {"field_name": "state_code", "value": "OR"},
            {"field_name": "state_name", "value": "Oregon"},
            {"field_name": "county_name", "value": "Lane"},
        ]
        for kwargs in constant_kwargs:
            etl.transform(arcetl.attributes.update_by_value, **kwargs)
        # Override constants for special cases.
        for override in OVERRIDE_ATTRS:
            for kwargs in OVERRIDE_ATTRS[override].get("constant_kwargs", []):
                etl.transform(
                    arcetl.attributes.update_by_value,
                    dataset_where_sql=OVERRIDE_ATTRS[override].get("where_sql"),
                    **kwargs
                )
        # Build values from functions.
        function_kwargs = [
            {
                "field_name": "street_name_full",
                "function": concatenate_arguments,
                "arg_field_names": [
                    "pre_direction_code",
                    "street_name",
                    "street_type_code",
                ],
            },
            {
                "field_name": "city_state_zip",
                "function": city_state_zip,
                "kwarg_field_names": [
                    "city_name",
                    "state_code",
                    "five_digit_zip_code",
                ],
            },
            {
                "field_name": "concat_address_no_unit",
                "function": concatenate_arguments,
                "arg_field_names": [
                    "house_nbr",
                    "house_suffix_code",
                    "street_name_full",
                ],
            },
            {
                "field_name": "concat_address",
                "function": concatenate_arguments,
                "arg_field_names": [
                    "concat_address_no_unit",
                    "unit_type_code",
                    "unit_id",
                ],
            },
            {
                "field_name": "concat_address_no_direction",
                "function": concatenate_arguments,
                "arg_field_names": [
                    "house_nbr",
                    "house_suffix_code",
                    "street_name",
                    "street_type_code",
                    "unit_type_code",
                    "unit_id",
                ],
            },
            {
                "field_name": "concat_address_full",
                "function": concat_address_full,
                "kwarg_field_names": [
                    "concat_address",
                    "city_name",
                    "state_code",
                    "five_digit_zip_code",
                    "four_digit_zip_code",
                ],
            },
            {
                "field_name": "mapnumber",
                "function": (lambda x: x[:8] if x else None),
                "arg_field_names": ["maptaxlot"],
            },
            {
                "field_name": "taxlot",
                "function": (lambda x: x[-5:] if x else None),
                "arg_field_names": ["maptaxlot"],
            },
            {
                "field_name": "maptaxlot_hyphen",
                "function": maptaxlot_separated,
                "arg_field_names": ["maptaxlot"],
            },
        ]
        for kwargs in function_kwargs:
            etl.transform(
                arcetl.attributes.update_by_function,
                field_as_first_arg=False,
                **kwargs
            )
        # Take care of addresses flagged not to update in publication.
        ids = {}
        id_set_kwargs = {
            "in_publication": {"dataset_path": dataset.SITE_ADDRESS.path("pub")},
            "in_transform": {"dataset_path": etl.transform_path},
            "no_update": {
                "dataset_path": dataset.ADDRESS_ISSUES.path(),
                "dataset_where_sql": "update_publication = 0",
            },
        }
        for key, kwargs in id_set_kwargs.items():
            ids[key] = set(
                _id
                for _id, in arcetl.attributes.as_iters(
                    field_names="site_address_gfid", **kwargs
                )
            )
        # Roll back no-update addresses that are already published; hold back
        # no-update addresses that have never been published.
        ids["rollback"] = (
            ids["no_update"] & ids["in_transform"] & ids["in_publication"]
        )
        ids["hold"] = ids["no_update"] & (ids["in_transform"] - ids["in_publication"])
        rollback_features = [
            feat
            for feat in arcetl.attributes.as_dicts(dataset.SITE_ADDRESS.path("pub"))
            if feat["site_address_gfid"] in ids["rollback"]
        ]
        # Strip OIDs (not part of update).
        for feat in rollback_features:
            del feat["oid@"]
        if rollback_features:
            etl.transform(
                arcetl.features.update_from_dicts,
                update_features=rollback_features,
                id_field_names="site_address_gfid",
                field_names=rollback_features[0].keys(),
                delete_missing_features=False,
            )
        etl.transform(
            arcetl.features.delete_by_id,
            delete_ids=ids["hold"],
            id_field_names="site_address_gfid",
        )
        LOG.info("%s addresses held from publication", len(ids["hold"]))
        LOG.info("%s addresses rolled back from publication", len(ids["rollback"]))
        if any([ids["hold"], ids["rollback"]]):
            send_publication_issues_message()
        etl.load(dataset.SITE_ADDRESS.path("pub"))
        send_new_lincom_address_message()


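# A toy illustration (names assumed; not used by the ETL) of the set algebra
# above that decides rollback versus hold for no-update addresses:
def _example_rollback_hold(no_update, in_transform, in_publication):
    """Return (rollback, hold) ID sets per the publication-freeze rules."""
    # Already published: restore the published attributes (rollback).
    rollback = no_update & in_transform & in_publication
    # Never published: keep it out of the publication copy entirely (hold).
    hold = no_update & (in_transform - in_publication)
    return rollback, hold

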
def land_use_area_etl():
    """Run ETL for land use areas."""
    with arcetl.ArcETL("Land Use Areas") as etl:
        etl.extract(dataset.LAND_USE_AREA.path("maint"))
        # Clean maintenance values.
        transform.clean_whitespace(etl, field_names=["maptaxlot"])
        transform.clear_non_numeric_text(etl, field_names=["maptaxlot"])
        etl.transform(
            arcetl.attributes.update_by_function,
            field_name="landuse",
            function=(lambda x: 0 if x is None or x < 0 else x),
        )
        # Remove features with missing core identifiers.
        for name in dataset.LAND_USE_AREA.id_field_names:
            etl.transform(
                arcetl.features.delete, dataset_where_sql="{} is null".format(name)
            )
        # Dissolve on core maintenance fields that are used in publication.
        etl.transform(
            arcetl.features.dissolve,
            dissolve_field_names=dataset.LAND_USE_AREA.id_field_names,
            tolerance=TOLERANCE["xy"],
        )
        transform.add_missing_fields(etl, dataset.LAND_USE_AREA, tags=["pub"])
        # Assign geometry attributes.
        coordinate_system_xy_keys = {
            2914: {"x": "xcoord", "y": "ycoord"},
            4326: {"x": "longitude", "y": "latitude"},
        }
        for spatial_reference_id, xy_key in coordinate_system_xy_keys.items():
            for axis, key in xy_key.items():
                etl.transform(
                    arcetl.attributes.update_by_geometry,
                    field_name=key,
                    spatial_reference_item=spatial_reference_id,
                    geometry_properties=["centroid", axis],
                )
        # Assign overlays.
        overlay_kwargs = [
            # City attributes.
            {
                "field_name": "geocity",
                "overlay_field_name": "inccityabbr",
                "overlay_dataset_path": dataset.INCORPORATED_CITY_LIMITS.path(),
            },
            {
                "field_name": "yearanx",
                "overlay_field_name": "annexyear",
                "overlay_dataset_path": dataset.ANNEXATION_HISTORY.path("pub"),
            },
            {
                "field_name": "ugb",
                "overlay_field_name": "ugbcity",
                "overlay_dataset_path": dataset.UGB.path("pub"),
            },
            # Planning & zoning attributes.
            {
                "field_name": "greenwy",
                "overlay_field_name": "greenway",
                "overlay_dataset_path": dataset.WILLAMETTE_RIVER_GREENWAY.path("pub"),
            },
            # Public safety attributes.
            {
                "field_name": "firedist",
                "overlay_field_name": "fireprotprov",
                "overlay_dataset_path": dataset.FIRE_PROTECTION_AREA.path("pub"),
            },
            # Election attributes.
            {
                "field_name": "lcczone",
                "overlay_field_name": "lccbrdzone",
                "overlay_dataset_path": dataset.LCC_BOARD_ZONE.path("pub"),
            },
            # Education attributes.
            {
                "field_name": "elem",
                "overlay_field_name": "attend",
                "overlay_dataset_path": dataset.ELEMENTARY_SCHOOL_AREA.path("pub"),
            },
            {
                "field_name": "middle",
                "overlay_field_name": "attend",
                "overlay_dataset_path": dataset.MIDDLE_SCHOOL_AREA.path("pub"),
            },
            {
                "field_name": "high",
                "overlay_field_name": "attend",
                "overlay_dataset_path": dataset.HIGH_SCHOOL_AREA.path("pub"),
            },
            # Transportation attributes.
            {
                "field_name": "ltddist",
                "overlay_field_name": "LTD",
                "overlay_dataset_path": os.path.join(
                    path.REGIONAL_DATA, "transport\\ltd\\2012 LTD Boundary.shp"
                ),
            },
            # Natural attributes.
            {
                "field_name": "flood",
                "overlay_field_name": "fld_zone",
                "overlay_dataset_path": os.path.join(
                    path.REGIONAL_DATA, "natural\\flood\\Flood.gdb\\FloodHazardArea"
                ),
            },
            # Census attributes.
            {
                "field_name": "ctract",
                "overlay_field_name": "TRACT",
                "overlay_dataset_path": os.path.join(
                    path.REGIONAL_DATA,
                    "federal\\census\\lane\\2010\\lc_census2010.gdb\\lc_tracts2010",
                ),
            },
            {
                "field_name": "blockgr",
                "overlay_field_name": "BlockGroup",
                "overlay_dataset_path": os.path.join(
                    path.REGIONAL_DATA,
                    "federal\\census\\lane\\2010\\lc_census2010.gdb",
                    "lc_blockgroups2010",
                ),
            },
            # Other district attributes.
            {
                "field_name": "neighbor",
                "overlay_field_name": "NEIBORHD",
                "overlay_dataset_path": os.path.join(
                    path.REGIONAL_DATA,
                    "boundary\\districts\\eug\\Boundary.gdb\\EugNeighborhoods",
                ),
            },
        ]
        for kwargs in overlay_kwargs:
            etl.transform(
                arcetl.attributes.update_by_overlay,
                overlay_central_coincident=True,
                **kwargs
            )
        # Clean overlay values.
        transform.clean_whitespace(etl, field_names=["ctract", "blockgr", "neighbor"])
        transform.force_uppercase(etl, field_names=["ltddist"])
        # Set default overlay values where missing.
        transform.force_yn(etl, field_names=["greenwy", "ltddist"], default="N")
        # Remove invalid overlay values.
        transform.clear_nonpositive(etl, field_names=["ctract", "blockgr"])
        etl.transform(
            arcetl.attributes.update_by_function,
            field_name="neighbor",
            function=(lambda x: x if x and int(x) != 99 else None),
        )
        # Assign joinable field values after overlays.
        join_kwargs = [
            # Core attributes.
            {
                "field_name": "landusedes",
                "join_field_name": "ludesc",
                "join_dataset_path": dataset.LAND_USE_CODES_DETAILED.path("pub"),
                "on_field_pairs": [("landuse", "landuse")],
            },
            {
                "field_name": "usecode",
                "join_field_name": "usecode",
                "join_dataset_path": dataset.LAND_USE_CODES_DETAILED.path("pub"),
                "on_field_pairs": [("landuse", "landuse")],
            },
            {
                "field_name": "usecodedes",
                "join_field_name": "ucname",
                "join_dataset_path": dataset.LAND_USE_CODES_USE_CODES.path("pub"),
                "on_field_pairs": [("usecode", "usecode")],
            },
        ]
        for kwargs in join_kwargs:
            etl.transform(arcetl.attributes.update_by_joined_value, **kwargs)
        # Build values from functions.
        function_kwargs = [
            {
                "field_name": "mapnumber",
                "function": (lambda x: x[:8] if x else None),
                "arg_field_names": ["maptaxlot"],
            },
            {
                "field_name": "taxlot",
                "function": (lambda x: x[-5:] if x else None),
                "arg_field_names": ["maptaxlot"],
            },
            {
                "field_name": "maptaxlot_hyphen",
                "function": maptaxlot_separated,
                "arg_field_names": ["maptaxlot"],
            },
        ]
        for kwargs in function_kwargs:
            etl.transform(
                arcetl.attributes.update_by_function,
                field_as_first_arg=False,
                **kwargs
            )
        # Build values from mappings.
        mapping_kwargs = [
            {
                "field_name": "units",
                "mapping": total_units,
                "key_field_names": ["maptaxlot", "landuse"],
            },
            {
                "field_name": "acres",
                "mapping": total_acres,
                "key_field_names": ["maptaxlot", "landuse"],
            },
        ]
        for kwargs in mapping_kwargs:
            etl.transform(arcetl.attributes.update_by_mapping, **kwargs)
        etl.transform(
            arcetl.attributes.update_by_feature_match,
            field_name="landusecount",
            id_field_names=["maptaxlot"],
            update_type="match_count",
        )
        etl.load(dataset.LAND_USE_AREA.path("pub"))


def taxlot_petition_document_etl():
    """Run ETL for taxlot/petition document cross-reference."""
    keys = {"taxlot": ["maptaxlot", "maptaxlot_hyphen", "map", "taxlot"]}
    with arcetl.ArcETL("Taxlot Petition Documents") as etl:
        etl.init_schema(dataset.TAXLOT_PETITION_DOCUMENT.path())
        # To avoid memory/topoengine errors when processing, run ETL on subsets.
        subsets = taxlot_subset_temp_copies(REAL_LOT_SQL, field_names=keys["taxlot"])
        petition_documents = petition_documents_map()
        for subset in subsets:
            with subset:
                arcetl.dataset.add_field(
                    subset.path, field_name="petition_id", field_type="text"
                )
                arcetl.geoset.overlay(
                    dataset_path=subset.path,
                    field_name="petition_id",
                    overlay_dataset_path=PATH["petition"],
                    overlay_field_name="ID_NUM",
                    overlay_central_coincident=True,
                )
                arcetl.attributes.update_by_function(
                    dataset_path=subset.path,
                    field_name="petition_id",
                    function=clean_whitespace,
                )
                # Remove features without overlay.
                arcetl.features.delete(
                    dataset_path=subset.path,
                    dataset_where_sql="petition_id is null",
                )
                petition_document_rows = []
                for petition in arcetl.attributes.as_dicts(
                    dataset_path=subset.path,
                    field_names=keys["taxlot"] + ["petition_id"],
                ):
                    petition.update({"document_name": None, "document_type": None})
                    # If petition has no documents, add a document-less row.
                    if petition["petition_id"] not in petition_documents:
                        petition_document_rows.append(petition)
                        continue
                    for document in petition_documents[petition["petition_id"]]:
                        row = copy(petition)
                        row.update(document)
                        petition_document_rows.append(row)
                if petition_document_rows:
                    etl.transform(
                        arcetl.features.insert_from_dicts,
                        insert_features=petition_document_rows,
                        field_names=petition_document_rows[0].keys(),
                    )
        # Set petition jurisdiction (only Eugene petitions at the moment).
        etl.transform(
            arcetl.attributes.update_by_value,
            field_name="petition_jurisdiction_code",
            value="EUG",
        )
        # Add temp field for convertible string values from petition lots.
        etl.transform(
            arcetl.dataset.add_field,
            field_name="petition_date_string",
            field_type="text",
            field_length=32,
        )
        # Assign joinable attributes.
        join_kwargs = [
            {"field_name": "petition_number", "join_field_name": "PETNUM"},
            {"field_name": "petition_type_code", "join_field_name": "PET_TYPE"},
            {"field_name": "petition_date_string", "join_field_name": "DATE"},
            {"field_name": "is_active", "join_field_name": "ACTIVE"},
            {"field_name": "alley_petition", "join_field_name": "ALY"},
            {"field_name": "bikepath_petition", "join_field_name": "BP"},
            {"field_name": "paving_petition", "join_field_name": "PAV"},
            {"field_name": "pedway_petition", "join_field_name": "PED"},
            {"field_name": "rehab_petition", "join_field_name": "RHB"},
            {"field_name": "sanitary_petition", "join_field_name": "SAN"},
            {"field_name": "sidewalk_petition", "join_field_name": "CW"},
            {"field_name": "storm_petition", "join_field_name": "STM"},
            {"field_name": "streetlight_petition", "join_field_name": "SL"},
        ]
        for kwargs in join_kwargs:
            etl.transform(
                arcetl.attributes.update_by_joined_value,
                join_dataset_path=PATH["petition"],
                on_field_pairs=[("petition_id", "ID_NUM")],
                **kwargs
            )
        petition_fields = [
            "alley_petition",
            "bikepath_petition",
            "paving_petition",
            "pedway_petition",
            "rehab_petition",
            "sanitary_petition",
            "sidewalk_petition",
            "storm_petition",
            "streetlight_petition",
        ]
        # Clean added values from sources of unknown maintenance.
        transform.clean_whitespace(
            etl,
            field_names=petition_fields
            + [
                "petition_number",
                "petition_type_code",
                "petition_date_string",
                "is_active",
            ],
        )
        # RLID uses Y/N flags; convert these Yes/No values.
        for field_name in petition_fields + ["is_active"]:
            etl.transform(
                arcetl.attributes.update_by_function,
                field_name=field_name,
                function=(lambda x: "Y" if x and x.upper() in ["Y", "YES"] else "N"),
            )
        # Update petition_date from the string value version.
        etl.transform(
            arcetl.attributes.update_by_function,
            field_name="petition_date",
            function=datetime_from_string,
            field_as_first_arg=False,
            arg_field_names=["petition_date_string"],
        )
        etl.transform(arcetl.dataset.delete_field, field_name="petition_date_string")
        # Add values derived from other values.
        petition_type = {
            "A": "Prepaid Assessment",
            "I": "Irrevocable Petition",
            "P": "Poll",
            "S": "Survey",
            "V": "Voluntary Petition",
            "X": "Adjacent to Unimproved Street or Alley",
        }
        etl.transform(
            arcetl.attributes.update_by_function,
            field_name="petition_type_description",
            function=petition_type.get,
            field_as_first_arg=False,
            arg_field_names=["petition_type_code"],
        )
        # Build URL values.
        for field_name, ext in [
            ("rlid_document_url", ".pdf"),
            ("rlid_image_url", ".tif"),
        ]:
            etl.transform(
                arcetl.attributes.update_by_function,
                field_name=field_name,
                function=(url.RLID_IMAGE_SHARE + "/petitions/" + "{}" + ext).format,
                field_as_first_arg=False,
                arg_field_names=["document_name"],
                dataset_where_sql="document_name is not null",
            )
        etl.load(dataset.TAXLOT_PETITION_DOCUMENT.path())
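

# The URL builder above relies on `str.format` being a bound method: the
# template string is assembled once, and its `.format` is handed to the
# transform as the row function. A tiny self-contained illustration (the share
# root here is an assumption, not the real RLID_IMAGE_SHARE value):
def _example_url_builder():
    """Show the bound-method URL pattern used for RLID document links."""
    build_url = ("http://example.com/petitions/" + "{}" + ".pdf").format
    assert build_url("12345") == "http://example.com/petitions/12345.pdf"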