def plan_designation_county_etl():
    """Run ETL for county plan designations."""
    with arcetl.ArcETL("County Plan Designations") as etl:
        etl.extract(dataset.PLAN_DESIGNATION_COUNTY.path("maint"))
        transform.add_missing_fields(etl, dataset.PLAN_DESIGNATION_COUNTY, tags=["pub"])
        etl.transform(
            arcetl.attributes.update_by_value, field_name="planjuris", value="LC"
        )
        for new_name, old_name in [("plandes", "ZONE_"), ("plandesnam", "ZONE_NAME")]:
            etl.transform(
                arcetl.attributes.update_by_function,
                field_name=new_name,
                function=(lambda x: x),
                field_as_first_arg=False,
                arg_field_names=[old_name],
            )
        # Remove county designations where city ones exist.
        etl.transform(
            arcetl.features.erase,
            erase_dataset_path=dataset.PLAN_DESIGNATION_CITY.path("pub"),
        )
        transform.clean_whitespace(
            etl, field_names=["planjuris", "plandes", "plandesnam", "finalorder"]
        )
        etl.transform(arcetl.features.delete, dataset_where_sql="plandes is null")
        etl.transform(
            arcetl.features.dissolve,
            dissolve_field_names=dataset.PLAN_DESIGNATION_COUNTY.field_names,
            tolerance=TOLERANCE["xy"],
        )
        etl.load(dataset.PLAN_DESIGNATION_COUNTY.path("pub"))


def taxlot_focus_box_etl():
    """Run ETL for taxlot focus boxes."""
    county_box = BoundingBox(
        xmin=3956505, xmax=4588181, ymin=650607, ymax=972071
    ).buffer(BUFFER_FEET["county"])
    for scale in ["large", "small"]:
        taxlot_boxes = taxlot_bounding_boxes()
        buffered_boxes = (
            (maptaxlot, box.buffer(distance=BUFFER_FEET["taxlot"]))
            for maptaxlot, box in taxlot_boxes
        )
        # Expansion must come before binding.
        expanded_boxes = (
            (maptaxlot, expand_focus_box(box, SCALE_FACTOR[scale]))
            for maptaxlot, box in buffered_boxes
        )
        bound_boxes = (
            (maptaxlot, bind_focus_box(box, county_box, replace_all=True))
            for maptaxlot, box in expanded_boxes
        )
        features = ((maptaxlot, box.esri_geometry) for maptaxlot, box in bound_boxes)
        with arcetl.ArcETL("Taxlot Focus Boxes - {}".format(scale.title())) as etl:
            etl.init_schema(dataset.TAXLOT_FOCUS_BOX.path(scale))
            etl.transform(
                arcetl.features.insert_from_iters,
                insert_features=features,
                field_names=["maptaxlot", "shape@"],
            )
            etl.load(dataset.TAXLOT_FOCUS_BOX.path(scale))


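# Minimal sketch (assumption, not this module's actual helpers): consistent
# with the calls above, expand_focus_box would scale a box about its center,
# and bind_focus_box would clamp it to stay within the county box. Assumes
# BoundingBox exposes the xmin/xmax/ymin/ymax passed to its constructor.
def _sketch_expand_focus_box(box, scale_factor):
    # Grow the box about its center point by scale_factor.
    half_width = (box.xmax - box.xmin) * scale_factor / 2.0
    half_height = (box.ymax - box.ymin) * scale_factor / 2.0
    center_x = (box.xmin + box.xmax) / 2.0
    center_y = (box.ymin + box.ymax) / 2.0
    return BoundingBox(
        xmin=center_x - half_width,
        xmax=center_x + half_width,
        ymin=center_y - half_height,
        ymax=center_y + half_height,
    )

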
def address_workfile_etl():
    """Run ETL for address workfile.

    Workfiles are for processing with the Mailers+4 app for postal info.
    """
    temp_field_type = {"house_nbr": "long", "house_suffix_code": "text"}
    with arcetl.ArcETL("Address Workfile") as etl:
        etl.init_schema(field_metadata_list=dataset.ADDRESS_WORKFILE.fields)
        for key, _type in temp_field_type.items():
            etl.transform(arcetl.dataset.add_field, field_name=key, field_type=_type)
        etl.transform(
            arcetl.features.insert_from_path,
            insert_dataset_path=dataset.SITE_ADDRESS.path("maint"),
            insert_where_sql=VALID_ADDRESS_WHERE_SQL,
        )
        field_concat_keys = {
            # Mailers+4 doesn't separate house number from suffix.
            "house": ["house_nbr", "house_suffix_code"],
            "concat_address": [
                "house_nbr",
                "house_suffix_code",
                "pre_direction_code",
                "street_name",
                "street_type_code",
                "unit_type_code",
                "unit_id",
            ],
        }
        for key, concat_keys in field_concat_keys.items():
            etl.transform(
                arcetl.attributes.update_by_function,
                field_name=key,
                function=concatenate_arguments,
                field_as_first_arg=False,
                arg_field_names=concat_keys,
            )
        for key in [
            "house",
            "pre_direction_code",
            "street_name",
            "street_type_code",
            "unit_type_code",
            "unit_id",
            "city_name",
        ]:
            etl.transform(
                arcetl.attributes.update_by_function,
                field_name="mp4_" + key,
                function=(lambda x: x),
                field_as_first_arg=False,
                arg_field_names=[key],
            )
        etl.transform(
            arcetl.attributes.update_by_value, field_name="state_code", value="OR"
        )
        etl.load(dataset.ADDRESS_WORKFILE.path())
        check_addresses(
            parameters_path=os.path.join(
                path.REGIONAL_STAGING, "MailersPlus4\\Address.parameters.txt"
            )
        )


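# Minimal sketch (assumption): concatenate_arguments is defined elsewhere in
# this module; a version consistent with its use above would space-join the
# non-empty argument values into a single address string.
def _sketch_concatenate_arguments(*values):
    # Skip None/empty parts; cast the rest to str & join with single spaces.
    parts = [str(value) for value in values if value not in (None, "")]
    return " ".join(parts) if parts else None

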
def plss_township_etl():
    """Run ETL for PLSS townships."""
    with arcetl.ArcETL("PLSS Townships") as etl:
        etl.extract(dataset.PLSS_TOWNSHIP.path("maint"))
        etl.transform(
            arcetl.features.dissolve,
            dissolve_field_names=["tr"],
            tolerance=TOLERANCE["xy"],
        )
        transform.add_missing_fields(etl, dataset.PLSS_TOWNSHIP, tags=["pub"])
        etl.transform(
            arcetl.attributes.update_by_function,
            field_name="tnum",
            function=(lambda tr: tr // 100),
            field_as_first_arg=False,
            arg_field_names=["tr"],
        )
        etl.transform(
            arcetl.attributes.update_by_function,
            field_name="rnum",
            function=(lambda tr: tr % 100),
            field_as_first_arg=False,
            arg_field_names=["tr"],
        )
        etl.load(dataset.PLSS_TOWNSHIP.path("pub"))


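# Worked example (illustration only): the lambdas above split a combined
# township-range number, e.g. tr=1702 gives tnum = 1702 // 100 = 17 and
# rnum = 1702 % 100 = 2.

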
def incorporated_city_limits_etl():
    """Run ETL for incorporated city limits."""
    with arcetl.ArcETL("Incorporated City Limits") as etl:
        etl.extract(dataset.ANNEXATION_HISTORY.path("pub"))
        etl.transform(
            arcetl.features.dissolve,
            dissolve_field_names=["annexcity"],
            tolerance=TOLERANCE["xy"],
        )
        transform.add_missing_fields(etl, dataset.INCORPORATED_CITY_LIMITS)
        etl.transform(
            arcetl.attributes.update_by_function,
            field_name="inccityabbr",
            function=(lambda x: x),
            field_as_first_arg=False,
            arg_field_names=["annexcity"],
        )
        etl.transform(
            arcetl.attributes.update_by_joined_value,
            field_name="inccityname",
            join_field_name="CityName",
            join_dataset_path=dataset.CITY.path(),
            on_field_pairs=[("inccityabbr", "CityNameAbbr")],
        )
        etl.load(dataset.INCORPORATED_CITY_LIMITS.path())


def address_point_not_in_range_etl():
    """Run procedures to find address points not covered by a road range.

    This could be folded into address_point_issues, if so desired.
    """
    validators = [address_issues_not_in_range]
    with arcetl.ArcETL("Address Point Issues: Not in Range") as etl:
        LOG.info("Start: Collect address attributes.")
        addresses = list(address_point_generator())
        LOG.info("End: Collect.")
        LOG.info("Start: Validate address attributes.")
        issues = []
        for validator in validators:
            LOG.info(validator.__name__)
            issues.extend(validator(addresses))
        LOG.info("End: Validate.")
        etl.init_schema(dataset.TILLAMOOK_ADDRESS_POINT_NOT_IN_ROAD_RANGE.path())
        if issues:
            maint_info = address_issue_maint_info_map(
                dataset.TILLAMOOK_ADDRESS_POINT_NOT_IN_ROAD_RANGE.path()
            )
            for issue in issues:
                issue_maint_info = maint_info.get(
                    (issue["address_id"], issue["description"]), {}
                )
                issue.update(issue_maint_info)
                issue.setdefault("maint_notes")
                issue.setdefault("maint_init_date", datetime.datetime.now())
            etl.transform(
                arcetl.features.insert_from_dicts,
                insert_features=issues,
                field_names=issues[0].keys(),
            )
        etl.load(dataset.TILLAMOOK_ADDRESS_POINT_NOT_IN_ROAD_RANGE.path())


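# Minimal sketch (assumption): validators such as address_issues_not_in_range
# take the collected address dicts and return issue dicts; the maintenance-info
# merge above implies each issue carries at least "address_id" & "description".
def _sketch_validator(addresses):
    for address in addresses:
        if not address.get("in_road_range"):  # Hypothetical precomputed flag.
            yield {
                "address_id": address["address_id"],
                "description": "Address point not covered by a road range.",
            }

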
def msag_ranges_current_etl():
    """Run ETL for current model of Master Street Address Guide (MSAG)."""
    with arcetl.ArcETL("MSAG Ranges - Current") as etl:
        etl.init_schema(dataset.TILLAMOOK_MSAG_RANGE.path("current"))
        etl.transform(
            arcetl.features.insert_from_dicts,
            insert_features=msag_ranges,
            field_names=MSAG_KEYS["core"] + ["shape@"],
        )
        old_msag_id = {
            row[:-1]: row[-1]
            for row in arcetl.attributes.as_iters(
                dataset.TILLAMOOK_MSAG_RANGE.path("master"),
                field_names=MSAG_KEYS["core"] + ["msag_id"],
                dataset_where_sql="expiration_date is null",
            )
        }

        def _assign_msag_id(*core_values):
            """Assign MSAG ID for the given range core attributes.

            Args:
                core_values: Tuple of MSAG range core values.

            Returns:
                str: Esri/MS-style UUID for MSAG range.
            """
            if core_values in old_msag_id:
                _id = old_msag_id[core_values]
            else:
                _id = "{" + str(uuid.uuid4()) + "}"
            return _id

        etl.transform(
            arcetl.attributes.update_by_function,
            field_name="msag_id",
            function=_assign_msag_id,
            arg_field_names=MSAG_KEYS["core"],
            field_as_first_arg=False,
        )
        # Get effective date for ranges already in master.
        etl.transform(
            arcetl.attributes.update_by_joined_value,
            field_name="effective_date",
            join_field_name="effective_date",
            join_dataset_path=dataset.TILLAMOOK_MSAG_RANGE.path("master"),
            on_field_pairs=[("msag_id", "msag_id")],
        )
        # Assign effective date for new ranges.
        etl.transform(
            arcetl.attributes.update_by_function,
            field_name="effective_date",
            function=datetime.date.today,
            field_as_first_arg=False,
            dataset_where_sql="effective_date is null",
        )
        etl.load(dataset.TILLAMOOK_MSAG_RANGE.path("current"))


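# Illustration only: _assign_msag_id keeps msag_id stable across runs by
# reusing the master record's ID whenever the core-attribute tuple matches.
# The lookup works because update_by_function passes arg_field_names values
# in order, matching the key tuples built from MSAG_KEYS["core"] above. A
# self-contained equivalent of that reuse logic:
def _sketch_msag_id_reuse(core_values, old_msag_id):
    return old_msag_id.get(core_values, "{" + str(uuid.uuid4()) + "}")

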
def zip_code_area_etl():
    """Run ETL for ZIP code areas."""
    with arcetl.ArcETL("ZIP Code Areas") as etl:
        etl.extract(dataset.ZIP_CODE_AREA.path("maint"))
        etl.transform(
            arcetl.features.dissolve,
            dissolve_field_names=dataset.ZIP_CODE_AREA.field_names,
            tolerance=TOLERANCE["xy"],
        )
        etl.load(dataset.ZIP_CODE_AREA.path("pub"))


def state_senator_district_etl():
    """Run ETL for state senator districts."""
    with arcetl.ArcETL("State Senator Districts") as etl:
        etl.extract(dataset.STATE_SENATOR_DISTRICT.path("maint"))
        etl.transform(
            arcetl.features.dissolve,
            dissolve_field_names=dataset.STATE_SENATOR_DISTRICT.field_names,
            tolerance=TOLERANCE["xy"],
        )
        etl.load(dataset.STATE_SENATOR_DISTRICT.path("pub"))


def lcc_board_zone_etl():
    """Run ETL for LCC board zones."""
    with arcetl.ArcETL("LCC Board Zones") as etl:
        etl.extract(dataset.LCC_BOARD_ZONE.path("maint"))
        etl.transform(
            arcetl.features.dissolve,
            dissolve_field_names=dataset.LCC_BOARD_ZONE.field_names,
            tolerance=TOLERANCE["xy"],
        )
        etl.load(dataset.LCC_BOARD_ZONE.path("pub"))


def election_precinct_etl():
    """Run ETL for election precincts."""
    with arcetl.ArcETL("Election Precincts") as etl:
        etl.extract(dataset.ELECTION_PRECINCT.path("maint"))
        etl.transform(
            arcetl.features.dissolve,
            dissolve_field_names=dataset.ELECTION_PRECINCT.field_names,
            tolerance=TOLERANCE["xy"],
        )
        etl.load(dataset.ELECTION_PRECINCT.path("pub"))


def school_district_etl():
    """Run ETL for school districts."""
    with arcetl.ArcETL("School Districts") as etl:
        etl.extract(dataset.SCHOOL_DISTRICT.path("maint"))
        etl.transform(
            arcetl.features.dissolve,
            dissolve_field_names=dataset.SCHOOL_DISTRICT.field_names,
            tolerance=TOLERANCE["xy"],
        )
        etl.load(dataset.SCHOOL_DISTRICT.path("pub"))


def middle_school_area_etl():
    """Run ETL for middle school areas."""
    with arcetl.ArcETL("Middle School Areas") as etl:
        etl.extract(dataset.MIDDLE_SCHOOL_AREA.path("maint"))
        etl.transform(
            arcetl.features.dissolve,
            dissolve_field_names=dataset.MIDDLE_SCHOOL_AREA.field_names,
            tolerance=TOLERANCE["xy"],
        )
        etl.load(dataset.MIDDLE_SCHOOL_AREA.path("pub"))


def elementary_school_area_etl():
    """Run ETL for elementary school areas."""
    with arcetl.ArcETL("Elementary School Areas") as etl:
        etl.extract(dataset.ELEMENTARY_SCHOOL_AREA.path("maint"))
        etl.transform(
            arcetl.features.dissolve,
            dissolve_field_names=dataset.ELEMENTARY_SCHOOL_AREA.field_names,
            tolerance=TOLERANCE["xy"],
        )
        etl.load(dataset.ELEMENTARY_SCHOOL_AREA.path("pub"))


def ambulance_service_area_etl():
    """Run ETL for ambulance service areas."""
    with arcetl.ArcETL("Ambulance Service Areas") as etl:
        etl.extract(dataset.AMBULANCE_SERVICE_AREA.path("maint"))
        etl.transform(
            arcetl.features.dissolve,
            dissolve_field_names=dataset.AMBULANCE_SERVICE_AREA.field_names,
            tolerance=TOLERANCE["xy"],
        )
        etl.load(dataset.AMBULANCE_SERVICE_AREA.path("pub"))


def willamette_river_greenway_etl():
    """Run ETL for the Willamette River Greenway."""
    with arcetl.ArcETL("Willamette River Greenway") as etl:
        etl.extract(dataset.WILLAMETTE_RIVER_GREENWAY.path("maint"))
        etl.transform(
            arcetl.features.dissolve,
            dissolve_field_names=dataset.WILLAMETTE_RIVER_GREENWAY.field_names,
            tolerance=TOLERANCE["xy"],
        )
        etl.load(dataset.WILLAMETTE_RIVER_GREENWAY.path("pub"))


def metro_plan_boundary_etl():
    """Run ETL for the Metro Plan boundary."""
    with arcetl.ArcETL("Metro Plan Boundary") as etl:
        etl.extract(dataset.METRO_PLAN_BOUNDARY.path("maint"))
        etl.transform(
            arcetl.features.dissolve,
            dissolve_field_names=dataset.METRO_PLAN_BOUNDARY.field_names,
            tolerance=TOLERANCE["xy"],
        )
        etl.load(dataset.METRO_PLAN_BOUNDARY.path("pub"))


def nodal_development_area_etl():
    """Run ETL for nodal development areas."""
    with arcetl.ArcETL("Nodal Development Areas") as etl:
        etl.extract(dataset.NODAL_DEVELOPMENT_AREA.path("maint"))
        etl.transform(
            arcetl.features.dissolve,
            dissolve_field_names=dataset.NODAL_DEVELOPMENT_AREA.field_names,
            tolerance=TOLERANCE["xy"],
        )
        etl.load(dataset.NODAL_DEVELOPMENT_AREA.path("pub"))


def psap_area_etl():
    """Run ETL for PSAP areas."""
    with arcetl.ArcETL("PSAP Areas") as etl:
        etl.extract(dataset.PSAP_AREA.path("maint"))
        etl.transform(
            arcetl.features.dissolve,
            dissolve_field_names=dataset.PSAP_AREA.field_names,
            tolerance=TOLERANCE["xy"],
        )
        etl.load(dataset.PSAP_AREA.path("pub"))


def emergency_service_zone_etl():
    """Run ETL for emergency service zones."""
    with arcetl.ArcETL("Emergency Service Zones") as etl:
        etl.extract(dataset.TILLAMOOK_EMS.path())
        transform.add_missing_fields(etl, dataset.TILLAMOOK_EMERGENCY_SERVICE_ZONE)
        identity_kwargs = [
            {
                "field_name": "ems",
                "identity_field_name": "district",
                "identity_dataset_path": dataset.TILLAMOOK_EMS.path(),
            },
            {
                "field_name": "fire",
                "identity_field_name": "district",
                "identity_dataset_path": dataset.TILLAMOOK_FIRE.path(),
            },
            {
                "field_name": "police",
                "identity_field_name": "district",
                "identity_dataset_path": dataset.TILLAMOOK_POLICE.path(),
            },
        ]
        for kwargs in identity_kwargs:
            etl.transform(arcetl.geoset.identity, tolerance=2, **kwargs)
        # Drop features that lack all of EMS, fire, & police coverage.
        etl.transform(
            arcetl.features.delete,
            dataset_where_sql=" and ".join(
                ["ems is null", "fire is null", "police is null"]
            ),
        )
        join_kwargs = [
            {
                "field_name": "esn",
                "join_field_name": "esn",
                "join_dataset_path": dataset.TILLAMOOK_EMERGENCY_SERVICE_NUMBER.path(),
                "on_field_pairs": [
                    ("police", "police"),
                    ("fire", "fire"),
                    ("ems", "ems"),
                ],
            }
        ]
        for kwargs in join_kwargs:
            etl.transform(arcetl.attributes.update_by_joined_value, **kwargs)
        etl.transform(
            arcetl.features.dissolve,
            dissolve_field_names=[
                field["name"]
                for field in dataset.TILLAMOOK_EMERGENCY_SERVICE_NUMBER.fields
            ],
            tolerance=TOLERANCE,
        )
        etl.load(dataset.TILLAMOOK_EMERGENCY_SERVICE_ZONE.path())


def county_commissioner_dist_etl():
    """Run ETL for county commissioner districts."""
    with arcetl.ArcETL("County Commissioner Districts") as etl:
        etl.extract(dataset.COUNTY_COMMISSIONER_DISTRICT.path("maint"))
        etl.transform(
            arcetl.features.dissolve,
            dissolve_field_names=dataset.COUNTY_COMMISSIONER_DISTRICT.field_names,
            tolerance=TOLERANCE["xy"],
        )
        etl.load(dataset.COUNTY_COMMISSIONER_DISTRICT.path("pub"))


def plan_designation_etl():
    """Run ETL for plan designations."""
    with arcetl.ArcETL("Plan Designations") as etl:
        etl.init_schema(dataset.PLAN_DESIGNATION.path("pub"))
        for _path in dataset.PLAN_DESIGNATION.path("inserts"):
            etl.transform(arcetl.features.insert_from_path, insert_dataset_path=_path)
        # Need a singular ID for juris-designation-overlay.
        etl.transform(arcetl.attributes.update_by_unique_id, field_name="plandes_id")
        etl.load(dataset.PLAN_DESIGNATION.path("pub"))


def swc_district_etl():
    """Run ETL for soil & water conservation districts."""
    with arcetl.ArcETL("Soil & Water Conservation Districts") as etl:
        etl.extract(dataset.SOIL_WATER_CONSERVATION_DISTRICT.path("maint"))
        etl.transform(
            arcetl.features.dissolve,
            dissolve_field_names=dataset.SOIL_WATER_CONSERVATION_DISTRICT.field_names,
            tolerance=TOLERANCE["xy"],
        )
        etl.load(dataset.SOIL_WATER_CONSERVATION_DISTRICT.path("pub"))


def state_representative_dist_etl():
    """Run ETL for state representative districts."""
    with arcetl.ArcETL("State Representative Districts") as etl:
        etl.extract(dataset.STATE_REPRESENTATIVE_DISTRICT.path("maint"))
        etl.transform(
            arcetl.features.dissolve,
            dissolve_field_names=dataset.STATE_REPRESENTATIVE_DISTRICT.field_names,
            tolerance=TOLERANCE["xy"],
        )
        etl.load(dataset.STATE_REPRESENTATIVE_DISTRICT.path("pub"))


def tax_code_area_etl():
    """Run ETL for tax code areas."""
    with arcetl.ArcETL("Tax Code Areas") as etl:
        etl.extract(dataset.TAX_CODE_AREA.path("maint"))
        transform.clean_whitespace(
            etl, field_names=["taxcode", "source", "ordinance", "schooldist"]
        )
        etl.transform(
            arcetl.features.dissolve,
            dissolve_field_names=dataset.TAX_CODE_AREA.field_names,
            tolerance=TOLERANCE["xy"],
        )
        etl.load(dataset.TAX_CODE_AREA.path("pub"))


def template_etl():
    """Run ETL for {##TODO}."""
    ##TODO: Execute with UNC connection?
    # conn = credential.UNCPathCredential(path.XX_SHARE, **credential.XX_SHARE)
    # with conn, arcetl.ArcETL("##TODO: ETL Name") as etl:
    with arcetl.ArcETL("##TODO: ETL Name") as etl:
        ##TODO: Add extract keyword arguments (if necessary).
        etl.extract("##TODO: dataset_path")
        ##TODO: Add transform keyword arguments (if necessary).
        etl.transform("##TODO: transformation (e.g. arcetl.features.dissolve)")
        ##TODO: Add load keyword arguments (if necessary).
        etl.load("##TODO: dataset_path")


def template_update():
    """Run update for {##TODO}."""
    LOG.info("Start: {##TODO}.")
    ##TODO: Execute with UNC connection?
    # conn = credential.UNCPathCredential(path.XX_SHARE, **credential.XX_SHARE)
    # with conn, arcetl.ArcETL("##TODO: Update Name") as etl:
    with arcetl.ArcETL("##TODO: Update Name") as etl:
        ##TODO: Add extract keyword arguments (if necessary).
        etl.extract("##TODO: dataset_path")
        ##TODO: Add transform keyword arguments (if necessary).
        etl.transform("##TODO: transformation (e.g. arcetl.features.dissolve)")
        ##TODO: Add load keyword arguments (if necessary).
        etl.update("##TODO: dataset_path", "##TODO: id_field_names")


def plss_quarter_section_etl():
    """Run ETL for PLSS quarter sections."""
    with arcetl.ArcETL("PLSS Quarter-sections") as etl:
        etl.extract(dataset.PLSS_QUARTER_SECTION.path("maint"))
        etl.transform(
            arcetl.features.dissolve,
            dissolve_field_names=[
                field["name"]
                for field in dataset.PLSS_QUARTER_SECTION.fields
                if "pub" in field["tags"]
            ],
            tolerance=TOLERANCE["xy"],
        )
        etl.load(dataset.PLSS_QUARTER_SECTION.path("pub"))


def taxlot_owner_etl():
    """Run ETL for taxlot owners (real & active)."""
    rlid_field_name = {
        "maptaxlot": "maplot",
        "acctno": "account_stripped",
        "ownname": "owner_name",
        "addr1": "addr_line1",
        "addr2": "addr_line2",
        "addr3": "addr_line3",
        "ownercity": "city",
        "ownerprvst": "prov_state",
        "ownerzip": "zip_code",
        "ownercntry": "country",
    }
    with arcetl.ArcETL("Taxlot Owners") as etl:
        LOG.info("Start: Collect accounts.")
        accounts = set(
            acct
            for acct, in arcetl.attributes.as_iters(
                os.path.join(database.RLID.path, "dbo.Account"),
                field_names=["account_stripped"],
                dataset_where_sql="""
                    (active_this_year = 'Y' or new_acct_active_next_year = 'Y')
                    and account_int < 4000000
                """,
            )
        )
        LOG.info("End: Collect.")
        LOG.info("Start: Collect owners.")
        _owners = arcetl.attributes.as_dicts(
            dataset.TAXLOT_OWNER.path("source"),
            field_names=rlid_field_name.values(),
            dataset_where_sql="account_int < 4000000",
        )
        owners = []
        for owner in _owners:
            if owner["account_stripped"] not in accounts:
                continue
            # Rename keys & clean values (many in RLID are char/varchar type).
            owner = {
                key: clean_whitespace(owner[rlid_key])
                for key, rlid_key in rlid_field_name.items()
            }
            if owner not in owners:
                owners.append(owner)
        LOG.info("End: Collect.")
        etl.init_schema(dataset.TAXLOT_OWNER.path("pub"))
        etl.transform(
            arcetl.features.insert_from_dicts,
            insert_features=owners,
            field_names=rlid_field_name.keys(),
        )
        etl.load(dataset.TAXLOT_OWNER.path("pub"))


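# Minimal sketch (assumption): clean_whitespace is imported from elsewhere; a
# version consistent with the comment above (RLID char/varchar values) would
# collapse internal whitespace runs, strip the ends, & pass None through.
def _sketch_clean_whitespace(value):
    if value is None:
        return None
    cleaned = " ".join(str(value).split())
    return cleaned if cleaned else None

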
def building_etl():
    """Run ETL for building footprints."""
    with arcetl.ArcETL("Buildings") as etl:
        etl.extract(dataset.BUILDING.path("maint"))
        # Null out non-positive heights; treat them as unknown.
        etl.transform(
            arcetl.attributes.update_by_function,
            field_name="height",
            function=(lambda x: None if x is None or x <= 0 else x),
        )
        etl.update(
            dataset.BUILDING.path("pub"),
            id_field_names=dataset.BUILDING.id_field_names,
            use_edit_session=False,
        )