def __init__(self, dataset_path, dataset_where_sql=None, **kwargs):
    """Initialize instance.

    Args:
        dataset_path (str): Path of dataset.
        dataset_where_sql (str): SQL where-clause for dataset subselection.
        **kwargs: Arbitrary keyword arguments. See below.

    Keyword Args:
        view_name (str): Name of view. Default is None (auto-generate name).
        field_names (iter): Collection of field names to include in view. If
            field_names not specified, all fields will be included.
        force_nonspatial (bool): Flag that forces a nonspatial view. Default is
            False.
    """
    self.name = kwargs.get("view_name", unique_name("view"))
    self.dataset_path = dataset_path
    self.dataset_meta = dataset_metadata(dataset_path)
    self.field_names = list(
        kwargs.get("field_names", self.dataset_meta["field_names"])
    )
    self.is_spatial = all(
        [self.dataset_meta["is_spatial"], not kwargs.get("force_nonspatial", False)]
    )
    self._where_sql = dataset_where_sql
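# Usage sketch (hypothetical dataset path & where-clause). As elsewhere in this
# package, the view is entered as a context manager, which creates the layer
# and exposes `name` & `count`:
#
#     view = DatasetView("C:/data/work.gdb/roads", "county = 'Lane'")
#     with view:
#         print(view.name, view.count)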
def table_to_points(
    dataset_path,
    output_path,
    x_field_name,
    y_field_name,
    spatial_reference_item=4326,
    **kwargs
):
    """Convert coordinate table to a new point dataset.

    Args:
        dataset_path (str): Path of the dataset.
        output_path (str): Path of the output dataset.
        x_field_name (str): Name of field with x-coordinate.
        y_field_name (str): Name of field with y-coordinate.
        spatial_reference_item: Item from which the spatial reference of the
            output geometry will be derived. Default is 4326 (EPSG code for
            unprojected WGS84).
        **kwargs: Arbitrary keyword arguments. See below.

    Keyword Args:
        dataset_where_sql (str): SQL where-clause for dataset subselection.
        z_field_name (str): Name of the field with z-coordinate.
        log_level (str): Level to log the function at. Default is "info".

    Returns:
        str: Path of the converted dataset.
    """
    kwargs.setdefault("dataset_where_sql")
    kwargs.setdefault("z_field_name")
    log = leveled_logger(LOG, kwargs.setdefault("log_level", "info"))
    log("Start: Convert %s to spatial dataset %s.", dataset_path, output_path)
    meta = {"spatial": spatial_reference_metadata(spatial_reference_item)}
    view_name = unique_name()
    arcpy.management.MakeXYEventLayer(
        table=dataset_path,
        out_layer=view_name,
        in_x_field=x_field_name,
        in_y_field=y_field_name,
        in_z_field=kwargs.get("z_field_name"),
        spatial_reference=meta["spatial"]["object"],
    )
    dataset.copy(
        view_name,
        output_path,
        dataset_where_sql=kwargs["dataset_where_sql"],
        log_level=None,
    )
    dataset.delete(view_name, log_level=None)
    log("End: Convert.")
    return output_path
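# Usage sketch (hypothetical table & field names): build points from a GPS-log
# table with longitude/latitude columns, in WGS84 (the default EPSG:4326):
#
#     table_to_points(
#         "C:/data/work.gdb/gps_log",
#         output_path="C:/data/work.gdb/gps_points",
#         x_field_name="longitude",
#         y_field_name="latitude",
#     )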
def insert_from_path(dataset_path, insert_dataset_path, field_names=None, **kwargs):
    """Insert features into dataset from another dataset.

    Args:
        dataset_path (str): Path of the dataset.
        insert_dataset_path (str): Path of dataset to insert features from.
        field_names (iter): Collection of field names to insert. Listed fields
            must be present in both datasets. If field_names is None, all
            fields will be inserted.
        **kwargs: Arbitrary keyword arguments. See below.

    Keyword Args:
        insert_where_sql (str): SQL where-clause for insert-dataset
            subselection.
        use_edit_session (bool): Flag to perform updates in an edit session.
            Default is False.
        log_level (str): Level to log the function at. Default is 'info'.

    Returns:
        collections.Counter: Counts for each feature action.
    """
    kwargs.setdefault('insert_where_sql')
    kwargs.setdefault('use_edit_session', False)
    log = leveled_logger(LOG, kwargs.setdefault('log_level', 'info'))
    log("Start: Insert features into %s from %s.", dataset_path, insert_dataset_path)
    meta = {
        'dataset': arcobj.dataset_metadata(dataset_path),
        'insert': arcobj.dataset_metadata(insert_dataset_path),
    }
    if field_names is None:
        keys = set.intersection(
            *(
                set(name.lower() for name in _meta['field_names_tokenized'])
                for _meta in meta.values()
            )
        )
    else:
        keys = set(name.lower() for name in contain(field_names))
    # OIDs & area/length "fields" have no business being part of an insert.
    # Geometry itself is handled separately in append function.
    for _meta in meta.values():
        for key in chain(*_meta['field_token'].items()):
            # Compare lowercased: `keys` holds lowercased names.
            keys.discard(key.lower())
    append_kwargs = {
        'inputs': unique_name('view'),
        'target': dataset_path,
        'schema_type': 'no_test',
        'field_mapping': arcpy.FieldMappings(),
    }
    # Create field maps.
    # ArcGIS Pro's no-test append is case-sensitive (verified 1.0-1.1.1).
    # Avoid this problem by using field mapping.
    # BUG-000090970 - ArcGIS Pro 'No test' field mapping in Append tool does
    # not auto-map to the same field name if naming convention differs.
    for key in keys:
        field_map = arcpy.FieldMap()
        field_map.addInputField(insert_dataset_path, key)
        append_kwargs['field_mapping'].addFieldMap(field_map)
    view = arcobj.DatasetView(
        insert_dataset_path,
        kwargs['insert_where_sql'],
        view_name=append_kwargs['inputs'],
        # Must be nonspatial to append to nonspatial table.
        force_nonspatial=(not meta['dataset']['is_spatial']),
    )
    session = arcobj.Editor(
        meta['dataset']['workspace_path'], kwargs['use_edit_session']
    )
    with view, session:
        arcpy.management.Append(**append_kwargs)
        feature_count = Counter({'inserted': view.count})
    log("%s features inserted.", feature_count['inserted'])
    log("End: Insert.")
    return feature_count
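# Usage sketch (hypothetical datasets & fields): insert only the listed shared
# attribute columns, inside an edit session:
#
#     count = insert_from_path(
#         "C:/data/work.gdb/parcels",
#         insert_dataset_path="C:/data/work.gdb/parcels_new",
#         field_names=["parcel_id", "zoning"],
#         use_edit_session=True,
#     )
#     LOG.info("Inserted %s features.", count["inserted"])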
def union(dataset_path, field_name, union_dataset_path, union_field_name, **kwargs):
    """Assign union attribute to features, splitting where necessary.

    Note:
        This function has a 'chunking' loop routine in order to avoid an
        unhelpful output error that occurs when the inputs are rather large.
        For some reason the union will 'succeed' with an empty output warning,
        but not create an output dataset. Running the union against smaller
        sets of data generally avoids this conundrum.

    Args:
        dataset_path (str): Path of the dataset.
        field_name (str): Name of the dataset's field to assign to.
        union_dataset_path (str): Path of the union dataset.
        union_field_name (str): Name of union dataset's field with values to
            assign.
        **kwargs: Arbitrary keyword arguments. See below.

    Keyword Args:
        chunk_size (int): Number of features to process per loop. Default is
            4096.
        dataset_where_sql (str): SQL where-clause for dataset subselection.
        union_where_sql (str): SQL where-clause for the union dataset
            subselection.
        replacement_value: Value to replace present union-field values with.
        tolerance (float): Tolerance for coincidence, in dataset's units.
        log_level (str): Level to log the function at. Default is 'info'.

    Returns:
        str: Path of the dataset updated.
    """
    kwargs.setdefault('chunk_size', 4096)
    kwargs.setdefault('dataset_where_sql')
    kwargs.setdefault('union_where_sql')
    kwargs.setdefault('tolerance')
    log = leveled_logger(LOG, kwargs.setdefault('log_level', 'info'))
    log(
        "Start: Union-set attributes in %s on %s by overlay values in %s on %s.",
        field_name,
        dataset_path,
        union_field_name,
        union_dataset_path,
    )
    if kwargs.get('replacement_value') is not None:
        update_function = (lambda x: kwargs['replacement_value'] if x else None)
    else:
        # Union puts empty string when union feature not present.
        # Fix to null (replacement value function does this inherently).
        update_function = (lambda x: None if x == '' else x)
    dataset_view = arcobj.DatasetView(dataset_path, kwargs['dataset_where_sql'])
    # Create a temporary copy of the union dataset.
    temp_union = arcobj.TempDatasetCopy(
        union_dataset_path, kwargs['union_where_sql'], field_names=[union_field_name]
    )
    with dataset_view, temp_union:
        # Avoid field name collisions with neutral field name.
        temp_union.field_name = dataset.rename_field(
            temp_union.path,
            union_field_name,
            new_field_name=unique_name(union_field_name),
            log_level=None,
        )
        for chunk_view in dataset_view.as_chunks(kwargs['chunk_size']):
            temp_output_path = unique_path('output')
            arcpy.analysis.Union(
                in_features=[chunk_view.name, temp_union.path],
                out_feature_class=temp_output_path,
                join_attributes='all',
                cluster_tolerance=kwargs['tolerance'],
                gaps=False,
            )
            # Clean up bad or null geometry created in processing.
            arcpy.management.RepairGeometry(temp_output_path)
            # Push union (or replacement) value from temp to update field.
            attributes.update_by_function(
                temp_output_path,
                field_name,
                update_function,
                field_as_first_arg=False,
                arg_field_names=[temp_union.field_name],
                log_level=None,
            )
            # Replace original chunk features with new features.
            features.delete(chunk_view.name, log_level=None)
            features.insert_from_path(dataset_path, temp_output_path, log_level=None)
            dataset.delete(temp_output_path, log_level=None)
    log("End: Union.")
    return dataset_path
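# Usage sketch (hypothetical datasets & fields): tag each parcel with the code
# of the flood zone it lies in, splitting parcels that straddle zone edges; a
# smaller chunk_size trades speed for robustness on large inputs:
#
#     union(
#         "C:/data/work.gdb/parcels",
#         field_name="flood_zone",
#         union_dataset_path="C:/data/work.gdb/flood_zones",
#         union_field_name="zone_code",
#         chunk_size=2048,
#     )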
def overlay(
    dataset_path, field_name, overlay_dataset_path, overlay_field_name, **kwargs
):
    """Assign overlay attribute to features, splitting where necessary.

    Note:
        Only one overlay flag at a time can be used. If multiple are set to
        True, the first one referenced in the code will be used. If no overlay
        flags are set, the operation will perform a basic intersection check,
        and the result will be at the whim of the geoprocessing environment's
        merge rule for the update field.

        This function has a 'chunking' loop routine in order to avoid an
        unhelpful output error that occurs when the inputs are rather large.
        For some reason the spatial join will 'succeed' with an empty output
        warning, but not create an output dataset. Running the join against
        smaller sets of data generally avoids this conundrum.

    Args:
        dataset_path (str): Path of the dataset.
        field_name (str): Name of the dataset's field to assign to.
        overlay_dataset_path (str): Path of the overlay dataset.
        overlay_field_name (str): Name of overlay dataset's field with values
            to assign.
        **kwargs: Arbitrary keyword arguments. See below.

    Keyword Args:
        chunk_size (int): Number of features to process per loop. Default is
            4096.
        dataset_where_sql (str): SQL where-clause for dataset subselection.
        overlay_central_coincident (bool): Flag to overlay the
            centrally-coincident value. Default is False.
        overlay_most_coincident (bool): Flag to overlay the most coincident
            value. Default is False.
        overlay_where_sql (str): SQL where-clause for the overlay dataset
            subselection.
        replacement_value: Value to replace overlay field values with.
        tolerance (float): Tolerance for coincidence, in dataset's units.
        log_level (str): Level to log the function at. Default is 'info'.

    Returns:
        str: Path of the dataset updated.
    """
    kwargs.setdefault('chunk_size', 4096)
    kwargs.setdefault('dataset_where_sql')
    kwargs.setdefault('overlay_central_coincident', False)
    kwargs.setdefault('overlay_most_coincident', False)
    kwargs.setdefault('overlay_where_sql')
    log = leveled_logger(LOG, kwargs.setdefault('log_level', 'info'))
    log(
        "Start: Overlay-set attributes in %s on %s by overlay values in %s on %s.",
        field_name,
        dataset_path,
        overlay_field_name,
        overlay_dataset_path,
    )
    # Check flags & set details for spatial join call.
    join_kwargs = {'join_operation': 'join_one_to_many', 'join_type': 'keep_all'}
    if kwargs['overlay_central_coincident']:
        join_kwargs['match_option'] = 'have_their_center_in'
    elif kwargs['overlay_most_coincident']:
        raise NotImplementedError("overlay_most_coincident not yet implemented.")
    else:
        join_kwargs['match_option'] = 'intersect'
    if kwargs.get('replacement_value') is not None:
        update_function = (lambda x: kwargs['replacement_value'] if x else None)
    else:
        update_function = (lambda x: x)
    meta = {'orig_tolerance': arcpy.env.XYTolerance}
    dataset_view = arcobj.DatasetView(dataset_path, kwargs['dataset_where_sql'])
    # Create temporary copy of overlay dataset.
    temp_overlay = arcobj.TempDatasetCopy(
        overlay_dataset_path,
        kwargs['overlay_where_sql'],
        field_names=[overlay_field_name],
    )
    with dataset_view, temp_overlay:
        # Avoid field name collisions with neutral field name.
        temp_overlay.field_name = dataset.rename_field(
            temp_overlay.path,
            overlay_field_name,
            new_field_name=unique_name(overlay_field_name),
            log_level=None,
        )
        if 'tolerance' in kwargs:
            arcpy.env.XYTolerance = kwargs['tolerance']
        for chunk_view in dataset_view.as_chunks(kwargs['chunk_size']):
            temp_output_path = unique_path('output')
            arcpy.analysis.SpatialJoin(
                target_features=chunk_view.name,
                join_features=temp_overlay.path,
                out_feature_class=temp_output_path,
                **join_kwargs
            )
            # Clean up bad or null geometry created in processing.
            arcpy.management.RepairGeometry(temp_output_path)
            # Push overlay (or replacement) value from temp to update field.
            attributes.update_by_function(
                temp_output_path,
                field_name,
                update_function,
                field_as_first_arg=False,
                arg_field_names=[temp_overlay.field_name],
                log_level=None,
            )
            # Replace original chunk features with new features.
            features.delete(chunk_view.name, log_level=None)
            features.insert_from_path(dataset_path, temp_output_path, log_level=None)
            dataset.delete(temp_output_path, log_level=None)
        if 'tolerance' in kwargs:
            arcpy.env.XYTolerance = meta['orig_tolerance']
    log("End: Overlay.")
    return dataset_path
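# Usage sketch (hypothetical datasets & fields): assign each parcel piece the
# name of the city whose limits contain its center:
#
#     overlay(
#         "C:/data/work.gdb/parcels",
#         field_name="city_name",
#         overlay_dataset_path="C:/data/work.gdb/city_limits",
#         overlay_field_name="name",
#         overlay_central_coincident=True,
#     )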
def update_by_overlay(
    dataset_path, field_name, overlay_dataset_path, overlay_field_name, **kwargs
):
    """Update attribute values by finding overlay feature value.

    Note:
        Since only one value will be selected in the overlay, operations with
        multiple overlaying features will respect the geoprocessing
        environment merge rule. This rule generally defaults to the value of
        the "first" feature.

        Only one overlay flag at a time can be used (e.g.
        "overlay_most_coincident", "overlay_central_coincident"). If multiple
        are set to True, the first one referenced in the code will be used. If
        no overlay flags are set, the operation will perform a basic
        intersection check, and the result will be at the whim of the
        geoprocessing environment merge rule for the update field.

    Args:
        dataset_path (str): Path of the dataset.
        field_name (str): Name of the field.
        overlay_dataset_path (str): Path of the overlay-dataset.
        overlay_field_name (str): Name of the overlay-field.
        **kwargs: Arbitrary keyword arguments. See below.

    Keyword Args:
        dataset_where_sql (str): SQL where-clause for dataset subselection.
        overlay_central_coincident (bool): Overlay will use the
            centrally-coincident value if True. Default is False.
        overlay_most_coincident (bool): Overlay will use the most coincident
            value if True. Default is False.
        overlay_where_sql (str): SQL where-clause for overlay dataset
            subselection.
        replacement_value: Value to replace a present overlay-field value with.
        tolerance (float): Tolerance for coincidence, in units of the dataset.
        use_edit_session (bool): Updates are done in an edit session if True.
            Default is False.
        log_level (str): Level to log the function at. Default is "info".

    Returns:
        collections.Counter: Counts for each feature action.
    """
    kwargs.setdefault("dataset_where_sql")
    kwargs.setdefault("overlay_central_coincident", False)
    kwargs.setdefault("overlay_most_coincident", False)
    kwargs.setdefault("overlay_where_sql")
    kwargs.setdefault("use_edit_session", False)
    log = leveled_logger(LOG, kwargs.setdefault("log_level", "info"))
    log(
        "Start: Update attributes in %s on %s by overlay values in %s on %s.",
        field_name,
        dataset_path,
        overlay_field_name,
        overlay_dataset_path,
    )
    meta = {
        "dataset": dataset_metadata(dataset_path),
        "original_tolerance": arcpy.env.XYTolerance,
    }
    join_kwargs = {"join_operation": "join_one_to_many", "join_type": "keep_all"}
    if kwargs["overlay_central_coincident"]:
        join_kwargs["match_option"] = "have_their_center_in"
    # TODO: Implement overlay_most_coincident.
    elif kwargs["overlay_most_coincident"]:
        raise NotImplementedError("overlay_most_coincident not yet implemented.")
    else:
        join_kwargs["match_option"] = "intersect"
    dataset_view = DatasetView(dataset_path, kwargs["dataset_where_sql"])
    overlay_copy = TempDatasetCopy(
        overlay_dataset_path,
        kwargs["overlay_where_sql"],
        field_names=[overlay_field_name],
    )
    with dataset_view, overlay_copy:
        # Avoid field name collisions with neutral name.
        overlay_copy.field_name = dataset.rename_field(
            overlay_copy.path,
            overlay_field_name,
            new_field_name=unique_name(overlay_field_name),
            log_level=None,
        )
        if "tolerance" in kwargs:
            arcpy.env.XYTolerance = kwargs["tolerance"]
        # Create temp output of the overlay.
        temp_output_path = unique_path("output")
        arcpy.analysis.SpatialJoin(
            target_features=dataset_view.name,
            join_features=overlay_copy.path,
            out_feature_class=temp_output_path,
            **join_kwargs
        )
        if "tolerance" in kwargs:
            arcpy.env.XYTolerance = meta["original_tolerance"]
    # Push overlay (or replacement) value from output to update field.
    if kwargs.get("replacement_value") is not None:
        function = lambda x: kwargs["replacement_value"] if x else None
    else:
        function = lambda x: x
    update_by_function(
        temp_output_path,
        field_name,
        function,
        field_as_first_arg=False,
        arg_field_names=[overlay_copy.field_name],
        log_level=None,
    )
    # Update values in original dataset.
    update_action_count = update_by_joined_value(
        dataset_path,
        field_name,
        join_dataset_path=temp_output_path,
        join_field_name=field_name,
        on_field_pairs=[(meta["dataset"]["oid_field_name"], "target_fid")],
        dataset_where_sql=kwargs["dataset_where_sql"],
        use_edit_session=kwargs["use_edit_session"],
        log_level=None,
    )
    dataset.delete(temp_output_path, log_level=None)
    for action, count in sorted(update_action_count.items()):
        log("%s attributes %s.", count, action)
    log("End: Update.")
    return update_action_count