def attributes_as_iters(dataset_path, field_names, **kwargs):
    """Generator for iterables of feature attributes.

    Use ArcPy cursor token names for object IDs and geometry
    objects/properties.

    Args:
        dataset_path (str): Path of the dataset.
        field_names (iter): Collection of field names. The order of the names
            in the collection determines where each field's value falls in
            the yielded item.
        **kwargs: Arbitrary keyword arguments. See below.

    Keyword Args:
        dataset_where_sql (str): SQL where-clause for dataset subselection.
        spatial_reference_item: Item from which the output geometry's spatial
            reference will be derived.
        iter_type: Iterable type to yield. Default is tuple.

    Yields:
        iter: Collection of attribute values.
    """
    kwargs.setdefault('dataset_where_sql')
    kwargs.setdefault('spatial_reference_item')
    kwargs.setdefault('iter_type', tuple)
    keys = {'field': tuple(contain(field_names))}
    sref = spatial_reference(kwargs['spatial_reference_item'])
    cursor = arcpy.da.SearchCursor(
        in_table=dataset_path,
        field_names=keys['field'],
        where_clause=kwargs['dataset_where_sql'],
        spatial_reference=sref,
    )
    with cursor:
        for feature in cursor:
            yield kwargs['iter_type'](feature)
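# Usage sketch for attributes_as_iters (hypothetical geodatabase path and
# field names; "SHAPE@" is a standard ArcPy geometry token):
def _example_attributes_as_iters():
    for name, geom in attributes_as_iters(
            "C:/data/city.gdb/parcels", ["NAME", "SHAPE@"],
            dataset_where_sql="NAME IS NOT NULL"):
        print(name, geom)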
def id_attributes_map(dataset_path, id_field_names, field_names, **kwargs):
    """Return dictionary mapping of field attribute(s) for each feature ID.

    Note:
        There is no guarantee that the ID field(s) are unique.
        Use ArcPy cursor token names for object IDs and geometry
        objects/properties.

    Args:
        dataset_path (str): Path of the dataset.
        id_field_names (iter, str): Name(s) of the ID field(s).
        field_names (iter, str): Name(s) of the field(s).
        **kwargs: Arbitrary keyword arguments. See below.

    Keyword Args:
        dataset_where_sql (str): SQL where-clause for dataset subselection.
        spatial_reference_item: Item from which the output geometry's spatial
            reference will be derived.

    Returns:
        dict: Mapping of feature ID to feature attribute(s).
    """
    field_names = tuple(contain(field_names))
    id_field_names = tuple(contain(id_field_names))
    sref = spatial_reference(kwargs.get('spatial_reference_item'))
    cursor = arcpy.da.SearchCursor(
        dataset_path,
        field_names=id_field_names + field_names,
        where_clause=kwargs.get('dataset_where_sql'),
        spatial_reference=sref,
    )
    with cursor:
        result = {}
        for row in cursor:
            map_id = row[:len(id_field_names)]
            map_value = row[len(id_field_names):]
            # Unpack single-field IDs/values so callers get scalars rather
            # than one-element tuples.
            if len(id_field_names) == 1:
                map_id = map_id[0]
            if len(field_names) == 1:
                map_value = map_value[0]
            result[map_id] = map_value
    return result
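# Usage sketch for id_attributes_map (hypothetical dataset; maps each object
# ID to its NAME value using the ArcPy "OID@" token):
def _example_id_attributes_map():
    oid_to_name = id_attributes_map("C:/data/city.gdb/parcels", "OID@", "NAME")
    return oid_to_name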
def update_attributes_by_mapping(dataset_path, field_name, mapping,
                                 key_field_names, **kwargs):
    """Update attribute values by finding them in a mapping.

    Args:
        dataset_path (str): Path of the dataset.
        field_name (str): Name of the field.
        mapping (object): Mapping to get values from.
        key_field_names (iter): Names of the fields whose values will be the
            mapping's keys.
        **kwargs: Arbitrary keyword arguments. See below.

    Keyword Args:
        dataset_where_sql (str): SQL where-clause for dataset subselection.
        default_value: Value to use when a feature's key value is not present
            in the mapping. Defaults to None.
        use_edit_session (bool): Flag to perform updates in an edit session.
            Default is False.
        log_level (str): Level to log the function at. Defaults to 'info'.

    Returns:
        str: Name of the field updated.
    """
    kwargs.setdefault('dataset_where_sql')
    kwargs.setdefault('default_value')
    kwargs.setdefault('use_edit_session', False)
    log = leveled_logger(LOG, kwargs.get('log_level', 'info'))
    log("Start: Update attributes in %s on %s by mapping with key(s) in %s.",
        field_name, dataset_path, key_field_names)
    keys = tuple(contain(key_field_names))
    session = Editor(dataset_metadata(dataset_path)['workspace_path'],
                     kwargs['use_edit_session'])
    cursor = arcpy.da.UpdateCursor(dataset_path, (field_name,) + keys,
                                   kwargs['dataset_where_sql'])
    with session, cursor:
        for row in cursor:
            old_value = row[0]
            key = row[1] if len(keys) == 1 else tuple(row[1:])
            new_value = mapping.get(key, kwargs['default_value'])
            if old_value != new_value:
                try:
                    cursor.updateRow([new_value] + list(row[1:]))
                except RuntimeError:
                    LOG.error("Offending value is %s", new_value)
                    # Re-raise the original exception (with its traceback)
                    # rather than raising a bare RuntimeError class.
                    raise
    log("End: Update.")
    return field_name
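# Usage sketch for update_attributes_by_mapping (hypothetical dataset, fields,
# and lookup; rewrites ZONE_LABEL from a ZONE_CODE lookup, falling back to
# "Unknown" for unmapped codes):
def _example_update_attributes_by_mapping():
    update_attributes_by_mapping(
        "C:/data/city.gdb/parcels", "ZONE_LABEL",
        mapping={"R1": "Residential", "C1": "Commercial"},
        key_field_names="ZONE_CODE",
        default_value="Unknown")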
def predict_bug(self, bug):
    # DESCRIPTION: vectorize the concatenated summary + description text.
    description = str(bug[config.SUMMARY]) + ' ' + str(bug[config.DESCRIPTION])
    description = self.__preprocess_text(
        description,
        self.text_processing_parameters['word_min_len'],
        self.text_processing_parameters['remove_digits'],
        self.text_processing_parameters['use_stemmer_lemmmatizer'],
    )
    x_description = self.vectorizer.transform([description])
    # PLATFORM: collapse values unseen during training into a catch-all
    # category before label- and one-hot-encoding.
    platform = str(bug[config.PLATFORM])
    if not utils.contain(self.platform_names, platform):
        platform = config.OTHER_PLATFORMS
    x_platform = self.platform_encoder.transform([platform])
    x_platform = self.platform_onehot_encoder.transform(x_platform[:, np.newaxis])
    # OP_SYS: same treatment for the operating system.
    op_sys = str(bug[config.OP_SYS])
    if not utils.contain(self.op_sys_names, op_sys):
        op_sys = config.OTHER_OP_SYS
    x_op_sys = self.op_sys_encoder.transform([op_sys])
    x_op_sys = self.op_sys_onehot_encoder.transform(x_op_sys[:, np.newaxis])
    # REPORTER: same treatment for the reporting user.
    reporter = str(bug[config.REPORTER])
    if not utils.contain(self.reporter_names, reporter):
        reporter = config.OTHER_REPORTERS
    x_reporter = self.reporter_encoder.transform([reporter])
    x_reporter = self.reporter_onehot_encoder.transform(x_reporter[:, np.newaxis])
    # Stack the text and categorical features into one sparse row vector.
    x = sp.sparse.hstack([x_description, x_platform, x_op_sys, x_reporter]).tocsr()
    probabilities = self.model.predict_proba(x)
    component_index = np.argmax(probabilities, axis=1)[0]
    # Equivalent without probabilities: self.model.predict(x)[0]
    component = self.index2component[component_index]
    probability = probabilities[0, component_index]
    return component, probability
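# Usage sketch for predict_bug (the classifier instance and constructor are
# hypothetical; the bug-record keys mirror the config constants used above):
# predictor = ComponentPredictor(...)  # hypothetical constructor
# bug = {config.SUMMARY: "Crash on startup",
#        config.DESCRIPTION: "Segfault during init",
#        config.PLATFORM: "x86_64",
#        config.OP_SYS: "Linux",
#        config.REPORTER: "user@example.com"}
# component, probability = predictor.predict_bug(bug)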
def attributes_as_dicts(dataset_path, field_names=None, **kwargs):
    """Generator for dictionaries of feature attributes.

    Use ArcPy cursor token names for object IDs and geometry
    objects/properties.

    Args:
        dataset_path (str): Path of the dataset.
        field_names (iter): Collection of field names. Names will be the keys
            in the dictionary mapping to their values. If None, all attribute
            fields will be used.
        **kwargs: Arbitrary keyword arguments. See below.

    Keyword Args:
        dataset_where_sql (str): SQL where-clause for dataset subselection.
        spatial_reference_item: Item from which the output geometry's spatial
            reference will be derived.

    Yields:
        dict: Mapping of feature attribute field names to values.
    """
    kwargs.setdefault('dataset_where_sql')
    kwargs.setdefault('spatial_reference_item')
    if field_names is None:
        meta = {'dataset': dataset_metadata(dataset_path)}
        keys = {
            'field': tuple(
                key.lower()
                for key in meta['dataset']['field_names_tokenized']
            )
        }
    else:
        keys = {'field': tuple(contain(field_names))}
    sref = spatial_reference(kwargs['spatial_reference_item'])
    cursor = arcpy.da.SearchCursor(
        in_table=dataset_path,
        # Pass the field-name tuple, not the `keys` dict itself.
        field_names=keys['field'],
        where_clause=kwargs['dataset_where_sql'],
        spatial_reference=sref,
    )
    with cursor:
        for feature in cursor:
            yield dict(zip(cursor.fields, feature))
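# Usage sketch for attributes_as_dicts (hypothetical dataset; omitting
# field_names pulls every attribute field, keyed by lowercased name):
def _example_attributes_as_dicts():
    for feature in attributes_as_dicts("C:/data/city.gdb/parcels"):
        print(feature["name"])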
def insert_features_from_iters(dataset_path, insert_features, field_names,
                               **kwargs):
    """Insert features into dataset from iterables.

    Args:
        dataset_path (str): Path of the dataset.
        insert_features (iter of iter): Collection of iterables representing
            features.
        field_names (iter): Collection of field names to insert. These must
            match the order of their attributes in the insert_features items.
        **kwargs: Arbitrary keyword arguments. See below.

    Keyword Args:
        use_edit_session (bool): Flag to perform updates in an edit session.
            Default is False.
        log_level (str): Level to log the function at. Defaults to 'info'.

    Returns:
        collections.Counter: Counts of features inserted.
    """
    kwargs.setdefault('use_edit_session', False)
    log = leveled_logger(LOG, kwargs.get('log_level', 'info'))
    log("Start: Insert features into %s from iterables.", dataset_path)
    meta = {'dataset': dataset_metadata(dataset_path)}
    keys = {'row': tuple(contain(field_names))}
    # Accept a generator function as well as an already-realized iterable.
    if inspect.isgeneratorfunction(insert_features):
        insert_features = insert_features()
    session = Editor(meta['dataset']['workspace_path'],
                     kwargs['use_edit_session'])
    cursor = arcpy.da.InsertCursor(dataset_path, field_names=keys['row'])
    feature_count = Counter()
    with session, cursor:
        for row in insert_features:
            cursor.insertRow(tuple(row))
            feature_count['inserted'] += 1
    log("%s features inserted.", feature_count['inserted'])
    log("End: Insert.")
    return feature_count
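# Usage sketch for insert_features_from_iters (hypothetical dataset and
# fields; each row follows the field_names order):
def _example_insert_features_from_iters():
    rows = [("Parcel A", 101), ("Parcel B", 102)]
    count = insert_features_from_iters(
        "C:/data/city.gdb/parcels", rows, ["NAME", "LOT_NUMBER"])
    return count["inserted"]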
def insert_features_from_dicts(dataset_path, insert_features, field_names,
                               **kwargs):
    """Insert features into dataset from dictionaries.

    Args:
        dataset_path (str): Path of the dataset.
        insert_features (iter of dict): Collection of dictionaries
            representing features.
        field_names (iter): Collection of field names/keys to insert.
        **kwargs: Arbitrary keyword arguments. See below.

    Keyword Args:
        use_edit_session (bool): Flag to perform updates in an edit session.
            Default is False.
        log_level (str): Level to log the function at. Defaults to 'info'.

    Returns:
        collections.Counter: Counts of features inserted.
    """
    kwargs.setdefault('use_edit_session', False)
    log = leveled_logger(LOG, kwargs.get('log_level', 'info'))
    log("Start: Insert features into %s from dictionaries.", dataset_path)
    keys = {'row': tuple(contain(field_names))}
    if inspect.isgeneratorfunction(insert_features):
        insert_features = insert_features()
    # Project each dictionary onto the field-name order, then delegate to the
    # iterable-based insert.
    iters = ((feature[key] for key in keys['row'])
             for feature in insert_features)
    feature_count = insert_features_from_iters(
        dataset_path,
        iters,
        field_names,
        use_edit_session=kwargs['use_edit_session'],
        log_level=None,
    )
    log("%s features inserted.", feature_count['inserted'])
    log("End: Insert.")
    return feature_count
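# Usage sketch for insert_features_from_dicts (hypothetical dataset; each
# dictionary must supply every name in field_names):
def _example_insert_features_from_dicts():
    features = [{"NAME": "Parcel A", "LOT_NUMBER": 101}]
    return insert_features_from_dicts(
        "C:/data/city.gdb/parcels", features, ["NAME", "LOT_NUMBER"])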
def insert_features_from_path(dataset_path, insert_dataset_path,
                              field_names=None, **kwargs):
    """Insert features into dataset from another dataset.

    Args:
        dataset_path (str): Path of the dataset.
        insert_dataset_path (str): Path of dataset to insert features from.
        field_names (iter): Collection of field names to insert. Listed
            fields must be present in both datasets. If None, all shared
            fields will be inserted.
        **kwargs: Arbitrary keyword arguments. See below.

    Keyword Args:
        insert_where_sql (str): SQL where-clause for insert-dataset
            subselection.
        use_edit_session (bool): Flag to perform updates in an edit session.
            Default is False.
        log_level (str): Level to log the function at. Defaults to 'info'.

    Returns:
        collections.Counter: Counts of features inserted.
    """
    kwargs.setdefault('insert_where_sql')
    kwargs.setdefault('use_edit_session', False)
    log = leveled_logger(LOG, kwargs.get('log_level', 'info'))
    log("Start: Insert features into %s from %s.",
        dataset_path, insert_dataset_path)
    meta = {
        'dataset': dataset_metadata(dataset_path),
        'insert': dataset_metadata(insert_dataset_path),
    }
    if field_names is None:
        keys = set.intersection(*(
            set(name.lower() for name in _meta['field_names_tokenized'])
            for _meta in meta.values()
        ))
    else:
        keys = set(name.lower() for name in contain(field_names))
    # OIDs & area/length "fields" have no business being part of an update.
    # Geometry itself is handled separately in the append.
    for _meta in meta.values():
        for key in chain(*_meta['field_token'].items()):
            keys.discard(key)
    append_kwargs = {
        'inputs': unique_name('view'),
        'target': dataset_path,
        'schema_type': 'no_test',
        'field_mapping': arcpy.FieldMappings(),
    }
    # Create field maps.
    # ArcGIS Pro's no-test append is case-sensitive (verified 1.0-1.1.1).
    # Avoid this problem by using field mapping.
    # BUG-000090970 - ArcGIS Pro 'No test' field mapping in Append tool does
    # not auto-map to the same field name if naming convention differs.
    for key in keys:
        field_map = arcpy.FieldMap()
        field_map.addInputField(insert_dataset_path, key)
        append_kwargs['field_mapping'].addFieldMap(field_map)
    view = DatasetView(
        insert_dataset_path,
        kwargs['insert_where_sql'],
        view_name=append_kwargs['inputs'],
        # Must be nonspatial to append to a nonspatial table.
        force_nonspatial=(not meta['dataset']['is_spatial']),
    )
    session = Editor(meta['dataset']['workspace_path'],
                     kwargs['use_edit_session'])
    with view, session:
        arcpy.management.Append(**append_kwargs)
        feature_count = Counter({'inserted': view.count})
    log("%s features inserted.", feature_count['inserted'])
    log("End: Insert.")
    return feature_count
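# Usage sketch for insert_features_from_path (hypothetical datasets; with
# field_names=None, the fields shared by both datasets are mapped
# automatically):
def _example_insert_features_from_path():
    return insert_features_from_path(
        "C:/data/city.gdb/parcels", "C:/data/county.gdb/parcels",
        insert_where_sql="CITY_NAME = 'Springfield'")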