# Imports implied by the snippets below. Package-internal helpers such as
# contain, spatial_reference, dataset_metadata, leveled_logger, Editor,
# DatasetView, and unique_name are defined elsewhere in the source package.
import inspect
import logging
from collections import Counter
from itertools import chain

import arcpy

LOG = logging.getLogger(__name__)


def attributes_as_iters(dataset_path, field_names, **kwargs):
    """Generator for iterables of feature attributes.

    Use ArcPy cursor token names for object IDs and geometry objects/properties.

    Args:
        dataset_path (str): Path of the dataset.
        field_names (iter): Collection of field names. Their order determines
            where each value falls in the yielded item.
        **kwargs: Arbitrary keyword arguments. See below.

    Keyword Args:
        dataset_where_sql (str): SQL where-clause for dataset subselection.
        spatial_reference_item: Item from which the output geometry's spatial
            reference will be derived.
        iter_type: Iterable type to yield. Default is tuple.

    Yields:
        iter: Collection of attribute values.

    """
    kwargs.setdefault('dataset_where_sql')
    kwargs.setdefault('spatial_reference_item')
    kwargs.setdefault('iter_type', tuple)
    keys = {'field': tuple(contain(field_names))}
    sref = spatial_reference(kwargs['spatial_reference_item'])
    cursor = arcpy.da.SearchCursor(in_table=dataset_path,
                                   field_names=keys['field'],
                                   where_clause=kwargs['dataset_where_sql'],
                                   spatial_reference=sref)
    with cursor:
        for feature in cursor:
            yield kwargs['iter_type'](feature)
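
A minimal usage sketch (the geodatabase path, field names, and where-clause
below are hypothetical; "OID@" and "SHAPE@" are standard ArcPy cursor tokens):

for oid, geom in attributes_as_iters(
        "C:/data/city.gdb/parcels", ["OID@", "SHAPE@"],
        dataset_where_sql="ZONE_CODE = 'R1'"):
    print(oid, geom.area)
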
def id_attributes_map(dataset_path, id_field_names, field_names, **kwargs):
    """Return dictionary mapping of field attribute for each feature ID.

    Note:
        There is no guarantee that the ID field(s) are unique.
        Use ArcPy cursor token names for object IDs and geometry objects/
        properties.

    Args:
        dataset_path (str): Path of the dataset.
        id_field_names (iter, str): Name(s) of the ID field(s).
        field_names (iter, str): Name(s) of the field(s).
        **kwargs: Arbitrary keyword arguments. See below.

    Keyword Args:
        dataset_where_sql (str): SQL where-clause for dataset subselection.
        spatial_reference_item: Item from which the output geometry's spatial
            reference will be derived.

    Returns:
        dict: Mapping of feature ID to feature attribute(s).

    """
    field_names = tuple(contain(field_names))
    id_field_names = tuple(contain(id_field_names))
    sref = spatial_reference(kwargs.get('spatial_reference_item'))
    cursor = arcpy.da.SearchCursor(
        dataset_path,
        field_names=id_field_names + field_names,
        where_clause=kwargs.get('dataset_where_sql'),
        spatial_reference=sref)
    with cursor:
        result = {}
        for row in cursor:
            map_id = row[:len(id_field_names)]
            map_value = row[len(id_field_names):]
            if len(id_field_names) == 1:
                map_id = map_id[0]
            if len(field_names) == 1:
                map_value = map_value[0]
            result[map_id] = map_value
    return result
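
A usage sketch (hypothetical path and field names), mapping each object ID to
its parcel name; with one ID field and one value field, both sides of the
mapping are scalars:

name_by_oid = id_attributes_map(
    "C:/data/city.gdb/parcels", "OID@", "parcel_name")
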
def update_attributes_by_mapping(dataset_path, field_name, mapping,
                                 key_field_names, **kwargs):
    """Update attribute values by finding them in a mapping.

    Note:
        Wraps update_by_function.

    Args:
        dataset_path (str): Path of the dataset.
        field_name (str): Name of the field.
        mapping (object): Mapping to get values from.
        key_field_names (iter): Names of the fields whose values will be the
            mapping's keys.
        **kwargs: Arbitrary keyword arguments. See below.

    Keyword Args:
        dataset_where_sql (str): SQL where-clause for dataset subselection.
        default_value: Value to return from mapping if key value on feature not
            present. Defaults to None.
        use_edit_session (bool): Flag to perform updates in an edit session. Default is
            False.
        log_level (str): Level to log the function at. Defaults to 'info'.

    Returns:
        str: Name of the field updated.

    """
    kwargs.setdefault('dataset_where_sql')
    kwargs.setdefault('default_value')
    kwargs.setdefault('use_edit_session', False)
    log = leveled_logger(LOG, kwargs.get('log_level', 'info'))
    log("Start: Update attributes in %s on %s by mapping with key(s) in %s.",
        field_name, dataset_path, key_field_names)
    keys = tuple(contain(key_field_names))
    session = Editor(
        dataset_metadata(dataset_path)['workspace_path'],
        kwargs['use_edit_session'])
    cursor = arcpy.da.UpdateCursor(dataset_path, (field_name, ) + keys,
                                   kwargs['dataset_where_sql'])
    with session, cursor:
        for row in cursor:
            old_value = row[0]
            key = row[1] if len(keys) == 1 else tuple(row[1:])
            new_value = mapping.get(key, kwargs['default_value'])
            if old_value != new_value:
                try:
                    cursor.updateRow([new_value] + row[1:])
                except RuntimeError:
                    LOG.error("Offending value is %s", new_value)
                    raise
    log("End: Update.")
    return field_name
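
A usage sketch (hypothetical path, field, and mapping); features whose
parcel_id is absent from the mapping receive the default value:

zone_by_parcel = {"001-100": "R1", "001-101": "C2"}
update_attributes_by_mapping(
    "C:/data/city.gdb/parcels", "zone_code",
    mapping=zone_by_parcel, key_field_names="parcel_id",
    default_value="UNKNOWN")
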
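    # Excerpt: prediction method from a bug-triage classifier class. The class
    # definition is not shown here; the method relies on numpy (np), scipy
    # (sp), the package's utils and config modules, and fitted encoders/model.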
    def predict_bug(self, bug):
        # DESCRIPTION
        description = str(bug[config.SUMMARY]) + ' ' + str(bug[config.DESCRIPTION])
        description = self.__preprocess_text(description,
                                             self.text_processing_parameters['word_min_len'],
                                             self.text_processing_parameters['remove_digits'],
                                             self.text_processing_parameters['use_stemmer_lemmmatizer'])
        x_description = self.vectorizer.transform([description])

        # PLATFORM
        platform = str(bug[config.PLATFORM])
        if not utils.contain(self.platform_names, platform):
            platform = config.OTHER_PLATFORMS
        x_platform = self.platform_encoder.transform([platform])
        x_platform = self.platform_onehot_encoder.transform(x_platform[:, np.newaxis])

        # OP_SYS
        op_sys = str(bug[config.OP_SYS])
        if not utils.contain(self.op_sys_names, op_sys):
            op_sys = config.OTHER_OP_SYS
        x_op_sys = self.op_sys_encoder.transform([op_sys])
        x_op_sys = self.op_sys_onehot_encoder.transform(x_op_sys[:, np.newaxis])

        # REPORTER
        reporter = str(bug[config.REPORTER])
        if not utils.contain(self.reporter_names, reporter):
            reporter = config.OTHER_REPORTERS
        x_reporter = self.reporter_encoder.transform([reporter])
        x_reporter = self.reporter_onehot_encoder.transform(x_reporter[:, np.newaxis])

        x = sp.sparse.hstack([x_description, x_platform, x_op_sys, x_reporter]).tocsr()
        probabilities = self.model.predict_proba(x)
        component_index = np.argmax(probabilities, axis=1)[0]
        #component_index = self.model.predict(x)[0]
        component = self.index2component[component_index]
        probability = probabilities[0, component_index]
        return component, probability
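
A usage sketch for the excerpt above (the classifier instance, the config
attribute names, and all bug field values are assumptions based on the code):

bug = {config.SUMMARY: "Crash on startup",
       config.DESCRIPTION: "Segfault when opening the preferences dialog",
       config.PLATFORM: "x86_64",
       config.OP_SYS: "Linux",
       config.REPORTER: "user@example.org"}
component, probability = classifier.predict_bug(bug)
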
def attributes_as_dicts(dataset_path, field_names=None, **kwargs):
    """Generator for dictionaries of feature attributes.

    Use ArcPy cursor token names for object IDs and geometry objects/properties.

    Args:
        dataset_path (str): Path of the dataset.
        field_names (iter): Collection of field names. Names will be the keys in the
            dictionary mapping to their values. If None, all attribute fields
            will be used.
        **kwargs: Arbitrary keyword arguments. See below.

    Keyword Args:
        dataset_where_sql (str): SQL where-clause for dataset subselection.
        spatial_reference_item: Item from which the output geometry's spatial
            reference will be derived.

    Yields:
        dict: Mapping of feature attribute field names to values.

    """
    kwargs.setdefault('dataset_where_sql')
    kwargs.setdefault('spatial_reference_item')
    if field_names is None:
        meta = {'dataset': dataset_metadata(dataset_path)}
        keys = {'field': tuple(key.lower() for key in
                               meta['dataset']['field_names_tokenized'])}
    else:
        keys = {'field': tuple(contain(field_names))}
    sref = spatial_reference(kwargs['spatial_reference_item'])
    cursor = arcpy.da.SearchCursor(in_table=dataset_path,
                                   field_names=keys['field'],
                                   where_clause=kwargs['dataset_where_sql'],
                                   spatial_reference=sref)
    with cursor:
        for feature in cursor:
            yield dict(zip(cursor.fields, feature))
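
A usage sketch (hypothetical path and field names); each yielded item is a
dictionary keyed by the cursor's field names:

for feature in attributes_as_dicts(
        "C:/data/city.gdb/parcels", ["parcel_id", "zone_code"]):
    print(feature["parcel_id"], feature["zone_code"])
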
def insert_features_from_iters(dataset_path, insert_features, field_names,
                               **kwargs):
    """Insert features into dataset from iterables.

    Args:
        dataset_path (str): Path of the dataset.
        insert_features (iter of iter): Collection of iterables representing
            features.
        field_names (iter): Collection of field names to insert. These must
            match the order of their attributes in the insert_features items.
        **kwargs: Arbitrary keyword arguments. See below.

    Keyword Args:
        use_edit_session (bool): Flag to perform updates in an edit session.
            Default is False.
        log_level (str): Level to log the function at. Defaults to 'info'.

    Returns:
        collections.Counter: Counts of inserted features (key: 'inserted').

    """
    kwargs.setdefault('use_edit_session', False)
    log = leveled_logger(LOG, kwargs.get('log_level', 'info'))
    log("Start: Insert features into %s from iterables.", dataset_path)
    meta = {'dataset': dataset_metadata(dataset_path)}
    keys = {'row': tuple(contain(field_names))}
    if inspect.isgeneratorfunction(insert_features):
        insert_features = insert_features()
    session = Editor(meta['dataset']['workspace_path'],
                     kwargs['use_edit_session'])
    cursor = arcpy.da.InsertCursor(dataset_path, field_names=keys['row'])
    feature_count = Counter()
    with session, cursor:
        for row in insert_features:
            cursor.insertRow(tuple(row))
            feature_count['inserted'] += 1
    log("%s features inserted.", feature_count['inserted'])
    log("End: Insert.")
    return feature_count
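
A usage sketch (hypothetical path, fields, and rows); the returned Counter
reports how many features were inserted:

rows = [("001-102", "R1"), ("001-103", "C2")]
count = insert_features_from_iters(
    "C:/data/city.gdb/parcels", rows, ["parcel_id", "zone_code"])
print(count["inserted"])  # 2
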
def insert_features_from_dicts(dataset_path, insert_features, field_names,
                               **kwargs):
    """Insert features into dataset from dictionaries.

    Args:
        dataset_path (str): Path of the dataset.
        insert_features (iter of dict): Collection of dictionaries
            representing features.
        field_names (iter): Collection of field names/keys to insert.
        **kwargs: Arbitrary keyword arguments. See below.

    Keyword Args:
        use_edit_session (bool): Flag to perform updates in an edit session.
            Default is False.
        log_level (str): Level to log the function at. Defaults to 'info'.

    Returns:
        collections.Counter: Counts of inserted features (key: 'inserted').

    """
    kwargs.setdefault('use_edit_session', False)
    log = leveled_logger(LOG, kwargs.get('log_level', 'info'))
    log("Start: Insert features into %s from dictionaries.", dataset_path)
    keys = {'row': tuple(contain(field_names))}
    if inspect.isgeneratorfunction(insert_features):
        insert_features = insert_features()
    iters = ((feature[key] for key in keys['row'])
             for feature in insert_features)
    feature_count = insert_features_from_iters(
        dataset_path,
        iters,
        field_names,
        use_edit_session=kwargs['use_edit_session'],
        log_level=None,
    )
    log("%s features inserted.", feature_count['inserted'])
    log("End: Insert.")
    return feature_count
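
A usage sketch (hypothetical path and features); each dictionary must supply a
value for every listed field name:

features = [{"parcel_id": "001-104", "zone_code": "C1"}]
insert_features_from_dicts(
    "C:/data/city.gdb/parcels", features, ["parcel_id", "zone_code"])
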
def insert_features_from_path(dataset_path,
                              insert_dataset_path,
                              field_names=None,
                              **kwargs):
    """Insert features into dataset from another dataset.

    Args:
        dataset_path (str): Path of the dataset.
        insert_dataset_path (str): Path of dataset to insert features from.
        field_names (iter): Collection of field names to insert. Listed fields must
            be present in both datasets. If None, all fields shared by both
            datasets will be inserted.
        **kwargs: Arbitrary keyword arguments. See below.

    Keyword Args:
        insert_where_sql (str): SQL where-clause for insert-dataset subselection.
        use_edit_session (bool): Flag to perform updates in an edit session. Default is
            False.
        log_level (str): Level to log the function at. Defaults to 'info'.

    Returns:
        collections.Counter: Counts of inserted features (key: 'inserted').

    """
    kwargs.setdefault('insert_where_sql')
    kwargs.setdefault('use_edit_session', False)
    log = leveled_logger(LOG, kwargs.get('log_level', 'info'))
    log("Start: Insert features into %s from %s.", dataset_path,
        insert_dataset_path)
    meta = {
        'dataset': dataset_metadata(dataset_path),
        'insert': dataset_metadata(insert_dataset_path)
    }
    if field_names is None:
        keys = set.intersection(
            *(set(name.lower() for name in _meta['field_names_tokenized'])
              for _meta in meta.values()))
    else:
        keys = set(name.lower() for name in contain(field_names))
    # OIDs & area/length "fields" have no business being part of an update.
    # Geometry itself is handled separately in append function.
    for _meta in meta.values():
        for key in chain(*_meta['field_token'].items()):
            keys.discard(key)
    append_kwargs = {
        'inputs': unique_name('view'),
        'target': dataset_path,
        'schema_type': 'no_test',
        'field_mapping': arcpy.FieldMappings()
    }
    # Create field maps.
    # ArcGIS Pro's no-test append is case-sensitive (verified 1.0-1.1.1).
    # Avoid this problem by using field mapping.
    # BUG-000090970 - ArcGIS Pro 'No test' field mapping in Append tool does
    # not auto-map to the same field name if naming convention differs.
    for key in keys:
        field_map = arcpy.FieldMap()
        field_map.addInputField(insert_dataset_path, key)
        append_kwargs['field_mapping'].addFieldMap(field_map)
    view = DatasetView(
        insert_dataset_path,
        kwargs['insert_where_sql'],
        view_name=append_kwargs['inputs'],
        # Must be nonspatial to append to nonspatial table.
        force_nonspatial=(not meta['dataset']['is_spatial']))
    session = Editor(meta['dataset']['workspace_path'],
                     kwargs['use_edit_session'])
    with view, session:
        arcpy.management.Append(**append_kwargs)
        feature_count = Counter({'inserted': view.count})
    log("%s features inserted.", feature_count['inserted'])
    log("End: Insert.")
    return feature_count
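
A usage sketch (both dataset paths and the where-clause are hypothetical);
with field_names left as None, all fields shared by the two datasets are
mapped and appended:

insert_features_from_path(
    "C:/data/city.gdb/parcels", "C:/data/county.gdb/parcels",
    insert_where_sql="CITY_NAME = 'Springfield'")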