Example #1
def _apply_filters(data, meta, filters):
    keep = np.array([True] * len(data))

    # filter by columns and list of values
    for col, values in filters.items():
        if col in meta.columns:
            matches = pattern_match(meta[col], values)
            cat_idx = meta[matches].index
            keep_col = data[META_IDX].set_index(META_IDX).index.isin(cat_idx)

        elif col in ['model', 'scenario', 'region', 'unit']:
            keep_col = pattern_match(data[col], values)

        elif col == 'variable':
            level = filters.get('level')
            keep_col = pattern_match(data[col], values, level)

        elif col == 'year':
            keep_col = years_match(data[col], values)

        elif col == 'level':
            if 'variable' not in filters.keys():
                keep_col = pattern_match(data['variable'], '*', level=values)
            else:
                continue
        else:
            raise ValueError('filter by column ' + col + ' not supported')
        keep &= keep_col

    return keep
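
A minimal usage sketch for the function above; the column layout mirrors the long-format `data` and `meta` tables it expects, but the example values are made up, and `pattern_match`, `years_match` and `META_IDX` are assumed to be in scope:

import numpy as np
import pandas as pd

# hypothetical long-format data table (columns as expected by `_apply_filters`)
data = pd.DataFrame({
    'model': ['model_a', 'model_a', 'model_b'],
    'scenario': ['scen_a', 'scen_a', 'scen_b'],
    'region': ['World', 'World', 'World'],
    'variable': ['Primary Energy', 'Primary Energy|Coal', 'Primary Energy'],
    'unit': ['EJ/yr', 'EJ/yr', 'EJ/yr'],
    'year': [2020, 2020, 2030],
    'value': [500.0, 150.0, 480.0],
})
# meta table indexed by ['model', 'scenario'] (no extra columns in this sketch)
meta = pd.DataFrame(index=pd.MultiIndex.from_tuples(
    [('model_a', 'scen_a'), ('model_b', 'scen_b')],
    names=['model', 'scenario']))

# boolean mask: top-level `Primary Energy` rows in the year 2020
keep = _apply_filters(data, meta, {'variable': 'Primary Energy', 'year': 2020})
print(data[keep])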
Example #2
def filter_by_meta(data, df, join_meta=False, **kwargs):
    """Filter by and join meta columns from an IamDataFrame to a pd.DataFrame

    Parameters
    ----------
    data: pd.DataFrame instance
        DataFrame to which meta columns are to be joined,
        index or columns must include `['model', 'scenario']`
    df: IamDataFrame instance
        IamDataFrame from which meta columns are filtered and joined (optional)
    join_meta: bool, default False
        join selected columns from `df.meta` on `data`
    kwargs:
        meta columns to be filtered/joined, where `col=...` applies filters
        by the given arguments (using `utils.pattern_match()`) and `col=None`
        joins the column without filtering (setting col to `np.nan`
        if `(model, scenario) not in df.meta.index`)
    """
    if not set(META_IDX).issubset(data.index.names + list(data.columns)):
        raise ValueError('missing required index dimensions or columns!')

    meta = pd.DataFrame(df.meta[list(set(kwargs) - set(META_IDX))].copy())

    # filter meta by columns
    keep = np.array([True] * len(meta))
    apply_filter = False
    for col, values in kwargs.items():
        if col in META_IDX and values is not None:
            _col = meta.index.get_level_values(0 if col == 'model' else 1)
            keep &= pattern_match(_col, values, has_nan=False)
            apply_filter = True
        elif values is not None:
            keep &= pattern_match(meta[col], values)
        apply_filter |= values is not None
    meta = meta[keep]

    # set the data index to META_IDX and apply filtered meta index
    data = data.copy()
    idx = list(data.index.names) if not data.index.names == [None] else None
    data = data.reset_index().set_index(META_IDX)
    meta = meta.loc[meta.index.intersection(data.index)]
    meta.index.names = META_IDX
    if apply_filter:
        data = data.loc[meta.index]
    data.index.names = META_IDX

    # join meta (optional), reset index to format as input arg
    data = data.join(meta) if join_meta else data
    data = data.reset_index().set_index(idx or 'index')
    if idx is None:
        data.index.name = None

    return data
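
A brief usage sketch; the meta column `category` and the value 'Below 2C' are illustrative, and `df` is assumed to be an IamDataFrame whose `meta` table carries that column:

import pandas as pd

# hypothetical external table with model/scenario columns
data = pd.DataFrame({
    'model': ['model_a', 'model_b'],
    'scenario': ['scen_a', 'scen_b'],
    'value': [1.0, 2.0],
})

# keep only rows whose (model, scenario) is categorized as 'Below 2C' in
# `df.meta`, and join that meta column onto the result
filtered = filter_by_meta(data, df, join_meta=True, category='Below 2C')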
Example #3
def test_pattern_match_dollar():
    data = pd.Series(['foo$bar', 'foo'])
    values = ['foo$bar']

    obs = utils.pattern_match(data, values)
    exp = [True, False]

    assert (obs == exp).all()
Example #4
def test_pattern_match_brackets():
    data = pd.Series(['foo (bar)', 'foo bar'])
    values = ['foo (bar)']

    obs = utils.pattern_match(data, values)
    exp = [True, False]

    assert (obs == exp).all()
Example #5
def test_pattern_match_dot():
    data = pd.Series(['foo', 'fo.'])
    values = ['fo.']

    obs = utils.pattern_match(data, values)
    exp = [False, True]

    assert (obs == exp).all()
Example #6
def test_pattern_match_none():
    data = pd.Series(['foo', 'bar'])
    values = ['baz']

    obs = utils.pattern_match(data, values)
    exp = [False, False]

    assert (obs == exp).all()
Example #7
def test_pattern_match_plus():
    data = pd.Series(['foo', 'foo+', '+bar', 'b+az'])
    values = ['*+*']

    obs = utils.pattern_match(data, values)
    exp = [False, True, True, True]

    assert (obs == exp).all()
Example #8
def test_pattern_match_ast_regex():
    data = pd.Series(['foo', 'foo2', 'bar'])
    values = ['foo*']

    obs = utils.pattern_match(data, values)
    exp = [True, True, False]

    assert (obs == exp).all()
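
Taken together, the tests above pin down the pseudo-regexp behaviour: characters such as `$`, `.`, `+` and brackets are matched literally, while `*` acts as a wildcard. A stand-alone sketch of that translation, for illustration only (this is not the actual `utils.pattern_match` implementation):

import re
import pandas as pd

def simple_pattern_match(data, pattern):
    """Escape regexp special characters, then treat `*` as a wildcard."""
    regex = re.escape(pattern).replace(r'\*', '.*')
    return data.str.match(f'^{regex}$', na=False)

data = pd.Series(['foo', 'foo2', 'foo$bar'])
print(simple_pattern_match(data, 'foo*').tolist())     # [True, True, True]
print(simple_pattern_match(data, 'foo$bar').tolist())  # [False, False, True]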
Example #9
def _apply_filters(data, meta, filters):
    """Applies filters to the data and meta tables of an IamDataFrame.

    Parameters
    ----------
    data: pd.DataFrame
        data table of an IamDataFrame
    meta: pd.DataFrame
        meta table of an IamDataFrame
    filters: dict
        dictionary of filters ({col: values}); uses a pseudo-regexp syntax by
        default, but accepts `regexp: True` to use direct regexp
    """
    regexp = filters.pop('regexp', False)
    keep = np.array([True] * len(data))

    # filter by columns and list of values
    for col, values in filters.items():
        if col in meta.columns:
            matches = pattern_match(meta[col], values, regexp=regexp)
            cat_idx = meta[matches].index
            keep_col = data[META_IDX].set_index(META_IDX).index.isin(cat_idx)

        elif col in ['model', 'scenario', 'region', 'unit']:
            keep_col = pattern_match(data[col], values, regexp=regexp)

        elif col == 'variable':
            level = filters['level'] if 'level' in filters else None
            keep_col = pattern_match(data[col], values, level, regexp)

        elif col == 'year':
            keep_col = years_match(data[col], values)

        elif col == 'level':
            if 'variable' not in filters:
                keep_col = pattern_match(data['variable'], '*',
                                         level=values, regexp=regexp)
            else:
                continue
        else:
            raise ValueError('filter by column ' + col + ' not supported')
        keep &= keep_col

    return keep
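
When `regexp: True` is included in the filters dict, the patterns are passed through as plain regular expressions instead of the pseudo-regexp syntax. A short sketch, reusing the hypothetical `data` and `meta` tables from the earlier usage example:

# keep rows whose `variable` matches a plain regular expression
filters = {'variable': r'Primary Energy(\|.*)?', 'regexp': True}
keep = _apply_filters(data, meta, filters)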
Example #10
def test_pattern_regexp():
    data = pd.Series(['foo', 'foa', 'foo$'])
    values = ['fo.$']

    obs = utils.pattern_match(data, values, regexp=True)
    exp = [True, True, False]

    assert (obs == exp).all()
Example #11
def _match(data, patterns):
    # if no patterns are given, return an empty list (interpreted as "everything")
    if not patterns:
        return []
    # otherwise, collect the unique values that match any of the patterns
    matches = np.array([False] * len(data))
    for p in patterns:
        matches |= pattern_match(data, p)
    return data[matches].unique()
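
A short usage sketch for this helper; the series content is illustrative and `pattern_match` is assumed to be in scope:

import numpy as np
import pandas as pd

regions = pd.Series(['World', 'Europe', 'Asia', 'Europe'])

print(_match(regions, []))                # [] -> interpreted as "everything"
print(_match(regions, ['Eur*', 'Asia']))  # ['Europe' 'Asia']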
Example #12
    def check_aggregate(self, variable, components=None, units=None,
                        exclude_on_fail=False, multiplier=1, **kwargs):
        """Check whether the timeseries data match the aggregation
        of components or sub-categories

        Parameters
        ----------
        variable: str
            variable to be checked for matching aggregation of sub-categories
        components: list of str, default None
            list of variables, defaults to all sub-categories of `variable`
        units: str or list of str, default None
            filter variable and components for given unit(s)
        exclude_on_fail: boolean, default False
            flag scenarios failing validation as `exclude: True`
        multiplier: number, default 1
            factor when comparing variable and sum of components
        kwargs: passed to `np.isclose()`
        """
        # default components to all variables one level below `variable`
        if components is None:
            var_list = pd.Series(self.data.variable.unique())
            components = var_list[pattern_match(var_list,
                                                '{}|*'.format(variable), 0)]

        if not len(components):
            msg = 'cannot check aggregate for {} because it has no components'
            logger().info(msg.format(variable))

            return

        # filter and groupby data, use `pd.Series.align` for matching index
        df_variable, df_components = (
            _aggregate_by_variables(self.data, variable, units)
            .align(_aggregate_by_variables(self.data, components, units))
        )

        # use `np.isclose` for checking match
        diff = df_variable[~np.isclose(df_variable, multiplier * df_components,
                                       **kwargs)]

        if len(diff):
            msg = '{} - {} of {} data points are not aggregates of components'
            logger().info(msg.format(variable, len(diff), len(df_variable)))

            if exclude_on_fail:
                self._exclude_on_fail(diff.index.droplevel([2, 3]))

            diff = pd.concat([diff], keys=[variable], names=['variable'])

            return diff.unstack().rename_axis(None, axis=1)
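
A usage sketch, assuming `df` is an IamDataFrame that reports `Primary Energy` alongside its sub-categories:

# returns None if `Primary Energy` equals the sum of its `Primary Energy|*`
# components; otherwise returns a DataFrame of the mismatching data points
diff = df.check_aggregate('Primary Energy')

# tolerances are passed through to `np.isclose()`
diff = df.check_aggregate('Primary Energy', rtol=1e-3)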
Example #13
def filter_by_meta(data, df, join_meta=False, **kwargs):
    """Filter by and join meta columns from an IamDataFrame to a pd.DataFrame

    Parameters
    ----------
    data: pd.DataFrame instance
        DataFrame to which meta columns are to be joined,
        index or columns must include `['model', 'scenario']`
    df: IamDataFrame instance
        IamDataFrame from which meta columns are filtered and joined (optional)
    join_meta: bool, default False
        join selected columns from `df.meta` on `data`
    kwargs:
        meta columns to be joined, where `col=...` applies filters
        by the given arguments (using `utils.pattern_match()`) and `col=None`
        joins the column without filtering
    """
    if not set(META_IDX).issubset(data.index.names + list(data.columns)):
        raise ValueError('missing required index dimensions or columns!')

    meta = df.meta[list(kwargs)].copy()

    # filter meta by columns
    keep = np.array([True] * len(meta))
    for col, values in kwargs.items():
        if values is not None:
            keep_col = pattern_match(meta[col], values)
            keep &= keep_col
    meta = meta[keep]

    # set the data index to META_IDX and apply filtered meta index
    data = data.copy()
    idx = list(data.index.names) if not data.index.names == [None] else None
    data = data.reset_index().set_index(META_IDX).loc[meta.index]

    # join meta (optional), reset index to format as input arg
    data = data.join(meta) if join_meta else data
    data = data.reset_index().set_index(idx or 'index')
    if idx is None:
        data.index.name = None

    return data
Example #14
def test_pattern_match_ast2_regex():
    data = pd.Series(['foo|bar', 'foo', 'bar'])
    values = ['*o*b*']

    obs = utils.pattern_match(data, values)
    assert (obs == [True, False, False]).all()
Example #15
def test_pattern_match_nan():
    data = pd.Series(['foo', np.nan])
    values = ['baz']

    obs = utils.pattern_match(data, values, has_nan=True)
    assert (obs == [False, False]).all()
Example #16
def test_pattern_match_one():
    data = pd.Series(['foo', 'bar'])
    values = ['foo']

    obs = utils.pattern_match(data, values)
    assert (obs == [True, False]).all()
Example #17
def test_pattern_match_dollar():
    data = pd.Series(["foo$bar", "foo"])
    values = ["foo$bar"]

    obs = utils.pattern_match(data, values)
    assert (obs == [True, False]).all()
Example #18
def test_pattern_regexp():
    data = pd.Series(["foo", "foa", "foo$"])
    values = ["fo.$"]

    obs = utils.pattern_match(data, values, regexp=True)
    assert (obs == [True, True, False]).all()
Example #19
def test_pattern_match_dot():
    data = pd.Series(["foo", "fo."])
    values = ["fo."]

    obs = utils.pattern_match(data, values)
    assert (obs == [False, True]).all()
Example #20
def test_pattern_match_brackets():
    data = pd.Series(["foo (bar)", "foo bar"])
    values = ["foo (bar)"]

    obs = utils.pattern_match(data, values)
    assert (obs == [True, False]).all()
Example #21
def test_pattern_match_plus():
    data = pd.Series(["foo", "foo+", "+bar", "b+az"])
    values = ["*+*"]

    obs = utils.pattern_match(data, values)
    assert (obs == [False, True, True, True]).all()
Example #22
def test_pattern_match_ast2_regex():
    data = pd.Series(["foo|bar", "foo", "bar"])
    values = ["*o*b*"]

    obs = utils.pattern_match(data, values)
    assert (obs == [True, False, False]).all()
Example #23
def test_pattern_match_none():
    data = pd.Series(["foo", "bar"])
    values = ["baz"]

    obs = utils.pattern_match(data, values)
    assert (obs == [False, False]).all()
Example #24
def read_unfccc(
    party_code,
    gases=None,
    tier=None,
    mapping=None,
    model="UNFCCC",
    scenario="Data Inventory",
):
    """Read data from the UNFCCC Data Inventory

    This function is a wrapper around
    :meth:`unfccc_di_api.UNFCCCApiReader.query`.

    The data returned from the UNFCCC Data Inventory is transformed
    into a structure similar to the format used in IPCC reports and
    IAM model comparison projects. For compatibility with the
    `iam-units <https://github.com/IAMconsortium/units>`_ package
    and the :meth:`convert_unit <IamDataFrame.convert_unit>` method,
    emissions species are formatted to standard text ('CO2')
    instead of subscripts ('CO₂') and the unit 'CO₂ equivalent'
    used by UNFCCC is replaced by 'CO2e'.

    Parameters
    ----------
    party_code : str
        ISO3-style code for UNFCCC party (country)
    gases : str or list of str, optional
        Emission species to be queried from the data inventory; can be given
        in subscript format ('CO₂') or as simple text ('CO2')
    tier : int or list of int
        Pre-specified groupings of UNFCCC data into a variable naming format
        used in IPCC reports and IAM model comparison projects
    mapping : dict, optional
        Mapping to cast UNFCCC-data columns into IAMC-style variables, e.g.

        .. code-block:: python

            {
                'Emissions|{gas}|Energy': ('1.  Energy', '*', '*', '*'),
            }

        where the tuple corresponds to filters for the columns
        `['category', 'classification', 'measure', 'gas']`
        and `{<col>}` tags in the key are replaced by the column value.
    model : str, optional
        Name to be used as model identifier
    scenario : str, optional
        Name to be used as scenario identifier

    Returns
    -------
    :class:`IamDataFrame`
    """
    if not HAS_UNFCCC:  # pragma: no cover
        raise ImportError("Required package `unfccc-di-api` not found!")

    # check that only one of `tier` or `mapping` is provided
    if (tier is None and mapping is None) or (tier is not None
                                              and mapping is not None):
        raise ValueError("Please specify either `tier` or `mapping`!")

    global _READER
    if _READER is None:
        _READER = unfccc_di_api.UNFCCCApiReader()

    # retrieve data, drop non-numeric data and base year
    data = _READER.query(party_code=party_code, gases=to_list(gases))
    data = data[~np.isnan(data.numberValue)]
    data = data[data.year != "Base year"]

    # create the mapping from the data if `tier` is given
    if tier is not None:
        _category = data.category.unique()
        mapping = {}

        for t in to_list(tier):
            # treatment of tier 1
            if t == 1:
                pattern = re.compile(".\\.  ")  # pattern of top-level category
                for i in [i for i in _category if pattern.match(i)]:
                    key = "Emissions|{gas}|" + i[4:]
                    mapping[key] = (
                        i,
                        "Total for category",
                        "Net emissions/removals",
                        "*",
                    )
            else:
                raise ValueError(f"Unknown value for `tier`: {t}")

    # add new `variable` column, iterate over mapping to determine variables
    data["variable"] = None
    for variable, value in mapping.items():
        matches = np.array([True] * len(data))
        for i, col in enumerate(NAME_COLS):
            matches &= pattern_match(data[col], value[i])

        data.loc[matches,
                 "variable"] = data.loc[matches].apply(_compile_variable,
                                                       variable=variable,
                                                       axis=1)

    # drop unspecified rows and columns, rename value column
    cols = ["party", "variable", "unit", "year", "gas", "numberValue"]
    data = data.loc[[isstr(i) for i in data.variable], cols]
    data.rename(columns={"numberValue": "value"}, inplace=True)

    # append `gas` to unit, drop `gas` column
    data.loc[:, "unit"] = data.apply(_compile_unit, axis=1)
    data.drop(columns="gas", inplace=True)

    return IamDataFrame(data, model=model, scenario=scenario, region="party")
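
A usage sketch; the party code 'DEU' is illustrative, the mapping repeats the docstring example, and the call requires the optional `unfccc-di-api` dependency:

# query tier-1 CO2 data for Germany from the UNFCCC Data Inventory
df = read_unfccc(party_code='DEU', tier=1, gases=['CO2'])

# alternatively, provide an explicit mapping instead of a tier
mapping = {
    'Emissions|{gas}|Energy': ('1.  Energy', '*', '*', '*'),
}
df = read_unfccc(party_code='DEU', mapping=mapping)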
Example #25
    def check_aggregate_regions(self, variable, region='World',
                                components=None, units=None,
                                exclude_on_fail=False, **kwargs):
        """Check whether the region timeseries data match the aggregation
        of components

        Parameters
        ----------
        variable: str
            variable to be checked for matching aggregation of components data
        region: str
            region to be checked for matching aggregation of components data
        components: list of str, default None
            list of regions, defaults to all regions other than `region`
        units: str or list of str, default None
            filter variable and components for given unit(s)
        exclude_on_fail: boolean, default False
            flag scenarios failing validation as `exclude: True`
        kwargs: passed to `np.isclose()`
        """
        var_df = self.filter(variable=variable, level=0)

        if components is None:
            components = list(set(var_df.data.region) - set([region]))

        if not len(components):
            msg = (
                'cannot check regional aggregate for `{}` because it has no '
                'regional components'
            )
            logger().info(msg.format(variable))

            return None

        # filter and groupby data, use `pd.Series.align` for matching index
        df_region, df_components = (
            _aggregate_by_regions(var_df.data, region, units)
            .align(_aggregate_by_regions(var_df.data, components, units))
        )

        df_components.index = df_components.index.droplevel(
            "variable"
        )

        # Add in variables that are included in region totals but which
        # aren't included in the regional components.
        # For example, if we are looking at World and Emissions|BC, we need
        # to add aviation and shipping to the sum of Emissions|BC for each
        # of World's regional components to do a valid check.
        different_region = components[0]
        var_list = pd.Series(self.data.variable.unique())
        var_components = var_list[pattern_match(var_list,
                                                '{}|*'.format(variable), 0)]
        for var_to_add in var_components:
            var_rows = self.data.variable == var_to_add
            region_rows = self.data.region == different_region
            var_has_regional_info = (var_rows & region_rows).any()
            if not var_has_regional_info:
                df_var_to_add = self.filter(
                    region=region, variable=var_to_add
                ).data.groupby(REGION_IDX).sum()['value']
                df_var_to_add.index = df_var_to_add.index.droplevel("variable")

                if len(df_var_to_add):
                    df_components = df_components.add(df_var_to_add,
                                                      fill_value=0)

        df_components = pd.concat([df_components], keys=[variable],
                                  names=['variable'])

        # use `np.isclose` for checking match
        diff = df_region[~np.isclose(df_region, df_components, **kwargs)]

        if len(diff):
            msg = (
                '{} - {} of {} data points are not aggregates of regional '
                'components'
            )
            logger().info(msg.format(variable, len(diff), len(df_region)))

            if exclude_on_fail:
                self._exclude_on_fail(diff.index.droplevel([2, 3]))

            diff = pd.concat([diff], keys=[region], names=['region'])

            return diff.unstack().rename_axis(None, axis=1)
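
A usage sketch, assuming `df` is an IamDataFrame with both `World` and regional data for the checked variable:

# returns None if the `World` values of `Emissions|CO2` match the sum of the
# regional components; otherwise returns a DataFrame of mismatching data points
diff = df.check_aggregate_regions('Emissions|CO2', region='World')

# tolerances are passed through to `np.isclose()`
diff = df.check_aggregate_regions('Emissions|CO2', rtol=1e-3)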