Beispiel #1
0
def test_pd_join_by_meta_nonmatching_index(meta_df):
    """Join the 'string' meta column onto the non-matching-index fixture.

    With `string=None` no rows are expected to be dropped; the first two
    rows are expected to carry NaN in the joined column and the last 'b'.
    """
    frame = df_filter_by_meta_nonmatching_idx
    meta_df.set_meta(['a', 'b'], 'string')

    column_order = ['scenario', 2010, 2020, 'string']
    observed = filter_by_meta(
        frame, meta_df, join_meta=True, string=None
    ).reindex(columns=column_order)

    # `assign` works on a copy, so the shared fixture frame stays untouched
    expected = frame.assign(string=[np.nan, np.nan, 'b'])

    pd.testing.assert_frame_equal(observed.sort_index(level=1), expected)
Beispiel #2
0
def test_pd_join_by_meta_nonmatching_index(test_df):
    """Join the "string" meta column onto the non-matching-index fixture.

    With `string=None` no rows are expected to be dropped; the first two
    rows are expected to carry NaN in the joined column and the last "b".
    """
    frame = df_filter_by_meta_nonmatching_idx
    test_df.set_meta(["a", "b"], "string")

    column_order = ["scenario", 2010, 2020, "string"]
    observed = filter_by_meta(
        frame, test_df, join_meta=True, string=None
    ).reindex(columns=column_order)

    # `assign` works on a copy, so the shared fixture frame stays untouched
    expected = frame.assign(string=[np.nan, np.nan, "b"])

    pd.testing.assert_frame_equal(observed.sort_index(level=1), expected)
Beispiel #3
0
def test_pd_filter_by_meta_nonmatching_index(meta_df):
    """Filter the non-matching-index fixture down to rows with string 'b'.

    Only the third row of the fixture is expected to remain, with the
    joined 'string' column set to 'b'.
    """
    frame = df_filter_by_meta_nonmatching_idx
    meta_df.set_meta(['a', 'b'], 'string')

    column_order = ['scenario', 2010, 2020, 'string']
    observed = filter_by_meta(
        frame, meta_df, join_meta=True, string='b'
    ).reindex(columns=column_order)

    # `assign` returns a new frame, leaving the shared fixture untouched
    expected = frame.iloc[2:3].assign(string='b')

    pd.testing.assert_frame_equal(observed, expected)
Beispiel #4
0
def test_pd_filter_by_meta_nonmatching_index(test_df):
    """Filter the non-matching-index fixture down to rows with string "b".

    Only the third row of the fixture is expected to remain, with the
    joined "string" column set to "b".
    """
    frame = df_filter_by_meta_nonmatching_idx
    test_df.set_meta(["a", "b"], "string")

    column_order = ["scenario", 2010, 2020, "string"]
    observed = filter_by_meta(
        frame, test_df, join_meta=True, string="b"
    ).reindex(columns=column_order)

    # `assign` returns a new frame, leaving the shared fixture untouched
    expected = frame.iloc[2:3].assign(string="b")

    pd.testing.assert_frame_equal(observed, expected)
Beispiel #5
0
def test_pd_filter_by_meta_no_index(meta_df):
    """Filter the matching-index fixture (used without an index) by meta.

    Keeps rows whose 'boolean' meta value is True and joins both the
    'boolean' and 'int' meta columns; the first two rows are expected.
    """
    frame = df_filter_by_meta_matching_idx

    meta_df.set_meta([True, False], 'boolean')
    meta_df.set_meta(0, 'int')

    column_order = META_IDX + ['region', 'col', 'boolean', 'int']
    observed = filter_by_meta(
        frame, meta_df, join_meta=True, boolean=True, int=None
    ).reindex(columns=column_order)

    # `assign` returns a new frame, leaving the shared fixture untouched
    expected = frame.iloc[0:2].assign(boolean=True, int=0)

    pd.testing.assert_frame_equal(observed, expected)
Beispiel #6
0
def test_pd_filter_by_meta_no_index(test_df):
    """Filter the matching-index fixture (used without an index) by meta.

    Keeps rows whose "boolean" meta value is True and joins both the
    "boolean" and "int" meta columns; the first two rows are expected.
    """
    frame = df_filter_by_meta_matching_idx

    test_df.set_meta([True, False], "boolean")
    test_df.set_meta(0, "int")

    column_order = META_IDX + ["region", "col", "boolean", "int"]
    observed = filter_by_meta(
        frame, test_df, join_meta=True, boolean=True, int=None
    ).reindex(columns=column_order)

    # `assign` returns a new frame, leaving the shared fixture untouched
    expected = frame.iloc[0:2].assign(boolean=True, int=0)

    pd.testing.assert_frame_equal(observed, expected)
Beispiel #7
0
def test_pd_filter_by_meta(test_df):
    """Filter a frame indexed by model and region using meta indicators.

    Keeps rows whose "boolean" meta value is True and joins both the
    "boolean" and "integer" meta columns; the first two rows are expected.
    """
    indexed = df_filter_by_meta_matching_idx.set_index(["model", "region"])

    test_df.set_meta([True, False], "boolean")
    test_df.set_meta(0, "integer")

    column_order = ["scenario", "col", "boolean", "integer"]
    observed = filter_by_meta(
        indexed, test_df, join_meta=True, boolean=True, integer=None
    ).reindex(columns=column_order)

    expected = indexed.iloc[0:2].assign(boolean=True, integer=0)

    pd.testing.assert_frame_equal(observed, expected)
Beispiel #8
0
def test_pd_filter_by_meta(test_df):
    """Filter a frame indexed by model and region using meta indicators.

    Keeps rows whose 'boolean' meta value is True and joins both the
    'boolean' and 'integer' meta columns; the first two rows are expected.
    """
    indexed = df_filter_by_meta_matching_idx.set_index(['model', 'region'])

    test_df.set_meta([True, False], 'boolean')
    test_df.set_meta(0, 'integer')

    column_order = ['scenario', 'col', 'boolean', 'integer']
    observed = filter_by_meta(
        indexed, test_df, join_meta=True, boolean=True, integer=None
    ).reindex(columns=column_order)

    expected = indexed.iloc[0:2].assign(boolean=True, integer=0)

    pd.testing.assert_frame_equal(observed, expected)
Beispiel #9
0
def test_pd_filter_by_meta_no_index(meta_df):
    """Filter a plain (default-indexed) frame by meta indicators.

    Keeps rows whose 'boolean' meta value is True and joins both the
    'boolean' and 'int' meta columns; the first two rows are expected.
    """
    records = [
        ['a_model', 'a_scenario', 'a_region1', 1],
        ['a_model', 'a_scenario', 'a_region2', 2],
        ['a_model', 'a_scenario2', 'a_region3', 3],
    ]
    frame = pd.DataFrame(
        records, columns=['model', 'scenario', 'region', 'col']
    )

    meta_df.set_meta([True, False], 'boolean')
    meta_df.set_meta(0, 'int')

    column_order = META_IDX + ['region', 'col', 'boolean', 'int']
    observed = filter_by_meta(
        frame, meta_df, join_meta=True, boolean=True, int=None
    ).reindex(columns=column_order)

    expected = frame.iloc[0:2].assign(boolean=True, int=0)

    pd.testing.assert_frame_equal(observed, expected)
Beispiel #10
0
    def add(self, data, header, row=None, subheader=None):
        """Filter `data` by arguments of this SummaryStats instance,
        then apply `pd.describe()` and format the statistics

        Parameters
        ----------
        data : pd.DataFrame or pd.Series
            data for which summary statistics should be computed
        header : str
            column name for descriptive statistics
        row : str
            row name for descriptive statistics
            (required if `pyam.Statistics(rows=True)`)
        subheader : str, optional
            column name (level=1) if data is an unnamed `pd.Series`

        Raises
        ------
        ValueError
            if `row` is missing although this instance uses rows, if `row`
            is given although this instance does not use rows, or if `data`
            is an unnamed `pd.Series` and no `subheader` is provided
        """
        # verify validity of specifications
        if self.rows is not None and row is None:
            raise ValueError('row specification required')
        if self.rows is None and row is not None:
            raise ValueError('row arg illegal for this `Statistics` instance')
        if isinstance(data, pd.Series):
            if subheader is None and data.name is None:
                msg = '`data` must be named `pd.Series` or provide `subheader`'
                raise ValueError(msg)
            # build the frame with the requested column name instead of
            # renaming the caller's series in place
            name = data.name if subheader is None else subheader
            data = data.to_frame(name=name)

        if self.rows is not None and row not in self.rows:
            self.rows.append(row)

        _stats = None

        # describe with groupby feature
        if self.groupby is not None:
            filter_args = dict(data=data, df=self.df, join_meta=True)
            filter_args.update(self.groupby)
            _stats = (filter_by_meta(**filter_args).groupby(
                self.col).describe(percentiles=self.percentiles))
            _stats = pd.concat([_stats], keys=[self.col], names=[''], axis=0)
            if self.rows:
                _stats['row'] = row
                _stats.set_index('row', append=True, inplace=True)
            _stats.index.names = [''] * 3 if self.rows else [''] * 2

        # describe with filter feature
        for (idx, _filter) in self.filters:
            filter_args = dict(data=data, df=self.df)
            filter_args.update(_filter)
            _stats_f = (filter_by_meta(**filter_args).describe(
                percentiles=self.percentiles))
            # reshape the description into a single row of statistics
            _stats_f = pd.DataFrame(_stats_f.unstack()).T

            # one index level per filter key, plus one for the row (if used)
            if self.idx_depth == 1:
                levels = [[idx]]
            else:
                levels = [[idx[0]], [idx[1]]]
            n_levels = self.idx_depth
            if self.rows:
                levels = levels + [[row]]
                n_levels += 1
            _stats_f.index = pd.MultiIndex(levels=levels,
                                           codes=[[0]] * n_levels)

            # `DataFrame.append` was removed in pandas 2.0, use `pd.concat`
            if _stats is None:
                _stats = _stats_f
            else:
                _stats = pd.concat([_stats, _stats_f])

        # add header
        _stats = pd.concat([_stats], keys=[header], names=[''], axis=1)
        subheader = _stats.columns.get_level_values(1).unique()
        self._add_to_header(header, subheader)

        # set statistics
        if self.stats is None:
            self.stats = _stats
        else:
            self.stats = _stats.combine_first(self.stats)