コード例 #1
0
    def test_metricx2_reversed(self):
        result = Pandas(mock_dataset.fields.wins, mock_dataset.fields.votes).transform(dimx0_metricx2_df, [], [])

        expected = dimx0_metricx2_df.copy()[[f('wins'), f('votes')]]
        expected.columns = ['Wins', 'Votes']
        expected.columns.name = 'Metrics'
        expected = expected.applymap(format_float)

        pandas.testing.assert_frame_equal(expected, result)
コード例 #2
0
    def test_dimx1_date(self):
        result = Pandas(mock_dataset.fields.wins).transform(dimx1_date_df, [mock_dataset.fields.timestamp], [])

        expected = dimx1_date_df.copy()[[f('wins')]]
        expected.index.names = ['Timestamp']
        expected.columns = ['Wins']
        expected.columns.name = 'Metrics'
        expected = expected.applymap(format_float)

        pandas.testing.assert_frame_equal(expected, result)
コード例 #3
0
    def test_metricx1(self):
        result = Pandas(mock_dataset.fields.votes) \
            .transform(dimx0_metricx1_df, mock_dataset, [], [])

        expected = dimx0_metricx1_df.copy()[[f('votes')]]
        expected.columns = ['Votes']
        expected.columns.name = 'Metrics'
        expected = expected.applymap(_format_float)

        pandas.testing.assert_frame_equal(expected, result)
コード例 #4
0
    def test_dimx1_str(self):
        result = Pandas(mock_dataset.fields.wins).transform(dimx1_str_df, [mock_dataset.fields.political_party], [])

        expected = dimx1_str_df.copy()[[f('wins')]]
        expected.index = pd.Index(['Democrat', 'Independent', 'Republican'], name='Party')
        expected.columns = ['Wins']
        expected.columns.name = 'Metrics'
        expected = expected.applymap(format_float)

        pandas.testing.assert_frame_equal(expected, result)
コード例 #5
0
    def test_dimx1_date_with_operation(self):
        result = Pandas(CumSum(mock_dataset.fields.votes)) \
            .transform(dimx1_date_operation_df, mock_dataset, [mock_dataset.fields.timestamp], [])

        expected = dimx1_date_operation_df.copy()[[f('cumsum(votes)')]]
        expected.index.names = ['Timestamp']
        expected.columns = ['CumSum(Votes)']
        expected.columns.name = 'Metrics'
        expected = expected.applymap(_format_float)

        pandas.testing.assert_frame_equal(expected, result)
コード例 #6
0
    def test_sort_with_no_index(self):
        result = Pandas(mock_dataset.fields.wins, sort=[0]) \
            .transform(no_index_df, mock_dataset, [mock_dataset.fields.timestamp], [])

        expected = no_index_df.copy()[[f('wins')]]
        expected.index.names = ['Timestamp']
        expected.columns = ['Wins']
        expected.columns.name = 'Metrics'
        expected = expected.applymap(_format_float)

        pandas.testing.assert_frame_equal(expected, result)
コード例 #7
0
    def test_sort_value_greater_than_number_of_columns_is_ignored(self):
        result = Pandas(mock_dataset.fields.wins, sort=[5]) \
            .transform(dimx1_date_df, mock_dataset, [mock_dataset.fields.timestamp], [])

        expected = dimx1_date_df.copy()[[f('wins')]]
        expected.index.names = ['Timestamp']
        expected.columns = ['Wins']
        expected.columns.name = 'Metrics'
        expected = expected.applymap(_format_float)

        pandas.testing.assert_frame_equal(expected, result)
コード例 #8
0
    def test_dimx2_date_str(self):
        dimensions = [mock_dataset.fields.timestamp, mock_dataset.fields.political_party]
        result = Pandas(mock_dataset.fields.wins).transform(dimx2_date_str_df, dimensions, [])

        expected = dimx2_date_str_df.copy()[[f('wins')]]
        expected.index.names = ['Timestamp', 'Party']
        expected.columns = ['Wins']
        expected.columns.name = 'Metrics'
        expected = expected.applymap(format_float)

        pandas.testing.assert_frame_equal(expected, result)
コード例 #9
0
    def test_dimx1_int(self):
        result = Pandas(mock_dataset.fields.wins) \
            .transform(dimx1_str_df, mock_dataset, [mock_dataset.fields['candidate-id']], [])

        expected = dimx1_str_df.copy()[[f('wins')]]
        expected.index.names = ['Candidate ID']
        expected.columns = ['Wins']
        expected.columns.name = 'Metrics'
        expected = expected.applymap(_format_float)

        pandas.testing.assert_frame_equal(expected, result)
コード例 #10
0
    def test_time_series_ref(self):
        dimensions = [mock_dataset.fields.timestamp, mock_dataset.fields.political_party]
        references = [ElectionOverElection(mock_dataset.fields.timestamp)]
        result = Pandas(mock_dataset.fields.votes).transform(dimx2_date_str_ref_df, dimensions, references)

        expected = dimx2_date_str_ref_df.copy()[[f('votes'), f('votes_eoe')]]
        expected.index.names = ['Timestamp', 'Party']
        expected.columns = ['Votes', 'Votes EoE']
        expected.columns.name = 'Metrics'
        expected = expected.applymap(format_float)

        pandas.testing.assert_frame_equal(expected, result)
コード例 #11
0
    def test_pivoted_dimx2_date_str(self):
        result = Pandas(mock_dataset.fields.wins, pivot=[mock_dataset.fields.political_party]) \
            .transform(dimx2_date_str_df, mock_dataset,
                       [mock_dataset.fields.timestamp, mock_dataset.fields.political_party], [])

        expected = dimx2_date_str_df.copy()[[f('wins')]]
        expected = expected.unstack(level=[1])
        expected.index.names = ['Timestamp']
        expected.columns = ['Democrat', 'Independent', 'Republican']
        expected.columns.names = ['Party']
        expected = expected.applymap(_format_float)

        pandas.testing.assert_frame_equal(expected, result)
コード例 #12
0
    def test_metricx2_sort_value_desc(self):
        result = Pandas(mock_dataset.fields.wins, sort=[1], ascending=[False]) \
            .transform(dimx1_date_df, mock_dataset, [mock_dataset.fields.timestamp], [])

        expected = dimx1_date_df.copy()[[f('wins')]]
        expected.index.names = ['Timestamp']
        expected.columns = ['Wins']
        expected.columns.name = 'Metrics'

        expected = expected.sort_values(['Wins'], ascending=False)
        expected = expected.applymap(_format_float)

        pandas.testing.assert_frame_equal(expected, result)
コード例 #13
0
    def test_fetch_only_dimx2_date_str(self):
        dimensions = [mock_dataset.fields.timestamp, mock_dataset.fields.political_party]
        dimensions[1].fetch_only = True
        result = Pandas(mock_dataset.fields.wins).transform(dimx2_date_str_df, dimensions, [])
        dimensions[1].fetch_only = False

        expected = dimx2_date_str_df.copy()[[f('wins')]]
        expected.reset_index('$political_party', inplace=True, drop=True)
        expected.index.names = ['Timestamp']
        expected.columns = ['Wins']
        expected.columns.name = 'Metrics'
        expected = expected.applymap(format_float)

        pandas.testing.assert_frame_equal(expected, result)
コード例 #14
0
    def test_pivoted_dimx2_date_str_sort_index_level_1_desc(self):
        result = Pandas(mock_dataset.fields.wins, sort=[1], ascending=[False]).transform(
            dimx2_date_str_df, [mock_dataset.fields.timestamp, mock_dataset.fields.political_party], []
        )

        expected = dimx2_date_str_df.copy()[[f('wins')]]
        expected.index.names = ['Timestamp', 'Party']
        expected.columns = ['Wins']
        expected.columns.name = 'Metrics'

        expected = expected.reset_index().sort_values(['Party'], ascending=[False]).set_index(['Timestamp', 'Party'])
        expected = expected.applymap(format_float)

        pandas.testing.assert_frame_equal(expected, result)
コード例 #15
0
    def test_neginf_in_metrics(self):
        cat_dim_df_with_nan = dimx1_str_df.copy()
        cat_dim_df_with_nan['$wins'] = cat_dim_df_with_nan['$wins'].apply(float)
        cat_dim_df_with_nan.iloc[2, 1] = np.inf

        result = Pandas(mock_dataset.fields.wins) \
            .transform(cat_dim_df_with_nan, mock_dataset, [mock_dataset.fields.political_party], [])

        expected = cat_dim_df_with_nan.copy()[[f('wins')]]
        expected.index = pd.Index(['Democrat', 'Independent', 'Republican'], name='Party')
        expected.columns = ['Wins']
        expected.columns.name = 'Metrics'
        expected = expected.applymap(_format_float)

        pandas.testing.assert_frame_equal(expected, result)
コード例 #16
0
    def test_use_pandas_default_for_ascending_when_arg_empty_list(self):
        result = Pandas(
            mock_dataset.fields.votes, pivot=[mock_dataset.fields.political_party], sort=[0, 2], ascending=[]
        ).transform(dimx2_date_str_df, [mock_dataset.fields.timestamp, mock_dataset.fields.political_party], [])

        expected = dimx2_date_str_df.copy()[[f('votes')]]
        expected = expected.unstack(level=[1])
        expected.index.names = ['Timestamp']
        expected.columns = ['Democrat', 'Independent', 'Republican']
        expected.columns.names = ['Party']

        expected = expected.reset_index().sort_values(['Timestamp', 'Democrat'], ascending=None).set_index('Timestamp')
        expected = expected.applymap(format_float)

        pandas.testing.assert_frame_equal(expected, result)
コード例 #17
0
    def test_pivoted_dimx2_date_str_with_sort_second_metric_desc(self):
        result = Pandas(
            mock_dataset.fields.votes, pivot=[mock_dataset.fields.political_party], sort=1, ascending=False
        ).transform(dimx2_date_str_df, [mock_dataset.fields.timestamp, mock_dataset.fields.political_party], [])

        expected = dimx2_date_str_df.copy()[[f('votes')]]
        expected = expected.unstack(level=[1])
        expected.index.names = ['Timestamp']
        expected.columns = ['Democrat', 'Independent', 'Republican']
        expected.columns.names = ['Party']

        expected = expected.reset_index().sort_values(['Democrat'], ascending=False).set_index('Timestamp')
        expected = expected.applymap(format_float)

        pandas.testing.assert_frame_equal(expected, result)
コード例 #18
0
    def test_metricx2_sort_index_and_value(self):
        result = Pandas(mock_dataset.fields.wins, sort=[-0, 1]).transform(
            dimx1_date_df, [mock_dataset.fields.timestamp], []
        )

        expected = dimx1_date_df.copy()[[f('wins')]]
        expected.index.names = ['Timestamp']
        expected.columns = ['Wins']
        expected.columns.name = 'Metrics'

        expected = (
            expected.reset_index().sort_values(['Timestamp', 'Wins'], ascending=[True, False]).set_index('Timestamp')
        )
        expected = expected.applymap(format_float)

        pandas.testing.assert_frame_equal(expected, result)
コード例 #19
0
    def test_inf_in_metrics_with_precision_zero(self):
        cat_dim_df_with_nan = dimx1_str_df.copy()
        cat_dim_df_with_nan['$wins'] = cat_dim_df_with_nan['$wins'].apply(float)
        cat_dim_df_with_nan.iloc[2, 1] = np.inf

        slicer_modified = copy.deepcopy(mock_dataset)
        slicer_modified.fields.wins.precision = 0

        result = Pandas(slicer_modified.fields.wins) \
            .transform(cat_dim_df_with_nan, slicer_modified, [slicer_modified.fields.political_party], [])

        expected = cat_dim_df_with_nan.copy()[[f('wins')]]
        expected.index = pd.Index(['Democrat', 'Independent', 'Republican'], name='Party')
        expected['$wins'] = ['6', '0', 'Inf']
        expected.columns = ['Wins']
        expected.columns.name = 'Metrics'

        pandas.testing.assert_frame_equal(expected, result)
コード例 #20
0
    def test_metric_format(self):
        import copy

        votes = copy.copy(mock_dataset.fields.votes)
        votes.prefix = '$'
        votes.suffix = '€'
        votes.precision = 2

        # divide the data frame by 3 to get a repeating decimal so we can check precision
        result = Pandas(votes).transform(dimx1_date_df / 3, [mock_dataset.fields.timestamp], [])

        f_votes = f('votes')
        expected = dimx1_date_df.copy()[[f_votes]]
        expected[f_votes] = ['${0:,.2f}€'.format(x) for x in expected[f_votes] / 3]
        expected.index.names = ['Timestamp']
        expected.columns = ['Votes']
        expected.columns.name = 'Metrics'

        pandas.testing.assert_frame_equal(expected, result)
コード例 #21
0
    def test_pivoted_dimx1_metricx2(self):
        result = Pandas(
            mock_dataset.fields.votes, mock_dataset.fields.wins, pivot=[mock_dataset.fields.timestamp]
        ).transform(dimx2_date_str_df, [mock_dataset.fields.timestamp, mock_dataset.fields.political_party], [])

        expected = dimx2_date_str_df.copy()[[f('votes'), f('wins')]]
        expected = expected.unstack(level=0)
        expected.index.names = ['Party']
        expected.columns = pd.MultiIndex.from_product(
            [
                ['Votes', 'Wins'],
                pd.DatetimeIndex(['1996-01-01', '2000-01-01', '2004-01-01', '2008-01-01', '2012-01-01', '2016-01-01']),
            ],
            names=['Metrics', 'Timestamp'],
        )

        expected = expected.applymap(format_float)

        pandas.testing.assert_frame_equal(expected, result)
コード例 #22
0
    def test_pivoted_df_transformation_formats_totals_correctly(self):
        test_table = Table('test')

        ds = DataSet(
            table=test_table,
            database=test_database,
            fields=[
                Field('date', label='Date', definition=test_table.date, data_type=DataType.date),
                Field('locale', label='Locale', definition=test_table.locale, data_type=DataType.text),
                Field('company', label='Company', definition=test_table.text, data_type=DataType.text),
                Field('metric1', label='Metric1', definition=Sum(test_table.number), data_type=DataType.number),
                Field('metric2', label='Metric2', definition=Sum(test_table.number), data_type=DataType.number),
            ],
        )

        df = pd.DataFrame.from_dict(
            {
                '$metric1': {('~~totals', '~~totals'): 3, ('za', '~~totals'): 3, ('za', 'C1'): 2, ('za', 'C2'): 1},
                '$metric2': {('~~totals', '~~totals'): 4, ('za', '~~totals'): 4, ('za', 'C1'): 2, ('za', 'C2'): 2},
            }
        )
        df.index.names = [f(ds.fields.locale.alias), f(ds.fields.company.alias)]

        result = Pandas(ds.fields.metric1, ds.fields.metric2, pivot=[ds.fields.company]).transform(
            df, [Rollup(ds.fields.locale), Rollup(ds.fields.company)], [], use_raw_values=True
        )

        self.assertEqual(['Metrics', 'Company'], list(result.columns.names))
        self.assertEqual(
            [
                ('Metric1', 'C1'),
                ('Metric1', 'C2'),
                ('Metric1', 'Totals'),
                ('Metric2', 'C1'),
                ('Metric2', 'C2'),
                ('Metric2', 'Totals'),
            ],
            result.columns.values.tolist(),
        )
        self.assertEqual(['Locale'], list(result.index.names))
        self.assertEqual(['za', 'Totals'], result.index.values.tolist())
        self.assertEqual([['2', '1', '3', '2', '2', '4'], ['', '', '3', '', '', '4']], result.values.tolist())