Beispiel #1
0
    def test_multiple_metrics_reversed(self):
        """CSV output lists the metrics in the order given to the constructor (wins, then votes)."""
        fields = mock_dataset.fields
        csv_text = CSV(fields.wins, fields.votes).transform(dimx0_metricx2_df, [], [])

        # Build the reference frame from the same fixture, selecting wins before votes.
        frame = dimx0_metricx2_df.copy()[[f('wins'), f('votes')]]
        frame.columns = ['Wins', 'Votes']
        expected_csv = frame.applymap(format_float_raw).to_csv(**csv_options)

        self.assertEqual(expected_csv, csv_text)
Beispiel #2
0
    def test_metricx2_reversed(self):
        """Pandas output keeps the metric order passed to the constructor (wins before votes)."""
        metrics = (mock_dataset.fields.wins, mock_dataset.fields.votes)
        actual = Pandas(*metrics).transform(dimx0_metricx2_df, [], [])

        selected = dimx0_metricx2_df.copy()[[f('wins'), f('votes')]]
        # The column axis is expected to carry the name 'Metrics'.
        selected.columns = pandas.Index(['Wins', 'Votes'], name='Metrics')
        expected = selected.applymap(format_float)

        pandas.testing.assert_frame_equal(expected, actual)
Beispiel #3
0
    def test_time_series_multi_ref(self):
        """CSV output for two metrics with an EoE reference emits each metric followed by its EoE column."""
        fields = mock_dataset.fields
        query_dimensions = [fields.timestamp, fields.political_party]
        query_references = [ElectionOverElection(fields.timestamp)]
        csv_widget = CSV(fields.votes, fields.wins)
        csv_text = csv_widget.transform(dimx2_date_str_ref_df, mock_dataset, query_dimensions, query_references)

        wanted = [f('votes'), f('votes_eoe'), f('wins'), f('wins_eoe')]
        frame = dimx2_date_str_ref_df.copy()[wanted]
        frame.index.names = ['Timestamp', 'Party']
        frame.columns = ['Votes', 'Votes EoE', 'Wins', 'Wins EoE']
        expected_csv = frame.applymap(_format_float).to_csv(**csv_options)

        self.assertEqual(expected_csv, csv_text)
Beispiel #4
0
    def test_time_series_ref(self):
        """Pandas output places the EoE reference column directly after its metric."""
        fields = mock_dataset.fields
        dimensions = [fields.timestamp, fields.political_party]
        references = [ElectionOverElection(fields.timestamp)]
        actual = Pandas(fields.votes).transform(dimx2_date_str_ref_df, dimensions, references)

        selected = dimx2_date_str_ref_df.copy()[[f('votes'), f('votes_eoe')]]
        selected.index.names = ['Timestamp', 'Party']
        selected.columns = pandas.Index(['Votes', 'Votes EoE'], name='Metrics')
        expected = selected.applymap(format_float)

        pandas.testing.assert_frame_equal(expected, actual)
Beispiel #5
0
    def test_int_dim(self):
        """CSV output for the candidate-id dimension labels the index 'Candidate ID'."""
        widget = CSV(mock_dataset.fields.wins)
        candidate_dim = mock_dataset.fields['candidate-id']
        csv_text = widget.transform(dimx1_num_df, [candidate_dim], [])

        frame = dimx1_num_df.copy()[[f('wins')]]
        # The expected index is the integers 1 through 11.
        frame.index = pd.Index([*range(1, 12)], name='Candidate ID')
        frame.columns = ['Wins']
        expected_csv = frame.to_csv(**csv_options)

        self.assertEqual(expected_csv, csv_text)
Beispiel #6
0
    def test_str_dim(self):
        """CSV output for the political_party dimension uses the party display names as the index."""
        widget = CSV(mock_dataset.fields.wins)
        party_dim = mock_dataset.fields.political_party
        csv_text = widget.transform(dimx1_str_df, mock_dataset, [party_dim], [])

        party_labels = ['Democrat', 'Independent', 'Republican']
        frame = dimx1_str_df.copy()[[f('wins')]]
        frame.index = pd.Index(party_labels, name='Party')
        frame.columns = ['Wins']
        expected_csv = frame.to_csv(**csv_options)

        self.assertEqual(expected_csv, csv_text)
Beispiel #7
0
    def test_single_metric(self):
        """CSV output for one metric and no dimensions has a single 'Votes' column."""
        csv_text = CSV(mock_dataset.fields.votes).transform(dimx0_metricx1_df, [], [])

        frame = dimx0_metricx1_df.copy()[[f('votes')]]
        frame.columns = ['Votes']
        formatted = frame.applymap(format_float_raw)
        expected_csv = formatted.to_csv(**csv_options)

        self.assertEqual(expected_csv, csv_text)
Beispiel #8
0
    def test_dimx1_date(self):
        """Pandas output for a single timestamp dimension names the index 'Timestamp'."""
        widget = Pandas(mock_dataset.fields.wins)
        actual = widget.transform(dimx1_date_df, [mock_dataset.fields.timestamp], [])

        selected = dimx1_date_df.copy()[[f('wins')]]
        selected.index.names = ['Timestamp']
        selected.columns = pandas.Index(['Wins'], name='Metrics')
        expected = selected.applymap(format_float)

        pandas.testing.assert_frame_equal(expected, actual)
Beispiel #9
0
    def test_time_series_dim(self):
        """CSV output for a single timestamp dimension names the index 'Timestamp'."""
        widget = CSV(mock_dataset.fields.wins)
        csv_text = widget.transform(dimx1_date_df, [mock_dataset.fields.timestamp], [])

        frame = dimx1_date_df.copy()[[f('wins')]]
        frame.index.names = ['Timestamp']
        frame.columns = ['Wins']
        expected_csv = frame.applymap(format_float_raw).to_csv(**csv_options)

        self.assertEqual(expected_csv, csv_text)
Beispiel #10
0
    def test_pivoted_dimx1_metricx2(self):
        """Pivoting the timestamp with two metrics yields (Metrics, Timestamp) columns indexed by party."""
        fields = mock_dataset.fields
        widget = Pandas(fields.votes, fields.wins, pivot=[fields.timestamp])
        actual = widget.transform(dimx2_date_str_df, [fields.timestamp, fields.political_party], [])

        # Move the first index level (the timestamp) into the columns.
        unstacked = dimx2_date_str_df.copy()[[f('votes'), f('wins')]].unstack(level=0)
        unstacked.index.names = ['Party']

        timestamps = pd.DatetimeIndex(
            ['1996-01-01', '2000-01-01', '2004-01-01', '2008-01-01', '2012-01-01', '2016-01-01']
        )
        unstacked.columns = pd.MultiIndex.from_product(
            [['Votes', 'Wins'], timestamps],
            names=['Metrics', 'Timestamp'],
        )

        expected = unstacked.applymap(format_float)

        pandas.testing.assert_frame_equal(expected, actual)
Beispiel #11
0
    def test_dimx1_str(self):
        """Pandas output for the political_party dimension uses the party display names as the index."""
        widget = Pandas(mock_dataset.fields.wins)
        actual = widget.transform(dimx1_str_df, [mock_dataset.fields.political_party], [])

        party_labels = ['Democrat', 'Independent', 'Republican']
        selected = dimx1_str_df.copy()[[f('wins')]]
        selected.index = pd.Index(party_labels, name='Party')
        selected.columns = pd.Index(['Wins'], name='Metrics')
        expected = selected.applymap(format_float)

        pandas.testing.assert_frame_equal(expected, actual)
Beispiel #12
0
    def test_metricx1(self):
        """Pandas output for one metric and no dimensions has a single 'Votes' column named under 'Metrics'."""
        widget = Pandas(mock_dataset.fields.votes)
        actual = widget.transform(dimx0_metricx1_df, mock_dataset, [], [])

        selected = dimx0_metricx1_df.copy()[[f('votes')]]
        selected.columns = pandas.Index(['Votes'], name='Metrics')
        expected = selected.applymap(_format_float)

        pandas.testing.assert_frame_equal(expected, actual)
Beispiel #13
0
    def test_multi_dimx2_date_str(self):
        """CSV output for timestamp and party dimensions labels both index levels."""
        fields = mock_dataset.fields
        query_dimensions = [fields.timestamp, fields.political_party]
        widget = CSV(fields.wins)
        csv_text = widget.transform(dimx2_date_str_df, mock_dataset, query_dimensions, [])

        frame = dimx2_date_str_df.copy()[[f('wins')]]
        frame.index.names = ['Timestamp', 'Party']
        frame.columns = ['Wins']
        expected_csv = frame.to_csv(**csv_options)

        self.assertEqual(expected_csv, csv_text)
Beispiel #14
0
    def test_pivoted_df_transformation_formats_totals_correctly(self):
        """Pivoted output with rollups shows '~~totals' markers as 'Totals' on both axes.

        The company dimension is pivoted into the columns while both dimensions
        are wrapped in Rollup. The expected cells are strings, and cells with no
        data are expected to be empty strings.
        """
        test_table = Table('test')

        dataset_fields = [
            Field('date', label='Date', definition=test_table.date, data_type=DataType.date),
            Field('locale', label='Locale', definition=test_table.locale, data_type=DataType.text),
            Field('company', label='Company', definition=test_table.text, data_type=DataType.text),
            Field('metric1', label='Metric1', definition=Sum(test_table.number), data_type=DataType.number),
            Field('metric2', label='Metric2', definition=Sum(test_table.number), data_type=DataType.number),
        ]
        ds = DataSet(table=test_table, database=test_database, fields=dataset_fields)

        # Rows are keyed by (locale, company); '~~totals' marks the rollup rows.
        metric1_rows = {('~~totals', '~~totals'): 3, ('za', '~~totals'): 3, ('za', 'C1'): 2, ('za', 'C2'): 1}
        metric2_rows = {('~~totals', '~~totals'): 4, ('za', '~~totals'): 4, ('za', 'C1'): 2, ('za', 'C2'): 2}
        df = pd.DataFrame.from_dict({'$metric1': metric1_rows, '$metric2': metric2_rows})
        df.index.names = [f(ds.fields.locale.alias), f(ds.fields.company.alias)]

        widget = Pandas(ds.fields.metric1, ds.fields.metric2, pivot=[ds.fields.company])
        rollup_dimensions = [Rollup(ds.fields.locale), Rollup(ds.fields.company)]
        result = widget.transform(df, rollup_dimensions, [], use_raw_values=True)

        expected_columns = [
            (metric, company)
            for metric in ('Metric1', 'Metric2')
            for company in ('C1', 'C2', 'Totals')
        ]
        expected_values = [
            ['2', '1', '3', '2', '2', '4'],
            ['', '', '3', '', '', '4'],
        ]

        self.assertEqual(['Metrics', 'Company'], list(result.columns.names))
        self.assertEqual(expected_columns, result.columns.values.tolist())
        self.assertEqual(['Locale'], list(result.index.names))
        self.assertEqual(['za', 'Totals'], result.index.values.tolist())
        self.assertEqual(expected_values, result.values.tolist())
Beispiel #15
0
    def test_pivoted_single_dimension_transposes_data_frame(self):
        """Pivoting the only dimension yields the transposed frame, with parties as columns."""
        party_dim = mock_dataset.fields.political_party
        widget = CSV(mock_dataset.fields.wins, pivot=[party_dim])
        csv_text = widget.transform(dimx1_str_df, mock_dataset, [party_dim], [])

        frame = dimx1_str_df.copy()[[f('wins')]]
        frame.index = pd.Index(['Democrat', 'Independent', 'Republican'], name='Party')
        frame.columns = ['Wins']
        frame.columns.names = ['Metrics']
        # After the transpose, 'Metrics' names the row axis and 'Party' the column axis.
        expected_csv = frame.T.to_csv(**csv_options)

        self.assertEqual(expected_csv, csv_text)
Beispiel #16
0
    def test_dimx2_date_str(self):
        """Pandas output for timestamp and party dimensions labels both index levels."""
        fields = mock_dataset.fields
        dimensions = [fields.timestamp, fields.political_party]
        actual = Pandas(fields.wins).transform(dimx2_date_str_df, dimensions, [])

        selected = dimx2_date_str_df.copy()[[f('wins')]]
        selected.index.names = ['Timestamp', 'Party']
        selected.columns = pandas.Index(['Wins'], name='Metrics')
        expected = selected.applymap(format_float)

        pandas.testing.assert_frame_equal(expected, actual)
Beispiel #17
0
    def test_dimx1_date_with_operation(self):
        """Pandas output for a CumSum operation labels the column 'CumSum(Votes)'."""
        operation = CumSum(mock_dataset.fields.votes)
        widget = Pandas(operation)
        actual = widget.transform(dimx1_date_operation_df, mock_dataset, [mock_dataset.fields.timestamp], [])

        selected = dimx1_date_operation_df.copy()[[f('cumsum(votes)')]]
        selected.index.names = ['Timestamp']
        selected.columns = pandas.Index(['CumSum(Votes)'], name='Metrics')
        expected = selected.applymap(_format_float)

        pandas.testing.assert_frame_equal(expected, actual)
Beispiel #18
0
    def test_sort_with_no_index(self):
        """With sort=[0] on the no_index_df fixture, the output is expected in the fixture's row order."""
        widget = Pandas(mock_dataset.fields.wins, sort=[0])
        actual = widget.transform(no_index_df, mock_dataset, [mock_dataset.fields.timestamp], [])

        # The reference frame is deliberately left unsorted.
        selected = no_index_df.copy()[[f('wins')]]
        selected.index.names = ['Timestamp']
        selected.columns = pandas.Index(['Wins'], name='Metrics')
        expected = selected.applymap(_format_float)

        pandas.testing.assert_frame_equal(expected, actual)
Beispiel #19
0
    def test_dimx1_int(self):
        """Pandas output for the candidate-id dimension names the index 'Candidate ID'."""
        # NOTE(review): this feeds the string-dimension fixture (dimx1_str_df) to a test
        # named for an integer dimension - confirm whether a numeric fixture was intended.
        widget = Pandas(mock_dataset.fields.wins)
        candidate_dim = mock_dataset.fields['candidate-id']
        actual = widget.transform(dimx1_str_df, mock_dataset, [candidate_dim], [])

        selected = dimx1_str_df.copy()[[f('wins')]]
        selected.index.names = ['Candidate ID']
        selected.columns = pandas.Index(['Wins'], name='Metrics')
        expected = selected.applymap(_format_float)

        pandas.testing.assert_frame_equal(expected, actual)
Beispiel #20
0
    def test_sort_value_greater_than_number_of_columns_is_ignored(self):
        """A sort position (5) beyond the available columns leaves the output in the input order."""
        widget = Pandas(mock_dataset.fields.wins, sort=[5])
        actual = widget.transform(dimx1_date_df, mock_dataset, [mock_dataset.fields.timestamp], [])

        # No sorting is applied to the reference frame.
        selected = dimx1_date_df.copy()[[f('wins')]]
        selected.index.names = ['Timestamp']
        selected.columns = pandas.Index(['Wins'], name='Metrics')
        expected = selected.applymap(_format_float)

        pandas.testing.assert_frame_equal(expected, actual)
Beispiel #21
0
    def test_time_series_dim_with_operation(self):
        """CSV output for a CumSum operation labels the column 'CumSum(Votes)'."""
        query_dimensions = [mock_dataset.fields.timestamp]
        operation = CumSum(mock_dataset.fields.votes)
        csv_text = CSV(operation).transform(dimx1_date_operation_df, query_dimensions, [])

        frame = dimx1_date_operation_df.copy()[[f('cumsum(votes)')]]
        frame.index.names = ['Timestamp']
        frame.columns = ['CumSum(Votes)']
        expected_csv = frame.applymap(format_float_raw).to_csv(**csv_options)

        self.assertEqual(expected_csv, csv_text)
Beispiel #22
0
    def test_pivoted_multi_dimx2_date_num(self):
        """Pivoting candidate-id in CSV output yields one column per candidate id (1 through 11)."""
        candidate_dim = mock_dataset.fields['candidate-id']
        query_dimensions = [mock_dataset.fields.timestamp, candidate_dim]
        widget = CSV(mock_dataset.fields.votes, pivot=[candidate_dim])
        csv_text = widget.transform(dimx2_date_num_df, mock_dataset, query_dimensions, [])

        # Move the second index level (candidate id) into the columns.
        frame = dimx2_date_num_df.copy()[[f('votes')]].unstack(level=1)
        frame.index.names = ['Timestamp']
        frame.columns = [*range(1, 12)]
        expected_csv = frame.applymap(_format_float).to_csv(**csv_options)

        self.assertEqual(expected_csv, csv_text)
Beispiel #23
0
    def test_pivoted_multi_dimx2_date_str(self):
        """Pivoting political_party in CSV output yields one column per party, indexed by timestamp."""
        fields = mock_dataset.fields
        widget = CSV(fields.wins, pivot=[fields.political_party])
        csv_text = widget.transform(dimx2_date_str_df, [fields.timestamp, fields.political_party], [])

        # Move the party index level into the columns.
        frame = dimx2_date_str_df.copy()[[f('wins')]].unstack(level=[1])
        frame.index.names = ['Timestamp']
        frame.columns = ['Democrat', 'Independent', 'Republican']
        expected_csv = frame.applymap(format_float_raw).to_csv(**csv_options)

        self.assertEqual(expected_csv, csv_text)
Beispiel #24
0
    def test_metricx2_sort_value_desc(self):
        """sort=[1] with ascending=[False] orders the rows by the 'Wins' column, descending."""
        widget = Pandas(mock_dataset.fields.wins, sort=[1], ascending=[False])
        actual = widget.transform(dimx1_date_df, mock_dataset, [mock_dataset.fields.timestamp], [])

        selected = dimx1_date_df.copy()[[f('wins')]]
        selected.index.names = ['Timestamp']
        selected.columns = pandas.Index(['Wins'], name='Metrics')

        # Sort on the raw numbers first, then format the cells.
        ordered = selected.sort_values(by=['Wins'], ascending=False)
        expected = ordered.applymap(_format_float)

        pandas.testing.assert_frame_equal(expected, actual)
Beispiel #25
0
    def test_pivoted_dimx2_date_str(self):
        """Pivoting political_party in Pandas output yields party columns under a 'Party' axis name."""
        fields = mock_dataset.fields
        widget = Pandas(fields.wins, pivot=[fields.political_party])
        actual = widget.transform(
            dimx2_date_str_df, mock_dataset, [fields.timestamp, fields.political_party], []
        )

        # Move the party index level into the columns.
        unstacked = dimx2_date_str_df.copy()[[f('wins')]].unstack(level=[1])
        unstacked.index.names = ['Timestamp']
        unstacked.columns = pandas.Index(['Democrat', 'Independent', 'Republican'], name='Party')
        expected = unstacked.applymap(_format_float)

        pandas.testing.assert_frame_equal(expected, actual)
Beispiel #26
0
    def test_hidden_dimx2_date_str(self):
        """A dimension listed in hide= is dropped from the CSV output's index."""
        fields = mock_dataset.fields
        dimensions = [fields.timestamp, fields.political_party]
        widget = CSV(fields.wins, hide=[fields.political_party])
        csv_text = widget.transform(dimx2_date_str_df, dimensions, [])

        frame = dimx2_date_str_df.copy()[[f('wins')]]
        # Discard the hidden party level, leaving only the timestamp in the index.
        frame = frame.reset_index('$political_party', drop=True)
        frame.index.names = ['Timestamp']
        frame.columns = ['Wins']
        frame.columns.name = 'Metrics'
        expected_csv = frame.to_csv(**csv_options)

        self.assertEqual(expected_csv, csv_text)
Beispiel #27
0
    def test_fetch_only_dimx2_date_str(self):
        """A dimension flagged fetch_only is dropped from the Pandas output's index.

        The flag is set on a field of the shared mock_dataset fixture, so it is
        reset in a ``finally`` clause: a failing ``transform`` must not leave
        the flag set on that shared field.
        """
        dimensions = [mock_dataset.fields.timestamp, mock_dataset.fields.political_party]
        party_dimension = dimensions[1]
        party_dimension.fetch_only = True
        try:
            result = Pandas(mock_dataset.fields.wins).transform(dimx2_date_str_df, dimensions, [])
        finally:
            # Always reset the shared fixture, even when transform raises.
            party_dimension.fetch_only = False

        expected = dimx2_date_str_df.copy()[[f('wins')]]
        # Discard the fetch-only party level, leaving only the timestamp in the index.
        expected.reset_index('$political_party', inplace=True, drop=True)
        expected.index.names = ['Timestamp']
        expected.columns = ['Wins']
        expected.columns.name = 'Metrics'
        expected = expected.applymap(format_float)

        pandas.testing.assert_frame_equal(expected, result)
Beispiel #28
0
    def test_pivoted_dimx2_date_str_sort_index_level_1_desc(self):
        """sort=[1] with ascending=[False] orders the rows by the 'Party' index level, descending."""
        fields = mock_dataset.fields
        widget = Pandas(fields.wins, sort=[1], ascending=[False])
        actual = widget.transform(dimx2_date_str_df, [fields.timestamp, fields.political_party], [])

        selected = dimx2_date_str_df.copy()[[f('wins')]]
        selected.index.names = ['Timestamp', 'Party']
        selected.columns = pandas.Index(['Wins'], name='Metrics')

        # Flatten the index so 'Party' can be sorted as a column, then restore the index.
        flat = selected.reset_index()
        flat = flat.sort_values(['Party'], ascending=[False])
        expected = flat.set_index(['Timestamp', 'Party']).applymap(format_float)

        pandas.testing.assert_frame_equal(expected, actual)
Beispiel #29
0
    def test_neginf_in_metrics(self):
        """Pandas output formats a negative-infinity metric value the same way _format_float does.

        The injected value is ``-np.inf`` so that the test exercises the case
        its name describes.
        """
        df_with_neginf = dimx1_str_df.copy()
        # Cast to float so the column can hold an infinite value.
        df_with_neginf['$wins'] = df_with_neginf['$wins'].apply(float)
        # Overwrite the cell at row 2, column 1 with negative infinity.
        df_with_neginf.iloc[2, 1] = -np.inf

        result = Pandas(mock_dataset.fields.wins) \
            .transform(df_with_neginf, mock_dataset, [mock_dataset.fields.political_party], [])

        expected = df_with_neginf.copy()[[f('wins')]]
        expected.index = pd.Index(['Democrat', 'Independent', 'Republican'], name='Party')
        expected.columns = ['Wins']
        expected.columns.name = 'Metrics'
        expected = expected.applymap(_format_float)

        pandas.testing.assert_frame_equal(expected, result)
Beispiel #30
0
    def test_use_pandas_default_for_ascending_when_arg_empty_list(self):
        """An empty ascending list is expected to match sort_values called with ascending=None."""
        fields = mock_dataset.fields
        widget = Pandas(fields.votes, pivot=[fields.political_party], sort=[0, 2], ascending=[])
        actual = widget.transform(dimx2_date_str_df, [fields.timestamp, fields.political_party], [])

        # Move the party index level into the columns.
        unstacked = dimx2_date_str_df.copy()[[f('votes')]].unstack(level=[1])
        unstacked.index.names = ['Timestamp']
        unstacked.columns = ['Democrat', 'Independent', 'Republican']
        unstacked.columns.names = ['Party']

        # Sort on the timestamp and the 'Democrat' column, then restore the index.
        flat = unstacked.reset_index()
        flat = flat.sort_values(['Timestamp', 'Democrat'], ascending=None)
        expected = flat.set_index('Timestamp').applymap(format_float)

        pandas.testing.assert_frame_equal(expected, actual)