def test_do_not_remove_totals_for_rollup_dimensions_with_multiindex_and_all_rolled_up(self):
        """Scrubbing with every dimension wrapped in ``Rollup`` returns the frame as-is.

        Both the timestamp and political-party dimensions are rolled up, and
        the result is compared against the untouched two-dimension totals
        fixture.
        """
        all_rolled_up = [
            Rollup(mock_dataset.fields.timestamp),
            Rollup(mock_dataset.fields.political_party),
        ]

        scrubbed = scrub_totals_from_share_results(
            dimx2_date_str_totalsx2_df, all_rolled_up
        )

        pandas.testing.assert_frame_equal(scrubbed, dimx2_date_str_totalsx2_df)
    def test_reduce_single_result_set_with_dimx2_date_str_str_totals_date(self):
        """Reduce a raw frame plus one all-rollup totals row, timestamp rolled up.

        The expected frame is the three-dimension totals fixture restricted to
        the "Democrat".."Republican" and "California".."Texas" label ranges,
        followed by the fixture's last row.
        """
        dimension_columns = ["$timestamp", "$political_party", "$state"]

        # Expected: label-sliced fixture rows, then the fixture's final row.
        label_sliced_rows = dimx3_date_str_str_totalsx3_df.loc[
            (
                slice(None),
                slice("Democrat", "Republican"),
                slice("California", "Texas"),
            ),
            :,
        ]
        final_fixture_row = dimx3_date_str_str_totalsx3_df.iloc[-1]
        expected = label_sliced_rows.append(final_fixture_row)

        raw_df = replace_totals(dimx3_date_str_str_df)

        # Single totals row: rollup constant in every dimension column, joined
        # on index with the column-wise sum of the metrics.
        rollup_markers = pd.DataFrame(
            [[RollupValue.CONSTANT] * 3],
            columns=dimension_columns,
        )
        metric_sums = pd.DataFrame([raw_df[metrics].sum(axis=0)])
        totals_df = pd.merge(
            rollup_markers,
            metric_sums,
            how="outer",
            left_index=True,
            right_index=True,
        )
        totals_df = totals_df[dimension_columns + metrics]

        dimensions = (
            Rollup(mock_dataset.fields.timestamp),
            mock_dataset.fields.political_party,
            mock_dataset.fields.state,
        )
        reduced = reduce_result_set([raw_df, totals_df], (), dimensions, ())

        pandas.testing.assert_frame_equal(expected, reduced)
Exemple #3
0
    def test_reduce_single_result_set_with_date_str_str_dimensions_str1_totals(self):
        """Reduce a raw frame plus per-timestamp totals, political party rolled up.

        The expected frame combines the fixture rows whose third index level is
        in "California".."Texas" with the fixture rows whose second level is
        "~~totals" (all but the last of those), sorted by index.
        """
        fixture = dimx3_date_str_str_totalsx3_df
        state_sliced_rows = fixture.loc[
            (slice(None), slice(None), slice("California", "Texas")), :
        ]
        second_level_totals = fixture.loc[(slice(None), "~~totals"), :]
        expected = state_sliced_rows.append(second_level_totals.iloc[:-1]).sort_index()

        raw_df = replace_totals(dimx3_date_str_str_df)

        # Totals are summed per timestamp; the other dimension columns are
        # filled with None before the columns are put back in order.
        totals_df = raw_df.groupby("$timestamp").sum().reset_index()
        for blank_column in ("$political_party", "$state"):
            totals_df[blank_column] = None
        ordered_columns = ["$timestamp", "$political_party", "$state"] + metrics
        totals_df = totals_df[ordered_columns]

        dimensions = (
            mock_dataset.fields.timestamp,
            Rollup(mock_dataset.fields.political_party),
            mock_dataset.fields.state,
        )
        reduced = reduce_result_set([raw_df, totals_df], (), dimensions, ())

        pandas.testing.assert_frame_equal(expected, reduced)
    def test_do_not_remove_totals_for_rollup_dimensions(self):
        """Scrubbing a single rolled-up dimension returns the totals fixture as-is."""
        rolled_up = [Rollup(mock_dataset.fields.political_party)]

        scrubbed = scrub_totals_from_share_results(dimx1_str_totals_df, rolled_up)

        pandas.testing.assert_frame_equal(scrubbed, dimx1_str_totals_df)
    def test_do_not_remove_totals_for_rollup_dimensions_with_multiindex_and_lower_dimension_totals(self):
        """Scrub with only the second (political party) dimension rolled up.

        The result is compared against the two-dimension totals fixture sliced
        up to ``TIMESTAMP_UPPERBOUND``.
        """
        dimensions = [
            mock_dataset.fields.timestamp,
            Rollup(mock_dataset.fields.political_party),
        ]

        scrubbed = scrub_totals_from_share_results(
            dimx2_date_str_totalsx2_df, dimensions
        )

        up_to_upperbound = dimx2_date_str_totalsx2_df.loc[:TIMESTAMP_UPPERBOUND]
        pandas.testing.assert_frame_equal(scrubbed, up_to_upperbound)
Exemple #6
0
    def test_reduce_single_result_set_with_date_str_str_dimensions_str1_totals_with_null_in_date_dim(
        self,
    ):
        """Same political-party rollup reduction, with NaN rows in the first column.

        Six extra rows whose first value is NaN are added to the raw data, and
        a matching "~~totals" row for them is added to the expected frame.
        """
        fixture = dimx3_date_str_str_totalsx3_df
        index_names = list(fixture.index.names)
        frame_columns = index_names + list(fixture.columns)

        # Extra rows with a NaN first value.
        null_rows = pd.DataFrame(
            [
                [np.nan, "d", "1", "Texas", 5, 0],
                [np.nan, "d", "2", "California", 2, 0],
                [np.nan, "i", "1", "Texas", 5, 0],
                [np.nan, "i", "2", "California", 7, 0],
                [np.nan, "r", "1", "Texas", 11, 0],
                [np.nan, "r", "2", "California", 3, 0],
            ],
            columns=frame_columns,
        )

        # One totals row for the NaN group: metric sums plus index-level markers.
        null_rows_totals = pd.DataFrame([null_rows[metrics].sum()])
        null_rows_totals[index_names[0]] = np.nan
        null_rows_totals[index_names[1]] = "~~totals"
        null_rows_totals[index_names[2]] = "~~totals"

        third_level_sliced = fixture.loc[
            (slice(None), slice(None), slice("1", "2")), :
        ]
        second_level_totals = fixture.loc[(slice(None), "~~totals"), :]
        expected = (
            third_level_sliced
            .append(second_level_totals.iloc[:-1])
            .append(null_rows.set_index(index_names))
            .append(null_rows_totals.set_index(index_names))
            .sort_index()
        )

        sort_keys = ["$timestamp", "$political_party", "$state"]
        raw_df = replace_totals(dimx3_date_str_str_df)
        raw_df = null_rows.append(raw_df).sort_values(sort_keys)

        # Per-timestamp totals, plus a separately computed totals row for the
        # rows whose timestamp is null.
        totals_df = raw_df.groupby("$timestamp").sum().reset_index()
        timestamp_is_null = raw_df["$timestamp"].isnull()
        null_totals_df = pd.DataFrame([raw_df[timestamp_is_null][metrics].sum()])
        null_totals_df["$timestamp"] = None
        totals_df = totals_df.append(null_totals_df)
        for blank_column in ("$political_party", "$state"):
            totals_df[blank_column] = None
        totals_df = totals_df[sort_keys + metrics]

        dimensions = (
            mock_dataset.fields.timestamp,
            Rollup(mock_dataset.fields.political_party),
            mock_dataset.fields.state,
        )
        reduced = reduce_result_set([raw_df, totals_df], (), dimensions, ())

        pandas.testing.assert_frame_equal(expected, reduced)
    def test_do_not_remove_totals_for_rollup_dimensions_with_multiindex_and_higher_dimension_totals(self):
        """Scrub with only the first (timestamp) dimension rolled up.

        The expected frame is the fixture restricted to the
        'Democrat'..'Republican' label range on the second index level,
        followed by the fixture's last row.
        """
        dimensions = [
            Rollup(mock_dataset.fields.timestamp),
            mock_dataset.fields.political_party,
        ]
        scrubbed = scrub_totals_from_share_results(
            dimx2_date_str_totalsx2_df, dimensions
        )

        party_range = slice('Democrat', 'Republican')
        party_rows = dimx2_date_str_totalsx2_df.loc[(slice(None), party_range), :]
        final_fixture_row = dimx2_date_str_totalsx2_df.iloc[-1]
        expected = party_rows.append(final_fixture_row)

        pandas.testing.assert_frame_equal(scrubbed, expected)
Exemple #8
0
    def test_reduce_single_result_set_with_date_str_dimensions_str_totals(self):
        """Reduce a two-dimension frame with per-timestamp totals, party rolled up.

        The reduced frame is compared against the two-dimension totals fixture.
        """
        raw_df = replace_totals(dimx2_date_str_df)

        # Totals are summed per timestamp, with the party column set to None.
        per_timestamp_sums = raw_df.groupby("$timestamp").sum()
        totals_df = per_timestamp_sums.reset_index()
        totals_df["$political_party"] = None
        ordered_columns = ["$timestamp", "$political_party"] + metrics
        totals_df = totals_df[ordered_columns]

        dimensions = (
            mock_dataset.fields.timestamp,
            Rollup(mock_dataset.fields.political_party),
        )
        reduced = reduce_result_set([raw_df, totals_df], (), dimensions, ())

        pandas.testing.assert_frame_equal(dimx2_date_str_totals_df, reduced)
    def test_reduce_single_result_set_with_str_dimension(self):
        """Reduce a one-dimension frame with a single totals row, party rolled up.

        The reduced frame is compared against the one-dimension totals fixture.
        """
        raw_df = replace_totals(dimx1_str_df)

        # Single totals row: rollup constant for the dimension column, joined
        # on index with the column-wise sum of the metrics.
        rollup_marker = pd.DataFrame(
            [RollupValue.CONSTANT], columns=["$political_party"]
        )
        metric_sums = pd.DataFrame([raw_df[metrics].sum(axis=0)])
        totals_df = pd.merge(
            rollup_marker,
            metric_sums,
            how="outer",
            left_index=True,
            right_index=True,
        )

        dimensions = (Rollup(mock_dataset.fields.political_party),)
        reduced = reduce_result_set([raw_df, totals_df], (), dimensions, ())

        pandas.testing.assert_frame_equal(dimx1_str_totals_df, reduced)
Exemple #10
0
    def test_reduce_single_result_set_with_date_str_str_dimensions_str2_totals(self):
        """Reduce a three-dimension frame with the state dimension rolled up.

        The expected frame is the three-dimension totals fixture restricted to
        the "Democrat".."Republican" label range on the second index level.
        """
        party_range = slice("Democrat", "Republican")
        expected = dimx3_date_str_str_totalsx3_df.loc[(slice(None), party_range), :]

        raw_df = replace_totals(dimx3_date_str_str_df)

        # Totals are summed per (timestamp, party) pair, with the state column
        # set to the rollup constant.
        grouping_columns = ["$timestamp", "$political_party"]
        totals_df = raw_df.groupby(grouping_columns).sum().reset_index()
        totals_df["$state"] = RollupValue.CONSTANT
        ordered_columns = ["$timestamp", "$political_party", "$state"] + metrics
        totals_df = totals_df[ordered_columns]

        dimensions = (
            mock_dataset.fields.timestamp,
            mock_dataset.fields.political_party,
            Rollup(mock_dataset.fields.state),
        )
        reduced = reduce_result_set([raw_df, totals_df], (), dimensions, ())

        pandas.testing.assert_frame_equal(expected, reduced)