def test_do_not_remove_totals_for_rollup_dimensions_with_multiindex_and_all_rolled_up(
    self,
):
    """Scrubbing must hand back the two-dimension totals frame unchanged
    when both of its dimensions are wrapped in ``Rollup``."""
    rolled_up_dimensions = [
        Rollup(mock_dataset.fields.timestamp),
        Rollup(mock_dataset.fields.political_party),
    ]

    result = scrub_totals_from_share_results(
        dimx2_date_str_totalsx2_df, rolled_up_dimensions
    )

    # The input fixture itself is the expectation: nothing should be removed.
    pandas.testing.assert_frame_equal(result, dimx2_date_str_totalsx2_df)
def test_reduce_single_result_set_with_dimx2_date_str_str_totals_date(self):
    """Reduce a raw result set plus a one-row overall-totals result set while
    only the timestamp dimension is rolled up.

    The expected frame is the three-dimension totals fixture restricted to the
    "Democrat".."Republican" and "California".."Texas" label ranges, followed
    by the fixture's final row.
    """
    dimension_columns = ["$timestamp", "$political_party", "$state"]

    # NOTE(review): relies on DataFrame.append, which newer pandas releases
    # deprecate — confirm the pinned pandas version.
    row_selector = (
        slice(None),
        slice("Democrat", "Republican"),
        slice("California", "Texas"),
    )
    selected_rows = dimx3_date_str_str_totalsx3_df.loc[row_selector, :]
    final_row = dimx3_date_str_str_totalsx3_df.iloc[-1]
    expected = selected_rows.append(final_row)

    raw_df = replace_totals(dimx3_date_str_str_df)

    # Single-row totals frame: each dimension column holds the rollup constant
    # and the metric columns hold the column-wise sums of the raw rows.
    rollup_markers = pd.DataFrame(
        [[RollupValue.CONSTANT, RollupValue.CONSTANT, RollupValue.CONSTANT]],
        columns=dimension_columns,
    )
    metric_sums = pd.DataFrame([raw_df[metrics].sum(axis=0)])
    totals_df = pd.merge(
        rollup_markers,
        metric_sums,
        how="outer",
        left_index=True,
        right_index=True,
    )
    totals_df = totals_df[dimension_columns + metrics]

    dimensions = (
        Rollup(mock_dataset.fields.timestamp),
        mock_dataset.fields.political_party,
        mock_dataset.fields.state,
    )
    result = reduce_result_set([raw_df, totals_df], (), dimensions, ())

    pandas.testing.assert_frame_equal(expected, result)
def test_reduce_single_result_set_with_date_str_str_dimensions_str1_totals(self):
    """Reduce a raw result set plus per-timestamp totals while the political
    party dimension is rolled up, and compare against the matching rows of the
    three-dimension totals fixture."""
    fixture = dimx3_date_str_str_totalsx3_df

    # NOTE(review): relies on DataFrame.append, which newer pandas releases
    # deprecate — confirm the pinned pandas version.
    state_rows = fixture.loc[
        (slice(None), slice(None), slice("California", "Texas")), :
    ]
    # Rows labelled "~~totals" on the second index level, without the last one.
    party_total_rows = fixture.loc[(slice(None), "~~totals"), :].iloc[:-1]
    expected = state_rows.append(party_total_rows).sort_index()

    raw_df = replace_totals(dimx3_date_str_str_df)

    # Totals are the per-timestamp sums; the two remaining dimension columns
    # are filled with None before the columns are put back in order.
    totals_df = raw_df.groupby("$timestamp").sum().reset_index()
    for blank_column in ("$political_party", "$state"):
        totals_df[blank_column] = None
    ordered_columns = ["$timestamp", "$political_party", "$state"] + metrics
    totals_df = totals_df[ordered_columns]

    dimensions = (
        mock_dataset.fields.timestamp,
        Rollup(mock_dataset.fields.political_party),
        mock_dataset.fields.state,
    )
    result = reduce_result_set([raw_df, totals_df], (), dimensions, ())

    pandas.testing.assert_frame_equal(expected, result)
def test_do_not_remove_totals_for_rollup_dimensions(self):
    """Scrubbing must return the single-dimension totals frame unchanged when
    its only dimension is wrapped in ``Rollup``."""
    rolled_up_dimensions = [Rollup(mock_dataset.fields.political_party)]

    result = scrub_totals_from_share_results(
        dimx1_str_totals_df, rolled_up_dimensions
    )

    # The input fixture doubles as the expected output.
    pandas.testing.assert_frame_equal(result, dimx1_str_totals_df)
def test_do_not_remove_totals_for_rollup_dimensions_with_multiindex_and_lower_dimension_totals(
    self,
):
    """With a plain timestamp dimension and a rolled-up political party
    dimension, the scrubbed frame must equal the fixture sliced up to
    ``TIMESTAMP_UPPERBOUND``."""
    dimensions = [
        mock_dataset.fields.timestamp,
        Rollup(mock_dataset.fields.political_party),
    ]

    result = scrub_totals_from_share_results(dimx2_date_str_totalsx2_df, dimensions)

    expected = dimx2_date_str_totalsx2_df.loc[:TIMESTAMP_UPPERBOUND]
    pandas.testing.assert_frame_equal(result, expected)
def test_reduce_single_result_set_with_date_str_str_dimensions_str1_totals_with_null_in_date_dim(
    self,
):
    """Same scenario as the rolled-up political party reduction, but with extra
    raw rows whose timestamp is NaN and a matching totals row for them."""
    fixture = dimx3_date_str_str_totalsx3_df
    index_names = list(fixture.index.names)

    # Extra rows with a null value in the first (timestamp) column.
    null_rows = [
        [np.nan, "d", "1", "Texas", 5, 0],
        [np.nan, "d", "2", "California", 2, 0],
        [np.nan, "i", "1", "Texas", 5, 0],
        [np.nan, "i", "2", "California", 7, 0],
        [np.nan, "r", "1", "Texas", 11, 0],
        [np.nan, "r", "2", "California", 3, 0],
    ]
    nulls = pd.DataFrame(null_rows, columns=index_names + list(fixture.columns))

    # One totals row for the null rows, labelled "~~totals" on the second and
    # third index levels.
    nulls_totals = pd.DataFrame([nulls[metrics].sum()])
    nulls_totals[index_names[0]] = np.nan
    nulls_totals[index_names[1]] = "~~totals"
    nulls_totals[index_names[2]] = "~~totals"

    # NOTE(review): relies on DataFrame.append, which newer pandas releases
    # deprecate — confirm the pinned pandas version.
    detail_rows = fixture.loc[(slice(None), slice(None), slice("1", "2")), :]
    party_total_rows = fixture.loc[(slice(None), "~~totals"), :].iloc[:-1]
    indexed_nulls = nulls.set_index(index_names)
    indexed_nulls_totals = nulls_totals.set_index(index_names)
    expected = (
        detail_rows.append(party_total_rows)
        .append(indexed_nulls)
        .append(indexed_nulls_totals)
        .sort_index()
    )

    # Raw input: the null rows placed ahead of the regular rows, then sorted by
    # the three dimension columns.
    regular_rows = replace_totals(dimx3_date_str_str_df)
    raw_df = nulls.append(regular_rows).sort_values(
        ["$timestamp", "$political_party", "$state"]
    )

    totals_df = raw_df.groupby("$timestamp").sum().reset_index()

    # The null-timestamp rows get their own totals row, built separately —
    # presumably because the groupby above does not produce a NaN group.
    has_null_timestamp = raw_df["$timestamp"].isnull()
    null_totals_df = pd.DataFrame([raw_df[has_null_timestamp][metrics].sum()])
    null_totals_df["$timestamp"] = None
    totals_df = totals_df.append(null_totals_df)

    totals_df["$political_party"] = None
    totals_df["$state"] = None
    ordered_columns = ["$timestamp", "$political_party", "$state"] + metrics
    totals_df = totals_df[ordered_columns]

    dimensions = (
        mock_dataset.fields.timestamp,
        Rollup(mock_dataset.fields.political_party),
        mock_dataset.fields.state,
    )
    result = reduce_result_set([raw_df, totals_df], (), dimensions, ())

    pandas.testing.assert_frame_equal(expected, result)
def test_do_not_remove_totals_for_rollup_dimensions_with_multiindex_and_higher_dimension_totals(
    self,
):
    """With a rolled-up timestamp dimension and a plain political party
    dimension, the scrubbed frame must equal the "Democrat".."Republican" rows
    of the fixture followed by the fixture's final row."""
    dimensions = [
        Rollup(mock_dataset.fields.timestamp),
        mock_dataset.fields.political_party,
    ]

    result = scrub_totals_from_share_results(dimx2_date_str_totalsx2_df, dimensions)

    # NOTE(review): relies on DataFrame.append, which newer pandas releases
    # deprecate — confirm the pinned pandas version.
    party_rows = dimx2_date_str_totalsx2_df.loc[
        (slice(None), slice("Democrat", "Republican")), :
    ]
    final_row = dimx2_date_str_totalsx2_df.iloc[-1]
    expected = party_rows.append(final_row)

    pandas.testing.assert_frame_equal(result, expected)
def test_reduce_single_result_set_with_date_str_dimensions_str_totals(self):
    """Reduce a raw two-dimension result set plus per-timestamp totals while
    the political party dimension is rolled up; the result must equal the
    two-dimension totals fixture."""
    raw_df = replace_totals(dimx2_date_str_df)

    # Totals are the per-timestamp sums with the political party column set to
    # None, re-ordered so the dimension columns precede the metrics.
    per_timestamp_sums = raw_df.groupby("$timestamp").sum().reset_index()
    per_timestamp_sums["$political_party"] = None
    totals_df = per_timestamp_sums[["$timestamp", "$political_party"] + metrics]

    dimensions = (
        mock_dataset.fields.timestamp,
        Rollup(mock_dataset.fields.political_party),
    )
    result = reduce_result_set([raw_df, totals_df], (), dimensions, ())

    pandas.testing.assert_frame_equal(dimx2_date_str_totals_df, result)
def test_reduce_single_result_set_with_str_dimension(self):
    """Reduce a raw single-dimension result set plus a one-row totals result
    set while the political party dimension is rolled up; the result must
    equal the single-dimension totals fixture."""
    raw_df = replace_totals(dimx1_str_df)

    # One-row totals frame: the rollup constant in the dimension column joined
    # by index with the column-wise metric sums of the raw rows.
    rollup_marker = pd.DataFrame([RollupValue.CONSTANT], columns=["$political_party"])
    metric_sums = pd.DataFrame([raw_df[metrics].sum(axis=0)])
    totals_df = pd.merge(
        rollup_marker,
        metric_sums,
        how="outer",
        left_index=True,
        right_index=True,
    )

    dimensions = (Rollup(mock_dataset.fields.political_party),)
    result = reduce_result_set([raw_df, totals_df], (), dimensions, ())

    pandas.testing.assert_frame_equal(dimx1_str_totals_df, result)
def test_reduce_single_result_set_with_date_str_str_dimensions_str2_totals(self):
    """Reduce a raw three-dimension result set plus per-(timestamp, party)
    totals while the state dimension is rolled up; the result must equal the
    "Democrat".."Republican" rows of the three-dimension totals fixture."""
    party_rows = (slice(None), slice("Democrat", "Republican"))
    expected = dimx3_date_str_str_totalsx3_df.loc[party_rows, :]

    raw_df = replace_totals(dimx3_date_str_str_df)

    # Totals are summed per timestamp and political party; the state column
    # carries the rollup constant.
    grouped_sums = raw_df.groupby(["$timestamp", "$political_party"]).sum()
    totals_df = grouped_sums.reset_index()
    totals_df["$state"] = RollupValue.CONSTANT
    ordered_columns = ["$timestamp", "$political_party", "$state"] + metrics
    totals_df = totals_df[ordered_columns]

    dimensions = (
        mock_dataset.fields.timestamp,
        mock_dataset.fields.political_party,
        Rollup(mock_dataset.fields.state),
    )
    result = reduce_result_set([raw_df, totals_df], (), dimensions, ())

    pandas.testing.assert_frame_equal(expected, result)