def test_reduce_single_result_set_with_dimx2_date_str_str_totals_date( self): expected = dimx3_date_str_str_totalsx3_df.loc[( slice(None), slice("Democrat", "Republican"), slice("California", "Texas"), ), :, ].append(dimx3_date_str_str_totalsx3_df.iloc[-1]) raw_df = replace_totals(dimx3_date_str_str_df) totals_df = pd.merge( pd.DataFrame( [[ RollupValue.CONSTANT, RollupValue.CONSTANT, RollupValue.CONSTANT ]], columns=["$timestamp", "$political_party", "$state"], ), pd.DataFrame([raw_df[metrics].sum(axis=0)]), how="outer", left_index=True, right_index=True, ) totals_df = totals_df[["$timestamp", "$political_party", "$state"] + metrics] dimensions = ( Rollup(mock_dataset.fields.timestamp), mock_dataset.fields.political_party, mock_dataset.fields.state, ) result = reduce_result_set([raw_df, totals_df], (), dimensions, ()) pandas.testing.assert_frame_equal(expected, result)
def test_reduce_single_result_set_with_cont_dimension(self): expected = dimx1_date_df raw_df = replace_totals(expected) dimensions = (mock_dataset.fields.timestamp, ) result = reduce_result_set([raw_df], (), dimensions, ()) pandas.testing.assert_frame_equal(expected, result)
def test_reduce_with_multiple_references_of_the_same_type_over_the_same_dimension( self): raw_df = pd.DataFrame( [[date(2019, 1, 2), 1], [date(2019, 1, 3), 2]], columns=["$timestamp", "$metric"], ) ref_df = pd.DataFrame( [[date(2019, 1, 2), 2], [date(2019, 1, 3), 0]], columns=["$timestamp", "$metric_dod"], ) expected = raw_df.copy() expected["$metric_dod"] = pd.Series([2, 0]) expected["$metric_dod_delta"] = pd.Series([-1, 2], dtype=object) expected["$metric_dod_delta_percent"] = pd.Series([-50, np.nan], dtype=object) expected.set_index("$timestamp", inplace=True) timestamp = mock_dataset.fields.timestamp reference_groups = ([ DayOverDay(timestamp), DayOverDay(timestamp, delta=True), DayOverDay(timestamp, delta_percent=True) ], ) dimensions = (timestamp, ) result = reduce_result_set([raw_df, ref_df], reference_groups, dimensions, ()) pandas.testing.assert_frame_equal(expected, result)
def test_reduce_single_result_set_with_1x_dimension(self): expected = dimx1_str_df raw_df = replace_totals(expected) dimensions = (mock_dataset.fields.political_party, ) result = reduce_result_set([raw_df], (), dimensions, ()) pandas.testing.assert_frame_equal(expected, result)
def test_reduce_single_result_set_no_dimensions(self): expected = dimx0_metricx1_df raw_df = expected dimensions = () result = reduce_result_set([raw_df], (), dimensions, ()) pandas.testing.assert_frame_equal(expected, result)
def test_reduce_single_result_set_with_date_str_str_dimensions_str1_totals(self): expected = ( dimx3_date_str_str_totalsx3_df.loc[ (slice(None), slice(None), slice("California", "Texas")), : ] .append( dimx3_date_str_str_totalsx3_df.loc[(slice(None), "~~totals"), :].iloc[ :-1 ] ) .sort_index() ) raw_df = replace_totals(dimx3_date_str_str_df) totals_df = raw_df.groupby("$timestamp").sum().reset_index() totals_df["$political_party"] = None totals_df["$state"] = None totals_df = totals_df[["$timestamp", "$political_party", "$state"] + metrics] dimensions = ( mock_dataset.fields.timestamp, Rollup(mock_dataset.fields.political_party), mock_dataset.fields.state, ) result = reduce_result_set([raw_df, totals_df], (), dimensions, ()) pandas.testing.assert_frame_equal(expected, result)
def test_reduce_single_result_set_with_date_str_str_dimensions_str1_totals_with_null_in_date_dim( self, ): index_names = list(dimx3_date_str_str_totalsx3_df.index.names) nulls = pd.DataFrame( [ [np.nan, "d", "1", "Texas", 5, 0], [np.nan, "d", "2", "California", 2, 0], [np.nan, "i", "1", "Texas", 5, 0], [np.nan, "i", "2", "California", 7, 0], [np.nan, "r", "1", "Texas", 11, 0], [np.nan, "r", "2", "California", 3, 0], ], columns=index_names + list(dimx3_date_str_str_totalsx3_df.columns), ) nulls_totals = pd.DataFrame([nulls[metrics].sum()]) nulls_totals[index_names[0]] = np.nan nulls_totals[index_names[1]] = "~~totals" nulls_totals[index_names[2]] = "~~totals" expected = ( dimx3_date_str_str_totalsx3_df.loc[ (slice(None), slice(None), slice("1", "2")), : ] .append( dimx3_date_str_str_totalsx3_df.loc[(slice(None), "~~totals"), :].iloc[ :-1 ] ) .append(nulls.set_index(index_names)) .append(nulls_totals.set_index(index_names)) .sort_index() ) raw_df = replace_totals(dimx3_date_str_str_df) raw_df = nulls.append(raw_df).sort_values( ["$timestamp", "$political_party", "$state"] ) totals_df = raw_df.groupby("$timestamp").sum().reset_index() null_totals_df = pd.DataFrame( [raw_df[raw_df["$timestamp"].isnull()][metrics].sum()] ) null_totals_df["$timestamp"] = None totals_df = totals_df.append(null_totals_df) totals_df["$political_party"] = None totals_df["$state"] = None totals_df = totals_df[["$timestamp", "$political_party", "$state"] + metrics] dimensions = ( mock_dataset.fields.timestamp, Rollup(mock_dataset.fields.political_party), mock_dataset.fields.state, ) result = reduce_result_set([raw_df, totals_df], (), dimensions, ()) pandas.testing.assert_frame_equal(expected, result)
def test_reduce_single_result_set_with_str_num_dimensions(self): expected = dimx2_str_num_df.sort_index() raw_df = replace_totals(expected) dimensions = ( mock_dataset.fields.political_party, mock_dataset.fields["candidate-id"], ) result = reduce_result_set([raw_df], (), dimensions, ()) pandas.testing.assert_frame_equal(expected, result)
def test_reduce_single_result_set_with_date_str_dimensions_str_totals(self): expected = dimx2_date_str_totals_df raw_df = replace_totals(dimx2_date_str_df) totals_df = raw_df.groupby("$timestamp").sum().reset_index() totals_df["$political_party"] = None totals_df = totals_df[["$timestamp", "$political_party"] + metrics] dimensions = ( mock_dataset.fields.timestamp, Rollup(mock_dataset.fields.political_party), ) result = reduce_result_set([raw_df, totals_df], (), dimensions, ()) pandas.testing.assert_frame_equal(expected, result)
def test_reduce_single_result_set_with_str_dimension(self): expected = dimx1_str_totals_df raw_df = replace_totals(dimx1_str_df) totals_df = pd.merge( pd.DataFrame([RollupValue.CONSTANT], columns=["$political_party"]), pd.DataFrame([raw_df[metrics].sum(axis=0)]), how="outer", left_index=True, right_index=True, ) dimensions = (Rollup(mock_dataset.fields.political_party), ) result = reduce_result_set([raw_df, totals_df], (), dimensions, ()) pandas.testing.assert_frame_equal(expected, result)
def test_reduce_single_result_set_with_date_str_str_dimensions_str2_totals( self): expected = dimx3_date_str_str_totalsx3_df.loc[( slice(None), slice("Democrat", "Republican")), :] raw_df = replace_totals(dimx3_date_str_str_df) totals_df = raw_df.groupby(["$timestamp", "$political_party"]).sum().reset_index() totals_df["$state"] = RollupValue.CONSTANT totals_df = totals_df[["$timestamp", "$political_party", "$state"] + metrics] dimensions = ( mock_dataset.fields.timestamp, mock_dataset.fields.political_party, Rollup(mock_dataset.fields.state), ) result = reduce_result_set([raw_df, totals_df], (), dimensions, ()) pandas.testing.assert_frame_equal(expected, result)
def test_reduce_delta_result_with_non_aligned_index(self): raw_df = pd.DataFrame( [[date(2019, 1, 2), 1], [date(2019, 1, 3), 2]], columns=["$timestamp", "$metric"], ) ref_df = pd.DataFrame( [[date(2019, 1, 2), 2]], columns=["$timestamp", "$metric_dod"] ) expected = raw_df.copy() expected["$metric_dod_delta"] = pd.Series([-1.0, 2.0], dtype=object) expected.set_index("$timestamp", inplace=True) timestamp = mock_dataset.fields.timestamp reference_groups = ([DayOverDay(timestamp, delta=True)],) dimensions = (timestamp,) result = reduce_result_set([raw_df, ref_df], reference_groups, dimensions, ()) pandas.testing.assert_frame_equal(expected, result)
def test_reduce_with_references_and_extra_values_in_reference(self): raw_df = pd.DataFrame( [[date(2019, 1, 2), 1], [date(2019, 1, 3), 9]], columns=["$timestamp", "$metric"], ) ref_df = pd.DataFrame( [[date(2019, 1, 2), 7], [date(2019, 1, 4), 8]], columns=["$timestamp", "$metric_dod"], ) expected_df = pd.DataFrame( [[date(2019, 1, 2), 1, 7], [date(2019, 1, 3), 9, np.nan]], columns=["$timestamp", "$metric", "$metric_dod"], ) expected_df.set_index("$timestamp", inplace=True) timestamp = mock_dataset.fields.timestamp reference_groups = ([ DayOverDay(timestamp), ], ) dimensions = (timestamp, ) result = reduce_result_set([raw_df, ref_df], reference_groups, dimensions, ()) pandas.testing.assert_frame_equal(expected_df, result)