def test_compare_should_not_side_effect():
    """Check that ``pp.compare`` leaves the frame it is given unchanged."""
    # Work on a private copy so the shared fixture acts as the reference.
    working_copy = timeseries_df2.copy()

    # The return value is deliberately ignored; only the input matters here.
    pp.compare(
        df=working_copy,
        source_columns=["y"],
        compare_columns=["z"],
        compare_type=PPC.DIFF,
    )

    assert working_copy.equals(timeseries_df2)
def test_compare(self):
    # Exercises proc.compare on timeseries_df2 for the `difference`,
    # `percentage` and `ratio` compare types, plus the
    # `drop_original_columns` option.

    def run_compare(kind, **extra):
        # Every call compares source column "y" against column "z".
        return proc.compare(
            df=timeseries_df2,
            source_columns=["y"],
            compare_columns=["z"],
            compare_type=kind,
            **extra,
        )

    # `difference` comparison
    diff_df = run_compare("difference")
    self.assertListEqual(
        diff_df.columns.tolist(),
        ["label", "y", "z", "difference__y__z"],
    )
    self.assertListEqual(
        series_to_list(diff_df["difference__y__z"]),
        [0.0, -2.0, -8.0, -6.0],
    )

    # drop original columns
    trimmed_df = run_compare("difference", drop_original_columns=True)
    self.assertListEqual(
        trimmed_df.columns.tolist(),
        ["label", "difference__y__z"],
    )

    # `percentage` and `ratio` comparisons share the same shape of checks
    remaining_cases = (
        ("percentage", "percentage__y__z", [0.0, -0.5, -0.8, -0.75]),
        ("ratio", "ratio__y__z", [1.0, 0.5, 0.2, 0.25]),
    )
    for kind, result_column, expected_values in remaining_cases:
        result_df = run_compare(kind)
        self.assertListEqual(
            result_df.columns.tolist(),
            ["label", "y", "z", result_column],
        )
        self.assertListEqual(
            series_to_list(result_df[result_column]),
            expected_values,
        )
def test_compare_percentage():
    """Check the `percentage` compare type of ``pp.compare`` on ``y`` vs ``z``."""
    # `percentage` comparison
    actual = pp.compare(
        df=timeseries_df2,
        source_columns=["y"],
        compare_columns=["z"],
        compare_type=PPC.PCT,
    )

    # Expected frame:
    #
    #            label    y     z  percentage__y__z
    # 2019-01-01     x  2.0   2.0              0.00
    # 2019-01-02     y  2.0   4.0             -0.50
    # 2019-01-05     z  2.0  10.0             -0.80
    # 2019-01-07     q  2.0   8.0             -0.75
    expected_data = {
        "label": ["x", "y", "z", "q"],
        "y": [2.0, 2.0, 2.0, 2.0],
        "z": [2.0, 4.0, 10.0, 8.0],
        "percentage__y__z": [0.0, -0.50, -0.80, -0.75],
    }
    expected = pd.DataFrame(index=timeseries_df2.index, data=expected_data)

    assert actual.equals(expected)
def test_compare_ratio():
    """Check the `ratio` compare type of ``pp.compare`` on ``y`` vs ``z``."""
    # `ratio` comparison
    actual = pp.compare(
        df=timeseries_df2,
        source_columns=["y"],
        compare_columns=["z"],
        compare_type=PPC.RAT,
    )

    # Expected frame:
    #
    #            label    y     z  ratio__y__z
    # 2019-01-01     x  2.0   2.0         1.00
    # 2019-01-02     y  2.0   4.0         0.50
    # 2019-01-05     z  2.0  10.0         0.20
    # 2019-01-07     q  2.0   8.0         0.25
    expected_data = {
        "label": ["x", "y", "z", "q"],
        "y": [2.0, 2.0, 2.0, 2.0],
        "z": [2.0, 4.0, 10.0, 8.0],
        "ratio__y__z": [1.00, 0.50, 0.20, 0.25],
    }
    expected = pd.DataFrame(index=timeseries_df2.index, data=expected_data)

    assert actual.equals(expected)
def test_compare_after_pivot():
    """Pivot by country, diff the two metrics, flatten, and check the result."""
    metric_aggregates = {
        "sum_metric": {"operator": "sum"},
        "count_metric": {"operator": "sum"},
    }
    pivoted = pp.pivot(
        df=multiple_metrics_df,
        index=["dttm"],
        columns=["country"],
        aggregates=metric_aggregates,
        flatten_columns=False,
        reset_index=False,
    )
    # Pivoted frame:
    #
    #            count_metric    sum_metric
    # country              UK US         UK US
    # dttm
    # 2019-01-01            1  2          5  6
    # 2019-01-02            3  4          7  8

    diffed = pp.compare(
        pivoted,
        source_columns=["count_metric"],
        compare_columns=["sum_metric"],
        compare_type=PPC.DIFF,
        drop_original_columns=True,
    )
    # Compared frame:
    #
    #            difference__count_metric__sum_metric
    # country                                      UK US
    # dttm
    # 2019-01-01                                   -4 -4
    # 2019-01-02                                   -4 -4

    flattened = pp.flatten(diffed)
    # Flattened frame (column labels joined with FLAT_COLUMN_SEPARATOR):
    #
    #         dttm  difference__count_metric__sum_metric, UK  difference__count_metric__sum_metric, US
    # 0 2019-01-01                                        -4                                        -4
    # 1 2019-01-02                                        -4                                        -4

    uk_column = FLAT_COLUMN_SEPARATOR.join(
        ["difference__count_metric__sum_metric", "UK"]
    )
    us_column = FLAT_COLUMN_SEPARATOR.join(
        ["difference__count_metric__sum_metric", "US"]
    )
    expected = pd.DataFrame(
        data={
            "dttm": pd.to_datetime(["2019-01-01", "2019-01-02"]),
            uk_column: [-4, -4],
            us_column: [-4, -4],
        }
    )

    assert flattened.equals(expected)
def test_compare():
    """Check ``compare`` for `difference`, `percentage` and `ratio` on y vs z."""
    original_columns = ["label", "y", "z"]

    # `difference` comparison
    difference_df = compare(
        df=timeseries_df2,
        source_columns=["y"],
        compare_columns=["z"],
        compare_type="difference",
    )
    assert difference_df.columns.tolist() == original_columns + ["difference__y__z"]
    difference_values = series_to_list(difference_df["difference__y__z"])
    assert difference_values == [0.0, -2.0, -8.0, -6.0]

    # drop original columns
    dropped_df = compare(
        df=timeseries_df2,
        source_columns=["y"],
        compare_columns=["z"],
        compare_type="difference",
        drop_original_columns=True,
    )
    assert dropped_df.columns.tolist() == ["label", "difference__y__z"]

    # `percentage` comparison
    percentage_df = compare(
        df=timeseries_df2,
        source_columns=["y"],
        compare_columns=["z"],
        compare_type="percentage",
    )
    assert percentage_df.columns.tolist() == original_columns + ["percentage__y__z"]
    percentage_values = series_to_list(percentage_df["percentage__y__z"])
    assert percentage_values == [0.0, -0.5, -0.8, -0.75]

    # `ratio` comparison
    ratio_df = compare(
        df=timeseries_df2,
        source_columns=["y"],
        compare_columns=["z"],
        compare_type="ratio",
    )
    assert ratio_df.columns.tolist() == original_columns + ["ratio__y__z"]
    ratio_values = series_to_list(ratio_df["ratio__y__z"])
    assert ratio_values == [1.0, 0.5, 0.2, 0.25]
def test_compare_diff():
    """Check the `difference` compare type, with and without original columns."""
    # `difference` comparison
    with_originals = pp.compare(
        df=timeseries_df2,
        source_columns=["y"],
        compare_columns=["z"],
        compare_type=PPC.DIFF,
    )
    # Expected frame:
    #
    #            label    y     z  difference__y__z
    # 2019-01-01     x  2.0   2.0               0.0
    # 2019-01-02     y  2.0   4.0              -2.0
    # 2019-01-05     z  2.0  10.0              -8.0
    # 2019-01-07     q  2.0   8.0              -6.0
    expected_full = pd.DataFrame(
        index=timeseries_df2.index,
        data={
            "label": ["x", "y", "z", "q"],
            "y": [2.0, 2.0, 2.0, 2.0],
            "z": [2.0, 4.0, 10.0, 8.0],
            "difference__y__z": [0.0, -2.0, -8.0, -6.0],
        },
    )
    assert with_originals.equals(expected_full)

    # drop original columns
    without_originals = pp.compare(
        df=timeseries_df2,
        source_columns=["y"],
        compare_columns=["z"],
        compare_type=PPC.DIFF,
        drop_original_columns=True,
    )
    expected_trimmed = pd.DataFrame(
        index=timeseries_df2.index,
        data={
            "label": ["x", "y", "z", "q"],
            "difference__y__z": [0.0, -2.0, -8.0, -6.0],
        },
    )
    assert without_originals.equals(expected_trimmed)
def test_compare_multi_index_column():
    """Diff two metrics on a three-level column MultiIndex, flatten, and check."""
    timestamps = pd.to_datetime(["2021-01-01", "2021-01-02", "2021-01-03"])
    timestamps.name = "__timestamp"
    column_index = pd.MultiIndex.from_product(
        [["m1", "m2"], ["a", "b"], ["x", "y"]],
        names=[None, "level1", "level2"],
    )
    source_df = pd.DataFrame(index=timestamps, columns=column_index, data=1)
    # Input frame:
    #
    #                m1          m2
    # level1       a     b     a     b
    # level2       x  y  x  y  x  y  x  y
    # __timestamp
    # 2021-01-01   1  1  1  1  1  1  1  1
    # 2021-01-02   1  1  1  1  1  1  1  1
    # 2021-01-03   1  1  1  1  1  1  1  1

    diffed = pp.compare(
        source_df,
        source_columns=["m1"],
        compare_columns=["m2"],
        compare_type=PPC.DIFF,
        drop_original_columns=True,
    )
    flattened = pp.flatten(diffed)
    # Flattened frame (each column below is prefixed "difference__m1__m2, "):
    #
    #   __timestamp  a, x  a, y  b, x  b, y
    # 0  2021-01-01     0     0     0     0
    # 1  2021-01-02     0     0     0     0
    # 2  2021-01-03     0     0     0     0

    expected = pd.DataFrame(
        data={
            "__timestamp": pd.to_datetime(
                ["2021-01-01", "2021-01-02", "2021-01-03"]
            ),
            "difference__m1__m2, a, x": [0, 0, 0],
            "difference__m1__m2, a, y": [0, 0, 0],
            "difference__m1__m2, b, x": [0, 0, 0],
            "difference__m1__m2, b, y": [0, 0, 0],
        }
    )

    assert flattened.equals(expected)