def test_or_dict_of_lists():
    """
    Test OrAggregator when the input is a dict of lists whose elements are
    either single time stamps or (start, end) time stamp 2-tuples
    """
    events_a = [
        (Timestamp("2017-1-1"), Timestamp("2017-1-2")),
        (Timestamp("2017-1-5"), Timestamp("2017-1-8")),
        Timestamp("2017-1-10"),
    ]
    # Note that this list is not in chronological order, and that its last
    # element is an interval whose start and end are the same time stamp.
    events_b = [
        Timestamp("2017-1-2"),
        (Timestamp("2017-1-3"), Timestamp("2017-1-6")),
        Timestamp("2017-1-8"),
        (Timestamp("2017-1-7"), Timestamp("2017-1-9")),
        (Timestamp("2017-1-11"), Timestamp("2017-1-11")),
    ]

    merged = aggt.OrAggregator().aggregate({"A": events_a, "B": events_b})

    # Expected result: two intervals followed by two single time stamps.
    expected = [
        (Timestamp("2017-01-01 00:00:00"), Timestamp("2017-01-02 00:00:00")),
        (Timestamp("2017-01-03 00:00:00"), Timestamp("2017-01-09 00:00:00")),
        Timestamp("2017-1-10"),
        Timestamp("2017-1-11"),
    ]
    assert merged == expected
def test_pipe_summary():
    """
    Test summary: build a six-step pipenet and call its summary() method
    (no assertion is made on what summary() produces)
    """
    reg_ab = LinearRegression()
    reg_ac = LinearRegression()

    # Step specifications, created in the order they appear in the pipenet.
    ab_regression_ad = {
        "model": detector.RegressionAD(regressor=reg_ab, target="B"),
        "input": "original",
        "subset": ["A", "B"],
    }
    ac_regression_error = {
        "model": transformer.RegressionResidual(regressor=reg_ac, target="C"),
        "input": "original",
        "subset": ["A", "C"],
    }
    ac_regression_ad = {
        "model": detector.InterQuartileRangeAD(),
        "input": "A-C-regression-error",
        "subset": "all",
    }
    abc_ad = {
        "model": aggregator.OrAggregator(),
        "input": ["A-B-regression-ad", "A-C-regression-ad"],
    }
    d_ad = {
        "model": detector.QuantileAD(high=0.9, low=0.1),
        "input": "original",
        "subset": ["D"],
    }
    abcd_ad = {
        "model": aggregator.OrAggregator(),
        "input": ["ABC-ad", "D-ad"],
    }

    my_pipe = Pipenet(
        {
            "A-B-regression-ad": ab_regression_ad,
            "A-C-regression-error": ac_regression_error,
            "A-C-regression-ad": ac_regression_ad,
            "ABC-ad": abc_ad,
            "D-ad": d_ad,
            "ABCD-ad": abcd_ad,
        }
    )
    my_pipe.summary()
def test_or_df():
    """
    Test OrAggregator when the input is a two-column DataFrame of 0/1/NaN
    values
    """
    nan = float("nan")
    frame = pd.DataFrame(
        [[1, 1], [1, 0], [0, 1], [0, 0], [nan, 1], [0, nan]],
        index=pd.date_range(start="2017-1-1", periods=6, freq="D"),
    )

    observed = aggt.OrAggregator().aggregate(frame)

    # Row by row: (NaN, 1) is expected to give 1, while (0, NaN) is expected
    # to stay NaN.
    expected = pd.Series(
        [1, 1, 1, 0, 1, nan],
        index=pd.date_range(start="2017-1-1", periods=6, freq="D"),
    )
    pd.testing.assert_series_equal(observed, expected)
def test_skip_fit():
    """
    Test the skip_fit argument of Pipenet.fit: after an initial fit, refit
    on slightly different data while skipping some steps, and check the
    fitted attributes of the regressors and of the IQR detector
    """
    reg_ab = LinearRegression()
    reg_ac = LinearRegression()

    # Step specifications, created in the order they appear in the pipenet.
    ab_regression_ad = {
        "model": detector.RegressionAD(regressor=reg_ab, target="B"),
        "input": "original",
        "subset": ["A", "B"],
    }
    ac_regression_error = {
        "model": transformer.RegressionResidual(regressor=reg_ac, target="C"),
        "input": "original",
        "subset": ["A", "C"],
    }
    ac_regression_ad = {
        "model": detector.InterQuartileRangeAD(),
        "input": "A-C-regression-error",
        "subset": "all",
    }
    abc_ad = {
        "model": aggregator.OrAggregator(),
        "input": ["A-B-regression-ad", "A-C-regression-ad"],
    }
    d_ad = {
        "model": detector.QuantileAD(high=0.9, low=0.1),
        "input": "original",
        "subset": ["D"],
    }
    abcd_ad = {
        "model": aggregator.OrAggregator(),
        "input": ["ABC-ad", "D-ad"],
    }
    my_pipe = Pipenet(
        {
            "A-B-regression-ad": ab_regression_ad,
            "A-C-regression-error": ac_regression_error,
            "A-C-regression-ad": ac_regression_ad,
            "ABC-ad": abc_ad,
            "D-ad": d_ad,
            "ABCD-ad": abcd_ad,
        }
    )

    # First data set: B is exactly 10 * A and C is exactly 100 * A.
    exact_rows = np.array(
        [
            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
            [0, 10, 20, 30, 40, 50, 60, 70, 80, 90],
            [0, 100, 200, 300, 400, 500, 600, 700, 800, 900],
            [0, 0, 0, 0, 0, 0, 0, 100, 0, 0],
        ]
    )
    exact_df = pd.DataFrame(
        exact_rows.T,
        index=pd.date_range(start="2017-1-1", periods=10, freq="D"),
        columns=["A", "B", "C", "D"],
    )
    my_pipe.fit(exact_df)

    # Second data set: same as the first except for one value in B (41) and
    # one value in C (601).
    perturbed_rows = np.array(
        [
            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
            [0, 10, 20, 30, 41, 50, 60, 70, 80, 90],
            [0, 100, 200, 300, 400, 500, 601, 700, 800, 900],
            [0, 0, 0, 0, 0, 0, 0, 100, 0, 0],
        ]
    )
    perturbed_df = pd.DataFrame(
        perturbed_rows.T,
        index=pd.date_range(start="2017-1-1", periods=10, freq="D"),
        columns=["A", "B", "C", "D"],
    )

    # Refit while skipping both regression steps: the slopes are expected
    # to be unchanged and the IQR detector bounds to be zero.
    my_pipe.fit(
        perturbed_df, skip_fit=["A-B-regression-ad", "A-C-regression-error"]
    )
    iqr_model = my_pipe.steps["A-C-regression-ad"]["model"]
    assert reg_ab.coef_[0] == pytest.approx(10)
    assert reg_ac.coef_[0] == pytest.approx(100)
    assert iqr_model.abs_high_ == 0
    assert iqr_model.abs_low_ == 0

    # Refit while skipping only the A-B step: the A-C slope and the IQR
    # detector bounds are now expected to change.
    my_pipe.fit(perturbed_df, skip_fit=["A-B-regression-ad"])
    iqr_model = my_pipe.steps["A-C-regression-ad"]["model"]
    assert reg_ab.coef_[0] == pytest.approx(10)
    assert reg_ac.coef_[0] != pytest.approx(100)
    assert iqr_model.abs_high_ != 0
    assert iqr_model.abs_low_ != 0
def test_pipenet_return_list_return_intermediate():
    """
    Test pipenet fit_detect with return_list=True and
    return_intermediate=True: every detection step is expected to report
    its anomalies as a list of (start, end) time stamp tuples
    """
    values = np.array(
        [
            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
            [0, 10, 20, 30, 41, 50, 60, 70, 80, 90],
            [0, 100, 200, 300, 400, 500, 601, 700, 800, 900],
            [0, 0, 0, 0, 0, 0, 0, 100, 0, 0],
        ]
    )
    df = pd.DataFrame(
        values.T,
        index=pd.date_range(start="2017-1-1", periods=10, freq="D"),
        columns=["A", "B", "C", "D"],
    )

    reg_ab = LinearRegression()
    reg_ac = LinearRegression()

    # Step specifications, created in the order they appear in the pipenet.
    ab_regression_ad = {
        "model": detector.RegressionAD(regressor=reg_ab, target="B"),
        "input": "original",
        "subset": ["A", "B"],
    }
    ac_regression_error = {
        "model": transformer.RegressionResidual(regressor=reg_ac, target="C"),
        "input": "original",
        "subset": ["A", "C"],
    }
    ac_regression_ad = {
        "model": detector.InterQuartileRangeAD(),
        "input": "A-C-regression-error",
        "subset": "all",
    }
    abc_ad = {
        "model": aggregator.OrAggregator(),
        "input": ["A-B-regression-ad", "A-C-regression-ad"],
    }
    d_ad = {
        "model": detector.QuantileAD(high=0.9, low=0.1),
        "input": "original",
        "subset": ["D"],
    }
    abcd_ad = {
        "model": aggregator.OrAggregator(),
        "input": ["ABC-ad", "D-ad"],
    }
    my_pipe = Pipenet(
        {
            "A-B-regression-ad": ab_regression_ad,
            "A-C-regression-error": ac_regression_error,
            "A-C-regression-ad": ac_regression_ad,
            "ABC-ad": abc_ad,
            "D-ad": d_ad,
            "ABCD-ad": abcd_ad,
        }
    )

    results = my_pipe.fit_detect(
        df, return_list=True, return_intermediate=True
    )

    # One entry per step, plus the "original" input.
    expected_keys = set(my_pipe.steps.keys()).union({"original"})
    assert set(results.keys()) == expected_keys

    # Whole-day intervals used in the expected event lists below.
    jan_5 = (
        pd.Timestamp("2017-01-05 00:00:00"),
        pd.Timestamp("2017-01-05 23:59:59.999999999"),
    )
    jan_7 = (
        pd.Timestamp("2017-01-07 00:00:00"),
        pd.Timestamp("2017-01-07 23:59:59.999999999"),
    )
    jan_8 = (
        pd.Timestamp("2017-01-08 00:00:00"),
        pd.Timestamp("2017-01-08 23:59:59.999999999"),
    )
    # In the final step the 7th and the 8th are expected as one interval.
    jan_7_to_8 = (jan_7[0], jan_8[1])

    assert results["A-B-regression-ad"] == [jan_5]
    assert results["A-C-regression-ad"] == [jan_7]
    assert results["ABC-ad"] == [jan_5, jan_7]
    assert results["D-ad"] == [jan_8]
    assert results["ABCD-ad"] == [jan_5, jan_7_to_8]
def test_pipenet_return_intermediate():
    """
    Test pipenet with return_intermediate=True, for both fit and fit_detect
    """
    values = np.array(
        [
            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
            [0, 10, 20, 30, 41, 50, 60, 70, 80, 90],
            [0, 100, 200, 300, 400, 500, 601, 700, 800, 900],
            [0, 0, 0, 0, 0, 0, 0, 100, 0, 0],
        ]
    )
    df = pd.DataFrame(
        values.T,
        index=pd.date_range(start="2017-1-1", periods=10, freq="D"),
        columns=["A", "B", "C", "D"],
    )

    reg_ab = LinearRegression()
    reg_ac = LinearRegression()

    # Step specifications, created in the order they appear in the pipenet.
    ab_regression_ad = {
        "model": detector.RegressionAD(regressor=reg_ab, target="B"),
        "input": "original",
        "subset": ["A", "B"],
    }
    ac_regression_error = {
        "model": transformer.RegressionResidual(regressor=reg_ac, target="C"),
        "input": "original",
        "subset": ["A", "C"],
    }
    ac_regression_ad = {
        "model": detector.InterQuartileRangeAD(),
        "input": "A-C-regression-error",
        "subset": "all",
    }
    abc_ad = {
        "model": aggregator.OrAggregator(),
        "input": ["A-B-regression-ad", "A-C-regression-ad"],
    }
    d_ad = {
        "model": detector.QuantileAD(high=0.9, low=0.1),
        "input": "original",
        "subset": ["D"],
    }
    abcd_ad = {
        "model": aggregator.OrAggregator(),
        "input": ["ABC-ad", "D-ad"],
    }
    my_pipe = Pipenet(
        {
            "A-B-regression-ad": ab_regression_ad,
            "A-C-regression-error": ac_regression_error,
            "A-C-regression-ad": ac_regression_ad,
            "ABC-ad": abc_ad,
            "D-ad": d_ad,
            "ABCD-ad": abcd_ad,
        }
    )

    # --- fit only: of the six steps, only the transformer step is expected
    # to have a non-None intermediate result.
    results = my_pipe.fit(df, return_intermediate=True)
    assert set(results.keys()) == set(my_pipe.steps.keys()).union({"original"})
    fit_expectations = (
        ("A-B-regression-ad", True),
        ("A-C-regression-error", False),
        ("A-C-regression-ad", True),
        ("ABC-ad", True),
        ("D-ad", True),
        ("ABCD-ad", True),
    )
    for step_name, expect_none in fit_expectations:
        if expect_none:
            assert results[step_name] is None
        else:
            assert results[step_name] is not None

    # --- fit_detect: each detection step is compared against an expected
    # 0/1 series on the index of df.
    results = my_pipe.fit_detect(df, return_intermediate=True)
    assert set(results.keys()) == set(my_pipe.steps.keys()).union({"original"})
    detect_expectations = (
        ("A-B-regression-ad", [0, 0, 0, 0, 1, 0, 0, 0, 0, 0]),
        ("A-C-regression-ad", [0, 0, 0, 0, 0, 0, 1, 0, 0, 0]),
        ("ABC-ad", [0, 0, 0, 0, 1, 0, 1, 0, 0, 0]),
        ("D-ad", [0, 0, 0, 0, 0, 0, 0, 1, 0, 0]),
        ("ABCD-ad", [0, 0, 0, 0, 1, 0, 1, 1, 0, 0]),
    )
    for step_name, flags in detect_expectations:
        pd.testing.assert_series_equal(
            results[step_name],
            pd.Series(flags, index=df.index),
            check_dtype=False,
            check_names=False,
        )
def test_pipenet_default():
    """
    Test default setting of pipenet: fit_detect output and the recall,
    precision, iou and f1 scores against the expected labels
    """
    values = np.array(
        [
            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
            [0, 10, 20, 30, 41, 50, 60, 70, 80, 90],
            [0, 100, 200, 300, 400, 500, 601, 700, 800, 900],
            [0, 0, 0, 0, 0, 0, 0, 100, 0, 0],
        ]
    )
    df = pd.DataFrame(
        values.T,
        index=pd.date_range(start="2017-1-1", periods=10, freq="D"),
        columns=["A", "B", "C", "D"],
    )

    reg_ab = LinearRegression()
    reg_ac = LinearRegression()

    # Step specifications, created in the order they appear in the pipenet.
    ab_regression_ad = {
        "model": detector.RegressionAD(regressor=reg_ab, target="B"),
        "input": "original",
        "subset": ["A", "B"],
    }
    ac_regression_error = {
        "model": transformer.RegressionResidual(regressor=reg_ac, target="C"),
        "input": "original",
        "subset": ["A", "C"],
    }
    ac_regression_ad = {
        "model": detector.InterQuartileRangeAD(),
        "input": "A-C-regression-error",
        "subset": "all",
    }
    abc_ad = {
        "model": aggregator.OrAggregator(),
        "input": ["A-B-regression-ad", "A-C-regression-ad"],
    }
    d_ad = {
        "model": detector.QuantileAD(high=0.9, low=0.1),
        "input": "original",
        "subset": ["D"],
    }
    abcd_ad = {
        "model": aggregator.OrAggregator(),
        "input": ["ABC-ad", "D-ad"],
    }
    my_pipe = Pipenet(
        {
            "A-B-regression-ad": ab_regression_ad,
            "A-C-regression-error": ac_regression_error,
            "A-C-regression-ad": ac_regression_ad,
            "ABC-ad": abc_ad,
            "D-ad": d_ad,
            "ABCD-ad": abcd_ad,
        }
    )

    # Expected 0/1 labels, one per row of df.
    expected_flags = [0, 0, 0, 0, 1, 0, 1, 1, 0, 0]

    anomaly = my_pipe.fit_detect(df)
    pd.testing.assert_series_equal(
        anomaly,
        pd.Series(expected_flags, index=df.index),
        check_dtype=False,
    )

    # Scoring against those same labels must give 1 for every metric.
    for metric in ("recall", "precision", "iou", "f1"):
        truth = pd.Series(expected_flags, index=df.index)
        assert my_pipe.score(df, truth, scoring=metric) == 1