def test_nonunique_output():
    """
    Test that a pipenet with more than one unconsumed step is rejected

    Both "iqr_ad" and "sign_check" are leaves of the graph below (no other
    step lists them as input). Building the Pipenet is expected to raise a
    ValueError whose message matches "ambiguous".
    """
    with pytest.raises(ValueError, match="ambiguous"):
        # Everything is constructed inside the context manager so the
        # guarded scope is the same as a single nested expression.
        decomposition = transformer.ClassicSeasonalDecomposition(freq=6)
        absolute = transformer.CustomizedTransformer1D(transform_func=abs)
        iqr = detector.InterQuartileRangeAD(c=(None, 3))
        threshold = detector.ThresholdAD(high=0.0, low=-float("inf"))
        steps = {
            "deseasonal_residual": {
                "model": decomposition,
                "input": "original",
            },
            "abs_residual": {
                "model": absolute,
                "input": "deseasonal_residual",
            },
            "iqr_ad": {
                "model": iqr,
                "input": "abs_residual",
            },
            "sign_check": {
                "model": threshold,
                "input": "deseasonal_residual",
            },
        }
        Pipenet(steps)
def test_pipeline():
    """
    Smoke test: fit_detect on a sequential Pipeline runs without error

    The input is a daily series of 60 points repeating the pattern
    0, 1, 2, 3, 2, 1. No assertion is made on the detection result.
    """
    pattern = [0, 1, 2, 3, 2, 1] * 10
    daily_index = pd.date_range(start="2017-1-1", periods=60, freq="D")
    series = pd.Series(pattern, index=daily_index)

    steps = [
        (
            "deseasonal_residual",
            transformer.NaiveSeasonalDecomposition(freq=6),
        ),
        (
            "abs_residual",
            transformer.CustomizedTransformer1D(transform_func=abs),
        ),
        ("iqr_ad", detector.InterQuartileRangeAD(c=(None, 3))),
    ]
    pipeline = Pipeline(steps)
    pipeline.fit_detect(series)
def test_pipe_summary():
    """
    Test summary

    Smoke test only: builds a six-step pipenet and calls summary() on it
    without fitting; nothing is asserted about the output.
    """
    ab_regressor = LinearRegression()
    ac_regressor = LinearRegression()

    steps = {
        "A-B-regression-ad": {
            "model": detector.RegressionAD(
                regressor=ab_regressor, target="B"
            ),
            "input": "original",
            "subset": ["A", "B"],
        },
        "A-C-regression-error": {
            "model": transformer.RegressionResidual(
                regressor=ac_regressor, target="C"
            ),
            "input": "original",
            "subset": ["A", "C"],
        },
        "A-C-regression-ad": {
            "model": detector.InterQuartileRangeAD(),
            "input": "A-C-regression-error",
            "subset": "all",
        },
        "ABC-ad": {
            "model": aggregator.OrAggregator(),
            "input": ["A-B-regression-ad", "A-C-regression-ad"],
        },
        "D-ad": {
            "model": detector.QuantileAD(high=0.9, low=0.1),
            "input": "original",
            "subset": ["D"],
        },
        "ABCD-ad": {
            "model": aggregator.OrAggregator(),
            "input": ["ABC-ad", "D-ad"],
        },
    }

    pipenet = Pipenet(steps)
    pipenet.summary()
def test_detector_return_intermediate():
    """
    Test that return_intermediate=True yields one entry per step

    The result of fit_detect is expected to be keyed by every step name
    plus "original".
    """
    daily_index = pd.date_range(start="2017-1-1", periods=60, freq="D")
    series = pd.Series([0, 1, 2, 3, 2, 1] * 10, index=daily_index)

    steps = [
        {
            "name": "deseasonal_residual",
            "model": transformer.NaiveSeasonalDecomposition(freq=6),
            "input": "original",
        },
        {
            "name": "abs_residual",
            "model": transformer.CustomizedTransformer1D(transform_func=abs),
            "input": "deseasonal_residual",
        },
        {
            "name": "iqr_ad",
            "model": detector.InterQuartileRangeAD(c=(None, 3)),
            "input": "abs_residual",
        },
        {
            "name": "sign_check",
            "model": detector.ThresholdAD(high=0.0, low=-float("inf")),
            "input": "deseasonal_residual",
        },
        {
            "name": "and",
            "model": aggregator.AndAggregator(),
            "input": ["iqr_ad", "sign_check"],
        },
    ]
    pipenet = Pipenet(steps)

    intermediate = pipenet.fit_detect(series, return_intermediate=True)

    expected_keys = {
        "original",
        "deseasonal_residual",
        "abs_residual",
        "iqr_ad",
        "sign_check",
        "and",
    }
    assert set(intermediate.keys()) == expected_keys
def test_skip_fit():
    """
    Test skip_fit on a step whose model has not been fitted

    The "deseasonal_residual" model is freshly constructed in this test, so
    asking fit_detect to skip fitting it is expected to raise RuntimeError.
    A plain fit_detect on the same pipenet is then run as a sanity check.
    """
    daily_index = pd.date_range(start="2017-1-1", periods=60, freq="D")
    series = pd.Series([0, 1, 2, 3, 2, 1] * 10, index=daily_index)

    deseasonal_residual = transformer.NaiveSeasonalDecomposition(freq=6)
    steps = [
        {
            "name": "deseasonal_residual",
            "model": deseasonal_residual,
            "input": "original",
        },
        {
            "name": "abs_residual",
            "model": transformer.CustomizedTransformer1D(transform_func=abs),
            "input": "deseasonal_residual",
        },
        {
            "name": "iqr_ad",
            "model": detector.InterQuartileRangeAD(c=(None, 3)),
            "input": "abs_residual",
        },
        {
            "name": "sign_check",
            "model": detector.ThresholdAD(high=0.0, low=-float("inf")),
            "input": "deseasonal_residual",
        },
        {
            "name": "and",
            "model": aggregator.AndAggregator(),
            "input": ["iqr_ad", "sign_check"],
        },
    ]
    pipenet = Pipenet(steps)

    with pytest.raises(RuntimeError):
        pipenet.fit_detect(series, skip_fit=["deseasonal_residual"])

    # Runs outside the raises-context: a statement after the raising call
    # inside the context would never execute.
    pipenet.fit_detect(series)
import adtk.transformer as transformer from sklearn.neighbors import LocalOutlierFactor from sklearn.cluster import KMeans from sklearn.linear_model import LinearRegression # We have 4 types of models # - one-to-one: input a univariate series, output a univariate series # - one-to-many: input a univariate series, output a multivariate series # - many-to-one: input a multivariate series, output a univariate series # - many-to-many: input a multivariate series, output a multivariate series one2one_models = [ detector.ThresholdAD(), detector.QuantileAD(), detector.InterQuartileRangeAD(), detector.GeneralizedESDTestAD(), detector.PersistAD(), detector.LevelShiftAD(), detector.VolatilityShiftAD(), detector.AutoregressionAD(), detector.SeasonalAD(freq=2), transformer.RollingAggregate(agg="median"), transformer.RollingAggregate(agg="quantile", agg_params={"q": 0.5}), transformer.DoubleRollingAggregate(agg="median"), transformer.DoubleRollingAggregate( agg="quantile", agg_params={"q": [0.1, 0.5, 0.9]} ), transformer.DoubleRollingAggregate( agg="hist", agg_params={"bins": [30, 50, 70]} ),
"model": detector.QuantileAD(low=0.1), "s": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10], "a": {"abs_low_": 1, "abs_high_": float("inf")}, }, { "model": detector.QuantileAD(high=0.9), "s": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10], "a": {"abs_low_": -float("inf"), "abs_high_": 9}, }, { "model": detector.QuantileAD(low=0.1, high=0.9), "s": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10], "a": {"abs_low_": 1, "abs_high_": 9}, }, { "model": detector.InterQuartileRangeAD(), "s": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10], "a": {"abs_low_": 2.5 - 15, "abs_high_": 7.5 + 15}, }, { "model": detector.InterQuartileRangeAD(c=2), "s": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10], "a": {"abs_low_": 2.5 - 10, "abs_high_": 7.5 + 10}, }, { "model": detector.InterQuartileRangeAD(c=(2, 4)), "s": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10], "a": {"abs_low_": 2.5 - 10, "abs_high_": 7.5 + 20}, }, { "model": detector.InterQuartileRangeAD(c=(2, None)),
def test_skip_fit():
    """
    Test that steps named in skip_fit are left untouched by fit

    The pipenet is first fitted on data where B == 10 * A and C == 100 * A.
    It is then re-fitted on slightly perturbed data, first skipping both
    regression steps and then skipping only the A-B regression, and the
    regressor coefficients and IQR thresholds are checked after each call.
    """
    ab_regressor = LinearRegression()
    ac_regressor = LinearRegression()
    steps = {
        "A-B-regression-ad": {
            "model": detector.RegressionAD(
                regressor=ab_regressor, target="B"
            ),
            "input": "original",
            "subset": ["A", "B"],
        },
        "A-C-regression-error": {
            "model": transformer.RegressionResidual(
                regressor=ac_regressor, target="C"
            ),
            "input": "original",
            "subset": ["A", "C"],
        },
        "A-C-regression-ad": {
            "model": detector.InterQuartileRangeAD(),
            "input": "A-C-regression-error",
            "subset": "all",
        },
        "ABC-ad": {
            "model": aggregator.OrAggregator(),
            "input": ["A-B-regression-ad", "A-C-regression-ad"],
        },
        "D-ad": {
            "model": detector.QuantileAD(high=0.9, low=0.1),
            "input": "original",
            "subset": ["D"],
        },
        "ABCD-ad": {
            "model": aggregator.OrAggregator(),
            "input": ["ABC-ad", "D-ad"],
        },
    }
    pipenet = Pipenet(steps)

    def make_frame(b_values, c_values):
        # Columns A and D are the same in both data sets; only B and C vary.
        values = np.array(
            [
                [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
                b_values,
                c_values,
                [0, 0, 0, 0, 0, 0, 0, 100, 0, 0],
            ]
        ).T
        return pd.DataFrame(
            values,
            index=pd.date_range(start="2017-1-1", periods=10, freq="D"),
            columns=["A", "B", "C", "D"],
        )

    # Initial fit on exactly linear data.
    exact = make_frame(
        [0, 10, 20, 30, 40, 50, 60, 70, 80, 90],
        [0, 100, 200, 300, 400, 500, 600, 700, 800, 900],
    )
    pipenet.fit(exact)

    # Second data set: B is off by one at position 4, C at position 6.
    perturbed = make_frame(
        [0, 10, 20, 30, 41, 50, 60, 70, 80, 90],
        [0, 100, 200, 300, 400, 500, 601, 700, 800, 900],
    )

    # Skipping both regressions: coefficients and thresholds are asserted
    # to be what the exactly linear data gives.
    pipenet.fit(
        perturbed, skip_fit=["A-B-regression-ad", "A-C-regression-error"]
    )
    iqr_model = pipenet.steps["A-C-regression-ad"]["model"]
    assert ab_regressor.coef_[0] == pytest.approx(10)
    assert ac_regressor.coef_[0] == pytest.approx(100)
    assert iqr_model.abs_high_ == 0
    assert iqr_model.abs_low_ == 0

    # Skipping only the A-B regression: the A-C regressor and the IQR
    # thresholds are asserted to have moved away from those values.
    pipenet.fit(perturbed, skip_fit=["A-B-regression-ad"])
    iqr_model = pipenet.steps["A-C-regression-ad"]["model"]
    assert ab_regressor.coef_[0] == pytest.approx(10)
    assert ac_regressor.coef_[0] != pytest.approx(100)
    assert iqr_model.abs_high_ != 0
    assert iqr_model.abs_low_ != 0
def test_pipenet_return_list_return_intermediate():
    """
    Test pipenet with return_list=True and return_intermediate=True

    Detector and aggregator steps are expected to come back as lists of
    (start, end) timestamp pairs.
    """
    values = np.array(
        [
            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
            [0, 10, 20, 30, 41, 50, 60, 70, 80, 90],
            [0, 100, 200, 300, 400, 500, 601, 700, 800, 900],
            [0, 0, 0, 0, 0, 0, 0, 100, 0, 0],
        ]
    ).T
    df = pd.DataFrame(
        values,
        index=pd.date_range(start="2017-1-1", periods=10, freq="D"),
        columns=["A", "B", "C", "D"],
    )

    ab_regressor = LinearRegression()
    ac_regressor = LinearRegression()
    steps = {
        "A-B-regression-ad": {
            "model": detector.RegressionAD(
                regressor=ab_regressor, target="B"
            ),
            "input": "original",
            "subset": ["A", "B"],
        },
        "A-C-regression-error": {
            "model": transformer.RegressionResidual(
                regressor=ac_regressor, target="C"
            ),
            "input": "original",
            "subset": ["A", "C"],
        },
        "A-C-regression-ad": {
            "model": detector.InterQuartileRangeAD(),
            "input": "A-C-regression-error",
            "subset": "all",
        },
        "ABC-ad": {
            "model": aggregator.OrAggregator(),
            "input": ["A-B-regression-ad", "A-C-regression-ad"],
        },
        "D-ad": {
            "model": detector.QuantileAD(high=0.9, low=0.1),
            "input": "original",
            "subset": ["D"],
        },
        "ABCD-ad": {
            "model": aggregator.OrAggregator(),
            "input": ["ABC-ad", "D-ad"],
        },
    }
    pipenet = Pipenet(steps)

    results = pipenet.fit_detect(
        df, return_list=True, return_intermediate=True
    )

    assert set(results.keys()) == set(pipenet.steps.keys()) | {"original"}

    # Expected anomalous intervals, each covering whole days.
    jan_05 = (
        pd.Timestamp("2017-01-05 00:00:00"),
        pd.Timestamp("2017-01-05 23:59:59.999999999"),
    )
    jan_07 = (
        pd.Timestamp("2017-01-07 00:00:00"),
        pd.Timestamp("2017-01-07 23:59:59.999999999"),
    )
    jan_08 = (
        pd.Timestamp("2017-01-08 00:00:00"),
        pd.Timestamp("2017-01-08 23:59:59.999999999"),
    )
    # In the final aggregate the 7th and 8th form one two-day interval.
    jan_07_to_08 = (
        pd.Timestamp("2017-01-07 00:00:00"),
        pd.Timestamp("2017-01-08 23:59:59.999999999"),
    )

    expected_events = {
        "A-B-regression-ad": [jan_05],
        "A-C-regression-ad": [jan_07],
        "ABC-ad": [jan_05, jan_07],
        "D-ad": [jan_08],
        "ABCD-ad": [jan_05, jan_07_to_08],
    }
    for step_name, events in expected_events.items():
        assert results[step_name] == events
def test_pipenet_return_intermediate():
    """
    Test pipenet with return_intermediate=True

    Checks the intermediate results of fit (only the transformer step is
    asserted to be non-None) and of fit_detect (binary series per
    detector/aggregator step).
    """
    values = np.array(
        [
            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
            [0, 10, 20, 30, 41, 50, 60, 70, 80, 90],
            [0, 100, 200, 300, 400, 500, 601, 700, 800, 900],
            [0, 0, 0, 0, 0, 0, 0, 100, 0, 0],
        ]
    ).T
    df = pd.DataFrame(
        values,
        index=pd.date_range(start="2017-1-1", periods=10, freq="D"),
        columns=["A", "B", "C", "D"],
    )

    ab_regressor = LinearRegression()
    ac_regressor = LinearRegression()
    steps = {
        "A-B-regression-ad": {
            "model": detector.RegressionAD(
                regressor=ab_regressor, target="B"
            ),
            "input": "original",
            "subset": ["A", "B"],
        },
        "A-C-regression-error": {
            "model": transformer.RegressionResidual(
                regressor=ac_regressor, target="C"
            ),
            "input": "original",
            "subset": ["A", "C"],
        },
        "A-C-regression-ad": {
            "model": detector.InterQuartileRangeAD(),
            "input": "A-C-regression-error",
            "subset": "all",
        },
        "ABC-ad": {
            "model": aggregator.OrAggregator(),
            "input": ["A-B-regression-ad", "A-C-regression-ad"],
        },
        "D-ad": {
            "model": detector.QuantileAD(high=0.9, low=0.1),
            "input": "original",
            "subset": ["D"],
        },
        "ABCD-ad": {
            "model": aggregator.OrAggregator(),
            "input": ["ABC-ad", "D-ad"],
        },
    }
    pipenet = Pipenet(steps)

    # --- fit only ---------------------------------------------------------
    fitted = pipenet.fit(df, return_intermediate=True)
    assert set(fitted.keys()) == set(pipenet.steps.keys()) | {"original"}
    assert fitted["A-C-regression-error"] is not None
    for step_name in (
        "A-B-regression-ad",
        "A-C-regression-ad",
        "ABC-ad",
        "D-ad",
        "ABCD-ad",
    ):
        assert fitted[step_name] is None

    # --- fit and detect ---------------------------------------------------
    detected = pipenet.fit_detect(df, return_intermediate=True)
    assert set(detected.keys()) == set(pipenet.steps.keys()) | {"original"}

    expected_labels = {
        "A-B-regression-ad": [0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
        "A-C-regression-ad": [0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
        "ABC-ad": [0, 0, 0, 0, 1, 0, 1, 0, 0, 0],
        "D-ad": [0, 0, 0, 0, 0, 0, 0, 1, 0, 0],
        "ABCD-ad": [0, 0, 0, 0, 1, 0, 1, 1, 0, 0],
    }
    for step_name, labels in expected_labels.items():
        pd.testing.assert_series_equal(
            detected[step_name],
            pd.Series(labels, index=df.index),
            check_dtype=False,
            check_names=False,
        )
def test_pipenet_default():
    """
    Test default setting of pipenet

    fit_detect is expected to return a single binary series, and score is
    expected to equal 1 for each scoring option when given that same
    series as the ground truth.
    """
    values = np.array(
        [
            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
            [0, 10, 20, 30, 41, 50, 60, 70, 80, 90],
            [0, 100, 200, 300, 400, 500, 601, 700, 800, 900],
            [0, 0, 0, 0, 0, 0, 0, 100, 0, 0],
        ]
    ).T
    df = pd.DataFrame(
        values,
        index=pd.date_range(start="2017-1-1", periods=10, freq="D"),
        columns=["A", "B", "C", "D"],
    )

    ab_regressor = LinearRegression()
    ac_regressor = LinearRegression()
    steps = {
        "A-B-regression-ad": {
            "model": detector.RegressionAD(
                regressor=ab_regressor, target="B"
            ),
            "input": "original",
            "subset": ["A", "B"],
        },
        "A-C-regression-error": {
            "model": transformer.RegressionResidual(
                regressor=ac_regressor, target="C"
            ),
            "input": "original",
            "subset": ["A", "C"],
        },
        "A-C-regression-ad": {
            "model": detector.InterQuartileRangeAD(),
            "input": "A-C-regression-error",
            "subset": "all",
        },
        "ABC-ad": {
            "model": aggregator.OrAggregator(),
            "input": ["A-B-regression-ad", "A-C-regression-ad"],
        },
        "D-ad": {
            "model": detector.QuantileAD(high=0.9, low=0.1),
            "input": "original",
            "subset": ["D"],
        },
        "ABCD-ad": {
            "model": aggregator.OrAggregator(),
            "input": ["ABC-ad", "D-ad"],
        },
    }
    pipenet = Pipenet(steps)

    def expected_labels():
        # A fresh Series per use, so no object is shared between checks.
        return pd.Series([0, 0, 0, 0, 1, 0, 1, 1, 0, 0], index=df.index)

    anomaly = pipenet.fit_detect(df)
    pd.testing.assert_series_equal(
        anomaly,
        expected_labels(),
        check_dtype=False,
    )

    for scoring in ("recall", "precision", "iou", "f1"):
        assert pipenet.score(df, expected_labels(), scoring=scoring) == 1