Exemple #1
0
def test_or_dict_of_lists():
    """
    Check OrAggregator on a dict whose values are lists of events, where an
    event is either a single time stamp or a 2-tuple of time stamps
    """
    events_a = [
        (Timestamp("2017-1-1"), Timestamp("2017-1-2")),
        (Timestamp("2017-1-5"), Timestamp("2017-1-8")),
        Timestamp("2017-1-10"),
    ]
    # Deliberately unsorted and overlapping, including a zero-length interval
    events_b = [
        Timestamp("2017-1-2"),
        (Timestamp("2017-1-3"), Timestamp("2017-1-6")),
        Timestamp("2017-1-8"),
        (Timestamp("2017-1-7"), Timestamp("2017-1-9")),
        (Timestamp("2017-1-11"), Timestamp("2017-1-11")),
    ]

    combined = aggt.OrAggregator().aggregate({"A": events_a, "B": events_b})

    first_window = (
        Timestamp("2017-01-01 00:00:00"),
        Timestamp("2017-01-02 00:00:00"),
    )
    second_window = (
        Timestamp("2017-01-03 00:00:00"),
        Timestamp("2017-01-09 00:00:00"),
    )
    assert combined == [
        first_window,
        second_window,
        Timestamp("2017-1-10"),
        Timestamp("2017-1-11"),
    ]
Exemple #2
0
def test_pipe_summary():
    """
    Build a six-step pipenet and call its summary method
    """
    ab_regressor = LinearRegression()
    ac_regressor = LinearRegression()

    # Steps are added in the same order as they appear in the pipenet graph
    steps = {}
    steps["A-B-regression-ad"] = {
        "model": detector.RegressionAD(regressor=ab_regressor, target="B"),
        "input": "original",
        "subset": ["A", "B"],
    }
    steps["A-C-regression-error"] = {
        "model": transformer.RegressionResidual(
            regressor=ac_regressor, target="C"
        ),
        "input": "original",
        "subset": ["A", "C"],
    }
    steps["A-C-regression-ad"] = {
        "model": detector.InterQuartileRangeAD(),
        "input": "A-C-regression-error",
        "subset": "all",
    }
    steps["ABC-ad"] = {
        "model": aggregator.OrAggregator(),
        "input": ["A-B-regression-ad", "A-C-regression-ad"],
    }
    steps["D-ad"] = {
        "model": detector.QuantileAD(high=0.9, low=0.1),
        "input": "original",
        "subset": ["D"],
    }
    steps["ABCD-ad"] = {
        "model": aggregator.OrAggregator(),
        "input": ["ABC-ad", "D-ad"],
    }

    pipenet = Pipenet(steps)
    pipenet.summary()
Exemple #3
0
def test_or_df():
    """
    Check OrAggregator on a two-column DataFrame of 0/1/NaN values
    """
    nan = float("nan")
    daily_index = pd.date_range(start="2017-1-1", periods=6, freq="D")

    rows = [[1, 1], [1, 0], [0, 1], [0, 0], [nan, 1], [0, nan]]
    flags = pd.DataFrame(rows, index=daily_index)

    # One expected value per row above
    expected = pd.Series([1, 1, 1, 0, 1, nan], index=daily_index)

    aggregated = aggt.OrAggregator().aggregate(flags)
    pd.testing.assert_series_equal(aggregated, expected)
Exemple #4
0
def test_skip_fit():
    """
    Test the skip_fit argument of pipenet fit: fit once on clean data, then
    refit on perturbed data while skipping selected steps, and check the
    regression coefficients and the IQR detector's thresholds
    """
    ab_regressor = LinearRegression()
    ac_regressor = LinearRegression()
    steps = {
        "A-B-regression-ad": {
            "model": detector.RegressionAD(regressor=ab_regressor, target="B"),
            "input": "original",
            "subset": ["A", "B"],
        },
        "A-C-regression-error": {
            "model": transformer.RegressionResidual(
                regressor=ac_regressor, target="C"
            ),
            "input": "original",
            "subset": ["A", "C"],
        },
        "A-C-regression-ad": {
            "model": detector.InterQuartileRangeAD(),
            "input": "A-C-regression-error",
            "subset": "all",
        },
        "ABC-ad": {
            "model": aggregator.OrAggregator(),
            "input": ["A-B-regression-ad", "A-C-regression-ad"],
        },
        "D-ad": {
            "model": detector.QuantileAD(high=0.9, low=0.1),
            "input": "original",
            "subset": ["D"],
        },
        "ABCD-ad": {
            "model": aggregator.OrAggregator(),
            "input": ["ABC-ad", "D-ad"],
        },
    }
    pipenet = Pipenet(steps)

    column_names = ["A", "B", "C", "D"]
    row_a = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
    row_d = [0, 0, 0, 0, 0, 0, 0, 100, 0, 0]

    # First fit: B and C are exact multiples of A
    clean_values = np.array(
        [
            row_a,
            [0, 10, 20, 30, 40, 50, 60, 70, 80, 90],
            [0, 100, 200, 300, 400, 500, 600, 700, 800, 900],
            row_d,
        ]
    ).T
    clean_df = pd.DataFrame(
        clean_values,
        index=pd.date_range(start="2017-1-1", periods=10, freq="D"),
        columns=column_names,
    )
    pipenet.fit(clean_df)

    # Later fits: one B value (41) and one C value (601) are off the line
    perturbed_values = np.array(
        [
            row_a,
            [0, 10, 20, 30, 41, 50, 60, 70, 80, 90],
            [0, 100, 200, 300, 400, 500, 601, 700, 800, 900],
            row_d,
        ]
    ).T
    perturbed_df = pd.DataFrame(
        perturbed_values,
        index=pd.date_range(start="2017-1-1", periods=10, freq="D"),
        columns=column_names,
    )

    # Skip both regression steps
    pipenet.fit(
        perturbed_df, skip_fit=["A-B-regression-ad", "A-C-regression-error"]
    )
    assert ab_regressor.coef_[0] == pytest.approx(10)
    assert ac_regressor.coef_[0] == pytest.approx(100)
    iqr_detector = pipenet.steps["A-C-regression-ad"]["model"]
    assert iqr_detector.abs_high_ == 0
    assert iqr_detector.abs_low_ == 0

    # Skip only the A-B regression step
    pipenet.fit(perturbed_df, skip_fit=["A-B-regression-ad"])
    assert ab_regressor.coef_[0] == pytest.approx(10)
    assert ac_regressor.coef_[0] != pytest.approx(100)
    iqr_detector = pipenet.steps["A-C-regression-ad"]["model"]
    assert iqr_detector.abs_high_ != 0
    assert iqr_detector.abs_low_ != 0
Exemple #5
0
def test_pipenet_return_list_return_intermediate():
    """
    Test pipenet fit_detect with return_list=True and return_intermediate=True
    """
    ab_regressor = LinearRegression()
    ac_regressor = LinearRegression()
    steps = {
        "A-B-regression-ad": {
            "model": detector.RegressionAD(regressor=ab_regressor, target="B"),
            "input": "original",
            "subset": ["A", "B"],
        },
        "A-C-regression-error": {
            "model": transformer.RegressionResidual(
                regressor=ac_regressor, target="C"
            ),
            "input": "original",
            "subset": ["A", "C"],
        },
        "A-C-regression-ad": {
            "model": detector.InterQuartileRangeAD(),
            "input": "A-C-regression-error",
            "subset": "all",
        },
        "ABC-ad": {
            "model": aggregator.OrAggregator(),
            "input": ["A-B-regression-ad", "A-C-regression-ad"],
        },
        "D-ad": {
            "model": detector.QuantileAD(high=0.9, low=0.1),
            "input": "original",
            "subset": ["D"],
        },
        "ABCD-ad": {
            "model": aggregator.OrAggregator(),
            "input": ["ABC-ad", "D-ad"],
        },
    }
    pipenet = Pipenet(steps)

    values = np.array(
        [
            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
            [0, 10, 20, 30, 41, 50, 60, 70, 80, 90],
            [0, 100, 200, 300, 400, 500, 601, 700, 800, 900],
            [0, 0, 0, 0, 0, 0, 0, 100, 0, 0],
        ]
    ).T
    frame = pd.DataFrame(
        values,
        index=pd.date_range(start="2017-1-1", periods=10, freq="D"),
        columns=["A", "B", "C", "D"],
    )

    results = pipenet.fit_detect(
        frame, return_list=True, return_intermediate=True
    )

    expected_keys = set(pipenet.steps.keys()).union({"original"})
    assert set(results.keys()) == expected_keys

    # Expected event windows
    jan_05 = (
        pd.Timestamp("2017-01-05 00:00:00"),
        pd.Timestamp("2017-01-05 23:59:59.999999999"),
    )
    jan_07 = (
        pd.Timestamp("2017-01-07 00:00:00"),
        pd.Timestamp("2017-01-07 23:59:59.999999999"),
    )
    jan_08 = (
        pd.Timestamp("2017-01-08 00:00:00"),
        pd.Timestamp("2017-01-08 23:59:59.999999999"),
    )
    jan_07_to_08 = (
        pd.Timestamp("2017-01-07 00:00:00"),
        pd.Timestamp("2017-01-08 23:59:59.999999999"),
    )

    expected_events = {
        "A-B-regression-ad": [jan_05],
        "A-C-regression-ad": [jan_07],
        "ABC-ad": [jan_05, jan_07],
        "D-ad": [jan_08],
        "ABCD-ad": [jan_05, jan_07_to_08],
    }
    for step_name, events in expected_events.items():
        assert results[step_name] == events
Exemple #6
0
def test_pipenet_return_intermediate():
    """
    Test pipenet fit and fit_detect with return_intermediate=True
    """
    ab_regressor = LinearRegression()
    ac_regressor = LinearRegression()
    steps = {
        "A-B-regression-ad": {
            "model": detector.RegressionAD(regressor=ab_regressor, target="B"),
            "input": "original",
            "subset": ["A", "B"],
        },
        "A-C-regression-error": {
            "model": transformer.RegressionResidual(
                regressor=ac_regressor, target="C"
            ),
            "input": "original",
            "subset": ["A", "C"],
        },
        "A-C-regression-ad": {
            "model": detector.InterQuartileRangeAD(),
            "input": "A-C-regression-error",
            "subset": "all",
        },
        "ABC-ad": {
            "model": aggregator.OrAggregator(),
            "input": ["A-B-regression-ad", "A-C-regression-ad"],
        },
        "D-ad": {
            "model": detector.QuantileAD(high=0.9, low=0.1),
            "input": "original",
            "subset": ["D"],
        },
        "ABCD-ad": {
            "model": aggregator.OrAggregator(),
            "input": ["ABC-ad", "D-ad"],
        },
    }
    pipenet = Pipenet(steps)

    values = np.array(
        [
            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
            [0, 10, 20, 30, 41, 50, 60, 70, 80, 90],
            [0, 100, 200, 300, 400, 500, 601, 700, 800, 900],
            [0, 0, 0, 0, 0, 0, 0, 100, 0, 0],
        ]
    ).T
    df = pd.DataFrame(
        values,
        index=pd.date_range(start="2017-1-1", periods=10, freq="D"),
        columns=["A", "B", "C", "D"],
    )

    # fit: only the residual transformer step has a non-None entry
    results = pipenet.fit(df, return_intermediate=True)
    assert set(results.keys()) == set(pipenet.steps.keys()).union({"original"})
    assert results["A-C-regression-error"] is not None
    for step_name in (
        "A-B-regression-ad",
        "A-C-regression-ad",
        "ABC-ad",
        "D-ad",
        "ABCD-ad",
    ):
        assert results[step_name] is None

    # fit_detect: each detector/aggregator step yields a binary series
    results = pipenet.fit_detect(df, return_intermediate=True)
    assert set(results.keys()) == set(pipenet.steps.keys()).union({"original"})
    expected_flags = [
        ("A-B-regression-ad", [0, 0, 0, 0, 1, 0, 0, 0, 0, 0]),
        ("A-C-regression-ad", [0, 0, 0, 0, 0, 0, 1, 0, 0, 0]),
        ("ABC-ad", [0, 0, 0, 0, 1, 0, 1, 0, 0, 0]),
        ("D-ad", [0, 0, 0, 0, 0, 0, 0, 1, 0, 0]),
        ("ABCD-ad", [0, 0, 0, 0, 1, 0, 1, 1, 0, 0]),
    ]
    for step_name, flags in expected_flags:
        pd.testing.assert_series_equal(
            results[step_name],
            pd.Series(flags, index=df.index),
            check_dtype=False,
            check_names=False,
        )
Exemple #7
0
def test_pipenet_default():
    """
    Test pipenet fit_detect and score with default settings
    """
    ab_regressor = LinearRegression()
    ac_regressor = LinearRegression()
    steps = {
        "A-B-regression-ad": {
            "model": detector.RegressionAD(regressor=ab_regressor, target="B"),
            "input": "original",
            "subset": ["A", "B"],
        },
        "A-C-regression-error": {
            "model": transformer.RegressionResidual(
                regressor=ac_regressor, target="C"
            ),
            "input": "original",
            "subset": ["A", "C"],
        },
        "A-C-regression-ad": {
            "model": detector.InterQuartileRangeAD(),
            "input": "A-C-regression-error",
            "subset": "all",
        },
        "ABC-ad": {
            "model": aggregator.OrAggregator(),
            "input": ["A-B-regression-ad", "A-C-regression-ad"],
        },
        "D-ad": {
            "model": detector.QuantileAD(high=0.9, low=0.1),
            "input": "original",
            "subset": ["D"],
        },
        "ABCD-ad": {
            "model": aggregator.OrAggregator(),
            "input": ["ABC-ad", "D-ad"],
        },
    }
    pipenet = Pipenet(steps)

    values = np.array(
        [
            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
            [0, 10, 20, 30, 41, 50, 60, 70, 80, 90],
            [0, 100, 200, 300, 400, 500, 601, 700, 800, 900],
            [0, 0, 0, 0, 0, 0, 0, 100, 0, 0],
        ]
    ).T
    df = pd.DataFrame(
        values,
        index=pd.date_range(start="2017-1-1", periods=10, freq="D"),
        columns=["A", "B", "C", "D"],
    )

    expected_flags = [0, 0, 0, 0, 1, 0, 1, 1, 0, 0]

    anomaly = pipenet.fit_detect(df)
    pd.testing.assert_series_equal(
        anomaly,
        pd.Series(expected_flags, index=df.index),
        check_dtype=False,
    )

    # Scoring against the expected flags must be perfect for every metric
    for metric in ("recall", "precision", "iou", "f1"):
        ground_truth = pd.Series(expected_flags, index=df.index)
        assert pipenet.score(df, ground_truth, scoring=metric) == 1