def test_ignore_bad_data():
    """
    Degenerate drawings must be dropped instead of being treated as polygons.

    A quick double-click in the drawing tool can leave behind a two-point line rather
    than a closed polygon. Bokeh accepts such a shape, but the point-in-poly algorithm
    cannot work with it, so the detector is expected to end up with no polygon data.
    """
    # First chart: nothing was drawn for any of the three labels.
    chart_without_drawings = {
        "chart_id": "9ec8e755-2",
        "x": "bill_length_mm",
        "y": "bill_depth_mm",
        "polygons": {
            "Adelie": {"bill_length_mm": [], "bill_depth_mm": []},
            "Gentoo": {"bill_length_mm": [], "bill_depth_mm": []},
            "Chinstrap": {"bill_length_mm": [], "bill_depth_mm": []},
        },
    }
    # Second chart: the only drawn shape has just two vertices, i.e. it is a line.
    chart_with_line = {
        "chart_id": "11640372-c",
        "x": "flipper_length_mm",
        "y": "body_mass_g",
        "polygons": {
            "Adelie": {
                "flipper_length_mm": [[214.43261376806052, 256.2612913545137]],
                "body_mass_g": [[3950.9482324534456, 3859.9137496948247]],
            },
            "Gentoo": {"flipper_length_mm": [], "body_mass_g": []},
            "Chinstrap": {"flipper_length_mm": [], "body_mass_g": []},
        },
    }

    detector = InteractiveOutlierDetector(
        json_desc=[chart_without_drawings, chart_with_line]
    )
    usable_polygons = list(detector.poly_data)
    assert usable_polygons == []
# Example #2
# 0
def test_estimator_checks(test_fn):
    """
    Run a single estimator check against a detector built from the demo drawing.

    Many checks are skipped for this model: it is "bespoke" rather than general, so a
    lot of the usual estimator assumptions do not hold for it.

    ``test_fn`` is called with the estimator class and a configured instance.
    NOTE(review): the parametrisation that supplies ``test_fn`` is not visible in this
    block -- confirm where the list of selected checks is defined.
    """
    demo_json = "tests/test_classification/demo-data.json"
    detector = InteractiveOutlierDetector.from_json(demo_json)
    test_fn(InteractiveOutlierDetector, detector)
# Example #3
# 0
def test_base_predict_usecase():
    """Fit on the penguins frame and check that predict returns one value per row."""
    demo_json = "tests/test_classification/demo-data.json"
    detector = InteractiveOutlierDetector.from_json(demo_json)

    # Rows with missing values are dropped before the features and target are split.
    penguins = load_penguins(as_frame=True).dropna()
    features = penguins.drop(columns=["species"])
    target = penguins["species"]

    fitted = detector.fit(features, target)
    predictions = fitted.predict(features)

    n_rows = penguins.shape[0]
    assert predictions.shape[0] == n_rows
# Example #4
# 0
def test_grid_predict():
    """
    Check that the detector works as the last pipeline step inside a grid search.

    The search uses an empty parameter grid with 5-fold cross-validation and refits on
    the "acc" scorer. Only the number of returned predictions is asserted, so the
    randomly generated binary labels do not influence the outcome of the test.
    """
    demo_json = "tests/test_classification/demo-data.json"
    detector = InteractiveOutlierDetector.from_json(demo_json)

    steps = [
        ("id", PipeTransformer(identity)),
        ("mod", detector),
    ]
    scorers = {"acc": make_scorer(accuracy_score)}
    search = GridSearchCV(
        Pipeline(steps),
        cv=5,
        param_grid={},
        scoring=scorers,
        refit="acc",
    )

    penguins = load_penguins(as_frame=True).dropna()
    n_rows = penguins.shape[0]
    features = penguins.drop(columns=["species", "island", "sex"])
    # Roughly one in ten rows gets label 1; the labels are random, not real outliers.
    labels = (np.random.random(n_rows) < 0.1).astype(int)

    fitted_search = search.fit(features, labels)
    predictions = fitted_search.predict(features)
    assert predictions.shape[0] == n_rows