def test_grid_predict_usecase():
    """Check that an InteractiveClassifier works inside a grid-searched pipeline.

    The classifier is loaded from the demo JSON file, placed behind an
    identity ``PipeTransformer`` and wrapped in ``GridSearchCV`` with an
    empty parameter grid. After fitting on the penguins data, the
    ``predict_proba`` output must have one row per sample and three columns.
    """
    # NOTE(review): another test in this view carries the same name; if both
    # live in one module the later definition shadows the earlier — confirm.
    penguins = load_penguins(as_frame=True).dropna()
    X = penguins.drop(columns=["species", "island", "sex"])
    y = penguins["species"]

    model = InteractiveClassifier.from_json(
        "tests/test_classification/demo-data.json"
    )
    steps = [
        ("id", PipeTransformer(identity)),
        ("mod", model),
    ]
    search = GridSearchCV(Pipeline(steps), cv=5, param_grid={})

    search.fit(X, y)
    preds = search.predict_proba(X)

    n_rows = preds.shape[0]
    assert n_rows == penguins.shape[0]
    n_cols = preds.shape[1]
    assert n_cols == 3
def test_grid_predict_usecase():
    """Check that an InteractivePreprocessor adds features inside a FeatureUnion.

    The preprocessor is loaded from the demo JSON file and unioned with an
    identity ``PipeTransformer``. After fitting on the penguins data, the
    transformed output must keep one row per sample and have three more
    columns than the input.
    """
    # NOTE(review): this name is shared with another test in this view and
    # mentions "grid" although no grid search is used here; if both tests
    # live in one module, one shadows the other — confirm and consider renaming.
    penguins = load_penguins(as_frame=True).dropna()
    X = penguins.drop(columns=["species", "island", "sex"])
    y = penguins["species"]

    preprocessor = InteractivePreprocessor.from_json(
        "tests/test_classification/demo-data.json"
    )
    union = FeatureUnion(
        [
            ("original", PipeTransformer(identity)),
            ("new_feats", preprocessor),
        ]
    )
    pipe = Pipeline([("features", union)])

    pipe.fit(X, y)
    preds = pipe.transform(X)

    n_rows = preds.shape[0]
    assert n_rows == penguins.shape[0]
    n_cols = preds.shape[1]
    assert n_cols == X.shape[1] + 3
def test_grid_predict():
    """Check that an InteractiveOutlierDetector works inside a grid-searched pipeline.

    The detector is loaded from the demo JSON file, placed behind an identity
    ``PipeTransformer`` and wrapped in ``GridSearchCV`` scored and refit on
    accuracy. Binary labels are drawn at random (value below 0.1 -> 1) from a
    locally seeded generator. After fitting, ``predict`` must return one
    prediction per sample.
    """
    clf = InteractiveOutlierDetector.from_json(
        "tests/test_classification/demo-data.json"
    )
    pipe = Pipeline(
        [
            ("id", PipeTransformer(identity)),
            ("mod", clf),
        ]
    )
    grid = GridSearchCV(
        pipe,
        cv=5,
        param_grid={},
        scoring={"acc": make_scorer(accuracy_score)},
        refit="acc",
    )

    df = load_penguins(as_frame=True).dropna()
    X = df.drop(columns=["species", "island", "sex"])

    # Use a seeded, local generator so the labels are identical on every run
    # and the global NumPy random state is left untouched.
    rng = np.random.RandomState(42)
    y = (rng.random_sample(df.shape[0]) < 0.1).astype(int)

    preds = grid.fit(X, y).predict(X)
    assert preds.shape[0] == df.shape[0]
def test_works_with_pipeline_gridsearch(random_xy_dataset_clf):
    """Smoke-test grid-searching over a PipeTransformer keyword argument.

    A pipeline of ``PipeTransformer(func=double, factor=1)`` followed by
    ``GaussianNB`` is searched over ``pipe__factor`` in ``[1, 2, 3]`` with
    two folds; fitting and predicting must run without raising.
    """
    X, y = random_xy_dataset_clf

    transformer = PipeTransformer(func=double, factor=1)
    steps = [("pipe", transformer), ("mod", GaussianNB())]
    search_space = {"pipe__factor": [1, 2, 3]}

    search = GridSearchCV(Pipeline(steps), cv=2, param_grid=search_space)
    search.fit(X, y)
    search.predict(X)
def test_basic_example(factor):
    """Check that PipeTransformer forwards ``factor`` to ``double``.

    Transforming a seeded 1000x4 normal sample must give values close to
    the input multiplied by ``factor``.
    """
    np.random.seed(42)
    X = np.random.normal(0, 1, (1000, 4))

    transformer = PipeTransformer(func=double, factor=factor)
    X_tfm = transformer.fit_transform(X)

    expected = X * factor
    matches = np.isclose(expected, X_tfm)
    assert np.all(matches)
def test_estimator_checks(test_fn):
    """Run the supplied check ``test_fn`` against a PipeTransformer.

    The check receives the class name and an instance built on ``double``.
    """
    estimator = PipeTransformer(func=double)
    estimator_name = PipeTransformer.__name__
    test_fn(estimator_name, estimator)
def test_works_with_partial_fit(random_xy_dataset_clf):
    """Check that ``partial_fit`` returns a usable transformer.

    After ``partial_fit``, transforming ``X`` with ``factor=2`` must give
    values close to ``X * 2``.
    """
    X, y = random_xy_dataset_clf

    transformer = PipeTransformer(func=double, factor=2)
    fitted = transformer.partial_fit(X, y)
    transformed = fitted.transform(X)

    matches = np.isclose(transformed, X * 2)
    assert np.all(matches)