Beispiel #1
0
def test_online_batch_consistent():

    # Batch

    batch = preprocessing.StandardScaler() | multiclass.OneVsRestClassifier(
        linear_model.LogisticRegression())

    dataset = datasets.ImageSegments()

    batch_metric = metrics.MacroF1()

    for i, x in enumerate(pd.read_csv(dataset.path, chunksize=1)):
        y = x.pop("category")
        y_pred = batch.predict_many(x)
        batch.learn_many(x, y)

        for yt, yp in zip(y, y_pred):
            if yp is not None:
                batch_metric.update(yt, yp)

        if i == 30:
            break

    # Online

    online = preprocessing.StandardScaler() | multiclass.OneVsRestClassifier(
        linear_model.LogisticRegression())

    online_metric = metrics.MacroF1()

    X = pd.read_csv(dataset.path)
    Y = X.pop("category")

    for i, (x, y) in enumerate(stream.iter_pandas(X, Y)):
        y_pred = online.predict_one(x)
        online.learn_one(x, y)

        if y_pred is not None:
            online_metric.update(y, y_pred)

        if i == 30:
            break

    assert online_metric.get() == batch_metric.get()
Beispiel #2
0
                continue
            params = obj._unit_test_params()
            yield obj(**params)


@pytest.mark.parametrize(
    "estimator, check",
    [
        pytest.param(estimator, check, id=f"{estimator}:{check.__name__}")
        for estimator in list(get_all_estimators()) + [
            feature_extraction.TFIDF(),
            linear_model.LogisticRegression(),
            preprocessing.StandardScaler() | linear_model.LinearRegression(),
            preprocessing.StandardScaler() | linear_model.PAClassifier(),
            (preprocessing.StandardScaler()
             | multiclass.OneVsRestClassifier(
                 linear_model.LogisticRegression())),
            (preprocessing.StandardScaler()
             | multiclass.OneVsRestClassifier(linear_model.PAClassifier())),
            naive_bayes.GaussianNB(),
            preprocessing.StandardScaler(),
            cluster.KMeans(n_clusters=5, seed=42),
            preprocessing.MinMaxScaler(),
            preprocessing.MinMaxScaler() + preprocessing.StandardScaler(),
            feature_extraction.PolynomialExtender(),
            (feature_extraction.PolynomialExtender()
             | preprocessing.StandardScaler()
             | linear_model.LinearRegression()),
            feature_selection.VarianceThreshold(),
            feature_selection.SelectKBest(similarity=stats.PearsonCorr()),
        ] for check in utils.estimator_checks.yield_checks(estimator)
        if check.__name__ not in estimator._unit_test_skips()