Exemple #1
0
def test_online_batch_consistent():
    """Check that mini-batch and one-by-one learning give the same score.

    Two identically configured pipelines (a standard scaler piped into a
    one-vs-rest logistic regression) are run over the first 31 rows of
    ``datasets.ImageSegments``. One goes through ``predict_many`` /
    ``fit_many`` on single-row CSV chunks, the other through ``predict_one`` /
    ``fit_one``. Every prediction is made before the model is fitted on that
    row, and the two macro-F1 values must be exactly equal.
    """

    def make_pipeline():
        # Both sides of the comparison start from the same configuration.
        scaler = preprocessing.StandardScaler()
        classifier = multiclass.OneVsRestClassifier(
            linear_model.LogisticRegression()
        )
        return scaler | classifier

    # Zero-based index of the last row each loop processes.
    last_row = 30

    # Mini-batch side: the CSV is read one row per chunk.
    batch_model = make_pipeline()
    segments = datasets.ImageSegments()
    batch_score = metrics.MacroF1()

    chunks = pd.read_csv(segments.path, chunksize=1)
    for row_idx, frame in enumerate(chunks):
        labels = frame.pop('category')
        guesses = batch_model.predict_many(frame)
        batch_model.fit_many(frame, labels)

        for truth, guess in zip(labels, guesses):
            # A prediction of None is not scored.
            if guess is None:
                continue
            batch_score.update(truth, guess)

        if row_idx == last_row:
            break

    # Online side: the whole CSV is loaded, then streamed row by row.
    online_model = make_pipeline()
    online_score = metrics.MacroF1()

    features = pd.read_csv(segments.path)
    targets = features.pop('category')

    rows = stream.iter_pandas(features, targets)
    for row_idx, (x, y) in enumerate(rows):
        guess = online_model.predict_one(x)
        online_model.fit_one(x, y)

        if guess is not None:
            online_score.update(y, guess)

        if row_idx == last_row:
            break

    assert online_score.get() == batch_score.get()
Exemple #2
0
def yield_datasets(model):
    """Yield the datasets that match the kind of estimator ``model`` is.

    ``model`` is passed through ``guess_model`` and the result is tested
    against the ``creme.base`` estimator types in a fixed order. A dataset is
    yielded for every type that matches, so one estimator can receive several.
    """

    from creme import base
    from creme import datasets
    from creme import stream
    from sklearn import datasets as sk_datasets

    estimator = guess_model(model)

    # Each entry pairs the estimator type(s) to test with a zero-argument
    # factory, so a dataset is only built once its entry has matched.
    candidates = (
        (
            (base.BinaryClassifier, base.MultiClassifier),
            lambda: datasets.Phishing(),
        ),
        (
            base.MultiClassifier,
            lambda: datasets.ImageSegments().take(500),
        ),
        (
            base.Regressor,
            lambda: datasets.TrumpApproval(),
        ),
        (
            base.MultiOutputRegressor,
            lambda: stream.iter_sklearn_dataset(sk_datasets.load_linnerud()),
        ),
        (
            base.MultiOutputClassifier,
            lambda: datasets.Music(),
        ),
    )

    for estimator_types, build_dataset in candidates:
        if isinstance(estimator, estimator_types):
            yield build_dataset()
Exemple #3
0
def yield_datasets(model):
    """Yield the datasets that match the kind of estimator ``model`` is.

    ``model`` is resolved through ``guess_model`` and then tested against the
    ``creme.base`` estimator types in turn. A dataset is yielded for each
    matching type; multi-output regressors get two of them.
    """

    from creme import base
    from creme import compose
    from creme import datasets
    from creme import preprocessing
    from creme import stream
    from sklearn import datasets as sk_datasets

    class SolarFlare:
        """One-hot encoded version of `datasets.SolarFlare`."""

        def __iter__(self):
            # Pipe the `str`-selected features into the one-hot encoder and
            # combine that with the `int`-selected features.
            categorical = compose.SelectType(str) | preprocessing.OneHotEncoder()
            numeric = compose.SelectType(int)
            encoder = categorical + numeric
            for features, targets in datasets.SolarFlare():
                yield encoder.transform_one(features), targets

    def multi_output_regression_datasets():
        # First the scikit-learn Linnerud data, then the encoded solar flares.
        yield stream.iter_sklearn_dataset(sk_datasets.load_linnerud())
        yield SolarFlare()

    estimator = guess_model(model)

    # Binary and multi-class classifiers share the Phishing dataset.
    classifier_types = (base.BinaryClassifier, base.MultiClassifier)
    if isinstance(estimator, classifier_types):
        yield datasets.Phishing()

    # Multi-class classifiers get one more dataset on top of that.
    if isinstance(estimator, base.MultiClassifier):
        yield datasets.ImageSegments().take(500)

    if isinstance(estimator, base.Regressor):
        yield datasets.TrumpApproval()

    if isinstance(estimator, base.MultiOutputRegressor):
        yield from multi_output_regression_datasets()

    if isinstance(estimator, base.MultiOutputClassifier):
        yield datasets.Music()
Exemple #4
0
def yield_datasets(model):
    """Yield the datasets that match the kind of estimator ``model`` is.

    ``model`` is resolved through ``guess_model`` and then tested against the
    ``creme.base`` estimator types in a fixed order; every matching type
    contributes one dataset.
    """

    from creme import base
    from creme import datasets
    from creme import stream
    from sklearn import datasets as sk_datasets

    def phishing():
        return datasets.Phishing()

    def image_segments():
        return datasets.ImageSegments().take(500)

    def trump_approval():
        return datasets.TrumpApproval()

    def linnerud():
        return stream.iter_sklearn_dataset(sk_datasets.load_linnerud())

    def yeast_head():
        # Only the first 100 items of the OpenML `yeast` stream are kept.
        samples = stream.iter_sklearn_dataset(
            sk_datasets.fetch_openml('yeast', version=4)
        )
        return itertools.islice(samples, 100)

    estimator = guess_model(model)

    # The factories are only called for entries whose type check passes, so
    # nothing is loaded for estimator kinds that do not apply.
    candidates = [
        ((base.BinaryClassifier, base.MultiClassifier), phishing),
        (base.MultiClassifier, image_segments),
        (base.Regressor, trump_approval),
        (base.MultiOutputRegressor, linnerud),
        (base.MultiOutputClassifier, yeast_head),
    ]

    for estimator_types, build_dataset in candidates:
        if isinstance(estimator, estimator_types):
            yield build_dataset()
Exemple #5
0
def yield_datasets(model):
    """Yield the datasets ``model`` can be exercised on.

    The kind of ``model`` is determined with the ``creme.utils.inspect``
    predicates, and a dataset is yielded for each predicate that holds.
    Classifiers whose ``_multiclass`` attribute is truthy get an additional
    dataset, and multi-output regressors get two.
    """

    from creme import compose
    from creme import datasets
    from creme import preprocessing
    from creme import stream
    from creme import utils
    from sklearn import datasets as sk_datasets

    class SolarFlare:
        """One-hot encoded version of `datasets.SolarFlare`."""

        def __iter__(self):
            # Pipe the `str`-selected features into the one-hot encoder and
            # combine that with the `int`-selected features.
            categorical = compose.SelectType(str) | preprocessing.OneHotEncoder()
            numeric = compose.SelectType(int)
            encoder = categorical + numeric
            for features, targets in datasets.SolarFlare():
                yield encoder.transform_one(features), targets

    checks = utils.inspect

    if checks.isclassifier(model):
        yield datasets.Phishing()

        # `_multiclass` is only consulted for classifiers.
        if model._multiclass:
            yield datasets.ImageSegments().take(500)

    if checks.isregressor(model):
        yield datasets.TrumpApproval()

    if checks.ismoregressor(model):
        # First the scikit-learn Linnerud data, then the encoded solar flares.
        linnerud = sk_datasets.load_linnerud()
        yield stream.iter_sklearn_dataset(linnerud)
        yield SolarFlare()

    if checks.ismoclassifier(model):
        yield datasets.Music()