Example 1
def test_online_batch_consistent():

    # Batch

    batch = (
        preprocessing.StandardScaler() |
        multiclass.OneVsRestClassifier(
            linear_model.LogisticRegression()
        )
    )

    dataset = datasets.ImageSegments()

    batch_metric = metrics.MacroF1()

    for i, x in enumerate(pd.read_csv(dataset.path, chunksize=1)):
        y = x.pop('category')
        y_pred = batch.predict_many(x)
        batch.fit_many(x, y)

        for yt, yp in zip(y, y_pred):
            if yp is not None:
                batch_metric.update(yt, yp)

        if i == 30:
            break

    # Online

    online = (
        preprocessing.StandardScaler() |
        multiclass.OneVsRestClassifier(
            linear_model.LogisticRegression()
        )
    )

    online_metric = metrics.MacroF1()

    X = pd.read_csv(dataset.path)
    Y = X.pop('category')

    for i, (x, y) in enumerate(stream.iter_pandas(X, Y)):
        y_pred = online.predict_one(x)
        online.fit_one(x, y)

        if y_pred is not None:
            online_metric.update(y, y_pred)

        if i == 30:
            break

    assert online_metric.get() == batch_metric.get()
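
A more compact way to run the online half of this test is creme's progressive validation helper. The sketch below assumes the model_selection.progressive_val_score signature found in recent creme releases; it performs the same predict-then-fit loop and metric update as above.

from creme import datasets, linear_model, metrics, model_selection, multiclass, preprocessing

model = (
    preprocessing.StandardScaler() |
    multiclass.OneVsRestClassifier(linear_model.LogisticRegression())
)

# Predicts on each sample before learning from it, updating the metric as it goes.
model_selection.progressive_val_score(
    datasets.ImageSegments(),
    model,
    metrics.MacroF1()
)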
Example 2
 def build_oracle(self) -> compose.Pipeline:
     model = compose.Pipeline(
         ('scale', preprocessing.StandardScaler()),
         ('learn',
          multiclass.OneVsRestClassifier(
              binary_classifier=linear_model.LogisticRegression())))
     return model
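
The tuple-based compose.Pipeline above is equivalent to the | shorthand used in the other examples; a minimal sketch of the same oracle built with the operator:

from creme import linear_model, multiclass, preprocessing

model = (
    preprocessing.StandardScaler() |
    multiclass.OneVsRestClassifier(
        binary_classifier=linear_model.LogisticRegression()
    )
)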
Example 3
def test_phishing(client, app):

    r = client.post('/api/init', json={'flavor': 'binary'})

    model = preprocessing.StandardScaler() | linear_model.LogisticRegression()
    client.post('/api/model', data=pickle.dumps(model))

    for i, (x, y) in enumerate(datasets.Phishing().take(30)):

        # Predict/learn via chantilly
        r = client.post('/api/predict',
                        data=json.dumps({
                            'id': i,
                            'features': x
                        }),
                        content_type='application/json')

        client.post('/api/learn',
                    data=json.dumps({
                        'id': i,
                        'ground_truth': y
                    }),
                    content_type='application/json')

        # Predict/learn directly via creme
        y_pred = model.predict_proba_one(x)
        model.fit_one(x, y)

        # Compare the predictions from both sides
        assert math.isclose(y_pred[True], r.json['prediction']['true'])
Example 4
def test_phishing_without_id(client, app):

    r = client.post('/api/init', json={'flavor': 'binary'})

    model = preprocessing.StandardScaler() | linear_model.LogisticRegression()
    client.post('/api/model', data=pickle.dumps(model))

    for x, y in datasets.Phishing().take(30):

        # Predict/learn via chantilly
        r = client.post('/api/predict',
                        data=json.dumps({'features': x}),
                        content_type='application/json')
        client.post('/api/learn',
                    data=json.dumps({
                        'features': x,
                        'ground_truth': y
                    }),
                    content_type='application/json')

        # Predict/learn directly via creme
        y_pred = model.predict_proba_one(x)

        # Because no ID is provided, chantilly will ask the model to make a prediction a second
        # time in order to update the metric
        model.predict_proba_one(x)

        model.fit_one(x, y)

        # Compare the predictions from both sides
        assert math.isclose(y_pred[True], r.json['prediction']['true'])
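
For reference, the chantilly endpoints exercised in Examples 3 and 4 exchange plain JSON bodies. A sketch of the payloads, with the field names taken from the tests and the feature values purely illustrative:

# With an id, chantilly caches the prediction and reuses it at learn time.
predict_payload = {'id': 0, 'features': {'age_of_domain': 1, 'is_popular': 0.5}}
learn_payload = {'id': 0, 'ground_truth': True}

# Without an id, the features must be resent so the model can re-predict.
learn_payload_no_id = {'features': {'age_of_domain': 1, 'is_popular': 0.5}, 'ground_truth': True}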
Example 5
def test_log_reg_sklearn_coherence():
    """Checks that the sklearn and creme implementations produce the same results."""

    ss = preprocessing.StandardScaler()
    cr = lm.LogisticRegression(optimizer=optim.SGD(.01))
    sk = sklm.SGDClassifier(learning_rate='constant', eta0=.01, alpha=.0, loss='log')

    for x, y in datasets.Bananas():
        x = ss.fit_one(x).transform_one(x)
        cr.fit_one(x, y)
        sk.partial_fit([list(x.values())], [y], classes=[False, True])

    for i, w in enumerate(cr.weights.values()):
        assert math.isclose(w, sk.coef_[0][i])

    assert math.isclose(cr.intercept, sk.intercept_[0])
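
creme consumes features as dicts whereas sklearn expects 2D arrays, which is why the test bridges the two with list(x.values()). A minimal sketch of that conversion, assuming the dict keeps a stable key order across samples:

x = {'x0': 0.5, 'x1': -1.2}    # creme-style features
row = [list(x.values())]       # shape (1, n_features), as sklearn's partial_fit expects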
Example 6
 def _default_params(cls):
     return {
         'classifier': linear_model.LogisticRegression(),
         'code_size': 6
     }
Example 7
def get_all_estimators():

    ignored = (CremeBaseWrapper, SKLBaseWrapper, base.Wrapper,
               compose.FuncTransformer, ensemble.StackingBinaryClassifier,
               feature_extraction.Agg, feature_extraction.TargetAgg,
               feature_extraction.Differ, linear_model.FMRegressor,
               linear_model.SoftmaxRegression, multioutput.ClassifierChain,
               multioutput.RegressorChain, naive_bayes.BernoulliNB,
               naive_bayes.ComplementNB, preprocessing.OneHotEncoder,
               tree.DecisionTreeClassifier)

    def is_estimator(obj):
        return inspect.isclass(obj) and issubclass(obj, base.Estimator)

    for submodule in importlib.import_module('creme').__all__:

        if submodule == 'base':
            continue

        for name, obj in inspect.getmembers(
                importlib.import_module(f'creme.{submodule}'), is_estimator):

            if issubclass(obj, ignored):
                continue

            if issubclass(obj, dummy.StatisticRegressor):
                inst = obj(statistic=stats.Mean())

            elif issubclass(obj, ensemble.BaggingClassifier):
                inst = obj(linear_model.LogisticRegression())

            elif issubclass(obj, ensemble.BaggingRegressor):
                inst = obj(linear_model.LinearRegression())

            elif issubclass(obj, ensemble.HedgeRegressor):
                inst = obj([
                    preprocessing.StandardScaler()
                    | linear_model.LinearRegression(intercept_lr=0.1),
                    preprocessing.StandardScaler()
                    | linear_model.PARegressor(),
                ])

            elif issubclass(obj, feature_selection.RandomDiscarder):
                inst = obj(n_to_keep=5)

            elif issubclass(obj, feature_selection.SelectKBest):
                inst = obj(similarity=stats.PearsonCorrelation())

            elif issubclass(obj, linear_model.LinearRegression):
                inst = preprocessing.StandardScaler() | obj(intercept_lr=0.1)

            elif issubclass(obj, linear_model.PARegressor):
                inst = preprocessing.StandardScaler() | obj()

            elif issubclass(obj, multiclass.OneVsRestClassifier):
                inst = obj(binary_classifier=linear_model.LogisticRegression())

            else:
                inst = obj()

            yield inst
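
A usage sketch: feed every generated instance into creme's generic checks, i.e. the same utils.check_estimator call that appears in Example 10 below.

from creme import utils

for estimator in get_all_estimators():
    utils.check_estimator(estimator)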
Example 8
                inst = preprocessing.StandardScaler() | obj()

            elif issubclass(obj, multiclass.OneVsRestClassifier):
                inst = obj(binary_classifier=linear_model.LogisticRegression())

            else:
                inst = obj()

            yield inst


@pytest.mark.parametrize('estimator', [
    pytest.param(copy.deepcopy(estimator), id=str(estimator))
    for estimator in list(get_all_estimators()) + [
        feature_extraction.TFIDF(),
        linear_model.LogisticRegression(),
        preprocessing.StandardScaler() | linear_model.LinearRegression(),
        preprocessing.StandardScaler() | linear_model.PAClassifier(),
        preprocessing.StandardScaler() | multiclass.OneVsRestClassifier(linear_model.LogisticRegression()),
        preprocessing.StandardScaler() | multiclass.OneVsRestClassifier(linear_model.PAClassifier()),
        naive_bayes.GaussianNB(),
        preprocessing.StandardScaler(),
        cluster.KMeans(n_clusters=5, seed=42),
        preprocessing.MinMaxScaler(),
        preprocessing.MinMaxScaler() + preprocessing.StandardScaler(),
        preprocessing.PolynomialExtender(),
        feature_selection.VarianceThreshold(),
        feature_selection.SelectKBest(similarity=stats.PearsonCorrelation())
    ]
])
def test_check_estimator(estimator):
Example 9
 def _default_params(cls):
     return {'model': linear_model.LogisticRegression()}
Example 10
]


@pytest.mark.parametrize('estimator', [
    pytest.param(copy.deepcopy(estimator), id=str(estimator))
    for estimator in ESTIMATORS
])
def test_sklearn_check_estimator(estimator):
    estimator_checks.check_estimator(
        compat.convert_creme_to_sklearn(estimator))


@pytest.mark.parametrize(
    'estimator',
    [
        pytest.param(copy.deepcopy(estimator), id=str(estimator))
        for estimator in ESTIMATORS + [
            # sklearn's check_estimator doesn't support binary classifiers yet
            linear_model.LogisticRegression(),
            # sklearn's check_estimator doesn't support pipelines yet
            preprocessing.StandardScaler() | linear_model.LinearRegression(),
            preprocessing.StandardScaler() | linear_model.PAClassifier(),
            preprocessing.StandardScaler() | multiclass.OneVsRestClassifier(
                linear_model.LogisticRegression()),
            preprocessing.StandardScaler()
            | multiclass.OneVsRestClassifier(linear_model.PAClassifier()),
        ]
    ])
def test_creme_check_estimator(estimator):
    utils.check_estimator(estimator)
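
The compat module also works in the opposite direction to the convert_creme_to_sklearn call above; a sketch assuming creme's convert_sklearn_to_creme counterpart:

from creme import compat
from sklearn import linear_model as sk_linear_model

# Wraps the sklearn estimator so it exposes fit_one/predict_one.
creme_model = compat.convert_sklearn_to_creme(
    sk_linear_model.SGDClassifier(loss='log'),
    classes=[False, True]
)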
Example 11
def main():

    benchmark.benchmark(
        get_X_y=datasets.fetch_electricity,
        n=45312,
        get_pp=preprocessing.StandardScaler,
        models=[
            # ('No-change', 'No-change', dummy.NoChangeClassifier()),
            ('creme', 'Logistic regression',
             linear_model.LogisticRegression(optimizer=optim.VanillaSGD(0.05),
                                             l2=0,
                                             intercept_lr=0.05)),
            # ('creme', 'PA-I', linear_model.PAClassifier(C=1, mode=1)),
            # ('creme', 'PA-II', linear_model.PAClassifier(C=1, mode=2)),
            ('sklearn', 'Logistic regression',
             compat.CremeClassifierWrapper(
                 sklearn_estimator=sk_linear_model.SGDClassifier(
                     loss='log',
                     learning_rate='constant',
                     eta0=0.05,
                     penalty='none'),
                 classes=[False, True])),
            # ('sklearn', 'PA-I', compat.CremeClassifierWrapper(
            #     sklearn_estimator=sk_linear_model.PassiveAggressiveClassifier(
            #         C=1,
            #         loss='hinge'
            #     ),
            #     classes=[False, True]
            # )),
            # ('sklearn', 'PA-II', compat.CremeClassifierWrapper(
            #     sklearn_estimator=sk_linear_model.PassiveAggressiveClassifier(
            #         C=1,
            #         loss='squared_hinge'
            #     ),
            #     classes=[False, True]
            # )),

            # ('sklearn', 'Logistic regression NI', compat.CremeClassifierWrapper(
            #     sklearn_estimator=sk_linear_model.SGDClassifier(
            #         loss='log',
            #         learning_rate='constant',
            #         eta0=0.01,
            #         fit_intercept=True,
            #         penalty='none'
            #     ),
            #     classes=[False, True]
            # )),
            # ('sklearn', 'PA-I NI', compat.CremeClassifierWrapper(
            #     sklearn_estimator=sk_linear_model.PassiveAggressiveClassifier(
            #         C=1,
            #         loss='hinge',
            #         fit_intercept=False
            #     ),
            #     classes=[False, True]
            # )),
            # ('sklearn', 'PA-II NI', compat.CremeClassifierWrapper(
            #     sklearn_estimator=sk_linear_model.PassiveAggressiveClassifier(
            #         C=1,
            #         loss='squared_hinge',
            #         fit_intercept=False
            #     ),
            #     classes=[False, True]
            # )),
        ],
        get_metric=metrics.Accuracy)
Example 12
     preprocessing.StandardScaler(),
     compat.CremeClassifierWrapper(
         sklearn_estimator=PassiveAggressiveClassifier(),
         classes=[False, True]
     )
 ]),
 'No-change classifier': dummy.NoChangeClassifier(),
 'Passive-aggressive II': compose.Pipeline([
     preprocessing.StandardScaler(),
     linear_model.PAClassifier(C=1, mode=2)
 ]),
 'Logistic regression w/ VanillaSGD': compose.Pipeline([
     preprocessing.StandardScaler(),
     linear_model.LogisticRegression(
         optimizer=optim.VanillaSGD(
             lr=optim.OptimalLR()
         )
     )
 ]),
 'Logistic regression w/ Adam': compose.Pipeline([
     preprocessing.StandardScaler(),
     linear_model.LogisticRegression(optim.Adam(optim.OptimalLR()))
 ]),
 'Logistic regression w/ AdaGrad': compose.Pipeline([
     preprocessing.StandardScaler(),
     linear_model.LogisticRegression(optim.AdaGrad(optim.OptimalLR()))
 ]),
 'Logistic regression w/ RMSProp': compose.Pipeline([
     preprocessing.StandardScaler(),
     linear_model.LogisticRegression(optim.RMSProp(optim.OptimalLR()))
 ])
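
A hedged sketch of how such a dict of pipelines is typically consumed; the models name and the Phishing dataset are illustrative assumptions, not part of the original snippet:

from creme import datasets, metrics

for name, model in models.items():
    metric = metrics.Accuracy()
    for x, y in datasets.Phishing():
        y_pred = model.predict_one(x)   # predict before learning
        if y_pred is not None:
            metric.update(y, y_pred)
        model.fit_one(x, y)
    print(name, metric)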
Example 13
elif opt == "Adam":
    optimizer = optim.Adam(lr, beta_1, beta_2, eps)
elif opt == "FTRLProximal":
    optimizer = optim.FTRLProximal(alpha, beta, l1, l2)
elif opt == "Momentum":
    optimizer = optim.Momentum(lr, rho)
elif opt == "RMSProp":
    optimizer = optim.RMSProp(lr, rho, eps)
elif opt == "VanillaSGD":
    optimizer = optim.VanillaSGD(lr)
elif opt == "NesterovMomentum":
    optimizer = optim.NesterovMomentum(lr, rho)
else:
    optimizer = None

log_reg = linear_model.LogisticRegression(optimizer, l2=l2)
OVRClassifier = multiclass.OneVsRestClassifier(log_reg)

output = {}

while True:

    # wait for a request on stdin
    data = input()

    Xi = json.loads(data)
    y = float(Xi.pop(target))

    output["Predict"] = OVRClassifier.predict_one(Xi)
    output["Truth"] = y
Example 14
def main():

    benchmark.benchmark(
        get_X_y=functools.partial(stream.iter_sklearn_dataset, datasets.load_breast_cancer()),
        n=569,
        get_pp=preprocessing.StandardScaler,
        models=[
            ('creme', 'Log reg', linear_model.LogisticRegression(
                optimizer=optim.VanillaSGD(0.01),
                l2=0,
                intercept_lr=0.01
            )),
            ('sklearn', 'SGD', compat.CremeClassifierWrapper(
                sklearn_estimator=sk_linear_model.SGDClassifier(
                    loss='log',
                    learning_rate='constant',
                    eta0=0.01,
                    penalty='none'
                ),
                classes=[False, True]
            )),

            ('creme', 'PA-I', linear_model.PAClassifier(
                C=0.01,
                mode=1,
                fit_intercept=True
            )),
            ('sklearn', 'PA-I', compat.CremeClassifierWrapper(
                sklearn_estimator=sk_linear_model.PassiveAggressiveClassifier(
                    C=0.01,
                    loss='hinge',
                    fit_intercept=True
                ),
                classes=[False, True]
            )),

            # ('creme', 'PA-I', linear_model.PAClassifier(C=1, mode=1)),
            # ('creme', 'PA-II', linear_model.PAClassifier(C=1, mode=2)),
            # ('sklearn', 'PA-I', compat.CremeClassifierWrapper(
            #     sklearn_estimator=sk_linear_model.PassiveAggressiveClassifier(
            #         C=1,
            #         loss='hinge'
            #     ),
            #     classes=[False, True]
            # )),
            # ('sklearn', 'PA-II', compat.CremeClassifierWrapper(
            #     sklearn_estimator=sk_linear_model.PassiveAggressiveClassifier(
            #         C=1,
            #         loss='squared_hinge'
            #     ),
            #     classes=[False, True]
            # )),

            # ('sklearn', 'Logistic regression NI', compat.CremeClassifierWrapper(
            #     sklearn_estimator=sk_linear_model.SGDClassifier(
            #         loss='log',
            #         learning_rate='constant',
            #         eta0=0.01,
            #         fit_intercept=True,
            #         penalty='none'
            #     ),
            #     classes=[False, True]
            # )),
            # ('sklearn', 'PA-I NI', compat.CremeClassifierWrapper(
            #     sklearn_estimator=sk_linear_model.PassiveAggressiveClassifier(
            #         C=1,
            #         loss='hinge',
            #         fit_intercept=False
            #     ),
            #     classes=[False, True]
            # )),
            # ('sklearn', 'PA-II NI', compat.CremeClassifierWrapper(
            #     sklearn_estimator=sk_linear_model.PassiveAggressiveClassifier(
            #         C=1,
            #         loss='squared_hinge',
            #         fit_intercept=False
            #     ),
            #     classes=[False, True]
            # )),
        ],
        get_metric=metrics.Accuracy
    )