Esempio n. 1
0
def load_binary_clf_tracks() -> typing.List[Track]:
    """Build the benchmark tracks used for binary classification.

    One track is created per dataset, in the order Phishing then Bananas.
    Each track receives a freshly instantiated dataset and its own
    ``Accuracy() + F1()`` metric.
    """

    # (track name, dataset constructor) pairs, in the order they are returned.
    dataset_factories = (
        ("Phishing", datasets.Phishing),
        ("Bananas", datasets.Bananas),
    )

    tracks = []
    for track_name, make_dataset in dataset_factories:
        tracks.append(
            Track(
                name=track_name,
                dataset=make_dataset(),
                # A new metric object per track so tracks never share state.
                metric=metrics.Accuracy() + metrics.F1(),
            )
        )
    return tracks
Esempio n. 2
0
def test_perceptron_sklearn_coherence():
    """Check that the river and sklearn Perceptron implementations agree.

    Both models are trained one sample at a time on the standardized Bananas
    stream. Afterwards every learned weight and the intercept must match.
    """

    ss = preprocessing.StandardScaler()
    cr = lm.Perceptron()
    sk = sklm.Perceptron()

    for x, y in datasets.Bananas():
        # Update the scaler first, then scale the sample. The two calls are
        # deliberately not chained: chaining only works when ``learn_one``
        # returns the scaler itself, and fails if it returns ``None``.
        ss.learn_one(x)
        x = ss.transform_one(x)

        cr.learn_one(x, y)
        sk.partial_fit([list(x.values())], [y], classes=[False, True])

    # Weights are compared by position: this assumes river's weight dict
    # iterates in the same feature order as ``x.values()`` fed to sklearn.
    for i, w in enumerate(cr.weights.values()):
        assert math.isclose(w, sk.coef_[0][i])

    assert math.isclose(cr.intercept, sk.intercept_[0])
Esempio n. 3
0
def test_log_reg_sklearn_coherence(river_params, sklearn_params):
    """Check that the river and sklearn logistic regressions agree.

    Both models are trained one sample at a time on the first 100 standardized
    samples of the Bananas stream. Afterwards every learned weight and the
    intercept must match.

    Args:
        river_params: Keyword arguments forwarded to ``lm.LogisticRegression``.
        sklearn_params: Keyword arguments forwarded to ``sklm.SGDClassifier``.
            Presumably both come from a pytest parametrization that pairs
            equivalent configurations; the decorator is not visible here.
    """

    ss = preprocessing.StandardScaler()
    rv = lm.LogisticRegression(**river_params)
    sk = sklm.SGDClassifier(**sklearn_params)

    for x, y in datasets.Bananas().take(100):
        # Update the scaler first, then scale the sample. The two calls are
        # deliberately not chained: chaining only works when ``learn_one``
        # returns the scaler itself, and fails if it returns ``None``.
        ss.learn_one(x)
        x = ss.transform_one(x)

        rv.learn_one(x, y)
        sk.partial_fit([list(x.values())], [y], classes=[False, True])

    # Weights are compared by position: this assumes river's weight dict
    # iterates in the same feature order as ``x.values()`` fed to sklearn.
    for i, w in enumerate(rv.weights.values()):
        assert math.isclose(w, sk.coef_[0][i])

    assert math.isclose(rv.intercept, sk.intercept_[0])
Esempio n. 4
0
def test_log_reg_sklearn_coherence():
    """Check that the river and sklearn logistic regressions agree.

    Both models use plain SGD with a constant learning rate of 0.01 and no
    regularization on the sklearn side (``alpha=0``). They are trained one
    sample at a time on the standardized Bananas stream. Afterwards every
    learned weight and the intercept must match.
    """

    ss = preprocessing.StandardScaler()
    cr = lm.LogisticRegression(optimizer=optim.SGD(.01))
    # NOTE(review): newer scikit-learn releases rename loss='log' to
    # 'log_loss' -- confirm against the pinned scikit-learn version before
    # changing it, since older releases only accept 'log'.
    sk = sklm.SGDClassifier(learning_rate='constant',
                            eta0=.01,
                            alpha=.0,
                            loss='log')

    for x, y in datasets.Bananas():
        # Update the scaler first, then scale the sample. The two calls are
        # deliberately not chained: chaining only works when ``learn_one``
        # returns the scaler itself, and fails if it returns ``None``.
        ss.learn_one(x)
        x = ss.transform_one(x)

        cr.learn_one(x, y)
        sk.partial_fit([list(x.values())], [y], classes=[False, True])

    # Weights are compared by position: this assumes river's weight dict
    # iterates in the same feature order as ``x.values()`` fed to sklearn.
    for i, w in enumerate(cr.weights.values()):
        assert math.isclose(w, sk.coef_[0][i])

    assert math.isclose(cr.intercept, sk.intercept_[0])
Esempio n. 5
0
        p = {j: random.gauss(0, 1) for j in keys}
        norm = utils.math.norm(p, order=2)
        for j in p:
            p[j] /= norm
        yield p


@pytest.mark.parametrize(
    'lm, dataset',
    [
        pytest.param(lm(
            optimizer=copy.deepcopy(optimizer), initializer=initializer, l2=0),
                     dataset,
                     id=f'{lm.__name__} - {optimizer} - {initializer}')
        for lm, dataset in [(lm.LinearRegression, datasets.TrumpApproval()
                             ), (lm.LogisticRegression, datasets.Bananas())]
        for optimizer, initializer in itertools.product(
            [
                optim.AdaBound(),
                optim.AdaDelta(),
                optim.AdaGrad(),
                optim.AdaMax(),
                optim.Adam(),
                optim.AMSGrad(),
                # TODO: check momentum optimizers
                # optim.Momentum(),
                # optim.NesterovMomentum(),
                optim.RMSProp(),
                optim.SGD()
            ],
            [