Example #1
def test_ogdlr_grad_numerically(alr):
    epsilon = 1e-6
    clf = OGDLR(alr_schedule=alr)
    clf.fit(X[:100], y[:100])
    for xx, yy in zip(X[:10], y[:10]):
        grad, grad_num = clf.numerical_grad(xx, yy, epsilon)
        assert np.allclose(grad, grad_num, atol=epsilon)
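
Note: alr arrives as a test argument, so this test is presumably driven by a pytest.mark.parametrize decorator that did not survive extraction. A plausible reconstruction, assuming the three alr_schedule values exercised elsewhere in this suite:

import pytest

@pytest.mark.parametrize('alr', ['constant', 'count', 'gradient'])
def test_ogdlr_grad_numerically(alr):
    ...  # body as in Example #1
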
Example #2
def test_ogdlr_grad_numerically_l2(lambda2):
    epsilon = 1e-6
    clf = OGDLR(lambda2=lambda2)
    clf.fit(X[:100], y[:100])
    for xx, yy in zip(X[:10], y[:10]):
        grad, grad_num = clf.numerical_grad(xx, yy, epsilon)
        assert np.allclose(grad, grad_num, atol=epsilon)
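
Note: numerical_grad itself is not part of this listing. What the assertion exercises is the standard central-difference check; a self-contained sketch of the idea (hypothetical helper, not OGDLR's actual method):

import numpy as np

def central_difference_grad(loss, w, epsilon=1e-6):
    # approximate d(loss)/dw_i by nudging each weight by +/- epsilon
    w = np.asarray(w, dtype=float)
    grad = np.zeros_like(w)
    for i in range(w.size):
        up, down = w.copy(), w.copy()
        up[i] += epsilon
        down[i] -= epsilon
        grad[i] = (loss(up) - loss(down)) / (2 * epsilon)
    return grad

A mismatch between such an estimate and the analytical gradient beyond atol=epsilon signals a bug in the gradient code.
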
Example #3
def test_fit_dont_skip_1():
    # with class_weight=0., every negative sample is skipped, so the
    # update count should equal the number of 1's in y
    with patch('FTRLprox.models.OGDLR._update') as update:
        clf = OGDLR()
        clf.fit(X, y, class_weight=0.)
        assert update.call_count == sum(y)
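
Note: a hedged sketch of the kind of subsampling that _get_skip_sample could implement (hypothetical; the real method may differ):

import numpy as np

def get_skip_sample(y, class_weight):
    # skip a negative sample with probability 1 - class_weight;
    # class_weight=0. therefore skips every sample with y == 0
    return [yi == 0 and np.random.rand() >= class_weight for yi in y]
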
Example #4
def test_callback_count(callback_period):
    mock_cb = MagicMock()
    mock_cb.plot = MagicMock(return_value=0)
    clf = OGDLR(callback=mock_cb, callback_period=callback_period)
    clf.fit(X, y)
    expected = (N - 1) // callback_period
    result = mock_cb.plot.call_count
    assert result == expected
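
Note: the expected value is consistent with the callback firing once every callback_period samples, but not on the first one, giving (N - 1) // callback_period calls over N samples. The callback_period values come from a stripped parametrize decorator; the values below are illustrative assumptions:

import pytest

@pytest.mark.parametrize('callback_period', [100, 1000])  # assumed values
def test_callback_count(callback_period):
    ...  # body as in Example #4
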
Example #5
def test_fit_skip_sample(skip_list, count):
    with patch('FTRLprox.models.OGDLR._get_skip_sample') as skip:
        skip.return_value = skip_list
        with patch('FTRLprox.models.OGDLR._update') as update:
            n_samples = len(skip_list)
            clf = OGDLR()
            clf.fit(X[:n_samples], y[:n_samples])
            assert update.call_count == count
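
Note: count must equal the number of False entries in skip_list, i.e. the samples that are not skipped. A reconstruction of the stripped decorator with illustrative pairs:

import pytest

@pytest.mark.parametrize('skip_list, count', [
    ([False, False, False], 3),  # nothing skipped: one update per sample
    ([True, False, True], 1),    # only the middle sample triggers _update
    ([True, True, True], 0),     # everything skipped
])
def test_fit_skip_sample(skip_list, count):
    ...  # body as in Example #5
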
Example #6
def test_effect_of_beta():
    # higher beta should lead to slower learning, thus lower weights
    mean_abs_weights = []
    for beta in [10 ** n for n in range(7)]:
        clf = OGDLR(beta=beta)
        clf.fit(X[:100], y[:100])
        mean_abs_weights.append(np.abs(clf.weights()).mean())
    assert np.allclose(mean_abs_weights[-1], 0, atol=1e-6)
    assert all(np.diff(mean_abs_weights) < 0)
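
Note: this behavior matches the usual per-coordinate learning rate in the FTRL-proximal/OGD family, alpha / (beta + sqrt(n_i)), where n_i is the accumulator controlled by alr_schedule. Assuming OGDLR follows that convention, a large beta drives every step toward zero:

import numpy as np

def step_size(alpha, beta, n_i):
    # assumed learning-rate form; n_i is e.g. a sum of squared gradients
    return alpha / (beta + np.sqrt(n_i))

print(step_size(0.1, 1, 4.0))    # 0.0333...: weights can move
print(step_size(0.1, 1e6, 4.0))  # ~1e-7: weights stay near zero
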
Example #7
def test_effect_of_alpha():
    # higher alpha should lead to faster learning, thus higher weights
    mean_abs_weights = []
    for alpha in [0, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1, 1e1]:
        clf = OGDLR(alpha=alpha)
        clf.fit(X[:100], y[:100])
        mean_abs_weights.append(np.abs(clf.weights()).mean())
    assert mean_abs_weights[0] == 0.
    assert all(np.diff(mean_abs_weights) > 0)
Example #8
def test_ogdlr_grad_numerically_l1_l2(args):
    # test all combinations of lambda1 and lambda2 values of 0,
    # 0.5, and 2 and for alpha = 0.02 and beta = 1.
    epsilon = 1e-6
    clf = OGDLR(*args)
    clf.fit(X[:100], y[:100])
    for xx, yy in zip(X[:10], y[:10]):
        grad, grad_num = clf.numerical_grad(xx, yy, epsilon)
        assert np.allclose(grad, grad_num, atol=epsilon)
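
Note: the comment pins down the grid, so the stripped decorator can be reconstructed up to the positional order of args, assumed here to be (alpha, beta, lambda1, lambda2); adjust to OGDLR's actual signature:

import itertools
import pytest

GRID = [(0.02, 1., l1, l2)
        for l1, l2 in itertools.product([0, 0.5, 2], repeat=2)]

@pytest.mark.parametrize('args', GRID)
def test_ogdlr_grad_numerically_l1_l2(args):
    ...  # body as in Example #8
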
Example #9
def test_valid_history(n_samples):
    clf = OGDLR()
    clf.fit(X[:n_samples], y[:n_samples])

    # validation history as long as training
    assert len(clf.valid_history) == n_samples
    y_true, y_prob = zip(*clf.valid_history)
    # all true values 0 or 1
    assert set(y_true) <= set([0, 1])
    # all y_probs are probabilities
    assert all([isinstance(pr, float) for pr in y_prob])
Example #10
def test_effect_of_lambda1(lambda1):
    # gradients should be the same regardless of magnitude of weights
    weights = list(range(-5, 0)) + list(range(1, 6))
    clf = OGDLR(lambda1=lambda1)
    grads = clf._get_grads(0, 0, weights)
    # gradient only depends on sign of weight
    abso = [gr * np.sign(w) for gr, w in zip(grads, weights)]
    assert np.allclose(abso[0], abso)

    # contingency test: should fail
    frac = [gr / w for gr, w in zip(grads, weights)]
    with pytest.raises(AssertionError):
        assert np.allclose(frac[0], frac)
Example #11
def test_effect_of_lambda2(lambda2):
    # relative difference in gradients should be the same
    weights = list(range(-5, 0)) + list(range(1, 6))
    clf = OGDLR(lambda2=lambda2)
    grads = clf._get_grads(0, 0, weights)
    # gradient is proportional to weights
    frac = [gr / w for gr, w in zip(grads, weights)]
    assert np.allclose(frac[0], frac)

    # contingency test: should fail
    abso = [gr * np.sign(w) for gr, w in zip(grads, weights)]
    with pytest.raises(AssertionError):
        assert np.allclose(abso[0], abso)
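
Note: both regularization tests follow from the penalty gradients: the L1 term lambda1 * |w| contributes lambda1 * sign(w), constant in the magnitude of w, while the L2 term 0.5 * lambda2 * w**2 contributes lambda2 * w, linear in w. A minimal sketch of the regularization-only gradients (the data term is apparently zeroed out above by passing 0, 0 to _get_grads):

import numpy as np

def reg_grads(weights, lambda1=0., lambda2=0.):
    # gradient of lambda1 * |w| + 0.5 * lambda2 * w**2, per weight
    return [lambda1 * np.sign(w) + lambda2 * w for w in weights]

weights = list(range(-5, 0)) + list(range(1, 6))
# L1 only: gr * sign(w) is the constant lambda1 for every weight
# L2 only: gr / w is the constant lambda2 for every weight
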
Example #12
def test_get_x():
    # if using list, get_x should get ints
    clf = OGDLR(ndims=100)
    clf.fit(X[:100], y[:100])
    xt = ['sunny', 'cold', X[0, 2]]
    result = clf._get_x(xt)
    assert all([isinstance(r, int) for r in result])

    # if using dict, get_x should give dictionary keys
    xt = ['sunny', 'cold', X[0, 2]]
    result = ogdlr_after._get_x(xt)
    expected = [
        'BIAS', 'weather__sunny', 'temperature__cold', 'noise__' + X[0, 2]
    ]
    assert result == expected
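
Note: the two branches reflect the two storage modes: with ndims set, OGDLR hashes features to integer indices (the hashing trick); without it, weights are keyed by human-readable 'column__value' strings, matching expected above. A hedged sketch of that mapping (hypothetical helper, not the actual _get_x):

def make_keys(xt, cols, ndims=None):
    # 'BIAS' plus one 'column__value' feature per input column
    feats = ['BIAS'] + ['{}__{}'.format(c, v) for c, v in zip(cols, xt)]
    if ndims is None:
        return feats                         # dict-backed: string keys
    return [hash(f) % ndims for f in feats]  # hashed: integer indices
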
Example #13
def test_get_num_count():
    # if adaptive learning rate is constant, all nums should be 0
    assert all(np.array(list(ogdlr_after.num.values())) == 0)

    # if adaptive learning rate by counting, all nums should be
    # integers from 0 to number of examples + 1 (from bias)
    clf = OGDLR(alr_schedule='count')
    clf.fit(X[:100], y[:100])
    assert clf.num['BIAS'] == 100  # bias term
    result = set(clf.num.values()) - set(range(N + 1))
    assert result == set([])

    clf = OGDLR(alr_schedule='gradient')
    clf.fit(X[:100], y[:100])
    # if adaptive learning rate by gradient, all nums should be floats
    result = clf.num.values()
    assert all([isinstance(ni, float) for ni in result])
Example #14
        pr2 = 0.5 * np.random.rand() if X[i, 1] == 'warm' else 0
        pr3 = np.random.rand()
        y[i] = 1 if pr1 + pr2 + pr3 > 0.95 else 0
    return X, y


# generate data and various models
N = 10000
X, y = create_training_data(n=N)
COLS = ['weather', 'temperature', 'noise']
LAMBDA1 = 0
LAMBDA2 = 0
ALPHA = 0.1
NDIMS = 2 ** 20

ogdlr_before = OGDLR(lambda1=LAMBDA1, alpha=ALPHA, alr_schedule='constant')
ogdlr_before.fit(X[:10], y[:10], COLS)

ogdlr_after = OGDLR(lambda1=LAMBDA1, alpha=ALPHA, alr_schedule='constant')
ogdlr_after.fit(X, y, COLS)

ftrl_before = FTRLprox(lambda1=LAMBDA1, lambda2=LAMBDA2, alpha=ALPHA, beta=1,
                       alr_schedule='constant')
ftrl_before.fit(X[:10], y[:10], COLS)

ftrl_after = FTRLprox(lambda1=LAMBDA1, lambda2=LAMBDA2, alpha=ALPHA, beta=1,
                      alr_schedule='constant')
ftrl_after.fit(X, y, COLS)

hash_before = OGDLR(lambda1=LAMBDA1, alpha=ALPHA,
                    alr_schedule='constant', ndims=NDIMS)
Example #15
def test_ogdlr_init_ndims(arg, expected):
    clf = OGDLR(ndims=arg)
    clf.fit(X[:10], y[:10])
    assert isinstance(clf.w, expected)
    assert isinstance(clf.num, expected)
Example #16
def test_ogdlr_init_cols():
    # case where cols are given
    assert ogdlr_after.cols == ['weather', 'temperature', 'noise']

    clf = OGDLR()
    clf.fit(X[:10], y[:10])
    # creates default cols: col0 ... col2
    assert clf.cols == ['col0', 'col1', 'col2']

    clf.fit(X[:10], y[:10], cols=['a', 'b', 'c'])
    # cols do not change
    assert clf.cols == ['col0', 'col1', 'col2']

    clf = OGDLR()
    clf.fit(np.random.random((10, 25)), y[:10])
    # creates zero-padded cols: col00 ... col24
    assert clf.cols[0] == 'col00'
    assert clf.cols[13] == 'col13'
    assert clf.cols[-1] == 'col24'

    # column names must be unique
    with pytest.raises(ValueError):
        clf = OGDLR()
        clf.fit(X[:10], y[:10], cols=['1', '2', '1'])
Example #17
def test_get_delta_num(alr, expected):
    clf = OGDLR(alr_schedule=alr)
    clf.fit(X[:100], y[:100])
    result = clf._get_delta_num(1.23)
    assert result == expected ** 2
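
Note: a consistent reading of result == expected ** 2 across the three schedules is that the accumulator delta is 0 for 'constant', 1 per sample for 'count', and the (squared) gradient for 'gradient'. The stripped decorator, reconstructed under that assumption:

import pytest

@pytest.mark.parametrize('alr, expected', [
    ('constant', 0.),    # accumulator never grows
    ('count', 1.),       # +1 per observed sample
    ('gradient', 1.23),  # accumulates the squared gradient
])
def test_get_delta_num(alr, expected):
    ...  # body as in Example #17
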
Example #18
def test_alr_schedule_error():
    clf = OGDLR(alr_schedule='nonsense')
    with pytest.raises(TypeError):
        clf.fit(X[:10], y[:10])