Beispiel #1
0
def run_theano_binary_asgd_speed():
    """Time NaiveBinaryASGD against TheanoBinaryASGD on fake data.

    For every dtype in ('float32', 'float64') and every feature count in
    (100, 1000, 10000, 100000), both classifiers are fitted on the same
    500-point data set and the wall-clock duration of each fit is printed.

    ``theano.config.floatX`` is set to the dtype under test and is put back
    to its original value on exit, even when a fit raises.
    """
    n_points = 500
    # Integers, not floats: each size is used as a slice bound and as the
    # feature count handed to the classifiers.
    sizes = [100, 1000, 10000, 100000]
    dtypes = ['float32', 'float64']

    rstate = RandomState(42)
    # Generate the widest matrix once; smaller problems are column slices.
    XX, y = get_fake_data(n_points, max(sizes), rstate)

    dtype_orig = theano.config.floatX
    try:
        for dtype in dtypes:
            theano.config.floatX = dtype
            for n_features in sizes:
                X = XX[:, :n_features].astype(dtype)

                kwargs = dict(DEFAULT_KWARGS)
                kwargs['sgd_step_size0'] = None
                kwargs['dtype'] = dtype

                # Each classifier receives copy(rstate), not rstate itself.
                clf0 = NaiveBinaryASGD(
                    n_features, rstate=copy(rstate), **kwargs)
                clf1 = TheanoBinaryASGD(
                    n_features, rstate=copy(rstate), **kwargs)

                t = time.time()
                clf0.fit(X, y)
                t0 = time.time() - t

                t = time.time()
                clf1.fit(X, y)
                t1 = time.time() - t

                # Single parenthesised argument: valid as a Python 2 print
                # statement and as a Python 3 print call.
                print('N_FEAT:%i  dtype:%s  Naive:%.3f  Theano:%.3f' % (
                    n_features, dtype, t0, t1))
    finally:
        # Never leave the global theano setting changed for later code.
        theano.config.floatX = dtype_orig
Beispiel #2
0
def test_theano_binary_asgd_early_stopping():
    """Naive and Theano ASGD stop early after the same number of observations.

    Both classifiers are allowed 30 iterations; the test passes when they
    report the same ``n_observations`` and that count is below the maximum
    of N_POINTS observations per iteration.
    """
    max_iterations = 30
    rng = RandomState(42)

    X, y = get_fake_data(N_POINTS, N_FEATURES, rng)
    # The second draw is not used below. It is kept because it is handed the
    # same rng and so presumably advances its state before rng is copied.
    _unused_X, _unused_y = get_fake_data(N_POINTS, N_FEATURES, rng)

    kwargs = dict(DEFAULT_KWARGS)
    kwargs['n_iterations'] = max_iterations

    naive = NaiveBinaryASGD(*DEFAULT_ARGS, rstate=copy(rng), **kwargs)
    theano_clf = TheanoBinaryASGD(*DEFAULT_ARGS, rstate=copy(rng), **kwargs)

    naive.fit(X, y)
    theano_clf.fit(X, y)

    observation_cap = N_POINTS * kwargs['n_iterations']
    assert naive.n_observations == theano_clf.n_observations
    assert naive.n_observations < observation_cap
Beispiel #3
0
def test_theano_binary_asgd_like_naive_asgd():
    """The Theano classifier must reproduce the naive classifier's accuracies.

    Both models are fitted on the same data and each must reach exactly the
    same reference accuracy figures.
    """
    rng = RandomState(42)

    X, y = get_fake_data(N_POINTS, N_FEATURES, rng)
    Xtst, ytst = get_fake_data(N_POINTS, N_FEATURES, rng)

    naive = NaiveBinaryASGD(
        *DEFAULT_ARGS, rstate=copy(rng), **DEFAULT_KWARGS)
    theano_clf = TheanoBinaryASGD(
        *DEFAULT_ARGS, rstate=copy(rng), **DEFAULT_KWARGS)
    # Raise the Theano model's minimum iteration count to its n_iterations.
    theano_clf.min_n_iterations = theano_clf.n_iterations

    for model in (naive, theano_clf):
        model.fit(X, y)
        train_predictions = model.predict(X)
        test_predictions = model.predict(Xtst)

        train_accuracy = (train_predictions == y).mean()
        # NOTE(review): the held-out predictions are scored against the
        # training labels ``y`` rather than ``ytst``. The reference value
        # 0.522 below depends on that, so it is left as is -- confirm the
        # intent and recalibrate the expected value if this gets fixed.
        test_accuracy = (test_predictions == y).mean()

        assert_equal(train_accuracy, 0.72)
        assert_equal(test_accuracy, 0.522)
Beispiel #4
0
def test_binary_fit():
    """binary_fit returns its model with a positive, bounded step size.

    For several data-set sizes around DEFAULT_MAX_EXAMPLES, the fitted
    model's ``sgd_step_size0`` must be positive and no larger than the value
    find_sgd_step_size0 reports for the same model and data.
    """
    rng = RandomState(42)
    n_features = 20
    example_counts = [
        100,
        DEFAULT_MAX_EXAMPLES,
        int(DEFAULT_MAX_EXAMPLES * 1.5),
        int(DEFAULT_MAX_EXAMPLES * 3),
    ]

    for n_examples in example_counts:
        model = get_new_model(n_features, rng, n_examples)
        X, y = get_fake_data(n_examples, n_features, rng, separation=0.1)

        upper_bound = find_sgd_step_size0(model, (X, y))
        fitted = binary_fit(model, (X, y))

        # binary_fit must hand back the very object it was given
        assert fitted is model
        assert 0 < model.sgd_step_size0 <= upper_bound
Beispiel #5
0
def test_binary_sgd_step_size0():
    """Check the step size find_sgd_step_size0 picks on 100 fake examples.

    The result must be close to 0.04 within the loose tolerances used below,
    and the model's own ``sgd_step_size0`` must still be 1000.0 afterwards.
    """
    rstate = RandomState(42)
    n_features = 20

    X, y = get_fake_data(100, n_features, rstate)

    clf = get_new_model(n_features, rstate, 100)
    best0 = find_sgd_step_size0(clf, (X, y))
    # Parenthesised single argument: prints the same text under Python 2 and
    # is also valid Python 3, unlike the bare print statement.
    print(best0)
    assert np.allclose(best0, 0.04, atol=.1, rtol=.5)

    # find_sgd_step_size0 does not change clf
    assert clf.sgd_step_size0 == 1000.0
Beispiel #6
0
def test_fit_replicable():
    """Two fits from identically seeded models give identical parameters."""
    n_features = 20
    seed = 45

    X, y = get_fake_data(100, n_features, RandomState(4))

    # Build and fit two models one after the other, each from a fresh
    # RandomState created with the same seed.
    fitted = []
    for _ in range(2):
        model = get_new_model(n_features, RandomState(seed))
        fitted.append(binary_fit(model, X, y))
    first, second = fitted

    assert_array_equal(first.sgd_weights, second.sgd_weights)
    assert_array_equal(first.sgd_bias, second.sgd_bias)
Beispiel #7
0
def test_binary_fit():
    """binary_fit returns the model it was given and sets sgd_step_size0.

    Three data-set sizes are tried (100, DEFAULT_MAX_EXAMPLES and 2000
    examples). For each, the returned object must be the input model itself
    and its ``sgd_step_size0`` must match a reference value to 4 decimals.
    """
    rstate = RandomState(42)
    n_features = 20

    # 100 examples
    clf100 = get_new_model(n_features, rstate)
    X, y = get_fake_data(100, n_features, rstate)
    _clf100 = binary_fit(clf100, X, y)
    assert _clf100 is clf100
    assert_almost_equal(clf100.sgd_step_size0, 0.04812, decimal=4)

    # DEFAULT_MAX_EXAMPLES examples
    clf1000 = get_new_model(n_features, rstate)
    X, y = get_fake_data(DEFAULT_MAX_EXAMPLES, n_features, rstate)
    _clf1000 = binary_fit(clf1000, X, y)
    assert _clf1000 is clf1000
    assert_almost_equal(clf1000.sgd_step_size0, 0.0047, decimal=4)

    # 2000 examples
    clf2000 = get_new_model(n_features, rstate)
    X, y = get_fake_data(2000, n_features, rstate)
    _clf2000 = binary_fit(clf2000, X, y)
    # Identity, not equality: same check as for the two smaller cases.
    assert _clf2000 is clf2000
    assert_almost_equal(clf2000.sgd_step_size0, 0.0067, decimal=4)
Beispiel #8
0
def test_fit_replicable():
    """Fitting twice from the same seed yields identical weights and bias."""
    n_features = 20
    n_examples = 100
    seed = 45

    X, y = get_fake_data(n_examples, n_features, RandomState(4))

    # Each model is created from its own RandomState with the same seed and
    # fitted before the next one is built.
    fitted = []
    for _ in range(2):
        model = get_new_model(n_features, RandomState(seed), n_examples)
        fitted.append(binary_fit(model, (X, y)))
    first, second = fitted

    assert_array_equal(first.sgd_weights, second.sgd_weights)
    assert_array_equal(first.sgd_bias, second.sgd_bias)
Beispiel #9
0
def test_binary_fit():
    """binary_fit hands back its input model with a tuned sgd_step_size0.

    The check is repeated for 100, DEFAULT_MAX_EXAMPLES and 2000 examples:
    the return value must be the same object that was passed in, and the
    model's ``sgd_step_size0`` must equal a reference value to 4 decimals.
    """
    rstate = RandomState(42)
    n_features = 20

    # 100 examples
    clf100 = get_new_model(n_features, rstate)
    X, y = get_fake_data(100, n_features, rstate)
    _clf100 = binary_fit(clf100, X, y)
    assert _clf100 is clf100
    assert_almost_equal(clf100.sgd_step_size0, 0.04812, decimal=4)

    # DEFAULT_MAX_EXAMPLES examples
    clf1000 = get_new_model(n_features, rstate)
    X, y = get_fake_data(DEFAULT_MAX_EXAMPLES, n_features, rstate)
    _clf1000 = binary_fit(clf1000, X, y)
    assert _clf1000 is clf1000
    assert_almost_equal(clf1000.sgd_step_size0, 0.0047, decimal=4)

    # 2000 examples
    clf2000 = get_new_model(n_features, rstate)
    X, y = get_fake_data(2000, n_features, rstate)
    _clf2000 = binary_fit(clf2000, X, y)
    # Use ``is`` like the two checks above; the contract is object identity.
    assert _clf2000 is clf2000
    assert_almost_equal(clf2000.sgd_step_size0, 0.0067, decimal=4)
Beispiel #10
0
def test_binary_sgd_step_size0():
    """find_sgd_step_size0 returns the reference values and leaves clf alone.

    The search is run twice with different fourth arguments (presumably the
    initial pair of points for the search -- confirm against
    find_sgd_step_size0) and each result is compared with a reference value.
    """
    rng = RandomState(42)
    n_features = 20

    X, y = get_fake_data(100, n_features, rng)
    model = get_new_model(n_features, rng)

    # (fourth argument, expected result); the second case starts a little
    # lower and must still work.
    cases = [
        ((.25, .5), -4.9927),
        ((.125, .25), -4.6180),
    ]
    for start_pair, expected in cases:
        found = find_sgd_step_size0(model, X, y, start_pair)
        assert_almost_equal(found, expected, decimal=4)

    # find_sgd_step_size0 does not change the model
    assert model.sgd_step_size0 == 1000.0
Beispiel #11
0
def test_binary_sgd_step_size0():
    """Step-size search matches reference values without mutating the model.

    Two searches are run on the same model and data; they differ only in the
    tuple passed as the last argument (presumably the starting points of the
    search -- verify against find_sgd_step_size0).
    """
    n_features = 20
    rng = RandomState(42)

    X, y = get_fake_data(100, n_features, rng)
    model = get_new_model(n_features, rng)

    first_result = find_sgd_step_size0(model, X, y, (.25, .5))
    assert_almost_equal(first_result, -4.9927, decimal=4)

    # starting a little lower still works
    second_result = find_sgd_step_size0(model, X, y, (.125, .25))
    assert_almost_equal(second_result, -4.6180, decimal=4)

    # the search must not have changed the model's own step size
    assert model.sgd_step_size0 == 1000.0