def test_init_exceptions():
    # num_units is a required argument
    with pytest.raises(TypeError):
        Neuralnet()
    with pytest.raises(TypeError):
        Neuralnet(0, 0.3, 0.1, 1., alr_schedule='constant')
    # an excessively large hash dimension is not supported
    with pytest.raises(NotImplementedError):
        Neuralnet(num_units=10, ndims=2**20)
def test_init_args():
    # defaults
    clf = Neuralnet(num_units=12)
    assert clf.lambda1 == 0.
    assert clf.alr_schedule == 'gradient'
    assert clf.num_units == 12

    # lambda1 passed positionally
    clf = Neuralnet(3, num_units=13)
    assert clf.lambda1 == 3
    assert clf.alr_schedule == 'gradient'
    assert clf.num_units == 13

    clf = Neuralnet(num_units=1, alr_schedule='count')
    assert clf.lambda1 == 0.
    assert clf.alr_schedule == 'count'
    assert clf.num_units == 1

    clf = Neuralnet(alr_schedule='count', num_units=12)
    assert clf.lambda1 == 0.
    assert clf.alr_schedule == 'count'
    assert clf.num_units == 12

    clf = Neuralnet(0.5, alr_schedule='count', num_units=12)
    assert clf.lambda1 == 0.5
    assert clf.alr_schedule == 'count'
    assert clf.num_units == 12
def test_effect_of_beta(num_units):
    # Lower beta should lead to faster learning and thus higher
    # weights. However, this holds less strictly for neural nets than
    # for logistic regression, since the weights of neural nets are
    # not initialized at 0; for high alpha, the monotonic relationship
    # may therefore break down.
    for layer in [0, 1]:  # loop through layers
        mean_abs_weights = []
        for beta in [10 ** n for n in range(5)]:
            clf = Neuralnet(num_units=num_units, alpha=1, beta=beta)
            clf.fit(X[:5000], y[:5000])
            mean_abs_weights.append(np.abs(clf.weights()[layer]).mean())
        # mean absolute weights should decrease monotonically with beta
        assert all(np.diff(mean_abs_weights) < 0)
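# A minimal sketch of why lower beta means faster learning, assuming the
# usual FTRL-style per-coordinate learning rate alpha / (beta + sqrt(n)),
# where n accumulates squared gradients. The exact schedule used by
# Neuralnet may differ; the names below are illustrative only.
def _learning_rate_sketch():
    import numpy as np
    alpha = 1.
    accumulated_sq_grads = 25.  # hypothetical value of n for one weight
    for beta in [1, 10, 100]:
        lr = alpha / (beta + np.sqrt(accumulated_sq_grads))
        # lr shrinks as beta grows: 1/6, 1/15, 1/105
        print(beta, lr)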
def test_effect_of_lambda1(lambda1):
    # gradients should be the same regardless of the magnitude of the weights
    clf = Neuralnet(lambda1=lambda1, num_units=8)
    clf.fit(X[:10], y[:10])

    activities = clf._get_p(X[0])
    activities[-1] = 0  # set prediction to outcome, so that y_err = 0
    weights = clf._get_w(X[0])
    grads = clf._get_grads(0, activities, weights)

    for layer in [0, 1]:  # loop through layers
        # with y_err = 0, the gradient only depends on the sign of the weight
        abso = [gr * np.sign(w) for gr, w in
                zip(grads[layer].flatten(), weights[layer].flatten())]
        assert np.allclose(abso[0], abso)

        # contingency test: should fail
        frac = [gr / w for gr, w in
                zip(grads[layer].flatten(), weights[layer].flatten())]
        with pytest.raises(AssertionError):
            assert np.allclose(frac[0], frac)
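# A minimal sketch of the property tested above, assuming that with
# y_err = 0 the remaining gradient is just the L1 penalty term
# lambda1 * sign(w); the names below are illustrative, not library API.
def _l1_gradient_sketch():
    import numpy as np
    lambda1 = 0.1
    w = np.array([-2., -0.5, 0.3, 4.])
    grad = lambda1 * np.sign(w)  # gradient of the L1 penalty
    # grad * sign(w) is the same constant (lambda1) for every weight ...
    assert np.allclose((grad * np.sign(w))[0], grad * np.sign(w))
    # ... while grad / w varies with the weight's magnitude
    assert not np.allclose((grad / w)[0], grad / w)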
# module-level fixtures: each model is fit once on the first 10 samples
# ('before') and once on the full data ('after')
ogdlr_before = OGDLR(lambda1=LAMBDA1, alpha=ALPHA, alr_schedule='constant')
ogdlr_before.fit(X[:10], y[:10], COLS)
ogdlr_after = OGDLR(lambda1=LAMBDA1, alpha=ALPHA, alr_schedule='constant')
ogdlr_after.fit(X, y, COLS)

ftrl_before = FTRLprox(lambda1=LAMBDA1, lambda2=LAMBDA2, alpha=ALPHA,
                       beta=1, alr_schedule='constant')
ftrl_before.fit(X[:10], y[:10], COLS)
ftrl_after = FTRLprox(lambda1=LAMBDA1, lambda2=LAMBDA2, alpha=ALPHA,
                      beta=1, alr_schedule='constant')
ftrl_after.fit(X, y, COLS)

hash_before = OGDLR(lambda1=LAMBDA1, alpha=ALPHA, alr_schedule='constant',
                    ndims=NDIMS)
hash_before.fit(X[:10], y[:10], COLS)
hash_after = OGDLR(lambda1=LAMBDA1, alpha=ALPHA, alr_schedule='constant',
                   ndims=NDIMS)
hash_after.fit(X, y, COLS)

nn_before = Neuralnet(lambda1=LAMBDA1, lambda2=LAMBDA2, alpha=ALPHA, beta=1,
                      alr_schedule='constant', num_units=16)
nn_before.fit(X[:10], y[:10], COLS)
nn_after = Neuralnet(lambda1=LAMBDA1, lambda2=LAMBDA2, alpha=ALPHA, beta=1,
                     alr_schedule='constant', num_units=16)
nn_after.fit(X, y, COLS)
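# A minimal sketch of the hashing trick implied by the ndims argument used
# for hash_before/hash_after, assuming features are hashed into a fixed-size
# weight table; the exact hash function used by OGDLR may differ, and
# feature_index below is a hypothetical helper for illustration.
def _hashing_trick_sketch(ndims=NDIMS):
    def feature_index(col, val):
        # map an arbitrary column/value pair to one of ndims slots
        return hash('{}__{}'.format(col, val)) % ndims
    assert 0 <= feature_index('color', 'red') < ndims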