def test_fit_replicable():
    """Check that fitting is deterministic for a fixed seed.

    Two models built from identically seeded ``RandomState(45)`` instances
    are fitted on the same fake data set; their learned ``sgd_weights`` and
    ``sgd_bias`` must be equal.
    """
    n_features = 20
    X, y = get_fake_data(100, n_features, RandomState(4))

    # Build and fit one model at a time so that the create/fit order is the
    # same for both runs.
    fitted = []
    for _ in range(2):
        model = get_new_model(n_features, RandomState(45))
        fitted.append(binary_fit(model, X, y))

    first, second = fitted
    assert_array_equal(first.sgd_weights, second.sgd_weights)
    assert_array_equal(first.sgd_bias, second.sgd_bias)
def test_fit_replicable():
    """Check that fitting is deterministic for a fixed seed.

    Two models are created with identically seeded ``RandomState(45)``
    instances (third ``get_new_model`` argument 100) and fitted on the same
    ``(X, y)`` pair; the resulting ``sgd_weights`` and ``sgd_bias`` must
    be equal.
    """
    n_features = 20
    X, y = get_fake_data(100, n_features, RandomState(4))

    # Build and fit one model at a time so that the create/fit order is the
    # same for both runs.
    models = []
    for _ in range(2):
        fresh = get_new_model(n_features, RandomState(45), 100)
        models.append(binary_fit(fresh, (X, y)))

    reference, repeat = models
    for attr in ('sgd_weights', 'sgd_bias'):
        assert_array_equal(getattr(reference, attr), getattr(repeat, attr))
def test_binary_fit():
    """Exercise ``binary_fit`` on data sets of several sizes.

    For each size a model and a fake data set are drawn from one shared
    random state.  The test checks that ``binary_fit`` returns the very
    model it was given, and that the model's ``sgd_step_size0`` is positive
    and no larger than the value returned by ``find_sgd_step_size0`` for the
    same model and data before fitting.
    """
    rstate = RandomState(42)
    n_features = 20

    # Sizes below, at, and above DEFAULT_MAX_EXAMPLES.
    example_counts = (
        100,
        DEFAULT_MAX_EXAMPLES,
        int(DEFAULT_MAX_EXAMPLES * 1.5),
        int(DEFAULT_MAX_EXAMPLES * 3),
    )

    for n_examples in example_counts:
        # The model is created before the data: both consume `rstate`.
        model = get_new_model(n_features, rstate, n_examples)
        X, y = get_fake_data(n_examples, n_features, rstate, separation=0.1)
        dataset = (X, y)

        step_size_bound = find_sgd_step_size0(model, dataset)
        fitted = binary_fit(model, dataset)

        assert fitted is model
        step_size0 = model.sgd_step_size0
        assert step_size0 > 0
        assert step_size0 <= step_size_bound
def train_svm(Xyd, l2_regularization, max_observations):
    """
    Return a sklearn-like classification model.

    Parameters
    ----------
    Xyd : tuple
        ``(train_X, train_y, decisions)``.  ``train_X`` must be a
        2-dimensional array; ``train_y`` and ``decisions`` must have the
        same length as ``train_X``.
    l2_regularization
        Forwarded to ``MarginBinaryASGD`` as ``l2_regularization``.
    max_observations
        Forwarded to ``MarginBinaryASGD`` as ``max_observations``.

    Returns
    -------
    The ``MarginBinaryASGD`` instance after ``binary_fit`` has been run on
    ``(train_X, train_y, adjusted_decisions)``.

    Raises
    ------
    ValueError
        If ``train_X`` is not 2-dimensional, or if the lengths of
        ``train_y`` or ``decisions`` differ from the length of ``train_X``.
    """
    train_X, train_y, decisions = Xyd
    if train_X.ndim != 2:
        raise ValueError('train_X must be matrix')

    # Validate with real exceptions rather than `assert`, so the checks
    # survive `python -O` and match the ndim check above.
    n_examples = len(train_X)
    if len(train_y) != n_examples:
        raise ValueError(
            'train_X and train_y must have the same length, got %r'
            % ((n_examples, len(train_y)),))
    if len(decisions) != n_examples:
        raise ValueError(
            'train_X and decisions must have the same length, got %r'
            % ((n_examples, len(decisions)),))

    # doctor the decisions so that there is always something to learn:
    # shift every decision by the mean margin, signed by its label.
    # NOTE(review): this centres the mean margin at zero only if labels
    # are +/-1 -- confirm against callers.
    margin = train_y * decisions
    margin_mean = margin.mean()
    decisions = decisions - margin_mean * train_y

    svm = MarginBinaryASGD(
        n_features=train_X.shape[1],
        l2_regularization=l2_regularization,
        dtype=train_X.dtype,
        # Fixed seed: the model's random state is the same on every call.
        rstate=np.random.RandomState(1234),
        max_observations=max_observations,
    )
    binary_fit(svm, (train_X, train_y, np.asarray(decisions)))
    return svm
def test_binary_fit():
    """Smoke-test ``binary_fit`` on three data-set sizes.

    For each size a fresh model and a fake data set are drawn from one
    shared random state.  The test checks that ``binary_fit`` returns the
    very model object it was given (identity, for every case) and that the
    model's ``sgd_step_size0`` matches a recorded value to 4 decimals.
    """
    rstate = RandomState(42)
    n_features = 20

    # (number of examples, recorded sgd_step_size0).  The order matters:
    # every case draws from the same `rstate`, so reordering would change
    # the data and invalidate the recorded values.
    cases = [
        (100, 0.04812),
        (DEFAULT_MAX_EXAMPLES, 0.0047),
        (2000, 0.0067),
    ]

    for n_examples, expected_step_size0 in cases:
        # The model is created before the data: both consume `rstate`.
        clf = get_new_model(n_features, rstate)
        X, y = get_fake_data(n_examples, n_features, rstate)
        fitted = binary_fit(clf, X, y)

        # Identity, not equality: binary_fit must hand back the same model.
        assert fitted is clf
        # Smoke test: recorded value, checks that fitting at least runs.
        assert_almost_equal(clf.sgd_step_size0, expected_step_size0,
                            decimal=4)