import numpy as np
from numpy.testing import assert_almost_equal, assert_array_almost_equal

from sklearn.datasets import make_classification
from sklearn.linear_model.logistic import _logistic_grad_hess, _logistic_loss


def test_intercept_logistic_helper():
    n_samples, n_features = 10, 5
    X, y = make_classification(n_samples=n_samples, n_features=n_features,
                               random_state=0)

    # Fit intercept case.
    alpha = 1.
    w = np.ones(n_features + 1)
    grad_interp, hess_interp = _logistic_grad_hess(w, X, y, alpha)
    loss_interp = _logistic_loss(w, X, y, alpha)

    # Do not fit intercept. This can be considered equivalent to adding
    # a feature of ones, i.e. a column of ones.
    X_ = np.hstack((X, np.ones(n_samples)[:, np.newaxis]))
    grad, hess = _logistic_grad_hess(w, X_, y, alpha)
    loss = _logistic_loss(w, X_, y, alpha)

    # In the fit_intercept=False case, the feature vector of ones is
    # penalized. This should be taken care of.
    assert_almost_equal(loss_interp + 0.5 * (w[-1] ** 2), loss)

    # Check gradient.
    assert_array_almost_equal(grad_interp[:n_features], grad[:n_features])
    assert_almost_equal(grad_interp[-1] + alpha * w[-1], grad[-1])

    # Check the Hessian-vector products against a random vector.
    rng = np.random.RandomState(0)
    grad = rng.rand(n_features + 1)
    hess_interp = hess_interp(grad)
    hess = hess(grad)
    assert_array_almost_equal(hess_interp[:n_features], hess[:n_features])
    assert_almost_equal(hess_interp[-1] + alpha * grad[-1], hess[-1])
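# For reference, a minimal numpy sketch (an illustration, not sklearn's
# actual implementation) of the loss the test above checks. `_logistic_loss`
# computes sum(log(1 + exp(-y * (X w + c)))) + 0.5 * alpha * ||w||^2, where
# the intercept c is the last entry of w when w has n_features + 1 entries
# and is left unpenalized; with a column of ones appended instead, the same
# entry is an ordinary weight and picks up the extra 0.5 * alpha * c ** 2
# term asserted above. Assumes y is in {-1, +1}.
def reference_logistic_loss(w, X, y, alpha, fit_intercept=True):
    if fit_intercept:
        w, c = w[:-1], w[-1]
    else:
        c = 0.
    yz = y * (X.dot(w) + c)
    return np.sum(np.logaddexp(0, -yz)) + .5 * alpha * np.dot(w, w)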
import numpy as np
import scipy.sparse as sp
from numpy.testing import assert_array_almost_equal
from scipy import linalg

from sklearn.linear_model.logistic import (
    _logistic_grad_hess, _logistic_loss, _logistic_loss_and_grad)


def test_logistic_grad_hess():
    rng = np.random.RandomState(0)
    n_samples, n_features = 50, 5
    X_ref = rng.randn(n_samples, n_features)
    y = np.sign(X_ref.dot(5 * rng.randn(n_features)))
    X_ref -= X_ref.mean()
    X_ref /= X_ref.std()
    X_sp = X_ref.copy()
    X_sp[X_sp < .1] = 0
    X_sp = sp.csr_matrix(X_sp)
    for X in (X_ref, X_sp):
        w = .1 * np.ones(n_features)

        # First check that _logistic_grad_hess is consistent
        # with _logistic_loss_and_grad.
        loss, grad = _logistic_loss_and_grad(w, X, y, alpha=1.)
        grad_2, hess = _logistic_grad_hess(w, X, y, alpha=1.)
        assert_array_almost_equal(grad, grad_2)

        # Now check our Hessian along the second direction of the grad.
        vector = np.zeros_like(grad)
        vector[1] = 1
        hess_col = hess(vector)

        # Computation of the Hessian is particularly fragile to numerical
        # errors when doing simple finite differences. Here we compute the
        # grad along a path in the direction of the vector and then use a
        # least-squares regression to estimate the slope.
        e = 1e-3
        d_x = np.linspace(-e, e, 30)
        d_grad = np.array([
            _logistic_loss_and_grad(w + t * vector, X, y, alpha=1.)[1]
            for t in d_x
        ])
        d_grad -= d_grad.mean(axis=0)
        approx_hess_col = linalg.lstsq(d_x[:, np.newaxis], d_grad)[0].ravel()
        assert_array_almost_equal(approx_hess_col, hess_col, decimal=3)

        # Second, check that our intercept implementation is good.
        w = np.zeros(n_features + 1)
        loss_interp, grad_interp = _logistic_loss_and_grad(w, X, y, alpha=1.)
        loss_interp_2 = _logistic_loss(w, X, y, alpha=1.)
        grad_interp_2, hess = _logistic_grad_hess(w, X, y, alpha=1.)
        assert_array_almost_equal(loss_interp, loss_interp_2)
        assert_array_almost_equal(grad_interp, grad_interp_2)
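# For reference, a minimal matrix-free sketch (an illustration under the
# assumptions of dense X and no intercept; not sklearn's implementation)
# of the Hessian-vector product that `hess(vector)` evaluates above. The
# Hessian of the L2-penalized log-loss is X^T D X + alpha * I with
# D = diag(s * (1 - s)) and s = sigmoid(X w), so:
def hessian_vector_product(w, X, alpha, v):
    s = 1. / (1. + np.exp(-X.dot(w)))  # sigmoid of the decision values
    d = s * (1. - s)                   # per-sample curvature weights
    return X.T.dot(d * X.dot(v)) + alpha * v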
import gzip
import pickle

import numpy as np

# `SVM` is a project-local class (fit/predict/objective/get_model/set_model),
# assumed importable from the surrounding package.


def main():
    np.random.seed(0)
    from sklearn.metrics import accuracy_score
    from sklearn.linear_model import LogisticRegression
    from sklearn.linear_model.logistic import _logistic_loss

    with gzip.open('../data/svm_data.pkl.gz', 'rb') as f:
        train_X, train_y, test_X, test_y = pickle.load(f)

    cls = SVM(1, 600)
    cls.fit(train_X, train_y)
    y_pred_train = cls.predict(train_X)
    obj = cls.objective(train_X, train_y)
    y_pred = cls.predict(test_X)
    w_svm, b_svm = cls.get_model()
    acc_test = accuracy_score(test_y, y_pred)
    acc_train = accuracy_score(train_y, y_pred_train)
    print(f'SVC Objective = {obj:.2f}')
    print(f'SVC Test Accuracy = {acc_test * 100:.2f}%')
    print(f'SVC Train Accuracy = {acc_train * 100:.2f}%')

    cls_logistic = LogisticRegression()
    cls_logistic.fit(train_X, train_y)
    y_pred = cls_logistic.predict(train_X)
    y_pred_test = cls_logistic.predict(test_X)
    acc_logistic = accuracy_score(train_y, y_pred)
    acc_logistic_test = accuracy_score(test_y, y_pred_test)
    print(f'Logistic Train Accuracy = {acc_logistic * 100:.2f}%')
    print(f'Logistic Test Accuracy = {acc_logistic_test * 100:.2f}%')

    # Evaluate the logistic solution under both objectives.
    w_lr = cls_logistic.coef_.reshape(w_svm.shape)
    b_lr = cls_logistic.intercept_.reshape(b_svm.shape)
    obj = _logistic_loss(w_lr, train_X, train_y, alpha=1)
    print(f'Logistic Objective = {obj:.2f}')

    cls = SVM()
    cls.set_model(w_lr, b_lr)
    obj = cls.objective(train_X, train_y)
    print(f'SVC Objective at w_lr, b_lr = {obj:.2f}')
    y_pred_train = cls.predict(train_X)
    acc_train = accuracy_score(train_y, y_pred_train)
    print(f'SVC Train Accuracy at w_lr, b_lr = {100 * acc_train:.2f}%')
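# Two caveats worth noting for the script above. First, sklearn's
# `_logistic_loss` assumes labels in {-1, +1}; if `train_y` were encoded
# as {0, 1}, the printed "Logistic Objective" would be meaningless, so a
# hypothetical guard inside main() could be:
#
#     y_pm = np.where(train_y <= 0, -1, 1)
#     obj = _logistic_loss(w_lr, train_X, y_pm, alpha=1)
#
# Second, passing a length-n_features `w_lr` ignores the fitted intercept;
# to include it, append `b_lr` so that w has n_features + 1 entries:
#
#     obj = _logistic_loss(np.r_[w_lr.ravel(), b_lr.ravel()],
#                          train_X, y_pm, alpha=1)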
def logloss(x):
    # Closure over X, y and alpha from the enclosing scope.
    return logistic._logistic_loss(x, X, y, alpha)
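# A typical use of such a closure is to hand it to a scipy optimizer,
# ideally together with the matching analytic gradient. A self-contained
# sketch (random data stands in for the module-level X, y, alpha that the
# closure above is assumed to capture):
import numpy as np
from scipy import optimize
from sklearn.linear_model import logistic

rng = np.random.RandomState(0)
X = rng.randn(40, 5)
y = np.sign(rng.randn(40))
alpha = 1.

def logloss_and_grad(x):
    # Returns (loss, gradient), which fmin_l_bfgs_b consumes directly.
    return logistic._logistic_loss_and_grad(x, X, y, alpha)

w_opt, f_opt, info = optimize.fmin_l_bfgs_b(logloss_and_grad,
                                            np.zeros(X.shape[1]))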
def logistic_objective(K, y, alpha, coef, lamda, beta):
    # Sum of per-task L2-penalized logistic losses plus an l1 penalty
    # on the kernel weights `coef`.
    obj = sum(
        _logistic_loss(alpha[i], np.tensordot(coef, K[i], axes=1), y[i],
                       lamda)
        for i in range(len(K)))
    obj += beta * np.abs(coef).sum()
    return obj
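# Shape sketch for the objective above (the shapes are an inference from
# the tensordot contraction, not documented in the snippet): for task i,
# K[i] stacks n_kernels Gram matrices into shape (n_kernels, n_i, n_i)
# and coef has shape (n_kernels,), so np.tensordot(coef, K[i], axes=1)
# is the (n_i, n_i) weighted combination of kernels.
n_kernels, n_i = 3, 20
rng = np.random.RandomState(0)
K_i = rng.randn(n_kernels, n_i, n_i)
coef = rng.rand(n_kernels)
combined = np.tensordot(coef, K_i, axes=1)
assert combined.shape == (n_i, n_i)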
    # (Fragment: the preceding `if dataset == ...` branches that pick a
    # loader are elided.)
    A, b = loader()
else:
    raise NotImplementedError

n_samples, n_features = A.shape
if not os.path.exists('data'):
    os.mkdir('data')
alpha = 1.0 / n_samples
x0 = np.zeros(n_features)

for it in range(3):
    print('Iteration %s' % it)

    _, _, _, trace_saga_x, trace_saga_time = sag_solver(
        A, b, sample_weight=None, loss='log', alpha=1., beta=0.,
        max_iter=50, is_saga=True)
    trace_saga_func = [_logistic_loss(xi, A, b, 1.) for xi in trace_saga_x]
    np.save('data/saga_trace_time_%s_%s.npy' % (dataset, it),
            trace_saga_time - trace_saga_time[0])
    np.save('data/saga_trace_func_%s_%s.npy' % (dataset, it),
            trace_saga_func)

    x, trace_sps_x, trace_sps_time = SPSAGA(
        A, b, np.zeros(n_features), alpha, 0, max_iter=40,
        line_search=False)
    trace_sps_func = [_logistic_loss(xi, A, b, 1.) for xi in trace_sps_x]
    np.save('data/sps_trace_time_%s_%s.npy' % (dataset, it), trace_sps_time)
    np.save('data/sps_trace_func_%s_%s.npy' % (dataset, it), trace_sps_func)

    x, trace_sps_x, trace_sps_time = SPSAGA(
        A, b, np.zeros(n_features), alpha, 0, max_iter=40,
        line_search=True)
    trace_sps_func = [_logistic_loss(xi, A, b, 1.) for xi in trace_sps_x]
    np.save('data/spsls_trace_time_%s_%s.npy' % (dataset, it),
            trace_sps_time)
    np.save('data/spsls_trace_func_%s_%s.npy' % (dataset, it),
            trace_sps_func)
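# A sketch of how the traces saved above could be consumed afterwards
# (assumes matplotlib and the same `dataset` naming; not part of the
# original benchmark loop):
import matplotlib.pyplot as plt

f_saga = np.load('data/saga_trace_func_%s_0.npy' % dataset)
t_saga = np.load('data/saga_trace_time_%s_0.npy' % dataset)
plt.plot(t_saga, f_saga - f_saga.min(), label='SAGA')
plt.yscale('log')
plt.xlabel('time (s)')
plt.ylabel('objective suboptimality')
plt.legend()
plt.show()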
def logistic_objective(K, y, alpha, coef, lamda, beta):
    X = np.tensordot(coef, K, axes=1)
    return np.array(
        _logistic_loss(alpha, X, y, lamda) + beta * np.abs(coef).sum())
def logistic_loss(K, y, alpha, coef, lamda, beta):
    X = np.tensordot(coef, K, axes=1)
    # _logistic_loss already includes the 0.5 * lamda * ||alpha||^2 ridge
    # term; subtract it to return the unpenalized log-loss.
    return np.array(
        _logistic_loss(alpha, X, y, lamda) - .5 * lamda * np.dot(alpha, alpha))
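# Sanity check for the subtraction above: _logistic_loss(w, X, y, lamda)
# returns log-loss + 0.5 * lamda * ||w||^2, so removing
# 0.5 * lamda * dot(alpha, alpha) leaves the bare log-loss. A toy check
# (a sketch; assumes y in {-1, +1} and the imports used by the snippets):
rng = np.random.RandomState(0)
X_t, y_t = rng.randn(8, 8), np.sign(rng.randn(8))
w_t = rng.randn(8)
bare = _logistic_loss(w_t, X_t, y_t, 0.)
penalized = _logistic_loss(w_t, X_t, y_t, 2.)
assert np.isclose(penalized - 0.5 * 2. * np.dot(w_t, w_t), bare)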
def logloss(x):
    # Closure over X and y from the enclosing scope; alpha fixed at 1.
    return logistic._logistic_loss(x, X, y, 1.)