def test_intercept_logistic_helper(): n_samples, n_features = 10, 5 X, y = make_classification(n_samples=n_samples, n_features=n_features, random_state=0) # Fit intercept case. alpha = 1. w = np.ones(n_features + 1) loss_interp, grad_interp, hess_interp = _logistic_loss_grad_hess( w, X, y, alpha) # Do not fit intercept. This can be considered equivalent to adding # a feature vector of ones, i.e column of one vectors. X_ = np.hstack((X, np.ones(10)[:, np.newaxis])) loss, grad, hess = _logistic_loss_grad_hess(w, X_, y, alpha) # In the fit_intercept=False case, the feature vector of ones is # penalized. This should be taken care of. assert_almost_equal(loss_interp + 0.5 * (w[-1] ** 2), loss) # Check gradient. assert_array_almost_equal(grad_interp[:n_features], grad[:n_features]) assert_almost_equal(grad_interp[-1] + alpha * w[-1], grad[-1]) rng = np.random.RandomState(0) grad = rng.rand(n_features + 1) hess_interp = hess_interp(grad) hess = hess(grad) assert_array_almost_equal(hess_interp[:n_features], hess[:n_features]) assert_almost_equal(hess_interp[-1] + alpha * grad[-1], hess[-1])
def test_intercept_logistic_helper(): n_samples, n_features = 10, 5 X, y = make_classification(n_samples=n_samples, n_features=n_features, random_state=0) # Fit intercept case. alpha = 1. w = np.ones(n_features + 1) loss_interp, grad_interp, hess_interp = _logistic_loss_grad_hess( w, X, y, alpha) # Do not fit intercept. This can be considered equivalent to adding # a feature vector of ones, i.e column of one vectors. X_ = np.hstack((X, np.ones(10)[:, np.newaxis])) loss, grad, hess = _logistic_loss_grad_hess(w, X_, y, alpha) # In the fit_intercept=False case, the feature vector of ones is # penalized. This should be taken care of. assert_almost_equal(loss_interp + 0.5 * (w[-1]**2), loss) # Check gradient. assert_array_almost_equal(grad_interp[:n_features], grad[:n_features]) assert_almost_equal(grad_interp[-1] + alpha * w[-1], grad[-1]) rng = np.random.RandomState(0) grad = rng.rand(n_features + 1) hess_interp = hess_interp(grad) hess = hess(grad) assert_array_almost_equal(hess_interp[:n_features], hess[:n_features]) assert_almost_equal(hess_interp[-1] + alpha * grad[-1], hess[-1])
def test_logistic_loss_grad_hess(): rng = np.random.RandomState(0) n_samples, n_features = 50, 5 X_ref = rng.randn(n_samples, n_features) y = np.sign(X_ref.dot(5 * rng.randn(n_features))) X_ref -= X_ref.mean() X_ref /= X_ref.std() X_sp = X_ref.copy() X_sp[X_sp < .1] = 0 X_sp = sp.csr_matrix(X_sp) for X in (X_ref, X_sp): w = .1 * np.ones(n_features) # First check that _logistic_loss_grad_hess is consistent # with _logistic_loss_and_grad loss, grad = _logistic_loss_and_grad(w, X, y, alpha=1.) loss_2, grad_2, hess = _logistic_loss_grad_hess(w, X, y, alpha=1.) assert_array_almost_equal(grad, grad_2) # Now check our hessian along the second direction of the grad vector = np.zeros_like(grad) vector[1] = 1 hess_col = hess(vector) # Computation of the Hessian is particularly fragile to numerical # errors when doing simple finite differences. Here we compute the # grad along a path in the direction of the vector and then use a # least-square regression to estimate the slope e = 1e-3 d_x = np.linspace(-e, e, 30) d_grad = np.array([ _logistic_loss_and_grad(w + t * vector, X, y, alpha=1.)[1] for t in d_x ]) d_grad -= d_grad.mean(axis=0) approx_hess_col = linalg.lstsq(d_x[:, np.newaxis], d_grad)[0].ravel() assert_array_almost_equal(approx_hess_col, hess_col, decimal=3) # Second check that our intercept implementation is good w = np.zeros(n_features + 1) loss_interp, grad_interp = _logistic_loss_and_grad( w, X, y, alpha=1. ) loss_interp_2, grad_interp_2, hess = \ _logistic_loss_grad_hess(w, X, y, alpha=1.) assert_array_almost_equal(loss_interp, loss_interp_2) assert_array_almost_equal(grad_interp, grad_interp_2)