def test_multinomial_grad_hess(): rng = np.random.RandomState(0) n_samples, n_features, n_classes = 100, 5, 3 X = rng.randn(n_samples, n_features) w = rng.rand(n_classes, n_features) Y = np.zeros((n_samples, n_classes)) ind = np.argmax(np.dot(X, w.T), axis=1) Y[range(0, n_samples), ind] = 1 w = w.ravel() sample_weights = np.ones(X.shape[0]) grad, hessp = _multinomial_grad_hess(w, X, Y, alpha=1., sample_weight=sample_weights) # extract first column of hessian matrix vec = np.zeros(n_features * n_classes) vec[0] = 1 hess_col = hessp(vec) # Estimate hessian using least squares as done in # test_logistic_grad_hess e = 1e-3 d_x = np.linspace(-e, e, 30) d_grad = np.array([ _multinomial_grad_hess(w + t * vec, X, Y, alpha=1., sample_weight=sample_weights)[0] for t in d_x ]) d_grad -= d_grad.mean(axis=0) approx_hess_col = linalg.lstsq(d_x[:, np.newaxis], d_grad)[0].ravel() assert_array_almost_equal(hess_col, approx_hess_col)
def test_multinomial_grad_hess(): rng = np.random.RandomState(0) n_samples, n_features, n_classes = 100, 5, 3 X = rng.randn(n_samples, n_features) w = rng.rand(n_classes, n_features) Y = np.zeros((n_samples, n_classes)) ind = np.argmax(np.dot(X, w.T), axis=1) Y[range(0, n_samples), ind] = 1 w = w.ravel() sample_weights = np.ones(X.shape[0]) grad, hessp = _multinomial_grad_hess(w, X, Y, alpha=1., sample_weight=sample_weights) # extract first column of hessian matrix vec = np.zeros(n_features * n_classes) vec[0] = 1 hess_col = hessp(vec) # Estimate hessian using least squares as done in # test_logistic_grad_hess e = 1e-3 d_x = np.linspace(-e, e, 30) d_grad = np.array([ _multinomial_grad_hess(w + t * vec, X, Y, alpha=1., sample_weight=sample_weights)[0] for t in d_x ]) d_grad -= d_grad.mean(axis=0) approx_hess_col = linalg.lstsq(d_x[:, np.newaxis], d_grad)[0].ravel() assert_array_almost_equal(hess_col, approx_hess_col)
def _hessvec_fun_mult(w, v, X, y, sample_weight=None, reg_param=0): temp = _multinomial_grad_hess(w, X, y, reg_param, sample_weight)[1] return temp(v)