def test_changepoint_scaled():

    p = 150
    M = multiscale(p)
    M.minsize = 10
    X = ra.adjoint(M)

    Y = np.random.standard_normal(p)
    Y[20:50] += 8
    Y += 2
    meanY = Y.mean()

    lammax = np.fabs(np.sqrt(M.sizes) * X.adjoint_map(Y) / (1 + np.sqrt(np.log(M.sizes)))).max()

    penalty = rr.weighted_l1norm((1 + np.sqrt(np.log(M.sizes))) / np.sqrt(M.sizes),
                                 lagrange=0.5 * lammax)
    loss = rr.squared_error(X, Y - meanY)
    problem = rr.simple_problem(loss, penalty)
    soln = problem.solve()
    Yhat = X.linear_map(soln)
    Yhat += meanY

    if INTERACTIVE:
        plt.scatter(np.arange(p), Y)
        plt.plot(np.arange(p), Yhat)
        plt.show()
def test_nesta_nonnegative():

    n, p, q = 1000, 20, 5
    X = np.random.standard_normal((n, p))
    A = np.random.standard_normal((q, p))

    coef = 10 * np.fabs(np.random.standard_normal(q)) + 1
    coef[:2] = -0.2
    beta = np.dot(np.linalg.pinv(A), coef)
    Y = np.random.standard_normal(n) + np.dot(X, beta)

    loss = rr.squared_error(X, Y)
    penalty = rr.l1norm(p, lagrange=0.2)
    constraint = rr.nonnegative.linear(A)

    primal, dual = rr.nesta(loss, penalty, constraint,
                            max_iters=300,
                            coef_tol=1.e-4,
                            tol=1.e-4)

    print(np.dot(A, primal))
    assert_almost_nonnegative(np.dot(A, primal), tol=1.e-3)
def test_nesta_nonnegative():

    state = np.random.get_state()
    np.random.seed(10)

    n, p, q = 1000, 20, 5
    X = np.random.standard_normal((n, p))
    A = np.random.standard_normal((q, p))

    coef = 10 * np.fabs(np.random.standard_normal(q)) + 1
    coef[:2] = -0.2
    beta = np.dot(np.linalg.pinv(A), coef)
    print(r'\beta', beta)
    print(r'A\beta', np.dot(A, beta))
    Y = np.random.standard_normal(n) + np.dot(X, beta)

    loss = rr.squared_error(X, Y)
    penalty = rr.l1norm(p, lagrange=0.2)
    constraint = rr.nonnegative.linear(A)

    primal, dual = rr.nesta(loss, penalty, constraint,
                            max_iters=300,
                            coef_tol=1.e-10,
                            tol=1.e-10)

    print(r'A \hat{\beta}', np.dot(A, primal))
    assert_almost_nonnegative(np.dot(A, primal), tol=1.e-3)

    np.random.set_state(state)
def test_outputs_SLOPE_weights(n=500, p=100, signal_fac=1., s=5, sigma=3., rho=0.35):

    inst = gaussian_instance
    signal = np.sqrt(signal_fac * 2. * np.log(p))
    X, Y, beta = inst(n=n,
                      p=p,
                      signal=signal,
                      s=s,
                      equicorrelated=False,
                      rho=rho,
                      sigma=sigma,
                      random_signs=True)[:3]

    sigma_ = np.sqrt(np.linalg.norm(Y - X.dot(np.linalg.pinv(X).dot(Y))) ** 2 / (n - p))

    r_beta, r_E, r_lambda_seq, r_sigma = slope_R(X,
                                                 Y,
                                                 W=None,
                                                 normalize=True,
                                                 choice_weights="gaussian",
                                                 sigma=sigma_)
    print("estimated sigma", sigma_, r_sigma)
    print("weights output by R", r_lambda_seq)
    print("output of est coefs R", r_beta)

    pen = slope_atom(r_sigma * r_lambda_seq, lagrange=1.)
    loss = rr.squared_error(X, Y)
    problem = rr.simple_problem(loss, pen)
    soln = problem.solve()
    print("output of est coefs python", soln)

    print("relative difference in solns",
          np.linalg.norm(soln - r_beta) / np.linalg.norm(r_beta))
def test_using_SLOPE_weights():

    n, p = 500, 50

    X = np.random.standard_normal((n, p))
    #Y = np.random.standard_normal(n)
    X -= X.mean(0)[None, :]
    X /= (X.std(0)[None, :] * np.sqrt(n))

    beta = np.zeros(p)
    beta[:5] = 5.
    Y = X.dot(beta) + np.random.standard_normal(n)

    output_R = fit_slope_R(X, Y, W=None, normalize=True, choice_weights="bhq")
    r_beta = output_R[0]
    r_lambda_seq = output_R[2]

    W = r_lambda_seq
    pen = slope(W, lagrange=1.)

    loss = rr.squared_error(X, Y)
    problem = rr.simple_problem(loss, pen)
    soln = problem.solve(tol=1.e-14, min_its=500)

    # we get a better objective value
    nt.assert_true(problem.objective(soln) < problem.objective(np.asarray(r_beta)))
    nt.assert_true(np.linalg.norm(soln - r_beta) < 1.e-6 * np.linalg.norm(soln))
def test_nesta_lasso():

    n, p = 1000, 20
    X = np.random.standard_normal((n, p))
    beta = np.zeros(p)
    beta[:4] = 30
    Y = np.random.standard_normal(n) + np.dot(X, beta)

    loss = rr.squared_error(X, Y)
    penalty = rr.l1norm(p, lagrange=4.)

    # using nesta
    z = rr.zero(p)
    primal, dual = rr.nesta(loss, z, penalty,
                            tol=1.e-10,
                            epsilon=2.**(-np.arange(30)))

    # using simple problem
    problem = rr.simple_problem(loss, penalty)
    problem.solve()
    nt.assert_true(np.linalg.norm(primal - problem.coefs) / np.linalg.norm(problem.coefs) < 1.e-3)
def test_conjugate_sqerror():
    """
    This verifies the conjugate class can compute the conjugate
    of a quadratic function.
    """

    ridge_coef = 0.4

    X = np.random.standard_normal((10, 4))
    Y = np.random.standard_normal(10)
    l = rr.squared_error(X, Y)
    q = rr.identity_quadratic(ridge_coef, 0, 0, 0)
    atom_conj = rr.conjugate(l, q, tol=1.e-12, min_its=100)
    w = np.random.standard_normal(4)
    u11, u12 = atom_conj.smooth_objective(w)

    # check that objective is half of squared error
    np.testing.assert_allclose(l.smooth_objective(w, mode='func'),
                               0.5 * np.linalg.norm(Y - np.dot(X, w))**2)
    np.testing.assert_allclose(atom_conj.atom.smooth_objective(w, mode='func'),
                               0.5 * np.linalg.norm(Y - np.dot(X, w))**2)

    # the maximizer of w^T u - (l(u) + q(u)) solves
    # (X^T X + ridge_coef * I) u = X^T Y + w
    XTX = np.dot(X.T, X)
    XTXi = np.linalg.pinv(XTX)
    quadratic_term = XTX + ridge_coef * np.identity(4)
    linear_term = np.dot(X.T, Y) + w
    b = u22 = np.linalg.solve(quadratic_term, linear_term)
    u21 = (w * u12).sum() - l.smooth_objective(u12, mode='func') - q.objective(u12, mode='func')

    np.testing.assert_allclose(u12, u22, rtol=1.0e-05)
    np.testing.assert_approx_equal(u11, u21)
def test_using_SLOPE_weights():

    n, p = 500, 50

    X = np.random.standard_normal((n, p))
    #Y = np.random.standard_normal(n)
    X -= X.mean(0)[None, :]
    X /= (X.std(0)[None, :] * np.sqrt(n))

    beta = np.zeros(p)
    beta[:5] = 5.
    Y = X.dot(beta) + np.random.standard_normal(n)

    output_R = fit_slope_R(X, Y)
    r_beta = np.squeeze(output_R[0])[:, 3]
    r_lambda_seq = np.array(output_R[2]).reshape(-1)
    alpha = output_R[-1]

    W = np.asarray(r_lambda_seq * alpha[3]).reshape(-1)
    pen = slope(W, lagrange=1.)

    loss = rr.squared_error(X, Y)
    problem = rr.simple_problem(loss, pen)
    soln = problem.solve(tol=1.e-14, min_its=500)

    # we get a better objective value
    nt.assert_true(problem.objective(soln) < problem.objective(np.asarray(r_beta)))
    nt.assert_true(np.linalg.norm(soln - r_beta) < 1.e-6 * np.linalg.norm(soln))
def test_nesta_lasso():

    n, p = 1000, 20
    X = np.random.standard_normal((n, p))
    beta = np.zeros(p)
    beta[:4] = 30
    Y = np.random.standard_normal(n) + np.dot(X, beta)

    loss = rr.squared_error(X, Y)
    penalty = rr.l1norm(p, lagrange=2.)

    # using nesta
    z = rr.zero(p)
    primal, dual = rr.nesta(loss, z, penalty,
                            tol=1.e-10,
                            epsilon=2.**(-np.arange(30)),
                            initial_dual=np.zeros(p))

    # using simple problem
    problem = rr.simple_problem(loss, penalty)
    problem.solve()
    nt.assert_true(np.linalg.norm(primal - problem.coefs) / np.linalg.norm(problem.coefs) < 1.e-3)

    # test None as smooth_atom
    rr.nesta(None, z, penalty,
             tol=1.e-10,
             epsilon=2.**(-np.arange(30)),
             initial_dual=np.zeros(p))

    # using coefficients to stop
    rr.nesta(loss, z, penalty,
             tol=1.e-10,
             epsilon=2.**(-np.arange(30)),
             initial_dual=np.zeros(p),
             coef_stop=True)
def __init__(self, X, Y):

    self.X = X
    n, p = X.shape
    self.Y = Y
    self._constant_term = (Y**2).sum()
    if n > p:
        self._quadratic_term = X.T.dot(X)
        self._linear_term = -2 * X.T.dot(Y)
    self._sqerror = rr.squared_error(X, Y)
def __init__(self, X, Y):

    self.X = rr.astransform(X)
    n, p = self.X.output_shape[0], self.X.input_shape[0]
    self.Y = Y
    if n > p:
        self._quadratic_term = np.dot(X.T, X)
        self._linear_term = -2 * np.dot(X.T, Y)
        self._constant_term = (Y**2).sum()
    self._sqerror = rr.squared_error(X, Y)
def test_weighted_l1_bound_loose():

    n, p = 100, 10
    X = np.random.standard_normal((n, p))
    Y = np.random.standard_normal(n)
    beta = np.linalg.pinv(X).dot(Y)
    bound = 2 * np.fabs(beta).sum()

    atom = rr.weighted_l1norm(np.ones(p), bound=bound)
    loss = rr.squared_error(X, Y)
    problem = rr.simple_problem(loss, atom)
    soln = problem.solve(tol=1.e-12, min_its=100)

    npt.assert_allclose(soln, beta)
def test_path_group_lasso():
    '''
    this test looks at the paths of three different parameterizations
    of the same problem
    '''
    n = 100
    X = np.random.standard_normal((n, 10))
    U = np.random.standard_normal((n, 2))
    Y = np.random.standard_normal(100)
    betaX = np.array([3, 4, 5, 0, 0] + [0] * 5)
    betaU = np.array([10, -5])
    Y += (np.dot(X, betaX) + np.dot(U, betaU)) * 5

    Xn = rr.normalize(np.hstack([np.ones((100, 1)), X]),
                      inplace=True,
                      center=True,
                      scale=True,
                      intercept_column=0).normalized_array()

    lasso = rr.lasso.squared_error(Xn[:, 1:],
                                   Y,
                                   penalty_structure=[0] * 7 + [1] * 3,
                                   nstep=10)

    sol = lasso.main(inner_tol=1.e-12, verbose=True)
    beta = np.array(sol['beta'].todense())

    sols = []
    sols_sep = []
    for l in sol['lagrange']:
        loss = rr.squared_error(Xn, Y, coef=1. / n)
        penalty = rr.group_lasso([rr.UNPENALIZED] + [0] * 7 + [1] * 3, l)  # matrix contains an intercept...
        problem = rr.simple_problem(loss, penalty)
        sols.append(problem.solve(tol=1.e-12).copy())

        sep = rr.separable((11,),
                           [rr.l2norm((7,), np.sqrt(7) * l),
                            rr.l2norm((3,), np.sqrt(3) * l)],
                           [np.arange(1, 8), np.arange(8, 11)])
        sep_problem = rr.simple_problem(loss, sep)
        sols_sep.append(sep_problem.solve(tol=1.e-12).copy())

    sols = np.array(sols).T
    sols_sep = np.array(sols_sep).T

    nt.assert_true(np.linalg.norm(beta - sols) / (1 + np.linalg.norm(beta)) <= 1.e-4)
    nt.assert_true(np.linalg.norm(beta - sols_sep) / (1 + np.linalg.norm(beta)) <= 1.e-4)
def __init__(self, X, Y, quadratic=None, initial=None, offset=None):

    rr.smooth_atom.__init__(self,
                            rr.astransform(X).input_shape,
                            coef=1.,
                            offset=offset,
                            quadratic=quadratic,
                            initial=initial)
    self.X = X
    self.Y = Y
    self.data = (X, Y)
    self._sqerror = rr.squared_error(X, Y)
def __init__(self, X, Y, quadratic=None, initial=None, offset=None):

    rr.smooth_atom.__init__(self,
                            X.input_shape,
                            coef=1.,
                            offset=offset,
                            quadratic=quadratic,
                            initial=initial)
    self.X = X
    self.Y = Y
    self._sqerror = rr.squared_error(X, Y)
def test_nesta_nonnegative():

    n, p, q = 1000, 20, 5
    X = np.random.standard_normal((n, p))
    beta = np.zeros(p)
    beta[:4] = 3
    Y = np.random.standard_normal(n) + np.dot(X, beta)
    A = np.random.standard_normal((q, p))

    loss = rr.squared_error(X, Y)
    penalty = rr.l1norm(p, lagrange=0.2)
    constraint = rr.nonnegative.linear(A)

    primal, dual = rr.nesta(loss, penalty, constraint)

    assert_almost_nonnegative(np.dot(A, primal))
def test_class():
    """
    runs several class methods on generic instance
    """
    n, p = 100, 20
    X = np.random.standard_normal((n, p))
    Y = np.random.standard_normal(n)
    loss = rr.squared_error(X, Y)
    pen = rr.l1norm(p, lagrange=1.0)
    problem = rr.simple_problem(loss, pen)
    problem.latexify()

    for debug, coef_stop, max_its in product([True, False],
                                             [True, False],
                                             [5, 100]):
        rr.gengrad(problem,
                   rr.power_L(X) ** 2,
                   max_its=max_its,
                   debug=debug,
                   coef_stop=coef_stop)
def test_admm(n=100, p=10):

    X = np.random.standard_normal((n, p))
    Y = np.random.standard_normal(n)
    loss = rr.squared_error(X, Y)
    D = np.identity(p)
    pen = rr.l1norm(p, lagrange=1.5)

    ADMM = admm_problem(loss, pen, ra.astransform(D), 0.5)
    ADMM.solve(niter=1000)
    coef1 = ADMM.atom_coefs

    problem2 = rr.simple_problem(loss, pen)
    coef2 = problem2.solve(tol=1.e-12, min_its=500)

    np.testing.assert_allclose(coef1, coef2, rtol=1.e-3, atol=1.e-4)
def test_solve_QP():
    """
    Check the R coordinate descent LASSO solver
    """

    n, p = 100, 200
    lam = 10

    np.random.seed(0)
    X = np.random.standard_normal((n, p))
    Y = np.random.standard_normal(n)

    loss = rr.squared_error(X, Y)
    pen = rr.l1norm(p, lagrange=lam)
    problem = rr.simple_problem(loss, pen)
    soln = problem.solve(min_its=500, tol=1.e-12)

    import rpy2.robjects.numpy2ri
    rpy2.robjects.numpy2ri.activate()

    tol = 1.e-5
    rpy.r.assign('X', X)
    rpy.r.assign('Y', Y)
    rpy.r.assign('lam', lam)

    R_code = """
    library(selectiveInference)
    p = ncol(X)
    soln_R = rep(0, p)
    grad = -t(X) %*% Y
    ever_active = c(1, rep(0, p-1))
    nactive = as.integer(1)
    kkt_tol = 1.e-12
    objective_tol = 1.e-12
    maxiter = 500
    soln_R = selectiveInference:::solve_QP(t(X) %*% X,
                                           lam,
                                           maxiter,
                                           soln_R,
                                           -t(X) %*% Y,
                                           grad,
                                           ever_active,
                                           nactive,
                                           kkt_tol,
                                           objective_tol,
                                           p)$soln
    """

    rpy.r(R_code)
    soln_R = np.asarray(rpy.r('soln_R'))
    rpy2.robjects.numpy2ri.deactivate()

    yield np.testing.assert_allclose, soln, soln_R, tol, tol, False, 'checking coordinate QP solver'
def test_path_group_lasso():
    """
    this test looks at the paths of three different parameterizations
    of the same problem
    """
    n = 100
    X = np.random.standard_normal((n, 10))
    U = np.random.standard_normal((n, 2))
    Y = np.random.standard_normal(100)
    betaX = np.array([3, 4, 5, 0, 0] + [0] * 5)
    betaU = np.array([10, -5])
    Y += (np.dot(X, betaX) + np.dot(U, betaU)) * 5

    Xn = rr.normalize(np.hstack([np.ones((100, 1)), X]),
                      inplace=True,
                      center=True,
                      scale=True,
                      intercept_column=0).normalized_array()

    lasso = rr.lasso.squared_error(Xn[:, 1:],
                                   Y,
                                   penalty_structure=[0] * 7 + [1] * 3,
                                   nstep=10)

    sol = lasso.main(inner_tol=1.0e-12, verbose=True)
    beta = np.array(sol["beta"].todense())

    sols = []
    sols_sep = []
    for l in sol["lagrange"]:
        loss = rr.squared_error(Xn, Y, coef=1.0 / n)
        penalty = rr.mixed_lasso([rr.UNPENALIZED] + [0] * 7 + [1] * 3,
                                 lagrange=l)  # matrix contains an intercept...
        problem = rr.simple_problem(loss, penalty)
        sols.append(problem.solve(tol=1.0e-12).copy())

        sep = rr.separable((11,),
                           [rr.l2norm((7,), np.sqrt(7) * l),
                            rr.l2norm((3,), np.sqrt(3) * l)],
                           [np.arange(1, 8), np.arange(8, 11)])
        sep_problem = rr.simple_problem(loss, sep)
        sols_sep.append(sep_problem.solve(tol=1.0e-12).copy())

    sols = np.array(sols).T
    sols_sep = np.array(sols_sep).T

    nt.assert_true(np.linalg.norm(beta - sols) / (1 + np.linalg.norm(beta)) <= 1.0e-4)
    nt.assert_true(np.linalg.norm(beta - sols_sep) / (1 + np.linalg.norm(beta)) <= 1.0e-4)
def test_choose_parameter(delta=2, p=60):

    signal = np.zeros(p)
    signal[(p//2):] += delta
    Z = np.random.standard_normal(p) + signal
    p = Z.shape[0]
    M = multiscale(p)
    M.scaling = np.sqrt(M.sizes)

    lam = choose_tuning_parameter(M)
    weights = (lam + np.sqrt(2 * np.log(p / M.sizes))) / np.sqrt(p)

    Z0 = Z - Z.mean()
    loss = rr.squared_error(ra.adjoint(M), Z0)
    penalty = rr.weighted_l1norm(weights, lagrange=1.)
    problem = rr.simple_problem(loss, penalty)
    coef = problem.solve()
    active = coef != 0

    if active.sum():
        X = M.form_matrix(M.slices[active])[0]
def __init__(self, X, Y, l_theory, l_min, l_1se, sigma_reid):

    parametric_method.__init__(self, X, Y, l_theory, l_min, l_1se, sigma_reid)
    self.lagrange = l_1se * np.ones(X.shape[1])

    n, p = self.X.shape
    n1 = int(self.selection_frac * n)
    X1, X2 = self.X1, self.X2 = self.X[:n1], self.X[n1:]
    Y1, Y2 = self.Y1, self.Y2 = self.Y[:n1], self.Y[n1:]

    pen = rr.weighted_l1norm(np.sqrt(n1) * self.lagrange, lagrange=1.)
    loss = rr.squared_error(X1, Y1)
    problem = rr.simple_problem(loss, pen)
    soln = problem.solve()

    self.active_set = np.nonzero(soln)[0]
    self.signs = np.sign(soln)[self.active_set]
    self._fit = True
def test_changepoint():

    p = 150
    M = multiscale(p)
    M.minsize = 10
    X = ra.adjoint(M)

    Y = np.random.standard_normal(p)
    Y[20:50] += 8
    Y += 2
    meanY = Y.mean()

    lammax = np.fabs(X.adjoint_map(Y)).max()
    penalty = rr.l1norm(X.input_shape, lagrange=0.5 * lammax)
    loss = rr.squared_error(X, Y - meanY)
    problem = rr.simple_problem(loss, penalty)
    soln = problem.solve()
    Yhat = X.linear_map(soln)
    Yhat += meanY

    plt.scatter(np.arange(p), Y)
    plt.plot(np.arange(p), Yhat)
def test_changepoint():

    p = 150
    M = multiscale(p)
    M.minsize = 10
    X = ra.adjoint(M)

    Y = np.random.standard_normal(p)
    Y[20:50] += 8
    Y += 2
    meanY = Y.mean()

    lammax = np.fabs(X.adjoint_map(Y)).max()
    penalty = rr.l1norm(X.input_shape, lagrange=0.5 * lammax)
    loss = rr.squared_error(X, Y - meanY)
    problem = rr.simple_problem(loss, penalty)
    soln = problem.solve()
    Yhat = X.linear_map(soln)
    Yhat += meanY

    plt.scatter(np.arange(p), Y)
    plt.plot(np.arange(p), Yhat)
    plt.show()
def highdim_model_inference(X,
                            y,
                            truth,
                            selection_algorithm,
                            sampler,
                            lam_min,
                            dispersion,
                            success_params=(1, 1),
                            fit_probability=keras_fit,
                            fit_args={'epochs': 10, 'sizes': [100] * 5, 'dropout': 0., 'activation': 'relu'},
                            alpha=0.1,
                            B=2000,
                            naive=True,
                            learner_klass=mixture_learner,
                            how_many=None):

    n, p = X.shape
    XTX = X.T.dot(X)

    instance_hash = hashlib.md5()
    instance_hash.update(X.tobytes())
    instance_hash.update(y.tobytes())
    instance_hash.update(truth.tobytes())
    instance_id = instance_hash.hexdigest()

    # run selection algorithm

    observed_set = repeat_selection(selection_algorithm, sampler, *success_params)
    observed_list = sorted(observed_set)

    # observed debiased LASSO estimate

    loss = rr.squared_error(X, y)
    pen = rr.l1norm(p, lagrange=lam_min)
    problem = rr.simple_problem(loss, pen)
    soln = problem.solve()
    grad = X.T.dot(X.dot(soln) - y)  # gradient at beta_hat

    M = pseudoinverse_debiasing_matrix(X, observed_list)

    observed_target = soln[observed_list] - M.dot(grad)
    tmp = X.dot(M.T)
    target_cov = tmp.T.dot(tmp) * dispersion
    cross_cov = np.identity(p)[:, observed_list] * dispersion

    if len(observed_list) > 0:

        if how_many is None:
            how_many = len(observed_list)
        observed_list = observed_list[:how_many]

        # find the target, based on the observed outcome

        (pivots,
         covered,
         lengths,
         pvalues,
         lower,
         upper) = [], [], [], [], [], []

        targets = []
        true_target = truth[observed_list]

        results = infer_set_target(selection_algorithm,
                                   observed_set,
                                   observed_list,
                                   sampler,
                                   observed_target,
                                   target_cov,
                                   cross_cov,
                                   hypothesis=true_target,
                                   fit_probability=fit_probability,
                                   fit_args=fit_args,
                                   success_params=success_params,
                                   alpha=alpha,
                                   B=B,
                                   learner_klass=learner_klass)

        for i, result in enumerate(results):

            (pivot,
             interval,
             pvalue,
             _) = result

            pvalues.append(pvalue)
            pivots.append(pivot)
            covered.append((interval[0] < true_target[i]) * (interval[1] > true_target[i]))
            lengths.append(interval[1] - interval[0])
            lower.append(interval[0])
            upper.append(interval[1])

        if len(pvalues) > 0:

            df = pd.DataFrame({'pivot': pivots,
                               'pvalue': pvalues,
                               'coverage': covered,
                               'length': lengths,
                               'upper': upper,
                               'lower': lower,
                               'id': [instance_id] * len(pvalues),
                               'target': true_target,
                               'variable': observed_list,
                               'B': [B] * len(pvalues)})

            if naive:

                (naive_pvalues,
                 naive_pivots,
                 naive_covered,
                 naive_lengths,
                 naive_upper,
                 naive_lower) = [], [], [], [], [], []

                for j, idx in enumerate(observed_list):
                    true_target = truth[idx]
                    target_sd = np.sqrt(target_cov[j, j])
                    observed_target_j = observed_target[j]
                    quantile = normal_dbn.ppf(1 - 0.5 * alpha)
                    naive_interval = (observed_target_j - quantile * target_sd,
                                      observed_target_j + quantile * target_sd)

                    naive_upper.append(naive_interval[1])
                    naive_lower.append(naive_interval[0])
                    naive_pivot = (1 - normal_dbn.cdf((observed_target_j - true_target) / target_sd))
                    naive_pivot = 2 * min(naive_pivot, 1 - naive_pivot)
                    naive_pivots.append(naive_pivot)

                    naive_pvalue = (1 - normal_dbn.cdf(observed_target_j / target_sd))
                    naive_pvalue = 2 * min(naive_pvalue, 1 - naive_pvalue)
                    naive_pvalues.append(naive_pvalue)

                    naive_covered.append((naive_interval[0] < true_target) * (naive_interval[1] > true_target))
                    naive_lengths.append(naive_interval[1] - naive_interval[0])

                naive_df = pd.DataFrame({'naive_pivot': naive_pivots,
                                         'naive_pvalue': naive_pvalues,
                                         'naive_coverage': naive_covered,
                                         'naive_length': naive_lengths,
                                         'naive_upper': naive_upper,
                                         'naive_lower': naive_lower,
                                         'variable': observed_list})

                df = pd.merge(df, naive_df, on='variable')

            return df
def test_scaling_and_centering_intercept_fit(debug=False):
    # N - number of data points
    # P - number of columns in design == number of betas
    N, P = 40, 30

    # an arbitrary positive offset for data and design
    offset = 2

    # design - with ones as last column
    X = np.random.normal(size=(N, P)) + 0 * offset
    X2 = X - X.mean(0)[None, :]
    X2 = X2 / np.std(X2, 0, ddof=1)[None, :]
    X2 = np.hstack([np.ones((X2.shape[0], 1)), X2])
    L = rr.normalize(X, center=True, scale=True, intercept=True)

    # data
    Y = np.random.normal(size=(N,)) + offset

    # lagrange for penalty
    lagrange = .1

    # Loss function (squared difference between fitted and actual data)
    loss = rr.squared_error(L, Y)

    penalties = [rr.constrained_positive_part(25, lagrange=lagrange),
                 rr.nonnegative(5)]
    groups = [slice(0, 25), slice(25, 30)]
    penalty = rr.separable((P + 1,), penalties, groups)

    initial = np.random.standard_normal(P + 1)
    composite_form = rr.separable_problem.fromatom(penalty, loss)
    solver = rr.FISTA(composite_form)
    solver.debug = debug
    solver.fit(tol=1.0e-12, min_its=200)
    coefs = solver.composite.coefs

    # Solve the problem with X2
    loss2 = rr.squared_error(X2, Y)

    initial2 = np.random.standard_normal(P + 1)
    composite_form2 = rr.separable_problem.fromatom(penalty, loss2)
    solver2 = rr.FISTA(composite_form2)
    solver2.debug = debug
    solver2.fit(tol=1.0e-12, min_its=200)
    coefs2 = solver2.composite.coefs

    for _ in range(10):
        beta = np.random.standard_normal(P + 1)
        g1 = loss.smooth_objective(beta, mode='grad')
        g2 = loss2.smooth_objective(beta, mode='grad')
        np.testing.assert_almost_equal(g1, g2)
        b1 = penalty.proximal(sq(1, beta, g1, 0))
        b2 = penalty.proximal(sq(1, beta, g2, 0))
        np.testing.assert_almost_equal(b1, b2)

        f1 = composite_form.objective(beta)
        f2 = composite_form2.objective(beta)
        np.testing.assert_almost_equal(f1, f2)

    np.testing.assert_almost_equal(composite_form.objective(coefs),
                                   composite_form.objective(coefs2))
    np.testing.assert_almost_equal(composite_form2.objective(coefs),
                                   composite_form2.objective(coefs2))

    nt.assert_true(np.linalg.norm(coefs - coefs2) / max(np.linalg.norm(coefs), 1) < 1.0e-04)
def test_solve_QP():
    """
    Check the R coordinate descent LASSO solver
    """

    n, p = 100, 50
    lam = 0.08

    X = np.random.standard_normal((n, p))

    loss = rr.squared_error(X, np.zeros(n), coef=1. / n)
    pen = rr.l1norm(p, lagrange=lam)
    E = np.zeros(p)
    E[2] = 1
    Q = rr.identity_quadratic(0, 0, E, 0)
    problem = rr.simple_problem(loss, pen)
    soln = problem.solve(Q, min_its=500, tol=1.e-12)

    numpy2ri.activate()

    rpy.r.assign('X', X)
    rpy.r.assign('E', E)
    rpy.r.assign('lam', lam)

    R_code = """
    library(selectiveInference)
    p = ncol(X)
    n = nrow(X)
    soln_R = rep(0, p)
    grad = 1. * E
    ever_active = as.integer(c(1, rep(0, p-1)))
    nactive = as.integer(1)
    kkt_tol = 1.e-12
    objective_tol = 1.e-16
    parameter_tol = 1.e-10
    maxiter = 500
    soln_R = selectiveInference:::solve_QP(t(X) %*% X / n,
                                           lam,
                                           maxiter,
                                           soln_R,
                                           E,
                                           grad,
                                           ever_active,
                                           nactive,
                                           kkt_tol,
                                           objective_tol,
                                           parameter_tol,
                                           p,
                                           TRUE,
                                           TRUE,
                                           TRUE)$soln

    # test wide solver
    Xtheta = rep(0, n)
    nactive = as.integer(1)
    ever_active = as.integer(c(1, rep(0, p-1)))
    soln_R_wide = rep(0, p)
    grad = 1. * E
    soln_R_wide = selectiveInference:::solve_QP_wide(X,
                                                     rep(lam, p),
                                                     0,
                                                     maxiter,
                                                     soln_R_wide,
                                                     E,
                                                     grad,
                                                     Xtheta,
                                                     ever_active,
                                                     nactive,
                                                     kkt_tol,
                                                     objective_tol,
                                                     parameter_tol,
                                                     p,
                                                     TRUE,
                                                     TRUE,
                                                     TRUE)$soln
    """

    rpy.r(R_code)

    soln_R = np.asarray(rpy.r('soln_R'))
    soln_R_wide = np.asarray(rpy.r('soln_R_wide'))
    numpy2ri.deactivate()

    tol = 1.e-5
    print(soln - soln_R)
    print(soln_R - soln_R_wide)
    G = X.T.dot(X).dot(soln) / n + E

    yield np.testing.assert_allclose, soln, soln_R, tol, tol, False, 'checking coordinate QP solver'
    yield np.testing.assert_allclose, soln, soln_R_wide, tol, tol, False, 'checking wide coordinate QP solver'
    yield np.testing.assert_allclose, G[soln != 0], -np.sign(soln[soln != 0]) * lam, tol, tol, False, 'checking active coordinate KKT for QP solver'
    yield nt.assert_true, np.fabs(G).max() < lam * (1. + 1.e-6), 'testing linfinity norm'
def form_loss(self, active_set):
    return squared_error(self.X[:, active_set], self.y)
def __init__(self, X, Y):
    self.X = X
    self.Y = Y
    self._sqerror = rr.squared_error(X, Y)
def test_solve_QP_lasso():
    """
    Check the R coordinate descent LASSO solver
    """

    n, p = 100, 200
    lam = 0.1

    X = np.random.standard_normal((n, p))
    Y = np.random.standard_normal(n)

    loss = rr.squared_error(X, Y, coef=1. / n)
    pen = rr.l1norm(p, lagrange=lam)
    problem = rr.simple_problem(loss, pen)
    soln = problem.solve(min_its=500, tol=1.e-12)

    numpy2ri.activate()

    rpy.r.assign('X', X)
    rpy.r.assign('Y', Y)
    rpy.r.assign('lam', lam)

    R_code = """
    library(selectiveInference)
    p = ncol(X)
    n = nrow(X)
    soln_R = rep(0, p)
    grad = -t(X) %*% Y / n
    ever_active = as.integer(c(1, rep(0, p-1)))
    nactive = as.integer(1)
    kkt_tol = 1.e-12
    objective_tol = 1.e-16
    parameter_tol = 1.e-10
    maxiter = 500
    soln_R = selectiveInference:::solve_QP(t(X) %*% X / n,
                                           lam,
                                           maxiter,
                                           soln_R,
                                           1. * grad,
                                           grad,
                                           ever_active,
                                           nactive,
                                           kkt_tol,
                                           objective_tol,
                                           parameter_tol,
                                           p,
                                           TRUE,
                                           TRUE,
                                           TRUE)$soln

    # test wide solver
    Xtheta = rep(0, n)
    nactive = as.integer(1)
    ever_active = as.integer(c(1, rep(0, p-1)))
    soln_R_wide = rep(0, p)
    grad = - t(X) %*% Y / n
    soln_R_wide = selectiveInference:::solve_QP_wide(X,
                                                     rep(lam, p),
                                                     0,
                                                     maxiter,
                                                     soln_R_wide,
                                                     1. * grad,
                                                     grad,
                                                     Xtheta,
                                                     ever_active,
                                                     nactive,
                                                     kkt_tol,
                                                     objective_tol,
                                                     parameter_tol,
                                                     p,
                                                     TRUE,
                                                     TRUE,
                                                     TRUE)$soln
    """

    rpy.r(R_code)

    soln_R = np.asarray(rpy.r('soln_R'))
    soln_R_wide = np.asarray(rpy.r('soln_R_wide'))
    numpy2ri.deactivate()

    tol = 1.e-5
    print(soln - soln_R)
    print(soln_R - soln_R_wide)

    yield np.testing.assert_allclose, soln, soln_R, tol, tol, False, 'checking coordinate QP solver for LASSO problem'
    yield np.testing.assert_allclose, soln, soln_R_wide, tol, tol, False, 'checking wide coordinate QP solver for LASSO problem'