def test_SmallLogReg():
    """Exercise ``blitzl1.LogRegProblem`` on a small 5x4 sparse problem.

    Failures are reported by printing a message; nothing is raised and
    nothing is returned.  The checks are:

    * the objective value and the first coefficient at penalty 2 match
      stored reference values;
    * the reported objective equals ``evaluate_loss`` plus the L1 penalty
      recomputed here with NumPy;
    * re-solving from a previous solution (with intercept) at a larger
      tolerance reports exactly one iteration.

    Side effect: the module-level blitzl1 options (intercept, tolerance,
    verbosity) are modified and left modified.
    """
    blitzl1.set_use_intercept(False)
    blitzl1.set_tolerance(0.0)
    blitzl1.set_verbose(False)

    A = sparse.csc_matrix(np.arange(20).reshape(5, 4))
    b = np.array([1, -1, -1, 1, 1])
    prob = blitzl1.LogRegProblem(A, b)

    # The same penalty weight is used for the solve and for the
    # independent recomputation of the objective below.
    l1_penalty = 2
    sol = prob.solve(l1_penalty)

    # print(...) with one string argument behaves the same on Python 2
    # and Python 3, unlike the bare print statement.
    if not approx_equal(sol.objective_value, 3.312655451335882):
        print("test SmallLogReg obj failed")
    if not approx_equal(sol.x[0], 0.0520996109147):
        print("test SmallLogReg x[0] failed")

    python_obj = (sol.evaluate_loss(A, b)
                  + l1_penalty * np.linalg.norm(sol.x, ord=1))
    if not approx_equal(sol.objective_value, python_obj):
        print("test SmallLogReg python_obj failed")

    # Warm start: solve once at tolerance 1e-4, then hand that solution
    # back as the starting point of a solve at tolerance 1e-2.
    blitzl1.set_use_intercept(True)
    blitzl1.set_tolerance(0.0001)
    sol = prob.solve(1.5)
    blitzl1.set_tolerance(0.01)
    sol2 = prob.solve(1.5, initial_x=sol.x, initial_intercept=sol.intercept)
    if sol2._num_iterations != 1:
        print("test SmallLogReg initial conditions failed")
def test_SimpleLasso():
    """Check ``blitzl1.LassoProblem`` on a 4x4 diagonal design.

    The design matrix is the identity with its last two diagonal entries
    set to 2.  The test solves with penalty 1, first without and then
    with an intercept, and compares coefficients, intercept and objective
    against stored reference values.  It also checks that the reported
    objective equals ``evaluate_loss`` plus the L1 norm of the solution.

    Failures are reported by printing a message; nothing is raised or
    returned.  Module-level blitzl1 options are modified as a side effect.
    """
    blitzl1.set_use_intercept(False)
    blitzl1.set_tolerance(0.0)
    blitzl1.set_verbose(False)

    A = np.eye(4)
    A[3, 3] = 2.0
    A[2, 2] = 2.0
    b = np.array([5., -2., 2., -6.])
    A = sparse.csc_matrix(A)

    prob = blitzl1.LassoProblem(A, b)

    # --- no intercept -------------------------------------------------
    sol = prob.solve(1)
    # print(...) with a single string works on both Python 2 and 3.
    if not approx_equal(sol.x[0], 4.0) or not approx_equal(sol.x[3], -2.75):
        print("test SimpleLasso basic failed")

    # --- with intercept -----------------------------------------------
    blitzl1.set_use_intercept(True)
    sol = prob.solve(1)
    if not approx_equal(sol.intercept, -0.25):
        print("test SimpleLasso intercept failed")
    if not approx_equal(sol.objective_value, 9.75):
        print("test SimpleLasso obj failed")

    # Penalty weight is 1, so the penalty term is just the L1 norm.
    python_obj = sol.evaluate_loss(A, b) + np.linalg.norm(sol.x, ord=1)
    if not approx_equal(sol.objective_value, python_obj):
        print("test SimpleLasso python_obj failed")
def test_SimpleLasso():
    """Solve a tiny diagonal Lasso problem and compare to known values.

    Uses the design ``diag(1, 1, 2, 2)`` and targets ``(5, -2, 2, -6)``
    with an L1 penalty weight of 1.  Checked quantities:

    * ``x[0]`` and ``x[3]`` when no intercept is fitted;
    * the intercept and the objective value when an intercept is fitted;
    * agreement between the solver's objective and the value rebuilt from
      ``evaluate_loss`` and the L1 norm of ``x``.

    Each failed check prints one line; the function neither raises nor
    returns a value.  blitzl1's module-level options are changed.
    """
    blitzl1.set_use_intercept(False)
    blitzl1.set_tolerance(0.0)
    blitzl1.set_verbose(False)

    # Same matrix as np.eye(4) with entries [2, 2] and [3, 3] set to 2.0.
    A = sparse.csc_matrix(np.diag([1., 1., 2., 2.]))
    b = np.array([5., -2., 2., -6.])
    prob = blitzl1.LassoProblem(A, b)

    sol = prob.solve(1)
    coefficients_ok = (approx_equal(sol.x[0], 4.0)
                       and approx_equal(sol.x[3], -2.75))
    if not coefficients_ok:
        # Parenthesised form: valid on Python 2 and Python 3 alike.
        print("test SimpleLasso basic failed")

    blitzl1.set_use_intercept(True)
    sol = prob.solve(1)
    if not approx_equal(sol.intercept, -0.25):
        print("test SimpleLasso intercept failed")
    if not approx_equal(sol.objective_value, 9.75):
        print("test SimpleLasso obj failed")

    l1_term = np.linalg.norm(sol.x, ord=1)
    python_obj = sol.evaluate_loss(A, b) + l1_term
    if not approx_equal(sol.objective_value, python_obj):
        print("test SimpleLasso python_obj failed")
def set_objective(self, X, y, lmbd):
    """Record the problem data and create the blitzl1 logistic problem.

    Parameters
    ----------
    X, y : design matrix and targets, stored on the instance and passed
        to ``blitzl1.LogRegProblem`` unchanged.
    lmbd : regularisation strength, stored on the instance as given.
    """
    self.X = X
    self.y = y
    # Stored exactly as received: a rescaling by the number of samples
    # (lmbd / X.shape[0]) existed here but is disabled.
    self.lmbd = lmbd

    # The intercept switch is set on the blitzl1 module before the
    # problem object is created.
    blitzl1.set_use_intercept(False)
    self.problem = blitzl1.LogRegProblem(self.X, self.y)
def test_SmallLasso():
    """Check Lasso solving, solution save/load and solver logging.

    Three things are exercised on a 5x4 sparse problem:

    1. the objective at penalty 2 matches a stored reference value;
    2. a solution written with ``save`` and read back with
       ``blitzl1.load_solution`` has identical ``x`` and objective;
    3. a solve with ``log_directory`` set writes numbered ``time.N`` /
       ``obj.N`` files; the last complete pair must report the final
       objective and a strictly positive time.

    Failures are reported by printing a message; nothing is raised or
    returned.  Temporary files under ``/tmp`` are removed afterwards and
    blitzl1's module-level options are modified.
    """
    # Imported here so the block does not rely on a file-level import.
    import shutil

    blitzl1.set_use_intercept(False)
    blitzl1.set_tolerance(0.0)
    blitzl1.set_verbose(False)

    A = sparse.csc_matrix(np.arange(20).reshape(5, 4))
    b = np.arange(5)
    prob = blitzl1.LassoProblem(A, b)
    sol = prob.solve(2)
    if not approx_equal(sol.objective_value, 0.4875):
        print("test SmallLasso obj failed")

    # --- save / load round trip ---------------------------------------
    save_path = "/tmp/blitzl1_save_test"
    sol.save(save_path)
    sol2 = blitzl1.load_solution(save_path)
    if not np.all(sol.x == sol2.x):
        print("test SmallLasso save_x failed")
    if sol.objective_value != sol2.objective_value:
        print("test SmallLasso save_obj failed")
    os.remove(save_path)

    # --- logging ------------------------------------------------------
    blitzl1.set_tolerance(0.1)
    log_path = "/tmp/blitzl1_log_test/"
    sol = prob.solve(5.0, log_directory=log_path)

    # Walk the numbered log files until one is missing or unparsable,
    # remembering the last pair that was read successfully.  The values
    # are kept in dedicated names so the ``time`` module is not shadowed.
    last_time = None
    last_obj = None
    log_point = 0
    while True:
        time_file = "%s/time.%d" % (log_path, log_point)
        obj_file = "%s/obj.%d" % (log_path, log_point)
        try:
            with open(time_file) as handle:
                logged_time = float(handle.read())
            with open(obj_file) as handle:
                logged_obj = float(handle.read())
        except (IOError, OSError, ValueError):
            # Missing file or non-numeric content marks the end of the
            # log; other exceptions (e.g. KeyboardInterrupt) propagate.
            break
        last_time, last_obj = logged_time, logged_obj
        log_point += 1

    if last_obj is None:
        # No log entry was written at all: report both log checks as
        # failed instead of crashing on an undefined variable.
        print("test SmallLasso log_obj failed")
        print("test SmallLasso log_time failed")
    else:
        if not approx_equal(last_obj, sol.objective_value):
            print("test SmallLasso log_obj failed")
        if last_time <= 0.0:
            print("test SmallLasso log_time failed")

    # Remove the log directory without going through a shell.
    shutil.rmtree(log_path, ignore_errors=True)
def test_SolverOptions():
    """Check that each blitzl1 option setter is reflected by its getter.

    For every (setter, getter, value) triple the value is set and then
    read back; a mismatch prints one failure line.  Nothing is raised or
    returned.  The options are left at the last value set for each
    (tolerance 0.027, max_time 557.0, use_intercept False, verbose False).
    """
    # Order matters: it is the order in which the options are mutated.
    round_trips = (
        (blitzl1.set_tolerance, blitzl1.get_tolerance,
         0.027, "tolerance"),
        (blitzl1.set_max_time, blitzl1.get_max_time,
         557.0, "max_time"),
        (blitzl1.set_use_intercept, blitzl1.get_use_intercept,
         True, "use_intercept (True)"),
        (blitzl1.set_use_intercept, blitzl1.get_use_intercept,
         False, "use_intercept (False)"),
        (blitzl1.set_verbose, blitzl1.get_verbose,
         True, "verbose (True)"),
        (blitzl1.set_verbose, blitzl1.get_verbose,
         False, "verbose (False)"),
    )
    for setter, getter, value, label in round_trips:
        setter(value)
        # Plain inequality is kept so values that compare equal to the
        # expected one (e.g. 1 vs True) are still accepted.
        if getter() != value:
            # Single-argument print(...) runs on Python 2 and Python 3.
            print("test SolverOptions %s failed" % label)
def linear_cv(dataset_name, tol=1e-3, compute_jac=True, model_name="lasso"):
    """Measure how iterates and Jacobians approach a reference solution.

    A reference coefficient vector ``beta_star`` is computed with an
    external solver (``Lasso_cel`` for the lasso, blitzl1 for logistic
    regression).  A reference Jacobian ``jac_star`` is obtained by solving
    a linear system with the model's Hessian restricted to the support.
    The iterates returned by ``get_beta_jac_iterdiff`` are then compared
    with both references.

    Parameters
    ----------
    dataset_name : key passed to ``load_libsvm`` and used to look up
        ``p_alphas`` and ``max_iters``.
    tol : tolerance forwarded to the model, the lasso reference solver
        (rescaled by ``norm(y)**2 / 2``) and ``get_beta_jac_iterdiff``.
    compute_jac : forwarded to ``get_beta_jac_iterdiff``.
    model_name : ``"lasso"`` or ``"logreg"``.

    Returns
    -------
    tuple
        ``(dataset_name, p_alpha, diff_beta, diff_jac, n_iter, supp_id)``
        where ``diff_beta`` / ``diff_jac`` are the per-iterate norms of the
        difference to the references, ``n_iter`` is the number of stored
        iterates and ``supp_id`` is one past the index of the last iterate
        whose support differs from that of ``beta_star`` (0 if none does).

    Raises
    ------
    ValueError
        If ``model_name`` is not one of the two supported values.
    """
    # Fail early and explicitly; otherwise ``model`` and ``clf`` would be
    # left unbound and surface later as an UnboundLocalError.
    if model_name not in ("lasso", "logreg"):
        raise ValueError(
            "model_name must be 'lasso' or 'logreg', got %r" % (model_name,))

    X, y = load_libsvm(dataset_name)
    X = csc_matrix(X)
    n_samples, n_features = X.shape
    p_alpha = p_alphas[dataset_name, model_name]
    max_iter = max_iters[dataset_name]

    if model_name == "lasso":
        model = Lasso(X, y, 0, max_iter=max_iter, tol=tol)
    elif model_name == "logreg":
        model = SparseLogreg(X, y, 0, max_iter=max_iter, tol=tol)

    # compute_alpha_max() is exponentiated here, so it is treated as a
    # log-scale value; alpha is a fixed fraction of the result.
    alpha_max = np.exp(model.compute_alpha_max())
    alpha = p_alpha * alpha_max
    log_alpha = np.log(alpha)

    # --- reference solution -------------------------------------------
    if model_name == "lasso":
        clf = Lasso_cel(
            alpha=alpha, fit_intercept=False, warm_start=True,
            tol=tol * norm(y)**2 / 2, max_iter=10000)
        clf.fit(X, y)
        beta_star = clf.coef_
        mask = beta_star != 0
        dense = beta_star[mask]
    elif model_name == "logreg":
        # blitzl1 is configured through module-level options.  It is
        # called with the penalty multiplied by n_samples.
        blitzl1.set_use_intercept(False)
        blitzl1.set_tolerance(1e-32)
        blitzl1.set_verbose(True)
        prob = blitzl1.LogRegProblem(X, y)
        clf = prob.solve(alpha * n_samples)
        beta_star = clf.x
        mask = beta_star != 0
        mask = np.array(mask)
        dense = beta_star[mask]

    # --- reference Jacobian on the support ----------------------------
    v = -n_samples * alpha * np.sign(beta_star[mask])
    mat_to_inv = model.get_hessian(mask, dense, log_alpha)
    # Only the first element of cg's return value is used (the solution
    # vector, assuming this is scipy.sparse.linalg.cg -- confirm).
    jac_temp = cg(mat_to_inv, v, tol=1e-10)
    jac_star = np.zeros(n_features)
    jac_star[mask] = jac_temp[0]

    # --- iterates to compare against the references -------------------
    list_beta, list_jac = get_beta_jac_iterdiff(
        X, y, log_alpha, model, save_iterates=True, tol=tol,
        max_iter=max_iter, compute_jac=compute_jac)
    diff_beta = norm(list_beta - beta_star, axis=1)
    diff_jac = norm(list_jac - jac_star, axis=1)

    # Scan the iterates from last to first for the most recent one whose
    # support still differs from the reference support.  Initialising
    # supp_id up front keeps it defined when there are no iterates.
    supp_star = beta_star != 0
    n_iter = list_beta.shape[0]
    supp_id = 0
    for i in np.arange(n_iter)[::-1]:
        supp = list_beta[i, :] != 0
        if not np.all(supp == supp_star):
            supp_id = i + 1
            break
    return dataset_name, p_alpha, diff_beta, diff_jac, n_iter, supp_id
def set_objective(self, X, y, lmbd):
    """Keep the problem data and build the blitzl1 Lasso problem.

    Parameters
    ----------
    X, y : design matrix and targets; stored on the instance and handed
        to ``blitzl1.LassoProblem``.
    lmbd : regularisation strength; stored on the instance unchanged.
    """
    self.X = X
    self.y = y
    self.lmbd = lmbd

    # Intercept fitting is switched off on the blitzl1 module before the
    # problem object is created.
    blitzl1.set_use_intercept(False)
    self.problem = blitzl1.LassoProblem(self.X, self.y)
def set_objective(self, X, y, lmbd):
    """Keep the problem data and build the blitzl1 logistic problem.

    Parameters
    ----------
    X, y : design matrix and targets; stored on the instance and handed
        to ``blitzl1.LogRegProblem``.
    lmbd : regularisation strength; stored on the instance unchanged.
    """
    self.X = X
    self.y = y
    self.lmbd = lmbd

    # Module-level blitzl1 options: no intercept, tolerance set to 0.
    blitzl1.set_use_intercept(False)
    blitzl1.set_tolerance(0)
    self.problem = blitzl1.LogRegProblem(self.X, self.y)
indptr[start:(end + 1)] = tmp_indptr + tot names[start:end] = [(j, k) for k in range(j, p)] tot += Xint_sub.getnnz() start = end end += Xint_sub.shape[1] - 1 indices = np.hstack(indices) indices = indices.astype('int64') Xint = sp.csc_matrix((np.ones(tot, dtype=np.int8), indices, indptr), dtype=np.int8, shape=(n, p * (p + 1) / 2)) # Run blitz on SNPs data with interactions blitzl1.set_tolerance(args.tol) blitzl1.set_verbose(False) blitzl1.set_use_intercept(args.useBias) prob = blitzl1.LassoProblem(Xint, y) # Compute lambda_max t0_lammax = time.time() lammax = prob.compute_lambda_max() t1_lammax = time.time() csv_writer_preproc.writerow([t1_lammax - t0_lammax]) file_preproc.close() # Define the values of lambda for which the solution will be computed lam = [ lammax * pow(10, np.log10(args.lambdaMinRatio) * t / args.nlambda) for t in range(1, args.nlambda + 1) ]
tol=tol, verbose=True, strategy=3, min_ws_size=min_ws_size, screening=0) dur_a5g = time.time() - t0 print("A5G time %.4f" % (dur_a5g)) beta = np.array(a5g_res[0]) gaps = a5g_res[2] times = a5g_res[3] print(beta[beta != 0]) from a5g.utils import primal, dual R = y - X_new.dot(beta) dual_scale = max(alpha, np.max(np.abs(X_new.T.dot(R)))) p_obj = primal(R, beta, alpha) d_obj = dual(y, R / dual_scale, alpha, (y**2).sum()) print(p_obj - d_obj) assert (p_obj - d_obj) < tol t0 = time.time() prob = blitzl1.LassoProblem(X_new, y) blitzl1.set_use_intercept(True) blitzl1.set_tolerance(tol) sol = prob.solve(alpha) print("Blitz time %.3f s" % (time.time() - t0)) beta_blitz = sol.x[sol.x != 0] R = y - X_new.dot(sol.x) p_obj_blitz = 0.5 * (R**2).sum() + alpha * norm(sol.x, ord=1)
# parameters j_star = np.argmax(np.abs(X.T.dot(y))) alpha_max = np.linalg.norm(X.T.dot(y), ord=np.inf) n_alphas = 5 eps = 1e-3 alpha_ratio = eps**(1. / (n_alphas - 1)) # alphas = np.array([alpha_max * (alpha_ratio ** i) for i in range(0, n_alphas)]) max_iter = 5000 tol = 1e-8 scg = NO_SCREENING X = csc_matrix(X) tic = time.time() intercept, sp_beta, sp_gap, sp_n_iters, _ =\ sp_lasso_path(X, y.copy(), [alpha_max / 100.], eps=tol, max_iter=max_iter, screening=scg, j_star=j_star) print "our time = ", time.time() - tic from sklearn import linear_model clf = linear_model.Lasso(alpha=alpha_max / 100. / n_samples, fit_intercept=True) clf.fit(X, y) import blitzl1 blitzl1.set_use_intercept(1) prob = blitzl1.LassoProblem(X, y) sol = prob.solve(alpha_max / 100.) print "intercept = ", intercept, clf.intercept_, sol.intercept