def test_SmallLogReg():
    """Check the L1-regularised logistic regression solver on a 5x4 problem.

    Solves with regularisation 2 and no intercept, then compares the
    reported objective and first coefficient against reference values and
    against an objective recomputed from ``evaluate_loss`` plus the weighted
    L1 norm. Finally verifies that re-solving from a tighter-tolerance
    solution, with a looser tolerance, takes exactly one iteration.

    Failures are reported by printing a message; nothing is raised.
    """
    blitzl1.set_use_intercept(False)
    blitzl1.set_tolerance(0.0)
    blitzl1.set_verbose(False)

    A = np.arange(20).reshape(5, 4)
    b = np.array([1, -1, -1, 1, 1])
    A = sparse.csc_matrix(A)
    prob = blitzl1.LogRegProblem(A, b)
    sol = prob.solve(2)

    # The single-argument call form prints identically on Python 2 and 3.
    if not approx_equal(sol.objective_value, 3.312655451335882):
        print("test SmallLogReg obj failed")
    if not approx_equal(sol.x[0], 0.0520996109147):
        print("test SmallLogReg x[0] failed")

    # Recompute the objective independently: loss + lambda * ||x||_1.
    python_obj = sol.evaluate_loss(A, b) + 2 * np.linalg.norm(sol.x, ord=1)
    if not approx_equal(sol.objective_value, python_obj):
        print("test SmallLogReg python_obj failed")

    # Warm start: solve tightly first, then re-solve loosely from that point.
    blitzl1.set_use_intercept(True)
    blitzl1.set_tolerance(0.0001)
    sol = prob.solve(1.5)

    blitzl1.set_tolerance(0.01)
    sol2 = prob.solve(1.5, initial_x=sol.x, initial_intercept=sol.intercept)
    if sol2._num_iterations != 1:
        print("test SmallLogReg initial conditions failed")
def test_SimpleLasso():
    """Check the Lasso solver on a 4x4 diagonal design with known answers.

    The design is the identity with the last two diagonal entries set to 2.
    The test first solves without an intercept and checks two coefficients,
    then solves with an intercept and checks the intercept, the objective
    value, and agreement with an objective recomputed in Python.

    Failures are reported by printing a message; nothing is raised.

    NOTE(review): an identically named function is defined again later in
    this file; at import time the later definition replaces this one.
    """
    blitzl1.set_use_intercept(False)
    blitzl1.set_tolerance(0.0)
    blitzl1.set_verbose(False)

    A = np.eye(4)
    A[3, 3] = 2.0
    A[2, 2] = 2.0
    b = np.array([5., -2., 2., -6.])
    A = sparse.csc_matrix(A)
    prob = blitzl1.LassoProblem(A, b)

    sol = prob.solve(1)
    # The single-argument call form prints identically on Python 2 and 3.
    if not approx_equal(sol.x[0], 4.0) or not approx_equal(sol.x[3], -2.75):
        print("test SimpleLasso basic failed")

    blitzl1.set_use_intercept(True)
    sol = prob.solve(1)
    if not approx_equal(sol.intercept, -0.25):
        print("test SimpleLasso intercept failed")
    if not approx_equal(sol.objective_value, 9.75):
        print("test SimpleLasso obj failed")

    # Recompute the objective independently: loss + 1 * ||x||_1.
    python_obj = sol.evaluate_loss(A, b) + np.linalg.norm(sol.x, ord=1)
    if not approx_equal(sol.objective_value, python_obj):
        print("test SimpleLasso python_obj failed")
def test_SimpleLasso():
    """Check the Lasso solver on a small diagonal problem.

    Builds ``diag(1, 1, 2, 2)`` as a CSC matrix, solves with regularisation
    1 both without and with an intercept, and compares selected
    coefficients, the intercept and the objective against reference values.
    The objective is also cross-checked against ``evaluate_loss`` plus the
    L1 norm of the solution.

    Failures are reported by printing a message; nothing is raised.

    NOTE(review): this redefines a ``test_SimpleLasso`` that appears earlier
    in the file with the same body; only this later definition is reachable
    by name after import.
    """
    blitzl1.set_use_intercept(False)
    blitzl1.set_tolerance(0.0)
    blitzl1.set_verbose(False)

    A = np.eye(4)
    A[2, 2] = 2.0
    A[3, 3] = 2.0
    b = np.array([5., -2., 2., -6.])
    A = sparse.csc_matrix(A)
    prob = blitzl1.LassoProblem(A, b)

    # Without intercept.
    sol = prob.solve(1)
    # print(...) with one string argument behaves the same on Python 2 and 3.
    if not approx_equal(sol.x[0], 4.0) or not approx_equal(sol.x[3], -2.75):
        print("test SimpleLasso basic failed")

    # With intercept.
    blitzl1.set_use_intercept(True)
    sol = prob.solve(1)
    if not approx_equal(sol.intercept, -0.25):
        print("test SimpleLasso intercept failed")
    if not approx_equal(sol.objective_value, 9.75):
        print("test SimpleLasso obj failed")

    python_obj = sol.evaluate_loss(A, b) + np.linalg.norm(sol.x, ord=1)
    if not approx_equal(sol.objective_value, python_obj):
        print("test SimpleLasso python_obj failed")
def test_SmallLasso():
    """Check Lasso objective, solution save/load, and solver logging.

    Three things are exercised on a 5x4 problem:

    1. The objective for regularisation 2 matches a reference value.
    2. A solution written with ``save`` and read back with
       ``blitzl1.load_solution`` has identical ``x`` and objective value.
    3. Solving with ``log_directory`` writes ``time.<k>`` / ``obj.<k>``
       files; the last readable pair must match the final objective and
       carry a positive time.

    Failures are reported by printing a message; nothing is raised. The
    temporary save file and log directory under ``/tmp`` are removed.
    """
    import shutil  # local import: only needed for the log-directory cleanup

    blitzl1.set_use_intercept(False)
    blitzl1.set_tolerance(0.0)
    blitzl1.set_verbose(False)

    A = np.arange(20).reshape(5, 4)
    b = np.arange(5)
    A = sparse.csc_matrix(A)
    prob = blitzl1.LassoProblem(A, b)
    sol = prob.solve(2)
    if not approx_equal(sol.objective_value, 0.4875):
        print("test SmallLasso obj failed")

    # Round-trip the solution through disk.
    save_path = "/tmp/blitzl1_save_test"
    sol.save(save_path)
    sol2 = blitzl1.load_solution(save_path)
    if not np.all(sol.x == sol2.x):
        print("test SmallLasso save_x failed")
    if sol.objective_value != sol2.objective_value:
        print("test SmallLasso save_obj failed")
    os.remove(save_path)

    # Solve again with logging enabled and read the log points back.
    blitzl1.set_tolerance(0.1)
    log_path = "/tmp/blitzl1_log_test/"
    sol = prob.solve(5.0, log_directory=log_path)

    last_time = None
    last_obj = None
    log_point = 0
    while True:
        time_file = "%s/time.%d" % (log_path, log_point)
        obj_file = "%s/obj.%d" % (log_path, log_point)
        try:
            with open(time_file) as handle:
                point_time = float(handle.read())
            with open(obj_file) as handle:
                point_obj = float(handle.read())
        except (IOError, OSError, ValueError):
            # First missing or unparsable log point marks the end of the log.
            break
        last_time, last_obj = point_time, point_obj
        log_point += 1

    # If no log point could be read, both log checks count as failed
    # (previously this path crashed with a NameError).
    if last_obj is None or not approx_equal(last_obj, sol.objective_value):
        print("test SmallLasso log_obj failed")
    if last_time is None or last_time <= 0.0:
        print("test SmallLasso log_time failed")

    # Like `rm -r`, do not raise if the directory is already gone.
    shutil.rmtree(log_path, ignore_errors=True)
def test_StatusMessage():
    """Check the ``status`` string reported for each stopping condition.

    Solves a dense 10x10 Lasso problem three times with regularisation 2000:

    * tolerance 0.0 -> expects ``"reached machine precision"``
    * tolerance 0.1 -> expects ``"reached stopping tolerance"``
    * tolerance 0.0 and max time 0.0 -> expects ``"reached time limit"``

    Failures are reported by printing a message; nothing is raised. The
    max-time setting is restored afterwards so the zero time limit does not
    affect solves that run later in the same process.
    """
    blitzl1.set_tolerance(0.0)
    blitzl1.set_verbose(False)

    n = 10
    d = 10
    # `np.float` was only an alias for the builtin and has been removed from
    # recent NumPy releases; the builtin `float` yields the same float64 dtype.
    A = np.arange(n * d, dtype=float).reshape(n, d)
    b = np.arange(n, dtype=float)
    prob = blitzl1.LassoProblem(A, b)

    sol = prob.solve(2000.)
    if sol.status != "reached machine precision":
        print("test StatusMessage machine precision failed")

    blitzl1.set_tolerance(0.1)
    sol = prob.solve(2000.)
    if sol.status != "reached stopping tolerance":
        print("test StatusMessage stopping tolerance failed")

    blitzl1.set_tolerance(0.0)
    previous_max_time = blitzl1.get_max_time()
    blitzl1.set_max_time(0.0)
    try:
        sol = prob.solve(2000.)
    finally:
        # Undo the zero time limit even if the solve raises.
        blitzl1.set_max_time(previous_max_time)
    if sol.status != "reached time limit":
        print("test StatusMessage time limit failed")
def test_SolverOptions():
    """Check that each solver option setter is reflected by its getter.

    Covers tolerance, max time, and both boolean states of the
    use-intercept and verbose flags. On exit, use-intercept and verbose are
    both left ``False``, tolerance is 0.027 and max time is 557.0.

    Failures are reported by printing a message; nothing is raised.
    """
    blitzl1.set_tolerance(0.027)
    if blitzl1.get_tolerance() != 0.027:
        print("test SolverOptions tolerance failed")

    blitzl1.set_max_time(557.0)
    if blitzl1.get_max_time() != 557.0:
        print("test SolverOptions max_time failed")

    # Round-trip each boolean option through True and then False.
    # "%s" % True renders as "True", so the messages are unchanged.
    for flag in (True, False):
        blitzl1.set_use_intercept(flag)
        if blitzl1.get_use_intercept() != flag:
            print("test SolverOptions use_intercept (%s) failed" % flag)

    for flag in (True, False):
        blitzl1.set_verbose(flag)
        if blitzl1.get_verbose() != flag:
            print("test SolverOptions verbose (%s) failed" % flag)
def linear_cv(dataset_name, tol=1e-3, compute_jac=True, model_name="lasso"):
    """Measure how iterative differentiation converges to a reference.

    A reference solution ``beta_star`` is computed with an external solver
    (``Lasso_cel`` for ``"lasso"``, blitzl1 for ``"logreg"``), and a
    reference Jacobian ``jac_star`` is obtained by solving a linear system
    restricted to the support of ``beta_star``. The iterates returned by
    ``get_beta_jac_iterdiff`` are then compared against both references.

    Parameters
    ----------
    dataset_name : str
        Key passed to ``load_libsvm`` and used to look up ``p_alphas`` and
        ``max_iters``.
    tol : float
        Tolerance forwarded to the model, to ``Lasso_cel`` (rescaled by
        ``norm(y)**2 / 2``) and to ``get_beta_jac_iterdiff``.
    compute_jac : bool
        Forwarded to ``get_beta_jac_iterdiff``.
    model_name : {"lasso", "logreg"}
        Which model and reference solver to use.

    Returns
    -------
    tuple
        ``(dataset_name, p_alpha, diff_beta, diff_jac, n_iter, supp_id)``
        where ``diff_beta`` / ``diff_jac`` are per-iterate norms of the
        distance to the references, ``n_iter`` is the number of stored
        iterates, and ``supp_id`` is one past the index of the last iterate
        whose support differs from that of ``beta_star`` (0 if none does).

    Raises
    ------
    ValueError
        If ``model_name`` is neither ``"lasso"`` nor ``"logreg"``.
    """
    X, y = load_libsvm(dataset_name)
    X = csc_matrix(X)
    n_samples, n_features = X.shape
    p_alpha = p_alphas[dataset_name, model_name]
    max_iter = max_iters[dataset_name]

    if model_name == "lasso":
        model = Lasso(X, y, 0, max_iter=max_iter, tol=tol)
    elif model_name == "logreg":
        model = SparseLogreg(X, y, 0, max_iter=max_iter, tol=tol)
    else:
        # Previously this fell through and failed later with a NameError.
        raise ValueError(
            "model_name must be 'lasso' or 'logreg', got %r" % (model_name,))

    alpha_max = np.exp(model.compute_alpha_max())
    alpha = p_alpha * alpha_max
    log_alpha = np.log(alpha)

    # Reference solution from an external solver.
    if model_name == "lasso":
        clf = Lasso_cel(
            alpha=alpha, fit_intercept=False, warm_start=True,
            tol=tol * norm(y)**2 / 2, max_iter=10000)
        clf.fit(X, y)
        beta_star = clf.coef_
    else:
        blitzl1.set_use_intercept(False)
        blitzl1.set_tolerance(1e-32)
        blitzl1.set_verbose(True)
        prob = blitzl1.LogRegProblem(X, y)
        clf = prob.solve(alpha * n_samples)
        beta_star = clf.x

    mask = np.asarray(beta_star != 0)
    dense = beta_star[mask]

    # Reference Jacobian: solve the linear system on the support only.
    v = -n_samples * alpha * np.sign(beta_star[mask])
    mat_to_inv = model.get_hessian(mask, dense, log_alpha)
    # NOTE(review): if `cg` is scipy.sparse.linalg.cg, newer SciPy releases
    # rename `tol` to `rtol` - confirm against the SciPy version in use.
    jac_temp = cg(mat_to_inv, v, tol=1e-10)
    jac_star = np.zeros(n_features)
    # Only element 0 of the result is used; presumably the solution vector.
    jac_star[mask] = jac_temp[0]

    list_beta, list_jac = get_beta_jac_iterdiff(
        X, y, log_alpha, model, save_iterates=True, tol=tol,
        max_iter=max_iter, compute_jac=compute_jac)

    diff_beta = norm(list_beta - beta_star, axis=1)
    diff_jac = norm(list_jac - jac_star, axis=1)

    # Walk the iterates backwards to find the last one whose support
    # differs from the reference. Initialising first keeps `supp_id`
    # defined when there are no iterates or when every support matches.
    supp_star = beta_star != 0
    n_iter = list_beta.shape[0]
    supp_id = 0
    for i in np.arange(n_iter)[::-1]:
        supp = list_beta[i, :] != 0
        if not np.all(supp == supp_star):
            supp_id = i + 1
            break

    return dataset_name, p_alpha, diff_beta, diff_jac, n_iter, supp_id
import blitzl1
import sys
import os
import numpy as np
from scipy import sparse

# Demo: solve a random 100 x 1000 Lasso problem at 10% of lambda_max and
# drop into an interactive IPython shell to inspect the result.

blitzl1.set_verbose(True)
blitzl1.set_tolerance(0.0)

n = 100
d = 1000

A = np.random.randn(n, d)
A = sparse.csc_matrix(A)
b = np.random.randn(n)

prob = blitzl1.LassoProblem(A, b)
lammax = prob.compute_lambda_max()
# Formatting into one string gives the same output on Python 2 and 3;
# the old `print "...", value` statement is a syntax error on Python 3.
print("lammax is %s" % (lammax,))

sol = prob.solve(lammax * 0.1)

# Imported here so IPython is only required once the solve has finished.
from IPython import embed
embed()
tmp_indptr = Xint_sub.indptr indptr[start:(end + 1)] = tmp_indptr + tot names[start:end] = [(j, k) for k in range(j, p)] tot += Xint_sub.getnnz() start = end end += Xint_sub.shape[1] - 1 indices = np.hstack(indices) indices = indices.astype('int64') Xint = sp.csc_matrix((np.ones(tot, dtype=np.int8), indices, indptr), dtype=np.int8, shape=(n, p * (p + 1) / 2)) # Run blitz on SNPs data with interactions blitzl1.set_tolerance(args.tol) blitzl1.set_verbose(False) blitzl1.set_use_intercept(args.useBias) prob = blitzl1.LassoProblem(Xint, y) # Compute lambda_max t0_lammax = time.time() lammax = prob.compute_lambda_max() t1_lammax = time.time() csv_writer_preproc.writerow([t1_lammax - t0_lammax]) file_preproc.close() # Define the values of lambda for which the solution will be computed lam = [ lammax * pow(10, np.log10(args.lambdaMinRatio) * t / args.nlambda) for t in range(1, args.nlambda + 1)