def test_SmallLogReg():
    """Exercise blitzl1's L1-regularised logistic regression on a 5x4 problem.

    The test reports each failed check by printing a message; it never raises
    and returns ``None``. It also leaves the blitzl1 module settings changed
    (intercept enabled, tolerance 0.01) when it finishes.
    """
    # Solver options are set on the blitzl1 module, not on the problem object.
    blitzl1.set_use_intercept(False)
    blitzl1.set_tolerance(0.0)
    blitzl1.set_verbose(False)

    A = sparse.csc_matrix(np.arange(20).reshape(5, 4))
    b = np.array([1, -1, -1, 1, 1])

    prob = blitzl1.LogRegProblem(A, b)
    sol = prob.solve(2)

    # Compare against hard-coded reference values for lambda = 2.
    if not approx_equal(sol.objective_value, 3.312655451335882):
        print("test SmallLogReg obj failed")
    if not approx_equal(sol.x[0], 0.0520996109147):
        print("test SmallLogReg x[0] failed")

    # Recompute the objective as loss + lambda * ||x||_1 and compare it with
    # the value reported by the solver.
    python_obj = sol.evaluate_loss(A, b) + 2 * np.linalg.norm(sol.x, ord=1)
    if not approx_equal(sol.objective_value, python_obj):
        print("test SmallLogReg python_obj failed")

    # Warm start: solve once at a tight tolerance, then re-solve the same
    # problem at a looser tolerance starting from that solution. The check
    # expects the second solve to report exactly one iteration.
    blitzl1.set_use_intercept(True)
    blitzl1.set_tolerance(0.0001)
    sol = prob.solve(1.5)

    blitzl1.set_tolerance(0.01)
    sol2 = prob.solve(1.5, initial_x=sol.x, initial_intercept=sol.intercept)
    if sol2._num_iterations != 1:
        print("test SmallLogReg initial conditions failed")
def set_objective(self, X, y, lmbd):
    """Store the problem data and build the blitzl1 logistic-regression problem.

    Args:
        X: design matrix handed to ``blitzl1.LogRegProblem``.
        y: target vector handed to ``blitzl1.LogRegProblem``.
        lmbd: regularisation strength; stored unchanged on ``self.lmbd``.
    """
    self.X = X
    self.y = y
    # lmbd is kept exactly as given; a rescaling by the number of samples
    # (lmbd / X.shape[0]) used to live here but is disabled.
    self.lmbd = lmbd

    # The intercept option is set on the blitzl1 module itself.
    blitzl1.set_use_intercept(False)
    self.problem = blitzl1.LogRegProblem(self.X, self.y)
def sparseCoefRecovery(X, l=0.001):
    """Regress every column of ``X`` on all the other columns.

    For each column ``i`` a blitzl1 problem is built with column ``i`` as the
    target and the remaining ``n - 1`` columns as the design matrix. It is
    solved with regularisation ``l * lambda_max`` and the coefficients are
    written into column ``i`` of the result.

    Args:
        X: 2-D array of shape ``(d, n)``; each column is one sample.
        l: fraction of the problem's ``compute_lambda_max()`` value used as
            the regularisation strength.

    Returns:
        ``(n, n)`` numpy array ``C`` where ``C[j, i]`` is the coefficient of
        column ``j`` in the fit of column ``i``. The diagonal is zero.
    """
    _, n = X.shape
    C = np.zeros((n, n))  # the diagonal stays at its initial zero
    for i in range(n):
        if i % 100 == 0:
            print("Processed for " + str(i) + "samples")

        A = np.delete(X, i, axis=1)
        b = X[:, i]
        # NOTE(review): b is a raw column of X, not a +/-1 label vector --
        # confirm that LogRegProblem (rather than a lasso problem) is intended.
        prob = blitzl1.LogRegProblem(A, b)
        lammax = prob.compute_lambda_max()
        c_val = prob.solve(l * lammax).x

        # c_val has n - 1 entries because column i was removed from A.
        # Entries [0, i) belong to original columns [0, i) and entries
        # [i, n - 1) to original columns (i, n). The previous slices
        # (C[:i - 1] = c_val[:i - 1], guarded by i > 1) never wrote row i - 1.
        C[:i, i] = c_val[:i]
        C[i + 1:, i] = c_val[i:]
    return C
def linear_cv(dataset_name, tol=1e-3, compute_jac=True, model_name="lasso"):
    """Compare iterative-differentiation iterates with a reference solution.

    A reference coefficient vector ``beta_star`` is computed with an external
    solver (``Lasso_cel`` for the lasso, blitzl1 for logistic regression).
    A reference Jacobian ``jac_star`` is obtained by solving a linear system
    on the support of ``beta_star`` with conjugate gradient. The iterates
    returned by ``get_beta_jac_iterdiff`` are then compared with both.

    Args:
        dataset_name: key passed to ``load_libsvm`` and used to look up
            ``p_alphas`` and ``max_iters``.
        tol: tolerance forwarded to the models and to the iterative solver.
        compute_jac: forwarded to ``get_beta_jac_iterdiff``.
        model_name: ``"lasso"`` or ``"logreg"``.

    Returns:
        Tuple ``(dataset_name, p_alpha, diff_beta, diff_jac, n_iter, supp_id)``.
        ``diff_beta`` and ``diff_jac`` are per-iterate norms of the distance to
        the reference values and ``n_iter`` is the number of stored iterates.
        ``supp_id`` is one past the index of the last iterate whose support
        differs from that of ``beta_star`` (0 if every iterate matches).

    Raises:
        ValueError: if ``model_name`` is not ``"lasso"`` or ``"logreg"``.
    """
    # Fail early with a clear message; previously an unknown name left
    # `model` unbound and surfaced later as an unrelated-looking error.
    if model_name not in ("lasso", "logreg"):
        raise ValueError(
            "model_name must be 'lasso' or 'logreg', got %r" % (model_name,))

    X, y = load_libsvm(dataset_name)
    X = csc_matrix(X)
    n_samples, n_features = X.shape
    p_alpha = p_alphas[dataset_name, model_name]
    max_iter = max_iters[dataset_name]

    if model_name == "lasso":
        model = Lasso(X, y, 0, max_iter=max_iter, tol=tol)
    else:
        model = SparseLogreg(X, y, 0, max_iter=max_iter, tol=tol)

    # compute_alpha_max() is exponentiated here, and np.log(alpha) is what is
    # passed back to the model below.
    alpha_max = np.exp(model.compute_alpha_max())
    alpha = p_alpha * alpha_max

    # Reference solution from an external solver.
    if model_name == "lasso":
        clf = Lasso_cel(
            alpha=alpha, fit_intercept=False, warm_start=True,
            tol=tol * norm(y)**2 / 2, max_iter=10000)
        clf.fit(X, y)
        beta_star = clf.coef_
        mask = beta_star != 0
        dense = beta_star[mask]
    else:
        # An sklearn LogisticRegression(penalty='l1',
        # C=1 / (alpha * n_samples)) reference solve was used here before
        # being replaced by blitzl1.
        blitzl1.set_use_intercept(False)
        blitzl1.set_tolerance(1e-32)
        blitzl1.set_verbose(True)
        prob = blitzl1.LogRegProblem(X, y)
        clf = prob.solve(alpha * n_samples)
        beta_star = clf.x
        mask = beta_star != 0
        mask = np.array(mask)
        dense = beta_star[mask]

    # Reference Jacobian: solve the support-restricted system with CG.
    v = -n_samples * alpha * np.sign(beta_star[mask])
    mat_to_inv = model.get_hessian(mask, dense, np.log(alpha))
    jac_temp = cg(mat_to_inv, v, tol=1e-10)
    jac_star = np.zeros(n_features)
    jac_star[mask] = jac_temp[0]  # cg returns a tuple; [0] is the solution

    log_alpha = np.log(alpha)
    list_beta, list_jac = get_beta_jac_iterdiff(
        X, y, log_alpha, model, save_iterates=True, tol=tol,
        max_iter=max_iter, compute_jac=compute_jac)

    diff_beta = norm(list_beta - beta_star, axis=1)
    diff_jac = norm(list_jac - jac_star, axis=1)

    supp_star = beta_star != 0
    n_iter = list_beta.shape[0]

    # Walk the iterates backwards and stop at the last one whose support
    # differs from the reference. Initialising before the loop keeps supp_id
    # defined even when there are no iterates.
    supp_id = 0
    for i in np.arange(n_iter)[::-1]:
        supp = list_beta[i, :] != 0
        if not np.all(supp == supp_star):
            supp_id = i + 1
            break

    return dataset_name, p_alpha, diff_beta, diff_jac, n_iter, supp_id
import sys
import os

from scipy import sparse
import numpy as np
from sklearn.datasets import load_svmlight_file
from IPython import embed

import blitzl1

# Directory used to resolve the relative benchmark data path below.
# NOTE(review): `pwd` was not defined in the visible code; this assumes it is
# meant to be the directory containing this script -- confirm.
pwd = os.path.dirname(os.path.abspath(__file__))

blitzl1.set_verbose(True)


def format_b(b):
    """Rescale ``b`` linearly so its maximum maps to 1.0 and its minimum to -1.0.

    Args:
        b: numeric array of labels.

    Returns:
        Array of the same shape as ``b`` with values in ``[-1.0, 1.0]``.

    Raises:
        ValueError: if all entries of ``b`` are equal, since the rescaling
            would divide by zero.
    """
    max_b = np.max(b)
    min_b = np.min(b)
    if max_b == min_b:
        raise ValueError("cannot rescale labels: all values are equal")
    scale = 2.00 / (max_b - min_b)
    return scale * (b - max_b) + 1.0


(A, b) = load_svmlight_file(os.path.join(pwd, "../benchmark/data/news20"))
A_csc = sparse.csc_matrix(A)
b = format_b(b)

# Interactive stop to inspect the loaded data before solving.
embed()

prob = blitzl1.LogRegProblem(A_csc, b)
lammax = prob.compute_lambda_max()
sol = prob.solve(0.001 * lammax)

# Interactive stop to inspect the solution.
embed()
import blitzl1
import sys
import os
import numpy as np
from scipy import sparse

# Solve a blitzl1 logistic-regression problem on random data and drop into an
# IPython shell to inspect the result.
blitzl1.set_verbose(True)
blitzl1.set_tolerance(0.0)

n = 100   # number of rows (samples)
d = 1000  # number of columns (features)

A = sparse.csc_matrix(np.random.randn(n, d))
b = 2*np.random.rand(n) - 1  # uniform values in [-1, 1)

prob = blitzl1.LogRegProblem(A, b)
lammax = prob.compute_lambda_max()
# %-formatting prints the same text on Python 2 and Python 3; the former
# `print "lammax is", lammax` statement is a SyntaxError on Python 3.
print("lammax is %s" % (lammax,))

sol = prob.solve(lammax * 0.1)

from IPython import embed
embed()
def set_objective(self, X, y, lmbd):
    """Record the data and regularisation, then create the blitzl1 problem.

    Args:
        X: design matrix handed to ``blitzl1.LogRegProblem``.
        y: target vector handed to ``blitzl1.LogRegProblem``.
        lmbd: regularisation strength; stored unchanged on ``self.lmbd``.
    """
    self.X = X
    self.y = y
    self.lmbd = lmbd

    # Both options are set on the blitzl1 module, not on the problem object:
    # no intercept term and a tolerance of 0.
    blitzl1.set_use_intercept(False)
    blitzl1.set_tolerance(0)

    problem = blitzl1.LogRegProblem(self.X, self.y)
    self.problem = problem