def fit(self, X_train, Y_train, params, groups=None):
    """Fit the weights with ``spams.fistaFlat`` and store them on ``self``.

    Parameters
    ----------
    X_train : 2-D array
        Design matrix; ``shape[0]`` is stored as ``n_samples`` and
        ``shape[1]`` as ``n_features``.
    Y_train : array
        Targets, passed to ``spams.fistaFlat`` as its first argument.
    params : dict
        Keyword arguments forwarded to ``spams.fistaFlat``. ``params['loss']``
        must exist: the value ``'multi-logistic'`` selects one weight column
        per distinct value in ``Y_train``, anything else a single column.
    groups : sequence of int, optional
        When not ``None``, converted to an int32 array and passed as the
        ``groups`` keyword.

    Returns
    -------
    The optimisation info returned by ``spams.fistaFlat``; the weights are
    stored in ``self.W``.
    """
    # A single pre-formatted argument prints the same text on Python 2 and 3.
    print("FISTA Flat: %s" % (params,))

    self.n_samples = X_train.shape[0]
    self.n_features = X_train.shape[1]
    self.groups = groups
    self.params = params

    if params['loss'] == 'multi-logistic':
        n_outputs = len(np.unique(Y_train))
    else:
        n_outputs = 1
    # ``np.float`` was only an alias of the builtin float (i.e. float64) and
    # has been removed from NumPy, so the concrete dtype is spelled out.
    W0 = np.zeros((self.n_features, n_outputs), dtype=np.float64, order="F")

    if groups is not None:
        W, optim_info = spams.fistaFlat(
            Y_train, X_train, W0, True,
            groups=np.array(groups, dtype=np.int32), **params)
    else:
        W, optim_info = spams.fistaFlat(Y_train, X_train, W0, True, **params)

    self.W = W
    return optim_info
def spams_glasso(Y, Z, lmbda):
    r'''
    Penalised least squares on the vectorised problem, solved with fistaFlat.

        b = argmin_b 0.5 * ||vec(Y) - [Z^T (x) I] b||_2^2 + lmbda * penalty(b)

    where (x) is the Kronecker product. The design matrix is built as
    ``kron(eye(n), Z.T)`` with ``n = Y.shape[0]``.

    NOTE(review): the call below uses ``regul='l1'`` and passes no groups,
    although the function name suggests a group lasso -- confirm intent.

    The previous solution is cached on ``spams_glasso.b0`` and reused as the
    warm start; when that attribute is ``None`` the start comes from
    ``fit_olst``. Returns the solution reshaped to ``(n, n)``.
    '''
    warm_start = spams_glasso.b0
    if warm_start is None:
        ols_vec = vec(fit_olst(Y, Z, lmbda))
        warm_start = np.asfortranarray(
            ols_vec.reshape((ols_vec.shape[0], 1)))
        spams_glasso.b0 = warm_start

    n = Y.shape[0]
    y_flat = vec(Y)
    # The Kronecker product is formed densely before conversion, so this
    # intermediate can be very large.
    design = bsr_matrix(np.kron(np.eye(n), Z.T))

    target = np.asfortranarray(y_flat.reshape((y_flat.shape[0], 1)))
    design_spm = np.asfortranarray(design)
    b, results = fistaFlat(target, design_spm, warm_start, True,
                           loss='square', regul='l1', lambda1=lmbda,
                           verbose=False)

    # Remember the solution for the next call.
    spams_glasso.b0 = b
    return np.array(b.reshape((n, n)))
def optimize(self, max_iters=None):
    """Re-estimate ``self.beta`` with a sparse-group-lasso fit.

    The design matrix is ``self.X`` tiled once per task column and multiplied
    element-wise by ``self.tasks`` repeated ``p`` times along axis 1, where
    ``p = self.X.shape[1]``. Group ids ``1..p`` are repeated for every task.

    Parameters
    ----------
    max_iters : int, optional
        Iteration cap forwarded to ``spams.fistaFlat`` as ``max_it``. When
        ``None`` (the default) the SPAMS default is used, as before.
        Previously this argument was accepted but ignored.

    The result is reshaped to ``(tasks_n, p)`` and stored in ``self.beta``;
    nothing is returned.
    """
    p = self.X.shape[1]
    tasks_n = self.tasks.shape[1]

    X = np.asfortranarray(
        np.tile(self.X, (1, tasks_n)) * np.repeat(self.tasks, p, axis=1))
    groups = np.asfortranarray(
        np.tile(np.arange(1, p + 1).reshape(p), (tasks_n)), 'int32')
    beta0 = np.asfortranarray(self.beta.reshape(p * tasks_n, 1))

    solver_kwargs = dict(loss='square',
                         regul='sparse-group-lasso-l2',
                         lambda1=self.lambda_1,
                         lambda2=self.lambda_2,
                         groups=groups)
    if max_iters is not None:
        solver_kwargs['max_it'] = max_iters

    self.beta = spams.fistaFlat(
        np.asfortranarray(self.Y), X, beta0, False, **solver_kwargs
    ).reshape(tasks_n, p)
def run(self, n_iter):
    """Solve the lasso problem for ``n_iter`` iterations and store ``self.w``.

    A sparse ``self.X`` is solved with ``fistaFlat`` (l1 penalty, square
    loss, ``max_it=n_iter``); any other ``self.X`` is handed to ``lasso``
    with ``L=n_iter``. In both cases ``self.solver_parameter`` is forwarded
    as keyword arguments and the flattened result is stored in ``self.w``.
    """
    target = np.expand_dims(np.asfortranarray(self.y), axis=1)

    if not scipy.sparse.issparse(self.X):
        coefs = lasso(target, D=self.X, L=n_iter, **self.solver_parameter)
        self.w = coefs.toarray().flatten()
        return

    # Zero start with one row per column of X, same dtype as the target.
    start = np.zeros((self.X.shape[1], 1), dtype=target.dtype, order="F")
    solution = fistaFlat(target, self.X, start, **self.solver_parameter,
                         regul='l1', it0=10000, loss='square', tol=1e-12,
                         max_it=n_iter)
    self.w = solution.flatten()
def score_proposals(X, D, params):
    """Score a proposal segment by its reconstruction error on a dictionary.

    Parameters
    ----------
    X : 2-D array
        Features; transposed internally so that ``X.T`` is what gets coded.
    D : 2-D array
        Pretrained dictionary; transposed internally as well.
    params : dict
        Keyword arguments forwarded to ``spams.fistaFlat``.

    Returns
    -------
    float
        Sum of squared residuals ``X.T - D.T @ A`` divided by the number of
        columns of ``X.T``.
    """
    X = np.asfortranarray(X.T.copy())
    D = np.asfortranarray(D.T.copy())
    # NumPy accepts only the one-letter order codes; the long spelling
    # 'FORTRAN' is rejected by current releases.
    A_0 = np.zeros((D.shape[1], X.shape[1]), order='F')
    A = spams.fistaFlat(X, D, A_0, **params)
    residual = X - np.dot(D, A)
    cost = (1.0 / X.shape[1]) * (residual ** 2).sum()
    return cost
def score_proposals(X, D, params):
    """Score a proposal segment by its reconstruction error on a dictionary.

    Parameters
    ----------
    X : 2-D array
        Features; transposed internally so that ``X.T`` is what gets coded.
    D : 2-D array
        Pretrained dictionary; transposed internally as well.
    params : dict
        Keyword arguments forwarded to ``spams.fistaFlat``.

    Returns
    -------
    float
        Sum of squared residuals ``X.T - D.T @ A`` divided by the number of
        columns of ``X.T``.
    """
    X = np.asfortranarray(X.T.copy())
    D = np.asfortranarray(D.T.copy())
    # NumPy accepts only the one-letter order codes; the long spelling
    # 'FORTRAN' is rejected by current releases.
    A_0 = np.zeros((D.shape[1], X.shape[1]), order='F')
    A = spams.fistaFlat(X, D, A_0, **params)
    residual = X - np.dot(D, A)
    cost = (1.0 / X.shape[1]) * (residual ** 2).sum()
    return cost
def learn_class_independent_model(X, D, tol=0.01, max_iter=250, verbose=True,
                                  params=None):
    """Class independent dictionary learning.

    Alternates a SPAMS coding step with a least-squares dictionary update
    until the cost decreases by at most ``tol`` or ``max_iter`` is reached.

    Parameters
    ----------
    X : ndarray
        2D numpy array containing a stack of features with shape nxm where n
        is the number of samples and m the feature dimensionality.
    D : ndarray
        2D numpy array containing an initial guess for the dictionary D. Its
        shape is dxm where d is the number of dictionary elements.
    tol : float, optional
        Global tolerance for optimization convergence.
    max_iter : int, optional
        Maximum number of iterations.
    verbose : bool, optional
        Enable verbosity.
    params : dict, optional
        Dictionary containing the optimization parameters (for Spams). Must
        contain ``'lambda1'``, which is also used in the cost.

    Returns
    -------
    tuple
        ``(D.T, A.T, cost)``: the dictionary, the codes and the last cost.
    """
    if not params:
        params = {'loss': 'square', 'regul': 'l1l2', 'numThreads': -1,
                  'verbose': False, 'compute_gram': True, 'ista': True,
                  'linesearch_mode': 2, 'lambda1': 0.05, 'tol': 1e-1}

    X = np.asfortranarray(X.T.copy())
    D = np.asfortranarray(D.T.copy())
    # NumPy accepts only the one-letter order codes; 'FORTRAN' is rejected
    # by current releases.
    A = np.zeros((D.shape[1], X.shape[1]), order='F')
    prev_cost = 1e9
    n_samples = X.shape[1]

    for i in range(1, max_iter + 1):
        # Solves coding step. The sqrt(1.0) factors are numerically neutral
        # and are kept so the solver receives freshly allocated arrays.
        A = spams.fistaFlat(np.sqrt(1.0) * X, np.sqrt(1.0) * D, A, **params)

        # Dictionary update as least square.
        D = np.dot(np.dot(np.linalg.inv(np.dot(A, A.T)), A), X.T).T

        # Compute cost.
        cost = (1.0 / n_samples) * ((X - np.dot(D, A)) ** 2).sum() + \
            2 * params['lambda1'] * (A ** 2).sum()

        # Check convergence conditions.
        if prev_cost - cost <= tol:
            break
        prev_cost = cost

        if verbose:
            # One pre-formatted argument prints the same on Python 2 and 3.
            print('Iteration [{}] / Cost function [{}]'.format(i, cost))

    return D.T, A.T, cost
def compute_beta(Hmat, Tmat, C):
    """Fit ``beta`` with an elastic-net penalised square loss via SPAMS.

    Parameters
    ----------
    Hmat : 2-D array
        Passed to ``spams.fistaFlat`` as the design matrix.
    Tmat : 2-D array
        Passed to ``spams.fistaFlat`` as the targets.
    C : float
        Mixing weight, must satisfy ``0 < C < 1``. ``lambda1`` is scaled by
        ``C`` and ``lambda2`` by ``1 - C``; both are also scaled by
        ``rows * cols * 0.5 * 0.0001`` with ``rows, cols = Tmat.shape``.

    Returns
    -------
    ndarray
        C-contiguous array with ``Hmat.shape[1]`` rows and ``Tmat.shape[1]``
        columns.

    The elapsed wall-clock time is printed as a side effect.
    """
    assert 0. < C < 1.
    rows, cols = Tmat.shape
    t = time.time()

    scale = rows * cols * 0.5 * 0.0001
    params = {'numThreads': -1,
              'it0': 5,
              'max_it': 50,
              'L0': 0.1,
              'tol': 1e-6,
              'lambda1': scale * C,
              'lambda2': scale * (1. - C),
              'loss': 'square',
              'regul': 'elastic-net'}

    W0 = np.zeros((Hmat.shape[1], Tmat.shape[1]),
                  dtype=theano.config.floatX, order="F")
    Hmat = np.asfortranarray(Hmat, dtype=theano.config.floatX)
    Tmat = np.asfortranarray(Tmat, dtype=theano.config.floatX)
    beta = spams.fistaFlat(Tmat, Hmat, W0, **params)

    # Parenthesised single argument: identical output on Python 2 and 3.
    print(time.time() - t)
    return np.ascontiguousarray(beta)
def get_betas_spam2(self, xs, ys, groups, lambdas, n, q, itermax, tol):
    """Run a group-lasso FISTA fit for every value in ``lambdas``.

    ``groups`` is shifted by one before being passed to SPAMS. For each
    lambda the returned weights are reshaped to ``(q, n, p)``, where ``p`` is
    the number of distinct group ids, and stacked along the first axis.

    Returns an array of shape ``(len(lambdas), q, n, p)``.
    """
    n_groups = len(np.unique(groups))
    lambdas = np.asarray(lambdas, dtype=np.float64)
    targets = np.expand_dims(ys, 1)
    group_ids = np.asarray(groups, dtype=np.int32) + 1

    start = np.zeros((xs.shape[1], targets.shape[1]), dtype=np.float32)
    design = np.asfortranarray(xs, dtype=np.float32)
    response = np.asfortranarray(targets, dtype=np.float32)

    coeffs = np.zeros((len(lambdas), q, n, n_groups))
    for idx, lam in enumerate(lambdas):
        # fistaFlat is asked for optimisation info, so it returns a pair;
        # only the weights (first item) are kept.
        fitted = spams.fistaFlat(
            response, design, start, True,
            groups=group_ids, numThreads=-1, verbose=True, lambda1=lam,
            it0=100, max_it=itermax, L0=0.5, tol=tol, intercept=False,
            pos=False, compute_gram=True, loss='square',
            regul='group-lasso-l2', ista=False, subgrad=False, a=0.1, b=1000)
        coeffs[idx, :, :, :] = np.reshape(fitted[0], (q, n, n_groups))
    return (coeffs)
def spams_trace(Y, Z, lmbda):
    '''
    Trace-norm regularised least squares solved with fistaFlat:

        B = argmin_B 0.5 * ||Y - B Z||_F^2 + lmbda * ||B||_*

    The transposed problem is handed to the solver. The previous solution is
    cached on ``spams_trace.B0`` and reused as the warm start; when that
    attribute is ``None`` the start comes from ``fit_olst``.
    '''
    warm_start = spams_trace.B0
    if warm_start is None:
        warm_start = np.asfortranarray(fit_olst(Y, Z, lmbda).T)
        spams_trace.B0 = warm_start

    targets = np.asfortranarray(Y.T)
    design = np.asfortranarray(Z.T)
    solution, results = fistaFlat(targets, design, warm_start, True,
                                  loss='square', regul='trace-norm',
                                  lambda1=lmbda, verbose=False,
                                  tol=SPAMS_TRACE_TOL)

    # Remember the (transposed) solution for the next call.
    spams_trace.B0 = solution
    return solution.T
# Record V, then fit one model per index r with the "square-missing" loss.
# NOTE(review): this is an interior fragment; the names used here (R, T, N,
# U, Y, Y_mean, V, u_hat, b_hat, spamsParams, dc, ...) are defined outside it.
epoch_res["V"] = V
spamsParams['loss'] = "square-missing"
for r in range(R):
    # this is (N x U x T)
    Yr = (Y - Y_mean)[:, :, r]
    Vrstack = V[r]

    # Block layout: column t only holds values in rows t*N .. (t+1)*N-1,
    # every other entry stays NaN.
    Yrstack = zeros((T * N, T)) + nan
    for t in range(T):
        row_block = slice(t * N, (t + 1) * N)
        col = slice(t, t + 1)
        Yrstack[row_block, col] = Yr[:, col]
    Yspams = asfortranarray(Yrstack)

    # Append a column of ones to the design matrix.
    ones_col = ssp.csc_matrix(ones((N * T, 1)))
    Xspams = ssp.hstack([Vrstack, ones_col], format="csc")

    ur0 = asfortranarray(zeros((Xspams.shape[1], Yspams.shape[1])))
    ur = spams.fistaFlat(Yspams, Xspams, ur0, False, **spamsParams)

    # First U rows go to u_hat, the following row (the ones column) to b_hat.
    u_hat[r, :, :] = ur[:U, :].T
    b_hat[:, r] = ur[U:U + 1, :]

    epoch_res_r = {
        'Xspams': dc(Xspams[:, :-1]),
        'Yspams': dc(Yspams),
        'ur0': dc(ur0[:-1, :]),
        'params': dc(spamsParams),
        'ur': dc(u_hat[r, :, :]),
        'br': dc(b_hat[:, r]),
    }
    epoch_res["Vr"] += [epoch_res_r]

epoch_res["u_hat"] = dc(u_hat)
epoch_res["b_hat"] = dc(b_hat)
# Tracer()()
# phase 2:
def get_x_y_estimated_beta(self):
    """Build a small regression problem with ridge and l1 SPAMS solutions.

    When ``spams`` can be imported, random ``X`` and ``y`` are centred,
    normalised with ``spams.normalize`` and solved twice with
    ``spams.fistaFlat`` (``regul='l2'`` then ``regul='l1'``). When ``spams``
    is missing, hard-coded values for ``X``, ``y`` and both weight vectors
    are used instead.

    Reference:
    ---------
    http://spams-devel.gforge.inria.fr/doc-python/html/doc_spams006.html#toc23

    Returns
    -------
    dict
        Keys ``'X'``, ``'y'``, ``'weight_ridge'``, ``'weight_l1'``,
        ``'coefficient'``, ``'shape'``, ``'num_samples'`` and ``'num_ft'``.
    """
    shape = (4, 4, 1)
    num_samples = 10
    coefficient = 0.05
    num_ft = shape[0] * shape[1] * shape[2]
    X = np.random.random((num_samples, num_ft))
    beta = np.random.random((num_ft, 1))
    # y = dot(X, beta) + noise
    y = np.dot(X, beta) + np.random.random((num_samples, 1)) * 0.0001

    try:
        import spams
        # Normalization for X
        X = np.asfortranarray(X)
        X = np.asfortranarray(X - np.tile(np.mean(X, 0), (X.shape[0], 1)))
        X = spams.normalize(X)
        # Normalization for y
        y = np.asfortranarray(y)
        y = np.asfortranarray(y - np.tile(np.mean(y, 0), (y.shape[0], 1)))
        y = spams.normalize(y)
        # NumPy accepts only the one-letter order codes; 'FORTRAN' is
        # rejected by current releases.
        weight0 = np.zeros((X.shape[1], y.shape[1]), dtype=np.float64,
                           order="F")
        param = {'numThreads': 1, 'verbose': True, 'lambda1': coefficient,
                 'it0': 10, 'max_it': 200, 'L0': 0.1, 'tol': 1e-3,
                 'intercept': False, 'pos': False}
        param['compute_gram'] = True
        param['loss'] = 'square'
        param['regul'] = 'l2'
        (weight_ridge, optim_info) = spams.fistaFlat(y, X, weight0, True,
                                                     **param)
        param['regul'] = 'l1'
        (weight_l1, optim_info) = spams.fistaFlat(y, X, weight0, True,
                                                  **param)
    except ImportError:
        # TODO: Don't use print directly.
        # Parenthesised single argument: same output on Python 2 and 3.
        print("Cannot import spams. Default values will be used.")
        X = np.asarray([
            [0.26856766, 0.30620391, 0.26995615, 0.3806023,
             0.41311465, -0.24685479, 0.34108499, -0.22786788,
             -0.2267594, 0.30325884, -0.00382229, 0.3503643,
             0.21786749, -0.15275043, -0.24074157, -0.25639825],
            [-0.14305316, -0.19553497, 0.45250255, -0.17317269,
             -0.00304901, 0.43838073, 0.01606735, 0.09267714,
             0.47763275, 0.23234948, 0.38694597, 0.72591941,
             0.21028899, 0.42317021, 0.276003, 0.42198486],
            [-0.08738645, 0.10795947, 0.45813373, -0.34232048,
             0.43621128, -0.36984753, 0.16555311, 0.55188325,
             -0.48169657, -0.52844883, 0.15140672, 0.06074575,
             -0.36873621, 0.23679974, 0.47195386, -0.09728514],
            [0.16461237, 0.30299873, -0.32108348, -0.53918274,
             0.02287831, 0.01105383, -0.11124968, 0.18629018,
             0.30017151, -0.04217922, -0.46066699, -0.33612491,
             -0.52611772, -0.25397362, -0.27198468, -0.42883518],
            [0.4710195, 0.35047152, -0.07990029, 0.34911632,
             0.07206932, -0.20270895, -0.0684226, -0.18958745,
             -0.08433092, 0.14453963, 0.28095469, -0.35894296,
             0.11680455, -0.37598039, -0.28331446, -0.00825299],
            [-0.420528, -0.74469306, 0.22732681, 0.34362884,
             0.16006124, -0.29691759, 0.27029047, -0.31077084,
             -0.048071, 0.36495065, 0.49364453, -0.16903801,
             0.07577839, -0.36492748, 0.09448284, -0.37055486],
            [0.4232946, -0.26373387, -0.01430445, -0.2353587,
             -0.5005603, -0.35899458, 0.32702596, -0.38311949,
             0.31862621, -0.31931012, -0.41836583, -0.02855145,
             -0.50315227, -0.34807958, -0.05252361, 0.11551424],
            [-0.28443208, 0.07677476, -0.23720305, 0.11056299,
             -0.48742565, 0.36772457, -0.56074202, 0.3145033,
             -0.22811763, 0.36482173, -0.01786535, -0.02929555,
             0.35635411, 0.45838473, 0.45853286, 0.00159594],
            [-0.45779277, 0.10020579, -0.30873257, 0.28114072,
             0.18120182, 0.33333004, 0.17928387, 0.31572323,
             0.32902088, -0.10396976, -0.33296829, 0.05277326,
             0.27139148, 0.18653329, 0.06068255, -0.01942451],
            [0.06569833, -0.04065228, -0.44669538, -0.17501657,
             -0.29450165, 0.32483427, -0.55889145, -0.34973144,
             -0.35647584, -0.41601239, -0.07926316, -0.26784983,
             0.14952119, 0.19082353, -0.51309079, 0.6416559]])
        y = np.asarray([
            [0.15809895], [0.69496971], [0.01214928], [-0.39826324],
            [-0.01682498], [-0.03372654], [-0.45148804], [0.21735376],
            [0.08795349], [-0.27022239]])
        weight_ridge = np.asarray([
            [0.038558], [0.12605106], [0.19115798], [0.07187217],
            [0.09472713], [0.14943554], [-0.01968095], [0.11695959],
            [0.15049031], [0.18930644], [0.26086626], [0.23243305],
            [0.17425178], [0.13200238], [0.11710994], [0.11272092]])
        weight_l1 = np.asarray([
            [0.], [0.02664519], [0.], [0.],
            [0.], [0.10357106], [0.], [0.2103012],
            [0.00399881], [0.10815184], [0.32221254], [0.49350083],
            [0.21351531], [0.], [0.], [0.]])

    ret_data = {}
    ret_data['X'] = X
    ret_data['y'] = y
    ret_data['weight_ridge'] = weight_ridge
    ret_data['weight_l1'] = weight_l1
    ret_data['coefficient'] = coefficient
    ret_data['shape'] = shape
    ret_data['num_samples'] = num_samples
    ret_data['num_ft'] = num_ft
    return ret_data
# Alternating update: first W from the user-weighted design, then U from the
# word-weighted design, both with an l1-penalised square loss.
# NOTE(review): script fragment; X, Y, W, U, nusers and ntasks come from
# code outside it.
Y = BatchBivariateLearner._expandY(Y)
logger.debug("Input created!")


def cols_for_day(day):
    """Return the column slice covering the ``nusers`` columns of ``day``."""
    start = day * nusers
    return slice(start, (day + 1) * nusers)


# Shared solver settings for both fistaFlat calls.
l1_settings = dict(loss="square", regul="l1", lambda1=0.01)

logger.debug("Creating Vprime!")
Vprime = BatchBivariateLearner._calculateVprime(X, U)
logger.debug("Calculating W")
W = ssp.csc_matrix(spams.fistaFlat(Y, Vprime, W, False, **l1_settings))

logger.debug("Creating DPrime")
Dprime = BatchBivariateLearner._calculateDprime(X, W, U.shape)
U = np.asfortranarray(zeros((nusers, ntasks)))
logger.debug("Calculating U")
U = spams.fistaFlat(Y, Dprime, U, False, **l1_settings)
def learn_class_induced_model(X, D, Y, tol=0.01, max_iter=300, verbose=True,
                              local_params=None, params=None):
    """Class induced dictionary learning.

    Each iteration updates the classifier ``W`` (ridge solution), the
    dictionary ``D`` (least squares) and the codes ``A`` (SPAMS on the
    stacked reconstruction/classification problem), and stops once the cost
    decreases by at most ``tol``.

    Parameters
    ----------
    X : ndarray
        2D numpy array containing a stack of features with shape nxm where n
        is the number of samples and m the feature dimensionality.
    D : ndarray
        2D numpy array containing an initial guess for the dictionary D. Its
        shape is dxm where d is the number of dictionary elements.
    Y : ndarray
        2D numpy array containing a matrix that maps features and labels. Its
        shape is nxc where c is the number of classes.
    tol : float, optional
        Global tolerance for optimization convergence.
    max_iter : int, optional
        Maximum number of iterations.
    verbose : bool, optional
        Enable verbosity.
    local_params : dict, optional
        Dictionary containing the values of lambda for each optimization
        term (keys ``'lambda1'``, ``'lambda2'``, ``'lambda3'``).
    params : dict, optional
        Dictionary containing the optimization parameters (for Spams).

    Returns
    -------
    tuple
        ``(D.T, A.T, W.T, cost)``.
    """
    if not local_params:
        local_params = {'lambda1': 0.05, 'lambda2': 0.05, 'lambda3': 0.025}
    if not params:
        params = {'loss': 'square', 'regul': 'l1l2', 'numThreads': -1,
                  'verbose': False, 'compute_gram': True, 'ista': True,
                  'linesearch_mode': 2, 'lambda1': local_params['lambda1'],
                  'tol': 1e-1}

    X = np.asfortranarray(X.T.copy())
    D = np.asfortranarray(D.T.copy())
    Y = np.asfortranarray(Y.T.copy())
    n_dict_elem = D.shape[1]
    n_samples = X.shape[1]

    # Initialize A without classification loss. NumPy accepts only the
    # one-letter order codes; 'FORTRAN' is rejected by current releases.
    A = spams.fistaFlat(X, D,
                        np.zeros((D.shape[1], X.shape[1]), order='F'),
                        **params)

    prev_cost = 1e9
    sqrt_l2 = np.sqrt(local_params['lambda2'])
    rl = local_params['lambda3'] / local_params['lambda2']
    for i in range(1, max_iter + 1):
        # Solves W update.
        W = np.dot(
            np.linalg.inv(np.dot(A, A.T) +
                          np.diag(np.ones(n_dict_elem) * rl)),
            np.dot(A, Y.T))

        # Solves Dictionary update.
        D = np.dot(np.dot(np.linalg.inv(np.dot(A, A.T)), A), X.T).T

        # Solves coding step. np.vstack returns C-ordered arrays while SPAMS
        # expects column-major input, so convert explicitly.
        U = np.asfortranarray(np.vstack((X, sqrt_l2 * Y)))
        V = np.asfortranarray(np.vstack((D, sqrt_l2 * W.T)))
        A = spams.fistaFlat(U, V, A, **params)

        # Compute cost.
        cost = (1.0 / n_samples) * ((X - np.dot(D, A)) ** 2).sum() + \
            local_params['lambda1'] * (A ** 2).sum() + \
            local_params['lambda2'] * ((np.dot(W.T, A) - Y) ** 2).sum() + \
            local_params['lambda3'] * (W ** 2).sum()

        # Check convergence conditions.
        if prev_cost - cost <= tol:
            break
        prev_cost = cost

        # Progress is reported only when requested; the former unconditional
        # debug print ignored ``verbose``.
        if verbose:
            print('Iteration [{}] / Cost function [{}]'.format(i, cost))

    return D.T, A.T, W.T, cost
def test_nonoverlapping_smooth(self):
    """Compare parsimony's smoothed group lasso with the SPAMS solution.

    Two non-overlapping groups split the ``p`` variables in half. FISTA is
    run with a decreasing sequence of smoothing parameters ``mu`` and the
    final estimate must match SPAMS (or stored reference values when SPAMS
    is not installed) both in coefficients and in function value.
    """
    # Spams: http://spams-devel.gforge.inria.fr/doc-python/doc_spams.pdf

    import numpy as np
    from parsimony.functions import CombinedFunction
    import parsimony.algorithms.proximal as proximal
    import parsimony.functions as functions
    import parsimony.functions.nesterov.gl as gl
    import parsimony.datasets.simulate.l1_l2_glmu as l1_l2_glmu
    import parsimony.utils.weights as weights

    np.random.seed(42)

    # Note that p must be even!
    n, p = 25, 20
    half = p // 2
    groups = [list(range(0, half)), list(range(half, p))]
    # weights = [1.5, 0.5]

    A = gl.linear_operator_from_groups(p, groups=groups)  # , weights=weights)

    l = 0.0
    k = 0.0
    g = 0.9

    start_vector = weights.RandomUniformWeights(normalise=True)
    beta = start_vector.get_weights(p)

    alpha = 1.0
    Sigma = alpha * np.eye(p, p) \
        + (1.0 - alpha) * np.random.randn(p, p)
    mean = np.zeros(p)
    M = np.random.multivariate_normal(mean, Sigma, n)
    e = np.random.randn(n, 1)

    snr = 100.0

    mu_min = 5e-8
    X, y, beta_star = l1_l2_glmu.load(l, k, g, beta, M, e, A,
                                      mu=mu_min, snr=snr)

    eps = 1e-8
    max_iter = 18000

    beta_start = start_vector.get_weights(p)

    mus = [5e-0, 5e-2, 5e-4, 5e-6, 5e-8]
    # Floor division keeps the iteration budget an integer on Python 3.
    fista = proximal.FISTA(eps=eps, max_iter=max_iter // len(mus))

    beta_parsimony = beta_start
    for mu in mus:
        function = CombinedFunction()
        function.add_loss(
            functions.losses.LinearRegression(X, y, mean=False))
        function.add_penalty(
            gl.GroupLassoOverlap(l=g, A=A, mu=mu, penalty_start=0))

        beta_parsimony = fista.run(function, beta_parsimony)

    try:
        import spams

        params = {
            "loss": "square",
            "regul": "group-lasso-l2",
            "groups": np.array([1] * half + [2] * half, dtype=np.int32),
            "lambda1": g,
            "max_it": max_iter,
            "tol": eps,
            "ista": False,
            "numThreads": -1,
        }
        beta_spams, optim_info = \
            spams.fistaFlat(Y=np.asfortranarray(y),
                            X=np.asfortranarray(X),
                            W0=np.asfortranarray(beta_start),
                            return_optim_info=True,
                            **params)

    except ImportError:
        # Stored reference solution used when SPAMS is unavailable.
        beta_spams = np.asarray([[-11.93855944], [42.889350930],
                                 [22.076438880], [9.3869208300],
                                 [-32.73310431], [-32.73509107],
                                 [-42.05298794], [34.844819990],
                                 [9.6210946300], [19.799892400],
                                 [-45.62041548], [44.716039010],
                                 [31.634706630], [-27.37416567],
                                 [-30.27711859], [-30.12673231],
                                 [-18.62803747], [2.3561952400],
                                 [-6.476922020], [-19.86630857]])

    berr = np.linalg.norm(beta_parsimony - beta_spams)
    assert berr < 5e-3

    f_parsimony = function.f(beta_parsimony)
    f_spams = function.f(beta_spams)
    ferr = abs(f_parsimony - f_spams)
    assert ferr < 5e-6
import scipy.sparse as ssp
import sys

# Problem sizes: either all four given on the command line
# (nusers nwords ntasks ndays) or the small defaults below.
if len(sys.argv[1:]) != 4:
    nusers = 10
    nwords = 20
    ntasks = 1
    ndays = 3
else:
    (nusers, nwords, ntasks, ndays) = [int(x) for x in sys.argv[1:]]

# Parenthesised form prints the same blank line on Python 2 and 3.
print("")

# Random inputs. NOTE(review): np, zeros, ones, rand, logger, spams and
# BatchBivariateLearner are expected to be in scope from outside this
# fragment.
W = np.asfortranarray(zeros((nwords, ntasks)))
U = ssp.csc_matrix(ones((nusers, ntasks)))
X = ssp.rand(nwords, nusers * ndays, format="csc")
Y = np.asfortranarray(rand(ndays, ntasks))
Y = BatchBivariateLearner._expandY(Y)
logger.debug("Input created!")


def cols_for_day(day):
    """Return the column slice covering the ``nusers`` columns of ``day``."""
    return slice(day * nusers, (day + 1) * nusers)


logger.debug("Creating Vprime!")
Vprime = BatchBivariateLearner._calculateVprime(X, U)
logger.debug("Calculating W")
W = spams.fistaFlat(Y, Vprime, W, False,
                    loss="square", regul="l1", lambda1=0.01)
W = ssp.csc_matrix(W)

logger.debug("Creating DPrime")
Dprime = BatchBivariateLearner._calculateDprime(X, W, U.shape)
U = np.asfortranarray(zeros((nusers, ntasks)))
logger.debug("Calculating U")
U = spams.fistaFlat(Y, Dprime, U, False,
                    loss="square", regul="l1", lambda1=0.01)
# NOTE(review): interior fragment. The loop over ``r`` that encloses these
# statements is not visible here; the final two ``epoch_res[...]``
# assignments presumably run after that loop -- confirm against the full
# function.

# Block layout: column t only receives rows t*N .. (t+1)*N-1 of Yrstack.
for t in range(T):
    row_block = slice(t * N, (t + 1) * N)
    col = slice(t, t + 1)
    Yrstack[row_block, col] = Yr[:, col]
Yspams = asfortranarray(Yrstack)

# Append a column of ones to the design matrix.
ones_col = ssp.csc_matrix(ones((N * T, 1)))
Xspams = ssp.hstack([Vrstack, ones_col], format="csc")

ur0 = asfortranarray(zeros((Xspams.shape[1], Yspams.shape[1])))
ur = spams.fistaFlat(Yspams, Xspams, ur0, False, **spamsParams)

# First U rows go to u_hat, the following row (the ones column) to b_hat.
u_hat[r, :, :] = ur[:U, :].T
b_hat[:, r] = ur[U:U + 1, :]

epoch_res_r = {
    'Xspams': dc(Xspams[:, :-1]),
    'Yspams': dc(Yspams),
    'ur0': dc(ur0[:-1, :]),
    'params': dc(spamsParams),
    'ur': dc(u_hat[r, :, :]),
    'br': dc(b_hat[:, r]),
}
epoch_res["Vr"] += [epoch_res_r]

epoch_res["u_hat"] = dc(u_hat)
epoch_res["b_hat"] = dc(b_hat)
# Tracer()()
def get_x_y_estimated_beta(self):
    """Build a small regression problem with ridge and l1 SPAMS solutions.

    When ``spams`` can be imported, random ``X`` and ``y`` are centred,
    normalised with ``spams.normalize`` and solved twice with
    ``spams.fistaFlat`` (``regul='l2'`` then ``regul='l1'``). When ``spams``
    is missing, hard-coded values for ``X``, ``y`` and both weight vectors
    are used instead.

    Reference:
    ---------
    http://spams-devel.gforge.inria.fr/doc-python/html/doc_spams006.html#toc23

    Returns
    -------
    dict
        Keys ``'X'``, ``'y'``, ``'weight_ridge'``, ``'weight_l1'``,
        ``'coefficient'``, ``'shape'``, ``'num_samples'`` and ``'num_ft'``.
    """
    shape = (4, 4, 1)
    num_samples = 10
    coefficient = 0.05
    num_ft = shape[0] * shape[1] * shape[2]
    X = np.random.random((num_samples, num_ft))
    beta = np.random.random((num_ft, 1))
    # y = dot(X, beta) + noise
    y = np.dot(X, beta) + np.random.random((num_samples, 1)) * 0.0001

    try:
        import spams
        # Normalization for X
        X = np.asfortranarray(X)
        X = np.asfortranarray(X - np.tile(np.mean(X, 0), (X.shape[0], 1)))
        X = spams.normalize(X)
        # Normalization for y
        y = np.asfortranarray(y)
        y = np.asfortranarray(y - np.tile(np.mean(y, 0), (y.shape[0], 1)))
        y = spams.normalize(y)
        # NumPy accepts only the one-letter order codes; 'FORTRAN' is
        # rejected by current releases.
        weight0 = np.zeros((X.shape[1], y.shape[1]), dtype=np.float64,
                           order="F")
        param = {'numThreads': 1, 'verbose': True, 'lambda1': coefficient,
                 'it0': 10, 'max_it': 200, 'L0': 0.1, 'tol': 1e-3,
                 'intercept': False, 'pos': False}
        param['compute_gram'] = True
        param['loss'] = 'square'
        param['regul'] = 'l2'
        (weight_ridge, optim_info) = spams.fistaFlat(y, X, weight0, True,
                                                     **param)
        param['regul'] = 'l1'
        (weight_l1, optim_info) = spams.fistaFlat(y, X, weight0, True,
                                                  **param)
    except ImportError:
        # TODO: Don't use print directly.
        print("Cannot import spams. Default values will be used.")
        X = np.asarray([
            [0.26856766, 0.30620391, 0.26995615, 0.3806023,
             0.41311465, -0.24685479, 0.34108499, -0.22786788,
             -0.2267594, 0.30325884, -0.00382229, 0.3503643,
             0.21786749, -0.15275043, -0.24074157, -0.25639825],
            [-0.14305316, -0.19553497, 0.45250255, -0.17317269,
             -0.00304901, 0.43838073, 0.01606735, 0.09267714,
             0.47763275, 0.23234948, 0.38694597, 0.72591941,
             0.21028899, 0.42317021, 0.276003, 0.42198486],
            [-0.08738645, 0.10795947, 0.45813373, -0.34232048,
             0.43621128, -0.36984753, 0.16555311, 0.55188325,
             -0.48169657, -0.52844883, 0.15140672, 0.06074575,
             -0.36873621, 0.23679974, 0.47195386, -0.09728514],
            [0.16461237, 0.30299873, -0.32108348, -0.53918274,
             0.02287831, 0.01105383, -0.11124968, 0.18629018,
             0.30017151, -0.04217922, -0.46066699, -0.33612491,
             -0.52611772, -0.25397362, -0.27198468, -0.42883518],
            [0.4710195, 0.35047152, -0.07990029, 0.34911632,
             0.07206932, -0.20270895, -0.0684226, -0.18958745,
             -0.08433092, 0.14453963, 0.28095469, -0.35894296,
             0.11680455, -0.37598039, -0.28331446, -0.00825299],
            [-0.420528, -0.74469306, 0.22732681, 0.34362884,
             0.16006124, -0.29691759, 0.27029047, -0.31077084,
             -0.048071, 0.36495065, 0.49364453, -0.16903801,
             0.07577839, -0.36492748, 0.09448284, -0.37055486],
            [0.4232946, -0.26373387, -0.01430445, -0.2353587,
             -0.5005603, -0.35899458, 0.32702596, -0.38311949,
             0.31862621, -0.31931012, -0.41836583, -0.02855145,
             -0.50315227, -0.34807958, -0.05252361, 0.11551424],
            [-0.28443208, 0.07677476, -0.23720305, 0.11056299,
             -0.48742565, 0.36772457, -0.56074202, 0.3145033,
             -0.22811763, 0.36482173, -0.01786535, -0.02929555,
             0.35635411, 0.45838473, 0.45853286, 0.00159594],
            [-0.45779277, 0.10020579, -0.30873257, 0.28114072,
             0.18120182, 0.33333004, 0.17928387, 0.31572323,
             0.32902088, -0.10396976, -0.33296829, 0.05277326,
             0.27139148, 0.18653329, 0.06068255, -0.01942451],
            [0.06569833, -0.04065228, -0.44669538, -0.17501657,
             -0.29450165, 0.32483427, -0.55889145, -0.34973144,
             -0.35647584, -0.41601239, -0.07926316, -0.26784983,
             0.14952119, 0.19082353, -0.51309079, 0.6416559]])
        y = np.asarray([
            [0.15809895], [0.69496971], [0.01214928], [-0.39826324],
            [-0.01682498], [-0.03372654], [-0.45148804], [0.21735376],
            [0.08795349], [-0.27022239]])
        weight_ridge = np.asarray([
            [0.038558], [0.12605106], [0.19115798], [0.07187217],
            [0.09472713], [0.14943554], [-0.01968095], [0.11695959],
            [0.15049031], [0.18930644], [0.26086626], [0.23243305],
            [0.17425178], [0.13200238], [0.11710994], [0.11272092]])
        weight_l1 = np.asarray([
            [0.], [0.02664519], [0.], [0.],
            [0.], [0.10357106], [0.], [0.2103012],
            [0.00399881], [0.10815184], [0.32221254], [0.49350083],
            [0.21351531], [0.], [0.], [0.]])

    ret_data = {}
    ret_data['X'] = X
    ret_data['y'] = y
    ret_data['weight_ridge'] = weight_ridge
    ret_data['weight_l1'] = weight_l1
    ret_data['coefficient'] = coefficient
    ret_data['shape'] = shape
    ret_data['num_samples'] = num_samples
    ret_data['num_ft'] = num_ft
    return ret_data
# Two l1-regularised square-loss fits on the same data: first with the
# default solver settings already present in ``param``, then with ISTA.
# NOTE(review): script fragment; Y, X, W0 and param are defined outside it.
print('\nVarious regression experiments')
param['compute_gram'] = True

print('\nFISTA + Regression l1')
param['loss'] = 'square'
param['regul'] = 'l1'
# param.regul='group-lasso-l2';
# param.size_group=10;
(W, optim_info) = spams.fistaFlat(Y, X, W0, True, **param)

# Rows 0, 2 and 3 of optim_info are reported as loss, relative duality gap
# and iteration count respectively (per the message below).
mean_loss = np.mean(optim_info[0, :], 0)
mean_gap = np.mean(optim_info[2, :], 0)
mean_iters = np.mean(optim_info[3, :], 0)
print(
    'mean loss: %f, mean relative duality_gap: %f, number of iterations: %f'
    % (mean_loss, mean_gap, mean_iters))

print('\nISTA + Regression l1')
param['ista'] = True
(W, optim_info) = spams.fistaFlat(Y, X, W0, True, **param)
def correct_with_regression(df_load,
                            dikt_errors,
                            prefix=None,
                            prefix_plot=None,
                            bool_plot_corrections=None,
                            bool_plot_trash=None):
    """
    Learn a predictor for each bad site to predict the irrelevant values
    from the values of the other sites that do not have irrelevant values.

    Cached results (``df_corrected_load.csv`` and ``trash_sites.pkl`` under
    ``prefix``) are loaded when possible. Otherwise, for every site listed in
    ``dikt_errors`` a regressor is trained on 90% of its sane observations,
    validated on the remaining 10%, and used to overwrite the flagged values.
    Sites with no sane observations, or whose fit does not reach
    ``rr_train > 0.9 and rr_test > 0.5``, are dropped and recorded in
    ``trash_sites``. The results are then written back to the cache files.

    Parameters
    ----------
    df_load : pandas.DataFrame
        One column per site.
    dikt_errors : dict
        ``{error_type: {site: [(row_position, date), ...]}}``.
    prefix : str
        Directory of the cache files. Required despite the ``None`` default,
        since it is passed to ``os.path.join``.
    prefix_plot : str, optional
        Directory for the plots.
    bool_plot_corrections, bool_plot_trash : bool, optional
        Enable the corresponding plots.

    Returns
    -------
    (df_corrected_load, trash_sites)
    """
    print('correct_with_regression - ', end='')
    fname_load = os.path.join(prefix, 'df_corrected_load.csv')
    fname_trash = os.path.join(prefix, 'trash_sites.pkl')
    try:
        df_corrected_load = pd.read_csv(fname_load, index_col=0)
        df_corrected_load.index = pd.to_datetime(df_corrected_load.index)
        # NOTE(review): pickle.load can execute arbitrary code; this is only
        # safe as long as the cache file is produced by this program.
        with open(fname_trash, 'rb') as f:
            trash_sites = pickle.load(f)
        print('Loaded df_corrected_load and trash_sites')
    except Exception as e:
        # Any failure to read the cache triggers a full recomputation.
        print('\n{0}'.format(colored(e, 'red')))
        print('df_corrected_load not loaded')
        bad_sites = sorted(
            {site for v in dikt_errors.values() for site in v})
        df_corrected_load = df_load.copy()
        trash_sites = []
        X = df_load[sorted(set(df_load.columns) - set(bad_sites))]
        assert not pd.isnull(X).sum().sum()

        for ii, site in enumerate(bad_sites):
            print('\r{0:6} / {1:6} - '.format(ii, len(bad_sites)), end='')
            y = df_load[site]
            flags = {
                dd: error_type
                for error_type in dikt_errors
                for _, dd in dikt_errors[error_type].get(site, [])
            }
            samples_unknown = [
                (pos, dd)
                for error_type in dikt_errors
                for pos, dd in dikt_errors[error_type].get(site, [])
            ]
            ind_unknown, dates_unknown = zip(*samples_unknown)
            ind_unknown = sorted(ind_unknown)
            dates_unknown = sorted(dates_unknown)
            # Set membership keeps this linear in the number of rows.
            unknown_positions = set(ind_unknown)
            ind_known = [
                pos for pos in range(y.shape[0])
                if pos not in unknown_positions
            ]  # Indices corresponding to sane observations
            assert not pd.isnull(y.iloc[ind_known]).sum()
            if len(ind_known) == 0:
                trash_sites.append((site, 'dates_known empty'))
                df_corrected_load = df_corrected_load.drop(site, axis=1)
                print('{0:6} -> drop because dates known empty'.format(site))
                continue

            # Divide the sane observations into a training and a test sets
            shuffled_ind_known = ind_known.copy()
            np.random.shuffle(shuffled_ind_known)
            cut = int(0.9 * len(shuffled_ind_known))
            ind_train = sorted(shuffled_ind_known[:cut])
            ind_test = sorted(shuffled_ind_known[cut:])

            # Train
            y_train = y.iloc[ind_train]
            X_train = X.iloc[ind_train]
            # Validation
            y_test = y.iloc[ind_test]
            X_test = X.iloc[ind_test]
            # Pred
            X_pred = X.iloc[ind_unknown]

            # Normalization covariates (statistics from the training set)
            X_mean = X_train.mean(axis=0)
            X_std = X_train.std(axis=0)
            X_train = (X_train - X_mean) / X_std
            X_test = (X_test - X_mean) / X_std
            X_pred = (X_pred - X_mean) / X_std
            # Normalization target
            y_mean = y_train.mean(axis=0)
            y_std = y_train.std(axis=0)
            y_train = (y_train - y_mean) / y_std

            assert np.allclose(X_train.sum(), 0)
            assert np.allclose(y_train.sum(), 0)

            regressor = 'rf'  # 'rf' # 'xgb' # 'spams'
            # Assess the quality of a predictor from the other sane sites.
            # We do not have a criterion to decide which algorithm is the
            # most appropriate and have used alternatively spams or random
            # forests.
            if regressor in ('rf', 'xgb'):
                if regressor == 'rf':
                    model = RandomForestRegressor()
                else:
                    model = xgb.XGBRegressor()
                model.fit(X_train, y_train)
                y_hat_train = model.predict(X_train)
                y_hat_test = model.predict(X_test)
                y_hat_pred = model.predict(X_pred)
            elif regressor == 'spams':
                hprm = {
                    'loss': 'square',
                    'numThreads': -1,
                    'verbose': False,
                    'lambda1': 0.03 * X_train.shape[0],
                    'lambda2': 0.1,  # For elastic_net
                    'it0': 10,  # nb_iter between two dual gap computations
                    # (optional, maximum number of iterations, 100 by default)
                    'max_it': int(1e4),
                    # (optional, initial parameter L in fista, 0.1 by
                    # default, should be small enough)
                    'L0': 0.1,
                    'regul': 'l2',
                    'tol': 1e-4,
                    # (optional, do not regularize last row of W, false by
                    # default)
                    'intercept': False,
                    'compute_gram': True,
                    'return_optim_info': True,
                }
                beta0 = np.zeros((X_train.shape[1], 1),
                                 dtype=np.float64,
                                 order="F")
                beta_cen, optim_info = spams.fistaFlat(
                    np.asfortranarray(y_train,
                                      dtype=np.float64).reshape((-1, 1)),
                    np.asfortranarray(X_train, dtype=np.float64),
                    beta0,
                    **hprm
                )
                beta = beta_cen[:, 0]
                y_hat_train = X_train @ beta
                y_hat_test = X_test @ beta
                y_hat_pred = X_pred @ beta

            # Back to the original scale of the target.
            y_train = y_train * y_std + y_mean
            y_hat_train = y_hat_train * y_std + y_mean
            y_hat_test = y_hat_test * y_std + y_mean
            y_hat_pred = y_hat_pred * y_std + y_mean

            rr_train = 1 - (
                (y_train - y_hat_train)**2).mean() / y_train.std()**2
            rr_test = 1 - ((y_test - y_hat_test)**2).mean() / y_test.std()**2

            # If the performances are not good enough on the training and
            # the test sets, drop the site
            if not (rr_train > 0.9 and rr_test > 0.5):
                trash_sites.append((
                    site,
                    'rr_train = {rr_train:.2} - rr_test = {rr_test:.2}'.format(
                        rr_train=rr_train,
                        rr_test=rr_test,
                    )))
                df_corrected_load = df_corrected_load.drop(site, axis=1)
                print(
                    '{0:6} -> drop because prediction not good enough - rr_train = {rr_train:.2} - rr_test = {rr_test:.2}'
                    .format(
                        site,
                        rr_train=rr_train,
                        rr_test=rr_test,
                    ))
                continue

            if bool_plot_corrections:
                plot_tools.plot_corrections(
                    y,
                    dates_unknown,
                    y_hat_pred,
                    os.path.join(prefix_plot, 'corrections'),
                    regressor,
                    rr_test,
                    flags,
                )
            print(
                '{0:6} -> {1:5} values corrected - rr_train = {rr_train:.2} - rr_test = {rr_test:.2}'
                .format(
                    site,
                    len(ind_unknown),
                    rr_train=rr_train,
                    rr_test=rr_test,
                ))
            # Write through the frame itself: the chained form
            # ``df[site].iloc[...] = ...`` may assign into a temporary copy
            # and leave the frame unchanged. Values are passed positionally
            # (as an array) so no index alignment takes place.
            site_col = df_corrected_load.columns.get_loc(site)
            df_corrected_load.iloc[ind_unknown, site_col] = np.asarray(
                y_hat_pred)

        df_corrected_load.to_csv(fname_load)
        with open(fname_trash, 'wb') as f:
            pickle.dump(trash_sites, f)

    # Plot the sites that are discarded
    if bool_plot_trash:
        plot_tools.plot_trash(
            trash_sites,
            df_load,
            os.path.join(prefix_plot, 'trash_sites'),
        )
    print(
        'done - df_corrected_load.shape = {0} - len(trash_sites) = {1}\n{2}'.
        format(df_corrected_load.shape, len(trash_sites),
               '#' * tools.NB_SIGNS), )
    return df_corrected_load, trash_sites
def _call(self, x, y, w0):
    """Run ``spams.fistaFlat`` on targets ``y`` and design ``x`` from ``w0``.

    ``self.params`` is forwarded as keyword arguments; optimisation info is
    not requested, so only the weights are returned.
    """
    return spams.fistaFlat(y, x, w0, False, **self.params)
##
# Dump the flattened targets to 'datay', one "%f"-formatted value per line.
# NOTE(review): script fragment; Y, X, m and param are defined outside it.
X1 = Y.reshape(m)
# The context manager closes the file even if a write fails.
with open('datay', 'w') as f:
    for x in X1:
        f.write("%f\n" % x)
##
# Centre the columns of Y, then normalise with SPAMS.
Y = np.asfortranarray(Y - np.tile(np.mean(Y, 0), (Y.shape[0], 1)))
Y = spams.normalize(Y)
# NumPy accepts only the one-letter order codes; 'FORTRAN' is rejected by
# current releases.
W0 = np.zeros((X.shape[1], Y.shape[1]), dtype=np.float64, order="F")
param['compute_gram'] = True
param['verbose'] = True
param['loss'] = 'square'
param['regul'] = 'l1'
if False:
    # Disabled l1 experiment, kept for reference.
    (W, optim_info) = spams.fistaFlat(Y, X, W0, True, **param)
    print('mean loss: %f, mean relative duality_gap: %f, number of iterations: %f\n' % (
        np.mean(optim_info[0, :]),
        np.mean(optim_info[2, :]),
        np.mean(optim_info[3, :])))

param['regul'] = 'group-lasso-l2'
# param2 is the same dict object as param, so the group settings below are
# also seen by the call that passes **param.
param2 = param
# randint's upper bound is exclusive: ids are drawn from 1..5, as with the
# deprecated random_integers(1, 5, ...).
param2['groups'] = np.array(np.random.randint(1, 6, X.shape[1]),
                            dtype=np.int32)
param2['lambda1'] *= 10
(W, optim_info) = spams.fistaFlat(Y, X, W0, True, **param)
exit()
# Everything below is unreachable while the exit() call above is in place.
param['ista'] = False
param['subgrad'] = True
param['a'] = 0.1
param['b'] = 1000  # arbitrary parameters
max_it = param['max_it']
it0 = param['it0']
param['max_it'] = 500
def test_nonoverlapping_smooth(self):
    """Check Parsimony's smoothed group lasso against SPAMS on two groups.

    A least-squares problem with a two-group, non-overlapping group-lasso
    penalty is simulated with ``l1_l2_glmu.load``.  It is solved with
    Parsimony's FISTA, warm-started over a decreasing sequence of ``mu``
    values, and with ``spams.fistaFlat`` (``regul="group-lasso-l2"``).  When
    ``spams`` cannot be imported, a hard-coded coefficient vector is used as
    the reference instead.  The test asserts that the two coefficient
    vectors are within 5e-3 of each other in Euclidean norm and that their
    objective values differ by less than 5e-6.
    """
    # Spams: http://spams-devel.gforge.inria.fr/doc-python/doc_spams.pdf

    import numpy as np
    from parsimony.functions import CombinedFunction
    import parsimony.algorithms.proximal as proximal
    import parsimony.functions as functions
    import parsimony.functions.nesterov.gl as gl
    import parsimony.datasets.simulate.l1_l2_glmu as l1_l2_glmu
    import parsimony.utils.start_vectors as start_vectors

    np.random.seed(42)

    # Note that p must be even!
    n, p = 25, 20
    # Floor division keeps the group size an int on Python 3 as well;
    # range() and list repetition both reject floats.
    half = p // 2
    groups = [list(range(0, half)), list(range(half, p))]
    A = gl.A_from_groups(p, groups=groups)

    l = 0.0
    k = 0.0
    g = 0.9

    start_vector = start_vectors.RandomStartVector(normalise=True)
    beta = start_vector.get_vector(p)

    alpha = 1.0
    # With alpha == 1 the random term is weighted by zero, but the draw is
    # kept so the random stream (and hence the data) stays unchanged.
    Sigma = alpha * np.eye(p, p) \
        + (1.0 - alpha) * np.random.randn(p, p)
    mean = np.zeros(p)
    M = np.random.multivariate_normal(mean, Sigma, n)
    e = np.random.randn(n, 1)

    snr = 100.0

    mu_min = 5e-8
    X, y, beta_star = l1_l2_glmu.load(l, k, g, beta, M, e, A,
                                      mu=mu_min, snr=snr)

    eps = 1e-8
    max_iter = 18000

    beta_start = start_vector.get_vector(p)

    mus = [5e-0, 5e-2, 5e-4, 5e-6, 5e-8]
    # The iteration budget is split evenly between the mu values.
    fista = proximal.FISTA(eps=eps, max_iter=max_iter // len(mus))

    beta_parsimony = beta_start
    for mu in mus:
        function = CombinedFunction()
        function.add_function(functions.losses.LinearRegression(X, y,
                                                                mean=False))
        function.add_penalty(gl.GroupLassoOverlap(l=g, A=A, mu=mu,
                                                  penalty_start=0))

        # Warm start: each run begins from the previous solution.
        beta_parsimony = fista.run(function, beta_parsimony)

    try:
        import spams

        params = {"loss": "square",
                  "regul": "group-lasso-l2",
                  "groups": np.array([1] * half + [2] * half,
                                     dtype=np.int32),
                  "lambda1": g,
                  "max_it": max_iter,
                  "tol": eps,
                  "ista": False,
                  "numThreads": -1,
                  }
        beta_spams, optim_info = \
            spams.fistaFlat(Y=np.asfortranarray(y),
                            X=np.asfortranarray(X),
                            W0=np.asfortranarray(beta_start),
                            return_optim_info=True,
                            **params)

    except ImportError:
        # Reference coefficients used when spams is not installed.
        beta_spams = np.asarray([[15.56784201],
                                 [39.51679274],
                                 [30.42583205],
                                 [24.8816362],
                                 [6.48671072],
                                 [6.48350546],
                                 [2.41477318],
                                 [36.00285723],
                                 [24.98522184],
                                 [29.43128643],
                                 [0.85520539],
                                 [40.31463542],
                                 [34.60084146],
                                 [8.82322513],
                                 [7.55741642],
                                 [7.62364398],
                                 [12.64594707],
                                 [21.81113869],
                                 [17.95400007],
                                 [12.10507338]])

    berr = np.linalg.norm(beta_parsimony - beta_spams)
    assert berr < 5e-3

    # Both solutions are scored with the function built for the last mu.
    f_parsimony = function.f(beta_parsimony)
    f_spams = function.f(beta_spams)
    ferr = abs(f_parsimony - f_spams)
    assert ferr < 5e-6
def learn_class_induced_model(X, D, Y, tol=0.01, max_iter=300, verbose=True,
                              local_params=None, params=None):
    """Class induced dictionary learning.

    Alternates three updates until the cost stops decreasing by more than
    ``tol`` or ``max_iter`` iterations have run: a closed-form update of the
    classifier ``W``, a closed-form update of the dictionary ``D``, and a
    sparse-coding step for ``A`` solved with ``spams.fistaFlat``.

    Parameters
    ----------
    X : ndarray
        2D numpy array containing a stack of features with shape nxm where
        n is the number of samples and m the feature dimensionality.
    D : ndarray
        2D numpy array containing an initial guess for the dictionary D.
        Its shape is dxm where d is the number of dictionary elements.
    Y : ndarray
        2D numpy array containing a matrix that maps features and labels.
        Its shape is nxc where c is the number of classes.
    tol : float, optional
        Global tolerance for optimization convergence.
    max_iter : int, optional
        Maximum number of iterations. Must be at least 1.
    verbose : bool, optional
        Enable verbosity.
    local_params : dict, optional
        Dictionary containing the values of lambda for each optimization
        term (keys ``'lambda1'``, ``'lambda2'`` and ``'lambda3'``).
    params : dict, optional
        Dictionary containing the optimization parameters (for Spams).

    Returns
    -------
    D : ndarray
        Learned dictionary, transposed back to the layout of the input ``D``.
    A : ndarray
        Codes, one row per sample.
    W : ndarray
        Classifier weights, one row per class.
    cost : float
        Value of the cost function at the last iteration.

    Raises
    ------
    ValueError
        If ``max_iter`` is smaller than 1, since no update would run and
        there would be nothing to return.
    """
    if max_iter < 1:
        raise ValueError('max_iter must be at least 1, got {}'
                         .format(max_iter))

    if not local_params:
        local_params = {'lambda1': 0.05, 'lambda2': 0.05, 'lambda3': 0.025}

    if not params:
        params = {'loss': 'square', 'regul': 'l1l2', 'numThreads': -1,
                  'verbose': False, 'compute_gram': True, 'ista': True,
                  'linesearch_mode': 2, 'lambda1': local_params['lambda1'],
                  'tol': 1e-1}

    # Work column-wise (one sample / atom / label vector per column) in
    # Fortran order, which is what the spams calls below are given.
    X = np.asfortranarray(X.T.copy())
    D = np.asfortranarray(D.T.copy())
    Y = np.asfortranarray(Y.T.copy())
    n_dict_elem = D.shape[1]
    n_samples = X.shape[1]

    # Initialize A without classification loss.  'F' is the standard
    # spelling of Fortran order; longer strings are rejected by recent NumPy.
    A = spams.fistaFlat(X, D,
                        np.zeros((D.shape[1], X.shape[1]), order='F'),
                        **params)

    # Loop invariants.
    rl = local_params['lambda3'] / local_params['lambda2']
    sqrt_lambda2 = np.sqrt(local_params['lambda2'])
    ridge = np.diag(np.ones(n_dict_elem) * rl)

    prev_cost = 1e9
    for i in range(1, max_iter + 1):
        # A * A^T is needed by both closed-form updates.
        gram = np.dot(A, A.T)

        # Solves W update.
        W = np.dot(np.linalg.inv(gram + ridge), np.dot(A, Y.T))

        # Solves Dictionary update.
        D = np.dot(np.dot(np.linalg.inv(gram), A), X.T).T

        # Solves coding step: features and scaled labels are stacked so one
        # fistaFlat call fits both, warm-started from the current codes.
        U = np.vstack((X, sqrt_lambda2 * Y))
        V = np.vstack((D, sqrt_lambda2 * W.T))
        A = spams.fistaFlat(U, V, A, **params)

        # Compute cost.
        cost = (1.0 / n_samples) * ((X - np.dot(D, A))**2).sum() + \
            local_params['lambda1'] * (A**2).sum() + \
            local_params['lambda2'] * ((np.dot(W.T, A) - Y)**2).sum() + \
            local_params['lambda3'] * (W**2).sum()

        # Check convergence conditions.
        if prev_cost - cost <= tol:
            break
        prev_cost = cost

        if verbose:
            # Single-argument call form prints the same on Python 2 and 3.
            print('Iteration [{}] / Cost function [{}]'.format(i, cost))

    return D.T, A.T, W.T, cost
def test_nonoverlapping_nonsmooth(self):
    """Check Parsimony's group lasso against SPAMS on two groups.

    A least-squares problem with a two-group, non-overlapping group-lasso
    penalty is simulated with ``l1_l2_gl.load``.  It is solved with
    Parsimony's FISTA, warm-started over a decreasing sequence of ``mu``
    values, and with ``spams.fistaFlat`` (``regul="group-lasso-l2"``).  When
    ``spams`` cannot be imported, a hard-coded coefficient vector is used as
    the reference instead.  The test asserts that the two coefficient
    vectors are within 5e-2 of each other in Euclidean norm and that their
    objective values differ by less than 5e-6.
    """
    # Spams: http://spams-devel.gforge.inria.fr/doc-python/doc_spams.pdf

    import numpy as np
    from parsimony.functions import CombinedFunction
    import parsimony.algorithms.proximal as proximal
    import parsimony.functions as functions
    import parsimony.functions.nesterov.gl as gl
    import parsimony.datasets.simulate.l1_l2_gl as l1_l2_gl
    import parsimony.utils.start_vectors as start_vectors

    np.random.seed(42)

    # Note that p must be even!
    n, p = 25, 20
    # Floor division keeps the group size an int on Python 3 as well;
    # range() and list repetition both reject floats.
    half = p // 2
    groups = [list(range(0, half)), list(range(half, p))]
    A = gl.A_from_groups(p, groups=groups)

    l = 0.0
    k = 0.0
    g = 1.0

    start_vector = start_vectors.RandomStartVector(normalise=True)
    beta = start_vector.get_vector(p)

    alpha = 1.0
    # With alpha == 1 the random term is weighted by zero, but the draw is
    # kept so the random stream (and hence the data) stays unchanged.
    Sigma = alpha * np.eye(p, p) \
        + (1.0 - alpha) * np.random.randn(p, p)
    mean = np.zeros(p)
    M = np.random.multivariate_normal(mean, Sigma, n)
    e = np.random.randn(n, 1)

    snr = 100.0

    X, y, beta_star = l1_l2_gl.load(l, k, g, beta, M, e, A, snr=snr)

    eps = 1e-8
    max_iter = 8500

    beta_start = start_vector.get_vector(p)

    mus = [5e-2, 5e-4, 5e-6, 5e-8]
    # The iteration budget is split evenly between the mu values.
    fista = proximal.FISTA(eps=eps, max_iter=max_iter // len(mus))

    beta_parsimony = beta_start
    for mu in mus:
        function = CombinedFunction()
        function.add_function(functions.losses.LinearRegression(X, y,
                                                                mean=False))
        function.add_penalty(gl.GroupLassoOverlap(l=g, A=A, mu=mu,
                                                  penalty_start=0))

        # Warm start: each run begins from the previous solution.
        beta_parsimony = fista.run(function, beta_parsimony)

    try:
        import spams

        params = {"loss": "square",
                  "regul": "group-lasso-l2",
                  "groups": np.array([1] * half + [2] * half,
                                     dtype=np.int32),
                  "lambda1": g,
                  "max_it": max_iter,
                  "tol": eps,
                  "ista": False,
                  "numThreads": -1,
                  }
        beta_spams, optim_info = \
            spams.fistaFlat(Y=np.asfortranarray(y),
                            X=np.asfortranarray(X),
                            W0=np.asfortranarray(beta_start),
                            return_optim_info=True,
                            **params)

    except ImportError:
        # Reference coefficients used when spams is not installed.
        beta_spams = np.asarray([[14.01111427],
                                 [35.56508563],
                                 [27.38245962],
                                 [22.39716553],
                                 [5.835744940],
                                 [5.841502910],
                                 [2.172209350],
                                 [32.40227785],
                                 [22.48364756],
                                 [26.48822401],
                                 [0.770391500],
                                 [36.28288883],
                                 [31.14118214],
                                 [7.938279340],
                                 [6.800713150],
                                 [6.862914540],
                                 [11.38161678],
                                 [19.63087584],
                                 [16.15855845],
                                 [10.89356615]])

    berr = np.linalg.norm(beta_parsimony - beta_spams)
    assert berr < 5e-2

    # Both solutions are scored with the function built for the last mu.
    f_parsimony = function.f(beta_parsimony)
    f_spams = function.f(beta_spams)
    ferr = abs(f_parsimony - f_spams)
    assert ferr < 5e-6
def _debias_on_support(X, Y, coef, n_coef):
    """Re-fit the non-zero entries of ``coef`` by ordinary least squares.

    ``coef`` may be 1-D or a single-column 2-D array.  The columns of ``X``
    selected by its non-zero entries are regressed on ``Y`` with
    ``np.linalg.lstsq`` and the result is scattered into a zero vector of
    length ``n_coef``.  Only row indices are used for the write-back, so no
    entry outside the support is touched.
    """
    support = np.flatnonzero(np.ravel(coef))
    refit = np.linalg.lstsq(X[:, support], Y)[0]
    debiased = np.zeros(n_coef)
    debiased[support] = np.ravel(refit)
    return debiased


def groupLasso_demo(signal_type, fig_start):
    """Compare lasso, group lasso (L2) and group lasso (Linf) on one signal.

    Data come from ``generate_data(signal_type)``.  Each of the three
    estimators is fitted, its non-zero coefficients are re-estimated by
    least squares (debiasing), and the debiased vector is plotted with its
    mean squared error against the true coefficients.  Figures are numbered
    ``fig_start`` to ``fig_start + 3``, saved as PNG files whose names
    contain ``signal_type``, and finally shown with ``plt.show()``.

    Parameters
    ----------
    signal_type
        Passed to ``generate_data`` and used in the output file names.
    fig_start : int
        Offset added to the matplotlib figure numbers.
    """
    X, Y, W_actual, groups = generate_data(signal_type)
    # Length of the coefficient vector, as stated in the plot title below.
    n_coef = 4096

    # Plotting the actual W
    plt.figure(0+fig_start)
    plt.plot(W_actual)
    plt.title("Original (D = 4096, number groups = 64, active groups = 8)")
    plt.savefig("W_actual_{}.png".format(signal_type) , dpi=300)

    ##### Applying Lasso Regression #####
    # L1 norm is the sum of absolute values of coefficients
    lasso_reg = linear_model.Lasso(alpha=0.5)
    lasso_reg.fit(X, Y)
    W_lasso_reg = lasso_reg.coef_

    ##### Debiasing step #####
    W_lasso_reg_debiased_2 = _debias_on_support(X, Y, W_lasso_reg, n_coef)
    lasso_reg_mse = mean_squared_error(W_actual, W_lasso_reg_debiased_2)

    plt.figure(1+fig_start)
    plt.plot(W_lasso_reg_debiased_2)
    plt.title('Standard L1 (debiased 1, regularization param(L1 = 0.5), MSE = {:.4f})'.format(lasso_reg_mse))
    plt.savefig("W_lasso_reg_{}.png".format(signal_type), dpi=300)

    ##### Applying Group Lasso L2 regression #####
    # L2 norm is the square root of sum of squares of coefficients
    # PNLL(W) = NLL(W) + regularization_parameter * sum over groups of the L2 norm
    group_lassoL2_reg = GroupLasso(
        groups=groups,
        group_reg=3,
        l1_reg=1,
        frobenius_lipschitz=True,
        scale_reg="inverse_group_size",
        subsampling_scheme=1,
        supress_warning=True,
        n_iter=1000,
        tol=1e-3,
    )
    group_lassoL2_reg.fit(X, Y)
    W_groupLassoL2_reg = group_lassoL2_reg.coef_

    ##### Debiasing step #####
    W_group_lassoL2_reg_debiased_2 = _debias_on_support(
        X, Y, W_groupLassoL2_reg, n_coef)
    groupLassoL2_mse = mean_squared_error(W_actual,
                                          W_group_lassoL2_reg_debiased_2)

    plt.figure(2+fig_start)
    plt.plot(W_group_lassoL2_reg_debiased_2)
    plt.title('Block-L2 (debiased 1, regularization param(L2 = 3, L1=1), MSE = {:.4f})'.format(groupLassoL2_mse))
    plt.savefig("W_groupLassoL2_reg_{}.png".format(signal_type), dpi=300)

    ##### Applying Group Lasso Linf regression #####
    # To use spams library, it is necessary to convert data to fortran
    # normalized arrays.
    # Visit http://spams-devel.gforge.inria.fr/ for the documentation of
    # the spams library.
    # Linf is the supremum of all the coefficients
    # PNLL(W) = NLL(W) + regularization_parameter * sum over groups of the Linf norm
    X_normalized = np.asfortranarray(X - np.tile(np.mean(X,0),(X.shape[0],1)),dtype=float)
    X_normalized = spams.normalize(X_normalized)

    Y_normalized = np.asfortranarray(Y - np.tile(np.mean(Y,0),(Y.shape[0],1)),dtype=float)
    Y_normalized = spams.normalize(Y_normalized)

    W_initial = np.zeros((X_normalized.shape[1],Y_normalized.shape[1]),dtype=float,order="F")

    # The solver is driven by a fixed group size; the `groups` array from
    # generate_data is not passed to spams.
    param = {'numThreads' : -1,'verbose' : True,
             'lambda2' : 3, 'lambda1' : 1, 'max_it' : 500,
             'L0' : 0.1, 'tol' : 1e-2, 'intercept' : False,
             'pos' : False, 'loss' : 'square'}
    param['regul'] = "group-lasso-linf"
    param['size_group'] = 64

    (W_groupLassoLinf_reg, optim_info) = spams.fistaFlat(Y_normalized,X_normalized,W_initial,True,**param)

    ##### Debiasing step #####
    # The refit uses the original (un-normalized) X and Y.
    W_group_lassoLinf_reg_debiased_2 = _debias_on_support(
        X, Y, W_groupLassoLinf_reg, n_coef)
    groupLassoLinf_mse = mean_squared_error(W_actual,
                                            W_group_lassoLinf_reg_debiased_2)

    plt.figure(3+fig_start)
    plt.plot(W_group_lassoLinf_reg_debiased_2)
    plt.title('Block-Linf (debiased 1, regularization param(L2 = 3, L1=1), MSE = {:.4f})'.format(groupLassoLinf_mse))
    plt.savefig("W_groupLassoLinf_reg_{}.png".format(signal_type), dpi=300)
    plt.show()
def test_nonoverlapping_nonsmooth(self):
    """Parsimony and SPAMS must agree on a two-group group-lasso problem.

    Simulated data from ``l1_l2_gl.load`` are fitted twice: with Parsimony's
    FISTA, run once per ``mu`` value and restarted from the previous
    solution, and with ``spams.fistaFlat``.  If ``spams`` is not importable
    a stored coefficient vector stands in for its solution.  The
    coefficients must match to 5e-2 in Euclidean norm and the objective
    values to 5e-6.
    """
    # Spams: http://spams-devel.gforge.inria.fr/doc-python/doc_spams.pdf

    import numpy as np
    from parsimony.functions import CombinedFunction
    import parsimony.algorithms.proximal as proximal
    import parsimony.functions as functions
    import parsimony.functions.nesterov.gl as gl
    import parsimony.datasets.simulate.l1_l2_gl as l1_l2_gl
    import parsimony.utils.start_vectors as start_vectors

    np.random.seed(42)

    # The number of features must be even so both groups have equal size.
    n_samples, n_features = 25, 20
    half = n_features // 2
    first_group = list(range(0, half))
    second_group = list(range(half, n_features))
    A = gl.linear_operator_from_groups(n_features,
                                       groups=[first_group, second_group])

    l_pen, k_pen, g_pen = 0.0, 0.0, 1.0

    start_vector = start_vectors.RandomStartVector(normalise=True)
    beta = start_vector.get_vector(n_features)

    # The random matrix is drawn even though its weight is zero here, so
    # the sequence of random numbers consumed stays the same.
    alpha = 1.0
    identity = np.eye(n_features, n_features)
    perturbation = np.random.randn(n_features, n_features)
    Sigma = alpha * identity + (1.0 - alpha) * perturbation
    M = np.random.multivariate_normal(np.zeros(n_features), Sigma,
                                      n_samples)
    e = np.random.randn(n_samples, 1)

    X, y, beta_star = l1_l2_gl.load(l_pen, k_pen, g_pen, beta, M, e, A,
                                    snr=100.0)

    eps = 1e-8
    max_iter = 8500

    beta_start = start_vector.get_vector(n_features)

    mus = [5e-2, 5e-4, 5e-6, 5e-8]
    fista = proximal.FISTA(eps=eps, max_iter=max_iter / len(mus))

    beta_parsimony = beta_start
    for mu in mus:
        loss = functions.losses.LinearRegression(X, y, mean=False)
        penalty = gl.GroupLassoOverlap(l=g_pen, A=A, mu=mu,
                                       penalty_start=0)

        function = CombinedFunction()
        function.add_function(loss)
        function.add_penalty(penalty)

        beta_parsimony = fista.run(function, beta_parsimony)

    try:
        import spams

        group_labels = np.array([1] * half + [2] * half, dtype=np.int32)
        params = dict(loss="square",
                      regul="group-lasso-l2",
                      groups=group_labels,
                      lambda1=g_pen,
                      max_it=max_iter,
                      tol=eps,
                      ista=False,
                      numThreads=-1)
        beta_spams, optim_info = spams.fistaFlat(
            Y=np.asfortranarray(y),
            X=np.asfortranarray(X),
            W0=np.asfortranarray(beta_start),
            return_optim_info=True,
            **params)

    except ImportError:
        # Stored reference solution for environments without spams.
        beta_spams = np.asarray(
            [[14.01111427], [35.56508563], [27.38245962], [22.39716553],
             [5.835744940], [5.841502910], [2.172209350], [32.40227785],
             [22.48364756], [26.48822401], [0.770391500], [36.28288883],
             [31.14118214], [7.938279340], [6.800713150], [6.862914540],
             [11.38161678], [19.63087584], [16.15855845], [10.89356615]])

    berr = np.linalg.norm(beta_parsimony - beta_spams)
    assert berr < 5e-2

    # The objective of the last (smallest mu) function scores both fits.
    ferr = abs(function.f(beta_parsimony) - function.f(beta_spams))
    assert ferr < 5e-6
def process(self,X,Y):
    """Add new observations and re-fit ``U`` and ``W`` by alternation.

    ``X`` is either a list of matrices or a single matrix; ``Y`` holds the
    matching target rows (one column per task).  The new data are appended
    to ``self.Q`` / ``self.Y`` and ``self.elementsSeen`` is advanced by the
    number of matrices added.  The method then alternates two
    ``spams.fistaFlat`` solves, one for ``W`` with ``U`` fixed and one for
    ``U`` with ``W`` fixed, each warm-started from the previous value.
    After every round ``self.u``, ``self.w`` and ``self.bias`` are updated
    and ``self.changeEval.evaluate(self.Q, self.Y)`` is called.

    The loop stops once more than ``bivar_it0`` rounds have run and either
    more than ``bivar_max_it`` rounds have run or the absolute change of the
    evaluated value is below ``bivar_tol`` (all read from
    ``self.allParams``).

    Returns the value of the last ``self.changeEval.evaluate`` call.
    """
    # Normalise the input to "list of matrices" + "rows of targets".  The
    # list is copied so later in-place extension of self.Q cannot modify
    # the caller's list.
    if isinstance(X, list):
        new_Q = list(X)
        new_Y = Y
    else:
        new_Q = [X]
        new_Y = array(Y)

    if self.elementsSeen == 0:
        self.Q = new_Q
        self.Y = new_Y
    else:
        self.Q += new_Q
        self.Y = vstack((self.Y, new_Y))
    # The counter lives on the instance for both input kinds.
    self.elementsSeen += len(new_Q)

    Q = self.Q
    Y = np.asfortranarray(self.Y)
    n_obs, ntasks = Y.shape
    nwords, nusers = Q[0].shape

    # U is initialised before W, in that order, in case initStrat is
    # stateful (e.g. random).
    U = ssp.csc_matrix(self.initStrat((nusers, ntasks)))
    W = ssp.csc_matrix(self.initStrat((nwords, ntasks)))
    bias = None
    param = self.spamsDict
    bivariter = 0

    # We expand Y s.t. the values of Y for each task t are held in their
    # own block of rows in column t, and every other value is NaN.
    # NOTE(review): presumably the configured spams loss skips NaN targets
    # - confirm against self.spamsDict.
    Yexpanded = ones((n_obs * ntasks, ntasks)) * nan
    for x in range(ntasks):
        ind = x * n_obs
        indnext = (x + 1) * n_obs
        Yexpanded[ind:indnext, x] = Y[:, x]
    Yexpanded = np.asfortranarray(Yexpanded)

    oldSSE = sys.float_info.max
    if self.intercept:
        # The first row of Y serves as the initial bias.
        bias = Y[0:1,:]

    while True:
        bivariter += 1
        # Dense, Fortran-ordered warm starts taken from the current values.
        W0 = np.asfortranarray(W.copy().toarray())
        U0 = np.asfortranarray(U.copy().toarray())

        # --- W step: design rows are u_t^T q^T, stacked per task over Q.
        Vprime = ssp.vstack([
            ssp.vstack([U[:,x:x+1].T.dot(q.T) for q in Q])
            for x in range(ntasks)
        ])
        if self.intercept:
            # A column of ones carries the bias; it gets its own row in W0.
            Vprime = ssp.hstack([Vprime, ones((Vprime.shape[0],1))])
            W0 = np.asfortranarray(vstack([W0,bias]))
        Vprime = ssp.csc_matrix(Vprime)

        (W,optim_info) = spams.fistaFlat(Yexpanded,Vprime,W0,True,**param)
        if self.intercept:
            bias = W[-1:,:]
            logging.debug("W bias: %s"%str(bias))
            W = ssp.csc_matrix(W[:-1,:])
        else:
            W = ssp.csc_matrix(W)

        # --- U step: design rows are w_t^T q, stacked per task over Q.
        Dprime = ssp.vstack([
            ssp.vstack([W[:,x:x+1].T.dot(q) for q in Q])
            for x in range(ntasks)
        ])
        if self.intercept:
            Dprime = ssp.hstack([Dprime, ones((Dprime.shape[0],1))])
            U0 = np.asfortranarray(vstack([U0,bias]))
        Dprime = ssp.csc_matrix(Dprime)

        (U,optim_info) = spams.fistaFlat(Yexpanded,Dprime,U0,True,**param)
        logging.debug("U step optim_info:\n%s"%optim_info)
        if self.intercept:
            bias = U[-1:,:]
            logging.debug("U bias: %s"%str(bias))
            U = ssp.csc_matrix(U[:-1,:])
        else:
            U = ssp.csc_matrix(U)

        # Publish the current model before it is evaluated.
        self.u = U
        self.w = W
        self.bias = bias

        sumSSE = self.changeEval.evaluate(self.Q,self.Y)
        logging.debug("This round's sumSSE: %2.9f"%sumSSE)
        improv = abs(oldSSE - sumSSE)
        oldSSE = sumSSE

        past_warmup = bivariter > self.allParams['bivar_it0']
        out_of_budget = bivariter > self.allParams['bivar_max_it']
        converged = improv < self.allParams['bivar_tol']
        if past_warmup and (out_of_budget or converged):
            logging.debug("Iteration: "+str(bivariter))
            logging.debug("Improvment: "+str(improv))
            break

    return sumSSE