import numpy as np
from sklearn.linear_model import Lasso, lars_path


def _update_V(U, Y, alpha, V=None, Gram=None, method='lars'):
    """Update V in the sparse_pca loop.

    Parameters
    ----------
    V : array, optional
        Initial value of the dictionary, for warm restart.
    """
    n_features = Y.shape[1]
    n_atoms = U.shape[1]
    coef = np.empty((n_atoms, n_features))
    if method == 'lars':
        if Gram is None:
            Gram = np.dot(U.T, U)
        # Silence floating-point warnings from LARS; restore settings after.
        err_mgt = np.seterr(all='ignore')
        XY = np.dot(U.T, Y)
        for k in range(n_features):
            # A huge amount of time is spent in this loop. It needs to be
            # tight.
            _, _, coef_path_ = lars_path(U, Y[:, k], Xy=XY[:, k], Gram=Gram,
                                         alpha_min=alpha, method='lasso')
            coef[:, k] = coef_path_[:, -1]  # keep the solution at alpha_min
        np.seterr(**err_mgt)
    else:
        # max_iter and tol are constructor arguments, not fit() arguments;
        # warm_start=True is required for the coef_ initialization below
        # to actually seed the coordinate descent.
        clf = Lasso(alpha=alpha, fit_intercept=False, max_iter=1000,
                    tol=1e-8, warm_start=True)
        for k in range(n_features):
            # A huge amount of time is spent in this loop. It needs to be
            # tight.
            if V is not None:
                clf.coef_ = V[:, k]  # init with previous value of V[:, k]
            clf.fit(U, Y[:, k])
            coef[:, k] = clf.coef_
    return coef
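# A minimal smoke test for _update_V (an illustrative sketch, not part of
# the original module): shapes follow the code above, with Y of shape
# (n_samples, n_features) and U of shape (n_samples, n_atoms).
if __name__ == '__main__':
    rng = np.random.RandomState(0)
    U = rng.randn(20, 5)                          # code/loadings matrix
    Y = rng.randn(20, 8)                          # data to reconstruct
    V = _update_V(U, Y, alpha=0.1, method='cd')   # coordinate-descent branch
    print(V.shape)                                # (5, 8): one column per feature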
def deserialize_lasso_regressor(model_dict): model = Lasso(model_dict["params"]) model.coef_ = np.array(model_dict["coef_"]) if isinstance(model_dict["n_iter_"], list): model.n_iter_ = np.array(model_dict["n_iter_"]) else: model.n_iter_ = int(model_dict["n_iter_"]) if isinstance(model_dict["intercept_"], list): model.intercept_ = np.array(model_dict["intercept_"]) else: model.intercept_ = float(model_dict["intercept_"]) return model
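# Hedged round-trip sketch for the deserializer above. The
# serialize_lasso_regressor helper is hypothetical (not part of the
# original code); it just emits the dict layout the deserializer expects.
def serialize_lasso_regressor(model):
    return {
        "params": model.get_params(),
        "coef_": model.coef_.tolist(),
        "n_iter_": int(model.n_iter_),        # int for single-target fits
        "intercept_": float(model.intercept_),
    }


X = np.random.rand(50, 3)
y = X @ np.array([1.0, 0.0, -2.0]) + 0.5
fitted = Lasso(alpha=0.01).fit(X, y)
restored = deserialize_lasso_regressor(serialize_lasso_regressor(fitted))
assert np.allclose(fitted.predict(X), restored.predict(X))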
def __init__(self, neutral, smiling, batch_size=300, epochs=1, alpha=1):
    d = neutral.shape[1]
    self.W = 0
    self.b = 0
    lasso = Lasso(alpha=alpha, warm_start=True)
    lasso.coef_ = np.eye(d, d)  # start from the identity map
    for epoch in range(epochs):
        print("epoch", epoch)
        # np.random.random_integers is deprecated; randint's upper bound
        # is exclusive, so no "- 1" is needed.
        js = np.random.randint(0, neutral.shape[0], size=batch_size)
        X = lemur_util.distort(neutral[js])
        Y = lemur_util.distort(smiling[js])
        model = lasso.fit(X, Y)
        self.W += model.coef_
        self.b += model.intercept_
        print(self.W.shape, self.b.shape)
    self.W /= epochs
    self.b /= epochs
    self.b = self.b.reshape((1, d))  # reshape returns a copy; assign it
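# Standalone sketch of the idea above, assuming nothing about the enclosing
# class or lemur_util: average warm-started multi-output Lasso fits over
# random mini-batches (distortion replaced here by plain Gaussian noise).
import numpy as np
from sklearn.linear_model import Lasso

rng = np.random.default_rng(0)
neutral = rng.random((500, 16))
smiling = neutral + 0.1 * neutral @ rng.random((16, 16))  # toy target
lasso = Lasso(alpha=0.1, warm_start=True)
W, b, epochs = 0, 0, 3
for _ in range(epochs):
    js = rng.integers(0, neutral.shape[0], size=100)
    X = neutral[js] + 0.01 * rng.standard_normal((100, 16))
    Y = smiling[js] + 0.01 * rng.standard_normal((100, 16))
    fit = lasso.fit(X, Y)
    W, b = W + fit.coef_, b + fit.intercept_
W, b = W / epochs, b / epochs                 # averaged affine map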
train_y = train_data.iloc[:, 6]
test_x = test_data.iloc[:, 14:]
test_y = test_data.iloc[:, 6]

# LassoCV: coordinate-descent Lasso with cross-validation; 30-fold CV
# is used here to select the best alpha.
print("Epoch", epoch, "interval:", k)
print("Computing the regularization path with coordinate descent:")
model = LassoCV(cv=30).fit(train_x, train_y)
m_log_alphas = -np.log10(model.alphas_)
alpha = model.alpha_

lasso = Lasso(max_iter=10000, alpha=alpha)
y_pred_lasso = lasso.fit(train_x, train_y).predict(test_x)
a = r2_score(test_y, y_pred_lasso)
print("r^2 is", a)
r_sq.append(a)

# Collect the indices of nonzero coefficients *before* sorting, so the
# indices still line up with the original columns of train_x.
coef_index = [i for i in range(48) if lasso.coef_[i] != 0]
x = train_x.iloc[:, coef_index].columns.values
length = len(coef_index)
print("Selected", length, "factors:", x)
selected_feature.append(x)
coef.append(sorted(lasso.coef_, reverse=True))
r_int.append(r_sq)
pd.DataFrame(selected_feature).to_csv('lasso_2330_selected_feature_6' + str(k) + '.csv')
pd.DataFrame(r_int).to_csv('lasso_2330_r_int_6' + str(k) + '.csv')

#### Select 5 or 10 parameters (sklearn SelectFromModel & Lasso): see the sketch below
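# Sketch for the note above, reusing train_x/train_y/alpha from this script:
# SelectFromModel around Lasso caps the number of selected features via
# max_features; threshold=-np.inf makes max_features the only criterion.
from sklearn.feature_selection import SelectFromModel

selector = SelectFromModel(Lasso(alpha=alpha, max_iter=10000),
                           max_features=10, threshold=-np.inf)
selector.fit(train_x, train_y)
selected = train_x.columns[selector.get_support()]
print("Top features:", list(selected))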
def sparse_encode(X, dictionary, algorithm='mp', fit_tol=None, P_cum=None,
                  l0_sparseness=10, C=0., do_sym=True, verbose=0,
                  max_iter=1000):
    """Generic sparse coding

    Each column of the result is the solution to a sparse coding problem.

    Parameters
    ----------
    X : array of shape (n_samples, n_pixels)
        Data matrix.

    dictionary : array of shape (n_dictionary, n_pixels)
        The dictionary matrix against which to solve the sparse coding of
        the data. Some of the algorithms assume normalized rows.

    algorithm : {'mp', 'lasso_lars', 'lasso_cd', 'lars', 'omp', 'threshold'}
        mp : Matching Pursuit
        lars : uses the least angle regression method
            (linear_model.lars_path)
        lasso_lars : uses Lars to compute the Lasso solution
        lasso_cd : uses the coordinate descent method to compute the Lasso
            solution (linear_model.Lasso). lasso_lars will be faster if the
            estimated dictionary is sparse.
        omp : uses orthogonal matching pursuit to estimate the sparse
            solution
        threshold : squashes to zero all coefficients less than `fit_tol`
            from the projection dictionary * data'

    fit_tol : float
        The regularization parameter: alpha for the Lasso-type algorithms,
        the threshold for 'threshold'.

    max_iter : int, 1000 by default
        Maximum number of iterations to perform if `algorithm='lasso_cd'`.

    verbose : int
        Controls the verbosity; the higher, the more messages. Defaults to 0.

    Returns
    -------
    code : array of shape (n_samples, n_dictionary)
        The sparse codes
    """
    if X.ndim == 1:
        X = X[:, np.newaxis]

    if algorithm == 'lasso_lars':
        # Not passing in verbose=max(0, verbose-1) because Lars.fit already
        # corrects the verbosity level.
        from sklearn.linear_model import LassoLars
        cov = np.dot(dictionary, X.T)
        lasso_lars = LassoLars(alpha=fit_tol, fit_intercept=False,
                               verbose=verbose, normalize=False,
                               precompute=None, fit_path=False)
        lasso_lars.fit(dictionary.T, X.T, Xy=cov)
        sparse_code = lasso_lars.coef_.T
    elif algorithm == 'lasso_cd':
        # TODO: Make verbosity argument for Lasso?
        # sklearn.linear_model.coordinate_descent.enet_path has a verbosity
        # argument that we could pass in from Lasso.
        from sklearn.linear_model import Lasso
        clf = Lasso(alpha=fit_tol, fit_intercept=False, normalize=False,
                    precompute=None, max_iter=max_iter, warm_start=True)
        clf.fit(dictionary.T, X.T)
        sparse_code = clf.coef_.T
    elif algorithm == 'lars':
        # Not passing in verbose=max(0, verbose-1) because Lars.fit already
        # corrects the verbosity level.
        from sklearn.linear_model import Lars
        cov = np.dot(dictionary, X.T)
        lars = Lars(fit_intercept=False, verbose=verbose, normalize=False,
                    precompute=None, n_nonzero_coefs=l0_sparseness,
                    fit_path=False)
        lars.fit(dictionary.T, X.T, Xy=cov)
        sparse_code = lars.coef_.T
    elif algorithm == 'threshold':
        cov = np.dot(dictionary, X.T)
        sparse_code = (np.sign(cov) *
                       np.maximum(np.abs(cov) - fit_tol, 0)).T
    elif algorithm == 'omp':
        # TODO: Should verbose argument be passed to this?
        from sklearn.linear_model import orthogonal_mp_gram
        from sklearn.utils.extmath import row_norms
        cov = np.dot(dictionary, X.T)
        gram = np.dot(dictionary, dictionary.T)
        sparse_code = orthogonal_mp_gram(
            Gram=gram, Xy=cov, n_nonzero_coefs=l0_sparseness, tol=None,
            norms_squared=row_norms(X, squared=True), copy_Xy=False).T
    elif algorithm == 'mp':
        sparse_code = mp(X, dictionary, l0_sparseness=l0_sparseness,
                         fit_tol=fit_tol, P_cum=P_cum, C=C, do_sym=do_sym,
                         verbose=verbose)
    else:
        raise ValueError('Sparse coding method must be "mp", "lasso_lars", '
                         '"lasso_cd", "lars", "threshold" or "omp", '
                         'got %s.' % algorithm)
    return sparse_code
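# Minimal usage sketch for sparse_encode (assumes numpy is imported as np
# at module level, as the function body does): the 'threshold' branch is
# exercised because it has no external dependencies.
if __name__ == '__main__':
    rng = np.random.RandomState(0)
    D = rng.randn(32, 64)
    D /= np.linalg.norm(D, axis=1, keepdims=True)  # normalized rows
    X = rng.randn(10, 64)
    code = sparse_encode(X, D, algorithm='threshold', fit_tol=0.5)
    print(code.shape)  # (10, 32): one code per sample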
featurelist = pd.DataFrame()
featurelist['Feature Name'] = featureLabels

alphas = [0.0001, 0.01, 0.05, 0.1]
# For each alpha value in the list of alpha values,
for alpha in alphas:
    # Create a lasso regression with that alpha value,
    lasso = Lasso(alpha=alpha)
    # Fit the lasso regression
    lasso.fit(X_train, y_train)
    # Create a column name for that alpha value
    column_name = 'Alpha = %f' % alpha
    # Create a column of coefficient values; drop the last entry to
    # accommodate the difference in size between the coefficient vector
    # and the feature-label list.
    lasso.coef_ = np.delete(lasso.coef_, -1)
    featurelist[column_name] = lasso.coef_

# Sort the features data
featurelist['Alpha = 0.010000'] = featurelist['Alpha = 0.010000'].astype('float')
featurelist = featurelist.sort_values(by=['Alpha = 0.010000'], ascending=[False])

lasso = Lasso()  # default alpha = 1
lasso.fit(X_train, y_train)
train_score = lasso.score(X_train, y_train)
test_score = lasso.score(X_test, y_test)
coeff_used = np.sum(lasso.coef_ != 0)
print("training score for alpha=1:", train_score)
print("test score for alpha=1: ", test_score)
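# Side sketch (an alternative, not the original approach): tabulate
# coefficients without mutating lasso.coef_ in place, since overwriting
# coef_ desynchronizes predict()/score() from the fitted model.
coef_table = pd.Series(lasso.coef_[:len(featureLabels)], index=featureLabels)
print(coef_table[coef_table != 0].sort_values(ascending=False))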
def fit(self, X, y, Xstd=None):
    """Fit model to training data.

    Args:
        X (DataFrame): Binarized features with MultiIndex column labels
        y (array): Target variable
        Xstd (DataFrame, optional): Standardized numerical features

    Returns:
        LinearRuleRegression: Self
    """
    # Initialization
    # Number of samples
    n = X.shape[0]

    # Initialize with X itself i.e. singleton conjunctions
    # Feature indicator and conjunction matrices
    z = pd.DataFrame(np.eye(X.shape[1], dtype=int), index=X.columns)
    # A = X.values
    # Remove negations
    indPos = X.columns.get_level_values(1).isin(['', '<=', '=='])
    z = z.loc[:, indPos]
    A = X.loc[:, indPos].values
    # Scale conjunction matrix to account for non-uniform penalties
    A = A * self.lambda0 / (self.lambda0 + self.lambda1 * z.sum().values)
    if self.useOrd:
        self.namesOrd = Xstd.columns
        numOrd = Xstd.shape[1]
        # Scale ordinal features to have similar std as "average" binary feature
        Astd = 0.4 * Xstd.values

    # Iteration counter
    self.it = 0
    # Mean absolute deviation from the mean: an upper bound on the lambdas
    # that lead to non-constant solutions
    self.mu = y.mean()
    MADM = np.abs(y - self.mu).mean()
    # Lasso object
    lr = Lasso(alpha=self.lambda0 * MADM / 2, selection='cyclic')
    # lr = LassoLars(alpha=self.lambda0 * MADM / 2, normalize=False)

    # Fit Lasso model
    if self.useOrd:
        B = np.concatenate((Astd, A), axis=1)
        lr.fit(B, y)
        # Initial residual
        r = (lr.predict(B) - y) / n / (MADM / 2)
    else:
        lr.fit(A, y)
        # Initial residual
        r = (lr.predict(A) - y) / n / (MADM / 2)

    # Most "negative" subderivative among current variables (undo scaling)
    UB = -np.abs(np.dot(r, A))
    UB *= (self.lambda0 + self.lambda1 * z.sum().values) / self.lambda0
    UB += self.lambda0 + self.lambda1 * z.sum().values
    UB = min(UB.min(), 0)
    # Beam search for conjunctions with subdifferentials that exclude zero
    vp, zp, Ap = beam_search_K1(r, X, self.lambda0, self.lambda1,
                                UB=UB, B=self.B, wLB=self.wLB,
                                eps=self.eps, stopEarly=self.stopEarly)
    vn, zn, An = beam_search_K1(-r, X, self.lambda0, self.lambda1,
                                UB=UB, B=self.B, wLB=self.wLB,
                                eps=self.eps, stopEarly=self.stopEarly)
    v = np.append(vp, vn)

    while (v < UB).any() and (self.it < self.iterMax):
        # Subdifferentials excluding zero exist, continue
        self.it += 1
        zNew = pd.concat([zp, zn], axis=1, ignore_index=True)
        Anew = np.concatenate((Ap, An), axis=1)
        # K conjunctions with largest subderivatives in absolute value
        idxLargest = np.argsort(v)[:self.K]
        v = v[idxLargest]
        zNew = zNew.iloc[:, idxLargest]
        Anew = Anew[:, idxLargest]
        # Scale new conjunction matrix to account for non-uniform penalties
        Anew = Anew * self.lambda0 / (self.lambda0 + self.lambda1 * zNew.sum().values)
        # Add to existing conjunctions
        z = pd.concat([z, zNew], axis=1, ignore_index=True)
        A = np.concatenate((A, Anew), axis=1)

        # Fit Lasso model
        if self.useOrd:
            B = np.concatenate((Astd, A), axis=1)
            lr.fit(B, y)
            # Residual
            r = (lr.predict(B) - y) / n / (MADM / 2)
        else:
            lr.fit(A, y)
            # Residual
            r = (lr.predict(A) - y) / n / (MADM / 2)

        # Most "negative" subderivative among current variables (undo scaling)
        UB = -np.abs(np.dot(r, A))
        UB *= (self.lambda0 + self.lambda1 * z.sum().values) / self.lambda0
        UB += self.lambda0 + self.lambda1 * z.sum().values
        UB = min(UB.min(), 0)
        # Beam search for conjunctions with subdifferentials that exclude zero
        vp, zp, Ap = beam_search_K1(r, X, self.lambda0, self.lambda1,
                                    UB=UB, B=self.B, wLB=self.wLB,
                                    eps=self.eps, stopEarly=self.stopEarly)
        vn, zn, An = beam_search_K1(-r, X, self.lambda0, self.lambda1,
                                    UB=UB, B=self.B, wLB=self.wLB,
                                    eps=self.eps, stopEarly=self.stopEarly)
        v = np.append(vp, vn)

    # Restrict model to conjunctions with nonzero coefficients
    try:
        idxNonzero = np.where(np.abs(lr.coef_) > self.eps)[0]
        if self.useOrd:
            # Nonzero indices of standardized and rule features
            self.idxNonzeroOrd = idxNonzero[idxNonzero < numOrd]
            nnzOrd = len(self.idxNonzeroOrd)
            idxNonzeroRules = idxNonzero[idxNonzero >= numOrd] - numOrd
            if self.debias and len(idxNonzero):
                # Re-fit with effectively no regularization
                # (Ridge with a tiny alpha)
                z = z.iloc[:, idxNonzeroRules]
                # lr.alpha = self.eps
                lr = Ridge(alpha=self.eps)
                lr.fit(B[:, idxNonzero], y)
                idxNonzero = np.where(np.abs(lr.coef_) > self.eps)[0]
                # Nonzero indices of standardized and rule features
                idxNonzeroOrd2 = idxNonzero[idxNonzero < nnzOrd]
                self.idxNonzeroOrd = self.idxNonzeroOrd[idxNonzeroOrd2]
                idxNonzeroRules = idxNonzero[idxNonzero >= nnzOrd] - nnzOrd
            self.z = z.iloc[:, idxNonzeroRules]
            lr.coef_ = lr.coef_[idxNonzero]
        else:
            if self.debias and len(idxNonzero):
                # Re-fit with effectively no regularization
                # (Ridge with a tiny alpha)
                z = z.iloc[:, idxNonzero]
                # lr.alpha = self.eps
                lr = Ridge(alpha=self.eps)
                lr.fit(A[:, idxNonzero], y)
                idxNonzero = np.where(np.abs(lr.coef_) > self.eps)[0]
            self.z = z.iloc[:, idxNonzero]
            lr.coef_ = lr.coef_[idxNonzero]
    except AttributeError:
        # Model has no coefficients except the intercept
        self.z = z
    self.lr = lr
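# Standalone sketch of the debiasing step above: refit an (almost)
# unregularized Ridge on the Lasso support to undo coefficient shrinkage.
import numpy as np
from sklearn.linear_model import Lasso, Ridge

rng = np.random.RandomState(0)
X = rng.randn(200, 20)
y = X[:, :3] @ np.array([2.0, -1.0, 0.5]) + 0.1 * rng.randn(200)
lasso = Lasso(alpha=0.05).fit(X, y)
support = np.abs(lasso.coef_) > 1e-6
debiased = Ridge(alpha=1e-6).fit(X[:, support], y)
print(support.sum(), debiased.coef_)  # shrinkage largely removed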
def optimize(self, optimizer=DEFAULT_OPTIMIZER, max_iters=100):
    """Fit the model.

    The following elements are required:
        x: A numpy array of shape (n, p) for the independent variables.
        y: A numpy array of shape (n, 1) for the dependent variables.
        tasks: A numpy array of shape (n, T) where every row contains
            exactly one 1 (and 0 elsewhere) to indicate which task the
            sample belongs to.
        descriptors: A (T, L) array containing the descriptors of each task.
        epsilon: A parameter defining when to stop the optimization.
        optimizer: Selects the optimization algorithm.
            'Lasso' uses the Lasso implementation of sklearn
            'LassoLars' uses the LassoLars implementation of sklearn
            'spams' uses the Lasso implementation of spams
    """
    # Get input sizes and initialize parameters
    n, p = self.x.shape
    T = self.tasks.shape[1]
    L = self.descriptors.shape[1]

    # Precalculate the x_alpha matrix for optimizing alpha.
    # It is an (n, p*L) matrix where xD_{i,(j-1)L+l} = theta_j * d_l for
    # the corresponding task.
    repeated_descriptors = np.repeat(self.descriptors, p, axis=0)
    repeated_descriptors.shape = (T, L * p)
    x_alpha = self.tasks.dot(repeated_descriptors) * np.repeat(self.x, L, axis=1)
    del repeated_descriptors

    # Initialize theta
    self.theta = np.ones((1, p), dtype=np.float64)

    if optimizer == LASSO:
        optimize_theta = Lasso(warm_start=True, positive=True,
                               alpha=self.lambda_1, **self.theta_params)
        optimize_alpha = Lasso(warm_start=True, alpha=self.lambda_2,
                               **self.alpha_params)
    elif optimizer == LASSOLARS:
        optimize_theta = LassoLars(alpha=self.lambda_1, **self.theta_params)
        optimize_alpha = LassoLars(alpha=self.lambda_2, **self.alpha_params)
    elif optimizer == LASSOSPAMS:
        optimize_theta = SpamWrapperOptimizer(positive=True,
                                              lambda_=self.lambda_1,
                                              params=self.theta_params)
        optimize_alpha = SpamWrapperOptimizer(lambda_=self.lambda_2,
                                              params=self.alpha_params)
    else:
        raise ValueError("Not a valid optimizer: %r" % optimizer)

    # Initialize alpha and seed the warm start with it
    if self.random_init:
        self.alpha = np.random.normal(size=(p, L)).astype(np.float64)
    else:
        self.alpha = np.zeros((p, L), dtype=np.float64)
    optimize_alpha.coef_ = self.alpha.flatten()

    beta_0 = self.get_beta(self.descriptors)
    x_theta = self.tasks.dot(self.descriptors.dot(self.alpha.T)) * self.x

    # Start the two-phase optimization
    continue_optimization = True
    self.iterations = 0
    while continue_optimization:
        # Optimize for alpha
        self.alpha[:] = optimize_alpha.fit(
            np.repeat(self.theta, L, axis=1) * x_alpha,
            self.y).coef_.reshape((p, L))
        beta = self.get_beta(self.descriptors)

        # Optimize for theta
        x_theta[:] = self.tasks.dot(self.descriptors.dot(self.alpha.T)) * self.x
        self.theta[:] = optimize_theta.fit(x_theta, self.y).coef_

        self.iterations += 1
        if np.linalg.norm(beta.flatten() - beta_0.flatten()) < self.tol:
            continue_optimization = False
        else:
            beta_0 = beta
        if self.iterations >= max_iters:
            continue_optimization = False
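# Side sketch of the warm-start pattern used above: seeding coef_ before
# calling fit makes coordinate descent start from that point instead of
# zeros (only honored when warm_start=True).
import numpy as np
from sklearn.linear_model import Lasso

est = Lasso(alpha=0.1, warm_start=True)
est.coef_ = np.zeros(5)                      # explicit initial point
est.fit(np.random.rand(30, 5), np.random.rand(30))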
import numpy as np


def _sparse_encode(X, dictionary, gram, cov=None, algorithm='lasso_lars',
                   regularization=None, copy_cov=True, init=None,
                   max_iter=1000):
    """Generic sparse coding

    Each column of the result is the solution to a Lasso problem.

    Parameters
    ----------
    X: array of shape (n_samples, n_features)
        Data matrix.

    dictionary: array of shape (n_components, n_features)
        The dictionary matrix against which to solve the sparse coding of
        the data. Some of the algorithms assume normalized rows.

    gram: None | array, shape=(n_components, n_components)
        Precomputed Gram matrix, dictionary * dictionary'
        gram can be None if method is 'threshold'.

    cov: array, shape=(n_components, n_samples)
        Precomputed covariance, dictionary * X'

    algorithm: {'lasso_admm', 'lasso_lars', 'lasso_cd', 'lars', 'omp', 'threshold'}
        lars: uses the least angle regression method (linear_model.lars_path)
        lasso_lars: uses Lars to compute the Lasso solution
        lasso_cd: uses the coordinate descent method to compute the Lasso
            solution (linear_model.Lasso). lasso_lars will be faster if the
            estimated components are sparse.
        lasso_admm: uses ADMM to compute the Lasso solution.
        omp: uses orthogonal matching pursuit to estimate the sparse solution
        threshold: squashes to zero all coefficients less than regularization
            from the projection dictionary * data'

    regularization : int | float
        The regularization parameter. It corresponds to alpha when algorithm
        is 'lasso_lars', 'lasso_cd' or 'threshold'. Otherwise it corresponds
        to n_nonzero_coefs.

    init: array of shape (n_samples, n_components)
        Initialization value of the sparse code. Only used if
        `algorithm='lasso_cd'`.

    max_iter: int, 1000 by default
        Maximum number of iterations to perform if `algorithm='lasso_cd'`.

    copy_cov: boolean, optional
        Whether to copy the precomputed covariance matrix; if False, it may
        be overwritten.

    Returns
    -------
    code: array of shape (n_samples, n_components)
        The sparse codes

    See also
    --------
    sklearn.linear_model.lars_path
    sklearn.linear_model.orthogonal_mp
    sklearn.linear_model.Lasso
    SparseCoder
    """
    if X.ndim == 1:
        X = X[:, np.newaxis]
    n_samples, n_features = X.shape
    if cov is None and algorithm != 'lasso_cd':
        # overwriting cov is safe
        copy_cov = False
        cov = np.dot(dictionary, X.T)

    if algorithm == 'lasso_admm':
        alpha = float(regularization) / n_features  # account for scaling
        try:
            err_mgt = np.seterr(all='ignore')
            code, dictionary = lasso_admm(X.T, dictionary.T, gamma=alpha,
                                          gram=gram, cov=cov,
                                          max_iter=max_iter)
            new_code = code.T
        finally:
            np.seterr(**err_mgt)
    elif algorithm == 'lasso_lars':
        alpha = float(regularization) / n_features  # account for scaling
        try:
            err_mgt = np.seterr(all='ignore')
            lasso_lars = LassoLars(alpha=alpha, fit_intercept=False,
                                   verbose=False, normalize=False,
                                   precompute=gram, fit_path=False)
            lasso_lars.fit(dictionary.T, X.T, Xy=cov)
            new_code = lasso_lars.coef_
        finally:
            np.seterr(**err_mgt)
    elif algorithm == 'lasso_cd':
        alpha = float(regularization) / n_features  # account for scaling
        clf = Lasso(alpha=alpha, fit_intercept=False, precompute=gram,
                    max_iter=max_iter, warm_start=True)
        if init is not None:
            # Only seed the warm start when an initial code is provided;
            # coef_ = None would break the first fit.
            clf.coef_ = init
        clf.fit(dictionary.T, X.T)
        new_code = clf.coef_
    elif algorithm == 'lars':
        try:
            err_mgt = np.seterr(all='ignore')
            lars = Lars(fit_intercept=False, verbose=False, normalize=False,
                        precompute=gram, n_nonzero_coefs=int(regularization),
                        fit_path=False)
            lars.fit(dictionary.T, X.T, Xy=cov)
            new_code = lars.coef_
        finally:
            np.seterr(**err_mgt)
    elif algorithm == 'threshold':
        new_code = (np.sign(cov) *
                    np.maximum(np.abs(cov) - regularization, 0)).T
    elif algorithm == 'omp':
        new_code = orthogonal_mp_gram(gram, cov, regularization, None,
                                      row_norms(X, squared=True),
                                      copy_Xy=copy_cov).T
    else:
        raise ValueError('Sparse coding method must be "lasso_admm", '
                         '"lasso_lars", "lasso_cd", "lars", "threshold" '
                         'or "omp", got %s.' % algorithm)
    return new_code
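# Minimal usage sketch for _sparse_encode with the coordinate-descent
# branch; gram is precomputed as dictionary * dictionary' per the docstring
# (assumes the module-level sklearn imports, e.g. Lasso, are available).
if __name__ == '__main__':
    rng = np.random.RandomState(0)
    D = rng.randn(15, 30)
    X = rng.randn(8, 30)
    gram = np.dot(D, D.T)
    code = _sparse_encode(X, D, gram, algorithm='lasso_cd',
                          regularization=1.0)
    print(code.shape)  # (8, 15): one code per sample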