from group_lasso import GroupLasso


class lasso_transformer:
    """Wraps GroupLasso as a feature-selection transformer."""

    def __init__(self, r, groups_ids):
        self.lasso = GroupLasso(
            groups=groups_ids,
            group_reg=r,
            l1_reg=r,
            n_iter=1000,
            scale_reg="none",  # the library expects the lowercase string, not "None"
            supress_warning=True,  # sic: the parameter name is misspelled in group-lasso itself
            tol=1e-04,
        )

    def fit(self, X, y):
        # GroupLasso expects a 2-D target, hence the reshape
        self.lasso.fit(X, y.values.reshape(-1, 1))
        return self

    def transform(self, X, y=None):
        # Keep only the columns whose coefficients are non-zero
        return self.lasso.transform(X)

    def fit_transform(self, X, y):
        self.fit(X, y)
        return self.transform(X)
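# A minimal usage sketch (not from the original source): the data, the group
# ids, and the regularisation strength r=0.05 are illustrative choices. A
# pandas Series target is assumed because fit() above accesses y.values.
import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
X_demo = rng.standard_normal((100, 10))
y_demo = pd.Series(X_demo[:, :3].sum(axis=1) + 0.1 * rng.standard_normal(100))
groups_demo = np.array([1] * 5 + [2] * 5)  # one group id per feature column

selector = lasso_transformer(r=0.05, groups_ids=groups_demo)
X_selected = selector.fit_transform(X_demo, y_demo)
print(X_selected.shape)  # only columns from groups with non-zero coefficients survive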
from group_lasso import GroupLasso


def single_gene_GLG(X, ptime, L, Dt, g, sigma, lamb, mask, queue, K, Ksum,
                    groups, group_reg):
    # convert_to_FISTA (defined elsewhere) builds the design matrix A and
    # target y for gene g from the expression data and pseudotime
    A, y, K = convert_to_FISTA(X, ptime, L, Dt, g, mask, queue, K, Ksum)
    nSamp, nFea = A.shape
    NL = int(L / Dt)  # number of discrete lags in the window (L / Dt)
    # out = training(A, y, np.random.rand(A.shape[1]), 0)
    gl = GroupLasso(
        groups=groups,
        group_reg=group_reg,
        l1_reg=lamb,
        frobenius_lipschitz=True,
        scale_reg="inverse_group_size",
        subsampling_scheme=1,
        supress_warning=True,
        n_iter=10000,
        tol=1e-3,
    )
    gl.fit(A, y.reshape(-1, 1))
    return gl.coef_.reshape(-1)
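# Hypothetical illustration (not in the original source): single_gene_GLG
# receives a `groups` vector for the columns of A. If, as the L / Dt lag
# computation suggests, each candidate regulator contributes NL lagged
# columns, one plausible way to build such a vector is to repeat each
# regulator's id NL times. The helper name and column layout are assumptions.
import numpy as np


def make_lagged_groups(n_regulators, NL):
    # regulator j occupies NL consecutive columns, all sharing group id j
    return np.repeat(np.arange(n_regulators), NL)


print(make_lagged_groups(n_regulators=3, NL=4))
# -> [0 0 0 0 1 1 1 1 2 2 2 2]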
import json
from math import sqrt

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from group_lasso import GroupLasso
from sklearn.metrics import mean_squared_error, r2_score, roc_curve


class GLasso(Model):  # Model is the project's base class, defined elsewhere
    # X represents the features, Y represents the labels
    X = None
    Y = None
    predictions = None
    model = None

    def __init__(self, X=None, Y=None, feature_headers=None, label_headers=None,
                 groups=None, type='regressor', cfg=False):
        if X is not None:
            self.X = X
        if Y is not None:
            self.Y = Y
        self.type = type
        self.cfg = cfg
        self.mapping_dict = None
        self.label_headers = label_headers
        self.no_inputs = len(feature_headers)
        if groups is None:
            # Default: all features share a single group
            groups = [1 for i in range(self.no_inputs)]
        self.model = GroupLasso(groups=groups, supress_warning=True)

    def fit(self, X=None, Y=None):
        if X is not None:
            self.X = X
        if Y is not None:
            self.Y = Y
        if self.type == 'classifier':
            self.Y = self.map_str_to_number(self.Y)
        print('Group Lasso Train started............')
        self.model.fit(self.X, self.Y)
        print('Group Lasso completed..........')
        return self.model

    def predict(self, test_features):
        print('Prediction started............')
        self.predictions = self.model.predict(test_features)
        if self.type == 'classifier':
            # Round to the nearest class index
            self.predictions = self.predictions.round()
        print('Prediction completed..........')
        return self.predictions

    def save(self):
        if self.cfg:
            f = open('grouplasso_configs.txt', 'w')
            f.write(json.dumps(self.model.get_params()))
            f.close()
        print('No models will be saved for Group lasso')

    def featureImportance(self):
        return self.model.coef_

    def map_str_to_number(self, Y):
        mapping_flag = False
        if self.mapping_dict is not None:
            for label_header in self.label_headers:
                Y[label_header] = Y[label_header].map(self.mapping_dict)
            return Y

        mapping_dict = None
        for label_header in self.label_headers:
            check_list = pd.Series(Y[label_header])
            for item in check_list:
                if type(item) == str:
                    mapping_flag = True
                    break
            if mapping_flag:
                classes = Y[label_header].unique()
                mapping_dict = {}
                index = 0
                for c in classes:
                    mapping_dict[c] = index
                    index += 1
                Y[label_header] = Y[label_header].map(mapping_dict)
                mapping_flag = False
        self.mapping_dict = mapping_dict
        return Y

    def map_number_to_str(self, Y, classes):
        Y = Y.round()
        Y = Y.astype(int)
        if self.mapping_dict is not None:
            mapping_dict = self.mapping_dict
        else:
            # Build the same class -> index mapping used by map_str_to_number,
            # so the inversion below yields index -> class
            mapping_dict = {}
            index = 0
            for c in classes:
                mapping_dict[c] = index
                index += 1
        inv_map = {v: k for k, v in mapping_dict.items()}
        return Y.map(inv_map)

    def getAccuracy(self, test_labels, predictions, origin=0, hitmissr=0.8):
        correct = 0
        df = pd.DataFrame(data=predictions.flatten())
        if self.type == 'classifier':
            test_labels = self.map_str_to_number(test_labels.copy())
            for i in range(len(df)):
                if df.values[i] == test_labels.values[i]:
                    correct = correct + 1
        else:
            # Count a regression prediction as a hit when its relative error
            # is within 1 - hitmissr
            for i in range(len(df)):
                if 1 - abs(df.values[i] - test_labels.values[i]) / abs(df.values[i]) >= hitmissr:
                    correct = correct + 1
        return float(correct) / len(df)

    def getConfusionMatrix(self, test_labels, predictions, label_headers):
        df = pd.DataFrame(data=predictions.flatten())
        if self.type == 'classifier':
            index = 0
            for label_header in label_headers:
                classes = test_labels[label_header].unique()
                df_tmp = self.map_number_to_str(df.iloc[:, index], classes)
                title = 'Normalized confusion matrix for Group Lasso (' + label_header + ')'
                self.plot_confusion_matrix(test_labels.iloc[:, index], df_tmp,
                                           classes=classes, normalize=True, title=title)
                index = index + 1
        else:
            return 'No Confusion Matrix for Regression'

    def getROC(self, test_labels, predictions, label_headers):
        predictions = pd.DataFrame(data=predictions.flatten())
        predictions.columns = test_labels.columns.values
        if self.type == 'classifier':
            test_labels = self.map_str_to_number(test_labels)
            fpr, tpr, _ = roc_curve(test_labels, predictions)
            plt.figure(1)
            plt.plot([0, 1], [0, 1], 'k--')
            plt.plot(fpr, tpr)
            plt.xlabel('False positive rate')
            plt.ylabel('True positive rate')
            plt.title('ROC curve')
            plt.show()
        else:
            return 'No ROC Curve for Regression'

    def getRSquare(self, test_labels, predictions, mode='single'):
        df = pd.DataFrame(data=predictions.flatten())
        if self.type == 'regressor':
            if mode == 'multiple':
                errors = r2_score(test_labels, df, multioutput='variance_weighted')
            else:
                errors = r2_score(test_labels, df)
            return errors
        else:
            return 'No RSquare for Classification'

    def getMSE(self, test_labels, predictions):
        df = pd.DataFrame(data=predictions.flatten())
        if self.type == 'regressor':
            return mean_squared_error(test_labels, df)
        else:
            return 'No MSE for Classification'

    def getMAPE(self, test_labels, predictions):
        df = pd.DataFrame(data=predictions.flatten())
        if self.type == 'regressor':
            errors = np.mean(np.abs((test_labels - df.values) / test_labels)) * 100
            return errors.values[0]
        else:
            return 'No MAPE for Classification'

    def getRMSE(self, test_labels, predictions):
        df = pd.DataFrame(data=predictions.flatten())
        if self.type == 'regressor':
            return sqrt(mean_squared_error(test_labels, df))
        else:
            return 'No RMSE for Classification'
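# A minimal usage sketch (not part of the original class): the toy regression
# data, column names, and two-group split are illustrative, and the Model
# base class is assumed to need no extra constructor arguments.
feature_headers = ['f1', 'f2', 'f3', 'f4']
X_demo = pd.DataFrame(np.random.randn(80, 4), columns=feature_headers)
Y_demo = pd.DataFrame({'target': 2.0 * X_demo['f1'] + X_demo['f2']})

glasso = GLasso(X=X_demo, Y=Y_demo, feature_headers=feature_headers,
                label_headers=['target'], groups=[1, 1, 2, 2], type='regressor')
glasso.fit()
preds = glasso.predict(X_demo)
print(glasso.getRSquare(Y_demo, preds))
print(glasso.getRMSE(Y_demo, preds))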
import numpy as np
import matplotlib.pyplot as plt
from group_lasso import GroupLasso

# X, w, groups, noise_level and intercept are defined earlier in the
# original script; add relative noise and an intercept to the targets
y += np.random.randn(*y.shape) * noise_level * y
y += intercept

gl = GroupLasso(
    groups=groups,
    n_iter=10,
    tol=1e-8,
    l1_reg=0.05,
    group_reg=0.08,
    frobenius_lipschitz=True,
    subsampling_scheme=None,
    fit_intercept=True,
)

print("Starting fit")
gl.LOG_LOSSES = True  # record the loss at every iteration
gl.fit(X, y)

# Compare true and estimated weights per response column, first over all
# features, then restricted to the features the model kept (gl.sparsity_mask)
for i in range(w.shape[1]):
    plt.figure()
    plt.subplot(211)
    plt.plot(w[:, i], ".", label="True weights")
    plt.plot(gl.coef_[:, i], ".", label="Estimated weights")
    plt.subplot(212)
    plt.plot(w[gl.sparsity_mask, i], ".", label="True weights")
    plt.plot(gl.coef_[gl.sparsity_mask, i], ".", label="Estimated weights")
    plt.legend()

plt.figure()
plt.plot([w.min(), w.max()], [gl.coef_.min(), gl.coef_.max()], "gray")
plt.scatter(w, gl.coef_, s=10)
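# Illustrative setup (not the original script's): one way the names assumed
# above could be produced. Sizes, the seed, and the 70% group-sparsity
# threshold are arbitrary choices for demonstration.
np.random.seed(0)
group_sizes = [np.random.randint(10, 20) for _ in range(18)]
groups = np.concatenate([size * [i] for i, size in enumerate(group_sizes)])
num_coeffs = sum(group_sizes)

X = np.random.standard_normal((10_000, num_coeffs))
w = np.random.standard_normal((num_coeffs, 3))  # three response columns
w *= (np.random.random((len(group_sizes), 1)) > 0.7)[groups]  # zero out ~70% of groups
intercept = 2
noise_level = 0.05
y = X @ w  # noise and intercept are added in the snippet above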
def groupLasso_demo(signal_type, fig_start):
    X, Y, W_actual, groups = generate_data(signal_type)

    # Plot the actual W
    plt.figure(0 + fig_start)
    plt.plot(W_actual)
    plt.title("Original (D = 4096, number groups = 64, active groups = 8)")
    plt.savefig("W_actual_{}.png".format(signal_type), dpi=300)

    ##### Applying Lasso Regression #####
    # The L1 norm is the sum of the absolute values of the coefficients
    lasso_reg = linear_model.Lasso(alpha=0.5)
    lasso_reg.fit(X, Y)
    W_lasso_reg = lasso_reg.coef_

    ##### Debiasing step #####
    ba = np.argwhere(W_lasso_reg != 0)[:, 0]  # indices of the non-zero coefficients
    X_debiased = X[:, ba]
    # Re-estimate the selected coefficients with ordinary least squares
    W_lasso_reg_debiased = np.linalg.lstsq(X_debiased, Y, rcond=None)
    W_lasso_reg_debiased_2 = np.zeros(4096)
    W_lasso_reg_debiased_2[ba] = W_lasso_reg_debiased[0].ravel()
    lasso_reg_mse = mean_squared_error(W_actual, W_lasso_reg_debiased_2)
    plt.figure(1 + fig_start)
    plt.plot(W_lasso_reg_debiased_2)
    plt.title('Standard L1 (debiased 1, regularization param (L1 = 0.5), MSE = {:.4f})'.format(lasso_reg_mse))
    plt.savefig("W_lasso_reg_{}.png".format(signal_type), dpi=300)

    ##### Applying Group Lasso L2 regression #####
    # The L2 norm is the square root of the sum of the squared coefficients:
    # PNLL(W) = NLL(W) + regularization_parameter * Σ_groups ||W_group||_2
    group_lassoL2_reg = GroupLasso(
        groups=groups,
        group_reg=3,
        l1_reg=1,
        frobenius_lipschitz=True,
        scale_reg="inverse_group_size",
        subsampling_scheme=1,
        supress_warning=True,
        n_iter=1000,
        tol=1e-3,
    )
    group_lassoL2_reg.fit(X, Y)
    W_groupLassoL2_reg = group_lassoL2_reg.coef_

    ##### Debiasing step #####
    ba = np.argwhere(W_groupLassoL2_reg != 0)[:, 0]
    X_debiased = X[:, ba]
    W_group_lassoL2_reg_debiased = np.linalg.lstsq(X_debiased, Y, rcond=None)
    W_group_lassoL2_reg_debiased_2 = np.zeros(4096)
    W_group_lassoL2_reg_debiased_2[ba] = W_group_lassoL2_reg_debiased[0].ravel()
    groupLassoL2_mse = mean_squared_error(W_actual, W_group_lassoL2_reg_debiased_2)
    plt.figure(2 + fig_start)
    plt.plot(W_group_lassoL2_reg_debiased_2)
    plt.title('Block-L2 (debiased 1, regularization params (L2 = 3, L1 = 1), MSE = {:.4f})'.format(groupLassoL2_mse))
    plt.savefig("W_groupLassoL2_reg_{}.png".format(signal_type), dpi=300)

    ##### Applying Group Lasso Linf regression #####
    # The spams library expects Fortran-ordered, normalized arrays; see
    # http://spams-devel.gforge.inria.fr/ for its documentation.
    # Linf is the supremum (largest absolute value) of the coefficients:
    # PNLL(W) = NLL(W) + regularization_parameter * Σ_groups ||W_group||_inf
    X_normalized = np.asfortranarray(X - np.tile(np.mean(X, 0), (X.shape[0], 1)), dtype=float)
    X_normalized = spams.normalize(X_normalized)
    Y_normalized = np.asfortranarray(Y - np.tile(np.mean(Y, 0), (Y.shape[0], 1)), dtype=float)
    Y_normalized = spams.normalize(Y_normalized)
    groups_modified = np.concatenate([[i] for i in groups]).reshape(-1, 1)
    W_initial = np.zeros((X_normalized.shape[1], Y_normalized.shape[1]), dtype=float, order="F")
    param = {'numThreads': -1, 'verbose': True,
             'lambda2': 3, 'lambda1': 1, 'max_it': 500,
             'L0': 0.1, 'tol': 1e-2, 'intercept': False,
             'pos': False, 'loss': 'square'}
    param['regul'] = "group-lasso-linf"
    param2 = param.copy()
    param['size_group'] = 64
    param2['groups'] = groups_modified  # explicit per-column groups variant (unused below)
    (W_groupLassoLinf_reg, optim_info) = spams.fistaFlat(Y_normalized, X_normalized, W_initial, True, **param)

    ##### Debiasing step #####
    ba = np.argwhere(W_groupLassoLinf_reg != 0)[:, 0]  # indices of the non-zero coefficients
    X_debiased = X[:, ba]
    W_groupLassoLinf_reg_debiased = np.linalg.lstsq(X_debiased, Y, rcond=None)
    W_group_lassoLinf_reg_debiased_2 = np.zeros(4096)
    W_group_lassoLinf_reg_debiased_2[ba] = W_groupLassoLinf_reg_debiased[0].ravel()
    groupLassoLinf_mse = mean_squared_error(W_actual, W_group_lassoLinf_reg_debiased_2)
    plt.figure(3 + fig_start)
    plt.plot(W_group_lassoLinf_reg_debiased_2)
    plt.title('Block-Linf (debiased 1, regularization params (L2 = 3, L1 = 1), MSE = {:.4f})'.format(groupLassoLinf_mse))
    plt.savefig("W_groupLassoLinf_reg_{}.png".format(signal_type), dpi=300)
    plt.show()
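# The three debiasing blocks above share one pattern: keep the support of the
# regularised solution, then re-estimate only those coefficients by ordinary
# least squares. A sketch of that pattern as a helper; the function name and
# the D=4096 default are mine, not the original script's:
def debias(X, Y, W_sparse, D=4096):
    support = np.argwhere(W_sparse != 0)[:, 0]  # indices kept by the sparse fit
    W_ols = np.linalg.lstsq(X[:, support], Y, rcond=None)[0]
    W_debiased = np.zeros(D)
    W_debiased[support] = W_ols.ravel()
    return W_debiased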
from sklearn.metrics import confusion_matrix

# Ground-truth sparsity pattern: the first nZeros coefficients of beta are zero
beta_bool = np.ndarray(shape=(p, 1), dtype=bool)
beta_bool[0:nZeros] = False
beta_bool[nZeros:] = True

Y = np.dot(X, beta) + e
Y = Y.reshape(-1, 1)

# l1_reg is the regularisation coefficient for the coefficient sparsity penalty;
# group_reg is the regularisation coefficient(s) for the group sparsity penalty
gl = GroupLasso(groups=F_groups, l1_reg=0, group_reg=0.35, supress_warning=True)
gl.fit(X, Y)
yhat = gl.predict(X)
beta_hat = gl.coef_

# Compare the true sparsity pattern with the estimated one
conf_m = confusion_matrix(beta_bool, gl.sparsity_mask_)
print("Number of variables: {}".format(p))
print("Number of zero coefficients: {}".format(nZeros))
print("Number of chosen variables: {}".format(gl.sparsity_mask_.sum()))
print(conf_m)

group_bool = np.ndarray(shape=(3, nGroups), dtype=int)
group_bool[0, :] = [0, 1, 2, 3, 4]
group_bool[1, :] = [nZeros // 2, nZeros // 2, 0, 0, 0]
Fgroup_bool = np.ndarray(shape=(3, nGroups), dtype=int)
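# Follow-up sketch (my addition, not in the original snippet): since conf_m
# compares the true support with the estimated one, support-recovery
# precision and recall can be read straight off it.
tn, fp, fn, tp = conf_m.ravel()
precision = tp / (tp + fp)  # fraction of selected variables that are truly non-zero
recall = tp / (tp + fn)     # fraction of truly non-zero variables that were selected
print("Support precision: {:.3f}, recall: {:.3f}".format(precision, recall))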