Example #1
from group_lasso import GroupLasso


class lasso_transformer:
    """Wraps GroupLasso as a scikit-learn style feature-selection transformer."""

    def __init__(self, r, groups_ids):
        self.lasso = GroupLasso(
            groups=groups_ids,
            group_reg=r,
            l1_reg=r,
            n_iter=1000,
            scale_reg="none",  # the library expects "none", not "None"
            supress_warning=True,  # sic: the library's parameter is spelled this way
            tol=1e-04,
        )

    def fit(self, X, y):
        self.lasso.fit(X, y.values.reshape(-1, 1))
        return self

    def transform(self, X, y=None):
        return self.lasso.transform(X)

    def fit_transform(self, X, y):
        self.fit(X, y)
        return self.transform(X, y)
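A minimal usage sketch (not part of the original example): because `fit` calls `y.values`, the target is assumed to be a pandas Series. The data, group labels, and regularisation strength below are illustrative.

import numpy as np
import pandas as pd
from sklearn.linear_model import Ridge
from sklearn.pipeline import Pipeline

# Illustrative data: six features in three groups of two; only group 0 matters.
X = np.random.randn(100, 6)
y = pd.Series(X[:, 0] - X[:, 1] + 0.1 * np.random.randn(100))
groups = np.array([0, 0, 1, 1, 2, 2])

pipe = Pipeline([
    ("select", lasso_transformer(r=0.05, groups_ids=groups)),
    ("model", Ridge()),
])
pipe.fit(X, y)
print(pipe.predict(X[:5]))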
Example #2
def single_gene_GLG(X, ptime, L, Dt, g, sigma, lamb, mask, queue, K, Ksum,
                    groups, group_reg):
    # Recast the problem in design-matrix form for the FISTA-based solver.
    A, y, K = convert_to_FISTA(X, ptime, L, Dt, g, mask, queue, K, Ksum)
    gl = GroupLasso(
        groups=groups,
        group_reg=group_reg,
        l1_reg=lamb,
        frobenius_lipschitz=True,
        scale_reg="inverse_group_size",
        subsampling_scheme=1,
        supress_warning=True,
        n_iter=10000,
        tol=1e-3,
    )
    gl.fit(A, y.reshape(-1, 1))
    # coef_ has shape (n_features, 1); return it as a flat vector.
    return gl.coef_.reshape(-1)
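`convert_to_FISTA` is project-specific, so the function above cannot run on its own. A minimal sketch of the same fit-and-extract pattern on synthetic data (shapes, labels, and regularisation values are illustrative):

import numpy as np
from group_lasso import GroupLasso

rng = np.random.default_rng(0)
A = rng.standard_normal((200, 12))            # design matrix
groups = np.repeat(np.arange(4), 3)           # four groups of three features
y = A[:, :3] @ np.array([1.0, -2.0, 0.5])     # only group 0 is active
y += 0.1 * rng.standard_normal(200)

gl = GroupLasso(groups=groups, group_reg=0.1, l1_reg=0.0,
                supress_warning=True, n_iter=5000, tol=1e-4)
gl.fit(A, y.reshape(-1, 1))
print(gl.coef_.reshape(-1))                   # coefficients outside group 0 shrink toward 0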
Example #3
import json
from math import sqrt

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from group_lasso import GroupLasso
from sklearn.metrics import mean_squared_error, r2_score, roc_curve


class GLasso(Model):  # Model is the project's own base class

    # X holds the features, Y the labels
    X = None
    Y = None
    prediction = None
    model = None

    def __init__(self,
                 X=None,
                 Y=None,
                 feature_headers=None,
                 label_headers=None,
                 groups=None,
                 type='regressor',
                 cfg=False):

        if X is not None:
            self.X = X

        if Y is not None:
            self.Y = Y

        self.type = type
        self.cfg = cfg

        self.mapping_dict = None
        self.label_headers = label_headers
        self.no_inputs = len(feature_headers)

        if groups is None:
            groups = [1 for i in range(self.no_inputs)]
        self.model = GroupLasso(groups=groups, supress_warning=True)

    def fit(self, X=None, Y=None):
        if X is not None:
            self.X = X

        if Y is not None:
            self.Y = Y

        if self.type == 'classifier':
            self.Y = self.map_str_to_number(self.Y)

        print('Group Lasso Train started............')
        self.model.fit(self.X, self.Y)
        print('Group Lasso completed..........')

        return self.model

    def predict(self, test_features):
        print('Prediction started............')
        self.predictions = self.model.predict(test_features)
        if self.type == 'classifier':
            self.predictions = self.predictions.round()
        print('Prediction completed..........')
        return self.predictions

    def save(self):
        if self.cfg:
            with open('grouplasso_configs.txt', 'w') as f:
                f.write(json.dumps(self.model.get_params()))
        print('No models will be saved for Group lasso')

    def featureImportance(self):
        return self.model.coef_

    def map_str_to_number(self, Y):
        mapping_flag = False
        if self.mapping_dict is not None:
            for label_header in self.label_headers:
                Y[label_header] = Y[label_header].map(self.mapping_dict)
            return Y

        mapping_dict = None
        for label_header in self.label_headers:
            check_list = pd.Series(Y[label_header])
            for item in check_list:
                if type(item) == str:
                    mapping_flag = True
                    break
            if mapping_flag:
                classes = Y[label_header].unique()
                mapping_dict = {}
                index = 0
                for c in classes:
                    mapping_dict[c] = index
                    index += 1

                Y[label_header] = Y[label_header].map(mapping_dict)
                mapping_flag = False

        self.mapping_dict = mapping_dict
        return Y

    def map_number_to_str(self, Y, classes):
        Y = Y.round()
        Y = Y.astype(int)
        if self.mapping_dict is not None:
            mapping_dict = self.mapping_dict
        else:
            # Build the same class -> index mapping used by map_str_to_number,
            # so the inversion below always yields an index -> class map.
            mapping_dict = {c: index for index, c in enumerate(classes)}

        inv_map = {v: k for k, v in mapping_dict.items()}
        return Y.map(inv_map)

    def getAccuracy(self, test_labels, predictions, origin=0, hitmissr=0.8):
        # For regressors, a prediction counts as a hit when its error,
        # relative to the predicted value, is at most (1 - hitmissr).
        if self.type == 'classifier':
            correct = 0
            df = pd.DataFrame(data=predictions.flatten())
            test_labels = self.map_str_to_number(test_labels.copy())
            for i in range(len(df)):
                if (df.values[i] == test_labels.values[i]):
                    correct = correct + 1
        else:
            correct = 0
            df = pd.DataFrame(data=predictions.flatten())
            for i in range(len(df)):
                if 1 - abs(df.values[i] - test_labels.values[i]) / abs(
                        df.values[i]) >= hitmissr:
                    correct = correct + 1
        return float(correct) / len(df)

    def getConfusionMatrix(self, test_labels, predictions, label_headers):
        df = pd.DataFrame(data=predictions.flatten())
        if self.type == 'classifier':
            index = 0
            for label_header in label_headers:
                classes = test_labels[label_header].unique()
                df_tmp = self.map_number_to_str(df.iloc[:, index], classes)
                title = 'Normalized confusion matrix for Group Lasso (' + label_header + ')'
                self.plot_confusion_matrix(test_labels.iloc[:, index],
                                           df_tmp,
                                           classes=classes,
                                           normalize=True,
                                           title=title)
                index = index + 1
        else:
            return 'No Confusion Matrix for Regression'

    def getROC(self, test_labels, predictions, label_headers):
        predictions = pd.DataFrame(data=predictions.flatten())
        predictions.columns = test_labels.columns.values
        if self.type == 'classifier':
            test_labels = self.map_str_to_number(test_labels)
            fpr, tpr, _ = roc_curve(test_labels, predictions)
            plt.figure(1)
            plt.plot([0, 1], [0, 1], 'k--')
            plt.plot(fpr, tpr)
            plt.xlabel('False positive rate')
            plt.ylabel('True positive rate')
            plt.title('ROC curve')
            plt.show()
        else:
            return 'No ROC curve for Regression'

    def getRSquare(self, test_labels, predictions, mode='single'):
        df = pd.DataFrame(data=predictions.flatten())
        if self.type == 'regressor':
            if mode == 'multiple':
                errors = r2_score(test_labels,
                                  df,
                                  multioutput='variance_weighted')
            else:
                errors = r2_score(test_labels, df)
            return errors
        else:
            return 'No RSquare for Classification'

    def getMSE(self, test_labels, predictions):
        df = pd.DataFrame(data=predictions.flatten())
        if self.type == 'regressor':
            errors = mean_squared_error(test_labels, df)
            return errors
        else:
            return 'No MSE for Classification'

    def getMAPE(self, test_labels, predictions):
        df = pd.DataFrame(data=predictions.flatten())
        if self.type == 'regressor':
            errors = np.mean(np.abs(
                (test_labels - df.values) / test_labels)) * 100
            return errors.values[0]
        else:
            return 'No MAPE for Classification'

    def getRMSE(self, test_labels, predictions):
        df = pd.DataFrame(data=predictions.flatten())
        if self.type == 'regressor':
            errors = sqrt(mean_squared_error(test_labels, df))
            return errors
        else:
            return 'No RMSE for Classification'
Example #4
    # Fragment: X, y, w, groups, noise_level and intercept are defined
    # earlier in the original script (synthetic regression data with group structure).
    y += np.random.randn(*y.shape) * noise_level * y
    y += intercept

    gl = GroupLasso(
        groups=groups,
        n_iter=10,
        tol=1e-8,
        l1_reg=0.05,
        group_reg=0.08,
        frobenius_lipschitz=True,
        subsampling_scheme=None,
        fit_intercept=True,
    )
    print("Starting fit")
    gl.LOG_LOSSES = True
    gl.fit(X, y)

    for i in range(w.shape[1]):
        plt.figure()
        plt.subplot(211)
        plt.plot(w[:, i], ".", label="True weights")
        plt.plot(gl.coef_[:, i], ".", label="Estimated weights")

        plt.subplot(212)
        plt.plot(w[gl.sparsity_mask, i], ".", label="True weights")
        plt.plot(gl.coef_[gl.sparsity_mask, i], ".", label="Estimated weights")
        plt.legend()

    plt.figure()
    plt.plot([w.min(), w.max()], [gl.coef_.min(), gl.coef_.max()], "gray")
    plt.scatter(w, gl.coef_, s=10)
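    # Hypothetical follow-up (not in the original): with LOG_LOSSES enabled,
    # the fit is assumed to record a loss history in losses_, as in the
    # library's example gallery, which can be plotted to check convergence.
    plt.figure()
    plt.plot(gl.losses_)
    plt.xlabel("Iteration")
    plt.ylabel("Loss")
    plt.title("Group lasso convergence")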
Example #5
import numpy as np
import matplotlib.pyplot as plt
import spams
from sklearn import linear_model
from sklearn.metrics import mean_squared_error
from group_lasso import GroupLasso


def groupLasso_demo(signal_type, fig_start):
  X,Y,W_actual,groups = generate_data(signal_type)  # generate_data is this demo's own helper
  #Plotting the actual W
  plt.figure(0+fig_start)
  plt.plot(W_actual)
  plt.title("Original (D = 4096, number groups = 64, active groups = 8)")
  plt.savefig("W_actual_{}.png".format(signal_type) , dpi=300)
  ##### Applying Lasso Regression #####
  # L1 norm is the sum of absolute values of coefficients
  lasso_reg = linear_model.Lasso(alpha=0.5)
  lasso_reg.fit(X, Y)
  W_lasso_reg = lasso_reg.coef_
  ##### Debiasing step #####
  ba = np.argwhere(W_lasso_reg != 0) #Finding where the coefficients are not zero
  X_debiased = X[:, ba]
  W_lasso_reg_debiased = np.linalg.lstsq(X_debiased[:,:,0], Y, rcond=None) #Re-estimate the chosen coefficients using least squares
  W_lasso_reg_debiased_2 = np.zeros((4096))
  W_lasso_reg_debiased_2[ba] = W_lasso_reg_debiased[0]
  lasso_reg_mse = mean_squared_error(W_actual, W_lasso_reg_debiased_2)
  plt.figure(1+fig_start)
  plt.plot(W_lasso_reg_debiased_2)
  plt.title('Standard L1 (debiased 1, regularization param(L1 = 0.5), MSE = {:.4f})'.format(lasso_reg_mse))
  plt.savefig("W_lasso_reg_{}.png".format(signal_type), dpi=300)
  ##### Applying Group Lasso L2 regression #####
  # The group penalty is the L2 norm (root of sum of squares) of each group:
  # PNLL(W) = NLL(W) + lambda * sum_g ||W_g||_2
  group_lassoL2_reg = GroupLasso(
    groups=groups,
    group_reg=3,
    l1_reg=1,
    frobenius_lipschitz=True,
    scale_reg="inverse_group_size",
    subsampling_scheme=1,
    supress_warning=True,
    n_iter=1000,
    tol=1e-3,
  )
  group_lassoL2_reg.fit(X, Y)
  W_groupLassoL2_reg = group_lassoL2_reg.coef_
  ##### Debiasing step #####
  ba = np.argwhere(W_groupLassoL2_reg != 0) #Finding where the coefficients are not zero
  X_debiased = X[:, ba]
  W_group_lassoL2_reg_debiased = np.linalg.lstsq(X_debiased[:,:,0], Y, rcond=None) #Re-estimate the chosen coefficients using least squares
  W_group_lassoL2_reg_debiased_2 = np.zeros((4096))
  W_group_lassoL2_reg_debiased_2[ba] = W_group_lassoL2_reg_debiased[0]
  groupLassoL2_mse = mean_squared_error(W_actual, W_group_lassoL2_reg_debiased_2)
  plt.figure(2+fig_start)
  plt.plot(W_group_lassoL2_reg_debiased_2)
  plt.title('Block-L2 (debiased 1, regularization param(L2 = 3, L1=1), MSE = {:.4f})'.format(groupLassoL2_mse))
  plt.savefig("W_groupLassoL2_reg_{}.png".format(signal_type), dpi=300)
  ##### Applying Group Lasso Linf regression #####
  # The spams library requires Fortran-ordered, normalized arrays;
  # see http://spams-devel.gforge.inria.fr/ for its documentation.
  # Linf is the supremum (largest absolute value) of the coefficients in a group:
  # PNLL(W) = NLL(W) + lambda * sum_g ||W_g||_inf
  X_normalized = np.asfortranarray(X - np.tile(np.mean(X,0),(X.shape[0],1)),dtype=float)
  X_normalized = spams.normalize(X_normalized)
  Y_normalized = np.asfortranarray(Y - np.tile(np.mean(Y,0),(Y.shape[0],1)),dtype=float)
  Y_normalized = spams.normalize(Y_normalized)
  groups_modified = np.concatenate([[i] for i in groups]).reshape(-1, 1)
  W_initial = np.zeros((X_normalized.shape[1],Y_normalized.shape[1]),dtype=float,order="F")
  param = {'numThreads' : -1,'verbose' : True,
  'lambda2' : 3, 'lambda1' : 1, 'max_it' : 500,
  'L0' : 0.1, 'tol' : 1e-2, 'intercept' : False,
  'pos' : False, 'loss' : 'square'}
  param['regul'] = "group-lasso-linf"
  param2=param.copy()
  param['size_group'] = 64
  param2['groups'] = groups_modified
  (W_groupLassoLinf_reg, optim_info) = spams.fistaFlat(Y_normalized,X_normalized,W_initial,True,**param)
  ##### Debiasing step #####
  ba = np.argwhere(W_groupLassoLinf_reg != 0) #Finding where the coefficients are not zero
  X_debiased = X[:, ba[:,0]]
  W_groupLassoLinf_reg_debiased = np.linalg.lstsq(X_debiased, Y, rcond=None) #Re-estimate the chosen coefficients using least squares
  W_group_lassoLinf_reg_debiased_2 = np.zeros((4096))
  W_group_lassoLinf_reg_debiased_2[ba[:,0]] = np.ravel(W_groupLassoLinf_reg_debiased[0])
  groupLassoLinf_mse = mean_squared_error(W_actual, W_group_lassoLinf_reg_debiased_2)
  plt.figure(3+fig_start)
  plt.plot(W_group_lassoLinf_reg_debiased_2)
  plt.title('Block-Linf (debiased 1, regularization param(L2 = 3, L1=1), MSE = {:.4f})'.format(groupLassoLinf_mse))
  plt.savefig("W_groupLassoLinf_reg_{}.png".format(signal_type), dpi=300)
  plt.show()
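The three debiasing blocks above repeat one pattern: keep the support chosen by the sparse fit, then re-estimate those coefficients by ordinary least squares. A small helper capturing that pattern (the name and signature are illustrative):

import numpy as np

def debias(X, Y, W_sparse, dim=4096):
    """Re-fit the nonzero coefficients of a sparse solution by least squares."""
    support = np.flatnonzero(W_sparse)        # indices kept by the sparse fit
    W_debiased = np.zeros(dim)
    if support.size:
        sol, *_ = np.linalg.lstsq(X[:, support], Y, rcond=None)
        W_debiased[support] = np.ravel(sol)
    return W_debiased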
Example #6
# Fragment: p, nZeros, X, beta, e, F_groups and nGroups are defined
# earlier in the original script.
beta_bool = np.ndarray(shape=(p, 1), dtype=bool)
beta_bool[0:nZeros] = False
beta_bool[nZeros:] = True

Y = np.dot(X, beta) + e
Y = Y.reshape(-1, 1)

# l1_reg is the regularisation coefficient for the element-wise sparsity penalty
# group_reg is the regularisation coefficient for the group sparsity penalty

gl = GroupLasso(groups=F_groups,
                l1_reg=0,
                group_reg=0.35,
                supress_warning=True)

gl.fit(X, Y)
yhat = gl.predict(X)
beta_hat = gl.coef_

conf_m = confusion_matrix(beta_bool, gl.sparsity_mask_)

print("Number of variables: {}".format(p))
print("Number of zero coefficients: {}".format(nZeros))
print("Number of choosen variables: {}".format(gl.sparsity_mask_.sum()))
print(conf_m)

group_bool = np.ndarray(shape=(3, nGroups), dtype=int)
group_bool[0, :] = [0, 1, 2, 3, 4]
group_bool[1, :] = [nZeros // 2, nZeros // 2, 0, 0, 0]

Fgroup_bool = np.ndarray(shape=(3, nGroups), dtype=int)
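The fragment's setup is not shown; a hypothetical preamble that makes it runnable (all sizes, labels, and noise levels invented for illustration) could look like:

import numpy as np
from group_lasso import GroupLasso
from sklearn.metrics import confusion_matrix

# Hypothetical setup: 50 features in 5 groups of 10; the first 20 coefficients are zero.
p, nGroups, nZeros = 50, 5, 20
rng = np.random.default_rng(0)
F_groups = np.repeat(np.arange(nGroups), p // nGroups)  # one group label per feature
beta = np.concatenate([np.zeros(nZeros),
                       rng.standard_normal(p - nZeros)]).reshape(-1, 1)
X = rng.standard_normal((500, p))
e = 0.1 * rng.standard_normal((500, 1))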