Ejemplo n.º 1
0
class MultiLasso_model(Lasso_model):
    """Variant of ``Lasso_model`` backed by a cross-validated multi-task Lasso.

    All target columns are fitted by a single ``MultiTaskLassoCV`` estimator;
    ``run`` drives training, prediction and CSV export.
    """

    def __init__(self, train_path, test_path, pred_path):
        """Initialise the parent model and build the estimator.

        The three paths are forwarded unchanged to ``Lasso_model.__init__``.
        """
        super().__init__(train_path, test_path, pred_path)
        # Candidate regularisation strengths: 0.05, 0.10, ..., 4.95.
        alpha_grid = [0.05 * step for step in range(1, 100)]
        self.multiLasso_model = MultiTaskLassoCV(alphas=alpha_grid,
                                                 cv=8,
                                                 max_iter=1000000)

    def train(self, X_train, Y_train):
        """Fit the multi-task estimator on ``X_train`` / ``Y_train``."""
        self.multiLasso_model.fit(X_train, Y_train)

    def pred(self, X_test):
        """Return the estimator's predictions for ``X_test``."""
        return self.multiLasso_model.predict(X_test)

    def run(self):
        """Train, predict, and write the combined result to ``self.pred_path``.

        For every index ``i`` over ``y_trains`` the output CSV receives the
        rows of ``self.data_train_split[i]`` followed by the rows of
        ``self.data_test_split[i]`` from ``self.split_train_len`` onwards,
        each with its predicted value appended as the last column.
        """
        X_train_PMNF, X_test_PMNF, y_trains, y_tests = super().get_train_test()

        # y_trains is transposed before fitting and the predictions are
        # transposed back, so y_preds[i] lines up with y_trains[i]
        # (presumably one entry per target -- confirm in get_train_test).
        targets = np.asarray(y_trains).T
        self.train(X_train_PMNF, targets)
        y_preds = self.pred(X_test_PMNF).T

        print(y_preds.shape, np.asarray(y_tests).shape)

        with open(self.pred_path, "w", newline='') as out_file:
            writer = csv.writer(out_file)
            for i in range(len(y_trains)):
                writer.writerows(self.data_train_split[i])

                group = self.data_test_split[i][self.split_train_len:, :]
                for j, features in enumerate(group):
                    writer.writerow(np.append(features, y_preds[i][j]))
Ejemplo n.º 2
0
    def _informativeness(self, z_p, z):
        """Fit a regressor from ``z_p`` to ``z`` and return its score.

        Args:
            z_p: Inputs the regressor is fitted on.
            z: Targets the regressor is fitted to.

        Returns:
            ``score(z_p, z)`` of the regressor that was just fitted (the
            coefficient of determination for scikit-learn regressors).
        """
        if isinstance(self.regressor, LassoCV):
            # Swap the configured LassoCV for its multi-task counterpart,
            # reusing the configured CV setting (presumably because ``z`` has
            # several target columns -- confirm against the caller).
            regressor = MultiTaskLassoCV(cv=self.regressor.cv,
                                         max_iter=2000,
                                         selection='random')
        else:
            # Any other estimator is used as configured.  Without this branch
            # ``regressor`` was unbound and the fit below raised
            # UnboundLocalError.  Note that this fits ``self.regressor`` in place.
            regressor = self.regressor

        regressor.fit(z_p, z)
        # Score the estimator that was actually fitted, and pass the targets:
        # ``score`` needs both X and y.
        return regressor.score(z_p, z)
Ejemplo n.º 3
0
def test_1d_multioutput_lasso_and_multitask_lasso_cv():
    """LassoCV and MultiTaskLassoCV must agree on a single-column target.

    The same target is given to LassoCV as a 1-D vector and to
    MultiTaskLassoCV as a one-column matrix; the selected alpha, the
    coefficients and the intercept are then compared.
    """
    X, y, _, _ = build_dataset(n_features=10)
    y_column = y[:, np.newaxis]
    cv_options = dict(n_alphas=5, eps=2e-3)

    single_task = LassoCV(**cv_options)
    single_task.fit(X, y_column[:, 0])

    multi_task = MultiTaskLassoCV(**cv_options)
    multi_task.fit(X, y_column)

    assert_almost_equal(single_task.alpha_, multi_task.alpha_)
    # The multi-task estimator keeps one row/entry per task; compare task 0.
    assert_almost_equal(single_task.coef_, multi_task.coef_[0])
    assert_almost_equal(single_task.intercept_, multi_task.intercept_[0])
Ejemplo n.º 4
0
class MultiTaskLassoCVImpl:
    """Thin wrapper that delegates ``fit`` / ``predict`` to an ``Op`` instance.

    ``Op`` is resolved from the enclosing module (presumably the wrapped
    scikit-learn estimator class -- confirm at the import site).
    """

    def __init__(self, **hyperparams):
        """Store the hyperparameters and build the wrapped model from them."""
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        """Fit the wrapped model and return ``self``.

        ``y`` is forwarded only when it is not ``None``.
        """
        fit_args = (X,) if y is None else (X, y)
        self._wrapped_model.fit(*fit_args)
        return self

    def predict(self, X):
        """Return the wrapped model's predictions for ``X``."""
        wrapped = self._wrapped_model
        return wrapped.predict(X)
Ejemplo n.º 5
0
 def lassoCV(self, name):
     """Fit a cross-validated multi-task Lasso and export final predictions.

     The model is fitted on ``self.X_train`` / ``self.Y_train``, its mean
     squared error on ``self.X_test`` / ``self.Y_test`` is printed together
     with the coefficients, and predictions for ``self.X_final`` are handed
     to ``genCSV``.

     Args:
         name: Prefix of the output name passed to ``genCSV``; the test MSE
             is appended as ``'_MSE<value>'``.
     """
     # ``normalize=False`` was spelled out here originally.  It was the
     # default and the keyword no longer exists in current scikit-learn, so
     # omitting it keeps the behaviour and avoids a TypeError.
     sciLasso = MultiTaskLassoCV(
         fit_intercept=True,
         cv=12,
         tol=0.001)
     sciLasso.fit(self.X_train, self.Y_train)
     predict_test = sciLasso.predict(self.X_test)
     # Documented argument order is (y_true, y_pred); the value is unchanged
     # because the squared error is symmetric.
     MSE = mean_squared_error(self.Y_test, predict_test)
     s = "Sci LassoCV            (MSE: %f)" % (MSE)
     # Single-argument print(...) behaves the same on Python 2 and Python 3.
     print(s)
     # print(sciLasso.score(self.X_test, self.Y_test))
     print(sciLasso.coef_)
     print(np.nonzero(sciLasso.coef_))
     predict_final = sciLasso.predict(self.X_final)
     genCSV(name + '_MSE' + str(MSE), self.index_final, predict_final)
def compare_to_lasso_analysis(adata, ccdtranscript):
    '''Perform a comparison of pseudotime analysis to LASSO analysis for finding CCD proteins.

    A MultiTaskLassoCV model is fitted from the KNN-imputed ``adata.X`` to the
    per-cell ``Red585`` / ``Green530`` values in ``adata.obs``, or loaded from
    the pickle cache when that file already exists.  Genes whose coefficients
    (summed over the two targets) are nonzero are reported, and one UMAP is
    drawn for those genes and one for the remaining genes.

    Args:
        adata: Annotated data object providing ``X``, ``obs`` and
            ``var_names`` (presumably an AnnData instance -- confirm against
            the caller).
        ccdtranscript: Per-gene values indexable with a boolean gene mask;
            their sum over the selected genes is reported as a fraction.

    Side effects:
        Prints a summary, reads or writes
        ``output/pickles/fucci_rna_imputed_lasso.pkl``, and moves
        ``figures/umap.pdf`` to the two output PDF names.  The figure size
        and the warning filter are reset on exit, including when an error is
        raised part-way through.
    '''
    prevPlotSize = plt.rcParams['figure.figsize']
    plt.rcParams['figure.figsize'] = (6, 5)

    print("ANALYZING SC-RNA-SEQ WITH LASSO")
    warnings.filterwarnings("ignore")
    try:
        fucci_rna_data = [(adata.obs["Red585"][ii], adata.obs["Green530"][ii])
                          for ii in np.arange(len(adata.obs))]
        imputer = KNNImputer(missing_values=0)
        expression = imputer.fit_transform(adata.X)
        fucci_rna_path = "output/pickles/fucci_rna_imputed_lasso.pkl"
        if os.path.exists(fucci_rna_path):
            # NOTE: this unpickles a local cache file; never point it at
            # untrusted data.  The handle is closed as soon as loading is done.
            with open(fucci_rna_path, 'rb') as cache_file:
                fucci_rna = np.load(cache_file, allow_pickle=True)
        else:
            fucci_rna = MultiTaskLassoCV()
            fucci_rna.fit(expression, fucci_rna_data)
            # Closing the handle guarantees the cache is flushed to disk.
            with open(fucci_rna_path, 'wb') as cache_file:
                pickle.dump(fucci_rna, cache_file)

        # Coefficients summed over the targets; a gene counts as selected
        # when that sum is nonzero.
        coef_sum = np.sum(fucci_rna.coef_, axis=0)
        nz_coef = coef_sum != 0
        print(f"{sum(nz_coef)}: number of nonzero lasso coefficients")
        print(f"{adata.var_names[nz_coef]}: genes with nonzero lasso coeff")
        print(
            f"{sum(ccdtranscript[nz_coef]) / sum(nz_coef)}: % nonzero lasso found as CCD transcripts"
        )
        print(
            f"{coef_sum[nz_coef]}: coefficients for nonzero lasso coeff"
        )

        # Generate UMAP for CCD and nonCCD for the LASSO model
        adataCCd = adata[:, nz_coef]
        sc.pp.neighbors(adataCCd, n_neighbors=10, n_pcs=40)
        sc.tl.umap(adataCCd)
        sc.pl.umap(adataCCd, color="fucci_time", show=False, save=True)
        shutil.move("figures/umap.pdf", "figures/umapRNALassoCCD.pdf")
        adataNonCCd = adata[:, ~nz_coef]
        sc.pp.neighbors(adataNonCCd, n_neighbors=10, n_pcs=40)
        sc.tl.umap(adataNonCCd)
        sc.pl.umap(adataNonCCd, color="fucci_time", show=False, save=True)
        shutil.move("figures/umap.pdf", "figures/umapRNALassoNonCCD.pdf")
    finally:
        # Same resets as before, now also performed when an error is raised.
        plt.rcParams['figure.figsize'] = prevPlotSize
        warnings.filterwarnings("default")
Ejemplo n.º 7
0
 def _compare_with_lasso_cv(self,
                            lasso_X,
                            lasso_y,
                            wlasso_X,
                            wlasso_y,
                            sample_weight,
                            alphas,
                            lasso_cv=3,
                            wlasso_cv=3,
                            params=None,
                            tol=1e-8):
     """Assert that the weighted Lasso CV estimator matches the plain one.

     A (MultiTask)LassoCV is fitted on ``lasso_X`` / ``lasso_y`` and a
     Weighted(MultiTask)LassoCV on ``wlasso_X`` / ``wlasso_y`` with
     ``sample_weight``; the selected alpha, the coefficients and the
     intercepts of the two are then compared.

     Args:
         lasso_X, lasso_y: Data for the unweighted estimator.  A ``lasso_y``
             with more than one dimension selects the multi-task estimators.
         wlasso_X, wlasso_y: Data for the weighted estimator.
         sample_weight: Weights passed to the weighted estimator's ``fit``.
         alphas: Candidate alphas given to both estimators.
         lasso_cv, wlasso_cv: ``cv`` argument of the respective estimator.
         params: Extra parameters applied to both estimators through
             ``set_params``; ``None`` (the default) means none.
         tol: Absolute tolerance for the coefficient comparison.
     """
     # A fresh dict per call instead of a shared mutable default argument.
     if params is None:
         params = {}

     # Check if multitask
     is_multitask = np.ndim(lasso_y) > 1
     if is_multitask:
         lassoCV = MultiTaskLassoCV(alphas=alphas, cv=lasso_cv)
         wlassoCV = WeightedMultiTaskLassoCV(alphas=alphas, cv=wlasso_cv)
     else:
         lassoCV = LassoCV(alphas=alphas, cv=lasso_cv)
         wlassoCV = WeightedLassoCV(alphas=alphas, cv=wlasso_cv)
     lassoCV.set_params(**params)
     lassoCV.fit(lasso_X, lasso_y)
     wlassoCV.set_params(**params)
     wlassoCV.fit(wlasso_X, wlasso_y, sample_weight)
     # Check that same alpha is chosen
     self.assertEqual(lassoCV.alpha_, wlassoCV.alpha_)
     # Check that the coefficients are similar
     if is_multitask:
         # The setting does not change between tasks, so look it up once.
         check_intercept = lassoCV.get_params()["fit_intercept"]
         for i in range(lasso_y.shape[1]):
             np.testing.assert_allclose(lassoCV.coef_[i],
                                        wlassoCV.coef_[i],
                                        atol=tol)
             if check_intercept:
                 self.assertAlmostEqual(lassoCV.intercept_[i],
                                        wlassoCV.intercept_[i])
     else:
         np.testing.assert_allclose(lassoCV.coef_, wlassoCV.coef_, atol=tol)
         self.assertAlmostEqual(lassoCV.intercept_, wlassoCV.intercept_)
Ejemplo n.º 8
0
    def select_mtlasso(self, X, y):
        """Select the Lasso alpha by cross-validation, print it and return it.

        Args:
            X: Training inputs passed to ``MultiTaskLassoCV.fit``.
            y: Training targets passed to ``MultiTaskLassoCV.fit``.

        Returns:
            The ``alpha_`` chosen by the fitted estimator.  (Earlier versions
            only printed it and returned ``None``.)
        """
        # NOTE(review): ``.099`` sits between ``.009`` and ``.01``; it may be
        # a typo for ``.0099``.  The grid is left unchanged.
        mtlasso_cv = MultiTaskLassoCV(alphas=[
            0.00001, .0001, .001, .002, .003, .004, .005, .006, .007, .008,
            .009, .099, .01, .011, .012, .013, .014, .015, .016, .017, .018,
            .019, .02, .025, .03, .035, .036, .037, .038, .039, .04, .041,
            .042, .043, .044, .045, .05, .06, .075, .1, .2, .225, .23, .24,
            .245, .246, .247, .248, .249, .25, .251, .252, .253, .254, .255,
            .26, .27, .275, .3, .35, .4, .45, .46, .47, .48, .481, .482, .483,
            .484, .485, .486, .487, .488, .489, .49, .491, .492, .493, .494,
            .495, .496, .497, .498, .499, .5, .51, .511, .512, .513, .514,
            .515, .516, .517, .518, .519, .52, .525, .53, .54, .55, .6, .75,
            .752, .7527, .7528, .7529, .753, .7531, .754, .7545, .755, .756,
            .76, .765, .77, .78, .79, .8, .9, 1.0, 1.2, 1.25, 1.5, 1.75, 2.0
        ])

        fitted = mtlasso_cv.fit(X, y)
        best_alpha = fitted.alpha_
        print(best_alpha)
        return best_alpha
Ejemplo n.º 9
0
# Train a MultiTaskLasso whose alpha is chosen by MultiTaskLassoCV and report
# error metrics on a held-out 20 % split.
path_train = 'data_train.txt'
path_test = 'data_test.txt'

X, Y = get_data_own(path_train)

print(X.shape)
print(Y.shape)

print("Split data for CV")
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=1)

# NOTE(review): the ``normalize`` keyword is not accepted by current
# scikit-learn linear models; it is kept because dropping it would change the
# fitted model.
lasso = MultiTaskLasso(max_iter=max_iter, normalize=True)

print("Init train with multitasklassocv")
lassocv = MultiTaskLassoCV(
    alphas=None, cv=10, max_iter=max_iter, verbose=True, normalize=True)
lassocv.fit(X_train, y_train)

print("Fit multitasklasso with alpha from cv lasso")
lasso.set_params(alpha=lassocv.alpha_)
lasso.fit(X_train, y_train)

print("get mean square error")
# All three metrics share one set of test predictions.
lasso_test_pred = lasso.predict(X_test)
mae = mean_absolute_error(y_test, lasso_test_pred)
print("mae: {}".format(mae))
# NOTE(review): mean_squared_log_error is not a *root* error unless asked;
# the "rmsle" name/label may be inaccurate -- confirm the intended metric.
rmsle = mean_squared_log_error(y_test, lasso_test_pred)
print("rmsle: {}".format(rmsle))
mape = mean_absolute_percentage_error(y_test, lasso_test_pred)
print("mape: {}".format(mape))


# logreg.fit(X_train, y_train)
# y_pred_logreg = logreg.predict(X_test)
# print('R2:    ', r2_score(y_test, y_pred_logreg))
# print('MAE:   ', metrics.mean_absolute_error(y_test, y_pred_logreg))
# print('MSE:   ', metrics.mean_squared_error(y_test, y_pred_logreg))
# print('RMSE:  ', np.sqrt(np.absolute(metrics.mean_squared_error(y_test, y_pred_logreg))))
# print('variance score:', explained_variance_score(y_test, y_pred_logreg, multioutput='uniform_average'))

# -----------------------------------------------------------------------------
# Method 3: MultiTaskLassoCV regression with 10-fold CV
# -----------------------------------------------------------------------------
# Fit a 10-fold cross-validated multi-task Lasso, time the fit and report the
# regression metrics on the test split.
print(' ')
print('## 2. Lasso Regression Results ##')
lasso = MultiTaskLassoCV(cv=10, eps=0.01, max_iter=1000)
# perf_counter() is the clock meant for measuring intervals; time.time()
# follows the wall clock and can jump when the system time is adjusted.
t = time.perf_counter()
lasso.fit(X_train, y_train)
t_lasso = time.perf_counter() - t
y_pred_lasso = lasso.predict(X_test)
# Computed once and reused for both the MSE and the RMSE line.
mse_lasso = metrics.mean_squared_error(y_test, y_pred_lasso)
print('R2:    ', r2_score(y_test, y_pred_lasso))
print('MAE:   ', metrics.mean_absolute_error(y_test, y_pred_lasso))
print('MSE:   ', mse_lasso)
print('RMSE:  ', np.sqrt(np.absolute(mse_lasso)))
print(
    'variance score: ',
    explained_variance_score(y_test,
                             y_pred_lasso,
                             multioutput='uniform_average'))
print('training time:   ', t_lasso)
#
# # -----------------------------------------------------------------------------
    
# Standardise the first Dh columns of Xh_tr in place (zero mean, unit
# standard deviation), keeping each column's mean and standard deviation in
# mea_h / sig_h.
mea_h = np.zeros(Dh)
sig_h = np.zeros(Dh)
for k in range(Dh):
    col_k = Xh_tr[:, k]
    mea_h[k] = col_k.mean()
    sig_h[k] = col_k.std()
    # Use the stored statistics so the arithmetic matches what was recorded.
    Xh_tr[:, k] = (col_k - mea_h[k]) / sig_h[k]


############## LassoCV ########################################################
from sklearn.linear_model import MultiTaskLassoCV
# Candidate regularisation strengths: n_alphas values, log-spaced over
# [1e-10, 1].
n_alphas = 5
alphas = np.logspace(-10, 0, n_alphas)

# ``normalize=False`` was passed here originally.  It was the default and the
# keyword no longer exists in current scikit-learn, so omitting it keeps the
# behaviour and avoids a TypeError.
lasso = MultiTaskLassoCV(alphas=alphas, cv=5, fit_intercept=False, n_jobs=3)
lasso.fit(Xl_tr, Xh_tr)

# Alpha selected by the 5-fold cross-validation.
Lasso_lambda_opt = lasso.alpha_

print('\n Optimal lambda:', Lasso_lambda_opt)
############ Validation curve #################################################
"""
# validation curve
from sklearn.linear_model import Lasso
from sklearn.learning_curve import validation_curve

lambdas_range= np.append(0, np.logspace(0, 6, 28))
train_MSE, test_MSE = validation_curve(Lasso(),Xl_tr, Xh_tr, param_name="alpha", param_range=lambdas_range, 
                                             scoring = "mean_squared_error", cv=10)

# API always tries to maximize a loss function, so MSE is actually in the flipped sign
Ejemplo n.º 12
0
# Concatenate the continuous and the discrete feature blocks column-wise.
x_vec = np.concatenate((x_vec_con, x_vec_dis), axis=1)

# Prediction targets: the 'registered' and 'casual' columns, as floats.
y_registered = bike_rel['registered'].values.astype(float)
y_casual = bike_rel['casual'].values.astype(float)

# One column per target.
y = np.stack((y_registered, y_casual), axis=1)

# Build the models and evaluate them on a held-out 20 % split.
from sklearn.linear_model import MultiTaskLassoCV
from sklearn.model_selection import train_test_split
from sklearn.linear_model import MultiTaskElasticNetCV
x1, x2, y1, y2 = train_test_split(x_vec, y, test_size=0.2, random_state=20)

############ Lasso
mtl = MultiTaskLassoCV(alphas=np.logspace(-3, -1, 3), cv=8, verbose=3)
mtl.fit(x1, y1)
mtl.score(x1, y1)
mtl.score(x2, y2)

############ ElasticNetCV
mte = MultiTaskElasticNetCV(l1_ratio=np.logspace(-3, -1, 3),
                            alphas=np.logspace(-3, -1, 3),
                            cv=8,
                            verbose=3)
mte.fit(x1, y1)
# Score the elastic-net model that was just fitted; the Lasso model `mtl` was
# scored here a second time by mistake.
mte.score(x1, y1)
mte.score(x2, y2)
Ejemplo n.º 13
0
# Remove the first `gap` rows of temp2; the first `gap` rows of X and Y are
# taken as the validation split.
training_data_Y = np.delete(temp2, list(range(gap)), axis=0)
validation_data_X = X[:gap, :]
validation_data_Y = Y[:gap, :]

# From here on X / Y refer to the training split only.
X, Y = training_data_X, training_data_Y

# Main Logic

iters = 500
alpha = 0.1

# For L1: let cross-validation choose the regularisation strength.

ok = MultiTaskLassoCV(cv=5)
lcv = ok.fit(X, Y)
lasso_lmbda = ok.alpha_
print("Hyperparameter for Lasso Regularisation: " + str(lasso_lmbda))

# Train with the hand-written gradient descent, using the selected strength.
theta2 = np.matrix(np.zeros((features, 1)))
theta2, cost_las = GradientDescent_lasso(
    X, Y, theta2, alpha, iters, lasso_lmbda)

# Iteration numbers 1..iters for the x-axis of the cost curve.
lin = list(range(1, iters + 1))
plt.plot(lin, cost_las)
plt.xlabel('Number of iterations')
plt.ylabel('Error')
plt.title('Error vs Iterations for Lasso regression L1 on Training set')
plt.show()
print("Accuracy with Lasso Regularisation on Tain set: " +
      str(predict(X, theta2, Y)))
Ejemplo n.º 14
0
    print "测试集MSE:", mean_squared_error(test_Y, test_Y_pred)
    print "测试集RMSE:", np.sqrt(mean_squared_error(test_Y, test_Y_pred))
    print "测试集R2:", r2_score(test_Y, test_Y_pred)

    tss, rss, ess, r2 = xss(Y, multiTaskLasso.predict(X))
    print "TSS(Total Sum of Squares): ", tss
    print "RSS(Residual Sum of Squares): ", rss
    print "ESS(Explained Sum of Squares): ", ess
    print "R^2: ", r2

    print "\n**********测试MultiTaskLassoCV类**********"
    # When constructing MultiTaskLassoCV, supply a set of candidate alpha values; the class picks a suitable one for us.
    multiTaskLassoCV = MultiTaskLassoCV(
        alphas=[0.01, 0.1, 0.5, 1, 3, 5, 7, 10, 20, 100], cv=5)
    # Fit on the training set
    multiTaskLassoCV.fit(train_X, train_Y)
    # Print the best alpha value
    print "最优的alpha值: ", multiTaskLassoCV.alpha_
    # Print the model coefficients
    print "系数:", multiTaskLassoCV.coef_
    print "截距:", multiTaskLassoCV.intercept_
    print '训练集R2: ', r2_score(train_Y, multiTaskLassoCV.predict(train_X))

    # For linear regression models, performance on the test set measured by the mean squared error (MSE)
    # or the root mean squared error (RMSE) is generally used to judge how good the model is.
    test_Y_pred = multiTaskLassoCV.predict(test_X)
    print "测试集得分:", multiTaskLassoCV.score(test_X, test_Y)
    print "测试集MSE:", mean_squared_error(test_Y, test_Y_pred)
    print "测试集RMSE:", np.sqrt(mean_squared_error(test_Y, test_Y_pred))
    print "测试集R2:", r2_score(test_Y, test_Y_pred)