Esempio n. 1
0
def get_model(basis, response, time_series_cv, splits):
    """Fit a cross-validated lasso and return it with its coefficient path.

    Args:
        basis: Feature matrix handed to ``fit`` and ``path``.
        response: Target values handed to ``fit`` and ``path``.
        time_series_cv: When truthy, cross-validate with ``TimeSeriesSplit``;
            otherwise with ``KFold``.
        splits: Number of cross-validation splits.

    Returns:
        A tuple ``(model, coefs, mse_path)``: the fitted ``ElasticNetCV``,
        the coefficients along the path evaluated at ``model.alphas_`` and
        ``model.l1_ratio_``, and ``model.mse_path_``.
    """
    splitter = TimeSeriesSplit if time_series_cv else KFold
    cv = splitter(n_splits=splits)
    # l1_ratio=1 reduces the elastic-net penalty to a pure L1 penalty.
    model = ElasticNetCV(l1_ratio=1, selection='random', cv=cv)
    # Every warning raised while fitting and tracing the path is silenced.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        model.fit(basis, response)
        # The keyword has to be spelled ``l1_ratio``: a misspelled name is
        # not matched to the parameter, so the path would not be computed
        # with the fitted ratio (recent scikit-learn rejects it outright).
        _, coefs, _ = model.path(basis, response,
                                 l1_ratio=model.l1_ratio_,
                                 alphas=model.alphas_)
    return model, coefs, model.mse_path_
Esempio n. 2
0
 def _build_linear_model(self, basis, y):
     """Fit the cross-validated linear model and trace its coefficient path.

     Args:
         basis: Feature matrix handed to ``fit`` and ``path``.
         y: Target values handed to ``fit`` and ``path``.

     Returns:
         A tuple ``(model, coefs, mse_path)``: the fitted estimator, the
         coefficients along the path evaluated at ``model.alphas_`` and
         ``model.l1_ratio_``, and ``model.mse_path_``.
     """
     if self.time_series_cv:
         cv = TimeSeriesSplit(n_splits=self.splits)
     else:
         cv = KFold(n_splits=self.splits, shuffle=True, random_state=self.seed)
     if self.method == REGRESSION:
         # ``normalize`` is not passed: False was its default value and the
         # keyword no longer exists in scikit-learn >= 1.2.
         model = ElasticNetCV(l1_ratio=0.1, selection='random', cv=cv,
                              random_state=self.seed)
     else:
         # NOTE(review): LogisticRegressionCV does not document a ``path``
         # method, ``alphas_`` or ``mse_path_``, and penalty='l1' needs a
         # solver that supports it -- the code below looks regression-only;
         # confirm how the classification branch is meant to work.
         model = LogisticRegressionCV(penalty='l1', cv=cv)
     # Every warning raised while fitting and tracing the path is silenced.
     with warnings.catch_warnings():
         warnings.simplefilter('ignore')
         model.fit(basis, y)
         # The keyword has to be spelled ``l1_ratio``: a misspelled name is
         # not matched to the parameter, so the path would not be computed
         # with the fitted ratio (recent scikit-learn rejects it outright).
         _, coefs, _ = model.path(basis, y,
                                  l1_ratio=model.l1_ratio_,
                                  alphas=model.alphas_)
     return model, coefs, model.mse_path_
Esempio n. 3
0
 def _build_linear_model(self, basis, y):
     """Fit the cross-validated linear model and trace its coefficient path.

     Args:
         basis: Feature matrix handed to ``fit`` and ``path``.
         y: Target values handed to ``fit`` and ``path``.

     Returns:
         A tuple ``(model, coefs, mse_path)``: the fitted estimator, the
         coefficients along the path evaluated at ``model.alphas_`` and
         ``model.l1_ratio_``, and ``model.mse_path_``.
     """
     if self.time_series_cv:
         cv = TimeSeriesSplit(n_splits=self.splits)
     else:
         # random_state only has an effect when shuffling is enabled, and
         # recent scikit-learn raises if it is set while shuffle is False.
         cv = KFold(n_splits=self.splits,
                    shuffle=True,
                    random_state=self.seed)
     if self.method == REGRESSION:
         # l1_ratio=1 reduces the elastic-net penalty to a pure L1 penalty.
         # ``normalize`` is not passed: False was its default value and the
         # keyword no longer exists in scikit-learn >= 1.2.
         model = ElasticNetCV(l1_ratio=1,
                              selection='random',
                              cv=cv,
                              random_state=self.seed)
     else:
         # NOTE(review): LogisticRegressionCV does not document a ``path``
         # method, ``alphas_`` or ``mse_path_``, and penalty='l1' needs a
         # solver that supports it -- the code below looks regression-only;
         # confirm how the classification branch is meant to work.
         model = LogisticRegressionCV(penalty='l1', cv=cv)
     # Every warning raised while fitting and tracing the path is silenced.
     with warnings.catch_warnings():
         warnings.simplefilter('ignore')
         model.fit(basis, y)
         # The keyword has to be spelled ``l1_ratio``: a misspelled name is
         # not matched to the parameter, so the path would not be computed
         # with the fitted ratio (recent scikit-learn rejects it outright).
         _, coefs, _ = model.path(basis,
                                  y,
                                  l1_ratio=model.l1_ratio_,
                                  alphas=model.alphas_)
     return model, coefs, model.mse_path_
Esempio n. 4
0
def eNetModel(data, labels, featureNames, texts, documents, nFolds):
    """Cross-validate an elastic net; return its accuracy and mean coefficients.

    For every fold an ``ElasticNetCV`` is fitted on the training rows and its
    predictions on the held-out rows are thresholded at 0.5 against 0/1
    labels. A prediction of exactly 0.5 is never counted as correct.

    Args:
        data: Feature rows, indexable by the fold index arrays.
        labels: 0/1 targets, indexable by the fold index arrays.
        featureNames: Unused by this function.
        texts: Only its length is used, as the number of samples to split.
        documents: Unused by this function.
        nFolds: Number of folds handed to ``KFold``.

    Returns:
        A tuple ``(accuracy, mean_coefs)``: the number of correct held-out
        predictions divided by ``len(data)`` as a ``Decimal``, and the path
        coefficients averaged first over the alphas of each fold and then
        over the folds.
    """
    # NOTE(review): this is the legacy ``KFold(n, n_folds=...)`` call form
    # whose instance is iterated directly -- confirm which scikit-learn
    # cross-validation module the file imports KFold from.
    kf = KFold(len(texts), n_folds=nFolds)
    acc = 0
    mean_coefs = []
    for train, test in kf:
        data_train, data_test = data[train], data[test]
        label_train, label_test = labels[train], labels[test]

        enet = ElasticNetCV(l1_ratio=[.1, .5, .7, .9, .95, .99, 1],n_alphas=1000,alphas=[0.0125, 0.025, 0.05, .125, .25, .5, 1., 2., 4.])
        enet.fit(data_train, label_train)

        data_train = np.asarray(data_train, dtype=float)
        label_train = np.asarray(label_train, dtype=float)

        # NOTE(review): path() is called with its default arguments here, not
        # with the l1_ratio/alpha selected by the fit above -- confirm that
        # this is intended.
        vals = enet.path(data_train, label_train)
        mean_coefs.append(np.mean(vals[1], axis=1))

        # Score the held-out samples element-wise so that folds holding more
        # than one sample are handled too; predict() runs once per fold.
        predicted = np.ravel(enet.predict(data_test))
        expected = np.ravel(label_test)
        correct = (((expected == 1) & (predicted > 0.5))
                   | ((expected == 0) & (predicted < 0.5)))
        acc += int(np.count_nonzero(correct))

        if len(mean_coefs) % 10 == 0:
            # Single-argument form prints the same text on Python 2 and 3.
            print('%s out of %s subs finished' % (len(mean_coefs), len(data)))

    mean_coefs = np.mean(np.array(mean_coefs), axis=0)

    return Decimal(acc)/Decimal(len(data)), mean_coefs
Esempio n. 5
0
# Keep only the rows of the expression table labelled by only_TF_list.
expr_TF = expr_all.loc[only_TF_list]

# Gene x TF array for pvalue_gt, pre-filled with -1. It will be row-sorted
# depending on the cancer.
pvalue_gt_array = (-1) * np.ones(
    (len(only_gene_list), len(only_TF_list)))

# Transposed so that each TF is one feature column.
X_features = expr_TF.values.T
# time.clock() was removed in Python 3.8; perf_counter() is the documented
# replacement. Any elapsed-time computation against start_time must read the
# same clock.
start_time = time.perf_counter()

for i in range(len(only_gene_list)):
    print('Pvalue_gene_TF', i)
    y = expr_gene.iloc[i].values
    EN_model = ElasticNetCV(l1_ratio=l1_rat)

    # Make sure that the number of nonzero coefs does not exceed
    # max_num_coefs.
    # NOTE(review): path() is called without an l1_ratio argument, so l1_rat
    # given to the constructor above is not forwarded to it -- confirm that
    # this is intended.
    alphas1, coefs1, _ = EN_model.path(X_features, y, eps=0.01, n_alphas=10)
    # Number of nonzero coefficients at each alpha of the path.
    num_coefs = np.sum(coefs1 != 0, axis=0)
    rep_EN = 0
    if num_coefs[-1] < max_num_coefs:
        # The last alpha of the path is already below the limit: keep its
        # nonzero coefficients as the selected TF indices.
        EN_coef = coefs1[:, -1]
        selected_ind = np.arange(len(only_TF_list))[EN_coef != 0]
    else:
        # Refine (at most 10 rounds) while the path is not flat and no alpha
        # yields exactly max_num_coefs nonzero coefficients.
        # NOTE(review): within this excerpt alphas3 is computed but not used
        # and num_coefs is not updated -- the rest of the loop body is
        # presumably missing here; verify against the full script.
        while (num_coefs[0] !=
               num_coefs[-1]) and (max(num_coefs[num_coefs <= max_num_coefs])
                                   != max_num_coefs) and (rep_EN < 10):
            rep_EN += 1
            # Neighbouring alphas on either side of the max_num_coefs limit.
            alpha_min = alphas1[(num_coefs <= max_num_coefs)][-1]
            alpha_max = alphas1[(num_coefs > max_num_coefs)][0]
            alphas3 = np.linspace(alpha_min, alpha_max, 10)
Esempio n. 6
0
# Fit a cross-validated elastic net and predict the held-out set.
# ``normalize`` is not passed: False was its default value and the keyword no
# longer exists in scikit-learn >= 1.2.
en_cv = ElasticNetCV(fit_intercept=True,
                     n_alphas=100,
                     l1_ratio=0.01)
en_cv.fit(x_train, y_train)
p_en_cv = en_cv.predict(x_test)

print("#--------------------------------------")
print("ElasticNetCV regression result")
print("r2_score = %.4f" % (r2_score(y_test, p_en_cv)))
print("mean squared error = %.4f" % (mean_squared_error(y_test, p_en_cv)))
# Number of coefficients shrunk exactly to zero.
print(sum(en_cv.coef_ == 0))
print("#--------------------------------------")
print("\n")

# path() does not read the estimator's settings, so the fitted l1_ratio must
# be passed explicitly; otherwise the plot would show the default
# l1_ratio=0.5 path instead of this model's.
enp = en_cv.path(x_train, y_train, l1_ratio=en_cv.l1_ratio_)
enp_fig = plt.figure()
enp_plot = enp_fig.add_subplot(1, 1, 1)
# enp[0] holds the alphas and enp[1] the coefficients along the path; the
# transpose draws one line per coefficient.
enp_plot.plot(np.log(enp[0]), enp[1].T)
enp_plot.set_xlabel("lambda value (log scale)")
enp_plot.set_ylabel("Coefficient estimate value")
enp_plot.set_title("EN solution path")
# Save before show() so the written file contains the drawn figure.
plt.savefig("en_path.jpg", dpi=300)
plt.show()

# Update results
res_df.loc[2, :] = [mean_squared_error(y_test, p_lasso_cv), 'lasso']
res_df.loc[3, :] = [mean_squared_error(y_test, p_en_cv), 'ElasticNetCV']

print("#--------------------------------------")
print("Update results")