Example 1
    def mtlasso_model(self, X_train, y_train, X_test, y_test):

        # Fit a multi-task Lasso with a small regularization strength
        mtlasso_model = MultiTaskLasso(alpha=.005)

        mtlasso_model.fit(X_train, y_train)

        y_train_pred = mtlasso_model.predict(X_train)
        y_test_pred = mtlasso_model.predict(X_test)

        # Scoring the model: R^2 via the estimator's score method, then MSE and
        # R^2 computed explicitly from the predictions
        print(mtlasso_model.score(X_train, y_train))
        print(mtlasso_model.score(X_test, y_test))
        print('MSE train: %.6f, MSE test: %.6f' % (mean_squared_error(
            y_train, y_train_pred), mean_squared_error(y_test, y_test_pred)))
        print('R^2 train: %.6f, R^2 test: %.6f' %
              (r2_score(y_train, y_train_pred), r2_score(y_test, y_test_pred)))
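
Example 1's method assumes MultiTaskLasso, mean_squared_error and r2_score are imported elsewhere in the module. A minimal, self-contained sketch of the same fit-and-score pattern on synthetic multi-output data (the dataset, split and alpha below are illustrative assumptions, not taken from the original code):

# Self-contained sketch: fit and score a MultiTaskLasso on synthetic data.
import numpy as np
from sklearn.datasets import make_regression
from sklearn.linear_model import MultiTaskLasso
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

# Synthetic multi-output regression problem (3 tasks)
X, y = make_regression(n_samples=200, n_features=20, n_targets=3,
                       noise=0.5, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25,
                                                     random_state=0)

model = MultiTaskLasso(alpha=.005)
model.fit(X_train, y_train)

y_test_pred = model.predict(X_test)
print('Test R^2:', r2_score(y_test, y_test_pred))
print('Test MSE:', mean_squared_error(y_test, y_test_pred))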
Example 2
# Split the dataset into train and test subsets
features_train, features_test, labels_train, labels_test = train_test_split(
    features_sc, label_scm, test_size=0.33, random_state=42)

# Pre-process: dimensionality reduction (SVD).
# Fit the SVD on the training set only and reuse it to transform the test set,
# so both splits live in the same reduced feature space.
svd1 = TruncatedSVD(n_components=9, random_state=1).fit(features_train)
features_train = svd1.transform(features_train)
features_test = svd1.transform(features_test)

# Do the regression
mtl = MultiTaskLasso(alpha=1e-9, random_state=1)
mtl.fit(features_train, labels_train)
print("MultiTaskLasso", mtl.score(features_test, labels_test))

######################################################################
# This part computes the Multi-Task Elastic-Net score with the optimal
# hyper-parameter

# Load the necessary libraries
from sklearn.feature_selection import SelectKBest
from sklearn.decomposition import TruncatedSVD
from sklearn.linear_model import MultiTaskElasticNet
from sklearn.model_selection import train_test_split

# Split the dataset into train and test subsets
features_train, features_test, labels_train, labels_test = train_test_split(
    features_sc, label_scm, test_size=0.33, random_state=42)

# Pre-process: dimensionality reduction (SVD)
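
The example above chains the SVD and the regressor by hand. A hedged sketch of the same idea expressed as a scikit-learn Pipeline, which guarantees the SVD is fit on the training split only; the synthetic data below stands in for features_sc / label_scm, which are defined elsewhere in the original code:

# Sketch: TruncatedSVD + MultiTaskLasso combined in a single Pipeline.
from sklearn.datasets import make_regression
from sklearn.decomposition import TruncatedSVD
from sklearn.linear_model import MultiTaskLasso
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline

X, y = make_regression(n_samples=300, n_features=30, n_targets=2,
                       random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33,
                                                     random_state=42)

pipe = make_pipeline(TruncatedSVD(n_components=9, random_state=1),
                     MultiTaskLasso(alpha=1e-9, random_state=1))
pipe.fit(X_train, y_train)      # the SVD is fit on the training split only
print("MultiTaskLasso (pipeline):", pipe.score(X_test, y_test))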
Example 3
from sklearn.model_selection import train_test_split

# Split the dataset into train and test subsets
features_train, features_test, labels_train, labels_test = train_test_split(
    features_sc, label_scm, test_size=0.33, random_state=42)

# Pre-process: dimensionality reduction (SVD).
# Fit the SVD on the training set only and reuse it to transform the test set.
svd1 = TruncatedSVD(n_components=9, random_state=1).fit(features_train)
features_train = svd1.transform(features_train)
features_test = svd1.transform(features_test)

# Do the regression
mtl = MultiTaskLasso(alpha=1e-9, random_state=1)
mtl.fit(features_train, labels_train)
print("MultiTaskLasso", mtl.score(features_test, labels_test))

######################################################################
# This part computes the Multi-Task Elastic-Net score with the optimal
# hyper-parameter

# Load the necessary libraries
from sklearn.feature_selection import SelectKBest
from sklearn.decomposition import TruncatedSVD
from sklearn.linear_model import MultiTaskElasticNet
from sklearn.model_selection import train_test_split

# Split the dataset into train and test subsets
features_train, features_test, labels_train, labels_test = train_test_split(
    features_sc, label_scm, test_size=0.33, random_state=42)

# Pre-process: dimensionality reduction (SVD)
svd1 = TruncatedSVD(n_components=9, random_state=1).fit(features_train)
Example 4
    #
    #    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.4, random_state=0)

    import sys
    sys.path.insert(0, 'C:\\r workspace\\MultiSconES\\py')
    from load_data import load_dataset

    dataset = load_dataset()
    X = dataset["data"]
    Y = dataset["labels"]

    X_train, X_test, Y_train, Y_test = train_test_split(X,
                                                        Y,
                                                        test_size=0.33,
                                                        random_state=42)

    clf = MultiTaskLasso(alpha=1)
    print("train start")
    clf.fit(X_train, Y_train)
    print("train end")
    print("coef start")
    coef_multi_task_lasso_ = clf.coef_
    print("coef end")
    plot_coef(coef_multi_task_lasso_)
    zero_coefs = get_stats(coef_multi_task_lasso_)
    print(len(zero_coefs))

    Y_pred = clf.predict(X_test)
    clf_score = clf.score(X_test, Y_test)
    score = r2_score(Y_test[:, 5], Y_pred[:, 5])
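
The last line above scores a single task (column 5). A self-contained sketch of scoring every task at once with r2_score's multioutput='raw_values' option; the synthetic data, shapes and alpha here are illustrative, not taken from the example:

# Sketch: per-task R^2 for a multi-output regressor.
import numpy as np
from sklearn.datasets import make_regression
from sklearn.linear_model import MultiTaskLasso
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

X, Y = make_regression(n_samples=400, n_features=50, n_targets=6, noise=1.0,
                       random_state=0)
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.33,
                                                     random_state=42)

clf = MultiTaskLasso(alpha=1).fit(X_train, Y_train)
Y_pred = clf.predict(X_test)

# One R^2 value per output column instead of a single averaged score
per_task_r2 = r2_score(Y_test, Y_pred, multioutput='raw_values')
for task_idx, task_r2 in enumerate(per_task_r2):
    print("task %d R^2: %.4f" % (task_idx, task_r2))
print("mean R^2:", np.mean(per_task_r2))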
Example 5
    print "R^2: ", r2

    print "\n**********测试MultiTaskLasso类**********"
    # 在初始化MultiTaskLasso类时, 指定参数alpha, 默认值是1.0.
    multiTaskLasso = MultiTaskLasso(alpha=1.0)
    # 拟合训练集
    multiTaskLasso.fit(train_X, train_Y)
    # 打印模型的系数
    print "系数:", multiTaskLasso.coef_
    print "截距:", multiTaskLasso.intercept_
    print '训练集R2: ', r2_score(train_Y, multiTaskLasso.predict(train_X))

    # 对于线性回归模型, 一般使用均方误差(Mean Squared Error,MSE)或者
    # 均方根误差(Root Mean Squared Error,RMSE)在测试集上的表现来评该价模型的好坏.
    test_Y_pred = multiTaskLasso.predict(test_X)
    print "测试集得分:", multiTaskLasso.score(test_X, test_Y)
    print "测试集MSE:", mean_squared_error(test_Y, test_Y_pred)
    print "测试集RMSE:", np.sqrt(mean_squared_error(test_Y, test_Y_pred))
    print "测试集R2:", r2_score(test_Y, test_Y_pred)

    tss, rss, ess, r2 = xss(Y, multiTaskLasso.predict(X))
    print "TSS(Total Sum of Squares): ", tss
    print "RSS(Residual Sum of Squares): ", rss
    print "ESS(Explained Sum of Squares): ", ess
    print "R^2: ", r2

    print "\n**********测试MultiTaskLassoCV类**********"
    # 在初始化MultiTaskLassoCV类时, 提供一组备选的α值, MultiTaskLassoCV类会帮我们选择一个合适的α值.
    multiTaskLassoCV = MultiTaskLassoCV(
        alphas=[0.01, 0.1, 0.5, 1, 3, 5, 7, 10, 20, 100], cv=5)
    # 拟合训练集
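
The snippet is cut off at this point. As a hedged, self-contained sketch of how a MultiTaskLassoCV fit typically proceeds (synthetic data stands in for train_X / train_Y), the selected regularization strength can be read from the fitted model's alpha_ attribute:

# Sketch: MultiTaskLassoCV selecting alpha from a candidate grid.
from sklearn.datasets import make_regression
from sklearn.linear_model import MultiTaskLassoCV
from sklearn.model_selection import train_test_split

X, Y = make_regression(n_samples=300, n_features=20, n_targets=3, noise=2.0,
                       random_state=0)
train_X, test_X, train_Y, test_Y = train_test_split(X, Y, test_size=0.3,
                                                    random_state=0)

cv_model = MultiTaskLassoCV(
    alphas=[0.01, 0.1, 0.5, 1, 3, 5, 7, 10, 20, 100], cv=5)
cv_model.fit(train_X, train_Y)
print("selected alpha:", cv_model.alpha_)
print("test-set R2:", cv_model.score(test_X, test_Y))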
# 10-fold cross-validation over the reduced training data
k_fold = KFold(n_splits=10)
for train, test in k_fold.split(X_train_reduced):
    X1 = X_train_reduced[train]
    Y1 = Y_train_raw[train]

    X2 = X_train_reduced[test]
    Y2 = Y_train_raw[test]

    ## Train the classifier on the fold
    mcl_clf = MultiTaskLasso(alpha=.3)
    mcl_clf.fit(X1, Y1)

    ## Score the classifier on the fold
    mcl_clf_score = mcl_clf.score(X2, Y2)

    print("MultiTaskLasso:  ", mcl_clf_score)



## Train the final MultiTaskLasso on the full reduced training set and time it
t1 = time.time()
clf = MultiTaskLasso(alpha=.3).fit(X_train_reduced, Y_train_raw)
t_lasso_cv = time.time() - t1
print('time to train', t_lasso_cv)

Y_predicted = clf.predict(X_test_reduced)

## Save results to csv
np.savetxt('prediction.csv', Y_predicted, fmt='%.5f', delimiter=',')