def mtlasso_model(self, X_train, y_train, X_test, y_test):
    """Fit a MultiTaskLasso on the training data and report metrics.

    Prints the train/test R^2 scores, MSE and R^2 for both splits, and
    returns the fitted estimator so callers can reuse it.

    Fix: the original bound the fitted estimator to a local variable named
    ``mtlasso_model``, shadowing the method itself, and discarded it on
    return.  Renamed the local and added a backward-compatible ``return``.
    """
    model = MultiTaskLasso(alpha=.005)
    model.fit(X_train, y_train)
    y_train_pred = model.predict(X_train)
    y_test_pred = model.predict(X_test)
    # Scoring the model
    print(model.score(X_train, y_train))
    print(model.score(X_test, y_test))
    print('MSE train: %.6f, MSE test: %.6f' % (
        mean_squared_error(y_train, y_train_pred),
        mean_squared_error(y_test, y_test_pred)))
    print('R^2 train: %.6f, R^2 test: %.6f' % (
        r2_score(y_train, y_train_pred),
        r2_score(y_test, y_test_pred)))
    return model
#splite dataset to get necessary sub-dataset features_train, features_test, labels_train, labels_test = train_test_split( features_sc, label_scm, test_size=0.33, random_state=42) #pre-process: dimensional reduction(SVD) svd1 = TruncatedSVD(n_components=9, random_state=1).fit(features_train) features_train = svd1.transform(features_train) svd2 = TruncatedSVD(n_components=9, random_state=1).fit(features_test) features_test = svd2.transform(features_test) #do regression mtl = MultiTaskLasso(alpha=0.000000001, random_state=1) mtl.fit(features_train, labels_train) print "MultiTaskLasso", mtl.score(features_test, labels_test) ###################################################################### #this part is used to calculate the Multi-Task Elastic-net's score when the hyper-parameter is optimal #load necessary libs from sklearn.feature_selection import SelectKBest from sklearn.decomposition import TruncatedSVD from sklearn.linear_model import MultiTaskElasticNet from sklearn.cross_validation import train_test_split #splite dataset to get necessary sub-dataset features_train, features_test, labels_train, labels_test = train_test_split( features_sc, label_scm, test_size=0.33, random_state=42) #pre-process: dimensional reduction(SVD)
from sklearn.cross_validation import train_test_split #splite dataset to get necessary sub-dataset features_train, features_test, labels_train, labels_test = train_test_split(features_sc,label_scm,test_size=0.33,random_state=42) #pre-process: dimensional reduction(SVD) svd1 = TruncatedSVD(n_components=9,random_state=1).fit(features_train) features_train = svd1.transform(features_train) svd2 = TruncatedSVD(n_components=9,random_state=1).fit(features_test) features_test = svd2.transform(features_test) #do regression mtl = MultiTaskLasso(alpha=0.000000001,random_state=1) mtl.fit(features_train,labels_train) print "MultiTaskLasso",mtl.score(features_test,labels_test) ###################################################################### #this part is used to calculate the Multi-Task Elastic-net's score when the hyper-parameter is optimal #load necessary libs from sklearn.feature_selection import SelectKBest from sklearn.decomposition import TruncatedSVD from sklearn.linear_model import MultiTaskElasticNet from sklearn.cross_validation import train_test_split #splite dataset to get necessary sub-dataset features_train, features_test, labels_train, labels_test = train_test_split(features_sc,label_scm,test_size=0.33,random_state=42) #pre-process: dimensional reduction(SVD) svd1 = TruncatedSVD(n_components=9,random_state=1).fit(features_train)
# # X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.4, random_state=0) import sys sys.path.insert(0, 'C:\\r workspace\\MultiSconES\\py') from load_data import load_dataset dataset = load_dataset() X = dataset["data"] Y = dataset["labels"] X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.33, random_state=42) clf = MultiTaskLasso(alpha=1) print "train start" clf.fit(X_train, Y_train) print "train end" print "coef start" coef_multi_task_lasso_ = clf.coef_ print "coef end" plot_coef(coef_multi_task_lasso_) zero_coefs = get_stats(coef_multi_task_lasso_) print len(zero_coefs) Y_pred = clf.predict(X_test) clf_score = clf.score(X_test, Y_test) score = r2_score(Y_test[:, 5], Y_pred[:, 5])
print "R^2: ", r2

print "\n**********测试MultiTaskLasso类**********"
# When constructing MultiTaskLasso, the alpha parameter defaults to 1.0.
multiTaskLasso = MultiTaskLasso(alpha=1.0)
# Fit the training set.
multiTaskLasso.fit(train_X, train_Y)
# Print the model's coefficients and intercept.
print "系数:", multiTaskLasso.coef_
print "截距:", multiTaskLasso.intercept_
print '训练集R2: ', r2_score(train_Y, multiTaskLasso.predict(train_X))

# For linear regression models, quality is usually judged by the test-set
# Mean Squared Error (MSE) or Root Mean Squared Error (RMSE).
test_Y_pred = multiTaskLasso.predict(test_X)
print "测试集得分:", multiTaskLasso.score(test_X, test_Y)
print "测试集MSE:", mean_squared_error(test_Y, test_Y_pred)
print "测试集RMSE:", np.sqrt(mean_squared_error(test_Y, test_Y_pred))
print "测试集R2:", r2_score(test_Y, test_Y_pred)

# NOTE(review): xss presumably returns the total, residual, and explained
# sums of squares plus R^2 — confirm against its definition.
tss, rss, ess, r2 = xss(Y, multiTaskLasso.predict(X))
print "TSS(Total Sum of Squares): ", tss
print "RSS(Residual Sum of Squares): ", rss
print "ESS(Explained Sum of Squares): ", ess
print "R^2: ", r2

print "\n**********测试MultiTaskLassoCV类**********"
# MultiTaskLassoCV is given a list of candidate alpha values and selects a
# suitable one via cross-validation.
multiTaskLassoCV = MultiTaskLassoCV(
    alphas=[0.01, 0.1, 0.5, 1, 3, 5, 7, 10, 20, 100], cv=5)
# Fit the training set.
# 10-fold cross-validation over the reduced training features.
# NOTE(review): this is the old KFold API (first positional argument is the
# number of samples) — removed in scikit-learn 0.20.
k_fold = KFold(Y_train_raw.shape[0], n_folds=10)
for train, test in k_fold:
    # Slice the current fold's train/validation partitions.
    X1 = X_train_reduced[train]
    Y1 = Y_train_raw[train]
    X2 = X_train_reduced[test]
    Y2 = Y_train_raw[test]
    ## Train Classifiers on fold
    mcl_clf = MultiTaskLasso(alpha=.3)
    mcl_clf.fit(X1, Y1)
    ## Score Classifiers on fold
    mcl_clf_score = mcl_clf.score(X2, Y2)
    print "MultiTaskLasso: ", mcl_clf_score

## Lasso CV for parameter optimization
# NOTE(review): despite the comment above, this fits with a fixed alpha=.3 on
# the full training set (no CV search) and times the fit.
t1 = time.time()
clf = MultiTaskLasso(alpha=.3).fit(X_train_reduced, Y_train_raw)
t_lasso_cv = time.time() - t1
print 'time to train', t_lasso_cv

# Predict on the held-out test features.
Y_predicted = clf.predict(X_test_reduced)
## Save results to csv
np.savetxt('prediction.csv', Y_predicted, fmt='%.5f',delimiter=',')