class _MultiTaskElasticNetCVImpl:
    """Thin adapter that forwards ``fit`` and ``predict`` to a wrapped ``Op`` instance."""

    def __init__(self, **hyperparams):
        # Keep the raw keyword arguments next to the estimator built from them.
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**hyperparams)

    def fit(self, X, y=None):
        """Fit the wrapped estimator and return this wrapper.

        ``y`` is forwarded to the wrapped ``fit`` only when it is not ``None``.
        """
        fit_args = (X,) if y is None else (X, y)
        self._wrapped_model.fit(*fit_args)
        return self

    def predict(self, X):
        """Return whatever the wrapped estimator predicts for ``X``."""
        return self._wrapped_model.predict(X)
for i in range(epochs/quanta): print 'Epoch: ', i*quanta an.trainSupervised(quanta, trndata, initialLearningrate=learningrate, decay=1,#0.999, myWeightdecay=weightDecay, momentum=momentum) netTrainFs.append(an.scoreOnDS(trndata)) X, X_test = an.transform(X_raw), an.transform(X_test_raw) if (lastX == X).all(): raise 'problem' lastX = copy.deepcopy(X) clf = MultiTaskElasticNetCV() clf.fit(X, Y) predTrain = np.array(clf.predict(X)) splits = [] for col in range(predTrain.shape[1]): bestSplit, bestF1 = labanUtil.getSplitThreshold(predTrain[:, col], Y[:, col]) splits.append(bestSplit) pred = np.array(clf.predict(X_test)) for col in range(pred.shape[1]): pred[:, col] = [1 if e>=splits[col] else 0 for e in pred[:, col]] predTrain[:, col] = [1 if e>=splits[col] else 0 for e in predTrain[:, col]] testFs.append(metrics.f1_score(Y_test, pred)) trainFs.append(metrics.f1_score(Y, predTrain)) #des+='\n EN test f1: '+ str(testF) #des+=' , EN train f1: '+ str(trainF) r = range(epochs/quanta) plt.plot(r, trainFs, label='TrainEs F1, max: '+str(max(trainFs)))
alphas=alphas, verbose=1, cv=folds, n_jobs=-1) models.fit(X_train, Y_train) models.score(X_test, Y_test) print "Alpha: ", models.alpha_ print "L1 ratio: ", models.l1_ratio_ print "Score of Elastic-net on test data: ", models.score(X_test, Y_test) model_EN = ElasticNet(l1_ratio=models.l1_ratio_, alpha=models.alpha_) model_EN.fit(np.concatenate((X_train, X_test)), np.concatenate((Y_train, Y_test))) test = np.rint(models.predict(X_test)).astype('int16') coeff = model_EN.coef_.T # coeff = models.coef_.T # high=1.0 # low=0.0 # mins = np.min(coeff, axis=0) # maxs = np.max(coeff, axis=0) # rng = maxs - mins # table = (high - (((high - low) * (maxs - coeff)) / rng)) coeff_differentiated = coeff.copy() neg_ind = np.where(coeff < 0) pos_ind = np.where(coeff >= 0) for i in range(neg_ind[0].shape[0]): coeff_differentiated[neg_ind[0][i],
p(mean_squared_error(lasso_predict, Y_test))


# ## Ridge
#

# In[25]:

# Fit a ridge regressor with a fixed penalty, then report MAE and MSE on the
# held-out split through the `p` helper.
ridge_model = Ridge(alpha=0.01).fit(X=X_train, y=Y_train)
ridge_predict = ridge_model.predict(X_test)
for error_metric in (mean_absolute_error, mean_squared_error):
    p(error_metric(ridge_predict, Y_test))


# ## Elastic Net

# In[27]:

# Same procedure for the multi-task elastic net; the CV search is handed a
# single candidate alpha.
enet_params = {'alpha': [1e-7]}
enet_model = MultiTaskElasticNetCV(alphas=enet_params['alpha']).fit(X=X_train, y=Y_train)
enet_predict = enet_model.predict(X_test)
for error_metric in (mean_absolute_error, mean_squared_error):
    p(error_metric(enet_predict, Y_test))
# -*- coding: utf-8 -*- """ Created on Thu Apr 21 23:51:12 2016 @author: patanjali """ from sklearn.linear_model import MultiTaskElasticNetCV from utils2 import load_dataset import pandas train, validate, test = load_dataset() no_classes = train[:,0].max()+1 train_y = pandas.get_dummies(train[:,0]) print no_classes, train.shape train = train[:201] validate = validate[:201] test = test[:201] for l1_ratio in [.1, .5, .7, .9, .95, .99, 1]: model = MultiTaskElasticNetCV(l1_ratio=l1_ratio, normalize=True, verbose=True, n_jobs=3) model.fit(train[:,1:], train_y) predicted_classes = (model.predict(validate[:,1:])).argmax(1) correct = sum(predicted_classes==validate[:,0]) print l1_ratio, correct, correct*1.0/validate.shape[0]
train_labels = np.vstack((import_test_labels["Ytest"], import_train["Ytrain"])) # labels of the original train data ## Standardization scaler = preprocessing.StandardScaler().fit(X_train_raw) X_train_scaled = scaler.transform(X_train_raw) X_test_scaled = scaler.transform(X_test_raw) ## PCA and Feature Selection pca = PCA(n_components=800) selection = SelectKBest(k=850) combined_features = FeatureUnion([("pca", pca), ("univ_select", selection)]) combined_features.fit(X_train_scaled, train_labels.ravel()) # print(pca.explained_variance_ratio_) X_train_reduced = combined_features.transform(X_train_scaled) X_test_reduced = combined_features.transform(X_test_scaled) ## Lasso CV for parameter optimization t1 = time.time() alps = np.linspace(0.1, 0.625, 15) model = MultiTaskElasticNetCV(cv=3, n_jobs=-1, max_iter=25).fit(X_train_reduced, Y_train_raw) t_lasso_cv = time.time() - t1 print "time to train", t_lasso_cv print "alpha", model.alpha_ print "i1 ration", model.i1_ratio_ Y_predicted = model.predict(X_test_reduced) ## Save results to csv np.savetxt("prediction.csv", Y_predicted, fmt="%.5f", delimiter=",")
pred = pipe.predict(X) return metrics.f1_score(y, pred) accum = np.zeros((X.shape[1],)) for y in np.transpose(Y): selector = SelectKBest(f_classif, selectedFeaureNum) selector = selector.fit(X, y) accum += selector.pvalues_ selectedIndices = accum.argsort()[:selectedFeaureNum] def transform(X): return X[:, selectedIndices] X_filtered, X_test_filtered = transform(X), transform(X_test) clf = MultiTaskElasticNetCV(normalize=True) #clf = MultiTaskLasso(normalize=True) clf.fit(X_filtered, Y) predTrain = np.array(clf.predict(X_filtered)) splits = [] for col in range(predTrain.shape[1]): bestSplit, bestF1 = labanUtil.getSplitThreshold(predTrain[:, col], Y[:, col]) splits.append(bestSplit) pred = np.array(clf.predict(X_test_filtered)) for col in range(pred.shape[1]): pred[:, col] = [1 if e>=splits[col] else 0 for e in pred[:, col]] predTrain[:, col] = [1 if e>=splits[col] else 0 for e in predTrain[:, col]] ps.append(metrics.precision_score(Y_test, pred)) rs.append(metrics.recall_score(Y_test, pred)) teF = metrics.f1_score(Y_test, pred) teFs.append(teF) trFs.append(metrics.f1_score(Y, predTrain)) print 'test#: ', test p = np.mean(ps)
print "RSS(Residual Sum of Squares): ", rss print "ESS(Explained Sum of Squares): ", ess print "R^2: ", r2 print "\n**********测试MultiTaskElasticNetCV类**********" # 在初始化MultiTaskElasticNetCV类时, 提供一组备选的α值, MultiTaskElasticNetCV类会帮我们选择一个合适的α值. multiTaskElasticNetCV = MultiTaskElasticNetCV( alphas=[0.01, 0.1, 0.5, 1, 3, 5, 7, 10, 20, 100], cv=5) # 拟合训练集 multiTaskElasticNetCV.fit(train_X, train_Y) # 打印最优的α值 print "最优的alpha值: ", multiTaskElasticNetCV.alpha_ # 打印模型的系数 print "系数:", multiTaskElasticNetCV.coef_ print "截距:", multiTaskElasticNetCV.intercept_ print '训练集R2: ', r2_score(train_Y, multiTaskElasticNetCV.predict(train_X)) # 对于线性回归模型, 一般使用均方误差(Mean Squared Error,MSE)或者 # 均方根误差(Root Mean Squared Error,RMSE)在测试集上的表现来评该价模型的好坏. test_Y_pred = multiTaskElasticNetCV.predict(test_X) print "测试集得分:", multiTaskElasticNetCV.score(test_X, test_Y) print "测试集MSE:", mean_squared_error(test_Y, test_Y_pred) print "测试集RMSE:", np.sqrt(mean_squared_error(test_Y, test_Y_pred)) print "测试集R2:", r2_score(test_Y, test_Y_pred) tss, rss, ess, r2 = xss(Y, multiTaskElasticNetCV.predict(X)) print "TSS(Total Sum of Squares): ", tss print "RSS(Residual Sum of Squares): ", rss print "ESS(Explained Sum of Squares): ", ess print "R^2: ", r2