예제 #1
0
class _MultiTaskElasticNetCVImpl:
    """Thin adapter that forwards ``fit``/``predict`` to a wrapped ``Op`` model.

    ``Op`` is looked up in the enclosing module when an instance is created
    and is constructed with the keyword hyperparameters given to ``__init__``.
    """

    def __init__(self, **hyperparams):
        # Remember the raw hyperparameters and build the underlying model.
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**hyperparams)

    def fit(self, X, y=None):
        """Fit the wrapped model and return ``self``.

        ``y`` is forwarded to the wrapped model only when it is not ``None``.
        """
        fit_args = (X,) if y is None else (X, y)
        self._wrapped_model.fit(*fit_args)
        return self

    def predict(self, X):
        """Return the wrapped model's predictions for ``X``."""
        model = self._wrapped_model
        return model.predict(X)
# Train the network `quanta` epochs at a time.  After every chunk: re-encode
# the raw features with the network, fit a MultiTaskElasticNetCV on them,
# binarise its real-valued outputs with per-column thresholds chosen on the
# training predictions, and record the train/test F1 scores.
for i in range(epochs // quanta):  # floor division: range() requires an int
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('Epoch:  %s' % (i * quanta,))
    an.trainSupervised(quanta, trndata,
        initialLearningrate=learningrate,
        decay=1,#0.999,
        myWeightdecay=weightDecay,
        momentum=momentum)
    netTrainFs.append(an.scoreOnDS(trndata))
    X, X_test = an.transform(X_raw), an.transform(X_test_raw)
    # Abort when the transformed features are identical to the previous
    # chunk's (presumably a sanity check that training changed the encoding).
    # A real exception class is required: raising a bare string is itself a
    # TypeError and would hide this message.
    if (lastX == X).all():
        raise RuntimeError('problem')
    lastX = copy.deepcopy(X)
    clf = MultiTaskElasticNetCV()
    clf.fit(X, Y)
    predTrain = np.array(clf.predict(X))
    # One threshold per output column, chosen from the training predictions.
    # NOTE(review): getSplitThreshold presumably returns (threshold, F1) -
    # confirm against labanUtil.
    splits = []
    for col in range(predTrain.shape[1]):
        bestSplit, bestF1 = labanUtil.getSplitThreshold(predTrain[:, col], Y[:, col])
        splits.append(bestSplit)
    pred = np.array(clf.predict(X_test))
    # Binarise both prediction matrices column by column: 1 where the score
    # reaches the column's threshold, 0 otherwise.
    for col in range(pred.shape[1]):
        pred[:, col] = np.where(pred[:, col] >= splits[col], 1, 0)
        predTrain[:, col] = np.where(predTrain[:, col] >= splits[col], 1, 0)

    testFs.append(metrics.f1_score(Y_test, pred))
    trainFs.append(metrics.f1_score(Y, predTrain))
#des+='\n EN test f1: '+ str(testF)
#des+=' , EN train f1: '+ str(trainF)
# x-axis for the plot: one point per training chunk.
r = range(epochs // quanta)
plt.plot(r, trainFs, label='TrainEs F1, max: '+str(max(trainFs)))
                               alphas=alphas,
                               verbose=1,
                               cv=folds,
                               n_jobs=-1)
# Fit the cross-validated model and score it on the held-out data once;
# the value is reused in the report below instead of being recomputed.
models.fit(X_train, Y_train)
test_score = models.score(X_test, Y_test)

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print("Alpha:  %s" % (models.alpha_,))
print("L1 ratio:  %s" % (models.l1_ratio_,))
print("Score of Elastic-net on test data:  %s" % (test_score,))

# Refit a plain ElasticNet with the selected hyperparameters on the train and
# test data combined.
model_EN = ElasticNet(l1_ratio=models.l1_ratio_, alpha=models.alpha_)
model_EN.fit(np.concatenate((X_train, X_test)),
             np.concatenate((Y_train, Y_test)))

# Test-set predictions of the cross-validated model, rounded to integers.
test = np.rint(models.predict(X_test)).astype('int16')
coeff = model_EN.coef_.T
# coeff = models.coef_.T

# high=1.0
# low=0.0
# mins = np.min(coeff, axis=0)
# maxs = np.max(coeff, axis=0)
# rng = maxs - mins
# table = (high - (((high - low) * (maxs - coeff)) / rng))

# Work on a copy of the coefficients and locate the negative and the
# non-negative entries separately.
coeff_differentiated = coeff.copy()
neg_ind = np.where(coeff < 0)
pos_ind = np.where(coeff >= 0)
for i in range(neg_ind[0].shape[0]):
    coeff_differentiated[neg_ind[0][i],
예제 #4
0
p(mean_squared_error(lasso_predict, Y_test))

# ## Ridge
#

# In[25]:

# Ridge regression with a fixed regularisation strength; fit() returns the
# estimator itself, so construction and fitting can be chained.
ridge_model = Ridge(alpha=0.01).fit(X=X_train, y=Y_train)
ridge_predict = ridge_model.predict(X_test)

# Report MAE first, then MSE, of the ridge predictions on the test set.
for _metric_fn in (mean_absolute_error, mean_squared_error):
    p(_metric_fn(ridge_predict, Y_test))

# ## Elastic Net

# In[27]:

# Candidate alpha values handed to the cross-validated elastic net.
enet_params = {'alpha': [1e-7]}

enet_model = MultiTaskElasticNetCV(alphas=enet_params['alpha']).fit(
    X=X_train, y=Y_train)
enet_predict = enet_model.predict(X_test)

# Same two error metrics, same order, for the elastic-net predictions.
for _metric_fn in (mean_absolute_error, mean_squared_error):
    p(_metric_fn(enet_predict, Y_test))
# -*- coding: utf-8 -*-
"""
Created on Thu Apr 21 23:51:12 2016

@author: patanjali
"""

from sklearn.linear_model import MultiTaskElasticNetCV
from utils2 import load_dataset
import pandas

train, validate, test = load_dataset()

# Column 0 of each split is the class label; the remaining columns are the
# features (see the train[:,0] / train[:,1:] indexing below).
no_classes = train[:,0].max()+1
# One-hot encode the labels over the full training set so that every class
# present in the data gets a column, and remember which label each column
# stands for.
train_y = pandas.get_dummies(train[:,0])
class_labels = train_y.columns.values

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print('%s %s' % (no_classes, train.shape))

# Work on the first 201 rows of each split.  The one-hot targets must be cut
# to the same rows as the features, otherwise fit() receives a different
# number of samples for X and y.
train = train[:201]
train_y = train_y.iloc[:201]
validate = validate[:201]
test = test[:201]

for l1_ratio in [.1, .5, .7, .9, .95, .99, 1]:

    model = MultiTaskElasticNetCV(l1_ratio=l1_ratio, normalize=True, verbose=True, n_jobs=3)
    model.fit(train[:,1:], train_y)
    # argmax selects the one-hot column with the highest predicted value;
    # translate that column position back into the class label it encodes.
    predicted_classes = class_labels[model.predict(validate[:,1:]).argmax(1)]

    # Number and fraction of validation rows classified correctly.
    correct = (predicted_classes == validate[:,0]).sum()
    print('%s %s %s' % (l1_ratio, correct, correct*1.0/validate.shape[0]))

    
train_labels = np.vstack((import_test_labels["Ytest"], import_train["Ytrain"]))  # labels of the original train data

## Standardization
# Fit the scaler on the training features only, then apply it to both sets.
scaler = preprocessing.StandardScaler().fit(X_train_raw)
X_train_scaled = scaler.transform(X_train_raw)
X_test_scaled = scaler.transform(X_test_raw)

## PCA and Feature Selection
# Concatenate 800 PCA components with the 850 best univariate features.
pca = PCA(n_components=800)
selection = SelectKBest(k=850)
combined_features = FeatureUnion([("pca", pca), ("univ_select", selection)])
combined_features.fit(X_train_scaled, train_labels.ravel())
# print(pca.explained_variance_ratio_)
X_train_reduced = combined_features.transform(X_train_scaled)
X_test_reduced = combined_features.transform(X_test_scaled)

## Elastic-net CV for parameter optimization
t1 = time.time()
# NOTE(review): `alps` is not passed to the model below, so the estimator
# uses its own default alpha grid - confirm whether alphas=alps was intended.
alps = np.linspace(0.1, 0.625, 15)
model = MultiTaskElasticNetCV(cv=3, n_jobs=-1, max_iter=25).fit(X_train_reduced, Y_train_raw)
t_lasso_cv = time.time() - t1
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print("time to train %s" % (t_lasso_cv,))
print("alpha %s" % (model.alpha_,))
# The fitted attribute is `l1_ratio_` (lower-case L); `i1_ratio_` does not
# exist and raised AttributeError.
print("l1 ratio %s" % (model.l1_ratio_,))


Y_predicted = model.predict(X_test_reduced)

## Save results to csv
np.savetxt("prediction.csv", Y_predicted, fmt="%.5f", delimiter=",")
예제 #7
0
        pred = pipe.predict(X)
        return metrics.f1_score(y, pred)

    # Feature selection: for every label column of Y, score all features with
    # an F-test (f_classif) and add up the resulting p-values per feature.
    accum = np.zeros((X.shape[1],))
    for y in np.transpose(Y):
        selector = SelectKBest(f_classif, selectedFeaureNum)
        selector = selector.fit(X, y)
        accum += selector.pvalues_
    # Keep the selectedFeaureNum features with the smallest summed p-value
    # (argsort orders ascending).
    selectedIndices = accum.argsort()[:selectedFeaureNum]
    def transform(X):
        """Return only the selected feature columns of X."""
        return X[:, selectedIndices]     
    X_filtered, X_test_filtered =  transform(X), transform(X_test)
    clf = MultiTaskElasticNetCV(normalize=True)
    #clf = MultiTaskLasso(normalize=True)
    clf.fit(X_filtered, Y)
    predTrain = np.array(clf.predict(X_filtered))
    # One decision threshold per output column, derived from the training
    # predictions.
    # NOTE(review): getSplitThreshold presumably returns (threshold, F1) -
    # confirm against labanUtil.
    splits = []
    for col in range(predTrain.shape[1]):
        bestSplit, bestF1 = labanUtil.getSplitThreshold(predTrain[:, col], Y[:, col])
        splits.append(bestSplit)
    pred =  np.array(clf.predict(X_test_filtered))
    # Binarise train and test predictions in place: 1 where the score reaches
    # the column's threshold, 0 otherwise.
    for col in range(pred.shape[1]):
        pred[:, col] = [1 if e>=splits[col] else 0 for e in pred[:, col]]
        predTrain[:, col] = [1 if e>=splits[col] else 0 for e in predTrain[:, col]]
    # Record precision, recall and F1 on the test set, and F1 on the train set.
    ps.append(metrics.precision_score(Y_test, pred))
    rs.append(metrics.recall_score(Y_test, pred))
    teF  = metrics.f1_score(Y_test, pred)
    teFs.append(teF)
    trFs.append(metrics.f1_score(Y, predTrain))
    print 'test#: ', test
    # Mean of the precision values collected so far.
    p = np.mean(ps)
예제 #8
0
    print "RSS(Residual Sum of Squares): ", rss
    print "ESS(Explained Sum of Squares): ", ess
    print "R^2: ", r2

    print "\n**********测试MultiTaskElasticNetCV类**********"
    # When constructing MultiTaskElasticNetCV we pass a set of candidate alpha
    # values; the class picks a suitable alpha for us.
    multiTaskElasticNetCV = MultiTaskElasticNetCV(
        alphas=[0.01, 0.1, 0.5, 1, 3, 5, 7, 10, 20, 100], cv=5)
    # Fit on the training set
    multiTaskElasticNetCV.fit(train_X, train_Y)
    # Print the best alpha value
    print "最优的alpha值: ", multiTaskElasticNetCV.alpha_
    # Print the model coefficients
    print "系数:", multiTaskElasticNetCV.coef_
    print "截距:", multiTaskElasticNetCV.intercept_
    print '训练集R2: ', r2_score(train_Y, multiTaskElasticNetCV.predict(train_X))

    # For linear regression models, performance on the test set measured by
    # mean squared error (MSE) or root mean squared error (RMSE) is generally
    # used to judge how good the model is.
    test_Y_pred = multiTaskElasticNetCV.predict(test_X)
    print "测试集得分:", multiTaskElasticNetCV.score(test_X, test_Y)
    print "测试集MSE:", mean_squared_error(test_Y, test_Y_pred)
    print "测试集RMSE:", np.sqrt(mean_squared_error(test_Y, test_Y_pred))
    print "测试集R2:", r2_score(test_Y, test_Y_pred)

    # Sum-of-squares breakdown of the model's predictions for X against Y.
    tss, rss, ess, r2 = xss(Y, multiTaskElasticNetCV.predict(X))
    print "TSS(Total Sum of Squares): ", tss
    print "RSS(Residual Sum of Squares): ", rss
    print "ESS(Explained Sum of Squares): ", ess
    print "R^2: ", r2