Example #1
0
def elasticNet(X,y):
    """Fit a degree-40 polynomial ElasticNet to (X, y) and save a plot.

    Expands X with polynomial features, standardizes, fits an ElasticNet with
    a tiny alpha (near-unregularized), then evaluates the fit on a dense grid
    over [0, 2) and writes scatter + curve to 'plot-elasticNet.png'.

    :param X: training inputs, shape (n_samples, 1)
    :param y: training targets, shape (n_samples,)
    :return: None
    """
    print("\n### ~~~~~~~~~~~~~~~~~~~~ ###")
    print("Lasso Regression")

    ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
    myDegree = 40
    polynomialFeatures = PolynomialFeatures(degree=myDegree, include_bias=False)
    Xp = polynomialFeatures.fit_transform(X)

    myScaler = StandardScaler()
    scaled_Xp = myScaler.fit_transform(Xp)

    ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
    model = ElasticNet(alpha=1e-7, l1_ratio=0.5)
    model.fit(scaled_Xp, y)

    ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
    dummyX = np.arange(0, 2, 0.01)
    dummyX = dummyX.reshape((dummyX.shape[0], 1))
    # BUG FIX: use transform(), not fit_transform(), so the evaluation grid is
    # expanded with the transformer as fitted on the training data.
    dummyXp = polynomialFeatures.transform(dummyX)
    scaled_dummyXp = myScaler.transform(dummyXp)
    dummyY = model.predict(scaled_dummyXp)

    outputFILE = 'plot-elasticNet.png'
    fig, ax = plt.subplots()
    fig.set_size_inches(h=6.0, w=10.0)
    ax.axis([0, 2, 0, 15])
    ax.scatter(X, y, color="black", s=10.0)
    ax.plot(dummyX, dummyY, color='red', linewidth=1.5)
    # BUG FIX: savefig takes the file name positionally ('fname'); 'filename='
    # is not part of its public signature.
    plt.savefig(outputFILE, bbox_inches='tight', pad_inches=0.2, dpi=600)

    ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
    return None
def check_ElasticNet(X, y, pred, tol, reg_alpha, reg_lambda, weights):
    """Verify an externally produced fit against sklearn's ElasticNet.

    Translates the (reg_alpha, reg_lambda) penalty pair into sklearn's
    (alpha, l1_ratio) parameterization, refits on (X, y), and asserts that
    both coefficients and predictions agree within `tol`.
    """
    total_penalty = reg_alpha + reg_lambda
    reference = ElasticNet(alpha=total_penalty,
                           l1_ratio=reg_alpha / total_penalty)
    reference.fit(X, y)
    reference_pred = reference.predict(X)
    assert np.isclose(weights, reference.coef_, rtol=tol, atol=tol).all()
    assert np.isclose(reference_pred, pred, rtol=tol, atol=tol).all()
Example #3
0
def report_ff_en():
    """Benchmark ElasticNet on Fastfood-projected data.

    Projects the module-level train/validation data through the Fastfood
    Gaussian-kernel approximation, fits an ElasticNet on the projected
    training set, and returns the validation MSE together with the elastic-net
    fit time and the training-projection time.
    """
    # Fastfood approximation of Gaussian kernel
    para = FastfoodPara(n, d)
    start = time()
    PHI_train, _ = FastfoodForKernel(trainData, para, sgm)
    elapsed_ff_kern_train = time() - start

    start = time()
    PHI_valid, _ = FastfoodForKernel(validationData, para, sgm)
    elapsed_ff_kern_valid = time() - start

    # Train elastic net on projected training data
    en = ElasticNet()
    start = time()
    en.fit(PHI_train.T, trainLabels)
    elapsed_en_fit = time() - start

    # Predict labels for projected validation data
    start = time()
    y_pred = en.predict(PHI_valid.T)
    elapsed_en_pred = time() - start

    # Report performance
    mse_proj = metrics.mean_squared_error(validationLabels, y_pred)

    return mse_proj, elapsed_en_fit, elapsed_ff_kern_train
Example #4
0
def enet(a):
    """Fit ElasticNet(alpha=a) on the module-level base data.

    Prints the training R^2, predicts on the module-level X_test, and writes
    the predictions to 'elastic.csv' via write_to_file.
    """
    print("Doing elastic net")
    model = ElasticNet(alpha=a)
    model.fit(base_X, base_Y)
    print("Score = %f" % model.score(base_X, base_Y))
    test_predictions = model.predict(X_test)
    write_to_file("elastic.csv", test_predictions)
Example #5
0
def enet_granger_causality_test(X_t, y_t, top_df, max_iter=10000000):
    """
    Return the cv-parameters tested across the whole data

    :param X_t: design matrix, shape (n_samples, n_features)
    :param y_t: response vector, shape (n_samples,)
    :param top_df: DataFrame with one row per candidate model; must contain
                   columns "alpha" (used as l1_ratio) and "lambda.min"
                   (used as the penalty strength)
    :param max_iter: maximum coordinate-descent iterations per fit
    :return: (top_df augmented with test_err/score/df columns, test_coefs)
    """
    n_models = len(top_df)
    test_errs = np.zeros(n_models)
    scores = np.zeros(n_models)
    dfs = np.zeros(n_models)
    test_coefs = np.zeros((n_models, X_t.shape[1]))

    for i in range(n_models):
        alpha = top_df.iloc[i]["alpha"]
        lambda_min = top_df.iloc[i]["lambda.min"]
        enet = ElasticNet(l1_ratio=alpha, alpha=lambda_min, max_iter=max_iter)
        enet.fit(X_t, y_t)
        y_pred = enet.predict(X_t)
        test_errs[i] = np.average((y_t - y_pred) ** 2)
        scores[i] = enet.score(X_t, y_t)
        test_coefs[i] = enet.coef_
        # Degrees of freedom = number of non-zero coefficients
        # (idiomatic replacement for len(np.where(coef_)[0])).
        dfs[i] = np.count_nonzero(enet.coef_)

    top_df["test_err"] = test_errs
    top_df["score"] = scores
    top_df["df"] = dfs

    return top_df, test_coefs
Example #6
0
def fit_enet(train_X, train_y, test_X):
    """
    Use linear regression to predict. Elastic net is LR with L1 and L2
    regularisation.

    :param train_X: training features
    :param train_y: training targets
    :param test_X: test features
    :return: (model description string, train predictions, test predictions)
    """
    enet = ElasticNet()
    enet.fit(train_X, train_y)
    # BUG FIX: pprint() prints to stdout and returns None, so the original
    # string always read "coefs None"; embed the coefficients directly.
    model = "ElasticNet int %.2f coefs %s" % (enet.intercept_, enet.coef_)
    yhat_train = enet.predict(train_X)
    yhat_test = enet.predict(test_X)

    return model, yhat_train, yhat_test
Example #7
0
def ElasticNetRegression(input_dict):
    """Demo: fit an ElasticNet on the diabetes dataset and print predictions.

    NOTE(review): despite the local names, `n_sample` holds the feature
    matrix and `n_feature` holds the target vector — the names are swapped.
    `input_dict` is unused by the visible code.
    """
    from sklearn.datasets import load_diabetes
    from sklearn.linear_model import ElasticNet

    dta = load_diabetes()
    n_sample = dta.data
    n_feature = dta.target
    # BUG FIX: converted Python 2 print statements to print() calls so this
    # block parses under Python 3.
    print("*******SAMPLES********")
    print(n_sample)
    print("******FEARTURES*******")
    print(n_feature)
    rgs = ElasticNet().fit(n_sample, n_feature)
    print(rgs)
    print(rgs.predict(n_sample))
Example #8
0
    def fit_model_12(self,toWrite=False):
        """Cross-validate an ElasticNet(alpha=1.0) and optionally pickle it.

        Iterates the pre-split folds in self.cv_data, refitting the same
        model object per fold and printing the log-loss on each test split.
        When toWrite is true, the model from the final fold is pickled to
        'model12/model.pkl'.
        """
        model = ElasticNet(alpha=1.0)

        for data in self.cv_data:
            X_train, X_test, Y_train, Y_test = data
            model.fit(X_train, Y_train)
            pred = model.predict(X_test)
            print("Model 12 score %f" % (logloss(Y_test, pred),))

        if toWrite:
            # BUG FIX: pickle is a binary format — open with 'wb', not 'w';
            # the context manager also guarantees the handle is closed.
            with open('model12/model.pkl', 'wb') as f2:
                pickle.dump(model, f2)
Example #9
0
 def predict_linear(self, enet=True):
     """Fit a linear model to this SRFF and report its test error.

     :param enet: if True use ElasticNet (L1+L2 regularisation),
         otherwise plain LinearRegression.
     :return: (intercept, coefficients, error) where error is computed
         by self.defn on the held-out test set.
     """
     if enet:
         clf = ElasticNet()
     else:
         clf = LinearRegression()
     # we have to transpose X here because sklearn uses the
     # opposite order (rows v columns). maybe this is a sign that
     # I'm using the wrong order.
     clf.fit(self.train_X.T, self.train_y)
     yhat = clf.predict(self.test_X.T)
     err = self.defn(self.test_y, yhat)
     return clf.intercept_, clf.coef_, err
Example #10
0
def report_orig_en():
    """Benchmark ElasticNet on the raw (unprojected) data.

    Fits on the module-level training data and returns the validation MSE
    plus the fit time; the trailing 0. stands in for a projection time,
    which does not apply here.
    """
    model = ElasticNet()
    start = time()
    model.fit(trainData.T, trainLabels)
    elapsed_en_fit = time() - start

    # Predict labels for original validation data
    start = time()
    y_pred = model.predict(validationData.T)
    elapsed_en_pred = time() - start

    # Report performance
    mse_orig = metrics.mean_squared_error(validationLabels, y_pred)
    return mse_orig, elapsed_en_fit, 0.
Example #11
0
def create_ml_classifier(df):
    """Cross-validate an ElasticNet on df and return the best fold's model.

    Features are every column except 'base_ip_release', which is the target.
    Runs 10-fold CV, tracking the squared error of each fold, and returns the
    model object from the fold with the smallest squared error.

    NOTE(review): clf.fit refits the same object in place, so every value in
    min_dict references the same (last-fitted) model — confirm intent.
    """
    import operator
    X = np.array(df.drop('base_ip_release', 1))
    y = np.array(df['base_ip_release'])
    #clf = LinearRegression()
    clf = ElasticNet(alpha=1, l1_ratio=0.5)
    #clf = Ridge(alpha=2)

    # BUG FIX: dropped the dead `c = np.zeros(len(X)/10)` — it was
    # immediately overwritten by `c = 0`, and len(X)/10 is a float under
    # Python 3, which np.zeros rejects.
    kf = k(len(y), n_folds=10)
    c = 0  # running total of squared error across folds
    min_dict = {}  # fold squared-error -> fitted model
    get_error = []
    for train, test in kf:
        get_clif = clf.fit(X[train], y[train])
        p = clf.predict(X[test])
        e = p - y[test]
        t = np.dot(e, e)  # sum of squared errors for this fold
        c += t
        min_dict[t] = get_clif
        get_error.append(t)
    min_error = min(get_error)
    # BUG FIX: Python 2 print statements converted to print() calls.
    print(sorted(min_dict.items(), key=operator.itemgetter(0)))
    print(min_dict[min_error])
    print(c)
    print(np.sqrt(c / len(X)))
    return min_dict[min_error]
Example #12
0
def assert_regression_result(results, tol):
    """Check xgboost reg:linear results against an equivalent ElasticNet.

    For every regression entry in `results`, refits sklearn's ElasticNet with
    the penalty pair translated from xgboost's (alpha, lambda), then asserts
    that the learned weights and the predictions agree within `tol`.
    """
    for res in results:
        if res["param"]["objective"] != "reg:linear":
            continue
        dataset = res["dataset"]
        X = scale(dataset.X, with_mean=isinstance(dataset.X, np.ndarray))
        y = dataset.y
        reg_alpha = res["param"]["alpha"]
        reg_lambda = res["param"]["lambda"]
        pred = res["bst"].predict(xgb.DMatrix(X))
        # Skip the bias term: only the feature weights are compared.
        weights = xgb_get_weights(res["bst"])[1:]
        enet = ElasticNet(alpha=reg_alpha + reg_lambda,
                          l1_ratio=reg_alpha / (reg_alpha + reg_lambda))
        enet.fit(X, y)
        enet_pred = enet.predict(X)
        assert np.isclose(weights, enet.coef_, rtol=tol,
                          atol=tol).all(), (weights, enet.coef_)
        assert np.isclose(enet_pred, pred, rtol=tol, atol=tol).all(), (
            dataset.name, enet_pred[:5], pred[:5])
Example #13
0
def Lasso():
    """Fit Lasso and ElasticNet on the short training set; print test R^2.

    NOTE(review): this function shadows sklearn's Lasso class at module
    level, and reads shortData/shortLabels plus testDat/testLab from the
    enclosing scope — confirm those globals exist before calling.
    """
    from sklearn.linear_model import Lasso
    from sklearn.metrics import r2_score
    alpha = 0.1
    lasso = Lasso(alpha=alpha)
    trainDat = shortData
    trainLab = shortLabels

    lassoPred = lasso.fit(trainDat, trainLab)
    labPredict = lassoPred.predict(testDat)
    r2val = r2_score(testLab, labPredict)
    print(lasso)
    # BUG FIX: Python 2 print statements converted to print() calls so this
    # block parses under Python 3.
    print("r^2 for lasso testing is: ", r2val)

    from sklearn.linear_model import ElasticNet
    enet = ElasticNet(alpha=alpha, l1_ratio=0.7)
    enetPred = enet.fit(trainDat, trainLab)
    labPredict_enet = enet.predict(testDat)
    r2val_enet = r2_score(testLab, labPredict_enet)
    print(enet)
    print("r^2 for enet testing is: ", r2val_enet)
# Baysian Ridge Regression
# BUG FIX (this block): Python 2 print statements converted to print()
# calls so the block parses under Python 3; behavior is otherwise unchanged.
print('baysian ridge')
br = BayesianRidge(compute_score=True)
#br.fit(x[:, np.newaxis], y)
#br_sts_scores = br.predict(xt[:, np.newaxis])
br.fit(x, y)
br_sts_scores = br.predict(xt)


# Elastic Net
print('elastic net')
enr = ElasticNet()
#enr.fit(x[:, np.newaxis], y)
#enr_sts_scores = enr.predict(xt[:, np.newaxis])
enr.fit(x, y)
enr_sts_scores = enr.predict(xt)


# Passive Aggressive Regression
print('passive aggressive')
par = PassiveAggressiveRegressor()
par.fit(x, y)
par_sts_scores = par.predict(xt)
#par.fit(x[:, np.newaxis], y)
#par_sts_scores = par.predict(xt[:, np.newaxis])

# RANSAC Regression
print('ransac')
ransac = RANSACRegressor()
#ransac.fit(x[:, np.newaxis], y)
#ransac_sts_scores = ransac.predict(xt[:, np.newaxis])
Example #15
0
WBC = 7.13914
Urine = 3739.39

# Single hand-entered patient record; the feature order must match the
# columns the model was trained on (x_train_res) — TODO confirm ordering.
PatientX = np.array([[ICD9_code, Oxygen, PO2, Bicarbonate, Bilirubin, Sodium, Urea_Nitrogen, 
                    Potassium, WBC, Urine, Age, Gender, Admission_Type, Admissin_Location,
                    Insurance, Religion, Marital_Status, Ethnicity, Diagnosis]])


# In[55]:


#Prediction using the Elastic NET Model

model_Elnet = ElasticNet(alpha = 0.01, l1_ratio = 0.1)
model_Elnet.fit(x_train_res, y_train_res)
y_patientX = model_Elnet.predict(PatientX)

# Threshold the regression output at 0.5 to produce a POSITIVE/NEGATIVE
# call; `prob` rescales the raw score into a percentage.
if y_patientX > 0.5:
    y_pred_px = "POSITIVE"
    prob = 100*y_patientX
else:
    y_pred_px = "NEGATIVE"
    prob = 100 - 100*y_patientX

print("The newly addmited patient has been classified %s for the death prediction.With a probability of %f %%" % (y_pred_px,prob))


# In[73]:

from sklearn.preprocessing import normalize
def Final_Stacking(Train_DS, y, Actual_DS, Sample_DS):
    """Fit the final stacking-stage model and write the submission CSV.

    NOTE(review): as written, only the BayesianRidge branch actually runs —
    the ElasticNet assigned just above it is immediately overwritten — and
    the sys.exit(0) after the ELN submission makes everything below it
    unreachable (including the NNT/averaging section, which also references
    preds_XGB that is only defined in commented-out code).
    """

    print("***************Starting Final Stacking*************** at Time: %s" %(tm.strftime("%H:%M:%S")))
    t0 = time()

    #Setting Standard scaler for data
    # stdScaler = StandardScaler()
    # stdScaler.fit(Train_DS,y)
    # Train_DS = stdScaler.transform(Train_DS)
    # Actual_DS = stdScaler.transform(Actual_DS)

    #CV: 0.36225ff
    # clf = RandomForestRegressor(n_estimators=100,min_samples_leaf=18,max_features=None,bootstrap=True,
    #                                 min_samples_split=23,max_depth=25)

    # clf=Lasso(alpha=0.02)
    # print("lasso CV")
    # Nfold_score = Nfold_Cross_Valid(Train_DS, y, clf)

    #clf = xgb.XGBRegressor(n_estimators=2000,max_depth=1,learning_rate=0.01,nthread=4,min_child_weight=7,subsample=1,colsample_bytree=0.7,silent=True,gamma=0.8)

    #cv:0.3872
    #clf = ExtraTreesRegressor(n_estimators=1000,min_samples_leaf=19,max_features=12,bootstrap=True,min_samples_split=1,max_depth=25,n_jobs=-1)

    #################################################################################################################################################
    #CV:
    # clf = RandomForestRegressor(n_estimators=1000,n_jobs=-1)
    # #Nfold_score = Nfold_Cross_Valid(Train_DS, y, clf)
    # clf.fit(Train_DS, y)
    # Pred_Actual = clf.predict(Actual_DS).reshape(-1,1)
    # preds_RFR = pd.DataFrame(Pred_Actual,columns=Sample_DS.columns[1:]).reset_index().sort(columns='Hazard',ascending= True).reset_index(drop=True).reset_index().sort(columns='index',ascending= True).reset_index(drop=True)
    # preds_RFR = preds_RFR.drop(['Hazard','index'], axis = 1)
    #
    # preds = pd.DataFrame(np.array(preds_RFR), index=Sample_DS.Id.values, columns=Sample_DS.columns[1:])
    # preds.to_csv(file_path+'output/Submission_Stacking_RFR_1.csv', index_label='Id')
    # print("RFR Actual Model predicted")
    # print(preds_RFR.head(10))
    #
    # sys.exit(0)
    ################################################################################################################################################
    #CV:0.39009654989438813
    # clf = xgb.XGBRegressor(n_estimators=2000,max_depth=2,learning_rate=0.01,nthread=4,min_child_weight=23,subsample=0.9,colsample_bytree=0.2,silent=True,gamma=0.9)
    # #clf = xgb.XGBRegressor(n_estimators=1000)
    # #Nfold_score = Nfold_Cross_Valid(Train_DS, y, clf)
    # clf.fit(Train_DS, y)
    # Pred_Actual = clf.predict(Actual_DS).reshape(-1,1)
    # preds_XGB = pd.DataFrame(Pred_Actual,columns=Sample_DS.columns[1:]).reset_index().sort(columns='Hazard',ascending= True).reset_index(drop=True).reset_index().sort(columns='index',ascending= True).reset_index(drop=True)
    # preds_XGB = preds_XGB.drop(['Hazard','index'], axis = 1)
    #
    # preds = pd.DataFrame(np.array(preds_XGB), index=Sample_DS.Id.values, columns=Sample_DS.columns[1:])
    # preds.to_csv(file_path+'output/Submission_Stacking_XGB_1.csv', index_label='Id')
    # print("XGB Actual Model predicted")
    # print(preds_XGB.head(10))
    #################################################################################################################################################
    #CV: 0.3879 , LB:0.382419
    #clf = ElasticNet(alpha=0.1, l1_ratio=0.3)

    #CV:0.3902-.3905 , 0.385
    # NOTE(review): clf is reassigned on the next line, so this ElasticNet
    # is dead — only the BayesianRidge is cross-validated and fitted.
    clf = ElasticNet(alpha=0.1, l1_ratio=0.1)
    clf = BayesianRidge(n_iter=300)
    Nfold_score = Nfold_Cross_Valid(Train_DS, y, clf)
    clf.fit(Train_DS, y)
    Pred_Actual = clf.predict(Actual_DS).reshape(-1,1)
    preds_ELN = pd.DataFrame(Pred_Actual,columns=Sample_DS.columns[1:]).reset_index().sort(columns='Hazard',ascending= True).reset_index(drop=True).reset_index().sort(columns='index',ascending= True).reset_index(drop=True)
    preds_ELN = preds_ELN.drop(['Hazard','index'], axis = 1)

    preds = pd.DataFrame(np.array(preds_ELN), index=Sample_DS.Id.values, columns=Sample_DS.columns[1:])
    preds.to_csv(file_path+'output/Submission_Stacking_ELN_1.csv', index_label='Id')
    print("ELN Actual Model predicted")
    print(preds_ELN.head(10))

    # NOTE(review): execution stops here; everything below never runs.
    sys.exit(0)
    #######################################################################################################################################

    Pred_Actual = NN1_Regressor(Train_DS, y, Actual_DS, grid= False)
    preds_NNT = pd.DataFrame(Pred_Actual,columns=Sample_DS.columns[1:]).reset_index().sort(columns='Hazard',ascending= True).reset_index(drop=True).reset_index().sort(columns='index',ascending= True).reset_index(drop=True)
    preds_NNT = preds_NNT.drop(['Hazard','index'], axis = 1)

    preds = pd.DataFrame(np.array(preds_NNT), index=Sample_DS.Id.values, columns=Sample_DS.columns[1:])
    preds.to_csv(file_path+'output/Submission_Stacking_NNT_1.csv', index_label='Id')
    print("NNT Actual Model predicted")
    print(preds_NNT.head(10))

    pred_Actual = (preds_XGB['level_0'] + preds_ELN['level_0'] + preds_NNT['level_0'])/3

    #pred_Actual = np.power((Pred_Actual *  Pred_Actual1 * Pred_Actual2), (1/3.0))

    #Get the predictions for actual data set
    preds = pd.DataFrame(pred_Actual, index=Sample_DS.Id.values, columns=Sample_DS.columns[1:])
    preds.to_csv(file_path+'output/Submission_Stacking_1.csv', index_label='Id')
    ########################################################################################################################################

    print("***************Ending Final Stacking*************** at Time: %s" %(tm.strftime("%H:%M:%S")))
    return pred_Actual
def get_en_prediction(train_data, train_truth, test_data, test_truth, alpha=1.0, l1_ratio=0.5, iter_id=0):
    """Train an ElasticNet and return flat predictions for the test set.

    `test_truth` and `iter_id` are accepted for interface compatibility but
    are not used by the computation.
    """
    model = ElasticNet(alpha=alpha, l1_ratio=l1_ratio)
    model.fit(train_data, train_truth)
    return model.predict(test_data).ravel()
Example #18
0
#%%
#now for the estimation
#split into test & train set
train_X,test_X,train_Y,test_Y = train_test_split(lagged_stimuli,Y,test_size=0.3)

#%%
#try elastic net CV
#enet_model = ElasticNetCV([.1,.3,.7,.9,.99],cv=3,n_jobs=-1)
#enet_model.train(train_X,train_Y)
#pred_Y = enet_model.predict(train_X)

#for now
# Fixed-l1_ratio ElasticNet fitted on the training split; pred_Y holds
# in-sample (training-set) predictions.
enet = ElasticNet(l1_ratio=0.7)
enet.fit(train_X,train_Y)
pred_Y = enet.predict(train_X)

#%%
#non-linearity first by CV NN
# Randomized search over k for a k-nearest-neighbours regressor
# (constructed here; not fitted within this block).
parameters_NN = { 'n_neighbors' : [5,10,20,40]}
NN_nonl = KNeighborsRegressor()
gs_NN = grid_search.RandomizedSearchCV(NN_nonl,parameters_NN,verbose=1)

#%%
#try Radius Neighbors Regr
#parameters_radius = { 'weights' : ('uniform','distance') , 'radius' : [0.5,1.0,3.0,5.0,10.0,20.0]}
#RN_nonl = RadiusNeighborsRegressor()
#gs_RN = grid_search.RandomizedSearchCV(RN_nonl,parameters_radius,verbose=1)

#%%
# It is made available under the MIT License

import numpy as np
from sklearn.datasets import load_svmlight_file
from sklearn.cross_validation import KFold
from sklearn.linear_model import ElasticNet
from sklearn.metrics import mean_squared_error, r2_score

# Load the E2006 regression benchmark in svmlight format.
data, target = load_svmlight_file('data/E2006.train')

# Edit the lines below if you want to switch method:
# met = LinearRegression(fit_intercept=True)
met = ElasticNet(fit_intercept=True, alpha=.1)

# Out-of-fold predictions via 5-fold cross-validation.
folds = KFold(len(target), n_folds=5)
pred = np.zeros_like(target)
for train_idx, test_idx in folds:
    met.fit(data[train_idx], target[train_idx])
    pred[test_idx] = met.predict(data[test_idx])

print('[EN 0.1] RMSE on testing (5 fold), {:.2}'.format(np.sqrt(mean_squared_error(target, pred))))
print('[EN 0.1] R2 on testing (5 fold), {:.2}'.format(r2_score(target, pred)))
print('')

# Refit on the full data to measure the (optimistic) training error.
met.fit(data, target)
pred = met.predict(data)
print('[EN 0.1] RMSE on training, {:.2}'.format(np.sqrt(mean_squared_error(target, pred))))
print('[EN 0.1] R2 on training, {:.2}'.format(r2_score(target, pred)))


Example #20
0
            mean = X_train.mean(axis=0)
            X_train = (X_train - mean) / std
            X_test = (X_test - mean) / std

            std = y_train.std(axis=0)
            mean = y_train.mean(axis=0)
            y_train = (y_train - mean) / std
            y_test = (y_test - mean) / std

            gc.collect()
            print("- benchmarking ElasticNet")
            clf = ElasticNet(alpha=alpha, l1_ratio=0.5, fit_intercept=False)
            tstart = time()
            clf.fit(X_train, y_train)
            elnet_results[i, j,
                          0] = mean_squared_error(clf.predict(X_test), y_test)
            elnet_results[i, j, 1] = time() - tstart

            gc.collect()
            print("- benchmarking SGD")
            clf = SGDRegressor(alpha=alpha / n_train,
                               fit_intercept=False,
                               max_iter=max_iter,
                               learning_rate="invscaling",
                               eta0=.01,
                               power_t=0.25,
                               tol=1e-3)

            tstart = time()
            clf.fit(X_train, y_train)
            sgd_results[i, j, 0] = mean_squared_error(clf.predict(X_test),
Example #21
0
class ElasticNet(Model):
    """Wrapper around ElasticNetModel with label mapping and metrics.

    Supports 'regressor' (default) and 'classifier' modes; in classifier
    mode string labels are mapped to integers before fitting and predictions
    are rounded. Also provides accuracy/confusion-matrix/ROC/R^2/MSE/MAPE/
    RMSE helpers over flattened prediction arrays.
    """

    # X represents the features, Y represents the labels
    X = None
    Y = None
    prediction = None
    model = None

    # BUG FIX: removed the earlier zero-argument __init__ — Python keeps only
    # the last def with a given name, so it was dead code shadowed by this one.
    def __init__(self, X=None, Y=None, label_headers=None,  l1_ratio=1, type='regressor', cfg=False):

        if X is not None:
            self.X = X

        if Y is not None:
            self.Y = Y

        self.type = type
        self.cfg = cfg

        self.mapping_dict = None
        self.label_headers = label_headers

        self.model = ElasticNetModel(l1_ratio=l1_ratio)


    def fit(self, X=None, Y=None):
        """Fit the underlying model; classifier labels are mapped to ints."""
        if X is not None:
            self.X = X

        if Y is not None:
            self.Y = Y

        if self.type == 'classifier':
            self.Y = self.map_str_to_number(self.Y)

        print('ElasticNet Train started............')
        self.model.fit(self.X, self.Y)
        print('ElasticNet completed..........')

        return self.model

    def predict(self, test_features):
        """Predict for test_features; classifier outputs are rounded."""
        print('Prediction started............')
        self.predictions = self.model.predict(test_features)
        if self.type == 'classifier':
            # BUG FIX: the original assigned to an unbound local
            # `predictions`, raising UnboundLocalError and discarding
            # the rounding entirely.
            self.predictions = self.predictions.round()
        print('Prediction completed..........')
        return self.predictions


    def save(self):
        """Optionally dump model params as JSON; the model itself is never saved."""
        if self.cfg:
            # BUG FIX: use a context manager so the file is closed even if
            # json serialization raises.
            with open('elasticnet_configs.txt', 'w') as f:
                f.write(json.dumps(self.model.get_params()))
        print('No models will be saved for elasticnet')

    def featureImportance(self):
        """Return the fitted coefficients as the importance measure."""
        return self.model.coef_

    def map_str_to_number(self, Y):
        """Map string labels in Y to integer codes, caching the mapping."""
        mapping_flag = False
        if self.mapping_dict is not None:
            # Reuse the mapping built on a previous call for consistency.
            for label_header in self.label_headers:
                Y[label_header] = Y[label_header].map(self.mapping_dict)
            return Y

        mapping_dict = None
        for label_header in self.label_headers:
            check_list = pd.Series(Y[label_header])
            for item in check_list:
                if type(item) == str:
                    mapping_flag = True
                    break
            if mapping_flag:
                classes = Y[label_header].unique()
                mapping_dict = {}
                index = 0
                for c in classes:
                    mapping_dict[c] = index
                    index += 1

                Y[label_header] = Y[label_header].map(mapping_dict)
                mapping_flag = False

        self.mapping_dict = mapping_dict
        return Y

    def map_number_to_str(self, Y, classes):
        """Invert map_str_to_number: map integer codes back to class labels."""
        Y = Y.round()
        Y = Y.astype(int)
        if self.mapping_dict is not None:
            mapping_dict = self.mapping_dict
        else:
            mapping_dict = {}
            index = 0
            for c in classes:
                mapping_dict[index] = c
                index += 1

        inv_map = {v: k for k, v in mapping_dict.items()}
        return Y.map(inv_map)


    def getAccuracy(self, test_labels, predictions, origin=0, hitmissr=0.8):
        """Fraction of predictions counted correct.

        Classifier: exact match after label mapping. Regressor: a 'hit' is a
        relative error (w.r.t. the prediction) of at most 1 - hitmissr.
        """
        if self.type == 'classifier':
            correct = 0
            df = pd.DataFrame(data=predictions.flatten())
            test_labels = self.map_str_to_number(test_labels.copy())
            for i in range(len(df)):
                if (df.values[i] == test_labels.values[i]):
                    correct = correct + 1
        else:
            correct = 0
            df = pd.DataFrame(data=predictions.flatten())
            for i in range(len(df)):
                if 1 - abs(df.values[i] - test_labels.values[i])/abs(df.values[i]) >= hitmissr:
                    correct = correct + 1
        return float(correct)/len(df)

    def getConfusionMatrix(self, test_labels, predictions, label_headers):
        """Plot a normalized confusion matrix per label (classifier only)."""
        df = pd.DataFrame(data=predictions.flatten())
        if self.type == 'classifier':
            index = 0
            for label_header in label_headers:
                classes = test_labels[label_header].unique()
                # BUG FIX: DataFrame.ix was removed from pandas; .iloc keeps
                # the positional column indexing used here.
                df_tmp = self.map_number_to_str(df.iloc[:, index], classes)
                title = 'Normalized confusion matrix for NeuralNetwork (' + label_header + ')'
                self.plot_confusion_matrix(test_labels.iloc[:, index], df_tmp, classes=classes, normalize=True,
                          title=title)
                index = index + 1
        else:
            return 'No Confusion Matrix for Regression'

    def getROC(self, test_labels, predictions, label_headers):
        """Plot a ROC curve (classifier only)."""
        predictions=pd.DataFrame(data=predictions.flatten())
        predictions.columns=test_labels.columns.values
        if self.type == 'classifier':
            test_labels = self.map_str_to_number(test_labels)
            fpr, tpr, _ = roc_curve(test_labels, predictions)
            plt.figure(1)
            plt.plot([0, 1], [0, 1], 'k--')
            plt.plot(fpr, tpr)
            plt.xlabel('False positive rate')
            plt.ylabel('True positive rate')
            plt.title('ROC curve')
            plt.show()
        else:
            return 'No Confusion Matrix for Regression'

    def getRSquare(self, test_labels, predictions, mode='single'):
        """Return R^2 (variance-weighted when mode == 'multiple')."""
        df = pd.DataFrame(data=predictions.flatten())
        if self.type == 'regressor':
            if mode == 'multiple':
                errors = r2_score(test_labels, df, multioutput='variance_weighted')
            else:
                errors = r2_score(test_labels, df)
            return errors
        else:
            return 'No RSquare for Classification'

    def getMSE(self, test_labels, predictions):
        """Return mean squared error (regressor only)."""
        df = pd.DataFrame(data=predictions.flatten())
        if self.type == 'regressor':
            errors = mean_squared_error(test_labels, df)
            return errors
        else:
            return 'No MSE for Classification'

    def getMAPE(self, test_labels, predictions):
        """Return mean absolute percentage error (regressor only)."""
        df = pd.DataFrame(data=predictions.flatten())
        if self.type == 'regressor':
            errors = np.mean(np.abs((test_labels - df.values) / test_labels)) * 100
            return errors.values[0]
        else:
            return 'No MAPE for Classification'

    def getRMSE(self, test_labels, predictions):
        """Return root mean squared error (regressor only)."""
        df = pd.DataFrame(data=predictions.flatten())
        if self.type == 'regressor':
            errors = sqrt(mean_squared_error(test_labels, df))
            return errors
        else:
            return 'No RMSE for Classification'
Example #22
0
# Finish the residual plot for the (out-of-view) model fitted above,
# then report its train/test MSE and R^2.
plt.ylabel('Residuals')
plt.legend(loc='upper left')
plt.hlines(y=0, xmin=-10, xmax=50, color='black', lw=2)
plt.show()
print('At alpha = 0.1, MSE train: %.3f, test: %.3f' % (mean_squared_error(
    y_train, y_train_pred), mean_squared_error(y_test, y_test_pred)))
print('At alpha = 0.1, R^2 train: %.3f, test: %.3f' %
      (r2_score(y_train, y_train_pred), r2_score(y_test, y_test_pred)))

## alpha = 0.1 gives the best performing model

## Elastic Net Model
from sklearn.linear_model import ElasticNet
elanet1 = ElasticNet(alpha=1.0, l1_ratio=0.5)
elanet1.fit(X_train, y_train)
y_train_pred = elanet1.predict(X_train)
y_test_pred = elanet1.predict(X_test)
# Residuals-vs-predicted scatter: training points in green, test in red.
plt.scatter(y_train_pred,
            y_train_pred - y_train,
            c='green',
            marker='o',
            edgecolor='white',
            label='Training data')
plt.scatter(y_test_pred,
            y_test_pred - y_test,
            c='red',
            marker='s',
            edgecolor='white',
            label='Test data')
plt.title('Elastic Net Model with alpha = 1')
plt.xlabel('Predicted values')
Example #23
0
    from sklearn.metrics import r2_score
    from sklearn.metrics import mean_squared_error

    print('MSE train: %.3f, test: %.3f' % (mean_squared_error(
        y_train, y_train_pred), mean_squared_error(y_test, y_test_pred)))
    print('R^2 train: %.3f, test: %.3f' %
          (r2_score(y_train, y_train_pred), r2_score(y_test, y_test_pred)))

#ElasticNet
from sklearn.linear_model import ElasticNet
for n in floatrange(0.1, 0.5, 5):
    elanet = ElasticNet(alpha=n, l1_ratio=0.5)
    X = df.iloc[:, :-1].values
    y = df['MEDV'].values
    elanet.fit(X_train, y_train)
    y_train_pred = elanet.predict(X_train)
    y_test_pred = elanet.predict(X_test)
    #print('MSE train: %.3f, test: %.3f' % (
    #        mean_squared_error(y_train, y_train_pred),
    #        mean_squared_error(y_test, y_test_pred)))
    #print('R^2 train: %.3f, test: %.3f' % (
    #        r2_score(y_train, y_train_pred),
    #        r2_score(y_test, y_test_pred)))
    ary = np.array(range(100000))
    np.linalg.norm(ary)
    sp.linalg.norm(ary)
    np.sqrt(np.sum(ary**2))
    plt.scatter(y_train_pred,
                y_train_pred - y_train,
                c='steelblue',
                marker='o',
# Define the cross-validated RMSE helper
def rmse_cv(model):
    """Return the per-fold RMSE of `model` via 5-fold CV on (X_train, y)."""
    neg_mse = cross_val_score(model, X_train, y, scoring="neg_mean_squared_error", cv=5)
    return np.sqrt(-neg_mse)

#Lasso
# LassoCV picks the best alpha from the candidate grid by internal CV.
clf1 = LassoCV(alphas=[1, 0.1, 0.001, 0.0005, 0.0003, 0.0002, 5e-4])
clf1.fit(X_train, y)
lasso_preds = np.expm1(clf1.predict(X_test))  # exp(x) - 1  <---->log1p(x)==log(1+x)
score1 = rmse_cv(clf1)
print("\nLasso score: {:.4f} ({:.4f})\n".format(score1.mean(), score1.std()))

#ElasticNet
clf2 = ElasticNet(alpha=0.0005, l1_ratio=0.9)
clf2.fit(X_train, y)
# expm1 undoes a log1p transform on the target — TODO confirm the training
# target was actually log1p-transformed upstream.
elas_preds = np.expm1(clf2.predict(X_test))

score2 = rmse_cv(clf2)
print("\nElasticNet score: {:.4f} ({:.4f})\n".format(score2.mean(), score2.std()))

# print(lasso_preds)
# print(elas_preds)

# Id_list=[i for i in range(1461,2920)]
# print (len(Id_list))
# price_list=[]
# for i in range(0,1459):
#     new_list=[]
#     new_list=[Id_list[i],lasso_preds[i]]
#     price_list.append(new_list)
# print(price_list)
# RMSE of the (out-of-view) fitted ridge model on the test split.
predictions_ridge = ridge.predict(test_features)

rmse_ridge = sqrt(mean_squared_error(predictions_ridge, test_labels))
print("RMSE:", round(rmse_ridge, 2))

# In[23]:

# NOTE(review): the normalize= parameter was deprecated and later removed
# from sklearn's ElasticNet — confirm the pinned sklearn version.
EN = ElasticNet(alpha=0.01, max_iter=10000, normalize=True, l1_ratio=0.8)
EN.fit(train_features, train_labels)
train_EN = EN.score(train_features, train_labels)
test_EN = EN.score(test_features, test_labels)
# Sparsity check: how many coefficients the L1 part kept non-zero.
coeff_used = np.sum(EN.coef_ != 0)

print("number of features used:", coeff_used)

predictions_EN = EN.predict(test_features)
rmse_EN = sqrt(mean_squared_error(predictions_EN, test_labels))
print("RMSE:", round(rmse_EN, 2))

# In[24]:

df = pd.read_csv("data_clean.csv")
del df["Unnamed: 0"]

# Keep only the modelling columns; sale_price is the target.
df = df[[
    "gross_square_feet", "block", "land_square_feet", "lot", "age_of_building",
    "borough", "residential_units", "commercial_units", "total_units",
    "sale_price"
]]
df['borough'] = df['borough'].astype('category')
Example #26
0
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn.linear_model import ElasticNet

# Fit Lasso, Ridge and ElasticNet on the same split for comparison.
L = Lasso()
R = Ridge()
EN = ElasticNet()

L.fit(X_train, y_train)
R.fit(X_train, y_train)
EN.fit(X_train, y_train)

y_predL = L.predict(X_test)
y_predR = R.predict(X_test)
y_predEN = EN.predict(X_test)

# Metrics Report
from sklearn.metrics import mean_absolute_error as mae
from sklearn.metrics import mean_squared_error as mse
print("Lasso Mean Absolute Error: ", mae(y_test, y_predL))
print("Lasso Mean Squared Error: ", mse(y_test, y_predL))
print("Lasso Root Mean Squared Error: ", np.sqrt(mse(y_test, y_predL)))

# Lasso Model Checking
# Sanity threshold: accept if RMSE is under 10% of the mean target value.
if np.sqrt(mse(y_test, y_predL)) < (0.1 * y_mean):
    print("ALgo works properly")
else:
    print("Model needs some changes")

# NOTE(review): this snippet targets an old scikit-learn.
# sklearn.cross_validation was removed in 0.20 (modern code imports KFold
# from sklearn.model_selection, with a different API), and load_boston was
# removed in scikit-learn 1.2.
from sklearn.metrics import mean_absolute_error as mae  # imported but unused here
from sklearn.cross_validation import KFold
from sklearn.linear_model import LinearRegression, ElasticNet
import numpy as np
from sklearn.datasets import load_boston
boston = load_boston()
# Append a constant-1 column to each sample (manual intercept term).
x = np.array([np.concatenate((v, [1])) for v in boston.data])
y = boston.target
# Toggle between ElasticNet and ordinary least squares.
FIT_EN = False

if FIT_EN:
    model = ElasticNet(fit_intercept=True, alpha=0.5)
else:
    model = LinearRegression(fit_intercept=True)
model.fit(x, y)
# NOTE(review): predicting one row at a time relies on the old 1-D sample
# behavior; modern sklearn requires 2-D input (model.predict(x) would do).
p = np.array([model.predict(xi) for xi in x])
e = p - y
# Training RMSE computed from the summed squared residuals.
total_error = np.dot(e, e)
rmse_train = np.sqrt(total_error / len(p))

# 10-fold cross-validated RMSE (old KFold API: KFold(n, n_folds=...)).
kf = KFold(len(x), n_folds=10)
err = 0
for train, test in kf:
    model.fit(x[train], y[train])
    p = np.array([model.predict(xi) for xi in x[test]])
    e = p - y[test]
    err += np.dot(e, e)

rmse_10cv = np.sqrt(err / len(x))
print('RMSE on training: {}'.format(rmse_train))
print('RMSE on 10-fold CV: {}'.format(rmse_10cv))
Example #28
0
# print(X_train.shape)
X_train = X_train.reshape((X_train.shape[0], 1))
# print(X_train.shape)
# print(X_test.shape)
X_test = X_test.reshape(X_test.shape[0], 1)
# print(X_test.shape)

regr = ElasticNet(random_state=0)
regr.fit(X_train, y_train)

# print(regr.score(X_train, y_train))
# print(regr.coef_)
# print(regr.intercept_)

y_predicted = regr.predict(X_test)

print('y_test: ')
print(y_test)
print('y_predicted: ')
print(y_predicted)

predictions = list()
for i in range(len(test_scaled)):
    X, y = test_scaled[i, 0:-1], test_scaled[i, -1]
    yhat = y_predicted[i]
    # print("Y_test: " + str(y) + " Yhat: " + str(yhat))

    yhat = data_misc.invert_scale(scaler, X, yhat)
    # print("yhat no scaled:" + str(yhat))
Example #29
0
lasso = Lasso(alpha=5)
ridge = Ridge(alpha=3)
lr = LinearRegression()
dtr = DecisionTreeRegressor(max_depth=17)
bagger = BaggingRegressor(net, verbose = 1)

X_train, X_test, y_train, y_test = train_test_split(X_model, y)

dtr.fit(X_train,y_train)
dtr.score(X_test, y_test)
pred = dtr.predict(X_test)
plt.scatter(y_test, (pred*0.8)-y_test)

net.fit(X_train, y_train)
net.score(X_test, y_test)
preds = net.predict(X_test)
plt.scatter(y_test, (preds) - y_test, alpha = 0.7)

scores = cross_val_score(net, scale(X_model), y, cv=12)
scores.mean()

X2 = pivoted[['compilation_0', 'compilation_1', 'compilation_2']]
y2 = pivoted.compilation_3

X_train, X_test, y_train, y_test = train_test_split(X2, y2, test_size=0.2)

lr.fit(X_train, y_train)
lr.score(X_test, y_test)
pivoted.head()
mapped_pivot = pd.read_csv('pivot_catcherr.csv')
Example #30
0
        train[val] = train_tmp
        test[val] = test_tmp

    error_dict = {}
    elastic_models = {}
    finalError = []
    for val in players:

        errors2 = []

        ENreg = ElasticNet(alpha=0.5, l1_ratio=0.5, normalize=False)
        ENreg.fit(train[val]['DEFENSIVE_RATING'].values.reshape(-1, 1),
                  train[val]['POINTS'])
        elastic_models[val] = ENreg

        prediction = ENreg.predict(
            test[val]['DEFENSIVE_RATING'].values.reshape(-1, 1))
        prediction = np.round(prediction, 0)

        val_prediction = prediction
        val_actual = test[val]['POINTS'].values

        cnt = 0
        compare = {}
        for val1 in prediction:
            compare[val1] = test[val]['POINTS'].values[cnt]
            cnt += 1

        for i in range(len(prediction)):
            errors2.append(abs(prediction[i] - test[val]['POINTS'].values[i]))
            error_dict[val] = abs(prediction[i] -
                                  test[val]['POINTS'].values[i])
Example #31
0
def elastic_net_regression(test, train, seed, alpha, n):
    """Fit an ElasticNet on the first *n* feature columns of *train* and
    print the mean absolute error of its predictions on *test*.

    The 'loss' column of each frame is the regression target.
    """
    features = train.iloc[:, :n]
    target = train['loss']
    model = ElasticNet(alpha=alpha, random_state=seed)
    model.fit(features, target)
    predicted = model.predict(test.iloc[:, :n])
    score = mean_absolute_error(test['loss'], predicted)
    print("Elastic Net: " + str(round(score, 2)))
Example #32
0
# create an index object of our column names to plot our feature importance with
xCols = xTrain.columns

# now let's plot our coefficients
plt.style.use('ggplot')
plt.plot(range(len(xCols)), elasticNetCoef)
plt.xticks(range(len(xCols)), xCols.values)
plt.title(
    "Feature importance of independent variables for\nElasticNet model (coefficient values)"
)
plt.margins(0.02)
plt.show()

# now let's do our 2020 predictions of CPI based on this model
predict2020CPI = model.predict(x2020Forward)

# reset our myData2020Forward index to zero so we can attach these predicted CPIs
myData2020Forward.reset_index(drop=True, inplace=True)

# now let's add this back into our 2020 dataframe
myData2020Forward = pd.concat([
    myData2020Forward,
    pd.DataFrame(predict2020CPI, columns=['predicted_CPI'])
],
                              axis=1)

# next we will change the dates from ordinal back to dates so we can union them back together
myData2020Forward['Date'] = myData2020Forward.iloc[:, 0].astype(int).map(
    dt.date.fromordinal)
myDataBefore2020['Date'] = myDataBefore2020.iloc[:, 0].astype(int).map(
pred2 = nn_reg.predict(testx)
nn_reg.score(testx, testY)
#Lasso
lasso = Lasso(alpha=1)
lasso.fit(trainx, trainy[y[i]])
pred3 = lasso.predict(testx)
lasso.score(testx, testY)
#Ridge
ridge = Ridge(alpha=1.0)
ridge.fit(trainx, trainy[y[i]])
pred4 = ridge.predict(testx)
ridge.score(testx, testY)
#ElasticNet
elsnet = ElasticNet(alpha=1, l1_ratio=0.7)
elsnet.fit(trainx, trainy[y[i]])
pred5 = elsnet.predict(testx)
elsnet.score(testx, testY)

#plot among them
xx = np.linspace(0, max(testY), 100)
fig = plt.figure(figsize=(10, 8))
ax = plt.gca()
plt.scatter(testY, pred, label='Linear regression')
plt.scatter(testY, pred2, label='$k$-NN')
plt.scatter(testY, pred3, label='Lasso')
plt.scatter(testY, pred4, label='Ridge')
plt.scatter(testY, pred5, label='ElasticNet')
plt.plot(xx, xx)
plt.ylabel('Estimation', fontsize=16)
plt.xlabel('True output', fontsize=16)
plt.legend(fontsize=14)
Example #34
0
# Features: the first 13 columns of the housing dataframe.
X = df.iloc[:, 0:13].values
# Set the house price (MEDV) as the target (dependent) variable.
y = df["MEDV"].values

X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=0)

# Degree-2 polynomial expansion of the raw features; no bias column because
# the linear model fits its own intercept.
POLY = PolynomialFeatures(degree=2, include_bias=False)

X_train_pol = POLY.fit_transform(X_train)
X_test_pol = POLY.transform(X_test)

# Standardize after the polynomial expansion; scaler is fit on train only.
sc = StandardScaler()

X_train_std = sc.fit_transform(X_train_pol)
X_test_std = sc.transform(X_test_pol)

# model = LinearRegression()
# model = Lasso(alpha=0.1)
model = ElasticNet(alpha=0.1, l1_ratio=0.6)

model.fit(X_train_std, y_train)

y_train_pred = model.predict(X_train_std)
y_test_pred = model.predict(X_test_std)

print("MSE train: {0}, test: {1}".format(
    mean_squared_error(y_train, y_train_pred),
    mean_squared_error(y_test, y_test_pred)))
Example #35
0
train_file = train_file.drop(train_file.columns[0], axis=1)
train_file = train_file.values

train_X_temp = train_file[5:50000, :-1]
train_Y = train_file[6:50001, -1]
train_X = np.zeros((train_X_temp.shape[0], 8 * 5))
for i in range(train_X_temp.shape[0]):
    for j in range(5):
        for k in range(8):
            train_X[i][j * 8 + k] = train_X_temp[i - j][k]

test_file_name = dir_path + "test2.csv"
test_file = read_csv(test_file_name, skiprows=1, header=None)
test_file = test_file.values
test_X = np.array(test_file[:, :-1])
test_y = test_file[:, -1]

#best_l1_ratio, best_alpha = train_EN_model(train_X, train_Y, test_X)
#print "Best L1 ratio, Best alpha",best_l1_ratio,best_alpha
#enet = ElasticNet(l1_ratio=best_l1_ratio, alpha=best_alpha)

start = time.time()
enet = ElasticNet()
enet.fit(train_X, train_Y)
#model = "ElasticNet int %.2f coefs %s" % (enet.intercept_, pprint(enet.coef_))
prediction = enet.predict(test_X)
mse = np.mean((prediction - test_y)**2)
print "MSE: ", mse
# print prediction
print "Score: ", enet.score(test_X, test_y)
print "Time: ", (time.time() - start)
Example #36
0
def train(in_alpha, in_l1_ratio):
    """Train an ElasticNet on the wine-quality dataset and log the run to MLflow.

    Parameters
    ----------
    in_alpha : float or None
        ElasticNet alpha; falls back to 0.5 when None.
    in_l1_ratio : float or None
        ElasticNet l1_ratio; falls back to 0.5 when None.
    """
    import os
    import warnings
    import sys

    import pandas as pd
    import numpy as np
    from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
    from sklearn.model_selection import train_test_split
    from sklearn.linear_model import ElasticNet

    import mlflow
    import mlflow.sklearn

    def eval_metrics(actual, pred):
        # Standard regression metrics for the holdout set.
        rmse = np.sqrt(mean_squared_error(actual, pred))
        mae = mean_absolute_error(actual, pred)
        r2 = r2_score(actual, pred)
        return rmse, mae, r2

    warnings.filterwarnings("ignore")
    np.random.seed(40)

    # Read the wine-quality csv file (make sure you're running this from the root of MLflow!)
    #  Assumes wine-quality.csv is located in the same folder as the notebook
    wine_path = "wine-quality.csv"
    data = pd.read_csv(wine_path)

    # Split the data into training and test sets. (0.75, 0.25) split.
    train, test = train_test_split(data)

    # The predicted column is "quality" which is a scalar from [3, 9]
    train_x = train.drop(["quality"], axis=1)
    test_x = test.drop(["quality"], axis=1)
    train_y = train[["quality"]]
    test_y = test[["quality"]]

    # BUG FIX: the original tested `float(in_alpha) is None`, which is never
    # True -- and float(None) raises TypeError -- so the documented defaults
    # were unreachable. Test for None *before* converting.
    alpha = 0.5 if in_alpha is None else float(in_alpha)
    l1_ratio = 0.5 if in_l1_ratio is None else float(in_l1_ratio)

    #mlflow.set_tracking_uri("http://zak-tracking-server-svc-myproject.192.168.64.12.nip.io")
    # Useful for multiple runs (only doing one run in this sample notebook)
    with mlflow.start_run():
        # Execute ElasticNet
        lr = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42)
        lr.fit(train_x, train_y)

        # Evaluate Metrics
        predicted_qualities = lr.predict(test_x)
        (rmse, mae, r2) = eval_metrics(test_y, predicted_qualities)

        # Print out metrics
        print("Elasticnet model (alpha=%f, l1_ratio=%f):" % (alpha, l1_ratio))
        print("  RMSE: %s" % rmse)
        print("  MAE: %s" % mae)
        print("  R2: %s" % r2)

        # Log parameter, metrics, and model to MLflow
        mlflow.log_param("alpha", alpha)
        mlflow.log_param("l1_ratio", l1_ratio)
        mlflow.log_metric("rmse", rmse)
        mlflow.log_metric("r2", r2)
        mlflow.log_metric("mae", mae)

        mlflow.sklearn.log_model(lr, "model")
Example #37
0
# Train classifier
# NOTE(review): ElasticNet has no partial_fit, so each chunk's fit() discards
# the previous one -- after the loop the model reflects only the LAST chunk.
# Confirm whether an incremental learner (e.g. SGDRegressor) was intended.
clf = ElasticNet()
train = pd.read_csv("train/subtrain.csv", chunksize = 100000, iterator = True)
all_classes = np.array([0, 1])
for chunk in train:
    y_train = chunk["click"]
    chunk = chunk[cols]
    # Expand the raw hour field into weekday/hour columns via dayhour().
    chunk = chunk.join(pd.DataFrame([dayhour(x) for x in chunk.hour], columns=["wd", "hr"]))
    chunk.drop(["hour"], axis=1, inplace = True)
    # fh is a feature hasher/encoder defined elsewhere in the script.
    Xcat = fh.transform(np.asarray(chunk.astype(str)))
    clf.fit(Xcat, y_train)

# Create a submission file
usecols = cols + ["id"]
X_test = pd.read_csv("test/mtest.csv", usecols=usecols)
X_test = X_test.join(pd.DataFrame([dayhour(x) for x in X_test.hour], columns=["wd", "hr"]))
X_test.drop(["hour"], axis=1, inplace = True)

X_enc_test = fh.transform(np.asarray(X_test.astype(str)))

y_act = pd.read_csv("test/mtest.csv", usecols=['click'])
y_pred = clf.predict(X_enc_test)

# Append this run's log-loss to the tracking file.
with open('logloss.txt','a') as f:
    f.write('\n'+str(log_loss(y_act, y_pred)))

# BUG FIX: removed the redundant f.close() that followed this with-block --
# the context manager already closes the file.
with open("submission/submission_elnet.csv", "w") as f:
    f.write("id,click\n")
    for idx, xid in enumerate(X_test.id):
        f.write(str(xid) + "," + "{0:.10f}".format(y_pred[idx]) + "\n")
Example #38
0
df_tmp[list(range(30))] = df_tmp[list(range(30))].where(df_tmp[list(range(30))]>.1,0)
topic_0 + topic_7 + topic_8 + topic_9 + topic_12  + topic_14 + topic_16 + topic_17+ topic_20 + topic_23 + topic_24 + topic_25  + topic_28
X = df[[str(x) for x in [0,7,8,9,12,14,16,17,20,23,24,25,28]]+["black_proportion","log_income","log_price","total_RE"]]
y = np.where(df['white_proportion']>np.median(df['white_proportion']),1,0)
y= df['income']
OLR = OLS(y,X).fit()
OLR.summary()
OLR.predict(exog=X)

df_full_results.params.sort_values()
df_results.params.sort_values()
df_results.summary()
EN = ElasticNet(alpha = .02, l1_ratio=.001)
EN.fit(X,y)
EN.score(X,y)
EN.predict(X)
LinR = LinearRegression()
LinR.fit(X,y)
LinR.score(X,y)

RR = Ridge()
RR.fit(X,y).score(X,y)
pd.Series(RR.coef_)
from sklearn.svm import SVR, SVC
supportR = SVR()
supportR.fit(X,y)

supportC = SVC()
supportC.fit(X,y)
supportC.score(X,y)
Example #39
0
def train_and_evaluate(config_path):
    """Train an ElasticNet from a params config, print evaluation metrics,
    write scores/params JSON reports, and persist the fitted model.

    Relies on module-level helpers `read_params` and `eval_metrics` and on
    `pd`, `json`, `os`, `joblib` being imported at module level.
    """
    config = read_params(config_path)
    test_data_path = config["split_data"]["test_path"]
    train_data_path = config["split_data"]["train_path"]
    random_state = config["base"]["random_state"]
    model_dir = config["model_dir"]

    # Hyper-parameters for the ElasticNet estimator come from the config.
    alpha = config["estimators"]["ElasticNet"]["params"]["alpha"]
    l1_ratio = config["estimators"]["ElasticNet"]["params"]["l1_ratio"]

    # Target column wrapped in a list so DataFrame indexing keeps 2-D shape.
    target = [config["base"]["target_col"]]

    train = pd.read_csv(train_data_path, sep=",")
    test = pd.read_csv(test_data_path, sep=",")

    train_y = train[target]
    test_y = test[target]

    train_x = train.drop(target, axis=1)
    test_x = test.drop(target, axis=1)

    lr = ElasticNet(
        alpha=alpha,
        l1_ratio=l1_ratio,
        random_state=random_state)
    lr.fit(train_x, train_y)

    predicted_qualities = lr.predict(test_x)

    # eval_metrics is expected to return (rmse, mae, r2) -- defined elsewhere.
    (rmse, mae, r2) = eval_metrics(test_y, predicted_qualities)

    print("Elasticnet model (alpha=%f, l1_ratio=%f):" % (alpha, l1_ratio))
    print("  RMSE: %s" % rmse)
    print("  MAE: %s" % mae)
    print("  R2: %s" % r2)

#####################################################
    # Write metric and parameter reports as JSON for experiment tracking.
    scores_file = config["reports"]["scores"]
    params_file = config["reports"]["params"]

    with open(scores_file, "w") as f:
        scores = {
            "rmse": rmse,
            "mae": mae,
            "r2": r2
        }
        json.dump(scores, f, indent=4)

    with open(params_file, "w") as f:
        params = {
            "alpha": alpha,
            "l1_ratio": l1_ratio,
        }
        json.dump(params, f, indent=4)
#####################################################

    # Persist the trained model for later serving/scoring.
    os.makedirs(model_dir, exist_ok=True)
    model_path = os.path.join(model_dir, "model.joblib")

    joblib.dump(lr, model_path)
        优化方法呢?
'''
rg = ElasticNet(alpha=1.0,
                l1_ratio=0.5,
                fit_intercept=True,
                normalize=False,
                precompute=False,
                max_iter=1000,
                copy_X=True,
                tol=0.0001,
                warm_start=False,
                positive=False,
                random_state=None,
                selection='cyclic')
rg.fit(X_train, Y_train)
Y_pre = rg.predict(X_test)
rg.score(X_test, Y_test)
rg.coef_
rg.intercept_
'''
    alpha                       两个惩罚项系数的相乘,如果是0就是最小二乘了
    l1_ratio                    混合参数,在[0,1]之间是弹性网的
    fit_intercept               是否训练截距
    normalize                   归一化否
    precompute                  是否使用Gram矩阵来加速
    max_iter                    最大迭代次数
    copy_X                      是否覆盖模型中的X
    tol                         精度
    warm_start                  是否使用上一次调用的解决方案作为初始化
    positive                    设置强制系数为正的嘛?
    random_state                随机器
Example #41
0
#Random Forest Regressor (good cv, good mape)
reg =RR(n_estimators = 100)
reg.fit(x_train, y_train)
cv_score = cross_val_score(reg,x_train,y_train,cv = 10)
cv_score.mean()
y_pred = reg.predict(x_test)
reg.score(x_test,y_test)
forecast_accuracy(y_pred,y_test) 

#Elastic Net regressor (very bad cv, very good mape)
regr = EN(random_state=0)
regr.fit(x_train, y_train)
cv_score = cross_val_score(regr,x_train,y_train,cv = 10)
cv_score.mean()
y_pred = regr.predict(x_test)
regr.score(x_test,y_test)
forecast_accuracy(y_pred,y_test) 

#K-neighbors Regressor ( bad cv, good mape)
regr2 = knr(10)
regr2.fit(x_train, y_train)
cv_score = cross_val_score(regr2,x_train,y_train,cv = 10)
cv_score.mean()
y_pred = regr2.predict(x_test)
regr2.score(x_test,y_test)
forecast_accuracy(y_pred,y_test) 

#SVR (bad cv, very good mape)
regr3 = SVR()
regr3.fit(x_train, y_train)
Example #42
0
    train, test = train_test_split(data)

    # The predicted column is "quality" which is a scalar from [3, 9]
    train_x = train.drop(["quality"], axis=1)
    test_x = test.drop(["quality"], axis=1)
    train_y = train[["quality"]]
    test_y = test[["quality"]]

    alpha = float(sys.argv[1]) if len(sys.argv) > 1 else 0.5
    l1_ratio = float(sys.argv[2]) if len(sys.argv) > 2 else 0.5

    with mlflow.start_run():
        lr = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42)
        lr.fit(train_x, train_y)

        predicted_qualities = lr.predict(test_x)

        (rmse, mae, r2) = eval_metrics(test_y, predicted_qualities)

        print("Elasticnet model (alpha=%f, l1_ratio=%f):" % (alpha, l1_ratio))
        print("  RMSE: %s" % rmse)
        print("  MAE: %s" % mae)
        print("  R2: %s" % r2)

        mlflow.log_param("alpha", alpha)
        mlflow.log_param("l1_ratio", l1_ratio)
        mlflow.log_metric("rmse", rmse)
        mlflow.log_metric("r2", r2)
        mlflow.log_metric("mae", mae)

        mlflow.sklearn.log_model(lr, "model")
# Fit the previously configured lasso on the full training set.
lasso.fit(x_train_all, y_train_all)

# training mse
y_lasso_train = lasso.predict(x_train_all)
lasso_train_mse = mean_squared_error(y_train_all, y_lasso_train)

# test mse
y_lasso_pred = lasso.predict(x_test)
lasso_test_mse = mean_squared_error(y_test, y_lasso_pred)

## elastic net
elastic = ElasticNet(alpha = 1.0, l1_ratio=0.5)
elastic.fit(x_train_all, y_train_all)

# training mse
y_elastic_train = elastic.predict(x_train_all)
elastic_train_mse = mean_squared_error(y_train_all, y_elastic_train)

# test mse
y_elastic_pred = elastic.predict(x_test)
# BUG FIX: the original computed this from y_lasso_pred (copy-paste error),
# silently reporting the lasso's test error as the elastic net's.
elastic_test_mse = mean_squared_error(y_test, y_elastic_pred)

#### perform cross validation to select model
## ridge regression with CV
ridgeCV = RidgeCV(alphas = [0.1, 0.3, 1.0, 3.0, 10.0, 30.0, 100.0, 300.0, 1000.0])
ridgeCV.fit(x_train_all, y_train_all)

ridgeCV.alpha_
# training mse
y_ridgeCV_train = ridgeCV.predict(x_train_all)
ridgeCV_train_mse = mean_squared_error(y_train_all, y_ridgeCV_train)
print X_test.shape


y_train=df_train["Purchase"]
df_train=df_train.drop("Purchase", axis=1)

#from sklearn.feature_selection import SelectKBest
#from sklearn.feature_selection import f_regression
#sel = SelectKBest(f_regression, k=10)
#X_tr=pd.DataFrame(sel.fit_transform(X_train,y_train))
#X_tst=pd.DataFrame(sel.transform(X_test))

#print X_tr.shape
#print X_tst.shape

from sklearn.linear_model import ElasticNet
model=ElasticNet(alpha=0.001)

model.fit(X_train,y_train)
y_pred=model.predict(X_test)
#print y_pred.shape
#print key1.shape
#print key2.shape


out=pd.DataFrame()
out["User_ID"]=key1
out["Product_ID"]=key2
out["Purchase"]=y_pred
out.to_csv('outavb.csv', index=False)
def _fit_time_save(train_x, train_y, test_x, out_path):
    """Fit an ElasticNet(alpha=0.5, max_iter=50000, tol=0.3) on the given
    training data, save its test-set predictions to out_path, and return
    the time (seconds) spent in fit()."""
    enet = ElasticNet(alpha=0.5, max_iter=50000, tol=0.3)
    t_start = time.perf_counter()
    enet.fit(train_x, train_y)
    fit_time = time.perf_counter() - t_start
    np.savetxt(out_path, enet.predict(test_x))
    return fit_time


def elastic_net(datapath):
    """Benchmark ElasticNet on raw/smoothed signals and their FFT features.

    Loads data_numpy.mat from datapath, trains one ElasticNet per input
    variant (raw, raw+FFT, smooth, smooth+FFT), writes each variant's test
    predictions to 'elastic_net_<variant>.txt' and all timings (FFT + fits)
    to 'elastic_net_time.txt' in datapath.
    """
    datafile = os.path.join(datapath, 'data_numpy.mat')
    if not os.path.exists(datafile):
        # BUG FIX: the original only printed a message and then crashed
        # inside loadmat anyway; fail fast with a clear error instead.
        raise FileNotFoundError('Data file %s not found.' % datafile)

    data_numpy = sio.loadmat(datafile)
    # Training/test matrices for the raw and smoothed signal variants.
    train_x_raw = data_numpy['trainX_raw']
    train_x_smooth = data_numpy['trainX_smooth']
    train_y = data_numpy['trainY'].ravel()
    test_x_raw = data_numpy['testX_raw']
    test_x_smooth = data_numpy['testX_smooth']
    test_y = data_numpy['testY']  # loaded for parity with the .mat layout; unused here
    base_y = data_numpy['baseY']  # loaded for parity; unused here

    # FFT feature expansion: concatenate imaginary and real parts. Only the
    # forward FFT of the *training* data is timed, as in the original.
    t_start = time.perf_counter()
    x_fft = np.fft.fft(train_x_raw)
    raw_fft_time = time.perf_counter() - t_start
    train_x_raw_fft = np.concatenate((np.imag(x_fft), np.real(x_fft)), axis=1)
    x_fft = np.fft.fft(test_x_raw)
    test_x_raw_fft = np.concatenate((np.imag(x_fft), np.real(x_fft)), axis=1)

    t_start = time.perf_counter()
    x_fft = np.fft.fft(train_x_smooth)
    smooth_fft_time = time.perf_counter() - t_start
    train_x_smooth_fft = np.concatenate((np.imag(x_fft), np.real(x_fft)), axis=1)
    x_fft = np.fft.fft(test_x_smooth)
    test_x_smooth_fft = np.concatenate((np.imag(x_fft), np.real(x_fft)), axis=1)

    # One fit/predict/save cycle per input variant (was four copy-pasted
    # blocks; factored into _fit_time_save).
    elastic_net_raw_time = _fit_time_save(
        train_x_raw, train_y, test_x_raw,
        os.path.join(datapath, 'elastic_net_raw.txt'))
    elastic_net_raw_fft_time = _fit_time_save(
        train_x_raw_fft, train_y, test_x_raw_fft,
        os.path.join(datapath, 'elastic_net_raw_fft.txt'))
    elastic_net_smooth_time = _fit_time_save(
        train_x_smooth, train_y, test_x_smooth,
        os.path.join(datapath, 'elastic_net_smooth.txt'))
    elastic_net_smooth_fft_time = _fit_time_save(
        train_x_smooth_fft, train_y, test_x_smooth_fft,
        os.path.join(datapath, 'elastic_net_smooth_fft.txt'))

    # Persist timings, one per line, in the original order.
    with open(os.path.join(datapath, 'elastic_net_time.txt'), 'w') as f_time:
        f_time.write(str(raw_fft_time) + '\n')
        f_time.write(str(smooth_fft_time) + '\n')
        f_time.write(str(elastic_net_raw_time) + '\n')
        f_time.write(str(elastic_net_raw_fft_time) + '\n')
        f_time.write(str(elastic_net_smooth_time) + '\n')
        f_time.write(str(elastic_net_smooth_fft_time) + '\n')
Example #46
0
flag = False

for i in range(20):
    X, y = 0, 0
    if (flag):
        diff_values = data_misc.difference(raw_values, 1)
        supervised = data_misc.timeseries_to_supervised(diff_values, 1)
        supervised_values = supervised.values
        train, test = supervised_values[0:-1], supervised_values[-1:]
        test_scaled = scaler.transform(test)
        X, y = test_scaled[0, 0:-1], test_scaled[0, -1]
    else:
        flag = True
        X, y = test_scaled[i, 0:-1], test_scaled[i, -1]

    yhat = regr.predict([X])
    print("Y_test: " + str(y) + " Yhat: " + str(yhat))

    yhat = data_misc.invert_scale(scaler, X, yhat)

    # Se recorre -1 porque para que no se alinie donde empezó
    yhat = data_misc.inverse_difference(raw_values, yhat, -1 - i)
    # store forecast
    predictions.append(yhat)
    allList.append(yhat)

    # df = DataFrame(raw_values)
    # print(df[0][i])
    # columns = [df[0][i] for i in range(0,df.size)]
    # columns.append(yhat)
    raw_values = allList
# ElasticNet Regression demo on the diabetes toy dataset.
import numpy as np
from sklearn import datasets
from sklearn.linear_model import ElasticNet

# Load the diabetes regression dataset bundled with scikit-learn.
dataset = datasets.load_diabetes()

# Fit an ElasticNet with a light penalty to the full dataset.
model = ElasticNet(alpha=0.1)
model.fit(dataset.data, dataset.target)
print(model)

# Predict back on the training data itself.
expected = dataset.target
predicted = model.predict(dataset.data)

# Report in-sample mean squared error and R^2.
mse = np.mean(np.square(predicted - expected))
print(mse)
print(model.score(dataset.data, dataset.target))
def stacking(ext_total_df):
    """Add an "elastic_net" out-of-fold prediction column to ext_total_df.

    Fits ElasticNet models on rolling windows of seasons (1994 onward) to
    predict attendance as a fraction of stadium capacity, writing the
    clipped [0, 1] predictions back into ext_total_df for later stacking.
    Returns the mutated ext_total_df.
    """
    # Column indices: cat_col are one-hot encoded, lin_col used as numeric.
    cat_col = [15, 18]
    lin_col = [3, 9, 10, 11, 14, 17, 21, 22, 23, 24, 25] + list(range(
        27, 81)) + list(range(82, 108))  # no missing values up to here?
    nan_is_m1 = [
        "home_team_match_point_prev3_mean", "home_team_match_point_prev5_mean",
        "away_team_match_point_prev3_mean", "away_team_match_point_prev5_mean"
    ]

    lin_total_df = ext_total_df.iloc[:, [0] + lin_col]
    lin_total_df = pd.concat([
        lin_total_df,
        pd.get_dummies(ext_total_df.iloc[:, cat_col].astype("str"),
                       drop_first=True)
    ],
                             axis=1)
    # NOTE(review): -1 sentinels in these columns are replaced with 1.2 (an
    # out-of-range value) rather than NaN -- presumably intentional; confirm
    # against the feature-engineering step upstream.
    lin_total_df.loc[:,
                     nan_is_m1] = lin_total_df.loc[:,
                                                   nan_is_m1].replace(-1, 1.2)

    dropcol = ["attendance"]
    all_train_X = lin_total_df.query("1994 <= match_date_year").drop(dropcol,
                                                                     axis=1)
    all_train_y = np.log1p(
        lin_total_df.query("1994 <= match_date_year")["attendance"])
    # Second target: attendance as a fraction of capacity (what is modeled).
    all_train_y2 = lin_total_df.query(
        "1994 <= match_date_year")["attendance"] / lin_total_df.query(
            "1994 <= match_date_year")["capacity"]

    # Rolling-window out-of-fold predictions: train on a 6-year window,
    # predict the following 2-year validation slice.
    for year in tqdm(range(2002, 2017, 2)):
        window = 6
        duration = 2

        Z = all_train_X.match_date_year
        Z2 = all_train_X.division  # NOTE(review): unused in this function

        train = (year - duration - window < Z) & (Z <= year - duration)
        val = (year - duration < Z) & (Z <= year)

        train_X = all_train_X.loc[train, :]
        train_y = all_train_y[train]  # log1p target; not used below
        train_y2 = all_train_y2[train]

        val_X = all_train_X.loc[val, :]

        # Standardize features using statistics from the training window only.
        scl = StandardScaler()
        scl.fit(train_X.values.astype(np.float64))
        train_scl = scl.transform(train_X.values.astype(np.float64))
        val_scl = scl.transform(val_X.values.astype(np.float64))

        elastic_net = ElasticNet(alpha=10**-2.7,
                                 l1_ratio=0.75,
                                 max_iter=10000,
                                 random_state=2434)

        elastic_net.fit(train_scl, train_y2)
        ext_total_df.loc[val_X.index,
                         "elastic_net"] = elastic_net.predict(val_scl).clip(
                             0, 1)

    # Final fit: train on 2011-2016, predict 2017 plus the early/late 2018
    # sections. NOTE(review): Z here is the binding left over from the last
    # loop iteration -- it works, but is fragile if the loop above changes.
    train = (2010 < Z) & (Z <= 2016)
    val = (2017 == Z) | ((Z == 2018) & ((all_train_X["section"] <= 17) |
                                        (33 <= all_train_X["section"])))

    train_X = all_train_X.loc[train, :]
    train_y = all_train_y[train]
    train_y2 = all_train_y2[train]

    val_X = all_train_X.loc[val, :]

    scl = StandardScaler()
    scl.fit(train_X.values.astype(np.float64))
    train_scl = scl.transform(train_X.values.astype(np.float64))
    val_scl = scl.transform(val_X.values.astype(np.float64))

    elastic_net = ElasticNet(alpha=10**-2.7, l1_ratio=0.75, random_state=2434)

    elastic_net.fit(train_scl, train_y2)
    ext_total_df.loc[val_X.index,
                     "elastic_net"] = elastic_net.predict(val_scl).clip(0, 1)
    return ext_total_df
Example #49
0
def elasticnet():
    """Fit an ElasticNet (alpha=1, l1_ratio=0.5) on the module-level
    X_train/y_train split and return (test R^2, test RMSE).

    FIX: dropped the deprecated `normalize=False` keyword -- False was the
    default, so behavior is unchanged, and the keyword was removed in
    scikit-learn 1.2, where passing it raises TypeError.
    """
    regressor = ElasticNet(alpha=1, l1_ratio=0.5)
    regressor.fit(X_train, y_train)
    y_predictions = regressor.predict(X_test)
    return (regressor.score(X_test, y_test),
            sqrt(mean_squared_error(y_test, y_predictions)))
Example #50
0
y_test_pred_ridge = ridge_regressor.predict(X_test)
print('MSE train Ridge: %.3f, test: %.3f' %
      (mean_squared_error(y_train, y_train_pred_ridge),
       mean_squared_error(y_test, y_test_pred_ridge)))
print('R^2 train Ridge: %.3f, test: %.3f' % (r2_score(
    y_train, y_train_pred_ridge), r2_score(y_test, y_test_pred_ridge)))
lasso_regressor.fit(X_train, y_train)
y_train_pred_lasso = lasso_regressor.predict(X_train)
y_test_pred_lasso = lasso_regressor.predict(X_test)
print('MSE train Lasso: %.3f, test: %.3f' %
      (mean_squared_error(y_train, y_train_pred_lasso),
       mean_squared_error(y_test, y_test_pred_lasso)))
print('R^2 train Lasso: %.3f, test: %.3f' % (r2_score(
    y_train, y_train_pred_lasso), r2_score(y_test, y_test_pred_lasso)))
elastic_regressor.fit(X_train, y_train)
y_train_pred_elastic = elastic_regressor.predict(X_train)
y_test_pred_elastic = elastic_regressor.predict(X_test)
print('MSE train elastic: %.3f, test: %.3f' %
      (mean_squared_error(y_train, y_train_pred_elastic),
       mean_squared_error(y_test, y_test_pred_elastic)))
print('R^2 train elastic: %.3f, test: %.3f' % (r2_score(
    y_train, y_train_pred_elastic), r2_score(y_test, y_test_pred_elastic)))
# Now let's do a Random Forest Regression
# In[15]:
from sklearn.ensemble import RandomForestRegressor
forest = RandomForestRegressor(n_estimators=1000,
                               criterion='mse',
                               random_state=1,
                               n_jobs=-1)
forest.fit(X_train, y_train)
y_train_pred = forest.predict(X_train)
import numpy as np
from sklearn.datasets import load_svmlight_file
from sklearn.cross_validation import KFold
from sklearn.linear_model import ElasticNet, LinearRegression

data, target = load_svmlight_file('data/E2006.train')

# Edit the lines below if you want to switch method:
# met = LinearRegression(fit_intercept=True)
met = ElasticNet(fit_intercept=True, alpha=.1)

kf = KFold(len(target), n_folds=10)
err = 0
for train, test in kf:
    met.fit(data[train], target[train])
    p = met.predict(data[test])
    p = np.array(p).ravel()
    e = p - target[test]
    err += np.dot(e, e)

rmse_10cv = np.sqrt(err / len(target))


met.fit(data, target)
p = met.predict(data)
p = p.ravel()
e = p - target
total_error = np.dot(e, e)
rmse_train = np.sqrt(total_error / len(p))

gs_cv_lasso.best_params_
lasso_tuned = Lasso(**gs_cv_lasso.best_params_).fit(X_train, y_train)
y_pred = lasso_tuned.predict(X_test)
np.sqrt(mean_squared_error(y_test, y_pred))
pd.Series(lasso_tuned.coef_, index=X_train.columns)

# ElasticNet REGRESSION
df = load_advertising()
X = df.drop('sales', axis=1)
y = df[["sales"]]
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.20,
                                                    random_state=46)
enet_model = ElasticNet().fit(X_train, y_train)
y_pred = enet_model.predict(X_test)
np.sqrt(mean_squared_error(y_test, y_pred))

# MODEL TUNING WITH GRIDSEARCHCV
enet_params = {
    "l1_ratio": [0.1, 0.4, 0.5, 0.6, 0.8, 1],
    "alpha": [0.1, 0.01, 0.001, 0.2, 0.3, 0.5, 0.8, 0.9, 1]
}

enet_model = ElasticNet()

gs_cv_enet = GridSearchCV(enet_model, enet_params, cv=10).fit(X_train, y_train)

gs_cv_enet.best_params_

enet_tuned = ElasticNet(**gs_cv_enet.best_params_).fit(X_train, y_train)
Example #53
0
print("Took {:0.4g}s to compute training Fastfood expansion.".format(elapsed_ff_kern_train))
st = time()
PHI_valid, _ = FastfoodForKernel(validationData, para, sgm)
elapsed_ff_kern_valid = time() - st
print("Took {:0.4g}s to compute validation Fastfood expansion.".format(elapsed_ff_kern_valid))

# Train elastic net on projected training data
en = ElasticNet()
st = time()
en.fit(PHI_train.T, trainLabels)
elapsed_en_fit = time() - st
print("Took {:0.4g}s to fit elastic net on projected training data.".format(elapsed_en_fit))

# Predict labels for projected validation data
st = time()
y_pred = en.predict(PHI_valid.T)
elapsed_en_pred = time() - st
print("Took {:0.4g}s to predict on projected validation data.".format(elapsed_en_pred))

# Report performance
mse_proj = metrics.mean_squared_error(validationLabels, y_pred)
print("For projected data, MSE = {:0.4g}.".format(mse_proj))

# Train elastic net on original training data
en = ElasticNet()
st = time()
en.fit(trainData.T, trainLabels)
elapsed_en_fit = time() - st
print("Took {:0.4g}s to fit elastic net on original training data.".format(elapsed_en_fit))

# Predict labels for original validation data
elasticnet = ElasticNet()
elasticnet.fit(X_train, y_train)

# In[1385]:

elasticnet_score = elasticnet.score(X_test, y_test)
elasticnet_score

# In[1420]:

elasticnet_score = elasticnet.score(X_test, y_test)
elasticnet_score

# In[1386]:

elasticnet_pred = elasticnet.predict(X_test)

# In[1422]:

# The mean squared error
print("Root mean squared error: %.2f" %
      sqrt(mean_squared_error(y_test, elasticnet_pred)))
# The absolute squared error
print("Mean absolute error: %.2f" %
      mean_absolute_error(y_test, elasticnet_pred))
# Explained variance score: 1 is perfect prediction
print('R-squared: %.2f' % r2_score(y_test, elasticnet_pred))

# In[1416]:

#Evaluate Models
    print ridge_scores.mean()
    print ridge_scores

# combination of ridge and Lasso
# (Python 2 fragment: bare `print` statements throughout.)
print "Elastic net regularization"

# Sweep integer alphas 1..4 and report 5-fold cross-validation scores.
for alpha in range(1,5):
    elastic_net = ElasticNet(alpha)
    elastic_net_scores =cross_val_score(elastic_net, x, y, cv = 5)
    print "alpha={a}".format(a=alpha)
    print elastic_net_scores.mean()
    print elastic_net_scores

# best performing regressor for this data set was Elastic net with alpha=1
# with score = 0.472705248975
# draw scatter plot for values predicted with this regressor

print "Showing scatter plot for elastic net with alpha = 1"

# Refit at alpha=1 on the full data and plot predicted vs. actual prices.
elastic_net = ElasticNet(1)
elastic_net.fit(x, y)
predicted_y = elastic_net.predict(x)

fig = plt.figure()
plt.scatter(y, predicted_y, alpha=0.3, )
fig.suptitle('Boston real estate pricing', fontsize=20)
plt.figtext(.5,.9,'Elastic net regularization, alpha=1', fontsize=15, ha='center')
plt.xlabel('Actual value, $1000s', fontsize=18)
plt.ylabel('Predicted value, $1000s', fontsize=18)
plt.show()
Example #56
0
# Standardise features and target with separate scalers; the target is
# reshaped to a column for the scaler, then flattened back to 1-D.
sc_x = StandardScaler()
sc_y = StandardScaler()
X_std = sc_x.fit_transform(X)
y_std = np.ravel(sc_y.fit_transform(y.reshape(-1, 1)))

# Hold out 30% of the standardised data for testing.
X_train, X_test, y_train, y_test = train_test_split(
    X_std, y_std, test_size=0.3, random_state=0)

# train and test
from sklearn.linear_model import ElasticNet
from sklearn.metrics import mean_squared_error

# Sweep regularisation strengths over seven orders of magnitude.
alphas = [0.001, 0.01, 0.1, 1, 10, 100, 1000]

train_errors, test_errors = [], []

# Record the train / test MSE for each alpha at a fixed 50/50 L1-L2 mix.
for alpha in alphas:
    net = ElasticNet(alpha=alpha, l1_ratio=0.5).fit(X_train, y_train)
    train_errors.append(mean_squared_error(y_train, net.predict(X_train)))
    test_errors.append(mean_squared_error(y_test, net.predict(X_test)))

print(train_errors)
print(test_errors)

    
class linReg:
    """Elastic-net regression wrapper around a preprocessing pipeline.

    Holds the feature matrix and log-transformed SalePrice target taken
    from ``in_df``, a ColumnTransformer that imputes/encodes the features,
    and a StandardScaler for the target.  Supports grid search, fitting,
    and prediction back on the original price scale.
    """

    def __init__(self, in_df):
        # Impute on a copy so the caller's DataFrame is never mutated.
        df = self.__imputeVals(in_df.copy())
        self.X = df.drop(columns=["SalePrice"]).copy()
        # Model the log of the sale price; predictions are exp'ed back.
        self.y = np.log(df.SalePrice.values.reshape(-1, 1))

        self._gridSearch = None
        self.pipeline_X = self.__make_pipe()
        self.pipeline_y = StandardScaler()
        self._searchSpace = None
        self._params = None
        self.lm = ElasticNet()

    def __imputeVals(self, in_df):
        # Delegates to the module-level imputeVals helper.
        return imputeVals(in_df)

    def __make_pipe(self):
        """Build the ColumnTransformer used to preprocess ``self.X``.

        The column lists (fillNone, fillZeroCat, fillZeroCont, imputeDict,
        imputeBool, cat_to_int, dropList) come from module scope.
        """
        nonePipeline = make_pipeline(
            SimpleImputer(strategy="constant", fill_value="None"),
            OneHotEncoder(drop="first"))
        zeroPipeline = make_pipeline(
            SimpleImputer(strategy="constant", fill_value=0),
            OneHotEncoder(drop="first", categories="auto"))
        scalePipeline = make_pipeline(
            SimpleImputer(strategy="constant", fill_value=0), StandardScaler())

        regressionPipeline = ColumnTransformer(
            [("setNone", nonePipeline, fillNone),
             ("setZero", zeroPipeline, fillZeroCat),
             ("transformed", scalePipeline, fillZeroCont),
             ("dictImputed",
              make_pipeline(dictImputer(imputeDict), OneHotEncoder(
                  drop="first")), list(imputeDict.keys())),
             ("bool", "passthrough", imputeBool),
             ("categoricalInts", "passthrough", cat_to_int),
             ("dropped", "drop", dropList)],
            remainder="drop")
        return regressionPipeline

    def gridSearch(self, params, cv=5, njobs=-1, verbose=50):
        """Run a cross-validated search over *params* on preprocessed data."""
        self._searchSpace = params

        piped_X = self.pipeline_X.fit_transform(self.X)
        piped_y = self.pipeline_y.fit_transform(self.y)
        self._gridSearch = GridSearchCV(self.lm,
                                        params,
                                        cv=cv,
                                        scoring="neg_mean_squared_error",
                                        n_jobs=njobs,
                                        verbose=verbose)
        self._gridSearch.fit(piped_X, piped_y)

    def getBestParams(self):
        """Best parameter dict of the last gridSearch; raises if none ran."""
        if self._gridSearch is not None:
            return self._gridSearch.best_params_
        else:
            raise ValueError("gridSearch() has not been run yet")

    def getBestScore(self):
        """Best CV score of the last gridSearch; raises if none ran."""
        if self._gridSearch is not None:
            return self._gridSearch.best_score_
        else:
            raise ValueError("gridSearch() has not been run yet")

    def fitModel(self, params):
        """Fit ``self.lm`` with *params* on freshly re-fit pipelines."""
        piped_X = self.pipeline_X.fit_transform(self.X)
        piped_y = self.pipeline_y.fit_transform(self.y)
        self._params = params

        self.lm.set_params(**params)
        self.lm.fit(piped_X, piped_y)

    def __invert(self, y):
        # Undo the target standardisation, then the log transform.
        return np.exp(self.pipeline_y.inverse_transform(y))

    def getTrainScore(self):
        """R^2 of the fitted model on the (transformed) training data."""
        piped_X = self.pipeline_X.transform(self.X)
        piped_y = self.pipeline_y.transform(self.y)
        return self.lm.score(piped_X, piped_y)

    # Root Mean Square Log Error
    def getRMSLE(self):
        """Return the RMSLE of the fitted model on the training data.

        ``inverse_transform`` of the standardised prediction is still on
        the log scale, as is ``self.y``, so the root of their MSE is a
        true RMSLE of the prices.
        """
        piped_X = self.pipeline_X.transform(self.X)
        preds = self.pipeline_y.inverse_transform(self.lm.predict(piped_X))
        # BUG FIX: this previously returned the plain MSE despite the
        # method's name; take the square root so it really is an RMSLE.
        return np.sqrt(mean_squared_error(self.y, preds))

    def predict(self, test_X):
        """Predict sale prices (original scale) for a raw feature frame."""
        piped_X = self.pipeline_X.transform(self.__imputeVals(test_X))
        preds = self.lm.predict(piped_X)
        return self.__invert(preds)
Example #58
0
from sklearn.linear_model import ElasticNetCV, ElasticNet
from sklearn.metrics import mean_squared_error, r2_score
from matplotlib import pyplot as plt

data, target = load_svmlight_file('data/E2006.train')


def _cv_predict(model):
    """Return 5-fold out-of-fold predictions for *model* over all data."""
    folds = KFold(len(target), n_folds=5)
    out = np.zeros_like(target)
    for train_idx, test_idx in folds:
        model.fit(data[train_idx], target[train_idx])
        out[test_idx] = model.predict(data[test_idx])
    return out


# Changed from:
# from sklearn.linear_model import Lasso
# met = Lasso(alpha=0.1)
met = ElasticNet(alpha=0.1)
pred = _cv_predict(met)

print('[EN 0.1] RMSE on testing (5 fold), {:.2}'.format(np.sqrt(mean_squared_error(target, pred))))
print('[EN 0.1] R2 on testing (5 fold), {:.2}'.format(r2_score(target, pred)))
print('')

# ElasticNetCV constructor (use every available CPU)
met = ElasticNetCV(n_jobs=-1)
pred = _cv_predict(met)

print('[EN CV] RMSE on testing (5 fold), {:.2}'.format(np.sqrt(mean_squared_error(target, pred))))
            # Fragment of a benchmark loop: the enclosing for-loops over
            # problem sizes (indices i, j) are defined earlier in the file.
            # Standardise features using training-set statistics only.
            std = X_train.std(axis=0)
            mean = X_train.mean(axis=0)
            X_train = (X_train - mean) / std
            X_test = (X_test - mean) / std

            # Standardise the target the same way.
            std = y_train.std(axis=0)
            mean = y_train.mean(axis=0)
            y_train = (y_train - mean) / std
            y_test = (y_test - mean) / std

            gc.collect()
            print("- benching ElasticNet")
            # NOTE(review): `rho` is the pre-0.17 scikit-learn name for what
            # is now `l1_ratio` -- this line only runs on very old sklearn.
            clf = ElasticNet(alpha=alpha, rho=0.5, fit_intercept=False)
            tstart = time()
            clf.fit(X_train, y_train)
            # Record (test MSE, fit time) for this problem size.
            elnet_results[i, j, 0] = mean_squared_error(clf.predict(X_test),
                                                       y_test)
            elnet_results[i, j, 1] = time() - tstart

            gc.collect()
            print("- benching SGD")
            # Heuristic iteration count so total SGD work is roughly constant.
            n_iter = np.ceil(10 ** 4.0 / n_train)
            clf = SGDRegressor(alpha=alpha, fit_intercept=False,
                               n_iter=n_iter, learning_rate="invscaling",
                               eta0=.01, power_t=0.25)

            tstart = time()
            clf.fit(X_train, y_train)
            sgd_results[i, j, 0] = mean_squared_error(clf.predict(X_test),
                                                     y_test)
            sgd_results[i, j, 1] = time() - tstart
Example #60
0
class RiskClassifier():
    """Country-risk regressor.

    Fits Lasso and ElasticNet models on World-Bank-style economic
    indicators (read from CSV files) to predict PRS political-risk scores.
    """

    def __init__(self, train=False):
        if train:
            self.train()
        #else:
        #Load classifier

    def train(self, plot=False):
        """Fit both models on the first half of the data and report R^2
        on the second half; optionally plot the learned coefficients."""
        X, y = self.prepareData()
        n_samples = X.shape[0]
        # Simple 50/50 ordered split (no shuffling).
        X_train, y_train = X[:n_samples // 2], y[:n_samples // 2]
        X_test, y_test = X[n_samples // 2:], y[n_samples // 2:]
        #######################################################################
        # Lasso
        alpha = 0.1
        self.lasso = Lasso(alpha=alpha)

        y_pred_lasso = self.lasso.fit(X_train, y_train).predict(X_test)
        r2_score_lasso = r2_score(y_test, y_pred_lasso)
        print(self.lasso)
        print("r^2 on test data : %f" % r2_score_lasso)
        # #####################################################################
        # ElasticNet
        from sklearn.linear_model import ElasticNet

        self.enet = ElasticNet(alpha=alpha, l1_ratio=0.7)

        y_pred_enet = self.enet.fit(X_train, y_train).predict(X_test)
        r2_score_enet = r2_score(y_test, y_pred_enet)
        print(self.enet)
        print("r^2 on test data : %f" % r2_score_enet)
        if plot:
            import matplotlib.pyplot as plt
            plt.plot(self.enet.coef_, color='lightgreen', linewidth=2,
                     label='Elastic net coefficients')
            plt.plot(self.lasso.coef_, color='gold', linewidth=2,
                     label='Lasso coefficients')
            plt.legend(loc='best')
            plt.title("Lasso R^2: %f, Elastic Net R^2: %f" %
                      (r2_score_lasso, r2_score_enet))
            plt.show()

    def classify(self, example, test=None):
        """Predict risk for *example* with both fitted models.

        If ground-truth *test* targets are supplied, also print each
        model's R^2 against them.  Returns (lasso_pred, enet_pred).
        """
        y_pred_lasso = self.lasso.predict(example)
        y_pred_enet = self.enet.predict(example)
        # Idiom fix: `test is not None` instead of `not test is None`.
        if test is not None:
            r2_score_lasso = r2_score(test, y_pred_lasso)
            r2_score_enet = r2_score(test, y_pred_enet)
            print("r^2 Classify Lasso test: %f" % r2_score_lasso)
            print("r^2 Classify Enet test: %f" % r2_score_enet)

        return y_pred_lasso, y_pred_enet

    def getAllData(self):
        """Load every indicator CSV, join them on 'Country Name', and
        inner-join the PRS risk targets.  Zeros are treated as missing."""
        dfCorruption = pd.read_csv('../economy/Corruption.csv',
                                   sep=';',
                                   na_values=0)
        dfEducation = pd.read_csv('../economy/Education.csv',
                                  sep=';',
                                  na_values=0)
        dfGini = pd.read_csv('../economy/Gini.csv', sep=';', na_values=0)
        dfImports = pd.read_csv('../economy/Imports.csv', sep=';', na_values=0)
        dfInflation = pd.read_csv('../economy/Inflation.csv',
                                  sep=';',
                                  na_values=0)
        dfPopulation = pd.read_csv('../economy/Population.csv',
                                   sep=';',
                                   na_values=0)
        dfReserves = pd.read_csv('../economy/Reserves.csv',
                                 sep=';',
                                 na_values=0)
        dfUnemployment = pd.read_csv('../economy/Unemployment.csv',
                                     sep=';',
                                     na_values=0)

        riskData = pd.read_csv('../targets/psr.csv', na_values=0)

        # Suffixes disambiguate the year columns shared by every frame.
        allData = dfCorruption.set_index('Country Name') \
                .join(dfEducation.set_index('Country Name'), lsuffix='_corruption') \
                .join(dfGini.set_index('Country Name'), lsuffix='_education') \
                .join(dfImports.set_index('Country Name'),lsuffix='_gini') \
                .join(dfInflation.set_index('Country Name'),lsuffix='_imports') \
                .join(dfPopulation.set_index('Country Name'), lsuffix = '_inflation') \
                .join(dfReserves.set_index('Country Name'), lsuffix = '_population') \
                .join(dfUnemployment.set_index('Country Name'), lsuffix = '_reserves', rsuffix = '_unemployment')

        allData = allData.join(riskData.set_index('Country Name'), how='inner')
        return allData

    def getDataFromYear(self, year):
        """Return (features, risk targets) DataFrames for one *year*,
        with remaining NaNs filled with 0."""
        allData = self.getAllData()
        strYear = str(year)

        data = allData[[
            strYear + '_corruption', strYear + '_education', strYear + '_gini',
            strYear + '_imports', strYear + '_inflation',
            strYear + '_population', strYear + '_reserves',
            strYear + '_unemployment'
        ]].fillna(0)

        # PRS column names embed the 2-digit year, e.g. '2016_PRS16PV'.
        risk = allData[[strYear+'_PRS'+strYear[2:]+'VA', \
                strYear+'_PRS'+strYear[2:]+'PV', \
                strYear+'_PRS'+strYear[2:]+'GE', \
                strYear+'_PRS'+strYear[2:]+'RQ', \
                strYear+'_PRS'+strYear[2:]+'RL', \
                strYear+'_PRS'+strYear[2:]+'CC']].fillna(0)
        return data, risk

    def prepareData(self):
        """Assemble the 2016 feature matrix X and the political-stability
        target y ('PV' PRS score) as NumPy arrays."""
        # Someday we will have more data so we'll load more years
        # into our classifier
        data2016, risk2016 = self.getDataFromYear(2016)
        # FIX: DataFrame.as_matrix() was removed in pandas 1.0; `.values`
        # is equivalent and works on both old and current pandas.
        X = data2016.values
        y = risk2016['2016_PRS16PV'].values
        return X, y