예제 #1
0
def lassoreg(a):
    """Fit a Lasso model with the given alpha on the module-level training
    data, report its training R^2, and write test-set predictions to CSV.

    Relies on module globals: base_X, base_Y, X_test, write_to_file.
    """
    print("Doing lasso regression")
    model = Lasso(alpha=a)
    model.fit(base_X, base_Y)
    print("Score = %f" % model.score(base_X, base_Y))
    predictions = model.predict(X_test)
    write_to_file("lasso.csv", predictions)
예제 #2
0
def comparaison_ridge_lasso(X, Y):
    """Compare Lasso and Ridge test-set R^2 on one random 70/30 split of (X, Y).

    Prints both scores as percentages; returns None.
    """
    # FIX: the original passed random_state=random.seed().  random.seed()
    # returns None (its only effect is reseeding the *global* RNG), so the
    # effective value was always None — pass None explicitly and drop the
    # hidden side effect.
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=None)
    clf_lasso = Lasso(selection='random', random_state=None)
    clf_ridge = Ridge()
    clf_lasso.fit(X_train, Y_train)
    clf_ridge.fit(X_train, Y_train)
    score_lasso = clf_lasso.score(X_test, Y_test)
    score_ridge = clf_ridge.score(X_test, Y_test)
    print("Precision de Lasso={:3.2f}% \nPrecision de Ridge={:3.2f}%\n".format(score_lasso*100,score_ridge*100))
예제 #3
0
def test_alpha_opti(X, Y, nb_tests):
    """Average, over nb_tests random 70/30 splits, the difference in test R^2
    between alpha=0.1 ("opti") and default-alpha Lasso/Ridge models.

    Prints the mean (opti - non-opti) gap for each model family.
    """
    score_lasso = 0
    score_ridge = 0
    score_lasso_opti = 0
    score_ridge_opti = 0
    for i in range(nb_tests):
        # FIX: random.seed() returns None (and reseeds the global RNG as a
        # side effect); pass random_state=None explicitly instead.
        X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=None)
        # Default-alpha models.
        clf_lasso = Lasso(selection='random', random_state=None)
        clf_ridge = Ridge()
        clf_lasso.fit(X_train, Y_train)
        clf_ridge.fit(X_train, Y_train)
        score_lasso += clf_lasso.score(X_test, Y_test)
        score_ridge += clf_ridge.score(X_test, Y_test)
        # alpha=0.1 ("optimized") models on the same split.
        clf_lasso_opti = Lasso(selection='random', random_state=None, alpha=0.1)
        clf_ridge_opti = Ridge(alpha=0.1)
        clf_lasso_opti.fit(X_train, Y_train)
        clf_ridge_opti.fit(X_train, Y_train)
        score_lasso_opti += clf_lasso_opti.score(X_test, Y_test)
        score_ridge_opti += clf_ridge_opti.score(X_test, Y_test)
    print("Lasso (opti - non-opti) : {:3.3f}%".format(100*(score_lasso_opti-score_lasso)/nb_tests))
    print("Ridge (opti - non-opti) : {:3.3f}%".format(100*(score_ridge_opti-score_ridge)/nb_tests))
예제 #4
0
def linearReg():
    """Fit Lasso(alpha=0.2) on the module-level feature/value arrays, print
    the coefficients and training R^2, and bar-plot the coefficients.

    Relies on module globals: features_array, values_array, features, plt.
    """
    model = Lasso(alpha=0.2)
    model.fit(features_array, values_array)

    # Training-set predictions (computed as in the original; not otherwise used).
    predict_val = model.predict(features_array)

    print(model.coef_)
    print(model.score(features_array, values_array))

    # One bar per coefficient.
    fig = plt.figure()
    ax = plt.subplot(111)
    ax.bar(range(features.shape[1]), model.coef_)
    plt.show()
def calc_linear_regression(files, data_matrix, target, results):
    """Fit a default Lasso on (data_matrix, target), compute training RSS and
    R^2, update the module-global best RSS, and append the result tuple
    (files, rss, var, coef_) to `results`.
    """
    lr = Lasso()
    lr.fit(data_matrix, target)

    # Training mean squared error and coefficient of determination.
    rss = np.mean((lr.predict(data_matrix) - target) ** 2)
    var = lr.score(data_matrix, target)

    global best
    if rss < best:
        # FIX: converted Python 2 print statements to print() calls, matching
        # the rest of the file.
        for i in range(len(target)):
            print(str(target[i]) + "\t" + str(lr.predict(data_matrix[i])[0]))
        print(lr.coef_)
        best = rss

    results.append((files, rss, var, lr.coef_))
예제 #6
0
def test_StackingEstimator_4():
    """StackingEstimator must behave the same inside and outside a
    scikit-learn pipeline for regression."""
    base = StackingEstimator(estimator=RandomForestRegressor(random_state=42))
    final_est = Lasso(random_state=42)
    pipeline = make_pipeline(base, final_est)

    # Fit the whole pipeline in one call ...
    pipeline.fit(training_features_r, training_target_r)

    # ... and the same two stages manually.
    base.fit(training_features_r, training_target_r)
    augmented = base.transform(training_features_r)
    final_est.fit(augmented, training_target_r)

    # Both paths must report the same training R^2.
    manual_score = final_est.score(augmented, training_target_r)
    piped_score = pipeline.score(training_features_r, training_target_r)
    assert np.allclose(manual_score, piped_score)

    # The 3-fold cross-validated R^2 must match the known reference value.
    cv_score = np.mean(cross_val_score(pipeline, training_features_r,
                                       training_target_r, cv=3, scoring='r2'))
    assert np.allclose(0.795877470354, cv_score)
    def _random_search(self, random_iter, x, y):
        """Randomly sample `random_iter` alpha values for Lasso and keep the best.

        Each candidate is fitted on a fresh 50/50 split and scored on the held-out
        half; prints one progress character per candidate.  Returns the alpha with
        the highest test R^2 (1.0 when random_iter <= 0).
        """
        # Default values.
        alpha = 1.0
        # FIX: sys.maxint was removed in Python 3; -inf is the correct identity
        # for a running maximum of R^2 scores (which can be negative).
        best_score = -float("inf")

        if random_iter > 0:
            sys.stdout.write("Do a random search %d times" % random_iter)
            # Sample alphas uniformly from (0.0001, 10); the first candidate is
            # always the default alpha.
            param_dist = {"alpha": uniform(loc=0.0001, scale=10-0.0001)}
            param_list = [{"alpha": alpha}, ]
            param_list.extend(list(ParameterSampler(param_dist,
                                                    n_iter=random_iter-1,
                                                    random_state=self._rng)))
            for idx, d in enumerate(param_list):
                # NOTE(review): normalize= and precompute='auto' were removed
                # from Lasso in modern scikit-learn; this targets an old API.
                lasso = Lasso(alpha=d["alpha"],
                              fit_intercept=True,
                              normalize=False,
                              precompute='auto',
                              copy_X=True,
                              max_iter=1000,
                              tol=0.0001,
                              warm_start=False,
                              positive=False)

                train_x, test_x, train_y, test_y = \
                    train_test_split(x, y, test_size=0.5,
                                     random_state=self._rng)
                lasso.fit(train_x, train_y)
                sc = lasso.score(test_x, test_y)
                # Tiny progress output: '.' normal, '#' every 10th, '<' new best.
                m = "."
                if idx % 10 == 0:
                    m = "#"
                if sc > best_score:
                    m = "<"
                    best_score = sc
                    alpha = d['alpha']
                sys.stdout.write(m)
                sys.stdout.flush()
            sys.stdout.write("Using alpha: %f\n" % alpha)
        return alpha
def apply_lasso(X_train, Y_train, alpha=None):
    """Sweep a small grid of Lasso alphas, printing R^2 and the L1 norm of the
    coefficients for each, then run performance_analysis on the last model.

    `alpha` is accepted but unused (kept for interface compatibility); the
    hard-coded `alphas` grid below is what is swept.  Returns the
    (model, Theta, J, SCORE) tuple from performance_analysis.
    """
    alphas = [0.1, 0.3, 0.5]
    ALPHA_VALS = {}
    for a in alphas:
        # NOTE(review): normalize= and precompute='auto' were removed from
        # Lasso in modern scikit-learn; this targets an old API.
        model = Lasso(alpha=a,
                      fit_intercept=True,
                      normalize=False,
                      precompute='auto',
                      copy_X=True,
                      max_iter=50000,
                      tol=0.001,
                      warm_start=False,
                      positive=False)
        model.fit(X_train, Y_train)
        R2 = model.score(X_train, Y_train)
        # L1 norm of the fitted coefficient vector.
        L1 = sum(abs(x) for x in model.coef_)
        ALPHA_VALS[a] = [a, R2, L1, [x for x in model.coef_]]
        # FIX: converted the Python 2 print statement to a print() call.
        print("ALPHA: %.2f \t R^2=%7.4f \t L1(THETA)=%.2f \t THETA[1:N]=%s" % (a, R2, L1, ", ".join(["%.4f" % x for x in model.coef_])))
    # Parameter vector: intercept first, then the coefficients of the LAST
    # fitted model (alpha=0.5).
    Theta = [float(model.intercept_), ]
    Theta.extend([float(x) for x in model.coef_])
    (model, Theta, J, SCORE) = performance_analysis(model, Theta, X_train, Y_train, debug=1)
    return (model, Theta, J, SCORE)
예제 #9
0
File: take2.py  Project: ycheng517/rocket
# Baseline: MSE of predicting the (precomputed) average points for every sample.
print("baseline MSE: %f" % mean_squared_error(avg_pts, y_test))

# Standardize features using training-set statistics only (no test leakage).
std = X_train.std(axis=0)
mean = X_train.mean(axis=0)
X_train = (X_train - mean) / std
X_test = (X_test - mean) / std

#===============================================================================
# std = y_train.std(axis=0)
# mean = y_train.mean(axis=0)
# y_train = (y_train - mean) / std
# y_test = (y_test - mean) / std
#===============================================================================

# Plain least squares.
linear_estimator = LinearRegression(fit_intercept=True)
linear_estimator.fit(X_train, y_train)
y_test_est = linear_estimator.predict(X_test)
print("linear regression score: %f" % linear_estimator.score(X_test, y_test))
print("linear regression MSE: %f" % mean_squared_error(y_test, y_test_est))

# L2-regularized.
ridge_estimator = Ridge(alpha=1, fit_intercept=True)
ridge_estimator.fit(X_train, y_train)
y_test_est = ridge_estimator.predict(X_test)
print("ridge regression score: %f" % ridge_estimator.score(X_test, y_test))
print("ridge regression MSE: %f" % mean_squared_error(y_test, y_test_est))

# L1-regularized.
lasso_estimator = Lasso(alpha=0.1, fit_intercept=True)
lasso_estimator.fit(X_train, y_train)
y_test_est = lasso_estimator.predict(X_test)
print("lasso regression score: %f" % lasso_estimator.score(X_test, y_test))
print("lasso regression MSE: %f" % mean_squared_error(y_test, y_test_est))
# NOTE(review): knn and the x_/y_ train/test splits are defined outside this
# snippet.
ypred = knn.predict(x_test)

print(knn.score(x_test, y_test))

# Ordinary least squares.
lr = LinearRegression()
lr.fit(x_train,y_train)
lr.predict(x_test)

print(lr.score(x_test, y_test))

# L1-regularized linear regression.
ls = Lasso(alpha=0.1)
ls.fit(x_train,y_train)
ls.predict(x_test)

print(ls.score(x_test, y_test))

# NOTE(review): DecisionTreeClassifier expects discrete class labels; the
# regressors above suggest a continuous target — confirm the target type.
dct = DecisionTreeClassifier()
dct.fit(x_train,y_train)
dct.predict(x_test)

print(dct.score(x_test, y_test))

#from pyspark.sql import SQLContext
#from pyspark import SparkContext

#sc = SparkContext("local","example")
#sql_sc = SQLContext(sc)

#df = pd.read_csv('data.csv')
예제 #11
0
# Test score of the alpha=10 ridge model fitted above.
print("Test set score: {:.2f}".format(ridge10.score(X_test, y_test)))

ridge01 = Ridge(alpha=0.1).fit(X_train, y_train)
# FIX: these two lines previously scored ridge10 again; they are meant to
# report the alpha=0.1 model that was just fitted.
print("Training set score: {:.2f}".format(ridge01.score(X_train, y_train)))
print("Test set score: {:.2f}".format(ridge01.score(X_test, y_test)))

# plt.plot(ridge.coef_, 's', label="Ridge alpha=1")
# plt.plot(ridge10.coef_, '^', label="Ridge alpha=10")
# plt.plot(ridge01.coef_, 'v', label="Ridge alpha=0.1")
# plt.plot(lr.coef_, 'o', label="LinearRegression")
# plt.xlabel("Coefficient index")
# plt.ylabel("Coefficient magnitude")
# plt.hlines(0, 0, len(lr.coef_))
# plt.ylim(-25, 25)
# plt.legend()

# mglearn.plots.plot_ridge_n_samples()
# plt.show()

# Default-alpha Lasso; count how many coefficients it zeroes out.
lasso = Lasso().fit(X_train, y_train)
print("Training set score: {:.2f}".format(lasso.score(X_train, y_train)))
print("Test set score: {:.2f}".format(lasso.score(X_test, y_test)))
print("Number of features used: {}".format(np.sum(lasso.coef_ != 0)))

# we increase the default setting of "max_iter",
# otherwise the model would warn us that we should increase max_iter.
print("---------------------------------")
lasso001 = Lasso(alpha=0.01, max_iter=100000).fit(X_train, y_train)
print("Training set score: {:.2f}".format(lasso001.score(X_train, y_train)))
print("Test set score: {:.2f}".format(lasso001.score(X_test, y_test)))
print("Number of features used: {}".format(np.sum(lasso001.coef_ != 0)))
예제 #12
0
def CrossValidateForMSEAtDiffLambda(df, IV, DVs):
    """Sweep 1000 lambda (alpha) values over (0, 0.05]; for each, average test
    MSE and R^2 of Lasso, Ridge and LinearRegression across 10 random 80/20
    splits; then refit each regularized model at its min-MSE lambda and print
    its coefficients.  Returns the per-lambda averaged curves.

    NOTE(review): tempDF[IV] is passed to train_test_split first, so the y_*
    names receive the IV splits and the X_* names the DVs splits — i.e. the
    IV column is the regression target here; confirm this is intended.
    NOTE(review): the normalize= argument was removed from these estimators
    in scikit-learn 1.2; this code requires an older scikit-learn.
    """
    
    avgMSELasso_values = []
    avgMSERidge_values = []
    avgMSELinReg_values = []
    avgR2Lasso_values = []
    avgR2Ridge_values = []
    avgR2LinReg_values = []
    
    
    # 1000 lambdas: 0.00005, 0.0001, ..., 0.05
    lmda_values = [x*0.00005 for x in range(1, 1001)]
    #lmda_values = [x*0.00005 for x in range(1, 11)]
    
    # Keep only complete rows over the columns actually used.
    var_list = deepcopy(DVs)
    var_list.insert(0, IV)
    tempDF = df[var_list].dropna()
    
    for lmda in lmda_values:
        MSELasso_list = []
        MSERidge_list = []
        MSELinReg_list = []
        R2Lasso_list = []
        R2Ridge_list = []
        R2LinReg_list = []
        
        # 10 repeated random splits per lambda.
        for x in range(10):
            y_train, y_test, X_train, X_test  = train_test_split(tempDF[IV], tempDF[DVs], test_size=0.2)
            
            #print(X_train.shape,X_test.shape, y_train.shape, y_test.shape)
            #print(train_test_split(tempDF[IV], tempDF[DVs], test_size=0.2))
            
            model = Lasso(alpha=lmda, normalize=True)
            model2 = Ridge(alpha=lmda, normalize=True)
            model3 = LinearRegression(normalize=True)
            model.fit(X_train, y_train)
            model2.fit(X_train, y_train)
            model3.fit(X_train, y_train)
            #print(model.score(X_test, y_test), model2.score(X_test, y_test))
            # Test-set MSE and R^2 for each of the three models.
            MSELasso = np.mean((model.predict(X_test)-y_test)**2)
            MSELasso_list.append(MSELasso)
            MSERidge = np.mean((model2.predict(X_test)-y_test)**2)
            MSERidge_list.append(MSERidge)
            MSELinReg = np.mean((model3.predict(X_test)-y_test)**2)
            MSELinReg_list.append(MSELinReg)
            R2Lasso_list.append(model.score(X_test, y_test))
            R2Ridge_list.append(model2.score(X_test, y_test))
            R2LinReg_list.append(model3.score(X_test, y_test))
            #print(X_test, y_test)
            
            
        avgMSELasso_values.append(np.mean(MSELasso_list))
        avgMSERidge_values.append(np.mean(MSERidge_list))
        avgMSELinReg_values.append(np.mean(MSELinReg_list))
        avgR2Lasso_values.append(np.mean(R2Lasso_list))
        avgR2Ridge_values.append(np.mean(R2Ridge_list))
        avgR2LinReg_values.append(np.mean(R2LinReg_list))
        
    # Best (minimum average MSE) lambda for Lasso.
    minMSE1 = min(avgMSELasso_values)
    idx = avgMSELasso_values.index(minMSE1)
    minLmda = lmda_values[idx]
    LassoAvgR2 = avgR2Lasso_values[idx]
    
    # Best (minimum average MSE) lambda for Ridge.
    minMSE2 = min(avgMSERidge_values)
    idx2 = avgMSERidge_values.index(minMSE2)
    minLmda2 = lmda_values[idx2]
    RidgeAvgR2 = avgR2Ridge_values[idx2]
    
    LinRegAvgR2 = np.mean(avgR2LinReg_values)

    # Refit each model at its chosen lambda on one fresh split for reporting.
    y_train, y_test, X_train, X_test = train_test_split(tempDF[IV], tempDF[DVs], test_size=0.2)
    model1 = Lasso(alpha=minLmda, normalize=True)
    model1.fit(X_train, y_train)
    model2 = Ridge(alpha=minLmda2, normalize=True)
    model2.fit(X_train, y_train)
    model3 = LinearRegression(normalize=True)
    model3.fit(X_train, y_train)

        
        
    
    print(minLmda, minMSE1, model1.coef_, model1.intercept_, np.mean(avgR2Lasso_values))
    print(minLmda2, minMSE2, model2.coef_, model2.intercept_, np.mean(avgR2Ridge_values))
    print(model3.coef_, model3.intercept_, np.mean(avgR2LinReg_values))
        
    return lmda_values, avgMSELasso_values, avgMSERidge_values, avgMSELinReg_values, avgR2Lasso_values, avgR2Ridge_values, avgR2LinReg_values 
예제 #13
0
# Train/test R^2 of a regressor fitted outside this snippet.
print("Coefficient of determination R^2 <-- on train set: {}".format(
    support_regressor.score(X_train, y_train)))
print("Coefficient of determination R^2 <-- on test set: {}".format(
    support_regressor.score(X_test, y_test)))

# Single decision tree.
dtr = DecisionTreeRegressor()
dtr.fit(X_train, y_train)
print("Coefficient of determination R^2 <-- on train set: {}".format(
    dtr.score(X_train, y_train)))
print("Coefficient of determination R^2 <-- on test set: {}".format(
    dtr.score(X_test, y_test)))

# Lasso with the default regularization strength.
indiana_jones = Lasso(alpha=1.0)
indiana_jones.fit(X_train, y_train)
print("Coefficient of determination R^2 <-- on train set : {}".format(
    indiana_jones.score(X_train, y_train)))
print("Coefficient of determination R^2 <-- on test set: {}".format(
    indiana_jones.score(X_test, y_test)))

# Extra-trees ensemble, used here for its feature importances.
etr = ExtraTreesRegressor(n_estimators=300)
etr.fit(X_train, y_train)

print(etr.feature_importances_)
# Feature indices sorted by decreasing importance.
indecis = np.argsort(etr.feature_importances_)[::-1]

plt.figure(num=None, figsize=(14, 10), dpi=80, facecolor='w')
plt.title("Feature importances")
plt.bar(range(X_train.shape[1]),
        etr.feature_importances_[indecis],
        color="r",
        align="center")
예제 #14
0
import numpy as np
# Sample 100 evenly spaced points on the x-axis from 0 to 26.
xx = np.linspace(0, 26, 100)
xx = xx.reshape(xx.shape[0], 1)
# Predict the regression line over these 100 reference points.
yy = regressor.predict(xx)

# Fit a degree-4 polynomial feature expansion on the pizza training samples.
poly4 = PolynomialFeatures(degree=4)
X_train_poly4 = poly4.fit_transform(X_train)

from sklearn.linear_model import Lasso
lasso_poly4 = Lasso()
lasso_poly4.fit(X_train_poly4, y_train)
# FIX: converted Python 2 print statements to print() calls.
# NOTE(review): X_test_poly4 is not defined at this point in the snippet; it
# would need poly4.transform(X_test), and X_test is only assigned below.
print(lasso_poly4.score(X_test_poly4, y_test))

# Print the Lasso model's coefficient list.
print(lasso_poly4.coef_)

regressor_poly4 = LinearRegression()
regressor_poly4.fit(X_train_poly4, y_train)

xx_poly4 = poly4.transform(xx)
yy_poly4 = regressor_poly4.predict(xx_poly4)

# Evaluate the three regression models on the test data.
# Prepare the test data.
X_test = [[6], [8], [11], [16]]
y_test = [[8], [12], [15], [18]]
# Comparing coefficient magnitudes for ridge regression with different values
# of alpha and linear regression
plt.plot(ridge.coef_, 's', label="Ridge alpha=1")
plt.plot(ridge10.coef_, '^', label="Ridge alpha=10")
plt.plot(ridge01.coef_, 'v', label="Ridge alpha=0.1")
plt.plot(lr.coef_, 'o', label="LinearRegression")
plt.xlabel("Coefficient index")
plt.ylabel("Coefficient magnitude")
plt.hlines(0, 0, len(lr.coef_))
plt.ylim(-25, 25)
plt.legend()

# Lasso Regression ------------------------------------------------------------
from sklearn.linear_model import Lasso
# Default alpha; count the non-zero coefficients to see the sparsity effect.
lasso=Lasso().fit(X_train,y_train)
print('Training set score : {}'.format(lasso.score(X_train,y_train)))
print('Test set score : {}'.format(lasso.score(X_test,y_test)))
print('Number of features used : {}'.format(np.sum(lasso.coef_!=0)))    

# we increase the default setting of "max_iter",
# otherwise the model would warn us that we should increase max_iter
lasso001=Lasso(alpha=0.01,max_iter=100000).fit(X_train,y_train)
print('Training set score : {}'.format(lasso001.score(X_train,y_train)))
print('Test set score : {}'.format(lasso001.score(X_test,y_test)))
print('Number of features used : {}'.format(np.sum(lasso001.coef_!=0)))

# Very weak regularization: behaves close to plain linear regression.
lasso00001 = Lasso(alpha=0.0001, max_iter=100000).fit(X_train, y_train)
print("Training set score: {:.2f}".format(lasso00001.score(X_train, y_train)))
print("Test set score: {:.2f}".format(lasso00001.score(X_test, y_test)))
print("Number of features used: {}".format(np.sum(lasso00001.coef_ != 0)))
from sklearn import metrics
# MSE of predictions made outside this snippet.
print('Mean Squared Error:',
      metrics.mean_squared_error(labels_test, labels_pred))
#**********************************************************************************************

from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
# Default-parameter Lasso and Ridge on the same train/test split.
lm_lasso = Lasso()
lm_ridge = Ridge()

lm_lasso.fit(features_train, labels_train)
lm_ridge.fit(features_train, labels_train)

# Test-set R^2, reported as a percentage rounded to 2 decimals.
print("RSquare Value for Lasso Regresssion TEST data is-")
print(np.round(lm_lasso.score(features_test, labels_test) * 100, 2))

print("RSquare Value for Ridge Regresssion TEST data is-")
print(np.round(lm_ridge.score(features_test, labels_test) * 100, 2))

predict_test_lasso = lm_lasso.predict(features_test)
predict_test_ridge = lm_ridge.predict(features_test)

print("Lasso Regression Mean Square Error (MSE) for TEST data is")
print(np.round(metrics.mean_squared_error(labels_test, predict_test_lasso), 2))

print("Ridge Regression Mean Square Error (MSE) for TEST data is")
print(np.round(metrics.mean_squared_error(labels_test, predict_test_ridge), 2))
'''
Code Challenges 02: (House Data)
This is kings house society data.
def upload_get_stocks(st):
    """Serve the stocks page for ticker `st`, downloading and caching its
    price history on first request.

    Downloads the Yahoo Finance CSV if the symbol is not yet in the DB, fits
    MinMax- and StandardScaler-based linear models plus Lasso / Ridge /
    ElasticNet on the scaled data, saves residual and scatter plots, and
    renders the template with all scores.
    """
    # Find one record of data from the mongo database
    # @TODO: YOUR CODE HERE!

    #cr = csv.reader(open("https://query1.finance.yahoo.com/v7/finance/download/"+st+"?period1=1454112000&period2=1611964800&interval=1d&events=history&includeAdjustedClose=true","rb"))

    #data = pd.read_csv('https://example.com/passkey=wedsmdjsjmdd')

    #df = pd.read_csv("static/data/"+st+".csv")

    #with open("static/data/"+st+".csv", "wt") as fp:
    #    writer = csv.writer(fp)
    #    # writer.writerow(["your", "header", "foo"])  # write header
    #    writer.writerows(data)

    #dateval = datetime.date.strtime("%D")
    #print(dateval)
    session = Session(engine)
    # NOTE(review): SQL is built by string concatenation and `st` comes from
    # the request — this is SQL-injectable; use bound parameters instead.
    stock = session.execute("select * from stocks where symbol='" + st + "'")
    #return render_template("index.html", listings=listings)
    # Return template and data

    # First request for this symbol: download from Yahoo, cache to CSV and DB.
    if (stock.rowcount == 0):
        data = pd.read_csv(
            "https://query1.finance.yahoo.com/v7/finance/download/" + st +
            "?period1=1454112000&period2=1611964800&interval=1d&events=history&includeAdjustedClose=true",
            sep=',')

        data.to_csv("static/data/" + st + ".csv", index=False, header=True)

        print(data)
        # NOTE(review): same string-concatenated SQL injection risk here.
        session.execute("INSERT INTO stocks VALUES ('" + st + "', '" + st +
                        " Corp')")
        session.execute("commit")

    stocks = session.execute("select * from stocks")

    resdata = [{}]

    responsedata = {'respdata': resdata}
    session.close()

    print('Hello this is test')
    data = pd.read_csv("static/data/" + st + ".csv")
    df = data
    # Drop the null columns where all values are null
    df = df.dropna(axis='columns', how='all')
    # Drop the null rows
    # This is for the MinMax Linear Regression model
    print(df.head())
    df = df.dropna()
    print(df.head())
    # Target: the Open price; feature "diff" encodes whether the day closed
    # lower (0) or higher/equal (1) than it opened.
    y = df["Open"].values.reshape(-1, 1)
    diff = df['Close'] - df["Open"]
    diff_locations = []
    for i in diff:
        if (i < 0):
            diff_locations.append(0)
        else:
            diff_locations.append(1)
    df['diff'] = pd.DataFrame(diff_locations)
    #X = df[['High', 'Low', 'Close', 'Volume','diff']]
    X = df[['High', 'Low', 'Close', 'Volume', 'diff']]
    print(X)
    print(y)
    print(X.shape, y.shape)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
    # MinMax scaling, fitted on training data only.
    X_minmax = MinMaxScaler().fit(X_train)
    y_minmax = MinMaxScaler().fit(y_train)

    X_train_minmax = X_minmax.transform(X_train)
    X_test_minmax = X_minmax.transform(X_test)
    y_train_minmax = y_minmax.transform(y_train)
    y_test_minmax = y_minmax.transform(y_test)
    model2 = LinearRegression()
    model2.fit(X_train_minmax, y_train_minmax)
    print(f"Testing Data Score: {model2.score(X_test_minmax, y_test_minmax)}")
    # NOTE(review): this is an R^2 score, not a prediction, despite the name.
    minmax_predict = model2.score(X_test_minmax, y_test_minmax)
    print(minmax_predict)

    #This is standard scalar transformation
    X_scaler = StandardScaler().fit(X_train)
    y_scaler = StandardScaler().fit(y_train)
    X_train_scaled = X_scaler.transform(X_train)
    X_test_scaled = X_scaler.transform(X_test)
    y_train_scaled = y_scaler.transform(y_train)
    y_test_scaled = y_scaler.transform(y_test)
    model = LinearRegression()
    model.fit(X_train_scaled, y_train_scaled)
    predictions = model.predict(X_test_scaled)
    scallar_MSE = mean_squared_error(y_test_scaled, predictions)
    scallar_r2 = model.score(X_test_scaled, y_test_scaled)
    # Residual plot (prediction vs. prediction error) for train and test.
    plt.scatter(model.predict(X_train_scaled),
                model.predict(X_train_scaled) - y_train_scaled,
                c="blue",
                label="Training Data")
    plt.scatter(model.predict(X_test_scaled),
                model.predict(X_test_scaled) - y_test_scaled,
                c="orange",
                label="Testing Data")
    #plt.legend()
    plt.hlines(y=0, xmin=y_test_scaled.min(), xmax=y_test_scaled.max())
    plt.title("Residual Plot")
    #plt.show()
    pwd = os.getcwd()
    print(pwd)
    #p = Path(os.getcwd()+"\static\images")
    plt.savefig("static/images/" + st + ".png")
    # NOTE(review): the PNG is opened read-only and immediately closed —
    # this looks like a leftover and can be removed.
    f = open("static/images/" + st + ".png")
    plt.close()
    f.close()

    #Lasso model
    ### BEGIN SOLUTION
    lasso = Lasso(alpha=.01).fit(X_train_scaled, y_train_scaled)

    lasso_predictions = lasso.predict(X_test_scaled)

    lasso_MSE = mean_squared_error(y_test_scaled, lasso_predictions)
    lasso_r2 = lasso.score(X_test_scaled, y_test_scaled)
    ### END SOLUTION

    print(f"Lasso MSE: {lasso_MSE}, R2: {lasso_r2}")

    #Ridge model
    ridgeVal = Ridge(alpha=.01).fit(X_train_scaled, y_train_scaled)

    ridge_predictions = ridgeVal.predict(X_test_scaled)

    ridge_MSE = mean_squared_error(y_test_scaled, ridge_predictions)
    ridge_r2 = ridgeVal.score(X_test_scaled, y_test_scaled)
    print(f"ridge MSE: {ridge_MSE}, R2: {ridge_r2}")

    #elasticNet
    elasticnet = ElasticNet(alpha=.01).fit(X_train_scaled, y_train_scaled)

    elasticnet_predictions = elasticnet.predict(X_test_scaled)

    elasticnet_MSE = mean_squared_error(y_test_scaled, elasticnet_predictions)
    elasticnet_r2 = elasticnet.score(X_test_scaled, y_test_scaled)
    print(f"elasticnet MSE: {elasticnet_MSE}, R2: {elasticnet_r2}")

    # Side-by-side scatter of the original vs. scaled "High" feature.
    fig1 = plt.figure(figsize=(12, 6))
    axes1 = fig1.add_subplot(1, 2, 1)
    axes2 = fig1.add_subplot(1, 2, 2)

    axes1.set_title("Original Data")
    axes2.set_title("Scaled Data")

    maxx = X_train["High"].max()
    maxy = y_train.max()
    axes1.set_xlim(-maxx + 1, maxx + 1)
    axes1.set_ylim(-maxy + 1, maxy + 1)

    axes2.set_xlim(-2, 2)
    axes2.set_ylim(-2, 2)
    set_axes(axes1)
    set_axes(axes2)

    axes1.scatter(X_train["High"], y_train)
    axes2.scatter(X_train_scaled[:, 0], y_train_scaled[:])

    p = Path(os.getcwd() + "/static/images")
    #q = p / "axes2"+st+".png"
    #if (q.exists()):
    # NOTE(review): same open-then-close leftover as above.
    fig1.savefig("static/images/axes2" + st + ".png")
    f = open("static/images/axes2" + st + ".png")
    plt.close()
    f.close()
    #else:
    #    fig1.savefig("static/images/axes2"+st+".png")
    #    plt.close()

    return render_template("indexStocks.html",
                           stocks=stocks,
                           responsedata=responsedata,
                           init_page="initpage",
                           sel_stk=st,
                           minmax_predict=minmax_predict,
                           scallar_MSE=scallar_MSE,
                           scallar_r2=scallar_r2,
                           lasso_MSE=lasso_MSE,
                           lasso_r2=lasso_r2,
                           ridge_MSE=ridge_MSE,
                           ridge_r2=ridge_r2,
                           elasticnet_MSE=elasticnet_MSE,
                           elasticnet_r2=elasticnet_r2)
def get_stocks(st):
    """Serve the stocks page for ticker `st` from the already-cached CSV.

    Same modelling path as upload_get_stocks but without the Yahoo download:
    fits MinMax- and StandardScaler-based linear models plus Lasso / Ridge /
    ElasticNet, saves plots, and renders the template with all scores.
    NOTE(review): this duplicates most of upload_get_stocks — the shared body
    is a candidate for extraction into a helper.
    """
    # Find one record of data from the mongo database
    # @TODO: YOUR CODE HERE!

    session = Session(engine)
    stocks = session.execute("select * from stocks ")
    #return render_template("index.html", listings=listings)
    # Return template and data

    resdata = [{}]

    responsedata = {'respdata': resdata}
    session.close()

    print('Hello this is test')
    df = pd.read_csv("static/data/" + st + ".csv")
    # Drop the null columns where all values are null
    df = df.dropna(axis='columns', how='all')
    # Drop the null rows
    # This is for the MinMax Linear Regression model
    print(df.head())
    df = df.dropna()
    print(df.head())
    # Target: the Open price; feature "diff" encodes whether the day closed
    # lower (0) or higher/equal (1) than it opened.
    y = df["Open"].values.reshape(-1, 1)
    diff = df['Close'] - df["Open"]
    diff_locations = []
    for i in diff:
        if (i < 0):
            diff_locations.append(0)
        else:
            diff_locations.append(1)
    df['diff'] = pd.DataFrame(diff_locations)
    #X = df[['High', 'Low', 'Close', 'Volume','diff']]
    X = df[['High', 'Low', 'Close', 'Volume', 'diff']]
    print(X)
    print(y)
    print(X.shape, y.shape)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
    # MinMax scaling, fitted on training data only.
    X_minmax = MinMaxScaler().fit(X_train)
    y_minmax = MinMaxScaler().fit(y_train)

    X_train_minmax = X_minmax.transform(X_train)
    X_test_minmax = X_minmax.transform(X_test)
    y_train_minmax = y_minmax.transform(y_train)
    y_test_minmax = y_minmax.transform(y_test)
    model2 = LinearRegression()
    model2.fit(X_train_minmax, y_train_minmax)
    print(f"Testing Data Score: {model2.score(X_test_minmax, y_test_minmax)}")
    # NOTE(review): this is an R^2 score, not a prediction, despite the name.
    minmax_predict = model2.score(X_test_minmax, y_test_minmax)
    print(minmax_predict)

    #This is standard scalar transformation
    X_scaler = StandardScaler().fit(X_train)
    y_scaler = StandardScaler().fit(y_train)
    X_train_scaled = X_scaler.transform(X_train)
    X_test_scaled = X_scaler.transform(X_test)
    y_train_scaled = y_scaler.transform(y_train)
    y_test_scaled = y_scaler.transform(y_test)
    model = LinearRegression()
    model.fit(X_train_scaled, y_train_scaled)
    predictions = model.predict(X_test_scaled)
    scallar_MSE = mean_squared_error(y_test_scaled, predictions)
    scallar_r2 = model.score(X_test_scaled, y_test_scaled)
    # Residual plot (prediction vs. prediction error) for train and test.
    plt.scatter(model.predict(X_train_scaled),
                model.predict(X_train_scaled) - y_train_scaled,
                c="blue",
                label="Training Data")
    plt.scatter(model.predict(X_test_scaled),
                model.predict(X_test_scaled) - y_test_scaled,
                c="orange",
                label="Testing Data")
    #plt.legend()
    plt.hlines(y=0, xmin=y_test_scaled.min(), xmax=y_test_scaled.max())
    plt.title("Residual Plot")
    #plt.show()
    pwd = os.getcwd()
    print(pwd)
    #p = Path(os.getcwd()+"\static\images")
    plt.savefig("static/images/" + st + ".png")
    # NOTE(review): the PNG is opened read-only and immediately closed —
    # this looks like a leftover and can be removed.
    f = open("static/images/" + st + ".png")
    plt.close()
    f.close()

    #Lasso model
    ### BEGIN SOLUTION
    lasso = Lasso(alpha=.01).fit(X_train_scaled, y_train_scaled)

    lasso_predictions = lasso.predict(X_test_scaled)

    lasso_MSE = mean_squared_error(y_test_scaled, lasso_predictions)
    lasso_r2 = lasso.score(X_test_scaled, y_test_scaled)
    ### END SOLUTION

    print(f"Lasso MSE: {lasso_MSE}, R2: {lasso_r2}")

    #Ridge model
    ridgeVal = Ridge(alpha=.01).fit(X_train_scaled, y_train_scaled)

    ridge_predictions = ridgeVal.predict(X_test_scaled)

    ridge_MSE = mean_squared_error(y_test_scaled, ridge_predictions)
    ridge_r2 = ridgeVal.score(X_test_scaled, y_test_scaled)
    print(f"ridge MSE: {ridge_MSE}, R2: {ridge_r2}")

    #elasticNet
    elasticnet = ElasticNet(alpha=.01).fit(X_train_scaled, y_train_scaled)

    elasticnet_predictions = elasticnet.predict(X_test_scaled)

    elasticnet_MSE = mean_squared_error(y_test_scaled, elasticnet_predictions)
    elasticnet_r2 = elasticnet.score(X_test_scaled, y_test_scaled)
    print(f"elasticnet MSE: {elasticnet_MSE}, R2: {elasticnet_r2}")

    # Side-by-side scatter of the original vs. scaled "High" feature.
    fig1 = plt.figure(figsize=(12, 6))
    axes1 = fig1.add_subplot(1, 2, 1)
    axes2 = fig1.add_subplot(1, 2, 2)

    axes1.set_title("Original Data")
    axes2.set_title("Scaled Data")

    maxx = X_train["High"].max()
    maxy = y_train.max()
    axes1.set_xlim(-maxx + 1, maxx + 1)
    axes1.set_ylim(-maxy + 1, maxy + 1)

    axes2.set_xlim(-2, 2)
    axes2.set_ylim(-2, 2)
    set_axes(axes1)
    set_axes(axes2)

    axes1.scatter(X_train["High"], y_train)
    axes2.scatter(X_train_scaled[:, 0], y_train_scaled[:])

    p = Path(os.getcwd() + "/static/images")
    #q = p / "axes2"+st+".png"
    #if (q.exists()):
    # NOTE(review): same open-then-close leftover as above.
    fig1.savefig("static/images/axes2" + st + ".png")
    f = open("static/images/axes2" + st + ".png")
    plt.close()
    f.close()
    #else:
    #    fig1.savefig("static/images/axes2"+st+".png")
    #    plt.close()

    return render_template("indexStocks.html",
                           stocks=stocks,
                           responsedata=responsedata,
                           init_page="initpage",
                           sel_stk=st,
                           minmax_predict=minmax_predict,
                           scallar_MSE=scallar_MSE,
                           scallar_r2=scallar_r2,
                           lasso_MSE=lasso_MSE,
                           lasso_r2=lasso_r2,
                           ridge_MSE=ridge_MSE,
                           ridge_r2=ridge_r2,
                           elasticnet_MSE=elasticnet_MSE,
                           elasticnet_r2=elasticnet_r2)
예제 #19
0



#second estimator --Lasso 
  

from sklearn.linear_model  import Lasso

#find best alpha for the model

alphas = [0.0001, 0.001, 0.01, 0.1, 0.3, 0.5, 0.7, 1]

# Fit one Lasso per alpha and report training R^2, MSE and RMSE.
# NOTE(review): normalize= was removed from Lasso in scikit-learn 1.2;
# this requires an older scikit-learn.  Scores here are on the same data
# used for fitting (no held-out set).
for a in alphas:
   lasso_model = Lasso(alpha=a,normalize=True).fit(x,y)   
   r2 = lasso_model.score(x, y) 
   y_pred = lasso_model.predict(x)
   mse = mean_squared_error(y, y_pred)  
   rmse=math.sqrt(mse)
   print("Alpha:{0:.4f}, r2:{1:.2f}, MSE:{2:.2f}, RMSE:{3:.2f}"
       .format(a, r2, mse, rmse))




# Refit at the chosen alpha on the training split only.
lasso = Lasso(alpha =0.0001, normalize=True).fit(x_train,y_train)


#print the coefficients by sorting them from most important to less important
predictors =x_train.columns
coef=pd.Series(lasso.coef_, predictors).sort_values(ascending=False)
예제 #20
0
#plot graph of most import feature
important_features.plot(kind='bar')
plt.show()

#lasso model
# NOTE(review): alphas includes 0; Lasso with alpha=0 is plain least squares
# and scikit-learn warns against it.
alphas = np.arange(0, 10)
grid = GridSearchCV(estimator=Lasso(), param_grid={'alpha': alphas})
grid.fit(X_train, y_train)
lasso_clf = grid.best_estimator_
#best lambda
lasso_clf
#set best lambda and fit train data
lasso = Lasso()
lasso.set_params(alpha=9.0)
lasso.fit(X_train, y_train)
lasso.score(X_train, y_train)
#get cofficient
lasso.coef_
#predicted value from train data
predicted_y1 = lasso.predict(xtest)
#score of the predicted data
# NOTE(review): scoring the model against its own predictions always yields
# R^2 = 1.0 — this line almost certainly meant to score against the true
# y values for xtest.
lasso.score(xtest, predicted_y1)

#ridge model
alphas = np.arange(0, 10)
grid = GridSearchCV(estimator=Ridge(), param_grid={'alpha': alphas})
grid.fit(X_train, y_train)
ridge_clf = grid.best_estimator_
#best lambda
ridge_clf
#set best lambda and fit train data
from sklearn.linear_model import Lasso
from sklearn.model_selection import train_test_split
import mglearn

# Extended Boston housing data; Lasso with a small alpha and a raised
# iteration cap so the coordinate-descent solver converges.
x, y = mglearn.datasets.load_extended_boston()
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=0)
lasso = Lasso(alpha=0.01, max_iter=100000).fit(x_train, y_train)
print('\ntrain score:{:.2f}'.format(lasso.score(x_train, y_train)))
print('test score:{:.2f}'.format(lasso.score(x_test, y_test)))
예제 #22
0
    # Predict: build the feature matrix from the upper triangle (k=1) of each
    # 20x20 matrix loaded from disk.
    FS_brain = []
    idx = np.triu_indices(20, 1)
    for item in cur_paths:
        item_data = np.load(item)
        FS_brain.append(item_data[idx])
    FS_brain = np.array(FS_brain)
    FS_brain = StandardScaler().fit_transform(FS_brain)

    # One Lasso per behavioural score column.
    for i_beh in range(5):
        scores = beh_scores[:, i_beh]
        title = beh_titles[i_beh]

        # NOTE(review): sklearn.cross_validation was removed in scikit-learn
        # 0.20; use sklearn.model_selection instead (ShuffleSplit's signature
        # also changed: n/n_iter -> n_splits, and splits come from .split()).
        from sklearn.cross_validation import cross_val_score, ShuffleSplit
        from sklearn.linear_model import Lasso

        clf = Lasso(alpha=1.0)
        # NOTE(review): `scores` is 1-D here; modern StandardScaler requires
        # a 2-D array (reshape(-1, 1)).
        scores = StandardScaler().fit_transform(scores)

        coefs = []  # NOTE(review): never filled or used
        r2_list = []
        folder = ShuffleSplit(n=len(scores), n_iter=500, test_size=0.1)
        for train, test in folder:
            clf.fit(X=FS_brain[train], y=scores[train])
            r2 = clf.score(FS_brain[test], scores[test])
            r2_list.append(r2)
        mean_r2 = np.mean(r2_list)
        print('%s/%s/mean-R2: %.4f' % (cur_ana, title, mean_r2))


예제 #23
0
# Report metrics for the ridge model fitted earlier in the file
# (variable names suggest alpha=100 -- confirm against the preceding section).
mse = mean_squared_error(y_test_final, y_pred)
# BUG FIX: np.math was a deprecated alias of the stdlib math module and was
# removed in NumPy 1.25; plain exponentiation yields the same square root.
rmse = mse ** 0.5
print('RMSE: {}'.format(rmse))
print('Train Score: {}'.format(ridge_model_100_train_score))
print('Test Score: {}'.format(ridge_model_100_test_score))
#
# ******************* Lasso Regularization (0.01) ************************
lasso_001 = Lasso(alpha=0.01)
lasso_001.fit(X_train_final, y_train_final)
# BUG FIX: the original predicted and cross-validated with the unrelated
# `model` estimator while labelling the output "Lasso(0.01)"; use the
# freshly fitted lasso_001 so RMSE/CV scores match the printed headings.
y_pred = lasso_001.predict(X_test_final)
scores = cross_val_score(lasso_001, X_train_final, y_train_final, cv=5)
print(
    '***************************************Lasso(0.01)***************************************'
)
print('cross validation score', scores.mean())
lasso_model_001_train_score = lasso_001.score(X_train_final, y_train_final)
lasso_model_001_test_score = lasso_001.score(X_test_final, y_test_final)

mse = mean_squared_error(y_test_final, y_pred)
# BUG FIX: np.math was removed in NumPy 1.25; ** 0.5 is the equivalent root.
rmse = mse ** 0.5
print('RMSE: {}'.format(rmse))
print('Train Score: {}'.format(lasso_model_001_train_score))
print('Test Score: {}'.format(lasso_model_001_test_score))

# ******************* Lasso (0.001) ************************
lasso_0001 = Lasso(alpha=0.001)
lasso_0001.fit(X_train_final, y_train_final)
# Same fix as above: evaluate lasso_0001, not the unrelated `model`.
y_pred = lasso_0001.predict(X_test_final)
scores = cross_val_score(lasso_0001, X_train_final, y_train_final, cv=5)
print(
    '***************************************Lasso (0.001)***************************************'
예제 #24
0
# Persist the dummified frames for later reuse.
train.to_csv('dumified_train.csv')
test.to_csv('dumified_test.csv')
df1.to_csv('df1.csv')

# Simple linear baseline.
model = LinearRegression()

# NOTE(review): the bare expressions below only display output in a
# notebook; in a plain script they are no-ops.
train
test

y_train

model.fit(train, y_train)
model.score(train, y_train)
model.predict(test)

# Lasso with a light penalty.
lasso = Lasso(alpha=0.01, max_iter=1000)
#lasso = Lasso(alpha=0.01, max_iter=10e5)
lasso.fit(train, y_train)
lasso.score(train, y_train)

lasso.predict(test)


# Gradient-boosted trees for comparison.
model2 = GradientBoostingRegressor()
model2 = model2.fit(train,y_train)
model2.score(train,y_train)

예제 #25
0
# Tail of an earlier alpha sweep (tick labels 100..0.01): plot test scores.
plt.plot(test_scores, label="test scores")
plt.xticks(range(4), [100, 10, 1, .01])
plt.legend(loc="best")


# Lasso (L1) penalty with alpha as the regularisation parameter.
# LASSO leads to sparse solutions, driving most coefficients to zero.
from sklearn.linear_model import Lasso

lasso_models = {}
training_scores = []
test_scores = []

# Fit one Lasso per alpha and record train/test R^2.
for alpha in [30, 10, 1, .01]:
    lasso = Lasso(alpha=alpha).fit(X_train, y_train)
    training_scores.append(lasso.score(X_train, y_train))
    test_scores.append(lasso.score(X_test, y_test))
    lasso_models[alpha] = lasso

plt.plot(training_scores, label="training scores")
plt.plot(test_scores, label="test scores")
plt.xticks(range(4), [30, 10, 1, .01])
plt.legend(loc="best") 


############################################
# Learning Curve (Analyse Model Complexity)
############################################

예제 #26
0

''' 
    the conclusion of all this is that with little data is better use linear model
    but when we have a lot of data is better use model like forest and gradient tree decision-based 
'''


# Compare a KNN regressor (k=18) with a default Lasso on the same split.
X_train, X_test, Y_train, Y_test = train_test_split(mamoDataX,mamoDataY, random_state = 2)

model = KNeighborsRegressor(18)
model2 = Lasso()


model.fit(X_train,Y_train)
model2.fit(X_train,Y_train)

print(model.score(X_test,Y_test))
print(model2.score(X_test,Y_test))


PREDICTED = model.predict(X_test)
PREDICTED2 = model2.predict(X_test)

# Histogram each model's predictions against the true targets.
plt.subplot(2,2,1)
plt.hist([PREDICTED,Y_test])

plt.subplot(2,2,2)
plt.hist([PREDICTED2,Y_test])

# BUG FIX: `plt.show` was referenced without calling it, so the figure was
# never displayed; it must be invoked.
plt.show()
# NOTE(review): bare expression -- no effect outside a notebook.
svm_rmse_test

# Collect the per-model test RMSEs computed earlier into one DataFrame.
test_rmse = [
    lin_rmse_test, ridge_rmse_test, lasso_rmse_test, elastic_net_rmse_test,
    SGD_rmse_test, tree_rmse_test, forest_rmse_test, xg_rmse_test,
    svm_rmse_test
]
aa1 = pd.DataFrame(test_rmse)

###########################################################################

################# R^2 Score for train data #############################

# Training-set R^2 for every estimator fitted earlier in the file.
R2_lin_train = lin_reg.score(X_train, y_train)
R2_ridge_train = ridge_reg.score(X_train, y_train)
R2_lasso_train = lasso_reg.score(X_train, y_train)
R2_elastic_net_train = elastic_net_reg.score(X_train, y_train)
R2_SGD_train = SGD_reg.score(X_train, y_train)
R2_tree_train = tree_reg.score(X_train, y_train)
R2_forest_train = forest_reg.score(X_train, y_train)
R2_xg_train = xg_reg.score(X_train, y_train)
R2_svm_train = svm_reg.score(X_train, y_train)

train_r2 = [
    R2_lin_train, R2_ridge_train, R2_lasso_train, R2_elastic_net_train,
    R2_SGD_train, R2_tree_train, R2_forest_train, R2_xg_train, R2_svm_train
]
aa2 = pd.DataFrame(train_r2)

##############################################################################
Elas.fit(X_train, y_train)

# print(sqrt(mean_squared_error(ytrain, Elas.predict(xtrain))))
# Test RMSE and R^2 for the `Elas` estimator (presumably an ElasticNet
# fitted earlier -- confirm against the preceding section).
print(sqrt(mserr(y_test, Elas.predict(X_test))))
print('R2 Value/Coefficient of Determination: {}'.format(
    Elas.score(X_test, y_test)))

# In[34]:

# Lassoreg = Lasso(alpha = 0.5,tol = 0.1)
# Lassoreg = Lassoreg.fit(X_train,y_train)
# print(Ridgereg.score(X_train,y_train))
# print(Ridgereg.score(X_test,y_test))

from sklearn.linear_model import Lasso
from math import sqrt
from sklearn.metrics import r2_score, mean_squared_error

# NOTE(review): the `normalize` parameter was removed in scikit-learn 1.2;
# standardise the features explicitly instead.
lassoreg = Lasso(alpha=0.001, normalize=True)
lassoreg.fit(X_train, y_train)
# lassoreg.predict(X_train)
print(sqrt(mean_squared_error(y_test, lassoreg.predict(X_test))))
print('R2 Value/Coefficient of Determination: {}'.format(
    lassoreg.score(X_test, y_test)))

# In[40]:

# NOTE(review): hard-coded absolute Windows path -- not portable.
test_prediction = pd.DataFrame(Elas.predict(test_x), columns=['SalePrice'])
test_prediction.index.name = 'Id'
test_prediction.to_csv("C:\\Users\\Bhuvan PC\\Downloads\\final_test_pred.csv")
# Residual diagnostics for the `lasso` model fitted earlier in the file.
y_train_pred = lasso.predict(X_train)
y_test_pred = lasso.predict(X_test)
# NOTE(review): hard-codes 26 coefficients -- breaks if the feature count changes.
for i in range(0,26):
    print('Slope'+str(i)+':'+str(lasso.coef_[i]))
print('Intercept: %.3f' % lasso.intercept_)
plt.scatter(y_train_pred, y_train_pred - y_train, c='steelblue', marker='o', edgecolor='white', label='Training data')
plt.scatter(y_test_pred, y_test_pred - y_test, c='limegreen', marker='s', edgecolor='white', label='Test data')
plt.xlabel('Predicted values')
plt.ylabel('Residuals')
plt.title('Lasso Regression')
plt.legend(loc='upper left')
plt.hlines(y=0, xmin=-10, xmax=50, color='black', lw=2)
plt.xlim([-10, 50])
# NOTE(review): plt.figure() right before show() opens an extra blank figure.
plt.figure()
plt.show()
print("R^2: {}".format(lasso.score(X_test, y_test)))
rmse = np.sqrt(mean_squared_error(y_test,y_test_pred))
print("Root Mean Squared Error: {}".format(rmse))
# Search for the best alpha for Lasso over the integer range 1..20.
scores=[]
ran=[]
# NOTE(review): rebinds rmse from the scalar above to a list.
rmse=[]
for alpha in range(1,21):
    lassob = Lasso(alpha=alpha)
    lassob.fit(X_train, y_train)
    y_train_pred = lassob.predict(X_train)
    y_test_pred = lassob.predict(X_test)
    scores.append(lassob.score(X_test, y_test))
    rmse.append(np.sqrt(mean_squared_error(y_test, y_test_pred)))
    ran.append(alpha)
plt.figure()
예제 #30
0
print(len(Y_test))
model = Lasso()  # instantiate the model

model.fit(X_train, Y_train)  # training the model
# print(X_test)
PREDICTED = model.predict(
    X_test)  # to predict the objective data accurate to this data
'''
    Very bad results (neither mamoDataX (without zeros) neither mamoData (Tissue = 0 | 1) 
    neither mamoData (Tissue = 0 - 1). generate a good score) for that we are gonna aplly featuring engineering and
    evaluate other models 
'''

# Histogram of predictions next to the true targets.
plt.subplot(2, 2, 1)
plt.hist([PREDICTED, Y_test
          ])  # to know the relation the predict data with the Y_test data

print(model.score(X_test, Y_test))  # score of the model (very bad!!)

RESIDUALS = Y_test - PREDICTED  # to know the rate of the error
plt.subplot(2, 2, 2)
plt.scatter(Y_test, RESIDUALS)

plt.subplot(2, 2, 3)
# BUG FIX: the `normed` keyword was removed in Matplotlib 3.1;
# `density=True` is the equivalent replacement.
plt.hist(RESIDUALS, bins=100, density=True, histtype='step')

plt.show()

# map to correlation
# sb.heatmap(mamoDataX.corr()) # there are much correlation between the data that is bad
예제 #31
0
    # Tail of a per-file loop.
    A = f['trialdata'][:]
    A = np.transpose(A)
    A = A[:, 1000:2000]  # sample the 1000 time samples prior to stimulus

    # Continuous wavelet transform per row (159 rows); the mean coefficient
    # of each of the 8 scales becomes a feature.
    scale = np.arange(1, 9)  # frequencies
    trialData = []
    for i in range(159):
        w = A[i, :]
        coefs, freqs = pywt.cwt(w, scale, 'morl', 0.0005)
        means = np.mean(coefs, axis=1)
        trialData.extend(means)

    X.append(trialData)

X = np.asarray(X)  # change X into a numpy ndarray

### Split training and testing data

X_train, X_test, respTimes_train, respTimes_test = train_test_split(
    X, respTimes, test_size=0.3)

### Fit lasso regression
lasso = Lasso()
lasso.fit(X_train, respTimes_train)
train_score = lasso.score(X_train, respTimes_train)
test_score = lasso.score(X_test, respTimes_test)

# NOTE(review): Python 2 print statements -- this fragment will not parse
# under Python 3.
print "training score:", train_score
print "test score: ", test_score
예제 #32
0
                      'o',
                      alpha=.5,
                      zorder=-1,
                      label='samples',
                      color="tab:green")
disp.axes_[0, 0].set_ylim(-3, 3)
disp.axes_[0, 0].set_xlim(-1, 1)
plt.legend()
plt.show()

##############################################################################
# Sample-weight support for Lasso and ElasticNet
# ----------------------------------------------
# The two linear regressors :class:`~sklearn.linear_model.Lasso` and
# :class:`~sklearn.linear_model.ElasticNet` now support sample weights.

from sklearn.model_selection import train_test_split
from sklearn.datasets import make_regression
from sklearn.linear_model import Lasso
import numpy as np

# Synthetic regression problem with random per-sample weights.
n_samples, n_features = 1000, 20
rng = np.random.RandomState(0)
X, y = make_regression(n_samples, n_features, random_state=rng)
sample_weight = rng.rand(n_samples)
X_train, X_test, y_train, y_test, sw_train, sw_test = train_test_split(
    X, y, sample_weight, random_state=rng)
reg = Lasso()
# Weights are honoured both when fitting and when scoring.
reg.fit(X_train, y_train, sample_weight=sw_train)
print(reg.score(X_test, y_test, sw_test))
	"""
	Runs a linear regression on a random sample of the data. Collects r squared values. 
	"""
	times = 0
	times2 = 0
	while times < 100:
		np.random.shuffle(temp_matrix)

		set1 = temp_matrix[0:size][:,0:2]
		set2 = temp_matrix[0:size][:,2]
		test_set1 = temp_matrix[size:][:,0:2]
		test_set2 = temp_matrix[size:][:,2]

		clf = Lasso(alpha = 5)
		clf.fit(set1, set2)
		r_sq.append(clf.score(test_set1, test_set2))



		clf1 = LinearRegression(fit_intercept = True)
		clf1.fit(set1, set2)
		r_sq1.append(clf1.score(test_set1, test_set2))
		times = times + 1


	"""
	Outputs the results of the linear regression using spearman's rank and OLS regression. 
	"""
	#print "Course :" , course, "Spearman: ", stats.spearmanr(course_grade, GPA_list)[0], ",    R^2: ", np.asarray(r_sq1).mean()

	label_course.append(course)									#adds course to label_course list for future reference
예제 #34
0
                feature_vector.append(value)

        song_x_train.append(feature_vector)
        song_y_train.append(song[2]) # Map to hotttnesss score


    # Fit the Lasso on the assembled training matrix (Python 2 fragment:
    # print statements and dict.iteritems()).
    lasso_model = Lasso(alpha=0.1)
    lasso_model.fit(np.array(song_x_train), np.array(song_y_train))
    print lasso_model.coef_

    # Build the test matrix the same way: a one-hot artist slot followed by
    # the remaining numeric features (artist/genre excluded).
    song_x_test = []
    song_y_test = []
    for song in songs_test:
        feature_vector = [0]*len(artist_inputs)
        artist = song[1]['artist']
        feature_vector[artists[artist]] = 1

        feature_dict = song[1]
        for feature, value in feature_dict.iteritems():
            if feature != 'artist' and feature != 'genre':
                feature_vector.append(value)

        song_x_test.append(feature_vector)
        song_y_test.append(song[2])

    # NOTE(review): predicting on a single bare list; modern scikit-learn
    # requires a 2-D array for predict().
    prediction = lasso_model.predict(song_x_test[0])
    print 'Prediction: {}'.format(prediction)
    print 'Actual: {}'.format(song_y_test[0])
    score = lasso_model.score(np.array(song_x_test), np.array(song_y_test))
    print 'Score: {}'.format(score)
예제 #35
0
print('훈련 세트 score : ',ridge.score(X_train, y_train))   # 0.89
print('테스트 세트 score : ',ridge.score(X_test, y_test))    # 0.75

# Tune alpha: raising alpha pushes coefficients toward zero, so the optimal
# value has to be searched for.
ridge10 = Ridge(alpha=10).fit(X_train, y_train)     # alpha = 10
print('훈련 세트 score : ',ridge10.score(X_train, y_train))     # 0.79
print('테스트 세트 score : ',ridge10.score(X_test, y_test))      # 0.64 

ridge01 = Ridge(alpha=0.1).fit(X_train, y_train)    # alpha = 0.1
print('훈련 세트 score : ',ridge01.score(X_train, y_train))     # 0.93
print('테스트 세트 score : ',ridge01.score(X_test, y_test))      # 0.77

####### Lasso Regression #######
from sklearn.linear_model import Lasso
lasso = Lasso().fit(X_train, y_train)
print('----Lasso Regression----')
print('훈련 세트 score : ',lasso.score(X_train, y_train))   # 0.29 -> underfitting
print('테스트 세트 score : ',lasso.score(X_test, y_test))    # 0.20
print('사용한 특성의 수 : ',np.sum(lasso.coef_ != 0))          # 4 of 105 features used

# Reduce the underfitting by lowering alpha (weaker regularisation).
lasso001 = Lasso(alpha=0.01, max_iter=100000).fit(X_train, y_train)
print('훈련 세트 score : ',lasso001.score(X_train, y_train))   # 0.90
print('테스트 세트 score : ',lasso001.score(X_test, y_test))    # 0.77
print('사용한 특성의 수 : ',np.sum(lasso001.coef_ != 0))          # 33

####### ElasticNet #######
from sklearn.linear_model import ElasticNet
elastic = ElasticNet(alpha=0.001, max_iter=10000000).fit(X_train, y_train)
print('train score :',elastic.score(X_train, y_train))
print('test score :',elastic.score(X_test, y_test))
예제 #36
0
from sklearn.model_selection import train_test_split
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()

# Communities & Crime data: '?' marks missing values in the raw file.
crime = pd.read_table('CommViolPredUnnormalizedData.txt',
                      sep=',',
                      na_values='?')
# Keep the selected predictor columns plus the target, dropping rows with
# any missing value.
columns_to_keep = [5, 6] + list(range(11, 26)) + list(range(32, 103)) + [145]
crime = crime.iloc[:, columns_to_keep].dropna()
X_crime = crime.iloc[:, range(0, 88)]
y_crime = crime['ViolentCrimesPerPop']

X_train, X_test, y_train, y_test = train_test_split(X_crime,
                                                    y_crime,
                                                    random_state=0)

# Scale using the training split only, then apply the same transform to test.
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

linlasso = Lasso(alpha=2.0, max_iter=10000).fit(X_train_scaled, y_train)

print('lasso regression linear model intercept: {}'.format(
    linlasso.intercept_))
print('lasso regression linear model coeff: {}'.format(linlasso.coef_))
print('R-Squared Score (training) :{:.3f}'.format(
    linlasso.score(X_train_scaled, y_train)))
print('R-Squared score (test) :{:.3f}'.format(
    linlasso.score(X_test_scaled, y_test)))
예제 #37
0
# Lasso regression demo on the scikit-learn diabetes dataset.
import numpy as np
from sklearn import datasets
from sklearn.linear_model import Lasso

# Load the diabetes regression dataset.
diabetes = datasets.load_diabetes()
features, target = diabetes.data, diabetes.target

# Fit an L1-regularised linear model and echo its configuration.
model = Lasso(alpha=0.1)
model.fit(features, target)
print(model)

# In-sample predictions (no held-out split in this demo).
predicted = model.predict(features)

# Mean squared error, then R^2, both on the training data itself.
mse = np.mean((target - predicted) ** 2)
print(mse)
print(model.score(features, target))
예제 #38
0

      if descdim == 1:
          new_features.append(first)
      elif descdim == 2:
          new_features.append((first, second))
      elif descdim == 3:
          new_features.append((first, second, third))

  #plt.scatter(new_features, val_labels)
  #plt.show()

  # Single-descriptor data needs an explicit column vector for scikit-learn.
  val_features = numpy.asarray(new_features)
  if descdim == 1:
    val_features = val_features.reshape(-1, 1)

  # NOTE(review): LinearRegression's `normalize` parameter was removed in
  # scikit-learn 1.2 (normalize=False was the default anyway).
  reg = linear_model.LinearRegression(copy_X=True, 
          fit_intercept=True, n_jobs=None, normalize=False)
  reg.fit(val_features, val_labels)

  # NOTE(review): max_iter=10e5 is a float literal; recent scikit-learn
  # validates max_iter as an integer.
  lasso = Lasso(alpha=0.01, max_iter=10e5)
  lasso.fit(val_features, val_labels)
  train_score = lasso.score(val_features, val_labels)
  coeff_used = numpy.sum(lasso.coef_!=0)
  print ("LASSO training score:", train_score )
  print ("LASSO number of features used: ", coeff_used)
  print ("LASSO coeff: ", lasso.coef_)
  

  print("Linear model: ", reg.coef_ , " ", reg.intercept_)
def prediction_lasso (X_train, Y_train, X_test, Y_test,alpha,normalize):
    """Fit a Lasso on the training split and summarise the fit.

    Returns a tuple (pred_test, coeff_df, pred_train, result) where `result`
    is a dict of shapes, the intercept, coefficient count, the strongest
    coefficient (resolved via the module-level `cf_dict` table), train/test
    MSE and the test R^2.

    NOTE(review): `normalize` is forwarded to Lasso, whose `normalize`
    parameter was removed in scikit-learn 1.2 -- modern callers should
    standardise features explicitly.
    """
    # Print shapes of the training and testing data sets
    #print ("Shapes of the training and testing data sets")
    #print(X_train.shape, X_test.shape, Y_train.shape, Y_test.shape)
    #Create our regression object

    lreg = Lasso (alpha = alpha,normalize=normalize)

    #do a linear regression, except only on the training
    lreg.fit(X_train,Y_train)

    #print("The estimated intercept coefficient is %.2f " %lreg.intercept_)
    #print("The number of coefficients used was %d " % len(lreg.coef_))



    # Set a DataFrame from the Facts
    coeff_df = DataFrame(X_train.columns)
    coeff_df.columns = ["Fact"]


    # Set a new column lining up the coefficients from the linear regression
    coeff_df["Coefficient"] = pd.Series(lreg.coef_)


    # Show
    #coeff_df

    #highest correlation between a fact and fraction votes
    #print ("Highest correlation fact: %s is %.9f" % (cf_dict.loc[coeff_df.iloc[coeff_df["Coefficient"].idxmax()]["Fact"],"description"], coeff_df.iloc[coeff_df["Coefficient"].idxmax()]["Coefficient"]) )

    #sns_plot = sns.jointplot(coeff_df.iloc[coeff_df["Coefficient"].idxmax()]["Fact"],"Fraction Votes",pd.merge(X_test,pd.DataFrame(Y_test), right_index=True, left_index=True),kind="scatter")


    #Predictions on training and testing sets
    pred_train = lreg.predict(X_train)
    pred_test = lreg.predict(X_test)

    # The mean square error
    #print("Fit a model X_train, and calculate MSE with Y_train: %.6f"  % np.mean((Y_train - pred_train) ** 2))
    #print("Fit a model X_train, and calculate MSE with X_test and Y_test: %.6f"  %np.mean((Y_test - pred_test) ** 2))

    #Explained variance score: 1 is perfect prediction
    #print("Variance score: %.2f" % lreg.score(X_test, Y_test))

    # Assemble the summary dict returned to the caller.
    result={}
    result["method"]="Lasso %.3f  " %alpha
    if normalize :
        result["normalize"]="Y"
    else:
        result["normalize"]="N"
    result["X_train_shape"]=X_train.shape
    result["Y_train_shape"]=Y_train.shape
    result["X_test_shape"]=X_test.shape
    result["Y_test_shape"]=Y_test.shape
    result["intercept"]=lreg.intercept_
    result["num_coef"]=len(lreg.coef_)
    result["max_fact"]=cf_dict.loc[coeff_df.iloc[coeff_df["Coefficient"].idxmax()]["Fact"],"description"]
    result["max_fact_value"]=coeff_df.iloc[coeff_df["Coefficient"].idxmax()]["Coefficient"]
    result["MSE_train"]=np.mean((Y_train - pred_train) ** 2)
    result["MSE_test"]=np.mean((Y_test - pred_test) ** 2)
    result["variance"]=lreg.score(X_test, Y_test)

    return pred_test,coeff_df,pred_train,result
예제 #40
0
ridge.fit(X_train, y_train)
ridge_y_pred = ridge.predict(X_test)
print('Ridge regression model performance: ', ridge.score(X_test, y_test))
# cross_val_score on the full data (5 folds).
cv_results = cross_val_score(ridge, X, y, cv=5)
print('5 fold Cross validations scores : ', np.around(cv_results, 3).tolist())

y_new_data = ridge.predict(new_data)
print('  Predicted house price on new data: ', y_new_data.item(), '\n\n')

print('Regularised Regression:')
print('      Lasso Regression:')
# NOTE(review): Lasso's `normalize` parameter was removed in scikit-learn 1.2.
lasso = Lasso(alpha=0.1, normalize=True)
lasso.fit(X_train, y_train)
lasso_y_pred = lasso.predict(X_test)  # NOTE(review): unused below
print('Lasso regression model performance: ', lasso.score(X_test, y_test))
# cross_val_score
cv_results = cross_val_score(lasso, X, y, cv=5)
print('5 fold Cross validations scores : ', np.around(cv_results, 3).tolist())

y_new_data = lasso.predict(new_data)
print('  Predicted house price on new data: ', y_new_data.item(), '\n\n')

print('Regularised Regression:')
print('Lasso Regression for feature selection: PLOT')

# Plot one coefficient per Boston feature name.
names = boston['feature_names']
lasso_feature = Lasso(alpha=0.1)
coef = lasso_feature.fit(X_train, y_train).coef_
_ = plt.plot(range(len(names)), coef)
_ = plt.xticks(range(len(names)), names, rotation=60)
예제 #41
0
X_train, X_test, y_train, y_test = train_test_split(X_scaled,
                                                    new_y,
                                                    test_size=0.3,
                                                    random_state=42)
# NOTE(review): LinearRegression's `normalize` parameter was removed in
# scikit-learn 1.2 (normalize=False was the default anyway).
b = LinearRegression(normalize=False)
b.fit(X_train, y_train)

# Regularized Ridge & Lasso regressions with weak penalties.
ridge_model = Ridge(alpha=0.02)
ridge_model.fit(X_train, y_train)
lasso_model = Lasso(alpha=0.001)
lasso_model.fit(X_train, y_train)
print("Simple Train: ", b.score(X_train, y_train))
print("Simple Test: ", b.score(X_test, y_test))
print('---------------------------------------')
print("Lasso Train: ", lasso_model.score(X_train, y_train))  #Lasso
print("Lasso Test: ", lasso_model.score(X_test, y_test))
print('---------------------------------------')
print("Ridge Train: ", ridge_model.score(X_train, y_train))  #Ridge
print("Ridge Test: ", ridge_model.score(X_test, y_test))

# Repeat with stronger penalties.
ridge_model = Ridge(alpha=0.2)
ridge_model.fit(X_train, y_train)
lasso_model = Lasso(alpha=0.1)
lasso_model.fit(X_train, y_train)
print("Simple Train: ", b.score(X_train, y_train))
print("Simple Test: ", b.score(X_test, y_test))
print('---------------------------------------')
print("Lasso Train: ", lasso_model.score(X_train, y_train))  #Lasso
print("Lasso Test: ", lasso_model.score(X_test, y_test))
print('---------------------------------------')
예제 #42
0
#plt.rc('text', usetex=True)  # uncomment only if a LaTeX installation is available
a = np.loadtxt("Pdata12_6.txt")  # load the 9x5 data table
n = a.shape[1] - 1  # number of explanatory variables
x = a[:, :n]  # predictor observation matrix
X = sm.add_constant(x)
md = sm.OLS(a[:, n], X).fit()  # build and fit the OLS model
print(md.summary())  # print the full model report

aa = zscore(a)  # standardise the data
x = aa[:, :n]
y = aa[:, n]  # standardised predictors and response
b = []  # collects the coefficient vector for each penalty value
kk = np.logspace(-4, 0, 100)  # penalty values to iterate over
for k in kk:
    md = Lasso(alpha=k).fit(x, y)
    b.append(md.coef_)
st = ['s-r', '*-k', 'p-b', '^-y']  # plot style strings
for i in range(n):
    plt.plot(kk,
             np.array(b)[:, i], st[i])
plt.legend(['$x_1$', '$x_2$', '$x_3$', '$x_4$'], fontsize=15)
plt.show()
md0 = Lasso(0.05).fit(x, y)  # refit at alpha = 0.05
cs0 = md0.coef_  # coefficients b1..b4 for the standardised data
print("标准化数据的所有回归系数为:", cs0)
mu = a.mean(axis=0)
s = a.std(axis=0, ddof=1)  # column means and standard deviations
# Back-transform intercept and slopes to the original (unscaled) data.
params = [mu[-1] - s[-1] * sum(cs0 * mu[:-1] / s[:-1]), s[-1] * cs0 / s[:-1]]
print("原数据的回归系数为:", params)
print("拟合优度:", md0.score(x, y))
예제 #43
0
    Y1 = Y_train_raw[train]

    X2 = X_train_reduced[test]
    Y2 = Y_train_raw[test]

    ## Train regressors on this fold.
    rdg_clf = Ridge(alpha=0.5)
    rdg_clf.fit(X1, Y1)
    lso_clf = Lasso(alpha=0.6257)
    lso_clf.fit(X1, Y1)
    svr_clf = LinearSVR(C=1e3)
    svr_clf.fit(X1, Y1)

    ## Score them on the held-out part of the fold.
    rdg_clf_score = rdg_clf.score(X2, Y2)
    lso_clf_score = lso_clf.score(X2, Y2)
    svr_clf_score = svr_clf.score(X2, Y2)

    # NOTE(review): Python 2 print statements; also, the "SVR_RBF" label
    # actually reports a LinearSVR.
    print "Ridge:  ", rdg_clf_score
    print "Lasso:  ", lso_clf_score
    print "SVR_RBF:  ", svr_clf_score


## Train the final regressor on the full reduced training set.
# clf = Ridge(alpha=.5)
# BUG FIX: LinearSVR has no `gamma` parameter (gamma belongs to the
# kernelised SVR); passing it raises a TypeError at construction.
clf = LinearSVR(C=1e3)
clf.fit(X_train_reduced, Y_train_raw)
Y_predicted = clf.predict(X_test_reduced)

## Save results to csv
np.savetxt("prediction.csv", Y_predicted, fmt="%.5f", delimiter=",")
예제 #44
0
    print "train error: " , np.sqrt(np.mean((data_0am_train_predy-data_0am_train_yy)**2))/nom_train
    print "test error: ",  np.sqrt(np.mean((data_0am_test_predy-data_0am_test_y)**2))/nom_test

    # print "train error ratio: " , np.mean(np.divide(np.absolute(data_0am_train_predy-data_0am_train_yy),data_0am_train_yy+0.001))
    # print "train error ratio: " , np.absolute(data_0am_train_predy-data_0am_train_yy)
    # print "test error ratio: ", np.mean(np.divide(np.absolute(data_0am_test_predy-data_0am_test_y),data_0am_train_yy+0.00001))

    las = Lasso(max_iter=50000,alpha=0.01)
    las.fit(data_0am_train_xx,data_0am_train_yy)
    data_0am_train_predy = las.predict(data_0am_train_xx)
    lasso_train_predy = las.predict(data_0am_train_xx)

    data_0am_test_predy = las.predict(data_0am_test_x)
    lasso_test_predy = las.predict(data_0am_test_x)
    print "Lasso report"
    print "train score: ", las.score(data_0am_train_xx,data_0am_train_yy)
    print "train error: " , np.sqrt(np.mean((data_0am_train_predy-data_0am_train_yy)**2))/nom_train
    print "test error: ",  np.sqrt(np.mean((data_0am_test_predy-data_0am_test_y)**2))/nom_test

    svr = SVR(kernel='linear')
    svr.fit(data_0am_train_xx,data_0am_train_yy)
    data_0am_train_predy = svr.predict(data_0am_train_xx)
    svr_train_predy = svr.predict(data_0am_train_xx)

    data_0am_test_predy = svr.predict(data_0am_test_x)
    svr_test_predy = svr.predict(data_0am_test_x)
    print "SVR report"
    print "train score: ", svr.score(data_0am_train_xx,data_0am_train_yy)
    print "train error: " , np.sqrt(np.mean((data_0am_train_predy-data_0am_train_yy)**2))/nom_train
    print "test error: ",  np.sqrt(np.mean((data_0am_test_predy-data_0am_test_y)**2))/nom_test
def train_model():
    """Build per-goods monthly-sales features, tune a Lasso via grid search,
    fit it, plot diagnostics, and return (result, lasso_coef).

    Relies on module-level names: df, data_clean, split_ts, sm, StandardScaler,
    GridSearchCV, Lasso, matplotlib/plt, pd, np, time.
    """
    start_time=time.time()
    data_inp=data_clean(df)
    pivot = data_inp.pivot(index='goods_code', columns='dis_month', values='sale')
    # Rename the pivoted month columns to sales_0..sales_N.
    col_name=[]
    for i in range(len(pivot.columns)):
        col_name.append('sales_'+str(i))
    pivot.columns=col_name
    pivot.fillna(0, inplace=True)
    sub=pivot.reset_index()
    test_features=['goods_code']
    trian_features = ['goods_code']
    for i in range(1,3):
        test_features.append('sales_' + str(i))
    # The earlier months (sales_3..sales_22) are the training features.
    for i in range(3,23):
        trian_features.append('sales_' + str(i))

    sub.fillna(0, inplace=True)
    sub.drop_duplicates(subset=['goods_code'],keep='first',inplace=True)
    # The two most recent months (sales_1, sales_2) are the test features.
    # NOTE(review): the two loops below repeat the appends already done
    # above, so every feature name ends up duplicated in test_features /
    # trian_features and the selected frames contain duplicated columns --
    # this looks unintended and should be confirmed.
    for i in range(1,3):
        test_features.append('sales_' + str(i))
   
    for i in range(3,23):
        trian_features.append('sales_' + str(i))
    X_train = sub[trian_features]
    y_train = sub[['sales_0', 'goods_code']]
    X_test = sub[test_features]    
    sales_type = 'sales_'
    
    # Mean-sales feature over the training months.
    X_train['mean_sale'] = X_train.apply(
        lambda x: np.mean([x[sales_type+'3'], x[sales_type+'4'],x[sales_type+'5'], 
                              x[sales_type+'6'], x[sales_type+'7'],x[sales_type+'8'], x[sales_type+'9'], 
                           x[sales_type+'10'], x[sales_type+'11'],x[sales_type+'12'],x[sales_type+'13'], 
                              x[sales_type+'14'],
                           x[sales_type+'15'], x[sales_type+'16'], x[sales_type+'17'],x[sales_type+'18'],
                           x[sales_type+'19'], x[sales_type+'20'], x[sales_type+'21'], x[sales_type+'22']]), axis=1)
    
    X_test['mean_sale'] = X_test.apply(
        lambda x: np.mean([x[sales_type+'1'], x[sales_type+'2']]), axis=1)
    train_mean=X_train['mean_sale']
    test_mean=X_test['mean_sale']
    train_mean=pd.Series(train_mean)
    test_mean=pd.Series(test_mean)
    
    # Median feature (the original comment said "mode", but np.median is used).
    X_train['median_sale'] = X_train.apply(
        lambda x: np.median([ x[sales_type+'3'], x[sales_type+'4'],
                      x[sales_type+'5'], x[sales_type+'6'], x[sales_type+'7'],x[sales_type+'8'], 
                             x[sales_type+'9'], x[sales_type+'10'], x[sales_type+'11'],x[sales_type+'12'],
                             x[sales_type+'13'], x[sales_type+'14'],x[sales_type+'15'], x[sales_type+'16'], 
                             x[sales_type+'17'],x[sales_type+'18'], x[sales_type+'19'], x[sales_type+'20'],
                             x[sales_type+'21'], x[sales_type+'22']]), axis=1)
    X_test['median_sale'] = X_test.apply(
        lambda x: np.median([x[sales_type+'1'], x[sales_type+'2']]), axis=1)
    
    # Standard-deviation feature.
    X_train['std_sale'] = X_train.apply(
        lambda x: np.std([ x[sales_type+'3'], x[sales_type+'4'],x[sales_type+'5'], x[sales_type+'6'], 
                          x[sales_type+'7'],x[sales_type+'8'], x[sales_type+'9'], x[sales_type+'10'], 
                          x[sales_type+'11'],x[sales_type+'12'],x[sales_type+'13'], x[sales_type+'14'],
                        x[sales_type+'15'], x[sales_type+'16'], x[sales_type+'17'],x[sales_type+'18'], 
                        x[sales_type+'19'], x[sales_type+'20'], x[sales_type+'21'], x[sales_type+'22']]), axis=1)
    X_test['std_sale'] = X_test.apply(
        lambda x: np.std([x[sales_type+'1'], x[sales_type+'2']]), axis=1)
    
    train_median=X_train['median_sale']
    test_median=X_test['median_sale']

    train_std=X_train['std_sale']
    test_std=X_test['std_sale']

    X_train = sub[trian_features]
    X_test = sub[test_features]
    
    formas_train=[train_mean,train_median,train_std]
    formas_test=[test_mean,test_median,test_std]
    train_inp=pd.concat(formas_train,axis=1)
    test_inp=pd.concat(formas_test,axis=1)
    
    # Residual feature from an OLS fit of the target on the summary features.
    # NOTE(review): the "test" residuals below come from fitting OLS of the
    # *training* target on the test features -- this leaks the target and is
    # probably not what was intended; confirm.
    lr_Y=y_train['sales_0']
    lr_train_x=train_inp
    re_train= sm.OLS(lr_Y,lr_train_x).fit()
    train_inp['resid']=re_train.resid
    
    lr_Y=y_train['sales_0']
    lr_test_x=test_inp
    re_test= sm.OLS(lr_Y,lr_test_x).fit()
    test_inp['resid']=re_test.resid
    
    train_inp=pd.concat([y_train,train_inp],axis=1)
    
    ts_test_pro,ts_train_pro=split_ts(df)
    
    # Merge the time-series features produced by split_ts on goods_code/id.
    ts_train_=ts_train_pro.reset_index()
    train_inp=pd.merge(train_inp,ts_train_,left_on='goods_code',right_on='id',how='left')
    test_inp=pd.concat([y_train,test_inp],axis=1)
    
    ts_test_=ts_test_pro.reset_index()
    test_inp=pd.merge(test_inp,ts_test_,left_on='goods_code',right_on='id',how='left')
    train_inp.drop(['sales_0','goods_code'],axis=1,inplace=True)
    test_inp.drop(['sales_0','goods_code'],axis=1,inplace=True)
    
    train_inp.fillna(0,inplace=True)
    train_inp.replace(np.inf,0,inplace=True)
    test_inp.replace(np.inf,0,inplace=True)
    test_inp.fillna(0,inplace=True)

    # Lasso: standardise, then grid-search alpha.
    ss = StandardScaler()
    train_inp_s= ss.fit_transform(train_inp) 
    test_inp_s= ss.transform(test_inp)
    alpha_ridge = [1e-4,1e-3,1e-2,0.1,1]

    # NOTE(review): this loop refits and discards the model each iteration
    # (`coeffs` is never filled); only the last `alpha` survives to seed the
    # grid search below. Lasso's `normalize` parameter was removed in
    # scikit-learn 1.2.
    coeffs = {}
    for alpha in alpha_ridge:
        r = Lasso(alpha=alpha, normalize=True, max_iter=1000000)
        r = r.fit(train_inp_s, y_train['sales_0'])

    grid_search = GridSearchCV(Lasso(alpha=alpha, normalize=True), scoring='neg_mean_squared_error',
                           param_grid={'alpha': alpha_ridge}, cv=5, n_jobs=-1)
    grid_search.fit(train_inp_s, y_train['sales_0'])
    
    # Plot the cross-validated RMSE for each candidate alpha.
    alpha = alpha_ridge
    rmse = list(np.sqrt(-grid_search.cv_results_['mean_test_score']))
    plt.figure(figsize=(6,5))
    
    lasso_cv = pd.Series(rmse, index = alpha)
    lasso_cv.plot(title = "Validation - LASSO", logx=True)
    plt.xlabel("alpha")
    plt.ylabel("rmse")
    plt.show()
    
    # NOTE(review): `least_lasso` is simply the smallest candidate alpha, not
    # the alpha with the lowest CV error (grid_search.best_params_ is unused).
    least_lasso=min(alpha)
    lasso = Lasso(alpha=least_lasso,normalize=True)
    model_lasso=lasso.fit(train_inp_s,y_train['sales_0'])
    
    print("lasso feature.......................")
    # Keep only the non-zero coefficients, indexed by feature name.
    lasso_coef = pd.Series(model_lasso.coef_,index = train_inp.columns)
    lasso_coef=lasso_coef[lasso_coef!=0.0000]
    lasso_coef=lasso_coef.astype(float)
    print(".....lasso_coef..............")

    print(lasso_coef.sort_values(ascending=False).head(10))
    print(" R^2,拟合优度")
    
    # Bar chart of the 5 smallest and 5 largest non-zero coefficients.
    matplotlib.rcParams['figure.figsize'] = (8.0, 10.0)
    imp_coef = pd.concat([lasso_coef.sort_values().head(5), 
                     lasso_coef.sort_values().tail(5)])

    imp_coef.plot(kind = "barh")
    plt.title("Coefficients in the Lasso Model")
    
    print(lasso.score(train_inp_s,y_train['sales_0']))
    
    print(lasso.get_params())  
    print('参数信息')
    print(lasso.set_params(fit_intercept=False)) 
    lasso_preds =model_lasso.predict(test_inp_s)
    # Scatter of predictions against the true values, with a y=x guide line.
    fig, ax = plt.subplots()
    ax.scatter(y_train['sales_0'],lasso_preds)
    ax.plot([y_train['sales_0'].min(), y_train['sales_0'].max()], [y_train['sales_0'].min(), y_train['sales_0'].max()], 'k--', lw=4)
    ax.set_xlabel('y_true')
    ax.set_ylabel('Pred')
    plt.show()
    y_pred=pd.DataFrame(lasso_preds,columns=['y_pred'])
    
    matplotlib.rcParams['figure.figsize'] = (6.0, 6.0)
    preds = pd.DataFrame({"preds":y_pred['y_pred'], "true":y_train['sales_0']}) 
    preds["residuals"] = preds["true"] - preds["preds"]
    
    print("打印预测值描述.....................")
    preds=preds.astype(float)
    print(preds.head())
    print(preds.describe())
    print(preds.shape)
    preds.plot(x = "preds", y = "residuals",kind = "scatter")
    plt.title("True and residuals")
    plt.show()
    
    data_out=[y_train['goods_code'],y_train['sales_0'],y_pred]
    result=pd.concat(data_out,axis=1)
    # Per-item MAPE (percent absolute error relative to the true sales).
    result['mape']=abs((result['sales_0']-result['y_pred'])/result['sales_0']*100)    
    return result,lasso_coef

# In[13]:


# Split data into train and test sets (80/20). `x` and `price` are assumed to
# be the feature matrix and target defined earlier in the file -- TODO confirm.
# random_state=0 makes the split reproducible across runs.
X_train, X_test, y_train, y_test =train_test_split(x, price, test_size=0.2,random_state=0)


# In[14]:


# Lasso with cross-validated alpha selection: scan 100 alphas in [1e-5, 1]
# with 10-fold CV; LassoCV keeps the best one in `lasso.alpha_`.
lasso = LassoCV(alphas=np.linspace(0.00001, 1, 100), cv=10)
L = lasso.fit(X_train, y_train)
print(lasso.score(X_train, y_train))
print(lasso.score(X_test, y_test))
# The original left these as bare expressions -- no-ops in a script
# (notebook residue); print them so the chosen alpha and coefficients
# are actually reported.
print("alpha_:", lasso.alpha_)
print("coef_:", lasso.coef_)
# Train/test residuals and their RMSE.
Error_Tr = (y_train - L.predict(X_train))
ErroT = (y_test - L.predict(X_test))
Tr_rmse = np.sqrt(np.mean(Error_Tr ** 2))
T_rmse = np.sqrt(np.mean(ErroT ** 2))
print(Tr_rmse, T_rmse)





# In[8]:
# ----- Example (예제) #47 -----
# votes: 0
# file: 2017_2_1.py  project: seoyoungs/Study
# Linear-regression baseline. `lr` is assumed to be a LinearRegression fit
# earlier in the file -- TODO confirm.
print("level 1 Linear Regression")
print("훈련 세트 점수: {:.2f}".format(lr.score(X_train, y_train)))
print("테스트 세트 점수: {:.2f}".format(lr.score(X_test, y_test)))
# Ridge model (L2 regularization), weak penalty alpha=0.01.
# NOTE(review): the `normalize=` keyword was deprecated in scikit-learn 1.0
# and removed in 1.2 -- this call raises TypeError on modern versions; verify
# the pinned scikit-learn version, or standardize features explicitly.
ridge_model = Ridge(alpha=0.01, normalize=True)
ridge_model.fit(X_train, y_train)
pred_ridge = ridge_model.predict(X_test)
print("level 1 Ridge Regression")
print("훈련 세트 점수: {:.2f}".format(ridge_model.score(X_train, y_train)))
print("테스트 세트 점수: {:.2f}".format(ridge_model.score(X_test, y_test)))
# Lasso model (L1 regularization), alpha=0.001.
# NOTE(review): same `normalize=` removal caveat as above, even with False.
Lasso_model = Lasso(alpha=0.001, normalize=False)
Lasso_model.fit(X_train, y_train)
pred_Lasso = Lasso_model.predict(X_test)
print("level 1 Lasso Regression")
print("훈련 세트 점수: {:.2f}".format(Lasso_model.score(X_train, y_train)))
print("테스트 세트 점수: {:.2f}".format(Lasso_model.score(X_test, y_test)))
# ElasticNet model (combined L1/L2 penalty), alpha=0.01.
model_enet = ElasticNet(alpha=0.01, normalize=False)
model_enet.fit(X_train, y_train)
pred_test_enet = model_enet.predict(X_test)
print("level 1 ElasticNet Regression")
print("훈련 세트 점수: {:.2f}".format(model_enet.score(X_train, y_train)))
print("테스트 세트 점수: {:.2f}".format(model_enet.score(X_test, y_test)))
print('-----------1 단계 끝 --------------')
'''
# ============= 2. room_type 변수 제거 ========================
nyc_model_xx= df1.drop(columns=['room_type'])
nyc_model_xx, nyc_model_yx = nyc_model_xx.iloc[:,:-1], nyc_model_xx.iloc[:,-1]
X_train_x, X_test_x, y_train_x, y_test_x = train_test_split(nyc_model_xx, 
                             nyc_model_yx, test_size=0.3,random_state=42)
# ----- Example (예제) #48 -----
# votes: 0
# Plain Lasso (default alpha=1.0) fit on the existing train/test split.
from sklearn.linear_model import Lasso

lasso = Lasso()


# In[106]:


lasso.fit(X_train, y_train)


# In[107]:


# Score the model on the held-out test set. The original left `lasso_score`
# as a bare expression (notebook residue, a no-op in a script) -- print it.
lasso_score = lasso.score(X_test, y_test)
print("Lasso test R^2:", lasso_score)


# In[108]:


# Score the model on the training set. This rebinds `lasso_score`, matching
# the original behavior (the test-set score above is overwritten).
lasso_score = lasso.score(X_train, y_train)
print("Lasso train R^2:", lasso_score)

# Make predictions using the testing set