Example #1
mod = lm.LinearRegression()
n_features, r2_train, r2_test, snr = fit_on_increasing_size(model=mod)
argmax = n_features[np.argmax(r2_test)]
plot_r2_snr(n_features, r2_train, r2_test, argmax, snr, axis[0], 'Regression')

# %% L2 regularization

mod = lm.Ridge(alpha=10)  # lambda is alpha!
n_features, r2_train, r2_test, snr = fit_on_increasing_size(model=mod)
argmax = n_features[np.argmax(r2_test)]
plot_r2_snr(n_features, r2_train, r2_test, argmax, snr, axis[1], 'Ridge')

# %% L1 regularization

mod = lm.Lasso(alpha=.1)  # lambda is alpha !
n_features, r2_train, r2_test, snr = fit_on_increasing_size(model=mod)
argmax = n_features[np.argmax(r2_test)]
plot_r2_snr(n_features, r2_train, r2_test, argmax, snr, axis[2], 'Lasso')

# %% L1-L2 regularization

mod = lm.ElasticNet(alpha=.5, l1_ratio=.5)
n_features, r2_train, r2_test, snr = fit_on_increasing_size(model=mod)
argmax = n_features[np.argmax(r2_test)]
plot_r2_snr(n_features, r2_train, r2_test, argmax, snr, axis[3], 'ElasticNet')

plt.tight_layout()
axis[3].set_xlabel("Number of input features", fontsize=16)
plt.savefig(
    "/home/ed203246/git/pystatsml/images/linear_regression_penalties.png")
Example #2
X = df.loc[0:, ['lights','T1','RH_1','T2','RH_2','T3','RH_3','T4','RH_4','T5','RH_5','T6','RH_6','T7','RH_7','T8', 'RH_8', 'T9','RH_9', 'T_out', 'Press_mm_hg','RH_out','Windspeed','Visibility','Tdewpoint','rv1','rv2','nsm']]

y = df.loc[0:, 'Appliances']

X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=50)

print("shape of X2", X.shape)

#REGRESSION MODELS - defining the model, fitting the model, predicting, then taking the R2 score of the prediction

#Lasso Regression
laso = linear_model.Lasso(alpha=0.2)
laso.fit(X_train, y_train)
y_predict_laso = laso.predict(X_test)

print("R2 Score for Laso Regression: ", r2_score(y_test, y_predict_laso))

#Ridge Regression
ridge = linear_model.Ridge(alpha=.5)
ridge.fit(X_train, y_train)
y_predict_ridge = ridge.predict(X_test)

print("R2 Score for Ridge Regression: ", r2_score(y_test, y_predict_ridge))

#LassoLars Regression
lassoLars = linear_model.LassoLars(alpha=.1)
lassoLars.fit(X_train, y_train)
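# --- Not part of the original snippet: it is cut off right after fitting ---
# Completing the same predict-then-score pattern used for Lasso and Ridge above
# (a sketch, assuming the same train/test split):
y_predict_lassoLars = lassoLars.predict(X_test)
print("R2 Score for LassoLars Regression: ", r2_score(y_test, y_predict_lassoLars))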
Example #3
def graph():
    if request.method == 'POST':
        f = request.files['file']
        path = os.path.join(app.config['UPLOAD_FOLDER'], f.filename)

        df = pd.read_csv(path)
        df.head()
        close_px = df['Adj Close']
        mavg = close_px.rolling(window=100).mean()

        #Setting Features
        dfreg = df.loc[:, ['Adj Close', 'Volume']]
        dfreg['HL_PCT'] = (df['High'] - df['Low']) / df['Close'] * 100.0
        dfreg['PCT_change'] = (df['Close'] - df['Open']) / df['Open'] * 100.0

        # Handle missing values by filling with -99999 (treated as an outlier)
        dfreg.fillna(value=-99999, inplace=True)

        # We want to separate 1 percent of the data to forecast
        forecast_out = int(math.ceil(0.01 * len(dfreg)))

        # Separating the label here; we want to predict the Adj Close
        forecast_col = 'Adj Close'
        dfreg['label'] = dfreg[forecast_col].shift(-forecast_out)

        # Scale X so that everyone has the same distribution for linear regression
        X = np.array(dfreg.drop(['label'], axis=1))
        X = preprocessing.scale(X)

        # Finally we want to find the data series of late X and early X (train)
        # for model generation and evaluation
        X_lately = X[-forecast_out:]
        X = X[:-forecast_out]

        # Separate the label and identify it as y
        y = np.array(dfreg['label'])
        y = y[:-forecast_out]

        X_train, X_test, y_train, y_test = train_test_split(X,
                                                            y,
                                                            test_size=0.2,
                                                            random_state=0)

        # Linear regression
        clfreg = LinearRegression(n_jobs=-1)
        clfreg.fit(X_train, y_train)

        #Lasso Regression
        clflasreg = linear_model.Lasso(alpha=0.1)
        clflasreg.fit(X_train, y_train)

        # Quadratic Regression 2
        clfpoly2 = make_pipeline(PolynomialFeatures(2), Ridge())
        clfpoly2.fit(X_train, y_train)

        # Quadratic Regression 3
        clfpoly3 = make_pipeline(PolynomialFeatures(3), Ridge())
        clfpoly3.fit(X_train, y_train)

        # Get Scores
        confidencereg = clfreg.score(X_test, y_test)
        confidencelassref = clflasreg.score(X_test, y_test)
        confidencepoly2 = clfpoly2.score(X_test, y_test)
        confidencepoly3 = clfpoly3.score(X_test, y_test)

        #Prediction values here
        forecast_set_reg = clfreg.predict(X_lately)
        forecast_set_las_reg = clflasreg.predict(X_lately)
        forecast_set_poly_2_reg = clfpoly2.predict(X_lately)
        forecast_set_poly_3_reg = clfpoly3.predict(X_lately)

        # dfreg['Forecast'] = np.nan
        json_data = {
            'Linear': forecast_set_reg,
            'Lasso': forecast_set_las_reg,
            'QRidge': forecast_set_poly_2_reg,
            'QRidge3': forecast_set_poly_3_reg
        }
        # json_data = json.dumps(data)
        print(json_data)

        return render_template('graph.html',
                               title='Graph',
                               prediction=json_data)
    else:
        return render_template('graph.html', title='Graph')
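# --- Not part of the original route ---
# The forecast values above are NumPy arrays, which the commented-out
# json.dumps(data) call cannot serialize directly.  A sketch of how they could be
# converted first (the helper name is hypothetical):
import json
import numpy as np

def forecasts_to_json(forecasts):
    """Convert a dict of NumPy forecast arrays into a JSON string of plain lists."""
    return json.dumps({name: np.asarray(values).tolist()
                       for name, values in forecasts.items()})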
Example #4
# Linear regression models on the UCI Auto MPG dataset
import numpy
from pandas import read_table
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
import sklearn.linear_model as lm
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data"
names = ['mpg', 'cylinders', 'displacement', 'horsepower', 'weight', 'acceleration', 'model year', 'origin', 'car name']
dataframe = read_table(url, names=names, delimiter=r"\s+", na_values="?")
dataframe = dataframe[(~numpy.isnan(dataframe['horsepower']))]
array = dataframe.values
X = array[:,1:8]
Y = array[:,0]
linregression = lm.LinearRegression()
ridge = lm.Ridge()
lasso = lm.Lasso()
lars = lm.Lars()
omp = lm.OrthogonalMatchingPursuit()
br = lm.BayesianRidge()
model = ridge
model = model.fit(X, Y)
prediction = model.predict(X)
r2 = r2_score(Y, prediction)
mse = mean_squared_error(Y, prediction)
print("R2: %.3f, MSE: %.3f") % (r2, mse)
Example #5
        encoded = Dense(20, activation='relu')(input_img)
        decoded = Dense(X.shape[1], activation='sigmoid')(encoded)
        autoencoder = Model(input_img, decoded)
        autoencoder.compile(optimizer='adadelta', loss='binary_crossentropy')
        autoencoder.fit(X, X,
                        epochs=20,
                        batch_size=32,
                        shuffle=True,
                        validation_split=0.8)
        
        return autoencoder.predict(X)

    models = {
            'ridge  ': linear_model.Ridge(alpha=0.1, normalize=False),
            'lasso  ': linear_model.Lasso(alpha=1e-6, max_iter=1e8),
            'lr     ': linear_model.LogisticRegression(solver='lbfgs', warm_start=True, max_iter=1e4),
            'lrCV   ': linear_model.LogisticRegressionCV(solver='lbfgs', max_iter=1e4, cv=5),
            'mlp_clf': neural_network.MLPClassifier(solver='lbfgs', alpha=1e-5,hidden_layer_sizes=(256, 64), random_state=1),
            'mlp_reg': neural_network.MLPRegressor(solver='lbfgs', alpha=1e-5,hidden_layer_sizes=(256, 64, 32, 32, 32), random_state=1),
            'svc    ': svm.SVC(C= 10, kernel='rbf'),
            'rfreg  ': ensemble.RandomForestRegressor(max_depth=15),
            'rfclf  ': ensemble.RandomForestClassifier(max_depth=12),
            'lgbclf ': lgb.LGBMClassifier(gamma='auto', num_leaves=4,learning_rate=0.001, n_estimators=2000, verbose = 100),
            'lgbreg ': lgb.LGBMRegressor(gamma='auto', num_leaves=31,learning_rate=0.001, n_estimators=20000, verbose = 100),
            'knn    ': neighbors.KNeighborsClassifier(n_neighbors=30, n_jobs=15),
Example #6
 def LassoRegressor(self, noOfTrials, diagram, gateName):
     from sklearn import linear_model
     model = linear_model.Lasso()
     return self.RegressionTemplate(model, noOfTrials, diagram, gateName)
Example #7
def Regression(z,
               p=3,
               lamb=0,
               model='OLS',
               resampling='kfold',
               error='MSE',
               x=x_array,
               y=y_array,
               intercept=True):
    X = CreateDesignMatrix(x, y, p)
    # print("polynomial: ", p, " Determinant: ", np.linalg.det(X.T.dot(X)))

    if (model == 'OLS' or model == 'Ridge'):
        if model == 'OLS': lamb = 0

        if resampling == 'none':
            if (intercept):
                betas = beta(X, z, lamb)
            else:
                X_no = X[:, 1:]
                betas = beta(X_no, z, lamb)
                betas = np.insert(betas, 0, np.mean(z))
            z_tilde = X.dot(betas)
            MSE, R2 = Error_Analysis(z, z_tilde, "MSE and R2")
            var_betas = Variance_Bias_Analysis(X,
                                               z,
                                               z_tilde,
                                               lamb,
                                               result='Variance',
                                               betas=betas)
            return MSE, R2, var_betas, betas, z_tilde
        elif resampling == 'kfold':
            return Kfold(X, z, lamb, intercept=intercept)

    if model == 'Lasso':  # Here we use scikit-learn
        # The design matrix X already contains a column of ones, so when an
        # intercept is requested we let that column absorb it and switch off
        # scikit-learn's own intercept fitting (and vice versa).
        if intercept: fit_intercept_bool = False
        else: fit_intercept_bool = True
        model_lasso = skl.Lasso(alpha=lamb,
                                fit_intercept=fit_intercept_bool,
                                normalize=True,
                                tol=6000)

        if resampling == 'none':
            if (intercept):
                model_lasso.fit(X, z)
                betas = model_lasso.coef_
            else:
                X_no = X[:, 1:]
                model_lasso.fit(X_no, z)
                betas = np.insert(model_lasso.coef_, 0, model_lasso.intercept_)
            z_tilde = X.dot(betas)
            MSE, R2 = Error_Analysis(z, z_tilde, "MSE and R2")
            var_betas = Variance_Bias_Analysis(X,
                                               z,
                                               z_tilde,
                                               lamb,
                                               result='Variance',
                                               betas=betas)
            return MSE, R2, var_betas, betas, z_tilde
        elif resampling == 'kfold':
            return Kfold(X, z, lamb, model_lasso, intercept=intercept)
    return
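# --- Not part of the original file ---
# A hypothetical call, assuming x_array, y_array (the defaults above) and the
# response z are defined elsewhere in the script:
# MSE, R2, var_betas, betas, z_tilde = Regression(z, p=5, lamb=1e-3,
#                                                 model='Ridge', resampling='none')
# kfold_result = Regression(z, p=5, lamb=1e-4, model='Lasso', resampling='kfold')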
Example #8
def fit_regression(P, x, u, rule="LS", retall=False, **kws):
    """
    Fit a polynomial chaos expansion using linear regression.

    Args:
        P (Poly) : Polynomial expansion with `P.shape=(M,)` and `P.dim=D`.
        x (array_like) : Collocation nodes with `x.shape=(D,K)`.
        u (array_like) : Model evaluations with `len(u)=K`.
        retall (bool) : If True return Fourier coefficients in addition to R.
        rule (str) : Regression method used.

    Returns:
        (Poly, np.ndarray) : Fitted polynomial with `R.shape=u.shape[1:]` and
                `R.dim=D`. The Fourier coefficients in the estimation.

    Examples:
        >>> x, y = cp.variable(2)
        >>> P = cp.Poly([1, x, y])
        >>> s = [[-1,-1,1,1], [-1,1,-1,1]]
        >>> u = [0,1,1,2]
        >>> print(cp.around(fit_regression(P, s, u), 14))
        0.5q0+0.5q1+1.0
    """
    x = np.array(x)
    if len(x.shape) == 1:
        x = x.reshape(1, *x.shape)
    u = np.array(u)

    Q = P(*x).T
    shape = u.shape[1:]
    u = u.reshape(u.shape[0], int(np.prod(u.shape[1:])))

    rule = rule.upper()

    # Local rules
    if rule == "LS":
        uhat = linalg.lstsq(Q, u)[0].T

    elif rule == "T":
        uhat, alphas = rlstsq(Q, u, kws.get("order", 0),
                              kws.get("alpha", None), False, True)
        uhat = uhat.T

    elif rule == "TC":
        uhat = rlstsq(Q, u, kws.get("order", 0), kws.get("alpha", None), True)
        uhat = uhat.T

    else:

        # Scikit-learn wrapper
        try:
            _ = linear_model
        except NameError:
            raise NotImplementedError("sklearn not installed")

        if rule == "BARD":
            solver = linear_model.ARDRegression(fit_intercept=False,
                                                copy_X=False,
                                                **kws)

        elif rule == "BR":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = linear_model.BayesianRidge(**kws)

        elif rule == "EN":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = linear_model.ElasticNet(**kws)

        elif rule == "ENC":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = linear_model.ElasticNetCV(**kws)

        elif rule == "LA":  # success
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = linear_model.Lars(**kws)

        elif rule == "LAC":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = linear_model.LarsCV(**kws)

        elif rule == "LAS":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = linear_model.Lasso(**kws)

        elif rule == "LASC":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = linear_model.LassoCV(**kws)

        elif rule == "LL":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = linear_model.LassoLars(**kws)

        elif rule == "LLC":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = linear_model.LassoLarsCV(**kws)

        elif rule == "LLIC":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = linear_model.LassoLarsIC(**kws)

        elif rule == "OMP":
            solver = linear_model.OrthogonalMatchingPursuit(**kws)

        uhat = solver.fit(Q, u).coef_

    u = u.reshape(u.shape[0], *shape)

    R = cp.poly.sum((P * uhat), -1)
    R = cp.poly.reshape(R, shape)

    if retall == 1:
        return R, uhat

    elif retall == 2:
        if rule == "T":
            return R, uhat, Q, alphas
        return R, uhat, Q

    return R
Example #9
## Step 2. Split the image into blocks and build the regression data
yreg = []
cdat = []
aa = ImageWavelet(np.zeros((nV, nH)))  # ImageWavelet object for a zero-filled image
X = aa.BaseMat()  # X holds the basis (design) matrix

for ytop in range(0, lobs.shape[0], stride):
    for xlft in range(0, lobs.shape[1], stride):
        ll = lobs[ytop:ytop + nV, xlft:xlft + nH]
        yreg.append(ll.reshape((nH * nV, )) - ll.mean())  # subtract the mean beforehand
        cdat.append(aa.Wv2coeff(ll))
yreg = np.array(yreg)
cdat = np.array(cdat)

## Step 3. Run LASSO regression
reg = linear_model.Lasso(alpha=5e-4, fit_intercept=False, tol=1e-6)
reg.fit(X[:, 1:], yreg.T)  # the mean component is not estimated, since it has been set to 0

## Step 4. Evaluate the results
lrec = np.zeros((imgsize, imgsize))
cnt = 0
recdat = np.hstack((cdat[:, 0].reshape(reg.coef_.shape[0], 1), reg.coef_))
for ytop in range(0, lrec.shape[0], stride):
    for xlft in range(0, lrec.shape[1], stride):
        lrec[ytop:ytop + nV, xlft:xlft + nH] = aa.Coeff2Wv(recdat[cnt, :])
        cnt += 1

vmin = np.min((lobs, lrec, ltrue))
vmax = np.max((lobs, lrec, ltrue))

plt.figure()
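# --- Not part of the original snippet (it is cut off after plt.figure()) ---
# A sketch of how the three images might be compared on the shared colour scale
# computed above (panel titles and the colormap choice are assumptions):
for k, (img, title) in enumerate([(lobs, 'observed'),
                                  (lrec, 'LASSO reconstruction'),
                                  (ltrue, 'ground truth')]):
    plt.subplot(1, 3, k + 1)
    plt.imshow(img, vmin=vmin, vmax=vmax, cmap='gray')
    plt.title(title)
    plt.axis('off')
plt.show()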
Example #10
#1(c3). Train and evaluate the predictive performance for polynomial (to the 4th degree) with Ridge regularization (regularization strength set to 0.5)

ridge_model = linear_model.Ridge(alpha=0.5)
ridge_model.fit(x_polyTrain, y_polyTrain)
y_predicted_pl_ridge = ridge_model.predict(x_polyTest)
plt.plot(x_test, y_test, "r.")
plt.plot(x_test, y_predicted_pl_ridge, "b.")

from sklearn.metrics import mean_squared_error
evaluation_pl_ridge = mean_squared_error(y_polyTest, y_predicted_pl_ridge)
print ("The mean squared error for the polynomial with Ridge regularization is:", evaluation_pl_ridge)

#1(c4). Train and evaluate the predictive performance for polynomial (to the 4th degree) with Lasso regularization (regularization strength set to 0.5)

lasso_model = linear_model.Lasso(alpha=0.5)
lasso_model.fit(x_polyTrain, y_polyTrain)
y_predicted_pl_lasso = lasso_model.predict(x_polyTest)
plt.plot(x_test, y_test, "r.")
plt.plot(x_test, y_predicted_pl_lasso, "b.")

from sklearn.metrics import mean_squared_error
evaluation_pl_lasso = mean_squared_error(y_polyTest, y_predicted_pl_lasso)
print ("The mean squared error for the polynomial with Lasso regularization is:", evaluation_pl_lasso)

#1(c5).Evaluate using mean squared error. Report all values in a single table. (Code and Write-up)

fig = plt.figure(dpi=120)
ax = fig.add_subplot(1,1,1)
table_data=[
    ["The mean squared error of linear regression", evaluation_lr],
Example #11
house.all=pd.get_dummies(house.all,drop_first=True, dummy_na=False)

# house.train().sample(10)
x=house.train().drop(['SalePrice','test'],axis=1)
y=house.train().SalePrice
# x_train, x_test, y_train, y_test = train_test_split(x,y)

x_train = x.to_numpy().astype(float)
np.any(np.isnan(x_train))
y_train = y.to_numpy().astype(float)

from sklearn.model_selection import GridSearchCV

alpha_100 =[{'alpha': np.logspace(-4, 2, 100)}]
lasso = linear_model.Lasso(normalize=True)
# lasso.fit(x_train, y_train) # fit data
para_search = GridSearchCV(estimator=lasso,param_grid=alpha_100,scoring='neg_mean_squared_log_error',cv=5)
para_search.fit(x_train,y_train)

rmsle(para_search.predict(x_test),y_test)

#R ^2 Coefficient
lasso.score(x_train,y_train)
rmse_cv(lasso,x_train,y_train)
rmsle(lasso.predict(x_test),y_test)

# %% Compare with Add features
# del house_sf
# house_sf=House('data/train.csv','data/test.csv')
#
Example #12
def main():
    #---------------LOAD PARAMETERS, INITIALIZE VARS---------------
    results = []
    para = params_setup("lasso")  # "lasso" for lasso VAR

    # Which variables are > thresh?
    thresh = para.variable_threshold
    # How far in the future are we predicting?
    offset = para.horizon
    # What is the window of previous timesteps we're looking at?
    attn_length = para.attention_len

    #---------------PREPARE DATA------------------
    (X_cut, y_cut) = load_data(para)

    num_timesteps = X_cut.shape[0]
    num_features = X_cut.shape[1]

    X = list()
    for i in range(attn_length, num_timesteps - offset):
        X.append(
            (X_cut[i - attn_length:i, :]).flatten()
        )  # must flatten (timesteps, features) into 1D b/c model only takes up to 2D
    X = np.array(X)
    y = y_cut[attn_length + offset:num_timesteps]

    # Split into training and testing
    cutoff_idx = math.floor(X.shape[0] * 0.8)
    X_train = X[0:cutoff_idx, :]
    y_train = y[0:cutoff_idx]

    X_test = X[cutoff_idx:, :]
    y_test = y[cutoff_idx:]

    print("X_train shape:", X_train.shape)
    print("y_train shape:", y_train.shape)
    print("X_test shape:", X_test.shape)
    print("y_test shape:", y_test.shape)

    for aa in ALPHAS:
        clf = linear_model.Lasso(alpha=aa, normalize=True)
        clf.fit(X_train, y_train)

        #0 rated emotion signal
        #1-13 MFCCs
        #14-26 dMFCCs
        #27-39 ddMFCCs
        #40 clarity
        #41 brightness
        #42 key_strength
        #43 rms
        #44 centroid
        #45 spread
        #46 skewness
        #47 kurtosis
        #48-59 chroma
        #60 mode
        #61 compress
        #62 hcdf
        #63 flux
        #64-74 lpcs

        # Determine which variables (timesteps, since this is AR) are most important

        inds = [
            j for (i, j) in zip(clf.coef_, range(len(clf.coef_)))
            if abs(i) >= thresh
        ]
        inds_mod = [x % num_features for x in inds]

        # Get RMSE
        RMSE = ((len(y_test)**-1) * sum((clf.predict(X_test) - y_test)**2))**.5
        #RMSE = ((len(y_test) ** -1) * sum((clf.predict(X_test[:][:-2]) - y_test[:-2]) ** 2))**.5
        #RMSE = (len(y_test) ** -1) * sum((clf.predict(X_test) - y_test) ** 2)
        print("RMSE for alpha " + str(aa) + ": " + (str)(RMSE))
        plt.plot(range(len(y_test[:-2])), clf.predict(X_test[:][:-2]))
        plt.plot(range(len(y_test[:-2])), y_test[:-2])
        #plt.plot(range(len(y_test)),clf.predict(X_test))
        #plt.plot(range(len(y_test)),y_test)
        #plt.show()
        #plt.pause(3)
        #plt.close()

        results.append(
            Result(aa, RMSE, clf.coef_, sorted(inds_mod), clf.intercept_))
    min_rmse = results[0].RMSE
    min_idx = 0
    for i in range(0, len(results)):
        if (results[i].RMSE < min_rmse):
            min_rmse = results[i].RMSE
            min_idx = i
    print("Minimum RMSE: " + str(min_rmse) + " for alpha=" +
          str(results[min_idx].alpha))
    # ----------------- WRITE RESULT OF BEST ALPHA TO FILE -------------------
    best_result = results[min_idx]
    with open(para.output_filename, 'w') as f:
        f.write("RMSE: " + (str)(best_result.RMSE))
        f.write("\nAlpha: " + (str)(best_result.alpha))
        f.write("\nCoefficients:\n")
        f.write(np.array2string(best_result.coefs, threshold=np.inf))
        f.write("\nCoefficient indices over threshold " +
                str(para.variable_threshold) + ":\n")
        f.write(' '.join(str(x) for x in best_result.coef_indices))
        f.write("\nTotal number of coefficients over threshold: " +
                str(len(best_result.coef_indices)))
        f.write("\nRegression bias term: " + str(best_result.bias))
        # Write overall RMSE for each alpha as well
        f.write("\n\nRMSEs for each alpha:\n")
        for result in results:
            f.write("Alpha: " + str(result.alpha) + ",\tRMSE: " +
                    str(result.RMSE) + "\n")
        print("Successfully wrote results to file " + para.output_filename)
Example #13
def lasso_regression(df2, results):
    rlm = linear_model.Lasso(alpha=1)
    model = rlm.fit(df2, results)
    return model
Example #14
 def __init__(self, **kwargs):
     super().__init__(**kwargs)
     self.m = linear_model.Lasso(alpha=self.params.get("alpha", 0.1))
Example #15
def lasso(alpha=1.0):
    reg = linear_model.Lasso(alpha=alpha)
    return reg
Example #16
import numpy as np
Example #17
train_X = np.array(data[train_idxs, 1:-1], dtype = float)
train_y = np.array(data[train_idxs, -1], dtype = float)
test_X = np.array(data[test_idxs, 1:-1], dtype = float)
test_y = np.array(data[test_idxs, -1], dtype = float)

# uncomment these lines to only include certain parts of the data
# only use BST
#train_X, test_X = [arr[:,:6] for arr in [train_X, test_X]]

# only use atk, spa, spe
#train_X, test_X = [arr[:,[1, 3, 5]] for arr in [train_X, test_X]]

# use 4 different fitting models to compare accuracy
x = 2638952
models = [svm.LinearSVC(random_state=x, max_iter=1e5), 
	  linear_model.Lasso(random_state=x, alpha=0.1), 
	  neighbors.KNeighborsClassifier(n_neighbors=5, weights='distance'),
	  svm.SVR(kernel='linear')]
titles = ['Linear Classification', 'Lasso Regression', 'K-Neighbors Classification', 'Linear Regression']

# train & test the models
# plot the deviations from the correct values of each model
fig = plt.figure()
fig.add_subplot(111, frameon=False)
plt.tick_params(labelcolor='none', top=False, bottom=False, left=False, right=False)
plt.xlabel('Error', fontsize = 16)
plt.ylabel('Density', fontsize = 16)
for i in range(len(titles)):
	ax = fig.add_subplot(2, 2, i+1)
	clf = models[i]
	clf.fit(train_X, train_y)
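	# --- Not part of the original snippet (cut off after fitting) ---
	# A sketch of the per-model error plot that the comments above describe:
	errors = clf.predict(test_X) - test_y
	ax.hist(errors, bins=30, density=True)
	ax.set_title(titles[i])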
Example #18
File: dci.py Project: csquires/dci
def estimate_ddag_skeleton(X1,
                           X2,
                           candidate_edges,
                           changed_nodes,
                           alpha=.1,
                           lasso_alpha=None,
                           max_set_size=None,
                           verbose=False):
    """

    :param X1: (n1 x p) data matrix from first context
    :param X2: (n2 x p) data matrix from second context
    :param candidate_edges: set of edges that could possibly be in the skeleton
    :param changed_nodes: nodes adjacent to a candidate edge or with changed variance
    :param alpha: significance level to reject null hypothesis b1 = b2. Lower alpha makes it
    easier to accept the null hypothesis, so more edges will be deleted.
    :param lasso_alpha:
    :param max_set_size:
    :param verbose:
    :return:
    """
    n1, p1 = X1.shape
    n2, p2 = X2.shape
    if p1 != p2:
        raise ValueError("X1 and X2 must have the same number of dimensions")
    if isinstance(alpha, Iterable):
        alpha_ = max(alpha)
    else:
        alpha_ = alpha

    retained_edges = set()
    retained_edges_with_p = {}
    deleted_edges = set()
    deleted_edges_with_p = {}

    candidate_edges = {tuple(sorted((i, j))) for i, j in candidate_edges}

    printv = print if verbose else (lambda x: 0)

    S1 = X1.T @ X1
    S2 = X2.T @ X2
    for i, j in candidate_edges:
        printv("Checking edge (%d, %d)" % (i, j))
        not_ij = changed_nodes - {i, j}
        is_regression_invariant = False
        max_p = float('-inf')
        X1_j = X1[:, j]
        X2_j = X2[:, j]
        X1_i = X1[:, i]
        X2_i = X2[:, i]
        for cond_set in math_utils.powerset(not_ij, max_set_size=max_set_size):
            m_i = cond_set + (i, )
            m_j = cond_set + (j, )
            X1_mi = X1[:, m_i]
            X2_mi = X2[:, m_i]
            X1_mj = X1[:, m_j]
            X2_mj = X2[:, m_j]

            # marginal precision matrices
            if lasso_alpha is None:
                K1_ij = np.linalg.inv(S1[np.ix_(m_i, m_i)])
                K2_ij = np.linalg.inv(S2[np.ix_(m_i, m_i)])
                K1_ji = np.linalg.inv(S1[np.ix_(m_j, m_j)])
                K2_ji = np.linalg.inv(S2[np.ix_(m_j, m_j)])
                b1_mij = K1_ij @ S1[j, m_i].T
                b2_mij = K2_ij @ S2[j, m_i].T
                b1_mji = K1_ji @ S1[i, m_j].T
                b2_mji = K2_ji @ S2[i, m_j].T
            else:
                clf = linear_model.Lasso(alpha=lasso_alpha)
                clf.fit(X1_mi, X1_j)
                b1_mij = clf.coef_
                clf.fit(X2_mi, X2_j)
                b2_mij = clf.coef_
                clf.fit(X1_mj, X1_i)
                b1_mji = clf.coef_
                clf.fit(X2_mj, X2_i)
                b2_mji = clf.coef_

            # calculate t_ij (regressing j on i) and find its p-value
            ssr1_ij = np.sum(np.square(X1_j - X1_mi @ b1_mij.T))
            ssr2_ij = np.sum(np.square(X2_j - X2_mi @ b2_mij.T))
            var1_ij = ssr1_ij / (n1 - len(m_i))
            var2_ij = ssr2_ij / (n2 - len(m_i))
            b1_ij = b1_mij[-1]
            b2_ij = b2_mij[-1]
            t_ij = (b1_ij - b2_ij)**2 * np.linalg.inv(var1_ij * K1_ij +
                                                      var2_ij * K2_ij)[-1, -1]
            p_ij = 1 - stats.f.cdf(t_ij, 1, n1 + n2 - len(m_i) - len(m_j))
            if p_ij > alpha_:  # accept hypothesis that b1_ij = b2_ij, delete edge
                is_regression_invariant = True
                deleted_edges.add((i, j))
                deleted_edges_with_p[(i, j)] = p_ij
                printv("deleted")
                break

            # calculate t_ji (regressing i on j) and find its p-value
            ssr1_ji = np.sum(np.square(X1_i - X1_mj @ b1_mji.T))
            ssr2_ji = np.sum(np.square(X2_i - X2_mj @ b2_mji.T))
            var1_ji = ssr1_ji / (n1 - len(m_i))
            var2_ji = ssr2_ji / (n2 - len(m_i))
            b1_ji = b1_mji[-1]
            b2_ji = b2_mji[-1]
            t_ji = (b1_ji - b2_ji)**2 * np.linalg.inv(var1_ji * K1_ji +
                                                      var2_ji * K2_ji)[-1, -1]
            p_ji = 1 - stats.f.cdf(t_ji, 1, n1 + n2 - len(m_i) - len(m_j))
            if p_ji > alpha_:  # accept hypothesis that b1_ji = b2_ji, delete edge
                is_regression_invariant = True
                deleted_edges.add((i, j))
                deleted_edges_with_p[(i, j)] = p_ji
                printv("deleted")
                break

            max_p = max(max_p, p_ij, p_ji)
        # end of inner loop of powerset

        if not is_regression_invariant:
            printv("retained")
            retained_edges.add((i, j))
            retained_edges_with_p[(i, j)] = max_p

    if isinstance(alpha, Iterable):
        retained_edges_dict = {alpha_: retained_edges}
        deleted_edges_dict = {alpha_: deleted_edges}
        for a in set(alpha) - {alpha_}:
            # if edge was deleted for highest alpha, it would have been deleted for lower alphas.
            deleted_edges_dict[a] = deleted_edges.copy()
            retained_edges_dict[a] = set()
            for (i, j), p in retained_edges_with_p.items():
                if p > a:
                    deleted_edges_dict[a].add((i, j))
                else:
                    retained_edges_dict[a].add((i, j))
        printv("Retained edges: % s" %
               {k: sorted(r)
                for k, r in retained_edges_dict.items()})
        return retained_edges_dict, deleted_edges_dict
    else:
        printv("Retained edges: % s" % sorted(retained_edges))
        return retained_edges, retained_edges_with_p, deleted_edges, deleted_edges_with_p
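# --- Not part of dci.py ---
# A hypothetical call, assuming two data matrices with the same number of columns
# and candidate edges over their variables:
# X1, X2 = np.random.randn(200, 5), np.random.randn(300, 5)
# candidate_edges = {(0, 1), (0, 2), (1, 2)}
# changed_nodes = {0, 1, 2}
# retained, retained_p, deleted, deleted_p = estimate_ddag_skeleton(
#     X1, X2, candidate_edges, changed_nodes, alpha=0.1, lasso_alpha=0.01, verbose=True)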
Example #19
                                coef0=2.0,
                                random_state=RANDOM_SEED)),
        classification_binary(
            light_clf.KernelSVC(kernel="sigmoid", random_state=RANDOM_SEED)),
        classification_binary(
            light_clf.KernelSVC(kernel="cosine", random_state=RANDOM_SEED)),

        # Sklearn Linear Regression
        regression(linear_model.ARDRegression()),
        regression(linear_model.BayesianRidge()),
        regression(linear_model.ElasticNet(random_state=RANDOM_SEED)),
        regression(linear_model.ElasticNetCV(random_state=RANDOM_SEED)),
        regression(linear_model.HuberRegressor()),
        regression(linear_model.Lars()),
        regression(linear_model.LarsCV()),
        regression(linear_model.Lasso(random_state=RANDOM_SEED)),
        regression(linear_model.LassoCV(random_state=RANDOM_SEED)),
        regression(linear_model.LassoLars()),
        regression(linear_model.LassoLarsCV()),
        regression(linear_model.LassoLarsIC()),
        regression(linear_model.LinearRegression()),
        regression(linear_model.OrthogonalMatchingPursuit()),
        regression(linear_model.OrthogonalMatchingPursuitCV()),
        regression(
            linear_model.PassiveAggressiveRegressor(random_state=RANDOM_SEED)),
        regression(
            linear_model.RANSACRegressor(
                base_estimator=tree.ExtraTreeRegressor(**TREE_PARAMS),
                random_state=RANDOM_SEED)),
        regression(linear_model.Ridge(random_state=RANDOM_SEED)),
        regression(linear_model.RidgeCV()),
Example #20
test_data = pd.read_csv('testing_data_set.csv')
train_data = pd.read_csv('training_data_set.csv')

# In[16]:

x_train = train_data['Father'].values.reshape(-1, 1)
y_train = train_data['Son'].values.reshape(-1, 1)
x_test = test_data['Father'].values.reshape(-1, 1)
y_test = test_data['Son'].values.reshape(-1, 1)

# In[17]:

poly = PolynomialFeatures(degree=10)
X_modified_train = poly.fit_transform(x_train)
X_modified_test = poly.fit_transform(x_test)
model1 = linear_model.Lasso(alpha=0.5)
model1.fit(X_modified_train, y_train)
y_predicted_test = model1.predict(X_modified_test)
y_predicted_train = model1.predict(X_modified_train)
a = sqrt(mean_squared_error(y_train, y_predicted_train))
b = sqrt(mean_squared_error(y_test, y_predicted_test))
print(a)
print(b)

# In[18]:

train_err = []
test_err = []
alpha_vals = np.linspace(0, 1, 9)
for alpha_v in alpha_vals:
    polyreg = linear_model.Lasso(alpha=alpha_v)
Example #21
### Make Train and Test from the Data

msk = np.random.rand(len(convertedX)) < 0.8
train = convertedX[msk]
test = convertedX[~msk]

train_y = train.MarketShare_total
train = train.drop('MarketShare_total', axis=1)

test_y = test.MarketShare_total
test = test.drop('MarketShare_total', axis=1)

### Feature Selection and Transform the Data

clf = linear_model.Lasso(alpha=0.1).fit(train, train_y)
model = SelectFromModel(clf, prefit=True)
X_new = model.transform(train)
print(X_new.shape)

test_new = model.transform(test)

### Make the Dense Network Model and Evaluation

model = baseline_model(train)
# fit model
history = model.fit(train, train_y, batch_size=100, validation_data=(test, test_y), epochs=100, verbose=1)

# evaluate the model
train_mse = model.evaluate(train, train_y, verbose=0)
test_mse = model.evaluate(test, test_y, verbose=0)
Example #22
def fit_regression(P, x, u, rule="LS", retall=False, **kws):
    """
Fit a polynomial chaos expansion using linear regression.

Parameters
----------
P : Poly
    Polynomial chaos expansion with `P.shape=(M,)` and `P.dim=D`.
x : array_like
    Collocation nodes with `x.shape=(D,K)`.
u : array_like
    Model evaluations with `len(u)=K`.
retall : bool
    If True return uhat in addition to R
rule : str
    Regression method used.

    The following methods use scikit-learn as the backend.
    See `sklearn.linear_model` for more details.

    Key     Scikit-learn    Description
    ---     ------------    -----------
        Parameters      Description
        ----------      -----------

    "BARD"  ARDRegression   Bayesian ARD Regression
        n_iter=300      Maximum iterations
        tol=1e-3        Optimization tolerance
        alpha_1=1e-6    Gamma scale parameter
        alpha_2=1e-6    Gamma inverse scale parameter
        lambda_1=1e-6   Gamma shape parameter
        lambda_2=1e-6   Gamma inverse scale parameter
        threshold_lambda=1e-4   Upper pruning threshold

    "BR"    BayesianRidge   Bayesian Ridge Regression
        n_iter=300      Maximum iterations
        tol=1e-3        Optimization tolerance
        alpha_1=1e-6    Gamma scale parameter
        alpha_2=1e-6    Gamma inverse scale parameter
        lambda_1=1e-6   Gamma shape parameter
        lambda_2=1e-6   Gamma inverse scale parameter

    "EN"    ElastiNet       Elastic Net
        alpha=1.0       Dampening parameter
        rho             Mixing parameter in [0,1]
        max_iter=300    Maximum iterations
        tol             Optimization tolerance

    "ENC"   ElasticNetCV    EN w/Cross Validation
        rho             Dampening parameter(s)
        eps=1e-3        min(alpha)/max(alpha)
        n_alphas        Number of alphas
        alphas          List of alphas
        max_iter        Maximum iterations
        tol             Optimization tolerance
        cv=3            Cross validation folds

    "LA"    Lars            Least Angle Regression
        n_nonzero_coefs Number of non-zero coefficients
        eps             Cholesky regularization

    "LAC"   LarsCV          LAR w/Cross Validation
        max_iter        Maximum iterations
        cv=5            Cross validation folds
        max_n_alphas    Max points for residuals in cv

    "LAS"   Lasso           Least Absolute Shrinkage and
                            Selection Operator
        alpha=1.0       Dampening parameter
        max_iter        Maximum iterations
        tol             Optimization tolerance

    "LASC"  LassoCV         LAS w/Cross Validation
        eps=1e-3        min(alpha)/max(alpha)
        n_alphas        Number of alphas
        alphas          List of alphas
        max_iter        Maximum iterations
        tol             Optimization tolerance
        cv=3            Cross validation folds

    "LL"    LassoLars       Lasso and Lars model
        max_iter        Maximum iterations
        eps             Cholesky regularization

    "LLC"   LassoLarsCV     LL w/Cross Validation
        max_iter        Maximum iterations
        cv=5            Cross validation folds
        max_n_alphas    Max points for residuals in cv
        eps             Cholesky regularization

    "LLIC"  LassoLarsIC     LL w/AIC or BIC
        criterion       "AIC" or "BIC" criterion
        max_iter        Maximum iterations
        eps             Cholesky regularization

    "OMP"   OrthogonalMatchingPursuit
        n_nonzero_coefs Number of non-zero coefficients
        tol             Max residual norm (instead of non-zero coef)

    Local methods

    Key     Description
    ---     -----------
    "LS"    Ordenary Least Squares

    "T"     Ridge Regression/Tikhonov Regularization
        order           Order of regularization (or custom matrix)
        alpha           Dampning parameter (else estimated from gcv)

    "TC"    T w/Cross Validation
        order           Order of regularization (or custom matrix)
        alpha           Dampning parameter (else estimated from gcv)


Returns
-------
R[, uhat]

R : Poly
    Fitted polynomial with `R.shape=u.shape[1:]` and `R.dim=D`.
uhat : np.ndarray
    The Fourier coefficients in the estimation.

Examples
--------
>>> x, y = cp.variable(2)
>>> P = cp.Poly([1, x, y])
>>> s = [[-1,-1,1,1], [-1,1,-1,1]]
>>> u = [0,1,1,2]
>>> print(fit_regression(P, s, u))
0.5q1+0.5q0+1.0

    """

    x = np.array(x)
    if len(x.shape) == 1:
        x = x.reshape(1, *x.shape)
    u = np.array(u)

    Q = P(*x).T
    shape = u.shape[1:]
    u = u.reshape(u.shape[0], np.prod(u.shape[1:]))

    rule = rule.upper()

    # Local rules
    if rule == "LS":
        uhat = la.lstsq(Q, u)[0]

    elif rule == "T":
        uhat = rlstsq(Q, u, kws.get("order", 0), kws.get("alpha", None), False)

    elif rule == "TC":
        uhat = rlstsq(Q, u, kws.get("order", 0), kws.get("alpha", None), True)

    else:

        # Scikit-learn wrapper
        try:
            _ = lm
        except NameError:
            raise NotImplementedError("sklearn not installed")

        if rule == "BARD":
            solver = lm.ARDRegression(fit_intercept=False, copy_X=False, **kws)

        elif rule == "BR":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = lm.BayesianRidge(**kws)

        elif rule == "EN":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = lm.ElasticNet(**kws)

        elif rule == "ENC":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = lm.ElasticNetCV(**kws)

        elif rule == "LA":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = lm.Lars(**kws)

        elif rule == "LAC":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = lm.LarsCV(**kws)

        elif rule == "LAS":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = lm.Lasso(**kws)

        elif rule == "LASC":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = lm.LassoCV(**kws)

        elif rule == "LL":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = lm.LassoLars(**kws)

        elif rule == "LLC":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = lm.LassoLarsCV(**kws)

        elif rule == "LLIC":
            kws["fit_intercept"] = kws.get("fit_intercept", False)
            solver = lm.LassoLarsIC(**kws)

        elif rule == "OMP":
            solver = lm.OrthogonalMatchingPursuit(**kws)

        uhat = solver.fit(Q, u).coef_

    u = u.reshape(u.shape[0], *shape)

    R = po.sum((P * uhat.T), -1)
    R = po.reshape(R, shape)

    if retall == 1:
        return R, uhat
    elif retall == 2:
        return R, uhat, Q
    return R
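# --- Not part of the original function ---
# A sketch of how the scikit-learn backed rules forward keyword arguments, using
# the "LAS" (Lasso) rule from the table above (the alpha value is arbitrary):
# x, y = cp.variable(2)
# P = cp.Poly([1, x, y])
# nodes = [[-1, -1, 1, 1], [-1, 1, -1, 1]]
# evals = [0, 1, 1, 2]
# R, uhat = fit_regression(P, nodes, evals, rule="LAS", retall=True, alpha=1e-3)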
Example #23
from sklearn import metrics


def plot(y_pred, y_test):
    plt.plot(y_test, y_pred, '.')  #actual values of x and y
    plt.title('LASSO Least Angled Regression')
    plt.xlabel('Y_test')
    plt.ylabel('Y_pred')
    plt.show()


path = '/scratch/Trainee_DATA/Juhi/cyclone_files/'
aa3 = np.loadtxt(path + 'mix_training_cyclone_set.txt', unpack=True)
data3 = aa3.T
x_train = np.squeeze(data3[:, 4:])
y_train = np.squeeze(data3[:, 3])
aa4 = np.loadtxt(path + 'mix_testing_cyclone_set.txt', unpack=True)
data4 = aa4.T
x_test = np.squeeze(data4[:, 4:])
y_test = np.squeeze(data4[:, 3])

reg = linear_model.Lasso(alpha=.5)
reg.fit(x_train, y_train)
y_pred = reg.predict(x_test)

print(metrics.mean_absolute_error(y_test, y_pred))
print(metrics.mean_squared_error(y_test, y_pred))
print(np.sqrt(metrics.mean_squared_error(y_test, y_pred)))

plot(y_pred, y_test)
Example #24
                                   getCategoricalColumns(all_houses))
all_houses_onehot.info()

train_updtd, test_updtd = fileSplit(all_houses_onehot, train.shape[0])

y_train = train_updtd['SalePrice']
filterFeatures(train_updtd, ['SalePrice', 'log_sale_price'])
X_train = train_updtd
X_train.info()


def rmse(y_orig, y_pred):
    return math.sqrt(metrics.mean_squared_error(y_orig, y_pred))


lasso_estimator = linear_model.Lasso(random_state=2017)
lasso_grid = {'alpha': [0.0, 0.2, 0.4, 0.6, 0.8, 1.0]}
grid_lasso_estimator = model_selection.GridSearchCV(
    lasso_estimator, lasso_grid, scoring=metrics.make_scorer(rmse), cv=10)
grid_lasso_estimator.fit(X_train, y_train)
print(grid_lasso_estimator.cv_results_)
print(grid_lasso_estimator.best_params_)
print(grid_lasso_estimator.best_score_)
print(grid_lasso_estimator.score(X_train, y_train))
estimator = grid_lasso_estimator.best_estimator_
estimator.coef_
estimator.intercept_

##################Final Prections Preparation

#total_missing_test = test_updtd.isnull().sum()
Example #25
coeff_names_w = weather_generation_dep.columns.values
y_pos = np.arange(len(coeff_names_w))
fig = plt.figure()
plt.barh(coeff_names_w, scores, alpha=0.5)
plt.xlabel('R-squared value')
plt.title('Coeff of Determination')
plt.show()

# Lasso Regression
# Looking at magnitude of coefficients as a measure of predictive power
clf = linear_model.Lasso(alpha=0.1,
                         copy_X=True,
                         fit_intercept=True,
                         max_iter=100,
                         normalize=True,
                         positive=False,
                         random_state=None,
                         selection='cyclic',
                         tol=0.0001,
                         warm_start=False)

clf.fit(weather_generation_dep, gen_sum_target)

# plotting coefficient magnitudes
coeff_names = weather_generation_dep.columns.values
y_pos = np.arange(len(coeff_names))

fig = plt.figure()
plt.barh(coeff_names, clf.coef_, align='center', alpha=0.5)
plt.xlabel('coeff magnitude')
plt.title('LASSO : Predicting generation from weather coeff magnitudes')
Example #26
def LASSO(A, y, w): # scikit learn
	lasso = lm.Lasso(alpha = w)
	lasso.fit(A, y)
	return lasso.coef_
Example #27
# TODO: Add import statements
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn import linear_model

# Assign the data to predictor and outcome variables
# TODO: Load the data
train_data = pd.read_csv('data.csv')
X = train_data.iloc[:, :-1]
y = train_data.iloc[:, -1]

# TODO: Create the standardization scaling object.
scaler = StandardScaler()

# TODO: Fit the standardization parameters and scale the data.
X_scaled = scaler.fit_transform(X)

# TODO: Create the linear regression model with lasso regularization.
lasso_reg = linear_model.Lasso()

# TODO: Fit the model.
lasso_reg.fit(X_scaled, y)

# TODO: Retrieve and print out the coefficients from the regression model.
reg_coef = lasso_reg.coef_
print(reg_coef)
Example #28
#new array with reduced number of features to store the small size images
sX = np.empty((0, 484), int)

ss = 42000

#Perform convolve on all images
for img in X[0:ss, :]:
    img2D = np.reshape(img, (28, 28))
    nImg = convolve2D(img2D, filter)
    nImg1D = np.reshape(nImg, (-1, 484))
    sX = np.append(sX, nImg1D, axis=0)

Y = Y.to_numpy()
sY = Y[0:ss]

# train and test model
sXTrain, sXTest, yTrain, yTest = train_test_split(sX,
                                                  sY,
                                                  test_size=0.2,
                                                  random_state=0)
clf = linear_model.Lasso(alpha=1)
clf.fit(sXTrain, yTrain)

#Printing our score and creating predictions
print(clf.score(sXTest, yTest))
prediction = clf.predict(sXTest)

#Reading our sample submission file and updating it
submissionFile = pd.read_csv('sample_submission.csv')
submissionFile['Label'] = prediction
submissionFile.to_csv('Linear7x7.csv', index=False)
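# --- Not part of the original snippet ---
# Lasso is a regressor, so `prediction` holds continuous values while the digit
# labels are integers 0-9.  A sketch of rounding before writing the submission
# (assuming integer labels are expected):
# submissionFile['Label'] = np.clip(np.rint(prediction), 0, 9).astype(int)
# submissionFile.to_csv('Linear7x7_rounded.csv', index=False)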
Example #29
def k_fold_x_validation(x, y, k, mode, *param):

    idx_shuffle = np.random.permutation(np.size(x, 0))
    if len(x.shape) == 2:
        x = x[idx_shuffle, :]
    else:
        x = x[idx_shuffle]
    y = y[idx_shuffle]

    test_size = int(np.size(x, 0) / k)
    test_MSE = np.zeros((k))
    for ii in range(k):
        test_x = x[ii * test_size:(ii + 1) * test_size]
        test_y = y[ii * test_size:(ii + 1) * test_size]

        train_x = np.delete(x, np.arange(ii * test_size, (ii + 1) * test_size),
                            0)
        train_y = np.delete(y, np.arange(ii * test_size, (ii + 1) * test_size))

        if mode == 'Linear':
            if param:
                alpha = param[0]
            else:
                alpha = 0.01
            beta, train_MSE, test_MSE[ii], y_etim = linear_regression(
                train_x, train_y, alpha, test_x, test_y)
        elif mode == 'RandomForest':
            if param:
                depth = param[0]
            else:
                depth = 4

            regressor = RandomForestRegressor(n_estimators=20, max_depth=depth)
            regressor.fit(train_x, train_y)
            test_y_estim = regressor.predict(test_x)
            test_MSE[ii] = np.sqrt(np.mean((test_y - test_y_estim)**2))
        elif mode == 'NeuralNetwork':
            if param:
                h_layer_sizes = param

            else:
                h_layer_sizes = (30,)
            neural_net = MLPRegressor(hidden_layer_sizes=h_layer_sizes,
                                      solver='sgd')
            neural_net.fit(train_x, train_y)
            test_y_estim = neural_net.predict(test_x)
            test_MSE[ii] = np.sqrt(np.mean((test_y - test_y_estim)**2))
        elif mode == 'Polynomial':
            if param:
                deg = param[0]
            else:
                deg = 2

            poly = PolynomialFeatures(degree=deg)
            train_x_poly = poly.fit_transform(train_x)
            test_x_poly = poly.fit_transform(test_x)
            regressor = linear_model.Ridge(alpha=0.01)
            regressor.fit(train_x_poly, train_y)
            test_y_estim = regressor.predict(test_x_poly)
            test_MSE[ii] = np.sqrt(np.mean((test_y - test_y_estim)**2))
        elif mode == 'Lasso':
            if param:
                a = param[0]
            else:
                a = 0.1
            regressor = linear_model.Lasso(alpha=a)
            regressor.fit(train_x, train_y)
            test_y_estim = regressor.predict(test_x)
            test_MSE[ii] = np.sqrt(np.mean((test_y - test_y_estim)**2))
        elif mode == 'Ridge':
            if param:
                a = param[0]

            else:
                a = 0.1
            regressor = linear_model.Ridge(alpha=a)

            regressor.fit(train_x, train_y)
            test_y_estim = regressor.predict(test_x)
            test_MSE[ii] = np.sqrt(np.mean((test_y - test_y_estim)**2))

        else:
            raise ValueError('Regression mode used is not defined!')

    if mode == 'Linear':
        beta, train_MSE, y_estim = linear_regression(x, y, alpha)

        return beta, train_MSE, test_MSE, y_estim

    elif mode == 'RandomForest':
        regressor = RandomForestRegressor(n_estimators=20, max_depth=depth)
        regressor.fit(x, y)
        y_estim = regressor.predict(x)
        train_MSE = np.sqrt(np.mean((y - y_estim)**2))

        plt.figure()
        plt.scatter(y_estim, y - y_estim, alpha=1, marker='o')
        plt.xlabel('Fitted Value', fontsize=30)
        plt.ylabel('Residual', fontsize=30)
        plt.xticks(fontsize=30)
        plt.yticks(fontsize=30)
        plt.figure()
        plt.scatter(y, y_estim, alpha=1, marker='o')
        plt.xlabel('Data', fontsize=30)
        plt.ylabel('Fitted Value', fontsize=30)
        plt.xticks(fontsize=30)
        plt.yticks(fontsize=30)

        return regressor, train_MSE, test_MSE, y_estim

    elif mode == 'NeuralNetwork':
        neural_net = MLPRegressor(hidden_layer_sizes=h_layer_sizes,
                                  solver='sgd')
        neural_net.fit(x, y)
        y_estim = neural_net.predict(x)
        train_MSE = np.sqrt(np.mean((y - y_estim)**2))

        return neural_net, train_MSE, test_MSE, y_estim
    elif mode == 'Polynomial':
        poly = PolynomialFeatures(degree=deg)
        x_poly = poly.fit_transform(x)
        regressor = linear_model.Ridge(alpha=0.01)
        regressor.fit(x_poly, y)
        y_estim = regressor.predict(x_poly)
        train_MSE = np.sqrt(np.mean((y - y_estim)**2))
        return regressor, train_MSE, test_MSE, y_estim

    elif mode == 'Lasso':
        regressor = linear_model.Lasso(alpha=a)
        regressor.fit(x, y)
        y_estim = regressor.predict(x)
        train_MSE = np.sqrt(np.mean((y - y_estim)**2))

        plt.figure()
        plt.scatter(y_estim, y - y_estim, alpha=1, marker='o')
        plt.xlabel('Fitted Value', fontsize=30)
        plt.ylabel('Residual', fontsize=30)
        plt.xticks(fontsize=30)
        plt.yticks(fontsize=30)
        plt.figure()
        plt.scatter(y, y_estim, alpha=1, marker='o')
        plt.xlabel('Data', fontsize=30)
        plt.ylabel('Fitted Value', fontsize=30)
        plt.xticks(fontsize=30)
        plt.yticks(fontsize=30)

        return regressor.coef_, train_MSE, test_MSE, y_estim
    elif mode == 'Ridge':
        regressor = linear_model.Ridge(alpha=a)
        regressor.fit(x, y)
        y_estim = regressor.predict(x)
        train_MSE = np.sqrt(np.mean((y - y_estim)**2))

        plt.figure()
        plt.scatter(y_estim, y - y_estim, alpha=1, marker='o')
        plt.xlabel('Fitted Value', fontsize=30)
        plt.ylabel('Residual', fontsize=30)
        plt.xticks(fontsize=30)
        plt.yticks(fontsize=30)
        plt.figure()
        plt.scatter(y, y_estim, alpha=1, marker='o')
        plt.xlabel('Data', fontsize=30)
        plt.ylabel('Fitted Value', fontsize=30)
        plt.xticks(fontsize=30)
        plt.yticks(fontsize=30)

        return regressor.coef_, train_MSE, test_MSE, y_estim
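# --- Not part of the original function ---
# A hypothetical call: 5-fold cross validation of a Lasso model with alpha=0.05
# (x and y are assumed to be numpy arrays defined elsewhere):
# coefs, train_rmse, test_rmse, y_fit = k_fold_x_validation(x, y, 5, 'Lasso', 0.05)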
Example #30
#feature_model = SelectFromModel(llas, prefit=True)
#F = feature_model.transform(Features)

# Use principal component analysis for best feature selection

random.seed(10)  # set random starting point
pca = PCA(n_components=num_best_features)
pcam = pca.fit(Features, y)
F = pcam.transform(Features)

plt.figure(1)
plt.plot(np.cumsum(pcam.explained_variance_ratio_))
plt.xlabel('Principal Component')
plt.ylabel('Cumulative Explained Variance')

llas = linear_model.Lasso(alpha=0.1).fit(F, y)
feature_model = SelectFromModel(llas, prefit=True)
F = feature_model.transform(F)

# split F and y into training and testing sets

F_train, F_test, y_train, y_test = train_test_split(
    F, y, test_size=test_size)  # note: no random_state is fixed, so the split changes between runs
## Run best features on the Machine learning classifier model ##
x_train = F_train  #X_new_train
y_train = y_train

x_test = F_test  #X_new_test
y_test = y_test

#Define model: uncomment the model of interest below