Example #1
0
def lasso_reg():
    """Fit Lasso regression on degree-10 polynomial features of 'Father'
    height to predict 'Son' height, sweep the regularization strength
    alpha over [0, 1], report train/test RMSE, and plot RMSE vs. alpha.

    Reads train.csv / test.csv from the working directory and shows a
    matplotlib figure; returns None.
    """
    traindata = pd.read_csv("train.csv")
    testdata = pd.read_csv("test.csv")
    x_train = traindata['Father'].values.reshape(-1, 1)
    y_train = traindata['Son'].values.reshape(-1, 1)
    x_test = testdata['Father'].values.reshape(-1, 1)
    y_test = testdata['Son'].values.reshape(-1, 1)

    polyreg = PolynomialFeatures(degree=10)
    x_modified_train = polyreg.fit_transform(x_train)
    # Use transform (not fit_transform) on the test set: the expansion must
    # come from the transformer fitted on the training data.
    x_modified_test = polyreg.transform(x_test)
    model = linear_model.Lasso(alpha=0.5)
    model.fit(x_modified_train, y_train)
    y_predicted_test = model.predict(x_modified_test)
    y_predicted_train = model.predict(x_modified_train)
    print('Train RMSE for Lasso with Polynomial Degree 10:',
          sqrt(mean_squared_error(y_train, y_predicted_train)))
    print('Test RMSE for Lasso with Polynomial Degree 10:',
          sqrt(mean_squared_error(y_test, y_predicted_test)))

    train_err = []
    test_err = []
    # NOTE: alpha=0 turns Lasso into plain least squares; scikit-learn warns
    # about it but still fits.
    alpha_vals = np.linspace(0, 1, 20)
    for alpha_v in alpha_vals:
        # Use a fresh name so the PolynomialFeatures transformer above is
        # not shadowed by the Lasso estimator.
        lasso = linear_model.Lasso(alpha=alpha_v)
        lasso.fit(x_modified_train, y_train)
        train_err.append(
            sqrt(mean_squared_error(y_train,
                                    lasso.predict(x_modified_train))))
        test_err.append(
            sqrt(mean_squared_error(y_test,
                                    lasso.predict(x_modified_test))))

    min_test_error = min(test_err)
    # BUG FIX: np.linspace(0, 1, 20) steps by 1/19, so the alpha at index i
    # is i/19, not (i + 1)/20.  Read the best alpha straight from alpha_vals.
    best_alpha = alpha_vals[test_err.index(min_test_error)]
    print(
        f"""Best lambda value is {best_alpha} as it has the lowest test error of all lambdas: {min_test_error: 3.5f}"""
    )
    # Calculate RMSE without Lasso for polynomial of degree=10.
    model7 = linear_model.LinearRegression()
    model7.fit(x_modified_train, y_train)
    y_test7 = model7.predict(x_modified_test)
    linear_rmse = sqrt(mean_squared_error(y_test, y_test7))
    print('Test RMSE  with Normal linear Regression is :', linear_rmse)
    # The comparison uses the BEST lambda found above (the original message
    # incorrectly said lambda=1).
    print(
        'Improvement in model by using lasso (best lambda) over normal Linear regression for polynomial degree of 10 is : ',
        min_test_error - linear_rmse)
    plt.title('Lasso')
    plt.xlabel('Alpha value')
    plt.ylabel('RMSE')
    plt.plot(alpha_vals, train_err, 'bo-', label='Train')
    plt.plot(alpha_vals, test_err, 'ro-', label='Test')
    plt.legend()
    plt.show()
Example #2
0
# Expand both splits to the polynomial basis and fit a Lasso model with a
# fixed regularization strength of 0.5.
x_modified_train = polyreg.fit_transform(x_train)
x_modified_test = polyreg.fit_transform(x_test)
model = linear_model.Lasso(alpha=0.5)
model.fit(x_modified_train, y_train)
y_predicted_test = model.predict(x_modified_test)
y_predicted_train = model.predict(x_modified_train)

# print RMSE train and test value
print('RMSE Train:', sqrt(mean_squared_error(y_train, y_predicted_train)))
print('RMSE Test:', sqrt(mean_squared_error(y_test, y_predicted_test)))

# Sweep the regularization strength over nine evenly spaced values in [0, 1]
# and record the train/test RMSE of each fitted model (on the raw feature).
train_err = []
test_err = []
alpha_vals = np.linspace(0, 1, 9)
for alpha_v in alpha_vals:
    polyreg = linear_model.Lasso(alpha=alpha_v)
    polyreg.fit(x_train, y_train)
    rmse_on_train = sqrt(
        mean_squared_error(y_train, polyreg.predict(x_train)))
    rmse_on_test = sqrt(mean_squared_error(y_test, polyreg.predict(x_test)))
    train_err.append(rmse_on_train)
    test_err.append(rmse_on_test)

# Plot Lasso Graph
plt.title('Lasso')
plt.xlabel('Alpha value')
plt.ylabel('RMSE')
plt.plot(np.linspace(0, 1, 9), train_err, 'bo-', label='Train')
plt.plot(np.linspace(0, 1, 9), test_err, 'ro-', label='Test')
plt.legend()
plt.show()

# In[ ]:
Example #3
0

# Expand the single predictor to a degree-10 polynomial basis.
poly = PolynomialFeatures(degree=10)
x_train1 = poly.fit_transform(x_train)
# Use transform (not fit_transform) on the test set so the expansion comes
# from the transformer fitted on the training data.
x_test1 = poly.transform(x_test)


# Typical polynomial regression model
# 

# In[47]:


# Ordinary least squares on the polynomial features.
# NOTE: the original passed normalize=True, which was removed from
# LinearRegression in scikit-learn 1.2; for plain OLS it only changed the
# solver's internal scaling, so dropping it leaves predictions and the
# back-transformed coefficients unchanged.
poly = LinearRegression()
poly.fit(x_train1, y_train)
y_pred_train_poly = poly.predict(x_train1)
y_pred_test_poly = poly.predict(x_test1)
# mean_squared_error is symmetric in its arguments; use the conventional
# (y_true, y_pred) order (same value as before).
MSE_train_poly = metrics.mean_squared_error(y_train, y_pred_train_poly)
MSE_test_poly = metrics.mean_squared_error(y_test, y_pred_test_poly)
RMSE_tr_poly = math.sqrt(MSE_train_poly)
RMSE_tt_poly = math.sqrt(MSE_test_poly)

print("The RMSE for training with polynomial of degree 10 is :", RMSE_tr_poly)
print("The RMSE for testing with polynomial of degree 10 is :", RMSE_tt_poly)


# Lasso Model with default value of regularisation parameter alpha

# In[48]:

## POLYNOMIAL
# Create linear regression object.
# NOTE: normalize=True was removed from LinearRegression in scikit-learn 1.2;
# for plain OLS it did not change predictions, so it is dropped here.
poly = linear_model.LinearRegression()

# Train the model using the training sets
X_train_no_intercept = X_train
# reshape(-1, X_train.shape[1]) is a no-op for a 2-D array; kept for parity
# with the original in case X_train arrives with extra dimensions.
X_train = X_train.reshape(-1, X_train.shape[1])
poly.fit(X_train, y_train)

# The intercept
print('Intercept: \n', poly.intercept_)
# The coefficients
print('Coefficients: \n', poly.coef_)
# BUG FIX: np.mean(err ** 2) is the MEAN squared error, not the residual sum
# of squares — label the printed value accordingly.
print("Mean squared error, training data: %.2f"
      % np.mean((poly.predict(X_train) - y_train) ** 2))
print("Mean squared error, test data: %.2f"
      % np.mean((poly.predict(X_test) - y_test) ** 2))
var_to_graph['multReg_poly'] = np.mean((poly.predict(X_test) - y_test) ** 2)
# Explained variance score: 1 is perfect prediction
print('Variance score, training data: %.2f' % poly.score(X_train, y_train))
# Vector of prediction error (residuals), plotted as a histogram.
print('Distribution of prediction error on training data:')
predError = poly.predict(X_train) - y_train
plt.hist(predError)
plt.show()

print('Distribution of prediction error on test data:')
predError = poly.predict(X_test) - y_test
plt.hist(predError)
plt.show()
y_pred = Linear_regressor.predict(X_test)

print('Linear Regression')
# BUG FIX: the original rebound the names r2_score and mean_absolute_error
# to their float results, shadowing the sklearn functions so the second
# calls below raised "'numpy.float64' object is not callable".  Store the
# results under distinct names instead.
linear_r2 = r2_score(y_test, y_pred, sample_weight=None)
print(linear_r2)

linear_mae = mean_absolute_error(y_test, y_pred, sample_weight=None)
print(linear_mae)

# Fitting polynomial regression on the dataset
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
poly_reg = PolynomialFeatures(degree=4)
# BUG FIX: PolynomialFeatures is a transformer and has no predict(); expand
# the features and fit an actual regressor on the expanded design matrix.
X_train_poly = poly_reg.fit_transform(X_train)
poly_model = LinearRegression()
poly_model.fit(X_train_poly, y_train)

# Predicting the test results
y_pred = poly_model.predict(poly_reg.transform(X_test))

print('Polynomial Regression')
poly_r2 = r2_score(y_test, y_pred, sample_weight=None)
print(poly_r2)

poly_mae = mean_absolute_error(y_test, y_pred, sample_weight=None)
print(poly_mae)

# Fitting SVM on the dataset
from sklearn.svm import SVR
SVR_regressor = SVR(kernel='rbf')
# NOTE(review): this fits on the full X, y rather than the training split,
# which leaks test data into training — confirm that is intentional.
SVR_regressor.fit(X, y)

# Predicting the test results
y_pred = SVR_regressor.predict(X_test)
Example #6
0
# Linear-regression baseline: report its MSE, record (R^2, MSE) for later
# comparison, and draw the fitted line over the scatter of the raw data.
lr_mse = mean_squared_error(y, y_lr_pred)
print("mse of lr", lr_mse)

model_rsq_dic['lr'] = (lr.score(lstat_x, y), lr_mse)
plt.scatter(lstat_x, y, s=10)
plt.plot(lstat_x,
         y_lr_pred,
         color='red',
         label='Linear Regression - rsq: 0.544')

# Quadratic fit: expand the predictor to degree-2 polynomial features, then
# run ordinary least squares on the expanded design matrix.
quad_features = PolynomialFeatures(degree=2)
lstat_x_transformed = quad_features.fit_transform(lstat_x)

poly = LinearRegression()
poly.fit(lstat_x_transformed, y)
print("poly equation", poly.coef_, " intercept ", poly.intercept_)
y_poly_pred = poly.predict(lstat_x_transformed)
poly_rsq = poly.score(lstat_x_transformed, y)
poly_mse = mean_squared_error(y, y_poly_pred)
print("r^2 of poly", poly_rsq)
print("mse of poly", poly_mse)
model_rsq_dic['poly'] = (poly_rsq, poly_mse)

#REALLY IMPT to sort the values of x before line plot
sort_axis = operator.itemgetter(0)
sorted_pairs = sorted(zip(lstat_x, y_poly_pred), key=sort_axis)
x, y_poly_pred = zip(*sorted_pairs)

plt.plot(x, y_poly_pred, color='green', label='Poly Regression - rsq: 0.641')

mars = Earth()
mars.fit(
    lstat_x,
Example #7
0
# Load the library required for feature engineering
from sklearn.preprocessing import PolynomialFeatures

# Extract the predictor from the dataframe df
X = df.iloc[:, 0:1].values

# Calculate the MSE with a polynomial with varying degrees
degrees = [2, 3, 4, 5, 6, 7, 8, 9]
mse = []
for degree in degrees:
    poly = PolynomialFeatures(degree, include_bias=False)
    X_poly = poly.fit_transform(X)
    # BUG FIX: get_feature_names() was removed in scikit-learn 1.2 — use
    # get_feature_names_out().  The input_features list must also contain
    # exactly one name per input column (the original hard-coded 5 names,
    # which modern scikit-learn rejects).
    X_poly_feature_name = poly.get_feature_names_out(
        ['Feature' + str(i) for i in range(1, X.shape[1] + 1)])
    df_poly = pd.DataFrame(X_poly, columns=X_poly_feature_name)
    df_poly['y'] = df['Y']
    X_train = df_poly.drop('y', axis=1)
    y_train = df_poly['y']
    # normalize=True was removed from LinearRegression in scikit-learn 1.2;
    # for plain OLS it did not affect predictions, so it is dropped.
    poly = LinearRegression()
    model_poly = poly.fit(X_train, y_train)
    y_poly = poly.predict(X_train)
    # mean_squared_error is symmetric; use the conventional (y_true, y_pred)
    # order (same value as before).
    mse.append(mean_squared_error(y_train, y_poly))

# Analyze the MSE with a polynomial with varying degrees
plt.figure(figsize=(12, 8))
plt.xlabel("Degrees", fontsize=20)
plt.ylabel("Mean-squared Error", fontsize=20)
plt.grid(1)
plt.scatter(degrees, mse, edgecolors=(0, 0, 0), lw=2, s=80)
plt.plot(degrees, mse, 'k--', lw=2)
Example #8
0
x_train = dataset['Father'].values.reshape(-1, 1)
y_train = dataset['Son'].values.reshape(-1, 1)
x_test = dataset_test['Father'].values.reshape(-1, 1)
y_test = dataset_test['Son'].values.reshape(-1, 1)

# Degree-10 polynomial expansion of the single predictor.
poly_reg = PolynomialFeatures(degree=10)
modified_x_train = poly_reg.fit_transform(x_train)
# Use transform (not fit_transform) on the test set: the expansion must come
# from the transformer fitted on the training data.
modified_x_test = poly_reg.transform(x_test)

# Sweep the Lasso regularization strength over [0, 1] and record the RMSE
# of each fitted model on both splits.
alpha_vals = np.linspace(0, 1, 10)
train_err = []
test_err = []
for a in alpha_vals:
    # BUG FIX: max_iter must be an integer; 1e7 is a float, which recent
    # scikit-learn versions reject during parameter validation.
    poly_reg = Lasso(alpha=a, max_iter=10_000_000, tol=.001)
    poly_reg.fit(modified_x_train, y_train)
    y_train_predict = poly_reg.predict(modified_x_train)
    y_test_predict = poly_reg.predict(modified_x_test)
    # Compute each RMSE once instead of twice per iteration.
    train_rmse = math.sqrt(mean_squared_error(y_train, y_train_predict))
    test_rmse = math.sqrt(mean_squared_error(y_test, y_test_predict))
    train_err.append(train_rmse)
    test_err.append(test_rmse)
    print("Lasso Train RMSE: ", train_rmse)
    print("Lasso Test RMSE: ", test_rmse)

plt.xlabel('Alpha')
plt.ylabel('RMSE')
plt.title('Lasso')
plt.plot(alpha_vals, train_err, 'bo-', label='Train')
plt.plot(alpha_vals, test_err, 'ro-', label='Test')
plt.legend()
plt.show()
# Expand the test predictor with the existing polynomial transformer, then
# fit a Lasso model at alpha=0.5 on the expanded training features.
x_modified_test = polyreg.fit_transform(x_test)
model = Lasso(alpha=0.5)
model.fit(x_modified_train, y_train)

# Evaluate on both splits and report the root-mean-squared error.
y_predicted_test = model.predict(x_modified_test)
y_predicted_train = model.predict(x_modified_train)
rmse_train = math.sqrt(mean_squared_error(y_train, y_predicted_train))
rmse_test = math.sqrt(mean_squared_error(y_test, y_predicted_test))
print('RMSE Train:', rmse_train)
print('RMSE Test:', rmse_test)

# In[10]:

#Lasso
# Sweep the regularization strength over nine evenly spaced values in
# [0, 1], recording the train/test RMSE of each fitted model.
train_err = []
test_err = []
alpha_vals = np.linspace(0, 1, 9)
for alpha_v in alpha_vals:
    polyreg = Lasso(alpha=alpha_v)
    polyreg.fit(x_train, y_train)
    rmse_on_train = math.sqrt(
        mean_squared_error(y_train, polyreg.predict(x_train)))
    rmse_on_test = math.sqrt(
        mean_squared_error(y_test, polyreg.predict(x_test)))
    train_err.append(rmse_on_train)
    test_err.append(rmse_on_test)

# Plot RMSE as a function of the regularization strength.
plt.title('Lasso')
plt.xlabel('Alpha value')
plt.ylabel('RMSE')
plt.plot(np.linspace(0, 1, 9), train_err, 'bo-', label='Train')
plt.plot(np.linspace(0, 1, 9), test_err, 'ro-', label='Test')
plt.legend()
plt.show()

# In[ ]:
Example #10
0
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

# Polynomial regression: expand the features to degree 4, then fit OLS.
reg = PolynomialFeatures(degree=4)
x_poly = reg.fit_transform(x_train)
reg2 = LinearRegression()
reg2.fit(x_poly, y_train)
# BUG FIX: the original predicted on the TRAINING design matrix (x_poly) but
# plotted those predictions against x_test, which mismatches lengths whenever
# the two splits differ in size.  Predict on the transformed test set.
y_pred = reg2.predict(reg.transform(x_test))
plt.scatter(x_train[:, 0], y_train, color='red')
# NOTE(review): sorting x and y independently draws a monotone curve rather
# than the fitted function; kept as in the original visualization.
plt.plot(np.sort(x_test[:, 0]), np.sort(y_pred), color='blue')
plt.title("railways with poly reg")
plt.show()

#linear regression
reg = LinearRegression()
reg.fit(x_train, y_train)
y_pred = reg.predict(x_test)
plt.scatter(x_train[:, 3], y_train, color='red')
plt.plot(np.sort(x_test[:, 3]), np.sort(y_pred), color='blue')
plt.title("railways with linear reg")
plt.show()
# R^2 on both splits (score() returns the coefficient of determination).
print('The accuracy of the linear reg is {:.2f} out of 1 on training data'.
      format(reg.score(x_train, y_train)))
print('The accuracy of the linear reg is {:.2f} out of 1 on test data'.format(
    reg.score(x_test, y_test)))
#reg.coef_
#reg.intercept_
from sklearn.metrics import mean_squared_error
# RMSE of the linear model on the test split.
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print(rmse)
#KNN
from sklearn.neighbors import KNeighborsClassifier
Example #11
0
# 5 polynomial transformation on the feature 'horse power'.
# Expand 'horsepower' to a degree-5 polynomial basis and fit OLS on it.
poly = PolynomialFeatures(degree=5)
# Hold out 45% of the data for testing; random_state pins the split.
x_train_2, x_test_2, y_train_2, y_test_2 = train_test_split(X,
                                                            Y,
                                                            test_size=0.45,
                                                            random_state=0)

# NOTE(review): fit_transform on the test split refits the transformer; for
# PolynomialFeatures the output is the same either way, but transform()
# would be the conventional call — confirm before reuse with stateful
# transformers.
x_train_fit = poly.fit_transform(x_train_2[["horsepower"]])
x_test_fit = poly.fit_transform((x_test_2[["horsepower"]]))

# Create a linear regression model "poly" and train it
# (note: this rebinds 'poly' from the transformer to the regressor).
poly = LinearRegression()
poly.fit(x_train_fit, y_train_2)

# Get predicted values of price column
Yhat = poly.predict(x_test_fit)
print("Predicted values from polynomial regression :\n", Yhat[0:5])

# Take the first five predicted values and compare it to the actual targets
print("Actual values :\n", y_test_2[0:5])

# Get R^2 of training and test data
R_sq_train_poly = poly.score(x_train_fit, y_train_2)
R_sq_test_poly = poly.score(x_test_fit, y_test_2)
print("Polynomial Regression R^2 values ", R_sq_train_poly, " and ",
      R_sq_test_poly)

# How the R^2 changes on the test data for different order polynomials and plot the results
orders = [1, 2, 3, 4, 5, 6]
R_sq = []
X_train, X_test, Y_train, Y_test = train_test_split(X,