def lasso_reg():
    """Fit a degree-10 polynomial Lasso on the Father/Son height data.

    Reads train.csv / test.csv, reports train/test RMSE for a fixed
    alpha=0.5 Lasso, sweeps alpha over [0, 1] to find the value with the
    lowest test RMSE, compares against plain linear regression on the
    same polynomial features, and plots train/test RMSE vs alpha.

    Side effects: reads two CSV files, prints results, shows a plot.
    Returns None.
    """
    traindata = pd.read_csv("train.csv")
    testdata = pd.read_csv("test.csv")
    x_train = traindata['Father'].values.reshape(-1, 1)
    y_train = traindata['Son'].values.reshape(-1, 1)
    x_test = testdata['Father'].values.reshape(-1, 1)
    y_test = testdata['Son'].values.reshape(-1, 1)

    # Degree-10 polynomial expansion: fit the transformer on the training
    # data only, then reuse it for the test data.
    polyreg = PolynomialFeatures(degree=10)
    x_modified_train = polyreg.fit_transform(x_train)
    # BUG FIX: was fit_transform(x_test) — the transformer must not be
    # re-fitted on test data.
    x_modified_test = polyreg.transform(x_test)

    model = linear_model.Lasso(alpha=0.5)
    model.fit(x_modified_train, y_train)
    y_predicted_test = model.predict(x_modified_test)
    y_predicted_train = model.predict(x_modified_train)
    print('Train RMSE for Lasso with Polynomial Degree 10:',
          sqrt(mean_squared_error(y_train, y_predicted_train)))
    print('Test RMSE for Lasso with Polynomial Degree 10:',
          sqrt(mean_squared_error(y_test, y_predicted_test)))

    # Sweep alpha over [0, 1].  NOTE: alpha=0 (plain least squares) makes
    # sklearn's Lasso emit a convergence warning but still runs.
    train_err = []
    test_err = []
    alpha_vals = np.linspace(0, 1, 20)
    for alpha_v in alpha_vals:
        polyreg = linear_model.Lasso(alpha=alpha_v)
        polyreg.fit(x_modified_train, y_train)
        train_err.append(
            sqrt(mean_squared_error(y_train, polyreg.predict(x_modified_train))))
        test_err.append(
            sqrt(mean_squared_error(y_test, polyreg.predict(x_modified_test))))

    min_Test_error = min(test_err)
    # BUG FIX: the best lambda was computed as (index + 1) / 20, but
    # linspace(0, 1, 20) has spacing 1/19, so the reported value did not
    # match the alpha actually used.  Look it up directly instead.
    Lambda_min_test_error = alpha_vals[test_err.index(min_Test_error)]
    print(
        f"""Best lambda value is {Lambda_min_test_error} as it has the lowest test error of all lambdas: {min_Test_error: 3.5f}"""
    )

    # Calculate RMSE without Lasso for polynomial of degree 10.
    model7 = linear_model.LinearRegression()
    model7.fit(x_modified_train, y_train)
    y_test7 = model7.predict(x_modified_test)
    linear_rmse = sqrt(mean_squared_error(y_test, y_test7))
    print('Test RMSE with Normal linear Regression is :', linear_rmse)
    # NOTE(review): a positive difference here means the Lasso test error
    # is larger than the plain linear-regression error — confirm the
    # intended sign convention for "improvement".
    print(
        'Improvement in model by using lasso over normal Linear regression for polynomial degree of 10 is : ',
        min_Test_error - linear_rmse)

    # Plot train/test RMSE as a function of alpha.
    plt.title('Lasso')
    plt.xlabel('Alpha value')
    plt.ylabel('RMSE')
    plt.plot(np.linspace(0, 1, 20), train_err, 'bo-', label='Train')
    plt.plot(np.linspace(0, 1, 20), test_err, 'ro-', label='Test')
    plt.legend()
    plt.show()
# Evaluate a fixed Lasso (alpha=0.5) on the polynomial-expanded features,
# then sweep alpha over [0, 1] and plot train/test RMSE.
x_modified_train = polyreg.fit_transform(x_train)
x_modified_test = polyreg.fit_transform(x_test)

model = linear_model.Lasso(alpha=0.5)
model.fit(x_modified_train, y_train)
y_predicted_test = model.predict(x_modified_test)
y_predicted_train = model.predict(x_modified_train)

# print RMSE train and test value
rmse_of_train = sqrt(mean_squared_error(y_train, y_predicted_train))
rmse_of_test = sqrt(mean_squared_error(y_test, y_predicted_test))
print('RMSE Train:', rmse_of_train)
print('RMSE Test:', rmse_of_test)

# NOTE(review): the sweep below fits on the raw x_train/x_test, not the
# polynomial features used above — confirm that is intentional.
train_err = []
test_err = []
alpha_vals = np.linspace(0, 1, 9)
for alpha_v in alpha_vals:
    polyreg = linear_model.Lasso(alpha=alpha_v)
    polyreg.fit(x_train, y_train)
    train_err.append(
        sqrt(mean_squared_error(y_train, polyreg.predict(x_train))))
    test_err.append(
        sqrt(mean_squared_error(y_test, polyreg.predict(x_test))))

# Plot Lasso Graph
plt.title('Lasso')
plt.xlabel('Alpha value')
plt.ylabel('RMSE')
plt.plot(np.linspace(0, 1, 9), train_err, 'bo-', label='Train')
plt.plot(np.linspace(0, 1, 9), test_err, 'ro-', label='Test')
plt.legend()
plt.show()

# In[ ]:
# Degree-10 polynomial feature expansion of the train/test predictors.
poly = PolynomialFeatures(degree=10)
x_train1 = poly.fit_transform(x_train)
x_test1 = poly.fit_transform(x_test)

# Typical polynomial regression model
# # In[47]:
# Ordinary least squares on the expanded features.
poly = LinearRegression(normalize=True)
poly.fit(x_train1, y_train)

y_pred_train_poly = poly.predict(x_train1)
y_pred_test_poly = poly.predict(x_test1)

# RMSE on the training and test sets (MSE is symmetric in its arguments).
MSE_train_poly = metrics.mean_squared_error(y_pred_train_poly, y_train)
MSE_test_poly = metrics.mean_squared_error(y_pred_test_poly, y_test)
RMSE_tr_poly = math.sqrt(MSE_train_poly)
RMSE_tt_poly = math.sqrt(MSE_test_poly)

print("The RMSE for training with polynomial of degree 10 is :", RMSE_tr_poly)
print("The RMSE for testing with polynomial of degree 10 is :", RMSE_tt_poly)

# Lasso Model with default value of regularisation parameter alpha
# In[48]:
## POLYNOMINAL
# Fit an ordinary least-squares model and report diagnostics:
# intercept/coefficients, residual sums of squares, variance score,
# and histograms of the prediction error on train and test data.
poly = linear_model.LinearRegression(normalize=True)

# Train the model using the training sets
X_train_no_intercept = X_train
X_train = X_train.reshape(-1, X_train.shape[1])
poly.fit(X_train, y_train)

# The intercept
print('Intercept: \n', poly.intercept_)
# The coefficients
print('Coefficients: \n', poly.coef_)

# The mean square error on both splits; the test-data value is also
# stored for later graphing.
train_sq_resid = (poly.predict(X_train) - y_train) ** 2
test_sq_resid = (poly.predict(X_test) - y_test) ** 2
print("Residual sum of squares, training data: %.2f" % np.mean(train_sq_resid))
print("Residual sum of squares, test data: %.2f" % np.mean(test_sq_resid))
var_to_graph['multReg_poly'] = np.mean(test_sq_resid)

# Explained variance score: 1 is perfect prediction
print('Variance score, training data: %.2f' % poly.score(X_train, y_train))

# vector of prediction error
print('Distribution of prediction error on training data:')
predError = poly.predict(X_train) - y_train
plt.hist(predError)
plt.show()

print('Distribution of prediction error on test data:')
predError = poly.predict(X_test) - y_test
plt.hist(predError)
plt.show()
# Compare linear regression, polynomial regression and SVR on the test set.
y_pred = Linear_regressor.predict(X_test)
print('Linear Regression')
# BUG FIX: the original rebound the names r2_score and
# mean_absolute_error to their float results, so the second calls below
# raised TypeError ('numpy.float64' object is not callable).  Store the
# metrics under distinct names instead.
linear_r2 = r2_score(y_test, y_pred, sample_weight=None)
print(linear_r2)
linear_mae = mean_absolute_error(y_test, y_pred, sample_weight=None)
print(linear_mae)

# Fitting polynomial regression on the dataset
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
poly_reg = PolynomialFeatures(degree=4)
# BUG FIX: PolynomialFeatures only expands features and has no
# .predict() — the original poly_reg.predict(X_test) raised
# AttributeError.  Expand the features, then fit a LinearRegression.
X_train_poly = poly_reg.fit_transform(X_train)
poly_model = LinearRegression()
poly_model.fit(X_train_poly, y_train)

# Predicting the test results
y_pred = poly_model.predict(poly_reg.transform(X_test))
print('Polynomial Regression')
poly_r2 = r2_score(y_test, y_pred, sample_weight=None)
print(poly_r2)
poly_mae = mean_absolute_error(y_test, y_pred, sample_weight=None)
print(poly_mae)

# Fitting SVM on the dataset
from sklearn.svm import SVR
SVR_regressor = SVR(kernel='rbf')
SVR_regressor.fit(X, y)

# Predicting the test results
y_pred = SVR_regressor.predict(X_test)
# Record linear-regression metrics for the lstat predictor and overlay the
# fitted line on a scatter plot of the data.
print("mse of lr", mean_squared_error(y, y_lr_pred))
model_rsq_dic['lr'] = (lr.score(lstat_x, y), mean_squared_error(y, y_lr_pred))
plt.scatter(lstat_x, y, s=10)
plt.plot(lstat_x, y_lr_pred, color='red', label='Linear Regression - rsq: 0.544')

# Degree-2 polynomial regression: expand lstat_x, then fit OLS on the
# expanded features (the name `poly` is reused for both objects).
poly = PolynomialFeatures(degree=2)
lstat_x_transformed = poly.fit_transform(lstat_x)
poly = LinearRegression()
poly.fit(lstat_x_transformed, y)
print("poly equation", poly.coef_, " intercept ", poly.intercept_)
y_poly_pred = poly.predict(lstat_x_transformed)
print("r^2 of poly", poly.score(lstat_x_transformed, y))
print("mse of poly", mean_squared_error(y, y_poly_pred))
model_rsq_dic['poly'] = (poly.score(lstat_x_transformed, y),
                         mean_squared_error(y, y_poly_pred))

# REALLY IMPT to sort the values of x before line plot
# (x, y_poly_pred) pairs are sorted together by x so the line is drawn
# left-to-right instead of zig-zagging.
sort_axis = operator.itemgetter(0)
sorted_zip = sorted(zip(lstat_x, y_poly_pred), key=sort_axis)
x, y_poly_pred = zip(*sorted_zip)
plt.plot(x, y_poly_pred, color='green', label='Poly Regression - rsq: 0.641')

# MARS (Multivariate Adaptive Regression Splines) via the Earth model.
mars = Earth()
# NOTE(review): this call is truncated at the end of this chunk — the
# remaining arguments continue beyond the visible source.
mars.fit(
    lstat_x,
# Load the library required for feature engineering
from sklearn.preprocessing import PolynomialFeatures

# Extract the predictor from the dataframe df
X = df.iloc[:, 0:1].values

# Calculate the MSE with a polynomial with varying degrees
degrees = [2, 3, 4, 5, 6, 7, 8, 9]
mse = []
for degree in degrees:
    # Expand the single predictor to polynomial terms of this degree.
    poly = PolynomialFeatures(degree, include_bias=False)
    X_poly = poly.fit_transform(X)
    # FIX: loop variable renamed from the ambiguous `l` (PEP 8 E741);
    # with one input column only the first name ('Feature1') is used.
    X_poly_feature_name = poly.get_feature_names(
        ['Feature' + str(i) for i in range(1, 6)])
    df_poly = pd.DataFrame(X_poly, columns=X_poly_feature_name)
    df_poly['y'] = df['Y']
    X_train = df_poly.drop('y', axis=1)
    y_train = df_poly['y']
    # Fit OLS on the expanded features and record the training MSE.
    poly = LinearRegression(normalize=True)
    model_poly = poly.fit(X_train, y_train)
    y_poly = poly.predict(X_train)
    mse.append(mean_squared_error(y_poly, y_train))

# Analyze the MSE with a polynomial with varying degrees
plt.figure(figsize=(12, 8))
plt.xlabel("Degrees", fontsize=20)
# FIX: corrected axis-label typo "Eror" -> "Error".
plt.ylabel("Mean-squared Error", fontsize=20)
plt.grid(1)
plt.scatter(degrees, mse, edgecolors=(0, 0, 0), lw=2, s=80)
plt.plot(degrees, mse, 'k--', lw=2)
# Father/Son heights: degree-10 polynomial features, then sweep the Lasso
# regularisation strength over [0, 1] and plot train/test RMSE.
x_train = dataset['Father'].values.reshape(-1, 1)
y_train = dataset['Son'].values.reshape(-1, 1)
x_test = dataset_test['Father'].values.reshape(-1, 1)
y_test = dataset_test['Son'].values.reshape(-1, 1)

poly_reg = PolynomialFeatures(degree=10)
modified_x_train = poly_reg.fit_transform(x_train)
modified_x_test = poly_reg.fit_transform(x_test)

alpha_grid = np.linspace(0, 1, 10)
train_err = []
test_err = []
for alpha_value in alpha_grid:
    # High max_iter / loose tol so the coordinate-descent solver converges
    # on the ill-conditioned degree-10 design matrix.
    poly_reg = Lasso(alpha=alpha_value, max_iter=1e7, tol=.001)
    poly_reg.fit(modified_x_train, y_train)
    y_train_predict = poly_reg.predict(modified_x_train)
    y_test_predict = poly_reg.predict(modified_x_test)
    train_err.append(math.sqrt(mean_squared_error(y_train, y_train_predict)))
    test_err.append(math.sqrt(mean_squared_error(y_test, y_test_predict)))

# Report the RMSE of the final (alpha=1) model from the sweep.
print("Lasso Train RMSE: ", train_err[-1])
print("Lasso Test RMSE: ", test_err[-1])

plt.xlabel('Alpha')
plt.ylabel('RMSE')
plt.title('Lasso')
plt.plot(np.linspace(0, 1, 10), train_err, 'bo-', label='Train')
plt.plot(np.linspace(0, 1, 10), test_err, 'ro-', label='Test')
plt.legend()
plt.show()
# Fixed-alpha Lasso on the polynomial features, then an alpha sweep on the
# raw features with a train/test RMSE plot.
x_modified_test = polyreg.fit_transform(x_test)

model = Lasso(alpha=0.5)
model.fit(x_modified_train, y_train)
y_predicted_test = model.predict(x_modified_test)
y_predicted_train = model.predict(x_modified_train)

train_rmse_value = math.sqrt(mean_squared_error(y_train, y_predicted_train))
test_rmse_value = math.sqrt(mean_squared_error(y_test, y_predicted_test))
print('RMSE Train:', train_rmse_value)
print('RMSE Test:', test_rmse_value)

# In[10]:

#Lasso
# NOTE(review): this sweep fits on the raw x_train/x_test rather than the
# polynomial features above — confirm that is intentional.
train_err = []
test_err = []
alpha_vals = np.linspace(0, 1, 9)
for alpha_v in alpha_vals:
    polyreg = Lasso(alpha=alpha_v)
    polyreg.fit(x_train, y_train)
    train_err.append(
        math.sqrt(mean_squared_error(y_train, polyreg.predict(x_train))))
    test_err.append(
        math.sqrt(mean_squared_error(y_test, polyreg.predict(x_test))))

plt.title('Lasso')
plt.xlabel('Alpha value')
plt.ylabel('RMSE')
plt.plot(np.linspace(0, 1, 9), train_err, 'bo-', label='Train')
plt.plot(np.linspace(0, 1, 9), test_err, 'ro-', label='Test')
plt.legend()
plt.show()

# In[ ]:
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

# Degree-4 polynomial regression on the railways data.
reg = PolynomialFeatures(degree=4)
x_poly = reg.fit_transform(x_train)
reg2 = LinearRegression()
reg2.fit(x_poly, y_train)
y_pred = reg2.predict(x_poly)

# NOTE(review): x values come from x_test while y_pred was computed on
# x_train, and the two are sorted independently — confirm this plot is
# what was intended.
plt.scatter(x_train[:, 0], y_train, color='red')
plt.plot(np.sort(x_test[:, 0]), np.sort(y_pred), color='blue')
plt.title("railways with poly reg")
plt.show()

#linear regression
reg = LinearRegression()
reg.fit(x_train, y_train)
y_pred = reg.predict(x_test)
plt.scatter(x_train[:, 3], y_train, color='red')
plt.plot(np.sort(x_test[:, 3]), np.sort(y_pred), color='blue')
plt.title("railways with linear reg")
plt.show()

# Report R^2 on both splits for the linear model.
train_score = reg.score(x_train, y_train)
test_score = reg.score(x_test, y_test)
print('The accuracy of the linear reg is {:.2f} out of 1 on training data'.
      format(train_score))
print('The accuracy of the linear reg is {:.2f} out of 1 on test data'.format(
    test_score))

# Test RMSE of the linear model.
from sklearn.metrics import mean_squared_error
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print(rmse)

#KNN
from sklearn.neighbors import KNeighborsClassifier
# 5 polynomial transformation on the feature 'horse power'. poly = PolynomialFeatures(degree=5) x_train_2, x_test_2, y_train_2, y_test_2 = train_test_split(X, Y, test_size=0.45, random_state=0) x_train_fit = poly.fit_transform(x_train_2[["horsepower"]]) x_test_fit = poly.fit_transform((x_test_2[["horsepower"]])) # Create a linear regression model "poly" and train it poly = LinearRegression() poly.fit(x_train_fit, y_train_2) # Get predicted values of price column Yhat = poly.predict(x_test_fit) print("Predicted values from polynomial regression :\n", Yhat[0:5]) # Take the first five predicted values and compare it to the actual targets print("Actual values :\n", y_test_2[0:5]) # Get R^2 of training and test data R_sq_train_poly = poly.score(x_train_fit, y_train_2) R_sq_test_poly = poly.score(x_test_fit, y_test_2) print("Polynomial Regression R^2 values ", R_sq_train_poly, " and ", R_sq_test_poly) # How the R^2 changes on the test data for different order polynomials and plot the results orders = [1, 2, 3, 4, 5, 6] R_sq = [] X_train, X_test, Y_train, Y_test = train_test_split(X,