def linear_model(x_train, x_test, y_train, y_test):
    """Fit a linear regression and collect its scores in a one-row frame.

    The model is built by ``linear_regression(x_train, y_train)``, evaluated
    on the test split with ``regression_predictor`` and cross-validated on
    the training split with ``cross_validation_regressor``.

    Args:
        x_train: Training features.
        x_test: Test features.
        y_train: Training target.
        y_test: Test target.

    Returns:
        tuple: ``(model, y_pred, stats)`` where ``stats`` is a DataFrame with
        the columns ``cross_val``, ``rmse``, ``mae`` and ``r2``.
    """
    G = linear_regression(x_train, y_train)
    # NOTE(review): the second value returned by regression_predictor is
    # treated as the RMSE here; confirm against that helper.
    y_pred, rmse, mae, r2 = regression_predictor(G, x_test, y_test)
    # Cross-validate the model fitted above. The previous code passed the
    # undefined name ``model`` here.
    val = cross_validation_regressor(G, x_train, y_train)
    # Row order follows the column labels; ``rmse`` and ``mae`` used to be
    # stored under each other's label.
    stats = pd.DataFrame(
        [(val, rmse, mae, r2)],
        columns=['cross_val', 'rmse', 'mae', 'r2'],
    )
    return G, y_pred, stats
def linear_model(x_train, x_test, y_train, y_test):
    """Fit a linear regression on the given split and tabulate its scores.

    Args:
        x_train: Training features.
        x_test: Test features.
        y_train: Training target.
        y_test: Test target.

    Returns:
        tuple: ``(model, y_pred, scores)`` where ``scores`` is a DataFrame
        with the columns ``cross_val``, ``mae``, ``mse`` and ``r2``.
    """
    # Fit and cross-validate from the arguments. The previous code read
    # ``model`` and ``val`` from names that were never defined in the
    # function, so the training data passed in was ignored.
    model = linear_regression(x_train, y_train)
    val = cross_validation_regressor(model, x_train, y_train)
    y_pred, mse, mae, r2 = regression_predictor(model, x_test, y_test)

    scores = pd.DataFrame()
    scores['cross_val'] = pd.Series(val)
    scores['mae'] = pd.Series(mae)
    scores['mse'] = pd.Series(mse)
    scores['r2'] = pd.Series(r2)
    return model, y_pred, scores
def lasso(x_train, x_test, y_train, y_test, alpha=0.1):
    """Train a Lasso regressor and report its scores.

    Args:
        x_train: Training features.
        x_test: Test features.
        y_train: Training target.
        y_test: Test target.
        alpha: Regularisation strength handed to ``Lasso``.

    Returns:
        tuple: ``(model, y_pred, stats)`` where ``stats`` is a one-row
        DataFrame with the columns ``cross_val``, ``mae``, ``r2``, ``rmse``.
    """
    regressor = Lasso(alpha=alpha)
    regressor.fit(x_train, y_train)

    cv_score = cross_validation_regressor(regressor, x_train, y_train)
    y_pred, mse, mae, r2 = regression_predictor(regressor, x_test, y_test)

    # The last column stores the square root of the reported MSE.
    row = (cv_score, mae, r2, np.sqrt(mse))
    labels = ['cross_val', 'mae', 'r2', 'rmse']
    stats = pd.DataFrame([row], columns=labels)
    return regressor, y_pred, stats
def plot_residuals(model, x_test, y_test):
    """Scatter the test residuals of ``model`` against the true target.

    Predictions come from ``regression_predictor(model, x_test, y_test)``;
    the metrics it also returns are not used. The figure is displayed with
    ``plt.show()`` and nothing is returned.
    """
    y_pred, _mse, _mae, _r2 = regression_predictor(model, x_test, y_test)
    residuals = y_test - y_pred

    plt.scatter(y_test, residuals)
    plt.title('Residual Plot')
    plt.xlabel('SalePrice')
    plt.ylabel('Errors')
    plt.show()
def lasso_model(x_train, x_test, y_train, alpha=0.1, y_test=None):
    """Train a Lasso regressor and report its scores.

    Args:
        x_train: Training features.
        x_test: Test features.
        y_train: Training target.
        alpha: Regularisation strength handed to ``Lasso``.
        y_test: Test target. Optional only for backward compatibility: the
            original signature had no such parameter and the body read a
            module-level ``y_test`` instead. When omitted, that module-level
            name is still used if it exists.

    Returns:
        tuple: ``(fitted, y_pred, stats)`` where ``fitted`` is the value
        returned by ``model.fit`` and ``stats`` is a one-row DataFrame with
        the columns ``cross_validation``, ``mae``, ``mse`` and ``r2``.

    Raises:
        ValueError: If ``y_test`` is neither passed nor defined at module
            level.
    """
    if y_test is None:
        # Old call sites passed no test target; fall back to the module
        # global the previous implementation implicitly depended on.
        y_test = globals().get('y_test')
        if y_test is None:
            raise ValueError(
                'lasso_model requires the test target; pass y_test=...'
            )

    model = Lasso(alpha=alpha)
    G = model.fit(x_train, y_train)
    val = cross_validation_regressor(model, x_train, y_train)
    # The earlier direct ``model.predict(x_test)`` call was dropped: its
    # result was immediately overwritten by the predictions returned here.
    y_pred, mse, mae, r2 = regression_predictor(model, x_test, y_test)
    stats1 = pd.DataFrame(
        [[val, mae, mse, r2]],
        columns=['cross_validation', 'mae', 'mse', 'r2'],
    )
    return G, y_pred, stats1
def ridge(x_train, x_test, y_train, y_test, alpha=0.1):
    """Train a normalised Ridge regressor and report its scores.

    Args:
        x_train: Training features.
        x_test: Test features.
        y_train: Training target.
        y_test: Test target.
        alpha: Regularisation strength handed to ``Ridge``.

    Returns:
        tuple: ``(model, y_pred, stats)`` where ``stats`` is a one-row
        DataFrame with the columns ``cross_val``, ``mae``, ``r2``, ``rmse``.
    """
    regressor = Ridge(alpha=alpha, normalize=True, random_state=9)
    regressor.fit(x_train, y_train)

    cv_score = cross_validation_regressor(regressor, x_train, y_train)
    y_pred, mse, mae, r2 = regression_predictor(regressor, x_test, y_test)

    # The last column stores the square root of the reported MSE.
    row = (cv_score, mae, r2, np.sqrt(mse))
    stats = pd.DataFrame([row], columns=['cross_val', 'mae', 'r2', 'rmse'])
    return regressor, y_pred, stats
def lasso(x_train, x_test, y_train, y_test, alpha=0.1):
    """Train a Lasso regressor and report its scores.

    Returns:
        tuple: ``(model, y_pred, stat)`` where ``stat`` is a one-row
        DataFrame without explicit column labels; the values are, in order,
        the cross-validation score, MAE, r2 and the square root of the MSE.
    """
    regressor = Lasso(alpha=alpha, random_state=7)
    regressor.fit(x_train, y_train)

    cv_score = cross_validation_regressor(regressor, x_train, y_train)
    y_pred, mse, mae, r2 = regression_predictor(regressor, x_test, y_test)

    rmse = np.sqrt(mse)
    stat = pd.DataFrame([[cv_score, mae, r2, rmse]])
    return regressor, y_pred, stat
def linear_model(x_train, x_test, y_train, y_test):
    """Fit a linear regression and tabulate its scores.

    Returns:
        tuple: ``(model, y_pred, stats)``. ``stats`` has the columns
        ``'0'``..``'3'`` holding the cross-validation score, MAE, MSE and
        r2. Because the frame is indexed by the four dictionary keys before
        the index is reset, scalar scores are repeated on four rows.
    """
    fitted = linear_regression(x_train, y_train)
    cv_score = cross_validation_regressor(fitted, x_train, y_train)
    y_pred, mse, mae, r2 = regression_predictor(fitted, x_test, y_test)

    # Column '2' carries the MSE exactly as reported (no square root).
    columns = {'0': cv_score, '1': mae, '2': mse, '3': r2}
    stats = pd.DataFrame(columns, index=columns.keys())
    stats = stats.reset_index(drop=True)
    return fitted, y_pred, stats
def linear_model(x_train, x_test, y_train, y_test):
    """Fit a linear regression and tabulate its scores.

    Returns:
        tuple: ``(model, y_pred, stats)`` where ``stats`` is a one-row
        DataFrame with the columns ``v`` (cross-validation score), ``m``
        (MAE), ``s`` (MSE) and ``r`` (r2).
    """
    fitted = linear_regression(x_train, y_train)
    cv_score = cross_validation_regressor(fitted, x_train, y_train)
    y_pred, mse, mae, r2 = regression_predictor(fitted, x_test, y_test)

    values = np.array([cv_score, mae, mse, r2])
    row = values.reshape(1, 4)
    stats = pd.DataFrame(row, columns=['v', 'm', 's', 'r'], index=[0])
    return fitted, y_pred, stats
def lasso(x_train, x_test, y_train, y_test, alpha=0.1):
    """Train a Lasso regressor and tabulate its scores.

    Returns:
        tuple: ``(model, y_pred, stats)``. ``stats`` has the columns
        ``'0'``..``'3'`` holding the cross-validation score, MAE, r2 and
        the square root of the MSE. Because the frame is indexed by the
        four dictionary keys before the index is reset, scalar scores are
        repeated on four rows.
    """
    regressor = Lasso(alpha=alpha)
    regressor.fit(x_train, y_train)

    cv_score = cross_validation_regressor(regressor, x_train, y_train)
    y_pred, mse, mae, r2 = regression_predictor(regressor, x_test, y_test)

    columns = {'0': cv_score, '1': mae, '2': r2, '3': mse ** 0.5}
    stats = pd.DataFrame(columns, index=columns.keys())
    stats = stats.reset_index(drop=True)
    return regressor, y_pred, stats
def lasso(x_train, x_test, y_train, y_test, alpha=0.1):
    """Train a Lasso regressor and report its scores.

    Args:
        x_train: Training features.
        x_test: Test features.
        y_train: Training target.
        y_test: Test target.
        alpha: Regularisation strength handed to ``Lasso``.

    Returns:
        tuple: ``(fitted, y_pred, stat_table)`` where ``fitted`` is the
        value returned by ``Lasso.fit`` and ``stat_table`` is a one-row
        DataFrame with the columns ``cross_validation``, ``mae``, ``r2``
        and ``rmse``.
    """
    lasso_model = Lasso(alpha=alpha)
    G = lasso_model.fit(x_train, y_train)
    val = cross_validation_regressor(lasso_model, x_train, y_train)
    y_pred, mse, mae, r2 = regression_predictor(lasso_model, x_test, y_test)
    # r2 is recomputed from the predictions, replacing the value returned
    # by regression_predictor (kept from the original implementation).
    r2 = r2_score(y_test, y_pred)
    # The column is labelled ``rmse``, so store the root of the MSE; the
    # raw MSE used to be written here.
    rmse = mse ** 0.5
    stat_table = pd.DataFrame(
        [[val, mae, r2, rmse]],
        columns=['cross_validation', 'mae', 'r2', 'rmse'],
    )
    return G, y_pred, stat_table
def ridge(x_train, x_test, y_train, y_test, alpha=0.1):
    """Train a normalised Ridge regressor and tabulate its scores.

    Returns:
        tuple: ``(model, y_pred, stats)`` where ``stats`` is a one-row
        DataFrame with the columns ``v`` (cross-validation score), ``m``
        (MAE), ``s`` (r2) and ``r`` (square root of the MSE).
    """
    regressor = Ridge(alpha=alpha, random_state=7, normalize=True)
    regressor.fit(x_train, y_train)

    cv_score = cross_validation_regressor(regressor, x_train, y_train)
    y_pred, mse, mae, r2 = regression_predictor(regressor, x_test, y_test)

    values = np.array([cv_score, mae, r2, math.pow(mse, 0.5)])
    row = values.reshape(1, 4)
    stats = pd.DataFrame(row, columns=['v', 'm', 's', 'r'], index=[0])
    return regressor, y_pred, stats
def ridge(x_train, x_test, y_train, y_test, alpha=0.1):
    """Train a Ridge regressor and report its scores.

    Args:
        x_train: Training features.
        x_test: Test features.
        y_train: Training target.
        y_test: Test target.
        alpha: Regularisation strength handed to ``Ridge``.

    Returns:
        tuple: ``(fitted, y_pred, stat_table)`` where ``fitted`` is the
        value returned by ``Ridge.fit`` and ``stat_table`` is a one-row
        DataFrame with the columns ``cross_validation``, ``mae``, ``r2``
        and ``rmse``.
    """
    ridge_model = Ridge(alpha=alpha)
    G = ridge_model.fit(x_train, y_train)
    val = cross_validation_regressor(ridge_model, x_train, y_train)
    y_pred, mse, mae, r2 = regression_predictor(ridge_model, x_test, y_test)
    # The column is labelled ``rmse``, so store the root of the MSE; the
    # raw MSE used to be written here.
    rmse = mse ** 0.5
    stat_table = pd.DataFrame(
        [[val, mae, r2, rmse]],
        columns=['cross_validation', 'mae', 'r2', 'rmse'],
    )
    return G, y_pred, stat_table
def linear_model(x_train, x_test, y_train, y_test):
    """Fit a linear regression and tabulate its scores.

    Returns:
        tuple: ``(model, y_pred, stats)``. ``stats`` has the columns
        ``CV_score``, ``MAE``, ``MSE`` and ``r2``. The first column is
        seeded with the cross-validation score twice, so the frame has two
        rows and the remaining scores are broadcast over both.
    """
    fitted = linear_regression(x_train, y_train)
    cv_score = cross_validation_regressor(fitted, x_train, y_train)
    y_pred, mse, mae, r2 = regression_predictor(fitted, x_test, y_test)

    stats = pd.DataFrame()
    # Seeding with a pair gives the empty frame an index to broadcast onto.
    stats['CV_score'] = (cv_score, cv_score)
    for label, value in (('MAE', mae), ('MSE', mse), ('r2', r2)):
        stats[label] = value
    return fitted, y_pred, stats
def lasso(x_train, x_test, y_train, y_test, alpha=0.1):
    """Train a Lasso regressor and report its scores.

    Args:
        x_train: Training features.
        x_test: Test features.
        y_train: Training target.
        y_test: Test target.
        alpha: Regularisation strength handed to ``Lasso``.

    Returns:
        tuple: ``(model, y_pred, stats)`` where ``stats`` is a one-row
        DataFrame with the columns ``cross_validation``, ``mae``, ``r2``
        and ``rmse``.
    """
    # Honour the caller's alpha; it used to be hard-coded to 0.1, which
    # silently ignored the argument.
    model = Lasso(alpha=alpha)
    model.fit(x_train, y_train)
    val = cross_validation_regressor(model, x_train, y_train)
    y_pred, mse, mae, r2 = regression_predictor(model, x_test, y_test)

    stats = pd.DataFrame(columns=['cross_validation', 'mae', 'r2', 'rmse'])
    stats.loc[0, 'cross_validation'] = val
    stats.loc[0, 'rmse'] = mse ** 0.5
    stats.loc[0, 'mae'] = mae
    stats.loc[0, 'r2'] = r2
    return model, y_pred, stats
def ridge(x_train, x_test, y_train, y_test, alpha=0.1):
    """Train a normalised Ridge regressor and tabulate its scores.

    Returns:
        tuple: ``(model, y_pred, scores)`` where ``scores`` is a DataFrame
        with the columns ``cross_val``, ``mae``, ``r2`` and ``mse``.

    NOTE(review): the ``mse`` column actually stores ``mse ** 0.5``; the
    label is kept as-is because callers may look the column up by name.
    """
    model = Ridge(alpha=alpha, normalize=True)
    model.fit(x_train, y_train)

    y_pred, mse, mae, r2 = regression_predictor(model, x_test, y_test)
    cv_score = cross_validation_regressor(model, x_train, y_train)

    scores = pd.DataFrame()
    entries = (
        ('cross_val', cv_score),
        ('mae', mae),
        ('r2', r2),
        ('mse', mse ** 0.5),
    )
    for label, value in entries:
        scores[label] = pd.Series(value)
    return model, y_pred, scores
def lasso(x_train, x_test, y_train, y_test, alpha=0.1):
    """Train a Lasso regressor and tabulate its scores.

    Returns:
        tuple: ``(model, y_pred, scores)`` where ``scores`` is a DataFrame
        with the columns ``cross_val``, ``mae``, ``r2`` and ``mse``.

    NOTE(review): the ``mse`` column actually stores ``mse ** 0.5``; the
    label is kept as-is because callers may look the column up by name.
    """
    model = Lasso(alpha=alpha)
    model.fit(x_train, y_train)

    y_pred, mse, mae, r2 = regression_predictor(model, x_test, y_test)
    cv_score = cross_validation_regressor(model, x_train, y_train)

    scores = pd.DataFrame()
    entries = (
        ('cross_val', cv_score),
        ('mae', mae),
        ('r2', r2),
        ('mse', mse ** 0.5),
    )
    for label, value in entries:
        scores[label] = pd.Series(value)
    return model, y_pred, scores
def linear_model(x_train, x_test, y_train, y_test):
    """Fit a linear regression and tabulate its scores.

    Returns:
        tuple: ``(model, y_pred, stats)`` where ``stats`` is a one-row
        DataFrame with the columns ``c_val``, ``mse``, ``mae`` and ``r2``.
    """
    fitted = linear_regression(x_train, y_train)
    cv_score = cross_validation_regressor(fitted, x_train, y_train)
    y_pred, mse, mae, r2 = regression_predictor(fitted, x_test, y_test)

    summary = dict(c_val=cv_score, mse=mse, mae=mae, r2=r2)
    stats = pd.DataFrame(summary, index=[0])
    return fitted, y_pred, stats
def lasso(x_train, x_test, y_train, y_test, alpha=0.1):
    """Train a Lasso regressor and tabulate its scores.

    Returns:
        tuple: ``(model, y_pred, stats)``. ``stats`` has the columns
        ``CV_score``, ``MAE``, ``r2`` and ``MSE``. The first column is
        seeded with the cross-validation score twice, so the frame has two
        rows and the remaining scores are broadcast over both.

    NOTE(review): the ``MSE`` column stores ``np.sqrt(mse)``; the label is
    kept as-is because callers may look the column up by name.
    """
    regressor = Lasso(alpha=alpha)
    regressor.fit(x_train, y_train)

    cv_score = cross_validation_regressor(regressor, x_train, y_train)
    y_pred, mse, mae, r2 = regression_predictor(regressor, x_test, y_test)

    stats = pd.DataFrame()
    # Seeding with a pair gives the empty frame an index to broadcast onto.
    stats['CV_score'] = (cv_score, cv_score)
    for label, value in (('MAE', mae), ('r2', r2), ('MSE', np.sqrt(mse))):
        stats[label] = value
    return regressor, y_pred, stats
def linear_model(x_train, x_test, y_train, y_test):
    """Fit a linear regression and tabulate its scores.

    Args:
        x_train: Training features.
        x_test: Test features.
        y_train: Training target.
        y_test: Test target.

    Returns:
        tuple: ``(model, y_pred, stats)`` where ``stats`` is a one-row
        DataFrame with the columns ``cross_validation``, ``rmse``, ``mae``
        and ``rsquared``.
    """
    model = linear_regression(x_train, y_train)
    val = cross_validation_regressor(model, x_train, y_train)
    y_pred, mse, mae, r2 = regression_predictor(model, x_test, y_test)
    d = {
        'cross_validation': [val],
        # The column is labelled ``rmse``, so store the root of the MSE;
        # the raw MSE used to be written here.
        'rmse': [mse ** 0.5],
        'mae': [mae],
        'rsquared': [r2],
    }
    stats = pd.DataFrame(data=d)
    return model, y_pred, stats
def ridge(x_train, x_test, y_train, y_test, alpha=0.1):
    """Train a normalised Ridge regressor and tabulate its scores.

    Returns:
        tuple: ``(model, y_pred, stats)`` where ``stats`` is a one-row
        DataFrame with the columns ``c_val``, ``rmse``, ``mae`` and ``r2``.
    """
    regressor = Ridge(alpha=alpha, normalize=True, random_state=9)
    regressor.fit(x_train, y_train)

    cv_score = cross_validation_regressor(regressor, x_train, y_train)
    y_pred, mse, mae, r2 = regression_predictor(regressor, x_test, y_test)

    summary = dict(c_val=cv_score, rmse=np.sqrt(mse), mae=mae, r2=r2)
    stats = pd.DataFrame(summary, index=[0])
    return regressor, y_pred, stats
def lasso(x_train, x_test, y_train, y_test, alpha=0.1):
    """Train a Lasso regressor and tabulate its scores.

    Returns:
        tuple: ``(model, y_pred, stats)`` where ``stats`` is a one-row
        DataFrame with the columns ``c_val``, ``rmse``, ``mae`` and ``r2``.
    """
    regressor = Lasso(alpha=alpha, normalize=False, random_state=9)
    regressor.fit(x_train, y_train)

    cv_score = cross_validation_regressor(regressor, x_train, y_train)
    y_pred, mse, mae, r2 = regression_predictor(regressor, x_test, y_test)

    summary = dict(c_val=cv_score, rmse=np.sqrt(mse), mae=mae, r2=r2)
    stats = pd.DataFrame(summary, index=[0])
    return regressor, y_pred, stats
def ridge(x_train, x_test, y_train, y_test, alpha=0.1):
    """Train a Ridge regressor and tabulate its scores.

    Cross-validation is a 3-fold ``cross_val_score`` with ``r2`` scoring on
    the training split; the mean of the fold scores is reported.

    Args:
        x_train: Training features.
        x_test: Test features.
        y_train: Training target.
        y_test: Test target.
        alpha: Regularisation strength handed to ``Ridge``.

    Returns:
        tuple: ``(model, y_pred, stats)``. ``stats`` has the columns
        ``CV_score``, ``MAE``, ``r2`` and ``MSE``. The first column is
        seeded with the cross-validation score twice, so the frame has two
        rows (shape kept from the original implementation).
    """
    # Honour the caller's alpha; it used to be hard-coded to 1.7.
    model = Ridge(alpha=alpha)
    model.fit(x_train, y_train)

    # random_state has no effect without shuffle=True and current
    # scikit-learn rejects that combination, so it is not passed. The fold
    # boundaries are the same as before.
    kfold = KFold(n_splits=3)
    val = cross_val_score(
        estimator=model, X=x_train, y=y_train, cv=kfold, scoring='r2'
    ).mean()

    y_pred, mse, mae, r2 = regression_predictor(model, x_test, y_test)

    stats = pd.DataFrame()
    stats['CV_score'] = val, val
    # Report the metrics measured on this call; fixed literal numbers used
    # to be written here regardless of the data or the model.
    stats['MAE'] = mae
    stats['r2'] = r2
    stats['MSE'] = mse
    return model, y_pred, stats
# %load q08_linear_model/build.py
import pandas as pd
import numpy as np
from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data
from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset
from greyatomlib.multivariate_regression_project.q03_data_encoding.build import label_encode
from greyatomlib.multivariate_regression_project.q05_linear_regression_model.build import linear_regression
from greyatomlib.multivariate_regression_project.q06_cross_validation.build import cross_validation_regressor
from greyatomlib.multivariate_regression_project.q07_regression_pred.build import regression_predictor

# Module-level pipeline: load, split and encode the data, then fit and
# score a baseline model. These names are kept because other code may
# import them from this module.
df = load_data('data/student-mat.csv')
x_train, x_test, y_train, y_test = split_dataset(df)
x_train, x_test = label_encode(x_train, x_test)
model = linear_regression(x_train, y_train)
val = cross_validation_regressor(model, x_train, y_train)
y_pred, mse, mae, r2 = regression_predictor(model, x_test, y_test)


# Write your code below
def linear_model(x_train, x_test, y_train, y_test):
    """Fit a linear regression on the given split and tabulate its scores.

    Args:
        x_train: Training features.
        x_test: Test features.
        y_train: Training target.
        y_test: Test target.

    Returns:
        tuple: ``(model, y_pred, stats)`` where ``stats`` is a one-row
        DataFrame with the columns ``cross_val``, ``rmse``, ``mae`` and
        ``r2``.
    """
    G = linear_regression(x_train, y_train)
    # Score the model fitted here from the arguments. The function used to
    # return the module-level predictions and metrics, so the test split
    # passed in was ignored.
    cv_score = cross_validation_regressor(G, x_train, y_train)
    predictions, mse_, mae_, r2_ = regression_predictor(G, x_test, y_test)
    # Values are ordered to match the column labels; the MAE and the raw
    # MSE used to sit under ``rmse`` and ``mae`` respectively.
    stats = pd.DataFrame(
        [(cv_score, np.sqrt(mse_), mae_, r2_)],
        columns=['cross_val', 'rmse', 'mae', 'r2'],
    )
    return G, predictions, stats


linear_model(x_train, x_test, y_train, y_test)
# %load q13_plot_residuals/build.py
import matplotlib.pyplot as plt
plt.switch_backend('agg')
from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data
from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset
from greyatomlib.multivariate_regression_project.q03_data_encoding.build import label_encode
from greyatomlib.multivariate_regression_project.q07_regression_pred.build import regression_predictor
from greyatomlib.multivariate_regression_project.q05_linear_regression_model.build import linear_regression

df = load_data('data/student-mat.csv')
x_train, x_test, y_train, y_test = split_dataset(df)
x_train, x_test = label_encode(x_train, x_test)
model = linear_regression(x_train, y_train)
# Predict on the test split so that y_pred lines up with y_test, which is
# what plot_residuals subtracts it from. The predictions used to be made on
# the training split.
y_pred, mse, mae, r2 = regression_predictor(model, x_test, y_test)


def plot_residuals(y_test, y_pred, name):
    """Scatter the residuals ``y_test - y_pred`` against ``y_test``.

    The figure is titled 'Residual Plot', written to
    ``./images/data_image.png`` and then shown.

    Args:
        y_test: True target values.
        y_pred: Predicted values, aligned with ``y_test``.
        name: Accepted for interface compatibility; not used by the body.
    """
    residuals = y_test - y_pred
    plt.scatter(y_test, residuals)
    plt.title('Residual Plot')
    plt.savefig('./images/data_image.png')
    plt.show()
import matplotlib.pyplot as plt
import pylab
import scipy.stats as stats
from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data
from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset
from greyatomlib.multivariate_regression_project.q05_linear_regression_model.build import linear_regression
from greyatomlib.multivariate_regression_project.q07_regression_pred.build import regression_predictor
from sklearn.linear_model import LinearRegression
from greyatomlib.multivariate_regression_project.q03_data_encoding.build import label_encode

# Module-level pipeline: load, split and encode the data, fit the model and
# keep only the test-split predictions (the three metrics are discarded).
df = load_data('data/student-mat.csv')
x_train, x_test, y_train, y_test = split_dataset(df)
x_train, x_test = label_encode(x_train, x_test)
lin_reg = linear_regression(x_train, y_train)
y_pred, _, __, ___ = regression_predictor(lin_reg, x_test, y_test)


def plot_residuals(y_test, y_pred, name):
    """Draw a normal probability plot of the residuals ``y_test - y_pred``.

    Args:
        y_test: True target values.
        y_pred: Predicted values, aligned with ``y_test``.
        name: Accepted for interface compatibility; not used by the body.

    Returns:
        Whatever ``pylab.show()`` returns.

    Example call: ``plot_residuals(y_test, y_pred, 'name')``.
    """
    residuals = y_test - y_pred
    stats.probplot(residuals, dist="norm", plot=pylab)
    shown = pylab.show()
    return shown