# Problem 2 -- Model Scoring -- for Homework 3 of CS107
# Author: Max Li
#
# Fits a linear and a ridge regression model on an 80/20 split of the Boston
# dataset, then prints each model's test-set score and its parameters.
from sklearn import datasets
from sklearn.model_selection import train_test_split

import Regression as reg

# NOTE(review): load_boston is no longer shipped by recent scikit-learn
# releases -- confirm the pinned version before running.
dataset = datasets.load_boston()
X_train, X_test, y_train, y_test = train_test_split(
    dataset['data'],
    dataset['target'],
    test_size=0.2,
    random_state=42,  # fixed seed keeps the split reproducible
)

linear_model = reg.LinearRegression()

ridge_model = reg.RidgeRegression()
ridge_model.set_params(alpha=0.1)

models = [linear_model, ridge_model]

# One entry per model, in the same order as `models`.
scores = []
for model in models:
    model.fit(X_train, y_train)
    score = model.score(X_test, y_test)
    scores.append(score)
    print("R-squared: {}".format(score))
    print(model.get_params())
##model_performance.py
#
# Sets up a sweep over regularisation strengths for the linear and ridge
# models on an 80/20 split of the Boston dataset. Only the start of the sweep
# loop is visible here; whatever fills the score arrays follows it.
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

import Regression as myReg

# NOTE(review): load_boston is no longer shipped by recent scikit-learn
# releases -- confirm the pinned version before running.
dataset = datasets.load_boston()
X_train, X_test, y_train, y_test = train_test_split(
    dataset['data'],
    dataset['target'],
    test_size=0.2,
    random_state=42,  # fixed seed keeps the split reproducible
)

alpha = 0.1

olsreg = myReg.LinearRegression()
rigreg = myReg.RidgeRegression()
rigreg.set_params(alpha=alpha)
models = [olsreg, rigreg]

# Ten log-spaced alpha values from 1e-2 to 1e1, plus one score slot per alpha
# for each of the two models.
alpha_array = np.logspace(-2, 1, num=10)
score_array_ols = np.zeros_like(alpha_array)
score_array_rig = np.zeros_like(alpha_array)

cnt = 0
for alpha_i in alpha_array:
    for model in models:
        # Both models receive the alpha value, including the linear one.
        model.set_params(alpha=alpha_i)
        model.fit(X_train, y_train)
from sklearn import datasets
from sklearn.model_selection import train_test_split

import Regression as reg

# NOTE(review): load_boston is no longer shipped by recent scikit-learn
# releases -- confirm the pinned version before running.
dataset = datasets.load_boston()
X_train, X_test, y_train, y_test = train_test_split(
    dataset['data'],
    dataset['target'],
    test_size=0.2,
    random_state=42,  # fixed seed keeps the split reproducible
)

alpha = 0.1

# Initialize the linear regression model.
LRModel = reg.LinearRegression()

# Initialize the ridge regression model with the chosen alpha.
RRModel = reg.RidgeRegression()
RRModel.set_params(alpha=alpha)

# Collect both models so they can be fitted and scored uniformly.
models = [LRModel, RRModel]

# Test-set scores, appended in the same order as `models`.
score = []

# Fit each model, record its test-set score and print its parameters.
for model in models:
    model.fit(X_train, y_train)
    score.append(model.score(X_test, y_test))
    # NOTE(review): reads the `params` attribute directly; presumably the
    # Regression classes expose it -- verify against the Regression module.
    print(model.params)

# Print the computed scores for the different models in a nice format.
from sklearn import datasets
from sklearn.model_selection import train_test_split

# Import the regression classes.
import Regression as Reg

# NOTE(review): load_boston is no longer shipped by recent scikit-learn
# releases -- confirm the pinned version before running.
dataset = datasets.load_boston()
X_train, X_test, y_train, y_test = train_test_split(
    dataset['data'],
    dataset['target'],
    test_size=0.2,
    random_state=42,  # fixed seed keeps the split reproducible
)

alpha = 0.1

linreg = Reg.LinearRegression()
ridreg = Reg.RidgeRegression()
ridreg.set_params(alpha=alpha)
models = [linreg, ridreg]

# Fit every model and report its test-set score, labelled by class name.
model_scores = []
for model in models:
    model.fit(X_train, y_train)
    score = model.score(X_test, y_test)
    model_scores.append(score)
    print("{} has R^2 score of: {}".format(type(model).__name__, score))

# Pair each score with its model and keep the pair with the highest score;
# on a tie max() keeps the earliest pair, i.e. the first model in `models`.
best_score, best_model = max(
    zip(model_scores, models), key=lambda pair: pair[0])

print("The best model is {}".format(type(best_model).__name__))
print("And params for the best model are: ")
print(best_model.get_params())
plt.axhline(y=85, color='k', linestyle='--', label='85%') plt.xticks(np.arange(1, features + 1, 1)) plt.xlabel('Number of Components') plt.ylabel('Variance Explained') plt.legend() plt.show() ##################### ##################### ## Linear Regression ##################### linear = R.LinearRegression() X_train, X_test, y_train, y_test = data.getSplitData() linear.train(features, X_train, X_test, y_train, y_test, n_jobs=1, verbose=True, startIndex=1) linear.fit(X, y) #func = linear.function(columnNames=['D','E', 'F', 'G', 'L', 'P', 'U', 'AA', 'AB', 'AD'], featureStartIndex = 3) #func = linear.function(columnNames=['D','E', 'F', 'G', 'P','W','X','Y','AA', 'AB', 'AD'], featureStartIndex = 3) linear.function(columnNames=[ feature_columns[letter - ord('A')]
from sklearn import datasets
from sklearn.model_selection import train_test_split

import Regression as reg

# Fits a linear and a ridge regression model on an 80/20 split of the Boston
# dataset, reports each model's test-set score, then prints the name and
# parameters of the best-scoring one.

# NOTE(review): load_boston is no longer shipped by recent scikit-learn
# releases -- confirm the pinned version before running.
dataset = datasets.load_boston()
X_train, X_test, y_train, y_test = train_test_split(
    dataset['data'],
    dataset['target'],
    test_size=0.2,
    random_state=42,  # fixed seed keeps the split reproducible
)

alpha = 0.1

rdg_regress = reg.RidgeRegression()
rdg_regress.set_params(alpha=alpha)
models = [reg.LinearRegression(), rdg_regress]

# Both dictionaries are keyed by the model's class name, so two models of the
# same class would overwrite each other; the two classes used here differ.
model_scores = {}
model_params = {}

for model in models:
    model.fit(X_train, y_train)

    # Evaluate the name and the score once and reuse them, instead of scoring
    # the test set a second time just for the report line.
    model_name = model.__class__.__name__
    test_score = model.score(X_test, y_test)

    model_scores[model_name] = test_score
    model_params[model_name] = model.get_params()
    print("The model is : {}. The R-square value in the test dataset is : {}."
          .format(model_name, test_score))

# Name of the class with the highest recorded test-set score.
best_model = max(model_scores, key=model_scores.get)
print("The best model is : {} \nParameters are : \n{}".format(
    best_model, model_params[best_model]))
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# Evaluates linear, ridge and lasso regression batch by batch on the iris
# training split, recording the mean squared error and R2 score per batch.
# NOTE(review): `datasets`, `np`, `Regression` and `Evaluation` are not
# imported in this visible section -- presumably brought in earlier in the
# file; confirm.
irisdata = datasets.load_iris()
X_train, X_test, Target_train, Target_test = train_test_split(
    irisdata.data, irisdata.target, test_size=.4)

Batch_size = 10
# Number of complete batches in the training split; a trailing partial batch
# is dropped.
epoch_num = len(X_train) // Batch_size

# Row 0 = linear, row 1 = ridge, row 2 = lasso; one column per batch.
MeanSquareError = np.zeros((3, epoch_num))
R2Score = np.zeros((3, epoch_num))

for epoch in range(epoch_num):
    # Step by whole batches so consecutive batches do not overlap. Slicing
    # from `epoch` alone slid a window one row at a time and never reached
    # most of the training rows.
    X_batch = X_train[epoch * Batch_size:(epoch + 1) * Batch_size, :]
    Y_batch = Target_train[epoch * Batch_size:(epoch + 1) * Batch_size]

    Reg = Regression(X_batch, Y_batch)
    LinReg, _ = Reg.LinearRegression(X_batch)
    RigReg, _ = Reg.RidgeRegression(X_batch, alpha=0.1)
    LasReg, _ = Reg.LassoRegression(X_batch, alpha=0.1)

    # Each model's first return value is evaluated against the batch targets.
    LinReg_Eval = Evaluation(LinReg, Y_batch)
    RigReg_Eval = Evaluation(RigReg, Y_batch)
    LasReg_Eval = Evaluation(LasReg, Y_batch)

    MeanSquareError[0, epoch] = LinReg_Eval.MeanSquarErr()
    MeanSquareError[1, epoch] = RigReg_Eval.MeanSquarErr()
    MeanSquareError[2, epoch] = LasReg_Eval.MeanSquarErr()

    R2Score[0, epoch] = LinReg_Eval.R2Square()
    R2Score[1, epoch] = RigReg_Eval.R2Square()
    R2Score[2, epoch] = LasReg_Eval.R2Square()

fig = plt.figure()
fig1 = fig.add_subplot(1, 1, 1)
from sklearn import datasets
from sklearn.model_selection import train_test_split
import Regression as reg
import numpy as np

# Fits and scores a linear and a ridge regression model on an 80/20 split of
# the Boston dataset, then builds a fresh pair of models and fits them in a
# loop.

# NOTE(review): load_boston is no longer shipped by recent scikit-learn
# releases -- confirm the pinned version before running.
dataset = datasets.load_boston()
X_train, X_test, y_train, y_test = train_test_split(
    dataset['data'],
    dataset['target'],
    test_size=0.2,
    random_state=42,  # fixed seed keeps the split reproducible
)

# Regularisation strength for every ridge model below. It is defined before
# its first use and kept numeric; it was previously passed as the string
# '0.5'.
alpha = 0.5

# Instantiate, fit and score the linear model.
linear = reg.LinearRegression()
linear.fit(X_train, y_train)
linear_score = linear.score(X_test, y_test)
print(linear_score)

# Same for the ridge model, using the shared alpha.
ridge = reg.RidgeRegression()
ridge.set_params(alpha=alpha)
ridge.fit(X_train, y_train)
ridge_score = ridge.score(X_test, y_test)

# The list used to be built from `model1(alpha)` and `model2(alpha)`, names
# that are never defined in this script. Build fresh instances with the same
# constructors and set_params call used above instead.
# NOTE(review): assumes the intent was one linear and one ridge model.
ridge_for_list = reg.RidgeRegression()
ridge_for_list.set_params(alpha=alpha)
models = [reg.LinearRegression(), ridge_for_list]

for model in models:
    model.fit(X_train, y_train)