def main():
    """Train the linear regression model, visualise the fit, report metrics,
    and write predictions for test/input.csv to test/output.csv."""
    # Training matrices for the model.
    x, y = get_train_matrices()

    # Build and fit: learning rate, iteration count, L2 strength.
    model = LinearRegression(x, y)
    model.fit(0.01, 1000, 0)

    # Predictions over the full training inputs.
    y_pred = model.predict(x)

    # Scatter the training data and overlay the fitted line.
    plt.scatter(x, y)
    plt.plot(x, y_pred)
    plt.show()

    # Report the learned parameters.
    print("Weights: {}\nBiases: {}".format(model.w, model.c))

    # Performance metrics on the training data.
    model.validate()

    # Score the held-out inputs and persist the predictions.
    x_test = pd.read_csv('test/input.csv')['x'].values.reshape(-1, 1)
    y_test = model.predict(x_test)
    df_predict = pd.DataFrame({'y': y_test.reshape(-1)})
    df_predict.to_csv('test/output.csv')
def test_score(self):
    """R^2 of a fitted model must lie within [0, 1] on held-out data."""
    split = GeneralUtilities.dataset_split(self.data, self.labels)
    train_data, train_labels, test_data, test_labels = split
    model = LinearRegression()
    model.fit(train_data, train_labels)
    model.predict(test_data, test_labels)
    self.assertTrue(0 <= model.r_squared)
    self.assertTrue(model.r_squared <= 1)
def test_r_calcuation(self):
    """Adjusted R^2 is computed and differs from plain R^2 when multiple
    attributes are used.

    The adjusted value can legitimately drop below 0, so only the upper
    bound is asserted.  (The method keeps its historic 'calcuation'
    spelling so existing test references stay valid.)
    """
    model = LinearRegression()
    model.fit(self.train_data, self.train_labels)
    model.predict(self.test_data, self.test_labels)
    self.assertTrue(model.adj_r_squared <= 1)
    self.assertNotEqual(model.adj_r_squared, model.r_squared)
def test_ridge_equal_least_squares(self):
    """With lam == 0, ridge regression must reduce to ordinary least squares."""
    ridge = LinearRegression(model="Ridge Regression", lam=0)
    ridge.fit(self.train_data, self.train_labels)
    ridge.predict(self.test_data, self.test_labels)
    ridge_score = ridge.r_squared

    ols = LinearRegression(model=None)
    ols.fit(self.train_data, self.train_labels)
    ols.predict(self.test_data, self.test_labels)
    least_squares_score = ols.r_squared

    self.assertEqual(ridge_score, least_squares_score)
def test_ridge_scores(self):
    """Any non-zero regularisation strength must change the R^2 score
    relative to plain least squares."""
    baseline = LinearRegression(model=None)
    baseline.fit(self.train_data, self.train_labels)
    baseline.predict(self.test_data, self.test_labels)
    least_squares_score = baseline.r_squared

    for lam in (10, 1, 0.1, 0.01, 0.001):
        ridge = LinearRegression(model="Ridge Regression", lam=lam)
        ridge.fit(self.train_data, self.train_labels)
        ridge.predict(self.test_data, self.test_labels)
        self.assertNotEqual(least_squares_score, ridge.r_squared)
def linearRegression_Model(learning_rate, n_iters, split_test_ratio):
    """Train a LinearRegression model on a synthetic regression problem.

    Args:
        learning_rate: gradient-descent step size passed to the regressor.
        n_iters: number of training iterations passed to the regressor.
        split_test_ratio: fraction of samples held out for the test set.

    Returns:
        (predictions, mse, plt): test-set predictions, their mean squared
        error, and the matplotlib module with the fit figure drawn.
    """
    X, y = datasets.make_regression(n_samples=100, n_features=1, noise=20,
                                    random_state=4)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=split_test_ratio, random_state=1234)

    # BUG FIX: learning_rate and n_iters were previously ignored
    # (hard-coded as 0.01 and 1000); pass the parameters through.
    regressor = LinearRegression(learning_rate=learning_rate, n_iters=n_iters)
    regressor.fit(X_train, y_train)

    predictions = regressor.predict(X_test)
    mse = mean_squared_error(y_test, predictions)
    print("MSE:", mse)

    # Plot train/test points and the regression line over the full input range.
    y_pred_line = regressor.predict(X)
    cmap = plt.get_cmap('viridis')
    fig = plt.figure(figsize=(8, 6))
    m1 = plt.scatter(X_train, y_train, color=cmap(0.9), s=10)
    m2 = plt.scatter(X_test, y_test, color=cmap(0.5), s=10)
    plt.plot(X, y_pred_line, color='black', linewidth=2, label="Prediction")
    plt.show()
    return predictions, mse, plt
def test_fit():
    """Fitting on a fixed two-feature design matrix reproduces known weights."""
    targets = np.array([
        0.09459717, 0.50650243, 1.03329565, 0.52587828, 0.49264871,
        -0.64896441, -0.86499999, -1.00885329, -0.80418399, 0.57436388,
    ]).reshape(-1, 1)
    design = np.array([
        [0.,         0.],
        [0.6981317,  0.48738787],
        [1.3962634,  1.94955149],
        [2.0943951,  4.38649084],
        [2.7925268,  7.79820595],
        [3.4906585,  12.18469679],
        [4.1887902,  17.54596338],
        [4.88692191, 23.88200571],
        [5.58505361, 31.19282379],
        [6.28318531, 39.4784176],
    ])
    model = LinearRegression()
    model.fit(design, targets)
    expected = np.array([[0.77483422], [-0.42288373], [0.03914334]])
    np.testing.assert_almost_equal(model.weights_, expected, decimal=8)
def test_fit_functional():
    """Functional test: fitting recovers the plane z = 0.1x + 0.2y + 0.4."""
    import sklearn.model_selection
    import numpy as np
    from linear_regression import LinearRegression

    num_samples = 30
    # Grid of (x, y) points; FIX: derive the row count from num_samples
    # instead of the hard-coded 900 so the two stay in sync.
    xx = np.linspace(-5, 5, num_samples)
    XX, YY = np.meshgrid(xx, xx)
    X = np.zeros((num_samples * num_samples, 3), dtype=np.float32)
    X[:, 0] = XX.flatten()
    X[:, 1] = YY.flatten()
    X[:, -1] = 1  # a column of 1's for the bias trick

    Z = 0.1 * XX + 0.2 * YY + 0.4
    y = Z.reshape(-1, 1)

    X_train, X_val, y_train, y_val = sklearn.model_selection.train_test_split(
        X, y)
    model = LinearRegression(input_dimensions=2)
    train_mse, val_mse = model.fit(X_train, y_train, X_val, y_val,
                                   num_epochs=20, batch_size=4,
                                   alpha=0.1, _lambda=0.0)

    desired_weights = np.float32([[0.1, 0.2, 0.4]]).T
    np.testing.assert_allclose(model.weights, desired_weights,
                               rtol=1e-3, atol=1e-3)
    # FIX: the previous < 0.001 and < 0.00001 asserts were redundant —
    # this strictest bound subsumes both.
    final_train_mse = train_mse[-1]
    assert final_train_mse < 1e-10
def visual_3d():
    """Plot the cost surface J(theta0, theta1) for the food-profit data and
    overlay the gradient-descent trajectory of the fitted model."""
    data = pd.read_csv('food_profit.txt', names=['population', 'profit'])
    x = data['population']
    y = data['profit']

    # Design matrix with a leading bias column of ones.
    # FIX: Series[:, np.newaxis] multi-dim indexing was removed in modern
    # pandas; convert to an ndarray first.
    x = x.to_numpy()[:, np.newaxis]
    x = np.hstack((np.ones_like(x), x))

    # Parameter grids: theta0 in [-10, 10) step .5 (40 values),
    # theta1 in [-1, 4) step .5 (10 values).
    # BUG FIX: the grids were allocated (40, 50) but the loops only fill
    # (40, 10) cells, so most of the plotted surface was spurious zeros.
    t0_range = np.arange(-10, 10, .5)
    t1_range = np.arange(-1, 4, .5)
    grid_shape = (len(t0_range), len(t1_range))
    theta0 = np.zeros(grid_shape)
    theta1 = np.zeros(grid_shape)
    jvals = np.zeros(grid_shape)
    for i, t0 in enumerate(t0_range):
        for j, t1 in enumerate(t1_range):
            theta0[i, j] = t0
            theta1[i, j] = t1
            # Half mean-squared-error cost at parameters (t0, t1).
            jvals[i, j] = 0.5 * np.mean((x.dot(np.array([t0, t1])) - y) ** 2)

    import mpl_toolkits.mplot3d  # noqa: F401 (registers the '3d' projection)
    # FIX: plt.gca(projection=...) was removed in Matplotlib 3.6; create the
    # 3D axes explicitly instead.
    ax = plt.figure().add_subplot(projection='3d')
    ax.plot_surface(theta0, theta1, jvals, cmap=plt.get_cmap('BuPu_r'))

    alpha = 0.01
    max_iter = 1500
    model = LinearRegression(alpha, max_iter)
    loss, w_list = model.fit(x, y)
    w_list = np.array(w_list)
    # Start (red x), path (dots), and end (green x) of gradient descent.
    plt.plot([w_list[0, 0]], [w_list[0, 1]], [loss[0]], 'rx')
    plt.plot(w_list[:, 0], w_list[:, 1], loss, 'o')
    plt.plot([w_list[-1, 0]], [w_list[-1, 1]], [loss[-1]], 'gx')
    plt.show()
def main():
    """Fit a linear model on the first half of a synthetic dataset, score it
    over all points, and plot both splits with the fitted line."""
    xs, ys = create_dataset(50, 10, 2, correlation='neg')
    # First 25 points train the model; the remainder is held out.
    train_xs, train_ys = xs[:25], ys[:25]
    test_xs, test_ys = xs[25:], ys[25:]

    model = LinearRegression()
    model.fit(train_xs, train_ys)
    preds = model.predictions(xs)
    print("{:1.2f}".format(model.score(ys, preds)))

    # Visualise both splits and the fitted line.
    plt.scatter(train_xs, train_ys)
    plt.scatter(test_xs, test_ys)
    plt.plot(xs, preds)
    plt.show()
def fitting():
    """Fit house price on normalised (area, bedroom) features and show the
    raw data alongside the training-loss curve."""
    data = pd.read_csv('house_data.txt', names=['area', 'bedroom', 'price'])
    features = data[['area', 'bedroom']]
    # Mean-normalise each feature by its range.
    features = (features - features.mean()) / (features.max() - features.min())
    prices = data['price']

    plt.figure(figsize=(10, 6))
    plt.subplot(2, 2, 1)
    plt.plot(features['area'], prices, 'rx')
    plt.title('area-price')
    plt.subplot(2, 2, 2)
    plt.plot(features['bedroom'], prices, 'bx')
    plt.title('bedroom-price')

    alpha = 10
    max_iter = 50
    model = LinearRegression(alpha, max_iter)
    loss, _ = model.fit(features.values, prices.values)

    # Loss curve across the bottom half of the figure.
    plt.subplot(2, 1, 2)
    plt.plot(np.arange(1, max_iter + 1), loss)
    plt.subplots_adjust(hspace=0.4)
    plt.show()
def main():
    """Fit an l1/l2-regularised linear regression on synthetic data, plot the
    training-error curve, then the fitted line against train/test points."""
    X, y = make_regression(n_samples=100, n_features=1, noise=20)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)
    n_samples, n_features = np.shape(X)

    model = LinearRegression(n_iterations=3000,
                             regularization=l1_l2(alpha=0.5))
    model.fit(X_train, y_train)

    # Training error plot.
    num_errors = len(model.training_errors)
    training, = plt.plot(range(num_errors), model.training_errors,
                         label="Training Error")
    plt.legend(handles=[training])
    plt.title("Error Plot")
    plt.ylabel('Mean Squared Error')
    plt.xlabel('Iterations')
    plt.show()

    # Test-set error.
    y_pred = model.predict(X_test)
    y_pred = np.reshape(y_pred, y_test.shape)
    mse = mean_squared_error(y_test, y_pred)
    print("Mean squared error: %s" % (mse))

    # Plot the result; x-values scaled by 366 to map the feature back to days.
    y_pred_line = model.predict(X)
    cmap = plt.get_cmap('viridis')
    m1 = plt.scatter(366 * X_train, y_train, color=cmap(0.9), s=10)
    m2 = plt.scatter(366 * X_test, y_test, color=cmap(0.5), s=10)
    plt.plot(366 * X, y_pred_line, color='black', linewidth=2,
             label="Prediction")
    plt.suptitle("Linear Regression")
    plt.title("MSE: %.2f" % mse, fontsize=10)
    plt.xlabel('Day')
    plt.ylabel('Temperature in Celcius')
    plt.legend((m1, m2), ("Training data", "Test data"), loc='lower right')
    plt.show()
    print('Done!')
def test1():
    """Smoke-test fit/predict on three tiny datasets, re-fitting in place."""
    X3 = np.array([[1, 1], [1, 2], [2, 2], [2, 3]])
    cases = [
        (np.array([[1], [2], [3], [4], [5]]),
         np.array([4, 7, 10, 13, 16]),
         np.array([[9], [10]])),
        (np.array([[1], [2], [3], [4], [5]]),
         np.array([2, 4, 6, 8, 10]),
         np.array([[9], [10]])),
        (X3,
         np.dot(X3, np.array([1, 2])) + 3,
         np.array([[3, 5]])),
    ]
    # Same regressor instance is refit for every dataset, as before.
    reg = LinearRegression()
    for X, y, query in cases:
        reg = reg.fit(X, y)
        print(reg.predict(query))
def main():
    """Load data, fit the linear regression model with progress messages,
    report metrics, and save test predictions to test/output.csv."""
    print("Loading data...")
    # Training matrices for the model.
    x, y = get_train_matrices()
    print("Data loaded.\n")

    model = LinearRegression(x, y)
    print("Fitting the model with data...")
    # Learning rate, iteration count, L2 regularisation strength.
    model.fit(0.01, 1000, 0)
    print("Model fitted.\n")

    y_pred = model.predict(x)

    # Only a single-feature model can be drawn as a 2-D plot.
    if x.shape[1] == 1:
        print("The model is fitted as shown in the figure.\n")
        plt.scatter(x, y)
        plt.plot(x, y_pred)
        plt.show()

    # Learned parameters.
    print("The weights and biases are printed as:\n"
          "Weights: {}\nBiases: {}\n".format(model.w, model.c))

    print("Performance statistics:")
    model.validate()

    # Score the test inputs, skipping the first column
    # (presumably the CSV's index column — confirm), and persist.
    x_test = pd.read_csv('test/input.csv').values[:, 1:]
    y_test = model.predict(x_test)
    df_predict = pd.DataFrame({'y': y_test.reshape(-1)})
    df_predict.to_csv('test/output.csv')
def main():
    """Fit a one-variable linear regression on ex1data1 and save a plot of
    the data with two fitted-line segments to test.pdf."""
    trainfile = r"data/ex1data1.txt"
    train_X, train_y = loadDataSet(trainfile)

    clf = LinearRegression()
    weigh = clf.fit(train_X, train_y, alpha=0.01, maxCycles=500)

    fig = plt.figure(figsize=(8, 4))
    axes = fig.add_subplot(111)
    # Raw training points.
    axes.plot(train_X, train_y, 'o')
    # Two line segments along y = weigh * x, over x in [0, 1] and [0, 25].
    axes.plot([0, 1], [0, 1 * weigh], [0, 25], [0, 25 * weigh])
    fig.savefig("test.pdf")
def fitting():
    """Fit profit vs. population with gradient descent, then plot the loss
    curve and the fitted line over the training data."""
    data = pd.read_csv('food_profit.txt', names=['population', 'profit'])
    x = data['population']
    y = data['profit']

    alpha = 0.01     # learning rate
    max_iter = 1500  # gradient-descent iterations
    model = LinearRegression(alpha, max_iter)
    loss, _ = model.fit(x, y)
    p = model.predict(x)

    plt.figure(figsize=(10, 6))
    plt.subplot(2, 1, 1)
    # FIX: derive the x-axis from max_iter instead of the hard-coded 1501,
    # matching the sibling fitting() for the house data, so the plot stays
    # correct if the iteration count changes.
    plt.plot(np.arange(1, max_iter + 1), loss)
    plt.title('Loss Curve')

    plt.subplot(2, 1, 2)
    # FIX: corrected the 'Traing Data' typo in the legend label.
    plt.plot(x, y, 'rx', markersize=10, label='Training Data')
    plt.plot(x, p, 'b', label='Linear Regression')
    plt.xlabel('Population of City in 10,000s')
    plt.ylabel('Profit in $10,000s')
    plt.grid(True)
    plt.legend()
    plt.show()
from linear_regression import LinearRegression


def calculate_mean_squared_error(y_true, y_pred):
    """Return the mean squared error between true and predicted targets."""
    return np.mean(np.square(y_true - y_pred))


if __name__ == '__main__':
    # Synthetic one-feature regression problem with a fixed seed.
    data = make_regression(n_samples=200, n_features=1, n_targets=1,
                           random_state=42)
    X, y = data[0], data[1]
    X_train, X_test, Y_train, Y_test = train_test_split(X, y, shuffle=True,
                                                        test_size=0.2)

    regressor = LinearRegression(lr=0.01, n_iters=500)
    regressor.fit(X_train, Y_train)
    predictions = regressor.predict(X_test)

    mse = calculate_mean_squared_error(Y_test, predictions)
    r_score = r2_score(Y_test, predictions)
    print("The mean squared error of the regressor is: {}".format(mse))
    # BUG FIX: this previously formatted the r2_score *function object*
    # instead of the computed r_score value.
    print("The R-squared loss of the regressor is: {}".format(r_score))

    # Plot the fitted line against train/test scatter (1-D feature only).
    y_pred_line = regressor.predict(X)
    assert X.shape[1] == 1
    cmap = plt.get_cmap('viridis')
    fig = plt.figure(figsize=(8, 6))
    plt.scatter(X_train[:, 0], Y_train, color=cmap(0.9), s=10)
    plt.scatter(X_test[:, 0], Y_test, color=cmap(0.5), s=10)
import numpy.linalg as la
from sklearn import preprocessing

# Load the Boston housing dataset.
X_train, Y_train = load_boston(return_X_y=True)

# Linear regression model object.
model = LinearRegression()

# Regularisation strengths to sweep over.
reg_param = [0.1, 1.0, 10.0, 100.0, 1000.0]
num_epochs = 100000

# Plain least-squares fit for reference.
model_params_ls = model.fit('ls', X_train, Y_train)
print(model_params_ls)

# Fit LASSO at each regularisation strength and plot the resulting
# parameter profiles on one figure.
plt.figure(1)
plt.title('LASSO parameter profiles at different regularisation params')
plt.xlabel('parameter index')
plt.ylabel('parameter values')
for lam in reg_param:
    model_params_lasso = model.fit('lasso', X_train, Y_train,
                                   regularization_param=lam,
                                   num_epochs=num_epochs)
    plt.plot(model_params_lasso, label='' + str(lam))
binary_labels = metrics.make_binary(labels) cm = metrics.confusion_matrix(testt, binary_labels) a = metrics.accuracy(testt, binary_labels) p, r = metrics.precision_and_recall(testt, binary_labels) f = metrics.f1_measure(testt, binary_labels) print(binary_labels) print("Accuracy = %f\n" % a) print("Precision = %f, Recall = %f\n" % (p, r)) print("F1 measure = %f\n" % f) print(sum(binary_labels) / len(binary_labels)) elif model == "linear_regression": lr = LinearRegression() lr.fit(trainf, traint) lrc = lr.score(testf, testt) labels = lr.predict(testf) binary_labels = metrics.make_binary(labels) print(binary_labels) a = metrics.accuracy(testt, binary_labels) print("Accuracy = %f\n" % a) print("R2 sore:", lrc) # GOOGL_closing_data = features[:,5].reshape(-1,1) # n = 3 # # #Data Processing # data0 = features[:,5] # example0 = data0[:-n].reshape(-1,1) #
# One-hot encode the binary features, keep the continuous ones, and
# standard-scale the combined matrix (encoder/scaler fitted on train only).
encoder = utilities.OneHotEncoder()
scaler = utilities.StandardScaler()

encoder.fit(X_train[:, bin_feat_reg])
train_binary = encoder.transform(X_train[:, bin_feat_reg])
test_binary = encoder.transform(X_test[:, bin_feat_reg])
X_train_new = np.hstack((train_binary, X_train[:, con_feat_reg]))
X_test_new = np.hstack((test_binary, X_test[:, con_feat_reg]))

scaler.fit(X_train_new)
X_train_scaled = scaler.transform(X_train_new)
X_test_scaled = scaler.transform(X_test_new)

# L2-penalised linear regression.
model = LinearRegression(learning_rate=10e-5, penalty='l2')
model.fit(X_train_scaled, y_train)

print('Train metrics')
utilities.regression_report(y_train, model.predict(X_train_scaled))
print('Test metrics')
utilities.regression_report(y_test, model.predict(X_test_scaled))

# Report the five features with the largest absolute weights.
print('Feature importances')
args = np.argsort(np.fabs(model.w))[::-1]
for i in args[:5]:
    print(name_features_insurance[i], model.w[i])
# Assemble the selected feature columns into a (samples, features) matrix.
X = np.array([X[i] for i in features]).T
X = normalize_features(X)

# Fitting model
reg = LinearRegression(learning_rate=0.01, n_iters=300)

# Train/test split with an 80/20 ratio.
X_train, y_train, X_test, y_test = train_test_split(X, y, 0.20)
# Coerce the returned tuples to arrays.
X_train, y_train, X_test, y_test = (np.array(part) for part in
                                    (X_train, y_train, X_test, y_test))

costs = reg.fit(X_train, y_train)
weights = reg.weights_
bias = reg.bias_
pred_train = reg.predict(X_train)
pred = reg.predict(X_test)

dimension_check()

# Mean absolute error / mean absolute deviation of the training set.
print("""\n### Mean absolute error / mean absolute deviation of training set ###""")
print(abs(pred_train - y_train).mean())

# Mean absolute error / mean absolute deviation of the test set.
print("""\n### Mean absolute error / mean absolute deviation of test set ###""")
FILE_NAME = "data.csv" data = np.genfromtxt(FILE_NAME, delimiter=",", dtype=np.float32, skip_header=1) # split data n_samples, n_features = data.shape n_features -= 1 X = data[:, 0:n_features] y = data[:, n_features] X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) # fit model = LinearRegression(X_train, y_train) model.fit() # plot y_pred_line = model.predict(X) cmap = plt.get_cmap('viridis') fig = plt.figure(figsize=(8, 6)) scat = plt.scatter(X, y, color=cmap(0.9), s=10) plt.plot(X, y_pred_line, color='black', linewidth=2, label="Prediction") plt.show() # accuracy train_accuracy = model.score() test_accuracy = model.score(X_test, y_test) table_accuracy = pd.DataFrame([[train_accuracy], [test_accuracy]], ['Train Accuracy', 'Test Accuracy'],
import pandas as pd
import matplotlib.pyplot as plt

from linear_regression import LinearRegression


def load_boston():
    """Load the Boston housing CSV: 13 feature columns and the target."""
    dataset = pd.read_csv('boston.csv')
    X = dataset.iloc[:, 0:13].to_numpy()
    y = dataset.iloc[:, 13].to_numpy()
    return X, y


def plot_losses(losses):
    """Plot the per-iteration training loss."""
    plt.plot(losses)
    plt.xlabel('iterations')
    plt.ylabel('loss')
    plt.tight_layout()
    plt.show()


if __name__ == '__main__':
    X, y = load_boston()
    linear_regression = LinearRegression(X, y)
    losses = linear_regression.fit()
    # BUG FIX: this previously called the undefined name `plot_loss`,
    # which raised NameError at runtime.
    plot_losses(losses)
from linear_regression import LinearRegression


def min_max_normalization_inverse(x, ma, mi):
    """Undo min-max normalisation: map x from [0, 1] back to [mi, ma]."""
    return x * (ma - mi) + mi


if __name__ == '__main__':
    # Load the data (auto-mpg dataset via the project's Data helper).
    auto_mpg_data = Data()
    max_mpg = auto_mpg_data.max_value
    min_mpg = auto_mpg_data.min_value
    X_train, t_train, X_test, t_test = auto_mpg_data.get_data()
    # Gaussian basis function model.
    lr = LinearRegression()
    lr.fit(X_train, t_train)
    gbf_prediction = lr.predict(X_test)
    print('gaussian_basis_function r2_score:', r2_score(t_test, gbf_prediction))
    # Sigmoid function model.
    lr = LinearRegression(func_name='sigmoid')
    lr.fit(X_train, t_train)
    sigmoid_prediction = lr.predict(X_test)
    print('sigmoid r2score:', r2_score(t_test, sigmoid_prediction))
    # Plot the results, mapping targets/predictions back to the original
    # mpg scale before scattering them.
    plt.scatter(min_max_normalization_inverse(t_test, max_mpg, min_mpg), min_max_normalization_inverse(gbf_prediction, max_mpg, min_mpg), c="cyan", label="gbf")
    plt.scatter(min_max_normalization_inverse(t_test, max_mpg, min_mpg),
sys.path.append(os.path.join('..', 'algorithms'))

# import linear regression class
from linear_regression import LinearRegression
import pandas as pd

# create Linear regression object
lr = LinearRegression()

# read csv data from text file
df = pd.read_csv('data/dummy_data_linear_regression.txt')

# BUG FIX: DataFrame.as_matrix() was removed in pandas 1.0; to_numpy() is
# the supported replacement and returns the same ndarray.
data = df.to_numpy()

# first two columns are the feature values
X = data[:, :2]
# third column holds the regression targets
y = data[:, 2]

# fit the linear regression model with different parameters
# (set normalize=True if the dataset has too much variance)
lr.fit(X[:30], y[:30], learning_rate=0.1, n_iter=200, normalize=True)

# plot cost across iterations to help tweak the learning rate
lr.plot_costs()

# check the R^2 score used in sklearn models
print(lr.score(X[31:46], y[31:46]))
noise=20, random_state=24) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=24) def mse(y_pred, y_true): return np.mean((y_pred - y_true)**2) # plt.figure(figsize=(8,6)) # plt.scatter(X_train[:,0],y_train,color='r',marker='o') # plt.show() regresoor = LinearRegression(n_iterations=10000) regresoor.fit(X_train, y_train) y_pred = regresoor.predict(X_test) error = mse(y_pred, y_test) print(error) y_pred_line = regresoor.predict(X) if X.shape[1] == 1: plt.figure(figsize=(8, 6)) plt.scatter(X_train, y_train, color='r') plt.scatter(X_test, y_test, color='b') plt.plot(X, y_pred_line, color='black', label='Prediction') plt.show()
# Wine-quality regression: train/validate/test CSVs are semicolon-separated.
pd_train = pd.read_csv('./data/train.csv', sep=';')
pd_test = pd.read_csv('./data/test.csv', sep=';')
pd_validate = pd.read_csv('./data/validate.csv', sep=';')

# 1. Min-max (uniform) normalisation of the raw features.
# Validation/test sets are scaled with the *training* min/max.
trn_X, trn_X_max, trn_X_min = uniform_norm(pd_train.drop('quality', axis=1).values)
trn_y = pd_train['quality'].values
val_X = (pd_validate.drop('quality', axis=1).values - trn_X_min) / (trn_X_max - trn_X_min)
val_y = pd_validate['quality'].values
test_X = (pd_test.drop('quality', axis=1).values - trn_X_min) / (trn_X_max - trn_X_min)
test_y = pd_test['quality'].values

# Batch gradient descent, no regularisation (lmbda=0).
model_1 = LinearRegression()
train_costs = model_1.fit(trn_X, trn_y, alpha=0.5, lmbda=0, algorithm="batch_gd", verbose=True)
val_pred = model_1.predict(val_X)
test_pred = model_1.predict(test_X)
# Half mean squared error on each held-out split.
print("Validate Error %f" % (sum((val_pred - val_y) ** 2) * 0.5 / val_X.shape[0]))
print("Test Error %f" % (sum((test_pred - test_y) ** 2) * 0.5 / test_X.shape[0]))
print("\n\n")

# 2. Gaussian (z-score) normalisation of the raw features.
# Again, validation/test use the *training* mean and std.
trn_X, trn_X_mean, trn_X_std = gaussian_norm(pd_train.drop('quality', axis=1).values)
trn_y = pd_train['quality'].values
val_X = (pd_validate.drop('quality', axis=1).values - trn_X_mean) / trn_X_std
val_y = pd_validate['quality'].values
test_X = (pd_test.drop('quality', axis=1).values - trn_X_mean) / trn_X_std
n_features=1, noise=20, random_state=4) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1234) # fig = plt.figure(figsize=(8,6)) # plt.scatter(X[:, 0], y, color="b", marker = "o", s = 30) # plt.show() from linear_regression import LinearRegression regressor = LinearRegression(lr=0.01) regressor.fit(X_train, y_train) predicted = regressor.predict(X_test) # def mse(y_true, y_predicted): # return np.mean((y_true-y_predicted)**2) # mse_value = mse(y_test, predicted) # print(mse_value) # print(y_test) y_pred_line = regressor.predict(X) cmap = plt.get_cmap('viridis') fig = plt.figure(figsize=(8, 6)) m1 = plt.scatter(X_train, y_train, color=cmap(0.9), s=10) m2 = plt.scatter(X_test, y_test, color=cmap(0.5), s=10) plt.plot(X, y_pred_line, color='black', linewidth=2, label="Prediction")
import numpy as np
from linear_regression import LinearRegression
from sklearn.datasets import load_boston

# NOTE(review): load_boston was removed from scikit-learn 1.2, so this
# script requires an older sklearn — confirm before upgrading.
X, y = load_boston(return_X_y=True)
# Keep only feature column 4 as a (n_samples, 1) matrix
# (presumably NOX in the Boston dataset — confirm).
X = X[:, 4].reshape(-1, 1)
model = LinearRegression()
model.fit(X, y, 0.1, 300)
def test_linear_regression():
    """A model fitted on y = 5 * x0 must predict the last sample closely."""
    features = np.random.normal(size=(100, 2))
    targets = features[:, 0] * 5
    model = LinearRegression()
    model.fit(features, targets)
    assert pytest.approx(model.predict(features)[-1], rel=1e-3) == targets[-1]
y=y_combined, classifier=logr_clf, test_idx=range(X_train.shape[0], X_train.shape[0] + X_test.shape[0]), ax=ax[1]) ax[1].set_xlabel("x1", fontsize="large") ax[1].set_ylabel("x2", fontsize="large") ax[1].legend(loc="upper right", fontsize="large") ax[1].set_title("Logistic regression (IRLS) on dataset %s" % l, fontsize="x-large", fontweight="bold") """ Run linear regression fitted by solving the normal equation """ linr_clf = LinearRegression() linr_clf.fit(X_train, y_train) linr_train_error = np.mean(linr_clf.predict(X_train) != y_train) linr_test_error = np.mean(linr_clf.predict(X_test) != y_test) plot_decision_regions(X=X_combined, y=y_combined, classifier=linr_clf, test_idx=range(X_train.shape[0], X_A_train.shape[0] + X_test.shape[0]), ax=ax[2]) ax[2].set_xlabel("x1", fontsize="large") ax[2].set_ylabel("x2", fontsize="large") ax[2].legend(loc="upper right", fontsize="large") ax[2].set_title("Linear regression (normal equation) on dataset %s" % l, fontsize="x-large",