import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from linearRegression.linearRegression import LinearRegression
from metrics import *

np.random.seed(42)

N = 30
P = 5
X = pd.DataFrame(np.random.randn(N, P))
y = pd.Series(np.random.randn(N))

# Non-vectorised gradient descent
for fit_intercept in [True, False]:
    for lr_type in ['inverse', 'constant']:
        for batch in [1, X.shape[0] // 2, X.shape[0]]:
            LR = LinearRegression(fit_intercept=fit_intercept)
            LR.fit_non_vectorised(X, y, batch, lr_type=lr_type)  # here you can use fit_non_vectorised / fit_autograd methods
            y_hat = LR.predict(X)
            print("Fit_intercept : {} , type : {} , batch_size : {}".format(str(fit_intercept), lr_type, batch))
            print('RMSE: ', round(rmse(y_hat, y), 3), end=" ")
            print('MAE: ', round(mae(y_hat, y), 3))
            print()

# Vectorised gradient descent
for fit_intercept in [True, False]:
    for lr_type in ['inverse', 'constant']:
        for batch in [1, X.shape[0] // 2, X.shape[0]]:
            LR = LinearRegression(fit_intercept=fit_intercept)
            LR.fit_vectorised(X, y, batch, lr_type=lr_type)
            y_hat = LR.predict(X)
            print("Fit_intercept : {} , type : {} , batch_size : {}".format(str(fit_intercept), lr_type, batch))
            print('RMSE: ', round(rmse(y_hat, y), 3), end=" ")
            print('MAE: ', round(mae(y_hat, y), 3))
            print()
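# A minimal sketch of the mini-batch gradient-descent update that
# fit_non_vectorised / fit_vectorised presumably perform; the real
# implementation lives in linearRegression/linearRegression.py. The
# 'inverse' schedule is assumed to mean lr / t at iteration t, and
# intercept handling is omitted for brevity.
import numpy as np

def minibatch_gd_sketch(X, y, batch_size, n_iter=100, lr=0.01, lr_type='constant'):
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float)
    n, p = X.shape
    theta = np.zeros(p)
    for t in range(1, n_iter + 1):
        # 'constant' keeps the step size fixed; 'inverse' decays it as 1/t
        step = lr / t if lr_type == 'inverse' else lr
        for start in range(0, n, batch_size):
            Xb, yb = X[start:start + batch_size], y[start:start + batch_size]
            # Gradient of the mean squared error over the current mini-batch
            grad = 2 * Xb.T @ (Xb @ theta - yb) / len(yb)
            theta -= step * grad
    return theta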
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from linearRegression.linearRegression import LinearRegression

N = 30
P = 5
X = pd.DataFrame(np.random.randn(N, P))
y = pd.Series(np.random.randn(N))

fit_intercept = True
y = X[2]     # make the target an exact linear function of one feature
X[4] = X[2]  # introduce an exactly duplicated column

LR = LinearRegression(fit_intercept=fit_intercept)
LR.fit_vectorised(X, y, X.shape[0], n_iter=1000)  # full-batch gradient descent
LR.plot_contour(X, y)
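# plot_contour is project-local; one plausible reading, sketched below,
# is that it evaluates the residual sum of squares over a grid of two
# parameters (intercept and slope on the informative feature) and draws
# the resulting loss surface. The grid ranges here are assumptions.
import numpy as np
import matplotlib.pyplot as plt

def contour_sketch(x, y, t0_range=(-4, 4), t1_range=(-4, 4)):
    x, y = np.asarray(x, dtype=float), np.asarray(y, dtype=float)
    t0 = np.linspace(*t0_range, 100)
    t1 = np.linspace(*t1_range, 100)
    T0, T1 = np.meshgrid(t0, t1)
    # RSS(theta_0, theta_1) for the model y_hat = theta_0 + theta_1 * x
    rss = ((y[None, None, :] - (T0[..., None] + T1[..., None] * x[None, None, :])) ** 2).sum(axis=-1)
    plt.contour(T0, T1, rss, levels=30)
    plt.xlabel('theta_0')
    plt.ylabel('theta_1')
    plt.show()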
import math
import random

import numpy as np

# Data, measures, and LinearRegression are project-local (dataset
# loader, evaluation metrics, and the model under test).
iris = Data()
iris.load_iris()
X = iris.data
y = iris.target

# get a random train/test split
num_data = len(y)
train_num = math.floor(0.8 * num_data)
test_num = num_data - train_num

# shuffle data
c = list(zip(X, y))
random.shuffle(c)
X, y = zip(*c)

train_X = np.asarray(X[:train_num])
train_y = np.asarray(y[:train_num])
test_X = np.asarray(X[train_num:])
test_y = np.asarray(y[train_num:])

##### Linear Regression #####
lr = LinearRegression()
lr.fit(train_X, train_y)

predictions = []
for x_ in test_X:
    predictions.append(lr.predict(x_)[0])
predictions = np.asarray(predictions)

accuracy = measures.accuracy(predictions, test_y)
print('Accuracy:', accuracy)
##### End Linear Regression #####
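# measures.accuracy is project-local. Since a regression model produces
# continuous outputs while the iris targets are class labels, a
# plausible sketch (an assumption, not the repo's definition) rounds
# predictions to the nearest label before comparing:
import numpy as np

def accuracy_sketch(predictions, targets):
    predicted_labels = np.rint(np.asarray(predictions)).astype(int)
    return float(np.mean(predicted_labels == np.asarray(targets)))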
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from linearRegression.linearRegression import LinearRegression
from preprocessing.polynomial_features import PolynomialFeatures

x = np.array([i * np.pi / 180 for i in range(60, 300, 4)])
np.random.seed(10)  # setting seed for reproducibility
y = 4 * x + 7 + np.random.normal(0, 3, len(x))
x = x.reshape(60, 1)  # converting 1D to 2D for matrix-operation consistency
y = pd.Series(y)

max_degree = 10
degrees = []
thetas = []
for degree in range(1, max_degree + 1):
    degrees.append(degree)
    pf = PolynomialFeatures(degree)
    x_poly = pf.transform(x)
    X = pd.DataFrame(x_poly)
    LR = LinearRegression(fit_intercept=False)
    LR.fit_vectorised(X, y, 30, n_iter=7, lr=0.0001)
    curr_theta = LR.coef_
    tot_theta = np.linalg.norm(curr_theta)
    thetas.append(tot_theta)

plt.yscale('log')
plt.plot(degrees, thetas)
plt.title('Magnitude of theta vs Degree of Polynomial Features')
plt.xlabel('Degree')
plt.ylabel('Magnitude of Theta (log scale)')
plt.savefig('plots/q5')
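# For the 1-D input used above, PolynomialFeatures.transform presumably
# expands x into the columns [x, x^2, ..., x^degree]; a minimal sketch
# follows. Whether the real class also prepends a bias column (the
# script sets fit_intercept=False, which suggests it might) is an
# assumption not verified here.
import numpy as np

class PolynomialFeaturesSketch:
    def __init__(self, degree):
        self.degree = degree

    def transform(self, x):
        # Stack successive powers of the single input feature
        x = np.asarray(x, dtype=float).reshape(-1, 1)
        return np.hstack([x ** d for d in range(1, self.degree + 1)])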
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from linearRegression.linearRegression import LinearRegression
from preprocessing.polynomial_features import PolynomialFeatures
from metrics import mae, rmse

N = 10
P = 2
X = pd.DataFrame(np.random.randn(N, P))
y = pd.Series(np.random.randn(N))
X[3] = 4 * X[1]  # add an exactly collinear column
print(X)

# Gradient-descent method
model2 = LinearRegression()
model2.fit_non_vectorised(X, y, 10)
y_hat = model2.predict(X)
print('RMSE: ', rmse(y_hat, y))
print('MAE: ', mae(y_hat, y))
print()

# Normal-equation method
model = LinearRegression()
model.fit_normal(X, y)
y_hat = model.predict(X)
print(model.coef_)
print('RMSE: ', rmse(y_hat, y))
print('MAE: ', mae(y_hat, y))
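# fit_normal presumably solves least squares in closed form. A minimal
# sketch using the pseudo-inverse: because X[3] = 4 * X[1] makes X^T X
# exactly singular, a plain matrix inverse would fail, while pinv
# returns the minimum-norm solution.
import numpy as np

def fit_normal_sketch(X, y):
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float)
    # theta = (X^T X)^+ X^T y
    return np.linalg.pinv(X.T @ X) @ X.T @ y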
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from linearRegression.linearRegression import LinearRegression
from metrics import *

np.random.seed(42)

N = 30
P = 5
X = pd.DataFrame(np.random.randn(N, P))
y = pd.Series(np.random.randn(N))

for fit_intercept in [True, False]:
    LR = LinearRegression(fit_intercept=fit_intercept)
    LR.fit(X, y)
    y_hat = LR.predict(X)
    LR.plot()
    print('RMSE: ', rmse(y_hat, y))
    print('MAE: ', mae(y_hat, y))
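# The metrics module is project-local; rmse and mae are presumably the
# standard definitions, sketched here for reference.
import numpy as np

def rmse_sketch(y_hat, y):
    # Root mean squared error
    return float(np.sqrt(np.mean((np.asarray(y_hat) - np.asarray(y)) ** 2)))

def mae_sketch(y_hat, y):
    # Mean absolute error
    return float(np.mean(np.abs(np.asarray(y_hat) - np.asarray(y))))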
import numpy as np
import pandas as pd
from linearRegression.linearRegression import LinearRegression
from metrics import *

np.random.seed(42)

N = 30
P = 5
X1 = pd.DataFrame(np.random.randn(N, P))
X = pd.concat([X1, 2 * X1[3], 5 * X1[4]], axis=1)  # append two exactly dependent columns
y = pd.Series(np.random.randn(N))
niter = 100

print('with multicollinearity')
print()
for j in ['constant', 'inverse']:
    print('learning rate', j, ':')
    print()
    print('Vectorised:')
    for fit_intercept in [True, False]:
        LR = LinearRegression(fit_intercept=fit_intercept)
        LR.fit_vectorised(X, y, 30, n_iter=niter, lr_type=j)  # here you can use fit_non_vectorised / fit_autograd methods
        y_hat = LR.predict(X)
        print('RMSE: ', rmse(y_hat, y))
        print('MAE: ', mae(y_hat, y))
        print()
    print()
print()

print('without multicollinearity')
print()
for j in ['constant', 'inverse']:
    print('learning rate', j, ':')
    print()
    print('Vectorised:')
    for fit_intercept in [True, False]:
        LR = LinearRegression(fit_intercept=fit_intercept)
        LR.fit_vectorised(X1, y, 30, n_iter=niter, lr_type=j)
        y_hat = LR.predict(X1)
        print('RMSE: ', rmse(y_hat, y))
        print('MAE: ', mae(y_hat, y))
        print()
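# The appended columns are exact multiples of existing ones, so X^T X is
# singular and the least-squares solution is no longer unique. A quick
# self-contained check makes the ill-conditioning visible:
import numpy as np
import pandas as pd

np.random.seed(42)
X1 = pd.DataFrame(np.random.randn(30, 5))
X = pd.concat([X1, 2 * X1[3], 5 * X1[4]], axis=1)

for label, M in [('without', X1.to_numpy()), ('with', X.to_numpy())]:
    # cond(X^T X) blows up once exactly dependent columns are present
    print(label, 'multicollinearity: cond(X^T X) =', np.linalg.cond(M.T @ M))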
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from linearRegression.linearRegression import LinearRegression
from linearRegression.gradient_descent import GD
from metrics import *

np.random.seed(42)

N = 30
P = 5
X = pd.DataFrame(np.random.randn(N, P))
y = pd.Series(np.random.randn(N))

gd_variant = "vectorised"  # non-vectorised, autograd

for fit_intercept in [True, False]:
    LR = LinearRegression(fit_intercept=fit_intercept)
    LR.fit(X, y, gd_variant)  # here you can supply the gd variants
    y_hat = LR.predict(X)
    LR.plot_residuals()
    print('RMSE: ', rmse(y_hat, y))
    print('MAE: ', mae(y_hat, y))
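# plot_residuals is project-local; one plausible reading, sketched
# below, is a scatter of residuals against fitted values (the real
# method may instead show a histogram or a per-iteration animation).
import numpy as np
import matplotlib.pyplot as plt

def plot_residuals_sketch(y, y_hat):
    residuals = np.asarray(y) - np.asarray(y_hat)
    plt.scatter(y_hat, residuals)
    plt.axhline(0, linestyle='--')
    plt.xlabel('Fitted value')
    plt.ylabel('Residual')
    plt.show()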