# linear_regression.py — 51 lines (38 loc) · 1.42 KB
import numpy as np
from linear_algebra import svd
class LinearRegressor:
    """Linear regression solved via the singular value decomposition.

    Computes the ordinary least-squares solution when ``lambda_l2`` is
    zero, otherwise the ridge-regression solution with L2 penalty
    ``lambda_l2``.
    """

    def __init__(self, lambda_l2=0., intercept=True):
        # lambda_l2: L2 regularization strength; 0 means plain least squares.
        # intercept: if True, a constant column of ones is appended to X
        #            inside both fit() and predict().
        self.intercept = intercept
        self.lambda_l2 = lambda_l2

    def fit(self, X, Y):
        """Estimate ``self.coefs`` from design matrix X and targets Y.

        Uses the decomposition X = U @ diag(S) @ V.T, so the solution is
        V @ diag(D) @ U.T @ Y with D = 1/S (least squares) or
        D = S / (S**2 + lambda_l2) (ridge).
        """
        if self.intercept:
            # NOTE: index -1 inserts the ones column *second-to-last*, not
            # last. predict() inserts at the same position, so fit/predict
            # stay consistent; only the ordering of self.coefs is affected.
            X = np.insert(X, -1, 1., axis=1)
        U, S, V = svd(X)
        if self.lambda_l2 == 0:
            # Rank-deficient X has (near-)zero singular values; 1/S would
            # blow up to inf. Zero them out instead, using the same kind of
            # relative cutoff as numpy.linalg.pinv's rcond.
            S = np.asarray(S, dtype=float)
            cutoff = np.finfo(S.dtype).eps * max(X.shape) * S.max()
            D = np.divide(1., S, out=np.zeros_like(S), where=S > cutoff)
        else:
            D = S / (S**2 + self.lambda_l2)
        # (V * D) scales the columns of V by D, i.e. V @ diag(D).
        self.coefs = (V * D) @ U.T @ Y

    def predict(self, X):
        """Return predictions X @ coefs; requires fit() to have been called."""
        if self.intercept:
            # Must mirror the column insertion done in fit().
            X = np.insert(X, -1, 1., axis=1)
        return np.dot(X, self.coefs)
def sigmoid(x):
    """Logistic sigmoid 1 / (1 + e**-x), applied elementwise via numpy."""
    neg_exp = np.exp(-x)
    return 1 / (1 + neg_exp)
if __name__ == '__main__':
    from sklearn import datasets
    from plot import scatter
    from normalize import get_standardized

    # Demo: ridge regression on the diabetes dataset, holding out the
    # final 20 samples as a test set.
    X, Y = datasets.load_diabetes(return_X_y=True)
    n_test = 20
    X_train, Y_train = X[:-n_test], Y[:-n_test]
    X_test, Y_test = X[-n_test:], Y[-n_test:]
    X_train, X_test = get_standardized(X_train, X_test)

    model = LinearRegressor(lambda_l2=0.001)
    model.fit(X_train, Y_train)
    scatter(Y_test, model.predict(X_test))