def vary_degree(list_deg, x, y, fit_method="vec"):
    l = []
    for degree in list_deg:
        include_bias = True
        poly = PolynomialFeatures(degree, include_bias=include_bias)
        X_trans = []
        for i in range(len(x)):
            ar = np.array([x[i]])
            X_trans.append(poly.transform(ar))
        # print(len(X_trans), len(y))
        # print(pd.DataFrame(X_trans))
        X = X_trans
        LR = LinearRegression(fit_intercept=True)
        if fit_method == "normal":
            thetas = LR.fit_normal(pd.DataFrame(X), pd.Series(y))
        elif fit_method == "non_vec":
            thetas = LR.fit_non_vectorised(pd.DataFrame(X), pd.Series(y), batch_size=1)
        elif fit_method == "vec":
            thetas = LR.fit_vectorised(pd.DataFrame(X), pd.Series(y), batch_size=1)
        else:
            thetas = LR.fit_autograd(pd.DataFrame(X), pd.Series(y), batch_size=1)
        # print(thetas)
        l.append(np.linalg.norm(np.array(thetas)))
    return l
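The LinearRegression class that vary_degree and the later snippets call is not shown in this file. The sketch below is an assumption about the interface they rely on (constructor with fit_intercept, fit_normal via the normal equation, fit_vectorised via mini-batch gradient descent, a coef_ attribute, predict); the class name LinearRegressionSketch and all implementation details are hypothetical, not the repository's actual code.

# Hypothetical sketch of the assumed LinearRegression interface; not the assignment implementation.
import numpy as np
import pandas as pd


class LinearRegressionSketch:
    def __init__(self, fit_intercept=True):
        self.fit_intercept = fit_intercept
        self.coef_ = None

    def _design_matrix(self, X):
        # Prepend a column of ones when an intercept is requested.
        X = np.asarray(X, dtype=float)
        if self.fit_intercept:
            X = np.column_stack([np.ones(X.shape[0]), X])
        return X

    def fit_normal(self, X, y):
        # Closed-form least squares: theta = pinv(X) @ y.
        X = self._design_matrix(X)
        y = np.asarray(y, dtype=float)
        self.coef_ = np.linalg.pinv(X) @ y
        return self.coef_

    def fit_vectorised(self, X, y, batch_size, n_iter=100, lr=0.01, lr_type='constant'):
        # Mini-batch gradient descent on the squared-error loss.
        X = self._design_matrix(X)
        y = np.asarray(y, dtype=float)
        n, d = X.shape
        theta = np.zeros(d)
        for it in range(1, n_iter + 1):
            step = lr if lr_type == 'constant' else lr / it
            idx = np.random.choice(n, size=min(batch_size, n), replace=False)
            Xb, yb = X[idx], y[idx]
            grad = 2 * Xb.T @ (Xb @ theta - yb) / len(idx)
            theta -= step * grad
        self.coef_ = theta
        return self.coef_

    def predict(self, X):
        return self._design_matrix(X) @ self.coef_


if __name__ == '__main__':
    # Usage example: recover intercept ~7 and slope ~4 from noisy data.
    rng = np.random.default_rng(0)
    x_demo = np.linspace(1, 5, 60)
    y_demo = 4 * x_demo + 7 + rng.normal(0, 0.5, len(x_demo))
    lr_demo = LinearRegressionSketch(fit_intercept=True)
    lr_demo.fit_normal(pd.DataFrame(x_demo), pd.Series(y_demo))
    print(lr_demo.coef_)  # approximately [7, 4]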
import numpy as np
from preprocessing.polynomial_features import PolynomialFeatures

X = np.array([1, 2])
poly = PolynomialFeatures(2)
poly.transform(X)
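The custom PolynomialFeatures class imported from preprocessing.polynomial_features is also not shown here. The sketch below is an assumption inferred from how the snippets call it: it treats a 1-D input as a column of samples and returns the powers x^1..x^degree with an optional leading bias column. The class name PolynomialFeaturesSketch and its behaviour are hypothetical, not the repository's actual implementation.

# Hypothetical sketch of the assumed PolynomialFeatures behaviour (1-D powers only).
import numpy as np


class PolynomialFeaturesSketch:
    def __init__(self, degree=2, include_bias=True):
        self.degree = degree
        self.include_bias = include_bias

    def transform(self, X):
        # Assumption: a 1-D array is a column of samples; returns [1, x, x^2, ..., x^degree].
        X = np.asarray(X, dtype=float)
        if X.ndim == 1:
            X = X.reshape(-1, 1)
        cols = [X ** d for d in range(1, self.degree + 1)]
        if self.include_bias:
            cols.insert(0, np.ones_like(X[:, :1]))
        return np.column_stack(cols)


# Usage example: degree 2 on [1, 2] gives [[1, 1, 1], [1, 2, 4]].
print(PolynomialFeaturesSketch(2).transform(np.array([1, 2])))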
import numpy as np
import pandas as pd
import seaborn as sns
from preprocessing.polynomial_features import PolynomialFeatures
from linearRegression.linearRegression import LinearRegression

np.random.seed(10)  # Setting seed for reproducibility
# plt.figure(figsize=(8,6))
plot_details = {'N': list(), 'D': list(), 'T': list()}
N_range = [i for i in range(200, 800, 50)]
degree = [1, 3, 5, 7, 9]
for N in N_range:
    x = np.array([i*np.pi/180 for i in range(60, 60+4*N, 4)])
    y = 4*x + 7 + np.random.normal(0, 3, len(x))
    # weights = list()
    for d in degree:
        poly = PolynomialFeatures(d, False)
        X = poly.transform(np.transpose([x]))
        LR = LinearRegression(True)
        LR.fit_normal(pd.DataFrame(data=X), pd.Series(y))
        plot_details['N'].append(N)
        plot_details['D'].append(d)
        plot_details['T'].append(np.linalg.norm(LR.coef_))
        # weights.append(np.linalg.norm(LR.coef_))
    # plt.plot(degree, weights, label="N="+str(N))

labels = np.array(plot_details['T'])
labels = labels.reshape((len(N_range), len(degree)))
df = pd.DataFrame(data=plot_details)
heatmap1_data = pd.pivot_table(df, values='T', index=['N'], columns='D')
sns.heatmap(heatmap1_data, cmap="YlOrRd", annot=labels)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from preprocessing.polynomial_features import PolynomialFeatures
from linearRegression.linearRegression import LinearRegression
from metrics import *

# x = np.array([i*np.pi/180 for i in range(60,300,4)])
# np.random.seed(10)  # Setting seed for reproducibility
# y = 4*x + 7 + np.random.normal(0,3,len(x))
# def f(x):
#     return 4*x + 7 + np.random.normal(0,3,len(x))

degree = [2, 4, 6, 8, 10]
fnl_theta = []
for d in degree:
    x = np.array([i * np.pi / 180 for i in range(60, 300, 4)])
    poly = PolynomialFeatures(d)
    poly.transform(x)
    newx = np.asarray(poly.result)
    new_X = newx[:, np.newaxis]
    new_Y = 4 * newx + 7 + np.random.normal(0, 3, len(newx))
    for fit_intercept in [False]:
        LR = LinearRegression(fit_intercept=fit_intercept)
        LR.fit_vectorised(pd.DataFrame(new_X), pd.Series(new_Y), n_iter=5)  # here you can use fit_non_vectorised / fit_autograd methods
        fnl_theta.append(np.absolute(LR.coef_[0]))

print(fnl_theta)
plt.plot(degree, fnl_theta)
plt.yscale('log')
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from preprocessing.polynomial_features import PolynomialFeatures
from linearRegression.linearRegression import LinearRegression

for k in range(1, 5):
    x = np.array([i * np.pi / 180 for i in range(60, 300, k)])
    np.random.seed(10)  # Setting seed for reproducibility
    y = 4 * x + 7 + np.random.normal(0, 3, len(x))
    x = x.reshape(x.shape[0], 1)
    X_axis = [1, 3, 5, 7, 9]
    Y_axis = [0, 0, 0, 0, 0]
    for i in range(len(X_axis)):
        mod = PolynomialFeatures(X_axis[i])
        x_t = mod.transform(x)
        model = LinearRegression()
        model.fit_non_vectorised(x_t, y, 1, 10, lr=1, lr_type='inverse')
        Y_axis[i] = np.log(np.abs(model.coef_).max())
    plt.plot(X_axis, Y_axis)
    plt.xlabel("Degree")
    plt.ylabel("Max Theta (Log scale)")
    samp_text = "Max theta vs degree for n=" + str(x.shape[0])
    plt.title(samp_text)
    plt.show()
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import style
from preprocessing.polynomial_features import PolynomialFeatures
from linearRegression.linearRegression import LinearRegression

x = np.array([i*np.pi/180 for i in range(60, 300, 4)])
np.random.seed(10)  # Setting seed for reproducibility
y = 4*x + 7 + np.random.normal(0, 3, len(x))
print(y)

# Fit a degree-1 polynomial (intercept + slope) and recover the coefficients
poly = PolynomialFeatures(1)
X = poly.transform(x.reshape(-1, 1))
LR = LinearRegression(fit_intercept=False)
LR.fit_normal(pd.DataFrame(X), pd.Series(y))
REG_X = LR.coef_
print(REG_X)
REG_Y = REG_X[0] + REG_X[1]*x

style.use('ggplot')
plt.plot(x, y, 'g', label='original', linewidth=5)
plt.plot(x, REG_Y, 'c', label='fitted', linewidth=5)
plt.title('Data')
plt.xlabel('x')
plt.ylabel('y')
plt.legend()
import numpy as np
from preprocessing.polynomial_features import PolynomialFeatures
import sys

X = np.array([1, 2])
degree = int(sys.argv[1])
poly = PolynomialFeatures(degree, include_bias=True)
print(poly.transform(X))
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from linearRegression.linearRegression import LinearRegression
from preprocessing.polynomial_features import PolynomialFeatures
import copy

x = np.array([i * np.pi / 180 for i in range(60, 300, 4)])
np.random.seed(10)
y = 4 * x + 7 + np.random.normal(0, 3, len(x))
y = pd.Series(y)

poly = PolynomialFeatures(degree=1)
X = poly.transform(x.copy())

# LR1 = copy.deepcopy(LinearRegression(fit_intercept=False))
# LR1.fit_non_vectorised(X, y, batch_size=6)
# LR1.fit = 'non_vectorised'
# y_hat = LR1.predict(X)
# LR1.plot_surface(np.array(X[1]), y)
# LR1.plot_line_fit(np.array(X[1]), np.array(y), t_0=1, t_1=1)
# LR1.plot_contour(np.array(X[1]), np.array(y), t_0=1, t_1=1)
# print("--------------------------------------------------")

LR2 = copy.deepcopy(LinearRegression(fit_intercept=False))
LR2.fit_vectorised(X, y, batch_size=60)
LR2.fit = 'vectorised'
y_hat = LR2.predict(X)
LR2.plot_surface(np.array(X[1]), y)
LR2.plot_line_fit(np.array(X[1]), np.array(y), t_0=1, t_1=1)
LR2.plot_contour(np.array(X[1]), np.array(y), t_0=1, t_1=1)
print("--------------------------------------------------")
# Note: num_variations, num_resi, x, and y are assumed to be defined earlier in the script.
degree = [1, 3, 5, 7, 9]
include_bias = True
theta_var = []
for i in range(num_variations):
    theta_deg = []
    ind = np.random.choice(x.shape[0], 5 * (i + 1), replace=False)
    x_new = x[ind]
    x_new = pd.DataFrame(x_new)
    y_new = y[ind]
    y_new = pd.Series(y_new)
    for n in degree:
        poly = PolynomialFeatures(n, include_bias)
        X = poly.transform(x_new)
        L = LinearRegression(fit_intercept=include_bias)
        theta, mse, all_coef = L.fit_non_vectorised(X, y_new, X.shape[0], n_iter=5, lr=0.01, lr_type='constant')
        theta_deg.append(max(abs(theta)))
    theta_var.append(theta_deg)

if num_resi != 0:
    x_new = pd.DataFrame(x)
    y_new = pd.Series(y)
    for n in degree:
        poly = PolynomialFeatures(n, include_bias)
        X = poly.transform(x_new)
        L = LinearRegression(fit_intercept=include_bias)
import numpy as np
from preprocessing.polynomial_features import PolynomialFeatures
import pandas as pd

include_bias = True
np.random.seed(42)
N = 30
P = 2
X = np.array([1, 2])
# X = pd.DataFrame(np.random.randn(N, P))
poly = PolynomialFeatures(2, include_bias)
x = poly.transform(X)
print(x)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from preprocessing.polynomial_features import PolynomialFeatures
from linearRegression.linearRegression import LinearRegression
from copy import deepcopy

x = np.array([[i*np.pi/180 for i in range(60, 300, 4)]]).T
np.random.seed(10)  # Setting seed for reproducibility
y = 4*(x.T[0]) + 7 + np.random.normal(0, 3, len(x.T[0]))
y = pd.Series(y)

max_degree = 10
degrees = [i+1 for i in range(max_degree)]
max_thetas = []
for degree in degrees:
    poly = PolynomialFeatures(degree)
    x_new = poly.transform(x)
    X = pd.DataFrame(x_new)
    LR = deepcopy(LinearRegression(fit_intercept=False))
    LR.fit_vectorised(X, y, 60, n_iter=5, lr=0.0001)  # here you can use fit_non_vectorised / fit_autograd methods
    thetas = LR.coef_
    max_theta = np.linalg.norm(thetas, ord=np.inf)
    max_thetas.append(max_theta)

plt.plot(degrees, max_thetas)
plt.yscale('log')
plt.title(r'Plot of |$\theta$| vs Degree for polynomial fit (log scale)')
plt.xlabel('Degree')
plt.ylabel(r'|$\theta$|')
plt.savefig('./gifs/q5_plot.png')
plt.show()
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from preprocessing.polynomial_features import PolynomialFeatures
from linearRegression.linearRegression import LinearRegression

x = np.array([i * np.pi / 180 for i in range(60, 300, 4)])
np.random.seed(10)  # Setting seed for reproducibility
y = 4 * x + 7 + np.random.normal(0, 3, len(x))
x = pd.DataFrame(x)
y = pd.Series(y)

max_degree = 10
include_bias = True
theta_deg = []
for n in range(max_degree):
    poly = PolynomialFeatures(n + 1, include_bias)
    X = poly.transform(x)
    L = LinearRegression(fit_intercept=include_bias)
    theta, mse, all_coef = L.fit_non_vectorised(X, y, X.shape[0], n_iter=5, lr=0.01, lr_type='constant')
    theta_deg.append(max(abs(theta)))

print(theta_deg)
print(x.shape)
plt.scatter(list(range(1, max_degree + 1)), theta_deg)
import numpy as np
from preprocessing.polynomial_features import PolynomialFeatures

# X = np.array([[1,2], [3,4]])
X = np.array([1, 2])
for include_bias in [True, False]:
    poly = PolynomialFeatures(2, include_bias=include_bias)
    if include_bias:
        print('[Config] Include bias ON')
    else:
        print('[Config] Include bias OFF')
    print("Input: {}".format(X))
    print("Output: {}".format(poly.transform(X)))
    print()
    del poly
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from preprocessing.polynomial_features import PolynomialFeatures
from linearRegression.linearRegression import LinearRegression

x = np.array([i*np.pi/180 for i in range(60, 1000, 1)])
np.random.seed(10)  # Setting seed for reproducibility
y = 4*x + 7 + np.random.normal(0, 3, len(x))

thetas = [[], [], [], [], []]
degrees = [1, 3, 5, 7, 9]
N_s = [[], [], [], [], []]
for degree in range(len(degrees)):
    for N in range(10, 100):
        print(N)
        N_s[degree].append(N)
        x = np.array([i*np.pi/180 for i in range(N, 300, 4)])
        np.random.seed(10)  # Setting seed for reproducibility
        y = 4*x + 7 + np.random.normal(0, 3, len(x))
        poly = PolynomialFeatures(degrees[degree])
        X_temp = poly.transform(x)
        X_temp = pd.DataFrame(X_temp)
        y = pd.Series(y)
        # print(X_temp)
        LR = LinearRegression(fit_intercept=False)
        # thetas_temp = LR.fit_vectorised(X_temp, y, 30, n_iter=3, lr=0.00001, lr_type='constant')
        thetas_temp = LR.fit_normal(X_temp, y)
        # print(thetas_temp)
        thetas[degree].append(np.linalg.norm(thetas_temp))

for i in range(len(degrees)):
    plt.plot(N_s[i], thetas[i], label="degree " + str(degrees[i]))
plt.legend(loc="best")
import numpy as np
import matplotlib.pyplot as plt
from preprocessing.polynomial_features import PolynomialFeatures
import pandas as pd
from linearRegression.linearRegression import LinearRegression
import copy

mag = []
degree = []
for j in range(1, 7):
    the = []
    for i in range(1, 10, 2):
        x = np.array([i * np.pi / 180 for i in range(60, 300 * j, 4)])
        np.random.seed(10)
        y = 4 * x + 7 + np.random.normal(0, 3, len(x))
        y = pd.Series(y)
        poly = PolynomialFeatures(degree=i)
        X = poly.transform(x)
        LR = copy.deepcopy(LinearRegression(fit_intercept=False))
        LR.fit_non_vectorised(X, y, n_iter=5, batch_size=X.shape[0])
        coef = LR.coef_
        the.append(np.linalg.norm(coef))
    mag.append(the)

mag = np.array(mag)
degree = np.array([i for i in range(1, 10, 2)])
N = [(300 * i - 60) // 4 for i in range(1, 7)]
fig = plt.figure()
ax = fig.add_subplot(111)
for i in range(len(N)):
    ax.set_yscale('log')
    ax.plot(degree, mag[i], label='N=' + str(N[i]))
ax.set_xlabel('degree')
import numpy as np
from preprocessing.polynomial_features import PolynomialFeatures
import pandas as pd

x = np.array([i * np.pi / 180 for i in range(60, 100, 4)])
poly = PolynomialFeatures(2)
print("before transformation\n ", pd.DataFrame(x))
print("after transformation\n ", pd.DataFrame(poly.transform(x)))
import numpy as np
from preprocessing.polynomial_features import PolynomialFeatures
# from sklearn.preprocessing import PolynomialFeatures

X = np.array([1, 2])
poly = PolynomialFeatures(2)
print(poly.transform(X))
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from preprocessing.polynomial_features import PolynomialFeatures
from linearRegression.linearRegression import LinearRegression

x = np.array([i * np.pi / 180 for i in range(60, 1260, 4)])
np.random.seed(10)  # Setting seed for reproducibility
y = 4 * x + 7 + np.random.normal(0, 3, len(x))
# A = np.power(x, 3)
# A[np.isnan(A)] = 0
# print(pd.DataFrame(A))

fit_intercept = True
for j in range(100, 300, 90):
    theta1 = []
    degree = []
    for i in [1, 3, 5, 7, 9]:
        poly = PolynomialFeatures(i)
        X = poly.transform(x)
        LR = LinearRegression(fit_intercept=fit_intercept)
        LR.fit_normal(pd.DataFrame(X[:j]), pd.Series(y[:j]))  # here you can use fit_non_vectorised / fit_autograd methods
        theta1.append(LR.print_theta(X, y))
        degree.append(i)
    print(theta1)
    plt.scatter(degree, theta1, label="N = " + str(j))

plt.legend(prop={'size': 6}, borderpad=2)
plt.xlabel("degree")
plt.ylabel("theta")
plt.show()
# y_hat = LR.predict(X)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from preprocessing.polynomial_features import PolynomialFeatures
from linearRegression.linearRegression import LinearRegression

x = np.array([i * np.pi / 180 for i in range(60, 300, 4)])
np.random.seed(10)  # Setting seed for reproducibility
y = 4 * x + 7 + np.random.normal(0, 3, len(x))
x = x.reshape(60, 1)  # Converting 1D to 2D for matrix operations consistency
y = pd.Series(y)

max_degree = 10
degrees = []
thetas = []
for degree in range(1, max_degree + 1):
    degrees.append(degree)
    pf = PolynomialFeatures(degree)
    x_poly = pf.transform(x)
    X = pd.DataFrame(x_poly)
    LR = LinearRegression(fit_intercept=False)
    LR.fit_vectorised(X, y, 30, n_iter=7, lr=0.0001)
    curr_theta = LR.coef_
    tot_theta = np.linalg.norm(curr_theta)
    thetas.append(tot_theta)

plt.yscale('log')
plt.plot(degrees, thetas)
plt.title('Magnitude of theta vs Degree of Polynomial Features')
plt.xlabel('Degree')
plt.ylabel('Magnitude of Theta (log scale)')