def continuous_plot(x, y, i, lr):
    """Draw the observations and the model's curve on one figure.

    Args:
        x: feature array; only ``x.T[0]`` is used for the scatter plot.
        y: values plotted against ``x.T[0]``.
        i: degree handed to ``add_polynomial_features`` for the dense grid.
        lr: model object whose ``predict`` method is called on the expanded
            grid.

    Returns:
        None. The shapes of ``x`` and ``y`` are printed and the figure is
        displayed with ``plt.show()``.
    """
    # A 0.01 step between 1 and 7 yields enough points for a smooth line.
    dense_inputs = np.arange(1, 7.01, 0.01).reshape(-1, 1)
    dense_features = add_polynomial_features(dense_inputs, i)
    curve = lr.predict(dense_features)

    print(x.shape, y.shape)

    plt.scatter(x.T[0], y)
    plt.plot(dense_inputs, curve, color='orange')
    plt.show()
def main():
    """Load the blue-pills dataset and run the comparison for degrees 1 to 4.

    The CSV is read from ``../resources/are_blue_pills_magics.csv``, the
    ``Patient`` column is discarded, and ``Score`` is used as the target.
    For each degree the expanded features are passed to
    ``compare_polynomials`` (defined elsewhere; presumably it fits a model
    and draws onto the current figure -- confirm against its definition).
    """
    frame = pd.read_csv("../resources/are_blue_pills_magics.csv")
    frame = frame.drop("Patient", axis=1)

    predicting_feature = "Score"
    features = np.array(frame.drop(predicting_feature, axis=1))
    targets = np.array(frame[predicting_feature]).reshape(-1, 1)

    # Figure decoration is set up before anything is drawn.
    plt.title("cost in function of polynomial's degree")
    plt.xlabel("x degree")
    plt.ylabel("cost")
    plt.grid()

    for degree in range(1, 11):
        expanded = add_polynomial_features(features, degree)
        compare_polynomials(expanded, targets, degree)
        # The loop is cut short once degree 4 has been processed.
        if degree == 4:
            break

    plt.legend()
    plt.show()
def add_polynomial_features(x, power):
    """Return ``x`` followed by its element-wise powers 2..``power``.

    Args:
        x: array whose columns are raised to each power; the power blocks
            are joined along axis 1.
        power: highest exponent to append. For values below 2 nothing is
            appended and a copy of ``x`` is returned.

    Returns:
        A new array; ``x`` itself is left untouched.
    """
    blocks = [x.copy()]
    blocks.extend(np.power(x, exponent) for exponent in range(2, power + 1))
    if len(blocks) == 1:
        # Nothing to append: hand back the copy as-is.
        return blocks[0]
    return np.concatenate(blocks, axis=1)


# Ten sample points: x = 1..10 as a column vector, y as the matching column.
x = np.arange(1, 11).reshape(-1, 1)
y = np.array([
    [1.39270298],
    [3.88237651],
    [4.37726357],
    [4.63389049],
    [7.79814439],
    [6.41717461],
    [8.63429886],
    [8.19939795],
    [10.37567392],
    [10.68238222],
])

# First figure: raw observations only.
plt.scatter(x, y)
plt.show()

# NOTE: this import rebinds ``add_polynomial_features``, replacing the
# definition above for everything that follows.
from polynomial_model import add_polynomial_features
from mylinearregression import MyLinearRegression as MyLR

# Build the model: cubic features, four parameters initialised to one.
x_ = add_polynomial_features(x, 3)
my_lr = MyLR(np.ones((4, 1)))
my_lr.fit_(x_, y)

# A dense grid over [1, 10] is needed to draw a smooth curve.
continuous_x = np.arange(1, 10.01, 0.01).reshape(-1, 1)
x_ = add_polynomial_features(continuous_x, 3)
y_hat = my_lr.predict_(x_)

# Second figure: observations plus the model's curve.
plt.scatter(x, y)
plt.plot(continuous_x, y_hat, color='orange')
plt.show()
import numpy as np
from polynomial_model import add_polynomial_features

# Column vector holding 1..5; both examples expand this same input.
x = np.arange(1, 6).reshape(-1, 1)

# Example 1: expansion up to the third power.
cubic_features = add_polynomial_features(x, 3)
print(cubic_features)
# Expected output:
# array([[  1,   1,   1],
#        [  2,   4,   8],
#        [  3,   9,  27],
#        [  4,  16,  64],
#        [  5,  25, 125]])

print("--------------------------")

# Example 2: expansion up to the sixth power.
sextic_features = add_polynomial_features(x, 6)
print(sextic_features)
# Expected output:
# array([[    1,     1,     1,     1,     1,     1],
#        [    2,     4,     8,    16,    32,    64],
#        [    3,     9,    27,    81,   243,   729],
#        [    4,    16,    64,   256,  1024,  4096],
#        [    5,    25,   125,   625,  3125, 15625]])
import math
import matplotlib.pyplot as plt
from mylinearregression import MyLinearRegression as MyLR
from polynomial_model import add_polynomial_features

# ``pd`` and ``np`` are expected to be in scope already (their imports are
# not part of this fragment).
data = pd.read_csv("../../day01/resources/are_blue_pills_magics.csv")
X = np.array(data.Micrograms).reshape(-1, 1)
Y = np.array(data.Score).reshape(-1, 1)

# Index i holds the features / model for polynomial degree i + 2.
x = []
myLR = []
for i in range(9):
    print("For power {} :".format(i + 2))

    x.append(add_polynomial_features(X, i + 2))

    # One parameter per polynomial column plus one more, all starting at 1.0.
    thetas = np.ones((i + 3, 1))
    model = MyLR(thetas)
    myLR.append(model)

    # The step size shrinks by a factor of 100 for each additional degree.
    alpha = 1 / math.pow(10, 3 + i * 2)
    model.fit_(x[i], Y, alpha=alpha, n_cycle=250000)

    MSE = model.mse_(x[i], Y)
    print("mse = {}\n".format(MSE))

    # One bar per degree, positioned at the degree itself.
    plt.bar(i + 2, MSE, label="power {}".format(i + 2))

plt.legend(prop={'size': 10})
plt.show()
# Set-up for a degree-10 polynomial experiment on the blue-pills dataset:
# loads the data, defines a small hand-written test set, and builds min-max
# normalised polynomial features for both the training data and a dense
# plotting grid. ``pd`` and ``np`` are used but not imported in this fragment.
import matplotlib.pyplot as plt
import sys
from my_linear_regression import MyLinearRegression as MLR
# NOTE(review): hard-coded absolute path to one developer's machine; the two
# imports below only resolve where this directory exists. It must stay ahead
# of those imports.
sys.path.insert(1, '/Users/elliotcross/Documents/42/python/bootcamp_ml/tools')
from polynomial_model import add_polynomial_features
from normalisation import minmax

data = pd.read_csv("../subjects/day01/resources/are_blue_pills_magics.csv")
x_train = np.array(data["Micrograms"]).reshape(-1, 1)
y_train = np.array(data["Score"]).reshape(-1, 1)
# Seven hand-entered test points, as column vectors.
x_test = np.array([5, 4.3, 2, 2, 5, 6, 3.5]).reshape(-1, 1)
y_test = np.array([39, 52, 70, 58, 50, 32, 62]).reshape(-1, 1)
new_train = add_polynomial_features(x_train, 10)

# Normalise new_train: each of the first 10 columns is rescaled on its own,
# in place, with minmax.
for i in range(10):
    new_train[:, i] = minmax(new_train[:, i])

# For plotting the polynomial curves: a continuous data set spanning 1 to 7
# in steps of 0.01, expanded with the same polynomial features and then
# normalised column by column in the same way.
continuous_x = np.arange(1, 7.01, 0.01).reshape(-1, 1)
x_ = add_polynomial_features(continuous_x, 10)
for i in range(10):
    x_[:, i] = minmax(x_[:, i])

# Eleven parameters initialised to one (presumably 10 feature weights plus an
# intercept -- confirm against MLR).
thetas = np.ones(11).reshape(-1, 1)
# Empty accumulators; they are filled by code outside this fragment.
cost_values = []
thetas_list = []
# NOTE(review): the next two statements are the tail of a function whose
# header lies outside this view (presumably data_spliter -- confirm). It
# returns, in order: array1 without its last column, array2 without its last
# column, the last column of array1, and the last column of array2.
    array2 = n[sep:, :]
    return (array1[:, :-1], array2[:, :-1], array1[:, -1], array2[:, -1])


# Script entry point: fit a plain linear model and then polynomial models of
# degree 2 through 10 on one half of the blue-pills data, printing the value
# returned by cost_ for each on the other half.
if __name__ == "__main__":
    data = pd.read_csv("../resources/are_blue_pills_magics.csv")
    # Double brackets keep each selection two-dimensional (one column).
    x = np.array(data[['Micrograms']])
    y = np.array(data[['Score']])
    lst = data_spliter(x, y, 0.5)
    # This script reads the result as (x_train, x_test, y_train, y_test).
    x_train = lst[0]
    y_train = lst[2]
    # A trailing axis is appended to the target arrays before use.
    y_train = y_train[:, np.newaxis]
    x_test = lst[1]
    y_test = lst[3]
    y_test = y_test[:, np.newaxis]
    i = 2
    # Baseline: untransformed feature, two parameters initialised to one.
    my_lr = MyLinearRegression([[1], [1]])
    my_lr.fit_(x_train, y_train)
    y_hat = my_lr.predict_(x_test)
    # NOTE(review): cost_ is called here with (predictions, targets) --
    # confirm this matches its signature.
    print(my_lr.cost_(y_hat, y_test))
    # Degrees 2..10: a fresh model with i + 1 parameters per degree.
    while i <= 10:
        x_ = add_polynomial_features(x_train, i)
        my_lr = MyLinearRegression(np.ones(i + 1).reshape(-1, 1))
        my_lr.fit_(x_, y_train)
        # The test features get the same expansion before prediction.
        x_2 = add_polynomial_features(x_test, i)
        y_hat = my_lr.predict_(x_2)
        print(my_lr.cost_(y_hat, y_test))
        i += 1
# Fits polynomial models of degree 2 through 5 on an 80% training split of
# the blue-pills data and prints the training MSE of each. ``pd``, ``np``,
# ``math`` and ``data_spliter`` come from outside this fragment.
from mylinearregression import MyLinearRegression as MyLR
from polynomial_model import add_polynomial_features

data = pd.read_csv("../../day01/resources/are_blue_pills_magics.csv")
X = np.array(data.Micrograms).reshape(-1, 1)
Y = np.array(data.Score).reshape(-1, 1)
x_train, x_test, y_train, y_test = data_spliter(X, Y, 0.8)
# Index i holds the training features / model for degree i + 2.
x = []
myLR = []
for i in range(0, 4):
    print("For power {} :".format(i + 2))
    x.append(add_polynomial_features(x_train, i + 2))
    # i + 3 parameters, all starting at 1.0.
    thetas = np.full((i + 3, 1), 1.0)
    myLR.append(MyLR(thetas))
    # The step size shrinks by a factor of 100 per additional degree.
    alpha = 1 / math.pow(10, (3 + i * 2))
    print("alpha = {}".format(alpha))
    myLR[i].fit_(x[i], y_train, alpha=alpha, n_cycle=250000)
    # NOTE(review): the MSE is measured on the training split; x_test and
    # y_test are not used anywhere in the visible part of the script.
    MSE = myLR[i].mse_(x[i], y_train)
    # print("thetas = {}".format(myLR[i].thetas))
    print("mse = {}\n".format(MSE))
    # Dense grid from 1 to 6.5 for drawing this degree's curve; the loop
    # body presumably continues past this fragment.
    continuous_x = np.arange(1, 6.51, 0.01).reshape(-1, 1)
    x_ = add_polynomial_features(continuous_x, i + 2)
    y_hat = myLR[i].predict_(x_)
# Prepares the solar-system census data for per-zipcode training: features
# come from the first CSV, the ``Origin`` labels from the second.
data1 = pd.read_csv("../../day03/resources/solar_system_census.csv")
data2 = pd.read_csv("../../day03/resources/solar_system_census_planets.csv")
# Three feature columns per row; labels as a single column.
X = np.array(data1[['height', 'weight', 'bone_density']]).reshape(-1,3)
Y = np.array(data2.Origin).reshape(-1,1)
# Distinct Origin values, sorted ascending.
zipcodes = np.array(data2.Origin.drop_duplicates())
zipcodes = np.sort(zipcodes)

# Data Splitting
print("Split data (training/test set)\n")
x_train, x_test, y_train, y_test = data_spliter(X, Y, 0.8)
# Both splits get the same degree-3 polynomial expansion.
x_train = add_polynomial_features(x_train, 3)
x_test = add_polynomial_features(x_test, 3)

# Training
print("Train models")
# Ten zero-initialised parameters (presumably 9 expanded features plus an
# intercept -- confirm against the model class).
thetas = [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]
alpha = 1e-4
n_cycle=100000
lambda_ = 0
models = []
# One pass per distinct zipcode; the loop body continues past this fragment.
for i in range(0, len(zipcodes)):
    # NOTE(review): lambda_ grows by 0.1 on every pass, so each zipcode is
    # handled with a different value -- confirm this is intended.
    lambda_ += 0.1
    print("For zipcode = {}".format(zipcodes[i]))
# Loads the blue-pills data, standardises both columns with zscore, splits
# them in half, and precomputes polynomial expansions of the training and
# test features for a range of degrees.
csv_data = pd.read_csv("../resources/are_blue_pills_magics.csv")
# Not used in the visible part of the script.
y_n = []
x = np.array(csv_data["Micrograms"]).reshape(-1, 1)
x = zscore(x)
y = np.array(csv_data["Score"]).reshape(-1, 1)
y = zscore(y)
temp = data_spliter(x, y, 0.5)
# The result is read here as (x_train, x_test, y_train, y_test).
x_train = temp[0]
x_test = temp[1]
y_train = temp[2]
y_test = temp[3]
# Prints the whole split result to stdout.
print(temp)
# Training features expanded to degrees 2 through 9.
x2 = add_polynomial_features(x_train, 2)
x3 = add_polynomial_features(x_train, 3)
x4 = add_polynomial_features(x_train, 4)
x5 = add_polynomial_features(x_train, 5)
x6 = add_polynomial_features(x_train, 6)
x7 = add_polynomial_features(x_train, 7)
x8 = add_polynomial_features(x_train, 8)
x9 = add_polynomial_features(x_train, 9)
# Matching expansions of the test features; only degrees 2 through 8 are
# visible in this fragment.
x2_test = add_polynomial_features(x_test, 2)
x3_test = add_polynomial_features(x_test, 3)
x4_test = add_polynomial_features(x_test, 4)
x5_test = add_polynomial_features(x_test, 5)
x6_test = add_polynomial_features(x_test, 6)
x7_test = add_polynomial_features(x_test, 7)
x8_test = add_polynomial_features(x_test, 8)
# NOTE(review): the statements up to ``return`` are the tail of a function
# whose header (and the initial values of ``temp`` and ``mu``) lie outside
# this view. They accumulate squared deviations from ``mu``, divide by
# len(x) - 1 before taking the square root, and return (x - mu) / std.
    for elem in x:
        temp += ((elem - mu) * (elem - mu))
    var = temp
    std = math.sqrt(var / (len(x) - 1))
    return (x - mu) / std


# Script: fit a degree-9 polynomial model on the min-max scaled blue-pills
# data, starting from hand-entered parameters, and draw the resulting curve.
csv_data = pd.read_csv("../resources/are_blue_pills_magics.csv")
# Not used in the visible part of the script.
y_n = []
x = np.array(csv_data["Micrograms"]).reshape(-1, 1)
x = minmax(x)
y = np.array(csv_data["Score"]).reshape(-1, 1)
y = minmax(y)
plt.scatter(x, y)
x9 = add_polynomial_features(x, 9)
# mylr4 = MyLR([[10.0],[-21.0 ], [-0.28], [4.63], [6.73]],alpha=5e-3)
# Ten starting parameters (presumably an intercept plus nine weights).
mylr9 = MyLR(
    [[0.99549772],
     [-3.04228406],
     [11.0342294],
     [-12.5192794],
     [-7.56251887],
     [4.59267205],
     [9.57475922],
     [5.99224473],
     [-1.55560663],
     [-7.52630899]],
    alpha=0.55)
mylr9.fit_(x9, y)
# NOTE(review): cost_ is called here with (features, targets) -- confirm
# this matches its signature.
print(mylr9.cost_(x9, y))
# Dense grid over [0, 1) in steps of 0.001, used to draw the fitted curve
# across the scaled inputs.
continuous_x = np.arange(0, 1, 0.001).reshape(-1, 1)
x_9 = add_polynomial_features(continuous_x, 9)
y_hat = mylr9.predict_(x_9)
print(mylr9.thetas)
# print(x_9)
# print(y)
plt.plot(continuous_x, y_hat, color='orange')
def continuous_plot(x, y, i, lr):
    """Draw the observations and the model's curve on one figure.

    Args:
        x: feature array; only ``x.T[0]`` is used for the scatter plot.
        y: values plotted against ``x.T[0]``.
        i: degree handed to ``add_polynomial_features`` for the dense grid.
        lr: model object whose ``predict`` method is called on the expanded
            grid.

    Returns:
        None. The shapes of ``x`` and ``y`` are printed and the figure is
        displayed with ``plt.show()``.
    """
    # A 0.01 step between 1 and 7 yields enough points for a smooth line.
    dense_inputs = np.arange(1, 7.01, 0.01).reshape(-1, 1)
    dense_features = add_polynomial_features(dense_inputs, i)
    curve = lr.predict(dense_features)

    print(x.shape, y.shape)

    plt.scatter(x.T[0], y)
    plt.plot(dense_inputs, curve, color='orange')
    plt.show()


cost = []
# Degree-10 expansion of the pill feature (``Xpill`` is defined elsewhere).
x = add_polynomial_features(Xpill, 10)
# Eleven hand-entered parameter values, one per row.
big_theta = [
    [2.03333758e-06],
    [4.76503382e-06],
    [1.29939248e-05],
    [3.79946877e-05],
    [1.12691614e-04],
    [3.25797609e-04],
    [8.76644495e-04],
    [2.01101984e-03],
    [3.02151256e-03],
    [-1.12991082e-03],
    [9.48325917e-05],
]
# big_theta_futur = [[ 2.07037841e-06],
# [ 4.83925060e-06],
# [ 1.31593092e-05],
# [ 3.83642999e-05],
# [ 1.13422797e-04],
# [ 3.26767863e-04],
# [ 8.75990025e-04],
# [ 2.00179965e-03],
# [ 2.99573196e-03],
# [-1.12062352e-03],