Example #1
0
def continuous_plot(x, y, i, lr):
    """Scatter the samples and overlay the curve predicted by ``lr``.

    Args:
        x: feature array; only its first column (``x.T[0]``) is scattered.
        y: target values plotted against that first column.
        i: polynomial degree passed to ``add_polynomial_features`` when
            expanding the plotting grid.
        lr: fitted model exposing ``predict``; it is called on the expanded
            grid.

    Side effects:
        Prints the shapes of ``x`` and ``y`` and opens a matplotlib window.
    """
    # A dense grid (step 0.01 from 1 up to 7) keeps the drawn curve smooth.
    grid = np.arange(1, 7.01, 0.01).reshape(-1, 1)
    curve = lr.predict(add_polynomial_features(grid, i))

    print(x.shape, y.shape)

    first_feature = x.T[0]
    plt.scatter(first_feature, y)
    plt.plot(grid, curve, color='orange')
    plt.show()
Example #2
0
def main():
    """Plot the cost obtained for polynomial degrees 1 to 4.

    Loads the blue-pills CSV, drops the ``Patient`` column, uses ``Score``
    as the target and every remaining column as features, then delegates
    to ``compare_polynomials`` once per degree before showing the figure.
    """
    target = "Score"
    frame = pd.read_csv("../resources/are_blue_pills_magics.csv")
    frame = frame.drop("Patient", axis=1)
    x = np.array(frame.drop(target, axis=1))
    y = np.array(frame[target]).reshape(-1, 1)

    # Figure decoration is set up before any series is drawn.
    plt.title("cost in function of polynomial's degree")
    plt.xlabel("x degree")
    plt.ylabel("cost")
    plt.grid()

    # Only degrees 1 through 4 are evaluated.
    for degree in range(1, 5):
        expanded = add_polynomial_features(x, degree)
        compare_polynomials(expanded, y, degree)

    plt.legend()
    plt.show()
Example #3
0

def add_polynomial_features(x, power):
    """Return ``x`` with its element-wise powers 2..``power`` appended.

    The result keeps the original columns first, followed by ``x ** 2``,
    ``x ** 3`` and so on up to ``x ** power``, stacked along axis 1.

    Args:
        x: 2-D array of shape (m, n).
        power: highest exponent to append. For ``power`` below 2 nothing is
            appended and a copy of ``x`` is returned.

    Returns:
        A new array of shape (m, n * power) when ``power`` >= 1; the input
        is never modified.
    """
    blocks = [x.copy()]
    blocks.extend(np.power(x, exponent) for exponent in range(2, power + 1))
    if len(blocks) == 1:
        # No extra powers requested: hand back the plain copy.
        return blocks[0]
    return np.concatenate(blocks, axis=1)


# Demo: fit a degree-3 polynomial to ten hand-written samples and plot it.
# x holds the integers 1..10 as a column vector; y holds the matching targets.
x = np.arange(1, 11).reshape(-1, 1)
y = np.array([[1.39270298], [3.88237651], [4.37726357], [4.63389049],
              [7.79814439], [6.41717461], [8.63429886], [8.19939795],
              [10.37567392], [10.68238222]])
# First figure: raw samples only.
plt.scatter(x, y)
plt.show()

from polynomial_model import add_polynomial_features
from mylinearregression import MyLinearRegression as MyLR
# Build the model:
# Expand x to powers 1..3 and start from four thetas, all set to one
# (presumably one intercept plus one weight per power -- confirm against MyLR).
x_ = add_polynomial_features(x, 3)
my_lr = MyLR(np.ones(4).reshape(-1, 1))
my_lr.fit_(x_, y)
## To get a smooth curve, we need a lot of data points
# The grid covers 1 to 10 in steps of 0.01 and gets the same degree-3 expansion.
continuous_x = np.arange(1, 10.01, 0.01).reshape(-1, 1)
x_ = add_polynomial_features(continuous_x, 3)
y_hat = my_lr.predict_(x_)
# Second figure: samples plus the predicted curve in orange.
plt.scatter(x, y)
# print(my_lr.thetas)
plt.plot(continuous_x, y_hat, color='orange')
plt.show()
Example #4
0
import numpy as np
from polynomial_model import add_polynomial_features

# Usage examples for add_polynomial_features on the column vector 1..5.
x = np.arange(1, 6).reshape(-1, 1)

# Example 1:
# Powers 1 to 3 of each value, one power per column.
print(add_polynomial_features(x, 3))
# Output:
# array([[  1,   1,   1],
#        [  2,   4,   8],
#        [  3,   9,  27],
#        [  4,  16,  64],
#        [  5,  25, 125]])

print("--------------------------")
# Example 2:
# Powers 1 to 6 of each value, one power per column.
print(add_polynomial_features(x, 6))
# Output:
# array([[    1,     1,     1,     1,     1,     1],
#        [    2,     4,     8,    16,    32,    64],
#        [    3,     9,    27,    81,   243,   729],
#        [    4,    16,    64,   256,  1024,  4096],
#        [    5,    25,   125,   625,  3125, 15625]])
import math
import matplotlib.pyplot as plt

from mylinearregression import MyLinearRegression as MyLR
from polynomial_model import add_polynomial_features

# Fit one polynomial model per degree (2 through 10) on the full dataset and
# draw a bar chart of the resulting MSE values.
data = pd.read_csv("../../day01/resources/are_blue_pills_magics.csv")

# Feature and target as column vectors.
X = np.array(data.Micrograms).reshape(-1, 1)
Y = np.array(data.Score).reshape(-1, 1)

# x[i] is the expanded feature matrix and myLR[i] the model for degree i + 2.
x = []
myLR = []
for i in range(0, 9):
    print("For power {} :".format(i + 2))
    x.append(add_polynomial_features(X, i + 2))
    # i + 3 thetas for i + 2 feature columns (presumably the extra one is the
    # intercept -- confirm against MyLR).
    thetas = np.full((i + 3, 1), 1.0)
    myLR.append(MyLR(thetas))

    # Learning rate shrinks by a factor of 100 per extra degree:
    # 1e-3 for degree 2, 1e-5 for degree 3, and so on.
    alpha = 1 / math.pow(10, 3 + i * 2)
    myLR[i].fit_(x[i], Y, alpha=alpha, n_cycle=250000)

    MSE = myLR[i].mse_(x[i], Y)

    # print("thetas = {}".format(myLR[i].thetas))
    print("mse = {}\n".format(MSE))
    # One bar per degree, positioned at the degree value on the x axis.
    plt.bar(i + 2, MSE, label="power {}".format(i + 2))

plt.legend(prop={'size': 10})
plt.show()
import matplotlib.pyplot as plt
import sys

from my_linear_regression import MyLinearRegression as MLR
sys.path.insert(1, '/Users/elliotcross/Documents/42/python/bootcamp_ml/tools')
from polynomial_model import add_polynomial_features
from normalisation import minmax

# Prepare normalised degree-10 polynomial features for training and plotting.
data = pd.read_csv("../subjects/day01/resources/are_blue_pills_magics.csv")
x_train = np.array(data["Micrograms"]).reshape(-1, 1)
y_train = np.array(data["Score"]).reshape(-1, 1)

# Hand-written test samples (seven x / y pairs) as column vectors.
x_test = np.array([5, 4.3, 2, 2, 5, 6, 3.5]).reshape(-1, 1)
y_test = np.array([39, 52, 70, 58, 50, 32, 62]).reshape(-1, 1)

new_train = add_polynomial_features(x_train, 10)
# Normalise new_train: minmax is applied to each of the 10 columns separately.
for i in range(10):
    new_train[:, i] = minmax(new_train[:, i])

# For plotting the polynomial curves: a continuous data set over the range of
# the original data (1 to 7, step 0.01),
# then add polynomial features and normalise each column the same way.
continuous_x = np.arange(1, 7.01, 0.01).reshape(-1, 1)
x_ = add_polynomial_features(continuous_x, 10)
for i in range(10):
    x_[:, i] = minmax(x_[:, i])

# Eleven starting thetas, all ones, for the ten feature columns
# (presumably the extra one is the intercept -- confirm against the model).
thetas = np.ones(11).reshape(-1, 1)

# Empty accumulators; they are filled outside the visible part of this script.
cost_values = []
thetas_list = []
    array2 = n[sep:, :]
    return (array1[:, :-1], array2[:, :-1], array1[:, -1], array2[:, -1])


# Script entry point: split the data in half, then print the test cost of a
# linear model followed by polynomial models of degree 2 through 10.
if __name__ == "__main__":
    data = pd.read_csv("../resources/are_blue_pills_magics.csv")
    # Double brackets keep both arrays two-dimensional (one column each).
    x = np.array(data[['Micrograms']])
    y = np.array(data[['Score']])

    # lst is indexed as (x_train, x_test, y_train, y_test).
    lst = data_spliter(x, y, 0.5)
    x_train = lst[0]
    y_train = lst[2]
    # Add a trailing axis to the targets (presumably data_spliter returns
    # them 1-D and the model expects column vectors -- confirm).
    y_train = y_train[:, np.newaxis]
    x_test = lst[1]
    y_test = lst[3]
    y_test = y_test[:, np.newaxis]

    # Baseline: degree-1 model with two thetas, evaluated on the test split.
    i = 2
    my_lr = MyLinearRegression([[1], [1]])
    my_lr.fit_(x_train, y_train)
    y_hat = my_lr.predict_(x_test)
    print(my_lr.cost_(y_hat, y_test))

    # Degrees 2..10: a fresh model with i + 1 thetas is trained on the
    # expanded training features and scored on the expanded test features.
    while i <= 10:
        x_ = add_polynomial_features(x_train, i)
        my_lr = MyLinearRegression(np.ones(i + 1).reshape(-1, 1))
        my_lr.fit_(x_, y_train)
        x_2 = add_polynomial_features(x_test, i)
        y_hat = my_lr.predict_(x_2)
        print(my_lr.cost_(y_hat, y_test))
        i += 1
from mylinearregression import MyLinearRegression as MyLR
from polynomial_model import add_polynomial_features

# Fit polynomial models of degree 2 through 5 on a training split and report
# the training MSE of each.
data = pd.read_csv("../../day01/resources/are_blue_pills_magics.csv")

# Feature and target as column vectors.
X = np.array(data.Micrograms).reshape(-1, 1)
Y = np.array(data.Score).reshape(-1, 1)

# 0.8 is the split proportion handed to data_spliter.
x_train, x_test, y_train, y_test = data_spliter(X, Y, 0.8)

# x[i] is the expanded training matrix and myLR[i] the model for degree i + 2.
x = []
myLR = []
for i in range(0, 4):
    print("For power {} :".format(i + 2))
    x.append(add_polynomial_features(x_train, i + 2))
    # i + 3 thetas for i + 2 feature columns (presumably the extra one is the
    # intercept -- confirm against MyLR).
    thetas = np.full((i + 3, 1), 1.0)
    myLR.append(MyLR(thetas))

    # Learning rate shrinks by a factor of 100 per extra degree, from 1e-3.
    alpha = 1 / math.pow(10, (3 + i * 2))
    print("alpha = {}".format(alpha))

    myLR[i].fit_(x[i], y_train, alpha=alpha, n_cycle=250000)
    # MSE is measured on the same training data the model was fitted on.
    MSE = myLR[i].mse_(x[i], y_train)

    # print("thetas = {}".format(myLR[i].thetas))
    print("mse = {}\n".format(MSE))

    # Dense grid from 1 to 6.5 (step 0.01) and its prediction for this degree.
    continuous_x = np.arange(1, 6.51, 0.01).reshape(-1, 1)
    x_ = add_polynomial_features(continuous_x, i + 2)
    y_hat = myLR[i].predict_(x_)
Example #9
0
# Set-up for per-zipcode models on the solar-system census data.
# Features come from the first CSV and the Origin labels from the second.
data1 = pd.read_csv("../../day03/resources/solar_system_census.csv")
data2 = pd.read_csv("../../day03/resources/solar_system_census_planets.csv")

# X has three columns (height, weight, bone_density); Y is a column vector.
X = np.array(data1[['height', 'weight', 'bone_density']]).reshape(-1,3)
Y = np.array(data2.Origin).reshape(-1,1)

# Sorted distinct Origin values; one loop iteration is run per value below.
zipcodes = np.array(data2.Origin.drop_duplicates())
zipcodes = np.sort(zipcodes)

# Data Splitting
print("Split data (training/test set)\n")

x_train, x_test, y_train, y_test = data_spliter(X, Y, 0.8)

# Both splits get the same degree-3 polynomial expansion.
x_train = add_polynomial_features(x_train, 3)
x_test = add_polynomial_features(x_test, 3)

# Training
print("Train models")

# Ten zero-initialised thetas (presumably 9 expanded feature columns plus an
# intercept -- confirm against the model class).
thetas = [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]
alpha = 1e-4
n_cycle=100000
lambda_ = 0

models = []
for i in range(0, len(zipcodes)):
    # lambda_ grows by 0.1 on every iteration, so each zipcode sees a
    # different value.
    lambda_ += 0.1

    print("For zipcode = {}".format(zipcodes[i]))
Example #10
0
# Z-score the feature and target, split them in half, then build polynomial
# expansions of the training and test features for several degrees.
csv_data = pd.read_csv("../resources/are_blue_pills_magics.csv")
y_n = []
x = np.array(csv_data["Micrograms"]).reshape(-1, 1)
x = zscore(x)

y = np.array(csv_data["Score"]).reshape(-1, 1)
y = zscore(y)

# temp is indexed as (x_train, x_test, y_train, y_test).
temp = data_spliter(x, y, 0.5)
x_train = temp[0]
x_test = temp[1]
y_train = temp[2]
y_test = temp[3]
print(temp)
# Training features expanded to degrees 2 through 9.
x2 = add_polynomial_features(x_train, 2)
x3 = add_polynomial_features(x_train, 3)
x4 = add_polynomial_features(x_train, 4)
x5 = add_polynomial_features(x_train, 5)
x6 = add_polynomial_features(x_train, 6)
x7 = add_polynomial_features(x_train, 7)
x8 = add_polynomial_features(x_train, 8)
x9 = add_polynomial_features(x_train, 9)

# Test features expanded to degrees 2 through 8.
x2_test = add_polynomial_features(x_test, 2)
x3_test = add_polynomial_features(x_test, 3)
x4_test = add_polynomial_features(x_test, 4)
x5_test = add_polynomial_features(x_test, 5)
x6_test = add_polynomial_features(x_test, 6)
x7_test = add_polynomial_features(x_test, 7)
x8_test = add_polynomial_features(x_test, 8)
    for elem in x:
        temp += ((elem - mu) * (elem - mu))
    var = temp
    std = math.sqrt(var / (len(x) - 1))
    return (x - mu) / std


# Fit a degree-9 polynomial on min-max scaled data and plot the fitted curve.
csv_data = pd.read_csv("../resources/are_blue_pills_magics.csv")
y_n = []
x = np.array(csv_data["Micrograms"]).reshape(-1, 1)
x = minmax(x)

y = np.array(csv_data["Score"]).reshape(-1, 1)
y = minmax(y)
plt.scatter(x, y)
x9 = add_polynomial_features(x, 9)
# mylr4 = MyLR([[10.0],[-21.0 ], [-0.28], [4.63], [6.73]],alpha=5e-3)
# Ten hard-coded starting thetas for the nine feature columns
# (presumably saved from an earlier training run -- not shown here).
mylr9 = MyLR(
    [[0.99549772], [-3.04228406], [11.0342294], [-12.5192794], [-7.56251887],
     [4.59267205], [9.57475922], [5.99224473], [-1.55560663], [-7.52630899]],
    alpha=0.55)
mylr9.fit_(x9, y)
print(mylr9.cost_(x9, y))

# Plotting grid over 0 (inclusive) to 1 (exclusive) in steps of 0.001.
continuous_x = np.arange(0, 1, 0.001).reshape(-1, 1)
x_9 = add_polynomial_features(continuous_x, 9)
y_hat = mylr9.predict_(x_9)
print(mylr9.thetas)
# print(x_9)
# print(y)
plt.plot(continuous_x, y_hat, color='orange')
Example #12
0
def continuous_plot(x, y, i, lr):
    """Draw the samples together with the model's curve for degree ``i``.

    Args:
        x: feature array; its first column (``x.T[0]``) is used for the
            scatter plot.
        y: target values scattered against that column.
        i: polynomial degree used to expand the plotting grid via
            ``add_polynomial_features``.
        lr: fitted model whose ``predict`` method is evaluated on the
            expanded grid.

    Side effects:
        Prints ``x.shape`` and ``y.shape`` and displays a matplotlib figure.
    """
    # Many closely spaced points (1 to 7, step 0.01) give a smooth curve.
    dense_x = np.arange(1, 7.01, 0.01).reshape(-1, 1)
    dense_features = add_polynomial_features(dense_x, i)
    predictions = lr.predict(dense_features)

    print(x.shape, y.shape)

    plt.scatter(x.T[0], y)
    plt.plot(dense_x, predictions, color='orange')
    plt.show()


# Empty accumulator; it is filled outside the visible part of this script.
cost = []

# Degree-10 expansion of Xpill (defined outside the visible part of the file).
x = add_polynomial_features(Xpill, 10)
# Eleven hard-coded thetas for the ten feature columns
# (presumably saved from an earlier training run -- not shown here).
big_theta = [[2.03333758e-06], [4.76503382e-06], [1.29939248e-05],
             [3.79946877e-05], [1.12691614e-04], [3.25797609e-04],
             [8.76644495e-04], [2.01101984e-03], [3.02151256e-03],
             [-1.12991082e-03], [9.48325917e-05]]

# big_theta_futur = [[ 2.07037841e-06],
#  [ 4.83925060e-06],
#  [ 1.31593092e-05],
#  [ 3.83642999e-05],
#  [ 1.13422797e-04],
#  [ 3.26767863e-04],
#  [ 8.75990025e-04],
#  [ 2.00179965e-03],
#  [ 2.99573196e-03],
#  [-1.12062352e-03],