Example #1
def process(data, learning_rate, num_iter, degree, Islasso, Isdegree):
    # Training data part
    (training_data, validation_data, testing_data,
     x_shuffled) = pre_processing.train_test_split(data, degree)
    x = training_data[:, :-1]
    y_org = training_data[:, -1]
    y_org = np.reshape(y_org, (x.shape[0], 1))
    lambdas = []
    betas = []
    temp = []
    # Fit one candidate model per randomly sampled regularization strength
    for j in range(10):
        lam = random.random()
        lambdas.append(lam)
        beta = gd(x, y_org, learning_rate, num_iter, lam, Islasso, Isdegree)
        betas.append(beta)

    # Select the lambda with the lowest validation error
    x_validate = validation_data[:, :-1]
    y_validate_org = validation_data[:, -1]
    for beta in betas:
        y_validate = x_validate.dot(beta)
        temp.append(error(y_validate_org, y_validate))
    min_index = temp.index(min(temp))
    min_lambda = lambdas[min_index]
    min_beta = betas[min_index]

    error_in_train = error(x.dot(min_beta), y_org)
    error_in_validation = error(x_validate.dot(min_beta), y_validate_org)
    # Test data part
    x_test = testing_data[:, :-1]
    y_test = testing_data[:, -1]
    error_in_test = error(x_test.dot(min_beta), y_test)
    return min_lambda, min_beta, error_in_train, error_in_validation, error_in_test
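The snippet relies on project-local helpers (pre_processing.train_test_split, gd, error) that are not shown. As a self-contained illustration of the same pattern -- sampling random regularization strengths and keeping the model with the lowest validation RMSE -- here is a minimal sketch on toy data, with a closed-form ridge solve standing in for the project's gd routine (an assumption on my part):

import numpy as np

rng = np.random.default_rng(0)

def rmse(y_true, y_pred):
    return np.sqrt(np.mean((y_true - y_pred) ** 2))

def ridge_fit(x, y, lam):
    # Closed-form ridge: (X^T X + lam * I)^{-1} X^T y
    return np.linalg.solve(x.T @ x + lam * np.eye(x.shape[1]), x.T @ y)

# Toy data, split 60/20/20 into train/validation/test
x_all = rng.normal(size=(100, 3))
y_all = x_all @ np.array([[1.0], [-2.0], [0.5]]) + 0.1 * rng.normal(size=(100, 1))
x_tr, x_va, x_te = x_all[:60], x_all[60:80], x_all[80:]
y_tr, y_va, y_te = y_all[:60], y_all[60:80], y_all[80:]

# Random search over lambda; keep the model with the lowest validation RMSE
min_lambda, min_beta = min(
    ((lam, ridge_fit(x_tr, y_tr, lam)) for lam in rng.random(10)),
    key=lambda pair: rmse(y_va, x_va @ pair[1]),
)
print(min_lambda, rmse(y_te, x_te @ min_beta))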
Example #2
def process(data, learning_rate, num_iter, degree, Islasso, Isdegree):
    # Training data part
    (training_data, validation_data, testing_data,
     x_shuffled) = pre_processing.train_test_split(data, degree)

    x = training_data[:, :-1]
    y_org = training_data[:, -1]
    y_org = np.reshape(y_org, (x.shape[0], 1))
    lambdas = []
    betas = []
    temp = []
    # Fit one candidate model per randomly sampled regularization strength
    for j in range(10):
        lam = random.random()
        lambdas.append(lam)
        beta = gd(x, y_org, learning_rate, num_iter, lam, Islasso, Isdegree)
        betas.append(beta)

    # Select the lambda with the lowest validation error
    x_validate = validation_data[:, :-1]
    y_validate_org = validation_data[:, -1]
    for beta in betas:
        y_validate = x_validate.dot(beta)
        temp.append(error(y_validate_org, y_validate))
    min_index = temp.index(min(temp))
    min_lambda = lambdas[min_index]
    min_beta = betas[min_index]

    error_in_train = error(x.dot(min_beta), y_org)
    error_in_validation = error(x_validate.dot(min_beta), y_validate_org)
    # Test data part
    x_test = testing_data[:, :-1]
    y_test = testing_data[:, -1]
    error_in_test = error(x_test.dot(min_beta), y_test)

    # Rescale predictions from [0, 1] back to the original target range (min-max)
    minval = np.amin(data[:, -1])
    maxval = np.amax(data[:, -1])
    z = x.dot(min_beta)
    z = minval + z * (maxval - minval)
    # Creating figure
    fig = plt.figure()
    # fig.gca(projection="3d") was removed in Matplotlib 3.6; use add_subplot instead
    ax = fig.add_subplot(projection="3d")
    xx = x_shuffled[:, 0]
    yy = x_shuffled[:, 1]

    z = np.asarray(z).squeeze()
    ax.plot_trisurf(xx, yy, z, edgecolor='none', cmap='viridis')
    # ax.scatter3D(xx, yy, y_org, color='violet')
    plt.title("3D surface plot for degree " + str(degree))
    plt.xlabel("Age")
    plt.ylabel("BMI")
    ax.set_zlabel('Predicted Insurance Cost')
    # show plot
    plt.show()
    plt.close()

    return min_lambda, min_beta, error_in_train, error_in_validation, error_in_test
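The plotting section is the main addition over Example #1. A minimal, standalone sketch of the same plot_trisurf call on synthetic Age/BMI data (the column meanings are taken from the axis labels above; the values here are made up for illustration):

import numpy as np
import matplotlib.pyplot as plt

rng = np.random.default_rng(0)
xx = rng.uniform(18, 65, 200)                       # stand-in "Age" values
yy = rng.uniform(15, 40, 200)                       # stand-in "BMI" values
z = 200 * xx + 300 * yy + rng.normal(0, 500, 200)   # fake predicted cost

fig = plt.figure()
ax = fig.add_subplot(projection="3d")               # replaces the removed fig.gca(projection=...)
ax.plot_trisurf(xx, yy, z, edgecolor="none", cmap="viridis")
ax.set_xlabel("Age")
ax.set_ylabel("BMI")
ax.set_zlabel("Predicted Insurance Cost")
plt.show()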
Example #3
def process(data, learning_rate, num_iter, isPlot):
    # Training data part
    (training_data, testing_data) = pre_processing.train_test_split(data)
    x = training_data[:, 0:3]
    y_train_org = training_data[:, 3]
    ones = np.ones((x.shape[0], 1))
    x = np.append(ones, x, axis=1)  # prepend an intercept column
    y_train_org = np.reshape(y_train_org, (x.shape[0], 1))

    beta = sgd(x, y_train_org, learning_rate, num_iter, isPlot)

    y_train = x.dot(beta)
    error_in_train = error(y_train, y_train_org)

    # Test data part
    x_test = testing_data[:, 0:3]
    y_test_org = testing_data[:, 3]
    ones = np.ones((x_test.shape[0], 1))
    x_test = np.append(ones, x_test, axis=1)
    y_test = x_test.dot(beta)
    error_in_test = error(y_test_org, y_test)

    return (error_in_train, error_in_test)
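The sgd helper itself is not shown. A self-contained sketch of what a stochastic-gradient-descent fit with this signature might do -- one randomly chosen sample per update, squared-error loss -- offered purely as an assumption about its behavior:

import numpy as np

def sgd_sketch(x, y, learning_rate=0.01, num_iter=1000, seed=0):
    # Stochastic gradient descent on squared error, one random sample per step
    rng = np.random.default_rng(seed)
    beta = np.zeros((x.shape[1], 1))
    for _ in range(num_iter):
        i = rng.integers(x.shape[0])
        xi = x[i:i + 1]                              # shape (1, n_features)
        grad = 2 * xi.T @ (xi @ beta - y[i:i + 1])
        beta -= learning_rate * grad
    return beta

# Tiny demo: recover beta ~ [intercept, slope] from noisy data
rng = np.random.default_rng(1)
x_raw = rng.normal(size=(200, 1))
x = np.hstack([np.ones((200, 1)), x_raw])            # intercept column, as in the snippet
y = 3.0 + 2.0 * x_raw + 0.1 * rng.normal(size=(200, 1))
print(sgd_sketch(x, y, learning_rate=0.05, num_iter=5000))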
Example #4
import numpy as np
import pandas as pd
import pre_processing
from numpy.linalg import inv
from matplotlib import pyplot


def error(y, yi):
    # Root-mean-square error
    return np.sqrt(np.mean((y - yi) ** 2))


data = pd.read_csv("insurance.txt").to_numpy()
data = pre_processing.standardization(data)
all_errors_train = []
all_errors_test = []
# Repeat the ordinary-least-squares fit over 20 random train/test splits
for i in range(20):
    (training_data, testing_data) = pre_processing.train_test_split(data)
    x = training_data[:, 0:3]
    y_org = training_data[:, 3]
    y_org = np.reshape(y_org, (len(y_org), 1))
    ones = np.ones((x.shape[0], 1))
    x = np.append(ones, x, axis=1)
    beta = inv(x.T.dot(x)).dot(x.T).dot(y_org)  # normal equations: (X^T X)^{-1} X^T y
    y_train = x.dot(beta)
    all_errors_train.append(error(y_org, y_train))
    y_org_test = testing_data[:, 3]
    x_test = testing_data[:, 0:3]
    ones = np.ones((x_test.shape[0], 1))
    x_test = np.append(ones, x_test, axis=1)
    y_test = x_test.dot(beta)
    all_errors_test.append(error(y_org_test, y_test))
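A side note on the normal-equations solve above: inv(x.T @ x) can be numerically fragile when feature columns are nearly collinear. np.linalg.lstsq computes the same least-squares minimizer more stably; a tiny standalone comparison:

import numpy as np

# Same minimizer as inv(x.T @ x) @ x.T @ y, but computed without an explicit inverse
x = np.array([[1.0, 0.5], [1.0, 1.5], [1.0, 2.5], [1.0, 3.0]])  # first column = intercept
y = np.array([[1.0], [2.1], [2.9], [3.4]])
beta, *_ = np.linalg.lstsq(x, y, rcond=None)
print(beta)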
Example #5
#----------------------------------------PEARSON-----------------------------------------------------
pearson_matrix = ps.pearson_correlation(wind_turbine)
result = ps.transform_value(pearson_matrix)
result = result.astype(float)
#gr.seaborn_pearson_plot(result)
#----------------------------------------PEARSON-----------------------------------------------------

#-----------------------------------FILL NaN VALUES AND SELECT TARGET--------------------------------
wind_turbine = pre.fill_NaN_values(wind_turbine.copy())
y_train = pre.select_y(wind_turbine)
wind_turbine = wind_turbine.drop('Rbt_avg', axis=1)
#-----------------------------------FILL NaN VALUES AND SELECT TARGET--------------------------------

#-----------------------------------SPLIT AND STANDARDIZE DATA --------------------------------------
x_train, x_test, y_train, y_test = pre.train_test_split(wind_turbine, y_train)
x_train = pre.standardize(x_train)
x_test = pre.standardize(x_test)
#-----------------------------------SPLIT AND STANDARDIZE DATA --------------------------------------
#-----------------------------------PLOT ORIGINAL AND STANDARDIZED DATA -----------------------------
ba_avg_data_graph = wind_turbine.Ba_avg.head(300)

gr.linear_graph_unique(ba_avg_data_graph)
# Slice syntax: [start_row:end_row, start_col:end_col]
one_column_select = x_train[0:300, 0:1]
gr.linear_graph_unique(one_column_select)

one_column_select = pre.remove_noise(one_column_select.copy())

display(one_column_select)
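Here ps, pre, and gr are project-local modules. For a quick sanity check of the Pearson step without them, pandas' built-in DataFrame.corr computes the same correlation matrix (the column names are borrowed from the snippet; the values are illustrative):

import pandas as pd

df = pd.DataFrame({
    "Ba_avg": [1.0, 2.0, 3.0, 4.0, 5.0],
    "Rbt_avg": [2.1, 3.9, 6.2, 7.8, 10.1],
})
print(df.corr(method="pearson"))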