def process(data, learning_rate, num_iter, degree, Islasso, Isdegree):
    # Training data part
    (training_data, validation_data, testing_data, x_shuffled) = pre_processing.train_test_split(data, degree)
    x = training_data[:, :-1]
    y_org = training_data[:, -1]
    y_org = np.reshape(y_org, (x.shape[0], 1))

    lambdas = []
    betas = []
    temp = []

    # Fit one model per randomly drawn regularization strength
    for j in range(10):
        lam = random.random()
        lambdas.append(lam)
        beta = gd(x, y_org, learning_rate, num_iter, lam, Islasso, Isdegree)
        betas.append(beta)

    # Validation part: keep the beta with the lowest validation error
    x_validate = validation_data[:, :-1]
    y_validate_org = validation_data[:, -1]
    for beta in betas:
        y_validate = x_validate.dot(beta)
        temp.append(error(y_validate_org, y_validate))

    min_index = temp.index(min(temp))
    min_lambda = lambdas[min_index]
    min_beta = betas[min_index]
    error_in_train = error(x.dot(min_beta), y_org)
    error_in_validation = error(x_validate.dot(min_beta), y_validate_org)

    # Test data part
    x_test = testing_data[:, :-1]
    y_test = testing_data[:, -1]
    error_in_test = error(x_test.dot(min_beta), y_test)

    return min_lambda, min_beta, error_in_train, error_in_validation, error_in_test
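# `gd` is defined elsewhere in the project and not shown in this file. A
# minimal sketch of a compatible implementation, assuming batch gradient
# descent on the mean-squared-error loss with an L1 (lasso) penalty when
# Islasso is True and an L2 (ridge) penalty otherwise; the role of Isdegree
# is not visible from the call site, so it is accepted but left unused here.
import numpy as np

def gd(x, y, learning_rate, num_iter, lam, Islasso, Isdegree):
    n, d = x.shape
    beta = np.zeros((d, 1))
    for _ in range(num_iter):
        residual = x.dot(beta) - y              # (n, 1) prediction errors
        grad = (2 / n) * x.T.dot(residual)      # gradient of the MSE term
        if Islasso:
            grad += lam * np.sign(beta)         # subgradient of the L1 penalty
        else:
            grad += 2 * lam * beta              # gradient of the L2 penalty
        beta -= learning_rate * grad
    return beta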
def process(data, learning_rate, num_iter, degree, Islasso, Isdegree):
    # Training data part
    (training_data, validation_data, testing_data, x_shuffled) = pre_processing.train_test_split(data, degree)
    x = training_data[:, :-1]
    y_org = training_data[:, -1]
    y_org = np.reshape(y_org, (x.shape[0], 1))

    lambdas = []
    betas = []
    temp = []

    # Fit one model per randomly drawn regularization strength
    for j in range(10):
        lam = random.random()
        lambdas.append(lam)
        beta = gd(x, y_org, learning_rate, num_iter, lam, Islasso, Isdegree)
        betas.append(beta)

    # Validation part: keep the beta with the lowest validation error
    x_validate = validation_data[:, :-1]
    y_validate_org = validation_data[:, -1]
    for beta in betas:
        y_validate = x_validate.dot(beta)
        temp.append(error(y_validate_org, y_validate))

    min_index = temp.index(min(temp))
    min_lambda = lambdas[min_index]
    min_beta = betas[min_index]
    error_in_train = error(x.dot(min_beta), y_org)
    error_in_validation = error(x_validate.dot(min_beta), y_validate_org)

    # Test data part
    x_test = testing_data[:, :-1]
    y_test = testing_data[:, -1]
    error_in_test = error(x_test.dot(min_beta), y_test)

    # Undo the min-max scaling of the target so the plot shows original units
    minval = np.amin(data[:, -1])
    maxval = np.amax(data[:, -1])
    z = x.dot(min_beta)
    z = minval + z * (maxval - minval)

    # Creating figure
    fig = plt.figure()
    ax = fig.add_subplot(projection="3d")
    xx = x_shuffled[:, 0]
    yy = x_shuffled[:, 1]
    z = np.asarray(z).squeeze()
    ax.plot_trisurf(xx, yy, z, edgecolor='none', cmap='viridis')
    # ax.scatter3D(xx, yy, y_org, color='violet')
    plt.title("3D surface plot for degree " + str(degree))

    # show plot
    plt.xlabel("Age")
    plt.ylabel("BMI")
    ax.set_zlabel('Predicted Insurance Cost')
    plt.show()
    plt.close()

    return min_lambda, min_beta, error_in_train, error_in_validation, error_in_test
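# The inverse transform above (minval + z * (maxval - minval)) implies the
# target column was min-max scaled to [0, 1] during pre-processing. A sketch
# of a pre_processing.standardization helper consistent with that assumption
# (the column-wise scaling of the features is likewise an assumption):
import numpy as np

def standardization(data):
    # Rescale every column to [0, 1]: (x - min) / (max - min)
    mins = data.min(axis=0)
    maxs = data.max(axis=0)
    return (data - mins) / (maxs - mins)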
def process(data, learning_rate, num_iter, isPlot):
    # Training data part
    (training_data, testing_data) = pre_processing.train_test_split(data)
    x = training_data[:, 0:3]
    y_train_org = training_data[:, 3]
    ones = np.ones((x.shape[0], 1))
    x = np.append(ones, x, axis=1)          # prepend the intercept column
    y_train_org = np.reshape(y_train_org, (x.shape[0], 1))

    beta = sgd(x, y_train_org, learning_rate, num_iter, isPlot)
    y_train = x.dot(beta)
    error_in_train = error(y_train, y_train_org)

    # Test data part
    x_test = testing_data[:, 0:3]
    y_test_org = testing_data[:, 3]
    ones = np.ones((x_test.shape[0], 1))
    x_test = np.append(ones, x_test, axis=1)
    y_test = x_test.dot(beta)
    error_in_test = error(y_test_org, y_test)

    return (error_in_train, error_in_test)
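# `sgd` is defined elsewhere and not shown here. A minimal sketch of a
# compatible implementation, assuming one randomly chosen training sample per
# update and an optional loss-curve plot when isPlot is True (the plotting
# behaviour is an assumption made for illustration):
import numpy as np
from matplotlib import pyplot

def sgd(x, y, learning_rate, num_iter, isPlot):
    n, d = x.shape
    beta = np.zeros((d, 1))
    losses = []
    for _ in range(num_iter):
        i = np.random.randint(n)                  # pick one sample at random
        xi = x[i:i + 1, :]                        # keep the 2-D shape (1, d)
        yi = y[i:i + 1, :]
        grad = 2 * xi.T.dot(xi.dot(beta) - yi)    # gradient on that sample
        beta -= learning_rate * grad
        losses.append(float(np.mean((x.dot(beta) - y) ** 2)))
    if isPlot:
        pyplot.plot(losses)
        pyplot.xlabel("Iteration")
        pyplot.ylabel("Training MSE")
        pyplot.show()
    return beta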
import numpy as np
import pandas as pd
import pre_processing
from numpy.linalg import inv
from matplotlib import pyplot


def error(y, yi):
    # Root-mean-squared error between targets y and predictions yi
    return (np.mean((y - yi) ** 2)) ** (1 / 2)


data = pd.read_csv("insurance.txt").to_numpy()
data = pre_processing.standardization(data)

all_errors_train = []
all_errors_test = []
# Repeat the split/fit/evaluate cycle 20 times to average out split variance
for i in range(20):
    (training_data, testing_data) = pre_processing.train_test_split(data)
    x = training_data[:, 0:3]
    y_org = training_data[:, 3]
    y_org = np.reshape(y_org, (len(y_org), 1))
    ones = np.ones((x.shape[0], 1))
    x = np.append(ones, x, axis=1)

    # Closed-form least squares: beta = (X^T X)^(-1) X^T y
    beta = inv(x.T.dot(x)).dot(x.T).dot(y_org)
    y_train = x.dot(beta)
    all_errors_train.append(error(y_org, y_train))

    y_org_test = testing_data[:, 3]
    x_test = testing_data[:, 0:3]
    ones = np.ones((x_test.shape[0], 1))
    x_test = np.append(ones, x_test, axis=1)
    y_test = x_test.dot(beta)
    all_errors_test.append(error(y_org_test, y_test))
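# pre_processing.train_test_split is used above but defined in another module.
# A minimal sketch, assuming a shuffled two-way split; the 70/30 ratio is an
# assumption, not taken from the source:
import numpy as np

def train_test_split(data):
    shuffled = np.random.permutation(data)   # shuffle rows before splitting
    cut = int(0.7 * len(shuffled))
    return shuffled[:cut], shuffled[cut:]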
#----------------------------------------PEARSON-----------------------------------------------------
pearson_matrix = ps.pearson_correlation(wind_turbine)
result = ps.transform_value(pearson_matrix)
result = result.astype(float)
#gr.seaborn_pearson_plot(result)
#----------------------------------------PEARSON-----------------------------------------------------

#-----------------------------------FILL NaN VALUES AND SELECT TARGET--------------------------------
wind_turbine = pre.fill_NaN_values(wind_turbine.copy())
y_train = pre.select_y(wind_turbine)
wind_turbine = wind_turbine.drop('Rbt_avg', axis=1)
#-----------------------------------FILL NaN VALUES AND SELECT TARGET--------------------------------

#-----------------------------------SPLIT AND STANDARDIZE DATA---------------------------------------
x_train, x_test, y_train, y_test = pre.train_test_split(wind_turbine, y_train)
x_train = pre.standardize(x_train)
x_test = pre.standardize(x_test)
#-----------------------------------SPLIT AND STANDARDIZE DATA---------------------------------------

#-----------------------------------PLOT ORIGINAL AND STANDARDIZED DATA------------------------------
ba_avg_data_graph = wind_turbine.Ba_avg.head(300)
gr.linear_graph_unique(ba_avg_data_graph)
one_column_select = x_train[0:300, 0:1]  # [start_row_index:end_row_index, start_column_index:end_column_index]
gr.linear_graph_unique(one_column_select)
one_column_select = pre.remove_noise(one_column_select.copy())
display(one_column_select)
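# ps.pearson_correlation and ps.transform_value come from project-local
# modules that are not shown. A minimal sketch of the correlation step using
# pandas, assuming wind_turbine is a DataFrame of numeric sensor channels:
import pandas as pd

def pearson_correlation(df):
    # Pairwise Pearson correlation between all columns of the DataFrame
    return df.corr(method="pearson")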