def data_processing_linear_regression_with_mapping(filename, power):
    """Load a ';'-separated CSV, map its features, and split it 80/10/10.

    All columns except the last are passed through ``mapping_data`` together
    with ``power``; the last column is kept as the regression target.  The
    rows are shuffled using the global NumPy RNG (seeded with 3) and a
    leading column of ones is prepended to every feature matrix.

    Args:
        filename: Path of the CSV file, read with ``sep=';'``.
        power: Forwarded unchanged to ``mapping_data``.

    Returns:
        The tuple ``(Xtrain, ytrain, Xval, yval, Xtest, ytest)``.
    """
    raw = pd.read_csv(filename, low_memory=False, sep=';').values
    n_rows = raw.shape[0]

    # Map the feature columns, then re-attach the target as the last column.
    expanded = mapping_data(raw[:, :-1], power)
    print("MX DiM:", expanded.shape)
    target_position = expanded.shape[1]
    table = np.insert(expanded, target_position, raw[:, -1], axis=1)
    print("White Dim:", table.shape)

    # Fixed seed so that the shuffle (and hence the split) is reproducible.
    np.random.seed(3)
    order = np.random.permutation(n_rows)

    n_train = int(np.round(n_rows * 0.8))
    n_val = int(np.round(n_rows * 0.1))
    n_test = n_rows - n_train - n_val

    train_rows = order[0:n_train]
    val_rows = order[n_train:n_train + n_val]
    test_rows = order[n_train + n_val:]

    def _with_bias(rows, count):
        # Prepend the constant-one column to the selected feature rows.
        return np.hstack([np.ones([count, 1]), table[rows, 0:-1]])

    Xtrain = _with_bias(train_rows, n_train)
    ytrain = table[train_rows, -1]
    Xval = _with_bias(val_rows, n_val)
    yval = table[val_rows, -1]
    Xtest = _with_bias(test_rows, n_test)
    ytest = table[test_rows, -1]
    return Xtrain, ytrain, Xval, yval, Xtest, ytest
def data_processing_linear_regression(filename, non_invertible, mapping, mapping_power):
    """Load a ';'-separated CSV and split it into train/validation/test sets.

    The last column is the regression target; every other column is a
    feature.  Rows are shuffled with the global NumPy RNG seeded to 3 and
    split 80% / 10% / remainder.  A leading column of ones is prepended to
    each feature matrix.

    Args:
        filename: Path of the CSV file, read with ``sep=';'``.
        non_invertible: When truthy, one column and one row of ``Xtrain``
            (picked with the global RNG seeded to 4) are overwritten with
            zeros.
        mapping: When truthy, the feature columns are first replaced by
            ``mapping_data(features, mapping_power)``.
        mapping_power: Forwarded to ``mapping_data``; ignored when
            ``mapping`` is falsy.

    Returns:
        The tuple ``(Xtrain, ytrain, Xval, yval, Xtest, ytest)``.

    Note:
        The function reseeds the global ``np.random`` state (seed 3, and
        seed 4 when ``non_invertible`` is set); this is kept so existing
        callers observe the same splits as before.
    """
    white = pd.read_csv(filename, low_memory=False, sep=';').values
    N = white.shape[0]

    if mapping:
        maped_X = mapping_data(white[:, :-1], mapping_power)
        # column_stack promotes to a common dtype; np.insert would cast the
        # target column to the dtype of the mapped features instead.
        white = np.column_stack((maped_X, white[:, -1]))

    # Fixed seed so that the shuffle (and hence the split) is reproducible.
    np.random.seed(3)
    ridx = np.random.permutation(N)
    ntr = int(np.round(N * 0.8))
    nval = int(np.round(N * 0.1))
    ntest = N - ntr - nval

    train_idx = ridx[0:ntr]
    val_idx = ridx[ntr:ntr + nval]
    test_idx = ridx[ntr + nval:]

    # Split into training, validation and test; column 0 is the bias term.
    Xtrain = np.hstack([np.ones([ntr, 1]), white[train_idx, 0:-1]])
    ytrain = white[train_idx, -1]
    Xval = np.hstack([np.ones([nval, 1]), white[val_idx, 0:-1]])
    yval = white[val_idx, -1]
    Xtest = np.hstack([np.ones([ntest, 1]), white[test_idx, 0:-1]])
    ytest = white[test_idx, -1]

    if non_invertible:
        n_train_rows, n_cols = Xtrain.shape
        np.random.seed(4)
        random_row = np.random.randint(n_train_rows)
        random_col = np.random.randint(n_cols)
        # Zero one column and one row of the training design matrix.
        Xtrain[:, random_col] = 0
        Xtrain[random_row, :] = 0

    return Xtrain, ytrain, Xval, yval, Xtest, ytest
# Scratch script: prints the result of mapping_data on a 2x3 integer matrix
# with power 3.  The linear-regression experiment below is kept disabled.
from data_loader import data_processing_linear_regression
from linear_regression import linear_regression_invertible, mapping_data
import numpy as np

filename = 'winequality-white.csv'

# Disabled experiment: fit on the non-invertible training split.
# Xtrain, ytrain, Xval, yval, Xtest, ytest = data_processing_linear_regression(filename, True, False, 0)
# w = linear_regression_invertible(Xtrain, ytrain)
# print('w is ', w)

print(
    mapping_data(
        np.array([[1, 2, 3], [2, 2, 1]]),
        3,
    )
)
# Scratch script: show a 2x3 matrix and the output of mapping_data on it
# with power 2.
from linear_regression import mapping_data
import numpy as np

X = np.array(([1, 2, 3], [4, 5, 6]))
print(X)

mapped_x = mapping_data(X, 2)
print(mapped_x)
# obtain the index of the last element end_idx = len(sample) # print(end_idx) # add that to the end of the original row sample = np.insert(sample, end_idx, sample_power_i) # print(sample.tolist()) # modify X mapped_X[index] = sample return np.asarray(mapped_X) X=[[1,2,1],[2,1,2],[3,1,1]] # print(X) power = 3 Xtrain, ytrain, Xval, yval, Xtest, ytest = data_processing_linear_regression(filename, False, True, power) print(mapping_data(Xtrain,1).shape) print(mapping_data(Xtrain,power).shape) print(mapping_data(X,1)) print(mapping_data(X,power)) # print(Y) # Z = [] # for idx, row in enumerate(Y): # c = [item for pair in zip(row, X3[idx]) for item in pair] # Z.append(c) # print(Z)
# Scratch script: load the white-wine data set, optionally map its features
# with mapping_data, and split it into train/validation/test sets.
from linear_regression import mapping_data
import json
import numpy as np
import pandas as pd

# Experiment settings.  The split code below reads both names; they were
# previously undefined, so the script stopped with a NameError.
# NOTE(review): the values are placeholders - adjust to the experiment at hand.
mapping = True
mapping_power = 2

# NOTE(review): the path previously had no '.csv' extension - confirm the
# file name on disk.
white = pd.read_csv('winequality-white.csv', low_memory=False, sep=';').values
N, d = white.shape

if mapping:
    # Replace the feature columns by their mapped version and re-attach the
    # target (last column) at the end.
    maped_X = mapping_data(white[:, :-1], mapping_power)
    white = np.insert(maped_X, maped_X.shape[1], white[:, -1], axis=1)

# Fixed seed so that the shuffle (and hence the split) is reproducible.
np.random.seed(3)
ridx = np.random.permutation(N)
ntr = int(np.round(N * 0.8))
nval = int(np.round(N * 0.1))
ntest = N - ntr - nval

# Split into training, validation and test; column 0 is the bias term.
Xtrain = np.hstack([np.ones([ntr, 1]), white[ridx[0:ntr], 0:-1]])
ytrain = white[ridx[0:ntr], -1]
Xval = np.hstack([np.ones([nval, 1]), white[ridx[ntr:ntr + nval], 0:-1]])
yval = white[ridx[ntr:ntr + nval], -1]
Xtest = np.hstack([np.ones([ntest, 1]), white[ridx[ntr + nval:], 0:-1]])
ytest = white[ridx[ntr + nval:], -1]