def data_loader(status, shuffle: bool = False, validation: bool = False, num_workers: int = 2):
    """Build the DataLoader(s) for the data selected by ``status``.

    The preprocessing routine is chosen by the module-level ``DATA_TYPE``
    ('original', 'features' or 'trans'); the matching method of a fresh
    ``Preprocessing`` instance is called with ``status`` and must return
    ``(data_X, data_y)``, which are wrapped in a ``DealDataset``.

    Args:
        status: Passed through unchanged to the preprocessing method.
        shuffle: Forwarded to every DataLoader created here (train and dev
            loaders alike).
        validation: When true, the dataset is randomly split into a train
            part of ``int(size * SPLIT_RATE)`` items and a dev part holding
            the remainder, and two loaders are returned.
        num_workers: Forwarded to every DataLoader created here.

    Returns:
        ``(train_loader, dev_loader)`` when ``validation`` is true, otherwise
        a single DataLoader over the whole dataset. All loaders use
        ``BATCH_SIZE``.

    Raises:
        ValueError: If ``DATA_TYPE`` is not one of the supported names.
    """
    prepare_data = Preprocessing()
    if DATA_TYPE == 'original':
        data_X, data_y = prepare_data.original(status)
    elif DATA_TYPE == 'features':
        data_X, data_y = prepare_data.features(status)
    elif DATA_TYPE == 'trans':
        data_X, data_y = prepare_data.trans(status)
    else:
        # Without this branch data_X/data_y would be unbound and the call
        # would die later with an opaque UnboundLocalError.
        raise ValueError(
            "Unsupported DATA_TYPE {!r}; expected 'original', 'features' "
            "or 'trans'".format(DATA_TYPE)
        )

    data = DealDataset(data_X, data_y)

    if not validation:
        return DataLoader(dataset=data, batch_size=BATCH_SIZE,
                          shuffle=shuffle, num_workers=num_workers)

    # DealDataset exposes its item count as the ``len`` attribute.
    size = data.len
    train_size = int(size * SPLIT_RATE)
    train, dev = random_split(data, [train_size, size - train_size])

    # Materialise each subset once via slice indexing and re-wrap it in a
    # DealDataset (the original code sliced every subset twice).
    # NOTE(review): this relies on DealDataset.__getitem__ accepting a
    # collection of indices and returning (X, y, ...) -- confirm.
    train_items = train[:]
    dev_items = dev[:]
    train = DealDataset(train_items[0], train_items[1])
    dev = DealDataset(dev_items[0], dev_items[1])

    train_loader = DataLoader(dataset=train, batch_size=BATCH_SIZE,
                              shuffle=shuffle, num_workers=num_workers)
    dev_loader = DataLoader(dataset=dev, batch_size=BATCH_SIZE,
                            shuffle=shuffle, num_workers=num_workers)
    return train_loader, dev_loader
import pandas as pd
import numpy as np
from utils.Preprocessing import *


def load_data(file_path):
    """Read a whitespace-delimited text file into a pandas DataFrame.

    Args:
        file_path: Location of the file, handed directly to
            ``pandas.read_csv``.

    Returns:
        The DataFrame produced by ``pandas.read_csv`` with any run of
        whitespace treated as the column separator.
    """
    # Raw string: "\s" is not a valid Python string escape, so the non-raw
    # spelling triggers an invalid-escape warning on modern interpreters.
    # The regex value passed to pandas is unchanged.
    # NOTE(review): read_csv's default header inference is left in place, so
    # the first line of the file is consumed as column names -- confirm the
    # data files actually carry a header row.
    data = pd.read_csv(file_path, sep=r"\s+")
    return data


# Earlier exploration of the raw training files, kept for reference:
# X = load_data("../data/train/X_train.txt")
# y = load_data("../data/train/y_train.txt")
# y = np.asarray(y.values)
# actionA = X.iloc[np.argwhere(y==5)[:,0]]
# print(len(actionA))

# Smoke check: run the 'trans' preprocessing on both splits and print the
# shape of the resulting feature arrays.
a = Preprocessing()
X, Y = a.trans('train')
print(X.shape)
X, Y = a.trans('test')
print(X.shape)