# Imports assumed by the snippets below (numpy/pandas/sklearn for the in-memory
# variants, dask for the dask variant, TF 1.x's input_data helper for MNIST,
# scipy for the Frey faces .mat file).
import random
import time

import numpy as np
import pandas as pd
import dask.array as da
import dask.dataframe as dd
from scipy.io import loadmat
from sklearn.model_selection import train_test_split
from tensorflow.examples.tutorials.mnist import input_data


def process_data(X, y=None, test_size=0.20, dummies=False):
    # Split an array of examples into train/validation Dataset objects.
    if y is None:
        y = np.ones(X.shape[0])  # dummy labels when none are provided
    len_ = X.shape[0]
    X = prepare_dataset(X)  # assumed to return an array-like with the same leading dimension
    if dummies:
        y = pd.get_dummies(y).values  # one-hot encode the labels
    shape_ = list(X.shape[1:])
    X_train, X_test, y_train, y_test = train_test_split(
        X.flatten().reshape(len_, -1), y,
        test_size=test_size, random_state=4891)
    # Restore the original per-example shape after the flat split.
    X_train = X_train.reshape([X_train.shape[0]] + shape_)
    X_test = X_test.reshape([X_test.shape[0]] + shape_)
    print('Training dataset shape: ', X_train.shape)
    print('Validation dataset shape: ', X_test.shape)
    train_dataset = Dataset(X_train, y_train)
    test_dataset = Dataset(X_test, y_test)
    return train_dataset, test_dataset

def process_data(X, y=None, test_size=0.20, dummies=False):
    # Dask variant: same train/validation split, plus a small per-class sample set.
    if y is None:
        y = da.ones(X.shape[0])
    len_ = X.shape[0]
    X = prepare_dataset(X)
    if dummies:
        y = dd.get_dummies(y)  # one-hot encode with dask.dataframe
    shape_ = list(X.shape[1:])
    X_train, X_test, y_train, y_test = train_test_split(
        X.flatten().reshape(len_, -1), y,
        test_size=test_size, random_state=4891)
    X_train = X_train.reshape([X_train.shape[0]] + shape_)
    X_test = X_test.reshape([X_test.shape[0]] + shape_)
    print('Training dataset shape: ', X_train.shape)
    print('Validation dataset shape: ', X_test.shape)
    train_dataset = Dataset(X_train, y_train)
    test_dataset = Dataset(X_test, y_test)
    # Collect 10 randomly chosen training examples per class (repeats possible).
    samples = []
    for _ in range(10):
        for y_uniq in da.unique(train_dataset.labels):
            class_x = train_dataset.x[train_dataset.labels == y_uniq]
            samples.append(class_x[random.randint(0, len(class_x) - 1)])
    train_dataset.samples = da.array(samples)
    return train_dataset, test_dataset

def load_MNIST():
    data_path = '../data/MNIST_data'
    data = input_data.read_data_sets(data_path, one_hot=False)
    x_train_aux = data.train.images
    x_test = data.test.images
    data_dim = data.train.images.shape[1]
    n_train = data.train.images.shape[0]
    # Hold out 20% of the original training images for validation.
    train_size = int(n_train * 0.8)
    valid_size = n_train - train_size
    x_valid, x_train = merge_datasets(x_train_aux, data_dim, train_size, valid_size)
    print('Data loaded. ', time.localtime().tm_hour, ':', time.localtime().tm_min, 'h')
    # logs.write('\tData loaded ' + str(time.localtime().tm_hour) + ':' + str(time.localtime().tm_min) + 'h\n')
    x_train = np.reshape(x_train, [-1, 28, 28, 1])
    x_valid = np.reshape(x_valid, [-1, 28, 28, 1])
    x_test = np.reshape(x_test, [-1, 28, 28, 1])
    # NOTE: the label arrays are not re-split here, so the train and valid
    # Datasets both carry the full original training labels.
    train_dataset = Dataset(x_train, data.train.labels)
    valid_dataset = Dataset(x_valid, data.train.labels)
    test_dataset = Dataset(x_test, data.test.labels)
    print('Train Data: ', train_dataset.x.shape)
    print('Valid Data: ', valid_dataset.x.shape)
    print('Test Data: ', test_dataset.x.shape)
    return train_dataset, valid_dataset, test_dataset

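# Hedged usage sketch (not part of the original code): exercising load_MNIST and
# inspecting the resulting splits. Assumes the imports above, the merge_datasets
# helper defined elsewhere, and the Dataset container sketched at the end of
# this listing; the demo_* name is illustrative.
def demo_load_mnist():
    train_ds, valid_ds, test_ds = load_MNIST()
    # Each split exposes .x (images in NHWC layout) and .labels, as used throughout this file.
    print(train_ds.x.shape, valid_ds.x.shape, test_ds.x.shape)
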
def process_data(X, y, test_size=None):
    if test_size is None:
        test_size = 0.20
    len_ = X.shape[0]
    X_train, X_test, y_train, y_test = train_test_split(
        X.flatten().reshape(len_, -1), y,
        test_size=test_size, random_state=4891)
    print(X_train.shape, X_test.shape)
    # Recover the square RGB image side length from the flattened feature size.
    d = int(np.sqrt(X_train.shape[1] / 3))
    X_train = np.reshape(X_train, [-1, d, d, 3])
    X_test = np.reshape(X_test, [-1, d, d, 3])
    train_dataset = Dataset(X_train, y_train)
    test_dataset = Dataset(X_test, y_test)
    return train_dataset, test_dataset

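# Hedged usage sketch (not part of the original code): calling the three-argument
# process_data variant directly above (the last definition of that name in this
# listing) on a small synthetic RGB batch. The array sizes and the demo_* name
# are illustrative assumptions.
def demo_process_data():
    X = np.random.rand(100, 32, 32, 3).astype(np.float32)  # 100 fake 32x32 RGB images
    y = np.random.randint(0, 10, size=100)                  # 10 fake classes
    train_ds, test_ds = process_data(X, y, test_size=0.2)   # 80/20 split
    print(train_ds.x.shape, test_ds.x.shape)                # (80, 32, 32, 3) (20, 32, 32, 3)
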
def load_FREY():
    data_path = '../data/frey_rawface.mat'
    mat = loadmat(data_path)
    data = mat['ff']
    data = np.transpose(data)  # [num_images, dimension]
    data = np.array(data, dtype=np.float32)
    # Min-max normalize each image to the [0, 1] range.
    for i in range(data.shape[0]):
        min_value = np.min(data[i, :])
        max_value = np.max(data[i, :])
        num = (data[i, :] - min_value)
        den = (max_value - min_value)
        data[i, :] = num / den
    data_dim = data.shape[1]
    num_images = data.shape[0]
    # 80/10/10 train/valid/test split.
    train_size = int(num_images * 0.8)
    valid_size = int(num_images * 0.1)
    test_size = num_images - train_size - valid_size
    x_train = data[:train_size]
    x_valid = data[train_size:(train_size + valid_size)]
    x_test = data[(train_size + valid_size):]
    x_train = np.reshape(x_train, [-1, 28, 20, 1])
    x_valid = np.reshape(x_valid, [-1, 28, 20, 1])
    x_test = np.reshape(x_test, [-1, 28, 20, 1])
    # Frey faces are unlabeled; use all-zero placeholder labels.
    x_train_labels = np.zeros(x_train.shape[0])
    x_valid_labels = np.zeros(x_valid.shape[0])
    x_test_labels = np.zeros(x_test.shape[0])
    train_dataset = Dataset(x_train, x_train_labels)
    valid_dataset = Dataset(x_valid, x_valid_labels)
    test_dataset = Dataset(x_test, x_test_labels)
    print('Train Data: ', train_dataset.x.shape)
    print('Valid Data: ', valid_dataset.x.shape)
    print('Test Data: ', test_dataset.x.shape)
    return train_dataset, valid_dataset, test_dataset

def prepare_dataset(X, y=None):
    # Wrap an array in a Dataset, using all-zero placeholder labels if none are given.
    if y is None:
        y = np.zeros(X.shape[0])
    return Dataset(X, y)

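# The Dataset container used throughout this listing is not defined here. Below
# is a minimal sketch of the interface these functions appear to rely on (.x,
# .labels, and an assignable .samples attribute); it is an assumption, not the
# original implementation.
class Dataset:
    def __init__(self, x, labels):
        self.x = x            # example array, e.g. images in NHWC layout
        self.labels = labels  # per-example labels (placeholders when unlabeled)
        self.samples = None   # optionally filled with per-class sample images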