def _take_split(dataset, start, count):
    """Return ``(X, y)`` arrays for ``count`` samples of ``dataset`` from ``start``.

    ``X`` is flattened to shape ``(count, D)``; ``y`` is the matching label
    array. Both are fresh arrays built with ``np.array``.
    """
    stop = start + count
    images = np.array(dataset['images'][start:stop])
    labels = np.array(dataset['labels'][start:stop])
    # A short slice could otherwise be reshaped into (count, something)
    # whenever the element count happens to divide evenly.
    if len(images) != count:
        raise ValueError(
            'requested %d samples starting at %d but only %d are available'
            % (count, start, len(images))
        )
    return images.reshape(count, -1), labels


def load_data(num_train=50000, num_test=10000, num_val=10000, num_dev=5000):
    """Load flattened, mean-centred train/test/validation/dev splits.

    The training source (``fetch_traingset()``) and the testing source
    (``fetch_testingset()``) are each fetched once and then sliced:

    - train: the first ``num_train`` training samples
    - val:   the next ``num_val`` training samples
    - dev:   the first ``num_dev`` training samples (overlaps with train)
    - test:  the first ``num_test`` testing samples

    Every image array is reshaped to ``(N, D)``; labels are expected to be
    ``(N,)`` (assuming one scalar label per sample). The per-feature mean of
    the training split is subtracted from all four image arrays.

    Args:
        num_train: number of training samples.
        num_test: number of test samples.
        num_val: number of validation samples.
        num_dev: number of development samples.

    Returns:
        Tuple ``(X_train, y_train, X_test, y_test, X_val, y_val, X_dev, y_dev)``.

    Raises:
        ValueError: if a source holds fewer samples than requested.
    """
    train_source = fetch_traingset()
    test_source = fetch_testingset()

    X_train, y_train = _take_split(train_source, 0, num_train)
    X_val, y_val = _take_split(train_source, num_train, num_val)
    X_test, y_test = _take_split(test_source, 0, num_test)
    X_dev, y_dev = _take_split(train_source, 0, num_dev)

    # Data normalization: centre every split on the training mean.
    # Subtract out of place: an in-place ``-=`` with a float mean raises a
    # casting error when the pixel arrays have an integer dtype.
    mean_img = np.mean(X_train, axis=0)
    X_train = X_train - mean_img
    X_test = X_test - mean_img
    X_val = X_val - mean_img
    X_dev = X_dev - mean_img

    return X_train, y_train, X_test, y_test, X_val, y_val, X_dev, y_dev
def predict_process(setting, filename):
    """Yield one ``{'image': ...}`` record per image of the testing set.

    The images come from ``mnist.fetch_testingset()``. Neither ``setting``
    nor ``filename`` is read by this function.
    """
    test_images = mnist.fetch_testingset()['images']
    for picture in test_images:
        yield {'image': picture}
def reader():
    """Yield ``(image, label)`` pairs for the first ``n`` samples.

    ``filename``, ``n`` and ``mnist_data`` are not defined in this function;
    they are taken from the surrounding scope. The training set is used when
    ``filename == 'train'``, otherwise the testing set. Each image is
    converted to a NumPy array and reshaped to ``(28, 28)``.
    """
    fetch = (
        mnist_data.fetch_traingset
        if filename == 'train'
        else mnist_data.fetch_testingset
    )
    dataset = fetch()
    for index in range(n):
        pixels = np.array(dataset['images'][index]).reshape((28, 28))
        label = dataset['labels'][index]
        yield pixels, label
def process(settings, filename):
    """Yield ``{'image': ..., 'label': int}`` records for one dataset.

    The training set (``mnist.fetch_traingset()``) is used when ``filename``
    equals ``'train'``; any other value selects the testing set
    (``mnist.fetch_testingset()``). ``settings`` is not read here.
    """
    fetch = mnist.fetch_traingset if filename == 'train' else mnist.fetch_testingset
    samples = fetch()
    pictures = samples['images']
    targets = samples['labels']
    for position, picture in enumerate(pictures):
        # Labels are looked up by index and coerced to a plain int.
        yield {'image': picture, 'label': int(targets[position])}
# Shuffle the training samples with a fixed seed so the order is reproducible.
# X_train and y_train are defined before this fragment and are indexed with
# the same permutation so images and labels stay aligned.
idx = np.arange(X_train.shape[0])
np.random.seed(0)
np.random.shuffle(idx)
X_train = X_train[idx]
y_train = y_train[idx]
# NOTE: standardization of the training set is currently disabled.

# Load the testing set; sizes are derived from the data instead of being
# hard-coded, so each image is flattened to one row and labels become a column.
dataset = mnist_data.fetch_testingset()
X_test = np.array(dataset['images'])
X_test = X_test.reshape(X_test.shape[0], -1)
y_test = np.array(dataset['labels']).reshape(-1, 1)

# Shuffle the testing samples, again with seed 0.
idx = np.arange(X_test.shape[0])
np.random.seed(0)
np.random.shuffle(idx)
X_test = X_test[idx]
y_test = y_test[idx]
# standardize
#mean = std = idx