def get_data(N, N_test):
    """Load MNIST and split it into training, test and validation sets.

    The first ``N`` training samples and the first ``N_test`` test samples
    are used. The last tenth (``int(N / 10)``) of those ``N`` training
    samples is carved off as the validation set and removed from the
    training set.

    Args:
        N: Number of training samples to take before the validation split.
        N_test: Number of test samples to take.

    Returns:
        Tuple ``(training_data, training_labels, test_data, test_labels,
        validation_data, validation_labels)``.
    """
    # Load MNIST data using the available loader.
    train_images, train_targets = load_mnist('training')
    test_images, test_targets = load_mnist('testing')

    # NOTE(review): flatArray presumably yields an N x 784 matrix (one
    # flattened image per row) -- confirm against its definition.
    train_matrix = flatArray(N, 784, train_images)
    train_targets = train_targets[:N]
    test_matrix = flatArray(N_test, 784, test_images)
    test_targets = test_targets[:N_test]

    # No bias column of ones is added in this variant.

    # Index separating the kept training rows from the validation rows:
    # everything from `cut` up to N is the validation set.
    cut = N - int(N / 10)
    held_out_matrix = train_matrix[cut:N]
    held_out_targets = train_targets[cut:N]

    # Drop the validation rows from the training set.
    train_matrix = train_matrix[:cut]
    train_targets = train_targets[:cut]

    return (train_matrix, train_targets, test_matrix, test_targets,
            held_out_matrix, held_out_targets)
def get_data(N=60000, N_test=10000, validationReqd=True):
    """Load MNIST, prepend a bias column, and optionally hold out validation data.

    The first ``N`` training samples and first ``N_test`` test samples are
    passed through ``flattenAndNormalize`` and ``addOnesColAtStart``. When
    ``validationReqd`` is truthy, the last ``int(N / 6.0)`` of the ``N``
    training samples (10000 for the default ``N=60000``) become the
    validation set and are removed from the training set.

    Args:
        N: Number of training samples to take before the validation split.
        N_test: Number of test samples to take.
        validationReqd: Whether to carve a validation set out of the
            training data.

    Returns:
        Tuple ``(training_data, training_labels, test_data, test_labels,
        validation_data, validation_labels)``. The two validation entries
        are empty lists when ``validationReqd`` is falsy.
    """
    # Load MNIST data using the available loader.
    train_images, train_targets = load_mnist('training')
    test_images, test_targets = load_mnist('testing')

    # NOTE(review): flattenAndNormalize presumably yields an N x 784
    # matrix (one flattened image per row) -- confirm against its definition.
    train_matrix = flattenAndNormalize(N, 784, train_images)
    train_targets = train_targets[:N]
    test_matrix = flattenAndNormalize(N_test, 784, test_images)
    test_targets = test_targets[:N_test]

    # Add a column of 1s for the bias term.
    train_matrix = addOnesColAtStart(train_matrix)
    test_matrix = addOnesColAtStart(test_matrix)

    if not validationReqd:
        # No hold-out requested: keep all N training rows.
        return (train_matrix[:N], train_targets[:N], test_matrix,
                test_targets, [], [])

    # The last sixth of the N training rows forms the validation set;
    # `cut` is the first validation row.
    cut = N - int(N / 6.0)
    held_out_matrix = train_matrix[cut:N]
    held_out_targets = train_targets[cut:N]

    # Drop the validation rows from the training set.
    return (train_matrix[:cut], train_targets[:cut], test_matrix,
            test_targets, held_out_matrix, held_out_targets)
def get_data(split_percentage):
    """Load the MNIST training and testing sets as one combined dataset.

    Both sets are read with ``load_mnist``, each image is flattened to a
    row of 784 values, the values are divided by 255, and the training
    rows are stacked on top of the testing rows.

    Args:
        split_percentage: Currently unused. The train/test split that
            once consumed it is no longer performed here, so callers
            split the returned arrays themselves. Kept so existing call
            sites continue to work.

    Returns:
        Tuple ``(X, y)``: ``X`` is the stacked, scaled image matrix
        (training rows first) and ``y`` the matching concatenated labels.
    """
    # NOTE(review): the extra arguments presumably select digits 0-9 and
    # the dataset directory "." -- confirm against load_mnist.
    train_images, train_labels = load_mnist("training", np.arange(10), ".")
    y_train = np.array(train_labels)
    # -1 lets numpy infer the sample count instead of hard-coding 60000.
    X_train = np.reshape(train_images, (-1, 784))

    test_images, test_labels = load_mnist("testing", np.arange(10), ".")
    y_test = np.array(test_labels)
    # Likewise inferred rather than hard-coded to 10000.
    X_test = np.reshape(test_images, (-1, 784))

    # A single pre-formatted string prints identically under the Python 2
    # print statement and the Python 3 print function.
    print("%s %s %s %s" % (X_train.shape, X_test.shape,
                           y_train.shape, y_test.shape))

    # Scale values by 255 (presumably 8-bit pixel intensities -> [0, 1]).
    X_train = X_train / 255.
    X_test = X_test / 255.

    X = np.vstack((X_train, X_test))
    y = np.concatenate((y_train, y_test))
    return X, y