# Changing this number allows testing the code with a smaller number of
# training images.
# !! THIS SCRIPT WITH ntrain = 50000 REQUIRES AROUND 10 GB OF RAM !!
import sys

import numpy as np
from sklearn import cross_validation  # old sklearn API; newer versions use model_selection

import utils

ntrain = 50000

print("Reading the training data...")
sys.stdout.flush()

TRAIN_DIR = "train/"
TEST_DIR = "test/"

# Use HOG as a list of features.
# Reading in the data; this takes a while.
train_imgs = utils.read_folder(TRAIN_DIR, 0, ntrain, flatten=False)
print("\nDone!")
sys.stdout.flush()

print("Getting HOG3 of the data...")
sys.stdout.flush()
# Computing the HOG features also takes a while.
X = utils.getHOG3(train_imgs)
print("\nDone!")
sys.stdout.flush()

# Prepend a bias column of ones to the feature matrix.
X = np.insert(X, 0, 1.0, axis=1)
# Small random initialization for a 10-class linear classifier.
theta = np.random.randn(X.shape[1], 10) * 0.0001
y = utils.read_labels('trainLabels.csv', 0, ntrain)

# Track the best validation accuracy and model across hyperparameters.
best_val = -1
best_softmax = None
# Hold out 10% of the training data for validation.
X_train, X_val, y_train, y_val = cross_validation.train_test_split(X, y, test_size=0.1)
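# A minimal sketch of how best_val / best_softmax could be filled in:
# plain minibatch gradient descent on the softmax loss over a small grid
# of learning rates. Everything below is an assumption (the loop and the
# hyperparameter values are not part of the original script) -- it is
# just one standard way to use the train/validation split above.
num_iters, batch_size, reg = 1000, 200, 1e-4  # assumed hyperparameters
for lr in [1e-3, 1e-2]:                       # assumed learning-rate grid
    W = theta.copy()
    for it in range(num_iters):
        idx = np.random.choice(X_train.shape[0], batch_size)
        Xb, yb = X_train[idx], y_train[idx]   # assumes integer labels 0..9
        scores = Xb.dot(W)
        scores -= scores.max(axis=1, keepdims=True)  # numerical stability
        probs = np.exp(scores)
        probs /= probs.sum(axis=1, keepdims=True)
        probs[np.arange(batch_size), yb] -= 1.0      # dL/dscores for cross-entropy
        grad = Xb.T.dot(probs) / batch_size + reg * W
        W -= lr * grad
    val_acc = np.mean(np.argmax(X_val.dot(W), axis=1) == y_val)
    if val_acc > best_val:
        best_val = val_acc
        best_softmax = W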
# Separate preprocessing script: save the test images in compressed
# batches so the full test set never has to sit in memory at once.
import sys

import numpy as np

import utils

TEST_DIR = "test/"
batch = 50

for j in range(0, 6):
    print("\nPart", j + 1, "of 6")
    sys.stdout.flush()
    test_imgs = utils.read_folder(TEST_DIR, j * batch, (j + 1) * batch, flatten=False)
    name = "tbatch_" + str(j)
    # np.save(name, np.around(test_imgs, 2))
    np.savez_compressed(name, test_imgs)
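# A minimal sketch of reading one saved batch back in. np.savez_compressed
# stores a positional array under the key 'arr_0'; the HOG + bias-column
# steps mirror the training script above.
loaded = np.load("tbatch_0.npz")
test_imgs = loaded["arr_0"]
X_test = utils.getHOG3(test_imgs)
X_test = np.insert(X_test, 0, 1.0, axis=1)  # same bias column as in training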