import sys

import numpy as np
from sklearn.model_selection import train_test_split

import utils

# Changing this number lets you test the code with a smaller number of training images.

##!! THIS SCRIPT WITH ntrain = 50000 REQUIRES AROUND 10 GB OF RAM !!##
ntrain = 50000

print ("Reading the training data...")
sys.stdout.flush()

TRAIN_DIR = "train/"
TEST_DIR = "test/"

# Use HOG descriptors as the feature vectors.
# Reading in the data; this takes a while.
train_imgs = utils.read_folder(TRAIN_DIR, 0, ntrain, flatten=False)
print ("\nDone!")
sys.stdout.flush()
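# A minimal sketch of what a folder reader like utils.read_folder might do,
# assuming a Kaggle-style layout of images named 1.png, 2.png, ...; the
# project's actual helper is not shown in this snippet and may differ.
import os
from skimage.io import imread

def read_folder_sketch(folder, start, stop, flatten=False):
    imgs = []
    for i in range(start + 1, stop + 1):  # image files are 1-indexed
        img = imread(os.path.join(folder, "%d.png" % i))
        imgs.append(img.ravel() if flatten else img)
    return np.array(imgs)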
print ("Getting HOG3 of the data...")
sys.stdout.flush()
# also takes a while
X = utils.getHOG3(train_imgs)
print ("\nDone!")
sys.stdout.flush()
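# A minimal sketch of a per-channel HOG extractor like utils.getHOG3 (the
# "3" suggesting one descriptor per colour channel); the helper's real
# parameters are not visible in this snippet, so the skimage settings
# below are assumptions.
from skimage.feature import hog

def hog3_sketch(imgs):
    feats = []
    for img in imgs:  # img: (H, W, 3) colour image
        per_channel = [hog(img[:, :, c], orientations=9,
                           pixels_per_cell=(8, 8), cells_per_block=(2, 2))
                       for c in range(3)]
        feats.append(np.concatenate(per_channel))
    return np.array(feats)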
# Prepend a bias column of ones to the feature matrix.
X = np.insert(X, 0, 1.0, axis=1)
# Small random initialisation of the weights: one column per class.
theta = np.random.randn(X.shape[1], 10) * 0.0001
y = utils.read_labels('trainLabels.csv', 0, ntrain)
best_val = -1
best_softmax = None
# Hold out 10% of the training data for validation.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.1)
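# A hedged sketch of the model-selection loop that best_val/best_softmax
# prepare for: batch gradient descent on the softmax loss over a few
# learning rates, keeping the weights with the best validation accuracy.
# The project's real training code is not part of this snippet, and the
# hyperparameter values below are assumptions.
def softmax_train(X, y, theta, lr, iters=200, reg=1e-4):
    theta = theta.copy()
    y = np.asarray(y)  # assumes integer class indices 0..9
    n = X.shape[0]
    for _ in range(iters):
        scores = X.dot(theta)
        scores -= scores.max(axis=1, keepdims=True)  # numerical stability
        probs = np.exp(scores)
        probs /= probs.sum(axis=1, keepdims=True)
        probs[np.arange(n), y] -= 1.0  # dL/dscores = probs - one_hot(y)
        grad = X.T.dot(probs) / n + reg * theta
        theta -= lr * grad
    return theta

for lr in [1e-3, 1e-2, 1e-1]:
    trained = softmax_train(X_train, y_train, theta, lr)
    val_acc = np.mean(X_val.dot(trained).argmax(axis=1) == np.asarray(y_val))
    if val_acc > best_val:
        best_val = val_acc
        best_softmax = trained
print("Best validation accuracy:", best_val)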
Example 2
import numpy as np
import sys
import utils
TEST_DIR = "test/"

batch = 50  # number of test images per saved batch
for j in range(0, 6):
    print("\nPart", j + 1, "of 6")
    sys.stdout.flush()
    test_imgs = utils.read_folder(TEST_DIR, j * batch, (j + 1) * batch, flatten=False)
    name = "tbatch_" + str(j)
    # np.save(name, np.around(test_imgs, 2))  # uncompressed alternative
    np.savez_compressed(name, test_imgs)
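# Reading a saved batch back, e.g. before prediction: savez_compressed with
# an unnamed array stores it under the key "arr_0" in the .npz archive.
loaded = np.load("tbatch_0.npz")["arr_0"]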