# Reuse a previously saved network when `serialized_name` exists on disk;
# otherwise build a new one, train it (timing the run) and save it.
if os.path.exists(serialized_name):
	# load a saved ANN
	# NOTE(review): `nn` is read here before this branch assigns it, so it
	# must already be bound earlier in the file -- confirm, otherwise this
	# line raises NameError.
	nn = nn.deserialize(serialized_name)
else:
	# create the ANN with:
	# 1 input layer of size 784 (presumably 28x28 gray pixels -- TODO confirm)
	# 1 hidden layer of size 300
	# 1 output layer of size 10 (the labels of digits are 0 to 9)
	nn = ANN([784, 300, 10])

	# see how long training takes
	startTime = time.time()

	# train it
	# NOTE(review): the meaning of the positional values 30 and 100000 is
	# defined by ANN.train2, which is not visible here; step_cb is presumably
	# a progress callback -- verify against the ANN class.
	nn.train2(30, X_train_l, labels_train_l, 100000, step_cb)

	# wall-clock duration of the train2 call above
	elapsedTime = time.time() - startTime
	print("Training took {0} seconds".format(elapsedTime))

	# serialize and save the ANN so the next run takes the load branch
	nn.serialize(nn, serialized_name)

# compute the predictions: one predicted digit label per test sample

# Imported here because the file's import block is not visible from this
# section; a repeated import is harmless if numpy is already loaded.
import numpy as np

predictions = []
for i in range(X_test.shape[0]):
	o = nn.predict(X_test[i])
	# the inverse of the binarization is taking the maximum argument index
	# ex: [.1 .1 .1 .1 .9 .1 .1 .1 .1 .1] -> 4
	# ex: [.1 .1 .1 .1 .1 .1 .1 .9 .1 .1] -> 7
	# np.argmax works on the flattened output, so it does not matter whether
	# predict returns a row vector, a column vector or a plain sequence.
	# Without this append the loop discarded every output and `predictions`
	# stayed empty.
	predictions.append(int(np.argmax(o)))
# Reuse a previously saved network when `serialized_name` exists on disk;
# otherwise build a new one, train it (timing the run), save it and plot the
# recorded training/validation error curves.
if os.path.exists(serialized_name):
	# load a saved ANN
	# NOTE(review): `nn` is read here before this branch assigns it, so it
	# must already be bound earlier in the file -- confirm, otherwise this
	# line raises NameError.
	nn = nn.deserialize(serialized_name)
else:
	# create the ANN with:
	# 1 input layer of size 64 (the images are 8x8 gray pixels)
	# 1 hidden layer of size 100
	# 1 output layer of size 10 (the labels of digits are 0 to 9)
	nn = ANN([64, 100, 10])

	# see how long training takes
	startTime = time.time()

	# train it
	# evaluate(...) is called right here, before train2 runs; it is its return
	# value that is handed to train2 as the last argument.
	# NOTE(review): presumably evaluate returns a per-step callback that fills
	# steps / train_error / validation_err -- confirm against its definition.
	nn.train2(5, X_train_l, labels_train_l, 100, evaluate(X_train_l, labels_train_l, X_valid_l, labels_valid_l))

	# wall-clock duration of the train2 call above
	elapsedTime = time.time() - startTime
	print("Training took {0} seconds".format(elapsedTime))

	# serialize and save the ANN so the next run takes the load branch
	nn.serialize(nn, serialized_name)

	# plot error over time
	# This only runs in the training branch; nothing is plotted when the
	# network was loaded from disk.
	# steps, train_error and validation_err are defined outside this section.
	# Earlier plotting variants, kept for reference:
	#plt.plot(step, train_error, 'b--', step, validation_err, 'gs', t, t**3, 'g^')
	#plt.plot(step, train_error, 'b--', step, validation_err, 'g')
	plt.plot(steps, train_error, 'b--', label="Training Error")
	plt.plot(steps, validation_err, 'g', label="Validation Error")
	plt.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=2, mode="expand", borderaxespad=0.)
	plt.title('Training Error vs Validation Error')
	plt.show()