# normalize the input values by their maximum
X /= X.max()

# NOTE(review): no actual train/test split happens here — both "sets" are
# the full dataset, so any metric computed on X_test measures training fit,
# not generalization. Confirm this is intentional.
X_train, y_train = X, y
X_test, y_test = X, y  # test set == training set (see note above)
#########################################################

# binarize the labels from a number into a vector with a 1 at that index
# ex: label 4 -> binarized [0 0 0 0 1 0 0 0 0 0]
# ex: label 7 -> binarized [0 0 0 0 0 0 0 1 0 0]
labels_train = LabelBinarizer().fit_transform(y_train)
#labels_test = LabelBinarizer().fit_transform(y_test)

# convert from numpy to normal python list for our simple implementation
X_train_l = X_train.tolist()
labels_train_l = labels_train.tolist()

# drop the module-level references
# NOTE(review): this does NOT actually free the arrays — X_test/y_test
# (assigned above) still reference the same objects.
X = None
y = None

def step_cb(nn, step):
	"""Training-step callback: announce progress and checkpoint the network.

	Serializes ``nn`` to a file named ``"<step>.pickle"`` after each step.
	"""
	print("ping")
	checkpoint_file = "{}.pickle".format(step)
	# NOTE: serialize is invoked with nn passed explicitly as well,
	# matching the ANN API used elsewhere in this file.
	nn.serialize(nn, checkpoint_file)

# load or create an ANN
# NOTE(review): [1,1] describes a 1-input/1-output topology, which looks
# like a placeholder — confirm the intended layer sizes.
nn = ANN([1,1])
# checkpoint file to resume from, if it exists (checked just below)
serialized_name = '28_1000000.pickle'

if os.path.exists(serialized_name):
# --- Example #2 (scrape artifact: example separator and vote count "0") ---
    output_path='data/vat_number_train_1015_4600.hdf5')
# HDF5 sink for the validation split: room for 1500 samples of
# 28x28 single-channel images with 10-class one-hot labels
vat_number_val_writer = HDF5DatasetWriter(
    data_dims=(1500, 28, 28, 1),
    label_dims=(1500, 10),
    output_path='data/vat_number_val_1015_1500.hdf5')

# Load every digit image from disk and scale pixel values into [0, 1].
sdl = SimpleDatasetLoader()
image_paths = list(paths.list_images('../../datasets/digits'))
(data, labels) = sdl.load(image_paths, verbose=1000)
data = data.astype("float") / 255.0

# Hold out 25% of the examples for validation.
(x_train, x_val, y_train, y_val) = train_test_split(data,
                                                    labels,
                                                    test_size=0.25,
                                                    random_state=42)
logger.debug('{} {} {} {}'.format(x_train.shape, y_train.shape, x_val.shape,
                                  y_val.shape))

# One-hot encode the labels. Fit a single binarizer on the training labels
# and reuse it for validation so both splits share one column order —
# fitting a fresh LabelBinarizer per split can produce inconsistent
# encodings if a split happens to be missing a class.
label_binarizer = LabelBinarizer().fit(y_train)
y_train = label_binarizer.transform(y_train)
y_val = label_binarizer.transform(y_val)
logger.debug('{} {} {} {}'.format(x_train.shape, y_train.shape, x_val.shape,
                                  y_val.shape))

# Reshape the flat samples into 28x28 single-channel images.
x_train = x_train.reshape(x_train.shape[0], 28, 28, 1)
x_val = x_val.reshape(x_val.shape[0], 28, 28, 1)
logger.debug('{} {} {} {}'.format(x_train.shape, y_train.shape, x_val.shape,
                                  y_val.shape))

# Truncate to the fixed row counts the HDF5 writers were allocated with
# (4600 train / 1500 val, per the writer output paths) and persist.
x_train = x_train.tolist()[:4600]
y_train = y_train.tolist()[:4600]
x_val = x_val.tolist()[:1500]
y_val = y_val.tolist()[:1500]
vat_number_train_writer.add(x_train, y_train)
vat_number_val_writer.add(x_val, y_val)
# split again for validation (10% of the training data)
X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size=0.1)

# binarize the labels from a number into a vector with a 1 at that index
# ex: label 4 -> binarized [0 0 0 0 1 0 0 0 0 0]
# ex: label 7 -> binarized [0 0 0 0 0 0 0 1 0 0]
# Fit ONE binarizer on the training labels and reuse it for every split:
# fitting a fresh LabelBinarizer per split can produce one-hot vectors
# with different column orders/widths if a split is missing a class.
label_binarizer = LabelBinarizer().fit(y_train)
labels_train = label_binarizer.transform(y_train)
labels_test = label_binarizer.transform(y_test)
labels_valid = label_binarizer.transform(y_valid)

# convert from numpy to normal python list for our simple implementation
X_train_l = X_train.tolist()
X_valid_l = X_valid.tolist()

labels_train_l = labels_train.tolist()
labels_valid_l = labels_valid.tolist()

# learning-curve accumulators, filled in by the training step callback
steps = []  # e.g. [1, 2, 3, 4]
train_error = []  # e.g. [50, 40, 20, 20]
validation_err = []  # e.g. [70, 65, 63, 60]




def evaluate(X_t, y_t, X_v, y_v):
	def step_cb(nn, step):
		training_error = nn.get_avg_error(X_t, y_t)
		testing_error = nn.get_avg_error(X_v, y_v)

		steps.append(step)
# --- Example #4 (scrape artifact: example separator and vote count "0") ---
	def test_mnist_8by8_training(self):
		"""Train the simple ANN on scikit-learn's 8x8 digits and check quality.

		Asserts that training 10 epochs finishes within 300 seconds and that
		macro precision and recall on a held-out 25% test split both
		exceed 0.93.
		"""
		print("test_mnist_8by8_training")
		import time
		import numpy as np
		# FIX: sklearn.cross_validation was removed in scikit-learn 0.20;
		# model_selection provides the same train_test_split.
		from sklearn.model_selection import train_test_split
		from sklearn.datasets import load_digits
		# confusion_matrix / classification_report are only needed by the
		# commented-out debug prints below
		from sklearn.metrics import confusion_matrix, classification_report
		from sklearn.preprocessing import LabelBinarizer
		from sklearn.metrics import precision_score, recall_score

		# import the simplified mnist dataset from scikit learn
		digits = load_digits()

		# the input vectors (X is a vector of vectors of type int)
		X = digits.data

		# the output vector (y is a vector of type int, digits 0..9)
		y = digits.target

		# normalize input into [0, 1]
		X -= X.min()
		X /= X.max()

		# split data into training and testing: 75% train / 25% test
		X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=123)

		# binarize the labels from a number into a one-hot vector at that index
		labels_train = LabelBinarizer().fit_transform(y_train)

		# convert from numpy to normal python list for our simple implementation
		X_train_l = X_train.tolist()
		labels_train_l = labels_train.tolist()

		# create the artificial neuron network with:
		# 1 input layer of size 64 (the images are 8x8 gray pixels)
		# 1 hidden layer of size 100
		# 1 output layer of size 10 (the labels of digits are 0 to 9)
		nn = ANN([64, 100, 10])

		# see how long training takes
		startTime = time.time()

		# train it for 10 epochs
		nn.train(10, X_train_l, labels_train_l)

		elapsedTime = time.time() - startTime
		print("time took " + str(elapsedTime))
		self.assertTrue(elapsedTime < 300, 'Training took more than 300 seconds')

		# compute the predictions: argmax over the 10 output activations
		predictions = []
		for i in range(X_test.shape[0]):
			o = nn.predict(X_test[i])
			predictions.append(np.argmax(o))

		# compute a confusion matrix
		# print(confusion_matrix(y_test, predictions))
		# print(classification_report(y_test, predictions))

		precision = precision_score(y_test, predictions, average='macro')
		print("precision", precision)
		recall = recall_score(y_test, predictions, average='macro')
		print("recall", recall)

		self.assertTrue(precision > 0.93, 'Precision must be bigger than 93%')
		self.assertTrue(recall > 0.93, 'Recall must be bigger than 93%')
# --- Example #5 (scrape artifact: example separator and vote count "0") ---
    output_path='data/vat_number_val_1130_2000.hdf5')

# load the training digits from disk in shuffled order
sdl = SimpleDatasetLoader()
train_image_paths = list(paths.list_images('../../datasets/train_digits'))
random.shuffle(train_image_paths)
(x_train, y_train) = sdl.load(train_image_paths, verbose=1000)
# scale pixel values into [0, 1]
x_train = x_train.astype("float") / 255.0
# load a random 2000-image subset of the validation digits
val_image_paths = list(paths.list_images('../../datasets/val_digits'))
random.shuffle(val_image_paths)
val_image_paths = val_image_paths[:2000]
(x_val, y_val) = sdl.load(val_image_paths, verbose=1000)
x_val = x_val.astype("float") / 255.0
logger.debug('{} {} {} {}'.format(x_train.shape, y_train.shape, x_val.shape,
                                  y_val.shape))
# one-hot encode the labels
# NOTE(review): train and val are binarized by two independently fitted
# LabelBinarizers; if either split is missing a class their column orders
# will disagree — consider fitting once on y_train and reusing it.
y_train = LabelBinarizer().fit_transform(y_train)
y_val = LabelBinarizer().fit_transform(y_val)
logger.debug('{} {} {} {}'.format(x_train.shape, y_train.shape, x_val.shape,
                                  y_val.shape))
# reshape flat samples into 28x28 single-channel images
x_train = x_train.reshape(x_train.shape[0], 28, 28, 1)
x_val = x_val.reshape(x_val.shape[0], 28, 28, 1)
logger.debug('{} {} {} {}'.format(x_train.shape, y_train.shape, x_val.shape,
                                  y_val.shape))
# convert to plain lists, write both splits to their HDF5 sinks, and
# close the writers (the writer objects are created above this fragment)
x_train = x_train.tolist()
y_train = y_train.tolist()
x_val = x_val.tolist()
y_val = y_val.tolist()
vat_number_train_writer.add(x_train, y_train)
vat_number_val_writer.add(x_val, y_val)
vat_number_train_writer.close()
vat_number_val_writer.close()