def test_bars(self):
    # 16x16 images with bars that are 2 pixels thick
    train_verticals = gen_vertical_bars(50)
    train_horizontals = gen_horizontal_bars(50)
    test_verticals = gen_vertical_bars(50)
    test_horizontals = gen_horizontal_bars(50)
    inputs = np.array(train_verticals + train_horizontals)
    targets = np.array([[1, 0] for _ in train_verticals]
                       + [[0, 1] for _ in train_horizontals])
    data_set = NumericalDataSet(inputs, targets)
    test_inputs = np.array(test_verticals + test_horizontals)
    test_targets = np.array([[1, 0] for _ in test_verticals]
                            + [[0, 1] for _ in test_horizontals])
    test_data_set = NumericalDataSet(test_inputs, test_targets)
    # 16x16 -> C(3): 14x14 -> P(2): 7x7 -> C(3): 5x5 -> P(5): 1x1
    net_topo = [('c', 3, 6), ('p', 2), ('c', 3, 8), ('p', 5),
                ('mlp', 8, 8, 2)]
    net = ConvNet(iterations=50, learning_rate=0.001, topo=net_topo)
    net.train(data_set)
    preds = net.predict(test_data_set)
    conf_mat = nputils.create_confidence_matrix(preds, test_targets, 2)
    print("Error rate: "
          + str(100 - (np.sum(conf_mat.diagonal())
                       / np.sum(conf_mat[:, :]) * 100)) + "%")
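# The size annotations in the topology comment above follow the usual
# "valid" convolution and non-overlapping pooling arithmetic: a k x k
# convolution maps n x n to (n - k + 1) x (n - k + 1), and p x p pooling
# maps n x n to (n // p) x (n // p). A minimal sketch (the helper name is
# hypothetical, not part of this code base) that replays the 16x16 topology:
def trace_topology_sizes(input_size, topo):
    # yield the spatial size after each convolution ('c') or pooling ('p')
    size = input_size
    for layer in topo:
        if layer[0] == 'c':    # ('c', kernel_size, num_feature_maps)
            size = size - layer[1] + 1
        elif layer[0] == 'p':  # ('p', pool_size)
            size = size // layer[1]
        yield layer[0], size

# list(trace_topology_sizes(16, [('c', 3, 6), ('p', 2), ('c', 3, 8), ('p', 5)]))
# -> [('c', 14), ('p', 7), ('c', 5), ('p', 1)], matching the comment above.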
def test_mnist_digits(self):
    digits, labels = imgutils.load_mnist_digits(
        '../../data/mnist-digits/train-images.idx3-ubyte',
        '../../data/mnist-digits/train-labels.idx1-ubyte', 300)
    targets = np.array([nputils.vec_with_one(10, digit) for digit in labels])
    train_data_set = NumericalDataSet(np.array(digits)[:150], targets[:150])
    test_data_set = NumericalDataSet(np.array(digits)[150:], targets[150:])
    # 28x28 -> C(5): 24x24 -> P(2): 12x12 -> C(5): 8x8 -> P(2): 4x4 -> C(4): 1x1
    net_topo = [('c', 5, 8), ('p', 2), ('c', 5, 16), ('p', 2), ('c', 4, 16),
                ('mlp', 16, 16, 10)]
    net = ConvNet(iterations=30, learning_rate=0.01, topo=net_topo,
                  activation_func=(nputils.rectifier,
                                   nputils.rectifier_deriv))
    net.train(train_data_set)
    try:
        srlztn.save_object('../../trained/mnist_digits.cnn', net)
    except Exception:
        print("serialization error")
    preds = net.predict(test_data_set)
    conf_mat = nputils.create_confidence_matrix(preds, targets[150:], 10)
    print(conf_mat)
    num_correct = np.sum(conf_mat.diagonal())
    num_all = np.sum(conf_mat[:, :])
    print("Error rate: " + str(100 - (num_correct / num_all * 100))
          + "% (" + str(int(num_correct)) + "/" + str(int(num_all)) + ")")
def test_get_observation_no_labels(self):
    ''' get observations from a dataset without labels '''
    inputs = np.array([[0, 0, 0], [1, 1, 1], [2, 2, 2], [3, 3, 3],
                       [4, 4, 4]])
    dataSet = NumericalDataSet(inputs)
    nrObs = 5
    for i in range(nrObs):
        obs, target = dataSet.get_observation(i)
        assert target is None
        assert_equal(obs, i * np.ones((1, 3)),
                     'wrong input at observation %d' % i)
def test_gen_observations(self):
    '''
    Test generator getting all observations as (1,x) numpy array from
    input dataset
    '''
    inputs = np.array([[0, 0, 0], [1, 1, 1], [2, 2, 2], [3, 3, 3],
                       [4, 4, 4]])
    labels = np.array([[0], [1], [2], [3], [4]])
    dataSet = NumericalDataSet(inputs, labels)
    i = 0
    for obs, label in dataSet.gen_observations():
        assert_equal(obs, i * np.ones((1, 3)),
                     'wrong input at observation %d' % i)
        assert_equal(label, i * np.ones((1, 1)),
                     'wrong label at observation %d' % i)
        i += 1
def test_get_observation(self):
    '''
    Test getting a single observation as (1,x) numpy array from input
    dataset
    '''
    inputs = np.array([[0, 0, 0], [1, 1, 1], [2, 2, 2], [3, 3, 3],
                       [4, 4, 4]])
    labels = np.array([[0], [1], [2], [3], [4]])
    dataSet = NumericalDataSet(inputs, labels)
    nrObs = 5
    for i in range(nrObs):
        obs, label = dataSet.get_observation(i)
        assert_equal(obs, i * np.ones((1, 3)),
                     'wrong input at observation %d' % i)
        assert_equal(label, i * np.ones((1, 1)),
                     'wrong label at observation %d' % i)
def predict_extended(self, inputs):
    """
    Predicts targets for the given inputs.
    :param inputs: input observations; wrapped into a NumericalDataSet
        internally
    :return: List of predictions, i.e. output of this net for each
        observation in the data set.
    """
    data_set = NumericalDataSet(inputs)
    predictions = []
    # loop through the data set
    for observation, _ in data_set.gen_observations():
        # make sure it is a numpy array
        input_arr = np.array(observation)
        outputs = self.feedforward(input_arr)
        predictions.append(outputs[-1])
    return predictions
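# Minimal usage sketch for predict_extended (`net` and `images` are
# hypothetical names): each returned element is the final feed-forward
# output for one observation, so a class decision is an argmax over it.
#
#   raw_outputs = net.predict_extended(images)
#   predicted_classes = [int(np.argmax(out)) for out in raw_outputs]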
def test_predict(self):
    """ Test prediction for dataset. Returns a list of np.arrays. """
    # defining model
    layerSizes = [2, 2, 1]
    nn = MultilayerPerceptron(layerSizes)
    # setting up nn parameters
    parameters = []
    parameters.append(np.ones((3, 2)))
    parameters.append(np.ones((3, 1)))
    nn.set_params(parameters)
    # preparing input NumericalDataSet
    inputSet = np.array([[2, 2]])
    inputVec = np.array([[2, 2]])
    nrObs = 10
    for _ in range(nrObs - 1):
        inputSet = np.vstack((inputSet, inputVec))
    dataSet = NumericalDataSet(inputSet, None)
    # run function
    predictions = nn.predict(dataSet)
    # check nr of observations
    self.assertEqual(len(predictions), nrObs,
                     "number of observations mismatch")
    for prediction in predictions:
        assert_equal(prediction, np.array([[2.9866142981514305]]),
                     "wrong output")
def setUp(self):
    csv_source = "../../../../../data/wine-quality/winequality-red.csv"
    rawData = np.genfromtxt(csv_source, delimiter=';', skip_header=0)
    inputs = rawData[:, :11]
    targets = rawData[:, 11:]
    self.dataSet = NumericalDataSet(inputs, targets)
def test_digits_prediction(self):
    training_data = np.loadtxt('../../data/pendigits-training.txt')[:500, :]
    testing_data = np.loadtxt('../../data/pendigits-testing.txt')
    layer_sizes = [16, 16, 10]
    update_method = Rprop(layer_sizes, init_step=0.01)
    nn = MultilayerPerceptron(layer_sizes, iterations=100,
                              do_classification=True,
                              update_method=update_method,
                              batch_update_size=30,
                              activation_function=nputils.rectifier,
                              deriv_activation_function=nputils.rectifier_deriv)
    training_targets = nputils.convert_targets(training_data[:, -1], 10)
    training_input = training_data[:, 0:-1]
    maxs = np.max(training_input, axis=0)
    mins = np.min(training_input, axis=0)
    normalized_training_input = np.array(
        [(r - mins) / (maxs - mins) for r in training_input])
    training_data_set = NumericalDataSet(normalized_training_input,
                                         training_targets)
    testing_targets = nputils.convert_targets(testing_data[:, -1], 10)
    # note: the test set is min-max normalized with its own mins/maxs
    testing_input = testing_data[:, 0:-1]
    maxs = np.max(testing_input, axis=0)
    mins = np.min(testing_input, axis=0)
    normalized_testing_input = np.array(
        [(r - mins) / (maxs - mins) for r in testing_input])
    testing_data_set = NumericalDataSet(normalized_testing_input,
                                        testing_targets)
    nn.train(training_data_set)
    predictions = nn.predict(testing_data_set)
    predictions = [np.argmax(p) for p in predictions]
    # confusion matrix with a label header row and column
    conf_matrix = np.zeros((10, 10))
    conf_matrix = np.concatenate(([np.arange(0, 10)], conf_matrix), axis=0)
    conf_matrix = np.concatenate((np.transpose([np.arange(-1, 10)]),
                                  conf_matrix), axis=1)
    targets = testing_data[:, -1]
    for i in range(len(targets)):
        # targets come back as floats from loadtxt; cast before indexing
        conf_matrix[int(targets[i]) + 1, predictions[i] + 1] += 1
    print("Detection rate: "
          + str(np.sum(np.diagonal(conf_matrix[1:, 1:])) / len(targets)))
    print(str(conf_matrix))
def test_training(self):
    inputs = np.array([[0, 0], [1, 1], [2, 2]])
    targets = np.array([[0], [1], [2]])
    data_set = NumericalDataSet(inputs, targets)
    lin_reg = SciPyLinReg(SciPyLinReg.ORDINARY)
    lin_reg.train(data_set)
    assert_array_almost_equal([0.5, 0.5], lin_reg.get_params())
def test_constructor_all_params(self):
    '''
    Test constructor for supplied inputs and labels with same nr
    observations
    '''
    inputs = np.array([[0, 0, 0], [1, 1, 1], [2, 2, 2], [3, 3, 3],
                       [4, 4, 4]])
    labels = np.array([[0], [1], [2], [3], [4]])
    dataSet = NumericalDataSet(inputs, labels)
    assert dataSet.nrInputVars == 3
    assert dataSet.nrTargetVars == 1
    assert dataSet.nrObservations == 5
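# A minimal sketch of the NumericalDataSet interface exercised by the tests
# in this section, reconstructed from the assertions above and below; the
# real implementation may differ in details. Observations are handed out as
# (1, x) row arrays, and the target is None for unlabeled data.
class NumericalDataSetSketch(object):

    def __init__(self, inputs, targets=None):
        if targets is not None and len(inputs) != len(targets):
            raise Exception(
                "number of inputs and targets observations mismatch")
        self.inputs = np.atleast_2d(inputs)
        self.targets = None if targets is None else np.atleast_2d(targets)
        self.nrObservations = self.inputs.shape[0]
        self.nrInputVars = self.inputs.shape[1]
        self.nrTargetVars = 0 if targets is None else self.targets.shape[1]

    def get_nr_observations(self):
        return self.nrObservations

    def get_observation(self, i):
        # (1, x) input row plus its target, or None when unlabeled
        target = None if self.targets is None else self.targets[i:i + 1, :]
        return self.inputs[i:i + 1, :], target

    def gen_observations(self):
        for i in range(self.nrObservations):
            yield self.get_observation(i)

    def rand_observation(self):
        return self.get_observation(np.random.randint(self.nrObservations))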
def get_dataset(self, raw_data, targets=None):
    '''
    @param raw_data: np.ndarray of image matrices
    @param targets: np.ndarray (n, 1)
    '''
    inputs = raw_data
    if self.normalize:
        inputs = [nputils.normalize_arr(img, -1, 1) for img in inputs]
    if self.scale_to is not None:
        inputs = [resize(img, self.scale_to) for img in inputs]
    return NumericalDataSet(inputs, targets)
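# Hypothetical usage sketch for get_dataset above (`factory` and `imgs` are
# assumed names; `normalize` and `scale_to` are the attributes the method
# reads): images are first normalized to [-1, 1], then resized, and only
# then wrapped into a NumericalDataSet.
#
#   factory.normalize = True
#   factory.scale_to = (16, 16)
#   data_set = factory.get_dataset(imgs, labels)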
def test_constructor_nr_observations_mismatch(self):
    '''
    Test constructor for supplied inputs and labels with different nr
    observations
    '''
    inputs = np.array([[0, 0, 0], [1, 1, 1], [2, 2, 2], [3, 3, 3],
                       [4, 4, 4]])
    labels = np.array([[0], [1], [2], [3]])
    try:
        NumericalDataSet(inputs, labels)
    except Exception as errmsg:
        expected_errmsg = "number of inputs and targets observations mismatch"
        self.assertTrue(str(errmsg).startswith(expected_errmsg))
    else:
        self.fail("expected a mismatch exception")
def test_aggregation(self):
    inputs = np.array([[0, 1, 2]])
    targets = np.array([[1]])
    data_set = NumericalDataSet(inputs, targets)
    factory = SciPyLinRegFactory()
    lin_reg_1 = factory.get_instance()
    lin_reg_1.train(data_set)
    lin_reg_2 = factory.get_instance()
    lin_reg_2.train(data_set)
    final_lin_reg = factory.aggregate([lin_reg_1, lin_reg_2])
    assert_array_almost_equal([[0., 0.2, 0.4]], final_lin_reg.get_params())
def test_face_recognition(self):
    faces = imgutils.load_images('/home/simon/trainingdata/faces/',
                                 max_num=100)
    non_faces = imgutils.load_images('/home/simon/trainingdata/nonfaces/',
                                     max_num=100)
    faces_training = faces[0:50]
    faces_testing = faces[50:]
    non_faces_training = non_faces[0:50]
    non_faces_testing = non_faces[50:]
    inputs_training = np.array(faces_training + non_faces_training)
    targets_training = np.array(
        [[1, 0] for _ in range(len(faces_training))]
        + [[0, 1] for _ in range(len(non_faces_training))])
    data_set_training = NumericalDataSet(inputs_training, targets_training)
    inputs_testing = np.array(faces_testing + non_faces_testing)
    targets_testing = np.array(
        [[1, 0] for _ in range(len(faces_testing))]
        + [[0, 1] for _ in range(len(non_faces_testing))])
    data_set_testing = NumericalDataSet(inputs_testing, targets_testing)
    # 24x24 -> C(5): 20x20 -> P(2): 10x10 -> C(3): 8x8 -> P(2): 4x4
    # -> C(3): 2x2 -> P(2): 1x1
    net_topo = [('c', 5, 8), ('p', 2), ('c', 3, 16), ('p', 2), ('c', 3, 24),
                ('p', 2), ('mlp', 24, 24, 2)]
    net = ConvNet(iterations=30, learning_rate=0.01, topo=net_topo)
    net.train(data_set_training)
    preds = net.predict(data_set_testing)
    conf_mat = nputils.create_confidence_matrix(preds, targets_testing, 2)
    num_correct = np.sum(conf_mat.diagonal())
    num_all = np.sum(conf_mat[:, :])
    print("Error rate: " + str(100 - (num_correct / num_all * 100))
          + "% (" + str(int(num_correct)) + "/" + str(int(num_all)) + ")")
def get_dataset(self, raw_inputs, targets=None):
    '''
    @param raw_inputs: numpy.ndarray of inputs only
    @param targets: numpy.ndarray (n, 1)
    '''
    if self.number_of_features is None:
        # not initialized - choose features first
        self._init_selection(raw_inputs.shape[1])
    if raw_inputs.shape[1] < max(self.feature_indices) + 1:
        raise Exception("Too few variables")
    inputs = raw_inputs[:, self.feature_indices]
    if targets is not None and len(targets.shape) == 2 \
            and targets.shape[1] == 1:
        targets = targets.ravel()
    # TODO: maybe generate some polynomial features here
    return NumericalDataSet(inputs, targets)
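# Hypothetical usage sketch for the feature-selecting get_dataset above:
# the first call draws the feature indices via _init_selection; every later
# call reuses them, so train and test data go through the same column
# selection (`selector` and the input names are assumed).
#
#   train_set = selector.get_dataset(train_inputs, train_targets)
#   test_set = selector.get_dataset(test_inputs)  # same feature_indices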
def test_train(self):
    """ Testing only execution of train function """
    layerSizes = [2, 2, 1]
    nn = MultilayerPerceptron(layerSizes)
    # preparing input NumericalDataSet
    inputSet = np.array([[2, 2]])
    inputVec = np.array([[2, 2]])
    targetSet = np.array([[1]])
    targetVec = np.array([[1]])
    nrObs = 10
    for _ in range(nrObs - 1):
        inputSet = np.vstack((inputSet, inputVec))
        targetSet = np.vstack((targetSet, targetVec))
    dataSet = NumericalDataSet(inputSet, targetSet)
    nn.train(dataSet)
def test_smoke(self):
    smoke_imgs_training = imgutils.load_images(
        '/home/simon/smoke/training/smoke/', max_num=100)
    non_smoke_imgs_training = imgutils.load_images(
        '/home/simon/smoke/training/non-smoke/', max_num=100)
    inputs_training = np.array(smoke_imgs_training
                               + non_smoke_imgs_training)
    targets_training = np.array(
        [[1, 0] for _ in range(len(smoke_imgs_training))]
        + [[0, 1] for _ in range(len(non_smoke_imgs_training))])
    data_set_training = NumericalDataSet(inputs_training, targets_training)
    # 100x100 -> C(5): 96x96 -> P(2): 48x48 -> C(5): 44x44 -> P(2): 22x22
    # -> C(3): 20x20 -> P(2): 10x10 -> C(3): 8x8 -> P(2): 4x4
    # -> C(3): 2x2 -> P(2): 1x1
    net_topo = [('c', 5, 8), ('p', 2), ('c', 5, 16), ('p', 2), ('c', 3, 24),
                ('p', 2), ('c', 3, 24), ('p', 2), ('c', 3, 24), ('p', 2),
                ('mlp', 24, 24, 2)]
    net = ConvNet(iterations=30, learning_rate=0.01, topo=net_topo)
    net.train(data_set_training)
def get_data_set(self):
    '''
    @return: NumericalDataSet
    '''
    return NumericalDataSet(self.inputs, self.targets)
def fit(self, inputs, targets):
    """
    Train the net with the given inputs and targets. The data is split
    into a training and a validation set; training samples are drawn at
    random for online learning.
    :param inputs: input observations
    :param targets: target outputs
    """
    split_point = int(len(inputs) * self.split_ratio)
    data_set = NumericalDataSet(inputs[:split_point], targets[:split_point])
    val_in = inputs[split_point:]
    val_targets = targets[split_point:]
    prev_layers = None
    prev_mlp = None
    self.train_acc_err = []
    self.val_acc_err = []
    for it in range(self.iterations):
        # randomly select observations as many times as there are
        # observations
        it_error = 0
        start = time.time()
        for _ in range(data_set.get_nr_observations()):
            input_arr, target_arr = data_set.rand_observation()
            # feed-forward
            outputs = self.feedforward(input_arr)
            current_error = nputils.calc_squared_error(target_arr,
                                                       outputs[-1])
            it_error += current_error
            # mlp backpropagation and gradient descent
            mlp_outputs = outputs[-len(self.mlp.arr_layer_sizes):]
            mlp_deltas = self.mlp.backpropagation(mlp_outputs, target_arr)
            mlp_weight_updates = self.mlp.calculate_weight_updates(
                mlp_deltas, mlp_outputs)
            self.mlp.update_method.perform_update(self.mlp.weights_arr,
                                                  mlp_weight_updates,
                                                  current_error)
            # layer backpropagation and gradient descent:
            # calculate the backpropagated error of the first mlp layer
            backprop_error = np.array(
                [[x] for x in np.dot(self.mlp.weights_arr[0],
                                     mlp_deltas[0].transpose())])
            for layer in reversed(self.layers):
                backprop_error = layer.backpropagate(backprop_error)
            # calculate the weight gradients and update the weights
            for layer in self.layers:
                layer.calc_gradients()
                layer.update(self.learning_rate)
        avg_error = it_error / data_set.nrObservations
        acc_err = self._accuracy_err(inputs, targets)
        self.train_acc_err.append(acc_err)
        # validation error
        acc_err = self._accuracy_err(val_in, val_targets)
        self.val_acc_err.append(acc_err)
        logging.info("Iteration #{} MSE: {}, TrainErr: {:.6f}, "
                     "ValErr: {:.6f} ({:.2f}s)"
                     .format(it + 1, avg_error, self.train_acc_err[-1],
                             self.val_acc_err[-1], time.time() - start))
        # break condition: validation error got worse over the last
        # three iterations, so revert to the previous snapshot and stop
        if it > 3 and val_in is not None \
                and self.val_acc_err[-1] > self.val_acc_err[-4]:
            self.layers = prev_layers
            self.mlp = prev_mlp
            plt.figure()
            plt.plot(self.train_acc_err)
            plt.plot(self.val_acc_err)
            plt.show(block=False)
            break
        # keep a snapshot of the previous model for reverting
        if it > 0:
            prev_layers = copy.deepcopy(self.layers)
            prev_mlp = copy.deepcopy(self.mlp)
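# Minimal usage sketch for fit above (`net` and the 0.8 split ratio are
# assumptions, not taken from this code): the last 20% of the data serve as
# a validation set, training stops early once the validation error exceeds
# its value from three iterations earlier, and the model reverts to the
# previous snapshot.
#
#   net.split_ratio = 0.8
#   net.fit(inputs, targets)
#   plt.plot(net.val_acc_err)  # inspect the recorded validation curve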