def test_bars(self):
        # 16x16 images with bars that are 2 pixels thick
        train_verticals = gen_vertical_bars(50)
        train_horizontals = gen_horizontal_bars(50)
        test_verticals = gen_vertical_bars(50)
        test_horizontals = gen_horizontal_bars(50)
        inputs = np.array(train_verticals + train_horizontals)
        targets = np.array([[1, 0] for _ in train_verticals] +
                           [[0, 1] for _ in train_horizontals])
        data_set = NumericalDataSet(inputs, targets)
        test_inputs = np.array(test_verticals + test_horizontals)
        test_targets = np.array([[1, 0] for _ in test_verticals] +
                                [[0, 1] for _ in test_horizontals])
        test_data_set = NumericalDataSet(test_inputs, test_targets)

        # 16x16 -> C(3): 14x14 -> P(2): 7x7 -> C(3): 5x5 -> P(5): 1x1
        net_topo = [('c', 3, 6), ('p', 2), ('c', 3, 8), ('p', 5),
                    ('mlp', 8, 8, 2)]
        net = ConvNet(iterations=50, learning_rate=0.001, topo=net_topo)
        net.train(data_set)

        preds = net.predict(test_data_set)
        conf_mat = nputils.create_confidence_matrix(preds, test_targets, 2)
        print "Error rate: " + str(100 - (np.sum(conf_mat.diagonal()) /
                                          np.sum(conf_mat[:, :]) * 100)) + "%"
    def test_mnist_digits(self):
        digits, labels = imgutils.load_mnist_digits(
            '../../data/mnist-digits/train-images.idx3-ubyte',
            '../../data/mnist-digits/train-labels.idx1-ubyte', 300)
        targets = np.array(
            [nputils.vec_with_one(10, digit) for digit in labels])
        train_data_set = NumericalDataSet(
            np.array(digits)[:150], targets[:150])
        test_data_set = NumericalDataSet(np.array(digits)[150:], targets[150:])

        # 28x28 -> C(5): 24x24 -> P(2): 12x12 -> C(5): 8x8 -> P(2): 4x4 -> C(4): 1x1
        net_topo = [('c', 5, 8), ('p', 2), ('c', 5, 16), ('p', 2),
                    ('c', 4, 16), ('mlp', 16, 16, 10)]
        net = ConvNet(iterations=30,
                      learning_rate=0.01,
                      topo=net_topo,
                      activation_func=(nputils.rectifier,
                                       nputils.rectifier_deriv))
        net.train(train_data_set)
        try:
            srlztn.save_object('../../trained/mnist_digits.cnn', net)
        except Exception:
            print("serialization error")

        preds = net.predict(test_data_set)

        conf_mat = nputils.create_confidence_matrix(preds, targets[150:], 10)
        print(conf_mat)
        num_correct = np.sum(conf_mat.diagonal())
        num_all = np.sum(conf_mat[:, :])
        print("Error rate: " + str(
            100 - (num_correct / num_all * 100)) + "% (" + str(
                int(num_correct)) + "/" + str(int(num_all)) + ")")
Example #3
 def test_get_observation_no_labels(self):
     '''
     get observations from a dataset without labels
     '''
     inputs = np.array([[0, 0, 0], [1, 1, 1], [2, 2, 2], [3, 3, 3],
                        [4, 4, 4]])
     dataSet = NumericalDataSet(inputs)
     nrObs = 5
     for i in range(nrObs):
         inputs, target = dataSet.get_observation(i)
         assert target is None
         assert_equal(inputs, i * np.ones((1, 3)),
                      'wrong input at observation %d' % i)
Example #4
    def test_gen_observations(self):
        '''
        Test the generator that yields every observation as a (1, x) numpy array from the input dataset
        '''
        inputs = np.array([[0, 0, 0], [1, 1, 1], [2, 2, 2], [3, 3, 3],
                           [4, 4, 4]])
        labels = np.array([[0], [1], [2], [3], [4]])
        dataSet = NumericalDataSet(inputs, labels)

        i = 0
        for inputs, labels in dataSet.gen_observations():
            assert_equal(inputs, i * np.ones((1, 3)),
                         'wrong input at observation %d' % i)
            assert_equal(labels, i * np.ones((1, 1)),
                         'wrong label at observation %d' % i)
            i = i + 1
Example #5
    def test_get_observation(self):
        '''
        Test getting a single observation as a (1, x) numpy array from the input dataset
        '''
        inputs = np.array([[0, 0, 0], [1, 1, 1], [2, 2, 2], [3, 3, 3],
                           [4, 4, 4]])
        labels = np.array([[0], [1], [2], [3], [4]])
        dataSet = NumericalDataSet(inputs, labels)
        nrObs = 5

        for i in range(nrObs):
            inputs, labels = dataSet.get_observation(i)
            assert_equal(inputs, i * np.ones((1, 3)),
                         'wrong input at observation %d' % i)
            assert_equal(labels, i * np.ones((1, 1)),
                         'wrong label at observation %d' % i)
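
Taken together, these tests pin down the NumericalDataSet contract: observations come back as (1, x) row arrays, the target is None when no labels were supplied, and the constructor exposes nrInputVars, nrTargetVars and nrObservations. A minimal sketch consistent with the tests (the real class may differ; the convolutional examples, for instance, pass whole 2-D images as observations):

import numpy as np

class NumericalDataSet(object):
    """Sketch reconstructed from the tests above; not the library source."""

    def __init__(self, inputs, targets=None):
        if targets is not None and len(inputs) != len(targets):
            raise Exception("number of inputs and targets observations mismatch")
        self.inputs = np.asarray(inputs)
        self.targets = None if targets is None else np.asarray(targets)
        self.nrObservations = len(self.inputs)
        self.nrInputVars = self.inputs.shape[1]
        self.nrTargetVars = 0 if targets is None else self.targets.shape[1]

    def get_observation(self, i):
        # every observation is handed out as a (1, x) row vector
        inp = self.inputs[i].reshape(1, -1)
        tgt = None if self.targets is None else self.targets[i].reshape(1, -1)
        return inp, tgt

    def gen_observations(self):
        for i in range(self.nrObservations):
            yield self.get_observation(i)

    def get_nr_observations(self):
        return self.nrObservations

    def rand_observation(self):
        return self.get_observation(np.random.randint(self.nrObservations))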
Example #6
 def predict_extended(self, inputs):
     """
     Predicts targets for given data set.
     @param data_set: data Set inheriting AbstractDataSet
     :return: List of predictions, i.e. output of this net for each
     observation in the data set.
     """
     data_set = NumericalDataSet(inputs)
     predictions = []
     # loop through dataset
     for observation, _ in data_set.gen_observations():
         # make sure it is a numpy array
         input_arr = np.array(observation)
         outputs = self.feedforward(input_arr)
         predictions.append(outputs[-1])
     return predictions
Example #8
 def test_predict(self):
     """
     Test prediction for dataset. Returns a list of np.arrays.
     """
     # defining model
     layerSizes = [2, 2, 1]
     nn = MultilayerPerceptron(layerSizes)

     # setting up nn: all-ones weights, shape (n_in + bias, n_out)
     parameters = []
     parameters.append(np.ones((3, 2)))
     parameters.append(np.ones((3, 1)))
     nn.set_params(parameters)

     # preparing input NumericalDataSet: ten identical observations
     inputVec = np.array([[2, 2]])
     nrObs = 10
     inputSet = np.tile(inputVec, (nrObs, 1))
     dataSet = NumericalDataSet(inputSet, None)

     # run function
     predictions = nn.predict(dataSet)

     # check nr of observations
     self.assertEqual(len(predictions), nrObs, "number of observations mismatch")
     for prediction in predictions:
         assert_equal(prediction, np.array([[2.9866142981514305]]), "wrong output")
Example #9
    def setUp(self):
        csv_source = "../../../../../data/wine-quality/winequality-red.csv"
        rawData = np.genfromtxt(csv_source, delimiter=';', skip_header=0)
        inputs = rawData[:, :11]
        targets = rawData[:, 11:]

        self.dataSet = NumericalDataSet(inputs, targets)
Example #10
    def test_digits_prediction(self):
        training_data = np.loadtxt('../../data/pendigits-training.txt')[:500, :]
        testing_data = np.loadtxt('../../data/pendigits-testing.txt')

        layer_sizes = [16, 16, 10]
        update_method = Rprop(layer_sizes, init_step=0.01)
        nn = MultilayerPerceptron(layer_sizes, iterations=100,
                                  do_classification=True,
                                  update_method=update_method,
                                  batch_update_size=30,
                                  activation_function=nputils.rectifier,
                                  deriv_activation_function=nputils.rectifier_deriv)

        training_targets = nputils.convert_targets(training_data[:, -1], 10)
        training_input = training_data[:, 0:-1]
        maxs = np.max(training_input, axis=0)
        mins = np.min(training_input, axis=0)
        normalized_training_input = np.array(
            [(r - mins) / (maxs - mins) for r in training_input])

        training_data_set = NumericalDataSet(normalized_training_input,
                                              training_targets)

        testing_targets = nputils.convert_targets(testing_data[:, -1], 10)
        testing_input = testing_data[:, 0:-1]
        maxs = np.max(testing_input, axis=0)
        mins = np.min(testing_input, axis=0)
        normalized_testing_input = np.array(
            [(r - mins) / (maxs - mins) for r in testing_input])

        testing_data_set = NumericalDataSet(normalized_testing_input, testing_targets)

        nn.train(training_data_set)
        predictions = nn.predict(testing_data_set)

        predictions = [np.argmax(p) for p in predictions]

        conf_matrix = np.zeros((10, 10))
        conf_matrix = np.concatenate(([np.arange(0, 10)], conf_matrix), axis=0)
        conf_matrix = np.concatenate((np.transpose([np.arange(-1, 10)]), conf_matrix), axis=1)
        targets = testing_data[:, -1]
        for i in range(len(targets)):
            conf_matrix[int(targets[i]) + 1, predictions[i] + 1] += 1

        print("Detection rate: " + str(np.sum(np.diagonal(conf_matrix[1:, 1:])) / len(targets)))
        print(conf_matrix)
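
The per-column min-max scaling can be written without the Python-level list comprehension by using broadcasting. Note also that the test rescales the testing inputs with the testing set's own minima and maxima; the usual convention, shown below as an alternative rather than as what the test does, is to reuse the training-set statistics:

# vectorized column-wise min-max scaling to [0, 1]
mins = training_input.min(axis=0)
maxs = training_input.max(axis=0)
normalized_training_input = (training_input - mins) / (maxs - mins)
# reuse the *training* statistics for the test split
normalized_testing_input = (testing_input - mins) / (maxs - mins)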
Example #11
    def test_training(self):
        inputs = np.array([[0, 0], [1, 1], [2, 2]])
        targets = np.array([[0], [1], [2]])
        data_set = NumericalDataSet(inputs, targets)

        lin_reg = SciPyLinReg(SciPyLinReg.ORDINARY)
        lin_reg.train(data_set)

        assert_array_almost_equal([0.5, 0.5], lin_reg.get_params())
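
The expected parameters are easy to verify: the two input columns are identical and each target equals their mean, so y = 0.5*x1 + 0.5*x2 fits all three points exactly, and [0.5, 0.5] is the minimum-norm solution. A quick check independent of SciPyLinReg:

import numpy as np

X = np.array([[0, 0], [1, 1], [2, 2]], dtype=float)
y = np.array([0, 1, 2], dtype=float)
coef = np.linalg.lstsq(X, y, rcond=None)[0]
print(coef)  # [0.5 0.5] -- the minimum-norm solution, since the columns coincide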
Example #12
 def test_constructor_all_params(self):
     '''
     Test the constructor with inputs and labels that have the same number of observations
     '''
     inputs = np.array([[0, 0, 0], [1, 1, 1], [2, 2, 2], [3, 3, 3],
                        [4, 4, 4]])
     labels = np.array([[0], [1], [2], [3], [4]])
     dataSet = NumericalDataSet(inputs, labels)
     assert dataSet.nrInputVars == 3
     assert dataSet.nrTargetVars == 1
     assert dataSet.nrObservations == 5
Example #13
 def get_dataset(self, raw_data, targets=None):
     '''
     @param raw_data: np.ndarray of matrices
      @param targets: np.ndarray (n, 1)
     '''
     inputs = raw_data
     if self.normalize:
         inputs = [nputils.normalize_arr(img, -1, 1) for img in inputs]
     if self.scale_to is not None:
         inputs = [resize(img, self.scale_to) for img in inputs]
     return NumericalDataSet(inputs, targets)
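
nputils.normalize_arr(img, -1, 1) presumably rescales an array's value range onto [-1, 1]; a numpy equivalent under that assumption (not the library source):

def normalize_arr(arr, lo=-1.0, hi=1.0):
    # affine rescale of the array's value range onto [lo, hi]
    a_min, a_max = arr.min(), arr.max()
    return (arr - a_min) / (a_max - a_min) * (hi - lo) + lo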
Example #14
 def test_constructor_nr_observations_mismatch(self):
     '''
     Test the constructor with inputs and labels that have different numbers of observations
     '''
     inputs = np.array([[0, 0, 0], [1, 1, 1], [2, 2, 2], [3, 3, 3],
                        [4, 4, 4]])
     labels = np.array([[0], [1], [2], [3]])
     with self.assertRaises(Exception) as ctx:
         NumericalDataSet(inputs, labels)
     expected_errmsg = "number of inputs and targets observations mismatch"
     self.assertTrue(str(ctx.exception).startswith(expected_errmsg))
Example #15
    def test_aggregation(self):
        inputs = np.array([[0, 1, 2]])
        targets = np.array([[1]])
        data_set = NumericalDataSet(inputs, targets)

        factory = SciPyLinRegFactory()
        lin_reg_1 = factory.get_instance()
        lin_reg_1.train(data_set)
        lin_reg_2 = factory.get_instance()
        lin_reg_2.train(data_set)

        final_lin_reg = factory.aggregate([lin_reg_1, lin_reg_2])

        assert_array_almost_equal([[0., 0.2, 0.4]], final_lin_reg.get_params())
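
With a single observation the fit is underdetermined, so the parameters are the minimum-norm least-squares solution pinv(X) * y = [0, 1, 2]^T / 5, and averaging two models trained on the same data leaves them unchanged. A quick check independent of SciPyLinRegFactory:

import numpy as np

X = np.array([[0., 1., 2.]])
y = np.array([[1.]])
print(np.linalg.pinv(X).dot(y).ravel())  # [0.  0.2 0.4]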
Example #16
    def test_face_recognition(self):
        faces = imgutils.load_images('/home/simon/trainingdata/faces/',
                                     max_num=100)
        non_faces = imgutils.load_images('/home/simon/trainingdata/nonfaces/',
                                         max_num=100)
        faces_training = faces[0:50]
        faces_testing = faces[50:]
        non_faces_training = non_faces[0:50]
        non_faces_testing = non_faces[50:]

        inputs_training = np.array(faces_training + non_faces_training)
        targets_training = np.array([[1, 0]
                                     for _ in range(len(faces_training))] +
                                    [[0, 1]
                                     for _ in range(len(non_faces_training))])
        data_set_training = NumericalDataSet(inputs_training, targets_training)

        inputs_testing = np.array(faces_testing + non_faces_testing)
        targets_testing = np.array([[1, 0]
                                    for _ in range(len(faces_testing))] +
                                   [[0, 1]
                                    for _ in range(len(non_faces_testing))])
        data_set_testing = NumericalDataSet(inputs_testing, targets_testing)

        # 24x24 -> C(5): 20x20 -> P(2): 10x10 -> C(3): 8x8 -> P(2): 4x4 -> C(3): 2x2 -> P(2): 1x1
        net_topo = [('c', 5, 8), ('p', 2), ('c', 3, 16), ('p', 2),
                    ('c', 3, 24), ('p', 2), ('mlp', 24, 24, 2)]
        net = ConvNet(iterations=30, learning_rate=0.01, topo=net_topo)
        net.train(data_set_training)
        preds = net.predict(data_set_testing)
        conf_mat = nputils.create_confidence_matrix(preds, targets_testing, 2)
        num_correct = np.sum(conf_mat.diagonal())
        num_all = np.sum(conf_mat[:, :])
        print "Error rate: " + str(
            100 - (num_correct / num_all * 100)) + "% (" + str(
                int(num_correct)) + "/" + str(int(num_all)) + ")"
Example #17
    def get_dataset(self, raw_inputs, targets=None):
        '''
        @param raw_inputs: numpy.ndarray only inputs
        @param targets: numpy.ndarray (n,1)
        '''
        if self.number_of_features is None:
            # not initialized - choose features first
            self._init_selection(raw_inputs.shape[1])

        if raw_inputs.shape[1] < max(self.feature_indices) + 1:
            raise Exception("Too few variables")

        inputs = raw_inputs[:, self.feature_indices]
        if targets is not None and len(targets.shape) == 2 and targets.shape[1] == 1:
            targets = targets.ravel()
        #TODO: maybe generate some polynomial features here

        return NumericalDataSet(inputs, targets)
Example #18
 def test_train(self):
     """
      Test only that the train function executes without errors
     """
     layerSizes = [2, 2, 1]
     nn = MultilayerPerceptron(layerSizes)

     # preparing input NumericalDataSet: ten identical observations
     inputVec = np.array([[2, 2]])
     targetVec = np.array([[1]])
     nrObs = 10
     inputSet = np.tile(inputVec, (nrObs, 1))
     targetSet = np.tile(targetVec, (nrObs, 1))
     dataSet = NumericalDataSet(inputSet, targetSet)
     nn.train(dataSet)
Example #19
    def test_smoke(self):
        smoke_imgs_training = imgutils.load_images(
            '/home/simon/smoke/training/smoke/', max_num=100)
        non_smoke_imgs_training = imgutils.load_images(
            '/home/simon/smoke/training/non-smoke/', max_num=100)

        inputs_training = np.array(smoke_imgs_training +
                                   non_smoke_imgs_training)
        targets_training = np.array(
            [[1, 0] for _ in range(len(smoke_imgs_training))] +
            [[0, 1] for _ in range(len(non_smoke_imgs_training))])
        data_set_training = NumericalDataSet(inputs_training, targets_training)

        # 100x100 -> C(5): 96x96 -> P(2): 48x48 -> C(5): 44x44 -> P(2): 22x22 -> C(3): 20x20
        #         -> P(2): 10x10 -> C(3): 8x8 -> P(2): 4x4 -> C(3): 2x2 -> P(2): 1x1
        net_topo = [('c', 5, 8), ('p', 2), ('c', 5, 16),
                    ('p', 2), ('c', 3, 24), ('p', 2), ('c', 3, 24), ('p', 2),
                    ('c', 3, 24), ('p', 2), ('mlp', 24, 24, 2)]
        net = ConvNet(iterations=30, learning_rate=0.01, topo=net_topo)
        net.train(data_set_training)
Example #20
 def get_data_set(self):
     '''
     @return: NumericalDataSet
     '''
     return NumericalDataSet(self.inputs, self.targets)
Example #21
    def fit(self, inputs, targets):
        """
        Train net with given data set.
        :param data_set: Data set for training.
        n times random sampling for online learning 
        """
        split_point = int(len(inputs) * self.split_ratio)
        data_set = NumericalDataSet(inputs[:split_point], targets[:split_point])
        val_in = inputs[split_point:]
        val_targets = targets[split_point:]
        prev_layers = None
        prev_mlp = None

        self.train_acc_err = []
        self.val_acc_err = []

        for it in range(self.iterations):
            # randomly select observations as many times as there are
            # observations
            it_error = 0
            start = time.time()
            for _ in range(data_set.get_nr_observations()):
                input_arr, target_arr = data_set.rand_observation()
                # feed-forward
                outputs = self.feedforward(input_arr)
                current_error = nputils.calc_squared_error(target_arr, outputs[-1])
                it_error += current_error

                # mlp backpropagation and gradient descent
                mlp_outputs = outputs[-len(self.mlp.arr_layer_sizes):]
                mlp_deltas = self.mlp.backpropagation(mlp_outputs, target_arr)
                mlp_weight_updates = self.mlp.calculate_weight_updates(mlp_deltas, mlp_outputs)
                self.mlp.update_method.perform_update(self.mlp.weights_arr, mlp_weight_updates, current_error)
                # layer backpropagation and gradient descent
                # calculate backpropagated error of first mlp layer
                backprop_error = np.array([[x] for x in np.dot(self.mlp.weights_arr[0], mlp_deltas[0].transpose())])
                for layer in reversed(self.layers):
                    backprop_error = layer.backpropagate(backprop_error)
                # calculate the weight gradients and update the weights
                for layer in self.layers:
                    layer.calc_gradients()
                    layer.update(self.learning_rate)

            avg_error = it_error / data_set.nrObservations
            acc_err = self._accuracy_err(inputs, targets)
            self.train_acc_err.append(acc_err)

            # validation error
            acc_err = self._accuracy_err(val_in, val_targets)
            self.val_acc_err.append(acc_err)

            logging.info("Iteration #{} MSE: {}, TrainErr: {:.6f}, ValErr: {:.6f} ({:.2f}s)\n"\
                         .format(it + 1, avg_error, self.train_acc_err[-1], self.val_acc_err[-1], time.time()-start))

            # early stopping: stop and revert once validation error exceeds
            # its value from three iterations earlier
            if it > 3 and val_in is not None and self.val_acc_err[-1] > self.val_acc_err[-4]:
                # revert
                self.layers = prev_layers
                self.mlp = prev_mlp
                plt.figure()
                plt.plot(self.train_acc_err)
                plt.plot(self.val_acc_err)
                plt.show(block=False)
                break

            # keep a snapshot of the previous iteration for rollback
            if it > 0:
                prev_layers = copy.deepcopy(self.layers)
                prev_mlp = copy.deepcopy(self.mlp)
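
The break condition implements early stopping with rollback: once the validation error exceeds its value from three iterations earlier, training stops and the net reverts to the snapshot taken one iteration before. The same pattern in isolation (a generic sketch; train_one_epoch, validation_error and model are hypothetical stand-ins):

import copy

def fit_with_rollback(model, max_iterations=30):
    snapshot, history = None, []
    for it in range(max_iterations):
        train_one_epoch(model)                    # hypothetical: one pass over the data
        history.append(validation_error(model))   # hypothetical: error on a held-out split
        if it > 3 and history[-1] > history[-4]:  # worse than three iterations ago
            return snapshot                       # roll back to the last saved state
        if it > 0:
            snapshot = copy.deepcopy(model)
    return model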