Example #1
    def test(self, test_data, verbose=True):
        if verbose:
            print('Testing the network...')
        if self.parameters is None:
            # handle the odd case where the user wants to test a completely untrained model
            self.initialize_parameters(test_data)

        validation_batch_size = 5

        test_minibatch_index_sets = Network.get_batch_indices(len(test_data), validation_batch_size)

        class_probabilities, true_labels, precision, recall = \
            self.compute_prediction_precision_and_recall(test_data, test_minibatch_index_sets, verbose=False)

        predicted_labels = np.argmax(class_probabilities, axis=1)
        confusion_matrix = \
            Network.calculate_confusion_matrix(true_labels, predicted_labels, category_count=self.category_count)
        self.save_confusion_matrix_image(confusion_matrix, "monocular")

        results = {'class_scores': class_probabilities,
                   'true_labels': true_labels,
                   'precision': precision,
                   'recall': recall}

        error = self.compute_prediction_error(test_data, test_minibatch_index_sets)
        if verbose:
            print("Test error: ", error)

        if self.output_directory is not None:
            np.savez_compressed(os.path.join(self.output_directory, "sequence_test_results.npz"), **results)

        # per-timestep classification: run each sequence through the network individually
        timestep_predicted_labels = []
        for t in range(len(test_data)):
            x, mask, y = prepare_data([test_data.features[t]], np.array(test_data.labels)[t:t + 1])
            timestep_predicted_labels.append(self.classify_timesteps(x, mask))

        results_all = {'predicted_labels': timestep_predicted_labels,
                       'true_labels': true_labels,
                       'start_frame': [d['start'] for d in test_data.meta_information],
                       'end_frame': [d['end'] for d in test_data.meta_information],
                       'label': [d['label'] for d in test_data.meta_information],
                       'source': [d['source'] for d in test_data.meta_information]}
        if self.output_directory is not None:
            np.savez_compressed(os.path.join(self.output_directory, "timestep_test_results.npz"), **results_all)

        return timestep_predicted_labels
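
Both test() and train() above lean on Network.get_batch_indices, which is not among these examples. The following is a minimal, hypothetical sketch of what it presumably does, inferred only from its call sites (the shuffle=True argument in train() and the batch-sized index sets it feeds to prepare_data); the actual implementation in the repository may differ:

import numpy as np

def get_batch_indices(n, batch_size, shuffle=False):
    """Split range(n) into index sets of batch_size (the last may be smaller)."""
    indices = np.arange(n, dtype=np.int64)
    if shuffle:
        np.random.shuffle(indices)  # randomize sample order between epochs
    return [indices[start:start + batch_size]
            for start in range(0, n, batch_size)]
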
Example #2
    def compute_prediction_precision_and_recall(self, sequence_dataset,
                                                batch_index_sets, verbose=False):
        n_samples = len(sequence_dataset)
        category_probabilities = np.zeros((n_samples, self.category_count)).astype(config.floatX)
        true_labels = np.zeros((n_samples,)).astype('int32')

        # fill in per-sequence class probabilities and true labels one mini-batch at a time
        for batch_index_set in batch_index_sets:
            x, mask, y = prepare_data([sequence_dataset.features[t] for t in batch_index_set],
                                      np.array(sequence_dataset.labels)[batch_index_set])
            minibatch_category_probabilities = self.compute_sequence_class_probabilities(x, mask)
            category_probabilities[batch_index_set, :] = minibatch_category_probabilities
            true_labels[batch_index_set] = np.array(sequence_dataset.labels)[batch_index_set]

        predicted_labels = np.argmax(category_probabilities, axis=1)
        confusion_matrix = Network.calculate_confusion_matrix(true_labels, predicted_labels, self.category_count)
        precision, recall = Network.compute_precision_and_recall(confusion_matrix)

        return category_probabilities, true_labels, precision, recall
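
Network.compute_precision_and_recall is likewise not shown. Assuming the confusion matrix follows the common convention of true labels along rows and predicted labels along columns (an assumption these examples do not confirm), per-class precision and recall could be derived roughly like this:

import numpy as np

def compute_precision_and_recall(confusion_matrix):
    cm = confusion_matrix.astype(np.float64)
    true_positives = np.diag(cm)                      # correct counts per class
    predicted_totals = np.maximum(cm.sum(axis=0), 1)  # column sums; clamped to avoid division by zero
    actual_totals = np.maximum(cm.sum(axis=1), 1)     # row sums
    precision = true_positives / predicted_totals     # per-class precision
    recall = true_positives / actual_totals           # per-class recall
    return precision, recall
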
Example #3
    def multiview_test(self, test_groups, verbose=True):
        """
        :type test_groups: list[list[lstm.data_io.SequenceSet]]
        :param test_groups:
        :param verbose:
        :return:
        """
        true_labels = []
        predicted_labels = []
        category_probabilities = []

        for group in test_groups:
            cumulative_contributions = np.zeros((self.category_count,), dtype=np.float64)
            non_empty_count = 0
            true_label = None
            for sequence_set in group:
                if not sequence_set.empty():
                    true_label = sequence_set.label
                    non_empty_count += 1
                    x, mask, y = prepare_data(sequence_set.features, sequence_set.label)
                    set_category_probabilities = self.compute_sequence_class_probabilities(x, mask)
                    # weight each sequence's class probabilities by its contribution, then accumulate
                    set_category_probabilities *= sequence_set.contributions[np.newaxis].T
                    cumulative_contributions += set_category_probabilities.sum(axis=0)

            if true_label is not None:
                # normalize accumulated scores by the number of contributing (non-empty) views
                group_category_probabilities = cumulative_contributions / non_empty_count
                category_probabilities.append(group_category_probabilities)
                true_labels.append(true_label)
                predicted_label = np.argmax(cumulative_contributions)
                predicted_labels.append(predicted_label)

        true_labels = np.array(true_labels, dtype=np.int32)
        predicted_labels = np.array(predicted_labels, dtype=np.int32)
        confusion_matrix = Network.calculate_confusion_matrix(true_labels, predicted_labels, self.category_count)
        self.save_confusion_matrix_image(confusion_matrix, "multiview")
        precision, recall = Network.compute_precision_and_recall(confusion_matrix)

        results = {'class_scores': category_probabilities,
                   'true_labels': true_labels,
                   'precision': precision,
                   'recall': recall}

        if self.output_directory is not None:
            np.savez_compressed(os.path.join(self.output_directory, "multiview_test_results.npz"), **results)
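
To make the fusion rule in multiview_test concrete, here is a self-contained toy rerun of its core arithmetic with made-up numbers: each view's per-sequence class probabilities are scaled by that sequence's contributions weight, summed into a running score, and the group prediction is the argmax of the accumulated scores.

import numpy as np

# Two hypothetical views of one sample, three classes; rows are sequences.
view_a = np.array([[0.7, 0.2, 0.1],
                   [0.6, 0.3, 0.1]])
contributions_a = np.array([0.5, 0.5])  # per-sequence weights within view A

view_b = np.array([[0.2, 0.5, 0.3]])
contributions_b = np.array([1.0])

cumulative = np.zeros(3)
for probabilities, weights in [(view_a, contributions_a), (view_b, contributions_b)]:
    # mirrors: set_category_probabilities *= sequence_set.contributions[np.newaxis].T
    cumulative += (probabilities * weights[:, np.newaxis]).sum(axis=0)

group_probabilities = cumulative / 2          # divide by the number of non-empty views
predicted_label = int(np.argmax(cumulative))
print(group_probabilities, predicted_label)   # [0.425 0.375 0.2] 0
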
Example #4
    def train(self, training_data, validation_data, test_data,
              batch_size=10, validation_batch_size=5,
              report_interval=50, validation_interval=20, save_interval=20,
              patience=15, max_epochs=300, learning_rate=0.0001, check_gradients=False, verbose=True):
        """
        Train the model.

        :type training_data: lstm.data_io.SequenceDataset
        :param training_data: dataset to use for actual training/learning
        :type validation_data: lstm.data_io.SequenceDataset
        :param validation_data: dataset to use for validation only, i.e. comparison with labels is only used for early
        stop to prevent overfitting.
        :type test_data: lstm.data_io.SequenceDataset
        :param test_data: dataset set aside for testing
        :type batch_size: int
        :param batch_size: size of batches to use for training
        :type validation_batch_size: int
        :param validation_batch_size: size of batches to use for validation
        :type save_interval: int
        :param save_interval: number of updates until the model is written to disk
        (only if model_output_path is specified)
        :type patience: int
        :param patience: number of validation runs to check whether a better validation result is obtained. After
        the number of checks reaches "patience", training will be stopped "early"
        :type learning_rate: float
        :param learning_rate: factor for weight updates
        :type validation_interval: int
        :param validation_interval: number of updates until the next validation run
        :type report_interval: int
        :param report_interval: number of updates until printing the results again to the console
        :type max_epochs: int
        :param max_epochs: maximum number of epochs to train (each epoch will cover batches based on entire dataset)
        :type check_gradients: bool
        :param check_gradients: whether to print out gradients during training
        :type verbose: bool
        :param verbose: print supplementary output
        :return:
        """
        if verbose:
            print('Training the network...')

        if self.parameters is None:
            self.initialize_parameters(training_data)

        validation_batch_indices = Network.get_batch_indices(len(validation_data), validation_batch_size)
        test_batch_indices = Network.get_batch_indices(len(test_data), validation_batch_size)

        error_history = []
        epoch_index_aggregate = []

        if save_interval == -1:
            # a save_interval of -1 means: save once per epoch
            save_interval = len(training_data) // batch_size

        current_update_index = 0
        early_stop = False
        start_time = time.time()

        best_parameters = None
        bad_counter = 0

        # =============== MAIN TRAINING LOOP BEGIN ============================================== #
        try:
            for epoch_index in range(max_epochs):

                epoch_samples_processed = 0

                # Get a new shuffled set of batch indices for the training set.
                train_minibatch_indices = Network.get_batch_indices(len(training_data), batch_size, shuffle=True)

                # traverse all the mini-batches (1 update per minibatch)
                for training_minibatch_indices in train_minibatch_indices:

                    # Select the random sequences for this minibatch
                    batch_features = [training_data.features[t] for t in training_minibatch_indices]
                    batch_labels = [training_data.labels[t] for t in training_minibatch_indices]

                    # Get the data in np.ndarray format
                    # Swaps the axes!
                    # Returns a matrix of shape (minibatch max. len., n samples)
                    batch_features, mask, batch_labels = prepare_data(batch_features, batch_labels)
                    if self.weighted_loss:
                        w = [training_data.weights[t] for t in training_minibatch_indices]
                        inputs = [batch_features, mask, batch_labels, w]
                    else:
                        inputs = [batch_features, mask, batch_labels]

                    epoch_samples_processed += batch_features.shape[1]

                    # Optionally inspect the gradients
                    if check_gradients:
                        gradients = self.compute_gradients(*inputs)
                        print('gradients :', [np.mean(g) for g in Network.theano_to_numpy_grad_array(gradients)])
                        print('parameters :', [np.mean(vv) for kk, vv in self.parameters.as_dict().items()])

                    loss = self.compute_shared_gradient(*inputs)
                    self.update_parameters(learning_rate)

                    if np.isinf(loss):
                        raise ValueError("Inf detected in cost. Aborting.")
                    elif np.isnan(loss):
                        raise ValueError("NaN detected in cost. Aborting.")

                    if (current_update_index + 1) % report_interval == 0:
                        if verbose:
                            print('Epoch:', epoch_index, '| Update:', current_update_index, '| Loss/penalty:', loss)

                    if self.model_output_path is not None and (current_update_index + 1) % save_interval == 0:
                        if verbose:
                            print('Saving...', end=' ')
                        self.parameters.save_to_numpy_archive(self.model_output_path)
                        if verbose:
                            print('Done')

                    if (current_update_index + 1) % validation_interval == 0:
                        self.noise_flag.set_value(0.)
                        training_error = self.compute_prediction_error(training_data, train_minibatch_indices)
                        validation_error = self.compute_prediction_error(validation_data, validation_batch_indices)
                        test_error = self.compute_prediction_error(test_data, test_batch_indices)

                        error_history.append([training_error, validation_error, test_error])
                        epoch_index_aggregate.append([epoch_index, epoch_index, epoch_index])

                        plt.figure(1)
                        plt.clf()
                        lines = plt.plot(np.array(epoch_index_aggregate), np.array(error_history))
                        plt.legend(lines, ['Training error', 'Validation error', 'Test error'])
                        if self.output_directory:
                            plt.savefig(os.path.join(self.output_directory, "error.png"))
                        time.sleep(0.1)  # give the plotting backend a moment to write the figure

                        # Track the best validation result so far. The newest error is already the last
                        # entry of error_history, so compare against the earlier entries only.
                        validation_history = np.array(error_history)[:, 1]
                        best_previous = validation_history[:-1].min() if len(error_history) > 1 else np.inf
                        if validation_error <= best_previous:
                            best_parameters = self.parameters.as_dict()  # save best validation results so far
                            bad_counter = 0
                            if validation_error < best_previous and verbose:
                                print('  New best validation results.')
                        if verbose:
                            print("Training error=%.06f |  Validation error=%.06f | Test error=%.06f" % (
                                training_error, validation_error, test_error))

                        if (len(error_history) > patience
                                and validation_error >= validation_history[:-patience].min()):
                            bad_counter += 1
                            if bad_counter > patience:
                                print("Early stop: validation error exceeded the minimum error " +
                                      "in the last few epochs too many times!")
                                early_stop = True
                                break
                        self.noise_flag.set_value(1.)

                    current_update_index += 1

                if verbose:
                    print('Seen %d samples.' % epoch_samples_processed)

                if early_stop:
                    break

        except KeyboardInterrupt:
            print("Training interrupted")
        # =============== MAIN TRAINING LOOP END ============================================== #

        end_time = time.time()
        if best_parameters is None:
            best_parameters = self.parameters.as_dict()

        self.noise_flag.set_value(0.)
        sorted_train_minibatch_index_sets = Network.get_batch_indices(len(training_data), batch_size)
        training_error = self.compute_prediction_error(training_data, sorted_train_minibatch_index_sets)
        validation_error = self.compute_prediction_error(validation_data, validation_batch_indices)
        test_error = self.compute_prediction_error(test_data, test_batch_indices)
        if verbose:
            print("Training error=%.06f |  Validation error=%.06f |  Test error=%.06f"
                  % (training_error, validation_error, test_error))
            print("The code run for %d epochs, with %f sec/epochs" % (
                (epoch_index + 1), (end_time - start_time) / (1. * (epoch_index + 1))))
            print(("Training took %.1fs" %
                   (end_time - start_time)), file=sys.stderr)
        if self.model_output_path is not None:
            np.savez(self.model_output_path, training_error=training_error, test_error=test_error,
                     validation_error=validation_error, history_errs=error_history, **best_parameters)

        return training_error, validation_error, test_error
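
The early-stopping bookkeeping in train() can be pulled out into a small stand-alone helper for testing. This is only a sketch of the same rule, not part of the repository's API; validation_history is expected to already contain the newest error as its last entry, matching how error_history is appended before the checks run.

import numpy as np

def early_stopping_step(validation_history, patience, bad_counter):
    history = np.asarray(validation_history, dtype=np.float64)
    newest = history[-1]
    if newest <= history.min():  # tied-or-better than every error seen: new best
        bad_counter = 0          # reset the patience counter
    if len(history) > patience and newest >= history[:-patience].min():
        bad_counter += 1         # no improvement over errors older than the patience window
    return bad_counter > patience, bad_counter

Resetting on a tie with the previous best (<=) as well as on strict improvement matches the behavior of the condition in train() above.
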
Example #5
    def compute_prediction_error(self, data, index_sets, compute_histogram_function=None):
        """
        Compute the prediction error.
        :param compute_histogram_function: function to compute the histogram
        :param index_sets: sets of indexes for batches
        :type data: lstm.data_io.SequenceDataset
        :param data: the dataset for which to get classification error consistent of sequences and their labels
        prepare_data: usual prepare_data for that dataset.
        """
        correct_count = 0
        for index_set in index_sets:
            batch_features, mask, batch_labels = prepare_data([data.features[t] for t in index_set],
                                                              np.array(data.labels)[index_set])
            sequence_classifications = self.classify_sequences(batch_features, mask)
            target_labels = np.array(data.labels)[index_set]
            correct_count += (sequence_classifications == target_labels).sum()

        error = 1. - to_numpy_theano_float(correct_count) / len(data)

        if compute_histogram_function is not None:
            batch_features = data.features[:, None, :].astype('float32')
            mask = np.ones((batch_features.shape[0], 1), dtype='float32')
            # first five entries of hs: hidden states (h), cell states (c), and input (i),
            # forget (f), output (o) gate activations; the rest are weight matrices and bias vectors
            hs = compute_histogram_function(batch_features, mask)
            plt.figure(1)
            plt.clf()
            for s in range(5):
                plt.subplot(1, 5, s + 1)
                plt.imshow(np.squeeze(hs[s][:, 0, :]), interpolation='nearest')
                plt.colorbar()
            if self.output_directory is not None:
                plt.savefig(os.path.join(self.output_directory, "hs_test_tmp.png"))

            plt.figure(2)
            plt.clf()
            plt.subplot(3, 1, 1)
            plt.imshow(hs[5], interpolation='nearest')
            plt.colorbar()
            plt.title("hs_Wmatrix_lstm")

            plt.subplot(3, 1, 2)
            plt.imshow(hs[6], interpolation='nearest')
            plt.colorbar()
            plt.title("hs_Umatrix_lstm")

            plt.subplot(3, 1, 3)
            plt.imshow(hs[8], interpolation='nearest')
            plt.colorbar()
            plt.title("hs_Umatrix")
            if self.output_directory is not None:
                plt.savefig(os.path.join(self.output_directory, "hs_matrix.png"))

            plt.figure(3)
            plt.clf()
            plt.subplot(2, 1, 1)
            plt.plot(hs[7])
            plt.title("hs_Bvec_lstm")
            plt.subplot(2, 1, 2)
            plt.plot(hs[9])
            plt.title("hs_Bvec")
            if self.output_directory is not None:
                plt.savefig(os.path.join(self.output_directory, "hs_vector.png"))

            time.sleep(0.1)  # give the plotting backend a moment to write the figures

        return error
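
As a quick sanity check of the error formula at the core of compute_prediction_error, here is a toy rerun with made-up predictions; plain Python division stands in for the to_numpy_theano_float conversion:

import numpy as np

predictions = np.array([0, 2, 1, 1])  # hypothetical classifier output
targets = np.array([0, 2, 2, 1])      # ground-truth labels

correct_count = (predictions == targets).sum()  # 3 of the 4 are correct
error = 1.0 - correct_count / len(targets)      # 1 - 0.75 = 0.25
print(error)                                    # 0.25
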