Example #1
 def __to_class_index(class_str: str, dataset: DatasetBase):
     """
     Converts a class name to its index in dataset.class_names().
     An empty or None class name maps to the extra index len(class_names()).
     """
     if not class_str:
         return len(dataset.class_names())
     return dataset.class_names().index(class_str)
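A quick usage sketch (not part of the repository): the helper maps a class name to its position in DatasetBase.class_names(), and an empty or missing name to the extra index one past the last class, which Example #4 later labels 'None'. The stub dataset and the standalone copy of the function below are illustrative assumptions.

class StubDataset:
    # Hypothetical stand-in for DatasetBase with a fixed label list.
    def class_names(self):
        return ['anger', 'joy', 'sadness']

def to_class_index(class_str, dataset):
    # Same logic as __to_class_index above, restated outside the class.
    if not class_str:
        return len(dataset.class_names())
    return dataset.class_names().index(class_str)

dataset = StubDataset()
print(to_class_index('joy', dataset))  # 1 -- position in class_names()
print(to_class_index('', dataset))     # 3 -- the extra 'None' index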
Example #2
 def __get_set(dataset: DatasetBase, set_type: str):
     """
     :param set_type: 'test' for the test set, 'dev' or 'val' for the validation set
     """
     if set_type == 'test':
         return dataset.testing_examples()
     elif set_type == 'dev' or set_type == 'val':
         return dataset.validation_examples()
     else:
         raise ValueError('Unexpected set type {}'.format(set_type))
Example #3
    def __get_in_ref_out(self, dataset: DatasetBase, set_type):
        if set_type == 'valid':
            x_test, y_test = dataset.validation_examples()
        elif set_type == 'test':
            x_test, y_test = dataset.testing_examples()
        else:
            raise ValueError('Unknown set type: {}'.format(set_type))
        y_hat_test = self.predict_class(x_test)
        y_hat_test = [
            LiwcCountsClassifier.__to_class_index(cat_str, dataset)
            for cat_str in y_hat_test
        ]

        return x_test, y_test, y_hat_test
Example #4
 def evaluate_detailed(self, dataset: DatasetBase, set_type):
     _, y_test, y_hat_test = self.__get_in_ref_out(dataset, set_type)
     return metrics.classification_report(
         y_test,
         y_hat_test,
         target_names=dataset.class_names() + ['None'],
         output_dict=True)
Example #5
 def evaluate_detailed(self, dataset: DatasetBase, set_type: str = 'test'):
     y_test, y_hat_test = self.get_test_ref_out_pair(dataset, set_type)
     return metrics.classification_report(
         y_test,
         y_hat_test,
         target_names=dataset.class_names(),
         output_dict=True)
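Usage sketch (assumed caller code, not from the repository): scikit-learn's classification_report with output_dict=True returns a nested dictionary keyed by class name plus 'accuracy', 'macro avg' and 'weighted avg', so the scores can be read directly; `classifier` and `dataset` below are hypothetical.

report = classifier.evaluate_detailed(dataset, set_type='test')
print(report['macro avg']['f1-score'])      # aggregate F1 across classes
for name in dataset.class_names():
    print(name, report[name]['precision'], report[name]['recall'])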
Example #6
 def train(self, dataset: DatasetBase, epochs):
     x_train, y_train = dataset.training_examples()
     cat_to_words_dict = WordEmbeddingsMatching.__group_words_to_cats(
         x_train, y_train)
     cat_to_centroids_and_dist_dict = {
         cat_str: self.__compute_centroid_and_distance(word_set)
         for cat_str, word_set in cat_to_words_dict.items()
     }
Example #7
    def evaluate(self, dataset: DatasetBase, set_type):
        if set_type == 'valid':
            x_test, y_test_one_hot = dataset.validation_examples()
        elif set_type == 'test':
            x_test, y_test_one_hot = dataset.testing_examples()
        else:
            raise ValueError('Unknown set type: {}'.format(set_type))
        y_hat_test_prob = self.predict(x_test)

        y_test = [np.argmax(np.array(r)) for r in y_test_one_hot]
        y_hat_test = [np.argmax(np.array(r)) for r in y_hat_test_prob]

        fscore = metrics.f1_score(y_test, y_hat_test, average='macro')
        accuracy = metrics.accuracy_score(y_test, y_hat_test)

        print(self._model.evaluate(x_test, y_test_one_hot))

        result_dict = {'f-score': fscore, 'accuracy': accuracy}

        return result_dict
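A minimal sketch of the argmax decoding used above, on toy arrays (not project data): both the one-hot references and the predicted probability rows are collapsed to integer class indices before the scikit-learn metrics are computed.

import numpy as np

y_test_one_hot = [[0, 1, 0], [1, 0, 0]]               # reference labels, one-hot
y_hat_test_prob = [[0.2, 0.7, 0.1], [0.6, 0.3, 0.1]]  # predicted probabilities

y_test = [np.argmax(np.array(r)) for r in y_test_one_hot]       # [1, 0]
y_hat_test = [np.argmax(np.array(r)) for r in y_hat_test_prob]  # [1, 0]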
Example #8
    def precision_recall_curves(self, dataset: DatasetBase, set_type='test'):
        class_count = dataset.class_count()
        y_out_prob = self.get_test_out_prob(dataset, set_type)
        y_ref = self.get_test_ref_labels(dataset, set_type)
        y_ref_onehot = np.array([
            SequenceClassificationModel._to_one_hot(index, class_count)
            for index in y_ref
        ])

        precision_dict, recall_dict, threshold_dict, average_precision_dict = \
            SequenceClassificationModel.precision_recall_curve_per_class(
                y_ref_onehot, y_out_prob, dataset.class_names())
        (precision_dict['micro'], recall_dict['micro'], threshold_dict['micro'],
         average_precision_dict['micro']) = \
            SequenceClassificationModel.precision_recall_curve_micro_average(
                y_ref_onehot, y_out_prob, class_count)
        (precision_dict['macro'], recall_dict['macro'], threshold_dict['macro'],
         average_precision_dict['macro']) = \
            SequenceClassificationModel.precision_recall_curve_macro_average(
                y_ref_onehot, y_out_prob, class_count)

        return precision_dict, recall_dict, threshold_dict, average_precision_dict
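Plotting sketch (matplotlib and the key layout are assumptions, not shown in the repository): if the per-class helper keys its outputs by class name, the returned dictionaries also carry 'micro' and 'macro' entries, so each curve can be drawn in one loop; `model` and `dataset` below are hypothetical.

import matplotlib.pyplot as plt

precision, recall, _, avg_precision = model.precision_recall_curves(dataset, set_type='test')
for key in precision:  # assumed keys: class names plus 'micro' and 'macro'
    plt.plot(recall[key], precision[key],
             label='{} (AP={:.2f})'.format(key, avg_precision[key]))
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.legend()
plt.show()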
Example #9
    def train(self, dataset: DatasetBase, epochs):
        self.reset_cached_test_output()

        start = time.process_time()
        learning_rate = 5e-5
        optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
        self._model.compile(optimizer=optimizer, loss=self._model.compute_loss)
        history = self._model.fit(
            dataset.training_examples(),
            epochs=epochs,
            validation_data=dataset.validation_examples(),
            verbose=run_verbosity)
        duration = time.process_time() - start

        self._model_params_dict['learning_rate'] = learning_rate
        self._model_params_dict['epochs'] = epochs
        self._model_params_dict['training timestamp'] = \
            datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        self._model_params_dict['training time'] = duration
        self._model_params_dict['class count'] = dataset.class_count()

        return history
Example #10
    def train(self, dataset: DatasetBase, epochs):
        if self.__class_count == 1:
            loss = tf.keras.losses.BinaryCrossentropy()
            metrics = [tf.keras.metrics.BinaryCrossentropy()]
        elif self.__class_count >= 2:
            loss = tf.keras.losses.CategoricalCrossentropy()
            metrics = [tf.keras.metrics.CategoricalCrossentropy()]
        else:
            raise ValueError(
                'Unexpected value for class count. Provided is {}'.format(
                    self.__class_count))

        self._model.compile(optimizer=tf.keras.optimizers.Adam(),
                            loss=loss,
                            metrics=metrics)

        x_train, y_train = dataset.training_examples()
        x_valid, y_valid = dataset.validation_examples()
        history = self._model.fit(x_train,
                                  y_train,
                                  batch_size=64,
                                  epochs=epochs,
                                  validation_data=(x_valid, y_valid))
        return history
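A compile-only sketch of the same loss selection with a toy Keras model (the Dense layer, input size and class_count value are assumptions): a single output unit is paired with binary cross-entropy, two or more classes with categorical cross-entropy.

import tensorflow as tf

class_count = 3
model = tf.keras.Sequential([
    tf.keras.layers.Dense(class_count, activation='softmax', input_shape=(16,))
])
if class_count == 1:
    loss = tf.keras.losses.BinaryCrossentropy()
else:
    loss = tf.keras.losses.CategoricalCrossentropy()
model.compile(optimizer=tf.keras.optimizers.Adam(), loss=loss)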