Example #1
    def test_save_conf_mat(self, mock_dataframe):

        # Ideally we would mock out the actual contents written to the file,
        # but that is difficult to fully work out.
        expected_conf_mat = np.array([
            [1, 0, 1],
            [1, 0, 0],
            [0, 1, 2],
        ])
        expected_row_col_names = dict(
            columns=['pred:PAD', 'pred:BACKGROUND', 'pred:OTHER'],
            index=['true:PAD', 'true:BACKGROUND', 'true:OTHER']
        )
        mock_instance_df = mock.Mock(spec=pd.DataFrame)()
        mock_dataframe.return_value = mock_instance_df

        # still omit 'PAD' here; the confusion matrix should include all
        # labels despite the omission
        f1, f1_report = labeler_utils.evaluate_accuracy(
            self.y_pred, self.y_true, self.num_labels,
            self.reverse_label_mapping, omitted_labels=['PAD'],
            verbose=False, confusion_matrix_file='test.csv'
        )

        self.assertTrue((mock_dataframe.call_args[0][0] ==
                         expected_conf_mat).all())
        self.assertDictEqual(
            expected_row_col_names, mock_dataframe.call_args[1])

        mock_instance_df.to_csv.assert_called()
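
The `mock_dataframe` parameter suggests this test is wrapped with a `mock.patch` decorator targeting `pandas.DataFrame` wherever `labeler_utils` references it; the decorator itself is not shown in the excerpt. The `mock.Mock(spec=pd.DataFrame)()` idiom is the non-obvious part: calling a class-specced mock returns an instance mock that carries the same spec, so only real `DataFrame` attributes such as `to_csv` are allowed. A minimal, self-contained sketch of that idiom (not part of the original test file):

from unittest import mock

import pandas as pd

# Calling a class-specced Mock yields an "instance" mock with the same spec.
mock_instance_df = mock.Mock(spec=pd.DataFrame)()

mock_instance_df.to_csv('test.csv')      # allowed: DataFrame defines to_csv
mock_instance_df.to_csv.assert_called()  # passes

try:
    mock_instance_df.not_a_real_method()
except AttributeError:
    # the spec blocks attributes that a real DataFrame does not have
    pass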
Example #2
    def test_omit_2_classes(self):

        expected_output = {
            'OTHER': {
                'precision': 2 / 3,
                'recall': 2 / 3,
                'f1-score': 2 / 3,
                'support': 3,
            },
            'micro avg': {
                'precision': 2 / 3,
                'recall': 2 / 3,
                'f1-score': 2 / 3,
                'support': 3,
            },
            'macro avg': {
                'precision': 2 / 3,
                'recall': 2 / 3,
                'f1-score': 2 / 3,
                'support': 3,
            },
            'weighted avg': {
                'precision': 2 / 3,
                'recall': 2 / 3,
                'f1-score': 2 / 3,
                'support': 3,
            },
        }

        f1, f1_report = labeler_utils.evaluate_accuracy(
            self.y_pred, self.y_true, self.num_labels,
            self.reverse_label_mapping, verbose=False)

        self.assertEqual(2 / 3, f1)
        self.assertDictEqual(expected_output, f1_report)
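
These tests reference `self.y_pred`, `self.y_true`, `self.num_labels`, and `self.reverse_label_mapping`, but the fixture is not included in the excerpt. One hypothetical `setUp` consistent with the confusion matrix in Example #1 and the scores above is sketched below; the exact input format `evaluate_accuracy` accepts (flat index arrays versus per-sample sequences or probability outputs) is an assumption here.

import unittest

import numpy as np


class TestEvaluateAccuracyFixtureSketch(unittest.TestCase):
    """Hypothetical fixture, not the original setUp; the values reproduce the
    confusion matrix in Example #1 (accuracy 0.5, OTHER f1-score 2/3)."""

    def setUp(self):
        self.num_labels = 3
        self.reverse_label_mapping = {0: 'PAD', 1: 'BACKGROUND', 2: 'OTHER'}
        # true labels:  PAD, PAD, BACKGROUND, OTHER, OTHER, OTHER
        self.y_true = np.array([0, 0, 1, 2, 2, 2])
        # predictions:  PAD, OTHER, PAD, BACKGROUND, OTHER, OTHER
        self.y_pred = np.array([0, 2, 0, 1, 2, 2])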
Example #3
    def _validate_training(self,
                           val_data,
                           batch_size_test=32,
                           verbose_log=True,
                           verbose_keras=False):
        """
        Validate the model on the test set and return the evaluation metrics.

        :param val_data: data generator for the validation set
        :type val_data: iterator
        :param batch_size_test: Number of samples to process in testing
        :type batch_size_test: int
        :param verbose_log: whether or not to print out scores for training,
            etc.
        :type verbose_log: bool
        :param verbose_keras: whether or not to print out scores from keras
            during training
        :type verbose_keras: bool
        :return: tuple of (f1-score, f1 report)
        """
        f1 = None
        f1_report = None

        if val_data is None:
            return f1, f1_report

        # Predict on the test set
        batch_id = 0
        y_val_pred = []
        y_val_test = []
        for x_val, y_val in val_data:
            y_val_pred.append(
                self._model.predict(x_val,
                                    batch_size=batch_size_test,
                                    verbose=verbose_keras)[1])
            y_val_test.append(np.argmax(y_val, axis=-1))
            batch_id += 1
            sys.stdout.flush()
            if verbose_log:
                sys.stdout.write("\rEPOCH %g, validation_batch_id %d" %
                                 (self._epoch_id, batch_id))

        tf.keras.backend.set_floatx('float32')
        # Clean the predicted entities and the actual entities
        f1, f1_report = labeler_utils.evaluate_accuracy(
            np.concatenate(y_val_pred, axis=0),
            np.concatenate(y_val_test, axis=0),
            self.num_labels,
            self.reverse_label_mapping,
            verbose=verbose_keras)

        return f1, f1_report
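
The loop above expects `val_data` to yield `(x_val, y_val)` batches in which `y_val` is one-hot encoded over the label classes (hence `np.argmax(y_val, axis=-1)`), and it assumes the model's `predict` output exposes per-character label probabilities at index `[1]`. A hedged sketch of a compatible generator follows; the helper name and the shapes are illustrative assumptions, not part of the library.

import numpy as np


def make_val_data(x_batches, y_index_batches, num_labels):
    """Illustrative helper: yield (x_val, y_val) pairs with one-hot y_val,
    matching what the _validate_training loop consumes."""
    for x_val, y_idx in zip(x_batches, y_index_batches):
        # y_idx holds integer label ids, e.g. shape (batch, seq_len)
        y_val = np.eye(num_labels)[y_idx]  # one-hot, shape (batch, seq_len, num_labels)
        yield x_val, y_val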
Example #4
    def test_no_support_classes(self):

        expected_output = {
            'OTHER': {
                'precision': 2 / 3,
                'recall': 2 / 3,
                'f1-score': 2 / 3,
                'support': 3,
            },
            'NO_SUPPORT': {
                'precision': 0,
                'recall': 0,
                'f1-score': 0,
                'support': 0,
            },
            'NO_SUPPORT2': {
                'precision': 0,
                'recall': 0,
                'f1-score': 0,
                'support': 0,
            },
            'micro avg': {
                'precision': 2 / 3,
                'recall': 2 / 3,
                'f1-score': 2 / 3,
                'support': 3,
            },
            'macro avg': {
                'precision': 2 / 3,
                'recall': 2 / 3,
                'f1-score': 2 / 3,
                'support': 3,
            },
            'weighted avg': {
                'precision': 2 / 3,
                'recall': 2 / 3,
                'f1-score': 2 / 3,
                'support': 3,
            },
        }

        reverse_label_mapping = self.reverse_label_mapping.copy()
        reverse_label_mapping[3] = 'NO_SUPPORT'
        reverse_label_mapping[4] = 'NO_SUPPORT2'

        f1, f1_report = labeler_utils.evaluate_accuracy(
            self.y_pred, self.y_true, self.num_labels + 2,
            reverse_label_mapping, verbose=False)

        self.assertEqual(2 / 3, f1)
        self.assertDictEqual(expected_output, f1_report)
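
Note that the two extra labels only add zero-support, zero-score rows to the report; the micro, macro, and weighted averages and the returned f1 stay at 2/3, exactly as in Example #2.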
Example #5
    def test_verbose(self, mock_stdout):
        f1, f1_report = labeler_utils.evaluate_accuracy(
            self.y_pred, self.y_true, self.num_labels,
            self.reverse_label_mapping, omitted_labels=[], verbose=True)

        self.assertIn('PAD', mock_stdout.getvalue())
        self.assertIn('BACKGROUND', mock_stdout.getvalue())
        self.assertIn('OTHER', mock_stdout.getvalue())
        self.assertIn('weighted avg', mock_stdout.getvalue())
        self.assertIn('accuracy', mock_stdout.getvalue())
        self.assertIn('macro avg', mock_stdout.getvalue())
        self.assertIn('support', mock_stdout.getvalue())
        self.assertIn('f1-score ', mock_stdout.getvalue())
        self.assertIn('F1 Score: ', mock_stdout.getvalue())
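
The `mock_stdout` argument and its `getvalue()` calls suggest the test is decorated with a patch that swaps `sys.stdout` for an in-memory `StringIO` buffer; the decorator is not shown in the excerpt, so its exact form is an assumption. A minimal sketch of that pattern:

import unittest
from io import StringIO
from unittest import mock


class VerbosePatchSketch(unittest.TestCase):
    """Illustrative only: shows how a StringIO-backed stdout patch captures
    anything printed by the code under test."""

    @mock.patch('sys.stdout', new_callable=StringIO)
    def test_captures_output(self, mock_stdout):
        print("F1 Score: 0.5")  # stand-in for the verbose report output
        self.assertIn('F1 Score: ', mock_stdout.getvalue())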
Example #6
    def test_no_omit_class(self):

        expected_output = {
            'PAD': {
                'precision': 1/2,
                'recall': 1/2,
                'f1-score': 1/2,
                'support': 2,
            },
            'BACKGROUND': {
                'precision': 0,
                'recall': 0,
                'f1-score': 0,
                'support': 1,
            },
            'OTHER': {
                'precision': 2 / 3,
                'recall': 2 / 3,
                'f1-score': 2 / 3,
                'support': 3,
            },
            'accuracy': 0.5,
            'macro avg': {
                'precision': (1/2 + 2/3) / 3,
                'recall': (1/2 + 2/3) / 3,
                'f1-score': (1/2 + 2/3) / 3,
                'support': 6,
            },
            'weighted avg': {
                'precision': 1 / 2,
                'recall': 1 / 2,
                'f1-score': 1 / 2,
                'support': 6,
            },
        }

        f1, f1_report = labeler_utils.evaluate_accuracy(
            self.y_pred, self.y_true, self.num_labels,
            self.reverse_label_mapping, omitted_labels=[], verbose=False)

        self.assertEqual((1/2 + 2/3) / 3, f1)
        self.assertDictEqual(expected_output, f1_report)
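
As a sanity check on the returned score: with no labels omitted, `f1` equals the macro-averaged f1-score over all three classes, (1/2 + 0 + 2/3) / 3 ≈ 0.389, and BACKGROUND's zero f1-score is what pulls it below the weighted average of 1/2.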