def fit(self, dataset, batch_size=32, epochs=10, callbacks=None):
        """Compile the network and train it on the dataset's train split.

        Args:
            dataset: object exposing x_train/y_train and x_test/y_test arrays.
            batch_size: samples per gradient update.
            epochs: number of full passes over the training data.
            callbacks: optional list of Keras callbacks (default: none).
        """
        # `callbacks=[]` as a default is the mutable-default-argument pitfall:
        # one shared list is created at def time and reused across all calls.
        if callbacks is None:
            callbacks = []

        self.network.compile(loss=self.loss(), optimizer=self.optimizer(), metrics=self.metrics())

        train_sequence = DatasetSequence(
            dataset.x_train, dataset.y_train, batch_size,
            augment_fn=self.batch_augment_fn, format_fn=self.batch_format_fn,
        )
        # The test split is used as validation data during training.
        test_sequence = DatasetSequence(
            dataset.x_test, dataset.y_test, batch_size,
            augment_fn=self.batch_augment_fn, format_fn=self.batch_format_fn,
        )

        self.network.fit_generator(
            train_sequence,
            epochs=epochs,
            callbacks=callbacks,
            validation_data=test_sequence,
            use_multiprocessing=True,
            workers=1,
            shuffle=True
        )
    def evaluate(self, x, y, batch_size: int = 16, verbose=True) -> float:
        """Return the mean per-string character accuracy on (x, y).

        Accuracy per sample is 1 - edit_distance / len(true_string).
        When verbose, prints the least/most accurate and some random
        predictions for qualitative inspection.

        Args:
            x: model inputs.
            y: one-hot-style labels (argmax over last axis gives label ids).
            batch_size: prediction batch size (small to limit memory).
            verbose: whether to print example predictions.
        """
        test_sequence = DatasetSequence(x, y, batch_size, format_fn=self.batch_format_fn)

        # We can use the `ctc_decoded` layer that is part of our model here.
        decoding_model = KerasModel(inputs=self.network.input, outputs=self.network.get_layer('ctc_decoded').output)
        preds = decoding_model.predict_generator(test_sequence)

        trues = np.argmax(y, -1)
        pred_strings = [''.join(self.data.mapping.get(label, '') for label in pred).strip(' |_') for pred in preds]
        true_strings = [''.join(self.data.mapping.get(label, '') for label in true).strip(' |_') for true in trues]

        # max(len, 1) guards against ZeroDivisionError when a ground-truth
        # string strips down to empty; the original crashed in that case.
        char_accuracies = [
            1 - editdistance.eval(true_string, pred_string) / max(len(true_string), 1)
            for pred_string, true_string in zip(pred_strings, true_strings)
        ]
        if verbose:
            def _show(title, indices):
                # Print true/pred pairs for the given sample indices.
                print(title)
                for ind in indices:
                    print(f'True: {true_strings[ind]}')
                    print(f'Pred: {pred_strings[ind]}')

            sorted_ind = np.argsort(char_accuracies)
            _show("\nLeast accurate predictions:", sorted_ind[:5])
            _show("\nMost accurate predictions:", sorted_ind[-5:])
            _show("\nRandom predictions:", np.random.randint(0, len(char_accuracies), 5))
        mean_accuracy = np.mean(char_accuracies)
        return mean_accuracy
# Example #3
# 0
 def evaluate(self, x, y, verbose=True):
     """Return the mean per-string character accuracy on (x, y).

     Decodes both predictions and labels via argmax, maps label ids to
     characters through `self.data.mapping`, and scores each sample as
     1 - edit_distance / len(true_string).

     Args:
         x: model inputs.
         y: one-hot-style labels (argmax over last axis gives label ids).
         verbose: whether to print example predictions.
     """
     sequence = DatasetSequence(x, y)
     preds_raw = self.network.predict_generator(sequence)
     trues = np.argmax(y, -1)
     preds = np.argmax(preds_raw, -1)
     pred_strings = [
         ''.join(self.data.mapping.get(label, '')
                 for label in pred).strip(' |_') for pred in preds
     ]
     true_strings = [
         ''.join(self.data.mapping.get(label, '')
                 for label in true).strip(' |_') for true in trues
     ]
     # max(len, 1) guards against ZeroDivisionError when a ground-truth
     # string strips down to empty; the original crashed in that case.
     char_accuracies = [
         1 - editdistance.eval(true_string, pred_string) / max(len(true_string), 1)
         for pred_string, true_string in zip(pred_strings, true_strings)
     ]
     if verbose:
         def _show(title, indices):
             # Print true/pred pairs for the given sample indices.
             print(title)
             for ind in indices:
                 print(f'True: {true_strings[ind]}')
                 print(f'Pred: {pred_strings[ind]}')

         sorted_ind = np.argsort(char_accuracies)
         _show("\nLeast accurate predictions:", sorted_ind[:5])
         _show("\nMost accurate predictions:", sorted_ind[-5:])
         _show("\nRandom predictions:", np.random.randint(0, len(char_accuracies), 5))
     mean_accuracy = np.mean(char_accuracies)
     return mean_accuracy
 def evaluate(self, x, y):
     """Return classification accuracy of the network on (x, y)."""
     # A small batch size keeps memory usage down during prediction.
     sequence = DatasetSequence(x, y, batch_size=16)
     predictions = self.network.predict_generator(sequence)
     predicted_labels = np.argmax(predictions, -1)
     true_labels = np.argmax(y, -1)
     return np.mean(predicted_labels == true_labels)