def fit(self, dataset, batch_size=32, epochs=10, callbacks=None):
    """Compile the network and train it on *dataset*'s train split, validating on its test split.

    Args:
        dataset: object exposing ``x_train``/``y_train``/``x_test``/``y_test`` arrays.
        batch_size: number of samples per batch fed to the generators.
        epochs: number of full passes over the training data.
        callbacks: optional list of Keras callbacks. Defaults to no callbacks.
            (Was a mutable default ``[]`` — replaced with ``None`` so one list
            is not shared and mutated across calls.)
    """
    if callbacks is None:
        callbacks = []

    self.network.compile(loss=self.loss(), optimizer=self.optimizer(), metrics=self.metrics())

    train_sequence = DatasetSequence(
        dataset.x_train,
        dataset.y_train,
        batch_size,
        augment_fn=self.batch_augment_fn,
        format_fn=self.batch_format_fn,
    )
    test_sequence = DatasetSequence(
        dataset.x_test,
        dataset.y_test,
        batch_size,
        augment_fn=self.batch_augment_fn,
        format_fn=self.batch_format_fn,
    )

    self.network.fit_generator(
        train_sequence,
        epochs=epochs,
        callbacks=callbacks,
        validation_data=test_sequence,
        use_multiprocessing=True,
        workers=1,  # NOTE(review): single worker despite use_multiprocessing=True — presumably deliberate; confirm
        shuffle=True,
    )
def evaluate(self, x, y, batch_size: int = 16, verbose: bool = True) -> float:
    """Return mean per-string character accuracy of CTC-decoded predictions on (x, y).

    Accuracy per sample is ``1 - edit_distance(true, pred) / len(true)``;
    strings are built by mapping label indices through ``self.data.mapping``
    and stripping padding characters (``' |_'``).

    Args:
        x: input samples, fed through ``DatasetSequence``.
        y: one-hot (or probability) label array; argmax over the last axis
            yields label indices.
        batch_size: batch size for prediction.
        verbose: if True, print least/most accurate and random predictions.
    """
    test_sequence = DatasetSequence(x, y, batch_size, format_fn=self.batch_format_fn)
    # We can use the `ctc_decoded` layer that is part of our model here.
    decoding_model = KerasModel(inputs=self.network.input, outputs=self.network.get_layer('ctc_decoded').output)
    preds = decoding_model.predict_generator(test_sequence)
    trues = np.argmax(y, -1)
    pred_strings = [''.join(self.data.mapping.get(label, '') for label in pred).strip(' |_') for pred in preds]
    true_strings = [''.join(self.data.mapping.get(label, '') for label in true).strip(' |_') for true in trues]
    # max(..., 1) guards against a ZeroDivisionError when a ground-truth
    # string strips down to empty (the original divided by len() directly).
    char_accuracies = [
        1 - editdistance.eval(true_string, pred_string) / max(len(true_string), 1)
        for pred_string, true_string in zip(pred_strings, true_strings)
    ]
    if verbose:
        sorted_ind = np.argsort(char_accuracies)
        print("\nLeast accurate predictions:")
        for ind in sorted_ind[:5]:
            print(f'True: {true_strings[ind]}')
            print(f'Pred: {pred_strings[ind]}')
        print("\nMost accurate predictions:")
        for ind in sorted_ind[-5:]:
            print(f'True: {true_strings[ind]}')
            print(f'Pred: {pred_strings[ind]}')
        print("\nRandom predictions:")
        for ind in np.random.randint(0, len(char_accuracies), 5):
            print(f'True: {true_strings[ind]}')
            print(f'Pred: {pred_strings[ind]}')
    mean_accuracy = np.mean(char_accuracies)
    return mean_accuracy
def evaluate(self, x, y, verbose=True):
    """Return mean per-string character accuracy of argmax-decoded predictions on (x, y).

    Accuracy per sample is ``1 - edit_distance(true, pred) / len(true)``;
    strings are built by mapping label indices through ``self.data.mapping``
    and stripping padding characters (``' |_'``).

    Args:
        x: input samples, fed through ``DatasetSequence``.
        y: one-hot (or probability) label array; argmax over the last axis
            yields label indices.
        verbose: if True, print least/most accurate and random predictions.
    """
    sequence = DatasetSequence(x, y)
    preds_raw = self.network.predict_generator(sequence)
    trues = np.argmax(y, -1)
    preds = np.argmax(preds_raw, -1)
    pred_strings = [
        ''.join(self.data.mapping.get(label, '') for label in pred).strip(' |_')
        for pred in preds
    ]
    true_strings = [
        ''.join(self.data.mapping.get(label, '') for label in true).strip(' |_')
        for true in trues
    ]
    # max(..., 1) guards against a ZeroDivisionError when a ground-truth
    # string strips down to empty (the original divided by len() directly).
    char_accuracies = [
        1 - editdistance.eval(true_string, pred_string) / max(len(true_string), 1)
        for pred_string, true_string in zip(pred_strings, true_strings)
    ]
    if verbose:
        sorted_ind = np.argsort(char_accuracies)
        print("\nLeast accurate predictions:")
        for ind in sorted_ind[:5]:
            print(f'True: {true_strings[ind]}')
            print(f'Pred: {pred_strings[ind]}')
        print("\nMost accurate predictions:")
        for ind in sorted_ind[-5:]:
            print(f'True: {true_strings[ind]}')
            print(f'Pred: {pred_strings[ind]}')
        print("\nRandom predictions:")
        for ind in np.random.randint(0, len(char_accuracies), 5):
            print(f'True: {true_strings[ind]}')
            print(f'Pred: {pred_strings[ind]}')
    mean_accuracy = np.mean(char_accuracies)
    return mean_accuracy
def evaluate(self, x, y):
    """Return classification accuracy of the network on (x, y).

    Predictions and labels are compared after taking argmax over the last
    axis; the result is the fraction of matching positions.
    """
    # Small batch size keeps memory usage down during prediction.
    sequence = DatasetSequence(x, y, batch_size=16)
    predictions = self.network.predict_generator(sequence)
    predicted_labels = np.argmax(predictions, -1)
    true_labels = np.argmax(y, -1)
    return np.mean(predicted_labels == true_labels)