def evaluate(self, data_test, **kwargs):
    """
    Evaluate the keras model on the test set.

    Parameters
    ----------
    data_test : tuple or DataGenerator
        Test data. As a tuple, (X_test, Y_test), where X_test is a
        3D ndarray with mel-spectrograms of the test set, shape
        (N_instances, N_hops, N_mel_bands), and Y_test is a 2D
        ndarray with the annotations of the test set (one-hot
        encoding), shape (N_instances, N_classes).
    **kwargs
        Additional keyword arguments passed to evaluate_metrics
        (e.g. a Scaler object to be applied to the data if it is
        not None).

    Returns
    -------
    dict
        Dictionary with one entry per metric, plus the list of
        annotations (ground truth) and the list of model
        predictions.
    """
    return evaluate_metrics(self.model, data_test, self.metrics, **kwargs)
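
# Usage sketch (illustrative; `model_container`, `X_test`, `Y_test` and
# `label_list` are assumed names, not defined in this module):
#
#     results = model_container.evaluate((X_test, Y_test),
#                                        label_list=label_list)
#     acc = results['classification'].results()['overall']['accuracy']
#     print('Test accuracy: {:.4f}'.format(acc))
#
# Besides one entry per metric, `results` also holds the ground-truth
# annotations and the model predictions (see test_evaluate_metrics).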
def on_epoch_end(self, epoch, logs=None):
    """
    Run at the end of each epoch.

    The SED metrics are calculated, printed and saved to the log file.

    Parameters
    ----------
    epoch : int
        Epoch number (from Callback class).
    logs : dict
        Log data (from Callback class).
    """
    if logs is None:
        logs = {}
    results = evaluate_metrics(self.model, self.data, ['sed'],
                               label_list=self.label_list)
    results = results['sed'].results()
    F1 = results['overall']['f_measure']['f_measure']
    ER = results['overall']['error_rate']['error_rate']
    logs['F1'] = F1
    logs['ER'] = ER
    self.current_F1 = F1

    if self.current_F1 > self.best_F1 + self.considered_improvement:
        self.best_F1 = self.current_F1
        self.model.save_weights(self.file_weights)
        msg = ('F1 = {:.4f}, ER = {:.4f} - Best val F1: {:.4f} '
               '(IMPROVEMENT, saving)\n')
        print(msg.format(self.current_F1, ER, self.best_F1))
        self.epochs_since_improvement = 0
        self.epoch_best = epoch
    else:
        msg = 'F1 = {:.4f}, ER = {:.4f} - Best val F1: {:.4f} ({:d})\n'
        print(msg.format(self.current_F1, ER, self.best_F1,
                         self.epoch_best))
        self.epochs_since_improvement += 1

    if self.epochs_since_improvement >= self.early_stopping - 1:
        print('No improvement for %d epochs, stopping the training' %
              self.early_stopping)
        self.model.stop_training = True
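
# Usage sketch (illustrative): this method belongs to a Keras Callback
# subclass. The name `SEDEarlyStopping` and the constructor arguments are
# assumptions that mirror the attributes read above (data, label_list,
# file_weights, considered_improvement, early_stopping).
#
#     callback = SEDEarlyStopping(data=(X_val, Y_val),
#                                 label_list=label_list,
#                                 file_weights='best_weights.hdf5',
#                                 considered_improvement=0.01,
#                                 early_stopping=20)
#     model.fit(X_train, Y_train, epochs=100, callbacks=[callback])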
def on_epoch_end(self, epoch, logs=None):
    """
    Run at the end of each epoch.

    The classification metrics are calculated, printed and saved to
    the log file.

    Parameters
    ----------
    epoch : int
        Epoch number (from Callback class).
    logs : dict
        Log data (from Callback class).
    """
    if logs is None:
        logs = {}
    results = evaluate_metrics(self.model, self.data, ['classification'],
                               label_list=self.label_list)
    results = results['classification'].results()
    acc = results['overall']['accuracy']
    logs['accuracy'] = acc
    self.current_acc = acc

    if self.current_acc > self.best_acc + self.considered_improvement:
        self.best_acc = self.current_acc
        self.model.save_weights(self.file_weights)
        msg = 'Acc = {:.4f} - Best val Acc: {:.4f} (IMPROVEMENT, saving)\n'
        print(msg.format(self.current_acc, self.best_acc))
        self.epochs_since_improvement = 0
        self.epoch_best = epoch
    else:
        print('Acc = {:.4f} - Best val Acc: {:.4f} ({:d})\n'.format(
            self.current_acc, self.best_acc, self.epoch_best))
        self.epochs_since_improvement += 1

    if self.epochs_since_improvement >= self.early_stopping - 1:
        print('No improvement for %d epochs, stopping the training' %
              self.early_stopping)
        self.model.stop_training = True
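
# Because this method writes the metric into `logs`, the per-epoch values
# are also available afterwards in the Keras History object (sketch;
# `acc_callback` is an assumed instance of the Callback subclass this
# method belongs to):
#
#     history = model.fit(X_train, Y_train, epochs=50,
#                         callbacks=[acc_callback])
#     print(history.history['accuracy'])  # one value per epoch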
import numpy as np

# evaluate_metrics is the function under test and is imported from the
# package's metrics module (import path omitted here).


def test_evaluate_metrics():
    class ToyModel:
        def __init__(self, Y_val):
            self.Y_val = Y_val

        def predict(self, X_val):
            return self.Y_val

    Y_val = np.zeros((10, 3))
    Y_val[:, 0] = 1
    Y_val[1:2, 1] = 1
    Y_val[4:7, 1] = 1
    X_val = np.zeros((10, 10))

    toy_model = ToyModel(Y_val)
    label_list = ["class1", "class2", "class3"]
    metrics = ["classification", "sed", "tagging"]

    results = evaluate_metrics(toy_model, ([X_val], [Y_val]), metrics,
                               label_list=label_list)
    # One entry per metric plus 'annotations' and 'predictions'.
    assert type(results) is dict
    assert len(results) == 5
    assert len(results["annotations"]) == 1
    assert np.allclose(results["annotations"], Y_val)
    assert np.allclose(results["predictions"], Y_val)
    assert results["tagging"].results(
    )["overall"]["f_measure"]["f_measure"] == 1.0
    assert results["sed"].results()["overall"]["f_measure"]["f_measure"] == 1.0
    assert results["classification"].results()["overall"]["accuracy"] == 1.0

    # Add a custom metric: a callable of (Y_val, Y_predicted, **kwargs)
    # can be passed instead of a metric name.
    def custom_metric(Y_val, Y_predicted, **kwargs):
        return np.sum(Y_val)

    metrics = [custom_metric]
    results = evaluate_metrics(toy_model, ([X_val], [Y_val]), metrics,
                               label_list=label_list)
    assert type(results) is dict
    assert len(results) == 3
    assert len(results["annotations"]) == 1
    assert np.allclose(results["annotations"], Y_val)
    assert np.allclose(results["predictions"], Y_val)
    assert results[custom_metric] == np.sum(Y_val)

    # Multi-output model: predict() returns a list whose first element
    # holds the class predictions.
    class ToyModel:
        def __init__(self, Y_val):
            self.Y_val = Y_val

        def predict(self, X_val):
            return [self.Y_val, 0, 1]

    toy_model = ToyModel(Y_val)
    results = evaluate_metrics(toy_model, ([X_val], [Y_val]), metrics,
                               label_list=label_list)
    assert type(results) is dict
    assert len(results) == 3
    assert len(results["annotations"]) == 1
    assert np.allclose(results["annotations"][0], Y_val)
    assert np.allclose(results["predictions"][0], Y_val)
    assert results[custom_metric] == np.sum(Y_val)

    # Data generator: evaluate_metrics iterates over its batches.
    class ToyDataGenerator:
        def __init__(self, X_val, Y_val):
            self.X_val = X_val
            self.Y_val = Y_val

        def __len__(self):
            return 3

        def get_data_batch(self, index):
            return [self.X_val], [self.Y_val]

    toy_data_gen = ToyDataGenerator(X_val, Y_val)

    results = evaluate_metrics(toy_model, toy_data_gen, metrics,
                               label_list=label_list)
    assert type(results) is dict
    assert len(results) == 3
    assert len(results["annotations"]) == 3
    assert len(results["predictions"]) == 3
    assert np.allclose(results["annotations"][0], Y_val)
    assert np.allclose(results["predictions"][0], Y_val)