def evaluate(data, model, onset_threshold=0.5, frame_threshold=0.5, save_path=None):
    metrics = defaultdict(list)

    for label in data:
        pred, losses = model.run_on_batch(label)

        for key, loss in losses.items():
            metrics[key].append(loss.item())

        for key, value in pred.items():
            value.squeeze_(0).relu_()

        p_ref, i_ref, v_ref = extract_notes(label['onset'], label['frame'], label['velocity'])
        p_est, i_est, v_est = extract_notes(pred['onset'], pred['frame'], pred['velocity'],
                                            onset_threshold, frame_threshold)

        t_ref, f_ref = notes_to_frames(p_ref, i_ref, label['frame'].shape)
        t_est, f_est = notes_to_frames(p_est, i_est, pred['frame'].shape)

        scaling = HOP_LENGTH / SAMPLE_RATE

        i_ref = (i_ref * scaling).reshape(-1, 2)
        p_ref = np.array([midi_to_hz(MIN_MIDI + midi) for midi in p_ref])
        i_est = (i_est * scaling).reshape(-1, 2)
        p_est = np.array([midi_to_hz(MIN_MIDI + midi) for midi in p_est])

        t_ref = t_ref.astype(np.float64) * scaling
        f_ref = [np.array([midi_to_hz(MIN_MIDI + midi) for midi in freqs]) for freqs in f_ref]
        t_est = t_est.astype(np.float64) * scaling
        f_est = [np.array([midi_to_hz(MIN_MIDI + midi) for midi in freqs]) for freqs in f_est]

        p, r, f, o = evaluate_notes(i_ref, p_ref, i_est, p_est, offset_ratio=None)
        metrics['metric/note/precision'].append(p)
        metrics['metric/note/recall'].append(r)
        metrics['metric/note/f1'].append(f)
        metrics['metric/note/overlap'].append(o)

        p, r, f, o = evaluate_notes(i_ref, p_ref, i_est, p_est)
        metrics['metric/note-with-offsets/precision'].append(p)
        metrics['metric/note-with-offsets/recall'].append(r)
        metrics['metric/note-with-offsets/f1'].append(f)
        metrics['metric/note-with-offsets/overlap'].append(o)

        p, r, f, o = evaluate_notes_with_velocity(i_ref, p_ref, v_ref, i_est, p_est, v_est,
                                                  offset_ratio=None, velocity_tolerance=0.1)
        metrics['metric/note-with-velocity/precision'].append(p)
        metrics['metric/note-with-velocity/recall'].append(r)
        metrics['metric/note-with-velocity/f1'].append(f)
        metrics['metric/note-with-velocity/overlap'].append(o)

        p, r, f, o = evaluate_notes_with_velocity(i_ref, p_ref, v_ref, i_est, p_est, v_est,
                                                  velocity_tolerance=0.1)
        metrics['metric/note-with-offsets-and-velocity/precision'].append(p)
        metrics['metric/note-with-offsets-and-velocity/recall'].append(r)
        metrics['metric/note-with-offsets-and-velocity/f1'].append(f)
        metrics['metric/note-with-offsets-and-velocity/overlap'].append(o)

        frame_metrics = evaluate_frames(t_ref, f_ref, t_est, f_est)
        metrics['metric/frame/f1'].append(hmean([frame_metrics['Precision'] + eps,
                                                 frame_metrics['Recall'] + eps]) - eps)

        for key, loss in frame_metrics.items():
            metrics['metric/frame/' + key.lower().replace(' ', '_')].append(loss)

        if save_path is not None:
            os.makedirs(save_path, exist_ok=True)
            label_path = os.path.join(save_path, os.path.basename(label['path']) + '.label.png')
            save_pianoroll(label_path, label['onset'], label['frame'])
            pred_path = os.path.join(save_path, os.path.basename(label['path']) + '.pred.png')
            save_pianoroll(pred_path, pred['onset'], pred['frame'])
            midi_path = os.path.join(save_path, os.path.basename(label['path']) + '.pred.mid')
            save_midi(midi_path, p_est, i_est, v_est)

    return metrics
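
# A minimal reporting sketch, not part of the original module: assuming `metrics`
# is the dict of per-song lists returned by evaluate() above (and that numpy is
# imported as `np`, as the functions in this file already assume), this helper
# averages each metric across songs.
def summarize_metrics(metrics):
    """Return a {metric_name: mean_across_songs} dict, e.g. for logging."""
    return {key: float(np.mean(values)) for key, values in metrics.items()}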
def evaluate(metrics, model, inputs, targets, onset_threshold=0.5, frame_threshold=0.5, save_path=None):
    # NB: this can't be decorated with tf.function because the extract_notes
    # functions are not pure TF code.
    mel = audio_to_mel(inputs)
    onset_pred, offset_pred, frame_pred, velocity_pred = model(mel, training=False)
    onset_labels, offset_labels, frame_labels, velocity_labels, path_labels = targets

    # for key, loss in losses.items():
    #     metrics[key].append(loss.item())
    # todo: add loss metrics

    # We're working with a batch size of 1, so remove the batch dimension from everything.
    onset_pred = tf.squeeze(onset_pred)
    offset_pred = tf.squeeze(offset_pred)
    frame_pred = tf.squeeze(frame_pred)
    velocity_pred = tf.squeeze(velocity_pred)
    onset_labels = tf.squeeze(onset_labels)
    offset_labels = tf.squeeze(offset_labels)
    frame_labels = tf.squeeze(frame_labels)
    velocity_labels = tf.squeeze(velocity_labels)
    path_labels = tf.squeeze(path_labels).numpy().decode("utf-8")

    p_ref, i_ref, v_ref = extract_notes(onset_labels, frame_labels, velocity_labels)
    p_est, i_est, v_est = extract_notes(onset_pred, frame_pred, velocity_pred,
                                        onset_threshold, frame_threshold)

    t_ref, f_ref = notes_to_frames(p_ref, i_ref, frame_labels.shape)
    t_est, f_est = notes_to_frames(p_est, i_est, frame_pred.shape)

    scaling = HOP_LENGTH / SAMPLE_RATE

    i_ref = (i_ref * scaling).reshape(-1, 2)
    p_ref = np.array([midi_to_hz(MIN_MIDI + midi) for midi in p_ref])
    i_est = (i_est * scaling).reshape(-1, 2)
    p_est = np.array([midi_to_hz(MIN_MIDI + midi) for midi in p_est])

    t_ref = t_ref.astype(np.float64) * scaling
    f_ref = [np.array([midi_to_hz(MIN_MIDI + midi) for midi in freqs]) for freqs in f_ref]
    t_est = t_est.astype(np.float64) * scaling
    f_est = [np.array([midi_to_hz(MIN_MIDI + midi) for midi in freqs]) for freqs in f_est]

    p, r, f, o = evaluate_notes(i_ref, p_ref, i_est, p_est, offset_ratio=None)
    metrics['metric/note/precision'].append(p)
    metrics['metric/note/recall'].append(r)
    metrics['metric/note/f1'].append(f)
    metrics['metric/note/overlap'].append(o)

    p, r, f, o = evaluate_notes(i_ref, p_ref, i_est, p_est)
    metrics['metric/note-with-offsets/precision'].append(p)
    metrics['metric/note-with-offsets/recall'].append(r)
    metrics['metric/note-with-offsets/f1'].append(f)
    metrics['metric/note-with-offsets/overlap'].append(o)

    p, r, f, o = evaluate_notes_with_velocity(i_ref, p_ref, v_ref, i_est, p_est, v_est,
                                              offset_ratio=None, velocity_tolerance=0.1)
    metrics['metric/note-with-velocity/precision'].append(p)
    metrics['metric/note-with-velocity/recall'].append(r)
    metrics['metric/note-with-velocity/f1'].append(f)
    metrics['metric/note-with-velocity/overlap'].append(o)

    p, r, f, o = evaluate_notes_with_velocity(i_ref, p_ref, v_ref, i_est, p_est, v_est,
                                              velocity_tolerance=0.1)
    metrics['metric/note-with-offsets-and-velocity/precision'].append(p)
    metrics['metric/note-with-offsets-and-velocity/recall'].append(r)
    metrics['metric/note-with-offsets-and-velocity/f1'].append(f)
    metrics['metric/note-with-offsets-and-velocity/overlap'].append(o)

    frame_metrics = evaluate_frames(t_ref, f_ref, t_est, f_est)
    metrics['metric/frame/f1'].append(hmean([frame_metrics['Precision'] + eps,
                                             frame_metrics['Recall'] + eps]) - eps)

    for key, loss in frame_metrics.items():
        metrics['metric/frame/' + key.lower().replace(' ', '_')].append(loss)

    if save_path is not None:
        os.makedirs(save_path, exist_ok=True)
        label_path = os.path.join(save_path, os.path.basename(path_labels) + '.label.png')
        save_pianoroll(label_path, onset_labels, frame_labels)
        pred_path = os.path.join(save_path, os.path.basename(path_labels) + '.pred.png')
        save_pianoroll(pred_path, onset_pred, frame_pred)
        midi_path = os.path.join(save_path, os.path.basename(path_labels) + '.pred.mid')
        save_midi(midi_path, p_est, i_est, v_est)

    return metrics
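
# Hedged driver sketch (`valid_set` and `transcriber` are assumed names): the TF
# variant above mutates a shared `metrics` dict and expects batch-size-1 eager
# tensors, so a typical evaluation pass over a tf.data pipeline looks like this.
def evaluate_dataset(valid_set, transcriber, save_path=None):
    metrics = defaultdict(list)
    for inputs, targets in valid_set:  # one song per element
        evaluate(metrics, transcriber, inputs, targets, save_path=save_path)
    return metrics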
def evaluate(data, model, logging_info, onset_threshold=0.5, frame_threshold=0.5, save_path=None):
    metrics = defaultdict(list)
    song_names = list()

    for label in data:
        song_names.append(label['path'].split("/")[-1])
        pred, losses = model.run_on_batch(label)

        for key, loss in losses.items():
            metrics[key].append(loss.item())

        for key, value in pred.items():
            value.squeeze_(0).relu_()

        p_ref, i_ref, v_ref = extract_notes(label['onset'], label['frame'], label['velocity'])
        p_est, i_est, v_est = extract_notes(pred['onset'], pred['frame'], pred['velocity'],
                                            onset_threshold, frame_threshold)

        t_ref, f_ref = notes_to_frames(p_ref, i_ref, label['frame'].shape)
        t_est, f_est = notes_to_frames(p_est, i_est, pred['frame'].shape)

        scaling = HOP_LENGTH / SAMPLE_RATE

        i_ref = (i_ref * scaling).reshape(-1, 2)
        p_ref = np.array([midi_to_hz(MIN_MIDI + midi) for midi in p_ref])
        i_est = (i_est * scaling).reshape(-1, 2)
        p_est = np.array([midi_to_hz(MIN_MIDI + midi) for midi in p_est])

        t_ref = t_ref.astype(np.float64) * scaling
        f_ref = [np.array([midi_to_hz(MIN_MIDI + midi) for midi in freqs]) for freqs in f_ref]
        t_est = t_est.astype(np.float64) * scaling
        f_est = [np.array([midi_to_hz(MIN_MIDI + midi) for midi in freqs]) for freqs in f_est]

        p, r, f, o = evaluate_notes(i_ref, p_ref, i_est, p_est, offset_ratio=None)
        metrics['metric/note/precision'].append(p)
        metrics['metric/note/recall'].append(r)
        metrics['metric/note/f1'].append(f)
        metrics['metric/note/overlap'].append(o)

        p, r, f, o = evaluate_notes(i_ref, p_ref, i_est, p_est)
        metrics['metric/note-with-offsets/precision'].append(p)
        metrics['metric/note-with-offsets/recall'].append(r)
        metrics['metric/note-with-offsets/f1'].append(f)
        metrics['metric/note-with-offsets/overlap'].append(o)

        p, r, f, o = evaluate_notes_with_velocity(i_ref, p_ref, v_ref, i_est, p_est, v_est,
                                                  offset_ratio=None, velocity_tolerance=0.1)
        metrics['metric/note-with-velocity/precision'].append(p)
        metrics['metric/note-with-velocity/recall'].append(r)
        metrics['metric/note-with-velocity/f1'].append(f)
        metrics['metric/note-with-velocity/overlap'].append(o)

        p, r, f, o = evaluate_notes_with_velocity(i_ref, p_ref, v_ref, i_est, p_est, v_est,
                                                  velocity_tolerance=0.1)
        metrics['metric/note-with-offsets-and-velocity/precision'].append(p)
        metrics['metric/note-with-offsets-and-velocity/recall'].append(r)
        metrics['metric/note-with-offsets-and-velocity/f1'].append(f)
        metrics['metric/note-with-offsets-and-velocity/overlap'].append(o)

        frame_metrics = evaluate_frames(t_ref, f_ref, t_est, f_est)
        metrics['metric/frame/f1'].append(hmean([frame_metrics['Precision'] + eps,
                                                 frame_metrics['Recall'] + eps]) - eps)

        for key, loss in frame_metrics.items():
            metrics['metric/frame/' + key.lower().replace(' ', '_')].append(loss)

        if save_path is not None:
            os.makedirs(save_path, exist_ok=True)
            label_path = os.path.join(save_path, os.path.basename(label['path']) + '.label.png')
            save_pianoroll(label_path, label['onset'], label['frame'])
            pred_path = os.path.join(save_path, os.path.basename(label['path']) + '.pred.png')
            save_pianoroll(pred_path, pred['onset'], pred['frame'])
            midi_path = os.path.join(save_path, os.path.basename(label['path']) + '.pred.mid')
            save_midi(midi_path, p_est, i_est, v_est)

    # Create a table of per-song results, sorted by note-with-offsets F1 score.
    rename_dict = dict()
    for key in metrics:
        if key.startswith('loss/'):
            _, category = key.split('/')
            rename_dict[key] = category + " loss"
        if key.startswith('metric/'):
            _, category, name = key.split('/')
            rename_dict[key] = category + " " + name

    model_file, dataset_name = logging_info
    log_str = (model_file + "_" + dataset_name).replace("/", "-")

    evaluation_by_song_df = pd.DataFrame.from_dict(metrics, orient='index').transpose()
    evaluation_by_song_df.insert(0, "song_name", song_names)
    evaluation_by_song_df.rename(columns=rename_dict, inplace=True)
    evaluation_by_song_df.sort_values("note-with-offsets f1", ascending=True, inplace=True)
    evaluation_by_song_df.to_csv("./evaluations/new_evals/" + log_str + "_by_song.csv", index=False)

    # Per-model summary: the mean of each numeric column (song_name is excluded).
    model_df = evaluation_by_song_df.mean(numeric_only=True)
    model_df.to_csv("./evaluations/new_evals/" + log_str + "_model.csv")

    return metrics
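
# Hedged usage note (an addition, not in the original): the to_csv calls above
# assume the ./evaluations/new_evals/ directory already exists; on a fresh
# checkout, create it first, e.g.:
#
#     os.makedirs('./evaluations/new_evals', exist_ok=True)
#     metrics = evaluate(dataset, transcriber, ('model.pt', 'MAPS'))
#
# where `dataset`, `transcriber`, 'model.pt', and 'MAPS' are placeholder names.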
def _evaluate_metrics(metrics, pred, label, onset_threshold, frame_threshold):
    """
    Evaluate diverse metrics.

    Parameters
    ----------
    metrics: Dict in which to store the metrics for each song.
    pred: Prediction result.
    label: Ground truth label.
    onset_threshold: Threshold for onset prediction.
    frame_threshold: Threshold for frame prediction.
    """
    for key, value in pred.items():
        if key == 'path':
            continue
        value.squeeze_(0).relu_()

    # In the combined label, value 3 marks onsets and values above 1 mark active frames.
    label_onset = (label == 3).float()
    label_frame = (label > 1).float()

    p_ref, i_ref = extract_notes(label_onset, label_frame)
    p_est, i_est = extract_notes(pred['onset'], pred['frame'], onset_threshold, frame_threshold)

    i_ref, p_ref, t_ref, f_ref = _rescale_notes(i_ref, p_ref, label_frame.shape)
    i_est, p_est, t_est, f_est = _rescale_notes(i_est, p_est, pred['frame'].shape)

    p, r, f, o = evaluate_notes(i_ref, p_ref, i_est, p_est, offset_ratio=None)
    metrics['metric/note/precision'].append(p)
    metrics['metric/note/recall'].append(r)
    metrics['metric/note/f1'].append(f)
    metrics['metric/note/overlap'].append(o)

    p, r, f, o = evaluate_notes(i_ref, p_ref, i_est, p_est)
    metrics['metric/note-with-offsets/precision'].append(p)
    metrics['metric/note-with-offsets/recall'].append(r)
    metrics['metric/note-with-offsets/f1'].append(f)
    metrics['metric/note-with-offsets/overlap'].append(o)

    # Velocity-based metrics are disabled: this variant's extract_notes does not
    # return velocities, so v_ref/v_est are unavailable.
    # if len(match_notes(i_ref, p_ref,
    #                    i_est, p_est, offset_ratio=None)) == 0:
    #     p, r, f, o = 0.0, 0.0, 0.0, 0.0
    # else:
    #     p, r, f, o = evaluate_notes_with_velocity(i_ref, p_ref, v_ref,
    #                                               i_est, p_est, v_est,
    #                                               offset_ratio=None,
    #                                               velocity_tolerance=0.1)
    # metrics['metric/note-with-velocity/precision'].append(p)
    # metrics['metric/note-with-velocity/recall'].append(r)
    # metrics['metric/note-with-velocity/f1'].append(f)
    # metrics['metric/note-with-velocity/overlap'].append(o)

    # if len(match_notes(i_ref, p_ref, i_est, p_est)) == 0:
    #     p, r, f, o = 0.0, 0.0, 0.0, 0.0
    # else:
    #     p, r, f, o = evaluate_notes_with_velocity(i_ref, p_ref, v_ref,
    #                                               i_est, p_est, v_est,
    #                                               velocity_tolerance=0.1)
    # metrics['metric/note-with-offsets-and-velocity/precision'].append(p)
    # metrics['metric/note-with-offsets-and-velocity/recall'].append(r)
    # metrics['metric/note-with-offsets-and-velocity/f1'].append(f)
    # metrics['metric/note-with-offsets-and-velocity/overlap'].append(o)

    frame_metrics = evaluate_frames(t_ref, f_ref, t_est, f_est)
    metrics['metric/frame/f1'].append(hmean([frame_metrics['Precision'] + eps,
                                             frame_metrics['Recall'] + eps]) - eps)

    for key, loss in frame_metrics.items():
        metrics['metric/frame/' + key.lower().replace(' ', '_')].append(loss)
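
# Hedged sketch (an assumption, not the original definition): `_rescale_notes`
# is not defined in this section. Based on the identical inline conversions in
# the other evaluate variants in this file, it plausibly converts frame indices
# to seconds and MIDI note numbers to Hz, returning (intervals, pitches, times, freqs).
def _rescale_notes(intervals, pitches, frame_shape):
    times, freqs = notes_to_frames(pitches, intervals, frame_shape)
    scaling = HOP_LENGTH / SAMPLE_RATE
    intervals = (intervals * scaling).reshape(-1, 2)
    pitches = np.array([midi_to_hz(MIN_MIDI + midi) for midi in pitches])
    times = times.astype(np.float64) * scaling
    freqs = [np.array([midi_to_hz(MIN_MIDI + midi) for midi in f]) for f in freqs]
    return intervals, pitches, times, freqs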
def evaluate(batch, model, device, save_path=None, criterion=None, sampling_method='argmax',
             rep_type='base', plot_example=False, recursive=True, detail_eval=False, delay=1):
    # TODO: input: prediction & label. output: metric
    metrics = defaultdict(list)
    acc_conf = []

    # Only ground-truth sampling uses teacher forcing; everything else decodes greedily.
    gt_ratio = 1.0 if sampling_method == 'gt' else 0.0

    with th.no_grad():
        preds, losses = models.run_on_batch(model, batch, device[0],
                                            sampling_method=sampling_method,
                                            gt_ratio=gt_ratio,
                                            criterion=criterion,
                                            rep_type=rep_type,
                                            recursive=recursive,
                                            delay=delay)

    losses = losses.cpu().numpy()
    metrics['loss'].extend(list(np.atleast_1d(losses)))

    for n in range(preds.shape[0]):
        label = dict()
        pred = preds[n]
        argmax_pred = pred.argmax(dim=0)
        for key in batch:
            label[key] = batch[key][n]

        if detail_eval:
            acc_conf.append(calculate_acc_conf(
                pred.cpu().numpy().transpose((1, 2, 0)),
                label['shifted_label'][delay:].cpu().numpy()))
        else:
            acc_conf.append(None)

        onset_ref, offset_ref, frame_ref = representation.base2onsets_and_frames(
            label['shifted_label'][delay:])
        onsets, offsets, frames = representation.convert2onsets_and_frames(argmax_pred, rep_type)

        p_ref, i_ref, v_ref = extract_notes(onset_ref, frame_ref)
        p_est, i_est, v_est = extract_notes(onsets, frames)

        t_ref, f_ref = notes_to_frames(p_ref, i_ref, frame_ref.shape)
        t_est, f_est = notes_to_frames(p_est, i_est, frames.shape)

        scaling = HOP_LENGTH / SAMPLE_RATE

        i_ref = (i_ref * scaling).reshape(-1, 2)
        p_ref = np.array([midi_to_hz(MIN_MIDI + midi) for midi in p_ref])
        i_est = (i_est * scaling).reshape(-1, 2)
        p_est = np.array([midi_to_hz(MIN_MIDI + midi) for midi in p_est])

        t_ref = t_ref.astype(np.float64) * scaling
        f_ref = [np.array([midi_to_hz(MIN_MIDI + midi) for midi in freqs]) for freqs in f_ref]
        t_est = t_est.astype(np.float64) * scaling
        f_est = [np.array([midi_to_hz(MIN_MIDI + midi) for midi in freqs]) for freqs in f_est]

        p, r, f, o = evaluate_notes(i_ref, p_ref, i_est, p_est, offset_ratio=None)
        metrics['metric/note/precision'].append(p)
        metrics['metric/note/recall'].append(r)
        metrics['metric/note/f1'].append(f)
        metrics['metric/note/overlap'].append(o)

        p, r, f, o = evaluate_notes(i_ref, p_ref, i_est, p_est)
        metrics['metric/note-with-offsets/precision'].append(p)
        metrics['metric/note-with-offsets/recall'].append(r)
        metrics['metric/note-with-offsets/f1'].append(f)
        metrics['metric/note-with-offsets/overlap'].append(o)

        frame_metrics = evaluate_frames(t_ref, f_ref, t_est, f_est)
        metrics['metric/frame/f1'].append(hmean([frame_metrics['Precision'] + eps,
                                                 frame_metrics['Recall'] + eps]) - eps)

        for key, value in frame_metrics.items():
            metrics['metric/frame/' + key.lower().replace(' ', '_')].append(value)

        if plot_example:
            pred = pred.cpu().numpy().transpose(1, 2, 0)
            label = label['shifted_label'][delay:].cpu().numpy()
            os.makedirs(save_path, exist_ok=True)
            basename = Path(save_path) / Path(batch['path'][n]).stem
            np.save(str(basename) + '_label.npy', label)
            np.save(str(basename) + f'_pred_{sampling_method}.npy', pred)
            draw_predictions_with_label(str(basename) + '_pred.png', pred, label)
            # midi_path = str(basename) + f'_pred_{global_step}.mid'
            # save_midi(midi_path, p_est, i_est, v_est)

    return metrics, acc_conf
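
# Hedged usage sketch (loader and model names are assumptions): `evaluate` above
# returns per-batch metric lists plus per-example acc/conf info, so a pass over a
# full dataloader might merge the results like this.
def evaluate_loader(loader, model, device, **kwargs):
    merged, all_acc_conf = defaultdict(list), []
    for batch in loader:
        batch_metrics, acc_conf = evaluate(batch, model, device, **kwargs)
        for key, values in batch_metrics.items():
            merged[key].extend(values)
        all_acc_conf.extend(acc_conf)
    return merged, all_acc_conf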
def evaluate_onf(batch, model, device, save_path=None, criterion=None, sampling_method='argmax',
                 rep_type='base', plot_example=False, recursive=True, detail_eval=False, delay=1):
    metrics = defaultdict(list)

    with th.no_grad():
        preds, losses = models.run_on_batch_onf(model, batch, device[0])

    losses = losses.cpu().numpy()
    metrics['loss'].extend([losses])

    for n in range(preds['frame'].shape[0]):
        label = dict()
        for key in batch:
            label[key] = batch[key][n]

        onset_ref, offset_ref, frame_ref = representation.base2onsets_and_frames(
            label['shifted_label'][delay:])

        onsets = preds['onset'][n] > 0.5
        offsets = preds['offset'][n] > 0.5
        frames = preds['frame'][n] > 0.5

        p_ref, i_ref, v_ref = extract_notes(onset_ref, frame_ref)
        p_est, i_est, v_est = extract_notes(onsets, frames)

        t_ref, f_ref = notes_to_frames(p_ref, i_ref, frame_ref.shape)
        t_est, f_est = notes_to_frames(p_est, i_est, frames.shape)

        scaling = HOP_LENGTH / SAMPLE_RATE

        i_ref = (i_ref * scaling).reshape(-1, 2)
        p_ref = np.array([midi_to_hz(MIN_MIDI + midi) for midi in p_ref])
        i_est = (i_est * scaling).reshape(-1, 2)
        p_est = np.array([midi_to_hz(MIN_MIDI + midi) for midi in p_est])

        t_ref = t_ref.astype(np.float64) * scaling
        f_ref = [np.array([midi_to_hz(MIN_MIDI + midi) for midi in freqs]) for freqs in f_ref]
        t_est = t_est.astype(np.float64) * scaling
        f_est = [np.array([midi_to_hz(MIN_MIDI + midi) for midi in freqs]) for freqs in f_est]

        p, r, f, o = evaluate_notes(i_ref, p_ref, i_est, p_est, offset_ratio=None)
        metrics['metric/note/precision'].append(p)
        metrics['metric/note/recall'].append(r)
        metrics['metric/note/f1'].append(f)
        metrics['metric/note/overlap'].append(o)

        p, r, f, o = evaluate_notes(i_ref, p_ref, i_est, p_est)
        metrics['metric/note-with-offsets/precision'].append(p)
        metrics['metric/note-with-offsets/recall'].append(r)
        metrics['metric/note-with-offsets/f1'].append(f)
        metrics['metric/note-with-offsets/overlap'].append(o)

        frame_metrics = evaluate_frames(t_ref, f_ref, t_est, f_est)
        metrics['metric/frame/f1'].append(hmean([frame_metrics['Precision'] + eps,
                                                 frame_metrics['Recall'] + eps]) - eps)

        for key, value in frame_metrics.items():
            metrics['metric/frame/' + key.lower().replace(' ', '_')].append(value)

    return metrics, None
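
# Hedged usage note: evaluate_onf mirrors evaluate's (metrics, acc_conf) return
# shape (with acc_conf fixed to None) so both can drive the same reporting loop.
# A minimal sketch, assuming `valid_loader` and `onf_model` exist:
#
#     metrics, _ = evaluate_onf(next(iter(valid_loader)), onf_model, device)
#     print(float(np.mean(metrics['metric/note/f1'])))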