Example #1
def evaluate(model, batch, device):
    metrics = defaultdict(list)
    batch = allocate_batch(batch, device)

    frame_logit, onset_logit = model(batch['audio'])

    criterion = nn.BCEWithLogitsLoss()
    frame_loss = criterion(frame_logit, batch['frame'])
    onset_loss = criterion(onset_logit, batch['onset'])
    # use .item() so evaluation also works when gradients are enabled
    metrics['metric/loss/frame_loss'].append(frame_loss.item())
    metrics['metric/loss/onset_loss'].append(onset_loss.item())

    for n in range(batch['audio'].shape[0]):
        frame_pred = th.sigmoid(frame_logit[n])
        onset_pred = th.sigmoid(onset_logit[n])

        pr, re, f1 = framewise_eval(frame_pred, batch['frame'][n])
        metrics['metric/frame/frame_precision'].append(pr)
        metrics['metric/frame/frame_recall'].append(re)
        metrics['metric/frame/frame_f1'].append(f1)

        pr, re, f1 = framewise_eval(onset_pred, batch['onset'][n])
        metrics['metric/frame/onset_precision'].append(pr)
        metrics['metric/frame/onset_recall'].append(re)
        metrics['metric/frame/onset_f1'].append(f1)

        p_est, i_est = extract_notes(onset_pred, frame_pred)
        p_ref, i_ref = extract_notes(batch['onset'][n], batch['frame'][n])

        scaling = HOP_SIZE / SAMPLE_RATE

        i_ref = (i_ref * scaling).reshape(-1, 2)
        p_ref = np.array([midi_to_hz(MIN_MIDI + pitch) for pitch in p_ref])
        i_est = (i_est * scaling).reshape(-1, 2)
        p_est = np.array([midi_to_hz(MIN_MIDI + pitch) for pitch in p_est])

        p, r, f, o = evaluate_notes(i_ref,
                                    p_ref,
                                    i_est,
                                    p_est,
                                    offset_ratio=None)
        metrics['metric/note/precision'].append(p)
        metrics['metric/note/recall'].append(r)
        metrics['metric/note/f1'].append(f)
        metrics['metric/note/overlap'].append(o)

        p, r, f, o = evaluate_notes(i_ref, p_ref, i_est, p_est)
        metrics['metric/note-with-offsets/precision'].append(p)
        metrics['metric/note-with-offsets/recall'].append(r)
        metrics['metric/note-with-offsets/f1'].append(f)
        metrics['metric/note-with-offsets/overlap'].append(o)

    return metrics
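A minimal usage sketch for this variant (not from the source): assuming a PyTorch DataLoader named loader plus the model and device from the surrounding script, the per-batch metric lists merge into one summary dict.

from collections import defaultdict

import torch as th

summary = defaultdict(list)
model.eval()
with th.no_grad():  # no gradients are needed for evaluation
    for batch in loader:
        for key, values in evaluate(model, batch, device).items():
            summary[key].extend(values)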
Example #2
def evaluate(data, model, onset_threshold=0.5, frame_threshold=0.5, save_path=None):
    metrics = defaultdict(list)

    for label in data:
        pred, losses = model.run_on_batch(label)

        for key, loss in losses.items():
            metrics[key].append(loss.item())

        for key, value in pred.items():
            value.squeeze_(0).relu_()

        p_ref, i_ref, v_ref = extract_notes(label['onset'], label['frame'], label['velocity'])
        p_est, i_est, v_est = extract_notes(pred['onset'], pred['frame'], pred['velocity'], onset_threshold, frame_threshold)

        t_ref, f_ref = notes_to_frames(p_ref, i_ref, label['frame'].shape)
        t_est, f_est = notes_to_frames(p_est, i_est, pred['frame'].shape)

        scaling = HOP_LENGTH / SAMPLE_RATE

        i_ref = (i_ref * scaling).reshape(-1, 2)
        p_ref = np.array([midi_to_hz(MIN_MIDI + midi) for midi in p_ref])
        i_est = (i_est * scaling).reshape(-1, 2)
        p_est = np.array([midi_to_hz(MIN_MIDI + midi) for midi in p_est])

        t_ref = t_ref.astype(np.float64) * scaling
        f_ref = [np.array([midi_to_hz(MIN_MIDI + midi) for midi in freqs]) for freqs in f_ref]
        t_est = t_est.astype(np.float64) * scaling
        f_est = [np.array([midi_to_hz(MIN_MIDI + midi) for midi in freqs]) for freqs in f_est]

        p, r, f, o = evaluate_notes(i_ref, p_ref, i_est, p_est, offset_ratio=None)
        metrics['metric/note/precision'].append(p)
        metrics['metric/note/recall'].append(r)
        metrics['metric/note/f1'].append(f)
        metrics['metric/note/overlap'].append(o)

        p, r, f, o = evaluate_notes(i_ref, p_ref, i_est, p_est)
        metrics['metric/note-with-offsets/precision'].append(p)
        metrics['metric/note-with-offsets/recall'].append(r)
        metrics['metric/note-with-offsets/f1'].append(f)
        metrics['metric/note-with-offsets/overlap'].append(o)

        p, r, f, o = evaluate_notes_with_velocity(i_ref, p_ref, v_ref, i_est, p_est, v_est,
                                                  offset_ratio=None, velocity_tolerance=0.1)
        metrics['metric/note-with-velocity/precision'].append(p)
        metrics['metric/note-with-velocity/recall'].append(r)
        metrics['metric/note-with-velocity/f1'].append(f)
        metrics['metric/note-with-velocity/overlap'].append(o)

        p, r, f, o = evaluate_notes_with_velocity(i_ref, p_ref, v_ref, i_est, p_est, v_est, velocity_tolerance=0.1)
        metrics['metric/note-with-offsets-and-velocity/precision'].append(p)
        metrics['metric/note-with-offsets-and-velocity/recall'].append(r)
        metrics['metric/note-with-offsets-and-velocity/f1'].append(f)
        metrics['metric/note-with-offsets-and-velocity/overlap'].append(o)

        frame_metrics = evaluate_frames(t_ref, f_ref, t_est, f_est)
        metrics['metric/frame/f1'].append(hmean([frame_metrics['Precision'] + eps, frame_metrics['Recall'] + eps]) - eps)

        for key, loss in frame_metrics.items():
            metrics['metric/frame/' + key.lower().replace(' ', '_')].append(loss)

        if save_path is not None:
            os.makedirs(save_path, exist_ok=True)
            label_path = os.path.join(save_path, os.path.basename(label['path']) + '.label.png')
            save_pianoroll(label_path, label['onset'], label['frame'])
            pred_path = os.path.join(save_path, os.path.basename(label['path']) + '.pred.png')
            save_pianoroll(pred_path, pred['onset'], pred['frame'])
            midi_path = os.path.join(save_path, os.path.basename(label['path']) + '.pred.mid')
            save_midi(midi_path, p_est, i_est, v_est)

    return metrics
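The frame-F1 line above uses a trick worth spelling out: scipy's hmean is only defined for strictly positive inputs, so precision and recall are shifted by a tiny eps before the harmonic mean and the shift is subtracted afterwards. A self-contained sketch, assuming eps is machine epsilon as in the usual implementation:

import sys
from scipy.stats import hmean

eps = sys.float_info.epsilon

def stabilized_f1(precision, recall):
    # hmean requires strictly positive inputs; the eps shift keeps the
    # result defined (and near zero) when precision or recall is zero.
    return hmean([precision + eps, recall + eps]) - eps

print(stabilized_f1(0.8, 0.5))  # ~0.6154, the ordinary F1
print(stabilized_f1(0.0, 0.9))  # ~0.0 instead of an undefined value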
Example #3
def evaluate(metrics,
             model,
             inputs,
             targets,
             onset_threshold=0.5,
             frame_threshold=0.5,
             save_path=None):

    # NB: this can't be decorated with tf.function because of all the extract_notes functions not being pure TF code.

    mel = audio_to_mel(inputs)

    onset_pred, offset_pred, frame_pred, velocity_pred = model(mel,
                                                               training=False)
    onset_labels, offset_labels, frame_labels, velocity_labels, path_labels = targets

    # for key, loss in losses.items():
    #     metrics[key].append(loss.item()) # todo: add loss metrics

    # We're working with batch size of 1, so remove the first index for everything.
    onset_pred = tf.squeeze(onset_pred)
    offset_pred = tf.squeeze(offset_pred)
    frame_pred = tf.squeeze(frame_pred)
    velocity_pred = tf.squeeze(velocity_pred)

    onset_labels = tf.squeeze(onset_labels)
    offset_labels = tf.squeeze(offset_labels)
    frame_labels = tf.squeeze(frame_labels)
    velocity_labels = tf.squeeze(velocity_labels)
    path_labels = tf.squeeze(path_labels).numpy().decode("utf-8")

    p_ref, i_ref, v_ref = extract_notes(onset_labels, frame_labels,
                                        velocity_labels)
    p_est, i_est, v_est = extract_notes(onset_pred, frame_pred, velocity_pred,
                                        onset_threshold, frame_threshold)

    t_ref, f_ref = notes_to_frames(p_ref, i_ref, frame_labels.shape)
    t_est, f_est = notes_to_frames(p_est, i_est, frame_pred.shape)

    scaling = HOP_LENGTH / SAMPLE_RATE

    i_ref = (i_ref * scaling).reshape(-1, 2)
    p_ref = np.array([midi_to_hz(MIN_MIDI + midi) for midi in p_ref])
    i_est = (i_est * scaling).reshape(-1, 2)
    p_est = np.array([midi_to_hz(MIN_MIDI + midi) for midi in p_est])

    t_ref = t_ref.astype(np.float64) * scaling
    f_ref = [
        np.array([midi_to_hz(MIN_MIDI + midi) for midi in freqs])
        for freqs in f_ref
    ]
    t_est = t_est.astype(np.float64) * scaling
    f_est = [
        np.array([midi_to_hz(MIN_MIDI + midi) for midi in freqs])
        for freqs in f_est
    ]

    p, r, f, o = evaluate_notes(i_ref, p_ref, i_est, p_est, offset_ratio=None)
    metrics['metric/note/precision'].append(p)
    metrics['metric/note/recall'].append(r)
    metrics['metric/note/f1'].append(f)
    metrics['metric/note/overlap'].append(o)

    p, r, f, o = evaluate_notes(i_ref, p_ref, i_est, p_est)
    metrics['metric/note-with-offsets/precision'].append(p)
    metrics['metric/note-with-offsets/recall'].append(r)
    metrics['metric/note-with-offsets/f1'].append(f)
    metrics['metric/note-with-offsets/overlap'].append(o)

    p, r, f, o = evaluate_notes_with_velocity(i_ref,
                                              p_ref,
                                              v_ref,
                                              i_est,
                                              p_est,
                                              v_est,
                                              offset_ratio=None,
                                              velocity_tolerance=0.1)
    metrics['metric/note-with-velocity/precision'].append(p)
    metrics['metric/note-with-velocity/recall'].append(r)
    metrics['metric/note-with-velocity/f1'].append(f)
    metrics['metric/note-with-velocity/overlap'].append(o)

    p, r, f, o = evaluate_notes_with_velocity(i_ref,
                                              p_ref,
                                              v_ref,
                                              i_est,
                                              p_est,
                                              v_est,
                                              velocity_tolerance=0.1)
    metrics['metric/note-with-offsets-and-velocity/precision'].append(p)
    metrics['metric/note-with-offsets-and-velocity/recall'].append(r)
    metrics['metric/note-with-offsets-and-velocity/f1'].append(f)
    metrics['metric/note-with-offsets-and-velocity/overlap'].append(o)

    frame_metrics = evaluate_frames(t_ref, f_ref, t_est, f_est)
    metrics['metric/frame/f1'].append(
        hmean(
            [frame_metrics['Precision'] + eps, frame_metrics['Recall'] +
             eps]) - eps)

    for key, loss in frame_metrics.items():
        metrics['metric/frame/' + key.lower().replace(' ', '_')].append(loss)

    if save_path is not None:
        os.makedirs(save_path, exist_ok=True)
        label_path = os.path.join(save_path,
                                  os.path.basename(path_labels) + '.label.png')
        save_pianoroll(label_path, onset_labels, frame_labels)
        pred_path = os.path.join(save_path,
                                 os.path.basename(path_labels) + '.pred.png')
        save_pianoroll(pred_path, onset_pred, frame_pred)
        midi_path = os.path.join(save_path,
                                 os.path.basename(path_labels) + '.pred.mid')
        save_midi(midi_path, p_est, i_est, v_est)

    return metrics
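One concrete illustration of the comment at the top of this example (a sketch, not from the source): because extract_notes and the mir_eval scoring run as plain Python, only the pure-TF forward pass can be wrapped in tf.function. The wrapper name below is hypothetical; model and audio_to_mel are assumed from the example.

import tensorflow as tf

@tf.function
def forward_pass(inputs):
    # Pure TensorFlow ops only, so this trace can be compiled.
    mel = audio_to_mel(inputs)
    return model(mel, training=False)

# Note extraction and mir_eval scoring then run eagerly on the outputs.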
Example #4
def evaluate(model, batch, device, save=False, save_path=None):
    metrics = defaultdict(list)
    batch = allocate_batch(batch, device)

    frame_logit, onset_logit = model(batch['audio'])

    criterion = nn.BCEWithLogitsLoss()
    frame_loss = criterion(frame_logit, batch['frame'])
    onset_loss = criterion(onset_logit, batch['onset'])
    # use .item() so evaluation also works when gradients are enabled
    metrics['metric/loss/frame_loss'].append(frame_loss.item())
    metrics['metric/loss/onset_loss'].append(onset_loss.item())

    for n in range(batch['audio'].shape[0]):
        frame_pred = th.sigmoid(frame_logit[n])
        onset_pred = th.sigmoid(onset_logit[n])

        pr, re, f1 = framewise_eval(frame_pred, batch['frame'][n])
        metrics['metric/frame/frame_precision'].append(pr)
        metrics['metric/frame/frame_recall'].append(re)
        metrics['metric/frame/frame_f1'].append(f1)

        pr, re, f1 = framewise_eval(onset_pred, batch['onset'][n])
        metrics['metric/frame/onset_precision'].append(pr)
        metrics['metric/frame/onset_recall'].append(re)
        metrics['metric/frame/onset_f1'].append(f1)

        p_est, i_est = extract_notes(onset_pred, frame_pred)
        p_ref, i_ref = extract_notes(batch['onset'][n], batch['frame'][n])

        scaling = HOP_SIZE / SAMPLE_RATE

        i_ref = (i_ref * scaling).reshape(-1, 2)
        p_ref = np.array([midi_to_hz(MIN_MIDI + pitch) for pitch in p_ref])
        i_est = (i_est * scaling).reshape(-1, 2)
        p_est = np.array([midi_to_hz(MIN_MIDI + pitch) for pitch in p_est])

        p, r, f, o = evaluate_notes(i_ref,
                                    p_ref,
                                    i_est,
                                    p_est,
                                    offset_ratio=None)
        metrics['metric/note/precision'].append(p)
        metrics['metric/note/recall'].append(r)
        metrics['metric/note/f1'].append(f)
        metrics['metric/note/overlap'].append(o)

        p, r, f, o = evaluate_notes(i_ref, p_ref, i_est, p_est)
        metrics['metric/note-with-offsets/precision'].append(p)
        metrics['metric/note-with-offsets/recall'].append(r)
        metrics['metric/note-with-offsets/f1'].append(f)
        metrics['metric/note-with-offsets/overlap'].append(o)

        if save:
            if len(p_est) == 0:
                print(
                    f'no onset detected. skip: {Path(batch["path"][n]).stem}')
                continue  # nothing to save for this clip
            midi_filename = Path(save_path) / (Path(batch['path'][n]).stem +
                                               '.midi')
            save_midi(midi_filename, p_est, i_est, [64] * len(p_est))

            wav_filename = Path(save_path) / (Path(batch['path'][n]).stem +
                                              '.wav')
            midi_file = pretty_midi.PrettyMIDI(str(midi_filename))
            synth_audio = midi_file.fluidsynth(fs=16000)
            soundfile.write(wav_filename, synth_audio, 16000)

    return metrics
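Unlike the other examples, this variant never creates save_path itself, so a hedged invocation sketch (the directory name is hypothetical):

import os

save_dir = 'runs/eval'                # hypothetical output directory
os.makedirs(save_dir, exist_ok=True)  # the function assumes it exists
metrics = evaluate(model, batch, device, save=True, save_path=save_dir)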
Example #5
def evaluate(data, model, logging_info, onset_threshold=0.5, frame_threshold=0.5, save_path=None):
    metrics = defaultdict(list)

    song_names = list()
    for label in data:
        song_names.append(os.path.basename(label['path']))  # song filename
        pred, losses = model.run_on_batch(label)

        for key, loss in losses.items():
            metrics[key].append(loss.item())

        for key, value in pred.items():
            value.squeeze_(0).relu_()

        p_ref, i_ref, v_ref = extract_notes(label['onset'], label['frame'], label['velocity'])
        p_est, i_est, v_est = extract_notes(pred['onset'], pred['frame'], pred['velocity'], onset_threshold, frame_threshold)

        t_ref, f_ref = notes_to_frames(p_ref, i_ref, label['frame'].shape)
        t_est, f_est = notes_to_frames(p_est, i_est, pred['frame'].shape)

        scaling = HOP_LENGTH / SAMPLE_RATE

        i_ref = (i_ref * scaling).reshape(-1, 2)
        p_ref = np.array([midi_to_hz(MIN_MIDI + midi) for midi in p_ref])
        i_est = (i_est * scaling).reshape(-1, 2)
        p_est = np.array([midi_to_hz(MIN_MIDI + midi) for midi in p_est])

        t_ref = t_ref.astype(np.float64) * scaling
        f_ref = [np.array([midi_to_hz(MIN_MIDI + midi) for midi in freqs]) for freqs in f_ref]
        t_est = t_est.astype(np.float64) * scaling
        f_est = [np.array([midi_to_hz(MIN_MIDI + midi) for midi in freqs]) for freqs in f_est]

        p, r, f, o = evaluate_notes(i_ref, p_ref, i_est, p_est, offset_ratio=None)
        metrics['metric/note/precision'].append(p)
        metrics['metric/note/recall'].append(r)
        metrics['metric/note/f1'].append(f)
        metrics['metric/note/overlap'].append(o)

        p, r, f, o = evaluate_notes(i_ref, p_ref, i_est, p_est)
        metrics['metric/note-with-offsets/precision'].append(p)
        metrics['metric/note-with-offsets/recall'].append(r)
        metrics['metric/note-with-offsets/f1'].append(f)
        metrics['metric/note-with-offsets/overlap'].append(o)

        p, r, f, o = evaluate_notes_with_velocity(i_ref, p_ref, v_ref, i_est, p_est, v_est,
                                                  offset_ratio=None, velocity_tolerance=0.1)
        metrics['metric/note-with-velocity/precision'].append(p)
        metrics['metric/note-with-velocity/recall'].append(r)
        metrics['metric/note-with-velocity/f1'].append(f)
        metrics['metric/note-with-velocity/overlap'].append(o)

        p, r, f, o = evaluate_notes_with_velocity(i_ref, p_ref, v_ref, i_est, p_est, v_est, velocity_tolerance=0.1)
        metrics['metric/note-with-offsets-and-velocity/precision'].append(p)
        metrics['metric/note-with-offsets-and-velocity/recall'].append(r)
        metrics['metric/note-with-offsets-and-velocity/f1'].append(f)
        metrics['metric/note-with-offsets-and-velocity/overlap'].append(o)

        frame_metrics = evaluate_frames(t_ref, f_ref, t_est, f_est)
        metrics['metric/frame/f1'].append(hmean([frame_metrics['Precision'] + eps, frame_metrics['Recall'] + eps]) - eps)

        for key, loss in frame_metrics.items():
            metrics['metric/frame/' + key.lower().replace(' ', '_')].append(loss)

        if save_path is not None:
            os.makedirs(save_path, exist_ok=True)
            label_path = os.path.join(save_path, os.path.basename(label['path']) + '.label.png')
            save_pianoroll(label_path, label['onset'], label['frame'])
            pred_path = os.path.join(save_path, os.path.basename(label['path']) + '.pred.png')
            save_pianoroll(pred_path, pred['onset'], pred['frame'])
            midi_path = os.path.join(save_path, os.path.basename(label['path']) + '.pred.mid')
            save_midi(midi_path, p_est, i_est, v_est)

    # Create a per-song results table, sorted by note-with-offsets F1 score
    rename_dict = dict()
    for key in metrics:
        if key.startswith('loss/'):
            _, category = key.split('/')
            rename_dict[key] = category + " loss"
        if key.startswith('metric/'):
            _, category, name = key.split('/')
            rename_dict[key] = category + " " + name

    model_file, dataset_name = logging_info
    log_str = (model_file + "_" + dataset_name).replace("/", "-")
    evaluation_by_song_df = pd.DataFrame.from_dict(metrics, orient='index').transpose()
    evaluation_by_song_df.insert(0, "song_name", song_names)
    evaluation_by_song_df.rename(columns=rename_dict, inplace=True)
    evaluation_by_song_df.sort_values("note-with-offsets f1", ascending=True, inplace=True)

    os.makedirs("./evaluations/new_evals", exist_ok=True)
    evaluation_by_song_df.to_csv("./evaluations/new_evals/" + log_str + "_by_song.csv", index=False)

    # mean over numeric columns only; keep metric names as the CSV index
    model_df = evaluation_by_song_df.mean(numeric_only=True)
    model_df.to_csv("./evaluations/new_evals/" + log_str + "_model.csv")
    return metrics
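A toy illustration of the DataFrame reshaping used above, since orient='index' followed by .transpose() is easy to misread: rows become songs and columns become metric keys.

import pandas as pd

metrics = {'metric/note/f1': [0.91, 0.84],
           'metric/frame/f1': [0.88, 0.80]}
df = pd.DataFrame.from_dict(metrics, orient='index').transpose()
print(df)
#    metric/note/f1  metric/frame/f1
# 0            0.91             0.88
# 1            0.84             0.80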
Example #6
def _evaluate_metrics(metrics, pred, label, onset_threshold, frame_threshold):
    """
    Evaluate diverse metrics.

    Parameters
    ----------
    metrics: Dict to store metric for each song.
    label: Ground truth label.
    pred: Prediction result.
    onset_threshold: Threshold for onset prediction.
    frame_threshold: Threshold for frame prediction.
    """
    for key, value in pred.items():
        if key == 'path':
            continue
        value.squeeze_(0).relu_()
    label_onset = (label == 3).float()
    label_frame = (label > 1).float()
    p_ref, i_ref = extract_notes(label_onset, label_frame)
    p_est, i_est = extract_notes(pred['onset'], pred['frame'], onset_threshold,
                                 frame_threshold)

    i_ref, p_ref, t_ref, f_ref = _rescale_notes(i_ref, p_ref,
                                                label_frame.shape)
    i_est, p_est, t_est, f_est = _rescale_notes(i_est, p_est,
                                                pred['frame'].shape)

    p, r, f, o = evaluate_notes(i_ref, p_ref, i_est, p_est, offset_ratio=None)
    metrics['metric/note/precision'].append(p)
    metrics['metric/note/recall'].append(r)
    metrics['metric/note/f1'].append(f)
    metrics['metric/note/overlap'].append(o)

    p, r, f, o = evaluate_notes(i_ref, p_ref, i_est, p_est)
    metrics['metric/note-with-offsets/precision'].append(p)
    metrics['metric/note-with-offsets/recall'].append(r)
    metrics['metric/note-with-offsets/f1'].append(f)
    metrics['metric/note-with-offsets/overlap'].append(o)

    # if len(match_notes(i_ref, p_ref,
    #                    i_est, p_est, offset_ratio=None)) == 0:
    #     p, r, f, o = 0.0, 0.0, 0.0, 0.0
    # else:
    #     p, r, f, o = evaluate_notes_with_velocity(i_ref, p_ref, v_ref,
    #                                               i_est, p_est, v_est,
    #                                               offset_ratio=None,
    #                                               velocity_tolerance=0.1)
    # metrics['metric/note-with-velocity/precision'].append(p)
    # metrics['metric/note-with-velocity/recall'].append(r)
    # metrics['metric/note-with-velocity/f1'].append(f)
    # metrics['metric/note-with-velocity/overlap'].append(o)

    # if len(match_notes(i_ref, p_ref, i_est, p_est)) == 0:
    #     p, r, f, o = 0.0, 0.0, 0.0, 0.0
    # else:
    #     p, r, f, o = evaluate_notes_with_velocity(i_ref, p_ref, v_ref,
    #                                               i_est, p_est, v_est,
    #                                               velocity_tolerance=0.1)
    # metrics['metric/note-with-offsets-and-velocity/precision'].append(p)
    # metrics['metric/note-with-offsets-and-velocity/recall'].append(r)
    # metrics['metric/note-with-offsets-and-velocity/f1'].append(f)
    # metrics['metric/note-with-offsets-and-velocity/overlap'].append(o)

    frame_metrics = evaluate_frames(t_ref, f_ref, t_est, f_est)
    metrics['metric/frame/f1'].append(
        hmean(
            [frame_metrics['Precision'] + eps, frame_metrics['Recall'] +
             eps]) - eps)

    for key, loss in frame_metrics.items():
        metrics['metric/frame/' + (key.lower().replace(' ', '_'))].append(loss)
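_rescale_notes is not shown in this example. Based on the identical inline code in Examples #2 and #3, a plausible reconstruction follows; it is a sketch rather than the author's actual helper, and it assumes notes_to_frames, midi_to_hz, HOP_LENGTH, SAMPLE_RATE, and MIN_MIDI from that shared context.

import numpy as np

def _rescale_notes(intervals, pitches, frame_shape):
    # Hypothetical reconstruction mirroring Examples #2 and #3: convert
    # frame indices to seconds and MIDI note numbers to Hz.
    times, freqs = notes_to_frames(pitches, intervals, frame_shape)
    scaling = HOP_LENGTH / SAMPLE_RATE
    intervals = (intervals * scaling).reshape(-1, 2)
    pitches = np.array([midi_to_hz(MIN_MIDI + midi) for midi in pitches])
    times = times.astype(np.float64) * scaling
    freqs = [np.array([midi_to_hz(MIN_MIDI + midi) for midi in fs])
             for fs in freqs]
    return intervals, pitches, times, freqs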
Example #7
def evaluate(batch,
             model,
             device,
             save_path=None,
             criterion=None,
             sampling_method='argmax',
             rep_type='base',
             plot_example=False,
             recursive=True,
             detail_eval=False,
             delay=1):
    # TODO: input: prediction & label. output: metric
    metrics = defaultdict(list)
    acc_conf = []
    # gt_ratio is 0.0 for every mode except explicit ground-truth sampling
    gt_ratio = 1.0 if sampling_method == 'gt' else 0.0
    with th.no_grad():
        preds, losses = models.run_on_batch(model,
                                            batch,
                                            device[0],
                                            sampling_method=sampling_method,
                                            gt_ratio=gt_ratio,
                                            criterion=criterion,
                                            rep_type=rep_type,
                                            recursive=recursive,
                                            delay=delay)
    losses = losses.cpu().numpy()
    metrics['loss'].extend(list(np.atleast_1d(losses)))

    for n in range(preds.shape[0]):
        label = dict()
        pred = preds[n]
        argmax_pred = pred.argmax(dim=0)
        for key in batch:
            label[key] = batch[key][n]

        if detail_eval:
            acc_conf.append(
                calculate_acc_conf(
                    pred.cpu().numpy().transpose((1, 2, 0)),
                    label['shifted_label'][delay:].cpu().numpy()))
        else:
            acc_conf.append(None)

        onset_ref, offset_ref, frame_ref = representation.base2onsets_and_frames(
            label['shifted_label'][delay:])
        onsets, offsets, frames = representation.convert2onsets_and_frames(
            argmax_pred, rep_type)

        p_ref, i_ref, v_ref = extract_notes(onset_ref, frame_ref)
        p_est, i_est, v_est = extract_notes(onsets, frames)

        t_ref, f_ref = notes_to_frames(p_ref, i_ref, frame_ref.shape)
        t_est, f_est = notes_to_frames(p_est, i_est, frames.shape)

        scaling = HOP_LENGTH / SAMPLE_RATE

        i_ref = (i_ref * scaling).reshape(-1, 2)
        p_ref = np.array([midi_to_hz(MIN_MIDI + midi) for midi in p_ref])
        i_est = (i_est * scaling).reshape(-1, 2)
        p_est = np.array([midi_to_hz(MIN_MIDI + midi) for midi in p_est])

        t_ref = t_ref.astype(np.float64) * scaling
        f_ref = [
            np.array([midi_to_hz(MIN_MIDI + midi) for midi in freqs])
            for freqs in f_ref
        ]
        t_est = t_est.astype(np.float64) * scaling
        f_est = [
            np.array([midi_to_hz(MIN_MIDI + midi) for midi in freqs])
            for freqs in f_est
        ]

        p, r, f, o = evaluate_notes(i_ref,
                                    p_ref,
                                    i_est,
                                    p_est,
                                    offset_ratio=None)
        metrics['metric/note/precision'].append(p)
        metrics['metric/note/recall'].append(r)
        metrics['metric/note/f1'].append(f)
        metrics['metric/note/overlap'].append(o)

        p, r, f, o = evaluate_notes(i_ref, p_ref, i_est, p_est)
        metrics['metric/note-with-offsets/precision'].append(p)
        metrics['metric/note-with-offsets/recall'].append(r)
        metrics['metric/note-with-offsets/f1'].append(f)
        metrics['metric/note-with-offsets/overlap'].append(o)

        frame_metrics = evaluate_frames(t_ref, f_ref, t_est, f_est)
        metrics['metric/frame/f1'].append(
            hmean([
                frame_metrics['Precision'] + eps, frame_metrics['Recall'] + eps
            ]) - eps)

        for key, value in frame_metrics.items():
            metrics['metric/frame/' +
                    key.lower().replace(' ', '_')].append(value)

        if plot_example:
            pred = pred.cpu().numpy().transpose(1, 2, 0)
            label = label['shifted_label'][delay:].cpu().numpy()
            os.makedirs(save_path, exist_ok=True)
            basename = Path(save_path) / Path(batch['path'][n]).stem

            np.save(str(basename) + '_label.npy', label)
            np.save(str(basename) + f'_pred_{sampling_method}.npy', pred)

            draw_predictions_with_label(
                str(basename) + '_pred.png', pred, label)
            # midi_path = str(basename) + f'_pred_{global_step}.mid'
            # save_midi(midi_path, p_est, i_est, v_est)

    return metrics, acc_conf
Example #8
def evaluate_onf(batch,
                 model,
                 device,
                 save_path=None,
                 criterion=None,
                 sampling_method='argmax',
                 rep_type='base',
                 plot_example=False,
                 recursive=True,
                 detail_eval=False,
                 delay=1):
    metrics = defaultdict(list)
    with th.no_grad():
        preds, losses = models.run_on_batch_onf(model, batch, device[0])
    losses = losses.cpu().numpy()
    metrics['loss'].extend([losses])

    for n in range(preds['frame'].shape[0]):
        label = dict()
        for key in batch:
            label[key] = batch[key][n]

        onset_ref, offset_ref, frame_ref = representation.base2onsets_and_frames(
            label['shifted_label'][delay:])
        onsets = preds['onset'][n] > 0.5
        offsets = preds['offset'][n] > 0.5
        frames = preds['frame'][n] > 0.5

        p_ref, i_ref, v_ref = extract_notes(onset_ref, frame_ref)
        p_est, i_est, v_est = extract_notes(onsets, frames)

        t_ref, f_ref = notes_to_frames(p_ref, i_ref, frame_ref.shape)
        t_est, f_est = notes_to_frames(p_est, i_est, frames.shape)

        scaling = HOP_LENGTH / SAMPLE_RATE

        i_ref = (i_ref * scaling).reshape(-1, 2)
        p_ref = np.array([midi_to_hz(MIN_MIDI + midi) for midi in p_ref])
        i_est = (i_est * scaling).reshape(-1, 2)
        p_est = np.array([midi_to_hz(MIN_MIDI + midi) for midi in p_est])

        t_ref = t_ref.astype(np.float64) * scaling
        f_ref = [
            np.array([midi_to_hz(MIN_MIDI + midi) for midi in freqs])
            for freqs in f_ref
        ]
        t_est = t_est.astype(np.float64) * scaling
        f_est = [
            np.array([midi_to_hz(MIN_MIDI + midi) for midi in freqs])
            for freqs in f_est
        ]

        p, r, f, o = evaluate_notes(i_ref,
                                    p_ref,
                                    i_est,
                                    p_est,
                                    offset_ratio=None)
        metrics['metric/note/precision'].append(p)
        metrics['metric/note/recall'].append(r)
        metrics['metric/note/f1'].append(f)
        metrics['metric/note/overlap'].append(o)

        p, r, f, o = evaluate_notes(i_ref, p_ref, i_est, p_est)
        metrics['metric/note-with-offsets/precision'].append(p)
        metrics['metric/note-with-offsets/recall'].append(r)
        metrics['metric/note-with-offsets/f1'].append(f)
        metrics['metric/note-with-offsets/overlap'].append(o)

        frame_metrics = evaluate_frames(t_ref, f_ref, t_est, f_est)
        metrics['metric/frame/f1'].append(
            hmean([
                frame_metrics['Precision'] + eps, frame_metrics['Recall'] + eps
            ]) - eps)

        for key, value in frame_metrics.items():
            metrics['metric/frame/' +
                    key.lower().replace(' ', '_')].append(value)

    return metrics, None
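Whichever variant produced the metrics dict, a small reducer turns the per-song lists into a readable report. A closing sketch (not from the source), assuming the defaultdict-of-lists shape returned by all of the functions above:

import numpy as np

def summarize(metrics):
    # Collapse each per-song metric list into mean and standard deviation;
    # np.atleast_1d also absorbs the array-valued 'loss' entries.
    for key in sorted(metrics):
        values = np.concatenate([np.atleast_1d(v) for v in metrics[key]])
        print(f'{key:50s} {values.mean():.4f} +/- {values.std():.4f}')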