Example #1
def _sequence_to_pianoroll_fn(sequence_tensor,
                              velocity_range_tensor,
                              instrument_family=None,
                              hparams=None):
    """Converts sequence to pianorolls."""
    if instrument_family is not None and instrument_family < 0:
        instrument_family = None
    velocity_range = music_pb2.VelocityRange.FromString(
        velocity_range_tensor.numpy())
    sequence = music_pb2.NoteSequence.FromString(sequence_tensor.numpy())
    sequence = sequences_lib.apply_sustain_control_changes(sequence)
    roll = sequences_lib.sequence_to_pianoroll(
        sequence,
        frames_per_second=hparams_frames_per_second(hparams),
        min_pitch=constants.MIN_MIDI_PITCH,
        max_pitch=constants.MAX_MIDI_PITCH,
        min_frame_occupancy_for_label=hparams.min_frame_occupancy_for_label,
        onset_mode=hparams.onset_mode,
        onset_length_ms=hparams.onset_length,
        offset_length_ms=hparams.offset_length,
        onset_delay_ms=hparams.onset_delay,
        min_velocity=velocity_range.min,
        max_velocity=velocity_range.max,
        instrument_family=instrument_family,
        use_drums=hparams.use_drums,
        timbre_num_classes=hparams.timbre_num_classes)
    return (roll.active, roll.weights, roll.onsets, roll.onset_velocities,
            roll.offsets)
Example #2
    def _ExampleToInputs(self,
                         ex,
                         truncated_length=0,
                         crop_training_sequence_to_notes=False):
        hparams = copy.deepcopy(constants.DEFAULT_HPARAMS)
        hparams.crop_training_sequence_to_notes = crop_training_sequence_to_notes

        filename = ex.features.feature['id'].bytes_list.value[0]
        sequence, crop_beginning_seconds = data.preprocess_sequence(
            ex.features.feature['sequence'].bytes_list.value[0], hparams)
        wav_data = ex.features.feature['audio'].bytes_list.value[0]

        if crop_training_sequence_to_notes:
            wav_data = audio_io.crop_wav_data(wav_data, hparams.sample_rate,
                                              crop_beginning_seconds,
                                              sequence.total_time)
        spec = data.wav_to_spec(wav_data, hparams=hparams)
        roll = sequences_lib.sequence_to_pianoroll(
            sequence,
            frames_per_second=data.hparams_frames_per_second(hparams),
            min_pitch=constants.MIN_MIDI_PITCH,
            max_pitch=constants.MAX_MIDI_PITCH,
            min_frame_occupancy_for_label=0.0,
            onset_mode='length_ms',
            onset_length_ms=32.,
            onset_delay_ms=0.)
        length = data.wav_to_num_frames(
            wav_data,
            frames_per_second=data.hparams_frames_per_second(hparams))

        return self._DataToInputs(spec, roll.active, roll.weights, length,
                                  filename, truncated_length)
Example #3
  def _ExampleToInputs(self,
                       ex,
                       truncated_length=0,
                       crop_training_sequence_to_notes=False):
    hparams = copy.deepcopy(constants.DEFAULT_HPARAMS)
    hparams.crop_training_sequence_to_notes = crop_training_sequence_to_notes

    filename = ex.features.feature['id'].bytes_list.value[0]
    sequence, crop_beginning_seconds = data.preprocess_sequence(
        ex.features.feature['sequence'].bytes_list.value[0], hparams)
    wav_data = ex.features.feature['audio'].bytes_list.value[0]

    if crop_training_sequence_to_notes:
      wav_data = audio_io.crop_wav_data(wav_data, hparams.sample_rate,
                                        crop_beginning_seconds,
                                        sequence.total_time)
    spec = data.wav_to_spec(wav_data, hparams=hparams)
    roll = sequences_lib.sequence_to_pianoroll(
        sequence,
        frames_per_second=data.hparams_frames_per_second(hparams),
        min_pitch=constants.MIN_MIDI_PITCH,
        max_pitch=constants.MAX_MIDI_PITCH,
        min_frame_occupancy_for_label=0.0,
        onset_mode='length_ms',
        onset_length_ms=32.,
        onset_delay_ms=0.)
    length = data.wav_to_num_frames(
        wav_data, frames_per_second=data.hparams_frames_per_second(hparams))

    return self._DataToInputs(spec, roll.active, roll.weights, length, filename,
                              truncated_length)
Example #4
  def _ExampleToInputs(self,
                       ex,
                       truncated_length=0):
    hparams = copy.deepcopy(configs.DEFAULT_HPARAMS)

    filename = ex.features.feature['id'].bytes_list.value[0]
    sequence = music_pb2.NoteSequence.FromString(
        ex.features.feature['sequence'].bytes_list.value[0])
    wav_data = ex.features.feature['audio'].bytes_list.value[0]

    spec = data.wav_to_spec(wav_data, hparams=hparams)
    roll = sequences_lib.sequence_to_pianoroll(
        sequence,
        frames_per_second=data.hparams_frames_per_second(hparams),
        min_pitch=constants.MIN_MIDI_PITCH,
        max_pitch=constants.MAX_MIDI_PITCH,
        min_frame_occupancy_for_label=0.0,
        onset_mode='length_ms',
        onset_length_ms=32.,
        onset_delay_ms=0.)
    length = data.wav_to_num_frames(
        wav_data, frames_per_second=data.hparams_frames_per_second(hparams))

    return self._DataToInputs(spec, roll.active, roll.weights, length, filename,
                              truncated_length)
Example #5
  def _ExampleToInputs(self,
                       ex,
                       truncated_length=0):
    hparams = copy.deepcopy(configs.DEFAULT_HPARAMS)

    filename = ex.features.feature['id'].bytes_list.value[0]
    sequence = music_pb2.NoteSequence.FromString(
        ex.features.feature['sequence'].bytes_list.value[0])
    wav_data = ex.features.feature['audio'].bytes_list.value[0]

    spec = data.wav_to_spec(wav_data, hparams=hparams)
    roll = sequences_lib.sequence_to_pianoroll(
        sequence,
        frames_per_second=data.hparams_frames_per_second(hparams),
        min_pitch=constants.MIN_MIDI_PITCH,
        max_pitch=constants.MAX_MIDI_PITCH,
        min_frame_occupancy_for_label=0.0,
        onset_mode='length_ms',
        onset_length_ms=32.,
        onset_delay_ms=0.)
    length = data.wav_to_num_frames(
        wav_data, frames_per_second=data.hparams_frames_per_second(hparams))

    return self._DataToInputs(spec, roll.active, roll.weights, length, filename,
                              truncated_length)
Example #6
def magenta_frame_eval(pred_seq, frame_labels):
    processed_frame_predictions = sequences_lib.sequence_to_pianoroll(
        pred_seq, frames_per_second=16000 / 512, min_pitch=21,
        max_pitch=108).active

    if processed_frame_predictions.shape[0] < frame_labels.shape[0]:
        # Pad transcribed frames with silence.
        pad_length = (
            frame_labels.shape[0] - processed_frame_predictions.shape[0])
        processed_frame_predictions = np.pad(
            processed_frame_predictions, [(0, pad_length), (0, 0)], 'constant')
    elif processed_frame_predictions.shape[0] > frame_labels.shape[0]:
        # Truncate transcribed frames.
        processed_frame_predictions = (
            processed_frame_predictions[:frame_labels.shape[0], :])

    frame_metrics = magenta_metrics.calculate_frame_metrics(
        frame_labels=frame_labels,
        frame_predictions=processed_frame_predictions)

    results = defaultdict(list)
    for key, value in frame_metrics.items():
        results[key] = value[0].numpy()
    return results
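Several of the examples above repeat the same alignment step before computing frame metrics: pad the transcribed pianoroll with silent frames, or truncate it, so that its length matches the label pianoroll. Below is a minimal sketch of that idiom factored into a standalone helper; the name match_label_length is illustrative and not part of Magenta.

import numpy as np

def match_label_length(frame_predictions, frame_labels):
    """Pads predictions with silence or truncates them to the label length."""
    if frame_predictions.shape[0] < frame_labels.shape[0]:
        # Pad transcribed frames with silence.
        pad_length = frame_labels.shape[0] - frame_predictions.shape[0]
        frame_predictions = np.pad(
            frame_predictions, [(0, pad_length), (0, 0)], 'constant')
    elif frame_predictions.shape[0] > frame_labels.shape[0]:
        # Truncate transcribed frames to the label length.
        frame_predictions = frame_predictions[:frame_labels.shape[0], :]
    return frame_predictions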
Example #7
def posterior_pianoroll_image(frame_probs, sequence_prediction,
                              frame_labels, frames_per_second, overlap=False):
  """Create a pianoroll image showing frame posteriors, predictions & labels."""
  frame_predictions = sequences_lib.sequence_to_pianoroll(
      sequence_prediction,
      frames_per_second=frames_per_second,
      min_pitch=constants.MIN_MIDI_PITCH,
      max_pitch=constants.MAX_MIDI_PITCH).active

  if frame_predictions.shape[0] < frame_labels.shape[0]:
    # Pad transcribed frames with silence.
    pad_length = frame_labels.shape[0] - frame_predictions.shape[0]
    frame_predictions = np.pad(
        frame_predictions, [(0, pad_length), (0, 0)], 'constant')
  elif frame_predictions.shape[0] > frame_labels.shape[0]:
    # Truncate transcribed frames.
    frame_predictions = frame_predictions[:frame_labels.shape[0], :]

  pianoroll_img = np.zeros([len(frame_probs), 3 * len(frame_probs[0]), 3])

  if overlap:
    # Show overlap in yellow
    pianoroll_img[:, :, 0] = np.concatenate(
        [np.array(frame_labels),
         np.array(frame_predictions),
         np.array(frame_probs)],
        axis=1)
    pianoroll_img[:, :, 1] = np.concatenate(
        [np.array(frame_labels),
         np.array(frame_labels),
         np.array(frame_labels)],
        axis=1)
    pianoroll_img[:, :, 2] = np.concatenate(
        [np.array(frame_labels),
         np.zeros_like(frame_predictions),
         np.zeros_like(np.array(frame_probs))],
        axis=1)
  else:
    # Show only red and green
    pianoroll_img[:, :, 0] = np.concatenate(
        [np.array(frame_labels),
         np.array(frame_predictions) * (1.0 - np.array(frame_labels)),
         np.array(frame_probs) * (1.0 - np.array(frame_labels))],
        axis=1)
    pianoroll_img[:, :, 1] = np.concatenate(
        [np.array(frame_labels),
         np.array(frame_predictions) * np.array(frame_labels),
         np.array(frame_probs) * np.array(frame_labels)],
        axis=1)
    pianoroll_img[:, :, 2] = np.concatenate(
        [np.array(frame_labels),
         np.zeros_like(frame_predictions),
         np.zeros_like(np.array(frame_probs))],
        axis=1)

  return np.flipud(np.transpose(pianoroll_img, [1, 0, 2]))
Example #8
def posterior_pianoroll_image(frame_probs, sequence_prediction,
                              frame_labels, frames_per_second, overlap=False):
  """Create a pianoroll image showing frame posteriors, predictions & labels."""
  frame_predictions, _, _, _, _ = sequences_lib.sequence_to_pianoroll(
      sequence_prediction,
      frames_per_second=frames_per_second,
      min_pitch=constants.MIN_MIDI_PITCH,
      max_pitch=constants.MAX_MIDI_PITCH)

  if frame_predictions.shape[0] < frame_labels.shape[0]:
    # Pad transcribed frames with silence.
    pad_length = frame_labels.shape[0] - frame_predictions.shape[0]
    frame_predictions = np.pad(
        frame_predictions, [(0, pad_length), (0, 0)], 'constant')
  elif frame_predictions.shape[0] > frame_labels.shape[0]:
    # Truncate transcribed frames.
    frame_predictions = frame_predictions[:frame_labels.shape[0], :]

  pianoroll_img = np.zeros([len(frame_probs), 3 * len(frame_probs[0]), 3])

  if overlap:
    # Show overlap in yellow
    pianoroll_img[:, :, 0] = np.concatenate(
        [np.array(frame_labels),
         np.array(frame_predictions),
         np.array(frame_probs)],
        axis=1)
    pianoroll_img[:, :, 1] = np.concatenate(
        [np.array(frame_labels),
         np.array(frame_labels),
         np.array(frame_labels)],
        axis=1)
    pianoroll_img[:, :, 2] = np.concatenate(
        [np.array(frame_labels),
         np.zeros_like(frame_predictions),
         np.zeros_like(np.array(frame_probs))],
        axis=1)
  else:
    # Show only red and green
    pianoroll_img[:, :, 0] = np.concatenate(
        [np.array(frame_labels),
         np.array(frame_predictions) * (1.0 - np.array(frame_labels)),
         np.array(frame_probs) * (1.0 - np.array(frame_labels))],
        axis=1)
    pianoroll_img[:, :, 1] = np.concatenate(
        [np.array(frame_labels),
         np.array(frame_predictions) * np.array(frame_labels),
         np.array(frame_probs) * np.array(frame_labels)],
        axis=1)
    pianoroll_img[:, :, 2] = np.concatenate(
        [np.array(frame_labels),
         np.zeros_like(frame_predictions),
         np.zeros_like(np.array(frame_probs))],
        axis=1)

  return np.flipud(np.transpose(pianoroll_img, [1, 0, 2]))
Example #9
 def sequence_to_pianoroll_fn(sequence_tensor, velocity_range_tensor):
     velocity_range = music_pb2.VelocityRange.FromString(
         velocity_range_tensor)
     sequence = preprocess_sequence(sequence_tensor)
     return sequences_lib.sequence_to_pianoroll(
         sequence,
         frames_per_second=hparams_frames_per_second(hparams),
         min_pitch=constants.MIN_MIDI_PITCH,
         max_pitch=constants.MAX_MIDI_PITCH,
         min_frame_occupancy_for_label=hparams.min_frame_occupancy_for_label,
         onset_mode=hparams.onset_mode,
         onset_length_ms=hparams.onset_length,
         onset_delay_ms=hparams.onset_delay,
         min_velocity=velocity_range.min,
         max_velocity=velocity_range.max)
Example #10
 def sequence_to_pianoroll_fn(sequence_tensor, velocity_range_tensor):
   """Converts sequence to pianorolls."""
   velocity_range = music_pb2.VelocityRange.FromString(velocity_range_tensor)
   sequence = preprocess_sequence(sequence_tensor)
   roll = sequences_lib.sequence_to_pianoroll(
       sequence,
       frames_per_second=hparams_frames_per_second(hparams),
       min_pitch=constants.MIN_MIDI_PITCH,
       max_pitch=constants.MAX_MIDI_PITCH,
       min_frame_occupancy_for_label=hparams.min_frame_occupancy_for_label,
       onset_mode=hparams.onset_mode,
       onset_length_ms=hparams.onset_length,
       offset_length_ms=hparams.offset_length,
       onset_delay_ms=hparams.onset_delay,
       min_velocity=velocity_range.min,
       max_velocity=velocity_range.max)
   return (roll.active, roll.weights, roll.onsets,
           roll.offsets, roll.onset_velocities)
Example #11
 def sequence_to_pianoroll_fn(sequence_tensor, velocity_range_tensor):
   """Converts sequence to pianorolls."""
   velocity_range = music_pb2.VelocityRange.FromString(velocity_range_tensor)
   sequence = music_pb2.NoteSequence.FromString(sequence_tensor)
   sequence = sequences_lib.apply_sustain_control_changes(sequence)
   roll = sequences_lib.sequence_to_pianoroll(
       sequence,
       frames_per_second=hparams_frames_per_second(hparams),
       min_pitch=constants.MIN_MIDI_PITCH,
       max_pitch=constants.MAX_MIDI_PITCH,
       min_frame_occupancy_for_label=hparams.min_frame_occupancy_for_label,
       onset_mode=hparams.onset_mode,
       onset_length_ms=hparams.onset_length,
       offset_length_ms=hparams.offset_length,
       onset_delay_ms=hparams.onset_delay,
       min_velocity=velocity_range.min,
       max_velocity=velocity_range.max)
   return (roll.active, roll.weights, roll.onsets, roll.onset_velocities,
           roll.offsets)
Example #12
 def sequence_to_pianoroll_fn(sequence_tensor, velocity_range_tensor):
   """Converts sequence to pianorolls."""
   velocity_range = music_pb2.VelocityRange.FromString(velocity_range_tensor)
   sequence, unused_cropped_beginning_seconds = preprocess_sequence(
       sequence_tensor, hparams)
   roll = sequences_lib.sequence_to_pianoroll(
       sequence,
       frames_per_second=hparams_frames_per_second(hparams),
       min_pitch=constants.MIN_MIDI_PITCH,
       max_pitch=constants.MAX_MIDI_PITCH,
       min_frame_occupancy_for_label=hparams.min_frame_occupancy_for_label,
       onset_mode=hparams.onset_mode,
       onset_length_ms=hparams.onset_length,
       offset_length_ms=hparams.offset_length,
       onset_delay_ms=hparams.onset_delay,
       min_velocity=velocity_range.min,
       max_velocity=velocity_range.max)
   return (roll.active, roll.weights, roll.onsets, roll.onset_velocities,
           roll.offsets)
Example #13
def seq_to_pianoroll(seq):
    return sequences_lib.sequence_to_pianoroll(seq,
                                               frames_per_second=16000 / 512,
                                               min_pitch=21,
                                               max_pitch=108).active
Example #14
def score_sequence(session, global_step_increment, summary_op, summary_writer,
                   metrics_to_updates, metric_note_precision,
                   metric_note_recall, metric_note_f1,
                   metric_note_precision_with_offsets,
                   metric_note_recall_with_offsets,
                   metric_note_f1_with_offsets, metric_frame_labels,
                   metric_frame_predictions, frame_labels, sequence_prediction,
                   frames_per_second, note_sequence_str_label, min_duration_ms,
                   sequence_id):
    """Calculate metrics on the inferred sequence."""
    est_intervals, est_pitches = sequence_to_valued_intervals(
        sequence_prediction, min_duration_ms=min_duration_ms)

    sequence_label = music_pb2.NoteSequence.FromString(note_sequence_str_label)
    ref_intervals, ref_pitches = sequence_to_valued_intervals(
        sequence_label, min_duration_ms=min_duration_ms)

    sequence_note_precision, sequence_note_recall, sequence_note_f1, _ = (
        mir_eval.transcription.precision_recall_f1_overlap(
            ref_intervals,
            pretty_midi.note_number_to_hz(ref_pitches),
            est_intervals,
            pretty_midi.note_number_to_hz(est_pitches),
            offset_ratio=None))

    (sequence_note_precision_with_offsets, sequence_note_recall_with_offsets,
     sequence_note_f1_with_offsets,
     _) = (mir_eval.transcription.precision_recall_f1_overlap(
         ref_intervals, pretty_midi.note_number_to_hz(ref_pitches),
         est_intervals, pretty_midi.note_number_to_hz(est_pitches)))

    frame_predictions = sequences_lib.sequence_to_pianoroll(
        sequence_prediction,
        frames_per_second=frames_per_second,
        min_pitch=constants.MIN_MIDI_PITCH,
        max_pitch=constants.MAX_MIDI_PITCH).active

    if frame_predictions.shape[0] < frame_labels.shape[0]:
        # Pad transcribed frames with silence.
        pad_length = frame_labels.shape[0] - frame_predictions.shape[0]
        frame_predictions = np.pad(frame_predictions, [(0, pad_length),
                                                       (0, 0)], 'constant')
    elif frame_predictions.shape[0] > frame_labels.shape[0]:
        # Truncate transcribed frames.
        frame_predictions = frame_predictions[:frame_labels.shape[0], :]

    global_step, _ = session.run(
        [global_step_increment, metrics_to_updates], {
            metric_frame_predictions: frame_predictions,
            metric_frame_labels: frame_labels,
            metric_note_precision: sequence_note_precision,
            metric_note_recall: sequence_note_recall,
            metric_note_f1: sequence_note_f1,
            metric_note_precision_with_offsets:
            sequence_note_precision_with_offsets,
            metric_note_recall_with_offsets: sequence_note_recall_with_offsets,
            metric_note_f1_with_offsets: sequence_note_f1_with_offsets
        })
    # Running the summary op separately ensures that all of the metrics have been
    # updated before we try to query them.
    summary = session.run(summary_op)

    tf.logging.info('Writing score summary for %s: Step= %d, Note F1=%f',
                    sequence_id, global_step, sequence_note_f1)
    summary_writer.add_summary(summary, global_step)
    summary_writer.flush()

    return sequence_label
Example #15
def score_sequence(session, global_step_increment, summary_op, summary_writer,
                   metrics_to_updates, metric_note_precision,
                   metric_note_recall, metric_note_f1,
                   metric_note_precision_with_offsets,
                   metric_note_recall_with_offsets,
                   metric_note_f1_with_offsets, metric_frame_labels,
                   metric_frame_predictions, frame_labels, sequence_prediction,
                   frames_per_second, note_sequence_str_label, min_duration_ms,
                   sequence_id):
  """Calculate metrics on the inferred sequence."""
  est_intervals, est_pitches = sequence_to_valued_intervals(
      sequence_prediction,
      min_duration_ms=min_duration_ms)

  sequence_label = music_pb2.NoteSequence.FromString(note_sequence_str_label)
  ref_intervals, ref_pitches = sequence_to_valued_intervals(
      sequence_label,
      min_duration_ms=min_duration_ms)

  sequence_note_precision, sequence_note_recall, sequence_note_f1, _ = (
      mir_eval.transcription.precision_recall_f1_overlap(
          ref_intervals,
          pretty_midi.note_number_to_hz(ref_pitches),
          est_intervals,
          pretty_midi.note_number_to_hz(est_pitches),
          offset_ratio=None))

  (sequence_note_precision_with_offsets,
   sequence_note_recall_with_offsets,
   sequence_note_f1_with_offsets, _) = (
       mir_eval.transcription.precision_recall_f1_overlap(
           ref_intervals,
           pretty_midi.note_number_to_hz(ref_pitches),
           est_intervals,
           pretty_midi.note_number_to_hz(est_pitches)))

  frame_predictions = sequences_lib.sequence_to_pianoroll(
      sequence_prediction,
      frames_per_second=frames_per_second,
      min_pitch=constants.MIN_MIDI_PITCH,
      max_pitch=constants.MAX_MIDI_PITCH).active

  if frame_predictions.shape[0] < frame_labels.shape[0]:
    # Pad transcribed frames with silence.
    pad_length = frame_labels.shape[0] - frame_predictions.shape[0]
    frame_predictions = np.pad(
        frame_predictions, [(0, pad_length), (0, 0)], 'constant')
  elif frame_predictions.shape[0] > frame_labels.shape[0]:
    # Truncate transcribed frames.
    frame_predictions = frame_predictions[:frame_labels.shape[0], :]

  global_step, _ = session.run([global_step_increment, metrics_to_updates], {
      metric_frame_predictions: frame_predictions,
      metric_frame_labels: frame_labels,
      metric_note_precision: sequence_note_precision,
      metric_note_recall: sequence_note_recall,
      metric_note_f1: sequence_note_f1,
      metric_note_precision_with_offsets: sequence_note_precision_with_offsets,
      metric_note_recall_with_offsets: sequence_note_recall_with_offsets,
      metric_note_f1_with_offsets: sequence_note_f1_with_offsets
  })
  # Running the summary op separately ensures that all of the metrics have been
  # updated before we try to query them.
  summary = session.run(summary_op)

  tf.logging.info(
      'Writing score summary for %s: Step= %d, Note F1=%f',
      sequence_id, global_step, sequence_note_f1)
  summary_writer.add_summary(summary, global_step)
  summary_writer.flush()

  return sequence_label
Example #16
def score_sequence(session, global_step_increment, metrics_to_updates,
                   metric_note_precision, metric_note_recall, metric_note_f1,
                   metric_note_precision_with_offsets,
                   metric_note_recall_with_offsets,
                   metric_note_f1_with_offsets,
                   metric_note_precision_with_offsets_velocity,
                   metric_note_recall_with_offsets_velocity,
                   metric_note_f1_with_offsets_velocity, metric_frame_labels,
                   metric_frame_predictions, frame_labels, sequence_prediction,
                   frames_per_second, sequence_label, sequence_id):
    """Calculate metrics on the inferred sequence."""
    est_intervals, est_pitches, est_velocities = sequence_to_valued_intervals(
        sequence_prediction)

    ref_intervals, ref_pitches, ref_velocities = sequence_to_valued_intervals(
        sequence_label)

    sequence_note_precision, sequence_note_recall, sequence_note_f1, _ = (
        mir_eval.transcription.precision_recall_f1_overlap(
            ref_intervals,
            pretty_midi.note_number_to_hz(ref_pitches),
            est_intervals,
            pretty_midi.note_number_to_hz(est_pitches),
            offset_ratio=None))

    (sequence_note_precision_with_offsets, sequence_note_recall_with_offsets,
     sequence_note_f1_with_offsets,
     _) = (mir_eval.transcription.precision_recall_f1_overlap(
         ref_intervals, pretty_midi.note_number_to_hz(ref_pitches),
         est_intervals, pretty_midi.note_number_to_hz(est_pitches)))

    (sequence_note_precision_with_offsets_velocity,
     sequence_note_recall_with_offsets_velocity,
     sequence_note_f1_with_offsets_velocity,
     _) = (mir_eval.transcription_velocity.precision_recall_f1_overlap(
         ref_intervals=ref_intervals,
         ref_pitches=pretty_midi.note_number_to_hz(ref_pitches),
         ref_velocities=ref_velocities,
         est_intervals=est_intervals,
         est_pitches=pretty_midi.note_number_to_hz(est_pitches),
         est_velocities=est_velocities))

    frame_predictions = sequences_lib.sequence_to_pianoroll(
        sequence_prediction,
        frames_per_second=frames_per_second,
        min_pitch=constants.MIN_MIDI_PITCH,
        max_pitch=constants.MAX_MIDI_PITCH).active

    if frame_predictions.shape[0] < frame_labels.shape[0]:
        # Pad transcribed frames with silence.
        pad_length = frame_labels.shape[0] - frame_predictions.shape[0]
        frame_predictions = np.pad(frame_predictions, [(0, pad_length),
                                                       (0, 0)], 'constant')
    elif frame_predictions.shape[0] > frame_labels.shape[0]:
        # Truncate transcribed frames.
        frame_predictions = frame_predictions[:frame_labels.shape[0], :]

    global_step, _ = session.run(
        [global_step_increment, metrics_to_updates], {
            metric_frame_predictions: frame_predictions,
            metric_frame_labels: frame_labels,
            metric_note_precision: sequence_note_precision,
            metric_note_recall: sequence_note_recall,
            metric_note_f1: sequence_note_f1,
            metric_note_precision_with_offsets:
                sequence_note_precision_with_offsets,
            metric_note_recall_with_offsets:
                sequence_note_recall_with_offsets,
            metric_note_f1_with_offsets: sequence_note_f1_with_offsets,
            metric_note_precision_with_offsets_velocity:
                sequence_note_precision_with_offsets_velocity,
            metric_note_recall_with_offsets_velocity:
                sequence_note_recall_with_offsets_velocity,
            metric_note_f1_with_offsets_velocity:
                sequence_note_f1_with_offsets_velocity,
        })

    tf.logging.info('Updating scores for %s: Step= %d, Note F1=%f',
                    sequence_id, global_step, sequence_note_f1)
Example #17
def _calculate_metrics_py(frame_predictions, onset_predictions,
                          offset_predictions, velocity_values,
                          sequence_label_str, frame_labels, sequence_id,
                          hparams):
    """Python logic for calculating metrics on a single example."""
    tf.logging.info('Calculating metrics for %s with length %d', sequence_id,
                    frame_labels.shape[0])
    if not hparams.predict_onset_threshold:
        onset_predictions = None
    if not hparams.predict_offset_threshold:
        offset_predictions = None

    sequence_prediction = sequences_lib.pianoroll_to_note_sequence(
        frames=frame_predictions,
        frames_per_second=data.hparams_frames_per_second(hparams),
        min_duration_ms=0,
        min_midi_pitch=constants.MIN_MIDI_PITCH,
        onset_predictions=onset_predictions,
        offset_predictions=offset_predictions,
        velocity_values=velocity_values)

    sequence_label = music_pb2.NoteSequence.FromString(sequence_label_str)

    if hparams.backward_shift_amount_ms:

        def shift_notesequence(ns_time):
            return ns_time + hparams.backward_shift_amount_ms / 1000.

        shifted_sequence_label, skipped_notes = (
            sequences_lib.adjust_notesequence_times(sequence_label,
                                                    shift_notesequence))
        assert skipped_notes == 0
        sequence_label = shifted_sequence_label

    est_intervals, est_pitches, est_velocities = (
        infer_util.sequence_to_valued_intervals(sequence_prediction))

    ref_intervals, ref_pitches, ref_velocities = (
        infer_util.sequence_to_valued_intervals(sequence_label))

    note_precision, note_recall, note_f1, _ = (
        mir_eval.transcription.precision_recall_f1_overlap(
            ref_intervals,
            pretty_midi.note_number_to_hz(ref_pitches),
            est_intervals,
            pretty_midi.note_number_to_hz(est_pitches),
            offset_ratio=None))

    (note_with_offsets_precision, note_with_offsets_recall,
     note_with_offsets_f1,
     _) = (mir_eval.transcription.precision_recall_f1_overlap(
         ref_intervals, pretty_midi.note_number_to_hz(ref_pitches),
         est_intervals, pretty_midi.note_number_to_hz(est_pitches)))

    (note_with_offsets_velocity_precision, note_with_offsets_velocity_recall,
     note_with_offsets_velocity_f1,
     _) = (mir_eval.transcription_velocity.precision_recall_f1_overlap(
         ref_intervals=ref_intervals,
         ref_pitches=pretty_midi.note_number_to_hz(ref_pitches),
         ref_velocities=ref_velocities,
         est_intervals=est_intervals,
         est_pitches=pretty_midi.note_number_to_hz(est_pitches),
         est_velocities=est_velocities))

    processed_frame_predictions = sequences_lib.sequence_to_pianoroll(
        sequence_prediction,
        frames_per_second=data.hparams_frames_per_second(hparams),
        min_pitch=constants.MIN_MIDI_PITCH,
        max_pitch=constants.MAX_MIDI_PITCH).active

    if processed_frame_predictions.shape[0] < frame_labels.shape[0]:
        # Pad transcribed frames with silence.
        pad_length = (
            frame_labels.shape[0] - processed_frame_predictions.shape[0])
        processed_frame_predictions = np.pad(
            processed_frame_predictions, [(0, pad_length), (0, 0)], 'constant')
    elif processed_frame_predictions.shape[0] > frame_labels.shape[0]:
        # Truncate transcribed frames.
        processed_frame_predictions = (
            processed_frame_predictions[:frame_labels.shape[0], :])

    tf.logging.info(
        'Metrics for %s: Note F1 %f, Note w/ offsets F1 %f, '
        'Note w/ offsets & velocity: %f', sequence_id, note_f1,
        note_with_offsets_f1, note_with_offsets_velocity_f1)
    return (note_precision, note_recall, note_f1, note_with_offsets_precision,
            note_with_offsets_recall, note_with_offsets_f1,
            note_with_offsets_velocity_precision,
            note_with_offsets_velocity_recall, note_with_offsets_velocity_f1,
            processed_frame_predictions)
Example #18
def _calculate_metrics_py(frame_probs,
                          onset_probs,
                          frame_predictions,
                          onset_predictions,
                          offset_predictions,
                          velocity_values,
                          sequence_label_str,
                          frame_labels,
                          sequence_id,
                          hparams,
                          min_pitch,
                          max_pitch,
                          onsets_only,
                          restrict_to_pitch=None):
    """Python logic for calculating metrics on a single example."""
    tf.logging.info('Calculating metrics for %s with length %d', sequence_id,
                    frame_labels.shape[0])

    sequence_prediction = infer_util.predict_sequence(
        frame_probs=frame_probs,
        onset_probs=onset_probs,
        frame_predictions=frame_predictions,
        onset_predictions=onset_predictions,
        offset_predictions=offset_predictions,
        velocity_values=velocity_values,
        min_pitch=min_pitch,
        hparams=hparams,
        onsets_only=onsets_only)

    sequence_label = music_pb2.NoteSequence.FromString(sequence_label_str)

    if hparams.backward_shift_amount_ms:
        def shift_notesequence(ns_time):
            return ns_time + hparams.backward_shift_amount_ms / 1000.

        shifted_sequence_label, skipped_notes = (
            sequences_lib.adjust_notesequence_times(sequence_label,
                                                    shift_notesequence))
        assert skipped_notes == 0
        sequence_label = shifted_sequence_label

    est_intervals, est_pitches, est_velocities = (
        sequence_to_valued_intervals(
            sequence_prediction, restrict_to_pitch=restrict_to_pitch))

    ref_intervals, ref_pitches, ref_velocities = (
        sequence_to_valued_intervals(
            sequence_label, restrict_to_pitch=restrict_to_pitch))

    processed_frame_predictions = sequences_lib.sequence_to_pianoroll(
        sequence_prediction,
        frames_per_second=data.hparams_frames_per_second(hparams),
        min_pitch=min_pitch,
        max_pitch=max_pitch).active

    if processed_frame_predictions.shape[0] < frame_labels.shape[0]:
        # Pad transcribed frames with silence.
        pad_length = frame_labels.shape[0] - processed_frame_predictions.shape[0]
        processed_frame_predictions = np.pad(processed_frame_predictions,
                                             [(0, pad_length), (0, 0)], 'constant')
    elif processed_frame_predictions.shape[0] > frame_labels.shape[0]:
        # Truncate transcribed frames.
        processed_frame_predictions = (
            processed_frame_predictions[:frame_labels.shape[0], :])

    if len(ref_pitches) == 0:
        tf.logging.info(
            'Reference pitches were length 0, returning empty metrics for %s:',
            sequence_id)
        return tuple([[]] * 12 + [processed_frame_predictions])

    note_precision, note_recall, note_f1, _ = (
        mir_eval.transcription.precision_recall_f1_overlap(
            ref_intervals,
            pretty_midi.note_number_to_hz(ref_pitches),
            est_intervals,
            pretty_midi.note_number_to_hz(est_pitches),
            offset_ratio=None))

    (note_with_velocity_precision, note_with_velocity_recall,
     note_with_velocity_f1, _) = (
        mir_eval.transcription_velocity.precision_recall_f1_overlap(
            ref_intervals=ref_intervals,
            ref_pitches=pretty_midi.note_number_to_hz(ref_pitches),
            ref_velocities=ref_velocities,
            est_intervals=est_intervals,
            est_pitches=pretty_midi.note_number_to_hz(est_pitches),
            est_velocities=est_velocities,
            offset_ratio=None))

    (note_with_offsets_precision, note_with_offsets_recall, note_with_offsets_f1,
     _) = (
        mir_eval.transcription.precision_recall_f1_overlap(
            ref_intervals, pretty_midi.note_number_to_hz(ref_pitches),
            est_intervals, pretty_midi.note_number_to_hz(est_pitches)))

    (note_with_offsets_velocity_precision, note_with_offsets_velocity_recall,
     note_with_offsets_velocity_f1, _) = (
        mir_eval.transcription_velocity.precision_recall_f1_overlap(
            ref_intervals=ref_intervals,
            ref_pitches=pretty_midi.note_number_to_hz(ref_pitches),
            ref_velocities=ref_velocities,
            est_intervals=est_intervals,
            est_pitches=pretty_midi.note_number_to_hz(est_pitches),
            est_velocities=est_velocities))

    tf.logging.info(
        'Metrics for %s: Note F1 %f, Note w/ velocity F1 %f, Note w/ offsets F1 %f, '
        'Note w/ offsets & velocity: %f', sequence_id, note_f1,
        note_with_velocity_f1, note_with_offsets_f1,
        note_with_offsets_velocity_f1)
    # Return 1-d tensors for the metrics
    return ([note_precision], [note_recall], [note_f1],
            [note_with_velocity_precision], [note_with_velocity_recall],
            [note_with_velocity_f1], [note_with_offsets_precision],
            [note_with_offsets_recall], [note_with_offsets_f1],
            [note_with_offsets_velocity_precision],
            [note_with_offsets_velocity_recall],
            [note_with_offsets_velocity_f1], [processed_frame_predictions])
Example #19
def _calculate_metrics_py(
    frame_predictions, onset_predictions, offset_predictions, velocity_values,
    sequence_label_str, frame_labels, sequence_id, hparams):
  """Python logic for calculating metrics on a single example."""
  tf.logging.info('Calculating metrics for %s with length %d', sequence_id,
                  frame_labels.shape[0])
  if not hparams.predict_onset_threshold:
    onset_predictions = None
  if not hparams.predict_offset_threshold:
    offset_predictions = None

  sequence_prediction = sequences_lib.pianoroll_to_note_sequence(
      frames=frame_predictions,
      frames_per_second=data.hparams_frames_per_second(hparams),
      min_duration_ms=0,
      min_midi_pitch=constants.MIN_MIDI_PITCH,
      onset_predictions=onset_predictions,
      offset_predictions=offset_predictions,
      velocity_values=velocity_values)

  sequence_label = music_pb2.NoteSequence.FromString(sequence_label_str)

  if hparams.backward_shift_amount_ms:

    def shift_notesequence(ns_time):
      return ns_time + hparams.backward_shift_amount_ms / 1000.

    shifted_sequence_label, skipped_notes = (
        sequences_lib.adjust_notesequence_times(sequence_label,
                                                shift_notesequence))
    assert skipped_notes == 0
    sequence_label = shifted_sequence_label

  est_intervals, est_pitches, est_velocities = (
      infer_util.sequence_to_valued_intervals(sequence_prediction))

  ref_intervals, ref_pitches, ref_velocities = (
      infer_util.sequence_to_valued_intervals(sequence_label))

  note_precision, note_recall, note_f1, _ = (
      mir_eval.transcription.precision_recall_f1_overlap(
          ref_intervals,
          pretty_midi.note_number_to_hz(ref_pitches),
          est_intervals,
          pretty_midi.note_number_to_hz(est_pitches),
          offset_ratio=None))

  (note_with_offsets_precision, note_with_offsets_recall, note_with_offsets_f1,
   _) = (
       mir_eval.transcription.precision_recall_f1_overlap(
           ref_intervals, pretty_midi.note_number_to_hz(ref_pitches),
           est_intervals, pretty_midi.note_number_to_hz(est_pitches)))

  (note_with_offsets_velocity_precision, note_with_offsets_velocity_recall,
   note_with_offsets_velocity_f1, _) = (
       mir_eval.transcription_velocity.precision_recall_f1_overlap(
           ref_intervals=ref_intervals,
           ref_pitches=pretty_midi.note_number_to_hz(ref_pitches),
           ref_velocities=ref_velocities,
           est_intervals=est_intervals,
           est_pitches=pretty_midi.note_number_to_hz(est_pitches),
           est_velocities=est_velocities))

  processed_frame_predictions = sequences_lib.sequence_to_pianoroll(
      sequence_prediction,
      frames_per_second=data.hparams_frames_per_second(hparams),
      min_pitch=constants.MIN_MIDI_PITCH,
      max_pitch=constants.MAX_MIDI_PITCH).active

  if processed_frame_predictions.shape[0] < frame_labels.shape[0]:
    # Pad transcribed frames with silence.
    pad_length = frame_labels.shape[0] - processed_frame_predictions.shape[0]
    processed_frame_predictions = np.pad(processed_frame_predictions,
                                         [(0, pad_length), (0, 0)], 'constant')
  elif processed_frame_predictions.shape[0] > frame_labels.shape[0]:
    # Truncate transcribed frames.
    processed_frame_predictions = (
        processed_frame_predictions[:frame_labels.shape[0], :])

  tf.logging.info(
      'Metrics for %s: Note F1 %f, Note w/ offsets F1 %f, '
      'Note w/ offsets & velocity: %f', sequence_id, note_f1,
      note_with_offsets_f1, note_with_offsets_velocity_f1)
  return (note_precision, note_recall, note_f1, note_with_offsets_precision,
          note_with_offsets_recall, note_with_offsets_f1,
          note_with_offsets_velocity_precision,
          note_with_offsets_velocity_recall, note_with_offsets_velocity_f1,
          processed_frame_predictions)
Example #20
def model_inference(model_fn, model_dir, checkpoint_path, data_fn, hparams,
                    examples_path, output_dir, summary_writer, master,
                    preprocess_examples, shuffle_examples):
    """Runs inference for the given examples."""
    tf.logging.info('model_dir=%s', model_dir)
    tf.logging.info('checkpoint_path=%s', checkpoint_path)
    tf.logging.info('examples_path=%s', examples_path)
    tf.logging.info('output_dir=%s', output_dir)

    estimator = train_util.create_estimator(model_fn,
                                            model_dir,
                                            hparams,
                                            master=master)

    transcription_data = functools.partial(
        data_fn,
        examples=examples_path,
        preprocess_examples=preprocess_examples,
        is_training=False,
        shuffle_examples=shuffle_examples,
        skip_n_initial_records=0)

    input_fn = infer_util.labels_to_features_wrapper(transcription_data)

    start_time = time.time()
    infer_times = []
    num_frames = []

    file_num = 0

    all_metrics = collections.defaultdict(list)

    for predictions in estimator.predict(input_fn,
                                         checkpoint_path=checkpoint_path,
                                         yield_single_examples=False):

        # Remove batch dimension for convenience.
        for k in predictions.keys():
            if predictions[k].shape[0] != 1:
                raise ValueError(
                    'All predictions must have batch size 1, but shape of '
                    '{} was: {}'.format(k, predictions[k].shape[0]))
            predictions[k] = predictions[k][0]

        end_time = time.time()
        infer_time = end_time - start_time
        infer_times.append(infer_time)
        num_frames.append(predictions['frame_predictions'].shape[0])
        tf.logging.info(
            'Infer time %f, frames %d, frames/sec %f, running average %f',
            infer_time, num_frames[-1], num_frames[-1] / infer_time,
            np.sum(num_frames) / np.sum(infer_times))

        tf.logging.info('Scoring sequence %s', predictions['sequence_ids'])

        sequence_prediction = music_pb2.NoteSequence.FromString(
            predictions['sequence_predictions'])
        sequence_label = music_pb2.NoteSequence.FromString(
            predictions['sequence_labels'])

        # Make filenames UNIX-friendly.
        filename_chars = six.ensure_text(predictions['sequence_ids'], 'utf-8')
        filename_chars = [c if c.isalnum() else '_' for c in filename_chars]
        filename_safe = ''.join(filename_chars).rstrip()
        filename_safe = '{:04d}_{}'.format(file_num, filename_safe[:200])
        file_num += 1
        output_file = os.path.join(output_dir, filename_safe + '.mid')
        tf.logging.info('Writing inferred midi file to %s', output_file)
        midi_io.sequence_proto_to_midi_file(sequence_prediction, output_file)

        label_output_file = os.path.join(output_dir,
                                         filename_safe + '_label.mid')
        tf.logging.info('Writing label midi file to %s', label_output_file)
        midi_io.sequence_proto_to_midi_file(sequence_label, label_output_file)

        # Also write a pianoroll showing acoustic model output vs labels.
        pianoroll_output_file = os.path.join(output_dir,
                                             filename_safe + '_pianoroll.png')
        tf.logging.info('Writing acoustic logit/label file to %s',
                        pianoroll_output_file)
        # Calculate frames based on the sequence. Includes any postprocessing done
        # to turn raw onsets/frames predictions into the final sequence.
        # TODO(fjord): This work is duplicated in metrics.py.
        sequence_frame_predictions = sequences_lib.sequence_to_pianoroll(
            sequence_prediction,
            frames_per_second=data.hparams_frames_per_second(hparams),
            min_pitch=constants.MIN_MIDI_PITCH,
            max_pitch=constants.MAX_MIDI_PITCH).active
        with tf.gfile.GFile(pianoroll_output_file, mode='w') as f:
            imageio.imwrite(f,
                            infer_util.posterior_pianoroll_image(
                                predictions['onset_probs'],
                                predictions['onset_labels'],
                                predictions['frame_probs'],
                                predictions['frame_labels'],
                                sequence_frame_predictions),
                            format='png')

        # Update histogram and current scalar for metrics.
        with tf.Graph().as_default(), tf.Session().as_default():
            for k, v in predictions.items():
                if not k.startswith('metrics/'):
                    continue
                all_metrics[k].extend(v)
                histogram_name = k + '_histogram'
                metric_summary = tf.summary.histogram(histogram_name,
                                                      all_metrics[k])
                summary_writer.add_summary(metric_summary.eval(),
                                           global_step=file_num)
                scalar_name = k
                metric_summary = tf.summary.scalar(scalar_name,
                                                   np.mean(all_metrics[k]))
                summary_writer.add_summary(metric_summary.eval(),
                                           global_step=file_num)
            summary_writer.flush()

        start_time = time.time()

    # Write final mean values for all metrics.
    with tf.Graph().as_default(), tf.Session().as_default():
        for k, v in all_metrics.items():
            final_scalar_name = 'final/' + k
            metric_summary = tf.summary.scalar(final_scalar_name,
                                               np.mean(all_metrics[k]))
            summary_writer.add_summary(metric_summary.eval())
        summary_writer.flush()
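Finally, a self-contained sketch of calling sequence_to_pianoroll directly, outside of any training or evaluation pipeline. It assumes the open-source note_seq package (the successor to magenta.music) exposes sequences_lib and the NoteSequence proto with the interfaces used in the examples above; the 16000 / 512 frame rate follows Example #13.

from note_seq import sequences_lib
from note_seq.protobuf import music_pb2

# Build a one-note NoteSequence: middle C held for half a second.
sequence = music_pb2.NoteSequence()
sequence.notes.add(pitch=60, velocity=80, start_time=0.0, end_time=0.5)
sequence.total_time = 0.5

# Render it to a pianoroll covering the piano range (MIDI 21-108).
roll = sequences_lib.sequence_to_pianoroll(
    sequence,
    frames_per_second=16000 / 512,
    min_pitch=21,
    max_pitch=108)

# roll.active is a (num_frames, 88) array of frame activations;
# roll.onsets marks the frames where notes begin.
print(roll.active.shape, roll.onsets.shape)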