Example #1
def normalize_tempo(sequence, new_tempo=60):
    """Returns a copy of `sequence` warped to play back at a constant
    `new_tempo` QPM, with a single tempo event at time 0."""
    if math.isclose(sequence.total_time, 0.):
        return copy_lib.deepcopy(sequence)

    tempo_change_times, tempi = zip(*sorted(
        (tempo.time, tempo.qpm) for tempo in sequence.tempos
        if tempo.time < sequence.total_time))
    original_times = list(tempo_change_times) + [sequence.total_time]
    new_times = [original_times[0]]

    # Iterate through all the intervals between the tempo changes.
    # Compute a new duration for each of them.
    for start, end, tempo in zip(original_times[:-1], original_times[1:],
                                 tempi):
        time = (end - start) * tempo / new_tempo
        new_times.append(new_times[-1] + time)

    def time_func(t):
        # Piecewise-linear interpolation from original times to rescaled ones.
        return np.interp(t, original_times, new_times)

    adjusted_sequence, skipped_notes = sequences_lib.adjust_notesequence_times(
        sequence, time_func)
    if skipped_notes:
        warnings.warn(
            f'{skipped_notes} notes skipped in adjust_notesequence_times',
            RuntimeWarning)

    del adjusted_sequence.tempos[:]
    tempo = adjusted_sequence.tempos.add()
    tempo.time = 0.
    tempo.qpm = new_tempo

    return adjusted_sequence
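
A minimal usage sketch, assuming note_seq is installed and the names the function relies on (math, copy as copy_lib, warnings, numpy as np, and note_seq's sequences_lib) are in scope; the MIDI paths are hypothetical:

# Hypothetical usage: renormalize a MIDI file to a constant 120 QPM.
from note_seq import midi_io

sequence = midi_io.midi_file_to_note_sequence('input.mid')
normalized = normalize_tempo(sequence, new_tempo=120)
midi_io.note_sequence_to_midi_file(normalized, 'output.mid')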
Example #2
    def postprocess(self, sequences):
        """Trims each sequence to its stored duration and concatenates them
        into a single NoteSequence, optionally warped to the target tempo."""
        if self.key_pairs is None:
            raise RuntimeError("'postprocess' called before 'load'")

        sequences = list(sequences)
        if len(sequences) != len(self._durations):
            raise RuntimeError(
                f'Expected {len(self._durations)} sequences, got {len(sequences)}'
            )

        sequences = [
            sequences_lib.trim_note_sequence(seq, 0., dur)
            for seq, dur in zip(sequences, self._durations)
        ]
        sequence = sequences_lib.concatenate_sequences(sequences,
                                                       self._durations)
        if self._warp and self._target_tempo:
            sequence, _ = sequences_lib.adjust_notesequence_times(
                sequence, lambda t: t * 60. / self._target_tempo)
            del sequence.tempos[:]
            sequence.tempos.add().qpm = self._target_tempo
        return sequence
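
The warp in this example presumes the incoming sequence is timed at 60 QPM (one beat per second), so t * 60. / target_tempo rescales every event time to the target tempo. A standalone sketch of just that time function, with an arbitrary tempo value:

# At 120 QPM a beat lasts 0.5 s, so times at 60 QPM shrink by half.
target_tempo = 120.
time_func = lambda t: t * 60. / target_tempo
assert time_func(4.) == 2.  # 4 s at 60 QPM becomes 2 s at 120 QPM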
Example #3
def _calculate_metrics_py(frame_probs,
                          onset_probs,
                          frame_predictions,
                          onset_predictions,
                          offset_predictions,
                          velocity_values,
                          sequence_label_str,
                          frame_labels,
                          sequence_id,
                          hparams,
                          min_pitch,
                          max_pitch,
                          onsets_only,
                          restrict_to_pitch=None):
    """Python logic for calculating metrics on a single example."""
    tf.logging.info('Calculating metrics for %s with length %d', sequence_id,
                    frame_labels.shape[0])

    sequence_prediction = infer_util.predict_sequence(
        frame_probs=frame_probs,
        onset_probs=onset_probs,
        frame_predictions=frame_predictions,
        onset_predictions=onset_predictions,
        offset_predictions=offset_predictions,
        velocity_values=velocity_values,
        min_pitch=min_pitch,
        hparams=hparams,
        onsets_only=onsets_only)

    note_density = len(
        sequence_prediction.notes) / sequence_prediction.total_time

    sequence_label = music_pb2.NoteSequence.FromString(sequence_label_str)

    if hparams.backward_shift_amount_ms:
        # Shift the reference labels by a constant offset before scoring.
        def shift_notesequence(ns_time):
            return ns_time + hparams.backward_shift_amount_ms / 1000.

        shifted_sequence_label, skipped_notes = (
            sequences_lib.adjust_notesequence_times(sequence_label,
                                                    shift_notesequence))
        assert skipped_notes == 0
        sequence_label = shifted_sequence_label

    est_intervals, est_pitches, est_velocities = sequence_to_valued_intervals(
        sequence_prediction, restrict_to_pitch=restrict_to_pitch)

    ref_intervals, ref_pitches, ref_velocities = sequence_to_valued_intervals(
        sequence_label, restrict_to_pitch=restrict_to_pitch)

    processed_frame_predictions = sequences_lib.sequence_to_pianoroll(
        sequence_prediction,
        frames_per_second=data.hparams_frames_per_second(hparams),
        min_pitch=min_pitch,
        max_pitch=max_pitch).active

    if processed_frame_predictions.shape[0] < frame_labels.shape[0]:
        # Pad transcribed frames with silence.
        pad_length = frame_labels.shape[0] - processed_frame_predictions.shape[
            0]
        processed_frame_predictions = np.pad(processed_frame_predictions,
                                             [(0, pad_length),
                                              (0, 0)], 'constant')
    elif processed_frame_predictions.shape[0] > frame_labels.shape[0]:
        # Truncate transcribed frames.
        processed_frame_predictions = (
            processed_frame_predictions[:frame_labels.shape[0], :])

    if len(ref_pitches) == 0:
        tf.logging.info(
            'Reference pitches were length 0, returning empty metrics for %s:',
            sequence_id)
        return tuple([[]] * 13 + [processed_frame_predictions])

    note_precision, note_recall, note_f1, _ = (
        mir_eval.transcription.precision_recall_f1_overlap(
            ref_intervals,
            pretty_midi.note_number_to_hz(ref_pitches),
            est_intervals,
            pretty_midi.note_number_to_hz(est_pitches),
            offset_ratio=None))

    (note_with_velocity_precision, note_with_velocity_recall,
     note_with_velocity_f1,
     _) = (mir_eval.transcription_velocity.precision_recall_f1_overlap(
         ref_intervals=ref_intervals,
         ref_pitches=pretty_midi.note_number_to_hz(ref_pitches),
         ref_velocities=ref_velocities,
         est_intervals=est_intervals,
         est_pitches=pretty_midi.note_number_to_hz(est_pitches),
         est_velocities=est_velocities,
         offset_ratio=None))

    (note_with_offsets_precision, note_with_offsets_recall,
     note_with_offsets_f1,
     _) = (mir_eval.transcription.precision_recall_f1_overlap(
         ref_intervals, pretty_midi.note_number_to_hz(ref_pitches),
         est_intervals, pretty_midi.note_number_to_hz(est_pitches)))

    (note_with_offsets_velocity_precision, note_with_offsets_velocity_recall,
     note_with_offsets_velocity_f1,
     _) = (mir_eval.transcription_velocity.precision_recall_f1_overlap(
         ref_intervals=ref_intervals,
         ref_pitches=pretty_midi.note_number_to_hz(ref_pitches),
         ref_velocities=ref_velocities,
         est_intervals=est_intervals,
         est_pitches=pretty_midi.note_number_to_hz(est_pitches),
         est_velocities=est_velocities))

    tf.logging.info(
        'Metrics for %s: Note F1 %f, Note w/ velocity F1 %f, Note w/ offsets F1 %f, '
        'Note w/ offsets & velocity: %f', sequence_id, note_f1,
        note_with_velocity_f1, note_with_offsets_f1,
        note_with_offsets_velocity_f1)
    # Return 1-d tensors for the metrics.
    return ([note_precision], [note_recall], [note_f1], [note_density],
            [note_with_velocity_precision], [note_with_velocity_recall],
            [note_with_velocity_f1],
            [note_with_offsets_precision], [note_with_offsets_recall],
            [note_with_offsets_f1],
            [note_with_offsets_velocity_precision],
            [note_with_offsets_velocity_recall],
            [note_with_offsets_velocity_f1],
            [processed_frame_predictions])
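
The label shift above is a plain constant offset. A standalone sketch of the same pattern, reusing sequence_label and sequences_lib from the example (the 31.25 ms value is an arbitrary stand-in for hparams.backward_shift_amount_ms):

backward_shift_amount_ms = 31.25  # arbitrary stand-in value

def shift_notesequence(ns_time):
    # Delay every event by a fixed number of milliseconds.
    return ns_time + backward_shift_amount_ms / 1000.

shifted, skipped = sequences_lib.adjust_notesequence_times(
    sequence_label, shift_notesequence)
assert skipped == 0  # a pure positive shift preserves every note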
Example #4
def align_cpp(samples,
              sample_rate,
              ns,
              cqt_hop_length,
              sf2_path,
              penalty_mul=1.0,
              band_radius_seconds=.5):
  """Aligns the notesequence to the wav file using C++ DTW.

  Args:
    samples: Samples to align.
    sample_rate: Sample rate for samples.
    ns: The source notesequence to align.
    cqt_hop_length: Hop length to use for CQT calculations.
    sf2_path: Path to SF2 file for synthesis.
    penalty_mul: Penalty multiplier to use for non-diagonal moves.
    band_radius_seconds: What size of band radius to use for restricting DTW.

  Raises:
    RuntimeError: If notes are skipped during alignment.

  Returns:
    samples: The samples used from the wav file.
    aligned_ns: The aligned version of the notesequence.
    remaining_ns: Any remaining notesequence that extended beyond the length
        of the wav file.
  """
  logging.info('Synthesizing')
  ns_samples = midi_synth.fluidsynth(
      ns, sf2_path=sf2_path, sample_rate=sample_rate).astype(np.float32)

  # It is critical that ns_samples and samples are the same length because the
  # alignment code does not do subsequence alignment.
  ns_samples = np.pad(ns_samples,
                      (0, max(0, samples.shape[0] - ns_samples.shape[0])),
                      'constant')

  # Pad samples too, if needed, because there are some cases where the
  # synthesized NoteSequence is actually longer.
  samples = np.pad(samples,
                   (0, max(0, ns_samples.shape[0] - samples.shape[0])),
                   'constant')

  # Note that we skip normalization here because it happens in C++.
  logging.info('source_cqt')
  source_cqt = extract_cqt(ns_samples, sample_rate, cqt_hop_length)

  logging.info('dest_cqt')
  dest_cqt = extract_cqt(samples, sample_rate, cqt_hop_length)

  alignment_task = alignment_pb2.AlignmentTask()
  alignment_task.sequence_1.x = source_cqt.shape[0]
  alignment_task.sequence_1.y = source_cqt.shape[1]
  for c in source_cqt.reshape([-1]):
    alignment_task.sequence_1.content.append(c)

  alignment_task.sequence_2.x = dest_cqt.shape[0]
  alignment_task.sequence_2.y = dest_cqt.shape[1]
  for c in dest_cqt.reshape([-1]):
    alignment_task.sequence_2.content.append(c)

  seconds_per_frame = cqt_hop_length / sample_rate

  alignment_task.band_radius = int(band_radius_seconds / seconds_per_frame)
  alignment_task.penalty = 0
  alignment_task.penalty_mul = penalty_mul

  # Write to file.
  fh, temp_path = tempfile.mkstemp(suffix='.proto')
  os.close(fh)
  with open(temp_path, 'wb') as f:
    f.write(alignment_task.SerializeToString())

  # Align with C++ program.
  subprocess.check_call([ALIGN_BINARY, temp_path])

  # Read file.
  with open(temp_path + '.result', 'rb') as f:
    result = alignment_pb2.AlignmentResult.FromString(f.read())

  # Clean up.
  os.remove(temp_path)
  os.remove(temp_path + '.result')

  logging.info('Aligning NoteSequence with warp path.')

  warp_seconds_i = np.array([i * seconds_per_frame for i in result.i])
  warp_seconds_j = np.array([j * seconds_per_frame for j in result.j])

  time_diffs = np.abs(warp_seconds_i - warp_seconds_j)
  warps = np.abs(time_diffs[1:] - time_diffs[:-1])

  stats = {
      'alignment_score': result.score,
      'warp_mean_s': np.mean(warps),
      'warp_median_s': np.median(warps),
      'warp_max_s': np.max(warps),
      'warp_min_s': np.min(warps),
      'time_diff_mean_s': np.mean(time_diffs),
      'time_diff_median_s': np.median(time_diffs),
      'time_diff_max_s': np.max(time_diffs),
      'time_diff_min_s': np.min(time_diffs),
  }

  for name, value in sorted(stats.items()):
    logging.info('%s: %f', name, value)

  aligned_ns, skipped_notes = sequences_lib.adjust_notesequence_times(
      ns,
      lambda t: np.interp(t, warp_seconds_i, warp_seconds_j),
      minimum_duration=seconds_per_frame)
  if skipped_notes > 0:
    raise RuntimeError('Skipped {} notes'.format(skipped_notes))

  logging.debug('done')

  return aligned_ns, stats
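
The final adjustment maps each event time through the DTW warp path by linear interpolation. A toy illustration of that mapping, with made-up path values:

import numpy as np

# Toy warp path: synthesized time (i) versus recorded time (j), in seconds.
warp_seconds_i = np.array([0.0, 1.0, 2.0])
warp_seconds_j = np.array([0.0, 1.2, 2.1])
time_func = lambda t: np.interp(t, warp_seconds_i, warp_seconds_j)
print(time_func(0.5))  # 0.6 -- the first synthesized second stretches to 1.2 s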
Example #5
def main():
    """Exports NoteSequences stored in an LMDB database as MIDI files."""
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('input_file', metavar='FILE')
    parser.add_argument('output_dir', metavar='OUTPUTDIR')
    parser.add_argument('--stretch', type=str, metavar='RATIO')
    parser.add_argument('--metadata', type=str, metavar='FILE')
    parser.add_argument('--group-by-name', action='store_true')
    parser.add_argument('--duration', type=float)
    parser.add_argument('--trim', action='store_true')
    args = parser.parse_args()

    if args.stretch:
        # Parse the stretch ratio, given either as 'a:b' or as a single float.
        if ':' in args.stretch:
            a, b = args.stretch.split(':')
            stretch_ratio = float(a) / float(b)
        else:
            stretch_ratio = float(args.stretch)

    metadata = None
    if args.metadata:
        with gzip.open(args.metadata, 'rt') as f:
            metadata = json.load(f)

    if args.group_by_name:
        if not metadata:
            raise ValueError('--group-by-name requires --metadata')

        name_to_sequences = collections.defaultdict(list)

    os.makedirs(args.output_dir, exist_ok=True)

    with lmdb.open(args.input_file, subdir=False, readonly=True,
                   lock=False) as db:
        with db.begin(buffers=True) as txn:
            for key, val in txn.cursor():
                key = bytes(key).decode()
                sequence = music_pb2.NoteSequence.FromString(val)

                if not sequence.tempos:
                    # Default to 60 QPM if the sequence has no tempo events.
                    sequence.tempos.add().qpm = 60.

                if args.stretch:
                    sequence, _ = sequences_lib.adjust_notesequence_times(
                        sequence, lambda t: t * stretch_ratio)

                if args.trim:
                    if args.duration is None:
                        raise ValueError('--trim requires --duration')
                    sequence = sequences_lib.trim_note_sequence(
                        sequence, 0., args.duration)

                if args.group_by_name:
                    if '_' in key:
                        src_key, style_key = key.split('_')
                        name, _ = os.path.splitext(
                            metadata[src_key]['filename'])
                        style_name, _ = os.path.splitext(
                            metadata[style_key]['filename'])
                        name = f'{name}__{style_name}'
                    else:
                        name, _ = os.path.splitext(key + '_' +
                                                   metadata[key]['filename'])
                    name_to_sequences[name].append(sequence)
                else:
                    out_path = os.path.join(args.output_dir, key + '.mid')
                    midi_io.note_sequence_to_midi_file(sequence, out_path)

    if args.group_by_name:
        for name, sequences in name_to_sequences.items():
            sequence_durations = None
            if args.duration is not None:
                sequence_durations = [args.duration for _ in sequences]
            sequence = sequences_lib.concatenate_sequences(
                sequences, sequence_durations)

            out_path = os.path.join(args.output_dir, name + '.mid')
            midi_io.note_sequence_to_midi_file(sequence, out_path)
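
An invocation might look like the following (the script and file names are placeholders); a ratio such as 3:4 multiplies every event time by 0.75:

# Hypothetical command line:
#   python export_midi.py sequences.db out/ --stretch 3:4 \
#       --metadata metadata.json.gz --group-by-name --duration 30.0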