def normalize_tempo(sequence, new_tempo=60):
    """Returns a copy of `sequence` warped to play at a single uniform tempo."""
    if math.isclose(sequence.total_time, 0.):
        return copy_lib.deepcopy(sequence)

    tempo_change_times, tempi = zip(*sorted(
        (tempo.time, tempo.qpm) for tempo in sequence.tempos
        if tempo.time < sequence.total_time))
    original_times = list(tempo_change_times) + [sequence.total_time]
    new_times = [original_times[0]]

    # Iterate through all the intervals between the tempo changes.
    # Compute a new duration for each of them.
    for start, end, tempo in zip(original_times[:-1], original_times[1:],
                                 tempi):
        time = (end - start) * tempo / new_tempo
        new_times.append(new_times[-1] + time)

    def time_func(t):
        return np.interp(t, original_times, new_times)

    adjusted_sequence, skipped_notes = sequences_lib.adjust_notesequence_times(
        sequence, time_func)
    if skipped_notes:
        warnings.warn(
            f'{skipped_notes} notes skipped in adjust_notesequence_times',
            RuntimeWarning)

    # Replace all tempo changes with a single event at the new tempo.
    del adjusted_sequence.tempos[:]
    tempo = adjusted_sequence.tempos.add()
    tempo.time = 0.
    tempo.qpm = new_tempo

    return adjusted_sequence
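
# Usage sketch (illustrative only, not part of the original module). Builds a
# tiny two-tempo NoteSequence and normalizes it to a uniform 120 QPM; the
# import path and the note values are assumptions made for the example.
def _demo_normalize_tempo():
    from note_seq.protobuf import music_pb2  # assumed import path

    ns = music_pb2.NoteSequence()
    ns.tempos.add(time=0., qpm=120.)
    ns.tempos.add(time=2., qpm=60.)
    ns.notes.add(pitch=60, velocity=80, start_time=0., end_time=1.)
    ns.notes.add(pitch=62, velocity=80, start_time=2., end_time=3.)
    ns.total_time = 3.

    normalized = normalize_tempo(ns, new_tempo=120)
    # The 60 QPM span is compressed to half its wall-clock length, and the
    # result carries a single 120 QPM tempo event at time 0.
    return normalized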
def postprocess(self, sequences):
    """Trims each output to its expected duration, concatenates, and retempos."""
    if self.key_pairs is None:
        raise RuntimeError("'postprocess' called before 'load'")
    sequences = list(sequences)
    if len(sequences) != len(self._durations):
        raise RuntimeError(
            f'Expected {len(self._durations)} sequences, got {len(sequences)}')
    sequences = [
        sequences_lib.trim_note_sequence(seq, 0., dur)
        for seq, dur in zip(sequences, self._durations)
    ]
    sequence = sequences_lib.concatenate_sequences(sequences, self._durations)

    if self._warp and self._target_tempo:
        # Warp time linearly so that material timed at an implicit 60 QPM
        # plays back at the target tempo, then record that tempo.
        sequence, _ = sequences_lib.adjust_notesequence_times(
            sequence, lambda t: t * 60. / self._target_tempo)
        del sequence.tempos[:]
        sequence.tempos.add().qpm = self._target_tempo

    return sequence
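
# Standalone sketch of the retempo step above (illustrative; the import path
# is an assumption). Timestamps laid out at an implicit 60 QPM are scaled
# linearly so the sequence plays at `target_tempo`, which is then recorded as
# the sole tempo event.
def _demo_retempo(sequence, target_tempo=120.):
    from note_seq import sequences_lib as seq_lib  # assumed import path

    warped, _ = seq_lib.adjust_notesequence_times(
        sequence, lambda t: t * 60. / target_tempo)
    del warped.tempos[:]
    warped.tempos.add().qpm = target_tempo
    return warped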
def _calculate_metrics_py(frame_probs, onset_probs, frame_predictions,
                          onset_predictions, offset_predictions,
                          velocity_values, sequence_label_str, frame_labels,
                          sequence_id, hparams, min_pitch, max_pitch,
                          onsets_only, restrict_to_pitch=None):
    """Python logic for calculating metrics on a single example."""
    tf.logging.info('Calculating metrics for %s with length %d', sequence_id,
                    frame_labels.shape[0])

    sequence_prediction = infer_util.predict_sequence(
        frame_probs=frame_probs,
        onset_probs=onset_probs,
        frame_predictions=frame_predictions,
        onset_predictions=onset_predictions,
        offset_predictions=offset_predictions,
        velocity_values=velocity_values,
        min_pitch=min_pitch,
        hparams=hparams,
        onsets_only=onsets_only)

    note_density = len(
        sequence_prediction.notes) / sequence_prediction.total_time

    sequence_label = music_pb2.NoteSequence.FromString(sequence_label_str)

    if hparams.backward_shift_amount_ms:

        def shift_notesequence(ns_time):
            return ns_time + hparams.backward_shift_amount_ms / 1000.

        shifted_sequence_label, skipped_notes = (
            sequences_lib.adjust_notesequence_times(sequence_label,
                                                    shift_notesequence))
        assert skipped_notes == 0
        sequence_label = shifted_sequence_label

    est_intervals, est_pitches, est_velocities = (
        sequence_to_valued_intervals(
            sequence_prediction, restrict_to_pitch=restrict_to_pitch))

    ref_intervals, ref_pitches, ref_velocities = (
        sequence_to_valued_intervals(
            sequence_label, restrict_to_pitch=restrict_to_pitch))

    processed_frame_predictions = sequences_lib.sequence_to_pianoroll(
        sequence_prediction,
        frames_per_second=data.hparams_frames_per_second(hparams),
        min_pitch=min_pitch,
        max_pitch=max_pitch).active

    if processed_frame_predictions.shape[0] < frame_labels.shape[0]:
        # Pad transcribed frames with silence.
        pad_length = (
            frame_labels.shape[0] - processed_frame_predictions.shape[0])
        processed_frame_predictions = np.pad(
            processed_frame_predictions, [(0, pad_length), (0, 0)], 'constant')
    elif processed_frame_predictions.shape[0] > frame_labels.shape[0]:
        # Truncate transcribed frames.
        processed_frame_predictions = (
            processed_frame_predictions[:frame_labels.shape[0], :])

    if len(ref_pitches) == 0:
        tf.logging.info(
            'Reference pitches were length 0, returning empty metrics for %s:',
            sequence_id)
        return tuple([[]] * 13 + [processed_frame_predictions])

    note_precision, note_recall, note_f1, _ = (
        mir_eval.transcription.precision_recall_f1_overlap(
            ref_intervals,
            pretty_midi.note_number_to_hz(ref_pitches),
            est_intervals,
            pretty_midi.note_number_to_hz(est_pitches),
            offset_ratio=None))

    (note_with_velocity_precision, note_with_velocity_recall,
     note_with_velocity_f1, _) = (
         mir_eval.transcription_velocity.precision_recall_f1_overlap(
             ref_intervals=ref_intervals,
             ref_pitches=pretty_midi.note_number_to_hz(ref_pitches),
             ref_velocities=ref_velocities,
             est_intervals=est_intervals,
             est_pitches=pretty_midi.note_number_to_hz(est_pitches),
             est_velocities=est_velocities,
             offset_ratio=None))

    (note_with_offsets_precision, note_with_offsets_recall,
     note_with_offsets_f1, _) = (
         mir_eval.transcription.precision_recall_f1_overlap(
             ref_intervals, pretty_midi.note_number_to_hz(ref_pitches),
             est_intervals, pretty_midi.note_number_to_hz(est_pitches)))

    (note_with_offsets_velocity_precision, note_with_offsets_velocity_recall,
     note_with_offsets_velocity_f1, _) = (
         mir_eval.transcription_velocity.precision_recall_f1_overlap(
             ref_intervals=ref_intervals,
             ref_pitches=pretty_midi.note_number_to_hz(ref_pitches),
             ref_velocities=ref_velocities,
             est_intervals=est_intervals,
             est_pitches=pretty_midi.note_number_to_hz(est_pitches),
             est_velocities=est_velocities))

    tf.logging.info(
        'Metrics for %s: Note F1 %f, Note w/ velocity F1 %f, '
        'Note w/ offsets F1 %f, Note w/ offsets & velocity: %f', sequence_id,
        note_f1, note_with_velocity_f1, note_with_offsets_f1,
        note_with_offsets_velocity_f1)

    # Return 1-d tensors for the metrics.
    return ([note_precision], [note_recall], [note_f1], [note_density],
            [note_with_velocity_precision], [note_with_velocity_recall],
            [note_with_velocity_f1], [note_with_offsets_precision],
            [note_with_offsets_recall], [note_with_offsets_f1],
            [note_with_offsets_velocity_precision],
            [note_with_offsets_velocity_recall],
            [note_with_offsets_velocity_f1], [processed_frame_predictions])
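
# Minimal standalone example of the mir_eval call used above (illustrative;
# the interval and pitch values are made up). Intervals are (n, 2) arrays of
# [onset, offset] times in seconds and pitches are given in Hz, which is why
# MIDI note numbers are converted with pretty_midi.note_number_to_hz.
def _demo_note_prf():
    import mir_eval
    import numpy as np
    import pretty_midi

    ref_intervals = np.array([[0.0, 0.5], [0.5, 1.0]])
    ref_pitches = pretty_midi.note_number_to_hz(np.array([60, 62]))
    est_intervals = np.array([[0.02, 0.48], [0.53, 1.0]])
    est_pitches = pretty_midi.note_number_to_hz(np.array([60, 62]))

    # offset_ratio=None scores onsets and pitches only, matching the plain
    # "note" metric above; leaving the default scores offsets as well.
    precision, recall, f1, _ = (
        mir_eval.transcription.precision_recall_f1_overlap(
            ref_intervals, ref_pitches, est_intervals, est_pitches,
            offset_ratio=None))
    return precision, recall, f1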
def align_cpp(samples,
              sample_rate,
              ns,
              cqt_hop_length,
              sf2_path,
              penalty_mul=1.0,
              band_radius_seconds=.5):
    """Aligns the NoteSequence to the wav file using C++ DTW.

    Args:
      samples: Samples to align.
      sample_rate: Sample rate for samples.
      ns: The source NoteSequence to align.
      cqt_hop_length: Hop length to use for CQT calculations.
      sf2_path: Path to SF2 file for synthesis.
      penalty_mul: Penalty multiplier to use for non-diagonal moves.
      band_radius_seconds: Size of the band radius, in seconds, used to
        restrict the DTW search space.

    Raises:
      RuntimeError: If notes are skipped during alignment.

    Returns:
      aligned_ns: The aligned version of the NoteSequence.
      stats: A dictionary of alignment statistics.
    """
    logging.info('Synthesizing')
    ns_samples = midi_synth.fluidsynth(
        ns, sf2_path=sf2_path, sample_rate=sample_rate).astype(np.float32)

    # It is critical that ns_samples and samples are the same length because
    # the alignment code does not do subsequence alignment.
    ns_samples = np.pad(
        ns_samples, (0, max(0, samples.shape[0] - ns_samples.shape[0])),
        'constant')

    # Pad samples too, if needed, because there are some cases where the
    # synthesized NoteSequence is actually longer.
    samples = np.pad(
        samples, (0, max(0, ns_samples.shape[0] - samples.shape[0])),
        'constant')

    # Note that we skip normalization here because it happens in C++.
    logging.info('source_cqt')
    source_cqt = extract_cqt(ns_samples, sample_rate, cqt_hop_length)

    logging.info('dest_cqt')
    dest_cqt = extract_cqt(samples, sample_rate, cqt_hop_length)

    alignment_task = alignment_pb2.AlignmentTask()
    alignment_task.sequence_1.x = source_cqt.shape[0]
    alignment_task.sequence_1.y = source_cqt.shape[1]
    for c in source_cqt.reshape([-1]):
        alignment_task.sequence_1.content.append(c)

    alignment_task.sequence_2.x = dest_cqt.shape[0]
    alignment_task.sequence_2.y = dest_cqt.shape[1]
    for c in dest_cqt.reshape([-1]):
        alignment_task.sequence_2.content.append(c)

    seconds_per_frame = cqt_hop_length / sample_rate

    alignment_task.band_radius = int(band_radius_seconds / seconds_per_frame)
    alignment_task.penalty = 0
    alignment_task.penalty_mul = penalty_mul

    # Write the task to a temporary file for the C++ binary.
    fh, temp_path = tempfile.mkstemp(suffix='.proto')
    os.close(fh)
    with open(temp_path, 'wb') as f:
        f.write(alignment_task.SerializeToString())

    # Align with the C++ program.
    subprocess.check_call([ALIGN_BINARY, temp_path])

    # Read the result.
    with open(temp_path + '.result', 'rb') as f:
        result = alignment_pb2.AlignmentResult.FromString(f.read())

    # Clean up.
    os.remove(temp_path)
    os.remove(temp_path + '.result')

    logging.info('Aligning NoteSequence with warp path.')

    warp_seconds_i = np.array([i * seconds_per_frame for i in result.i])
    warp_seconds_j = np.array([j * seconds_per_frame for j in result.j])

    time_diffs = np.abs(warp_seconds_i - warp_seconds_j)
    warps = np.abs(time_diffs[1:] - time_diffs[:-1])

    stats = {
        'alignment_score': result.score,
        'warp_mean_s': np.mean(warps),
        'warp_median_s': np.median(warps),
        'warp_max_s': np.max(warps),
        'warp_min_s': np.min(warps),
        'time_diff_mean_s': np.mean(time_diffs),
        'time_diff_median_s': np.median(time_diffs),
        'time_diff_max_s': np.max(time_diffs),
        'time_diff_min_s': np.min(time_diffs),
    }

    for name, value in sorted(stats.items()):
        logging.info('%s: %f', name, value)

    aligned_ns, skipped_notes = sequences_lib.adjust_notesequence_times(
        ns,
        lambda t: np.interp(t, warp_seconds_i, warp_seconds_j),
        minimum_duration=seconds_per_frame)
    if skipped_notes > 0:
        raise RuntimeError('Skipped {} notes'.format(skipped_notes))

    logging.debug('done')

    return aligned_ns, stats
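
# Usage sketch (illustrative, not part of the module). Assumes librosa for
# audio loading and note_seq for MIDI parsing; the sample rate, hop length,
# and SF2 path are placeholders chosen for the example.
def _demo_align_cpp(wav_path, midi_path, sf2_path):
    import librosa
    import note_seq

    sample_rate = 16000
    cqt_hop_length = 512

    samples, _ = librosa.load(wav_path, sr=sample_rate)
    ns = note_seq.midi_file_to_note_sequence(midi_path)

    aligned_ns, stats = align_cpp(
        samples.astype(np.float32), sample_rate, ns, cqt_hop_length,
        sf2_path=sf2_path)
    return aligned_ns, stats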
def main():
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('input_file', metavar='FILE')
    parser.add_argument('output_dir', metavar='OUTPUTDIR')
    parser.add_argument('--stretch', type=str, metavar='RATIO')
    parser.add_argument('--metadata', type=str, metavar='FILE')
    parser.add_argument('--group-by-name', action='store_true')
    parser.add_argument('--duration', type=float)
    parser.add_argument('--trim', action='store_true')
    args = parser.parse_args()

    if args.stretch:
        # Calculate the time stretch ratio.
        if ':' in args.stretch:
            a, b = args.stretch.split(':')
            stretch_ratio = float(a) / float(b)
        else:
            stretch_ratio = float(args.stretch)

    metadata = None
    if args.metadata:
        with gzip.open(args.metadata, 'rt') as f:
            metadata = json.load(f)

    if args.group_by_name:
        if not metadata:
            raise ValueError('--group-by-name requires --metadata')
        name_to_sequences = collections.defaultdict(list)

    os.makedirs(args.output_dir, exist_ok=True)

    with lmdb.open(args.input_file, subdir=False, readonly=True,
                   lock=False) as db:
        with db.begin(buffers=True) as txn:
            for key, val in txn.cursor():
                key = bytes(key).decode()
                sequence = music_pb2.NoteSequence.FromString(val)
                if not sequence.tempos:
                    sequence.tempos.add().qpm = 60.

                if args.stretch:
                    sequence, _ = sequences_lib.adjust_notesequence_times(
                        sequence, lambda t: t * stretch_ratio)

                if args.trim:
                    if args.duration is None:
                        raise ValueError('--trim requires --duration')
                    sequence = sequences_lib.trim_note_sequence(
                        sequence, 0., args.duration)

                if args.group_by_name:
                    if '_' in key:
                        src_key, style_key = key.split('_')
                        name, _ = os.path.splitext(
                            metadata[src_key]['filename'])
                        style_name, _ = os.path.splitext(
                            metadata[style_key]['filename'])
                        name = f'{name}__{style_name}'
                    else:
                        name, _ = os.path.splitext(
                            key + '_' + metadata[key]['filename'])
                    name_to_sequences[name].append(sequence)
                else:
                    out_path = os.path.join(args.output_dir, key + '.mid')
                    midi_io.note_sequence_to_midi_file(sequence, out_path)

    if args.group_by_name:
        for name, sequences in name_to_sequences.items():
            sequence_durations = None
            if args.duration is not None:
                sequence_durations = [args.duration for _ in sequences]
            sequence = sequences_lib.concatenate_sequences(
                sequences, sequence_durations)
            out_path = os.path.join(args.output_dir, name + '.mid')
            midi_io.note_sequence_to_midi_file(sequence, out_path)
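
# Example invocation (illustrative; the script name and flag values are
# placeholders). `--stretch 1:2` maps every timestamp t to t * 0.5, i.e.
# doubles the playback speed, while `--stretch 2` would halve it:
#
#   python export_sequences.py sequences.lmdb out_midi/ \
#       --stretch 1:2 --metadata metadata.json.gz \
#       --group-by-name --duration 20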