def create_song_prototype(song_path, start_time, stop_time,
                          model_used='attention_rnn', temperature=1.0):
    """Generate a continuation for an excerpt of a MIDI file.

    The excerpt between `start_time` and `stop_time` is cut out of the song
    and used as the primer for a melody RNN loaded from a bundle file under
    `MEDIA_ROOT/magenta_models`. The generator is asked to fill the section
    running from the primer's `total_time` to twice that value.

    Args:
        song_path: Path of the MIDI file, passed to
            `midi_file_to_note_sequence`.
        start_time: Start of the excerpt, passed to `extract_subsequence`.
        stop_time: End of the excerpt, passed to `extract_subsequence`.
        model_used: Key into the melody RNN generator map; also the base
            name of the `.mag` bundle file.
        temperature: Value stored in the generator's `temperature` argument.

    Returns:
        The part of the generated sequence between the primer's `total_time`
        and twice that value, as returned by `extract_subsequence`.
    """
    # Read the bundle first, then build the generator it belongs to.
    bundle_path = '%s/magenta_models/%s.mag' % (MEDIA_ROOT, model_used)
    model_bundle = mm.sequence_generator_bundle.read_bundle_file(bundle_path)
    make_generator = (
        melody_rnn_sequence_generator.get_generator_map()[model_used])
    generator = make_generator(checkpoint=None, bundle=model_bundle)
    generator.initialize()

    # Cut the primer out of the full song.
    primer = extract_subsequence(
        midi_file_to_note_sequence(song_path), start_time, stop_time)

    options = generator_pb2.GeneratorOptions()
    options.args['temperature'].float_value = temperature
    options.generate_sections.add(
        start_time=primer.total_time,
        end_time=2 * primer.total_time)

    generated = generator.generate(primer, options)

    # Keep only the newly generated section, dropping the primer itself.
    return extract_subsequence(
        generated, primer.total_time, 2 * primer.total_time)
def testExtractSubsequencePastEnd(self):
    """Expect ValueError when extracting the window [15.0, 16.0].

    The latest note in the fixture ends at 10.0; only the 'F' chord (18.0)
    lies later than the requested window.
    """
    notes = [
        (12, 100, 0.01, 10.0),
        (11, 55, 0.22, 0.50),
        (40, 45, 2.50, 3.50),
        (55, 120, 4.0, 4.01),
        (52, 99, 4.75, 5.0),
    ]
    chords = [('C', 1.5), ('G7', 3.0), ('F', 18.0)]

    source = copy.copy(self.note_sequence)
    testing_lib.add_track_to_sequence(source, 0, notes)
    testing_lib.add_chords_to_sequence(source, chords)

    with self.assertRaises(ValueError):
        sequences_lib.extract_subsequence(source, 15.0, 16.0)
def testExtractSubsequencePastEnd(self):
    """Expect ValueError when extracting the window [15.0, 16.0].

    The latest note in the fixture ends at 10.0; only the 'F' chord (18.0)
    lies later than the requested window.
    """
    track_notes = [
        (12, 100, 0.01, 10.0),
        (11, 55, 0.22, 0.50),
        (40, 45, 2.50, 3.50),
        (55, 120, 4.0, 4.01),
        (52, 99, 4.75, 5.0),
    ]
    chord_symbols = [('C', 1.5), ('G7', 3.0), ('F', 18.0)]

    full_sequence = copy.copy(self.note_sequence)
    testing_lib.add_track_to_sequence(full_sequence, 0, track_notes)
    testing_lib.add_chords_to_sequence(full_sequence, chord_symbols)

    with self.assertRaises(ValueError):
        sequences_lib.extract_subsequence(full_sequence, 15.0, 16.0)
def split2batch(audio, sequence):
    """Cut an audio array and its NoteSequence into aligned chunks.

    The audio is first zero-padded so that it is at least
    `sequence.total_time * cfg.SAMPLE_RATE` samples long. Split points are
    `[0, sequence.total_time]` when `cfg.MAX_SPLIT_LENGTH` is 0, otherwise
    whatever `find_split_points` returns. Chunks shorter than
    `cfg.MIN_SPLIT_LENGTH` are dropped, and every kept audio chunk is
    zero-padded up to `cfg.MAX_SPLIT_LENGTH * cfg.SAMPLE_RATE` samples.

    Args:
        audio: Array of samples; `audio.shape[0]` is read as its length.
        sequence: The NoteSequence that belongs to `audio`.

    Returns:
        A list of `(audio_chunk, sequence_chunk)` tuples.
    """
    from magenta.models.onsets_frames_transcription.audio_label_data_utils import find_split_points

    def _pad_to(samples, target_seconds):
        # Append zeros until `samples` covers `target_seconds`; never trims.
        missing = (int(math.ceil(target_seconds * cfg.SAMPLE_RATE))
                   - samples.shape[0])
        if missing > 0:
            tail = np.zeros((missing), dtype=samples.dtype)
            samples = np.concatenate((samples, tail))
        return samples

    audio = _pad_to(audio, sequence.total_time)

    if cfg.MAX_SPLIT_LENGTH == 0:
        boundaries = [0, sequence.total_time]
    else:
        boundaries = find_split_points(
            sequence, audio, cfg.SAMPLE_RATE,
            cfg.MIN_SPLIT_LENGTH, cfg.MAX_SPLIT_LENGTH)

    chunks = []
    for begin, finish in zip(boundaries[:-1], boundaries[1:]):
        if finish - begin < cfg.MIN_SPLIT_LENGTH:
            continue

        if begin != 0 or finish != sequence.total_time:
            chunk_seq = sequences_lib.extract_subsequence(
                sequence, begin, finish)
            chunk_audio = audio_io.crop_samples(
                audio, cfg.SAMPLE_RATE, begin, finish - begin)
        else:
            # The chunk spans the whole input, so nothing has to be cropped.
            chunk_audio, chunk_seq = audio, sequence

        chunks.append(
            (_pad_to(chunk_audio, cfg.MAX_SPLIT_LENGTH), chunk_seq))
    return chunks
def process_record(wav_data, ns, example_id, min_length=5, max_length=20,
                   sample_rate=16000, allow_empty_notesequence=False):
    """Cut one recording into chunks and yield an example per chunk.

    Pass min_length=0 and max_length=-1 to keep the audio and NoteSequence
    at full length.

    Args:
        wav_data: audio data in WAV format.
        ns: the NoteSequence that belongs to the audio.
        example_id: id stored in each example proto.
        min_length: shortest chunk, in seconds, that is kept.
        max_length: longest chunk, in seconds; when equal to `min_length`
            the split points are evenly spaced, and when not positive the
            record is not split at all.
        sample_rate: sample rate the audio is decoded at.
        allow_empty_notesequence: keep chunks that contain no notes.

    Yields:
        Example protos built by `create_example`.
    """
    samples = librosa.util.normalize(
        audio_io.wav_data_to_samples(wav_data, sample_rate), norm=np.inf)

    if max_length == min_length:
        splits = np.arange(0, ns.total_time, max_length)
    elif max_length > 0:
        splits = find_split_points(
            ns, samples, sample_rate, min_length, max_length)
    else:
        splits = [0, ns.total_time]

    # The velocity range describes the whole record, not a single chunk.
    velocities = [note.velocity for note in ns.notes]
    if velocities:
        velocity_max = np.max(velocities)
        velocity_min = np.min(velocities)
    else:
        velocity_max = 0
        velocity_min = 0
    velocity_range = music_pb2.VelocityRange(
        min=velocity_min, max=velocity_max)

    for start, end in zip(splits[:-1], splits[1:]):
        if end - start < min_length:
            continue

        covers_everything = start == 0 and end == ns.total_time

        chunk_ns = (ns if covers_everything
                    else sequences_lib.extract_subsequence(ns, start, end))
        if not (chunk_ns.notes or allow_empty_notesequence):
            tf.logging.warning('skipping empty sequence')
            continue

        if covers_everything:
            chunk_samples = samples
        else:
            # Crop the already-decoded samples: the resampling done by
            # crop_wav_data is really slow and has been paid for once.
            chunk_samples = audio_io.crop_samples(
                samples, sample_rate, start, end - start)

        yield create_example(
            example_id,
            chunk_ns,
            audio_io.samples_to_wav_data(chunk_samples, sample_rate),
            velocity_range=velocity_range)
def testExtractSubsequence(self):
    """Check extract_subsequence(sequence, 2.5, 4.75) on a full fixture.

    The expected proto holds the notes, chords and control changes with
    times relative to the new start, a `total_time` of 1.51 and the
    `subsequence_info` offsets 2.5 / 5.99.
    """
    # Input sequence.
    source = copy.copy(self.note_sequence)
    testing_lib.add_track_to_sequence(source, 0, [
        (12, 100, 0.01, 10.0),
        (11, 55, 0.22, 0.50),
        (40, 45, 2.50, 3.50),
        (55, 120, 4.0, 4.01),
        (52, 99, 4.75, 5.0),
    ])
    testing_lib.add_chords_to_sequence(
        source, [('C', 1.5), ('G7', 3.0), ('F', 4.8)])
    testing_lib.add_control_changes_to_sequence(source, 0, [
        (0.0, 64, 127),
        (2.0, 64, 0),
        (4.0, 64, 127),
        (5.0, 64, 0),
    ])
    testing_lib.add_control_changes_to_sequence(
        source, 1, [(2.0, 64, 127)])

    # Expected result.
    expected = copy.copy(self.note_sequence)
    testing_lib.add_track_to_sequence(
        expected, 0, [(40, 45, 0.0, 1.0), (55, 120, 1.5, 1.51)])
    testing_lib.add_chords_to_sequence(
        expected, [('C', 0.0), ('G7', 0.5)])
    testing_lib.add_control_changes_to_sequence(
        expected, 0, [(0.0, 64, 0), (1.5, 64, 127)])
    testing_lib.add_control_changes_to_sequence(
        expected, 1, [(0.0, 64, 127)])
    # Control changes were added per instrument; order them by time.
    expected.control_changes.sort(key=lambda change: change.time)
    expected.total_time = 1.51
    expected.subsequence_info.start_time_offset = 2.5
    expected.subsequence_info.end_time_offset = 5.99

    actual = sequences_lib.extract_subsequence(source, 2.5, 4.75)
    self.assertProtoEquals(expected, actual)
def preprocess_sequence(sequence_tensor, hparams):
    """Deserialize a NoteSequence and prepare it for training.

    Sustain control changes are applied after deserialization. When
    `hparams.crop_training_sequence_to_notes` is set and the sequence has
    notes, the sequence is cropped to the span from the value returned by
    `_find_first_note_start` up to `sequence.total_time`.

    Args:
        sequence_tensor: The NoteSequence in serialized form.
        hparams: Current hyperparameters.

    Returns:
        sequence: The preprocessed NoteSequence object.
        cropped_beginning_seconds: How many seconds were cropped from the
            beginning of the NoteSequence (0 when nothing was cropped).
    """
    note_sequence = sequences_lib.apply_sustain_control_changes(
        music_pb2.NoteSequence.FromString(sequence_tensor))

    # Nothing to crop: cropping is disabled or there are no notes.
    if not (hparams.crop_training_sequence_to_notes and note_sequence.notes):
        return note_sequence, 0

    first_note_start = _find_first_note_start(note_sequence)
    cropped = sequences_lib.extract_subsequence(
        note_sequence, first_note_start, note_sequence.total_time)
    return cropped, first_note_start
def testExtractSubsequence(self):
    """Check extract_subsequence(sequence, 2.5, 4.75) on a full fixture.

    The expected proto holds the notes, chords and control changes with
    times relative to the new start, a `total_time` of 1.51 and the
    `subsequence_info` offsets 2.5 / 5.99.
    """
    input_notes = [
        (12, 100, 0.01, 10.0),
        (11, 55, 0.22, 0.50),
        (40, 45, 2.50, 3.50),
        (55, 120, 4.0, 4.01),
        (52, 99, 4.75, 5.0),
    ]
    input_chords = [('C', 1.5), ('G7', 3.0), ('F', 4.8)]
    input_pedal = [
        (0.0, 64, 127),
        (2.0, 64, 0),
        (4.0, 64, 127),
        (5.0, 64, 0),
    ]

    full_sequence = copy.copy(self.note_sequence)
    testing_lib.add_track_to_sequence(full_sequence, 0, input_notes)
    testing_lib.add_chords_to_sequence(full_sequence, input_chords)
    testing_lib.add_control_changes_to_sequence(
        full_sequence, 0, input_pedal)
    testing_lib.add_control_changes_to_sequence(
        full_sequence, 1, [(2.0, 64, 127)])

    wanted = copy.copy(self.note_sequence)
    testing_lib.add_track_to_sequence(
        wanted, 0, [(40, 45, 0.0, 1.0), (55, 120, 1.5, 1.51)])
    testing_lib.add_chords_to_sequence(wanted, [('C', 0.0), ('G7', 0.5)])
    testing_lib.add_control_changes_to_sequence(
        wanted, 0, [(0.0, 64, 0), (1.5, 64, 127)])
    testing_lib.add_control_changes_to_sequence(
        wanted, 1, [(0.0, 64, 127)])
    # Control changes were added per instrument; order them by time.
    wanted.control_changes.sort(key=lambda event: event.time)
    wanted.total_time = 1.51
    wanted.subsequence_info.start_time_offset = 2.5
    wanted.subsequence_info.end_time_offset = 5.99

    extracted = sequences_lib.extract_subsequence(full_sequence, 2.5, 4.75)
    self.assertProtoEquals(wanted, extracted)
def generate_train_set():
    """Generate the train TFRecord.

    Collects every ``*.wav`` file found under ``FLAGS.input_dir/<directory>``
    for each entry of ``train_dirs`` and pairs it with the ``.mid`` file
    that shares its base name. Each pair is cut at the points returned by
    ``find_split_points``; chunks shorter than ``FLAGS.min_length`` are
    dropped. Every remaining chunk is written as one ``tf.train.Example``
    (features ``id``, ``sequence`` and ``audio``) to
    ``maps_config2_train.tfrecord`` inside ``FLAGS.output_dir``.
    """
    train_file_pairs = []
    for directory in train_dirs:
        pattern = os.path.join(FLAGS.input_dir, directory, '*.wav')
        # Find the matching mid file for every wav file.
        for wav_file in glob.glob(pattern):
            base_name_root, _ = os.path.splitext(wav_file)
            train_file_pairs.append((wav_file, base_name_root + '.mid'))

    train_output_name = os.path.join(FLAGS.output_dir,
                                     'maps_config2_train.tfrecord')

    with tf.python_io.TFRecordWriter(train_output_name) as writer:
        for pair in train_file_pairs:
            print(pair)
            # WAV and MIDI are binary formats, so read them as bytes. The
            # context managers close each handle as soon as it is read
            # instead of leaving one open handle per input file.
            with tf.gfile.Open(pair[0], 'rb') as wav_handle:
                wav_data = wav_handle.read()
            samples = audio_io.wav_data_to_samples(wav_data,
                                                   FLAGS.sample_rate)

            # Load the midi data and convert it to a NoteSequence.
            with tf.gfile.Open(pair[1], 'rb') as midi_handle:
                midi_data = midi_handle.read()
            ns = midi_io.midi_to_sequence_proto(midi_data)

            splits = find_split_points(ns, samples, FLAGS.sample_rate,
                                       FLAGS.min_length, FLAGS.max_length)

            for start, end in zip(splits[:-1], splits[1:]):
                if end - start < FLAGS.min_length:
                    continue

                new_ns = sequences_lib.extract_subsequence(ns, start, end)
                new_wav_data = audio_io.crop_wav_data(
                    wav_data, FLAGS.sample_rate, start, end - start)
                example = tf.train.Example(features=tf.train.Features(
                    feature={
                        'id': tf.train.Feature(
                            bytes_list=tf.train.BytesList(
                                value=[pair[0]])),
                        'sequence': tf.train.Feature(
                            bytes_list=tf.train.BytesList(
                                value=[new_ns.SerializeToString()])),
                        'audio': tf.train.Feature(
                            bytes_list=tf.train.BytesList(
                                value=[new_wav_data])),
                    }))
                writer.write(example.SerializeToString())
def testExtractSubsequence(self):
    """Check extract_subsequence(sequence, 2.5, 4.75) on a notes-only track.

    The expected proto contains the two notes (40 and 55) with their
    original start and end times.
    """
    all_notes = [
        (12, 100, 0.01, 10.0),
        (11, 55, 0.22, 0.50),
        (40, 45, 2.50, 3.50),
        (55, 120, 4.0, 4.01),
        (52, 99, 4.75, 5.0),
    ]
    kept_notes = [(40, 45, 2.50, 3.50), (55, 120, 4.0, 4.01)]

    source = copy.copy(self.note_sequence)
    testing_lib.add_track_to_sequence(source, 0, all_notes)

    expected = copy.copy(self.note_sequence)
    testing_lib.add_track_to_sequence(expected, 0, kept_notes)

    actual = sequences_lib.extract_subsequence(source, 2.5, 4.75)
    self.assertProtoEquals(expected, actual)
def generate_train_set():
    """Generate the train TFRecord.

    Collects every ``*.wav`` file found under ``FLAGS.input_dir/<directory>``
    for each entry of ``train_dirs`` and pairs it with the ``.mid`` file
    that shares its base name. Each pair is cut at the points returned by
    ``find_split_points``; chunks shorter than ``FLAGS.min_length`` are
    dropped. Every remaining chunk is written as one ``tf.train.Example``
    (features ``id``, ``sequence`` and ``audio``) to
    ``maps_config2_train.tfrecord`` inside ``FLAGS.output_dir``.
    """
    train_file_pairs = []
    for directory in train_dirs:
        wav_pattern = os.path.join(FLAGS.input_dir, directory, '*.wav')
        # Find the matching mid file for every wav file.
        for wav_file in glob.glob(wav_pattern):
            base_name_root, _ = os.path.splitext(wav_file)
            train_file_pairs.append((wav_file, base_name_root + '.mid'))

    train_output_name = os.path.join(FLAGS.output_dir,
                                     'maps_config2_train.tfrecord')

    with tf.python_io.TFRecordWriter(train_output_name) as writer:
        for pair in train_file_pairs:
            print(pair)
            # Read the WAV file as bytes and close the handle right away;
            # text mode is wrong for binary data and the bare
            # `Open(...).read()` left the handle open.
            with tf.gfile.Open(pair[0], 'rb') as wav_file_handle:
                wav_data = wav_file_handle.read()
            samples = audio_io.wav_data_to_samples(wav_data,
                                                   FLAGS.sample_rate)

            # Same for the MIDI file, which is then converted to a
            # NoteSequence.
            with tf.gfile.Open(pair[1], 'rb') as midi_file_handle:
                midi_data = midi_file_handle.read()
            ns = midi_io.midi_to_sequence_proto(midi_data)

            splits = find_split_points(ns, samples, FLAGS.sample_rate,
                                       FLAGS.min_length, FLAGS.max_length)

            for start, end in zip(splits[:-1], splits[1:]):
                if end - start < FLAGS.min_length:
                    continue

                new_ns = sequences_lib.extract_subsequence(ns, start, end)
                new_wav_data = audio_io.crop_wav_data(
                    wav_data, FLAGS.sample_rate, start, end - start)
                example = tf.train.Example(features=tf.train.Features(
                    feature={
                        'id': tf.train.Feature(
                            bytes_list=tf.train.BytesList(
                                value=[pair[0]])),
                        'sequence': tf.train.Feature(
                            bytes_list=tf.train.BytesList(
                                value=[new_ns.SerializeToString()])),
                        'audio': tf.train.Feature(
                            bytes_list=tf.train.BytesList(
                                value=[new_wav_data])),
                    }))
                writer.write(example.SerializeToString())
def testExtractSubsequence(self):
    """Check extract_subsequence(sequence, 2.5, 4.75) on a notes-only track.

    The expected proto contains the two notes (40 and 55) with their
    original start and end times and a `total_time` of 4.75.
    """
    all_notes = [
        (12, 100, 0.01, 10.0),
        (11, 55, 0.22, 0.50),
        (40, 45, 2.50, 3.50),
        (55, 120, 4.0, 4.01),
        (52, 99, 4.75, 5.0),
    ]
    kept_notes = [(40, 45, 2.50, 3.50), (55, 120, 4.0, 4.01)]

    source = copy.copy(self.note_sequence)
    testing_lib.add_track_to_sequence(source, 0, all_notes)

    expected = copy.copy(self.note_sequence)
    testing_lib.add_track_to_sequence(expected, 0, kept_notes)
    expected.total_time = 4.75

    actual = sequences_lib.extract_subsequence(source, 2.5, 4.75)
    self.assertProtoEquals(expected, actual)
def testExtractSubsequence(self):
    """Check extract_subsequence(sequence, 2.5, 4.75) with notes and chords.

    The expected proto contains notes 40 and 55 and the 'G7' chord at 3.0,
    all with their original times, and a `total_time` of 4.75.
    """
    all_notes = [
        (12, 100, 0.01, 10.0),
        (11, 55, 0.22, 0.50),
        (40, 45, 2.50, 3.50),
        (55, 120, 4.0, 4.01),
        (52, 99, 4.75, 5.0),
    ]
    all_chords = [('C', 1.5), ('G7', 3.0), ('F', 4.8)]

    source = copy.copy(self.note_sequence)
    testing_lib.add_track_to_sequence(source, 0, all_notes)
    testing_lib.add_chords_to_sequence(source, all_chords)

    expected = copy.copy(self.note_sequence)
    testing_lib.add_track_to_sequence(
        expected, 0, [(40, 45, 2.50, 3.50), (55, 120, 4.0, 4.01)])
    testing_lib.add_chords_to_sequence(expected, [('G7', 3.0)])
    expected.total_time = 4.75

    actual = sequences_lib.extract_subsequence(source, 2.5, 4.75)
    self.assertProtoEquals(expected, actual)
def testExtractSubsequence(self):
    """Check extract_subsequence(sequence, 2.5, 4.75) with notes and chords.

    The expected proto holds notes and chords with times relative to the
    new start, a `total_time` of 2.25 and the `subsequence_info` offsets
    2.5 / 5.25.
    """
    all_notes = [
        (12, 100, 0.01, 10.0),
        (11, 55, 0.22, 0.50),
        (40, 45, 2.50, 3.50),
        (55, 120, 4.0, 4.01),
        (52, 99, 4.75, 5.0),
    ]
    all_chords = [('C', 1.5), ('G7', 3.0), ('F', 4.8)]

    source = copy.copy(self.note_sequence)
    testing_lib.add_track_to_sequence(source, 0, all_notes)
    testing_lib.add_chords_to_sequence(source, all_chords)

    expected = copy.copy(self.note_sequence)
    testing_lib.add_track_to_sequence(
        expected, 0, [(40, 45, 0.0, 1.0), (55, 120, 1.5, 1.51)])
    testing_lib.add_chords_to_sequence(
        expected, [('C', 0.0), ('G7', 0.5)])
    expected.total_time = 2.25
    expected.subsequence_info.start_time_offset = 2.5
    expected.subsequence_info.end_time_offset = 5.25

    actual = sequences_lib.extract_subsequence(source, 2.5, 4.75)
    self.assertProtoEquals(expected, actual)
def preprocess_sequence(sequence_tensor, hparams):
    """Deserialize a NoteSequence and prepare it for training.

    Sustain control changes are applied after deserialization. When
    `hparams.crop_training_sequence_to_notes` is set and the sequence has
    notes, the sequence is cropped to the span from the value returned by
    `_find_first_note_start` up to `sequence.total_time`.

    Args:
        sequence_tensor: The NoteSequence in serialized form.
        hparams: Current hyperparameters.

    Returns:
        sequence: The preprocessed NoteSequence object.
        cropped_beginning_seconds: How many seconds were cropped from the
            beginning of the NoteSequence (0 when nothing was cropped).
    """
    parsed = music_pb2.NoteSequence.FromString(sequence_tensor)
    sustained = sequences_lib.apply_sustain_control_changes(parsed)

    should_crop = hparams.crop_training_sequence_to_notes and sustained.notes
    if not should_crop:
        return sustained, 0

    seconds_cropped = _find_first_note_start(sustained)
    return (
        sequences_lib.extract_subsequence(
            sustained, seconds_cropped, sustained.total_time),
        seconds_cropped,
    )
def process(self, kv):
    """Turn one keyed, serialized NoteSequence into encoded examples.

    The sequence is optionally split into segments (fixed or randomly
    sampled hop sizes, repeated `self._num_replications` times). When score
    encoders are configured, a melody score is separated from the
    performance. Every augmentation function is then applied and the
    encoded result is yielded.

    Args:
        kv: A `(key, serialized NoteSequence)` pair. The MD5 digest of the
            key seeds `random`, so the sampled hop times are repeatable for
            a given key.

    Yields:
        The result of `generator_utils.to_example` for each segment and
        augmentation whose encodings are all non-empty.
    """
    key, serialized_ns = kv

    # Seed the RNG from the key so that hop times are deterministic.
    random.seed(int(hashlib.md5(key).hexdigest(), 16))

    # Deserialize the proto and apply the sustain pedal.
    ns = sequences_lib.apply_sustain_control_changes(
        music_pb2.NoteSequence.FromString(serialized_ns))

    # Control changes can be numerous and are no longer needed.
    del ns.control_changes[:]

    min_hop = self._min_hop_size_seconds
    max_hop = self._max_hop_size_seconds

    if min_hop and ns.total_time < min_hop:
        Metrics.counter('extract_examples', 'sequence_too_short').inc()
        return

    sequences = []
    for _ in range(self._num_replications):
        if not max_hop:
            # No splitting requested: use the sequence as it is.
            sequences.append(ns)
        elif max_hop == min_hop:
            # Split using a fixed hop size.
            sequences.extend(
                sequences_lib.split_note_sequence(ns, max_hop))
        else:
            # Sample random hop positions such that each segment size is
            # within the specified range.
            hop_times = [0.0]
            while hop_times[-1] <= ns.total_time - min_hop:
                previous_hop = hop_times[-1]
                if previous_hop + max_hop < ns.total_time:
                    # A valid hop size is required here, since the
                    # remainder of the sequence is too long.
                    max_offset = min(
                        max_hop, ns.total_time - min_hop - previous_hop)
                else:
                    # The next hop time may be invalid; in that case the
                    # loop simply stops.
                    max_offset = max_hop
                hop_times.append(
                    previous_hop + random.uniform(min_hop, max_offset))
            # Split at the chosen hop times (ignoring zero and the final
            # invalid time).
            sequences.extend(
                sequences_lib.split_note_sequence(ns, hop_times[1:-1]))

    for performance_sequence in sequences:
        if self._encode_score_fns:
            # A score has to be extracted.
            if not self._absolute_timing:
                # Beats are required to extract a score with metric timing.
                beat_times = [
                    ta.time
                    for ta in performance_sequence.text_annotations
                    if (ta.annotation_type ==
                        music_pb2.NoteSequence.TextAnnotation.BEAT)
                    and ta.time <= performance_sequence.total_time
                ]
                if len(beat_times) < 2:
                    Metrics.counter(
                        'extract_examples', 'not_enough_beats').inc()
                    continue

                # Ensure the sequence starts and ends on a beat.
                performance_sequence = sequences_lib.extract_subsequence(
                    performance_sequence,
                    start_time=min(beat_times),
                    end_time=max(beat_times))

                # Infer beat-aligned chords (only for relative timing).
                try:
                    chord_inference.infer_chords_for_sequence(
                        performance_sequence,
                        chord_change_prob=0.25,
                        chord_note_concentration=50.0,
                        add_key_signatures=True)
                except chord_inference.ChordInferenceError:
                    Metrics.counter(
                        'extract_examples', 'chord_inference_failed').inc()
                    continue

            # Infer melody regardless of relative/absolute timing.
            try:
                melody_instrument = (
                    melody_inference.infer_melody_for_sequence(
                        performance_sequence,
                        melody_interval_scale=2.0,
                        rest_prob=0.1,
                        instantaneous_non_max_pitch_prob=1e-15,
                        instantaneous_non_empty_rest_prob=0.0,
                        instantaneous_missing_pitch_prob=1e-15))
            except melody_inference.MelodyInferenceError:
                Metrics.counter(
                    'extract_examples', 'melody_inference_failed').inc()
                continue

            if self._absolute_timing:
                # Score uses the same timing as the performance.
                score_sequence = copy.deepcopy(performance_sequence)
            else:
                # Rectify detected beats to occur at a fixed tempo.
                # TODO(iansimon): also include the alignment
                score_sequence, unused_alignment = (
                    sequences_lib.rectify_beats(
                        performance_sequence, beats_per_minute=SCORE_BPM))

            # Remove melody notes from the performance.
            performance_notes = [
                note for note in performance_sequence.notes
                if note.instrument != melody_instrument
            ]
            del performance_sequence.notes[:]
            performance_sequence.notes.extend(performance_notes)

            # Remove non-melody notes from the score.
            score_notes = [
                note for note in score_sequence.notes
                if note.instrument == melody_instrument
            ]
            del score_sequence.notes[:]
            score_sequence.notes.extend(score_notes)

            # Remove key signatures and beat/chord annotations from the
            # performance.
            del performance_sequence.key_signatures[:]
            del performance_sequence.text_annotations[:]

            Metrics.counter('extract_examples', 'extracted_score').inc()

        for augment_fn in self._augment_fns:
            # Augment and encode the performance.
            try:
                augmented_performance_sequence = augment_fn(
                    performance_sequence)
            except DataAugmentationError:
                Metrics.counter(
                    'extract_examples', 'augment_performance_failed').inc()
                continue

            example_dict = {
                'targets': self._encode_performance_fn(
                    augmented_performance_sequence)
            }
            if not example_dict['targets']:
                Metrics.counter(
                    'extract_examples', 'skipped_empty_targets').inc()
                continue

            if self._encode_score_fns:
                # Augment the extracted score.
                try:
                    augmented_score_sequence = augment_fn(score_sequence)
                except DataAugmentationError:
                    Metrics.counter(
                        'extract_examples', 'augment_score_failed').inc()
                    continue

                # Apply all score encoding functions; give up on this
                # augmentation as soon as one encoding comes back empty.
                found_empty_encoding = False
                for name, encode_score_fn in self._encode_score_fns.items():
                    example_dict[name] = encode_score_fn(
                        augmented_score_sequence)
                    if not example_dict[name]:
                        Metrics.counter(
                            'extract_examples',
                            'skipped_empty_%s' % name).inc()
                        found_empty_encoding = True
                        break
                if found_empty_encoding:
                    continue

            Metrics.counter('extract_examples', 'encoded_example').inc()
            Metrics.distribution(
                'extract_examples', 'performance_length_in_seconds').update(
                    int(augmented_performance_sequence.total_time))

            yield generator_utils.to_example(example_dict)
def generate_train_set(exclude_ids):
    """Generate the train TFRecord.

    Collects every ``*.wav`` file found under ``FLAGS.input_dir/<directory>``
    for each entry of ``train_dirs`` whose id is not excluded, and pairs it
    with the ``.mid`` file that shares its base name. Each pair is cut at
    the points returned by ``find_split_points``; chunks shorter than
    ``FLAGS.min_length`` are dropped. Every remaining chunk is written as
    one ``tf.train.Example`` (features ``id``, ``sequence``, ``audio`` and
    ``velocity_range``) to ``maps_config2_train.tfrecord`` inside
    ``FLAGS.output_dir``.

    Args:
        exclude_ids: Container of ids; a wav file is skipped when
            ``filename_to_id(wav_file)`` is in it.
    """
    train_file_pairs = []
    for directory in train_dirs:
        pattern = os.path.join(FLAGS.input_dir, directory, '*.wav')
        # Find the matching mid file for every wav file that is kept.
        for wav_file in glob.glob(pattern):
            base_name_root, _ = os.path.splitext(wav_file)
            mid_file = base_name_root + '.mid'
            if filename_to_id(wav_file) not in exclude_ids:
                train_file_pairs.append((wav_file, mid_file))

    train_output_name = os.path.join(FLAGS.output_dir,
                                     'maps_config2_train.tfrecord')

    with tf.python_io.TFRecordWriter(train_output_name) as writer:
        for pair in train_file_pairs:
            print(pair)
            # Close the WAV handle as soon as the bytes are read instead of
            # leaving one open handle per input file.
            with tf.gfile.Open(pair[0], 'rb') as wav_handle:
                wav_data = wav_handle.read()
            samples = audio_io.wav_data_to_samples(wav_data,
                                                   FLAGS.sample_rate)
            samples = librosa.util.normalize(samples, norm=np.inf)

            # Load the midi data and convert it to a NoteSequence.
            ns = midi_io.midi_file_to_note_sequence(pair[1])

            splits = find_split_points(ns, samples, FLAGS.sample_rate,
                                       FLAGS.min_length, FLAGS.max_length)

            # np.max/np.min raise ValueError on an empty list, so a MIDI
            # file without notes falls back to a 0..0 range rather than
            # aborting the whole run.
            velocities = [note.velocity for note in ns.notes]
            velocity_max = np.max(velocities) if velocities else 0
            velocity_min = np.min(velocities) if velocities else 0
            new_velocity_tuple = music_pb2.VelocityRange(
                min=velocity_min, max=velocity_max)

            for start, end in zip(splits[:-1], splits[1:]):
                if end - start < FLAGS.min_length:
                    continue

                new_ns = sequences_lib.extract_subsequence(ns, start, end)
                new_wav_data = audio_io.crop_wav_data(
                    wav_data, FLAGS.sample_rate, start, end - start)
                example = tf.train.Example(features=tf.train.Features(
                    feature={
                        'id': tf.train.Feature(
                            bytes_list=tf.train.BytesList(
                                value=[pair[0]])),
                        'sequence': tf.train.Feature(
                            bytes_list=tf.train.BytesList(
                                value=[new_ns.SerializeToString()])),
                        'audio': tf.train.Feature(
                            bytes_list=tf.train.BytesList(
                                value=[new_wav_data])),
                        'velocity_range': tf.train.Feature(
                            bytes_list=tf.train.BytesList(
                                value=[
                                    new_velocity_tuple.SerializeToString()
                                ])),
                    }))
                writer.write(example.SerializeToString())
def process(self, kv):
    """Turn one keyed, serialized NoteSequence into encoded examples.

    The sequence is optionally split into segments (fixed or randomly
    sampled hop sizes, repeated `self._num_replications` times). When score
    encoders are configured, a melody score is separated from the
    performance. Every augmentation function is then applied and the
    encoded result is yielded.

    Args:
        kv: A `(key, serialized NoteSequence)` pair. The MD5 digest of the
            key seeds `random`, so the sampled hop times are repeatable for
            a given key.

    Yields:
        The result of `generator_utils.to_example` for each segment and
        augmentation whose encodings are all non-empty.
    """
    key, serialized_ns = kv

    # Seed the RNG from the key so that hop times are deterministic.
    random.seed(int(hashlib.md5(key).hexdigest(), 16))

    # Deserialize the proto and apply the sustain pedal.
    ns = sequences_lib.apply_sustain_control_changes(
        music_pb2.NoteSequence.FromString(serialized_ns))

    # Control changes can be numerous and are no longer needed.
    del ns.control_changes[:]

    min_hop = self._min_hop_size_seconds
    max_hop = self._max_hop_size_seconds

    if min_hop and ns.total_time < min_hop:
        Metrics.counter('extract_examples', 'sequence_too_short').inc()
        return

    sequences = []
    for _ in range(self._num_replications):
        if not max_hop:
            # No splitting requested: use the sequence as it is.
            sequences.append(ns)
        elif max_hop == min_hop:
            # Split using a fixed hop size.
            sequences.extend(
                sequences_lib.split_note_sequence(ns, max_hop))
        else:
            # Sample random hop positions such that each segment size is
            # within the specified range.
            hop_times = [0.0]
            while hop_times[-1] <= ns.total_time - min_hop:
                previous_hop = hop_times[-1]
                if previous_hop + max_hop < ns.total_time:
                    # A valid hop size is required here, since the
                    # remainder of the sequence is too long.
                    max_offset = min(
                        max_hop, ns.total_time - min_hop - previous_hop)
                else:
                    # The next hop time may be invalid; in that case the
                    # loop simply stops.
                    max_offset = max_hop
                hop_times.append(
                    previous_hop + random.uniform(min_hop, max_offset))
            # Split at the chosen hop times (ignoring zero and the final
            # invalid time).
            sequences.extend(
                sequences_lib.split_note_sequence(ns, hop_times[1:-1]))

    for performance_sequence in sequences:
        if self._encode_score_fns:
            # A score has to be extracted.
            if not self._absolute_timing:
                # Beats are required to extract a score with metric timing.
                beat_times = [
                    ta.time
                    for ta in performance_sequence.text_annotations
                    if (ta.annotation_type ==
                        music_pb2.NoteSequence.TextAnnotation.BEAT)
                    and ta.time <= performance_sequence.total_time
                ]
                if len(beat_times) < 2:
                    Metrics.counter(
                        'extract_examples', 'not_enough_beats').inc()
                    continue

                # Ensure the sequence starts and ends on a beat.
                performance_sequence = sequences_lib.extract_subsequence(
                    performance_sequence,
                    start_time=min(beat_times),
                    end_time=max(beat_times))

                # Infer beat-aligned chords (only for relative timing).
                try:
                    chord_inference.infer_chords_for_sequence(
                        performance_sequence,
                        chord_change_prob=0.25,
                        chord_note_concentration=50.0,
                        add_key_signatures=True)
                except chord_inference.ChordInferenceError:
                    Metrics.counter(
                        'extract_examples', 'chord_inference_failed').inc()
                    continue

            # Infer melody regardless of relative/absolute timing.
            try:
                melody_instrument = (
                    melody_inference.infer_melody_for_sequence(
                        performance_sequence,
                        melody_interval_scale=2.0,
                        rest_prob=0.1,
                        instantaneous_non_max_pitch_prob=1e-15,
                        instantaneous_non_empty_rest_prob=0.0,
                        instantaneous_missing_pitch_prob=1e-15))
            except melody_inference.MelodyInferenceError:
                Metrics.counter(
                    'extract_examples', 'melody_inference_failed').inc()
                continue

            if self._absolute_timing:
                # Score uses the same timing as the performance.
                score_sequence = copy.deepcopy(performance_sequence)
            else:
                # Rectify detected beats to occur at a fixed tempo.
                # TODO(iansimon): also include the alignment
                score_sequence, unused_alignment = (
                    sequences_lib.rectify_beats(
                        performance_sequence, beats_per_minute=SCORE_BPM))

            # Remove melody notes from the performance.
            performance_notes = [
                note for note in performance_sequence.notes
                if note.instrument != melody_instrument
            ]
            del performance_sequence.notes[:]
            performance_sequence.notes.extend(performance_notes)

            # Remove non-melody notes from the score.
            score_notes = [
                note for note in score_sequence.notes
                if note.instrument == melody_instrument
            ]
            del score_sequence.notes[:]
            score_sequence.notes.extend(score_notes)

            # Remove key signatures and beat/chord annotations from the
            # performance.
            del performance_sequence.key_signatures[:]
            del performance_sequence.text_annotations[:]

            Metrics.counter('extract_examples', 'extracted_score').inc()

        for augment_fn in self._augment_fns:
            # Augment and encode the performance.
            try:
                augmented_performance_sequence = augment_fn(
                    performance_sequence)
            except DataAugmentationError:
                Metrics.counter(
                    'extract_examples', 'augment_performance_failed').inc()
                continue

            example_dict = {
                'targets': self._encode_performance_fn(
                    augmented_performance_sequence)
            }
            if not example_dict['targets']:
                Metrics.counter(
                    'extract_examples', 'skipped_empty_targets').inc()
                continue

            if self._encode_score_fns:
                # Augment the extracted score.
                try:
                    augmented_score_sequence = augment_fn(score_sequence)
                except DataAugmentationError:
                    Metrics.counter(
                        'extract_examples', 'augment_score_failed').inc()
                    continue

                # Apply all score encoding functions; give up on this
                # augmentation as soon as one encoding comes back empty.
                found_empty_encoding = False
                for name, encode_score_fn in self._encode_score_fns.items():
                    example_dict[name] = encode_score_fn(
                        augmented_score_sequence)
                    if not example_dict[name]:
                        Metrics.counter(
                            'extract_examples',
                            'skipped_empty_%s' % name).inc()
                        found_empty_encoding = True
                        break
                if found_empty_encoding:
                    continue

            Metrics.counter('extract_examples', 'encoded_example').inc()
            Metrics.distribution(
                'extract_examples', 'performance_length_in_seconds').update(
                    int(augmented_performance_sequence.total_time))

            yield generator_utils.to_example(example_dict)
def generate_train_set(exclude_ids):
    """Generate the train TFRecord.

    Collects every ``*.wav`` file found under ``FLAGS.input_dir/<directory>``
    for each entry of ``train_dirs`` whose id is not excluded, and pairs it
    with the ``.mid`` file that shares its base name. Each pair is cut at
    the points returned by ``find_split_points``; chunks shorter than
    ``FLAGS.min_length`` are dropped. Every remaining chunk is written as
    one ``tf.train.Example`` (features ``id``, ``sequence``, ``audio`` and
    ``velocity_range``) to ``maps_config2_train.tfrecord`` inside
    ``FLAGS.output_dir``.

    Args:
        exclude_ids: Container of ids; a wav file is skipped when
            ``filename_to_id(wav_file)`` is in it.
    """
    train_file_pairs = []
    for directory in train_dirs:
        wav_pattern = os.path.join(FLAGS.input_dir, directory, '*.wav')
        # Find the matching mid file for every wav file that is kept.
        for wav_file in glob.glob(wav_pattern):
            base_name_root, _ = os.path.splitext(wav_file)
            mid_file = base_name_root + '.mid'
            if filename_to_id(wav_file) not in exclude_ids:
                train_file_pairs.append((wav_file, mid_file))

    train_output_name = os.path.join(FLAGS.output_dir,
                                     'maps_config2_train.tfrecord')

    with tf.python_io.TFRecordWriter(train_output_name) as writer:
        for pair in train_file_pairs:
            print(pair)
            # The bare `Open(...).read()` never closed the handle; the
            # context manager releases it once the bytes are in memory.
            with tf.gfile.Open(pair[0], 'rb') as wav_file_handle:
                wav_data = wav_file_handle.read()
            samples = audio_io.wav_data_to_samples(wav_data,
                                                   FLAGS.sample_rate)
            samples = librosa.util.normalize(samples, norm=np.inf)

            # Load the midi data and convert it to a NoteSequence.
            ns = midi_io.midi_file_to_note_sequence(pair[1])

            splits = find_split_points(ns, samples, FLAGS.sample_rate,
                                       FLAGS.min_length, FLAGS.max_length)

            # Guard the reductions: np.max/np.min raise ValueError when the
            # MIDI file contains no notes.
            velocities = [note.velocity for note in ns.notes]
            velocity_max = np.max(velocities) if velocities else 0
            velocity_min = np.min(velocities) if velocities else 0
            new_velocity_tuple = music_pb2.VelocityRange(
                min=velocity_min, max=velocity_max)

            for start, end in zip(splits[:-1], splits[1:]):
                if end - start < FLAGS.min_length:
                    continue

                new_ns = sequences_lib.extract_subsequence(ns, start, end)
                new_wav_data = audio_io.crop_wav_data(
                    wav_data, FLAGS.sample_rate, start, end - start)
                example = tf.train.Example(features=tf.train.Features(
                    feature={
                        'id': tf.train.Feature(
                            bytes_list=tf.train.BytesList(
                                value=[pair[0]])),
                        'sequence': tf.train.Feature(
                            bytes_list=tf.train.BytesList(
                                value=[new_ns.SerializeToString()])),
                        'audio': tf.train.Feature(
                            bytes_list=tf.train.BytesList(
                                value=[new_wav_data])),
                        'velocity_range': tf.train.Feature(
                            bytes_list=tf.train.BytesList(
                                value=[
                                    new_velocity_tuple.SerializeToString()
                                ])),
                    }))
                writer.write(example.SerializeToString())
def process_record(wav_data, ns, example_id, min_length=5, max_length=20,
                   sample_rate=16000, allow_empty_notesequence=False,
                   load_audio_with_librosa=False):
    """Split a record into chunks and create an example proto.

    To use the full length audio and notesequence, set min_length=0 and
    max_length=-1.

    Args:
        wav_data: audio data in WAV format.
        ns: corresponding NoteSequence.
        example_id: id for the example proto
        min_length: minimum length in seconds for audio chunks.
        max_length: maximum length in seconds for audio chunks.
        sample_rate: desired audio sample rate.
        allow_empty_notesequence: whether an empty NoteSequence is allowed.
        load_audio_with_librosa: Use librosa for sampling. Works with 24-bit
            wavs.

    Yields:
        Example protos.

    Raises:
        ValueError: if more than 5 seconds of padding would be needed to
            make the audio as long as the NoteSequence.
    """
    try:
        if load_audio_with_librosa:
            samples = audio_io.wav_data_to_samples_librosa(
                wav_data, sample_rate)
        else:
            samples = audio_io.wav_data_to_samples(wav_data, sample_rate)
    except audio_io.AudioIOReadError as e:
        # Interpolate the exception into the message; passing it as a second
        # print argument left the literal "%s" in the output.
        print('Exception %s' % e)
        return
    samples = librosa.util.normalize(samples, norm=np.inf)

    # Add padding to samples if notesequence is longer.
    pad_to_samples = int(math.ceil(ns.total_time * sample_rate))
    padding_needed = pad_to_samples - samples.shape[0]
    if padding_needed > 5 * sample_rate:
        raise ValueError(
            'Would have padded {} more than 5 seconds to match note sequence total '
            'time. ({} original samples, {} sample rate, {} sample seconds, '
            '{} sequence seconds) This likely indicates a problem with the source '
            'data.'.format(example_id, samples.shape[0], sample_rate,
                           samples.shape[0] / sample_rate, ns.total_time))
    samples = np.pad(samples, (0, max(0, padding_needed)), 'constant')

    if max_length == min_length:
        splits = np.arange(0, ns.total_time, max_length)
    elif max_length > 0:
        splits = find_split_points(ns, samples, sample_rate, min_length,
                                   max_length)
    else:
        splits = [0, ns.total_time]

    # The velocity range describes the whole record, not a single chunk.
    velocities = [note.velocity for note in ns.notes]
    velocity_max = np.max(velocities) if velocities else 0
    velocity_min = np.min(velocities) if velocities else 0
    velocity_range = music_pb2.VelocityRange(min=velocity_min,
                                             max=velocity_max)

    for start, end in zip(splits[:-1], splits[1:]):
        if end - start < min_length:
            continue

        # A chunk spanning the whole record needs no cropping at all.
        is_full_record = start == 0 and end == ns.total_time

        if is_full_record:
            new_ns = ns
        else:
            new_ns = sequences_lib.extract_subsequence(ns, start, end)

        if not new_ns.notes and not allow_empty_notesequence:
            tf.logging.warning('skipping empty sequence')
            continue

        if is_full_record:
            new_samples = samples
        else:
            # the resampling that happen in crop_wav_data is really slow
            # and we've already done it once, avoid doing it twice
            new_samples = audio_io.crop_samples(samples, sample_rate, start,
                                                end - start)
        new_wav_data = audio_io.samples_to_wav_data(new_samples, sample_rate)
        yield create_example(example_id, new_ns, new_wav_data,
                             velocity_range=velocity_range)
def process_midi(self, f):
    """Encode one MIDI file into a list of example dicts.

    The file is loaded as a NoteSequence, sustain pedal is applied, and control
    changes are dropped. When score encoders are configured, a melody is
    inferred and split off into a separate score sequence (beat-rectified
    unless absolute timing is used). Every augmentation (each combination of
    `self.stretch_factors` and `self.transpose_amounts`) is then applied and
    the results are encoded.

    Calling this method (re)assigns several private attributes on `self`
    (`_min_hop_size_seconds`, `_max_hop_size_seconds`, `_num_replications`,
    `_encode_performance_fn`, `_encode_score_fns`, `_augment_fns`,
    `_absolute_timing`, `_random_crop_length`). Both hop sizes are set to 0.0
    and the replication count to 1, so the whole sequence is processed as a
    single segment.

    Args:
      f: MIDI input passed straight to
        `magenta.music.midi_file_to_sequence_proto`.

    Returns:
      A list of dicts. Each dict has a 'targets' entry (the encoded
      performance, randomly cropped to `_random_crop_length` when that is set
      and exceeded) plus one entry per score encoder name. Segments or
      augmentations that fail or encode to something empty are skipped with a
      printed message.
    """

    def augment_note_sequence(ns, stretch_factor, transpose_amount):
        """Augment a NoteSequence by time stretch and pitch transposition."""
        augmented_ns = sequences_lib.stretch_note_sequence(
            ns, stretch_factor, in_place=False)
        try:
            _, num_deleted_notes = sequences_lib.transpose_note_sequence(
                augmented_ns,
                transpose_amount,
                min_allowed_pitch=MIN_PITCH,
                max_allowed_pitch=MAX_PITCH,
                in_place=True)
        except chord_symbols_lib.ChordSymbolError:
            raise datagen_beam.DataAugmentationError(
                'Transposition of chord symbol(s) failed.')
        if num_deleted_notes:
            raise datagen_beam.DataAugmentationError(
                'Transposition caused out-of-range pitch(es).')
        return augmented_ns

    self._min_hop_size_seconds = 0.0
    self._max_hop_size_seconds = 0.0
    self._num_replications = 1
    self._encode_performance_fn = (
        self.performance_encoder().encode_note_sequence)
    self._encode_score_fns = {
        name: encoder.encode_note_sequence
        for name, encoder in self.score_encoders()
    }

    # One augmentation function per (stretch, transpose) combination.
    augment_params = itertools.product(self.stretch_factors,
                                       self.transpose_amounts)
    self._augment_fns = [
        functools.partial(augment_note_sequence,
                          stretch_factor=s,
                          transpose_amount=t)
        for s, t in augment_params
    ]

    self._absolute_timing = self.absolute_timing
    self._random_crop_length = self.random_crop_length_in_datagen

    rets = []
    ns = magenta.music.midi_file_to_sequence_proto(f)

    # Apply sustain pedal.
    ns = sequences_lib.apply_sustain_control_changes(ns)

    # Remove control changes as there are potentially a lot of them and they
    # are no longer needed.
    del ns.control_changes[:]

    if (self._min_hop_size_seconds
            and ns.total_time < self._min_hop_size_seconds):
        print("sequence_too_short")
        return []

    sequences = []
    for _ in range(self._num_replications):
        if self._max_hop_size_seconds:
            if self._max_hop_size_seconds == self._min_hop_size_seconds:
                # Split using fixed hop size.
                sequences += sequences_lib.split_note_sequence(
                    ns, self._max_hop_size_seconds)
            else:
                # Sample random hop positions such that each segment size is
                # within the specified range.
                hop_times = [0.0]
                while (hop_times[-1]
                       <= ns.total_time - self._min_hop_size_seconds):
                    if (hop_times[-1] + self._max_hop_size_seconds
                            < ns.total_time):
                        # It's important that we get a valid hop size here,
                        # since the remainder of the sequence is too long.
                        max_offset = min(
                            self._max_hop_size_seconds,
                            ns.total_time - self._min_hop_size_seconds -
                            hop_times[-1])
                    else:
                        # It's okay if the next hop time is invalid (in which
                        # case we'll just stop).
                        max_offset = self._max_hop_size_seconds
                    offset = random.uniform(self._min_hop_size_seconds,
                                            max_offset)
                    hop_times.append(hop_times[-1] + offset)
                # Split at the chosen hop times (ignoring zero and the final
                # invalid time).
                sequences += sequences_lib.split_note_sequence(
                    ns, hop_times[1:-1])
        else:
            sequences += [ns]

    for performance_sequence in sequences:
        if self._encode_score_fns:
            # We need to extract a score.
            if not self._absolute_timing:
                # Beats are required to extract a score with metric timing.
                beats = [
                    ta for ta in performance_sequence.text_annotations
                    if (ta.annotation_type ==
                        music_pb2.NoteSequence.TextAnnotation.BEAT)
                    and ta.time <= performance_sequence.total_time
                ]
                if len(beats) < 2:
                    print('not_enough_beats')
                    continue

                # Ensure the sequence starts and ends on a beat.
                performance_sequence = sequences_lib.extract_subsequence(
                    performance_sequence,
                    start_time=min(beat.time for beat in beats),
                    end_time=max(beat.time for beat in beats))

                # Infer beat-aligned chords (only for relative timing).
                try:
                    chord_inference.infer_chords_for_sequence(
                        performance_sequence,
                        chord_change_prob=0.25,
                        chord_note_concentration=50.0,
                        add_key_signatures=True)
                except chord_inference.ChordInferenceError:
                    print("chord_inference_failed")
                    continue

            # Infer melody regardless of relative/absolute timing.
            try:
                melody_instrument = (
                    melody_inference.infer_melody_for_sequence(
                        performance_sequence,
                        melody_interval_scale=2.0,
                        rest_prob=0.1,
                        instantaneous_non_max_pitch_prob=1e-15,
                        instantaneous_non_empty_rest_prob=0.0,
                        instantaneous_missing_pitch_prob=1e-15))
            except melody_inference.MelodyInferenceError:
                print('melody_inference_failed')
                continue

            if not self._absolute_timing:
                # Now rectify detected beats to occur at fixed tempo.
                # TODO(iansimon): also include the alignment
                score_sequence, unused_alignment = (
                    sequences_lib.rectify_beats(performance_sequence,
                                                beats_per_minute=SCORE_BPM))
            else:
                # Score uses same timing as performance.
                score_sequence = copy.deepcopy(performance_sequence)

            # Remove melody notes from performance.
            performance_notes = [
                note for note in performance_sequence.notes
                if note.instrument != melody_instrument
            ]
            del performance_sequence.notes[:]
            performance_sequence.notes.extend(performance_notes)

            # Remove non-melody notes from score.
            score_notes = [
                note for note in score_sequence.notes
                if note.instrument == melody_instrument
            ]
            del score_sequence.notes[:]
            score_sequence.notes.extend(score_notes)

        # Remove key signatures and beat/chord annotations from performance.
        del performance_sequence.key_signatures[:]
        del performance_sequence.text_annotations[:]

        for augment_fn in self._augment_fns:
            # Augment and encode the performance.
            try:
                augmented_performance_sequence = augment_fn(
                    performance_sequence)
            except datagen_beam.DataAugmentationError as e:
                # Catch the same class the augment helper raises.
                print("augment_performance_failed", e)
                continue
            example_dict = {
                'targets':
                self._encode_performance_fn(augmented_performance_sequence)
            }
            if not example_dict['targets']:
                print('skipped_empty_targets')
                continue

            if (self._random_crop_length and
                    len(example_dict['targets']) > self._random_crop_length):
                # Take a random crop of the encoded performance.
                max_offset = (len(example_dict['targets']) -
                              self._random_crop_length)
                offset = random.randrange(max_offset + 1)
                example_dict['targets'] = example_dict['targets'][
                    offset:offset + self._random_crop_length]

            if self._encode_score_fns:
                # Augment the extracted score.
                try:
                    augmented_score_sequence = augment_fn(score_sequence)
                except datagen_beam.DataAugmentationError:
                    print('augment_score_failed')
                    continue

                # Apply all score encoding functions.
                skip = False
                for name, encode_score_fn in self._encode_score_fns.items():
                    example_dict[name] = encode_score_fn(
                        augmented_score_sequence)
                    if not example_dict[name]:
                        print('skipped_empty_%s' % name)
                        skip = True
                        break
                if skip:
                    continue

            rets.append(example_dict)

    return rets