def create_example(example_id, ns, wav_data, velocity_range=None):
    """Creates a tf.train.Example proto for training or testing.

    Args:
        example_id: Unique string id for this example; stored in the 'id'
            feature (UTF-8 encoded).
        ns: NoteSequence proto holding the labels for this example.
        wav_data: Audio for the example as raw WAV-format bytes.
        velocity_range: Optional VelocityRange proto. If None, the range is
            derived from the velocities of the notes in `ns`.

    Returns:
        A tf.train.Example with 'id', 'sequence', 'audio', and
        'velocity_range' byte features.
    """
    if velocity_range is None:
        # Delegate to the shared helper instead of recomputing min/max here.
        # The helper also handles the empty-notes case (yields min=max=0),
        # whereas calling np.max on an empty list would raise.
        velocity_range = velocity_range_from_sequence(ns)

    # Ensure that all sequences for training and evaluation have gone through
    # sustain processing.
    sus_ns = sequences_lib.apply_sustain_control_changes(ns)

    example = tf.train.Example(features=tf.train.Features(
        feature={
            'id':
                tf.train.Feature(bytes_list=tf.train.BytesList(
                    value=[example_id.encode('utf-8')])),
            'sequence':
                tf.train.Feature(bytes_list=tf.train.BytesList(
                    value=[sus_ns.SerializeToString()])),
            'audio':
                tf.train.Feature(bytes_list=tf.train.BytesList(
                    value=[wav_data])),
            'velocity_range':
                tf.train.Feature(bytes_list=tf.train.BytesList(
                    value=[velocity_range.SerializeToString()])),
        }))
    return example
def velocity_range_from_sequence(ns):
    """Derive a VelocityRange proto from a NoteSequence.

    Args:
        ns: NoteSequence proto whose note velocities are inspected.

    Returns:
        A music_pb2.VelocityRange spanning the min/max velocities found in
        `ns`, or (0, 0) when the sequence contains no notes.
    """
    velocities = [note.velocity for note in ns.notes]
    if velocities:
        lo = np.min(velocities)
        hi = np.max(velocities)
    else:
        # An empty sequence yields a degenerate (0, 0) range.
        lo = hi = 0
    return music_pb2.VelocityRange(min=lo, max=hi)
def _FillExample(self, sequence, wav_data, filename):
    """Build a tf.train.Example with a fixed full-MIDI velocity range (0-127).

    Args:
        self: Unused instance reference.
        sequence: NoteSequence proto serialized into the 'sequence' feature.
        wav_data: Raw WAV bytes stored in the 'audio' feature.
        filename: String id stored (UTF-8 encoded) in the 'id' feature.

    Returns:
        A tf.train.Example with 'id', 'sequence', 'audio', and
        'velocity_range' byte features.
    """
    def _bytes_feature(raw):
        # Wrap a single bytes value in a tf.train.Feature.
        return tf.train.Feature(bytes_list=tf.train.BytesList(value=[raw]))

    velocity_range = music_pb2.VelocityRange(min=0, max=127)
    features = tf.train.Features(feature={
        'id': _bytes_feature(filename.encode('utf-8')),
        'sequence': _bytes_feature(sequence.SerializeToString()),
        'audio': _bytes_feature(wav_data),
        'velocity_range': _bytes_feature(velocity_range.SerializeToString()),
    })
    return tf.train.Example(features=features)
def convert_note_cropping_to_sequence_record(tensor, hparams):
    """Convert a Timbre-dataset example into the dict shape the Full Model uses.

    Args:
        tensor: dict with keys 'audio', 'note_croppings', and
            'instrument_families' (the latter two are tensors consumed by a
            tf.py_function below).
        hparams: hyperparameters object; only `hparams.sample_rate` is read,
            to convert sample indices into seconds.

    Returns:
        A dict with keys 'id', 'sequence', 'audio', and 'velocity_range' —
        the same layout as data.parse_example() output. 'sequence' is a
        serialized NoteSequence string tensor; 'velocity_range' is a fixed
        (0, 100) range.
    """
    note_croppings = tensor['note_croppings']
    instrument_families = tensor['instrument_families']

    def to_sequence_fn(eager_note_croppings, eager_instrument_families):
        # Runs eagerly inside tf.py_function, so .numpy() is available.
        eager_note_croppings = eager_note_croppings.numpy()
        eager_instrument_families = eager_instrument_families.numpy()
        sequence = music_pb2.NoteSequence()
        sequence.tempos.add().qpm = 120
        sequence.ticks_per_quarter = 220
        # Tracks the order in which families first appear; a note's
        # `instrument` index is its family's position in this list.
        distinct_families_list = []
        for i in range(len(eager_note_croppings)):
            cropping = NoteCropping(*eager_note_croppings[i])
            # Families arrive one-hot (or as scores); argmax picks the family
            # id. NOTE(review): assumes each row has a single dominant
            # family — confirm against the dataset builder.
            family = eager_instrument_families[i].argmax()
            if family not in distinct_families_list:
                distinct_families_list.append(family)
            note = sequence.notes.add()
            note.instrument = distinct_families_list.index(family)
            note.program = instrument_family_mappings.family_to_midi_instrument[
                family]
            # Croppings are in sample indices; convert to seconds.
            note.start_time = cropping.start_idx / hparams.sample_rate
            note.end_time = cropping.end_idx / hparams.sample_rate
            note.pitch = cropping.pitch
            # Fixed velocity: the timbre dataset carries no velocity info.
            note.velocity = 70
            # Grow total_time to cover the latest note end.
            if note.end_time > sequence.total_time:
                sequence.total_time = note.end_time
        return sequence.SerializeToString()

    sequence = tf.py_function(to_sequence_fn,
                              [note_croppings, instrument_families],
                              tf.string)
    return dict(id='',
                sequence=sequence,
                audio=tensor['audio'],
                velocity_range=music_pb2.VelocityRange(
                    min=0, max=100).SerializeToString())
def process_record(wav_data,
                   ns,
                   example_id,
                   min_length=5,
                   max_length=20,
                   sample_rate=16000,
                   allow_empty_notesequence=False,
                   load_audio_with_librosa=False):
    """Split a record into chunks and create an example proto.

    To use the full length audio and notesequence, set min_length=0 and
    max_length=-1.

    Args:
        wav_data: audio data in WAV format.
        ns: corresponding NoteSequence.
        example_id: id for the example proto
        min_length: minimum length in seconds for audio chunks.
        max_length: maximum length in seconds for audio chunks.
        sample_rate: desired audio sample rate.
        allow_empty_notesequence: whether an empty NoteSequence is allowed.
        load_audio_with_librosa: Use librosa for sampling. Works with 24-bit
            wavs.

    Yields:
        Example protos.

    Raises:
        ValueError: if more than 5 seconds of padding would be needed to make
            the audio match the NoteSequence's total_time.
    """
    try:
        if load_audio_with_librosa:
            samples = audio_io.wav_data_to_samples_librosa(
                wav_data, sample_rate)
        else:
            samples = audio_io.wav_data_to_samples(wav_data, sample_rate)
    except audio_io.AudioIOReadError as e:
        # Bug fix: the original passed a printf-style pattern and the
        # exception as two separate print() args ("Exception %s", e), which
        # never interpolates. Format explicitly instead.
        print('Exception %s' % e)
        return
    samples = librosa.util.normalize(samples, norm=np.inf)

    # Add padding to samples if notesequence is longer.
    pad_to_samples = int(math.ceil(ns.total_time * sample_rate))
    padding_needed = pad_to_samples - samples.shape[0]
    if padding_needed > 5 * sample_rate:
        raise ValueError(
            'Would have padded {} more than 5 seconds to match note sequence '
            'total time. ({} original samples, {} sample rate, {} sample '
            'seconds, {} sequence seconds) This likely indicates a problem '
            'with the source data.'.format(
                example_id, samples.shape[0], sample_rate,
                samples.shape[0] / sample_rate, ns.total_time))
    samples = np.pad(samples, (0, max(0, padding_needed)), 'constant')

    # Choose chunk boundaries: fixed-size windows, note-aware split points,
    # or the whole sequence as a single chunk.
    if max_length == min_length:
        splits = np.arange(0, ns.total_time, max_length)
    elif max_length > 0:
        splits = find_split_points(ns, samples, sample_rate, min_length,
                                   max_length)
    else:
        splits = [0, ns.total_time]

    # Use the shared helper for the velocity range (consistent with
    # create_example / velocity_range_from_sequence; handles empty notes).
    velocity_range = velocity_range_from_sequence(ns)

    for start, end in zip(splits[:-1], splits[1:]):
        if end - start < min_length:
            continue

        if start == 0 and end == ns.total_time:
            new_ns = ns
        else:
            new_ns = sequences_lib.extract_subsequence(ns, start, end)

        if not new_ns.notes and not allow_empty_notesequence:
            tf.logging.warning('skipping empty sequence')
            continue

        if start == 0 and end == ns.total_time:
            new_samples = samples
        else:
            # the resampling that happen in crop_wav_data is really slow
            # and we've already done it once, avoid doing it twice
            new_samples = audio_io.crop_samples(samples, sample_rate, start,
                                                end - start)
        new_wav_data = audio_io.samples_to_wav_data(new_samples, sample_rate)
        yield create_example(
            example_id, new_ns, new_wav_data, velocity_range=velocity_range)