# Common imports assumed by the snippets below (Magenta-style module paths).
import glob
import os

import librosa
import numpy as np
import tensorflow as tf

from magenta.music import audio_io
from magenta.music import midi_io
from magenta.music import sequences_lib
from magenta.protobuf import music_pb2


def create_example(example_id, ns, wav_data, velocity_range=None):
  """Creates a tf.train.Example proto for training or testing."""
  if velocity_range is None:
    velocities = [note.velocity for note in ns.notes]
    velocity_max = np.max(velocities)
    velocity_min = np.min(velocities)
    velocity_range = music_pb2.VelocityRange(
        min=velocity_min, max=velocity_max)

  # Ensure that all sequences for training and evaluation have gone through
  # sustain processing.
  sus_ns = sequences_lib.apply_sustain_control_changes(ns)

  example = tf.train.Example(features=tf.train.Features(feature={
      'id':
          tf.train.Feature(bytes_list=tf.train.BytesList(
              value=[example_id.encode('utf-8')])),
      'sequence':
          tf.train.Feature(bytes_list=tf.train.BytesList(
              value=[sus_ns.SerializeToString()])),
      'audio':
          tf.train.Feature(bytes_list=tf.train.BytesList(value=[wav_data])),
      'velocity_range':
          tf.train.Feature(bytes_list=tf.train.BytesList(
              value=[velocity_range.SerializeToString()])),
  }))
  return example

def create_example(example_id, ns, wav_data, velocity_range=None):
  """Creates a tf.train.Example proto for training or testing."""
  if velocity_range is None:
    velocities = [note.velocity for note in ns.notes]
    velocity_max = np.max(velocities)
    velocity_min = np.min(velocities)
    velocity_range = music_pb2.VelocityRange(
        min=velocity_min, max=velocity_max)

  example = tf.train.Example(features=tf.train.Features(feature={
      'id':
          tf.train.Feature(bytes_list=tf.train.BytesList(
              value=[example_id.encode('utf-8')])),
      'sequence':
          tf.train.Feature(bytes_list=tf.train.BytesList(
              value=[ns.SerializeToString()])),
      'audio':
          tf.train.Feature(bytes_list=tf.train.BytesList(value=[wav_data])),
      'velocity_range':
          tf.train.Feature(bytes_list=tf.train.BytesList(
              value=[velocity_range.SerializeToString()])),
  }))
  return example

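# A round-trip sketch (not part of the original code) showing how the Example
# built above can be decoded again; the feature keys mirror the schema used by
# create_example, and parse_example is a hypothetical name.
def parse_example(serialized):
  example = tf.train.Example.FromString(serialized)
  features = example.features.feature
  example_id = features['id'].bytes_list.value[0].decode('utf-8')
  ns = music_pb2.NoteSequence.FromString(
      features['sequence'].bytes_list.value[0])
  wav_data = features['audio'].bytes_list.value[0]
  velocity_range = music_pb2.VelocityRange.FromString(
      features['velocity_range'].bytes_list.value[0])
  return example_id, ns, wav_data, velocity_range
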
def create_example(self, filename, hparams):
  """Processes an audio file into an Example proto."""
  wav_data = librosa.core.load(filename, sr=hparams.sample_rate)[0]
  if hparams.normalize_audio:
    # Assign the normalized audio back; the call returns a new array rather
    # than modifying its input in place.
    wav_data = audio_io.normalize_wav_data(wav_data, hparams.sample_rate)
  wav_data = audio_io.samples_to_wav_data(wav_data, hparams.sample_rate)
  example = tf.train.Example(features=tf.train.Features(feature={
      'id':
          tf.train.Feature(bytes_list=tf.train.BytesList(
              value=[filename.encode('utf-8')])),
      'sequence':
          tf.train.Feature(bytes_list=tf.train.BytesList(
              value=[music_pb2.NoteSequence().SerializeToString()])),
      'audio':
          tf.train.Feature(bytes_list=tf.train.BytesList(value=[wav_data])),
      'velocity_range':
          tf.train.Feature(bytes_list=tf.train.BytesList(
              value=[music_pb2.VelocityRange().SerializeToString()])),
  }))
  return example.SerializeToString()

def process_record(wav_data,
                   ns,
                   example_id,
                   min_length=5,
                   max_length=20,
                   sample_rate=16000,
                   allow_empty_notesequence=False):
  """Splits a record into chunks and creates an example proto for each.

  To use the full-length audio and NoteSequence, set min_length=0 and
  max_length=-1.

  Args:
    wav_data: audio data in WAV format.
    ns: corresponding NoteSequence.
    example_id: id for the example proto.
    min_length: minimum length in seconds for audio chunks.
    max_length: maximum length in seconds for audio chunks.
    sample_rate: desired audio sample rate.
    allow_empty_notesequence: whether an empty NoteSequence is allowed.

  Yields:
    Example protos.
  """
  samples = audio_io.wav_data_to_samples(wav_data, sample_rate)
  samples = librosa.util.normalize(samples, norm=np.inf)
  if max_length == min_length:
    splits = np.arange(0, ns.total_time, max_length)
  elif max_length > 0:
    splits = find_split_points(ns, samples, sample_rate, min_length,
                               max_length)
  else:
    splits = [0, ns.total_time]

  velocities = [note.velocity for note in ns.notes]
  velocity_max = np.max(velocities) if velocities else 0
  velocity_min = np.min(velocities) if velocities else 0
  velocity_range = music_pb2.VelocityRange(min=velocity_min, max=velocity_max)

  for start, end in zip(splits[:-1], splits[1:]):
    if end - start < min_length:
      continue

    if start == 0 and end == ns.total_time:
      new_ns = ns
    else:
      new_ns = sequences_lib.extract_subsequence(ns, start, end)

    if not new_ns.notes and not allow_empty_notesequence:
      tf.logging.warning('skipping empty sequence')
      continue

    if start == 0 and end == ns.total_time:
      new_samples = samples
    else:
      # The resampling that happens in crop_wav_data is really slow, and
      # we've already done it once; avoid doing it twice.
      new_samples = audio_io.crop_samples(samples, sample_rate, start,
                                          end - start)
    new_wav_data = audio_io.samples_to_wav_data(new_samples, sample_rate)
    yield create_example(
        example_id, new_ns, new_wav_data, velocity_range=velocity_range)

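# A hypothetical end-to-end usage of process_record; the file paths and the
# write_chunked_examples wrapper are assumptions, not part of the original
# pipeline. It shows that process_record yields Example protos that still need
# to be serialized before writing.
def write_chunked_examples(wav_path, midi_path, output_path):
  with tf.gfile.Open(wav_path, 'rb') as f:
    wav_data = f.read()
  with tf.gfile.Open(midi_path, 'rb') as f:
    ns = midi_io.midi_to_sequence_proto(f.read())
  with tf.python_io.TFRecordWriter(output_path) as writer:
    for example in process_record(wav_data, ns, example_id=wav_path):
      writer.write(example.SerializeToString())
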
def generate_test_set():
  """Generates the test TFRecord."""
  test_file_pairs = []
  for directory in test_dirs:
    path = os.path.join(FLAGS.input_dir, directory)
    path = os.path.join(path, '*.wav')
    wav_files = glob.glob(path)
    # Find the matching .mid files.
    for wav_file in wav_files:
      base_name_root, _ = os.path.splitext(wav_file)
      mid_file = base_name_root + '.mid'
      test_file_pairs.append((wav_file, mid_file))

  test_output_name = os.path.join(FLAGS.output_dir,
                                  'maps_config2_test.tfrecord')

  with tf.python_io.TFRecordWriter(test_output_name) as writer:
    for pair in test_file_pairs:
      print(pair)
      # Load the wav data and resample it.
      samples = audio_io.load_audio(pair[0], FLAGS.sample_rate)
      wav_data = audio_io.samples_to_wav_data(samples, FLAGS.sample_rate)

      # Load the midi data and convert it to a NoteSequence.
      midi_data = tf.gfile.Open(pair[1], 'rb').read()
      ns = midi_io.midi_to_sequence_proto(midi_data)

      velocities = [note.velocity for note in ns.notes]
      velocity_max = np.max(velocities)
      velocity_min = np.min(velocities)
      new_velocity_tuple = music_pb2.VelocityRange(
          min=velocity_min, max=velocity_max)

      example = tf.train.Example(features=tf.train.Features(feature={
          'id':
              tf.train.Feature(bytes_list=tf.train.BytesList(
                  value=[pair[0].encode('utf-8')])),
          'sequence':
              tf.train.Feature(bytes_list=tf.train.BytesList(
                  value=[ns.SerializeToString()])),
          'audio':
              tf.train.Feature(bytes_list=tf.train.BytesList(
                  value=[wav_data])),
          'velocity_range':
              tf.train.Feature(bytes_list=tf.train.BytesList(
                  value=[new_velocity_tuple.SerializeToString()])),
      }))
      writer.write(example.SerializeToString())

  return [filename_to_id(wav) for wav, _ in test_file_pairs]

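# The filename_to_id helper used above is not shown in this listing. A
# plausible sketch, assuming MAPS-style names such as
# 'MAPS_MUS-<piece>_<instrument>.wav' where the piece name identifies the
# underlying performance; the regex is an assumption for illustration.
import re


def filename_to_id(filename):
  """Translates a .wav or .mid path to a MAPS sequence id."""
  match = re.match(r'.*MUS-(.+)_[^_]+\.\w{3}', os.path.basename(filename))
  return match.group(1) if match else os.path.basename(filename)
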
def _FillExample(self, sequence, audio, filename):
  velocity_range = music_pb2.VelocityRange(min=0, max=127)
  feature_dict = {
      'id':
          tf.train.Feature(bytes_list=tf.train.BytesList(
              value=[filename.encode('utf-8')])),
      'sequence':
          tf.train.Feature(bytes_list=tf.train.BytesList(
              value=[sequence.SerializeToString()])),
      'audio':
          tf.train.Feature(bytes_list=tf.train.BytesList(value=[audio])),
      'velocity_range':
          tf.train.Feature(bytes_list=tf.train.BytesList(
              value=[velocity_range.SerializeToString()])),
  }
  return tf.train.Example(features=tf.train.Features(feature=feature_dict))

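# A hypothetical test-side usage of the helper above, assuming it lives in a
# tf.test.TestCase subclass; the filename and audio bytes are illustrative
# only, not from the original test.
def testFillExample(self):
  sequence = music_pb2.NoteSequence()
  example = self._FillExample(sequence, audio=b'\x00\x01', filename='test.wav')
  features = example.features.feature
  self.assertEqual(b'test.wav', features['id'].bytes_list.value[0])
  self.assertEqual(b'\x00\x01', features['audio'].bytes_list.value[0])
  self.assertEqual(sequence.SerializeToString(),
                   features['sequence'].bytes_list.value[0])
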
wav_data = audio_io.samples_to_wav_data(
    librosa.util.normalize(
        librosa.core.load(fn, sr=hparams.sample_rate)[0]),
    hparams.sample_rate)
example = tf.train.Example(features=tf.train.Features(feature={
    'id':
        tf.train.Feature(bytes_list=tf.train.BytesList(
            value=[fn.encode('utf-8')])),
    'sequence':
        tf.train.Feature(bytes_list=tf.train.BytesList(
            value=[music_pb2.NoteSequence().SerializeToString()])),
    'audio':
        tf.train.Feature(bytes_list=tf.train.BytesList(value=[wav_data])),
    'velocity_range':
        tf.train.Feature(bytes_list=tf.train.BytesList(
            value=[music_pb2.VelocityRange().SerializeToString()])),
}))
to_process.append(example.SerializeToString())
print('Processing complete for', fn)

session.run(iterator.initializer, {examples: to_process})

filenames, frame_logits, onset_logits, velocity_values = session.run([
    batch.filenames, frame_probs_flat, onset_probs_flat, velocity_values_flat
])
print('Inference complete for', filenames[0])

frame_predictions = frame_logits > .5
onset_predictions = onset_logits > .5

def generate_train_set(exclude_ids):
  """Generates the train TFRecord."""
  train_file_pairs = []
  for directory in train_dirs:
    path = os.path.join(FLAGS.input_dir, directory)
    path = os.path.join(path, '*.wav')
    wav_files = glob.glob(path)
    # Find the matching .mid files.
    for wav_file in wav_files:
      base_name_root, _ = os.path.splitext(wav_file)
      mid_file = base_name_root + '.mid'
      if filename_to_id(wav_file) not in exclude_ids:
        train_file_pairs.append((wav_file, mid_file))

  train_output_name = os.path.join(FLAGS.output_dir,
                                   'maps_config2_train.tfrecord')

  with tf.python_io.TFRecordWriter(train_output_name) as writer:
    for pair in train_file_pairs:
      print(pair)
      # Load the wav data.
      wav_data = tf.gfile.Open(pair[0], 'rb').read()
      samples = audio_io.wav_data_to_samples(wav_data, FLAGS.sample_rate)
      samples = librosa.util.normalize(samples, norm=np.inf)

      # Load the midi data and convert it to a NoteSequence.
      midi_data = tf.gfile.Open(pair[1], 'rb').read()
      ns = midi_io.midi_to_sequence_proto(midi_data)

      splits = find_split_points(ns, samples, FLAGS.sample_rate,
                                 FLAGS.min_length, FLAGS.max_length)

      velocities = [note.velocity for note in ns.notes]
      velocity_max = np.max(velocities)
      velocity_min = np.min(velocities)
      new_velocity_tuple = music_pb2.VelocityRange(
          min=velocity_min, max=velocity_max)

      for start, end in zip(splits[:-1], splits[1:]):
        if end - start < FLAGS.min_length:
          continue

        new_ns = sequences_lib.extract_subsequence(ns, start, end)
        new_wav_data = audio_io.crop_wav_data(wav_data, FLAGS.sample_rate,
                                              start, end - start)
        example = tf.train.Example(features=tf.train.Features(feature={
            'id':
                tf.train.Feature(bytes_list=tf.train.BytesList(
                    value=[pair[0].encode('utf-8')])),
            'sequence':
                tf.train.Feature(bytes_list=tf.train.BytesList(
                    value=[new_ns.SerializeToString()])),
            'audio':
                tf.train.Feature(bytes_list=tf.train.BytesList(
                    value=[new_wav_data])),
            'velocity_range':
                tf.train.Feature(bytes_list=tf.train.BytesList(
                    value=[new_velocity_tuple.SerializeToString()])),
        }))
        writer.write(example.SerializeToString())

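# A minimal driver sketch tying the two generators together: generate_test_set
# returns the ids it wrote, and generate_train_set excludes those ids so no
# piece appears in both splits. The tf.app.run wiring is an assumption;
# train_dirs, test_dirs, and the FLAGS used above are presumed to be defined
# at module scope.
def main(unused_argv):
  test_ids = generate_test_set()
  generate_train_set(exclude_ids=test_ids)


if __name__ == '__main__':
  tf.app.run(main)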