def _ExampleToInputs(self, ex, truncated_length=0,
                     crop_training_sequence_to_notes=False):
  """Unpack a tf.train.Example into the tensors the model consumes.

  Args:
    ex: tf.train.Example carrying 'id', 'sequence' (serialized NoteSequence),
      and 'audio' (raw WAV bytes) byte features.
    truncated_length: if nonzero, truncate inputs to this many frames.
    crop_training_sequence_to_notes: if True, crop the audio to the time span
      covered by the notes in the sequence.

  Returns:
    The result of self._DataToInputs for the decoded example.
  """
  hparams = copy.deepcopy(constants.DEFAULT_HPARAMS)
  hparams.crop_training_sequence_to_notes = crop_training_sequence_to_notes

  features = ex.features.feature
  example_id = features['id'].bytes_list.value[0]
  sequence, crop_beginning_seconds = data.preprocess_sequence(
      features['sequence'].bytes_list.value[0], hparams)
  wav_data = features['audio'].bytes_list.value[0]

  # Keep audio and (preprocessed) sequence aligned when cropping is on.
  if crop_training_sequence_to_notes:
    wav_data = audio_io.crop_wav_data(wav_data, hparams.sample_rate,
                                      crop_beginning_seconds,
                                      sequence.total_time)

  spec = data.wav_to_spec(wav_data, hparams=hparams)
  frames_per_second = data.hparams_frames_per_second(hparams)
  roll = sequences_lib.sequence_to_pianoroll(
      sequence,
      frames_per_second=frames_per_second,
      min_pitch=constants.MIN_MIDI_PITCH,
      max_pitch=constants.MAX_MIDI_PITCH,
      min_frame_occupancy_for_label=0.0,
      onset_mode='length_ms',
      onset_length_ms=32.,
      onset_delay_ms=0.)
  length = data.wav_to_num_frames(
      wav_data, frames_per_second=frames_per_second)

  return self._DataToInputs(spec, roll.active, roll.weights, length,
                            example_id, truncated_length)
def generate_train_set():
  """Generate the train TFRecord.

  Scans every directory in train_dirs for .wav files, pairs each with the
  .mid file of the same base name, splits each piece into segments of
  [FLAGS.min_length, FLAGS.max_length] seconds, and writes one
  tf.train.Example per segment to maps_config2_train.tfrecord.
  """
  train_file_pairs = []
  for directory in train_dirs:
    path = os.path.join(FLAGS.input_dir, directory)
    path = os.path.join(path, '*.wav')
    wav_files = glob.glob(path)
    # find matching mid files
    for wav_file in wav_files:
      base_name_root, _ = os.path.splitext(wav_file)
      mid_file = base_name_root + '.mid'
      train_file_pairs.append((wav_file, mid_file))

  train_output_name = os.path.join(FLAGS.output_dir,
                                   'maps_config2_train.tfrecord')

  with tf.python_io.TFRecordWriter(train_output_name) as writer:
    for pair in train_file_pairs:
      print(pair)
      # Load the wav data. Open in binary mode ('rb'): WAV bytes must not
      # go through text decoding / newline translation, which corrupts them.
      wav_data = tf.gfile.Open(pair[0], 'rb').read()
      samples = audio_io.wav_data_to_samples(wav_data, FLAGS.sample_rate)

      # Load the midi data (also binary) and convert to a notesequence.
      midi_data = tf.gfile.Open(pair[1], 'rb').read()
      ns = midi_io.midi_to_sequence_proto(midi_data)

      splits = find_split_points(ns, samples, FLAGS.sample_rate,
                                 FLAGS.min_length, FLAGS.max_length)

      for start, end in zip(splits[:-1], splits[1:]):
        # Drop trailing fragments shorter than the minimum segment length.
        if end - start < FLAGS.min_length:
          continue

        new_ns = sequences_lib.extract_subsequence(ns, start, end)
        new_wav_data = audio_io.crop_wav_data(wav_data, FLAGS.sample_rate,
                                              start, end - start)
        example = tf.train.Example(features=tf.train.Features(feature={
            'id': tf.train.Feature(bytes_list=tf.train.BytesList(
                value=[pair[0]]
            )),
            'sequence': tf.train.Feature(bytes_list=tf.train.BytesList(
                value=[new_ns.SerializeToString()]
            )),
            'audio': tf.train.Feature(bytes_list=tf.train.BytesList(
                value=[new_wav_data]
            ))
        }))
        writer.write(example.SerializeToString())
def generate_train_set():
  """Generate the train TFRecord.

  Pairs each .wav under the train_dirs subdirectories with its matching
  .mid file, splits every recording into segments bounded by
  FLAGS.min_length / FLAGS.max_length seconds, and serializes one
  tf.train.Example per segment into maps_config2_train.tfrecord.
  """
  train_file_pairs = []
  for directory in train_dirs:
    path = os.path.join(FLAGS.input_dir, directory)
    path = os.path.join(path, '*.wav')
    wav_files = glob.glob(path)
    # find matching mid files
    for wav_file in wav_files:
      base_name_root, _ = os.path.splitext(wav_file)
      mid_file = base_name_root + '.mid'
      train_file_pairs.append((wav_file, mid_file))

  train_output_name = os.path.join(FLAGS.output_dir,
                                   'maps_config2_train.tfrecord')

  with tf.python_io.TFRecordWriter(train_output_name) as writer:
    for pair in train_file_pairs:
      print(pair)
      # Load the wav data in binary mode ('rb'); reading WAV bytes through
      # a text-mode handle corrupts them (decoding/newline translation).
      wav_data = tf.gfile.Open(pair[0], 'rb').read()
      samples = audio_io.wav_data_to_samples(wav_data, FLAGS.sample_rate)

      # Load the midi data (binary as well) and convert to a notesequence.
      midi_data = tf.gfile.Open(pair[1], 'rb').read()
      ns = midi_io.midi_to_sequence_proto(midi_data)

      splits = find_split_points(ns, samples, FLAGS.sample_rate,
                                 FLAGS.min_length, FLAGS.max_length)

      for start, end in zip(splits[:-1], splits[1:]):
        # Skip leftover slivers shorter than the minimum segment length.
        if end - start < FLAGS.min_length:
          continue

        new_ns = sequences_lib.extract_subsequence(ns, start, end)
        new_wav_data = audio_io.crop_wav_data(wav_data, FLAGS.sample_rate,
                                              start, end - start)
        example = tf.train.Example(features=tf.train.Features(
            feature={
                'id':
                    tf.train.Feature(bytes_list=tf.train.BytesList(
                        value=[pair[0]])),
                'sequence':
                    tf.train.Feature(bytes_list=tf.train.BytesList(
                        value=[new_ns.SerializeToString()])),
                'audio':
                    tf.train.Feature(bytes_list=tf.train.BytesList(
                        value=[new_wav_data]))
            }))
        writer.write(example.SerializeToString())
def transform_wav_data(wav_data, sequence_tensor):
  """Transforms with sox.

  Applies training-time jitter/augmentation, optional cropping to the note
  span, and optional normalization to raw WAV bytes.

  NOTE(review): reads `hparams`, `is_training`, `jitter_amount_sec`,
  `preprocess_sequence`, `audio_io`, and `audio_transform` from the
  enclosing scope — this function must stay nested where those are
  defined; confirm against the surrounding builder.

  Args:
    wav_data: raw WAV audio bytes.
    sequence_tensor: serialized NoteSequence; preprocessed here to find the
      crop window matching the sequence.

  Returns:
    A single-element list containing the transformed WAV bytes.
  """
  # Preprocess the sequence the same way the label pipeline does, so the
  # crop below stays aligned with the labels.
  sequence, cropped_beginning_seconds = preprocess_sequence(
      sequence_tensor, hparams)

  # Only do audio transformations during training.
  if is_training:
    wav_data = audio_io.jitter_wav_data(wav_data, hparams.sample_rate,
                                        jitter_amount_sec)
    wav_data = audio_transform.transform_wav_audio(wav_data, hparams)

  # If requested, crop wav.
  if hparams.crop_training_sequence_to_notes:
    wav_data = audio_io.crop_wav_data(wav_data, hparams.sample_rate,
                                      cropped_beginning_seconds,
                                      sequence.total_time)

  # Normalize.
  if hparams.normalize_audio:
    wav_data = audio_io.normalize_wav_data(wav_data, hparams.sample_rate)
  return [wav_data]
def generate_train_set(exclude_ids):
  """Generate the train TFRecord.

  Pairs each .wav in the train_dirs subdirectories with its .mid file
  (skipping ids in exclude_ids), splits each piece into segments within
  [FLAGS.min_length, FLAGS.max_length] seconds, and writes one
  tf.train.Example per segment — including the piece's velocity range —
  to maps_config2_train.tfrecord.

  Args:
    exclude_ids: collection of ids (per filename_to_id) to leave out of
      the training set, e.g. ids reserved for the test split.
  """
  train_file_pairs = []
  for directory in train_dirs:
    path = os.path.join(FLAGS.input_dir, directory)
    path = os.path.join(path, '*.wav')
    wav_files = glob.glob(path)
    # find matching mid files
    for wav_file in wav_files:
      base_name_root, _ = os.path.splitext(wav_file)
      mid_file = base_name_root + '.mid'
      if filename_to_id(wav_file) not in exclude_ids:
        train_file_pairs.append((wav_file, mid_file))

  train_output_name = os.path.join(FLAGS.output_dir,
                                   'maps_config2_train.tfrecord')

  with tf.python_io.TFRecordWriter(train_output_name) as writer:
    for pair in train_file_pairs:
      print(pair)
      # load the wav data
      wav_data = tf.gfile.Open(pair[0], 'rb').read()
      samples = audio_io.wav_data_to_samples(wav_data, FLAGS.sample_rate)
      samples = librosa.util.normalize(samples, norm=np.inf)

      # load the midi data and convert to a notesequence
      ns = midi_io.midi_file_to_note_sequence(pair[1])

      splits = find_split_points(ns, samples, FLAGS.sample_rate,
                                 FLAGS.min_length, FLAGS.max_length)

      velocities = [note.velocity for note in ns.notes]
      # Guard against note-free MIDI files: np.max/np.min raise ValueError
      # on an empty sequence. Fall back to an empty (0, 0) velocity range.
      if velocities:
        velocity_max = np.max(velocities)
        velocity_min = np.min(velocities)
      else:
        velocity_max = 0
        velocity_min = 0
      new_velocity_tuple = music_pb2.VelocityRange(
          min=velocity_min, max=velocity_max)

      for start, end in zip(splits[:-1], splits[1:]):
        # Drop fragments shorter than the minimum segment length.
        if end - start < FLAGS.min_length:
          continue

        new_ns = sequences_lib.extract_subsequence(ns, start, end)
        new_wav_data = audio_io.crop_wav_data(wav_data, FLAGS.sample_rate,
                                              start, end - start)
        example = tf.train.Example(features=tf.train.Features(feature={
            'id': tf.train.Feature(bytes_list=tf.train.BytesList(
                value=[pair[0]]
            )),
            'sequence': tf.train.Feature(bytes_list=tf.train.BytesList(
                value=[new_ns.SerializeToString()]
            )),
            'audio': tf.train.Feature(bytes_list=tf.train.BytesList(
                value=[new_wav_data]
            )),
            'velocity_range': tf.train.Feature(bytes_list=tf.train.BytesList(
                value=[new_velocity_tuple.SerializeToString()]
            )),
        }))
        writer.write(example.SerializeToString())
def generate_train_set(exclude_ids):
  """Generate the train TFRecord.

  Collects (wav, mid) file pairs from the train_dirs subdirectories
  (skipping ids in exclude_ids), splits each recording into segments
  within [FLAGS.min_length, FLAGS.max_length] seconds, and writes one
  tf.train.Example per segment — with the piece's velocity range — to
  maps_config2_train.tfrecord.

  Args:
    exclude_ids: collection of ids (per filename_to_id) excluded from the
      training set, e.g. ids reserved for the test split.
  """
  train_file_pairs = []
  for directory in train_dirs:
    path = os.path.join(FLAGS.input_dir, directory)
    path = os.path.join(path, '*.wav')
    wav_files = glob.glob(path)
    # find matching mid files
    for wav_file in wav_files:
      base_name_root, _ = os.path.splitext(wav_file)
      mid_file = base_name_root + '.mid'
      if filename_to_id(wav_file) not in exclude_ids:
        train_file_pairs.append((wav_file, mid_file))

  train_output_name = os.path.join(FLAGS.output_dir,
                                   'maps_config2_train.tfrecord')

  with tf.python_io.TFRecordWriter(train_output_name) as writer:
    for pair in train_file_pairs:
      print(pair)
      # load the wav data
      wav_data = tf.gfile.Open(pair[0], 'rb').read()
      samples = audio_io.wav_data_to_samples(wav_data, FLAGS.sample_rate)
      samples = librosa.util.normalize(samples, norm=np.inf)

      # load the midi data and convert to a notesequence
      ns = midi_io.midi_file_to_note_sequence(pair[1])

      splits = find_split_points(ns, samples, FLAGS.sample_rate,
                                 FLAGS.min_length, FLAGS.max_length)

      velocities = [note.velocity for note in ns.notes]
      # np.max/np.min raise ValueError on an empty list; a MIDI file with
      # no notes would crash the whole run. Default to a (0, 0) range.
      if velocities:
        velocity_max = np.max(velocities)
        velocity_min = np.min(velocities)
      else:
        velocity_max = 0
        velocity_min = 0
      new_velocity_tuple = music_pb2.VelocityRange(min=velocity_min,
                                                   max=velocity_max)

      for start, end in zip(splits[:-1], splits[1:]):
        # Skip fragments shorter than the minimum segment length.
        if end - start < FLAGS.min_length:
          continue

        new_ns = sequences_lib.extract_subsequence(ns, start, end)
        new_wav_data = audio_io.crop_wav_data(wav_data, FLAGS.sample_rate,
                                              start, end - start)
        example = tf.train.Example(features=tf.train.Features(
            feature={
                'id':
                    tf.train.Feature(bytes_list=tf.train.BytesList(
                        value=[pair[0]])),
                'sequence':
                    tf.train.Feature(bytes_list=tf.train.BytesList(
                        value=[new_ns.SerializeToString()])),
                'audio':
                    tf.train.Feature(bytes_list=tf.train.BytesList(
                        value=[new_wav_data])),
                'velocity_range':
                    tf.train.Feature(bytes_list=tf.train.BytesList(
                        value=[new_velocity_tuple.SerializeToString()])),
            }))
        writer.write(example.SerializeToString())