Ejemplo n.º 1
0
    def _CreateExamplesAndExpectedInputs(self, truncated_length, lengths,
                                         expected_num_inputs):
        """Build silent-audio examples and the inputs expected from them.

        One example is created per entry of `lengths`. Each example pairs an
        all-zero waveform with a synthetic sequence whose pitch is
        `constants.MIN_MIDI_PITCH` plus the entry's index.

        Args:
          truncated_length: forwarded to `self._ExampleToInputs` for every
            example.
          lengths: iterable of audio lengths in frames; each is converted to a
            sample count using the hparams frame rate and sample rate.
          expected_num_inputs: total number of inputs that all examples
            together must produce; asserted before returning.

        Returns:
          A tuple `(examples, expected_inputs)`.
        """
        hparams = copy.deepcopy(configs.DEFAULT_HPARAMS)
        # Loop-invariant: hparams is a private copy that is never modified.
        frames_per_second = data.hparams_frames_per_second(hparams)
        examples = []
        expected_inputs = []

        for i, length in enumerate(lengths):
            # Builtin int() replaces np.int, which was only an alias of it and
            # has been removed from recent NumPy releases.
            num_samples = int(
                (length / frames_per_second) * hparams.sample_rate)
            wav_samples = np.zeros((num_samples, 1), np.float32)
            wav_data = audio_io.samples_to_wav_data(wav_samples,
                                                    hparams.sample_rate)

            num_frames = data.wav_to_num_frames(
                wav_data, frames_per_second=frames_per_second)

            # The sequence duration is derived from the frame count of the
            # encoded wav rather than from the requested length.
            seq = self._SyntheticSequence(
                num_frames / frames_per_second,
                i + constants.MIN_MIDI_PITCH)

            examples.append(self._FillExample(seq, wav_data, 'ex%d' % i))
            expected_inputs += self._ExampleToInputs(examples[-1],
                                                     truncated_length)
        self.assertEqual(expected_num_inputs, len(expected_inputs))
        return examples, expected_inputs
def generate_test_set():
    """Write the test TFRecord and return the ids of the files it contains.

    Every `*.wav` file found under each directory in `test_dirs` (relative to
    `FLAGS.input_dir`) is paired with a `.mid` path sharing the same root
    name. One serialized example per pair is written to
    `maps_config2_test.tfrecord` inside `FLAGS.output_dir`.

    Returns:
      A list with `filename_to_id(wav)` for every wav file processed.
    """
    wav_mid_pairs = []
    for directory in test_dirs:
        pattern = os.path.join(FLAGS.input_dir, directory, '*.wav')
        # The matching midi file is assumed to sit next to the wav file.
        wav_mid_pairs.extend(
            (wav_path, os.path.splitext(wav_path)[0] + '.mid')
            for wav_path in glob.glob(pattern))

    record_path = os.path.join(FLAGS.output_dir,
                               'maps_config2_test.tfrecord')
    total = len(wav_mid_pairs)

    with tf.python_io.TFRecordWriter(record_path) as writer:
        for idx, (wav_path, mid_path) in enumerate(wav_mid_pairs):
            print('{} of {}: {}'.format(idx, total, wav_path))

            # Load the audio at the target rate and re-encode it as wav data.
            samples = audio_io.load_audio(wav_path, FLAGS.sample_rate)
            wav_data = audio_io.samples_to_wav_data(samples, FLAGS.sample_rate)

            # Convert the midi file to a NoteSequence.
            ns = midi_io.midi_file_to_note_sequence(mid_path)

            example = audio_label_data_utils.create_example(
                wav_path, ns, wav_data)
            writer.write(example.SerializeToString())

    return [filename_to_id(wav_path) for wav_path, _ in wav_mid_pairs]
Ejemplo n.º 3
0
def mix_examples(mixid_exs, sample_rate, load_audio_with_librosa):
    """Combine several serialized Examples into a single mixed example.

    Args:
      mixid_exs: a `(mixid, exs)` pair; `mixid` is ignored and `exs` is an
        iterable of serialized `tf.train.Example` protos carrying 'audio'
        and 'sequence' features.
      sample_rate: sample rate used to decode and re-encode the audio.
      load_audio_with_librosa: if true, decode wav data with the librosa
        based loader instead of the default one.

    Returns:
      The single example produced by processing the mixed audio and
      sequence at full length.
    """
    _, serialized_examples = mixid_exs

    if load_audio_with_librosa:
        decode_wav = audio_io.wav_data_to_samples_librosa
    else:
        decode_wav = audio_io.wav_data_to_samples

    all_samples = []
    all_sequences = []
    for serialized in serialized_examples:
        features = tf.train.Example.FromString(serialized).features.feature
        audio_bytes = features['audio'].bytes_list.value[0]
        all_samples.append(decode_wav(audio_bytes, sample_rate))
        sequence_bytes = features['sequence'].bytes_list.value[0]
        all_sequences.append(music_pb2.NoteSequence.FromString(sequence_bytes))

    mixed_samples, mixed_sequence = audio_label_data_utils.mix_sequences(
        individual_samples=all_samples,
        sample_rate=sample_rate,
        individual_sequences=all_sequences)

    mixed_wav_data = audio_io.samples_to_wav_data(mixed_samples, sample_rate)

    # Id and filename of the mix record every source, joined with '::'.
    mixed_id = '::'.join(['mixed'] + [seq.id for seq in all_sequences])
    mixed_sequence.id = mixed_id
    mixed_sequence.filename = '::'.join(
        ['mixed'] + [seq.filename for seq in all_sequences])

    # min_length=0 and max_length=-1 request the full, unsplit record.
    records = list(
        audio_label_data_utils.process_record(mixed_wav_data,
                                              mixed_sequence,
                                              mixed_id,
                                              min_length=0,
                                              max_length=-1,
                                              sample_rate=sample_rate))
    assert len(records) == 1
    return records[0]
Ejemplo n.º 4
0
def process_record(wav_data,
                   ns,
                   example_id,
                   min_length=5,
                   max_length=20,
                   sample_rate=16000,
                   allow_empty_notesequence=False,
                   load_audio_with_librosa=False):
  """Split a record into chunks and create an example proto.

  To use the full length audio and notesequence, set min_length=0 and
  max_length=-1.

  If the audio cannot be read, the error is printed and nothing is yielded.
  Chunks shorter than min_length are skipped, as are chunks without notes
  unless allow_empty_notesequence is set.

  Args:
    wav_data: audio data in WAV format.
    ns: corresponding NoteSequence.
    example_id: id for the example proto
    min_length: minimum length in seconds for audio chunks.
    max_length: maximum length in seconds for audio chunks.
    sample_rate: desired audio sample rate.
    allow_empty_notesequence: whether an empty NoteSequence is allowed.
    load_audio_with_librosa: Use librosa for sampling. Works with 24-bit wavs.

  Yields:
    Example protos.

  Raises:
    ValueError: if the audio would need more than 5 seconds of padding to
      reach the NoteSequence's total time.
  """
  try:
    if load_audio_with_librosa:
      samples = audio_io.wav_data_to_samples_librosa(wav_data, sample_rate)
    else:
      samples = audio_io.wav_data_to_samples(wav_data, sample_rate)
  except audio_io.AudioIOReadError as e:
    # Best effort: report the unreadable audio and yield nothing. print() does
    # not interpolate %-style arguments, so the message is formatted here.
    print('Exception {}'.format(e))
    return
  # Scale the samples by their infinity norm.
  samples = librosa.util.normalize(samples, norm=np.inf)

  # Add padding to samples if notesequence is longer.
  pad_to_samples = int(math.ceil(ns.total_time * sample_rate))
  padding_needed = pad_to_samples - samples.shape[0]
  if padding_needed > 5 * sample_rate:
    raise ValueError(
        'Would have padded {} more than 5 seconds to match note sequence total '
        'time. ({} original samples, {} sample rate, {} sample seconds, '
        '{} sequence seconds) This likely indicates a problem with the source '
        'data.'.format(
            example_id, samples.shape[0], sample_rate,
            samples.shape[0] / sample_rate, ns.total_time))
  samples = np.pad(samples, (0, max(0, padding_needed)), 'constant')

  if max_length == min_length:
    # Fixed-size chunks: boundaries at every multiple of max_length that lies
    # before the end of the sequence.
    splits = np.arange(0, ns.total_time, max_length)
  elif max_length > 0:
    splits = find_split_points(ns, samples, sample_rate, min_length, max_length)
  else:
    # Non-positive max_length: a single chunk covering the whole record.
    splits = [0, ns.total_time]
  velocity_range = velocity_range_from_sequence(ns)

  for start, end in zip(splits[:-1], splits[1:]):
    if end - start < min_length:
      continue

    # A chunk spanning the whole record reuses the original sequence and
    # samples instead of extracting copies.
    is_full_record = start == 0 and end == ns.total_time

    if is_full_record:
      new_ns = ns
    else:
      new_ns = sequences_lib.extract_subsequence(ns, start, end)

    if not new_ns.notes and not allow_empty_notesequence:
      tf.logging.warning('skipping empty sequence')
      continue

    if is_full_record:
      new_samples = samples
    else:
      # The resampling that happens in crop_wav_data is really slow and has
      # already been done once, so crop the decoded samples instead.
      new_samples = audio_io.crop_samples(samples, sample_rate, start,
                                          end - start)
    new_wav_data = audio_io.samples_to_wav_data(new_samples, sample_rate)
    yield create_example(
        example_id, new_ns, new_wav_data, velocity_range=velocity_range)
 def _CreateSyntheticExample(self):
     """Return a `(wav_data, sequence)` pair for use in tests.

     The sequence comes from `self._CreateSyntheticSequence()`; the audio is
     `9 * SAMPLE_RATE` zero-valued float32 samples encoded as wav data.
     """
     note_sequence = self._CreateSyntheticSequence()
     silence = np.zeros(9 * SAMPLE_RATE, np.float32)
     return audio_io.samples_to_wav_data(silence, SAMPLE_RATE), note_sequence