Example #1
def process_record(wav_data,
                   ns,
                   example_id,
                   min_length=5,
                   max_length=20,
                   sample_rate=16000,
                   allow_empty_notesequence=False):
  """Split a record into chunks and create an example proto.

  To use the full length audio and notesequence, set min_length=0 and
  max_length=-1.

  Args:
    wav_data: audio data in WAV format.
    ns: corresponding NoteSequence.
    example_id: id for the example proto.
    min_length: minimum length in seconds for audio chunks.
    max_length: maximum length in seconds for audio chunks.
    sample_rate: desired audio sample rate.
    allow_empty_notesequence: whether an empty NoteSequence is allowed.

  Yields:
    Example protos.
  """
  samples = audio_io.wav_data_to_samples(wav_data, sample_rate)
  samples = librosa.util.normalize(samples, norm=np.inf)
  if max_length == min_length:
    splits = np.arange(0, ns.total_time, max_length)
  elif max_length > 0:
    splits = find_split_points(ns, samples, sample_rate, min_length, max_length)
  else:
    splits = [0, ns.total_time]
  velocities = [note.velocity for note in ns.notes]
  velocity_max = np.max(velocities) if velocities else 0
  velocity_min = np.min(velocities) if velocities else 0
  velocity_range = music_pb2.VelocityRange(min=velocity_min, max=velocity_max)

  for start, end in zip(splits[:-1], splits[1:]):
    if end - start < min_length:
      continue

    if start == 0 and end == ns.total_time:
      new_ns = ns
    else:
      new_ns = sequences_lib.extract_subsequence(ns, start, end)

    if not new_ns.notes and not allow_empty_notesequence:
      tf.logging.warning('skipping empty sequence')
      continue

    if start == 0 and end == ns.total_time:
      new_samples = samples
    else:
      # The resampling that happens in crop_wav_data is really slow, and
      # we've already done it once; crop the decoded samples instead to
      # avoid doing it twice.
      new_samples = audio_io.crop_samples(samples, sample_rate, start,
                                          end - start)
    new_wav_data = audio_io.samples_to_wav_data(new_samples, sample_rate)
    yield create_example(
        example_id, new_ns, new_wav_data, velocity_range=velocity_range)
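For context, a hedged calling sketch for the snippet above. The imports reflect the Magenta project layout (newer releases move these modules into note_seq), the file names are placeholders, and create_example / find_split_points come from the same module as this function:

import tensorflow.compat.v1 as tf  # assumed TF1-style API, matching tf.logging above
from magenta.music import midi_io  # note_seq.midi_io in newer releases

with open('performance.wav', 'rb') as f:  # placeholder paths
  wav_data = f.read()
ns = midi_io.midi_file_to_note_sequence('performance.mid')

# Yields one tf.train.Example per 5-20 second chunk (the defaults above).
for example in process_record(wav_data, ns, example_id='performance'):
  print(example.ByteSize())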
Example #2
def split2batch(audio, sequence):
    from magenta.models.onsets_frames_transcription.audio_label_data_utils import find_split_points
    # Pad the audio with silence if the NoteSequence runs longer than it.
    pad_num = int(math.ceil(
        sequence.total_time * cfg.SAMPLE_RATE)) - audio.shape[0]
    if pad_num > 0:
        audio = np.concatenate((audio, np.zeros(pad_num, dtype=audio.dtype)))

    if cfg.MAX_SPLIT_LENGTH == 0:
        splits = [0, sequence.total_time]
    else:
        splits = find_split_points(sequence, audio, cfg.SAMPLE_RATE,
                                   cfg.MIN_SPLIT_LENGTH, cfg.MAX_SPLIT_LENGTH)

    samples = []
    for start, end in zip(splits[:-1], splits[1:]):
        if end - start < cfg.MIN_SPLIT_LENGTH:
            continue

        split_seq = sequence
        if not (start == 0 and end == sequence.total_time):
            split_seq = sequences_lib.extract_subsequence(sequence, start, end)
        split_audio = audio_io.crop_samples(audio, cfg.SAMPLE_RATE, start,
                                            end - start)
        # Pad each chunk up to the maximum split length so every entry in
        # the batch has the same shape.
        pad_num = int(math.ceil(
            cfg.MAX_SPLIT_LENGTH * cfg.SAMPLE_RATE)) - split_audio.shape[0]
        if pad_num > 0:
            split_audio = np.concatenate(
                (split_audio, np.zeros(pad_num, dtype=split_audio.dtype)))

        samples.append((split_audio, split_seq))

    return samples
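A usage sketch under stated assumptions: cfg is whatever configuration module the author imports, so the SimpleNamespace below only stands in for it with illustrative values, and audio / sequence are a decoded sample array plus its matching NoteSequence:

from types import SimpleNamespace

# Stand-in for the author's cfg module; these values are assumptions.
cfg = SimpleNamespace(SAMPLE_RATE=16000,
                      MIN_SPLIT_LENGTH=5,
                      MAX_SPLIT_LENGTH=20)

# audio: 1-D float array at cfg.SAMPLE_RATE; sequence: its NoteSequence.
for chunk_audio, chunk_seq in split2batch(audio, sequence):
    # Every chunk is padded to the same fixed length, so they batch cleanly.
    print(chunk_audio.shape[0], len(chunk_seq.notes))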
Example #3
def process_record(wav_data,
                   ns,
                   example_id,
                   min_length=5,
                   max_length=20,
                   sample_rate=16000,
                   allow_empty_notesequence=False,
                   load_audio_with_librosa=False):
    """Split a record into chunks and create an example proto.

  To use the full length audio and notesequence, set min_length=0 and
  max_length=-1.

  Args:
    wav_data: audio data in WAV format.
    ns: corresponding NoteSequence.
    example_id: id for the example proto
    min_length: minimum length in seconds for audio chunks.
    max_length: maximum length in seconds for audio chunks.
    sample_rate: desired audio sample rate.
    allow_empty_notesequence: whether an empty NoteSequence is allowed.
    load_audio_with_librosa: Use librosa for sampling. Works with 24-bit wavs.

  Yields:
    Example protos.
  """
    try:
        if load_audio_with_librosa:
            samples = audio_io.wav_data_to_samples_librosa(
                wav_data, sample_rate)
        else:
            samples = audio_io.wav_data_to_samples(wav_data, sample_rate)
    except audio_io.AudioIOReadError as e:
        print('Exception: %s' % e)
        return
    samples = librosa.util.normalize(samples, norm=np.inf)

    # Add padding to samples if notesequence is longer.
    pad_to_samples = int(math.ceil(ns.total_time * sample_rate))
    padding_needed = pad_to_samples - samples.shape[0]
    if padding_needed > 5 * sample_rate:
        raise ValueError(
            'Would have padded {} more than 5 seconds to match note sequence total '
            'time. ({} original samples, {} sample rate, {} sample seconds, '
            '{} sequence seconds) This likely indicates a problem with the source '
            'data.'.format(example_id, samples.shape[0], sample_rate,
                           samples.shape[0] / sample_rate, ns.total_time))
    samples = np.pad(samples, (0, max(0, padding_needed)), 'constant')

    if max_length == min_length:
        splits = np.arange(0, ns.total_time, max_length)
    elif max_length > 0:
        splits = find_split_points(ns, samples, sample_rate, min_length,
                                   max_length)
    else:
        splits = [0, ns.total_time]
    velocities = [note.velocity for note in ns.notes]
    velocity_max = np.max(velocities) if velocities else 0
    velocity_min = np.min(velocities) if velocities else 0
    velocity_range = music_pb2.VelocityRange(min=velocity_min,
                                             max=velocity_max)

    for start, end in zip(splits[:-1], splits[1:]):
        if end - start < min_length:
            continue

        if start == 0 and end == ns.total_time:
            new_ns = ns
        else:
            new_ns = sequences_lib.extract_subsequence(ns, start, end)

        if not new_ns.notes and not allow_empty_notesequence:
            tf.logging.warning('skipping empty sequence')
            continue

        if start == 0 and end == ns.total_time:
            new_samples = samples
        else:
            # The resampling that happens in crop_wav_data is really slow,
            # and we've already done it once; crop the decoded samples
            # instead to avoid doing it twice.
            new_samples = audio_io.crop_samples(samples, sample_rate, start,
                                                end - start)
        new_wav_data = audio_io.samples_to_wav_data(new_samples, sample_rate)
        yield create_example(example_id,
                             new_ns,
                             new_wav_data,
                             velocity_range=velocity_range)
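Finally, a hedged sketch of the librosa path this variant adds, writing the yielded protos to a TFRecord. The path is a placeholder, ns is a NoteSequence loaded as in Example #1, and the TF1-style writer is an assumption consistent with the tf.logging call above:

import tensorflow.compat.v1 as tf  # assumed TF1-style API

with open('recording_24bit.wav', 'rb') as f:  # placeholder path
    wav_data = f.read()

# load_audio_with_librosa=True decodes 24-bit WAVs that the default
# scipy-based reader rejects; on AudioIOReadError the generator is empty.
with tf.python_io.TFRecordWriter('chunks.tfrecord') as writer:
    for example in process_record(wav_data, ns, 'recording_24bit',
                                  load_audio_with_librosa=True):
        writer.write(example.SerializeToString())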