import math

import librosa
import numpy as np
import tensorflow.compat.v1 as tf

from note_seq import audio_io
from note_seq import sequences_lib
from note_seq.protobuf import music_pb2

# NoteCropping, instrument_family_mappings, and find_split_points come from
# elsewhere in the source project and are assumed to be in scope.


def create_example(example_id, ns, wav_data, velocity_range=None):
    """Creates a tf.train.Example proto for training or testing."""
    if velocity_range is None:
        # The helper below also handles sequences with no notes, which would
        # otherwise crash np.max on an empty list.
        velocity_range = velocity_range_from_sequence(ns)

    # Ensure that all sequences for training and evaluation have gone through
    # sustain processing.
    sus_ns = sequences_lib.apply_sustain_control_changes(ns)

    example = tf.train.Example(features=tf.train.Features(
        feature={
            'id':
            tf.train.Feature(bytes_list=tf.train.BytesList(
                value=[example_id.encode('utf-8')])),
            'sequence':
            tf.train.Feature(bytes_list=tf.train.BytesList(
                value=[sus_ns.SerializeToString()])),
            'audio':
            tf.train.Feature(bytes_list=tf.train.BytesList(value=[wav_data])),
            'velocity_range':
            tf.train.Feature(bytes_list=tf.train.BytesList(
                value=[velocity_range.SerializeToString()])),
        }))
    return example
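
# A minimal usage sketch for create_example (an illustration, not part of the
# original module): build a one-note NoteSequence and let the function derive
# the velocity range itself. `wav_bytes` is assumed to hold raw WAV bytes.
def _demo_create_example(wav_bytes):
    ns = music_pb2.NoteSequence()
    note = ns.notes.add()
    note.pitch = 60
    note.velocity = 80
    note.start_time = 0.0
    note.end_time = 0.5
    ns.total_time = 0.5
    return create_example('demo-id', ns, wav_bytes)
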
def velocity_range_from_sequence(ns):
    """Derive a VelocityRange proto from a NoteSequence."""
    velocities = [note.velocity for note in ns.notes]
    velocity_max = np.max(velocities) if velocities else 0
    velocity_min = np.min(velocities) if velocities else 0
    velocity_range = music_pb2.VelocityRange(min=velocity_min, max=velocity_max)
    return velocity_range
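
# A quick sketch of the empty-sequence behavior (an illustration, not part of
# the original module): with no notes, the helper returns a VelocityRange of
# min=0, max=0 instead of crashing on np.max of an empty list.
def _demo_velocity_range():
    return velocity_range_from_sequence(music_pb2.NoteSequence())
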
def _FillExample(self, sequence, wav_data, filename):
    """Builds an Example proto from a sequence and audio, for tests."""
    velocity_range = music_pb2.VelocityRange(min=0, max=127)
    feature_dict = {
        'id':
        tf.train.Feature(bytes_list=tf.train.BytesList(
            value=[filename.encode('utf-8')])),
        'sequence':
        tf.train.Feature(bytes_list=tf.train.BytesList(
            value=[sequence.SerializeToString()])),
        'audio':
        tf.train.Feature(bytes_list=tf.train.BytesList(value=[wav_data])),
        'velocity_range':
        tf.train.Feature(bytes_list=tf.train.BytesList(
            value=[velocity_range.SerializeToString()])),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature_dict))
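
# Note: unlike create_example above, _FillExample pins velocity_range to the
# full MIDI range (0-127) rather than deriving it from the sequence, which is
# typical for test fixtures where exact velocities do not matter.
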
def convert_note_cropping_to_sequence_record(tensor, hparams):
    """Converts a Timbre dataset example for use by the Full Model.

    Args:
      tensor: dict with keys 'audio', 'note_croppings', and
        'instrument_families'.
      hparams: hyperparameters; only hparams.sample_rate is used here.

    Returns:
      A dict of the same form as the output of data.parse_example().
    """
    note_croppings = tensor['note_croppings']
    instrument_families = tensor['instrument_families']

    def to_sequence_fn(eager_note_croppings, eager_instrument_families):
        eager_note_croppings = eager_note_croppings.numpy()
        eager_instrument_families = eager_instrument_families.numpy()
        sequence = music_pb2.NoteSequence()
        sequence.tempos.add().qpm = 120
        sequence.ticks_per_quarter = 220
        # Map each instrument family to a compact instrument index, assigned
        # in order of first appearance.
        distinct_families_list = []
        for croppings_row, families_row in zip(eager_note_croppings,
                                               eager_instrument_families):
            cropping = NoteCropping(*croppings_row)
            family = families_row.argmax()

            if family not in distinct_families_list:
                distinct_families_list.append(family)

            note = sequence.notes.add()
            note.instrument = distinct_families_list.index(family)
            note.program = instrument_family_mappings.family_to_midi_instrument[
                family]
            note.start_time = cropping.start_idx / hparams.sample_rate
            note.end_time = cropping.end_idx / hparams.sample_rate
            note.pitch = cropping.pitch
            note.velocity = 70
            if note.end_time > sequence.total_time:
                sequence.total_time = note.end_time
        return sequence.SerializeToString()

    sequence = tf.py_function(to_sequence_fn,
                              [note_croppings, instrument_families], tf.string)

    return dict(id='',
                sequence=sequence,
                audio=tensor['audio'],
                velocity_range=music_pb2.VelocityRange(
                    min=0, max=100).SerializeToString())
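
# A minimal sketch of wiring the converter into a tf.data pipeline (an
# illustration; assumes `note_cropping_dataset` yields dicts with 'audio',
# 'note_croppings', and 'instrument_families' keys, as described above).
def _demo_convert_dataset(note_cropping_dataset, hparams):
    return note_cropping_dataset.map(
        lambda tensor: convert_note_cropping_to_sequence_record(
            tensor, hparams))
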
def process_record(wav_data,
                   ns,
                   example_id,
                   min_length=5,
                   max_length=20,
                   sample_rate=16000,
                   allow_empty_notesequence=False,
                   load_audio_with_librosa=False):
    """Split a record into chunks and create an example proto.

  To use the full length audio and notesequence, set min_length=0 and
  max_length=-1.

  Args:
    wav_data: audio data in WAV format.
    ns: corresponding NoteSequence.
    example_id: id for the example proto
    min_length: minimum length in seconds for audio chunks.
    max_length: maximum length in seconds for audio chunks.
    sample_rate: desired audio sample rate.
    allow_empty_notesequence: whether an empty NoteSequence is allowed.
    load_audio_with_librosa: Use librosa for sampling. Works with 24-bit wavs.

  Yields:
    Example protos.
  """
    try:
        if load_audio_with_librosa:
            samples = audio_io.wav_data_to_samples_librosa(
                wav_data, sample_rate)
        else:
            samples = audio_io.wav_data_to_samples(wav_data, sample_rate)
    except audio_io.AudioIOReadError as e:
        tf.logging.warning('Exception %s', e)
        return
    samples = librosa.util.normalize(samples, norm=np.inf)

    # Add padding to samples if notesequence is longer.
    pad_to_samples = int(math.ceil(ns.total_time * sample_rate))
    padding_needed = pad_to_samples - samples.shape[0]
    if padding_needed > 5 * sample_rate:
        raise ValueError(
            'Would have padded {} more than 5 seconds to match note sequence total '
            'time. ({} original samples, {} sample rate, {} sample seconds, '
            '{} sequence seconds) This likely indicates a problem with the source '
            'data.'.format(example_id, samples.shape[0], sample_rate,
                           samples.shape[0] / sample_rate, ns.total_time))
    samples = np.pad(samples, (0, max(0, padding_needed)), 'constant')

    if max_length == min_length:
        splits = np.arange(0, ns.total_time, max_length)
    elif max_length > 0:
        splits = find_split_points(ns, samples, sample_rate, min_length,
                                   max_length)
    else:
        splits = [0, ns.total_time]
    velocities = [note.velocity for note in ns.notes]
    velocity_max = np.max(velocities) if velocities else 0
    velocity_min = np.min(velocities) if velocities else 0
    velocity_range = music_pb2.VelocityRange(min=velocity_min,
                                             max=velocity_max)

    for start, end in zip(splits[:-1], splits[1:]):
        if end - start < min_length:
            continue

        if start == 0 and end == ns.total_time:
            new_ns = ns
        else:
            new_ns = sequences_lib.extract_subsequence(ns, start, end)

        if not new_ns.notes and not allow_empty_notesequence:
            tf.logging.warning('skipping empty sequence')
            continue

        if start == 0 and end == ns.total_time:
            new_samples = samples
        else:
            # Crop the in-memory samples directly: the resampling that happens
            # in audio_io.crop_wav_data is slow, and we've already resampled
            # once above.
            new_samples = audio_io.crop_samples(samples, sample_rate, start,
                                                end - start)
        new_wav_data = audio_io.samples_to_wav_data(new_samples, sample_rate)
        yield create_example(example_id,
                             new_ns,
                             new_wav_data,
                             velocity_range=velocity_range)
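
# End-to-end sketch (an illustration, not part of the original module): chunk
# one WAV/NoteSequence pair into Example protos and write them to a TFRecord
# file. `ns` is assumed to be a NoteSequence aligned with the audio.
def _demo_write_examples(wav_path, ns, output_path):
    with open(wav_path, 'rb') as f:
        wav_data = f.read()
    with tf.python_io.TFRecordWriter(output_path) as writer:
        for example in process_record(wav_data, ns, example_id=wav_path):
            writer.write(example.SerializeToString())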