Code example #1
def create_example(example_id, ns, wav_data, velocity_range=None):
  """Creates a tf.train.Example proto for training or testing."""
  if velocity_range is None:
    velocity_range = velocity_range_from_sequence(ns)

  # Ensure that all sequences for training and evaluation have gone through
  # sustain processing.
  sus_ns = sequences_lib.apply_sustain_control_changes(ns)

  example = tf.train.Example(
      features=tf.train.Features(
          feature={
              'id':
                  tf.train.Feature(
                      bytes_list=tf.train.BytesList(
                          value=[example_id.encode('utf-8')])),
              'sequence':
                  tf.train.Feature(
                      bytes_list=tf.train.BytesList(
                          value=[sus_ns.SerializeToString()])),
              'audio':
                  tf.train.Feature(
                      bytes_list=tf.train.BytesList(value=[wav_data])),
              'velocity_range':
                  tf.train.Feature(
                      bytes_list=tf.train.BytesList(
                          value=[velocity_range.SerializeToString()])),
          }))
  return example
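The proto built above can be parsed back with a matching feature spec. A minimal sketch (not part of the original file; assumes TF2 eager execution and reuses the feature names from create_example):

import tensorflow as tf
from note_seq.protobuf import music_pb2

# All four features are serialized as bytes by create_example above.
feature_spec = {
    'id': tf.io.FixedLenFeature([], tf.string),
    'sequence': tf.io.FixedLenFeature([], tf.string),
    'audio': tf.io.FixedLenFeature([], tf.string),
    'velocity_range': tf.io.FixedLenFeature([], tf.string),
}
parsed = tf.io.parse_single_example(example.SerializeToString(), feature_spec)
sequence = music_pb2.NoteSequence.FromString(parsed['sequence'].numpy())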
Code example #2
    def testSustainPipeline(self):
        note_sequence = common_testing_lib.parse_test_proto(
            music_pb2.NoteSequence, """
        time_signatures: {
          numerator: 4
          denominator: 4}
        tempos: {
          qpm: 60}""")
        testing_lib.add_track_to_sequence(note_sequence, 0,
                                          [(11, 55, 0.22, 0.50),
                                           (40, 45, 2.50, 3.50),
                                           (55, 120, 4.0, 4.01)])
        testing_lib.add_control_changes_to_sequence(note_sequence, 0,
                                                    [(0.0, 64, 127),
                                                     (0.75, 64, 0),
                                                     (2.0, 64, 127),
                                                     (3.0, 64, 0),
                                                     (3.75, 64, 127),
                                                     (4.5, 64, 127),
                                                     (4.8, 64, 0),
                                                     (4.9, 64, 127),
                                                     (6.0, 64, 0)])
        expected_sequence = sequences_lib.apply_sustain_control_changes(
            note_sequence)

        unit = note_sequence_pipelines.SustainPipeline()
        self._unit_transform_test(unit, note_sequence, [expected_sequence])
Code example #3
import os

import magenta.music as mm
import pretty_midi
from magenta.music.sequences_lib import apply_sustain_control_changes


def convert_midi_to_proto(path, dest_dir):
    midi = pretty_midi.PrettyMIDI(path)
    # Iterate over a copy: removing instruments from the list while
    # iterating over it would silently skip entries.
    for inst in list(midi.instruments):
        num_distinct_pitches = sum(
            count > 5 for count in inst.get_pitch_class_histogram())
        if inst.is_drum or num_distinct_pitches < 5 or len(inst.notes) < 30:
            midi.instruments.remove(inst)
    ns = mm.midi_to_note_sequence(midi)
    ns = apply_sustain_control_changes(ns)
    # Drop control changes; sustain has already been applied.
    del ns.control_changes[:]
    out_file = os.path.join(dest_dir, os.path.basename(path)) + '.pb'
    with open(out_file, 'wb') as f:
        f.write(ns.SerializeToString())
Code example #4
File: data.py Project: timgates42/magenta
 def sequence_to_pianoroll_fn(sequence_tensor, velocity_range_tensor):
   """Converts sequence to pianorolls."""
   velocity_range = music_pb2.VelocityRange.FromString(velocity_range_tensor)
   sequence = music_pb2.NoteSequence.FromString(sequence_tensor)
   sequence = sequences_lib.apply_sustain_control_changes(sequence)
   roll = sequences_lib.sequence_to_pianoroll(
       sequence,
       frames_per_second=hparams_frames_per_second(hparams),
       min_pitch=constants.MIN_MIDI_PITCH,
       max_pitch=constants.MAX_MIDI_PITCH,
       min_frame_occupancy_for_label=hparams.min_frame_occupancy_for_label,
       onset_mode=hparams.onset_mode,
       onset_length_ms=hparams.onset_length,
       offset_length_ms=hparams.offset_length,
       onset_delay_ms=hparams.onset_delay,
       min_velocity=velocity_range.min,
       max_velocity=velocity_range.max)
   return (roll.active, roll.weights, roll.onsets, roll.onset_velocities,
           roll.offsets)
Code example #5
def truncate_note_sequence(sequence, truncate_secs):
    """Truncates a NoteSequence to the given length."""
    sus_sequence = sequences_lib.apply_sustain_control_changes(sequence)

    truncated_seq = music_pb2.NoteSequence()

    for note in sus_sequence.notes:
        start_time = note.start_time
        end_time = note.end_time

        if start_time > truncate_secs:
            continue

        if end_time > truncate_secs:
            end_time = truncate_secs

        modified_note = truncated_seq.notes.add()
        modified_note.MergeFrom(note)
        modified_note.start_time = start_time
        modified_note.end_time = end_time
    if truncated_seq.notes:
        # Use the max end time; after clipping, the last note in the list is
        # not guaranteed to end last.
        truncated_seq.total_time = max(
            note.end_time for note in truncated_seq.notes)
    return truncated_seq
Code example #6
def mix_sequences(individual_samples, sample_rate, individual_sequences):
  """Mix multiple audio/notesequence pairs together.

  All sequences will be repeated until they are as long as the longest sequence.

  Note that the mixed sequence will contain only the (sustain-processed) notes
  from the individual sequences. All other control changes and metadata will not
  be preserved.

  Args:
    individual_samples: A list of audio samples to mix.
    sample_rate: Rate at which to interpret the samples.
    individual_sequences: A list of NoteSequences to mix.

  Returns:
    mixed_samples: The mixed audio.
    mixed_sequence: The mixed NoteSequence.
  """
  # Normalize samples and sequence velocities before mixing.
  # This ensures that the velocities/loudness of the individual samples
  # are treated equally.
  for i, samples in enumerate(individual_samples):
    individual_samples[i] = librosa.util.normalize(samples, norm=np.inf)
  for sequence in individual_sequences:
    velocities = [note.velocity for note in sequence.notes]
    velocity_max = np.max(velocities)
    for note in sequence.notes:
      note.velocity = int(
          (note.velocity / velocity_max) * constants.MAX_MIDI_VELOCITY)

  # Ensure that samples are always at least as long as their paired sequences.
  for i, (samples, sequence) in enumerate(
      zip(individual_samples, individual_sequences)):
    if len(samples) / sample_rate < sequence.total_time:
      padding = int(math.ceil(
          (sequence.total_time - len(samples) / sample_rate) * sample_rate))
      individual_samples[i] = np.pad(samples, [0, padding], 'constant')

  # Repeat each ns/wav pair to be as long as the longest wav.
  max_duration = np.max([len(s) for s in individual_samples]) / sample_rate

  extended_samples = []
  extended_sequences = []
  for samples, sequence in zip(individual_samples, individual_sequences):
    extended_samples.append(
        audio_io.repeat_samples_to_duration(samples, sample_rate, max_duration))
    extended_sequences.append(
        sequences_lib.repeat_sequence_to_duration(
            sequence, max_duration,
            sequence_duration=len(samples) / sample_rate))

  # Mix samples and sequences together
  mixed_samples = np.zeros_like(extended_samples[0])
  for samples in extended_samples:
    mixed_samples += samples / len(extended_samples)

  mixed_sequence = music_pb2.NoteSequence()
  mixed_sequence.ticks_per_quarter = constants.STANDARD_PPQ
  del mixed_sequence.notes[:]
  for sequence in extended_sequences:
    # Process sustain changes before copying notes.
    sus_sequence = sequences_lib.apply_sustain_control_changes(sequence)
    if sus_sequence.total_time > mixed_sequence.total_time:
      mixed_sequence.total_time = sus_sequence.total_time
    # TODO(fjord): Manage instrument/program numbers.
    mixed_sequence.notes.extend(sus_sequence.notes)

  return mixed_samples, mixed_sequence
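A hedged usage sketch; samples_a/samples_b and ns_a/ns_b are hypothetical placeholders for audio arrays and NoteSequences loaded elsewhere:

mixed_samples, mixed_sequence = mix_sequences(
    individual_samples=[samples_a, samples_b],
    sample_rate=16000,
    individual_sequences=[ns_a, ns_b])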
Code example #7
def find_split_points(note_sequence, samples, sample_rate, min_length,
                      max_length):
  """Returns times at which there are no notes.

  The general strategy employed is to first check if there are places in the
  sustained pianoroll where no notes are active within the max_length window;
  if so the middle of the last gap is chosen as the split point.

  If not, then it checks if there are places in the pianoroll without sustain
  where no notes are active and then finds last zero crossing of the wav file
  and chooses that as the split point.

  If neither of those is true, then it chooses the last zero crossing within
  the max_length window as the split point.

  If there are no zero crossings in the entire window, then it basically gives
  up and advances time forward by max_length.

  Args:
      note_sequence: The NoteSequence to split.
      samples: The audio file as samples.
      sample_rate: The sample rate (samples/second) of the audio file.
      min_length: Minimum number of seconds in a split.
      max_length: Maximum number of seconds in a split.

  Returns:
      A list of split points in seconds from the beginning of the file.
  """

  if not note_sequence.notes:
    return []

  end_time = note_sequence.total_time

  note_sequence_sustain = sequences_lib.apply_sustain_control_changes(
      note_sequence)

  ranges_nosustain = find_inactive_ranges(note_sequence)
  ranges_sustain = find_inactive_ranges(note_sequence_sustain)

  nosustain_starts = [x[0] for x in ranges_nosustain]
  sustain_starts = [x[0] for x in ranges_sustain]

  nosustain_ends = [x[1] for x in ranges_nosustain]
  sustain_ends = [x[1] for x in ranges_sustain]

  split_points = [0.]

  while end_time - split_points[-1] > max_length:
    max_advance = split_points[-1] + max_length

    # check for interval in sustained sequence
    pos = bisect.bisect_right(sustain_ends, max_advance)
    if pos < len(sustain_starts) and max_advance > sustain_starts[pos]:
      split_points.append(max_advance)

    # if no interval, or we didn't fit, try the unmodified sequence
    elif pos == 0 or sustain_starts[pos - 1] <= split_points[-1] + min_length:
      # no splits available; use non-sustain notes and find a close zero crossing
      pos = bisect.bisect_right(nosustain_ends, max_advance)

      if pos < len(nosustain_starts) and max_advance > nosustain_starts[pos]:
        # we fit, great, try to split at a zero crossing
        zxc_start = nosustain_starts[pos]
        zxc_end = max_advance
        last_zero_xing = _last_zero_crossing(
            samples, int(math.floor(zxc_start * sample_rate)),
            int(math.ceil(zxc_end * sample_rate)))
        if last_zero_xing:
          last_zero_xing = float(last_zero_xing) / sample_rate
          split_points.append(last_zero_xing)
        else:
          # give up and just return where there are at least no notes
          split_points.append(max_advance)

      else:
        # there are no good places to cut, so just pick the last zero crossing
        # check the entire valid range for zero crossings
        start_sample = int(
            math.ceil((split_points[-1] + min_length) * sample_rate)) + 1
        end_sample = start_sample + int(
            (max_length - min_length) * sample_rate)
        last_zero_xing = _last_zero_crossing(samples, start_sample, end_sample)

        if last_zero_xing:
          last_zero_xing = float(last_zero_xing) / sample_rate
          split_points.append(last_zero_xing)
        else:
          # give up and advance by max amount
          split_points.append(max_advance)
    else:
      # only advance as far as max_length
      new_time = min(np.mean(ranges_sustain[pos - 1]), max_advance)
      split_points.append(new_time)

  if split_points[-1] != end_time:
    split_points.append(end_time)

  # ensure that we've generated a valid sequence of splits
  for prev, curr in zip(split_points[:-1], split_points[1:]):
    assert curr > prev
    assert curr - prev <= max_length + 1e-8
    if curr < end_time:
      assert curr - prev >= min_length - 1e-8
  assert end_time - split_points[-1] < max_length

  return split_points
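A hedged sketch of consuming the returned split points; sequences_lib.extract_subsequence is a real note_seq helper, but the loop itself is illustrative and ns/samples are assumed to be loaded elsewhere:

splits = find_split_points(ns, samples, sample_rate, min_length=5, max_length=20)
for start, end in zip(splits[:-1], splits[1:]):
    # Slice the paired audio and sequence over the same time span.
    chunk_samples = samples[int(start * sample_rate):int(end * sample_rate)]
    chunk_ns = sequences_lib.extract_subsequence(ns, start, end)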
Code example #8
    def process(self, kv):
        # Seed random number generator based on key so that hop times are
        # deterministic.
        key, ns_str = kv
        m = hashlib.md5(key.encode('utf-8'))
        random.seed(int(m.hexdigest(), 16))

        # Deserialize NoteSequence proto.
        ns = note_seq.NoteSequence.FromString(ns_str)

        # Apply sustain pedal.
        ns = sequences_lib.apply_sustain_control_changes(ns)

        # Remove control changes as there are potentially a lot of them and they are
        # no longer needed.
        del ns.control_changes[:]

        for _ in range(self._num_replications):
            for augment_fn in self._augment_fns:
                # Augment and encode the performance.
                try:
                    augmented_performance_sequence = augment_fn(ns)
                except DataAugmentationError:
                    Metrics.counter('extract_examples',
                                    'augment_performance_failed').inc()
                    continue
                seq = self._encode_performance_fn(
                    augmented_performance_sequence)
                # Feed the performance in as both input and output to the music
                # transformer, chopping the sequence to length 2048 (throwing
                # out shorter sequences).
                if len(seq) >= 2048:
                    max_offset = len(seq) - 2048
                    offset = random.randrange(max_offset + 1)
                    cropped_seq = seq[offset:offset + 2048]

                    example_dict = {
                        'inputs': cropped_seq,
                        'targets': cropped_seq
                    }

                    if self._melody:
                        # decode truncated performance sequence for melody inference
                        decoded_midi = self._decode_performance_fn(cropped_seq)
                        decoded_ns = note_seq.midi_io.midi_file_to_note_sequence(
                            decoded_midi)

                        # extract melody from cropped performance sequence
                        melody_instrument = melody_inference.infer_melody_for_sequence(
                            decoded_ns,
                            melody_interval_scale=2.0,
                            rest_prob=0.1,
                            instantaneous_non_max_pitch_prob=1e-15,
                            instantaneous_non_empty_rest_prob=0.0,
                            instantaneous_missing_pitch_prob=1e-15)

                        # remove non-melody notes from score
                        score_sequence = copy.deepcopy(decoded_ns)
                        score_notes = []
                        for note in score_sequence.notes:
                            if note.instrument == melody_instrument:
                                score_notes.append(note)
                        del score_sequence.notes[:]
                        score_sequence.notes.extend(score_notes)

                        # encode melody
                        encode_score_fn = self._encode_score_fns['melody']
                        example_dict['melody'] = encode_score_fn(
                            score_sequence)
                        # make sure performance input also matches targets; needed for
                        # compatibility of both perf and (mel & perf) autoencoders

                        if self._noisy:
                            # randomly sample a pitch shift to construct noisy performance
                            all_pitches = [x.pitch for x in decoded_ns.notes]
                            min_val = min(all_pitches)
                            max_val = max(all_pitches)
                            transpose_range = list(
                                range(-(min_val - 21), 108 - max_val + 1))
                            try:
                                transpose_range.remove(
                                    0)  # make sure you transpose
                            except ValueError:
                                pass
                            transpose_amount = random.choice(transpose_range)
                            augmented_ns, _ = sequences_lib.transpose_note_sequence(
                                decoded_ns,
                                transpose_amount,
                                min_allowed_pitch=21,
                                max_allowed_pitch=108,
                                in_place=False)
                            aug_seq = self._encode_performance_fn(augmented_ns)
                            example_dict['performance'] = aug_seq
                        else:
                            example_dict['performance'] = example_dict[
                                'targets']
                        del example_dict['inputs']

                    Metrics.counter('extract_examples',
                                    'encoded_example').inc()
                    Metrics.distribution(
                        'extract_examples',
                        'performance_length_in_seconds').update(
                            int(augmented_performance_sequence.total_time))

                    yield generator_utils.to_example(example_dict)
Code example #9
    def process(self, kv):
        # Seed random number generator based on key so that hop times are
        # deterministic.
        key, ns_str = kv
        m = hashlib.md5(key.encode('utf-8'))
        random.seed(int(m.hexdigest(), 16))

        # Deserialize NoteSequence proto.
        ns = note_seq.NoteSequence.FromString(ns_str)

        # Apply sustain pedal.
        ns = sequences_lib.apply_sustain_control_changes(ns)

        # Remove control changes as there are potentially a lot of them and they are
        # no longer needed.
        del ns.control_changes[:]

        if (self._min_hop_size_seconds
                and ns.total_time < self._min_hop_size_seconds):
            Metrics.counter('extract_examples', 'sequence_too_short').inc()
            return

        sequences = []
        for _ in range(self._num_replications):
            if self._max_hop_size_seconds:
                if self._max_hop_size_seconds == self._min_hop_size_seconds:
                    # Split using fixed hop size.
                    sequences += sequences_lib.split_note_sequence(
                        ns, self._max_hop_size_seconds)
                else:
                    # Sample random hop positions such that each segment size is within
                    # the specified range.
                    hop_times = [0.0]
                    while (hop_times[-1] <=
                           ns.total_time - self._min_hop_size_seconds):
                        if (hop_times[-1] + self._max_hop_size_seconds <
                                ns.total_time):
                            # It's important that we get a valid hop size here, since the
                            # remainder of the sequence is too long.
                            max_offset = min(
                                self._max_hop_size_seconds, ns.total_time -
                                self._min_hop_size_seconds - hop_times[-1])
                        else:
                            # It's okay if the next hop time is invalid (in which case we'll
                            # just stop).
                            max_offset = self._max_hop_size_seconds
                        offset = random.uniform(self._min_hop_size_seconds,
                                                max_offset)
                        hop_times.append(hop_times[-1] + offset)
                    # Split at the chosen hop times (ignoring zero and the final invalid
                    # time).
                    sequences += sequences_lib.split_note_sequence(
                        ns, hop_times[1:-1])
            else:
                sequences += [ns]

        for performance_sequence in sequences:
            if self._encode_score_fns:
                # We need to extract a score.
                if not self._absolute_timing:
                    # Beats are required to extract a score with metric timing.
                    beats = [
                        ta for ta in performance_sequence.text_annotations
                        if ta.annotation_type == BEAT
                        and ta.time <= performance_sequence.total_time
                    ]
                    if len(beats) < 2:
                        Metrics.counter('extract_examples',
                                        'not_enough_beats').inc()
                        continue

                    # Ensure the sequence starts and ends on a beat.
                    performance_sequence = sequences_lib.extract_subsequence(
                        performance_sequence,
                        start_time=min(beat.time for beat in beats),
                        end_time=max(beat.time for beat in beats))

                    # Infer beat-aligned chords (only for relative timing).
                    try:
                        chord_inference.infer_chords_for_sequence(
                            performance_sequence,
                            chord_change_prob=0.25,
                            chord_note_concentration=50.0,
                            add_key_signatures=True)
                    except chord_inference.ChordInferenceError:
                        Metrics.counter('extract_examples',
                                        'chord_inference_failed').inc()
                        continue

                # Infer melody regardless of relative/absolute timing.
                try:
                    melody_instrument = melody_inference.infer_melody_for_sequence(
                        performance_sequence,
                        melody_interval_scale=2.0,
                        rest_prob=0.1,
                        instantaneous_non_max_pitch_prob=1e-15,
                        instantaneous_non_empty_rest_prob=0.0,
                        instantaneous_missing_pitch_prob=1e-15)
                except melody_inference.MelodyInferenceError:
                    Metrics.counter('extract_examples',
                                    'melody_inference_failed').inc()
                    continue

                if not self._absolute_timing:
                    # Now rectify detected beats to occur at fixed tempo.
                    # TODO(iansimon): also include the alignment
                    score_sequence, unused_alignment = sequences_lib.rectify_beats(
                        performance_sequence, beats_per_minute=SCORE_BPM)
                else:
                    # Score uses same timing as performance.
                    score_sequence = copy.deepcopy(performance_sequence)

                # Remove melody notes from performance.
                performance_notes = []
                for note in performance_sequence.notes:
                    if note.instrument != melody_instrument:
                        performance_notes.append(note)
                del performance_sequence.notes[:]
                performance_sequence.notes.extend(performance_notes)

                # Remove non-melody notes from score.
                score_notes = []
                for note in score_sequence.notes:
                    if note.instrument == melody_instrument:
                        score_notes.append(note)
                del score_sequence.notes[:]
                score_sequence.notes.extend(score_notes)

                # Remove key signatures and beat/chord annotations from performance.
                del performance_sequence.key_signatures[:]
                del performance_sequence.text_annotations[:]

                Metrics.counter('extract_examples', 'extracted_score').inc()

            for augment_fn in self._augment_fns:
                # Augment and encode the performance.
                try:
                    augmented_performance_sequence = augment_fn(
                        performance_sequence)
                except DataAugmentationError:
                    Metrics.counter('extract_examples',
                                    'augment_performance_failed').inc()
                    continue
                example_dict = {
                    'targets':
                    self._encode_performance_fn(augmented_performance_sequence)
                }
                if not example_dict['targets']:
                    Metrics.counter('extract_examples',
                                    'skipped_empty_targets').inc()
                    continue

                if (self._random_crop_length and len(example_dict['targets']) >
                        self._random_crop_length):
                    # Take a random crop of the encoded performance.
                    max_offset = len(
                        example_dict['targets']) - self._random_crop_length
                    offset = random.randrange(max_offset + 1)
                    example_dict['targets'] = example_dict['targets'][
                        offset:offset + self._random_crop_length]

                if self._encode_score_fns:
                    # Augment the extracted score.
                    try:
                        augmented_score_sequence = augment_fn(score_sequence)
                    except DataAugmentationError:
                        Metrics.counter('extract_examples',
                                        'augment_score_failed').inc()
                        continue

                    # Apply all score encoding functions.
                    skip = False
                    for name, encode_score_fn in self._encode_score_fns.items():
                        example_dict[name] = encode_score_fn(
                            augmented_score_sequence)
                        if not example_dict[name]:
                            Metrics.counter('extract_examples',
                                            'skipped_empty_%s' % name).inc()
                            skip = True
                            break
                    if skip:
                        continue

                Metrics.counter('extract_examples', 'encoded_example').inc()
                Metrics.distribution(
                    'extract_examples',
                    'performance_length_in_seconds').update(
                        int(augmented_performance_sequence.total_time))

                yield generator_utils.to_example(example_dict)
Code example #10
 def transform(self, note_sequence):
     return [sequences_lib.apply_sustain_control_changes(note_sequence)]
Code example #11
 def transform(self, input_object):
   note_sequence = input_object
   return [sequences_lib.apply_sustain_control_changes(note_sequence)]
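Every example above ultimately calls note_seq's sequences_lib.apply_sustain_control_changes, which lengthens notes that are released while the sustain pedal (MIDI CC 64) is held down. A minimal self-contained sketch of that behavior (not taken from any of the files above):

from note_seq import sequences_lib
from note_seq.protobuf import music_pb2

ns = music_pb2.NoteSequence()
ns.tempos.add(qpm=120)
# A short note released while the sustain pedal is down.
ns.notes.add(pitch=60, velocity=80, start_time=0.0, end_time=0.5)
ns.control_changes.add(time=0.0, control_number=64, control_value=127)
ns.control_changes.add(time=2.0, control_number=64, control_value=0)
ns.total_time = 2.0

sus_ns = sequences_lib.apply_sustain_control_changes(ns)
# The note now rings until the pedal release at t=2.0.
assert sus_ns.notes[0].end_time == 2.0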