Example #1
def create_song_prototype(song_path,
                          start_time,
                          stop_time,
                          model_used='attention_rnn',
                          temperature=1.0):
    magenta_model_path = '%s/magenta_models/%s.mag' % (MEDIA_ROOT, model_used)
    bundle = mm.sequence_generator_bundle.read_bundle_file(magenta_model_path)
    generator_map = melody_rnn_sequence_generator.get_generator_map()
    melody_rnn = generator_map[model_used](checkpoint=None, bundle=bundle)
    melody_rnn.initialize()

    base_sequence = midi_file_to_note_sequence(song_path)
    target_sequence = extract_subsequence(base_sequence, start_time, stop_time)

    generator_options = generator_pb2.GeneratorOptions()
    generator_options.args['temperature'].float_value = temperature
    generator_options.generate_sections.add(
        start_time=target_sequence.total_time,
        end_time=2 * target_sequence.total_time)
    generated_sequence = melody_rnn.generate(target_sequence,
                                             generator_options)

    proceed_sequence = extract_subsequence(generated_sequence,
                                           target_sequence.total_time,
                                           2 * target_sequence.total_time)

    return proceed_sequence
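For reference, a hedged usage sketch for the function above (the MIDI paths and the 8-second window are placeholders; it assumes mm is magenta.music, whose sequence_proto_to_midi_file helper writes a NoteSequence back to a MIDI file):

# Continue the first 8 seconds of a MIDI file and save the generated continuation.
continuation = create_song_prototype('input.mid', start_time=0.0, stop_time=8.0,
                                     model_used='attention_rnn', temperature=1.2)
mm.sequence_proto_to_midi_file(continuation, 'continuation.mid')  # assumed magenta.music helper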
Example #2
  def testExtractSubsequencePastEnd(self):
    sequence = copy.copy(self.note_sequence)
    testing_lib.add_track_to_sequence(
        sequence, 0,
        [(12, 100, 0.01, 10.0), (11, 55, 0.22, 0.50), (40, 45, 2.50, 3.50),
         (55, 120, 4.0, 4.01), (52, 99, 4.75, 5.0)])
    testing_lib.add_chords_to_sequence(
        sequence, [('C', 1.5), ('G7', 3.0), ('F', 18.0)])

    with self.assertRaises(ValueError):
      sequences_lib.extract_subsequence(sequence, 15.0, 16.0)
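The test above documents that extract_subsequence raises ValueError when the requested window extends past the end of the sequence. A minimal defensive sketch, assuming only the behaviour shown here, is to clamp the window before extracting:

# Clamp the requested window to the material that actually exists.
end = min(16.0, sequence.total_time)
start = min(15.0, end)
if end > start:
    subsequence = sequences_lib.extract_subsequence(sequence, start, end)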
Example #3
  def testExtractSubsequencePastEnd(self):
    sequence = copy.copy(self.note_sequence)
    testing_lib.add_track_to_sequence(
        sequence, 0,
        [(12, 100, 0.01, 10.0), (11, 55, 0.22, 0.50), (40, 45, 2.50, 3.50),
         (55, 120, 4.0, 4.01), (52, 99, 4.75, 5.0)])
    testing_lib.add_chords_to_sequence(
        sequence, [('C', 1.5), ('G7', 3.0), ('F', 18.0)])

    with self.assertRaises(ValueError):
      sequences_lib.extract_subsequence(sequence, 15.0, 16.0)
Example #4
def split2batch(audio, sequence):
    from magenta.models.onsets_frames_transcription.audio_label_data_utils import find_split_points
    pad_num = int(math.ceil(
        sequence.total_time * cfg.SAMPLE_RATE)) - audio.shape[0]
    if pad_num > 0:
        audio = np.concatenate((audio, np.zeros(pad_num, dtype=audio.dtype)))

    splits = [0, sequence.total_time] if cfg.MAX_SPLIT_LENGTH == 0 else \
        find_split_points(sequence, audio, cfg.SAMPLE_RATE, cfg.MIN_SPLIT_LENGTH, cfg.MAX_SPLIT_LENGTH)

    samples = []
    for start, end in zip(splits[:-1], splits[1:]):
        if end - start < cfg.MIN_SPLIT_LENGTH:
            continue

        split_audio, split_seq = audio, sequence
        if not (start == 0 and end == sequence.total_time):
            split_seq = sequences_lib.extract_subsequence(sequence, start, end)
        split_audio = audio_io.crop_samples(audio, cfg.SAMPLE_RATE, start,
                                            end - start)
        pad_num = int(math.ceil(
            cfg.MAX_SPLIT_LENGTH * cfg.SAMPLE_RATE)) - split_audio.shape[0]
        if pad_num > 0:
            split_audio = np.concatenate(
                (split_audio, np.zeros(pad_num, dtype=split_audio.dtype)))

        samples.append((split_audio, split_seq))

    return samples
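A hedged sketch of how split2batch might be driven; the file names are placeholders, midi_io is assumed to be Magenta's MIDI helper module, and cfg is the same configuration object the function reads:

# Load aligned audio and MIDI, then cut both into model-sized windows.
wav_data = open('piece.wav', 'rb').read()
samples = audio_io.wav_data_to_samples(wav_data, cfg.SAMPLE_RATE)
ns = midi_io.midi_file_to_note_sequence('piece.mid')
for split_audio, split_seq in split2batch(samples, ns):
    pass  # feed each (audio window, NoteSequence window) pair to the model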
Example #5
def process_record(wav_data,
                   ns,
                   example_id,
                   min_length=5,
                   max_length=20,
                   sample_rate=16000,
                   allow_empty_notesequence=False):
  """Split a record into chunks and create an example proto.

  To use the full length audio and notesequence, set min_length=0 and
  max_length=-1.

  Args:
    wav_data: audio data in WAV format.
    ns: corresponding NoteSequence.
    example_id: id for the example proto
    min_length: minimum length in seconds for audio chunks.
    max_length: maximum length in seconds for audio chunks.
    sample_rate: desired audio sample rate.
    allow_empty_notesequence: whether an empty NoteSequence is allowed.

  Yields:
    Example protos.
  """
  samples = audio_io.wav_data_to_samples(wav_data, sample_rate)
  samples = librosa.util.normalize(samples, norm=np.inf)
  if max_length == min_length:
    splits = np.arange(0, ns.total_time, max_length)
  elif max_length > 0:
    splits = find_split_points(ns, samples, sample_rate, min_length, max_length)
  else:
    splits = [0, ns.total_time]
  velocities = [note.velocity for note in ns.notes]
  velocity_max = np.max(velocities) if velocities else 0
  velocity_min = np.min(velocities) if velocities else 0
  velocity_range = music_pb2.VelocityRange(min=velocity_min, max=velocity_max)

  for start, end in zip(splits[:-1], splits[1:]):
    if end - start < min_length:
      continue

    if start == 0 and end == ns.total_time:
      new_ns = ns
    else:
      new_ns = sequences_lib.extract_subsequence(ns, start, end)

    if not new_ns.notes and not allow_empty_notesequence:
      tf.logging.warning('skipping empty sequence')
      continue

    if start == 0 and end == ns.total_time:
      new_samples = samples
    else:
      # Cropping via crop_wav_data would resample the audio again, which is slow;
      # it was already resampled once above, so crop the in-memory samples instead.
      new_samples = audio_io.crop_samples(samples, sample_rate, start,
                                          end - start)
    new_wav_data = audio_io.samples_to_wav_data(new_samples, sample_rate)
    yield create_example(
        example_id, new_ns, new_wav_data, velocity_range=velocity_range)
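A possible driver for process_record, assuming wav_data and ns are already loaded and using the same TF 1.x TFRecord writer seen elsewhere on this page (the output path is a placeholder):

with tf.python_io.TFRecordWriter('train.tfrecord') as writer:
    for example in process_record(wav_data, ns, example_id='piece-01'):
        writer.write(example.SerializeToString())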
Example #6
  def testExtractSubsequence(self):
    sequence = copy.copy(self.note_sequence)
    testing_lib.add_track_to_sequence(
        sequence, 0,
        [(12, 100, 0.01, 10.0), (11, 55, 0.22, 0.50), (40, 45, 2.50, 3.50),
         (55, 120, 4.0, 4.01), (52, 99, 4.75, 5.0)])
    testing_lib.add_chords_to_sequence(
        sequence, [('C', 1.5), ('G7', 3.0), ('F', 4.8)])
    testing_lib.add_control_changes_to_sequence(
        sequence, 0,
        [(0.0, 64, 127), (2.0, 64, 0), (4.0, 64, 127), (5.0, 64, 0)])
    testing_lib.add_control_changes_to_sequence(
        sequence, 1, [(2.0, 64, 127)])
    expected_subsequence = copy.copy(self.note_sequence)
    testing_lib.add_track_to_sequence(
        expected_subsequence, 0,
        [(40, 45, 0.0, 1.0), (55, 120, 1.5, 1.51)])
    testing_lib.add_chords_to_sequence(
        expected_subsequence, [('C', 0.0), ('G7', 0.5)])
    testing_lib.add_control_changes_to_sequence(
        expected_subsequence, 0, [(0.0, 64, 0), (1.5, 64, 127)])
    testing_lib.add_control_changes_to_sequence(
        expected_subsequence, 1, [(0.0, 64, 127)])
    expected_subsequence.control_changes.sort(key=lambda cc: cc.time)
    expected_subsequence.total_time = 1.51
    expected_subsequence.subsequence_info.start_time_offset = 2.5
    expected_subsequence.subsequence_info.end_time_offset = 5.99

    subsequence = sequences_lib.extract_subsequence(sequence, 2.5, 4.75)
    self.assertProtoEquals(expected_subsequence, subsequence)
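The expected offsets can be checked from the numbers in the test itself: the source track ends at 10.0 seconds and the window [2.5, 4.75] is extracted. A short worked check, using only values visible above:

start_time_offset = 2.5                  # amount trimmed from the front of the original
subsequence_total_time = 4.01 - 2.5      # last retained event ends at 4.01 s -> 1.51 s
end_time_offset = 10.0 - (start_time_offset + subsequence_total_time)  # 10.0 - 4.01 = 5.99 s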
Example #7
def preprocess_sequence(sequence_tensor, hparams):
    """Preprocess a NoteSequence for training.

  Deserialize, apply sustain control changes, and crop the sequence to the
  beginning of the first note and end of the last note (if requested).

  Args:
    sequence_tensor: The NoteSequence in serialized form.
    hparams: Current hyperparameters.

  Returns:
    sequence: The preprocessed NoteSequence object.
    cropped_beginning_seconds: How many seconds were cropped from the beginning
        of the NoteSequence.
  """
    sequence = music_pb2.NoteSequence.FromString(sequence_tensor)
    sequence = sequences_lib.apply_sustain_control_changes(sequence)
    crop_beginning_seconds = 0
    if hparams.crop_training_sequence_to_notes and sequence.notes:
        crop_beginning_seconds = _find_first_note_start(sequence)
        sequence = sequences_lib.extract_subsequence(sequence,
                                                     crop_beginning_seconds,
                                                     sequence.total_time)

    return sequence, crop_beginning_seconds
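A minimal call sketch: hparams only needs the single attribute read above, so a plain namespace stands in for the real hyperparameter object, and ns is assumed to be an existing NoteSequence:

import argparse

hparams = argparse.Namespace(crop_training_sequence_to_notes=True)
cropped_sequence, cropped_seconds = preprocess_sequence(ns.SerializeToString(), hparams)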
Example #8
  def testExtractSubsequence(self):
    sequence = copy.copy(self.note_sequence)
    testing_lib.add_track_to_sequence(
        sequence, 0,
        [(12, 100, 0.01, 10.0), (11, 55, 0.22, 0.50), (40, 45, 2.50, 3.50),
         (55, 120, 4.0, 4.01), (52, 99, 4.75, 5.0)])
    testing_lib.add_chords_to_sequence(
        sequence, [('C', 1.5), ('G7', 3.0), ('F', 4.8)])
    testing_lib.add_control_changes_to_sequence(
        sequence, 0,
        [(0.0, 64, 127), (2.0, 64, 0), (4.0, 64, 127), (5.0, 64, 0)])
    testing_lib.add_control_changes_to_sequence(
        sequence, 1, [(2.0, 64, 127)])
    expected_subsequence = copy.copy(self.note_sequence)
    testing_lib.add_track_to_sequence(
        expected_subsequence, 0,
        [(40, 45, 0.0, 1.0), (55, 120, 1.5, 1.51)])
    testing_lib.add_chords_to_sequence(
        expected_subsequence, [('C', 0.0), ('G7', 0.5)])
    testing_lib.add_control_changes_to_sequence(
        expected_subsequence, 0, [(0.0, 64, 0), (1.5, 64, 127)])
    testing_lib.add_control_changes_to_sequence(
        expected_subsequence, 1, [(0.0, 64, 127)])
    expected_subsequence.control_changes.sort(key=lambda cc: cc.time)
    expected_subsequence.total_time = 1.51
    expected_subsequence.subsequence_info.start_time_offset = 2.5
    expected_subsequence.subsequence_info.end_time_offset = 5.99

    subsequence = sequences_lib.extract_subsequence(sequence, 2.5, 4.75)
    self.assertProtoEquals(expected_subsequence, subsequence)
Example #9
def generate_train_set():
  """Generate the train TFRecord."""
  train_file_pairs = []
  for directory in train_dirs:
    path = os.path.join(FLAGS.input_dir, directory)
    path = os.path.join(path, '*.wav')
    wav_files = glob.glob(path)
    # find matching mid files
    for wav_file in wav_files:
      base_name_root, _ = os.path.splitext(wav_file)
      mid_file = base_name_root + '.mid'
      train_file_pairs.append((wav_file, mid_file))

  train_output_name = os.path.join(FLAGS.output_dir,
                                   'maps_config2_train.tfrecord')

  with tf.python_io.TFRecordWriter(train_output_name) as writer:
    for pair in train_file_pairs:
      print(pair)
      # load the wav data
      wav_data = tf.gfile.Open(pair[0]).read()
      samples = audio_io.wav_data_to_samples(wav_data, FLAGS.sample_rate)

      # load the midi data and convert to a notesequence
      midi_data = tf.gfile.Open(pair[1]).read()
      ns = midi_io.midi_to_sequence_proto(midi_data)

      splits = find_split_points(ns, samples, FLAGS.sample_rate,
                                 FLAGS.min_length, FLAGS.max_length)

      for start, end in zip(splits[:-1], splits[1:]):
        if end - start < FLAGS.min_length:
          continue

        new_ns = sequences_lib.extract_subsequence(ns, start, end)
        new_wav_data = audio_io.crop_wav_data(wav_data, FLAGS.sample_rate,
                                              start, end - start)
        example = tf.train.Example(features=tf.train.Features(feature={
            'id':
            tf.train.Feature(bytes_list=tf.train.BytesList(
                value=[pair[0]]
                )),
            'sequence':
            tf.train.Feature(bytes_list=tf.train.BytesList(
                value=[new_ns.SerializeToString()]
                )),
            'audio':
            tf.train.Feature(bytes_list=tf.train.BytesList(
                value=[new_wav_data]
                ))

            }))
        writer.write(example.SerializeToString())
Example #10
  def testExtractSubsequence(self):
    sequence = copy.copy(self.note_sequence)
    testing_lib.add_track_to_sequence(
        sequence, 0,
        [(12, 100, 0.01, 10.0), (11, 55, 0.22, 0.50), (40, 45, 2.50, 3.50),
         (55, 120, 4.0, 4.01), (52, 99, 4.75, 5.0)])
    expected_subsequence = copy.copy(self.note_sequence)
    testing_lib.add_track_to_sequence(
        expected_subsequence, 0,
        [(40, 45, 2.50, 3.50), (55, 120, 4.0, 4.01)])

    subsequence = sequences_lib.extract_subsequence(sequence, 2.5, 4.75)
    self.assertProtoEquals(expected_subsequence, subsequence)
Example #11
def generate_train_set():
    """Generate the train TFRecord."""
    train_file_pairs = []
    for directory in train_dirs:
        path = os.path.join(FLAGS.input_dir, directory)
        path = os.path.join(path, '*.wav')
        wav_files = glob.glob(path)
        # find matching mid files
        for wav_file in wav_files:
            base_name_root, _ = os.path.splitext(wav_file)
            mid_file = base_name_root + '.mid'
            train_file_pairs.append((wav_file, mid_file))

    train_output_name = os.path.join(FLAGS.output_dir,
                                     'maps_config2_train.tfrecord')

    with tf.python_io.TFRecordWriter(train_output_name) as writer:
        for pair in train_file_pairs:
            print(pair)
            # load the wav data
            wav_data = tf.gfile.Open(pair[0]).read()
            samples = audio_io.wav_data_to_samples(wav_data, FLAGS.sample_rate)

            # load the midi data and convert to a notesequence
            midi_data = tf.gfile.Open(pair[1]).read()
            ns = midi_io.midi_to_sequence_proto(midi_data)

            splits = find_split_points(ns, samples, FLAGS.sample_rate,
                                       FLAGS.min_length, FLAGS.max_length)

            for start, end in zip(splits[:-1], splits[1:]):
                if end - start < FLAGS.min_length:
                    continue

                new_ns = sequences_lib.extract_subsequence(ns, start, end)
                new_wav_data = audio_io.crop_wav_data(wav_data,
                                                      FLAGS.sample_rate, start,
                                                      end - start)
                example = tf.train.Example(features=tf.train.Features(
                    feature={
                        'id':
                        tf.train.Feature(bytes_list=tf.train.BytesList(
                            value=[pair[0]])),
                        'sequence':
                        tf.train.Feature(bytes_list=tf.train.BytesList(
                            value=[new_ns.SerializeToString()])),
                        'audio':
                        tf.train.Feature(bytes_list=tf.train.BytesList(
                            value=[new_wav_data]))
                    }))
                writer.write(example.SerializeToString())
Example #12
  def testExtractSubsequence(self):
    sequence = copy.copy(self.note_sequence)
    testing_lib.add_track_to_sequence(
        sequence, 0,
        [(12, 100, 0.01, 10.0), (11, 55, 0.22, 0.50), (40, 45, 2.50, 3.50),
         (55, 120, 4.0, 4.01), (52, 99, 4.75, 5.0)])
    expected_subsequence = copy.copy(self.note_sequence)
    testing_lib.add_track_to_sequence(
        expected_subsequence, 0,
        [(40, 45, 2.50, 3.50), (55, 120, 4.0, 4.01)])
    expected_subsequence.total_time = 4.75

    subsequence = sequences_lib.extract_subsequence(sequence, 2.5, 4.75)
    self.assertProtoEquals(expected_subsequence, subsequence)
Example #13
    def testExtractSubsequence(self):
        sequence = copy.copy(self.note_sequence)
        testing_lib.add_track_to_sequence(sequence, 0, [(12, 100, 0.01, 10.0),
                                                        (11, 55, 0.22, 0.50),
                                                        (40, 45, 2.50, 3.50),
                                                        (55, 120, 4.0, 4.01),
                                                        (52, 99, 4.75, 5.0)])
        testing_lib.add_chords_to_sequence(sequence, [('C', 1.5), ('G7', 3.0),
                                                      ('F', 4.8)])
        expected_subsequence = copy.copy(self.note_sequence)
        testing_lib.add_track_to_sequence(expected_subsequence, 0,
                                          [(40, 45, 2.50, 3.50),
                                           (55, 120, 4.0, 4.01)])
        testing_lib.add_chords_to_sequence(expected_subsequence, [('G7', 3.0)])
        expected_subsequence.total_time = 4.75

        subsequence = sequences_lib.extract_subsequence(sequence, 2.5, 4.75)
        self.assertProtoEquals(expected_subsequence, subsequence)
Example #14
  def testExtractSubsequence(self):
    sequence = copy.copy(self.note_sequence)
    testing_lib.add_track_to_sequence(
        sequence, 0,
        [(12, 100, 0.01, 10.0), (11, 55, 0.22, 0.50), (40, 45, 2.50, 3.50),
         (55, 120, 4.0, 4.01), (52, 99, 4.75, 5.0)])
    testing_lib.add_chords_to_sequence(
        sequence, [('C', 1.5), ('G7', 3.0), ('F', 4.8)])
    expected_subsequence = copy.copy(self.note_sequence)
    testing_lib.add_track_to_sequence(
        expected_subsequence, 0,
        [(40, 45, 0.0, 1.0), (55, 120, 1.5, 1.51)])
    testing_lib.add_chords_to_sequence(
        expected_subsequence, [('C', 0.0), ('G7', 0.5)])
    expected_subsequence.total_time = 2.25
    expected_subsequence.subsequence_info.start_time_offset = 2.5
    expected_subsequence.subsequence_info.end_time_offset = 5.25

    subsequence = sequences_lib.extract_subsequence(sequence, 2.5, 4.75)
    self.assertProtoEquals(expected_subsequence, subsequence)
Example #15
def preprocess_sequence(sequence_tensor, hparams):
  """Preprocess a NoteSequence for training.

  Deserialize, apply sustain control changes, and crop the sequence to the
  beginning of the first note and end of the last note (if requested).

  Args:
    sequence_tensor: The NoteSequence in serialized form.
    hparams: Current hyperparameters.

  Returns:
    sequence: The preprocessed NoteSequence object.
    cropped_beginning_seconds: How many seconds were cropped from the beginning
        of the NoteSequence.
  """
  sequence = music_pb2.NoteSequence.FromString(sequence_tensor)
  sequence = sequences_lib.apply_sustain_control_changes(sequence)
  crop_beginning_seconds = 0
  if hparams.crop_training_sequence_to_notes and sequence.notes:
    crop_beginning_seconds = _find_first_note_start(sequence)
    sequence = sequences_lib.extract_subsequence(
        sequence, crop_beginning_seconds, sequence.total_time)

  return sequence, crop_beginning_seconds
Example #16
  def process(self, kv):
    # Seed random number generator based on key so that hop times are
    # deterministic.
    key, ns_str = kv
    m = hashlib.md5(key)
    random.seed(int(m.hexdigest(), 16))

    # Deserialize NoteSequence proto.
    ns = music_pb2.NoteSequence.FromString(ns_str)

    # Apply sustain pedal.
    ns = sequences_lib.apply_sustain_control_changes(ns)

    # Remove control changes as there are potentially a lot of them and they are
    # no longer needed.
    del ns.control_changes[:]

    if (self._min_hop_size_seconds and
        ns.total_time < self._min_hop_size_seconds):
      Metrics.counter('extract_examples', 'sequence_too_short').inc()
      return

    sequences = []
    for _ in range(self._num_replications):
      if self._max_hop_size_seconds:
        if self._max_hop_size_seconds == self._min_hop_size_seconds:
          # Split using fixed hop size.
          sequences += sequences_lib.split_note_sequence(
              ns, self._max_hop_size_seconds)
        else:
          # Sample random hop positions such that each segment size is within
          # the specified range.
          hop_times = [0.0]
          while hop_times[-1] <= ns.total_time - self._min_hop_size_seconds:
            if hop_times[-1] + self._max_hop_size_seconds < ns.total_time:
              # It's important that we get a valid hop size here, since the
              # remainder of the sequence is too long.
              max_offset = min(
                  self._max_hop_size_seconds,
                  ns.total_time - self._min_hop_size_seconds - hop_times[-1])
            else:
              # It's okay if the next hop time is invalid (in which case we'll
              # just stop).
              max_offset = self._max_hop_size_seconds
            offset = random.uniform(self._min_hop_size_seconds, max_offset)
            hop_times.append(hop_times[-1] + offset)
          # Split at the chosen hop times (ignoring zero and the final invalid
          # time).
          sequences += sequences_lib.split_note_sequence(ns, hop_times[1:-1])
      else:
        sequences += [ns]

    for performance_sequence in sequences:
      if self._encode_score_fns:
        # We need to extract a score.
        if not self._absolute_timing:
          # Beats are required to extract a score with metric timing.
          beats = [
              ta for ta in performance_sequence.text_annotations
              if (ta.annotation_type ==
                  music_pb2.NoteSequence.TextAnnotation.BEAT)
              and ta.time <= performance_sequence.total_time
          ]
          if len(beats) < 2:
            Metrics.counter('extract_examples', 'not_enough_beats').inc()
            continue

          # Ensure the sequence starts and ends on a beat.
          performance_sequence = sequences_lib.extract_subsequence(
              performance_sequence,
              start_time=min(beat.time for beat in beats),
              end_time=max(beat.time for beat in beats)
          )

          # Infer beat-aligned chords (only for relative timing).
          try:
            chord_inference.infer_chords_for_sequence(
                performance_sequence,
                chord_change_prob=0.25,
                chord_note_concentration=50.0,
                add_key_signatures=True)
          except chord_inference.ChordInferenceError:
            Metrics.counter('extract_examples', 'chord_inference_failed').inc()
            continue

        # Infer melody regardless of relative/absolute timing.
        try:
          melody_instrument = melody_inference.infer_melody_for_sequence(
              performance_sequence,
              melody_interval_scale=2.0,
              rest_prob=0.1,
              instantaneous_non_max_pitch_prob=1e-15,
              instantaneous_non_empty_rest_prob=0.0,
              instantaneous_missing_pitch_prob=1e-15)
        except melody_inference.MelodyInferenceError:
          Metrics.counter('extract_examples', 'melody_inference_failed').inc()
          continue

        if not self._absolute_timing:
          # Now rectify detected beats to occur at fixed tempo.
          # TODO(iansimon): also include the alignment
          score_sequence, unused_alignment = sequences_lib.rectify_beats(
              performance_sequence, beats_per_minute=SCORE_BPM)
        else:
          # Score uses same timing as performance.
          score_sequence = copy.deepcopy(performance_sequence)

        # Remove melody notes from performance.
        performance_notes = []
        for note in performance_sequence.notes:
          if note.instrument != melody_instrument:
            performance_notes.append(note)
        del performance_sequence.notes[:]
        performance_sequence.notes.extend(performance_notes)

        # Remove non-melody notes from score.
        score_notes = []
        for note in score_sequence.notes:
          if note.instrument == melody_instrument:
            score_notes.append(note)
        del score_sequence.notes[:]
        score_sequence.notes.extend(score_notes)

        # Remove key signatures and beat/chord annotations from performance.
        del performance_sequence.key_signatures[:]
        del performance_sequence.text_annotations[:]

        Metrics.counter('extract_examples', 'extracted_score').inc()

      for augment_fn in self._augment_fns:
        # Augment and encode the performance.
        try:
          augmented_performance_sequence = augment_fn(performance_sequence)
        except DataAugmentationError:
          Metrics.counter(
              'extract_examples', 'augment_performance_failed').inc()
          continue
        example_dict = {
            'targets': self._encode_performance_fn(
                augmented_performance_sequence)
        }
        if not example_dict['targets']:
          Metrics.counter('extract_examples', 'skipped_empty_targets').inc()
          continue

        if self._encode_score_fns:
          # Augment the extracted score.
          try:
            augmented_score_sequence = augment_fn(score_sequence)
          except DataAugmentationError:
            Metrics.counter('extract_examples', 'augment_score_failed').inc()
            continue

          # Apply all score encoding functions.
          skip = False
          for name, encode_score_fn in self._encode_score_fns.items():
            example_dict[name] = encode_score_fn(augmented_score_sequence)
            if not example_dict[name]:
              Metrics.counter('extract_examples',
                              'skipped_empty_%s' % name).inc()
              skip = True
              break
          if skip:
            continue

        Metrics.counter('extract_examples', 'encoded_example').inc()
        Metrics.distribution(
            'extract_examples', 'performance_length_in_seconds').update(
                int(augmented_performance_sequence.total_time))

        yield generator_utils.to_example(example_dict)
Example #17
def generate_train_set(exclude_ids):
  """Generate the train TFRecord."""
  train_file_pairs = []
  for directory in train_dirs:
    path = os.path.join(FLAGS.input_dir, directory)
    path = os.path.join(path, '*.wav')
    wav_files = glob.glob(path)
    # find matching mid files
    for wav_file in wav_files:
      base_name_root, _ = os.path.splitext(wav_file)
      mid_file = base_name_root + '.mid'
      if filename_to_id(wav_file) not in exclude_ids:
        train_file_pairs.append((wav_file, mid_file))

  train_output_name = os.path.join(FLAGS.output_dir,
                                   'maps_config2_train.tfrecord')

  with tf.python_io.TFRecordWriter(train_output_name) as writer:
    for pair in train_file_pairs:
      print(pair)
      # load the wav data
      wav_data = tf.gfile.Open(pair[0], 'rb').read()
      samples = audio_io.wav_data_to_samples(wav_data, FLAGS.sample_rate)
      samples = librosa.util.normalize(samples, norm=np.inf)

      # load the midi data and convert to a notesequence
      ns = midi_io.midi_file_to_note_sequence(pair[1])

      splits = find_split_points(ns, samples, FLAGS.sample_rate,
                                 FLAGS.min_length, FLAGS.max_length)

      velocities = [note.velocity for note in ns.notes]
      velocity_max = np.max(velocities)
      velocity_min = np.min(velocities)
      new_velocity_tuple = music_pb2.VelocityRange(
          min=velocity_min, max=velocity_max)

      for start, end in zip(splits[:-1], splits[1:]):
        if end - start < FLAGS.min_length:
          continue

        new_ns = sequences_lib.extract_subsequence(ns, start, end)
        new_wav_data = audio_io.crop_wav_data(wav_data, FLAGS.sample_rate,
                                              start, end - start)
        example = tf.train.Example(features=tf.train.Features(feature={
            'id':
            tf.train.Feature(bytes_list=tf.train.BytesList(
                value=[pair[0]]
                )),
            'sequence':
            tf.train.Feature(bytes_list=tf.train.BytesList(
                value=[new_ns.SerializeToString()]
                )),
            'audio':
            tf.train.Feature(bytes_list=tf.train.BytesList(
                value=[new_wav_data]
                )),
            'velocity_range':
            tf.train.Feature(bytes_list=tf.train.BytesList(
                value=[new_velocity_tuple.SerializeToString()]
                )),
            }))
        writer.write(example.SerializeToString())
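For completeness, a hedged sketch of reading one of these records back and recovering the NoteSequence; the feature names mirror the ones written above, and the path is a placeholder for train_output_name:

for record in tf.python_io.tf_record_iterator('maps_config2_train.tfrecord'):
    example = tf.train.Example.FromString(record)
    ns_bytes = example.features.feature['sequence'].bytes_list.value[0]
    restored_ns = music_pb2.NoteSequence.FromString(ns_bytes)
    break  # inspect only the first record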
Example #18
  def process(self, kv):
    # Seed random number generator based on key so that hop times are
    # deterministic.
    key, ns_str = kv
    m = hashlib.md5(key)
    random.seed(int(m.hexdigest(), 16))

    # Deserialize NoteSequence proto.
    ns = music_pb2.NoteSequence.FromString(ns_str)

    # Apply sustain pedal.
    ns = sequences_lib.apply_sustain_control_changes(ns)

    # Remove control changes as there are potentially a lot of them and they are
    # no longer needed.
    del ns.control_changes[:]

    if (self._min_hop_size_seconds and
        ns.total_time < self._min_hop_size_seconds):
      Metrics.counter('extract_examples', 'sequence_too_short').inc()
      return

    sequences = []
    for _ in range(self._num_replications):
      if self._max_hop_size_seconds:
        if self._max_hop_size_seconds == self._min_hop_size_seconds:
          # Split using fixed hop size.
          sequences += sequences_lib.split_note_sequence(
              ns, self._max_hop_size_seconds)
        else:
          # Sample random hop positions such that each segment size is within
          # the specified range.
          hop_times = [0.0]
          while hop_times[-1] <= ns.total_time - self._min_hop_size_seconds:
            if hop_times[-1] + self._max_hop_size_seconds < ns.total_time:
              # It's important that we get a valid hop size here, since the
              # remainder of the sequence is too long.
              max_offset = min(
                  self._max_hop_size_seconds,
                  ns.total_time - self._min_hop_size_seconds - hop_times[-1])
            else:
              # It's okay if the next hop time is invalid (in which case we'll
              # just stop).
              max_offset = self._max_hop_size_seconds
            offset = random.uniform(self._min_hop_size_seconds, max_offset)
            hop_times.append(hop_times[-1] + offset)
          # Split at the chosen hop times (ignoring zero and the final invalid
          # time).
          sequences += sequences_lib.split_note_sequence(ns, hop_times[1:-1])
      else:
        sequences += [ns]

    for performance_sequence in sequences:
      if self._encode_score_fns:
        # We need to extract a score.
        if not self._absolute_timing:
          # Beats are required to extract a score with metric timing.
          beats = [
              ta for ta in performance_sequence.text_annotations
              if (ta.annotation_type ==
                  music_pb2.NoteSequence.TextAnnotation.BEAT)
              and ta.time <= performance_sequence.total_time
          ]
          if len(beats) < 2:
            Metrics.counter('extract_examples', 'not_enough_beats').inc()
            continue

          # Ensure the sequence starts and ends on a beat.
          performance_sequence = sequences_lib.extract_subsequence(
              performance_sequence,
              start_time=min(beat.time for beat in beats),
              end_time=max(beat.time for beat in beats)
          )

          # Infer beat-aligned chords (only for relative timing).
          try:
            chord_inference.infer_chords_for_sequence(
                performance_sequence,
                chord_change_prob=0.25,
                chord_note_concentration=50.0,
                add_key_signatures=True)
          except chord_inference.ChordInferenceError:
            Metrics.counter('extract_examples', 'chord_inference_failed').inc()
            continue

        # Infer melody regardless of relative/absolute timing.
        try:
          melody_instrument = melody_inference.infer_melody_for_sequence(
              performance_sequence,
              melody_interval_scale=2.0,
              rest_prob=0.1,
              instantaneous_non_max_pitch_prob=1e-15,
              instantaneous_non_empty_rest_prob=0.0,
              instantaneous_missing_pitch_prob=1e-15)
        except melody_inference.MelodyInferenceError:
          Metrics.counter('extract_examples', 'melody_inference_failed').inc()
          continue

        if not self._absolute_timing:
          # Now rectify detected beats to occur at fixed tempo.
          # TODO(iansimon): also include the alignment
          score_sequence, unused_alignment = sequences_lib.rectify_beats(
              performance_sequence, beats_per_minute=SCORE_BPM)
        else:
          # Score uses same timing as performance.
          score_sequence = copy.deepcopy(performance_sequence)

        # Remove melody notes from performance.
        performance_notes = []
        for note in performance_sequence.notes:
          if note.instrument != melody_instrument:
            performance_notes.append(note)
        del performance_sequence.notes[:]
        performance_sequence.notes.extend(performance_notes)

        # Remove non-melody notes from score.
        score_notes = []
        for note in score_sequence.notes:
          if note.instrument == melody_instrument:
            score_notes.append(note)
        del score_sequence.notes[:]
        score_sequence.notes.extend(score_notes)

        # Remove key signatures and beat/chord annotations from performance.
        del performance_sequence.key_signatures[:]
        del performance_sequence.text_annotations[:]

        Metrics.counter('extract_examples', 'extracted_score').inc()

      for augment_fn in self._augment_fns:
        # Augment and encode the performance.
        try:
          augmented_performance_sequence = augment_fn(performance_sequence)
        except DataAugmentationError:
          Metrics.counter(
              'extract_examples', 'augment_performance_failed').inc()
          continue
        example_dict = {
            'targets': self._encode_performance_fn(
                augmented_performance_sequence)
        }
        if not example_dict['targets']:
          Metrics.counter('extract_examples', 'skipped_empty_targets').inc()
          continue

        if self._encode_score_fns:
          # Augment the extracted score.
          try:
            augmented_score_sequence = augment_fn(score_sequence)
          except DataAugmentationError:
            Metrics.counter('extract_examples', 'augment_score_failed').inc()
            continue

          # Apply all score encoding functions.
          skip = False
          for name, encode_score_fn in self._encode_score_fns.items():
            example_dict[name] = encode_score_fn(augmented_score_sequence)
            if not example_dict[name]:
              Metrics.counter('extract_examples',
                              'skipped_empty_%s' % name).inc()
              skip = True
              break
          if skip:
            continue

        Metrics.counter('extract_examples', 'encoded_example').inc()
        Metrics.distribution(
            'extract_examples', 'performance_length_in_seconds').update(
                int(augmented_performance_sequence.total_time))

        yield generator_utils.to_example(example_dict)
Example #19
def generate_train_set(exclude_ids):
    """Generate the train TFRecord."""
    train_file_pairs = []
    for directory in train_dirs:
        path = os.path.join(FLAGS.input_dir, directory)
        path = os.path.join(path, '*.wav')
        wav_files = glob.glob(path)
        # find matching mid files
        for wav_file in wav_files:
            base_name_root, _ = os.path.splitext(wav_file)
            mid_file = base_name_root + '.mid'
            if filename_to_id(wav_file) not in exclude_ids:
                train_file_pairs.append((wav_file, mid_file))

    train_output_name = os.path.join(FLAGS.output_dir,
                                     'maps_config2_train.tfrecord')

    with tf.python_io.TFRecordWriter(train_output_name) as writer:
        for pair in train_file_pairs:
            print(pair)
            # load the wav data
            wav_data = tf.gfile.Open(pair[0], 'rb').read()
            samples = audio_io.wav_data_to_samples(wav_data, FLAGS.sample_rate)
            samples = librosa.util.normalize(samples, norm=np.inf)

            # load the midi data and convert to a notesequence
            ns = midi_io.midi_file_to_note_sequence(pair[1])

            splits = find_split_points(ns, samples, FLAGS.sample_rate,
                                       FLAGS.min_length, FLAGS.max_length)

            velocities = [note.velocity for note in ns.notes]
            velocity_max = np.max(velocities)
            velocity_min = np.min(velocities)
            new_velocity_tuple = music_pb2.VelocityRange(min=velocity_min,
                                                         max=velocity_max)

            for start, end in zip(splits[:-1], splits[1:]):
                if end - start < FLAGS.min_length:
                    continue

                new_ns = sequences_lib.extract_subsequence(ns, start, end)
                new_wav_data = audio_io.crop_wav_data(wav_data,
                                                      FLAGS.sample_rate, start,
                                                      end - start)
                example = tf.train.Example(features=tf.train.Features(
                    feature={
                        'id':
                        tf.train.Feature(bytes_list=tf.train.BytesList(
                            value=[pair[0]])),
                        'sequence':
                        tf.train.Feature(bytes_list=tf.train.BytesList(
                            value=[new_ns.SerializeToString()])),
                        'audio':
                        tf.train.Feature(bytes_list=tf.train.BytesList(
                            value=[new_wav_data])),
                        'velocity_range':
                        tf.train.Feature(bytes_list=tf.train.BytesList(
                            value=[new_velocity_tuple.SerializeToString()])),
                    }))
                writer.write(example.SerializeToString())
Example #20
def process_record(wav_data,
                   ns,
                   example_id,
                   min_length=5,
                   max_length=20,
                   sample_rate=16000,
                   allow_empty_notesequence=False,
                   load_audio_with_librosa=False):
    """Split a record into chunks and create an example proto.

  To use the full length audio and notesequence, set min_length=0 and
  max_length=-1.

  Args:
    wav_data: audio data in WAV format.
    ns: corresponding NoteSequence.
    example_id: id for the example proto
    min_length: minimum length in seconds for audio chunks.
    max_length: maximum length in seconds for audio chunks.
    sample_rate: desired audio sample rate.
    allow_empty_notesequence: whether an empty NoteSequence is allowed.
    load_audio_with_librosa: Use librosa for sampling. Works with 24-bit wavs.

  Yields:
    Example protos.
  """
    try:
        if load_audio_with_librosa:
            samples = audio_io.wav_data_to_samples_librosa(
                wav_data, sample_rate)
        else:
            samples = audio_io.wav_data_to_samples(wav_data, sample_rate)
    except audio_io.AudioIOReadError as e:
        print('Exception %s' % e)
        return
    samples = librosa.util.normalize(samples, norm=np.inf)

    # Add padding to samples if notesequence is longer.
    pad_to_samples = int(math.ceil(ns.total_time * sample_rate))
    padding_needed = pad_to_samples - samples.shape[0]
    if padding_needed > 5 * sample_rate:
        raise ValueError(
            'Would have padded {} more than 5 seconds to match note sequence total '
            'time. ({} original samples, {} sample rate, {} sample seconds, '
            '{} sequence seconds) This likely indicates a problem with the source '
            'data.'.format(example_id, samples.shape[0], sample_rate,
                           samples.shape[0] / sample_rate, ns.total_time))
    samples = np.pad(samples, (0, max(0, padding_needed)), 'constant')

    if max_length == min_length:
        splits = np.arange(0, ns.total_time, max_length)
    elif max_length > 0:
        splits = find_split_points(ns, samples, sample_rate, min_length,
                                   max_length)
    else:
        splits = [0, ns.total_time]
    velocities = [note.velocity for note in ns.notes]
    velocity_max = np.max(velocities) if velocities else 0
    velocity_min = np.min(velocities) if velocities else 0
    velocity_range = music_pb2.VelocityRange(min=velocity_min,
                                             max=velocity_max)

    for start, end in zip(splits[:-1], splits[1:]):
        if end - start < min_length:
            continue

        if start == 0 and end == ns.total_time:
            new_ns = ns
        else:
            new_ns = sequences_lib.extract_subsequence(ns, start, end)

        if not new_ns.notes and not allow_empty_notesequence:
            tf.logging.warning('skipping empty sequence')
            continue

        if start == 0 and end == ns.total_time:
            new_samples = samples
        else:
            # Cropping via crop_wav_data would resample the audio again, which is slow;
            # it was already resampled once above, so crop the in-memory samples instead.
            new_samples = audio_io.crop_samples(samples, sample_rate, start,
                                                end - start)
        new_wav_data = audio_io.samples_to_wav_data(new_samples, sample_rate)
        yield create_example(example_id,
                             new_ns,
                             new_wav_data,
                             velocity_range=velocity_range)
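A hedged call sketch for the librosa path of the function above (the file paths are placeholders; tf.gfile.Open and midi_io.midi_file_to_note_sequence match the APIs used in the surrounding examples):

wav_data = tf.gfile.Open('piece_24bit.wav', 'rb').read()
ns = midi_io.midi_file_to_note_sequence('piece_24bit.mid')
examples = list(process_record(wav_data, ns, 'piece_24bit',
                               load_audio_with_librosa=True))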
Example #21
    def process_midi(self, f):
        def augment_note_sequence(ns, stretch_factor, transpose_amount):
            """Augment a NoteSequence by time stretch and pitch transposition."""
            augmented_ns = sequences_lib.stretch_note_sequence(ns,
                                                               stretch_factor,
                                                               in_place=False)
            try:
                _, num_deleted_notes = sequences_lib.transpose_note_sequence(
                    augmented_ns,
                    transpose_amount,
                    min_allowed_pitch=MIN_PITCH,
                    max_allowed_pitch=MAX_PITCH,
                    in_place=True)
            except chord_symbols_lib.ChordSymbolError:
                raise datagen_beam.DataAugmentationError(
                    'Transposition of chord symbol(s) failed.')
            if num_deleted_notes:
                raise datagen_beam.DataAugmentationError(
                    'Transposition caused out-of-range pitch(es).')
            return augmented_ns

        self._min_hop_size_seconds = 0.0
        self._max_hop_size_seconds = 0.0
        self._num_replications = 1
        self._encode_performance_fn = (
            self.performance_encoder().encode_note_sequence)
        self._encode_score_fns = dict(
            (name, encoder.encode_note_sequence)
            for name, encoder in self.score_encoders())

        augment_params = itertools.product(self.stretch_factors,
                                           self.transpose_amounts)
        augment_fns = [
            functools.partial(augment_note_sequence,
                              stretch_factor=s,
                              transpose_amount=t) for s, t in augment_params
        ]

        self._augment_fns = augment_fns
        self._absolute_timing = self.absolute_timing
        self._random_crop_length = self.random_crop_length_in_datagen
        if self._random_crop_length is not None:
            self._augment_fns = self._augment_fns

        rets = []
        ns = magenta.music.midi_file_to_sequence_proto(f)
        # Apply sustain pedal.
        ns = sequences_lib.apply_sustain_control_changes(ns)

        # Remove control changes as there are potentially a lot of them and they are
        # no longer needed.
        del ns.control_changes[:]

        if (self._min_hop_size_seconds
                and ns.total_time < self._min_hop_size_seconds):
            print("sequence_too_short")
            return []

        sequences = []
        for _ in range(self._num_replications):
            if self._max_hop_size_seconds:
                if self._max_hop_size_seconds == self._min_hop_size_seconds:
                    # Split using fixed hop size.
                    sequences += sequences_lib.split_note_sequence(
                        ns, self._max_hop_size_seconds)
                else:
                    # Sample random hop positions such that each segment size is within
                    # the specified range.
                    hop_times = [0.0]
                    while hop_times[
                            -1] <= ns.total_time - self._min_hop_size_seconds:
                        if hop_times[
                                -1] + self._max_hop_size_seconds < ns.total_time:
                            # It's important that we get a valid hop size here, since the
                            # remainder of the sequence is too long.
                            max_offset = min(
                                self._max_hop_size_seconds, ns.total_time -
                                self._min_hop_size_seconds - hop_times[-1])
                        else:
                            # It's okay if the next hop time is invalid (in which case we'll
                            # just stop).
                            max_offset = self._max_hop_size_seconds
                        offset = random.uniform(self._min_hop_size_seconds,
                                                max_offset)
                        hop_times.append(hop_times[-1] + offset)
                    # Split at the chosen hop times (ignoring zero and the final invalid
                    # time).
                    sequences += sequences_lib.split_note_sequence(
                        ns, hop_times[1:-1])
            else:
                sequences += [ns]

        for performance_sequence in sequences:
            if self._encode_score_fns:
                # We need to extract a score.
                if not self._absolute_timing:
                    # Beats are required to extract a score with metric timing.
                    beats = [
                        ta for ta in performance_sequence.text_annotations
                        if (ta.annotation_type ==
                            music_pb2.NoteSequence.TextAnnotation.BEAT)
                        and ta.time <= performance_sequence.total_time
                    ]
                    if len(beats) < 2:
                        print('not_enough_beats')
                        continue

                    # Ensure the sequence starts and ends on a beat.
                    performance_sequence = sequences_lib.extract_subsequence(
                        performance_sequence,
                        start_time=min(beat.time for beat in beats),
                        end_time=max(beat.time for beat in beats))

                    # Infer beat-aligned chords (only for relative timing).
                    try:
                        chord_inference.infer_chords_for_sequence(
                            performance_sequence,
                            chord_change_prob=0.25,
                            chord_note_concentration=50.0,
                            add_key_signatures=True)
                    except chord_inference.ChordInferenceError:
                        print("chord_inference_failed")
                        continue

                # Infer melody regardless of relative/absolute timing.
                try:
                    melody_instrument = melody_inference.infer_melody_for_sequence(
                        performance_sequence,
                        melody_interval_scale=2.0,
                        rest_prob=0.1,
                        instantaneous_non_max_pitch_prob=1e-15,
                        instantaneous_non_empty_rest_prob=0.0,
                        instantaneous_missing_pitch_prob=1e-15)
                except melody_inference.MelodyInferenceError:
                    print('melody_inference_failed')
                    continue

                if not self._absolute_timing:
                    # Now rectify detected beats to occur at fixed tempo.
                    # TODO(iansimon): also include the alignment
                    score_sequence, unused_alignment = sequences_lib.rectify_beats(
                        performance_sequence, beats_per_minute=SCORE_BPM)
                else:
                    # Score uses same timing as performance.
                    score_sequence = copy.deepcopy(performance_sequence)

                # Remove melody notes from performance.
                performance_notes = []
                for note in performance_sequence.notes:
                    if note.instrument != melody_instrument:
                        performance_notes.append(note)
                del performance_sequence.notes[:]
                performance_sequence.notes.extend(performance_notes)

                # Remove non-melody notes from score.
                score_notes = []
                for note in score_sequence.notes:
                    if note.instrument == melody_instrument:
                        score_notes.append(note)
                del score_sequence.notes[:]
                score_sequence.notes.extend(score_notes)

                # Remove key signatures and beat/chord annotations from performance.
                del performance_sequence.key_signatures[:]
                del performance_sequence.text_annotations[:]

            for augment_fn in self._augment_fns:
                # Augment and encode the performance.
                try:
                    augmented_performance_sequence = augment_fn(
                        performance_sequence)
                except DataAugmentationError as e:
                    print("augment_performance_failed", e)
                    continue
                example_dict = {
                    'targets':
                    self._encode_performance_fn(augmented_performance_sequence)
                }
                if not example_dict['targets']:
                    print('skipped_empty_targets')
                    continue

                if (self._random_crop_length and len(example_dict['targets']) >
                        self._random_crop_length):
                    # Take a random crop of the encoded performance.
                    max_offset = len(
                        example_dict['targets']) - self._random_crop_length
                    offset = random.randrange(max_offset + 1)
                    example_dict['targets'] = example_dict['targets'][
                        offset:offset + self._random_crop_length]

                if self._encode_score_fns:
                    # Augment the extracted score.
                    try:
                        augmented_score_sequence = augment_fn(score_sequence)
                    except DataAugmentationError:
                        print('augment_score_failed')
                        continue

                    # Apply all score encoding functions.
                    skip = False
                    for name, encode_score_fn in self._encode_score_fns.items(
                    ):
                        example_dict[name] = encode_score_fn(
                            augmented_score_sequence)
                        if not example_dict[name]:
                            print('skipped_empty_%s' % name)
                            skip = True
                            break
                    if skip:
                        continue

                rets.append(example_dict)
        return rets