Example 1
    def testIsDrumDetection(self):
        """Verify that is_drum instruments are properly tracked.

    self.midi_is_drum_filename is a MIDI file containing two tracks
    set to channel 9 (is_drum == True). Each contains one NoteOn. This
    test is designed to catch a bug where the second track would lose
    is_drum, remapping the drum track to an instrument track.
    """
        sequence_proto = midi_io.midi_file_to_sequence_proto(
            self.midi_is_drum_filename)
        with tempfile.NamedTemporaryFile(prefix='MidiDrumTest') as temp_file:
            midi_io.sequence_proto_to_midi_file(sequence_proto, temp_file.name)
            midi_data1 = mido.MidiFile(filename=self.midi_is_drum_filename)
            # Use the file object when writing to the tempfile
            # to avoid permission error.
            midi_data2 = mido.MidiFile(file=temp_file)

        # Count number of channel 9 Note Ons.
        channel_counts = [0, 0]
        for index, midi_data in enumerate([midi_data1, midi_data2]):
            for event in midi_data:
                if (event.type == 'note_on' and event.velocity > 0
                        and event.channel == 9):
                    channel_counts[index] += 1
        self.assertEqual(channel_counts, [2, 2])
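A minimal stand-alone round-trip sketch in the same spirit (an assumption-laden illustration, not part of the test above: it assumes the note_seq package and a hypothetical drums.mid input, and it only compares drum-note counts rather than note order):

import tempfile

from note_seq import midi_io

# Hypothetical input path; any MIDI file with channel-9 (drum) notes will do.
ns = midi_io.midi_file_to_sequence_proto('drums.mid')
with tempfile.NamedTemporaryFile(prefix='DrumRoundTrip', suffix='.mid') as tmp:
    midi_io.sequence_proto_to_midi_file(ns, tmp.name)
    ns_roundtrip = midi_io.midi_file_to_sequence_proto(tmp.name)

# The number of is_drum notes should survive the write/read round trip.
assert (sum(n.is_drum for n in ns.notes)
        == sum(n.is_drum for n in ns_roundtrip.notes))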
Example 2
def main(unused_argv):
    logging.set_verbosity(FLAGS.log)
    if not os.path.exists(FLAGS.output_dir):
        os.makedirs(FLAGS.output_dir)
    for input_file in sorted(os.listdir(FLAGS.input_dir)):
        if not input_file.endswith('.wav'):
            continue
        wav_filename = input_file
        midi_filename = input_file.replace('.wav', '.mid')
        logging.info('Aligning %s to %s', midi_filename, wav_filename)

        samples = audio_io.load_audio(
            os.path.join(FLAGS.input_dir, wav_filename),
            align_fine_lib.SAMPLE_RATE)
        ns = midi_io.midi_file_to_sequence_proto(
            os.path.join(FLAGS.input_dir, midi_filename))

        aligned_ns, unused_stats = align_fine_lib.align_cpp(
            samples,
            align_fine_lib.SAMPLE_RATE,
            ns,
            align_fine_lib.CQT_HOP_LENGTH_FINE,
            sf2_path=FLAGS.sf2_path,
            penalty_mul=FLAGS.penalty_mul)

        midi_io.sequence_proto_to_midi_file(
            aligned_ns, os.path.join(FLAGS.output_dir, midi_filename))

    logging.info('Done')
Example 3
    def CheckReadWriteMidi(self, filename):
        """Test writing to a MIDI file and comparing it to the original Sequence."""

        # TODO(deck): The input MIDI file is opened in pretty-midi and
        # re-written to a temp file, sanitizing the MIDI data (reordering
        # note ons, etc). Issue 85 in the pretty-midi GitHub
        # (http://github.com/craffel/pretty-midi/issues/85) requests that
        # this sanitization be available outside of the context of a file
        # write. If that is implemented, this rewrite code should be
        # modified or deleted.

        # When writing to the temp file, use the file object itself instead of
        # file.name to avoid the permission error on Windows.
        with tempfile.NamedTemporaryFile(prefix='MidiIoTest') as rewrite_file:
            original_midi = pretty_midi.PrettyMIDI(filename)
            original_midi.write(rewrite_file)  # Use file object
            # Back the file position to top to reload the rewrite_file
            rewrite_file.seek(0)
            source_midi = pretty_midi.PrettyMIDI(
                rewrite_file)  # Use file object
            sequence_proto = midi_io.midi_to_sequence_proto(source_midi)

        # Translate the NoteSequence to MIDI and write to a file.
        with tempfile.NamedTemporaryFile(prefix='MidiIoTest') as temp_file:
            midi_io.sequence_proto_to_midi_file(sequence_proto, temp_file.name)
            # Read it back in and compare to source.
            created_midi = pretty_midi.PrettyMIDI(temp_file)  # Use file object

        self.CheckPrettyMidiAndSequence(created_midi, sequence_proto)
Example 4
    def write_dataset(self, path_out):
        for i, seq in enumerate(self._song_parts_lead):
            midi_io.sequence_proto_to_midi_file(seq.to_sequence(),
                                                path_out + f'mel_{i}.mid')
        for i, seq in enumerate(self._song_parts_accomp):
            midi_io.sequence_proto_to_midi_file(seq.to_sequence(),
                                                path_out + f'bass_{i}.mid')
Example 5
    def to_midi(self, outputs, path_out_dir, filename='out.mid'):
        """
        outputs: one-hot encoded tensor (seq_len, vocab_size)
        """
        events = []
        for event_one_hot in outputs:  # for each event
            # The index of the maximum value in the one-hot vector is the class label.
            label = np.argmax(event_one_hot)
            events.append(
                self._encoder_decoder.class_index_to_event(label, events))

        print(events)

        mel_pred = note_seq.Melody(events)
        seq_pred = mel_pred.to_sequence()

        path_out = path_out_dir + filename
        midi_io.sequence_proto_to_midi_file(seq_pred, path_out)
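A hypothetical usage sketch for to_midi (the converter instance, vocabulary size, and output path below are assumptions, not from the original): with min_note=60 and max_note=72 the melody vocabulary has 72 - 60 + 2 == 14 classes, so one-hot rows can be built directly with np.eye.

import numpy as np

vocab_size = 14                              # 72 - 60 + 2 classes (assumed encoder settings)
dummy_labels = [2, 4, 5, 4]                  # arbitrary melody class indices
outputs = np.eye(vocab_size)[dummy_labels]   # shape (seq_len, vocab_size), one row per event
converter.to_midi(outputs, '/tmp/', filename='demo.mid')  # `converter` is a hypothetical instance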
Example 6
import utils.paths as paths


mel_path = paths.root_dir + '/dlmusic_data/midi_data_out/melodies/'
input_file1 = mel_path + 'mel1.mid'
input_file2 = mel_path + 'mel2.mid'
out_file1 = mel_path + 'mel1_out.mid'
out_file2 = mel_path + 'mel2_out.mid'
out_file1_trans = mel_path + 'mel1_trans_out.mid'
out_file1_pred = mel_path + 'mel1_pred_out.mid'

# For ideas on using the output (one-hot encodings) from this, see magenta.models.shared.events_rnn_model.py

melody1 = melodies_lib.midi_file_to_melody(input_file1)
seq = melody1.to_sequence()
midi_io.sequence_proto_to_midi_file(seq, out_file1)

min_note = 60
max_note = 72
transpose_to_key = 2
mel_encoder = encoder_decoder.OneHotEventSequenceEncoderDecoder(
    melody_encoder_decoder.MelodyOneHotEncoding(min_note, max_note)) # min_note=DEFAULT_MIN_NOTE, max_note=DEFAULT_MAX_NOTE

# Additional labels are NO_EVENT = 0 and NOTE_OFF = 1
assert mel_encoder.input_size == max_note - min_note + 2
assert mel_encoder.num_classes == max_note - min_note + 2
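# Hedged illustration of the class mapping (an assumption based on note_seq's
# MelodyOneHotEncoding, not part of the original script): NO_EVENT -> 0,
# NOTE_OFF -> 1, and pitch p -> p - min_note + 2, so with min_note=60 the
# pitch 60 lands on class index 2.
one_hot = melody_encoder_decoder.MelodyOneHotEncoding(min_note, max_note)
assert one_hot.encode_event(60) == 2
assert one_hot.decode_event(2) == 60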

# Squash the melody into the note range given by min_note and max_note and transpose to key = 0 => C major / A minor
melody1.squash(
    min_note,
    max_note,
Example 7
from loaders.dataloader_midi import load_midi_to_seq

#PARAMS
midi_dir = '/Users/nikolasborrel/github/dlmusic_data/midi_data_out/tests/'
midi_dir_out = '/Users/nikolasborrel/github/dlmusic_data/midi_data_out/splitted/'

instruments = [0]
lead_instrument = ('melody', instruments[0])
name_instrument_map = {lead_instrument[0]: lead_instrument[1]}

print("Create...")
sequences = load_midi_to_seq(midi_dir, name_instrument_map, recursive=False)

if len(sequences) == 0:
    raise Exception('No MIDI files loaded')

print("Tokenize...")
t = TokenizerMonophonic(max_bars_chunk=8, min_note=60, max_note=72)
t.add_songs(sequences, instruments)

print("write to disk...")

for i, mel in enumerate(t.song_parts_lead):
    inputs_one_hot, label_not_used = t.encoder_decoder.encode(mel)  # MELODY

    print(f'#encodings melody: {len(inputs_one_hot)}')

    path_out_mel_test = f'{midi_dir_out}mel_split_silence_{i}.mid'
    path_out_bass_test = f'{midi_dir_out}bass_split_silence_{i}.mid'
    midi_io.sequence_proto_to_midi_file(mel.to_sequence(), path_out_mel_test)
Example 8
def run(argv, config_map, data_fn):
    """Create transcriptions."""
    tf.logging.set_verbosity(FLAGS.log)

    config = config_map[FLAGS.config]
    hparams = config.hparams
    hparams.parse(FLAGS.hparams)
    hparams.batch_size = 1
    hparams.truncated_length_secs = 0

    with tf.Graph().as_default():
        examples = tf.placeholder(tf.string, [None])

        dataset = data_fn(examples=examples,
                          preprocess_examples=True,
                          params=hparams,
                          is_training=False,
                          shuffle_examples=False,
                          skip_n_initial_records=0)

        estimator = train_util.create_estimator(
            config.model_fn, os.path.expanduser(FLAGS.model_dir), hparams)

        iterator = tf.data.make_initializable_iterator(dataset)
        next_record = iterator.get_next()

        with tf.Session() as sess:
            sess.run([
                tf.initializers.global_variables(),
                tf.initializers.local_variables()
            ])

            for filename in argv[1:]:
                tf.logging.info('Starting transcription for %s...', filename)

                # The reason we bounce between two Dataset objects is so we can use
                # the data processing functionality in data.py without having to
                # construct all the Example protos in memory ahead of time or create
                # a temporary tfrecord file.
                tf.logging.info('Processing file...')
                sess.run(
                    iterator.initializer, {
                        examples: [
                            create_example(filename, hparams.sample_rate,
                                           FLAGS.load_audio_with_librosa)
                        ]
                    })

                def transcription_data(params):
                    del params
                    return tf.data.Dataset.from_tensors(sess.run(next_record))

                input_fn = infer_util.labels_to_features_wrapper(
                    transcription_data)

                tf.logging.info('Running inference...')
                checkpoint_path = None
                if FLAGS.checkpoint_path:
                    checkpoint_path = os.path.expanduser(FLAGS.checkpoint_path)
                prediction_list = list(
                    estimator.predict(input_fn,
                                      checkpoint_path=checkpoint_path,
                                      yield_single_examples=False))
                assert len(prediction_list) == 1

                sequence_prediction = music_pb2.NoteSequence.FromString(
                    prediction_list[0]['sequence_predictions'][0])

                midi_filename = filename + FLAGS.transcribed_file_suffix + '.midi'
                midi_io.sequence_proto_to_midi_file(sequence_prediction,
                                                    midi_filename)

                tf.logging.info('Transcription written to %s.', midi_filename)
Example 9
                                          ns=music_pb2.NoteSequence(),
                                          example_id='accompaniment.wav',
                                          min_length=0,
                                          max_length=-1,
                                          allow_empty_notesequence=True))

to_process.append(example_list[0].SerializeToString())
sess = tf.Session()

sess.run(
    [tf.initializers.global_variables(),
     tf.initializers.local_variables()])

sess.run(iterator.initializer, {examples: to_process})

input_fn = infer_util.labels_to_features_wrapper(transcription_data)
"""# Inference

Run the following cell to transcribe the files you uploaded. Each time it runs it will transcribe one of the uploaded files.
"""
#@title Run inference
prediction_list = list(estimator.predict(input_fn,
                                         yield_single_examples=False))
assert len(prediction_list) == 1
sequence_prediction = note_seq.NoteSequence.FromString(
    prediction_list[0]['sequence_predictions'][0])

#@title Download MIDI
midi_filename = 'transcription.mid'
midi_io.sequence_proto_to_midi_file(sequence_prediction, midi_filename)
Example 10
print("Create...")
sequences = load_midi_to_seq(midi_dir, name_instrument_map, recursive=False)

#%%
if len(sequences) == 0:
    raise Exception('No MIDI files loaded')

print("Tokenize...")
t = TokenizerMonophonic(split_in_bar_chunks=8, min_note=60, max_note=72)
t.add_songs(sequences, instruments)

print("write to disk...")

songs = list(zip(t.song_parts_lead, t.song_parts_accomp))

for i, mel_bass in enumerate(songs):
    inputs_one_hot, label_not_used = t.encoder_decoder.encode(
        mel_bass[0])  # MELODY
    inputs_one_hot_not_used, targets_idx = t.encoder_decoder.encode(
        mel_bass[1])  # BASS

    print(f'#encodings melody: {len(inputs_one_hot)}')
    print(f'#encodings bass: {len(targets_idx)}\n')

    path_out_mel_test = f'{midi_dir_out}mel_split_silence_{i}.mid'
    path_out_bass_test = f'{midi_dir_out}bass_split_silence_{i}.mid'
    midi_io.sequence_proto_to_midi_file(mel_bass[0].to_sequence(),
                                        path_out_mel_test)
    midi_io.sequence_proto_to_midi_file(mel_bass[1].to_sequence(),
                                        path_out_bass_test)
Example 11
 def events_to_melody(events, path_out=None):
     seq = note_seq.Melody(events)
     if path_out is not None:
         midi_io.sequence_proto_to_midi_file(seq.to_sequence(), path_out)
     return seq
Example 12
def model_inference(model_fn,
                    model_dir,
                    checkpoint_path,
                    data_fn,
                    hparams,
                    examples_path,
                    output_dir,
                    summary_writer,
                    master,
                    preprocess_examples,
                    shuffle_examples):
  """Runs inference for the given examples."""
  tf.logging.info('model_dir=%s', model_dir)
  tf.logging.info('checkpoint_path=%s', checkpoint_path)
  tf.logging.info('examples_path=%s', examples_path)
  tf.logging.info('output_dir=%s', output_dir)

  estimator = train_util.create_estimator(
      model_fn, model_dir, hparams, master=master)

  transcription_data = functools.partial(
      data_fn, examples=examples_path, preprocess_examples=preprocess_examples,
      is_training=False, shuffle_examples=shuffle_examples,
      skip_n_initial_records=0)

  input_fn = infer_util.labels_to_features_wrapper(transcription_data)

  start_time = time.time()
  infer_times = []
  num_frames = []

  file_num = 0

  all_metrics = collections.defaultdict(list)

  for predictions in estimator.predict(
      input_fn, checkpoint_path=checkpoint_path, yield_single_examples=False):

    # Remove batch dimension for convenience.
    for k in predictions.keys():
      if predictions[k].shape[0] != 1:
        raise ValueError(
            'All predictions must have batch size 1, but shape of '
            '{} was: {}'.format(k, predictions[k].shape[0]))
      predictions[k] = predictions[k][0]

    end_time = time.time()
    infer_time = end_time - start_time
    infer_times.append(infer_time)
    num_frames.append(predictions['frame_predictions'].shape[0])
    tf.logging.info(
        'Infer time %f, frames %d, frames/sec %f, running average %f',
        infer_time, num_frames[-1], num_frames[-1] / infer_time,
        np.sum(num_frames) / np.sum(infer_times))

    tf.logging.info('Scoring sequence %s', predictions['sequence_ids'])

    sequence_prediction = music_pb2.NoteSequence.FromString(
        predictions['sequence_predictions'])
    sequence_label = music_pb2.NoteSequence.FromString(
        predictions['sequence_labels'])

    # Make filenames UNIX-friendly.
    filename_chars = six.ensure_text(predictions['sequence_ids'], 'utf-8')
    filename_chars = [c if c.isalnum() else '_' for c in filename_chars]
    filename_safe = ''.join(filename_chars).rstrip()
    filename_safe = '{:04d}_{}'.format(file_num, filename_safe[:200])
    file_num += 1
    output_file = os.path.join(output_dir, filename_safe + '.mid')
    tf.logging.info('Writing inferred midi file to %s', output_file)
    midi_io.sequence_proto_to_midi_file(sequence_prediction, output_file)

    label_output_file = os.path.join(output_dir, filename_safe + '_label.mid')
    tf.logging.info('Writing label midi file to %s', label_output_file)
    midi_io.sequence_proto_to_midi_file(sequence_label, label_output_file)

    # Also write a pianoroll showing acoustic model output vs labels.
    pianoroll_output_file = os.path.join(
        output_dir, filename_safe + '_pianoroll.png')
    tf.logging.info('Writing acoustic logit/label file to %s',
                    pianoroll_output_file)
    # Calculate frames based on the sequence. Includes any postprocessing done
    # to turn raw onsets/frames predictions into the final sequence.
    # TODO(fjord): This work is duplicated in metrics.py.
    sequence_frame_predictions = sequences_lib.sequence_to_pianoroll(
        sequence_prediction,
        frames_per_second=data.hparams_frames_per_second(hparams),
        min_pitch=constants.MIN_MIDI_PITCH,
        max_pitch=constants.MAX_MIDI_PITCH).active
    with tf.gfile.GFile(pianoroll_output_file, mode='w') as f:
      imageio.imwrite(
          f,
          infer_util.posterior_pianoroll_image(
              predictions['onset_probs'],
              predictions['onset_labels'],
              predictions['frame_probs'],
              predictions['frame_labels'],
              sequence_frame_predictions),
          format='png')

    # Update histogram and current scalar for metrics.
    with tf.Graph().as_default(), tf.Session().as_default():
      for k, v in predictions.items():
        if not k.startswith('metrics/'):
          continue
        all_metrics[k].extend(v)
        histogram_name = k + '_histogram'
        metric_summary = tf.summary.histogram(histogram_name, all_metrics[k])
        summary_writer.add_summary(metric_summary.eval(), global_step=file_num)
        scalar_name = k
        metric_summary = tf.summary.scalar(scalar_name, np.mean(all_metrics[k]))
        summary_writer.add_summary(metric_summary.eval(), global_step=file_num)
      summary_writer.flush()

    start_time = time.time()

  # Write final mean values for all metrics.
  with tf.Graph().as_default(), tf.Session().as_default():
    for k, v in all_metrics.items():
      final_scalar_name = 'final/' + k
      metric_summary = tf.summary.scalar(
          final_scalar_name, np.mean(all_metrics[k]))
      summary_writer.add_summary(metric_summary.eval())
    summary_writer.flush()
Example 13
steps_per_quarter = 4  # default, resulting in 16th note quantization

note_seq_raw = midi_io.midi_file_to_note_sequence(input_file)
note_seq_quan = note_seq.quantize_note_sequence(note_seq_raw,
                                                steps_per_quarter)
extracted_seqs, stats = polyphony_lib.extract_polyphonic_sequences(
    note_seq_quan)

assert len(extracted_seqs) <= 1  # docs state that at most one polyphonic sequence is extracted
poly_seq = extracted_seqs[0]

print(poly_seq)

seq1 = poly_seq.to_sequence()  #qpm=60.0
midi_io.sequence_proto_to_midi_file(seq1, out_file)

poly_encoder = encoder_decoder.OneHotEventSequenceEncoderDecoder(
    polyphony_encoder_decoder.PolyphonyOneHotEncoding())

if len(note_seq_raw.key_signatures) > 1:
    print("WARNING: more than one key signature was found - only the first one is used.")
original_key = note_seq_raw.key_signatures[0].key
transpose_interval = transpose_to_key - original_key

# PolyphonicSequence doesn't have a transpose method (unlike Melody for monophonic sequences)
for e in poly_seq:
    if e.pitch is not None:
        e.pitch = e.pitch + transpose_interval
Example 14
        type=str,
        help="file path of the output file (excluding the .mid extention)")
    parser.add_argument("--fps",
                        dest="show_fps",
                        action="store_const",
                        const=True,
                        default=False)
    args = parser.parse_args()

    model = OnsetsFrames()
    from time import perf_counter
    if args.outfile is not None:
        s = perf_counter()
        pred = model.predict(f"{args.infile}.wav")
        print(f"Prediction took {(perf_counter() - s)*1000 :.1f}ms.")
        midi_io.sequence_proto_to_midi_file(pred, f"{args.outfile}.mid")
    else:
        s = perf_counter()
        pred = model.predict(f"{args.infile}.wav")
        print(f"Prediction took {(perf_counter() - s)*1000 :.1f}ms.")
        from render import Seq
        import cv2
        s = Seq(pred)
        while True:
            if args.show_fps:
                t = perf_counter()
            frame = next(s)
            if args.show_fps:
                print(f"Live: {1/(perf_counter()-t):.1f}fps")
            cv2.imshow('frame', np.flip(frame, axis=0))
            k = cv2.waitKey(25)