Example #1
0
 def play_midi(self, filename, track=1, transpose=6):
     """Load a MIDI file and play one of its tracks through ``play_tune``.

     :param filename: path of the MIDI file to load
     :param track: index of the track to read (default 1)
     :param transpose: number of semitones passed through to ``play_tune``
     """
     midi_file = MidiFile(filename)
     notes = midi_file.read_track(track)
     self.play_tune(midi_file.tempo, notes, transpose=transpose, name=filename)
Example #2
0
 def test_03_read_track(self):
     """read_track(1) on the sample file yields the expected first notes.

     Checks both the note name and the duration of the first four events.
     """
     m = MidiFile(self.sample_file)
     notes = m.read_track(1)
     for n, expected in zip(notes[:4], (('r', 0.4343891402714933),
                                        ('c2', 1.116279069767441),
                                        ('r', 48.00000000000171),
                                        ('f1', 1.499999999999998))):
         self.assertEqual(n[0], expected[0])
         # BUGFIX: the original compared n[1] with itself, which is always
         # True — the durations were never actually verified.
         self.assertTrue(isclose(n[1], expected[1]))
Example #3
0
 def test_02_init(self):
     """Constructing a MidiFile from the sample file must not raise."""
     MidiFile(self.sample_file)
Example #4
0
 def test_01_init_no_file(self):
     """Opening a path that does not exist must raise OSError."""
     with self.assertRaises(OSError):
         MidiFile('nonexist')
Example #5
0
def preprocess(filename, timidity, latency, truncate, pad=1, get_raw=False):
    """
    Preprocess an audio file and its MIDI counterpart. Computes transforms and labels.
    :param filename: audio filename (the MIDI file is the same path with a .mid extension)
    :param timidity: set to True if the file was rendered with timidity
    :param latency: audio/MIDI offset, in seconds
    :param truncate: in seconds (0 for no truncation)
    :param pad: in seconds, will be added at the start and end before spectral transforms
    :param get_raw: set to True to return raw computed spectrograms (e.g. for visualization)
    :return: if get_raw is False, a 7-tuple
             (tensor_mel, tensor_cqt_select, onset_labels, onset_caracs,
              notes_labels_select, notes_caracs_select, dname);
             otherwise a 9-tuple of raw intermediates
             (melgrams, tensor_mel, onset_labels, cqgram, tensor_cqt, time,
              FreqAxisLog, max_len, step)
    """
    # BUGFIX: rsplit('.') with no maxsplit splits on *every* dot, so [0] kept
    # only the text before the FIRST dot and broke multi-dot basenames such as
    # "song.v2.wav". Split once from the right to strip just the extension.
    filename_midi = filename.rsplit('.', 1)[0] + '.mid'

    # Flattened name usable as an identifier (path separators replaced).
    dname = filename.replace('/', '_').replace('\\', '_')

    # Load files -- assumes a 44.1 kHz sample rate throughout (TODO confirm
    # against AudioFile; 44100 is hard-coded here).
    ipad = int(pad * 44100)
    audio_pad = (ipad, ipad
                 )  # add one blank second at the beginning and at the end
    if truncate > 0:
        audio = AudioFile(filename,
                          truncate=int(truncate * 44100),
                          pad=audio_pad)
    else:
        audio = AudioFile(filename, pad=audio_pad)
    mid = MidiFile(filename_midi)

    step = 0.02  # seconds per analysis window
    latency = int(round(latency / step))  # convert seconds -> window count

    # Compute spectrograms
    spectrograms = ComputeSpectrograms(audio, step=step)

    # Compute filtered spectrograms
    melgrams = ComputeMelLayers(spectrograms, step, audio.Fs, latency)

    # Build the input tensor
    cnn_window = 15
    tensor_mel = BuildTensor(melgrams[:, 2], cnn_window)

    # Compute CQT
    FreqAxisLog, time, cqgram = ComputeCqt(audio,
                                           200.,
                                           4000.,
                                           step,
                                           latency,
                                           r=3)
    tensor_cqt = BuildTensor([
        cqgram,
    ], cnn_window)

    # Global data length: both tensors must be usable over the same range.
    max_len = min(tensor_mel.shape[0], tensor_cqt.shape[0])

    # Compute output labels
    notes = mid.getNotes(timidity)
    notes_onset = np.array(notes)[:, 0]  # get only the note timing
    # BUGFIX: np.int was a deprecated alias of the builtin int and was
    # removed in NumPy 1.24 (AttributeError on current NumPy); use int.
    notes_value = np.array(notes, dtype=int)[:, 1]  # get only the key value

    onset_labels = np.zeros(max_len)
    onset_caracs = np.zeros((max_len, 5))
    onset_caracs[:, 2] = np.arange(max_len)

    note_low = 21  # lowest midi note on a keyboard
    note_high = 108  # highest midi note on a keyboard

    notes_labels = np.zeros((max_len, note_high - note_low + 1))
    notes_caracs = np.zeros((max_len, note_high - note_low + 1))

    for i in range(len(notes_onset)):
        # Window index of this onset, shifted by the leading pad.
        t_win = int(np.floor(
            (notes_onset[i] + audio_pad[0] / audio.Fs) / step))
        if t_win >= len(onset_labels):
            break  # notes are time-ordered, so everything after is out too
        if t_win >= 0:
            onset_labels[t_win] = 1
            onset_caracs[t_win][0] += 1  # nb_notes
            onset_caracs[t_win][1] = max(onset_caracs[t_win][1],
                                         notes[i][2])  # max volume
            if t_win + 1 < len(onset_labels):
                onset_caracs[t_win + 1:, 2] -= onset_caracs[t_win + 1][
                    2]  # nb of blank windows since the last onset

            n = notes_value[i] - note_low
            notes_labels[t_win][n] = 1
            notes_caracs[t_win][n] = notes[i][2]  # volume

    # Backward pass: column 3 = nb of blank windows until the NEXT onset.
    counter = 0
    for i in range(len(onset_labels) - 1, -1, -1):
        onset_caracs[i][3] = counter
        if onset_labels[i] == 1:
            counter = 0
        else:
            counter += 1
    # Column 4 = distance to the nearest onset in either direction.
    onset_caracs[:, 4] = np.minimum(onset_caracs[:, 2], onset_caracs[:, 3])

    # Extract useful CQT: keep only the windows that contain an onset.
    select = [i for i in range(max_len) if onset_labels[i] > 0]
    tensor_cqt_select = np.take(tensor_cqt, select, axis=0)
    notes_labels_select = np.take(notes_labels, select, axis=0)
    notes_caracs_select = np.take(notes_caracs, select, axis=0)

    if not get_raw:
        return (tensor_mel[:max_len, ...], tensor_cqt_select, onset_labels,
                onset_caracs, notes_labels_select, notes_caracs_select, dname)
    else:
        return (melgrams, tensor_mel, onset_labels, cqgram, tensor_cqt, time,
                FreqAxisLog, max_len, step)