Example #1
def det(tf):
    # fmt: off
    sp.check_call([
        b"ffmpeg", b"-nostdin", b"-hide_banner", b"-v", b"fatal", b"-ss",
        b"13", b"-y", b"-i",
        fsenc(sys.argv[1]), b"-map", b"0:a:0", b"-ac", b"1", b"-ar", b"22050",
        b"-t", b"300", b"-f", b"f32le",
        fsenc(tf)
    ])
    # fmt: on

    with open(tf, "rb") as f:
        d = np.fromfile(f, dtype=np.float32)
        try:
            # 98% accuracy on jcore
            c = vamp.collect(d, 22050, "beatroot-vamp:beatroot")
            cl = c["list"]
        except Exception:
            # fallback; 73% accuracy
            plug = "vamp-example-plugins:fixedtempo"
            c = vamp.collect(d, 22050, plug, parameters={"maxdflen": 40})
            print(c["list"][0]["label"].split(" ")[0])
            return

        # throws if detection failed:
        bpm = float(cl[-1]["timestamp"] - cl[1]["timestamp"])
        bpm = round(60 * ((len(cl) - 1) / bpm), 2)
        print(f"{bpm:.2f}")
Example #2
def process(frames):

    #assert frames == client.blocksize
    #assert (len(l_f) == len(r_f))

    l_f = client.inports[0].get_array()
    r_f = client.inports[1].get_array()

    '''
    l_f = np.frombuffer(l.get_buffer(), dtype=float)
    r_f = np.frombuffer(r.get_buffer(), dtype=float)
    '''

    m_f = np.add(l_f, r_f)
    m_f = np.true_divide(m_f, 2.0)

    try:
        data = vamp.collect(m_f, client.samplerate, "vamp-libxtract:loudness")
        vector = data['vector']
        L = vector[1]

        if L > 0.0:
            data = vamp.collect(m_f, client.samplerate, "pyin:pyin")
            L = data['list']
            print(L[1]['values'])

    except Exception:
        pass
Example #3
def analyzeAudio(audio_array, sample_rate, channels):
    print "Found Vamp plugins:"
    for name in vamp.list_plugins():
        print "  " + name

    # convert audio array to float format expected by vamp
    audio_array2 = audio_array / float(numpy.iinfo(numpy.int16).max)
    audio_array2 = audio_array2.reshape((len(audio_array2) // channels, channels))[:, 0]

    if False:
        # chunked
        results = []
        for subarray in numpy.array_split(audio_array2, int(numpy.ceil(audio_array2.shape[0] / float(BUFFER_SIZE))), axis=0):
            if DEBUG: print("subarray size: " + str(subarray.shape[0]))
            #assert(subarray.shape[0] == BUFFER_SIZE)
            results += [vamp.collect(subarray, sample_rate, VAMP_PLUGIN)]
    else:
        # all at once
        results = vamp.collect(audio_array2, sample_rate, VAMP_PLUGIN)
        if DEBUG:
            print("Vamp plugin output:")
            for result in results["list"][:15]:
                print("  %s" % result)

    results = [(float(row["timestamp"]), float(row["duration"]), int(row["values"][0])) for row in results["list"]]

    return results
Example #4
def test_process_summary_param():
    buf = input_data(blocksize * 10)
    rdict = vamp.collect(buf, rate, plugin_key, "input-summary", {"produce_output": False})
    assert "vector" in rdict
    step, results = rdict["vector"]
    assert len(results) == 0
    rdict = vamp.collect(buf, rate, plugin_key, "input-summary", {"produce_output": True})
    assert "vector" in rdict
    step, results = rdict["vector"]
    assert len(results) > 0
Example #5
def test_process_summary_param():
    buf = input_data(blocksize * 10)
    rdict = vamp.collect(buf, rate, plugin_key, "input-summary",
                         {"produce_output": False})
    assert ("vector" in rdict)
    step, results = rdict["vector"]
    assert len(results) == 0
    rdict = vamp.collect(buf, rate, plugin_key, "input-summary",
                         {"produce_output": True})
    assert ("vector" in rdict)
    step, results = rdict["vector"]
    assert len(results) > 0
Example #6
def extract_F0_pYIN_vamp(folder, fn, sr=22050, H=221, N=2048):
    ''' Given an audio file, use the pYIN Vamp Plug-in to extract the F0
        and the confidence of each frame to be voiced.
    '''

    # load wave file
    x, fs = librosa.load(os.path.join(folder, fn), sr=sr)

    # pYIN parameters
    param = {'threshdistr': 2, 'outputunvoiced': 2, 'precisetime': 0}
    '''
    outputs:
    'voicedprob' outputs the probability of each frame to be voiced
    'smoothedpitchtrack' outputs the smoothed pitch track
    parameters:
    'outputunvoiced': 0 (No), 1 (Yes as zeros), 2 (Yes as negative frequencies)
    '''

    pYIN_f0_output = vamp.collect(x,
                                  sr,
                                  "pyin:pyin",
                                  output='smoothedpitchtrack',
                                  parameters=param,
                                  step_size=H,
                                  block_size=N)['vector']

    pYIN_voiced_prob = vamp.collect(x,
                                    sr,
                                    "pyin:pyin",
                                    output='voicedprob',
                                    parameters=param,
                                    step_size=H,
                                    block_size=N)['vector']

    time_step = float(pYIN_f0_output[0])
    F0 = pYIN_f0_output[1]
    voiced_prob = pYIN_voiced_prob[1]
    timestamp = np.arange(start=0,
                          stop=(len(F0) - 0.5) * time_step,
                          step=time_step)  #[:-1]

    traj = np.vstack([timestamp, F0, voiced_prob]).transpose()

    if not os.path.exists(os.path.join(folder, 'pYIN')):
        os.mkdir(os.path.join(folder, 'pYIN'))

    pd.DataFrame(traj).to_csv(os.path.join(folder, 'pYIN', fn[:-3] + 'csv'),
                              header=None)
    #print("{} F0 curve saved to {}".format(fn, os.path.join(folder, 'pYIN', fn[:-3] + 'csv')))

    return traj
Example #7
    def load_audio_data_from_url(self, url, audio_path):
        """
        ' Purpose: extract a track's chord information (via a Vamp plugin) and beat information from its YouTube URL
        ' Returns: 1. list of predicted chords with their onset times / 2. list of predicted beat times / 3. downloaded audio file name / 4. Boolean indicating whether the Vamp plugin successfully extracted beats
        """

        # When the user aborts the app mid-run, remove any leftover files so youtube_dl does not error out
        files = glob.glob(audio_path + "/*.mp4")
        for x in files:
            if not os.path.isdir(x):
                os.remove(x)

        ydl_opts = {
            'format': 'best',
            'outtmpl': audio_path + '/%(title)s.%(ext)s',
        }
        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])

        files = glob.glob(audio_path + "/*.mp4")
        audio_file_name = ''

        signal, sr = 0, 0
        for x in files:
            if not os.path.isdir(x):
                filename = os.path.splitext(x)
                audio_file_name = filename[0].split(audio_path + '/')[1]
                signal, sr = librosa.load(x)
                os.remove(x)

        # Extract the beat data needed to build the score structure
        predicted_beat_list = vamp.collect(signal, sr,
                                           "beatroot-vamp:beatroot")
        isEmptyBeatList = False
        if (predicted_beat_list['list'] == []):
            isEmptyBeatList = True

        # Extract the chord data needed to generate the score
        predicted_chord_list = vamp.collect(signal, sr, "nnls-chroma:chordino")

        chord_list = []
        for i in predicted_chord_list['list']:
            i['timestamp'] = float(i['timestamp'])
            chord_list.append(i)

        beat_list = []
        for i in predicted_beat_list['list']:
            beat_list.append(float(i['timestamp']))

        return chord_list, beat_list, audio_file_name, isEmptyBeatList
Example #8
def FeatureThread():

    global currentTempo, tempoGram, predictTempo
    global chords

    rb = RingBuffer(8)

    while True:

        os.system('jack_capture_ms -d 1000 out.wav > /dev/null 2>&1')
        y, sr = librosa.load('out.wav', sr=client.samplerate)
        onset_env = librosa.onset.onset_strength(y=y, sr=sr)
        tempo = librosa.beat.tempo(onset_envelope=onset_env, sr=sr)
        rb.append(int(numpy.around(tempo)))
        tempogram = numpy.array(rb.get())
        unique, counts = numpy.unique(tempogram, return_counts=True)
        max_unique_counts = numpy.asarray((unique, counts)).T
        currentTempo, tempoGram, predictTempo = tempo[0], rb.get(), max_unique_counts[0][0]

        vdata = vamp.collect(y, client.samplerate, "nnls-chroma:chordino")

        for entry in vdata['list']:
            c = entry['label']
            if c != 'N':
                chords = c

        os.system('rm -f out.wav')

        if stopThreads:
            break
Example #9
def audio_to_pitch_melodia(wav_data,
                           fs=44100,
                           minfqr=55.0,
                           maxfqr=1760.0,
                           voicing=0.2,
                           minpeaksalience=0.0):
    # When calling mtg-melodia from code, only the default block=2048, step=128 can be used

    params = dict(minfqr=minfqr,
                  maxfqr=maxfqr,
                  voicing=voicing,
                  minpeaksalience=minpeaksalience)
    melody = collect(wav_data, fs, 'mtg-melodia:melodia', parameters=params)

    timestep = melody['vector'][0].to_float()
    pitch = melody['vector'][1]

    # the sampled timestamps start at the 8th timestep
    starttime = timestep * 8

    # list of (time, frequency) pairs
    result = []
    for i, p in enumerate(pitch):
        result.append((starttime + i * timestep, p))

    return result
Example #10
def get_hpcp(x, sr, n_bins=12, f_min=55, f_ref=440.0, min_magn=-100):
    """Compute HPCP features from raw audio using the HPCP Vamp plugin.
    Vamp, vamp python module and plug-in must be installed.
    
    Args:
        x (1d-array): audio signal, mono
        sr (int): sample rate
        n_bins (int): number of chroma bins
        f_min (float): minimum frequency
        f_ref (float): A4 tuning frequency
        min_magn (float): minimum magnitude for peak detection, in dB
        
    Returns:
        1d-array: time vector
        2d-array: HPCP features
    """

    plugin = 'vamp-hpcp-mtg:MTG-HPCP'
    params = {'LF': f_min, 'nbins': n_bins, 'reff0': f_ref,
              'peakMagThreshold': min_magn}
    
    data = vamp.collect(x, sr, plugin, parameters=params)
    vamp_hop, hpcp = data['matrix']
    
    t = float(vamp_hop) * (8 + np.arange(len(hpcp)))
    
    return t, hpcp
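A usage sketch for get_hpcp, assuming librosa for loading and a hypothetical file name "song.wav":

import librosa

x, sr = librosa.load("song.wav", sr=44100, mono=True)
t, hpcp = get_hpcp(x, sr)  # t: frame times in seconds, hpcp: (n_frames, 12) chroma features
print(t.shape, hpcp.shape)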
Example #11
def chordBeats(infile, outfile):
    print('Loading audio file...', infile)
    audio = essentia.standard.MonoLoader(filename=infile)()
    bt = BeatTrackerMultiFeature()
    beats, _ = bt(audio)
    #beats = beats[::4]

    parameters = {}
    stepsize, chroma = vamp.collect(audio,
                                    44100,
                                    "nnls-chroma:nnls-chroma",
                                    output="chroma",
                                    step_size=2048)["matrix"]
    # to essentia convention
    #chroma = np.roll(chroma, 3, 1)

    chords = ChordsDetectionBeats(hopSize=2048)
    syms, strengths = chords(chroma, beats)

    segments = essentia_chord_utils.toMirexLab(0.0,
                                               len(audio) / 44100.0, beats,
                                               syms, strengths)
    with open(outfile, 'w') as content_file:
        for s in segments:
            content_file.write(str(s) + '\n')
Example #12
    def extract_melody(self):
        """
        Extracts melody from the audio using the melodia vamp plugin. Uses arguments kept 
        in self:
        
        - `self.minimum_frequency` (default: 55 Hz)
        - `self.maximum_frequency` (default: 1760 Hz)
        - `self.voicing_tolerance` (default: 0.2)
        - `self.minimum_peak_salience` (default: 0.0)

        This function sets two class members used in other parts:

        - `self.melody`: (numpy array) contains the melody in Hz for every timestep 
          (0 indicates no voice).
        - `self.timestamps`: (numpy array) contains the timestamps for each melody note
        """

        params = {
            'minfqr': self.minimum_frequency,
            'maxfqr': self.maximum_frequency,
            'voicing': self.voicing_tolerance,
            'minpeaksalience': self.minimum_peak_salience
        }

        data = vamp.collect(self.audio_signal.audio_data,
                            self.sample_rate,
                            "mtg-melodia:melodia",
                            parameters=params)

        _, melody = data['vector']
        hop = 128. / 44100.  # hard coded hop in Melodia vamp plugin, converting it to frames.
        timestamps = 8 * hop + np.arange(len(melody)) * hop
        melody[melody < 0] = 0
        self.melody = melody
        self.timestamps = timestamps
Example #13
    def extract_melody(self):
        params = {}
        params['minfqr'] = self.minimum_frequency
        params['maxfqr'] = self.maximum_frequency
        params['voicing'] = self.voicing_tolerance
        params['minpeaksalience'] = self.minimum_peak_salience

        try:
            data = vamp.collect(self.audio_signal.audio_data,
                                self.sample_rate,
                                "mtg-melodia:melodia",
                                parameters=params)
        except Exception as e:
            print(
                '**~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~**\n'
                '*          Are Vamp and Melodia installed correctly?          *\n'
                '* Check https://bit.ly/2DXbrAk for installation instructions! *\n'
                '**~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~**'
            )
            raise e

        _, melody = data['vector']
        hop = 128. / 44100.  # hard coded hop in Melodia vamp plugin, converting it to frames.
        timestamps = 8 * hop + np.arange(len(melody)) * hop
        melody[melody < 0] = 0
        self.melody = melody
        self.timestamps = timestamps
Example #14
    def extract(self, signal):
        """
        Extract Melody of the sound x in numpy array format.
        """
        start_time = time.time()
        if signal.ndim > 1:
            print "INFO: Input signal has more than 1 channel; the channels will be averaged."
            signal = np.mean(signal, axis=1)

        # Remove percussion if separate; otherwise use the signal as-is:
        if self.separate:
            audio, percussion = librosa.effects.hpss(signal)
        else:
            audio = signal

        #Vamp UPF melody extractor plugin
        data = vamp.collect(audio,
                            self.fs,
                            "mtg-melodia:melodia",
                            parameters=self.params)
        hop, melody = data['vector']

        timestamps = 8 * 128 / self.fs + np.arange(
            len(melody)) * (128 / self.fs)
        # Melodia returns unvoiced (=no melody) sections as negative values. We will set them to 0:
        melody_pos = melody.copy()
        melody_pos[melody <= 0] = 0
        # Finally, you might want to plot the pitch sequence in cents rather than in Hz.
        # This especially makes sense if you are comparing two or more pitch sequences
        # to each other (e.g. comparing an estimate against a reference).
        melody_cents = 1200 * np.log2(melody / 55.0)
        melody_cents[melody <= 0] = 0
        print("--- %s seconds --- for melody extraction" %
              (time.time() - start_time))

        return melody_cents, timestamps
Example #15
def get_hpcp(x, sr, n_bins=12, f_min=55, f_ref=440.0, min_magn=-100):
    """Compute HPCP features from raw audio using the HPCP Vamp plugin.
    Vamp, vamp python module and plug-in must be installed.
    
    Args:
        x (1d-array): audio signal, mono
        sr (int): sample rate
        n_bins (int): number of chroma bins
        f_min (float): minimum frequency
        f_ref (float): A4 tuning frequency
        min_magn (float): minimum magnitude for peak detection, in dB
        
    Returns:
        1d-array: time vector
        2d-array: HPCP features
    """

    plugin = 'vamp-hpcp-mtg:MTG-HPCP'
    params = {
        'LF': f_min,
        'nbins': n_bins,
        'reff0': f_ref,
        'peakMagThreshold': min_magn
    }

    data = vamp.collect(x, sr, plugin, parameters=params)
    vamp_hop, hpcp = data['matrix']

    t = float(vamp_hop) * (8 + np.arange(len(hpcp)))

    return t, hpcp
Example #16
def rawChromaFromAudio(audiofile, sampleRate=44100, stepSize=2048):
    mywindow = np.array([
        0.001769, 0.015848, 0.043608, 0.084265, 0.136670, 0.199341, 0.270509,
        0.348162, 0.430105, 0.514023, 0.597545, 0.678311, 0.754038, 0.822586,
        0.882019, 0.930656, 0.967124, 0.990393, 0.999803, 0.999803, 0.999803,
        0.999803, 0.999803, 0.999803, 0.999803, 0.999803, 0.999803, 0.999803,
        0.999803, 0.999803, 0.999803, 0.999803, 0.999803, 0.999803, 0.999803,
        0.999803, 0.999803, 0.999803, 0.999803, 0.999803, 0.999803, 0.999803,
        0.999650, 0.996856, 0.991283, 0.982963, 0.971942, 0.958281, 0.942058,
        0.923362, 0.902299, 0.878986, 0.853553, 0.826144, 0.796910, 0.766016,
        0.733634, 0.699946, 0.665140, 0.629410, 0.592956, 0.555982, 0.518696,
        0.481304, 0.444018, 0.407044, 0.370590, 0.334860, 0.300054, 0.266366,
        0.233984, 0.203090, 0.173856, 0.146447, 0.121014, 0.097701, 0.076638,
        0.057942, 0.041719, 0.028058, 0.017037, 0.008717, 0.003144, 0.000350
    ])
    audio = essentia.standard.MonoLoader(filename=audiofile,
                                         sampleRate=sampleRate)()
    # estimate audio duration just for caching purposes:
    audioDuration(audiofile, sampleRate=sampleRate, audioSamples=audio)

    stepsize, semitones = vamp.collect(audio,
                                       sampleRate,
                                       "nnls-chroma:nnls-chroma",
                                       output="semitonespectrum",
                                       step_size=stepSize)["matrix"]
    chroma = np.zeros((semitones.shape[0], 12))
    for i in range(semitones.shape[0]):
        tones = semitones[i] * mywindow
        cc = chroma[i]
        for j in range(tones.size):
            cc[j % 12] = cc[j % 12] + tones[j]
    # roll from 'A' based to 'C' based
    chroma = np.roll(chroma, shift=-3, axis=1)
    return chroma
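The folding loop above sums 84 semitone bins (7 octaves x 12 pitch classes) into 12 chroma bins via j % 12. A vectorized sketch equivalent to that loop:

import numpy as np

def fold_semitones_to_chroma(semitones, window):
    # bin j (0..83) contributes to chroma bin j % 12, since 84 = 7 * 12
    weighted = semitones * window                   # (n_frames, 84)
    return weighted.reshape(-1, 7, 12).sum(axis=1)  # (n_frames, 12)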
Example #17
def get_melody(x, sr, f_min=55, f_max=1760, min_salience=0.0, unvoiced=True):
    """Extract main melody from raw audio using the Melodia Vamp plugin.
    Vamp, vamp python module and plug-in must be installed.
    
    Args:
        x (np.array): audio signal, mono
        sr (int): sample rate
        f_min (float): minimum frequency
        f_max (float): maximum frequency
        
    Return:
        1d-array: time vector
        1d-array: main melody (in cents)
    """
    plugin = 'mtg-melodia:melodia'
    params = {'minfqr': f_min, 'maxfqr': f_max,
              'minpeaksalience': min_salience}
    
    data = vamp.collect(x, sr, plugin, parameters=params)
    vamp_hop, f0 = data['vector']
    
    if unvoiced:
        f0 = abs(f0)
        f0[f0 == 0] = None
    else:
        f0[f0 <= 0] = None

    hz2midi = lambda f: 69 + 12 * np.log2(abs(f) / 440)
    
    melody = hz2midi(f0)
    melody = melody[:, np.newaxis]
    
    t = float(vamp_hop) * (8 + np.arange(len(melody)))
    
    return t, melody
Example #18
    def melodia(self):
        logging.debug("Generating melody")
        df = self.df
        melodia = vamp.collect(self.y, self.sr, "mtg-melodia:melodia")
        hop, melody = melodia['vector']
        timestamps = 8 * 128 / self.sr + np.arange(
            len(melody)) * (128 / self.sr)
        timestamps /= 2
        melody_pos = melody[:]
        melody_pos[melody <= 0] = None
        df_melodia = pd.DataFrame(np.vstack([timestamps, melody]).T)
        #df_melodia.to_csv('/dataset/test/melodia.csv', index=False)
        df_melodia[1] = df_melodia[1].fillna(-1)
        #df_melodia = df_melodia[df_melodia[1]>0]
        #df_melodia[2] = librosa.hz_to_note(df_melodia[1], octave=False)

        #df_melodia[[0, 0, 2]].to_csv('%s.melody_note_all.txt' % self.file, sep='\t', header=False, index=False)
        tmp_df = pd.merge_asof(df_melodia,
                               self.df_downbeats,
                               left_on=0,
                               right_on='beat_time')
        tmp_df = tmp_df[(tmp_df.beat_time > 0) & (tmp_df[1] > 0)]
        tmp_df['melody_note'] = tmp_df[1].map(self.write_melody_note)
        tmp_df['melody_note_ori'] = librosa.hz_to_note(tmp_df[1], octave=False)

        tmp_df = tmp_df.groupby([
            'beat_time'
        ]).agg(lambda x: scipy.stats.mode(x)[0]).reset_index(drop=False)
        tmp_df = tmp_df[['beat_time', 'melody_note', 'melody_note_ori']]
        self.df_melodia = tmp_df
Example #19
def extract_notes_pYIN_vamp(x,
                            Fs=config.fs,
                            H=config.hopsize,
                            N=config.framesize):
    # pYIN parameters
    param = {'threshdistr': 2, 'outputunvoiced': 2, 'precisetime': 0}
    # Options: smoothedpitchtrack, f0candidates, f0probs, voicedprob, candidatesalience, notes
    pYIN_note_output = vamp.collect(x,
                                    Fs,
                                    "pyin:pyin",
                                    output='notes',
                                    parameters=param,
                                    step_size=H,
                                    block_size=N)
    # reformatting
    traj = np.empty((0, 3))
    for j, entry in enumerate(pYIN_note_output['list']):
        timestamp = float(entry['timestamp'])
        duration = float(entry['duration'])
        note = float(entry['values'][0])
        if j == 0:
            traj = np.vstack((traj, [0, timestamp, 0]))
        elif timestamp != traj[-1][1]:
            traj = np.vstack((traj, [traj[-1][1], timestamp, 0]))

        traj = np.vstack((traj, [timestamp, timestamp + duration, note]))
    return traj
Example #20
def audio_to_midi_melodia(infile,
                          outfile,
                          bpm,
                          smooth=0.25,
                          minduration=0.1,
                          savejams=False):
    fs = 44100
    hop = 128

    data, sr = librosa.load(infile, sr=fs, mono=True)

    melody = vamp.collect(data,
                          sr,
                          "mtg-melodia:melodia",
                          parameters={"voicing": 0.2})

    pitch = melody['vector'][1]

    pitch = np.insert(pitch, 0, [0] * 8)

    midi_pitch = hz2midi(pitch)

    notes = midi_to_notes(midi_pitch, fs, hop, smooth, minduration)

    save_midi(outfile, notes, bpm)

    if savejams:

        jamsfile = outfile.replace(".mid", ".jams")
        track_duration = len(data) / float(fs)
        save_jams(jamsfile, notes, track_duration, os.path.basename(infile))

    print("Conversion complete.")
Example #21
def calculateBeatsAndSemitones(infile):
    print('Loading audio file...', infile)
    proc = BeatTrackingProcessor(fps=100,
                                 method='comb',
                                 min_bpm=40,
                                 max_bpm=240,
                                 act_smooth=0.09,
                                 hist_smooth=7,
                                 alpha=0.79)
    act = RNNBeatProcessor()(infile)
    beats = proc(act).astype('float32')

    audio = essentia.standard.MonoLoader(filename=infile)()
    #bt = BeatTrackerMultiFeature()
    #beats, _ = bt(audio)
    # TODO: best parameters.
    parameters = {}
    #stepsize, chroma = vamp.collect(
    #    audio, 44100, "nnls-chroma:nnls-chroma", output = "chroma", step_size=2048)["matrix"]
    stepsize, semitones = vamp.collect(audio,
                                       44100,
                                       "nnls-chroma:nnls-chroma",
                                       output="semitonespectrum",
                                       step_size=2048)["matrix"]
    return len(audio), beats, semitones
Example #22
def extract_melody(filename_wav):
    data, rate = librosa.load(
        os.path.join(utils.get_dirname_audio_warped(), filename_wav))

    data_melody = vamp.collect(data, rate, "mtg-melodia:melodia")

    return data_melody
Example #23
def extractRawMelody(filename, position, around):
    data, rate = librosa.load(filename, offset = position - around,\
            duration = 2 * around)
    result = vamp.collect(data, rate, "mtg-melodia:melodia")
    frameDur, melody = result['vector']
    #frameDur = frameDur.to_frame(rate)
    return melody, rate
Example #24
    def add_recommend_database(self, url, json_path, audio_path):
        """
        ' Purpose: store the chord information extracted from a track's YouTube URL, together with the URL, in the given json_path (chord information is extracted via a Vamp plugin)
        ' Returns: nothing
        """

        # When the user aborts the app mid-run, remove any leftover files so youtube_dl does not error out
        files = glob.glob(audio_path + "/*.mp4")
        for x in files:
            if not os.path.isdir(x):
                os.remove(x)

        ydl_opts = {
            'format': 'best',
            'outtmpl': audio_path + '/%(title)s.%(ext)s',
        }
        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])

        files = glob.glob(audio_path + "/*.mp4")
        audio_file_name = ''

        signal, sr = 0, 0
        for x in files:
            if not os.path.isdir(x):
                filename = os.path.splitext(x)
                audio_file_name = filename[0].split(audio_path + '/')[1]
                signal, sr = librosa.load(x)
                os.remove(x)

        # load chord data from vamp plugin
        predicted_chord_list = vamp.collect(signal, sr, "nnls-chroma:chordino")

        # ----------------------- reshape the extracted data into an easier-to-handle form ----------------------- #
        chord_list_with_timestamp = predicted_chord_list["list"]

        chord_list = []
        for i in chord_list_with_timestamp:
            chord_list.append(i['label'])

        double_chord_list_with_url = []

        for idx in range(len(chord_list) - 1):
            if chord_list[idx] != 'N' and chord_list[idx + 1] != 'N':
                double_chord = chord_list[idx] + '-' + chord_list[idx + 1]
                double_chord_list_with_url.append(double_chord)

        double_chord_list_with_url.append(
            double_chord_list_with_url[-1].split('-')[-1] + '-' + url)
        # ---------------------------------------------------------------------------- #

        with open(json_path, 'r', encoding='utf-8') as f:
            chord_list_dict = json.load(f)

        dict_key_music_name = audio_file_name

        if dict_key_music_name not in chord_list_dict:
            chord_list_dict[dict_key_music_name] = double_chord_list_with_url
            with open(json_path, "w", encoding='utf-8') as fp:
                json.dump(chord_list_dict, fp, indent=4, ensure_ascii=False)
Example #25
def extract_melody_melodia(audio_path):
    voicing = .6

    # Comments in this function are given by the creator of melodia
    # This is how we load audio using Librosa
    audio_1, sr_1 = librosa.load(audio_path, sr=44100, mono=True)

    # data_1 = vamp.collect(audio_1, sr_1, "mtg-melodia:melodia")

    # vector is a tuple of two values: the hop size used for analysis and the array of pitch values
    # Note that the hop size is *always* equal to 128/44100.0 = 2.9 ms
    # hop_1, melody_1 = data_1['vector']

    # parameter values are specified by providing a dictionary to the optional "parameters" parameter:
    params = {
        "minfqr": 100.0,
        "maxfqr": 1760.0,
        "voicing": voicing,
        "minpeaksalience": 0.0
    }

    data_1 = vamp.collect(audio_1,
                          sr_1,
                          "mtg-melodia:melodia",
                          parameters=params)
    hop_1, melody_1 = data_1['vector']

    # *** SUPER IMPORTANT SUPER IMPORTANT ***
    # For reasons internal to the vamp architecture, THE TIMESTAMP OF THE FIRST VALUE IN THE MELODY ARRAY IS ALWAYS:
    #
    # ```
    # first_timestamp = 8 * hop = 8 * 128/44100.0 = 0.023219954648526078
    # ```
    #
    # This means that the timestamp of the pitch value at index i (starting with i=0) is given by:
    #
    # ```
    # timestamp[i] = 8 * 128/44100.0 + i * (128/44100.0)
    # ```
    #
    # So, if you want to generate a timestamp array to match the pitch values, you do it like this:

    timestamps_1 = 8 * 128 / 44100.0 + np.arange(
        len(melody_1)) * (128 / 44100.0)

    # Melodia has 4 parameters:
    # * **minfqr**: minimum frequency in Hertz (default 55.0)
    # * **maxfqr**: maximum frequency in Hertz (default 1760.0)
    # * **voicing**: voicing tolerance. Greater values will result in more pitch contours included in the final melody.
    # Smaller values will result in less pitch contours included in the final melody (default 0.2).
    # * **minpeaksalience**: (in Sonic Visualiser "Monophonic Noise Filter") is a hack to avoid silence turning into
    # junk contours when analyzing monophonic recordings (e.g. solo voice with no accompaniment).
    # Generally you want to leave this untouched (default 0.0).

    melody = melody_1.tolist()

    output = {'data': [{'value': melody, 'time': timestamps_1.tolist()}]}

    return output
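The timestamp rule quoted above can be verified standalone with plain numpy, independent of the plugin:

import numpy as np

hop = 128 / 44100.0        # Melodia's fixed hop, ~2.9 ms
first_timestamp = 8 * hop  # 0.023219954648526078
timestamps = 8 * hop + np.arange(4) * hop
# -> [0.02321995, 0.02612245, 0.02902494, 0.03192744]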
Example #26
def generate_melody(audio_file_path):
    # This is the audio file we'll be analyzing.
    # You can download it here: http://labrosa.ee.columbia.edu/projects/melody/mirex05TrainFiles.zip
    audio_file = audio_file_path

    # This is how we load audio using Librosa
    audio, sr = librosa.load(audio_file,
                             offset=30.0,
                             duration=50.0,
                             sr=44100,
                             mono=True)

    # Extracting the melody using Melodia with default parameter values
    data = vamp.collect(audio, sr, "mtg-melodia:melodia")

    # print(data)

    # vector is a tuple of two values: the hop size used for analysis and the array of pitch values
    # Note that the hop size is *always* equal to 128/44100.0 = 2.9 ms
    hop, melody = data['vector']
    # print(hop)
    # print(melody)

    # timestamps = 8 * 128/44100.0 + np.arange(len(melody)) * (128/44100.0)

    # Extracting the melody using Melodia with custom parameter values
    # parameter values are specified by providing a dictionary to the optional "parameters" parameter:
    # params = {"minfqr": 100.0, "maxfqr": 800.0, "voicing": 0.2, "minpeaksalience": 0.0}

    # data = vamp.collect(audio, sr, "mtg-melodia:melodia", parameters=params)
    # hop, melody = data['vector']

    # Melodia returns unvoiced (=no melody) sections as negative values. So by default, we get:
    # plt.figure(figsize=(18,6))
    # plt.plot(timestamps, melody)
    # plt.xlabel('Time (s)')
    # plt.ylabel('Frequency (Hz)')
    # plt.show()

    # A clearer option is to get rid of the negative values before plotting
    # melody_pos = melody[:]
    # melody_pos[melody<=0] = None
    # plt.figure(figsize=(18,6))
    # plt.plot(timestamps, melody_pos)
    # plt.xlabel('Time (s)')
    # plt.ylabel('Frequency (Hz)')
    # plt.show()

    # Finally, you might want to plot the pitch sequence in cents rather than in Hz.
    # This especially makes sense if you are comparing two or more pitch sequences
    # to each other (e.g. comparing an estimate against a reference).
    # melody_cents = 1200*np.log2(melody/55.0)
    # melody_cents[melody<=0] = None
    # plt.figure(figsize=(18,6))
    # plt.plot(timestamps, melody_cents)
    # plt.xlabel('Time (s)')
    # plt.ylabel('Frequency (cents relative to 55 Hz)')
    # plt.show()
    return melody
Example #27
def audio_to_midi_melodia(infile,
                          outfile,
                          bpm,
                          smooth=0.25,
                          minduration=0.1,
                          savejams=False):

    # define analysis parameters
    fs = 44100
    hop = 128

    # load audio using librosa
    print("Loading audio...")
    data, sr = librosa.load(infile, sr=fs, mono=True)

    # extract melody using melodia vamp plugin
    print("Extracting melody f0 with MELODIA...")
    melody = vamp.collect(data,
                          sr,
                          "mtg-melodia:melodia",
                          parameters={"voicing": 0.2})

    # hop = melody['vector'][0]
    pitch = melody['vector'][1]

    # impute missing 0's to compensate for starting timestamp
    pitch = np.insert(pitch, 0, [0] * 8)

    # debug
    # np.asarray(pitch).dump('f0.npy')
    # print(len(pitch))

    # convert f0 to midi notes
    print("Converting Hz to MIDI notes...")
    midi_pitch = hz2midi(pitch)

    # segment sequence into individual midi notes
    notes = midi_to_notes(midi_pitch, fs, hop, smooth, minduration)

    # save note sequence to a midi file
    print("Saving MIDI to disk...")
    save_midi(outfile, notes, bpm)
    '''import matplotlib.pyplot as plt
    notes = np.asarray(notes)
    x = notes[:,0];
    y = np.fmod(notes[:,2],12);
    plt.plot(x, y)
    #plt.xticks(np.arange(0,lim,10))
    #plt.yticks(np.arange(0,12))
    #plt.grid()
    plt.show()'''

    if savejams:
        print("Saving JAMS to disk...")
        jamsfile = outfile.replace(".mid", ".jams")
        track_duration = len(data) / float(fs)
        save_jams(jamsfile, notes, track_duration, os.path.basename(infile))

    print("Conversion complete.")
Example #28
def test_collect_fixed_sample_rate_2():
    buf = input_data(blocksize * 10)
    rdict = vamp.collect(buf, rate, plugin_key, "curve-fsr-timed")
    step, results = rdict["vector"]
    assert abs(float(step) - 0.4) < eps
    assert len(results) == 10
    for i in range(len(results)):
        assert abs(results[i] - i * 0.1) < eps
Example #30
	def get_bars_and_beats_qm(self):
		import vamp
		if self.metre_qm is None:
			barbeattrack_output = vamp.collect(self.signal_mono, self.fs, 'qm-vamp-plugins:qm-barbeattracker')
			beat_times = np.array([float(item['timestamp']) for item in barbeattrack_output['list']])
			beat_metre = np.array([int(item['label']) for item in barbeattrack_output['list']])
			standard_beat_times_metre = np.column_stack((beat_times, beat_metre))
			self.metre_qm = standard_beat_times_metre
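A short follow-up sketch, assuming the qm-barbeattracker labels number each beat's position within the bar so that '1' marks downbeats:

metre = self.metre_qm                   # columns: (beat time, position in bar)
downbeats = metre[metre[:, 1] == 1, 0]  # times of beats labelled '1'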
Example #31
def test_process_summary_param_kwargs_1():
    buf = input_data(blocksize * 10)
    rdict = vamp.collect(
        plugin_key=plugin_key, output="input-summary", parameters={"produce_output": False}, data=buf, sample_rate=rate
    )
    assert "vector" in rdict
    step, results = rdict["vector"]
    assert len(results) == 0
Example #32
def convertToMIDI(infile, outfile, bpm=120, smooth=0, minduration=0.1,
                          savejams=False):

    # define analysis parameters
    fs = 44100
    hop = 128

    # load audio using librosa
    print("Loading audio...")
    data, sr = soundfile.read(infile)
    # mixdown to mono if needed
    if len(data.shape) > 1 and data.shape[1] > 1:
        data = data.mean(axis=1)
    # resample to 44100 if needed
    if sr != fs:
        data = resampy.resample(data, sr, fs)
        sr = fs

    # extract melody using melodia vamp plugin
    print("Extracting melody f0 with MELODIA...")
    melody = vamp.collect(data, sr, "mtg-melodia:melodia",
                          parameters={"voicing": 10})

    # hop = melody['vector'][0]
    pitch = melody['vector'][1]

    # impute missing 0's to compensate for starting timestamp
    pitch = np.insert(pitch, 0, [0]*8)

    '''print(pitch)
    stored = 0.0
    for i in pitch:
        if i != stored:
            print(i, end = ' ')
            stored = i'''

    # debug
    # np.asarray(pitch).dump('f0.npy')
    # print(len(pitch))

    # convert f0 to midi notes
    print("Converting Hz to MIDI notes...")
    midi_pitch = hz2midi(pitch)
    stored = 0.0
    for i in midi_pitch:
        if i != stored:
            stored = i

    # segment sequence into individual midi notes
    notes = midi_to_notes(midi_pitch, fs, hop, smooth, minduration)

    # save note sequence to a midi file
    print("Saving MIDI to disk...")
    save_midi(outfile, notes, bpm)
    print("Conversion complete.")


#convertToMIDI('../testSongs/ksg.wav', '../testSongs/ksg.mid', 75, smooth=0, minduration=0.06)
Example #33
def test_process_summary_param_kwargs_2():
    buf = input_data(blocksize * 10)
    rdict = vamp.collect(plugin_key=plugin_key,
                         output="input-summary",
                         data=buf,
                         sample_rate=rate)
    assert ("vector" in rdict)
    step, results = rdict["vector"]
    assert len(results) > 0
Example #34
def rock(audio):
    jojo = vamp.collect(audio,
                        config.fs,
                        "pyin:pyin",
                        step_size=config.hopsize,
                        output="notes")

    import pdb
    pdb.set_trace()
Example #35
def test_collect_grid_one_sample_per_step():
    buf = input_data(blocksize * 10)
    rdict = vamp.collect(buf, rate, plugin_key, "grid-oss")
    step, results = rdict["matrix"]
    assert abs(float(step) - (1024.0 / rate)) < eps
    assert len(results) == 10
    for i in range(len(results)):
        expected = np.array([(j + i + 2.0) / 30.0 for j in range(0, 10)])
        assert (abs(results[i] - expected) < eps).all()
Example #37
def audio_to_midi_melodia(infile,
                          outfile,
                          bpm,
                          smooth=0.25,
                          minduration=0.1,
                          savejams=False):

    # define analysis parameters
    fs = 44100
    hop = 128

    # load audio using librosa
    print("Loading audio...")
    data, sr = soundfile.read(infile)
    # mixdown to mono if needed
    if len(data.shape) > 1 and data.shape[1] > 1:
        data = data.mean(axis=1)
    # resample to 44100 if needed
    if sr != fs:
        data = resampy.resample(data, sr, fs)
        sr = fs

    # extract melody using melodia vamp plugin
    print("Extracting melody f0 with MELODIA...")
    melody = vamp.collect(data,
                          sr,
                          "mtg-melodia:melodia",
                          parameters={"voicing": 0.2})

    # hop = melody['vector'][0]
    pitch = melody['vector'][1]

    # impute missing 0's to compensate for starting timestamp
    pitch = np.insert(pitch, 0, [0] * 8)

    # debug
    # np.asarray(pitch).dump('f0.npy')
    # print(len(pitch))

    # convert f0 to midi notes
    print("Converting Hz to MIDI notes...")
    midi_pitch = hz2midi(pitch)

    # segment sequence into individual midi notes
    notes = midi_to_notes(midi_pitch, fs, hop, smooth, minduration)

    # save note sequence to a midi file
    print("Saving MIDI to disk...")
    save_midi(outfile, notes, bpm)

    if savejams:
        print("Saving JAMS to disk...")
        jamsfile = os.path.splitext(outfile)[0] + ".jams"
        track_duration = len(data) / float(fs)
        save_jams(jamsfile, notes, track_duration, os.path.basename(infile))

    print("Conversion complete.")
Example #38
def test_collect_variable_sample_rate():
    buf = input_data(blocksize * 10)
    rdict = vamp.collect(buf, rate, plugin_key, "curve-vsr")
    results = rdict["list"]
    assert len(results) == 10
    i = 0
    for r in results:
        assert r["timestamp"] == vamp.vampyhost.RealTime("seconds", i * 0.75)
        assert abs(r["values"][0] - i * 0.1) < eps
        i = i + 1
Example #39
def main():
    fpath = input('Enter full path to audio file: ')
    data, rate = librosa.load(fpath)
    plugin_choice = choose_vamp_plugin()
    # this script is assuming that university of alicante polyphonic transcription is being used
    num_voices = int(input('Enter desired number of voices: '))
    plugin_output = vamp.collect(data, rate, plugin_choice, parameters={'maxpolyphony': num_voices})
    note_start_lst, dur_lst, note_lst = parse_note_transcription_output(plugin_output)
    print('note_start_lst length: %d, dur_lst length: %d, note_lst length: %d.' % (len(note_start_lst), len(dur_lst), len(note_lst)))
    note_df = make_note_dataframe(note_start_lst, dur_lst, note_lst)
    print(note_df)
    make_reordered_wav_file(data, rate, note_df)
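choose_vamp_plugin, parse_note_transcription_output, make_note_dataframe and make_reordered_wav_file are project helpers not shown here. A hedged sketch of parse_note_transcription_output, assuming the plugin returns the usual vamp.collect "list" layout of note events (as in Examples #19 and #45):

def parse_note_transcription_output(plugin_output):
    events = plugin_output['list']
    note_start_lst = [float(e['timestamp']) for e in events]
    dur_lst = [float(e['duration']) for e in events]
    note_lst = [float(e['values'][0]) for e in events]
    return note_start_lst, dur_lst, note_lst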
Example #40
def test_process_summary_param_kwargs_3():
    buf = input_data(blocksize * 10)
    rdict = vamp.collect(
        plugin_key=plugin_key,
        output="input-summary",
        data=buf,
        sample_rate=rate,
        process_timestamp_method=vamp.vampyhost.SHIFT_DATA,
    )
    assert "vector" in rdict
    step, results = rdict["vector"]
    assert len(results) > 0
Example #41
def test_collect_one_sample_per_step():
    buf = input_data(blocksize * 10)
    rdict = vamp.collect(buf, rate, plugin_key, "input-timestamp")
    step, results = rdict["vector"]
    assert abs(float(step) - (1024.0 / rate)) < eps
    assert len(results) == 10
    for i in range(len(results)):
        # The timestamp should be the frame number of the first frame in the
        # input buffer
        expected = i * blocksize
        actual = results[i]
        assert actual == expected
Example #42
def test_process_summary_param_kwargs_fail():
    buf = input_data(blocksize * 10)
    try:
        rdict = vamp.collect(
            plugin_key=plugin_key,
            output="input-summary",
            data=buf,
            sample_rate=rate,
            process_timestamp_method=vamp.vampyhost.SHIFT_DATA,
            unknown_argument=1,
        )
    except Exception:  # unknown kwarg
        pass
Example #43
    def transcribe(self, audio_path):
        if not os.path.exists(audio_path):
            raise ValueError('Invalid audio path')

        x, fs = load(audio_path, mono=True)

        notes = vamp.collect(x, fs, "qm-vamp-plugins:qm-transcription", output="transcription")['list']
        # each note event is a dict; access its fields by:
        # ts: f['timestamp']
        # duration: f['duration']
        # MIDI notes: f['values']

        return notes
Example #44
def audio_to_midi_melodia(infile, outfile, bpm, smooth=0.25, minduration=0.1,
                          savejams=False):

    # define analysis parameters
    fs = 44100
    hop = 128

    # load audio using librosa
    print("Loading audio...")
    data, sr = librosa.load(infile, sr=fs, mono=True)

    # extract melody using melodia vamp plugin
    print("Extracting melody f0 with MELODIA...")
    melody = vamp.collect(data, sr, "mtg-melodia:melodia",
                          parameters={"voicing": 0.2})

    # hop = melody['vector'][0]
    pitch = melody['vector'][1]

    # impute missing 0's to compensate for starting timestamp
    pitch = np.insert(pitch, 0, [0]*8)

    # debug
    # np.asarray(pitch).dump('f0.npy')
    # print(len(pitch))

    # convert f0 to midi notes
    print("Converting Hz to MIDI notes...")
    midi_pitch = hz2midi(pitch)

    # segment sequence into individual midi notes
    notes = midi_to_notes(midi_pitch, fs, hop, smooth, minduration)

    # save note sequence to a midi file
    print("Saving MIDI to disk...")
    save_midi(outfile, notes, bpm)

    if savejams:
        print("Saving JAMS to disk...")
        jamsfile = outfile.replace(".mid", ".jams")
        track_duration = len(data) / float(fs)
        save_jams(jamsfile, notes, track_duration, os.path.basename(infile))

    print("Conversion complete.")
Example #45
def transcription_cal(audio_filename, monoNoteOut_filename):

    sr = 44100
    loader = es.MonoLoader(filename=audio_filename, downmix='mix', sampleRate=sr)
    audio = loader()

    # original pyin
    # data = vamp.collect(audio, sr, "pyin:pyin", output = 'notes')

    # modified pyin
    data = vamp.collect(audio, sr, "pyinbobigram:pyin-jingju", output = 'notes')

    # for note in data['list']:
    #     print note['duration'], note['timestamp'], note['values'][0]

    with open(monoNoteOut_filename, 'w') as csvfile:
        csv_writer = csv.writer(csvfile, delimiter=',')
        for note in data['list']:
            csv_writer.writerow([note['timestamp'], note['values'][0], note['duration']])
Example #46
def get_pc_data_aubio(y, fs):
    """ Use the aubio note transcription method (vamp-aubio:aubionotes)
    to get a melodic sequence.
    """

    data = vamp.collect(y, fs, 'vamp-aubio:aubionotes')
    freqs = [d['values'][0] for d in data['list']]
    midi = librosa.core.hz_to_midi(freqs)
    midi = [int(m) for m in np.round(midi)]

    start_t = [d['timestamp'].to_float() for d in data['list']]
    end_t = start_t[1:]
    end_t.append(y.shape[0]/float(fs))

    # to pitch class representation
    labels = ['pc' + str(coreutils.midi_note_to_pc(n)) for n in midi]
    melody_sequence = coredata.Sequence(labels=labels,\
        start_times=start_t,end_times=end_t)

    return melody_sequence
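coreutils.midi_note_to_pc and coredata.Sequence come from the surrounding project. A plausible sketch of the pitch-class helper, assuming the usual convention that the pitch class is the MIDI note number modulo 12 (C = 0):

def midi_note_to_pc(midi_note):
    return int(midi_note) % 12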
Example #47
def get_midi_data_slivet(y, fs):
    """ Use the Silvet note transcription method to get a
    melodic sequence.
    """

    data = vamp.collect(y,fs,'silvet:silvet')
    labels = []
    start_t = []
    end_t = []

    for d in data['list']:
        n = librosa.note_to_midi(d['label'])
        # pc = coreutils.midi_note_to_pc(n)
        # label = 'pc' + str(pc)
        st = d['timestamp'].to_float()
        # et = st + d['duration'].to_float()

        # labels.append(label)
        labels.append(n)
        start_t.append(st)
        # end_t.append(et)

    end_t = start_t[1:]
    # for some reason the last duration gets
    # screwed up
    # st = start_t[-1]
    # et = st + (float(y.shape[0])/float(fs))   
    delta_t = start_t[-1] - start_t[-2]
    et = end_t[-1] + delta_t
    end_t.append(et)

    # to pitch class representation
    labels = ['pc' + str(coreutils.midi_note_to_pc(n)) for n in labels]
    melody_sequence = coredata.Sequence(labels=labels,\
        start_times=start_t,end_times=end_t)

    return melody_sequence
Example #48
def test_collect_runs_at_all():
    buf = input_data(blocksize * 10)
    rdict = vamp.collect(buf, rate, plugin_key, "input-timestamp")
    step, results = rdict["vector"]
    assert results != []
Example #49
#fragment = '../traditional_dataset/allemande/fragments/allemande_first_fragment_nicolet'
#fragment = '../traditional_dataset/allemande/fragments/allemande_third_fragment_rampal'
#fragment = '../traditional_dataset/allemande/fragments/allemande_fourth_fragment_larrieu'
#fragment = '../traditional_dataset/allemande/fragments/allemande_fifth_fragment_preston'

fragment = '../traditional_dataset/sequenza/fragments/sequenza_first_fragment_robison'

audio_file = fragment + '_mono.wav'
gt_file = fragment + '.csv'

audio, sr = librosa.load(audio_file, sr=44100, mono=True)

#%%
# parameter values are specified by providing a dictionary:
params = {"minfqr": 100.0, "maxfqr": 2350.0, "voicing": 0.9, "minpeaksalience": 0.0}
data = vamp.collect(audio, sr, "mtg-melodia:melodia", parameters=params)
hop, melody_melodia = data['vector']

#melody_librosa, magnitudes = librosa.piptrack(audio, sr=sr, hop_length=128)
#print(hop)
#print(melody)

import numpy as np
timestamps = 8 * 128/44100.0 + np.arange(len(melody_melodia)) * (128/44100.0)

melody_hz = copy.deepcopy(melody_melodia)
melody_hz[melody_melodia<=0] = None

#%%
import melosynth as ms
ms.melosynth_pitch(melody_melodia, 'melosynth.wav', fs=44100, nHarmonics=1, square=True, useneg=False)