Beispiel #1
0
def gettempoessentia(f):
    """Estimate the tempo of an audio file with Essentia's RhythmExtractor2013.

    The file is decoded with librosa at 44100 Hz, the sample rate that
    RhythmExtractor2013 is documented to require. (librosa would otherwise
    resample to its 22050 Hz default, which skews the tempo estimate.)

    The rounded-down BPM and the extractor's fourth output are printed as a
    side effect.

    :param f: path of the audio file to analyse
    :return: the average BPM reported by the extractor
    """
    # Force 44100 Hz: librosa's default target rate is 22050 Hz.
    x, _ = librosa.load(f, sr=44100)

    # Outputs are (bpm, ticks, confidence, estimates, bpm intervals); only the
    # first and fourth are used here.
    avg_bpm, _, _, estimates, _ = ess.RhythmExtractor2013(
        method='multifeature')(x)

    print('tempo is %i  bpm - essentia\n' % int(avg_bpm))
    print(estimates)

    return avg_bpm
Beispiel #2
0
 def check_bpm(self) -> float:
     """
     Run the rhythm analyzer on ``self.audio`` to determine the BPM.

     If the audio is shorter than the configured margin, it is repeated
     (tiled) with itself until it is at least as long as the margin, to make
     up for the missing data and increase accuracy.

     The result is stored in ``self.__bpm`` and returned.
     """
     temp = self.audio
     # Skip tiling for empty audio: the ratio below would divide by zero.
     if 0 < len(temp) < self.__margin:
         # Round up, not to nearest: rounding could yield a factor of 1 (or
         # one too small) and leave the signal shorter than the margin.
         factor = int(np.ceil(self.__margin / len(temp)))
         temp = np.tile(temp, factor)
     rhythm_extractor = estd.RhythmExtractor2013()
     # The first output of the extractor is the BPM estimate.
     self.__bpm = rhythm_extractor(temp)[0]
     return self.__bpm
def algorithm_rhythm_essentia_basic(sound):
    """
    Estimate the bpm of a sound with two RhythmExtractor2013 configurations.

    The extractor is run once with its default settings (reported under
    'Zapata14') and once with ``method='degara'`` (reported under 'Degara12').

    * Zapata14: Jose R Zapata, Matthew E P Davies, and Emilia Gomez.
      Multi-Feature Beat Tracking. IEEE/ACM Transactions on Audio, Speech,
      and Language Processing, 22(4):816-825, 2014.
    * Degara12: Norberto Degara, Enrique Argones Rua, Antonio Pena, Soledad
      Torres-Guijarro, Matthew EP Davies, and Mark D Plumbley.
      Reliability-Informed Beat Tracking of Musical Signals. IEEE
      Transactions on Audio, Speech, and Language Processing,
      20(1):290-301, 2012.

    :param sound: sound dictionary from dataset; its SOUND_FILE_KEY entry is
        passed to load_audio_file as the file path
    :return: dictionary with one entry per method: 'Zapata14' holds 'bpm' and
        'confidence', 'Degara12' holds only 'bpm'
    """
    samples = load_audio_file(file_path=sound[SOUND_FILE_KEY], sample_rate=44100)

    # First pass: extractor with default parameters.
    default_extractor = estd.RhythmExtractor2013()
    tempo, _, certainty, _, _ = default_extractor(samples)
    zapata_entry = {'bpm': tempo, 'confidence': float(certainty)}

    # Second pass: same audio, 'degara' method; only the bpm is kept.
    degara_extractor = estd.RhythmExtractor2013(method='degara')
    tempo, _, _, _, _ = degara_extractor(samples)
    degara_entry = {'bpm': tempo}

    return {'Zapata14': zapata_entry, 'Degara12': degara_entry}
    def _essentia(path: str, params=None) -> float:
        """Calculate the BPM of an audio file with Essentia.

        Adapted from the Essentia Python examples:
            https://essentia.upf.edu/essentia_python_examples.html

        The audio is loaded (and resampled when needed) at 44100 Hz, the rate
        RhythmExtractor2013 is documented to require. Loading at the file's
        native rate would mis-scale the tempo for any file that is not
        44100 Hz.

        :param path: path of the audio file to analyse
        :param params: not used by this implementation
        :return: the BPM estimate (first output of the extractor)
        """
        audio = es.MonoLoader(filename=path, sampleRate=44100)()

        # Compute beat positions and BPM; only the BPM is returned.
        rhythm_extractor = es.RhythmExtractor2013(method="multifeature")
        bpm, _, _, _, _ = rhythm_extractor(audio)

        return bpm
def extract_beats(song_names, song_name2wav, bps):
    """
    Find beat-like events for each song and return them as integer indices.

    For every name in ``song_names`` the matching entry of ``song_name2wav``
    is run through RhythmExtractor2013 (multifeature). Every ``bps``-th beat
    position is kept, scaled by TARGET_FS and cast to int.

    Returns a list with one array per song, in the order of ``song_names``.
    """

    def _beat_indices(wav):
        # Only the second extractor output (beat positions) is needed.
        _, positions, _, _, _ = \
            ess.RhythmExtractor2013(method='multifeature')(wav)
        # Thin out to every `bps`-th beat, then scale and truncate to int.
        return (positions[::bps] * TARGET_FS).astype(int)

    return [_beat_indices(song_name2wav[name]) for name in song_names]
def annotate_song(filepath):
    """Extract tempo and key descriptors for one audio file.

    The key comes from KeyExtractor with the 'edma' profile. A two-character
    key ending in 'b' (a flat) is rewritten as the sharp of the preceding
    letter, e.g. 'Bb' becomes 'A#'. The BPM is the rounded first output of
    RhythmExtractor2013.

    Returns a dict with the keys 'bpm', 'key', 'key_scale' and 'key_strength'.
    """
    samples = load_audio(filepath)
    key, scale, key_strength = es.KeyExtractor(profileType='edma')(samples)

    # Flat -> sharp: step back one letter name (wrapping from A to G).
    letters = ['A', 'B', 'C', 'D', 'E', 'F', 'G']
    is_flat = len(key) == 2 and key[1] == 'b'
    if is_flat:
        previous_letter = letters[(letters.index(key[0]) - 1) % len(letters)]
        key = previous_letter + '#'

    tempo = es.RhythmExtractor2013()(samples)[0]

    return dict(
        bpm=round(tempo),
        key=key,
        key_scale=scale,
        key_strength=key_strength,
    )
Beispiel #7
0
# Script: estimate the tempo of a drum recording and derive a timing value
# from it. `robot` is defined outside this snippet.
robot.stow()

filename = "./audios/drum_60.wav"

# Earlier approach (disabled): take the BPM from MusicExtractor's rhythm
# descriptors instead of running RhythmExtractor2013 directly.
# features, _ = es.MusicExtractor(lowlevelStats=['mean', 'stdev'],
#                                               rhythmStats=['mean', 'stdev'],
#                                               tonalStats=['mean', 'stdev'])(filename)

# tempo = np.round(features['rhythm.bpm'])
# print(tempo)
#
# t= 60.0/tempo * 2

# Load the file as mono audio (loader defaults) and extract BPM and beats.
audio = es.MonoLoader(filename= filename)()
rhythm_extractor = es.RhythmExtractor2013(method="multifeature")
bpm, beats, _, _, _ = rhythm_extractor(audio)

# Disabled alternative: derive timing from pairwise-summed inter-beat gaps.
# interonset = np.ediff1d(beats)
# interonset = np.add.reduceat(interonset, np.arange(0, len(interonset), 2))
# interonset = np.round(interonset, decimals=3)

# 60 / BPM is the length of one beat in seconds, so `t` is the duration of
# two beats, rounded to two decimals.
tempo = np.round(bpm)
t = 60/tempo * 2
t = np.round(t, decimals=2)

# Motion parameters; presumably joint targets for the robot — their units
# and consumers are not visible in this snippet (TODO confirm).
xrotate=3.14
xtilt=0.5
xpan=1
xwrist=1.5
#start
 def __get_bpm__(self):
     """Estimate the tempo of ``self.signal`` and store it in ``self.bpm``.

     The extractor is configured with ``minTempo=40`` and ``maxTempo=120``.
     """
     e_rhythmextractor2013 = e.RhythmExtractor2013(maxTempo=120, minTempo=40)
     # Outputs are (bpm, ticks, confidence, estimates, bpm intervals); only
     # the BPM is kept. The previous `assert isinstance(bpm, object)` was
     # always true and has been dropped.
     bpm, _, _, _, _ = e_rhythmextractor2013(self.signal)
     self.bpm = bpm
Beispiel #9
0
def get_beats(filepath):
    """Load a file as mono audio and run RhythmExtractor2013 on it.

    The audio is loaded at the module-level ``sample_rate`` and the
    extractor's full output is returned unchanged; the caller unpacks the
    global BPM and the beat locations from it.

    NOTE(review): Essentia documents the beat locations in seconds, not as
    sample numbers — confirm what callers expect.
    """
    samples = es.MonoLoader(filename=filepath, sampleRate=sample_rate)()
    beat_tracker = es.RhythmExtractor2013(method='multifeature')
    return beat_tracker(samples)
Beispiel #10
0
def compute_features(path, f_mfcc_kl, f_mfcc_euclid, f_notes, f_chroma, f_bh):
    """Extract rhythm, tonal and timbre features from one audio file.

    The file is opened twice: with the Essentia standard-mode loader (``es``)
    at the module-level rate ``fs`` and with the streaming-mode loader
    (``ess``) at 44100 Hz. Each ``f_*`` argument is a flag that enables a
    feature group when it equals 1. The streaming key/HPCP network runs
    regardless of the flags.

    :param path: path of the audio file
    :param f_mfcc_kl: 1 to compute the MFCC statistics (mean, var, cov, cov_kl)
    :param f_mfcc_euclid: 1 to compute the MFCC statistics (same computation)
    :param f_notes: 1 to compute the per-beat note list
    :param f_chroma: 1 to compute the beat-aligned chroma matrix
    :param f_bh: 1 to compute the BPM histogram
    :return: tuple ``(bpm, histogram, key, scale, notes, chroma_matrix, mean,
        cov, var, cov_kl)``. Features that were not requested keep their
        initial value 0. If either loader raises, the tuple
        ``(0, [], 0, 0, [], [], [], [], [], [])`` is returned instead.
    """
    gc.enable()
    # Loading audio file
    #will resample if sampleRate is different!
    # NOTE(review): the bare `except` also swallows KeyboardInterrupt/SystemExit.
    try:
        audio = es.MonoLoader(filename=path, sampleRate=fs)()
    except:
        print("Erroneos File detected by essentia standard: skipping!")
        #return bpm, histogram, key, scale, notes, chroma_matrix, mean, cov, var, cov_kl
        return 0, [], 0, 0, [], [], [], [], [], []
    #will resample if sampleRate is different!
    # Only the streaming loader object is created here; it is run further
    # down by essentia.run(loader).
    try:
        loader = ess.MonoLoader(filename=path, sampleRate=44100)
    except:
        print("Erroneos File detected by essentia streaming: skipping!")
        #return bpm, histogram, key, scale, notes, chroma_matrix, mean, cov, var, cov_kl
        return 0, [], 0, 0, [], [], [], [], [], []
    #Initialize algorithms we will use
    # Frame and hop size of the streaming (chroma) analysis; hopSize is also
    # used below to convert beat times into frame indices.
    frameSize = 4096  #512
    hopSize = 2048  #256
    #######################################
    # DO FILTERING ONLY FOR MFCC - not with essentia standard
    # below is just an example
    #HP = es.HighPass(cutoffFrequency=128)
    #LP = es.LowPass(cutoffFrequency=4096)
    #lp_f = LP(audio)
    #hp_f = HP(lp_f)
    #audio = hp_f
    #MonoWriter(filename='music/filtered.wav')(filtered_audio)
    # Streaming-mode filters; connected between loader and frame cutter below.
    HP = ess.HighPass(cutoffFrequency=128)
    LP = ess.LowPass(cutoffFrequency=4096)
    #loader = ess.MonoLoader(filename=path, sampleRate=44100)
    #writer = ess.MonoWriter(filename='music/filtered.wav')
    #frameCutter = FrameCutter(frameSize = 1024, hopSize = 512)
    #pool = essentia.Pool()
    # Connect streaming algorithms
    #loader.audio >> HP.signal
    #HP.signal >> LP.signal
    #LP.signal >> writer.audio
    # Run streaming network
    #essentia.run(loader)
    # Defaults returned for every feature that is not requested via its flag.
    bpm = 0
    histogram = 0
    key = 0
    scale = 0
    notes = 0
    chroma_matrix = 0
    mean = 0
    cov = 0
    var = 0
    cov_kl = 0
    #####################################
    # extract mfcc
    #####################################
    if f_mfcc_kl == 1 or f_mfcc_euclid == 1:
        #features, features_frames = es.MusicExtractor(analysisSampleRate=44100, mfccStats=['mean', 'cov'])(path)
        #m, n = features['lowlevel.mfcc.cov'].shape
        #print m
        #iu1 = np.triu_indices(m)
        #cov = features['lowlevel.mfcc.cov'][iu1]
        #mean = features['lowlevel.mfcc.mean']
        #print(features['lowlevel.mfcc.cov'])
        hamming_window = es.Windowing(type='hamming')
        spectrum = es.Spectrum()  # we just want the magnitude spectrum
        mfcc = es.MFCC(numberCoefficients=13)
        frame_sz = 2048  #512
        hop_sz = 1024  #256
        # One row per frame; element [1] of the MFCC output is kept for each
        # frame.
        mfccs = np.array([
            mfcc(spectrum(hamming_window(frame)))[1] for frame in
            es.FrameGenerator(audio, frameSize=frame_sz, hopSize=hop_sz)
        ])
        #Let's scale the MFCCs such that each coefficient dimension has zero mean and unit variance:
        #mfccs = sklearn.preprocessing.scale(mfccs)
        #print mfccs.shape
        # Per-coefficient statistics over all frames.
        mean = np.mean(mfccs.T, axis=1)
        #print(mean)
        var = np.var(mfccs.T, axis=1)
        #print(var)
        cov = np.cov(mfccs.T)
        # cov_kl keeps the full covariance matrix; cov is reduced below.
        cov_kl = cov  #.flatten()
        #get only upper triangular matrix values to shorten length
        iu1 = np.triu_indices(13)
        cov = cov[iu1]
        #plt.imshow(mfccs.T, origin='lower', aspect='auto', interpolation='nearest')
        #plt.ylabel('MFCC Coefficient Index')
        #plt.xlabel('Frame Index')
        #plt.colorbar()
    #####################################
    # extract beat features and histogram
    #####################################
    # Beats are needed by the histogram, the beat-aligned chroma and the
    # notes, so they are computed when any of those is requested.
    if f_bh == 1 or f_chroma == 1 or f_notes == 1:
        # Compute beat positions and BPM
        rhythm_extractor = es.RhythmExtractor2013(method="multifeature")
        bpm, beats, beats_confidence, _, beats_intervals = rhythm_extractor(
            audio)
        if f_bh == 1:
            peak1_bpm, peak1_weight, peak1_spread, peak2_bpm, peak2_weight, peak2_spread, histogram = es.BpmHistogramDescriptors(
            )(beats_intervals)
        tempo = bpm
        times = beats
        # Convert beat positions into frame indices of the streaming analysis.
        # NOTE(review): this uses `fs`, while the streaming loader runs at
        # 44100 Hz — the indices only line up if fs == 44100; confirm.
        beats_frames = (beats * fs) / hopSize
        beats_frames = beats_frames.astype(int)

        #fig, ax = plt.subplots()
        #ax.bar(range(len(histogram)), histogram, width=1)
        #ax.set_xlabel('BPM')
        #ax.set_ylabel('Frequency')
        #plt.title("BPM histogram")
        #ax.set_xticks([20 * x + 0.5 for x in range(int(len(histogram) / 20))])
        #ax.set_xticklabels([str(20 * x) for x in range(int(len(histogram) / 20))])
        #plt.show()

    #####################################
    # extract full beat aligned chroma
    #####################################

    # The streaming network below is built and run regardless of the flags.
    framecutter = ess.FrameCutter(frameSize=frameSize,
                                  hopSize=hopSize,
                                  silentFrames='noise')
    windowing = ess.Windowing(type='blackmanharris62')
    # Rebinds `spectrum` (a standard-mode algorithm in the MFCC branch) to a
    # streaming-mode one.
    spectrum = ess.Spectrum()
    spectralpeaks = ess.SpectralPeaks(orderBy='magnitude',
                                      magnitudeThreshold=0.00001,
                                      minFrequency=20,
                                      maxFrequency=3500,
                                      maxPeaks=60)
    # Use default HPCP parameters for plots, however we will need higher resolution
    # and custom parameters for better Key estimation
    hpcp = ess.HPCP()
    hpcp_key = ess.HPCP(
        size=36,  # we will need higher resolution for Key estimation
        referenceFrequency=440,  # assume tuning frequency is 440 Hz.
        bandPreset=False,
        minFrequency=20,
        maxFrequency=3500,
        weightType='cosine',
        nonLinear=False,
        windowSize=1.)
    # `key` temporarily holds the Key algorithm; it is rebound to the
    # estimated key from the pool after the network has run.
    key = ess.Key(
        profileType='edma',  # Use profile for electronic music
        numHarmonics=4,
        pcpSize=36,
        slope=0.6,
        usePolyphony=True,
        useThreeChords=True)
    # Use pool to store data
    pool = essentia.Pool()
    # Connect streaming algorithms
    ###################################
    # USE FILTER - comment next lines in
    loader.audio >> HP.signal
    HP.signal >> LP.signal
    LP.signal >> framecutter.signal
    ###################################
    ###################################
    # NO FILTER - comment next line in
    #loader.audio >> framecutter.signal
    ###################################
    framecutter.frame >> windowing.frame >> spectrum.frame
    spectrum.spectrum >> spectralpeaks.spectrum
    spectralpeaks.magnitudes >> hpcp.magnitudes
    spectralpeaks.frequencies >> hpcp.frequencies
    spectralpeaks.magnitudes >> hpcp_key.magnitudes
    spectralpeaks.frequencies >> hpcp_key.frequencies
    hpcp_key.hpcp >> key.pcp
    hpcp.hpcp >> (pool, 'tonal.hpcp')
    key.key >> (pool, 'tonal.key_key')
    key.scale >> (pool, 'tonal.key_scale')
    key.strength >> (pool, 'tonal.key_strength')
    # Run streaming network
    essentia.run(loader)
    #print("Estimated key and scale:", pool['tonal.key_key'] + " " + pool['tonal.key_scale'])
    #print(pool['tonal.hpcp'].T)
    # `chroma` is the transposed HPCP matrix from the pool.
    chroma = pool['tonal.hpcp'].T
    key = pool['tonal.key_key']
    scale = pool['tonal.key_scale']
    if f_chroma == 1:
        # Plot HPCP
        #imshow(pool['tonal.hpcp'].T, aspect='auto', origin='lower', interpolation='none')
        #plt.title("HPCPs in frames (the 0-th HPCP coefficient corresponds to A)")
        #show()
        #print beats_frames.shape[0]
        # One normalised 12-bin chroma vector per detected beat.
        chroma_matrix = np.zeros((beats_frames.shape[0], 12))
        prev_beat = 0
        act_beat = 0
        sum_key = np.zeros(12)
        # Transposed back so that rows can be sliced by frame index.
        # NOTE(review): if these transposes are NumPy views, the in-place
        # assignment in the loop also changes `chroma`, which the f_notes
        # branch reads afterwards — confirm this is intended.
        chroma_align = chroma
        chroma_align = chroma_align.transpose()
        mat_index = 0
        for i in beats_frames:
            act_beat = i
            # Mean chroma over the frames between the previous and this beat.
            # NOTE(review): divides by zero when two beats map to the same
            # frame index (act_beat == prev_beat).
            value = sum(
                chroma_align[prev_beat:act_beat]) / (act_beat - prev_beat)
            chroma_align[prev_beat:act_beat] = value
            prev_beat = i
            # L1-normalise unless the vector is all zero.
            if np.linalg.norm(value, ord=1) != 0:
                value = value / np.linalg.norm(value, ord=1)
            chroma_matrix[mat_index] = value
            mat_index = mat_index + 1

        #chroma_align = chroma_align.transpose()
        #plt.figure(figsize=(10, 4))
        #librosa.display.specshow(chroma_align, y_axis='chroma', x_axis='time')
        #plt.vlines(times, 0, 12, alpha=0.5, color='r', linestyle='--', label='Beats')
        #plt.colorbar()
        #plt.title('Chromagram')
        #plt.tight_layout()
        #chroma_align = chroma_align.transpose()
    #print(chroma_align[24:28])
    #####################################
    # extract full chroma text
    #####################################
    if f_notes == 1:
        #print(chroma.shape)
        m, n = chroma.shape
        avg = 0
        chroma = chroma.transpose()
        m, n = chroma.shape
        # Average of the per-row sums; rows summing to at most half of that
        # average are zeroed in the loop below.
        for j in chroma:
            avg = avg + np.sum(j)
        avg = avg / m
        threshold = avg / 2
        for i in chroma:
            if np.sum(i) > threshold:
                # Locate the maximum, blank it temporarily to locate the
                # runner-up (ind2, currently unused), then set the maximum
                # position to 1.
                ind = np.where(i == np.max(i))
                max_val = i[ind]  #is always 1!
                i[ind] = 0

                ind2 = np.where(i == np.max(i))
                i[ind] = 1

                #if np.any(i[ind2][0] >= 0.8 * max_val):
                #i[ind2] = i[ind2]
                #pass
                #low_values_flags = i < 1
                # Zero every bin below 0.8.
                low_values_flags = i < 0.8

                i[low_values_flags] = 0
            else:
                i.fill(0)
        chroma = chroma.transpose()
        # Compute beat positions and BPM
        prev_beat = 0
        act_beat = 0
        sum_key = np.zeros(12)
        chroma = chroma.transpose()
        # Replace each beat segment by a one-hot row marking its strongest
        # bin(s), or by zeros if the whole segment is zero.
        for i in beats_frames:
            act_beat = i
            sum_key = sum(chroma[prev_beat:act_beat])
            #print(sum_key)
            #print(chroma[prev_beat:act_beat])

            ind = np.where(sum_key == np.max(sum_key))
            ind = ind[0]
            #print("debug")
            # NOTE(review): `j` is the last row left over from the averaging
            # loop above; it is only used here for its length.
            fill = np.zeros(len(j))
            if (np.all(chroma[prev_beat:act_beat] == 0)):
                fill[ind] = 0
            else:
                fill[ind] = 1
            chroma[prev_beat:act_beat] = fill
            #print(chroma[prev_beat:act_beat])
            prev_beat = i
            #print("BEAT")
        notes = []
        # NOTE(review): iterating the list that was just emptied does
        # nothing; this loop never executes its body.
        for i in notes:
            del i
        prev_beat = 0
        act_beat = 0
        # One note per beat: index of the first strongest bin in the segment.
        for i in beats_frames:
            act_beat = i
            sum_key = sum(chroma[prev_beat:act_beat])
            ind = np.where(sum_key == np.max(sum_key))
            prev_beat = i
            notes.append(ind[0][0])
            prev_beat = i
        #chroma = chroma.transpose()
        #plt.figure(figsize=(10, 4))
        #librosa.display.specshow(chroma, y_axis='chroma', x_axis='time')
        #plt.vlines(times, 0, 12, alpha=0.5, color='r', linestyle='--', label='Beats')
        #plt.colorbar()
        #plt.title('Chromagram')
        #plt.tight_layout()
        #chroma = chroma.transpose()
    gc.collect()
    return bpm, histogram, key, scale, notes, chroma_matrix, mean, cov, var, cov_kl