Example #1
    def get_onsets(self, _audio=None):
        # A None default avoids the shared mutable default-argument pitfall.
        if _audio is not None:
            audio = _audio
        else:
            audio = self.audio

        W = es.Windowing(type=self.winType)
        c2p = es.CartesianToPolar()
        fft = es.FFT()
        onsetDetection = es.OnsetDetection(method=self.onsetMethod,
                                           sampleRate=44100)
        onsets = es.Onsets(alpha=0.2)
        pool = Pool()

        # Accumulate the per-frame onset detection function.
        for frame in es.FrameGenerator(audio, frameSize=1024, hopSize=512):
            mag, phase = c2p(fft(W(frame)))
            pool.add("onsetFunction", onsetDetection(mag, phase))

        # Onsets expects a matrix of detection functions and one weight per function.
        detectedOnsets = onsets([pool["onsetFunction"]], [1])

        return detectedOnsets
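A usage sketch for the method above, assuming the surrounding class exposes `audio`, `winType`, and `onsetMethod` attributes; the instance name and file path are hypothetical:

import essentia.standard as es

audio = es.MonoLoader(filename='drums.wav')()  # hypothetical path
onset_times = detector.get_onsets(audio)       # 'detector': instance of the class
print(onset_times)  # onset times in seconds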
Example #2
def get_key(file_in):
    """
    Estimates the key and scale for an audio file.
    """
    loader = streaming.MonoLoader(filename=file_in)
    framecutter = streaming.FrameCutter()
    windowing = streaming.Windowing(type="blackmanharris62")
    spectrum = streaming.Spectrum()
    spectralpeaks = streaming.SpectralPeaks(orderBy="magnitude",
                                            magnitudeThreshold=1e-05,
                                            minFrequency=40,
                                            maxFrequency=5000,
                                            maxPeaks=10000)
    pool = Pool()
    hpcp = streaming.HPCP()
    key = streaming.Key()

    loader.audio >> framecutter.signal
    framecutter.frame >> windowing.frame >> spectrum.frame
    spectrum.spectrum >> spectralpeaks.spectrum
    spectralpeaks.magnitudes >> hpcp.magnitudes
    spectralpeaks.frequencies >> hpcp.frequencies
    hpcp.hpcp >> key.pcp
    key.key >> (pool, 'tonal.key_key')
    key.scale >> (pool, 'tonal.key_scale')
    key.strength >> (pool, 'tonal.key_strength')

    run(loader)

    return Key(pool['tonal.key_key'], pool['tonal.key_scale'])
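`Key` is not defined in this snippet; it is presumably a small named container created alongside the function. A usage sketch under that assumption (the file name is hypothetical):

from collections import namedtuple

# assumed shape of the return container used by get_key()
Key = namedtuple('Key', ['key', 'scale'])

result = get_key('track.mp3')  # hypothetical path
print(result.key, result.scale)  # e.g. 'A' 'minor'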
Example #3

def compute_harmonic_magnitudes(contour_f0s, fftgram, idx_start, options):
    '''
    Compute the harmonic amplitudes for each frame of a contour by
    sampling the harmonic partials from the original spectrum.

    Params:
    --------------------
    contour_f0s - per-frame f0 values of the contour
    fftgram - fftgram of the whole audio file
    idx_start - index in fftgram of the contour's first frame
    options - unused here

    Returns:
    --------------------
    hfreqs - harmonic frequencies of the contour
    magns - harmonic magnitudes of the contour
    phases - harmonic phases of the contour
    '''

    run_harm_model_anal = HarmonicModelAnal(nHarmonics=30)

    # TODO: sanity check: times == len(fftgram) and contour_start_time_SAL in times

    pool = Pool()

    for i, contour_f0 in enumerate(contour_f0s):

        if idx_start + i > len(fftgram) - 1:
            sys.exit('frame index {} is out of range for an fftgram of length {}'.format(
                idx_start + i, len(fftgram)))
        fft = fftgram[idx_start + i]
        # harmonic frequencies, magnitudes and phases for this frame
        hfreq, magn, phase = run_harm_model_anal(fft, contour_f0)

        pool.add('phases', phase)
        pool.add('hfreqs', hfreq)
        pool.add('magns', magn)

    return pool['hfreqs'], pool['magns'], pool['phases']
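A sketch of how the `fftgram` argument could be produced, using Essentia's standard-mode FFT over windowed frames; the frame and hop sizes are assumptions and should match those used when extracting the contour:

import essentia.standard as estd

w = estd.Windowing(type='hann')
fft = estd.FFT()
fftgram = [fft(w(frame))
           for frame in estd.FrameGenerator(audio, frameSize=2048, hopSize=128)]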
Example #4
def estimate_main_band(infile):
    """
    Estimate if this is a low, mid, or high track.

    Not _really_ sure if this does what I need it to,
    but some quick tests looked right.
    """
    loader = streaming.MonoLoader(filename=infile)
    framecutter = streaming.FrameCutter()
    windowing = streaming.Windowing(type="blackmanharris62")
    spectrum = streaming.Spectrum()
    freqbands = streaming.FrequencyBands(frequencyBands=[0, 250, 750, 4000])
    pool = Pool()

    loader.audio >> framecutter.signal
    framecutter.frame >> windowing.frame >> spectrum.frame
    spectrum.spectrum >> freqbands.spectrum
    freqbands.bands >> (pool, 'bands')

    run(loader)

    sums = np.sum(pool['bands'], axis=0)
    band = np.argmax(sums)
    if band == 0:
        return 'low'
    elif band == 1:
        return 'mid'
    elif band == 2:
        return 'high'
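The four edges passed to FrequencyBands define three bands (0-250 Hz, 250-750 Hz, 750-4000 Hz), so `np.argmax` over the summed band energies yields an index from 0 to 2. A trivial usage sketch with a hypothetical file name:

print(estimate_main_band('bassline.wav'))  # e.g. 'low' for a bass-heavy track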
Example #5
def get_embeddings(melspecs: dict[str, np.ndarray], architectures: dict, predictors: dict) -> Optional[dict]:
    data = {}
    for architecture, metadata in architectures.items():
        input_pool = Pool()
        input_pool.set('model/Placeholder', melspecs[metadata['essentia-algorithm']])

        for dataset in metadata['datasets']:
            # TODO: chunk the input melspecs to avoid OOM error
            try:
                output_pool = predictors[f'{dataset}-{architecture}'](input_pool)
            except RuntimeError:
                return None

            for layer, layer_data in metadata['layers'].items():
                embeddings = output_pool[layer_data['name']].squeeze()

                if len(embeddings) == 0:
                    return None

                if len(embeddings.shape) == 1:
                    embeddings = np.expand_dims(embeddings, axis=0)

                data[f'{dataset}-{architecture}-{layer}'] = embeddings

    return data
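The `predictors` mapping is constructed elsewhere; a hedged sketch of one entry, assuming Essentia's TensorflowPredict and a locally available model graph (the key, file name, and layer names are hypothetical):

import essentia.standard as es

predictors = {
    'msd-musicnn': es.TensorflowPredict(
        graphFilename='msd-musicnn-1.pb',   # hypothetical model file
        inputs=['model/Placeholder'],
        outputs=['model/dense/BiasAdd']),   # hypothetical output layer
}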
Example #6
def pca(pool, namespace=''):
    llspace = 'lowlevel.'
    if namespace: llspace = namespace + '.lowlevel.'
    sccoeffs = pool[llspace + 'sccoeffs']
    scvalleys = pool[llspace + 'scvalleys']
    numFrames = len(sccoeffs)
    poolSc = Pool()
    # Interleave contrast coefficients and valleys per frame: [c0, v0, c1, v1, ...]
    merged = essentia.zeros(2 * len(sccoeffs[0]))
    for frame in range(numFrames):
        j = 0
        for i in range(len(sccoeffs[frame])):
            merged[j] = sccoeffs[frame][i]
            merged[j + 1] = scvalleys[frame][i]
            j += 2
        poolSc.add('contrast', merged)

    poolTransformed = standard.PCA(namespaceIn='contrast',
                                   namespaceOut='contrast')(poolSc)

    contrast = poolTransformed['contrast']

    pool.set(llspace + 'spectral_contrast.mean', mean(contrast, axis=0))
    pool.set(llspace + 'spectral_contrast.var', var(contrast, axis=0))

    pool.remove(llspace + 'sccoeffs')
    pool.remove(llspace + 'scvalleys')
Example #7
    def computeBpmHistogram(self,
                            noveltyCurve,
                            frameSize=4,
                            overlap=2,
                            frameRate=44100. / 128.,
                            window='hann',
                            zeroPadding=0,
                            constantTempo=False,
                            minBpm=30):

        pool = Pool()
        bpmHist = ess.BpmHistogram(frameRate=frameRate,
                                   frameSize=frameSize,
                                   overlap=overlap,
                                   zeroPadding=zeroPadding,
                                   constantTempo=constantTempo,
                                   windowType=window,
                                   minBpm=minBpm)

        gen = ess.VectorInput(noveltyCurve)
        gen.data >> bpmHist.novelty
        bpmHist.bpm >> (pool, 'bpm')
        bpmHist.bpmCandidates >> (pool, 'bpmCandidates')
        bpmHist.bpmMagnitudes >> (pool, 'bpmMagnitudes')
        bpmHist.frameBpms >> None  #(pool, 'frameBpms')
        bpmHist.tempogram >> (pool, 'tempogram')
        bpmHist.ticks >> (pool, 'ticks')
        bpmHist.ticksMagnitude >> (pool, 'ticksMagnitude')
        bpmHist.sinusoid >> (pool, 'sinusoid')
        essentia.run(gen)

        return pool
Example #8
def get_bpm(file_in):
    pool = Pool()

    loader = streaming.MonoLoader(filename=file_in)
    bt = streaming.RhythmExtractor2013()
    bpm_histogram = streaming.BpmHistogramDescriptors()
    # BPM histogram output size is 250
    centroid = streaming.Centroid(range=250)

    loader.audio >> bt.signal
    bt.bpm >> (pool, 'bpm')
    bt.ticks >> None
    bt.confidence >> (pool, 'confidence')
    bt.estimates >> None
    bt.bpmIntervals >> bpm_histogram.bpmIntervals
    bpm_histogram.firstPeakBPM >> (pool, 'bpm_first_peak')
    bpm_histogram.firstPeakWeight >> None
    bpm_histogram.firstPeakSpread >> None
    bpm_histogram.secondPeakBPM >> (pool, 'bpm_second_peak')
    bpm_histogram.secondPeakWeight >> None
    bpm_histogram.secondPeakSpread >> None
    bpm_histogram.histogram >> (pool, 'bpm_histogram')
    bpm_histogram.histogram >> centroid.array
    centroid.centroid >> (pool, 'bpm_centroid')

    run(loader)
    return pool['bpm']
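A short usage sketch with a hypothetical file name. Note that the network also fills the pool with the histogram peaks and centroid, but only the global BPM estimate is returned:

bpm = get_bpm('track.wav')
print('estimated tempo: {} BPM'.format(bpm))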
Example #9
    def _extract_pitch_contours(self, audio):
        # Hann window with x4 zero padding
        run_windowing = estd.Windowing(  # pylint: disable-msg=E1101
            zeroPadding=3 * self.frame_size)
        run_spectrum = estd.Spectrum(  # pylint: disable-msg=E1101
            size=self.frame_size * 4)
        run_spectral_peaks = estd.SpectralPeaks(  # pylint: disable-msg=E1101
            minFrequency=self.min_frequency,
            maxFrequency=self.max_frequency,
            magnitudeThreshold=self.magnitude_threshold,
            sampleRate=self.sample_rate,
            orderBy='magnitude')

        # convert unit to cents, PitchSalienceFunction takes 55 Hz as the
        # default reference
        run_pitch_salience_function = \
            estd.PitchSalienceFunction(  # pylint: disable-msg=E1101
                binResolution=self.bin_resolution)
        run_pitch_salience_function_peaks = \
            estd.PitchSalienceFunctionPeaks(  # pylint: disable-msg=E1101
                binResolution=self.bin_resolution,
                minFrequency=self.min_frequency,
                maxFrequency=self.max_frequency)
        run_pitch_contours = estd.PitchContours(  # pylint: disable-msg=E1101
            hopSize=self.hop_size,
            binResolution=self.bin_resolution,
            peakDistributionThreshold=self.peak_distribution_threshold)

        # compute frame by frame
        pool = Pool()
        for frame in estd.FrameGenerator(
                audio,  # pylint: disable-msg=E1101
                frameSize=self.frame_size,
                hopSize=self.hop_size):
            frame = run_windowing(frame)
            spectrum = run_spectrum(frame)
            peak_frequencies, peak_magnitudes = run_spectral_peaks(spectrum)
            salience = run_pitch_salience_function(peak_frequencies,
                                                   peak_magnitudes)
            salience_peaks_bins, salience_peaks_contour_saliences = \
                run_pitch_salience_function_peaks(salience)
            if not np.size(salience_peaks_bins):
                salience_peaks_bins = np.array([0])
            if not np.size(salience_peaks_contour_saliences):
                salience_peaks_contour_saliences = np.array([0])

            pool.add('allframes_salience_peaks_bins', salience_peaks_bins)
            pool.add('allframes_salience_peaks_contourSaliences',
                     salience_peaks_contour_saliences)

        # post-processing: contour tracking
        contours_bins, contour_saliences, contours_start_times, duration = \
            run_pitch_contours(
                [f.tolist()
                 for f in pool['allframes_salience_peaks_bins']],
                [f.tolist()
                 for f in pool['allframes_salience_peaks_contourSaliences']])
        return contours_bins, contours_start_times, contour_saliences, duration
Example #10
def compute_pitch_yin(audio):
    yin = PitchYin()
    p = Pool()
    for frame in FrameGenerator(audio,
                                frameSize=frame_size,
                                hopSize=hop_size,
                                startFromZero=True):
        p.add('pitch_yin', yin(frame)[0])
    return p['pitch_yin']
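Examples #10 through #26 share module-level `frame_size` and `hop_size` globals (plus tonal variants used by compute_hpcp in Example #26) that the snippets never define; a minimal setup sketch with assumed values:

frame_size = 2048        # assumed value, not given in the snippets
hop_size = 1024          # assumed value
tonal_frame_size = 4096  # assumed; used by compute_hpcp (Example #26)
tonal_hop_size = 2048    # assumed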
Example #11
def compute_rms(audio):
    rms = RMS()
    p = Pool()
    for frame in FrameGenerator(audio,
                                frameSize=frame_size,
                                hopSize=hop_size,
                                startFromZero=True):
        p.add('rms', rms(frame))
    return p['rms']
Example #12
def compute_energy(audio):
    energy = Energy()
    p = Pool()
    for frame in FrameGenerator(audio,
                                frameSize=frame_size,
                                hopSize=hop_size,
                                startFromZero=True):
        p.add('energy', energy(frame))
    return p['energy']
Example #13
def compute_zcr(audio):
    zcr = ZeroCrossingRate()
    p = Pool()
    for frame in FrameGenerator(audio,
                                frameSize=frame_size,
                                hopSize=hop_size,
                                startFromZero=True):
        p.add('zcr', zcr(frame))
    return p['zcr']
Example #14
def compute_power_spectrum(audio):
    w = Windowing(type='hann')
    power_spectrum = PowerSpectrum()
    p = Pool()
    for frame in FrameGenerator(audio,
                                frameSize=frame_size,
                                hopSize=hop_size,
                                startFromZero=True):
        p.add('power_spectrum', power_spectrum(w(frame)))
    return p['power_spectrum']
Example #15
def compute_spectral_centroid(audio):
    w = Windowing(type='hann')
    spectrum = Spectrum()
    centroid = Centroid()
    p = Pool()
    for frame in FrameGenerator(audio,
                                frameSize=frame_size,
                                hopSize=hop_size,
                                startFromZero=True):
        p.add('spectral_centroid', centroid(spectrum(w(frame))))
    return p['spectral_centroid']
Example #16
def compute_bark(audio):
    w = Windowing(type='hann')
    spectrum = Spectrum()
    bark = BarkBands()
    p = Pool()
    for frame in FrameGenerator(audio,
                                frameSize=frame_size,
                                hopSize=hop_size,
                                startFromZero=True):
        p.add('bark', bark(spectrum(w(frame))))
    return p['bark']
Example #17
def compute_mel(audio):
    w = Windowing(type='hann')
    spectrum = Spectrum()
    mel = MelBands(numberBands=96)
    p = Pool()
    for frame in FrameGenerator(audio,
                                frameSize=frame_size,
                                hopSize=hop_size,
                                startFromZero=True):
        p.add('mel', mel(spectrum(w(frame))))
    return p['mel']
Example #18
def compute_spectral_rolloff(audio):
    w = Windowing(type='hann')
    spectrum = Spectrum()
    rolloff = RollOff()
    p = Pool()
    for frame in FrameGenerator(audio,
                                frameSize=frame_size,
                                hopSize=hop_size,
                                startFromZero=True):
        p.add('spectral_rolloff', rolloff(spectrum(w(frame))))
    return p['spectral_rolloff']
Example #19
def compute_spectral_flatness(audio):
    w = Windowing(type='hann')
    spectrum = Spectrum()
    flatness = Flatness()
    p = Pool()
    for frame in FrameGenerator(audio,
                                frameSize=frame_size,
                                hopSize=hop_size,
                                startFromZero=True):
        p.add('spectral_flatness', flatness(spectrum(w(frame))))
    return p['spectral_flatness']
Example #20
def compute_pitch_yinfft(audio):
    w = Windowing(type='hann')
    spectrum = Spectrum()
    yinfft = PitchYinFFT()
    p = Pool()
    for frame in FrameGenerator(audio,
                                frameSize=frame_size,
                                hopSize=hop_size,
                                startFromZero=True):
        p.add('pitch_yinfft', yinfft(spectrum(w(frame)))[0])
    return p['pitch_yinfft']
Example #21
def estimate_danceability(infile):
    loader = streaming.MonoLoader(filename=infile)
    dance = streaming.Danceability()
    pool = Pool()

    loader.audio >> dance.signal
    dance.danceability >> (pool, 'danceability')

    run(loader)

    return pool['danceability']
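A trivial usage sketch (the file name is hypothetical); Essentia's Danceability algorithm yields larger values for more danceable material:

print(estimate_danceability('track.wav'))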
Example #22
    def __init__(self, arch):
        self.architechture = arch
        self.in_layer = None
        self.out_layer = None
        if arch == 'musicnn':
            self.feature_extractor = es.TensorflowInputMusiCNN()
            self.frame_size = 512
            self.hop_size = 256
            self.patch_size = 187
            self.num_bands = 96
        elif arch == 'vggish':
            self.feature_extractor = es.TensorflowInputVGGish()
            self.frame_size = 400
            self.hop_size = 200
            self.patch_size = 96
            self.num_bands = 64
        else:
            # fail fast instead of leaving the frame attributes unset
            raise ValueError('unsupported architecture: {}'.format(arch))
        self.feature_frames = []
        self.in_pool = Pool()
        self.out_pool = Pool()
        # the model itself is set up later
        self.predict = None
Example #23
def compute_mfcc(audio):
    w = Windowing(type='hann')
    spectrum = Spectrum()
    mfcc = MFCC()
    p = Pool()
    for frame in FrameGenerator(audio,
                                frameSize=frame_size,
                                hopSize=hop_size,
                                startFromZero=True):
        _, coeffs = mfcc(spectrum(w(frame)))
        p.add('mfcc', coeffs)
    return p['mfcc']
Example #24
def compute_spectral_shape(audio):
    w = Windowing(type='hann')
    spectrum = Spectrum()
    cm = CentralMoments()
    ds = DistributionShape()
    p = Pool()
    for frame in FrameGenerator(audio,
                                frameSize=frame_size,
                                hopSize=hop_size,
                                startFromZero=True):
        spread, skewness, kurtosis = ds(cm(spectrum(w(frame))))
        p.add('spectral_spread', spread)
        p.add('spectral_skewness', skewness)
        p.add('spectral_kurtosis', kurtosis)
    return p['spectral_spread'], p['spectral_skewness'], p['spectral_kurtosis']
Example #25
    def chromaprint(self, analysisTime=30):
        """
        Compute the fingerprint of the input signal with the Chromaprint
        algorithm; Essentia's Chromaprinter is a wrapper around the
        Chromaprint library.

        Returns: the fingerprint(s) as base64-encoded strings.
        """
        vec_input = ess.VectorInput(self.audio_vector)
        chromaprinter = ess.Chromaprinter(analysisTime=analysisTime, sampleRate=self.fs)
        pool = Pool()

        vec_input.data >> chromaprinter.signal
        chromaprinter.fingerprint >> (pool, 'chromaprint')
        run(vec_input)
        return pool['chromaprint']
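A usage sketch, assuming `track` is an instance of the surrounding class with `audio_vector` and `fs` already set:

fp = track.chromaprint(analysisTime=30)
print(fp)  # base64-encoded Chromaprint string(s)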
Example #26
def compute_hpcp(audio):
    w = Windowing(type='hann')
    spectrum = Spectrum()
    peaks = SpectralPeaks(orderBy='magnitude',
                          magnitudeThreshold=0.00001,
                          minFrequency=20,
                          maxFrequency=3500,
                          maxPeaks=60)
    hpcp = HPCP()
    p = Pool()
    for frame in FrameGenerator(audio,
                                frameSize=tonal_frame_size,
                                hopSize=tonal_hop_size,
                                startFromZero=True):
        p.add('hpcp', hpcp(*peaks(spectrum(w(frame)))))
    return p['hpcp']
Example #27
def __mfccs__(audio):
    w = Windowing(type='hann')
    # FFT() would return the complex FFT; here we just want the magnitude spectrum
    spectrum = Spectrum()
    mfcc = MFCC()
    mfcc_pool = Pool()
    # NOTE: one second of a wav file is roughly 90 frames, and the intensity is given in Hz
    for frame in FrameGenerator(audio,
                                frameSize=1024,
                                hopSize=512,
                                startFromZero=True):
        spec = spectrum(w(frame))
        mfcc_bands, mfcc_coeffs = mfcc(spec)
        mfcc_pool.add('mfcc', mfcc_coeffs[1:])  # drop the 0th (energy-related) coefficient
        mfcc_pool.add('mfcc_bands', mfcc_bands)
    return mfcc_pool
Example #28

def computeOnsets(inFile, outFile):

    # note: we can instantiate and call an algorithm on the same line
    print('Loading audio file...')
    audio = MonoLoader(filename=inFile)()

    pool = Pool()

    onsetDetectionGlobal = OnsetDetectionGlobal()
    onsetDetections = onsetDetectionGlobal(audio)

    pool.add('features.onsetDetections', onsetDetections)

    onsets = Onsets()
    onsetTimes = onsets(array([onsetDetections]), [1])

    pool.add('features.onsets', onsetTimes)
    np.savetxt(outFile, pool['features.onsets'][0], fmt='%f')
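Unlike Example #1, this variant relies on OnsetDetectionGlobal, which computes the detection function over the whole signal in one call rather than frame by frame. A usage sketch with hypothetical paths:

computeOnsets('drums.wav', 'onsets.txt')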
Example #29

def extractMFCCs(audio):
    '''
    Extract MFCCs frame by frame from the audio signal.
    Relies on module-level settings: frameSize_block, hopSize_block,
    num_mfccs, numberBands and highFrequencyBound.
    '''
    # compute MFCCs; maybe set highFrequencyBound=22100
    frameSizeInSamples = int(round(44100 * frameSize_block))
    hopSizeInSamples = int(round(44100 * hopSize_block))
    # size of the magnitude spectrum for this frame size (integer division)
    inputSpectrumSize = frameSizeInSamples // 2 + 1

    mfcc = MFCC(numberCoefficients=num_mfccs,
                numberBands=numberBands,
                highFrequencyBound=highFrequencyBound,
                inputSize=inputSpectrumSize)
    w = Windowing(type='hann')
    spectrum = Spectrum()
    pool = Pool()

    audio = essentia.array(audio)
    for frame in FrameGenerator(audio,
                                frameSize=frameSizeInSamples,
                                hopSize=hopSizeInSamples):
        mfcc_bands, mfcc_coeffs = mfcc(spectrum(w(frame)))
        pool.add('mfcc', mfcc_coeffs)

    return pool['mfcc']
Example #30
def SliceDrums_BeatDetection(folder, audio_filename, fs):
    od_hfc = OnsetDetection(method='hfc')
    w = Windowing(type='hann')
    fft = FFT()  # this gives us a complex FFT
    c2p = CartesianToPolar()  # and this turns it into a pair (magnitude, phase)
    onsets = Onsets()

    x = MonoLoader(filename=folder + audio_filename, sampleRate=fs)()
    duration = float(len(x)) / fs

    x = x / np.max(np.abs(x))  # normalize to [-1, 1]

    t = np.arange(len(x)) / float(fs)  # time axis for plotting

    #Plotting
    f, axarr = plt.subplots(1, 1, figsize=(80, 20))

    #Essentia beat tracking
    pool = Pool()
    for frame in FrameGenerator(x, frameSize=1024, hopSize=512):
        mag, phase = c2p(fft(w(frame)))
        pool.add('features.hfc', od_hfc(mag, phase))

    onsets_list = onsets(array([pool['features.hfc']]), [1])
    axarr.vlines(onsets_list, -1, 1, color='k', zorder=2, linewidth=5.0)
    axarr.plot(t, x, zorder=1)
    axarr.axis('off')
    for i, onset in enumerate(onsets_list):
        sample = max(0, int(onset * fs) - 1000)  # start slightly before the onset
        # prefix i's digit count, presumably to keep the filenames sortable
        samplename = "{}slices/{}{}__blind.wav".format(folder,
                                                       str(len(str(i))),
                                                       str(i))
        if (i >= len(onsets_list) - 1):
            next_sample = len(x)
        else:
            next_sample = int(onsets_list[i + 1] * fs) - 1000
        x_seg = x[sample:next_sample]
        MonoWriter(filename=samplename)(x_seg)

    return onsets_list, duration
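A usage sketch with hypothetical paths; note the function writes each slice into a `slices/` subfolder of `folder`, which must already exist:

onsets_list, duration = SliceDrums_BeatDetection('loops/', 'break.wav', 44100)
print('{} onsets over {:.1f} s'.format(len(onsets_list), duration))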