Example no. 1
# imports assumed by this snippet
import essentia
import essentia.standard as standard
from essentia import Pool
from numpy import mean, var

def pca(pool, namespace=''):
    llspace = 'lowlevel.'
    if namespace:
        llspace = namespace + '.lowlevel.'
    sccoeffs = pool[llspace + 'sccoeffs']
    scvalleys = pool[llspace + 'scvalleys']
    numFrames = len(sccoeffs)
    poolSc = Pool()
    merged = essentia.zeros(2*len(sccoeffs[0]))
    for frame in range(numFrames):
        j = 0
        # interleave coefficients and valleys into a single vector per frame
        for i in range(len(sccoeffs[frame])):
            merged[j] = sccoeffs[frame][i]
            merged[j + 1] = scvalleys[frame][i]
            j += 2
        poolSc.add('contrast', merged)

    poolTransformed = standard.PCA(namespaceIn='contrast',
                                   namespaceOut='contrast')(poolSc)

    contrast = poolTransformed['contrast']

    pool.set(llspace+'spectral_contrast.mean', mean(contrast, axis=0))
    pool.set(llspace+'spectral_contrast.var', var(contrast, axis=0))

    pool.remove(llspace+'sccoeffs')
    pool.remove(llspace+'scvalleys')
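A minimal usage sketch for the pca() helper above, assuming only that Essentia is installed; the SpectralContrast setup and the file name are illustrative, not taken from the original project:

from essentia.standard import (MonoLoader, FrameGenerator, Windowing,
                               Spectrum, SpectralContrast)

audio = MonoLoader(filename='input.wav')()  # hypothetical input file
w, spec, sc = Windowing(type='hann'), Spectrum(), SpectralContrast(frameSize=2048)
pool = Pool()
for frame in FrameGenerator(audio, frameSize=2048, hopSize=1024):
    coeffs, valleys = sc(spec(w(frame)))
    pool.add('lowlevel.sccoeffs', coeffs)    # the keys pca() expects
    pool.add('lowlevel.scvalleys', valleys)
pca(pool)  # replaces the raw frames with spectral_contrast.mean / .var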
Example no. 2
    def get_onsets(self, _audio=None):

        # avoid a mutable default argument; fall back to the instance audio
        if _audio is not None:
            audio = _audio
        else:
            audio = self.audio

        W = es.Windowing(type=self.winType)
        c2p = es.CartesianToPolar()
        fft = es.FFT()
        onsetDetection = es.OnsetDetection(method=self.onsetMethod,
                                           sampleRate=44100)
        onsets = es.Onsets(alpha=.2)
        # onsetIndex = []
        pool = Pool()

        for frame in es.FrameGenerator(audio, frameSize=1024, hopSize=512):
            mag, phase = c2p(fft(W(frame)))
            onsetFunction = onsetDetection(mag, phase)
            pool.add("onsetFunction", onsetFunction)

        DetectedOnsetsArray = onsets([pool["onsetFunction"]], [1])

        return DetectedOnsetsArray
Example no. 4
# imports assumed by this snippet
from typing import Optional

import numpy as np
from essentia import Pool

def get_embeddings(melspecs: dict[str, np.ndarray], architectures: dict, predictors: dict) -> Optional[dict]:
    data = {}
    for architecture, metadata in architectures.items():
        input_pool = Pool()
        input_pool.set('model/Placeholder', melspecs[metadata['essentia-algorithm']])

        for dataset in metadata['datasets']:
            # TODO: chunk the input melspecs to avoid OOM error
            try:
                output_pool = predictors[f'{dataset}-{architecture}'](input_pool)
            except RuntimeError:
                return None

            for layer, layer_data in metadata['layers'].items():
                embeddings = output_pool[layer_data['name']].squeeze()

                if len(embeddings) == 0:
                    return None

                if len(embeddings.shape) == 1:
                    embeddings = np.expand_dims(embeddings, axis=0)

                data[f'{dataset}-{architecture}-{layer}'] = embeddings

    return data
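The metadata and predictor objects are not shown in this snippet; a hedged sketch of the shapes get_embeddings appears to expect (every name below is illustrative, not a confirmed schema):

# Illustrative shapes only; the real values come from the caller's config.
architectures = {
    'musicnn': {
        'essentia-algorithm': 'TensorflowInputMusiCNN',  # key into melspecs
        'datasets': ['msd'],
        'layers': {'embeddings': {'name': 'model/dense/BiasAdd'}},
    },
}
# predictors maps '<dataset>-<architecture>' to a callable that takes and
# returns an essentia Pool (e.g. a configured TensorflowPredict* instance).
# melspecs maps the algorithm name to a (frames x bands) np.ndarray.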
Example no. 5
	def get_onsets(self, in_filename):

		# Load the audio: AudioLoader reports the original sample rate,
		# while MonoLoader gives the downmixed mono signal
		audio, sampleRate, numChan = AudioLoader(filename=in_filename)()
		audio = MonoLoader(filename=in_filename)()

		self.sampleRate = sampleRate

		# 1) Compute onset detection functions
		od = OnsetDetection(method='rms')

		w = Windowing(type='hann')
		fft = FFT()
		c2p = CartesianToPolar()

		pool_features = Pool()

		# print 'Computing onset detection functions'
		for frame in FrameGenerator(audio, frameSize=self.frame_size, hopSize=self.hop_size):
			mag, phase = c2p(fft(w(frame)))
			pool_features.add('features.rms', od(mag, phase))

		# 2) Compute the onset locations
		onsets = Onsets(silenceThreshold=0.14, delay=10)

		# print 'Computing onset locations'
		onsets_rms = onsets(
							array([ pool_features['features.rms'] ]),
							[ 1 ])

		print "Num onsets: " + str(len(onsets_rms))

		return onsets_rms
Example no. 6
def compute_pitch_yin(audio):
    yin = PitchYin()
    p = Pool()
    for frame in FrameGenerator(audio,
                                frameSize=frame_size,
                                hopSize=hop_size,
                                startFromZero=True):
        p.add('pitch_yin', yin(frame)[0])
    return p['pitch_yin']
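This and the following compute_* helpers all read module-level frame_size and hop_size globals that the snippets never define; a minimal harness with assumed values:

from essentia.standard import MonoLoader

frame_size = 2048   # assumed; not given in the snippets
hop_size = 1024     # assumed

audio = MonoLoader(filename='input.wav')()  # hypothetical input file
pitch_track = compute_pitch_yin(audio)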
Example no. 7
def compute_zcr(audio):
    zcr = ZeroCrossingRate()
    p = Pool()
    for frame in FrameGenerator(audio,
                                frameSize=frame_size,
                                hopSize=hop_size,
                                startFromZero=True):
        p.add('zcr', zcr(frame))
    return p['zcr']
Example no. 8
def compute_energy(audio):
    energy = Energy()
    p = Pool()
    for frame in FrameGenerator(audio,
                                frameSize=frame_size,
                                hopSize=hop_size,
                                startFromZero=True):
        p.add('energy', energy(frame))
    return p['energy']
Example no. 9
def compute_rms(audio):
    rms = RMS()
    p = Pool()
    for frame in FrameGenerator(audio,
                                frameSize=frame_size,
                                hopSize=hop_size,
                                startFromZero=True):
        p.add('rms', rms(frame))
    return p['rms']
Example no. 10
def compute_power_spectrum(audio):
    w = Windowing(type='hann')
    power_spectrum = PowerSpectrum()
    p = Pool()
    for frame in FrameGenerator(audio,
                                frameSize=frame_size,
                                hopSize=hop_size,
                                startFromZero=True):
        p.add('power_spectrum', power_spectrum(w(frame)))
    return p['power_spectrum']
Example no. 11
    def _analyse(self, filepath):
        audio = to_mono(wavread(filepath)[0])
        audio = audio.astype('float32')
        
        w = Windowing(type = 'hann')
        fft = FFT() # this gives us a complex FFT
        c2p = CartesianToPolar() # and this turns it into a pair (magnitude, phase)
        hfc_detect = OnsetDetection(method = 'hfc')
        complex_detect = OnsetDetection(method = 'complex')
        rms_detect = RMS()
        spec = Spectrum()
        #pd = PitchDetection()
        flux = Flux()
        pool = Pool()
        #wap = WarpedAutoCorrelation()
        
    
        # let's get down to business
        print('Computing onset detection functions...')
        for frame in FrameGenerator(audio, frameSize=self.frame_size,
                                    hopSize=self.hop_size):
            mag, phase = c2p(fft(w(frame)))
            spectrum = spec(w(frame))
            f = flux(spectrum)
            #pitch = pd(spectrum)
            pool.add('hfc', hfc_detect(mag, phase))
            pool.add('complex', complex_detect(mag, phase))
            pool.add('rms', rms_detect(frame))
            pool.add('flux', f)
            #pool.add('pitch', pitch[0])
        #print pool['pitch']
        #pool.add('autoc', wap(pool['pitch']))
     

        return pool, audio
Example no. 12
def compute_spectral_flatness(audio):
    w = Windowing(type='hann')
    spectrum = Spectrum()
    flatness = Flatness()
    p = Pool()
    for frame in FrameGenerator(audio,
                                frameSize=frame_size,
                                hopSize=hop_size,
                                startFromZero=True):
        p.add('spectral_flatness', flatness(spectrum(w(frame))))
    return p['spectral_flatness']
Example no. 13
def compute_bark(audio):
    w = Windowing(type='hann')
    spectrum = Spectrum()
    bark = BarkBands()
    p = Pool()
    for frame in FrameGenerator(audio,
                                frameSize=frame_size,
                                hopSize=hop_size,
                                startFromZero=True):
        p.add('bark', bark(spectrum(w(frame))))
    return p['bark']
Example no. 14
def compute_mel(audio):
    w = Windowing(type='hann')
    spectrum = Spectrum()
    mel = MelBands(numberBands=96)
    p = Pool()
    for frame in FrameGenerator(audio,
                                frameSize=frame_size,
                                hopSize=hop_size,
                                startFromZero=True):
        p.add('mel', mel(spectrum(w(frame))))
    return p['mel']
Example no. 15
def compute_pitch_yinfft(audio):
    w = Windowing(type='hann')
    spectrum = Spectrum()
    yinfft = PitchYinFFT()
    p = Pool()
    for frame in FrameGenerator(audio,
                                frameSize=frame_size,
                                hopSize=hop_size,
                                startFromZero=True):
        p.add('pitch_yinfft', yinfft(spectrum(w(frame)))[0])
    return p['pitch_yinfft']
Example no. 16
def compute_spectral_rolloff(audio):
    w = Windowing(type='hann')
    spectrum = Spectrum()
    rolloff = RollOff()
    p = Pool()
    for frame in FrameGenerator(audio,
                                frameSize=frame_size,
                                hopSize=hop_size,
                                startFromZero=True):
        p.add('spectral_rolloff', rolloff(spectrum(w(frame))))
    return p['spectral_rolloff']
Example no. 17
	def detect_essentia(arquivo_audio, selected):  # ODF using the essentia library
		try:
		    filename = arquivo_audio
		except:
		    print("usage:", sys.argv[0], "<audiofile>")
		    sys.exit()

		# don't forget, we can actually instantiate and call an algorithm on the same line!
		global audio

		# Phase 1: compute the onset detection function
		# The OnsetDetection algorithm tells us that there are several methods available in Essentia,
		# let's do two of them
		if selected==3:
			od = OnsetDetection(method = 'hfc')
		elif selected==4:
			od = OnsetDetection(method = 'complex')
		elif selected==5:
			od = OnsetDetection(method = 'melflux')
		elif selected==6:
			od = OnsetDetection(method = 'complex_phase')
		elif selected==7:
			od = OnsetDetection(method = 'rms')
		else:
			raise ValueError('selected must be between 3 and 7')


		# let's also get the other algorithms we will need, and a pool to store the results
		w = Windowing(type = 'hann')
		fft = FFT() # this gives us a complex FFT
		c2p = CartesianToPolar() # and this turns it into a pair (magnitude, phase)

		pool = Pool()

		# let's get down to business
		print('Computing onset detection functions...')
		for frame in FrameGenerator(audio, frameSize = 1024, hopSize = 512):
		    mag, phase = c2p(fft(w(frame)))
		    pool.add('features.method', od(mag, phase))

		# Phase 2: compute the actual onsets locations
		onsets = Onsets()
		print('Computing onset times...')
		onsets_method = onsets(array([ pool['features.method'] ]), [ 1 ])

		# and mark them on the audio, which we'll write back to disk
		# we use beeps instead of white noise to mark them, as it's more distinctive

		# convert to a plain Python list
		listadet = onsets_method.tolist()

		# convert seconds to frame indices
		listadet = [int(SecToFrames(x)) for x in listadet if x >= 0]
		 
		return listadet
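SecToFrames is not defined in this snippet (nor in the similar deteccoes example at the end of this page); a plausible sketch, assuming it converts an onset time in seconds to a frame index at the analysis hop size:

def SecToFrames(seconds, sampleRate=44100, hopSize=512):
    # hypothetical reconstruction of the missing helper
    return seconds * sampleRate / hopSize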
Example no. 18
def compute_spectral_centroid(audio):
    w = Windowing(type='hann')
    spectrum = Spectrum()
    centroid = Centroid()
    p = Pool()
    for frame in FrameGenerator(audio,
                                frameSize=frame_size,
                                hopSize=hop_size,
                                startFromZero=True):
        p.add('spectral_centroid', centroid(spectrum(w(frame))))
    return p['spectral_centroid']
Example no. 19
def compute_mfcc(audio):
    w = Windowing(type='hann')
    spectrum = Spectrum()
    mfcc = MFCC()
    p = Pool()
    for frame in FrameGenerator(audio,
                                frameSize=frame_size,
                                hopSize=hop_size,
                                startFromZero=True):
        _, coeffs = mfcc(spectrum(w(frame)))
        p.add('mfcc', coeffs)
    return p['mfcc']
Example no. 20
def multipool():
	from multiprocessing import Pool
	print('Loading audio file...')
	audio = MonoLoader(filename = sys.argv[1])()
	a = AudioInfo(sys.argv[1],3,10)
	b = AudioInfo(sys.argv[1],4,10)
	c = AudioInfo(sys.argv[1],6,10)

	todo = []
	todo.append(a)
	todo.append(b)
	todo.append(c)

	pool = Pool(3)
	pool.map(detecta,todo)
Example no. 21
def compute_hpcp(audio):
    w = Windowing(type='hann')
    spectrum = Spectrum()
    peaks = SpectralPeaks(orderBy='magnitude',
                          magnitudeThreshold=0.00001,
                          minFrequency=20,
                          maxFrequency=3500,
                          maxPeaks=60)
    hpcp = HPCP()
    p = Pool()
    for frame in FrameGenerator(audio,
                                frameSize=tonal_frame_size,
                                hopSize=tonal_hop_size,
                                startFromZero=True):
        p.add('hpcp', hpcp(*peaks(spectrum(w(frame)))))
    return p['hpcp']
Example no. 22
def get_key(file_in):
    """
    Estimates the key and scale for an audio file.
    """
    loader = streaming.MonoLoader(filename=file_in)
    framecutter = streaming.FrameCutter()
    windowing = streaming.Windowing(type="blackmanharris62")
    spectrum = streaming.Spectrum()
    spectralpeaks = streaming.SpectralPeaks(orderBy="magnitude",
                                            magnitudeThreshold=1e-05,
                                            minFrequency=40,
                                            maxFrequency=5000,
                                            maxPeaks=10000)
    pool = Pool()
    hpcp = streaming.HPCP()
    key = streaming.Key()

    loader.audio >> framecutter.signal
    framecutter.frame >> windowing.frame >> spectrum.frame
    spectrum.spectrum >> spectralpeaks.spectrum
    spectralpeaks.magnitudes >> hpcp.magnitudes
    spectralpeaks.frequencies >> hpcp.frequencies
    hpcp.hpcp >> key.pcp
    key.key >> (pool, 'tonal.key_key')
    key.scale >> (pool, 'tonal.key_scale')
    key.strength >> (pool, 'tonal.key_strength')

    run(loader)

    return Key(pool['tonal.key_key'], pool['tonal.key_scale'])
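Key on the last line is a constructor imported from the enclosing module; a minimal sketch assuming it is a plain namedtuple, plus a call:

from collections import namedtuple

Key = namedtuple('Key', ['key', 'scale'])  # assumed shape

result = get_key('track.mp3')              # hypothetical input file
print(result.key, result.scale)            # e.g. 'A' 'minor'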
Example no. 23
    def computeBpmHistogram(self,
                            noveltyCurve,
                            frameSize=4,
                            overlap=2,
                            frameRate=44100. / 128.,
                            window='hann',
                            zeroPadding=0,
                            constantTempo=False,
                            minBpm=30):

        pool = Pool()
        bpmHist = ess.BpmHistogram(frameRate=frameRate,
                                   frameSize=frameSize,
                                   overlap=overlap,
                                   zeroPadding=zeroPadding,
                                   constantTempo=constantTempo,
                                   windowType='hann',
                                   minBpm=minBpm)

        gen = ess.VectorInput(noveltyCurve)
        gen.data >> bpmHist.novelty
        bpmHist.bpm >> (pool, 'bpm')
        bpmHist.bpmCandidates >> (pool, 'bpmCandidates')
        bpmHist.bpmMagnitudes >> (pool, 'bpmMagnitudes')
        bpmHist.frameBpms >> None  #(pool, 'frameBpms')
        bpmHist.tempogram >> (pool, 'tempogram')
        bpmHist.ticks >> (pool, 'ticks')
        bpmHist.ticksMagnitude >> (pool, 'ticksMagnitude')
        bpmHist.sinusoid >> (pool, 'sinusoid')
        essentia.run(gen)

        return pool
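A usage sketch with a synthetic novelty curve; a real input would come from something like ess.NoveltyCurve, and the instance name below is a placeholder:

import numpy as np

novelty = np.abs(np.random.randn(4096)).astype(np.float32)  # placeholder data
pool = extractor.computeBpmHistogram(novelty)  # extractor: instance of the enclosing class
print(pool['bpm'])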
Example no. 24
def get_bpm(file_in):
    pool = Pool()

    loader = streaming.MonoLoader(filename=file_in)
    bt = streaming.RhythmExtractor2013()
    bpm_histogram = streaming.BpmHistogramDescriptors()
    # BPM histogram output size is 250
    centroid = streaming.Centroid(range=250)

    loader.audio >> bt.signal
    bt.bpm >> (pool, 'bpm')
    bt.ticks >> None
    bt.confidence >> (pool, 'confidence')
    bt.estimates >> None
    bt.bpmIntervals >> bpm_histogram.bpmIntervals
    bpm_histogram.firstPeakBPM >> (pool, 'bpm_first_peak')
    bpm_histogram.firstPeakWeight >> None
    bpm_histogram.firstPeakSpread >> None
    bpm_histogram.secondPeakBPM >> (pool, 'bpm_second_peak')
    bpm_histogram.secondPeakWeight >> None
    bpm_histogram.secondPeakSpread >> None
    bpm_histogram.histogram >> (pool, 'bpm_histogram')
    bpm_histogram.histogram >> centroid.array
    centroid.centroid >> (pool, 'bpm_centroid')

    run(loader)
    return pool['bpm']
Example no. 25
def estimate_main_band(infile):
    """
    Estimate if this is a low, mid, or high track.

    Not _really_ sure if this does what I need it to,
    but some quick tests looked right.
    """
    loader = streaming.MonoLoader(filename=infile)
    framecutter = streaming.FrameCutter()
    windowing = streaming.Windowing(type="blackmanharris62")
    spectrum = streaming.Spectrum()
    freqbands = streaming.FrequencyBands(frequencyBands=[0, 250, 750, 4000])
    pool = Pool()

    loader.audio >> framecutter.signal
    framecutter.frame >> windowing.frame >> spectrum.frame
    spectrum.spectrum >> freqbands.spectrum
    freqbands.bands >> (pool, 'bands')

    run(loader)

    sums = np.sum(pool['bands'], axis=0)
    band = np.argmax(sums)
    if band == 0:
        return 'low'
    elif band == 1:
        return 'mid'
    elif band == 2:
        return 'high'
Example no. 26
    def _extract_pitch_contours(self, audio):
        # Hann window with x4 zero padding
        run_windowing = estd.Windowing(zeroPadding=3 * self.frame_size)
        run_spectrum = estd.Spectrum(size=self.frame_size * 4)
        run_spectral_peaks = estd.SpectralPeaks(
            minFrequency=self.min_frequency, maxFrequency=self.max_frequency,
            magnitudeThreshold=self.magnitude_threshold,
            sampleRate=self.sample_rate, orderBy='magnitude')

        # convert unit to cents, PitchSalienceFunction takes 55 Hz as the
        # default reference
        run_pitch_salience_function = estd.PitchSalienceFunction(
            binResolution=self.bin_resolution)
        run_pitch_salience_function_peaks = estd.PitchSalienceFunctionPeaks(
            binResolution=self.bin_resolution, minFrequency=self.min_frequency,
            maxFrequency=self.max_frequency)
        run_pitch_contours = estd.PitchContours(
            hopSize=self.hop_size, binResolution=self.bin_resolution,
            peakDistributionThreshold=self.peak_distribution_threshold)

        # compute frame by frame
        pool = Pool()
        for frame in estd.FrameGenerator(audio, frameSize=self.frame_size,
                                         hopSize=self.hop_size):
            frame = run_windowing(frame)
            spectrum = run_spectrum(frame)
            peak_frequencies, peak_magnitudes = run_spectral_peaks(spectrum)
            salience = run_pitch_salience_function(peak_frequencies,
                                                   peak_magnitudes)
            salience_peaks_bins, salience_peaks_contour_saliences = \
                run_pitch_salience_function_peaks(salience)
            if not np.size(salience_peaks_bins):
                salience_peaks_bins = np.array([0])
            if not np.size(salience_peaks_contour_saliences):
                salience_peaks_contour_saliences = np.array([0])

            pool.add('allframes_salience_peaks_bins', salience_peaks_bins)
            pool.add('allframes_salience_peaks_contourSaliences',
                     salience_peaks_contour_saliences)

        # post-processing: contour tracking
        contours_bins, contour_saliences, contours_start_times, duration = \
            run_pitch_contours(
                pool['allframes_salience_peaks_bins'],
                pool['allframes_salience_peaks_contourSaliences'])
        return contours_bins, contours_start_times, contour_saliences, duration
Example no. 27
def extractMFCCs(audio):
    '''
    extract MFCCs from the spectrogram
    '''

    ######## compute MFCCs
    #     maybe set highFrequencyBound=22100
    frameSizeInSamples = int(round(44100 * frameSize_block))
    hopSizeInSamples = int(round(44100 * hopSize_block))
    inputSpectrumSize = frameSizeInSamples // 2 + 1

    #     inputSpectrumSize = 1025
    mfcc = MFCC(numberCoefficients=num_mfccs,
                numberBands=numberBands,
                highFrequencyBound=highFrequencyBound,
                inputSize=inputSpectrumSize)
    w = Windowing(type='hann')
    spectrum = Spectrum()
    mfccs_array = []
    pool = Pool()

    audio = essentia.array(audio)
    for frame in FrameGenerator(audio,
                                frameSize=frameSizeInSamples,
                                hopSize=hopSizeInSamples):
        mfcc_bands, mfcc_coeffs = mfcc(spectrum(w(frame)))
        pool.add('mfcc', mfcc_coeffs)


#     mfccs_array = np.zeros( (len(spectogram), num_mfccs) )
#     for i,spectrum in enumerate(spectogram):
#
#         mfcc_bands, mfcc_coeffs = mfcc( spectrum )
#         mfccs_array[i] = mfcc_coeffs

# transpose to have it in a better shape
# we need to convert the list to an essentia.array first (== numpy.array of floats)

#     mfccs_T = essentia.array(pool['mfcc']).T
#     # and plot
#     imshow(mfccs_T, aspect = 'auto', interpolation='none')
#     show() # unnecessary if you started "ipython --pylab"

    return pool['mfcc']
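extractMFCCs depends on several module-level settings that the snippet does not show; a harness with assumed values (all numbers are illustrative):

frameSize_block = 0.046     # seconds, assumed
hopSize_block = 0.023       # seconds, assumed
num_mfccs = 13              # assumed
numberBands = 40            # assumed
highFrequencyBound = 11000  # assumed; must stay below Nyquist

mfccs = extractMFCCs(audio)  # audio: mono signal at 44100 Hz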
Example no. 28
def SliceDrums_BeatDetection(folder, audio_filename, fs):
    od_hfc = OnsetDetection(method='hfc')
    w = Windowing(type='hann')
    fft = FFT()  # this gives us a complex FFT
    c2p = CartesianToPolar()  # and this turns it into a pair (magnitude, phase)
    onsets = Onsets()

    x = MonoLoader(filename=folder + audio_filename, sampleRate=fs)()
    duration = float(len(x)) / fs

    x = x / np.max(np.abs(x))

    t = np.arange(len(x)) / float(fs)

    zero_array = t * 0  #used only for plotting purposes

    #Plotting
    f, axarr = plt.subplots(1, 1, figsize=(80, 20))

    #Essentia beat tracking
    pool = Pool()
    for frame in FrameGenerator(x, frameSize=1024, hopSize=512):
        mag, phase = c2p(fft(w(frame)))
        pool.add('features.hfc', od_hfc(mag, phase))

    onsets_list = onsets(array([pool['features.hfc']]), [1])
    axarr.vlines(onsets_list, -1, 1, color='k', zorder=2, linewidth=5.0)
    axarr.plot(t, x, zorder=1)
    axarr.axis('off')
    for i, onset in enumerate(onsets_list):
        sample = int(onset * fs) - 1000
        samplename = "{}slices/{}{}__blind.wav".format(folder,
                                                       str(len(str(i))),
                                                       str(i))
        if (i >= len(onsets_list) - 1):
            next_sample = len(x)
        else:
            next_sample = int(onsets_list[i + 1] * fs) - 1000
        x_seg = x[sample:next_sample]
        MonoWriter(filename=samplename)(x_seg)

    return onsets_list, duration
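A usage sketch for the slicer above; it writes each onset-aligned segment into a 'slices/' subfolder, which must already exist (paths are illustrative):

onsets_list, duration = SliceDrums_BeatDetection('drums/', 'loop.wav', 44100)
print(len(onsets_list), 'slices over', duration, 'seconds')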
Example no. 29
def estimate_danceability(infile):
    loader = streaming.MonoLoader(filename=infile)
    dance = streaming.Danceability()
    pool = Pool()

    loader.audio >> dance.signal
    dance.danceability >> (pool, 'danceability')

    run(loader)

    return pool['danceability']
Example no. 30
def get_cat_audio_pitch():
    
    spectrum = Spectrum()
    pitch = PitchYinFFT(frameSize=1024)
 
    pool = Pool()
    windowing = Windowing(type = 'hann')


    cat_audio = MonoLoader(filename='cat-01.wav', sampleRate=44100)()
    cat_audio_loudness = Loudness()(cat_audio)
 
    for frame in FrameGenerator(cat_audio, frameSize=1024, hopSize=512):
        spec = spectrum(windowing(frame))
        p, conf = pitch(spec)
        pool.add('cat_pitch', p)
 
 
    cat_pitch = numpy.mean(pool['cat_pitch'])
    cat_MIDI = mir_eval.multipitch.frequencies_to_midi([cat_pitch]) 
    return cat_audio, cat_MIDI[0]
Example no. 31
 def __init__(self, arch):
     self.architecture = arch
     self.in_layer = None
     self.out_layer = None
     if arch == 'musicnn':
         self.feature_extractor = es.TensorflowInputMusiCNN()
         self.frame_size = 512
         self.hop_size = 256
         self.patch_size = 187
         self.num_bands = 96
     elif arch == 'vggish':
         self.feature_extractor = es.TensorflowInputVGGish()
         self.frame_size = 400
         self.hop_size = 200
         self.patch_size = 96
         self.num_bands = 64
     self.feature_frames = []
     self.in_pool = Pool()
     self.out_pool = Pool()
     # setup model
     self.predict = None
Example no. 32
def compute_harmonic_magnitudes(contour_f0s, fftgram, idx_start, options):
    '''
    Compute the harmonic amplitudes for each frame:
    get the harmonic partials from the original spectrum.

    Params:
    --------------------
    fftgram - fftgram of the whole audio file
    times - timestamps of the whole audio

    Returns:
    hfreqs - harmonics of the contour
    magns - magnitudes of the contour
    '''

    run_harm_model_anal = HarmonicModelAnal(nHarmonics=30)

    # TODO: sanity check: times == len(fftgram) and contour_start_time_SAL in times

    pool = Pool()

    for i, contour_f0 in enumerate(contour_f0s):

        if idx_start + i > len(fftgram) - 1:
            sys.exit('idx start is {} while len fftgram is {}'.format(
                idx_start, len(fftgram)))
        fft = fftgram[idx_start + i]
        # convert to freq :
        hfreq, magn, phase = run_harm_model_anal(fft, contour_f0)

        pool.add('phases', phase)
        pool.add('hfreqs', hfreq)
        pool.add('magns', magn)

    return pool['hfreqs'], pool['magns'], pool['phases']
Example no. 33
def harmonic_magnitudes_to_audio(hfreqs, magns, phases, options):
    '''
    Resynthesize audio from the per-frame harmonics:
    convert cent bins to Hz and rebuild the harmonic partials.

    Params:

    hfreqs - harmonics of the contour
    magns - magnitudes of the contour

    Returns:

    out_audio_contour - audio of the harmonics for a contour
    spectrogram of the contour
    '''

    pool = Pool()

    run_sine_model_synth = SineModelSynth(hopSize=512, sampleRate=options.Fs)
    run_ifft = IFFT(size=options.windowsizeInSamples)
    run_overl = OverlapAdd(frameSize=options.windowsizeInSamples,
                           hopSize=512,
                           gain=1. / options.windowsizeInSamples)
    out_audio_contour = np.array(0)

    for hfreq, hmag, hphase in zip(hfreqs, magns, phases):

        spectrum, audio_frame = harmonics_to_audio(hfreq, hmag, hphase,
                                                   run_sine_model_synth,
                                                   run_ifft, run_overl)
        out_audio_contour = np.append(out_audio_contour, audio_frame)

        pool.add('spectrum', spectrum)

    # note: this overwrites the overlap-add result above with sms-tools' synthesis
    out_audio_contour = SM.sineModelSynth(hfreqs, magns, phases, 512, 128,
                                          44100)

    return out_audio_contour, pool['spectrum']
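harmonics_to_audio is called above but never defined in the snippet; a plausible per-frame sketch, assuming it drives the three preconfigured algorithms in the usual Essentia way (SineModelSynth takes magnitudes, frequencies and phases and returns a complex spectrum):

def harmonics_to_audio(hfreq, hmag, hphase,
                       run_sine_model_synth, run_ifft, run_overl):
    # hypothetical reconstruction of the missing helper
    spectrum = run_sine_model_synth(hmag, hfreq, hphase)  # one frame, complex spectrum
    audio_frame = run_overl(run_ifft(spectrum))           # back to time domain, overlap-added
    return spectrum, audio_frame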
Example no. 34
    def _extract_pitch_contours(self, audio):
        # Hann window with x4 zero padding
        run_windowing = estd.Windowing(  # pylint: disable-msg=E1101
            zeroPadding=3 * self.frame_size)
        run_spectrum = estd.Spectrum(  # pylint: disable-msg=E1101
            size=self.frame_size * 4)
        run_spectral_peaks = estd.SpectralPeaks(  # pylint: disable-msg=E1101
            minFrequency=self.min_frequency,
            maxFrequency=self.max_frequency,
            magnitudeThreshold=self.magnitude_threshold,
            sampleRate=self.sample_rate,
            orderBy='magnitude')

        # convert unit to cents, PitchSalienceFunction takes 55 Hz as the
        # default reference
        run_pitch_salience_function = \
            estd.PitchSalienceFunction(  # pylint: disable-msg=E1101
                binResolution=self.bin_resolution)
        run_pitch_salience_function_peaks = \
            estd.PitchSalienceFunctionPeaks(  # pylint: disable-msg=E1101
                binResolution=self.bin_resolution,
                minFrequency=self.min_frequency,
                maxFrequency=self.max_frequency)
        run_pitch_contours = estd.PitchContours(  # pylint: disable-msg=E1101
            hopSize=self.hop_size,
            binResolution=self.bin_resolution,
            peakDistributionThreshold=self.peak_distribution_threshold)

        # compute frame by frame
        pool = Pool()
        for frame in estd.FrameGenerator(
                audio,  # pylint: disable-msg=E1101
                frameSize=self.frame_size,
                hopSize=self.hop_size):
            frame = run_windowing(frame)
            spectrum = run_spectrum(frame)
            peak_frequencies, peak_magnitudes = run_spectral_peaks(spectrum)
            salience = run_pitch_salience_function(peak_frequencies,
                                                   peak_magnitudes)
            salience_peaks_bins, salience_peaks_contour_saliences = \
                run_pitch_salience_function_peaks(salience)
            if not np.size(salience_peaks_bins):
                salience_peaks_bins = np.array([0])
            if not np.size(salience_peaks_contour_saliences):
                salience_peaks_contour_saliences = np.array([0])

            pool.add('allframes_salience_peaks_bins', salience_peaks_bins)
            pool.add('allframes_salience_peaks_contourSaliences',
                     salience_peaks_contour_saliences)

        # post-processing: contour tracking
        contours_bins, contour_saliences, contours_start_times, duration = \
            run_pitch_contours(
                [f.tolist()
                 for f in pool['allframes_salience_peaks_bins']],
                [f.tolist()
                 for f in pool['allframes_salience_peaks_contourSaliences']])
        return contours_bins, contours_start_times, contour_saliences, duration
Example no. 35
def spectrogram(audio, audio_file, save_fig=True, save_fig_path=None):
    if audio_file.endswith('.wav'):
        w = Windowing(type='hann')
        spectrum = Spectrum()  # FFT() would return the complex FFT; here we just want the magnitude spectrum
        pool = Pool()
        ## NOTE: one second of a wav file is roughly 90 frames, and the intensity is given in Hz
        for frame in FrameGenerator(audio,
                                    frameSize=1024,
                                    hopSize=512,
                                    startFromZero=True):
            win = w(frame)
            spec = spectrum(win)
            pool.add('spec', spec)
        aggrPool = PoolAggregator(defaultStats=['mean'])(pool)
        a = sum(aggrPool['spec.mean'].T) / aggrPool['spec.mean'].T.shape[0]
        # a = aggrPool['spec.mean'].T
        # b = np.zeros(pool['spec'].T.shape)
        b = np.array(pool['spec'].T)
        # for iterator1, i in enumerate(pool['spec'].T):
        #     for iterator2, j in enumerate(i):
        #         # if j > a[iterator1]/2:
        #         if j > a/2 and j > 0.015:
        #             b[iterator1][iterator2] = j
        # b = np.array([i for i in b if i.max() > 0.01])
        # not for the new bats dataset
        b = remove_initial_zeros(b)
        b = b.tolist()
        b.reverse()
        b = remove_initial_zeros(b)
        b.reverse()
        b = np.array(b)
        if save_fig:
            if not save_fig_path:
                save_fig_path = audio_file.replace('.wav', '_spec.jpg')
            save_plots(b, save_fig_path)
        return b[:200, :200].tolist()
Example no. 36
    def chromaprint(self, analysisTime=30):
        """
        This algorithm computes the fingerprint of the input signal using the Chromaprint algorithm.
        It is a wrapper of the Chromaprint library.

        Returns: The chromaprints are returned as base64-encoded strings.
        """
        vec_input = ess.VectorInput(self.audio_vector)
        chromaprinter = ess.Chromaprinter(analysisTime=analysisTime, sampleRate=self.fs)
        pool = Pool()

        vec_input.data >> chromaprinter.signal
        chromaprinter.fingerprint >> (pool, 'chromaprint')
        run(vec_input)
        return pool['chromaprint']
Example no. 37
def compute_spectral_shape(audio):
    w = Windowing(type='hann')
    spectrum = Spectrum()
    cm = CentralMoments()
    ds = DistributionShape()
    p = Pool()
    for frame in FrameGenerator(audio,
                                frameSize=frame_size,
                                hopSize=hop_size,
                                startFromZero=True):
        spread, skewness, kurtosis = ds(cm(spectrum(w(frame))))
        p.add('spectral_spread', spread)
        p.add('spectral_skewness', skewness)
        p.add('spectral_kurtosis', kurtosis)
    return p['spectral_spread'], p['spectral_skewness'], p['spectral_kurtosis']
Example no. 38
"""
- bin 0 = 0 BPM
- bin 128 = 645.99609375 BPM
"""

sampleRate   = 22050
frameSize    = 8192
hopSize      = 1024
rmsFrameSize = 256
rmsHopSize   = 32

loader = MonoLoader(filename=input_file, sampleRate=sampleRate)
w = Windowing(type='blackmanharris62')
spectrum = Spectrum()
melbands = MelBands(sampleRate=sampleRate, numberBands=40, lowFrequencyBound=0, highFrequencyBound=sampleRate/2)

pool = Pool()

for frame in FrameGenerator(audio=loader(), frameSize=frameSize, hopSize=hopSize, startFromZero=True):
    bands = melbands(spectrum(w(frame)))
    pool.add('melbands', bands)


rhythmtransform = RhythmTransform(frameSize=rmsFrameSize, hopSize=rmsHopSize)
rt = rhythmtransform(pool['melbands'])
rt_mean = numpy.mean(rt, axis=0)
bin_resolution = 5.007721656976744


print(numpy.argmax(rt_mean) * bin_resolution)
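As a sanity check (not from the original script), the 5.0077 figure follows from the settings above: the mel-band sequence advances at sampleRate / hopSize = 22050 / 1024 ≈ 21.53 frames per second, the rhythm transform spans tempi up to 21.53 * 60 / 2 = 645.99609375 BPM, and spreads them over rmsFrameSize / 2 + 1 = 129 bins:

frame_rate = sampleRate / float(hopSize)           # ~21.533 Hz
max_bpm = frame_rate * 60. / 2                     # 645.99609375
bin_resolution = max_bpm / (rmsFrameSize / 2 + 1)  # ~5.00772 BPM per bin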

Example no. 39

if __name__ == '__main__':

    opt, args = parse_args()

    if len(args) != 2:
        print("Incorrect number of arguments\n", essentia_usage)
        sys.exit(1)


    #profile = args[0]
    input_file = args[0]
    output_file = args[1]

    pool = Pool()
    startTime = float(opt.startTime)
    endTime = float(opt.endTime)

    # compute descriptors

    readMetadata(input_file, pool)
    INFO('Process step 1: Replay Gain')
    replaygain.compute(input_file, pool, startTime, endTime)

    segments_namespace=[]
    if opt.segmentation:
        INFO('Process step 2: Low Level')
        computeLowLevel(input_file, pool, startTime, endTime)
        segmentation.compute(input_file, pool, startTime, endTime)
        segments = pool['segmentation.timestamps']

Example no. 40
if __name__ == '__main__':

    opt, args = parse_args()

    if len(args) != 2:
        print("Incorrect number of arguments\n", essentia_usage)
        sys.exit(1)


    #profile = args[0]
    input_file = args[0]
    output_file = args[1]

    neqPool = Pool()
    eqPool = Pool()
    startTime = float(opt.startTime)
    endTime = float(opt.endTime)

    # compute descriptors

    readMetadata(input_file, eqPool)
    INFO('Process step 1: Replay Gain')
    replaygain.compute(input_file, eqPool, startTime, endTime)

    segments_namespace=[]
    neqPool.merge(eqPool, 'replace')
    if opt.segmentation:
        INFO('Process step 2: Low Level')
        computeLowLevel(input_file, neqPool, eqPool, startTime, endTime)
Example no. 41
  def run(self, fname):
    citation = u"""
            Atlı, H. S., Uyar, B., Şentürk, S., Bozkurt, B., and Serra, X.
            (2014). Audio feature extraction for exploring Turkish makam music.
            In Proceedings of 3rd International Conference on Audio Technologies
            for Music and Media, Ankara, Turkey.
            """

    run_windowing = Windowing(zeroPadding = 3 * self.settings.frameSize) # Hann window with x4 zero padding
    run_spectrum = Spectrum(size=self.settings.frameSize * 4)

    run_spectral_peaks = SpectralPeaks(minFrequency=self.settings.minFrequency,
            maxFrequency = self.settings.maxFrequency,
            maxPeaks = self.settings.maxPeaks,
            sampleRate = self.settings.sampleRate,
            magnitudeThreshold = self.settings.magnitudeThreshold,
            orderBy = 'magnitude')

    run_pitch_salience_function = PitchSalienceFunction(binResolution=self.settings.binResolution) # converts unit to cents, 55 Hz is taken as the default reference
    run_pitch_salience_function_peaks = PitchSalienceFunctionPeaks(binResolution=self.settings.binResolution)
    run_pitch_contours = PitchContours(hopSize=self.settings.hopSize,
            binResolution=self.settings.binResolution,
            peakDistributionThreshold = self.settings.peakDistributionThreshold)
    pool = Pool()

    # load audio and eqLoudness
    audio = MonoLoader(filename = fname)() # MonoLoader resamples the audio signal to 44100 Hz by default
    audio = EqualLoudness()(audio)

    for frame in FrameGenerator(audio,frameSize=self.settings.frameSize, hopSize=self.settings.hopSize):
      frame = run_windowing(frame)
      spectrum = run_spectrum(frame)
      peak_frequencies, peak_magnitudes = run_spectral_peaks(spectrum)
      salience = run_pitch_salience_function(peak_frequencies, peak_magnitudes)
      salience_peaks_bins, salience_peaks_contourSaliences = run_pitch_salience_function_peaks(salience)
      if not size(salience_peaks_bins):
          salience_peaks_bins = array([0])
      if not size(salience_peaks_contourSaliences):
          salience_peaks_contourSaliences = array([0])

      pool.add('allframes_salience_peaks_bins', salience_peaks_bins)
      pool.add('allframes_salience_peaks_contourSaliences', salience_peaks_contourSaliences)

    # post-processing: contour tracking
    contours_bins, contours_contourSaliences, contours_start_times, duration = run_pitch_contours(
            pool['allframes_salience_peaks_bins'],
            pool['allframes_salience_peaks_contourSaliences'])

    # run the simplified contour selection
    #[pitch, pitch_salience] = self.ContourSelection(contours_bins,contours_contourSaliences,contours_start_times,duration)

    # cent to Hz conversion
    #pitch = [0. if p == 0 else 55.*(2.**(((self.settings.binResolution*(p)))/1200)) for p in pitch]

    # generate time stamps
    #time_stamps = [s*self.settings.hopSize/float(self.settings.sampleRate) for s in xrange(0,len(pitch))]

    # [time pitch salience] matrix
    #out = transpose(vstack((time_stamps, pitch, pitch_salience)))
    #out = out.tolist()
    
    # settings
    settings = self.settings
    settings.update({'version':self.__version__, 
            'slug':self.__slug__, 
            'source': fname,
            'essentiaVersion': essentia.__version__,
            'pitchUnit': 'Hz',
            'citation': citation})

    # matlab 
    #matout = cStringIO.StringIO()
    #matob = {'pitch': out}
    #matob.update(settings)

    #scipy.io.savemat(matout, matob)
    
    #return out
    
    # unused
    #return {'pitch': json.dumps(out),
    #        'matlab': matout.getvalue(),
    #        'settings': json.dumps(settings)}

    return contours_bins, contours_contourSaliences, contours_start_times, duration


if __name__ == "__main__":

    opt, args = parse_args()

    if len(args) != 2:
        print("Incorrect number of arguments\n", essentia_usage)
        sys.exit(1)

    # profile = args[0]
    input_file = args[0]
    output_file = args[1]

    neqPool = Pool()
    eqPool = Pool()
    startTime = float(opt.startTime)
    endTime = float(opt.endTime)

    # compute descriptors

    readMetadata(input_file, eqPool)
    INFO("Process step 1: Replay Gain")
    replaygain.compute(input_file, eqPool, startTime, endTime)

    segments_namespace = []
    neqPool.merge(eqPool, "replace")
    if opt.segmentation:
        INFO("Process step 2: Low Level")
        computeLowLevel(input_file, neqPool, eqPool, startTime, endTime)
Example no. 43
def computeOnsets(inFile, outFile):
    print(outFile)
    # In this example we are going to look at how to perform some onset detection
    # and mark them on the audio using the AudioOnsetsMarker algorithm.
    #
    # Onset detection consists of two main phases:
    #  1- we need to compute an onset detection function, which is a function
    #     describing the evolution of some parameters, which might be representative
    #     of whether we might find an onset or not
    #  2- performing the actual onset detection, that is given a number of these
    #     detection functions, decide where in the sound there actually are onsets

    # we're going to work with a file specified as an argument in the command line
    # try:
    #     filename = sys.argv[1]
    # except:
    #     print "usage:", sys.argv[0], "<audiofile>"
    #     sys.exit()

    # don't forget, we can actually instantiate and call an algorithm on the same line!
    print "Loading audio file..."
    audio = MonoLoader(filename=inFile)()

    # Phase 1: compute the onset detection function
    # The OnsetDetection algorithm tells us that there are several methods available in Essentia,
    # let's do two of them

    od1 = OnsetDetection(method="hfc")
    od2 = OnsetDetection(method="complex")

    # let's also get the other algorithms we will need, and a pool to store the results

    w = Windowing(type="hann")
    fft = FFT()  # this gives us a complex FFT
    c2p = CartesianToPolar()  # and this turns it into a pair (magnitude, phase)

    pool = Pool()

    # let's get down to business
    print "Computing onset detection functions..."
    for frame in FrameGenerator(audio, frameSize=1024, hopSize=512):
        mag, phase, = c2p(fft(w(frame)))
        pool.add("features.hfc", od1(mag, phase))
        pool.add("features.complex", od2(mag, phase))

    # Phase 2: compute the actual onsets locations
    onsets = Onsets()

    print "Computing onset times..."
    onsets_hfc = onsets(  # this algo expects a matrix, not a vector
        array([pool["features.hfc"]]),
        # you need to specify weights, but as there is only a single
        # function, it doesn't actually matter which weight you give it
        [1],
    )
    np.savetxt(outFile, onsets_hfc, fmt="%f")

    # Let's just take the complex as an example
    # (note: this second savetxt overwrites the HFC onsets written above)
    onsets_complex = onsets(array([pool["features.complex"]]), [1])

    np.savetxt(outFile, onsets_complex, fmt="%f")
Example no. 44
audio = MonoLoader(filename=filename)()

# Phase 1: compute the onset detection function
# The OnsetDetection algorithm tells us that there are several methods available in Essentia,
# let's do two of them

od1 = OnsetDetection(method="hfc")
od2 = OnsetDetection(method="complex")

# let's also get the other algorithms we will need, and a pool to store the results

w = Windowing(type="hann")
fft = FFT()  # this gives us a complex FFT
c2p = CartesianToPolar()  # and this turns it into a pair (magnitude, phase)

pool = Pool()

# let's get down to business
print "Computing onset detection functions..."
for frame in FrameGenerator(audio, frameSize=1024, hopSize=512):
    mag, phase, = c2p(fft(w(frame)))
    pool.add("features.hfc", od1(mag, phase))
    pool.add("features.complex", od2(mag, phase))


# Phase 2: compute the actual onsets locations
onsets = Onsets()

print "Computing onset times..."
onsets_hfc = onsets(  # this algo expects a matrix, not a vector
    array([pool["features.hfc"]]),
Example no. 45
def find_files(directory, pattern):
    for root, dirs, files in os.walk(directory):
        for basename in files:
            if basename.lower().endswith(pattern):
                filename = os.path.join(root, basename)
                yield filename

try:
    indir = sys.argv[1]
    result_file = sys.argv[2]
except:
    print("usage:", sys.argv[0], "<input-directory> <result.json>")
    sys.exit()


result = Pool()
files = [f for f in find_files(indir, FILE_EXT)]

print('Found', len(files), 'audio files (' + '/'.join(FILE_EXT) + ')')

i = 0
for filename in files:
    i += 1
    print('Extracting metadata:', filename)
    namespace = 'track_' + str(i)
    try:
        meta = MetadataReader(filename=filename, failOnError=True, tagPoolName=namespace + '.metadata')()
        pool_meta, duration, bitrate, samplerate, channels = meta[7:]
        pool_meta.set(namespace + ".file_path", os.path.relpath(filename))
        pool_meta.set(namespace + ".duration", duration)
        pool_meta.set(namespace + ".bit_rate", bitrate)
Example no. 46
audio = MonoLoader(filename = filename)()

# Phase 1: compute the onset detection function
# The OnsetDetection algorithm tells us that there are several methods available in Essentia,
# let's do two of them

od1 = OnsetDetection(method = 'hfc')
od2 = OnsetDetection(method = 'complex')

# let's also get the other algorithms we will need, and a pool to store the results

w = Windowing(type = 'hann')
fft = FFT() # this gives us a complex FFT
c2p = CartesianToPolar() # and this turns it into a pair (magnitude, phase)

pool = Pool()

# let's get down to business
print('Computing onset detection functions...')
for frame in FrameGenerator(audio, frameSize = 1024, hopSize = 512):
    mag, phase = c2p(fft(w(frame)))
    pool.add('features.hfc', od1(mag, phase))
    pool.add('features.complex', od2(mag, phase))


# Phase 2: compute the actual onsets locations
onsets = Onsets()

print('Computing onset times...')
onsets_hfc = onsets(# this algo expects a matrix, not a vector
                    array([ pool['features.hfc'] ]),
                    # weights; with a single detection function the value doesn't matter
                    [ 1 ])
Example no. 47
def extractFeatures(arffDir = '.', dirname = '.', fnames = '', segment_length = 'WHOLE', hopsize = 0):
	# Start to process file by file from input
	for fname in fnames:
		# It only process wav or mp3 file
		if ".wav" not in fname.lower() and ".mp3" not in fname.lower(): continue

		# Generate output dir
		trackName = fname.split('/')[-1]
		segmentArffDir = arffDir+"/"+trackName[:-4]+"/"
		if not exists(segmentArffDir):
			mkdir(segmentArffDir)
		else:
			print(fname + ' exists, skipping...')
			continue
		
		# Read audio and some more info
		loader = es.EasyLoader(filename = dirname+"/"+fname)
		audio = loader.compute()
		sampleRate = loader.paramValue('sampleRate')
		length = int(len(audio)/sampleRate)
		if length == 0: length = 1
		print(fname + ' length: ' + str(length))

		if hopsize == 0:
			hopsize = segment_length
			
		# Specify the length of the segment
		if segment_length == 'WHOLE':
			step = length
			end_time = length
			segment_length = length
			print('The whole audio is being processed...')
		else:
			step = hopsize
			segment_length = float(segment_length)
			if step>length: continue

		# Start computing segment by segment
		for start_time in arange(0, length, step):
			end_time = start_time + segment_length
			if step != length:
				print('the time from second ' + str(start_time) + ' is being processed...')
			if end_time > length:
				break
			segAudio = audio[int(start_time*sampleRate):int(end_time*sampleRate)]
			pool = Pool()

			# Setup parameters 
			specContrast = es.SpectralContrast(frameSize=2048, lowFrequencyBound=40, sampleRate=sampleRate)
			spectrum = es.Spectrum(size=2048) #size is frameSize
			mfcc = es.MFCC(lowFrequencyBound=40, sampleRate=sampleRate) # MFCC
			if step > 20:
				hpcp = es.HPCP(size = 12, referenceFrequency = 440, harmonics=8, bandPreset = True, minFrequency = 40.0, maxFrequency = 5000.0, \
					splitFrequency = 500.0, weightType = 'cosine', nonLinear = False, windowSize = 1)  # HPCP
			lowLevelSpectralExtractor = \
				es.LowLevelSpectralExtractor(frameSize=2048, hopSize=1024, sampleRate=sampleRate)
			spectralPeaks = es.SpectralPeaks(sampleRate=sampleRate, minFrequency=40, maxFrequency=11000, maxPeaks=50, magnitudeThreshold=0.2)

			# Low level spectral feature analysis
			try:
				features = lowLevelSpectralExtractor(segAudio)
			except:
				print start_time, "has failed!"
				continue
			
			
			# Harmonic spectral features (TODO: Is the magnitude threshold ok?)
			harmonicPeaks = es.HarmonicPeaks()
			pitch = es.PitchDetection()	# Using YIN instead of predominant pitch analysis as this frame-based analysis


			# Windowing
			window = es.Windowing(size=2048)
			for frame in es.FrameGenerator(segAudio, frameSize=2048, hopSize=1024):
				# spectral contrast
				s = spectrum(window(frame))
				contrast, valley = specContrast(s)
				pool.add('spectral_contrast', contrast)
				pool.add('spectral_valley', valley)

				# MFCC
				bands, mfccs = mfcc(s)
				pool.add('mfcc', mfccs[1:])

				freqs, mags = spectralPeaks(s)

				# HPCP
				if step > 20:
					hpcps = hpcp(freqs, mags)
					pool.add('HPCP', hpcps) 

				# Self-compute spectral features
				if len(freqs) > 0:
					p, conf = pitch(s)
					if freqs[0] == 0:
						freqs = freqs[1:]
						mags = mags[1:]
					freqs, mags = harmonicPeaks(freqs, mags, p)
					_sum = 0
					if len(freqs) == 1:
						specEnvelope_i = [freqs[0]] #for hsd
						_sum = freqs[0]*mags[0]
					elif len(freqs) == 2:
						specEnvelope_i = [(freqs[0]+freqs[1])/2.0] #for hsd
						_sum = freqs[0]*mags[0]+freqs[1]*mags[1]
					elif len(freqs) > 2:
						specEnvelope_i = [(freqs[0]+freqs[1])/2.0] #for hsd
						_sum = freqs[0]*mags[0]
						for i in range(1, len(freqs)-1):
							_sum += freqs[i]*mags[i] #for hsc_i
							specEnvelope_i.append((freqs[i-1]+freqs[i]+freqs[i+1])/3.0)
						specEnvelope_i.append((freqs[i]+freqs[i+1])/2.0)
						_sum += freqs[i+1]*mags[i+1]
					hsc_i = _sum/sum(mags)
					pool.add('harmonic_spectral_centroid', hsc_i)
					hsd_i = sum(abs(log10(mags)-log10(specEnvelope_i)))/sum(log10(mags))
					pool.add('harmonic_spectral_deviation', hsd_i)
					hss_i = sqrt(sum(square(freqs-hsc_i)*square(mags))/sum(square(mags)))/hsc_i
					pool.add('harmonic_spectral_spread', hss_i)
				else:
					pool.add('harmonic_spectral_centroid', 0)
					pool.add('harmonic_spectral_deviation', 0)
					pool.add('harmonic_spectral_spread', 0)


			for i in range(0, len(features[0])):
			#	pool.add('barkbands', features[0][i])
				pool.add('hfc', features[4][i])
				pool.add('pitch', features[6][i])
				pool.add('pitch_instantaneous_confidence', features[7][i])
				pool.add('pitch_salience', features[8][i])
				pool.add('silence_rate_20dB', features[9][i])
			#	pool.add('silence_rate_30dB', features[10][i])
			#	pool.add('silence_rate_60dB', features[11][i])
				pool.add('spectral_complexity', features[12][i])
				pool.add('spectral_crest', features[13][i])
				pool.add('spectral_decrease', features[14][i])
				pool.add('spectral_energy', features[15][i])
			#	pool.add('spectral_energyband_low', features[16][i])
			#	pool.add('spectral_energyband_middle_low', features[17][i])
			#	pool.add('spectral_energyband_middle_high', features[18][i])
			#	pool.add('spectral_energy_high', features[19][i])
				pool.add('spectral_flatness_db', features[20][i])
				pool.add('spectral_flux', features[21][i])
				pool.add('spectral_rms', features[22][i])
				pool.add('spectral_rolloff', features[23][i])
				pool.add('spectral_strongpeak', features[24][i])
				pool.add('zero_crossing_rate', features[25][i])
				pool.add('inharmonicity',  features[26][i])
				pool.add('tristimulus',  features[27][i])
			
			onsetRate = es.OnsetRate()
			onsets, rate = onsetRate(segAudio)
			try:
				aggrPool = es.PoolAggregator(defaultStats = ['mean', 'var', 'skew', 'kurt'])(pool)
			except:
				print start_time/step, "failed"
				continue

			aggrPool.add('onset_rate', rate)
							
			#print start_time, segment_length, start_time/segment_length
			fileout = segmentArffDir+trackName[:-4]+"_%003d%s"%(start_time/step, ".sig")
			output = es.YamlOutput(filename = fileout)
			output(aggrPool)
Example no. 48
  def run(self, fname):
    citation = u'Atlı, H. S., Uyar, B., Şentürk, S., Bozkurt, B., and Serra, X. ' \
                '(2014). Audio feature extraction for exploring Turkish makam music. ' \
                'In Proceedings of 3rd International Conference on Audio Technologies ' \
                'for Music and Media, Ankara, Turkey.'

    run_windowing = Windowing(zeroPadding = 3 * self.settings.frameSize) # Hann window with x4 zero padding
    run_spectrum = Spectrum(size=self.settings.frameSize * 4)

    run_spectral_peaks = SpectralPeaks(minFrequency=self.settings.minFrequency,
            maxFrequency = self.settings.maxFrequency,
            maxPeaks = self.settings.maxPeaks,
            sampleRate = self.settings.sampleRate,
            magnitudeThreshold = self.settings.magnitudeThreshold,
            orderBy = 'magnitude')

    run_pitch_salience_function = PitchSalienceFunction(binResolution=self.settings.binResolution) # converts unit to cents, 55 Hz is taken as the default reference
    run_pitch_salience_function_peaks = PitchSalienceFunctionPeaks(binResolution=self.settings.binResolution)
    run_pitch_contours = PitchContours(hopSize=self.settings.hopSize,
            binResolution=self.settings.binResolution,
            peakDistributionThreshold = self.settings.peakDistributionThreshold)
    pool = Pool()

    # load audio and eqLoudness
    audio = MonoLoader(filename = fname)() # MonoLoader resamples the audio signal to 44100 Hz by default
    audio = EqualLoudness()(audio)

    for frame in FrameGenerator(audio,frameSize=self.settings.frameSize, hopSize=self.settings.hopSize):
      frame = run_windowing(frame)
      spectrum = run_spectrum(frame)
      peak_frequencies, peak_magnitudes = run_spectral_peaks(spectrum)
      salience = run_pitch_salience_function(peak_frequencies, peak_magnitudes)
      salience_peaks_bins, salience_peaks_contourSaliences = run_pitch_salience_function_peaks(salience)
      if not size(salience_peaks_bins):
          salience_peaks_bins = array([0])
      if not size(salience_peaks_contourSaliences):
          salience_peaks_contourSaliences = array([0])

      pool.add('allframes_salience_peaks_bins', salience_peaks_bins)
      pool.add('allframes_salience_peaks_contourSaliences', salience_peaks_contourSaliences)

    # post-processing: contour tracking
    contours_bins, contours_contourSaliences, contours_start_times, duration = run_pitch_contours(
            pool['allframes_salience_peaks_bins'],
            pool['allframes_salience_peaks_contourSaliences'])

    # WARNING: As of 3 April 2015, the values in "contours_start_times" leads the audio
    # by 1024 + 128 samples if the read audio is in mp3 format as explained in 
    # https://github.com/MTG/essentia/issues/246. This roots because of the typical
    # encoder/decoder problems. For now We are advancing the values in "contours_start_times"
    # by 1152 samples. Uncomment the next line if this problem is fixed.
    contours_start_times = [c + (1024+128)/float(self.settings.sampleRate) for c in contours_start_times]

    # run the simplified contour selection
    [pitch, pitch_salience] = self.ContourSelection(contours_bins,contours_contourSaliences,contours_start_times,duration)

    # cent to Hz conversion
    pitch = [0. if p == 0 else 55.*(2.**(((self.settings.binResolution*(p)))/1200)) for p in pitch]

    # generate time stamps
    time_stamps = [s*self.settings.hopSize/float(self.settings.sampleRate) for s in range(0,len(pitch))]

    # [time pitch salience] matrix
    out = transpose(vstack((time_stamps, pitch, pitch_salience)))
    out = out.tolist()
    
    # settings
    settings = self.settings
    settings.update({'version':self.__version__, 
            'slug':self.__slug__, 
            'source': fname,
            'essentiaVersion': essentia.__version__,
            'pitchUnit': 'Hz',
            'citation': citation})

    # matlab 
    matout = cStringIO.StringIO()
    matob = {'pitch': out}
    matob.update(settings)

    scipy.io.savemat(matout, matob)

    return {'pitch': json.dumps(out),
            'matlab': matout.getvalue(),
            'settings': json.dumps(settings)}
Example no. 49
	def deteccoes(arquivo_audio):  # Return a list with all detections

		try:
		    filename = sys.argv[1]
		except:
		    print("usage:", sys.argv[0], "<audiofile>")
		    sys.exit()

		audio = MonoLoader(filename = filename)()

		# Phase 1: compute the onset detection function
		# The OnsetDetection algorithm tells us that there are several methods available in Essentia,
		# let's do two of them

		od1 = OnsetDetection(method = 'hfc')
		od2 = OnsetDetection(method = 'complex')

		# let's also get the other algorithms we will need, and a pool to store the results

		w = Windowing(type = 'hann')
		fft = FFT() # this gives us a complex FFT
		c2p = CartesianToPolar() # and this turns it into a pair (magnitude, phase)

		pool = Pool()

		# let's get down to business
		print('Computing onset detection functions...')
		for frame in FrameGenerator(audio, frameSize = 1024, hopSize = 512):
		    mag, phase = c2p(fft(w(frame)))
		    pool.add('features.hfc', od1(mag, phase))
		    pool.add('features.complex', od2(mag, phase))


		# Phase 2: compute the actual onsets locations
		onsets = Onsets()

		print('Computing onset times...')
		onsets_hfc = onsets(# this algo expects a matrix, not a vector
		                    array([ pool['features.hfc'] ]),

		                    # you need to specify weights, but as there is only a single
		                    # function, it doesn't actually matter which weight you give it
		                    [ 1 ])

		onsets_complex = onsets(array([ pool['features.complex'] ]), [ 1 ])

		# and mark them on the audio, which we'll write back to disk
		# we use beeps instead of white noise to mark them, as it's more distinctive
		print('Writing audio files to disk with onsets marked...')

		# mark the 'hfc' onsets:

		# convert to plain Python lists
		listadethfc = onsets_hfc.tolist()
		listadetcomplex = onsets_complex.tolist()

		# convert seconds to frame indices
		listadethfc = [int(SecToFrames(x)) for x in listadethfc if x >= 0]
		listadetcomplex = [int(SecToFrames(x)) for x in listadetcomplex if x >= 0]

		return listadetcomplex