Example #1
	def get_onsets(self, in_filename):

		# print in_filename
		# Probe the file for its sample rate, then load the audio in mono
		audio, sampleRate, numChan = AudioLoader(filename=in_filename)()[:3]
		audio = MonoLoader(filename=in_filename)()

		self.sampleRate = sampleRate

		# 1) Compute onset detection functions
		od = OnsetDetection(method='rms')

		w = Windowing(type='hann')
		fft = FFT()
		c2p = CartesianToPolar()

		pool_features = Pool()

		# print 'Computing onset detection functions'
		for frame in FrameGenerator(audio, frameSize=self.frame_size, hopSize=self.hop_size):
			mag, phase = c2p(fft(w(frame)))
			pool_features.add('features.rms', od(mag, phase))

		# 2) Compute the onset locations
		onsets = Onsets(silenceThreshold=0.14, delay=10)

		# print 'Computing onset locations'
		onsets_rms = onsets(
							array([ pool_features['features.rms'] ]),
							[ 1 ])

		print "Num onsets: " + str(len(onsets_rms))

		return onsets_rms
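
The examples on this page assume Essentia's standard-mode Python bindings (plus NumPy) are already imported at module level. A minimal sketch of the assumed preamble is shown below; the wildcard import is an assumption, not part of any original snippet:

# Assumed preamble for the examples on this page (a sketch):
import numpy as np

import essentia
from essentia import Pool, array
from essentia.standard import *  # MonoLoader, Windowing, FFT, Onsets, ...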
Example #2
def pca(pool, namespace=''):
    llspace = 'lowlevel.'
    if namespace: llspace = namespace + '.lowlevel.'
    sccoeffs = pool[llspace + 'sccoeffs']
    scvalleys = pool[llspace + 'scvalleys']
    numFrames = len(sccoeffs)
    poolSc = Pool()
    merged = essentia.zeros(2*len(sccoeffs[0]))
    for frame in xrange(numFrames):
        j = 0
        for i in xrange(len(sccoeffs[frame])):
            merged[j]=sccoeffs[frame][i]
            merged[j+1]=scvalleys[frame][i]
            j+=2
        poolSc.add('contrast', merged)

    poolTransformed = standard.PCA(namespaceIn='contrast',
                                   namespaceOut='contrast')(poolSc)

    contrast = poolTransformed['contrast']

    pool.set(llspace+'spectral_contrast.mean', mean(contrast, axis=0))
    pool.set(llspace+'spectral_contrast.var', var(contrast, axis=0))

    pool.remove(llspace+'sccoeffs')
    pool.remove(llspace+'scvalleys')
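
A minimal usage sketch for pca() follows; the SpectralContrast frame loop, file name, and frame/hop sizes are illustrative assumptions, not part of the original example:

# Hypothetical driver for pca() above; parameter values are illustrative.
from essentia import Pool
from essentia.standard import (FrameGenerator, MonoLoader, SpectralContrast,
                               Spectrum, Windowing)

audio = MonoLoader(filename='input.wav')()
w, spectrum = Windowing(type='hann'), Spectrum()
spectral_contrast = SpectralContrast(frameSize=2048)
pool = Pool()
for frame in FrameGenerator(audio, frameSize=2048, hopSize=1024):
    coeffs, valleys = spectral_contrast(spectrum(w(frame)))
    pool.add('lowlevel.sccoeffs', coeffs)
    pool.add('lowlevel.scvalleys', valleys)
pca(pool)  # replaces the raw frames with PCA-based mean/var statistics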
Example #4
    def get_onsets(self, _audio=[]):

        if len(_audio) > 0:
            audio = _audio
        else:
            audio = self.audio

        W = es.Windowing(type=self.winType)
        c2p = es.CartesianToPolar()
        fft = es.FFT()
        onsetDetection = es.OnsetDetection(method=self.onsetMethod,
                                           sampleRate=44100)
        onsets = es.Onsets(alpha=.2)
        # onsetIndex = []
        pool = Pool()

        for frame in es.FrameGenerator(audio, frameSize=1024, hopSize=512):
            mag, phase = c2p(fft(W(frame)))
            onsetFunction = onsetDetection(mag, phase)
            pool.add("onsetFunction", onsetFunction)

        DetectedOnsetsArray = onsets([pool["onsetFunction"]], [1])

        return DetectedOnsetsArray
Example #5
def compute_harmonic_magnitudes(contour_f0s, fftgram, idx_start, options):
    '''
    Compute the harmonic amplitudes for each frame of a contour,
    sampling the harmonic partials from the original spectrum.

    Params:
    --------------------
    contour_f0s - per-frame f0 values of the contour
    fftgram - fftgram of the whole audio file
    idx_start - index of the contour's first frame in the fftgram

    Returns:
    hfreqs - harmonic frequencies of the contour
    magns - harmonic magnitudes of the contour
    phases - harmonic phases of the contour
    '''

    run_harm_model_anal = HarmonicModelAnal(nHarmonics=30)

    # TODO: sanity check: times == len(fftgram) and contour_start_time_SAL in times

    pool = Pool()

    for i, contour_f0 in enumerate(contour_f0s):

        if idx_start + i > len(fftgram) - 1:
            sys.exit('idx_start is {} while len(fftgram) is {}'.format(
                idx_start, len(fftgram)))
        fft = fftgram[idx_start + i]
        # convert to freq :
        hfreq, magn, phase = run_harm_model_anal(fft, contour_f0)

        pool.add('phases', phase)
        pool.add('hfreqs', hfreq)
        pool.add('magns', magn)

    return pool['hfreqs'], pool['magns'], pool['phases']
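
compute_harmonic_magnitudes() expects a precomputed fftgram; a hedged sketch of building one is shown below (the helper name, window type, and frame/hop sizes are assumptions):

# Hypothetical helper: build an fftgram (one complex FFT frame per hop)
# to feed compute_harmonic_magnitudes() above.
from essentia.standard import FFT, FrameGenerator, MonoLoader, Windowing

def build_fftgram(filename, frame_size=2048, hop_size=128):
    audio = MonoLoader(filename=filename)()
    w = Windowing(type='blackmanharris62')
    fft = FFT(size=frame_size)
    return [fft(w(frame)) for frame in
            FrameGenerator(audio, frameSize=frame_size, hopSize=hop_size)]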
Example #6
    def _extract_pitch_contours(self, audio):
        # Hann window with x4 zero padding
        run_windowing = estd.Windowing(  # pylint: disable-msg=E1101
            zeroPadding=3 * self.frame_size)
        run_spectrum = estd.Spectrum(  # pylint: disable-msg=E1101
            size=self.frame_size * 4)
        run_spectral_peaks = estd.SpectralPeaks(  # pylint: disable-msg=E1101
            minFrequency=self.min_frequency,
            maxFrequency=self.max_frequency,
            magnitudeThreshold=self.magnitude_threshold,
            sampleRate=self.sample_rate,
            orderBy='magnitude')

        # convert unit to cents, PitchSalienceFunction takes 55 Hz as the
        # default reference
        run_pitch_salience_function = \
            estd.PitchSalienceFunction(  # pylint: disable-msg=E1101
                binResolution=self.bin_resolution)
        run_pitch_salience_function_peaks = \
            estd.PitchSalienceFunctionPeaks(  # pylint: disable-msg=E1101
                binResolution=self.bin_resolution,
                minFrequency=self.min_frequency,
                maxFrequency=self.max_frequency)
        run_pitch_contours = estd.PitchContours(  # pylint: disable-msg=E1101
            hopSize=self.hop_size,
            binResolution=self.bin_resolution,
            peakDistributionThreshold=self.peak_distribution_threshold)

        # compute frame by frame
        pool = Pool()
        for frame in estd.FrameGenerator(
                audio,  # pylint: disable-msg=E1101
                frameSize=self.frame_size,
                hopSize=self.hop_size):
            frame = run_windowing(frame)
            spectrum = run_spectrum(frame)
            peak_frequencies, peak_magnitudes = run_spectral_peaks(spectrum)
            salience = run_pitch_salience_function(peak_frequencies,
                                                   peak_magnitudes)
            salience_peaks_bins, salience_peaks_contour_saliences = \
                run_pitch_salience_function_peaks(salience)
            if not np.size(salience_peaks_bins):
                salience_peaks_bins = np.array([0])
            if not np.size(salience_peaks_contour_saliences):
                salience_peaks_contour_saliences = np.array([0])

            pool.add('allframes_salience_peaks_bins', salience_peaks_bins)
            pool.add('allframes_salience_peaks_contourSaliences',
                     salience_peaks_contour_saliences)

        # post-processing: contour tracking
        contours_bins, contour_saliences, contours_start_times, duration = \
            run_pitch_contours(
                [f.tolist()
                 for f in pool['allframes_salience_peaks_bins']],
                [f.tolist()
                 for f in pool['allframes_salience_peaks_contourSaliences']])
        return contours_bins, contours_start_times, contour_saliences, duration
Example #7
def compute_energy(audio):
    energy = Energy()
    p = Pool()
    for frame in FrameGenerator(audio,
                                frameSize=frame_size,
                                hopSize=hop_size,
                                startFromZero=True):
        p.add('energy', energy(frame))
    return p['energy']
Example #8
def compute_pitch_yin(audio):
    yin = PitchYin()
    p = Pool()
    for frame in FrameGenerator(audio,
                                frameSize=frame_size,
                                hopSize=hop_size,
                                startFromZero=True):
        p.add('pitch_yin', yin(frame)[0])
    return p['pitch_yin']
Example #9
def compute_zcr(audio):
    zcr = ZeroCrossingRate()
    p = Pool()
    for frame in FrameGenerator(audio,
                                frameSize=frame_size,
                                hopSize=hop_size,
                                startFromZero=True):
        p.add('zcr', zcr(frame))
    return p['zcr']
Example #10
def compute_rms(audio):
    rms = RMS()
    p = Pool()
    for frame in FrameGenerator(audio,
                                frameSize=frame_size,
                                hopSize=hop_size,
                                startFromZero=True):
        p.add('rms', rms(frame))
    return p['rms']
Example #11
def compute_power_spectrum(audio):
    w = Windowing(type='hann')
    power_spectrum = PowerSpectrum()
    p = Pool()
    for frame in FrameGenerator(audio,
                                frameSize=frame_size,
                                hopSize=hop_size,
                                startFromZero=True):
        p.add('power_spectrum', power_spectrum(w(frame)))
    return p['power_spectrum']
Example #12
def compute_spectral_rolloff(audio):
    w = Windowing(type='hann')
    spectrum = Spectrum()
    rolloff = RollOff()
    p = Pool()
    for frame in FrameGenerator(audio,
                                frameSize=frame_size,
                                hopSize=hop_size,
                                startFromZero=True):
        p.add('spectral_rolloff', rolloff(spectrum(w(frame))))
    return p['spectral_rolloff']
Example #13
def compute_spectral_centroid(audio):
    w = Windowing(type='hann')
    spectrum = Spectrum()
    centroid = Centroid()
    p = Pool()
    for frame in FrameGenerator(audio,
                                frameSize=frame_size,
                                hopSize=hop_size,
                                startFromZero=True):
        p.add('spectral_centroid', centroid(spectrum(w(frame))))
    return p['spectral_centroid']
Example #14
def compute_mel(audio):
    w = Windowing(type='hann')
    spectrum = Spectrum()
    mel = MelBands(numberBands=96)
    p = Pool()
    for frame in FrameGenerator(audio,
                                frameSize=frame_size,
                                hopSize=hop_size,
                                startFromZero=True):
        p.add('mel', mel(spectrum(w(frame))))
    return p['mel']
Example #15
	def detect_essentia(arquivo_audio, selected): #ODF using essentia library

		filename = arquivo_audio

		# don't forget, we can actually instantiate and call an algorithm on the same line!
		global audio

		# Phase 1: compute the onset detection function
		# The OnsetDetection algorithm tells us that there are several methods available in Essentia,
		# let's do two of them
		if selected==3:
			od = OnsetDetection(method = 'hfc')
		elif selected==4:
			od = OnsetDetection(method = 'complex')
		elif selected==5:
			od = OnsetDetection(method = 'melflux')
		elif selected==6:
			od = OnsetDetection(method = 'complex_phase')
		elif selected==7:
			od = OnsetDetection(method = 'rms')


		# let's also get the other algorithms we will need, and a pool to store the results
		w = Windowing(type = 'hann')
		fft = FFT() # this gives us a complex FFT
		c2p = CartesianToPolar() # and this turns it into a pair (magnitude, phase)

		pool = Pool()

		# let's get down to business
		print 'Computing onset detection functions...'
		for frame in FrameGenerator(audio, frameSize = 1024, hopSize = 512):
		    mag, phase, = c2p(fft(w(frame)))
		    pool.add('features.method', od(mag, phase))

		# Phase 2: compute the actual onsets locations
		onsets = Onsets()
		print 'Computing onset times...'
		onsets_method = onsets(array([ pool['features.method'] ]), [ 1 ])

		# and mark them on the audio, which we'll write back to disk
		# we use beeps instead of white noise to mark them, as it's more distinctive

		# convert to the list type
		listadet = onsets_method.tolist()

		# convert seconds to frames
		listadet = [int(SecToFrames(x)) for x in listadet if x >= 0]
		 
		return listadet
Example #16
def compute_pitch_yinfft(audio):
    w = Windowing(type='hann')
    spectrum = Spectrum()
    yinfft = PitchYinFFT()
    p = Pool()
    for frame in FrameGenerator(audio,
                                frameSize=frame_size,
                                hopSize=hop_size,
                                startFromZero=True):
        p.add('pitch_yinfft', yinfft(spectrum(w(frame)))[0])
    return p['pitch_yinfft']
Example #17
def compute_bark(audio):
    w = Windowing(type='hann')
    spectrum = Spectrum()
    bark = BarkBands()
    p = Pool()
    for frame in FrameGenerator(audio,
                                frameSize=frame_size,
                                hopSize=hop_size,
                                startFromZero=True):
        p.add('bark', bark(spectrum(w(frame))))
    return p['bark']
Example #18
def compute_spectral_flatness(audio):
    w = Windowing(type='hann')
    spectrum = Spectrum()
    flatness = Flatness()
    p = Pool()
    for frame in FrameGenerator(audio,
                                frameSize=frame_size,
                                hopSize=hop_size,
                                startFromZero=True):
        p.add('spectral_flatness', flatness(spectrum(w(frame))))
    return p['spectral_flatness']
Example #19
def compute_mfcc(audio):
    w = Windowing(type='hann')
    spectrum = Spectrum()
    mfcc = MFCC()
    p = Pool()
    for frame in FrameGenerator(audio,
                                frameSize=frame_size,
                                hopSize=hop_size,
                                startFromZero=True):
        _, coeffs = mfcc(spectrum(w(frame)))
        p.add('mfcc', coeffs)
    return p['mfcc']
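
The compute_* helpers above read frame_size and hop_size as module-level globals; a short driver sketch is shown below (the global values and the file name are assumptions):

# Illustrative driver for the frame-based compute_* helpers above.
from essentia.standard import MonoLoader

frame_size, hop_size = 2048, 1024  # assumed module-level globals
audio = MonoLoader(filename='input.wav', sampleRate=44100)()
mfccs = compute_mfcc(audio)                    # one 13-coefficient row per frame
centroids = compute_spectral_centroid(audio)   # one value per frame
print(len(mfccs), 'MFCC frames;', len(centroids), 'centroid frames')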
Example #20
    def _analyse(self, filepath):
        audio = to_mono(wavread(filepath)[0])
        audio = audio.astype('float32')
        
        w = Windowing(type = 'hann')
        fft = FFT() # this gives us a complex FFT
        c2p = CartesianToPolar() # and this turns it into a pair (magnitude, phase)
        hfc_detect = OnsetDetection(method = 'hfc')
        complex_detect = OnsetDetection(method = 'complex')
        rms_detect = RMS()
        spec = Spectrum()
        #pd = PitchDetection()
        flux = Flux()
        pool = Pool()
        #wap = WarpedAutoCorrelation()
        
    
        # let's get down to business
        print 'Computing onset detection functions...'
        for frame in FrameGenerator(audio, frameSize = self.frame_size,\
            hopSize = self.hop_size):
            mag, phase, = c2p(fft(w(frame)))
            spectrum = spec(w(frame))
            f = flux(spectrum)
            #pitch = pd(spectrum)
            pool.add('hfc', hfc_detect(mag, phase))
            pool.add('complex', complex_detect(mag, phase))
            pool.add('rms', rms_detect(frame))
            pool.add('flux', f)
            #pool.add('pitch', pitch[0])
        #print pool['pitch']
        #pool.add('autoc', wap(pool['pitch']))
     

        return pool, audio
Example #21
def compute_spectral_shape(audio):
    w = Windowing(type='hann')
    spectrum = Spectrum()
    cm = CentralMoments()
    ds = DistributionShape()
    p = Pool()
    for frame in FrameGenerator(audio,
                                frameSize=frame_size,
                                hopSize=hop_size,
                                startFromZero=True):
        spread, skewness, kurtosis = ds(cm(spectrum(w(frame))))
        p.add('spectral_spread', spread)
        p.add('spectral_skewness', skewness)
        p.add('spectral_kurtosis', kurtosis)
    return p['spectral_spread'], p['spectral_skewness'], p['spectral_kurtosis']
Example #22
def __mfccs__(audio):
    w = Windowing(type='hann')
    spectrum = Spectrum(
    )  # FFT() would return the complex FFT, here we just want the magnitude spectrum
    mfcc = MFCC()
    mfcc_pool = Pool()
    ## NOTE -> 1 second of a wav file is approx. 90 frames and the intensity is given in Hz
    for frame in FrameGenerator(audio,
                                frameSize=1024,
                                hopSize=512,
                                startFromZero=True):
        spec = spectrum(w(frame))
        mfcc_bands, mfcc_coeffs = mfcc(spec)
        mfcc_pool.add('mfcc', mfcc_coeffs[1:])
        mfcc_pool.add('mfcc_bands', mfcc_bands)
    return mfcc_pool
Example #23
def compute_hpcp(audio):
    w = Windowing(type='hann')
    spectrum = Spectrum()
    peaks = SpectralPeaks(orderBy='magnitude',
                          magnitudeThreshold=0.00001,
                          minFrequency=20,
                          maxFrequency=3500,
                          maxPeaks=60)
    hpcp = HPCP()
    p = Pool()
    for frame in FrameGenerator(audio,
                                frameSize=tonal_frame_size,
                                hopSize=tonal_hop_size,
                                startFromZero=True):
        p.add('hpcp', hpcp(*peaks(spectrum(w(frame)))))
    return p['hpcp']
Example #24
    def _extract_pitch_contours(self, audio):
        # Hann window with x4 zero padding
        run_windowing = estd.Windowing(zeroPadding=3 * self.frame_size)
        run_spectrum = estd.Spectrum(size=self.frame_size * 4)
        run_spectral_peaks = estd.SpectralPeaks(
            minFrequency=self.min_frequency, maxFrequency=self.max_frequency,
            magnitudeThreshold=self.magnitude_threshold,
            sampleRate=self.sample_rate, orderBy='magnitude')

        # convert unit to cents, PitchSalienceFunction takes 55 Hz as the
        # default reference
        run_pitch_salience_function = estd.PitchSalienceFunction(
            binResolution=self.bin_resolution)
        run_pitch_salience_function_peaks = estd.PitchSalienceFunctionPeaks(
            binResolution=self.bin_resolution, minFrequency=self.min_frequency,
            maxFrequency=self.max_frequency)
        run_pitch_contours = estd.PitchContours(
            hopSize=self.hop_size, binResolution=self.bin_resolution,
            peakDistributionThreshold=self.peak_distribution_threshold)

        # compute frame by frame
        pool = Pool()
        for frame in estd.FrameGenerator(audio, frameSize=self.frame_size,
                                         hopSize=self.hop_size):
            frame = run_windowing(frame)
            spectrum = run_spectrum(frame)
            peak_frequencies, peak_magnitudes = run_spectral_peaks(spectrum)
            salience = run_pitch_salience_function(peak_frequencies,
                                                   peak_magnitudes)
            salience_peaks_bins, salience_peaks_contour_saliences = \
                run_pitch_salience_function_peaks(salience)
            if not np.size(salience_peaks_bins):
                salience_peaks_bins = np.array([0])
            if not np.size(salience_peaks_contour_saliences):
                salience_peaks_contour_saliences = np.array([0])

            pool.add('allframes_salience_peaks_bins', salience_peaks_bins)
            pool.add('allframes_salience_peaks_contourSaliences',
                     salience_peaks_contour_saliences)

        # post-processing: contour tracking
        contours_bins, contour_saliences, contours_start_times, duration = \
            run_pitch_contours(
                pool['allframes_salience_peaks_bins'],
                pool['allframes_salience_peaks_contourSaliences'])
        return contours_bins, contours_start_times, contour_saliences, duration
Example #25
def computeOnsets(inFile, outFile):

    # don't forget, we can actually instantiate and call an algorithm on the same line!
    print 'Loading audio file...'
    audio = MonoLoader(filename=inFile)()

    pool = Pool()

    onsetDetectionGlobal = OnsetDetectionGlobal()
    onsetDetections = onsetDetectionGlobal(audio)

    pool.add('features.onsetDetections', onsetDetections)

    onsets = Onsets()
    onsetTimes = onsets(array([onsetDetections]), [1])

    pool.add('features.onsets', onsetTimes)
    np.savetxt(outFile, pool['features.onsets'][0], fmt='%f')
Example #26
def extractMFCCs(audio):
    '''
    extract MFCCs from the spectrogram
    '''

    ######## compute MFCCs
    #     maybe set highFrequencyBound=22100
    frameSizeInSamples = int(round(44100 * frameSize_block))
    hopSizeInSamples = int(round(44100 * hopSize_block))
    inputSpectrumSize = frameSizeInSamples // 2 + 1

    #     inputSpectrumSize = 1025
    mfcc = MFCC(numberCoefficients=num_mfccs,
                numberBands=numberBands,
                highFrequencyBound=highFrequencyBound,
                inputSize=inputSpectrumSize)
    w = Windowing(type='hann')
    spectrum = Spectrum()
    mfccs_array = []
    pool = Pool()

    audio = essentia.array(audio)
    for frame in FrameGenerator(audio,
                                frameSize=frameSizeInSamples,
                                hopSize=hopSizeInSamples):
        mfcc_bands, mfcc_coeffs = mfcc(spectrum(w(frame)))
        pool.add('mfcc', mfcc_coeffs)


#     mfccs_array = np.zeros( (len(spectogram), num_mfccs) )
#     for i,spectrum in enumerate(spectogram):
#
#         mfcc_bands, mfcc_coeffs = mfcc( spectrum )
#         mfccs_array[i] = mfcc_coeffs

# transpose to have it in a better shape
# we need to convert the list to an essentia.array first (== numpy.array of floats)

#     mfccs_T = essentia.array(pool['mfcc']).T
#     # and plot
#     imshow(mfccs_T, aspect = 'auto', interpolation='none')
#     show() # unnecessary if you started "ipython --pylab"

    return pool['mfcc']
Example #27
def SliceDrums_BeatDetection(folder, audio_filename, fs):
    od_hfc = OnsetDetection(method='hfc')
    w = Windowing(type='hann')
    fft = FFT()  # this gives us a complex FFT
    c2p = CartesianToPolar(
    )  # and this turns it into a pair (magnitude, phase)
    onsets = Onsets()

    x = MonoLoader(filename=folder + audio_filename, sampleRate=fs)()
    duration = float(len(x)) / fs

    x = x / np.max(np.abs(x))

    t = np.arange(len(x)) / float(fs)

    zero_array = t * 0  #used only for plotting purposes

    #Plotting
    f, axarr = plt.subplots(1, 1, figsize=(80, 20))

    #Essentia beat tracking
    pool = Pool()
    for frame in FrameGenerator(x, frameSize=1024, hopSize=512):
        mag, phase, = c2p(fft(w(frame)))
        pool.add('features.hfc', od_hfc(mag, phase))

    onsets_list = onsets(array([pool['features.hfc']]), [1])
    axarr.vlines(onsets_list, -1, 1, color='k', zorder=2, linewidth=5.0)
    axarr.plot(t, x, zorder=1)
    axarr.axis('off')
    for i, onset in enumerate(onsets_list):
        sample = int(onset * fs) - 1000
        samplename = "{}slices/{}{}__blind.wav".format(folder,
                                                       str(len(str(i))),
                                                       str(i))
        if (i >= len(onsets_list) - 1):
            next_sample = len(x)
        else:
            next_sample = int(onsets_list[i + 1] * fs) - 1000
        x_seg = x[sample:next_sample]
        MonoWriter(filename=samplename)(x_seg)

    return onsets_list, duration
Example #28
def get_cat_audio_pitch():
    
    spectrum = Spectrum()
    pitch = PitchYinFFT(frameSize=1024)
 
    pool = Pool()
    windowing = Windowing(type = 'hann')


    cat_audio = MonoLoader(filename='cat-01.wav', sampleRate=44100)()
    cat_audio_loudness = Loudness()(cat_audio)
 
    for frame in FrameGenerator(cat_audio, frameSize=1024, hopSize=512):
        spec = spectrum(windowing(frame))
        p, conf = pitch(spec)
        pool.add('cat_pitch', p)
 
 
    cat_pitch = numpy.mean(pool['cat_pitch'])
    cat_MIDI = mir_eval.multipitch.frequencies_to_midi([cat_pitch]) 
    return cat_audio, cat_MIDI[0]
Example #29
def harmonic_magnitudes_to_audio(hfreqs, magns, phases, options):
    '''
    Resynthesize the audio of a contour from its per-frame harmonic partials.

    Params:

    hfreqs - harmonic frequencies of the contour
    magns - harmonic magnitudes of the contour
    phases - harmonic phases of the contour

    return:
    spectrogram of the contour

    out_audio_contour - audio of the harmonics for a contour
    '''

    pool = Pool()

    run_sine_model_synth = SineModelSynth(hopSize=512, sampleRate=options.Fs)
    run_ifft = IFFT(size=options.windowsizeInSamples)
    run_overl = OverlapAdd(frameSize=options.windowsizeInSamples,
                           hopSize=512,
                           gain=1. / options.windowsizeInSamples)
    out_audio_contour = np.array(0)

    for hfreq, hmag, hphase in zip(hfreqs, magns, phases):

        spectrum, audio_frame = harmonics_to_audio(hfreq, hmag, hphase,
                                                   run_sine_model_synth,
                                                   run_ifft, run_overl)
        out_audio_contour = np.append(out_audio_contour, audio_frame)

        pool.add('spectrum', spectrum)

    # note: this final full-contour synthesis overwrites the overlap-add
    # result accumulated frame by frame above
    out_audio_contour = SM.sineModelSynth(hfreqs, magns, phases, 512, 128,
                                          44100)

    return out_audio_contour, pool['spectrum']
Example #30
def spectrogram(audio, audio_file, save_fig=True, save_fig_path=None):
    if audio_file.endswith('.wav'):
        w = Windowing(type='hann')
        spectrum = Spectrum(
        )  # FFT() would return the complex FFT, here we just want the magnitude spectrum
        pool = Pool()
        ## NOTE -> 1 second of a wav file is approx. 90 frames and the intensity is given in Hz
        for frame in FrameGenerator(audio,
                                    frameSize=1024,
                                    hopSize=512,
                                    startFromZero=True):
            win = w(frame)
            spec = spectrum(win)
            pool.add('spec', spec)
        aggrPool = PoolAggregator(defaultStats=['mean'])(pool)
        a = sum(aggrPool['spec.mean'].T) / aggrPool['spec.mean'].T.shape[0]
        # a = aggrPool['spec.mean'].T
        # b = np.zeros(pool['spec'].T.shape)
        b = np.array(pool['spec'].T)
        # for iterator1, i in enumerate(pool['spec'].T):
        #     for iterator2, j in enumerate(i):
        #         # if j > a[iterator1]/2:
        #         if j > a/2 and j > 0.015:
        #             b[iterator1][iterator2] = j
        # b = np.array([i for i in b if i.max() > 0.01])
        # not for the new bats dataset
        b = remove_initial_zeros(b)
        b = b.tolist()
        b.reverse()
        b = remove_initial_zeros(b)
        b.reverse()
        b = np.array(b)
        if save_fig:
            if not save_fig_path:
                save_fig_path = audio_file.replace('.wav', '_spec.jpg')
            save_plots(b, save_fig_path)
        return b[:200, :200].tolist()
Example #31
	def deteccoes(arquivo_audio): #Return a list with all detections

		filename = arquivo_audio

		audio = MonoLoader(filename = filename)()

		# Phase 1: compute the onset detection function
		# The OnsetDetection algorithm tells us that there are several methods available in Essentia,
		# let's do two of them

		od1 = OnsetDetection(method = 'hfc')
		od2 = OnsetDetection(method = 'complex')

		# let's also get the other algorithms we will need, and a pool to store the results

		w = Windowing(type = 'hann')
		fft = FFT() # this gives us a complex FFT
		c2p = CartesianToPolar() # and this turns it into a pair (magnitude, phase)

		pool = Pool()

		# let's get down to business
		print 'Computing onset detection functions...'
		for frame in FrameGenerator(audio, frameSize = 1024, hopSize = 512):
		    mag, phase, = c2p(fft(w(frame)))
		    pool.add('features.hfc', od1(mag, phase))
		    pool.add('features.complex', od2(mag, phase))


		# Phase 2: compute the actual onsets locations
		onsets = Onsets()

		print 'Computing onset times...'
		onsets_hfc = onsets(# this algo expects a matrix, not a vector
		                    array([ pool['features.hfc'] ]),

		                    # you need to specify weights, but as there is only a single
		                    # function, it doesn't actually matter which weight you give it
		                    [ 1 ])

		onsets_complex = onsets(array([ pool['features.complex'] ]), [ 1 ])

		# and mark them on the audio, which we'll write back to disk
		# we use beeps instead of white noise to mark them, as it's more distinctive
		print 'Writing audio files to disk with onsets marked...'

		# mark the 'hfc' onsets:

		# convert to the list type
		listadethfc = onsets_hfc.tolist()
		listadetcomplex = onsets_complex.tolist()

		# convert seconds to frames
		listadethfc = [int(SecToFrames(x)) for x in listadethfc if x >= 0]
		listadetcomplex = [int(SecToFrames(x)) for x in listadetcomplex if x >= 0]

		return listadetcomplex
Example #32
def hpcpgram(audio,
             sampleRate=44100,
             frameSize=4096,
             hopSize=2048,
             numBins=12,
             windowType='blackmanharris62',
             minFrequency=100,
             maxFrequency=4000,
             whitening=False,
             maxPeaks=100,
             magnitudeThreshold=1e-05,
             **kwargs):
    """
    Compute Harmonic Pitch Class Profile (HPCP) Grams for overlapped frames of a given input audio signal 

    For additional list of parameters of essentia standard mode HPCP please refer to 
    http://essentia.upf.edu/documentation/reference/std_HPCP.html

    References:
    [1]. Gómez, E. (2006). Tonal Description of Polyphonic Audio for Music Content Processing.

    Inputs
        audio (2d vector): audio signal

    Parameters:
        sampleRate : (real ∈ (0, ∞), default = 44100) :
        the sampling rate of the audio signal [Hz]

        frameSize (integer ∈ [1, ∞), default = 1024) :
        the output frame size
        
        hopSize (integer ∈ [1, ∞), default = 512) :
        the hop size between frames

        numBins : (integer ∈ [12, ∞), default = 12) :
        the size of the output HPCP (must be a positive nonzero multiple of 12)

        windowType (string ∈ {hamming, hann, hannnsgcq, triangular, square, blackmanharris62, blackmanharris70, blackmanharris74, blackmanharris92}, default = blackmanharris62) :
        the window type, which can be 'hamming', 'hann', 'triangular', 'square' or 'blackmanharrisXX'

        maxFrequency : (real ∈ (0, ∞), default = 4000) :
        the maximum frequency that contributes to the SpectralPeaks and HPCP algorithms computation [Hz] (the difference between the max and split frequencies must not be less than 200.0 Hz)

        minFrequency : (real ∈ (0, ∞), default = 100) :
        the minimum frequency that contributes to the SpectralPeaks and HPCP algorithm computation [Hz] (the difference between the min and split frequencies must not be less than 200.0 Hz)

        maxPeaks (integer ∈ [1, ∞), default = 100) :
        the maximum number of returned peaks while calculating SpectralPeaks

        magnitudeThreshold (real ∈ (-∞, ∞), default = 0) :
        peaks below this given threshold are not outputted while calculating Spectral Peaks

        whitening : (boolean (True, False), default = False)
        Optional step of computing spectral whitening to the output from speakPeak magnitudes

        kwargs : additional keyword arguments
        Arguments to parameterize HPCP alogithms.
        see standard mode HPCP algorithm (http://essentia.upf.edu/documentation/reference/std_HPCP.html).


    Returns: hpcpgram of overlapped frames of input audio signal (2D vector) 

    """
    frameGenerator = es.FrameGenerator(array(audio),
                                       frameSize=frameSize,
                                       hopSize=hopSize)
    window = es.Windowing(type=windowType)
    spectrum = es.Spectrum()
    # Refer http://essentia.upf.edu/documentation/reference/std_SpectralPeaks.html
    spectralPeaks = es.SpectralPeaks(magnitudeThreshold=magnitudeThreshold,
                                     maxFrequency=maxFrequency,
                                     minFrequency=minFrequency,
                                     maxPeaks=maxPeaks,
                                     sampleRate=sampleRate)
    # http://essentia.upf.edu/documentation/reference/std_SpectralWhitening.html
    spectralWhitening = es.SpectralWhitening(maxFrequency=maxFrequency,
                                             sampleRate=sampleRate)
    # http://essentia.upf.edu/documentation/reference/std_HPCP.html
    hpcp = es.HPCP(sampleRate=sampleRate,
                   maxFrequency=maxFrequency,
                   minFrequency=minFrequency,
                   size=numBins,
                   **kwargs)
    pool = Pool()
    #compute hpcp for each frame and add the results to the pool
    for frame in frameGenerator:
        spectrum_mag = spectrum(window(frame))
        frequencies, magnitudes = spectralPeaks(spectrum_mag)
        if whitening:
            w_magnitudes = spectralWhitening(spectrum_mag, frequencies,
                                             magnitudes)
            hpcp_vector = hpcp(frequencies, w_magnitudes)
        else:
            hpcp_vector = hpcp(frequencies, magnitudes)
        pool.add('tonal.hpcp', hpcp_vector)
    return pool['tonal.hpcp']
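
A quick usage sketch for hpcpgram() follows (the file name is a placeholder):

# Illustrative call of hpcpgram() above.
from essentia.standard import MonoLoader

audio = MonoLoader(filename='track.wav', sampleRate=44100)()
hpcps = hpcpgram(audio, sampleRate=44100, frameSize=4096, hopSize=2048)
print(hpcps.shape)  # (number of frames, numBins)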
Example #33
od1 = OnsetDetection(method='hfc')
od2 = OnsetDetection(method='complex')

# let's also get the other algorithms we will need, and a pool to store the results

w = Windowing(type='hann')
fft = FFT()  # this gives us a complex FFT
c2p = CartesianToPolar()  # and this turns it into a pair (magnitude, phase)

pool = Pool()

# let's get down to business
print 'Computing onset detection functions...'
for frame in FrameGenerator(audio, frameSize=1024, hopSize=512):
    mag, phase, = c2p(fft(w(frame)))
    pool.add('features.hfc', od1(mag, phase))
    pool.add('features.complex', od2(mag, phase))

# Phase 2: compute the actual onsets locations
onsets = Onsets()

print 'Computing onset times...'
onsets_hfc = onsets(  # this algo expects a matrix, not a vector
    array([pool['features.hfc']]),

    # you need to specify weights, but as there is only a single
    # function, it doesn't actually matter which weight you give it
    [1])

#onsets_complex = onsets(array([ pool['features.complex'] ]), [ 1 ])
print 'Onsets-hfc'
Example #34
def extractFeatures(arffDir = '.', dirname = '.', fnames = '', segment_length = 'WHOLE', hopsize = 0):
	# Start to process file by file from input
	for fname in fnames:
		# It only process wav or mp3 file
		if ".wav" not in fname.lower() and ".mp3" not in fname.lower(): continue

		# Generate output dir
		trackName = fname.split('/')[-1]
		segmentArffDir = arffDir+"/"+trackName[:-4]+"/"
		if not exists(segmentArffDir):
			mkdir(segmentArffDir)
		else:
			print fname + ' exists, skipping...'
			continue
		
		# Read audio and some more info
		loader = es.EasyLoader(filename = dirname+"/"+fname)
		audio = loader.compute()
		sampleRate = loader.paramValue('sampleRate')
		length = int(len(audio)/sampleRate)
		if length == 0: length = 1
		print fname + ' length: ' + str(length) 

		if hopsize == 0:
			hopsize = segment_length
			
		# Specify the length of the segment
		if segment_length == 'WHOLE':
			step = length
			end_time = length
			segment_length = length
			print 'The whole audio is being processed...'
		else:
			step = hopsize
			segment_length = float(segment_length)
			if step>length: continue

		# Start computing segment by segment
		for start_time in arange(0, length, step):
			end_time = start_time + segment_length
			if step != length:
				print 'the time from second ' + str(start_time) + ' is being processed...'
			if end_time > length:
				break
			segAudio = audio[int(start_time*sampleRate):int(end_time*sampleRate)]
			pool = Pool()

			# Setup parameters 
			specContrast = es.SpectralContrast(frameSize=2048, lowFrequencyBound=40, sampleRate=sampleRate)
			spectrum = es.Spectrum(size=2048) #size is frameSize
			mfcc = es.MFCC(lowFrequencyBound=40, sampleRate=sampleRate) # MFCC
			if step > 20:
				hpcp = es.HPCP(size = 12, referenceFrequency = 440, harmonics=8, bandPreset = True, minFrequency = 40.0, maxFrequency = 5000.0, \
					splitFrequency = 500.0, weightType = 'cosine', nonLinear = False, windowSize = 1);# HPCP
			lowLevelSpectralExtractor = \
				es.LowLevelSpectralExtractor(frameSize=2048, hopSize=1024, sampleRate=sampleRate)
			spectralPeaks = es.SpectralPeaks(sampleRate=sampleRate, minFrequency=40, maxFrequency=11000, maxPeaks=50, magnitudeThreshold=0.2)

			# Low level spectral feature analysis
			try:
				features = lowLevelSpectralExtractor(segAudio)
			except:
				print start_time, "has failed!"
				continue
			
			
			# Harmonic spectral features (TODO: Is the magnitude threshold ok?)
			harmonicPeaks = es.HarmonicPeaks()
			pitch = es.PitchDetection()	# Using YIN instead of predominant pitch analysis, since this analysis is frame-based


			# Windowing
			window = es.Windowing(size=2048)
			for frame in es.FrameGenerator(segAudio, frameSize=2048, hopSize=1024):
				# spectral contrast
				s = spectrum(window(frame))
				contrast, valley = specContrast(s)
				pool.add('spectral_contrast', contrast)
				pool.add('spectral_valley', valley)

				# MFCC
				bands, mfccs = mfcc(s)
				pool.add('mfcc', mfccs[1:])

				freqs, mags = spectralPeaks(s)

				# HPCP
				if step > 20:
					hpcps = hpcp(freqs, mags)
					pool.add('HPCP', hpcps) 

				# Self-compute spectral features
				if len(freqs) > 0:
					p, conf = pitch(s)
					if freqs[0] == 0:
						freqs = freqs[1:]
						mags = mags[1:]
					freqs, mags = harmonicPeaks(freqs, mags, p)
					_sum = 0
					if len(freqs) == 1:
						specEnvelope_i = [freqs[0]] #for hsd
						_sum = freqs[0]*mags[0]
					elif len(freqs) == 2:
						specEnvelope_i = [(freqs[0]+freqs[1])/2.0] #for hsd
						_sum = freqs[0]*mags[0]+freqs[1]*mags[1]
					elif len(freqs) > 2:
						specEnvelope_i = [(freqs[0]+freqs[1])/2.0] #for hsd
						_sum = freqs[0]*mags[0]
						for i in xrange(1, len(freqs)-1):
							_sum += freqs[i]*mags[i] #for hsc_i
							specEnvelope_i.append((freqs[i-1]+freqs[i]+freqs[i+1])/3.0)
						specEnvelope_i.append((freqs[i]+freqs[i+1])/2.0)
						_sum += freqs[i+1]*mags[i+1]
					hsc_i = _sum/sum(mags)
					pool.add('harmonic_spectral_centroid', hsc_i)
					hsd_i = sum(abs(log10(mags)-log10(specEnvelope_i)))/sum(log10(mags))
					pool.add('harmonic_spectral_deviation', hsd_i)
					hss_i = sqrt(sum(square(freqs-hsc_i)*square(mags))/sum(square(mags)))/hsc_i
					pool.add('harmonic_spectral_spread', hss_i)
				else:
					pool.add('harmonic_spectral_centroid', 0)
					pool.add('harmonic_spectral_deviation', 0)
					pool.add('harmonic_spectral_spread', 0)


			for i in xrange(0, len(features[0])):
			#	pool.add('barkbands', features[0][i])
				pool.add('hfc', features[4][i])
				pool.add('pitch', features[6][i])
				pool.add('pitch_instantaneous_confidence', features[7][i])
				pool.add('pitch_salience', features[8][i])
				pool.add('silence_rate_20dB', features[9][i])
			#	pool.add('silence_rate_30dB', features[10][i])
			#	pool.add('silence_rate_60dB', features[11][i])
				pool.add('spectral_complexity', features[12][i])
				pool.add('spectral_crest', features[13][i])
				pool.add('spectral_decrease', features[14][i])
				pool.add('spectral_energy', features[15][i])
			#	pool.add('spectral_energyband_low', features[16][i])
			#	pool.add('spectral_energyband_middle_low', features[17][i])
			#	pool.add('spectral_energyband_middle_high', features[18][i])
			#	pool.add('spectral_energy_high', features[19][i])
				pool.add('spectral_flatness_db', features[20][i])
				pool.add('spectral_flux', features[21][i])
				pool.add('spectral_rms', features[22][i])
				pool.add('spectral_rolloff', features[23][i])
				pool.add('spectral_strongpeak', features[24][i])
				pool.add('zero_crossing_rate', features[25][i])
				pool.add('inharmonicity',  features[26][i])
				pool.add('tristimulus',  features[27][i])
			
			onsetRate = es.OnsetRate()
			onsets, rate = onsetRate(segAudio)
			try:
				aggrPool = es.PoolAggregator(defaultStats = ['mean', 'var', 'skew', 'kurt'])(pool)
			except:
				print start_time/step, "failed"
				continue

			aggrPool.add('onset_rate', rate)
							
			#print start_time, segment_length, start_time/segment_length
			fileout = segmentArffDir+trackName[:-4]+"_%003d%s"%(start_time/step, ".sig")
			output = es.YamlOutput(filename = fileout)
			output(aggrPool)
od1 = OnsetDetection(method="hfc")
od2 = OnsetDetection(method="complex")

# let's also get the other algorithms we will need, and a pool to store the results

w = Windowing(type="hann")
fft = FFT()  # this gives us a complex FFT
c2p = CartesianToPolar()  # and this turns it into a pair (magnitude, phase)

pool = Pool()

# let's get down to business
print "Computing onset detection functions..."
for frame in FrameGenerator(audio, frameSize=1024, hopSize=512):
    mag, phase, = c2p(fft(w(frame)))
    pool.add("features.hfc", od1(mag, phase))
    pool.add("features.complex", od2(mag, phase))


# Phase 2: compute the actual onsets locations
onsets = Onsets()

print "Computing onset times..."
onsets_hfc = onsets(  # this algo expects a matrix, not a vector
    array([pool["features.hfc"]]),
    # you need to specify weights, but as there is only a single
    # function, it doesn't actually matter which weight you give it
    [1],
)

onsets_complex = onsets(array([pool["features.complex"]]), [1])
Example #36
od1 = OnsetDetection(method = 'hfc')
od2 = OnsetDetection(method = 'complex')

# let's also get the other algorithms we will need, and a pool to store the results

w = Windowing(type = 'hann')
fft = FFT() # this gives us a complex FFT
c2p = CartesianToPolar() # and this turns it into a pair (magnitude, phase)

pool = Pool()

# let's get down to business
print 'Computing onset detection functions...'
for frame in FrameGenerator(audio, frameSize = 1024, hopSize = 512):
    mag, phase, = c2p(fft(w(frame)))
    pool.add('features.hfc', od1(mag, phase))
    pool.add('features.complex', od2(mag, phase))


# Phase 2: compute the actual onsets locations
onsets = Onsets()

print 'Computing onset times...'
onsets_hfc = onsets(# this algo expects a matrix, not a vector
                    array([ pool['features.hfc'] ]),

                    # you need to specify weights, but as there is only a single
                    # function, it doesn't actually matter which weight you give it
                    [ 1 ])

onsets_complex = onsets(array([ pool['features.complex'] ]), [ 1 ])
Example #37
    def chroma_hpcp(self,
                    frameSize=4096,
                    hopSize=2048,
                    windowType='blackmanharris62',
                    harmonicsPerPeak=8,
                    magnitudeThreshold=1e-05,
                    maxPeaks=1000,
                    whitening=True,
                    referenceFrequency=440,
                    minFrequency=40,
                    maxFrequency=5000,
                    nonLinear=False,
                    numBins=12,
                    display=False):
        '''
        Compute Harmonic Pitch Class Profiles (HPCP) for the input audio using essentia standard mode,
        with the default parameters as mentioned in [1].
        Please refer to the following paper for a detailed explanation of the algorithm.
        [1]. Gómez, E. (2006). Tonal Description of Polyphonic Audio for Music Content Processing.
        For the full list of parameters of essentia standard mode HPCP please refer to http://essentia.upf.edu/documentation/reference/std_HPCP.html
        Parameters
            harmonicsPerPeak : (integer ∈ [0, ∞), default = 8) :
            number of harmonics for frequency contribution, 0 indicates exclusive fundamental frequency contribution

            maxFrequency : (real ∈ (0, ∞), default = 5000) :
            the maximum frequency that contributes to the HPCP [Hz] (the difference between the max and split frequencies must not be less than 200.0 Hz)

            minFrequency : (real ∈ (0, ∞), default = 40) :
            the minimum frequency that contributes to the HPCP [Hz] (the difference between the min and split frequencies must not be less than 200.0 Hz)

            nonLinear : (bool ∈ {true, false}, default = false) :
            apply non-linear post-processing to the output (use with normalized='unitMax'); boosts values close to 1, decreases values close to 0

            normalized (string ∈ {none, unitSum, unitMax}, default = unitMax) :
            whether to normalize the HPCP vector

            referenceFrequency : (real ∈ (0, ∞), default = 440) :
            the reference frequency for semitone index calculation, corresponding to A3 [Hz]

            sampleRate : (real ∈ (0, ∞), default = 44100) :
            the sampling rate of the audio signal [Hz]

            numBins : (integer ∈ [12, ∞), default = 12) :
            the size of the output HPCP (must be a positive nonzero multiple of 12)

            whitening : (boolean (True, False), default = True)
            optional step of applying spectral whitening to the spectral peak magnitudes
        '''

        audio = array(self.audio_vector)

        #print audio.shape

        frameGenerator = estd.FrameGenerator(audio,
                                             frameSize=frameSize,
                                             hopSize=hopSize)

        window = estd.Windowing(type=windowType)

        spectrum = estd.Spectrum()

        # Refer http://essentia.upf.edu/documentation/reference/std_SpectralPeaks.html
        spectralPeaks = estd.SpectralPeaks(magnitudeThreshold=magnitudeThreshold,
                                           maxFrequency=maxFrequency,
                                           minFrequency=minFrequency,
                                           maxPeaks=maxPeaks,
                                           orderBy="frequency",
                                           sampleRate=self.fs)

        # http://essentia.upf.edu/documentation/reference/std_SpectralWhitening.html
        spectralWhitening = estd.SpectralWhitening(maxFrequency=maxFrequency,
                                                   sampleRate=self.fs)

        # http://essentia.upf.edu/documentation/reference/std_HPCP.html
        hpcp = estd.HPCP(sampleRate=self.fs,
                         maxFrequency=maxFrequency,
                         minFrequency=minFrequency,
                         referenceFrequency=referenceFrequency,
                         nonLinear=nonLinear,
                         harmonics=harmonicsPerPeak,
                         size=numBins)

        pool = Pool()

        #compute hpcp for each frame and add the results to the pool
        for frame in frameGenerator:
            spectrum_mag = spectrum(window(frame))
            frequencies, magnitudes = spectralPeaks(spectrum_mag)
            if whitening:
                w_magnitudes = spectralWhitening(spectrum_mag, frequencies,
                                                 magnitudes)
                hpcp_vector = hpcp(frequencies, w_magnitudes)
            else:
                hpcp_vector = hpcp(frequencies, magnitudes)
            pool.add('tonal.hpcp', hpcp_vector)

        if display:
            display_chroma(np.swapaxes(pool['tonal.hpcp'], 0, 1))

        return pool['tonal.hpcp']
Example #38
def sliceDrums_from_annotations_SDtrainset(instrument_name, segments_dir,
                                           song_dict, fs):
    """
        Input:  instrument_name: str woth a key in the song_dict
                segments_dir : str with path where slices are saved
                song_dict : dict containing audio stream and annotations
                fs :  sampling rate to properly save the files

        This function slices audio stream based on annotations and save each slice in a individual wav file, 
        each on the corresponent folder = segmens_dir/song_name/instrument/file.wav
        
        Adapted to routines recorded in the studio

        This function could be combined with the feature extraction in the next cells, but having the slices
        saved allows us to do data augmentation combining individual samples to get more instances of all the combinations
    """
    song = song_dict[instrument_name]
    x_seg_dir = os.path.join(segments_dir, instrument_name)

    od_complex = OnsetDetection(method='complex')
    w = Windowing(type='hann')
    fft = FFT()  # this gives us a complex FFT
    c2p = CartesianToPolar(
    )  # and this turns it into a pair (magnitude, phase)
    onsets = Onsets()

    file_count = 0

    for audio in song['audios']:
        x = audio
        duration = float(len(x)) / fs

        x = x / np.max(np.abs(x))

        t = np.arange(len(x)) / float(fs)

        #Essentia beat tracking
        pool = Pool()
        for frame in FrameGenerator(x, frameSize=1024, hopSize=512):
            mag, phase, = c2p(fft(w(frame)))
            pool.add('features.complex', od_complex(mag, phase))

        onsets_list = onsets(array([pool['features.complex']]), [1])
        first_onset = int(onsets_list[0] * fs)

        print(first_onset)
        if not os.path.exists(segments_dir):  #creating the directory
            os.mkdir(segments_dir)
        segments_dir__ = os.path.join(segments_dir, instrument_name)
        if not os.path.exists(segments_dir__):  #creating the directory
            os.mkdir(segments_dir__)

        n_notes = len(song['annotations'])
        annotations = song['annotations']
        for i in range(1, n_notes):
            if i == n_notes - 1:
                continue  # last annotation: no following annotation to bound the slice
            x_seg = audio[(annotations[i][2] - 3000 +
                           first_onset):(annotations[i + 1][2] - 3000 +
                                         first_onset)]

            if len(x_seg) < 5000 or np.max(np.abs(x_seg)) < 0.05:
                continue

            x_seg = x_seg / np.max(np.abs(x_seg))

            if not os.path.exists(x_seg_dir):  #creating the directory
                os.mkdir(x_seg_dir)
            path, dirs, files = next(os.walk(x_seg_dir))
            dir_n_files = len(files)
            if annotations[i][1] == 'N':
                continue
            filename = os.path.join(
                x_seg_dir, instrument_name + '_' + str(dir_n_files) + '.wav')
            ess.MonoWriter(filename=filename, format='wav',
                           sampleRate=fs)(x_seg)
            file_count = file_count + 1

    print(instrument_name + ": " + str(file_count))
Example #39
  def run(self, fname):
    citation = u"""
            Atlı, H. S., Uyar, B., Şentürk, S., Bozkurt, B., and Serra, X.
            (2014). Audio feature extraction for exploring Turkish makam music.
            In Proceedings of 3rd International Conference on Audio Technologies
            for Music and Media, Ankara, Turkey.
            """

    run_windowing = Windowing(zeroPadding = 3 * self.settings.frameSize) # Hann window with x4 zero padding
    run_spectrum = Spectrum(size=self.settings.frameSize * 4)

    run_spectral_peaks = SpectralPeaks(minFrequency=self.settings.minFrequency,
            maxFrequency = self.settings.maxFrequency,
            maxPeaks = self.settings.maxPeaks,
            sampleRate = self.settings.sampleRate,
            magnitudeThreshold = self.settings.magnitudeThreshold,
            orderBy = 'magnitude')

    run_pitch_salience_function = PitchSalienceFunction(binResolution=self.settings.binResolution) # converts unit to cents, 55 Hz is taken as the default reference
    run_pitch_salience_function_peaks = PitchSalienceFunctionPeaks(binResolution=self.settings.binResolution)
    run_pitch_contours = PitchContours(hopSize=self.settings.hopSize,
            binResolution=self.settings.binResolution,
            peakDistributionThreshold = self.settings.peakDistributionThreshold)
    pool = Pool()

    # load audio and eqLoudness
    audio = MonoLoader(filename = fname)() # MonoLoader resamples the audio signal to 44100 Hz by default
    audio = EqualLoudness()(audio)

    for frame in FrameGenerator(audio,frameSize=self.settings.frameSize, hopSize=self.settings.hopSize):
      frame = run_windowing(frame)
      spectrum = run_spectrum(frame)
      peak_frequencies, peak_magnitudes = run_spectral_peaks(spectrum)
      salience = run_pitch_salience_function(peak_frequencies, peak_magnitudes)
      salience_peaks_bins, salience_peaks_contourSaliences = run_pitch_salience_function_peaks(salience)
      if not size(salience_peaks_bins):
          salience_peaks_bins = array([0])
      if not size(salience_peaks_contourSaliences):
          salience_peaks_contourSaliences = array([0])

      pool.add('allframes_salience_peaks_bins', salience_peaks_bins)
      pool.add('allframes_salience_peaks_contourSaliences', salience_peaks_contourSaliences)

    # post-processing: contour tracking
    contours_bins, contours_contourSaliences, contours_start_times, duration = run_pitch_contours(
            pool['allframes_salience_peaks_bins'],
            pool['allframes_salience_peaks_contourSaliences'])

    # run the simplified contour selection
    #[pitch, pitch_salience] = self.ContourSelection(contours_bins,contours_contourSaliences,contours_start_times,duration)

    # cent to Hz conversion
    #pitch = [0. if p == 0 else 55.*(2.**(((self.settings.binResolution*(p)))/1200)) for p in pitch]

    # generate time stamps
    #time_stamps = [s*self.settings.hopSize/float(self.settings.sampleRate) for s in xrange(0,len(pitch))]

    # [time pitch salience] matrix
    #out = transpose(vstack((time_stamps, pitch, pitch_salience)))
    #out = out.tolist()
    
    # settings
    settings = self.settings
    settings.update({'version':self.__version__, 
            'slug':self.__slug__, 
            'source': fname,
            'essentiaVersion': essentia.__version__,
            'pitchUnit': 'Hz',
            'citation': citation})

    # matlab 
    #matout = cStringIO.StringIO()
    #matob = {'pitch': out}
    #matob.update(settings)

    #scipy.io.savemat(matout, matob)
    
    #return out
    
    # unused
    #return {'pitch': json.dumps(out),
    #        'matlab': matout.getvalue(),
    #        'settings': json.dumps(settings)}

    return contours_bins, contours_contourSaliences, contours_start_times, duration
Example #40
  def run(self, fname):
    citation = u'Atlı, H. S., Uyar, B., Şentürk, S., Bozkurt, B., and Serra, X. ' \
                '(2014). Audio feature extraction for exploring Turkish makam music. ' \
                'In Proceedings of 3rd International Conference on Audio Technologies ' \
                'for Music and Media, Ankara, Turkey.'

    run_windowing = Windowing(zeroPadding = 3 * self.settings.frameSize) # Hann window with x4 zero padding
    run_spectrum = Spectrum(size=self.settings.frameSize * 4)

    run_spectral_peaks = SpectralPeaks(minFrequency=self.settings.minFrequency,
            maxFrequency = self.settings.maxFrequency,
            maxPeaks = self.settings.maxPeaks,
            sampleRate = self.settings.sampleRate,
            magnitudeThreshold = self.settings.magnitudeThreshold,
            orderBy = 'magnitude')

    run_pitch_salience_function = PitchSalienceFunction(binResolution=self.settings.binResolution) # converts unit to cents, 55 Hz is taken as the default reference
    run_pitch_salience_function_peaks = PitchSalienceFunctionPeaks(binResolution=self.settings.binResolution)
    run_pitch_contours = PitchContours(hopSize=self.settings.hopSize,
            binResolution=self.settings.binResolution,
            peakDistributionThreshold = self.settings.peakDistributionThreshold)
    pool = Pool()

    # load audio and eqLoudness
    audio = MonoLoader(filename = fname)() # MonoLoader resamples the audio signal to 44100 Hz by default
    audio = EqualLoudness()(audio)

    for frame in FrameGenerator(audio,frameSize=self.settings.frameSize, hopSize=self.settings.hopSize):
      frame = run_windowing(frame)
      spectrum = run_spectrum(frame)
      peak_frequencies, peak_magnitudes = run_spectral_peaks(spectrum)
      salience = run_pitch_salience_function(peak_frequencies, peak_magnitudes)
      salience_peaks_bins, salience_peaks_contourSaliences = run_pitch_salience_function_peaks(salience)
      if not size(salience_peaks_bins):
          salience_peaks_bins = array([0])
      if not size(salience_peaks_contourSaliences):
          salience_peaks_contourSaliences = array([0])

      pool.add('allframes_salience_peaks_bins', salience_peaks_bins)
      pool.add('allframes_salience_peaks_contourSaliences', salience_peaks_contourSaliences)

    # post-processing: contour tracking
    contours_bins, contours_contourSaliences, contours_start_times, duration = run_pitch_contours(
            pool['allframes_salience_peaks_bins'],
            pool['allframes_salience_peaks_contourSaliences'])

    # WARNING: As of 3 April 2015, the values in "contours_start_times" lead the
    # audio by 1024 + 128 = 1152 samples (about 26 ms at 44100 Hz) when the input
    # is an mp3 file, as explained in https://github.com/MTG/essentia/issues/246.
    # This is caused by typical encoder/decoder delays. For now we shift the
    # values in "contours_start_times" later by 1152 samples to compensate;
    # remove the next line once the issue is fixed.
    contours_start_times = [c + (1024 + 128) / float(self.settings.sampleRate) for c in contours_start_times]

    # run the simplified contour selection
    [pitch, pitch_salience] = self.ContourSelection(contours_bins, contours_contourSaliences, contours_start_times, duration)

    # cent to Hz conversion
    pitch = [0. if p == 0 else 55. * 2. ** (self.settings.binResolution * p / 1200.) for p in pitch]
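    # e.g. with a binResolution of 10 cents (a typical value; hypothetical here),
    # bin 600 maps to 55 * 2 ** (10 * 600 / 1200.) = 55 * 2 ** 5 = 1760 Hz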

    # generate time stamps
    time_stamps = [s * self.settings.hopSize / float(self.settings.sampleRate) for s in xrange(len(pitch))]

    # [time pitch salience] matrix
    out = transpose(vstack((time_stamps, pitch, pitch_salience)))
    out = out.tolist()
    
    # settings
    settings = self.settings
    settings.update({'version': self.__version__,
            'slug': self.__slug__,
            'source': fname,
            'essentiaVersion': essentia.__version__,
            'pitchUnit': 'Hz',
            'citation': citation})

    # MATLAB export: write the .mat file into an in-memory buffer
    matout = cStringIO.StringIO()
    matob = {'pitch': out}
    matob.update(settings)

    scipy.io.savemat(matout, matob)

    return {'pitch': json.dumps(out),
            'matlab': matout.getvalue(),
            'settings': json.dumps(settings)}
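
# A minimal usage sketch (not part of the original example), assuming `extractor`
# is an instance of the class above; 'recording.mp3' and the output paths are
# hypothetical. The returned dict carries the pitch track and settings as JSON
# strings and the .mat file contents as raw bytes.
result = extractor.run('recording.mp3')
with open('pitch.json', 'w') as f:
    f.write(result['pitch'])
with open('pitch.mat', 'wb') as f:
    f.write(result['matlab'])
with open('settings.json', 'w') as f:
    f.write(result['settings'])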
Beispiel #41
0
# analysis parameters (values as in the variant of this example further below;
# input_file is a hypothetical path, undefined in the original)
sampleRate   = 22050
frameSize    = 8192
hopSize      = 1024
rmsFrameSize = 256
rmsHopSize   = 32
input_file   = 'audio.wav'

loader = MonoLoader(filename=input_file, sampleRate=sampleRate)
w = Windowing(type='blackmanharris62')
spectrum = Spectrum()
melbands = MelBands(sampleRate=sampleRate,
                    numberBands=40,
                    lowFrequencyBound=0,
                    highFrequencyBound=sampleRate / 2)

pool = Pool()

for frame in FrameGenerator(audio=loader(),
                            frameSize=frameSize,
                            hopSize=hopSize,
                            startFromZero=True):
    bands = melbands(spectrum(w(frame)))
    pool.add('melbands', bands)

print len(pool['melbands']), "Mel band frames"
print len(pool['melbands']) / rmsHopSize, "Rhythm transform frames"  # roughly one RT frame per rmsHopSize (=32) mel-band frames

rhythmtransform = RhythmTransform(frameSize=rmsFrameSize, hopSize=rmsHopSize)
rt = rhythmtransform(pool['melbands'])

import matplotlib.pyplot as plt

plt.imshow(rt.T, aspect='auto')
plt.xlabel('Frames')
plt.ylabel('Rhythm Transform coefficients')
plt.show()
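
# Back-of-the-envelope sketch (not part of the original example): the spacing of
# the rhythm transform bins follows from the analysis parameters above. Mel-band
# frames arrive at sampleRate/hopSize per second, and each rhythm transform frame
# is computed over rmsFrameSize of them, so one bin spans roughly:
frame_rate = sampleRate / float(hopSize)  # ~21.5 mel-band frames per second
bin_hz = frame_rate / rmsFrameSize        # ~0.084 Hz per rhythm transform bin
bin_bpm = bin_hz * 60.                    # ~5.05 BPM per bin, close to the hard-coded
                                          # constant used in the variant further below
print bin_bpm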
Beispiel #42
0
def computeOnsets(inFile, outFile):
    print outFile
    # In this example we are going to look at how to perform onset detection
    # and mark the detected onsets on the audio using the AudioOnsetsMarker
    # algorithm.
    #
    # Onset detection consists of two main phases:
    #  1- compute an onset detection function: a function describing the
    #     evolution of some signal parameters that may indicate the presence
    #     of an onset
    #  2- perform the actual onset detection: given one or more of these
    #     detection functions, decide where in the sound the onsets are

    # (the original standalone example read the file name from the command
    # line; here it is passed in as the inFile argument)

    # don't forget, we can actually instantiate and call an algorithm on the same line!
    print 'Loading audio file...'
    audio = MonoLoader(filename=inFile)()

    # Phase 1: compute the onset detection function
    # The OnsetDetection algorithm tells us that there are several methods available in Essentia,
    # let's do two of them

    od1 = OnsetDetection(method='hfc')
    od2 = OnsetDetection(method='complex')

    # let's also get the other algorithms we will need, and a pool to store the results

    w = Windowing(type='hann')
    fft = FFT()  # this gives us a complex FFT
    c2p = CartesianToPolar()  # and this turns it into a pair (magnitude, phase)

    pool = Pool()

    # let's get down to business
    print 'Computing onset detection functions...'
    for frame in FrameGenerator(audio, frameSize=1024, hopSize=512):
        mag, phase = c2p(fft(w(frame)))
        pool.add('features.hfc', od1(mag, phase))
        pool.add('features.complex', od2(mag, phase))

    # Phase 2: compute the actual onsets locations
    onsets = Onsets()

    print 'Computing onset times...'
    onsets_hfc = onsets(  # this algo expects a matrix, not a vector
        array([pool['features.hfc']]),
        # you need to specify weights, but as there is only a single
        # function, it doesn't actually matter which weight you give it
        [1])
    np.savetxt(outFile, onsets_hfc, fmt='%f')

    # let's also compute the complex-domain onsets; note that writing them to
    # the same outFile overwrites the HFC onsets saved just above, so only the
    # complex-domain onsets actually end up on disk
    onsets_complex = onsets(array([pool['features.complex']]), [1])
    np.savetxt(outFile, onsets_complex, fmt='%f')
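
# A short extension sketch (not part of the original example): with more than one
# detection function the weights passed to Onsets become meaningful, and the
# AudioOnsetsMarker algorithm mentioned at the top can render the detected onsets
# as audible beeps. markOnsets and the output path passed to it are hypothetical.
from essentia.standard import (MonoLoader, FrameGenerator, Windowing, FFT,
                               CartesianToPolar, OnsetDetection, Onsets,
                               AudioOnsetsMarker, MonoWriter)
from essentia import Pool, array

def markOnsets(inFile, outFile):
    audio = MonoLoader(filename=inFile)()
    od_hfc = OnsetDetection(method='hfc')
    od_complex = OnsetDetection(method='complex')
    w = Windowing(type='hann')
    fft = FFT()
    c2p = CartesianToPolar()
    pool = Pool()
    for frame in FrameGenerator(audio, frameSize=1024, hopSize=512):
        mag, phase = c2p(fft(w(frame)))
        pool.add('features.hfc', od_hfc(mag, phase))
        pool.add('features.complex', od_complex(mag, phase))
    # combine both detection functions, weighting them equally
    onset_times = Onsets()(
        array([pool['features.hfc'], pool['features.complex']]),
        [1, 1])
    # overlay a beep at every detected onset and write the result
    marked = AudioOnsetsMarker(onsets=onset_times, type='beep')(audio)
    MonoWriter(filename=outFile)(marked)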
# variant of the rhythm transform example above, ending with a tempo-peak estimate
import numpy

input_file = 'audio.wav'  # hypothetical path, undefined in the original
sampleRate   = 22050
frameSize    = 8192
hopSize      = 1024
rmsFrameSize = 256
rmsHopSize   = 32

loader = MonoLoader(filename=input_file, sampleRate=sampleRate)
w = Windowing(type='blackmanharris62')
spectrum = Spectrum()
melbands = MelBands(sampleRate=sampleRate, numberBands=40, lowFrequencyBound=0, highFrequencyBound=sampleRate/2)

pool = Pool()

for frame in FrameGenerator(audio=loader(), frameSize=frameSize, hopSize=hopSize, startFromZero=True):
    bands = melbands(spectrum(w(frame)))
    pool.add('melbands', bands)


rhythmtransform = RhythmTransform(frameSize=rmsFrameSize, hopSize=rmsHopSize)
rt = rhythmtransform(pool['melbands'])

# average the rhythm transform over time and locate its strongest bin
rt_mean = numpy.mean(rt, axis=0)
bin_resolution = 5.007721656976744  # spacing of one rhythm transform bin (presumably in BPM)

# dominant rhythmic periodicity
print numpy.argmax(rt_mean) * bin_resolution