def compute(audio, pool, options):
    """Compute the frame-wise descriptors that segmentation depends on.

    The audio is cut into frames; for every frame its time scope (start and
    end, in seconds) is added to the pool under ``<namespace>.scope``.  Frames
    that are not skipped are windowed and transformed to a spectrum, from
    which ``spectral_energy`` and ``spectral_mfcc`` are added to the pool.

    Args:
        audio: signal handed to ``essentia.FrameGenerator``.
        pool: descriptor pool; results are appended through ``pool.add``.
        options: mapping providing ``sampleRate``, ``frameSize``, ``hopSize``,
            ``windowType`` and ``skipSilence``.
    """

    def store(key, value):
        # all descriptors are filed under the module-level namespace
        pool.add(namespace + '.' + key, value)

    # analysis settings
    sample_rate = options['sampleRate']
    frame_size = options['frameSize']
    hop = options['hopSize']
    window_kind = options['windowType']

    # framing -> windowing -> spectrum chain
    frame_cutter = essentia.FrameGenerator(audio=audio,
                                           frameSize=frame_size,
                                           hopSize=hop)
    apply_window = essentia.Windowing(size=frame_size,
                                      zeroPadding=0,
                                      type=window_kind)
    to_spectrum = essentia.Spectrum(size=frame_size)

    # spectral descriptors
    frame_energy = essentia.Energy()
    frame_mfcc = essentia.MFCC(highFrequencyBound=8000)

    INFO('Computing Low-Level descriptors necessary for segmentation...')

    # bookkeeping for the progress display
    total_frames = frame_cutter.num_frames()
    processed = 0
    progress = Progress(total=total_frames)

    # the first frame is centred on sample 0, hence the negative start
    frame_start = -frame_size * 0.5

    for frame in frame_cutter:
        # the scope is recorded for every frame, silent ones included
        store('scope', [frame_start / sample_rate,
                        (frame_start + frame_size) / sample_rate])
        frame_start += hop

        if options['skipSilence'] and essentia.isSilent(frame):
            # kept for parity with the original bookkeeping; the Progress
            # object was already built from the initial count
            total_frames -= 1
            continue

        magnitudes = to_spectrum(apply_window(frame))

        # the energy is needed later for getting the thumbnail
        store('spectral_energy', frame_energy(magnitudes))

        # only the MFCC coefficients are kept, the mel bands are discarded
        _, coefficients = frame_mfcc(magnitudes)
        store('spectral_mfcc', coefficients)

        progress.update(processed)
        processed += 1

    progress.finish()
def _midi_pitch_histogram(pitches, size=128):
    """Return a normalised histogram of MIDI note numbers for *pitches*.

    Args:
        pitches: iterable of pitch estimates in Hz.
        size: number of histogram bins (MIDI notes ``0 .. size - 1``).

    Returns:
        A list of ``size`` floats.  Bin 0 holds the share of estimates that
        could not be mapped to a note (non-positive, above 12600 Hz, or
        outside ``0 .. size - 1`` after conversion); any estimate that
        converts to note 0 is replaced by that share.  When nothing can be
        mapped the result is ``[1.0, 0.0, ...]``.
    """
    from math import log
    from numpy import bincount

    ln2 = 0.69314718055995  # same constant the conversion has always used
    notes = []
    unknown = 0
    for freq in pitches:
        note = -1
        if 0. < freq <= 12600:
            # Hz -> MIDI note number.  bincount only accepts non-negative
            # integers, so the fractional part is dropped explicitly.
            note = int(12 * (log(freq / 6.875) / ln2) - 3.)
        if 0 <= note < size:
            notes.append(note)
        else:
            unknown += 1

    if notes:
        counts = bincount(notes, minlength=size)
        # bin 0 is re-purposed to carry the number of pruned values
        counts[0] = unknown
        total = float(counts.sum())
        if total > 0:
            return [float(count) / total for count in counts]

    # nothing usable: everything is "unknown"
    histogram = [0.] * size
    histogram[0] = 1.
    return histogram


def compute(audio, pool, options):
    """Compute the frame-wise low-level descriptors and add them to *pool*.

    For every frame the three silence-rate flags are stored first.  Frames
    that are not skipped then get temporal descriptors (zero-crossing rate,
    LPC), spectrum-based descriptors (centroid, decrease, energy, energy
    bands, HFC, RMS, flux, roll-off, strong peak, central-moment shape,
    dissonance, MFCC, bark bands and their shape, pitch, pitch salience,
    spectral complexity).  Spectral-contrast values are gathered in a
    temporary pool and handed to ``spectralContrastPCA``.  Finally a
    128-bin MIDI pitch histogram and its spread are stored.

    Args:
        audio: signal handed to ``essentia.FrameGenerator``.
        pool: descriptor pool; results are appended through ``pool.add``
            under the module-level ``namespace``.
        options: mapping providing ``sampleRate``, ``frameSize``, ``hopSize``,
            ``windowType`` and ``skipSilence``.

    Raises:
        essentia.EssentiaError: if no ``zerocrossingrate`` descriptor was
            produced, i.e. every frame was skipped as silent.
    """
    # analysis parameters
    sampleRate = options['sampleRate']
    frameSize = options['frameSize']
    hopSize = options['hopSize']
    windowType = options['windowType']
    prefix = namespace + '.'

    # temporal descriptors
    lpc = essentia.LPC(order=10, type='warped', sampleRate=sampleRate)
    zerocrossingrate = essentia.ZeroCrossingRate()

    # frame algorithms
    frames = essentia.FrameGenerator(audio=audio,
                                     frameSize=frameSize,
                                     hopSize=hopSize)
    window = essentia.Windowing(size=frameSize, zeroPadding=0, type=windowType)
    spectrum = essentia.Spectrum(size=frameSize)

    # spectral algorithms
    barkbands = essentia.BarkBands(sampleRate=sampleRate)
    centralmoments = essentia.SpectralCentralMoments()
    crest = essentia.Crest()
    centroid = essentia.SpectralCentroid()
    decrease = essentia.SpectralDecrease()
    spectral_contrast = essentia.SpectralContrast(frameSize=frameSize,
                                                  sampleRate=sampleRate,
                                                  numberBands=6,
                                                  lowFrequencyBound=20,
                                                  highFrequencyBound=11000,
                                                  neighbourRatio=0.4,
                                                  staticDistribution=0.15)
    distributionshape = essentia.DistributionShape()
    energy = essentia.Energy()
    # energyband_bass, energyband_middle and energyband_high parameters come
    # from "standard" hi-fi equalizers
    energyband_bass = essentia.EnergyBand(startCutoffFrequency=20.0,
                                          stopCutoffFrequency=150.0,
                                          sampleRate=sampleRate)
    energyband_middle_low = essentia.EnergyBand(startCutoffFrequency=150.0,
                                                stopCutoffFrequency=800.0,
                                                sampleRate=sampleRate)
    energyband_middle_high = essentia.EnergyBand(startCutoffFrequency=800.0,
                                                 stopCutoffFrequency=4000.0,
                                                 sampleRate=sampleRate)
    energyband_high = essentia.EnergyBand(startCutoffFrequency=4000.0,
                                          stopCutoffFrequency=20000.0,
                                          sampleRate=sampleRate)
    flatnessdb = essentia.FlatnessDB()
    flux = essentia.Flux()
    hfc = essentia.HFC()
    mfcc = essentia.MFCC()
    rolloff = essentia.RollOff()
    rms = essentia.RMS()
    strongpeak = essentia.StrongPeak()

    # pitch algorithms
    pitch_detection = essentia.PitchDetection(frameSize=frameSize,
                                              sampleRate=sampleRate)
    pitch_salience = essentia.PitchSalience()

    # dissonance
    spectral_peaks = essentia.SpectralPeaks(sampleRate=sampleRate,
                                            orderBy='frequency')
    dissonance = essentia.Dissonance()

    # spectral complexity
    # magnitudeThreshold = 0.005 is hardcoded for a "blackmanharris62" frame
    spectral_complexity = essentia.SpectralComplexity(magnitudeThreshold=0.005)

    INFO('Computing Low-Level descriptors...')

    # used for a nice progress display
    progress = Progress(total=frames.num_frames())
    n_frames = 0

    pitches = []
    scPool = essentia.Pool()  # pool for spectral contrast

    # CentralMoments for the bark bands is built once per band count instead
    # of once per frame
    barkbands_centralmoments = None
    barkbands_size = None

    for frame in frames:
        # silence rate (stored for every frame, skipped ones included)
        frame_is_silent = essentia.isSilent(frame)
        pool.add(prefix + 'silence_rate_60dB', frame_is_silent)
        pool.add(prefix + 'silence_rate_30dB', is_silent_threshold(frame, -30))
        pool.add(prefix + 'silence_rate_20dB', is_silent_threshold(frame, -20))

        if options['skipSilence'] and frame_is_silent:
            continue

        # temporal descriptors
        pool.add(prefix + 'zerocrossingrate', zerocrossingrate(frame))
        frame_lpc, _ = lpc(frame)  # reflection coefficients are not stored
        pool.add(prefix + 'temporal_lpc', frame_lpc)

        frame_windowed = window(frame)
        frame_spectrum = spectrum(frame_windowed)

        # spectrum-based descriptors
        power_spectrum = frame_spectrum**2
        pool.add(prefix + 'spectral_centroid', centroid(power_spectrum))
        pool.add(prefix + 'spectral_decrease', decrease(power_spectrum))
        pool.add(prefix + 'spectral_energy', energy(frame_spectrum))
        pool.add(prefix + 'spectral_energyband_low',
                 energyband_bass(frame_spectrum))
        pool.add(prefix + 'spectral_energyband_middle_low',
                 energyband_middle_low(frame_spectrum))
        pool.add(prefix + 'spectral_energyband_middle_high',
                 energyband_middle_high(frame_spectrum))
        pool.add(prefix + 'spectral_energyband_high',
                 energyband_high(frame_spectrum))
        pool.add(prefix + 'hfc', hfc(frame_spectrum))
        pool.add(prefix + 'spectral_rms', rms(frame_spectrum))
        pool.add(prefix + 'spectral_flux', flux(frame_spectrum))
        pool.add(prefix + 'spectral_rolloff', rolloff(frame_spectrum))
        pool.add(prefix + 'spectral_strongpeak', strongpeak(frame_spectrum))

        # central moments descriptors
        frame_centralmoments = centralmoments(power_spectrum)
        (frame_spread, frame_skewness,
         frame_kurtosis) = distributionshape(frame_centralmoments)
        pool.add(prefix + 'spectral_kurtosis', frame_kurtosis)
        pool.add(prefix + 'spectral_spread', frame_spread)
        pool.add(prefix + 'spectral_skewness', frame_skewness)

        # dissonance
        (frame_frequencies,
         frame_magnitudes) = spectral_peaks(frame_spectrum)
        frame_dissonance = dissonance(frame_frequencies, frame_magnitudes)
        pool.add(prefix + 'dissonance', frame_dissonance)

        # mfcc (the mel bands themselves are not stored)
        _, frame_mfcc = mfcc(frame_spectrum)
        pool.add(prefix + 'mfcc', frame_mfcc)

        # spectral contrast, reduced later by spectralContrastPCA
        (sc_coeffs, sc_valleys) = spectral_contrast(frame_spectrum)
        scPool.add(prefix + 'sccoeffs', sc_coeffs)
        scPool.add(prefix + 'scvalleys', sc_valleys)

        # barkbands-based descriptors
        frame_barkbands = barkbands(frame_spectrum)
        pool.add(prefix + 'barkbands', frame_barkbands)
        pool.add(prefix + 'spectral_crest', crest(frame_barkbands))
        pool.add(prefix + 'spectral_flatness_db', flatnessdb(frame_barkbands))

        if len(frame_barkbands) != barkbands_size:
            barkbands_size = len(frame_barkbands)
            barkbands_centralmoments = essentia.CentralMoments(
                range=barkbands_size - 1)
        (barkbands_spread, barkbands_skewness,
         barkbands_kurtosis) = distributionshape(
             barkbands_centralmoments(frame_barkbands))
        pool.add(prefix + 'barkbands_spread', barkbands_spread)
        pool.add(prefix + 'barkbands_skewness', barkbands_skewness)
        pool.add(prefix + 'barkbands_kurtosis', barkbands_kurtosis)

        # pitch descriptors
        frame_pitch, frame_pitch_confidence = pitch_detection(frame_spectrum)
        if frame_pitch > 0 and frame_pitch <= 20000.:
            pool.add(prefix + 'pitch', frame_pitch)
        # NOTE(review): every estimate is kept for the histogram, which counts
        # out-of-range values as "unknown" -- confirm against the original
        # indentation of this statement.
        pitches.append(frame_pitch)
        pool.add(prefix + 'pitch_instantaneous_confidence',
                 frame_pitch_confidence)

        frame_pitch_salience = pitch_salience(frame_spectrum[:-1])
        pool.add(prefix + 'pitch_salience', frame_pitch_salience)

        # spectral complexity
        pool.add(prefix + 'spectral_complexity',
                 spectral_complexity(frame_spectrum))

        # display of progress report
        progress.update(n_frames)
        n_frames += 1

    # if there is no 'zerocrossingrate' every frame was skipped: silent file
    if 'zerocrossingrate' not in descriptorNames(pool.descriptorNames(),
                                                 namespace):
        raise essentia.EssentiaError('This is a silent file!')

    spectralContrastPCA(scPool, pool)

    # pitch value histogram over MIDI notes
    midipitchhist = _midi_pitch_histogram(pitches)
    pool.add(prefix + 'spectral_pitch_histogram', midipitchhist)

    # only the spread of the histogram is stored
    pitch_centralmoments = essentia.CentralMoments(
        range=len(midipitchhist) - 1)
    pitch_histogram_spread, _, _ = distributionshape(
        pitch_centralmoments(midipitchhist))
    pool.add(prefix + 'spectral_pitch_histogram_spread',
             pitch_histogram_spread)

    progress.finish()
def compute(audio, pool, options):
    """Compute per-beat loudness descriptors and add them to *pool*.

    Beat positions (in seconds) are read from ``rhythm.beats_position``.  For
    each beat whose search window lies inside the processed audio, the window
    (twice the beat duration, centred on the beat) is passed to
    ``max_energy_index``; the excerpt of one beat duration starting at the
    returned offset is windowed and transformed to a spectrum.  Its energy is
    stored as ``<namespace>.beats_loudness`` and its 20-150 Hz energy-band
    ratio as ``<namespace>.beats_loudness_bass``.

    When there are no beats, two zero values are stored for each descriptor.

    Args:
        audio: sample sequence; sliced with integer sample indices.
        pool: descriptor pool, read via ``pool.value`` and written via
            ``pool.add``.
        options: mapping providing ``sampleRate`` and ``windowType``.
    """
    INFO("Computing Beats descriptors...")

    sampleRate = options['sampleRate']
    windowType = options['windowType']

    # 50ms estimation after checking some drums kicks duration on freesound
    beat_duration = 0.05

    beats = pool.value('rhythm.beats_position')[0]

    # special case
    if len(beats) == 0:
        # we add them 2 times to get 'mean/var' stats and not 'value'
        # FIXME: should use "undefined"
        # Same keys and call shape as the regular path below, so consumers
        # find the descriptors under the namespace in both cases.
        for _ in range(2):
            pool.add(namespace + '.' + 'beats_loudness', 0.0)
        for _ in range(2):
            pool.add(namespace + '.' + 'beats_loudness_bass', 0.0)
        INFO('100% done...')
        return

    # processed duration, converted from seconds to samples
    duration = pool.value('metadata.duration_processed')[0] * float(sampleRate)

    # FIXME: converted to samples in order to have more accurate control of
    # the size of the window. This is due to FFT not being able to be computed
    # on arrays of odd sizes. Please FIXME later, when FFT accepts all kinds
    # of sizes.
    beat_duration = int(beat_duration * float(sampleRate) + 0.5)
    if beat_duration % 2 == 1:
        beat_duration += 1
    # NOTE(review): taken as unconditional (window is always twice the even
    # beat duration) -- confirm against the original indentation.
    beat_window_duration = beat_duration * 2
    half_window = beat_duration  # == beat_window_duration / 2, an integer

    energy = essentia.Energy()
    energybandratio = essentia.EnergyBandRatio(startFrequency=20.0,
                                               stopFrequency=150.0,
                                               sampleRate=sampleRate)

    progress = Progress(total=len(beats))

    for n_beats, beat in enumerate(beats, 1):
        # beat position in samples
        beat_sample = beat * float(sampleRate)

        # only beats whose whole search window lies inside the audio are used
        if (beat_sample - half_window > 0.0
                and beat_sample + half_window < duration):
            # slices need integer indices; truncate once and derive the end
            # from the start so the window size stays exactly even
            window_start = int(beat_sample - half_window)
            window_end = window_start + beat_window_duration
            beat_window = audio[window_start:window_end]

            # presumably the offset of the energy peak inside the window --
            # max_energy_index is defined elsewhere
            beat_start = window_start + int(max_energy_index(beat_window))
            beat_end = beat_start + beat_duration
            beat_audio = audio[beat_start:beat_end]

            window = essentia.Windowing(size=len(beat_audio),
                                        zeroPadding=0,
                                        type=windowType)
            spectrum = essentia.Spectrum(size=len(beat_audio))
            beat_spectrum = spectrum(window(beat_audio))

            pool.add(namespace + '.' + 'beats_loudness',
                     energy(beat_spectrum))
            pool.add(namespace + '.' + 'beats_loudness_bass',
                     energybandratio(beat_spectrum))

        # display of progress report, in beats to match Progress(total=...)
        progress.update(n_beats)

    # NOTE: the in-between-beats energy analysis was disabled in the original
    # and is not computed here.
    progress.finish()