def _ExtractAll(self, audio_location, sample_rate):
    """Run every configured feature extraction over one audio file.

    All features are registered on a single FeaturePlan, so the file at
    ``audio_location`` is processed once. The name left of each colon in
    the definitions below is the output name of that feature.

    Returns the result of ``Engine.readAllOutputs()`` for the plan.
    """
    feature_definitions = (
        'zcr: ZCR',
        'mfcc: MFCC',
        'mfcc_D1: MFCC > Derivate DOrder=1',
        'flux: SpectralFlux',
        'energy: Energy',
        'loudness: Loudness',
        'obsi: OBSI',
        'sharpness: PerceptualSharpness',
        'spread: PerceptualSpread',
        'rolloff: SpectralRolloff',
        'variation: SpectralVariation',
    )

    plan = FeaturePlan(sample_rate=sample_rate)
    for definition in feature_definitions:
        plan.addFeature(definition)

    # Load the plan's dataflow into a fresh engine.
    extraction_engine = Engine()
    extraction_engine.load(plan.getDataFlow())

    # Feed the audio file through the engine, then collect every output.
    AudioFileProcessor().processFile(extraction_engine, audio_location)
    return extraction_engine.readAllOutputs()
class MFCCExtractor:
    """Extract MFCC frames from audio files with a reusable yaafe engine.

    Constructor keyword arguments:
        block_size: value used for the yaafe ``blockSize`` parameter
            (default 1024).
        step_size: value used for the yaafe ``stepSize`` parameter
            (default 512).
    """

    def __init__(self, **args):
        self.engine = Engine()
        self.afp = AudioFileProcessor()
        self.block_size = args.get('block_size', 1024)
        self.step_size = args.get('step_size', 512)

    def extract(self, path, **args):
        """Compute the 'mfcc' output for the audio file at ``path``.

        The sample rate is read from the file header with the ``wave``
        module and used to build the feature plan.

        Keyword arguments:
            save_to_disk: when truthy, the result is also written to
                ``<path>.mfc.csv``. A falsy value (for example
                ``save_to_disk=False``) disables saving.

        Returns the 'mfcc' entry of ``Engine.readAllOutputs()``.
        """
        self.path = path
        print(path)

        # Only the header is needed. Close the handle immediately so that
        # repeated calls do not leak open files. ``self.wave`` stays
        # assigned (now closed) because earlier versions exposed it.
        self.wave = wave.open(path, 'r')
        try:
            self.rate = self.wave.getframerate()
        finally:
            self.wave.close()

        fp = FeaturePlan(sample_rate=self.rate)
        fp.addFeature('mfcc: MFCC blockSize={} stepSize={}'.format(
            self.block_size, self.step_size))
        self.engine.load(fp.getDataFlow())
        self.afp.processFile(self.engine, path)
        feats = self.engine.readAllOutputs()

        # Test the flag's value, not just its presence in ``args``.
        if args.get('save_to_disk'):
            self.__save_to_disk(feats['mfcc'])
        return feats['mfcc']

    def __save_to_disk(self, feats):
        """Write ``feats`` as comma-separated text next to the source file."""
        new_path = self.path + ".mfc.csv"
        np.savetxt(new_path, feats, delimiter=",")
def _energy(audio_location, sample_rate):
    """Extract the yaafe ``Energy`` feature from one audio file.

    Per the original notes this mirrors the command line
    ``python yaafe.py -r SAMPLERATE -f "energy: Energy PARAMETERS" WAV``.
    No ``blockSize``/``stepSize`` is given here, so yaafe's defaults apply
    (documented in the original notes as 1024 and 512).

    The output is registered under the name 'Energy'.
    Returns the result of ``Engine.readAllOutputs()``.
    """
    plan = FeaturePlan(sample_rate=sample_rate)
    plan.addFeature('Energy: Energy')

    # Load the dataflow into a fresh engine.
    extraction_engine = Engine()
    extraction_engine.load(plan.getDataFlow())

    # Process the file and hand back every output of the engine.
    AudioFileProcessor().processFile(extraction_engine, audio_location)
    return extraction_engine.readAllOutputs()
def main(argv):
    """Print the analysed duration of a song, plus a beat histogram summary.

    ``argv`` is a command-line style list ``[program_name, song_path]``.
    With any other length a usage line is printed and the function returns.
    The beat histogram is only requested when ``YAAFE_PATH`` is present in
    the environment.

    Raises:
        IOError: if ``song_path`` does not exist.
    """
    if len(argv) != 2:
        print("usage: python %s foo.mp3" % (argv[0]))
        return

    song_path = argv[1]
    # Raise a real exception instead of using ``assert``: assertions are
    # stripped when Python runs with -O.
    if not os.path.exists(song_path):
        raise IOError("audio file not found: %s" % song_path)

    with_beat_hist = "YAAFE_PATH" in os.environ

    fp = FeaturePlan(sample_rate=SAMPLE_RATE, resample=True,
                     time_start=TIME_START, time_limit=TIME_LIMIT)
    fp.addFeature("frames: Frames blockSize={0} stepSize={1}".format(
        SLICE_WINSIZE, SLICE_STEPSIZE))
    if with_beat_hist:
        fp.addFeature(
            "beat_hist: BeatHistogramSummary ACPNbPeaks=3 BHSBeatFrameSize=128 BHSBeatFrameStep=64 "
            "BHSHistogramFrameSize=40 BHSHistogramFrameStep=40 FFTLength=0 FFTWindow=Hanning "
            "HInf=40 HNbBins=80 HSup=200 NMANbFrames=5000 blockSize=1024 stepSize=512"
        )

    engine = Engine()
    engine.load(fp.getDataFlow())

    afp = AudioFileProcessor()
    afp.processFile(engine, song_path)

    # Flatten the per-block frames into one array of samples.
    frames = np.concatenate(engine.readOutput("frames"))

    # Single-argument print() calls behave the same on Python 2 and 3.
    print("time start: %ss" % TIME_START)
    print("time limit: %ss" % TIME_LIMIT)
    print("duration: %s" % (1.0 * frames.size / SAMPLE_RATE))

    if with_beat_hist:
        beat_hist = engine.readOutput("beat_hist")
        print("beat_hist: %s" % beat_hist)
class MFCCExtractor:
    """Extract MFCC frames from audio files with a reusable yaafe engine.

    Constructor keyword arguments:
        block_size: value used for the yaafe ``blockSize`` parameter
            (default 1024).
        step_size: value used for the yaafe ``stepSize`` parameter
            (default 512).
    """

    def __init__(self, **args):
        self.engine = Engine()
        self.afp = AudioFileProcessor()
        self.block_size = args.get('block_size', 1024)
        self.step_size = args.get('step_size', 512)

    def extract(self, path, **args):
        """Compute the 'mfcc' output for the audio file at ``path``.

        The sample rate is read from the file header with the ``wave``
        module and used to build the feature plan.

        Keyword arguments:
            save_to_disk: when truthy, the result is also written to
                ``<path>.mfc.csv``. A falsy value (for example
                ``save_to_disk=False``) disables saving.

        Returns the 'mfcc' entry of ``Engine.readAllOutputs()``.
        """
        self.path = path
        print(path)

        # Only the header is needed. Close the handle immediately so that
        # repeated calls do not leak open files. ``self.wave`` stays
        # assigned (now closed) because earlier versions exposed it.
        self.wave = wave.open(path, 'r')
        try:
            self.rate = self.wave.getframerate()
        finally:
            self.wave.close()

        fp = FeaturePlan(sample_rate=self.rate)
        fp.addFeature('mfcc: MFCC blockSize={} stepSize={}'.format(
            self.block_size, self.step_size))
        self.engine.load(fp.getDataFlow())
        self.afp.processFile(self.engine, path)
        feats = self.engine.readAllOutputs()

        # Test the flag's value, not just its presence in ``args``.
        if args.get('save_to_disk'):
            self.__save_to_disk(feats['mfcc'])
        return feats['mfcc']

    def __save_to_disk(self, feats):
        """Write ``feats`` as comma-separated text next to the source file."""
        new_path = self.path + ".mfc.csv"
        np.savetxt(new_path, feats, delimiter=",")
class ConstantQ:
    """Constant-Q transform of an audio file, computed by a yaafe engine.

    The constructor stores its configuration and loads a one-feature plan
    ('cqt') into ``self.engine``. Calling the instance with an audio file
    returns the transform as a float32 array.
    """

    def __init__(self, num_bands, fmin, num_octaves, fps, align, log_div,
                 sample_rate=44100, fold=None):
        # ``fold`` is accepted but not used anywhere in this class.
        self.fps = fps
        self.num_bands = num_bands
        self.align = align
        self.fmin = fmin
        self.num_octaves = num_octaves
        self.log_div = log_div
        self.sample_rate = sample_rate

        from yaafelib import FeaturePlan, Engine

        # The hop size in samples is derived from the requested frame rate.
        hop = sample_rate / fps
        definition_parts = [
            'cqt: CQT',
            'CQTAlign={}'.format(align),
            'CQTBinsPerOctave={}'.format(num_bands),
            'CQTMinFreq={}'.format(fmin),
            'CQTNbOctaves={}'.format(num_octaves),
            'stepSize={}'.format(hop),
        ]

        plan = FeaturePlan(sample_rate=sample_rate)
        plan.addFeature(" ".join(definition_parts))

        self.engine = Engine()
        self.engine.load(plan.getDataFlow())

    @property
    def name(self):
        """Identifier string that encodes the transform's configuration."""
        template = ('cqt_fps={}_num-bands={}_align={}_fmin={}_num_oct={}'
                    '_logdiv={}')
        settings = (self.fps, self.num_bands, self.align, self.fmin,
                    self.num_octaves, self.log_div)
        return template.format(*settings)

    def __call__(self, audio_file):
        """Return the (optionally log-scaled) CQT of ``audio_file``."""
        signal = mm.audio.signal.Signal(audio_file,
                                        sample_rate=self.sample_rate,
                                        num_channels=1)
        samples = signal.astype(np.float64).reshape((1, -1))
        spectrum = self.engine.processAudio(samples)['cqt']

        # Append one all-zero frame to compensate for the different padding
        # in madmom vs. yaafe, then convert to float32.
        zero_frame = np.zeros(spectrum.shape[1:])
        spectrum = np.vstack((spectrum, zero_frame)).astype(np.float32)

        if not self.log_div:
            return spectrum
        return np.log(spectrum / self.log_div + 1)
def __init__(self, **args):
    """Create the yaafe engine and file processor and store frame settings.

    Keyword arguments:
        block_size: stored as ``self.block_size`` (default 1024).
        step_size: stored as ``self.step_size`` (default 512).
    """
    self.engine = Engine()
    self.afp = AudioFileProcessor()
    self.block_size = args.get('block_size', 1024)
    self.step_size = args.get('step_size', 512)
def compute_spec(song_path):
    """Return the yaafe PowerSpectrum output for the file at ``song_path``.

    The plan resamples the audio to 22050 Hz and uses ``blockSize=1024``
    and ``stepSize=512``.
    """
    plan = FeaturePlan(sample_rate=22050, resample=True)
    # A single feature, published under the output name 'spec'.
    plan.addFeature("spec: PowerSpectrum blockSize=1024 stepSize=512")

    spectrum_engine = Engine()
    spectrum_engine.load(plan.getDataFlow())

    AudioFileProcessor().processFile(spectrum_engine, song_path)
    return spectrum_engine.readOutput('spec')
def initialize(self, feature_dict):
    """Set up the yaafe plan, dataflow, engine and file processor.

    ``feature_dict`` maps an output name to a yaafe feature description;
    every pair is registered on the plan as ``"name: description"``.
    The plan is created with ``self.sample_rate`` and ``normalize=0.98``.

    Returns ``self`` so that calls can be chained.
    """
    self.feature_dict = feature_dict
    self.fp = FeaturePlan(sample_rate=self.sample_rate, normalize=0.98)

    for entry in self.feature_dict.items():
        self.fp.addFeature("{0}: {1}".format(*entry))

    self.df = self.fp.getDataFlow()

    self.engine = Engine()
    self.engine.load(self.df)

    self.afp = AudioFileProcessor()
    return self
def _ExtractZCRAndFlux(self, audio_location, sample_rate):
    """Extract the 'zcr' (ZCR) and 'flux' (SpectralFlux) outputs of a file.

    Returns the result of ``Engine.readAllOutputs()`` for a plan that
    holds exactly those two features.
    """
    plan = FeaturePlan(sample_rate=sample_rate)
    for definition in ('zcr: ZCR', 'flux: SpectralFlux'):
        plan.addFeature(definition)

    extraction_engine = Engine()
    extraction_engine.load(plan.getDataFlow())

    AudioFileProcessor().processFile(extraction_engine, audio_location)
    return extraction_engine.readAllOutputs()
def startEngine(path):
    """Process ``path`` with the module-level dataflow ``df``.

    Every engine output is stored in the module-level ``features``
    variable. The return value is only a status message.
    """
    global features

    runner = Engine()
    runner.load(df)

    AudioFileProcessor().processFile(runner, path)
    features = runner.readAllOutputs()
    return 'Yaafe引擎启动成功'
def get_engine(audio_freq, mfcc_block_size, mfcc_step_size):
    """Build a file processor and an engine loaded with an MFCC-derivative plan.

    The plan has one output, 'mfcc', defined as ``MFCC`` with the given
    block and step sizes followed by ``Derivate DOrder=1``.

    Returns the tuple ``(AudioFileProcessor, Engine)``.
    """
    definition = 'mfcc: MFCC blockSize=%d stepSize=%d > Derivate DOrder=1' % (
        mfcc_block_size, mfcc_step_size)

    plan = FeaturePlan(sample_rate=audio_freq)
    plan.addFeature(definition)
    dataflow = plan.getDataFlow()

    processor = AudioFileProcessor()
    extraction_engine = Engine()
    extraction_engine.load(dataflow)
    return (processor, extraction_engine)
def init():
    """Create the module-level ``engine`` loaded with perceptual features.

    The plan resamples to 44100 Hz and is limited to ``time_start=0`` and
    ``time_limit=20``. It registers the outputs loudness,
    perceptualSharpness, perceptualSpread, obsi and obsir.

    Returns the string 'initialization'.
    """
    global engine

    plan = FeaturePlan(sample_rate=44100, resample=True,
                       time_start=0, time_limit=20)
    for definition in (
            "loudness: Loudness",
            "perceptualSharpness: PerceptualSharpness",
            "perceptualSpread: PerceptualSpread",
            "obsi: OBSI",
            "obsir: OBSIR",
    ):
        plan.addFeature(definition)
    dataflow = plan.getDataFlow()

    # Publish the configured engine for the rest of the module.
    engine = Engine()
    engine.load(dataflow)
    return 'initialization'
def _zcr(self, audio_location, sample_rate):
    """Extract the zero-crossing-rate (ZCR) feature from one audio file.

    Per the original notes this mirrors the command line
    ``python yaafe.py -r SAMPLERATE -f "zcr: ZCR blockSize=1024
    stepSize=512" WAV-LOCATION``, where ``blockSize`` is the output frame
    size and ``stepSize`` the step between consecutive frames. Neither is
    given here, so yaafe's defaults apply (1024 and 512 per those notes).

    The output is registered under the name 'ZCR'.
    Returns the result of ``Engine.readAllOutputs()``.
    """
    plan = FeaturePlan(sample_rate=sample_rate)
    plan.addFeature('ZCR: ZCR')

    # Load the dataflow into a fresh engine.
    extraction_engine = Engine()
    extraction_engine.load(plan.getDataFlow())

    # Process the file and return every output held by the engine.
    AudioFileProcessor().processFile(extraction_engine, audio_location)
    return extraction_engine.readAllOutputs()
def __init__(self):
    """Prepare the yaafe plan, engine and file processor.

    Also creates the empty ``mfcc`` and ``songs`` lists.
    """
    # NOTE(review): the definition after 'feat:' is empty, so no yaafe
    # feature is named here. Confirm which feature was intended.
    plan = FeaturePlan(sample_rate=44100)
    plan.addFeature('feat: ')
    self.fp = plan

    loaded_engine = Engine()
    loaded_engine.load(plan.getDataFlow())
    self.engine = loaded_engine

    self.afp = AudioFileProcessor()
    self.mfcc = []
    self.songs = []
def init():
    """Create the module-level ``engine`` loaded with a seven-feature plan.

    The plan resamples to 44100 Hz and is limited to ``time_start=0`` and
    ``time_limit=60``. The per-feature value counts in the comments below
    are carried over from the original notes.

    Returns a status string.
    """
    global engine

    # NOTE(review): the original comment spoke of extracting seconds
    # 20-40, but the plan is configured for seconds 0-60.
    plan = FeaturePlan(sample_rate=44100, resample=True,
                       time_start=0, time_limit=60)

    feature_definitions = (
        "mfcc: MFCC",              # Mel-frequency cepstral coefficients (13)
        "energy: Energy",          # short-time energy (1)
        "zcr: ZCR",                # short-time zero-crossing rate (1)
        "sf: SpectralFlux",        # spectral flux (1); originally mislabelled "sharpness"
        "sr: SpectralRolloff",     # spectral roll-off point (1)
        "lpc: LPC LPCNbCoeffs=3",  # linear predictive coding (3)
        "lx: Loudness",            # loudness (24)
    )
    for definition in feature_definitions:
        plan.addFeature(definition)
    dataflow = plan.getDataFlow()

    # Configure the engine and publish it for the rest of the module.
    engine = Engine()
    engine.load(dataflow)
    return 'Yaafe初始化'
def main(audio_file, variables_to_extract):
    """Extract the requested audio variables from ``audio_file``.

    The 'output' folder is removed if it exists and then re-created.
    Each name in ``variables_to_extract`` is looked up in
    ``feat_json['variables']`` to get its yaafe feature definition.
    The loaded engine is handed to ``process_audio``, which per the
    original comment writes the CSV files.
    """
    # Start from an empty 'output' folder.
    if isdir('output'):
        shutil.rmtree('output')
    os.mkdir('output')

    # Register one feature per requested variable at a 44100 Hz sample rate.
    plan = FeaturePlan(sample_rate=44100)
    for variable in variables_to_extract:
        definition = feat_json['variables'][variable]
        plan.addFeature(definition)

    extraction_engine = Engine()
    extraction_engine.load(plan.getDataFlow())

    process_audio(audio_file, extraction_engine)
def main(argv):
    """Print the analysed duration of a song, plus a beat histogram summary.

    ``argv`` is a command-line style list ``[program_name, song_path]``.
    With any other length a usage line is printed and the function returns.
    The beat histogram is only requested when ``YAAFE_PATH`` is present in
    the environment.

    Raises:
        IOError: if ``song_path`` does not exist.
    """
    if len(argv) != 2:
        print('usage: python %s foo.mp3' % (argv[0]))
        return

    song_path = argv[1]
    # Raise a real exception instead of using ``assert``: assertions are
    # stripped when Python runs with -O.
    if not os.path.exists(song_path):
        raise IOError('audio file not found: %s' % song_path)

    with_beat_hist = 'YAAFE_PATH' in os.environ

    fp = FeaturePlan(sample_rate=SAMPLE_RATE, resample=True,
                     time_start=TIME_START, time_limit=TIME_LIMIT)
    fp.addFeature("frames: Frames blockSize={0} stepSize={1}".format(
        SLICE_WINSIZE, SLICE_STEPSIZE))
    if with_beat_hist:
        fp.addFeature(
            "beat_hist: BeatHistogramSummary ACPNbPeaks=3 BHSBeatFrameSize=128 BHSBeatFrameStep=64 "
            "BHSHistogramFrameSize=40 BHSHistogramFrameStep=40 FFTLength=0 FFTWindow=Hanning "
            "HInf=40 HNbBins=80 HSup=200 NMANbFrames=5000 blockSize=1024 stepSize=512"
        )

    engine = Engine()
    engine.load(fp.getDataFlow())

    afp = AudioFileProcessor()
    afp.processFile(engine, song_path)

    # Flatten the per-block frames into one array of samples.
    frames = np.concatenate(engine.readOutput('frames'))

    # Single-argument print() calls behave the same on Python 2 and 3.
    print('time start: %ss' % TIME_START)
    print('time limit: %ss' % TIME_LIMIT)
    print('duration: %s' % (1. * frames.size / SAMPLE_RATE))

    if with_beat_hist:
        beat_hist = engine.readOutput('beat_hist')
        print('beat_hist: %s' % beat_hist)
def initialize(self, feature_dict):
    """Set up the yaafe plan, dataflow, engine and file processor.

    ``feature_dict`` maps an output name to a yaafe feature description;
    every pair is registered on the plan as ``"name: description"``.
    The plan is created with ``self.sample_rate`` and ``normalize=0.98``.

    Returns ``self`` so that calls can be chained.
    """
    self.feature_dict = feature_dict
    self.fp = FeaturePlan(sample_rate=self.sample_rate, normalize=0.98)

    for entry in self.feature_dict.items():
        self.fp.addFeature("{0}: {1}".format(*entry))

    self.df = self.fp.getDataFlow()

    self.engine = Engine()
    self.engine.load(self.df)

    self.afp = AudioFileProcessor()
    return self
class Extractor(object):
    """Walk a directory of wav files and feed their features to a FeatureSpace."""

    def __init__(self):
        """Prepare the yaafe plan, engine and file processor."""
        # NOTE(review): the definition after 'feat:' is empty, so no yaafe
        # feature is named here. The rest of the class refers to 'mfcc';
        # confirm the intended definition.
        self.fp = FeaturePlan(sample_rate=44100)
        self.fp.addFeature('feat: ')
        self.engine = Engine()
        self.engine.load(self.fp.getDataFlow())
        self.afp = AudioFileProcessor()
        self.mfcc = []
        self.songs = []

    def recurse(self, directory):
        """Return the paths of all files under ``directory`` ending in 'wav'.

        The list is printed before it is returned.
        """
        results = []
        for root, _dirs, files in os.walk(directory):
            for filename in files:
                # os.walk yields each file once, so no de-duplication
                # (a linear scan per file) is needed.
                if filename.endswith('wav'):
                    results.append(os.path.join(root, filename))
        print(results)
        return results

    def extract_feature(self, f):
        """Process file ``f`` and return the engine's 'feat' output."""
        self.afp.processFile(self.engine, f)
        feats = self.engine.readAllOutputs()
        return feats['feat']

    def build_feature_space(self):
        """Add one Feature per wav file under ``sys.argv[1]`` to a FeatureSpace.

        Finally prints ``min_dist`` of the space for the last feature added.
        """
        self.space = FeatureSpace('mfcc')
        last_feature = None
        for f in self.recurse(sys.argv[1]):
            data = self.extract_feature(f)
            temp_feature = Feature(f, ['mfcc', data])
            self.space.add(temp_feature)
            last_feature = temp_feature
        print(self.space.min_dist(last_feature))
def _mfcc(self, audio_location, sample_rate):
    """Extract the MFCC feature from one audio file with default parameters.

    Per the original notes this mirrors the command line
    ``python yaafe.py -r SAMPLERATE -f "mfcc: MFCC PARAMETERS" WAV``.
    The parameters and defaults listed in those notes are:

    - CepsIgnoreFirstCoeff (1): 0 keeps the first cepstral coefficient,
      1 ignores it
    - CepsNbCoeffs (13): number of cepstral coefficients to keep
    - FFTWindow (Hanning): weighting window applied before the FFT
      (Hanning|Hamming|None)
    - MelMaxFreq (6854.0): maximum frequency of the mel filter bank
    - MelMinFreq (130.0): minimum frequency of the mel filter bank
    - MelNbFilters (40): number of mel filters
    - blockSize (1024): output frame size
    - stepSize (512): step between consecutive frames

    None of them is overridden here. The output is registered as 'mfcc'.
    Returns the result of ``Engine.readAllOutputs()``.
    """
    plan = FeaturePlan(sample_rate=sample_rate)
    plan.addFeature('mfcc: MFCC')

    extraction_engine = Engine()
    extraction_engine.load(plan.getDataFlow())

    # Process the file and return every output held by the engine.
    AudioFileProcessor().processFile(extraction_engine, audio_location)
    return extraction_engine.readAllOutputs()
def _SpectralFlux(self, audio_location, sample_rate):
    """Extract the SpectralFlux feature from one audio file with defaults.

    Per the original notes this mirrors the command line
    ``python yaafe.py -r SAMPLERATE -f "flux: SpectralFlux PARAMETERS" WAV``.
    The parameters and defaults listed in those notes are:

    - FFTLength (0): frame length on which to perform the FFT; the frame
      is zero-padded or truncated to this size, 0 keeps the original length
    - FFTWindow (Hanning): weighting window applied before the FFT
      (Hanning|Hamming|None)
    - FluxSupport (All): 'All' uses every bin, 'Increase' only the bins
      which are increasing
    - blockSize (1024): output frame size
    - stepSize (512): step between consecutive frames

    None of them is overridden here. The output is registered as 'Flux'.
    Returns the result of ``Engine.readAllOutputs()``.
    """
    plan = FeaturePlan(sample_rate=sample_rate)
    plan.addFeature('Flux: SpectralFlux')

    # Load the dataflow into a fresh engine.
    extraction_engine = Engine()
    extraction_engine.load(plan.getDataFlow())

    # Process the file and return every output held by the engine.
    AudioFileProcessor().processFile(extraction_engine, audio_location)
    return extraction_engine.readAllOutputs()
def __init__(self, num_bands, fmin, num_octaves, fps, align, log_div,
             sample_rate=44100, fold=None):
    """Store the CQT configuration and load a one-feature yaafe engine.

    The plan's single output is 'cqt'. Its ``stepSize`` is
    ``sample_rate / fps``. ``fold`` is accepted but not used here.
    """
    self.fps = fps
    self.num_bands = num_bands
    self.align = align
    self.fmin = fmin
    self.num_octaves = num_octaves
    self.log_div = log_div
    self.sample_rate = sample_rate

    from yaafelib import FeaturePlan, Engine

    # The hop size in samples is derived from the requested frame rate.
    hop = sample_rate / fps
    definition_parts = [
        'cqt: CQT',
        'CQTAlign={}'.format(align),
        'CQTBinsPerOctave={}'.format(num_bands),
        'CQTMinFreq={}'.format(fmin),
        'CQTNbOctaves={}'.format(num_octaves),
        'stepSize={}'.format(hop),
    ]

    plan = FeaturePlan(sample_rate=sample_rate)
    plan.addFeature(" ".join(definition_parts))

    self.engine = Engine()
    self.engine.load(plan.getDataFlow())
def main(argv):
    """Print the analysed duration of a song and plot its samples.

    ``argv`` is a command-line style list ``[program_name, song_path]``.
    With any other length a usage line is printed and the function returns.

    Raises:
        IOError: if ``song_path`` does not exist.
    """
    if len(argv) != 2:
        print('usage: python %s foo.mp3' % (argv[0]))
        return

    song_path = argv[1]
    # Raise a real exception instead of using ``assert``: assertions are
    # stripped when Python runs with -O.
    if not os.path.exists(song_path):
        raise IOError('audio file not found: %s' % song_path)

    fp = FeaturePlan(sample_rate=SAMPLE_RATE, resample=True,
                     time_start=TIME_START, time_limit=TIME_LIMIT)
    fp.addFeature("frames: Frames blockSize={0} stepSize={1}".format(
        SLICE_WINSIZE, SLICE_STEPSIZE))

    engine = Engine()
    engine.load(fp.getDataFlow())

    afp = AudioFileProcessor()
    afp.processFile(engine, song_path)

    # Flatten the per-block frames into one array of samples.
    frames = np.concatenate(engine.readOutput('frames'))

    # Single-argument print() calls behave the same on Python 2 and 3.
    print('time start: %ss' % TIME_START)
    print('time limit: %ss' % TIME_LIMIT)
    print('duration: %s' % (1. * frames.size / SAMPLE_RATE))

    # Imported here so pylab is only needed once there is data to show.
    from pylab import plot, show
    plot(frames)
    show()
import sys
from math import sqrt
import time
import wave
import operator
from datetime import datetime
import numpy
from yaafelib import FeaturePlan, Engine
import matplotlib.pyplot as plot
import scipy.io.wavfile as wavfile

# Converters keyed by field name. 'Encoded date' strings are parsed with the
# format "%Z %Y-%m-%d %H:%M:%S" and passed through time.mktime.
# NOTE(review): ``lambda(x)`` with a parenthesised parameter is Python 2 only
# syntax.
converter = {'Encoded date' : lambda(x) : time.mktime(datetime.strptime(x, "%Z %Y-%m-%d %H:%M:%S").timetuple())}

# Module-level MFCC plan (16 kHz, blockSize=512, stepSize=256) and an engine
# loaded with its dataflow. Both are built at import time.
fpMFCC = FeaturePlan(sample_rate=16000, normalize=True)
fpMFCC.addFeature('mfcc: MFCC blockSize=512 stepSize=256')
engine = Engine()
engine.load(fpMFCC.getDataFlow())

def getTextUnderNode(node):
    # Recursively collect the text below ``node``. Text children have their
    # newlines removed and are stripped; other children are handled by
    # recursion. All pieces are joined with single spaces.
    rc = []
    for child in node.childNodes:
        if child.nodeType == node.TEXT_NODE:
            rc.append(child.data.replace('\n', '').strip())
        else:
            rc.append(getTextUnderNode(child).strip())
    return ' '.join(rc)

def visualizeResult(stream1, stream2, rate, bestPosition, distances, filename):
    # Only the first statement of this function is visible here: it selects
    # the first panel of a 3-row, 1-column subplot grid.
    plot.subplot(3, 1, 1)
class Yaafe(EnhancedObject):
    '''Yaafe toolbox wrapper. To be used with Database object.

    Attributes:
        sample_rate     The files' sample rate
        features        Mapping of output name -> yaafe feature description
        fp, df, engine, afp
                        The yaafe FeaturePlan, DataFlow, Engine and
                        AudioFileProcessor built by initialize()

    Methods:
        initialize(feature_dict)    Build plan, dataflow, engine, processor
        save_fplan(name)            Write the feature plan to <name>.txt
        process(audiofile)          Run audiofile through the engine

    NOTE(review): an earlier version of this docstring also listed
    ``plan_filename``, ``get_X`` and ``get_y``. None of them is defined in
    this class body; they presumably come from EnhancedObject or a
    subclass. Confirm.

    Init:
        Yaafe(sample_rate, features=None)
    '''

    # Default feature set. Every entry computes one yaafe feature on
    # 512-sample blocks with a 128-sample step and pipes it through a
    # StatisticalIntegrator (NbFrames=40, StepNbFrames=8).
    _features = {
        'spec_rolloff': ("SpectralRolloff blockSize=512 stepSize=128 "
                         "> StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'spec_shape': ("SpectralShapeStatistics blockSize=512 stepSize=128 "
                       "> StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'spec_flux': ("SpectralFlux blockSize=512 stepSize=128 >"
                      "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'amp_mod': ("AmplitudeModulation blockSize=512 stepSize=128 >"
                    "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'auto': ("AutoCorrelation blockSize=512 stepSize=128 >"
                 "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'lpc': ("LPC blockSize=512 stepSize=128 > StatisticalIntegrator "
                "NbFrames=40 StepNbFrames=8"),
        'loudness': ("Loudness blockSize=512 stepSize=128 >"
                     "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'mfcc': ("MFCC blockSize=512 stepSize=128 > StatisticalIntegrator "
                 "NbFrames=40 StepNbFrames=8"),
        # Fixed: the definition used to read "blockSize=512," with a stray
        # comma glued to the parameter value.
        'mel_spectrum': ("MelSpectrum blockSize=512 stepSize=128 >"
                         "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'obsi': ("OBSI blockSize=512 stepSize=128 > StatisticalIntegrator "
                 "NbFrames=40 StepNbFrames=8"),
        'obsir': ("OBSIR blockSize=512 stepSize=128 >"
                  "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'perc_sharp': ("PerceptualSharpness blockSize=512 stepSize=128 >"
                       "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'perc_spread': ("PerceptualSpread blockSize=512 stepSize=128 >"
                        "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'spect_crest': ("SpectralCrestFactorPerBand blockSize=512 "
                        "stepSize=128 > StatisticalIntegrator NbFrames=40 "
                        "StepNbFrames=8"),
        'spec_decr': ("SpectralDecrease blockSize=512 stepSize=128 >"
                      "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'spect_flat': ("SpectralFlatness blockSize=512 stepSize=128 >"
                       "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'spect_flat_band': ("SpectralFlatnessPerBand blockSize=512 "
                            "stepSize=128 > StatisticalIntegrator NbFrames=40 "
                            "StepNbFrames=8"),
        'spect_slope': ("SpectralSlope blockSize=512 stepSize=128 >"
                        "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'spect_var': ("SpectralVariation blockSize=512 stepSize=128 >"
                      "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'temp_shape': ("TemporalShapeStatistics blockSize=512 stepSize=128 "
                       "> StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'zcr': ("ZCR blockSize=512 stepSize=128 > StatisticalIntegrator "
                "NbFrames=40 StepNbFrames=8"),
        'env_shape': ("EnvelopeShapeStatistics blockSize=512 stepSize=128"
                      " > StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'comp_onest': ("ComplexDomainOnsetDetection blockSize=512 "
                       "stepSize=128 > StatisticalIntegrator NbFrames=40 "
                       "StepNbFrames=8"),
    }

    def __init__(self, sample_rate, features=None):
        """Build the wrapper; ``features`` defaults to the class-level set."""
        if features is None:
            features = self._features
        self.sample_rate = sample_rate
        self.initialize(features)
        self.features = features

    def initialize(self, feature_dict):
        """ Run the required boilerplate for yaafe

        Registers every ``name: description`` pair of ``feature_dict`` on a
        new FeaturePlan (``normalize=0.98``), loads its dataflow into a new
        Engine and creates the AudioFileProcessor. Returns ``self``.
        """
        self.feature_dict = feature_dict
        self.fp = FeaturePlan(
            sample_rate=self.sample_rate,
            normalize=0.98)
        for name, desc in self.feature_dict.items():
            self.fp.addFeature("{0}: {1}".format(name, desc))
        self.df = self.fp.getDataFlow()
        self.engine = Engine()
        self.engine.load(self.df)
        self.afp = AudioFileProcessor()
        return self

    def save_fplan(self, name):
        """ Save a feature plan (text file)

        Writes ``<name>.txt`` with one ``feature_name: description`` entry
        per line. Entries used to be written back-to-back with no
        separator, which ran all definitions together on a single line.
        """
        # ``with`` closes the file even if a write fails.
        with open("{}.txt".format(name), 'w') as text_file:
            for feat_name, desc in self.features.items():
                text_file.write("{}: {}\n".format(feat_name, desc))

    def process(self, audiofile):
        """ Process function for running a file through yaafe's
        feature extractor

        NOTE(review): ``sorted(out)`` on the mapping returned by
        ``readAllOutputs()`` yields the sorted output *names* only, not the
        feature data. Kept as is for compatibility; confirm what callers
        expect.
        """
        self.afp.processFile(self.engine, audiofile)
        out = self.engine.readAllOutputs()
        self.engine.flush()
        return sorted(out)
class Yaafe(EnhancedObject):
    '''Yaafe toolbox wrapper. To be used with Database object.

    Attributes:
        sample_rate     The files' sample rate
        features        Mapping of output name -> yaafe feature description
        fp, df, engine, afp
                        The yaafe FeaturePlan, DataFlow, Engine and
                        AudioFileProcessor built by initialize()

    Methods:
        initialize(feature_dict)    Build plan, dataflow, engine, processor
        save_fplan(name)            Write the feature plan to <name>.txt
        process(audiofile)          Run audiofile through the engine

    NOTE(review): an earlier version of this docstring also listed
    ``plan_filename``, ``get_X`` and ``get_y``. None of them is defined in
    this class body; they presumably come from EnhancedObject or a
    subclass. Confirm.

    Init:
        Yaafe(sample_rate, features=None)
    '''

    # Default feature set. Every entry computes one yaafe feature on
    # 512-sample blocks with a 128-sample step and pipes it through a
    # StatisticalIntegrator (NbFrames=40, StepNbFrames=8).
    _features = {
        'spec_rolloff': ("SpectralRolloff blockSize=512 stepSize=128 "
                         "> StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'spec_shape': ("SpectralShapeStatistics blockSize=512 stepSize=128 "
                       "> StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'spec_flux': ("SpectralFlux blockSize=512 stepSize=128 >"
                      "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'amp_mod': ("AmplitudeModulation blockSize=512 stepSize=128 >"
                    "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'auto': ("AutoCorrelation blockSize=512 stepSize=128 >"
                 "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'lpc': ("LPC blockSize=512 stepSize=128 > StatisticalIntegrator "
                "NbFrames=40 StepNbFrames=8"),
        'loudness': ("Loudness blockSize=512 stepSize=128 >"
                     "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'mfcc': ("MFCC blockSize=512 stepSize=128 > StatisticalIntegrator "
                 "NbFrames=40 StepNbFrames=8"),
        # Fixed: the definition used to read "blockSize=512," with a stray
        # comma glued to the parameter value.
        'mel_spectrum': ("MelSpectrum blockSize=512 stepSize=128 >"
                         "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'obsi': ("OBSI blockSize=512 stepSize=128 > StatisticalIntegrator "
                 "NbFrames=40 StepNbFrames=8"),
        'obsir': ("OBSIR blockSize=512 stepSize=128 >"
                  "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'perc_sharp': ("PerceptualSharpness blockSize=512 stepSize=128 >"
                       "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'perc_spread': ("PerceptualSpread blockSize=512 stepSize=128 >"
                        "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'spect_crest': ("SpectralCrestFactorPerBand blockSize=512 "
                        "stepSize=128 > StatisticalIntegrator NbFrames=40 "
                        "StepNbFrames=8"),
        'spec_decr': ("SpectralDecrease blockSize=512 stepSize=128 >"
                      "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'spect_flat': ("SpectralFlatness blockSize=512 stepSize=128 >"
                       "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'spect_flat_band': ("SpectralFlatnessPerBand blockSize=512 "
                            "stepSize=128 > StatisticalIntegrator NbFrames=40 "
                            "StepNbFrames=8"),
        'spect_slope': ("SpectralSlope blockSize=512 stepSize=128 >"
                        "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'spect_var': ("SpectralVariation blockSize=512 stepSize=128 >"
                      "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'temp_shape': ("TemporalShapeStatistics blockSize=512 stepSize=128 "
                       "> StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'zcr': ("ZCR blockSize=512 stepSize=128 > StatisticalIntegrator "
                "NbFrames=40 StepNbFrames=8"),
        'env_shape': ("EnvelopeShapeStatistics blockSize=512 stepSize=128"
                      " > StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'comp_onest': ("ComplexDomainOnsetDetection blockSize=512 "
                       "stepSize=128 > StatisticalIntegrator NbFrames=40 "
                       "StepNbFrames=8"),
    }

    def __init__(self, sample_rate, features=None):
        """Build the wrapper; ``features`` defaults to the class-level set."""
        if features is None:
            features = self._features
        self.sample_rate = sample_rate
        self.initialize(features)
        self.features = features

    def initialize(self, feature_dict):
        """ Run the required boilerplate for yaafe

        Registers every ``name: description`` pair of ``feature_dict`` on a
        new FeaturePlan (``normalize=0.98``), loads its dataflow into a new
        Engine and creates the AudioFileProcessor. Returns ``self``.
        """
        self.feature_dict = feature_dict
        self.fp = FeaturePlan(sample_rate=self.sample_rate, normalize=0.98)
        for name, desc in self.feature_dict.items():
            self.fp.addFeature("{0}: {1}".format(name, desc))
        self.df = self.fp.getDataFlow()
        self.engine = Engine()
        self.engine.load(self.df)
        self.afp = AudioFileProcessor()
        return self

    def save_fplan(self, name):
        """ Save a feature plan (text file)

        Writes ``<name>.txt`` with one ``feature_name: description`` entry
        per line. Entries used to be written back-to-back with no
        separator, which ran all definitions together on a single line.
        """
        # ``with`` closes the file even if a write fails.
        with open("{}.txt".format(name), 'w') as text_file:
            for feat_name, desc in self.features.items():
                text_file.write("{}: {}\n".format(feat_name, desc))

    def process(self, audiofile):
        """ Process function for running a file through yaafe's
        feature extractor

        NOTE(review): ``sorted(out)`` on the mapping returned by
        ``readAllOutputs()`` yields the sorted output *names* only, not the
        feature data. Kept as is for compatibility; confirm what callers
        expect.
        """
        self.afp.processFile(self.engine, audiofile)
        out = self.engine.readAllOutputs()
        self.engine.flush()
        return sorted(out)
def detect(wav_path, ans_path=None):
    """Decode a key sequence from the power spectrum of ``wav_path``.

    For every spectrum frame, the magnitude at each frequency in
    LOWER_FREQS and UPPER_FREQS is linearly interpolated between the two
    neighbouring bins. In each group the strongest frequency whose
    magnitude exceeds twice the frame mean is selected. The pair of
    indices selects an entry of KEYS; frames at or below
    SILENT_MAG_THRESHOLD yield SILENT_KEY and frames without both a lower
    and an upper match yield UNKNOWN_KEY. The per-frame sequence is then
    reduced by ``purge_seq``.

    If ``ans_path`` is given, every stripped line of that file must equal
    the decoded entry at the same position (checked with ``assert``);
    matching lines are printed. Otherwise the decoded entries are printed.
    """
    sample_rate = 8000
    block_size = 1024
    # Floor division keeps these integers under true division as well;
    # ``range`` and the ``%d`` format below require integers.
    step_size = block_size // 2
    n_band = block_size // 2
    # Centre frequency of bin i, for i = 0 .. n_band.
    freq_bound = [i * sample_rate / 2. / n_band for i in range(n_band + 1)]

    plan = FeaturePlan(sample_rate=sample_rate, resample=True)
    plan.addFeature('power_spectrum: PowerSpectrum blockSize=%d stepSize=%d'
                    % (block_size, step_size))
    dataflow = plan.getDataFlow()
    afp = AudioFileProcessor()
    engine = Engine()
    engine.load(dataflow)
    afp.processFile(engine, wav_path)
    spectrogram = engine.readOutput('power_spectrum')

    seq = []
    for spectrum in spectrogram:
        mean_mag = np.mean(spectrum)
        if mean_mag <= SILENT_MAG_THRESHOLD:
            seq.append(SILENT_KEY)
            continue

        # (index, magnitude) of the best candidate found so far per group.
        lower_data = (-1, -1)
        upper_data = (-1, -1)
        candidates = itertools.chain(enumerate(LOWER_FREQS),
                                     enumerate(UPPER_FREQS))
        for target_idx, target_freq in candidates:
            idx = bisect.bisect(freq_bound, target_freq)
            assert idx > 0
            freq1 = freq_bound[idx - 1]
            mag1 = spectrum[idx - 1]
            freq2 = freq_bound[idx]
            mag2 = spectrum[idx]
            # Linear interpolation between the two neighbouring bins.
            w1 = 1. * (freq2 - target_freq) / (freq2 - freq1)
            w2 = 1. - w1
            target_mag = (w1 * mag1 + w2 * mag2)

            if target_mag > mean_mag * 2:
                # Frequencies below 1000 count as "lower", the rest as
                # "upper".
                if target_freq < 1000:
                    if target_mag > lower_data[1]:
                        lower_data = (target_idx, target_mag)
                else:
                    if target_mag > upper_data[1]:
                        upper_data = (target_idx, target_mag)

        lower_idx = lower_data[0]
        upper_idx = upper_data[0]
        if lower_idx == -1 or upper_idx == -1:
            seq.append(UNKNOWN_KEY)
        else:
            # NOTE(review): the row stride is len(LOWER_FREQS); this only
            # matches a row-major KEYS table if both frequency lists have
            # the same length. Confirm against the KEYS layout.
            seq.append(KEYS[lower_idx * len(LOWER_FREQS) + upper_idx])

    ans = purge_seq(seq)
    if ans_path is not None:
        with open(ans_path) as fh:
            for i, line in enumerate(fh):
                line = line.strip()
                assert line == ans[i], "%s != %s" % (line, ans[i])
                print('[%d] %s' % (i, line))
    else:
        for i, line in enumerate(ans):
            print('[%d] %s' % (i, line))
""" Stand-alone application to demonstrate yaafe's transformations """
import numpy as np
import matplotlib.pyplot as plt
from yaafelib import FeaturePlan, Engine, AudioFileProcessor

# Two outputs from the same MFCC analysis: the raw frames ('mfcc') and the
# frames piped through a StatisticalIntegrator ('mfcc_stat').
FPLAN = FeaturePlan(sample_rate=44100)
FPLAN.addFeature("mfcc: MFCC blockSize=512 stepSize=128")
FPLAN.addFeature(
    "mfcc_stat: MFCC blockSize=512 stepSize=128 "
    "> StatisticalIntegrator NbFrames=40 StepNbFrames=8"
)

ENGINE = Engine()
ENGINE.load(FPLAN.getDataFlow())

# Run the engine over the demo track and collect both outputs.
PROCESSOR = AudioFileProcessor()
PROCESSOR.processFile(ENGINE, 'track.wav')
DATA = ENGINE.readAllOutputs()
ENGINE.flush()

X_MFCC = DATA['mfcc']
X_MFCC_STAT = DATA['mfcc_stat']

# Labels for bands 1-13: first all the averages, then all the standard
# deviations.
MFCC_DESC = []
for template in ("Average for Band {}", "Standard Dev. for Band {}"):
    for i in range(1, 14):
        desc = template.format(i)
        MFCC_DESC.append(desc)

plt.ion()
FIG = plt.figure()
""" Stand-alone application to demonstrate yaafe's transformations """
import numpy as np
import matplotlib.pyplot as plt
from yaafelib import FeaturePlan, Engine, AudioFileProcessor

# Two outputs from the same MFCC analysis: the raw frames ('mfcc') and the
# frames piped through a StatisticalIntegrator ('mfcc_stat').
FPLAN = FeaturePlan(sample_rate=44100)
FPLAN.addFeature("mfcc: MFCC blockSize=512 stepSize=128")
FPLAN.addFeature(
    "mfcc_stat: MFCC blockSize=512 stepSize=128 "
    "> StatisticalIntegrator NbFrames=40 StepNbFrames=8"
)

ENGINE = Engine()
ENGINE.load(FPLAN.getDataFlow())

# Run the engine over the demo track and collect both outputs.
PROCESSOR = AudioFileProcessor()
PROCESSOR.processFile(ENGINE, 'track.wav')
DATA = ENGINE.readAllOutputs()
ENGINE.flush()

X_MFCC = DATA['mfcc']
X_MFCC_STAT = DATA['mfcc_stat']

# Labels for bands 1-13: first all the averages, then all the standard
# deviations.
MFCC_DESC = []
for template in ("Average for Band {}", "Standard Dev. for Band {}"):
    for i in range(1, 14):
        desc = template.format(i)
        MFCC_DESC.append(desc)

plt.ion()
FIG = plt.figure()
FIG.set_size_inches(14, 8)
def detect(wav_path, ans_path=None):
    """Decode a key sequence from the power spectrum of ``wav_path``.

    For every spectrum frame, the magnitude at each frequency in
    LOWER_FREQS and UPPER_FREQS is linearly interpolated between the two
    neighbouring bins. In each group the strongest frequency whose
    magnitude exceeds twice the frame mean is selected. The pair of
    indices selects an entry of KEYS; frames at or below
    SILENT_MAG_THRESHOLD yield SILENT_KEY and frames without both a lower
    and an upper match yield UNKNOWN_KEY. The per-frame sequence is then
    reduced by ``purge_seq``.

    If ``ans_path`` is given, every stripped line of that file must equal
    the decoded entry at the same position (checked with ``assert``);
    matching lines are printed. Otherwise the decoded entries are printed.
    """
    sample_rate = 8000
    block_size = 1024
    # Floor division keeps these integers under true division as well;
    # ``range`` and the ``%d`` format below require integers.
    step_size = block_size // 2
    n_band = block_size // 2
    # Centre frequency of bin i, for i = 0 .. n_band.
    freq_bound = [i * sample_rate / 2. / n_band for i in range(n_band + 1)]

    plan = FeaturePlan(sample_rate=sample_rate, resample=True)
    plan.addFeature(
        'power_spectrum: PowerSpectrum blockSize=%d stepSize=%d' % (
            block_size, step_size
        )
    )
    dataflow = plan.getDataFlow()
    afp = AudioFileProcessor()
    engine = Engine()
    engine.load(dataflow)
    afp.processFile(engine, wav_path)
    spectrogram = engine.readOutput('power_spectrum')

    seq = []
    for spectrum in spectrogram:
        mean_mag = np.mean(spectrum)
        if mean_mag <= SILENT_MAG_THRESHOLD:
            seq.append(SILENT_KEY)
            continue

        # (index, magnitude) of the best candidate found so far per group.
        lower_data = (-1, -1)
        upper_data = (-1, -1)
        candidates = itertools.chain(
            enumerate(LOWER_FREQS), enumerate(UPPER_FREQS)
        )
        for target_idx, target_freq in candidates:
            idx = bisect.bisect(freq_bound, target_freq)
            assert idx > 0
            freq1 = freq_bound[idx - 1]
            mag1 = spectrum[idx - 1]
            freq2 = freq_bound[idx]
            mag2 = spectrum[idx]
            # Linear interpolation between the two neighbouring bins.
            w1 = 1. * (freq2 - target_freq) / (freq2 - freq1)
            w2 = 1. - w1
            target_mag = (w1 * mag1 + w2 * mag2)

            if target_mag > mean_mag * 2:
                # Frequencies below 1000 count as "lower", the rest as
                # "upper".
                if target_freq < 1000:
                    if target_mag > lower_data[1]:
                        lower_data = (target_idx, target_mag)
                else:
                    if target_mag > upper_data[1]:
                        upper_data = (target_idx, target_mag)

        lower_idx = lower_data[0]
        upper_idx = upper_data[0]
        if lower_idx == -1 or upper_idx == -1:
            seq.append(UNKNOWN_KEY)
        else:
            # NOTE(review): the row stride is len(LOWER_FREQS); this only
            # matches a row-major KEYS table if both frequency lists have
            # the same length. Confirm against the KEYS layout.
            seq.append(KEYS[lower_idx * len(LOWER_FREQS) + upper_idx])

    ans = purge_seq(seq)
    if ans_path is not None:
        with open(ans_path) as fh:
            for i, line in enumerate(fh):
                line = line.strip()
                assert line == ans[i], "%s != %s" % (line, ans[i])
                print('[%d] %s' % (i, line))
    else:
        for i, line in enumerate(ans):
            print('[%d] %s' % (i, line))