def main(argv):
    """Slice a song into frames with yaafe and print a short summary.

    A ``Frames`` feature is always extracted. When the ``YAAFE_PATH``
    environment variable is set, a ``BeatHistogramSummary`` feature is
    extracted and printed as well.

    Args:
        argv: Command-line style argument list. ``argv[0]`` is the program
            name and ``argv[1]`` the path of the audio file to analyse.

    Returns:
        None. A usage line is printed when ``argv`` does not hold exactly
        two items; otherwise the summary is printed to standard output.

    Raises:
        IOError: If ``argv[1]`` does not name an existing path.
    """
    if len(argv) != 2:
        print("usage: python %s foo.mp3" % (argv[0]))
        return

    song_path = argv[1]
    # Explicit check instead of ``assert``: assertions are stripped under
    # ``python -O``. Done before any yaafe object is built so a bad path
    # fails fast.
    if not os.path.exists(song_path):
        raise IOError("audio file not found: %s" % song_path)

    # Read the environment once so the feature request and the later read
    # of its output can never disagree.
    want_beat_hist = "YAAFE_PATH" in os.environ

    fp = FeaturePlan(sample_rate=SAMPLE_RATE, resample=True,
                     time_start=TIME_START, time_limit=TIME_LIMIT)
    fp.addFeature("frames: Frames blockSize={0} stepSize={1}".format(
        SLICE_WINSIZE, SLICE_STEPSIZE))
    if want_beat_hist:
        fp.addFeature(
            "beat_hist: BeatHistogramSummary ACPNbPeaks=3 BHSBeatFrameSize=128 BHSBeatFrameStep=64 "
            "BHSHistogramFrameSize=40 BHSHistogramFrameStep=40 FFTLength=0 FFTWindow=Hanning "
            "HInf=40 HNbBins=80 HSup=200 NMANbFrames=5000 blockSize=1024 stepSize=512"
        )

    engine = Engine()
    engine.load(fp.getDataFlow())
    afp = AudioFileProcessor()
    afp.processFile(engine, song_path)

    # Join all frame rows into one flat array.
    # NOTE(review): size / SAMPLE_RATE is the true duration only if the
    # frames do not overlap (SLICE_STEPSIZE == SLICE_WINSIZE) - confirm.
    frames = np.concatenate(engine.readOutput("frames"))
    print("time start: %ss" % TIME_START)
    print("time limit: %ss" % TIME_LIMIT)
    print("duration: %s" % (1.0 * frames.size / SAMPLE_RATE))

    if want_beat_hist:
        beat_hist = engine.readOutput("beat_hist")
        print("beat_hist: %s" % (beat_hist,))
class MFCCExtractor:
    """Compute MFCC features for WAV files with yaafe.

    Constructor keyword arguments:
        block_size: Value passed to yaafe as ``blockSize`` (default 1024).
        step_size: Value passed to yaafe as ``stepSize`` (default 512).
    """

    def __init__(self, **args):
        self.engine = Engine()
        self.afp = AudioFileProcessor()
        self.block_size = args.get('block_size', 1024)
        self.step_size = args.get('step_size', 512)

    def extract(self, path, **args):
        """Extract the MFCC output for the WAV file at ``path``.

        Args:
            path: Path of the WAV file; it is echoed to standard output.
            **args: ``save_to_disk`` - when truthy, the result is also
                written to ``<path>.mfc.csv``.

        Returns:
            The ``'mfcc'`` entry of ``Engine.readAllOutputs()``.
        """
        self.path = path
        print(path)

        # Only the header (sample rate) is needed, so release the file
        # handle right away instead of leaking it. ``self.wave`` is kept
        # as an attribute for compatibility; it refers to a closed reader.
        self.wave = wave.open(path, 'r')
        try:
            self.rate = self.wave.getframerate()
        finally:
            self.wave.close()

        fp = FeaturePlan(sample_rate=self.rate)
        fp.addFeature('mfcc: MFCC blockSize={} stepSize={}'.format(
            self.block_size, self.step_size))
        self.engine.load(fp.getDataFlow())
        self.afp.processFile(self.engine, path)
        feats = self.engine.readAllOutputs()

        # Test the flag's value, not its mere presence, so that
        # ``save_to_disk=False`` really disables saving.
        if args.get('save_to_disk'):
            self.__save_to_disk(feats['mfcc'])
        return feats['mfcc']

    def __save_to_disk(self, feats):
        """Write ``feats`` as comma-separated text next to the source file."""
        new_path = self.path + ".mfc.csv"
        np.savetxt(new_path, feats, delimiter=",")
def _ExtractAll(self, audio_location, sample_rate):
    """Run the whole feature set over one audio file with yaafe.

    Args:
        audio_location: Path of the audio file to process.
        sample_rate: Sample rate handed to the yaafe ``FeaturePlan``.

    Returns:
        The result of ``Engine.readAllOutputs()`` for the features
        registered below.
    """
    # One FeaturePlan can carry several extractions; they are registered in
    # this order. The second-order MFCC derivative ('mfcc_D2') is
    # deliberately not part of the set.
    feature_definitions = (
        'zcr: ZCR',
        'mfcc: MFCC',
        'mfcc_D1: MFCC > Derivate DOrder=1',
        'flux: SpectralFlux',
        'energy: Energy',
        'loudness: Loudness',
        'obsi: OBSI',
        'sharpness: PerceptualSharpness',
        'spread: PerceptualSpread',
        'rolloff: SpectralRolloff',
        'variation: SpectralVariation',
    )
    plan = FeaturePlan(sample_rate=sample_rate)
    for definition in feature_definitions:
        plan.addFeature(definition)

    # Configure an engine from the plan's dataflow, then run the file
    # through it.
    extraction_engine = Engine()
    extraction_engine.load(plan.getDataFlow())
    AudioFileProcessor().processFile(extraction_engine, audio_location)
    return extraction_engine.readAllOutputs()
def _energy(audio_location, sample_rate):
    """Extract yaafe's ``Energy`` feature from one audio file.

    Equivalent to running
    ``python yaafe.py -r SAMPLERATE -f "energy: Energy PARAMETERS" WAV-LOCATION``.
    The yaafe defaults are used: ``blockSize=1024`` (output frame size) and
    ``stepSize=512`` (step between consecutive frames).

    Args:
        audio_location: Path of the audio file to process.
        sample_rate: Sample rate of the file being processed.

    Returns:
        The result of ``Engine.readAllOutputs()``; the feature is
        registered under the name ``'Energy'``.
    """
    plan = FeaturePlan(sample_rate=sample_rate)
    # Explicit parameters could be appended, e.g.
    # 'energy: Energy blockSize=1024 stepSize=512'.
    plan.addFeature('Energy: Energy')

    extraction_engine = Engine()
    extraction_engine.load(plan.getDataFlow())

    AudioFileProcessor().processFile(extraction_engine, audio_location)
    return extraction_engine.readAllOutputs()
def main(argv):
    """Slice a song into frames with yaafe and print a short summary.

    A ``Frames`` feature is always extracted. When the ``YAAFE_PATH``
    environment variable is set, a ``BeatHistogramSummary`` feature is
    extracted and printed as well.

    Args:
        argv: Command-line style argument list. ``argv[0]`` is the program
            name and ``argv[1]`` the path of the audio file to analyse.

    Returns:
        None. A usage line is printed when ``argv`` does not hold exactly
        two items; otherwise the summary is printed to standard output.

    Raises:
        IOError: If ``argv[1]`` does not name an existing path.
    """
    if len(argv) != 2:
        print('usage: python %s foo.mp3' % (argv[0]))
        return

    song_path = argv[1]
    # Explicit check instead of ``assert``: assertions are stripped under
    # ``python -O``. Done before any yaafe object is built so a bad path
    # fails fast.
    if not os.path.exists(song_path):
        raise IOError('audio file not found: %s' % song_path)

    # Read the environment once so the feature request and the later read
    # of its output can never disagree.
    want_beat_hist = 'YAAFE_PATH' in os.environ

    fp = FeaturePlan(sample_rate=SAMPLE_RATE, resample=True,
                     time_start=TIME_START, time_limit=TIME_LIMIT)
    fp.addFeature("frames: Frames blockSize={0} stepSize={1}".format(
        SLICE_WINSIZE, SLICE_STEPSIZE))
    if want_beat_hist:
        fp.addFeature(
            "beat_hist: BeatHistogramSummary ACPNbPeaks=3 BHSBeatFrameSize=128 BHSBeatFrameStep=64 "
            "BHSHistogramFrameSize=40 BHSHistogramFrameStep=40 FFTLength=0 FFTWindow=Hanning "
            "HInf=40 HNbBins=80 HSup=200 NMANbFrames=5000 blockSize=1024 stepSize=512"
        )

    engine = Engine()
    engine.load(fp.getDataFlow())
    afp = AudioFileProcessor()
    afp.processFile(engine, song_path)

    # Join all frame rows into one flat array.
    # NOTE(review): size / SAMPLE_RATE is the true duration only if the
    # frames do not overlap (SLICE_STEPSIZE == SLICE_WINSIZE) - confirm.
    frames = np.concatenate(engine.readOutput('frames'))
    print('time start: %ss' % TIME_START)
    print('time limit: %ss' % TIME_LIMIT)
    print('duration: %s' % (1. * frames.size / SAMPLE_RATE))

    if want_beat_hist:
        beat_hist = engine.readOutput('beat_hist')
        print('beat_hist: %s' % (beat_hist,))
class MFCCExtractor:
    """Compute MFCC features for WAV files with yaafe.

    Constructor keyword arguments:
        block_size: Value passed to yaafe as ``blockSize`` (default 1024).
        step_size: Value passed to yaafe as ``stepSize`` (default 512).
    """

    def __init__(self, **args):
        self.engine = Engine()
        self.afp = AudioFileProcessor()
        self.block_size = args.get('block_size', 1024)
        self.step_size = args.get('step_size', 512)

    def extract(self, path, **args):
        """Extract the MFCC output for the WAV file at ``path``.

        Args:
            path: Path of the WAV file; it is echoed to standard output.
            **args: ``save_to_disk`` - when truthy, the result is also
                written to ``<path>.mfc.csv``.

        Returns:
            The ``'mfcc'`` entry of ``Engine.readAllOutputs()``.
        """
        self.path = path
        print(path)

        # Only the header (sample rate) is needed, so release the file
        # handle right away instead of leaking it. ``self.wave`` is kept
        # as an attribute for compatibility; it refers to a closed reader.
        self.wave = wave.open(path, 'r')
        try:
            self.rate = self.wave.getframerate()
        finally:
            self.wave.close()

        fp = FeaturePlan(sample_rate=self.rate)
        fp.addFeature('mfcc: MFCC blockSize={} stepSize={}'.format(
            self.block_size, self.step_size))
        self.engine.load(fp.getDataFlow())
        self.afp.processFile(self.engine, path)
        feats = self.engine.readAllOutputs()

        # Test the flag's value, not its mere presence, so that
        # ``save_to_disk=False`` really disables saving.
        if args.get('save_to_disk'):
            self.__save_to_disk(feats['mfcc'])
        return feats['mfcc']

    def __save_to_disk(self, feats):
        """Write ``feats`` as comma-separated text next to the source file."""
        new_path = self.path + ".mfc.csv"
        np.savetxt(new_path, feats, delimiter=",")
def startEngine(path):
    """Process the audio file at ``path`` with the module-level ``engine``.

    Rebinds two module globals: ``afp`` (a fresh ``AudioFileProcessor``)
    and ``features`` (the result of ``engine.readAllOutputs()``).

    Args:
        path: Path of the audio file to process.

    Returns:
        The string ``'extracted'``.
    """
    global afp
    global features

    # ``afp`` is published before processing starts, so it stays bound even
    # if ``processFile`` raises.
    afp = AudioFileProcessor()
    afp.processFile(engine, path)

    # All feature outputs produced by the run.
    features = engine.readAllOutputs()

    status = 'extracted'
    return status
def __init__(self, **args):
    """Create the yaafe engine/processor pair and store the frame settings.

    Keyword Args:
        block_size: Stored as ``self.block_size``; 1024 when not supplied.
        step_size: Stored as ``self.step_size``; 512 when not supplied.
    """
    self.engine = Engine()
    self.afp = AudioFileProcessor()
    # Fall back to the defaults only when the keyword is absent; an explicit
    # value (even a falsy one) is stored unchanged.
    self.block_size = args.get('block_size', 1024)
    self.step_size = args.get('step_size', 512)
def startEngine(path):
    """Process the audio file at ``path`` with the module-level ``engine``.

    Rebinds two module globals: ``afp`` (a fresh ``AudioFileProcessor``)
    and ``features`` (the result of ``engine.readAllOutputs()``).

    Args:
        path: Path of the audio file to process.

    Returns:
        A fixed status string.
    """
    global afp
    global features

    afp = AudioFileProcessor()
    # Extract the features from the mp3 file; an engine configuration must
    # be supplied.
    afp.processFile(engine, path)

    # Fetch the matrices of all features.
    features = engine.readAllOutputs()

    status = 'Yaafe提取成功'
    return status
def compute_spec(song_path):
    """Compute the power spectrum of an audio file with yaafe.

    The plan is created with ``sample_rate=22050`` and ``resample=True``;
    the single ``PowerSpectrum`` feature uses ``blockSize=1024`` and
    ``stepSize=512``.

    Args:
        song_path: Path of the audio file to process.

    Returns:
        The engine's ``'spec'`` output.
    """
    plan = FeaturePlan(sample_rate=22050, resample=True)
    # Single feature: the power spectrum.
    plan.addFeature("spec: PowerSpectrum blockSize=1024 stepSize=512")

    spec_engine = Engine()
    spec_engine.load(plan.getDataFlow())

    AudioFileProcessor().processFile(spec_engine, song_path)
    return spec_engine.readOutput('spec')
def initialize(self, feature_dict):
    """Build the yaafe plan, dataflow, engine and file processor.

    Args:
        feature_dict: Mapping of output name to yaafe feature definition;
            each item is registered as ``"<name>: <definition>"``.

    Returns:
        ``self``, so the call can be chained.
    """
    self.feature_dict = feature_dict

    plan = self.fp = FeaturePlan(sample_rate=self.sample_rate, normalize=0.98)
    for feature_name, definition in self.feature_dict.items():
        plan.addFeature("{0}: {1}".format(feature_name, definition))

    # The engine is configured from the plan's dataflow.
    self.df = plan.getDataFlow()
    self.engine = Engine()
    self.engine.load(self.df)

    self.afp = AudioFileProcessor()
    return self
def _ExtractZCRAndFlux(self, audio_location, sample_rate):
    """Extract yaafe's ``ZCR`` and ``SpectralFlux`` features from one file.

    Args:
        audio_location: Path of the audio file to process.
        sample_rate: Sample rate handed to the yaafe ``FeaturePlan``.

    Returns:
        The result of ``Engine.readAllOutputs()``; the features are
        registered as ``'zcr'`` and ``'flux'``.
    """
    plan = FeaturePlan(sample_rate=sample_rate)
    for definition in ('zcr: ZCR', 'flux: SpectralFlux'):
        plan.addFeature(definition)

    extraction_engine = Engine()
    extraction_engine.load(plan.getDataFlow())

    AudioFileProcessor().processFile(extraction_engine, audio_location)
    return extraction_engine.readAllOutputs()
def startEngine(path):
    """Load the module-level dataflow ``df`` into a new engine and run it.

    The result of ``readAllOutputs()`` is stored in the module global
    ``features``.

    Args:
        path: Path of the audio file to process.

    Returns:
        A fixed status string.
    """
    global features

    local_engine = Engine()
    local_engine.load(df)

    processor = AudioFileProcessor()
    processor.processFile(local_engine, path)

    features = local_engine.readAllOutputs()

    status = 'Yaafe引擎启动成功'
    return status
def _zcr(self, audio_location, sample_rate):
    """Extract yaafe's ``ZCR`` feature from one audio file.

    Equivalent to running
    ``python yaafe.py -r SAMPLERATE -f "zcr: ZCR blockSize=1024 stepSize=512" WAV-LOCATION``
    where ``blockSize`` is the output frame size and ``stepSize`` the step
    between consecutive frames. No parameters are given here, so the yaafe
    defaults (1024 / 512) apply.

    Args:
        audio_location: Path of the audio file to process.
        sample_rate: Sample rate of the file being processed.

    Returns:
        The result of ``Engine.readAllOutputs()``; the feature is
        registered under the name ``'ZCR'``.
    """
    plan = FeaturePlan(sample_rate=sample_rate)
    plan.addFeature('ZCR: ZCR')

    # The dataflow comes straight from the plan; it could alternatively be
    # loaded from a file with DataFlow().load(dataflow_file).
    extraction_engine = Engine()
    extraction_engine.load(plan.getDataFlow())

    # Results are read back in memory. Writing CSV files instead would need
    # afp.setOutputFormat('csv', 'output', {'Precision': '8'}) before
    # processFile.
    AudioFileProcessor().processFile(extraction_engine, audio_location)

    # The engine is not reset here (engine.reset() would allow reuse); it is
    # simply discarded on return.
    return extraction_engine.readAllOutputs()
def __init__(self):
    """Prepare a 44.1 kHz yaafe plan/engine/processor and empty result lists."""
    plan = FeaturePlan(sample_rate=44100)
    self.fp = plan
    # NOTE(review): the definition after 'feat: ' is empty - looks like a
    # placeholder; confirm which yaafe feature is intended.
    plan.addFeature('feat: ')

    self.engine = Engine()
    self.engine.load(plan.getDataFlow())
    self.afp = AudioFileProcessor()

    self.mfcc = []
    self.songs = []
def initialize(self, feature_dict):
    """Build the yaafe plan, dataflow, engine and file processor.

    Args:
        feature_dict: Mapping of output name to yaafe feature definition;
            each item is registered as ``"<name>: <definition>"``.

    Returns:
        ``self``, so the call can be chained.
    """
    self.feature_dict = feature_dict

    plan = self.fp = FeaturePlan(sample_rate=self.sample_rate, normalize=0.98)
    for feature_name, definition in self.feature_dict.items():
        plan.addFeature("{0}: {1}".format(feature_name, definition))

    # The engine is configured from the plan's dataflow.
    self.df = plan.getDataFlow()
    self.engine = Engine()
    self.engine.load(self.df)

    self.afp = AudioFileProcessor()
    return self
class Extractor(object):
    """Extract one yaafe feature per WAV file and collect them in a FeatureSpace."""

    def __init__(self):
        self.fp = FeaturePlan(sample_rate=44100)
        # NOTE(review): the definition after 'feat: ' is empty - looks like a
        # placeholder; confirm which yaafe feature is intended.
        self.fp.addFeature('feat: ')
        self.engine = Engine()
        self.engine.load(self.fp.getDataFlow())
        self.afp = AudioFileProcessor()
        self.mfcc = []
        self.songs = []

    def recurse(self, directory):
        """Return the paths of all files under ``directory`` ending in 'wav'.

        The list is also printed. Paths appear in ``os.walk`` order.

        Args:
            directory: Root directory to walk.

        Returns:
            List of matching file paths (empty when nothing matches).
        """
        results = []
        for root, _dirs, files in os.walk(directory):
            for f in files:
                current_file = os.path.join(root, f)
                # os.walk yields every file once, so no duplicate check is
                # needed (the former ``not in results`` scan was O(n^2)).
                if current_file.endswith('wav'):
                    results.append(current_file)
        print(results)
        return results

    def extract_feature(self, f):
        """Process file ``f`` and return the engine's ``'feat'`` output."""
        self.afp.processFile(self.engine, f)
        feats = self.engine.readAllOutputs()
        return feats['feat']

    def build_feature_space(self, directory=None):
        """Fill ``self.space`` with one Feature per WAV file.

        After all files are added, the result of ``min_dist`` for the last
        added feature is printed.

        Args:
            directory: Root directory to scan. Defaults to ``sys.argv[1]``,
                which was previously the only option.
        """
        if directory is None:
            directory = sys.argv[1]

        self.space = FeatureSpace('mfcc')
        last_feature = None
        for f in self.recurse(directory):
            data = self.extract_feature(f)
            temp_feature = Feature(f, ['mfcc', data])
            self.space.add(temp_feature)
            last_feature = temp_feature
        # NOTE(review): last_feature is still None when no WAV file was
        # found; how min_dist handles None is not visible here - confirm.
        print(self.space.min_dist(last_feature))
def _mfcc(self, audio_location, sample_rate):
    """Extract yaafe's ``MFCC`` feature from one audio file.

    Equivalent to running
    ``python yaafe.py -r SAMPLERATE -f "mfcc: MFCC PARAMETERS" WAV-LOCATION``.
    No parameters are given here, so the yaafe defaults apply:

    - CepsIgnoreFirstCoeff (default=1): 0 keeps the first cepstral
      coefficient, 1 ignores it
    - CepsNbCoeffs (default=13): number of cepstral coefficients to keep
    - FFTWindow (default=Hanning): weighting window applied before the FFT
      (Hanning|Hamming|None)
    - MelMaxFreq (default=6854.0): maximum frequency of the mel filter bank
    - MelMinFreq (default=130.0): minimum frequency of the mel filter bank
    - MelNbFilters (default=40): number of mel filters
    - blockSize (default=1024): output frame size
    - stepSize (default=512): step between consecutive frames

    Args:
        audio_location: Path of the audio file to process.
        sample_rate: Sample rate of the file being processed.

    Returns:
        The result of ``Engine.readAllOutputs()``; the feature is
        registered under the name ``'mfcc'``.
    """
    plan = FeaturePlan(sample_rate=sample_rate)
    # Explicit parameters could be appended to the definition, e.g.
    # 'mfcc: MFCC CepsIgnoreFirstCoeff=0 CepsNbCoeffs=13 FFTWindow=Hanning
    #  MelMaxFreq=6854 MelMinFreq=130 MelNbFilters=40 blockSize=1024
    #  stepSize=512'.
    plan.addFeature('mfcc: MFCC')

    extraction_engine = Engine()
    extraction_engine.load(plan.getDataFlow())

    AudioFileProcessor().processFile(extraction_engine, audio_location)
    return extraction_engine.readAllOutputs()
def _SpectralFlux(self, audio_location, sample_rate):
    """Extract yaafe's ``SpectralFlux`` feature from one audio file.

    Equivalent to running
    ``python yaafe.py -r SAMPLERATE -f "flux: SpectralFlux PARAMETERS" WAV-LOCATION``.
    No parameters are given here, so the yaafe defaults apply:

    - FFTLength (default=0): frame length on which the FFT is performed;
      the original frame is zero-padded or truncated to this size, 0 keeps
      the original frame length
    - FFTWindow (default=Hanning): weighting window applied before the FFT
      (Hanning|Hamming|None)
    - FluxSupport (default=All): 'All' uses every bin, 'Increase' uses only
      bins that are increasing
    - blockSize (default=1024): output frame size
    - stepSize (default=512): step between consecutive frames

    Args:
        audio_location: Path of the audio file to process.
        sample_rate: Sample rate of the file being processed.

    Returns:
        The result of ``Engine.readAllOutputs()``; the feature is
        registered under the name ``'Flux'``.
    """
    plan = FeaturePlan(sample_rate=sample_rate)
    # Explicit parameters could be appended to the definition, e.g.
    # 'flux: SpectralFlux FFTLength=0 FFTWindow=Hanning FluxSupport=All
    #  blockSize=1024 stepSize=512'.
    plan.addFeature('Flux: SpectralFlux')

    extraction_engine = Engine()
    extraction_engine.load(plan.getDataFlow())

    AudioFileProcessor().processFile(extraction_engine, audio_location)
    return extraction_engine.readAllOutputs()
def process_audio(audioFile, engine):
    """Run ``engine`` over one audio file and write the result as CSV.

    The file is temporarily renamed to its bare file name (i.e. moved into
    the current working directory), processed under that name, and then
    renamed back to its original location.
    NOTE(review): presumably done so the output files are named after the
    bare file name rather than the full path - confirm.

    Args:
        audioFile: Path of the audio file to process.
        engine: A yaafe ``Engine`` that already has a dataflow loaded.

    Returns:
        None. Output goes to ``config['OUTPUT_FOLDER']`` in CSV format with
        ``Precision`` set to ``'8'``.
    """
    local_name = basename(audioFile)

    # Configure the processor before touching the file, so a configuration
    # error cannot leave the file stranded in the working directory.
    afp = AudioFileProcessor()
    afp.setOutputFormat('csv', config['OUTPUT_FOLDER'], {'Precision': '8'})
    # HDF5 output would be selected with
    # afp.setOutputFormat('h5', OUTPUT_FOLDER_NAME, {'mode': 'overwrite'}).

    rename(audioFile, local_name)
    try:
        afp.processFile(engine, local_name)
    finally:
        # Always move the file back, even when processing raises.
        rename(local_name, audioFile)
    return
def main(argv):
    """Slice a song into frames with yaafe, print a summary and plot it.

    Args:
        argv: Command-line style argument list. ``argv[0]`` is the program
            name and ``argv[1]`` the path of the audio file to analyse.

    Returns:
        None. A usage line is printed when ``argv`` does not hold exactly
        two items; otherwise the summary is printed and the concatenated
        frames are shown in a pylab plot window.

    Raises:
        IOError: If ``argv[1]`` does not name an existing path.
    """
    if len(argv) != 2:
        print('usage: python %s foo.mp3' % (argv[0]))
        return

    song_path = argv[1]
    # Explicit check instead of ``assert``: assertions are stripped under
    # ``python -O``. Done before any yaafe object is built so a bad path
    # fails fast.
    if not os.path.exists(song_path):
        raise IOError('audio file not found: %s' % song_path)

    fp = FeaturePlan(sample_rate=SAMPLE_RATE, resample=True,
                     time_start=TIME_START, time_limit=TIME_LIMIT)
    fp.addFeature("frames: Frames blockSize={0} stepSize={1}".format(
        SLICE_WINSIZE, SLICE_STEPSIZE))

    engine = Engine()
    engine.load(fp.getDataFlow())
    afp = AudioFileProcessor()
    afp.processFile(engine, song_path)

    # Join all frame rows into one flat array.
    # NOTE(review): size / SAMPLE_RATE is the true duration only if the
    # frames do not overlap (SLICE_STEPSIZE == SLICE_WINSIZE) - confirm.
    frames = np.concatenate(engine.readOutput('frames'))
    print('time start: %ss' % TIME_START)
    print('time limit: %ss' % TIME_LIMIT)
    print('duration: %s' % (1. * frames.size / SAMPLE_RATE))

    # Imported here so pylab is only needed when a plot is actually drawn.
    from pylab import plot, show
    plot(frames)
    show()
class Yaafe(EnhancedObject):
    '''Yaafe toolbox wrapper. To be used with Database object.

    Attributes:
        sample_rate     The files' sample rate
        features        Mapping of output name -> yaafe feature definition
        fp, df, engine, afp
                        FeaturePlan, DataFlow, Engine and AudioFileProcessor
                        created by initialize()

    Methods:
        initialize(feature_dict)    Build the yaafe plan and engine
        save_fplan(name)            Write the feature plan to "<name>.txt"
        process(audiofile)          Run an audio file through the engine

    NOTE(review): earlier documentation also listed a plan_filename
    attribute and get_X(entries_list, feat) / get_y methods for fetching
    data from the Database; none of them is defined in this class body -
    presumably they come from EnhancedObject, confirm.

    Init:
        Yaafe(sample_rate, features=None)
    '''

    # Default feature set. Every definition uses blockSize=512 and
    # stepSize=128 and pipes the feature through a StatisticalIntegrator
    # (NbFrames=40, StepNbFrames=8).
    _features = {
        'spec_rolloff': ("SpectralRolloff blockSize=512 stepSize=128 "
                         "> StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'spec_shape': ("SpectralShapeStatistics blockSize=512 stepSize=128 "
                       "> StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'spec_flux': ("SpectralFlux blockSize=512 stepSize=128 >"
                      "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'amp_mod': ("AmplitudeModulation blockSize=512 stepSize=128 >"
                    "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'auto': ("AutoCorrelation blockSize=512 stepSize=128 >"
                 "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'lpc': ("LPC blockSize=512 stepSize=128 > StatisticalIntegrator "
                "NbFrames=40 StepNbFrames=8"),
        'loudness': ("Loudness blockSize=512 stepSize=128 >"
                     "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'mfcc': ("MFCC blockSize=512 stepSize=128 > StatisticalIntegrator "
                 "NbFrames=40 StepNbFrames=8"),
        # A stray comma after blockSize=512 was removed: parameters in a
        # feature definition are separated by spaces only.
        'mel_spectrum': ("MelSpectrum blockSize=512 stepSize=128 >"
                         "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'obsi': ("OBSI blockSize=512 stepSize=128 > StatisticalIntegrator "
                 "NbFrames=40 StepNbFrames=8"),
        'obsir': ("OBSIR blockSize=512 stepSize=128 >"
                  "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'perc_sharp': ("PerceptualSharpness blockSize=512 stepSize=128 >"
                       "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'perc_spread': ("PerceptualSpread blockSize=512 stepSize=128 >"
                        "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'spect_crest': ("SpectralCrestFactorPerBand blockSize=512 "
                        "stepSize=128 > StatisticalIntegrator NbFrames=40 "
                        "StepNbFrames=8"),
        'spec_decr': ("SpectralDecrease blockSize=512 stepSize=128 >"
                      "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'spect_flat': ("SpectralFlatness blockSize=512 stepSize=128 >"
                       "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'spect_flat_band': ("SpectralFlatnessPerBand blockSize=512 "
                            "stepSize=128 > StatisticalIntegrator NbFrames=40 "
                            "StepNbFrames=8"),
        'spect_slope': ("SpectralSlope blockSize=512 stepSize=128 >"
                        "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'spect_var': ("SpectralVariation blockSize=512 stepSize=128 >"
                      "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'temp_shape': ("TemporalShapeStatistics blockSize=512 stepSize=128 "
                       "> StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'zcr': ("ZCR blockSize=512 stepSize=128 > StatisticalIntegrator "
                "NbFrames=40 StepNbFrames=8"),
        'env_shape': ("EnvelopeShapeStatistics blockSize=512 stepSize=128"
                      " > StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'comp_onest': ("ComplexDomainOnsetDetection blockSize=512 "
                       "stepSize=128 > StatisticalIntegrator NbFrames=40 "
                       "StepNbFrames=8"),
    }

    def __init__(self, sample_rate, features=None):
        """Create the wrapper; ``features`` defaults to the class-level set."""
        if features is None:
            features = self._features
        self.sample_rate = sample_rate
        self.initialize(features)
        self.features = features

    def initialize(self, feature_dict):
        """ Run the required boilerplate for yaafe

        Registers every item of ``feature_dict`` as "<name>: <definition>"
        on a new FeaturePlan, loads the resulting dataflow into a new
        Engine and creates an AudioFileProcessor. Returns ``self``.
        """
        self.feature_dict = feature_dict
        self.fp = FeaturePlan(
            sample_rate=self.sample_rate, normalize=0.98)

        for name, desc in self.feature_dict.items():
            self.fp.addFeature("{0}: {1}".format(name, desc))

        self.df = self.fp.getDataFlow()
        self.engine = Engine()
        self.engine.load(self.df)
        self.afp = AudioFileProcessor()
        return self

    def save_fplan(self, name):
        """ Save a feature plan (text file)

        Writes one "<feature name>: <definition>" line per entry of
        ``self.features`` to "<name>.txt".
        """
        # ``with`` closes the file even if a write fails.
        with open("{}.txt".format(name), 'w') as text_file:
            for feat_name, desc in self.features.items():
                # One definition per line; without the newline all
                # features ran together on a single unusable line.
                text_file.write("{}: {}\n".format(feat_name, desc))

    def process(self, audiofile):
        """ Process function for running a file through yaafe's
        feature extractor

        Returns ``sorted(out)`` where ``out`` is the result of
        ``readAllOutputs()``; the engine is flushed afterwards.
        """
        self.afp.processFile(self.engine, audiofile)
        out = self.engine.readAllOutputs()
        self.engine.flush()
        # NOTE(review): if ``out`` is a dict keyed by feature name, this
        # returns only the sorted names and drops the data - confirm that
        # callers expect that.
        return sorted(out)
def detect(wav_path, ans_path=None):
    """Decode a key sequence from the tones in an audio file and print it.

    The file is resampled to 8000 Hz and yaafe computes a power spectrum
    per frame (blockSize 1024, stepSize 512). For every frame whose mean
    magnitude is above ``SILENT_MAG_THRESHOLD``, the magnitude at each
    frequency of ``LOWER_FREQS`` and ``UPPER_FREQS`` is linearly
    interpolated between the two neighbouring spectrum bins. The strongest
    candidate below 1000 Hz and the strongest at or above 1000 Hz select an
    entry of ``KEYS``; a candidate counts only when it exceeds twice the
    frame mean. The per-frame keys are then condensed by ``purge_seq``.

    Args:
        wav_path: Path of the audio file to analyse.
        ans_path: Optional path of a text file with the expected result,
            one entry per line. When given, each stripped line is asserted
            equal to the decoded entry at the same position.

    Returns:
        None. Entries are printed as ``[index] value``.

    Raises:
        AssertionError: If a line of ``ans_path`` differs from the decoded
            entry at that position.
    """
    sample_rate = 8000
    block_size = 1024
    step_size = block_size // 2
    n_band = block_size // 2
    # Frequency of each spectrum bin, from 0 up to sample_rate / 2.
    freq_bound = [i * sample_rate / 2. / n_band for i in range(n_band + 1)]

    plan = FeaturePlan(sample_rate=sample_rate, resample=True)
    plan.addFeature('power_spectrum: PowerSpectrum blockSize=%d stepSize=%d'
                    % (block_size, step_size))
    engine = Engine()
    engine.load(plan.getDataFlow())
    AudioFileProcessor().processFile(engine, wav_path)

    seq = []
    for spectrum in engine.readOutput('power_spectrum'):
        mean_mag = np.mean(spectrum)
        if mean_mag <= SILENT_MAG_THRESHOLD:
            seq.append(SILENT_KEY)
            continue

        threshold = mean_mag * 2
        best_lower_idx, best_lower_mag = -1, -1
        best_upper_idx, best_upper_mag = -1, -1
        candidates = itertools.chain(enumerate(LOWER_FREQS),
                                     enumerate(UPPER_FREQS))
        for target_idx, target_freq in candidates:
            # Locate the pair of bins that brackets the target frequency.
            right = bisect.bisect(freq_bound, target_freq)
            assert right > 0
            left = right - 1
            freq1 = freq_bound[left]
            mag1 = spectrum[left]
            freq2 = freq_bound[right]
            mag2 = spectrum[right]

            # Linear interpolation between the two bins.
            w1 = 1. * (freq2 - target_freq) / (freq2 - freq1)
            w2 = 1. - w1
            target_mag = (w1 * mag1 + w2 * mag2)

            if not target_mag > threshold:
                continue
            if target_freq < 1000:
                if target_mag > best_lower_mag:
                    best_lower_idx, best_lower_mag = target_idx, target_mag
            elif target_mag > best_upper_mag:
                best_upper_idx, best_upper_mag = target_idx, target_mag

        if best_lower_idx == -1 or best_upper_idx == -1:
            seq.append(UNKNOWN_KEY)
        else:
            # NOTE(review): the row stride is len(LOWER_FREQS); if KEYS is
            # laid out with len(UPPER_FREQS) columns per row this is only
            # right when both lists have the same length - confirm.
            seq.append(KEYS[best_lower_idx * len(LOWER_FREQS) + best_upper_idx])

    ans = purge_seq(seq)

    if ans_path is None:
        for i, line in enumerate(ans):
            print('[%d] %s' % (i, line))
        return

    with open(ans_path) as fh:
        for i, line in enumerate(fh):
            line = line.strip()
            assert line == ans[i], "%s != %s" % (line, ans[i])
            print('[%d] %s' % (i, line))
def detect(wav_path, ans_path=None):
    """Decode a key sequence from the tones in an audio file and print it.

    The file is resampled to 8000 Hz and yaafe computes a power spectrum
    per frame (blockSize 1024, stepSize 512). For every frame whose mean
    magnitude is above ``SILENT_MAG_THRESHOLD``, the magnitude at each
    frequency of ``LOWER_FREQS`` and ``UPPER_FREQS`` is linearly
    interpolated between the two neighbouring spectrum bins. The strongest
    candidate below 1000 Hz and the strongest at or above 1000 Hz select an
    entry of ``KEYS``; a candidate counts only when it exceeds twice the
    frame mean. The per-frame keys are then condensed by ``purge_seq``.

    Args:
        wav_path: Path of the audio file to analyse.
        ans_path: Optional path of a text file with the expected result,
            one entry per line. When given, each stripped line is asserted
            equal to the decoded entry at the same position.

    Returns:
        None. Entries are printed as ``[index] value``.

    Raises:
        AssertionError: If a line of ``ans_path`` differs from the decoded
            entry at that position.
    """
    sample_rate = 8000
    block_size = 1024
    step_size = block_size // 2
    n_band = block_size // 2
    # Frequency of each spectrum bin, from 0 up to sample_rate / 2.
    freq_bound = [i * sample_rate / 2. / n_band for i in range(n_band + 1)]

    plan = FeaturePlan(sample_rate=sample_rate, resample=True)
    plan.addFeature('power_spectrum: PowerSpectrum blockSize=%d stepSize=%d'
                    % (block_size, step_size))
    engine = Engine()
    engine.load(plan.getDataFlow())
    AudioFileProcessor().processFile(engine, wav_path)

    seq = []
    for spectrum in engine.readOutput('power_spectrum'):
        mean_mag = np.mean(spectrum)
        if mean_mag <= SILENT_MAG_THRESHOLD:
            seq.append(SILENT_KEY)
            continue

        threshold = mean_mag * 2
        best_lower_idx, best_lower_mag = -1, -1
        best_upper_idx, best_upper_mag = -1, -1
        candidates = itertools.chain(enumerate(LOWER_FREQS),
                                     enumerate(UPPER_FREQS))
        for target_idx, target_freq in candidates:
            # Locate the pair of bins that brackets the target frequency.
            right = bisect.bisect(freq_bound, target_freq)
            assert right > 0
            left = right - 1
            freq1 = freq_bound[left]
            mag1 = spectrum[left]
            freq2 = freq_bound[right]
            mag2 = spectrum[right]

            # Linear interpolation between the two bins.
            w1 = 1. * (freq2 - target_freq) / (freq2 - freq1)
            w2 = 1. - w1
            target_mag = (w1 * mag1 + w2 * mag2)

            if not target_mag > threshold:
                continue
            if target_freq < 1000:
                if target_mag > best_lower_mag:
                    best_lower_idx, best_lower_mag = target_idx, target_mag
            elif target_mag > best_upper_mag:
                best_upper_idx, best_upper_mag = target_idx, target_mag

        if best_lower_idx == -1 or best_upper_idx == -1:
            seq.append(UNKNOWN_KEY)
        else:
            # NOTE(review): the row stride is len(LOWER_FREQS); if KEYS is
            # laid out with len(UPPER_FREQS) columns per row this is only
            # right when both lists have the same length - confirm.
            seq.append(KEYS[best_lower_idx * len(LOWER_FREQS) + best_upper_idx])

    ans = purge_seq(seq)

    if ans_path is None:
        for i, line in enumerate(ans):
            print('[%d] %s' % (i, line))
        return

    with open(ans_path) as fh:
        for i, line in enumerate(fh):
            line = line.strip()
            assert line == ans[i], "%s != %s" % (line, ans[i])
            print('[%d] %s' % (i, line))
class Yaafe(EnhancedObject):
    '''Yaafe toolbox wrapper. To be used with Database object.

    Attributes:
        sample_rate     The files' sample rate
        features        Mapping of output name -> yaafe feature definition
        fp, df, engine, afp
                        FeaturePlan, DataFlow, Engine and AudioFileProcessor
                        created by initialize()

    Methods:
        initialize(feature_dict)    Build the yaafe plan and engine
        save_fplan(name)            Write the feature plan to "<name>.txt"
        process(audiofile)          Run an audio file through the engine

    NOTE(review): earlier documentation also listed a plan_filename
    attribute and get_X(entries_list, feat) / get_y methods for fetching
    data from the Database; none of them is defined in this class body -
    presumably they come from EnhancedObject, confirm.

    Init:
        Yaafe(sample_rate, features=None)
    '''

    # Default feature set. Every definition uses blockSize=512 and
    # stepSize=128 and pipes the feature through a StatisticalIntegrator
    # (NbFrames=40, StepNbFrames=8).
    _features = {
        'spec_rolloff': ("SpectralRolloff blockSize=512 stepSize=128 "
                         "> StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'spec_shape': ("SpectralShapeStatistics blockSize=512 stepSize=128 "
                       "> StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'spec_flux': ("SpectralFlux blockSize=512 stepSize=128 >"
                      "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'amp_mod': ("AmplitudeModulation blockSize=512 stepSize=128 >"
                    "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'auto': ("AutoCorrelation blockSize=512 stepSize=128 >"
                 "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'lpc': ("LPC blockSize=512 stepSize=128 > StatisticalIntegrator "
                "NbFrames=40 StepNbFrames=8"),
        'loudness': ("Loudness blockSize=512 stepSize=128 >"
                     "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'mfcc': ("MFCC blockSize=512 stepSize=128 > StatisticalIntegrator "
                 "NbFrames=40 StepNbFrames=8"),
        # A stray comma after blockSize=512 was removed: parameters in a
        # feature definition are separated by spaces only.
        'mel_spectrum': ("MelSpectrum blockSize=512 stepSize=128 >"
                         "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'obsi': ("OBSI blockSize=512 stepSize=128 > StatisticalIntegrator "
                 "NbFrames=40 StepNbFrames=8"),
        'obsir': ("OBSIR blockSize=512 stepSize=128 >"
                  "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'perc_sharp': ("PerceptualSharpness blockSize=512 stepSize=128 >"
                       "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'perc_spread': ("PerceptualSpread blockSize=512 stepSize=128 >"
                        "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'spect_crest': ("SpectralCrestFactorPerBand blockSize=512 "
                        "stepSize=128 > StatisticalIntegrator NbFrames=40 "
                        "StepNbFrames=8"),
        'spec_decr': ("SpectralDecrease blockSize=512 stepSize=128 >"
                      "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'spect_flat': ("SpectralFlatness blockSize=512 stepSize=128 >"
                       "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'spect_flat_band': ("SpectralFlatnessPerBand blockSize=512 "
                            "stepSize=128 > StatisticalIntegrator NbFrames=40 "
                            "StepNbFrames=8"),
        'spect_slope': ("SpectralSlope blockSize=512 stepSize=128 >"
                        "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'spect_var': ("SpectralVariation blockSize=512 stepSize=128 >"
                      "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'temp_shape': ("TemporalShapeStatistics blockSize=512 stepSize=128 "
                       "> StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'zcr': ("ZCR blockSize=512 stepSize=128 > StatisticalIntegrator "
                "NbFrames=40 StepNbFrames=8"),
        'env_shape': ("EnvelopeShapeStatistics blockSize=512 stepSize=128"
                      " > StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'comp_onest': ("ComplexDomainOnsetDetection blockSize=512 "
                       "stepSize=128 > StatisticalIntegrator NbFrames=40 "
                       "StepNbFrames=8"),
    }

    def __init__(self, sample_rate, features=None):
        """Create the wrapper; ``features`` defaults to the class-level set."""
        if features is None:
            features = self._features
        self.sample_rate = sample_rate
        self.initialize(features)
        self.features = features

    def initialize(self, feature_dict):
        """ Run the required boilerplate for yaafe

        Registers every item of ``feature_dict`` as "<name>: <definition>"
        on a new FeaturePlan, loads the resulting dataflow into a new
        Engine and creates an AudioFileProcessor. Returns ``self``.
        """
        self.feature_dict = feature_dict
        self.fp = FeaturePlan(sample_rate=self.sample_rate, normalize=0.98)

        for name, desc in self.feature_dict.items():
            self.fp.addFeature("{0}: {1}".format(name, desc))

        self.df = self.fp.getDataFlow()
        self.engine = Engine()
        self.engine.load(self.df)
        self.afp = AudioFileProcessor()
        return self

    def save_fplan(self, name):
        """ Save a feature plan (text file)

        Writes one "<feature name>: <definition>" line per entry of
        ``self.features`` to "<name>.txt".
        """
        # ``with`` closes the file even if a write fails.
        with open("{}.txt".format(name), 'w') as text_file:
            for feat_name, desc in self.features.items():
                # One definition per line; without the newline all
                # features ran together on a single unusable line.
                text_file.write("{}: {}\n".format(feat_name, desc))

    def process(self, audiofile):
        """ Process function for running a file through yaafe's
        feature extractor

        Returns ``sorted(out)`` where ``out`` is the result of
        ``readAllOutputs()``; the engine is flushed afterwards.
        """
        self.afp.processFile(self.engine, audiofile)
        out = self.engine.readAllOutputs()
        self.engine.flush()
        # NOTE(review): if ``out`` is a dict keyed by feature name, this
        # returns only the sorted names and drops the data - confirm that
        # callers expect that.
        return sorted(out)
""" Stand-alone application to demonstrate yaafe's transformations """
import numpy as np
import matplotlib.pyplot as plt
from yaafelib import FeaturePlan, Engine, AudioFileProcessor

# Two outputs from the same MFCC settings: the raw coefficients and a
# version piped through a StatisticalIntegrator.
FPLAN = FeaturePlan(sample_rate=44100)
FPLAN.addFeature("mfcc: MFCC blockSize=512 stepSize=128")
FPLAN.addFeature("mfcc_stat: MFCC blockSize=512 stepSize=128 > "
                 "StatisticalIntegrator NbFrames=40 StepNbFrames=8")

ENGINE = Engine()
ENGINE.load(FPLAN.getDataFlow())

# Run the demo track through the engine, read every output, then flush.
PROCESSOR = AudioFileProcessor()
PROCESSOR.processFile(ENGINE, 'track.wav')
DATA = ENGINE.readAllOutputs()
ENGINE.flush()

X_MFCC = DATA['mfcc']
X_MFCC_STAT = DATA['mfcc_stat']

# Labels for bands 1..13: first the thirteen "Average" entries, then the
# thirteen "Standard Dev." entries.
MFCC_DESC = []
for _label_template in ("Average for Band {}", "Standard Dev. for Band {}"):
    for i in range(1, 14):
        desc = _label_template.format(i)
        MFCC_DESC.append(desc)

# Interactive mode, then create the figure.
plt.ion()
FIG = plt.figure()
""" Stand-alone application to demonstrate yaafe's transformations """
import numpy as np
import matplotlib.pyplot as plt
from yaafelib import FeaturePlan, Engine, AudioFileProcessor

# Two outputs from the same MFCC settings: the raw coefficients and a
# version piped through a StatisticalIntegrator.
FPLAN = FeaturePlan(sample_rate=44100)
FPLAN.addFeature("mfcc: MFCC blockSize=512 stepSize=128")
FPLAN.addFeature("mfcc_stat: MFCC blockSize=512 stepSize=128 > "
                 "StatisticalIntegrator NbFrames=40 StepNbFrames=8")

ENGINE = Engine()
ENGINE.load(FPLAN.getDataFlow())

# Run the demo track through the engine, read every output, then flush.
PROCESSOR = AudioFileProcessor()
PROCESSOR.processFile(ENGINE, 'track.wav')
DATA = ENGINE.readAllOutputs()
ENGINE.flush()

X_MFCC = DATA['mfcc']
X_MFCC_STAT = DATA['mfcc_stat']

# Labels for bands 1..13: first the thirteen "Average" entries, then the
# thirteen "Standard Dev." entries.
MFCC_DESC = []
for _label_template in ("Average for Band {}", "Standard Dev. for Band {}"):
    for i in range(1, 14):
        desc = _label_template.format(i)
        MFCC_DESC.append(desc)

# Interactive mode, then create the figure and set its size in inches.
plt.ion()
FIG = plt.figure()
FIG.set_size_inches(14, 8)