def main(argv):
    """Extract raw frames (and optionally a beat histogram) from one audio file.

    Prints the configured time window, the duration covered by the extracted
    frames and, when the ``YAAFE_PATH`` environment variable is set, the
    beat-histogram summary.

    Args:
        argv: Command-line style list; ``argv[0]`` is the program name and
            ``argv[1]`` the path of the audio file to analyse.

    Raises:
        IOError: If the audio file does not exist.
    """
    if len(argv) != 2:
        print('usage: python %s foo.mp3' % (argv[0]))
        return

    # Validate the input before any Yaafe object is built, and with a real
    # exception: an ``assert`` disappears under ``python -O``.
    song_path = argv[1]
    if not os.path.exists(song_path):
        raise IOError('audio file not found: %s' % song_path)

    # The beat histogram is only requested/read when YAAFE_PATH is set.
    with_beat_hist = 'YAAFE_PATH' in os.environ

    fp = FeaturePlan(sample_rate=SAMPLE_RATE, resample=True,
                     time_start=TIME_START, time_limit=TIME_LIMIT)
    fp.addFeature("frames: Frames blockSize={0} stepSize={1}".format(
        SLICE_WINSIZE, SLICE_STEPSIZE))
    if with_beat_hist:
        fp.addFeature(
            "beat_hist: BeatHistogramSummary ACPNbPeaks=3 BHSBeatFrameSize=128 BHSBeatFrameStep=64 "
            "BHSHistogramFrameSize=40 BHSHistogramFrameStep=40 FFTLength=0 FFTWindow=Hanning "
            "HInf=40 HNbBins=80 HSup=200 NMANbFrames=5000 blockSize=1024 stepSize=512"
        )

    df = fp.getDataFlow()
    engine = Engine()
    engine.load(df)

    afp = AudioFileProcessor()
    afp.processFile(engine, song_path)

    # Join the per-block frame arrays into one array before measuring it.
    frames = engine.readOutput('frames')
    frames = np.concatenate(frames)

    print('time start: %ss' % TIME_START)
    print('time limit: %ss' % TIME_LIMIT)
    print('duration: %s' % (1. * frames.size / SAMPLE_RATE))

    if with_beat_hist:
        beat_hist = engine.readOutput('beat_hist')
        print('beat_hist: %s' % beat_hist)
def _ExtractAll(self, audio_location, sample_rate):
    """Run the complete descriptor set over one audio file.

    Args:
        audio_location: Path of the audio file handed to Yaafe.
        sample_rate: Sample rate passed to the ``FeaturePlan``.

    Returns:
        The result of ``Engine.readAllOutputs()`` for the features below.
    """
    # One plan can carry many features; they are registered in this order.
    feature_definitions = (
        'zcr: ZCR',
        'mfcc: MFCC',
        'mfcc_D1: MFCC > Derivate DOrder=1',
        # second-order MFCC derivative is intentionally left out
        'flux: SpectralFlux',
        'energy: Energy',
        'loudness: Loudness',
        'obsi: OBSI',
        'sharpness: PerceptualSharpness',
        'spread: PerceptualSpread',
        'rolloff: SpectralRolloff',
        'variation: SpectralVariation',
    )

    plan = FeaturePlan(sample_rate=sample_rate)
    for definition in feature_definitions:
        plan.addFeature(definition)

    # Turn the plan into a dataflow and hand it to a fresh engine.
    dataflow = plan.getDataFlow()
    extractor = Engine()
    extractor.load(dataflow)

    # Stream the audio file through the engine.
    processor = AudioFileProcessor()
    processor.processFile(extractor, audio_location)

    return extractor.readAllOutputs()
def _energy(audio_location, sample_rate):
    """Extract the Yaafe ``Energy`` feature from one audio file.

    Per the original author's notes this mirrors the command line
    ``python yaafe.py -r SAMPLERATE -f "energy: Energy PARAMETERS" WAV-LOCATION``
    with ``blockSize`` (default 1024, output frame size) and ``stepSize``
    (default 512, step between consecutive frames) left at their defaults.

    Args:
        audio_location: Path of the audio file to process.
        sample_rate: Sample rate of the file being processed.

    Returns:
        The result of ``Engine.readAllOutputs()``; the feature is registered
        under the name ``Energy``.
    """
    plan = FeaturePlan(sample_rate=sample_rate)
    # Explicit sizes could be appended, e.g.
    # 'energy: Energy blockSize=1024 stepSize=512'.
    plan.addFeature('Energy: Energy')
    dataflow = plan.getDataFlow()

    extractor = Engine()
    extractor.load(dataflow)

    processor = AudioFileProcessor()
    processor.processFile(extractor, audio_location)

    return extractor.readAllOutputs()
def compute_spec(song_path):
    """Return the power spectrum of the audio file at *song_path*.

    The plan is built for 22050 Hz with resampling enabled and requests a
    single ``PowerSpectrum`` feature (blockSize=1024, stepSize=512).

    Args:
        song_path: Path of the audio file to analyse.

    Returns:
        The engine output registered under the name ``spec``.
    """
    plan = FeaturePlan(sample_rate=22050, resample=True)
    plan.addFeature("spec: PowerSpectrum blockSize=1024 stepSize=512")
    dataflow = plan.getDataFlow()

    extractor = Engine()
    extractor.load(dataflow)

    processor = AudioFileProcessor()
    processor.processFile(extractor, song_path)

    spectrum = extractor.readOutput('spec')
    return spectrum
def initialize(self, feature_dict):
    """Set up the Yaafe plan, dataflow, engine and file processor.

    Args:
        feature_dict: Mapping of feature name to Yaafe feature description;
            each pair is registered as ``"<name>: <description>"``.

    Returns:
        ``self``, so the call can be chained.
    """
    self.feature_dict = feature_dict

    # The plan is created with the instance sample rate and normalize=0.98.
    self.fp = FeaturePlan(sample_rate=self.sample_rate, normalize=0.98)
    for feature_name, description in self.feature_dict.items():
        definition = "{0}: {1}".format(feature_name, description)
        self.fp.addFeature(definition)

    self.df = self.fp.getDataFlow()

    self.engine = Engine()
    self.engine.load(self.df)

    self.afp = AudioFileProcessor()
    return self
def _ExtractZCRAndFlux(self, audio_location, sample_rate):
    """Extract the zero-crossing rate and spectral flux of one audio file.

    Args:
        audio_location: Path of the audio file to process.
        sample_rate: Sample rate passed to the ``FeaturePlan``.

    Returns:
        The result of ``Engine.readAllOutputs()``; features are registered
        as ``zcr`` and ``flux``.
    """
    plan = FeaturePlan(sample_rate=sample_rate)
    for definition in ('zcr: ZCR', 'flux: SpectralFlux'):
        plan.addFeature(definition)
    dataflow = plan.getDataFlow()

    extractor = Engine()
    extractor.load(dataflow)

    processor = AudioFileProcessor()
    processor.processFile(extractor, audio_location)

    return extractor.readAllOutputs()
def init():
    """Create the module-level Yaafe ``engine`` for perceptual features.

    The plan uses 44100 Hz with resampling, ``time_start=0`` and
    ``time_limit=20``, and registers loudness, perceptual sharpness,
    perceptual spread, OBSI and OBSIR.

    Returns:
        The string ``'initialization'``.
    """
    global engine

    plan = FeaturePlan(sample_rate=44100, resample=True,
                       time_start=0, time_limit=20)
    feature_definitions = (
        "loudness: Loudness",
        "perceptualSharpness: PerceptualSharpness",
        "perceptualSpread: PerceptualSpread",
        "obsi: OBSI",
        "obsir: OBSIR",
    )
    for definition in feature_definitions:
        plan.addFeature(definition)
    dataflow = plan.getDataFlow()

    # Engine setup: the loaded engine is published through the global.
    engine = Engine()
    engine.load(dataflow)
    return 'initialization'
def testOnGuitarWithFeaturePlanFromFile(self):
    """runs on guitar and load Yaafe feature plan from file

    Points the test at ``samples/guitar.wav`` next to this file, loads the
    feature plan stored in ``yaafe_config/yaafeFeaturePlan`` and wraps it
    in a ``Yaafe`` analyzer. Three results are expected.
    """
    here = os.path.dirname(__file__)
    self.source = os.path.join(here, "samples", "guitar.wav")

    # Build the plan from the definition file instead of addFeature calls.
    plan = FeaturePlan(sample_rate=self.sample_rate)
    plan_path = os.path.join(here, "yaafe_config", "yaafeFeaturePlan")
    plan.loadFeaturePlan(plan_path)

    # New Yaafe TimeSide analyzer built from the plan.
    self.analyzer = Yaafe(plan)

    # Expected results
    self.result_length = 3
def _zcr(self, audio_location, sample_rate):
    """Extract the zero-crossing rate (``ZCR``) of one audio file.

    Per the original author's notes this mirrors the command line
    ``python yaafe.py -r SAMPLERATE -f "zcr: ZCR blockSize=1024 stepSize=512" WAV-LOCATION``
    where ``blockSize`` is the output frame size and ``stepSize`` the step
    between consecutive frames (1024 and 512 by default).

    Args:
        audio_location: Path of the audio file to process.
        sample_rate: Sample rate of the file being processed.

    Returns:
        The result of ``Engine.readAllOutputs()``; the feature is registered
        under the name ``ZCR``.
    """
    plan = FeaturePlan(sample_rate=sample_rate)
    plan.addFeature('ZCR: ZCR')

    # Author's note: a DataFlow could also be loaded from a file
    # (DataFlow().load(dataflow_file)) instead of being built from a plan.
    dataflow = plan.getDataFlow()

    extractor = Engine()
    extractor.load(dataflow)

    # Author's note: the processor can alternatively write CSV files via
    # setOutputFormat('csv', 'output', {'Precision': '8'}); here the
    # outputs are read back in memory. The engine is not reset afterwards.
    processor = AudioFileProcessor()
    processor.processFile(extractor, audio_location)

    return extractor.readAllOutputs()
def init():
    """Create the module-level Yaafe ``engine`` for a mixed feature set.

    The plan uses 44100 Hz with resampling, ``time_start=0`` and
    ``time_limit=60``. The per-feature value counts in the comments below
    are the original author's notes, not something this function checks.

    Returns:
        The status string ``'Yaafe初始化'``.
    """
    global engine

    plan = FeaturePlan(sample_rate=44100, resample=True,
                       time_start=0, time_limit=60)
    feature_definitions = (
        "mfcc: MFCC",               # Mel-frequency cepstral coefficients (13)
        "energy: Energy",           # short-time energy (1)
        "zcr: ZCR",                 # short-time average zero-crossing rate (1)
        "sf: SpectralFlux",         # spectral flux (1)
        "sr: SpectralRolloff",      # spectral roll-off point (1)
        "lpc: LPC LPCNbCoeffs=3",   # linear predictive coding (3)
        "lx: Loudness",             # loudness (24)
    )
    for definition in feature_definitions:
        plan.addFeature(definition)
    dataflow = plan.getDataFlow()

    # Configure the engine and publish it through the global.
    engine = Engine()
    engine.load(dataflow)
    return 'Yaafe初始化'
def testOnSweepWithFeaturePlan(self):
    """runs on sweep and define feature plan manually

    Points the test at ``samples/sweep.wav`` next to this file and builds
    a plan with MFCC plus its first and second derivatives, all with
    blockSize=512 and stepSize=256. Three results are expected.
    """
    self.source = os.path.join(os.path.dirname(__file__),
                               "samples", "sweep.wav")

    # Feature definitions are added by hand rather than loaded from a file.
    plan = FeaturePlan(sample_rate=self.sample_rate)
    manual_definitions = (
        'mfcc: MFCC blockSize=512 stepSize=256',
        'mfcc_d1: MFCC blockSize=512 stepSize=256 > Derivate DOrder=1',
        'mfcc_d2: MFCC blockSize=512 stepSize=256 > Derivate DOrder=2',
    )
    for definition in manual_definitions:
        plan.addFeature(definition)

    # New Yaafe TimeSide analyzer built from the plan.
    self.analyzer = Yaafe(plan)

    # Expected results
    self.result_length = 3
def extract(self, path, **args):
    """Compute the MFCCs of the WAV file at *path*.

    The file's frame rate is read with the ``wave`` module and used as the
    plan's sample rate; block and step sizes come from ``self.block_size``
    and ``self.step_size``. The path is echoed to stdout.

    Args:
        path: Path of the WAV file to process.
        **args: Optional flags. A truthy ``save_to_disk`` also passes the
            MFCCs to ``self.__save_to_disk``.

    Returns:
        The engine output registered under the name ``mfcc``.
    """
    self.path = path
    print(path)

    # NOTE(review): the handle is kept on self.wave and is not closed
    # here - confirm another method closes it, otherwise it leaks.
    self.wave = wave.open(path, 'r')
    self.rate = self.wave.getframerate()

    fp = FeaturePlan(sample_rate=self.rate)
    fp.addFeature('mfcc: MFCC blockSize={} stepSize={}'.format(
        self.block_size, self.step_size))

    self.engine.load(fp.getDataFlow())
    self.afp.processFile(self.engine, path)
    feats = self.engine.readAllOutputs()
    mfcc = feats['mfcc']

    # Honour the flag's value: a mere presence check would also save when
    # the caller passes save_to_disk=False.
    if args.get('save_to_disk'):
        self.__save_to_disk(mfcc)
    return mfcc
def initYaafe():
    """Build the module-level Yaafe dataflow ``df``.

    The plan uses 44100 Hz with resampling, ``time_start=20`` and
    ``time_limit=40``. Only the dataflow is created here; no engine is
    loaded by this function.

    Returns:
        The status string ``'Yaafe初始化'``.
    """
    global df

    plan = FeaturePlan(sample_rate=44100, resample=True,
                       time_start=20, time_limit=40)
    feature_definitions = (
        "energy: Energy",                          # energy
        "zcr: ZCR",                                # zero-crossing rate
        # loudness ("loudness: Loudness") is intentionally left out
        "sharpness: PerceptualSharpness",          # perceptual sharpness
        "lpc: LPC",                                # linear prediction coefficients
        "lsf: LSF",                                # line spectral frequencies
        "spectralRolloff: SpectralRolloff",        # spectral roll-off
        "spectralFlatness: SpectralFlatness",      # spectral flatness
        "mfcc: MFCC CepsNbCoeffs=13",              # MFCC
        # first-order derivative of the MFCC
        'mfcc_d1: MFCC blockSize=1024 stepSize=512 > Derivate DOrder=1',
    )
    for definition in feature_definitions:
        plan.addFeature(definition)

    df = plan.getDataFlow()
    return 'Yaafe初始化'
def main(audio_file, variables_to_extract):
    """Extract the requested audio variables into a fresh ``output`` folder.

    Args:
        audio_file: Audio file forwarded to ``process_audio``.
        variables_to_extract: Iterable of keys into
            ``feat_json['variables']``; each value is used as a Yaafe
            feature definition.
    """
    # Start from an empty output folder: drop any previous one, recreate it.
    output_dir = 'output'
    if isdir(output_dir):
        shutil.rmtree(output_dir)
    os.mkdir(output_dir)

    # Assemble the dataflow from the selected feature definitions.
    plan = FeaturePlan(sample_rate=44100)
    for variable in variables_to_extract:
        definition = feat_json['variables'][variable]
        plan.addFeature(definition)
    dataflow = plan.getDataFlow()

    extractor = Engine()
    extractor.load(dataflow)

    # Writes the CSV files with the audio variables.
    process_audio(audio_file, extractor)
def _mfcc(self, audio_location, sample_rate):
    """Extract MFCCs from one audio file using Yaafe's default parameters.

    Per the original author's notes this mirrors the command line
    ``python yaafe.py -r SAMPLERATE -f "mfcc: MFCC PARAMETERS" WAV-LOCATION``.
    Parameters listed there (all left at their defaults here):

    - CepsIgnoreFirstCoeff (default=1): 0 keeps the first cepstral
      coefficient, 1 ignores it.
    - CepsNbCoeffs (default=13): number of cepstral coefficients to keep.
    - FFTWindow (default=Hanning): weighting window applied before the
      FFT (Hanning|Hamming|None).
    - MelMaxFreq (default=6854.0): maximum frequency of the mel filter bank.
    - MelMinFreq (default=130.0): minimum frequency of the mel filter bank.
    - MelNbFilters (default=40): number of mel filters.
    - blockSize (default=1024): output frame size.
    - stepSize (default=512): step between consecutive frames.

    Args:
        audio_location: Path of the audio file to process.
        sample_rate: Sample rate of the file being processed.

    Returns:
        The result of ``Engine.readAllOutputs()``; the feature is registered
        under the name ``mfcc``.
    """
    plan = FeaturePlan(sample_rate=sample_rate)
    # Any of the parameters above may be appended to this definition.
    plan.addFeature('mfcc: MFCC')
    dataflow = plan.getDataFlow()

    extractor = Engine()
    extractor.load(dataflow)

    processor = AudioFileProcessor()
    processor.processFile(extractor, audio_location)

    return extractor.readAllOutputs()
def _SpectralFlux(self, audio_location, sample_rate):
    """Extract the spectral flux of one audio file with default parameters.

    Per the original author's notes this mirrors the command line
    ``python yaafe.py -r SAMPLERATE -f "flux: SpectralFlux PARAMETERS" WAV-LOCATION``.
    Parameters listed there (all left at their defaults here):

    - FFTLength (default=0): frame length on which the FFT is performed;
      the frame is zero-padded or truncated to this size, 0 keeps the
      original frame length.
    - FFTWindow (default=Hanning): weighting window applied before the
      FFT (Hanning|Hamming|None).
    - FluxSupport (default=All): 'All' uses every bin, 'Increase' only
      the bins that are increasing.
    - blockSize (default=1024): output frame size.
    - stepSize (default=512): step between consecutive frames.

    Args:
        audio_location: Path of the audio file to process.
        sample_rate: Sample rate of the file being processed.

    Returns:
        The result of ``Engine.readAllOutputs()``; the feature is registered
        under the name ``Flux``.
    """
    plan = FeaturePlan(sample_rate=sample_rate)
    # Any of the parameters above may be appended to this definition.
    plan.addFeature('Flux: SpectralFlux')
    dataflow = plan.getDataFlow()

    extractor = Engine()
    extractor.load(dataflow)

    processor = AudioFileProcessor()
    processor.processFile(extractor, audio_location)

    return extractor.readAllOutputs()
def __init__(self, num_bands, fmin, num_octaves, fps, align, log_div,
             sample_rate=44100, fold=None):
    """Configure a Yaafe engine that computes a constant-Q transform.

    Args:
        num_bands: Passed to Yaafe as ``CQTBinsPerOctave``.
        fmin: Passed to Yaafe as ``CQTMinFreq``.
        num_octaves: Passed to Yaafe as ``CQTNbOctaves``.
        fps: Frames per second; the step size is ``sample_rate // fps``.
        align: Passed to Yaafe as ``CQTAlign``.
        log_div: Stored on the instance; not used in this method.
        sample_rate: Sample rate of the feature plan.
        fold: Accepted for interface compatibility; unused in this method.
    """
    self.fps = fps
    self.num_bands = num_bands
    self.align = align
    self.fmin = fmin
    self.num_octaves = num_octaves
    self.log_div = log_div
    self.sample_rate = sample_rate

    # Imported lazily so the module can be loaded without yaafelib.
    from yaafelib import FeaturePlan, Engine

    # Floor-divide and cast so stepSize is always rendered as an integer;
    # plain "/" yields e.g. "441.0" under true division.
    step_size = int(sample_rate // fps)

    fp = FeaturePlan(sample_rate=sample_rate)
    cqt_config = " ".join([
        'cqt: CQT',
        'CQTAlign={}'.format(align),
        'CQTBinsPerOctave={}'.format(num_bands),
        'CQTMinFreq={}'.format(fmin),
        'CQTNbOctaves={}'.format(num_octaves),
        'stepSize={}'.format(step_size)
    ])
    fp.addFeature(cqt_config)

    df = fp.getDataFlow()
    self.engine = Engine()
    self.engine.load(df)
def detect(wav_path, ans_path=None):
    """Detect a key sequence in *wav_path* from its power spectrum.

    The audio is resampled to 8000 Hz and analysed in 1024-sample blocks
    with a 512-sample step. Each frame is classified as silent, unknown or
    one of ``KEYS`` by looking at the magnitude at every frequency in
    ``LOWER_FREQS`` and ``UPPER_FREQS`` (presumably a DTMF-style tone
    pair - confirm against the constants). The per-frame sequence is then
    reduced by ``purge_seq`` and printed.

    Args:
        wav_path: Path of the audio file to analyse.
        ans_path: Optional path of a text file with one expected entry per
            line; when given, each line is asserted to equal the detected
            entry at the same index before being printed.

    Raises:
        AssertionError: If a line of *ans_path* differs from the detected
            entry.
    """
    sample_rate = 8000
    block_size = 1024
    # Floor division keeps both values integers under true division too;
    # n_band feeds range() below and must be an int.
    step_size = block_size // 2
    n_band = block_size // 2
    # Frequency (Hz) paired with spectrum index i, from 0 to sample_rate/2.
    freq_bound = [i * sample_rate / 2. / n_band for i in range(n_band + 1)]

    plan = FeaturePlan(sample_rate=sample_rate, resample=True)
    plan.addFeature('power_spectrum: PowerSpectrum blockSize=%d stepSize=%d'
                    % (block_size, step_size))
    dataflow = plan.getDataFlow()
    afp = AudioFileProcessor()
    engine = Engine()
    engine.load(dataflow)
    afp.processFile(engine, wav_path)
    spectrogram = engine.readOutput('power_spectrum')

    seq = []
    for spectrum in spectrogram:
        mean_mag = np.mean(spectrum)
        if mean_mag <= SILENT_MAG_THRESHOLD:
            seq.append(SILENT_KEY)
            continue

        # (index into the frequency list, magnitude) of the strongest
        # candidate in each group; -1 means "none found".
        lower_data = (-1, -1)
        upper_data = (-1, -1)

        for target_idx, target_freq in itertools.chain(
                enumerate(LOWER_FREQS), enumerate(UPPER_FREQS)):
            # Linearly interpolate the magnitude at target_freq from the
            # two neighbouring spectrum bins.
            idx = bisect.bisect(freq_bound, target_freq)
            assert idx > 0
            freq1 = freq_bound[idx - 1]
            mag1 = spectrum[idx - 1]
            freq2 = freq_bound[idx]
            mag2 = spectrum[idx]
            w1 = 1. * (freq2 - target_freq) / (freq2 - freq1)
            w2 = 1. - w1
            target_mag = (w1 * mag1 + w2 * mag2)

            # Only peaks clearly above the frame average count; targets
            # below 1000 Hz go to the lower group, the rest to the upper.
            if target_mag > mean_mag * 2:
                if target_freq < 1000:
                    if target_mag > lower_data[1]:
                        lower_data = (target_idx, target_mag)
                else:
                    if target_mag > upper_data[1]:
                        upper_data = (target_idx, target_mag)

        lower_idx = lower_data[0]
        upper_idx = upper_data[0]
        if lower_idx == -1 or upper_idx == -1:
            seq.append(UNKNOWN_KEY)
        else:
            # NOTE(review): the row stride is len(LOWER_FREQS); if KEYS is
            # laid out row-major by lower frequency the stride would
            # normally be len(UPPER_FREQS) - identical only when both
            # lists have the same length. Confirm against KEYS.
            seq.append(KEYS[lower_idx * len(LOWER_FREQS) + upper_idx])

    ans = purge_seq(seq)

    if ans_path is not None:
        # Verification mode: every expected line must match the detection.
        with open(ans_path) as fh:
            for i, line in enumerate(fh):
                line = line.strip()
                assert line == ans[i], "%s != %s" % (line, ans[i])
                print('[%d] %s' % (i, line))
    else:
        for i, line in enumerate(ans):
            print('[%d] %s' % (i, line))
""" Stand-alone application to demonstrate yaafe's transformations """ import numpy as np import matplotlib.pyplot as plt from yaafelib import FeaturePlan, Engine, AudioFileProcessor FPLAN = FeaturePlan(sample_rate=44100) FPLAN.addFeature("mfcc: MFCC blockSize=512 stepSize=128") FPLAN.addFeature("mfcc_stat: MFCC blockSize=512 stepSize=128 > " "StatisticalIntegrator NbFrames=40 StepNbFrames=8") ENGINE = Engine() ENGINE.load(FPLAN.getDataFlow()) PROCESSOR = AudioFileProcessor() PROCESSOR.processFile(ENGINE, 'track.wav') DATA = ENGINE.readAllOutputs() ENGINE.flush() X_MFCC = DATA['mfcc'] X_MFCC_STAT = DATA['mfcc_stat'] MFCC_DESC = list() for i in range(1, 14): desc = "Average for Band {}".format(i) MFCC_DESC.append(desc) for i in range(1, 14): desc = "Standard Dev. for Band {}".format(i) MFCC_DESC.append(desc) plt.ion() FIG = plt.figure() FIG.set_size_inches(14, 8)