def extract_audio_features(sigdata):
    '''Run a fixed YAAFE feature plan over every row of ``sigdata``.

    ``sigdata`` is indexed as a 2-D array: each row is reshaped to
    ``(1, n)`` and handed to ``Engine.processAudio``.  The plan (onset
    detection, LPC, mel spectrum, MFCC, spectral flux / shape / slope /
    variation) is built once at ``SAMPLE_RATE`` and reused for all rows.

    Returns a list with one ``processAudio`` result per row, in row order.
    '''
    window = 'Hanning'
    # using 80 / 40 here produces NaNs in mel spectrum, for some reason
    block = 120
    step = 60

    windowed = (window, block, step)
    unwindowed = (block, step)
    recipes = (
        ('CDOD: ComplexDomainOnsetDetection FFTWindow=%s blockSize=%d stepSize=%d',
         windowed),
        ('LPC: LPC LPCNbCoeffs=4 blockSize=%d stepSize=%d',
         unwindowed),
        ('MelSpec: MelSpectrum FFTWindow=%s MelMaxFreq=600 MelMinFreq=30 MelNbFilters=40 blockSize=%d stepSize=%d',
         windowed),
        ('MFCC: MFCC CepsIgnoreFirstCoeff=1 CepsNbCoeffs=12 FFTWindow=%s MelMaxFreq=600 MelMinFreq=30 MelNbFilters=40 blockSize=%d stepSize=%d',
         windowed),
        ('SF: SpectralFlux FFTWindow=%s FluxSupport=Increase blockSize=%d stepSize=%d',
         windowed),
        ('SpecStats: SpectralShapeStatistics FFTWindow=%s blockSize=%d stepSize=%d',
         windowed),
        ('SpecSlope: SpectralSlope FFTWindow=%s blockSize=%d stepSize=%d',
         windowed),
        ('SpecVar: SpectralVariation FFTWindow=%s blockSize=%d stepSize=%d',
         windowed),
    )

    plan = yl.FeaturePlan(sample_rate=SAMPLE_RATE)
    for template, args in recipes:
        plan.addFeature(template % args)

    engine = yl.Engine()
    engine.load(plan.getDataFlow())

    # One single-row (1, n) signal per input row.
    return [
        engine.processAudio(np.reshape(sigdata[row, :], [1, -1]))
        for row in range(sigdata.shape[0])
    ]
def createAFP():
    """Build a Yaafe engine for energy + MFCC and a file processor.

    The feature plan works at 16 kHz and contains ``Energy`` (default
    parameters) and ``MFCC`` with ``blockSize=2048 stepSize=1024``.

    Returns
    -------
    (afp, engine) : tuple
        A fresh ``AudioFileProcessor`` and the ``Engine`` loaded with the
        plan's dataflow.
    """
    fp = yaafelib.FeaturePlan(sample_rate=16000)
    fp.addFeature('energy: Energy')
    fp.addFeature('mfcc: MFCC blockSize=2048 stepSize=1024')

    # Build the dataflow once; the original built it twice and discarded
    # the first result.
    engine = yaafelib.Engine()
    engine.load(fp.getDataFlow())

    afp = yaafelib.AudioFileProcessor()
    return afp, engine
def feature_indices():
    """List every (feature name, column index) pair of the plan in 'features.txt'.

    The plan is loaded into an engine, the engine's outputs are sorted by
    feature name, and each output of size ``n`` contributes the pairs
    ``(name, 0) ... (name, n - 1)``.

    Returns
    -------
    list of (str, int)
    """
    fp = yaafe.FeaturePlan()
    fp.loadFeaturePlan('features.txt')

    engine = yaafe.Engine()
    engine.load(fp.getDataFlow())

    sizes = sorted(
        (name, feat['size']) for name, feat in engine.getOutputs().items())
    # Flat comprehension instead of sum(list_of_lists, []), which copies the
    # accumulated list on every addition.
    return [(name, i) for name, size in sizes for i in range(size)]
def __call__(self, path):
    """Extract features

    Parameters
    ----------
    path : path to .wav file

    Returns
    -------
    data : numpy array
        Shape (n_frames, n_features * self.stack).  Row ``i`` is the
        concatenation of ``self.stack`` consecutive frames around frame
        ``i``; the sequence is padded by repeating the first and last
        frame.  The array is a strided view, so rows overlap in memory.
    """

    # --- load audio file
    sample_rate, y = wav.read(path)

    # --- update data_flow every time sample rate changes
    if not hasattr(self, 'sample_rate_') or self.sample_rate_ != sample_rate:
        self.sample_rate_ = sample_rate
        feature_plan = yaafelib.FeaturePlan(sample_rate=self.sample_rate_)
        for name, recipe in self.definition():
            # The call must live outside the assert: under ``python -O``
            # asserts are stripped and the feature would never be added.
            added = feature_plan.addFeature(
                "{name}: {recipe}".format(name=name, recipe=recipe))
            assert added, "could not add feature %r" % (name,)
        data_flow = feature_plan.getDataFlow()
        self.engine_.load(data_flow)

    # Yaafe needs this: float64, C-contiguous, 2-dimensional
    y = np.array(y, dtype=np.float64, order='C').reshape((1, -1))

    # --- extract features
    features = self.engine_.processAudio(y)
    data = np.hstack([features[name] for name, _ in self.definition()])

    # --- stack features
    n_samples, n_features = data.shape
    # stack - 1 padding frames in total; the extra one goes in front when
    # the stack size is even.
    n_before = self.stack // 2
    n_after = self.stack - 1 - n_before
    expanded_data = np.concatenate((
        np.repeat(data[:1], n_before, axis=0),
        data,
        np.repeat(data[-1:], n_after, axis=0)))

    # Sliding window over the padded frames: advancing one row moves by one
    # frame, so the strides must be those of the array being viewed.
    data = np.lib.stride_tricks.as_strided(
        expanded_data,
        shape=(n_samples, n_features * self.stack),
        strides=expanded_data.strides)

    self.engine_.reset()

    return data
def __init__(self, app_config, rate):
    """Load the Yaafe feature plan and prepare an engine for it.

    The plan is read from ``features.config`` inside
    ``app_config.program_directory`` and built for sample rate ``rate``.
    The process exits (``sys.exit``) if the plan cannot be loaded.
    """
    self.ExtractedFeaturesList = [
        'LPC1_mean',
        'LSF7_min',
        'SpectralFlatness_min',
        'SSS_centroid_min',
        'SSS_spread_min',
        'PerceptualSpread_min',
        'SpectralSlope_min',
        'PerceptualSharpness_min',
        'SpectralDecrease_max',
        'OBSI0_mm',
        'SpectralRolloff_min',
    ]
    self._rate = rate

    plan = yaafelib.FeaturePlan(sample_rate=rate)
    config_path = os.path.join(app_config.program_directory,
                               'features.config')
    if not plan.loadFeaturePlan(config_path):
        sys.exit('Feature plan not loaded correctly')

    self._engine = yaafelib.Engine()
    self._engine.load(plan.getDataFlow())
def yaafe2features(wavefiles, out_file, feature_type='MFCC'):
    """Compute yaafe features for whole wavefiles and store them as h5features.

    Every wavefile becomes one internal h5features file, named after the
    wavefile's basename without extension.  To use the result with
    abkhazia's ABX tasks the features still have to be segmented according
    to an abkhazia segments.txt
    (abkhazia/utilities/segment_features.py can be used for this).

    Supported feature types:
    - 'MFCC' (default)
    - 'CMSP13' (cubic-root-compressed 13-frequency-channels Mel
      spectrogram)
    """
    assert feature_type in ['MFCC', 'CMSP13'], \
        'Unsupported feature_type {0}'.format(feature_type)

    # Both recipes use blockSize=400 / stepSize=160 at 16 kHz,
    # i.e. 0.025s windows every 0.01s.
    plan = ya.FeaturePlan(sample_rate=16000)
    if feature_type == 'MFCC':
        feat_name = 'mfcc'
        recipe = '{0}: MFCC blockSize=400 stepSize=160'
        plan.addFeature(recipe.format(feat_name))
    elif feature_type == 'CMSP13':
        feat_name = 'melsp'
        recipe = '{0}: MelSpectrum MelNbFilters=13 blockSize=400 stepSize=160'
        plan.addFeature(recipe.format(feat_name))

    engine = ya.Engine()
    engine.load(plan.getDataFlow())

    wav_ids, times, features = [], [], []
    for wavefile in wavefiles:
        stem, _ = p.splitext(p.basename(wavefile))
        wav_ids.append(stem)

        processor = ya.AudioFileProcessor()
        processor.processFile(engine, wavefile)
        frames = engine.readAllOutputs()[feat_name]
        if feature_type == 'CMSP13':
            # need to add compression by hand
            frames = np.power(frames, 1 / 3.)

        # times according to:
        # http://yaafe.sourceforge.net/features.html?highlight=mfcc#yaafefeatures.Frames
        # 0.01 here is ad hoc and dependent on 160 above
        times.append(0.01 * np.arange(frames.shape[0]))
        features.append(frames)

    h5features.write(out_file, 'features', wav_ids, times, features)
def _wave2features(self, wavearray):
    """Run the features listed in ``self.definition`` over one signal.

    A new engine and feature plan (at ``self.samplerate``) are built on
    every call.  ``wavearray`` is converted to a float64, C-ordered array
    of shape (1, n) before being passed to ``Engine.processAudio``.

    Returns whatever ``processAudio`` returns for that signal.
    """
    engine = yaafelib.Engine()
    featureplan = yaafelib.FeaturePlan(sample_rate=self.samplerate)
    for name, recipe in self.definition:
        # Call first, assert second: an ``assert f(...)`` is stripped under
        # ``python -O``, which would silently skip registering the feature.
        added = featureplan.addFeature(
            "{name}: {recipe}".format(name=name, recipe=recipe))
        assert added, "could not add feature %r" % (name,)
    engine.load(featureplan.getDataFlow())

    wavearray = np.array(
        wavearray, dtype=np.float64, order='C').reshape((1, -1))
    features = engine.processAudio(wavearray)
    engine.reset()
    return features
def setup(self, channels=None, samplerate=None, blocksize=None,
          totalframes=None):
    """Run the parent setup, then build the Yaafe dataflow and engine.

    Every entry of ``self.feature_plan`` is added to a feature plan created
    for ``samplerate``.  The resulting dataflow is kept in
    ``self.data_flow`` and loaded into ``self.yaafe_engine``, which is then
    reset.
    """
    super(Yaafe, self).setup(channels, samplerate, blocksize, totalframes)

    # Feature plan -> dataflow
    plan = yaafelib.FeaturePlan(sample_rate=samplerate)
    for feature_definition in self.feature_plan:
        plan.addFeature(feature_definition)
    self.data_flow = plan.getDataFlow()

    # Engine configured with that dataflow
    engine = self.yaafe_engine = yaafelib.Engine()
    engine.load(self.data_flow)
    engine.reset()
def features(self, feature_set):
    """Compute one flat feature vector for ``self.frames``.

    Parameters
    ----------
    feature_set : str
        'auto' or 'all' selects the plan in 'features.txt'; any other value
        selects 'features_reduced.txt'.

    Returns
    -------
    numpy array, or None when ``self.frames`` is empty
        ``mean(...)`` of every extracted feature, concatenated in
        alphabetical order of the feature names.
    """
    if len(self.frames) == 0:
        return None

    if feature_set in ('auto', 'all'):
        plan_file = 'features.txt'
    else:
        plan_file = 'features_reduced.txt'

    fp = yaafe.FeaturePlan()
    fp.loadFeaturePlan(plan_file)

    # Build the dataflow once; the original built it twice and left the
    # first result unused.
    engine = yaafe.Engine()
    engine.load(fp.getDataFlow())

    feats = engine.processAudio(np.array([self.frames]))
    # Sort on the names only, so arrays never take part in the comparison.
    attributes = [mean(feats[name]) for name in sorted(feats)]
    return np.concatenate(attributes)
def __init__(self, fs: int, config: dict):
    """Build a Yaafe engine from a per-feature configuration mapping.

    Parameters
    ----------
    fs : int
        Sample rate passed to the feature plan.
    config : dict
        Maps a feature name to a dict with the keys ``'use'`` (truthy to
        enable the feature) and ``'params'`` (mapping of Yaafe parameter
        name to value).  An enabled feature ``name`` with params
        ``{'a': 1, 'b': 2}`` is registered as ``"name: name a=1 b=2"``.

    ``self.engine`` is the loaded engine, or ``None`` when no feature is
    enabled.
    """
    yaafe_config = {}
    for feature_name, feature_params in config.items():
        if not feature_params['use']:
            continue
        # Format the parameters explicitly.  The previous approach rewrote
        # the text of ``str(dict)``, which mangles any value that itself
        # contains a comma, a quote or ": ".
        params = ' '.join(
            '{}={}'.format(key, value)
            for key, value in feature_params['params'].items())
        yaafe_config[feature_name] = feature_name + ' ' + params

    if not yaafe_config:
        self.engine = None
        return

    feature_plan = yaafelib.FeaturePlan(sample_rate=fs, normalize=True)
    for feature_name, setting in yaafe_config.items():
        feature_plan.addFeature(feature_name + ': ' + setting)
    data_flow = feature_plan.getDataFlow()
    self.engine = yaafelib.Engine()
    self.engine.load(data_flow)
def __init__(self, fs: int, block_size=1024, step_size=None,
             selected_features='all'):
    """Create a Yaafe engine for a selection of predefined features.

    Parameters
    ----------
    fs : int
        Sample rate of the audio; also stored as ``self.fs``.
    block_size : int
        Analysis block size inserted into the feature recipes.
    step_size : int, optional
        Hop size; a falsy value means ``block_size // 2``.
    selected_features : 'all' or container of str
        Names of the recipes to enable.  Names that are not among the
        predefined recipes are ignored.

    The engine is stored in ``self.engine``.
    """
    if not step_size:
        step_size = block_size // 2

    features_config = {
        'Chroma': f'Chroma2 CQTAlign=c CQTBinsPerOctave=48 CQTMinFreq=27.5 CQTNbOctaves=7 CZBinsPerSemitone=1 CZNbCQTBinsAggregatedToPCPBin=-1 CZTuning=440 stepSize={step_size}',
        'LPC': f'LPC LPCNbCoeffs=1 blockSize={block_size} stepSize={step_size}',
        'LSF': f'LSF blockSize={block_size} stepSize={step_size}',
        'MFCC': f'MFCC CepsIgnoreFirstCoeff=1 CepsNbCoeffs=13 FFTWindow=Hanning MelMaxFreq=6000.0 MelMinFreq=400.0 MelNbFilters=40 blockSize={block_size} stepSize={step_size}',
        'OBSI': f'OBSI FFTLength=0 FFTWindow=Hanning OBSIMinFreq=27.5 blockSize={block_size} stepSize={step_size}',
        'SpectralCrestFactorPerBand': f'SpectralCrestFactorPerBand FFTLength=0 FFTWindow=Hanning blockSize={block_size} stepSize={step_size}',
        'SpectralDecrease': f'SpectralDecrease FFTLength=0 FFTWindow=Hanning blockSize={block_size} stepSize={step_size}',
        'SpectralFlatness': f'SpectralFlatness FFTLength=0 FFTWindow=Hanning blockSize={block_size} stepSize={step_size}',
        'SpectralFlux': f'SpectralFlux FFTLength=0 FFTWindow=Hanning FluxSupport=All blockSize={block_size} stepSize={step_size}',
        'SpectralRolloff': f'SpectralRolloff FFTLength=0 FFTWindow=Hanning blockSize={block_size} stepSize={step_size}',
        'SpectralVariation': f'SpectralVariation FFTLength=0 FFTWindow=Hanning blockSize={block_size} stepSize={step_size}',
        'ZCR': f'ZCR blockSize={block_size} stepSize={step_size}',
    }

    self.fs = fs

    if selected_features == 'all':
        selected_features = features_config.keys()

    plan = yaafelib.FeaturePlan(sample_rate=fs, normalize=True)
    # Recipes are registered in the order they are declared above.
    for name in features_config:
        if name in selected_features:
            plan.addFeature(f'{name}: {features_config[name]}')

    self.engine = yaafelib.Engine()
    self.engine.load(plan.getDataFlow())
def calculate_spectrogram(y, fs, block_size=1024, step_size=None):
    """Compute a magnitude spectrogram of ``y`` with Yaafe.

    Parameters
    ----------
    y : numpy array
        Signal; it is flattened to shape (1, n) and cast to float64.
    fs : number
        Sample rate in Hz.
    block_size : int
        Analysis block size in samples.
    step_size : int, optional
        Hop size in samples; defaults to ``block_size // 2``.

    Returns
    -------
    freq : numpy array
        Evenly spaced frequencies from 0 to ``fs / 2``, one per column of
        ``spectrum``.
    time : numpy array
        Evenly spaced values from ``(block_size // 2) / fs`` to
        ``(len(y) - block_size // 2) / fs``, one per row of ``spectrum``.
    spectrum : numpy array
        The 'MagnitudeSpectrum' output of the engine.
    """
    if step_size is None:
        step_size = block_size // 2

    feature_plan = yaafelib.FeaturePlan(sample_rate=fs, normalize=True)
    feature_plan.addFeature(
        f'MagnitudeSpectrum: MagnitudeSpectrum blockSize={block_size} stepSize={step_size}'
    )
    engine = yaafelib.Engine()
    engine.load(feature_plan.getDataFlow())

    features = engine.processAudio(y.reshape(1, -1).astype('float64'))
    spectrum = features['MagnitudeSpectrum']

    noverlap = block_size // 2
    time = np.linspace(noverlap / fs, (len(y) - noverlap) / fs,
                       spectrum.shape[0])
    # True division: ``fs // 2`` drops half a hertz from the upper edge of
    # the axis whenever the sample rate is odd (e.g. 11025 Hz).
    freq = np.linspace(0, fs / 2, num=spectrum.shape[-1])
    return freq, time, spectrum
def __call__(self, wav):
    """Extract features

    Parameters
    ----------
    wav : string
        Path to wav file.  Its sample rate must equal ``self.sample_rate``.

    Returns
    -------
    features : SlidingWindowFeature
        The outputs of every feature in ``self.definition()``, stacked
        horizontally in definition order.
    """

    definition = self.definition()

    # --- prepare the feature plan
    feature_plan = yaafelib.FeaturePlan(sample_rate=self.sample_rate)
    for name, recipe in definition:
        # Keep the call out of the assert expression: ``python -O`` strips
        # asserts, which would leave the plan without any feature.
        added = feature_plan.addFeature(
            "{name}: {recipe}".format(name=name, recipe=recipe))
        assert added, "could not add feature %r" % (name,)

    # --- prepare the Yaafe engine
    data_flow = feature_plan.getDataFlow()
    engine = yaafelib.Engine()
    engine.load(data_flow)

    sample_rate, raw_audio = scipy.io.wavfile.read(wav)
    assert sample_rate == self.sample_rate, "sample rate mismatch"

    # float64, C-ordered, shape (1, n)
    audio = np.array(raw_audio, dtype=np.float64, order='C').reshape(1, -1)

    features = engine.processAudio(audio)
    data = np.hstack([features[name] for name, _ in definition])

    sliding_window = YaafeFrame(
        blockSize=self.block_size, stepSize=self.step_size,
        sampleRate=self.sample_rate)

    return SlidingWindowFeature(data, sliding_window)
def get_flow_and_stack(self):
    """Build the MFCC dataflow and the list of output names to stack.

    Always registers 'mfcc'.  Registers 'mfcc_d' when ``self.D`` or
    ``self.De`` is set and 'mfcc_dd' when ``self.DD`` or ``self.DDe`` is
    set (first and second order ``Derivate`` of the MFCC).  For each of
    them the first cepstral coefficient (energy) is kept only when the
    matching energy flag (``e`` / ``De`` / ``DDe``) is set.

    Returns
    -------
    (data_flow, stack) : tuple
        The plan's dataflow and the registered feature names, in
        registration order.
    """
    feature_plan = yaafelib.FeaturePlan(sample_rate=self.sample_rate)
    stack = []

    def register(name, keep_energy, nb_coeffs, transform=''):
        # CepsIgnoreFirstCoeff: 0 if energy is kept, 1 if energy is removed
        definition = (
            "%s: "
            "MFCC CepsIgnoreFirstCoeff=%d CepsNbCoeffs=%d "
            "blockSize=%d stepSize=%d%s" % (
                name,
                0 if keep_energy else 1,
                nb_coeffs,
                self.block_size, self.step_size,
                transform))
        # Call first, assert second: ``assert addFeature(...)`` disappears
        # under ``python -O`` and the feature would never be registered.
        added = feature_plan.addFeature(definition)
        assert added, "could not add feature %r" % (name,)
        stack.append(name)

    # --- coefficients
    register('mfcc', self.e, self.coefs + self.e * 1)

    # --- 1st order derivatives
    if self.D or self.De:
        register('mfcc_d', self.De,
                 self.D * self.coefs + self.De * 1,
                 ' > Derivate DOrder=1')

    # --- 2nd order derivatives
    if self.DD or self.DDe:
        register('mfcc_dd', self.DDe,
                 self.DD * self.coefs + self.DDe * 1,
                 ' > Derivate DOrder=2')

    # --- prepare the Yaafe engine
    data_flow = feature_plan.getDataFlow()

    return data_flow, stack
def __init__(self, sad_model='etape'):
    """
    Parameters:
    ----------
    sad_model : string, either 'etape' or 'maya'
      selects the SAD model:
        'etape' is more suited to radionews material
        'maya' is more suited to speech obtained in noisy environments

    Raises:
    ------
    ValueError if sad_model is not one of the supported values.
    """
    super(LimsiSad, self).__init__()

    # feature extraction definition
    spec = yaafelib.FeaturePlan(sample_rate=16000)
    spec.addFeature(
        'mfcc: MFCC CepsIgnoreFirstCoeff=0 blockSize=1024 stepSize=256')
    spec.addFeature(
        'mfccd1: MFCC CepsIgnoreFirstCoeff=0 blockSize=1024 stepSize=256 > Derivate DOrder=1'
    )
    spec.addFeature(
        'mfccd2: MFCC CepsIgnoreFirstCoeff=0 blockSize=1024 stepSize=256 > Derivate DOrder=2'
    )
    spec.addFeature('zcr: ZCR blockSize=1024 stepSize=256')

    parent_analyzer = Yaafe(spec)
    self.parents.append(parent_analyzer)

    # informative parameters
    # these are not really taken into account by the system
    # these are bypassed by yaafe feature plan
    self.input_blocksize = 1024
    self.input_stepsize = 256

    # load gmm model
    if sad_model not in ['etape', 'maya']:
        raise ValueError(
            "argument sad_model %s not supported. Supported values are 'etape' or 'maya'"
            % sad_model)
    picfname = os.path.join(timeside.__path__[0], 'trained_models',
                            'limsi_sad_%s.pkl' % sad_model)
    # Close the model file deterministically instead of leaving the handle
    # to the garbage collector.
    with open(picfname, 'rb') as model_file:
        self.gmms = pickle.load(model_file)
def extract_feature(filename, offset):
    """Extract features from a 30-second clip of ``filename``.

    The clip starts ``int(offset)`` seconds into the file.  It is written
    as ``<basename><offset>.cliped.wav`` into the upload folder unless a
    file with that name already exists, and is then processed with the
    feature plan in 'static/featureplan.txt' (22050 Hz, resampling on).

    Returns ``preprocessed(...)`` applied to all engine outputs.
    """
    plan = yaafelib.FeaturePlan(sample_rate=22050, resample=True)
    plan.loadFeaturePlan('static/featureplan.txt')
    engine = yaafelib.Engine()
    engine.load(plan.getDataFlow())

    print(filename)
    print(offset)

    # Slice positions are in milliseconds.
    sound = AudioSegment.from_file(filename)
    clip_start = int(offset) * 1000
    clip_stop = clip_start + 30000
    clip = sound[clip_start:clip_stop]

    stem = os.path.splitext(os.path.basename(filename))[0]
    clip_path = os.path.join(app.config['UPLOAD_FOLDER'],
                             stem + str(offset) + ".cliped.wav")
    # An existing clip with the same name is reused as is.
    if not os.path.isfile(clip_path):
        clip.export(clip_path, format="wav")

    processor = yaafelib.AudioFileProcessor()
    processor.processFile(engine, clip_path)
    return preprocessed(engine.readAllOutputs())
def __init__(
        self,
        sample_rate=16000,
        block_size=512,
        step_size=256,
        e=True,
        coefs=11,
        De=False,
        DDe=False,
        D=False,
        DD=False,
):
    """Store the MFCC options and prepare the Yaafe engine.

    ``e``, ``coefs``, ``De``, ``DDe``, ``D`` and ``DD`` are stored as
    attributes of the same name.  ``self.definition()`` is then evaluated
    once (kept in ``self.definition_``) and every ``(name, recipe)`` pair
    it yields is added to a feature plan whose dataflow is loaded into
    ``self.engine``.
    """
    super(YaafeMFCC, self).__init__(sample_rate=sample_rate,
                                    block_size=block_size,
                                    step_size=step_size)

    self.e = e
    self.coefs = coefs
    self.De = De
    self.DDe = DDe
    self.D = D
    self.DD = DD

    self.definition_ = self.definition()

    # --- prepare the feature plan
    feature_plan = yaafelib.FeaturePlan(sample_rate=self.sample_rate)
    for name, recipe in self.definition_:
        # Do not fold the call into the assert: asserts are removed under
        # ``python -O`` and the engine would end up with an empty plan.
        added = feature_plan.addFeature(
            "{name}: {recipe}".format(name=name, recipe=recipe))
        assert added, "could not add feature %r" % (name,)

    # --- prepare the Yaafe engine
    data_flow = feature_plan.getDataFlow()
    self.engine = yaafelib.Engine()
    self.engine.load(data_flow)
def _write_functionals(data, feature):
    """Write mean, std, skewness and kurtosis of each column of ``feature``.

    Every value is written as ``str(value)`` followed by a tab.
    """
    for i in range(feature[0].size):
        column = feature[:, i]
        stats = (column.mean(), column.std(),
                 spstats.skew(column), spstats.kurtosis(column))
        data.write(''.join(str(value) + '\t' for value in stats))


def write_features(audiofile, sample_rate, data):
    """Extract features then write their functionals to data (tab) file.

    For every MFCC coefficient, the energy and the zero-crossing rate, the
    mean, standard deviation, skewness and kurtosis over all frames are
    appended to ``data`` as tab-terminated fields.  No filename,
    classification or newline is written after them.  If ``data`` is empty,
    the three header lines (attribute names, attribute types, flags) are
    written first.

    Returns True if extraction was successful, False if unsuccessful
    (i.e. at least one of the features came back empty).

    Arguments:
    audiofile -- WAV file to process
    sample_rate -- sample rate of the audio file in Hz
    data -- the data file to write to (must be readable, writable and
            seekable)
    """
    N_MFCC = 12   # Number of MFCC coefficients
    N_LLD = 2     # Number of other low-level descriptors
    N_FUNCS = 4   # Number of functionals

    # Add features to extract
    featplan = yf.FeaturePlan(sample_rate=sample_rate, resample=False)
    featplan.addFeature(
        'mfcc: MFCC CepsIgnoreFirstCoeff=0 CepsNbCoeffs=12 '
        'FFTWindow=Hanning MelMinFreq=1200 MelMaxFreq=5050')
    featplan.addFeature('energy: Energy')
    featplan.addFeature('zcr: ZCR')

    # Configure an Engine
    engine = yf.Engine()
    engine.load(featplan.getDataFlow())

    # Extract features
    afp = yf.AudioFileProcessor()
    afp.processFile(engine, audiofile)

    # 2D numpy arrays
    mfccs = engine.readOutput('mfcc')
    energy = engine.readOutput('energy')
    zcr = engine.readOutput('zcr')

    # Write header lines if they don't exist
    data.seek(0, 0)
    if not data.readline():
        n_columns = N_MFCC * N_FUNCS + N_LLD * N_FUNCS
        descriptors = ['mfcc' + str(i + 1) for i in range(N_MFCC)]
        descriptors += ['energy', 'zcr']
        functionals = ('mean', 'std', 'skew', 'kurtosis')

        # Attribute header line, then filename and classification headers
        data.write(''.join(
            '%s_%s\t' % (descriptor, functional)
            for descriptor in descriptors
            for functional in functionals))
        data.write('filename\tclassification\n')

        # Attribute type line: filename is a string, classification is
        # discrete
        data.write('continuous\t' * n_columns)
        data.write('string\tdiscrete\n')

        # Flags
        data.write('\t' * n_columns)
        data.write('meta\tclass\n')

    data.seek(0, 2)  # Go to end of file.

    # Write feature data
    if mfccs.size > 0 and energy.size > 0 and zcr.size > 0:
        # The ZCR kurtosis used to be computed from the energy column; the
        # shared helper makes each statistic use its own feature.
        for feature in (mfccs, energy, zcr):
            _write_functionals(data, feature)
        return True
    else:
        return False
return numFiles # Main trainAudio = './IRMAS-Dataset/Training' trainFeats = './trainFeatures.dat' testAudio = './IRMAS-Dataset/Testing' testFeats = './testFeatures.dat' model = './model.svm' # Get the instruments and their class indices instruments = getInstruments(trainAudio) # Specify features fp = yl.FeaturePlan(sample_rate=44100) fp.loadFeaturePlan('featureplan.txt') # Initialize yaafe tools df = fp.getDataFlow() eng = yl.Engine() eng.load(df) dimensions = 0 # The sum of the dimensions of the features ftSizes = eng.getOutputs().items() for ftSize in ftSizes: dimensions += int(ftSize[1]['size']) afp = yl.AudioFileProcessor() # Remove previous model files for k in range(len(instruments)): classFile = model + '.' + str(k)
def process(self, signal, rate, segments, wavelet_decomposition_level=6,
            frame_overlap=512, wavelet_type='sym10'):
    """ Extract features

    Runs the Yaafe plan in 'features.config' over ``signal`` and reduces
    the frame-level features to one value per segment and per entry of
    ``self.ExtractedFeaturesList``.

    Parameters
    ----------
    signal : numpy array
        Audio samples; cast to float64 before processing.
    rate : sample rate used for the feature plan.
    segments : sequence of pairs
        ``segment[0]`` and ``segment[1]`` are divided by ``frame_overlap``
        to obtain frame indices; one extra frame is included on each side.
    frame_overlap : int
        Divisor that converts segment bounds to frame indices.
    wavelet_decomposition_level, wavelet_type :
        Not used by this method (the wavelet features based on "Wavelets
        in Recognition of Bird Sounds", A. Selin et al., EURASIP Journal
        on Advances in Signal Processing 2007, are not computed).

    Returns
    -------
    numpy array of shape (len(segments), len(self.ExtractedFeaturesList)),
    also stored in ``self.ExtractedFeatures``.  The process exits
    (``sys.exit``) if the feature plan cannot be loaded.
    """
    self.ExtractedFeaturesList = ['LPC1_mean',
                                  'LSF7_min',
                                  'SpectralFlatness_min',
                                  'SSS_centroid_min',
                                  'SSS_spread_min',
                                  'PerceptualSpread_min',
                                  'SpectralSlope_min',
                                  'PerceptualSharpness_min',
                                  'SpectralDecrease_max',
                                  'OBSI0_mm',
                                  'SpectralRolloff_min']

    self._signal = signal
    self._rate = rate
    self._segments = segments

    # Calculate spectral and temporal features
    feature_plan = yaafelib.FeaturePlan(sample_rate=rate)
    success = feature_plan.loadFeaturePlan('features.config')
    if not success:
        sys.exit('Feature plan not loaded correctly')
    engine = yaafelib.Engine()
    engine.load(feature_plan.getDataFlow())
    self.Features = engine.processAudio(np.array([signal.astype('float64')]))

    feats = self.Features
    # (frame-level track, reducer), in the order of ExtractedFeaturesList
    tracks = (
        (feats['LPC'][:, 1], np.ndarray.mean),
        (feats['LSF'][:, 7], np.ndarray.min),
        (feats['SpectralFlatness'], np.ndarray.min),
        (feats['SpectralShapeStatistics'][:, 0], np.ndarray.min),
        (feats['SpectralShapeStatistics'][:, 1], np.ndarray.min),
        (feats['PerceptualSpread'], np.ndarray.min),
        (feats['SpectralSlope'], np.ndarray.min),
        (feats['PerceptualSharpness'], np.ndarray.min),
        (feats['SpectralDecrease'], np.ndarray.max),
        (feats['OBSI'][:, 0], maxmin),
        (feats['SpectralRolloff'], np.ndarray.min),
    )

    self.ExtractedFeatures = np.zeros(
        shape=(len(segments), len(self.ExtractedFeaturesList)))

    for i, segment in enumerate(segments):
        # Clamp at 0: for a segment starting in the first frame the "- 1"
        # margin used to give -1, which slices from the end of the track.
        start = max(int(segment[0] / frame_overlap) - 1, 0)
        end = int(segment[1] / frame_overlap) + 1
        for column, (track, reduce_frames) in enumerate(tracks):
            self.ExtractedFeatures[i, column] = reduce_frames(track[start:end])

    return self.ExtractedFeatures
import numpy from os import walk import yaafelib as yaafe from sklearn import neighbors from sklearn.svm import SVC from sklearn.svm import LinearSVC from sklearn.svm import NuSVC from sklearn import linear_model from sklearn.linear_model import SGDClassifier from sklearn import tree from sklearn.neighbors.nearest_centroid import NearestCentroid yaafe.loadComponentLibrary('yaafe-io') fp = yaafe.FeaturePlan(sample_rate=8000) fp.loadFeaturePlan('./featureplan') engine = yaafe.Engine() engine.load(fp.getDataFlow()) afp = yaafe.AudioFileProcessor() afp.setOutputFormat('csv', './outputs', { 'Metadata': 'false', 'Precision': '2' }) emotions = ['angry', 'happy', 'neutral', 'unhappy'] feats = ['eng', 'lpc', 'lsf', 'ldd', 'mfc'] def getProperties(audiofile): props = "" for feat in feats:
def compute_features(dataStruct):
    """
    Compute spectral descriptors for every audio file of a data structure.

    Each file listed in ``dataStruct["filepath"]`` is loaded, a set of
    descriptors is extracted with YAAFE and with librosa, and the results
    are stored back into ``dataStruct`` (which is modified in place).

    Args:
        - dataStruct: dictionary with at least a "filepath" list

    Returns:
        - dataStruct: the same dictionary, with the new fields
          "SpectralFeatures" (descriptor names), "signal", "sRate" and,
          for every descriptor ``f``, the lists ``f`` (full temporal
          vector), ``f + 'Mean'``, ``f + 'Std'`` and ``f + 'Max'``.

    NOTE(review): 'SpectralRolloff' is produced by both YAAFE and librosa,
    so its lists receive two entries per file - confirm this is intended.
    """
    ### --- INIT --- ###
    # DSP settings
    Nwin_bin = 1024
    Hop_bin = round(Nwin_bin)

    paths = dataStruct["filepath"]

    # Descriptor names, per backend
    features_yaafe = [
        'SpectralFlatness', 'SpectralRolloff', 'PerceptualSharpness',
        'PerceptualSpread', 'SpectralDecrease', 'SpectralVariation',
        'SpectralFlux'
    ]
    features_libro = [
        'Loudness', 'SpectralCentroid', 'SpectralContrast',
        'SpectralRolloff', 'SpectralBandwidth'
    ]
    # Name of the lib.feature function behind each librosa descriptor,
    # aligned with features_libro.
    libro_functions = [
        'rmse', 'spectral_centroid', 'spectral_contrast',
        'spectral_rolloff', 'spectral_bandwidth'
    ]
    dataStruct["SpectralFeatures"] = features_yaafe + features_libro

    # New fields
    dataStruct["signal"] = []
    dataStruct["sRate"] = []
    for name in dataStruct["SpectralFeatures"]:
        for suffix in ('', 'Mean', 'Std', 'Max'):
            dataStruct[name + suffix] = []

    ### --- Compute Feature --- ###
    print('\t \t \t Feature Extraction')
    for position in range(len(paths)):
        path = dataStruct["filepath"][position]
        print('%s' % path)

        # Loading and storing the signal
        curSignal, curSRate = lib.load(path, mono=True, offset=0)
        dataStruct["signal"].append(curSignal)
        dataStruct["sRate"].append(curSRate)

        # --- YAAFE extraction: one engine per file, at the file's rate
        fp = yaf.FeaturePlan(sample_rate=curSRate)
        for name in features_yaafe:
            fp.addFeature('%s: %s blockSize=%s stepSize=%s'
                          % (name, name, Nwin_bin, Hop_bin))
        engine = yaf.Engine()
        engine.load(fp.getDataFlow())
        row_signal = curSignal.astype('float64').reshape(
            (1, curSignal.shape[0]))
        features = engine.processAudio(row_signal)

        for key in sorted(features):
            val = features[key]
            dataStruct[key].append(val)
            dataStruct[key + 'Mean'].append(np.mean(val))
            dataStruct[key + 'Std'].append(np.std(val))
            dataStruct[key + 'Max'].append(np.max(val))

        # --- Librosa extraction
        for name, function_name in zip(features_libro, libro_functions):
            extractor = getattr(lib.feature, function_name)
            dataStruct[name].append(extractor(curSignal))

        # Statistics of the value just appended for each librosa descriptor
        for name in features_libro:
            val = dataStruct[name][-1]
            dataStruct[name + 'Mean'].append(np.mean(val))
            dataStruct[name + 'Std'].append(np.std(val))
            dataStruct[name + 'Max'].append(np.max(val))

    ### --- Formatting --- ###
    return dataStruct
def main():
    """Command-line entry point: parse options and run feature extraction.

    Informational options (-l, -d, --show) are handled first and end the
    run.  Otherwise a sample rate and at least one feature (config file or
    -f) are required; the feature plan is built, optionally dumped, and
    every audio file given on the command line or in the -i list file is
    processed with the selected output format.  Errors are reported on
    standard output and end the run without raising.
    """
    parser = OptionParser(version='yaafe.py, Yaafe v%s' %
                          yaafe.getYaafeVersion())
    parser.add_option('-v', '--verbose', dest='verbose',
                      action='store_true', default=False,
                      help='display more output')
    parser.add_option('-l', '--list', dest='listFeatures',
                      action='store_true', default=False,
                      help='list all available features and output formats')
    parser.add_option('-d', '--describe', dest='describe', default=None,
                      help='describe a feature or an output format')
    parser.add_option('-f', '--feature', action='append', dest='feature',
                      metavar='FEATUREDEFINITION', help='feature to extract')
    parser.add_option('-c', '--config-file', dest='configFile', default=None,
                      help='feature extraction plan')
    parser.add_option('-r', '--rate', dest='sample_rate', type='int',
                      default=None, help='working samplerate in Hz.')
    parser.add_option('', '--resample', dest='resample',
                      action='store_true', default=False,
                      help='Resample input signal to the analysis sample rate')
    parser.add_option('-n', '--normalize', dest='normalize',
                      action='store_true', default=False,
                      help='normalize input signal by removing mean and scale maximum absolute value to 0.98 (or other value given with --normalize-max)')
    parser.add_option('', '--normalize-max', dest='normalize_max',
                      type='float', default=0.98,
                      help='Normalize input signal so that maximum absolute value reached given value (see -n, --normalize)')
    parser.add_option('-i', '--input', dest='input_list', default=None,
                      help='text file, each line is an audio file to process')
    parser.add_option('-b', '--base-dir', dest='out_dir', default='',
                      help='output directory base')
    parser.add_option('-o', '--output-format', dest='format', default='csv',
                      choices=output_format_choices,
                      help='Features output format: %s' %
                      '|'.join(output_format_choices))
    parser.add_option('-p', '--output-params', dest='formatparams',
                      action='append', default=[], metavar='key=value',
                      help='add an output format parameter (can be used multiple times, use -l options to list output formats and parameters)')
    parser.add_option('', '--dump-dataflow', dest='dumpDataflow', default='',
                      metavar='FILE',
                      help='output dataflow plan (suitable for process with yaafe-engine)')
    parser.add_option('', '--dump-graph', dest='dumpGraph', default='',
                      metavar='FILE',
                      help="output dataflow in dot format (suitable for display with graphviz")
    parser.add_option('-s', '--data-block-size', dest='buffer_size',
                      type='int', default=None,
                      help='Prefered size for data blocks.')
    parser.add_option('', '--show', dest='showFeatures', default=None,
                      help='Show all features in a H5 file')

    (options, audiofiles) = parser.parse_args()

    # --- informational modes: do the listing/description and stop
    if options.listFeatures:
        listFeatures()
        return
    if options.describe:
        if options.describe in yaafe.getOutputFormatList():
            describeOutputFormat(options.describe)
        else:
            describeFeature(options.describe)
        return
    if options.showFeatures:
        showFeatures(options.showFeatures)
        return

    if not options.sample_rate:
        print("ERROR: please specify sample rate !")
        return
    if options.buffer_size:
        yaafe.setPreferedDataBlockSize(options.buffer_size)
    if options.verbose:
        yaafe.setVerbose(True)

    # initialize feature plan
    fp = yaafe.FeaturePlan(
        sample_rate=options.sample_rate,
        normalize=(options.normalize_max if options.normalize else None),
        resample=options.resample)

    if options.configFile:
        if not fp.loadFeaturePlan(options.configFile):
            return
    elif options.feature:
        for feat in options.feature:
            if not fp.addFeature(feat):
                return
    else:
        print("ERROR: please specify features using either a config file or -f [feature]")
        return

    if options.dumpDataflow:
        fp.getDataFlow().save(options.dumpDataflow)
    if options.dumpGraph:
        # NOTE(review): this writes with the same save() call as
        # --dump-dataflow although the option promises dot format - confirm.
        fp.getDataFlow().save(options.dumpGraph)

    # read audio file list
    if options.input_list:
        # ``with`` closes the list file even if reading fails part-way.
        with open(options.input_list, 'r') as fin:
            for line in fin:
                audiofiles.append(line.strip())

    if audiofiles:
        # initialize engine
        engine = yaafe.Engine()
        if not engine.load(fp.getDataFlow()):
            return
        # initialize file processor
        afp = yaafe.AudioFileProcessor()
        oparams = dict()
        for pstr in options.formatparams:
            pstrdata = pstr.split('=')
            if len(pstrdata) != 2:
                print('ERROR: invalid parameter syntax in "%s" (should be "key=value")' % pstr)
                return
            oparams[pstrdata[0]] = pstrdata[1]
        afp.setOutputFormat(options.format, options.out_dir, oparams)
        # process audio files
        for audiofile in audiofiles:
            afp.processFile(engine, audiofile)
def __call__(self, item):
    """Extract features

    Parameters
    ----------
    item : dict
        Passed to ``read_audio`` to obtain the waveform.

    Returns
    -------
    features : SlidingWindowFeature
        Wraps an array of shape (n_frames, n_features * self.stack): row
        ``i`` is the concatenation of ``self.stack`` consecutive frames
        around frame ``i``, the sequence being padded by repeating the
        first and last frame.  A warning is issued if it contains NaNs.
    """

    # --- load audio file
    y, sample_rate = read_audio(item,
                                sample_rate=self.sample_rate,
                                mono=True)

    # --- update data_flow every time sample rate changes
    if not hasattr(self, 'sample_rate_') or self.sample_rate_ != sample_rate:
        self.sample_rate_ = sample_rate
        feature_plan = yaafelib.FeaturePlan(sample_rate=self.sample_rate_)
        for name, recipe in self.definition():
            # The call must live outside the assert: under ``python -O``
            # asserts are stripped and the feature would never be added.
            added = feature_plan.addFeature(
                "{name}: {recipe}".format(name=name, recipe=recipe))
            assert added, "could not add feature %r" % (name,)
        data_flow = feature_plan.getDataFlow()
        self.engine_.load(data_flow)

    # Yaafe needs this: float64, C-contiguous, 2-dimensional
    y = np.array(y, dtype=np.float64, order='C').reshape((1, -1))

    # --- extract features
    features = self.engine_.processAudio(y)
    data = np.hstack([features[name] for name, _ in self.definition()])

    # --- stack features
    n_samples, n_features = data.shape
    # stack - 1 padding frames in total; the extra one goes in front when
    # the stack size is even.
    n_before = self.stack // 2
    n_after = self.stack - 1 - n_before
    expanded_data = np.concatenate((
        np.repeat(data[:1], n_before, axis=0),
        data,
        np.repeat(data[-1:], n_after, axis=0)))

    # Sliding window over the padded frames: advancing one row moves by one
    # frame, so the strides must be those of the array being viewed.
    data = np.lib.stride_tricks.as_strided(
        expanded_data,
        shape=(n_samples, n_features * self.stack),
        strides=expanded_data.strides)

    self.engine_.reset()

    # --- return as SlidingWindowFeature
    if np.any(np.isnan(data)):
        uri = get_unique_identifier(item)
        msg = 'Features extracted from "{uri}" contain NaNs.'
        warnings.warn(msg.format(uri=uri))

    return SlidingWindowFeature(data, self.sliding_window_)
def export_features(
        path=None,
        audiofiles=None,
        out='../audio_features',
        train_file_path=None,
        pca_params_path="/data/lisa/exp/faces/emotiw_final/caglar_audio/pca_params.pkl"
):
    """Extract Yaafe features from audio files and pickle them raw and PCA-projected.

    For every entry of `audiofiles`, the outputs of all features in the plan
    are concatenated and pickled. A PCA object is then finalised, pickled,
    and used to rewrite every raw pickle as a projected ``.pca.pkl`` file.

    Parameters
    ----------
    path : str
        Prefix joined in front of each entry of `audiofiles`; also the
        substring that is replaced by `out` when raw pickle names are built.
    audiofiles : iterable of str
        Audio file names, relative to `path`. Must not be None (asserted).
    out : str
        Output location for the pickles and the ``<subset>.pca`` file.
    train_file_path : str
        Directory globbed as ``<train_file_path>/*/*.mp3`` when a PCA has to
        be trained; asserted to be set only in that case.
    pca_params_path : str
        Pickle with cached PCA statistics (mean, covariance, num_frames,
        ndim). Loaded if it exists, otherwise computed and written there.

    Notes
    -----
    Python 2 code (print statements, ``iteritems``, ``cPickle``, ``file``).
    Relies on names defined outside this function: ``pca``, ``PCA``,
    ``file_create``.
    """
    # prepare the FeaturePlan
    plan = yaafelib.FeaturePlan(sample_rate=48000, normalize=0.99)
    # every feature below is computed with this block / step size
    size_info = 'blockSize=1248 stepSize=624'
    # NOTE(review): `pca` is read here before the `global` statement that
    # declares it; Python 2 only warns about this, Python 3 rejects it.
    # Presumably `pca` is a module-level cache of the trained PCA -- confirm.
    if pca is None:
        global pca
    # Entries without ':' are expanded to "<name>: <name>"; entries without
    # '%s' get size_info appended (see the loop below).
    features = [
        'ZCR', 'TemporalShapeStatistics', 'Energy', 'MagnitudeSpectrum',
        'SpectralVariation', 'SpectralSlope', 'SpectralRolloff',
        'SpectralShapeStatistics', 'SpectralFlux', 'SpectralFlatness',
        'SpectralDecrease', 'SpectralFlatnessPerBand',
        'SpectralCrestFactorPerBand', 'AutoCorrelation', 'LPC', 'LSF',
        'ComplexDomainOnsetDetection', 'MelSpectrum',
        'MFCC: MFCC CepsNbCoeffs=22',
        'MFCC_d1: MFCC %s > Derivate DOrder=1',
        'MFCC_d2: MFCC %s > Derivate DOrder=2',
        'Envelope', 'EnvelopeShapeStatistics', 'AmplitudeModulation',
        'Loudness', 'PerceptualSharpness', 'PerceptualSpread', 'OBSI', 'OBSIR'
    ]
    for f in features:
        if ':' not in f:
            f = '%s: %s' % (f, f)
        if '%s' not in f:
            f += ' %s'
        plan.addFeature(f % size_info)
    dataflow = plan.getDataFlow()
    engine = yaafelib.Engine()
    engine.load(dataflow)
    processor = yaafelib.AudioFileProcessor()
    # subset name -> feature keys; the special value 'full' means "all outputs"
    subsets = {'full': 'full'}

    def train_pca(pca=None):
        """Return `pca` if given, else build one from training files or cached statistics."""
        if pca is not None:
            return pca
        assert train_file_path is not None
        print "Training pca..."
        pca = defaultdict(PCA)
        audiofiles_ = glob.glob('%s/*/*.mp3' % train_file_path)
        if not (os.path.exists(pca_params_path)):
            # extract features from audio files
            for audiofile in audiofiles_:
                processor.processFile(engine, audiofile)
                features = engine.readAllOutputs()
                for subset, keys in subsets.iteritems():
                    if keys == 'full':
                        keys = sorted(features.keys())
                    # transpose / concatenate / transpose: joins the outputs
                    # side by side (assumes 2-D outputs -- TODO confirm)
                    output = numpy.concatenate([features[k].T for k in keys]).T
                    # only files whose path contains 'Train' feed the PCA
                    if 'Train' in audiofile:
                        pca[subset].add(output)
            # cache the accumulated statistics so later runs can skip extraction
            pca_params = {}
            pca_params["mean"] = pca["full"].mean
            pca_params["covariance"] = pca["full"].covariance
            pca_params["num_frames"] = pca["full"].num_frames
            pca_params["ndim"] = pca["full"].ndim
            cPickle.dump(pca_params, file_create(pca_params_path), cPickle.HIGHEST_PROTOCOL)
        else:
            # restore previously cached statistics
            pca_params = cPickle.load(open(pca_params_path, "rb"))
            pca["full"].covariance = pca_params["covariance"]
            pca["full"].mean = pca_params["mean"]
            pca["full"].num_frames = pca_params["num_frames"]
            pca["full"].ndim = pca_params["ndim"]
        print "PCA training finished."
        return pca

    assert audiofiles is not None
    pca = train_pca(pca)
    assert pca is not None
    # NOTE(review): same loop as above, run again after the data flow has
    # already been loaded into the engine -- looks redundant; confirm.
    for f in features:
        if ':' not in f:
            f = '%s: %s' % (f, f)
        if '%s' not in f:
            f += ' %s'
        plan.addFeature(f % size_info)
    # extract features from audio files
    for audiofile in audiofiles:
        audiofile = os.path.join(path, audiofile)
        processor.processFile(engine, audiofile)
        # rebinds `features` from the list of names to the engine outputs
        features = engine.readAllOutputs()
        for subset, keys in subsets.iteritems():
            if keys == 'full':
                keys = sorted(features.keys())
            output = numpy.concatenate([features[k].T for k in keys]).T
            # <path>/x.mp3 -> <out>/x.<subset>.pkl
            pickle_file = audiofile.replace('.mp3', '.%s.pkl' % subset).replace(
                path, out)
            cPickle.dump(output, file_create(pickle_file), cPickle.HIGHEST_PROTOCOL)
    # finalise each PCA and store it next to the feature pickles
    for subset in subsets.iterkeys():
        pca[subset].pca(diagonal=True)
        cPickle.dump(pca[subset], file_create('%s/%s.pca' % (out, subset)))
    print 'Rewriting PCA data...'
    sys.stdout.flush()
    # second pass: reload each raw pickle and write its projected version
    for audiofile in audiofiles:
        for subset in subsets.iterkeys():
            pickle_file = os.path.join(out, audiofile).replace(
                '.mp3', '.%s.pkl' % subset)
            #pickle_file = audiofile.replace('.mp3', '.%s.pkl' % subset).replace(path, out)
            # NOTE(review): file() opens in text mode although the pickle was
            # written with a binary protocol, and the handle is never closed.
            matrix = cPickle.load(file(pickle_file))
            matrix = pca[subset].feature(matrix)
            cPickle.dump(matrix, file_create(pickle_file.replace('.pkl', '.pca.pkl')), cPickle.HIGHEST_PROTOCOL)
def transform(self, X): X_prime = None params = {'block_size': 256, 'step_size': 128, 'mel_min_freq': 0.0, 'mel_max_freq': 500.0, 'mel_nb_filters': 50, 'ceps_ign_first_coef': 0, 'fft_len': 0, 'do1len': 5, 'do2len': 1, 'slope_step_nbframes': 5, 'slope_nbframes': 9, } fp = yf.FeaturePlan(sample_rate=self.sample_rate) fp.addFeature('melspec: MelSpectrum FFTWindow=Hanning MelNbFilters=%(mel_nb_filters)d' ' blockSize=%(block_size)d stepSize=%(step_size)d' ' MelMinFreq=%(mel_min_freq)f MelMaxFreq=%(mel_max_freq)f' % params) if self.diff: fp.addFeature('melspec_diff1: MelSpectrum FFTWindow=Hanning MelNbFilters=%(mel_nb_filters)d' ' blockSize=%(block_size)d stepSize=%(step_size)d' ' MelMinFreq=%(mel_min_freq)f MelMaxFreq=%(mel_max_freq)f' ' > Derivate DOrder=1 DO1Len=%(do1len)d' % params) if self.diff2: fp.addFeature('melspec_diff2: MelSpectrum FFTWindow=Hanning MelNbFilters=%(mel_nb_filters)d' ' blockSize=%(block_size)d stepSize=%(step_size)d' ' MelMinFreq=%(mel_min_freq)f MelMaxFreq=%(mel_max_freq)f' ' > Derivate DOrder=2 DO2Len=%(do2len)d' % params) if self.slope: fp.addFeature('melspec_slope: MelSpectrum FFTWindow=Hanning MelNbFilters=%(mel_nb_filters)d' ' blockSize=%(block_size)d stepSize=%(step_size)d' ' MelMinFreq=%(mel_min_freq)f MelMaxFreq=%(mel_max_freq)f' ' > SlopeIntegrator NbFrames=%(slope_nbframes)d StepNbFrames=%(slope_step_nbframes)d' % params) df = fp.getDataFlow() engine = yf.Engine() engine.load(df) X = X.astype(np.float64) x_shape = (1, X.shape[1]) for i, x in enumerate(X): x = x.reshape(x_shape) feats = engine.processAudio(x) ## if i == 0: ## import IPython ## IPython.embed() if X_prime is None: fx_groups = tuple(feats.keys()) n_features = 0 for fx_group in fx_groups: n_features += feats[fx_group].ravel().shape[0] X_prime = np.empty((X.shape[0], n_features), dtype=np.float64) print 'n_groups:', len(fx_groups) print 'n_features:', n_features offset = 0 for fx_group in fx_groups: fxs = feats[fx_group].ravel() if fx_group == 'melspec': # log melspec 
features fxs = np.log10(fxs) X_prime[i, offset:(offset + fxs.shape[0])] = fxs offset += fxs.shape[0] return X_prime
nargs='+', help='tar archive which contains all the wav files') parser.add_argument('output', help='output file') parser.add_argument('--derivatives', action='store_true') args = parser.parse_args() parameters = dict( step_size=160, # corresponds to 10 ms (at 16 kHz) block_size=640, # corresponds to 40 ms mfcc_coeffs=40, mfcc_filters= 41 # more filters? (needs to be at least mfcc_coeffs+1, because first coeff is ignored) ) fp = yaafelib.FeaturePlan(sample_rate=16000) mfcc_features = 'MFCC MelNbFilters={mfcc_filters} CepsNbCoeffs={mfcc_coeffs} ' \ 'blockSize={block_size} stepSize={step_size}'.format(**parameters) energy_features = 'Energy blockSize={block_size} stepSize={step_size}'.format( **parameters) fp.addFeature('mfcc: {}'.format(mfcc_features)) if args.derivatives: fp.addFeature('mfcc_d1: {} > Derivate DOrder=1'.format(mfcc_features)) fp.addFeature('mfcc_d2: {} > Derivate DOrder=2'.format(mfcc_features)) fp.addFeature('energy: {}'.format(energy_features)) if args.derivatives: fp.addFeature('energy_d1: {} > Derivate DOrder=1'.format(energy_features)) fp.addFeature('energy_d2: {} > Derivate DOrder=2'.format(energy_features))