def createAFP():
    """Build a yaafe engine configured for Energy and MFCC features.

    Returns:
        (afp, engine): a fresh ``yaafelib.AudioFileProcessor`` and an
        ``Engine`` already loaded with the Energy/MFCC dataflow.
    """
    engine = yaafelib.Engine()
    fp = yaafelib.FeaturePlan(sample_rate=16000)
    fp.addFeature('energy: Energy')
    fp.addFeature('mfcc: MFCC blockSize=2048 stepSize=1024')
    # FIX: build the dataflow once and load that result; the original
    # called fp.getDataFlow() twice and left the first result (`df`) unused.
    df = fp.getDataFlow()
    engine.load(df)
    afp = yaafelib.AudioFileProcessor()
    return afp, engine
def yaafe2features(wavefiles, out_file, feature_type='MFCC'):
    """Extract features with yaafe and store them in h5features format.

    Each wavefile is encoded whole as an item of the internal h5features
    file. To use the result with abkhazia's ABX tasks, segment it
    according to an abkhazia segments.txt
    (abkhazia/utilities/segment_features.py can be used for this).

    Supported feature types:

    - 'MFCC' (default)

    - 'CMSP13' (cubic-root-compressed 13-frequency-channels Mel
      spectrogram)
    """
    assert feature_type in ['MFCC', 'CMSP13'], \
        'Unsupported feature_type {0}'.format(feature_type)

    # Output name and yaafe specification per supported feature type.
    # 400/160 samples at 16 kHz = 0.025 s windows with a 0.01 s step.
    specs = {
        'MFCC': ('mfcc', '{0}: MFCC blockSize=400 stepSize=160'),
        'CMSP13': (
            'melsp',
            '{0}: MelSpectrum MelNbFilters=13 blockSize=400 stepSize=160'),
    }
    feat_name, template = specs[feature_type]

    feature_plan = ya.FeaturePlan(sample_rate=16000)
    feature_plan.addFeature(template.format(feat_name))
    engine = ya.Engine()
    engine.load(feature_plan.getDataFlow())

    wav_ids, times, features = [], [], []
    for wavefile in wavefiles:
        wav_ids.append(p.splitext(p.basename(wavefile))[0])
        afp = ya.AudioFileProcessor()
        afp.processFile(engine, wavefile)
        feat_out = engine.readAllOutputs()[feat_name]
        if feature_type == 'CMSP13':
            # yaafe emits the raw Mel spectrum; apply the cubic-root
            # compression by hand.
            feat_out = np.power(feat_out, 1 / 3.)
        # Frame times according to:
        # http://yaafe.sourceforge.net/features.html?highlight=mfcc#yaafefeatures.Frames
        # 0.01 here is ad hoc and dependent on stepSize=160 above.
        times.append(0.01 * np.arange(feat_out.shape[0]))
        features.append(feat_out)

    h5features.write(out_file, 'features', wav_ids, times, features)
def extract_feature(filename, offset):
    """Clip 30 seconds of audio starting at `offset` seconds, run the
    yaafe feature plan on the clip, and return the preprocessed features.

    Args:
        filename: path to the source audio file.
        offset: start position in seconds (convertible with int()).

    Returns:
        The value of preprocessed(feats), where feats is the dict of all
        yaafe outputs for the clipped file.
    """
    fp = yaafelib.FeaturePlan(sample_rate=22050, resample=True)
    fp.loadFeaturePlan('static/featureplan.txt')
    engine = yaafelib.Engine()
    engine.load(fp.getDataFlow())
    print(filename)
    # FIX: was the Python 2 statement `print offset`, a SyntaxError in
    # Python 3 and inconsistent with the print() call just above.
    print(offset)
    sound = AudioSegment.from_file(filename)
    # pydub AudioSegment slicing is in milliseconds.
    halfway_point = int(offset) * 1000
    end = halfway_point + 30000  # 30-second clip
    first_half = sound[halfway_point:end]
    filename = os.path.join(
        app.config['UPLOAD_FOLDER'],
        os.path.splitext(os.path.basename(filename))[0] + str(offset) +
        ".cliped.wav")
    # Reuse a previously exported clip when one already exists.
    if not os.path.isfile(filename):
        first_half.export(filename, format="wav")
    afp = yaafelib.AudioFileProcessor()
    afp.processFile(engine, filename)
    feats = engine.readAllOutputs()
    return preprocessed(feats)
def _write_column_stats(data, matrix):
    """Write mean, std, skew and kurtosis of each column of `matrix` to
    `data`, each value followed by a tab."""
    for i in range(matrix[0].size):
        col = matrix[:, i]
        data.write(
            str(col.mean()) + '\t' + str(col.std()) + '\t' +
            str(spstats.skew(col)) + '\t' +
            str(spstats.kurtosis(col)) + '\t')


def write_features(audiofile, sample_rate, data):
    """Extract features then write means and std devs to data (tab) file.

    Returns True if extraction was successful, False if unsuccessful.

    Arguments:
    audiofile -- WAV file to process
    sample_rate -- sample rate of the audio file in Hz
    data -- the data file to write to
    """
    N_MFCC = 12   # Number of MFCC coefficients
    N_LLD = 2     # Number of other low-level descriptors (energy, zcr)
    N_FUNCS = 4   # Number of functionals (mean, std, skew, kurtosis)

    # Add features to extract.
    # NOTE: the original embedded a backslash line-continuation inside this
    # literal, leaving stray whitespace in the spec string; normalized to
    # single spaces here.
    featplan = yf.FeaturePlan(sample_rate=sample_rate, resample=False)
    featplan.addFeature('mfcc: MFCC CepsIgnoreFirstCoeff=0 CepsNbCoeffs=12 '
                        'FFTWindow=Hanning MelMinFreq=1200 MelMaxFreq=5050')
    featplan.addFeature('energy: Energy')
    featplan.addFeature('zcr: ZCR')

    # Configure an Engine
    engine = yf.Engine()
    engine.load(featplan.getDataFlow())

    # Extract features
    afp = yf.AudioFileProcessor()
    afp.processFile(engine, audiofile)

    # 2D numpy arrays (frames x coefficients)
    mfccs = engine.readOutput('mfcc')
    energy = engine.readOutput('energy')
    zcr = engine.readOutput('zcr')

    # Write header lines if they don't exist (i.e. the file is empty).
    data.seek(0, 0)
    if not data.readline():
        funcs = ("mean", "std", "skew", "kurtosis")
        # Attribute header line
        for i in range(N_MFCC):  # MFCC headers
            for func in funcs:
                data.write("mfcc" + str(i + 1) + "_" + func + "\t")
        for func in funcs:       # Energy headers
            data.write("energy_" + func + "\t")
        for func in funcs:       # ZCR headers
            data.write("zcr_" + func + "\t")
        # Filename and classification headers
        data.write("filename" + '\t')
        data.write("classification" + "\n")
        # Attribute type line
        for i in range(N_MFCC * N_FUNCS + (N_LLD * N_FUNCS)):
            data.write("continuous" + "\t")
        data.write("string" + '\t')    # filename is a string
        data.write("discrete" + "\n")  # classification is discrete
        # Flags line
        for i in range(N_MFCC * N_FUNCS + (N_LLD * N_FUNCS)):
            data.write('\t')
        data.write("meta" + '\t')
        data.write("class" + '\n')
    data.seek(0, 2)  # Go to end of file.

    # Write feature data
    if mfccs.size > 0 and energy.size > 0 and zcr.size > 0:
        _write_column_stats(data, mfccs)
        _write_column_stats(data, energy)
        # BUG FIX: the original computed the ZCR kurtosis from the energy
        # matrix (spstats.kurtosis(energy[:, i])) instead of zcr.
        _write_column_stats(data, zcr)
        return True
    else:
        return False
# NOTE(review): mid-script fragment — `fp`, `mfcc_features`, `energy_features`,
# `args`, `yaafelib`, `tarfile`, `wav` and `np` are defined earlier in the file,
# and the inner loop body continues past this fragment.
# Add second-order MFCC derivative, then the energy feature, to the plan.
fp.addFeature('mfcc_d2: {} > Derivate DOrder=2'.format(mfcc_features))
fp.addFeature('energy: {}'.format(energy_features))
if args.derivatives:
    # First- and second-order derivatives of the energy feature.
    fp.addFeature('energy_d1: {} > Derivate DOrder=1'.format(energy_features))
    fp.addFeature('energy_d2: {} > Derivate DOrder=2'.format(energy_features))
# Output keys, matching the feature names registered above.
if args.derivatives:
    keys = ['mfcc', 'mfcc_d1', 'mfcc_d2', 'energy', 'energy_d1', 'energy_d2']
else:
    keys = ['mfcc', 'energy']
# Build the dataflow and load it into a yaafe engine.
df = fp.getDataFlow()
engine = yaafelib.Engine()
engine.load(df)
afp = yaafelib.AudioFileProcessor()
outfile = open(args.output, 'wb')  # binary output file for the extracted features
# First pass: count regular files across all input tars (presumably for
# progress reporting — TODO confirm against the code after this fragment).
total = 0
for filename in args.inputs:
    tar = tarfile.open(filename)
    total += len([f for f in tar if f.isfile()])
# Second pass: process each tar's member files in name order.
for j, filename in enumerate(args.inputs):
    tar = tarfile.open(filename)
    files = sorted([f for f in tar if f.isfile()], key=lambda f: f.name)
    for i, fileinfo in enumerate(files):
        # `wav` is presumably scipy.io.wavfile — verify against the imports.
        _, data = wav.read(tar.extractfile(fileinfo))
        data = data.astype(np.float64)
def export_features(
        path=None,
        audiofiles=None,
        out='../audio_features',
        train_file_path=None,
        pca_params_path="/data/lisa/exp/faces/emotiw_final/caglar_audio/pca_params.pkl"
):
    """Extract a large set of yaafe features from `audiofiles`, pickle the
    per-file feature matrices under `out`, train (or load) a PCA on them,
    and rewrite PCA-projected versions of every pickled matrix.

    Python 2 code (print statements, iteritems/iterkeys, cPickle, file()).
    `pca`, `PCA`, `file_create`, `yaafelib`, `numpy`, `glob`, `os`,
    `cPickle`, `defaultdict` and `sys` come from the enclosing module.
    """
    # prepare the FeaturePlan
    plan = yaafelib.FeaturePlan(sample_rate=48000, normalize=0.99)
    size_info = 'blockSize=1248 stepSize=624'
    if pca is None:
        # NOTE(review): `global pca` after `pca` is already read above is a
        # SyntaxError in Python 3 (and a SyntaxWarning in some Python 2s);
        # the module-level `pca` is presumably initialized to None — confirm.
        global pca
    features = [
        'ZCR', 'TemporalShapeStatistics', 'Energy', 'MagnitudeSpectrum',
        'SpectralVariation', 'SpectralSlope', 'SpectralRolloff',
        'SpectralShapeStatistics', 'SpectralFlux', 'SpectralFlatness',
        'SpectralDecrease', 'SpectralFlatnessPerBand',
        'SpectralCrestFactorPerBand', 'AutoCorrelation', 'LPC', 'LSF',
        'ComplexDomainOnsetDetection', 'MelSpectrum',
        'MFCC: MFCC CepsNbCoeffs=22',
        'MFCC_d1: MFCC %s > Derivate DOrder=1',
        'MFCC_d2: MFCC %s > Derivate DOrder=2',
        'Envelope', 'EnvelopeShapeStatistics', 'AmplitudeModulation',
        'Loudness', 'PerceptualSharpness', 'PerceptualSpread', 'OBSI', 'OBSIR'
    ]
    # Normalize each entry to the 'name: Transform ... params' form, then
    # splice in the block/step sizes and register it on the plan.
    for f in features:
        if ':' not in f:
            f = '%s: %s' % (f, f)
        if '%s' not in f:
            f += ' %s'
        plan.addFeature(f % size_info)
    dataflow = plan.getDataFlow()
    engine = yaafelib.Engine()
    engine.load(dataflow)
    processor = yaafelib.AudioFileProcessor()
    # Only one subset is used; 'full' means "concatenate every output key".
    subsets = {'full': 'full'}

    def train_pca(pca=None):
        # Train a PCA on the training mp3s, or load cached parameters from
        # pca_params_path if that file already exists.
        if pca is not None:
            return pca
        assert train_file_path is not None
        print "Training pca..."
        pca = defaultdict(PCA)
        audiofiles_ = glob.glob('%s/*/*.mp3' % train_file_path)
        if not (os.path.exists(pca_params_path)):
            # extract features from audio files
            for audiofile in audiofiles_:
                processor.processFile(engine, audiofile)
                features = engine.readAllOutputs()
                for subset, keys in subsets.iteritems():
                    if keys == 'full':
                        keys = sorted(features.keys())
                    # Concatenate all outputs along the feature axis
                    # (rows stay frames, columns are stacked features).
                    output = numpy.concatenate([features[k].T for k in keys]).T
                    # Only files from a 'Train' directory feed the PCA.
                    if 'Train' in audiofile:
                        pca[subset].add(output)
            pca_params = {}
            pca_params["mean"] = pca["full"].mean
            pca_params["covariance"] = pca["full"].covariance
            pca_params["num_frames"] = pca["full"].num_frames
            pca_params["ndim"] = pca["full"].ndim
            cPickle.dump(pca_params, file_create(pca_params_path),
                         cPickle.HIGHEST_PROTOCOL)
        else:
            # Reload previously computed PCA statistics from disk.
            pca_params = cPickle.load(open(pca_params_path, "rb"))
            pca["full"].covariance = pca_params["covariance"]
            pca["full"].mean = pca_params["mean"]
            pca["full"].num_frames = pca_params["num_frames"]
            pca["full"].ndim = pca_params["ndim"]
        print "PCA training finished."
        return pca

    assert audiofiles is not None
    pca = train_pca(pca)
    assert pca is not None
    # NOTE(review): this loop re-adds every feature already registered on
    # `plan` above (after the engine was loaded); it looks like an accidental
    # duplicate — confirm whether yaafe ignores or rejects duplicate names.
    for f in features:
        if ':' not in f:
            f = '%s: %s' % (f, f)
        if '%s' not in f:
            f += ' %s'
        plan.addFeature(f % size_info)
    # extract features from audio files
    for audiofile in audiofiles:
        audiofile = os.path.join(path, audiofile)
        processor.processFile(engine, audiofile)
        features = engine.readAllOutputs()
        for subset, keys in subsets.iteritems():
            if keys == 'full':
                keys = sorted(features.keys())
            output = numpy.concatenate([features[k].T for k in keys]).T
            # Mirror the input tree under `out`, one pickle per subset.
            pickle_file = audiofile.replace('.mp3', '.%s.pkl' % subset).replace(
                path, out)
            cPickle.dump(output, file_create(pickle_file),
                         cPickle.HIGHEST_PROTOCOL)
    # Finalize the PCA (diagonal covariance) and persist it.
    for subset in subsets.iterkeys():
        pca[subset].pca(diagonal=True)
        cPickle.dump(pca[subset], file_create('%s/%s.pca' % (out, subset)))
    print 'Rewriting PCA data...'
    sys.stdout.flush()
    # Project every pickled feature matrix through the PCA and write the
    # result alongside it as *.pca.pkl.
    for audiofile in audiofiles:
        for subset in subsets.iterkeys():
            pickle_file = os.path.join(out, audiofile).replace(
                '.mp3', '.%s.pkl' % subset)
            #pickle_file = audiofile.replace('.mp3', '.%s.pkl' % subset).replace(path, out)
            matrix = cPickle.load(file(pickle_file))
            matrix = pca[subset].feature(matrix)
            cPickle.dump(matrix,
                         file_create(pickle_file.replace('.pkl', '.pca.pkl')),
                         cPickle.HIGHEST_PROTOCOL)
def main():
    """Command-line driver for yaafe feature extraction.

    Parses options, optionally lists/describes features, builds a
    FeaturePlan from -c/--config-file or repeated -f/--feature flags, and
    processes every audio file given on the command line and/or listed in
    the -i/--input file. Python 2 code (print statements, OptionParser).
    """
    parser = OptionParser(version='yaafe.py, Yaafe v%s' % yaafe.getYaafeVersion())
    parser.add_option('-v', '--verbose', dest='verbose', action='store_true',
                      default=False, help='display more output')
    parser.add_option('-l', '--list', dest='listFeatures', action='store_true',
                      default=False,
                      help='list all available features and output formats')
    parser.add_option('-d', '--describe', dest='describe', default=None,
                      help='describe a feature or an output format')
    parser.add_option('-f', '--feature', action='append', dest='feature',
                      metavar='FEATUREDEFINITION', help='feature to extract')
    parser.add_option('-c', '--config-file', dest='configFile', default=None,
                      help='feature extraction plan')
    parser.add_option('-r', '--rate', dest='sample_rate', type='int',
                      default=None, help='working samplerate in Hz.')
    parser.add_option('', '--resample', dest='resample', action='store_true',
                      default=False,
                      help='Resample input signal to the analysis sample rate')
    parser.add_option('-n', '--normalize', dest='normalize',
                      action='store_true', default=False,
                      help='normalize input signal by removing mean and scale maximum absolute value to 0.98 (or other value given with --normalize-max)')
    parser.add_option('', '--normalize-max', dest='normalize_max',
                      type='float', default=0.98,
                      help='Normalize input signal so that maximum absolute value reached given value (see -n, --normalize)')
    parser.add_option('-i', '--input', dest='input_list', default=None,
                      help='text file, each line is an audio file to process')
    parser.add_option('-b', '--base-dir', dest='out_dir', default='',
                      help='output directory base')
    parser.add_option('-o', '--output-format', dest='format', default='csv',
                      choices=output_format_choices,
                      help='Features output format: %s' % '|'.join(output_format_choices))
    parser.add_option('-p', '--output-params', dest='formatparams',
                      action='append', default=[], metavar='key=value',
                      help='add an output format parameter (can be used multiple times, use -l options to list output formats and parameters)')
    parser.add_option('', '--dump-dataflow', dest='dumpDataflow', default='',
                      metavar='FILE',
                      help='output dataflow plan (suitable for process with yaafe-engine)')
    # NOTE(review): this help string is missing its closing ')' — runtime
    # string, left untouched in this documentation-only pass.
    parser.add_option('', '--dump-graph', dest='dumpGraph', default='',
                      metavar='FILE',
                      help="output dataflow in dot format (suitable for display with graphviz")
    parser.add_option('-s', '--data-block-size', dest='buffer_size',
                      type='int', default=None,
                      help='Prefered size for data blocks.')
    parser.add_option('', '--show', dest='showFeatures', default=None,
                      help='Show all features in a H5 file')
    (options, audiofiles) = parser.parse_args()
    # Informational modes: each prints and exits without processing audio.
    if options.listFeatures:
        listFeatures()
        return
    if options.describe:
        if options.describe in yaafe.getOutputFormatList():
            describeOutputFormat(options.describe)
        else:
            describeFeature(options.describe)
        return
    if options.showFeatures:
        showFeatures(options.showFeatures)
        return
    # A sample rate is mandatory for any extraction.
    if not options.sample_rate:
        print "ERROR: please specify sample rate !"
        return
    if options.buffer_size:
        yaafe.setPreferedDataBlockSize(options.buffer_size)
    if options.verbose:
        yaafe.setVerbose(True)
    # initialize feature plan (normalization value only when -n was given)
    fp = yaafe.FeaturePlan(sample_rate=options.sample_rate,
                           normalize=(options.normalize_max if options.normalize else None),
                           resample=options.resample)
    if options.configFile:
        if not fp.loadFeaturePlan(options.configFile):
            return
    elif options.feature:
        for feat in options.feature:
            if not fp.addFeature(feat):
                return
    else:
        print "ERROR: please specify features using either a config file or -f [feature]"
        return
    if options.dumpDataflow:
        fp.getDataFlow().save(options.dumpDataflow)
    if options.dumpGraph:
        # NOTE(review): same .save() call as --dump-dataflow; the help text
        # promises dot format — confirm against yaafe's DataFlow API.
        fp.getDataFlow().save(options.dumpGraph)
    # read audio file list: -i entries are appended to positional arguments
    if options.input_list:
        fin = open(options.input_list, 'r')
        for line in fin:
            audiofiles.append(line.strip())
        fin.close()
    if audiofiles:
        # initialize engine
        engine = yaafe.Engine()
        if not engine.load(fp.getDataFlow()):
            return
        # initialize file processor
        afp = yaafe.AudioFileProcessor()
        oparams = dict()
        # Each -p flag must be of the form key=value.
        for pstr in options.formatparams:
            pstrdata = pstr.split('=')
            if len(pstrdata) != 2:
                print 'ERROR: invalid parameter syntax in "%s" (should be "key=value")' % pstr
                return
            oparams[pstrdata[0]] = pstrdata[1]
        afp.setOutputFormat(options.format, options.out_dir, oparams)
        # process audio files
        for audiofile in audiofiles:
            afp.processFile(engine, audiofile)
# Get the instruments and their class indices instruments = getInstruments(trainAudio) # Specify features fp = yl.FeaturePlan(sample_rate=44100) fp.loadFeaturePlan('featureplan.txt') # Initialize yaafe tools df = fp.getDataFlow() eng = yl.Engine() eng.load(df) dimensions = 0 # The sum of the dimensions of the features ftSizes = eng.getOutputs().items() for ftSize in ftSizes: dimensions += int(ftSize[1]['size']) afp = yl.AudioFileProcessor() # Remove previous model files for k in range(len(instruments)): classFile = model + '.' + str(k) if (os.path.isfile(classFile)): os.remove(classFile) # Write training features print('\nExtracting training features\n') numTrainFiles = writeFeatures(trainAudio, trainFeats, instruments) # Display the number of training files used print('Number of training files used: ' + str(numTrainFiles) + '\n') # Train the svm
from sklearn import neighbors from sklearn.svm import SVC from sklearn.svm import LinearSVC from sklearn.svm import NuSVC from sklearn import linear_model from sklearn.linear_model import SGDClassifier from sklearn import tree from sklearn.neighbors.nearest_centroid import NearestCentroid yaafe.loadComponentLibrary('yaafe-io') fp = yaafe.FeaturePlan(sample_rate=8000) fp.loadFeaturePlan('./featureplan') engine = yaafe.Engine() engine.load(fp.getDataFlow()) afp = yaafe.AudioFileProcessor() afp.setOutputFormat('csv', './outputs', { 'Metadata': 'false', 'Precision': '2' }) emotions = ['angry', 'happy', 'neutral', 'unhappy'] feats = ['eng', 'lpc', 'lsf', 'ldd', 'mfc'] def getProperties(audiofile): props = "" for feat in feats: lines = [] outfile = "./outputs" + audiofile[1:] + "." + feat + ".csv" with open(outfile, 'r') as f: