def get_segments(filepath, seg_length):
    """Walk through an audio file segment by segment and print a size report.

    An ``af.audiofile_manager`` is created for ``filepath`` with the given
    ``seg_length``; for as long as it reports more data, the next segment is
    fetched and one line is printed containing, in order: the segment index,
    the size of the segment's first axis, ``index * seg_length_samps`` and
    the value of ``afReader.numsamples()``.

    Nothing is returned; the function exists for its printed output.
    """
    manager = af.audiofile_manager(filepath, seg_length)
    report_format = "%d: %d | (%d of %d)"

    while True:
        if not manager.HasMoreData():
            break

        chunk, position = manager.GetNextSegment()
        first_axis_len = chunk.shape[0]
        sample_offset = position * manager.seg_length_samps
        total_samples = manager.afReader.numsamples()

        # Single-argument print behaves the same as the print statement.
        print(report_format % (position, first_axis_len,
                               sample_offset, total_samples))
def getfeatures(args):
    """Extract per-event features from an audio file and save them to disk.

    The audio file is read in chunks through ``af.audiofile_manager``. For
    every chunk, events are detected with ``GetEvents``, the audio around
    each event is cut out, sones and MFCCs are computed per event segment,
    time-averaged, and stored in two feature holders (a combined sones+MFCC
    holder and a sones-only holder). After the last chunk both holders are
    saved to ``FEATURE_VECTOR_FILENAME`` and ``SONE_VECTOR_FILENAME`` so the
    later processing steps can read them.

    Args:
        args: object providing the attributes
            ``debug`` (truthy enables verbose printing),
            ``audiofile`` (path handed to the audio file manager) and
            ``audio_seg_length`` (chunk length handed to the audio file
            manager and to ``StoreFeatureVector``).

    Returns:
        None. Progress, timestamps and the values returned by the holders'
        ``save`` calls are printed.
    """
    # NOTE(review): this file contains two definitions of getfeatures; the
    # one that appears later in the module is the one callers end up with.
    debug = args.debug
    filepath = args.audiofile
    chunk_len = args.audio_seg_length

    afm = af.audiofile_manager(filepath, chunk_len)

    # FFT parameters
    fs = afm.afReader.samplerate()
    N = 2048
    hopDenom = 2
    zp = 0
    winfunc = np.hamming
    fftParams = fftparams.FFTParams(fs, N, hopDenom, zp, winfunc)

    # MFCC parameters
    nFilters = 40
    nDCTCoefs = 20
    minFreq = 50
    maxFreq = 8000
    nIndexSkip = 2
    seglen = 1
    mfccParams = fftparams.MFCCParams(nFilters, nDCTCoefs, minFreq, maxFreq,
                                      nIndexSkip)

    # Feature vector parameters
    # Template : ('name', order index, length)
    vector_template = [('sones', 0, 1),
                       ('mfcc', 1, nDCTCoefs - nIndexSkip)]
    sone_template = [('sones', 0, 1)]

    # Initialize the feature vector holders
    feature_holder = featurevector.feature_holder(vector_template, filepath)
    sone_holder = featurevector.feature_holder(sone_template, filepath)

    print("Feature Extraction Mode\n")
    print(datetime.now())

    # For each chunk of audio
    while afm.HasMoreData():
        audioChunk, chunkIndex = afm.GetNextSegment()
        if debug:
            # float() forces true division: with an integer sample rate the
            # plain "/" floors under Python 2 and the duration would always
            # be reported as a whole number of seconds.
            print("Read %d sample chunk of audio (%0.2fs)" % (
                len(audioChunk), float(len(audioChunk)) / fs))

        # Get events. The envelope is returned alongside the event times but
        # is not needed by any later step in this function.
        eventTimes, envelope = GetEvents(audioChunk, fftParams, debug)
        if debug:
            print("EVENTTIMES: %s" % (eventTimes,))

        # Event times scaled by the sample rate and truncated to ints
        eventTimesSamps = np.asarray(np.multiply(eventTimes, fs), dtype=int)

        # Get event audio segments
        eventSegments = GetEventAudioSegments(eventTimesSamps, audioChunk,
                                              debug)

        # Get the sones for each segment / event
        eventSegmentSones = GetEventSones(eventSegments, fftParams, debug)

        # Get the MFCCs for each segment / event
        eventSegmentMFCCs = GetEventMFCCs(eventSegments, fftParams,
                                          mfccParams, debug)

        # Time-average for each segment / event
        averagedEventSegmentMFCCs, averagedEventSegmentSones = \
            AverageEventFeatures(eventSegmentMFCCs, eventSegmentSones,
                                 seglen, fftParams, debug)

        # Store these vectors in the feature holders, labelled with their time
        StoreFeatureVector(feature_holder, sone_holder,
                           averagedEventSegmentMFCCs,
                           averagedEventSegmentSones,
                           chunkIndex, chunk_len, eventTimes, debug)

    # Write features to disk
    print(datetime.now())
    for holder, filename in ((feature_holder, FEATURE_VECTOR_FILENAME),
                             (sone_holder, SONE_VECTOR_FILENAME)):
        fileSize = holder.save(filename)
        print("Wrote %s bytes to disk. (%s)" % (fileSize, filename))
def getfeatures(args):
    """Extract per-event features from an audio file and save them to disk.

    The input file is consumed chunk by chunk via ``af.audiofile_manager``.
    Each chunk goes through event detection (``GetEvents``), extraction of
    the audio around each event, computation of sones and MFCCs per event
    segment, and time-averaging. The averaged vectors are stored in a
    combined sones+MFCC holder and a sones-only holder, which are saved to
    ``FEATURE_VECTOR_FILENAME`` and ``SONE_VECTOR_FILENAME`` once all chunks
    have been processed, for reading in the other steps.

    Args:
        args: object providing the attributes
            ``debug`` (truthy enables verbose printing),
            ``audiofile`` (path handed to the audio file manager) and
            ``audio_seg_length`` (chunk length handed to the audio file
            manager and to ``StoreFeatureVector``).

    Returns:
        None. Progress, timestamps and the values returned by the holders'
        ``save`` calls are printed.
    """
    # NOTE(review): this file contains two definitions of getfeatures; the
    # one that appears later in the module is the one callers end up with.
    debug = args.debug
    filepath = args.audiofile
    chunk_len = args.audio_seg_length

    afm = af.audiofile_manager(filepath, chunk_len)

    # FFT parameters
    fs = afm.afReader.samplerate()
    N = 2048
    hopDenom = 2
    zp = 0
    winfunc = np.hamming
    fftParams = fftparams.FFTParams(fs, N, hopDenom, zp, winfunc)

    # MFCC parameters
    nFilters = 40
    nDCTCoefs = 20
    minFreq = 50
    maxFreq = 8000
    nIndexSkip = 2
    seglen = 1
    mfccParams = fftparams.MFCCParams(nFilters, nDCTCoefs, minFreq, maxFreq,
                                      nIndexSkip)

    # Feature vector parameters
    # Template : ('name', order index, length)
    vector_template = [('sones', 0, 1),
                       ('mfcc', 1, nDCTCoefs - nIndexSkip)]
    sone_template = [('sones', 0, 1)]

    # Initialize the feature vector holders
    feature_holder = featurevector.feature_holder(vector_template, filepath)
    sone_holder = featurevector.feature_holder(sone_template, filepath)

    print("Feature Extraction Mode\n")
    print(datetime.now())

    # For each chunk of audio
    while afm.HasMoreData():
        audioChunk, chunkIndex = afm.GetNextSegment()
        if debug:
            # Convert to float before dividing so the reported duration keeps
            # its fractional part even when the sample rate is an integer
            # (plain "/" floors two ints under Python 2).
            chunkSeconds = float(len(audioChunk)) / fs
            print("Read %d sample chunk of audio (%0.2fs)" % (
                len(audioChunk), chunkSeconds))

        # Get events. The envelope returned with the event times is not
        # used by any later step in this function.
        eventTimes, envelope = GetEvents(audioChunk, fftParams, debug)
        if debug:
            print("EVENTTIMES: %s" % (eventTimes,))

        # Event times scaled by the sample rate and truncated to ints
        eventTimesSamps = np.asarray(np.multiply(eventTimes, fs), dtype=int)

        # Get event audio segments
        eventSegments = GetEventAudioSegments(eventTimesSamps, audioChunk,
                                              debug)

        # Get the sones for each segment / event
        eventSegmentSones = GetEventSones(eventSegments, fftParams, debug)

        # Get the MFCCs for each segment / event
        eventSegmentMFCCs = GetEventMFCCs(eventSegments, fftParams,
                                          mfccParams, debug)

        # Time-average for each segment / event
        averagedEventSegmentMFCCs, averagedEventSegmentSones = \
            AverageEventFeatures(eventSegmentMFCCs, eventSegmentSones,
                                 seglen, fftParams, debug)

        # Store these vectors in the feature holders, labelled with their time
        StoreFeatureVector(feature_holder, sone_holder,
                           averagedEventSegmentMFCCs,
                           averagedEventSegmentSones,
                           chunkIndex, chunk_len, eventTimes, debug)

    # Write features to disk
    print(datetime.now())
    for holder, filename in ((feature_holder, FEATURE_VECTOR_FILENAME),
                             (sone_holder, SONE_VECTOR_FILENAME)):
        fileSize = holder.save(filename)
        print("Wrote %s bytes to disk. (%s)" % (fileSize, filename))