Example #1
0
    def _ExtractAll(self, audio_location, sample_rate):
        """Extract the whole feature set from one audio file with Yaafe.

        A FeaturePlan is created at ``sample_rate``, every feature listed
        below is registered on it, the file at ``audio_location`` is run
        through the engine, and the result of ``Engine.readAllOutputs()`` is
        returned unchanged.
        """
        # One "<output name>: <feature definition>" entry per feature,
        # registered in exactly this order.
        feature_specs = (
            'zcr: ZCR',
            'mfcc: MFCC',
            'mfcc_D1: MFCC > Derivate DOrder=1',
            'flux: SpectralFlux',
            'energy: Energy',
            'loudness: Loudness',
            'obsi: OBSI',
            'sharpness: PerceptualSharpness',
            'spread: PerceptualSpread',
            'rolloff: SpectralRolloff',
            'variation: SpectralVariation',
        )

        plan = FeaturePlan(sample_rate=sample_rate)
        for spec in feature_specs:
            plan.addFeature(spec)
        dataflow = plan.getDataFlow()

        # Load the dataflow into a fresh engine and run the file through it.
        extractor = Engine()
        extractor.load(dataflow)
        AudioFileProcessor().processFile(extractor, audio_location)

        return extractor.readAllOutputs()
class MFCCExtractor:
    """Compute the Yaafe 'mfcc' output for audio files.

    Keyword Args:
        block_size: value used for ``blockSize`` in the feature definition
            (1024 when omitted).
        step_size: value used for ``stepSize`` in the feature definition
            (512 when omitted).
    """

    def __init__(self, **args):
        self.engine = Engine()
        self.afp = AudioFileProcessor()

        self.block_size = args.get('block_size', 1024)
        self.step_size = args.get('step_size', 512)

    def extract(self, path, **args):
        """Return the 'mfcc' output for the audio file at ``path``.

        The sample rate is read from the file header via the ``wave`` module,
        so ``path`` must be readable by ``wave.open``.

        Keyword Args:
            save_to_disk: when truthy, the result is also written to
                ``<path>.mfc.csv``.
        """
        self.path = path
        # Single-argument parenthesised form prints the same on Python 2 and 3.
        print(path)

        # Only the header is needed, so release the file handle right away
        # instead of leaking one handle per call. Header getters such as
        # getframerate() keep working on the closed reader kept in self.wave.
        reader = wave.open(path, 'r')
        try:
            self.rate = reader.getframerate()
        finally:
            reader.close()
        self.wave = reader

        fp = FeaturePlan(sample_rate=self.rate)
        fp.addFeature('mfcc: MFCC blockSize={} stepSize={}'.format(
            self.block_size, self.step_size))
        self.engine.load(fp.getDataFlow())
        self.afp.processFile(self.engine, path)

        mfcc = self.engine.readAllOutputs()['mfcc']

        # Honour the flag's value: save_to_disk=False must not write a file.
        if args.get('save_to_disk'):
            self.__save_to_disk(mfcc)

        return mfcc

    def __save_to_disk(self, feats):
        """Write ``feats`` as comma-separated text next to the audio file."""
        new_path = self.path + ".mfc.csv"
        np.savetxt(new_path, feats, delimiter=",")
Example #3
0
    def _energy(audio_location, sample_rate):
        """Extract the Yaafe 'Energy' feature from one audio file.

        Per the original notes this mirrors
        ``python yaafe.py -r SAMPLERATE -f "energy: Energy PARAMETERS" WAV-LOCATION``
        where blockSize (default 1024) is the output frame size and stepSize
        (default 512) the step between consecutive frames; neither is
        overridden here.

        Returns the result of ``Engine.readAllOutputs()``.
        """
        plan = FeaturePlan(sample_rate=sample_rate)
        plan.addFeature('Energy: Energy')
        dataflow = plan.getDataFlow()

        # Load the dataflow into a fresh engine and process the file.
        extractor = Engine()
        extractor.load(dataflow)
        AudioFileProcessor().processFile(extractor, audio_location)

        return extractor.readAllOutputs()
Example #4
0
File: frames.py Project: EQ4/Yaafe
def main(argv):
    """Extract raw frames (and optionally a beat histogram) from one song.

    Args:
        argv: command-line vector; ``argv[1]`` is the audio file path.

    When ``argv`` does not hold exactly two items a usage line is printed and
    the function returns without doing anything else.

    Raises:
        IOError: if the audio file does not exist.
    """
    if len(argv) != 2:
        # Single-argument parenthesised print works on Python 2 and 3 alike.
        print("usage: python %s foo.mp3" % (argv[0]))
        return

    song_path = argv[1]
    # Explicit check instead of ``assert`` (asserts are stripped under -O),
    # done before any engine is built so a bad path fails fast.
    if not os.path.exists(song_path):
        raise IOError("audio file not found: %s" % song_path)

    # The beat histogram is only requested when YAAFE_PATH is set.
    want_beat_hist = "YAAFE_PATH" in os.environ

    fp = FeaturePlan(sample_rate=SAMPLE_RATE, resample=True,
                     time_start=TIME_START, time_limit=TIME_LIMIT)
    fp.addFeature("frames: Frames blockSize={0} stepSize={1}".format(
        SLICE_WINSIZE, SLICE_STEPSIZE))
    if want_beat_hist:
        fp.addFeature(
            "beat_hist: BeatHistogramSummary ACPNbPeaks=3  BHSBeatFrameSize=128  BHSBeatFrameStep=64  "
            "BHSHistogramFrameSize=40  BHSHistogramFrameStep=40  FFTLength=0  FFTWindow=Hanning  "
            "HInf=40  HNbBins=80  HSup=200  NMANbFrames=5000  blockSize=1024  stepSize=512"
        )

    engine = Engine()
    engine.load(fp.getDataFlow())
    AudioFileProcessor().processFile(engine, song_path)

    # Flatten the per-frame output into one array to count the samples read.
    frames = np.concatenate(engine.readOutput("frames"))
    print("time start: %ss" % TIME_START)
    print("time limit: %ss" % TIME_LIMIT)
    print("duration: %s" % (1.0 * frames.size / SAMPLE_RATE))

    if want_beat_hist:
        beat_hist = engine.readOutput("beat_hist")
        print("beat_hist: %s" % beat_hist)
class MFCCExtractor:
  """Compute the Yaafe 'mfcc' output for audio files.

  Keyword Args:
    block_size: value used for ``blockSize`` in the feature definition
      (1024 when omitted).
    step_size: value used for ``stepSize`` in the feature definition
      (512 when omitted).
  """

  def __init__(self, **args):
    self.engine = Engine()
    self.afp = AudioFileProcessor()

    self.block_size = args.get('block_size', 1024)
    self.step_size = args.get('step_size', 512)

  def extract(self, path, **args):
    """Return the 'mfcc' output for the audio file at ``path``.

    The sample rate is read from the file header via the ``wave`` module,
    so ``path`` must be readable by ``wave.open``.

    Keyword Args:
      save_to_disk: when truthy, the result is also written to
        ``<path>.mfc.csv``.
    """
    self.path = path
    # Single-argument parenthesised form prints the same on Python 2 and 3.
    print(path)

    # Only the header is needed, so release the file handle right away
    # instead of leaking one handle per call. Header getters such as
    # getframerate() keep working on the closed reader kept in self.wave.
    reader = wave.open(path, 'r')
    try:
      self.rate = reader.getframerate()
    finally:
      reader.close()
    self.wave = reader

    fp = FeaturePlan(sample_rate=self.rate)
    fp.addFeature('mfcc: MFCC blockSize={} stepSize={}'.format(
        self.block_size, self.step_size))
    self.engine.load(fp.getDataFlow())
    self.afp.processFile(self.engine, path)

    mfcc = self.engine.readAllOutputs()['mfcc']

    # Honour the flag's value: save_to_disk=False must not write a file.
    if args.get('save_to_disk'):
      self.__save_to_disk(mfcc)

    return mfcc

  def __save_to_disk(self, feats):
    """Write ``feats`` as comma-separated text next to the audio file."""
    new_path = self.path + ".mfc.csv"
    np.savetxt(new_path, feats, delimiter=",")
Example #6
0
class ConstantQ:
    """Constant-Q transform of an audio file, computed with Yaafe.

    Args:
        num_bands: passed to Yaafe as ``CQTBinsPerOctave``.
        fmin: passed to Yaafe as ``CQTMinFreq``.
        num_octaves: passed to Yaafe as ``CQTNbOctaves``.
        fps: frames per second; the Yaafe ``stepSize`` is
            ``sample_rate // fps`` samples.
        align: passed to Yaafe as ``CQTAlign``.
        log_div: when truthy, the output is ``log(cqt / log_div + 1)``;
            otherwise the raw CQT is returned.
        sample_rate: sample rate used for both loading and analysis.
        fold: accepted for interface compatibility; not used by the code
            in this class.
    """

    def __init__(self,
                 num_bands,
                 fmin,
                 num_octaves,
                 fps,
                 align,
                 log_div,
                 sample_rate=44100,
                 fold=None):

        self.fps = fps
        self.num_bands = num_bands
        self.align = align
        self.fmin = fmin
        self.num_octaves = num_octaves
        self.log_div = log_div

        self.sample_rate = sample_rate

        # Imported lazily so the class can be defined without yaafelib.
        from yaafelib import FeaturePlan, Engine

        fp = FeaturePlan(sample_rate=sample_rate)
        fp.addFeature(self._cqt_feature_spec(
            num_bands, fmin, num_octaves, fps, align, sample_rate))

        df = fp.getDataFlow()
        self.engine = Engine()
        self.engine.load(df)

    @staticmethod
    def _cqt_feature_spec(num_bands, fmin, num_octaves, fps, align,
                          sample_rate):
        """Build the Yaafe feature definition string for the CQT."""
        # stepSize is a sample count: force an integer so that true division
        # (Python 3, or float arguments) cannot produce e.g. "441.0".
        step_size = int(sample_rate // fps)
        return " ".join([
            'cqt: CQT', 'CQTAlign={}'.format(align),
            'CQTBinsPerOctave={}'.format(num_bands),
            'CQTMinFreq={}'.format(fmin),
            'CQTNbOctaves={}'.format(num_octaves),
            'stepSize={}'.format(step_size)
        ])

    @property
    def name(self):
        """Identifier string encoding this instance's parameters."""
        return 'cqt_fps={}_num-bands={}_align={}_fmin={}_num_oct={}'\
               '_logdiv={}'.format(self.fps, self.num_bands, self.align,
                                   self.fmin, self.num_octaves, self.log_div)

    def __call__(self, audio_file):
        """Load ``audio_file`` as mono float64 and return its CQT as float32."""
        audio = mm.audio.signal.Signal(audio_file,
                                       sample_rate=self.sample_rate,
                                       num_channels=1).astype(np.float64)

        # The audio is handed to the engine as a single-row 2-D array.
        cqt = self.engine.processAudio(audio.reshape((1, -1)))['cqt']
        # Compensate for different padding in madmom vs. yaafe by appending
        # one all-zero frame, and convert to float32.
        cqt = np.vstack((cqt, np.zeros(cqt.shape[1:]))).astype(np.float32)

        if self.log_div:
            return np.log(cqt / self.log_div + 1)
        return cqt
    def __init__(self, **args):
        """Create the Yaafe engine and file processor and store framing options.

        Keyword Args:
            block_size: stored as ``self.block_size`` (1024 when omitted).
            step_size: stored as ``self.step_size`` (512 when omitted).
        """
        self.engine = Engine()
        self.afp = AudioFileProcessor()

        self.block_size = args.get('block_size', 1024)
        self.step_size = args.get('step_size', 512)
Example #8
0
def compute_spec(song_path):
    """Return the Yaafe 'spec' (PowerSpectrum) output for ``song_path``.

    The plan is built at 22050 Hz with ``resample=True`` and uses
    blockSize=1024, stepSize=512.
    """
    plan = FeaturePlan(sample_rate=22050, resample=True)
    plan.addFeature("spec: PowerSpectrum blockSize=1024 stepSize=512")
    dataflow = plan.getDataFlow()

    extractor = Engine()
    extractor.load(dataflow)
    AudioFileProcessor().processFile(extractor, song_path)

    return extractor.readOutput('spec')
Example #9
0
def compute_spec(song_path):
    """Return the Yaafe 'spec' (PowerSpectrum) output for ``song_path``.

    The plan is built at 22050 Hz with ``resample=True`` and uses
    blockSize=1024, stepSize=512.
    """
    plan = FeaturePlan(sample_rate=22050, resample=True)
    plan.addFeature("spec: PowerSpectrum blockSize=1024 stepSize=512")
    dataflow = plan.getDataFlow()

    extractor = Engine()
    extractor.load(dataflow)
    AudioFileProcessor().processFile(extractor, song_path)

    return extractor.readOutput('spec')
Example #10
0
 def initialize(self, feature_dict):
     """Build the Yaafe plan, dataflow, engine and file processor.

     ``feature_dict`` maps an output name to its feature definition; each
     pair is registered on the plan as "<name>: <definition>". The plan is
     created at ``self.sample_rate`` with ``normalize=0.98``.

     Returns ``self`` so calls can be chained.
     """
     self.feature_dict = feature_dict

     plan = FeaturePlan(sample_rate=self.sample_rate, normalize=0.98)
     self.fp = plan
     for feat_name, definition in self.feature_dict.items():
         plan.addFeature("{0}: {1}".format(feat_name, definition))

     self.df = plan.getDataFlow()
     self.engine = Engine()
     self.engine.load(self.df)
     self.afp = AudioFileProcessor()
     return self
Example #11
0
 def _ExtractZCRAndFlux(self, audio_location, sample_rate):
     """Extract the 'zcr' (ZCR) and 'flux' (SpectralFlux) features.

     Returns the result of ``Engine.readAllOutputs()`` for the file at
     ``audio_location``, analysed at ``sample_rate``.
     """
     plan = FeaturePlan(sample_rate=sample_rate)
     for spec in ('zcr: ZCR', 'flux: SpectralFlux'):
         plan.addFeature(spec)
     dataflow = plan.getDataFlow()

     extractor = Engine()
     extractor.load(dataflow)
     AudioFileProcessor().processFile(extractor, audio_location)

     return extractor.readAllOutputs()
Example #12
0
def startEngine(path):
    """Run the module-level dataflow ``df`` over the audio file at ``path``.

    The result of ``Engine.readAllOutputs()`` is stored in the global
    ``features``; a fixed status string is returned.
    """
    global features

    extractor = Engine()
    extractor.load(df)

    AudioFileProcessor().processFile(extractor, path)
    features = extractor.readAllOutputs()

    return 'Yaafe引擎启动成功'
Example #13
0
def get_engine(audio_freq, mfcc_block_size, mfcc_step_size):
    """Prepare a Yaafe processor/engine pair for first-order MFCC derivatives.

    The single feature 'mfcc' is defined as MFCC (with the given blockSize
    and stepSize) piped through ``Derivate DOrder=1``, on a plan created at
    ``audio_freq``.

    Returns:
        ``(AudioFileProcessor, Engine)`` with the dataflow already loaded.
    """
    spec = 'mfcc: MFCC blockSize=%d stepSize=%d > Derivate DOrder=1' % (
        mfcc_block_size, mfcc_step_size)

    plan = FeaturePlan(sample_rate=audio_freq)
    plan.addFeature(spec)
    dataflow = plan.getDataFlow()

    processor = AudioFileProcessor()
    extractor = Engine()
    extractor.load(dataflow)

    return (processor, extractor)
Example #14
0
def init():
    """Create the global Yaafe ``engine`` for the perceptual feature set.

    The plan runs at 44100 Hz with ``resample=True``, ``time_start=0`` and
    ``time_limit=20``. Returns a fixed status string.
    """
    global engine

    plan = FeaturePlan(sample_rate=44100, resample=True,
                       time_start=0, time_limit=20)
    for spec in (
            "loudness: Loudness",
            "perceptualSharpness: PerceptualSharpness",
            "perceptualSpread: PerceptualSpread",
            "obsi: OBSI",
            "obsir: OBSIR",
    ):
        plan.addFeature(spec)
    dataflow = plan.getDataFlow()

    # Engine setup
    engine = Engine()
    engine.load(dataflow)

    return 'initialization'
Example #15
0
    def _zcr(self, audio_location, sample_rate):
        """Extract the Yaafe 'ZCR' feature from one audio file.

        Per the original notes this mirrors
        ``python yaafe.py -r SAMPLERATE -f "zcr: ZCR blockSize=1024  stepSize=512" WAV-LOCATION``
        where SAMPLERATE is the sample rate of the processed file, the text
        before the colon names the output, ZCR is the extracted feature,
        blockSize is the output frame size and stepSize the step between
        consecutive frames. Both sizes are left at their defaults here.

        Returns the result of ``Engine.readAllOutputs()``. The engine is
        local to this call and is not reset afterwards.
        """
        plan = FeaturePlan(sample_rate=sample_rate)
        plan.addFeature('ZCR: ZCR')
        dataflow = plan.getDataFlow()

        # Load the dataflow into a fresh engine and process the file.
        extractor = Engine()
        extractor.load(dataflow)
        AudioFileProcessor().processFile(extractor, audio_location)

        return extractor.readAllOutputs()
Example #16
0
	def __init__(self):
		"""Set up the Yaafe plan, engine and processor plus empty result lists."""
		plan = FeaturePlan(sample_rate=44100)
		# NOTE(review): the definition after 'feat:' is empty - confirm which
		# feature was meant to be extracted.
		plan.addFeature('feat: ')
		self.fp = plan

		extractor = Engine()
		extractor.load(plan.getDataFlow())
		self.engine = extractor
		self.afp = AudioFileProcessor()

		self.mfcc, self.songs = [], []
def init():
    """Create the global Yaafe ``engine`` for the timbre feature set.

    The plan runs at 44100 Hz with ``resample=True``, ``time_start=0`` and
    ``time_limit=60``. Returns a fixed status string.
    """
    global engine

    plan = FeaturePlan(sample_rate=44100, resample=True,
                       time_start=0, time_limit=60)
    for spec in (
            "mfcc: MFCC",               # mel-frequency cepstral coefficients
            "energy: Energy",           # short-time energy
            "zcr: ZCR",                 # zero-crossing rate
            "sf: SpectralFlux",         # spectral flux
            "sr: SpectralRolloff",      # spectral roll-off point
            "lpc: LPC LPCNbCoeffs=3",   # linear predictive coding, 3 coeffs
            "lx: Loudness",             # loudness
    ):
        plan.addFeature(spec)
    dataflow = plan.getDataFlow()

    # Configure the engine
    engine = Engine()
    engine.load(dataflow)

    return 'Yaafe初始化'
Example #18
0
def main(audio_file, variables_to_extract):
    """Extract the requested variables from ``audio_file`` into ./output.

    Any existing 'output' directory is removed and recreated first. Each
    entry of ``variables_to_extract`` is looked up in
    ``feat_json['variables']`` to obtain its Yaafe feature definition, and
    the loaded engine is handed to ``process_audio``.
    """
    # Start from an empty output folder.
    if isdir('output'):
        shutil.rmtree('output')
    os.mkdir('output')

    # Build the dataflow from the selected feature definitions.
    plan = FeaturePlan(sample_rate=44100)
    for variable in variables_to_extract:
        plan.addFeature(feat_json['variables'][variable])
    dataflow = plan.getDataFlow()

    extractor = Engine()
    extractor.load(dataflow)

    # Per the original note, this writes CSV files with the audio variables.
    process_audio(audio_file, extractor)
Example #19
0
def main(argv):
    """Extract raw frames (and optionally a beat histogram) from one song.

    Args:
        argv: command-line vector; ``argv[1]`` is the audio file path.

    When ``argv`` does not hold exactly two items a usage line is printed and
    the function returns without doing anything else.

    Raises:
        IOError: if the audio file does not exist.
    """
    if len(argv) != 2:
        # Single-argument parenthesised print works on Python 2 and 3 alike.
        print('usage: python %s foo.mp3' % (argv[0]))
        return

    song_path = argv[1]
    # Explicit check instead of ``assert`` (asserts are stripped under -O),
    # done before any engine is built so a bad path fails fast.
    if not os.path.exists(song_path):
        raise IOError('audio file not found: %s' % song_path)

    # The beat histogram is only requested when YAAFE_PATH is set.
    want_beat_hist = 'YAAFE_PATH' in os.environ

    fp = FeaturePlan(sample_rate=SAMPLE_RATE,
                     resample=True,
                     time_start=TIME_START,
                     time_limit=TIME_LIMIT)
    fp.addFeature("frames: Frames blockSize={0} stepSize={1}".format(
        SLICE_WINSIZE, SLICE_STEPSIZE))
    if want_beat_hist:
        fp.addFeature(
            "beat_hist: BeatHistogramSummary ACPNbPeaks=3  BHSBeatFrameSize=128  BHSBeatFrameStep=64  "
            "BHSHistogramFrameSize=40  BHSHistogramFrameStep=40  FFTLength=0  FFTWindow=Hanning  "
            "HInf=40  HNbBins=80  HSup=200  NMANbFrames=5000  blockSize=1024  stepSize=512"
        )

    engine = Engine()
    engine.load(fp.getDataFlow())
    AudioFileProcessor().processFile(engine, song_path)

    # Flatten the per-frame output into one array to count the samples read.
    frames = np.concatenate(engine.readOutput('frames'))
    print('time start: %ss' % TIME_START)
    print('time limit: %ss' % TIME_LIMIT)
    print('duration: %s' % (1. * frames.size / SAMPLE_RATE))

    if want_beat_hist:
        beat_hist = engine.readOutput('beat_hist')
        print('beat_hist: %s' % beat_hist)
 def initialize(self, feature_dict):
     """Build the Yaafe plan, dataflow, engine and file processor.

     ``feature_dict`` maps an output name to its feature definition; each
     pair is registered on the plan as "<name>: <definition>". The plan is
     created at ``self.sample_rate`` with ``normalize=0.98``.

     Returns ``self`` so calls can be chained.
     """
     self.feature_dict = feature_dict

     plan = FeaturePlan(sample_rate=self.sample_rate, normalize=0.98)
     self.fp = plan
     for feat_name, definition in self.feature_dict.items():
         plan.addFeature("{0}: {1}".format(feat_name, definition))

     self.df = plan.getDataFlow()
     self.engine = Engine()
     self.engine.load(self.df)
     self.afp = AudioFileProcessor()
     return self
Example #21
0
class Extractor(object):
    """Collect a Yaafe feature for every wav file under a directory."""

    def __init__(self):
        self.fp = FeaturePlan(sample_rate=44100)
        # NOTE(review): the definition after 'feat:' is empty - confirm which
        # feature was meant to be extracted.
        self.fp.addFeature('feat: ')
        self.engine = Engine()
        self.engine.load(self.fp.getDataFlow())
        self.afp = AudioFileProcessor()

        self.mfcc = []
        self.songs = []

    def recurse(self, directory):
        """Return the paths of all ``.wav`` files found under ``directory``.

        The list is also printed before it is returned.
        """
        results = []
        for root, _dirs, files in os.walk(directory):
            for f in files:
                # Match on the real extension so names that merely end in
                # the letters "wav" are not picked up. os.walk yields each
                # file once, so no duplicate check is needed.
                if f.endswith('.wav'):
                    results.append(os.path.join(root, f))

        # Single-argument parenthesised print works on Python 2 and 3 alike.
        print(results)
        return results

    def extract_feature(self, f):
        """Process file ``f`` and return the engine's 'feat' output."""
        self.afp.processFile(self.engine, f)
        feats = self.engine.readAllOutputs()
        return feats['feat']

    def build_feature_space(self):
        """Fill ``self.space`` from the wav files under ``sys.argv[1]``.

        Prints ``min_dist`` of the last feature added (``None`` is passed
        when no wav file was found).
        """
        self.space = FeatureSpace('mfcc')

        last_feature = None
        for f in self.recurse(sys.argv[1]):
            data = self.extract_feature(f)
            temp_feature = Feature(f, ['mfcc', data])
            self.space.add(temp_feature)
            last_feature = temp_feature

        print(self.space.min_dist(last_feature))
Example #22
0
    def _mfcc(self, audio_location, sample_rate):
        """Extract the Yaafe 'mfcc' (MFCC) feature from one audio file.

        Per the original notes this mirrors
        ``python yaafe.py -r SAMPLERATE -f "mfcc: MFCC PARAMETERS" WAV-LOCATION``.
        No parameter is overridden here; the documented MFCC parameters are:

        - CepsIgnoreFirstCoeff (default=1): 0 keeps the first cepstral
          coefficient, 1 ignores it
        - CepsNbCoeffs (default=13): number of cepstral coefficients to keep
        - FFTWindow (default=Hanning): weighting window applied before the
          FFT (Hanning|Hamming|None)
        - MelMaxFreq (default=6854.0): maximum frequency of the mel filter bank
        - MelMinFreq (default=130.0): minimum frequency of the mel filter bank
        - MelNbFilters (default=40): number of mel filters
        - blockSize (default=1024): output frame size
        - stepSize (default=512): step between consecutive frames

        Returns the result of ``Engine.readAllOutputs()``.
        """
        plan = FeaturePlan(sample_rate=sample_rate)
        plan.addFeature('mfcc: MFCC')
        dataflow = plan.getDataFlow()

        # Load the dataflow into a fresh engine and process the file.
        extractor = Engine()
        extractor.load(dataflow)
        AudioFileProcessor().processFile(extractor, audio_location)

        return extractor.readAllOutputs()
Example #23
0
    def _SpectralFlux(self, audio_location, sample_rate):
        """Extract the Yaafe 'Flux' (SpectralFlux) feature from one audio file.

        Per the original notes this mirrors
        ``python yaafe.py -r SAMPLERATE -f "flux: SpectralFlux PARAMETERS" WAV-LOCATION``.
        No parameter is overridden here; the documented parameters are:

        - FFTLength (default=0): frame length on which the FFT is performed;
          the original frame is zero-padded or truncated to this size, 0
          keeps the original frame length
        - FFTWindow (default=Hanning): weighting window applied before the
          FFT (Hanning|Hamming|None)
        - FluxSupport (default=All): 'All' uses every bin, 'Increase' only
          the bins that are increasing
        - blockSize (default=1024): output frame size
        - stepSize (default=512): step between consecutive frames

        Returns the result of ``Engine.readAllOutputs()``.
        """
        plan = FeaturePlan(sample_rate=sample_rate)
        plan.addFeature('Flux: SpectralFlux')
        dataflow = plan.getDataFlow()

        # Load the dataflow into a fresh engine and process the file.
        extractor = Engine()
        extractor.load(dataflow)
        AudioFileProcessor().processFile(extractor, audio_location)

        return extractor.readAllOutputs()
Example #24
0
    def __init__(self,
                 num_bands,
                 fmin,
                 num_octaves,
                 fps,
                 align,
                 log_div,
                 sample_rate=44100,
                 fold=None):
        """Store the CQT parameters and load a matching Yaafe engine.

        Args:
            num_bands: passed to Yaafe as ``CQTBinsPerOctave``.
            fmin: passed to Yaafe as ``CQTMinFreq``.
            num_octaves: passed to Yaafe as ``CQTNbOctaves``.
            fps: frames per second; ``stepSize`` is ``sample_rate // fps``.
            align: passed to Yaafe as ``CQTAlign``.
            log_div: stored as ``self.log_div``; not used in this method.
            sample_rate: sample rate of the feature plan.
            fold: accepted for interface compatibility; not used here.
        """
        self.fps = fps
        self.num_bands = num_bands
        self.align = align
        self.fmin = fmin
        self.num_octaves = num_octaves
        self.log_div = log_div

        self.sample_rate = sample_rate

        # Imported lazily so the module can be loaded without yaafelib.
        from yaafelib import FeaturePlan, Engine

        fp = FeaturePlan(sample_rate=sample_rate)

        # stepSize is a sample count: force an integer so that true division
        # (Python 3, or float arguments) cannot produce e.g. "441.0".
        step_size = int(sample_rate // fps)

        cqt_config = " ".join([
            'cqt: CQT', 'CQTAlign={}'.format(align),
            'CQTBinsPerOctave={}'.format(num_bands),
            'CQTMinFreq={}'.format(fmin),
            'CQTNbOctaves={}'.format(num_octaves),
            'stepSize={}'.format(step_size)
        ])

        fp.addFeature(cqt_config)

        df = fp.getDataFlow()
        self.engine = Engine()
        self.engine.load(df)
Example #25
0
def main(argv):
    """Extract the raw frames of one song and plot them.

    Args:
        argv: command-line vector; ``argv[1]`` is the audio file path.

    When ``argv`` does not hold exactly two items a usage line is printed and
    the function returns without doing anything else.

    Raises:
        IOError: if the audio file does not exist.
    """
    if len(argv) != 2:
        # Single-argument parenthesised print works on Python 2 and 3 alike.
        print('usage: python %s foo.mp3' % (argv[0]))
        return

    song_path = argv[1]
    # Explicit check instead of ``assert`` (asserts are stripped under -O),
    # done before any engine is built so a bad path fails fast.
    if not os.path.exists(song_path):
        raise IOError('audio file not found: %s' % song_path)

    fp = FeaturePlan(sample_rate=SAMPLE_RATE, resample=True,
                     time_start=TIME_START, time_limit=TIME_LIMIT)
    fp.addFeature("frames: Frames blockSize={0} stepSize={1}".format(
        SLICE_WINSIZE, SLICE_STEPSIZE))

    engine = Engine()
    engine.load(fp.getDataFlow())
    AudioFileProcessor().processFile(engine, song_path)

    # Flatten the per-frame output into one array to count the samples read.
    frames = np.concatenate(engine.readOutput('frames'))
    print('time start: %ss' % TIME_START)
    print('time limit: %ss' % TIME_LIMIT)
    print('duration: %s' % (1. * frames.size / SAMPLE_RATE))

    # pylab is imported only here, once there is something to plot.
    from pylab import plot, show
    plot(frames)
    show()
Example #26
0
import sys
from math import sqrt
import time
import wave
import operator
from datetime import datetime
import numpy
from yaafelib import FeaturePlan, Engine
import matplotlib.pyplot as plot
import scipy.io.wavfile as wavfile

# Converters keyed by metadata field name. An 'Encoded date' string such as
# "UTC 2010-01-02 03:04:05" is parsed and turned into a timestamp with
# time.mktime(). Written as ``lambda x`` because the parenthesised
# ``lambda(x)`` parameter form is Python 2-only syntax.
converter = {
    'Encoded date': lambda x: time.mktime(
        datetime.strptime(x, "%Z %Y-%m-%d %H:%M:%S").timetuple()),
}

# Module-level MFCC pipeline, built once at import time: a 16 kHz feature
# plan with normalize=True and a single 'mfcc' feature (blockSize=512,
# stepSize=256), loaded into the shared ``engine``.
fpMFCC = FeaturePlan(sample_rate=16000, normalize=True)
fpMFCC.addFeature('mfcc: MFCC blockSize=512 stepSize=256')
engine = Engine()
engine.load(fpMFCC.getDataFlow())

def getTextUnderNode(node):
    """Return the text found beneath ``node``, joined with single spaces.

    Direct text children contribute their data with newlines removed and
    surrounding whitespace stripped; every other child contributes the
    (stripped) result of a recursive call. One entry is produced per child,
    so a child without text still yields an empty entry in the join.
    """
    def _text_of(child):
        # Text node: clean it up; anything else: descend into it.
        if child.nodeType == node.TEXT_NODE:
            return child.data.replace('\n', '').strip()
        return getTextUnderNode(child).strip()

    return ' '.join([_text_of(child) for child in node.childNodes])

def visualizeResult(stream1, stream2, rate, bestPosition, distances, filename):
    plot.subplot(3, 1, 1)
class Yaafe(EnhancedObject):
    '''Yaafe toolbox wrapper. To be used with Database object.

    Attributes:
        sample_rate     The files' sample rate
        features        Mapping of output name -> Yaafe feature definition

    Methods:
        process(audiofile)          Process audiofile and return features
        save_fplan(name)            Write the feature plan to <name>.txt
        get_X(entries_list, feat)   Fetch array of processed data from Database
        get_y                       Fetch subdir i's from Database
        (get_X / get_y are not defined in this class body - presumably
        inherited; verify against EnhancedObject)

    Init:
        Yaafe(sample_rate, features=None)
    '''
    # Default feature plan. Every feature uses blockSize=512 / stepSize=128
    # and is piped through StatisticalIntegrator NbFrames=40 StepNbFrames=8.
    _features = {
        'spec_rolloff': ("SpectralRolloff blockSize=512 stepSize=128 "
                         "> StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'spec_shape': ("SpectralShapeStatistics blockSize=512 stepSize=128 "
                       "> StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'spec_flux': ("SpectralFlux blockSize=512 stepSize=128 >"
                      "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'amp_mod': ("AmplitudeModulation blockSize=512 stepSize=128 >"
                    "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'auto': ("AutoCorrelation  blockSize=512 stepSize=128 >"
                 "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'lpc': ("LPC  blockSize=512 stepSize=128 > StatisticalIntegrator "
                "NbFrames=40 StepNbFrames=8"),
        'loudness': ("Loudness blockSize=512 stepSize=128 >"
                     "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'mfcc': ("MFCC blockSize=512 stepSize=128 > StatisticalIntegrator "
                 "NbFrames=40 StepNbFrames=8"),
        # The stray comma after blockSize=512 was removed so the parameter
        # value is a plain integer like in every other entry.
        'mel_spectrum': ("MelSpectrum blockSize=512 stepSize=128 >"
                         "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'obsi': ("OBSI blockSize=512 stepSize=128 > StatisticalIntegrator "
                 "NbFrames=40 StepNbFrames=8"),
        'obsir': ("OBSIR blockSize=512 stepSize=128 >"
                  "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'perc_sharp': ("PerceptualSharpness blockSize=512 stepSize=128 >"
                       "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'perc_spread': ("PerceptualSpread blockSize=512 stepSize=128 >"
                        "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'spect_crest': ("SpectralCrestFactorPerBand blockSize=512 "
                        "stepSize=128 > StatisticalIntegrator NbFrames=40 "
                        "StepNbFrames=8"),
        'spec_decr': ("SpectralDecrease blockSize=512 stepSize=128 >"
                      "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'spect_flat': ("SpectralFlatness blockSize=512 stepSize=128 >"
                       "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'spect_flat_band': ("SpectralFlatnessPerBand blockSize=512 "
                            "stepSize=128 > StatisticalIntegrator NbFrames=40 "
                            "StepNbFrames=8"),
        'spect_slope': ("SpectralSlope blockSize=512 stepSize=128 >"
                        "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'spect_var': ("SpectralVariation blockSize=512 stepSize=128 >"
                      "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'temp_shape': ("TemporalShapeStatistics blockSize=512 stepSize=128 "
                       "> StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'zcr': ("ZCR blockSize=512 stepSize=128 > StatisticalIntegrator "
                "NbFrames=40 StepNbFrames=8"),
        'env_shape': ("EnvelopeShapeStatistics blockSize=512 stepSize=128"
                      " > StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'comp_onest': ("ComplexDomainOnsetDetection blockSize=512 "
                       "stepSize=128 > StatisticalIntegrator NbFrames=40 "
                       "StepNbFrames=8"),
    }

    def __init__(self, sample_rate, features=None):
        """Build the engine for ``features`` (the class default when None)."""
        if features is None:
            features = self._features
        self.sample_rate = sample_rate
        self.initialize(features)
        self.features = features

    def initialize(self, feature_dict):
        """ Run the required boilerplate for yaafe

        Registers every "<name>: <definition>" pair of ``feature_dict`` on a
        plan created at ``self.sample_rate`` with ``normalize=0.98``, then
        builds the dataflow, engine and file processor. Returns ``self``.
        """
        self.feature_dict = feature_dict
        self.fp = FeaturePlan(
            sample_rate=self.sample_rate,
            normalize=0.98)
        for name, desc in self.feature_dict.items():
            self.fp.addFeature("{0}: {1}".format(name, desc))
        self.df = self.fp.getDataFlow()
        self.engine = Engine()
        self.engine.load(self.df)
        self.afp = AudioFileProcessor()
        return self

    def save_fplan(self, name):
        """ Save a feature plan (text file)

        Writes one "<name>: <definition>" line per feature to ``<name>.txt``.
        """
        # ``with`` closes the file even if a write fails; each feature gets
        # its own line instead of being run together on a single line.
        with open("{}.txt".format(name), 'w') as text_file:
            for feat_name, desc in self.features.items():
                text_file.write("{}: {}\n".format(feat_name, desc))

    def process(self, audiofile):
        """ Process function for running a file through yaafe's
            feature extractor
        """
        self.afp.processFile(self.engine, audiofile)
        out = self.engine.readAllOutputs()
        self.engine.flush()
        # NOTE(review): if ``out`` is the name -> array mapping, sorted(out)
        # returns only the sorted feature names - confirm this is intended.
        return sorted(out)
  def __init__(self, **args):
    """Create the Yaafe engine and file processor and store framing options.

    Keyword Args:
      block_size: stored as ``self.block_size`` (1024 when omitted).
      step_size: stored as ``self.step_size`` (512 when omitted).
    """
    self.engine = Engine()
    self.afp = AudioFileProcessor()

    self.block_size = args.get('block_size', 1024)
    self.step_size = args.get('step_size', 512)
Example #29
0
class Yaafe(EnhancedObject):
    '''Yaafe toolbox wrapper. To be used with Database object.

    Attributes:
        sample_rate     The files' sample rate
        features        Mapping of output name -> Yaafe feature definition

    Methods:
        process(audiofile)          Process audiofile and return features
        save_fplan(name)            Write the feature plan to <name>.txt
        get_X(entries_list, feat)   Fetch array of processed data from Database
        get_y                       Fetch subdir i's from Database
        (get_X / get_y are not defined in this class body - presumably
        inherited; verify against EnhancedObject)

    Init:
        Yaafe(sample_rate, features=None)
    '''
    # Default feature plan. Every feature uses blockSize=512 / stepSize=128
    # and is piped through StatisticalIntegrator NbFrames=40 StepNbFrames=8.
    _features = {
        'spec_rolloff': ("SpectralRolloff blockSize=512 stepSize=128 "
                         "> StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'spec_shape': ("SpectralShapeStatistics blockSize=512 stepSize=128 "
                       "> StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'spec_flux': ("SpectralFlux blockSize=512 stepSize=128 >"
                      "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'amp_mod': ("AmplitudeModulation blockSize=512 stepSize=128 >"
                    "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'auto': ("AutoCorrelation  blockSize=512 stepSize=128 >"
                 "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'lpc': ("LPC  blockSize=512 stepSize=128 > StatisticalIntegrator "
                "NbFrames=40 StepNbFrames=8"),
        'loudness': ("Loudness blockSize=512 stepSize=128 >"
                     "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'mfcc': ("MFCC blockSize=512 stepSize=128 > StatisticalIntegrator "
                 "NbFrames=40 StepNbFrames=8"),
        # The stray comma after blockSize=512 was removed so the parameter
        # value is a plain integer like in every other entry.
        'mel_spectrum': ("MelSpectrum blockSize=512 stepSize=128 >"
                         "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'obsi': ("OBSI blockSize=512 stepSize=128 > StatisticalIntegrator "
                 "NbFrames=40 StepNbFrames=8"),
        'obsir': ("OBSIR blockSize=512 stepSize=128 >"
                  "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'perc_sharp': ("PerceptualSharpness blockSize=512 stepSize=128 >"
                       "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'perc_spread': ("PerceptualSpread blockSize=512 stepSize=128 >"
                        "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'spect_crest': ("SpectralCrestFactorPerBand blockSize=512 "
                        "stepSize=128 > StatisticalIntegrator NbFrames=40 "
                        "StepNbFrames=8"),
        'spec_decr': ("SpectralDecrease blockSize=512 stepSize=128 >"
                      "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'spect_flat': ("SpectralFlatness blockSize=512 stepSize=128 >"
                       "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'spect_flat_band': ("SpectralFlatnessPerBand blockSize=512 "
                            "stepSize=128 > StatisticalIntegrator NbFrames=40 "
                            "StepNbFrames=8"),
        'spect_slope': ("SpectralSlope blockSize=512 stepSize=128 >"
                        "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'spect_var': ("SpectralVariation blockSize=512 stepSize=128 >"
                      "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'temp_shape': ("TemporalShapeStatistics blockSize=512 stepSize=128 "
                       "> StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'zcr': ("ZCR blockSize=512 stepSize=128 > StatisticalIntegrator "
                "NbFrames=40 StepNbFrames=8"),
        'env_shape': ("EnvelopeShapeStatistics blockSize=512 stepSize=128"
                      " > StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'comp_onest': ("ComplexDomainOnsetDetection blockSize=512 "
                       "stepSize=128 > StatisticalIntegrator NbFrames=40 "
                       "StepNbFrames=8"),
    }

    def __init__(self, sample_rate, features=None):
        """Build the engine for ``features`` (the class default when None)."""
        if features is None:
            features = self._features
        self.sample_rate = sample_rate
        self.initialize(features)
        self.features = features

    def initialize(self, feature_dict):
        """ Run the required boilerplate for yaafe

        Registers every "<name>: <definition>" pair of ``feature_dict`` on a
        plan created at ``self.sample_rate`` with ``normalize=0.98``, then
        builds the dataflow, engine and file processor. Returns ``self``.
        """
        self.feature_dict = feature_dict
        self.fp = FeaturePlan(sample_rate=self.sample_rate, normalize=0.98)
        for name, desc in self.feature_dict.items():
            self.fp.addFeature("{0}: {1}".format(name, desc))
        self.df = self.fp.getDataFlow()
        self.engine = Engine()
        self.engine.load(self.df)
        self.afp = AudioFileProcessor()
        return self

    def save_fplan(self, name):
        """ Save a feature plan (text file)

        Writes one "<name>: <definition>" line per feature to ``<name>.txt``.
        """
        # ``with`` closes the file even if a write fails; each feature gets
        # its own line instead of being run together on a single line.
        with open("{}.txt".format(name), 'w') as text_file:
            for feat_name, desc in self.features.items():
                text_file.write("{}: {}\n".format(feat_name, desc))

    def process(self, audiofile):
        """ Process function for running a file through yaafe's
            feature extractor
        """
        self.afp.processFile(self.engine, audiofile)
        out = self.engine.readAllOutputs()
        self.engine.flush()
        # NOTE(review): if ``out`` is the name -> array mapping, sorted(out)
        # returns only the sorted feature names - confirm this is intended.
        return sorted(out)
Example #30
0
def detect(wav_path, ans_path=None):
    """Decode the sequence of keys heard in ``wav_path`` and print it.

    The file is analysed with yaafe's ``PowerSpectrum`` feature.  Every
    spectrum frame is labelled as silent, unknown, or the key selected by
    the strongest ``LOWER_FREQS`` / ``UPPER_FREQS`` pair; the per-frame
    labels are then reduced by ``purge_seq`` and printed one per line.

    :param wav_path: audio file handed to ``AudioFileProcessor.processFile``.
    :param ans_path: optional text file with one expected entry per line.
        When given, each stripped line is asserted equal to the decoded
        entry at the same position before it is printed.
    :raises AssertionError: if a line of ``ans_path`` differs from the
        decoded entry at that position.
    """
    sample_rate = 8000
    block_size = 1024
    # Floor division keeps both values integers on Python 3 as well;
    # ``n_band`` is passed to ``range`` below, which rejects floats.
    step_size = block_size // 2
    n_band = block_size // 2
    # freq_bound[i] is the frequency attached to spectrum bin i: the range
    # 0 .. sample_rate / 2 spread evenly over n_band + 1 points.
    freq_bound = [i * sample_rate / 2. / n_band for i in range(n_band + 1)]

    plan = FeaturePlan(sample_rate=sample_rate, resample=True)
    plan.addFeature('power_spectrum: PowerSpectrum blockSize=%d stepSize=%d' %
                    (block_size, step_size))
    dataflow = plan.getDataFlow()
    afp = AudioFileProcessor()
    engine = Engine()
    engine.load(dataflow)
    afp.processFile(engine, wav_path)
    spectrogram = engine.readOutput('power_spectrum')

    seq = []
    for spectrum in spectrogram:
        mean_mag = np.mean(spectrum)
        if mean_mag <= SILENT_MAG_THRESHOLD:
            seq.append(SILENT_KEY)
            continue

        # (index into LOWER_FREQS / UPPER_FREQS, magnitude) of the strongest
        # candidate seen so far; -1 means "none found".
        lower_data = (-1, -1)
        upper_data = (-1, -1)

        for target_idx, target_freq in itertools.chain(enumerate(LOWER_FREQS),
                                                       enumerate(UPPER_FREQS)):
            # Linearly interpolate the magnitude at target_freq from the
            # two neighbouring bins.
            idx = bisect.bisect(freq_bound, target_freq)
            assert idx > 0
            freq1 = freq_bound[idx - 1]
            mag1 = spectrum[idx - 1]
            freq2 = freq_bound[idx]
            mag2 = spectrum[idx]
            w1 = 1. * (freq2 - target_freq) / (freq2 - freq1)
            w2 = 1. - w1
            target_mag = (w1 * mag1 + w2 * mag2)

            # Only tones clearly above the frame average are candidates.
            if target_mag > mean_mag * 2:
                # Frequencies below 1000 compete in the lower group, the
                # rest in the upper group.
                if target_freq < 1000:
                    if target_mag > lower_data[1]:
                        lower_data = (target_idx, target_mag)
                else:
                    if target_mag > upper_data[1]:
                        upper_data = (target_idx, target_mag)

        lower_idx = lower_data[0]
        upper_idx = upper_data[0]
        if lower_idx == -1 or upper_idx == -1:
            seq.append(UNKNOWN_KEY)
        else:
            # Row-major lookup: the row is the lower tone, the column the
            # upper tone, so the row stride is the number of upper tones.
            seq.append(KEYS[lower_idx * len(UPPER_FREQS) + upper_idx])

    ans = purge_seq(seq)
    if ans_path is not None:
        with open(ans_path) as fh:
            for i, line in enumerate(fh):
                line = line.strip()
                assert line == ans[i], "%s != %s" % (line, ans[i])
                # Single-argument print(...) behaves the same on Python 2
                # and Python 3.
                print('[%d] %s' % (i, line))
    else:
        for i, line in enumerate(ans):
            print('[%d] %s' % (i, line))
""" Stand-alone application to demonstrate yaafe's transformations """
import numpy as np
import matplotlib.pyplot as plt
from yaafelib import FeaturePlan, Engine, AudioFileProcessor


# Feature plan: the raw MFCC frames plus the same MFCCs passed through a
# StatisticalIntegrator.
FPLAN = FeaturePlan(sample_rate=44100)
for _feature_spec in (
        "mfcc: MFCC blockSize=512 stepSize=128",
        "mfcc_stat: MFCC blockSize=512 stepSize=128 > "
        "StatisticalIntegrator NbFrames=40 StepNbFrames=8",
):
    FPLAN.addFeature(_feature_spec)

# Load the plan's dataflow into an engine and run the demo track through it.
ENGINE = Engine()
ENGINE.load(FPLAN.getDataFlow())
PROCESSOR = AudioFileProcessor()

PROCESSOR.processFile(ENGINE, 'track.wav')
DATA = ENGINE.readAllOutputs()
ENGINE.flush()

X_MFCC = DATA['mfcc']
X_MFCC_STAT = DATA['mfcc_stat']

# Labels for bands 1..13: all "Average" entries first, then all
# "Standard Dev." entries.
_BANDS = range(1, 14)
MFCC_DESC = ["Average for Band {}".format(band) for band in _BANDS]
MFCC_DESC += ["Standard Dev. for Band {}".format(band) for band in _BANDS]

# Interactive mode on before the figure is created.
plt.ion()
FIG = plt.figure()
Example #32
0
""" Stand-alone application to demonstrate yaafe's transformations """
import numpy as np
import matplotlib.pyplot as plt
from yaafelib import FeaturePlan, Engine, AudioFileProcessor

# Two outputs are requested: plain MFCC frames ("mfcc") and the same MFCCs
# fed through a StatisticalIntegrator ("mfcc_stat").
FPLAN = FeaturePlan(sample_rate=44100)
FPLAN.addFeature(
    "mfcc: MFCC blockSize=512 stepSize=128"
)
FPLAN.addFeature(
    "mfcc_stat: MFCC blockSize=512 stepSize=128 > StatisticalIntegrator "
    "NbFrames=40 StepNbFrames=8"
)

# Engine driven by the plan's dataflow, and the processor that feeds it.
ENGINE = Engine()
ENGINE.load(FPLAN.getDataFlow())
PROCESSOR = AudioFileProcessor()

# Extract, collect every output, then flush the engine.
PROCESSOR.processFile(ENGINE, 'track.wav')
DATA = ENGINE.readAllOutputs()
ENGINE.flush()

X_MFCC, X_MFCC_STAT = DATA['mfcc'], DATA['mfcc_stat']

# 26 labels: "Average" for bands 1..13 followed by "Standard Dev." for
# bands 1..13.
_BAND_NUMBERS = range(1, 14)
MFCC_DESC = (
    ["Average for Band {}".format(number) for number in _BAND_NUMBERS]
    + ["Standard Dev. for Band {}".format(number) for number in _BAND_NUMBERS]
)

# Interactive plotting, then a 14 x 8 inch figure.
plt.ion()
FIG = plt.figure()
FIG.set_size_inches(14, 8)
Example #33
0
def detect(wav_path, ans_path=None):
    """Decode the sequence of keys heard in ``wav_path`` and print it.

    yaafe's ``PowerSpectrum`` feature is computed for the file.  Each
    spectrum frame becomes one label: ``SILENT_KEY`` when the frame's mean
    magnitude is at or below ``SILENT_MAG_THRESHOLD``, ``UNKNOWN_KEY`` when
    no lower/upper tone pair stands out, otherwise the matching entry of
    ``KEYS``.  The labels are reduced by ``purge_seq`` and printed.

    :param wav_path: audio file handed to ``AudioFileProcessor.processFile``.
    :param ans_path: optional text file with one expected entry per line.
        When given, each stripped line is asserted equal to the decoded
        entry at the same position before it is printed.
    :raises AssertionError: if a line of ``ans_path`` differs from the
        decoded entry at that position.
    """
    sample_rate = 8000
    block_size = 1024
    # ``//`` rather than ``/``: on Python 3 true division would yield
    # floats, and ``range(n_band + 1)`` below needs an integer.
    step_size = block_size // 2
    n_band = block_size // 2
    # Frequency attached to each spectrum bin: 0 .. sample_rate / 2 in
    # n_band equal steps.
    freq_bound = [i * sample_rate / 2. / n_band for i in range(n_band + 1)]

    plan = FeaturePlan(sample_rate=sample_rate, resample=True)
    plan.addFeature(
        'power_spectrum: PowerSpectrum blockSize=%d stepSize=%d' % (
            block_size, step_size
        )
    )
    dataflow = plan.getDataFlow()
    afp = AudioFileProcessor()
    engine = Engine()
    engine.load(dataflow)
    afp.processFile(engine, wav_path)
    spectrogram = engine.readOutput('power_spectrum')

    seq = []
    for spectrum in spectrogram:
        mean_mag = np.mean(spectrum)
        if mean_mag <= SILENT_MAG_THRESHOLD:
            seq.append(SILENT_KEY)
            continue

        # Best (tone index, magnitude) per group; -1 marks "nothing yet".
        lower_data = (-1, -1)
        upper_data = (-1, -1)

        for target_idx, target_freq in itertools.chain(
            enumerate(LOWER_FREQS), enumerate(UPPER_FREQS)
        ):
            # Magnitude at target_freq, linearly interpolated between the
            # bin just below it and the bin just above it.
            idx = bisect.bisect(freq_bound, target_freq)
            assert idx > 0
            freq1 = freq_bound[idx - 1]
            mag1 = spectrum[idx - 1]
            freq2 = freq_bound[idx]
            mag2 = spectrum[idx]
            w1 = 1. * (freq2 - target_freq) / (freq2 - freq1)
            w2 = 1. - w1
            target_mag = (w1 * mag1 + w2 * mag2)

            # A tone must exceed twice the frame average to be considered.
            if target_mag > mean_mag * 2:
                # Tones under 1000 go to the lower group, others to the
                # upper group; the strongest one in each group wins.
                if target_freq < 1000:
                    if target_mag > lower_data[1]:
                        lower_data = (target_idx, target_mag)
                else:
                    if target_mag > upper_data[1]:
                        upper_data = (target_idx, target_mag)

        lower_idx = lower_data[0]
        upper_idx = upper_data[0]
        if lower_idx == -1 or upper_idx == -1:
            seq.append(UNKNOWN_KEY)
        else:
            # KEYS is indexed row-major with the lower tone as the row, so
            # the row stride must be the number of columns (upper tones).
            seq.append(KEYS[lower_idx * len(UPPER_FREQS) + upper_idx])

    ans = purge_seq(seq)
    if ans_path is not None:
        with open(ans_path) as fh:
            for i, line in enumerate(fh):
                line = line.strip()
                assert line == ans[i], "%s != %s" % (line, ans[i])
                # print(...) with one argument is valid and identical in
                # output on both Python 2 and Python 3.
                print('[%d] %s' % (i, line))
    else:
        for i, line in enumerate(ans):
            print('[%d] %s' % (i, line))