Exemple #1
0
    def _ExtractAll(self, audio_location, sample_rate):
        # Build a dataflow object using FeaturePlan
        fp = FeaturePlan(sample_rate=sample_rate)

        # Using *.addFeature() multiple extractions can be called with a
        # single call
        fp.addFeature('zcr: ZCR')
        fp.addFeature('mfcc: MFCC')
        fp.addFeature('mfcc_D1: MFCC > Derivate DOrder=1')
        #fp.addFeature('mfcc_D2: MFCC > Derivate DOrder=2')
        fp.addFeature('flux: SpectralFlux')
        fp.addFeature('energy: Energy')
        fp.addFeature('loudness: Loudness')
        fp.addFeature('obsi: OBSI')
        fp.addFeature('sharpness: PerceptualSharpness')
        fp.addFeature('spread: PerceptualSpread')
        fp.addFeature('rolloff: SpectralRolloff')
        fp.addFeature('variation: SpectralVariation')
        # Get dataflow
        df = fp.getDataFlow()

        # Configure engine
        engine = Engine()
        engine.load(df)

        # extract features from audio using AudioFileProcessor
        afp = AudioFileProcessor()
        afp.processFile(engine, audio_location)

        # features array holds all the extracted features
        features = engine.readAllOutputs()
        #print features["zcr"]
        # returns the array of features extracted
        return features
class MFCCExtractor:
  def __init__(self, **args):
    self.engine = Engine()
    self.afp = AudioFileProcessor()

    self.block_size = (1024 if not 'block_size' in args else args['block_size'])
    self.step_size  = (512 if not 'step_size' in args else args['step_size'])

  def extract(self, path, **args):
    self.path = path
    print path
    self.wave = wave.open(path, 'r')
    self.rate = self.wave.getframerate()

    fp = FeaturePlan(sample_rate=self.rate)
    fp.addFeature('mfcc: MFCC blockSize={} stepSize={}'.format(self.block_size, self.step_size))
    self.engine.load(fp.getDataFlow())
    self.afp.processFile(self.engine, path)

    feats = self.engine.readAllOutputs()

    if 'save_to_disk' in args:
      self.__save_to_disk(feats['mfcc'])

    return feats['mfcc']

  def __save_to_disk(self, feats):
    new_path = self.path + ".mfc.csv"
    np.savetxt(new_path, feats, delimiter=",")
Exemple #3
0
    def _energy(audio_location, sample_rate):
        # This function behaves the same as 'python yaafe.py -r SAMPLERATE -f \
        #               "energy: Energy PARAMETERS" WAV-LOCATION'
        # SAMPLERATE : Samplerate of the file being processed
        #- blockSize (default=1024): output frames size
        #- stepSize (default=512): step between consecutive frames

        # Build a dataflow object using FeaturePlan
        # blockSize, stepSize could be added too. 1024, 512 default
        fp = FeaturePlan(sample_rate=sample_rate)

        # Using *.addFeature() multiple extractions can be called with a
        # single call
        fp.addFeature('Energy: Energy')
        #('energy: Energy blockSize=1024 stepSize=512')

        # Get dataflow
        df = fp.getDataFlow()

        # Configure engine
        engine = Engine()
        engine.load(df)

        # extract features from audio using AudioFileProcessor
        afp = AudioFileProcessor()
        afp.processFile(engine, audio_location)

        # features array holds all the extracted features
        features = engine.readAllOutputs()

        # returns the array of features extracted
        return features
class MFCCExtractor:
    def __init__(self, **args):
        self.engine = Engine()
        self.afp = AudioFileProcessor()

        self.block_size = (1024
                           if not 'block_size' in args else args['block_size'])
        self.step_size = (512
                          if not 'step_size' in args else args['step_size'])

    def extract(self, path, **args):
        self.path = path
        print path
        self.wave = wave.open(path, 'r')
        self.rate = self.wave.getframerate()

        fp = FeaturePlan(sample_rate=self.rate)
        fp.addFeature('mfcc: MFCC blockSize={} stepSize={}'.format(
            self.block_size, self.step_size))
        self.engine.load(fp.getDataFlow())
        self.afp.processFile(self.engine, path)

        feats = self.engine.readAllOutputs()

        if 'save_to_disk' in args:
            self.__save_to_disk(feats['mfcc'])

        return feats['mfcc']

    def __save_to_disk(self, feats):
        new_path = self.path + ".mfc.csv"
        np.savetxt(new_path, feats, delimiter=",")
Exemple #5
0
 def _ExtractZCRAndFlux(self, audio_location, sample_rate):
     fp = FeaturePlan(sample_rate=sample_rate)
     fp.addFeature('zcr: ZCR')
     fp.addFeature('flux: SpectralFlux')
     df = fp.getDataFlow()
     engine = Engine()
     engine.load(df)
     afp = AudioFileProcessor()
     afp.processFile(engine, audio_location)
     features = engine.readAllOutputs()
     return features
Exemple #6
0
def startEngine(path):

    global features

    engine = Engine()
    engine.load(df)

    afp  = AudioFileProcessor()
    afp.processFile(engine,path)
    features = engine.readAllOutputs()

    return 'Yaafe引擎启动成功'
Exemple #7
0
    def _zcr(self, audio_location, sample_rate):
        # This function behave the same as 'python yaafe.py -r SAMPLERATE -f \
        #               "zcr: ZCR blockSize=1024  stepSize=512" WAV-LOCATION'
        # SAMPLERATE = samplerate of the file being processed
        # zcr        = name for the process (zcr1, zcr2... )
        # ZCR        = the feature that is being extracted
        # blockSize  = output frames size
        # stepSize   = step between consecutive frames

        # Build a dataflow object using FeaturePlan
        # blockSize, stepSize could be added too. 1024, 512 default
        fp = FeaturePlan(sample_rate=sample_rate)

        # Using *.addFeature() multiple extractions can be called with a
        # single call
        fp.addFeature('ZCR: ZCR')

        # Get dataflow
        df = fp.getDataFlow()

        # Or load it from a file
        # df = DataFlow()
        # df.load(dataflow_file)

        # Configure engine
        engine = Engine()
        engine.load(df)

        # extract features from audio using AudioFileProcessor
        afp = AudioFileProcessor()
        afp.processFile(engine, audio_location)

        # features array holds all the extracted features
        features = engine.readAllOutputs()

        # extract features from an audio file and write results to csv files
        # afp.setOutputFormat('csv','output',{'Precision':'8'})
        # afp.processFile(engine,audiofile)
        #  this creates output/myaudio.wav.mfcc.csv,
        #               output/myaudio.wav.mfcc_d1.csv and
        #               output/myaudio.wav.mfcc_d2.csv files.

        # Clear the engine so it can be used again
        #engine.reset()

        # returns the array of features extracted
        return features
Exemple #8
0
class Extractor(object):
	def __init__(self):
		self.fp = FeaturePlan(sample_rate=44100)
		self.fp.addFeature('feat: ')
		self.engine = Engine()
		self.engine.load(self.fp.getDataFlow())
		self.afp = AudioFileProcessor()
		
		self.mfcc = []
		self.songs = []
		
	def recurse(self, directory):
		results = []
		for root, dirs, files in os.walk(directory):
			for f in files:
				current_file = os.path.join(root, f)
				if current_file.endswith('wav') and current_file not in results:
					results.append(current_file)
		
		print results
		return results
	
	def extract_feature(self, f):
		self.afp.processFile(self.engine, f)
		feats = self.engine.readAllOutputs()
		return feats['feat']

		
	def build_feature_space(self):
		self.space = FeatureSpace('mfcc')
		
		last_feature = None
		for f in self.recurse(sys.argv[1]):
			data = self.extract_feature(f)
			temp_feature = Feature(f, ['mfcc', data])
			self.space.add(temp_feature)
			last_feature = temp_feature
		
		print self.space.min_dist(last_feature)
Exemple #9
0
    def _mfcc(self, audio_location, sample_rate):
        # This function behaves the same as 'python yaafe.py -r SAMPLERATE -f \
        #               "mfcc: MFCC PARAMETERS" WAV-LOCATION'
        # SAMPLERATE : Samplerate of the file being processed
        #- CepsIgnoreFirstCoeff (default=1): 0 keeps the first cepstral coeffcient, 1 ignore it
        #- CepsNbCoeffs (default=13): Number of cepstral coefficient to keep.
        #- FFTWindow (default=Hanning): Weighting window to apply before fft. Hanning|Hamming|None
        #- MelMaxFreq (default=6854.0): Maximum frequency of the mel filter bank
        #- MelMinFreq (default=130.0): Minimum frequency of the mel filter bank
        #- MelNbFilters (default=40): Number of mel filters
        #- blockSize (default=1024): output frames size
        #- stepSize (default=512): step between consecutive frames

        # Build a dataflow object using FeaturePlan
        fp = FeaturePlan(sample_rate=sample_rate)

        # Using *.addFeature() multiple extractions can be called with a
        # single call
        fp.addFeature('mfcc: MFCC')
        #('mfcc: MFCC CepsIgnoreFirstCoeff=0 \
        #CepsNbCoeffs=13 FFTWindow=Hanning MelMaxFreq=6854\
        #MelMinFreq=130 MelNbFilters=40 blockSize=1024 stepSize=512')

        # Get dataflow
        df = fp.getDataFlow()

        engine = Engine()
        engine.load(df)

        # extract features from audio using AudioFileProcessor
        afp = AudioFileProcessor()
        afp.processFile(engine, audio_location)

        # features array holds all the extracted features
        features = engine.readAllOutputs()

        # returns the array of features extracted
        return features
Exemple #10
0
    def _SpectralFlux(self, audio_location, sample_rate):
        # This function behaves the same as 'python yaafe.py -r SAMPLERATE -f \
        #               "flux: SpectralFlux PARAMETERS" WAV-LOCATION'
        # SAMPLERATE : Samplerate of the file being processed
        # - FFTLength (default=0): Frame's length on which perform FFT. Original
        #   frame is padded with zeros or truncated to reach this size. If 0 then
        #   use original frame length.
        # - FFTWindow (default=Hanning): Weighting window to apply before fft. Hanning|Hamming|None
        # - FluxSupport (default=All): support of flux computation. if 'All' then
        #   use all bins (default), if 'Increase' then use only bins which are increasing
        # - blockSize (default=1024): output frames size
        # - stepSize (default=512): step between consecutive frames

        # Build a dataflow object using FeaturePlan
        fp = FeaturePlan(sample_rate=sample_rate)

        # Using *.addFeature() multiple extractions can be called with a
        # single call
        fp.addFeature('Flux: SpectralFlux')
        #('flux: SpectralFlux FFTLength=0 FFTWindow=Hanning FluxSupport=All\
        # blockSize=1024 stepSize=512')

        # Get dataflow
        df = fp.getDataFlow()

        # Configure engine
        engine = Engine()
        engine.load(df)

        # extract features from audio using AudioFileProcessor
        afp = AudioFileProcessor()
        afp.processFile(engine, audio_location)

        # features array holds all the extracted features
        features = engine.readAllOutputs()

        # returns the array of features extracted
        return features
Exemple #11
0
class Yaafe(EnhancedObject):
    '''Yaafe toolbox wrapper. To be used with Database object.

    Attributes:
        sample_rate     The Files' sample rate
        plan_filename   The Featue Plan filename

    Methods:
        process(audiofile)          Process audiofile and return features
        get_X(entries_list, feat)   Fetch array of processed data from Database
        get_y                       Fetch subdir i's from Database
    `
    Init:
        Yaafe(sample_rate, feature_plan)
    '''
    _features = {
        'spec_rolloff': ("SpectralRolloff blockSize=512 stepSize=128 "
                         "> StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'spec_shape': ("SpectralShapeStatistics blockSize=512 stepSize=128 "
                       "> StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'spec_flux': ("SpectralFlux blockSize=512 stepSize=128 >"
                      "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'amp_mod': ("AmplitudeModulation blockSize=512 stepSize=128 >"
                    "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'auto': ("AutoCorrelation  blockSize=512 stepSize=128 >"
                 "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'lpc': ("LPC  blockSize=512 stepSize=128 > StatisticalIntegrator "
                "NbFrames=40 StepNbFrames=8"),
        'loudness': ("Loudness blockSize=512 stepSize=128 >"
                     "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'mfcc': ("MFCC blockSize=512 stepSize=128 > StatisticalIntegrator "
                 "NbFrames=40 StepNbFrames=8"),
        'mel_spectrum': ("MelSpectrum blockSize=512, stepSize=128 >"
                         "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'obsi': ("OBSI blockSize=512 stepSize=128 > StatisticalIntegrator "
                 "NbFrames=40 StepNbFrames=8"),
        'obsir': ("OBSIR blockSize=512 stepSize=128 >"
                  "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'perc_sharp': ("PerceptualSharpness blockSize=512 stepSize=128 >"
                       "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'perc_spread': ("PerceptualSpread blockSize=512 stepSize=128 >"
                        "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'spect_crest': ("SpectralCrestFactorPerBand blockSize=512 "
                        "stepSize=128 > StatisticalIntegrator NbFrames=40 "
                        "StepNbFrames=8"),
        'spec_decr': ("SpectralDecrease blockSize=512 stepSize=128 >"
                      "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'spect_flat': ("SpectralFlatness blockSize=512 stepSize=128 >"
                       "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'spect_flat_band': ("SpectralFlatnessPerBand blockSize=512 "
                            "stepSize=128 > StatisticalIntegrator NbFrames=40 "
                            "StepNbFrames=8"),
        'spect_slope': ("SpectralSlope blockSize=512 stepSize=128 >"
                        "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'spect_var': ("SpectralVariation blockSize=512 stepSize=128 >"
                      "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'temp_shape': ("TemporalShapeStatistics blockSize=512 stepSize=128 "
                       "> StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'zcr': ("ZCR blockSize=512 stepSize=128 > StatisticalIntegrator "
                "NbFrames=40 StepNbFrames=8"),
        'env_shape': ("EnvelopeShapeStatistics blockSize=512 stepSize=128"
                      " > StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'comp_onest': ("ComplexDomainOnsetDetection blockSize=512 "
                       "stepSize=128 > StatisticalIntegrator NbFrames=40 "
                       "StepNbFrames=8"),
    }

    def __init__(self, sample_rate, features=None):
        if features is None:
            features = self._features
        self.sample_rate = sample_rate
        self.initialize(features)
        self.features = features

    def initialize(self, feature_dict):
        """ Run the required boilerplate for yaafe """
        self.feature_dict = feature_dict
        self.fp = FeaturePlan(sample_rate=self.sample_rate, normalize=0.98)
        for name, desc in self.feature_dict.items():
            self.fp.addFeature("{0}: {1}".format(name, desc))
        self.df = self.fp.getDataFlow()
        self.engine = Engine()
        self.engine.load(self.df)
        self.afp = AudioFileProcessor()
        return self

    def save_fplan(self, name):
        """ Save a feature plan (text file) """
        text_file = open("{}.txt".format(name), 'w')
        for name, desc in self.features.items():
            text_file.write("{}: {}".format(name, desc))
        text_file.close()

    def process(self, audiofile):
        """ Process function for running a file through yaafe's
            feature extractor
        """
        self.afp.processFile(self.engine, audiofile)
        out = self.engine.readAllOutputs()
        self.engine.flush()
        return sorted(out)
""" Stand-alone application to demonstrate yaafe's transformations """
import numpy as np
import matplotlib.pyplot as plt
from yaafelib import FeaturePlan, Engine, AudioFileProcessor


FPLAN = FeaturePlan(sample_rate=44100)
FPLAN.addFeature("mfcc: MFCC blockSize=512 stepSize=128")
FPLAN.addFeature("mfcc_stat: MFCC blockSize=512 stepSize=128 > "
                 "StatisticalIntegrator NbFrames=40 StepNbFrames=8")
ENGINE = Engine()
ENGINE.load(FPLAN.getDataFlow())
PROCESSOR = AudioFileProcessor()

PROCESSOR.processFile(ENGINE, 'track.wav')
DATA = ENGINE.readAllOutputs()
ENGINE.flush()

X_MFCC = DATA['mfcc']
X_MFCC_STAT = DATA['mfcc_stat']

MFCC_DESC = list()
for i in range(1, 14):
    desc = "Average for Band {}".format(i)
    MFCC_DESC.append(desc)
for i in range(1, 14):
    desc = "Standard Dev. for Band {}".format(i)
    MFCC_DESC.append(desc)

plt.ion()
FIG = plt.figure()
class Yaafe(EnhancedObject):
    '''Yaafe toolbox wrapper. To be used with Database object.

    Attributes:
        sample_rate     The Files' sample rate
        plan_filename   The Featue Plan filename

    Methods:
        process(audiofile)          Process audiofile and return features
        get_X(entries_list, feat)   Fetch array of processed data from Database
        get_y                       Fetch subdir i's from Database
    `
    Init:
        Yaafe(sample_rate, feature_plan)
    '''
    _features = {
        'spec_rolloff': ("SpectralRolloff blockSize=512 stepSize=128 "
                         "> StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'spec_shape': ("SpectralShapeStatistics blockSize=512 stepSize=128 "
                       "> StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'spec_flux': ("SpectralFlux blockSize=512 stepSize=128 >"
                      "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'amp_mod': ("AmplitudeModulation blockSize=512 stepSize=128 >"
                    "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'auto': ("AutoCorrelation  blockSize=512 stepSize=128 >"
                 "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'lpc': ("LPC  blockSize=512 stepSize=128 > StatisticalIntegrator "
                "NbFrames=40 StepNbFrames=8"),
        'loudness': ("Loudness blockSize=512 stepSize=128 >"
                     "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'mfcc': ("MFCC blockSize=512 stepSize=128 > StatisticalIntegrator "
                 "NbFrames=40 StepNbFrames=8"),
        'mel_spectrum': ("MelSpectrum blockSize=512, stepSize=128 >"
                         "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'obsi': ("OBSI blockSize=512 stepSize=128 > StatisticalIntegrator "
                 "NbFrames=40 StepNbFrames=8"),
        'obsir': ("OBSIR blockSize=512 stepSize=128 >"
                  "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'perc_sharp': ("PerceptualSharpness blockSize=512 stepSize=128 >"
                       "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'perc_spread': ("PerceptualSpread blockSize=512 stepSize=128 >"
                        "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'spect_crest': ("SpectralCrestFactorPerBand blockSize=512 "
                        "stepSize=128 > StatisticalIntegrator NbFrames=40 "
                        "StepNbFrames=8"),
        'spec_decr': ("SpectralDecrease blockSize=512 stepSize=128 >"
                      "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'spect_flat': ("SpectralFlatness blockSize=512 stepSize=128 >"
                       "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'spect_flat_band': ("SpectralFlatnessPerBand blockSize=512 "
                            "stepSize=128 > StatisticalIntegrator NbFrames=40 "
                            "StepNbFrames=8"),
        'spect_slope': ("SpectralSlope blockSize=512 stepSize=128 >"
                        "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'spect_var': ("SpectralVariation blockSize=512 stepSize=128 >"
                      "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'temp_shape': ("TemporalShapeStatistics blockSize=512 stepSize=128 "
                       "> StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'zcr': ("ZCR blockSize=512 stepSize=128 > StatisticalIntegrator "
                "NbFrames=40 StepNbFrames=8"),
        'env_shape': ("EnvelopeShapeStatistics blockSize=512 stepSize=128"
                      " > StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'comp_onest': ("ComplexDomainOnsetDetection blockSize=512 "
                       "stepSize=128 > StatisticalIntegrator NbFrames=40 "
                       "StepNbFrames=8"),
        }

    def __init__(self, sample_rate, features=None):
        if features is None:
            features = self._features
        self.sample_rate = sample_rate
        self.initialize(features)
        self.features = features

    def initialize(self, feature_dict):
        """ Run the required boilerplate for yaafe """
        self.feature_dict = feature_dict
        self.fp = FeaturePlan(
            sample_rate=self.sample_rate,
            normalize=0.98)
        for name, desc in self.feature_dict.items():
            self.fp.addFeature("{0}: {1}".format(name, desc))
        self.df = self.fp.getDataFlow()
        self.engine = Engine()
        self.engine.load(self.df)
        self.afp = AudioFileProcessor()
        return self

    def save_fplan(self, name):
        """ Save a feature plan (text file) """
        text_file = open("{}.txt".format(name), 'w')
        for name, desc in self.features.items():
            text_file.write("{}: {}".format(name, desc))
        text_file.close()

    def process(self, audiofile):
        """ Process function for running a file through yaafe's
            feature extractor
        """
        self.afp.processFile(self.engine, audiofile)
        out = self.engine.readAllOutputs()
        self.engine.flush()
        return sorted(out)
Exemple #14
0
""" Stand-alone application to demonstrate yaafe's transformations """
import numpy as np
import matplotlib.pyplot as plt
from yaafelib import FeaturePlan, Engine, AudioFileProcessor

FPLAN = FeaturePlan(sample_rate=44100)
FPLAN.addFeature("mfcc: MFCC blockSize=512 stepSize=128")
FPLAN.addFeature("mfcc_stat: MFCC blockSize=512 stepSize=128 > "
                 "StatisticalIntegrator NbFrames=40 StepNbFrames=8")
ENGINE = Engine()
ENGINE.load(FPLAN.getDataFlow())
PROCESSOR = AudioFileProcessor()

PROCESSOR.processFile(ENGINE, 'track.wav')
DATA = ENGINE.readAllOutputs()
ENGINE.flush()

X_MFCC = DATA['mfcc']
X_MFCC_STAT = DATA['mfcc_stat']

MFCC_DESC = list()
for i in range(1, 14):
    desc = "Average for Band {}".format(i)
    MFCC_DESC.append(desc)
for i in range(1, 14):
    desc = "Standard Dev. for Band {}".format(i)
    MFCC_DESC.append(desc)

plt.ion()
FIG = plt.figure()
FIG.set_size_inches(14, 8)