Example #1
File: frames.py Project: EQ4/Yaafe
def main(argv):
    if len(argv) != 2:
        print "usage: python %s foo.mp3" % (argv[0])
        return
    fp = FeaturePlan(sample_rate=SAMPLE_RATE, resample=True, time_start=TIME_START, time_limit=TIME_LIMIT)
    fp.addFeature("frames: Frames blockSize={0} stepSize={1}".format(SLICE_WINSIZE, SLICE_STEPSIZE))
    if "YAAFE_PATH" in os.environ:
        fp.addFeature(
            "beat_hist: BeatHistogramSummary ACPNbPeaks=3  BHSBeatFrameSize=128  BHSBeatFrameStep=64  "
            "BHSHistogramFrameSize=40  BHSHistogramFrameStep=40  FFTLength=0  FFTWindow=Hanning  "
            "HInf=40  HNbBins=80  HSup=200  NMANbFrames=5000  blockSize=1024  stepSize=512"
        )
    df = fp.getDataFlow()
    engine = Engine()
    engine.load(df)
    afp = AudioFileProcessor()

    song_path = argv[1]
    assert os.path.exists(song_path)
    afp.processFile(engine, song_path)
    frames = engine.readOutput("frames")
    frames = np.concatenate(frames)
    print "time start: %ss" % TIME_START
    print "time limit: %ss" % TIME_LIMIT
    print "duration:", 1.0 * frames.size / SAMPLE_RATE

    if "YAAFE_PATH" in os.environ:
        beat_hist = engine.readOutput("beat_hist")
        print "beat_hist: %s" % beat_hist
class MFCCExtractor:
  def __init__(self, **args):
    self.engine = Engine()
    self.afp = AudioFileProcessor()

    self.block_size = args.get('block_size', 1024)
    self.step_size = args.get('step_size', 512)

  def extract(self, path, **args):
    self.path = path
    print path
    self.wave = wave.open(path, 'r')
    self.rate = self.wave.getframerate()

    fp = FeaturePlan(sample_rate=self.rate)
    fp.addFeature('mfcc: MFCC blockSize={} stepSize={}'.format(self.block_size, self.step_size))
    self.engine.load(fp.getDataFlow())
    self.afp.processFile(self.engine, path)

    feats = self.engine.readAllOutputs()

    if 'save_to_disk' in args:
      self.__save_to_disk(feats['mfcc'])

    return feats['mfcc']

  def __save_to_disk(self, feats):
    new_path = self.path + ".mfc.csv"
    np.savetxt(new_path, feats, delimiter=",")
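
A minimal usage sketch for the extractor above ('song.wav' is a placeholder WAV path; passing save_to_disk triggers the CSV dump shown in __save_to_disk):

extractor = MFCCExtractor(block_size=1024, step_size=512)
mfcc = extractor.extract('song.wav', save_to_disk=True)  # also writes song.wav.mfc.csv next to the input
print mfcc.shape  # (number of frames, number of cepstral coefficients)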
Example #3
    def _ExtractAll(self, audio_location, sample_rate):
        # Build a dataflow object using FeaturePlan
        fp = FeaturePlan(sample_rate=sample_rate)

        # Multiple features can be added to the plan by calling addFeature() repeatedly
        fp.addFeature('zcr: ZCR')
        fp.addFeature('mfcc: MFCC')
        fp.addFeature('mfcc_D1: MFCC > Derivate DOrder=1')
        #fp.addFeature('mfcc_D2: MFCC > Derivate DOrder=2')
        fp.addFeature('flux: SpectralFlux')
        fp.addFeature('energy: Energy')
        fp.addFeature('loudness: Loudness')
        fp.addFeature('obsi: OBSI')
        fp.addFeature('sharpness: PerceptualSharpness')
        fp.addFeature('spread: PerceptualSpread')
        fp.addFeature('rolloff: SpectralRolloff')
        fp.addFeature('variation: SpectralVariation')
        # Get dataflow
        df = fp.getDataFlow()

        # Configure engine
        engine = Engine()
        engine.load(df)

        # extract features from audio using AudioFileProcessor
        afp = AudioFileProcessor()
        afp.processFile(engine, audio_location)

        # features array holds all the extracted features
        features = engine.readAllOutputs()
        #print features["zcr"]
        # returns the array of features extracted
        return features
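
readAllOutputs() returns a dict keyed by the names given to addFeature(); a short sketch of reading it back (the instance name and file path are placeholders):

features = extractor._ExtractAll('song.wav', 44100)  # hypothetical instance of the class above
zcr = features['zcr']          # one zero-crossing-rate value per frame
mfcc = features['mfcc']        # 13 cepstral coefficients per frame (yaafe default)
mfcc_d1 = features['mfcc_D1']  # first-order derivative of the MFCCs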
Example #4
    def _energy(audio_location, sample_rate):
        # This function behaves the same as 'python yaafe.py -r SAMPLERATE -f \
        #               "energy: Energy PARAMETERS" WAV-LOCATION'
        # SAMPLERATE : Samplerate of the file being processed
        #- blockSize (default=1024): output frames size
        #- stepSize (default=512): step between consecutive frames

        # Build a dataflow object using FeaturePlan
        # blockSize, stepSize could be added too. 1024, 512 default
        fp = FeaturePlan(sample_rate=sample_rate)

        # Multiple features can be added to the plan by calling addFeature() repeatedly
        fp.addFeature('Energy: Energy')
        #('energy: Energy blockSize=1024 stepSize=512')

        # Get dataflow
        df = fp.getDataFlow()

        # Configure engine
        engine = Engine()
        engine.load(df)

        # extract features from audio using AudioFileProcessor
        afp = AudioFileProcessor()
        afp.processFile(engine, audio_location)

        # features array holds all the extracted features
        features = engine.readAllOutputs()

        # returns the array of features extracted
        return features
Example #5
def main(argv):
    if len(argv) != 2:
        print 'usage: python %s foo.mp3' % (argv[0])
        return
    fp = FeaturePlan(sample_rate=SAMPLE_RATE,
                     resample=True,
                     time_start=TIME_START,
                     time_limit=TIME_LIMIT)
    fp.addFeature("frames: Frames blockSize={0} stepSize={1}".format(
        SLICE_WINSIZE, SLICE_STEPSIZE))
    if 'YAAFE_PATH' in os.environ:
        fp.addFeature(
            "beat_hist: BeatHistogramSummary ACPNbPeaks=3  BHSBeatFrameSize=128  BHSBeatFrameStep=64  "
            "BHSHistogramFrameSize=40  BHSHistogramFrameStep=40  FFTLength=0  FFTWindow=Hanning  "
            "HInf=40  HNbBins=80  HSup=200  NMANbFrames=5000  blockSize=1024  stepSize=512"
        )
    df = fp.getDataFlow()
    engine = Engine()
    engine.load(df)
    afp = AudioFileProcessor()

    song_path = argv[1]
    assert os.path.exists(song_path)
    afp.processFile(engine, song_path)
    frames = engine.readOutput('frames')
    frames = np.concatenate(frames)
    print 'time start: %ss' % TIME_START
    print 'time limit: %ss' % TIME_LIMIT
    print 'duration:', 1. * frames.size / SAMPLE_RATE

    if 'YAAFE_PATH' in os.environ:
        beat_hist = engine.readOutput('beat_hist')
        print 'beat_hist: %s' % beat_hist
class MFCCExtractor:
    def __init__(self, **args):
        self.engine = Engine()
        self.afp = AudioFileProcessor()

        self.block_size = args.get('block_size', 1024)
        self.step_size = args.get('step_size', 512)

    def extract(self, path, **args):
        self.path = path
        print path
        self.wave = wave.open(path, 'r')
        self.rate = self.wave.getframerate()

        fp = FeaturePlan(sample_rate=self.rate)
        fp.addFeature('mfcc: MFCC blockSize={} stepSize={}'.format(
            self.block_size, self.step_size))
        self.engine.load(fp.getDataFlow())
        self.afp.processFile(self.engine, path)

        feats = self.engine.readAllOutputs()

        if 'save_to_disk' in args:
            self.__save_to_disk(feats['mfcc'])

        return feats['mfcc']

    def __save_to_disk(self, feats):
        new_path = self.path + ".mfc.csv"
        np.savetxt(new_path, feats, delimiter=",")
Example #7
def startEngine(path):
    global afp, features
    afp = AudioFileProcessor()
    # Extract features from the audio file; a module-level engine, already
    # loaded with a dataflow, must be available.
    afp.processFile(engine, path)

    features = engine.readAllOutputs()  # matrix of all extracted features

    return 'extracted'
Example #10
def compute_spec(song_path):
    fp = FeaturePlan(sample_rate=22050, resample=True)
    #add one feature
    fp.addFeature("spec: PowerSpectrum blockSize=1024 stepSize=512")
    df = fp.getDataFlow()
    engine = Engine()
    engine.load(df)
    afp = AudioFileProcessor()
    afp.processFile(engine, song_path)
    return engine.readOutput('spec')
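
readOutput() returns a 2-D array with one row per frame and one column per frequency bin; a possible follow-up that plots it in dB (matplotlib and the file path are assumptions):

import numpy as np
import matplotlib.pyplot as plt

spec = compute_spec('song.wav')            # one row per frame, one column per frequency bin
spec_db = 10.0 * np.log10(spec + 1e-12)    # power to dB; the small offset avoids log(0)
plt.imshow(spec_db.T, origin='lower', aspect='auto')
plt.xlabel('frame index')
plt.ylabel('frequency bin')
plt.show()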
Example #12
 def initialize(self, feature_dict):
     """ Run the required boilerplate for yaafe """
     self.feature_dict = feature_dict
     self.fp = FeaturePlan(sample_rate=self.sample_rate, normalize=0.98)
     for name, desc in self.feature_dict.items():
         self.fp.addFeature("{0}: {1}".format(name, desc))
     self.df = self.fp.getDataFlow()
     self.engine = Engine()
     self.engine.load(self.df)
     self.afp = AudioFileProcessor()
     return self
Example #13
 def _ExtractZCRAndFlux(self, audio_location, sample_rate):
     fp = FeaturePlan(sample_rate=sample_rate)
     fp.addFeature('zcr: ZCR')
     fp.addFeature('flux: SpectralFlux')
     df = fp.getDataFlow()
     engine = Engine()
     engine.load(df)
     afp = AudioFileProcessor()
     afp.processFile(engine, audio_location)
     features = engine.readAllOutputs()
     return features
Example #14
def startEngine(path):

    global features

    engine = Engine()
    engine.load(df)

    afp  = AudioFileProcessor()
    afp.processFile(engine,path)
    features = engine.readAllOutputs()

    return 'Yaafe engine started successfully'
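
startEngine() above loads a module-level dataflow df that is not shown in the snippet; a plausible way it could have been built (the sample rate and feature choice are assumptions):

from yaafelib import FeaturePlan

fp = FeaturePlan(sample_rate=44100)                      # assumed sample rate
fp.addFeature('mfcc: MFCC blockSize=1024 stepSize=512')  # assumed feature; the original plan is not shown
df = fp.getDataFlow()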
Example #15
    def _zcr(self, audio_location, sample_rate):
        # This function behaves the same as 'python yaafe.py -r SAMPLERATE -f \
        #               "zcr: ZCR blockSize=1024  stepSize=512" WAV-LOCATION'
        # SAMPLERATE = samplerate of the file being processed
        # zcr        = name for the process (zcr1, zcr2... )
        # ZCR        = the feature that is being extracted
        # blockSize  = output frames size
        # stepSize   = step between consecutive frames

        # Build a dataflow object using FeaturePlan
        # blockSize, stepSize could be added too. 1024, 512 default
        fp = FeaturePlan(sample_rate=sample_rate)

        # Multiple features can be added to the plan by calling addFeature() repeatedly
        fp.addFeature('ZCR: ZCR')

        # Get dataflow
        df = fp.getDataFlow()

        # Or load it from a file
        # df = DataFlow()
        # df.load(dataflow_file)

        # Configure engine
        engine = Engine()
        engine.load(df)

        # extract features from audio using AudioFileProcessor
        afp = AudioFileProcessor()
        afp.processFile(engine, audio_location)

        # features array holds all the extracted features
        features = engine.readAllOutputs()

        # extract features from an audio file and write results to csv files
        # afp.setOutputFormat('csv','output',{'Precision':'8'})
        # afp.processFile(engine,audiofile)
        #  this creates output/myaudio.wav.mfcc.csv,
        #               output/myaudio.wav.mfcc_d1.csv and
        #               output/myaudio.wav.mfcc_d2.csv files.

        # Clear the engine so it can be used again
        #engine.reset()

        # returns the array of features extracted
        return features
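
Once loaded, the same engine can process several files in a row; a sketch that mirrors the processFile/readAllOutputs/flush pattern used elsewhere in these examples (engine and afp as built above, file names are placeholders):

for path in ['a.wav', 'b.wav']:
    afp.processFile(engine, path)
    feats = engine.readAllOutputs()
    engine.flush()
    print path, feats['ZCR'].shape  # one row per analysis frame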
Example #16
	def __init__(self):
		self.fp = FeaturePlan(sample_rate=44100)
		self.fp.addFeature('feat: ')  # NOTE: the feature definition after the colon is left empty; a real plan needs one, e.g. 'feat: MFCC'
		self.engine = Engine()
		self.engine.load(self.fp.getDataFlow())
		self.afp = AudioFileProcessor()
		
		self.mfcc = []
		self.songs = []
 def initialize(self, feature_dict):
     """ Run the required boilerplate for yaafe """
     self.feature_dict = feature_dict
     self.fp = FeaturePlan(
         sample_rate=self.sample_rate,
         normalize=0.98)
     for name, desc in self.feature_dict.items():
         self.fp.addFeature("{0}: {1}".format(name, desc))
     self.df = self.fp.getDataFlow()
     self.engine = Engine()
     self.engine.load(self.df)
     self.afp = AudioFileProcessor()
     return self
Example #18
class Extractor(object):
	def __init__(self):
		self.fp = FeaturePlan(sample_rate=44100)
		self.fp.addFeature('feat: ')  # NOTE: the feature definition after the colon is left empty; a real plan needs one, e.g. 'feat: MFCC'
		self.engine = Engine()
		self.engine.load(self.fp.getDataFlow())
		self.afp = AudioFileProcessor()
		
		self.mfcc = []
		self.songs = []
		
	def recurse(self, directory):
		results = []
		for root, dirs, files in os.walk(directory):
			for f in files:
				current_file = os.path.join(root, f)
				if current_file.endswith('wav') and current_file not in results:
					results.append(current_file)
		
		print results
		return results
	
	def extract_feature(self, f):
		self.afp.processFile(self.engine, f)
		feats = self.engine.readAllOutputs()
		return feats['feat']

		
	def build_feature_space(self):
		self.space = FeatureSpace('mfcc')
		
		last_feature = None
		for f in self.recurse(sys.argv[1]):
			data = self.extract_feature(f)
			temp_feature = Feature(f, ['mfcc', data])
			self.space.add(temp_feature)
			last_feature = temp_feature
		
		print self.space.min_dist(last_feature)
Example #19
    def _mfcc(self, audio_location, sample_rate):
        # This function behaves the same as 'python yaafe.py -r SAMPLERATE -f \
        #               "mfcc: MFCC PARAMETERS" WAV-LOCATION'
        # SAMPLERATE : Samplerate of the file being processed
        #- CepsIgnoreFirstCoeff (default=1): 0 keeps the first cepstral coefficient, 1 ignores it
        #- CepsNbCoeffs (default=13): Number of cepstral coefficients to keep.
        #- FFTWindow (default=Hanning): Weighting window to apply before fft. Hanning|Hamming|None
        #- MelMaxFreq (default=6854.0): Maximum frequency of the mel filter bank
        #- MelMinFreq (default=130.0): Minimum frequency of the mel filter bank
        #- MelNbFilters (default=40): Number of mel filters
        #- blockSize (default=1024): output frames size
        #- stepSize (default=512): step between consecutive frames

        # Build a dataflow object using FeaturePlan
        fp = FeaturePlan(sample_rate=sample_rate)

        # Multiple features can be added to the plan by calling addFeature() repeatedly
        fp.addFeature('mfcc: MFCC')
        #('mfcc: MFCC CepsIgnoreFirstCoeff=0 \
        #CepsNbCoeffs=13 FFTWindow=Hanning MelMaxFreq=6854\
        #MelMinFreq=130 MelNbFilters=40 blockSize=1024 stepSize=512')

        # Get dataflow
        df = fp.getDataFlow()

        engine = Engine()
        engine.load(df)

        # extract features from audio using AudioFileProcessor
        afp = AudioFileProcessor()
        afp.processFile(engine, audio_location)

        # features array holds all the extracted features
        features = engine.readAllOutputs()

        # returns the array of features extracted
        return features
Example #20
    def _SpectralFlux(self, audio_location, sample_rate):
        # This function behaves the same as 'python yaafe.py -r SAMPLERATE -f \
        #               "flux: SpectralFlux PARAMETERS" WAV-LOCATION'
        # SAMPLERATE : Samplerate of the file being processed
        # - FFTLength (default=0): Frame length on which to perform the FFT. Original
        #   frame is padded with zeros or truncated to reach this size. If 0 then
        #   use original frame length.
        # - FFTWindow (default=Hanning): Weighting window to apply before fft. Hanning|Hamming|None
        # - FluxSupport (default=All): support of the flux computation. If 'All' then
        #   use all bins (default); if 'Increase' then use only bins which are increasing
        # - blockSize (default=1024): output frames size
        # - stepSize (default=512): step between consecutive frames

        # Build a dataflow object using FeaturePlan
        fp = FeaturePlan(sample_rate=sample_rate)

        # Multiple features can be added to the plan by calling addFeature() repeatedly
        fp.addFeature('Flux: SpectralFlux')
        #('flux: SpectralFlux FFTLength=0 FFTWindow=Hanning FluxSupport=All\
        # blockSize=1024 stepSize=512')

        # Get dataflow
        df = fp.getDataFlow()

        # Configure engine
        engine = Engine()
        engine.load(df)

        # extract features from audio using AudioFileProcessor
        afp = AudioFileProcessor()
        afp.processFile(engine, audio_location)

        # features array holds all the extracted features
        features = engine.readAllOutputs()

        # returns the array of features extracted
        return features
Example #21
def process_audio(audioFile, engine):
    # Temporarily move the file into the working directory so yaafe sees only its basename
    rename(audioFile, basename(audioFile))
    afp = AudioFileProcessor()
    afp.setOutputFormat('csv', config['OUTPUT_FOLDER'], {'Precision': '8'})
    # afp.setOutputFormat('h5', OUTPUT_FOLDER_NAME, {'mode':'overwrite'})
    afp.processFile(engine, basename(audioFile))
    rename(basename(audioFile), audioFile)  # move the file back to its original location

    return
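
process_audio() expects an engine already loaded with a dataflow; a minimal sketch of preparing one (the feature choice and audio path are assumptions, and config['OUTPUT_FOLDER'], rename and basename come from the surrounding module):

from yaafelib import FeaturePlan, Engine

fp = FeaturePlan(sample_rate=44100)
fp.addFeature('mfcc: MFCC blockSize=1024 stepSize=512')  # assumed plan; the original is not shown
engine = Engine()
engine.load(fp.getDataFlow())

# With the 'csv' output format set inside process_audio, processFile writes one file
# per feature, e.g. <OUTPUT_FOLDER>/song.wav.mfcc.csv, instead of keeping results in memory.
process_audio('/data/song.wav', engine)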
Example #22
def main(argv):
    if len(argv) != 2:
        print 'usage: python %s foo.mp3' % (argv[0])
        return
    fp = FeaturePlan(sample_rate=SAMPLE_RATE, resample=True, time_start=TIME_START, time_limit=TIME_LIMIT)
    fp.addFeature("frames: Frames blockSize={0} stepSize={1}".format(
        SLICE_WINSIZE, SLICE_STEPSIZE))
    df = fp.getDataFlow()
    engine = Engine()
    engine.load(df)
    afp = AudioFileProcessor()

    song_path = argv[1]
    assert os.path.exists(song_path)
    afp.processFile(engine, song_path)
    frames = engine.readOutput('frames')
    frames = np.concatenate(frames)
    print 'time start: %ss' % TIME_START
    print 'time limit: %ss' % TIME_LIMIT
    print 'duration:', 1. * frames.size / SAMPLE_RATE

    from pylab import plot, show
    plot(frames)
    show()
  def __init__(self, **args):
    self.engine = Engine()
    self.afp = AudioFileProcessor()

    self.block_size = args.get('block_size', 1024)
    self.step_size = args.get('step_size', 512)
class Yaafe(EnhancedObject):
    '''Yaafe toolbox wrapper. To be used with a Database object.

    Attributes:
        sample_rate     The files' sample rate
        plan_filename   The Feature Plan filename

    Methods:
        process(audiofile)          Process audiofile and return features
        get_X(entries_list, feat)   Fetch array of processed data from Database
        get_y                       Fetch subdir indices from Database

    Init:
        Yaafe(sample_rate, feature_plan)
    '''
    _features = {
        'spec_rolloff': ("SpectralRolloff blockSize=512 stepSize=128 "
                         "> StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'spec_shape': ("SpectralShapeStatistics blockSize=512 stepSize=128 "
                       "> StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'spec_flux': ("SpectralFlux blockSize=512 stepSize=128 >"
                      "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'amp_mod': ("AmplitudeModulation blockSize=512 stepSize=128 >"
                    "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'auto': ("AutoCorrelation  blockSize=512 stepSize=128 >"
                 "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'lpc': ("LPC  blockSize=512 stepSize=128 > StatisticalIntegrator "
                "NbFrames=40 StepNbFrames=8"),
        'loudness': ("Loudness blockSize=512 stepSize=128 >"
                     "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'mfcc': ("MFCC blockSize=512 stepSize=128 > StatisticalIntegrator "
                 "NbFrames=40 StepNbFrames=8"),
        'mel_spectrum': ("MelSpectrum blockSize=512, stepSize=128 >"
                         "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'obsi': ("OBSI blockSize=512 stepSize=128 > StatisticalIntegrator "
                 "NbFrames=40 StepNbFrames=8"),
        'obsir': ("OBSIR blockSize=512 stepSize=128 >"
                  "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'perc_sharp': ("PerceptualSharpness blockSize=512 stepSize=128 >"
                       "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'perc_spread': ("PerceptualSpread blockSize=512 stepSize=128 >"
                        "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'spect_crest': ("SpectralCrestFactorPerBand blockSize=512 "
                        "stepSize=128 > StatisticalIntegrator NbFrames=40 "
                        "StepNbFrames=8"),
        'spec_decr': ("SpectralDecrease blockSize=512 stepSize=128 >"
                      "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'spect_flat': ("SpectralFlatness blockSize=512 stepSize=128 >"
                       "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'spect_flat_band': ("SpectralFlatnessPerBand blockSize=512 "
                            "stepSize=128 > StatisticalIntegrator NbFrames=40 "
                            "StepNbFrames=8"),
        'spect_slope': ("SpectralSlope blockSize=512 stepSize=128 >"
                        "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'spect_var': ("SpectralVariation blockSize=512 stepSize=128 >"
                      "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'temp_shape': ("TemporalShapeStatistics blockSize=512 stepSize=128 "
                       "> StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'zcr': ("ZCR blockSize=512 stepSize=128 > StatisticalIntegrator "
                "NbFrames=40 StepNbFrames=8"),
        'env_shape': ("EnvelopeShapeStatistics blockSize=512 stepSize=128"
                      " > StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'comp_onest': ("ComplexDomainOnsetDetection blockSize=512 "
                       "stepSize=128 > StatisticalIntegrator NbFrames=40 "
                       "StepNbFrames=8"),
        }

    def __init__(self, sample_rate, features=None):
        if features is None:
            features = self._features
        self.sample_rate = sample_rate
        self.initialize(features)
        self.features = features

    def initialize(self, feature_dict):
        """ Run the required boilerplate for yaafe """
        self.feature_dict = feature_dict
        self.fp = FeaturePlan(
            sample_rate=self.sample_rate,
            normalize=0.98)
        for name, desc in self.feature_dict.items():
            self.fp.addFeature("{0}: {1}".format(name, desc))
        self.df = self.fp.getDataFlow()
        self.engine = Engine()
        self.engine.load(self.df)
        self.afp = AudioFileProcessor()
        return self

    def save_fplan(self, name):
        """ Save a feature plan (text file) """
        text_file = open("{}.txt".format(name), 'w')
        for name, desc in self.features.items():
            text_file.write("{}: {}".format(name, desc))
        text_file.close()

    def process(self, audiofile):
        """ Process function for running a file through yaafe's
            feature extractor
        """
        self.afp.processFile(self.engine, audiofile)
        out = self.engine.readAllOutputs()
        self.engine.flush()
        return sorted(out)
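
A usage sketch for the wrapper above ('clip.wav' is a placeholder path; note that process() returns sorted(out), i.e. the sorted output names rather than the feature arrays themselves):

yf = Yaafe(sample_rate=44100)   # falls back to the default _features plan
names = yf.process('clip.wav')  # sorted list of output names (see process() above)
yf.save_fplan('plan')           # writes plan.txt with one "name: definition" entry per feature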
Example #25
def detect(wav_path, ans_path=None):
    sample_rate = 8000
    block_size = 1024
    step_size = block_size / 2
    n_band = block_size / 2
    freq_bound = [i * sample_rate / 2. / n_band for i in range(n_band + 1)]

    plan = FeaturePlan(sample_rate=sample_rate, resample=True)
    plan.addFeature(
        'power_spectrum: PowerSpectrum blockSize=%d stepSize=%d' % (
            block_size, step_size
        )
    )
    dataflow = plan.getDataFlow()
    afp = AudioFileProcessor()
    engine = Engine()
    engine.load(dataflow)
    afp.processFile(engine, wav_path)
    spectrogram = engine.readOutput('power_spectrum')
    seq = []
    for spectrum in spectrogram:
        mean_mag = np.mean(spectrum)
        if mean_mag <= SILENT_MAG_THRESHOLD:
            seq.append(SILENT_KEY)
            continue
        lower_data = (-1, -1)
        upper_data = (-1, -1)

        for target_idx, target_freq in itertools.chain(
            enumerate(LOWER_FREQS), enumerate(UPPER_FREQS)
        ):
            idx = bisect.bisect(freq_bound, target_freq)
            assert idx > 0
            freq1 = freq_bound[idx - 1]
            mag1 = spectrum[idx - 1]
            freq2 = freq_bound[idx]
            mag2 = spectrum[idx]
            w1 = 1. * (freq2 - target_freq) / (freq2 - freq1)
            w2 = 1. - w1
            target_mag = (w1 * mag1 + w2 * mag2)

            if target_mag > mean_mag * 2:
                if target_freq < 1000:
                    if target_mag > lower_data[1]:
                        lower_data = (target_idx, target_mag)
                else:
                    if target_mag > upper_data[1]:
                        upper_data = (target_idx, target_mag)

        lower_idx = lower_data[0]
        upper_idx = upper_data[0]
        if lower_idx == -1 or upper_idx == -1:
            seq.append(UNKNOWN_KEY)
        else:
            seq.append(KEYS[lower_idx * len(LOWER_FREQS) + upper_idx])

    ans = purge_seq(seq)
    if ans_path is not None:
        with open(ans_path) as fh:
            for i, line in enumerate(fh):
                line = line.strip()
                assert line == ans[i], "%s != %s" % (line, ans[i])
                print '[%d] %s' % (i, line)
    else:
        for i, line in enumerate(ans):
            print '[%d] %s' % (i, line)
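
The constants used by detect() are not part of the snippet; a plausible set, assuming standard DTMF signaling (SILENT_MAG_THRESHOLD is a tuning value, and purge_seq, presumably collapsing the per-frame sequence into key presses, stays project-specific):

# Assumed definitions, not shown in the original snippet: standard DTMF frequency groups
LOWER_FREQS = [697, 770, 852, 941]        # Hz, row group (all below 1000 Hz, as detect() expects)
UPPER_FREQS = [1209, 1336, 1477, 1633]    # Hz, column group
KEYS = ['1', '2', '3', 'A',
        '4', '5', '6', 'B',
        '7', '8', '9', 'C',
        '*', '0', '#', 'D']               # indexed as KEYS[lower_idx * len(LOWER_FREQS) + upper_idx]
SILENT_KEY = ' '
UNKNOWN_KEY = '?'
SILENT_MAG_THRESHOLD = 1e-6               # placeholder threshold; tune to the recording level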
Example #26
def detect(wav_path, ans_path=None):
    sample_rate = 8000
    block_size = 1024
    step_size = block_size / 2
    n_band = block_size / 2
    freq_bound = [i * sample_rate / 2. / n_band for i in range(n_band + 1)]

    plan = FeaturePlan(sample_rate=sample_rate, resample=True)
    plan.addFeature('power_spectrum: PowerSpectrum blockSize=%d stepSize=%d' %
                    (block_size, step_size))
    dataflow = plan.getDataFlow()
    afp = AudioFileProcessor()
    engine = Engine()
    engine.load(dataflow)
    afp.processFile(engine, wav_path)
    spectrogram = engine.readOutput('power_spectrum')
    seq = []
    for spectrum in spectrogram:
        mean_mag = np.mean(spectrum)
        if mean_mag <= SILENT_MAG_THRESHOLD:
            seq.append(SILENT_KEY)
            continue
        lower_data = (-1, -1)
        upper_data = (-1, -1)

        for target_idx, target_freq in itertools.chain(enumerate(LOWER_FREQS),
                                                       enumerate(UPPER_FREQS)):
            idx = bisect.bisect(freq_bound, target_freq)
            assert idx > 0
            freq1 = freq_bound[idx - 1]
            mag1 = spectrum[idx - 1]
            freq2 = freq_bound[idx]
            mag2 = spectrum[idx]
            w1 = 1. * (freq2 - target_freq) / (freq2 - freq1)
            w2 = 1. - w1
            target_mag = (w1 * mag1 + w2 * mag2)

            if target_mag > mean_mag * 2:
                if target_freq < 1000:
                    if target_mag > lower_data[1]:
                        lower_data = (target_idx, target_mag)
                else:
                    if target_mag > upper_data[1]:
                        upper_data = (target_idx, target_mag)

        lower_idx = lower_data[0]
        upper_idx = upper_data[0]
        if lower_idx == -1 or upper_idx == -1:
            seq.append(UNKNOWN_KEY)
        else:
            seq.append(KEYS[lower_idx * len(LOWER_FREQS) + upper_idx])

    ans = purge_seq(seq)
    if ans_path is not None:
        with open(ans_path) as fh:
            for i, line in enumerate(fh):
                line = line.strip()
                assert line == ans[i], "%s != %s" % (line, ans[i])
                print '[%d] %s' % (i, line)
    else:
        for i, line in enumerate(ans):
            print '[%d] %s' % (i, line)
Example #27
class Yaafe(EnhancedObject):
    '''Yaafe toolbox wrapper. To be used with a Database object.

    Attributes:
        sample_rate     The files' sample rate
        plan_filename   The Feature Plan filename

    Methods:
        process(audiofile)          Process audiofile and return features
        get_X(entries_list, feat)   Fetch array of processed data from Database
        get_y                       Fetch subdir indices from Database

    Init:
        Yaafe(sample_rate, feature_plan)
    '''
    _features = {
        'spec_rolloff': ("SpectralRolloff blockSize=512 stepSize=128 "
                         "> StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'spec_shape': ("SpectralShapeStatistics blockSize=512 stepSize=128 "
                       "> StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'spec_flux': ("SpectralFlux blockSize=512 stepSize=128 >"
                      "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'amp_mod': ("AmplitudeModulation blockSize=512 stepSize=128 >"
                    "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'auto': ("AutoCorrelation  blockSize=512 stepSize=128 >"
                 "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'lpc': ("LPC  blockSize=512 stepSize=128 > StatisticalIntegrator "
                "NbFrames=40 StepNbFrames=8"),
        'loudness': ("Loudness blockSize=512 stepSize=128 >"
                     "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'mfcc': ("MFCC blockSize=512 stepSize=128 > StatisticalIntegrator "
                 "NbFrames=40 StepNbFrames=8"),
        'mel_spectrum': ("MelSpectrum blockSize=512, stepSize=128 >"
                         "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'obsi': ("OBSI blockSize=512 stepSize=128 > StatisticalIntegrator "
                 "NbFrames=40 StepNbFrames=8"),
        'obsir': ("OBSIR blockSize=512 stepSize=128 >"
                  "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'perc_sharp': ("PerceptualSharpness blockSize=512 stepSize=128 >"
                       "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'perc_spread': ("PerceptualSpread blockSize=512 stepSize=128 >"
                        "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'spect_crest': ("SpectralCrestFactorPerBand blockSize=512 "
                        "stepSize=128 > StatisticalIntegrator NbFrames=40 "
                        "StepNbFrames=8"),
        'spec_decr': ("SpectralDecrease blockSize=512 stepSize=128 >"
                      "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'spect_flat': ("SpectralFlatness blockSize=512 stepSize=128 >"
                       "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'spect_flat_band': ("SpectralFlatnessPerBand blockSize=512 "
                            "stepSize=128 > StatisticalIntegrator NbFrames=40 "
                            "StepNbFrames=8"),
        'spect_slope': ("SpectralSlope blockSize=512 stepSize=128 >"
                        "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'spect_var': ("SpectralVariation blockSize=512 stepSize=128 >"
                      "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'temp_shape': ("TemporalShapeStatistics blockSize=512 stepSize=128 "
                       "> StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'zcr': ("ZCR blockSize=512 stepSize=128 > StatisticalIntegrator "
                "NbFrames=40 StepNbFrames=8"),
        'env_shape': ("EnvelopeShapeStatistics blockSize=512 stepSize=128"
                      " > StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'comp_onest': ("ComplexDomainOnsetDetection blockSize=512 "
                       "stepSize=128 > StatisticalIntegrator NbFrames=40 "
                       "StepNbFrames=8"),
    }

    def __init__(self, sample_rate, features=None):
        if features is None:
            features = self._features
        self.sample_rate = sample_rate
        self.initialize(features)
        self.features = features

    def initialize(self, feature_dict):
        """ Run the required boilerplate for yaafe """
        self.feature_dict = feature_dict
        self.fp = FeaturePlan(sample_rate=self.sample_rate, normalize=0.98)
        for name, desc in self.feature_dict.items():
            self.fp.addFeature("{0}: {1}".format(name, desc))
        self.df = self.fp.getDataFlow()
        self.engine = Engine()
        self.engine.load(self.df)
        self.afp = AudioFileProcessor()
        return self

    def save_fplan(self, name):
        """ Save a feature plan (text file) """
        text_file = open("{}.txt".format(name), 'w')
        for name, desc in self.features.items():
            text_file.write("{}: {}".format(name, desc))
        text_file.close()

    def process(self, audiofile):
        """ Process function for running a file through yaafe's
            feature extractor
        """
        self.afp.processFile(self.engine, audiofile)
        out = self.engine.readAllOutputs()
        self.engine.flush()
        return sorted(out)
""" Stand-alone application to demonstrate yaafe's transformations """
import numpy as np
import matplotlib.pyplot as plt
from yaafelib import FeaturePlan, Engine, AudioFileProcessor


FPLAN = FeaturePlan(sample_rate=44100)
FPLAN.addFeature("mfcc: MFCC blockSize=512 stepSize=128")
FPLAN.addFeature("mfcc_stat: MFCC blockSize=512 stepSize=128 > "
                 "StatisticalIntegrator NbFrames=40 StepNbFrames=8")
ENGINE = Engine()
ENGINE.load(FPLAN.getDataFlow())
PROCESSOR = AudioFileProcessor()

PROCESSOR.processFile(ENGINE, 'track.wav')
DATA = ENGINE.readAllOutputs()
ENGINE.flush()

X_MFCC = DATA['mfcc']
X_MFCC_STAT = DATA['mfcc_stat']

MFCC_DESC = list()
for i in range(1, 14):
    desc = "Average for Band {}".format(i)
    MFCC_DESC.append(desc)
for i in range(1, 14):
    desc = "Standard Dev. for Band {}".format(i)
    MFCC_DESC.append(desc)

plt.ion()
FIG = plt.figure()
Example #29
""" Stand-alone application to demonstrate yaafe's transformations """
import numpy as np
import matplotlib.pyplot as plt
from yaafelib import FeaturePlan, Engine, AudioFileProcessor

FPLAN = FeaturePlan(sample_rate=44100)
FPLAN.addFeature("mfcc: MFCC blockSize=512 stepSize=128")
FPLAN.addFeature("mfcc_stat: MFCC blockSize=512 stepSize=128 > "
                 "StatisticalIntegrator NbFrames=40 StepNbFrames=8")
ENGINE = Engine()
ENGINE.load(FPLAN.getDataFlow())
PROCESSOR = AudioFileProcessor()

PROCESSOR.processFile(ENGINE, 'track.wav')
DATA = ENGINE.readAllOutputs()
ENGINE.flush()

X_MFCC = DATA['mfcc']
X_MFCC_STAT = DATA['mfcc_stat']

MFCC_DESC = list()
for i in range(1, 14):
    desc = "Average for Band {}".format(i)
    MFCC_DESC.append(desc)
for i in range(1, 14):
    desc = "Standard Dev. for Band {}".format(i)
    MFCC_DESC.append(desc)

plt.ion()
FIG = plt.figure()
FIG.set_size_inches(14, 8)
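
The demo stops after sizing the figure; one way it could continue, assuming the StatisticalIntegrator output follows the mean-then-standard-deviation column order that MFCC_DESC labels:

AX1 = FIG.add_subplot(2, 1, 1)
AX1.imshow(X_MFCC.T, origin='lower', aspect='auto')  # raw MFCC matrix, one column per frame
AX1.set_title('MFCC')

AX2 = FIG.add_subplot(2, 1, 2)
AX2.plot(X_MFCC_STAT[:, 0], label=MFCC_DESC[0])      # "Average for Band 1"
AX2.plot(X_MFCC_STAT[:, 13], label=MFCC_DESC[13])    # "Standard Dev. for Band 1"
AX2.legend()
plt.show()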