def test_other_slaney(self):
    """Set Slaney mel coefficients and read them back for several window sizes.

    Nothing is asserted on the values; the test passes when none of the
    calls raise.
    """
    bank = filterbank(40, 512 * 2)
    bank.set_mel_coeffs_slaney(44100)
    _ = bank.get_coeffs()
    # Same exercise at 32 kHz over a range of window sizes.
    for size in (256, 512, 1024, 2048, 4096):
        bank = filterbank(40, size)
        bank.set_mel_coeffs_slaney(32000)
        _ = bank.get_coeffs()
def test_other_slaney(self):
    """Check that Slaney mel coefficients come back as a numpy array.

    The check is done once for a 1024-sample window at 44.1 kHz and then
    for several window sizes at 32 kHz.
    """
    bank = filterbank(40, 512 * 2)
    bank.set_mel_coeffs_slaney(44100)
    self.assertIsInstance(bank.get_coeffs(), np.ndarray)
    for size in (256, 512, 1024, 2048, 4096):
        bank = filterbank(40, size)
        bank.set_mel_coeffs_slaney(32000)
        self.assertIsInstance(bank.get_coeffs(), np.ndarray)
def test_triangle_freqs_zeros(self):
    """Triangle-band filterbank output is 0 for a freshly created cvec."""
    bank = filterbank(9, 1024)
    band_edges = array(
        [40, 80, 200, 400, 800, 1600, 3200, 6400, 12800, 15000, 24000],
        dtype=float_type)
    bank.set_triangle_bands(band_edges, 48000)
    # Reading the coefficients is only exercised here, not checked.
    _ = bank.get_coeffs().T
    assert_equal(bank(cvec(1024)), 0)
def getMelEnergy(path):
    """Compute Slaney mel-band energies for every hop of an audio file.

    Args:
        path: Location of the audio file, handed to ``source``.

    Returns:
        A 2-D array with 40 columns. Row 0 is all zeros (the seed row the
        original accumulator started from, kept for compatibility); each
        following row is the filterbank output for one hop.
    """
    win_s = 512  # fft size
    # Integer division: the hop size is a sample count and must remain an
    # int on Python 3, where ``/`` would yield a float.
    hop_s = win_s // 4
    s = source(path, 0, hop_s)
    # Use whatever rate the opened source reports.
    samplerate = s.samplerate
    pv = pvoc(win_s, hop_s)
    f = filterbank(40, win_s)
    f.set_mel_coeffs_slaney(samplerate)

    rows = [np.zeros((40, ))]
    while True:
        samples, read = s()
        # Copy every result before storing it: the filterbank may hand
        # back the same output buffer on each call.
        rows.append(np.array(f(pv(samples)), copy=True))
        # A short read marks the end of the file.
        if read < hop_s:
            break
    # Stack once at the end instead of re-allocating on every hop.
    return np.vstack(rows)
def test_mfcc_coeffs(self):
    """Slaney filterbank output stays strictly between 0 and 1 for a random norm."""
    bank = filterbank(40, 512)
    spectrum = cvec(512)
    bank.set_mel_coeffs_slaney(44100)
    n_bins = int(512 / 2) + 1
    spectrum.norm[:] = np.random.random((n_bins,)).astype(float_type)
    assert_equal(bank(spectrum) < 1., True)
    assert_equal(bank(spectrum) > 0., True)
def test_random_coeffs(self):
    """Filterbank with normalised random coefficients yields values in (0, 1).

    The coefficient matrix is scaled so that all of its entries sum to 1
    before being loaded into the filterbank.
    """
    f = filterbank(40, 512)
    c = cvec(512)
    # Floor division keeps the bin count an int on Python 3; a float
    # dimension is rejected by numpy's random generator.
    n_bins = 512 // 2 + 1
    r = random.random([40, n_bins]).astype('float32')
    r /= r.sum()
    f.set_coeffs(r)
    c.norm[:] = random.random((n_bins,)).astype('float32')
    assert_equal(f(c) < 1., True)
    assert_equal(f(c) > 0., True)
def test_random_coeffs(self):
    """Filterbank with normalised random coefficients yields values in (0, 1)."""
    win_s = 128
    n_bins = int(win_s / 2) + 1
    bank = filterbank(40, win_s)
    spectrum = cvec(win_s)
    # Random coefficient matrix scaled so that its entries sum to 1.
    weights = np.random.random([40, n_bins]).astype(float_type)
    weights /= weights.sum()
    bank.set_coeffs(weights)
    spectrum.norm[:] = np.random.random((n_bins,)).astype(float_type)
    assert_equal(bank(spectrum) < 1., True)
    assert_equal(bank(spectrum) > 0., True)
def setup(self, channels=None, samplerate=None, blocksize=None,
          totalframes=None):
    """Forward the stream parameters to the parent and rebuild the analysers.

    The phase vocoder and the 40-band filterbank are created from the
    instance's input block/step sizes; the filterbank is then given
    Slaney mel coefficients for ``samplerate``. Counters and the result
    list are reset.
    """
    super(AubioMelEnergy, self).setup(
        channels, samplerate, blocksize, totalframes)
    self.n_filters, self.n_coeffs = 40, 13
    self.pvoc = pvoc(self.input_blocksize, self.input_stepsize)
    bank = filterbank(self.n_filters, self.input_blocksize)
    self.melenergy = bank
    bank.set_mel_coeffs_slaney(samplerate)
    self.block_read = 0
    self.melenergy_results = []
def test_triangle_freqs_ones(self):
    """Triangle-band filterbank on a unit-norm spectrum matches reference values."""
    bank = filterbank(9, 1024)
    band_edges = array(
        [40, 80, 200, 400, 800, 1600, 3200, 6400, 12800, 15000, 24000],
        dtype=float_type)
    bank.set_triangle_bands(band_edges, 48000)
    # Reading the coefficients is only exercised here, not checked.
    _ = bank.get_coeffs().T
    flat = cvec(1024)
    flat.norm[:] = 1
    expected = [0.02070313, 0.02138672, 0.02127604, 0.02135417,
                0.02133301, 0.02133301, 0.02133311, 0.02133334,
                0.02133345]
    assert_almost_equal(bank(flat), expected)
def __init__(self):
    """Create the analyser with a 1024-sample block and a quarter-block step.

    Sets up the phase vocoder, a 40-band filterbank, the read counter and
    the (initially empty) list of mel-energy results.
    """
    super(AubioMelEnergy, self).__init__()
    self.input_blocksize = 1024
    # Floor division keeps the step an integer sample count on Python 3,
    # where ``/`` would produce 256.0.
    self.input_stepsize = self.input_blocksize // 4

    # Aubio Melenergy Initialisation
    self.n_filters = 40
    self.n_coeffs = 13
    self.pvoc = pvoc(self.input_blocksize, self.input_stepsize)
    self.melenergy = filterbank(self.n_filters, self.input_blocksize)
    self.block_read = 0
    self.melenergy_results = []
def __init__(self, options):
    """Build the filterbank, phase vocoder and work buffers from ``options.settings``.

    The ``'fbuckets'`` setting may be the preset name ``'third-octave'`` or
    ``'octave'``; in that case it is replaced in ``options.settings`` by the
    matching list of frequencies. The number of bands is the length of that
    list minus 2.
    """
    settings = options.settings

    # Expand a named bucket preset into its explicit frequency list.
    if settings['fbuckets'] == 'third-octave':
        settings['fbuckets'] = [
            22.4, 25, 31.5, 40, 50, 63, 80, 100, 125, 160, 200, 250, 315,
            400, 500, 630, 800, 1000, 1250, 1600, 2000, 2500, 3150, 4000,
            5000, 6300, 8000, 10000, 12500, 16000, 20000, 22390]
    elif settings['fbuckets'] == 'octave':
        settings['fbuckets'] = [
            22, 31.5, 63, 125, 250, 500, 1000, 2000, 4000, 8000, 16000,
            22720]

    self.midi_processor = None
    # int(x, 0) accepts decimal, hex (0x..), octal and binary literals.
    self.sysex_command_array = [
        int(command, 0) for command in settings['fsysexnum'].split(' ')]

    n_bands = len(settings['fbuckets']) - 2
    self.filter_bank = filterbank(
        n_bands, int(settings['framesize']) * int(settings['fframemult']))
    self.frequencies = fvec(settings['fbuckets'])
    self.filter_bank.set_triangle_bands(
        self.frequencies, int(settings['samplerate']))

    # The vocoder sizes go through float() first, unlike the filterbank.
    frame_size = float(settings['framesize'])
    frame_mult = float(settings['fframemult'])
    window = frame_size * frame_mult
    self.phase_vocoder = pvoc(
        int(window), int(window * float(settings['fhopmult'])))

    self.frame_arrays = np.zeros(
        (int(frame_mult), int(frame_size)), dtype=np.float32)
    self.frame_count = 0
    self.maximum_frequencies = np.zeros((n_bands,), dtype=np.float32)
    self.last_energies = np.zeros((n_bands,), dtype=np.float32)
    self.count_energies = np.zeros(
        (int(settings['fcount']), n_bands), dtype=np.float32)
    self.energy_count = 0
    self.rest_stop = 0
    self.frame_multiplier = int(settings['fframemult'])
    self.count = int(settings['fcount'])
    self.graceful = float(settings['fgraceful'])
def __init__(self, args):
    """Create the phase vocoder and the Slaney mel filterbank from ``args``.

    ``parse_options`` is run once per object with that object's own list
    of valid option names; ``self.options`` is read after each
    ``parse_options``/``remap_pvoc_options`` pair.
    """
    self.args = args

    # Phase vocoder options.
    self.parse_options(args, ['hop_size', 'buf_size'])
    self.remap_pvoc_options(self.options)
    self.pv = aubio.pvoc(**self.options)

    # Filterbank options.
    self.parse_options(args, ['buf_size', 'n_filters'])
    self.remap_pvoc_options(self.options)
    bank = aubio.filterbank(**self.options)
    self.filterbank = bank
    bank.set_mel_coeffs_slaney(args.samplerate)

    super(process_melbands, self).__init__(args)
def test_zero_fmax(self):
    """Call set_mel_coeffs with 0 for both frequency arguments; passes if nothing raises."""
    bank = filterbank(40, 1024)
    bank.set_mel_coeffs(44100, 0, 0)
def test_mel_coeffs_htk(self):
    """Call set_mel_coeffs_htk from 0 up to half the sample rate; passes if nothing raises."""
    samplerate = 44100
    bank = filterbank(40, 1024)
    bank.set_mel_coeffs_htk(samplerate, 0, samplerate / 2)
def test_set_coeffs(self):
    """Coefficients loaded with set_coeffs are returned unchanged by get_coeffs."""
    f = filterbank(40, 512)
    # Floor division keeps the bin count an int on Python 3; a float
    # dimension is rejected by numpy's random generator.
    r = random.random([40, 512 // 2 + 1]).astype('float32')
    f.set_coeffs(r)
    assert_equal(r, f.get_coeffs())
def process_wav_file(filename, samplerate=0, win_s=4096, seconds_window=3,
                     svm=True):
    """Extract per-hop MFCC, pitch and mel-energy features from an audio file.

    Args:
        filename: Path of the audio file; surrounding whitespace is stripped.
        samplerate: Rate handed to ``source``; afterwards the rate reported
            by the opened source is used.
        win_s: Analysis window size. The hop size is ``win_s // 4``.
        seconds_window: Unused; kept so existing callers keep working.
        svm: When true the feature matrix is summarised with
            ``get_mean_avg_etc``; otherwise the raw matrix is returned.

    Returns:
        A one-element list holding either the summary or the feature
        matrix. The matrix has one row per hop and its columns are, in
        order: MFCCs, first difference of the MFCCs, the same difference
        again, the pitch, and the 40 mel-band energies.
    """
    from aubio import source, pitch

    hop_s = win_s // 4
    filename = filename.strip()
    n_filters = 40  # must be 40 for mfcc
    n_coeffs = 13

    s = source(filename, samplerate, hop_s)
    samplerate = s.samplerate

    pitch_o = pitch("yin", win_s, hop_s, samplerate)
    pitch_o.set_unit("midi")
    pitch_o.set_tolerance(0.8)

    # The MFCC and the filterbank each get their own phase vocoder, as in
    # the original code, so neither reads a spectrum the other has seen.
    p = pvoc(win_s, hop_s)
    m = mfcc(win_s, n_filters, n_coeffs, samplerate)
    pv = pvoc(win_s, hop_s)
    f = filterbank(40, win_s)
    f.set_mel_coeffs_slaney(samplerate)

    # Both accumulators start with a zero row; it is sliced off below
    # (and gives the first difference something to subtract from).
    energies = zeros((40, ))
    mfccs = zeros([n_coeffs, ])
    pitches = []
    while True:
        samples, read = s()
        mfccs = vstack((mfccs, m(p(samples))))
        # Appended directly so the imported ``pitch`` class is not rebound.
        pitches.append(pitch_o(samples)[0])
        energies = vstack([energies, f(pv(samples))])
        # A short read marks the end of the file.
        if read < hop_s:
            break

    mfccs1 = diff(mfccs, axis=0)
    # NOTE(review): same computation as mfccs1, not a second-order
    # difference. Left unchanged because the column layout depends on it.
    mfccs2 = diff(mfccs, axis=0)

    pitches = np.array(pitches)
    pitches = pitches.reshape((len(pitches), 1))
    all_data = np.concatenate(
        (mfccs[1:, :], mfccs1, mfccs2, pitches, energies[1:]), 1)

    if svm:
        return [get_mean_avg_etc(all_data)]
    return [all_data]
#! /usr/bin/env python
# Plot a bank of triangular filters built from a list of band boundaries,
# with one band amplified.
from aubio import filterbank, fvec
from pylab import loglog, show, xlim, ylim, xlabel, ylabel, title
from numpy import vstack, arange

win_s = 2048
samplerate = 48000

# Band boundaries in Hz; two fewer filters than boundaries are created.
freq_list = [60, 80, 200, 400, 800, 1600, 3200, 6400, 12800, 24000]
n_filters = len(freq_list) - 2

f = filterbank(n_filters, win_s)
freqs = fvec(freq_list)
f.set_triangle_bands(freqs, samplerate)

# Amplify the band at index 4 by a factor 5 and load the result back.
coeffs = f.get_coeffs()
coeffs[4] *= 5.
f.set_coeffs(coeffs)

# One row of bin frequencies per filter, used as the x axis of the plot.
times = vstack([arange(win_s // 2 + 1) * samplerate / win_s] * n_filters)
# The title now states the gain that is actually applied above (5, not 2).
title(
    'Bank of filters built using a simple list of boundaries\nThe middle band has been amplified by 5.'
)
loglog(times.T, f.get_coeffs().T, '.-')
xlim([50, samplerate / 2])
ylim([1.0e-6, 2.0e-2])
xlabel('log frequency (Hz)')
ylabel('log amplitude')
def test_triangle_freqs_with_wrong_negative(self):
    """set_triangle_bands must raise ValueError when the list holds a negative value."""
    band_edges = [-10, 0, 80]
    bank = filterbank(len(band_edges) - 2, 1024)
    edges_vec = fvec(band_edges)
    with self.assertRaises(ValueError):
        bank.set_triangle_bands(edges_vec, 48000)
def test_random_norm(self):
    """A filterbank whose coefficients were never set outputs 0 for a random norm."""
    f = filterbank(40, 512)
    c = cvec(512)
    # Floor division keeps the bin count an int on Python 3; a float
    # dimension is rejected by numpy's random generator.
    c.norm[:] = random.random((512 // 2 + 1, )).astype('float32')
    assert_equal(f(c), 0)
def test_set_coeffs(self):
    """Coefficients loaded with set_coeffs are returned unchanged by get_coeffs."""
    f = filterbank(40, 512)
    # ``//`` instead of ``/``: on Python 3 true division would make the
    # second dimension a float, which numpy does not accept as a size.
    n_bins = 512 // 2 + 1
    r = random.random([40, n_bins]).astype('float32')
    f.set_coeffs(r)
    assert_equal(r, f.get_coeffs())
def test_triangle_freqs_with_wrong_ordering(self):
    """set_triangle_bands must raise ValueError when the list is not ordered."""
    band_edges = [0, 80, 40]
    bank = filterbank(len(band_edges) - 2, 1024)
    edges_vec = fvec(band_edges)
    with self.assertRaises(ValueError):
        bank.set_triangle_bands(edges_vec, 48000)
def _initialize_melbank(self):
    """Initialize all the melbank related variables.

    Depending on ``self._config["coeffs_type"]`` this builds
    ``self.filterbank`` and ``self.melbank_frequencies``, converts the
    frequencies to ints, derives ``lows_index`` / ``mids_index`` /
    ``highs_index`` from ``FREQUENCY_RANGES_SIMPLE`` and creates the
    shared ``ExpFilter`` instances. The "slaney", "fixed" and
    "fixed_simple" types also overwrite ``self._config["samples"]``.
    """

    # A few different coefficient types for experimentation
    if self._config["coeffs_type"] == "triangle":
        # samples + 2 points, evenly spaced on the mel scale.
        melbank_mel = np.linspace(
            aubio.hztomel(self._config["min_frequency"]),
            aubio.hztomel(self._config["max_frequency"]),
            self._config["samples"] + 2,
        )
        self.melbank_frequencies = np.array(
            [aubio.meltohz(mel) for mel in melbank_mel]).astype(np.float32)
        self.filterbank = aubio.filterbank(self._config["samples"],
                                           self._config["fft_size"])
        self.filterbank.set_triangle_bands(self.melbank_frequencies,
                                           self._config["mic_rate"])
        # Drop the first and last point so `samples` frequencies remain.
        self.melbank_frequencies = self.melbank_frequencies[1:-1]

    if self._config["coeffs_type"] == "bark":
        # samples + 2 points, evenly spaced on 6 * arcsinh(f / 600).
        melbank_bark = np.linspace(
            6.0 * np.arcsinh(self._config["min_frequency"] / 600.0),
            6.0 * np.arcsinh(self._config["max_frequency"] / 600.0),
            self._config["samples"] + 2,
        )
        self.melbank_frequencies = (600.0 * np.sinh(melbank_bark / 6.0)).astype(
            np.float32)
        self.filterbank = aubio.filterbank(self._config["samples"],
                                           self._config["fft_size"])
        self.filterbank.set_triangle_bands(self.melbank_frequencies,
                                           self._config["mic_rate"])
        self.melbank_frequencies = self.melbank_frequencies[1:-1]

    # Slaney coefficients will always produce 40 samples spanning 133Hz to
    # 6000Hz
    if self._config["coeffs_type"] == "slaney":
        self.filterbank = aubio.filterbank(40, self._config["fft_size"])
        self.filterbank.set_mel_coeffs_slaney(self._config["mic_rate"])

        # Slaney frequencies are linear-log spaced where 133Hz to 1000Hz is linear
        # spaced and 1000Hz to 6000Hz is log spaced. It also produces a hardcoded
        # 40 samples.
        lowestFrequency = 133.3
        linearSpacing = 66.6666666
        logSpacing = 1.0711703
        linearFilters = 13
        logFilters = 27
        linearSpacedFreqs = (lowestFrequency +
                             np.arange(0, linearFilters) * linearSpacing)
        logSpacedFreqs = linearSpacedFreqs[-1] * np.power(
            logSpacing, np.arange(1, logFilters + 1))

        # The configured sample count is overridden for this type.
        self._config["samples"] = 40
        self.melbank_frequencies = np.hstack(
            (linearSpacedFreqs, logSpacedFreqs)).astype(np.float32)

    # Standard mel coefficients
    if self._config["coeffs_type"] == "mel":
        self.filterbank = aubio.filterbank(self._config["samples"],
                                           self._config["fft_size"])
        self.filterbank.set_mel_coeffs(
            self._config["mic_rate"],
            self._config["min_frequency"],
            self._config["max_frequency"],
        )

        # Frequencies will be linearly spaced in the mel scale
        melbank_mel = np.linspace(
            aubio.hztomel(self._config["min_frequency"]),
            aubio.hztomel(self._config["max_frequency"]),
            self._config["samples"],
        )
        self.melbank_frequencies = np.array(
            [aubio.meltohz(mel) for mel in melbank_mel])

    # HTK mel coefficients
    if self._config["coeffs_type"] == "htk":
        self.filterbank = aubio.filterbank(self._config["samples"],
                                           self._config["fft_size"])
        self.filterbank.set_mel_coeffs_htk(
            self._config["mic_rate"],
            self._config["min_frequency"],
            self._config["max_frequency"],
        )

        # Frequencies will be linearly spaced in the mel scale
        melbank_mel = np.linspace(
            aubio.hztomel(self._config["min_frequency"]),
            aubio.hztomel(self._config["max_frequency"]),
            self._config["samples"],
        )
        self.melbank_frequencies = np.array(
            [aubio.meltohz(mel) for mel in melbank_mel])

    # Coefficients based on Scott's audio reactive led project
    if self._config["coeffs_type"] == "scott":
        (
            melmat,
            center_frequencies_hz,
            freqs,
        ) = mel.compute_melmat(
            num_mel_bands=self._config["samples"],
            freq_min=self._config["min_frequency"],
            freq_max=self._config["max_frequency"],
            num_fft_bands=int(self._config["fft_size"] // 2) + 1,
            sample_rate=self._config["mic_rate"],
        )
        self.filterbank = aubio.filterbank(self._config["samples"],
                                           self._config["fft_size"])
        self.filterbank.set_coeffs(melmat.astype(np.float32))
        self.melbank_frequencies = center_frequencies_hz

    # "Mel"-spacing based on Scott's audio reactive led project. This
    # should in theory be the same as the above, but there seems to be
    # slight differences. Leaving both for science!
    if self._config["coeffs_type"] == "scott_mel":

        def hertz_to_scott(freq):
            return 3340.0 * log(1 + (freq / 250.0), 9)

        def scott_to_hertz(scott):
            return 250.0 * (9**(scott / 3340.0)) - 250.0

        melbank_scott = np.linspace(
            hertz_to_scott(self._config["min_frequency"]),
            hertz_to_scott(self._config["max_frequency"]),
            self._config["samples"] + 2,
        )
        self.melbank_frequencies = np.array([
            scott_to_hertz(scott) for scott in melbank_scott
        ]).astype(np.float32)
        self.filterbank = aubio.filterbank(self._config["samples"],
                                           self._config["fft_size"])
        self.filterbank.set_triangle_bands(self.melbank_frequencies,
                                           self._config["mic_rate"])
        self.melbank_frequencies = self.melbank_frequencies[1:-1]

    # Modified scott_mel, spreads out the low range and compresses the
    # highs
    if self._config["coeffs_type"] == "matt_mel":

        # NOTE(review): the forward mapping uses log base 13 while the
        # inverse below uses base 10, so the two are not exact inverses.
        # Presumably deliberate (see the comment above) - confirm.
        def hertz_to_matt(freq):
            return 3700.0 * log(1 + (freq / 200.0), 13)

        def matt_to_hertz(matt):
            return 200.0 * (10**(matt / 3700.0)) - 200.0

        melbank_matt = np.linspace(
            hertz_to_matt(self._config["min_frequency"]),
            hertz_to_matt(self._config["max_frequency"]),
            self._config["samples"] + 2,
        )
        self.melbank_frequencies = np.array([
            matt_to_hertz(matt) for matt in melbank_matt
        ]).astype(np.float32)
        self.filterbank = aubio.filterbank(self._config["samples"],
                                           self._config["fft_size"])
        self.filterbank.set_triangle_bands(self.melbank_frequencies,
                                           self._config["mic_rate"])
        self.melbank_frequencies = self.melbank_frequencies[1:-1]

    if self._config["coeffs_type"] == "fixed":
        # One band per entry of FREQUENCY_RANGES, using each entry's
        # .min / .max as the band edges.
        ranges = FREQUENCY_RANGES.values()
        upper_edges_hz = np.zeros(len(ranges))
        lower_edges_hz = np.zeros(len(ranges))
        for idx, value in enumerate(ranges):
            lower_edges_hz[idx] = value.min
            upper_edges_hz[idx] = value.max

        (
            melmat,
            center_frequencies_hz,
            freqs,
        ) = mel.compute_melmat_from_range(
            lower_edges_hz=lower_edges_hz,
            upper_edges_hz=upper_edges_hz,
            num_fft_bands=int(self._config["fft_size"] // 2) + 1,
            sample_rate=self._config["mic_rate"],
        )
        # The configured sample count is overridden for this type.
        self._config["samples"] = len(center_frequencies_hz)
        self.filterbank = aubio.filterbank(self._config["samples"],
                                           self._config["fft_size"])
        self.filterbank.set_coeffs(melmat.astype(np.float32))
        self.melbank_frequencies = center_frequencies_hz

    if self._config["coeffs_type"] == "fixed_simple":
        # Same as "fixed", but driven by FREQUENCY_RANGES_SIMPLE.
        ranges = FREQUENCY_RANGES_SIMPLE.values()
        upper_edges_hz = np.zeros(len(ranges))
        lower_edges_hz = np.zeros(len(ranges))
        for idx, value in enumerate(ranges):
            lower_edges_hz[idx] = value.min
            upper_edges_hz[idx] = value.max

        (
            melmat,
            center_frequencies_hz,
            freqs,
        ) = mel.compute_melmat_from_range(
            lower_edges_hz=lower_edges_hz,
            upper_edges_hz=upper_edges_hz,
            num_fft_bands=int(self._config["fft_size"] // 2) + 1,
            sample_rate=self._config["mic_rate"],
        )
        self._config["samples"] = len(center_frequencies_hz)
        self.filterbank = aubio.filterbank(self._config["samples"],
                                           self._config["fft_size"])
        self.filterbank.set_coeffs(melmat.astype(np.float32))
        self.melbank_frequencies = center_frequencies_hz

    # From here on the frequencies are integers.
    self.melbank_frequencies = self.melbank_frequencies.astype(int)

    # Normalize the filterbank triangles to a consistent height, the
    # default coeffs (for types other than legacy) will be normalized
    # by the triangles area which results in an uneven melbank
    # NOTE(review): as written this condition is true only for
    # "scott_mel"; the `!= "scott"` half is redundant. Verify intent.
    if (self._config["coeffs_type"] != "scott"
            and self._config["coeffs_type"] == "scott_mel"):
        coeffs = self.filterbank.get_coeffs()
        # Divide each row by its own maximum.
        coeffs /= np.max(coeffs, axis=-1)[:, None]
        self.filterbank.set_coeffs(coeffs)

    # Find the indexes for each of the frequency ranges
    # All three default to 1 in case no frequency falls in a range.
    self.lows_index = self.mids_index = self.highs_index = 1
    for i in range(0, len(self.melbank_frequencies)):
        if (self.melbank_frequencies[i] <
                FREQUENCY_RANGES_SIMPLE["Low (1-250Hz)"].max):
            self.lows_index = i + 1
        elif (self.melbank_frequencies[i] <
              FREQUENCY_RANGES_SIMPLE["Mid (250Hz-4kHz)"].max):
            self.mids_index = i + 1
        elif (self.melbank_frequencies[i] <
              FREQUENCY_RANGES_SIMPLE["High (4kHz-24kHz)"].max):
            self.highs_index = i + 1

    # Build up some of the common filters
    self.mel_gain = ExpFilter(
        np.tile(1e-1, self._config["samples"]),
        alpha_decay=0.01,
        alpha_rise=0.99,
    )
    self.mel_smoothing = ExpFilter(
        np.tile(1e-1, self._config["samples"]),
        alpha_decay=0.2,
        alpha_rise=0.99,
    )
    self.common_filter = ExpFilter(alpha_decay=0.99, alpha_rise=0.01)
def _initialize_melbank(self):
    """Initialize all the melbank related variables.

    Depending on ``self._config['coeffs_type']`` this builds
    ``self.filterbank`` and ``self.melbank_frequencies``, converts the
    frequencies to ints, derives ``lows_index`` / ``mids_index`` /
    ``highs_index`` from ``FREQUENCY_RANGES_SIMPLE`` and creates the
    shared ``ExpFilter`` instances. The 'slaney' type also overwrites
    ``self._config['samples']`` with 40.
    """

    # A few different coefficient types for experimentation
    if self._config['coeffs_type'] == 'triangle':
        # samples + 2 points, evenly spaced on the mel scale.
        melbank_mel = np.linspace(
            aubio.hztomel(self._config['min_frequency']),
            aubio.hztomel(self._config['max_frequency']),
            self._config['samples'] + 2)
        self.melbank_frequencies = np.array(
            [aubio.meltohz(mel) for mel in melbank_mel]).astype(np.float32)
        self.filterbank = aubio.filterbank(self._config['samples'],
                                           self._config['fft_size'])
        self.filterbank.set_triangle_bands(self.melbank_frequencies,
                                           self._config['mic_rate'])
        # Drop the first and last point so `samples` frequencies remain.
        self.melbank_frequencies = self.melbank_frequencies[1:-1]

    if self._config['coeffs_type'] == 'bark':
        # samples + 2 points, evenly spaced on 6 * arcsinh(f / 600).
        melbank_bark = np.linspace(
            6.0 * np.arcsinh(self._config['min_frequency'] / 600.0),
            6.0 * np.arcsinh(self._config['max_frequency'] / 600.0),
            self._config['samples'] + 2)
        self.melbank_frequencies = (600.0 * np.sinh(melbank_bark / 6.0)).astype(
            np.float32)
        self.filterbank = aubio.filterbank(self._config['samples'],
                                           self._config['fft_size'])
        self.filterbank.set_triangle_bands(self.melbank_frequencies,
                                           self._config['mic_rate'])
        self.melbank_frequencies = self.melbank_frequencies[1:-1]

    # Slaney coefficients will always produce 40 samples spanning 133Hz to 6000Hz
    if self._config['coeffs_type'] == 'slaney':
        self.filterbank = aubio.filterbank(40, self._config['fft_size'])
        self.filterbank.set_mel_coeffs_slaney(self._config['mic_rate'])

        # Slaney frequencies are linear-log spaced where 133Hz to 1000Hz is linear
        # spaced and 1000Hz to 6000Hz is log spaced. It also produces a hardcoded
        # 40 samples.
        lowestFrequency = 133.3
        linearSpacing = 66.6666666
        logSpacing = 1.0711703
        linearFilters = 13
        logFilters = 27
        linearSpacedFreqs = lowestFrequency + np.arange(
            0, linearFilters) * linearSpacing
        logSpacedFreqs = linearSpacedFreqs[-1] * np.power(
            logSpacing, np.arange(1, logFilters + 1))

        # The configured sample count is overridden for this type.
        self._config['samples'] = 40
        self.melbank_frequencies = np.hstack(
            (linearSpacedFreqs, logSpacedFreqs)).astype(np.float32)

    # Standard mel coefficients
    if self._config['coeffs_type'] == 'mel':
        self.filterbank = aubio.filterbank(self._config['samples'],
                                           self._config['fft_size'])
        self.filterbank.set_mel_coeffs(self._config['mic_rate'],
                                       self._config['min_frequency'],
                                       self._config['max_frequency'])

        # Frequencies will be linearly spaced in the mel scale
        melbank_mel = np.linspace(
            aubio.hztomel(self._config['min_frequency']),
            aubio.hztomel(self._config['max_frequency']),
            self._config['samples'])
        self.melbank_frequencies = np.array(
            [aubio.meltohz(mel) for mel in melbank_mel])

    # HTK mel coefficients
    if self._config['coeffs_type'] == 'htk':
        self.filterbank = aubio.filterbank(self._config['samples'],
                                           self._config['fft_size'])
        self.filterbank.set_mel_coeffs_htk(self._config['mic_rate'],
                                           self._config['min_frequency'],
                                           self._config['max_frequency'])

        # Frequencies will be linearly spaced in the mel scale
        melbank_mel = np.linspace(
            aubio.hztomel(self._config['min_frequency']),
            aubio.hztomel(self._config['max_frequency']),
            self._config['samples'])
        self.melbank_frequencies = np.array(
            [aubio.meltohz(mel) for mel in melbank_mel])

    # Coefficients based on Scott's audio reactive led project
    if self._config['coeffs_type'] == 'scott':
        (melmat, center_frequencies_hz, freqs) = mel.compute_melmat(
            num_mel_bands=self._config['samples'],
            freq_min=self._config['min_frequency'],
            freq_max=self._config['max_frequency'],
            num_fft_bands=int(self._config['fft_size'] // 2) + 1,
            sample_rate=self._config['mic_rate'])
        self.filterbank = aubio.filterbank(self._config['samples'],
                                           self._config['fft_size'])
        self.filterbank.set_coeffs(melmat.astype(np.float32))
        self.melbank_frequencies = center_frequencies_hz

    # "Mel"-spacing based on Scott's audio reactive led project. This
    # should in theory be the same as the above, but there seems to be
    # slight differences. Leaving both for science!
    if self._config['coeffs_type'] == 'scott_mel':

        def hertz_to_scott(freq):
            return 3340.0 * log(1 + (freq / 250.0), 9)

        def scott_to_hertz(scott):
            return 250.0 * (9**(scott / 3340.0)) - 250.0

        melbank_scott = np.linspace(
            hertz_to_scott(self._config['min_frequency']),
            hertz_to_scott(self._config['max_frequency']),
            self._config['samples'] + 2)
        self.melbank_frequencies = np.array([
            scott_to_hertz(scott) for scott in melbank_scott
        ]).astype(np.float32)
        self.filterbank = aubio.filterbank(self._config['samples'],
                                           self._config['fft_size'])
        self.filterbank.set_triangle_bands(self.melbank_frequencies,
                                           self._config['mic_rate'])
        self.melbank_frequencies = self.melbank_frequencies[1:-1]

    # From here on the frequencies are integers.
    self.melbank_frequencies = self.melbank_frequencies.astype(int)

    # Normalize the filterbank triangles to a consistent height, the
    # default coeffs (for types other than legacy) will be normalized
    # by the triangles area which results in an uneven melbank
    # NOTE(review): as written this condition is true only for
    # 'scott_mel'; the `!= 'scott'` half is redundant. Verify intent.
    if self._config['coeffs_type'] != 'scott' and self._config[
            'coeffs_type'] == 'scott_mel':
        coeffs = self.filterbank.get_coeffs()
        # Divide each row by its own maximum.
        coeffs /= np.max(coeffs, axis=-1)[:, None]
        self.filterbank.set_coeffs(coeffs)

    # Find the indexes for each of the frequency ranges
    # NOTE(review): the loop stops one short of the last frequency, and
    # the three index attributes are only assigned when a frequency
    # falls in the matching range - confirm both are intended.
    for i in range(0, len(self.melbank_frequencies) - 1):
        if self.melbank_frequencies[i] < FREQUENCY_RANGES_SIMPLE['low'].max:
            self.lows_index = i
        elif self.melbank_frequencies[i] < FREQUENCY_RANGES_SIMPLE[
                'mid'].max:
            self.mids_index = i
        elif self.melbank_frequencies[i] < FREQUENCY_RANGES_SIMPLE[
                'high'].max:
            self.highs_index = i

    # Build up some of the common filters
    self.mel_gain = ExpFilter(np.tile(1e-1, self._config['samples']),
                              alpha_decay=0.01,
                              alpha_rise=0.99)
    self.mel_smoothing = ExpFilter(np.tile(1e-1,
                                           self._config['samples']),
                                   alpha_decay=0.2,
                                   alpha_rise=0.99)
    self.common_filter = ExpFilter(alpha_decay=0.99, alpha_rise=0.01)
def test_set_coeffs(self):
    """Coefficients loaded with set_coeffs are returned unchanged by get_coeffs."""
    bank = filterbank(40, 512)
    n_bins = int(512 / 2) + 1
    weights = np.random.random([40, n_bins]).astype(float_type)
    bank.set_coeffs(weights)
    assert_equal(weights, bank.get_coeffs())
def test_norm(self):
    """A filterbank whose coefficients were never set outputs 0 for a unit norm."""
    bank = filterbank(40, 512)
    spectrum = cvec(512)
    spectrum.norm[:] = 1
    assert_equal(bank(spectrum), 0)
def test_mfcc_coeffs_16000(self):
    """Slaney coefficients at 16 kHz / 512 bins match the stored reference file."""
    reference = array_from_text_file('filterbank_mfcc_16000_512.expected')
    bank = filterbank(40, 512)
    bank.set_mel_coeffs_slaney(16000)
    computed = bank.get_coeffs()
    assert_almost_equal(reference, computed)
n_channels = 1 stream = p.open(format=pyaudio_format, channels=n_channels, rate=samplerate, input=True, frames_per_buffer=buffer_size) # setup pitch pitch_o = aubio.pitch("default", win_s, hop_s, samplerate) pitch_o.set_unit("midi") pitch_o.set_tolerance(tolerance) pitch = 0 pitchque = deque([], pitch_samples) # setup filter f = aubio.filterbank(40, win_s) # documentation says 40 for mel coeffs f.set_mel_coeffs_slaney(samplerate) pv = aubio.pvoc(win_s, hop_s) energies_raw = np.zeros((energy_samples, n_energies)) energies_max = np.zeros(n_energies) energies = np.zeros(n_energies) def get_signal(): global pitch, energies_raw, energies, energies_max try: audiobuffer = stream.read(buffer_size) signal = np.fromstring(audiobuffer, dtype=np.float32) pitch_raw = int(pitch_o(signal)[0]) % 255 pitchque.append(pitch_raw)
def test_mfcc_coeffs_get_coeffs(self):
    """A new filterbank exposes an all-zero ndarray of shape (40, 512 / 2 + 1)."""
    bank = filterbank(40, 512)
    table = bank.get_coeffs()
    self.assertIsInstance(table, np.ndarray)
    assert_equal(table, 0)
    expected_shape = (40, 512 / 2 + 1)
    assert_equal(np.shape(table), expected_shape)
import aubio
import numpy as np
import matplotlib.pyplot as plt

# analysis settings: sampling rate (Hz) and length of the fft
samplerate = 48000
win_s = 2048

# custom band boundaries, in Hz
freq_list = [60, 80, 200, 400, 800, 1600, 3200, 6400, 12800, 24000]
# two fewer filters than boundaries
n_filters = len(freq_list) - 2

# build the filterbank and give it triangular bands on those boundaries
f = aubio.filterbank(n_filters, win_s)
freqs = aubio.fvec(freq_list)
f.set_triangle_bands(freqs, samplerate)

# read the coefficients, boost the fifth band (index 4), write them back
coeffs = f.get_coeffs()
coeffs[4] *= 6.
f.set_coeffs(coeffs)

# x axis: the frequency of every fft bin, repeated once per filter
bin_freqs = np.arange(win_s // 2 + 1) * samplerate / win_s
freqs = np.vstack([bin_freqs] * n_filters)

# show the band gains in a loglog plot
plt.title('filterbank built from a list of frequencies\n'
          'The 5th band has been amplified by a factor 6.')
plt.loglog(freqs.T, f.get_coeffs().T, '.-')
def test_filterbank_long_cvec(self):
    """Calling a 512-window filterbank with a 1024-window cvec raises ValueError."""
    bank = filterbank(40, 512)
    too_long = cvec(1024)
    with self.assertRaises(ValueError):
        bank(too_long)
def test_random_norm(self):
    """A filterbank whose coefficients were never set outputs 0 for a random norm."""
    f = filterbank(40, 512)
    c = cvec(512)
    # ``//`` instead of ``/``: on Python 3 true division would make the
    # length a float, which numpy does not accept as a size.
    n_bins = 512 // 2 + 1
    c.norm[:] = random.random((n_bins,)).astype('float32')
    assert_equal(f(c), 0)
def test_filterbank_short_cvec(self):
    """Calling a 512-window filterbank with a 256-window cvec raises ValueError."""
    bank = filterbank(40, 512)
    too_short = cvec(256)
    with self.assertRaises(ValueError):
        bank(too_short)
# Command line: <filename> is required, [samplerate] is optional.
if len(sys.argv) < 2:
    print("Usage: %s <filename> [samplerate]" % sys.argv[0])
    sys.exit(1)
filename = sys.argv[1]
samplerate = 0
if len(sys.argv) > 2:
    samplerate = int(sys.argv[2])
# hop_s and win_s are expected to be defined before this point.
s = source(filename, samplerate, hop_s)
# Continue with the rate reported by the opened source.
samplerate = s.samplerate
pv = pvoc(win_s, hop_s)
f = filterbank(40, win_s)
f.set_mel_coeffs_slaney(samplerate)
energies = zeros((40, ))
o = {}
total_frames = 0
downsample = 2
# NOTE(review): no exit condition and no update of total_frames is
# visible in this part of the loop - presumably they follow below.
while True:
    samples, read = s()
    fftgrain = pv(samples)
    new_energies = f(fftgrain)
    # One output line per hop: time in seconds, then the band energies.
    timestr = '%f' % (total_frames / float(samplerate))
    print('{:s} {:s}'.format(timestr, ' '.join(['%f' % b for b in new_energies])))
def test_slaney(self):
    """Slaney coefficients for a 512 window have shape (40, 512 / 2 + 1)."""
    bank = filterbank(40, 512)
    bank.set_mel_coeffs_slaney(16000)
    table = bank.get_coeffs()
    expected_shape = (40, 512 / 2 + 1)
    assert_equal(shape(table), expected_shape)
def test_slaney(self):
    """Check the shape of the Slaney coefficient table for a 512 window."""
    n_filters, win_s = 40, 512
    bank = filterbank(n_filters, win_s)
    bank.set_mel_coeffs_slaney(16000)
    coeff_shape = shape(bank.get_coeffs())
    assert_equal(coeff_shape, (n_filters, win_s / 2 + 1))
def test_mfcc_coeffs_16000(self):
    """Compare 16 kHz Slaney coefficients with the values stored on disk."""
    bank = filterbank(40, 512)
    bank.set_mel_coeffs_slaney(16000)
    stored = array_from_text_file('filterbank_mfcc_16000_512.expected')
    assert_almost_equal(stored, bank.get_coeffs())
def test_triangle_freqs_wrong_norm(self):
    """set_norm(-1) must raise ValueError."""
    bank = filterbank(10, 1024)
    self.assertRaises(ValueError, bank.set_norm, -1)
def test_members(self):
    """n_filters and win_s report the constructor arguments."""
    bank = filterbank(40, 512)
    observed = [bank.n_filters, bank.win_s]
    assert_equal(observed, [40, 512])
# Command line: <filename> is required, [samplerate] is optional.
# (This fragment uses Python 2 print statements.)
if len(sys.argv) < 2:
    print "Usage: %s <filename> [samplerate]" % sys.argv[0]
    sys.exit(1)
filename = sys.argv[1]
samplerate = 0
if len( sys.argv ) > 2:
    samplerate = int(sys.argv[2])
# hop_s and win_s are expected to be defined before this point.
s = source(filename, samplerate, hop_s)
# Continue with the rate reported by the opened source.
samplerate = s.samplerate
pv = pvoc(win_s, hop_s)
f = filterbank(40, win_s)
f.set_mel_coeffs_slaney(samplerate)
energies = zeros((40,))
o = {}
total_frames = 0
downsample = 2
# NOTE(review): no exit condition and no update of total_frames is
# visible in this part of the loop - presumably they follow below.
while True:
    samples, read = s()
    fftgrain = pv(samples)
    new_energies = f(fftgrain)
    # One output line per hop: time in seconds, then the band energies
    # (the trailing comma keeps both prints on the same line).
    print '%f' % (total_frames / float(samplerate) ),
    print ' '.join(['%f' % b for b in new_energies])
    # Append this hop's energies as a new row.
    energies = vstack( [energies, new_energies] )
def test_phase(self):
    """A filterbank whose coefficients were never set outputs 0 when only the phase is set."""
    bank = filterbank(40, 512)
    spectrum = cvec(512)
    spectrum.phas[:] = np.pi
    assert_equal(bank(spectrum), 0)
def test_mfcc_coeffs_get_coeffs(self):
    """get_coeffs on a new filterbank returns a zero ndarray of the expected shape."""
    n_filters, win_s = 40, 512
    bank = filterbank(n_filters, win_s)
    table = bank.get_coeffs()
    self.assertIsInstance(table, np.ndarray)
    assert_equal(table, 0)
    assert_equal(np.shape(table), (n_filters, win_s / 2 + 1))
def test_random_norm(self):
    """A filterbank whose coefficients were never set outputs 0 for a random norm."""
    bank = filterbank(40, 512)
    spectrum = cvec(512)
    n_bins = int(512 / 2) + 1
    spectrum.norm[:] = np.random.random((n_bins,)).astype(float_type)
    assert_equal(bank(spectrum), 0)
def test_random_norm(self):
    """Random spectrum norm through an unconfigured filterbank gives 0."""
    win_s = 512
    bank = filterbank(40, win_s)
    spectrum = cvec(win_s)
    noise = np.random.random((int(win_s / 2) + 1, ))
    spectrum.norm[:] = noise.astype(float_type)
    assert_equal(bank(spectrum), 0)
def test_members(self):
    """The filterbank exposes its filter count and window size."""
    n_filters, win_s = 40, 512
    bank = filterbank(n_filters, win_s)
    assert_equal([bank.n_filters, bank.win_s], [n_filters, win_s])
def get_mfcc_aubio(file_path):
    """Compute MFCC features plus delta and delta-delta for an audio file.

    The file is read in hops of 160 samples. A first pass collects the
    samples and per-hop filterbank energies; the samples are then passed
    through ``preEmphasis`` and a second pass computes 13 MFCCs per hop.
    The first MFCC column is dropped and the per-hop mean of the
    filterbank energies is appended as the last column.

    Args:
        file_path: Path of the audio file, handed to ``source``.

    Returns:
        A list of rows: the feature rows (one per coefficient, one value
        per hop), followed by the rows of their first difference and the
        rows of their second difference. Each series starts with an
        all-zero entry because the accumulators are seeded with zeros.
    """
    _p = 0.97
    samplerate = 16000
    # for Computing a Spectrum
    win_s = 512  # Window Size
    # Hop Size: number of samples per 0.01 s shift (matches the julius
    # resolution).
    hop_size = 160
    n_filters = 40  # must be 40 for mfcc
    n_coeffs = 13

    src = source(file_path, samplerate, hop_size)
    samplerate = src.samplerate
    total_frames = 0
    pv = pvoc(win_s, hop_size)
    # NOTE(review): the filterbank is created with n_coeffs (13) bands,
    # not n_filters (40). Kept as is because only the per-hop mean of
    # its output is used below - confirm this is intended.
    f = filterbank(n_coeffs, win_s)
    f.set_mel_coeffs_slaney(samplerate)

    # Collect in lists and join once after the loop instead of
    # re-allocating the arrays on every hop. Every item is copied first,
    # because aubio objects may hand back the same buffer on each call.
    sample_chunks = [np.array([])]
    energy_rows = [np.zeros((n_coeffs, ))]
    while True:
        hop_samples, read = src()  # read hop_size new samples from source
        sample_chunks.append(np.array(hop_samples, copy=True))
        energy_rows.append(np.array(f(pv(hop_samples)), copy=True))
        total_frames += read  # increment total number of frames
        if read < hop_size:  # end of file reached
            break
    total_samples = np.concatenate(sample_chunks)
    energies = np.vstack(energy_rows)

    # preEmphasis
    total_samples = preEmphasis(total_samples, _p).astype("float32")

    p = pvoc(win_s, hop_size)
    m = mfcc(win_s, n_filters, n_coeffs, samplerate)
    mfcc_rows = [np.zeros([n_coeffs, ])]
    index = 1
    while True:
        old_frame = hop_size * (index - 1)
        remaining = total_frames - old_frame
        is_last = remaining < hop_size
        if is_last:
            samples = total_samples[old_frame:total_frames]
            # Zero-pad the tail so the last block is a full hop.
            samples = np.pad(samples, [0, hop_size - remaining], "constant")
        else:
            samples = total_samples[old_frame:hop_size * index]
        mfcc_rows.append(np.array(m(p(samples)), copy=True))
        if is_last:
            break
        index += 1
    mfccs = np.vstack(mfcc_rows)

    # The first MFCC dimension is not needed, so drop it.
    mfccs = np.delete(mfccs, 0, axis=1)

    energies = np.mean(energies, axis=1)
    # Append the power term as the last column.
    mfccs = np.hstack((mfccs, energies.reshape(energies.shape[0], 1)))

    # First and second differences, padded with a leading zero row so
    # they keep the same number of rows as mfccs.
    deltas = np.diff(mfccs, axis=0)
    deltas = np.pad(deltas, [(1, 0), (0, 0)], "constant")
    ddeltas = np.diff(deltas, axis=0)
    ddeltas = np.pad(ddeltas, [(1, 0), (0, 0)], "constant")

    mfccs = mfccs.transpose()
    deltas = deltas.transpose()
    ddeltas = ddeltas.transpose()
    all_mfccs = mfccs.tolist() + deltas.tolist() + ddeltas.tolist()
    print("Get MFCC in " + file_path + " ...")
    return all_mfccs
def test_set_coeffs(self):
    """Round-trip a random coefficient matrix through set_coeffs / get_coeffs."""
    win_s = 512
    bank = filterbank(40, win_s)
    weights = np.random.random([40, int(win_s / 2) + 1])
    weights = weights.astype(float_type)
    bank.set_coeffs(weights)
    assert_equal(weights, bank.get_coeffs())
def test_phase(self):
    """Setting only the phase to pi leaves an unconfigured filterbank's output at 0."""
    win_s = 512
    bank = filterbank(40, win_s)
    spectrum = cvec(win_s)
    spectrum.phas[:] = np.pi
    result = bank(spectrum)
    assert_equal(result, 0)
def test_norm(self):
    """Unit spectrum norm through an unconfigured filterbank gives 0."""
    win_s = 512
    bank = filterbank(40, win_s)
    spectrum = cvec(win_s)
    spectrum.norm[:] = 1
    result = bank(spectrum)
    assert_equal(result, 0)
#! /usr/bin/env python
# Plot a bank of triangular filters built from a list of band boundaries,
# with one band amplified.
from aubio import filterbank, fvec
from pylab import loglog, show, subplot, xlim, ylim, xlabel, ylabel, title
from numpy import vstack, arange

win_s = 2048
samplerate = 48000

# Band boundaries in Hz; two fewer filters than boundaries are created.
freq_list = [60, 80, 200, 400, 800, 1600, 3200, 6400, 12800, 24000]
n_filters = len(freq_list) - 2

f = filterbank(n_filters, win_s)
freqs = fvec(freq_list)
f.set_triangle_bands(freqs, samplerate)

# Amplify the band at index 4 by a factor 5 and load the result back.
coeffs = f.get_coeffs()
coeffs[4] *= 5.
f.set_coeffs(coeffs)

# One row of bin frequencies per filter, used as the x axis of the plot.
# Floor division keeps the bin count an integer on Python 3.
times = vstack([arange(win_s // 2 + 1) * samplerate / win_s] * n_filters)
# The title now states the gain that is actually applied above (5, not 2).
title('Bank of filters built using a simple list of boundaries\nThe middle band has been amplified by 5.')
loglog(times.T, f.get_coeffs().T, '.-')
xlim([50, samplerate/2])
ylim([1.0e-6, 2.0e-2])
xlabel('log frequency (Hz)')
ylabel('log amplitude')
show()
# Plot the coefficients of a 9-band triangular filterbank.
from aubio import filterbank, fvec

f = filterbank(9, 1024)
# Band boundaries in Hz; 11 boundaries define the 9 filters.
freq_list = [60, 80, 200, 400, 800, 1600, 3200, 6400, 12800, 15000, 24000]
freqs = fvec(freq_list)
# Pass the fvec built above rather than the plain Python list, which
# previously left `freqs` unused.
f.set_triangle_bands(freqs, 48000)

from pylab import loglog, show
loglog(f.get_coeffs().T, '+-')
show()