Example #1
0
 def test_other_slaney(self):
     """Set Slaney mel coefficients for several window sizes and fetch them.

     There is no assertion: the test passes as long as nothing raises.
     """
     bank = filterbank(40, 512*2)
     bank.set_mel_coeffs_slaney(44100)
     bank.get_coeffs()
     # same exercise at 32000 Hz over a range of window sizes
     for size in (256, 512, 1024, 2048, 4096):
         bank = filterbank(40, size)
         bank.set_mel_coeffs_slaney(32000)
         bank.get_coeffs()
 def test_other_slaney(self):
     """get_coeffs returns a numpy array after a Slaney setup, for several sizes."""
     bank = filterbank(40, 512*2)
     bank.set_mel_coeffs_slaney(44100)
     coeffs = bank.get_coeffs()
     self.assertIsInstance(coeffs, np.ndarray)
     # same check at 32000 Hz over a range of window sizes
     for size in (256, 512, 1024, 2048, 4096):
         bank = filterbank(40, size)
         bank.set_mel_coeffs_slaney(32000)
         coeffs = bank.get_coeffs()
         self.assertIsInstance(coeffs, np.ndarray)
Example #3
0
 def test_triangle_freqs_zeros(self):
     """A triangle-band filterbank applied to a newly created cvec yields 0."""
     bank = filterbank(9, 1024)
     # 11 band edges for the 9 filters
     band_edges = array(
         [40, 80, 200, 400, 800, 1600, 3200, 6400, 12800, 15000, 24000],
         dtype = float_type)
     bank.set_triangle_bands(band_edges, 48000)
     # result is discarded; the call only has to succeed
     bank.get_coeffs().T
     assert_equal(bank(cvec(1024)), 0)
Example #4
0
def getMelEnergy(path):
	"""Compute 40-band Slaney mel energies for every hop of an audio file.

	Args:
		path: location of the audio file, passed to aubio's ``source``.

	Returns:
		A 2-D numpy array with 40 columns. Row 0 is the all-zero seed row
		the stack starts from; every following row holds the filterbank
		output for one hop.
	"""
	win_s = 512  # fft size
	# Floor division: `/` would give a float on Python 3, and the hop size
	# is used as a frame count by source() and pvoc().
	hop_s = win_s // 4

	# 0 is passed as the requested samplerate; the effective rate is read
	# back from the source object.
	s = source(path, 0, hop_s)
	samplerate = s.samplerate

	pv = pvoc(win_s, hop_s)
	f = filterbank(40, win_s)
	f.set_mel_coeffs_slaney(samplerate)

	# seed row; each analysed hop is stacked underneath it
	energies = np.zeros((40, ))

	while True:
		samples, read = s()
		fftgrain = pv(samples)
		new_energies = f(fftgrain)
		# vstack copies the values right away, so no reference to the
		# filterbank's output object is kept between iterations
		energies = np.vstack([energies, new_energies])
		# a short read marks the last block of the file
		if read < hop_s:
			break
	return energies
Example #5
0
 def test_mfcc_coeffs(self):
     """Slaney filterbank output for a random spectrum lies strictly between 0 and 1."""
     bank = filterbank(40, 512)
     spectrum = cvec(512)
     bank.set_mel_coeffs_slaney(44100)
     n_bins = int(512 / 2) + 1
     spectrum.norm[:] = np.random.random((n_bins,)).astype(float_type)
     assert_equal(bank(spectrum) < 1., True)
     assert_equal(bank(spectrum) > 0., True)
 def test_random_coeffs(self):
   """Normalised random coefficients give outputs strictly between 0 and 1."""
   f = filterbank(40, 512)
   c = cvec(512)
   # Floor division: array shapes must be integers, and `512 / 2 + 1` is
   # the float 257.0 on Python 3.
   n_bins = 512 // 2 + 1
   r = random.random([40, n_bins]).astype('float32')
   # scale so that all coefficients sum to 1
   r /= r.sum()
   f.set_coeffs(r)
   c.norm[:] = random.random((n_bins,)).astype('float32')
   assert_equal ( f(c) < 1., True )
   assert_equal ( f(c) > 0., True )
Example #7
0
 def test_random_coeffs(self):
     """Normalised random coefficients give outputs strictly between 0 and 1."""
     win_s = 128
     n_bins = int(win_s / 2) + 1
     bank = filterbank(40, win_s)
     spectrum = cvec(win_s)
     weights = np.random.random([40, n_bins]).astype(float_type)
     # scale so that all coefficients sum to 1
     weights /= weights.sum()
     bank.set_coeffs(weights)
     spectrum.norm[:] = np.random.random((n_bins,)).astype(float_type)
     assert_equal(bank(spectrum) < 1., True)
     assert_equal(bank(spectrum) > 0., True)
Example #8
0
 def setup(self, channels=None, samplerate=None,
           blocksize=None, totalframes=None):
     """Run the parent setup, then build the phase vocoder and mel filterbank.

     The filterbank is configured with Slaney mel coefficients for the
     `samplerate` given to this call.
     """
     super(AubioMelEnergy, self).setup(
         channels, samplerate, blocksize, totalframes)
     self.n_filters, self.n_coeffs = 40, 13
     self.pvoc = pvoc(self.input_blocksize, self.input_stepsize)
     self.melenergy = filterbank(self.n_filters, self.input_blocksize)
     self.melenergy.set_mel_coeffs_slaney(samplerate)
     # bookkeeping starts empty
     self.block_read, self.melenergy_results = 0, []
Example #9
0
 def test_triangle_freqs_ones(self):
     """Triangle-band output for an all-ones spectrum matches reference values."""
     bank = filterbank(9, 1024)
     # 11 band edges for the 9 filters
     band_edges = array(
         [40, 80, 200, 400, 800, 1600, 3200, 6400, 12800, 15000, 24000],
         dtype = float_type)
     bank.set_triangle_bands(band_edges, 48000)
     # result is discarded; the call only has to succeed
     bank.get_coeffs().T
     flat_spectrum = cvec(1024)
     flat_spectrum.norm[:] = 1
     expected = [0.02070313, 0.02138672, 0.02127604, 0.02135417,
                 0.02133301, 0.02133301, 0.02133311, 0.02133334, 0.02133345]
     assert_almost_equal(bank(flat_spectrum), expected)
Example #10
0
    def __init__(self):
        """Set the block/step sizes and create the aubio analysis objects."""
        super(AubioMelEnergy, self).__init__()
        self.input_blocksize = 1024
        # Floor division keeps the step size an int on Python 3 as well; it
        # is handed to pvoc() below as the hop size.
        self.input_stepsize = self.input_blocksize // 4

        # Aubio Melenergy Initialisation
        self.n_filters = 40
        self.n_coeffs = 13
        self.pvoc = pvoc(self.input_blocksize, self.input_stepsize)
        self.melenergy = filterbank(self.n_filters, self.input_blocksize)
        self.block_read = 0
        self.melenergy_results = []
Example #11
0
    def __init__(self, options):
        """Build the filterbank, phase vocoder and work buffers from settings.

        The 'third-octave' and 'octave' presets of the 'fbuckets' setting
        are replaced, inside `options.settings` itself, by explicit lists of
        band-edge frequencies before anything else is created.
        """
        settings = options.settings
        preset = settings['fbuckets']
        if preset == 'third-octave':
            settings['fbuckets'] = [
                22.4,
                25, 31.5, 40, 50, 63,
                80, 100, 125, 160, 200,
                250, 315, 400, 500, 630,
                800, 1000, 1250, 1600, 2000,
                2500, 3150, 4000, 5000, 6300,
                8000, 10000, 12500, 16000, 20000,
                22390,
            ]
        elif preset == 'octave':
            settings['fbuckets'] = [
                22,
                31.5, 63, 125, 250, 500,
                1000, 2000, 4000, 8000, 16000,
                22720,
            ]

        self.midi_processor = None
        # base 0 lets int() pick the radix from each token's prefix
        self.sysex_command_array = [
            int(command, 0) for command in settings['fsysexnum'].split(' ')
        ]

        # the filterbank gets two fewer bands than there are band edges
        n_bands = len(settings['fbuckets']) - 2
        self.filter_bank = filterbank(
            n_bands,
            int(settings['framesize']) * int(settings['fframemult']))
        self.frequencies = fvec(settings['fbuckets'])
        self.filter_bank.set_triangle_bands(
            self.frequencies, int(settings['samplerate']))

        frame_len = float(settings['framesize'])
        frame_mult = float(settings['fframemult'])
        window_size = int(frame_len * frame_mult)
        hop_size = int(frame_len * frame_mult * float(settings['fhopmult']))
        self.phase_vocoder = pvoc(window_size, hop_size)

        self.frame_arrays = np.zeros(
            (int(frame_mult), int(frame_len)), dtype=np.float32)
        self.frame_count = 0
        self.maximum_frequencies = np.zeros((n_bands,), dtype=np.float32)
        self.last_energies = np.zeros((n_bands,), dtype=np.float32)
        self.count_energies = np.zeros(
            (int(settings['fcount']), n_bands), dtype=np.float32)
        self.energy_count = 0
        self.rest_stop = 0
        self.frame_multiplier = int(settings['fframemult'])
        self.count = int(settings['fcount'])
        self.graceful = float(settings['fgraceful'])
Example #12
0
File: cmd.py Project: aubio/aubio
    def __init__(self, args):
        """Create the phase vocoder and mel filterbank, then init the base class.

        `parse_options` and `remap_pvoc_options` are called once per object,
        each time with the option names that object accepts, and
        `self.options` is then expanded as keyword arguments.
        NOTE(review): `self.options` is presumably (re)populated by
        `parse_options` -- confirm in its definition.
        """
        self.args = args
        # options forwarded to the phase vocoder
        valid_opts = ['hop_size', 'buf_size']
        self.parse_options(args, valid_opts)
        self.remap_pvoc_options(self.options)
        self.pv = aubio.pvoc(**self.options)

        # options forwarded to the filterbank
        valid_opts = ['buf_size', 'n_filters']
        self.parse_options(args, valid_opts)
        self.remap_pvoc_options(self.options)
        self.filterbank = aubio.filterbank(**self.options)
        self.filterbank.set_mel_coeffs_slaney(args.samplerate)

        # the base initialiser runs last, once both objects exist
        super(process_melbands, self).__init__(args)
Example #13
0
 def test_zero_fmax(self):
     """Call set_mel_coeffs with both frequency bounds at 0.

     There is no assertion: the test passes as long as nothing raises.
     """
     bank = filterbank(40, 1024)
     bank.set_mel_coeffs(44100, 0, 0)
Example #14
0
 def test_mel_coeffs_htk(self):
     """Call set_mel_coeffs_htk with bounds 0 and half of 44100.

     There is no assertion: the test passes as long as nothing raises.
     """
     bank = filterbank(40, 1024)
     upper_bound = 44100 / 2
     bank.set_mel_coeffs_htk(44100, 0, upper_bound)
 def test_set_coeffs(self):
   """Coefficients passed to set_coeffs are returned unchanged by get_coeffs."""
   f = filterbank(40, 512)
   # Floor division: array shapes must be integers, and `512 / 2 + 1` is
   # the float 257.0 on Python 3.
   r = random.random([40, 512 // 2 + 1]).astype('float32')
   f.set_coeffs(r)
   assert_equal (r, f.get_coeffs())
def process_wav_file(filename,
                     samplerate=0,
                     win_s=4096,
                     seconds_window=3,
                     svm=True):
    """Extract per-hop MFCC, pitch and mel-energy features from an audio file.

    Args:
        filename: path of the audio file; surrounding whitespace is stripped.
        samplerate: rate handed to aubio's ``source``; afterwards the rate
            reported by the source object is used.
        win_s: analysis window size; the hop size is a quarter of it.
        seconds_window: unused; kept so existing callers keep working.
        svm: when true, the feature matrix is passed through
            ``get_mean_avg_etc``; otherwise it is returned as is.

    Returns:
        A one-element list holding either the ``get_mean_avg_etc`` result or
        the feature matrix. The matrix has one row per hop and its columns
        are, in order: 13 MFCCs, 13 MFCC differences, the same 13
        differences again, 1 pitch value and 40 mel-band energies.
    """
    from aubio import source, pitch

    hop_s = win_s // 4
    filename = filename.strip()

    n_filters = 40  # must be 40 for mfcc
    n_coeffs = 13
    s = source(filename, samplerate, hop_s)
    samplerate = s.samplerate

    pitch_o = pitch("yin", win_s, hop_s, samplerate)
    pitch_o.set_unit("midi")
    pitch_o.set_tolerance(0.8)

    # NOTE(review): `p` and `pv` are configured identically and fed the same
    # samples; both are kept so the call sequence stays as it was.
    p = pvoc(win_s, hop_s)
    m = mfcc(win_s, n_filters, n_coeffs, samplerate)
    pv = pvoc(win_s, hop_s)

    f = filterbank(40, win_s)
    f.set_mel_coeffs_slaney(samplerate)

    # Both stacks start from an all-zero seed row. The MFCC seed row is what
    # makes the first difference below have one row per hop.
    energies = zeros((40, ))
    mfccs = zeros([
        n_coeffs,
    ])
    pitches = []
    while True:
        samples, read = s()
        mfccs = vstack((mfccs, m(p(samples))))
        # stored under a new name so the imported `pitch` class is not rebound
        pitches.append(pitch_o(samples)[0])
        energies = vstack([energies, f(pv(samples))])
        # a short read marks the last block of the file
        if read < hop_s:
            break

    mfccs1 = diff(mfccs, axis=0)
    # NOTE(review): this repeats mfccs1 rather than taking a second
    # difference. It is left as is because a second difference would have
    # one row fewer and could not be concatenated below; confirm the intent.
    mfccs2 = diff(mfccs, axis=0)

    pitches = np.array(pitches)
    pitches = pitches.reshape((len(pitches), 1))

    # the seed rows of mfccs and energies are dropped here
    all_data = np.concatenate(
        (mfccs[1:, :], mfccs1, mfccs2, pitches, energies[1:]), 1)

    if svm:
        return [get_mean_avg_etc(all_data)]
    return [all_data]
Example #17
0
#! /usr/bin/env python

from aubio import filterbank, fvec
from pylab import loglog, show, xlim, ylim, xlabel, ylabel, title
from numpy import vstack, arange

# size of the fft and sampling rate used to lay out the filters
win_s = 2048
samplerate = 48000

# band edges in Hz; the filterbank has two fewer filters than edges
freq_list = [60, 80, 200, 400, 800, 1600, 3200, 6400, 12800, 24000]
n_filters = len(freq_list) - 2

f = filterbank(n_filters, win_s)
freqs = fvec(freq_list)
f.set_triangle_bands(freqs, samplerate)

# amplify the fifth band by 5 and load the modified coefficients back
coeffs = f.get_coeffs()
coeffs[4] *= 5.

f.set_coeffs(coeffs)

# despite its name, `times` holds the frequency in Hz of each fft bin,
# repeated once per filter so it can be plotted against the coefficients
times = vstack([arange(win_s // 2 + 1) * samplerate / win_s] * n_filters)
# the title states the gain that is actually applied above
title(
    'Bank of filters built using a simple list of boundaries\nThe middle band has been amplified by 5.'
)
loglog(times.T, f.get_coeffs().T, '.-')
xlim([50, samplerate / 2])
ylim([1.0e-6, 2.0e-2])
xlabel('log frequency (Hz)')
ylabel('log amplitude')
# display the figure; `show` is imported above for this purpose
show()
Example #18
0
 def test_triangle_freqs_with_wrong_negative(self):
     """set_triangle_bands must raise ValueError for a list with a negative value"""
     band_edges = [-10, 0, 80]
     n_bands = len(band_edges) - 2
     bank = filterbank(n_bands, 1024)
     with self.assertRaises(ValueError):
         bank.set_triangle_bands(fvec(band_edges), 48000)
Example #19
0
 def test_random_norm(self):
     """A filterbank with no coefficients set outputs 0 for a random spectrum."""
     f = filterbank(40, 512)
     c = cvec(512)
     # Floor division: array shapes must be integers, and `512 / 2 + 1` is
     # the float 257.0 on Python 3.
     c.norm[:] = random.random((512 // 2 + 1, )).astype('float32')
     assert_equal(f(c), 0)
Example #20
0
 def test_set_coeffs(self):
     """Coefficients passed to set_coeffs are returned unchanged by get_coeffs."""
     f = filterbank(40, 512)
     # Floor division: array shapes must be integers, and `512 / 2 + 1` is
     # the float 257.0 on Python 3.
     r = random.random([40, 512 // 2 + 1]).astype('float32')
     f.set_coeffs(r)
     assert_equal(r, f.get_coeffs())
Example #21
0
 def test_triangle_freqs_with_wrong_ordering(self):
     """set_triangle_bands must raise ValueError for a list that is not ordered"""
     band_edges = [0, 80, 40]
     n_bands = len(band_edges) - 2
     bank = filterbank(n_bands, 1024)
     with self.assertRaises(ValueError):
         bank.set_triangle_bands(fvec(band_edges), 48000)
Example #22
0
    def _initialize_melbank(self):
        """Initialize all the melbank related variables

        Builds ``self.filterbank`` and ``self.melbank_frequencies`` for the
        configured ``coeffs_type``, then derives the low/mid/high band
        indexes and creates the shared ``ExpFilter`` instances.

        The "slaney", "fixed" and "fixed_simple" types overwrite
        ``self._config["samples"]`` with the number of bands they produce.

        NOTE(review): a ``coeffs_type`` that matches none of the branches
        below leaves ``self.filterbank`` and ``self.melbank_frequencies``
        unassigned by this method -- confirm the value is validated upstream.
        """

        # A few different coefficient types for experimentation
        if self._config["coeffs_type"] == "triangle":
            # samples + 2 band edges, evenly spaced on the mel scale
            melbank_mel = np.linspace(
                aubio.hztomel(self._config["min_frequency"]),
                aubio.hztomel(self._config["max_frequency"]),
                self._config["samples"] + 2,
            )
            self.melbank_frequencies = np.array(
                [aubio.meltohz(mel) for mel in melbank_mel]).astype(np.float32)

            self.filterbank = aubio.filterbank(self._config["samples"],
                                               self._config["fft_size"])
            self.filterbank.set_triangle_bands(self.melbank_frequencies,
                                               self._config["mic_rate"])
            # drop the outermost edges so one frequency remains per band
            self.melbank_frequencies = self.melbank_frequencies[1:-1]

        if self._config["coeffs_type"] == "bark":
            # samples + 2 band edges, evenly spaced in 6 * arcsinh(f / 600)
            melbank_bark = np.linspace(
                6.0 * np.arcsinh(self._config["min_frequency"] / 600.0),
                6.0 * np.arcsinh(self._config["max_frequency"] / 600.0),
                self._config["samples"] + 2,
            )
            self.melbank_frequencies = (600.0 *
                                        np.sinh(melbank_bark / 6.0)).astype(
                                            np.float32)

            self.filterbank = aubio.filterbank(self._config["samples"],
                                               self._config["fft_size"])
            self.filterbank.set_triangle_bands(self.melbank_frequencies,
                                               self._config["mic_rate"])
            # drop the outermost edges so one frequency remains per band
            self.melbank_frequencies = self.melbank_frequencies[1:-1]

        # Slaney coefficients will always produce 40 samples spanning 133Hz to
        # 6000Hz
        if self._config["coeffs_type"] == "slaney":
            self.filterbank = aubio.filterbank(40, self._config["fft_size"])
            self.filterbank.set_mel_coeffs_slaney(self._config["mic_rate"])

            # Slaney frequencies are linear-log spaced where 133Hz to 1000Hz is linear
            # spaced and 1000Hz to 6000Hz is log spaced. It also produces a hardcoded
            # 40 samples.
            lowestFrequency = 133.3
            linearSpacing = 66.6666666
            logSpacing = 1.0711703
            linearFilters = 13
            logFilters = 27
            linearSpacedFreqs = (lowestFrequency +
                                 np.arange(0, linearFilters) * linearSpacing)
            logSpacedFreqs = linearSpacedFreqs[-1] * np.power(
                logSpacing, np.arange(1, logFilters + 1))

            # the configured sample count is overridden for this type
            self._config["samples"] = 40
            self.melbank_frequencies = np.hstack(
                (linearSpacedFreqs, logSpacedFreqs)).astype(np.float32)

        # Standard mel coefficients
        if self._config["coeffs_type"] == "mel":
            self.filterbank = aubio.filterbank(self._config["samples"],
                                               self._config["fft_size"])
            self.filterbank.set_mel_coeffs(
                self._config["mic_rate"],
                self._config["min_frequency"],
                self._config["max_frequency"],
            )

            # Frequencies will be linearly spaced in the mel scale
            melbank_mel = np.linspace(
                aubio.hztomel(self._config["min_frequency"]),
                aubio.hztomel(self._config["max_frequency"]),
                self._config["samples"],
            )
            self.melbank_frequencies = np.array(
                [aubio.meltohz(mel) for mel in melbank_mel])

        # HTK mel coefficients
        if self._config["coeffs_type"] == "htk":
            self.filterbank = aubio.filterbank(self._config["samples"],
                                               self._config["fft_size"])
            self.filterbank.set_mel_coeffs_htk(
                self._config["mic_rate"],
                self._config["min_frequency"],
                self._config["max_frequency"],
            )

            # Frequencies will be linearly spaced in the mel scale
            melbank_mel = np.linspace(
                aubio.hztomel(self._config["min_frequency"]),
                aubio.hztomel(self._config["max_frequency"]),
                self._config["samples"],
            )
            self.melbank_frequencies = np.array(
                [aubio.meltohz(mel) for mel in melbank_mel])

        # Coefficients based on Scott's audio reactive led project
        if self._config["coeffs_type"] == "scott":
            (
                melmat,
                center_frequencies_hz,
                freqs,
            ) = mel.compute_melmat(
                num_mel_bands=self._config["samples"],
                freq_min=self._config["min_frequency"],
                freq_max=self._config["max_frequency"],
                num_fft_bands=int(self._config["fft_size"] // 2) + 1,
                sample_rate=self._config["mic_rate"],
            )
            self.filterbank = aubio.filterbank(self._config["samples"],
                                               self._config["fft_size"])
            self.filterbank.set_coeffs(melmat.astype(np.float32))
            self.melbank_frequencies = center_frequencies_hz

        # "Mel"-spacing based on Scott's audio reactive led project. This
        # should in theory be the same as the above, but there seems to be
        # slight differences. Leaving both for science!
        if self._config["coeffs_type"] == "scott_mel":

            # forward and inverse mapping, both in base 9
            def hertz_to_scott(freq):
                return 3340.0 * log(1 + (freq / 250.0), 9)

            def scott_to_hertz(scott):
                return 250.0 * (9**(scott / 3340.0)) - 250.0

            melbank_scott = np.linspace(
                hertz_to_scott(self._config["min_frequency"]),
                hertz_to_scott(self._config["max_frequency"]),
                self._config["samples"] + 2,
            )
            self.melbank_frequencies = np.array([
                scott_to_hertz(scott) for scott in melbank_scott
            ]).astype(np.float32)

            self.filterbank = aubio.filterbank(self._config["samples"],
                                               self._config["fft_size"])
            self.filterbank.set_triangle_bands(self.melbank_frequencies,
                                               self._config["mic_rate"])
            # drop the outermost edges so one frequency remains per band
            self.melbank_frequencies = self.melbank_frequencies[1:-1]

        # Modified scott_mel, spreads out the low range and compresses the
        # highs
        if self._config["coeffs_type"] == "matt_mel":

            # NOTE(review): the forward mapping uses base 13 while the
            # inverse uses base 10, so the two are not exact inverses --
            # confirm this is the intended warping.
            def hertz_to_matt(freq):
                return 3700.0 * log(1 + (freq / 200.0), 13)

            def matt_to_hertz(matt):
                return 200.0 * (10**(matt / 3700.0)) - 200.0

            melbank_matt = np.linspace(
                hertz_to_matt(self._config["min_frequency"]),
                hertz_to_matt(self._config["max_frequency"]),
                self._config["samples"] + 2,
            )
            self.melbank_frequencies = np.array([
                matt_to_hertz(matt) for matt in melbank_matt
            ]).astype(np.float32)

            self.filterbank = aubio.filterbank(self._config["samples"],
                                               self._config["fft_size"])
            self.filterbank.set_triangle_bands(self.melbank_frequencies,
                                               self._config["mic_rate"])
            # drop the outermost edges so one frequency remains per band
            self.melbank_frequencies = self.melbank_frequencies[1:-1]

        # Bands taken from the min/max of every FREQUENCY_RANGES entry
        if self._config["coeffs_type"] == "fixed":
            ranges = FREQUENCY_RANGES.values()
            upper_edges_hz = np.zeros(len(ranges))
            lower_edges_hz = np.zeros(len(ranges))
            for idx, value in enumerate(ranges):
                lower_edges_hz[idx] = value.min
                upper_edges_hz[idx] = value.max

            (
                melmat,
                center_frequencies_hz,
                freqs,
            ) = mel.compute_melmat_from_range(
                lower_edges_hz=lower_edges_hz,
                upper_edges_hz=upper_edges_hz,
                num_fft_bands=int(self._config["fft_size"] // 2) + 1,
                sample_rate=self._config["mic_rate"],
            )

            # the configured sample count is overridden for this type
            self._config["samples"] = len(center_frequencies_hz)
            self.filterbank = aubio.filterbank(self._config["samples"],
                                               self._config["fft_size"])
            self.filterbank.set_coeffs(melmat.astype(np.float32))
            self.melbank_frequencies = center_frequencies_hz

        # Same as "fixed", but using FREQUENCY_RANGES_SIMPLE
        if self._config["coeffs_type"] == "fixed_simple":
            ranges = FREQUENCY_RANGES_SIMPLE.values()
            upper_edges_hz = np.zeros(len(ranges))
            lower_edges_hz = np.zeros(len(ranges))
            for idx, value in enumerate(ranges):
                lower_edges_hz[idx] = value.min
                upper_edges_hz[idx] = value.max

            (
                melmat,
                center_frequencies_hz,
                freqs,
            ) = mel.compute_melmat_from_range(
                lower_edges_hz=lower_edges_hz,
                upper_edges_hz=upper_edges_hz,
                num_fft_bands=int(self._config["fft_size"] // 2) + 1,
                sample_rate=self._config["mic_rate"],
            )

            # the configured sample count is overridden for this type
            self._config["samples"] = len(center_frequencies_hz)
            self.filterbank = aubio.filterbank(self._config["samples"],
                                               self._config["fft_size"])
            self.filterbank.set_coeffs(melmat.astype(np.float32))
            self.melbank_frequencies = center_frequencies_hz

        # From here on the band frequencies are integers
        self.melbank_frequencies = self.melbank_frequencies.astype(int)

        # Normalize the filterbank triangles to a consistent height, the
        # default coeffs (for types other than legacy) will be normalized
        # by the triangles area which results in an uneven melbank
        # NOTE(review): the first clause is implied by the second, so this
        # block only runs for "scott_mel" -- confirm that is the intent.
        if (self._config["coeffs_type"] != "scott"
                and self._config["coeffs_type"] == "scott_mel"):
            coeffs = self.filterbank.get_coeffs()
            # divide each row by its own maximum
            coeffs /= np.max(coeffs, axis=-1)[:, None]
            self.filterbank.set_coeffs(coeffs)

        # Find the indexes for each of the frequency ranges
        # Every index defaults to 1 and is moved to i + 1 for each band
        # frequency that falls below the matching range maximum.
        self.lows_index = self.mids_index = self.highs_index = 1
        for i in range(0, len(self.melbank_frequencies)):
            if (self.melbank_frequencies[i] <
                    FREQUENCY_RANGES_SIMPLE["Low (1-250Hz)"].max):
                self.lows_index = i + 1
            elif (self.melbank_frequencies[i] <
                  FREQUENCY_RANGES_SIMPLE["Mid (250Hz-4kHz)"].max):
                self.mids_index = i + 1
            elif (self.melbank_frequencies[i] <
                  FREQUENCY_RANGES_SIMPLE["High (4kHz-24kHz)"].max):
                self.highs_index = i + 1

        # Build up some of the common filters
        self.mel_gain = ExpFilter(
            np.tile(1e-1, self._config["samples"]),
            alpha_decay=0.01,
            alpha_rise=0.99,
        )
        self.mel_smoothing = ExpFilter(
            np.tile(1e-1, self._config["samples"]),
            alpha_decay=0.2,
            alpha_rise=0.99,
        )
        self.common_filter = ExpFilter(alpha_decay=0.99, alpha_rise=0.01)
Example #23
0
    def _initialize_melbank(self):
        """Initialize all the melbank related variables

        Builds ``self.filterbank`` and ``self.melbank_frequencies`` for the
        configured ``coeffs_type``, then derives the low/mid/high band
        indexes and creates the shared ``ExpFilter`` instances.

        The 'slaney' type overwrites ``self._config['samples']`` with 40.

        NOTE(review): a ``coeffs_type`` that matches none of the branches
        below leaves ``self.filterbank`` and ``self.melbank_frequencies``
        unassigned by this method -- confirm the value is validated upstream.
        """

        # A few different coefficient types for experimentation
        if self._config['coeffs_type'] == 'triangle':
            # samples + 2 band edges, evenly spaced on the mel scale
            melbank_mel = np.linspace(
                aubio.hztomel(self._config['min_frequency']),
                aubio.hztomel(self._config['max_frequency']),
                self._config['samples'] + 2)
            self.melbank_frequencies = np.array(
                [aubio.meltohz(mel) for mel in melbank_mel]).astype(np.float32)

            self.filterbank = aubio.filterbank(self._config['samples'],
                                               self._config['fft_size'])
            self.filterbank.set_triangle_bands(self.melbank_frequencies,
                                               self._config['mic_rate'])
            # drop the outermost edges so one frequency remains per band
            self.melbank_frequencies = self.melbank_frequencies[1:-1]

        if self._config['coeffs_type'] == 'bark':
            # samples + 2 band edges, evenly spaced in 6 * arcsinh(f / 600)
            melbank_bark = np.linspace(
                6.0 * np.arcsinh(self._config['min_frequency'] / 600.0),
                6.0 * np.arcsinh(self._config['max_frequency'] / 600.0),
                self._config['samples'] + 2)
            self.melbank_frequencies = (600.0 *
                                        np.sinh(melbank_bark / 6.0)).astype(
                                            np.float32)

            self.filterbank = aubio.filterbank(self._config['samples'],
                                               self._config['fft_size'])
            self.filterbank.set_triangle_bands(self.melbank_frequencies,
                                               self._config['mic_rate'])
            # drop the outermost edges so one frequency remains per band
            self.melbank_frequencies = self.melbank_frequencies[1:-1]

        # Slaney coefficients will always produce 40 samples spanning 133Hz to 6000Hz
        if self._config['coeffs_type'] == 'slaney':
            self.filterbank = aubio.filterbank(40, self._config['fft_size'])
            self.filterbank.set_mel_coeffs_slaney(self._config['mic_rate'])

            # Slaney frequencies are linear-log spaced where 133Hz to 1000Hz is linear
            # spaced and 1000Hz to 6000Hz is log spaced. It also produces a hardcoded
            # 40 samples.
            lowestFrequency = 133.3
            linearSpacing = 66.6666666
            logSpacing = 1.0711703
            linearFilters = 13
            logFilters = 27
            linearSpacedFreqs = lowestFrequency + np.arange(
                0, linearFilters) * linearSpacing
            logSpacedFreqs = linearSpacedFreqs[-1] * np.power(
                logSpacing, np.arange(1, logFilters + 1))

            # the configured sample count is overridden for this type
            self._config['samples'] = 40
            self.melbank_frequencies = np.hstack(
                (linearSpacedFreqs, logSpacedFreqs)).astype(np.float32)

        # Standard mel coefficients
        if self._config['coeffs_type'] == 'mel':
            self.filterbank = aubio.filterbank(self._config['samples'],
                                               self._config['fft_size'])
            self.filterbank.set_mel_coeffs(self._config['mic_rate'],
                                           self._config['min_frequency'],
                                           self._config['max_frequency'])

            # Frequencies will be linearly spaced in the mel scale
            melbank_mel = np.linspace(
                aubio.hztomel(self._config['min_frequency']),
                aubio.hztomel(self._config['max_frequency']),
                self._config['samples'])
            self.melbank_frequencies = np.array(
                [aubio.meltohz(mel) for mel in melbank_mel])

        # HTK mel coefficients
        if self._config['coeffs_type'] == 'htk':
            self.filterbank = aubio.filterbank(self._config['samples'],
                                               self._config['fft_size'])
            self.filterbank.set_mel_coeffs_htk(self._config['mic_rate'],
                                               self._config['min_frequency'],
                                               self._config['max_frequency'])

            # Frequencies will be linearly spaced in the mel scale
            melbank_mel = np.linspace(
                aubio.hztomel(self._config['min_frequency']),
                aubio.hztomel(self._config['max_frequency']),
                self._config['samples'])
            self.melbank_frequencies = np.array(
                [aubio.meltohz(mel) for mel in melbank_mel])

        # Coefficients based on Scott's audio reactive led project
        if self._config['coeffs_type'] == 'scott':
            (melmat, center_frequencies_hz, freqs) = mel.compute_melmat(
                num_mel_bands=self._config['samples'],
                freq_min=self._config['min_frequency'],
                freq_max=self._config['max_frequency'],
                num_fft_bands=int(self._config['fft_size'] // 2) + 1,
                sample_rate=self._config['mic_rate'])
            self.filterbank = aubio.filterbank(self._config['samples'],
                                               self._config['fft_size'])
            self.filterbank.set_coeffs(melmat.astype(np.float32))
            self.melbank_frequencies = center_frequencies_hz

        # "Mel"-spacing based on Scott's audio reactive led project. This
        # should in theory be the same as the above, but there seems to be
        # slight differences. Leaving both for science!
        if self._config['coeffs_type'] == 'scott_mel':

            # forward and inverse mapping, both in base 9
            def hertz_to_scott(freq):
                return 3340.0 * log(1 + (freq / 250.0), 9)

            def scott_to_hertz(scott):
                return 250.0 * (9**(scott / 3340.0)) - 250.0

            melbank_scott = np.linspace(
                hertz_to_scott(self._config['min_frequency']),
                hertz_to_scott(self._config['max_frequency']),
                self._config['samples'] + 2)
            self.melbank_frequencies = np.array([
                scott_to_hertz(scott) for scott in melbank_scott
            ]).astype(np.float32)

            self.filterbank = aubio.filterbank(self._config['samples'],
                                               self._config['fft_size'])
            self.filterbank.set_triangle_bands(self.melbank_frequencies,
                                               self._config['mic_rate'])
            # drop the outermost edges so one frequency remains per band
            self.melbank_frequencies = self.melbank_frequencies[1:-1]

        # From here on the band frequencies are integers
        self.melbank_frequencies = self.melbank_frequencies.astype(int)

        # Normalize the filterbank triangles to a consistent height, the
        # default coeffs (for types other than legacy) will be normalized
        # by the triangles area which results in an uneven melbank
        # NOTE(review): the first clause is implied by the second, so this
        # block only runs for 'scott_mel' -- confirm that is the intent.
        if self._config['coeffs_type'] != 'scott' and self._config[
                'coeffs_type'] == 'scott_mel':
            coeffs = self.filterbank.get_coeffs()
            # divide each row by its own maximum
            coeffs /= np.max(coeffs, axis=-1)[:, None]
            self.filterbank.set_coeffs(coeffs)

        # Find the indexes for each of the frequency ranges
        # NOTE(review): the range stops one short of the last frequency, and
        # the three *_index attributes are only assigned inside this loop,
        # so any of them may stay unset by this method -- confirm callers
        # cope with that.
        for i in range(0, len(self.melbank_frequencies) - 1):
            if self.melbank_frequencies[i] < FREQUENCY_RANGES_SIMPLE['low'].max:
                self.lows_index = i
            elif self.melbank_frequencies[i] < FREQUENCY_RANGES_SIMPLE[
                    'mid'].max:
                self.mids_index = i
            elif self.melbank_frequencies[i] < FREQUENCY_RANGES_SIMPLE[
                    'high'].max:
                self.highs_index = i

        # Build up some of the common filters
        self.mel_gain = ExpFilter(np.tile(1e-1, self._config['samples']),
                                  alpha_decay=0.01,
                                  alpha_rise=0.99)
        self.mel_smoothing = ExpFilter(np.tile(1e-1, self._config['samples']),
                                       alpha_decay=0.2,
                                       alpha_rise=0.99)
        self.common_filter = ExpFilter(alpha_decay=0.99, alpha_rise=0.01)
Example #24
0
 def test_set_coeffs(self):
     """Coefficients passed to set_coeffs are returned unchanged by get_coeffs."""
     bank = filterbank(40, 512)
     n_bins = int(512 / 2) + 1
     weights = np.random.random([40, n_bins]).astype(float_type)
     bank.set_coeffs(weights)
     assert_equal(weights, bank.get_coeffs())
Example #25
0
 def test_norm(self):
     """A filterbank with no coefficients set outputs 0 for an all-ones norm."""
     bank = filterbank(40, 512)
     spectrum = cvec(512)
     spectrum.norm[:] = 1
     assert_equal(bank(spectrum), 0)
Example #26
0
 def test_mfcc_coeffs_16000(self):
     """Slaney coefficients at 16000 Hz match the stored reference values."""
     reference = array_from_text_file('filterbank_mfcc_16000_512.expected')
     bank = filterbank(40, 512)
     bank.set_mel_coeffs_slaney(16000)
     actual = bank.get_coeffs()
     assert_almost_equal(reference, actual)
Example #27
0
# open a single-channel input stream on `p`
# NOTE(review): `p`, `pyaudio_format`, `samplerate` and `buffer_size` are
# defined outside this section -- presumably a PyAudio instance and its
# settings; confirm.
n_channels = 1
stream = p.open(format=pyaudio_format,
                channels=n_channels,
                rate=samplerate,
                input=True,
                frames_per_buffer=buffer_size)

# setup pitch: detector reporting in midi units, plus a queue of recent
# values created with `pitch_samples` as its second argument
pitch_o = aubio.pitch("default", win_s, hop_s, samplerate)
pitch_o.set_unit("midi")
pitch_o.set_tolerance(tolerance)
pitch = 0
pitchque = deque([], pitch_samples)

# setup filter: 40-band Slaney mel filterbank fed by a phase vocoder
f = aubio.filterbank(40, win_s)  # documentation says 40 for mel coeffs
f.set_mel_coeffs_slaney(samplerate)
pv = aubio.pvoc(win_s, hop_s)
# zero-initialised energy buffers, sized by `energy_samples` / `n_energies`
energies_raw = np.zeros((energy_samples, n_energies))
energies_max = np.zeros(n_energies)
energies = np.zeros(n_energies)


def get_signal():
    """Read one buffer from the input stream and queue its pitch value.

    NOTE(review): this excerpt is cut off. The ``try`` has no visible
    ``except``/``finally`` clause, and the ``energies*`` globals declared
    below are not touched in the visible part.
    """
    global pitch, energies_raw, energies, energies_max
    try:
        audiobuffer = stream.read(buffer_size)
        # Reinterpret the raw bytes as float32 samples.
        # NOTE(review): np.fromstring is deprecated for binary input in
        # NumPy; np.frombuffer is the documented replacement.
        signal = np.fromstring(audiobuffer, dtype=np.float32)

        # First element of the detector output, truncated to an int and
        # wrapped modulo 255 before being queued.
        pitch_raw = int(pitch_o(signal)[0]) % 255
        pitchque.append(pitch_raw)
Example #28
0
 def test_mfcc_coeffs_get_coeffs(self):
     """A new filterbank exposes an all-zero ndarray of shape (40, 257)."""
     f = filterbank(40, 512)
     coeffs = f.get_coeffs()
     self.assertIsInstance(coeffs, np.ndarray)
     assert_equal(coeffs, 0)
     # Floor division keeps the expected bin count an int on Python 3,
     # where `512 / 2 + 1` would be the float 257.0.
     assert_equal(np.shape(coeffs), (40, 512 // 2 + 1))
Example #29
0
import aubio
import numpy as np
import matplotlib.pyplot as plt

# size of the fft and sampling rate (Hz)
win_s = 2048
samplerate = 48000

# custom list of frequencies; two fewer filters than frequencies are created
freq_list = [60, 80, 200, 400, 800, 1600, 3200, 6400, 12800, 24000]
n_filters = len(freq_list) - 2

# build the filterbank from the list of frequencies
f = aubio.filterbank(n_filters, win_s)
freqs = aubio.fvec(freq_list)
f.set_triangle_bands(freqs, samplerate)

# read the coefficients, apply a gain of 6 to the fifth band (index 4),
# then load the modified coefficients back into the filterbank
coeffs = f.get_coeffs()
coeffs[4] *= 6.
f.set_coeffs(coeffs)

# frequency of each spectrum bin, repeated once per filter, used as x axis
# of the loglog plot of the band gains
bin_freqs = np.arange(win_s // 2 + 1) * samplerate / win_s
freqs = np.tile(bin_freqs, (n_filters, 1))
plt.title('filterbank built from a list of frequencies\nThe 5th band has been amplified by a factor 6.')
plt.loglog(freqs.T, f.get_coeffs().T, '.-')
Example #30
0
 def test_filterbank_long_cvec(self):
     """Calling a 512-window filterbank with a cvec(1024) raises ValueError."""
     bank = filterbank(40, 512)
     self.assertRaises(ValueError, lambda: bank(cvec(1024)))
 def test_random_norm(self):
   """Random spectrum magnitudes give zero output on a new filterbank."""
   f = filterbank(40, 512)
   c = cvec(512)
   # The size must be an int: `512 / 2 + 1` is the float 257.0 on Python 3
   # and is rejected as an array dimension, so use floor division.
   c.norm[:] = random.random((512 // 2 + 1,)).astype('float32')
   assert_equal( f(c), 0)
Example #32
0
 def test_filterbank_short_cvec(self):
     """Calling a 512-window filterbank with a cvec(256) raises ValueError."""
     bank = filterbank(40, 512)
     self.assertRaises(ValueError, lambda: bank(cvec(256)))
Example #33
0
# Print one line of 40 mel-band energies per hop of the input file.
# NOTE(review): win_s, hop_s, source, pvoc, filterbank and zeros are not
# defined in this excerpt; they must be set earlier in the script.
if len(sys.argv) < 2:
    print("Usage: %s <filename> [samplerate]" % sys.argv[0])
    sys.exit(1)

filename = sys.argv[1]

# 0 unless a rate is given on the command line; the effective rate is read
# back from the source object below
samplerate = 0
if len(sys.argv) > 2:
    samplerate = int(sys.argv[2])

s = source(filename, samplerate, hop_s)
samplerate = s.samplerate

pv = pvoc(win_s, hop_s)

f = filterbank(40, win_s)
f.set_mel_coeffs_slaney(samplerate)

energies = zeros((40, ))
o = {}

total_frames = 0
downsample = 2

while True:
    samples, read = s()
    fftgrain = pv(samples)
    new_energies = f(fftgrain)
    timestr = '%f' % (total_frames / float(samplerate))
    print('{:s} {:s}'.format(timestr,
                             ' '.join(['%f' % b for b in new_energies])))
    # advance the time stamp and stop once the source returns a short read;
    # without this the loop never ends and every line is stamped 0.000000
    total_frames += read
    if read < hop_s:
        break
Example #34
0
 def test_slaney(self):
     """Slaney mel coefficients at 16000 Hz have shape (40, 257)."""
     f = filterbank(40, 512)
     f.set_mel_coeffs_slaney(16000)
     a = f.get_coeffs()
     # Floor division keeps the expected bin count an int on Python 3,
     # where `512 / 2 + 1` would be the float 257.0.
     assert_equal(shape(a), (40, 512 // 2 + 1))
Example #35
0
 def test_slaney(self):
     """Slaney mel coefficients at 16000 Hz have shape (40, 257)."""
     f = filterbank(40, 512)
     f.set_mel_coeffs_slaney(16000)
     a = f.get_coeffs()
     # `512 / 2 + 1` is a float on Python 3; floor division yields the
     # integer bin count that an array shape actually contains.
     assert_equal(shape(a), (40, 512 // 2 + 1))
Example #36
0
 def test_mfcc_coeffs_16000(self):
     """Slaney mel coefficients at 16000 Hz match the stored reference file."""
     bank = filterbank(40, 512)
     bank.set_mel_coeffs_slaney(16000)
     computed = bank.get_coeffs()
     reference = array_from_text_file('filterbank_mfcc_16000_512.expected')
     assert_almost_equal(reference, computed)
Example #37
0
 def test_triangle_freqs_wrong_norm(self):
     """set_norm(-1) is rejected with ValueError."""
     bank = filterbank(10, 1024)
     self.assertRaises(ValueError, bank.set_norm, -1)
Example #38
0
 def test_filterbank_short_cvec(self):
     """A cvec of half the filterbank's window size raises ValueError."""
     win_s = 512
     bank = filterbank(40, win_s)
     with self.assertRaises(ValueError):
         bank(cvec(win_s // 2))
Example #39
0
 def test_members(self):
     """The constructor arguments are exposed as n_filters and win_s."""
     bank = filterbank(40, 512)
     members = [bank.n_filters, bank.win_s]
     assert_equal(members, [40, 512])
Example #40
0
# Print one line of 40 mel-band energies per hop of the input file and
# accumulate them in `energies`.
# NOTE(review): win_s, hop_s, source, pvoc, filterbank, zeros and vstack are
# not defined in this excerpt; they must be set earlier in the script.
if len(sys.argv) < 2:
    # single-argument print(...) parses on both Python 2 and Python 3
    print("Usage: %s <filename> [samplerate]" % sys.argv[0])
    sys.exit(1)

filename = sys.argv[1]

# 0 unless a rate is given on the command line; the effective rate is read
# back from the source object below
samplerate = 0
if len(sys.argv) > 2:
    samplerate = int(sys.argv[2])

s = source(filename, samplerate, hop_s)
samplerate = s.samplerate

pv = pvoc(win_s, hop_s)

f = filterbank(40, win_s)
f.set_mel_coeffs_slaney(samplerate)

energies = zeros((40,))
o = {}

total_frames = 0
downsample = 2

while True:
    samples, read = s()
    fftgrain = pv(samples)
    new_energies = f(fftgrain)
    # one output line: time stamp, a space, then the band energies; same
    # text as the former two Python 2 print statements
    timestr = '%f' % (total_frames / float(samplerate))
    print('{:s} {:s}'.format(timestr,
                             ' '.join(['%f' % b for b in new_energies])))
    energies = vstack([energies, new_energies])
    # advance the time stamp and stop once the source returns a short read;
    # without this the loop never ends and every line is stamped 0.000000
    total_frames += read
    if read < hop_s:
        break
Example #41
0
 def test_phase(self):
     """Setting every phase to pi leaves the output of a new filterbank at 0."""
     spectrum = cvec(512)
     spectrum.phas[:] = np.pi
     bank = filterbank(40, 512)
     output = bank(spectrum)
     assert_equal(output, 0)
Example #42
0
 def test_mfcc_coeffs_get_coeffs(self):
     """get_coeffs on a new filterbank gives a zero ndarray of shape (40, 257)."""
     f = filterbank(40, 512)
     coeffs = f.get_coeffs()
     self.assertIsInstance(coeffs, np.ndarray)
     assert_equal(coeffs, 0)
     # `512 / 2 + 1` is the float 257.0 on Python 3; floor division gives
     # the integer bin count that a shape tuple really holds.
     assert_equal(np.shape(coeffs), (40, 512 // 2 + 1))
Example #43
0
 def test_random_norm(self):
     """Random spectrum magnitudes give zero output on a new filterbank."""
     win_s = 512
     bank = filterbank(40, win_s)
     spectrum = cvec(win_s)
     n_bins = int(win_s / 2) + 1
     spectrum.norm[:] = np.random.random((n_bins,)).astype(float_type)
     assert_equal(bank(spectrum), 0)
Example #44
0
 def test_random_norm(self):
     """A new filterbank outputs 0 whatever random norm the spectrum carries."""
     bank = filterbank(40, 512)
     spectrum = cvec(512)
     magnitudes = np.random.random((int(512 / 2) + 1, ))
     spectrum.norm[:] = magnitudes.astype(float_type)
     output = bank(spectrum)
     assert_equal(output, 0)
Example #45
0
 def test_members(self):
     """n_filters and win_s report the values given to the constructor."""
     n_filters, win_s = 40, 512
     bank = filterbank(n_filters, win_s)
     assert_equal([bank.n_filters, bank.win_s], [40, 512])
Example #46
0
def get_mfcc_aubio(file_path):
    """Compute MFCC, delta and delta-delta features for an audio file.

    The file is read hop by hop. In a first pass every hop is stored and run
    through a phase vocoder and a ``n_coeffs``-band filterbank. The stored
    samples are then passed to ``preEmphasis`` (defined elsewhere) and, in a
    second pass, cut into hops again and fed to a second phase vocoder and an
    ``mfcc`` object.

    The first MFCC column is dropped and the per-frame mean of the filterbank
    outputs is appended as the last column. Deltas and delta-deltas are
    first differences along the frame axis, padded with a leading zero row.

    Both accumulators start from an all-zero row that is never removed, so
    the first frame of the result comes from that initial row.

    Args:
        file_path: path of the audio file; it is also concatenated into the
            progress message, so it has to be a string.

    Returns:
        A list of lists: the rows of the transposed MFCC matrix, followed by
        the rows of the transposed delta matrix, then those of the transposed
        delta-delta matrix.
    """

    _p = 0.97  # passed as second argument to preEmphasis below
    samplerate = 16000

    # for Computing a Spectrum
    win_s = 512  # Window Size
    hop_size = 160  # Hop Size => shift width: samples per 0.01 s step (matches the resolution of Julius)

    n_filters = 40  # must be 40 for mfcc
    n_coeffs = 13

    src = source(file_path, samplerate, hop_size)
    # use the rate reported by the source from here on
    samplerate = src.samplerate
    total_frames = 0
    total_samples = np.array([])

    pv = pvoc(win_s, hop_size)
    # NOTE(review): this filterbank is built with n_coeffs (13) bands, not
    # n_filters (40) — confirm that this is intended.
    f = filterbank(n_coeffs, win_s)
    f.set_mel_coeffs_slaney(samplerate)
    # initial all-zero row; one row per hop is stacked below it
    energies = np.zeros((n_coeffs, ))

    # First pass: store every hop and compute its filterbank output.
    while True:
        hop_samples, read = src()  # read hop_size new samples from source
        total_samples = np.append(total_samples, hop_samples)

        fftgrain = pv(hop_samples)
        new_energies = f(fftgrain)
        energies = np.vstack([energies, new_energies])

        total_frames += read  # increment total number of frames
        if read < hop_size:  # end of file reached
            break

    # preEmphasis
    total_samples = preEmphasis(total_samples, _p).astype("float32")

    p = pvoc(win_s, hop_size)
    m = mfcc(win_s, n_filters, n_coeffs, samplerate)
    # initial all-zero row; one MFCC row per hop is stacked below it
    mfccs = np.zeros([
        n_coeffs,
    ])
    index = 1

    # Second pass: hop through the pre-emphasised samples and compute MFCCs.
    while True:
        old_frame = hop_size * (index - 1)
        cur_frame = hop_size * index

        if total_frames - old_frame < hop_size:
            samples = total_samples[old_frame:total_frames]

            # Zero-pad the tail so the last frame reaches the hop size
            samples = np.pad(samples,
                             [0, hop_size - (total_frames - old_frame)],
                             "constant")
        else:
            samples = total_samples[old_frame:cur_frame]

        spec = p(samples)
        mfcc_out = m(spec)
        mfccs = np.vstack((mfccs, mfcc_out))

        # same condition as above: the padded frame was the last one
        if total_frames - old_frame < hop_size:
            break
        index += 1

    # The first MFCC dimension is not needed, so remove it
    mfccs = np.delete(mfccs, 0, axis=1)

    # one value per frame: mean over the filterbank bands
    energies = np.mean(energies, axis=1)

    # Append the power term as the last column (the original comment calls it
    # the log-power term, although no logarithm is taken here)
    mfccs = np.hstack((mfccs, energies.reshape(energies.shape[0], 1)))

    # first differences along the frame axis, with a zero row prepended so
    # the number of frames is unchanged
    deltas = np.diff(mfccs, axis=0)
    deltas = np.pad(deltas, [(1, 0), (0, 0)], "constant")

    ddeltas = np.diff(deltas, axis=0)
    ddeltas = np.pad(ddeltas, [(1, 0), (0, 0)], "constant")

    # switch to one row per feature dimension before converting to lists
    mfccs = mfccs.transpose()
    deltas = deltas.transpose()
    ddeltas = ddeltas.transpose()

    all_mfccs = mfccs.tolist() + deltas.tolist() + ddeltas.tolist()

    print("Get MFCC in " + file_path + " ...")

    return all_mfccs
Example #47
0
 def test_filterbank_long_cvec(self):
     """A cvec of twice the filterbank's window size raises ValueError."""
     win_s = 512
     bank = filterbank(40, win_s)
     with self.assertRaises(ValueError):
         bank(cvec(win_s * 2))
Example #48
0
 def test_set_coeffs(self):
     """get_coeffs returns exactly what set_coeffs was given."""
     bank = filterbank(40, 512)
     n_bins = int(512 / 2) + 1
     random_coeffs = np.random.random([40, n_bins])
     random_coeffs = random_coeffs.astype(float_type)
     bank.set_coeffs(random_coeffs)
     stored = bank.get_coeffs()
     assert_equal(random_coeffs, stored)
Example #49
0
 def test_phase(self):
     """A spectrum with all phases at pi yields 0 from a new filterbank."""
     win_s = 512
     bank = filterbank(40, win_s)
     spectrum = cvec(win_s)
     spectrum.phas[:] = np.pi
     assert_equal(bank(spectrum), 0)
Example #50
0
 def test_norm(self):
     """A spectrum with all norms at 1 yields 0 from a new filterbank."""
     win_s = 512
     bank = filterbank(40, win_s)
     spectrum = cvec(win_s)
     spectrum.norm[:] = 1
     assert_equal(bank(spectrum), 0)
#! /usr/bin/env python

from aubio import filterbank, fvec
from pylab import loglog, show, subplot, xlim, ylim, xlabel, ylabel, title
from numpy import vstack, arange

# Build a triangular filterbank from a list of frequencies, boost one band
# and plot all band gains on log-log axes.

# size of the fft and sampling rate (Hz)
win_s = 2048
samplerate = 48000

# two fewer filters than frequencies are created
freq_list = [60, 80, 200, 400, 800, 1600, 3200, 6400, 12800, 24000]
n_filters = len(freq_list) - 2

f = filterbank(n_filters, win_s)
freqs = fvec(freq_list)
f.set_triangle_bands(freqs, samplerate)

# multiply the fifth band (index 4) by 5 and load the result back
coeffs = f.get_coeffs()
coeffs[4] *= 5.

f.set_coeffs(coeffs)

# x axis of the plot: despite its name, `times` holds the frequency in Hz of
# every spectrum bin, repeated once per filter; floor division keeps the bin
# count passed to arange an int
times = vstack([arange(win_s // 2 + 1) * samplerate / win_s] * n_filters)
# the title now states the gain actually applied above (5, not 2)
title('Bank of filters built using a simple list of boundaries\nThe middle band has been amplified by 5.')
loglog(times.T, f.get_coeffs().T, '.-')
xlim([50, samplerate/2])
ylim([1.0e-6, 2.0e-2])
xlabel('log frequency (Hz)')
ylabel('log amplitude')

show()
Example #52
0
from aubio import filterbank, fvec

# Build a 9-band triangular filterbank from 11 frequencies and plot it.
f = filterbank(9, 1024)
freq_list = [60, 80, 200, 400, 800, 1600, 3200, 6400, 12800, 15000, 24000]
freqs = fvec(freq_list)
# pass the fvec built above; the original handed over the plain Python list
# and left `freqs` unused
f.set_triangle_bands(freqs, 48000)

from pylab import loglog, show
loglog(f.get_coeffs().T, '+-')
show()