def Extract_MFCC(row, col):
    """Compute the MFCC frames of ``Test.wav`` for the grid cell (row, col).

    The file is read in hops of 128 samples; every hop is run through a
    phase vocoder (window of 512 samples) and an MFCC extractor with 40
    filters and 13 coefficients.

    Args:
        row: Row index of the grid cell.
        col: Column index of the grid cell.

    Returns:
        A 2-D array with one row of 13 coefficients per hop, preceded by the
        all-zero row used to seed the stack.  For the cell in the last row
        and last column of ``grid_size`` every value is shifted by 100.
    """
    from aubio import source, pvoc, mfcc
    from numpy import vstack, zeros

    n_filters = 40  # must be 40 for mfcc
    n_coeffs = 13
    source_filename = 'Test.wav'
    samplerate = 44100
    win_s = 512
    hop_s = 128

    s = source(source_filename, samplerate, hop_s)
    samplerate = s.samplerate  # use the rate reported by the opened source
    p = pvoc(win_s, hop_s)
    m = mfcc(win_s, n_filters, n_coeffs, samplerate)

    mfccs = zeros([n_coeffs, ])
    while True:
        samples, read = s()
        mfccs = vstack((mfccs, m(p(samples))))
        if read < hop_s:  # a short read is the last hop
            break

    # Compare ``col`` (not ``grid_size`` itself) against the last column.
    # ``grid_size`` is looked up in the enclosing module scope.
    if row == grid_size.nRow - 1 and col == grid_size.nCol - 1:
        mfccs = mfccs + 100
    return mfccs
def test_run_with_params(self, buf_size, n_filters, n_coeffs, samplerate):
    """Check that mfcc can run with reasonable parameters."""
    extractor = mfcc(buf_size, n_filters, n_coeffs, samplerate)
    spectrum = cvec(buf_size)
    spectrum.phas[0] = 0.2
    # Feed the same spectrum ten times; nothing is asserted on the output.
    calls_left = 10
    while calls_left:
        extractor(spectrum)
        calls_left -= 1
def Extract_MFCC(self, player_pos_x, player_pos_y):
    """Return the MFCC frames of ``./Audios/Hello.wav`` scaled by proximity.

    The frames are multiplied by ``1 / distance`` where ``distance`` is the
    Euclidean distance between the player position and the target position
    (``target_position_x`` / ``target_position_y`` from the enclosing
    scope).  At distance 0 the frames are returned unscaled.

    Args:
        player_pos_x: X coordinate of the player.
        player_pos_y: Y coordinate of the player.

    Returns:
        A 2-D array: the all-zero seed row followed by one row of 13
        coefficients per hop, all multiplied by the proximity factor.
    """
    import sys
    from aubio import source, pvoc, mfcc
    from numpy import vstack, zeros, diff

    distance = scipy.spatial.distance.euclidean(
        [player_pos_x, player_pos_y],
        [target_position_x, target_position_y],
    )

    filter_count = 40  # must be 40 for mfcc
    coeff_count = 13
    window_size = 512
    hop_size = 128

    reader = source('./Audios/Hello.wav', 44100, hop_size)
    vocoder = pvoc(window_size, hop_size)
    extractor = mfcc(window_size, filter_count, coeff_count, reader.samplerate)

    stacked = zeros([coeff_count, ])
    frames_read = 0
    keep_reading = True
    while keep_reading:
        samples, read = reader()
        stacked = vstack((stacked, extractor(vocoder(samples))))
        frames_read += read
        # Stop once a hop comes back shorter than requested.
        keep_reading = read >= hop_size

    # Guard the division: at distance 0 the frames are left unscaled.
    factor = 1 if distance == 0 else 1 / distance
    return stacked * factor
def __init__(self, samplerate=44100, winsize=1024, hopsize=512, filters=40, coeffs=13):
    """Create the spectral descriptors, the phase vocoder and the MFCC extractor.

    Args:
        samplerate: Passed to ``aubio.mfcc`` only; it is not stored.
        winsize: Window size for the descriptors, the vocoder and the MFCC.
        hopsize: Hop size of the phase vocoder.
        filters: Number of filters given to ``aubio.mfcc``.
        coeffs: Number of MFCC coefficients.
    """
    super(AubioAnalyser, self).__init__()
    self.winsize, self.hopsize = winsize, hopsize
    self.coeffs, self.filters = coeffs, filters

    self.descriptors = {}
    self.methods = [
        "default", "energy", "hfc", "complex", "phase", "specdiff",
        "kl", "mkl", "specflux", "centroid", "slope", "rolloff",
        "spread", "skewness", "kurtosis", "decrease",
    ]
    # One aubio.specdesc object per method name, keyed by that name.
    self.descriptors.update(
        (name, aubio.specdesc(name, self.winsize)) for name in self.methods
    )

    self.pvocoder = aubio.pvoc(self.winsize, self.hopsize)
    self.mfcc_feature = aubio.mfcc(winsize, filters, coeffs, samplerate)
    self.mfccs = numpy.zeros((self.coeffs,))
def test_set_mel_coeffs_slaney(self):
    """With Slaney mel coefficients, power and scale both read back as 1."""
    buf_size = 512
    extractor = mfcc(buf_size, 40, 10, 16000)
    extractor.set_mel_coeffs_slaney()
    # Run once on a freshly created spectrum before reading the settings.
    extractor(cvec(buf_size))
    assert extractor.get_power() == 1
    assert extractor.get_scale() == 1
def mfcc(frame, audiofile):
    '''
    Compute the mel-frequency cepstral coefficients (MFCC) of one frame.

    A frame shorter than ``audiofile['frame_size']`` is zero padded up to
    that size.  Its spectrum is computed by a phase vocoder whose window is
    a power of two (the base-2 exponent of the frame size, rounded to the
    nearest integer) and the spectrum is passed to a 40-filter MFCC.

    Args:
        frame: 1-D numpy array of samples.  Frames longer than the frame
            size are passed on unchanged.
        audiofile: Mapping providing ``'sample_rate'`` and ``'frame_size'``.

    Returns:
        The 12 coefficients produced by aubio for this frame.
    '''
    coefficientsCount = 12
    sampleRate = audiofile['sample_rate']
    # Used as pad width and hop size below, both of which must be integers.
    frame_size = int(audiofile['frame_size'])

    # Round to nearest power of 2 to facilitate FFT
    fftsize = pow(2, int(math.log(frame_size, 2) + 0.5))
    m = aub.mfcc(fftsize, 40, coefficientsCount, sampleRate)

    # The spectrum is obtained from a phase vocoder fed one frame per call.
    p = aub.pvoc(fftsize, frame_size)

    # Pad against the configured frame size rather than a fixed 16000, so
    # short frames are padded whatever the frame size is.
    if len(frame) < frame_size:
        frame = np.pad(frame, (0, frame_size - len(frame)),
                       'constant', constant_values=0)

    spec = p(frame.astype(np.float32))
    return m(spec)
def __init__(self):
    """Connect to Redis, open the audio stream and create the aubio analysers.

    The stream is opened with both input and output enabled and
    ``self.callback`` as its callback.  It is started only at the very end,
    once every attribute has been assigned.
    """
    self.redis = redis.StrictRedis(host=redishost, port=6379, password="", decode_responses=True)

    self.p = pyaudio.PyAudio()
    # start=False: PyAudio otherwise starts a stream as soon as it is opened,
    # so self.callback could run against a half-initialised object.
    stream = self.p.open(format=self.FORMAT,
                         channels=self.CHANNELS,
                         rate=self.RATE,
                         input=True,
                         output=True,
                         input_device_index=self.get_input_device_index(),
                         output_device_index=self.get_output_device_index(),
                         frames_per_buffer=self.CHUNK,
                         stream_callback=self.callback,
                         start=False)

    self.a_onset = aubio.onset("default", self.CHUNK, self.hop_s, self.RATE)
    self.a_tempo = aubio.tempo("specflux", self.CHUNK, self.hop_s, self.RATE)
    self.a_pitch = aubio.pitch("default", self.CHUNK, self.hop_s, self.RATE)
    self.a_notes = aubio.notes("default", self.CHUNK, self.hop_s, self.RATE)

    n_filters = 40  # required
    n_coeffs = 13  # I wonder if i made this 1....
    self.a_pvoc = aubio.pvoc(self.CHUNK, self.hop_s)
    self.a_mfcc = aubio.mfcc(self.CHUNK, n_filters, n_coeffs, self.RATE)

    self.tolerance = 0.8
    self.a_pitch.set_tolerance(self.tolerance)

    # Running pitch statistics and bookkeeping state.
    self.highest_pitch = 0
    self.lowest_pitch = 99999999
    self.average_pitch = 0
    self.average_pitch_samples = 0
    self.last_average = 0
    self.colors = None
    self.pitch_range = None
    self.range_counter = 0
    self.all_notes = set()

    stream.start_stream()
def mfcc(self, **options):
    """Compute the MFCC frames of ``self.audioFilename``.

    Keyword Args:
        nFilters: Number of filters; falls back to 40 when missing or falsy.
        nCoeffs / nCoefs: Number of coefficients; falls back to 13 when
            both are missing or falsy.

    Returns:
        A 2-D array: the all-zero seed row followed by one row of
        coefficients per hop read from the file.
    """
    nFilters = options.get("nFilters") or 40  # must be 40 for mfcc
    # "nCoefs" is the key this method has always read; "nCoeffs" (spelled
    # like "nFilters") is accepted as well.
    nCoeffs = options.get("nCoeffs") or options.get("nCoefs") or 13

    sourceBuffer = source(self.audioFilename, self.samplerate, self.hopSize)
    pvocBuffer = pvoc(self.fftWindowSize, self.hopSize)
    # NOTE: inside this body ``mfcc`` resolves to the module-level name, not
    # to this method.
    mfccBuffer = mfcc(self.fftWindowSize, nFilters, nCoeffs, self.samplerate)

    mfccs = np.zeros([nCoeffs, ])
    while True:
        samples, read = sourceBuffer()
        mfccs = np.vstack((mfccs, mfccBuffer(pvocBuffer(samples))))
        if read < self.hopSize:  # a short read is the last hop
            break
    return mfccs
def setup(self, channels=None, samplerate=None, blocksize=None, totalframes=None):
    """Run the parent setup, then build the MFCC extractor.

    The extractor uses ``self.input_blocksize``, ``self.n_filters`` and
    ``self.n_coeffs`` together with the ``samplerate`` argument.
    """
    parent = super(AubioMfcc, self)
    parent.setup(channels, samplerate, blocksize, totalframes)
    extractor = mfcc(self.input_blocksize,
                     self.n_filters,
                     self.n_coeffs,
                     samplerate)
    self.mfcc = extractor
def __init__(self, sample_rate=11025, buf_size=1024, hop_size=256, n_mfcc_filters=40, n_mfcc_coeffs=13, pitch_method='yin'):
    """Build the phase vocoder, MFCC extractor and pitch detector.

    ``sample_rate`` is converted to ``int`` once and that value is shared
    by the MFCC extractor and the pitch detector.
    """
    rate = int(sample_rate)
    self._sample_rate = rate
    self._pvoc = aubio.pvoc(buf_size, hop_size)
    self._mfcc = aubio.mfcc(buf_size, n_mfcc_filters, n_mfcc_coeffs, rate)
    pitch_options = dict(
        method=pitch_method,
        buf_size=buf_size,
        hop_size=hop_size,
        samplerate=rate,
    )
    self._pitch = aubio.pitch(**pitch_options)
def analyzeMFCC(grain):
    """Return the 13 MFCC coefficients of the first hop of a grain, as a list.

    ``grain`` must provide ``"frameCount"``, ``"file"`` and ``"sampleRate"``.
    Window size and hop size are both set to the frame count, and only one
    hop is read from the file.
    """
    windowSize = int(float(grain["frameCount"]))
    reader = source(grain["file"], int(grain["sampleRate"]), windowSize)
    sampleRate = reader.samplerate
    vocoder = pvoc(windowSize, windowSize)
    extractor = mfcc(windowSize, 40, 13, sampleRate)
    samples, _ = reader()
    return extractor(vocoder(samples)).tolist()
def analyzeMFCC(grain):
    """Return the 13 MFCC coefficients of the first hop of a grain, as a list.

    ``grain`` must provide ``"frameCount"``, ``"file"`` and ``"sampleRate"``.
    The window size is the frame count and the hop size is one sample less;
    only one hop is read from the file.
    """
    windowSize = int(float(grain["frameCount"]))
    hopSize = windowSize - 1
    reader = source(grain["file"], int(grain["sampleRate"]), hopSize)
    sampleRate = reader.samplerate
    vocoder = pvoc(windowSize, hopSize)
    extractor = mfcc(windowSize, 40, 13, sampleRate)
    samples, _ = reader()
    return extractor(vocoder(samples)).tolist()
def setup(self, channels=None, samplerate=None, blocksize=None, totalframes=None):
    """Run the parent setup, then create the vocoder, MFCC extractor and buffers.

    Uses 40 filters and 13 coefficients; the extractor is given the
    ``samplerate`` argument as passed in.
    """
    parent = super(AubioMfcc, self)
    parent.setup(channels, samplerate, blocksize, totalframes)

    self.n_filters, self.n_coeffs = 40, 13
    block = self.input_blocksize
    self.pvoc = pvoc(block, self.input_stepsize)
    self.mfcc = mfcc(block, self.n_filters, self.n_coeffs, samplerate)

    self.block_read = 0
    self.mfcc_results = numpy.zeros((self.n_coeffs,))
def mfccs(windows):
    """Compute one MFCC row per window.

    ``window_size``, ``hop_size``, ``n_filters``, ``n_coeffs`` and
    ``sample_rate`` are read from the enclosing scope.

    Args:
        windows: Sequence of sample windows; converted to float32 before
            being fed to the phase vocoder one window at a time.

    Returns:
        A 2-D array: the all-zero seed row followed by one row of
        ``n_coeffs`` coefficients per window.
    """
    p = aubio.pvoc(window_size, hop_size)
    m = aubio.mfcc(window_size, n_filters, n_coeffs, sample_rate)

    # Size the seed row from n_coeffs rather than a literal 13, so stacking
    # works for any coefficient count.
    stacked = np.zeros([n_coeffs, ])
    for samples in np.float32(windows):
        stacked = np.vstack((stacked, m(p(samples))))
    return stacked
def __init__(self, args):
    """Create the phase vocoder and the MFCC object from ``args``.

    ``parse_options`` is called three times with different key lists:
    first for the vocoder, then for the MFCC, and finally with the union
    of both so that all options are remembered.
    """
    # presumably parse_options populates self.options — confirm in the base class
    pvoc_keys = ['hop_size', 'buf_size']
    mfcc_keys = ['buf_size', 'n_filters', 'n_coeffs', 'samplerate']

    self.parse_options(args, pvoc_keys)
    self.remap_pvoc_options(self.options)
    self.pv = aubio.pvoc(**self.options)

    self.parse_options(args, mfcc_keys)
    self.mfcc = aubio.mfcc(**self.options)

    # remember all options
    every_key = list(set(pvoc_keys + mfcc_keys))
    self.parse_options(args, every_key)

    super(process_mfcc, self).__init__(args)
def __init__(self, sample_rate=11025, buf_size=1024, hop_size=256, n_mfcc_filters=40, n_mfcc_coeffs=13, pitch_method='yin'):
    """Initialize the feature extractor.

    Creates an aubio phase vocoder, an MFCC extractor and a pitch detector
    that share the buffer size, with the sample rate coerced to ``int``.
    """
    self._sample_rate = int(sample_rate)
    sample_rate_int = self._sample_rate

    vocoder = aubio.pvoc(buf_size, hop_size)
    self._pvoc = vocoder

    mfcc_args = (buf_size, n_mfcc_filters, n_mfcc_coeffs, sample_rate_int)
    self._mfcc = aubio.mfcc(*mfcc_args)

    self._pitch = aubio.pitch(method=pitch_method,
                              buf_size=buf_size,
                              hop_size=hop_size,
                              samplerate=sample_rate_int)
def mfccs(windows):
    """Return the MFCC coefficients of every window, stacked row by row.

    The analysis parameters (``window_size``, ``hop_size``, ``n_filters``,
    ``n_coeffs``, ``sample_rate``) come from the enclosing scope.

    Args:
        windows: Sequence of sample windows, cast to float32 before use.

    Returns:
        A 2-D array whose first row is all zeros (the stacking seed) and
        whose following rows hold ``n_coeffs`` coefficients per window.
    """
    vocoder = aubio.pvoc(window_size, hop_size)
    extractor = aubio.mfcc(window_size, n_filters, n_coeffs, sample_rate)

    # The seed row must be as wide as the extractor output; a literal 13
    # only works when n_coeffs happens to be 13.
    rows = np.zeros([n_coeffs, ])
    for window in np.float32(windows):
        rows = np.vstack((rows, extractor(vocoder(window))))
    return rows
def test_members(self):
    """The output has ``n_coeffs`` values, all non-zero for a random spectrum."""
    extractor = mfcc(buf_size, n_filters, n_coeffs, samplerate)
    spectrum = cvec(buf_size)

    # A freshly created spectrum already yields n_coeffs output values.
    first_output = extractor(spectrum)
    self.assertEqual(first_output.size, n_coeffs)

    # Randomise norm first, then phase, and expect no zero coefficient.
    norm_shape = (len(spectrum.norm),)
    spectrum.norm = random.random_sample(norm_shape).astype(float_type)
    phas_shape = (len(spectrum.phas),)
    spectrum.phas = random.random_sample(phas_shape).astype(float_type)

    nonzero_total = count_nonzero(extractor(spectrum) != 0.)
    self.assertEqual(nonzero_total, n_coeffs)
def mfcc(frame, audiofile):
    """Compute 12 MFCC coefficients for a single frame.

    Frames shorter than ``audiofile['frame_size']`` are zero padded up to
    that size before the spectrum is computed.  The FFT size is a power of
    two: the base-2 exponent of the frame size rounded to the nearest
    integer.

    Args:
        frame: 1-D numpy array of samples.  Frames longer than the frame
            size are passed on unchanged.
        audiofile: Mapping providing ``'sample_rate'`` and ``'frame_size'``.

    Returns:
        The coefficients produced by aubio for this frame.
    """
    coefficientsCount = 12
    sampleRate = audiofile['sample_rate']
    # Pad width and hop size must be integers, so convert once up front.
    frame_size = int(audiofile['frame_size'])

    fftsize = pow(2, int(math.log(frame_size, 2) + 0.5))  # Round to nearest power of 2
    m = aub.mfcc(fftsize, 40, coefficientsCount, sampleRate)
    p = aub.pvoc(fftsize, frame_size)

    # Compare against the configured frame size instead of a fixed 128, so
    # short frames are padded whatever the frame size is.
    if len(frame) < frame_size:
        frame = np.pad(frame, (0, frame_size - len(frame)),
                       'constant', constant_values=0)

    spec = p(frame.astype(np.float32))
    return m(spec)
def __init__(self, samplerate=44100, winsize=1024, hopsize=512, filters=40, coeffs=13):
    """Initialise the analyser: descriptors, phase vocoder and MFCC state.

    ``samplerate`` is forwarded to ``aubio.mfcc`` and not kept on the
    instance; the other arguments are stored as attributes of the same name.
    """
    super(AubioAnalyser, self).__init__()

    self.winsize = winsize
    self.hopsize = hopsize
    self.coeffs = coeffs
    self.filters = filters

    self.descriptors = {}
    self.methods = (
        "default energy hfc complex phase specdiff kl mkl "
        "specflux centroid slope rolloff spread skewness kurtosis decrease"
    ).split()
    # Build one spectral descriptor per method, keyed by the method name.
    for descriptor_name in self.methods:
        descriptor = aubio.specdesc(descriptor_name, self.winsize)
        self.descriptors[descriptor_name] = descriptor

    self.pvocoder = aubio.pvoc(self.winsize, self.hopsize)
    self.mfcc_feature = aubio.mfcc(winsize, filters, coeffs, samplerate)
    self.mfccs = numpy.zeros((self.coeffs,))
def __init__(self):
    """Connect to Redis, create the aubio analysers and start the stream.

    All analysers share ``self.CHUNK`` as buffer size; ``self.start_stream()``
    is called last, after every attribute has been set.
    """
    self.redis = redis.StrictRedis(host=redishost, port=6379, password="", decode_responses=True)

    chunk, hop, rate = self.CHUNK, self.hop_s, self.RATE
    self.a_onset = aubio.onset("default", chunk, hop, rate)
    self.a_tempo = aubio.tempo("specflux", chunk, hop, rate)
    self.a_notes = aubio.notes("default", chunk, hop, rate)

    filter_count = 40  # required
    coeff_count = 13
    self.a_pvoc = aubio.pvoc(chunk, hop)
    self.a_mfcc = aubio.mfcc(chunk, filter_count, coeff_count, rate)

    self.last_average = 0
    self.colors = None
    self.range_counter = 0
    self.all_notes = set()

    self.start_stream()
def getMFCC(source_filename, win_s=512, n_coeffs=14, samplerate=0):
    """Compute the MFCC frames of an audio file.

    Args:
        source_filename: Path handed to ``source``.
        win_s: FFT window size; the hop size is a quarter of it.
        n_coeffs: Number of coefficients per frame.
        samplerate: Rate requested from ``source``; the rate reported by the
            opened source is what the MFCC extractor receives.

    Returns:
        A 2-D array: the all-zero seed row followed by one row of
        ``n_coeffs`` coefficients per hop.
    """
    hop_s = win_s // 4  # hop size
    n_filters = 40  # must be 40 for mfcc

    reader = source(source_filename, samplerate, hop_s)
    vocoder = pvoc(win_s, hop_s)
    extractor = mfcc(win_s, n_filters, n_coeffs, reader.samplerate)

    stacked = zeros([n_coeffs, ])
    frames_read = 0
    more_to_read = True
    while more_to_read:
        samples, read = reader()
        stacked = vstack((stacked, extractor(vocoder(samples))))
        frames_read += read
        # A hop shorter than requested is the last one.
        more_to_read = read >= hop_s
    return stacked
def test_wrong_n_filters(self):
    """A negative ``n_filters`` must raise ``ValueError``."""
    self.assertRaises(ValueError, mfcc, n_filters=-1)
# Command line: <source_filename> [samplerate]
if len(sys.argv) < 2:
    # Single parenthesised argument: prints the same text on Python 2 and 3.
    print("Usage: %s <source_filename>" % sys.argv[0])
    sys.exit(1)
source_filename = sys.argv[1]

# 0 is requested unless a rate is given as the second argument.
samplerate = 0
if len(sys.argv) > 2:
    samplerate = int(sys.argv[2])

s = source(source_filename, samplerate, hop_s)
samplerate = s.samplerate  # use the rate reported by the opened source
p = pvoc(win_s, hop_s)
m = mfcc(win_s, n_filters, n_coeffs, samplerate)

desc = []
tdesc = []
# Seed row sized from n_coeffs (not a literal 13) so it always matches the
# width of the extractor output.
mfccs = zeros([n_coeffs])
frames_read = 0
while True:
    samples, read = s()
    spec = p(samples)
    mfcc_out = m(spec)
    mfccs = vstack((mfccs, mfcc_out))
    frames_read += read
    if read < hop_s:  # a short read is the last hop
        break
def test_set_mel_coeffs_htk(self):
    """HTK mel coefficients from 0 to half the sample rate can be set and used."""
    buf_size = 512
    samplerate = 16000
    extractor = mfcc(buf_size, 20, 10, samplerate)
    low_freq, high_freq = 0., samplerate/2.
    extractor.set_mel_coeffs_htk(low_freq, high_freq)
    extractor(cvec(buf_size))
def test_read_only_member(self, name):
    """Assigning to attribute ``name`` of a default mfcc object must fail."""
    extractor = mfcc()
    rejected_with = (TypeError, AttributeError)
    with assert_raises(rejected_with):
        setattr(extractor, name, 0)
def test_default_param(self, name, expected):
    # NOTE: the formatted string below is an expression statement, not a
    # docstring; it is evaluated on every call and its result is discarded.
    """ test mfcc.{:s} = {:d} """.format(name, expected)
    extractor = mfcc()
    actual = getattr(extractor, name)
    assert actual == expected
def get_mfcc_aubio(file_path):
    """Compute MFCC features plus first and second differences for a file.

    The file is read twice over: a first pass collects every sample and the
    filterbank output of each hop; after pre-emphasis of the collected
    samples, a second pass computes the MFCC of each hop.

    Args:
        file_path: Path of the audio file handed to ``source``.

    Returns:
        A list of 39 lists, each with one value per frame: 13 feature rows
        (12 MFCC values after dropping the first one, plus the per-frame
        mean filterbank energy), followed by their 13 first differences and
        13 second differences.  The first frame is the all-zero seed row.
    """
    _p = 0.97  # coefficient passed to preEmphasis
    samplerate = 16000
    # for Computing a Spectrum
    win_s = 512  # Window Size
    hop_size = 160  # Hop size => shift width (number of samples for a 0.01 s shift, matching Julius's resolution)
    n_filters = 40  # must be 40 for mfcc
    n_coeffs = 13
    src = source(file_path, samplerate, hop_size)
    samplerate = src.samplerate
    total_frames = 0
    total_samples = np.array([])
    pv = pvoc(win_s, hop_size)
    f = filterbank(n_coeffs, win_s)
    f.set_mel_coeffs_slaney(samplerate)
    energies = np.zeros((n_coeffs, ))
    while True:
        hop_samples, read = src()  # read hop_size new samples from source
        total_samples = np.append(total_samples, hop_samples)
        fftgrain = pv(hop_samples)
        new_energies = f(fftgrain)
        energies = np.vstack([energies, new_energies])
        total_frames += read  # increment total number of frames
        if read < hop_size:  # end of file reached
            break
    # Pre-emphasis (preEmphasis is defined outside this function)
    total_samples = preEmphasis(total_samples, _p).astype("float32")
    p = pvoc(win_s, hop_size)
    m = mfcc(win_s, n_filters, n_coeffs, samplerate)
    mfccs = np.zeros([ n_coeffs, ])
    index = 1
    while True:
        # [old_frame, cur_frame) is the sample range of the current hop.
        old_frame = hop_size * (index - 1)
        cur_frame = hop_size * index
        if total_frames - old_frame < hop_size:
            samples = total_samples[old_frame:total_frames]
            # Zero-pad the tail so the last chunk reaches the hop size
            samples = np.pad(samples, [0, hop_size - (total_frames - old_frame)], "constant")
        else:
            samples = total_samples[old_frame:cur_frame]
        spec = p(samples)
        mfcc_out = m(spec)
        mfccs = np.vstack((mfccs, mfcc_out))
        if total_frames - old_frame < hop_size:
            break
        index += 1
    # The first MFCC dimension is not needed, so drop it
    mfccs = np.delete(mfccs, 0, axis=1)
    energies = np.mean(energies, axis=1)
    # Append the power term as the last column (original comment calls it the
    # log-power term; the value appended is the mean filterbank output per frame)
    mfccs = np.hstack((mfccs, energies.reshape(energies.shape[0], 1)))
    # First and second differences along the frame axis, with a zero row
    # prepended so they keep the same number of frames.
    deltas = np.diff(mfccs, axis=0)
    deltas = np.pad(deltas, [(1, 0), (0, 0)], "constant")
    ddeltas = np.diff(deltas, axis=0)
    ddeltas = np.pad(ddeltas, [(1, 0), (0, 0)], "constant")
    mfccs = mfccs.transpose()
    deltas = deltas.transpose()
    ddeltas = ddeltas.transpose()
    all_mfccs = mfccs.tolist() + deltas.tolist() + ddeltas.tolist()
    print("Get MFCC in " + file_path +
          " ...")
    return all_mfccs
def test_wrong_samplerate(self):
    """A negative ``samplerate`` must raise ``ValueError``."""
    self.assertRaises(ValueError, mfcc, samplerate=-1)
def test_wrong_input_size(self):
    """Feeding a 512-sized cvec to a 1024-sized mfcc must raise ``ValueError``."""
    extractor = mfcc(buf_size=1024)
    # The lambda keeps the cvec construction inside the checked call.
    self.assertRaises(ValueError, lambda: extractor(cvec(512)))
def test_set_power(self):
    """A power set with ``set_power`` reads back and the object still runs."""
    buf_size = 512
    extractor = mfcc(buf_size, 20, 10, 16000)
    power = 2.5
    extractor.set_power(power)
    assert extractor.get_power() == power
    extractor(cvec(buf_size))
def test_wrong_buf_size(self):
    """A negative ``buf_size`` must raise ``ValueError``."""
    self.assertRaises(ValueError, mfcc, buf_size=-1)
def test_wrong_n_coeffs(self):
    """A negative ``n_coeffs`` must raise ``ValueError``."""
    self.assertRaises(ValueError, mfcc, n_coeffs=-1)
def setUp(self):
    """Create a default-constructed mfcc object for each test."""
    default_extractor = mfcc()
    self.o = default_extractor