def DPGMM_test(cov_type, alpha_val):
    """Evaluate closed-set speaker identification with per-speaker DPGMMs.

    Every ``*.wav`` file in ``<cwd>/speakers`` whose name ends in ``00``
    marks one enrolled speaker.  Recordings ``0``..``8`` of that speaker
    (the last digit of the file name is substituted) are concatenated,
    turned into MFCC features and used to fit a ``mixture.DPGMM``.  The
    model then scores every recording whose name ends in ``09``; the
    identification counts as correct when the best-scoring recording carries
    the same four-character speaker id (the characters right before the
    two-digit recording number).

    Args:
        cov_type: Forwarded to ``mixture.DPGMM`` as ``covariance_type``.
        alpha_val: Forwarded to ``mixture.DPGMM`` as ``alpha``.

    Returns:
        None.  The fraction of correctly identified speakers is printed;
        ``nan`` is reported when no enrolment file was found.
    """
    gauss_num = 32
    # os.path.join yields the same pattern as the former hard-coded
    # backslashes on Windows and additionally works on other platforms.
    files = glob.glob(os.path.join(os.getcwd(), 'speakers', '*.wav'))

    # The test recordings are the same for every enrolled speaker, so read
    # them and extract their features once instead of once per speaker.
    # wav.read(...)[1] is taken to be the sample array -- TODO confirm.
    test_features = []
    for path in files:
        if path[-6:-4] == '09':
            samples = wav.read(path)[1]
            # The first feature column is dropped, as for the training data.
            test_features.append((path[-10:-6], MFCC.extract(samples)[:, 1:]))

    good = 0
    total = 0
    for path in files:
        if path[-6:-4] != '00':
            continue
        current_speaker = path[-10:-6]

        # Concatenate recordings 0..8 in one go.  ravel() and the float64
        # cast reproduce what repeated np.append onto an empty float array
        # produced, without re-copying the growing buffer each time.
        merged = np.concatenate(
            [np.ravel(wav.read(path[:-5] + str(i) + path[-4:])[1])
             for i in range(9)]
        ).astype(np.float64)

        model = mixture.DPGMM(n_components=gauss_num, n_iter=100,
                              covariance_type=cov_type, alpha=alpha_val)
        model.fit(MFCC.extract(merged)[:, 1:])

        # Start from -inf so that the best candidate always wins, however
        # low its mean log-probability is.
        best_score = -np.inf
        winner = 'nobody'
        for test_speaker, features in test_features:
            score = np.mean(model.score(features))
            if score > best_score:
                best_score = score
                winner = test_speaker

        if winner == current_speaker:
            good += 1
        total += 1

    # Without any enrolled speaker there is nothing to average over.
    efficiency = good / total if total else float('nan')
    print("DPGMM (covariance_type - ", cov_type, ", alpha - ", str(alpha_val), "), Efficiency = ", str(efficiency))
def mix_feature(tup):
    """Return the MFCC and LPC features of *tup* joined column-wise.

    ``tup`` is passed unchanged to ``MFCC.extract`` and ``LPC.extract``.
    When the MFCC result is empty, a diagnostic containing ``len(tup[1])``
    is written to standard error; the concatenation is attempted regardless.

    Returns:
        ``np.concatenate((mfcc, lpc), axis=1)``.
    """
    cepstral = MFCC.extract(tup)
    predictive = LPC.extract(tup)
    if not len(cepstral):
        # Same bytes as the former ``print >> sys.stderr`` statement.
        sys.stderr.write(
            "%s %s\n" % ("ERROR.. failed to extract mfcc feature:", len(tup[1]))
        )
    return np.concatenate((cepstral, predictive), axis=1)
def select_events(nevents, nfeatures):
    """Pick random audio events, describe each by MFCCs and cluster them.

    Reads the module-level ``faudio``, ``grain_mid`` and ``tracks`` and
    stores the cluster labels in the module-level ``groups``.

    Args:
        nevents: Number of random start positions to draw from ``faudio``.
        nfeatures: Number of random spectral bins to draw.  The bins are not
            used by the current feature computation.

    Returns:
        ``[events, groups]``: the drawn start indices and the second element
        of the ``kmeans(...)`` result (presumably one label per event --
        TODO confirm against the imported ``kmeans``).
    """
    global groups
    fftbins = 8192
    featurewidth = 16
    window_len = 1000

    print("Selecting %d random spectral features.." % nfeatures)
    # Not consumed any more, but still drawn so that the random sequence
    # seen by the event selection below stays the same.  Floor division
    # keeps the bounds integral on Python 3 as well.
    feature_bins = np.random.randint(featurewidth // 2, fftbins // 8, nfeatures)

    print("Selecting %d random audio events.." % nevents)
    events = np.random.randint(0, len(faudio) - grain_mid, nevents)

    # Row 0 holds the event index; rows 1..13 receive the MFCC values.
    features = np.zeros((14, nevents))
    features[0] = np.arange(0, nevents)

    print("Computing audio event spectrograms..")
    # The window does not depend on the event, so build it once.
    window = sig.hann(window_len)
    for i in range(nevents):
        start = events[i]
        # Slicing already clips at the end of the signal.
        # Assumes faudio is one-dimensional -- TODO confirm.
        segment = faudio[start:start + window_len]
        if len(segment) < window_len:
            # An event close to the end yields a short slice, which cannot
            # be multiplied by the fixed-length window; zero-pad it so every
            # event is analysed over the same number of samples.
            padding = np.zeros(window_len - len(segment))
            segment = np.concatenate((segment, padding))
        mfcc = MFCC.extract(segment * window)
        features[:, i] = np.append(i, mfcc)

    print("Clustering events with K-Means algorithm..")
    groups = kmeans(np.transpose(features), tracks, minit='points', iter=30)[1]
    return [events, groups]
def Classify(self, sample, verbose=True):
    """Return the stored gesture closest to *sample*, or None.

    The raw buffer is interpreted as 16-bit integers and converted with
    ``MFCC.extract``.  For every gesture in ``self.params`` the score is the
    smallest average city-block distance between the sample's feature rows
    and those of one of the gesture's templates, compared row by row over
    the rows both have.

    Args:
        sample: Raw audio buffer accepted by ``numpy.frombuffer``.
        verbose: When true, print each gesture's score and the final winner.

    Returns:
        The name of the best-scoring gesture if its score is below the
        module-level ``THRESHOLD``; otherwise None.  None is also returned
        when no gesture could be scored at all.
    """
    length = len(sample)
    features = MFCC.extract(numpy.frombuffer(sample, numpy.int16))

    # Explicit initial values replace the former bare ``except`` that relied
    # on a NameError to detect the first iteration.
    lowest = None
    minimum = None
    for gesture in self.params:
        distances = []
        for tsample in self.params[gesture]:
            # NOTE(review): kept from the original (``abs(a - b) <= 0``):
            # a template is skipped when its row count equals the byte
            # length of the sample.  The intent is unclear -- confirm.
            if length == len(tsample):
                continue
            frames = min(len(features), len(tsample))
            if frames == 0:
                # Nothing to compare; avoids a division by zero.
                continue
            total_distance = sum(
                dist.cityblock(features[i], tsample[i]) for i in range(frames)
            )
            # Average over the number of compared rows.  The original
            # divided by the last loop index, i.e. one less than this.
            distances.append(total_distance / float(frames))

        if not distances:
            # numpy.min raises on an empty sequence.
            continue
        score = numpy.min(distances)
        if verbose:
            print("Gesture %s: %f" % (gesture, score))
        if minimum is None or score < minimum:
            minimum = score
            lowest = gesture

    if verbose:
        print("%s %s" % (lowest, minimum))
    if minimum is not None and minimum < THRESHOLD:
        return lowest
    return None
def test(filename, verbose = False):
    """Run ``HMM_Model.test`` on the MFCC features of a WAV file.

    Args:
        filename: Path handed to ``loadWAVfile``.
        verbose: Forwarded to ``HMM_Model.test``.

    Returns:
        None.
    """
    # show=False is passed through to MFCC.extract unchanged.
    features = MFCC.extract(loadWAVfile(filename), show=False)
    HMM_Model.test(features, verbose)
def GenerateParams (gestures, verbose = True):
    """Build the MFCC templates for every gesture.

    Args:
        gestures: Mapping from gesture name to an iterable of raw sample
            buffers; each buffer is read as 16-bit integers.
        verbose: When true, print a progress line per gesture.

    Returns:
        A dict mapping each gesture name to the list of
        ``MFCC.extract`` results for its samples, in input order.
    """
    params = {}
    for gesture in gestures:
        if verbose:
            print("Processing " + gesture)
        params[gesture] = [
            MFCC.extract(numpy.frombuffer(raw, numpy.int16))
            for raw in gestures[gesture]
        ]
    return params
def train(filename, id):
    """Train the models for one WAV file.

    Args:
        filename: Path handed to ``loadWAVfile``.
        id: Identifier given to ``VQ.Model`` and to ``create_file``.

    Returns:
        None.
    """
    features = MFCC.extract(loadWAVfile(filename), show=False)
    # Vector-quantisation model first ...
    VQ.Model(id).train(features)
    # ... then the step the original comment labels "Train the HMM".
    create_file(features, id)
def load():
    """Load the MFCC samples of every subject.

    For each subject name, all files matching ``Samples/<name> *`` are read
    with ``wread``; the second element of each result is passed to
    ``MFCC.extract`` and the first 30 entries of the outcome are kept.

    Returns:
        ``(names, sampledict)``: the list of subject names and a dict
        mapping each name to its list of truncated feature arrays.
    """
    names = ["Mathematics", "Biology", "PoliticalScience", "Statistics", "Psychology"]
    sampledict = {
        subject: [
            MFCC.extract(wread(path)[1])[:30]
            for path in glob.glob("Samples/" + subject + " *")
        ]
        for subject in names
    }
    return names, sampledict
def produce_mfcc(self, filename):
    """Return the MFCC features and the label of one WAV file.

    Up to ``self.sz`` frames are read from the file and interpreted as
    16-bit integers.  The label is group 1 of ``self.lab_extractor``
    matched against the file name.

    Args:
        filename: Path of the WAV file; also the string the label is
            extracted from.

    Returns:
        ``(mfcc, label)``.  ``label`` is ``"unknown"`` (and a message is
        written to standard error) when the pattern does not match or has
        no group 1.
    """
    wav_file = wave.open(filename, "r")
    try:
        raw = wav_file.readframes(self.sz)
    finally:
        # The handle used to stay open until garbage collection.
        wav_file.close()

    # np.fromstring is deprecated for binary data.  frombuffer returns a
    # read-only view, so copy to keep handing a writable array downstream.
    x = np.frombuffer(raw, dtype=np.int16).copy()
    mfcc = MFCC.extract(x)

    match = self.lab_extractor.match(filename)
    try:
        label = match.group(1)
    except (AttributeError, IndexError):
        # AttributeError: no match (match is None); IndexError: no group 1.
        label = "unknown"
        sys.stderr.write("unknown labels encountered\n")
    return (mfcc, label)
def add_to_database(url_, person_name_):
    """Record a speaker from a stream and store a GMM trained on it.

    The signal returned by ``read_radio_stream(url_)`` is written to
    ``People\\<person_name_>.wav`` (scaled by 1/32767.0, rate 11025), turned
    into MFCC features without their first column, and used to fit a
    128-component ``mixture.GMM``.  Means, covariances and the weights
    (repeated across 12 columns) are stacked into one array and saved under
    the person's name.

    NOTE(review): existing models are loaded from ``mfcc.mat`` but the
    result is saved to ``mfcc_32.mat`` -- confirm that the differing file
    names are intended.

    Args:
        url_: Stream address handed to ``read_radio_stream``.
        person_name_: Key for the stored model and stem of the WAV file.

    Returns:
        None.
    """
    gmm_models = sio.loadmat('mfcc.mat') if os.path.isfile('mfcc.mat') else {}

    print("Recording and processing...\n\n")
    full_sound_model = read_radio_stream(url_)
    wav.write('People\\' + person_name_ + '.wav', 11025, full_sound_model / 32767.0)

    print("Calculating MFCC and saving the model...")
    mfcc_features = MFCC.extract(full_sound_model)[:, 1:]
    g = mixture.GMM(n_components=128)
    g.fit(mfcc_features)

    # The weights are repeated so that all three parts stack into one array
    # that can be saved.
    tiled_weights = np.repeat(g.weights_[:, np.newaxis], 12, 1)
    model = np.array([g.means_, g.covars_, tiled_weights])
    print(len(g.means_))

    gmm_models[person_name_] = model
    sio.savemat('mfcc_32.mat', gmm_models, oned_as='row')
def read_radio_stream(url_):
    """Play an MP3 stream and keep printing the best-matching stored model.

    The models are loaded from ``mfcc_16_fft256_GMM.mat``.  The stream at
    ``url_`` is then read in 15000-byte chunks; each chunk is decoded,
    queued for playback through pygame, decimated by 4, converted to MFCC
    features (first column dropped) and scored against every stored model.
    The key with the highest mean score and that score are printed per chunk.

    NOTE(review): the ``while True`` loop has no ``break`` and the function
    has no ``return`` statement, so as written it runs until an exception
    is raised and never hands a value back to its caller.

    Args:
        url_: Address opened with ``urllib.urlopen``.
    """
    database = sio.loadmat('mfcc_16_fft256_GMM.mat')
    # Remove the three bookkeeping entries so only model entries are iterated.
    database.pop('__header__')
    database.pop('__version__')
    database.pop('__globals__')
    r2 = urllib.urlopen(url_)
    # Arguments per pygame.mixer.init: frequency, size, channels, buffer.
    pygame.mixer.init(44100, -16, 2, 2048)
    print pygame.mixer.get_init()
    chan1 = pygame.mixer.find_channel()
    # Only referenced by the commented-out sound.Output line below.
    format = sound.AFMT_S16_LE
    print sound.getODevices()
    #snd_out = sound.Output(44100, 2, format)
    dm = muxer.Demuxer('mp3')
    # The decoder is created lazily from the first parsed chunk.
    dec = None
    # Never read again in this function.
    snd = None
    print(r2.info())
    print('###################\n')
    #f = open('radio.mp3', 'wb')
    #g = open('radio.wav', 'wb')
    # Only referenced by the commented-out loop condition.
    i = 0
    while True: #i < 3:
        samples = r2.read(15000)
        frames = dm.parse(samples)
        if dec is None:
            # Open decoder
            dec = acodec.Decoder(dm.streams[0])
        #start = time.time()
        # ansic_to_numpy is defined elsewhere; the indexing below treats its
        # result as a 1-D array -- TODO confirm.
        sound_np_array = ansic_to_numpy(frames, dec)
        #print (sound_np_array.shape[0])/44100.0
        #elapsed = (time.time() - start)
        #print 'decode and ndaray - %2.8f' %elapsed
        #start = time.time()
        # Duplicate the signal into two identical int16 columns for playback.
        to_play = np.array(np.repeat(sound_np_array[:, np.newaxis], 2, 1), dtype = 'int16')
        sounds = pygame.sndarray.make_sound(to_play)
        chan1.queue(sounds)
        #elapsed = (time.time() - start)
        #print 'to play - %2.8f' %elapsed
        #start = time.time()
        # Downsample by a factor of 4 before feature extraction.
        sound_np_array = decimate(sound_np_array, 4)
        #elapsed = (time.time() - start)
        #print 'downsample - %2.8f' %elapsed
        #start = time.time()
        mfcc_features = MFCC.extract(sound_np_array) #1.5s
        # Drop the first feature column.
        mfcc_features = mfcc_features[:, 1:]
        #elapsed = (time.time() - start)
        #print 'mfcc - %2.8f' %elapsed
        # One GMM object is reused; its parameters are overwritten per model.
        g = mixture.GMM(n_components=16)
        # Candidates scoring at or below this starting value never win.
        log_prob = -10000
        winner = 'nobody'
        for key, values in database.iteritems():
            try:
                # Layout of a stored model: index 0 means, 1 covariances,
                # 2 weights (read from column 1 of the repeated weights).
                g.means_ = values[0, :, :]
                g.covars_ = values[1, :, :]
                g.weights_ = values[2, :, 1]
                #start = time.time()
                temp_prob = np.mean(g.score(mfcc_features))
                #elapsed = (time.time() - start)
                #print 'log-likelihood - %2.8f' %elapsed
                if temp_prob > log_prob:
                    log_prob = temp_prob
                    winner = key
            except TypeError:
                # "dla" is Polish for "for"; the message is runtime output.
                print 'error dla ', key
        print winner, log_prob
        print('\n###################')
def get_mfcc_worker(fpath):
    """Return at most the first 1500 entries of a WAV file's MFCC output.

    Prints the path being processed, reads the file with ``wavfile.read``
    and passes both returned values, in order, to ``MFCC.extract``.

    Args:
        fpath: Path of the WAV file.
    """
    print('mfcc: ' + fpath)
    sample_rate, samples = wavfile.read(fpath)
    return MFCC.extract(sample_rate, samples)[:1500]
def collect(n=20):
    """Record *n* three-second clips and return their MFCC features.

    Each clip is recorded with ``arecord`` into ``test.wav`` (overwritten
    every time), read back with ``wavfile.read`` and converted with
    ``MFCC.extract``.

    Args:
        n: Number of clips to record; values below 1 record nothing.

    Returns:
        A list with one ``MFCC.extract`` result per recorded clip.

    Raises:
        subprocess.CalledProcessError: If ``arecord`` exits with a non-zero
            status.  Previously the status was ignored and a stale
            ``test.wav`` from an earlier run could be analysed instead.
    """
    # Imported here because the file's import block is not part of this block.
    import subprocess

    # Argument list instead of a shell string: no shell is involved.
    command = ["arecord", "-f", "S16_LE", "--rate=44100",
               "-D", "hw:1,0", "-d", "3", "test.wav"]
    obs = []
    # range() works on Python 2 and 3 alike, unlike xrange().
    for _ in range(n):
        subprocess.check_call(command)
        obs.append(MFCC.extract(wavfile.read("test.wav")[1]))
    return obs
def GMM_test(ii):
    """Evaluate closed-set speaker identification with per-speaker GMMs.

    Speaker models are loaded from ``mfcc_32.mat`` when that file exists.
    Otherwise one 32-component ``mixture.GMM`` is trained per speaker from
    recordings ``0``..``8`` (found via the ``*00.wav`` files in
    ``<cwd>/speakers``) and the models are saved to that file.  Every
    ``*09.wav`` recording is then scored against all models and attributed
    to the best-scoring one; progress and the final hit rate are printed.

    NOTE(review): the cache file name depends only on the component count,
    so once it exists the stored models are used whatever ``ii`` is.

    Args:
        ii: Forwarded to ``mixture.GMM`` as ``n_iter``.

    Returns:
        None.  The hit rate is printed; ``nan`` is reported when no test
        recording was found.
    """
    gauss_num = 32
    num_iter = ii
    model_file = 'mfcc_' + str(gauss_num) + '.mat'
    # os.path.join yields the same pattern as the former hard-coded
    # backslashes on Windows and additionally works on other platforms.
    files = glob.glob(os.path.join(os.getcwd(), 'speakers', '*.wav'))

    if os.path.isfile(model_file):
        speaker_GMM_dict = sio.loadmat(model_file)
        # Only the model entries may remain in the dictionary.
        for meta_key in ('__header__', '__version__', '__globals__'):
            speaker_GMM_dict.pop(meta_key, None)
    else:
        speaker_GMM_dict = {}
        iterator = 1
        for path in files:
            if path[-6:-4] != '00':
                continue
            current_speaker = path[-10:-6]
            print("############# Calculate MFCC and GMM for ", current_speaker, " , speaker no ", str(iterator))
            iterator += 1

            # Concatenate recordings 0..8 in one go.  ravel() and the float64
            # cast reproduce what repeated np.append onto an empty float
            # array produced, without re-copying the growing buffer.
            merged = np.concatenate(
                [np.ravel(wav.read(path[:-5] + str(i) + path[-4:])[1])
                 for i in range(9)]
            ).astype(np.float64)
            # The first feature column is dropped.
            speaker_MFCC = MFCC.extract(merged)[:, 1:]

            g = mixture.GMM(n_components=gauss_num, n_iter=num_iter)
            g.fit(speaker_MFCC)
            # The weights are repeated across 12 columns so that all three
            # parts stack into a single array.
            speaker_GMM_dict[current_speaker] = np.array(
                [g.means_, g.covars_, np.repeat(g.weights_[:, np.newaxis], 12, 1)]
            )
        if speaker_GMM_dict:
            # Never write an empty cache: it would be picked up by the next
            # run and prevent the models from ever being trained.
            sio.savemat(model_file, speaker_GMM_dict, oned_as='row')

    iterator = 1
    good = 0
    bad = 0
    total = 0
    for path in files:
        if path[-6:-4] != '09':
            continue
        g = mixture.GMM(n_components=gauss_num, n_iter=num_iter)
        current_speaker = path[-10:-6]
        speaker_MFCC = MFCC.extract(wav.read(path)[1])[:, 1:]

        # Start from -inf so that the best candidate always wins, however
        # low its mean log-probability is.
        log_prob = -np.inf
        winner = 'nobody'
        for key, values in speaker_GMM_dict.items():
            try:
                g.means_ = values[0, :, :]
                g.covars_ = values[1, :, :]
                g.weights_ = values[2, :, 1]
                temp_prob = np.mean(g.score(speaker_MFCC))
                if temp_prob > log_prob:
                    log_prob = temp_prob
                    winner = key
            except TypeError:
                print('error for ', key)

        if current_speaker == winner:
            good += 1
        else:
            bad += 1
        total += 1
        print(current_speaker, " speaker no ", str(iterator), " is similar to ", winner, " - log prob = ", str(log_prob))
        print("good = ", str(good), ", bad = ", str(bad), ", total = ", str(total))
        iterator += 1

    # Without any test recording there is nothing to average over.
    efficiency = good / total if total else float('nan')
    print("GMM, n_iter = ", num_iter, ", Efficiency = ", str(efficiency))
def CalculateMFCCs(self):
    """Return the result of ``MFCC.extract`` applied to ``self.wav_data``."""
    return MFCC.extract(self.wav_data)
# python -i <name of this .py file> import numpy as np from scikits.audiolab import Sndfile SOUND_DIRECTORY = 'small_data_sample/right_whale' test_file = '%s/train12.aiff' % SOUND_DIRECTORY f = Sndfile(test_file, 'r') # Sndfile instances can be queried for the audio file meta-data fs = f.samplerate nc = f.channels enc = f.encoding # Reading is straightfoward data = f.read_frames(1000) # This reads the next 1000 frames, e.g. from 1000 to 2000, but as single precision data_float = f.read_frames(1000, dtype=np.float32) print data_float.shape import MFCC # data_float is a wave signal saved in a 1-D numpy array # mfcc is a 2-D numpy array, where each row is the # MFCC of a frame in data_float mfcc = MFCC.extract(data_float, show = True) # This will also plot the MFCC and the spectrogram # reconstructed from MFCC by inverse DCT