H = 501/2 mX, pX = stft.stftAnal(audio, rates, w, 2048, H) temp = [] for i in range(mX.shape[0]): temp.append(min(mX[i])) minimum = min(temp) temp = [] for i in range(mX.shape[0]): temp.append(max(mX[i])) maximum = max(temp) t = 0.5 sebaran = np.arange(minimum,maximum) s_index = int(sebaran.size*(1-t)) treshold = sebaran[-s_index] print "treshold =",treshold ploc = peakdetect.peakDetection(mX,treshold) peak_loc = [] for i in range(len(ploc)-1): if ploc[i] != ploc[i+1]: peak_loc.append(ploc[i]) peak_loc.append(ploc[-1]) peak_loc = np.array(peak_loc) vector = mX[peak_loc] log_likelihood = np.zeros(len(models)) for i in range(len(models)): gmm = models[i] #checking with each model one by one scores = np.array(gmm.score(vector)) log_likelihood[i] = scores.sum()
def proses(self):
    """Identify the selected recording with added noise and refresh the GUI.

    Reads the file at ``self.path``, adds the output of
    ``self.generateNoise`` to it, computes the STFT magnitude spectrum and
    keeps the frames in which ``peakdetect.peakDetection`` reports a peak
    above the threshold derived from ``self.PTreshTxt``.  Those frames are
    scored against every model in ``models``; the best label and its score
    are written to ``self.resultLbl`` / ``self.scoreLbl`` and the strongest
    bin frequency of every peak frame is exported to an Excel file.  Four
    plots (waveform, spectrogram, first peak frame, all peak frames) are
    saved as PNG files and shown on their GUI labels.

    When no peak is detected, the identification, the Excel export and the
    two peak plots are skipped; the waveform and spectrogram are still
    shown.  When no file has been chosen a message is printed and nothing
    else happens.
    """
    if not self.path:
        print("belum ada pupuh diinput")
        return

    start_time = time.time()
    rates, audio = read(self.path)
    noise = self.generateNoise(audio.size, 1)
    print(audio)
    newAudio = audio + noise
    print(newAudio)

    # Full-scale value per sample dtype, used to scale PCM data to [-1, 1].
    INT16_FAC = (2**15) - 1
    INT32_FAC = (2**31) - 1
    INT64_FAC = (2**63) - 1
    norm_fact = {
        'int16': INT16_FAC,
        'int32': INT32_FAC,
        'int64': INT64_FAC,
        'float32': 1.0,
        'float64': 1.0
    }
    # NOTE(review): if generateNoise returns floats, the sum above is
    # float64 and the factor below is 1.0, so integer PCM input is not
    # rescaled -- confirm that this is intended.
    newAudio = np.float32(newAudio) / norm_fact[newAudio.dtype.name]

    N = 2048  # FFT size used by the STFT
    w = get_window('hamming', int(self.WSizeTxt.get()))
    H = int(float(self.WSizeTxt.get()) * float(self.OvlSizeTxt.get()))
    mX, pX = stft.stftAnal(newAudio, rates, w, N, H)

    # The threshold sits a fraction t of the way up the magnitude range,
    # sampled in steps of one unit.
    minimum = np.min(mX)
    maximum = np.max(mX)
    t = float(self.PTreshTxt.get())
    sebaran = np.arange(minimum, maximum)
    if sebaran.size == 0:
        # Range narrower than one step: nothing to index into.
        treshold = minimum
    else:
        s_index = int(sebaran.size * (1 - t))
        # Same element as sebaran[-s_index] for 1 <= s_index <= size, but
        # clamped so that s_index == 0 (t == 1.0) selects the top of the
        # range instead of wrapping around to the minimum.
        position = min(max(sebaran.size - s_index, 0), sebaran.size - 1)
        treshold = sebaran[position]
    print("treshold: %s" % treshold)

    ploc = peakdetect.peakDetection(mX, treshold)
    has_peaks = ploc.size != 0
    if has_peaks:
        # Collapse runs of equal indices, keeping the last entry of each
        # run, so that every frame appears only once.
        keep = np.ones(ploc.size, dtype=bool)
        keep[:-1] = ploc[:-1] != ploc[1:]
        peak_loc = ploc[keep]

        vector = mX[peak_loc]
        # Total score of the peak frames under each model.
        log_likelihood = np.array(
            [np.sum(gmm.score(vector)) for gmm in models], dtype=float)
        winner = np.argmax(log_likelihood)
        self.resultLbl.config(text=speakers[winner])
        self.scoreLbl.config(text=np.max(log_likelihood))
        self.peakloc = peak_loc

        # Frequency of the strongest bin in every peak frame.  Computed
        # from the bin index directly so that a maximum in the last bin
        # cannot index past a shorter frequency axis.
        loc = np.argmax(vector, axis=1)
        Freq = rates * loc / float(N)
        df = pd.DataFrame(Freq)
        df.to_excel("Frekuensi/Frekuensi Penyusun " +
                    self.path.split("/")[-1].split(".")[0] + ".xlsx",
                    index=False)
    else:
        print("tidak ada peak yang melewati treshold")

    maxplotfreq = rates / 8.82

    # Waveform of the noisy, normalised signal.
    plt.figure(figsize=(12, 9))
    plt.plot(np.arange(newAudio.size) / float(rates), newAudio)
    plt.axis([
        0, newAudio.size / float(rates),
        newAudio.min(),
        newAudio.max()
    ])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    self.TimeDomImg = self._savePlotToLabel(
        "Time Domain Testing Noise.png", self.timeDomPlt)

    # Magnitude spectrogram up to maxplotfreq.
    numFrames = int(mX[:, 0].size)
    frmTime = H * np.arange(numFrames) / float(rates)
    binFreq = rates * np.arange(N * maxplotfreq / rates) / N
    plt.pcolormesh(
        frmTime, binFreq,
        np.transpose(mX[:, :int(N * maxplotfreq / rates + 1)]))
    self.SpectrogramImg = self._savePlotToLabel(
        "Spektrogram Frekuensi Testing Noise.png", self.spectrogramPlt)

    if has_peaks:
        # Magnitude spectrum of the first peak frame.
        plt.plot(mX[peak_loc[0]])
        plt.axhline(y=treshold)
        self.FreqDomImg = self._savePlotToLabel(
            "Frekuensi Domain Testing Noise.png", self.freqDomPlt)

        # Magnitudes of all peak frames.
        plt.plot(mX[peak_loc])
        plt.axhline(y=treshold)
        self.AllFreqDomImg = self._savePlotToLabel(
            "All Frekuensi Domain Testing Noise.png", self.allFreqDomPlt)

    end_time = time.time() - start_time
    self.proTimeLbl.config(
        text="Identifikasi berakhir dengan total waktu %s detik" % end_time)


def _savePlotToLabel(self, filename, label):
    """Save the current figure to *filename*, show it on *label*, close it.

    The saved image is passed through ``self.resizeImg`` before it is
    loaded.  Returns the ``ImageTk.PhotoImage`` so the caller can keep its
    own reference to it.
    """
    plt.autoscale(tight=True)
    plt.subplots_adjust(top=1, bottom=0, right=1, left=0, hspace=0, wspace=0)
    plt.savefig(filename)
    self.resizeImg(filename)
    photo = ImageTk.PhotoImage(Image.open(filename))
    label.config(image=photo, width=500, height=200)
    # A reference is also stored on the widget itself.
    label.image = photo
    plt.close()
    return photo
def temp(self):
    """Batch-test the identifier over window sizes, overlaps and thresholds.

    For every combination of window size, overlap ratio and threshold
    ratio, each file listed in ``file_paths`` (read from the ``source``
    directory) is cut into 10-second windows that start every 5 seconds.
    Each window is normalised, analysed with the STFT and, when peaks are
    detected, scored against every model in ``models``.  One row per scored
    window (file, window start, window end, best score written with a
    decimal comma, winning label) is collected, and the rows of each
    combination are written to ``test/Test_<window>_<overlap>_<threshold>.xls``.

    Windows in which no peak is detected contribute no row.  The threshold
    ratio comes from the grid being iterated, so the result file for each
    threshold value reflects that value.
    """
    w_sizes = [256, 512, 1024, 2048, 4096]
    ov_sizes = [0.25, 0.5, 0.75]
    tresholds = np.arange(1, 10) * 0.1
    paths = [path.strip() for path in file_paths]

    # Full-scale value per sample dtype, used to scale PCM data to [-1, 1].
    INT16_FAC = (2**15) - 1
    INT32_FAC = (2**31) - 1
    INT64_FAC = (2**63) - 1
    norm_fact = {
        'int16': INT16_FAC,
        'int32': INT32_FAC,
        'int64': INT64_FAC,
        'float32': 1.0,
        'float64': 1.0
    }
    n_frames = 10  # length of each analysed window, in seconds
    time_jump = 5  # distance between window starts, in seconds

    for w_size in w_sizes:
        for ov_size in ov_sizes:
            H = int(w_size * ov_size)
            for t in tresholds:
                print("treshold rate = %s" % t)
                rows = []  # one 5-column row per scored window
                for path in paths:
                    print(path)
                    rates, audio = read(source + path)
                    framerate = rates
                    a = 0  # first sample of the current window
                    while a < len(audio):
                        f_data = audio[int(a):int(a + n_frames * framerate)]
                        f_time = np.arange(
                            a, (a + framerate * n_frames)) / float(framerate)
                        a += time_jump * rates

                        f_data = np.float32(f_data) / norm_fact[
                            f_data.dtype.name]
                        w = get_window('hamming', w_size)
                        mX, pX = stft.stftAnal(f_data, rates, w, 2048, H)

                        # Threshold a fraction t of the way up the
                        # (rounded) magnitude range.
                        minimum = np.min(mX)
                        maximum = np.max(mX)
                        sebaran = np.arange(int(round(minimum)),
                                            int(round(maximum)))
                        if sebaran.size == 0:
                            # Range narrower than one step.
                            treshold = minimum
                        else:
                            s_index = int(sebaran.size * (1 - t))
                            # Same element as sebaran[-s_index] for
                            # 1 <= s_index <= size; clamped so s_index == 0
                            # picks the top of the range rather than
                            # wrapping around to the minimum.
                            position = min(max(sebaran.size - s_index, 0),
                                           sebaran.size - 1)
                            treshold = sebaran[position]

                        ploc = peakdetect.peakDetection(mX, treshold)
                        if ploc.size == 0:
                            continue

                        # Collapse runs of equal indices, keeping the last
                        # entry of each run.
                        keep = np.ones(ploc.size, dtype=bool)
                        keep[:-1] = ploc[:-1] != ploc[1:]
                        peak_loc = ploc[keep]

                        vector = mX[peak_loc]
                        log_likelihood = np.array(
                            [np.sum(gmm.score(vector)) for gmm in models],
                            dtype=float)
                        winner = np.argmax(log_likelihood)

                        # Decimal comma for the spreadsheet; replace() also
                        # copes with values printed without a '.'.
                        best = str(np.max(log_likelihood)).replace('.', ',')
                        rows.append([
                            path,
                            str(np.min(f_time)),
                            str(np.max(f_time)),
                            best,
                            speakers[winner],
                        ])

                print("len x: %s" % (len(rows) * 5))
                x = np.array(rows).reshape(len(rows), 5)
                print("%s \n\n" % (x,))
                time.sleep(2)
                df = pd.DataFrame(x)
                df.to_excel("test/Test_" + str(w_size) + "_" +
                            str(ov_size) + "_" + str(t) + ".xls",
                            index=False)
def play(self): """ Play entire file """ data = self.wf.readframes(chunk) while data != '': self.stream.write(data) data = self.wf.readframes(chunk) #save data to file file = open("Text File/signal_data.txt", "w") for item in self.datas: file.write("%s " % item) file.close() ''' #Framing framerate = self.rates print framerate #menentukan jumlah frame frame = round(len(self.datas)/framerate) #mengukur banyak data/frame hop = 10 #jumlah frame yang diperiksa overlap = 5 #lompatan frame a = 0 while a < frame: f_data = self.datas[a*int(frame):(a+hop)*int(frame)] f_time = np.arange(a*(f_data.size/hop),(a+hop)*(f_data.size/hop))/float(self.rates) title = "Frame",a/50+1,"Time-domain" plt.title(title) plt.xlabel("Time") plt.ylabel("Amplitude") plt.plot(f_time,f_data) plt.show() a += overlap ''' ''' for i in range(hop): f_data = self.datas[i*int(frame):(i+1)*int(frame)] f_time = np.arange(i*f_data.size,(i+1)*f_data.size)/float(self.rates) plt.title("Frame Time-domain") plt.xlabel("Time") plt.ylabel("Amplitude") plt.plot(f_time,f_data) plt.show() ''' ''' f_time_sec = f_time[-1] f_data = np.asarray(()) f_time = np.asarray(()) for i in range(int(frame)): if len(f_data) == 0: f_data = self.datas[i*framerate:(i+1)*framerate] f_time = i*f_time_sec else: f_data = np.hstack((f_data,self.datas[i*framerate:(i+1)*framerate])) f_time = np.hstack((f_time,i*f_time_sec)) if len(self.datas) % framerate > 0: f_data = np.hstack((f_data,self.datas[(i+1)*framerate:])) temp = (i+1)*f_time_sec f_time = np.hstack((f_time,temp)) mod = len(self.datas) % framerate antimod = len(self.datas) - mod f_data = np.reshape(f_data[:antimod],(-1,framerate)) sisa_f_data = f_data[antimod:] print f_data print sisa_f_data print f_time ''' #proses STFT N = 2048 M = 501 #'''bisa di set''' H = M / 2 #bisa di set manual' x = self.datas x = np.float32(x) / norm_fact[x.dtype.name] fs = self.rates w = get_window('hamming', M) global mX mX, pX = stft.stftAnal(x, fs, w, N, H) y = stft.stftSynth(mX, pX, M, H) file = 
open("Text File/mX.txt", "w") for j in range(len(mX)): for item in mX[j]: file.write("%s " % item) file.write("\n\n") file.close() plt.figure(figsize=(12, 9)) maxplotfreq = 5000.0 plt.subplot(4, 1, 1) plt.plot(np.arange(x.size) / float(fs), x) plt.axis([0, x.size / float(fs), min(x), max(x)]) plt.ylabel('amplitude') plt.xlabel('time (sec)') plt.title('input sound: x') plt.subplot(4, 1, 2) numFrames = int(mX[:, 0].size) frmTime = H * np.arange(numFrames) / float(fs) binFreq = fs * np.arange(N * maxplotfreq / fs) / N plt.pcolormesh(frmTime, binFreq, np.transpose(mX[:, :int(N * maxplotfreq / fs + 1)])) plt.xlabel('time (sec)') plt.ylabel('frequency (Hz)') plt.title('magnitude spectrogram') plt.autoscale(tight=True) file = open("Text File/STFT frequencies.txt", "w") for item in binFreq: file.write("%s\n" % item) file.close() plt.subplot(4, 1, 3) numFrames = int(pX[:, 0].size) frmTime = H * np.arange(numFrames) / float(fs) binFreq = fs * np.arange(N * maxplotfreq / fs) / N plt.pcolormesh( frmTime, binFreq, np.transpose(np.diff(pX[:, :int(N * maxplotfreq / fs + 1)], axis=1))) plt.xlabel('time (sec)') plt.ylabel('frequency (Hz)') plt.title('phase spectrogram (derivative)') plt.autoscale(tight=True) plt.subplot(4, 1, 4) plt.plot(np.arange(y.size) / float(fs), y) plt.axis([0, y.size / float(fs), min(y), max(y)]) plt.ylabel('amplitude') plt.xlabel('time (sec)') plt.title('output sound: y') plt.tight_layout() plt.show(block=False) #print mX.shape #proses finding peak minimum = np.min(mX) maximum = np.max(mX) t = 0.8 sebaran = np.arange(minimum, maximum) s_index = int(sebaran.size * (1 - t)) treshold = sebaran[-s_index] print "treshold =", treshold ploc = peakdetect.peakDetection(mX, treshold) global peak_loc global pmag peak_loc = [] for i in range(len(ploc) - 1): if ploc[i] != ploc[i + 1]: peak_loc.append(ploc[i]) peak_loc.append(ploc[-1]) peak_loc = np.array(peak_loc) file = open("Text File/peaks location.txt", "w") for item in peak_loc: file.write("%s\n" % item) 
file.close() file = open("Text File/peaks magnitude.txt", "w") for item in peak_loc: file.write("%s " % item) for i in range(len(mX[item])): file.write("%s " % mX[item, i]) file.write("\n\n") file.close() pmag = mX[peak_loc] pl.plot(mX[-10, :]) pl.xlabel('Index') pl.ylabel('Value') pl.show() ''' plt.plot(pX[-10,:]) pl.xlabel('Index') pl.ylabel('Value') pl.show() ''' freqaxis = fs * np.arange(N / 2) / float(N) plt.plot(freqaxis, mX[peak_loc[0], :-1]) pl.xlabel("Frequency") pl.ylabel("Magnitude") pl.show() ''' file = open("Text File/peak frequencies.txt","w") file.write("Frequency\tMagnitude\n") for item in peak_loc: file.write("%s\t" % freqaxis[item]) file.write("%s\n" % mX[peak_loc[0],item]) file.close() ''' pl.plot(fs * peak_loc / float(N), pmag) pl.xlabel("Frequency") pl.ylabel("Magnitude") pl.show() #Menampilkan frequensi pada masing-masing Frame hasil STFT loc = [] for m in pmag: loc.append(np.argmax(m)) Freq = freqaxis[loc] df = pd.DataFrame(Freq) df.to_excel("Frekuensi Penyusun " + self.filename.split("/")[-1].split(".")[0] + ".xlsx", index=False) #execfile("find_peak_cwt.py") '''