H = 501/2
 mX, pX = stft.stftAnal(audio, rates, w, 2048, H)
 temp = []
 for i in range(mX.shape[0]):
     temp.append(min(mX[i]))
 minimum = min(temp)
 temp = []
 for i in range(mX.shape[0]):
     temp.append(max(mX[i]))
 maximum = max(temp)
 t = 0.5
 sebaran = np.arange(minimum,maximum)
 s_index = int(sebaran.size*(1-t))
 treshold = sebaran[-s_index]
 print "treshold =",treshold
 ploc = peakdetect.peakDetection(mX,treshold)
 peak_loc = []
 for i in range(len(ploc)-1):
     if ploc[i] != ploc[i+1]:
         peak_loc.append(ploc[i])
 peak_loc.append(ploc[-1])
 peak_loc = np.array(peak_loc)
 
 vector   = mX[peak_loc]
 
 log_likelihood = np.zeros(len(models)) 
 
 for i in range(len(models)):
     gmm    = models[i]  #checking with each model one by one
     scores = np.array(gmm.score(vector))
     log_likelihood[i] = scores.sum()
    def proses(self):
        """Identify the recording at ``self.path`` after adding generated noise.

        Steps, in order:

        1. Read the audio file, add the output of ``self.generateNoise`` and
           divide the samples by the full-scale value of their dtype.
        2. Run ``stft.stftAnal`` with a Hamming window whose size and overlap
           ratio come from ``self.WSizeTxt`` / ``self.OvlSizeTxt``.
        3. Derive a magnitude threshold from ``self.PTreshTxt`` and keep the
           frames reported by ``peakdetect.peakDetection``.
        4. Score those frames against every entry of the module-level
           ``models`` and show the best ``speakers`` entry and its score.
        5. Write the strongest bin frequency of each peak frame to an Excel
           file and render four plots into the GUI image labels.

        Nothing is returned.  If ``self.path`` is empty, or no peak frame is
        found, a message is printed and the GUI is left untouched.
        """
        if len(self.path) == 0:
            print("belum ada pupuh diinput")
            return

        start_time = time.time()
        rates, audio = read(self.path)
        noise = self.generateNoise(audio.size, 1)
        print(audio)
        newAudio = audio + noise
        print(newAudio)

        # Full-scale value for each supported sample dtype; float samples
        # are divided by 1.0, i.e. left unchanged.
        norm_fact = {
            'int16': (2**15) - 1,
            'int32': (2**31) - 1,
            'int64': (2**63) - 1,
            'float32': 1.0,
            'float64': 1.0
        }
        newAudio = np.float32(newAudio) / norm_fact[newAudio.dtype.name]

        w = get_window('hamming', int(self.WSizeTxt.get()))
        H = int(float(self.WSizeTxt.get()) * float(self.OvlSizeTxt.get()))
        N = 2048  # fourth stftAnal argument (presumably the FFT size)
        mX, pX = stft.stftAnal(newAudio, rates, w, N, H)

        # The threshold is taken from a unit-step grid spanning the
        # magnitude range, roughly minimum + t * (maximum - minimum).
        minimum = np.min(mX)
        maximum = np.max(mX)
        t = float(self.PTreshTxt.get())
        sebaran = np.arange(minimum, maximum)
        if sebaran.size == 0:
            # Range narrower than one step: nothing to pick from.
            treshold = minimum
        else:
            s_index = int(sebaran.size * (1 - t))
            # Index -0 is index 0, so without the lower clamp t == 1 would
            # select the minimum instead of the top of the range; the upper
            # clamp keeps t < 0 from indexing out of bounds.
            s_index = min(max(s_index, 1), sebaran.size)
            treshold = sebaran[-s_index]
        print("treshold: %s" % (treshold,))
        ploc = peakdetect.peakDetection(mX, treshold)

        if ploc.size == 0:
            # Everything below needs at least one peak frame; previously
            # this case crashed on the unbound names winner / peak_loc.
            print("tidak ada puncak yang terdeteksi")
            return

        # Collapse runs of equal consecutive entries into a single entry.
        peak_loc = [
            cur for cur, nxt in zip(ploc[:-1], ploc[1:]) if cur != nxt
        ]
        peak_loc.append(ploc[-1])
        peak_loc = np.array(peak_loc)
        vector = mX[peak_loc]

        # One summed score per model; the highest one wins.
        log_likelihood = np.zeros(len(models))
        for i, gmm in enumerate(models):
            log_likelihood[i] = np.array(gmm.score(vector)).sum()
        winner = np.argmax(log_likelihood)

        self.resultLbl.config(text=speakers[winner])
        self.scoreLbl.config(text=np.max(log_likelihood))
        self.peakloc = peak_loc

        # Frequency of the strongest bin in every peak frame -> Excel.
        freqaxis = rates * np.arange(N // 2) / float(N)
        loc = [np.argmax(m) for m in vector]
        Freq = freqaxis[loc]
        df = pd.DataFrame(Freq)
        df.to_excel("Frekuensi/Frekuensi Penyusun " +
                    self.path.split("/")[-1].split(".")[0] + ".xlsx",
                    index=False)
        maxplotfreq = rates / 8.82

        def show_plot(png_name, label, attr_name):
            """Save the current figure without margins and show it in *label*.

            The figure is written to *png_name*, passed through
            ``self.resizeImg``, loaded as a Tk photo image stored on
            ``self.<attr_name>`` and on the label, then closed.
            """
            plt.autoscale(tight=True)
            plt.subplots_adjust(top=1,
                                bottom=0,
                                right=1,
                                left=0,
                                hspace=0,
                                wspace=0)
            plt.savefig(png_name)
            self.resizeImg(png_name)
            photo = ImageTk.PhotoImage(Image.open(png_name))
            setattr(self, attr_name, photo)
            label.config(image=photo, width=500, height=200)
            # Second reference on the widget, as in the original code.
            label.image = photo
            plt.close()

        # Time-domain waveform.
        plt.figure(figsize=(12, 9))
        plt.plot(np.arange(newAudio.size) / float(rates), newAudio)
        plt.axis([
            0, newAudio.size / float(rates),
            np.min(newAudio),
            np.max(newAudio)
        ])
        plt.ylabel('amplitude')
        plt.xlabel('time (sec)')
        show_plot("Time Domain Testing Noise.png", self.timeDomPlt,
                  "TimeDomImg")

        # Magnitude spectrogram, limited to bins below maxplotfreq.
        numFrames = int(mX[:, 0].size)
        frmTime = H * np.arange(numFrames) / float(rates)
        binFreq = rates * np.arange(N * maxplotfreq / rates) / N
        plt.pcolormesh(
            frmTime, binFreq,
            np.transpose(mX[:, :int(N * maxplotfreq / rates + 1)]))
        show_plot("Spektrogram Frekuensi Testing Noise.png",
                  self.spectrogramPlt, "SpectrogramImg")

        # Magnitudes of the first peak frame, with the threshold line.
        plt.plot(mX[peak_loc[0]])
        plt.axhline(y=treshold)
        show_plot("Frekuensi Domain Testing Noise.png", self.freqDomPlt,
                  "FreqDomImg")

        # Magnitudes of all peak frames, with the threshold line.
        plt.plot(mX[peak_loc])
        plt.axhline(y=treshold)
        show_plot("All Frekuensi Domain Testing Noise.png",
                  self.allFreqDomPlt, "AllFreqDomImg")

        end_time = time.time() - start_time
        self.proTimeLbl.config(
            text="Identifikasi berakhir dengan total waktu %s detik" %
            end_time)
    def temp(self):
        """Batch-test every file in ``file_paths`` over a grid of settings.

        For each combination of window size, overlap ratio and threshold
        rate, every audio file (read from ``source + path``) is cut into
        10-second windows starting 5 seconds apart.  Each window is
        normalised, analysed with ``stft.stftAnal``, reduced to the frames
        reported by ``peakdetect.peakDetection`` and scored against every
        entry of the module-level ``models``.  Windows without any peak are
        skipped.

        One row per scored window -- path, window start (s), window end (s),
        best score with a decimal comma, detected ``speakers`` entry -- is
        collected, printed and written to
        ``test/Test_<window>_<overlap>_<threshold>.xls``.

        Nothing is returned.
        """
        w_sizes = [256, 512, 1024, 2048, 4096]
        ov_sizes = [0.25, 0.5, 0.75]
        tresholds = np.arange(1, 10) * 0.1
        paths = [path.strip() for path in file_paths]

        # Full-scale value for each supported sample dtype; float samples
        # are divided by 1.0, i.e. left unchanged.
        norm_fact = {
            'int16': (2**15) - 1,
            'int32': (2**31) - 1,
            'int64': (2**63) - 1,
            'float32': 1.0,
            'float64': 1.0
        }
        n_frames = 10  # window length, in seconds
        time_jump = 5  # distance between window starts, in seconds

        for w_size in w_sizes:
            for ov_size in ov_sizes:
                H = int(w_size * ov_size)
                for t in tresholds:
                    print("treshold rate = %s" % (t,))
                    x = []  # flat list of row fields, five per window
                    for path in paths:
                        print(path)
                        rates, audio = read(source + path)
                        win_len = n_frames * rates  # samples per window

                        a = 0  # first sample of the current window
                        while a < len(audio):
                            f_data = audio[int(a):int(a + win_len)]
                            # Nominal time of the first and last sample of
                            # the window; np.float64 keeps the string form
                            # the same as the array elements used before.
                            start_sec = np.float64(a) / float(rates)
                            end_sec = (np.float64(a + win_len - 1) /
                                       float(rates))
                            a += time_jump * rates

                            f_data = (np.float32(f_data) /
                                      norm_fact[f_data.dtype.name])
                            w = get_window('hamming', w_size)
                            mX, pX = stft.stftAnal(f_data, rates, w, 2048,
                                                   H)

                            # Threshold from an integer grid spanning the
                            # rounded magnitude range, using the grid's
                            # threshold rate (the GUI field was read here
                            # before, so every output file was the same).
                            low = int(round(np.min(mX)))
                            high = int(round(np.max(mX)))
                            sebaran = np.arange(low, high)
                            if sebaran.size == 0:
                                treshold = low
                            else:
                                s_index = int(sebaran.size * (1 - t))
                                # Index -0 is index 0 (the minimum), so
                                # clamp to select the top of the range.
                                s_index = min(max(s_index, 1),
                                              sebaran.size)
                                treshold = sebaran[-s_index]

                            ploc = peakdetect.peakDetection(mX, treshold)
                            if ploc.size == 0:
                                continue

                            # Collapse runs of equal consecutive entries.
                            peak_loc = [
                                cur
                                for cur, nxt in zip(ploc[:-1], ploc[1:])
                                if cur != nxt
                            ]
                            peak_loc.append(ploc[-1])
                            vector = mX[np.array(peak_loc)]

                            # One summed score per model; highest wins.
                            log_likelihood = np.zeros(len(models))
                            for i, gmm in enumerate(models):
                                log_likelihood[i] = np.array(
                                    gmm.score(vector)).sum()
                            winner = np.argmax(log_likelihood)

                            # Decimal comma for the score; replace() also
                            # copes with strings that contain no '.', such
                            # as 'inf' or '1e+20'.
                            best = str(np.max(log_likelihood)).replace(
                                '.', ',')
                            x.extend([
                                path,
                                str(start_sec),
                                str(end_sec),
                                best,
                                speakers[winner],
                            ])

                    print("len x: %s" % len(x))
                    x = np.array(x)
                    x = np.reshape(x, (len(x) // 5, 5))
                    print("%s \n\n" % (x,))
                    # Pause kept from the original; presumably there so the
                    # printed table can be read before the next run.
                    time.sleep(2)
                    df = pd.DataFrame(x)
                    df.to_excel("test/Test_" + str(w_size) + "_" +
                                str(ov_size) + "_" + str(t) + ".xls",
                                index=False)
Example #4
0
    def play(self):
        """ Play entire file """
        data = self.wf.readframes(chunk)
        while data != '':
            self.stream.write(data)
            data = self.wf.readframes(chunk)

        #save data to file
        file = open("Text File/signal_data.txt", "w")
        for item in self.datas:
            file.write("%s " % item)
        file.close()
        '''
        #Framing
        framerate = self.rates
        print framerate                            #menentukan jumlah frame
        frame = round(len(self.datas)/framerate)    #mengukur banyak data/frame
        hop = 10                                     #jumlah frame yang diperiksa
        overlap = 5                                #lompatan frame
        a = 0
        while a < frame:
            f_data = self.datas[a*int(frame):(a+hop)*int(frame)]
            f_time = np.arange(a*(f_data.size/hop),(a+hop)*(f_data.size/hop))/float(self.rates)
            title = "Frame",a/50+1,"Time-domain"
            plt.title(title)
            plt.xlabel("Time")
            plt.ylabel("Amplitude")
            plt.plot(f_time,f_data)
            plt.show()
            a += overlap
            
        '''
        '''
        for i in range(hop):
            f_data = self.datas[i*int(frame):(i+1)*int(frame)]
            f_time = np.arange(i*f_data.size,(i+1)*f_data.size)/float(self.rates)
            plt.title("Frame Time-domain")
            plt.xlabel("Time")
            plt.ylabel("Amplitude")
            plt.plot(f_time,f_data)
            plt.show()
        '''
        '''
        f_time_sec = f_time[-1]
        f_data = np.asarray(())
        f_time = np.asarray(())
        for i in range(int(frame)):
            if len(f_data) == 0:
                f_data = self.datas[i*framerate:(i+1)*framerate]
                f_time = i*f_time_sec
            else:
                f_data = np.hstack((f_data,self.datas[i*framerate:(i+1)*framerate]))
                f_time = np.hstack((f_time,i*f_time_sec))
        if len(self.datas) % framerate > 0:
            f_data = np.hstack((f_data,self.datas[(i+1)*framerate:]))
            temp = (i+1)*f_time_sec
            f_time = np.hstack((f_time,temp))
        mod = len(self.datas) % framerate
        antimod = len(self.datas) - mod
        f_data = np.reshape(f_data[:antimod],(-1,framerate))
        sisa_f_data = f_data[antimod:]
        print f_data
        print sisa_f_data
        print f_time
        '''

        #proses STFT
        N = 2048
        M = 501  #'''bisa di set'''
        H = M / 2  #bisa di set manual'

        x = self.datas
        x = np.float32(x) / norm_fact[x.dtype.name]
        fs = self.rates
        w = get_window('hamming', M)
        global mX
        mX, pX = stft.stftAnal(x, fs, w, N, H)
        y = stft.stftSynth(mX, pX, M, H)
        file = open("Text File/mX.txt", "w")
        for j in range(len(mX)):
            for item in mX[j]:
                file.write("%s " % item)
            file.write("\n\n")
        file.close()

        plt.figure(figsize=(12, 9))
        maxplotfreq = 5000.0

        plt.subplot(4, 1, 1)
        plt.plot(np.arange(x.size) / float(fs), x)
        plt.axis([0, x.size / float(fs), min(x), max(x)])
        plt.ylabel('amplitude')
        plt.xlabel('time (sec)')
        plt.title('input sound: x')

        plt.subplot(4, 1, 2)
        numFrames = int(mX[:, 0].size)
        frmTime = H * np.arange(numFrames) / float(fs)
        binFreq = fs * np.arange(N * maxplotfreq / fs) / N
        plt.pcolormesh(frmTime, binFreq,
                       np.transpose(mX[:, :int(N * maxplotfreq / fs + 1)]))
        plt.xlabel('time (sec)')
        plt.ylabel('frequency (Hz)')
        plt.title('magnitude spectrogram')
        plt.autoscale(tight=True)

        file = open("Text File/STFT frequencies.txt", "w")
        for item in binFreq:
            file.write("%s\n" % item)
        file.close()

        plt.subplot(4, 1, 3)
        numFrames = int(pX[:, 0].size)
        frmTime = H * np.arange(numFrames) / float(fs)
        binFreq = fs * np.arange(N * maxplotfreq / fs) / N
        plt.pcolormesh(
            frmTime, binFreq,
            np.transpose(np.diff(pX[:, :int(N * maxplotfreq / fs + 1)],
                                 axis=1)))
        plt.xlabel('time (sec)')
        plt.ylabel('frequency (Hz)')
        plt.title('phase spectrogram (derivative)')
        plt.autoscale(tight=True)

        plt.subplot(4, 1, 4)
        plt.plot(np.arange(y.size) / float(fs), y)
        plt.axis([0, y.size / float(fs), min(y), max(y)])
        plt.ylabel('amplitude')
        plt.xlabel('time (sec)')
        plt.title('output sound: y')

        plt.tight_layout()
        plt.show(block=False)

        #print mX.shape
        #proses finding peak
        minimum = np.min(mX)
        maximum = np.max(mX)
        t = 0.8
        sebaran = np.arange(minimum, maximum)
        s_index = int(sebaran.size * (1 - t))
        treshold = sebaran[-s_index]
        print "treshold =", treshold
        ploc = peakdetect.peakDetection(mX, treshold)
        global peak_loc
        global pmag
        peak_loc = []
        for i in range(len(ploc) - 1):
            if ploc[i] != ploc[i + 1]:
                peak_loc.append(ploc[i])
        peak_loc.append(ploc[-1])
        peak_loc = np.array(peak_loc)

        file = open("Text File/peaks location.txt", "w")
        for item in peak_loc:
            file.write("%s\n" % item)
        file.close()

        file = open("Text File/peaks magnitude.txt", "w")
        for item in peak_loc:
            file.write("%s " % item)
            for i in range(len(mX[item])):
                file.write("%s " % mX[item, i])
            file.write("\n\n")
        file.close()

        pmag = mX[peak_loc]

        pl.plot(mX[-10, :])
        pl.xlabel('Index')
        pl.ylabel('Value')
        pl.show()
        '''
        plt.plot(pX[-10,:])
        pl.xlabel('Index')
        pl.ylabel('Value')
        pl.show()
        '''

        freqaxis = fs * np.arange(N / 2) / float(N)
        plt.plot(freqaxis, mX[peak_loc[0], :-1])
        pl.xlabel("Frequency")
        pl.ylabel("Magnitude")
        pl.show()
        '''
        file = open("Text File/peak frequencies.txt","w")
        file.write("Frequency\tMagnitude\n")
        for item in peak_loc:
            file.write("%s\t" % freqaxis[item])
            file.write("%s\n" % mX[peak_loc[0],item])
        file.close()
        '''

        pl.plot(fs * peak_loc / float(N), pmag)
        pl.xlabel("Frequency")
        pl.ylabel("Magnitude")
        pl.show()

        #Menampilkan frequensi pada masing-masing Frame hasil STFT
        loc = []
        for m in pmag:
            loc.append(np.argmax(m))
        Freq = freqaxis[loc]
        df = pd.DataFrame(Freq)
        df.to_excel("Frekuensi Penyusun " +
                    self.filename.split("/")[-1].split(".")[0] + ".xlsx",
                    index=False)

        #execfile("find_peak_cwt.py")
        '''