예제 #1
0
    def _record(self):
        if self.isrecording:
            CHUNK = 1024
            FORMAT = pyaudio.paInt16
            CHANNELS = 1
            RATE = 16000
            WAVE_OUTPUT_FILENAME = "rec.wav"
            p = pyaudio.PyAudio()
            stream = p.open(format=FORMAT,
                            channels=CHANNELS,
                            rate=RATE,
                            input=True,
                            frames_per_buffer=CHUNK)
            self.play = True
            print("* recording")
            frames = []
            i = 1
            while self.isrecording:
                self.button.config(image=self.photo[i],
                                   width="325",
                                   height="200")
                data = stream.read(CHUNK)
                frames.append(data)
                i = i + 1
                if (i == 10):
                    i = 0
            self.button.config(image=self.photo[0], width="325", height="200")
            print("* done recording")

            stream.stop_stream()
            stream.close()
            p.terminate()
            wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
            wf.setnchannels(CHANNELS)
            wf.setsampwidth(p.get_sample_size(FORMAT))
            wf.setframerate(RATE)
            wf.writeframes(b''.join(frames))
            wf.close()
            wav_file = 'rec.wav'
            feat_file = 'rec.feat'
            x, s = sf.read(wav_file)
            fe = sp.FrontEnd(samp_rate=16000, mean_norm_feat=True)
            feat = fe.process_utterance(x)
            htk.write_htk_user_feat(feat, feat_file)
            feat_rscp_line = os.path.join(feat_file)
            line = "%s=%s[0,%d]\n" % (feat_file, feat_rscp_line,
                                      feat.shape[1] - 1)
            try:
                ret = sd_beta.decode(line, args.trn, args.beam_width,
                                     args.lmweight, z, fst, 1)
                self.ques.set(ret)
                nlp_main()
            except KeyboardInterrupt:
                print('End')
    # Diagnostic plots. Each section below draws on the current pyplot
    # figure, titles it, saves it as a PNG under fig/ and then closes the
    # figure so the next section starts from a clean one.
    # NOTE(review): x, fe and feat are not defined in this fragment --
    # presumably the audio samples, the feature front end and its output;
    # confirm against the enclosing scope.
    # NOTE(review): nothing here creates the fig/ directory; it presumably
    # has to exist before this runs -- confirm.

    # Plot x as a single line and save it as the waveform figure.
    plt.plot(x)
    plt.title('waveform')
    plt.savefig('fig/waveform.png', bbox_inches='tight')
    plt.close()

    # Plot the mel filterbank: one line per row of fe.mel_filterbank, for
    # the first fe.num_mel rows, all overlaid on the same axes.
    for i in range(0, fe.num_mel):
        plt.plot(fe.mel_filterbank[i, :])
    plt.title('mel filterbank')
    plt.savefig('fig/mel_filterbank.png', bbox_inches='tight')
    plt.close()

    # Show feat as an image (titled as log mel filterbank features).
    plt.imshow(
        feat, origin='lower', aspect=4
    )  # origin='lower' puts row 0 at the bottom so the vertical (frequency) axis runs low to high
    plt.title('log mel filterbank features (fbank)')
    plt.savefig('fig/fbank.png', bbox_inches='tight')
    plt.close()

# Store the features in feat_file and report how many frames (second axis
# of feat) were written.
htk.write_htk_user_feat(feat, feat_file)
print("Wrote {0} frames to {1}".format(feat.shape[1], feat_file))

# Optional round-trip check: read the file back and compare it with the
# in-memory features to confirm it was written correctly.
feat2 = htk.read_htk_user_feat(name=feat_file)
print("Read {0} frames from {1}".format(feat2.shape[1], feat_file))
# The reported figure is the norm of the difference divided by the number of
# elements (shape[0] * shape[1]); it should be close to zero when the
# round trip preserved the data.
print("Per-element absolute error is {0}".format(
    np.linalg.norm(feat - feat2) / (feat2.shape[0] * feat2.shape[1])))