Example #1
    def _record(self):
        if self.isrecording:
            CHUNK = 1024
            FORMAT = pyaudio.paInt16
            CHANNELS = 1
            RATE = 16000
            WAVE_OUTPUT_FILENAME = "rec.wav"
            p = pyaudio.PyAudio()
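            # open a mono, 16-bit, 16 kHz input stream from the default microphone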
            stream = p.open(format=FORMAT,
                            channels=CHANNELS,
                            rate=RATE,
                            input=True,
                            frames_per_buffer=CHUNK)
            self.play = True
            print("* recording")
            frames = []
            i = 1
            while self.isrecording:
                self.button.config(image=self.photo[i],
                                   width="325",
                                   height="200")
                data = stream.read(CHUNK)
                frames.append(data)
                # advance the button animation frame, wrapping back to 0 after 9
                i = i + 1
                if i == 10:
                    i = 0
            self.button.config(image=self.photo[0], width="325", height="200")
            print("* done recording")

            stream.stop_stream()
            stream.close()
            p.terminate()
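            # write the captured frames out as a 16-bit mono WAV file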
            wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
            wf.setnchannels(CHANNELS)
            wf.setsampwidth(p.get_sample_size(FORMAT))
            wf.setframerate(RATE)
            wf.writeframes(b''.join(frames))
            wf.close()
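            # run the acoustic front end on the recording, save HTK-format
            # features, and hand an rscp-style line to the decoder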
            wav_file = 'rec.wav'
            feat_file = 'rec.feat'
            x, s = sf.read(wav_file)
            fe = sp.FrontEnd(samp_rate=16000, mean_norm_feat=True)
            feat = fe.process_utterance(x)
            htk.write_htk_user_feat(feat, feat_file)
            feat_rscp_line = os.path.join(feat_file)
            line = "%s=%s[0,%d]\n" % (feat_file, feat_rscp_line,
                                      feat.shape[1] - 1)
            try:
                ret = sd_beta.decode(line, args.trn, args.beam_width,
                                     args.lmweight, z, fst, 1)
                self.ques.set(ret)
                nlp_main()
            except KeyboardInterrupt:
                print('End')
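Below is a minimal sketch of how _record might be driven, assuming it belongs to a tkinter app whose record button flips self.isrecording and runs the method on a worker thread; toggle_recording and the use of the standard threading module are illustrative assumptions rather than part of the source:

    def toggle_recording(self):
        # hypothetical button callback; assumes "import threading" at module level
        if not self.isrecording:
            self.isrecording = True
            # run _record off the UI thread so the tkinter mainloop stays responsive
            threading.Thread(target=self._record, daemon=True).start()
        else:
            # the loop in _record sees the flag change, closes the stream,
            # writes rec.wav, extracts features, and decodes
            self.isrecording = False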
Example #2
def process_feat(samp_rate=48000):
    # features grouped by the actor id parsed from each file name
    feat_actors = {}
    wav_files = glob("dev/*/*.wav")
    strings = np.unique(
        pd.Series(wav_files).replace(
            to_replace=r"\.\.\/[a-z0-9]+\/dev\/[a-zA-Z0-9]+\/",
            value="",
            regex=True).values.flatten()).tolist()

    for ii, wav_file in tqdm(enumerate(wav_files)):

        # load the audio, resampling to samp_rate if necessary
        x, s = librosa.core.load(wav_file, sr=samp_rate)

        fe = sp.FrontEnd(samp_rate=s,
                         mean_norm_feat=True,
                         frame_duration=0.032,
                         frame_shift=0.02274,
                         hi_freq=8000)

        feat = fe.process_utterance(x)
        # group features by the last dash-separated field of the file name (actor id)
        actor = strings[ii].split("-")[6].replace(".wav", "")
        feat_actors.setdefault(actor, []).append(feat)

    return feat_actors
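A hedged usage sketch, assuming the script is run from a directory containing the dev/<actor>/<file>.wav layout the function globs for; the summary printout is purely illustrative:

feat_actors = process_feat(samp_rate=48000)
for actor, feats in feat_actors.items():
    # feats is a list of 2-D feature matrices, one per utterance of this actor
    print(actor, len(feats), feats[0].shape)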
Example #3
if not os.path.isfile(wav_file):
    raise RuntimeError(
        'input wav file is missing. Have you downloaded the LibriSpeech corpus?'
    )

if not os.path.exists(os.path.join(data_dir, 'feat')):
    os.mkdir(os.path.join(data_dir, 'feat'))

samp_rate = 16000

x, s = sf.read(wav_file)
if s != samp_rate:
    raise RuntimeError("LibriSpeech files are 16000 Hz, found {0}".format(s))

fe = sp.FrontEnd(samp_rate=samp_rate, mean_norm_feat=True)

feat = fe.process_utterance(x)

if plot_output:
    if not os.path.exists('fig'):
        os.mkdir('fig')

    # plot waveform
    plt.plot(x)
    plt.title('waveform')
    plt.savefig('fig/waveform.png', bbox_inches='tight')
    plt.close()

    # plot mel filterbank
    for i in range(0, fe.num_mel):
Example #4
        compute_stats = True
    else:
        compute_stats = False
    wav_list = os.path.join(data_dir, "lists/wav_{0}.list".format(args.set))
    feat_list = os.path.join(data_dir, "lists/feat_{0}.rscp".format(args.set))
    feat_dir = os.path.join(data_dir, "feat")
    rscp_dir = "..."  # note: "..." is CNTK notation for "relative to the location of the list of feature files"
    mean_file = os.path.join(data_dir, "am/feat_mean.ascii")
    invstddev_file = os.path.join(data_dir, "am/feat_invstddev.ascii")
    wav_dir = "../.."
    if not os.path.exists(os.path.join(data_dir, 'am')):
        os.mkdir(os.path.join(data_dir, 'am'))

    samp_rate = 16000
    fe = sp.FrontEnd(samp_rate=samp_rate,
                     mean_norm_feat=True,
                     compute_stats=compute_stats)
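    # compute_stats presumably makes the front end accumulate the global feature
    # mean / inverse std-dev that feed am/feat_mean.ascii and am/feat_invstddev.ascii above
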
    # read the list of input wav files to process
    with open(wav_list) as f:
        wav_files = f.readlines()
        wav_files = [x.strip() for x in wav_files]

    if not os.path.exists(feat_dir):
        os.makedirs(feat_dir)

    if not os.path.exists(os.path.dirname(feat_list)):
        os.makedirs(os.path.dirname(feat_list))
    out_list = open(feat_list, "w")
    count = 0
    for line in wav_files: