def _record(self):
    """Record from the default input device, then decode the recording.

    Does nothing unless ``self.isrecording`` is true on entry.  Otherwise
    audio is captured in 1024-frame chunks (16-bit, mono, 16 kHz) for as
    long as ``self.isrecording`` stays true, while the button image is
    cycled through ``self.photo``.  The captured audio is written to
    ``rec.wav``, features computed from it are written to ``rec.feat``,
    and a one-line feature script is passed to ``sd_beta.decode``.  The
    decode result is stored in ``self.ques`` and ``nlp_main()`` is called.

    Side effects: sets ``self.play`` to True, reconfigures ``self.button``,
    writes ``rec.wav`` and ``rec.feat`` in the current working directory,
    and prints progress messages.

    NOTE(review): ``args``, ``z`` and ``fst`` are not defined in this
    method; presumably they are module-level globals -- confirm.
    """
    if not self.isrecording:
        return

    CHUNK = 1024
    FORMAT = pyaudio.paInt16
    CHANNELS = 1
    RATE = 16000
    WAVE_OUTPUT_FILENAME = "rec.wav"
    feat_file = 'rec.feat'

    p = pyaudio.PyAudio()
    try:
        # Query the sample width while the PyAudio instance is still alive
        # rather than after terminate().
        sample_width = p.get_sample_size(FORMAT)
        stream = p.open(format=FORMAT,
                        channels=CHANNELS,
                        rate=RATE,
                        input=True,
                        frames_per_buffer=CHUNK)
        try:
            self.play = True
            print("* recording")
            frames = []
            i = 1
            while self.isrecording:
                self.button.config(image=self.photo[i], width="325", height="200")
                frames.append(stream.read(CHUNK))
                # Animation index wraps 1..9, 0, 1, ... (ten images).
                i = (i + 1) % 10
            self.button.config(image=self.photo[0], width="325", height="200")
            print("* done recording")
        finally:
            # Release the audio stream even if a read or GUI call failed.
            stream.stop_stream()
            stream.close()
    finally:
        p.terminate()

    # The context manager closes the file even if a write fails.
    with wave.open(WAVE_OUTPUT_FILENAME, 'wb') as wf:
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(sample_width)
        wf.setframerate(RATE)
        wf.writeframes(b''.join(frames))

    x, _ = sf.read(WAVE_OUTPUT_FILENAME)
    fe = sp.FrontEnd(samp_rate=RATE, mean_norm_feat=True)
    feat = fe.process_utterance(x)
    htk.write_htk_user_feat(feat, feat_file)

    # Script line of the form "<name>=<path>[0,<last>]"; name and path are
    # both the feature file name, and <last> is feat.shape[1] - 1.
    line = "%s=%s[0,%d]\n" % (feat_file, feat_file, feat.shape[1] - 1)
    try:
        ret = sd_beta.decode(line, args.trn, args.beam_width, args.lmweight, z, fst, 1)
        self.ques.set(ret)
        nlp_main()
    except KeyboardInterrupt:
        print('End')
def process_feat(samp_rate=48000):
    """Compute front-end features for every ``dev/*/*.wav`` file, grouped by actor.

    Each file is loaded with ``librosa.core.load`` and run through an
    ``sp.FrontEnd`` configured with the sample rate that the loader reports.
    The resulting feature matrix is appended to ``feat_actors[actor]``,
    where ``actor`` is the seventh ``-``-separated field of the file name
    with ``.wav`` removed.

    Args:
        samp_rate: Currently unused; kept for interface compatibility.
            The front end uses the rate returned by the loader instead.

    Returns:
        The ``feat_actors`` mapping (actor id -> list of feature matrices).
        NOTE(review): ``feat_actors`` is not defined in this function;
        presumably it is a module-level dict -- it is mutated in place.

    Raises:
        IndexError: If a file name has fewer than seven ``-``-separated
            fields.
    """
    wav_files = glob("dev/*/*.wav")

    # Strip a leading "../<x>/dev/<y>/" prefix where present.  The result
    # must stay aligned one-to-one with wav_files: the earlier np.unique()
    # call sorted and de-duplicated this list, so indexing it with the
    # position in wav_files could pair a file with another file's actor id
    # (or run past the end of the list).
    names = pd.Series(wav_files).replace(
        to_replace=r"\.\.\/[a-z0-9]+\/dev\/[a-zA-Z0-9]+\/",
        value="",
        regex=True).tolist()

    for wav_file, name in tqdm(zip(wav_files, names), total=len(wav_files)):
        x, s = librosa.core.load(wav_file)
        fe = sp.FrontEnd(samp_rate=s,
                         mean_norm_feat=True,
                         frame_duration=0.032,
                         frame_shift=0.02274,
                         hi_freq=8000)
        feat = fe.process_utterance(x)

        actor = name.split("-")[6].replace(".wav", "")
        feat_actors.setdefault(actor, []).append(feat)

    return feat_actors
if not os.path.isfile(wav_file): raise RuntimeError( 'input wav file is missing. Have you downloaded the LibriSpeech corpus?' ) if not os.path.exists(os.path.join(data_dir, 'feat')): os.mkdir(os.path.join(data_dir, 'feat')) samp_rate = 16000 x, s = sf.read(wav_file) if (s != samp_rate): raise RuntimeError("LibriSpeech files are 16000 Hz, found {0}".format(s)) fe = sp.FrontEnd(samp_rate=samp_rate, mean_norm_feat=True) feat = fe.process_utterance(x) if (plot_output): if not os.path.exists('fig'): os.mkdir('fig') # plot waveform plt.plot(x) plt.title('waveform') plt.savefig('fig/waveform.png', bbox_inches='tight') plt.close() # plot mel filterbank for i in range(0, fe.num_mel):
compute_stats = True else: compute_stats = False wav_list = os.path.join(data_dir, "lists/wav_{0}.list".format(args.set)) feat_list = os.path.join(data_dir, "lists/feat_{0}.rscp".format(args.set)) feat_dir = os.path.join(data_dir, "feat") rscp_dir = "..." # note ... is CNTK notation for "relative to the location of the list of feature files mean_file = os.path.join(data_dir, "am/feat_mean.ascii") invstddev_file = os.path.join(data_dir, "am/feat_invstddev.ascii") wav_dir = "../.." if not os.path.exists(os.path.join(data_dir, 'am')): os.mkdir(os.path.join(data_dir, 'am')) samp_rate = 16000 fe = sp.FrontEnd(samp_rate=samp_rate, mean_norm_feat=True, compute_stats=compute_stats) # read lines with open(wav_list) as f: wav_files = f.readlines() wav_files = [x.strip() for x in wav_files] if not os.path.exists(feat_dir): os.makedirs(feat_dir) if not os.path.exists(os.path.dirname(feat_list)): os.makedirs(os.path.dirname(feat_list)) out_list = open(feat_list, "w") count = 0 for line in wav_files: