def record_and_write(audio_processor, hyper_params):
    """Record audio from the default microphone in fixed-size chunks and
    print the acoustic model's transcription of each chunk.

    Args:
        audio_processor: object exposing ``process_signal(signal, sample_rate)``
            returning ``(feature_matrix, original_length)``.
        hyper_params: dict of model configuration — reads ``num_layers``,
            ``hidden_size``, ``max_input_seq_length``, ``max_target_seq_length``,
            ``input_dim``, ``batch_normalization``, ``char_map_length``,
            ``char_map`` and ``checkpoint_dir``.

    Runs until interrupted; audio resources are released on exit.
    """
    import pyaudio

    _CHUNK = hyper_params["max_input_seq_length"]
    _SR = 22050
    p = pyaudio.PyAudio()
    with tf.Session() as sess:
        # Build a forward-only acoustic model with batch size 1 and restore
        # the checkpoint from the acoustic sub-directory.
        model = AcousticModel(hyper_params["num_layers"], hyper_params["hidden_size"],
                              1, hyper_params["max_input_seq_length"],
                              hyper_params["max_target_seq_length"],
                              hyper_params["input_dim"],
                              hyper_params["batch_normalization"],
                              hyper_params["char_map_length"])
        model.create_forward_rnn()
        model.initialize(sess)
        model.restore(sess, hyper_params["checkpoint_dir"] + "/acoustic/")

        # Open a mono 16-bit input stream whose buffer matches the model's
        # fixed input length.
        stream = p.open(format=pyaudio.paInt16, channels=1, rate=_SR,
                        input=True, frames_per_buffer=_CHUNK)
        print("NOW RECORDING...")
        try:
            while True:
                data = stream.read(_CHUNK)
                # BUGFIX: np.fromstring(data) is deprecated and defaults to
                # float64, which mis-parses the paInt16 byte stream — decode
                # the raw bytes as little-endian 16-bit samples instead.
                data = np.frombuffer(data, dtype=np.int16)
                feat_vec, original_feat_vec_length = audio_processor.process_signal(data, _SR)
                # Model expects input shaped (time, batch=1, input_dim).
                (a, b) = feat_vec.shape
                feat_vec = feat_vec.reshape((a, 1, b))
                predictions = model.process_input(sess, feat_vec,
                                                  [original_feat_vec_length])
                result = [dataprocessor.DataProcessor.get_labels_str(
                              hyper_params["char_map"], prediction)
                          for prediction in predictions]
                print(result, end="")
        finally:
            # BUGFIX: release the capture stream and PyAudio instance even
            # when the infinite loop is interrupted (original leaked both).
            stream.stop_stream()
            stream.close()
            p.terminate()
def process_file(audio_processor, hyper_params, file):
    """Transcribe a single audio file with the acoustic model and print the
    resulting text.

    Files longer than ``max_input_seq_length`` frames are rejected with a
    warning; shorter ones are zero-padded up to that fixed length.

    Args:
        audio_processor: object exposing ``process_audio_file(file)`` returning
            ``(feature_matrix, original_length)``.
        hyper_params: dict of model configuration (see keys read below).
        file: path of the audio file to transcribe.
    """
    feat_vec, original_feat_vec_length = audio_processor.process_audio_file(file)
    if original_feat_vec_length > hyper_params["max_input_seq_length"]:
        logging.warning("File too long")
        return
    elif original_feat_vec_length < hyper_params["max_input_seq_length"]:
        # Pad the feat_vec with zero frames up to the model's fixed length.
        pad_length = hyper_params["max_input_seq_length"] - original_feat_vec_length
        # BUGFIX: np.float is a deprecated alias (removed in NumPy 1.24);
        # the builtin float yields the same float64 dtype.
        padding = np.zeros((pad_length, hyper_params["input_dim"]), dtype=float)
        feat_vec = np.concatenate((feat_vec, padding), 0)
    with tf.Session() as sess:
        # Build a forward-only acoustic model with batch size 1 and restore
        # the checkpoint from the acoustic sub-directory.
        model = AcousticModel(hyper_params["num_layers"], hyper_params["hidden_size"],
                              1, hyper_params["max_input_seq_length"],
                              hyper_params["max_target_seq_length"],
                              hyper_params["input_dim"],
                              hyper_params["batch_normalization"],
                              hyper_params["char_map_length"])
        model.create_forward_rnn()
        model.initialize(sess)
        model.restore(sess, hyper_params["checkpoint_dir"] + "/acoustic/")
        # Model expects input shaped (time, batch=1, input_dim).
        (a, b) = feat_vec.shape
        feat_vec = feat_vec.reshape((a, 1, b))
        predictions = model.process_input(sess, feat_vec, [original_feat_vec_length])
        transcribed_text = [dataprocessor.DataProcessor.get_labels_str(
                                hyper_params["char_map"], prediction)
                            for prediction in predictions]
        print(transcribed_text[0])
def record_and_write(audio_processor, hyper_params):
    """Record audio from the default microphone in fixed-size chunks and
    print the model's output for each chunk.

    Variant that constructs the model with a ``language=`` keyword and
    prints ``process_input``'s result directly (no char-map decoding).

    Args:
        audio_processor: object exposing ``process_signal(signal, sample_rate)``
            returning ``(feature_matrix, original_length)``.
        hyper_params: dict of model configuration — reads ``num_layers``,
            ``hidden_size``, ``max_input_seq_length``, ``max_target_seq_length``,
            ``input_dim``, ``batch_normalization``, ``language`` and
            ``checkpoint_dir``.

    Runs until interrupted; audio resources are released on exit.
    """
    import pyaudio

    _CHUNK = hyper_params["max_input_seq_length"]
    _SR = 22050
    p = pyaudio.PyAudio()
    with tf.Session() as sess:
        # Build a forward-only acoustic model with batch size 1 and restore
        # the checkpoint.
        model = AcousticModel(hyper_params["num_layers"], hyper_params["hidden_size"],
                              1, hyper_params["max_input_seq_length"],
                              hyper_params["max_target_seq_length"],
                              hyper_params["input_dim"],
                              hyper_params["batch_normalization"],
                              language=hyper_params["language"])
        model.create_forward_rnn()
        model.initialize(sess)
        model.restore(sess, hyper_params["checkpoint_dir"])

        # Open a mono 16-bit input stream whose buffer matches the model's
        # fixed input length.
        stream = p.open(format=pyaudio.paInt16, channels=1, rate=_SR,
                        input=True, frames_per_buffer=_CHUNK)
        print("NOW RECORDING...")
        try:
            while True:
                data = stream.read(_CHUNK)
                # BUGFIX: np.fromstring(data) is deprecated and defaults to
                # float64, which mis-parses the paInt16 byte stream — decode
                # the raw bytes as little-endian 16-bit samples instead.
                data = np.frombuffer(data, dtype=np.int16)
                feat_vec, original_feat_vec_length = audio_processor.process_signal(data, _SR)
                # Model expects input shaped (time, batch=1, input_dim).
                (a, b) = feat_vec.shape
                feat_vec = feat_vec.reshape((a, 1, b))
                result = model.process_input(sess, feat_vec,
                                             [original_feat_vec_length])
                print(result, end="")
        finally:
            # BUGFIX: release the capture stream and PyAudio instance even
            # when the infinite loop is interrupted (original leaked both).
            stream.stop_stream()
            stream.close()
            p.terminate()
def process_file(audio_processor, hyper_params, file):
    """Transcribe a single audio file with the acoustic model and print the
    resulting text.

    Files longer than ``max_input_seq_length`` frames are rejected with a
    warning; shorter ones are zero-padded up to that fixed length.

    Args:
        audio_processor: object exposing ``process_audio_file(file)`` returning
            ``(feature_matrix, original_length)``.
        hyper_params: dict of model configuration (see keys read below).
        file: path of the audio file to transcribe.
    """
    feat_vec, original_feat_vec_length = audio_processor.process_audio_file(file)
    if original_feat_vec_length > hyper_params["max_input_seq_length"]:
        logging.warning("File too long")
        return
    elif original_feat_vec_length < hyper_params["max_input_seq_length"]:
        # Pad the feat_vec with zero frames up to the model's fixed length.
        pad_length = hyper_params["max_input_seq_length"] - original_feat_vec_length
        # BUGFIX: np.float is a deprecated alias (removed in NumPy 1.24);
        # the builtin float yields the same float64 dtype.
        padding = np.zeros((pad_length, hyper_params["input_dim"]), dtype=float)
        feat_vec = np.concatenate((feat_vec, padding), 0)
    with tf.Session() as sess:
        # Build a forward-only acoustic model with batch size 1 and restore
        # the checkpoint from the acoustic sub-directory.
        model = AcousticModel(hyper_params["num_layers"], hyper_params["hidden_size"],
                              1, hyper_params["max_input_seq_length"],
                              hyper_params["max_target_seq_length"],
                              hyper_params["input_dim"],
                              hyper_params["batch_normalization"],
                              hyper_params["char_map_length"])
        model.create_forward_rnn()
        model.initialize(sess)
        model.restore(sess, hyper_params["checkpoint_dir"] + "/acoustic/")
        # Model expects input shaped (time, batch=1, input_dim).
        (a, b) = feat_vec.shape
        feat_vec = feat_vec.reshape((a, 1, b))
        predictions = model.process_input(sess, feat_vec, [original_feat_vec_length])
        transcribed_text = [dataprocessor.DataProcessor.get_labels_str(
                                hyper_params["char_map"], prediction)
                            for prediction in predictions]
        print(transcribed_text[0])
def process_file(audio_processor, hyper_params, file):
    """Run the restored acoustic model on one audio file and print the
    transcription.

    Variant that constructs the model with a ``language=`` keyword and
    prints the first element of ``process_input``'s result directly.

    Args:
        audio_processor: object exposing ``process_audio_file(file)`` returning
            ``(feature_matrix, original_length)``.
        hyper_params: dict of model configuration (see keys read below).
        file: path of the audio file to transcribe.
    """
    feat_vec, original_feat_vec_length = audio_processor.process_audio_file(file)

    # Guard: inputs exceeding the model's fixed input length are rejected.
    if original_feat_vec_length > hyper_params["max_input_seq_length"]:
        logging.warning("File too long")
        return

    with tf.Session() as session:
        # Build a batch-size-1 forward model and restore its checkpoint.
        recognizer = AcousticModel(hyper_params["num_layers"],
                                   hyper_params["hidden_size"],
                                   1,
                                   hyper_params["max_input_seq_length"],
                                   hyper_params["max_target_seq_length"],
                                   hyper_params["input_dim"],
                                   hyper_params["batch_normalization"],
                                   language=hyper_params["language"])
        recognizer.create_forward_rnn()
        recognizer.initialize(session)
        recognizer.restore(session, hyper_params["checkpoint_dir"])

        # Reshape the feature matrix (time, features) -> (time, 1, features)
        # to insert the batch dimension the model expects.
        time_steps, feature_dim = feat_vec.shape
        feat_vec = feat_vec.reshape((time_steps, 1, feature_dim))

        transcribed_text = recognizer.process_input(session, feat_vec,
                                                    [original_feat_vec_length])
        print(transcribed_text[0])