def do_single_file_inference(input_file_path):
    """Run inference on a single file and print the best transcription.

    Builds an inference graph with batch size 1, restores the checkpoint
    recorded in ``FLAGS.checkpoint_dir``, computes features for
    ``input_file_path``, evaluates ``outputs['outputs']`` with zero-filled
    ``previous_state_c`` / ``previous_state_h`` inputs and decodes the result
    with CTC beam search. A ``Scorer`` is only built when
    ``FLAGS.lm_binary_path`` is set; otherwise decoding runs without one.

    Args:
        input_file_path: Handed unchanged to ``audiofile_to_features``;
            presumably the path of the audio file to transcribe.

    Raises:
        SystemExit: With status 1 when ``FLAGS.checkpoint_dir`` does not
            contain a valid checkpoint state.
    """
    # Local import keeps this block self-contained; ``sys.exit`` replaces the
    # ``site``-provided ``exit`` helper, which is meant for interactive use
    # and is not guaranteed to exist (e.g. ``python -S`` or frozen apps).
    import sys

    with tfv1.Session(config=Config.session_config) as session:
        inputs, outputs, _ = create_inference_graph(batch_size=1, n_steps=-1)

        # Create a saver using variables from the above newly created graph
        saver = tfv1.train.Saver()

        # Restore variables from training checkpoint
        # TODO: This restores the most recent checkpoint, but if we use validation to counteract
        #       over-fitting, we may want to restore an earlier checkpoint.
        checkpoint = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
        if not checkpoint:
            log_error('Checkpoint directory ({}) does not contain a valid checkpoint state.'
                      .format(FLAGS.checkpoint_dir))
            sys.exit(1)

        checkpoint_path = checkpoint.model_checkpoint_path
        saver.restore(session, checkpoint_path)

        features, features_len = audiofile_to_features(input_file_path)
        previous_state_c = np.zeros([1, Config.n_cell_dim])
        previous_state_h = np.zeros([1, Config.n_cell_dim])

        # Add batch dimension
        features = tf.expand_dims(features, 0)
        features_len = tf.expand_dims(features_len, 0)

        # Evaluate: turn the feature tensors into concrete values first, since
        # they are fed back in through the graph's input placeholders.
        features = create_overlapping_windows(features).eval(session=session)
        features_len = features_len.eval(session=session)

        logits = outputs['outputs'].eval(feed_dict={
            inputs['input']: features,
            inputs['input_lengths']: features_len,
            inputs['previous_state_c']: previous_state_c,
            inputs['previous_state_h']: previous_state_h,
        }, session=session)

        # Drop the size-1 (batch) dimension before handing off to the decoder.
        logits = np.squeeze(logits)

        if FLAGS.lm_binary_path:
            scorer = Scorer(FLAGS.lm_alpha, FLAGS.lm_beta,
                            FLAGS.lm_binary_path, FLAGS.lm_trie_path,
                            Config.alphabet)
        else:
            scorer = None

        decoded = ctc_beam_search_decoder(logits, Config.alphabet, FLAGS.beam_width,
                                          scorer=scorer, cutoff_prob=FLAGS.cutoff_prob,
                                          cutoff_top_n=FLAGS.cutoff_top_n)
        # Print highest probability result
        print(decoded[0][1])
def do_single_file_inference(input_file_path):
    """Run inference on a single file and print the best transcription.

    Builds an inference graph with batch size 1, prints every entry of the
    returned ``layers`` collection (debug aid), restores all global variables
    except those whose op name starts with ``previous_state_`` from the
    checkpoint in ``FLAGS.checkpoint_dir``, runs ``outputs['initialize_state']``
    and then evaluates the model on overlapping feature windows. The output is
    decoded with CTC beam search using a ``Scorer`` built from the LM flags.

    Args:
        input_file_path: Handed unchanged to ``audiofile_to_input_vector``;
            presumably the path of the audio file to transcribe.

    Raises:
        SystemExit: With status 1 when ``FLAGS.checkpoint_dir`` does not
            contain a valid checkpoint state.
    """
    # Local import keeps this block self-contained; ``sys.exit`` replaces the
    # ``site``-provided ``exit`` helper, which is meant for interactive use
    # and is not guaranteed to exist (e.g. ``python -S`` or frozen apps).
    import sys

    with tf.Session(config=Config.session_config) as session:
        inputs, outputs, layers = create_inference_graph(batch_size=1, n_steps=-1)

        # REVIEW josephz: Hack: print all layers here.
        for i, layer in enumerate(layers):
            print("layer '{}': '{}'".format(i, layer))

        # Create a saver using variables from the above newly created graph.
        # The previous_state_* variables are deliberately left out of the
        # restore mapping.
        mapping = {
            v.op.name: v
            for v in tf.global_variables()
            if not v.op.name.startswith('previous_state_')
        }
        saver = tf.train.Saver(mapping)

        # Restore variables from training checkpoint
        # TODO: This restores the most recent checkpoint, but if we use validation to counteract
        #       over-fitting, we may want to restore an earlier checkpoint.
        checkpoint = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
        if not checkpoint:
            log_error('Checkpoint directory ({}) does not contain a valid checkpoint state.'
                      .format(FLAGS.checkpoint_dir))
            sys.exit(1)

        checkpoint_path = checkpoint.model_checkpoint_path
        saver.restore(session, checkpoint_path)

        # NOTE(review): presumably initializes the previous_state_* variables
        # that were excluded from the restore above - confirm against
        # create_inference_graph.
        session.run(outputs['initialize_state'])

        features = audiofile_to_input_vector(input_file_path, Config.n_input, Config.n_context)

        num_strides = len(features) - (Config.n_context * 2)

        # Create a view into the array with overlapping strides of size
        # numcontext (past) + 1 (present) + numcontext (future)
        window_size = 2 * Config.n_context + 1
        features = np.lib.stride_tricks.as_strided(
            features,
            (num_strides, window_size, Config.n_input),
            (features.strides[0], features.strides[0], features.strides[1]),
            writeable=False)

        # The feeds are wrapped in lists to add the batch dimension.
        logits = session.run(outputs['outputs'], feed_dict={
            inputs['input']: [features],
            inputs['input_lengths']: [num_strides],
        })

        logits = np.squeeze(logits)

        scorer = Scorer(FLAGS.lm_alpha, FLAGS.lm_beta,
                        FLAGS.lm_binary_path, FLAGS.lm_trie_path,
                        Config.alphabet)
        decoded = ctc_beam_search_decoder(logits, Config.alphabet, FLAGS.beam_width, scorer=scorer)
        # Print highest probability result
        print(decoded[0][1])
def classify(self, audio_path):
    """Decode one audio file with the 3rd-party CTC beam search and print it.

    Loads the sample through ``self.load_audio``, applies a softmax to
    ``self.logits``, evaluates it in ``self.sess`` with the sample fed into
    ``self.audio``, and prints the second field of the first decoder result.
    ``self.scorer`` is (re)assigned on every call: a ``Scorer`` when
    ``FLAGS.lm_binary_path`` is set, otherwise ``None``.
    """
    waveform = self.load_audio(audio_path)

    # Apply softmax for CTC decoder
    softmax_op = tf.nn.softmax(self.logits, name='logits')
    probabilities = self.sess.run(softmax_op, feed_dict={self.audio: waveform})
    print('logits:', probabilities)
    probabilities = np.squeeze(probabilities)

    # Language-model files are resolved relative to the 'DeepSpeech' directory.
    self.scorer = (
        Scorer(FLAGS.lm_alpha, FLAGS.lm_beta,
               os.path.join('DeepSpeech', FLAGS.lm_binary_path),
               os.path.join('DeepSpeech', FLAGS.lm_trie_path),
               Config.alphabet)
        if FLAGS.lm_binary_path
        else None
    )

    decoder_options = {
        'scorer': self.scorer,
        'cutoff_prob': FLAGS.cutoff_prob,
        'cutoff_top_n': FLAGS.cutoff_top_n,
    }
    results = ctc_beam_search_decoder(probabilities, Config.alphabet,
                                      FLAGS.beam_width, **decoder_options)

    # Print highest probability result
    best = results[0]
    print(best[1])
def evaluate(filename='data/test/1_input.npy'):
    """Decode one stored handwriting trajectory and print the recognised text.

    Loads the points from ``filename``, normalizes the trajectory, computes
    the feature sequence, runs the module-level ``logits`` tensor in ``sess``
    and decodes the softmax-normalized output with CTC beam search. The
    decoded result is mapped through ``decodex`` and printed.

    Args:
        filename: Path passed to ``np.load``.
    """
    points = np.load(filename)

    # NOTE(review): "filp_h" looks like a misspelling of "flip_h"; it is a
    # runtime value consumed by normalize_trajectory, so it is left untouched.
    # Confirm which spelling that function expects.
    NORM_ARGS = ["origin", "filp_h", "smooth", "slope", "resample", "slant", "height"]
    FEAT_ARGS = [
        "x_cor", "y_cor", "penup", "dir", "curv",
        "vic_aspect", "vic_curl", "vic_line", "vic_slope", "bitmap",
    ]

    traj = normalize_trajectory(points, NORM_ARGS)
    feat_seq_mat = preprocess_handwriting(traj, FEAT_ARGS)
    feat_seq_mat = feat_seq_mat.astype('float32')

    train_input = handwriting_to_input_vector(feat_seq_mat, 20, 9)
    train_input = train_input.astype('float32')

    # Batch of exactly one sample.
    data = np.asarray([train_input])

    # Pad input to max_time_step of this batch
    source, source_lengths = pad_sequences(data)
    my_logits = sess.run(logits, feed_dict={
        input_tensor: source,
        seq_length: source_lengths,
    })

    # dim0=t, dim1=c. np.squeeze drops every size-1 axis, which would also
    # remove the time axis of a single-step sequence, so restore a 2-D view.
    my_logits = np.squeeze(my_logits)
    my_logits = my_logits.reshape(-1, my_logits.shape[-1]).astype(np.float64)

    # Softmax over the class axis. Subtracting the row maximum first leaves
    # the result mathematically unchanged but keeps np.exp from overflowing
    # to inf (which would yield nan probabilities).
    shifted = my_logits - my_logits.max(axis=1, keepdims=True)
    res = np.exp(shifted)
    res /= res.sum(axis=1, keepdims=True)

    decoded = ctc_beam_search_decoder(res, alphabet, beam_width,
                                      scorer=scorer, cutoff_prob=cutoff_prob,
                                      cutoff_top_n=cutoff_top_n)
    print("Result : " + decodex(decoded[0][1], mapping))
def beam_search_with_lm(self, xb):
    """Decode a batch with CTC beam search using ``self.scorer``.

    Runs ``self.forward`` without gradient tracking, applies a softmax over
    dimension 2 and decodes every slice along the first axis of the result
    with a beam size of 25.

    Returns:
        list: For each slice, the second field of the first decoder result.
    """
    with torch.no_grad():
        probabilities = self.forward(xb).softmax(2).cpu().numpy()

    # Iterating the array walks its first axis, one slice per decoder call.
    return [
        ctc_beam_search_decoder(probs_seq=sequence_probs,
                                alphabet=self.alphabet,
                                scorer=self.scorer,
                                beam_size=25)[0][1]
        for sequence_probs in probabilities
    ]
def do_single_file_inference(input_file_path):
    """Run inference on a single file and print the best transcription.

    Builds an inference graph with batch size 1, restores all global
    variables except those whose op name starts with ``previous_state_`` from
    the checkpoint in ``FLAGS.checkpoint_dir``, runs
    ``outputs['initialize_state']``, computes features for
    ``input_file_path`` and evaluates ``outputs['outputs']``. The output is
    decoded with CTC beam search using a ``Scorer`` built from the LM flags.

    Args:
        input_file_path: Handed unchanged to ``audiofile_to_features``;
            presumably the path of the audio file to transcribe.

    Raises:
        SystemExit: With status 1 when ``FLAGS.checkpoint_dir`` does not
            contain a valid checkpoint state.
    """
    # Local import keeps this block self-contained; ``sys.exit`` replaces the
    # ``site``-provided ``exit`` helper, which is meant for interactive use
    # and is not guaranteed to exist (e.g. ``python -S`` or frozen apps).
    import sys

    with tf.Session(config=Config.session_config) as session:
        inputs, outputs, _ = create_inference_graph(batch_size=1, n_steps=-1)

        # Create a saver using variables from the above newly created graph.
        # The previous_state_* variables are deliberately left out of the
        # restore mapping.
        mapping = {
            v.op.name: v
            for v in tf.global_variables()
            if not v.op.name.startswith('previous_state_')
        }
        saver = tf.train.Saver(mapping)

        # Restore variables from training checkpoint
        # TODO: This restores the most recent checkpoint, but if we use validation to counteract
        #       over-fitting, we may want to restore an earlier checkpoint.
        checkpoint = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
        if not checkpoint:
            log_error('Checkpoint directory ({}) does not contain a valid checkpoint state.'
                      .format(FLAGS.checkpoint_dir))
            sys.exit(1)

        checkpoint_path = checkpoint.model_checkpoint_path
        saver.restore(session, checkpoint_path)

        # NOTE(review): presumably initializes the previous_state_* variables
        # that were excluded from the restore above - confirm against
        # create_inference_graph.
        session.run(outputs['initialize_state'])

        features, features_len = audiofile_to_features(input_file_path)

        # Add batch dimension
        features = tf.expand_dims(features, 0)
        features_len = tf.expand_dims(features_len, 0)

        # Evaluate: turn the feature tensors into concrete values first, since
        # they are fed back in through the graph's input placeholders.
        features = create_overlapping_windows(features).eval(session=session)
        features_len = features_len.eval(session=session)

        logits = outputs['outputs'].eval(feed_dict={
            inputs['input']: features,
            inputs['input_lengths']: features_len,
        }, session=session)

        logits = np.squeeze(logits)

        scorer = Scorer(FLAGS.lm_alpha, FLAGS.lm_beta,
                        FLAGS.lm_binary_path, FLAGS.lm_trie_path,
                        Config.alphabet)
        decoded = ctc_beam_search_decoder(logits, Config.alphabet, FLAGS.beam_width, scorer=scorer)
        # Print highest probability result
        print(decoded[0][1])
def do_single_file_inference(input_file_path):
    """Transcribe a single file and print the top beam-search result.

    Creates a batch-size-1 inference graph, loads weights through
    ``load_or_init_graph`` according to ``FLAGS.load``, computes windowed
    features for ``input_file_path`` and evaluates ``outputs['outputs']`` with
    zero-filled ``previous_state_c`` / ``previous_state_h`` inputs. Decoding
    uses a ``Scorer`` only when ``FLAGS.scorer_path`` is set.

    Args:
        input_file_path: Handed unchanged to ``audiofile_to_features``;
            presumably the path of the audio file to transcribe.
    """
    with tfv1.Session(config=Config.session_config) as session:
        inputs, outputs, _ = create_inference_graph(batch_size=1, n_steps=-1)

        # Restore variables from training checkpoint. 'auto' expands to the
        # candidates 'best' then 'last'; any other value is used on its own.
        load_methods = ['best', 'last'] if FLAGS.load == 'auto' else [FLAGS.load]
        load_or_init_graph(session, load_methods)

        raw_features, raw_features_len = audiofile_to_features(input_file_path)
        zero_state_c = np.zeros([1, Config.n_cell_dim])
        zero_state_h = np.zeros([1, Config.n_cell_dim])

        # Add batch dimension
        batched_features = tf.expand_dims(raw_features, 0)
        batched_len = tf.expand_dims(raw_features_len, 0)

        # Evaluate the feature pipeline to concrete values before feeding them
        windowed_features = session.run(create_overlapping_windows(batched_features))
        lengths = session.run(batched_len)

        feed = {
            inputs['input']: windowed_features,
            inputs['input_lengths']: lengths,
            inputs['previous_state_c']: zero_state_c,
            inputs['previous_state_h']: zero_state_h,
        }
        logits = np.squeeze(session.run(outputs['outputs'], feed_dict=feed))

        scorer = (
            Scorer(FLAGS.lm_alpha, FLAGS.lm_beta, FLAGS.scorer_path, Config.alphabet)
            if FLAGS.scorer_path
            else None
        )
        decoded = ctc_beam_search_decoder(
            logits,
            Config.alphabet,
            FLAGS.beam_width,
            scorer=scorer,
            cutoff_prob=FLAGS.cutoff_prob,
            cutoff_top_n=FLAGS.cutoff_top_n,
        )

        # Print highest probability result
        top_result = decoded[0]
        print(top_result[1])
def beam_search_with_lm(self, xb):
    """Decode a batch with CTC beam search using ``self.scorer``.

    Runs ``self.forward`` without gradient tracking, applies a softmax over
    dimension 2, swaps the first two dimensions and decodes every slice along
    the new first axis with a beam size of 25.

    Returns:
        list: For each slice, the second field of the first decoder result.
    """
    with torch.no_grad():
        # This tensor for each image in the batch contains probabilities of
        # each label for each input feature
        label_probs = self.forward(xb).softmax(2)
        # Swap the first two axes so the decoding loop walks the other one.
        per_item_probs = label_probs.permute(1, 0, 2).cpu().numpy()

    return [
        ctc_beam_search_decoder(probs_seq=item_probs,
                                alphabet=self.alphabet,
                                beam_size=25,
                                scorer=self.scorer)[0][1]
        for item_probs in per_item_probs
    ]
def do_single_file_inference(input_file_path):
    """Transcribe a single file and print the top beam-search result.

    Creates a batch-size-1 inference graph and restores weights with
    ``try_loading``: the ``'checkpoint'`` state is tried when ``FLAGS.load``
    is ``'auto'`` or ``'last'``, then ``'best_dev_checkpoint'`` when it is
    ``'auto'`` or ``'best'``. Features for ``input_file_path`` are evaluated
    and fed together with zero-filled ``previous_state_c`` /
    ``previous_state_h`` inputs. Decoding uses a ``Scorer`` only when
    ``FLAGS.lm_binary_path`` is set.

    Args:
        input_file_path: Handed unchanged to ``audiofile_to_features``;
            presumably the path of the audio file to transcribe.

    Raises:
        SystemExit: With status 1 when no checkpoint could be loaded.
    """
    with tfv1.Session(config=Config.session_config) as session:
        inputs, outputs, _ = create_inference_graph(batch_size=1, n_steps=-1)

        # Create a saver using variables from the above newly created graph
        saver = tfv1.train.Saver()

        # Restore variables from training checkpoint: walk the candidates in
        # priority order and stop at the first one that loads.
        candidates = (
            (['auto', 'last'], 'checkpoint', 'most recent'),
            (['auto', 'best'], 'best_dev_checkpoint', 'best validation'),
        )
        loaded = False
        for accepted_modes, state_name, description in candidates:
            if FLAGS.load not in accepted_modes:
                continue
            loaded = try_loading(session, saver, state_name, description, load_step=False)
            if loaded:
                break

        if not loaded:
            print('Could not load checkpoint from {}'.format(FLAGS.checkpoint_dir))
            sys.exit(1)

        raw_features, raw_features_len = audiofile_to_features(input_file_path)
        zero_state_c = np.zeros([1, Config.n_cell_dim])
        zero_state_h = np.zeros([1, Config.n_cell_dim])

        # Add batch dimension
        batched_features = tf.expand_dims(raw_features, 0)
        batched_len = tf.expand_dims(raw_features_len, 0)

        # Evaluate the feature pipeline to concrete values before feeding them
        windowed_features = session.run(create_overlapping_windows(batched_features))
        lengths = session.run(batched_len)

        feed = {
            inputs['input']: windowed_features,
            inputs['input_lengths']: lengths,
            inputs['previous_state_c']: zero_state_c,
            inputs['previous_state_h']: zero_state_h,
        }
        logits = np.squeeze(session.run(outputs['outputs'], feed_dict=feed))

        scorer = (
            Scorer(FLAGS.lm_alpha, FLAGS.lm_beta,
                   FLAGS.lm_binary_path, FLAGS.lm_trie_path,
                   Config.alphabet)
            if FLAGS.lm_binary_path
            else None
        )
        decoded = ctc_beam_search_decoder(
            logits,
            Config.alphabet,
            FLAGS.beam_width,
            scorer=scorer,
            cutoff_prob=FLAGS.cutoff_prob,
            cutoff_top_n=FLAGS.cutoff_top_n,
        )

        # Print highest probability result
        top_result = decoded[0]
        print(top_result[1])
def do_single_file_inference(input_file_path):
    """Run inference on a single file and print the best transcription.

    Builds an inference graph with batch size 1, restores all global
    variables except those whose op name starts with ``previous_state_`` from
    the checkpoint in ``FLAGS.checkpoint_dir``, runs
    ``outputs['initialize_state']`` and then evaluates the model on
    overlapping feature windows. The output is decoded with CTC beam search
    using a ``Scorer`` built from the LM flags.

    Args:
        input_file_path: Handed unchanged to ``audiofile_to_input_vector``;
            presumably the path of the audio file to transcribe.

    Raises:
        SystemExit: With status 1 when ``FLAGS.checkpoint_dir`` does not
            contain a valid checkpoint state.
    """
    # Local import keeps this block self-contained; ``sys.exit`` replaces the
    # ``site``-provided ``exit`` helper, which is meant for interactive use
    # and is not guaranteed to exist (e.g. ``python -S`` or frozen apps).
    import sys

    with tf.Session(config=Config.session_config) as session:
        inputs, outputs, _ = create_inference_graph(batch_size=1, n_steps=-1)

        # Create a saver using variables from the above newly created graph.
        # The previous_state_* variables are deliberately left out of the
        # restore mapping.
        mapping = {
            v.op.name: v
            for v in tf.global_variables()
            if not v.op.name.startswith('previous_state_')
        }
        saver = tf.train.Saver(mapping)

        # Restore variables from training checkpoint
        # TODO: This restores the most recent checkpoint, but if we use validation to counteract
        #       over-fitting, we may want to restore an earlier checkpoint.
        checkpoint = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
        if not checkpoint:
            log_error('Checkpoint directory ({}) does not contain a valid checkpoint state.'.format(FLAGS.checkpoint_dir))
            sys.exit(1)

        checkpoint_path = checkpoint.model_checkpoint_path
        saver.restore(session, checkpoint_path)

        # NOTE(review): presumably initializes the previous_state_* variables
        # that were excluded from the restore above - confirm against
        # create_inference_graph.
        session.run(outputs['initialize_state'])

        features = audiofile_to_input_vector(input_file_path, Config.n_input, Config.n_context)

        num_strides = len(features) - (Config.n_context * 2)

        # Create a view into the array with overlapping strides of size
        # numcontext (past) + 1 (present) + numcontext (future)
        window_size = 2 * Config.n_context + 1
        features = np.lib.stride_tricks.as_strided(
            features,
            (num_strides, window_size, Config.n_input),
            (features.strides[0], features.strides[0], features.strides[1]),
            writeable=False)

        # The feeds are wrapped in lists to add the batch dimension.
        logits = session.run(outputs['outputs'], feed_dict={
            inputs['input']: [features],
            inputs['input_lengths']: [num_strides],
        })

        logits = np.squeeze(logits)

        scorer = Scorer(FLAGS.lm_alpha, FLAGS.lm_beta,
                        FLAGS.lm_binary_path, FLAGS.lm_trie_path,
                        Config.alphabet)
        decoded = ctc_beam_search_decoder(logits, Config.alphabet, FLAGS.beam_width, scorer=scorer)
        # Print highest probability result
        print(decoded[0][1])
def generate_lm(
    grouping_key: Tuple[str, str], data_df: pd.DataFrame
) -> pd.DataFrame:
    """Build a per-document scorer package and beam-search decode every row.

    The transcript stored under label ``0`` of ``data_df.transcript`` is
    written to a temporary file and turned into a scorer package at
    ``<debug_work_dir>/<identifier>/<text_document_id>.scorer`` via
    ``convert_and_filter_topk``, ``build_lm`` and ``create_bundle``;
    intermediate artifacts are deleted afterwards. Each row's
    ``log_probabilities`` are then exponentiated and decoded with
    ``ds_ctcdecoder.ctc_beam_search_decoder``.

    Args:
        grouping_key: ``(identifier, text_document_id)``; both are converted
            with ``str`` before use.
        data_df: Frame providing a ``transcript`` column and a
            ``log_probabilities`` column whose values can be reshaped to
            ``(-1, num_output_symbols)``.

    Returns:
        A one-column frame named ``"path"`` holding the decoded transcript of
        every row, in row order.

    Raises:
        AssertionError: If the alphabet does not yield 32 output symbols, the
            scorer or alphabet fails to load, or a row's probabilities do not
            sum to 1 (``atol=1e-3``).
    """
    identifier, text_document_id = grouping_key
    identifier = str(identifier)
    text_document_id = str(text_document_id)
    transcript = data_df.transcript[0]

    with tempfile.NamedTemporaryFile("w+t", dir=debug_work_dir) as input_txt:
        input_txt.write(transcript)
        # Flush so the helper below, which receives only the file name, sees
        # the full transcript on disk.
        input_txt.flush()

        os.makedirs(os.path.join(debug_work_dir, identifier), exist_ok=True)
        scorer_path = os.path.join(
            debug_work_dir, identifier, text_document_id + ".scorer"
        )
        data_lower, vocab_str = convert_and_filter_topk(
            scorer_path, input_txt.name, 500000
        )
        # NOTE(review): the positional arguments presumably mirror the options
        # of DeepSpeech's generate_lm.py (order, memory, pruning, ...) -
        # verify against build_lm's signature.
        build_lm(
            scorer_path,
            kenlm_path,
            5,
            "85%",
            "0|0|1",
            True,
            255,
            8,
            "trie",
            data_lower,
            vocab_str,
        )
        # Intermediate LM artifacts are no longer needed once the binary exists.
        for suffix in ("lower.txt.gz", "lm.arpa", "lm_filtered.arpa"):
            os.remove(f"{scorer_path}.{suffix}")

        # NOTE(review): the trailing numbers are presumably the default
        # alpha/beta stored in the package - confirm against create_bundle.
        create_bundle(
            alphabet_path,
            scorer_path + "." + "lm.binary",
            scorer_path + "." + "vocab-500000.txt",
            scorer_path,
            False,
            0.931289039105002,
            1.1834137581510284,
        )
        for suffix in ("lm.binary", "vocab-500000.txt"):
            os.remove(f"{scorer_path}.{suffix}")

    # One output symbol per alphabet line plus one extra symbol.
    with open(alphabet_path) as fh:
        num_output_symbols = len(fh.readlines()) + 1
    assert num_output_symbols == 32, f"GALVEZ:{num_output_symbols}"

    # Decoder setup does not depend on the row, so it is done once. The
    # transcript lookup above already requires a row to exist, so this runs
    # no more often than it did inside the loop.
    cutoff_prob = 1.0
    cutoff_top_n = 100
    scorer = ds_ctcdecoder.Scorer()
    result = scorer.init(
        scorer_path.encode("utf-8"), alphabet_path.encode("utf-8")
    )
    scorer.set_utf8_mode(False)
    assert result == 0, result
    alphabet = ds_ctcdecoder.Alphabet()
    result = alphabet.init(alphabet_path.encode("utf-8"))
    assert not scorer.is_utf8_mode()
    assert result == 0, result
    # NOTE(review): the scorer that was just built and validated is discarded
    # here, so decoding runs WITHOUT the language model. This matches the
    # existing behavior; confirm whether it is intentional.
    scorer = None

    transcripts = []
    for row in data_df.itertuples():
        log_probabilities = row.log_probabilities.reshape(-1, num_output_symbols)
        probabilities = np.exp(log_probabilities)
        np.testing.assert_allclose(probabilities.sum(axis=1), 1.0, atol=1e-3)

        outputs = ds_ctcdecoder.ctc_beam_search_decoder(
            probabilities, alphabet, 100, cutoff_prob, cutoff_top_n, scorer
        )
        print(f"GALVEZ:output={outputs[0][1]}")
        # NOTE(review): presumably seconds of audio at 30 ms per frame - confirm.
        print(f"GALVEZ:length={probabilities.shape[0] * 30. / 1000.}")
        transcripts.append(outputs[0][1])

    return pd.DataFrame({"path": pd.Series(transcripts)})
# Write the alphabet file consumed below, one class symbol per line.
# NOTE(review): '#' is written as '\#', presumably because the alphabet file
# format treats a leading '#' as a comment - confirm against Alphabet.
with open("chars_small.txt", "w", encoding='utf-8') as text_file:
    text_file.write('\n'.join([x if x != '#' else '\\#' for x in list(classes)]))


def softmax(matrix):
    """Return the row-wise softmax of a 2-D ``(time_steps, classes)`` array.

    The row maximum is subtracted before exponentiation; this leaves the
    result mathematically unchanged but prevents ``np.exp`` from overflowing
    on large activations.

    Args:
        matrix: 2-D array-like of raw scores.

    Returns:
        A float64 array of the same shape whose rows each sum to 1.
    """
    scores = np.asarray(matrix, dtype=np.float64)
    shifted = scores - scores.max(axis=1, keepdims=True)
    exponentials = np.exp(shifted)
    return exponentials / exponentials.sum(axis=1, keepdims=True)


def load_rnn_output(fn):
    """Load a ';'-delimited numeric file and drop its last column.

    Args:
        fn: File name passed to ``np.genfromtxt``.

    Returns:
        The parsed 2-D array without its final column.
    """
    # NOTE(review): the last column is presumably an empty field produced by
    # a trailing ';' on every line - confirm against the CSV writer.
    return np.genfromtxt(fn, delimiter=';')[:, :-1]


alphabet = Alphabet(os.path.abspath("chars_small.txt"))
crnn_output = softmax(load_rnn_output('./rnn_output.csv'))
res = ctc_beam_search_decoder(probs_seq=crnn_output,
                              alphabet=alphabet,
                              beam_size=25,
                              scorer=Scorer(alphabet=alphabet,
                                            scorer_path='iam.scorer',
                                            alpha=0.75,
                                            beta=1.85))

# predicted: the fake friend of the family has to
# actual: the fake friend of the family, like the
print(res[0][1])