def text_pipeline_func(self, batch, seq_len, vocab_path):
    """Core text-processing pipeline: tokenize a batch of sentences.

    Args:
        batch: batch of raw sentences to tokenize.
        seq_len: maximum sequence length passed to the tokenizer.
        vocab_path: path to the text vocabulary file (relative paths are
            resolved to absolute before use).

    Returns:
        Token ids produced by ``tokenize_sentence``.
    """
    resolved_vocab_path = os.path.abspath(vocab_path)
    return tokenize_sentence(batch, seq_len, resolved_vocab_path)
def common_process_pipeline(self, batch):
    """Common text-processing pipeline.

    Shared by online training and offline inference: tokenizes ``batch``
    using the instance's configured vocabulary file and maximum sequence
    length.

    Args:
        batch: batch of raw sentences to tokenize.

    Returns:
        Token ids produced by ``tokenize_sentence``.
    """
    return tokenize_sentence(
        batch,
        self.max_seq_len,
        os.path.abspath(self.text_vocab_file_path),
    )
def test_label_and_text(self):
    """Tokenize a text batch and a label batch and verify both id sequences."""
    max_len = 2
    text_ids_t = tokenize_sentence(["O O"], max_len, self.vocab_text_filepath)
    label_ids_t = tokenize_label(["B B"], max_len, self.vocab_label_filepath, -1)
    with self.cached_session(use_gpu=False, force_gpu=False) as sess:
        results = sess.run([text_ids_t, label_ids_t])
        logging.debug(results)
        # "O" maps to id 3 in the text vocab; "B" maps to id 0 in the label vocab.
        self.assertAllEqual(results[0], [[3, 3]])
        self.assertAllEqual(results[1], [[0, 0]])