def create_test_iterator(hparams, mode):
  """Create test iterator."""
  # Source vocab: eos plus four tokens; ids follow tensor order.
  source_words = tf.constant([hparams.eos, "a", "b", "c", "d"])
  src_vocab_table = lookup_ops.index_table_from_tensor(source_words)
  # Target vocab keeps sos/eos at ids 0/1.
  target_words = tf.constant([hparams.sos, hparams.eos, "a", "b", "c"])
  tgt_vocab_table = lookup_ops.index_table_from_tensor(target_words)

  src_dataset = tf.data.Dataset.from_tensor_slices(
      tf.constant(["a a b b c", "a b b"]))

  if mode == tf.contrib.learn.ModeKeys.INFER:
    # Inference additionally needs an id->string table to decode outputs.
    reverse_tgt_vocab_table = lookup_ops.index_to_string_table_from_tensor(
        target_words)
    infer_iterator = iterator_utils.get_infer_iterator(
        src_dataset=src_dataset,
        src_vocab_table=src_vocab_table,
        eos=hparams.eos,
        source_reverse=hparams.source_reverse,
        batch_size=hparams.batch_size)
    return (infer_iterator, src_vocab_table, tgt_vocab_table,
            reverse_tgt_vocab_table)

  # Train/eval path: paired source/target iterator.
  tgt_dataset = tf.data.Dataset.from_tensor_slices(
      tf.constant(["a b c b c", "a b c b"]))
  train_iterator = iterator_utils.get_iterator(
      src_dataset=src_dataset,
      tgt_dataset=tgt_dataset,
      src_vocab_table=src_vocab_table,
      tgt_vocab_table=tgt_vocab_table,
      batch_size=hparams.batch_size,
      sos=hparams.sos,
      eos=hparams.eos,
      source_reverse=hparams.source_reverse,
      random_seed=hparams.random_seed,
      num_buckets=hparams.num_buckets)
  return train_iterator, src_vocab_table, tgt_vocab_table
def testGetInferIterator(self):
  """Checks batching, unknown tokens, eos padding and src_max_len truncation."""
  # Vocab ids follow tensor order: a=0, b=1, c=2, eos=3, sos=4.
  vocab_table = lookup_ops.index_table_from_tensor(
      tf.constant(["a", "b", "c", "eos", "sos"]))
  infer_dataset = tf.data.Dataset.from_tensor_slices(
      tf.constant(["c c a", "c a", "d", "f e a g"]))
  hparams = tf.contrib.training.HParams(
      random_seed=3, source_reverse=False, eos="eos", sos="sos")
  iterator = iterator_utils.get_infer_iterator(
      src_dataset=infer_dataset,
      src_vocab_table=vocab_table,
      batch_size=2,
      eos=hparams.eos,
      source_reverse=hparams.source_reverse,
      src_max_len=3)
  source = iterator.source
  seq_len = iterator.source_sequence_length
  # Static shapes are only partially known before running the iterator.
  self.assertEqual([None, None], source.shape.as_list())
  self.assertEqual([None], seq_len.shape.as_list())
  with self.test_session() as sess:
    sess.run(tf.tables_initializer())
    sess.run(iterator.initializer)

    # First batch: full sentence and one eos-padded sentence.
    source_v, seq_len_v = sess.run((source, seq_len))
    self.assertAllEqual(
        [
            [2, 2, 0],  # c c a
            [2, 0, 3]
        ],  # c a eos
        source_v)
    self.assertAllEqual([3, 2], seq_len_v)

    # Second batch: unknown tokens map to -1; "f e a g" is cut to 3 tokens.
    source_v, seq_len_v = sess.run((source, seq_len))
    self.assertAllEqual(
        [
            [-1, 3, 3],  # "d" == unknown, eos eos
            [-1, -1, 0]
        ],  # "f" == unknown, "e" == unknown, a
        source_v)
    self.assertAllEqual([1, 3], seq_len_v)

    # Dataset is exhausted after two batches.
    with self.assertRaisesOpError("End of sequence"):
      sess.run((source, seq_len))
def create_infer_model(model_creator, hparams, scope=None, extra_args=None):
  """Create inference model."""
  graph = tf.Graph()

  with graph.as_default(), tf.container(scope or "infer"):
    # Forward (string -> id) tables for source, target and label vocabularies.
    (src_vocab_table, tgt_vocab_table,
     lbl_vocab_table) = vocab_utils.create_vocab_tables(
         hparams.src_vocab_file, hparams.tgt_vocab_file,
         hparams.lbl_vocab_file, hparams.share_vocab)
    # Reverse (id -> string) tables so predictions can be decoded back to text.
    reverse_tgt_vocab_table = lookup_ops.index_to_string_table_from_file(
        hparams.tgt_vocab_file, default_value=vocab_utils.UNK)
    reverse_lbl_vocab_table = lookup_ops.index_to_string_table_from_file(
        hparams.lbl_vocab_file, default_value=vocab_utils.UNK)

    # Inference input is fed at run time through these placeholders.
    src_placeholder = tf.placeholder(shape=[None], dtype=tf.string)
    batch_size_placeholder = tf.placeholder(shape=[], dtype=tf.int64)
    iterator = iterator_utils.get_infer_iterator(
        tf.data.Dataset.from_tensor_slices(src_placeholder),
        src_vocab_table,
        batch_size=batch_size_placeholder,
        eos=hparams.eos,
        src_max_len=hparams.src_max_len_infer)

    model = model_creator(
        hparams,
        iterator=iterator,
        mode=tf.contrib.learn.ModeKeys.INFER,
        source_vocab_table=src_vocab_table,
        target_vocab_table=tgt_vocab_table,
        label_vocab_table=lbl_vocab_table,
        reverse_target_vocab_table=reverse_tgt_vocab_table,
        reverse_target_intent_vocab_table=reverse_lbl_vocab_table,
        scope=scope,
        extra_args=extra_args)

  return InferModel(
      graph=graph,
      model=model,
      src_placeholder=src_placeholder,
      batch_size_placeholder=batch_size_placeholder,
      iterator=iterator)
def create_infer_model(model_creator, hparams):
  """Create inference model.

  Builds a fresh graph containing vocab lookup tables, feed placeholders for
  the two source streams, an inference iterator and the model itself.

  Args:
    model_creator: Callable that constructs the model
      (hparams, iterator, mode, vocab tables).
    hparams: Hyperparameters; must provide data_dir, word_vocab, pos_vocab and
      role_vocab file names.

  Returns:
    An InferModel tuple with the graph, model, input placeholders and iterator.
  """
  src1_vocab_file = "%s/%s" % (hparams.data_dir, hparams.word_vocab)
  src2_vocab_file = "%s/%s" % (hparams.data_dir, hparams.pos_vocab)
  tgt_vocab_file = "%s/%s" % (hparams.data_dir, hparams.role_vocab)
  graph = tf.Graph()
  with graph.as_default(), tf.container("infer"):
    src1_vocab_table, src2_vocab_table, tgt_vocab_table = create_vocab_tables(
        src1_vocab_file, src2_vocab_file, tgt_vocab_file)
    # id -> string table so predicted target ids can be decoded to text.
    reverse_tgt_vocab_table = lookup_ops.index_to_string_table_from_file(
        tgt_vocab_file, default_value=data_utils._UNK)
    # Inference inputs are fed at run time (presumably words and POS tags,
    # matching word_vocab/pos_vocab — confirm against callers).
    src1_placeholder = tf.placeholder(shape=[None], dtype=tf.string)
    src2_placeholder = tf.placeholder(shape=[None], dtype=tf.string)
    batch_size_placeholder = tf.placeholder(shape=[], dtype=tf.int64)
    # Use tf.data.Dataset (tf.contrib.data is deprecated and its Dataset class
    # was removed in later TF 1.x releases); matches the rest of this file.
    src1_dataset = tf.data.Dataset.from_tensor_slices(src1_placeholder)
    src2_dataset = tf.data.Dataset.from_tensor_slices(src2_placeholder)
    iterator = iterator_utils.get_infer_iterator(
        src1_dataset,
        src2_dataset,
        src1_vocab_table,
        src2_vocab_table,
        batch_size=batch_size_placeholder)
    model = model_creator(
        hparams,
        iterator=iterator,
        mode=tf.contrib.learn.ModeKeys.INFER,
        src1_vocab_table=src1_vocab_table,
        src2_vocab_table=src2_vocab_table,
        tgt_vocab_table=tgt_vocab_table,
    )
  return InferModel(
      graph=graph,
      model=model,
      src1_placeholder=src1_placeholder,
      src2_placeholder=src2_placeholder,
      batch_size_placeholder=batch_size_placeholder,
      iterator=iterator)