コード例 #1
0
 def input_fn():
     """Build a repeated, padded-batch (size 3) dataset over the mock data."""
     pipeline = LmInputDataPipeline(MockVocab(), batch_size=None)
     raw_dataset = tf.data.Dataset.from_generator(
         input_generator, output_types=tf.string)
     dataset = pipeline.load_data(raw_dataset).repeat()
     return pipeline.padded_batch(dataset, 3)
コード例 #2
0
def test_load_no_batching():
    """With batch_size=None, load_data should yield one (features, labels)
    pair per input sentence: embedded "inputs", a scalar "length", and
    integer "targets" ids.

    The embedding values below come from MockVocab's fixed lookup table
    (presumably — confirm against MockVocab's definition).
    """
    def input_generator():
        yield ["a", "b", "c"]
        yield ["c", "b"]

    # Expected (features, labels) pairs for the two generated sentences.
    expected_output = [
        (
            {
                "inputs":
                np.array([[0.0, 1.0, 0.0, 0.0, 0.0, 0.0],
                          [0.0, 0.0, 0.0, 1.5, 2.5, 3.5],
                          [0.0, 0.0, 0.0, 4.5, 5.5, 6.5],
                          [0.0, 0.0, 0.0, 7.5, 8.5, 9.5]],
                         dtype=np.float32),
                "length":
                np.array(4, dtype=np.int32),
            },
            {
                "targets": np.array([4, 5, 6, 2], dtype=np.int32)
            },
        ),
        (
            {
                "inputs":
                np.array([
                    [0.0, 1.0, 0.0, 0.0, 0.0, 0.0],
                    [0.0, 0.0, 0.0, 7.5, 8.5, 9.5],
                    [0.0, 0.0, 0.0, 4.5, 5.5, 6.5],
                ],
                         dtype=np.float32),
                "length":
                np.array(3, dtype=np.int32),
            },
            {
                "targets": np.array([6, 5, 2], dtype=np.int32)
            },
        ),
    ]

    input_dataset = tf.data.Dataset.from_generator(input_generator,
                                                   output_types=tf.string)

    vocab = MockVocab()
    input_pipeline = LmInputDataPipeline(vocab, batch_size=None)
    input_data = input_pipeline.load_data(input_dataset)

    it = input_data.make_initializable_iterator()
    example = it.get_next()

    with tf.Session() as sess:
        # The vocab lookup is table-backed, so tables must be initialized
        # before the iterator is consumed.
        sess.run(tf.tables_initializer())
        sess.run(it.initializer)
        # Iterate the expected pairs directly; the index was unused.
        for expected in expected_output:
            actual = sess.run(example)
            assert actual[0]["inputs"] == approx(expected[0]["inputs"])
            assert actual[0]["length"] == approx(expected[0]["length"])
            assert actual[1]["targets"] == approx(expected[1]["targets"])
コード例 #3
0
 def create_input():
     """Return the shuffled, repeating TRAIN token stream fed through the
     glove pipeline with batch size 8."""
     tokens = SimpleExamplesCorpus().get_tokens_dataset(DatasetType.TRAIN)
     tokens = tokens.repeat().shuffle(1000, seed=0)
     pipeline = LmInputDataPipeline(glove, 8)
     return pipeline.load_data(tokens)
コード例 #4
0
 def input_fn():
     """Build an endlessly repeating dataset (batch size 3) from the mock
     generator."""
     pipeline = LmInputDataPipeline(MockVocab(), batch_size=3)
     raw_dataset = tf.data.Dataset.from_generator(
         input_generator, output_types=tf.string)
     return pipeline.load_data(raw_dataset).repeat()
コード例 #5
0
 def create_input():
     """Feed the un-batched TRAIN token stream through the glove pipeline."""
     corpus = SimpleExamplesCorpus()
     tokens = corpus.get_tokens_dataset(DatasetType.TRAIN)
     return LmInputDataPipeline(glove, None).load_data(tokens)