    def test_batch_size_two_fit_generator_test(self):
        """Tests the case when the batch size is two for fit_generator.

        The input text is specified in the above __init__ method.
        """

        config = word_id_example_queue_config_pb2.WordIdExampleQueueConfig()
        config.file_name = self._input_text_file.name
        config.batch_size = 2
        config.randomize_order = False
        config.fit_input_format = True

        word_id_example_queue = WordIdExampleQueue(config)

        expected_output_list = []
        expected_output_list.append(
            # inputs part.
            (
                np.array([[2, 10, 3, 8, 13, 9], [2, 11, 3, 8, 12, 9]]),
                # targets part.
                np.array([[10, 3, 8, 13, 9, 1], [11, 3, 8, 12, 9, 1]])))

        expected_output_list.append(
            # inputs part.
            (
                np.array([[2, 7, 5, 4, 3], [2, 7, 5, 4, 6]]),
                # targets part.
                np.array([[7, 5, 4, 3, 1], [7, 5, 4, 6, 1]])))

        actual_output_list = []
        for data in word_id_example_queue.dataset:
            actual_output_list.append(data)

        # The output contains two batches. 5 // 2 = 2.
        self.assertEqual(2, len(actual_output_list))

        # Each batch is a tuple with two elements, since the format is
        # (inputs, targets).
        self.assertEqual(2, len(actual_output_list[0]))

        # Checks whether the batch size is two.
        self.assertEqual(2, actual_output_list[0][0].shape[0])

        # Compares the contents of the actual outputs with the expected output.
        self.assertTrue(
            (expected_output_list[0][0] == actual_output_list[0][0].numpy()
             ).all())
        self.assertTrue(
            (expected_output_list[0][1] == actual_output_list[0][1].numpy()
             ).all())
        self.assertTrue(
            (expected_output_list[1][0] == actual_output_list[1][0].numpy()
             ).all())
        self.assertTrue(
            (expected_output_list[1][1] == actual_output_list[1][1].numpy()
             ).all())

    def test_batch_size_two_session_test(self):
        """Tests the case when the batch size is two (tf.Session run loop).

        The input text is specified in the above __init__ method.
        """

        config = word_id_example_queue_config_pb2.WordIdExampleQueueConfig()
        config.file_name = self._input_text_file.name
        config.batch_size = 2
        config.randomize_order = False

        word_id_example_queue = WordIdExampleQueue(config)

        expected_output_list = []
        expected_output_list.append(
            np.array([
                [2, 10, 3, 8, 13, 9, 1],
                [2, 11, 3, 8, 12, 9, 1],
            ]))
        expected_output_list.append(
            np.array([
                [2, 7, 5, 4, 3, 1],
                [2, 7, 5, 4, 6, 1],
            ]))

        actual_output_list = []
        with tf.Session() as sess:
            sess.run(word_id_example_queue.initializer)
            while True:
                try:
                    actual_output_list.append(
                        sess.run(word_id_example_queue.get_batch()))

                except tf.errors.OutOfRangeError:
                    break

        # The output contains two batches. 5 // 2 = 2.
        self.assertEqual(2, len(actual_output_list))

        # Each batch is expected to contain two examples, because the batch
        # size is two.
        self.assertEqual(2, actual_output_list[0].shape[0])

        # Compares the contents of the actual outputs with the expected output.
        self.assertTrue(
            (expected_output_list[0] == actual_output_list[0]).all())
        self.assertTrue(
            (expected_output_list[1] == actual_output_list[1]).all())

    def test_batch_size_one_session_test(self):
        """Tests the case when the batch size is one (tf.Session run loop).

        The input text is specified in the above __init__ method.
        """

        config = word_id_example_queue_config_pb2.WordIdExampleQueueConfig()
        config.file_name = self._input_text_file.name
        config.batch_size = 1
        config.randomize_order = False

        word_id_example_queue = WordIdExampleQueue(config)

        expected_output_list = []
        expected_output_list.append(np.array([2, 10, 3, 8, 13, 9, 1]))
        expected_output_list.append(np.array([2, 11, 3, 8, 12, 9, 1]))
        expected_output_list.append(np.array([2, 7, 5, 4, 3, 1]))
        expected_output_list.append(np.array([2, 7, 5, 4, 6, 1]))
        expected_output_list.append(np.array([2, 10, 6, 5, 4, 3, 1]))

        actual_output_list = []
        with tf.Session() as sess:
            sess.run(word_id_example_queue.initializer)
            while True:
                try:
                    actual_output_list.append(
                        sess.run(word_id_example_queue.get_batch()))

                except tf.errors.OutOfRangeError:
                    break

        # The output contains five batches.
        self.assertEqual(len(expected_output_list), len(actual_output_list))

        # Each batch is expected to contain one example, because the batch
        # size is one.
        self.assertEqual(1, actual_output_list[0].shape[0])

        for index, expected_output in enumerate(expected_output_list):
            self.assertTrue(
                (expected_output == actual_output_list[index]).all())

    def test_batch_size_two_test(self):
        """Tests the case when the batch size is two.

        The input text is specified in the above __init__ method.
        """

        config = word_id_example_queue_config_pb2.WordIdExampleQueueConfig()
        config.file_name = self._input_text_file.name
        config.batch_size = 2
        config.randomize_order = False
        config.fit_input_format = False

        word_id_example_queue = WordIdExampleQueue(config)

        expected_output_list = []
        expected_output_list.append(
            np.array([
                [2, 10, 3, 8, 13, 9, 1],
                [2, 11, 3, 8, 12, 9, 1],
            ]))
        expected_output_list.append(
            np.array([
                [2, 7, 5, 4, 3, 1],
                [2, 7, 5, 4, 6, 1],
            ]))

        actual_output_list = []
        for data in word_id_example_queue.dataset:
            actual_output_list.append(data.numpy())

        # The output contains two batches. 5 // 2 = 2.
        self.assertEqual(2, len(actual_output_list))

        # Each batch is expected to contain two examples, because the batch
        # size is two.
        self.assertEqual(2, actual_output_list[0].shape[0])

        # Compares the contents of the actual outputs with the expected output.
        self.assertTrue(
            (expected_output_list[0] == actual_output_list[0]).all())
        self.assertTrue(
            (expected_output_list[1] == actual_output_list[1]).all())

    def test_batch_size_one_test(self):
        """Tests the case when the batch size is one.

        The input text is specified in the above __init__ method.
        """

        config = word_id_example_queue_config_pb2.WordIdExampleQueueConfig()
        config.file_name = self._input_text_file.name
        config.batch_size = 1
        config.randomize_order = False
        config.fit_input_format = False

        word_id_example_queue = WordIdExampleQueue(config)

        expected_output_list = []
        expected_output_list.append(np.array([2, 10, 3, 8, 13, 9, 1]))
        expected_output_list.append(np.array([2, 11, 3, 8, 12, 9, 1]))
        expected_output_list.append(np.array([2, 7, 5, 4, 3, 1]))
        expected_output_list.append(np.array([2, 7, 5, 4, 6, 1]))
        expected_output_list.append(np.array([2, 10, 6, 5, 4, 3, 1]))

        actual_output_list = []

        for data in word_id_example_queue.dataset:
            actual_output_list.append(data.numpy())

        # The output contains five batches.
        self.assertEqual(len(expected_output_list), len(actual_output_list))

        # Each batch is expected to contain one example, because the batch
        # size is one.
        self.assertEqual(1, actual_output_list[0].shape[0])

        for index, expected_output in enumerate(expected_output_list):
            self.assertTrue(
                (expected_output == actual_output_list[index]).all())
        #self._dropout_2 =  tf.keras.layers.Dropout(
        #    dropout_rate, noise_shape=(input_shape[0], 1, cell_size)),
        self._softmax = tf.keras.layers.Dense(
            target_size,
            activation="softmax",
        )

    def call(self, inputs, training=False):
        output = self._embedding(inputs)
        output = self._lstm_0(output)
        output = self._softmax(output)

        return output


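# Standalone script: builds a WordIdExampleQueue over the PTB training text
# and defines the language-model hyper-parameters.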
config = word_id_example_queue_config_pb2.WordIdExampleQueueConfig()

# TODO: The following hard-coded file path needs to be fixed.
config.file_name = "/home/chanwcom/chanwcom_local_repository/star_organization/speech01/speech/speech/trainer/keras_lm_trainer/data/ptb.train.txt"
config.batch_size = 20
config.randomize_order = False

word_id_example_queue = WordIdExampleQueue(config)

# TODO(chanw.com): Add a routine to construct the example queue.

#target_size = 10002
target_size = 10010
cell_size = 256
dropout_rate = 0.5
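

# A minimal, hypothetical sketch of how the model fragment above could be
# completed and trained on the queue's dataset. The class name WordLstmModel,
# its constructor arguments, the embedding/LSTM layer choices, and the
# optimizer/loss settings are assumptions, not the original author's code;
# the .dataset attribute and the fit_input_format flag are taken from the
# unit tests above.
class WordLstmModel(tf.keras.Model):
    """A hypothetical LSTM language model mirroring the fragment above."""

    def __init__(self, target_size, cell_size, dropout_rate):
        super().__init__()
        self._embedding = tf.keras.layers.Embedding(target_size, cell_size)
        self._lstm_0 = tf.keras.layers.LSTM(cell_size, return_sequences=True)
        self._dropout_0 = tf.keras.layers.Dropout(dropout_rate)
        self._softmax = tf.keras.layers.Dense(
            target_size,
            activation="softmax",
        )

    def call(self, inputs, training=False):
        output = self._embedding(inputs)
        output = self._lstm_0(output)
        output = self._dropout_0(output, training=training)
        return self._softmax(output)


# With fit_input_format set to True, the dataset yields (inputs, targets)
# pairs, as exercised in test_batch_size_two_fit_generator_test above.
config.fit_input_format = True
word_id_example_queue = WordIdExampleQueue(config)

model = WordLstmModel(target_size, cell_size, dropout_rate)
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy")
model.fit(word_id_example_queue.dataset, epochs=1)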