Exemple #1
0
 def testLSTMBlockCell(self):
     """Run a two-layer LSTMBlockCell stack once and check fixed outputs.

     The stack is built under a "root" variable scope whose default
     initializer is the constant 0.5. The input is fed as ones and all
     four state tensors are fed as 0.1; the output and the four resulting
     state tensors are compared against hard-coded reference values.
     """
     with self.session(use_gpu=True, graph=ops.Graph()) as sess:
         const_init = init_ops.constant_initializer(0.5)
         with variable_scope.variable_scope("root", initializer=const_init):
             x = array_ops.zeros([1, 2])
             # Four state tensors: two per layer.
             m0, m1, m2, m3 = [array_ops.zeros([1, 2]) for _ in range(4)]
             stacked_cell = rnn_cell.MultiRNNCell(
                 [lstm_ops.LSTMBlockCell(2) for _ in range(2)],
                 state_is_tuple=True)
             g, ((out_m0, out_m1), (out_m2, out_m3)) = stacked_cell(
                 x, ((m0, m1), (m2, m3)))
             sess.run([variables.global_variables_initializer()])

             # Override the zero tensors by name with the actual test values.
             feed = {x.name: np.array([[1., 1.]])}
             for state in (m0, m1, m2, m3):
                 feed[state.name] = 0.1 * np.ones([1, 2])
             res = sess.run([g, out_m0, out_m1, out_m2, out_m3], feed)

             self.assertEqual(len(res), 5)
             # Each fetched tensor holds the same value in both columns.
             # The state numbers are from testBasicLSTMCell and only test c/h.
             expected_values = (
                 0.24024698,  # g
                 0.68967271,  # out_m0
                 0.44848421,  # out_m1
                 0.39897051,  # out_m2
                 0.24024698,  # out_m3
             )
             for actual, value in zip(res, expected_values):
                 self.assertAllClose(actual, [[value, value]])
    def benchmarkTfRNNLSTMBlockCellTraining(self):
        """Benchmark gradient computation through stacked LSTMBlockCells.

        For every named configuration returned by ``self._GetTestConfig()``,
        builds a fresh graph pinned to "/device:GPU:0" containing a
        MultiRNNCell of LSTMBlockCells unrolled with ``dynamic_rnn`` over
        an all-zeros input, groups the gradients of the outputs and final
        state with respect to the trainable variables, and hands that
        grouped op to ``self._BenchmarkOp``.
        """
        config_keys = ("num_layers", "num_units", "batch_size", "seq_length")
        for config_name, config in self._GetTestConfig().items():
            layer_count, unit_count, batch, steps = (
                config[key] for key in config_keys)

            with ops.Graph().as_default(), ops.device("/device:GPU:0"):
                # Input feature size is set equal to the number of units.
                inputs = array_ops.zeros([batch, steps, unit_count],
                                         dtypes.float32)

                layers = [
                    lstm_ops.LSTMBlockCell(unit_count)
                    for _ in range(layer_count)
                ]
                stacked_cell = contrib_rnn.MultiRNNCell(layers)
                outputs, final_state = rnn.dynamic_rnn(
                    stacked_cell, inputs, dtype=dtypes.float32)

                params = ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)
                grads = gradients_impl.gradients([outputs, final_state],
                                                 params)
                # A single op that triggers every gradient computation.
                training_op = control_flow_ops.group(*grads)
                benchmark_label = "tf_rnn_lstm_block_cell %s %s" % (
                    config_name, self._GetConfigDesc(config))
                self._BenchmarkOp(training_op, benchmark_label)
Exemple #3
0
    def testLSTMBasicToBlockCell(self):
        """Check that LSTMBlockCell matches BasicLSTMCell on the same inputs.

        Two-layer stacks of each cell type are built under separate variable
        scopes ("basic" and "block") that share one seeded random-uniform
        initializer. Both are fed the same random input and the same state
        values, and every fetched tensor is compared pairwise.
        """
        with self.session(use_gpu=True) as sess:
            x = array_ops.zeros([1, 2])
            x_values = np.random.randn(1, 2)

            # Values fed for m0, m1, m2 and m3 respectively.
            state_values = (
                0.1 * np.ones([1, 2]),
                -0.1 * np.ones([1, 2]),
                -0.2 * np.ones([1, 2]),
                0.2 * np.ones([1, 2]),
            )

            initializer = init_ops.random_uniform_initializer(
                -0.01, 0.01, seed=19890212)

            def run_stack(scope_name, make_cell):
                # Build a two-layer stack in its own scope and evaluate it.
                with variable_scope.variable_scope(scope_name,
                                                   initializer=initializer):
                    m0, m1, m2, m3 = [
                        array_ops.zeros([1, 2]) for _ in range(4)
                    ]
                    stacked_cell = rnn_cell.MultiRNNCell(
                        [make_cell() for _ in range(2)],
                        state_is_tuple=True)
                    g, ((out_m0, out_m1), (out_m2, out_m3)) = stacked_cell(
                        x, ((m0, m1), (m2, m3)))
                    sess.run([variables.global_variables_initializer()])

                    feed = {x.name: x_values}
                    for state, value in zip((m0, m1, m2, m3), state_values):
                        feed[state.name] = value
                    return sess.run([g, out_m0, out_m1, out_m2, out_m3], feed)

            basic_res = run_stack(
                "basic",
                lambda: rnn_cell.BasicLSTMCell(2, state_is_tuple=True))
            block_res = run_stack("block", lambda: lstm_ops.LSTMBlockCell(2))

            self.assertEqual(len(basic_res), len(block_res))
            for basic, block in zip(basic_res, block_res):
                self.assertAllClose(basic, block)
Exemple #4
0
    def benchmarkLSTMBlockCellFpropWithDynamicRNN(self):
        """Benchmark LSTMBlockCell forward propagation through dynamic_rnn().

        Iterates over every combination produced by
        ``benchmarking.dict_product`` of batch size, cell size, time steps,
        device (GPU/CPU) and dtype. For each one it builds a fresh graph with
        a time-major input variable of shape
        ``[time_steps, batch_size, cell_size]``, measures seconds per run via
        ``benchmarking.seconds_per_run``, prints one CSV row to stdout and
        reports the result through ``self.report_benchmark``.
        """
        print("BlockLSTMCell forward propagation via dynamic_rnn().")
        print("--------------------------------------------------------------")
        print("LSTMBlockCell Seconds per inference.")
        # The header must list the same columns, in the same order, as the
        # rows printed below; each row starts with the dtype.
        print("dtype,batch_size,cell_size,input_size,time_steps,use_gpu,"
              "wall_time")
        iters = 10
        for config in benchmarking.dict_product({
                "batch_size": [1, 8, 13, 32, 67, 128],
                "cell_size": [128, 250, 512, 650, 1024, 1350],
                "time_steps": [40],
                "use_gpu": [True, False],
                "dtype": ["float32", "float16"],
        }):
            dtype = dtypes.float32 if config[
                "dtype"] == "float32" else dtypes.float16
            with ops.Graph().as_default():
                with benchmarking.device(use_gpu=config["use_gpu"]):
                    # Input feature size equals the cell size.
                    inputs = variable_scope.get_variable(
                        "x",
                        dtype=dtype,
                        shape=[
                            config["time_steps"], config["batch_size"],
                            config["cell_size"]
                        ])
                    cell = lstm_ops.LSTMBlockCell(config["cell_size"],
                                                  dtype=dtype)
                    outputs = rnn.dynamic_rnn(cell,
                                              inputs,
                                              time_major=True,
                                              dtype=dtype)
                    init_op = variables.global_variables_initializer()

                with session.Session() as sess:
                    sess.run(init_op)
                    wall_time = benchmarking.seconds_per_run(
                        outputs, sess, iters)

                # Print to stdout. If the TEST_REPORT_FILE_PREFIX environment variable
                # is set, this will produce a copy-paste-able CSV file.
                # cell_size appears twice: once as the cell size and once as
                # the input size, which are identical in this benchmark.
                print(",".join(
                    map(str, [
                        config["dtype"], config["batch_size"],
                        config["cell_size"], config["cell_size"],
                        config["time_steps"], config["use_gpu"], wall_time
                    ])))
                benchmark_name_template = "_".join([
                    "LSTMBlockCell_fprop", "DT_%(dtype)s", "BS%(batch_size)i",
                    "CS%(cell_size)i", "IS%(cell_size)i", "TS%(time_steps)i",
                    "gpu_%(use_gpu)s"
                ])

                self.report_benchmark(name=benchmark_name_template % config,
                                      iters=iters,
                                      wall_time=wall_time,
                                      extras=config)
Exemple #5
0
    def testCompatibleNames(self):
        """Check that the three LSTM implementations create matching variables.

        Builds a plain and a peephole variant of LSTMCell, LSTMBlockCell and
        LSTMBlockFusedCell, each family in its own graph, and asserts that
        the mapping from trainable-variable name to shape is identical
        across all three.
        """

        def trainable_shapes():
            # Name -> static shape for every trainable variable in the
            # current default graph.
            return {
                var.name: var.get_shape()
                for var in variables.trainable_variables()
            }

        def static_rnn_shapes(plain_cell, peephole_cell):
            # Unroll both cells with static_rnn under fixed scope names.
            steps = [array_ops.zeros([4, 5])] * 6
            rnn.static_rnn(plain_cell,
                           steps,
                           dtype=dtypes.float32,
                           scope="basic")
            rnn.static_rnn(peephole_cell,
                           steps,
                           dtype=dtypes.float32,
                           scope="peephole")
            return trainable_shapes()

        with self.session(use_gpu=True, graph=ops.Graph()):
            basic_names = static_rnn_shapes(
                rnn_cell.LSTMCell(10),
                rnn_cell.LSTMCell(10, use_peepholes=True))

        with self.session(use_gpu=True, graph=ops.Graph()):
            block_names = static_rnn_shapes(
                lstm_ops.LSTMBlockCell(10),
                lstm_ops.LSTMBlockCell(10, use_peephole=True))

        with self.session(use_gpu=True, graph=ops.Graph()):
            fused_cell = lstm_ops.LSTMBlockFusedCell(10)
            fused_peephole_cell = lstm_ops.LSTMBlockFusedCell(
                10, use_peephole=True)
            # The fused cells are called directly on one stacked tensor.
            stacked_inputs = array_ops.stack([array_ops.zeros([4, 5])] * 6)
            fused_cell(stacked_inputs,
                       dtype=dtypes.float32,
                       scope="basic/lstm_cell")
            fused_peephole_cell(stacked_inputs,
                                dtype=dtypes.float32,
                                scope="peephole/lstm_cell")
            fused_names = trainable_shapes()

        self.assertEqual(basic_names, block_names)
        self.assertEqual(basic_names, fused_names)
Exemple #6
0
    def testNoneDimsWithDynamicRNN(self):
        """Run LSTMBlockCell via dynamic_rnn with unknown leading dimensions.

        The placeholder leaves its first two dimensions as ``None``; the test
        passes if the graph builds and a random time-major batch evaluates
        without raising.
        """
        batch_size, num_steps, input_dim, cell_size = 4, 5, 6, 7
        with self.session(use_gpu=True, graph=ops.Graph()) as sess:
            cell = lstm_ops.LSTMBlockCell(cell_size)
            # Only the feature dimension is statically known.
            x = array_ops.placeholder(dtypes.float32,
                                      shape=(None, None, input_dim))

            output, _ = rnn.dynamic_rnn(
                cell, x, time_major=True, dtype=dtypes.float32)
            sess.run(variables.global_variables_initializer())
            sess.run(output,
                     {x: np.random.randn(num_steps, batch_size, input_dim)})