def testLSTMBlockCell(self):
  """Checks one step of a two-layer LSTMBlockCell stack against fixed values.

  Variables are created under a constant 0.5 initializer, the input is fed as
  ones and each of the four state tensors is fed as 0.1, so the step output
  and the new states can be compared with hard-coded numbers.
  """
  with self.session(use_gpu=True, graph=ops.Graph()) as sess:
    const_init = init_ops.constant_initializer(0.5)
    with variable_scope.variable_scope("root", initializer=const_init):
      x = array_ops.zeros([1, 2])
      m0 = array_ops.zeros([1, 2])
      m1 = array_ops.zeros([1, 2])
      m2 = array_ops.zeros([1, 2])
      m3 = array_ops.zeros([1, 2])
      stacked_cell = rnn_cell.MultiRNNCell(
          [lstm_ops.LSTMBlockCell(2) for _ in range(2)], state_is_tuple=True)
      g, ((out_m0, out_m1), (out_m2, out_m3)) = stacked_cell(
          x, ((m0, m1), (m2, m3)))
      sess.run([variables.global_variables_initializer()])

      # Tensors are fed by name: ones for the input, 0.1 for every state.
      feed = {x.name: np.array([[1., 1.]])}
      for state in (m0, m1, m2, m3):
        feed[state.name] = 0.1 * np.ones([1, 2])
      res = sess.run([g, out_m0, out_m1, out_m2, out_m3], feed)

      self.assertEqual(len(res), 5)
      # The first entry is the step output. The remaining numbers are from
      # testBasicLSTMCell and only test c/h.
      expected = (0.24024698, 0.68967271, 0.44848421, 0.39897051, 0.24024698)
      for actual, value in zip(res, expected):
        self.assertAllClose(actual, [[value, value]])
def benchmarkTfRNNLSTMBlockCellTraining(self):
  """Benchmarks gradient computation through stacked LSTMBlockCells.

  For every configuration returned by `self._GetTestConfig()`, a fresh graph
  is built on "/device:GPU:0" containing a `dynamic_rnn` over a
  `MultiRNNCell` of `LSTMBlockCell`s. The gradients of the outputs and final
  state with respect to the trainable variables are grouped into a single op,
  which is handed to `self._BenchmarkOp`.
  """
  for config_name, config in self._GetTestConfig().items():
    num_layers = config["num_layers"]
    num_units = config["num_units"]
    batch_size = config["batch_size"]
    seq_length = config["seq_length"]

    with ops.Graph().as_default():
      with ops.device("/device:GPU:0"):
        inputs = array_ops.zeros([batch_size, seq_length, num_units],
                                 dtypes.float32)
        layers = [lstm_ops.LSTMBlockCell(num_units) for _ in range(num_layers)]
        multi_cell = contrib_rnn.MultiRNNCell(layers)
        outputs, final_state = rnn.dynamic_rnn(
            multi_cell, inputs, dtype=dtypes.float32)

        trainable_variables = ops.get_collection(
            ops.GraphKeys.TRAINABLE_VARIABLES)
        gradients = gradients_impl.gradients([outputs, final_state],
                                             trainable_variables)
        # Grouping the gradients gives one op that triggers all of them.
        training_op = control_flow_ops.group(*gradients)

        benchmark_label = "tf_rnn_lstm_block_cell %s %s" % (
            config_name, self._GetConfigDesc(config))
        self._BenchmarkOp(training_op, benchmark_label)
def testLSTMBasicToBlockCell(self):
  """Checks that LSTMBlockCell reproduces the results of BasicLSTMCell.

  A two-layer `MultiRNNCell` is built twice, once from `BasicLSTMCell` under
  the "basic" scope and once from `LSTMBlockCell` under the "block" scope,
  both with the same seeded uniform initializer. Each stack is run for one
  step on the same fed input and states, and the fetched results are
  compared element by element.
  """
  with self.session(use_gpu=True) as sess:
    x = array_ops.zeros([1, 2])
    x_values = np.random.randn(1, 2)
    state_values = (
        0.1 * np.ones([1, 2]),
        -0.1 * np.ones([1, 2]),
        -0.2 * np.ones([1, 2]),
        0.2 * np.ones([1, 2]),
    )
    initializer = init_ops.random_uniform_initializer(
        -0.01, 0.01, seed=19890212)

    def run_two_layer_stack(scope_name, make_cell):
      # Builds the stack under `scope_name`, initializes variables and returns
      # the fetched [output, state...] values for the shared inputs.
      with variable_scope.variable_scope(scope_name, initializer=initializer):
        m0 = array_ops.zeros([1, 2])
        m1 = array_ops.zeros([1, 2])
        m2 = array_ops.zeros([1, 2])
        m3 = array_ops.zeros([1, 2])
        stack = rnn_cell.MultiRNNCell(
            [make_cell() for _ in range(2)], state_is_tuple=True)
        g, ((out_m0, out_m1), (out_m2, out_m3)) = stack(
            x, ((m0, m1), (m2, m3)))
        sess.run([variables.global_variables_initializer()])

        feed = {x.name: x_values}
        for state, value in zip((m0, m1, m2, m3), state_values):
          feed[state.name] = value
        return sess.run([g, out_m0, out_m1, out_m2, out_m3], feed)

    basic_res = run_two_layer_stack(
        "basic", lambda: rnn_cell.BasicLSTMCell(2, state_is_tuple=True))
    block_res = run_two_layer_stack(
        "block", lambda: lstm_ops.LSTMBlockCell(2))

    self.assertEqual(len(basic_res), len(block_res))
    for basic, block in zip(basic_res, block_res):
      self.assertAllClose(basic, block)
def benchmarkLSTMBlockCellFpropWithDynamicRNN(self):
  """Benchmarks the forward pass of LSTMBlockCell run through dynamic_rnn.

  For each config yielded by `benchmarking.dict_product` over batch size,
  cell size, time steps, device and dtype, a time-major `dynamic_rnn` is built
  in a fresh graph whose input depth equals the cell size. The wall time from
  `benchmarking.seconds_per_run` is printed as a CSV row and reported via
  `self.report_benchmark`.
  """
  print("BlockLSTMCell forward propagation via dynamic_rnn().")
  print("--------------------------------------------------------------")
  print("LSTMBlockCell Seconds per inference.")
  # The header lists the same seven columns, in the same order, as the rows
  # printed below (dtype first), so the output forms a consistent CSV.
  print("dtype,batch_size,cell_size,input_size,time_steps,use_gpu,wall_time")
  iters = 10
  # The template does not depend on the config, so build it once.
  benchmark_name_template = "_".join([
      "LSTMBlockCell_fprop", "DT_%(dtype)s", "BS%(batch_size)i",
      "CS%(cell_size)i", "IS%(cell_size)i", "TS%(time_steps)i",
      "gpu_%(use_gpu)s"
  ])
  for config in benchmarking.dict_product({
      "batch_size": [1, 8, 13, 32, 67, 128],
      "cell_size": [128, 250, 512, 650, 1024, 1350],
      "time_steps": [40],
      "use_gpu": [True, False],
      "dtype": ["float32", "float16"],
  }):
    dtype = (
        dtypes.float32 if config["dtype"] == "float32" else dtypes.float16)
    with ops.Graph().as_default():
      with benchmarking.device(use_gpu=config["use_gpu"]):
        # Time-major input; its depth is the cell size, which is why the
        # cell_size value is reported for the input_size column as well.
        inputs = variable_scope.get_variable(
            "x",
            dtype=dtype,
            shape=[
                config["time_steps"], config["batch_size"],
                config["cell_size"]
            ])
        cell = lstm_ops.LSTMBlockCell(config["cell_size"], dtype=dtype)
        outputs = rnn.dynamic_rnn(cell, inputs, time_major=True, dtype=dtype)
        init_op = variables.global_variables_initializer()

      with session.Session() as sess:
        sess.run(init_op)
        wall_time = benchmarking.seconds_per_run(outputs, sess, iters)

      # Print to stdout. If the TEST_REPORT_FILE_PREFIX environment variable
      # is set, this will produce a copy-paste-able CSV file.
      print(",".join(
          map(str, [
              config["dtype"], config["batch_size"], config["cell_size"],
              config["cell_size"], config["time_steps"], config["use_gpu"],
              wall_time
          ])))
      self.report_benchmark(
          name=benchmark_name_template % config,
          iters=iters,
          wall_time=wall_time,
          extras=config)
def testCompatibleNames(self):
  """Checks that block and fused LSTM cells create LSTMCell-compatible variables.

  Three separate graphs are built, using `LSTMCell`, `LSTMBlockCell` and
  `LSTMBlockFusedCell`, each with a plain and a peephole variant. The mapping
  from trainable variable name to shape collected in each graph must be equal
  across all three.
  """

  def trainable_shapes():
    # Name -> shape for every trainable variable in the current default graph.
    return {v.name: v.get_shape() for v in variables.trainable_variables()}

  with self.session(use_gpu=True, graph=ops.Graph()):
    cell = rnn_cell.LSTMCell(10)
    pcell = rnn_cell.LSTMCell(10, use_peepholes=True)
    inputs = [array_ops.zeros([4, 5])] * 6
    for rnn_cell_obj, scope_name in ((cell, "basic"), (pcell, "peephole")):
      rnn.static_rnn(
          rnn_cell_obj, inputs, dtype=dtypes.float32, scope=scope_name)
    basic_names = trainable_shapes()

  with self.session(use_gpu=True, graph=ops.Graph()):
    cell = lstm_ops.LSTMBlockCell(10)
    pcell = lstm_ops.LSTMBlockCell(10, use_peephole=True)
    inputs = [array_ops.zeros([4, 5])] * 6
    for rnn_cell_obj, scope_name in ((cell, "basic"), (pcell, "peephole")):
      rnn.static_rnn(
          rnn_cell_obj, inputs, dtype=dtypes.float32, scope=scope_name)
    block_names = trainable_shapes()

  with self.session(use_gpu=True, graph=ops.Graph()):
    cell = lstm_ops.LSTMBlockFusedCell(10)
    pcell = lstm_ops.LSTMBlockFusedCell(10, use_peephole=True)
    # The fused cells are called directly on one stacked tensor rather than
    # being driven by static_rnn over a list of per-step tensors.
    inputs = array_ops.stack([array_ops.zeros([4, 5])] * 6)
    cell(inputs, dtype=dtypes.float32, scope="basic/lstm_cell")
    pcell(inputs, dtype=dtypes.float32, scope="peephole/lstm_cell")
    fused_names = trainable_shapes()

  self.assertEqual(basic_names, block_names)
  self.assertEqual(basic_names, fused_names)
def testNoneDimsWithDynamicRNN(self):
  """Runs LSTMBlockCell through dynamic_rnn with unknown leading dimensions.

  The placeholder leaves its first two dimensions as `None`; only the input
  depth is fixed. The test passes if building the time-major `dynamic_rnn`
  and evaluating its output on a concrete random batch raises no error.
  """
  with self.session(use_gpu=True, graph=ops.Graph()) as sess:
    batch_size = 4
    num_steps = 5
    input_dim = 6
    cell_size = 7

    cell = lstm_ops.LSTMBlockCell(cell_size)
    x = array_ops.placeholder(dtypes.float32, shape=(None, None, input_dim))
    rnn_result = rnn.dynamic_rnn(
        cell, x, time_major=True, dtype=dtypes.float32)
    output = rnn_result[0]
    sess.run(variables.global_variables_initializer())

    # Concrete time-major data for the two dimensions left unspecified above.
    feed = {x: np.random.randn(num_steps, batch_size, input_dim)}
    sess.run(output, feed)