def testTrainWithLocalVariable(self): with ops.Graph().as_default(): random_seed.set_random_seed(0) tf_inputs = constant_op.constant(self._inputs, dtype=dtypes.float32) tf_labels = constant_op.constant(self._labels, dtype=dtypes.float32) local_multiplier = variables_lib.local_variable(1.0) tf_predictions = logistic_classifier(tf_inputs) * local_multiplier losses.log_loss(tf_labels, tf_predictions) total_loss = losses.get_total_loss() optimizer = gradient_descent.GradientDescentOptimizer( learning_rate=1.0) train_op = training.create_train_op(total_loss, optimizer) loss = training.train( train_op, None, hooks=[basic_session_run_hooks.StopAtStepHook(num_steps=300)], save_summaries_steps=None, save_checkpoint_secs=None) self.assertIsNotNone(loss) self.assertLess(loss, .015)
def create_train_op(self, learning_rate=1.0, gradient_multiplier=1.0): tf_inputs = constant_op.constant(self._inputs, dtype=dtypes.float32) tf_labels = constant_op.constant(self._labels, dtype=dtypes.float32) tf_predictions = logistic_classifier(tf_inputs) losses.log_loss(tf_labels, tf_predictions) total_loss = losses.get_total_loss() optimizer = gradient_descent.GradientDescentOptimizer( learning_rate=learning_rate) def transform_grads_fn(grads): if gradient_multiplier != 1.0: variables = variables_lib2.trainable_variables() gradient_multipliers = { var: gradient_multiplier for var in variables } with ops.name_scope('multiply_grads'): return training.multiply_gradients(grads, gradient_multipliers) else: return grads return training.create_train_op(total_loss, optimizer, transform_grads_fn=transform_grads_fn)
def testResumeTrainAchievesRoughlyTheSameLoss(self): number_of_steps = [300, 1, 5] logdir = os.path.join(self.get_temp_dir(), 'resume_train_same_loss') for i in range(len(number_of_steps)): with ops.Graph().as_default(): random_seed.set_random_seed(i) tf_inputs = constant_op.constant(self._inputs, dtype=dtypes.float32) tf_labels = constant_op.constant(self._labels, dtype=dtypes.float32) tf_predictions = logistic_classifier(tf_inputs) losses.log_loss(tf_labels, tf_predictions) total_loss = losses.get_total_loss() optimizer = gradient_descent.GradientDescentOptimizer(learning_rate=1.0) train_op = training.create_train_op(total_loss, optimizer) saver = saver_lib.Saver() loss = training.train( train_op, logdir, hooks=[ basic_session_run_hooks.StopAtStepHook( num_steps=number_of_steps[i]), basic_session_run_hooks.CheckpointSaverHook( logdir, save_steps=50, saver=saver), ], save_checkpoint_secs=None, save_summaries_steps=None) self.assertIsNotNone(loss) self.assertLess(loss, .015)
def ModelLoss(self): tf_inputs = constant_op.constant(self._inputs, dtype=dtypes.float32) tf_labels = constant_op.constant(self._labels, dtype=dtypes.float32) tf_predictions = logistic_classifier(tf_inputs) losses.log_loss(tf_labels, tf_predictions) return losses.get_total_loss()
def testResumeTrainAchievesRoughlyTheSameLoss(self): number_of_steps = [300, 1, 5] logdir = os.path.join(self.get_temp_dir(), 'resume_train_same_loss') for i in range(len(number_of_steps)): with ops.Graph().as_default(): random_seed.set_random_seed(i) tf_inputs = constant_op.constant(self._inputs, dtype=dtypes.float32) tf_labels = constant_op.constant(self._labels, dtype=dtypes.float32) tf_predictions = logistic_classifier(tf_inputs) losses.log_loss(tf_labels, tf_predictions) total_loss = losses.get_total_loss() optimizer = gradient_descent.GradientDescentOptimizer(learning_rate=1.0) train_op = training.create_train_op(total_loss, optimizer) saver = saver_lib.Saver() loss = training.train( train_op, logdir, hooks=[ basic_session_run_hooks.StopAtStepHook( num_steps=number_of_steps[i]), basic_session_run_hooks.CheckpointSaverHook( logdir, save_steps=50, saver=saver), ], save_checkpoint_secs=None, save_summaries_steps=None) self.assertIsNotNone(loss) self.assertLess(loss, .015)
def ModelLoss(self): tf_inputs = constant_op.constant(self._inputs, dtype=dtypes.float32) tf_labels = constant_op.constant(self._labels, dtype=dtypes.float32) tf_predictions = logistic_classifier(tf_inputs) losses.log_loss(tf_labels, tf_predictions) return losses.get_total_loss()
def _train_model(self, checkpoint_dir, num_steps): """Trains a simple classification model. Note that the data has been configured such that after around 300 steps, the model has memorized the dataset (e.g. we can expect %100 accuracy). Args: checkpoint_dir: The directory where the checkpoint is written to. num_steps: The number of steps to train for. """ with ops.Graph().as_default(): random_seed.set_random_seed(0) tf_inputs = constant_op.constant(self._inputs, dtype=dtypes.float32) tf_labels = constant_op.constant(self._labels, dtype=dtypes.float32) tf_predictions = logistic_classifier(tf_inputs) loss_op = losses.log_loss(labels=tf_labels, predictions=tf_predictions) optimizer = gradient_descent.GradientDescentOptimizer(learning_rate=1.0) train_op = optimizer.minimize(loss_op, training.get_or_create_global_step()) with monitored_session.MonitoredTrainingSession( checkpoint_dir=checkpoint_dir, hooks=[basic_session_run_hooks.StopAtStepHook(num_steps)]) as session: loss = None while not session.should_stop(): _, loss = session.run([train_op, loss_op]) if num_steps >= 300: assert loss < .015
def testGlobalStepNotIncrementedWhenSetToNone(self): with ops.Graph().as_default(): random_seed.set_random_seed(0) tf_inputs = constant_op.constant(self._inputs, dtype=dtypes.float32) tf_labels = constant_op.constant(self._labels, dtype=dtypes.float32) tf_predictions = batchnorm_classifier(tf_inputs) loss = losses.log_loss(tf_labels, tf_predictions) optimizer = gradient_descent.GradientDescentOptimizer( learning_rate=1.0) train_op = training.create_train_op(loss, optimizer, global_step=None) global_step = variables_lib.get_or_create_global_step() with self.test_session() as session: # Initialize all variables session.run(variables_lib2.global_variables_initializer()) for _ in range(10): session.run(train_op) # Since train_op don't use global_step it shouldn't change. self.assertAllClose(global_step.eval(), 0)
def testEmptyUpdateOps(self): with ops.Graph().as_default(): random_seed.set_random_seed(0) tf_inputs = constant_op.constant(self._inputs, dtype=dtypes.float32) tf_labels = constant_op.constant(self._labels, dtype=dtypes.float32) tf_predictions = batchnorm_classifier(tf_inputs) loss = losses.log_loss(tf_labels, tf_predictions) optimizer = gradient_descent.GradientDescentOptimizer( learning_rate=1.0) train_op = training.create_train_op(loss, optimizer, update_ops=[]) moving_mean = variables_lib.get_variables_by_name('moving_mean')[0] moving_variance = variables_lib.get_variables_by_name( 'moving_variance')[0] with self.test_session() as session: # Initialize all variables session.run(variables_lib2.global_variables_initializer()) mean, variance = session.run([moving_mean, moving_variance]) # After initialization moving_mean == 0 and moving_variance == 1. self.assertAllClose(mean, [0] * 4) self.assertAllClose(variance, [1] * 4) for _ in range(10): session.run(train_op) mean = moving_mean.eval() variance = moving_variance.eval() # Since we skip update_ops the moving_vars are not updated. self.assertAllClose(mean, [0] * 4) self.assertAllClose(variance, [1] * 4)
def testEmptyUpdateOps(self): with ops.Graph().as_default(): random_seed.set_random_seed(0) tf_inputs = constant_op.constant(self._inputs, dtype=dtypes.float32) tf_labels = constant_op.constant(self._labels, dtype=dtypes.float32) tf_predictions = batchnorm_classifier(tf_inputs) loss = losses.log_loss(tf_labels, tf_predictions) optimizer = gradient_descent.GradientDescentOptimizer(learning_rate=1.0) train_op = training.create_train_op(loss, optimizer, update_ops=[]) moving_mean = variables_lib.get_variables_by_name('moving_mean')[0] moving_variance = variables_lib.get_variables_by_name('moving_variance')[ 0] with self.test_session() as session: # Initialize all variables session.run(variables_lib2.global_variables_initializer()) mean, variance = session.run([moving_mean, moving_variance]) # After initialization moving_mean == 0 and moving_variance == 1. self.assertAllClose(mean, [0] * 4) self.assertAllClose(variance, [1] * 4) for _ in range(10): session.run(train_op) mean = moving_mean.eval() variance = moving_variance.eval() # Since we skip update_ops the moving_vars are not updated. self.assertAllClose(mean, [0] * 4) self.assertAllClose(variance, [1] * 4)
def testGlobalStepIsIncrementedByDefault(self): with ops.Graph().as_default(): random_seed.set_random_seed(0) tf_inputs = constant_op.constant(self._inputs, dtype=dtypes.float32) tf_labels = constant_op.constant(self._labels, dtype=dtypes.float32) tf_predictions = batchnorm_classifier(tf_inputs) loss = losses.log_loss(tf_labels, tf_predictions) optimizer = gradient_descent.GradientDescentOptimizer( learning_rate=1.0) train_op = training.create_train_op(loss, optimizer) global_step = variables_lib.get_or_create_global_step() with self.cached_session() as session: # Initialize all variables session.run(variables_lib2.global_variables_initializer()) for _ in range(10): session.run(train_op) # After 10 updates global_step should be 10. self.assertAllClose(global_step.eval(), 10)
def testCanAchieveZeroLoss(self): with ops.Graph().as_default(): random_seed.set_random_seed(0) tf_inputs = constant_op.constant(self._inputs, dtype=dtypes.float32) tf_labels = constant_op.constant(self._labels, dtype=dtypes.float32) tf_predictions = logistic_classifier(tf_inputs) losses.log_loss(tf_labels, tf_predictions) total_loss = losses.get_total_loss() optimizer = gradient_descent.GradientDescentOptimizer(learning_rate=1.0) train_op = training.create_train_op(total_loss, optimizer) loss = training.train( train_op, None, hooks=[basic_session_run_hooks.StopAtStepHook(num_steps=300)], save_summaries_steps=None, save_checkpoint_secs=None) self.assertIsNotNone(loss) self.assertLess(loss, .015)
def testTrainWithNoInitAssignCanAchieveZeroLoss(self): with ops.Graph().as_default(): random_seed.set_random_seed(0) tf_inputs = constant_op.constant(self._inputs, dtype=dtypes.float32) tf_labels = constant_op.constant(self._labels, dtype=dtypes.float32) tf_predictions = batchnorm_classifier(tf_inputs) losses.log_loss(tf_labels, tf_predictions) total_loss = losses.get_total_loss() optimizer = gradient_descent.GradientDescentOptimizer(learning_rate=1.0) train_op = training.create_train_op(total_loss, optimizer) loss = training.train( train_op, None, hooks=[basic_session_run_hooks.StopAtStepHook(num_steps=300)], save_summaries_steps=None, save_checkpoint_secs=None) self.assertLess(loss, .1)
def testTrainOpInCollection(self): with ops.Graph().as_default(): tf_inputs = constant_op.constant(self._inputs, dtype=dtypes.float32) tf_labels = constant_op.constant(self._labels, dtype=dtypes.float32) tf_predictions = batchnorm_classifier(tf_inputs) loss = losses.log_loss(tf_labels, tf_predictions) optimizer = gradient_descent.GradientDescentOptimizer(learning_rate=1.0) train_op = training.create_train_op(loss, optimizer) # Make sure the training op was recorded in the proper collection self.assertTrue(train_op in ops.get_collection(ops.GraphKeys.TRAIN_OP))
def testTrainOpInCollection(self): with ops.Graph().as_default(): tf_inputs = constant_op.constant(self._inputs, dtype=dtypes.float32) tf_labels = constant_op.constant(self._labels, dtype=dtypes.float32) tf_predictions = batchnorm_classifier(tf_inputs) loss = losses.log_loss(tf_labels, tf_predictions) optimizer = gradient_descent.GradientDescentOptimizer(learning_rate=1.0) train_op = training.create_train_op(loss, optimizer) # Make sure the training op was recorded in the proper collection self.assertTrue(train_op in ops.get_collection(ops.GraphKeys.TRAIN_OP))
def create_train_op(self, learning_rate=1.0, gradient_multiplier=1.0): tf_inputs = constant_op.constant(self._inputs, dtype=dtypes.float32) tf_labels = constant_op.constant(self._labels, dtype=dtypes.float32) tf_predictions = logistic_classifier(tf_inputs) losses.log_loss(tf_labels, tf_predictions) total_loss = losses.get_total_loss() optimizer = gradient_descent.GradientDescentOptimizer( learning_rate=learning_rate) def transform_grads_fn(grads): if gradient_multiplier != 1.0: variables = variables_lib2.trainable_variables() gradient_multipliers = {var: gradient_multiplier for var in variables} with ops.name_scope('multiply_grads'): return training.multiply_gradients(grads, gradient_multipliers) else: return grads return training.create_train_op( total_loss, optimizer, transform_grads_fn=transform_grads_fn)
def testGlobalStepIsIncrementedByDefault(self): with ops.Graph().as_default(): random_seed.set_random_seed(0) tf_inputs = constant_op.constant(self._inputs, dtype=dtypes.float32) tf_labels = constant_op.constant(self._labels, dtype=dtypes.float32) tf_predictions = batchnorm_classifier(tf_inputs) loss = losses.log_loss(tf_labels, tf_predictions) optimizer = gradient_descent.GradientDescentOptimizer(learning_rate=1.0) train_op = training.create_train_op(loss, optimizer) global_step = variables_lib.get_or_create_global_step() with self.cached_session() as session: # Initialize all variables session.run(variables_lib2.global_variables_initializer()) for _ in range(10): session.run(train_op) # After 10 updates global_step should be 10. self.assertAllClose(global_step.eval(), 10)
def testUseUpdateOps(self): with ops.Graph().as_default(): random_seed.set_random_seed(0) tf_inputs = constant_op.constant(self._inputs, dtype=dtypes.float32) tf_labels = constant_op.constant(self._labels, dtype=dtypes.float32) expected_mean = np.mean(self._inputs, axis=(0)) expected_var = np.var(self._inputs, axis=(0)) tf_predictions = batchnorm_classifier(tf_inputs) loss = losses.log_loss(tf_labels, tf_predictions) optimizer = gradient_descent.GradientDescentOptimizer( learning_rate=1.0) train_op = training.create_train_op(loss, optimizer) moving_mean = variables_lib.get_variables_by_name('moving_mean')[0] moving_variance = variables_lib.get_variables_by_name( 'moving_variance')[0] with self.cached_session() as session: # Initialize all variables session.run(variables_lib2.global_variables_initializer()) mean, variance = session.run([moving_mean, moving_variance]) # After initialization moving_mean == 0 and moving_variance == 1. self.assertAllClose(mean, [0] * 4) self.assertAllClose(variance, [1] * 4) for _ in range(10): session.run(train_op) mean = moving_mean.eval() variance = moving_variance.eval() # After 10 updates with decay 0.1 moving_mean == expected_mean and # moving_variance == expected_var. self.assertAllClose(mean, expected_mean) self.assertAllClose(variance, expected_var)
def testGlobalStepNotIncrementedWhenSetToNone(self): with ops.Graph().as_default(): random_seed.set_random_seed(0) tf_inputs = constant_op.constant(self._inputs, dtype=dtypes.float32) tf_labels = constant_op.constant(self._labels, dtype=dtypes.float32) tf_predictions = batchnorm_classifier(tf_inputs) loss = losses.log_loss(tf_labels, tf_predictions) optimizer = gradient_descent.GradientDescentOptimizer(learning_rate=1.0) train_op = training.create_train_op(loss, optimizer, global_step=None) global_step = variables_lib.get_or_create_global_step() with self.test_session() as session: # Initialize all variables session.run(variables_lib2.global_variables_initializer()) for _ in range(10): session.run(train_op) # Since train_op don't use global_step it shouldn't change. self.assertAllClose(global_step.eval(), 0)
def testTrainingLoop(self): random_seed.set_random_seed(1) # Model with ops.device("/device:IPU:0"): with variable_scope.variable_scope("vs", use_resource=True): x = array_ops.placeholder(np.float32, [4, 1, 4], name="a") l = array_ops.placeholder(np.float32, [4, 1, 1], name="b") y = layers.dense(x, 1, activation=nn.sigmoid) loss = losses.log_loss(l, y) train_op = gradient_descent.GradientDescentOptimizer(0.1) \ .minimize(loss) init = variables.global_variables_initializer() # Test data image_data = [[[1, 1, 1, 1]], [[2, 2, 2, 2]], [[3, 3, 3, 3]], [[4, 4, 4, 4]]] label_data = [[[1]], [[2]], [[3]], [[4]]] # Run training. with ms.MonitoredTrainingSession(is_chief=True, chief_only_hooks=None, save_summaries_steps=None, save_summaries_secs=None) as sess: sess.run(init) previous_loss = float("inf") for _ in range(5): measured_loss, _ = sess.run([loss, train_op], feed_dict={ x: image_data, l: label_data }) self.assertTrue(measured_loss < previous_loss) previous_loss = measured_loss
def _train_model(self, checkpoint_dir, num_steps): """Trains a simple classification model. Note that the data has been configured such that after around 300 steps, the model has memorized the dataset (e.g. we can expect %100 accuracy). Args: checkpoint_dir: The directory where the checkpoint is written to. num_steps: The number of steps to train for. """ with ops.Graph().as_default(): random_seed.set_random_seed(0) tf_inputs = constant_op.constant(self._inputs, dtype=dtypes.float32) tf_labels = constant_op.constant(self._labels, dtype=dtypes.float32) tf_predictions = logistic_classifier(tf_inputs) loss_op = losses.log_loss(labels=tf_labels, predictions=tf_predictions) optimizer = gradient_descent.GradientDescentOptimizer( learning_rate=1.0) train_op = optimizer.minimize(loss_op, training.get_or_create_global_step()) with monitored_session.MonitoredTrainingSession( checkpoint_dir=checkpoint_dir, hooks=[basic_session_run_hooks.StopAtStepHook(num_steps) ]) as session: loss = None while not session.should_stop(): _, loss = session.run([train_op, loss_op]) if num_steps >= 300: assert loss < .015
def testUseUpdateOps(self): with ops.Graph().as_default(): random_seed.set_random_seed(0) tf_inputs = constant_op.constant(self._inputs, dtype=dtypes.float32) tf_labels = constant_op.constant(self._labels, dtype=dtypes.float32) expected_mean = np.mean(self._inputs, axis=(0)) expected_var = np.var(self._inputs, axis=(0)) tf_predictions = batchnorm_classifier(tf_inputs) loss = losses.log_loss(tf_labels, tf_predictions) optimizer = gradient_descent.GradientDescentOptimizer(learning_rate=1.0) train_op = training.create_train_op(loss, optimizer) moving_mean = variables_lib.get_variables_by_name('moving_mean')[0] moving_variance = variables_lib.get_variables_by_name('moving_variance')[ 0] with self.cached_session() as session: # Initialize all variables session.run(variables_lib2.global_variables_initializer()) mean, variance = session.run([moving_mean, moving_variance]) # After initialization moving_mean == 0 and moving_variance == 1. self.assertAllClose(mean, [0] * 4) self.assertAllClose(variance, [1] * 4) for _ in range(10): session.run(train_op) mean = moving_mean.eval() variance = moving_variance.eval() # After 10 updates with decay 0.1 moving_mean == expected_mean and # moving_variance == expected_var. self.assertAllClose(mean, expected_mean) self.assertAllClose(variance, expected_var)
def _testCudnnCompatibleRnnCells(self, num_layers, seq_length, num_units, input_size, batch_size, rnn_mode, use_block_cell): has_state_c = rnn_mode == cudnn_rnn_ops.CUDNN_LSTM np.random.seed(0) # Train graph with ops.Graph().as_default(): random_seed.set_random_seed(299) input_data = array_ops.placeholder( dtypes.float32, shape=[seq_length, batch_size, input_size]) output_tuple, cudnn_model, cudnn_params = self._build_forward_cudnn_model( rnn_mode, num_layers, num_units, input_data, is_training=True) target_output = array_ops.placeholder(dtype=dtypes.float32, shape=None) total_sum = sum(map(math_ops.reduce_sum, output_tuple)) loss_op = losses.log_loss(labels=target_output, predictions=total_sum) optimizer = gradient_descent.GradientDescentOptimizer( learning_rate=1e-2) train_op = optimizer.minimize(loss_op) saver = saver_lib.Saver(write_version=saver_pb2.SaverDef.V2) # Train Cudnn model with self.test_session(use_gpu=True, graph=ops.get_default_graph()) as sess: sess.run(variables.global_variables_initializer()) # Train 128 steps num_steps = 128 for _ in range(num_steps): inputs = np.random.rand(seq_length, batch_size, input_size).astype(np.float32) targets = np.random.rand() sess.run(train_op, feed_dict={ input_data: inputs, target_output: targets }) save_path = os.path.join(self.get_temp_dir(), ("cudnn-rnn-%s-test" % rnn_mode)) save_v = saver.save(sess, save_path) self.assertEqual(save_path, save_v) cudnn_params_v = sess.run(cudnn_params) # cuDNN inference graph with ops.Graph().as_default(): random_seed.set_random_seed(299) cudnn_inputs = array_ops.placeholder( dtypes.float32, shape=[seq_length, batch_size, input_size]) (cudnn_output_tuple, cudnn_model, cudnn_params) = self._build_forward_cudnn_model(rnn_mode, num_layers, num_units, cudnn_inputs, is_training=False) saver = saver_lib.Saver(write_version=saver_pb2.SaverDef.V2) inference_input = np.random.rand(seq_length, batch_size, input_size).astype(np.float32) with self.test_session(use_gpu=True, graph=ops.get_default_graph()) as sess: sess.run(variables.global_variables_initializer()) saver.restore(sess, save_path) restored_cudnn_params_v = sess.run(cudnn_params) self.assertAllEqual(cudnn_params_v, restored_cudnn_params_v) # Cudnn inference cudnn_output = sess.run( cudnn_output_tuple, feed_dict={cudnn_inputs: inference_input}) # Canonical RNN inference graph with ops.Graph().as_default(): random_seed.set_random_seed(299) cell_inputs = array_ops.placeholder( dtypes.float32, shape=[seq_length, batch_size, input_size]) (output, states) = _create_cudnn_compatible_canonical_rnn( cudnn_model, cell_inputs, use_block_cell) saver = saver_lib.Saver(write_version=saver_pb2.SaverDef.V2) with self.test_session(use_gpu=True, graph=ops.get_default_graph()) as sess: saver.restore(sess, save_path) # BlockCell inference output_v, states_v = sess.run( [output, states], feed_dict={cell_inputs: inference_input}) # output across timestamps are packed into one tensor. self.assertAllClose(cudnn_output[0], output_v, atol=1e-6, rtol=1e-6) for i in range(num_layers): if has_state_c: # output_h self.assertAllClose(cudnn_output[1][i, :], states_v[i].h, atol=1e-6, rtol=1e-6) # output_c self.assertAllClose(cudnn_output[2][i, :], states_v[i].c, atol=1e-6, rtol=1e-6) else: self.assertAllClose(cudnn_output[1][i, :], states_v[i], atol=1e-6, rtol=1e-6)
def _TestCudnnCompatibleRnnCells(self, num_layers, seq_length, num_units, input_size, batch_size, rnn_mode, direction): dtype = dtypes.float32 # Train graph with ops.Graph().as_default() as g: model = CudnnTestModel( rnn_mode, num_layers, num_units, input_size, direction=direction, dtype=dtype, training=True) target_output = array_ops.placeholder(dtype=dtype) loss_op = losses.log_loss( labels=target_output, predictions=model.total_sum) optimizer = gradient_descent.GradientDescentOptimizer(learning_rate=1e-2) train_op = optimizer.minimize(loss_op) saver = saver_lib.Saver() # Train Cudnn model seed = 0 with self.test_session(use_gpu=True, graph=g) as sess: sess.run(variables.global_variables_initializer()) # Train 128 steps num_steps = 128 for _ in range(num_steps): inputs, _ = model.SynthesizeInput(seq_length, batch_size, seed) targets = np.random.rand() sess.run( train_op, feed_dict={ model.inputs: inputs, model.initial_state: model.ZeroState(batch_size), target_output: targets }) seed += 1 save_path = os.path.join(self.get_temp_dir(), ("cudnn-rnn-%s-test" % rnn_mode)) save_v = saver.save(sess, save_path) self.assertEqual(save_path, save_v) # Cudnn inference graph with ops.Graph().as_default() as g: model = CudnnTestModel( rnn_mode, num_layers, num_units, input_size, direction=direction, dtype=dtype, training=False) rnn = model.rnn saver = saver_lib.Saver() inference_input = np.random.rand(seq_length, batch_size, input_size).astype(np.float32) with self.test_session(use_gpu=True, graph=g) as sess: sess.run(variables.global_variables_initializer()) saver.restore(sess, save_path) # Cudnn inference cudnn_outputs_v, cudnn_output_states_v = model.Feed( sess, inference_input, return_sum=False) # Canonical RNN inference graph with ops.Graph().as_default() as g: cell_inputs = array_ops.placeholder( dtype, shape=[seq_length, batch_size, input_size]) if direction == CUDNN_RNN_UNIDIRECTION: # outputs is one tensor, states are num_layer tuples, each 2 tensors (outputs, states) = _CreateCudnnCompatibleCanonicalRNN(rnn, cell_inputs) if rnn_mode == CUDNN_LSTM: output_h = array_ops.stack([s.h for s in states]) output_c = array_ops.stack([s.c for s in states]) else: output_state = array_ops.stack([s for s in states]) else: # outputs is one tensor. # states is a tuple of 2 tuples: # each sub tuple is num_layer tuples, each with 2 tensors. (outputs, states) = _CreateCudnnCompatibleCanonicalRNN( rnn, cell_inputs, is_bidi=True) output_state_fw, output_state_bw = states if rnn_mode == CUDNN_LSTM: output_h, output_c = [], [] for s_fw, s_bw in zip(output_state_fw, output_state_bw): output_h.append(array_ops.stack([s_fw.h, s_bw.h])) output_c.append(array_ops.stack([s_fw.c, s_bw.c])) output_h = array_ops.concat(output_h, axis=0) output_c = array_ops.concat(output_c, axis=0) else: output_state = [] for s_fw, s_bw in zip(output_state_fw, output_state_bw): output_state.append(array_ops.stack([s_fw, s_bw])) output_state = array_ops.concat(output_state, axis=0) saver = saver_lib.Saver() with self.test_session(use_gpu=True, graph=g) as sess: saver.restore(sess, save_path) # BlockCell inference if rnn_mode == CUDNN_LSTM: outputs_v, output_h_v, output_c_v = sess.run( [outputs, output_h, output_c], feed_dict={cell_inputs: inference_input}) self.assertAllClose(cudnn_outputs_v, outputs_v) cudnn_output_h_v, cudnn_output_c_v = cudnn_output_states_v self.assertAllClose(cudnn_output_h_v, output_h_v) self.assertAllClose(cudnn_output_c_v, output_c_v) else: outputs_v, output_state_v = sess.run( [outputs, output_state], feed_dict={cell_inputs: inference_input}) self.assertAllClose(cudnn_outputs_v, outputs_v, atol=2e-5, rtol=2e-5) (cudnn_output_h_v,) = cudnn_output_states_v self.assertAllClose(cudnn_output_h_v, output_state_v, atol=2e-5, rtol=2e-5)
def _testCudnnCompatibleRnnCells(self, num_layers, seq_length, num_units, input_size, batch_size, rnn_mode): has_state_c = rnn_mode == cudnn_rnn_ops.CUDNN_LSTM np.random.seed(0) # Train graph with ops.Graph().as_default(): random_seed.set_random_seed(299) input_data = array_ops.placeholder( dtypes.float32, shape=[seq_length, batch_size, input_size]) output_tuple, cudnn_model, cudnn_params = _BuildCudnnForward( rnn_mode, num_layers, num_units, input_data, is_training=True) target_output = array_ops.placeholder(dtype=dtypes.float32, shape=None) total_sum = sum(map(math_ops.reduce_sum, output_tuple)) loss_op = losses.log_loss(labels=target_output, predictions=total_sum) optimizer = gradient_descent.GradientDescentOptimizer(learning_rate=1e-2) train_op = optimizer.minimize(loss_op) saver = saver_lib.Saver(write_version=saver_pb2.SaverDef.V2) # Train Cudnn model with self.test_session( use_gpu=True, graph=ops.get_default_graph()) as sess: sess.run(variables.global_variables_initializer()) # Train 128 steps num_steps = 128 for _ in range(num_steps): inputs = np.random.rand(seq_length, batch_size, input_size).astype(np.float32) targets = np.random.rand() sess.run( train_op, feed_dict={input_data: inputs, target_output: targets}) save_path = os.path.join(self.get_temp_dir(), ("cudnn-rnn-%s-test" % rnn_mode)) save_v = saver.save(sess, save_path) self.assertEqual(save_path, save_v) cudnn_params_v = sess.run(cudnn_params) # cuDNN inference graph with ops.Graph().as_default(): random_seed.set_random_seed(299) cudnn_inputs = array_ops.placeholder( dtypes.float32, shape=[seq_length, batch_size, input_size]) (cudnn_output_tuple, cudnn_model, cudnn_params) = _BuildCudnnForward( rnn_mode, num_layers, num_units, cudnn_inputs, is_training=False) saver = saver_lib.Saver(write_version=saver_pb2.SaverDef.V2) inference_input = np.random.rand(seq_length, batch_size, input_size).astype(np.float32) with self.test_session( use_gpu=True, graph=ops.get_default_graph()) as sess: sess.run(variables.global_variables_initializer()) saver.restore(sess, save_path) restored_cudnn_params_v = sess.run(cudnn_params) self.assertAllEqual(cudnn_params_v, restored_cudnn_params_v) # Cudnn inference cudnn_output = sess.run( cudnn_output_tuple, feed_dict={cudnn_inputs: inference_input}) # Canonical RNN inference graph with ops.Graph().as_default(): random_seed.set_random_seed(299) cell_inputs = array_ops.placeholder( dtypes.float32, shape=[seq_length, batch_size, input_size]) (output, states) = _CreateCudnnCompatibleCanonicalRNN( cudnn_model, cell_inputs) saver = saver_lib.Saver(write_version=saver_pb2.SaverDef.V2) with self.test_session( use_gpu=True, graph=ops.get_default_graph()) as sess: saver.restore(sess, save_path) # BlockCell inference output_v, states_v = sess.run( [output, states], feed_dict={cell_inputs: inference_input}) # output across timestamps are packed into one tensor. self.assertAllClose(cudnn_output[0], output_v, atol=1e-6, rtol=1e-6) for i in range(num_layers): if has_state_c: # output_h self.assertAllClose( cudnn_output[1][i, :], states_v[i].h, atol=1e-6, rtol=1e-6) # output_c self.assertAllClose( cudnn_output[2][i, :], states_v[i].c, atol=1e-6, rtol=1e-6) else: self.assertAllClose( cudnn_output[1][i, :], states_v[i], atol=1e-6, rtol=1e-6)
def _TestCudnnCompatibleRnnCells(self, num_layers, seq_length, num_units, input_size, batch_size, rnn_mode, direction): dtype = dtypes.float32 # Train graph with ops.Graph().as_default() as g: model = CudnnTestModel( rnn_mode, num_layers, num_units, input_size, direction=direction, dtype=dtype, training=True) target_output = array_ops.placeholder(dtype=dtype) loss_op = losses.log_loss( labels=target_output, predictions=model.total_sum) optimizer = gradient_descent.GradientDescentOptimizer(learning_rate=1e-2) train_op = optimizer.minimize(loss_op) saver = saver_lib.Saver() # Train Cudnn model seed = 0 with self.test_session(use_gpu=True, graph=g) as sess: sess.run(variables.global_variables_initializer()) # Train 128 steps num_steps = 128 for _ in range(num_steps): inputs, _ = model.SynthesizeInput(seq_length, batch_size, seed) targets = np.random.rand() sess.run( train_op, feed_dict={ model.inputs: inputs, model.initial_state: model.ZeroState(batch_size), target_output: targets }) seed += 1 save_path = os.path.join(self.get_temp_dir(), ("cudnn-rnn-%s-test" % rnn_mode)) save_v = saver.save(sess, save_path) self.assertEqual(save_path, save_v) # Cudnn inference graph with ops.Graph().as_default() as g: model = CudnnTestModel( rnn_mode, num_layers, num_units, input_size, direction=direction, dtype=dtype, training=False) rnn = model.rnn saver = saver_lib.Saver() inference_input = np.random.rand(seq_length, batch_size, input_size).astype(np.float32) with self.test_session(use_gpu=True, graph=g) as sess: sess.run(variables.global_variables_initializer()) saver.restore(sess, save_path) # Cudnn inference cudnn_outputs_v, cudnn_output_states_v = model.Feed( sess, inference_input, return_sum=False) # Canonical RNN inference graph with ops.Graph().as_default() as g: cell_inputs = array_ops.placeholder( dtype, shape=[seq_length, batch_size, input_size]) if direction == CUDNN_RNN_UNIDIRECTION: # outputs is one tensor, states are num_layer tuples, each 2 tensors (outputs, states) = _CreateCudnnCompatibleCanonicalRNN(rnn, cell_inputs) if rnn_mode == CUDNN_LSTM: output_h = array_ops.stack([s.h for s in states]) output_c = array_ops.stack([s.c for s in states]) else: output_state = array_ops.stack([s for s in states]) else: # outputs is one tensor. # states is a tuple of 2 tuples: # each sub tuple is num_layer tuples, each with 2 tensors. (outputs, states) = _CreateCudnnCompatibleCanonicalRNN( rnn, cell_inputs, is_bidi=True) output_state_fw, output_state_bw = states if rnn_mode == CUDNN_LSTM: output_h, output_c = [], [] for s_fw, s_bw in zip(output_state_fw, output_state_bw): output_h.append(array_ops.stack([s_fw.h, s_bw.h])) output_c.append(array_ops.stack([s_fw.c, s_bw.c])) output_h = array_ops.concat(output_h, axis=0) output_c = array_ops.concat(output_c, axis=0) else: output_state = [] for s_fw, s_bw in zip(output_state_fw, output_state_bw): output_state.append(array_ops.stack([s_fw, s_bw])) output_state = array_ops.concat(output_state, axis=0) saver = saver_lib.Saver() with self.test_session(use_gpu=True, graph=g) as sess: saver.restore(sess, save_path) # BlockCell inference if rnn_mode == CUDNN_LSTM: outputs_v, output_h_v, output_c_v = sess.run( [outputs, output_h, output_c], feed_dict={cell_inputs: inference_input}) self.assertAllClose(cudnn_outputs_v, outputs_v) cudnn_output_h_v, cudnn_output_c_v = cudnn_output_states_v self.assertAllClose(cudnn_output_h_v, output_h_v) self.assertAllClose(cudnn_output_c_v, output_c_v) else: outputs_v, output_state_v = sess.run( [outputs, output_state], feed_dict={cell_inputs: inference_input}) self.assertAllClose(cudnn_outputs_v, outputs_v, atol=2e-5, rtol=2e-5) (cudnn_output_h_v,) = cudnn_output_states_v self.assertAllClose(cudnn_output_h_v, output_state_v, atol=2e-5, rtol=2e-5)