Example #1
def testCreateAgentNestSizeChecks(self, action_spec, expected_error):
     cloning_net = get_dummy_net(action_spec, self._observation_spec)
     if expected_error is not None:
         with self.assertRaisesRegex(ValueError, expected_error):
             behavioral_cloning_agent.BehavioralCloningAgent(
                 self._time_step_spec,
                 action_spec,
                 cloning_network=cloning_net,
                 optimizer=None)
     else:
         behavioral_cloning_agent.BehavioralCloningAgent(
             self._time_step_spec,
             action_spec,
             cloning_network=cloning_net,
             optimizer=None)
Example #2
    def test_inference(self):
        test_agent = behavioral_cloning_agent.BehavioralCloningAgent(
            self._time_step_spec,
            self._action_spec,
            self._network,
            tf.compat.v1.train.AdamOptimizer(),
            num_outer_dims=2)
        test_trainer = trainer.Trainer(root_dir=self.get_temp_dir(),
                                       agent=test_agent)

        inference_batch_size = 1
        random_time_step = tensor_spec.sample_spec_nest(
            self._time_step_spec, outer_dims=(inference_batch_size, ))

        initial_policy_state = test_trainer._agent.policy.get_initial_state(
            inference_batch_size)

        action_outputs = test_trainer._agent.policy.action(
            random_time_step, initial_policy_state)
        self.assertAllEqual([inference_batch_size],
                            action_outputs.action.shape)

        action_outputs = test_trainer._agent.policy.action(
            random_time_step, action_outputs.state)
        self.assertAllEqual([inference_batch_size],
                            action_outputs.action.shape)
Example #3
  def testTrainWithSingleOuterDimension(self):
    # Hard code a trajectory shaped (time=6, batch=1, ...).
    traj, time_step_spec, action_spec = create_arbitrary_trajectory()
    # Remove the batch dimension so there is only one outer dimension.
    traj = tf.nest.map_structure(lambda x: tf.squeeze(x, axis=1), traj)

    cloning_net = q_network.QNetwork(
        time_step_spec.observation, action_spec)
    agent = behavioral_cloning_agent.BehavioralCloningAgent(
        time_step_spec,
        action_spec,
        cloning_network=cloning_net,
        optimizer=tf.compat.v1.train.AdamOptimizer(learning_rate=0.01))
    # Disable clipping to make sure we can see the difference in behavior
    agent.policy._clip = False
    # Remove policy_info, as BehavioralCloningAgent expects none.
    traj = traj.replace(policy_info=())
    # TODO(b/123883319)
    if tf.executing_eagerly():
      train_and_loss = lambda: agent.train(traj)
    else:
      train_and_loss = agent.train(traj)
    self.evaluate(tf.compat.v1.global_variables_initializer())
    for _ in range(TRAIN_ITERATIONS):
      self.evaluate(train_and_loss)
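create_arbitrary_trajectory() used above comes from the TF-Agents test module and is not shown on this page. A hypothetical stand-in with the documented shape (time=6, batch=1, ...), illustrative only (the real helper may carry non-empty policy_info, which the tests strip):

import tensorflow as tf
from tf_agents.specs import tensor_spec
from tf_agents.trajectories import time_step as ts
from tf_agents.trajectories import trajectory


def make_arbitrary_trajectory():
  """Illustrative stand-in for create_arbitrary_trajectory()."""
  observation_spec = tensor_spec.TensorSpec([2], tf.float32)
  action_spec = tensor_spec.BoundedTensorSpec([], tf.int32, 0, 1)
  time_step_spec = ts.time_step_spec(observation_spec)

  # Six transitions with an outer shape of (time=6, batch=1).
  step_type = tf.constant([[0], [1], [1], [1], [1], [1]], dtype=tf.int32)
  next_step_type = tf.constant([[1], [1], [1], [1], [1], [2]], dtype=tf.int32)
  observation = tf.reshape(tf.range(12, dtype=tf.float32), [6, 1, 2])
  action = tf.constant([[0], [1], [0], [1], [0], [1]], dtype=tf.int32)
  reward = tf.ones([6, 1], dtype=tf.float32)
  discount = tf.ones([6, 1], dtype=tf.float32)

  return trajectory.Trajectory(
      step_type=step_type,
      observation=observation,
      action=action,
      policy_info=(),
      next_step_type=next_step_type,
      reward=reward,
      discount=discount), time_step_spec, action_spec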
Example #4
  def testTrainWithRNN(self):
    # Hard code a trajectory shaped (time=6, batch=1, ...).
    traj, time_step_spec, action_spec = create_arbitrary_trajectory()
    cloning_net = q_rnn_network.QRnnNetwork(
        time_step_spec.observation, action_spec)
    agent = behavioral_cloning_agent.BehavioralCloningAgent(
        time_step_spec,
        action_spec,
        cloning_network=cloning_net,
        optimizer=tf.compat.v1.train.AdamOptimizer(learning_rate=0.01),
        num_outer_dims=2)
    # Disable clipping to make sure we can see the difference in behavior
    agent.policy._clip = False
    # Remove policy_info, as BehavioralCloningAgent expects none.
    traj = traj.replace(policy_info=())
    # TODO(b/123883319)
    if tf.executing_eagerly():
      train_and_loss = lambda: agent.train(traj)
    else:
      train_and_loss = agent.train(traj)
    self.evaluate(tf.compat.v1.global_variables_initializer())

    initial_loss = self.evaluate(train_and_loss).loss
    for _ in range(TRAIN_ITERATIONS - 1):
      loss = self.evaluate(train_and_loss).loss

    # We don't necessarily converge to the same actions as in trajectory after
    # 10 steps of an untuned optimizer, but the loss should go down.
    self.assertGreater(initial_loss, loss)
Example #5
    def testLoss(self):
        cloning_net = get_dummy_net(self._action_spec)
        agent = behavioral_cloning_agent.BehavioralCloningAgent(
            self._time_step_spec,
            self._action_spec,
            cloning_network=cloning_net,
            optimizer=tf.compat.v1.train.AdamOptimizer(learning_rate=0.001))

        observations = tf.constant([[1, 2], [3, 4]], dtype=tf.float32)
        actions = tf.constant([0, 1], dtype=tf.int32)
        rewards = tf.constant([10, 20], dtype=tf.float32)
        discounts = tf.constant([0.9, 0.9], dtype=tf.float32)

        experience = trajectory.first(observation=observations,
                                      action=actions,
                                      policy_info=(),
                                      reward=rewards,
                                      discount=discounts)

        self.evaluate(tf.compat.v1.global_variables_initializer())

        expected_loss = tf.reduce_mean(
            input_tensor=tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=actions, logits=cloning_net(observations)[0]))

        loss_info = agent.train(experience)
        total_loss = self.evaluate(loss_info.loss)

        self.assertAllClose(total_loss, expected_loss)

        test_util.test_loss_and_train_output(test=self,
                                             expect_equal_loss_values=True,
                                             agent=agent,
                                             experience=experience)
Example #6
  def testLoss(self):
    cloning_net = DummyNet(self._observation_spec, self._action_spec)
    agent = behavioral_cloning_agent.BehavioralCloningAgent(
        self._time_step_spec,
        self._action_spec,
        cloning_network=cloning_net,
        optimizer=None)

    observations = [tf.constant([[1, 2], [3, 4]], dtype=tf.float32)]
    actions = tf.constant([0, 1], dtype=tf.int32)
    rewards = tf.constant([10, 20], dtype=tf.float32)
    discounts = tf.constant([0.9, 0.9], dtype=tf.float32)

    experience = trajectory.first(
        observation=observations,
        action=actions,
        policy_info=(),
        reward=rewards,
        discount=discounts)
    loss_info = agent._loss(experience)

    self.evaluate(tf.compat.v1.global_variables_initializer())
    total_loss, _ = self.evaluate(loss_info)

    expected_loss = tf.reduce_mean(
        input_tensor=tf.compat.v1.nn.sparse_softmax_cross_entropy_with_logits(
            logits=cloning_net(observations)[0], labels=actions))

    self.assertAllClose(total_loss, expected_loss)
Example #7
 def testTrainWithSingleOuterDimension(self):
   # Emits trajectories shaped (batch=1, time=6, ...)
   traj, time_step_spec, action_spec = (
       driver_test_utils.make_random_trajectory())
   # Convert to shapes (batch=6, 1, ...) so this works with a non-RNN model.
   traj = tf.nest.map_structure(common.transpose_batch_time, traj)
   # Remove the time dimension so there is only one outer dimension.
   traj = tf.nest.map_structure(lambda x: tf.squeeze(x, axis=1), traj)
   cloning_net = q_network.QNetwork(
       time_step_spec.observation, action_spec)
   agent = behavioral_cloning_agent.BehavioralCloningAgent(
       time_step_spec,
       action_spec,
       cloning_network=cloning_net,
       optimizer=tf.compat.v1.train.AdamOptimizer(learning_rate=0.01))
   # Disable clipping to make sure we can see the difference in behavior
   agent.policy._clip = False
   # Remove policy_info, as BehavioralCloningAgent expects none.
   traj = traj.replace(policy_info=())
   # TODO(b/123883319)
   if tf.executing_eagerly():
     train_and_loss = lambda: agent.train(traj)
   else:
     train_and_loss = agent.train(traj)
   self.evaluate(tf.compat.v1.global_variables_initializer())
   for _ in range(TRAIN_ITERATIONS):
     self.evaluate(train_and_loss)
Example #8
    def testLearnerRaiseExceptionOnMismatchingBatchSetup(self):
        obs_spec = tensor_spec.TensorSpec([2], tf.float32)
        time_step_spec = ts.time_step_spec(obs_spec)
        action_spec = tensor_spec.BoundedTensorSpec([], tf.int32, 0, 1)
        flat_action_spec = tf.nest.flatten(action_spec)[0]
        num_actions = flat_action_spec.maximum - flat_action_spec.minimum + 1

        network = sequential.Sequential([
            tf.keras.layers.Dense(num_actions, dtype=tf.float32),
            inner_reshape.InnerReshape([None], [num_actions])
        ])

        agent = behavioral_cloning_agent.BehavioralCloningAgent(
            time_step_spec,
            action_spec,
            cloning_network=network,
            optimizer=None)

        with self.assertRaisesRegex(
                ValueError,
                'All of the Tensors in `value` must have one outer dimension.'
        ):
            learner.Learner(root_dir=os.path.join(
                self.create_tempdir().full_path, 'learner'),
                            train_step=train_utils.create_train_step(),
                            agent=agent)
Example #9
  def testTrainWithRNN(self):
    # Emits trajectories shaped (batch=1, time=6, ...)
    traj, time_step_spec, action_spec = (
        driver_test_utils.make_random_trajectory())
    cloning_net = q_rnn_network.QRnnNetwork(
        time_step_spec.observation, action_spec)
    agent = behavioral_cloning_agent.BehavioralCloningAgent(
        time_step_spec,
        action_spec,
        cloning_network=cloning_net,
        optimizer=tf.compat.v1.train.AdamOptimizer(learning_rate=0.01),
        num_outer_dims=2)
    # Disable clipping to make sure we can see the difference in behavior
    agent.policy._clip = False
    # Remove policy_info, as BehavioralCloningAgent expects none.
    traj = traj.replace(policy_info=())
    # TODO(b/123883319)
    if tf.executing_eagerly():
      train_and_loss = lambda: agent.train(traj)
    else:
      train_and_loss = agent.train(traj)
    replay = trajectory_replay.TrajectoryReplay(agent.policy)
    self.evaluate(tf.compat.v1.global_variables_initializer())
    initial_actions = self.evaluate(replay.run(traj)[0])

    for _ in range(TRAIN_ITERATIONS):
      self.evaluate(train_and_loss)
    post_training_actions = self.evaluate(replay.run(traj)[0])
    # We don't necessarily converge to the same actions as in trajectory after
    # 10 steps of an untuned optimizer, but the policy does change.
    self.assertFalse(np.all(initial_actions == post_training_actions))
Example #10
 def testTrain(self):
     # Emits trajectories shaped (batch=1, time=6, ...)
     traj, time_step_spec, action_spec = (
         driver_test_utils.make_random_trajectory())
     # Convert to shapes (batch=6, 1, ...) so this works with a non-RNN model.
     traj = tf.nest.map_structure(common.transpose_batch_time, traj)
     cloning_net = q_network.QNetwork(time_step_spec.observation,
                                      action_spec)
     agent = behavioral_cloning_agent.BehavioralCloningAgent(
         time_step_spec,
         action_spec,
         cloning_network=cloning_net,
          optimizer=tf.compat.v1.train.AdamOptimizer(learning_rate=0.01))
     # Remove policy_info, as BehavioralCloningAgent expects none.
     traj = traj.replace(policy_info=())
     train_and_loss = agent.train(traj)
     replay = trajectory_replay.TrajectoryReplay(agent.policy)
     self.evaluate(tf.compat.v1.global_variables_initializer())
     initial_actions = self.evaluate(replay.run(traj)[0])
     for _ in range(TRAIN_ITERATIONS):
         self.evaluate(train_and_loss)
     post_training_actions = self.evaluate(replay.run(traj)[0])
     # We don't necessarily converge to the same actions as in trajectory after
     # 10 steps of an untuned optimizer, but the policy does change.
     self.assertFalse(np.all(initial_actions == post_training_actions))
Example #11
  def testLearnerRaiseExceptionOnMismatchingBatchSetup(self):
    obs_spec = tensor_spec.TensorSpec([2], tf.float32)
    time_step_spec = ts.time_step_spec(obs_spec)
    action_spec = tensor_spec.BoundedTensorSpec([], tf.int32, 0, 1)
    flat_action_spec = tf.nest.flatten(action_spec)[0]
    num_actions = flat_action_spec.maximum - flat_action_spec.minimum + 1

    network = sequential.Sequential([
        tf.keras.layers.Dense(num_actions, dtype=tf.float32),
        inner_reshape.InnerReshape([None], [num_actions])
    ])

    agent = behavioral_cloning_agent.BehavioralCloningAgent(
        time_step_spec, action_spec, cloning_network=network, optimizer=None)

    with self.assertRaisesRegex(
        RuntimeError,
        (r'The slot variable initialization failed. The learner assumes all '
         r'experience tensors required an `outer_rank = \(None, '
         r'agent.train_sequence_length\)`\. If that\'s not the case for your '
         r'agent try setting `run_optimizer_variable_init=False`\.')):
      learner.Learner(
          root_dir=os.path.join(self.create_tempdir().full_path, 'learner'),
          train_step=train_utils.create_train_step(),
          agent=agent)
Example #12
 def testCreateAgent(self):
   cloning_net = DummyNet(self._observation_spec, self._action_spec)
   agent = behavioral_cloning_agent.BehavioralCloningAgent(
       self._time_step_spec,
       self._action_spec,
       cloning_network=cloning_net,
       optimizer=None)
   self.assertIsNotNone(agent.policy)
Example #13
 def testCreateAgentDimChecks(self):
   action_spec = tensor_spec.BoundedTensorSpec([1, 2], tf.int32, 0, 1)
   cloning_net = DummyNet(self._observation_spec, action_spec)
   with self.assertRaisesRegex(NotImplementedError, '.*scalar, unnested.*'):
     behavioral_cloning_agent.BehavioralCloningAgent(
         self._time_step_spec, action_spec,
         cloning_network=cloning_net,
         optimizer=None)
Example #14
 def testCreateAgentWithListActionSpec(self):
   action_spec = [tensor_spec.BoundedTensorSpec([1], tf.int32, 0, 1)]
   cloning_net = DummyNet(self._observation_spec, action_spec)
   with self.assertRaisesRegex(ValueError, '.*nested actions.*'):
     behavioral_cloning_agent.BehavioralCloningAgent(
         self._time_step_spec, action_spec,
         cloning_network=cloning_net,
         optimizer=None)
Example #15
 def test_trainer_initialization(self):
     test_agent = behavioral_cloning_agent.BehavioralCloningAgent(
         self._time_step_spec,
         self._action_spec,
         self._network,
         tf.compat.v1.train.AdamOptimizer(),
         num_outer_dims=2)
     test_trainer = trainer.Trainer(root_dir=self.get_temp_dir(),
                                    agent=test_agent)
     self.assertEqual(0, test_trainer._global_step.numpy())
Example #16
 def verifyTrainAndRestore(self,
                           observation_spec,
                           action_spec,
                           actor_net,
                           loss_fn=None):
     """Helper function for testing correct variable updating and restoring."""
     batch_size = 2
     observations = tensor_spec.sample_spec_nest(observation_spec,
                                                 outer_dims=(batch_size, ))
     actions = tensor_spec.sample_spec_nest(action_spec,
                                            outer_dims=(batch_size, ))
     rewards = tf.constant([10, 20], dtype=tf.float32)
     discounts = tf.constant([0.9, 0.9], dtype=tf.float32)
     experience = trajectory.first(observation=observations,
                                   action=actions,
                                   policy_info=(),
                                   reward=rewards,
                                   discount=discounts)
     time_step_spec = ts.time_step_spec(observation_spec)
     strategy = tf.distribute.get_strategy()
     with strategy.scope():
          # Use BehavioralCloningAgent instead of AWRAgent to test the network.
         agent = behavioral_cloning_agent.BehavioralCloningAgent(
             time_step_spec,
             action_spec,
             cloning_network=actor_net,
             optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
             loss_fn=loss_fn)
     loss_before_train = agent.loss(experience).loss
     # Check loss is stable.
     self.assertEqual(loss_before_train, agent.loss(experience).loss)
     # Train 1 step, verify that loss is decreased for the same input.
     agent.train(experience)
     loss_after_train = agent.loss(experience).loss
     self.assertLessEqual(loss_after_train, loss_before_train)
     # Assert loss evaluation is still stable, e.g. deterministic.
     self.assertLessEqual(loss_after_train, agent.loss(experience).loss)
     # Save checkpoint
     ckpt_dir = self.create_tempdir()
     checkpointer = common.Checkpointer(ckpt_dir=ckpt_dir, agent=agent)
     global_step = tf.constant(1)
     checkpointer.save(global_step)
     # Assign all vars to 0.
     for var in tf.nest.flatten(agent.variables):
         var.assign(tf.zeros_like(var))
     loss_after_zero = agent.loss(experience).loss
     self.assertEqual(loss_after_zero, agent.loss(experience).loss)
     self.assertNotEqual(loss_after_zero, loss_after_train)
     # Restore
     checkpointer._checkpoint.restore(
         checkpointer._manager.latest_checkpoint)
     loss_after_restore = agent.loss(experience).loss
     self.assertNotEqual(loss_after_restore, loss_after_zero)
     self.assertEqual(loss_after_restore, loss_after_train)
Example #17
    def testCreateAgentNestSizeChecks(self):
        action_spec = [
            tensor_spec.BoundedTensorSpec([], tf.int32, 0, 1),
            tensor_spec.BoundedTensorSpec([], tf.int32, 0, 1)
        ]

        cloning_net = get_dummy_net(action_spec)
        with self.assertRaisesRegex(ValueError, 'Only scalar .*'):
            behavioral_cloning_agent.BehavioralCloningAgent(
                self._time_step_spec,
                action_spec,
                cloning_network=cloning_net,
                optimizer=None)
Example #18
    def testCreateAgentNestSizeChecks(self):
        action_spec = [
            tensor_spec.BoundedTensorSpec([], tf.int32, 0, 1),
            tensor_spec.BoundedTensorSpec([], tf.int32, 0, 1)
        ]

        cloning_net = DummyNet(self._observation_spec, action_spec)
        with self.assertRaisesRegex(ValueError, '.*multi-dimensional.*'):
            behavioral_cloning_agent.BehavioralCloningAgent(
                self._time_step_spec,
                action_spec,
                cloning_network=cloning_net,
                optimizer=None)
Example #19
def _create_behavioral_cloning_agent(time_step_spec, action_spec,
                                     policy_network):
    """Creates a behavioral_cloning_agent."""
    layers = tf.nest.map_structure(
        feature_ops.get_observation_processing_layer_creator(),
        time_step_spec.observation)

    network = policy_network(time_step_spec.observation,
                             action_spec,
                             preprocessing_layers=layers,
                             name='QNetwork')

    return behavioral_cloning_agent.BehavioralCloningAgent(
        time_step_spec, action_spec, cloning_network=network, num_outer_dims=2)
Example #20
    def test_training(self):
        test_agent = behavioral_cloning_agent.BehavioralCloningAgent(
            self._time_step_spec,
            self._action_spec,
            self._network,
            tf.compat.v1.train.AdamOptimizer(),
            num_outer_dims=2)
        test_trainer = trainer.Trainer(root_dir=self.get_temp_dir(),
                                       agent=test_agent)
        self.assertEqual(0, test_trainer._global_step.numpy())

        dataset_iter = _create_test_data(batch_size=3, sequence_length=3)
        test_trainer.train(dataset_iter, num_iterations=10)
        self.assertEqual(10, test_trainer._global_step.numpy())
Example #21
 def testPolicy(self):
   cloning_net = DummyNet(self._observation_spec, self._action_spec)
   agent = behavioral_cloning_agent.BehavioralCloningAgent(
       self._time_step_spec,
       self._action_spec,
       cloning_network=cloning_net,
       optimizer=None)
   observations = [tf.constant([[1, 2], [3, 4]], dtype=tf.float32)]
   time_steps = ts.restart(observations, batch_size=2)
   policy = agent.policy
   action_step = policy.action(time_steps)
   # Batch size 2.
   self.assertAllEqual(
       [2] + self._action_spec.shape.as_list(),
       action_step.action.shape,
   )
   self.evaluate(tf.compat.v1.global_variables_initializer())
   actions_ = self.evaluate(action_step.action)
   self.assertTrue(all(actions_ <= self._action_spec.maximum))
   self.assertTrue(all(actions_ >= self._action_spec.minimum))
Example #22
    def testTrainWithNN(self, is_convert, is_distribution_network):
        # Hard code a trajectory shaped (time=6, batch=1, ...).
        traj, time_step_spec, action_spec = create_arbitrary_trajectory()

        if is_convert:
            # Convert to single step trajectory of shapes (batch=6, 1, ...).
            traj = tf.nest.map_structure(common.transpose_batch_time, traj)

        if is_distribution_network:
            cloning_net = sequential.Sequential([
                expand_dims_layer.ExpandDims(-1),
                tf.keras.layers.Dense(action_spec.maximum -
                                      action_spec.minimum + 1),
                tf.keras.layers.Lambda(
                    lambda t: tfp.distributions.Categorical(logits=t)),
            ])
        else:
            cloning_net = q_network.QNetwork(time_step_spec.observation,
                                             action_spec)
        agent = behavioral_cloning_agent.BehavioralCloningAgent(
            time_step_spec,
            action_spec,
            cloning_network=cloning_net,
            optimizer=tf.compat.v1.train.AdamOptimizer(learning_rate=0.001),
            num_outer_dims=2)
        # Disable clipping to make sure we can see the difference in behavior
        agent.policy._clip = False
        # TODO(b/123883319)
        if tf.executing_eagerly():
            train_and_loss = lambda: agent.train(traj)
        else:
            train_and_loss = agent.train(traj)
        self.evaluate(tf.compat.v1.global_variables_initializer())

        initial_loss = self.evaluate(train_and_loss).loss
        for _ in range(TRAIN_ITERATIONS - 1):
            loss = self.evaluate(train_and_loss).loss

        # We don't necessarily converge to the same actions as in trajectory after
        # 10 steps of an untuned optimizer, but the loss should go down.
        self.assertGreater(initial_loss, loss)
Example #23
  def testInitializeRestoreAgent(self):
    cloning_net = DummyNet(self._observation_spec, self._action_spec)
    agent = behavioral_cloning_agent.BehavioralCloningAgent(
        self._time_step_spec,
        self._action_spec,
        cloning_network=cloning_net,
        optimizer=None)
    observations = [tf.constant([[1, 2], [3, 4]], dtype=tf.float32)]
    time_steps = ts.restart(observations, batch_size=2)
    policy = agent.policy
    action_step = policy.action(time_steps)
    self.evaluate(tf.compat.v1.global_variables_initializer())

    checkpoint = tf.train.Checkpoint(agent=agent)

    latest_checkpoint = tf.train.latest_checkpoint(self.get_temp_dir())
    checkpoint_load_status = checkpoint.restore(latest_checkpoint)

    with self.cached_session() as sess:
      checkpoint_load_status.initialize_or_restore(sess)
      self.assertAllEqual(sess.run(action_step.action), [0, 0])
Example #24
 def verifyVariableAssignAndRestore(self,
                                    observation_spec,
                                    action_spec,
                                    actor_net,
                                    loss_fn=None):
     strategy = tf.distribute.get_strategy()
     time_step_spec = ts.time_step_spec(observation_spec)
     with strategy.scope():
          # Use BehavioralCloningAgent instead of AWRAgent to test the network.
         agent = behavioral_cloning_agent.BehavioralCloningAgent(
             time_step_spec,
             action_spec,
             cloning_network=actor_net,
             optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
             loss_fn=loss_fn)
     # Assign all vars to 0.
     for var in tf.nest.flatten(agent.variables):
         var.assign(tf.zeros_like(var))
     # Save checkpoint
     ckpt_dir = self.create_tempdir()
     checkpointer = common.Checkpointer(ckpt_dir=ckpt_dir, agent=agent)
     global_step = tf.constant(0)
     checkpointer.save(global_step)
     # Assign all vars to 1.
     for var in tf.nest.flatten(agent.variables):
         var.assign(tf.ones_like(var))
     # Restore to 0.
     checkpointer._checkpoint.restore(
         checkpointer._manager.latest_checkpoint)
     for var in tf.nest.flatten(agent.variables):
         value = var.numpy()
         if isinstance(value, np.int64):
             self.assertEqual(value, 0)
         else:
             self.assertAllEqual(
                 value,
                 np.zeros_like(value),
                 msg='{} has var mean {}, expected 0.'.format(
                     var.name, value))
Example #25
    def test_training(self):
        test_agent = behavioral_cloning_agent.BehavioralCloningAgent(
            self._time_step_spec,
            self._action_spec,
            self._network,
            tf.compat.v1.train.AdamOptimizer(),
            num_outer_dims=2)
        test_trainer = trainer.Trainer(root_dir=self.get_temp_dir(),
                                       agent=test_agent)
        self.assertEqual(0, test_trainer._global_step.numpy())

        dataset_iter = _create_test_data(batch_size=3, sequence_length=3)
        monitor_dict = {'test': 1}

        with mock.patch.object(tf.summary, 'scalar',
                               autospec=True) as mock_scalar_summary:
            test_trainer.train(dataset_iter, monitor_dict, num_iterations=10)
            self.assertEqual(
                10,
                sum(1 for c in mock_scalar_summary.mock_calls
                    if c[2]['name'] == 'test'))
            self.assertEqual(10, test_trainer._global_step.numpy())
Example #26
    def test_save_policy(self):
        test_agent = behavioral_cloning_agent.BehavioralCloningAgent(
            self._time_step_spec, self._action_spec, self._network,
            tf.compat.v1.train.AdamOptimizer())
        policy_dict = {
            'saved_policy': test_agent.policy,
            'saved_collect_policy': test_agent.collect_policy
        }
        test_policy_saver = policy_saver.PolicySaver(policy_dict=policy_dict)

        root_dir = self.get_temp_dir()
        test_policy_saver.save(root_dir)

        sub_dirs = tf.io.gfile.listdir(root_dir)
        self.assertCountEqual(['saved_policy', 'saved_collect_policy'],
                              sub_dirs)

        for sub_dir in ['saved_policy', 'saved_collect_policy']:
            self.assertTrue(
                tf.io.gfile.exists(
                    os.path.join(root_dir, sub_dir, 'saved_model.pb')))
            self.assertTrue(
                tf.io.gfile.exists(
                    os.path.join(root_dir, sub_dir,
                                 'variables/variables.data-00000-of-00001')))
            output_signature_fn = os.path.join(root_dir, sub_dir,
                                               'output_spec.json')
            self.assertTrue(tf.io.gfile.exists(output_signature_fn))
            self.assertEqual([{
                'logging_name': 'inlining_decision',
                'tensor_spec': {
                    'name': 'StatefulPartitionedCall',
                    'port': 0,
                    'type': 'int64_t',
                    'shape': [1],
                }
            }], json.loads(tf.io.gfile.GFile(output_signature_fn).read()))
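As a hedged follow-up to test_save_policy (not part of the test itself): each saved policy directory is a regular SavedModel, so it can be reloaded, for example with tf.saved_model.load. The path below is hypothetical; in the test it is self.get_temp_dir().

import os
import tensorflow as tf

# Hypothetical root directory containing the saved policies.
root_dir = '/tmp/bc_policies'
saved_policy = tf.saved_model.load(os.path.join(root_dir, 'saved_policy'))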