def test_tfrecord_observer(self):
     tfrecord_observer = example_encoding_dataset.TFRecordObserver(
         self.dataset_path, self.simple_data_spec)
     # Draw a random sample from the simple spec
     sample = tensor_spec.sample_spec_nest(self.simple_data_spec,
                                           np.random.RandomState(0),
                                           outer_dims=(1, ))
     # Write to file using __call__() function
     for _ in range(3):
         tfrecord_observer(sample)
     # Manually flush
     tfrecord_observer.flush()
     # Delete should call close() function
     del tfrecord_observer
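A follow-up sanity check one could add here, sketched under the assumption that each `__call__` above serializes exactly one record to `self.dataset_path` (uses the same `tf`/`np` imports as the rest of these examples):

raw_dataset = tf.data.TFRecordDataset(self.dataset_path)
num_records = raw_dataset.reduce(np.int64(0), lambda count, _: count + 1)
# Three observer calls are expected to produce three serialized records.
self.assertEqual(3, self.evaluate(num_records))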
Example #2
    def test_build(self, outer_dims):
        observation_spec = tensor_spec.BoundedTensorSpec((8, 8, 3), tf.float32,
                                                         0, 1)
        time_step_spec = ts.time_step_spec(observation_spec)
        time_step = tensor_spec.sample_spec_nest(time_step_spec,
                                                 outer_dims=outer_dims)

        action_spec = [
            tensor_spec.BoundedTensorSpec((2, ), tf.float32, 2, 3),
            tensor_spec.BoundedTensorSpec((3, ), tf.float32, 0, 3)
        ]
        net = actor_rnn_network.ActorRnnNetwork(observation_spec,
                                                action_spec,
                                                conv_layer_params=[(4, 2, 2)],
                                                input_fc_layer_params=(5, ),
                                                lstm_size=(3, ),
                                                output_fc_layer_params=(5, ))

        actions, network_state = net(time_step.observation,
                                     time_step.step_type)
        self.assertEqual(list(outer_dims) + [2], actions[0].shape.as_list())
        self.assertEqual(list(outer_dims) + [3], actions[1].shape.as_list())

        self.assertEqual(13, len(net.variables))
        # Conv Net Kernel
        self.assertEqual((2, 2, 3, 4), net.variables[0].shape)
        # Conv Net bias
        self.assertEqual((4, ), net.variables[1].shape)
        # Fc Kernel
        self.assertEqual((64, 5), net.variables[2].shape)
        # Fc Bias
        self.assertEqual((5, ), net.variables[3].shape)
        # LSTM Cell Kernel
        self.assertEqual((5, 12), net.variables[4].shape)
        # LSTM Cell Recurrent Kernel
        self.assertEqual((3, 12), net.variables[5].shape)
        # LSTM Cell Bias
        self.assertEqual((12, ), net.variables[6].shape)
        # Fc Kernel
        self.assertEqual((3, 5), net.variables[7].shape)
        # Fc Bias
        self.assertEqual((5, ), net.variables[8].shape)
        # Action 1 Kernel
        self.assertEqual((5, 2), net.variables[9].shape)
        # Action 1 Bias
        self.assertEqual((2, ), net.variables[10].shape)
        # Action 2 Kernel
        self.assertEqual((5, 3), net.variables[11].shape)
        # Action 2 Bias
        self.assertEqual((3, ), net.variables[12].shape)
Example #3
def get_distribution_class_spec(policy, time_step_spec):
    """Gets a nest of action distribution classes.

  Args:
    policy: Policy for constructing action distribution.
    time_step_spec: Spec for time_step for creating action distribution.
  Returns:
    The nest of distribution class references.
  """
    sample_distribution_step = policy.distribution(
        tensor_spec.sample_spec_nest(time_step_spec, outer_dims=[1]),
        policy_state=policy.get_initial_state(1))
    sample_distribution = sample_distribution_step.action
    return nest.map_structure(lambda dist: dist.__class__, sample_distribution)
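A minimal usage sketch for the helper above; `my_policy` and `my_time_step_spec` are illustrative placeholders for an existing TF-Agents policy and its matching time step spec:

# Sketch: inspect which action-distribution classes a policy emits.
dist_classes = get_distribution_class_spec(my_policy, my_time_step_spec)
# For a continuous actor policy this is typically a nest of tfp distribution
# classes such as Normal; for a discrete policy, Categorical.
tf.nest.map_structure(print, dist_classes)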
Example #4
    def _action(self, time_step, policy_state, seed):
        observation_and_action_constraint_splitter = (
            self.observation_and_action_constraint_splitter)

        if observation_and_action_constraint_splitter is not None:
            _, mask = observation_and_action_constraint_splitter(
                time_step.observation)

            zero_logits = tf.cast(tf.zeros_like(mask), tf.float32)
            masked_categorical = masked.MaskedCategorical(zero_logits, mask)
            # Modified to accommodate scalar action spaces
            #action_ = tf.cast(masked_categorical.sample() + self.action_spec.minimum,
            #                  self.action_spec.dtype)
            action_ = tf.reshape(
                tf.cast(masked_categorical.sample() + self.action_spec.minimum,
                        self.action_spec.dtype), [1])

            # If the action spec says each action should be shaped (1,), add another
            # dimension so the final shape is (B, 1) rather than (B,).
            if self.action_spec.shape.rank == 1:
                action_ = tf.expand_dims(action_, axis=-1)
        else:
            outer_dims = nest_utils.get_outer_shape(time_step,
                                                    self._time_step_spec)

            action_ = tensor_spec.sample_spec_nest(self._action_spec,
                                                   seed=seed,
                                                   outer_dims=outer_dims)

        # TODO(b/78181147): Investigate why this control dependency is required.
        if time_step is not None:
            with tf.control_dependencies(tf.nest.flatten(time_step)):
                action_ = tf.nest.map_structure(tf.identity, action_)
        step = policy_step.PolicyStep(action_, policy_state)

        if self.emit_log_probability:
            if observation_and_action_constraint_splitter is not None:
                log_probability = masked_categorical.log_prob(
                    action_ - self.action_spec.minimum)
            else:
                action_probability = tf.nest.map_structure(
                    _uniform_probability, self._action_spec)
                log_probability = tf.nest.map_structure(
                    tf.math.log, action_probability)

            info = policy_step.PolicyInfo(log_probability=log_probability)
            return step._replace(info=info)

        return step
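To make the masking above concrete, an illustrative sketch (values chosen here, not taken from the policy code): with all-zero logits, `MaskedCategorical` samples uniformly over the positions the mask allows.

logits = tf.zeros([1, 4], dtype=tf.float32)
mask = tf.constant([[1, 0, 1, 0]], dtype=tf.int32)   # only actions 0 and 2 are valid
dist = masked.MaskedCategorical(logits, mask)
samples = dist.sample(100)   # shape [100, 1]; every sampled index should be 0 or 2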
Example #5
File: network.py Project: panwliu/agents
  def create_variables(self, input_tensor_spec=None, **kwargs):
    """Force creation of the network's variables.

    Return output specs.

    Args:
      input_tensor_spec: (Optional).  Override or provide an input tensor spec
        when creating variables.
      **kwargs: Other arguments to `network.call()`, e.g. `training=True`.

    Returns:
      Output specs - a nested spec calculated from the outputs (excluding any
      batch dimensions).  If any of the output elements is a tfp `Distribution`,
      the associated spec entry returned is `None`.

    Raises:
      ValueError: If no `input_tensor_spec` is provided, and the network did
        not provide one during construction.
    """
    if self._network_output_spec is not None:
      return self._network_output_spec
    if self._input_tensor_spec is None:
      self._input_tensor_spec = input_tensor_spec
    input_tensor_spec = self._input_tensor_spec
    if input_tensor_spec is None:
      raise ValueError(
          "Unable to create_variables: no input_tensor_spec provided, and "
          "Network did not define one.")

    random_input = tensor_spec.sample_spec_nest(
        input_tensor_spec, outer_dims=(1,))
    initial_state = self.get_initial_state(batch_size=1)
    step_type = tf.fill((1,), time_step.StepType.FIRST)
    outputs = self.__call__(
        random_input,
        step_type=step_type,
        network_state=initial_state,
        **kwargs)

    def _calc_unbatched_spec(x):
      if isinstance(x, tfp.distributions.Distribution):
        return None
      else:
        return nest_utils.remove_singleton_batch_spec_dim(
            tf.type_spec_from_value(x), outer_ndim=1)

    self._network_output_spec = tf.nest.map_structure(
        _calc_unbatched_spec, outputs[0])
    return self._network_output_spec
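A short usage sketch for `create_variables`, with `MyActorNetwork` standing in as a placeholder for any concrete `Network` subclass and an illustrative input spec:

observation_spec = tensor_spec.TensorSpec([4], tf.float32)   # illustrative input spec
net = MyActorNetwork(input_tensor_spec=observation_spec)     # placeholder Network subclass
output_spec = net.create_variables()   # runs a dummy batch of 1 through __call__
print(output_spec)                     # nested output spec with the batch dimension removed
print(len(net.variables))              # variables now exist and can be checkpointed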
Example #6
    def testL2RegularizationLossWithSharedVariables(self, not_zero):
        policy_l2_reg = 4e-4 * not_zero
        value_function_l2_reg = 2e-4 * not_zero
        shared_vars_l2_reg = 1e-4 * not_zero
        actor_net, value_net = _create_joint_actor_value_networks(
            self._obs_spec, self._action_spec)
        agent = ppo_agent.PPOAgent(
            self._time_step_spec,
            self._action_spec,
            tf.compat.v1.train.AdamOptimizer(),
            actor_net=actor_net,
            value_net=value_net,
            normalize_observations=False,
            policy_l2_reg=policy_l2_reg,
            value_function_l2_reg=value_function_l2_reg,
            shared_vars_l2_reg=shared_vars_l2_reg,
        )

        # Call other loss functions to make sure trainable variables are
        #   constructed.
        observations = tf.constant([[1, 2], [3, 4]], dtype=tf.float32)
        time_steps = ts.restart(observations, batch_size=2)
        actions = tf.constant([[0], [1]], dtype=tf.float32)
        returns = tf.constant([1.9, 1.0], dtype=tf.float32)
        sample_action_log_probs = tf.constant([[0.9], [0.3]], dtype=tf.float32)
        advantages = tf.constant([1.9, 1.0], dtype=tf.float32)
        current_policy_distribution, unused_network_state = DummyActorNet(
            self._obs_spec, self._action_spec)(time_steps.observation,
                                               time_steps.step_type, ())
        weights = tf.ones_like(advantages)
        agent.policy_gradient_loss(time_steps, actions,
                                   sample_action_log_probs, advantages,
                                   current_policy_distribution, weights)
        agent.value_estimation_loss(time_steps, returns, weights)

        # Now request L2 regularization loss.
        # Value function weights are [2, 1], actor net weights are [2, 1, 1, 1],
        # shared weights are [3, 1, 1, 1].
        expected_loss = value_function_l2_reg * (2**2 + 1) + policy_l2_reg * (
            2**2 + 1 + 1 + 1) + shared_vars_l2_reg * (3**2 + 1 + 1 + 1)
        # Make sure the network is built before we try to get variables.
        agent.policy.action(
            tensor_spec.sample_spec_nest(self._time_step_spec,
                                         outer_dims=(2, )))
        loss = agent.l2_regularization_loss()

        self.evaluate(tf.compat.v1.global_variables_initializer())
        loss_ = self.evaluate(loss)
        self.assertAllClose(loss_, expected_loss)
Example #7
  def test_auto_reset(self):
    time_step = self.evaluate(self.random_env.reset())
    random_action = self.evaluate(
        tensor_spec.sample_spec_nest(self.action_spec, outer_dims=(1,)))

    while not time_step.is_last():
      time_step = self.evaluate(self.random_env.step(random_action))

    self.assertTrue(time_step.is_last())

    current_time_step = self.evaluate(self.random_env.current_time_step())
    self.assertTrue(current_time_step.is_last())

    first_time_step = self.evaluate(self.random_env.step(random_action))
    self.assertTrue(first_time_step.is_first())
Example #8
    def testHandleBatchOnlyObservation(self):
        observation_spec = tensor_spec.BoundedTensorSpec((8, 8, 3), tf.float32,
                                                         0, 1)
        time_step_spec = ts.time_step_spec(observation_spec)
        time_step = tensor_spec.sample_spec_nest(time_step_spec,
                                                 outer_dims=(3, ))

        net = value_rnn_network.ValueRnnNetwork(observation_spec,
                                                conv_layer_params=[(4, 2, 2)],
                                                input_fc_layer_params=(5, ),
                                                lstm_size=(7, 5),
                                                output_fc_layer_params=(3, ))

        value, _ = net(time_step.observation, time_step.step_type)
        self.assertEqual([3], value.shape.as_list())
Example #9
 def testPrunes(self):
     converter = data_converter.AsNStepTransition(self._data_context,
                                                  gamma=0.5)
     my_spec = self._data_context.transition_spec.replace(
         action_step=self._data_context.transition_spec.action_step.replace(
             action={
                 'action1': tf.TensorSpec((), tf.float32),
                 'action2': tf.TensorSpec([4], tf.int32)
             }))
     transition = tensor_spec.sample_spec_nest(my_spec, outer_dims=[2])
     converted = converter(transition)
     expected = tf.nest.map_structure(lambda x: x, transition)
     del expected.action_step.action['action2']
     (expected, converted) = self.evaluate((expected, converted))
     tf.nest.map_structure(self.assertAllEqual, converted, expected)
Example #10
 def testCreateFeedForwardCommonTowerNetworkWithEmptyArmLayers(
         self, batch_size, feature_dim, num_actions):
     obs_spec = bandit_spec_utils.create_per_arm_observation_spec(
         7, feature_dim, num_actions)
     net = gafn.create_feed_forward_common_tower_network(
         obs_spec,
         global_layers=(4, 3, 2),
         arm_layers=(),
         common_layers=(7, 6, 5))
     input_nest = tensor_spec.sample_spec_nest(obs_spec,
                                               outer_dims=(batch_size, ))
     output, _ = net(input_nest)
     self.evaluate(tf.compat.v1.global_variables_initializer())
     output = self.evaluate(output)
     self.assertAllEqual(output.shape, (batch_size, num_actions))
Example #11
  def testAgentFollowsActionSpec(self, agent_class):
    agent = agent_class(
        self._time_step_spec,
        self._action_spec,
        q_network=q_network.QNetwork(self._observation_spec, self._action_spec),
        optimizer=None)
    self.assertIsNotNone(agent.policy())
    policy = agent.policy()
    observation = tensor_spec.sample_spec_nest(
        self._time_step_spec, seed=42, outer_dims=(1,))
    action_op = policy.action(observation).action
    self.evaluate(tf.compat.v1.initialize_all_variables())

    action = self.evaluate(action_op)
    self.assertEqual([1] + self._action_spec[0].shape.as_list(),
                     list(action[0].shape))
Example #12
    def test_state_saved_after_step(self):
        self.evaluate(self.random_env.reset())
        random_action = self.evaluate(
            tensor_spec.sample_spec_nest(self.action_spec, outer_dims=(1, )))

        expected_time_step = self.evaluate(self.random_env.step(random_action))
        current_time_step = self.evaluate(self.random_env.current_time_step())

        np.testing.assert_almost_equal(expected_time_step.step_type,
                                       current_time_step.step_type)
        np.testing.assert_almost_equal(expected_time_step.observation,
                                       current_time_step.observation)
        np.testing.assert_almost_equal(expected_time_step.discount,
                                       current_time_step.discount)
        np.testing.assert_almost_equal(expected_time_step.reward,
                                       current_time_step.reward)
Example #13
    def testBuildsScalarContinuousActionSpace(self):
        observation_spec = tensor_spec.BoundedTensorSpec((8, 8, 3), tf.float32,
                                                         0, 1)
        time_step_spec = ts.time_step_spec(observation_spec)
        time_step = tensor_spec.sample_spec_nest(time_step_spec,
                                                 outer_dims=(1, ))

        action_spec = tensor_spec.BoundedTensorSpec((), tf.float32, 2, 3)

        net = actor_distribution_network.ActorDistributionNetwork(
            observation_spec, action_spec)

        action_distributions, _ = net(time_step.observation,
                                      time_step.step_type, ())
        self.evaluate(tf.compat.v1.global_variables_initializer())
        self.assertEqual([1], action_distributions.mode().shape.as_list())
Example #14
    def testBuilds(self):
        observation_spec = tensor_spec.BoundedTensorSpec((8, 8, 3), tf.float32,
                                                         0, 1)
        time_step_spec = ts.time_step_spec(observation_spec)
        time_step = tensor_spec.sample_spec_nest(time_step_spec,
                                                 outer_dims=(1, 3))

        net = value_rnn_network.ValueRnnNetwork(observation_spec,
                                                conv_layer_params=[(4, 2, 2)],
                                                input_fc_layer_params=(5, ),
                                                lstm_size=(7, ),
                                                output_fc_layer_params=(3, ))

        value, state = net(time_step.observation,
                           step_type=time_step.step_type,
                           network_state=net.get_initial_state(batch_size=1))
        self.evaluate(tf.compat.v1.global_variables_initializer())

        self.assertEqual((1, 3), value.shape)

        self.assertEqual(11, len(net.variables))
        # Conv Net Kernel
        self.assertEqual((2, 2, 3, 4), net.variables[0].shape)
        # Conv Net bias
        self.assertEqual((4, ), net.variables[1].shape)
        # Fc Kernel
        self.assertEqual((64, 5), net.variables[2].shape)
        # Fc Bias
        self.assertEqual((5, ), net.variables[3].shape)
        # LSTM Cell Kernel
        self.assertEqual((5, 28), net.variables[4].shape)
        # LSTM Cell Recurrent Kernel
        self.assertEqual((7, 28), net.variables[5].shape)
        # LSTM Cell Bias
        self.assertEqual((28, ), net.variables[6].shape)
        # Fc Kernel
        self.assertEqual((7, 3), net.variables[7].shape)
        # Fc Bias
        self.assertEqual((3, ), net.variables[8].shape)
        # Value Shrink Kernel
        self.assertEqual((3, 1), net.variables[9].shape)
        # Value Shrink bias
        self.assertEqual((1, ), net.variables[10].shape)

        # Assert LSTM cell is created.
        self.assertEqual((1, 7), state[0].shape)
        self.assertEqual((1, 7), state[1].shape)
Example #15
    def testAgentFollowsActionSpecWithScalarAction(self, agent_class):
        action_spec = [tensor_spec.BoundedTensorSpec((), tf.int32, 0, 1)]
        agent = agent_class(self._time_step_spec,
                            action_spec,
                            q_network=q_network.QNetwork(
                                self._observation_spec, action_spec),
                            optimizer=None)
        self.assertIsNotNone(agent.policy)
        policy = agent.policy
        observation = tensor_spec.sample_spec_nest(self._time_step_spec,
                                                   seed=42,
                                                   outer_dims=(1, ))

        action_op = policy.action(observation).action
        self.evaluate(tf.compat.v1.initialize_all_variables())
        action = self.evaluate(action_op)
        self.assertEqual([1] + action_spec[0].shape.as_list(),
                         list(action[0].shape))
Example #16
  def _action(self, time_step, policy_state, seed):
    outer_dims = nest_utils.get_outer_shape(time_step, self._time_step_spec)

    action_ = tensor_spec.sample_spec_nest(
        self._action_spec, seed=seed, outer_dims=outer_dims)
    # TODO(b/78181147): Investigate why this control dependency is required.
    if time_step is not None:
      with tf.control_dependencies(tf.nest.flatten(time_step)):
        action_ = tf.nest.map_structure(tf.identity, action_)
    step = policy_step.PolicyStep(action_, policy_state)

    if self.emit_log_probability:
      action_probability = tf.nest.map_structure(_uniform_probability,
                                                 self._action_spec)
      log_probability = tf.nest.map_structure(tf.math.log, action_probability)
      info = policy_step.PolicyInfo(log_probability=log_probability)
      return step._replace(info=info)

    return step
Example #17
def make_random_trajectory():
    """Creates a random trajectory.

  This trajectory contains Tensors shaped `[1, 6, ...]` where `1` is the batch
  and `6` is the number of time steps.

  Observations are unbounded but actions are bounded to take values within
  `[1, 2]`.

  Policy info is also provided, and is equal to the actions.  It can be removed
  via:

  ```python
  traj, _, _ = make_random_trajectory()
  traj = traj.clone(policy_info=())
  ```

  Returns:
    A `(Trajectory, time_step_spec, action_spec)` tuple.
  """
    time_step_spec = ts.time_step_spec(
        tensor_spec.TensorSpec([], tf.int32, name='observation'))
    action_spec = tensor_spec.BoundedTensorSpec([],
                                                tf.int32,
                                                minimum=1,
                                                maximum=2,
                                                name='action')
    # info and policy state specs match that of TFPolicyMock.
    outer_dims = [1, 6]  # (batch_size, time)
    traj = trajectory.Trajectory(
        observation=tensor_spec.sample_spec_nest(time_step_spec.observation,
                                                 outer_dims=outer_dims),
        action=tensor_spec.sample_bounded_spec(action_spec,
                                               outer_dims=outer_dims),
        policy_info=tensor_spec.sample_bounded_spec(action_spec,
                                                    outer_dims=outer_dims),
        reward=tf.fill(outer_dims, tf.constant(0, dtype=tf.float32)),
        # step_type is F M L F M L.
        step_type=tf.reshape(tf.range(0, 6) % 3, outer_dims),
        # next_step_type is M L F M L F.
        next_step_type=tf.reshape(tf.range(1, 7) % 3, outer_dims),
        discount=tf.fill(outer_dims, tf.constant(1, dtype=tf.float32)),
    )
    return traj, time_step_spec, action_spec
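A brief usage sketch for the helper above (variable names are illustrative):

traj, time_step_spec, action_spec = make_random_trajectory()
# Outer dims are (batch_size=1, time=6), so observations are shaped [1, 6].
print(traj.observation.shape)   # (1, 6)
print(traj.step_type)           # [[0 1 2 0 1 2]], i.e. F M L F M L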
Example #18
  def testPolicySaverCompatibility(self):
    observation_spec = tensor_spec.TensorSpec(shape=(100,), dtype=tf.float32)
    action_spec = tensor_spec.TensorSpec(shape=(5,), dtype=tf.float32)
    time_step_tensor_spec = ts.time_step_spec(observation_spec)
    net = ActorNetwork(observation_spec, action_spec)
    net.create_variables()
    policy = actor_policy.ActorPolicy(time_step_tensor_spec, action_spec, net)

    sample = tensor_spec.sample_spec_nest(
        time_step_tensor_spec, outer_dims=(5,))

    policy.action(sample)

    train_step = common.create_variable('train_step')
    saver = policy_saver.PolicySaver(policy, train_step=train_step)
    self.initialize_v1_variables()

    with self.cached_session():
      saver.save(os.path.join(FLAGS.test_tmpdir, 'sequential_layer_model'))
Example #19
 def testProcessExperiencePerArmFeaturesWithMask(self):
     mask_spec = tensor_spec.BoundedTensorSpec(shape=(5, ),
                                               minimum=0,
                                               maximum=1,
                                               dtype=tf.int32)
     observation_spec = ({
         'global':
         tf.TensorSpec(shape=(4, ), dtype=tf.float32),
         'per_arm': {
             'f1': tf.TensorSpec(shape=(5, ), dtype=tf.string),
             'f2': tf.TensorSpec(shape=(5, 2), dtype=tf.int32)
         }
     }, mask_spec)
     time_step_spec = time_step.time_step_spec(observation_spec)
     policy_info_spec = policy_utilities.PerArmPolicyInfo(
         chosen_arm_features={
             'f1': tf.TensorSpec(shape=(), dtype=tf.string),
             'f2': tf.TensorSpec(shape=(2, ), dtype=tf.int32)
         })
     training_data_spec = trajectory.Trajectory(
         step_type=time_step_spec.step_type,
         observation=time_step_spec.observation,
         action=tensor_spec.BoundedTensorSpec(shape=(),
                                              minimum=0,
                                              maximum=4,
                                              dtype=tf.int32),
         policy_info=policy_info_spec,
         next_step_type=time_step_spec.step_type,
         reward=tensor_spec.BoundedTensorSpec(shape=(),
                                              minimum=0,
                                              maximum=2,
                                              dtype=tf.float32),
         discount=time_step_spec.discount)
     experience = tensor_spec.sample_spec_nest(training_data_spec,
                                               outer_dims=(7, 2))
     observation, action, reward = utils.process_experience_for_neural_agents(
         experience, lambda x: (x[0], x[1]), True, training_data_spec)
     self.assertEqual(
         observation['per_arm']['f1'][0],
         experience.policy_info.chosen_arm_features['f1'][0, 0])
     self.assertAllEqual(action, tf.zeros(14, dtype=tf.int32))
     self.assertEqual(reward[0], experience.reward[0, 0])
Example #20
    def testNestSample(self, dtype):
        if dtype == tf.string:
            self.skipTest("Not compatible with string type.")
        nested_spec = example_nested_tensor_spec(dtype)
        sample = tensor_spec.sample_spec_nest(nested_spec)
        spec_1 = tensor_spec.BoundedTensorSpec.from_spec(nested_spec["spec_1"])
        bounded_spec_1 = nested_spec["bounded_spec_1"]
        sample_ = self.evaluate(sample)
        self.assertTrue(np.all(sample_["spec_1"] >= spec_1.minimum))
        self.assertTrue(np.all(sample_["spec_1"] <= spec_1.maximum))

        self.assertTrue(
            np.all(sample_["bounded_spec_1"] >= bounded_spec_1.minimum))
        self.assertTrue(
            np.all(sample_["bounded_spec_1"] <= bounded_spec_1.maximum))

        self.assertIn("spec_2", sample_["dict_spec"])
        tensor_spec_2 = sample_["dict_spec"]["spec_2"]
        self.assertTrue(np.all(tensor_spec_2 >= spec_1.minimum))
        self.assertTrue(np.all(tensor_spec_2 <= spec_1.maximum))
        self.assertIn("bounded_spec_2", sample_["dict_spec"])
        sampled_bounded_spec_2 = sample_["dict_spec"]["bounded_spec_2"]
        self.assertTrue(np.all(sampled_bounded_spec_2 >= spec_1.minimum))
        self.assertTrue(np.all(sampled_bounded_spec_2 <= spec_1.maximum))

        self.assertIn("tuple_spec", sample_)
        self.assertTrue(np.all(sample_["tuple_spec"][0] >= spec_1.minimum))
        self.assertTrue(np.all(sample_["tuple_spec"][0] <= spec_1.maximum))
        self.assertTrue(
            np.all(sample_["tuple_spec"][1] >= bounded_spec_1.minimum))
        self.assertTrue(
            np.all(sample_["tuple_spec"][1] <= bounded_spec_1.maximum))

        self.assertIn("list_spec", sample_)
        self.assertTrue(np.all(sample_["list_spec"][0] >= spec_1.minimum))
        self.assertTrue(np.all(sample_["list_spec"][0] <= spec_1.maximum))
        self.assertTrue(np.all(sample_["list_spec"][1][0] >= spec_1.minimum))
        self.assertTrue(np.all(sample_["list_spec"][1][0] <= spec_1.maximum))
        self.assertTrue(
            np.all(sample_["list_spec"][1][1] >= bounded_spec_1.minimum))
        self.assertTrue(
            np.all(sample_["list_spec"][1][1] <= bounded_spec_1.maximum))
Example #21
    def test_dict_spec_and_pre_processing(self):
        input_spec = {
            'a': tensor_spec.TensorSpec((32, 32, 3), tf.float32),
            'b': tensor_spec.TensorSpec((32, 32, 3), tf.float32)
        }
        network = encoding_network.EncodingNetwork(
            input_spec,
            preprocessing_layers={
                'a': tf.keras.layers.Flatten(),
                'b': tf.keras.layers.Flatten()
            },
            fc_layer_params=(),
            preprocessing_combiner=tf.keras.layers.Concatenate(axis=-1),
            activation_fn=tf.keras.activations.tanh,
        )

        sample_input = tensor_spec.sample_spec_nest(input_spec)
        output, _ = network(sample_input)
        # 6144 = 2 * 32 * 32 * 3: two (32, 32, 3) inputs, each flattened to 3072, then concatenated.
        self.assertEqual((6144, ), output.shape)
Example #22
  def testHandlePreprocessingLayers(self):
    observation_spec = (tensor_spec.TensorSpec([1], tf.float32),
                        tensor_spec.TensorSpec([], tf.float32))
    observation = tensor_spec.sample_spec_nest(
        observation_spec, outer_dims=(3,))

    preprocessing_layers = (tf.keras.layers.Dense(4),
                            tf.keras.Sequential([
                                tf.keras.layers.Reshape((1,)),
                                tf.keras.layers.Dense(4)
                            ]))

    net = value_network.ValueNetwork(
        observation_spec,
        preprocessing_layers=preprocessing_layers,
        preprocessing_combiner=tf.keras.layers.Add())

    value, _ = net(observation)
    self.assertEqual([3], value.shape.as_list())
    self.assertGreater(len(net.trainable_variables), 4)
Example #23
    def create_variables(self):
        if not self.built:
            random_input = tensor_spec.sample_spec_nest(self.input_tensor_spec,
                                                        outer_dims=(1, ))
            step_type = tf.expand_dims(time_step.StepType.FIRST, 0)
            output_tensors = self.__call__(random_input, step_type, None)

            with tf.variable_scope(self._name):
                scope = tf.get_variable_scope()
                self._weights = framework.get_variables(scope=scope)
                self._trainable_weights = framework.get_trainable_variables(
                    scope=scope)
                self._non_trainable_weights = [
                    var for var in self._weights
                    if var not in self._trainable_weights
                ]

            if self._output_tensor_spec is None:
                self._output_tensor_spec = nest.map_structure(
                    lambda t: tensor_spec.TensorSpec.from_tensor(
                        tf.squeeze(t, axis=0), name=t.name), output_tensors)
Example #24
  def testBuilds(self):
    observation_spec = tensor_spec.BoundedTensorSpec((8, 8, 3), tf.float32, 0,
                                                     1)
    time_step_spec = ts.time_step_spec(observation_spec)
    time_step = tensor_spec.sample_spec_nest(time_step_spec, outer_dims=(1,))

    action_spec = [
        tensor_spec.BoundedTensorSpec((2,), tf.float32, 2, 3),
        tensor_spec.BoundedTensorSpec((3,), tf.int32, 0, 3)
    ]

    net = actor_distribution_network.ActorDistributionNetwork(
        observation_spec,
        action_spec,
        conv_layer_params=[(4, 2, 2)],
        fc_layer_params=(5,))

    action_distributions, _ = net(time_step.observation, time_step.step_type,
                                  ())
    self.evaluate(tf.compat.v1.global_variables_initializer())
    self.assertEqual([1, 2], action_distributions[0].mode().shape.as_list())
    self.assertEqual([1, 3], action_distributions[1].mode().shape.as_list())
Example #25
    def testHandlePreprocessingLayers(self):
        observation_spec = (tensor_spec.TensorSpec([1], tf.float32),
                            tensor_spec.TensorSpec([], tf.float32))
        time_step_spec = ts.time_step_spec(observation_spec)
        time_step = tensor_spec.sample_spec_nest(time_step_spec,
                                                 outer_dims=(2, 3))

        preprocessing_layers = (tf.keras.layers.Dense(4),
                                tf.keras.Sequential([
                                    tf.keras.layers.Reshape((1, )),
                                    tf.keras.layers.Dense(4)
                                ]))

        net = value_rnn_network.ValueRnnNetwork(
            observation_spec,
            preprocessing_layers=preprocessing_layers,
            preprocessing_combiner=tf.keras.layers.Add())

        value, _ = net(time_step.observation, time_step.step_type,
                       net.get_initial_state(batch_size=2))
        self.assertEqual([2, 3], value.shape.as_list())
        self.assertGreater(len(net.trainable_variables), 4)
Example #26
    def create_variables(self):
        if not self.built:
            random_input = tensor_spec.sample_spec_nest(self.input_tensor_spec,
                                                        outer_dims=(1, ))
            step_type = tf.expand_dims(time_step.StepType.FIRST, 0)
            output_tensors = self.__call__(random_input, step_type, None)

            with tf.compat.v1.variable_scope(self._name):
                self._weights = tf.compat.v1.get_collection(
                    key=tf.compat.v1.GraphKeys.GLOBAL_VARIABLES,
                    scope=self._name)
                self._trainable_weights = tf.compat.v1.trainable_variables(
                    scope=self._name)
                self._non_trainable_weights = [
                    var for var in self._weights
                    if var not in self._trainable_weights
                ]

            if self._output_tensor_spec is None:
                self._output_tensor_spec = tf.nest.map_structure(
                    lambda t: tensor_spec.TensorSpec.from_tensor(
                        tf.squeeze(t, axis=0)), output_tensors)
Example #27
    def test_auto_reset(self):
        time_step = self.evaluate(self.random_env.reset())
        random_action = self.evaluate(
            tensor_spec.sample_spec_nest(self.action_spec, outer_dims=(1, )))

        attempts = 0

        # With a 1/10 chance of reaching a terminal step on each attempt, the
        # probability of never terminating within 500 attempts is 0.9^500,
        # roughly 1e-23. If we exceed 500 attempts, we can safely assume the
        # test is broken.
        while not time_step.is_last() and attempts < 500:
            time_step = self.evaluate(self.random_env.step(random_action))
            attempts += 1

        self.assertLess(attempts, 500)
        self.assertTrue(time_step.is_last())

        current_time_step = self.evaluate(self.random_env.current_time_step())
        self.assertTrue(current_time_step.is_last())

        first_time_step = self.evaluate(self.random_env.step(random_action))
        self.assertTrue(first_time_step.is_first())
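The bound quoted in the comment above is easy to check numerically (a throwaway sketch, not part of the test):

print(0.9 ** 500)   # ~1.3e-23, so 500 attempts leaves an enormous safety margin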
Example #28
    def testHandlePreprocessingLayers(self, lstm_size, rnn_construction_fn):
        observation_spec = (tensor_spec.TensorSpec([1], tf.float32),
                            tensor_spec.TensorSpec([], tf.float32))
        time_step_spec = ts.time_step_spec(observation_spec)
        time_step = tensor_spec.sample_spec_nest(time_step_spec,
                                                 outer_dims=(3, 4))

        action_spec = [
            tensor_spec.BoundedTensorSpec((2, ), tf.float32, 2, 3),
            tensor_spec.BoundedTensorSpec((3, ), tf.int32, 0, 3)
        ]

        preprocessing_layers = (tf.keras.layers.Dense(4),
                                sequential_layer.SequentialLayer([
                                    tf.keras.layers.Reshape((1, )),
                                    tf.keras.layers.Dense(4)
                                ]))

        net = actor_distribution_rnn_network.ActorDistributionRnnNetwork(
            observation_spec,
            action_spec,
            preprocessing_layers=preprocessing_layers,
            lstm_size=lstm_size,
            preprocessing_combiner=tf.keras.layers.Add(),
            rnn_construction_fn=rnn_construction_fn,
            rnn_construction_kwargs={'lstm_size': 3})

        initial_state = actor_policy.ActorPolicy(time_step_spec, action_spec,
                                                 net).get_initial_state(3)
        action_distributions, _ = net(time_step.observation,
                                      time_step.step_type, initial_state)

        self.evaluate(tf.compat.v1.global_variables_initializer())
        self.assertEqual([3, 4, 2],
                         action_distributions[0].mode().shape.as_list())
        self.assertEqual([3, 4, 3],
                         action_distributions[1].mode().shape.as_list())
        self.assertGreater(len(net.trainable_variables), 4)
Example #29
  def testAgentTrajectoryTrain(self):
    agent = td3_agent.Td3Agent(
        self._time_step_spec,
        self._action_spec,
        critic_network=self._critic_net,
        actor_network=self._bounded_actor_net,
        actor_optimizer=tf.compat.v1.train.AdamOptimizer(0.001),
        critic_optimizer=tf.compat.v1.train.AdamOptimizer(0.001),
        )

    trajectory_spec = trajectory.Trajectory(
        step_type=self._time_step_spec.step_type,
        observation=self._time_step_spec.observation,
        action=self._action_spec,
        policy_info=(),
        next_step_type=self._time_step_spec.step_type,
        reward=tensor_spec.BoundedTensorSpec(
            [], tf.float32, minimum=0.0, maximum=1.0, name='reward'),
        discount=self._time_step_spec.discount)

    sample_trajectory_experience = tensor_spec.sample_spec_nest(
        trajectory_spec, outer_dims=(3, 2))
    agent.train(sample_trajectory_experience)
Example #30
    def testBuildsStackedLstm(self):
        observation_spec = tensor_spec.BoundedTensorSpec((8, 8, 3), tf.int32,
                                                         0, 1)
        time_step_spec = ts.time_step_spec(observation_spec)
        time_step = tensor_spec.sample_spec_nest(time_step_spec,
                                                 outer_dims=(1, 3))

        net = value_rnn_network.ValueRnnNetwork(observation_spec,
                                                conv_layer_params=[(4, 2, 2)],
                                                input_fc_layer_params=(5, ),
                                                lstm_size=(7, 5),
                                                output_fc_layer_params=(3, ))

        _, state = net(time_step.observation, time_step.step_type)
        self.evaluate(tf.compat.v1.global_variables_initializer())

        # Assert first LSTM cell state is created.
        self.assertEqual((1, 7), state[0][0].shape)
        self.assertEqual((1, 7), state[0][1].shape)

        # Assert second LSTM cell state is created.
        self.assertEqual((1, 5), state[1][0].shape)
        self.assertEqual((1, 5), state[1][1].shape)