def test_initialisation_multiple_heads():
    """
    Test network initialisation with multiple action subspaces i.e. multiple heads.
    """
    # Set up the (IO) space shapes.
    observation_dim = 10
    # Ensure that the action subspace dimensions sum to the overall number of actions (3 + 5 = 8).
    action_subspace_dimensions = (3, 5)
    input_tensor_spec = tf.TensorSpec(shape=tf.TensorShape(
        (observation_dim, )),
                                      dtype=tf.dtypes.float32,
                                      name="input")
    output_tensor_spec = (BoundedTensorSpec(shape=tf.TensorShape((3, )),
                                            dtype=tf.dtypes.float32,
                                            name="action_subspace_1",
                                            minimum=0,
                                            maximum=1),
                          BoundedTensorSpec(shape=tf.TensorShape((5, )),
                                            dtype=tf.dtypes.float32,
                                            name="action_subspace_2",
                                            minimum=0,
                                            maximum=1))
    # Instantiate the network.
    network = MultiHeadedCategoricalActionNetwork(
        input_tensor_spec,
        output_tensor_spec,
        action_subspace_dimensions=action_subspace_dimensions,
        hidden_units=(64, ))
    # Ensure that the network has set up some layers.
    assert hasattr(network,
                   "_shared_layers") and network._shared_layers is not None
    del network
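# A hedged, stand-alone sketch (not part of the test above): building the
# per-head output specs directly from the subspace dimensions keeps each head's
# spec shape in sync with its entry in `action_subspace_dimensions`.
# e.g. make_output_specs((3, 5)) reproduces the output_tensor_spec used above.
def make_output_specs(action_subspace_dimensions):
    return tuple(
        BoundedTensorSpec(shape=tf.TensorShape((dim, )),
                          dtype=tf.dtypes.float32,
                          name="action_subspace_{}".format(i + 1),
                          minimum=0,
                          maximum=1)
        for i, dim in enumerate(action_subspace_dimensions))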
    def __init__(self,
                 batch_size,
                 episode_length,
                 obs_dim=1,
                 action_type=ActionType.Discrete):
        """Initializes the environment.

        Args:
            batch_size (int): The batch size expected for the actions and
                observations.
            episode_length (int): The length of each episode.
            obs_dim (int): The dimension of each observation vector.
            action_type (ActionType): Whether the action space is discrete or
                continuous.
        """
        self._steps = 0
        self._episode_length = episode_length
        super(UnittestEnv, self).__init__()
        self._action_type = action_type
        if action_type == ActionType.Discrete:
            self._action_spec = BoundedTensorSpec(shape=(1, ),
                                                  dtype=tf.int64,
                                                  minimum=0,
                                                  maximum=1)
        else:
            self._action_spec = BoundedTensorSpec(shape=(1, ),
                                                  dtype=tf.float32,
                                                  minimum=[0],
                                                  maximum=[1])

        self._observation_spec = TensorSpec(shape=(obs_dim, ),
                                            dtype=tf.float32)
        self._batch_size = batch_size
        self.reset()
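# A hedged usage sketch (hypothetical, not from the original source): it assumes
# the remainder of UnittestEnv exposes the specs defined above via the usual
# action_spec() / observation_spec() accessors of the TF-Agents environment API.
example_env = UnittestEnv(batch_size=2, episode_length=5)
assert example_env.action_spec().shape == (1, )
assert example_env.observation_spec().shape == (1, )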
def save_model():
  # Build the actor and value networks and a PPO agent, restore (or create) a
  # checkpoint, and export the agent's policy as a SavedModel.
  optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=1e-3)
  obs_spec = TensorSpec((7,), dtype=tf.float32, name='observation')
  action_spec = BoundedTensorSpec((1,), dtype=tf.int32, minimum=0, maximum=3,
                                  name='action')
  actor_net = ActorDistributionRnnNetwork(obs_spec, action_spec,
                                          lstm_size=(100, 100))
  value_net = ValueRnnNetwork(obs_spec)
  agent = ppo_agent.PPOAgent(
    time_step_spec=time_step_spec(obs_spec),
    action_spec=action_spec,
    optimizer=optimizer,
    actor_net=actor_net,
    value_net=value_net,
    normalize_observations=True,
    normalize_rewards=True,
    use_gae=True,
    num_epochs=1,
  )
  checkpointer = Checkpointer(
    ckpt_dir='checkpoints/policy',
    max_to_keep=1,
    agent=agent,
    policy=agent.policy,
    global_step=tf.compat.v1.train.get_or_create_global_step())
  checkpointer.initialize_or_restore()
  saver = policy_saver.PolicySaver(agent.policy)
  saver.save('final_policy')
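def load_model():
  # A hedged companion sketch (not from the original source): restore the policy
  # exported by save_model() above and query it for one action. It assumes the
  # 'final_policy' directory written above and an observation matching obs_spec;
  # `restart` here is tf_agents.trajectories.time_step.restart.
  saved_policy = tf.saved_model.load('final_policy')
  time_step = restart(tf.zeros((1, 7), dtype=tf.float32), batch_size=1)
  policy_state = saved_policy.get_initial_state(batch_size=1)
  action_step = saved_policy.action(time_step, policy_state)
  return action_step.action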
def test_forward_pass_single_head():
    """
    Test a forward pass through a single headed action network.
    """
    # Set up the network as in the single-headed initialisation test.
    num_actions = 5
    observation_dim = 10
    input_tensor_spec = tf.TensorSpec(shape=tf.TensorShape(
        (observation_dim, )),
                                      dtype=tf.dtypes.float32,
                                      name="input")
    output_tensor_spec = BoundedTensorSpec(shape=tf.TensorShape((5, )),
                                           dtype=tf.dtypes.float32,
                                           minimum=0,
                                           maximum=1,
                                           name="action")
    network = MultiHeadedCategoricalActionNetwork(
        input_tensor_spec,
        output_tensor_spec,
        action_subspace_dimensions=(num_actions, ),
        hidden_units=(64, ))
    # Test that zeros as input yields zeros as output. This follows from the biases being
    # initialised to zero.
    zeros_input = np.zeros((1, observation_dim))
    zeros_output = network(zeros_input, step_type=None)[0].logits
    assert np.all(zeros_output == 0)
    # Test that random inputs yield non-zero outputs.
    random_input = np.random.random((1, observation_dim))
    random_output = network(random_input, step_type=None)[0].logits
    assert np.all(random_output != 0)
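# Illustrative follow-on (not part of the original tests; reuses `network` and
# `random_input` from test_forward_pass_single_head above): the head returns a
# OneHotCategorical distribution, so an action can be sampled and turned into an
# index with an argmax over the last axis.
one_hot_action = network(random_input, step_type=None)[0].sample()
action_index = tf.argmax(one_hot_action, axis=-1)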
def test_one_hot_categorical_projection_network():
    """
    Test the networks used as action heads.
    This tests initialisation and the forward pass.
    """
    # Set up for a single action head with 5 actions in the subspace.
    num_actions = 5
    sample_spec = BoundedTensorSpec(shape=tf.TensorShape((num_actions, )),
                                    dtype=tf.dtypes.float32,
                                    minimum=0,
                                    maximum=1,
                                    name="action")
    action_head = OneHotCategoricalProjectionNetwork(sample_spec, num_actions)
    # Test the initialisation.
    assert hasattr(
        action_head,
        "_projection_layer") and action_head._projection_layer is not None
    assert hasattr(action_head, "_output_spec") and isinstance(
        action_head._output_spec, DistributionSpec)
    # Test the forward pass (assuming the shared layers' final output has dimension 64).
    inputs = tf.convert_to_tensor(np.random.random((1, 100, 64)))
    num_batch_dims = 2
    action_dist = action_head(inputs, num_batch_dims)
    assert isinstance(action_dist, tfp.distributions.OneHotCategorical)
    assert action_dist.event_shape == num_actions
    # Ensure that there are exactly two trainable weights: the weight matrix and the bias of a
    # single linear layer.
    assert len(action_head.trainable_weights) == 2
    assert action_head.trainable_weights[0].shape == (64, 5)
    assert action_head.trainable_weights[1].shape == (5, )
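# For intuition (a hedged sketch, not the class's actual implementation): the two
# trainable weights checked above are exactly what a single Keras Dense layer
# projecting the 64-d shared features onto the 5 subspace logits would create.
equivalent_projection = tf.keras.layers.Dense(5, bias_initializer="zeros")
equivalent_projection.build((None, 64))
assert equivalent_projection.kernel.shape == (64, 5)
assert equivalent_projection.bias.shape == (5, )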
def test_initialisation_single_head():
    """
    Test network initialisation with a single action subspace i.e. one head.
    """
    # Set up some (IO) space shapes.
    num_actions = 5
    observation_dim = 10
    # Use TensorSpecs to be compatible with TensorFlow.
    input_tensor_spec = tf.TensorSpec(shape=tf.TensorShape(
        (observation_dim, )),
                                      dtype=tf.dtypes.float32,
                                      name="input")
    output_tensor_spec = BoundedTensorSpec(shape=tf.TensorShape((5, )),
                                           dtype=tf.dtypes.float32,
                                           minimum=0,
                                           maximum=1,
                                           name="action")
    # Instantiate the network.
    network = MultiHeadedCategoricalActionNetwork(
        input_tensor_spec,
        output_tensor_spec,
        action_subspace_dimensions=(num_actions, ),
        hidden_units=(64, ))
    # Ensure that the network has set up some layers.
    assert hasattr(network,
                   "_shared_layers") and network._shared_layers is not None
    del network
def test_forward_pass_multiple_heads():
    """
    Test a forward pass through a multi-headed action network.
    """
    # Set up the network as in the multi-headed test above.
    batch_size = 1
    observation_dim = 10
    action_subspace_dimensions = (3, 5)
    input_tensor_spec = tf.TensorSpec(shape=tf.TensorShape(
        (observation_dim, )),
                                      dtype=tf.dtypes.float32,
                                      name="input")
    output_tensor_spec = (BoundedTensorSpec(shape=tf.TensorShape((3, )),
                                            dtype=tf.dtypes.float32,
                                            name="action_subspace_1",
                                            minimum=0,
                                            maximum=1),
                          BoundedTensorSpec(shape=tf.TensorShape((5, )),
                                            dtype=tf.dtypes.float32,
                                            name="action_subspace_2",
                                            minimum=0,
                                            maximum=1))
    network = MultiHeadedCategoricalActionNetwork(
        input_tensor_spec,
        output_tensor_spec,
        action_subspace_dimensions=action_subspace_dimensions,
        hidden_units=(64, ))
    # Test that a zeros input yields zeros outputs, since the biases are initialised to zero.
    # Also check that the network returns a distribution for each head and that the output shapes
    # are as expected. We check the logits as these are the network's raw outputs.
    zeros_input = np.zeros((1, observation_dim))
    zeros_output = network(zeros_input, step_type=None)[0]
    assert len(zeros_output) == 2
    assert zeros_output[0].logits.shape == (batch_size, 1, 3)
    assert zeros_output[1].logits.shape == (batch_size, 1, 5)
    assert np.all(zeros_output[0].logits == 0) and np.all(
        zeros_output[1].logits == 0)
    # Perform the same tests with random inputs ensuring non-zero outputs.
    random_input = np.random.random((1, observation_dim))
    random_output = network(random_input, step_type=None)[0]
    assert len(random_output) == 2
    assert random_output[0].logits.shape == (batch_size, 1, 3)
    assert random_output[1].logits.shape == (batch_size, 1, 5)
    assert np.all(random_output[0].logits != 0) and np.all(
        random_output[1].logits != 0)
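# Illustrative follow-on (not part of the original tests; reuses `network`,
# `random_input` and `action_subspace_dimensions` from the test above): a joint
# action is one sample per head, i.e. one one-hot vector per action subspace.
distributions, _ = network(random_input, step_type=None)
joint_action = tuple(dist.sample() for dist in distributions)
assert len(joint_action) == len(action_subspace_dimensions)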
  def __init__(self,
               gym_env,
               n_agents,
               discount=1.0,
               spec_dtype_map=None,
               match_obs_space_dtype=True,
               auto_reset=True,
               simplify_box_bounds=True):
    """Initializes the wrapper and tiles the single-agent action spec across `n_agents` agents."""
    self.n_agents = n_agents

    super(MultiagentGymWrapper, self).__init__(
        gym_env, discount, spec_dtype_map, match_obs_space_dtype, auto_reset,
        simplify_box_bounds)

    # Create a single-agent version of the action spec and then tile it to
    # comply with tf-agents spec requirements.
    single_action_spec = BoundedTensorSpec(
        shape=(), dtype=self._action_spec.dtype, name=self._action_spec.name,
        minimum=self._action_spec.minimum, maximum=self._action_spec.maximum)
    self._action_spec = (single_action_spec,) * n_agents
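# A hedged, stand-alone illustration of the tiling pattern above (hypothetical
# bounds, not tied to a specific gym_env): every agent receives its own copy of
# the single-agent action spec, so downstream code can index specs per agent.
example_single_spec = BoundedTensorSpec(
    shape=(), dtype=tf.int64, minimum=0, maximum=4, name='action')
example_multi_spec = (example_single_spec,) * 3
assert len(example_multi_spec) == 3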
NUM_EVAL_EPISODES = 1
AGENT_NAMES = ['inter0', 'inter1', 'inter2', 'coordinate']
NUM_ITERATIONS = 5
RANDOM_COLLECT_STEPS_INITIAL = 60
AGENTS_COLLECT_STEPS_INITIAL = 60
AGENTS_COLLECT_STEPS_PER_ITERATION = 0
RANDOM_COLLECT_STEPS_PER_ITERATION = 0
LOG_INTERVAL = 1
EVAL_INTERVAL = 5

# Create specifications for the independent agents and the coordinate agent.
FIXED_STEP_TYPE = tf.convert_to_tensor(0, dtype='int32')

obs_spec4independent_agent = BoundedTensorSpec(shape=(8, ),
                                               dtype=tf.float32,
                                               minimum=0,
                                               maximum=3.4028235e+38,
                                               name='observation')
act_spec4independent_agent = BoundedTensorSpec(shape=(4, ),
                                               dtype=tf.float32,
                                               minimum=0,
                                               maximum=10,
                                               name='action')
q_spec4independent_agent = BoundedTensorSpec(shape=(),
                                             dtype=tf.float32,
                                             minimum=-120,
                                             maximum=0,
                                             name='q_value')
ts_spec4independent_agent = time_step_spec(obs_spec4independent_agent)

obs_spec4coordinate_agent = BoundedTensorSpec(shape=(12, ),