Example 1
def test_reinforce_agent_init_with_multiple_resource_sets():
    """
    Tests agent set up and initialisation with multiple action subspaces (multiple resource sets).
    """
    # Fix the environment name for this case since the assertions below are specific to its action space structure and awkward to parametrise.
    env_name = 'double_reentrant_line_shared_res_homogeneous_cost'

    # Set up environment using default parameters.
    # Environment parameters do not affect the test result here.
    tf_env, _ = rl_env_from_snc_env(load_scenario(
        env_name,
        job_gen_seed=10,
        override_env_params={'max_episode_length': 25})[1],
                                    discount_factor=0.99)

    # Instantiate and initialise a REINFORCE agent for the environment.
    reinforce_agent = create_reinforce_agent(tf_env)
    reinforce_agent.initialize()
    # Validate initialisation by checking some properties of the initialised agent.
    assert isinstance(reinforce_agent.action_spec, tuple)
    assert len(reinforce_agent.action_spec) == 2
    assert isinstance(reinforce_agent.action_spec[0], BoundedTensorSpec)
    assert isinstance(reinforce_agent.action_spec[1], BoundedTensorSpec)
    assert reinforce_agent.action_spec[0].shape == tf.TensorShape((1, 3))
    assert reinforce_agent.action_spec[1].shape == tf.TensorShape((1, 3))
    assert reinforce_agent.name == "reinforce_agent"
    assert reinforce_agent.time_step_spec == tf_env.time_step_spec()
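As a side note, the tuple action spec asserted above (one BoundedTensorSpec per resource set) can be sampled directly with TF-Agents' spec utilities. The following sketch is illustrative only and is not part of the original test.

from tf_agents.specs import tensor_spec

# Sample one random action per resource set from the tuple spec, then flatten by
# concatenating along the last axis (as the play test below does with policy actions).
sampled_actions = tensor_spec.sample_spec_nest(reinforce_agent.action_spec)
flat_action = tf.concat(sampled_actions, axis=-1)  # two (1, 3) subspaces give shape (1, 6)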
Example 2
def test_reinforce_agent_play(env_name):
    """
    Extension of the agent set up and initialisation test to include playing episodes.
    """
    # Set up environment using default parameters.
    # Environment parameters do not affect the test result here.
    tf_env, _ = rl_env_from_snc_env(load_scenario(
        env_name,
        job_gen_seed=10,
        override_env_params={'max_episode_length': 25})[1],
                                    discount_factor=0.99)

    # Instantiate and initialise a REINFORCE agent.
    reinforce_agent = create_reinforce_agent(tf_env)
    reinforce_agent.initialize()

    # Reset the environment
    tf_env.reset()
    # Play 5 time steps in the environment.
    for _ in range(5):
        # Since we do not store the state, capture it fresh from the environment at each time step
        # as a TimeStep object (a named tuple).
        time_step = tf_env.current_time_step()
        # Obtain the agent's action.
        action_step = reinforce_agent.collect_policy.action(time_step)
        if isinstance(action_step.action, tuple):
            action = tf.concat(action_step.action, axis=-1)
        else:
            action = action_step.action

        # Ensure that the action is binary as expected.
        assert snc.is_binary(action)

        # Play the action out in the environment.
        tf_env.step(action_step.action)
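The env_name argument indicates this test is driven by pytest parametrisation, which is not shown here. A minimal sketch of such a decorator follows; the scenario list is illustrative (it reuses a scenario name that appears elsewhere in these examples).

import pytest

@pytest.mark.parametrize('env_name', ['single_server_queue'])  # illustrative scenario list
def test_reinforce_agent_play(env_name):
    ...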
Example 3
def test_reinforce_agent_learning(env_name):
    """
    Extension of the test for an agent playing in the environment to include training.
    Note: This does not test that training improves the policy. It simply tests that the training
    loop runs effectively.
    """
    # Set up environment using default parameters.
    # Environment parameters do not affect the test result here.
    tf_env, _ = rl_env_from_snc_env(load_scenario(
        env_name,
        job_gen_seed=10,
        override_env_params={'max_episode_length': 25})[1],
                                    discount_factor=0.99)

    # Set up a training step counter.
    global_step = tf.compat.v1.train.get_or_create_global_step()
    # Instantiate a REINFORCE agent
    reinforce_agent = create_reinforce_agent(tf_env,
                                             training_step_counter=global_step)

    # Instantiate a replay buffer.
    replay_buffer = TFUniformReplayBuffer(
        data_spec=reinforce_agent.collect_data_spec,
        batch_size=tf_env.batch_size,
        max_length=1000)

    # Initialise the action network weights etc.
    reinforce_agent.initialize()

    # Use a driver to handle data collection for the agent. This handles much of the backend
    # TensorFlow set-up and resolves earlier issues with episodes of differing lengths.
    collect_driver = DynamicEpisodeDriver(tf_env,
                                          reinforce_agent.collect_policy,
                                          observers=[replay_buffer.add_batch],
                                          num_episodes=2)

    # Get the initial states of the agent and environment before training.
    time_step = tf_env.reset()
    policy_state = reinforce_agent.collect_policy.get_initial_state(
        tf_env.batch_size)

    # Take a copy of the variables so that we can later check that training changes the parameters.
    initial_vars = deepcopy(reinforce_agent.trainable_variables)
    assert len(initial_vars) > 0, "Agent has no trainable variables."

    # Set up a minimal training loop to simply test training mechanics work.
    for _ in range(5):
        # Collect experience.
        time_step, policy_state = collect_driver.run(time_step=time_step,
                                                     policy_state=policy_state)
        # Now the replay buffer should have data in it so we can collect the data and train the
        # agent.
        experience = replay_buffer.gather_all()
        reinforce_agent.train(experience)
        # Clear the replay buffer and return to play.
        replay_buffer.clear()

    # Check that training has had some effect
    for v1, v2 in zip(initial_vars, reinforce_agent.trainable_variables):
        assert not np.allclose(v1.numpy(), v2.numpy())
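Because the agent was built with training_step_counter=global_step, a further optional check could confirm that the counter advanced once per train() call. This assertion is a sketch and assumes the counter starts from zero in this test.

    # Each call to reinforce_agent.train(...) increments the shared step counter once,
    # so after the five-iteration loop it should read 5 (assuming it started at 0).
    assert global_step.numpy() == 5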
def get_reinforce_agent(
        env: TFPyEnvironment,
        discount_factor: float,
        debug: bool = False,
        agent_params: Optional[Dict[str, Any]] = None
    ) -> ReinforceAgent:
    """
    Builds and initialises a REINFORCE learning agent for the environment.

    :param env: The TensorFlow environment used to set up the agent with correct action spaces etc.
    :param discount_factor: The discount applied to future rewards.
    :param debug: Flag which determines whether to include extra TensorBoard logs for debugging.
    :param agent_params: A dictionary of possible overrides for the default TF-Agents agent set up.
    :return: An initialised REINFORCE agent.
    """
    # Set up a training step counter.
    global_step = tf.compat.v1.train.get_or_create_global_step()
    agent = create_reinforce_agent(
        env,
        gamma=discount_factor,
        debug=debug,
        training_step_counter=global_step,
        agent_params=agent_params
    )
    agent.initialize()
    agent.train = tf.function(agent.train)
    return agent
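A possible call site for the helper above; the scenario name and parameters mirror the tests in this file and are purely illustrative.

env = load_scenario('single_server_queue', job_gen_seed=72).env
tf_env, _ = rl_env_from_snc_env(env, discount_factor=0.99)
reinforce_agent = get_reinforce_agent(tf_env, discount_factor=0.99)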
def test_rl_simulation_agent_serialisation():
    """
    Test the custom serialisation of the agent used when saving the state of the SNC simulator.
    The customised serialisation was required due to the inability to serialise TensorFlow objects.
    """
    # Set up the agent as before.
    seed = 72
    env = load_scenario("single_server_queue", job_gen_seed=seed).env
    rl_env, _ = rl_env_from_snc_env(env, discount_factor=0.99)
    rl_agent = create_reinforce_agent(rl_env)
    rl_agent.initialize()
    del rl_env
    sim_agent = RLSimulationAgent(env, rl_agent, normalise_obs=True)

    # Obtain the dictionary representation of the agent and check that all of the expected
    # attributes are present.
    serialised_agent = sim_agent.to_serializable()
    assert all(attr in serialised_agent for attr in [
        "_rl_env", "_rl_agent", "_policy", "_is_eval_policy", "env",
        "buffer_processing_matrix", "constituency_matrix", "demand_rate",
        "list_boundary_constraint_matrices", "name"
    ])
    # Ensure that the dictionary representation is compatible with the json module and the chosen
    # encoder.
    json_string = json.dumps(serialised_agent,
                             cls=NumpyEncoder,
                             indent=4,
                             sort_keys=True)
    assert bool(json_string)
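NumpyEncoder itself is not shown in these examples. A typical encoder of this kind looks roughly like the sketch below; the project's actual implementation may differ.

import json
import numpy as np

class NumpyEncoder(json.JSONEncoder):
    """Hypothetical minimal encoder mapping NumPy types to JSON-native equivalents."""

    def default(self, obj):
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        if isinstance(obj, (np.integer, np.floating)):
            return obj.item()
        return super().default(obj)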
def test_rl_simulation_agent_string_representation():
    """
    Tests that the string representation of the simulation agent is as expected.
    """
    # Set up the agent as before.
    seed = 72
    env = load_scenario("single_server_queue", job_gen_seed=seed).env
    rl_env, _ = rl_env_from_snc_env(env, discount_factor=0.99)
    rl_agent = create_reinforce_agent(rl_env)
    rl_agent.initialize()
    del rl_env
    sim_agent = RLSimulationAgent(env, rl_agent, normalise_obs=True)
    # Ensure that the string representation of the agent contains the instance name at the end.
    assert str(sim_agent)[-len(sim_agent.name):] == sim_agent.name
def test_rl_simulation_agent_discount_factor_reinforce():
    """
    Tests that the discount factor is passed from a REINFORCE agent to an RLSimulationAgent
    correctly.
    """
    # Set up the agent as before.
    seed = 72
    env = load_scenario("single_server_queue", job_gen_seed=seed).env
    rl_env, _ = rl_env_from_snc_env(env, discount_factor=0.99)
    reinforce_agent = create_reinforce_agent(rl_env, gamma=0.97)
    reinforce_agent.initialize()
    del rl_env
    reinforce_sim_agent = RLSimulationAgent(env,
                                            reinforce_agent,
                                            normalise_obs=True)
    assert reinforce_sim_agent.discount_factor == 0.97
Example 8
def test_reinforce_agent_init(env_name, expected_action_spec_shape):
    """
    Tests agent set up and initialisation.
    """
    # Set up environment using default parameters.
    # Environment parameters do not affect the test result here.
    tf_env, _ = rl_env_from_snc_env(load_scenario(
        env_name,
        job_gen_seed=10,
        override_env_params={'max_episode_length': 25})[1],
                                    discount_factor=0.99)

    # Instantiate and initialise a REINFORCE agent for the environment.
    reinforce_agent = create_reinforce_agent(tf_env)
    reinforce_agent.initialize()
    # Validate initialisation by checking some properties of the initialised agent.
    assert isinstance(reinforce_agent.action_spec, BoundedTensorSpec)
    assert reinforce_agent.action_spec.shape == expected_action_spec_shape
    assert reinforce_agent.name == "reinforce_agent"
    assert reinforce_agent.time_step_spec == tf_env.time_step_spec()
def test_rl_simulation_agent_action_mapping():
    """
    Tests that the RL Simulation Agent with the SNC interface is able to receive states and produce
    actions both of the expected type and form.
    """
    # Set up the agent as above
    seed = 72
    env = load_scenario("single_server_queue", job_gen_seed=seed).env
    rl_env, _ = rl_env_from_snc_env(env, discount_factor=0.99)
    rl_agent = create_reinforce_agent(rl_env)
    rl_agent.initialize()
    del rl_env
    sim_agent = RLSimulationAgent(env, rl_agent, normalise_obs=True)

    # Obtain a state and form an action.
    state = env.reset()
    action = sim_agent.map_state_to_actions(state)
    # Check that the action is as expected, first with a type assertion and then by passing it
    # to the environment.
    assert isinstance(action, snc_types.ActionProcess)
    env.step(action)
def test_rl_simulation_agent_init():
    """
    Test the initialisation of an RL agent with an interface compatible with the SNC simulator.
    """
    # To instantiate an agent from tf_agents we need an RL environment which itself requires a
    # standard SNC environment. We therefore set up an SNC environment and then wrap it for the
    # TensorFlow agent. This TF environment is later deleted since it is no longer required and to
    # ensure that it is not used inadvertently.
    seed = 72
    env = load_scenario("single_server_queue", job_gen_seed=seed).env
    rl_env, _ = rl_env_from_snc_env(env, discount_factor=0.99)
    rl_agent = create_reinforce_agent(rl_env)
    rl_agent.initialize()
    del rl_env
    # Wrap the agent for the SNC simulator using information from the environment and the agent.
    sim_agent = RLSimulationAgent(env, rl_agent, normalise_obs=True)

    # Test that the agent has all of the attributes we want and that they are of the right type.
    assert hasattr(sim_agent, "_rl_env") and isinstance(
        sim_agent._rl_env, RLControlledRandomWalk)
    assert hasattr(sim_agent, "_rl_agent") and isinstance(
        sim_agent._rl_agent, TFAgent)
    assert hasattr(sim_agent, "_policy") and isinstance(
        sim_agent._policy, tf_policy.Base)
    assert hasattr(sim_agent, "_is_eval_policy") and isinstance(
        sim_agent._is_eval_policy, bool)
    assert hasattr(sim_agent, "env") and isinstance(sim_agent.env,
                                                    ControlledRandomWalk)
    assert hasattr(sim_agent, "buffer_processing_matrix") and isinstance(
        sim_agent.buffer_processing_matrix, snc_types.BufferMatrix)
    assert hasattr(sim_agent, "constituency_matrix") and isinstance(
        sim_agent.constituency_matrix, snc_types.ConstituencyMatrix)
    assert hasattr(sim_agent, "demand_rate") and isinstance(
        sim_agent.demand_rate, np.ndarray)
    assert hasattr(sim_agent,
                   "list_boundary_constraint_matrices") and isinstance(
                       sim_agent.list_boundary_constraint_matrices, list)
    assert hasattr(sim_agent, "name") and isinstance(sim_agent.name, str)
Example 11
def load_rl_agent(
        env: ControlledRandomWalk,
        rl_algorithm: str,
        load_path: str,
        discount_factor: float = 0.99,
        agent_params: Optional[Dict[str, Any]] = None) -> RLSimulationAgent:
    """
    Instantiates an RL agent in the RLSimulationAgent interface for compatibility and loads the
    weights from training into it.

    :param env: The controlled random walk environment for which the agent is required.
    :param rl_algorithm: The name of the RL algorithm used to train the agent.
    :param load_path: Path to a directory where TensorFlow checkpoints have been saved (i.e. where
        the model's weights are saved).
    :param discount_factor: A scalar discount factor to pass to the agent.
    :param agent_params: A dictionary of possible overrides for the default TF-Agents agent set up.
    :return: An RL agent initialised with saved weights ready for evaluation.
    """
    # Lazy import of TensorFlow since it is only needed when an RL agent is run.
    import tensorflow as tf

    # Obtain a TensorFlow-compatible version of the environment.
    # We need a TensorFlow environment to initialise the agent correctly.
    # First determine whether or not to normalise observations. PPO has its own normalisation, so
    # we only normalise for REINFORCE agents, or for PPO agents with normalisation turned off.
    normalise_obs = rl_algorithm == 'reinforce' or \
                    (rl_algorithm == 'ppo' and
                     not (agent_params or {}).get('normalize_observations', True))
    tf_env, _ = rl_env.rl_env_from_snc_env(
        env, discount_factor, normalise_observations=normalise_obs)

    # Set up an enumeration of functions which build agents to allow for extending to new agents.
    # Pick out the correct RL agent from those we have implemented.
    if rl_algorithm.lower() == 'reinforce':
        agent = create_reinforce_agent(tf_env,
                                       gamma=discount_factor,
                                       agent_params=agent_params)
    elif rl_algorithm.lower() == 'ppo':
        agent = create_ppo_agent(tf_env,
                                 gamma=discount_factor,
                                 agent_params=agent_params)
    else:
        raise NotImplementedError(
            "An agent using the RL algorithm requested is not yet implemented")

    # Initialise the agent and load in parameters from the most recent save.
    # Note that this can be adjusted to load in weights from any point in training (so long as they
    # have been saved).
    agent.initialize()
    restorer = tf.train.Checkpoint(agent=agent)
    restore_manager = tf.train.CheckpointManager(restorer,
                                                 directory=load_path,
                                                 max_to_keep=20)
    restorer.listed = agent.trainable_variables
    restoration = restorer.restore(restore_manager.latest_checkpoint)
    restoration.run_restore_ops()
    # Check that the weights have been loaded and that the model from which the weights were saved
    # matches the model which they are being loaded into.
    restoration.assert_nontrivial_match()
    restoration.assert_existing_objects_matched()

    # We name the agent in line with the checkpoint used to restore the weights. This aids in
    # identifying which experiment run is being looked at from log files.
    agent_name = f"RLSimulationAgent - {restore_manager.latest_checkpoint}"

    # Finally wrap the agent for compatibility with the SNC simulator.
    simulation_agent = RLSimulationAgent(env,
                                         agent,
                                         normalise_obs,
                                         name=agent_name)
    return simulation_agent
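A possible call site for load_rl_agent; the checkpoint directory is a placeholder and the scenario name mirrors the tests above.

env = load_scenario('single_server_queue', job_gen_seed=72).env
sim_agent = load_rl_agent(
    env,
    rl_algorithm='reinforce',
    load_path='/tmp/reinforce_checkpoints',  # placeholder checkpoint directory
    discount_factor=0.99)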