def test_rl_simulation_agent_normalise_obs_usage_with_normalisation():
    """Ensure that the _normalise_obs property of RLSimulationAgent is used correctly."""
    # Set up the agent as before.
    seed = 72
    state = np.array([100, 100, 100, 100])
    env = load_scenario("klimov_model",
                        job_gen_seed=seed,
                        override_env_params={
                            "initial_state": state
                        }).env
    rl_env, _ = rl_env_from_snc_env(env,
                                    discount_factor=0.99,
                                    normalise_observations=True)
    ppo_agent = MagicMock()
    ppo_agent.discount_factor = 0.99
    ppo_agent._gamma = 0.99
    policy = MagicMock()
    ppo_agent.collect_policy = policy
    del rl_env
    ppo_sim_agent = RLSimulationAgent(env, ppo_agent, normalise_obs=True)
    ppo_sim_agent._rl_env.preprocess_action = MagicMock()
    ppo_sim_agent.map_state_to_actions(state)
    expected_timestep = TimeStep(step_type=StepType(0),
                                 reward=None,
                                 discount=0.99,
                                 observation=state.reshape(1, -1) /
                                 state.sum())
    assert policy.action.call_count == 1
    call_timestep = policy.action.call_args[0][0]
    assert (call_timestep.observation == expected_timestep.observation).all()
def get_environment(env_name: str,
                    agent_name: str,
                    episode_len_to_min_drain_time_ratio: float,
                    terminal_discount_factor: float = 0.7,
                    action_repetitions: int = 1,
                    parallel_environments: int = 8,
                    env_overload_params: Optional[Dict] = None,
                    agent_params: Optional[Dict] = None,
                    seed: Optional[int] = None) \
        -> Tuple[TFPyEnvironment, float, float, int, Tuple[int, ...]]:
    """
    Builds and initialises a TensorFlow environment implementation of the requested scenario.

    :param env_name: The name of the scenario to load. Must be in the list of implemented scenarios.
    :param agent_name: The name of the RL agent the environment is to be set up for.
    :param episode_len_to_min_drain_time_ratio: Maximum number of time steps per episode as a
        proportion of the minimal draining time.
    :param terminal_discount_factor: The discount applied to the final time step from which a
        per-step discount factor is calculated.
    :param action_repetitions: Number of time steps each selected action is repeated for.
    :param parallel_environments: Number of environments to run in parallel.
    :param env_overload_params: Dictionary of parameters to override the scenario defaults.
    :param agent_params: Optional dictionary of agent parameters used to adapt the environment
        set up (e.g. whether PPO normalises observations itself).
    :param seed: Random seed used to initialise the environment.
    :return: The wrapped TF-Agents environment, the per-step discount factor, the network load,
        the maximum episode length, and the action space dimensions.
    """
    # Handle some default argument clean up.
    if env_overload_params is None:
        env_overload_params = {}
    if agent_params is None:
        agent_params = {}

    env = scenarios.load_scenario(env_name, seed, env_overload_params).env

    if np.all(env.state_initialiser.initial_state == 0):
        env.max_episode_length = 450
    else:
        if env.state_initialiser.initial_state.ndim == 1:
            initial_state = env.state_initialiser.initial_state.reshape((-1, 1))
        else:
            initial_state = env.state_initialiser.initial_state
        minimal_draining_time = compute_minimal_draining_time_from_env_cvxpy(initial_state, env)
        env.max_episode_length = int(episode_len_to_min_drain_time_ratio * minimal_draining_time)
    discount_factor = np.exp(np.log(terminal_discount_factor) / env.max_episode_length)
    load = np.max(compute_load_workload_matrix(env).load)
    max_ep_len = env.max_episode_length

    # Allow toggling of observation normalisation in the environment.
    # PPO typically normalises observations internally, so in that case the environment does not
    # need to normalise them as well.
    if agent_name == 'ppo' and agent_params.get('normalize_observations', True):
        normalise_obs_in_env = False
    else:
        normalise_obs_in_env = True

    # Wrap and parallelise environment for tf agents.
    tf_env, action_dims = rl_env_from_snc_env(env,
                                              discount_factor,
                                              action_repetitions,
                                              parallel_environments,
                                              normalise_observations=normalise_obs_in_env)
    return tf_env, discount_factor, load, max_ep_len, action_dims
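
# Usage sketch added for illustration (not from the original source): the scenario name, agent
# parameters and seed below are assumptions. It also spells out the relation computed above,
# namely that the per-step discount factor is the max_episode_length-th root of the terminal
# discount factor.
def example_get_environment_usage():
    tf_env, gamma, load, max_ep_len, action_dims = get_environment(
        env_name='klimov_model',
        agent_name='ppo',
        episode_len_to_min_drain_time_ratio=0.8,
        terminal_discount_factor=0.7,
        agent_params={'normalize_observations': True},
        seed=72)
    # gamma = exp(log(0.7) / max_ep_len), hence gamma ** max_ep_len == 0.7.
    assert np.isclose(gamma ** max_ep_len, 0.7)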
def test_reinforce_agent_play(env_name):
    """
    Extension of the agent set up and initialisation test to include playing episodes.
    """
    # Set up environment using default parameters.
    # Environment parameters do not affect the test result here.
    tf_env, _ = rl_env_from_snc_env(load_scenario(
        env_name,
        job_gen_seed=10,
        override_env_params={'max_episode_length': 25})[1],
                                    discount_factor=0.99)

    # Instantiate and initialise a REINFORCE agent.
    reinforce_agent = create_reinforce_agent(tf_env)
    reinforce_agent.initialize()

    # Reset the environment
    tf_env.reset()
    # Play 5 time steps in the environment.
    for _ in range(5):
        # Since we do not have the state stored at this point we capture it from the environment
        # fresh each time step as a TimeStep object (a named tuple).
        time_step = tf_env.current_time_step()
        # Attain our agent's action.
        action_step = reinforce_agent.collect_policy.action(time_step)
        if isinstance(action_step.action, tuple):
            action = tf.concat(action_step.action, axis=-1)
        else:
            action = action_step.action

        # Ensure that the action is binary as expected.
        assert snc.is_binary(action)

        # Play the action out in the environment.
        tf_env.step(action_step.action)
def test_ppo_agent_init_with_multiple_resource_sets():
    """
    Tests agent set up and initialisation with multiple action subspaces (multiple resource sets).
    """
    # Set the environment name for this case as the asserts are difficult to make as variables.
    env_name = 'double_reentrant_line_shared_res_homogeneous_cost'
    # Set up the environment parameters.
    # Environment parameters do not affect the test result here.
    tf_env, _ = rl_env_from_snc_env(load_scenario(env_name,
                                                  job_gen_seed=10)[1],
                                    discount_factor=0.99,
                                    normalise_observations=False)

    # Instantiate and initialise a PPO agent for the environment.
    ppo_agent = create_ppo_agent(tf_env, num_epochs=10)
    ppo_agent.initialize()
    # Validate initialisation by checking some properties of the initialised agent.
    assert isinstance(ppo_agent.action_spec, tuple)
    assert len(ppo_agent.action_spec) == 2
    assert isinstance(ppo_agent.action_spec[0], BoundedTensorSpec)
    assert isinstance(ppo_agent.action_spec[1], BoundedTensorSpec)
    assert ppo_agent.action_spec[0].shape == tf.TensorShape((3,))
    assert ppo_agent.action_spec[1].shape == tf.TensorShape((3,))
    assert ppo_agent.name == "PPO_Agent"
    assert ppo_agent.time_step_spec == tf_env.time_step_spec()
def test_ppo_agent_play(env_name):
    """
    Extension of the agent set up and initialisation test to include playing episodes.
    """
    # Set up environment using default parameters.
    # Environment parameters do not affect the test result here.
    tf_env, action_dims = rl_env_from_snc_env(load_scenario(
        env_name, job_gen_seed=10)[1],
                                              discount_factor=0.99,
                                              normalise_observations=False)

    # Instantiate and initialise a PPO agent for the environment.
    ppo_agent = create_ppo_agent(tf_env, num_epochs=10)
    ppo_agent.initialize()

    # Reset the environment
    tf_env.reset()
    # Play 5 time steps in the environment.
    for _ in range(5):
        # Since we do not have the state stored at this point we capture it from the environment
        # fresh each time step as a TimeStep object (a named tuple).
        time_step = tf_env.current_time_step()
        # Attain our agent's action.
        action_step = ppo_agent.collect_policy.action(time_step)
        # Concatenate the per-resource-set actions into a single tensor before checking it.
        if isinstance(action_step.action, tuple):
            action = tf.concat(action_step.action, axis=-1)
        else:
            action = action_step.action

        # Ensure that the action is binary as expected.
        assert snc.is_binary(action)
        # Play the action out in the environment.
        tf_env.step(action_step.action)
def test_reinforce_agent_learning(env_name):
    """
    Extension of the test for an agent playing in the environment to include training.
    Note: This does not test that training improves the policy. It simply tests that the training
    loop runs effectively.
    """
    # Set up environment using default parameters.
    # Environment parameters do not affect the test result here.
    tf_env, _ = rl_env_from_snc_env(load_scenario(
        env_name,
        job_gen_seed=10,
        override_env_params={'max_episode_length': 25})[1],
                                    discount_factor=0.99)

    # Set up a training step counter.
    global_step = tf.compat.v1.train.get_or_create_global_step()
    # Instantiate a REINFORCE agent
    reinforce_agent = create_reinforce_agent(tf_env,
                                             training_step_counter=global_step)

    # Instantiate a replay buffer.
    replay_buffer = TFUniformReplayBuffer(
        data_spec=reinforce_agent.collect_data_spec,
        batch_size=tf_env.batch_size,
        max_length=1000)

    # Initialise the action network weights etc.
    reinforce_agent.initialize()

    # Use a driver to handle data collection for the agent. This handles a lot of the backend
    # TensorFlow set up and solves previous errors with episodes of differing lengths.
    collect_driver = DynamicEpisodeDriver(tf_env,
                                          reinforce_agent.collect_policy,
                                          observers=[replay_buffer.add_batch],
                                          num_episodes=2)

    # Get the initial states of the agent and environment before training.
    time_step = tf_env.reset()
    policy_state = reinforce_agent.collect_policy.get_initial_state(
        tf_env.batch_size)

    # Take a copy of the variables in order to ensure that training does lead to parameter changes.
    initial_vars = deepcopy(reinforce_agent.trainable_variables)
    assert len(initial_vars) > 0, "Agent has no trainable variables."

    # Set up a minimal training loop to simply test training mechanics work.
    for _ in range(5):
        # Collect experience.
        time_step, policy_state = collect_driver.run(time_step=time_step,
                                                     policy_state=policy_state)
        # Now the replay buffer should have data in it so we can collect the data and train the
        # agent.
        experience = replay_buffer.gather_all()
        reinforce_agent.train(experience)
        # Clear the replay buffer and return to play.
        replay_buffer.clear()

    # Check that training has had some effect
    for v1, v2 in zip(initial_vars, reinforce_agent.trainable_variables):
        assert not np.allclose(v1.numpy(), v2.numpy())
def test_rl_simulation_agent_serialisation():
    """
    Test the custom serialisation of the agent used when saving the state of the SNC simulator.
    The customised serialisation was required due to the inability to serialise TensorFlow objects.
    """
    # Set up the agent as before.
    seed = 72
    env = load_scenario("single_server_queue", job_gen_seed=seed).env
    rl_env, _ = rl_env_from_snc_env(env, discount_factor=0.99)
    rl_agent = create_reinforce_agent(rl_env)
    rl_agent.initialize()
    del rl_env
    sim_agent = RLSimulationAgent(env, rl_agent, normalise_obs=True)

    # Attain the dictionary representation of the agent and test that all the attributes expected
    # are present.
    serialised_agent = sim_agent.to_serializable()
    assert all(attr in serialised_agent for attr in [
        "_rl_env", "_rl_agent", "_policy", "_is_eval_policy", "env",
        "buffer_processing_matrix", "constituency_matrix", "demand_rate",
        "list_boundary_constraint_matrices", "name"
    ])
    # Ensure that the dictionary representation is compatible with the json module and the chosen
    # encoder.
    json_string = json.dumps(serialised_agent,
                             cls=NumpyEncoder,
                             indent=4,
                             sort_keys=True)
    assert bool(json_string)
def test_reinforce_agent_init_with_multiple_resource_sets():
    """
    Tests agent set up and initialisation with multiple action subspaces (multiple resource sets).
    """
    # Set the environment name for this case as the asserts are difficult to make as variables.
    env_name = 'double_reentrant_line_shared_res_homogeneous_cost'

    # Set up environment using default parameters.
    # Environment parameters do not affect the test result here.
    tf_env, _ = rl_env_from_snc_env(load_scenario(
        env_name,
        job_gen_seed=10,
        override_env_params={'max_episode_length': 25})[1],
                                    discount_factor=0.99)

    # Instantiate and initialise a REINFORCE agent for the environment.
    reinforce_agent = create_reinforce_agent(tf_env)
    reinforce_agent.initialize()
    # Validate initialisation by checking some properties of the initialised agent.
    assert isinstance(reinforce_agent.action_spec, tuple)
    assert len(reinforce_agent.action_spec) == 2
    assert isinstance(reinforce_agent.action_spec[0], BoundedTensorSpec)
    assert isinstance(reinforce_agent.action_spec[1], BoundedTensorSpec)
    assert reinforce_agent.action_spec[0].shape == tf.TensorShape((1, 3))
    assert reinforce_agent.action_spec[1].shape == tf.TensorShape((1, 3))
    assert reinforce_agent.name == "reinforce_agent"
    assert reinforce_agent.time_step_spec == tf_env.time_step_spec()
def test_bellman_pets_agent_init_with_multiple_resource_sets():
    """
    Tests agent set up and initialisation with multiple action subspaces (multiple resource sets).
    """
    # Set the environment name for this case as the asserts are difficult to make as variables.
    env_name = 'double_reentrant_line_shared_res_homogeneous_cost'
    # Set up the environment parameters.
    # Environment parameters do not affect the test result here.
    tf_env, _ = rl_env_from_snc_env(load_scenario(env_name,
                                                  job_gen_seed=10)[1],
                                    discount_factor=0.99,
                                    normalise_observations=False)

    # Instantiate and initialise a Bellman PETS agent for the environment.
    bellman_pets_agent = create_bellman_pets_agent(
        env=tf_env,
        reward_model_class=CRWRewardModel,
        initial_state_distribution_model_class=CRWInitialStateModel,
    )

    # Validate initialisation by checking some properties of the initialised agent.
    assert isinstance(bellman_pets_agent.action_spec, tuple)
    assert len(bellman_pets_agent.action_spec) == 2
    assert isinstance(bellman_pets_agent.action_spec[0], BoundedTensorSpec)
    assert isinstance(bellman_pets_agent.action_spec[1], BoundedTensorSpec)
    assert bellman_pets_agent.action_spec[0].shape == tf.TensorShape((1, 3))
    assert bellman_pets_agent.action_spec[1].shape == tf.TensorShape((1, 3))
    assert bellman_pets_agent.name == "PETS_Agent"
    assert bellman_pets_agent.time_step_spec == tf_env.time_step_spec()
def test_rl_env_normalise_obs_property():
    """
    Ensure that the normalise_obs property of RLControlledRandomWalk is set and updated correctly.
    """
    # Set the environment name for this case as the asserts are difficult to make as variables.
    env_name = 'double_reentrant_line_shared_res_homogeneous_cost'
    # Set up the environment parameters.
    # Environment parameters do not affect the test result here.
    env = load_scenario(env_name, job_gen_seed=10).env
    rl_env, _ = rl_env_from_snc_env(env, discount_factor=0.99, for_tf_agent=False)
    assert rl_env.normalise_obs is True
    rl_env.normalise_obs = False
    assert rl_env.normalise_obs is False
def test_rl_simulation_agent_string_representation():
    """
    Tests that the string representation of the simulation agent is as expected.
    """
    # Set up the agent as before.
    seed = 72
    env = load_scenario("single_server_queue", job_gen_seed=seed).env
    rl_env, _ = rl_env_from_snc_env(env, discount_factor=0.99)
    rl_agent = create_reinforce_agent(rl_env)
    rl_agent.initialize()
    del rl_env
    sim_agent = RLSimulationAgent(env, rl_agent, normalise_obs=True)
    # Ensure that the string representation of the agent contains the instance name at the end.
    assert str(sim_agent)[-len(sim_agent.name):] == sim_agent.name
def test_rl_env_from_snc_env_action_space_dims_multiple_resource_sets():
    """
    Tests the formation and stability of the action space dimensions of the environment through
    the RL environment pipeline in a more complex setting.
    """
    # Set the environment name for this case as the asserts are difficult to make as variables.
    env_name = 'double_reentrant_line_shared_res_homogeneous_cost'
    # Set up the environment parameters.
    # Environment parameters do not affect the test result here.
    env = load_scenario(env_name, job_gen_seed=10).env
    rl_env, action_space_dims = rl_env_from_snc_env(env, discount_factor=0.99, for_tf_agent=False)
    _, action_space_dims_tf = rl_env_from_snc_env(env, discount_factor=0.99, for_tf_agent=True)
    assert action_space_dims == action_space_dims_tf
    assert len(rl_env.action_vectors) == sum(action_space_dims)
def test_rl_simulation_agent_discount_factor_ppo():
    """
    Tests that the discount factor is passed from a PPO agent to an RLSimulationAgent correctly.
    """
    # Set up the agent as before.
    seed = 72
    env = load_scenario("single_server_queue", job_gen_seed=seed).env
    rl_env, _ = rl_env_from_snc_env(env,
                                    discount_factor=0.99,
                                    normalise_observations=False)
    ppo_agent = create_ppo_agent(rl_env, gamma=0.90)
    ppo_agent.initialize()
    del rl_env
    ppo_sim_agent = RLSimulationAgent(env, ppo_agent, normalise_obs=False)
    assert ppo_sim_agent.discount_factor == 0.90
def test_rl_simulation_agent_normalise_obs_property():
    """Ensure that the _normalise_obs property of RLSimulationAgent is set correctly."""
    # Set up the agent as before.
    seed = 72
    env = load_scenario("single_server_queue", job_gen_seed=seed).env
    rl_env, _ = rl_env_from_snc_env(env,
                                    discount_factor=0.99,
                                    normalise_observations=False)
    ppo_agent = create_ppo_agent(rl_env, gamma=0.90)
    ppo_agent.initialize()
    del rl_env
    ppo_sim_agent = RLSimulationAgent(env, ppo_agent, normalise_obs=False)
    assert ppo_sim_agent._normalise_obs is False
    ppo_sim_agent = RLSimulationAgent(env, ppo_agent, normalise_obs=True)
    assert ppo_sim_agent._normalise_obs is True
def test_rl_simulation_agent_discount_factor_reinforce():
    """
    Tests that the discount factor is passed from a REINFORCE agent to an RLSimulationAgent
    correctly.
    """
    # Set up the agent as before.
    seed = 72
    env = load_scenario("single_server_queue", job_gen_seed=seed).env
    rl_env, _ = rl_env_from_snc_env(env, discount_factor=0.99)
    reinforce_agent = create_reinforce_agent(rl_env, gamma=0.97)
    reinforce_agent.initialize()
    del rl_env
    reinforce_sim_agent = RLSimulationAgent(env,
                                            reinforce_agent,
                                            normalise_obs=True)
    assert reinforce_sim_agent.discount_factor == 0.97
def test_rl_env_normalise_obs_action():
    """
    Ensure that the normalise_obs property of RLControlledRandomWalk is used correctly.
    """
    # Set the environment name for this case as the asserts are difficult to make as variables.
    env_name = 'klimov_model'
    # Set up the environment parameters.
    # Environment parameters do not affect the test result here.
    env = load_scenario(env_name,
                        job_gen_seed=10,
                        override_env_params={"initial_state": [100, 100, 100, 100]}).env
    rl_env, _ = rl_env_from_snc_env(env, discount_factor=0.99, for_tf_agent=False)
    assert rl_env.normalise_obs is True
    s0_normalised = rl_env.reset()
    assert s0_normalised.tolist() == [0.25, 0.25, 0.25, 0.25]
    rl_env.normalise_obs = False
    s0_unnormalised = rl_env.reset()
    assert s0_unnormalised.tolist() == [100, 100, 100, 100]
def test_ppo_agent_init(env_name, expected_action_spec_shape):
    """
    Tests agent set up and initialisation.
    """
    # Set up environment using default parameters.
    # Environment parameters do not affect the test result here.
    tf_env, _ = rl_env_from_snc_env(load_scenario(env_name,
                                                  job_gen_seed=10)[1],
                                    discount_factor=0.99,
                                    normalise_observations=False)

    # Instantiate and initialise a PPO agent for the environment.
    ppo_agent = create_ppo_agent(tf_env, num_epochs=10)
    ppo_agent.initialize()
    # Validate initialisation by checking relevant properties of the initialised agent.
    assert isinstance(ppo_agent.action_spec, BoundedTensorSpec)
    assert ppo_agent.action_spec.shape == expected_action_spec_shape
    assert ppo_agent.name == "PPO_Agent"
    assert ppo_agent.time_step_spec == tf_env.time_step_spec()
def get_simple_link_constrained_model():
    cost_per_buffer = np.array([3, 1, 3, 1.5, 3]).reshape(-1, 1)
    param_overrides = dict(alpha1=4.8,
                           mu12=2.,
                           mu13=4.,
                           mu25=2.,
                           mu32=4.5,
                           mu34=1.8,
                           mu35=2.,
                           mu45=1.,
                           mu5=7.,
                           cost_per_buffer=cost_per_buffer)
    _, env = scenarios.load_scenario('simple_link_constrained_model', 0,
                                     param_overrides)

    _, workload_mat, nu = workload.compute_load_workload_matrix(env, 6)
    env.workload_mat = workload_mat
    env.nu = nu

    return env
def test_reinforce_agent_init(env_name, expected_action_spec_shape):
    """
    Tests agent set up and initialisation.
    """
    # Set up environment using default parameters.
    # Environment parameters do not affect the test result here.
    tf_env, _ = rl_env_from_snc_env(load_scenario(
        env_name,
        job_gen_seed=10,
        override_env_params={'max_episode_length': 25})[1],
                                    discount_factor=0.99)

    # Instantiate and initialise a REINFORCE agent for the environment.
    reinforce_agent = create_reinforce_agent(tf_env)
    reinforce_agent.initialize()
    # Validate initialisation by checking some properties of the initialised agent.
    assert isinstance(reinforce_agent.action_spec, BoundedTensorSpec)
    assert reinforce_agent.action_spec.shape == expected_action_spec_shape
    assert reinforce_agent.name == "reinforce_agent"
    assert reinforce_agent.time_step_spec == tf_env.time_step_spec()
def test_rl_simulation_agent_action_mapping():
    """
    Tests that the RL Simulation Agent with the SNC interface is able to receive states and produce
    actions both of the expected type and form.
    """
    # Set up the agent as above
    seed = 72
    env = load_scenario("single_server_queue", job_gen_seed=seed).env
    rl_env, _ = rl_env_from_snc_env(env, discount_factor=0.99)
    rl_agent = create_reinforce_agent(rl_env)
    rl_agent.initialize()
    del rl_env
    sim_agent = RLSimulationAgent(env, rl_agent, normalise_obs=True)

    # Attain a state and form an action.
    state = env.reset()
    action = sim_agent.map_state_to_actions(state)
    # Ensure that the action is as expected first with a formal assertion and then by passing it
    # to the environment.
    assert isinstance(action, snc_types.ActionProcess)
    env.step(action)
def test_scenario(scenario_name, agent_class):
    """ Run a brief integration test on a given scenario
    """
    skip_tests = SKIPPED_TESTS + PULL_MODELS + PUSH_PULL_MODELS
    if scenario_name in skip_tests:
        pytest.skip()

    np.random.seed(SEED_NO)

    _, env = scenarios.load_scenario(scenario_name, SEED_NO)
    # Update parameters for quick tests.
    overrides = {
        "HedgehogHyperParams": {
            "theta_0": 0.5,
            "horizon_drain_time_ratio": 0.1,
            "horizon_mpc_ratio": 0.1,
            "minimum_horizon": 10
        },
        "AsymptoticCovarianceParams": {
            "num_presimulation_steps": 100,
            "num_batch": 20
        }
    }

    if scenario_name in MIP_REQUIRED_MODELS:
        overrides["HedgehogHyperParams"]["mpc_policy_class_name"] = "FeedbackMipFeasibleMpcPolicy"

    ac_params, wk_params, si_params, po_params, hh_params, si_class, dp_params, name \
        = load_agents.get_hedgehog_hyperparams(**overrides)
    discount_factor = 0.95
    if agent_class == BigStepHedgehogAgent:
        agent = agent_class(env, discount_factor, wk_params, hh_params, ac_params,
                            si_params, po_params, si_class, dp_params, name)
    elif agent_class == BigStepHedgehogGTOAgent:
        agent = agent_class(env, discount_factor, wk_params, hh_params, ac_params,
                            po_params, dp_params, name)
    else:
        assert False, f"Not recognised agent: {agent_class}"
    simulator = ps.SncSimulator(env, agent, discount_factor=discount_factor)
    simulator.run(num_simulation_steps=SIM_STEPS)
def test_rl_simulation_agent_init():
    """
    Test the initialisation of an RL agent with an interface compatible with the SNC simulator.
    """
    # To instantiate an agent from tf_agents we need an RL environment which itself requires a
    # standard SNC environment. We therefore set up an SNC environment and then wrap it for the
    # TensorFlow agent. This TF environment is later deleted since it is no longer required and to
    # ensure that it is not used inadvertently.
    seed = 72
    env = load_scenario("single_server_queue", job_gen_seed=seed).env
    rl_env, _ = rl_env_from_snc_env(env, discount_factor=0.99)
    rl_agent = create_reinforce_agent(rl_env)
    rl_agent.initialize()
    del rl_env
    # Wrapping the agent for the SNC simulator using information from the environment and the agent.
    sim_agent = RLSimulationAgent(env, rl_agent, normalise_obs=True)

    # Test that the agent has all of the attributes we expect and that they are of the right type.
    assert hasattr(sim_agent, "_rl_env") and isinstance(
        sim_agent._rl_env, RLControlledRandomWalk)
    assert hasattr(sim_agent, "_rl_agent") and isinstance(
        sim_agent._rl_agent, TFAgent)
    assert hasattr(sim_agent, "_policy") and isinstance(
        sim_agent._policy, tf_policy.Base)
    assert hasattr(sim_agent, "_is_eval_policy") and isinstance(
        sim_agent._is_eval_policy, bool)
    assert hasattr(sim_agent, "env") and isinstance(sim_agent.env,
                                                    ControlledRandomWalk)
    assert hasattr(sim_agent, "buffer_processing_matrix") and isinstance(
        sim_agent.buffer_processing_matrix, snc_types.BufferMatrix)
    assert hasattr(sim_agent, "constituency_matrix") and isinstance(
        sim_agent.constituency_matrix, snc_types.ConstituencyMatrix)
    assert hasattr(sim_agent, "demand_rate") and isinstance(
        sim_agent.demand_rate, np.ndarray)
    assert hasattr(sim_agent,
                   "list_boundary_constraint_matrices") and isinstance(
                       sim_agent.list_boundary_constraint_matrices, list)
    assert hasattr(sim_agent, "name") and isinstance(sim_agent.name, str)
def test_bellman_pets_agent_init(env_name, expected_action_spec_shape):
    """
    Tests agent set up and initialisation.
    """
    # Set up environment using default parameters.
    # Environment parameters do not affect the test result here.
    tf_env, _ = rl_env_from_snc_env(load_scenario(env_name,
                                                  job_gen_seed=10)[1],
                                    discount_factor=0.99,
                                    normalise_observations=False)

    # Instantiate and initialise a PETS agent for the environment.
    bellman_pets_agent = create_bellman_pets_agent(
        env=tf_env,
        reward_model_class=CRWRewardModel,
        initial_state_distribution_model_class=CRWInitialStateModel,
    )

    # Validate initialisation by checking relevant properties of the initialised agent.
    assert isinstance(bellman_pets_agent.action_spec, BoundedTensorSpec)
    assert bellman_pets_agent.action_spec.shape == expected_action_spec_shape
    assert bellman_pets_agent.name == "PETS_Agent"
    assert bellman_pets_agent.time_step_spec == tf_env.time_step_spec()
def run_validation(arguments: argparse.Namespace) -> Dict[str, str]:
    """
    Run the validation on a particular scenario.

    :param arguments: Namespace of experiment parameters.
    """
    assert arguments.env_param_overrides['job_gen_seed'] is not None
    assert arguments.seed is not None
    # Note that if job_gen_seed was not in env_param_overrides, then at this point we will have:
    #   arguments.env_param_overrides['job_gen_seed'] == arguments.seed.
    job_gen_seed = arguments.env_param_overrides['job_gen_seed']
    global_seed = arguments.seed + 100
    agent_seed = arguments.seed + 200
    mpc_seed = arguments.seed + 300
    np.random.seed(global_seed)
    print(f"job_gen_seed {job_gen_seed}")
    print(f"global_seed {global_seed}")
    print(f"agent_seed {agent_seed}")
    print(f"mpc_seed {mpc_seed}")
    save_locations = dict()

    # Get Scenario
    _, env = scenarios.load_scenario(arguments.env_name, job_gen_seed,
                                     arguments.env_param_overrides)

    # Initialise an agent counter to ensure that the right checkpoint is loaded for each agent.
    rl_agent_count = 0
    for agent_name in arguments.agents:
        env.reset_with_random_state(job_gen_seed)
        agent_args = {}
        name_alias = agent_name  # Set name of folder storing results to agent_name by default.
        if agent_name in load_agents.HEDGEHOG_AGENTS:
            if arguments.hedgehog_param_overrides is None:
                arguments.hedgehog_param_overrides = dict()
            agent_args['hh_overrides'] = arguments.hedgehog_param_overrides
            agent_args['discount_factor'] = arguments.discount_factor
            agent_args['debug_info'] = arguments.debug_info
            agent_args['agent_seed'] = agent_seed
            agent_args['mpc_seed'] = mpc_seed
            # Replace directory name if name passed as an agent parameter.
            name_alias = arguments.hedgehog_param_overrides.get(
                'name', agent_name)
        elif agent_name == 'distribution_with_rebalancing_heuristic':
            agent_args['safety_stocks'] = 20 * np.ones(env.state.shape)
        elif agent_name in ['reinforce', 'ppo']:
            agent_args['discount_factor'] = arguments.discount_factor
            if arguments.rl_agent_params:
                # Update agent_args accordingly.
                if rl_agent_count < len(arguments.rl_agent_params):
                    if 'discount_factor' in arguments.rl_agent_params[
                            rl_agent_count]:
                        warn(
                            'WARNING: Overriding provided discount factor with agent specific '
                            f'discount factor for {agent_name} agent')
                agent_args.update(arguments.rl_agent_params[rl_agent_count])
            else:
                if agent_name == "ppo":
                    raise ValueError(
                        "When running a PPO agent you must provide agent parameters."
                    )
                else:
                    warn("REINFORCE agent being run default agent parameters.")
            agent_args['rl_checkpoint'] = arguments.rl_checkpoints[
                rl_agent_count]
            rl_agent_count += 1
        elif agent_name == 'maxweight' or agent_name == 'scheduling_maxweight':
            if arguments.maxweight_param_overrides is None:
                arguments.maxweight_param_overrides = dict()
            agent_args['overrides'] = arguments.maxweight_param_overrides
            agent_args['agent_seed'] = agent_seed
            agent_args['mpc_seed'] = mpc_seed
            # Replace directory name if name passed as an agent parameter.
            name_alias = arguments.maxweight_param_overrides.get(
                'name', agent_name)
        else:
            agent_args['agent_seed'] = agent_seed

        agent = load_agents.get_agent(agent_name, env, **agent_args)
        sim = ps.SncSimulator(env, agent, **arguments.__dict__)

        print(f'\nSimulating {agent.name}...')
        validation_utils.print_agent_params(agent)

        is_hedgehog = isinstance(
            agent, (BigStepHedgehogAgent, PureFeedbackStationaryHedgehogAgent,
                    PureFeedbackMIPHedgehogAgent))
        save_location = f'{arguments.logdir}/{name_alias}'
        run_policy(sim, arguments.num_steps, arguments.server_mode,
                   is_hedgehog, save_location, job_gen_seed)

        if is_hedgehog:
            assert isinstance(
                agent,
                (BigStepHedgehogAgent, PureFeedbackStationaryHedgehogAgent,
                 PureFeedbackMIPHedgehogAgent))
            validation_utils.print_workload_to_physical_resources_indexes(
                agent.workload_tuple.nu)

        save_locations[agent.name] = save_location
        print(f'Data stored at: {save_location}.')
        print(f'Finished simulating {agent.name}.\n')

    print(f"job_gen_seed: {arguments.env_param_overrides.get('job_gen_seed')}")
    print("End of simulation!")
    if not arguments.server_mode:
        plt.ioff()
        plt.show()
    return save_locations
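
# Invocation sketch added for illustration (not from the original source): a minimal
# argparse.Namespace carrying only the attributes that run_validation actually reads. The
# values are assumptions; the real experiment CLI defines these (and more) as command-line
# arguments before calling run_validation.
def example_run_validation_call():
    arguments = argparse.Namespace(
        env_name='single_server_queue',
        env_param_overrides={'job_gen_seed': 72},
        seed=72,
        agents=['maxweight'],
        discount_factor=0.95,
        debug_info=False,
        hedgehog_param_overrides=None,
        maxweight_param_overrides=None,
        rl_agent_params=None,
        rl_checkpoints=None,
        num_steps=100,
        server_mode=True,
        logdir='/tmp/snc_validation')
    # Returns a mapping from agent name to the directory where its results were saved.
    return run_validation(arguments)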