Example #1
0
def test_tensorflow_action_interpretation():
    """
    Tests that tuples of TensorFlow tensors can be handled correctly by the environment.
    """
    # Set up the environment parameters.
    alpha1, alpha3 = 2, 2
    mu1, mu2, mu3, mu4 = 10, 3, 10, 3
    cost_per_buffer = np.ones((4, 1))
    initial_state = (0, 0, 0, 0)
    capacity = np.ones((4, 1)) * np.inf
    job_conservation_flag = True
    seed = 72
    demand_rate = np.array([alpha1, 0, alpha3, 0])[:, None]
    buffer_processing_matrix = np.array([[-mu1, 0, 0, 0],
                                         [mu1, -mu2, 0, 0],
                                         [0, 0, -mu3, 0],
                                         [0, 0, mu3, -mu4]])
    constituency_matrix = np.array([[1, 0, 0, 1],
                                    [0, 1, 1, 0]])

    # Construct environment.
    job_generator = ScaledBernoulliServicesPoissonArrivalsGenerator(
        demand_rate, buffer_processing_matrix, job_gen_seed=seed)

    state_initialiser = stinit.DeterministicCRWStateInitialiser(initial_state)
    env = RLControlledRandomWalk(cost_per_buffer, capacity, constituency_matrix, job_generator,
                                 state_initialiser, job_conservation_flag)

    # Set up a TensorFlow action tuple with an action for each resource and ensure that the
    # environment is able to interpret it and update itself.
    action_tuple = (tf.convert_to_tensor([0, 1, 0]), tf.convert_to_tensor([0, 0, 1]))
    assert np.all(env.preprocess_action(action_tuple) == np.array([[1], [0], [1], [0]]))
def build_closed_loop_env_2_demand_buffers(demand_to_supplier_routes,
                                           constituency_matrix,
                                           initial_state=np.zeros((5, 1))):
    ind_surplus_buffers = [1, 3]
    job_gen_seed = 42
    mu = 1.5
    mud = 3
    mus = 1.5
    alpha = 0.95
    cost_per_buffer = np.array([[1], [2], [5], [3], [8]])
    demand_rate = np.array([[0], [0], [alpha], [0], [alpha]])
    buffer_processing_matrix = np.array([[-mu, -mu / 3, 0, mus, 0, 0],
                                         [2 * mu / 3, 0, -mud, 0, 0, 0],
                                         [0, 0, -mud, 0, 0, 0],
                                         [mu / 3, mu / 3, 0, 0, -mud, mus / 3],
                                         [0, 0, 0, 0, -mud, 0]])
    job_generator = ScaledBernoulliServicesPoissonArrivalsGenerator(
        demand_rate, buffer_processing_matrix, job_gen_seed=job_gen_seed)
    state_initialiser = DeterministicCRWStateInitialiser(initial_state)
    cl_env = ClosedLoopCRW(demand_to_supplier_routes, ind_surplus_buffers,
                           cost_per_buffer,
                           np.ones_like(demand_rate) * np.inf,
                           constituency_matrix, job_generator,
                           state_initialiser)
    return cl_env
Example #3
0
def test_rl_env_from_snc_env_action_space_dims_simple():
    """
    Tests the formation and stability of the action space dimensions of the environment through
    the RL environment pipeline in a simple setting.
    """
    # Set up the environment parameters.
    cost_per_buffer = np.ones((1, 1))
    initial_state = (0,)
    capacity = np.ones((1, 1)) * np.inf
    demand_rate_val = 0.7
    job_conservation_flag = True
    seed = 72

    demand_rate = np.array([demand_rate_val])[:, None]
    buffer_processing_matrix = - np.ones((1, 1))
    constituency_matrix = np.ones((1, 1))
    list_boundary_constraint_matrices = [constituency_matrix]

    # Construct environment.
    job_generator = ScaledBernoulliServicesPoissonArrivalsGenerator(
        demand_rate, buffer_processing_matrix, job_gen_seed=seed)
    assert job_generator.routes == {}
    state_initialiser = stinit.DeterministicCRWStateInitialiser(initial_state)
    env = RLControlledRandomWalk(cost_per_buffer, capacity, constituency_matrix, job_generator,
                                 state_initialiser, job_conservation_flag,
                                 list_boundary_constraint_matrices)

    _, action_space_dims = rl_env_from_snc_env(env, discount_factor=0.99, for_tf_agent=False)
    _, action_space_dims_tf = rl_env_from_snc_env(env, discount_factor=0.99, for_tf_agent=True)
    assert action_space_dims == action_space_dims_tf
    assert len(env.action_vectors) == sum(action_space_dims)
Example #4
0
def test_tf_environment_wrapping():
    """
    Test wrapping the RL environment for use with TensorFlow Agents.

    Use Simple Server Queue for simplicity
    """
    # Set up single server queue.
    cost_per_buffer = np.ones((1, 1))
    initial_state = (0,)
    capacity = np.ones((1, 1)) * np.inf
    demand_rate_val = 0.7
    job_conservation_flag = True
    seed = 72

    demand_rate = np.array([demand_rate_val])[:, None]
    buffer_processing_matrix = - np.ones((1, 1))
    constituency_matrix = np.ones((1, 1))
    list_boundary_constraint_matrices = [constituency_matrix]

    # Construct environment.
    job_generator = ScaledBernoulliServicesPoissonArrivalsGenerator(
        demand_rate, buffer_processing_matrix, job_gen_seed=seed)
    assert job_generator.routes == {}
    state_initialiser = stinit.DeterministicCRWStateInitialiser(initial_state)
    env = RLControlledRandomWalk(cost_per_buffer, capacity, constituency_matrix, job_generator,
                                 state_initialiser, job_conservation_flag,
                                 list_boundary_constraint_matrices)

    # Try wrapping environment for tf agents.
    tf_env = TFPyEnvironment(GymWrapper(env))
    del tf_env
Example #5
0
def test_render_error():
    """
    Test the render method.

    Included to give full test coverage.
    """
    # Set up single server queue.
    cost_per_buffer = np.ones((1, 1))
    initial_state = (0,)
    capacity = np.ones((1, 1)) * np.inf
    demand_rate_val = 0.7
    job_conservation_flag = True
    seed = 72

    demand_rate = np.array([demand_rate_val])[:, None]
    buffer_processing_matrix = - np.ones((1, 1))
    constituency_matrix = np.ones((1, 1))
    list_boundary_constraint_matrices = [constituency_matrix]

    # Construct environment.
    job_generator = ScaledBernoulliServicesPoissonArrivalsGenerator(
        demand_rate, buffer_processing_matrix, job_gen_seed=seed)
    assert job_generator.routes == {}
    state_initialiser = stinit.DeterministicCRWStateInitialiser(initial_state)
    env = RLControlledRandomWalk(cost_per_buffer, capacity, constituency_matrix, job_generator,
                                 state_initialiser, job_conservation_flag,
                                 list_boundary_constraint_matrices)
    # Test that the environment fails to render. This will need to be fixed should we wish to
    # visualise episodes in future.
    pytest.raises(NotImplementedError, env.render)
Example #6
0
def test_action_interpretation_error():
    """
    Tests that when actions are not of an expected format that the correct error is thrown.
    Uses the single server queue for simplicity.
    """
    # Set up single server queue.
    cost_per_buffer = np.ones((1, 1))
    initial_state = (0,)
    capacity = np.ones((1, 1)) * np.inf
    demand_rate_val = 0.7
    job_conservation_flag = True
    seed = 72

    demand_rate = np.array([demand_rate_val])[:, None]
    buffer_processing_matrix = - np.ones((1, 1))
    constituency_matrix = np.ones((1, 1))
    list_boundary_constraint_matrices = [constituency_matrix]

    # Construct environment.
    job_generator = ScaledBernoulliServicesPoissonArrivalsGenerator(
        demand_rate, buffer_processing_matrix, job_gen_seed=seed)
    assert job_generator.routes == {}
    state_initialiser = stinit.DeterministicCRWStateInitialiser(initial_state)
    env = RLControlledRandomWalk(cost_per_buffer, capacity, constituency_matrix, job_generator,
                                 state_initialiser, job_conservation_flag,
                                 list_boundary_constraint_matrices)
    misformatted_action_tuple = (1, 2, 3)
    misformatted_action_value = int(1)
    pytest.raises(ValueError, env.preprocess_action, misformatted_action_tuple)
    pytest.raises(ValueError, env.preprocess_action, misformatted_action_value)
Example #7
0
def test_stepping_environment_with_action_tuple():
    """
    Tests the RL environment wrapper for the ksrs_network_model example as per the examples file
    from which the initial set up code is copied.
    see snc/stochastic_network_control/environments/examples.py for the original code.
    """
    # Set up the environment parameters.
    alpha1, alpha3 = 2, 2
    mu1, mu2, mu3, mu4 = 10, 3, 10, 3
    cost_per_buffer = np.ones((4, 1))
    initial_state = (0, 0, 0, 0)
    capacity = np.ones((4, 1)) * np.inf
    job_conservation_flag = True
    seed = 72
    demand_rate = np.array([alpha1, 0, alpha3, 0])[:, None]
    buffer_processing_matrix = np.array([[-mu1, 0, 0, 0],
                                         [mu1, -mu2, 0, 0],
                                         [0, 0, -mu3, 0],
                                         [0, 0, mu3, -mu4]])
    constituency_matrix = np.array([[1, 0, 0, 1],
                                    [0, 1, 1, 0]])

    # Construct environment.
    job_generator = ScaledBernoulliServicesPoissonArrivalsGenerator(
        demand_rate, buffer_processing_matrix, job_gen_seed=seed)

    state_initialiser = stinit.DeterministicCRWStateInitialiser(initial_state)
    env = RLControlledRandomWalk(cost_per_buffer, capacity, constituency_matrix, job_generator,
                                 state_initialiser, job_conservation_flag)

    # Set up a numpy action tuple with an action for each resource and ensure that the environment
    # is able to interpret it and update itself.
    action_tuple = (np.array([0, 1, 0]), np.array([0, 0, 1]))
    env.step(action_tuple)
Example #8
0
def test_simple_tensorflow_action_interpretation():
    """
    Test interpretation of action where there is only one action space. This is done in the single
    server queue setting so that there is one resource and therefore one action space.

    This tests actions as TensorFlow arrays.
    """
    # Set up single server queue.
    cost_per_buffer = np.ones((1, 1))
    initial_state = (0,)
    capacity = np.ones((1, 1)) * np.inf
    demand_rate_val = 0.7
    job_conservation_flag = True
    seed = 72

    demand_rate = np.array([demand_rate_val])[:, None]
    buffer_processing_matrix = - np.ones((1, 1))
    constituency_matrix = np.ones((1, 1))
    list_boundary_constraint_matrices = [constituency_matrix]

    # Construct environment.
    job_generator = ScaledBernoulliServicesPoissonArrivalsGenerator(
        demand_rate, buffer_processing_matrix, job_gen_seed=seed)
    assert job_generator.routes == {}
    state_initialiser = stinit.DeterministicCRWStateInitialiser(initial_state)
    env = RLControlledRandomWalk(cost_per_buffer, capacity, constituency_matrix, job_generator,
                                 state_initialiser, job_conservation_flag,
                                 list_boundary_constraint_matrices)
    action = tf.convert_to_tensor([0, 1], tf.float32)
    assert env.preprocess_action(action) == np.array([1])
Example #9
0
def test_independent_resource_actions():
    """
    Tests the RL environment wrapper for the ksrs_network_model example as per the examples file
    from which the initial set up code is copied.
    see snc/stochastic_network_control/environments/examples.py for the original code.
    """
    # Set up the environment parameters.
    alpha1, alpha3 = 2, 2
    mu1, mu2, mu3, mu4 = 10, 3, 10, 3
    cost_per_buffer = np.ones((4, 1))
    initial_state = (0, 0, 0, 0)
    capacity = np.ones((4, 1)) * np.inf
    job_conservation_flag = True
    seed = 72
    demand_rate = np.array([alpha1, 0, alpha3, 0])[:, None]
    buffer_processing_matrix = np.array([[-mu1, 0, 0, 0],
                                         [mu1, -mu2, 0, 0],
                                         [0, 0, -mu3, 0],
                                         [0, 0, mu3, -mu4]])
    constituency_matrix = np.array([[1, 0, 0, 1],
                                    [0, 1, 1, 0]])

    # Construct environment.
    job_generator = ScaledBernoulliServicesPoissonArrivalsGenerator(
        demand_rate, buffer_processing_matrix, job_gen_seed=seed)

    state_initialiser = stinit.DeterministicCRWStateInitialiser(initial_state)
    env = RLControlledRandomWalk(cost_per_buffer, capacity, constituency_matrix, job_generator,
                                 state_initialiser, job_conservation_flag)

    # Test that the wrapper sets the spaces up as we would expect.
    assert len(env._rl_action_space.spaces) == 2
    assert np.all(np.array([s.shape[1] for s in env._rl_action_space.spaces]) == 3)
    assert len(env._action_vectors) == 3 + 3
    assert env.observation_space.shape == (4,)
def test_assert_surplus_buffers_consistent_with_job_generator_wrong_one_surplus_buffer(
):
    ind_surplus_buffers = [2]
    mud = 1e2
    buffer_processing_matrix = np.array([[-10, 100, 0], [10, 0, -mud],
                                         [0, 0, -mud]])
    job_generator = ScaledBernoulliServicesPoissonArrivalsGenerator(
        np.array([[0], [0], [9]]), buffer_processing_matrix)
    assert not ControlledRandomWalk.is_surplus_buffers_consistent_with_job_generator(
        ind_surplus_buffers, job_generator.demand_nodes)
Example #11
0
def test_complex_rl_action_interpretation():
    """
    Tests the RL environment's action interpretation for the ksrs_network_model example as per the
    examples file from which the initial set up code is copied.
    See snc/stochastic_network_control/environments/examples.py for the original code.
    This tests action interpretation where each resource controls multiple activities.
    """
    # Set up model parameters
    alpha1, alpha3 = 2, 2
    mu1, mu2, mu3, mu4 = 10, 3, 10, 3
    cost_per_buffer = np.ones((4, 1))
    initial_state = (0, 0, 0, 0)
    capacity = np.ones((4, 1)) * np.inf
    job_conservation_flag = True
    seed = 72
    demand_rate = np.array([alpha1, 0, alpha3, 0])[:, None]
    buffer_processing_matrix = np.array([[-mu1, 0, 0, 0],
                                         [mu1, -mu2, 0, 0],
                                         [0, 0, -mu3, 0],
                                         [0, 0, mu3, -mu4]])
    constituency_matrix = np.array([[1, 0, 0, 1],
                                    [0, 1, 1, 0]])

    # Construct environment.
    job_generator = ScaledBernoulliServicesPoissonArrivalsGenerator(
        demand_rate, buffer_processing_matrix, job_gen_seed=seed)
    state_initialiser = stinit.DeterministicCRWStateInitialiser(initial_state)
    env = RLControlledRandomWalk(cost_per_buffer, capacity, constituency_matrix, job_generator,
                                 state_initialiser, job_conservation_flag)

    # ----------------- Enumeration of Actions -----------------
    # Zero: First group idles
    # First: First activity on (first action of first group)
    # Second: Fourth activity on (second action of first group)
    # Third: Second group idles
    # Fourth: Second activity (first action of second group)
    # Fifth: Third activity (second action of second group)

    # Build various test RL actions in increasing complexity
    idle_action = env._interpret_rl_action(np.zeros(6))
    binary_action_a = np.zeros(6)
    binary_action_a[2] = 1
    action_a = env._interpret_rl_action(binary_action_a)
    binary_action_b = np.zeros(6)
    binary_action_b[[2, 4]] = 1
    action_b = env._interpret_rl_action(binary_action_b)

    # Check that the null action, a simple action and an action combining activities across resource
    # sets is set up correctly.
    assert np.all(idle_action == np.zeros(4))
    assert np.all(action_a == np.array([0, 0, 0, 1]))
    assert np.all(action_b == np.array([0, 1, 0, 1]))
def create_pull_model(ind_surplus_buffers):
    model_type = 'pull'
    mud = 1e2
    buffer_processing_matrix = np.array([[-10, 0, 100], [10, -mud, 0],
                                         [0, -mud, 0]])
    demand_rate = np.array([0, 0, 9])[:, None]
    job_generator = ScaledBernoulliServicesPoissonArrivalsGenerator(
        demand_rate, buffer_processing_matrix)
    return ControlledRandomWalk(np.array([1, 0.5, 10])[:, None],
                                np.ones((3, 1)) * np.inf,
                                np.eye(3),
                                job_generator,
                                DeterministicCRWStateInitialiser(demand_rate),
                                model_type=model_type,
                                ind_surplus_buffers=ind_surplus_buffers)
Example #13
0
def test_rl_env_with_complex_activity_conditions():
    """
    A more complicated version of the previous test (test_rl_env_with_simple_activity_conditions).
    Tests the handling of more complex activity constraints in an 8 resource and 8 activity tandem
    model.
    """
    # Set up environment parameters.
    alpha1 = 4
    cost_per_buffer = np.ones((8, 1))
    initial_state = np.zeros((8, 1))
    capacity = np.ones((8, 1)) * np.inf
    job_conservation_flag = False
    seed = 72

    demand_rate = np.array([alpha1, 0, 0, 0, 0, 0, 0, 0])[:, None]
    buffer_processing_matrix = np.array([
        [-1, 0, 0, 0, 0, 0, 0, 0],
        [1, -1, 0, 0, 0, 0, 0, 0],
        [0, 1, -1, 0, 0, 0, 0, 0],
        [0, 0, 1, -1, 0, 0, 0, 0],
        [0, 0, 0, 1, -1, 0, 0, 0],
        [0, 0, 0, 0, 1, -1, 0, 0],
        [0, 0, 0, 0, 0, 1, -1, 0],
        [0, 0, 0, 0, 0, 0, 1, -1]
    ])

    constraints_matrix = np.array([
        [1, 1, 0, 0, 0, 0, 0, 0],
        [0, 0, 1, 1, 0, 0, 0, 0],
        [0, 1, 1, 0, 0, 0, 0, 0]
    ])
    constituency_matrix = np.vstack([np.eye(8), constraints_matrix])

    # Construct environment.
    job_generator = ScaledBernoulliServicesPoissonArrivalsGenerator(
        demand_rate, buffer_processing_matrix, job_gen_seed=seed)
    state_initialiser = stinit.DeterministicCRWStateInitialiser(initial_state)
    env = RLControlledRandomWalk(cost_per_buffer, capacity, constituency_matrix, job_generator,
                                 state_initialiser, job_conservation_flag,
                                 index_phys_resources=(0, 1, 2, 3, 4, 5, 6, 7))

    # Test that action spaces are as expected.
    assert len(env._rl_action_space.spaces) == 5
    # First group has 4 resources, and 8 actions.
    assert env._rl_action_space.spaces[0].shape == (1, 8)
    # Each independent group has only two actions.
    assert env._rl_action_space.spaces[1].shape == (1, 2)
Example #14
0
def test_simple_rl_action_interpretation():
    """
    Tests the mechanism for reading in actions from an RL model.
    This utilises the 4 resource 4 activity tandem model.
    """
    # Tandem model set up without any additional activity constraints
    alpha1 = 4
    cost_per_buffer = np.ones((4, 1))
    initial_state = np.zeros((4, 1))
    capacity = np.ones((4, 1)) * np.inf
    job_conservation_flag = False
    seed = 72

    demand_rate = np.array([alpha1, 0, 0, 0])[:, None]
    buffer_processing_matrix = np.array([
        [-1, 0, 0, 0],
        [1, -1, 0, 0],
        [0, 1, -1, 0],
        [0, 0, 1, -1]
    ])

    constituency_matrix = np.eye(4)

    # Construct environment.
    job_generator = ScaledBernoulliServicesPoissonArrivalsGenerator(
        demand_rate, buffer_processing_matrix, job_gen_seed=seed)
    state_initialiser = stinit.DeterministicCRWStateInitialiser(initial_state)
    env = RLControlledRandomWalk(cost_per_buffer, capacity, constituency_matrix, job_generator,
                                 state_initialiser, job_conservation_flag,
                                 index_phys_resources=(0, 1, 2, 3))

    # Build various test RL actions in increasing complexity
    null_action = env._interpret_rl_action(np.zeros(8))
    binary_action_one = np.zeros(8)
    binary_action_one[1] = 1
    action_one = env._interpret_rl_action(binary_action_one)
    binary_action_two = np.zeros(8)
    binary_action_two[[1, 3, 5, 7]] = 1
    action_two = env._interpret_rl_action(binary_action_two)

    # Test that the actions are interpreted correctly
    assert np.all(null_action == np.zeros(4))
    assert np.all(action_one == np.array([1, 0, 0, 0]))
    assert np.all(action_two == np.ones(4))
Example #15
0
def test_input_queued_switch_spaces():
    """
    Test the RL environment wrapper for the input_queued_switch_3x3_model example.
    This is a good test for resources with activity constraints.
    See snc/stochastic_network_control/environments/examples.py for the original set up code.
    """
    # Set up the environment parameters.
    mu11, mu12, mu13, mu21, mu22, mu23, mu31, mu32, mu33 = (1,) * 9
    cost_per_buffer = np.ones((9, 1))
    initial_state = np.zeros((9, 1))
    capacity = np.ones((9, 1)) * np.inf
    demand_rate = 0.3 * np.ones((9, 1))
    job_conservation_flag = True
    seed = 72
    buffer_processing_matrix = - np.diag([mu11, mu12, mu13, mu21, mu22, mu23, mu31, mu32, mu33])
    constraints = np.array([[1, 0, 0, 1, 0, 0, 1, 0, 0],
                            [0, 1, 0, 0, 1, 0, 0, 1, 0],
                            [0, 0, 1, 0, 0, 1, 0, 0, 1]])
    constituency_matrix = np.vstack([
        np.array([[1, 1, 1, 0, 0, 0, 0, 0, 0],
                  [0, 0, 0, 1, 1, 1, 0, 0, 0],
                  [0, 0, 0, 0, 0, 0, 1, 1, 1]]),
        constraints]
    )
    index_phys_resources = (0, 1, 2)

    # Construct environment.
    job_generator = ScaledBernoulliServicesPoissonArrivalsGenerator(
        demand_rate, buffer_processing_matrix, job_gen_seed=seed)
    state_initialiser = stinit.DeterministicCRWStateInitialiser(initial_state)
    env = RLControlledRandomWalk(cost_per_buffer, capacity, constituency_matrix, job_generator,
                                 state_initialiser, job_conservation_flag,
                                 index_phys_resources=index_phys_resources)

    # Test that the environment wrapper works as expected.
    assert isinstance(env._rl_action_space, gym_spaces.Tuple)
    assert len(env._rl_action_space.spaces) == 1
    # There are overall 34 combinations of activities (or inactivity) that are feasible.
    assert len(env._action_vectors) == 34
    assert env._action_vectors.shape == (34, 9)
    assert snc.is_binary(env._action_vectors)
    assert snc.is_binary(np.matmul(env._action_vectors, constraints.T))
Example #16
0
def test_single_server_queue_spaces():
    """
    Tests the RL environment wrapper for the Single Server Queue. The set up is copied from the
    examples file: snc/stochastic_network_control/environments/examples.py
    """
    # Set up the environment parameters.
    cost_per_buffer = np.ones((1, 1))
    initial_state = (0,)
    capacity = np.ones((1, 1)) * np.inf
    demand_rate_val = 0.7
    job_conservation_flag = True
    seed = 72

    demand_rate = np.array([demand_rate_val])[:, None]
    buffer_processing_matrix = - np.ones((1, 1))
    constituency_matrix = np.ones((1, 1))
    list_boundary_constraint_matrices = [constituency_matrix]

    # Construct environment.
    job_generator = ScaledBernoulliServicesPoissonArrivalsGenerator(
        demand_rate, buffer_processing_matrix, job_gen_seed=seed)
    assert job_generator.routes == {}
    state_initialiser = stinit.DeterministicCRWStateInitialiser(initial_state)
    env = RLControlledRandomWalk(cost_per_buffer, capacity, constituency_matrix, job_generator,
                                 state_initialiser, job_conservation_flag,
                                 list_boundary_constraint_matrices)

    # Test that the wrapper sets the spaces up as we would expect.
    assert isinstance(env.action_space, gym_spaces.Tuple)
    assert len(env._rl_action_space.spaces) == 1
    assert isinstance(env._rl_action_space.spaces[0], gym_spaces.Box)
    assert env.action_space.spaces[0].shape == (1, 2)
    assert isinstance(env.activities_action_space, gym_spaces.Box)
    assert env.activities_action_space.shape == (1, 1)
    assert isinstance(env.observation_space, gym_spaces.Box)
    assert env.observation_space.shape == (1,)
Example #17
0
def test_rl_env_with_simple_activity_conditions():
    """
    Sets up a simple case of a tandem model with 4 independent resources each with one action.
    Activity constraints are then added as a way of testing the wrapper's handling of such activity
    constraints.
    """
    # Set up the environment parameters.
    alpha1 = 4
    cost_per_buffer = np.ones((4, 1))
    initial_state = np.zeros((4, 1))
    capacity = np.ones((4, 1)) * np.inf
    job_conservation_flag = False
    seed = 72

    demand_rate = np.array([alpha1, 0, 0, 0])[:, None]
    buffer_processing_matrix = np.array([
        [-1, 0, 0, 0],
        [1, -1, 0, 0],
        [0, 1, -1, 0],
        [0, 0, 1, -1]
    ])

    constituency_matrix = np.vstack([np.eye(4), np.array([[1, 1, 0, 0]])])

    # Construct environment.
    job_generator = ScaledBernoulliServicesPoissonArrivalsGenerator(
        demand_rate, buffer_processing_matrix, job_gen_seed=seed)
    state_initialiser = stinit.DeterministicCRWStateInitialiser(initial_state)
    env = RLControlledRandomWalk(cost_per_buffer, capacity, constituency_matrix, job_generator,
                                 state_initialiser, job_conservation_flag,
                                 index_phys_resources=(0, 1, 2, 3))

    # Ensure that the action spaces are as expected.
    assert len(env._rl_action_space.spaces) == 3
    assert env._rl_action_space.spaces[0].shape == (1, 3)
    assert env._rl_action_space.spaces[1].shape == (1, 2)