Example #1
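All four tests below share the same dependencies. The import block that follows is a best-guess sketch: numpy is certain, but the snc module paths are assumptions inferred from the repository path quoted in the docstrings and may need adjusting to your checkout.

import numpy as np

# NOTE: the module paths below are assumptions based on the
# snc/stochastic_network_control layout referenced in the docstrings;
# adjust them to wherever these classes live in your checkout.
import snc.environments.state_initialiser as stinit
from snc.environments.rl_environment_wrapper import RLControlledRandomWalk
from snc.environments.job_generators.discrete_review_job_generator import \
    DeterministicDiscreteReviewJobGenerator
from snc.environments.job_generators.scaled_bernoulli_services_poisson_arrivals_generator \
    import ScaledBernoulliServicesPoissonArrivalsGenerator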
def test_action_processing_in_single_server_queue():
    """
    Tests the interpretation and application of actions in the single server queue with
    deterministic dynamics. This is validated in terms of the environment state and rewards.
    """
    # Set up single server queue with no arrivals and deterministic dynamics.
    cost_per_buffer = np.ones((1, 1))
    initial_state = (10,)
    capacity = np.ones((1, 1)) * np.inf
    demand_rate_val = 0.0
    job_conservation_flag = True

    demand_rate = np.array([demand_rate_val])[:, None]
    buffer_processing_matrix = - np.ones((1, 1))
    constituency_matrix = np.ones((1, 1))
    list_boundary_constraint_matrices = [constituency_matrix]

    # Construct environment.
    job_generator = DeterministicDiscreteReviewJobGenerator(
        sim_time_interval=1,
        demand_rate=demand_rate,
        buffer_processing_matrix=buffer_processing_matrix
    )
    state_initialiser = stinit.DeterministicCRWStateInitialiser(initial_state)
    env = RLControlledRandomWalk(cost_per_buffer, capacity, constituency_matrix, job_generator,
                                 state_initialiser, job_conservation_flag,
                                 list_boundary_constraint_matrices)
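
    # Step with the idle action first, then the serving action, validating state and reward.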
    new_state_idle, reward_idle, _, _ = env.step(np.array([1, 0]))
    assert new_state_idle[0] == initial_state[0]
    assert reward_idle == initial_state[0] * -1.
    new_state_active, reward_active, _, _ = env.step(np.array([0, 1]))
    assert new_state_active[0] == initial_state[0] - 1
    assert reward_active == (initial_state[0] - 1) * -1.
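The expected values follow from the deterministic dynamics: the action vector is one-hot with index 0 meaning idle, the serving action removes exactly one job per step (B = [[-1]], no arrivals), and the reward is the negative cost-weighted state after the transition. A quick hand check of that arithmetic (standalone sketch, not part of the test suite):

# Deterministic single server queue: x' = x + B @ z with B = [[-1]].
x = np.array([[10.]])
B = -np.ones((1, 1))
x_active = x + B @ np.ones((1, 1))              # serving removes one job: 10 -> 9
reward = -(np.ones((1, 1)).T @ x_active).item()  # -c^T x' with unit costs
assert x_active[0, 0] == 9. and reward == -9.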
Example #2
def test_stepping_environment_with_action_tuple():
    """
    Tests the RL environment wrapper for the ksrs_network_model example. The initial
    set-up code is copied from the examples file; see
    snc/stochastic_network_control/environments/examples.py for the original code.
    """
    # Set up the environment parameters.
    alpha1, alpha3 = 2, 2
    mu1, mu2, mu3, mu4 = 10, 3, 10, 3
    cost_per_buffer = np.ones((4, 1))
    initial_state = (0, 0, 0, 0)
    capacity = np.ones((4, 1)) * np.inf
    job_conservation_flag = True
    seed = 72
    demand_rate = np.array([alpha1, 0, alpha3, 0])[:, None]
    buffer_processing_matrix = np.array([[-mu1, 0, 0, 0],
                                         [mu1, -mu2, 0, 0],
                                         [0, 0, -mu3, 0],
                                         [0, 0, mu3, -mu4]])
    constituency_matrix = np.array([[1, 0, 0, 1],
                                    [0, 1, 1, 0]])

    # Construct environment.
    job_generator = ScaledBernoulliServicesPoissonArrivalsGenerator(
        demand_rate, buffer_processing_matrix, job_gen_seed=seed)

    state_initialiser = stinit.DeterministicCRWStateInitialiser(initial_state)
    env = RLControlledRandomWalk(cost_per_buffer, capacity, constituency_matrix, job_generator,
                                 state_initialiser, job_conservation_flag)

    # Set up a numpy action tuple with an action for each resource and ensure that the environment
    # is able to interpret it and update itself.
    action_tuple = (np.array([0, 1, 0]), np.array([0, 0, 1]))
    env.step(action_tuple)
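The tuple carries one one-hot vector per resource. Judging from the constituency matrix and the idle/active actions used across these tests, index 0 selects idling and the remaining indices map to the resource's activities in column order, which the wrapper presumably flattens into a binary activity vector. A hypothetical decoder illustrating that reading (decode_action_tuple is not part of the snc API):

def decode_action_tuple(action_tuple, constituency_matrix):
    # Hypothetical illustration of the encoding implied by these tests:
    # per resource, index 0 = idle, indices 1.. = that resource's owned
    # activities in column order of the constituency matrix.
    activity_vector = np.zeros(constituency_matrix.shape[1])
    for resource_action, resource_row in zip(action_tuple, constituency_matrix):
        owned = np.flatnonzero(resource_row)
        chosen = int(np.argmax(resource_action))
        if chosen > 0:  # anything past index 0 activates an owned activity
            activity_vector[owned[chosen - 1]] = 1
    return activity_vector

# For the tuple above: resource 1 picks activity 1, resource 2 picks activity 3,
# so decode_action_tuple(action_tuple, constituency_matrix) == [1, 0, 1, 0].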
Example #3
def test_action_processing_with_multiple_resource_sets():
    """
    Tests the interpretation and application of actions in a more complex environment with
    deterministic dynamics. This is validated in terms of the environment state and rewards.
    """
    # Set up the environment parameters with no arrivals and deterministic dynamics.
    alpha1, alpha3 = 0, 0
    mu1, mu2, mu3, mu4 = 10, 3, 10, 3
    cost_per_buffer = np.ones((4, 1))
    initial_state = (100, 100, 100, 100)
    capacity = np.ones((4, 1)) * np.inf
    job_conservation_flag = True

    demand_rate = np.array([alpha1, 0, alpha3, 0])[:, None]
    buffer_processing_matrix = np.array([[-mu1, 0, 0, 0],
                                         [mu1, -mu2, 0, 0],
                                         [0, 0, -mu3, 0],
                                         [0, 0, mu3, -mu4]])
    constituency_matrix = np.array([[1, 0, 0, 1],
                                    [0, 1, 1, 0]])

    # Construct deterministic job generator.
    job_generator = DeterministicDiscreteReviewJobGenerator(
        sim_time_interval=1,
        demand_rate=demand_rate,
        buffer_processing_matrix=buffer_processing_matrix
    )

    state_initialiser = stinit.DeterministicCRWStateInitialiser(initial_state)
    env = RLControlledRandomWalk(cost_per_buffer, capacity, constituency_matrix, job_generator,
                                 state_initialiser, job_conservation_flag)

    # Ensure that the idling actions do not change the environment state and are rewarded
    # accordingly.
    idle_action = (np.array([1, 0, 0]), np.array([1, 0, 0]))
    new_state_idle, reward_idle, _, _ = env.step(idle_action)
    assert np.allclose(np.array(initial_state) / sum(initial_state), new_state_idle)
    assert reward_idle == -1 * np.dot(initial_state, cost_per_buffer)
    # Ensure that the action which activates activities 2 and 4 is interpreted and rewarded
    # correctly.
    active_action = (np.array([0, 0, 1]), np.array([0, 1, 0]))
    new_state_active, reward_active, _, _ = env.step(active_action)
    assert np.allclose(new_state_active, np.array([100, 97, 100, 97]) / 394)
    assert reward_active == -1 * np.dot(env.state.flatten(), cost_per_buffer)
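The asserted numbers come straight from the deterministic dynamics: with activities 2 and 4 active, buffers 2 and 4 each lose mu2 = mu4 = 3 jobs and nothing replenishes them, while the returned observation appears to be the state divided by its current total job count. A hand check under that normalisation assumption:

# One deterministic step with only activities 2 and 4 active.
x = np.array([100., 100., 100., 100.])
x_next = x + np.array([0., -3., 0., -3.])   # -> [100, 97, 100, 97]
scaled = x_next / x_next.sum()              # 394 jobs remain after the step
assert np.allclose(scaled, np.array([100, 97, 100, 97]) / 394)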
Example #4
def test_state_scaling_only_for_rl():
    """
    Tests that the state of the underlying ControlledRandomWalk object is not mutated as a side
    effect of the RLControlledRandomWalk wrapper.
    """
    # Set up the environment parameters with no arrivals and deterministic dynamics.
    alpha1, alpha3 = 0, 0
    mu1, mu2, mu3, mu4 = 10, 3, 10, 3
    cost_per_buffer = np.ones((4, 1))
    initial_state = (100, 100, 100, 100)
    capacity = np.ones((4, 1)) * np.inf
    job_conservation_flag = True

    demand_rate = np.array([alpha1, 0, alpha3, 0])[:, None]
    buffer_processing_matrix = np.array([[-mu1, 0, 0, 0],
                                         [mu1, -mu2, 0, 0],
                                         [0, 0, -mu3, 0],
                                         [0, 0, mu3, -mu4]])
    constituency_matrix = np.array([[1, 0, 0, 1],
                                    [0, 1, 1, 0]])

    # Construct deterministic job generator.
    job_generator = DeterministicDiscreteReviewJobGenerator(
        sim_time_interval=1,
        demand_rate=demand_rate,
        buffer_processing_matrix=buffer_processing_matrix
    )
    state_initialiser = stinit.DeterministicCRWStateInitialiser(initial_state)
    env = RLControlledRandomWalk(cost_per_buffer, capacity, constituency_matrix, job_generator,
                                 state_initialiser, job_conservation_flag)

    # Ensure that scaling is applied to the RL state but not the underlying state.
    assert np.all(env.state == np.array([[100], [100], [100], [100]]))
    active_action = (np.array([0, 0, 1]), np.array([0, 1, 0]))
    new_state_active, _, _, _ = env.step(active_action)
    assert np.all(env.state == np.array([[100], [97], [100], [97]]))
    assert np.allclose(new_state_active, np.array([100, 97, 100, 97]) / 394)
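The invariant pinned down here is that normalisation happens on a copy handed back to the agent, never in place on the wrapped environment's state. A minimal sketch of that pattern, assuming the observation is scaled by total job count (scaled_observation is illustrative, not the wrapper's actual method):

def scaled_observation(state: np.ndarray) -> np.ndarray:
    # Normalise a fresh array for the RL agent; the raw state stays untouched
    # so the underlying ControlledRandomWalk keeps operating in job counts.
    total = state.sum()
    return state / total if total > 0 else state.astype(float).copy()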