def test_independent_resource_actions():
    """
    Check the action and observation spaces that the RL wrapper builds for the ksrs_network_model
    example. The set up is copied from the examples file, see
    snc/stochastic_network_control/environments/examples.py for the original code.
    """
    # Arrival and service rates.
    alpha1 = alpha3 = 2
    mu1 = mu3 = 10
    mu2 = mu4 = 3
    num_buffers = 4

    # Rows are buffers, columns are activities.
    buffer_processing_matrix = np.array([[-mu1, 0, 0, 0],
                                         [mu1, -mu2, 0, 0],
                                         [0, 0, -mu3, 0],
                                         [0, 0, mu3, -mu4]])
    demand_rate = np.array([[alpha1], [0], [alpha3], [0]])
    # Two resources: the first owns activities 1 and 4, the second owns activities 2 and 3.
    constituency_matrix = np.array([[1, 0, 0, 1],
                                    [0, 1, 1, 0]])

    # Build the environment.
    job_generator = ScaledBernoulliServicesPoissonArrivalsGenerator(
        demand_rate, buffer_processing_matrix, job_gen_seed=72)
    env = RLControlledRandomWalk(
        np.ones((num_buffers, 1)),          # cost_per_buffer
        np.full((num_buffers, 1), np.inf),  # capacity
        constituency_matrix,
        job_generator,
        stinit.DeterministicCRWStateInitialiser((0, 0, 0, 0)),
        True,                               # job_conservation_flag
    )

    # Two action spaces are expected, each with three options, giving six action vectors.
    action_spaces = env._rl_action_space.spaces
    assert len(action_spaces) == 2
    assert all(space.shape[1] == 3 for space in action_spaces)
    assert len(env._action_vectors) == 6
    assert env.observation_space.shape == (4,)
def test_rl_env_from_snc_env_action_space_dims_simple():
    """
    Check that rl_env_from_snc_env reports the same action space dimensions with and without the
    TensorFlow Agents wrapping, and that they add up to the number of action vectors of the
    environment. Uses a single server queue.
    """
    # Single buffer, single activity, single resource.
    unit_shape = (1, 1)
    constituency_matrix = np.ones(unit_shape)
    demand_rate = np.array([[0.7]])
    buffer_processing_matrix = np.full(unit_shape, -1.0)

    # Build the environment.
    job_generator = ScaledBernoulliServicesPoissonArrivalsGenerator(
        demand_rate, buffer_processing_matrix, job_gen_seed=72)
    assert job_generator.routes == {}
    env = RLControlledRandomWalk(
        np.ones(unit_shape),          # cost_per_buffer
        np.full(unit_shape, np.inf),  # capacity
        constituency_matrix,
        job_generator,
        stinit.DeterministicCRWStateInitialiser((0,)),
        True,                         # job_conservation_flag
        [constituency_matrix],        # list_boundary_constraint_matrices
    )

    _, dims_plain = rl_env_from_snc_env(env, discount_factor=0.99, for_tf_agent=False)
    _, dims_tf = rl_env_from_snc_env(env, discount_factor=0.99, for_tf_agent=True)

    assert dims_plain == dims_tf
    assert len(env.action_vectors) == sum(dims_plain)
def get_default_env_params(state, buffer_processing_matrix=None):
    """
    Return the default keyword arguments used to build an environment in the tests.

    :param state: initial state array; its first dimension gives the number of buffers.
    :param buffer_processing_matrix: optional matrix handed to the job generator. When None, an
        upper triangular matrix with entries -0.5 is used.
    :return: dictionary of environment parameters.
    """
    num_buffers = state.shape[0]
    # The default set up has as many resources as buffers.
    num_resources = num_buffers

    if buffer_processing_matrix is None:
        # Default upper triangular buffer_processing_matrix.
        buffer_processing_matrix = -0.5 * np.triu(np.ones((num_buffers, num_resources)))

    job_generator = drjg.DeterministicDiscreteReviewJobGenerator(
        np.ones_like(state), buffer_processing_matrix, sim_time_interval=1)

    return {
        "cost_per_buffer": np.zeros_like(state),
        "capacity": np.zeros_like(state),
        "constituency_matrix": np.zeros((num_resources, num_resources)),
        "job_generator": job_generator,
        "state_initialiser": si.DeterministicCRWStateInitialiser(state),
        "job_conservation_flag": True,
        "list_boundary_constraint_matrices": None,
    }
def test_ensure_jobs_conservation_with_multiple_demand_and_no_supply():
    """
    Pull model with two surplus buffers and every buffer empty: ensure_jobs_conservation is
    expected to return an all-zero routing matrix.
    """
    mu1, mu2, mu3 = 1, 2, 3
    mud1, mud2 = 4, 5
    buffer_processing_matrix = np.array([[-mu1, 0, 0, 0, 0],
                                         [mu1, -mu2, mu3, 0, -mud2],
                                         [0, 0, 0, 0, -mud2],
                                         [0, mu2, -mu3, -mud1, 0],
                                         [0, 0, 0, -mud1, 0]])
    num_buffers, num_activities = 5, 5
    column_of_ones = np.ones((num_buffers, 1))

    state_initialiser = stinit.DeterministicCRWStateInitialiser(np.zeros((num_buffers, 1)))
    job_generator = drjg(column_of_ones, buffer_processing_matrix, sim_time_interval=1)
    env = ControlledRandomWalk(np.ones((num_buffers, 1)),
                               np.ones((num_buffers, 1)),
                               np.zeros((num_buffers, num_activities)),
                               job_generator,
                               state_initialiser,
                               model_type='pull',
                               ind_surplus_buffers=[1, 3])

    empty_state = np.zeros((num_buffers, 1))
    routing_jobs_matrix = env.ensure_jobs_conservation(buffer_processing_matrix, empty_state)

    expected = np.zeros((num_buffers, num_activities))
    assert np.all(routing_jobs_matrix == expected)
def get_null_env_params(state, num_resources=None, buffer_processing_matrix=None,
                        constituency_matrix=None):
    """
    Return placeholder environment keyword arguments, filling in any argument left as None.

    :param state: initial state array; its first dimension gives the number of buffers.
    :param num_resources: number of resources. Defaults to the number of buffers.
    :param buffer_processing_matrix: matrix handed to the job generator. Defaults to an upper
        triangular matrix with entries -1.
    :param constituency_matrix: defaults to a square matrix of zeros of size num_resources.
    :return: dictionary of environment parameters.
    """
    num_buffers = state.shape[0]
    num_resources = num_buffers if num_resources is None else num_resources

    if buffer_processing_matrix is None:
        all_ones = np.ones((num_buffers, num_resources))
        buffer_processing_matrix = -np.triu(all_ones)
    if constituency_matrix is None:
        constituency_matrix = np.zeros((num_resources, num_resources))

    job_generator = drjg.DeterministicDiscreteReviewJobGenerator(
        np.ones_like(state), buffer_processing_matrix, sim_time_interval=1)

    return {
        "cost_per_buffer": np.zeros_like(state),
        "capacity": np.zeros_like(state),
        "constituency_matrix": constituency_matrix,
        "job_generator": job_generator,
        "state_initialiser": si.DeterministicCRWStateInitialiser(state),
        "job_conservation_flag": True,
        "list_boundary_constraint_matrices": None,
    }
def test_render_error():
    """
    Check that rendering the environment raises NotImplementedError. Included to give full test
    coverage.
    """
    # Single server queue.
    unit_shape = (1, 1)
    constituency_matrix = np.ones(unit_shape)
    demand_rate = np.array([[0.7]])
    buffer_processing_matrix = np.full(unit_shape, -1.0)

    # Build the environment.
    job_generator = ScaledBernoulliServicesPoissonArrivalsGenerator(
        demand_rate, buffer_processing_matrix, job_gen_seed=72)
    assert job_generator.routes == {}
    env = RLControlledRandomWalk(
        np.ones(unit_shape),          # cost_per_buffer
        np.full(unit_shape, np.inf),  # capacity
        constituency_matrix,
        job_generator,
        stinit.DeterministicCRWStateInitialiser((0,)),
        True,                         # job_conservation_flag
        [constituency_matrix],        # list_boundary_constraint_matrices
    )

    # Rendering is expected to fail. This will need to be revisited should we wish to visualise
    # episodes in future.
    with pytest.raises(NotImplementedError):
        env.render()
def test_stepping_environment_with_action_tuple():
    """
    Check that the RL environment can be stepped with a tuple of numpy arrays, one per resource.
    Uses the ksrs_network_model example, see
    snc/stochastic_network_control/environments/examples.py for the original set up code.
    """
    # Arrival and service rates.
    alpha1 = alpha3 = 2
    mu1 = mu3 = 10
    mu2 = mu4 = 3
    num_buffers = 4

    buffer_processing_matrix = np.array([[-mu1, 0, 0, 0],
                                         [mu1, -mu2, 0, 0],
                                         [0, 0, -mu3, 0],
                                         [0, 0, mu3, -mu4]])
    demand_rate = np.array([[alpha1], [0], [alpha3], [0]])
    constituency_matrix = np.array([[1, 0, 0, 1],
                                    [0, 1, 1, 0]])

    # Build the environment.
    job_generator = ScaledBernoulliServicesPoissonArrivalsGenerator(
        demand_rate, buffer_processing_matrix, job_gen_seed=72)
    env = RLControlledRandomWalk(
        np.ones((num_buffers, 1)),          # cost_per_buffer
        np.full((num_buffers, 1), np.inf),  # capacity
        constituency_matrix,
        job_generator,
        stinit.DeterministicCRWStateInitialiser((0, 0, 0, 0)),
        True,                               # job_conservation_flag
    )

    # One array per resource. The test only requires that the step completes without raising.
    first_resource_action = np.array([0, 1, 0])
    second_resource_action = np.array([0, 0, 1])
    env.step((first_resource_action, second_resource_action))
def test_tf_environment_wrapping():
    """
    Check that the RL environment can be wrapped for use with TensorFlow Agents. Uses the single
    server queue for simplicity.
    """
    # Single server queue.
    unit_shape = (1, 1)
    constituency_matrix = np.ones(unit_shape)
    demand_rate = np.array([[0.7]])
    buffer_processing_matrix = np.full(unit_shape, -1.0)

    # Build the environment.
    job_generator = ScaledBernoulliServicesPoissonArrivalsGenerator(
        demand_rate, buffer_processing_matrix, job_gen_seed=72)
    assert job_generator.routes == {}
    env = RLControlledRandomWalk(
        np.ones(unit_shape),          # cost_per_buffer
        np.full(unit_shape, np.inf),  # capacity
        constituency_matrix,
        job_generator,
        stinit.DeterministicCRWStateInitialiser((0,)),
        True,                         # job_conservation_flag
        [constituency_matrix],        # list_boundary_constraint_matrices
    )

    # The test passes as long as the wrapping itself does not raise.
    tf_env = TFPyEnvironment(GymWrapper(env))
    del tf_env
def test_tensorflow_action_interpretation():
    """
    Check that a tuple of TensorFlow tensors, one per resource, is turned into the expected
    activity vector by the environment.
    """
    # Arrival and service rates.
    alpha1 = alpha3 = 2
    mu1 = mu3 = 10
    mu2 = mu4 = 3
    num_buffers = 4

    buffer_processing_matrix = np.array([[-mu1, 0, 0, 0],
                                         [mu1, -mu2, 0, 0],
                                         [0, 0, -mu3, 0],
                                         [0, 0, mu3, -mu4]])
    demand_rate = np.array([[alpha1], [0], [alpha3], [0]])
    constituency_matrix = np.array([[1, 0, 0, 1],
                                    [0, 1, 1, 0]])

    # Build the environment.
    job_generator = ScaledBernoulliServicesPoissonArrivalsGenerator(
        demand_rate, buffer_processing_matrix, job_gen_seed=72)
    env = RLControlledRandomWalk(
        np.ones((num_buffers, 1)),          # cost_per_buffer
        np.full((num_buffers, 1), np.inf),  # capacity
        constituency_matrix,
        job_generator,
        stinit.DeterministicCRWStateInitialiser((0, 0, 0, 0)),
        True,                               # job_conservation_flag
    )

    # One tensor per resource; the expected result switches activities 1 and 3 on.
    action_tuple = (tf.convert_to_tensor([0, 1, 0]), tf.convert_to_tensor([0, 0, 1]))
    expected_activities = np.array([[1], [0], [1], [0]])
    interpreted = env.preprocess_action(action_tuple)
    assert np.all(interpreted == expected_activities)
def test_action_interpretation_error():
    """
    Check that preprocess_action raises ValueError for actions that are not in an expected
    format. Uses the single server queue for simplicity.
    """
    # Single server queue.
    unit_shape = (1, 1)
    constituency_matrix = np.ones(unit_shape)
    demand_rate = np.array([[0.7]])
    buffer_processing_matrix = np.full(unit_shape, -1.0)

    # Build the environment.
    job_generator = ScaledBernoulliServicesPoissonArrivalsGenerator(
        demand_rate, buffer_processing_matrix, job_gen_seed=72)
    assert job_generator.routes == {}
    env = RLControlledRandomWalk(
        np.ones(unit_shape),          # cost_per_buffer
        np.full(unit_shape, np.inf),  # capacity
        constituency_matrix,
        job_generator,
        stinit.DeterministicCRWStateInitialiser((0,)),
        True,                         # job_conservation_flag
        [constituency_matrix],        # list_boundary_constraint_matrices
    )

    # A tuple of plain integers is rejected.
    with pytest.raises(ValueError):
        env.preprocess_action((1, 2, 3))
    # A bare integer is rejected.
    with pytest.raises(ValueError):
        env.preprocess_action(1)
def test_simple_tensorflow_action_interpretation():
    """
    Check the interpretation of an action given as a single TensorFlow tensor when there is only
    one action space. The single server queue has one resource and therefore one action space.
    """
    # Single server queue.
    unit_shape = (1, 1)
    constituency_matrix = np.ones(unit_shape)
    demand_rate = np.array([[0.7]])
    buffer_processing_matrix = np.full(unit_shape, -1.0)

    # Build the environment.
    job_generator = ScaledBernoulliServicesPoissonArrivalsGenerator(
        demand_rate, buffer_processing_matrix, job_gen_seed=72)
    assert job_generator.routes == {}
    env = RLControlledRandomWalk(
        np.ones(unit_shape),          # cost_per_buffer
        np.full(unit_shape, np.inf),  # capacity
        constituency_matrix,
        job_generator,
        stinit.DeterministicCRWStateInitialiser((0,)),
        True,                         # job_conservation_flag
        [constituency_matrix],        # list_boundary_constraint_matrices
    )

    # Selecting the second option is expected to switch the only activity on.
    one_hot_action = tf.convert_to_tensor([0, 1], tf.float32)
    assert env.preprocess_action(one_hot_action) == np.array([1])
def test_controlled_random_walk_reset():
    """
    Check that reset_with_random_state restores both the state of the CRW and the random state of
    its job generator.
    """
    initial_state = np.array([[20]])
    job_generator = prjg(sim_time_interval=1,
                         demand_rate=np.array([[1000]]),
                         buffer_processing_matrix=np.array([[5]]),
                         job_gen_seed=42)

    def current_random_state():
        # Second element of the tuple returned by the generator's get_state().
        return job_generator.np_random.get_state()[1]

    random_state_at_start = current_random_state()

    env = ControlledRandomWalk(
        cost_per_buffer=np.array([[3.5]]),
        capacity=np.full((1, 1), np.inf),
        constituency_matrix=np.ones((1, 1)),
        job_generator=job_generator,
        state_initialiser=stinit.DeterministicCRWStateInitialiser(initial_state=initial_state))

    # One step must move both the state and the random state away from their initial values.
    state_after_step, _, _, _ = env.step(action=np.ones((1, 1)))
    random_state_after_step = current_random_state()
    assert np.any(state_after_step != initial_state)
    assert np.any(random_state_after_step != random_state_at_start)

    # After the reset both must be back at their initial values.
    env.reset_with_random_state()
    state_after_reset = env.state
    random_state_after_reset = current_random_state()
    assert np.all(state_after_reset == initial_state)
    assert np.all(random_state_after_reset == random_state_at_start)
def test_action_processing_in_single_server_queue():
    """
    Check how actions are interpreted and applied in the single server queue with deterministic
    dynamics, by inspecting the returned state and reward after an idling and an active step.
    """
    # Single server queue with no arrivals, starting with ten jobs.
    unit_shape = (1, 1)
    initial_state = (10,)
    constituency_matrix = np.ones(unit_shape)

    # Build the environment with a deterministic job generator.
    job_generator = DeterministicDiscreteReviewJobGenerator(
        sim_time_interval=1,
        demand_rate=np.array([[0.0]]),
        buffer_processing_matrix=np.full(unit_shape, -1.0)
    )
    env = RLControlledRandomWalk(
        np.ones(unit_shape),          # cost_per_buffer
        np.full(unit_shape, np.inf),  # capacity
        constituency_matrix,
        job_generator,
        stinit.DeterministicCRWStateInitialiser(initial_state),
        True,                         # job_conservation_flag
        [constituency_matrix],        # list_boundary_constraint_matrices
    )
    jobs_at_start = initial_state[0]

    # Idling leaves the buffer untouched.
    new_state_idle, reward_idle, _, _ = env.step(np.array([1, 0]))
    assert new_state_idle[0] == jobs_at_start
    assert reward_idle == -1. * jobs_at_start

    # Working removes one job.
    new_state_active, reward_active, _, _ = env.step(np.array([0, 1]))
    assert new_state_active[0] == jobs_at_start - 1
    assert reward_active == -1. * (jobs_at_start - 1)
def test_complex_rl_action_interpretation():
    """
    Check the RL environment's action interpretation when each resource controls multiple
    activities. Uses the ksrs_network_model example, see
    snc/stochastic_network_control/environments/examples.py for the original set up code.
    """
    # Arrival and service rates.
    alpha1 = alpha3 = 2
    mu1 = mu3 = 10
    mu2 = mu4 = 3
    num_buffers = 4

    buffer_processing_matrix = np.array([[-mu1, 0, 0, 0],
                                         [mu1, -mu2, 0, 0],
                                         [0, 0, -mu3, 0],
                                         [0, 0, mu3, -mu4]])
    demand_rate = np.array([[alpha1], [0], [alpha3], [0]])
    constituency_matrix = np.array([[1, 0, 0, 1],
                                    [0, 1, 1, 0]])

    # Build the environment.
    job_generator = ScaledBernoulliServicesPoissonArrivalsGenerator(
        demand_rate, buffer_processing_matrix, job_gen_seed=72)
    env = RLControlledRandomWalk(
        np.ones((num_buffers, 1)),          # cost_per_buffer
        np.full((num_buffers, 1), np.inf),  # capacity
        constituency_matrix,
        job_generator,
        stinit.DeterministicCRWStateInitialiser((0, 0, 0, 0)),
        True,                               # job_conservation_flag
    )

    # ----------------- Enumeration of Actions -----------------
    # Index 0: first group idles
    # Index 1: first activity on (first action of first group)
    # Index 2: fourth activity on (second action of first group)
    # Index 3: second group idles
    # Index 4: second activity on (first action of second group)
    # Index 5: third activity on (second action of second group)

    def interpret(active_indices):
        """Interpret the length-6 binary RL action with ones at the given indices."""
        binary_action = np.zeros(6)
        binary_action[active_indices] = 1
        return env._interpret_rl_action(binary_action)

    # The null action, a simple action and an action combining activities across resource sets.
    idle_action = interpret([])
    action_a = interpret([2])
    action_b = interpret([2, 4])

    assert np.all(idle_action == np.zeros(4))
    assert np.all(action_a == np.array([0, 0, 0, 1]))
    assert np.all(action_b == np.array([0, 1, 0, 1]))
def env_job_conservation(job_conservation_flag):
    """
    Build a two-buffer environment with no arrivals, in which jobs served at the first buffer are
    routed to the second.

    :param job_conservation_flag: forwarded as is to ControlledRandomWalk.
    :return: tuple (initial_state, env).
    """
    initial_state = np.array([[0], [0]])
    buffer_processing_matrix = np.array([[-1, 0],
                                         [1, -1]])
    job_generator = drjg(np.zeros((2, 1)), buffer_processing_matrix, sim_time_interval=1)
    state_initialiser = stinit.DeterministicCRWStateInitialiser(initial_state)

    env = ControlledRandomWalk(
        np.array([[2], [4]]),    # cost_per_buffer
        np.array([[10], [10]]),  # capacity
        np.eye(2),               # constituency_matrix
        job_generator,
        state_initialiser,
        job_conservation_flag,
    )
    return initial_state, env
def test_routing_two_serially_connected_resources_one_buffer_each():
    """
    Two resources with one buffer each, connected in series. At every step each resource either
    works on its buffer or idles. Jobs processed at buffer 1 are routed to buffer 2. Events are
    generated deterministically at the rate given by the mean rate.
    """
    d1, d2 = 1, 0    # Arrival rates at buffers 1 and 2.
    mu1, mu2 = 1, 1  # Processing rates at buffers 1 and 2.

    demand_rate = np.array([[d1], [d2]])
    # Rows: buffers, columns: influence of each activity. Activity 1 (column 0) removes a job from
    # buffer 1 (row 0) and adds it to buffer 2 (row 1).
    buffer_processing_matrix = np.array([[-mu1, 0],
                                         [mu1, -mu2]])

    # One row per time step.
    actions = np.array([[0, 0], [1, 0], [1, 1], [0, 1], [1, 1]])
    # Expected buffer contents after each step.
    jobs = np.array([
        [1, 0],  # Both idle: one job arrives at buffer 1.
        [1, 1],  # Buffer 1 served: its job moves to buffer 2 and a new job arrives.
        [1, 1],  # Both served: arrival and routing offset the processed jobs.
        [2, 0],  # Only buffer 2 served: buffer 1 accumulates a job, buffer 2 empties.
        [2, 0],  # Both served: buffer 1 is unchanged and buffer 2 ends the step empty.
    ])
    # Expected cost of each step.
    cost = [2, 6, 6, 4, 4]

    job_generator = drjg(demand_rate, buffer_processing_matrix, sim_time_interval=1)
    assert job_generator.routes == {(1, 0): (0, 0)}

    env = ControlledRandomWalk(
        np.array([[2], [4]]),    # cost_per_buffer
        np.array([[10], [10]]),  # capacity
        np.eye(2),               # constituency_matrix
        job_generator,
        stinit.DeterministicCRWStateInitialiser(np.array([[0], [0]])),
    )

    for action, expected_jobs, expected_cost in zip(actions, jobs, cost):
        state, reward, _, _ = env.step(action.reshape((2, 1)))
        assert np.all(state == expected_jobs.reshape([2, 1]))
        np.testing.assert_approx_equal(reward, -expected_cost)
def test_rl_env_with_complex_activity_conditions():
    """
    A more complicated version of test_rl_env_with_simple_activity_conditions. Checks the
    handling of more complex activity constraints in an 8 resource and 8 activity tandem model.
    """
    num_buffers = 8
    alpha1 = 4

    # Arrivals only at the first buffer.
    demand_rate = np.zeros((num_buffers, 1), dtype=int)
    demand_rate[0, 0] = alpha1

    # Tandem line: -1 on the diagonal and +1 just below it.
    buffer_processing_matrix = (np.eye(num_buffers, k=-1, dtype=int)
                                - np.eye(num_buffers, dtype=int))

    # Additional activity constraints, stacked below one row per physical resource.
    constraints_matrix = np.array([
        [1, 1, 0, 0, 0, 0, 0, 0],
        [0, 0, 1, 1, 0, 0, 0, 0],
        [0, 1, 1, 0, 0, 0, 0, 0]
    ])
    constituency_matrix = np.vstack([np.eye(num_buffers), constraints_matrix])

    # Build the environment.
    job_generator = ScaledBernoulliServicesPoissonArrivalsGenerator(
        demand_rate, buffer_processing_matrix, job_gen_seed=72)
    env = RLControlledRandomWalk(
        np.ones((num_buffers, 1)),          # cost_per_buffer
        np.full((num_buffers, 1), np.inf),  # capacity
        constituency_matrix,
        job_generator,
        stinit.DeterministicCRWStateInitialiser(np.zeros((num_buffers, 1))),
        False,                              # job_conservation_flag
        index_phys_resources=tuple(range(num_buffers)),
    )

    action_spaces = env._rl_action_space.spaces
    assert len(action_spaces) == 5
    # First group has 4 resources, and 8 actions.
    assert action_spaces[0].shape == (1, 8)
    # Each independent group has only two actions.
    assert action_spaces[1].shape == (1, 2)
def test_ensure_jobs_conservation_with_not_enough_jobs():
    """
    With a single job in each buffer and processing rates above one, ensure_jobs_conservation is
    expected to return a routing matrix that moves at most one job per buffer.
    """
    buffer_processing_matrix = np.array([[-2, 0],
                                         [2, -3]])
    num_buffers, num_activities = 2, 2

    state_initialiser = stinit.DeterministicCRWStateInitialiser(np.zeros((num_buffers, 1)))
    job_generator = drjg(np.ones((num_buffers, 1)), buffer_processing_matrix,
                         sim_time_interval=1)
    env = ControlledRandomWalk(np.ones((num_buffers, 1)),
                               np.ones((num_buffers, 1)),
                               np.zeros((num_buffers, num_activities)),
                               job_generator,
                               state_initialiser)

    one_job_per_buffer = np.ones((num_buffers, 1))
    routing_jobs_matrix = env.ensure_jobs_conservation(buffer_processing_matrix,
                                                       one_job_per_buffer)

    expected = np.array([[-1, 0],
                         [1, -1]])
    assert np.all(routing_jobs_matrix == expected)
def test_exceed_capacity_single_buffer():
    """
    One resource with one buffer. Corner case where initial_state > capacity: stepping the
    environment is expected to raise an AssertionError.
    """
    job_generator = drjg(np.array([[3]]), -np.ones((1, 1)), sim_time_interval=1)
    # The initial state (10) is above the capacity (5).
    state_initialiser = stinit.DeterministicCRWStateInitialiser(np.array([[10]]))
    env = ControlledRandomWalk(
        np.array([[3.5]]),  # cost_per_buffer
        np.array([[5]]),    # capacity
        np.ones((1, 1)),    # constituency_matrix
        job_generator,
        state_initialiser,
    )

    pytest.raises(AssertionError, env.step, np.ones((1, 1)))
def test_scheduling_multiple_buffers_events():
    """
    One resource (station) with 3 buffers, so there are 4 possible actions: schedule one of the
    buffers or idle. When a buffer is scheduled, its jobs are removed deterministically at its
    processing rate. New jobs also arrive at a deterministic rate. The number of jobs in a buffer
    is always nonnegative and less than or equal to capacity.
    """
    cost_per_buffer = np.array([[1.1], [2.2], [3.3]])
    d1, d2, d3 = 1, 2, 3     # Arrival rates at buffers 1, 2 and 3.
    mu1, mu2, mu3 = 1, 2, 3  # Processing rates at buffers 1, 2 and 3.

    demand_rate = np.array([[d1], [d2], [d3]])
    # Rows: buffers, columns: influence of each activity. There is no routing in this case, so
    # the matrix is diagonal.
    buffer_processing_matrix = np.array([[-mu1, 0, 0],
                                         [0, -mu2, 0],
                                         [0, 0, -mu3]])

    # One row per time step. The resource works on one buffer at a time.
    actions = np.array([[0, 0, 1], [1, 0, 0], [1, 0, 0], [0, 1, 0],
                        [0, 0, 1], [0, 1, 0], [1, 0, 0], [0, 0, 1],
                        [0, 1, 0], [1, 0, 0], [0, 0, 1]])
    # Working on a buffer compensates its arrivals, so its number of jobs stays constant.
    # Otherwise, the buffer grows at its demand rate.
    jobs = np.array([[1, 2, 0], [1, 4, 3], [1, 6, 6], [2, 6, 9],
                     [3, 8, 9], [4, 8, 12], [4, 10, 15], [5, 12, 15],
                     [6, 12, 18], [6, 14, 21], [7, 16, 21]])
    # Expected cost: dot product of cost_per_buffer and the number of jobs at each iteration.
    cost = [5.5, 19.8, 34.1, 45.1, 50.6, 61.6, 75.9, 81.4, 92.4, 106.7, 112.2]

    job_generator = drjg(demand_rate, buffer_processing_matrix, sim_time_interval=1)
    env = ControlledRandomWalk(
        cost_per_buffer,
        np.array([[40], [40], [40]]),  # capacity
        np.ones((1, 3)),               # constituency_matrix
        job_generator,
        stinit.DeterministicCRWStateInitialiser(np.array([[0], [0], [0]])),
    )

    for action, expected_jobs, expected_cost in zip(actions, jobs, cost):
        state, reward, _, _ = env.step(action.reshape((3, 1)))
        assert np.all(state == expected_jobs.reshape([3, 1]))
        np.testing.assert_approx_equal(reward, -expected_cost)
def test_simple_rl_action_interpretation():
    """
    Check the mechanism for reading in actions from an RL model, using the 4 resource 4 activity
    tandem model without any additional activity constraints.
    """
    num_buffers = 4
    alpha1 = 4

    # Arrivals only at the first buffer.
    demand_rate = np.zeros((num_buffers, 1), dtype=int)
    demand_rate[0, 0] = alpha1

    # Tandem line: -1 on the diagonal and +1 just below it.
    buffer_processing_matrix = (np.eye(num_buffers, k=-1, dtype=int)
                                - np.eye(num_buffers, dtype=int))

    # Build the environment.
    job_generator = ScaledBernoulliServicesPoissonArrivalsGenerator(
        demand_rate, buffer_processing_matrix, job_gen_seed=72)
    env = RLControlledRandomWalk(
        np.ones((num_buffers, 1)),          # cost_per_buffer
        np.full((num_buffers, 1), np.inf),  # capacity
        np.eye(num_buffers),                # constituency_matrix
        job_generator,
        stinit.DeterministicCRWStateInitialiser(np.zeros((num_buffers, 1))),
        False,                              # job_conservation_flag
        index_phys_resources=tuple(range(num_buffers)),
    )

    def interpret(active_indices):
        """Interpret the length-8 binary RL action with ones at the given indices."""
        binary_action = np.zeros(8)
        binary_action[active_indices] = 1
        return env._interpret_rl_action(binary_action)

    # Test RL actions in increasing complexity.
    null_action = interpret([])
    action_one = interpret([1])
    action_two = interpret([1, 3, 5, 7])

    assert np.all(null_action == np.zeros(4))
    assert np.all(action_one == np.array([1, 0, 0, 0]))
    assert np.all(action_two == np.ones(4))
def test_routing_three_buffer_tree_topology():
    """
    Three resources with one buffer each: one parent with two children. There are 4 activities.
    Each child either idles or works on its own buffer, while the parent chooses whether to work
    and route to one child or to the other. Events are generated deterministically at the rate
    given by the mean rate.
    """
    d1, d2, d3 = 1, 0, 0  # Arrival rates at buffers 1, 2 and 3.
    mu12 = 1  # Rate of processing jobs at buffer 1 and routing them to buffer 2.
    mu13 = 1  # Rate of processing jobs at buffer 1 and routing them to buffer 3.
    mu2 = 1   # Rate of processing jobs at buffer 2.
    mu3 = 1   # Rate of processing jobs at buffer 3.

    demand_rate = np.array([[d1], [d2], [d3]])
    # Jobs processed at buffer 1 are routed to either buffer 2 or buffer 3.
    buffer_processing_matrix = np.array([[-mu12, -mu13, 0, 0],
                                         [mu12, 0, -mu2, 0],
                                         [0, mu13, 0, -mu3]])

    # One row per time step.
    actions = np.array([[0, 1, 0, 0], [0, 1, 0, 0], [1, 0, 0, 0], [0, 1, 0, 0],
                        [0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1], [1, 0, 1, 0],
                        [1, 0, 0, 0], [0, 1, 0, 1], [1, 0, 0, 1]])
    # Expected buffer contents after each step.
    jobs = np.array([[0, 0, 1], [0, 0, 2], [0, 1, 2], [0, 1, 3],
                     [0, 1, 4], [1, 0, 4], [2, 0, 3], [2, 0, 3],
                     [2, 1, 3], [2, 1, 3], [2, 2, 2]])

    job_generator = drjg(demand_rate, buffer_processing_matrix, sim_time_interval=1)
    assert job_generator.routes == {(1, 0): (0, 0), (2, 1): (0, 1)}

    constituency_matrix = np.array([[1, 0, 0, 0],
                                    [1, 0, 0, 0],
                                    [0, 0, 1, 1],
                                    [0, 0, 0, 1]])
    env = ControlledRandomWalk(
        np.array([[2], [4], [4]]),     # cost_per_buffer
        np.array([[10], [10], [10]]),  # capacity
        constituency_matrix,
        job_generator,
        stinit.DeterministicCRWStateInitialiser(np.array([[0], [0], [0]])),
    )

    for action, expected_jobs in zip(actions, jobs):
        state, _, _, _ = env.step(action.reshape((4, 1)))
        assert np.all(state == expected_jobs.reshape([3, 1]))
def test_action_processing_with_multiple_resource_sets():
    """
    Tests the interpretation and application of actions in a more complex environment with
    deterministic dynamics. This is validated in terms of the environment state and rewards.

    Both returned states are compared against the expected buffer contents divided by their sum,
    using np.allclose so that the two checks are consistent and tolerant to rounding.
    """
    # Set up the environment parameters with no arrivals and deterministic dynamics.
    alpha1, alpha3 = 0, 0
    mu1, mu2, mu3, mu4 = 10, 3, 10, 3
    cost_per_buffer = np.ones((4, 1))
    initial_state = (100, 100, 100, 100)
    capacity = np.ones((4, 1)) * np.inf
    job_conservation_flag = True

    demand_rate = np.array([alpha1, 0, alpha3, 0])[:, None]
    buffer_processing_matrix = np.array([[-mu1, 0, 0, 0],
                                         [mu1, -mu2, 0, 0],
                                         [0, 0, -mu3, 0],
                                         [0, 0, mu3, -mu4]])
    constituency_matrix = np.array([[1, 0, 0, 1],
                                    [0, 1, 1, 0]])

    # Construct deterministic job generator.
    job_generator = DeterministicDiscreteReviewJobGenerator(
        sim_time_interval=1,
        demand_rate=demand_rate,
        buffer_processing_matrix=buffer_processing_matrix
    )

    state_initialiser = stinit.DeterministicCRWStateInitialiser(initial_state)
    env = RLControlledRandomWalk(cost_per_buffer, capacity, constituency_matrix, job_generator,
                                 state_initialiser, job_conservation_flag)

    # Ensure that the idling actions do not change the environment state and are rewarded
    # accordingly.
    idle_action = (np.array([1, 0, 0]), np.array([1, 0, 0]))
    new_state_idle, reward_idle, _, _ = env.step(idle_action)
    # Tolerance-based comparison, matching the check on the active step below.
    assert np.allclose(new_state_idle, np.array(initial_state) / sum(initial_state))
    assert reward_idle == -1 * np.dot(initial_state, cost_per_buffer)

    # Ensure that the action which activates activities 2 and 4 is interpreted and rewarded
    # correctly.
    active_action = (np.array([0, 0, 1]), np.array([0, 1, 0]))
    new_state_active, reward_active, _, _ = env.step(active_action)
    assert np.allclose(new_state_active, np.array([100, 97, 100, 97]) / 394)
    assert reward_active == -1 * np.dot(env.state.flatten(), cost_per_buffer)
def test_scheduling_single_buffer_events_constant():
    """
    One resource (station) with one buffer, so there are 2 possible actions: process or idle. At
    every iteration the buffer is both filled and processed at the same rate, so the number of
    jobs stays constant and equal to initial_state.
    """
    cost_per_buffer = np.array([[3.5]])
    initial_state = np.array([[2]])

    job_generator = drjg(np.array([[1]]), -np.ones((1, 1)), sim_time_interval=1)
    env = ControlledRandomWalk(
        cost_per_buffer,
        np.array([[20]]),  # capacity
        np.ones((1, 1)),   # constituency_matrix
        job_generator,
        stinit.DeterministicCRWStateInitialiser(initial_state),
    )

    always_work = np.ones((1, 1))
    for _ in range(100):
        state, reward, _, _ = env.step(always_work)
        assert state[0, 0] == initial_state
        assert reward == -cost_per_buffer[0] * state
def test_input_queued_switch_spaces():
    """
    Check the RL environment wrapper for the input_queued_switch_3x3_model example, which is a
    good test for resources with activity constraints. See
    snc/stochastic_network_control/environments/examples.py for the original set up code.
    """
    num_buffers = 9
    # mu11, mu12, ..., mu33 are all equal to one.
    service_rates = [1] * num_buffers
    buffer_processing_matrix = -np.diag(service_rates)
    demand_rate = 0.3 * np.ones((num_buffers, 1))

    # One row per physical resource, followed by the activity constraints.
    resource_rows = np.array([[1, 1, 1, 0, 0, 0, 0, 0, 0],
                              [0, 0, 0, 1, 1, 1, 0, 0, 0],
                              [0, 0, 0, 0, 0, 0, 1, 1, 1]])
    constraints = np.array([[1, 0, 0, 1, 0, 0, 1, 0, 0],
                            [0, 1, 0, 0, 1, 0, 0, 1, 0],
                            [0, 0, 1, 0, 0, 1, 0, 0, 1]])
    constituency_matrix = np.vstack([resource_rows, constraints])

    # Build the environment.
    job_generator = ScaledBernoulliServicesPoissonArrivalsGenerator(
        demand_rate, buffer_processing_matrix, job_gen_seed=72)
    env = RLControlledRandomWalk(
        np.ones((num_buffers, 1)),          # cost_per_buffer
        np.full((num_buffers, 1), np.inf),  # capacity
        constituency_matrix,
        job_generator,
        stinit.DeterministicCRWStateInitialiser(np.zeros((num_buffers, 1))),
        True,                               # job_conservation_flag
        index_phys_resources=(0, 1, 2),
    )

    # A single joint action space is expected.
    assert isinstance(env._rl_action_space, gym_spaces.Tuple)
    assert len(env._rl_action_space.spaces) == 1

    # There are overall 34 combinations of activities (or inactivity) that are feasible.
    action_vectors = env._action_vectors
    assert len(action_vectors) == 34
    assert action_vectors.shape == (34, 9)
    assert snc.is_binary(action_vectors)
    # Every action vector must satisfy the activity constraints.
    assert snc.is_binary(np.matmul(action_vectors, constraints.T))
def test_assert(self):
    """
    Constructing a MaxWeightAgent on this environment is expected to raise an AssertionError.

    NOTE(review): the `self` parameter suggests this is a method of a test class whose header is
    not visible in this part of the file -- confirm.
    """
    demand_rate = np.array([0.5, 0.7])[:, None]
    bpm = np.array([[-1, 1],
                    [-1, 0]])
    env = crw.ControlledRandomWalk(
        job_generator=jg.DiscreteReviewJobGenerator(sim_time_interval=0.1,
                                                    demand_rate=demand_rate,
                                                    buffer_processing_matrix=bpm),
        # The remaining parameters are dummy values that this test does not make use of.
        cost_per_buffer=np.array([1, 1])[:, None],
        capacity=np.array([1, 1])[:, None],
        constituency_matrix=np.array([[1, 1], [1, 0]]),
        state_initialiser=si.DeterministicCRWStateInitialiser(np.array([0.5, 0.7])[:, None]),
    )
    with pytest.raises(AssertionError):
        mw.MaxWeightAgent(env)
def test_below_capacity_single_buffer():
    """
    Single resource with a single buffer: the number of jobs must stay at or below the maximum
    capacity, and stepping once more after the loop must trigger an AssertionError.
    """
    capacity = np.array([[20]])
    initial_state = np.array([[0]])
    demand_rate = np.array([[3]])
    cost_per_buffer = np.array([[3.5]])

    buffer_processing_matrix = -np.ones((1, 1))
    constituency_matrix = np.ones((1, 1))
    job_generator = drjg(demand_rate, buffer_processing_matrix, sim_time_interval=1)
    state_initialiser = stinit.DeterministicCRWStateInitialiser(initial_state)
    env = ControlledRandomWalk(cost_per_buffer, capacity, constituency_matrix, job_generator,
                               state_initialiser)

    action = np.ones((1, 1))
    for _ in range(10):
        state, _, _, _ = env.step(action)
        # The buffer level must remain within [0, capacity].
        assert 0 <= state
        assert state <= capacity

    # Presumably the buffer is at capacity by now, so one more step overflows it -- the
    # environment is expected to reject that with an AssertionError.
    with pytest.raises(AssertionError):
        env.step(action)
def test_job_conservation_multiple_routes_leave_same_buffer():
    """
    Job conservation when two activities take jobs from the same buffer in a single step.

    In the buffer processing matrix, columns 0 and 1 both have a negative entry for buffer 0
    (with matching positive entries for buffers 1 and 2 respectively). Buffer 0 starts with only
    3 jobs and there are no arrivals, so two final states are accepted.
    """
    buffer_processing_matrix = np.array([[-2, -5, 0, 0],
                                         [2, 0, -10, 0],
                                         [0, 5, 0, -10]])
    constituency_mat = np.eye(4)
    initial_state = np.array([[3], [3], [1]])
    demand_rate = np.zeros((3, 1))
    cost_per_buffer = np.ones((3, 1))
    capacity = np.ones((3, 1)) * np.inf
    job_conservation_flag = True

    job_generator = drjg(demand_rate, buffer_processing_matrix, sim_time_interval=1)
    state_initialiser = stinit.DeterministicCRWStateInitialiser(initial_state)
    env = ControlledRandomWalk(cost_per_buffer, capacity, constituency_mat, job_generator,
                               state_initialiser, job_conservation_flag)

    # Run all four activities at once.
    all_activities = np.array([1, 1, 1, 1])[:, None]
    state, _, _, _ = env.step(all_activities)

    # The result depends on the order in which the activities are handled, since they are given
    # by a dictionary. In an event driven simulator, the order would be FIFO.
    accepted_states = (
        np.array([[0], [2], [1]]),
        np.array([[0], [0], [3]]),
    )
    assert any((state == expected).all() for expected in accepted_states)
def test_index_phys_resources_with_repeated_indexes():
    """
    Building a ControlledRandomWalk with a repeated entry in index_phys_resources must raise an
    AssertionError.
    """
    # Remaining constructor arguments; their values are placeholders for a two-buffer set up.
    constituency_mat = np.eye(2)
    buffer_processing_mat = np.eye(2)
    capacity = np.zeros((2, 1))
    initial_state = np.zeros((2, 1))
    demand_rate = np.zeros((2, 1))
    cost_per_buffer = np.zeros((2, 1))
    job_generator = drjg(demand_rate, buffer_processing_mat, sim_time_interval=1)
    state_initialiser = stinit.DeterministicCRWStateInitialiser(initial_state)

    # The same resource index appears twice.
    repeated_indexes = (0, 0)
    with pytest.raises(AssertionError):
        ControlledRandomWalk(cost_per_buffer, capacity, constituency_mat, job_generator,
                             state_initialiser, index_phys_resources=repeated_indexes)
def test_scheduling_single_buffer_events_remove_two_until_empty():
    """
    Single resource (station) serving a single buffer, so the only actions are process or idle.

    The buffer is never filled (zero demand); jobs are only removed, two at a time. After each
    step the buffer level must equal the initial level minus two jobs per elapsed step (floored
    at zero), and the reward must be the negated holding cost. One further step after the loop is
    expected to raise an AssertionError.
    """
    cost_per_buffer = np.array([[3.5]])
    demand_rate = np.array([[0]])
    initial_state = np.array([[20]])
    capacity = np.array([[20]])
    job_generator = drjg(demand_rate, -2 * np.ones((1, 1)), sim_time_interval=1)
    s0 = stinit.DeterministicCRWStateInitialiser(initial_state)
    env = ControlledRandomWalk(cost_per_buffer, capacity, np.ones((1, 1)), job_generator, s0)
    action = np.ones((1, 1))
    initial_jobs = initial_state[0, 0]
    for steps_done in range(1, 11):
        s, r, _, _ = env.step(action)
        # Compare scalars: mixing a Python int with a shape-(1,) array inside np.max builds a
        # ragged sequence, which recent NumPy versions reject with a ValueError.
        assert s[0, 0] == max(0, initial_jobs - 2 * steps_done)
        assert r == -cost_per_buffer[0] * s
    # Presumably the buffer is empty by now, so processing again is invalid -- the environment is
    # expected to reject the step with an AssertionError.
    with pytest.raises(AssertionError):
        env.step(action)