def test_complex_rl_action_interpretation():
    """
    Check RL action interpretation when each resource controls several activities.

    The network is the ksrs_network_model example; the set-up code is copied from the examples
    file, see snc/stochastic_network_control/environments/examples.py for the original code.
    """
    # Model parameters.
    alpha1, alpha3 = 2, 2
    mu1, mu2, mu3, mu4 = 10, 3, 10, 3
    seed = 72
    job_conservation_flag = True
    initial_state = (0, 0, 0, 0)
    cost_per_buffer = np.ones((4, 1))
    capacity = np.full((4, 1), np.inf)
    demand_rate = np.array([[alpha1], [0], [alpha3], [0]])
    buffer_processing_matrix = np.array([
        [-mu1, 0, 0, 0],
        [mu1, -mu2, 0, 0],
        [0, 0, -mu3, 0],
        [0, 0, mu3, -mu4],
    ])
    # Two resources: the first owns activities 1 and 4, the second owns activities 2 and 3.
    constituency_matrix = np.array([
        [1, 0, 0, 1],
        [0, 1, 1, 0],
    ])

    # Build the environment under test.
    job_generator = ScaledBernoulliServicesPoissonArrivalsGenerator(
        demand_rate, buffer_processing_matrix, job_gen_seed=seed)
    state_initialiser = stinit.DeterministicCRWStateInitialiser(initial_state)
    env = RLControlledRandomWalk(cost_per_buffer, capacity, constituency_matrix, job_generator,
                                 state_initialiser, job_conservation_flag)

    # Layout of the 6-entry binary RL action vector (by index):
    #   0: first group idles
    #   1: first activity on   (first action of first group)
    #   2: fourth activity on  (second action of first group)
    #   3: second group idles
    #   4: second activity on  (first action of second group)
    #   5: third activity on   (second action of second group)
    rl_idle = np.zeros(6)
    rl_fourth_only = np.array([0., 0., 1., 0., 0., 0.])
    rl_fourth_and_second = np.array([0., 0., 1., 0., 1., 0.])

    # Interpret every RL action before asserting anything.
    idle_action = env._interpret_rl_action(rl_idle)
    action_a = env._interpret_rl_action(rl_fourth_only)
    action_b = env._interpret_rl_action(rl_fourth_and_second)

    # The null action, a single-activity action and an action combining activities across both
    # resource sets must each map onto the expected activity vector.
    expectations = (
        (idle_action, np.zeros(4)),
        (action_a, np.array([0, 0, 0, 1])),
        (action_b, np.array([0, 1, 0, 1])),
    )
    for interpreted, expected in expectations:
        assert np.all(interpreted == expected)
def test_simple_rl_action_interpretation():
    """
    Check the mechanism that reads in actions coming from an RL model.

    Uses the 4 resource, 4 activity tandem model with no additional activity constraints.
    """
    # Tandem model parameters.
    alpha1 = 4
    seed = 72
    job_conservation_flag = False
    cost_per_buffer = np.ones((4, 1))
    initial_state = np.zeros((4, 1))
    capacity = np.full((4, 1), np.inf)
    demand_rate = np.array([[alpha1], [0], [0], [0]])
    buffer_processing_matrix = np.array([
        [-1, 0, 0, 0],
        [1, -1, 0, 0],
        [0, 1, -1, 0],
        [0, 0, 1, -1],
    ])
    # One activity per resource.
    constituency_matrix = np.eye(4)

    # Build the environment under test.
    job_generator = ScaledBernoulliServicesPoissonArrivalsGenerator(
        demand_rate, buffer_processing_matrix, job_gen_seed=seed)
    state_initialiser = stinit.DeterministicCRWStateInitialiser(initial_state)
    env = RLControlledRandomWalk(cost_per_buffer, capacity, constituency_matrix, job_generator,
                                 state_initialiser, job_conservation_flag,
                                 index_phys_resources=(0, 1, 2, 3))

    # RL actions of increasing complexity over the 8-entry binary vector: nothing selected,
    # only entry 1 selected, and every odd entry (1, 3, 5, 7) selected.
    rl_nothing = np.zeros(8)
    rl_single = np.zeros(8)
    rl_single[1] = 1
    rl_every_odd = np.zeros(8)
    rl_every_odd[1::2] = 1

    # Interpret every RL action before asserting anything.
    null_action = env._interpret_rl_action(rl_nothing)
    action_one = env._interpret_rl_action(rl_single)
    action_two = env._interpret_rl_action(rl_every_odd)

    # The interpreted actions must match the expected activity vectors.
    expectations = (
        (null_action, np.zeros(4)),
        (action_one, np.array([1, 0, 0, 0])),
        (action_two, np.ones(4)),
    )
    for interpreted, expected in expectations:
        assert np.all(interpreted == expected)