def test_obtain_actions_no_num_mpc_steps():
    num_activities = 3
    z_star = np.zeros((num_activities, 1))
    constituency_matrix = np.eye(num_activities)
    mpc_policy = StationaryActionMPCPolicy(constituency_matrix)
    actions = mpc_policy.obtain_actions(z_star=z_star)
    assert np.all(actions == np.zeros((num_activities, 1)))
def test_build_stationary_policy_matrix_for_1_resource_2_activities():
    constituency_matrix = np.array([[1, 1]])
    z_star = np.array([[0.3], [0.7]])
    mpc_policy = StationaryActionMPCPolicy(constituency_matrix)
    policy_matrix = mpc_policy.build_stationary_policy_matrix(z_star)
    policy_matrix_theory = np.vstack((z_star, 0)).T
    assert np.all(policy_matrix == policy_matrix_theory)
def test_build_stationary_policy_matrix_for_2_resources_activities():
    constituency_matrix = np.eye(2)
    z_star = np.array([[0.3], [0.7]])
    mpc_policy = StationaryActionMPCPolicy(constituency_matrix)
    policy_matrix = mpc_policy.build_stationary_policy_matrix(z_star)
    policy_matrix_theory = np.array([[0.3, 0, 0.7],
                                     [0, 0.7, 0.3]])
    np.testing.assert_almost_equal(policy_matrix, policy_matrix_theory)
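# For intuition, the policy matrix checked above has one row per resource: the
# resource's z_star entries sit in its own activity columns, and the leftover
# probability goes to a final idling column. The helper below is a minimal,
# hypothetical sketch of that construction (not the library implementation),
# assuming z_star is a (num_activities, 1) column vector whose entries per
# resource sum to at most 1.
def _sketch_build_policy_matrix(constituency_matrix: np.ndarray,
                                z_star: np.ndarray) -> np.ndarray:
    num_resources, num_activities = constituency_matrix.shape
    policy_matrix = np.zeros((num_resources, num_activities + 1))
    for r in range(num_resources):
        owned = constituency_matrix[r] > 0  # Activities owned by resource r.
        policy_matrix[r, :-1][owned] = z_star[owned, 0]
        policy_matrix[r, -1] = 1 - np.sum(z_star[owned, 0])  # Idling probability.
    return policy_matrix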
def test_rounded_stationary_mpc_action_all_near_zero_independent_resource():
    num_mpc_steps = 10
    num_activities = 3
    z_star = 1e-7 * (np.random.random_sample((num_activities, 1)) - 1)
    constituency_matrix = np.eye(num_activities)
    mpc_policy = StationaryActionMPCPolicy(constituency_matrix)
    actions = mpc_policy.obtain_actions(num_mpc_steps=num_mpc_steps, z_star=z_star)
    np.testing.assert_almost_equal(actions, np.zeros((num_activities, num_mpc_steps)))
def test_rounded_stationary_mpc_action_zeros():
    num_mpc_steps = 10
    num_activities = 3
    z_star = np.zeros((num_activities, 1))
    constituency_matrix = np.eye(num_activities)
    mpc_policy = StationaryActionMPCPolicy(constituency_matrix)
    actions = mpc_policy.obtain_actions(num_mpc_steps=num_mpc_steps, z_star=z_star)
    assert is_binary(actions)
    assert np.all(actions == np.zeros((num_activities, num_mpc_steps)))
def test_rounded_mpc_action_probability_one():
    np.random.seed(42)
    num_mpc_steps = int(1e3)
    z_star = np.array([1, 0])[:, None]
    constituency_matrix = np.ones((1, 2))
    mpc_policy = StationaryActionMPCPolicy(constituency_matrix)
    actions = mpc_policy.obtain_actions(num_mpc_steps=num_mpc_steps, z_star=z_star)
    assert is_binary(actions)
    assert np.all(actions == np.vstack((np.ones((1, num_mpc_steps)),
                                        np.zeros((1, num_mpc_steps)))))
def test_rounded_mpc_action_some_distribution_two_partially_shared_resources():
    np.random.seed(42)
    num_mpc_steps = int(1e5)
    z_star = np.array([0.8, 0.2, 1])[:, None]
    constituency_matrix = np.array([[1, 1, 0], [0, 0, 1]])
    mpc_policy = StationaryActionMPCPolicy(constituency_matrix)
    actions = mpc_policy.obtain_actions(num_mpc_steps=num_mpc_steps, z_star=z_star)
    assert is_binary(actions)
    assert np.all(constituency_matrix @ actions == np.ones((2, num_mpc_steps)))
    actions_freq = (np.sum(actions, axis=1) / num_mpc_steps)[:, None]
    np.testing.assert_almost_equal(actions_freq, z_star, decimal=2)
def test_build_stationary_policy_matrix_for_many_resources_and_activities_positive_idling_prob():
    num_resources = 1000
    constituency_matrix = np.eye(num_resources)
    z_star = np.random.dirichlet(0.5 * np.ones((num_resources,)))
    np.testing.assert_almost_equal(np.sum(z_star), 1)
    mpc_policy = StationaryActionMPCPolicy(constituency_matrix)
    policy_matrix = mpc_policy.build_stationary_policy_matrix(z_star)
    idle = 1 - z_star.reshape((num_resources, 1))
    policy_matrix_theory = np.hstack((np.diag(z_star), idle))
    np.testing.assert_almost_equal(policy_matrix, policy_matrix_theory)
def test_rounded_mpc_action_some_distribution_one_resource_with_positive_prob_of_idling():
    np.random.seed(42)
    num_mpc_steps = int(1e4)
    z_star = np.array([0.2, 0.3])[:, None]
    constituency_matrix = np.ones((1, 2))
    mpc_policy = StationaryActionMPCPolicy(constituency_matrix)
    actions = mpc_policy.obtain_actions(num_mpc_steps=num_mpc_steps, z_star=z_star)
    assert is_binary(actions)
    assert np.all(np.sum(actions, axis=0) <= np.ones((1, num_mpc_steps)))
    actions_freq = (np.sum(actions, axis=1) / num_mpc_steps)[:, None]
    np.testing.assert_almost_equal(actions_freq, z_star, decimal=2)
def test_mpc_action_many_random_distributions():
    np.random.seed(42)
    num_tests = 20
    num_mpc_steps = int(1e4)
    num_resources = 10
    num_activities_per_resource = 3
    # Build random probability distributions with extra idling activity.
    policy_matrix = np.zeros((num_resources, num_resources * num_activities_per_resource + 1))
    for i in range(num_resources):
        p = np.random.dirichlet(0.5 * np.ones((num_activities_per_resource + 1,)), size=1)
        policy_matrix[i, i * num_activities_per_resource:(i + 1) * num_activities_per_resource] \
            = p[0, :-1]
        policy_matrix[i, -1] = p[0, -1]
    for i in range(num_tests):
        actions = StationaryActionMPCPolicy.draw_samples_from_stationary_policy_matrix(
            policy_matrix, num_mpc_steps)
        assert is_binary(actions)
        actions_freq = np.sum(actions, axis=1) / num_mpc_steps
        actions_freq_theory = np.sum(policy_matrix[:, 0:-1], axis=0)
        np.testing.assert_almost_equal(actions_freq, actions_freq_theory, decimal=2)
def test_mpc_action_many_random_distributions_2_resource():
    np.random.seed(42)
    num_mpc_steps = int(1e5)
    num_tests = 50
    constituency_matrix = np.array([[1, 1, 0], [0, 0, 1]])
    for i in range(num_tests):
        p = np.random.random_sample((3, 1))
        z_star = p / np.sum(p)
        mpc_policy = StationaryActionMPCPolicy(constituency_matrix)
        actions = mpc_policy.obtain_actions(num_mpc_steps=num_mpc_steps, z_star=z_star)
        assert is_binary(actions)
        assert np.all(constituency_matrix @ actions <= np.ones((1, num_mpc_steps)))
        actions_freq = (np.sum(actions, axis=1) / num_mpc_steps)[:, None]
        np.testing.assert_almost_equal(actions_freq, z_star, decimal=2)
def test_rounded_mpc_action_multiple_resources_owning_same_activity_but_passed_correctly():
    np.random.seed(42)
    num_mpc_steps = int(1e4)
    z_star = np.array([0.1, 0.2, 0.3])[:, None]
    constituency_matrix = np.array([[1, 1, 0], [0, 0, 1], [0, 1, 1]])
    physical_constituency_matrix = constituency_matrix[0:-1, :]
    mpc_policy = StationaryActionMPCPolicy(physical_constituency_matrix)
    actions = mpc_policy.obtain_actions(num_mpc_steps=num_mpc_steps, z_star=z_star)
    assert is_binary(actions)
    assert np.all(constituency_matrix[0:-1, :] @ actions <= np.ones((2, num_mpc_steps)))
    actions_freq = (np.sum(actions, axis=1) / num_mpc_steps)[:, None]
    np.testing.assert_almost_equal(actions_freq, z_star, decimal=2)
def test_draw_samples_from_stationary_policy_matrix_all_zeros():
    num_mpc_steps = 10
    num_resources = 10
    num_activities = 20
    policy_matrix = np.hstack((np.zeros((num_resources, num_activities)),
                               np.ones((num_resources, 1))))
    actions = StationaryActionMPCPolicy.draw_samples_from_stationary_policy_matrix(
        policy_matrix, num_mpc_steps)
    assert is_binary(actions)
    assert np.all(actions == np.zeros((num_activities, num_mpc_steps)))
def test_draw_samples_from_stationary_policy_matrix_all_actions_with_prob_one():
    np.random.seed(42)
    num_mpc_steps = int(1e3)
    num_resources = 4
    policy_matrix = np.hstack((np.eye(num_resources), np.zeros((num_resources, 1))))
    actions = StationaryActionMPCPolicy.draw_samples_from_stationary_policy_matrix(
        policy_matrix, num_mpc_steps)
    assert is_binary(actions)
    assert np.all(actions == np.ones((1, num_mpc_steps)))
def test_draw_samples_from_stationary_policy_matrix_some_distribution_3_resources_4_actions():
    np.random.seed(42)
    num_mpc_steps = int(1e5)
    policy_matrix = np.array([[0.45, 0.55, 0, 0, 0],
                              [0, 0, 0.3, 0, 0.7],
                              [0, 0, 0, 1, 0]])
    actions = StationaryActionMPCPolicy.draw_samples_from_stationary_policy_matrix(
        policy_matrix, num_mpc_steps)
    assert is_binary(actions)
    actions_freq = np.sum(actions, axis=1) / num_mpc_steps
    actions_freq_theory = np.sum(policy_matrix[:, 0:-1], axis=0)
    np.testing.assert_almost_equal(actions_freq, actions_freq_theory, decimal=2)
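# The sampling behaviour exercised by the tests above can be sketched as
# follows. This is a hypothetical illustration, not the library implementation:
# each row of `policy_matrix` is a distribution over that resource's activities
# plus a final idling column, and one activity (or idle) is drawn per resource
# per step.
def _sketch_draw_samples(policy_matrix: np.ndarray, num_mpc_steps: int) -> np.ndarray:
    num_resources, num_cols = policy_matrix.shape
    num_activities = num_cols - 1  # The last column is the idling option.
    actions = np.zeros((num_activities, num_mpc_steps))
    for t in range(num_mpc_steps):
        for r in range(num_resources):
            j = np.random.choice(num_cols, p=policy_matrix[r])
            if j < num_activities:  # Drawing the idling column activates nothing.
                actions[j, t] = 1
    return actions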
def __init__(self, env) -> None:
    super().__init__(env.physical_constituency_matrix)
    self.draining_jobs_rate_matrix = env.job_generator.draining_jobs_rate_matrix
    self.demand_rate = env.job_generator.demand_rate
    self.num_resources = env.num_resources
    self.activities_per_resource, self.num_activities_per_resource \
        = mpc_utils.get_ind_activities_per_resource(self.physical_constituency_matrix)
    self.ind_activities_drain_buffers = \
        mpc_utils.get_ind_activities_drain_each_buffer(self.draining_jobs_rate_matrix)
    self.resource_per_activity = \
        mpc_utils.get_resource_each_activity_belongs(self.physical_constituency_matrix)
    mw_mpc_policy = StationaryActionMPCPolicy(
        physical_constituency_matrix=env.physical_constituency_matrix)
    cost_per_buffer = np.ones(env.cost_per_buffer.shape[0])[:, None]
    self.agent_smw = sch_mw.SchedulingMaxWeightAgent(env, mw_mpc_policy, cost_per_buffer)
class SchedulingMaxWeightAgent(AgentInterface):

    def __init__(self,
                 env: crw.ControlledRandomWalk,
                 name: str = "SchedulingMaxWeightAgent",
                 agent_seed: Optional[int] = None,
                 mpc_seed: Optional[int] = None) -> None:
        """
        Alternative description of the MaxWeight policy based on Prop. 4.8.1 (CTCN book online
        edition). This description only works for scheduling problems (i.e., with invertible B
        matrix) and only one draining activity per buffer.

        :param env: CRW environment.
        :param name: Agent identifier.
        :param agent_seed: Agent random seed.
        :param mpc_seed: MPC random seed.
        """
        super().__init__(env, name, agent_seed)
        self.mpc_policy = StationaryActionMPCPolicy(env.physical_constituency_matrix, mpc_seed)

    def scheduling_max_weight_policy(self, state: types.StateSpace, eps: float = 1e-6) \
            -> types.ActionSpace:
        """
        Compute the MaxWeight policy for scheduling problems based on the back-pressure
        formulation.

        :param state: Current state of the environment.
        :param eps: Tolerance used to decide whether the max gain is negative and whether a
            buffer is nonempty.
        :return: MaxWeight policy.
        """
        num_activities = self.env.constituency_matrix.shape[1]
        z_star = np.zeros((num_activities, 1))
        for s in self.env.constituency_matrix:
            ind_actions_s = np.argwhere(s > 0)
            max_theta_s, list_max_action = get_max_gain_station_s(
                ind_actions_s, state, self.env.job_generator.buffer_processing_matrix,
                self.env.cost_per_buffer)
            if max_theta_s < -eps:
                z_star[ind_actions_s, :] = 0
            else:
                num_positive_actions = 0
                ind_positive_actions = []
                for j in list_max_action:
                    ind_drained_buffer = np.argwhere(
                        self.env.job_generator.buffer_processing_matrix[:, j] < 0)
                    if state[ind_drained_buffer] >= 1 - eps:
                        ind_positive_actions.append(j)
                        num_positive_actions += 1
                if num_positive_actions > 0:
                    z_star[ind_positive_actions] = 1 / num_positive_actions
        return z_star

    def map_state_to_actions(self, state: types.StateSpace, **override_args: Any) \
            -> types.ActionProcess:
        """
        Converts the continuous action vector to a set of binary actions.

        :param state: Current state of the environment.
        :param override_args: Arguments that can be overridden.
        :return: Set of binary actions.
        """
        z_star = self.scheduling_max_weight_policy(state)
        actions = self.mpc_policy.obtain_actions(z_star=z_star, num_mpc_steps=1)
        return actions
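# A minimal usage sketch (kept as a comment since constructing a CRW environment
# is outside this snippet). `env` is assumed to be a scheduling-type
# crw.ControlledRandomWalk instance; the example state is illustrative:
#
#     agent = SchedulingMaxWeightAgent(env, agent_seed=42, mpc_seed=42)
#     state = np.ones((env.num_buffers, 1))  # One job per buffer.
#     actions = agent.map_state_to_actions(state)  # Binary actions for one step.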
def test_init_stationary_action_mpc_policy_from_constituency_matrix_with_multiple_ones_per_column():
    constituency_matrix = np.array([[1, 0], [1, 1]])
    with pytest.raises(AssertionError):
        _ = StationaryActionMPCPolicy(constituency_matrix)
def test_build_stationary_policy_matrix_from_negative_z_star():
    z_star = np.array([[-0.3], [0.7]])
    mpc_policy = StationaryActionMPCPolicy(np.eye(2))
    with pytest.raises(AssertionError):
        _ = mpc_policy.build_stationary_policy_matrix(z_star)
class MaxWeightAgent(AgentInterface):

    def __init__(self,
                 env: crw.ControlledRandomWalk,
                 weight_per_buffer: Optional[Union[str, types.StateSpace]] = None,
                 name: str = "MaxWeightAgent",
                 agent_seed: Optional[int] = None,
                 mpc_seed: Optional[int] = None) -> None:
        """
        MaxWeight policy based on Chapter 6.4 (CTCN book online edition). This only works for
        scheduling and routing problems, where each activity drains only one buffer.
        NOTE: In the case of a buffer managed by multiple resources, the job_conservation_flag
        has to be True; otherwise the buffer may take negative values. Moreover, in this case,
        the algorithm's performance may degrade.

        :param env: CRW environment.
        :param weight_per_buffer: Vector whose entries weight the pressure difference at each
            buffer that MaxWeight aims to balance. When this is given by the cost per buffer of
            the environment, it is passed as a string: 'cost_per_buffer'.
        :param name: Agent identifier.
        :param agent_seed: Agent random seed.
        :param mpc_seed: MPC random seed.
        """
        # Verify that each activity drains at most one buffer.
        bpm = env.job_generator.buffer_processing_matrix
        assert np.all(np.sum(np.where(bpm < 0, -1, 0), axis=0) >= -1), \
            f'Buffer processing matrix not allowed: {bpm}. ' \
            'Current version only works for networks where each activity drains exactly one ' \
            'buffer (i.e., only works for scheduling and/or routing).'
        if weight_per_buffer is None:
            # Default to back-pressure: uniform unit weight per buffer.
            self.weight_per_buffer = np.ones((env.num_buffers, 1))
        elif isinstance(weight_per_buffer, types.StateSpace):
            assert weight_per_buffer.size == env.num_buffers, \
                f'Length of weight_per_buffer = {weight_per_buffer.size}, but it should' \
                f' equal num_buffers = {env.num_buffers}.'
            assert np.all(weight_per_buffer > 0), \
                f'weight_per_buffer: {weight_per_buffer} must have positive entries.'
            self.weight_per_buffer = weight_per_buffer
        elif isinstance(weight_per_buffer, str):
            if weight_per_buffer == 'cost_per_buffer':
                self.weight_per_buffer = env.cost_per_buffer
            else:
                raise ValueError(
                    'weight_per_buffer must be a valid string (i.e. "cost_per_buffer") '
                    f'or numpy array. Current value is: {weight_per_buffer}.')
        else:
            assert isinstance(weight_per_buffer, types.StateSpace), \
                'weight_per_buffer must be a valid string (i.e. "cost_per_buffer") or numpy ' \
                f'array. Current value is {weight_per_buffer}.'
        super().__init__(env, name, agent_seed)
        self.mpc_policy = StationaryActionMPCPolicy(env.physical_constituency_matrix, mpc_seed)

    def max_weight_policy(self, state: types.StateSpace, eps: float = 1e-6) \
            -> types.ActionSpace:
        """
        Compute the MaxWeight policy for scheduling and routing problems based on the
        back-pressure formulation.

        :param state: Current state of the environment.
        :param eps: Tolerance used to decide whether the max gain is negative and whether a
            buffer is nonempty.
        :return: MaxWeight policy.
        """
        num_activities = self.env.constituency_matrix.shape[1]
        z_star = np.zeros((num_activities, 1))
        for s in self.env.constituency_matrix:
            ind_activities_s = np.argwhere(s > 0)
            max_theta_s, list_max_activity = get_max_gain_station_s(
                ind_activities_s, state, self.env.job_generator.buffer_processing_matrix,
                self.weight_per_buffer)
            if max_theta_s < -eps:
                z_star[ind_activities_s, :] = 0
            else:
                num_positive_actions = 0
                ind_positive_actions = []
                for j in list_max_activity:
                    ind_drained_buffer = np.argwhere(
                        self.env.job_generator.buffer_processing_matrix[:, j] < 0)
                    if state[ind_drained_buffer] >= 1 - eps:
                        ind_positive_actions.append(j)
                        num_positive_actions += 1
                if num_positive_actions > 0:
                    z_star[ind_positive_actions] = 1 / num_positive_actions
        return z_star

    def map_state_to_actions(self, state: types.StateSpace, **override_args: Any) \
            -> types.ActionProcess:
        """
        Converts the continuous action vector to a set of binary actions.

        :param state: Current state of the environment.
        :param override_args: Arguments that can be overridden.
        :return: Set of binary actions.
        """
        z_star = self.max_weight_policy(state)
        actions = self.mpc_policy.obtain_actions(z_star=z_star, num_mpc_steps=1)
        return actions
def test_build_stationary_policy_matrix_from_z_star_with_too_large_components():
    z_star = np.array([[1.1], [0]])
    mpc_policy = StationaryActionMPCPolicy(np.eye(2))
    with pytest.raises(AssertionError):
        _ = mpc_policy.build_stationary_policy_matrix(z_star)
class MaxWeightLpAgent(AgentInterface):

    def __init__(self,
                 env: crw.ControlledRandomWalk,
                 method: str = 'cvx.ECOS',
                 name: str = "MaxWeightLpAgent",
                 binary_action: Optional[bool] = False,
                 agent_seed: Optional[int] = None,
                 mpc_seed: Optional[int] = None) -> None:
        """
        MaxWeight policy for general push models.

        :param env: CRW environment.
        :param method: Optimisation method/solver that CVXPY should use to solve the LP.
        :param name: Agent identifier.
        :param binary_action: Whether the problem is an MIP with binary variables or not.
        :param agent_seed: Agent random seed.
        :param mpc_seed: MPC random seed.
        """
        super().__init__(env, name, agent_seed)
        self.mpc_policy = StationaryActionMPCPolicy(env.physical_constituency_matrix, mpc_seed)
        self.method = method
        self.binary_action = binary_action

    def max_weight_policy(self, state: types.StateSpace) -> Tuple[types.ActionSpace, float]:
        """
        MaxWeight policy expressed as the solution to an LP:
            z_star = arg min x.T D B z,  s.t. C z <= 1, z >= 0,
        where D is the diagonal matrix of buffer costs and B the buffer processing matrix.

        :param state: Current state of the environment.
        :return: (z_star, opt_val)
            - z_star: solution to the LP provided by the solver.
            - opt_val: optimal value of the LP.
        """
        _, num_activities = self.constituency_matrix.shape
        z = cvx.Variable((num_activities, 1), boolean=self.binary_action)
        diag_cost = np.diag(self.env.cost_per_buffer[:, -1])
        objective = cvx.Minimize(state.T @ diag_cost @ self.buffer_processing_matrix @ z)
        constraints = [
            self.env.constituency_matrix @ z <= np.ones((self.env.num_resources, 1)),
            z >= np.zeros((self.env.num_activities, 1))
        ]
        prob = cvx.Problem(objective, constraints)
        opt_val = prob.solve(solver=eval(self.method))
        z_star = z.value
        z_star = set_action_draining_empty_buffer_to_zero(
            opt_val, z_star, state, self.buffer_processing_matrix, diag_cost)
        return z_star, opt_val

    def map_state_to_actions(self, state: types.StateSpace, **override_args: Any) \
            -> types.ActionProcess:
        """
        Converts the continuous action vector to a set of binary actions.

        :param state: Current state of the environment.
        :param override_args: Arguments that can be overridden.
        :return: Set of binary actions.
        """
        z_star, _ = self.max_weight_policy(state)
        actions = self.mpc_policy.obtain_actions(num_mpc_steps=1, z_star=z_star)
        return actions
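# The helper below is a hypothetical, self-contained sketch (not part of the
# agent API) showing the LP from MaxWeightLpAgent.max_weight_policy on a toy
# single-station scheduling instance; all values are illustrative.
def _toy_max_weight_lp_example() -> np.ndarray:
    b_mat = np.array([[-1., 0.], [0., -1.]])  # B: each activity drains its own buffer.
    c_mat = np.ones((1, 2))  # C: a single resource owns both activities.
    diag_cost = np.diag([1., 2.])  # D: cost per buffer on the diagonal.
    state = np.array([[3.], [1.]])  # x: current state.
    z = cvx.Variable((2, 1))
    prob = cvx.Problem(cvx.Minimize(state.T @ diag_cost @ b_mat @ z),
                       [c_mat @ z <= 1, z >= 0])
    prob.solve(solver=cvx.ECOS)
    # The objective equals -3 * z[0] - 2 * z[1], so the optimum puts all effort
    # on activity 0: z.value is approximately [[1.], [0.]].
    return z.value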