def test_obtain_actions_no_num_mpc_steps():
    num_activities = 3
    z_star = np.zeros((num_activities, 1))
    constituency_matrix = np.eye(num_activities)
    mpc_policy = StationaryActionMPCPolicy(constituency_matrix)
    actions = mpc_policy.obtain_actions(z_star=z_star)
    assert np.all(actions == np.zeros((num_activities, 1)))


def test_rounded_stationary_mpc_action_all_near_zero_independent_resource():
    num_mpc_steps = 10
    num_activities = 3
    z_star = 1e-7 * (np.random.random_sample((num_activities, 1)) - 1)
    constituency_matrix = np.eye(num_activities)
    mpc_policy = StationaryActionMPCPolicy(constituency_matrix)
    actions = mpc_policy.obtain_actions(num_mpc_steps=num_mpc_steps,
                                        z_star=z_star)
    np.testing.assert_almost_equal(actions,
                                   np.zeros((num_activities, num_mpc_steps)))


def test_rounded_stationary_mpc_action_zeros():
    num_mpc_steps = 10
    num_activities = 3
    z_star = np.zeros((num_activities, 1))
    constituency_matrix = np.eye(num_activities)
    mpc_policy = StationaryActionMPCPolicy(constituency_matrix)
    actions = mpc_policy.obtain_actions(num_mpc_steps=num_mpc_steps,
                                        z_star=z_star)
    assert is_binary(actions)
    assert np.all(actions == np.zeros((num_activities, num_mpc_steps)))


def test_rounded_mpc_action_probability_one():
    np.random.seed(42)
    num_mpc_steps = int(1e3)
    z_star = np.array([1, 0])[:, None]
    constituency_matrix = np.ones((1, 2))
    mpc_policy = StationaryActionMPCPolicy(constituency_matrix)
    actions = mpc_policy.obtain_actions(num_mpc_steps=num_mpc_steps,
                                        z_star=z_star)
    assert is_binary(actions)
    assert np.all(actions == np.vstack((np.ones((1, num_mpc_steps)),
                                        np.zeros((1, num_mpc_steps)))))


def test_rounded_mpc_action_some_distribution_two_partially_shared_resources():
    np.random.seed(42)
    num_mpc_steps = int(1e5)
    z_star = np.array([0.8, 0.2, 1])[:, None]
    constituency_matrix = np.array([[1, 1, 0], [0, 0, 1]])
    mpc_policy = StationaryActionMPCPolicy(constituency_matrix)
    actions = mpc_policy.obtain_actions(num_mpc_steps=num_mpc_steps,
                                        z_star=z_star)
    assert is_binary(actions)
    assert np.all(constituency_matrix @ actions == np.ones((2, num_mpc_steps)))
    actions_freq = (np.sum(actions, axis=1) / num_mpc_steps)[:, None]
    np.testing.assert_almost_equal(actions_freq, z_star, decimal=2)


def test_rounded_mpc_action_some_distribution_one_resource_with_positive_prob_of_idling(
):
    np.random.seed(42)
    num_mpc_steps = int(1e4)
    z_star = np.array([0.2, 0.3])[:, None]
    constituency_matrix = np.ones((1, 2))
    mpc_policy = StationaryActionMPCPolicy(constituency_matrix)
    actions = mpc_policy.obtain_actions(num_mpc_steps=num_mpc_steps,
                                        z_star=z_star)
    assert is_binary(actions)
    assert np.all(np.sum(actions, axis=0) <= np.ones((1, num_mpc_steps)))
    actions_freq = (np.sum(actions, axis=1) / num_mpc_steps)[:, None]
    np.testing.assert_almost_equal(actions_freq, z_star, decimal=2)


def test_mpc_action_many_random_distributions_2_resource():
    np.random.seed(42)
    num_mpc_steps = int(1e5)
    num_tests = 50
    constituency_matrix = np.array([[1, 1, 0], [0, 0, 1]])
    for i in range(num_tests):
        p = np.random.random_sample((3, 1))
        z_star = p / np.sum(p)
        mpc_policy = StationaryActionMPCPolicy(constituency_matrix)
        actions = mpc_policy.obtain_actions(num_mpc_steps=num_mpc_steps,
                                            z_star=z_star)
        assert is_binary(actions)
        assert np.all(
            constituency_matrix @ actions <= np.ones((1, num_mpc_steps)))
        actions_freq = (np.sum(actions, axis=1) / num_mpc_steps)[:, None]
        np.testing.assert_almost_equal(actions_freq, z_star, decimal=2)


def test_rounded_mpc_action_multiple_resources_owning_same_activity_but_passed_correctly(
):
    np.random.seed(42)
    num_mpc_steps = int(1e4)
    z_star = np.array([0.1, 0.2, 0.3])[:, None]
    constituency_matrix = np.array([[1, 1, 0], [0, 0, 1], [0, 1, 1]])
    physical_constituency_matrix = constituency_matrix[0:-1, :]

    mpc_policy = StationaryActionMPCPolicy(physical_constituency_matrix)
    actions = mpc_policy.obtain_actions(num_mpc_steps=num_mpc_steps,
                                        z_star=z_star)

    assert is_binary(actions)
    assert np.all(
        constituency_matrix[0:-1, :] @ actions <= np.ones((2, num_mpc_steps)))
    actions_freq = (np.sum(actions, axis=1) / num_mpc_steps)[:, None]
    np.testing.assert_almost_equal(actions_freq, z_star, decimal=2)
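

# The tests above only pin down the observable behaviour of
# StationaryActionMPCPolicy.obtain_actions: it returns a binary
# (num_activities x num_mpc_steps) matrix whose per-activity empirical
# frequencies approximate z_star, while each resource (row of the
# constituency matrix) activates at most one of its activities per step.
# A minimal sketch of one way to achieve that behaviour; the helper name
# `sample_actions_like_stationary_mpc` and its signature are hypothetical,
# not part of the library under test.
def sample_actions_like_stationary_mpc(z_star, constituency_matrix,
                                        num_mpc_steps, rng=None):
    rng = np.random.default_rng() if rng is None else rng
    num_activities = z_star.shape[0]
    actions = np.zeros((num_activities, num_mpc_steps))
    for s in constituency_matrix:  # One row per resource.
        ind = np.flatnonzero(s > 0)
        probs = z_star[ind, 0]
        p_idle = max(0.0, 1.0 - probs.sum())  # Remaining mass means idling.
        # Sample which activity (or idling, encoded as len(ind)) runs at each step.
        choices = rng.choice(len(ind) + 1, size=num_mpc_steps,
                             p=np.append(probs, p_idle))
        for k, j in enumerate(ind):
            actions[j, choices == k] = 1
    return actions

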
class MaxWeightAgent(AgentInterface):
    def __init__(self,
                 env: crw.ControlledRandomWalk,
                 weight_per_buffer: Optional[Union[str,
                                                   types.StateSpace]] = None,
                 name: str = "MaxWeightAgent",
                 agent_seed: Optional[int] = None,
                 mpc_seed: Optional[int] = None) -> None:
        """
        MaxWeight policy based on Chapter 6.4 (CTCN book online edition).
        This only works for scheduling and routing problems,
        where each activity drains only one buffer.

        NOTE: if a buffer is managed by multiple resources, the job_conservation_flag has to be
         True, otherwise the buffer may take negative values. Moreover, in this case the
         algorithm's performance may degrade.

        :param env: CRW environment.
        :param weight_per_buffer: Vector whose entries weight the pressure difference at each
            buffer that MaxWeight aims to balance. To use the cost per buffer given by the
            environment, pass the string 'cost_per_buffer'.
        :param name: Agent identifier.
        :param agent_seed: Agent random seed.
        :param mpc_seed: MPC random seed.
        """
        # Verify that each activity drains at most one buffer.
        bpm = env.job_generator.buffer_processing_matrix
        assert np.all(np.sum(np.where(bpm < 0, -1, 0), axis=0) >= -1), \
            f'Buffer processing matrix not allowed: {bpm}. ' \
            'Current version only works for networks where each activity drains at most one ' \
            'buffer (i.e., only works for scheduling and/or routing).'

        if weight_per_buffer is None:  # Set BackPressure by default.
            self.weight_per_buffer = np.ones((env.num_buffers, 1))
        elif isinstance(weight_per_buffer, types.StateSpace):
            assert weight_per_buffer.size == env.num_buffers, \
                f'Length of weight_per_buffer = {weight_per_buffer.size}, but it should' \
                f' equal num_buffers = {env.num_buffers}.'
            assert np.all(weight_per_buffer > 0), \
                f'weight_per_buffer: {weight_per_buffer} must have positive entries.'
            self.weight_per_buffer = weight_per_buffer
        elif isinstance(weight_per_buffer, str):
            if weight_per_buffer == 'cost_per_buffer':
                self.weight_per_buffer = env.cost_per_buffer
            else:
                raise ValueError(
                    'weight_per_buffer must be a valid string (i.e. "cost_per_buffer") '
                    f'or numpy array. Current value is: {weight_per_buffer}.')
        else:
            assert isinstance(weight_per_buffer, types.StateSpace), \
                'weight_per_buffer must be a valid string (i.e. "cost_per_buffer") or numpy ' \
                f'array. Current value is {weight_per_buffer}.'

        super().__init__(env, name, agent_seed)
        self.mpc_policy = StationaryActionMPCPolicy(
            env.physical_constituency_matrix, mpc_seed)

    def max_weight_policy(self, state: types.StateSpace, eps: float = 1e-6) \
            -> types.ActionSpace:
        """
        Compute MaxWeight policy for scheduling and routing problems based on the back-pressure
        formulation.

        :param state: Current state of the environment.
        :param eps: Tolerance used to decide whether the max gain is negative and whether the
            buffer to be drained is nonempty.
        :return: MaxWeight policy.
        """
        num_activities = self.env.constituency_matrix.shape[1]
        z_star = np.zeros((num_activities, 1))
        for s in self.env.constituency_matrix:
            ind_activities_s = np.argwhere(s > 0)
            max_theta_s, list_max_activity = get_max_gain_station_s(
                ind_activities_s, state,
                self.env.job_generator.buffer_processing_matrix,
                self.weight_per_buffer)
            if max_theta_s < -eps:
                z_star[ind_activities_s, :] = 0
            else:
                num_positive_actions = 0
                ind_positive_actions = []
                for j in list_max_activity:
                    ind_drained_buffer = np.argwhere(
                        self.env.job_generator.buffer_processing_matrix[:,
                                                                        j] < 0)
                    if state[ind_drained_buffer] >= 1 - eps:
                        ind_positive_actions.append(j)
                        num_positive_actions += 1
                if num_positive_actions > 0:
                    z_star[ind_positive_actions] = 1 / num_positive_actions
        return z_star

    def map_state_to_actions(self, state: types.StateSpace, **override_args: Any) \
            -> types.ActionProcess:
        """
        Converts the continuous action vector to a set of binary actions.

        :param state: Current state of the environment.
        :param override_args: Arguments that can be overridden.
        :return: Set of binary actions.
        """
        z_star = self.max_weight_policy(state)
        actions = self.mpc_policy.obtain_actions(z_star=z_star,
                                                 num_mpc_steps=1)
        return actions
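

# get_max_gain_station_s is imported from elsewhere in the library; the agents
# above and below only rely on it returning the largest back-pressure gain at
# station s together with the activities that achieve it (within a tolerance).
# A minimal illustrative sketch of such a gain computation, consistent with the
# LP objective x.T D B z minimised by MaxWeightLpAgent further below; the
# function name and tolerance handling here are hypothetical, not the library's
# actual implementation:
def illustrative_max_gain_station_s(ind_activities_s, state,
                                    buffer_processing_matrix,
                                    weight_per_buffer, eps=1e-6):
    weighted_state = weight_per_buffer * state  # Elementwise, shape (num_buffers, 1).
    gains = {}
    for j in ind_activities_s.flatten():
        # theta_j: weighted net drain obtained by running activity j at full rate.
        gains[j] = float(-weighted_state.T @ buffer_processing_matrix[:, j:j + 1])
    max_theta_s = max(gains.values())
    list_max_activity = [j for j, g in gains.items() if g >= max_theta_s - eps]
    return max_theta_s, list_max_activity

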
class SchedulingMaxWeightAgent(AgentInterface):
    def __init__(self,
                 env: crw.ControlledRandomWalk,
                 name: str = "SchedulingMaxWeightAgent",
                 agent_seed: Optional[int] = None,
                 mpc_seed: Optional[int] = None) -> None:
        """
        Alternative description of the MaxWeight policy based on Prop. 4.8.1 (CTCN book online
        edition). This description only works for scheduling problems (i.e., with invertible B
        matrix) and with only one draining activity per buffer.

        :param env: CRW environment.
        :param name: Agent identifier.
        :param agent_seed: Agent random seed.
        :param mpc_seed: MPC random seed.
        """
        super().__init__(env, name, agent_seed)
        self.mpc_policy = StationaryActionMPCPolicy(
            env.physical_constituency_matrix, mpc_seed)

    def scheduling_max_weight_policy(self, state: types.StateSpace, eps: float = 1e-6) \
            -> types.ActionSpace:
        """
        Compute MaxWeight policy for scheduling problems based on the back-pressure formulation.

        :param state: Current state of the environment.
        :param eps: Tolerance used to decide whether the max gain is negative and whether the
            buffer to be drained is nonempty.
        :return: MaxWeight policy.
        """
        num_activities = self.env.constituency_matrix.shape[1]
        z_star = np.zeros((num_activities, 1))
        for s in self.env.constituency_matrix:
            ind_actions_s = np.argwhere(s > 0)
            max_theta_s, list_max_action = get_max_gain_station_s(
                ind_actions_s, state,
                self.env.job_generator.buffer_processing_matrix,
                self.env.cost_per_buffer)
            if max_theta_s < -eps:
                z_star[ind_actions_s, :] = 0
            else:
                num_positive_actions = 0
                ind_positive_actions = []
                for j in list_max_action:
                    ind_drained_buffer = np.argwhere(
                        self.env.job_generator.buffer_processing_matrix[:,
                                                                        j] < 0)
                    if state[ind_drained_buffer] >= 1 - eps:
                        ind_positive_actions.append(j)
                        num_positive_actions += 1
                if num_positive_actions > 0:
                    z_star[ind_positive_actions] = 1 / num_positive_actions
        return z_star

    def map_state_to_actions(self, state: types.StateSpace, **override_args: Any) \
            -> types.ActionProcess:
        """
        Converts the continuous action vector to a set of binary actions.

        :param state: Current state of the environment.
        :param override_args: Arguments that can be overridden.
        :return: Set of binary actions.
        """
        z_star = self.scheduling_max_weight_policy(state)
        actions = self.mpc_policy.obtain_actions(z_star=z_star,
                                                 num_mpc_steps=1)
        return actions


class MaxWeightLpAgent(AgentInterface):
    def __init__(self,
                 env: crw.ControlledRandomWalk,
                 method: str = 'cvx.ECOS',
                 name: str = "MaxWeightLpAgent",
                 binary_action: Optional[bool] = False,
                 agent_seed: Optional[int] = None,
                 mpc_seed: Optional[int] = None) -> None:
        """
        MaxWeight policy for general push models.

        :param env: CRW environment.
        :param method: Optimisation method/solver that CVXPY should use to solve the LP.
        :param name: Agent identifier.
        :param binary_action: Whether to solve the problem as an MIP with binary action variables.
        :param agent_seed: Agent random seed.
        :param mpc_seed: MPC random seed.
        :return None.
        """
        super().__init__(env, name, agent_seed)
        self.mpc_policy = StationaryActionMPCPolicy(
            env.physical_constituency_matrix, mpc_seed)
        self.method = method
        self.binary_action = binary_action

    def max_weight_policy(
            self, state: types.StateSpace) -> Tuple[types.ActionSpace, float]:
        """
        MaxWeight policy expressed as the solution to an LP:
            z_star = arg max -x.T D B z = arg min x.T D B z,
                        s.t. C z <= 1, z >= 0.

        :param state: Current state of the environment.
        :return: (z_star, opt_val)
            - z_star: solution to the LP provided by the solver.
            - opt_val: optimal value of the LP.
        """
        _, num_activities = self.constituency_matrix.shape
        z = cvx.Variable((num_activities, 1), boolean=self.binary_action)
        diag_cost = np.diag(self.env.cost_per_buffer[:, -1])
        objective = cvx.Minimize(
            state.T @ diag_cost @ self.buffer_processing_matrix @ z)
        constraints = [
            self.env.constituency_matrix @ z <= np.ones(
                (self.env.num_resources, 1)), z >= np.zeros(
                    (self.env.num_activities, 1))
        ]
        prob = cvx.Problem(objective, constraints)
        opt_val = prob.solve(solver=eval(self.method))
        z_star = z.value
        z_star = set_action_draining_empty_buffer_to_zero(
            opt_val, z_star, state, self.buffer_processing_matrix, diag_cost)
        return z_star, opt_val

    def map_state_to_actions(self, state: types.StateSpace, **override_args: Any) \
            -> types.ActionProcess:
        """
        Converts the continuous action vector to a set of binary actions.

        :param state: Current state of the environment.
        :param override_args: Arguments that can be overridden.
        :return: Set of binary actions.
        """
        z_star, _ = self.max_weight_policy(state)
        actions = self.mpc_policy.obtain_actions(num_mpc_steps=1,
                                                 z_star=z_star)
        return actions
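

# A small self-contained illustration of the LP that MaxWeightLpAgent solves,
# on toy matrices (the numbers below are made up for the example and are not
# taken from any environment above). With all effort capacity at a single
# resource, the solver should put all of z on the activity with the largest
# weighted drain rate.
if __name__ == '__main__':
    state = np.array([[5.], [1.]])                   # x: buffer levels.
    cost_per_buffer = np.array([[2.], [1.]])         # Diagonal entries of D.
    buffer_processing_matrix = np.array([[-1., 0.],  # B: each activity drains one buffer.
                                         [0., -1.]])
    constituency_matrix = np.ones((1, 2))            # C: one resource owns both activities.

    z = cvx.Variable((2, 1))
    diag_cost = np.diag(cost_per_buffer[:, 0])
    objective = cvx.Minimize(
        state.T @ diag_cost @ buffer_processing_matrix @ z)
    constraints = [constituency_matrix @ z <= 1, z >= 0]
    cvx.Problem(objective, constraints).solve(solver=cvx.ECOS)
    print(z.value)  # Expected approximately [[1.], [0.]].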