def test_mpc_action_many_random_distributions():
    np.random.seed(42)
    num_tests = 20
    num_mpc_steps = int(1e4)
    num_resources = 10
    num_activities_per_resource = 3
    # Build random probability distributions with an extra idling activity.
    policy_matrix = np.zeros(
        (num_resources, num_resources * num_activities_per_resource + 1))
    for i in range(num_resources):
        p = np.random.dirichlet(0.5 * np.ones(num_activities_per_resource + 1), size=1)
        policy_matrix[i, i * num_activities_per_resource:(i + 1) * num_activities_per_resource] = \
            p[0, :-1]
        policy_matrix[i, -1] = p[0, -1]

    for i in range(num_tests):
        actions = StationaryActionMPCPolicy.draw_samples_from_stationary_policy_matrix(
            policy_matrix, num_mpc_steps)
        assert is_binary(actions)
        actions_freq = np.sum(actions, axis=1) / num_mpc_steps
        actions_freq_theory = np.sum(policy_matrix[:, 0:-1], axis=0)
        np.testing.assert_almost_equal(actions_freq,
                                       actions_freq_theory,
                                       decimal=2)
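
# Editor's note (sketch): the body of
# StationaryActionMPCPolicy.draw_samples_from_stationary_policy_matrix is not shown among these
# examples. A minimal sketch of the sampling that the tests above and below assume, where each
# row of policy_matrix is a probability distribution over activities plus a final idling column
# (the function name and loop structure here are illustrative, not the library's implementation):
import numpy as np

def draw_samples_sketch(policy_matrix: np.ndarray, num_mpc_steps: int) -> np.ndarray:
    num_resources, num_cols = policy_matrix.shape
    num_activities = num_cols - 1
    actions = np.zeros((num_activities, num_mpc_steps))
    for s in range(num_resources):
        # Draw one column index per time step according to resource s's distribution.
        draws = np.random.choice(num_cols, size=num_mpc_steps, p=policy_matrix[s])
        for t, j in enumerate(draws):
            if j < num_activities:  # The last column means idling: no activity is switched on.
                actions[j, t] = 1
    return actions
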
def test_reinforce_agent_play(env_name):
    """
    Extension of the agent set-up and initialisation test to include playing episodes.
    """
    # Set up environment using default parameters.
    # Environment parameters do not affect the test result here.
    tf_env, _ = rl_env_from_snc_env(load_scenario(
        env_name,
        job_gen_seed=10,
        override_env_params={'max_episode_length': 25})[1],
                                    discount_factor=0.99)

    # Instantiate and initialise a REINFORCE agent.
    reinforce_agent = create_reinforce_agent(tf_env)
    reinforce_agent.initialize()

    # Reset the environment
    tf_env.reset()
    # Play 5 time steps in the environment.
    for _ in range(5):
        # Since we do not have the state stored at this point, we capture it fresh from the
        # environment each time step as a TimeStep object (a named tuple).
        time_step = tf_env.current_time_step()
        # Obtain the agent's action.
        action_step = reinforce_agent.collect_policy.action(time_step)
        if isinstance(action_step.action, tuple):
            action = tf.concat(action_step.action, axis=-1)
        else:
            action = action_step.action

        # Ensure that the action is binary as expected.
        assert snc.is_binary(action)

        # Play the action out in the environment.
        tf_env.step(action_step.action)
def test_ppo_agent_play(env_name):
    """
    Extension of the agent set-up and initialisation test to include playing episodes.
    """
    # Set up environment using default parameters.
    # Environment parameters do not affect the test result here.
    tf_env, action_dims = rl_env_from_snc_env(load_scenario(
        env_name, job_gen_seed=10)[1],
                                              discount_factor=0.99,
                                              normalise_observations=False)

    # Instantiate and initialise a PPO agent for the environment.
    ppo_agent = create_ppo_agent(tf_env, num_epochs=10)
    ppo_agent.initialize()

    # Reset the environment
    tf_env.reset()
    # Play 5 time steps in the environment.
    for _ in range(5):
        # Since we do not have the state stored at this point, we capture it fresh from the
        # environment each time step as a TimeStep object (a named tuple).
        time_step = tf_env.current_time_step()
        # Obtain the agent's action.
        action_step = ppo_agent.collect_policy.action(time_step)
        # If the action is returned as a tuple (one one-hot vector per resource set), concatenate.
        if isinstance(action_step.action, tuple):
            action = tf.concat(action_step.action, axis=-1)
        else:
            action = action_step.action

        # Ensure that the action is binary as expected.
        assert snc.is_binary(action)
        # Play the action out in the environment.
        tf_env.step(action_step.action)
    def step(self, action: snc_types.ActionSpace) \
            -> Tuple[snc_types.StateSpace, float, bool, Any]:
        """
        Implements one step of the controlled random walk. We remove jobs from each buffer that
        is scheduled, and add any newly arrived jobs. Arrival, processing and routing events all
        happen at the same time.

        :param action: Current action performed by the agent.
        :return: (state, reward, done, extra_data):
            - state: New state after performing action in current state.
            - reward: Reward (negative cost) obtained after the state transition.
            - done: Indicates whether the maximum number of steps has been reached. Currently
                used for training RL agents; other termination conditions may be implemented in
                the future.
            - extra_data: Dictionary including new arrivals, the total number of processed jobs,
                processed jobs that have been added to any buffer (from suppliers or routed from
                other buffers), and processed jobs that have been drained from any buffer.
        """
        assert snc.is_binary(action)
        assert action.shape == (self.num_activities, 1)
        assert np.all(self.constituency_matrix @ action <= 1), \
            "Current action violates one action per resource constraint: C u <= 1."

        # New job arrivals per buffer.
        new_jobs = self.job_generator.get_arrival_jobs()
        # Generate number of processed jobs
        routing_jobs_matrix = self.job_generator.get_routing_job_matrix()
        if self.energy_conservation_flag:
            routing_jobs_matrix = self.ensure_jobs_conservation(
                routing_jobs_matrix, self.state + new_jobs)
        action_effects = np.dot(routing_jobs_matrix, action)
        additions_effects = np.dot(
            np.clip(routing_jobs_matrix, a_min=0, a_max=None), action)
        drained_effects = np.dot(
            np.clip(routing_jobs_matrix, a_min=None, a_max=0), action)

        self.state = self.state + new_jobs + action_effects  # Update CRW state

        # Ensure physical constraints are satisfied.
        assert np.all(self.state >= 0)
        assert np.all(self.state <= self.capacity)

        cost = np.dot(self.cost_per_buffer.transpose(), self.state)
        reward = -float(cost)

        extra_data = {
            'arrivals': new_jobs,
            'processing': action_effects,
            'added': additions_effects,
            'drained': drained_effects
        }

        # Increment the time step.
        self._t += 1

        done = False
        if self.max_episode_length is not None and self._t >= self.max_episode_length:
            done = True
        return self.state, reward, done, extra_data
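
# Editor's note (sketch): a small worked example of the effect decomposition used in step()
# above. The routing matrix values are illustrative; the identity checked at the end holds in
# general because clipping splits the matrix into its nonnegative and nonpositive parts.
import numpy as np

routing_jobs_matrix = np.array([[-1., 0.],
                                [1., -1.]])  # activity 0 drains buffer 0 and fills buffer 1
action = np.array([[1.], [0.]])              # only activity 0 is active
action_effects = routing_jobs_matrix @ action                        # net change per buffer
additions_effects = np.clip(routing_jobs_matrix, 0, None) @ action   # jobs added to buffers
drained_effects = np.clip(routing_jobs_matrix, None, 0) @ action     # jobs drained from buffers
assert np.all(action_effects == additions_effects + drained_effects)
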
def test_rounded_stationary_mpc_action_zeros():
    num_mpc_steps = 10
    num_activities = 3
    z_star = np.zeros((num_activities, 1))
    constituency_matrix = np.eye(num_activities)
    mpc_policy = StationaryActionMPCPolicy(constituency_matrix)
    actions = mpc_policy.obtain_actions(num_mpc_steps=num_mpc_steps,
                                        z_star=z_star)
    assert is_binary(actions)
    assert np.all(actions == np.zeros((num_activities, num_mpc_steps)))
def test_draw_samples_from_stationary_policy_matrix_all_zeros():
    num_mpc_steps = 10
    num_resources = 10
    num_activities = 20
    policy_matrix = np.hstack((np.zeros(
        (num_resources, num_activities)), np.ones((num_resources, 1))))
    actions = StationaryActionMPCPolicy.draw_samples_from_stationary_policy_matrix(
        policy_matrix, num_mpc_steps)
    assert is_binary(actions)
    assert np.all(actions == np.zeros((num_activities, num_mpc_steps)))
def test_input_queued_switch_spaces():
    """
    Test the RL environment wrapper for the input_queued_switch_3x3_model example.
    This is a good test for resources with activity constraints.
    See snc/stochastic_network_control/environments/examples.py for the original set-up code.
    """
    # Set up the environment parameters.
    mu11, mu12, mu13, mu21, mu22, mu23, mu31, mu32, mu33 = (1,) * 9
    cost_per_buffer = np.ones((9, 1))
    initial_state = np.zeros((9, 1))
    capacity = np.ones((9, 1)) * np.inf
    demand_rate = 0.3 * np.ones((9, 1))
    job_conservation_flag = True
    seed = 72
    buffer_processing_matrix = - np.diag([mu11, mu12, mu13, mu21, mu22, mu23, mu31, mu32, mu33])
    constraints = np.array([[1, 0, 0, 1, 0, 0, 1, 0, 0],
                            [0, 1, 0, 0, 1, 0, 0, 1, 0],
                            [0, 0, 1, 0, 0, 1, 0, 0, 1]])
    constituency_matrix = np.vstack([
        np.array([[1, 1, 1, 0, 0, 0, 0, 0, 0],
                  [0, 0, 0, 1, 1, 1, 0, 0, 0],
                  [0, 0, 0, 0, 0, 0, 1, 1, 1]]),
        constraints]
    )
    index_phys_resources = (0, 1, 2)

    # Construct environment.
    job_generator = ScaledBernoulliServicesPoissonArrivalsGenerator(
        demand_rate, buffer_processing_matrix, job_gen_seed=seed)
    state_initialiser = stinit.DeterministicCRWStateInitialiser(initial_state)
    env = RLControlledRandomWalk(cost_per_buffer, capacity, constituency_matrix, job_generator,
                                 state_initialiser, job_conservation_flag,
                                 index_phys_resources=index_phys_resources)

    # Test that the environment wrapper works as expected.
    assert isinstance(env._rl_action_space, gym_spaces.Tuple)
    assert len(env._rl_action_space.spaces) == 1
    # Overall, there are 34 feasible combinations of activities (including idling).
    assert len(env._action_vectors) == 34
    assert env._action_vectors.shape == (34, 9)
    assert snc.is_binary(env._action_vectors)
    assert snc.is_binary(np.matmul(env._action_vectors, constraints.T))
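
# Editor's note (sketch): a quick sanity check of the 34 feasible combinations asserted above.
# Enumerate all binary activity vectors of length 9 and keep those activating at most one
# activity per row and per column of the 3x3 switch (C u <= 1 for the stacked constituency
# matrix); the count equals sum_k C(3,k)^2 * k! = 1 + 9 + 18 + 6 = 34.
import itertools
import numpy as np

full_constituency = np.vstack([
    np.array([[1, 1, 1, 0, 0, 0, 0, 0, 0],
              [0, 0, 0, 1, 1, 1, 0, 0, 0],
              [0, 0, 0, 0, 0, 0, 1, 1, 1]]),
    np.array([[1, 0, 0, 1, 0, 0, 1, 0, 0],
              [0, 1, 0, 0, 1, 0, 0, 1, 0],
              [0, 0, 1, 0, 0, 1, 0, 0, 1]])])
num_feasible = sum(1 for u in itertools.product([0, 1], repeat=9)
                   if np.all(full_constituency @ np.array(u) <= 1))
assert num_feasible == 34
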
def test_rounded_mpc_action_probability_one():
    np.random.seed(42)
    num_mpc_steps = int(1e3)
    z_star = np.array([1, 0])[:, None]
    constituency_matrix = np.ones((1, 2))
    mpc_policy = StationaryActionMPCPolicy(constituency_matrix)
    actions = mpc_policy.obtain_actions(num_mpc_steps=num_mpc_steps,
                                        z_star=z_star)
    assert is_binary(actions)
    assert np.all(actions == np.vstack((np.ones((1, num_mpc_steps)),
                                        np.zeros((1, num_mpc_steps)))))
def test_draw_samples_from_stationary_policy_matrix_all_actions_with_prob_one(
):
    np.random.seed(42)
    num_mpc_steps = int(1e3)
    num_resources = 4
    policy_matrix = np.hstack(
        (np.eye(num_resources), np.zeros((num_resources, 1))))
    actions = StationaryActionMPCPolicy.draw_samples_from_stationary_policy_matrix(
        policy_matrix, num_mpc_steps)
    assert is_binary(actions)
    assert np.all(actions == np.ones((1, num_mpc_steps)))
def test_rounded_mpc_action_some_distribution_two_partially_shared_resources():
    np.random.seed(42)
    num_mpc_steps = int(1e5)
    z_star = np.array([0.8, 0.2, 1])[:, None]
    constituency_matrix = np.array([[1, 1, 0], [0, 0, 1]])
    mpc_policy = StationaryActionMPCPolicy(constituency_matrix)
    actions = mpc_policy.obtain_actions(num_mpc_steps=num_mpc_steps,
                                        z_star=z_star)
    assert is_binary(actions)
    assert np.all(constituency_matrix @ actions == np.ones((2, num_mpc_steps)))
    actions_freq = (np.sum(actions, axis=1) / num_mpc_steps)[:, None]
    np.testing.assert_almost_equal(actions_freq, z_star, decimal=2)
    def _interpret_rl_action(self, binary_action: np.ndarray) -> np.ndarray:
        """
        Takes an action from the RL agent and puts it in terms of activities in the network.

        :param binary_action: The action vector(s) from the RL agent which is formed from the
            concatenation of the one-hot actions for each resource set.
        :return: A binary action vector where the 1s denote active activities.
        """
        # Ensure that the action resulting from the RL agent is binary.
        # Binarisation should be handled in the agent.
        assert snc.is_binary(binary_action)
        # Determine which action vectors are selected.
        action_indices = np.where(binary_action == 1)[-1]
        # Use action indices to attain constituent action vectors (for each resource set)
        action_vectors = self._action_vectors[action_indices]
        # The overall action combines the activities across resource sets, which is achieved by
        # summing the selected action vectors.
        action = np.sum(action_vectors, axis=0)
        # Check that the action is binary (i.e. has no entries outside of {0, 1} which could occur
        # if the action vectors are not set up correctly)
        assert snc.is_binary(action)
        return action
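
# Editor's note (sketch): a tiny illustration of the index-to-activity mapping performed by
# _interpret_rl_action above. The stacked action vectors below are illustrative: two candidate
# vectors for the first resource set and one for the second.
import numpy as np

action_vectors = np.array([[1, 0, 0],   # resource set 1, option A: run activity 0
                           [0, 1, 0],   # resource set 1, option B: run activity 1
                           [0, 0, 1]])  # resource set 2, only option: run activity 2
binary_action = np.array([0, 1, 1])     # concatenated one-hot blocks select rows 1 and 2
action_indices = np.where(binary_action == 1)[-1]
action = np.sum(action_vectors[action_indices], axis=0)
assert np.all(action == np.array([0, 1, 1]))
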
def test_rounded_mpc_action_some_distribution_one_resource_with_positive_prob_of_idling(
):
    np.random.seed(42)
    num_mpc_steps = int(1e4)
    z_star = np.array([0.2, 0.3])[:, None]
    constituency_matrix = np.ones((1, 2))
    mpc_policy = StationaryActionMPCPolicy(constituency_matrix)
    actions = mpc_policy.obtain_actions(num_mpc_steps=num_mpc_steps,
                                        z_star=z_star)
    assert is_binary(actions)
    assert np.all(np.sum(actions, axis=0) <= np.ones((1, num_mpc_steps)))
    actions_freq = (np.sum(actions, axis=1) / num_mpc_steps)[:, None]
    np.testing.assert_almost_equal(actions_freq, z_star, decimal=2)
def test_draw_samples_from_stationary_policy_matrix_some_distribution_3_resources_4_actions(
):
    np.random.seed(42)
    num_mpc_steps = int(1e5)
    policy_matrix = np.array([[0.45, 0.55, 0, 0, 0], [0, 0, 0.3, 0, 0.7],
                              [0, 0, 0, 1, 0]])
    actions = StationaryActionMPCPolicy.draw_samples_from_stationary_policy_matrix(
        policy_matrix, num_mpc_steps)
    assert is_binary(actions)
    actions_freq = np.sum(actions, axis=1) / num_mpc_steps
    actions_freq_theory = np.sum(policy_matrix[:, 0:-1], axis=0)
    np.testing.assert_almost_equal(actions_freq,
                                   actions_freq_theory,
                                   decimal=2)
def test_mpc_action_many_random_distributions_2_resource():
    np.random.seed(42)
    num_mpc_steps = int(1e5)
    num_tests = 50
    constituency_matrix = np.array([[1, 1, 0], [0, 0, 1]])
    for i in range(num_tests):
        p = np.random.random_sample((3, 1))
        z_star = p / np.sum(p)
        mpc_policy = StationaryActionMPCPolicy(constituency_matrix)
        actions = mpc_policy.obtain_actions(num_mpc_steps=num_mpc_steps,
                                            z_star=z_star)
        assert is_binary(actions)
        assert np.all(
            constituency_matrix @ actions <= np.ones((1, num_mpc_steps)))
        actions_freq = (np.sum(actions, axis=1) / num_mpc_steps)[:, None]
        np.testing.assert_almost_equal(actions_freq, z_star, decimal=2)
def test_rounded_mpc_action_multiple_resources_owning_same_activity_but_passed_correctly(
):
    np.random.seed(42)
    num_mpc_steps = int(1e4)
    z_star = np.array([0.1, 0.2, 0.3])[:, None]
    constituency_matrix = np.array([[1, 1, 0], [0, 0, 1], [0, 1, 1]])
    physical_constituency_matrix = constituency_matrix[0:-1, :]

    mpc_policy = StationaryActionMPCPolicy(physical_constituency_matrix)
    actions = mpc_policy.obtain_actions(num_mpc_steps=num_mpc_steps,
                                        z_star=z_star)

    assert is_binary(actions)
    assert np.all(
        constituency_matrix[0:-1, :] @ actions <= np.ones((2, num_mpc_steps)))
    actions_freq = (np.sum(actions, axis=1) / num_mpc_steps)[:, None]
    np.testing.assert_almost_equal(actions_freq, z_star, decimal=2)
    def identify_draining_workload_vectors(self, workload_mat: types.WorkloadMatrix) \
            -> types.WorkloadSpace:
        """
        Returns a binary array indexing which workload vectors (i.e., rows of the workload matrix)
        are draining ones and therefore valid.

        :param workload_mat: Workload matrix, with rows being workload vectors.
        :return draining_workload_vec: Binary array with ones indexing valid workload vectors.
        """
        num_wl_vec = workload_mat.shape[0]
        draining_workload_vec = np.nan * np.ones((num_wl_vec,))
        for i, xi in enumerate(workload_mat):
            if np.any(xi < - self.tol):
                draining_workload_vec[i] = self.is_draining_workload_vector(xi[:, None])
            else:
                draining_workload_vec[i] = 1
        assert is_binary(draining_workload_vec)
        return draining_workload_vec
def test_is_binary():
    c = np.ones((1, 1))
    assert snc.is_binary(c)

    c = np.zeros((1, 1))
    assert snc.is_binary(c)

    c = np.ones((5, 4))
    assert snc.is_binary(c)

    c = np.zeros((5, 4))
    assert snc.is_binary(c)

    # Random binary matrix (randint's upper bound is exclusive, so use 2 to draw from {0, 1}).
    c = np.random.randint(0, 2, (3, 6))
    assert snc.is_binary(c)

    # An empty input is not considered binary.
    c = []
    assert not snc.is_binary(c)

    # A matrix with non-integer entries is not binary, even if some rows are.
    c = np.random.random_sample((3, 5))
    c[0] = 1
    assert not snc.is_binary(c)
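
# Editor's note (sketch): is_binary itself is not shown among these examples. A minimal
# implementation consistent with the tests above (an empty input is not binary; every entry must
# be exactly 0 or 1) could look as follows; the real snc implementation may differ.
import numpy as np

def is_binary_sketch(x) -> bool:
    x = np.asarray(x)
    if x.size == 0:
        return False
    return bool(np.all((x == 0) | (x == 1)))

assert is_binary_sketch(np.eye(3))
assert not is_binary_sketch([])
assert not is_binary_sketch(np.array([0.5, 1.0]))
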
    def __init__(self,
                 cost_per_buffer: snc_types.StateSpace,
                 capacity: snc_types.StateSpace,
                 constituency_matrix: snc_types.ConstituencyMatrix,
                 job_generator: jobgen.JobGeneratorInterface,
                 state_initialiser: stinit.CRWStateInitialiser,
                 job_conservation_flag: bool = False,
                 list_boundary_constraint_matrices: Optional[List[
                     np.ndarray]] = None,
                 model_type: str = 'push',
                 index_phys_resources: Optional[Tuple] = None,
                 ind_surplus_buffers: Optional[List[int]] = None,
                 max_episode_length: Optional[int] = None) -> None:
        """
        Environment for testing stochastic network control:

        We consider a network of (possibly) multiple resources, each one with (possibly) multiple
        buffers. At every time step, each resource can schedule which of its buffers to process
        jobs from, and where to route the processed jobs.

        The controlled random walk (CRW) describes the stochastic evolution of jobs in each
        buffer, under some policy that controls how the resources schedule and route the jobs,
        and where the arrival of new jobs and the processing of the current jobs are stochastic.
        The arrivals follow a job generator, which could be e.g. a Poisson process, such that the
        number of new jobs at each time step follows a Poisson distribution, with mean given by
        some demand rate vector. The success of finishing a job when a resource is working on it
        is also given by the job generator, which could be e.g. another Poisson process.

        Each buffer can start filled with some amount of jobs.
        Reward is negative (i.e., cost) and linear with the number of jobs in each buffer.
        This is a single agent environment.

        :param cost_per_buffer: cost per unit of inventory per buffer.
        :param capacity: List with maximum number of jobs allowed at each buffer.
        :param constituency_matrix: Matrix whose s-th row corresponds to resource s; and each entry,
            C_{si} in {0, 1}, specifies whether activity i corresponds to resource s (examples of
            activities are scheduling a buffer or routing to another resource).
        :param job_generator: object to generate events from
        :param state_initialiser: initialiser for state
        :param job_conservation_flag: Boolean flag controlling whether we ensure job conservation
            when updating the state (moving only jobs that actually exist) when 'True', or leave
            this task to the policy when 'False'. This parameter is False by default.
        :param list_boundary_constraint_matrices: List of binary matrices, one per resource, that
            indicate conditions (one per row) on which buffers cannot be empty in order to avoid
            idling. If this parameter is not passed, no boundary constraints are assumed by
            default.
        :param model_type: String indicating if this is a `'pull'` or `'push'` model.
        :param index_phys_resources: Tuple indexing the rows of constituency matrix that correspond
            to physical resources (as opposed to other rows that might represent coupled constraints
            among physical resources). If this parameter is not passed, then it considers all rows
            of the constituency matrix as corresponding to physical resources by default.
        :param ind_surplus_buffers: List of integers indicating the location of the surplus
            buffers in the buffer processing matrix.
        :param max_episode_length: Integer number of time steps to allow in each episode. Defaults
            to None meaning unlimited steps per episode (i.e. non-terminating episodes).
        """

        assert np.all(
            cost_per_buffer >= 0), 'Cost per buffer is assumed nonnegative'
        assert np.all(capacity >= 0), 'Capacity must be nonnegative'
        assert snc.is_binary(
            constituency_matrix), 'Constituency matrix must be binary'

        self._num_buffers = cost_per_buffer.size
        assert cost_per_buffer.shape == (self.num_buffers, 1)
        assert capacity.shape == (self.num_buffers, 1)

        self._num_resources, self._num_activities = constituency_matrix.shape
        assert job_generator.buffer_processing_matrix.shape == (
            self.num_buffers, self.num_activities)
        assert job_generator.num_buffers == self.num_buffers

        assert model_type in ['push', 'pull']

        self._cost_per_buffer = cost_per_buffer
        self._capacity = capacity
        self._constituency_matrix = constituency_matrix
        self._job_generator = job_generator
        self._state_initialiser = state_initialiser
        self._job_conservation_flag = job_conservation_flag
        self._model_type = model_type

        # If no list_boundary_constraint_matrices is passed, initialise to empty.
        if list_boundary_constraint_matrices is None:
            self._list_boundary_constraint_matrices = []  # type: List[np.ndarray]
            for s in range(self.num_resources):
                self._list_boundary_constraint_matrices.append(np.array([]))
        else:
            assert len(list_boundary_constraint_matrices) == self.num_resources
            for s in range(self.num_resources):
                assert list_boundary_constraint_matrices[s].shape[
                    1] == self.num_buffers
            self._list_boundary_constraint_matrices = list_boundary_constraint_matrices

        # If no index_phys_resources is passed, assume all rows of constituency matrix are physical
        # resources.
        if index_phys_resources is None:
            self._index_phys_resources = tuple(range(self._num_resources))
        else:
            assert min(index_phys_resources) >= 0
            # Indices must lie in [0, num_resources - 1].
            assert max(index_phys_resources) < self._num_resources
            assert len(set(index_phys_resources)) == len(index_phys_resources)
            self._index_phys_resources = index_phys_resources

        if model_type == 'pull':
            assert ind_surplus_buffers is not None, \
                "Pull models require surplus buffers, but none has been provided."
            assert isinstance(ind_surplus_buffers, list), \
                f"Type of 'ind_surplus_buffers' must be list, but provided: " \
                f"{type(ind_surplus_buffers)}."
            ind_surplus_buffers.sort()
            assert self.is_surplus_buffers_consistent_with_job_generator(
                ind_surplus_buffers, self.job_generator.demand_nodes.keys()
            ), "Provided list of surplus buffers is not consistent with buffer processing matrix."
        self._ind_surplus_buffers = ind_surplus_buffers

        # Build constituency with rows corresponding to physical resources.
        self._physical_constituency_matrix = constituency_matrix[
            self.index_phys_resources, :]

        # Set the episode length parameter and time step counter
        self._max_episode_length = max_episode_length
        self._t = 0

        # Set up the state attribute ahead of it being set in the reset method.
        self.state = self.state_initialiser.get_initial_state()
        self.reset()
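
# Editor's note (sketch): a minimal single-buffer, single-activity construction of the
# environment, reusing only classes and signatures that appear in the other examples here
# (ScaledBernoulliServicesPoissonArrivalsGenerator, DeterministicCRWStateInitialiser,
# RLControlledRandomWalk). Parameter values are illustrative, and the snc imports are assumed to
# be available as in those examples.
import numpy as np

cost_per_buffer = np.ones((1, 1))
capacity = np.ones((1, 1)) * np.inf
constituency_matrix = np.ones((1, 1))
demand_rate = 0.3 * np.ones((1, 1))
buffer_processing_matrix = -np.ones((1, 1))
job_generator = ScaledBernoulliServicesPoissonArrivalsGenerator(
    demand_rate, buffer_processing_matrix, job_gen_seed=42)
state_initialiser = stinit.DeterministicCRWStateInitialiser(np.zeros((1, 1)))
env = RLControlledRandomWalk(cost_per_buffer, capacity, constituency_matrix, job_generator,
                             state_initialiser, job_conservation_flag=True)
assert env.num_buffers == 1 and env.num_activities == 1
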
def test_is_binary_zeros():
    a = np.zeros((2, 5))
    assert snc_tools.is_binary(a)
def test_is_binary_ones():
    a = np.ones((2, 5))
    assert snc_tools.is_binary(a)
def test_is_binary_not_binary():
    a = np.ones((2, 5))
    a[1, 1] = 0.1
    assert not snc_tools.is_binary(a)
def test_is_binary():
    a = np.zeros((2, 5))
    a[1, 1] = 1
    assert snc_tools.is_binary(a)