# Example no. 1
def test_pubsub_topic():
    """Check that events published through a topic are recorded by a subscriber.

    Two events are fired through the publisher created for ``DummyInterface``:
    ``event1`` without arguments and ``event2`` with two keyword arguments.
    The subscriber is expected to hold one record per event, in firing order,
    each carrying the originating interface method and the passed attributes.
    """
    pubsub = Pubsub(EventService())
    subscriber = DummySubscriber()
    # Without registration the subscriber is never connected to the pubsub and
    # cannot receive anything.
    # NOTE(review): `register_subscriber` follows the upstream Maze Pubsub API - confirm.
    pubsub.register_subscriber(subscriber)

    publisher = pubsub.create_event_topic(DummyInterface)

    # Fire both events in the order the assertions below rely on.
    publisher.event1()
    publisher.event2(param1=1, param2=2)

    assert len(subscriber.events) == 2

    # first record: the argument-less event
    record = subscriber.events[0]
    assert record.interface_method == DummyInterface.event1
    assert record.attributes == dict()

    # second record: the event carrying both keyword arguments
    record = subscriber.events[1]
    assert record.interface_method == DummyInterface.event2
    assert record.attributes == dict(param1=1, param2=2)
# Example no. 2
class Cutting2DCoreEnvironment(CoreEnv):
    """Environment for cutting 2D pieces based on the customer demand. Works as follows:
     - Keeps inventory of 2D pieces available for cutting and fulfilling the demand.
     - Produces a new demand for one piece in every step (here a static demand).
     - The agent should decide which piece from inventory to cut (and how) to fulfill the given demand.
     - What remains from the cut piece is put back in inventory.
     - All the time, one raw (full-size) piece is available in inventory.
       (If it gets cut, it is replenished in the next step.)
     - Rewards are calculated to motivate the agent to consume as few raw pieces as possible.
     - If inventory gets full, the oldest pieces get discarded.

    :param max_pieces_in_inventory: Size of the inventory.
    :param raw_piece_size: Size of a fresh raw (= full-size) piece.
    :param static_demand: Order to issue in each step.
    :param reward_aggregator: Either an instantiated aggregator or a configuration dictionary.
    """

    def __init__(self, max_pieces_in_inventory: int,
                 raw_piece_size: Tuple[int, int], static_demand: Tuple[int, int],
                 reward_aggregator: RewardAggregatorInterface):
        # The base initializer has to run first: `self.context` is read below
        # but never assigned in this class.
        super().__init__()

        self.max_pieces_in_inventory = max_pieces_in_inventory
        # stored as a tuple so that it can be compared with inventory pieces in step()
        self.raw_piece_size = tuple(raw_piece_size)
        self.current_demand = static_demand

        # initialize rendering
        self.renderer = Cutting2DRenderer()

        # init pubsub for event to reward routing
        self.pubsub = Pubsub(self.context.event_service)

        # KPIs calculation
        self.kpi_calculator = Cutting2dKpiCalculator()

        # setup environment (creates the inventory and the event topics)
        self._setup_env()

        # init reward and register it with pubsub
        self.reward_aggregator = reward_aggregator
        # NOTE(review): `register_subscriber` follows the upstream Maze Pubsub API - confirm.
        self.pubsub.register_subscriber(self.reward_aggregator)

    def _setup_env(self):
        """Setup environment: create a fresh inventory and the event topics."""
        inventory_events = self.pubsub.create_event_topic(InventoryEvents)
        self.inventory = Inventory(self.max_pieces_in_inventory,
                                   self.raw_piece_size, inventory_events)
        # step() only replenishes after a raw piece has been cut, so the very
        # first raw piece has to be placed here.
        # NOTE(review): `replenish_piece` restored from the upstream tutorial - confirm.
        self.inventory.replenish_piece()

        self.cutting_events = self.pubsub.create_event_topic(CuttingEvents)

    def step(
        self, maze_action: Cutting2DMazeAction
    ) -> Tuple[Cutting2DMazeState, np.array, bool, Dict[Any, Any]]:
        """Summary of the step (simplified, not necessarily respecting the actual order in the code):
        1. Check if the selected piece to cut is valid (i.e. in inventory, large enough etc.)
        2. Attempt the cutting
        3. Replenish a fresh piece if needed and return an appropriate reward

        :param maze_action: Cutting maze_action to take.
        :return: state, reward, done, info
        """
        # NOTE(review): the event-firing and inventory calls in this method were
        # missing from the source and are restored following the upstream Maze
        # Cutting-2D tutorial - confirm the names against CuttingEvents / Inventory.
        info = {}
        replenishment_needed = False

        # check if valid piece id was selected
        if maze_action.piece_id >= self.inventory.size():
            self.cutting_events.invalid_piece_selected()
        # perform cutting
        else:
            piece_to_cut = self.inventory.pieces[maze_action.piece_id]

            # attempt the cut
            if self.inventory.cut(maze_action, self.current_demand):
                self.cutting_events.valid_cut(current_demand=self.current_demand,
                                              piece_to_cut=piece_to_cut,
                                              raw_piece_size=self.raw_piece_size)
                # a new raw piece is only needed when the raw piece itself was cut
                replenishment_needed = piece_to_cut == self.raw_piece_size
            else:
                # assign a negative reward for invalid cutting attempts
                self.cutting_events.invalid_cut(current_demand=self.current_demand,
                                                piece_to_cut=piece_to_cut,
                                                raw_piece_size=self.raw_piece_size)

        # check if replenishment is required
        if replenishment_needed:
            # assign negative reward if a piece has to be replenished
            self.inventory.replenish_piece()

        # step maze_action finished, write step statistics
        self.inventory.log_step_statistics()

        # compile env state
        maze_state = self.get_maze_state()

        # aggregate reward from events
        reward = self.reward_aggregator.summarize_reward(maze_state)

        # this environment never signals done on its own
        return maze_state, reward, False, info

    def get_maze_state(self) -> Cutting2DMazeState:
        """Returns the current Cutting2DMazeState of the environment."""
        return Cutting2DMazeState(self.inventory.pieces,
                                  self.current_demand, self.raw_piece_size)

    def reset(self) -> Cutting2DMazeState:
        """Resets the environment to initial state."""
        # rebuild the inventory; otherwise the previous episode's pieces would leak into the new one
        self._setup_env()
        return self.get_maze_state()

    def close(self):
        """No additional cleanup necessary."""

    def seed(self, seed: int) -> None:
        """Seed random state of environment."""
        # No randomness in the env at this point

    def get_renderer(self) -> Cutting2DRenderer:
        """Cutting 2D renderer module."""
        return self.renderer

    def is_actor_done(self) -> bool:
        """Returns True if the just stepped actor is done, which is different to the done flag of the environment."""
        return False

    def actor_id(self) -> ActorID:
        """Returns the currently executed actor along with the policy id. The id is unique only with
        respect to the policies (every policy has its own actor 0).
        Note that identities of done actors can not be reused in the same rollout.

        :return: The current actor, as tuple (policy id, actor number).
        """
        return ActorID(step_key=0, agent_id=0)

    def agent_counts_dict(self) -> Dict[Union[str, int], int]:
        """Returns the count of agents for individual sub-steps (or -1 for dynamic agent count).

        As this is a single-step single-agent environment, in which 1 agent gets to act during sub-step 0,
        we return {0: 1}.
        """
        return {0: 1}

    def get_kpi_calculator(self) -> Cutting2dKpiCalculator:
        """KPIs are supported."""
        return self.kpi_calculator

    # --- lets ignore everything below this line for now ---

    def get_serializable_components(self) -> Dict[str, Any]:
        """List components that should be serialized as part of trajectory data."""
        # NOTE(review): the body of this method was not visible in the source;
        # an empty dict matches the return annotation - confirm against upstream.
        return {}
# Example no. 3
class CartPoleCoreEnvironment(CoreEnv):
    """This class holds core structure of the desired environment with the core method 'step'. This function especially
    should encode the behaviour of the env. In this example the OpenAI gym Cartpole-v1 env is implemented for
    the purpose of demonstrating an implementation.

    :param theta_threshold_radians: Angle at which to fail an episode (e.g., 12 * 2 * pi / 360 = 0.20943951).
    :param x_threshold: Position at which to fail an episode (e.g., 2.4).
    :param reward_aggregator: Either an instantiated aggregator or a configuration dictionary.
    """

    def __init__(self, theta_threshold_radians: float, x_threshold: float,
                 reward_aggregator: RewardAggregatorInterface):
        # The base initializer has to run first: `self.context` is read below
        # but never assigned in this class.
        super().__init__()

        self.theta_threshold_radians = theta_threshold_radians
        self.x_threshold = x_threshold

        # init pubsub for event to reward routing
        self.pubsub = Pubsub(self.context.event_service)

        # KPIs calculation
        self.kpi_calculator = CartPoleKpiCalculator()

        # init reward and register it with pubsub
        self.reward_aggregator = Factory(RewardAggregatorInterface).instantiate(reward_aggregator)
        # NOTE(review): `register_subscriber` follows the upstream Maze Pubsub API - confirm.
        self.pubsub.register_subscriber(self.reward_aggregator)

        # setup environment; the state stays undefined until reset() samples it
        self.cart_position: Optional[float] = None
        self.cart_velocity: Optional[float] = None
        self.pole_angle: Optional[float] = None
        self.pole_velocity: Optional[float] = None

        self.env_rng: Optional[np.random.RandomState] = None

        # The renderer needs the pole length, so the physical constants must
        # exist before it is built (previously `self.length` was read here
        # although only _setup_env() defined it).
        self._setup_physics_constants()

        # initialize rendering
        self.renderer = CartPoleRenderer(pole_length=self.length, x_threshold=self.x_threshold)

    def _setup_physics_constants(self) -> None:
        """Set the fixed physical parameters of the cart-pole system."""
        self.gravity = 9.8
        self.masscart = 1.0
        self.masspole = 0.1
        self.total_mass = (self.masspole + self.masscart)
        self.length = 0.5  # actually half the pole's length
        self.polemass_length = (self.masspole * self.length)
        self.force_mag = 10.0
        self.tau = 0.02  # seconds between state updates
        self.kinematics_integrator = 'euler'

    def _setup_env(self) -> None:
        """Setup environment: sample a start state and (re-)create the event topic.

        Requires ``self.env_rng`` to be set, i.e. ``seed`` must have been called.
        """
        # Setup env here: each state component is drawn uniformly from [-0.05, 0.05)
        self.cart_position = self.env_rng.uniform(low=-0.05, high=0.05, size=(1,))[0]
        self.cart_velocity = self.env_rng.uniform(low=-0.05, high=0.05, size=(1,))[0]
        self.pole_angle = self.env_rng.uniform(low=-0.05, high=0.05, size=(1,))[0]
        self.pole_velocity = self.env_rng.uniform(low=-0.05, high=0.05, size=(1,))[0]

        # restore the physical constants to their defaults
        self._setup_physics_constants()

        # Initialize the events for the env
        self.events = self.pubsub.create_event_topic(CartPoleEvents)
        self.reward_aggregator.steps_beyond_done = None

    def step(self, maze_action: CartPoleMazeAction) \
            -> Tuple[CartPoleMazeState, np.array, bool, Dict[Any, Any]]:
        """Summary of the step (simplified, not necessarily respecting the actual order in the code):
        * Update the cart position and velocity
        * Update the pole position and velocity
        * Update events
        * Calculate reward

        :param maze_action: MazeAction to take.
        :return: state, reward, done, info
        """
        info = {}
        # Implement you step function here and record events

        # the action only selects the direction of a fixed-magnitude force
        force = self.force_mag if maze_action.push_right else -self.force_mag
        costheta = math.cos(self.pole_angle)
        sintheta = math.sin(self.pole_angle)

        # For the interested reader:
        # https://coneural.org/florian/papers/05_cart_pole.pdf
        temp = (force + self.polemass_length * self.pole_velocity ** 2 * sintheta) / self.total_mass
        thetaacc = (self.gravity * sintheta - costheta * temp) / (
            self.length * (4.0 / 3.0 - self.masspole * costheta ** 2 / self.total_mass))
        xacc = temp - self.polemass_length * thetaacc * costheta / self.total_mass

        if self.kinematics_integrator == 'euler':
            # explicit euler: positions advance with the velocities of the previous step
            self.cart_position = self.cart_position + self.tau * self.cart_velocity
            self.cart_velocity = self.cart_velocity + self.tau * xacc
            self.pole_angle = self.pole_angle + self.tau * self.pole_velocity
            self.pole_velocity = self.pole_velocity + self.tau * thetaacc
        else:  # semi-implicit euler
            # velocities are updated first and the new values move the positions
            self.cart_velocity = self.cart_velocity + self.tau * xacc
            self.cart_position = self.cart_position + self.tau * self.cart_velocity
            self.pole_velocity = self.pole_velocity + self.tau * thetaacc
            self.pole_angle = self.pole_angle + self.tau * self.pole_velocity

        # the episode ends once the cart leaves the track or the pole tilts too far
        cart_out_of_bounds = self.cart_position < -self.x_threshold or self.cart_position > self.x_threshold
        pole_fallen = (self.pole_angle < -self.theta_threshold_radians
                       or self.pole_angle > self.theta_threshold_radians)
        done = bool(cart_out_of_bounds or pole_fallen)

        # NOTE(review): the docstring lists "Update events", but no CartPoleEvents
        # are fired through `self.events` in this method - the statements appear to
        # have been lost from the source; restore them from upstream, otherwise the
        # reward aggregator has no events to summarize.

        # compile env state
        maze_state = self.get_maze_state()

        # aggregate reward from events
        rewards = self.reward_aggregator.summarize_reward(maze_state)

        return maze_state, sum(rewards), done, info

    def get_maze_state(self) -> CartPoleMazeState:
        """Returns the current CartPoleMazeState of the environment."""
        return CartPoleMazeState(cart_position=self.cart_position, cart_velocity=self.cart_velocity,
                                 pole_angle=self.pole_angle, pole_angular_velocity=self.pole_velocity)

    def reset(self) -> CartPoleMazeState:
        """Resets the environment to initial state."""
        if self.env_rng is None:
            # reset() before seed(): fall back to an unseeded generator instead of failing on None
            self.seed(None)
        # sample a fresh start state; without this the state attributes stay None and step() fails
        self._setup_env()
        return self.get_maze_state()

    def close(self) -> None:
        """No additional cleanup necessary."""

    def seed(self, seed: Optional[int]) -> None:
        """Seed random state of environment.

        :param seed: Seed for the environment's random generator; None yields an unseeded generator.
        """
        self.env_rng = np.random.RandomState(seed)
        # NOTE(review): the source continued with `if seed is not None:` but the
        # body of that branch was missing; whatever it seeded in addition has to
        # be restored from upstream.

    def get_renderer(self) -> CartPoleRenderer:
        """CartPole renderer module."""
        return self.renderer

    def is_actor_done(self) -> bool:
        """Returns True if the just stepped actor is done, which is different to the done flag of the environment."""
        return False

    def actor_id(self) -> ActorID:
        """Returns the currently executed actor along with the policy id. The id is unique only with
        respect to the policies (every policy has its own actor 0).
        Note that identities of done actors can not be reused in the same rollout.

        :return: The current actor, as tuple (policy id, actor number).
        """
        return ActorID(step_key=0, agent_id=0)

    def agent_counts_dict(self) -> Dict[Union[str, int], int]:
        """Returns the count of agents for individual sub-steps (or -1 for dynamic agent count).

        As this is a single-step single-agent environment, in which 1 agent gets to act during sub-step 0,
        we return {0: 1}.
        """
        return {0: 1}

    def get_kpi_calculator(self) -> CartPoleKpiCalculator:
        """KPIs are supported."""
        return self.kpi_calculator

    def get_serializable_components(self) -> Dict[str, Any]:
        """List components that should be serialized as part of trajectory data."""
        # NOTE(review): no components are listed; the method implicitly returns
        # None although it is annotated as returning a dict.
# Example no. 4
class DummyCoreEnvironment(CoreEnv):
    """
    Does as little as possible, returns random actions

    :param observation_space: The observation space for the environment (in the state to observation interface)
    """

    def __init__(self, observation_space: gym.spaces.space.Space):
        # The base initializer has to run first: `self.context` is read below
        # but never assigned in this class.
        super().__init__()

        self.pubsub = Pubsub(self.context.event_service)
        self.dummy_core_events = self.pubsub.create_event_topic(DummyEnvEvents)

        # NOTE(review): the aggregator is not registered with the pubsub here and
        # no dummy events are fired in step(); confirm against upstream whether
        # those statements were lost.
        self.reward_aggregator = RewardAggregator()

        self.observation_space = observation_space

        # initialize rendering
        self.renderer = DummyMatplotlibRenderer()

    def step(
        self, maze_action: Dict
    ) -> Tuple[Dict[str, np.ndarray], float, bool, Optional[Dict]]:
        """
        Ignores the action and returns a freshly sampled state.

        :param maze_action: Environment MazeAction to take.
        :return: state, reward, done, info
        """
        state = self.get_maze_state()
        reward = self.reward_aggregator.summarize_reward()
        # never done, no additional info
        return state, reward, False, {}

    def get_maze_state(self) -> Dict[str, np.ndarray]:
        """
        :returns Random observation
        """
        return self.observation_space.sample()

    def reset(self) -> Dict[str, np.ndarray]:
        """
        Does nothing
        :return: The environment state
        """
        return self.get_maze_state()

    def render(self, mode='human'):
        """
        Not implemented
        """

    def close(self):
        """
        Not implemented
        """

    def seed(self, seed: int):
        """
        Sets the seed for the environment
        :param seed: The given seed
        """
        # No randomness in the env

    def get_serializable_components(self) -> Dict[str, Any]:
        """
        Not implemented
        :return: An empty dict
        """
        return {}

    def get_renderer(self) -> Optional[Renderer]:
        """
        :return: The renderer created in the constructor
        """
        return self.renderer

    def actor_id(self) -> ActorID:
        """Single-step, single-agent environment"""
        return ActorID(step_key=0, agent_id=0)

    def is_actor_done(self) -> bool:
        """
        Not implemented
        :return: False
        """
        return False

    def agent_counts_dict(self) -> Dict[StepKeyType, int]:
        """Single-step, single agent env."""
        return {0: 1}