def test_wrong_arguments():
    pubsub = Pubsub(EventService())
    topic = pubsub.create_event_topic(DummyInterface)

    with pytest.raises(TypeError):
        # noinspection PyArgumentList
        topic.event2(wrong_name=1)
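# For context: a minimal sketch of what the event interface used above could
# look like. Event topics are plain (abstract) classes whose methods only
# declare the event signature, so calling event2 with an unknown keyword
# raises a TypeError. The actual DummyInterface test fixture may differ.
from abc import ABC


class DummyInterface(ABC):
    """Hypothetical event interface with two events."""

    def event1(self):
        """An event without attributes."""

    def event2(self, param1, param2):
        """An event with two attributes."""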
def __init__(self,
             max_pieces_in_inventory: int,
             raw_piece_size: Tuple[int, int],
             static_demand: List[Tuple[int, int]],
             reward_aggregator: RewardAggregatorInterface):
    super().__init__()

    self.max_pieces_in_inventory = max_pieces_in_inventory
    self.raw_piece_size = tuple(raw_piece_size)
    self.static_demand = static_demand

    # init current demand
    self.current_demand = None
    self.demand_idx = 0

    # initialize rendering
    self.renderer = Cutting2DRenderer()

    # init pubsub for event to reward routing
    self.pubsub = Pubsub(self.context.event_service)

    # KPIs calculation
    self.kpi_calculator = Cutting2dKpiCalculator()

    # setup environment
    self._setup_env()

    # init reward and register it with pubsub
    self.reward_aggregator = reward_aggregator
    self.pubsub.register_subscriber(self.reward_aggregator)
def __init__(self, theta_threshold_radians: float, x_threshold: float,
             reward_aggregator: RewardAggregatorInterface):
    super().__init__()

    self.theta_threshold_radians = theta_threshold_radians
    self.x_threshold = x_threshold

    # init pubsub for event to reward routing
    self.pubsub = Pubsub(self.context.event_service)

    # KPIs calculation
    self.kpi_calculator = CartPoleKpiCalculator()

    # init reward and register it with pubsub
    self.reward_aggregator = Factory(RewardAggregatorInterface).instantiate(reward_aggregator)
    self.pubsub.register_subscriber(self.reward_aggregator)

    # setup environment
    self.cart_position = None
    self.cart_velocity = None
    self.pole_angle = None
    self.pole_velocity = None
    self.env_rng: Optional[np.random.RandomState] = None
    self.seed(None)
    self._setup_env()

    # initialize rendering
    self.renderer = CartPoleRenderer(pole_length=self.length, x_threshold=self.x_threshold)
def __init__(self, max_pieces_in_inventory: int, raw_piece_size: Tuple[int, int],
             static_demand: Tuple[int, int]):
    super().__init__()
    ...
    # init pubsub for event to reward routing
    self.pubsub = Pubsub(self.context.event_service)

    # KPIs calculation
    self.kpi_calculator = Cutting2dKpiCalculator()
def __init__(self, observation_space: gym.spaces.space.Space):
    super().__init__()

    self.pubsub = Pubsub(self.context.event_service)
    self.dummy_core_events = self.pubsub.create_event_topic(DummyEnvEvents)

    self.reward_aggregator = RewardAggregator()
    self.pubsub.register_subscriber(self.reward_aggregator)

    self.observation_space = observation_space

    # initialize rendering
    self.renderer = DummyMatplotlibRenderer()
def __init__(self, observation_space):
    super().__init__(observation_space)
    self.reward_aggregator = CustomDummyRewardAggregator()
    self.maze_state = self.observation_space.sample()
    self.pubsub: Pubsub = Pubsub(self.context.event_service)
    self.pubsub.register_subscriber(self.reward_aggregator)
    self.base_event_publisher = self.pubsub.create_event_topic(BaseEnvEvents)
    self.renderer = DummyRenderer()
def __init__(self, observation_space):
    super().__init__(observation_space)
    self.reward_aggregator = CustomDummyRewardAggregator()
    self.pubsub: Pubsub = Pubsub(self.context.event_service)
    self.pubsub.register_subscriber(self.reward_aggregator)
    self.base_event_publisher = self.pubsub.create_event_topic(BaseEnvEvents)
    self.kpi_calculator = CustomDummyKPICalculator()
def __init__(self, max_pieces_in_inventory: int, raw_piece_size: Tuple[int, int],
             static_demand: Tuple[int, int]):
    super().__init__()

    self.max_pieces_in_inventory = max_pieces_in_inventory
    self.raw_piece_size = tuple(raw_piece_size)
    self.current_demand = static_demand

    # initialize rendering
    self.renderer = Cutting2DRenderer()

    # init pubsub for event to reward routing
    self.pubsub = Pubsub(self.context.event_service)

    # KPIs calculation
    self.kpi_calculator = Cutting2dKpiCalculator()

    # setup environment
    self._setup_env()
def test_pubsub_topic():
    pubsub = Pubsub(EventService())
    subscriber = DummySubscriber()
    publisher = pubsub.create_event_topic(DummyInterface)
    pubsub.register_subscriber(subscriber)

    publisher.event1()
    publisher.event2(param1=1, param2=2)

    assert len(subscriber.events) == 2

    record = subscriber.events[0]
    assert record.interface_method == DummyInterface.event1
    assert record.attributes == dict()

    record = subscriber.events[1]
    assert record.interface_method == DummyInterface.event2
    assert record.attributes == dict(param1=1, param2=2)
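# A subscriber that satisfies the assertions above could look roughly like
# this sketch (assuming the Subscriber base class exposes get_observed_events
# and a notify_event callback; the real DummySubscriber fixture may differ).
class DummySubscriber(Subscriber):
    """Hypothetical subscriber recording every event it is notified of."""

    def __init__(self):
        super().__init__()
        self.events = []

    def get_observed_events(self):
        # subscribe to both events of the dummy interface
        return [DummyInterface.event1, DummyInterface.event2]

    def notify_event(self, event):
        self.events.append(event)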
class Cutting2DCoreEnvironment(CoreEnv):
    """Environment for cutting 2D pieces based on the customer demand. Works as follows:

    - Keeps inventory of 2D pieces available for cutting and fulfilling the demand.
    - Produces a new demand for one piece in every step (here a static demand).
    - The agent should decide which piece from inventory to cut (and how) to fulfill the given demand.
    - What remains from the cut piece is put back in inventory.
    - All the time, one raw (full-size) piece is available in inventory.
      (If it gets cut, it is replenished in the next step.)
    - Rewards are calculated to motivate the agent to consume as few raw pieces as possible.
    - If inventory gets full, the oldest pieces get discarded.

    :param max_pieces_in_inventory: Size of the inventory.
    :param raw_piece_size: Size of a fresh raw (= full-size) piece.
    :param static_demand: Order to issue in each step.
    :param reward_aggregator: Either an instantiated aggregator or a configuration dictionary.
    """

    def __init__(self, max_pieces_in_inventory: int, raw_piece_size: Tuple[int, int],
                 static_demand: Tuple[int, int], reward_aggregator: RewardAggregatorInterface):
        super().__init__()

        self.max_pieces_in_inventory = max_pieces_in_inventory
        self.raw_piece_size = tuple(raw_piece_size)
        self.current_demand = static_demand

        # initialize rendering
        self.renderer = Cutting2DRenderer()

        # init pubsub for event to reward routing
        self.pubsub = Pubsub(self.context.event_service)

        # KPIs calculation
        self.kpi_calculator = Cutting2dKpiCalculator()

        # setup environment
        self._setup_env()

        # init reward and register it with pubsub
        self.reward_aggregator = reward_aggregator
        self.pubsub.register_subscriber(self.reward_aggregator)

    def _setup_env(self):
        """Setup environment."""
        inventory_events = self.pubsub.create_event_topic(InventoryEvents)
        self.inventory = Inventory(self.max_pieces_in_inventory, self.raw_piece_size, inventory_events)
        self.inventory.replenish_piece()

        self.cutting_events = self.pubsub.create_event_topic(CuttingEvents)

    def step(self, maze_action: Cutting2DMazeAction) \
            -> Tuple[Cutting2DMazeState, np.array, bool, Dict[Any, Any]]:
        """Summary of the step (simplified, not necessarily respecting the actual order in the code):
        1. Check if the selected piece to cut is valid (i.e. in inventory, large enough etc.)
        2. Attempt the cutting
        3. Replenish a fresh piece if needed and return an appropriate reward

        :param maze_action: Cutting maze_action to take.
        :return: state, reward, done, info
        """
        info = {}
        replenishment_needed = False

        # check if valid piece id was selected
        if maze_action.piece_id >= self.inventory.size():
            self.cutting_events.invalid_piece_selected()
        # perform cutting
        else:
            piece_to_cut = self.inventory.pieces[maze_action.piece_id]

            # attempt the cut
            if self.inventory.cut(maze_action, self.current_demand):
                self.cutting_events.valid_cut(current_demand=self.current_demand, piece_to_cut=piece_to_cut,
                                              raw_piece_size=self.raw_piece_size)
                replenishment_needed = piece_to_cut == self.raw_piece_size
            else:
                # assign a negative reward for invalid cutting attempts
                self.cutting_events.invalid_cut(current_demand=self.current_demand, piece_to_cut=piece_to_cut,
                                                raw_piece_size=self.raw_piece_size)

        # check if replenishment is required
        if replenishment_needed:
            self.inventory.replenish_piece()
            # assign negative reward if a piece has to be replenished

        # step maze_action finished, write step statistics
        self.inventory.log_step_statistics()

        # compile env state
        maze_state = self.get_maze_state()

        # aggregate reward from events
        reward = self.reward_aggregator.summarize_reward(maze_state)

        return maze_state, reward, False, info

    def get_maze_state(self) -> Cutting2DMazeState:
        """Returns the current Cutting2DMazeState of the environment."""
        return Cutting2DMazeState(self.inventory.pieces, self.max_pieces_in_inventory,
                                  self.current_demand, self.raw_piece_size)

    def reset(self) -> Cutting2DMazeState:
        """Resets the environment to initial state."""
        self._setup_env()
        return self.get_maze_state()

    def close(self):
        """No additional cleanup necessary."""

    def seed(self, seed: int) -> None:
        """Seed random state of environment."""
        # No randomness in the env at this point
        pass

    def get_renderer(self) -> Cutting2DRenderer:
        """Cutting 2D renderer module."""
        return self.renderer

    def is_actor_done(self) -> bool:
        """Returns True if the just stepped actor is done, which is different to the done flag of the environment."""
        return False

    def actor_id(self) -> ActorID:
        """Returns the currently executed actor along with the policy id. The id is unique only with
        respect to the policies (every policy has its own actor 0).

        Note that identities of done actors can not be reused in the same rollout.

        :return: The current actor, as tuple (policy id, actor number).
        """
        return ActorID(step_key=0, agent_id=0)

    @property
    def agent_counts_dict(self) -> Dict[Union[str, int], int]:
        """Returns the count of agents for individual sub-steps (or -1 for dynamic agent count).

        As this is a single-step single-agent environment, in which 1 agent gets to act during
        sub-step 0, we return {0: 1}.
        """
        return {0: 1}

    def get_kpi_calculator(self) -> Cutting2dKpiCalculator:
        """KPIs are supported."""
        return self.kpi_calculator

    # --- let's ignore everything below this line for now ---

    def get_serializable_components(self) -> Dict[str, Any]:
        pass
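# Minimal usage sketch for the core env above. DefaultRewardAggregator is a
# hypothetical stand-in for any RewardAggregatorInterface implementation, and
# the Cutting2DMazeAction constructor arguments are assumptions for illustration.
env = Cutting2DCoreEnvironment(max_pieces_in_inventory=200,
                               raw_piece_size=(100, 100),
                               static_demand=(30, 15),
                               reward_aggregator=DefaultRewardAggregator())

maze_state = env.reset()

# cut the raw piece (piece_id 0) without rotation
maze_state, reward, done, info = env.step(
    Cutting2DMazeAction(piece_id=0, rotate=False, reverse_cutting_order=False))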
class Cutting2DCoreEnvironment(CoreEnv):

    def __init__(self, max_pieces_in_inventory: int, raw_piece_size: Tuple[int, int],
                 static_demand: Tuple[int, int]):
        super().__init__()
        ...
        # init pubsub for event to reward routing
        self.pubsub = Pubsub(self.context.event_service)

        # KPIs calculation
        self.kpi_calculator = Cutting2dKpiCalculator()

    def _setup_env(self):
        """Setup environment."""
        inventory_events = self.pubsub.create_event_topic(InventoryEvents)
        self.inventory = Inventory(self.max_pieces_in_inventory, self.raw_piece_size, inventory_events)
        self.inventory.replenish_piece()

        self.cutting_events = self.pubsub.create_event_topic(CuttingEvents)

    def step(self, maze_action: Cutting2DMazeAction) \
            -> Tuple[Cutting2DMazeState, np.array, bool, Dict[Any, Any]]:
        """Summary of the step (simplified, not necessarily respecting the actual order in the code):
        1. Check if the selected piece to cut is valid (i.e. in inventory, large enough etc.)
        2. Attempt the cutting
        3. Replenish a fresh piece if needed and return an appropriate reward

        :param maze_action: Cutting MazeAction to take.
        :return: maze_state, reward, done, info
        """
        info, reward = {}, 0
        replenishment_needed = False

        # check if valid piece id was selected
        if maze_action.piece_id >= self.inventory.size():
            self.cutting_events.invalid_piece_selected()
        # perform cutting
        else:
            piece_to_cut = self.inventory.pieces[maze_action.piece_id]

            # attempt the cut
            if self.inventory.cut(maze_action, self.current_demand):
                self.cutting_events.valid_cut(current_demand=self.current_demand, piece_to_cut=piece_to_cut,
                                              raw_piece_size=self.raw_piece_size)
                replenishment_needed = piece_to_cut == self.raw_piece_size
            else:
                # assign a negative reward for invalid cutting attempts
                self.cutting_events.invalid_cut(current_demand=self.current_demand, piece_to_cut=piece_to_cut,
                                                raw_piece_size=self.raw_piece_size)
                reward = -2

        # check if replenishment is required
        if replenishment_needed:
            self.inventory.replenish_piece()
            # assign negative reward if a piece has to be replenished
            reward = -1

        # step execution finished, write step statistics
        self.inventory.log_step_statistics()

        # compile env state
        maze_state = self.get_maze_state()

        return maze_state, reward, False, info

    def get_kpi_calculator(self) -> Cutting2dKpiCalculator:
        """KPIs are supported."""
        return self.kpi_calculator
class CartPoleCoreEnvironment(CoreEnv):
    """This class holds the core structure of the desired environment, built around the core
    method 'step', which should encode the behaviour of the env. In this example the OpenAI Gym
    CartPole-v1 env is implemented to demonstrate such an implementation.

    :param theta_threshold_radians: Angle at which to fail an episode (e.g., 12 * 2 * pi / 360 = 0.20943951).
    :param x_threshold: Position at which to fail an episode (e.g., 2.4).
    :param reward_aggregator: Either an instantiated aggregator or a configuration dictionary.
    """

    def __init__(self, theta_threshold_radians: float, x_threshold: float,
                 reward_aggregator: RewardAggregatorInterface):
        super().__init__()

        self.theta_threshold_radians = theta_threshold_radians
        self.x_threshold = x_threshold

        # init pubsub for event to reward routing
        self.pubsub = Pubsub(self.context.event_service)

        # KPIs calculation
        self.kpi_calculator = CartPoleKpiCalculator()

        # init reward and register it with pubsub
        self.reward_aggregator = Factory(RewardAggregatorInterface).instantiate(reward_aggregator)
        self.pubsub.register_subscriber(self.reward_aggregator)

        # setup environment
        self.cart_position = None
        self.cart_velocity = None
        self.pole_angle = None
        self.pole_velocity = None
        self.env_rng: Optional[np.random.RandomState] = None
        self.seed(None)
        self._setup_env()

        # initialize rendering
        self.renderer = CartPoleRenderer(pole_length=self.length, x_threshold=self.x_threshold)

    def _setup_env(self) -> None:
        """Setup environment."""
        self.cart_position = self.env_rng.uniform(low=-0.05, high=0.05, size=(1,))[0]
        self.cart_velocity = self.env_rng.uniform(low=-0.05, high=0.05, size=(1,))[0]
        self.pole_angle = self.env_rng.uniform(low=-0.05, high=0.05, size=(1,))[0]
        self.pole_velocity = self.env_rng.uniform(low=-0.05, high=0.05, size=(1,))[0]

        self.gravity = 9.8
        self.masscart = 1.0
        self.masspole = 0.1
        self.total_mass = (self.masspole + self.masscart)
        self.length = 0.5  # actually half the pole's length
        self.polemass_length = (self.masspole * self.length)
        self.force_mag = 10.0
        self.tau = 0.02  # seconds between state updates
        self.kinematics_integrator = 'euler'

        # Initialize the events for the env
        self.events = self.pubsub.create_event_topic(CartPoleEvents)
        self.reward_aggregator.steps_beyond_done = None

    @override(CoreEnv)
    def step(self, maze_action: CartPoleMazeAction) \
            -> Tuple[CartPoleMazeState, np.array, bool, Dict[Any, Any]]:
        """Summary of the step (simplified, not necessarily respecting the actual order in the code):

        * Update the cart position and velocity
        * Update the pole position and velocity
        * Update events
        * Calculate reward

        :param maze_action: MazeAction to take.
        :return: state, reward, done, info
        """
        info = {}

        # Implement your step function here and record events
        force = self.force_mag if maze_action.push_right else -self.force_mag
        costheta = math.cos(self.pole_angle)
        sintheta = math.sin(self.pole_angle)

        # For the interested reader:
        # https://coneural.org/florian/papers/05_cart_pole.pdf
        temp = (force + self.polemass_length * self.pole_velocity ** 2 * sintheta) / self.total_mass
        thetaacc = (self.gravity * sintheta - costheta * temp) / \
                   (self.length * (4.0 / 3.0 - self.masspole * costheta ** 2 / self.total_mass))
        xacc = temp - self.polemass_length * thetaacc * costheta / self.total_mass

        if self.kinematics_integrator == 'euler':
            self.cart_position = self.cart_position + self.tau * self.cart_velocity
            self.cart_velocity = self.cart_velocity + self.tau * xacc
            self.pole_angle = self.pole_angle + self.tau * self.pole_velocity
            self.pole_velocity = self.pole_velocity + self.tau * thetaacc
        else:  # semi-implicit euler
            self.cart_velocity = self.cart_velocity + self.tau * xacc
            self.cart_position = self.cart_position + self.tau * self.cart_velocity
            self.pole_velocity = self.pole_velocity + self.tau * thetaacc
            self.pole_angle = self.pole_angle + self.tau * self.pole_velocity

        done = False
        if self.cart_position < -self.x_threshold or self.cart_position > self.x_threshold:
            done = True
            self.events.cart_moved_away()
        if self.pole_angle < -self.theta_threshold_radians or self.pole_angle > self.theta_threshold_radians:
            done = True
            self.events.pole_fell_over()

        self.events.cart_velocity(velocity=self.cart_velocity)

        # compile env state
        maze_state = self.get_maze_state()

        # aggregate reward from events
        rewards = self.reward_aggregator.summarize_reward(maze_state)

        return maze_state, sum(rewards), done, info

    @override(CoreEnv)
    def get_maze_state(self) -> CartPoleMazeState:
        """Returns the current CartPoleMazeState of the environment."""
        return CartPoleMazeState(cart_position=self.cart_position,
                                 cart_velocity=self.cart_velocity,
                                 pole_angle=self.pole_angle,
                                 pole_angular_velocity=self.pole_velocity)

    @override(CoreEnv)
    def reset(self) -> CartPoleMazeState:
        """Resets the environment to initial state."""
        self._setup_env()
        return self.get_maze_state()

    @override(CoreEnv)
    def close(self) -> None:
        """No additional cleanup necessary."""
        pass

    @override(CoreEnv)
    def seed(self, seed: Optional[int]) -> None:
        """Seed random state of environment."""
        self.env_rng = np.random.RandomState(seed)
        if seed is not None:
            self._setup_env()

    @override(CoreEnv)
    def get_renderer(self) -> CartPoleRenderer:
        """CartPole renderer module."""
        return self.renderer

    @override(CoreEnv)
    def is_actor_done(self) -> bool:
        """Returns True if the just stepped actor is done, which is different to the done flag of the environment."""
        return False

    @override(CoreEnv)
    def actor_id(self) -> ActorID:
        """Returns the currently executed actor along with the policy id. The id is unique only with
        respect to the policies (every policy has its own actor 0).

        Note that identities of done actors can not be reused in the same rollout.

        :return: The current actor, as tuple (policy id, actor number).
        """
        return ActorID(step_key=0, agent_id=0)

    @property
    @override(CoreEnv)
    def agent_counts_dict(self) -> Dict[Union[str, int], int]:
        """Returns the count of agents for individual sub-steps (or -1 for dynamic agent count).

        As this is a single-step single-agent environment, in which 1 agent gets to act during
        sub-step 0, we return {0: 1}.
        """
        return {0: 1}

    @override(CoreEnv)
    def get_kpi_calculator(self) -> CartPoleKpiCalculator:
        """KPIs are supported."""
        return self.kpi_calculator

    @override(CoreEnv)
    def get_serializable_components(self) -> Dict[str, Any]:
        """List components that should be serialized as part of trajectory data."""
        pass
class DummyCoreEnvironment(CoreEnv):
    """Does as little as possible and returns random observations.

    :param observation_space: The observation space for the environment (in the state to observation interface)
    """

    def __init__(self, observation_space: gym.spaces.space.Space):
        super().__init__()

        self.pubsub = Pubsub(self.context.event_service)
        self.dummy_core_events = self.pubsub.create_event_topic(DummyEnvEvents)

        self.reward_aggregator = RewardAggregator()
        self.pubsub.register_subscriber(self.reward_aggregator)

        self.observation_space = observation_space

        # initialize rendering
        self.renderer = DummyMatplotlibRenderer()

    def step(self, maze_action: Dict) -> Tuple[Dict[str, np.ndarray], float, bool, Optional[Dict]]:
        """
        :param maze_action: Environment MazeAction to take.
        :return: state, reward, done, info
        """
        self.dummy_core_events.twice_per_step(3)
        self.dummy_core_events.twice_per_step(7)

        return self.get_maze_state(), self.reward_aggregator.summarize_reward(), False, {}

    def get_maze_state(self) -> Dict[str, np.ndarray]:
        """:return: A random observation."""
        return self.observation_space.sample()

    def reset(self) -> Dict[str, np.ndarray]:
        """Does nothing.

        :return: The environment state
        """
        return self.get_maze_state()

    def render(self, mode='human'):
        """Not implemented."""
        pass

    def close(self):
        """Not implemented."""
        pass

    def seed(self, seed: int):
        """Sets the seed for the environment.

        :param seed: The given seed
        """
        # No randomness in the env
        pass

    def get_serializable_components(self) -> Dict[str, Any]:
        """Not implemented.

        :return: An empty dict
        """
        return {}

    def get_renderer(self) -> Optional[Renderer]:
        """:return: The renderer."""
        return self.renderer

    def actor_id(self) -> ActorID:
        """Single-step, single-agent environment."""
        return ActorID(step_key=0, agent_id=0)

    def is_actor_done(self) -> bool:
        """Not implemented.

        :return: False
        """
        return False

    @property
    def agent_counts_dict(self) -> Dict[StepKeyType, int]:
        """Single-step, single-agent env."""
        return {0: 1}
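# Quick usage sketch. The dict observation space below is only an illustrative
# choice; any gym space works, since the env simply samples from it.
import gym
import numpy as np

observation_space = gym.spaces.Dict(
    {"observation": gym.spaces.Box(low=0.0, high=1.0, shape=(4,), dtype=np.float32)})

env = DummyCoreEnvironment(observation_space)
maze_state = env.reset()
maze_state, reward, done, info = env.step(maze_action={})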