def __init__(self,
             action_scheme: ActionScheme,
             reward_scheme: RewardScheme,
             observer: Observer,
             stopper: Stopper,
             informer: Informer,
             renderer: Renderer,
             min_periods: int = None,
             **kwargs) -> None:
    """Wire the components together and attach a shared clock to each.

    Parameters
    ----------
    action_scheme : `ActionScheme`
        Stored as `self.action_scheme`; supplies `self.action_space`.
    reward_scheme : `RewardScheme`
        Stored as `self.reward_scheme`.
    observer : `Observer`
        Stored as `self.observer`; supplies `self.observation_space`.
    stopper : `Stopper`
        Stored as `self.stopper`.
    informer : `Informer`
        Stored as `self.informer`.
    renderer : `Renderer`
        Stored as `self.renderer`.
    min_periods : int, optional
        Stored as `self.min_periods`; not read anywhere in this method.
    kwargs : keyword arguments
        Recognised keys: `enable_logger` (default False), `logger_name`
        (default `__name__`) and `log_level` (default `logging.DEBUG`).
    """
    super().__init__()
    self.clock = Clock()

    self.action_scheme = action_scheme
    self.reward_scheme = reward_scheme
    self.observer = observer
    self.stopper = stopper
    self.informer = informer
    self.renderer = renderer
    self.min_periods = min_periods

    # Must run after the component attributes above are set, because
    # `self.components` reads them.
    for component in self.components.values():
        component.clock = self.clock

    self.action_space = action_scheme.action_space
    self.observation_space = observer.observation_space

    # Logging is opt-in; `self.logger` only exists when it is enabled.
    self._enable_logger = kwargs.get('enable_logger', False)
    if self._enable_logger:
        logger_name = kwargs.get('logger_name', __name__)
        log_level = kwargs.get('log_level', logging.DEBUG)
        self.logger = logging.getLogger(logger_name)
        self.logger.setLevel(log_level)
def test_basic_clock_init():
    """A newly created clock is truthy and has `start` and `step` at 0."""
    fresh_clock = Clock()

    assert fresh_clock
    assert fresh_clock.start == 0
    assert fresh_clock.step == 0
class TradingEnv(gym.Env, TimeIndexed):
    """A Gym-compatible trading environment for reinforcement learning.

    Parameters
    ----------
    action_scheme : `ActionScheme`
        Performs the chosen action at each step of the environment; also
        supplies the environment's `action_space`.
    reward_scheme : `RewardScheme`
        Computes the reward after each step of the environment.
    observer : `Observer`
        Generates the observation after each step of the environment;
        also supplies the environment's `observation_space`.
    stopper : `Stopper`
        Decides after each step whether the episode is complete.
    informer : `Informer`
        Provides the information returned after each step.
    renderer : `Renderer`
        Renders, saves and closes the view of the environment.
    min_periods : int, optional
        Stored as `self.min_periods`; this class does not read it itself.
    kwargs : keyword arguments
        Additional keyword arguments. Recognised here: `enable_logger`
        (default False), `logger_name` (default `__name__`) and
        `log_level` (default `logging.DEBUG`).
    """

    agent_id: str = None
    episode_id: str = None

    def __init__(self,
                 action_scheme: ActionScheme,
                 reward_scheme: RewardScheme,
                 observer: Observer,
                 stopper: Stopper,
                 informer: Informer,
                 renderer: Renderer,
                 min_periods: int = None,
                 **kwargs) -> None:
        super().__init__()
        self.clock = Clock()

        self.action_scheme = action_scheme
        self.reward_scheme = reward_scheme
        self.observer = observer
        self.stopper = stopper
        self.informer = informer
        self.renderer = renderer
        self.min_periods = min_periods

        # Must run after the component attributes above are set, because
        # `self.components` reads them.
        for component in self.components.values():
            component.clock = self.clock

        self.action_space = action_scheme.action_space
        self.observation_space = observer.observation_space

        # Logging is opt-in; `self.logger` only exists when it is enabled.
        self._enable_logger = kwargs.get('enable_logger', False)
        if self._enable_logger:
            logger_name = kwargs.get('logger_name', __name__)
            log_level = kwargs.get('log_level', logging.DEBUG)
            self.logger = logging.getLogger(logger_name)
            self.logger.setLevel(log_level)

    @property
    def components(self) -> 'Dict[str, Component]':
        """The components of the environment, keyed by attribute name.

        A new dictionary is built on every access.
        (`Dict[str,Component]`, read-only)
        """
        component_names = (
            "action_scheme",
            "reward_scheme",
            "observer",
            "stopper",
            "informer",
            "renderer",
        )
        return {name: getattr(self, name) for name in component_names}

    def step(self, action: Any) -> 'Tuple[np.array, float, bool, dict]':
        """Makes one step through the environment.

        Parameters
        ----------
        action : Any
            An action to perform on the environment.

        Returns
        -------
        `np.array`
            The observation of the environment after the action has been
            performed.
        float
            The computed reward for performing the action.
        bool
            Whether or not the episode is complete.
        dict
            The information gathered after completing the step.
        """
        self.action_scheme.perform(self, action)

        observation = self.observer.observe(self)
        step_reward = self.reward_scheme.reward(self)
        is_done = self.stopper.stop(self)
        step_info = self.informer.info(self)

        # The clock advances only after every component has been queried.
        self.clock.increment()

        return observation, step_reward, is_done, step_info

    def reset(self) -> 'np.array':
        """Resets the environment.

        Assigns a new `episode_id`, resets the clock and every component
        that has a `reset` attribute, then takes an observation and
        advances the clock.

        Returns
        -------
        obs : `np.array`
            The first observation of the environment.
        """
        self.episode_id = str(uuid.uuid4())
        self.clock.reset()

        for component in self.components.values():
            if not hasattr(component, "reset"):
                continue
            component.reset()

        first_observation = self.observer.observe(self)
        self.clock.increment()

        return first_observation

    def render(self, **kwargs) -> None:
        """Renders the environment, forwarding `kwargs` to the renderer."""
        self.renderer.render(self, **kwargs)

    def save(self) -> None:
        """Saves the rendered view of the environment via the renderer."""
        self.renderer.save()

    def close(self) -> None:
        """Closes the environment by closing the renderer."""
        self.renderer.close()
def test_basic_clock_increment():
    """A single `increment()` on a new clock leaves `step` at 1."""
    ticking_clock = Clock()

    ticking_clock.increment()

    assert ticking_clock.step == 1