Example #1
class TradingEnvironment(gym.Env, TimeIndexed):
    """A trading environments made for use with Gym-compatible reinforcement learning algorithms."""

    agent_id: str = None
    episode_id: str = None

    def __init__(self,
                 portfolio: Union[Portfolio, str],
                 action_scheme: Union[ActionScheme, str],
                 reward_scheme: Union[RewardScheme, str],
                 feed: DataFeed = None,
                 window_size: int = 1,
                 use_internal: bool = True,
                 **kwargs):
        """
        Arguments:
            portfolio: The `Portfolio` of wallets used to submit and execute orders from.
            action_scheme:  The component for transforming an action into an `Order` at each timestep.
            reward_scheme: The component for determining the reward at each timestep.
            feed (optional): The pipeline of features to pass the observations through.
            kwargs (optional): Additional arguments for tuning the environment, logging, etc.
        """
        super().__init__()

        self.portfolio = portfolio
        self.action_scheme = action_scheme
        self.reward_scheme = reward_scheme
        self.feed = feed
        self.window_size = window_size
        self.use_internal = use_internal

        if self.feed:
            self._external_keys = self.feed.next().keys()
            self.feed.reset()

        self.history = ObservationHistory(window_size=window_size)
        self._broker = Broker(exchanges=self.portfolio.exchanges)

        self.clock = Clock()
        self.action_space = None
        self.observation_space = None
        self.viewer = None

        self._enable_logger = kwargs.get('enable_logger', False)
        self._observation_dtype = kwargs.get('dtype', np.float32)
        self._observation_lows = kwargs.get('observation_lows', 0)
        self._observation_highs = kwargs.get('observation_highs', 1)

        if self._enable_logger:
            self.logger = logging.getLogger(kwargs.get('logger_name', __name__))
            self.logger.setLevel(kwargs.get('log_level', logging.DEBUG))

        logging.getLogger('tensorflow').disabled = kwargs.get('disable_tensorflow_logger', True)

        self.compile()

    def compile(self):
        """
        Sets the observation space and the action space of the environment.
        Creates the internal feed and initializes the environment's components.
        """
        for component in [self._broker, self.portfolio, self.action_scheme, self.reward_scheme]:
            component.clock = self.clock

        self.action_scheme.set_pairs(exchange_pairs=self.portfolio.exchange_pairs)
        self.action_space = Discrete(len(self.action_scheme))

        if not self.feed:
            self.feed = create_internal_feed(self.portfolio)
        else:
            self.feed = self.feed + create_internal_feed(self.portfolio)

        initial_obs = self.feed.next()
        n_features = len(initial_obs.keys()) if self.use_internal else len(self._external_keys)

        self.observation_space = Box(
            low=self._observation_lows,
            high=self._observation_highs,
            shape=(self.window_size, n_features),
            dtype=self._observation_dtype
        )

        self.feed.reset()

    @property
    def portfolio(self) -> Portfolio:
        """The portfolio of instruments currently held on this exchange."""
        return self._portfolio

    @portfolio.setter
    def portfolio(self, portfolio: Union[Portfolio, str]):
        self._portfolio = wallets.get(portfolio) if isinstance(portfolio, str) else portfolio

    @property
    def broker(self) -> Broker:
        """The broker used to execute orders within the environment."""
        return self._broker

    @property
    def episode_trades(self) -> Dict[str, 'Trade']:
        """A dictionary of trades made this episode, organized by order id."""
        return self._broker.trades

    @property
    def action_scheme(self) -> ActionScheme:
        """The component for transforming an action into an `Order` at each time step."""
        return self._action_scheme

    @action_scheme.setter
    def action_scheme(self, action_scheme: Union[ActionScheme, str]):
        self._action_scheme = actions.get(action_scheme) if isinstance(
            action_scheme, str) else action_scheme

    @property
    def reward_scheme(self) -> RewardScheme:
        """The component for determining the reward at each time step."""
        return self._reward_scheme

    @reward_scheme.setter
    def reward_scheme(self, reward_scheme: Union[RewardScheme, str]):
        self._reward_scheme = rewards.get(reward_scheme) if isinstance(
            reward_scheme, str) else reward_scheme

    def step(self, action: int) -> Tuple[np.ndarray, float, bool, dict]:
        """Run one timestep within the environment based on the specified action.

        Arguments:
            action: The trade action provided by the agent for this timestep.

        Returns:
            observation (numpy.ndarray): Provided by the environment's exchange, often OHLCV or tick trade history data points.
            reward (float): An amount corresponding to the benefit earned by the action taken this timestep.
            done (bool): If `True`, the environment is complete and should be restarted.
            info (dict): Any auxiliary, diagnostic, or debugging information to output.
        """
        order = self.action_scheme.get_order(action, self.portfolio)

        if order:
            self._broker.submit(order)

        self._broker.update()

        obs_row = self.feed.next()

        if not self.use_internal:
            obs_row = {k: obs_row[k] for k in self._external_keys}

        self.history.push(obs_row)

        obs = self.history.observe()

        reward = self.reward_scheme.get_reward(self._portfolio)
        reward = np.nan_to_num(reward)

        if not np.isfinite(reward):
            raise ValueError('Reward returned by the reward scheme must be a finite float.')

        done = (self.portfolio.profit_loss < 0.1) or not self.feed.has_next()

        info = {
            'step': self.clock.step,
            'portfolio': self.portfolio,
            'broker': self._broker,
            'order': order,
        }

        if self._enable_logger:
            self.logger.debug('Order:       {}'.format(order))
            self.logger.debug('Observation: {}'.format(obs))
            self.logger.debug('P/L:         {}'.format(self._portfolio.profit_loss))
            self.logger.debug('Reward ({}): {}'.format(self.clock.step, reward))
            self.logger.debug('Performance: {}'.format(self._portfolio.performance.tail(1)))

        self.clock.increment()

        return obs, reward, done, info

    def reset(self) -> np.ndarray:
        """Resets the state of the environment and returns an initial observation.

        Returns:
            The episode's initial observation.
        """
        self.episode_id = str(uuid.uuid4())

        self.clock.reset()
        self.feed.reset()
        self.action_scheme.reset()
        self.reward_scheme.reset()
        self.portfolio.reset()
        self.history.reset()
        self._broker.reset()

        obs_row = self.feed.next()

        if not self.use_internal:
            obs_row = {k: obs_row[k] for k in self._external_keys}

        self.history.push(obs_row)

        obs = self.history.observe()

        self.clock.increment()

        return obs

    def render(self, mode='none'):
        """Renders the environment via matplotlib."""
        if mode == 'log':
            self.logger.info('Performance: ' + str(self._portfolio.performance))
        elif mode == 'chart':
            if self.viewer is None:
                raise NotImplementedError()

            self.viewer.render(self.clock.step - 1,
                               self._portfolio.performance,
                               self._broker.trades)

    def close(self):
        """Utility method to clean environment before closing."""
        if self.viewer is not None:
            self.viewer.close()
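A minimal usage sketch (not part of the example above): it assumes that `portfolio`, `action_scheme`, `reward_scheme`, and `feed` have already been built from the project's own components, and only illustrates the standard Gym loop this class exposes.

# Hypothetical wiring of the environment; the component objects are assumed to exist.
env = TradingEnvironment(portfolio=portfolio,
                         action_scheme=action_scheme,
                         reward_scheme=reward_scheme,
                         feed=feed,
                         window_size=20)

obs = env.reset()                                # initial stacked observation
done = False

while not done:
    action = env.action_space.sample()           # placeholder for an agent's policy
    obs, reward, done, info = env.step(action)   # standard Gym step signature

env.close()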
Example #2
class TradingEnvironment(gym.Env, TimeIndexed):
    """A trading environment for use with Gym-compatible reinforcement learning algorithms."""

    agent_id: str = None
    episode_id: str = None

    def __init__(self,
                 portfolio: Union[Portfolio, str],
                 action_scheme: Union[ActionScheme, str],
                 reward_scheme: Union[RewardScheme, str],
                 feed: DataFeed = None,
                 window_size: int = 1,
                 use_internal: bool = True,
                 renderers: Union[str, List[str], List['BaseRenderer']] = 'screenlog',
                 **kwargs):
        """
        Arguments:
            portfolio: The `Portfolio` of wallets used to submit and execute orders from.
            action_scheme:  The component for transforming an action into an `Order` at each timestep.
            reward_scheme: The component for determining the reward at each timestep.
            feed (optional): The pipeline of features to pass the observations through.
            renderers (optional): A single renderer or a list of renderers, specified by name
                or as objects. Valid names: 'screenlog', 'filelog', or 'plotly'. Use None for
                no rendering.
            price_history (optional): OHLCV price history used for rendering the chart.
                Required when the 'plotly' renderer is used.
            kwargs (optional): Additional arguments for tuning the environment, logging, etc.
        """
        super().__init__()

        self.portfolio = portfolio
        self.action_scheme = action_scheme
        self.reward_scheme = reward_scheme
        self.feed = feed
        self.window_size = window_size
        self.use_internal = use_internal
        self._price_history: pd.DataFrame = kwargs.get('price_history', None)

        if self.feed:
            self._external_keys = self.feed.next().keys()
            self.feed.reset()

        self.history = ObservationHistory(window_size=window_size)
        self._broker = Broker(exchanges=self.portfolio.exchanges)

        self.clock = Clock()
        self.action_space = None
        self.observation_space = None

        if not renderers:
            renderers = []
        elif not isinstance(renderers, list):
            renderers = [renderers]

        self._renderers = []
        for renderer in renderers:
            if isinstance(renderer, str):
                renderer = get(renderer)
            self._renderers.append(renderer)

        self._enable_logger = kwargs.get('enable_logger', False)
        self._observation_dtype = kwargs.get('dtype', np.float32)
        self._observation_lows = kwargs.get('observation_lows', -np.iinfo(np.int64).max)
        self._observation_highs = kwargs.get('observation_highs', np.iinfo(np.int64).max)
        self._max_allowed_loss = kwargs.get('max_allowed_loss', 0.1)

        if self._enable_logger:
            self.logger = logging.getLogger(kwargs.get('logger_name', __name__))
            self.logger.setLevel(kwargs.get('log_level', logging.DEBUG))

        self._max_episodes = None
        self._max_steps = None

        logging.getLogger('tensorflow').disabled = kwargs.get('disable_tensorflow_logger', True)

        self.compile()

    @property
    def max_episodes(self) -> int:
        return self._max_episodes

    @max_episodes.setter
    def max_episodes(self, max_episodes: int):
        self._max_episodes = max_episodes

    @property
    def max_steps(self) -> int:
        return self._max_steps

    @max_steps.setter
    def max_steps(self, max_steps: int):
        self._max_steps = max_steps

    def compile(self):
        """
        Sets the observation space and the action space of the environment.
        Creates the internal feed and initializes the environment's components.
        """
        components = [self._broker, self.portfolio, self.action_scheme,
                      self.reward_scheme] + self.portfolio.exchanges

        for component in components:
            component.clock = self.clock

        self.action_scheme.exchange_pairs = self.portfolio.exchange_pairs
        self.action_scheme.compile()
        self.action_space = self.action_scheme.action_space

        if not self.feed:
            self.feed = create_internal_feed(self.portfolio)
        else:
            self.feed = self.feed + create_internal_feed(self.portfolio)

        initial_obs = self.feed.next()
        n_features = len(initial_obs.keys()) if self.use_internal else len(self._external_keys)

        self.observation_space = Box(
            low=self._observation_lows,
            high=self._observation_highs,
            shape=(self.window_size, n_features),
            dtype=self._observation_dtype
        )

        self.feed.reset()

    @property
    def portfolio(self) -> Portfolio:
        """The portfolio of instruments currently held on this exchange."""
        return self._portfolio

    @portfolio.setter
    def portfolio(self, portfolio: Union[Portfolio, str]):
        self._portfolio = wallets.get(portfolio) if isinstance(portfolio, str) else portfolio

    @property
    def broker(self) -> Broker:
        """The broker used to execute orders within the environment."""
        return self._broker

    @property
    def episode_trades(self) -> Dict[str, 'Trade']:
        """A dictionary of trades made this episode, organized by order id."""
        return self._broker.trades

    @property
    def action_scheme(self) -> ActionScheme:
        """The component for transforming an action into an `Order` at each time step."""
        return self._action_scheme

    @action_scheme.setter
    def action_scheme(self, action_scheme: Union[ActionScheme, str]):
        self._action_scheme = actions.get(action_scheme) if isinstance(
            action_scheme, str) else action_scheme

    @property
    def reward_scheme(self) -> RewardScheme:
        """The component for determining the reward at each time step."""
        return self._reward_scheme

    @reward_scheme.setter
    def reward_scheme(self, reward_scheme: Union[RewardScheme, str]):
        self._reward_scheme = rewards.get(reward_scheme) if isinstance(
            reward_scheme, str) else reward_scheme

    @property
    def price_history(self) -> pd.DataFrame:
        return self._price_history

    @price_history.setter
    def price_history(self, price_history):
        self._price_history = price_history

    def step(self, action: int) -> Tuple[np.ndarray, float, bool, dict]:
        """Run one timestep within the environment based on the specified action.

        Arguments:
            action: The trade action provided by the agent for this timestep.

        Returns:
            observation (numpy.ndarray): Provided by the environment's exchange, often OHLCV or tick trade history data points.
            reward (float): An amount corresponding to the benefit earned by the action taken this timestep.
            done (bool): If `True`, the environment is complete and should be restarted.
            info (dict): Any auxiliary, diagnostic, or debugging information to output.
        """
        order = self.action_scheme.get_order(action, self.portfolio)

        if order:
            self._broker.submit(order)

        self._broker.update()

        obs_row = self.feed.next()

        if not self.use_internal:
            obs_row = {k: obs_row[k] for k in self._external_keys}

        self.history.push(obs_row)

        obs = self.history.observe()
        obs = obs.astype(self._observation_dtype)

        reward = self.reward_scheme.get_reward(self._portfolio)
        reward = np.nan_to_num(reward)

        if not np.isfinite(reward):
            raise ValueError('Reward returned by the reward scheme must be a finite float.')

        done = (self.portfolio.profit_loss < self._max_allowed_loss) or not self.feed.has_next()

        info = {
            'step': self.clock.step,
            'portfolio': self.portfolio,
            'broker': self._broker,
            'order': order,
        }

        if self._enable_logger:
            self.logger.debug('Order:       {}'.format(order))
            self.logger.debug('Observation: {}'.format(obs))
            self.logger.debug('P/L:         {}'.format(self._portfolio.profit_loss))
            self.logger.debug('Reward ({}): {}'.format(self.clock.step, reward))
            self.logger.debug('Performance: {}'.format(self._portfolio.performance.tail(1)))

        self.clock.increment()

        return obs, reward, done, info

    def reset(self) -> np.ndarray:
        """Resets the state of the environment and returns an initial observation.

        Returns:
            The episode's initial observation.
        """
        self.episode_id = str(uuid.uuid4())
        self.clock.reset()
        self.feed.reset()
        self.action_scheme.reset()
        self.reward_scheme.reset()
        self.portfolio.reset()
        self.history.reset()
        self._broker.reset()

        for renderer in self._renderers:
            renderer.reset()

        obs_row = self.feed.next()

        if not self.use_internal:
            obs_row = {k: obs_row[k] for k in self._external_keys}

        self.history.push(obs_row)

        obs = self.history.observe()

        self.clock.increment()

        return obs

    def render(self, episode: int = None):
        """Renders the environment.

        Arguments:
            episode: Current episode number (0-based).
        """
        current_step = self.clock.step - 1

        for renderer in self._renderers:
            price_history = None if self._price_history is None else self._price_history[self._price_history.index < current_step]
            renderer.render(episode=episode,
                            max_episodes=self._max_episodes,
                            step=current_step,
                            max_steps=self._max_steps,
                            price_history=price_history,
                            net_worth=self._portfolio.performance.net_worth,
                            performance=self._portfolio.performance.drop(columns=['base_symbol']),
                            trades=self._broker.trades)

    def save(self):
        """Saves the environment's rendered output via each configured renderer."""
        for renderer in self._renderers:
            renderer.save()

    def close(self):
        """Utility method to clean up the environment before closing."""
        for renderer in self._renderers:
            if callable(getattr(renderer, 'close', None)):
                renderer.close()  # pylint: disable=no-member
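As above, a hedged usage sketch for this renderer-aware variant; `portfolio`, `action_scheme`, `reward_scheme`, `feed`, and the OHLCV `price_history` DataFrame are assumed to be constructed elsewhere.

# Hypothetical setup showing the renderer-related arguments and hooks.
env = TradingEnvironment(portfolio=portfolio,
                         action_scheme=action_scheme,
                         reward_scheme=reward_scheme,
                         feed=feed,
                         window_size=20,
                         renderers=['screenlog', 'plotly'],  # resolved by name via the renderer registry
                         price_history=price_history,        # required by the 'plotly' renderer
                         max_allowed_loss=0.5)               # episode ends once profit_loss falls below this

env.max_episodes = 10    # surfaced to renderers for progress reporting
env.max_steps = 1000

for episode in range(env.max_episodes):
    obs = env.reset()
    done = False
    while not done:
        obs, reward, done, info = env.step(env.action_space.sample())
        env.render(episode=episode)  # episode index is 0-based

env.save()   # let file-based renderers persist their output
env.close()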