Example #1
class PBR(TensorTradeRewardScheme):

    registered_name = "pbr"

    def __init__(self, price: 'Stream'):
        super().__init__()
        self.position = -1

        r = Stream.sensor(price, lambda p: p.value, dtype="float").diff()
        position = Stream.sensor(self, lambda rs: rs.position, dtype="float")

        reward = (r * position).fillna(0).rename("reward")

        self.feed = DataFeed([reward])
        self.feed.compile()

    def on_action(self, action: int):
        self.position = -1 if action == 0 else 1

    def get_reward(self, portfolio: 'Portfolio'):
        return self.feed.next()["reward"]

    def reset(self):
        self.position = -1
        self.feed.reset()
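PBR is typically paired with the `BSH` (buy/sell/hold) action scheme, whose `attach` hook is what routes each action into `on_action` above. A minimal wiring sketch, following the standard TensorTrade setup (the exchange name, instrument pair, and price series are illustrative):

import tensortrade.env.default as default
from tensortrade.feed.core import Stream, DataFeed
from tensortrade.oms.exchanges import Exchange
from tensortrade.oms.instruments import USD, BTC
from tensortrade.oms.services.execution.simulated import execute_order
from tensortrade.oms.wallets import Portfolio, Wallet

prices = [100.0, 101.5, 99.0, 102.0, 103.2]  # hypothetical price history

price_stream = Stream.source(prices, dtype="float").rename("USD-BTC")
exchange = Exchange("sim-exchange", service=execute_order)(price_stream)

cash = Wallet(exchange, 10_000 * USD)
asset = Wallet(exchange, 0 * BTC)
portfolio = Portfolio(USD, [cash, asset])

reward_scheme = PBR(price=price_stream)

# BSH notifies attached listeners of every action, which is how
# `PBR.on_action` receives its `action` argument.
action_scheme = default.actions.BSH(cash=cash, asset=asset).attach(reward_scheme)

env = default.create(
    feed=DataFeed([Stream.source(prices, dtype="float").rename("close")]),
    portfolio=portfolio,
    action_scheme=action_scheme,
    reward_scheme=reward_scheme,
    window_size=2,
)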
Example #2
class PBREX(TensorTradeRewardScheme):
    """A reward scheme for position-based returns.

    * Let :math:`p_t` denote the price at time t.
    * Let :math:`x_t` denote the position at time t.
    * Let :math:`R_t` denote the reward at time t.

    Then the reward is defined as,
    :math:`R_{t} = (p_{t} - p_{t-1}) \cdot x_{t}`.

    Parameters
    ----------
    price : `Stream`
        The price stream to use for computing rewards.
    """

    registered_name = "pbrex"

    def __init__(self, price: 'Stream') -> None:
        super().__init__()
        self.position = 0
        self.price = 0          # unused in this variant
        self.pre_networth = 0   # unused in this variant

        r = Stream.sensor(price, lambda p: p.value, dtype="float").diff()
        position = Stream.sensor(self, lambda rs: rs.position, dtype="float")

        reward = (position * r).fillna(0).rename("reward")

        self.feed = DataFeed([reward])
        self.feed.compile()

    def on_action(self, side: TradeSide) -> None:
        # Position is not tracked in this variant; the reward comes from
        # net-worth deltas in `get_reward`.
        pass

    def get_reward(self, portfolio: 'Portfolio') -> float:
        net_worths = [nw['net_worth'] for nw in portfolio.performance.values()]
        reward = 0 if len(net_worths) < 2 else net_worths[-1] - net_worths[-2]
        return reward

    def reset(self) -> None:
        """Resets the `position` and `feed` of the reward scheme."""
        self.position = 0  # match the initial value from `__init__`
        self.feed.reset()
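The practical difference from stream-based PBR: a net-worth delta already reflects commissions and slippage charged by the exchange, while `position * price.diff()` does not. A toy calculation (all numbers are made up) makes this concrete:

# Suppose the agent flips long at time t and pays a commission `c`.
p_prev, p_now = 100.0, 101.0   # hypothetical consecutive prices
position = 1                   # long after the flip
c = 0.26                       # hypothetical commission charged at t

stream_reward = position * (p_now - p_prev)        # PBR-style: ignores the commission
networth_reward = position * (p_now - p_prev) - c  # PBREX-style: commission nets out

print(stream_reward, networth_reward)  # 1.0 0.74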
Example #3
class PBR(TensorTradeRewardScheme):
    """A reward scheme for position-based returns.

    * Let :math:`p_t` denote the price at time t.
    * Let :math:`x_t` denote the position at time t.
    * Let :math:`R_t` denote the reward at time t.

    Then the reward is defined as,
    :math:`R_{t} = (p_{t} - p_{t-1}) \cdot x_{t}`.

    Parameters
    ----------
    price : `Stream`
        The price stream to use for computing rewards.
    """

    registered_name = "pbr"

    def __init__(self, price: 'Stream') -> None:
        super().__init__()
        self.position = -1

        # PBR's raw price difference works when commissions are negligible;
        # with substantial commissions the agent learns to hold its position
        # whenever abs(r) < commission. Using the percent change instead
        # keeps the reward on the same scale as a fractional commission.
        # r = Stream.sensor(price, lambda p: p.value, dtype="float").diff()
        r = Stream.sensor(price, lambda p: p.value, dtype="float").pct_change()
        position = Stream.sensor(self, lambda rs: rs.position, dtype="float")
        reward = ((position * r).fillna(0)).rename("reward")

        self.feed = DataFeed([reward])
        self.feed.compile()

    def on_action(self, action: int, hasOrder: bool,
                  current_step: int) -> None:
        # `hasOrder` and `current_step` are accepted for interface
        # compatibility but are not used here.
        self.position = -1 if action == 0 else 1

    def get_reward(self, portfolio: 'Portfolio') -> float:
        return self.feed.next()["reward"]

    def reset(self) -> None:
        """Resets the `position` and `feed` of the reward scheme."""
        self.position = -1
        self.feed.reset()
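The switch from `.diff()` to `.pct_change()` puts the reward in return units, so it stays on the same scale as a fractional commission regardless of the price level. A quick pandas illustration with toy numbers:

import pandas as pd

prices = pd.Series([100.0, 101.0, 99.99])

print(prices.diff().tolist())        # [nan, 1.0, -1.01]   scale depends on price level
print(prices.pct_change().tolist())  # [nan, 0.01, -0.01]  comparable to a 0.1% commission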
Example #4
class SinglePositionProfit(TensorTradeRewardScheme):
    """A reward scheme for single position return.


    Parameters
    ----------
    price : `Stream`
        The price stream to use for computing rewards.
    """

    registered_name = "spp"

    def __init__(self, price: 'Stream') -> None:
        super().__init__()
        self.position = -1
        self.executed = 0
        r = Stream.sensor(price, lambda p: p.value,
                          dtype="float").pct_change()
        position = Stream.sensor(self, lambda rs: rs.position, dtype="float")
        executed = Stream.sensor(self, lambda rs: rs.executed, dtype="float")
        reward = (position * r - executed).fillna(0).rename("reward")

        self.feed = DataFeed([reward])
        self.feed.compile()

    def on_action(self, action: int, hasOrder: bool,
                  current_step: int) -> None:
        self.position = -1 if action == 0 else 1
        # Charge a fixed cost per executed order; 0.002606 is presumably the
        # commission expressed in return units.
        self.executed = len(self.broker.executed) * 0.002606

    def get_reward(self, portfolio: 'Portfolio') -> float:
        # The execution cost is already subtracted inside the reward stream
        # via the `executed` sensor.
        return self.feed.next()["reward"]

    def reset(self) -> None:
        """Resets the `position` and `feed` of the reward scheme."""
        self.position = -1
        self.feed.reset()
        self.executed = 0
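One caveat: `TensorTradeRewardScheme` does not provide `self.broker`; in the default components the broker lives on the action scheme, so this scheme presumably expects it to be wired in externally. A sketch, reusing the wallets from the first example (note that the three-argument `on_action` implies a customized action scheme, since the stock `BSH` calls `listener.on_action(action)` with a single argument):

reward_scheme = SinglePositionProfit(price=price_stream)
action_scheme = default.actions.BSH(cash=cash, asset=asset).attach(reward_scheme)

# Give the reward scheme access to the executed-order list so the
# `executed` sensor can count completed trades.
reward_scheme.broker = action_scheme.broker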
Example #5
class PBR(TensorTradeRewardScheme):
    """A reward scheme for position-based returns.

    * Let :math:`p_t` denote the price at time t.
    * Let :math:`x_t` denote the position at time t.
    * Let :math:`R_t` denote the reward at time t.

    Then the reward is defined as,
    :math:`R_{t} = (p_{t} - p_{t-1}) \cdot x_{t}`.

    Parameters
    ----------
    price : `Stream`
        The price stream to use for computing rewards.
    """

    registered_name = "pbr"

    def __init__(self, price: 'Stream') -> None:
        super().__init__()
        self.position = -1

        r = Stream.sensor(price, lambda p: p.value, dtype="float").diff()
        position = Stream.sensor(self, lambda rs: rs.position, dtype="float")

        reward = (position * r).fillna(0).rename("reward")

        self.feed = DataFeed([reward])
        self.feed.compile()

    def on_action(self, action: int) -> None:
        self.position = -1 if action == 0 else 1

    def get_reward(self, portfolio: 'Portfolio') -> float:
        return self.feed.next()["reward"]

    def reset(self) -> None:
        """Resets the `position` and `feed` of the reward scheme."""
        self.position = -1
        self.feed.reset()
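To see what PBR actually pays out, here is a hand trace over a toy episode (prices and actions are made up; the exact step alignment inside the live env may differ slightly):

prices = [100.0, 101.0, 100.5, 102.0]
actions = [1, 1, 0]            # buy, hold, sell

position, rewards = -1, []
for p_prev, p_now, action in zip(prices, prices[1:], actions):
    position = -1 if action == 0 else 1       # mirrors on_action
    rewards.append(position * (p_now - p_prev))

print(rewards)  # [1.0, -0.5, -1.5]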
Example #6
class IntradayObserver(Observer):
    """The IntradayObserver observer that is compatible with the other `default`
    components.

    Parameters
    ----------
    portfolio : `Portfolio`
        The portfolio to be used to create the internal data feed mechanism.
    feed : `DataFeed`
        The feed to be used to collect observations to the observation window.
    renderer_feed : `DataFeed`
        The feed to be used for giving information to the renderer.
    stop_time : datetime.time
        The time at which the episode will stop.
    window_size : int
        The size of the observation window.
    min_periods : int
        The amount of steps needed to warmup the `feed`.
    randomize : bool
        Whether or not to select a random episode when reset.
    **kwargs : keyword arguments
        Additional keyword arguments for observer creation.

    Attributes
    ----------
    feed : `DataFeed`
        The master feed in charge of streaming the internal, external, and
        renderer data feeds.
    stop_time : datetime.time
        The time at which the episode will stop.
    window_size : int
        The size of the observation window.
    min_periods : int
        The amount of steps needed to warmup the `feed`.
    randomize : bool
        Whether or not a random episode is selected when reset.
    history : `ObservationHistory`
        The observation history.
    renderer_history : `List[dict]`
        The history of the renderer data feed.
    """
    def __init__(self,
                 portfolio: 'Portfolio',
                 feed: 'DataFeed' = None,
                 renderer_feed: 'DataFeed' = None,
                 stop_time: 'datetime.time' = dt.time(16, 0, 0),
                 window_size: int = 1,
                 min_periods: int = None,
                 randomize: bool = False,
                 **kwargs) -> None:
        internal_group = Stream.group(
            _create_internal_streams(portfolio)).rename("internal")
        external_group = Stream.group(feed.inputs).rename("external")

        if renderer_feed:
            renderer_group = Stream.group(
                renderer_feed.inputs).rename("renderer")

            self.feed = DataFeed(
                [internal_group, external_group, renderer_group])
        else:
            self.feed = DataFeed([internal_group, external_group])

        self.stop_time = stop_time
        self.window_size = window_size
        self.min_periods = min_periods
        self.randomize = randomize

        self._observation_dtype = kwargs.get('dtype', np.float32)
        self._observation_lows = kwargs.get('observation_lows', -np.inf)
        self._observation_highs = kwargs.get('observation_highs', np.inf)

        self.history = ObservationHistory(window_size=window_size)

        initial_obs = self.feed.next()["external"]
        initial_obs.pop('timestamp', None)
        n_features = len(initial_obs.keys())

        self._observation_space = Box(low=self._observation_lows,
                                      high=self._observation_highs,
                                      shape=(self.window_size, n_features),
                                      dtype=self._observation_dtype)

        self.feed = self.feed.attach(portfolio)

        self.renderer_history = []

        if self.randomize:
            # Count how many complete intraday sessions the feed contains.
            self.num_episodes = 0
            while self.feed.has_next():
                ts = self.feed.next()["external"]["timestamp"]
                if ts.time() == self.stop_time:
                    self.num_episodes += 1

        self.feed.reset()
        self.warmup()

        self.stop = False

    @property
    def observation_space(self) -> Space:
        return self._observation_space

    def warmup(self) -> None:
        """Warms up the data feed.
        """
        if self.min_periods is not None:
            for _ in range(self.min_periods):
                if self.has_next():
                    obs_row = self.feed.next()["external"]
                    obs_row.pop('timestamp', None)
                    self.history.push(obs_row)

    def observe(self, env: 'TradingEnv') -> np.ndarray:
        """Observes the environment.

        As a consequence of observing the `env`, a new observation is generated
        from the `feed` and stored in the observation history.

        Returns
        -------
        `np.ndarray`
            The current observation of the environment.
        """
        data = self.feed.next()

        # Save renderer information to history
        if "renderer" in data.keys():
            self.renderer_history += [data["renderer"]]

        # Push new observation to observation history
        obs_row = data["external"]
        try:
            obs_ts = obs_row.pop('timestamp')
        except KeyError:
            raise KeyError(
                "The feed must include a stream of timestamps named "
                "'timestamp'.")
        self.history.push(obs_row)

        # Check if episode should be stopped
        if obs_ts.time() == self.stop_time:
            self.stop = True

        obs = self.history.observe()
        obs = obs.astype(self._observation_dtype)
        return obs

    def has_next(self) -> bool:
        """Checks if there is another observation to be generated.
        Returns
        -------
        bool
            Whether there is another observation to be generated.
        """
        return self.feed.has_next() and not self.stop

    def reset(self) -> None:
        """Resets the observer"""
        self.renderer_history = []
        self.history.reset()

        if self.randomize or not self.feed.has_next():
            self.feed.reset()
            if self.randomize:
                # Draw the target episode once: calling `randrange` in the
                # loop condition would re-draw the target on every iteration.
                target_episode = randrange(self.num_episodes)
                episode_num = 0
                while episode_num < target_episode:
                    ts = self.feed.next()["external"]["timestamp"]
                    if ts.time() == self.stop_time:
                        episode_num += 1

        self.warmup()

        self.stop = False
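Because `observe` pops a 'timestamp' key from every external row, the feed passed to this observer must contain a timestamp stream with that exact name. A construction sketch (the file and column names are illustrative):

import datetime as dt
import pandas as pd
from tensortrade.feed.core import Stream, DataFeed

df = pd.read_csv("intraday.csv", parse_dates=["timestamp"])  # hypothetical file

feed = DataFeed([
    Stream.source(df["timestamp"].tolist()).rename("timestamp"),  # required by observe()
    Stream.source(df["close"].tolist(), dtype="float").rename("close"),
])

observer = IntradayObserver(
    portfolio=portfolio,          # e.g. the portfolio built in the first example
    feed=feed,
    stop_time=dt.time(16, 0, 0),  # end each episode at 16:00
    window_size=10,
    randomize=True,               # start each reset at a random session
)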
Example #7
class TensorTradeObserver(Observer):
    """The TensorTrade observer that is compatible with the other `default`
    components.

    Parameters
    ----------
    portfolio : `Portfolio`
        The portfolio to be used to create the internal data feed mechanism.
    feed : `DataFeed`
        The feed to be used to collect observations to the observation window.
    renderer_feed : `DataFeed`
        The feed to be used for giving information to the renderer.
    window_size : int
        The size of the observation window.
    min_periods : int
        The amount of steps needed to warmup the `feed`.
    **kwargs : keyword arguments
        Additional keyword arguments for observer creation.

    Attributes
    ----------
    feed : `DataFeed`
        The master feed in charge of streaming the internal, external, and
        renderer data feeds.
    window_size : int
        The size of the observation window.
    min_periods : int
        The amount of steps needed to warmup the `feed`.
    history : `ObservationHistory`
        The observation history.
    renderer_history : `List[dict]`
        The history of the renderer data feed.
    """
    def __init__(self,
                 portfolio: 'Portfolio',
                 feed: 'DataFeed' = None,
                 renderer_feed: 'DataFeed' = None,
                 window_size: int = 1,
                 min_periods: int = None,
                 **kwargs) -> None:
        internal_group = Stream.group(
            _create_internal_streams(portfolio)).rename("internal")
        external_group = Stream.group(feed.inputs).rename("external")

        if renderer_feed:
            renderer_group = Stream.group(
                renderer_feed.inputs).rename("renderer")

            self.feed = DataFeed(
                [internal_group, external_group, renderer_group])
        else:
            self.feed = DataFeed([internal_group, external_group])

        self.window_size = window_size
        self.min_periods = min_periods

        self._observation_dtype = kwargs.get('dtype', np.float32)
        self._observation_lows = kwargs.get('observation_lows', -np.inf)
        self._observation_highs = kwargs.get('observation_highs', np.inf)

        self.history = ObservationHistory(window_size=window_size)

        initial_obs = self.feed.next()["external"]
        n_features = len(initial_obs.keys())

        self._observation_space = Box(low=self._observation_lows,
                                      high=self._observation_highs,
                                      shape=(self.window_size, n_features),
                                      dtype=self._observation_dtype)

        self.feed = self.feed.attach(portfolio)

        self.renderer_history = []

        self.feed.reset()
        self.warmup()

    @property
    def observation_space(self) -> Space:
        return self._observation_space

    def warmup(self) -> None:
        """Warms up the data feed.
        """
        if self.min_periods is not None:
            for _ in range(self.min_periods):
                if self.has_next():
                    obs_row = self.feed.next()["external"]
                    self.history.push(obs_row)

    def observe(self, env: 'TradingEnv') -> np.ndarray:
        """Observes the environment.

        As a consequence of observing the `env`, a new observation is generated
        from the `feed` and stored in the observation history.

        Returns
        -------
        `np.ndarray`
            The current observation of the environment.
        """
        data = self.feed.next()

        # Save renderer information to history
        if "renderer" in data.keys():
            self.renderer_history += [data["renderer"]]

        # Push new observation to observation history
        obs_row = data["external"]
        self.history.push(obs_row)

        obs = self.history.observe()
        obs = obs.astype(self._observation_dtype)
        return obs

    def has_next(self) -> bool:
        """Checks if there is another observation to be generated.

        Returns
        -------
        bool
            Whether there is another observation to be generated.
        """
        return self.feed.has_next()

    def reset(self) -> None:
        """Resets the observer"""
        self.renderer_history = []
        self.history.reset()
        self.feed.reset()
        self.warmup()
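Compared with IntradayObserver above, this is the stock observer: no timestamp handling, no stop_time, and `has_next` depends only on the feed, so every external stream should be numeric for the `Box` observation space. A quick shape check, assuming the `portfolio` and an all-float `DataFeed` such as the one-column feed from the first example:

observer = TensorTradeObserver(portfolio=portfolio, feed=feed, window_size=25)

# Box of shape (window_size, n_features), where n_features is the number
# of external streams in the feed.
print(observer.observation_space.shape)  # e.g. (25, 1)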