Code Example #1
File: test_simulator.py Project: krusty45/crypto-rl
def test_get_tick_history():
    """
    Test case to query Arctic TickStore
    :return:
    """
    start_time = dt.now(TIMEZONE)

    sim = Simulator(use_arctic=True)
    query = {'ccy': ['BTC-USD'], 'start_date': 20181231, 'end_date': 20190102}
    tick_history = sim.get_tick_history(query=query)
    print('\n{}\n'.format(tick_history))

    elapsed = (dt.now(TIMEZONE) - start_time).seconds
    print('Completed %s in %i seconds' % (__name__, elapsed))
    print('DONE. EXITING %s' % __name__)
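A note on the timing pattern used in these test cases: timedelta.seconds returns only the seconds component of the elapsed time (microseconds are dropped and, for runs longer than a day, whole days are ignored), so total_seconds() is the more robust call. A minimal, standalone sketch independent of the Simulator code:

from datetime import datetime, timezone

start_time = datetime.now(timezone.utc)
# ... run the query or export under test ...
elapsed = (datetime.now(timezone.utc) - start_time).total_seconds()
print('Completed in {:.2f} seconds'.format(elapsed))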
Code Example #2
def test_extract_features() -> None:
    """
    Test case to export *multiple* testing/training data sets for reinforcement learning
    """
    start_time = dt.now(tz=TIMEZONE)

    sim = Simulator()

    for ccy in ['ETH-USD']:
        # for ccy, ccy2 in [('LTC-USD', 'tLTCUSD')]:
        query = {
            'ccy': [ccy],  # ccy2],  # parameter must be a list
            'start_date': 20191208,  # parameter format for dates
            'end_date': 20191209,  # parameter format for dates
        }
        sim.extract_features(query)

    elapsed = (dt.now(tz=TIMEZONE) - start_time).seconds
    print('Completed %s in %i seconds' % (__name__, elapsed))
    print('DONE. EXITING %s' % __name__)
Code Example #3
File: test_simulator.py Project: krusty45/crypto-rl
def test_get_orderbook_snapshot_history():
    """
    Test case to export testing/training data for reinforcement learning
    :return:
    """
    start_time = dt.now(TIMEZONE)

    sim = Simulator(use_arctic=True)
    query = {'ccy': ['LTC-USD'], 'start_date': 20190406, 'end_date': 20190407}
    orderbook_snapshot_history = sim.get_orderbook_snapshot_history(
        query=query)

    filename = '{}_{}'.format(query['ccy'][0], query['start_date'])
    sim.export_to_csv(data=orderbook_snapshot_history,
                      filename=filename,
                      compress=False)

    elapsed = (dt.now(TIMEZONE) - start_time).seconds
    print('Completed %s in %i seconds' % (__name__, elapsed))
    print('DONE. EXITING %s' % __name__)
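The export above produces a flat CSV (xz-compressed when compress=True). A hedged sketch of loading such a file back for inspection with pandas; the path is hypothetical and depends on where export_to_csv writes its output (Code Example #8 suggests a data_exports/ directory in this project):

import pandas as pd

# Hypothetical filename matching the export above; adjust the path as needed.
snapshots = pd.read_csv('LTC-USD_20190406.csv')  # pandas infers 'xz' from a .csv.xz suffix
print(snapshots.shape)
print(list(snapshots.columns)[:10])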
Code Example #4
def test_extract_features():
    """
    Test case to export multiple testing/training data sets for reinforcement learning
    :return:
    """
    start_time = dt.now(TIMEZONE)

    sim = Simulator(use_arctic=True)

    # for ccy in ['BTC-USD', 'ETH-USD', 'LTC-USD']:  #, 'BCH-USD']:
    for ccy, ccy2 in [('LTC-USD', 'tLTCUSD')]:
        query = {
            'ccy': [ccy, ccy2],
            'start_date': 20190314,
            'end_date': 20190317
        }
        sim.extract_features(query)

    elapsed = (dt.now(TIMEZONE) - start_time).seconds
    print('Completed %s in %i seconds' % (__name__, elapsed))
    print('DONE. EXITING %s' % __name__)
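'tLTCUSD' looks like the Bitfinex v2 ticker paired with Coinbase's 'LTC-USD' product. If additional product pairs were needed, the same query structure could be built in a loop; the pair list and dates below are illustrative only, not taken from the project:

product_pairs = [('BTC-USD', 'tBTCUSD'), ('ETH-USD', 'tETHUSD'), ('LTC-USD', 'tLTCUSD')]
queries = [
    {'ccy': [coinbase_sym, bitfinex_sym], 'start_date': 20190314, 'end_date': 20190317}
    for coinbase_sym, bitfinex_sym in product_pairs
]
for query in queries:
    print(query)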
Code Example #5
def test_get_orderbook_snapshot_history() -> None:
    """
    Test case to export testing/training data for reinforcement learning
    """
    start_time = dt.now(tz=TIMEZONE)

    sim = Simulator()
    query = {'ccy': ['LTC-USD'], 'start_date': 20190926, 'end_date': 20190928}
    orderbook_snapshot_history = sim.get_orderbook_snapshot_history(
        query=query)
    if orderbook_snapshot_history is None:
        print('Exiting: orderbook_snapshot_history is NONE')
        return

    filename = 'test_' + '{}_{}'.format(query['ccy'][0], query['start_date'])
    sim.export_to_csv(data=orderbook_snapshot_history,
                      filename=filename,
                      compress=False)

    elapsed = (dt.now(tz=TIMEZONE) - start_time).seconds
    print('Completed %s in %i seconds' % (__name__, elapsed))
    print('DONE. EXITING %s' % __name__)
Code Example #6
File: base_env.py Project: habibzadeh/crypto-rl
    def __init__(self,
                 fitting_file='BTC-USD_2019-04-07.csv.xz',
                 testing_file='BTC-USD_2019-04-08.csv.xz',
                 step_size=1,
                 max_position=5,
                 window_size=10,
                 seed=1,
                 action_repeats=10,
                 training=True,
                 format_3d=True,
                 z_score=True,
                 reward_type='default',
                 scale_rewards=True,
                 ema_alpha=EMA_ALPHA):
        """
        Base class for creating environments extending OpenAI's GYM framework.

        :param fitting_file: historical data used to fit environment data (i.e.,
            previous trading day)
        :param testing_file: historical data used in environment
        :param step_size: increment size for steps (NOTE: leave at 1, otherwise market
            transaction data will be overlooked)
        :param max_position: maximum number of positions able to hold in inventory
        :param window_size: number of lags to include in observation space
        :param seed: random seed number
        :param action_repeats: number of steps to take in environment after a given action
        :param training: if TRUE, then randomize starting point in environment
        :param format_3d: if TRUE, reshape observation space from matrix to tensor
        :param z_score: if TRUE, normalize data set with Z-Score, otherwise use Min-Max
            (i.e., range of 0 to 1)
        :param reward_type: method for calculating the environment's reward:
            1) 'trade_completion' --> reward is generated per trade's round trip
            2) 'continuous_total_pnl' --> change in realized & unrealized pnl between
                                            time steps
            3) 'continuous_realized_pnl' --> change in realized pnl between time steps
            4) 'continuous_unrealized_pnl' --> change in unrealized pnl between time steps
            5) 'normed' --> refer to https://arxiv.org/abs/1804.04216v1
            6) 'div' --> reward is generated per trade's round trip divided by
                inventory count (again, refer to https://arxiv.org/abs/1804.04216v1)
            7) 'asymmetrical' --> extended version of *default* and enhanced
                with a reward for being filled above/below midpoint,
                and returns only negative rewards for Unrealized PnL to
                discourage long-term speculation.
            8) 'asymmetrical_adj' --> extended version of *default* and enhanced
                with a reward for being filled above/below midpoint,
                and weighted up/down unrealized returns.
            9) 'default' --> Pct change in Unrealized PnL + Realized PnL of
                respective time step.
        :param ema_alpha: decay factor for EMA, usually between 0.9 and 0.9999; if NONE,
            raw values are returned in place of smoothed values
        """
        # properties required for instantiation
        self.action_repeats = action_repeats
        self._seed = seed
        self._random_state = np.random.RandomState(seed=self._seed)
        self.training = training
        self.step_size = step_size
        self.max_position = max_position
        self.window_size = window_size
        self.reward_type = reward_type
        self.format_3d = format_3d  # e.g., [window, features, *NEW_AXIS*]
        self.sym = testing_file[:7]  # slice the CCY from the filename
        self.scale_rewards = scale_rewards

        # properties that get reset()
        self.reward = 0.0
        self.done = False
        self.local_step_number = 0
        self.midpoint = 0.0
        self.observation = None
        self.action = 0
        self.last_pnl = 0.
        self.last_midpoint = None
        self.midpoint_change = None

        # properties to override in sub-classes
        self.actions = None
        self.broker = None
        self.action_space = None
        self.observation_space = None

        # get historical data for simulations
        self.sim = Sim(z_score=z_score, alpha=ema_alpha)

        self.prices_, self.data, self.normalized_data = self.sim.load_environment_data(
            fitting_file=fitting_file,
            testing_file=testing_file,
            include_imbalances=True,
            as_pandas=False)
        self.best_bid = self.best_ask = None

        self.max_steps = self.data.shape[0] - self.step_size * self.action_repeats - 1

        # load indicators into the indicator manager
        self.tns = IndicatorManager()
        self.rsi = IndicatorManager()
        for window in INDICATOR_WINDOW:
            self.tns.add(
                ('tns_{}'.format(window), TnS(window=window, alpha=ema_alpha)))
            self.rsi.add(
                ('rsi_{}'.format(window), RSI(window=window, alpha=ema_alpha)))

        # conditionally load PnlNorm, since it calculates in O(n) time complexity
        self.pnl_norm = PnlNorm(
            window=INDICATOR_WINDOW[0],
            alpha=None) if self.reward_type == 'normed' else None

        # rendering class
        self._render = TradingGraph(sym=self.sym)

        # graph midpoint prices
        self._render.reset_render_data(
            y_vec=self.prices_[:np.shape(self._render.x_vec)[0]])

        # buffer for appending lags
        self.data_buffer = list()
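The constructor above belongs to a base class that concrete trading environments extend. A hedged sketch of instantiation, where TrendFollowing stands in for any such subclass (the class name and keyword values are assumptions for illustration, not taken verbatim from the project):

# Hypothetical subclass of the base environment shown above.
env = TrendFollowing(
    fitting_file='BTC-USD_2019-04-07.csv.xz',
    testing_file='BTC-USD_2019-04-08.csv.xz',
    reward_type='asymmetrical_adj',  # any of the nine reward types documented above
    window_size=10,
    max_position=5,
    format_3d=True,
)
observation = env.reset()
print(observation.shape)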
Code Example #7
File: base_env.py Project: habibzadeh/crypto-rl
class BaseEnvironment(Env, ABC):
    metadata = {'render.modes': ['human']}

    # Index of specific data points used to generate the observation space
    # Set to True if Bitfinex is in the dataset (e.g., include_bitfinex=True)
    features = Sim.get_feature_labels(include_system_time=False,
                                      include_bitfinex=False,
                                      include_imbalances=True,
                                      include_ema=False,
                                      include_spread=True)
    best_bid_index = features.index('coinbase_bid_distance_0')
    best_ask_index = features.index('coinbase_ask_distance_0')
    notional_bid_index = features.index('coinbase_bid_notional_0')
    notional_ask_index = features.index('coinbase_ask_notional_0')
    buy_trade_index = features.index('coinbase_buys')
    sell_trade_index = features.index('coinbase_sells')

    def __init__(self,
                 fitting_file='BTC-USD_2019-04-07.csv.xz',
                 testing_file='BTC-USD_2019-04-08.csv.xz',
                 step_size=1,
                 max_position=5,
                 window_size=10,
                 seed=1,
                 action_repeats=10,
                 training=True,
                 format_3d=True,
                 z_score=True,
                 reward_type='default',
                 scale_rewards=True,
                 ema_alpha=EMA_ALPHA):
        """
        Base class for creating environments extending OpenAI's GYM framework.

        :param fitting_file: historical data used to fit environment data (i.e.,
            previous trading day)
        :param testing_file: historical data used in environment
        :param step_size: increment size for steps (NOTE: leave at 1, otherwise market
            transaction data will be overlooked)
        :param max_position: maximum number of positions able to hold in inventory
        :param window_size: number of lags to include in observation space
        :param seed: random seed number
        :param action_repeats: number of steps to take in environment after a given action
        :param training: if TRUE, then randomize starting point in environment
        :param format_3d: if TRUE, reshape observation space from matrix to tensor
        :param z_score: if TRUE, normalize data set with Z-Score, otherwise use Min-Max
            (i.e., range of 0 to 1)
        :param reward_type: method for calculating the environment's reward:
            1) 'trade_completion' --> reward is generated per trade's round trip
            2) 'continuous_total_pnl' --> change in realized & unrealized pnl between
                                            time steps
            3) 'continuous_realized_pnl' --> change in realized pnl between time steps
            4) 'continuous_unrealized_pnl' --> change in unrealized pnl between time steps
            5) 'normed' --> refer to https://arxiv.org/abs/1804.04216v1
            6) 'div' --> reward is generated per trade's round trip divided by
                inventory count (again, refer to https://arxiv.org/abs/1804.04216v1)
            7) 'asymmetrical' --> extended version of *default* and enhanced
                with a reward for being filled above/below midpoint,
                and returns only negative rewards for Unrealized PnL to
                discourage long-term speculation.
            8) 'asymmetrical_adj' --> extended version of *default* and enhanced
                with a reward for being filled above/below midpoint,
                and weighted up/down unrealized returns.
            9) 'default' --> Pct change in Unrealized PnL + Realized PnL of
                respective time step.
        :param ema_alpha: decay factor for EMA, usually between 0.9 and 0.9999; if NONE,
            raw values are returned in place of smoothed values
        """
        # properties required for instantiation
        self.action_repeats = action_repeats
        self._seed = seed
        self._random_state = np.random.RandomState(seed=self._seed)
        self.training = training
        self.step_size = step_size
        self.max_position = max_position
        self.window_size = window_size
        self.reward_type = reward_type
        self.format_3d = format_3d  # e.g., [window, features, *NEW_AXIS*]
        self.sym = testing_file[:7]  # slice the CCY from the filename
        self.scale_rewards = scale_rewards

        # properties that get reset()
        self.reward = 0.0
        self.done = False
        self.local_step_number = 0
        self.midpoint = 0.0
        self.observation = None
        self.action = 0
        self.last_pnl = 0.
        self.last_midpoint = None
        self.midpoint_change = None

        # properties to override in sub-classes
        self.actions = None
        self.broker = None
        self.action_space = None
        self.observation_space = None

        # get historical data for simulations
        self.sim = Sim(z_score=z_score, alpha=ema_alpha)

        self.prices_, self.data, self.normalized_data = self.sim.load_environment_data(
            fitting_file=fitting_file,
            testing_file=testing_file,
            include_imbalances=True,
            as_pandas=False)
        self.best_bid = self.best_ask = None

        self.max_steps = self.data.shape[0] - self.step_size * self.action_repeats - 1

        # load indicators into the indicator manager
        self.tns = IndicatorManager()
        self.rsi = IndicatorManager()
        for window in INDICATOR_WINDOW:
            self.tns.add(
                ('tns_{}'.format(window), TnS(window=window, alpha=ema_alpha)))
            self.rsi.add(
                ('rsi_{}'.format(window), RSI(window=window, alpha=ema_alpha)))

        # conditionally load PnlNorm, since it calculates in O(n) time complexity
        self.pnl_norm = PnlNorm(
            window=INDICATOR_WINDOW[0],
            alpha=None) if self.reward_type == 'normed' else None

        # rendering class
        self._render = TradingGraph(sym=self.sym)

        # graph midpoint prices
        self._render.reset_render_data(
            y_vec=self.prices_[:np.shape(self._render.x_vec)[0]])

        # buffer for appending lags
        self.data_buffer = list()

    @abstractmethod
    def map_action_to_broker(self, action: int):
        """
        Translate agent's action into an order and submit order to broker.
        :param action: (int) agent's action for current step
        :return: (tuple) reward, pnl
        """
        return 0., 0.

    @abstractmethod
    def _create_position_features(self):
        """
        Create agent space feature set reflecting the positions held in inventory.
        :return: (np.array) position features
        """
        return np.array([np.nan], dtype=np.float32)

    @staticmethod
    def _trade_completion_reward(step_pnl: float):
        """
        Alternate approach for reward calculation which places greater importance on
        trades that have returned at least a 1:1 profit-to-loss ratio after
        transaction fees.
        :param step_pnl: limit order pnl and any penalties for bad actions
        :return: normalized reward in the (-0.1, 0.1) range, which can be scaled to
            (-1, 1) in the self._get_step_reward() method
        """
        reward = 0.0
        if step_pnl > MARKET_ORDER_FEE * 2:  # e.g.,  2:1 profit to loss ratio
            reward += 1.0
        elif step_pnl > 0.0:
            reward += step_pnl
        elif step_pnl < -MARKET_ORDER_FEE:  # skew the penalty so losses beyond the fee get the full -1.0
            reward -= 1.0
        else:
            reward -= step_pnl
        return reward

    def _asymmetrical_reward(self,
                             long_filled: bool,
                             short_filled: bool,
                             step_pnl: float,
                             dampening=0.15):
        """
        Asymmetrical reward type for environments, which is derived from percentage
        changes and notional values.
        The inputs are as follows:
            (1) Change in exposure value between time steps, in percentage terms; and,
            (2) Realized PnL from an open order being filled between time steps,
                in dollar terms.
        :param long_filled: TRUE if long order is filled within same time step
        :param short_filled: TRUE if short order is filled within same time step
        :param step_pnl: limit order pnl and any penalties for bad actions
        :param dampening: discount factor towards pnl change between time steps
        :return: (float)
        """
        exposure_change = self.broker.total_inventory_count * self.midpoint_change
        long_fill_reward = short_fill_reward = 0.

        if long_filled:
            long_fill_reward += ((self.midpoint / self.best_bid) - 1.)
            print("long_fill_reward: {:.6f}".format(long_fill_reward))
        if short_filled:
            short_fill_reward += ((self.best_ask / self.midpoint) - 1.)
            print("short_fill_reward: {:.6f}".format(short_fill_reward))

        reward = (long_fill_reward + short_fill_reward) + \
            min(0., exposure_change * dampening)

        if long_filled:
            reward += step_pnl
        if short_filled:
            reward += step_pnl

        return reward

    def _asymmetrical_reward_adj(self,
                                 long_filled: bool,
                                 short_filled: bool,
                                 step_pnl: float,
                                 dampening=0.25):
        """
        Asymmetrical reward type for environments with balanced feedback, which is
        derived from percentage changes and notional values.
        The inputs are as follows:
            (1) Change in exposure value between time steps, in percentage terms; and,
            (2) Realized PnL from an open order being filled between time steps,
                in dollar terms.
        :param long_filled: TRUE if long order is filled within same time step
        :param short_filled: TRUE if short order is filled within same time step
        :param step_pnl: limit order pnl and any penalties for bad actions
        :param dampening: discount factor towards pnl change between time steps
        :return: (float)
        """
        exposure_change = self.broker.total_inventory_count * self.midpoint_change
        long_fill_reward = short_fill_reward = 0.

        if long_filled:
            long_fill_reward += ((self.midpoint / self.best_bid) - 1.)
            print("long_fill_reward: {:.6f}".format(long_fill_reward))
        if short_filled:
            short_fill_reward += ((self.best_ask / self.midpoint) - 1.)
            print("short_fill_reward: {:.6f}".format(short_fill_reward))

        reward = (long_fill_reward + short_fill_reward) + \
            min(0., exposure_change * (1. - dampening)*0.1) + \
            max(0., exposure_change * dampening*0.1)

        if long_filled:
            reward += step_pnl
        if short_filled:
            reward += step_pnl

        return reward

    def _default_reward(self, long_filled: bool, short_filled: bool,
                        step_pnl: float):
        """
        Default reward type for environments, which is derived from PnL and order
        quantity.
        The inputs are as follows:
            (1) Change in exposure value between time steps, in dollar terms; and,
            (2) Realized PnL from an open order being filled between time steps,
                in dollar terms.
        :param long_filled: TRUE if long order is filled within same time step
        :param short_filled: TRUE if short order is filled within same time step
        :param step_pnl: limit order pnl and any penalties for bad actions
        :return:
        """
        reward = self.broker.total_inventory_count * self.midpoint_change
        if long_filled:
            reward += step_pnl
        if short_filled:
            reward += step_pnl
        return reward

    def _get_step_reward(self, step_pnl: float, long_filled: bool,
                         short_filled: bool):
        """
        Get reward for current time step.
            Note: 'reward_type' is set during environment instantiation.
        :param step_pnl: (float) PnL accrued from order fills at current time step
        :return: (float) reward
        """
        reward = 0.0
        if self.reward_type == 'default':  # pnl in dollar terms
            reward += self._default_reward(long_filled, short_filled, step_pnl)
        elif self.reward_type == 'asymmetrical':
            reward += self._asymmetrical_reward(long_filled=long_filled,
                                                short_filled=short_filled,
                                                step_pnl=step_pnl)
        elif self.reward_type == 'asymmetrical_adj':
            reward += self._asymmetrical_reward_adj(long_filled=long_filled,
                                                    short_filled=short_filled,
                                                    step_pnl=step_pnl)
        elif self.reward_type == 'trade_completion':  # reward is [-1,1]
            reward += self._trade_completion_reward(step_pnl=step_pnl)
            # Note: we do not need to update last_pnl for this reward approach
        elif self.reward_type == 'continuous_total_pnl':  # pnl in percentage
            new_pnl = self.broker.get_total_pnl(self.best_bid, self.best_ask)
            difference = new_pnl - self.last_pnl  # Difference in PnL over time step
            # include step_pnl to net out drops in unrealized PnL from position closing
            reward += difference + step_pnl
            self.last_pnl = new_pnl
        elif self.reward_type == 'continuous_realized_pnl':
            new_pnl = self.broker.realized_pnl
            reward += new_pnl - self.last_pnl  # Difference in PnL
            self.last_pnl = new_pnl
        elif self.reward_type == 'continuous_unrealized_pnl':
            new_pnl = self.broker.get_unrealized_pnl(self.best_bid,
                                                     self.best_ask)
            difference = new_pnl - self.last_pnl  # Difference in PnL over time step
            # include step_pnl to net out drops in unrealized PnL from position closing
            reward += difference + step_pnl
            self.last_pnl = new_pnl
        elif self.reward_type == 'normed':
            # refer to https://arxiv.org/abs/1804.04216v1
            new_pnl = self.pnl_norm.raw_value
            reward += new_pnl - self.last_pnl  # Difference in PnL
            self.last_pnl = new_pnl
        elif self.reward_type == 'div':
            reward += step_pnl / max(self.broker.total_inventory_count, 1)
        else:  # Default implementation
            reward += self._default_reward(long_filled, short_filled, step_pnl)

        if self.scale_rewards:
            reward *= 100.  # multiply to avoid division error

        return reward

    def step(self, action: int):
        """
        Step through environment with action
        :param action: (int) action to take in environment
        :return: (tuple) observation, reward, is_done, and empty `dict`
        """
        for current_step in range(self.action_repeats):

            if self.done:
                self.reset()
                return self.observation, self.reward, self.done, {}

            # reset the reward if there ARE action repeats
            if current_step == 0:
                self.reward = 0.
                step_action = action
            else:
                step_action = 0

            # Get current step's midpoint
            self.midpoint = self.prices_[self.local_step_number]
            self.midpoint_change = (self.midpoint / self.last_midpoint) - 1.

            # Pass current time step bid/ask prices to broker to calculate PnL,
            # or if any open orders are to be filled
            self.best_bid, self.best_ask = self._get_nbbo()
            buy_volume = self._get_book_data(BaseEnvironment.buy_trade_index)
            sell_volume = self._get_book_data(BaseEnvironment.sell_trade_index)

            # Update indicators
            self.tns.step(buys=buy_volume, sells=sell_volume)
            self.rsi.step(price=self.midpoint)

            # Get PnL from any filled LIMIT orders
            limit_pnl, long_filled, short_filled = self.broker.step_limit_order_pnl(
                bid_price=self.best_bid,
                ask_price=self.best_ask,
                buy_volume=buy_volume,
                sell_volume=sell_volume,
                step=self.local_step_number)

            # Get PnL from any filled MARKET orders AND action penalties for invalid
            # actions made by the agent for future discouragement
            step_reward, market_pnl = self.map_action_to_broker(
                action=step_action)
            step_pnl = limit_pnl + step_reward + market_pnl

            # step thru pnl_norm if not None
            if self.pnl_norm:
                self.pnl_norm.step(pnl=self.broker.get_unrealized_pnl(
                    bid_price=self.best_bid, ask_price=self.best_ask))

            self.reward += self._get_step_reward(step_pnl=step_pnl,
                                                 long_filled=long_filled,
                                                 short_filled=short_filled)

            step_observation = self._get_step_observation(action=action)
            self.data_buffer.append(step_observation)

            if len(self.data_buffer) > self.window_size:
                del self.data_buffer[0]

            self.local_step_number += self.step_size
            self.last_midpoint = self.midpoint

        self.observation = self._get_observation()

        if self.local_step_number > self.max_steps:
            self.done = True
            flatten_pnl = self.broker.flatten_inventory(
                self.best_bid, self.best_ask)
            self.reward += self._get_step_reward(step_pnl=flatten_pnl,
                                                 long_filled=False,
                                                 short_filled=False)

        return self.observation, self.reward, self.done, {}

    def reset(self):
        """
        Reset the environment.
        :return: (np.array) Observation at first step
        """
        if self.training:
            self.local_step_number = self._random_state.randint(
                low=0, high=self.data.shape[0] // 5)
        else:
            self.local_step_number = 0

        msg = (' {}-{} reset. Episode pnl: {:.4f} with {} trades. '
               'Avg. Trade PnL: {:.4f}.  First step: {}').format(
                   self.sym, self._seed, self.broker.realized_pnl,
                   self.broker.total_trade_count,
                   self.broker.average_trade_pnl, self.local_step_number)
        print(msg)

        self.reward = 0.0
        self.done = False
        self.broker.reset()
        self.data_buffer.clear()
        self.rsi.reset()
        self.tns.reset()
        if self.pnl_norm:
            self.pnl_norm.reset()

        for step in range(self.window_size + INDICATOR_WINDOW_MAX + 1):
            self.midpoint = self.prices_[self.local_step_number]
            self.best_bid, self.best_ask = self._get_nbbo()

            step_buy_volume = self._get_book_data(
                BaseEnvironment.buy_trade_index)
            step_sell_volume = self._get_book_data(
                BaseEnvironment.sell_trade_index)
            self.tns.step(buys=step_buy_volume, sells=step_sell_volume)
            self.rsi.step(price=self.midpoint)

            # step thru pnl_norm if not None
            if self.pnl_norm:
                self.pnl_norm.step(pnl=self.broker.get_unrealized_pnl(
                    bid_price=self.best_bid, ask_price=self.best_ask))

            step_observation = self._get_step_observation(action=0)
            self.data_buffer.append(step_observation)

            self.local_step_number += self.step_size
            self.last_midpoint = self.midpoint
            if len(self.data_buffer) > self.window_size:
                del self.data_buffer[0]

        self.midpoint_change = (self.midpoint / self.last_midpoint) - 1.
        self.observation = self._get_observation()

        return self.observation

    def render(self, mode='human'):
        """
        Render midpoint prices
        :param mode: (str) flag for type of rendering. Only 'human' supported.
        :return: (void)
        """
        self._render.render(midpoint=self.midpoint, mode=mode)

    def close(self):
        """
        Free clear memory when closing environment
        :return: (void)
        """
        self.data = None
        self.normalized_data = None
        self.prices_ = None
        self.broker.reset()
        self.data_buffer.clear()
        self.sim = None
        self.tns = None
        self.rsi = None
        self.pnl_norm = None

    def seed(self, seed=1):
        """
        Set random seed in environment
        :param seed: (int) random seed number
        :return: (list) seed number in a list
        """
        self._random_state = np.random.RandomState(seed=seed)
        self._seed = seed
        return [seed]

    @staticmethod
    def _process_data(_next_state):
        """
        Reshape observation for function approximator
        :param _next_state: observation space
        :return: (np.array) clipped observation space
        """
        return np.clip(_next_state.reshape((1, -1)), -10, 10)

    def _create_action_features(self, action):
        """
        Create a features array for the current time step's action.
        :param action: (int) action number
        :return: (np.array) One-hot of current action
        """
        return self.actions[action]

    def _create_indicator_features(self):
        """
        Create features vector with environment indicators.
        :return: (np.array) Indicator values for current time step
        """
        return np.array((*self.tns.get_value(), *self.rsi.get_value()),
                        dtype=np.float32).reshape(1, -1)

    def _get_nbbo(self):
        """
        Get best bid and offer
        :return: (tuple) best bid and offer
        """
        best_bid = round(
            self.midpoint -
            self._get_book_data(BaseEnvironment.best_bid_index), 2)
        best_ask = round(
            self.midpoint +
            self._get_book_data(BaseEnvironment.best_ask_index), 2)
        return best_bid, best_ask

    def _get_book_data(self, index=0):
        """
        Return step 'n' of order book snapshot data
        :param index: (int) step 'n' to look up in order book snapshot history
        :return: (np.array) order book snapshot vector
        """
        return self.data[self.local_step_number][index]

    def _get_step_observation(self, action=0):
        """
        Current step observation, NOT including historical data.
        :param action: (int) current step action
        :return: (np.array) Current step observation
        """
        step_position_features = self._create_position_features()
        step_action_features = self._create_action_features(action=action)
        step_indicator_features = self._create_indicator_features()
        return np.concatenate(
            (self._process_data(self.normalized_data[self.local_step_number]),
             step_indicator_features, step_position_features,
             step_action_features, np.array([self.reward], dtype=np.float32)),
            axis=None)

    def _get_observation(self):
        """
        Current step observation, including historical data.

        If format_3d is TRUE: Expand the observation space from 2 to 3 dimensions.
        (note: This is necessary for conv nets in Baselines.)
        :return: (np.array) Observation state for current time step
        """
        # Note: reversing the data to chronological order is actually faster when
        # making an array in Python / Numpy, which is odd. #timeit
        observation = np.asarray(self.data_buffer, dtype=np.float32)
        if self.format_3d:
            observation = np.expand_dims(observation, axis=-1)
        return observation
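To make the piecewise reward above concrete, here is a standalone restatement of _trade_completion_reward evaluated at a few sample PnL values; the fee constant is an assumed value for illustration, not the project's actual MARKET_ORDER_FEE:

MARKET_ORDER_FEE = 0.002  # assumed value, for illustration only

def trade_completion_reward(step_pnl: float) -> float:
    # Mirrors the branch structure of _trade_completion_reward above.
    if step_pnl > MARKET_ORDER_FEE * 2:   # at least a 2:1 profit-to-fee ratio
        return 1.0
    if step_pnl > 0.0:                    # small win: pass the pnl through
        return step_pnl
    if step_pnl < -MARKET_ORDER_FEE:      # loss beyond the fee: full penalty
        return -1.0
    return -step_pnl                      # loss within the fee band (sign mirrors the original)

for pnl in (0.006, 0.001, -0.001, -0.008):
    print('{:+.3f} -> {:+.3f}'.format(pnl, trade_completion_reward(pnl)))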
Code Example #8
class MarketMaker(Env):
    # gym.env required
    metadata = {'render.modes': ['human']}
    id = 'market-maker-v0'

    # constants
    inventory_features = [
        'long_inventory', 'short_inventory',
        'total_unrealized_and_realized_pnl', 'long_unrealized_pnl',
        'short_unrealized_pnl', 'buy_distance_to_midpoint',
        'short_distance_to_midpoint', 'buy_queue_vol', 'short_queue_vol'
    ]
    # Set to True if Bitfinex is in the dataset (e.g., include_bitfinex=True)
    features = Sim.get_feature_labels(include_system_time=False,
                                      include_bitfinex=False)
    indicator_features = ['tns', 'rsi']
    best_bid_index = features.index('coinbase-bid-distance-0')
    best_ask_index = features.index('coinbase-ask-distance-0')
    notional_bid_index = features.index('coinbase-bid-notional-0')
    notional_ask_index = features.index('coinbase-ask-notional-0')

    buy_trade_index = features.index('coinbase-buys')
    sell_trade_index = features.index('coinbase-sells')

    target_pnl = BROKER_FEE * 10 * 5  # e.g., 5 for max_positions

    def __init__(self,
                 *,
                 training=True,
                 fitting_file='ETH-USD_2018-12-31.xz',
                 testing_file='ETH-USD_2019-01-01.xz',
                 step_size=1,
                 max_position=5,
                 window_size=10,
                 seed=1,
                 action_repeats=10,
                 format_3d=False):

        # properties required for instantiation
        self.action_repeats = action_repeats
        self._seed = seed
        self._random_state = np.random.RandomState(seed=self._seed)
        self.training = training
        self.step_size = step_size
        self.max_position = max_position
        self.window_size = window_size
        self.format_3d = format_3d  # e.g., [window, features, *NEW_AXIS*]

        self.action = 0
        # derive gym.env properties
        self.actions = np.eye(17)

        self.sym = testing_file[:7]  # slice the CCY from the filename

        # properties that get reset()
        self.reward = 0.0
        self.done = False
        self.local_step_number = 0
        self.midpoint = 0.0
        self.observation = None

        # get Broker class to keep track of PnL and orders
        self.broker = Broker(max_position=max_position)
        # get historical data for simulations
        self.sim = Sim(use_arctic=False)

        fitting_data_filepath = '{}/data_exports/{}'.format(
            self.sim.cwd, fitting_file)
        data_used_in_environment = '{}/data_exports/{}'.format(
            self.sim.cwd, testing_file)
        # print('Fitting data: {}\nTesting Data: {}'.format(fitting_data_filepath,
        #                                                data_used_in_environment))

        fitting_data = self.sim.import_csv(filename=fitting_data_filepath)
        fitting_data['coinbase_midpoint'] = np.log(
            fitting_data['coinbase_midpoint'].values)
        fitting_data['coinbase_midpoint'] = (
            fitting_data['coinbase_midpoint'] -
            fitting_data['coinbase_midpoint'].shift(1)).fillna(method='bfill')
        self.sim.fit_scaler(fitting_data)
        del fitting_data

        self.data = self.sim.import_csv(filename=data_used_in_environment)
        self.prices_ = self.data[
            'coinbase_midpoint'].values  # used to calculate PnL

        self.normalized_data = self.data.copy()
        self.data = self.data.values

        self.max_steps = self.data.shape[0] - self.step_size * \
                         self.action_repeats - 1

        self.normalized_data['coinbase_midpoint'] = \
            np.log(self.normalized_data['coinbase_midpoint'].values)
        self.normalized_data['coinbase_midpoint'] = (
            self.normalized_data['coinbase_midpoint'] -
            self.normalized_data['coinbase_midpoint'].shift(1)).fillna(
                method='bfill')

        self.tns = TnS()
        self.rsi = RSI()

        logger.info("Pre-scaling {}-{} data...".format(self.sym, self._seed))
        self.normalized_data = self.normalized_data.apply(self.sim.z_score,
                                                          axis=1).values
        logger.info("...{}-{} pre-scaling complete.".format(
            self.sym, self._seed))

        # rendering class
        self._render = TradingGraph(sym=self.sym)
        # graph midpoint prices
        self._render.reset_render_data(
            y_vec=self.prices_[:np.shape(self._render.x_vec)[0]])

        self.data_buffer = list()

        self.action_space = spaces.Discrete(len(self.actions))

        variable_features_count = len(self.inventory_features) + len(self.actions) + 1 + \
                                  len(MarketMaker.indicator_features)

        if self.format_3d:
            shape = (self.window_size,
                     len(MarketMaker.features) + variable_features_count, 1)
        else:
            shape = (self.window_size,
                     len(MarketMaker.features) + variable_features_count)

        self.observation_space = spaces.Box(low=self.data.min(),
                                            high=self.data.max(),
                                            shape=shape,
                                            dtype=np.float32)  # observations are floats; np.int is removed in newer NumPy

        print(
            'MarketMaker #{} instantiated.\nself.observation_space.shape : {}'.
            format(self._seed, self.observation_space.shape))

    def __str__(self):
        return '{} | {}-{}'.format(MarketMaker.id, self.sym, self._seed)

    def step(self, action):

        for current_step in range(self.action_repeats):

            if self.done:
                self.reset()
                return self.observation, self.reward, self.done, {}

            # reset the reward if there ARE action repeats
            if current_step == 0:
                self.reward = 0.
                step_action = action
            else:
                step_action = 0

            # Get current step's midpoint
            self.midpoint = self.prices_[self.local_step_number]
            # Pass current time step midpoint to broker to calculate PnL,
            # or if any open orders are to be filled
            step_best_bid, step_best_ask = self._get_nbbo()
            buy_volume = self._get_book_data(MarketMaker.buy_trade_index)
            sell_volume = self._get_book_data(MarketMaker.sell_trade_index)

            self.tns.step(buys=buy_volume, sells=sell_volume)
            self.rsi.step(price=self.midpoint)

            step_reward = self.broker.step(bid_price=step_best_bid,
                                           ask_price=step_best_ask,
                                           buy_volume=buy_volume,
                                           sell_volume=sell_volume,
                                           step=self.local_step_number)

            self.reward += self._send_to_broker_and_get_reward(step_action)
            self.reward += step_reward

            step_position_features = self._create_position_features()
            step_action_features = self._create_action_features(
                action=step_action)
            step_indicator_features = self._create_indicator_features()

            step_observation = np.concatenate(
                (self.process_data(
                    self.normalized_data[self.local_step_number]),
                 step_indicator_features, step_position_features,
                 step_action_features, np.array([self.reward],
                                                dtype=np.float32)),
                axis=None)
            self.data_buffer.append(step_observation)

            if len(self.data_buffer) > self.window_size:
                del self.data_buffer[0]

            self.local_step_number += self.step_size

        self.observation = np.array(self.data_buffer, dtype=np.float32)

        # Expand the observation space from 2 to 3 dimensions.
        # This is necessary for conv nets in Baselines.
        if self.format_3d:
            self.observation = np.expand_dims(self.observation, axis=-1)

        if self.local_step_number > self.max_steps:
            self.done = True
            self.reward += self.broker.flatten_inventory(*self._get_nbbo())

        return self.observation, self.reward, self.done, {}

    def reset(self):
        if self.training:
            self.local_step_number = self._random_state.randint(
                low=1, high=self.data.shape[0] // 4)
        else:
            self.local_step_number = 0

        msg = ' {}-{} reset. Episode pnl: {:.4f} with {} trades | First step: {}'.format(
            self.sym, self._seed,
            self.broker.get_total_pnl(midpoint=self.midpoint),
            self.broker.get_total_trade_count(), self.local_step_number)
        logger.info(msg)
        self.reward = 0.0
        self.done = False
        self.broker.reset()
        self.data_buffer.clear()
        self.rsi.reset()
        self.tns.reset()

        for step in range(self.window_size + self.tns.window):

            self.midpoint = self.prices_[self.local_step_number]

            step_buy_volume = self._get_book_data(MarketMaker.buy_trade_index)
            step_sell_volume = self._get_book_data(
                MarketMaker.sell_trade_index)

            self.tns.step(buys=step_buy_volume, sells=step_sell_volume)
            self.rsi.step(price=self.midpoint)

            step_position_features = self._create_position_features()
            step_action_features = self._create_action_features(action=0)
            step_indicator_features = self._create_indicator_features()

            step_observation = np.concatenate(
                (self.process_data(
                    self.normalized_data[self.local_step_number]),
                 step_indicator_features, step_position_features,
                 step_action_features, np.array([self.reward])),
                axis=None)
            self.data_buffer.append(step_observation)
            self.local_step_number += self.step_size

            if len(self.data_buffer) > self.window_size:
                del self.data_buffer[0]

        self.observation = np.array(self.data_buffer, dtype=np.float32)

        # Expand the observation space from 2 to 3 dimensions.
        # This is necessary for conv nets in Baselines.
        if self.format_3d:
            self.observation = np.expand_dims(self.observation, axis=-1)

        return self.observation

    def render(self, mode='human'):
        self._render.render(midpoint=self.midpoint, mode=mode)

    def close(self):
        logger.info('{}-{} is being closed.'.format(self.id, self.sym))
        self.data = None
        self.normalized_data = None
        self.prices_ = None
        self.broker = None
        self.sim = None
        self.data_buffer = None
        self.tns = None
        self.rsi = None
        return

    def seed(self, seed=1):
        self._random_state = np.random.RandomState(seed=seed)
        self._seed = seed
        logger.info('Setting seed in MarketMaker.seed({})'.format(seed))
        return [seed]

    @staticmethod
    def process_data(_next_state):
        return np.clip(_next_state.reshape((1, -1)), -10., 10.)

    # def process_data(self, _next_state):
    #     # return self.sim.scale_state(_next_state).values.reshape((1, -1))
    #     return np.reshape(_next_state, (1, -1))

    def _send_to_broker_and_get_reward(self, action):
        reward = 0.0
        discouragement = 0.000000000001

        if action == 0:  # do nothing
            reward += discouragement

        elif action == 1:
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=0,
                                                  side='long')
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=4,
                                                  side='short')

        elif action == 2:
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=0,
                                                  side='long')
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=9,
                                                  side='short')
        elif action == 3:
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=0,
                                                  side='long')
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=14,
                                                  side='short')

        elif action == 4:
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=4,
                                                  side='long')
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=0,
                                                  side='short')

        elif action == 5:
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=4,
                                                  side='long')
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=4,
                                                  side='short')

        elif action == 6:

            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=4,
                                                  side='long')
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=9,
                                                  side='short')
        elif action == 7:

            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=4,
                                                  side='long')
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=14,
                                                  side='short')

        elif action == 8:
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=9,
                                                  side='long')
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=0,
                                                  side='short')

        elif action == 9:
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=9,
                                                  side='long')
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=4,
                                                  side='short')

        elif action == 10:
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=9,
                                                  side='long')
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=9,
                                                  side='short')

        elif action == 11:
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=9,
                                                  side='long')
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=14,
                                                  side='short')

        elif action == 12:
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=14,
                                                  side='long')
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=0,
                                                  side='short')

        elif action == 13:
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=14,
                                                  side='long')
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=4,
                                                  side='short')

        elif action == 14:
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=14,
                                                  side='long')
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=9,
                                                  side='short')

        elif action == 15:
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=14,
                                                  side='long')
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=14,
                                                  side='short')
        elif action == 16:
            reward += self.broker.flatten_inventory(*self._get_nbbo())
        else:
            logger.info("L'action n'exist pas ! Il faut faire attention !")

        return reward

    def _create_position_features(self):
        return np.array(
            (self.broker.long_inventory.position_count / self.max_position,
             self.broker.short_inventory.position_count / self.max_position,
             self.broker.get_total_pnl(midpoint=self.midpoint) /
             MarketMaker.target_pnl,
             self.broker.long_inventory.get_unrealized_pnl(self.midpoint) /
             self.broker.reward_scale,
             self.broker.short_inventory.get_unrealized_pnl(self.midpoint) /
             self.broker.reward_scale,
             self.broker.get_long_order_distance_to_midpoint(
                 midpoint=self.midpoint),
             self.broker.get_short_order_distance_to_midpoint(
                 midpoint=self.midpoint),
             *self.broker.get_queues_ahead_features()),
            dtype=np.float32)

    def _create_action_features(self, action):
        return self.actions[action]

    def _create_indicator_features(self):
        return np.array((self.tns.get_value(), self.rsi.get_value()),
                        dtype=np.float32)

    def _create_order_at_level(self,
                               reward,
                               discouragement,
                               level=0,
                               side='long'):
        adjustment = 1 if level > 0 else 0

        if side == 'long':
            best_bid = self._get_book_data(MarketMaker.best_bid_index + level)
            above_best_bid = round(
                self._get_book_data(MarketMaker.best_bid_index + level -
                                    adjustment), 2)
            price_improvement_bid = round(best_bid + 0.01, 2)

            if above_best_bid == price_improvement_bid:
                bid_price = round(self.midpoint - best_bid, 2)
                bid_queue_ahead = self._get_book_data(
                    MarketMaker.notional_bid_index)
            else:
                bid_price = round(self.midpoint - price_improvement_bid, 2)
                bid_queue_ahead = 0.

            bid_order = Order(ccy=self.sym,
                              side='long',
                              price=bid_price,
                              step=self.local_step_number,
                              queue_ahead=bid_queue_ahead)

            if self.broker.add(order=bid_order) is False:
                reward -= discouragement
            else:
                reward += discouragement

        if side == 'short':
            best_ask = self._get_book_data(MarketMaker.best_ask_index + level)  # ask-side index for short orders
            above_best_ask = round(
                self._get_book_data(MarketMaker.best_ask_index + level -
                                    adjustment), 2)
            price_improvement_ask = round(best_ask - 0.01, 2)

            if above_best_ask == price_improvement_ask:
                ask_price = round(self.midpoint + best_ask, 2)
                ask_queue_ahead = self._get_book_data(
                    MarketMaker.notional_ask_index)
            else:
                ask_price = round(self.midpoint + price_improvement_ask, 2)
                ask_queue_ahead = 0.

            ask_order = Order(ccy=self.sym,
                              side='short',
                              price=ask_price,
                              step=self.local_step_number,
                              queue_ahead=ask_queue_ahead)

            if self.broker.add(order=ask_order) is False:
                reward -= discouragement
            else:
                reward += discouragement

        return reward

    def _get_nbbo(self):
        best_bid = round(
            self.midpoint - self._get_book_data(MarketMaker.best_bid_index), 2)
        best_ask = round(
            self.midpoint + self._get_book_data(MarketMaker.best_ask_index), 2)
        return best_bid, best_ask

    def _get_book_data(self, index=0):
        return self.data[self.local_step_number][index]
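The price-improvement branch in _create_order_at_level() above is easiest to see in isolation. Below is a minimal, self-contained sketch (not part of the crypto-rl source) of the same decision for the bid side; the book features are treated as distances from the midpoint, as implied by _get_nbbo(), and the helper name choose_bid_price is hypothetical.

def choose_bid_price(midpoint: float,
                     best_bid_distance: float,
                     next_level_distance: float,
                     notional_at_best: float):
    """Return (limit price, queue ahead) for a bid at the chosen level."""
    price_improvement = round(best_bid_distance + 0.01, 2)
    if round(next_level_distance, 2) == price_improvement:
        # No one-cent gap above this level: join it and queue behind the resting notional.
        return round(midpoint - best_bid_distance, 2), notional_at_best
    # There is room to improve by one cent, so post a new level with an empty queue.
    return round(midpoint - price_improvement, 2), 0.


if __name__ == '__main__':
    print(choose_bid_price(60.00, 0.05, 0.06, 1250.0))  # -> (59.95, 1250.0): join the level
    print(choose_bid_price(60.00, 0.05, 0.10, 1250.0))  # -> (59.94, 0.0): improve the price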
Code example #9
class BaseEnvironment(Env, ABC):
    metadata = {'render.modes': ['human']}

    # Index of specific data points used to generate the observation space
    # Set to True if Bitfinex data is in the dataset (e.g., include_bitfinex=True)
    features = Sim.get_feature_labels(include_system_time=False,
                                      include_bitfinex=False)
    best_bid_index = features.index('coinbase_bid_distance_0')
    best_ask_index = features.index('coinbase_ask_distance_0')
    notional_bid_index = features.index('coinbase_bid_notional_0')
    notional_ask_index = features.index('coinbase_ask_notional_0')
    buy_trade_index = features.index('coinbase_buys')
    sell_trade_index = features.index('coinbase_sells')

    # Constants for scaling data
    target_pnl = 0.03  # 3.0% gain per episode (i.e., day)

    def __init__(self,
                 fitting_file='LTC-USD_2019-04-07.csv.xz',
                 testing_file='LTC-USD_2019-04-08.csv.xz',
                 step_size=1,
                 max_position=5,
                 window_size=10,
                 seed=1,
                 action_repeats=10,
                 training=True,
                 format_3d=False,
                 z_score=True,
                 reward_type='trade_completion',
                 scale_rewards=True):
        """
        Base class for creating environments extending OpenAI's GYM framework.

        :param fitting_file: historical data used to fit environment data (i.e.,
            previous trading day)
        :param testing_file: historical data used in environment
        :param step_size: increment size for steps (NOTE: leave as 1, otherwise market
            transaction data will be overlooked)
        :param max_position: maximum number of positions able to hold in inventory
        :param window_size: number of lags to include in observation space
        :param seed: random seed number
        :param action_repeats: number of steps to take in environment after a given action
        :param training: if TRUE, then randomize starting point in environment
        :param format_3d: if TRUE, reshape observation space from matrix to tensor
        :param z_score: if TRUE, normalize data set with Z-Score, otherwise use Min-Max
            (i.e., range of 0 to 1)
        :param reward_type: method for calculating the environment's reward:
            1) 'trade_completion' --> reward is generated per trade's round trip
            2) 'continuous_total_pnl' --> change in realized & unrealized pnl between
                                            time steps
            3) 'continuous_realized_pnl' --> change in realized pnl between time steps
            4) 'continuous_unrealized_pnl' --> change in unrealized pnl between time steps
        """
        # properties required for instantiation
        self.action_repeats = action_repeats
        self._seed = seed
        self._random_state = np.random.RandomState(seed=self._seed)
        self.training = training
        self.step_size = step_size
        self.max_position = max_position
        self.window_size = window_size
        self.reward_type = reward_type
        self.format_3d = format_3d  # e.g., [window, features, *NEW_AXIS*]
        self.sym = testing_file[:7]  # slice the CCY from the filename
        self.scale_rewards = scale_rewards

        # properties that get reset()
        self.reward = 0.0
        self.done = False
        self.local_step_number = 0
        self.midpoint = 0.0
        self.observation = None
        self.action = 0
        self.last_pnl = 0.

        # properties to override in sub-classes
        self.actions = None
        self.broker = None
        self.action_space = None
        self.observation_space = None

        # get historical data for simulations
        self.sim = Sim(use_arctic=False, z_score=z_score)

        self.prices_, self.data, self.normalized_data = self.sim.load_environment_data(
            fitting_file, testing_file)
        self.best_bid = self.best_ask = None

        self.max_steps = self.data.shape[
            0] - self.step_size * self.action_repeats - 1

        # load indicators into the indicator manager
        self.tns = IndicatorManager()
        self.rsi = IndicatorManager()
        for window in INDICATOR_WINDOW:
            self.tns.add(('tns_{}'.format(window), TnS(window=window)))
            self.rsi.add(('rsi_{}'.format(window), RSI(window=window)))

        # rendering class
        self._render = TradingGraph(sym=self.sym)

        # graph midpoint prices
        self._render.reset_render_data(
            y_vec=self.prices_[:np.shape(self._render.x_vec)[0]])

        # buffer for appending lags
        self.data_buffer = list()

    @abstractmethod
    def map_action_to_broker(self, action: int):
        """
        Translate agent's action into an order and submit order to broker.
        :param action: (int) agent's action for current step
        :return: (tuple) reward, pnl
        """
        return 0., 0.

    @abstractmethod
    def _create_position_features(self):
        """
        Create agent space feature set reflecting the positions held in inventory.
        :return: (np.array) position features
        """
        return np.array([np.nan], dtype=np.float32)

    def _get_step_reward(self, step_pnl: float):
        """
        Get reward for current time step.
            Note: 'reward_type' is set during environment instantiation.
        :param step_pnl: (float) PnL accrued from order fills at current time step
        :return: (float) reward
        """
        reward = 0.
        if self.reward_type == 'trade_completion':
            reward += step_pnl
            # Note: we do not need to update last_pnl for this reward approach
        elif self.reward_type == 'continuous_total_pnl':
            new_pnl = self.broker.get_total_pnl(self.best_bid, self.best_ask)
            reward += new_pnl - self.last_pnl  # Difference in PnL
            self.last_pnl = new_pnl
        elif self.reward_type == 'continuous_realized_pnl':
            new_pnl = self.broker.realized_pnl
            reward += new_pnl - self.last_pnl  # Difference in PnL
            self.last_pnl = new_pnl
        elif self.reward_type == 'continuous_unrealized_pnl':
            new_pnl = self.broker.get_unrealized_pnl(self.best_bid,
                                                     self.best_ask)
            reward += new_pnl - self.last_pnl  # Difference in PnL
            self.last_pnl = new_pnl
        else:
            print("_get_step_reward() Unknown reward_type: {}".format(
                self.reward_type))

        if self.scale_rewards:
            reward /= self.broker.reward_scale

        return reward

    def step(self, action: int):
        """
        Step through environment with action
        :param action: (int) action to take in environment
        :return: (tuple) observation, reward, is_done, and empty `dict`
        """
        for current_step in range(self.action_repeats):

            if self.done:
                self.reset()
                return self.observation, self.reward, self.done, {}

            # reset the reward if there ARE action repeats
            if current_step == 0:
                self.reward = 0.
                step_action = action
            else:
                step_action = 0

            # Get current step's midpoint
            self.midpoint = self.prices_[self.local_step_number]

            # Pass current time step bid/ask prices to broker to calculate PnL,
            # or if any open orders are to be filled
            self.best_bid, self.best_ask = self._get_nbbo()
            buy_volume = self._get_book_data(BaseEnvironment.buy_trade_index)
            sell_volume = self._get_book_data(BaseEnvironment.sell_trade_index)

            # Update indicators
            self.tns.step(buys=buy_volume, sells=sell_volume)
            self.rsi.step(price=self.midpoint)

            # Get PnL from any filled LIMIT orders
            limit_pnl = self.broker.step_limit_order_pnl(
                bid_price=self.best_bid,
                ask_price=self.best_ask,
                buy_volume=buy_volume,
                sell_volume=sell_volume,
                step=self.local_step_number)

            # Get PnL from any filled MARKET orders AND action penalties for invalid
            # actions made by the agent for future discouragement
            step_reward, market_pnl = self.map_action_to_broker(
                action=step_action)
            step_pnl = limit_pnl + step_reward + market_pnl
            self.reward += self._get_step_reward(step_pnl=step_pnl)

            step_observation = self._get_step_observation(action=action)
            self.data_buffer.append(step_observation)

            if len(self.data_buffer) > self.window_size:
                del self.data_buffer[0]

            self.local_step_number += self.step_size

        self.observation = self._get_observation()

        if self.local_step_number > self.max_steps:
            self.done = True
            flatten_pnl = self.broker.flatten_inventory(
                self.best_bid, self.best_ask)
            self.reward += self._get_step_reward(step_pnl=flatten_pnl)

        return self.observation, self.reward, self.done, {}

    def reset(self):
        """
        Reset the environment.
        :return: (np.array) Observation at first step
        """
        if self.training:
            self.local_step_number = self._random_state.randint(
                low=0, high=self.data.shape[0] // 4)
        else:
            self.local_step_number = 0

        msg = ' {}-{} reset. Episode pnl: {:.4f} with {} trades. First step: {}'.format(
            self.sym, self._seed,
            self.broker.get_total_pnl(self.best_bid, self.best_ask),
            self.broker.total_trade_count, self.local_step_number)
        print(msg)

        self.reward = 0.0
        self.done = False
        self.broker.reset()
        self.data_buffer.clear()
        self.rsi.reset()
        self.tns.reset()

        for step in range(self.window_size + INDICATOR_WINDOW_MAX):
            self.midpoint = self.prices_[self.local_step_number]
            self.best_bid, self.best_ask = self._get_nbbo()

            step_buy_volume = self._get_book_data(
                BaseEnvironment.buy_trade_index)
            step_sell_volume = self._get_book_data(
                BaseEnvironment.sell_trade_index)
            self.tns.step(buys=step_buy_volume, sells=step_sell_volume)
            self.rsi.step(price=self.midpoint)

            step_observation = self._get_step_observation(action=0)
            self.data_buffer.append(step_observation)

            self.local_step_number += self.step_size
            if len(self.data_buffer) > self.window_size:
                del self.data_buffer[0]

        self.observation = self._get_observation()

        return self.observation

    def render(self, mode='human'):
        """
        Render midpoint prices
        :param mode: (str) flag for type of rendering. Only 'human' supported.
        :return: (void)
        """
        self._render.render(midpoint=self.midpoint, mode=mode)

    def close(self):
        """
        Free up memory when closing the environment
        :return: (void)
        """
        self.data = None
        self.normalized_data = None
        self.prices_ = None
        self.broker = None
        self.sim = None
        self.data_buffer = None
        self.tns = None
        self.rsi = None

    def seed(self, seed=1):
        """
        Set random seed in environment
        :param seed: (int) random seed number
        :return: (list) seed number in a list
        """
        self._random_state = np.random.RandomState(seed=seed)
        self._seed = seed
        return [seed]

    @staticmethod
    def _process_data(_next_state):
        """
        Reshape observation for function approximator
        :param _next_state: observation space
        :return: (np.array) clipped observation space
        """
        return _next_state.reshape((1, -1))

    def _create_action_features(self, action):
        """
        Create a features array for the current time step's action.
        :param action: (int) action number
        :return: (np.array) One-hot of current action
        """
        return self.actions[action]

    def _create_indicator_features(self):
        """
        Create features vector with environment indicators.
        :return: (np.array) Indicator values for current time step
        """
        return np.array((*self.tns.get_value(), *self.rsi.get_value()),
                        dtype=np.float32)

    def _get_nbbo(self):
        """
        Get best bid and offer
        :return: (tuple) best bid and offer
        """
        best_bid = round(
            self.midpoint -
            self._get_book_data(BaseEnvironment.best_bid_index), 2)
        best_ask = round(
            self.midpoint +
            self._get_book_data(BaseEnvironment.best_ask_index), 2)
        return best_bid, best_ask

    def _get_book_data(self, index=0):
        """
        Return step 'n' of order book snapshot data
        :param index: (int) step 'n' to look up in order book snapshot history
        :return: (np.array) order book snapshot vector
        """
        return self.data[self.local_step_number][index]

    def _get_step_observation(self, action=0):
        """
        Current step observation, NOT including historical data.
        :param action: (int) current step action
        :return: (np.array) Current step observation
        """
        step_position_features = self._create_position_features()
        step_action_features = self._create_action_features(action=action)
        step_indicator_features = self._create_indicator_features()
        return np.concatenate(
            (self._process_data(self.normalized_data[self.local_step_number]),
             step_indicator_features, step_position_features,
             step_action_features, np.array([self.reward])),
            axis=None)

    def _get_observation(self):
        """
        Current step observation, including historical data.

        If format_3d is TRUE: Expand the observation space from 2 to 3 dimensions.
        (note: This is necessary for conv nets in Baselines.)
        :return: (np.array) Observation state for current time step
        """
        observation = np.array(self.data_buffer, dtype=np.float32)
        if self.format_3d:
            observation = np.expand_dims(observation, axis=-1)
        return observation
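BaseEnvironment leaves self.actions, self.broker, and the gym spaces as None and declares map_action_to_broker() and _create_position_features() as abstract, so a concrete environment has to fill those in. The following is a hedged sketch of such a subclass, not code from the repository: the 3-action layout, the Broker constructor call, and the placeholder return values are assumptions chosen to mirror the concrete classes shown further below.

import numpy as np
from gym import spaces


class ExampleEnvironment(BaseEnvironment):

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # do-nothing / buy / sell as a one-hot lookup table (an illustrative choice)
        self.actions = np.eye(3, dtype=np.float32)
        self.broker = Broker(max_position=self.max_position)
        self.action_space = spaces.Discrete(len(self.actions))
        self.reset()  # populate self.observation so its shape is known
        self.observation_space = spaces.Box(low=-10., high=10.,
                                            shape=self.observation.shape,
                                            dtype=np.float32)

    def map_action_to_broker(self, action: int):
        # Translate the discrete action into orders; return (penalty or bonus, pnl).
        return 0., 0.

    def _create_position_features(self):
        return np.array(
            (self.broker.long_inventory.position_count / self.max_position,
             self.broker.short_inventory.position_count / self.max_position),
            dtype=np.float32)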
Code example #10
class PriceJump(Env):

    metadata = {'render.modes': ['human']}
    id = 'long-short-v0'
    action_repeats = 4
    inventory_features = [
        'long_inventory', 'short_inventory',
        'total_unrealized_and_realized_pnl', 'long_unrealized_pnl',
        'short_unrealized_pnl'
    ]
    # Set to True if Bitfinex data is in the dataset (e.g., include_bitfinex=True)
    features = Sim.get_feature_labels(include_system_time=False,
                                      include_bitfinex=False)
    indicator_features = ['tns', 'rsi']
    best_bid_index = features.index('coinbase-bid-distance-0')
    best_ask_index = features.index('coinbase-ask-distance-0')
    notional_bid_index = features.index('coinbase-bid-notional-0')
    notional_ask_index = features.index('coinbase-ask-notional-0')

    buy_trade_index = features.index('coinbase-buys')
    sell_trade_index = features.index('coinbase-sells')
    instance_count = 0

    def __init__(self,
                 *,
                 training=True,
                 fitting_file='ETH-USD_2018-12-31.xz',
                 testing_file='ETH-USD_2019-01-01.xz',
                 step_size=1,
                 max_position=5,
                 window_size=4,
                 frame_stack=False):

        # properties required for instantiation
        PriceJump.instance_count += 1
        self._seed = int(PriceJump.instance_count)  # seed
        self._random_state = np.random.RandomState(seed=self._seed)
        self.training = training
        self.step_size = step_size
        self.fee = BROKER_FEE
        self.max_position = max_position
        self.window_size = window_size
        self.frame_stack = frame_stack
        self.frames_to_add = 3 if self.frame_stack else 0

        self.action = 0
        # derive gym.env properties
        self.actions = np.eye(3)

        self.sym = testing_file[:7]  # slice the CCY from the filename

        # properties that get reset()
        self.reward = 0.0
        self.done = False
        self.local_step_number = 0
        self.midpoint = 0.0
        self.observation = None

        # get Broker class to keep track of PnL and orders
        self.broker = Broker(max_position=max_position)
        # get historical data for simulations
        self.sim = Sim(use_arctic=False)

        fitting_data_filepath = '{}/data_exports/{}'.format(
            self.sim.cwd, fitting_file)
        data_used_in_environment = '{}/data_exports/{}'.format(
            self.sim.cwd, testing_file)
        # print('Fitting data: {}\nTesting Data: {}'.format(fitting_data_filepath,
        #                                                data_used_in_environment))

        fitting_data = self.sim.import_csv(filename=fitting_data_filepath)
        fitting_data['coinbase_midpoint'] = np.log(
            fitting_data['coinbase_midpoint'].values)
        fitting_data['coinbase_midpoint'] = fitting_data['coinbase_midpoint']. \
            pct_change().fillna(method='bfill')
        self.sim.fit_scaler(fitting_data)
        del fitting_data

        self.data = self.sim.import_csv(filename=data_used_in_environment)
        self.prices_ = self.data[
            'coinbase_midpoint'].values  # used to calculate PnL

        self.normalized_data = self.data.copy()
        self.data = self.data.values

        self.normalized_data['coinbase_midpoint'] = np.log(
            self.normalized_data['coinbase_midpoint'].values)
        self.normalized_data['coinbase_midpoint'] = (
            self.normalized_data['coinbase_midpoint'] -
            self.normalized_data['coinbase_midpoint'].shift(1)).fillna(
                method='bfill')

        self.tns = TnS()
        self.rsi = RSI()

        logger.info("Pre-scaling {}-{} data...".format(self.sym, self._seed))
        self.normalized_data = self.normalized_data.apply(self.sim.z_score,
                                                          axis=1).values
        logger.info("...{}-{} pre-scaling complete.".format(
            self.sym, self._seed))

        # rendering class
        self._render = TradingGraph(sym=self.sym)
        # graph midpoint prices
        self._render.reset_render_data(
            y_vec=self.prices_[:np.shape(self._render.x_vec)[0]])

        self.data_buffer, self.frame_stacker = list(), list()

        self.action_space = spaces.Discrete(len(self.actions))

        variable_features_count = len(self.inventory_features) + len(self.actions) + 1 + \
                                  len(PriceJump.indicator_features)

        if self.frame_stack:
            shape = (4, len(PriceJump.features) + variable_features_count,
                     self.window_size)
        else:
            shape = (self.window_size,
                     len(PriceJump.features) + variable_features_count)

        self.observation_space = spaces.Box(low=self.data.min(),
                                            high=self.data.max(),
                                            shape=shape,
                                            dtype=np.float32)

        print('PriceJump #{} instantiated.\nself.observation_space.shape : {}'.
              format(PriceJump.instance_count, self.observation_space.shape))

    def __str__(self):
        return '{} | {}-{}'.format(PriceJump.id, self.sym, self._seed)

    def step(self, action):

        for current_step in range(PriceJump.action_repeats):

            if self.done:
                self.reset()
                return self.observation, self.reward, self.done, {}

            # reset the reward if there ARE action repeats
            if current_step == 0:
                self.reward = 0.
                step_action = action
            else:
                step_action = 0

            # Get current step's midpoint
            self.midpoint = self.prices_[self.local_step_number]
            # Pass current time step midpoint to broker to calculate PnL,
            # or if any open orders are to be filled
            buy_volume = self._get_book_data(PriceJump.buy_trade_index)
            sell_volume = self._get_book_data(PriceJump.sell_trade_index)

            self.tns.step(buys=buy_volume, sells=sell_volume)
            self.rsi.step(price=self.midpoint)

            self.broker.step(midpoint=self.midpoint)

            self.reward += self._send_to_broker_and_get_reward(
                action=step_action)

            step_position_features = self._create_position_features()
            step_action_features = self._create_action_features(
                action=step_action)
            step_indicator_features = self._create_indicator_features()

            step_observation = np.concatenate(
                (self.process_data(
                    self.normalized_data[self.local_step_number]),
                 step_indicator_features, step_position_features,
                 step_action_features, np.array([self.reward],
                                                dtype=np.float32)),
                axis=None)
            self.data_buffer.append(step_observation)

            if len(self.data_buffer) >= self.window_size:
                self.frame_stacker.append(
                    np.array(self.data_buffer, dtype=np.float32))
                del self.data_buffer[0]

                if len(self.frame_stacker) > self.frames_to_add + 1:
                    del self.frame_stacker[0]

            self.local_step_number += self.step_size

        self.observation = np.array(self.frame_stacker, dtype=np.float32)

        # This removes a dimension to be compatible with the Keras-rl module
        # because Keras-rl uses its own frame-stacker. There are future
        # plans to integrate this repository with more reinforcement learning
        # packages, such as baselines.
        if self.frame_stack is False:
            self.observation = np.squeeze(self.observation, axis=0)

        if self.local_step_number > self.data.shape[0] - 40:
            self.done = True
            order = Order(ccy=self.sym,
                          side=None,
                          price=self.midpoint,
                          step=self.local_step_number)
            self.reward = self.broker.flatten_inventory(order=order)

        return self.observation, self.reward, self.done, {}

    def reset(self):
        if self.training:
            self.local_step_number = self._random_state.randint(
                low=1, high=self.data.shape[0] // 4)
        else:
            self.local_step_number = 0

        logger.info(' {}-{} reset. Episode pnl: {} | First step: {}'.format(
            self.sym, self._seed,
            self.broker.get_total_pnl(midpoint=self.midpoint),
            self.local_step_number))
        self.reward = 0.0
        self.done = False
        self.broker.reset()
        self.data_buffer.clear()
        self.frame_stacker.clear()
        self.rsi.reset()
        self.tns.reset()

        for step in range(self.window_size + self.frames_to_add +
                          self.tns.window):

            self.midpoint = self.prices_[self.local_step_number]

            step_buy_volume = self._get_book_data(PriceJump.buy_trade_index)
            step_sell_volume = self._get_book_data(PriceJump.sell_trade_index)

            self.tns.step(buys=step_buy_volume, sells=step_sell_volume)
            self.rsi.step(price=self.midpoint)

            step_position_features = self._create_position_features()
            step_action_features = self._create_action_features(action=0)
            step_indicator_features = self._create_indicator_features()

            step_observation = np.concatenate(
                (self.process_data(
                    self.normalized_data[self.local_step_number]),
                 step_indicator_features, step_position_features,
                 step_action_features, np.array([self.reward])),
                axis=None)
            self.data_buffer.append(step_observation)
            self.local_step_number += self.step_size

            if step >= self.window_size - 1:
                self.frame_stacker.append(
                    np.array(self.data_buffer, dtype=np.float32))
                del self.data_buffer[0]

                if len(self.frame_stacker) > self.frames_to_add + 1:
                    del self.frame_stacker[0]

        self.observation = np.array(self.frame_stacker, dtype=np.float32)

        # This removes a dimension to be compatible with the Keras-rl module
        # because Keras-rl uses its own frame-stacker. There are future plans
        # to integrate this repository with more reinforcement learning packages,
        # such as baselines.
        if self.frame_stack is False:
            self.observation = np.squeeze(self.observation, axis=0)

        return self.observation

    def render(self, mode='human'):
        self._render.render(midpoint=self.midpoint, mode=mode)

    def close(self):
        logger.info('{}-{} is being closed.'.format(self.id, self.sym))
        self.data = None
        self.normalized_data = None
        self.prices_ = None
        self.broker = None
        self.sim = None
        self.data_buffer = None
        self.tns = None
        self.rsi = None
        return

    def seed(self, seed=1):
        self._random_state = np.random.RandomState(seed=seed)
        self._seed = seed
        logger.info('PriceJump.seed({})'.format(seed))
        return [seed]

    @staticmethod
    def process_data(_next_state):
        return np.clip(_next_state.reshape((1, -1)), -10., 10.)

    # def process_data(self, _next_state):
    #     # return self.sim.scale_state(_next_state).values.reshape((1, -1))
    #     return np.reshape(_next_state, (1, -1))

    def _send_to_broker_and_get_reward(self, action):
        reward = 0.0
        discouragement = 0.000000000001

        if action == 0:  # do nothing
            pass

        elif action == 1:  # buy
            price_fee_adjusted = self.midpoint + (self.fee * self.midpoint)
            if self.broker.short_inventory_count > 0:
                order = Order(ccy=self.sym,
                              side='short',
                              price=price_fee_adjusted,
                              step=self.local_step_number)
                self.broker.remove(order=order)
                reward += self.broker.get_reward(side=order.side)

            elif self.broker.long_inventory_count >= 0:
                order = Order(ccy=self.sym,
                              side='long',
                              price=price_fee_adjusted,
                              step=self.local_step_number)
                if self.broker.add(order=order) is False:
                    reward -= discouragement

            else:
                logger.info(
                    ('gym_trading.get_reward() ' + 'Error for action #{} - ' +
                     'unable to place an order with broker').format(action))

        elif action == 2:  # sell
            price_fee_adjusted = self.midpoint - (self.fee * self.midpoint)
            if self.broker.long_inventory_count > 0:
                order = Order(ccy=self.sym,
                              side='long',
                              price=price_fee_adjusted,
                              step=self.local_step_number)
                self.broker.remove(order=order)
                reward += self.broker.get_reward(side=order.side)
            elif self.broker.short_inventory_count >= 0:
                order = Order(ccy=self.sym,
                              side='short',
                              price=price_fee_adjusted,
                              step=self.local_step_number)
                if self.broker.add(order=order) is False:
                    reward -= discouragement

            else:
                logger.info(
                    ('gym_trading.get_reward() ' + 'Error for action #{} - ' +
                     'unable to place an order with broker').format(action))

        else:
            logger.info(
                ('Unknown action to take in get_reward(): ' +
                 'action={} | midpoint={}').format(action, self.midpoint))

        return reward

    def _create_position_features(self):
        return np.array(
            (self.broker.long_inventory.position_count / self.max_position,
             self.broker.short_inventory.position_count / self.max_position,
             self.broker.get_total_pnl(midpoint=self.midpoint),
             self.broker.long_inventory.get_unrealized_pnl(self.midpoint),
             self.broker.short_inventory.get_unrealized_pnl(self.midpoint)))

    def _create_action_features(self, action):
        return self.actions[action]

    def _create_indicator_features(self):
        return np.array((self.tns.get_value(), self.rsi.get_value()),
                        dtype=np.float32)

    def _get_nbbo(self):
        best_bid = round(
            self.midpoint - self._get_book_data(PriceJump.best_bid_index), 2)
        best_ask = round(
            self.midpoint + self._get_book_data(PriceJump.best_ask_index), 2)
        return best_bid, best_ask

    def _get_book_data(self, index=0):
        return self.data[self.local_step_number][index]
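The frame-stacking bookkeeping in step() and reset() above determines the observation shape. A small standalone sketch of that bookkeeping follows, with made-up sizes; it is illustrative only, not repository code.

import numpy as np

window_size, n_features = 4, 8
frames_to_add = 3                      # 3 when frame_stack=True, 0 otherwise
data_buffer, frame_stacker = [], []

for _ in range(20):                    # pretend we took 20 environment steps
    data_buffer.append(np.random.randn(n_features).astype(np.float32))
    if len(data_buffer) >= window_size:
        frame_stacker.append(np.array(data_buffer, dtype=np.float32))
        del data_buffer[0]
        if len(frame_stacker) > frames_to_add + 1:
            del frame_stacker[0]

observation = np.array(frame_stacker, dtype=np.float32)
print(observation.shape)               # (4, 4, 8): (frames, window, features)
# With frame_stack=False (frames_to_add=0) the stack holds a single window,
# and np.squeeze(..., axis=0) reduces the observation to (window, features).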
Code example #11
    def __init__(self,
                 *,
                 fitting_file='LTC-USD_2019-04-07.csv.xz',
                 testing_file='LTC-USD_2019-04-08.csv.xz',
                 step_size=1,
                 max_position=5,
                 window_size=10,
                 seed=1,
                 action_repeats=10,
                 training=True,
                 format_3d=False,
                 z_score=True):

        # properties required for instantiation
        self.action_repeats = action_repeats
        self._seed = seed
        self._random_state = np.random.RandomState(seed=self._seed)
        self.training = training
        self.step_size = step_size
        self.max_position = max_position
        self.window_size = window_size
        self.format_3d = format_3d  # e.g., [window, features, *NEW_AXIS*]

        self.action = 0
        # derive gym.env properties
        self.actions = np.eye(3, dtype=np.float32)

        self.sym = testing_file[:7]  # slice the CCY from the filename

        # properties that get reset()
        self.reward = 0.0
        self.done = False
        self.local_step_number = 0
        self.midpoint = 0.0
        self.observation = None

        # get Broker class to keep track of PnL and orders
        self.broker = Broker(max_position=max_position)
        # get historical data for simulations
        self.sim = Sim(use_arctic=False, z_score=z_score)

        self.prices_, self.data, self.normalized_data = self.sim.load_environment_data(
            fitting_file, testing_file)

        self.max_steps = self.data.shape[0] - self.step_size * \
            self.action_repeats - 1

        # load indicators into the indicator manager
        self.tns = IndicatorManager()
        self.rsi = IndicatorManager()
        for window in INDICATOR_WINDOW:
            self.tns.add(('tns_{}'.format(window), TnS(window=window)))
            self.rsi.add(('rsi_{}'.format(window), RSI(window=window)))

        # rendering class
        self._render = TradingGraph(sym=self.sym)

        # graph midpoint prices
        self._render.reset_render_data(
            y_vec=self.prices_[:np.shape(self._render.x_vec)[0]])

        # buffer for appending lags
        self.data_buffer = list()

        self.action_space = spaces.Discrete(len(self.actions))
        self.reset()  # reset to load observation.shape
        self.observation_space = spaces.Box(low=-10,
                                            high=10,
                                            shape=self.observation.shape,
                                            dtype=np.float32)

        print(
            '{} PriceJump #{} instantiated.\nself.observation_space.shape : {}'
            .format(self.sym, self._seed, self.observation_space.shape))
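The constructor above calls reset() once so that observation.shape is known before the Box space is declared; after that, the environment can be driven with the usual gym loop. A hedged usage sketch follows: the class name PriceJump and the default file names are taken from the neighbouring examples and are assumptions here (the data files must exist locally for this to run).

env = PriceJump(fitting_file='LTC-USD_2019-04-07.csv.xz',
                testing_file='LTC-USD_2019-04-08.csv.xz',
                window_size=10,
                action_repeats=10,
                training=False)

observation = env.reset()
done = False
while not done:
    action = env.action_space.sample()        # random policy, purely for illustration
    observation, reward, done, _ = env.step(action)
env.close()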
Code example #12
class PriceJump(Env):

    metadata = {'render.modes': ['human']}
    id = 'long-short-v0'
    # Set to True if Bitfinex data is in the dataset (e.g., include_bitfinex=True)
    features = Sim.get_feature_labels(include_system_time=False,
                                      include_bitfinex=False)
    best_bid_index = features.index('coinbase_bid_distance_0')
    best_ask_index = features.index('coinbase_ask_distance_0')
    notional_bid_index = features.index('coinbase_bid_notional_0')
    notional_ask_index = features.index('coinbase_ask_notional_0')

    buy_trade_index = features.index('coinbase_buys')
    sell_trade_index = features.index('coinbase_sells')

    target_pnl = 0.03  # 3.0% gain per episode (i.e., day)
    fee = MARKET_ORDER_FEE

    def __init__(self,
                 *,
                 fitting_file='LTC-USD_2019-04-07.csv.xz',
                 testing_file='LTC-USD_2019-04-08.csv.xz',
                 step_size=1,
                 max_position=5,
                 window_size=10,
                 seed=1,
                 action_repeats=10,
                 training=True,
                 format_3d=False,
                 z_score=True):

        # properties required for instantiation
        self.action_repeats = action_repeats
        self._seed = seed
        self._random_state = np.random.RandomState(seed=self._seed)
        self.training = training
        self.step_size = step_size
        self.max_position = max_position
        self.window_size = window_size
        self.format_3d = format_3d  # e.g., [window, features, *NEW_AXIS*]

        self.action = 0
        # derive gym.env properties
        self.actions = np.eye(3, dtype=np.float32)

        self.sym = testing_file[:7]  # slice the CCY from the filename

        # properties that get reset()
        self.reward = 0.0
        self.done = False
        self.local_step_number = 0
        self.midpoint = 0.0
        self.observation = None

        # get Broker class to keep track of PnL and orders
        self.broker = Broker(max_position=max_position)
        # get historical data for simulations
        self.sim = Sim(use_arctic=False, z_score=z_score)

        self.prices_, self.data, self.normalized_data = self.sim.load_environment_data(
            fitting_file, testing_file)

        self.max_steps = self.data.shape[0] - self.step_size * \
            self.action_repeats - 1

        # load indicators into the indicator manager
        self.tns = IndicatorManager()
        self.rsi = IndicatorManager()
        for window in INDICATOR_WINDOW:
            self.tns.add(('tns_{}'.format(window), TnS(window=window)))
            self.rsi.add(('rsi_{}'.format(window), RSI(window=window)))

        # rendering class
        self._render = TradingGraph(sym=self.sym)

        # graph midpoint prices
        self._render.reset_render_data(
            y_vec=self.prices_[:np.shape(self._render.x_vec)[0]])

        # buffer for appending lags
        self.data_buffer = list()

        self.action_space = spaces.Discrete(len(self.actions))
        self.reset()  # reset to load observation.shape
        self.observation_space = spaces.Box(low=-10,
                                            high=10,
                                            shape=self.observation.shape,
                                            dtype=np.float32)

        print(
            '{} PriceJump #{} instantiated.\nself.observation_space.shape : {}'
            .format(self.sym, self._seed, self.observation_space.shape))

    def __str__(self):
        return '{} | {}-{}'.format(PriceJump.id, self.sym, self._seed)

    def step(self, action: int):
        for current_step in range(self.action_repeats):

            if self.done:
                self.reset()
                return self.observation, self.reward, self.done, {}

            # reset the reward if there ARE action repeats
            if current_step == 0:
                self.reward = 0.
                step_action = action
            else:
                step_action = 0

            # Get current step's midpoint
            self.midpoint = self.prices_[self.local_step_number]
            # Pass current time step midpoint to broker to calculate PnL,
            # or if any open orders are to be filled
            buy_volume = self._get_book_data(PriceJump.buy_trade_index)
            sell_volume = self._get_book_data(PriceJump.sell_trade_index)

            self.tns.step(buys=buy_volume, sells=sell_volume)
            self.rsi.step(price=self.midpoint)

            self.broker.step(midpoint=self.midpoint)

            self.reward += self._send_to_broker_and_get_reward(
                action=step_action)

            step_observation = self._get_step_observation(action=action)
            self.data_buffer.append(step_observation)

            if len(self.data_buffer) > self.window_size:
                del self.data_buffer[0]

            self.local_step_number += self.step_size

        self.observation = self._get_observation()

        if self.local_step_number > self.max_steps:
            self.done = True
            order = Order(ccy=self.sym,
                          side=None,
                          price=self.midpoint,
                          step=self.local_step_number)
            self.reward = self.broker.flatten_inventory(order=order)

        return self.observation, self.reward, self.done, {}

    def reset(self):
        if self.training:
            self.local_step_number = self._random_state.randint(
                low=1, high=self.data.shape[0] // 4)
        else:
            self.local_step_number = 0

        msg = ' {}-{} reset. Episode pnl: {:.4f} with {} trades. First step: {}'.format(
            self.sym, self._seed,
            self.broker.get_total_pnl(midpoint=self.midpoint),
            self.broker.get_total_trade_count(), self.local_step_number)
        logger.info(msg)

        self.reward = 0.0
        self.done = False
        self.broker.reset()
        self.data_buffer.clear()
        self.rsi.reset()
        self.tns.reset()

        for step in range(self.window_size + INDICATOR_WINDOW_MAX):
            self.midpoint = self.prices_[self.local_step_number]

            step_buy_volume = self._get_book_data(PriceJump.buy_trade_index)
            step_sell_volume = self._get_book_data(PriceJump.sell_trade_index)
            self.tns.step(buys=step_buy_volume, sells=step_sell_volume)
            self.rsi.step(price=self.midpoint)

            step_observation = self._get_step_observation(action=0)
            self.data_buffer.append(step_observation)

            self.local_step_number += self.step_size
            if len(self.data_buffer) > self.window_size:
                del self.data_buffer[0]

        self.observation = self._get_observation()

        return self.observation

    def render(self, mode='human'):
        self._render.render(midpoint=self.midpoint, mode=mode)

    def close(self):
        logger.info('{}-{} is being closed.'.format(self.id, self.sym))
        self.data = None
        self.normalized_data = None
        self.prices_ = None
        self.broker = None
        self.sim = None
        self.data_buffer = None
        self.tns = None
        self.rsi = None
        return

    def seed(self, seed=1):
        self._random_state = np.random.RandomState(seed=seed)
        self._seed = seed
        logger.info('Setting seed in PriceJump.seed({})'.format(seed))
        return [seed]

    @staticmethod
    def _process_data(_next_state):
        """
        Reshape observation and clip outliers (values +/- 10)
        :param _next_state: observation space
        :return: (np.array) clipped observation space
        """
        return np.clip(_next_state.reshape((1, -1)), -10., 10.)

    def _send_to_broker_and_get_reward(self, action: int):
        """
        Create or adjust orders per a specified action and adjust for penalties.
        :param action: (int) current step's action
        :return: (float) reward
        """
        reward = 0.0
        discouragement = 0.000000000001

        if action == 0:  # do nothing
            reward += discouragement

        elif action == 1:  # buy
            price_fee_adjusted = self.midpoint + (PriceJump.fee *
                                                  self.midpoint)
            if self.broker.short_inventory_count > 0:
                order = Order(ccy=self.sym,
                              side='short',
                              price=price_fee_adjusted,
                              step=self.local_step_number)
                self.broker.remove(order=order)
                reward += self.broker.get_reward(side=order.side) / \
                    self.broker.reward_scale  # scale realized PnL

            elif self.broker.long_inventory_count >= 0:
                order = Order(ccy=self.sym,
                              side='long',
                              price=price_fee_adjusted,
                              step=self.local_step_number)
                if self.broker.add(order=order) is False:
                    reward -= discouragement

            else:
                logger.info(
                    ('gym_trading.get_reward() ' + 'Error for action #{} - ' +
                     'unable to place an order with broker').format(action))

        elif action == 2:  # sell
            price_fee_adjusted = self.midpoint - (PriceJump.fee *
                                                  self.midpoint)
            if self.broker.long_inventory_count > 0:
                order = Order(ccy=self.sym,
                              side='long',
                              price=price_fee_adjusted,
                              step=self.local_step_number)
                self.broker.remove(order=order)
                reward += self.broker.get_reward(side=order.side) / \
                    self.broker.reward_scale  # scale realized PnL
            elif self.broker.short_inventory_count >= 0:
                order = Order(ccy=self.sym,
                              side='short',
                              price=price_fee_adjusted,
                              step=self.local_step_number)
                if self.broker.add(order=order) is False:
                    reward -= discouragement

            else:
                logger.info(
                    ('gym_trading.get_reward() ' + 'Error for action #{} - ' +
                     'unable to place an order with broker').format(action))

        else:
            logger.info(
                ('Unknown action to take in get_reward(): ' +
                 'action={} | midpoint={}').format(action, self.midpoint))

        return reward

    def _create_position_features(self):
        """
        Create an array with features related to the agent's inventory
        :return: (np.array) normalized position features
        """
        return np.array(
            (self.broker.long_inventory.position_count / self.max_position,
             self.broker.short_inventory.position_count / self.max_position,
             self.broker.get_total_pnl(midpoint=self.midpoint) /
             PriceJump.target_pnl,
             self.broker.long_inventory.get_unrealized_pnl(self.midpoint) /
             self.broker.reward_scale,
             self.broker.short_inventory.get_unrealized_pnl(self.midpoint) /
             self.broker.reward_scale),
            dtype=np.float32)

    def _create_action_features(self, action):
        """
        Create a features array for the current time step's action.
        :param action: (int) action number
        :return: (np.array) One-hot of current action
        """
        return self.actions[action]

    def _create_indicator_features(self):
        """
        Create features vector with environment indicators.
        :return: (np.array) Indicator values for current time step
        """
        return np.array((*self.tns.get_value(), *self.rsi.get_value()),
                        dtype=np.float32)

    def _get_nbbo(self):
        """
        Get best bid and offer
        :return: (tuple) best bid and offer
        """
        best_bid = round(
            self.midpoint - self._get_book_data(PriceJump.best_bid_index), 2)
        best_ask = round(
            self.midpoint + self._get_book_data(PriceJump.best_ask_index), 2)
        return best_bid, best_ask

    def _get_book_data(self, index=0):
        """
        Return step 'n' of order book snapshot data
        :param index: (int) step 'n' to look up in order book snapshot history
        :return: (np.array) order book snapshot vector
        """
        return self.data[self.local_step_number][index]

    def _get_step_observation(self, action=0):
        """
        Current step observation, NOT including historical data.
        :param action: (int) current step action
        :return: (np.array) Current step observation
        """
        step_position_features = self._create_position_features()
        step_action_features = self._create_action_features(action=action)
        step_indicator_features = self._create_indicator_features()
        return np.concatenate(
            (self._process_data(self.normalized_data[self.local_step_number]),
             step_indicator_features, step_position_features,
             step_action_features, np.array([self.reward])),
            axis=None)

    def _get_observation(self):
        """
        Current step observation, including historical data.

        If format_3d is TRUE: Expand the observation space from 2 to 3 dimensions.
        (note: This is necessary for conv nets in Baselines.)
        :return: (np.array) Observation state for current time step
        """
        observation = np.array(self.data_buffer, dtype=np.float32)
        if self.format_3d:
            observation = np.expand_dims(observation, axis=-1)
        return observation
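_send_to_broker_and_get_reward() above nets inventory before adding exposure: a buy first covers an open short and only then opens a new long, and sells behave symmetrically. A stripped-down sketch of that rule, independent of the repository's Broker class, is shown below; the function name, the explicit max_position cap, and the printed counts are illustrative only.

def apply_market_action(action: int, longs: int, shorts: int, max_position: int):
    """Return updated (longs, shorts) for action 0=hold, 1=buy, 2=sell."""
    if action == 1:                    # buy
        if shorts > 0:
            shorts -= 1                # cover an open short first
        elif longs < max_position:
            longs += 1                 # otherwise add a long, capped at max_position
    elif action == 2:                  # sell
        if longs > 0:
            longs -= 1                 # close an open long first
        elif shorts < max_position:
            shorts += 1                # otherwise add a short, capped at max_position
    return longs, shorts


print(apply_market_action(1, longs=0, shorts=2, max_position=5))   # -> (0, 1)
print(apply_market_action(2, longs=0, shorts=2, max_position=5))   # -> (0, 3)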
Code example #13
class MarketMaker(Env):
    # gym.env required
    metadata = {'render.modes': ['human']}
    id = 'market-maker-v0'

    # Set to True if Bitfinex data is in the dataset (e.g., include_bitfinex=True)
    features = Sim.get_feature_labels(include_system_time=False,
                                      include_bitfinex=False)
    best_bid_index = features.index('coinbase_bid_distance_0')
    best_ask_index = features.index('coinbase_ask_distance_0')
    notional_bid_index = features.index('coinbase_bid_notional_0')
    notional_ask_index = features.index('coinbase_ask_notional_0')

    buy_trade_index = features.index('coinbase_buys')
    sell_trade_index = features.index('coinbase_sells')

    target_pnl = 0.03  # 3.0% gain per episode (i.e., day)

    def __init__(self,
                 *,
                 fitting_file='LTC-USD_2019-04-07.csv.xz',
                 testing_file='LTC-USD_2019-04-08.csv.xz',
                 step_size=1,
                 max_position=5,
                 window_size=10,
                 seed=1,
                 action_repeats=10,
                 training=True,
                 format_3d=False,
                 z_score=True):

        # properties required for instantiation
        self.action_repeats = action_repeats
        self._seed = seed
        self._random_state = np.random.RandomState(seed=self._seed)
        self.training = training
        self.step_size = step_size
        self.max_position = max_position
        self.window_size = window_size
        self.format_3d = format_3d  # e.g., [window, features, *NEW_AXIS*]

        self.action = 0
        # derive gym.env properties
        self.actions = np.eye(17, dtype=np.float32)

        self.sym = testing_file[:7]  # slice the CCY from the filename

        # properties that get reset()
        self.reward = 0.0
        self.done = False
        self.local_step_number = 0
        self.midpoint = 0.0
        self.observation = None

        # get Broker class to keep track of PnL and orders
        self.broker = Broker(max_position=max_position)
        # get historical data for simulations
        self.sim = Sim(use_arctic=False, z_score=z_score)

        self.prices_, self.data, self.normalized_data = self.sim.load_environment_data(
            fitting_file, testing_file)

        self.max_steps = self.data.shape[0] - self.step_size * \
            self.action_repeats - 1

        # load indicators into the indicator manager
        self.tns = IndicatorManager()
        self.rsi = IndicatorManager()
        for window in INDICATOR_WINDOW:
            self.tns.add(('tns_{}'.format(window), TnS(window=window)))
            self.rsi.add(('rsi_{}'.format(window), RSI(window=window)))

        # rendering class
        self._render = TradingGraph(sym=self.sym)

        # graph midpoint prices
        self._render.reset_render_data(
            y_vec=self.prices_[:np.shape(self._render.x_vec)[0]])

        # buffer for appending lags
        self.data_buffer = list()

        self.action_space = spaces.Discrete(len(self.actions))
        self.reset()  # reset to load observation.shape
        self.observation_space = spaces.Box(low=-10,
                                            high=10,
                                            shape=self.observation.shape,
                                            dtype=np.float32)

        print(
            '{} MarketMaker #{} instantiated\nself.observation_space.shape: {}'
            .format(self.sym, self._seed, self.observation_space.shape))

    def __str__(self):
        return '{} | {}-{}'.format(MarketMaker.id, self.sym, self._seed)

    def step(self, action: int):
        for current_step in range(self.action_repeats):

            if self.done:
                self.reset()
                return self.observation, self.reward, self.done, {}

            # reset the reward if there ARE action repeats
            if current_step == 0:
                self.reward = 0.
                step_action = action
            else:
                step_action = 0

            # Get current step's midpoint
            self.midpoint = self.prices_[self.local_step_number]
            # Pass current time step midpoint to broker to calculate PnL,
            # or if any open orders are to be filled
            step_best_bid, step_best_ask = self._get_nbbo()
            buy_volume = self._get_book_data(MarketMaker.buy_trade_index)
            sell_volume = self._get_book_data(MarketMaker.sell_trade_index)

            self.tns.step(buys=buy_volume, sells=sell_volume)
            self.rsi.step(price=self.midpoint)

            step_reward = self.broker.step(bid_price=step_best_bid,
                                           ask_price=step_best_ask,
                                           buy_volume=buy_volume,
                                           sell_volume=sell_volume,
                                           step=self.local_step_number)

            self.reward += self._send_to_broker_and_get_reward(
                action=step_action)
            self.reward += step_reward

            step_observation = self._get_step_observation(action=action)
            self.data_buffer.append(step_observation)

            if len(self.data_buffer) > self.window_size:
                del self.data_buffer[0]

            self.local_step_number += self.step_size

        self.observation = self._get_observation()

        if self.local_step_number > self.max_steps:
            self.done = True
            self.reward += self.broker.flatten_inventory(*self._get_nbbo())

        return self.observation, self.reward, self.done, {}

    def reset(self):
        if self.training:
            self.local_step_number = self._random_state.randint(
                low=1, high=self.data.shape[0] // 4)
        else:
            self.local_step_number = 0

        msg = ' {}-{} reset. Episode pnl: {:.4f} with {} trades | First step: {}'.format(
            self.sym, self._seed,
            self.broker.get_total_pnl(midpoint=self.midpoint),
            self.broker.get_total_trade_count(), self.local_step_number)
        logger.info(msg)

        self.reward = 0.0
        self.done = False
        self.broker.reset()
        self.data_buffer.clear()
        self.rsi.reset()
        self.tns.reset()

        for step in range(self.window_size + INDICATOR_WINDOW_MAX):
            self.midpoint = self.prices_[self.local_step_number]

            step_buy_volume = self._get_book_data(MarketMaker.buy_trade_index)
            step_sell_volume = self._get_book_data(
                MarketMaker.sell_trade_index)
            self.tns.step(buys=step_buy_volume, sells=step_sell_volume)
            self.rsi.step(price=self.midpoint)

            step_observation = self._get_step_observation(action=0)
            self.data_buffer.append(step_observation)

            self.local_step_number += self.step_size
            if len(self.data_buffer) > self.window_size:
                del self.data_buffer[0]

        self.observation = self._get_observation()

        return self.observation

    def render(self, mode='human'):
        self._render.render(midpoint=self.midpoint, mode=mode)

    def close(self):
        logger.info('{}-{} is being closed.'.format(self.id, self.sym))
        self.data = None
        self.normalized_data = None
        self.prices_ = None
        self.broker = None
        self.sim = None
        self.data_buffer = None
        self.tns = None
        self.rsi = None
        return

    def seed(self, seed=1):
        self._random_state = np.random.RandomState(seed=seed)
        self._seed = seed
        logger.info('Setting seed in MarketMaker.seed({})'.format(seed))
        return [seed]

    @staticmethod
    def _process_data(_next_state):
        """
        Reshape observation and clip outliers (values +/- 10)
        :param _next_state: observation space
        :return: (np.array) clipped observation space
        """
        return np.clip(_next_state.reshape((1, -1)), -10., 10.)

    def _send_to_broker_and_get_reward(self, action: int):
        """
        Create or adjust orders per a specified action and adjust for penalties.
        :param action: (int) current step's action
        :return: (float) reward
        """
        reward = 0.0
        discouragement = 0.000000000001

        if action == 0:  # do nothing
            reward += discouragement

        elif action == 1:
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=0,
                                                  side='long')
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=4,
                                                  side='short')

        elif action == 2:
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=0,
                                                  side='long')
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=9,
                                                  side='short')
        elif action == 3:
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=0,
                                                  side='long')
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=14,
                                                  side='short')

        elif action == 4:
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=4,
                                                  side='long')
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=0,
                                                  side='short')

        elif action == 5:
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=4,
                                                  side='long')
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=4,
                                                  side='short')

        elif action == 6:

            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=4,
                                                  side='long')
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=9,
                                                  side='short')
        elif action == 7:

            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=4,
                                                  side='long')
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=14,
                                                  side='short')

        elif action == 8:
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=9,
                                                  side='long')
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=0,
                                                  side='short')

        elif action == 9:
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=9,
                                                  side='long')
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=4,
                                                  side='short')

        elif action == 10:
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=9,
                                                  side='long')
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=9,
                                                  side='short')

        elif action == 11:
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=9,
                                                  side='long')
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=14,
                                                  side='short')

        elif action == 12:
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=14,
                                                  side='long')
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=0,
                                                  side='short')

        elif action == 13:
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=14,
                                                  side='long')
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=4,
                                                  side='short')

        elif action == 14:
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=14,
                                                  side='long')
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=9,
                                                  side='short')

        elif action == 15:
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=14,
                                                  side='long')
            reward += self._create_order_at_level(reward,
                                                  discouragement,
                                                  level=14,
                                                  side='short')
        elif action == 16:
            reward += self.broker.flatten_inventory(*self._get_nbbo())
        else:
            logger.info("L'action n'exist pas ! Il faut faire attention !")

        return reward

    def _create_position_features(self):
        """
        Create an array with features related to the agent's inventory
        :return: (np.array) normalized position features
        """
        return np.array(
            (self.broker.long_inventory.position_count / self.max_position,
             self.broker.short_inventory.position_count / self.max_position,
             self.broker.get_total_pnl(midpoint=self.midpoint) /
             MarketMaker.target_pnl,
             self.broker.long_inventory.get_unrealized_pnl(self.midpoint) /
             self.broker.reward_scale,
             self.broker.short_inventory.get_unrealized_pnl(self.midpoint) /
             self.broker.reward_scale,
             self.broker.get_long_order_distance_to_midpoint(
                 midpoint=self.midpoint),
             self.broker.get_short_order_distance_to_midpoint(
                 midpoint=self.midpoint),
             *self.broker.get_queues_ahead_features()),
            dtype=np.float32)

    def _create_action_features(self, action):
        """
        Create a features array for the current time step's action.
        :param action: (int) action number
        :return: (np.array) One-hot of current action
        """
        return self.actions[action]

    def _create_indicator_features(self):
        """
        Create features vector with environment indicators.
        :return: (np.array) Indicator values for current time step
        """
        return np.array((*self.tns.get_value(), *self.rsi.get_value()),
                        dtype=np.float32)

    def _create_order_at_level(self,
                               reward: float,
                               discouragement: float,
                               level=0,
                               side='long'):
        """
        Create a new order at a specified LOB level
        :param reward: (float) current step reward
        :param discouragement: (float) penalty deducted from reward for erroneous actions
        :param level: (int) level in the limit order book
        :param side: (str) direction of trade e.g., 'long' or 'short'
        :return: (float) reward with penalties added
        """
        adjustment = 1 if level > 0 else 0
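        # at level 0 there is no tighter level inside the quote, so the "inside best"
        # lookup below falls back to the same level (adjustment = 0)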

        if side == 'long':
            best = self._get_book_data(MarketMaker.best_bid_index - level)
            denormalized_best = round(self.midpoint * (best + 1), 2)
            inside_best = self._get_book_data(MarketMaker.best_bid_index -
                                              level + adjustment)
            denormalized_inside_best = round(self.midpoint * (inside_best + 1),
                                             2)
            plus_one = denormalized_best + 0.01

            if denormalized_inside_best == plus_one:
                # stick to best bid
                bid_price = denormalized_best
                # since LOB is rendered as cumulative notional, deduct the prior price
                # level to derive the notional value of orders ahead in the queue
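                # e.g., if the cumulative notional is 120 at this level and 80 one level
                # closer to the midpoint, roughly 40 is assumed to be queued ahead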
                bid_queue_ahead = self._get_book_data(
                    MarketMaker.notional_bid_index -
                    level) - self._get_book_data(
                        MarketMaker.notional_bid_index - level + adjustment)
            else:
                # insert a cent ahead to jump a queue
                bid_price = plus_one
                bid_queue_ahead = 0.

            bid_order = Order(ccy=self.sym,
                              side='long',
                              price=bid_price,
                              step=self.local_step_number,
                              queue_ahead=bid_queue_ahead)

            if self.broker.add(order=bid_order) is False:
                reward -= discouragement
            else:
                reward += discouragement

        if side == 'short':
            best = self._get_book_data(MarketMaker.best_ask_index + level)
            denormalized_best = round(self.midpoint * (best + 1), 2)
            inside_best = self._get_book_data(MarketMaker.best_ask_index +
                                              level - adjustment)
            denormalized_inside_best = round(self.midpoint * (inside_best + 1),
                                             2)
            plus_one = denormalized_best + 0.01

            if denormalized_inside_best == plus_one:
                ask_price = denormalized_best
                # since LOB is rendered as cumulative notional, deduct the prior price
                # level to derive the notional value of orders ahead in the queue
                ask_queue_ahead = self._get_book_data(
                    MarketMaker.notional_ask_index +
                    level) - self._get_book_data(
                        MarketMaker.notional_ask_index + level - adjustment)
            else:
                ask_price = plus_one
                ask_queue_ahead = 0.

            ask_order = Order(ccy=self.sym,
                              side='short',
                              price=ask_price,
                              step=self.local_step_number,
                              queue_ahead=ask_queue_ahead)

            if self.broker.add(order=ask_order) is False:
                reward -= discouragement
            else:
                reward += discouragement

        return reward

    def _get_nbbo(self):
        """
        Get best bid and offer
        :return: (tuple) best bid and offer
        """
        best_bid = round(
            self.midpoint - self._get_book_data(MarketMaker.best_bid_index), 2)
        best_ask = round(
            self.midpoint + self._get_book_data(MarketMaker.best_ask_index), 2)
        return best_bid, best_ask

    def _get_book_data(self, index=0):
        """
        Look up a single feature value in the current step's order book snapshot
        :param index: (int) column index of the feature to look up
        :return: (float) feature value at the current step
        """
        return self.data[self.local_step_number][index]

    def _get_step_observation(self, action=0):
        """
        Current step observation, NOT including historical data.
        :param action: (int) current step action
        :return: (np.array) Current step observation
        """
        step_position_features = self._create_position_features()
        step_action_features = self._create_action_features(action=action)
        step_indicator_features = self._create_indicator_features()
        return np.concatenate(
            (self._process_data(self.normalized_data[self.local_step_number]),
             step_indicator_features, step_position_features,
             step_action_features, np.array([self.reward])),
            axis=None)

    def _get_observation(self):
        """
        Current step observation, including historical data.

        If format_3d is TRUE: Expand the observation space from 2 to 3 dimensions.
        (note: This is necessary for conv nets in Baselines.)
        :return: (np.array) Observation state for current time step
        """
        observation = np.array(self.data_buffer, dtype=np.float32)
        if self.format_3d:
            observation = np.expand_dims(observation, axis=-1)
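            # e.g., a (window_size, n_features) matrix becomes (window_size, n_features, 1)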
        return observation
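
Since the class above follows the standard gym interface (reset/step/render/close/seed), a short random-action smoke test is a convenient way to exercise it end to end. This is only a sketch: it assumes the constructor's default fitting_file/testing_file exist locally and that step() is available on this class as in the sibling environments, and the episode length is arbitrary.

# Hypothetical smoke test for the environment above; the constructor defaults and the
# number of steps are assumptions, not values taken from the repository.
env = MarketMaker()
env.seed(1)
observation = env.reset()

total_reward = 0.0
for _ in range(1000):                            # arbitrary number of steps
    action = env.action_space.sample()           # random action from the discrete space
    observation, reward, done, info = env.step(action)
    total_reward += reward
    if done:
        break

env.close()
print('episode reward: {:.6f}'.format(total_reward))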
Code example #14
File: market_maker.py Project: femtotrader/crypto-rl
class MarketMaker(Env):

    metadata = {'render.modes': ['human']}
    id = 'market-maker-v0'
    action_repeats = 4
    bid_price_features = ['coinbase-bid-distance-0', 'coinbase-bid-distance-1',
                          'coinbase-bid-distance-2', 'coinbase-bid-distance-3',
                          'coinbase-bid-distance-4', 'coinbase-bid-distance-5',
                          'coinbase-bid-distance-6', 'coinbase-bid-distance-7',
                          'coinbase-bid-distance-8', 'coinbase-bid-distance-9']
    ask_price_features = ['coinbase-ask-distance-0', 'coinbase-ask-distance-1',
                          'coinbase-ask-distance-2', 'coinbase-ask-distance-3',
                          'coinbase-ask-distance-4', 'coinbase-ask-distance-5',
                          'coinbase-ask-distance-6', 'coinbase-ask-distance-7',
                          'coinbase-ask-distance-8', 'coinbase-ask-distance-9']
    bid_notional_features = ['coinbase-bid-notional-0', 'coinbase-bid-notional-1',
                             'coinbase-bid-notional-2', 'coinbase-bid-notional-3',
                             'coinbase-bid-notional-4', 'coinbase-bid-notional-5',
                             'coinbase-bid-notional-6', 'coinbase-bid-notional-7',
                             'coinbase-bid-notional-8', 'coinbase-bid-notional-9']
    ask_notional_features = ['coinbase-ask-notional-0', 'coinbase-ask-notional-1',
                             'coinbase-ask-notional-2', 'coinbase-ask-notional-3',
                             'coinbase-ask-notional-4', 'coinbase-ask-notional-5',
                             'coinbase-ask-notional-6', 'coinbase-ask-notional-7',
                             'coinbase-ask-notional-8', 'coinbase-ask-notional-9']

    def __init__(self, training=True,
                 fitting_file='ETH-USD_2018-12-31.xz',
                 testing_file='ETH-USD_2019-01-01.xz',
                 step_size=1,
                 max_position=5,
                 window_size=50,
                 seed=1,
                 frame_stack=False):

        # properties required for instantiation
        self._random_state = np.random.RandomState(seed=seed)
        self._seed = seed
        self.training = training
        self.step_size = step_size
        self.fee = BROKER_FEE
        self.max_position = max_position
        self.window_size = window_size
        self.frame_stack = frame_stack
        self.frames_to_add = 3 if self.frame_stack else 0
        self.inventory_features = ['long_inventory', 'short_inventory',
                                   'long_unrealized_pnl', 'short_unrealized_pnl',
                                   'buy_distance_to_midpoint', 'short_distance_to_midpoint']

        self._action = 0
        # derive gym.env properties
        self.actions = np.eye(24)

        self.sym = testing_file[:7]  # slice the CCY from the filename

        # properties that get reset()
        self.reward = 0.0
        self.done = False
        self._local_step_number = 0
        self.midpoint = 0.0
        self.observation = None

        # get historical data for simulations
        self.broker = Broker(max_position=max_position)
        self.sim = Sim(use_arctic=False)

        # Set to True if Bitfinex is in the dataset (e.g., include_bitfinex=True)
        self.features = self.sim.get_feature_labels(include_system_time=False,
                                                    include_bitfinex=False)

        fitting_data_filepath = '{}/data_exports/{}'.format(self.sim.cwd, fitting_file)
        data_used_in_environment = '{}/data_exports/{}'.format(self.sim.cwd, testing_file)
        # print('Fitting data: {}\nTesting Data: {}'.format(fitting_data_filepath,
        #                                                   data_used_in_environment))

        self.sim.fit_scaler(self.sim.import_csv(filename=fitting_data_filepath))
        self.data = self.sim.import_csv(filename=data_used_in_environment)
        self.prices = self.data['coinbase_midpoint'].values  # used to calculate PnL
        self.bid_prices = self.data[MarketMaker.bid_price_features].values  # used for LOB placement
        self.ask_prices = self.data[MarketMaker.ask_price_features].values  # used for LOB placement
        self.bid_notionals = self.data[MarketMaker.bid_notional_features].values  # used for LOB placement
        self.ask_notionals = self.data[MarketMaker.ask_notional_features].values  # used for LOB placement

        # self.data = self.data.apply(self.sim.z_score, axis=1)
        self.data_ = self.data.copy()  # used for rendering data
        self.data = self.data.values  # used for the observation space
        # self.data = None

        self.data_buffer, self.frame_stacker = list(), list()
        self.action_space = spaces.Discrete(len(self.actions))
        variable_features_count = len(self.inventory_features) + len(self.actions) + 1

        if self.frame_stack is False:
            shape = (len(self.features) + variable_features_count, self.window_size)
        else:
            shape = (len(self.features) + variable_features_count, self.window_size, 4)

        self.observation_space = spaces.Box(low=self.data.min(),
                                            high=self.data.max(),
                                            shape=shape,
                                            dtype=np.float32)

        # attributes for rendering
        self.line1 = []
        self.screen_size = 200
        self.y_vec = None
        self.x_vec = None
        self._reset_render_data()

        self.reset()
        # print('MarketMaker instantiated. ' +
        #       '\nself.observation_space.shape : {}'.format(
        #           self.observation_space.shape))

    def __str__(self):
        return '{} | {}-{}'.format(MarketMaker.id, self.sym, self._seed)

    def _reset_render_data(self):
        self.x_vec = np.linspace(0, self.screen_size * 10, self.screen_size + 1)[0:-1]
        self.y_vec = np.array(self.prices[:np.shape(self.x_vec)[0]])
        self.line1 = []

    @property
    def step_number(self):
        return self._local_step_number

    def step(self, action_):

        for current_step in range(MarketMaker.action_repeats):

            if self.done:
                self.reset()
                return self.observation, self.reward, self.done, {}

            # reset the reward if there are action repeats
            if current_step == 0:
                self.reward = 0.
                action = action_
            else:
                action = 0

            # Get current step's midpoint to calculate PnL, or if
            # an open order got filled.
            self.midpoint = self.prices[self._local_step_number]
            _step_reward = self.broker.step(
                bid_price=self.midpoint - self.bid_prices[self._local_step_number][0],
                ask_price=self.midpoint + self.ask_prices[self._local_step_number][0],
                buy_volume=self.data[self._local_step_number][-2],
                sell_volume=self.data[self._local_step_number][-1],
                step=self._local_step_number
            )

            self.reward += self._send_to_broker_and_get_reward(action) + _step_reward

            position_features = self._create_position_features()
            action_features = self._create_action_features(action=action)

            _observation = np.concatenate((self.process_data(self.data[self._local_step_number]),
                                           position_features,
                                           action_features,
                                           np.array([self.reward])),
                                          axis=None)
            self.data_buffer.append(_observation)

            if len(self.data_buffer) >= self.window_size:
                self.frame_stacker.append(np.array(self.data_buffer, dtype=np.float32))
                del self.data_buffer[0]

                if len(self.frame_stacker) > self.frames_to_add + 1:
                    del self.frame_stacker[0]

            self._local_step_number += self.step_size

        # output shape is [n_features, window_size, frames_to_add] e.g., [40, 100, 1]
        self.observation = np.array(self.frame_stacker, dtype=np.float32).transpose()

        # This removes a dimension to be compatible with the Keras-rl module
        # because Keras-rl uses its own frame-stacker. There are future plans to integrate
        # this repository with more reinforcement learning packages, such as baselines.
        if self.frame_stack is False:
            self.observation = self.observation.reshape(self.observation.shape[0], -1)

        if self._local_step_number > self.data.shape[0] - 8:
            self.done = True
            best_bid = round(self.midpoint - self.bid_prices[self._local_step_number][0], 2)
            best_ask = round(self.midpoint + self.ask_prices[self._local_step_number][0], 2)
            self.reward += self.broker.flatten_inventory(bid_price=best_bid, ask_price=best_ask)

        return self.observation, self.reward, self.done, {}

    def reset(self):
        if self.training:
            self._local_step_number = self._random_state.randint(low=1, high=5000)
        else:
            self._local_step_number = 0

        logger.info(' {}-{} reset. Episode pnl: {} | First step: {}, max_pos: {}'.format(
            self.sym, self._seed,
            self.broker.get_total_pnl(midpoint=self.midpoint),
            self._local_step_number, self.max_position))
        self.reward = 0.0
        self.done = False
        self.broker.reset()
        self.data_buffer.clear()
        self.frame_stacker.clear()

        self._reset_render_data()

        for step in range(self.window_size + self.frames_to_add):

            position_features = self._create_position_features()
            action_features = self._create_action_features(action=0)

            _observation = np.concatenate((self.process_data(self.data[self._local_step_number]),
                                           position_features,
                                           action_features,
                                           np.array([self.reward])),
                                          axis=None)
            self.data_buffer.append(_observation)
            self._local_step_number += self.step_size

            if step >= self.window_size - 1:
                self.frame_stacker.append(np.array(self.data_buffer, dtype=np.float32))
                del self.data_buffer[0]

                if len(self.frame_stacker) > self.frames_to_add + 1:
                    del self.frame_stacker[0]

        # output shape is [n_features, window_size, frames_to_add] e.g., [40, 100, 1]
        self.observation = np.array(self.frame_stacker, dtype=np.float32).transpose()

        # This removes a dimension to be compatible with the Keras-rl module
        # because Keras-rl uses its own frame-stacker. There are future plans to integrate
        # this repository with more reinforcement learning packages, such as baselines.
        if self.frame_stack is False:
            self.observation = self.observation.reshape(self.observation.shape[0], -1)

        return self.observation

    def render(self, mode='human'):
        if mode == 'human':
            self.line1 = _live_plotter(self.x_vec,
                                       self.y_vec,
                                       self.line1,
                                       identifier=self.sym)
            self.y_vec = np.append(self.y_vec[1:], self.midpoint)

    def close(self):
        logger.info('{}-{} is being closed.'.format(self.id, self.sym))
        self.data = None
        self.broker = None
        self.sim = None
        self.data_buffer = None
        plt.close()
        return

    def seed(self, seed=1):
        self._random_state = np.random.RandomState(seed=seed)
        self._seed = seed
        return [seed]

    # @staticmethod
    # def process_data(_next_state):
    #     return np.clip(_next_state.reshape((1, -1)), -10., 10.)

    def process_data(self, _next_state):
        # return self.sim.scale_state(_next_state).values.reshape((1, -1))
        return np.reshape(_next_state, (1, -1))

    def _send_to_broker_and_get_reward(self, action):
        reward = 0.0
        discouragement = 0.000000000001

        if action == 0:  # do nothing
            reward += discouragement

        elif action == 1:  # set bid to inside spread or [ask_price - 0.01]
            best_bid = self.bid_prices[self._local_step_number][0]
            best_ask = self.ask_prices[self._local_step_number][0]
            price = round(max(self.midpoint - best_bid, self.midpoint + best_ask - 0.01), 2)
            order = Order(ccy=self.sym, side='long', price=price, step=self._local_step_number)
            if self.broker.add(order=order) is False:
                reward -= discouragement

        elif action == 2:  # set bid to best_bid - row 0
            reward = self._create_bid_order_at_level(reward, discouragement, 0)

        elif action == 3:  # set bid to best_bid - row 1
            reward = self._create_bid_order_at_level(reward, discouragement, 1)

        elif action == 4:  # set bid to best_bid - row 2
            reward = self._create_bid_order_at_level(reward, discouragement, 2)

        elif action == 5:  # set bid to best_bid - row 3
            reward = self._create_bid_order_at_level(reward, discouragement, 3)

        elif action == 6:  # set bid to best_bid - row 4
            reward = self._create_bid_order_at_level(reward, discouragement, 4)

        elif action == 7:  # set bid to best_bid - row 5
            reward = self._create_bid_order_at_level(reward, discouragement, 5)

        elif action == 8:  # set bid to best_bid - row 6
            reward = self._create_bid_order_at_level(reward, discouragement, 6)

        elif action == 9:  # set bid to best_bid - row 7
            reward = self._create_bid_order_at_level(reward, discouragement, 7)

        elif action == 10:  # set bid to best_bid - row 8
            reward = self._create_bid_order_at_level(reward, discouragement, 8)

        elif action == 11:  # set bid to best_bid - row 9
            reward = self._create_bid_order_at_level(reward, discouragement, 9)

        elif action == 12:  # set ask to inside spread or [bid_price + 0.01]
            best_bid = self.bid_prices[self._local_step_number][0]
            best_ask = self.ask_prices[self._local_step_number][0]
            price = round(min(best_ask + self.midpoint, self.midpoint - best_bid + 0.01), 2)
            order = Order(ccy=self.sym, side='short', price=price, step=self._local_step_number)
            if self.broker.add(order=order) is False:
                reward -= discouragement

        elif action == 13:  # set ask to best_ask + row 0
            reward = self._create_ask_order_at_level(reward, discouragement, 0)

        elif action == 14:  # set ask to best_ask + row 1
            reward = self._create_ask_order_at_level(reward, discouragement, 1)

        elif action == 15:  # set ask to best_ask + row 2
            reward = self._create_ask_order_at_level(reward, discouragement, 2)

        elif action == 16:  # set ask to best_ask + row 3
            reward = self._create_ask_order_at_level(reward, discouragement, 3)

        elif action == 17:  # set ask to best_ask + row 4
            reward = self._create_ask_order_at_level(reward, discouragement, 4)

        elif action == 18:  # set ask to best_ask + row 5
            reward = self._create_ask_order_at_level(reward, discouragement, 5)

        elif action == 19:  # set ask to best_ask + row 6
            reward = self._create_ask_order_at_level(reward, discouragement, 6)

        elif action == 20:  # set ask to best_ask + row 7
            reward = self._create_ask_order_at_level(reward, discouragement, 7)

        elif action == 21:  # set ask to best_ask + row 8
            reward = self._create_ask_order_at_level(reward, discouragement, 8)

        elif action == 22:  # set ask to best_ask + row 9
            reward = self._create_ask_order_at_level(reward, discouragement, 9)

        elif action == 23:  # flatten all positions
            best_bid = round(self.midpoint - self.bid_prices[self._local_step_number][0], 2)
            best_ask = round(self.midpoint + self.ask_prices[self._local_step_number][0], 2)
            reward += self.broker.flatten_inventory(bid_price=best_bid, ask_price=best_ask)

        elif action == 24:
            logger.info("Action #24 does not exist! Be careful.")

        return reward

    def _create_position_features(self):
        return np.array((self.broker.long_inventory.position_count / self.max_position,
                         self.broker.short_inventory.position_count / self.max_position,
                         self.broker.long_inventory.get_unrealized_pnl(self.midpoint),
                         self.broker.short_inventory.get_unrealized_pnl(self.midpoint),
                         self.broker.get_long_order_distance_to_midpoint(midpoint=self.midpoint),
                         self.broker.get_short_order_distance_to_midpoint(midpoint=self.midpoint)))

    def _create_action_features(self, action):
        return self.actions[action]

    def _create_bid_order_at_level(self, reward, discouragement, level=0):
        if level > 0:
            above_best_bid = self.bid_prices[self._local_step_number][level-1]
            best_bid = self.bid_prices[self._local_step_number][level]

            if round(above_best_bid, 2) == round(best_bid + 0.01, 2):
                price = round(self.midpoint - best_bid, 2)
                queue_ahead = self.bid_notionals[self._local_step_number][level]
            else:
                price = round(self.midpoint - best_bid + 0.01, 2)
                queue_ahead = 0.

            order = Order(ccy=self.sym, side='long', price=price,
                          step=self._local_step_number, queue_ahead=queue_ahead)
            if self.broker.add(order=order) is False:
                reward -= discouragement
        else:
            best_bid = self.bid_prices[self._local_step_number][level]
            price = round(self.midpoint - best_bid, 2)
            queue_ahead = self.bid_notionals[self._local_step_number][level]
            order = Order(ccy=self.sym, side='long', price=price,
                          step=self._local_step_number, queue_ahead=queue_ahead)
            if self.broker.add(order=order) is False:
                reward -= discouragement
        return reward

    def _create_ask_order_at_level(self, reward, discouragement, level=0):
        if level > 0:
            above_best_ask = self.ask_prices[self._local_step_number][level - 1]
            best_ask = self.ask_prices[self._local_step_number][level]

            if round(above_best_ask, 2) == round(best_ask - 0.01, 2):
                price = round(best_ask + self.midpoint, 2)
                queue_ahead = self.ask_notionals[self._local_step_number][level]
            else:
                price = round(best_ask + 0.01 + self.midpoint, 2)
                queue_ahead = 0.

            order = Order(ccy=self.sym, side='short', price=price,
                          step=self._local_step_number, queue_ahead=queue_ahead)
            if self.broker.add(order=order) is False:
                reward -= discouragement
        else:
            best_ask = self.ask_prices[self._local_step_number][level]
            price = round(best_ask + self.midpoint, 2)
            queue_ahead = self.ask_notionals[self._local_step_number][level]
            order = Order(ccy=self.sym, side='short', price=price,
                          step=self._local_step_number, queue_ahead=queue_ahead)
            if self.broker.add(order=order) is False:
                reward -= discouragement
        return reward
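
The window/frame buffering inside step() and reset() above is easiest to see in isolation. The toy sketch below reproduces the same list mechanics on dummy integer "observations"; all sizes here are made up, with frames_to_add=3 as if frame_stack=True.

# Toy reproduction of the data_buffer / frame_stacker bookkeeping in MarketMaker.step();
# the observations are plain integers and the sizes are arbitrary.
window_size = 4
frames_to_add = 3                                 # as if frame_stack=True

data_buffer, frame_stacker = [], []
for step_observation in range(12):                # pretend each int is one observation
    data_buffer.append(step_observation)
    if len(data_buffer) >= window_size:
        frame_stacker.append(list(data_buffer))   # snapshot the rolling window
        del data_buffer[0]                        # slide the window forward
        if len(frame_stacker) > frames_to_add + 1:
            del frame_stacker[0]                  # keep only the newest frames

print(len(frame_stacker))                         # 4 == frames_to_add + 1
print(frame_stacker[-1])                          # most recent window: [8, 9, 10, 11]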
Code example #15
    def __init__(self,
                 fitting_file='LTC-USD_2019-04-07.csv.xz',
                 testing_file='LTC-USD_2019-04-08.csv.xz',
                 step_size=1,
                 max_position=5,
                 window_size=10,
                 seed=1,
                 action_repeats=10,
                 training=True,
                 format_3d=False,
                 z_score=True,
                 reward_type='trade_completion',
                 scale_rewards=True):
        """
        Base class for creating environments extending OpenAI's GYM framework.

        :param fitting_file: historical data used to fit environment data (i.e.,
            previous trading day)
        :param testing_file: historical data used in environment
        :param step_size: increment size for steps (NOTE: leave at 1, otherwise market
            transaction data will be overlooked)
        :param max_position: maximum number of positions able to hold in inventory
        :param window_size: number of lags to include in observation space
        :param seed: random seed number
        :param action_repeats: number of steps to take in environment after a given action
        :param training: if TRUE, then randomize starting point in environment
        :param format_3d: if TRUE, reshape observation space from matrix to tensor
        :param z_score: if TRUE, normalize data set with Z-Score, otherwise use Min-Max
            (i.e., range of 0 to 1)
        :param reward_type: method for calculating the environment's reward:
            1) 'trade_completion' --> reward is generated per trade's round trip
            2) 'continuous_total_pnl' --> change in realized & unrealized pnl between
                                            time steps
            3) 'continuous_realized_pnl' --> change in realized pnl between time steps
            4) 'continuous_unrealized_pnl' --> change in unrealized pnl between time steps
        """
        # properties required for instantiation
        self.action_repeats = action_repeats
        self._seed = seed
        self._random_state = np.random.RandomState(seed=self._seed)
        self.training = training
        self.step_size = step_size
        self.max_position = max_position
        self.window_size = window_size
        self.reward_type = reward_type
        self.format_3d = format_3d  # e.g., [window, features, *NEW_AXIS*]
        self.sym = testing_file[:7]  # slice the CCY from the filename
        self.scale_rewards = scale_rewards

        # properties that get reset()
        self.reward = 0.0
        self.done = False
        self.local_step_number = 0
        self.midpoint = 0.0
        self.observation = None
        self.action = 0
        self.last_pnl = 0.

        # properties to override in sub-classes
        self.actions = None
        self.broker = None
        self.action_space = None
        self.observation_space = None

        # get historical data for simulations
        self.sim = Sim(use_arctic=False, z_score=z_score)

        self.prices_, self.data, self.normalized_data = self.sim.load_environment_data(
            fitting_file, testing_file)
        self.best_bid = self.best_ask = None

        self.max_steps = self.data.shape[0] - self.step_size * self.action_repeats - 1

        # load indicators into the indicator manager
        self.tns = IndicatorManager()
        self.rsi = IndicatorManager()
        for window in INDICATOR_WINDOW:
            self.tns.add(('tns_{}'.format(window), TnS(window=window)))
            self.rsi.add(('rsi_{}'.format(window), RSI(window=window)))

        # rendering class
        self._render = TradingGraph(sym=self.sym)

        # graph midpoint prices
        self._render.reset_render_data(
            y_vec=self.prices_[:np.shape(self._render.x_vec)[0]])

        # buffer for appending lags
        self.data_buffer = list()
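
The reward_type docstring above describes three "continuous" variants as deltas between consecutive time steps. A minimal sketch of that bookkeeping, assuming hypothetical realized/unrealized pnl inputs rather than the repository's actual Broker API, might look like this (the 'trade_completion' case is granted per round trip and is omitted):

# Minimal sketch of the "continuous_*" reward variants described in the docstring above;
# realized_pnl / unrealized_pnl / last_pnl are assumed inputs, not the repository's API.
def continuous_reward(reward_type: str, realized_pnl: float,
                      unrealized_pnl: float, last_pnl: float):
    if reward_type == 'continuous_total_pnl':
        pnl = realized_pnl + unrealized_pnl
    elif reward_type == 'continuous_realized_pnl':
        pnl = realized_pnl
    elif reward_type == 'continuous_unrealized_pnl':
        pnl = unrealized_pnl
    else:
        raise ValueError('only the continuous reward types are sketched here')
    reward = pnl - last_pnl        # change in pnl since the previous time step
    return reward, pnl             # pnl is carried forward as the next step's last_pnl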
Code example #16
File: price_jump.py Project: blackivory/crypto-rl
class PriceJump(Env):

    metadata = {'render.modes': ['human']}
    id = 'long-short-v0'
    # Set to True if Bitfinex is in the dataset (e.g., include_bitfinex=True)
    features = Sim.get_feature_labels(include_system_time=False,
                                      include_bitfinex=False)
    best_bid_index = features.index('coinbase-bid-distance-0')
    best_ask_index = features.index('coinbase-ask-distance-0')
    notional_bid_index = features.index('coinbase-bid-notional-0')
    notional_ask_index = features.index('coinbase-ask-notional-0')

    buy_trade_index = features.index('coinbase-buys')
    sell_trade_index = features.index('coinbase-sells')

    target_pnl = BROKER_FEE * 10 * 5  # e.g., 5 for max_positions
    fee = BROKER_FEE

    def __init__(self,
                 *,
                 fitting_file='ETH-USD_2018-12-31.xz',
                 testing_file='ETH-USD_2019-01-01.xz',
                 step_size=1,
                 max_position=5,
                 window_size=10,
                 seed=1,
                 action_repeats=10,
                 training=True,
                 format_3d=False,
                 z_score=True):

        # properties required for instantiation
        self.action_repeats = action_repeats
        self._seed = seed
        self._random_state = np.random.RandomState(seed=self._seed)
        self.training = training
        self.step_size = step_size
        self.max_position = max_position
        self.window_size = window_size
        self.format_3d = format_3d  # e.g., [window, features, *NEW_AXIS*]

        self.action = 0
        # derive gym.env properties
        self.actions = np.eye(3)

        self.sym = testing_file[:7]  # slice the CCY from the filename

        # properties that get reset()
        self.reward = 0.0
        self.done = False
        self.local_step_number = 0
        self.midpoint = 0.0
        self.observation = None

        # get Broker class to keep track of PnL and orders
        self.broker = Broker(max_position=max_position)
        # get historical data for simulations
        self.sim = Sim(use_arctic=False)

        self.data = self._load_environment_data(fitting_file, testing_file)
        self.prices_ = self.data[
            'coinbase_midpoint'].values  # used to calculate PnL

        self.normalized_data = self.data.copy()
        self.data = self.data.values

        self.max_steps = self.data.shape[0] - self.step_size * \
                         self.action_repeats - 1

        # normalize midpoint data
        self.normalized_data['coinbase_midpoint'] = \
            np.log(self.normalized_data['coinbase_midpoint'].values)
        self.normalized_data['coinbase_midpoint'] = (
            self.normalized_data['coinbase_midpoint'] -
            self.normalized_data['coinbase_midpoint'].shift(1)).fillna(0.)

        # load indicators into the indicator manager
        self.tns = IndicatorManager()
        self.rsi = IndicatorManager()
        for window in INDICATOR_WINDOW:
            self.tns.add(('tns_{}'.format(window), TnS(window=window)))
            self.rsi.add(('rsi_{}'.format(window), RSI(window=window)))

        if z_score:
            logger.info("Pre-scaling {}-{} data...".format(
                self.sym, self._seed))
            self.normalized_data = self.normalized_data.apply(self.sim.z_score,
                                                              axis=1).values
            logger.info("...{}-{} pre-scaling complete.".format(
                self.sym, self._seed))
        else:
            self.normalized_data = self.normalized_data.values

        # rendering class
        self._render = TradingGraph(sym=self.sym)
        # graph midpoint prices
        self._render.reset_render_data(
            y_vec=self.prices_[:np.shape(self._render.x_vec)[0]])
        # buffer for appending lags
        self.data_buffer = list()

        self.action_space = spaces.Discrete(len(self.actions))
        self.reset()  # reset to load observation.shape
        self.observation_space = spaces.Box(low=-10,
                                            high=10,
                                            shape=self.observation.shape,
                                            dtype=np.float32)

        print(
            '{} PriceJump #{} instantiated.\nself.observation_space.shape : {}'
            .format(self.sym, self._seed, self.observation_space.shape))

    def __str__(self):
        return '{} | {}-{}'.format(PriceJump.id, self.sym, self._seed)

    def step(self, action: int):
        for current_step in range(self.action_repeats):

            if self.done:
                self.reset()
                return self.observation, self.reward, self.done, {}

            # reset the reward if there ARE action repeats
            if current_step == 0:
                self.reward = 0.
                step_action = action
            else:
                step_action = 0

            # Get current step's midpoint
            self.midpoint = self.prices_[self.local_step_number]
            # Pass current time step midpoint to broker to calculate PnL,
            # or if any open orders are to be filled
            buy_volume = self._get_book_data(PriceJump.buy_trade_index)
            sell_volume = self._get_book_data(PriceJump.sell_trade_index)

            self.tns.step(buys=buy_volume, sells=sell_volume)
            self.rsi.step(price=self.midpoint)

            self.broker.step(midpoint=self.midpoint)

            self.reward += self._send_to_broker_and_get_reward(
                action=step_action)

            step_observation = self._get_step_observation(action=action)
            self.data_buffer.append(step_observation)

            if len(self.data_buffer) > self.window_size:
                del self.data_buffer[0]

            self.local_step_number += self.step_size

        self.observation = self._get_observation()

        if self.local_step_number > self.max_steps:
            self.done = True
            order = Order(ccy=self.sym,
                          side=None,
                          price=self.midpoint,
                          step=self.local_step_number)
            self.reward = self.broker.flatten_inventory(order=order)

        return self.observation, self.reward, self.done, {}

    def reset(self):
        if self.training:
            self.local_step_number = self._random_state.randint(
                low=1, high=self.data.shape[0] // 4)
        else:
            self.local_step_number = 0

        msg = ' {}-{} reset. Episode pnl: {:.4f} with {} trades | First step: {}'.format(
            self.sym, self._seed,
            self.broker.get_total_pnl(midpoint=self.midpoint),
            self.broker.get_total_trade_count(), self.local_step_number)
        logger.info(msg)

        self.reward = 0.0
        self.done = False
        self.broker.reset()
        self.data_buffer.clear()
        self.rsi.reset()
        self.tns.reset()

        for step in range(self.window_size + INDICATOR_WINDOW_MAX):
            self.midpoint = self.prices_[self.local_step_number]

            step_buy_volume = self._get_book_data(PriceJump.buy_trade_index)
            step_sell_volume = self._get_book_data(PriceJump.sell_trade_index)
            self.tns.step(buys=step_buy_volume, sells=step_sell_volume)
            self.rsi.step(price=self.midpoint)

            step_observation = self._get_step_observation(action=0)
            self.data_buffer.append(step_observation)

            self.local_step_number += self.step_size
            if len(self.data_buffer) > self.window_size:
                del self.data_buffer[0]

        self.observation = self._get_observation()

        return self.observation

    def render(self, mode='human'):
        self._render.render(midpoint=self.midpoint, mode=mode)

    def close(self):
        logger.info('{}-{} is being closed.'.format(self.id, self.sym))
        self.data = None
        self.normalized_data = None
        self.prices_ = None
        self.broker = None
        self.sim = None
        self.data_buffer = None
        self.tns = None
        self.rsi = None
        return

    def seed(self, seed=1):
        self._random_state = np.random.RandomState(seed=seed)
        self._seed = seed
        logger.info('Setting seed in PriceJump.seed({})'.format(seed))
        return [seed]

    @staticmethod
    def _process_data(_next_state):
        return np.clip(_next_state.reshape((1, -1)), -10., 10.)

    # def _process_data(self, _next_state):
    #     # return self.sim.scale_state(_next_state).values.reshape((1, -1))
    #     return np.reshape(_next_state, (1, -1))

    def _send_to_broker_and_get_reward(self, action):
        reward = 0.0
        discouragement = 0.000000000001

        if action == 0:  # do nothing
            reward += discouragement

        elif action == 1:  # buy
            price_fee_adjusted = self.midpoint + (PriceJump.fee *
                                                  self.midpoint)
            if self.broker.short_inventory_count > 0:
                order = Order(ccy=self.sym,
                              side='short',
                              price=price_fee_adjusted,
                              step=self.local_step_number)
                self.broker.remove(order=order)
                reward += self.broker.get_reward(side=order.side)

            elif self.broker.long_inventory_count >= 0:
                order = Order(ccy=self.sym,
                              side='long',
                              price=price_fee_adjusted,
                              step=self.local_step_number)
                if self.broker.add(order=order) is False:
                    reward -= discouragement

            else:
                logger.info(
                    ('gym_trading.get_reward() ' + 'Error for action #{} - ' +
                     'unable to place an order with broker').format(action))

        elif action == 2:  # sell
            price_fee_adjusted = self.midpoint - (PriceJump.fee *
                                                  self.midpoint)
            if self.broker.long_inventory_count > 0:
                order = Order(ccy=self.sym,
                              side='long',
                              price=price_fee_adjusted,
                              step=self.local_step_number)
                self.broker.remove(order=order)
                reward += self.broker.get_reward(side=order.side)
            elif self.broker.short_inventory_count >= 0:
                order = Order(ccy=self.sym,
                              side='short',
                              price=price_fee_adjusted,
                              step=self.local_step_number)
                if self.broker.add(order=order) is False:
                    reward -= discouragement

            else:
                logger.info(
                    ('gym_trading.get_reward() ' + 'Error for action #{} - ' +
                     'unable to place an order with broker').format(action))

        else:
            logger.info(
                ('Unknown action to take in get_reward(): ' +
                 'action={} | midpoint={}').format(action, self.midpoint))

        return reward

    def _create_position_features(self):
        return np.array(
            (self.broker.long_inventory.position_count / self.max_position,
             self.broker.short_inventory.position_count / self.max_position,
             self.broker.get_total_pnl(midpoint=self.midpoint) /
             PriceJump.target_pnl,
             self.broker.long_inventory.get_unrealized_pnl(self.midpoint) /
             self.broker.reward_scale,
             self.broker.short_inventory.get_unrealized_pnl(self.midpoint) /
             self.broker.reward_scale),
            dtype=np.float32)

    def _create_action_features(self, action):
        return self.actions[action]

    def _create_indicator_features(self):
        return np.array((*self.tns.get_value(), *self.rsi.get_value()),
                        dtype=np.float32)

    def _get_nbbo(self):
        best_bid = round(
            self.midpoint - self._get_book_data(PriceJump.best_bid_index), 2)
        best_ask = round(
            self.midpoint + self._get_book_data(PriceJump.best_ask_index), 2)
        return best_bid, best_ask

    def _get_book_data(self, index=0):
        return self.data[self.local_step_number][index]

    def _get_step_observation(self, action=0):
        step_position_features = self._create_position_features()
        step_action_features = self._create_action_features(action=action)
        step_indicator_features = self._create_indicator_features()
        return np.concatenate(
            (self._process_data(self.normalized_data[self.local_step_number]),
             step_indicator_features, step_position_features,
             step_action_features, np.array([self.reward])),
            axis=None)

    def _get_observation(self):
        observation = np.array(self.data_buffer, dtype=np.float32)
        # Expand the observation space from 2 to 3 dimensions.
        # This is necessary for conv nets in Baselines.
        if self.format_3d:
            observation = np.expand_dims(observation, axis=-1)
        return observation

    def _load_environment_data(self, fitting_file, testing_file):
        fitting_data_filepath = '{}/data_exports/{}'.format(
            self.sim.cwd, fitting_file)
        data_used_in_environment = '{}/data_exports/{}'.format(
            self.sim.cwd, testing_file)
        fitting_data = self.sim.import_csv(filename=fitting_data_filepath)
        fitting_data['coinbase_midpoint'] = np.log(
            fitting_data['coinbase_midpoint'].values)
        fitting_data['coinbase_midpoint'] = (
            fitting_data['coinbase_midpoint'] -
            fitting_data['coinbase_midpoint'].shift(1)).fillna(method='bfill')
        self.sim.fit_scaler(fitting_data)
        del fitting_data
        return self.sim.import_csv(filename=data_used_in_environment)
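
The midpoint preprocessing in _load_environment_data (and repeated on normalized_data in the constructor) is a log first-difference. The standalone snippet below applies the same transform to a made-up price series, using the fillna(0.) variant from the constructor.

# Made-up midpoint series; the transform mirrors the preprocessing above.
import numpy as np
import pandas as pd

midpoint = pd.Series([100.0, 100.5, 100.25, 101.0])
log_mid = pd.Series(np.log(midpoint.values))
log_returns = (log_mid - log_mid.shift(1)).fillna(0.)
print(log_returns.tolist())   # first entry is 0., then one log return per step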
Code example #18
class PriceJump(Env):

    metadata = {'render.modes': ['human']}
    id = 'long-short-v0'
    action_repeats = 4

    def __init__(self, training=True,
                 fitting_file='ETH-USD_2018-12-31.xz',
                 testing_file='ETH-USD_2019-01-01.xz',
                 step_size=1,
                 max_position=1,
                 window_size=50,
                 seed=1,
                 frame_stack=False):

        # properties required for instantiation
        self._random_state = np.random.RandomState(seed=seed)
        self._seed = seed
        self.training = training
        self.step_size = step_size
        self.fee = BROKER_FEE
        self.max_position = max_position
        self.window_size = window_size
        self.frame_stack = frame_stack
        self.frames_to_add = 3 if self.frame_stack else 0
        self.inventory_features = ['long_inventory', 'short_inventory',
                                   'long_unrealized_pnl', 'short_unrealized_pnl']

        self._action = 0
        # derive gym.env properties
        self.actions = ((1, 0, 0),  # 0. do nothing
                        (0, 1, 0),  # 1. buy
                        (0, 0, 1)  # 2. sell
                        )
        self.sym = testing_file[:7]  # slice the CCY from the filename

        # properties that get reset()
        self.reward = 0.0
        self.done = False
        self._local_step_number = 0
        self.midpoint = 0.0
        self.observation = None

        # get historical data for simulations
        self.broker = Broker(max_position=max_position)
        self.sim = Sim(use_arctic=False)

        # Set to True if Bitfinex data is in the dataset (i.e., include_bitfinex=True)
        self.features = self.sim.get_feature_labels(include_system_time=False,
                                                    include_bitfinex=False)

        fitting_data_filepath = '{}/data_exports/{}'.format(self.sim.cwd, fitting_file)
        data_used_in_environment = '{}/data_exports/{}'.format(self.sim.cwd, testing_file)
        print('Fitting data: {}\nTesting Data: {}'.format(fitting_data_filepath,
                                                          data_used_in_environment))

        self.sim.fit_scaler(self.sim.import_csv(filename=fitting_data_filepath))
        self.data = self.sim.import_csv(filename=data_used_in_environment)
        self.prices = self.data['coinbase_midpoint'].values

        self.data = self.data.apply(self.sim.z_score, axis=1)
        self.data = self.data.values
        self.data_buffer, self.frame_stacker = list(), list()
        self.action_space = spaces.Discrete(len(self.actions))
        variable_features_count = len(self.inventory_features) + len(self.actions) + 1

        if self.frame_stack is False:
            shape = (len(self.features) + variable_features_count, self.window_size)
        else:
            shape = (len(self.features) + variable_features_count, self.window_size, 4)

        self.observation_space = spaces.Box(low=self.data.min(),
                                            high=self.data.max(),
                                            shape=shape,
                                            dtype=np.float32)

        self.reset()
        # print('PriceJump instantiated. ' +
        #       '\nself.observation_space.shape : {}'.format(
        #           self.observation_space.shape))

    def __str__(self):
        return '{} | {}-{}'.format(PriceJump.id, self.sym, self._seed)

    @property
    def step_number(self):
        return self._local_step_number

    def step(self, action):

        for current_step in range(PriceJump.action_repeats):

            if self.done:
                self.reset()
                return self.observation, self.reward, self.done, {}

            position_features = self._create_position_features()
            action_features = self._create_action_features(action=action)

            self.midpoint = self.prices[self._local_step_number]
            self.broker.step(midpoint=self.midpoint)

            if current_step == 0:
                self.reward = 0.

            self.reward += self._send_to_broker_and_get_reward(action=action)

            _observation = np.concatenate((self.process_data(self.data[self._local_step_number]),
                                           position_features,
                                           action_features,
                                           np.array([self.reward])),
                                          axis=None)
            self.data_buffer.append(_observation)

            if len(self.data_buffer) >= self.window_size:
                self.frame_stacker.append(np.array(self.data_buffer, dtype=np.float32))
                del self.data_buffer[0]

                if len(self.frame_stacker) > self.frames_to_add + 1:
                    del self.frame_stacker[0]

            self._local_step_number += self.step_size

        # output shape is [n_features, window_size, n_frames], e.g. [40, 100, 1]
        self.observation = np.array(self.frame_stacker, dtype=np.float32).transpose()

        # This removes a dimension to be compatible with the Keras-rl module
        # because Keras-rl uses its own frame-stacker. There are future plans to integrate
        # this repository with more reinforcement learning packages, such as baselines.
        if self.frame_stack is False:
            self.observation = self.observation.reshape(self.observation.shape[0], -1)

        if self._local_step_number > self.data.shape[0] - 8:
            self.done = True
            order = Order(ccy=self.sym, side=None, price=self.midpoint,
                          step=self._local_step_number)
            self.reward = self.broker.flatten_inventory(order=order)

        return self.observation, self.reward, self.done, {}

    def reset(self):
        if self.training:
            self._local_step_number = self._random_state.randint(low=1, high=5000)
        else:
            self._local_step_number = 0

        logger.info(' %s-%i reset. Episode pnl: %.4f | First step: %i, max_pos: %i'
                    % (self.sym, self._seed,
                       self.broker.get_total_pnl(midpoint=self.midpoint),
                       self._local_step_number, self.max_position))
        self.reward = 0.0
        self.done = False
        self.broker.reset()
        self.data_buffer.clear()
        self.frame_stacker.clear()

        for step in range(self.window_size + self.frames_to_add):
            position_features = self._create_position_features()
            action_features = self._create_action_features(action=0)

            _observation = np.concatenate((self.process_data(self.data[self._local_step_number]),
                                           position_features,
                                           action_features,
                                           np.array([self.reward])),
                                          axis=None)
            self.data_buffer.append(_observation)
            self._local_step_number += self.step_size

            if step >= self.window_size - 1:
                self.frame_stacker.append(np.array(self.data_buffer, dtype=np.float32))
                del self.data_buffer[0]

                if len(self.frame_stacker) > self.frames_to_add + 1:
                    del self.frame_stacker[0]

        # output shape is [n_features, window_size, n_frames], e.g. [40, 100, 1]
        self.observation = np.array(self.frame_stacker, dtype=np.float32).transpose()

        # This removes a dimension to be compatible with the Keras-rl module
        # because Keras-rl uses its own frame-stacker. There are future plans to integrate
        # this repository with more reinforcement learning packages, such as baselines.
        if self.frame_stack is False:
            self.observation = self.observation.reshape(self.observation.shape[0], -1)

        return self.observation

    def render(self, mode='human'):
        pass

    def close(self):
        logger.info('{}-{} is being closed.'.format(self.id, self.sym))
        self.data = None
        self.broker = None
        self.sim = None
        self.data_buffer = None
        return

    def seed(self, seed=1):
        self._random_state = np.random.RandomState(seed=seed)
        self._seed = seed
        return [seed]

    @staticmethod
    def process_data(_next_state):
        # return self.sim.scale_state(_next_state).values.reshape((1, -1))
        return np.clip(_next_state.reshape((1, -1)), -10., 10.)

    def _send_to_broker_and_get_reward(self, action):
        reward = 0.0

        if action == 0:  # do nothing
            pass

        elif action == 1:  # buy
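            # buying pays the fee on top of the midpoint: midpoint * (1 + fee)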
            price_fee_adjusted = self.midpoint + (self.fee * self.midpoint)
            if self.broker.short_inventory_count > 0:
                order = Order(ccy=self.sym, side='short',
                              price=price_fee_adjusted,
                              step=self._local_step_number)
                self.broker.remove(order=order)
                reward += self.broker.get_reward(side=order.side)

            elif self.broker.long_inventory_count >= 0:
                order = Order(ccy=self.sym, side='long',
                              price=price_fee_adjusted,
                              step=self._local_step_number)
                if self.broker.add(order=order) is False:
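                    # tiny penalty when the broker rejects the order
                    # (e.g., the max_position limit was already reached)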
                    reward -= 0.00000001

            else:
                logger.warning(('gym_trading.get_reward() ' +
                                'Error for action #{} - ' +
                                'unable to place an order with broker').format(action))

        elif action == 2:  # sell
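            # selling receives the midpoint less the fee: midpoint * (1 - fee)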
            price_fee_adjusted = self.midpoint - (self.fee * self.midpoint)
            if self.broker.long_inventory_count > 0:
                order = Order(ccy=self.sym, side='long',
                              price=price_fee_adjusted,
                              step=self._local_step_number)
                self.broker.remove(order=order)
                reward += self.broker.get_reward(side=order.side)
            elif self.broker.short_inventory_count >= 0:
                order = Order(ccy=self.sym, side='short',
                              price=price_fee_adjusted,
                              step=self._local_step_number)
                if self.broker.add(order=order) is False:
                    reward -= 0.00000001

            else:
                logger.warning(('gym_trading.get_reward() ' +
                                'Error for action #{} - ' +
                                'unable to place an order with broker').format(action))

        else:
            logger.warning(('Unknown action to take in get_reward(): ' +
                            'action={} | midpoint={}').format(action, self.midpoint))

        return reward

    def _create_position_features(self):
        return np.array((self.broker.long_inventory.position_count / self.max_position,
                         self.broker.short_inventory.position_count / self.max_position,
                         self.broker.long_inventory.get_unrealized_pnl(self.midpoint),
                         self.broker.short_inventory.get_unrealized_pnl(self.midpoint)))

    def _create_action_features(self, action):
        return np.array(self.actions[action])
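
A minimal usage sketch for the PriceJump environment above. The import path is an assumption (point it at whichever module defines the class), the data exports are the placeholder files already named in the constructor defaults, and the random agent is only there to exercise step() and reset():

# hypothetical import path; adjust to wherever PriceJump lives in the project
from gym_trading.price_jump import PriceJump

env = PriceJump(training=False, step_size=1, max_position=1, window_size=50)

observation = env.reset()
for _ in range(1000):
    action = env.action_space.sample()          # random agent, for illustration only
    observation, reward, done, info = env.step(action)
    if done:
        observation = env.reset()
env.close()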
Code example #19
    def __init__(self,
                 *,
                 training=True,
                 fitting_file='ETH-USD_2018-12-31.xz',
                 testing_file='ETH-USD_2019-01-01.xz',
                 step_size=1,
                 max_position=5,
                 window_size=4,
                 frame_stack=False):

        # properties required for instantiation
        PriceJump.instance_count += 1
        self._seed = int(PriceJump.instance_count)  # seed
        self._random_state = np.random.RandomState(seed=self._seed)
        self.training = training
        self.step_size = step_size
        self.fee = BROKER_FEE
        self.max_position = max_position
        self.window_size = window_size
        self.frame_stack = frame_stack
        self.frames_to_add = 3 if self.frame_stack else 0

        self.action = 0
        # derive gym.env properties
        self.actions = np.eye(3)

        self.sym = testing_file[:7]  # slice the CCY from the filename

        # properties that get reset()
        self.reward = 0.0
        self.done = False
        self.local_step_number = 0
        self.midpoint = 0.0
        self.observation = None

        # get Broker class to keep track of PnL and orders
        self.broker = Broker(max_position=max_position)
        # get historical data for simulations
        self.sim = Sim(use_arctic=False)

        fitting_data_filepath = '{}/data_exports/{}'.format(
            self.sim.cwd, fitting_file)
        data_used_in_environment = '{}/data_exports/{}'.format(
            self.sim.cwd, testing_file)
        # print('Fitting data: {}\nTesting Data: {}'.format(fitting_data_filepath,
        #                                                data_used_in_environment))

        fitting_data = self.sim.import_csv(filename=fitting_data_filepath)
        fitting_data['coinbase_midpoint'] = np.log(
            fitting_data['coinbase_midpoint'].values)
        fitting_data['coinbase_midpoint'] = fitting_data['coinbase_midpoint']. \
            pct_change().fillna(method='bfill')
        self.sim.fit_scaler(fitting_data)
        del fitting_data

        self.data = self.sim.import_csv(filename=data_used_in_environment)
        self.prices_ = self.data[
            'coinbase_midpoint'].values  # used to calculate PnL

        self.normalized_data = self.data.copy()
        self.data = self.data.values

        self.normalized_data['coinbase_midpoint'] = np.log(
            self.normalized_data['coinbase_midpoint'].values)
        self.normalized_data['coinbase_midpoint'] = (
            self.normalized_data['coinbase_midpoint'] -
            self.normalized_data['coinbase_midpoint'].shift(1)).fillna(
                method='bfill')

        self.tns = TnS()
        self.rsi = RSI()

        logger.info("Pre-scaling {}-{} data...".format(self.sym, self._seed))
        self.normalized_data = self.normalized_data.apply(self.sim.z_score,
                                                          axis=1).values
        logger.info("...{}-{} pre-scaling complete.".format(
            self.sym, self._seed))

        # rendering class
        self._render = TradingGraph(sym=self.sym)
        # graph midpoint prices
        self._render.reset_render_data(
            y_vec=self.prices_[:np.shape(self._render.x_vec)[0]])

        self.data_buffer, self.frame_stacker = list(), list()

        self.action_space = spaces.Discrete(len(self.actions))

        variable_features_count = len(self.inventory_features) + len(self.actions) + 1 + \
                                  len(PriceJump.indicator_features)

        if self.frame_stack:
            shape = (4, len(PriceJump.features) + variable_features_count,
                     self.window_size)
        else:
            shape = (self.window_size,
                     len(PriceJump.features) + variable_features_count)

        self.observation_space = spaces.Box(low=self.data.min(),
                                            high=self.data.max(),
                                            shape=shape,
                                            dtype=np.float32)

        print('PriceJump #{} instantiated.\nself.observation_space.shape : {}'.
              format(PriceJump.instance_count, self.observation_space.shape))
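
As a reference for the frame-stacking convention used in step() and reset() of the earlier example, a standalone shape check with assumed toy sizes: the stacker keeps frames_to_add + 1 windows of shape (window_size, n_features), and the transpose yields [n_features, window_size, n_frames].

import numpy as np

window_size, n_features = 50, 40                        # toy sizes
frames_to_add = 3                                       # frame_stack=True keeps 4 frames

frame_stacker = [np.zeros((window_size, n_features), dtype=np.float32)
                 for _ in range(frames_to_add + 1)]
observation = np.array(frame_stacker, dtype=np.float32).transpose()
print(observation.shape)   # (40, 50, 4) -> [n_features, window_size, n_frames]

# with frame_stack=False only one frame is kept and the trailing axis is flattened,
# mirroring the reshape(observation.shape[0], -1) branch in step()/reset()
single = np.array(frame_stacker[:1], dtype=np.float32).transpose()
print(single.reshape(single.shape[0], -1).shape)   # (40, 50)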