class BitcoinTradingEnv(gym.Env):
    """A Bitcoin trading environment for OpenAI gym.

    The agent trades over "sessions": contiguous slices of the price frame.
    In serial mode a session spans the whole frame; otherwise a random
    slice of random length is drawn each time a session ends.

    Args:
        df: price data; must provide the columns 'Open', 'High', 'Low',
            'Close' and 'Volume_(BTC)'. Rows containing NaN are dropped.
        lookback_window_size: number of past steps included in each
            observation (the observation has ``lookback_window_size + 1``
            columns).
        initial_balance: starting cash balance.
        commission: proportional fee applied to every buy and sell.
        serial: if True, traverse the frame from the start in one session
            instead of sampling random sessions.
    """
    metadata = {'render.modes': ['human', 'system', 'none']}
    # Shared by all instances; it is re-fit on every use via fit_transform.
    scaler = preprocessing.MinMaxScaler()
    viewer = None

    def __init__(self, df, lookback_window_size=40, initial_balance=10000, commission=0.00075, serial=False):
        super(BitcoinTradingEnv, self).__init__()

        self.df = df.dropna().reset_index()
        self.lookback_window_size = lookback_window_size
        self.initial_balance = initial_balance
        self.commission = commission
        self.serial = serial

        # action[0]: 0 = buy, 1 = sell, 2 = hold.
        # action[1]: 0..9, the traded fraction in tenths (ignored for hold).
        self.action_space = spaces.MultiDiscrete([3, 10])

        # 5 scaled OHLCV rows followed by 5 scaled account-history rows.
        self.observation_space = spaces.Box(
            low=0, high=1, shape=(10, lookback_window_size + 1), dtype=np.float16)

    def _next_observation(self):
        """Return the observation for the current step.

        The result stacks the scaled OHLCV window for the last
        ``lookback_window_size + 1`` rows on top of the scaled account
        history for the same number of most recent steps.
        """
        end = self.current_step + self.lookback_window_size + 1

        # NOTE(review): every column of the frame is cast to float64 here,
        # so this presumably expects an all-numeric frame -- confirm.
        scaled_df = self.active_df.values[:end].astype('float64')
        scaled_df = self.scaler.fit_transform(scaled_df)
        scaled_df = pd.DataFrame(scaled_df, columns=self.df.columns)

        window = slice(self.current_step, end)
        obs = np.array([
            scaled_df[column].values[window]
            for column in ('Open', 'High', 'Low', 'Close', 'Volume_(BTC)')
        ])

        # NOTE(review): account_history is laid out (rows=fields,
        # cols=steps) while MinMaxScaler scales column-wise, so each step
        # is scaled across its five fields -- confirm this is intended.
        scaled_history = self.scaler.fit_transform(self.account_history)

        obs = np.append(
            obs, scaled_history[:, -(self.lookback_window_size + 1):], axis=0)

        return obs

    def _reset_session(self):
        """Start a new trading session and select its slice of the data."""
        self.current_step = 0

        if self.serial:
            self.steps_left = len(self.df) - self.lookback_window_size - 1
            self.frame_start = self.lookback_window_size
        else:
            # Cap the session length to what the data can hold after the
            # lookback window; otherwise the frame_start draw below fails
            # with low >= high on frames shorter than MAX_TRADING_SESSION.
            max_session = min(MAX_TRADING_SESSION,
                              len(self.df) - self.lookback_window_size)
            self.steps_left = np.random.randint(1, max_session)
            self.frame_start = np.random.randint(
                self.lookback_window_size, len(self.df) - self.steps_left)

        self.active_df = self.df[self.frame_start - self.lookback_window_size:
                                 self.frame_start + self.steps_left]

    def reset(self):
        """Reset account state, start a new session, return the first observation."""
        self.balance = self.initial_balance
        self.net_worth = self.initial_balance
        self.btc_held = 0

        self._reset_session()

        # Rows: balance, btc_bought, cost, btc_sold, sales. Pre-filled with
        # lookback_window_size + 1 columns so the first observation is full.
        self.account_history = np.repeat([
            [self.balance],
            [0],
            [0],
            [0],
            [0]
        ], self.lookback_window_size + 1, axis=1)
        self.trades = []

        return self._next_observation()

    def _get_current_price(self):
        """Return the close price at the current position in the full frame."""
        return self.df['Close'].values[self.frame_start + self.current_step]

    def _take_action(self, action, current_price):
        """Execute a buy/sell/hold at ``current_price`` and record it.

        Args:
            action: pair ``(action_type, tenths)`` as described by
                ``action_space``.
            current_price: execution price before commission.
        """
        action_type = action[0]
        amount = action[1] / 10

        btc_bought = 0
        btc_sold = 0
        cost = 0
        sales = 0

        if action_type < 1:
            # Buy: spend `amount` of the balance, plus commission on top.
            btc_bought = self.balance / current_price * amount
            cost = btc_bought * current_price * (1 + self.commission)

            self.btc_held += btc_bought
            self.balance -= cost

        elif action_type < 2:
            # Sell: liquidate `amount` of the holdings, net of commission.
            btc_sold = self.btc_held * amount
            sales = btc_sold * current_price * (1 - self.commission)

            self.btc_held -= btc_sold
            self.balance += sales

        if btc_sold > 0 or btc_bought > 0:
            is_sell = btc_sold > 0
            self.trades.append({
                'step': self.frame_start + self.current_step,
                'amount': btc_sold if is_sell else btc_bought,
                'total': sales if is_sell else cost,
                'type': "sell" if is_sell else "buy",
            })

        self.net_worth = self.balance + self.btc_held * current_price

        # One new column per step, including holds (all-zero trade fields).
        self.account_history = np.append(self.account_history, [
            [self.balance],
            [btc_bought],
            [cost],
            [btc_sold],
            [sales]
        ], axis=1)

    def step(self, action):
        """Apply ``action`` and advance one step.

        Returns:
            ``(obs, reward, done, info)`` where the reward is the change
            in net worth over this step and ``done`` is True once the net
            worth is no longer positive.
        """
        current_price = self._get_current_price() + 0.01

        prev_net_worth = self.net_worth

        self._take_action(action, current_price)

        self.steps_left -= 1
        self.current_step += 1

        if self.steps_left == 0:
            # Session exhausted: liquidate holdings and roll into a new
            # session without ending the episode.
            self.balance += self.btc_held * current_price
            self.btc_held = 0

            self._reset_session()

        obs = self._next_observation()
        reward = self.net_worth - prev_net_worth
        done = self.net_worth <= 0

        return obs, reward, done, {}

    def render(self, mode='human', title='BTC'):
        """Render the environment state.

        Args:
            mode: 'system' prints a text summary; 'human' draws the chart.
            title: chart title, used when the viewer is first created.
        """
        if mode == 'system':
            print('Price: ' + str(self._get_current_price()))
            # The newest column is the most recent step. Indexing by
            # current_step + frame_start only matched it in serial mode and
            # went out of bounds for randomly placed sessions.
            # Rows 2 and 4 hold the cost and the sale proceeds.
            print('Bought: ' + str(self.account_history[2][-1]))
            print('Sold: ' + str(self.account_history[4][-1]))
            print('Net worth: ' + str(self.net_worth))

        elif mode == 'human':
            if self.viewer is None:
                self.viewer = BitcoinTradingGraph(
                    self.df, title=title)

            self.viewer.render(self.frame_start + self.current_step,
                               self.net_worth,
                               self.trades,
                               window_size=self.lookback_window_size)

    def close(self):
        """Close the chart viewer, if one was opened."""
        if self.viewer is not None:
            self.viewer.close()
            self.viewer = None
# Example no. 2
# 0
class BitcoinTradingEnv(gym.Env):
    '''A Bitcoin trading environment for OpenAI gym.

    Observations combine the latest scaled (log-differenced) features, a
    SARIMAX forecast of the close series with its confidence bounds, and
    the latest scaled account state.

    Args:
        df: price data; must provide 'Open', 'High', 'Low', 'Close',
            'Volume BTC' and 'Volume USD'. Missing values are back-filled.
        initial_balance: starting cash balance.
        commission: proportional fee applied to every buy and sell.
        reward_func: 'sortino', 'calmar' or 'omega'; any other value
            rewards the latest net-worth change.
        **kwargs: optional ``benchmarks`` (extra benchmark dicts appended
            to the built-in ones), ``forecast_len`` (default 10) and
            ``confidence_interval`` (default 0.95).
    '''
    metadata = {'render.modes': ['human', 'system', 'none']}
    viewer = None

    def __init__(self,
                 df,
                 initial_balance=10000,
                 commission=0.0025,
                 reward_func='sortino',
                 **kwargs):
        super(BitcoinTradingEnv, self).__init__()

        self.initial_balance = initial_balance
        self.commission = commission
        self.reward_func = reward_func

        # bfill() replaces the deprecated fillna(method='bfill').
        self.df = df.bfill().reset_index()
        self.stationary_df = log_and_difference(
            self.df,
            ['Open', 'High', 'Low', 'Close', 'Volume BTC', 'Volume USD'])

        benchmarks = kwargs.get('benchmarks', [])
        closes = self.df['Close']
        self.benchmarks = [
            {
                'label': 'Buy and HODL',
                'values': buy_and_hodl(closes, initial_balance, commission),
            },
            {
                'label': 'RSI Divergence',
                'values': rsi_divergence(closes, initial_balance, commission),
            },
            {
                'label': 'SMA Crossover',
                'values': sma_crossover(closes, initial_balance, commission),
            },
            *benchmarks,
        ]

        self.forecast_len = kwargs.get('forecast_len', 10)
        self.confidence_interval = kwargs.get('confidence_interval', 0.95)
        # 5 account fields + every column except 'index' and 'Date' +
        # forecast mean and two confidence bounds per forecast step.
        self.obs_shape = (1, 5 + len(self.df.columns) - 2 +
                          (self.forecast_len * 3))

        # action // 4: 0 = buy, 1 = sell, 2 = hold.
        # action % 4 selects the fraction 1/1, 1/2, 1/3 or 1/4.
        self.action_space = spaces.Discrete(12)

        # Observes the price action, indicators, account action, price forecasts
        self.observation_space = spaces.Box(low=0,
                                            high=1,
                                            shape=self.obs_shape,
                                            dtype=np.float16)

    def _next_observation(self):
        '''Return the observation for the current step, shaped ``obs_shape``.'''
        scaler = preprocessing.MinMaxScaler()

        features = self.stationary_df[self.stationary_df.columns.difference(
            ['index', 'Date'])]

        window_end = self.current_step + self.forecast_len + 1

        # Copy before masking so zeroing infinities never writes through
        # to the frame (or fails on a read-only view of it).
        scaled = features[:window_end].values.copy()
        scaled[abs(scaled) == inf] = 0
        scaled = scaler.fit_transform(scaled.astype('float32'))

        # Only the most recent scaled row enters the observation.
        obs = scaled[-1]

        past_df = self.stationary_df['Close'][:window_end]
        forecast_model = SARIMAX(past_df.values, enforce_stationarity=False)
        model_fit = forecast_model.fit(method='bfgs', disp=False)
        forecast = model_fit.get_forecast(steps=self.forecast_len)
        # The significance level belongs to conf_int(); previously it was
        # handed to get_forecast() and conf_int() used its default alpha,
        # so the configured confidence_interval was not applied here.
        bounds = forecast.conf_int(alpha=(1 - self.confidence_interval))

        obs = np.insert(obs, len(obs), forecast.predicted_mean, axis=0)
        obs = np.insert(obs, len(obs), bounds.flatten(), axis=0)

        scaled_history = scaler.fit_transform(
            self.account_history.astype('float32'))

        obs = np.insert(obs, len(obs), scaled_history[:, -1], axis=0)

        obs = np.reshape(obs.astype('float16'), self.obs_shape)
        # float16 conversion can overflow; replace non-finite entries.
        obs[np.bitwise_not(np.isfinite(obs))] = 0

        return obs

    def _current_price(self):
        '''Return the close price at the current step, offset by ``forecast_len`` rows.'''
        return self.df['Close'].values[self.current_step +
                                       self.forecast_len] + 0.01

    def _take_action(self, action):
        '''Execute the buy/sell/hold encoded by ``action`` and record it.'''
        current_price = self._current_price()
        action_type = int(action / 4)
        amount = 1 / (action % 4 + 1)

        btc_bought = 0
        btc_sold = 0
        cost = 0
        sales = 0

        if action_type == 0:
            # Commission is folded into the price so the cost never
            # exceeds the available balance.
            price = current_price * (1 + self.commission)
            btc_bought = min(self.balance * amount / price,
                             self.balance / price)
            cost = btc_bought * price

            self.btc_held += btc_bought
            self.balance -= cost
        elif action_type == 1:
            price = current_price * (1 - self.commission)
            btc_sold = self.btc_held * amount
            sales = btc_sold * price

            self.btc_held -= btc_sold
            self.balance += sales

        if btc_sold > 0 or btc_bought > 0:
            is_sell = btc_sold > 0
            self.trades.append({
                'step': self.current_step,
                'amount': btc_sold if is_sell else btc_bought,
                'total': sales if is_sell else cost,
                'type': 'sell' if is_sell else 'buy'
            })

        self.net_worths.append(self.balance + self.btc_held * current_price)

        # Rows: balance, btc_bought, cost, btc_sold, sales; one column per step.
        self.account_history = np.append(
            self.account_history,
            [[self.balance], [btc_bought], [cost], [btc_sold], [sales]],
            axis=1)

    def _reward(self):
        '''Return the reward for the current step.

        Computed from the differences of the most recent net worths (at
        most ``forecast_len`` of them) using the configured ratio. Returns
        0 when there is no non-zero change or the result is not finite.
        '''
        length = min(self.current_step, self.forecast_len)
        returns = np.diff(self.net_worths[-length:])

        if np.count_nonzero(returns) < 1:
            return 0

        if self.reward_func == 'sortino':
            reward = sortino_ratio(returns, annualization=365 * 24)
        elif self.reward_func == 'calmar':
            reward = calmar_ratio(returns, annualization=365 * 24)
        elif self.reward_func == 'omega':
            reward = omega_ratio(returns, annualization=365 * 24)
        else:
            reward = returns[-1]

        return reward if np.isfinite(reward) else 0

    def _done(self):
        '''Return True when net worth fell below 10% of the start or data ran out.'''
        bankrupt = self.net_worths[-1] < self.initial_balance / 10
        out_of_data = self.current_step == len(
            self.df) - self.forecast_len - 1
        return bankrupt or out_of_data

    def reset(self):
        '''Reset account state and return the first observation.'''
        self.balance = self.initial_balance
        self.net_worths = [self.initial_balance]
        self.btc_held = 0
        self.current_step = 0

        self.account_history = np.array([[self.balance], [0], [0], [0], [0]])
        self.trades = []

        return self._next_observation()

    def step(self, action):
        '''Apply ``action``, advance one step, return ``(obs, reward, done, info)``.'''
        self._take_action(action)

        self.current_step += 1

        obs = self._next_observation()
        reward = self._reward()
        done = self._done()

        return obs, reward, done, {}

    def render(self, mode='human'):
        '''Render the state: 'system' prints a summary, 'human' draws the chart.'''
        if mode == 'system':
            print('Price: ' + str(self._current_price()))
            # account_history has one column per step plus the initial
            # one, so index current_step is the newest column.
            print('Bought: ' + str(self.account_history[2][self.current_step]))
            print('Sold: ' + str(self.account_history[4][self.current_step]))
            print('Net worth: ' + str(self.net_worths[-1]))

        elif mode == 'human':
            if self.viewer is None:
                self.viewer = BitcoinTradingGraph(self.df)

            self.viewer.render(self.current_step, self.net_worths,
                               self.benchmarks, self.trades)

    def close(self):
        '''Close the chart viewer, if one was opened.'''
        if self.viewer is not None:
            self.viewer.close()
            self.viewer = None
# Example no. 3
# 0
class BitcoinTradingEnv(gym.Env):
    """A Bitcoin trading environment for OpenAI gym.

    Observations stack the close price and a set of technical indicators
    with the account history over a lookback window.

    Args:
        df: price data; must provide 'Open', 'High', 'Low', 'Close' and
            'Volume'. Rows containing NaN are dropped.
        lookback_window_size: number of past steps included in each
            observation (the observation has ``lookback_window_size + 1``
            columns).
        initial_balance: starting cash balance.
        commission: proportional fee applied to every buy and sell.
        serial: if True, traverse the frame from the start in one session
            instead of sampling random sessions.
    """
    metadata = {'render.modes': ['human', 'system', 'file', 'none']}
    # Shared by all instances; it is re-fit on every use via fit_transform.
    scaler = preprocessing.MinMaxScaler()
    viewer = None

    def __init__(self,
                 df,
                 lookback_window_size=127,
                 initial_balance=10000,
                 commission=0.00075,
                 serial=False):
        super(BitcoinTradingEnv, self).__init__()

        self.df = df.dropna().reset_index()
        self.lookback_window_size = lookback_window_size
        self.initial_balance = initial_balance
        self.commission = commission
        self.serial = serial

        # Last trade summary, written out by render(mode='file').
        self.file_history = ""

        # Derive indicator columns from a lower-cased copy of the OHLCV data.
        scaled_df = self.df[['Open', 'High', 'Low', 'Close',
                             'Volume']].astype('float64')
        scaled_df.rename(
            columns={
                'Open': 'open',
                'High': 'high',
                'Low': 'low',
                'Close': 'close',
                'Volume': 'volume',
            },
            inplace=True)

        stock = StockDataFrame.retype(scaled_df)
        indicators_df = stock[['macd', 'vr']]
        indicators_df = pd.concat(
            [
                indicators_df,
                CCI(scaled_df),
                OBV(scaled_df),
                RSI(scaled_df),
                STOCHRSI(scaled_df),
            ],
            axis=1)
        # NOTE(review): presumably CCI/OBV/RSI return unnamed series that
        # concat labels 0, 1, 2, and STOCHRSI supplies the 'fastk'/'fastd'
        # columns read in _next_observation -- confirm against the helpers.
        indicators_df.rename(columns={0: 'cci', 1: 'obv', 2: 'rsi'},
                             inplace=True)

        self.df = pd.concat([self.df, indicators_df], axis=1)
        self.df = self.df.dropna()

        # action // 4: 0 = buy, 1 = sell, 2 = hold.
        # (action % 4 + 1) / 4 is the traded fraction: 1/4, 2/4, 3/4 or 4/4.
        self.action_space = spaces.Discrete(9)

        # 8 market/indicator rows followed by 5 account-history rows.
        self.observation_space = spaces.Box(low=0,
                                            high=2,
                                            shape=(13,
                                                   lookback_window_size + 1),
                                            dtype=np.float16)

    def _next_observation(self):
        """Return the scaled observation for the current step."""
        end = self.current_step + self.lookback_window_size + 1

        # NOTE(review): every column of the frame is cast to float64 here,
        # so this presumably expects an all-numeric frame -- confirm.
        scaled_df = self.active_df.values[:end].astype('float64')
        scaled_df = pd.DataFrame(scaled_df, columns=self.df.columns)

        window = slice(self.current_step, end)
        obs = np.array([
            scaled_df[column].values[window]
            for column in ('Close', 'macd', 'vr', 'cci', 'obv', 'rsi',
                           'fastk', 'fastd')
        ])

        obs = np.append(
            obs,
            self.account_history[:, -(self.lookback_window_size + 1):],
            axis=0)

        # NOTE(review): MinMaxScaler scales column-wise, i.e. each time
        # step across its 13 rows -- confirm this is intended.
        obs = self.scaler.fit_transform(obs)

        return obs

    def _reset_session(self):
        """Start a new trading session and select its slice of the data."""
        self.current_step = 0

        if self.serial:
            self.steps_left = len(self.df) - self.lookback_window_size - 1
            self.frame_start = self.lookback_window_size
        else:
            # Cap the session length to what the data can hold after the
            # lookback window; otherwise the frame_start draw below fails
            # with low >= high on frames shorter than MAX_TRADING_SESSION.
            max_session = min(MAX_TRADING_SESSION,
                              len(self.df) - self.lookback_window_size)
            self.steps_left = np.random.randint(1, max_session)
            self.frame_start = np.random.randint(
                self.lookback_window_size,
                len(self.df) - self.steps_left)

        self.active_df = self.df[self.frame_start -
                                 self.lookback_window_size:self.frame_start +
                                 self.steps_left]

    def reset(self):
        """Reset account state, start a new session, return the first observation."""
        self.balance = self.initial_balance
        self.net_worth = self.initial_balance
        self.btc_held = 0

        self._reset_session()

        # Rows: balance, btc_bought, cost, btc_sold, sales. Pre-filled with
        # lookback_window_size + 1 columns so the first observation is full.
        self.account_history = np.repeat([[self.balance], [0], [0], [0], [0]],
                                         self.lookback_window_size + 1,
                                         axis=1)
        self.trades = []

        return self._next_observation()

    def _get_current_price(self):
        """Return the close price at the current position in the full frame."""
        return self.df['Close'].values[self.frame_start + self.current_step]

    def _take_action(self, action, current_price):
        """Execute the trade encoded by ``action`` at ``current_price``.

        Returns:
            -1 without touching any state when the action is rejected: a
            buy with less than 1% of the initial balance left, a sell
            with no holdings, or a hold. Otherwise 0 after recording it.
        """
        action_type = action // 4
        amount = ((action % 4) + 1) / 4

        btc_bought = 0
        btc_sold = 0
        cost = 0
        sales = 0

        if (action_type == 0 and self.balance < self.initial_balance * 0.01) or \
            (action_type == 1 and self.btc_held <= 0) or action_type == 2:
            return -1

        if action_type < 1:  # BUY
            # Size the order with the fee included so the total cost is
            # exactly `amount` of the balance. Previously the fee was added
            # on top, so a 100% buy pushed the balance below zero.
            buy_price = current_price * (1 + self.commission)
            btc_bought = (self.balance * amount) / buy_price
            cost = btc_bought * buy_price

            self.btc_held += btc_bought
            self.balance -= cost

        elif action_type < 2:  # SELL
            btc_sold = self.btc_held * amount
            sales = btc_sold * current_price * (1 - self.commission)

            self.btc_held -= btc_sold
            self.balance += sales

        if btc_sold > 0 or btc_bought > 0:
            is_sell = btc_sold > 0
            self.trades.append({
                'step': self.frame_start + self.current_step,
                'amount': btc_sold if is_sell else btc_bought,
                'total': sales if is_sell else cost,
                'type': "sell" if is_sell else "buy"
            })

        self.net_worth = self.balance + self.btc_held * current_price

        self.account_history = np.append(
            self.account_history,
            [[self.balance], [btc_bought], [cost], [btc_sold], [sales]],
            axis=1)

        return 0

    def step(self, action):
        """Apply ``action`` and advance one step.

        Returns:
            ``(obs, reward, done, info)``. The reward is the percentage
            change in net worth over this step. A rejected action (see
            ``_take_action``) ends the episode immediately with
            ``(None, None, True, {})``.
        """
        current_price = self._get_current_price()

        prev_net_worth = self.net_worth

        if self._take_action(action, current_price) < 0:
            return None, None, True, {}

        self.steps_left -= 1
        self.current_step += 1

        if self.steps_left == 0:
            # Session exhausted: liquidate holdings and roll into a new
            # session.
            self.balance += self.btc_held * current_price
            self.btc_held = 0

            self._reset_session()

        obs = self._next_observation()
        reward = ((self.net_worth - prev_net_worth) / prev_net_worth) * 100
        # Episode ends near the end of the data, on a 20% loss or gain,
        # or after more than 200 steps in the session.
        done = self.current_step + self.lookback_window_size + 3 > len(self.df) or self.net_worth <= self.initial_balance*0.8 or \
               self.net_worth >= self.initial_balance * 1.2 or self.current_step > 200

        action_type = action // 4
        if action_type == 0 or action_type == 1:
            amount = ((action % 4) + 1) / 4
            side = "BUY" if action_type == 0 else "SELL"
            self.file_history = "{}%({:.1f}) {} ({:.1f} -> {:.1f})".format(
                amount * 100, self.trades[-1]['total'], side, prev_net_worth,
                self.net_worth)
        else:
            self.file_history = "HOLD ({} -> {})".format(
                prev_net_worth, self.net_worth)

        return obs, reward, done, {}

    def render(self, mode='human', **kwargs):
        """Render the environment state.

        Args:
            mode: 'system' prints a text summary, 'human' draws the chart,
                'file' appends the last trade summary to the log file.
            **kwargs: optional ``title`` for the chart, used when the
                viewer is first created.
        """
        if mode == 'system':
            print('Price: ' + str(self._get_current_price()))
            # The newest column is the most recent step. Indexing by
            # current_step + frame_start only matched it in serial mode and
            # went out of bounds for randomly placed sessions.
            # Rows 2 and 4 hold the cost and the sale proceeds.
            print('Bought: ' + str(self.account_history[2][-1]))
            print('Sold: ' + str(self.account_history[4][-1]))
            print('Net worth: ' + str(self.net_worth))

        elif mode == 'human':
            if self.viewer is None:
                self.viewer = BitcoinTradingGraph(self.df,
                                                  kwargs.get('title', None))

            self.viewer.render(self.frame_start + self.current_step,
                               self.net_worth,
                               self.trades,
                               window_size=self.lookback_window_size)

        elif mode == 'file':
            import os

            # Create the log directory on first use so appending does not
            # fail. The file name (including its spelling) is kept so
            # existing logs stay in one place.
            os.makedirs('log', exist_ok=True)
            with open('log/hisotry.txt', 'a') as f:
                f.write("{}\n".format(self.file_history))

    def close(self):
        """Close the chart viewer, if one was opened."""
        if self.viewer is not None:
            self.viewer.close()
            self.viewer = None