class BitcoinTradingEnv(gym.Env):
    """A Bitcoin trading environment for OpenAI gym.

    Each observation is a ``(10, lookback_window_size + 1)`` array: five
    min-max scaled price rows (Open, High, Low, Close, Volume_(BTC)) stacked
    on top of five min-max scaled account rows (balance, BTC bought, cost,
    BTC sold, sales).

    Actions are ``[action_type, amount_index]`` pairs: ``action_type`` 0 buys,
    1 sells and anything else holds; the traded fraction is
    ``amount_index / 10``.
    """

    metadata = {'render.modes': ['human', 'system', 'none']}
    # Shared scaler; it is re-fitted on every call, so no state carries over.
    scaler = preprocessing.MinMaxScaler()
    viewer = None

    def __init__(self, df, lookback_window_size=40, initial_balance=10000,
                 commission=0.00075, serial=False):
        """Store the configuration and declare action/observation spaces.

        Args:
            df: Price data; rows containing NaN are dropped and the index
                is reset.
            lookback_window_size: Number of past steps in each observation
                (the observation holds ``lookback_window_size + 1`` columns).
            initial_balance: Cash balance at the start of every episode.
            commission: Fractional fee applied to each buy and sell.
            serial: When true, sessions traverse the whole frame in order;
                otherwise a random slice is drawn for each session.
        """
        # Zero-argument super() does not depend on the module-level name,
        # which matters when the class name is rebound later in the file.
        super().__init__()

        self.df = df.dropna().reset_index()
        self.lookback_window_size = lookback_window_size
        self.initial_balance = initial_balance
        self.commission = commission
        self.serial = serial

        # Actions of the format Buy 1/10, Sell 3/10, Hold (amount ignored), etc.
        self.action_space = spaces.MultiDiscrete([3, 10])

        # Observes the OHCLV values, net worth, and trade history
        self.observation_space = spaces.Box(
            low=0, high=1,
            shape=(10, lookback_window_size + 1),
            dtype=np.float16)

    def _next_observation(self):
        """Build the scaled observation window ending at the current step."""
        end = self.current_step + self.lookback_window_size + 1
        window = slice(self.current_step, end)

        # Scale everything seen so far in the active session.
        scaled_values = self.active_df.values[:end].astype('float64')
        scaled_values = self.scaler.fit_transform(scaled_values)
        scaled_df = pd.DataFrame(scaled_values, columns=self.df.columns)

        price_columns = ('Open', 'High', 'Low', 'Close', 'Volume_(BTC)')
        obs = np.array(
            [scaled_df[column].values[window] for column in price_columns])

        scaled_history = self.scaler.fit_transform(self.account_history)

        # Append the most recent account columns below the price rows.
        obs = np.append(
            obs,
            scaled_history[:, -(self.lookback_window_size + 1):],
            axis=0)

        return obs

    def _reset_session(self):
        """Start a new trading session and select its slice of the data."""
        self.current_step = 0

        if self.serial:
            self.steps_left = len(self.df) - self.lookback_window_size - 1
            self.frame_start = self.lookback_window_size
        else:
            self.steps_left = np.random.randint(1, MAX_TRADING_SESSION)
            self.frame_start = np.random.randint(
                self.lookback_window_size, len(self.df) - self.steps_left)

        # Include the lookback rows that precede the first tradable step.
        self.active_df = self.df[
            self.frame_start - self.lookback_window_size:
            self.frame_start + self.steps_left]

    def reset(self):
        """Reset the account state and return the first observation."""
        self.balance = self.initial_balance
        self.net_worth = self.initial_balance
        self.btc_held = 0

        self._reset_session()

        # Rows: balance, BTC bought, cost, BTC sold, sales. Pre-filled so the
        # first observation already has a full lookback window.
        self.account_history = np.repeat(
            [[self.balance], [0], [0], [0], [0]],
            self.lookback_window_size + 1,
            axis=1)
        self.trades = []

        return self._next_observation()

    def _get_current_price(self):
        """Return the close price at the current position in the frame."""
        return self.df['Close'].values[self.frame_start + self.current_step]

    def _take_action(self, action, current_price):
        """Execute a buy/sell/hold action and record it in the history.

        Args:
            action: ``[action_type, amount_index]`` as described on the class.
            current_price: Price at which the trade is executed.
        """
        action_type = action[0]
        amount = action[1] / 10

        btc_bought = 0
        btc_sold = 0
        cost = 0
        sales = 0

        if action_type < 1:
            # Buy: spend a fraction of the balance, commission on top.
            btc_bought = self.balance / current_price * amount
            cost = btc_bought * current_price * (1 + self.commission)

            self.btc_held += btc_bought
            self.balance -= cost
        elif action_type < 2:
            # Sell: liquidate a fraction of holdings, commission deducted.
            btc_sold = self.btc_held * amount
            sales = btc_sold * current_price * (1 - self.commission)

            self.btc_held -= btc_sold
            self.balance += sales

        if btc_sold > 0 or btc_bought > 0:
            is_sell = btc_sold > 0
            self.trades.append({
                'step': self.frame_start + self.current_step,
                'amount': btc_sold if is_sell else btc_bought,
                'total': sales if is_sell else cost,
                'type': "sell" if is_sell else "buy"})

        self.net_worth = self.balance + self.btc_held * current_price

        self.account_history = np.append(
            self.account_history,
            [[self.balance], [btc_bought], [cost], [btc_sold], [sales]],
            axis=1)

    def step(self, action):
        """Apply ``action`` and advance the environment one step.

        Returns:
            Tuple ``(obs, reward, done, info)`` where ``reward`` is the
            change in net worth and ``done`` is true once net worth is
            no longer positive.
        """
        current_price = self._get_current_price() + 0.01

        prev_net_worth = self.net_worth

        self._take_action(action, current_price)

        self.steps_left -= 1
        self.current_step += 1

        if self.steps_left == 0:
            # Session exhausted: convert holdings to cash and start a new
            # session without ending the episode.
            self.balance += self.btc_held * current_price
            self.btc_held = 0

            self._reset_session()

        obs = self._next_observation()
        reward = self.net_worth - prev_net_worth
        done = self.net_worth <= 0

        return obs, reward, done, {}

    def render(self, mode='human', title='BTC'):
        """Render the current state.

        Args:
            mode: ``'system'`` prints a text summary, ``'human'`` draws the
                trading graph; any other value does nothing.
            title: Title passed to the graph viewer when it is created.
        """
        if mode == 'system':
            # account_history gains one column per step and is only cleared
            # in reset(), so the latest entry is always the last column.
            # Indexing by frame_start + current_step is only correct in
            # serial mode before the first session wrap.
            print('Price: ' + str(self._get_current_price()))
            print('Bought: ' + str(self.account_history[2][-1]))
            print('Sold: ' + str(self.account_history[4][-1]))
            print('Net worth: ' + str(self.net_worth))

        elif mode == 'human':
            if self.viewer is None:
                self.viewer = BitcoinTradingGraph(self.df, title=title)

            self.viewer.render(self.frame_start + self.current_step,
                               self.net_worth,
                               self.trades,
                               window_size=self.lookback_window_size)

    def close(self):
        """Close the graph viewer if one was opened."""
        if self.viewer is not None:
            self.viewer.close()
            self.viewer = None
class BitcoinTradingEnv(gym.Env):
    """A Bitcoin trading environment for OpenAI gym.

    The observation is a single row made of the latest min-max scaled
    stationary features, a SARIMAX forecast of the differenced close price
    (mean plus confidence bounds) and the latest scaled account entries.

    Actions are integers in ``[0, 12)``: ``action // 4`` selects buy (0),
    sell (1) or hold (2), and the traded fraction is
    ``1 / (action % 4 + 1)``.
    """

    metadata = {'render.modes': ['human', 'system', 'none']}
    viewer = None

    def __init__(self, df, initial_balance=10000, commission=0.0025,
                 reward_func='sortino', **kwargs):
        """Prepare data, benchmarks and the action/observation spaces.

        Args:
            df: Price data; missing values are back-filled and the index is
                reset.
            initial_balance: Cash balance at the start of every episode.
            commission: Fractional fee applied to each buy and sell.
            reward_func: ``'sortino'``, ``'calmar'`` or ``'omega'``; any
                other value rewards the latest net-worth change.
            **kwargs: Optional ``benchmarks`` (extra benchmark dicts),
                ``forecast_len`` (default 10) and ``confidence_interval``
                (default 0.95).
        """
        # Zero-argument super() does not depend on the module-level name,
        # which matters when the class name is rebound later in the file.
        super().__init__()

        self.initial_balance = initial_balance
        self.commission = commission
        self.reward_func = reward_func

        # DataFrame.bfill() is the supported spelling of the deprecated
        # fillna(method='bfill').
        self.df = df.bfill().reset_index()
        self.stationary_df = log_and_difference(
            self.df,
            ['Open', 'High', 'Low', 'Close', 'Volume BTC', 'Volume USD'])

        benchmarks = kwargs.get('benchmarks', [])
        self.benchmarks = [
            {
                'label': 'Buy and HODL',
                'values': buy_and_hodl(
                    self.df['Close'], initial_balance, commission)
            },
            {
                'label': 'RSI Divergence',
                'values': rsi_divergence(
                    self.df['Close'], initial_balance, commission)
            },
            {
                'label': 'SMA Crossover',
                'values': sma_crossover(
                    self.df['Close'], initial_balance, commission)
            },
            *benchmarks,
        ]

        self.forecast_len = kwargs.get('forecast_len', 10)
        self.confidence_interval = kwargs.get('confidence_interval', 0.95)

        # 5 account rows + feature columns (all but 'index' and 'Date')
        # + forecast mean and two confidence bounds per forecast step.
        self.obs_shape = (
            1, 5 + len(self.df.columns) - 2 + (self.forecast_len * 3))

        # Actions of the format Buy 1/1, Sell 1/3, Hold (amount ignored), etc.
        self.action_space = spaces.Discrete(12)

        # Observes the price action, indicators, account action, price forecasts
        self.observation_space = spaces.Box(
            low=0, high=1, shape=self.obs_shape, dtype=np.float16)

    def _next_observation(self):
        """Assemble the observation row for the current step."""
        scaler = preprocessing.MinMaxScaler()
        seen = self.current_step + self.forecast_len + 1

        features = self.stationary_df[
            self.stationary_df.columns.difference(['index', 'Date'])]

        scaled = features[:seen].values
        # Infinite entries would break the scaler, so zero them first.
        scaled[abs(scaled) == inf] = 0
        scaled = scaler.fit_transform(scaled.astype('float32'))
        scaled = pd.DataFrame(scaled, columns=features.columns)

        # Only the most recent scaled feature row goes into the observation.
        obs = scaled.values[-1]

        past_df = self.stationary_df['Close'][:seen]
        forecast_model = SARIMAX(past_df.values, enforce_stationarity=False)
        model_fit = forecast_model.fit(method='bfgs', disp=False)
        forecast = model_fit.get_forecast(
            steps=self.forecast_len,
            alpha=(1 - self.confidence_interval))

        obs = np.insert(obs, len(obs), forecast.predicted_mean, axis=0)
        obs = np.insert(obs, len(obs), forecast.conf_int().flatten(), axis=0)

        scaled_history = scaler.fit_transform(
            self.account_history.astype('float32'))

        obs = np.insert(obs, len(obs), scaled_history[:, -1], axis=0)

        obs = np.reshape(obs.astype('float16'), self.obs_shape)
        # Replace NaN/inf left over from scaling or the float16 cast.
        obs[np.bitwise_not(np.isfinite(obs))] = 0

        return obs

    def _current_price(self):
        """Return the close price for the current step, offset by 0.01."""
        return self.df['Close'].values[
            self.current_step + self.forecast_len] + 0.01

    def _take_action(self, action):
        """Execute ``action`` and record the resulting account state."""
        current_price = self._current_price()
        action_type = int(action / 4)
        amount = 1 / (action % 4 + 1)

        btc_bought = 0
        btc_sold = 0
        cost = 0
        sales = 0

        if action_type == 0:
            # Buy: the commission is folded into the effective price, so the
            # cost never exceeds the available balance.
            price = current_price * (1 + self.commission)
            btc_bought = min(self.balance * amount / price,
                             self.balance / price)
            cost = btc_bought * price

            self.btc_held += btc_bought
            self.balance -= cost
        elif action_type == 1:
            # Sell: the commission lowers the effective price received.
            price = current_price * (1 - self.commission)
            btc_sold = self.btc_held * amount
            sales = btc_sold * price

            self.btc_held -= btc_sold
            self.balance += sales

        if btc_sold > 0 or btc_bought > 0:
            is_sell = btc_sold > 0
            self.trades.append({
                'step': self.current_step,
                'amount': btc_sold if is_sell else btc_bought,
                'total': sales if is_sell else cost,
                'type': 'sell' if is_sell else 'buy'
            })

        self.net_worths.append(self.balance + self.btc_held * current_price)

        # Rows: balance, BTC bought, cost, BTC sold, sales.
        self.account_history = np.append(
            self.account_history,
            [[self.balance], [btc_bought], [cost], [btc_sold], [sales]],
            axis=1)

    def _reward(self):
        """Compute the reward from the most recent net-worth changes.

        Returns 0 when there are no non-zero changes in the window or when
        the selected ratio is not finite.
        """
        length = min(self.current_step, self.forecast_len)
        returns = np.diff(self.net_worths[-length:])

        if np.count_nonzero(returns) < 1:
            return 0

        if self.reward_func == 'sortino':
            reward = sortino_ratio(returns, annualization=365 * 24)
        elif self.reward_func == 'calmar':
            reward = calmar_ratio(returns, annualization=365 * 24)
        elif self.reward_func == 'omega':
            reward = omega_ratio(returns, annualization=365 * 24)
        else:
            reward = returns[-1]

        return reward if np.isfinite(reward) else 0

    def _done(self):
        """Return True when net worth fell below 10% or data ran out."""
        lost_too_much = self.net_worths[-1] < self.initial_balance / 10
        out_of_data = (
            self.current_step == len(self.df) - self.forecast_len - 1)
        return lost_too_much or out_of_data

    def reset(self):
        """Reset the account state and return the first observation."""
        self.balance = self.initial_balance
        self.net_worths = [self.initial_balance]
        self.btc_held = 0
        self.current_step = 0

        self.account_history = np.array([
            [self.balance],
            [0],
            [0],
            [0],
            [0]
        ])
        self.trades = []

        return self._next_observation()

    def step(self, action):
        """Apply ``action`` and return ``(obs, reward, done, info)``."""
        self._take_action(action)

        self.current_step += 1

        obs = self._next_observation()
        reward = self._reward()
        done = self._done()

        return obs, reward, done, {}

    def render(self, mode='human'):
        """Render the current state.

        Args:
            mode: ``'system'`` prints a text summary, ``'human'`` draws the
                trading graph; any other value does nothing.
        """
        if mode == 'system':
            # account_history has one column per step plus the initial one,
            # so column current_step is the most recent entry.
            print('Price: ' + str(self._current_price()))
            print('Bought: ' + str(self.account_history[2][self.current_step]))
            print('Sold: ' + str(self.account_history[4][self.current_step]))
            print('Net worth: ' + str(self.net_worths[-1]))

        elif mode == 'human':
            if self.viewer is None:
                self.viewer = BitcoinTradingGraph(self.df)

            self.viewer.render(self.current_step,
                               self.net_worths,
                               self.benchmarks,
                               self.trades)

    def close(self):
        """Close the graph viewer if one was opened."""
        if self.viewer is not None:
            self.viewer.close()
            self.viewer = None
class BitcoinTradingEnv(gym.Env):
    """A Bitcoin trading environment for OpenAI gym.

    Each observation is a ``(13, lookback_window_size + 1)`` array: the
    close price and seven indicator rows (macd, vr, cci, obv, rsi, fastk,
    fastd) stacked on five account rows (balance, BTC bought, cost, BTC
    sold, sales), min-max scaled together.

    Actions are integers in ``[0, 9)``: ``action // 4`` selects buy (0),
    sell (1) or hold (2), and the traded fraction is
    ``(action % 4 + 1) / 4``.
    """

    metadata = {'render.modes': ['human', 'system', 'file', 'none']}
    # Shared scaler; it is re-fitted on every call, so no state carries over.
    scaler = preprocessing.MinMaxScaler()
    viewer = None

    def __init__(self, df, lookback_window_size=127, initial_balance=10000,
                 commission=0.00075, serial=False):
        """Store the configuration and augment the data with indicators.

        Args:
            df: OHLCV data with 'Open', 'High', 'Low', 'Close' and 'Volume'
                columns; rows containing NaN are dropped.
            lookback_window_size: Number of past steps in each observation
                (the observation holds ``lookback_window_size + 1`` columns).
            initial_balance: Cash balance at the start of every episode.
            commission: Fractional fee applied to each buy and sell.
            serial: When true, sessions traverse the whole frame in order;
                otherwise a random slice is drawn for each session.
        """
        # Zero-argument super() does not depend on the module-level name,
        # which matters when the class name is rebound in the same file.
        super().__init__()

        self.df = df.dropna().reset_index()
        self.lookback_window_size = lookback_window_size
        self.initial_balance = initial_balance
        self.commission = commission
        self.serial = serial
        # Last line produced for render(mode='file').
        self.file_history = ""

        # Add indicator columns derived from the OHLCV data.
        # NOTE(review): CCI/OBV/RSI/STOCHRSI are defined outside this class;
        # the 0/1/2 -> cci/obv/rsi rename assumes they return unnamed
        # series in that order - confirm.
        scaled_df = self.df[
            ['Open', 'High', 'Low', 'Close', 'Volume']].astype('float64')
        scaled_df.rename(columns={
            'Open': 'open',
            'High': 'high',
            'Low': 'low',
            'Close': 'close',
            'Volume': 'volume'
        }, inplace=True)

        stock = StockDataFrame.retype(scaled_df)
        indicators_df = stock[['macd', 'vr']]
        indicators_df = pd.concat([
            indicators_df,
            CCI(scaled_df),
            OBV(scaled_df),
            RSI(scaled_df),
            STOCHRSI(scaled_df)
        ], axis=1)
        indicators_df.rename(columns={
            0: 'cci',
            1: 'obv',
            2: 'rsi'
        }, inplace=True)

        self.df = pd.concat([self.df, indicators_df], axis=1)
        self.df = self.df.dropna()

        # Actions of the format Buy 1/4, Sell 3/4, Hold (amount ignored), etc.
        self.action_space = spaces.Discrete(9)

        # Observes the close price, indicators, and account history
        self.observation_space = spaces.Box(
            low=0, high=2,
            shape=(13, lookback_window_size + 1),
            dtype=np.float16)

    def _next_observation(self):
        """Build the scaled observation window ending at the current step."""
        end = self.current_step + self.lookback_window_size + 1
        window = slice(self.current_step, end)

        frame = pd.DataFrame(
            self.active_df.values[:end].astype('float64'),
            columns=self.df.columns)

        feature_columns = (
            'Close', 'macd', 'vr', 'cci', 'obv', 'rsi', 'fastk', 'fastd')
        obs = np.array(
            [frame[column].values[window] for column in feature_columns])

        # Append the most recent account columns below the feature rows.
        obs = np.append(
            obs,
            self.account_history[:, -(self.lookback_window_size + 1):],
            axis=0)

        obs = self.scaler.fit_transform(obs)

        return obs

    def _reset_session(self):
        """Start a new trading session and select its slice of the data."""
        self.current_step = 0

        if self.serial:
            self.steps_left = len(self.df) - self.lookback_window_size - 1
            self.frame_start = self.lookback_window_size
        else:
            self.steps_left = np.random.randint(1, MAX_TRADING_SESSION)
            self.frame_start = np.random.randint(
                self.lookback_window_size, len(self.df) - self.steps_left)

        # Include the lookback rows that precede the first tradable step.
        self.active_df = self.df[
            self.frame_start - self.lookback_window_size:
            self.frame_start + self.steps_left]

    def reset(self):
        """Reset the account state and return the first observation."""
        self.balance = self.initial_balance
        self.net_worth = self.initial_balance
        self.btc_held = 0

        self._reset_session()

        # Rows: balance, BTC bought, cost, BTC sold, sales. Pre-filled so the
        # first observation already has a full lookback window.
        self.account_history = np.repeat(
            [[self.balance], [0], [0], [0], [0]],
            self.lookback_window_size + 1,
            axis=1)
        self.trades = []

        return self._next_observation()

    def _get_current_price(self):
        """Return the close price at the current position in the frame."""
        return self.df['Close'].values[self.frame_start + self.current_step]

    def _take_action(self, action, current_price):
        """Execute ``action`` at ``current_price``.

        Returns:
            ``-1`` when the action is rejected (buying with less than 1% of
            the initial balance left, selling with no holdings, or holding),
            otherwise ``0`` after the trade has been recorded.
        """
        action_type = action // 4
        amount = ((action % 4) + 1) / 4

        btc_bought = 0
        btc_sold = 0
        cost = 0
        sales = 0

        cannot_buy = (action_type == 0
                      and self.balance < self.initial_balance * 0.01)
        cannot_sell = action_type == 1 and self.btc_held <= 0
        if cannot_buy or cannot_sell or action_type == 2:
            return -1

        if action_type < 1:  # BUY
            # Cap the purchase at what the balance can pay for including the
            # commission; otherwise a full-balance buy (amount == 1) would
            # cost balance * (1 + commission) and leave negative cash.
            fee_factor = 1 + self.commission
            affordable = self.balance / (current_price * fee_factor)
            btc_bought = min((self.balance * amount) / current_price,
                             affordable)
            # min() guards against float rounding pushing cost past balance.
            cost = min(btc_bought * current_price * fee_factor, self.balance)

            self.btc_held += btc_bought
            self.balance -= cost
        elif action_type < 2:  # SELL
            btc_sold = self.btc_held * amount
            sales = btc_sold * current_price * (1 - self.commission)

            self.btc_held -= btc_sold
            self.balance += sales

        if btc_sold > 0 or btc_bought > 0:
            is_sell = btc_sold > 0
            self.trades.append({
                'step': self.frame_start + self.current_step,
                'amount': btc_sold if is_sell else btc_bought,
                'total': sales if is_sell else cost,
                'type': "sell" if is_sell else "buy"
            })

        self.net_worth = self.balance + self.btc_held * current_price

        self.account_history = np.append(
            self.account_history,
            [[self.balance], [btc_bought], [cost], [btc_sold], [sales]],
            axis=1)

        return 0

    def step(self, action):
        """Apply ``action`` and advance the environment one step.

        Returns:
            ``(None, None, True, {})`` when ``_take_action`` rejects the
            action; otherwise ``(obs, reward, done, info)`` where ``reward``
            is the percentage change in net worth.
        """
        current_price = self._get_current_price()

        prev_net_worth = self.net_worth

        # A rejected action ends the episode without an observation.
        if self._take_action(action, current_price) < 0:
            return None, None, True, {}

        self.steps_left -= 1
        self.current_step += 1

        if self.steps_left == 0:
            # Session exhausted: convert holdings to cash and start a new
            # session.
            self.balance += self.btc_held * current_price
            self.btc_held = 0

            self._reset_session()

        obs = self._next_observation()
        reward = ((self.net_worth - prev_net_worth) / prev_net_worth) * 100

        # Episode ends when data runs out, net worth leaves the +/-20% band
        # around the initial balance, or more than 200 steps were taken.
        out_of_data = (
            self.current_step + self.lookback_window_size + 3 > len(self.df))
        done = out_of_data or \
            self.net_worth <= self.initial_balance * 0.8 or \
            self.net_worth >= self.initial_balance * 1.2 or \
            self.current_step > 200

        action_type = action // 4
        if action_type == 0:
            amount = ((action % 4) + 1) / 4
            self.file_history = "{}%({:.1f}) BUY ({:.1f} -> {:.1f})".format(
                amount * 100, self.trades[-1]['total'],
                prev_net_worth, self.net_worth)
        elif action_type == 1:
            amount = ((action % 4) + 1) / 4
            self.file_history = "{}%({:.1f}) SELL ({:.1f} -> {:.1f})".format(
                amount * 100, self.trades[-1]['total'],
                prev_net_worth, self.net_worth)
        else:
            # NOTE(review): hold (action // 4 == 2) is rejected by
            # _take_action above, so this branch looks unreachable for
            # actions inside the declared action space - confirm intent.
            self.file_history = "HOLD ({} -> {})".format(
                prev_net_worth, self.net_worth)

        return obs, reward, done, {}

    def render(self, mode='human', **kwargs):
        """Render the current state.

        Args:
            mode: ``'system'`` prints a text summary, ``'human'`` draws the
                trading graph, ``'file'`` appends the last action line to
                the log file; any other value does nothing.
            **kwargs: Optional ``title`` for the graph viewer.
        """
        if mode == 'system':
            # account_history gains one column per step and is only cleared
            # in reset(), so the latest entry is always the last column.
            # Indexing by frame_start + current_step is only correct in
            # serial mode before the first session wrap.
            print('Price: ' + str(self._get_current_price()))
            print('Bought: ' + str(self.account_history[2][-1]))
            print('Sold: ' + str(self.account_history[4][-1]))
            print('Net worth: ' + str(self.net_worth))

        elif mode == 'human':
            if self.viewer is None:
                self.viewer = BitcoinTradingGraph(
                    self.df, kwargs.get('title', None))

            self.viewer.render(self.frame_start + self.current_step,
                               self.net_worth,
                               self.trades,
                               window_size=self.lookback_window_size)

        elif mode == 'file':
            # NOTE(review): the file name is spelled 'hisotry'; left as-is
            # because other tooling may read this exact path.
            with open('log/hisotry.txt', 'a') as f:
                f.write("{}\n".format(self.file_history))

    def close(self):
        """Close the graph viewer if one was opened."""
        if self.viewer is not None:
            self.viewer.close()
            self.viewer = None