def reset(self, env_steps_size=0):
    self.visualization = TradingGraph(Render_range=self.Render_range, Show_reward=self.Show_reward, Show_indicators=self.Show_indicators) # init visualization
    self.trades = deque(maxlen=self.Render_range) # limited orders memory for visualization

    self.balance = self.initial_balance
    self.net_worth = self.initial_balance
    self.prev_net_worth = self.initial_balance
    self.crypto_held = 0
    self.crypto_sold = 0
    self.crypto_bought = 0
    self.episode_orders = 0 # track episode orders count
    self.prev_episode_orders = 0 # track previous episode orders count
    self.rewards = deque(maxlen=self.Render_range)
    self.env_steps_size = env_steps_size
    self.punish_value = 0
    if env_steps_size > 0: # used for training dataset
        self.start_step = random.randint(self.lookback_window_size, self.df_total_steps - env_steps_size)
        self.end_step = self.start_step + env_steps_size
    else: # used for testing dataset
        self.start_step = self.lookback_window_size
        self.end_step = self.df_total_steps

    self.current_step = self.start_step

    for i in reversed(range(self.lookback_window_size)):
        current_step = self.current_step - i
        self.orders_history.append([self.balance / self.normalize_value,
                                    self.net_worth / self.normalize_value,
                                    self.crypto_bought / self.normalize_value,
                                    self.crypto_sold / self.normalize_value,
                                    self.crypto_held / self.normalize_value])

        # one line for loop to fill market history within reset call
        self.market_history.append([self.df_normalized.loc[current_step, column] for column in self.columns])

    state = np.concatenate((self.orders_history, self.market_history), axis=1)

    return state
def reset(self, env_steps_size=0):
    self.visualization = TradingGraph(Render_range=self.Render_range, Show_reward=self.Show_reward) # init visualization
    self.trades = deque(maxlen=self.Render_range) # limited orders memory for visualization

    self.balance = self.initial_balance
    self.net_worth = self.initial_balance
    self.prev_net_worth = self.initial_balance
    self.crypto_held = 0
    self.crypto_sold = 0
    self.crypto_bought = 0
    self.episode_orders = 0 # track episode orders count
    self.prev_episode_orders = 0 # track previous episode orders count
    self.rewards = deque(maxlen=self.Render_range)
    self.env_steps_size = env_steps_size
    self.punish_value = 0
    if env_steps_size > 0: # used for training dataset
        self.start_step = random.randint(self.lookback_window_size, self.df_total_steps - env_steps_size)
        self.end_step = self.start_step + env_steps_size
    else: # used for testing dataset
        self.start_step = self.lookback_window_size
        self.end_step = self.df_total_steps

    self.current_step = self.start_step

    for i in reversed(range(self.lookback_window_size)):
        current_step = self.current_step - i
        self.orders_history.append([self.balance, self.net_worth, self.crypto_bought, self.crypto_sold, self.crypto_held])
        self.market_history.append([self.df.loc[current_step, 'Open'],
                                    self.df.loc[current_step, 'High'],
                                    self.df.loc[current_step, 'Low'],
                                    self.df.loc[current_step, 'Close'],
                                    self.df.loc[current_step, 'Volume']])

    state = np.concatenate((self.market_history, self.orders_history), axis=1)

    return state
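In this OHLCV-only variant the returned state stacks the five market columns next to the five order-bookkeeping columns, so it has shape (lookback_window_size, 10). A quick check, assuming an env instance built from one of the full class definitions below:

state = env.reset()
assert state.shape == (env.lookback_window_size, 10)   # 5 OHLCV columns + 5 order-history columns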
class CustomEnv:
    # A custom Bitcoin trading environment
    def __init__(self, df, initial_balance=1000, lookback_window_size=50, Render_range=100, Show_reward=False, Show_indicators=False, normalize_value=40000):
        # Define action space and state size and other custom parameters
        self.df = df.dropna().reset_index()
        self.df_total_steps = len(self.df) - 1
        self.initial_balance = initial_balance
        self.lookback_window_size = lookback_window_size
        self.Render_range = Render_range # render range in visualization
        self.Show_reward = Show_reward # show order reward in rendered visualization
        self.Show_indicators = Show_indicators # show main indicators in rendered visualization

        # Orders history contains the balance, net_worth, crypto_bought, crypto_sold, crypto_held values for the last lookback_window_size steps
        self.orders_history = deque(maxlen=self.lookback_window_size)

        # Market history contains the OHLC values for the last lookback_window_size prices
        self.market_history = deque(maxlen=self.lookback_window_size)

        self.indicators_history = deque(maxlen=self.lookback_window_size)

        self.normalize_value = normalize_value

    # Reset the state of the environment to an initial state
    def reset(self, env_steps_size=0):
        self.visualization = TradingGraph(Render_range=self.Render_range, Show_reward=self.Show_reward, Show_indicators=self.Show_indicators) # init visualization
        self.trades = deque(maxlen=self.Render_range) # limited orders memory for visualization

        self.balance = self.initial_balance
        self.net_worth = self.initial_balance
        self.prev_net_worth = self.initial_balance
        self.crypto_held = 0
        self.crypto_sold = 0
        self.crypto_bought = 0
        self.episode_orders = 0 # track episode orders count
        self.prev_episode_orders = 0 # track previous episode orders count
        self.rewards = deque(maxlen=self.Render_range)
        self.env_steps_size = env_steps_size
        self.punish_value = 0
        if env_steps_size > 0: # used for training dataset
            self.start_step = random.randint(self.lookback_window_size, self.df_total_steps - env_steps_size)
            self.end_step = self.start_step + env_steps_size
        else: # used for testing dataset
            self.start_step = self.lookback_window_size
            self.end_step = self.df_total_steps

        self.current_step = self.start_step

        for i in reversed(range(self.lookback_window_size)):
            current_step = self.current_step - i
            self.orders_history.append([self.balance, self.net_worth, self.crypto_bought, self.crypto_sold, self.crypto_held])

            self.market_history.append([self.df.loc[current_step, 'Open'],
                                        self.df.loc[current_step, 'High'],
                                        self.df.loc[current_step, 'Low'],
                                        self.df.loc[current_step, 'Close'],
                                        self.df.loc[current_step, 'Volume'],
                                        ])

            self.indicators_history.append([self.df.loc[current_step, 'sma7'] / self.normalize_value,
                                            self.df.loc[current_step, 'sma25'] / self.normalize_value,
                                            self.df.loc[current_step, 'sma99'] / self.normalize_value,
                                            self.df.loc[current_step, 'bb_bbm'] / self.normalize_value,
                                            self.df.loc[current_step, 'bb_bbh'] / self.normalize_value,
                                            self.df.loc[current_step, 'bb_bbl'] / self.normalize_value,
                                            self.df.loc[current_step, 'psar'] / self.normalize_value,
                                            self.df.loc[current_step, 'MACD'] / 400,
                                            self.df.loc[current_step, 'RSI'] / 100
                                            ])

        state = np.concatenate((self.market_history, self.orders_history), axis=1) / self.normalize_value
        state = np.concatenate((state, self.indicators_history), axis=1)

        return state

    # Get the data points for the given current_step
    def _next_observation(self):
        self.market_history.append([self.df.loc[self.current_step, 'Open'],
                                    self.df.loc[self.current_step, 'High'],
                                    self.df.loc[self.current_step, 'Low'],
                                    self.df.loc[self.current_step, 'Close'],
                                    self.df.loc[self.current_step, 'Volume'],
                                    ])

        self.indicators_history.append([self.df.loc[self.current_step, 'sma7'] / self.normalize_value,
                                        self.df.loc[self.current_step, 'sma25'] / self.normalize_value,
                                        self.df.loc[self.current_step, 'sma99'] / self.normalize_value,
                                        self.df.loc[self.current_step, 'bb_bbm'] / self.normalize_value,
                                        self.df.loc[self.current_step, 'bb_bbh'] / self.normalize_value,
                                        self.df.loc[self.current_step, 'bb_bbl'] / self.normalize_value,
                                        self.df.loc[self.current_step, 'psar'] / self.normalize_value,
                                        self.df.loc[self.current_step, 'MACD'] / 400,
                                        self.df.loc[self.current_step, 'RSI'] / 100
                                        ])

        obs = np.concatenate((self.market_history, self.orders_history), axis=1) / self.normalize_value
        obs = np.concatenate((obs, self.indicators_history), axis=1)

        return obs

    # Execute one time step within the environment
    def step(self, action):
        self.crypto_bought = 0
        self.crypto_sold = 0
        self.current_step += 1

        # Set the current price to a random price between open and close
        #current_price = random.uniform(
        #    self.df.loc[self.current_step, 'Open'],
        #    self.df.loc[self.current_step, 'Close'])
        current_price = self.df.loc[self.current_step, 'Open']
        Date = self.df.loc[self.current_step, 'Date'] # for visualization
        High = self.df.loc[self.current_step, 'High'] # for visualization
        Low = self.df.loc[self.current_step, 'Low'] # for visualization

        if action == 0: # Hold
            pass

        elif action == 1 and self.balance > self.initial_balance / 100:
            # Buy with 100% of current balance
            self.crypto_bought = self.balance / current_price
            self.balance -= self.crypto_bought * current_price
            self.crypto_held += self.crypto_bought
            self.trades.append({'Date': Date, 'High': High, 'Low': Low, 'total': self.crypto_bought, 'type': "buy", 'current_price': current_price})
            self.episode_orders += 1

        elif action == 2 and self.crypto_held > 0:
            # Sell 100% of current crypto held
            self.crypto_sold = self.crypto_held
            self.balance += self.crypto_sold * current_price
            self.crypto_held -= self.crypto_sold
            self.trades.append({'Date': Date, 'High': High, 'Low': Low, 'total': self.crypto_sold, 'type': "sell", 'current_price': current_price})
            self.episode_orders += 1

        self.prev_net_worth = self.net_worth
        self.net_worth = self.balance + self.crypto_held * current_price

        self.orders_history.append([self.balance, self.net_worth, self.crypto_bought, self.crypto_sold, self.crypto_held])

        # Receive calculated reward
        reward = self.get_reward()

        if self.net_worth <= self.initial_balance / 2:
            done = True
        else:
            done = False

        obs = self._next_observation()

        return obs, reward, done

    # Calculate reward
    def get_reward(self):
        self.punish_value += self.net_worth * 0.00001
        if self.episode_orders > 1 and self.episode_orders > self.prev_episode_orders:
            self.prev_episode_orders = self.episode_orders
            if self.trades[-1]['type'] == "buy" and self.trades[-2]['type'] == "sell":
                reward = self.trades[-2]['total'] * self.trades[-2]['current_price'] - self.trades[-2]['total'] * self.trades[-1]['current_price']
                reward -= self.punish_value
                self.punish_value = 0
                self.trades[-1]["Reward"] = reward
                return reward
            elif self.trades[-1]['type'] == "sell" and self.trades[-2]['type'] == "buy":
                reward = self.trades[-1]['total'] * self.trades[-1]['current_price'] - self.trades[-2]['total'] * self.trades[-2]['current_price']
                reward -= self.punish_value
                self.punish_value = 0
                self.trades[-1]["Reward"] = reward
                return reward
        else:
            return 0 - self.punish_value

    # render environment
    def render(self, visualize=False):
        #print(f'Step: {self.current_step}, Net Worth: {self.net_worth}')
        if visualize:
            # Render the environment to the screen
            img = self.visualization.render(self.df.loc[self.current_step], self.net_worth, self.trades)
            return img
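For context, a minimal sketch of how an environment like this one can be driven; the episode length and the random policy are illustrative assumptions, and df must already contain the Date, OHLCV and indicator columns referenced above:

import numpy as np

env = CustomEnv(df, lookback_window_size=50, Show_reward=True)   # df: Date, OHLCV + sma/bb/psar/MACD/RSI columns
state = env.reset(env_steps_size=500)        # random 500-step training slice
while True:
    action = np.random.randint(0, 3)         # 0 = hold, 1 = buy, 2 = sell
    state, reward, done = env.step(action)
    if done or env.current_step == env.end_step:
        break
print("final net worth:", env.net_worth)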
class CustomEnv:
    # A custom Bitcoin trading environment
    def __init__(self, df, initial_balance=1000, lookback_window_size=50, Render_range=100):
        # Define action space and state size and other custom parameters
        self.df = df.dropna().reset_index()
        self.df_total_steps = len(self.df) - 1
        self.initial_balance = initial_balance
        self.lookback_window_size = lookback_window_size
        self.Render_range = Render_range # render range in visualization

        # Action space from 0 to 2: 0 is hold, 1 is buy, 2 is sell
        self.action_space = np.array([0, 1, 2])

        # Orders history contains the balance, net_worth, crypto_bought, crypto_sold, crypto_held values for the last lookback_window_size steps
        self.orders_history = deque(maxlen=self.lookback_window_size)

        # Market history contains the OHLC values for the last lookback_window_size prices
        self.market_history = deque(maxlen=self.lookback_window_size)

        # State size contains Market+Orders history for the last lookback_window_size steps
        self.state_size = (self.lookback_window_size, 10)

    # Reset the state of the environment to an initial state
    def reset(self, env_steps_size=0):
        self.visualization = TradingGraph(Render_range=self.Render_range) # init visualization
        self.trades = deque(maxlen=self.Render_range) # limited orders memory for visualization

        self.balance = self.initial_balance
        self.net_worth = self.initial_balance
        self.prev_net_worth = self.initial_balance
        self.crypto_held = 0
        self.crypto_sold = 0
        self.crypto_bought = 0
        if env_steps_size > 0: # used for training dataset
            self.start_step = random.randint(self.lookback_window_size, self.df_total_steps - env_steps_size)
            self.end_step = self.start_step + env_steps_size
        else: # used for testing dataset
            self.start_step = self.lookback_window_size
            self.end_step = self.df_total_steps

        self.current_step = self.start_step

        for i in reversed(range(self.lookback_window_size)):
            current_step = self.current_step - i
            self.orders_history.append([self.balance, self.net_worth, self.crypto_bought, self.crypto_sold, self.crypto_held])
            self.market_history.append([self.df.loc[current_step, 'Open'],
                                        self.df.loc[current_step, 'High'],
                                        self.df.loc[current_step, 'Low'],
                                        self.df.loc[current_step, 'Close'],
                                        self.df.loc[current_step, 'Volume']])

        state = np.concatenate((self.market_history, self.orders_history), axis=1)

        return state

    # Get the data points for the given current_step
    def _next_observation(self):
        self.market_history.append([self.df.loc[self.current_step, 'Open'],
                                    self.df.loc[self.current_step, 'High'],
                                    self.df.loc[self.current_step, 'Low'],
                                    self.df.loc[self.current_step, 'Close'],
                                    self.df.loc[self.current_step, 'Volume']])

        obs = np.concatenate((self.market_history, self.orders_history), axis=1)

        return obs

    # Execute one time step within the environment
    def step(self, action):
        self.crypto_bought = 0
        self.crypto_sold = 0
        self.current_step += 1

        # Set the current price to a random price between open and close
        current_price = random.uniform(self.df.loc[self.current_step, 'Open'], self.df.loc[self.current_step, 'Close'])
        Date = self.df.loc[self.current_step, 'Date'] # for visualization
        High = self.df.loc[self.current_step, 'High'] # for visualization
        Low = self.df.loc[self.current_step, 'Low'] # for visualization

        if action == 0: # Hold
            pass

        elif action == 1 and self.balance > self.initial_balance / 100:
            # Buy with 100% of current balance
            self.crypto_bought = self.balance / current_price
            self.balance -= self.crypto_bought * current_price
            self.crypto_held += self.crypto_bought
            self.trades.append({'Date': Date, 'High': High, 'Low': Low, 'total': self.crypto_bought, 'type': "buy"})

        elif action == 2 and self.crypto_held > 0:
            # Sell 100% of current crypto held
            self.crypto_sold = self.crypto_held
            self.balance += self.crypto_sold * current_price
            self.crypto_held -= self.crypto_sold
            self.trades.append({'Date': Date, 'High': High, 'Low': Low, 'total': self.crypto_sold, 'type': "sell"})

        self.prev_net_worth = self.net_worth
        self.net_worth = self.balance + self.crypto_held * current_price

        self.orders_history.append([self.balance, self.net_worth, self.crypto_bought, self.crypto_sold, self.crypto_held])
        #Write_to_file(Date, self.orders_history[-1])

        # Calculate reward
        reward = self.net_worth - self.prev_net_worth

        if self.net_worth <= self.initial_balance / 2:
            done = True
        else:
            done = False

        obs = self._next_observation()

        return obs, reward, done

    # render environment
    def render(self, visualize=False):
        #print(f'Step: {self.current_step}, Net Worth: {self.net_worth}')
        if visualize:
            Date = self.df.loc[self.current_step, 'Date']
            Open = self.df.loc[self.current_step, 'Open']
            Close = self.df.loc[self.current_step, 'Close']
            High = self.df.loc[self.current_step, 'High']
            Low = self.df.loc[self.current_step, 'Low']
            Volume = self.df.loc[self.current_step, 'Volume']

            # Render the environment to the screen
            self.visualization.render(Date, Open, High, Low, Close, Volume, self.net_worth, self.trades)
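A simple way to sanity-check this first version is to let it trade at random for a few episodes and watch the resulting net worth; a sketch along those lines (the Random_games name and the episode/step counts are illustrative assumptions):

import numpy as np

def Random_games(env, visualize=False, train_episodes=10, train_steps=500):
    # play several episodes with uniformly random actions to exercise the bookkeeping
    for episode in range(train_episodes):
        state = env.reset(env_steps_size=train_steps)
        while True:
            env.render(visualize)
            action = np.random.choice(env.action_space)   # 0 hold, 1 buy, 2 sell
            state, reward, done = env.step(action)
            if env.current_step == env.end_step:
                print("episode", episode, "net worth:", env.net_worth)
                break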
class CustomEnv:
    # A custom Bitcoin trading environment
    def __init__(self, df, initial_balance=1000, lookback_window_size=50, Render_range=100):
        # Define action space and state size and other custom parameters
        self.df = df.dropna().reset_index()
        self.df_total_steps = len(self.df) - 1
        self.initial_balance = initial_balance
        self.lookback_window_size = lookback_window_size
        self.Render_range = Render_range # render range in visualization

        # Action space from 0 to 2: 0 is hold, 1 is buy, 2 is sell
        self.action_space = np.array([0, 1, 2])

        # Orders history contains the balance, net_worth, crypto_bought, crypto_sold, crypto_held values for the last lookback_window_size steps
        self.orders_history = deque(maxlen=self.lookback_window_size)

        # Market history contains the OHLC values for the last lookback_window_size prices
        self.market_history = deque(maxlen=self.lookback_window_size)

        # State size contains Market+Orders history for the last lookback_window_size steps
        self.state_size = (self.lookback_window_size, 10)

        # Neural Networks part below
        self.lr = 0.00001
        self.epochs = 1
        self.normalize_value = 100000
        self.optimizer = Adam

        # Create Actor-Critic network model
        self.Actor = Actor_Model(input_shape=self.state_size, action_space=self.action_space.shape[0], lr=self.lr, optimizer=self.optimizer)
        self.Critic = Critic_Model(input_shape=self.state_size, action_space=self.action_space.shape[0], lr=self.lr, optimizer=self.optimizer)

    # create tensorboard writer
    def create_writer(self):
        self.replay_count = 0
        self.writer = SummaryWriter(comment="Crypto_trader")

    # Reset the state of the environment to an initial state
    def reset(self, env_steps_size=0):
        self.visualization = TradingGraph(Render_range=self.Render_range) # init visualization
        self.trades = deque(maxlen=self.Render_range) # limited orders memory for visualization

        self.balance = self.initial_balance
        self.net_worth = self.initial_balance
        self.prev_net_worth = self.initial_balance
        self.crypto_held = 0
        self.crypto_sold = 0
        self.crypto_bought = 0
        self.episode_orders = 0 # track episode orders count
        self.env_steps_size = env_steps_size
        if env_steps_size > 0: # used for training dataset
            self.start_step = random.randint(self.lookback_window_size, self.df_total_steps - env_steps_size)
            self.end_step = self.start_step + env_steps_size
        else: # used for testing dataset
            self.start_step = self.lookback_window_size
            self.end_step = self.df_total_steps

        self.current_step = self.start_step

        for i in reversed(range(self.lookback_window_size)):
            current_step = self.current_step - i
            self.orders_history.append([self.balance, self.net_worth, self.crypto_bought, self.crypto_sold, self.crypto_held])
            self.market_history.append([self.df.loc[current_step, 'Open'],
                                        self.df.loc[current_step, 'High'],
                                        self.df.loc[current_step, 'Low'],
                                        self.df.loc[current_step, 'Close'],
                                        self.df.loc[current_step, 'Volume']])

        state = np.concatenate((self.market_history, self.orders_history), axis=1)

        return state

    # Get the data points for the given current_step
    def _next_observation(self):
        self.market_history.append([self.df.loc[self.current_step, 'Open'],
                                    self.df.loc[self.current_step, 'High'],
                                    self.df.loc[self.current_step, 'Low'],
                                    self.df.loc[self.current_step, 'Close'],
                                    self.df.loc[self.current_step, 'Volume']])

        obs = np.concatenate((self.market_history, self.orders_history), axis=1)

        return obs

    # Execute one time step within the environment
    def step(self, action):
        self.crypto_bought = 0
        self.crypto_sold = 0
        self.current_step += 1

        # Set the current price to a random price between open and close
        current_price = random.uniform(self.df.loc[self.current_step, 'Open'], self.df.loc[self.current_step, 'Close'])
        Date = self.df.loc[self.current_step, 'Date'] # for visualization
        High = self.df.loc[self.current_step, 'High'] # for visualization
        Low = self.df.loc[self.current_step, 'Low'] # for visualization

        if action == 0: # Hold
            pass

        elif action == 1 and self.balance > self.initial_balance / 100:
            # Buy with 100% of current balance
            self.crypto_bought = self.balance / current_price
            self.balance -= self.crypto_bought * current_price
            self.crypto_held += self.crypto_bought
            self.trades.append({'Date': Date, 'High': High, 'Low': Low, 'total': self.crypto_bought, 'type': "buy"})
            self.episode_orders += 1

        elif action == 2 and self.crypto_held > 0:
            # Sell 100% of current crypto held
            self.crypto_sold = self.crypto_held
            self.balance += self.crypto_sold * current_price
            self.crypto_held -= self.crypto_sold
            self.trades.append({'Date': Date, 'High': High, 'Low': Low, 'total': self.crypto_sold, 'type': "sell"})
            self.episode_orders += 1

        self.prev_net_worth = self.net_worth
        self.net_worth = self.balance + self.crypto_held * current_price

        self.orders_history.append([self.balance, self.net_worth, self.crypto_bought, self.crypto_sold, self.crypto_held])
        #Write_to_file(Date, self.orders_history[-1])

        # Calculate reward
        reward = self.net_worth - self.prev_net_worth

        if self.net_worth <= self.initial_balance / 2:
            done = True
        else:
            done = False

        obs = self._next_observation() / self.normalize_value

        return obs, reward, done

    # render environment
    def render(self, visualize=False):
        #print(f'Step: {self.current_step}, Net Worth: {self.net_worth}')
        if visualize:
            Date = self.df.loc[self.current_step, 'Date']
            Open = self.df.loc[self.current_step, 'Open']
            Close = self.df.loc[self.current_step, 'Close']
            High = self.df.loc[self.current_step, 'High']
            Low = self.df.loc[self.current_step, 'Low']
            Volume = self.df.loc[self.current_step, 'Volume']

            # Render the environment to the screen
            self.visualization.render(Date, Open, High, Low, Close, Volume, self.net_worth, self.trades)

    def get_gaes(self, rewards, dones, values, next_values, gamma=0.99, lamda=0.95, normalize=True):
        deltas = [r + gamma * (1 - d) * nv - v for r, d, nv, v in zip(rewards, dones, next_values, values)]
        deltas = np.stack(deltas)
        gaes = copy.deepcopy(deltas)
        for t in reversed(range(len(deltas) - 1)):
            gaes[t] = gaes[t] + (1 - dones[t]) * gamma * lamda * gaes[t + 1]

        target = gaes + values
        if normalize:
            gaes = (gaes - gaes.mean()) / (gaes.std() + 1e-8)
        return np.vstack(gaes), np.vstack(target)

    def replay(self, states, actions, rewards, predictions, dones, next_states):
        # reshape memory to appropriate shape for training
        states = np.vstack(states)
        next_states = np.vstack(next_states)
        actions = np.vstack(actions)
        predictions = np.vstack(predictions)

        # Compute discounted rewards
        #discounted_r = np.vstack(self.discount_rewards(rewards))

        # Get Critic network predictions
        values = self.Critic.predict(states)
        next_values = self.Critic.predict(next_states)

        # Compute advantages
        #advantages = discounted_r - values
        advantages, target = self.get_gaes(rewards, dones, np.squeeze(values), np.squeeze(next_values))
        '''
        pylab.plot(target,'-')
        pylab.plot(advantages,'.')
        ax=pylab.gca()
        ax.grid(True)
        pylab.show()
        '''
        # stack everything to numpy array
        y_true = np.hstack([advantages, predictions, actions])

        # training Actor and Critic networks
        a_loss = self.Actor.Actor.fit(states, y_true, epochs=self.epochs, verbose=0, shuffle=True)
        c_loss = self.Critic.Critic.fit(states, target, epochs=self.epochs, verbose=0, shuffle=True)

        self.writer.add_scalar('Data/actor_loss_per_replay', np.sum(a_loss.history['loss']), self.replay_count)
        self.writer.add_scalar('Data/critic_loss_per_replay', np.sum(c_loss.history['loss']), self.replay_count)
        self.replay_count += 1

    def act(self, state):
        # Use the network to predict the next action to take, using the model
        prediction = self.Actor.predict(np.expand_dims(state, axis=0))[0]
        action = np.random.choice(self.action_space, p=prediction)
        return action, prediction

    def save(self, name="Crypto_trader"):
        # save keras model weights
        self.Actor.Actor.save_weights(f"{name}_Actor.h5")
        self.Critic.Critic.save_weights(f"{name}_Critic.h5")

    def load(self, name="Crypto_trader"):
        # load keras model weights
        self.Actor.Actor.load_weights(f"{name}_Actor.h5")
        self.Critic.Critic.load_weights(f"{name}_Critic.h5")
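get_gaes implements Generalized Advantage Estimation: the one-step TD error is delta_t = r_t + gamma * (1 - d_t) * V(s_{t+1}) - V(s_t), accumulated backwards as A_t = delta_t + gamma * lambda * (1 - d_t) * A_{t+1}, and A_t + V(s_t) becomes the Critic's regression target. A small standalone check of the same recursion on a toy three-step trajectory (the numbers are arbitrary):

import numpy as np

gamma, lam = 0.99, 0.95
rewards     = np.array([1.0, 0.0, 2.0])
dones       = np.array([0.0, 0.0, 1.0])
values      = np.array([0.5, 0.4, 0.3])   # Critic's V(s_t)
next_values = np.array([0.4, 0.3, 0.0])   # Critic's V(s_{t+1})

deltas = rewards + gamma * (1 - dones) * next_values - values   # one-step TD errors
gaes = deltas.copy()
for t in reversed(range(len(deltas) - 1)):                      # accumulate backwards through time
    gaes[t] = deltas[t] + (1 - dones[t]) * gamma * lam * gaes[t + 1]

target = gaes + values                                          # Critic regression target
print(gaes, target)                                             # advantages before normalization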
class CustomEnv:
    # A custom Bitcoin trading environment
    def __init__(self, df, df_normalized, lookback_window_size, **kwargs):
        # Define action space and state size and other custom parameters
        self.df = df.reset_index() #.reset_index()#.dropna().copy().reset_index()
        self.df_normalized = df_normalized.reset_index() #.reset_index()#.copy().dropna().reset_index()
        self.lookback_window_size = lookback_window_size
        self.initial_balance = kwargs.get("initial_balance", 1000)
        self.render_range = kwargs.get("render_range", 100) # render range in visualization
        self.show_reward = kwargs.get("show_reward", False) # show order reward in rendered visualization
        self.show_indicators = kwargs.get("show_indicators", False) # show main indicators in rendered visualization
        self.df_total_steps = len(self.df) - 1

        # Orders history contains the balance, net_worth, crypto_bought, crypto_sold, crypto_held values for the last lookback_window_size steps
        self.orders_history = deque(maxlen=self.lookback_window_size)

        # Market history contains the OHLC values for the last lookback_window_size prices
        self.market_history = deque(maxlen=self.lookback_window_size)

        self.normalize_value = kwargs.get("normalize_value", 40000)
        self.fees = 0.002 # 0.2% order fees (Binance charges 0.1% per order by default)
        self.columns = list(self.df_normalized.columns[2:])

    # Reset the state of the environment to an initial state
    def reset(self, env_steps_size=0):
        self.visualization = TradingGraph(render_range=self.render_range, show_reward=self.show_reward, show_indicators=self.show_indicators) # init visualization
        self.trades = deque(maxlen=self.render_range) # limited orders memory for visualization

        self.balance = self.initial_balance
        self.net_worth = self.initial_balance
        self.prev_net_worth = self.initial_balance
        self.crypto_held = 0
        self.crypto_sold = 0
        self.crypto_bought = 0
        self.episode_orders = 0 # track episode orders count
        self.prev_episode_orders = 0 # track previous episode orders count
        self.rewards = deque(maxlen=self.render_range)
        self.env_steps_size = env_steps_size
        self.punish_value = 0
        if env_steps_size > 0: # used for training dataset
            self.start_step = np.random.randint(self.lookback_window_size, self.df_total_steps - env_steps_size)
            self.end_step = self.start_step + env_steps_size
        else: # used for testing dataset
            self.start_step = self.lookback_window_size
            self.end_step = self.df_total_steps

        self.current_step = self.start_step

        for i in reversed(range(self.lookback_window_size)):
            current_step = self.current_step - i
            self.orders_history.append([self.balance / self.normalize_value,
                                        self.net_worth / self.normalize_value,
                                        self.crypto_bought / self.normalize_value,
                                        self.crypto_sold / self.normalize_value,
                                        self.crypto_held / self.normalize_value])

            # one line for loop to fill market history within reset call
            self.market_history.append([self.df_normalized.loc[current_step, column] for column in self.columns])

        state = np.concatenate((self.orders_history, self.market_history), axis=1)

        return state

    # Get the data points for the given current_step
    def next_observation(self):
        self.market_history.append([self.df_normalized.loc[self.current_step, column] for column in self.columns])
        obs = np.concatenate((self.orders_history, self.market_history), axis=1)

        return obs

    # Execute one time step within the environment
    def step(self, action):
        self.crypto_bought = 0
        self.crypto_sold = 0
        self.current_step += 1

        # Set the current price to a random price between open and close
        #current_price = random.uniform(
        #    self.df.loc[self.current_step, 'Open'],
        #    self.df.loc[self.current_step, 'Close'])
        current_price = self.df.loc[self.current_step, 'Open']
        Date = self.df.loc[self.current_step, 'Date'] # for visualization
        High = self.df.loc[self.current_step, 'High'] # for visualization
        Low = self.df.loc[self.current_step, 'Low'] # for visualization

        if action == 0: # Hold
            pass

        elif (action == 1) and (self.balance > self.initial_balance * 0.05):
            # Buy with 100% of current balance
            self.crypto_bought = self.balance / current_price
            self.crypto_bought *= (1 - self.fees) # subtract fees
            self.balance -= self.crypto_bought * current_price
            self.crypto_held += self.crypto_bought
            self.trades.append({'Date': Date, 'High': High, 'Low': Low, 'total': self.crypto_bought, 'type': "buy", 'current_price': current_price})
            self.episode_orders += 1

        elif (action == 2) and (self.crypto_held * current_price > self.initial_balance * 0.05):
            # Sell 100% of current crypto held
            self.crypto_sold = self.crypto_held
            self.crypto_sold *= (1 - self.fees) # subtract fees
            self.balance += self.crypto_sold * current_price
            self.crypto_held -= self.crypto_sold
            self.trades.append({'Date': Date, 'High': High, 'Low': Low, 'total': self.crypto_sold, 'type': "sell", 'current_price': current_price})
            self.episode_orders += 1

        self.prev_net_worth = self.net_worth
        self.net_worth = self.balance + self.crypto_held * current_price

        self.orders_history.append([self.balance / self.normalize_value,
                                    self.net_worth / self.normalize_value,
                                    self.crypto_bought / self.normalize_value,
                                    self.crypto_sold / self.normalize_value,
                                    self.crypto_held / self.normalize_value])

        # Receive calculated reward
        reward = self.get_reward()

        if self.net_worth <= self.initial_balance / 2:
            done = True
        else:
            done = False

        obs = self.next_observation()

        return obs, reward, done

    # Calculate reward
    def get_reward(self):
        if self.episode_orders > 1 and self.episode_orders > self.prev_episode_orders:
            self.prev_episode_orders = self.episode_orders
            if self.trades[-1]['type'] == "buy" and self.trades[-2]['type'] == "sell":
                reward = self.trades[-2]['total'] * self.trades[-2]['current_price'] - self.trades[-2]['total'] * self.trades[-1]['current_price']
                self.trades[-1]["Reward"] = reward
                return reward
            elif self.trades[-1]['type'] == "sell" and self.trades[-2]['type'] == "buy":
                reward = self.trades[-1]['total'] * self.trades[-1]['current_price'] - self.trades[-2]['total'] * self.trades[-2]['current_price']
                self.trades[-1]["Reward"] = reward
                return reward
        else:
            return 0

    # render environment
    def render(self, visualize=False):
        #print(f'Step: {self.current_step}, Net Worth: {self.net_worth}')
        if visualize:
            # Render the environment to the screen
            img = self.visualization.render(self.df.loc[self.current_step], self.net_worth, self.trades)
            return img
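This final version expects two aligned DataFrames: the raw prices (used for order execution, rewards and visualization) and a normalized copy (used only to build the observation). A minimal instantiation sketch, assuming a CSV that already contains the Date, OHLCV and indicator columns; the file name and the crude scale-by-normalize_value preprocessing are placeholders, not the project's actual normalization routine:

import random
from collections import deque

import numpy as np
import pandas as pd

df = pd.read_csv("./BTCUSD_1h_with_indicators.csv")   # hypothetical path: Date, OHLCV + indicator columns
df = df.dropna()
df_normalized = df.copy()
feature_columns = df_normalized.columns[1:]           # every column except Date
df_normalized[feature_columns] = df_normalized[feature_columns] / 40000.0   # placeholder scaling only

lookback_window_size = 50
env = CustomEnv(df, df_normalized, lookback_window_size,
                initial_balance=1000, normalize_value=40000,
                render_range=100, show_reward=True, show_indicators=True)

state = env.reset(env_steps_size=500)                 # random 500-step training slice
state, reward, done = env.step(1)                     # e.g. buy at the next candle's Open price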