def reset(self, env_steps_size=0):
        self.visualization = TradingGraph(
            Render_range=self.Render_range,
            Show_reward=self.Show_reward,
            Show_indicators=self.Show_indicators)  # init visualization
        # limited orders memory for visualization
        self.trades = deque(maxlen=self.Render_range)

        self.balance = self.initial_balance
        self.net_worth = self.initial_balance
        self.prev_net_worth = self.initial_balance
        self.crypto_held = 0
        self.crypto_sold = 0
        self.crypto_bought = 0
        self.episode_orders = 0  # track episode orders count
        self.prev_episode_orders = 0  # track previous episode orders count
        self.rewards = deque(maxlen=self.Render_range)
        self.env_steps_size = env_steps_size
        self.punish_value = 0
        if env_steps_size > 0:  # used for training dataset
            self.start_step = random.randint(
                self.lookback_window_size,
                self.df_total_steps - env_steps_size)
            self.end_step = self.start_step + env_steps_size
        else:  # used for testing dataset
            self.start_step = self.lookback_window_size
            self.end_step = self.df_total_steps

        self.current_step = self.start_step

        for i in reversed(range(self.lookback_window_size)):
            current_step = self.current_step - i
            self.orders_history.append([
                self.balance / self.normalize_value,
                self.net_worth / self.normalize_value,
                self.crypto_bought / self.normalize_value,
                self.crypto_sold / self.normalize_value,
                self.crypto_held / self.normalize_value
            ])

            # one-line for loop to fill market history within the reset call
            self.market_history.append([
                self.df_normalized.loc[current_step, column]
                for column in self.columns
            ])

        state = np.concatenate((self.orders_history, self.market_history),
                               axis=1)

        return state
    def reset(self, env_steps_size=0):
        self.visualization = TradingGraph(
            Render_range=self.Render_range,
            Show_reward=self.Show_reward)  # init visualization
        # limited orders memory for visualization
        self.trades = deque(maxlen=self.Render_range)

        self.balance = self.initial_balance
        self.net_worth = self.initial_balance
        self.prev_net_worth = self.initial_balance
        self.crypto_held = 0
        self.crypto_sold = 0
        self.crypto_bought = 0
        self.episode_orders = 0  # track episode orders count
        self.prev_episode_orders = 0  # track previous episode orders count
        self.rewards = deque(maxlen=self.Render_range)
        self.env_steps_size = env_steps_size
        self.punish_value = 0
        if env_steps_size > 0:  # used for training dataset
            self.start_step = random.randint(
                self.lookback_window_size,
                self.df_total_steps - env_steps_size)
            self.end_step = self.start_step + env_steps_size
        else:  # used for testing dataset
            self.start_step = self.lookback_window_size
            self.end_step = self.df_total_steps

        self.current_step = self.start_step

        for i in reversed(range(self.lookback_window_size)):
            current_step = self.current_step - i
            self.orders_history.append([
                self.balance, self.net_worth, self.crypto_bought,
                self.crypto_sold, self.crypto_held
            ])
            self.market_history.append([
                self.df.loc[current_step, 'Open'],
                self.df.loc[current_step, 'High'],
                self.df.loc[current_step, 'Low'],
                self.df.loc[current_step, 'Close'],
                self.df.loc[current_step, 'Volume']
            ])

        state = np.concatenate((self.market_history, self.orders_history),
                               axis=1)
        return state


# Example 3
class CustomEnv:
    # A custom Bitcoin trading environment
    def __init__(self,
                 df,
                 initial_balance=1000,
                 lookback_window_size=50,
                 Render_range=100,
                 Show_reward=False,
                 Show_indicators=False,
                 normalize_value=40000):
        # Define action space and state size and other custom parameters
        self.df = df.dropna().reset_index()
        self.df_total_steps = len(self.df) - 1
        self.initial_balance = initial_balance
        self.lookback_window_size = lookback_window_size
        self.Render_range = Render_range  # render range in visualization
        self.Show_reward = Show_reward  # show order reward in rendered visualization
        self.Show_indicators = Show_indicators  # show main indicators in rendered visualization

        # Orders history contains the balance, net_worth, crypto_bought, crypto_sold, crypto_held values for the last lookback_window_size steps
        self.orders_history = deque(maxlen=self.lookback_window_size)

        # Market history contains the OHLCV values for the last lookback_window_size prices
        self.market_history = deque(maxlen=self.lookback_window_size)

        self.indicators_history = deque(maxlen=self.lookback_window_size)

        self.normalize_value = normalize_value

    # Reset the state of the environment to an initial state
    def reset(self, env_steps_size=0):
        self.visualization = TradingGraph(
            Render_range=self.Render_range,
            Show_reward=self.Show_reward,
            Show_indicators=self.Show_indicators)  # init visualization
        # limited orders memory for visualization
        self.trades = deque(maxlen=self.Render_range)

        self.balance = self.initial_balance
        self.net_worth = self.initial_balance
        self.prev_net_worth = self.initial_balance
        self.crypto_held = 0
        self.crypto_sold = 0
        self.crypto_bought = 0
        self.episode_orders = 0  # track episode orders count
        self.prev_episode_orders = 0  # track previous episode orders count
        self.rewards = deque(maxlen=self.Render_range)
        self.env_steps_size = env_steps_size
        self.punish_value = 0
        if env_steps_size > 0:  # used for training dataset
            self.start_step = random.randint(
                self.lookback_window_size,
                self.df_total_steps - env_steps_size)
            self.end_step = self.start_step + env_steps_size
        else:  # used for testing dataset
            self.start_step = self.lookback_window_size
            self.end_step = self.df_total_steps

        self.current_step = self.start_step

        for i in reversed(range(self.lookback_window_size)):
            current_step = self.current_step - i
            self.orders_history.append([
                self.balance, self.net_worth, self.crypto_bought,
                self.crypto_sold, self.crypto_held
            ])

            self.market_history.append([
                self.df.loc[current_step, 'Open'],
                self.df.loc[current_step, 'High'],
                self.df.loc[current_step, 'Low'],
                self.df.loc[current_step, 'Close'],
                self.df.loc[current_step, 'Volume'],
            ])

            self.indicators_history.append([
                self.df.loc[current_step, 'sma7'] / self.normalize_value,
                self.df.loc[current_step, 'sma25'] / self.normalize_value,
                self.df.loc[current_step, 'sma99'] / self.normalize_value,
                self.df.loc[current_step, 'bb_bbm'] / self.normalize_value,
                self.df.loc[current_step, 'bb_bbh'] / self.normalize_value,
                self.df.loc[current_step, 'bb_bbl'] / self.normalize_value,
                self.df.loc[current_step, 'psar'] / self.normalize_value,
                self.df.loc[current_step, 'MACD'] / 400,
                self.df.loc[current_step, 'RSI'] / 100
            ])

        state = np.concatenate((self.market_history, self.orders_history),
                               axis=1) / self.normalize_value
        state = np.concatenate((state, self.indicators_history), axis=1)
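        # final state shape: (lookback_window_size, 19) = 5 market + 5 order + 9 indicator features per step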

        return state

    # Get the data points for the given current_step
    def _next_observation(self):
        self.market_history.append([
            self.df.loc[self.current_step, 'Open'],
            self.df.loc[self.current_step, 'High'],
            self.df.loc[self.current_step, 'Low'],
            self.df.loc[self.current_step, 'Close'],
            self.df.loc[self.current_step, 'Volume'],
        ])

        self.indicators_history.append([
            self.df.loc[self.current_step, 'sma7'] / self.normalize_value,
            self.df.loc[self.current_step, 'sma25'] / self.normalize_value,
            self.df.loc[self.current_step, 'sma99'] / self.normalize_value,
            self.df.loc[self.current_step, 'bb_bbm'] / self.normalize_value,
            self.df.loc[self.current_step, 'bb_bbh'] / self.normalize_value,
            self.df.loc[self.current_step, 'bb_bbl'] / self.normalize_value,
            self.df.loc[self.current_step, 'psar'] / self.normalize_value,
            self.df.loc[self.current_step, 'MACD'] / 400,
            self.df.loc[self.current_step, 'RSI'] / 100
        ])

        obs = np.concatenate((self.market_history, self.orders_history),
                             axis=1) / self.normalize_value
        obs = np.concatenate((obs, self.indicators_history), axis=1)

        return obs

    # Execute one time step within the environment
    def step(self, action):
        self.crypto_bought = 0
        self.crypto_sold = 0
        self.current_step += 1

        # Use the candle's Open price as the current (execution) price
        # (alternative, left commented out: a random price between Open and Close)
        #current_price = random.uniform(
        #    self.df.loc[self.current_step, 'Open'],
        #    self.df.loc[self.current_step, 'Close'])
        current_price = self.df.loc[self.current_step, 'Open']
        Date = self.df.loc[self.current_step, 'Date']  # for visualization
        High = self.df.loc[self.current_step, 'High']  # for visualization
        Low = self.df.loc[self.current_step, 'Low']  # for visualization

        if action == 0:  # Hold
            pass

        elif action == 1 and self.balance > self.initial_balance / 100:
            # Buy with 100% of current balance
            self.crypto_bought = self.balance / current_price
            self.balance -= self.crypto_bought * current_price
            self.crypto_held += self.crypto_bought
            self.trades.append({
                'Date': Date,
                'High': High,
                'Low': Low,
                'total': self.crypto_bought,
                'type': "buy",
                'current_price': current_price
            })
            self.episode_orders += 1

        elif action == 2 and self.crypto_held > 0:
            # Sell 100% of current crypto held
            self.crypto_sold = self.crypto_held
            self.balance += self.crypto_sold * current_price
            self.crypto_held -= self.crypto_sold
            self.trades.append({
                'Date': Date,
                'High': High,
                'Low': Low,
                'total': self.crypto_sold,
                'type': "sell",
                'current_price': current_price
            })
            self.episode_orders += 1

        self.prev_net_worth = self.net_worth
        self.net_worth = self.balance + self.crypto_held * current_price

        self.orders_history.append([
            self.balance, self.net_worth, self.crypto_bought, self.crypto_sold,
            self.crypto_held
        ])

        # Receive calculated reward
        reward = self.get_reward()

        if self.net_worth <= self.initial_balance / 2:
            done = True
        else:
            done = False

        obs = self._next_observation()

        return obs, reward, done

    # Calculate reward
    def get_reward(self):
        self.punish_value += self.net_worth * 0.00001
        if self.episode_orders > 1 and self.episode_orders > self.prev_episode_orders:
            self.prev_episode_orders = self.episode_orders
            if self.trades[-1]['type'] == "buy" and self.trades[-2][
                    'type'] == "sell":
                reward = self.trades[-2]['total'] * self.trades[-2][
                    'current_price'] - self.trades[-2]['total'] * self.trades[
                        -1]['current_price']
                reward -= self.punish_value
                self.punish_value = 0
                self.trades[-1]["Reward"] = reward
                return reward
            elif self.trades[-1]['type'] == "sell" and self.trades[-2][
                    'type'] == "buy":
                reward = self.trades[-1]['total'] * self.trades[-1][
                    'current_price'] - self.trades[-2]['total'] * self.trades[
                        -2]['current_price']
                reward -= self.punish_value
                self.punish_value = 0
                self.trades[-1]["Reward"] = reward
                return reward
        else:
            return 0 - self.punish_value

    # render environment
    def render(self, visualize=False):
        #print(f'Step: {self.current_step}, Net Worth: {self.net_worth}')
        if visualize:
            # Render the environment to the screen
            img = self.visualization.render(self.df.loc[self.current_step],
                                            self.net_worth, self.trades)
            return img
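

# --- Usage sketch (illustrative addition, not part of the original example) ---
# A minimal random-action episode against the environment above, assuming the
# usual module-level imports (random, numpy as np) and a pandas DataFrame `df`
# that already contains the Date/OHLCV columns plus the indicator columns
# (sma7, sma25, sma99, bb_bbm, bb_bbh, bb_bbl, psar, MACD, RSI) read by
# reset() and step(). The helper and variable names here are assumptions.
def run_random_episode(env, steps=500, visualize=False):
    state = env.reset(env_steps_size=steps)
    total_reward = 0
    for _ in range(steps):
        action = random.choice([0, 1, 2])  # 0 hold, 1 buy, 2 sell
        state, reward, done = env.step(action)
        total_reward += reward
        env.render(visualize)
        if done:  # episode ends once net worth falls to half the initial balance
            break
    return total_reward

# env = CustomEnv(df, lookback_window_size=50, Show_indicators=True)
# print(run_random_episode(env, steps=500))
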
class CustomEnv:
    # A custom Bitcoin trading environment
    def __init__(self,
                 df,
                 initial_balance=1000,
                 lookback_window_size=50,
                 Render_range=100):
        # Define action space and state size and other custom parameters
        self.df = df.dropna().reset_index()
        self.df_total_steps = len(self.df) - 1
        self.initial_balance = initial_balance
        self.lookback_window_size = lookback_window_size
        self.Render_range = Render_range  # render range in visualization

        # Action space from 0 to 2: 0 is hold, 1 is buy, 2 is sell
        self.action_space = np.array([0, 1, 2])

        # Orders history contains the balance, net_worth, crypto_bought, crypto_sold, crypto_held values for the last lookback_window_size steps
        self.orders_history = deque(maxlen=self.lookback_window_size)

        # Market history contains the OHLCV values for the last lookback_window_size prices
        self.market_history = deque(maxlen=self.lookback_window_size)

        # State size contains Market+Orders history for the last lookback_window_size steps
        self.state_size = (self.lookback_window_size, 10)

    # Reset the state of the environment to an initial state
    def reset(self, env_steps_size=0):
        self.visualization = TradingGraph(
            Render_range=self.Render_range)  # init visualization
        # limited orders memory for visualization
        self.trades = deque(maxlen=self.Render_range)

        self.balance = self.initial_balance
        self.net_worth = self.initial_balance
        self.prev_net_worth = self.initial_balance
        self.crypto_held = 0
        self.crypto_sold = 0
        self.crypto_bought = 0
        if env_steps_size > 0:  # used for training dataset
            self.start_step = random.randint(
                self.lookback_window_size,
                self.df_total_steps - env_steps_size)
            self.end_step = self.start_step + env_steps_size
        else:  # used for testing dataset
            self.start_step = self.lookback_window_size
            self.end_step = self.df_total_steps

        self.current_step = self.start_step

        for i in reversed(range(self.lookback_window_size)):
            current_step = self.current_step - i
            self.orders_history.append([
                self.balance, self.net_worth, self.crypto_bought,
                self.crypto_sold, self.crypto_held
            ])
            self.market_history.append([
                self.df.loc[current_step, 'Open'],
                self.df.loc[current_step, 'High'],
                self.df.loc[current_step, 'Low'],
                self.df.loc[current_step, 'Close'],
                self.df.loc[current_step, 'Volume']
            ])

        state = np.concatenate((self.market_history, self.orders_history),
                               axis=1)
        return state

    # Get the data points for the given current_step
    def _next_observation(self):
        self.market_history.append([
            self.df.loc[self.current_step, 'Open'],
            self.df.loc[self.current_step, 'High'],
            self.df.loc[self.current_step, 'Low'],
            self.df.loc[self.current_step, 'Close'],
            self.df.loc[self.current_step, 'Volume']
        ])
        obs = np.concatenate((self.market_history, self.orders_history),
                             axis=1)
        return obs

    # Execute one time step within the environment
    def step(self, action):
        self.crypto_bought = 0
        self.crypto_sold = 0
        self.current_step += 1

        # Set the current price to a random price between open and close
        current_price = random.uniform(self.df.loc[self.current_step, 'Open'],
                                       self.df.loc[self.current_step, 'Close'])
        Date = self.df.loc[self.current_step, 'Date']  # for visualization
        High = self.df.loc[self.current_step, 'High']  # for visualization
        Low = self.df.loc[self.current_step, 'Low']  # for visualization

        if action == 0:  # Hold
            pass

        elif action == 1 and self.balance > self.initial_balance / 100:
            # Buy with 100% of current balance
            self.crypto_bought = self.balance / current_price
            self.balance -= self.crypto_bought * current_price
            self.crypto_held += self.crypto_bought
            self.trades.append({
                'Date': Date,
                'High': High,
                'Low': Low,
                'total': self.crypto_bought,
                'type': "buy"
            })

        elif action == 2 and self.crypto_held > 0:
            # Sell 100% of current crypto held
            self.crypto_sold = self.crypto_held
            self.balance += self.crypto_sold * current_price
            self.crypto_held -= self.crypto_sold
            self.trades.append({
                'Date': Date,
                'High': High,
                'Low': Low,
                'total': self.crypto_sold,
                'type': "sell"
            })

        self.prev_net_worth = self.net_worth
        self.net_worth = self.balance + self.crypto_held * current_price

        self.orders_history.append([
            self.balance, self.net_worth, self.crypto_bought, self.crypto_sold,
            self.crypto_held
        ])
        #Write_to_file(Date, self.orders_history[-1])

        # Calculate reward
        reward = self.net_worth - self.prev_net_worth

        if self.net_worth <= self.initial_balance / 2:
            done = True
        else:
            done = False

        obs = self._next_observation()

        return obs, reward, done

    # render environment
    def render(self, visualize=False):
        #print(f'Step: {self.current_step}, Net Worth: {self.net_worth}')
        if visualize:
            Date = self.df.loc[self.current_step, 'Date']
            Open = self.df.loc[self.current_step, 'Open']
            Close = self.df.loc[self.current_step, 'Close']
            High = self.df.loc[self.current_step, 'High']
            Low = self.df.loc[self.current_step, 'Low']
            Volume = self.df.loc[self.current_step, 'Volume']

            # Render the environment to the screen
            self.visualization.render(Date, Open, High, Low, Close, Volume,
                                      self.net_worth, self.trades)
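

# --- Shape-check sketch (illustrative addition, not part of the original example) ---
# The class above advertises state_size = (lookback_window_size, 10): five
# OHLCV market features plus five order-history features per step. A small,
# hedged sanity check; the helper name is an assumption.
def check_state_size(env):
    state = env.reset()
    assert state.shape == env.state_size  # e.g. (50, 10) with the defaults
    return state.shape
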
class CustomEnv:
    # A custom Bitcoin trading environment
    def __init__(self,
                 df,
                 initial_balance=1000,
                 lookback_window_size=50,
                 Render_range=100):
        # Define action space and state size and other custom parameters
        self.df = df.dropna().reset_index()
        self.df_total_steps = len(self.df) - 1
        self.initial_balance = initial_balance
        self.lookback_window_size = lookback_window_size
        self.Render_range = Render_range  # render range in visualization

        # Action space from 0 to 2: 0 is hold, 1 is buy, 2 is sell
        self.action_space = np.array([0, 1, 2])

        # Orders history contains the balance, net_worth, crypto_bought, crypto_sold, crypto_held values for the last lookback_window_size steps
        self.orders_history = deque(maxlen=self.lookback_window_size)

        # Market history contains the OHLCV values for the last lookback_window_size prices
        self.market_history = deque(maxlen=self.lookback_window_size)

        # State size contains Market+Orders history for the last lookback_window_size steps
        self.state_size = (self.lookback_window_size, 10)

        # Neural Networks part below
        self.lr = 0.00001
        self.epochs = 1
        self.normalize_value = 100000
        self.optimizer = Adam

        # Create Actor-Critic network model
        self.Actor = Actor_Model(input_shape=self.state_size,
                                 action_space=self.action_space.shape[0],
                                 lr=self.lr,
                                 optimizer=self.optimizer)
        self.Critic = Critic_Model(input_shape=self.state_size,
                                   action_space=self.action_space.shape[0],
                                   lr=self.lr,
                                   optimizer=self.optimizer)

    # create tensorboard writer
    def create_writer(self):
        self.replay_count = 0
        self.writer = SummaryWriter(comment="Crypto_trader")

    # Reset the state of the environment to an initial state
    def reset(self, env_steps_size=0):
        self.visualization = TradingGraph(
            Render_range=self.Render_range)  # init visualization
        # limited orders memory for visualization
        self.trades = deque(maxlen=self.Render_range)

        self.balance = self.initial_balance
        self.net_worth = self.initial_balance
        self.prev_net_worth = self.initial_balance
        self.crypto_held = 0
        self.crypto_sold = 0
        self.crypto_bought = 0
        self.episode_orders = 0  # track episode orders count
        self.env_steps_size = env_steps_size
        if env_steps_size > 0:  # used for training dataset
            self.start_step = random.randint(
                self.lookback_window_size,
                self.df_total_steps - env_steps_size)
            self.end_step = self.start_step + env_steps_size
        else:  # used for testing dataset
            self.start_step = self.lookback_window_size
            self.end_step = self.df_total_steps

        self.current_step = self.start_step

        for i in reversed(range(self.lookback_window_size)):
            current_step = self.current_step - i
            self.orders_history.append([
                self.balance, self.net_worth, self.crypto_bought,
                self.crypto_sold, self.crypto_held
            ])
            self.market_history.append([
                self.df.loc[current_step, 'Open'],
                self.df.loc[current_step, 'High'],
                self.df.loc[current_step, 'Low'],
                self.df.loc[current_step, 'Close'],
                self.df.loc[current_step, 'Volume']
            ])

        state = np.concatenate((self.market_history, self.orders_history),
                               axis=1)
        return state

    # Get the data points for the given current_step
    def _next_observation(self):
        self.market_history.append([
            self.df.loc[self.current_step, 'Open'],
            self.df.loc[self.current_step, 'High'],
            self.df.loc[self.current_step, 'Low'],
            self.df.loc[self.current_step, 'Close'],
            self.df.loc[self.current_step, 'Volume']
        ])
        obs = np.concatenate((self.market_history, self.orders_history),
                             axis=1)
        return obs

    # Execute one time step within the environment
    def step(self, action):
        self.crypto_bought = 0
        self.crypto_sold = 0
        self.current_step += 1

        # Set the current price to a random price between open and close
        current_price = random.uniform(self.df.loc[self.current_step, 'Open'],
                                       self.df.loc[self.current_step, 'Close'])
        Date = self.df.loc[self.current_step, 'Date']  # for visualization
        High = self.df.loc[self.current_step, 'High']  # for visualization
        Low = self.df.loc[self.current_step, 'Low']  # for visualization

        if action == 0:  # Hold
            pass

        elif action == 1 and self.balance > self.initial_balance / 100:
            # Buy with 100% of current balance
            self.crypto_bought = self.balance / current_price
            self.balance -= self.crypto_bought * current_price
            self.crypto_held += self.crypto_bought
            self.trades.append({
                'Date': Date,
                'High': High,
                'Low': Low,
                'total': self.crypto_bought,
                'type': "buy"
            })
            self.episode_orders += 1

        elif action == 2 and self.crypto_held > 0:
            # Sell 100% of current crypto held
            self.crypto_sold = self.crypto_held
            self.balance += self.crypto_sold * current_price
            self.crypto_held -= self.crypto_sold
            self.trades.append({
                'Date': Date,
                'High': High,
                'Low': Low,
                'total': self.crypto_sold,
                'type': "sell"
            })
            self.episode_orders += 1

        self.prev_net_worth = self.net_worth
        self.net_worth = self.balance + self.crypto_held * current_price

        self.orders_history.append([
            self.balance, self.net_worth, self.crypto_bought, self.crypto_sold,
            self.crypto_held
        ])
        #Write_to_file(Date, self.orders_history[-1])

        # Calculate reward
        reward = self.net_worth - self.prev_net_worth

        if self.net_worth <= self.initial_balance / 2:
            done = True
        else:
            done = False

        obs = self._next_observation() / self.normalize_value

        return obs, reward, done

    # render environment
    def render(self, visualize=False):
        #print(f'Step: {self.current_step}, Net Worth: {self.net_worth}')
        if visualize:
            Date = self.df.loc[self.current_step, 'Date']
            Open = self.df.loc[self.current_step, 'Open']
            Close = self.df.loc[self.current_step, 'Close']
            High = self.df.loc[self.current_step, 'High']
            Low = self.df.loc[self.current_step, 'Low']
            Volume = self.df.loc[self.current_step, 'Volume']

            # Render the environment to the screen
            self.visualization.render(Date, Open, High, Low, Close, Volume,
                                      self.net_worth, self.trades)

    def get_gaes(self,
                 rewards,
                 dones,
                 values,
                 next_values,
                 gamma=0.99,
                 lamda=0.95,
                 normalize=True):
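        # Generalized Advantage Estimation (GAE):
        #   delta_t = r_t + gamma * (1 - done_t) * V(s_{t+1}) - V(s_t)
        #   A_t     = delta_t + gamma * lambda * (1 - done_t) * A_{t+1}
        # The critic is then regressed onto target_t = A_t + V(s_t).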
        deltas = [
            r + gamma * (1 - d) * nv - v
            for r, d, nv, v in zip(rewards, dones, next_values, values)
        ]
        deltas = np.stack(deltas)
        gaes = copy.deepcopy(deltas)
        for t in reversed(range(len(deltas) - 1)):
            gaes[t] = gaes[t] + (1 - dones[t]) * gamma * lamda * gaes[t + 1]

        target = gaes + values
        if normalize:
            gaes = (gaes - gaes.mean()) / (gaes.std() + 1e-8)
        return np.vstack(gaes), np.vstack(target)

    def replay(self, states, actions, rewards, predictions, dones,
               next_states):
        # reshape memory to appropriate shape for training
        states = np.vstack(states)
        next_states = np.vstack(next_states)
        actions = np.vstack(actions)
        predictions = np.vstack(predictions)

        # Compute discounted rewards
        #discounted_r = np.vstack(self.discount_rewards(rewards))

        # Get Critic network predictions
        values = self.Critic.predict(states)
        next_values = self.Critic.predict(next_states)
        # Compute advantages
        #advantages = discounted_r - values
        advantages, target = self.get_gaes(rewards, dones, np.squeeze(values),
                                           np.squeeze(next_values))
        '''
        pylab.plot(target,'-')
        pylab.plot(advantages,'.')
        ax=pylab.gca()
        ax.grid(True)
        pylab.show()
        '''
        # stack everything to numpy array
        y_true = np.hstack([advantages, predictions, actions])

        # training Actor and Critic networks
        a_loss = self.Actor.Actor.fit(states,
                                      y_true,
                                      epochs=self.epochs,
                                      verbose=0,
                                      shuffle=True)
        c_loss = self.Critic.Critic.fit(states,
                                        target,
                                        epochs=self.epochs,
                                        verbose=0,
                                        shuffle=True)

        self.writer.add_scalar('Data/actor_loss_per_replay',
                               np.sum(a_loss.history['loss']),
                               self.replay_count)
        self.writer.add_scalar('Data/critic_loss_per_replay',
                               np.sum(c_loss.history['loss']),
                               self.replay_count)
        self.replay_count += 1

    def act(self, state):
        # Use the network to predict the next action to take, using the model
        prediction = self.Actor.predict(np.expand_dims(state, axis=0))[0]
        action = np.random.choice(self.action_space, p=prediction)
        return action, prediction

    def save(self, name="Crypto_trader"):
        # save keras model weights
        self.Actor.Actor.save_weights(f"{name}_Actor.h5")
        self.Critic.Critic.save_weights(f"{name}_Critic.h5")

    def load(self, name="Crypto_trader"):
        # load keras model weights
        self.Actor.Actor.load_weights(f"{name}_Actor.h5")
        self.Critic.Critic.load_weights(f"{name}_Critic.h5")


# Example 6
class CustomEnv:
    # A custom Bitcoin trading environment
    def __init__(self, df, df_normalized, lookback_window_size, **kwargs):
        # Define action space and state size and other custom parameters
        self.df = df.reset_index()
        self.df_normalized = df_normalized.reset_index()
        self.lookback_window_size = lookback_window_size
        self.initial_balance = kwargs.get("initial_balance", 1000)
        self.render_range = kwargs.get("render_range",
                                       100)  # render range in visualization
        self.show_reward = kwargs.get(
            "show_reward",
            False)  # show order reward in rendered visualization
        self.show_indicators = kwargs.get(
            "show_indicators",
            False)  # show main indicators in rendered visualization

        self.df_total_steps = len(self.df) - 1
        # Orders history contains the balance, net_worth, crypto_bought, crypto_sold, crypto_held values for the last lookback_window_size steps
        self.orders_history = deque(maxlen=self.lookback_window_size)
        # Market history contains the OHLCV values for the last lookback_window_size prices
        self.market_history = deque(maxlen=self.lookback_window_size)

        self.normalize_value = kwargs.get("normalize_value", 40000)

        self.fees = 0.002  # 0.2% order fee per trade (Binance's standard fee is 0.1%)

        self.columns = list(self.df_normalized.columns[2:])

    # Reset the state of the environment to an initial state
    def reset(self, env_steps_size=0):
        self.visualization = TradingGraph(
            render_range=self.render_range,
            show_reward=self.show_reward,
            show_indicators=self.show_indicators)  # init visualization
        # limited orders memory for visualization
        self.trades = deque(maxlen=self.render_range)
        self.balance = self.initial_balance
        self.net_worth = self.initial_balance
        self.prev_net_worth = self.initial_balance
        self.crypto_held = 0
        self.crypto_sold = 0
        self.crypto_bought = 0
        self.episode_orders = 0  # track episode orders count
        self.prev_episode_orders = 0  # track previous episode orders count
        self.rewards = deque(maxlen=self.render_range)
        self.env_steps_size = env_steps_size
        self.punish_value = 0
        if env_steps_size > 0:  # used for training dataset
            self.start_step = np.random.randint(
                self.lookback_window_size,
                self.df_total_steps - env_steps_size)
            self.end_step = self.start_step + env_steps_size
        else:  # used for testing dataset
            self.start_step = self.lookback_window_size
            self.end_step = self.df_total_steps

        self.current_step = self.start_step
        for i in reversed(range(self.lookback_window_size)):
            current_step = self.current_step - i
            self.orders_history.append([
                self.balance / self.normalize_value,
                self.net_worth / self.normalize_value,
                self.crypto_bought / self.normalize_value,
                self.crypto_sold / self.normalize_value,
                self.crypto_held / self.normalize_value
            ])

            # one-line for loop to fill market history within the reset call
            self.market_history.append([
                self.df_normalized.loc[current_step, column]
                for column in self.columns
            ])
        state = np.concatenate((self.orders_history, self.market_history),
                               axis=1)

        return state

    # Get the data points for the given current_step
    def next_observation(self):
        self.market_history.append([
            self.df_normalized.loc[self.current_step, column]
            for column in self.columns
        ])
        obs = np.concatenate((self.orders_history, self.market_history),
                             axis=1)

        return obs

    # Execute one time step within the environment
    def step(self, action):
        self.crypto_bought = 0
        self.crypto_sold = 0

        self.current_step += 1

        # Use the candle's Open price as the current (execution) price
        # (alternative, left commented out: a random price between Open and Close)
        #current_price = random.uniform(
        #    self.df.loc[self.current_step, 'Open'],
        #    self.df.loc[self.current_step, 'Close'])
        current_price = self.df.loc[self.current_step, 'Open']
        Date = self.df.loc[self.current_step, 'Date']  # for visualization
        High = self.df.loc[self.current_step, 'High']  # for visualization
        Low = self.df.loc[self.current_step, 'Low']  # for visualization
        if action == 0:  # Hold
            pass

        elif (action == 1) and (self.balance > self.initial_balance * 0.05):
            # Buy with 100% of current balance
            self.crypto_bought = self.balance / current_price
            self.crypto_bought *= (1 - self.fees)  # subtract fees
            self.balance -= self.crypto_bought * current_price
            self.crypto_held += self.crypto_bought
            self.trades.append({
                'Date': Date,
                'High': High,
                'Low': Low,
                'total': self.crypto_bought,
                'type': "buy",
                'current_price': current_price
            })
            self.episode_orders += 1

        elif (action == 2) and (self.crypto_held * current_price >
                                self.initial_balance * 0.05):
            # Sell 100% of current crypto held
            self.crypto_sold = self.crypto_held
            self.crypto_sold *= (1 - self.fees)  # subtract fees
            self.balance += self.crypto_sold * current_price
            self.crypto_held -= self.crypto_sold
            self.trades.append({
                'Date': Date,
                'High': High,
                'Low': Low,
                'total': self.crypto_sold,
                'type': "sell",
                'current_price': current_price
            })
            self.episode_orders += 1

        self.prev_net_worth = self.net_worth
        self.net_worth = self.balance + self.crypto_held * current_price

        self.orders_history.append([
            self.balance / self.normalize_value,
            self.net_worth / self.normalize_value,
            self.crypto_bought / self.normalize_value,
            self.crypto_sold / self.normalize_value,
            self.crypto_held / self.normalize_value
        ])
        # Receive calculated reward
        reward = self.get_reward()

        if self.net_worth <= self.initial_balance / 2:
            done = True
        else:
            done = False

        obs = self.next_observation()

        return obs, reward, done

    # Calculate reward
    def get_reward(self):
        if self.episode_orders > 1 and self.episode_orders > self.prev_episode_orders:
            self.prev_episode_orders = self.episode_orders
            if self.trades[-1]['type'] == "buy" and self.trades[-2][
                    'type'] == "sell":
                reward = self.trades[-2]['total'] * self.trades[-2][
                    'current_price'] - self.trades[-2]['total'] * self.trades[
                        -1]['current_price']
                self.trades[-1]["Reward"] = reward
                return reward
            elif self.trades[-1]['type'] == "sell" and self.trades[-2][
                    'type'] == "buy":
                reward = self.trades[-1]['total'] * self.trades[-1][
                    'current_price'] - self.trades[-2]['total'] * self.trades[
                        -2]['current_price']
                self.trades[-1]["Reward"] = reward
                return reward
        else:
            return 0

    # render environment
    def render(self, visualize=False):
        #print(f'Step: {self.current_step}, Net Worth: {self.net_worth}')
        if visualize:
            # Render the environment to the screen
            img = self.visualization.render(self.df.loc[self.current_step],
                                            self.net_worth, self.trades)
            return img