def test_policy(self, symbol="IBM", start_date=dt.datetime(2010, 1, 1),
                end_date=dt.datetime(2011, 12, 31), start_val=10000):
    """Use the existing policy and test it against new data.

    Parameters:
    symbol: The stock symbol to act on
    start_date: A datetime object that represents the start date
    end_date: A datetime object that represents the end date
    start_val: Start value of the portfolio which contains only the symbol
        (kept in the signature; this method does not read it)

    Returns:
    df_trades: The dataframe built by create_df_trades from the daily
    order signals and self.num_shares. Its values represent trades for
    each day, e.g. +1000 indicating a BUY of 1000 shares and -1000
    indicating a SELL of 1000 shares.
    """
    dates = pd.date_range(start_date, end_date)
    # Get adjusted close prices for symbol
    df_prices = get_data([symbol], dates)
    # Get features and thresholds
    df_features = self.get_features(df_prices[symbol])
    thresholds = self.get_thresholds(df_features, self.num_steps)
    feature_dates = df_features.index

    # Initial position is holding nothing
    position = self.CASH
    # Series that captures order signals based on actions taken. The dtype
    # is pinned so that the series stays float64 on every pandas version
    # (a dtype-less empty Series warns, and later becomes object dtype).
    orders = pd.Series(index=feature_dates, dtype="float64")

    # Iterate over the data by date
    for date in feature_dates:
        # Get a state; add 1 to position so that states >= 0
        state = self.discretize(df_features.loc[date], position + 1,
                                thresholds)
        action = self.q_learner.query_set_state(state)

        if date == feature_dates[-1]:
            # On the last day, close any open positions
            new_pos = -position
        else:
            # NOTE(review): "action - 1" presumably maps the learner's
            # action index onto a position signal -- confirm against
            # get_position.
            new_pos = self.get_position(position, action - 1)

        # Record the order and update the current position
        orders.loc[date] = new_pos
        position += new_pos

    # Create a trade dataframe
    df_trades = create_df_trades(orders, symbol, self.num_shares)
    return df_trades
def add_evidence(self, symbol="IBM", start_date=dt.datetime(2008, 1, 1),
                 end_date=dt.datetime(2009, 12, 31), start_val=10000):
    """Create a QLearner, and train it for trading.

    Runs up to self.epochs passes over the feature dates. Each pass feeds
    the learner a state and a daily reward, records the resulting order
    signals, and evaluates the cumulative return of the trades. Training
    stops early once self.has_converged reports convergence (only checked
    after more than 20 epochs).

    Parameters:
    symbol: The stock symbol to act on
    start_date: A datetime object that represents the start date
    end_date: A datetime object that represents the end date
    start_val: Start value of the portfolio which contains only the symbol
    """
    dates = pd.date_range(start_date, end_date)
    # Get adjusted close prices for symbol
    df_prices = get_data([symbol], dates)
    prices = df_prices[symbol]
    # Previous trading day's price, aligned by date label. Looking it up
    # by label (instead of by the loop's position within the feature
    # index) keeps the reward correct even if get_features returns fewer
    # rows than the price series.
    prev_prices = prices.shift(1)

    # Get features and thresholds
    df_features = self.get_features(prices)
    thresholds = self.get_thresholds(df_features, self.num_steps)
    feature_dates = df_features.index

    cum_returns = []
    for epoch in range(1, self.epochs + 1):
        # Initial position is holding nothing
        position = self.CASH
        # Series that captures order signals based on actions taken; the
        # dtype is pinned so it is float64 on every pandas version.
        orders = pd.Series(index=feature_dates, dtype="float64")

        # Iterate over the data by date
        for date in feature_dates:
            is_last_day = date == feature_dates[-1]
            # Get a state; add 1 to position so that states >= 0
            state = self.discretize(df_features.loc[date], position + 1,
                                    thresholds)

            if date == feature_dates[0]:
                # On the first day there is no previous price, so ask for
                # an action with a zero reward and update=False
                action = self.q_learner.act(state, 0.0, update=False)
            else:
                # On other days, calculate the reward and let the learner
                # update
                reward = self.get_daily_reward(prev_prices.loc[date],
                                               prices.loc[date], position)
                action = self.q_learner.act(state, reward, update=True,
                                            done=is_last_day)

            if is_last_day:
                # On the last day, close any open positions
                new_pos = -position
            else:
                new_pos = self.get_position(position, action - 1)

            # Record the order and update the current position
            orders.loc[date] = new_pos
            position += new_pos

        # NOTE(review): replay is invoked once per epoch here; the nesting
        # level was not recoverable from the source layout -- confirm it
        # was not meant to run on every day.
        self.q_learner.replay(batch_size=32)

        df_trades = create_df_trades(orders, symbol, self.num_shares)
        portvals = compute_portvals_single_symbol(
            df_orders=df_trades, symbol=symbol, start_val=start_val,
            commission=self.commission, impact=self.impact)
        cum_return = get_portfolio_stats(portvals)[0]
        cum_returns.append(cum_return)
        if self.verbose:
            print(epoch, cum_return)

        # Only check for convergence after more than 20 epochs; what
        # counts as converged is decided by has_converged (per the
        # original note: no improvement in cum_return for 10 epochs).
        if epoch > 20 and self.has_converged(cum_returns):
            break

    if self.verbose:
        # NOTE(review): the heatmap and the line plot are drawn on the
        # same axes before a single plt.show(); confirm this is intended.
        sns.heatmap(self.q_learner.Q, cmap='Blues')
        plt.plot(cum_returns)
        plt.xlabel("Epoch")
        plt.ylabel("Cumulative return (%)")
        plt.show()
def add_evidence(self, df_prices, symbol="IBM", start_val=100000):
    """Create a QLearner, and train it for trading.

    Runs up to self.epochs passes over the feature dates. Each pass
    queries the learner with a state and a daily reward, records the
    resulting order signals, and evaluates the cumulative return of the
    trades. Training stops early once self.has_converged reports
    convergence (only checked after more than 10 epochs).

    Parameters:
    df_prices: Data price dataframe; its 'Adj Close' column is used
    symbol: The stock symbol to act on
    start_val: Start value of the portfolio which contains only the symbol

    Returns:
    The result of plot_cum_return(epochs, cum_returns) when self.verbose
    is truthy; otherwise None.
    """
    prices = df_prices['Adj Close']
    # Previous row's price, aligned by date label. Looking it up by label
    # (instead of by the loop's position within the feature index) keeps
    # the reward correct even if get_features returns fewer rows than the
    # price series.
    prev_prices = prices.shift(1)

    # Get features and thresholds
    df_features = self.get_features(prices)
    thresholds = self.get_thresholds(df_features, self.num_steps)
    feature_dates = df_features.index

    cum_returns = []
    epochs = []
    for epoch in range(1, self.epochs + 1):
        # Initial position is holding nothing
        position = self.CASH
        # Series that captures order signals based on actions taken; the
        # dtype is pinned so it is float64 on every pandas version.
        orders = pd.Series(index=feature_dates, dtype="float64")

        # Iterate over the data by date
        for date in feature_dates:
            # Get a state; add 1 to position so that states >= 0
            state = self.discretize(df_features.loc[date], position + 1,
                                    thresholds)

            if date == feature_dates[0]:
                # On the first day, get an action without updating the
                # Q-table
                action = self.q_learner.query_set_state(state)
            else:
                # On other days, calculate the reward and update the
                # Q-table
                reward = self.get_daily_reward(prev_prices.loc[date],
                                               prices.loc[date], position)
                action = self.q_learner.query(state, reward)

            if date == feature_dates[-1]:
                # On the last day, close any open positions
                new_pos = -position
            else:
                new_pos = self.get_position(position, action - 1)

            # Record the order and update the current position
            orders.loc[date] = new_pos
            position += new_pos

        df_trades = create_df_trades(orders, symbol, self.num_shares)
        portvals = compute_portvals_single_symbol(
            df_orders=df_trades, symbol=symbol, start_val=start_val,
            commission=self.commission, impact=self.impact)
        cum_return = get_portfolio_stats(portvals)[0]
        cum_returns.append(cum_return)
        epochs.append(epoch)
        if self.verbose:
            print(epoch, cum_return)

        # Only check for convergence after more than 10 epochs; what
        # counts as converged is decided by has_converged (per the
        # original note: no improvement in cum_return for 10 epochs).
        if epoch > 10 and self.has_converged(cum_returns):
            break

    if self.verbose:
        return plot_cum_return(epochs, cum_returns)