def testPolicy(self, symbol = "IBM", \ sd=dt.datetime(2009,1,1), \ ed=dt.datetime(2010,1,1), \ sv = 10000): syms = [symbol] # here we build a fake set of trades # your code should return the same sort of data syms = [symbol] dates = pd.date_range(sd, ed) prices, prices_SPY = id.get_price(syms, dates) if self.verbose: print prices daily_returns = (prices / prices.shift(1)) - 1 daily_returns = daily_returns[1:] # get indicators and combine them into as a feature data_frame lookback = 14 _, PSR = id.get_SMA(prices, lookback) _, _, bb_indicator = id.get_BB(prices, lookback) momentum = id.get_momentum(prices, lookback) indices = prices.index holdings = pd.DataFrame(np.nan, index=indices, columns=['Holdings']) holdings.iloc[0] = 0 for i in range(1, daily_returns.shape[0]): state = self.indicators_to_state(PSR.iloc[i], bb_indicator.iloc[i], momentum.iloc[i]) # Get action by Query learner with current state and reward to get action action = self.learner.querysetstate(state) #print("SL 286 action is ", action) # Get holdings with the new action. holdings.iloc[i], _ = self.apply_action(holdings.iloc[i - 1][0], action, 0) holdings.ffill(inplace=True) holdings.fillna(0, inplace=True) trades = holdings.diff() trades.iloc[0] = 0 # buy and sell happens when the difference change direction df_trades = pd.DataFrame(data=trades.values, index=trades.index, columns=['Trades']) #print("293: ", df_trades) if self.verbose: print type(df_trades) # it better be a DataFrame! if self.verbose: print trades if self.verbose: print prices return df_trades
def testPolicy(self, symbol, sd, ed, sv=100000):
    """Rule-based manual policy.

    Goes long 1000 shares when all three indicators signal oversold,
    short 1000 shares when all three signal overbought, and otherwise
    keeps the previous position.

    :param symbol: stock symbol to trade
    :param sd: start date of the evaluation period
    :param ed: end date of the evaluation period
    :param sv: starting portfolio value (unused by the rules)
    :return: DataFrame with one 'Trades' column — the daily change
        in holdings
    """
    trading_dates = pd.date_range(sd, ed)
    prices_all = get_data([symbol], trading_dates, addSPY=True,
                          colname='Adj Close')
    px = prices_all[symbol]  # only the portfolio symbol

    # Indicator set shared with the learner-based strategy.
    window = 14
    _, PSR = id.get_SMA(px, window)
    _, _, bb_indicator = id.get_BB(px, window)
    momentum = id.get_momentum(px, window)

    positions = pd.DataFrame(np.nan, index=px.index, columns=['Holdings'])

    # Thresholds: PSR (price/SMA - 1) beyond +/-0.02, Bollinger indicator
    # beyond +/-0.8, momentum beyond +/-0.03 — all three must agree.
    for day in range(px.shape[0]):
        oversold = (PSR.iloc[day] < -0.02
                    and bb_indicator.iloc[day] < -0.8
                    and momentum.iloc[day] < -0.03)
        overbought = (PSR.iloc[day] > 0.02
                      and bb_indicator.iloc[day] > 0.8
                      and momentum.iloc[day] > 0.03)
        if oversold:
            positions.iloc[day] = 1000
        elif overbought:
            positions.iloc[day] = -1000

    # Hold the position across signal-free days; flat before any signal.
    positions.ffill(inplace=True)
    positions.fillna(0, inplace=True)

    # Buy/sell orders are the day-over-day change in holdings.
    deltas = positions.diff()
    deltas.iloc[0] = 0

    return pd.DataFrame(data=deltas.values, index=deltas.index,
                        columns=['Trades'])
def experiment1():
    """Compare the Q-learning strategy (MLS) against the manual strategy
    (MS) and a benchmark, in sample and out of sample, producing one
    chart and one console report for each period."""
    ms = ManualStrategy.ManualStrategy()
    sl = StrategyLearner.StrategyLearner()
    commission = 9.95
    impact = 0.005
    # in sample
    start_date = dt.datetime(2008, 1, 1)
    end_date = dt.datetime(2009, 12, 31)
    # dates = pd.date_range(start_date, end_date)
    symbol = 'JPM'
    # Train the learner on the in-sample window only.
    sl.addEvidence(symbol=symbol, sd=start_date, ed=end_date, sv=100000,
                   n_bins=5)
    df_trades_ms = ms.testPolicy(symbol=symbol, sd=start_date,
                                 ed=end_date, sv=100000)
    df_trades_sl = sl.testPolicy(symbol=symbol, sd=start_date,
                                 ed=end_date, sv=100000)
    # generate orders based on trades
    df_orders_ms, benchmark_orders = ManualStrategy.generate_orders(
        df_trades_ms, symbol)
    df_orders_sl, _ = ManualStrategy.generate_orders(df_trades_sl, symbol)
    port_vals_ms = ManualStrategy.compute_portvals(df_orders_ms,
                                                   start_val=100000,
                                                   sd=start_date,
                                                   ed=end_date,
                                                   commission=commission,
                                                   impact=impact)
    port_vals_sl = ManualStrategy.compute_portvals(df_orders_sl,
                                                   start_val=100000,
                                                   sd=start_date,
                                                   ed=end_date,
                                                   commission=commission,
                                                   impact=impact)
    #benchmark_orders.loc[benchmark_orders.index[1], 'Shares'] = 0
    benchmark_vals = ManualStrategy.compute_portvals(benchmark_orders,
                                                     sd=start_date,
                                                     ed=end_date,
                                                     start_val=100000,
                                                     commission=commission,
                                                     impact=impact)
    # Normalize each equity curve to 1.0 on its first day so the three
    # are directly comparable on one axis.
    normed_port_ms = port_vals_ms / port_vals_ms.ix[0]
    normed_port_sl = port_vals_sl / port_vals_sl.ix[0]
    normed_bench = benchmark_vals / benchmark_vals.ix[0]
    dates = pd.date_range(start_date, end_date)
    prices_all = get_data([symbol], dates, addSPY=True, colname='Adj Close')
    prices = prices_all[symbol]  # only portfolio symbols
    # get indicators
    lookback = 14
    _, PSR = id.get_SMA(prices, lookback)
    _, _, bb_indicator = id.get_BB(prices, lookback)
    momentum = id.get_momentum(prices, lookback)
    # figure 5.
    plt.figure(figsize=(12, 6.5))
    # Two stacked panels sharing the x axis: equity curves on top,
    # indicators below.
    top = plt.subplot2grid((5, 1), (0, 0), rowspan=3, colspan=1)
    bottom = plt.subplot2grid((5, 1), (3, 0), rowspan=2, colspan=1,
                              sharex=top)
    # plot the Long or short action
    # NOTE(review): plt.axvline targets the current (most recently
    # created) axes, i.e. `bottom`; the same lines are drawn again on
    # `top` in the second loop below.
    for index, marks in df_trades_sl.iterrows():
        if marks['Trades'] > 0:
            plt.axvline(x=index, color='blue', linestyle='dashed', alpha=.9)
        elif marks['Trades'] < 0:
            plt.axvline(x=index, color='black', linestyle='dashed', alpha=.9)
        else:
            pass
    top.xaxis_date()
    top.grid(True)
    top.plot(normed_port_sl, lw=2, color='red', label='Q-Learning Strategy')
    top.plot(normed_port_ms, lw=1.5, color='black', label='Manual Strategy')
    top.plot(normed_bench, lw=1.2, color='green', label='Benchmark')
    top.set_title(
        'Machine Learning Strategy (MLS), Manual Strategy (MS) - In Sample Analysis'
    )
    top.set_ylabel('Normalized Value')
    # Repeat the long/short markers on the top panel: blue dashed =
    # long entry, black dashed = short entry.
    for index, marks in df_trades_sl.iterrows():
        if marks['Trades'] > 0:
            top.axvline(x=index, color='blue', linestyle='dashed', alpha=.9)
        elif marks['Trades'] < 0:
            top.axvline(x=index, color='black', linestyle='dashed', alpha=.9)
        else:
            pass
    bottom.plot(momentum, color='olive', lw=1, label="momentum")
    bottom.plot(PSR, color='purple', lw=1, label="PSR")
    #bottom.plot(bb_indicator, color='blue', lw=1, label="Bollinger")
    bottom.set_title('Indicators')
    bottom.axhline(y=-0.2, color='grey', linestyle='--', alpha=0.5)
    bottom.axhline(y=0, color='grey', linestyle='--', alpha=0.5)
    bottom.axhline(y=0.2, color='grey', linestyle='--', alpha=0.5)
    bottom.legend()
    top.legend()
    top.axes.get_xaxis().set_visible(False)
    plt.xlim(start_date, end_date)
    filename = '01_MLS_insample.png'
    plt.savefig(filename)
    plt.close()
    # Summary statistics for the in-sample period.
    port_cr_sl, port_adr_sl, port_stddr_sl, port_sr_sl = ManualStrategy.get_portfolio_stats(
        port_vals_sl)
    port_cr_ms, port_adr_ms, port_stddr_ms, port_sr_ms = ManualStrategy.get_portfolio_stats(
        port_vals_ms)
    bench_cr, bench_adr, bench_stddr, bench_sr = ManualStrategy.get_portfolio_stats(
        benchmark_vals)
    # Compare portfolio against benchmark
    print "=== Machine Learning Strategy (MLS) V.S. Manual Strategy (MS) In Sample ==="
    print "Date Range: {} to {}".format(start_date, end_date)
    print
    print "Sharpe Ratio of MLS: {}".format(port_sr_sl)
    print "Sharpe Ratio of MS: {}".format(port_sr_ms)
    print "Sharpe Ratio of BenchMark : {}".format(bench_sr)
    print
    print "Cumulative Return of MLS: {}".format(port_cr_sl)
    print "Cumulative Return of MS: {}".format(port_cr_ms)
    print "Cumulative Return of Benchmark : {}".format(bench_cr)
    print
    print "Standard Deviation of MLS: {}".format(port_stddr_sl)
    print "Standard Deviation of MS: {}".format(port_stddr_ms)
    print "Standard Deviation of Benchmark : {}".format(bench_stddr)
    print
    print "Average Daily Return of MLS: {}".format(port_adr_sl)
    print "Average Daily Return of MS: {}".format(port_adr_ms)
    print "Average Daily Return of BenchMark : {}".format(bench_adr)
    print
    print "Final MLS Portfolio Value: {}".format(port_vals_sl[-1])
    print "Final MS Portfolio Value: {}".format(port_vals_ms[-1])
    print "Final Benchmark Portfolio Value: {}".format(benchmark_vals[-1])
    print
    # ========================
    # OUT OF SAMPLE Analysis
    # ========================
    # NOTE: the learner is NOT retrained here — it keeps the Q-table
    # learned on the in-sample window.
    start_date = dt.datetime(2010, 1, 1)
    end_date = dt.datetime(2011, 12, 31)
    # dates = pd.date_range(start_date, end_date)
    symbol = 'JPM'
    df_trades_ms = ms.testPolicy(symbol=symbol, sd=start_date,
                                 ed=end_date, sv=100000)
    df_trades_sl = sl.testPolicy(symbol=symbol, sd=start_date,
                                 ed=end_date, sv=100000)
    # generate orders based on trades
    df_orders_ms, benchmark_orders = ManualStrategy.generate_orders(
        df_trades_ms, symbol)
    df_orders_sl, _ = ManualStrategy.generate_orders(df_trades_sl, symbol)
    port_vals_ms = ManualStrategy.compute_portvals(df_orders_ms,
                                                   start_val=100000,
                                                   sd=start_date,
                                                   ed=end_date,
                                                   commission=commission,
                                                   impact=impact)
    port_vals_sl = ManualStrategy.compute_portvals(df_orders_sl,
                                                   start_val=100000,
                                                   sd=start_date,
                                                   ed=end_date,
                                                   commission=commission,
                                                   impact=impact)
    #benchmark_orders.loc[benchmark_orders.index[1], 'Shares'] = 0
    benchmark_vals = ManualStrategy.compute_portvals(benchmark_orders,
                                                     sd=start_date,
                                                     ed=end_date,
                                                     start_val=100000,
                                                     commission=commission,
                                                     impact=impact)
    # Normalize curves for the out-of-sample chart.
    normed_port_ms = port_vals_ms / port_vals_ms.ix[0]
    normed_port_sl = port_vals_sl / port_vals_sl.ix[0]
    normed_bench = benchmark_vals / benchmark_vals.ix[0]
    dates = pd.date_range(start_date, end_date)
    prices_all = get_data([symbol], dates, addSPY=True, colname='Adj Close')
    prices = prices_all[symbol]  # only portfolio symbols
    # get indicators
    lookback = 14
    _, PSR = id.get_SMA(prices, lookback)
    _, _, bb_indicator = id.get_BB(prices, lookback)
    momentum = id.get_momentum(prices, lookback)
    # figure 5.
    plt.figure(figsize=(12, 6.5))
    top = plt.subplot2grid((5, 1), (0, 0), rowspan=3, colspan=1)
    bottom = plt.subplot2grid((5, 1), (3, 0), rowspan=2, colspan=1,
                              sharex=top)
    # plot the Long or short action
    for index, marks in df_trades_sl.iterrows():
        if marks['Trades'] > 0:
            plt.axvline(x=index, color='blue', linestyle='dashed', alpha=.9)
        elif marks['Trades'] < 0:
            plt.axvline(x=index, color='black', linestyle='dashed', alpha=.9)
        else:
            pass
    top.xaxis_date()
    top.grid(True)
    top.plot(normed_port_sl, lw=2, color='red', label='Q-Learning Strategy')
    top.plot(normed_port_ms, lw=1.5, color='black', label='Manual Strategy')
    top.plot(normed_bench, lw=1.2, color='green', label='Benchmark')
    top.set_title(
        'Machine Learning Strategy (MLS) V.S. Manual Strategy (MS) - Out Sample Analysis'
    )
    top.set_ylabel('Normalized Value')
    for index, marks in df_trades_sl.iterrows():
        if marks['Trades'] > 0:
            top.axvline(x=index, color='blue', linestyle='dashed', alpha=.9)
        elif marks['Trades'] < 0:
            top.axvline(x=index, color='black', linestyle='dashed', alpha=.9)
        else:
            pass
    bottom.plot(momentum, color='olive', lw=1, label="momentum")
    bottom.plot(PSR, color='purple', lw=1, label="PSR")
    #bottom.plot(bb_indicator, color='blue', lw=1, label="Bollinger")
    bottom.set_title('Indicators')
    bottom.axhline(y=-0.2, color='grey', linestyle='--', alpha=0.5)
    bottom.axhline(y=0, color='grey', linestyle='--', alpha=0.5)
    bottom.axhline(y=0.2, color='grey', linestyle='--', alpha=0.5)
    bottom.legend()
    top.legend()
    top.axes.get_xaxis().set_visible(False)
    plt.xlim(start_date, end_date)
    filename = '02_MLS_outsample.png'
    plt.savefig(filename)
    plt.close()
    # Summary statistics for the out-of-sample period.
    port_cr_sl, port_adr_sl, port_stddr_sl, port_sr_sl = ManualStrategy.get_portfolio_stats(
        port_vals_sl)
    port_cr_ms, port_adr_ms, port_stddr_ms, port_sr_ms = ManualStrategy.get_portfolio_stats(
        port_vals_ms)
    bench_cr, bench_adr, bench_stddr, bench_sr = ManualStrategy.get_portfolio_stats(
        benchmark_vals)
    # Compare portfolio against benchmark
    print "=== Machine Learning Strategy (MLS) V.S. Manual Strategy (MS) OUT Sample ==="
    print "Date Range: {} to {}".format(start_date, end_date)
    print
    print "Sharpe Ratio of MLS: {}".format(port_sr_sl)
    print "Sharpe Ratio of MS: {}".format(port_sr_ms)
    print "Sharpe Ratio of BenchMark : {}".format(bench_sr)
    print
    print "Cumulative Return of MLS: {}".format(port_cr_sl)
    print "Cumulative Return of MS: {}".format(port_cr_ms)
    print "Cumulative Return of Benchmark : {}".format(bench_cr)
    print
    print "Standard Deviation of MLS: {}".format(port_stddr_sl)
    print "Standard Deviation of MS: {}".format(port_stddr_ms)
    print "Standard Deviation of Benchmark : {}".format(bench_stddr)
    print
    print "Average Daily Return of MLS: {}".format(port_adr_sl)
    print "Average Daily Return of MS: {}".format(port_adr_ms)
    print "Average Daily Return of BenchMark : {}".format(bench_adr)
    print
    print "Final MLS Portfolio Value: {}".format(port_vals_sl[-1])
    print "Final MS Portfolio Value: {}".format(port_vals_ms[-1])
    print "Final Benchmark Portfolio Value: {}".format(benchmark_vals[-1])
    print
def addEvidence(self, symbol = "IBM", \ sd=dt.datetime(2008,1,1), \ ed=dt.datetime(2009,1,1), \ sv = 10000,n_bins=6): # this method should create a QLearner, and train it for trading syms = [symbol] dates = pd.date_range(sd, ed) prices, prices_SPY = id.get_price(syms, dates) if self.verbose: print prices daily_returns = (prices / prices.shift(1)) - 1 daily_returns = daily_returns[1:] # get indicators and combine them into as a feature data_frame lookback = 14 _, PSR = id.get_SMA(prices, lookback) _, _, bb_indicator = id.get_BB(prices, lookback) momentum = id.get_momentum(prices, lookback) _, self.pbins = pd.qcut(PSR, n_bins, labels=False, retbins=True) _, self.bbins = pd.qcut(bb_indicator, n_bins, labels=False, retbins=True) _, self.mbins = pd.qcut(momentum, n_bins, labels=False, retbins=True) self.pbins = self.pbins[1:-1] self.bbins = self.bbins[1:-1] self.mbins = self.mbins[1:-1] # start training converged = False df_trades = None count = 0 old_cum_ret = 0.0 converge_count = 0 converged_prev = False #print "Total number of states is:", total_states # Initialize QLearner, self.learner = ql.QLearner(num_states=100 * n_bins, num_actions=self.num_actions, alpha=0.5, gamma=0.9, rar=0.0, radr=0.0, dyna=0, verbose=self.verbose) while (not converged) and (count < 100): # Set first state to the first data point (first day) indices = daily_returns.index holdings = pd.DataFrame(np.nan, index=indices, columns=['Holdings']) #first_state = self.indicators_to_state(PSR.iloc[0], bb_indicator.iloc[0], momentum.iloc[0]) #print("SL 152: holdings.iloc[0] = ", holdings.iloc[0][0], "; daily_rets.iloc[1] = ", daily_returns.iloc[1][0]) holdings.iloc[0] = 0. 
#print("SL 153") #df_prices = prices.copy() #df_prices['Cash'] = pd.Series(1.0, index=indices) #df_trades = df_prices.copy() #df_trades[:] = 0.0 state = self.indicators_to_state(PSR.iloc[0], bb_indicator.iloc[0], momentum.iloc[0]) action = self.learner.querysetstate(state) holdings.iloc[0], reward = self.apply_action( holdings.iloc[0][0], action, daily_returns.iloc[1][0]) #print("SL 171: PSR.shape[0] = ",PSR.shape[0],"; daily_returns.shape[0] = ",daily_returns.shape[0]) # Cycle through dates for j in range(1, daily_returns.shape[0]): state = self.indicators_to_state(PSR.iloc[j], bb_indicator.iloc[j], momentum.iloc[j]) # Get action by Query learner with current state and reward to get action action = self.learner.query(state, reward) # update reward and holdings with the new action. holdings.iloc[j], reward = self.apply_action( holdings.iloc[j - 1][0], action, daily_returns.iloc[j][0]) #print("SL 183: holdings.iloc[j][0] = ",holdings.iloc[j][0]) # Implement action returned by learner and update portfolio #print("SL 206: one learning is done.") #print("SL 215, holdings.iloc[0]",holdings.iloc[0]) holdings.iloc[-1] = 0 holdings.ffill(inplace=True) holdings.fillna(0, inplace=True) #print("SL 216 holdings = ",holdings) trades = holdings.diff() trades.iloc[0] = 0 # buy and sell happens when the difference change direction df_trades = pd.DataFrame(data=trades.values, index=indices, columns=['Trades']) df_orders, _ = generate_orders(df_trades, symbol) port_vals = compute_portvals(df_orders, sd=sd, ed=ed, impact=self.impact, start_val=sv, commission=self.commission) cum_ret, _, _, _ = get_portfolio_stats(port_vals) count += 1 old_cum_ret,converged_prev,converge_count,converged = \ check_convergence(old_cum_ret,cum_ret,converged_prev,converge_count) # check if converge #if converged: #print("SL 212: converged at iteration # ",count, "cum_ret is: ", cum_ret) return df_trades # example use with new colname volume_all = ut.get_data(syms, dates, colname="Volume") # automatically 
adds SPY volume = volume_all[syms] # only portfolio symbols volume_SPY = volume_all['SPY'] # only SPY, for comparison later if self.verbose: print volume
def testPolicy(
    self,
    symbol="jpm",
    sd=dt.datetime(2008, 1, 1),
    ed=dt.datetime(2009, 12, 31),
    sv=10000,
):
    """
    Tests your learner using data outside of the training data

    :param symbol: The stock symbol that you trained on on
    :type symbol: str
    :param sd: A datetime object that represents the start date, defaults to 1/1/2008
    :type sd: datetime
    :param ed: A datetime object that represents the end date, defaults to 1/1/2009
    :type ed: datetime
    :param sv: The starting value of the portfolio
    :type sv: int
    :return: A DataFrame with values representing trades for each day. Legal values
        are +1000.0 indicating a BUY of 1000 shares, -1000.0 indicating a SELL of
        1000 shares, and 0.0 indicating NOTHING. Values of +2000 and -2000 for
        trades are also legal when switching from long to short or short to long
        so long as net holdings are constrained to -1000, 0, and 1000.
    :rtype: pandas.DataFrame
    """
    dates = pd.date_range(sd, ed)
    df_prices = ind.get_price(symbol, dates)
    daily_rets = (df_prices / df_prices.shift(1)) - 1
    daily_rets = daily_rets[1:]
    # Pull an extra year of history so lookback-based indicators are
    # fully warmed up by the first test day.
    sd_older = sd - dt.timedelta(days=365)
    dates_older = pd.date_range(sd_older, ed)
    df_prices_older = ind.get_price(symbol, dates_older)
    # Position of the first in-range trading day within the extended frame.
    sd_key = df_prices.index[0]
    sd_index = df_prices_older.index.get_loc(sd_key)
    df_holdings = df_prices.copy()
    df_holdings['Holdings'] = np.nan
    del df_holdings[symbol]
    # print(df_holdings)
    cum_ret_prev = 0  # NOTE(review): unused here (leftover from training loop)
    iters = 0         # NOTE(review): unused here as well
    num_bins = len(self.bins)
    # Indicator values computed over the extended window, then sliced
    # back to the test range.
    _, _, ind1 = ind.get_BB(df_prices_older, self.lookback)
    ind2 = ind.get_CCI(df_prices_older, self.lookback)
    _, _, ind3 = ind.get_SMA_Cross(self.lookback, 100, df_prices_older)
    ind4 = ind.get_momentum(df_prices_older, self.lookback)
    _, _, ind5 = ind.get_MACD(df_prices_older)
    BB = ind1.iloc[sd_index:].values
    CCI = ind2.iloc[sd_index:].values
    SMA_Cross = ind3.iloc[sd_index:].values
    Momentum = ind4.iloc[sd_index:].values
    MACD = ind5.iloc[sd_index:].values
    # NOTE(review): these qcut calls RE-FIT the quantile bin edges on the
    # test-period data and overwrite the edges learned in add_evidence —
    # likely unintended (test states should use the training bins). Confirm.
    _, self.x0bins = pd.qcut(BB[:, 0], num_bins, labels=False, retbins=True)
    _, self.x1bins = pd.qcut(CCI[:, 0], num_bins, labels=False, retbins=True)
    _, self.x2bins = pd.qcut(SMA_Cross[:, 0], num_bins, labels=False,
                             retbins=True)
    _, self.x3bins = pd.qcut(Momentum[:, 0], num_bins, labels=False,
                             retbins=True)
    _, self.x4bins = pd.qcut(MACD[:, 0], num_bins, labels=False, retbins=True)
    # Discretize each indicator; interior edges only, so outliers fall
    # into the first/last bin.
    x_0 = np.digitize(BB[:, 0], self.x0bins[1:-1])
    x_1 = np.digitize(CCI[:, 0], self.x1bins[1:-1])       # NOTE(review): unused in state
    x_2 = np.digitize(SMA_Cross[:, 0], self.x2bins[1:-1]) # NOTE(review): unused in state
    x_3 = np.digitize(Momentum[:, 0], self.x3bins[1:-1])
    x_4 = np.digitize(MACD[:, 0], self.x4bins[1:-1])
    # State = BB digit + 10*momentum digit + 100*MACD digit.
    state = x_0 + x_3 * 10 + x_4 * 100
    # Disable exploration: always take the greedy action while testing.
    self.learner.rar = 0
    action = self.learner.querysetstate(state[0])
    daily_return = daily_rets.iloc[0][symbol]  # NOTE(review): unused
    df_holdings.iloc[0]['Holdings'] = 0
    for day_idx in range(1, daily_rets.shape[0]):
        # implement action
        cur_price = df_prices.iloc[day_idx - 1][symbol]
        next_price = df_prices.iloc[day_idx][symbol]
        # querysetstate only (no reward): pure policy execution.
        action = self.learner.querysetstate(state[day_idx])
        df_holdings.iloc[day_idx]['Holdings'], _ = self.take_action(
            df_holdings.iloc[day_idx - 1]['Holdings'], action, cur_price,
            next_price)
    # Flatten on the last day, then convert holdings to daily trades.
    df_holdings.iloc[-1]['Holdings'] = 0
    df_trades = df_holdings.diff()
    df_trades['Trades'] = df_trades['Holdings']
    del df_trades['Holdings']
    df_trades.iloc[0]['Trades'] = 0
    return df_trades
def add_evidence(
    self,
    symbol="IBM",
    sd=dt.datetime(2008, 1, 1),
    ed=dt.datetime(2009, 1, 1),
    sv=10000,
):
    """
    Trains your strategy learner over a given time frame.

    Repeats full passes over the training range, updating the Q-table
    each day, until the Q-table stops changing (max element-wise delta
    below 0.001 for more than 5 consecutive checks after 20 warm-up
    iterations) or 20000 iterations have run.

    :param symbol: The stock symbol to train on
    :type symbol: str
    :param sd: A datetime object that represents the start date, defaults to 1/1/2008
    :type sd: datetime
    :param ed: A datetime object that represents the end date, defaults to 1/1/2009
    :type ed: datetime
    :param sv: The starting value of the portfolio
    :type sv: int
    """
    # add your code to do learning here
    converged = False
    x = np.zeros((3, 1))  # NOTE(review): unused
    dates = pd.date_range(sd, ed)
    df_prices = ind.get_price(symbol, dates)
    daily_rets = (df_prices / df_prices.shift(1)) - 1
    daily_rets = daily_rets[1:]
    # Extra year of history so lookback indicators are warmed up at sd.
    sd_older = sd - dt.timedelta(days=365)
    dates_older = pd.date_range(sd_older, ed)
    df_prices_older = ind.get_price(symbol, dates_older)
    sd_key = df_prices.index[0]
    sd_index = df_prices_older.index.get_loc(sd_key)
    num_bins = len(self.bins)
    # States are encoded as digit0 + 10*digit1 + 100*digit2, so the
    # largest encodable state bounds the Q-table size.
    max_state_idx = num_bins + num_bins * 10 + num_bins * 100
    # Call Q-Learner Constructor
    self.learner = QLearner(
        num_states=(max_state_idx + 1),
        num_actions=3,
        alpha=0.01,
        gamma=0.0,
        rar=0.98,      # start highly exploratory ...
        radr=0.9995,   # ... and decay exploration each iteration (below)
        dyna=0,
        verbose=False,
    )
    # df_trades = df_prices.copy()
    df_holdings = df_prices.copy()
    df_holdings['Holdings'] = np.nan
    del df_holdings[symbol]
    # print(df_holdings)
    # Initlialize Vars
    cum_ret_prev = 0
    iters = 0
    conv_counter = 0
    Q_prev = np.copy(self.learner.Q)
    # Get Indicator Values (computed on the extended window, sliced
    # back to the training range).
    _, _, ind1 = ind.get_BB(df_prices_older, self.lookback)
    ind2 = ind.get_CCI(df_prices_older, self.lookback)
    _, _, ind3 = ind.get_SMA_Cross(self.lookback, 100, df_prices_older)
    ind4 = ind.get_momentum(df_prices_older, self.lookback)
    _, _, ind5 = ind.get_MACD(df_prices_older)
    BB = ind1.iloc[sd_index:].values
    CCI = ind2.iloc[sd_index:].values
    SMA_Cross = ind3.iloc[sd_index:].values
    Momentum = ind4.iloc[sd_index:].values
    MACD = ind5.iloc[sd_index:].values
    # Quantile-bin each indicator on the training data; store the edges
    # on self for reuse.
    _, self.x0bins = pd.qcut(BB[:, 0], num_bins, labels=False, retbins=True)
    _, self.x1bins = pd.qcut(CCI[:, 0], num_bins, labels=False, retbins=True)
    _, self.x2bins = pd.qcut(SMA_Cross[:, 0], num_bins, labels=False,
                             retbins=True)
    _, self.x3bins = pd.qcut(Momentum[:, 0], num_bins, labels=False,
                             retbins=True)
    _, self.x4bins = pd.qcut(MACD[:, 0], num_bins, labels=False, retbins=True)
    x_0 = np.digitize(BB[:, 0], self.x0bins[1:-1])
    x_1 = np.digitize(CCI[:, 0], self.x1bins[1:-1])       # NOTE(review): unused in state
    x_2 = np.digitize(SMA_Cross[:, 0], self.x2bins[1:-1]) # NOTE(review): unused in state
    x_3 = np.digitize(Momentum[:, 0], self.x3bins[1:-1])
    x_4 = np.digitize(MACD[:, 0], self.x4bins[1:-1])
    # State = BB digit + 10*momentum digit + 100*MACD digit.
    state = x_0 + x_3 * 10 + x_4 * 100
    while not converged:
        # Day 0: set the initial state and apply the first action.
        action = self.learner.querysetstate(state[0])
        daily_return = daily_rets.iloc[0][symbol]
        cur_price = df_prices.iloc[0][symbol]
        next_price = df_prices.iloc[1][symbol]
        df_holdings.iloc[0]['Holdings'], reward = self.take_action(
            0, action, cur_price, next_price)
        for day_idx in range(1, daily_rets.shape[0]):
            daily_return = daily_rets.iloc[day_idx][symbol]
            cur_price = df_prices.iloc[day_idx - 1][symbol]
            next_price = df_prices.iloc[day_idx][symbol]
            # Apply yesterday's action, then feed the resulting reward
            # into query() so the Q-table is updated.
            df_holdings.iloc[day_idx][
                'Holdings'], reward = self.take_action(
                    df_holdings.iloc[day_idx - 1]['Holdings'], action,
                    cur_price, next_price)
            action = self.learner.query(state[day_idx], reward)
        # Flatten at the end of the pass and convert to trades.
        df_holdings.iloc[-1]['Holdings'] = 0
        df_trades = df_holdings.diff()
        df_trades['Trades'] = df_trades['Holdings']
        del df_trades['Holdings']
        df_trades.iloc[0]['Trades'] = 0
        # Backtest this pass to track cumulative return.
        portvals = msc.compute_portvals(
            df_trades,
            symbol,
            sv,
            self.commission,
            self.impact,
        )
        cum_ret = (portvals[-1] / portvals[0]) - 1
        # Convergence test: largest absolute change in any Q value.
        Q_diff = np.abs(self.learner.Q - Q_prev)
        Q_max_diff = Q_diff.max()
        if iters > 20:
            # if abs(cum_ret - cum_ret_prev) < 0.0001:
            if Q_max_diff < 0.001:
                conv_counter += 1
            else:
                conv_counter = 0
            if conv_counter > 5 or iters > 20000:
                converged = True
        # if iters > 100:
        # if iters % 100 == 0:
        #     print("Iteration #", iters)
        # Per-iteration training diagnostics.
        print("----------------------------------------------")
        print("--                                          --")
        print("Iteration #", iters)
        print("Error = ", abs(cum_ret - cum_ret_prev))
        print("Q Diff: ", Q_max_diff)
        print("Epsilon: ", self.learner.rar)
        cum_ret_prev = cum_ret
        Q_prev = np.copy(self.learner.Q)
        iters += 1
        # Decay the exploration rate after each full pass.
        self.learner.rar *= self.learner.radr
    # print("Iters = ", iters)
    print("Mode Trained in ", iters, " iterations!")
    # Persist the learned Q-table for inspection.
    np.savetxt('Q_Table.csv', self.learner.Q, delimiter=',')
def testPolicy(
    self,
    symbol="IBM",
    sd=dt.datetime(2009, 1, 1),
    ed=dt.datetime(2010, 1, 1),
    sv=10000,
):
    """
    Tests your learner using data outside of the training data

    :param symbol: The stock symbol that you trained on on
    :type symbol: str
    :param sd: A datetime object that represents the start date, defaults to 1/1/2008
    :type sd: datetime
    :param ed: A datetime object that represents the end date, defaults to 1/1/2009
    :type ed: datetime
    :param sv: The starting value of the portfolio
    :type sv: int
    :return: A DataFrame with values representing trades for each day. Legal values
        are +1000.0 indicating a BUY of 1000 shares, -1000.0 indicating a SELL of
        1000 shares, and 0.0 indicating NOTHING. Values of +2000 and -2000 for
        trades are also legal when switching from long to short or short to long
        so long as net holdings are constrained to -1000, 0, and 1000.
    :rtype: pandas.DataFrame
    """
    dates = pd.date_range(sd, ed)
    df_prices = ind.get_price(symbol, dates)
    daily_rets = (df_prices / df_prices.shift(1)) - 1
    daily_rets = daily_rets[1:]
    # Extra year of history so lookback indicators are warmed up at sd.
    sd_older = sd - dt.timedelta(days=365)
    dates_older = pd.date_range(sd_older, ed)
    df_prices_older = ind.get_price(symbol, dates_older)
    sd_key = df_prices.index[0]
    sd_index = df_prices_older.index.get_loc(sd_key)
    df_holdings = df_prices.copy()
    df_holdings['Holdings'] = np.nan
    del df_holdings[symbol]
    # print(df_holdings)
    # Get Indicator Values (extended window, sliced to the test range).
    _, _, ind1 = ind.get_BB(df_prices_older, self.lookback)
    ind2 = ind.get_CCI(df_prices_older, self.lookback)
    _, _, ind3 = ind.get_SMA_Cross(self.lookback, 100, df_prices_older)
    ind4 = ind.get_momentum(df_prices_older, self.lookback)
    _, _, ind5 = ind.get_MACD(df_prices_older)
    BB = ind1.iloc[sd_index:].values
    CCI = ind2.iloc[sd_index:].values
    SMA_Cross = ind3.iloc[sd_index:].values
    Momentum = ind4.iloc[sd_index:].values
    MACD = ind5.iloc[sd_index:].values
    df_holdings.iloc[0]['Holdings'] = 0
    action = 2  # 2 appears to mean "do nothing" — see take_action; confirm
    # Earlier threshold experiments, kept for reference:
    # BB_threshold = 1.0
    # MACD_threshold = 0.15
    # Momentum_threshold = 0.1
    # BB_threshold = 0.
    BB_threshold = 0.0
    MACD_threshold = 0.0
    Momentum_threshold = 0
    for day_idx in range(1, daily_rets.shape[0]):
        # BB Logic: crossing back inside the bands from outside.
        BB_sell = BB[day_idx-1] >= 1-BB_threshold and BB[day_idx] < 1-BB_threshold
        BB_buy = BB[day_idx-1] <= BB_threshold and BB[day_idx] > BB_threshold
        # MACD Logic
        # MACD_sell = MACD[day_idx] > MACD[day_idx-1] and MACD[day_idx] > 0 and MACD[day_idx] < MACD_threshold
        # MACD_buy = MACD[day_idx] < MACD[day_idx-1] and MACD[day_idx] < 0 and MACD[day_idx] > -1*MACD_threshold
        # MACD zero-line crossings.
        MACD_sell = MACD[day_idx] >= 0 and MACD[day_idx-1] < 0
        MACD_buy = MACD[day_idx] <= 0 and MACD[day_idx-1] > 0
        # Momentum Logic: used as a confirmation filter below.
        Momentum_sell = Momentum[day_idx] > Momentum_threshold
        Momentum_buy = Momentum[day_idx] < -1*Momentum_threshold
        # SMA Cross Logic (computed but not used in the signal below).
        SMA_sell = SMA_Cross[day_idx] <= 0 and SMA_Cross[day_idx-1] > 0
        SMA_buy = SMA_Cross[day_idx] >= 0 and SMA_Cross[day_idx-1] < 0
        # Momentum_sell = True
        # Momentum_buy = True
        # Momentum_buy = Momentum[day_idx] > Momentum_threshold
        # Momentum_sell = Momentum[day_idx] < -1*Momentum_threshold
        # Either BB or MACD may trigger, but momentum must confirm.
        action = 2
        if (BB_sell and Momentum_sell) or (MACD_sell and Momentum_sell):
            action = 1  # Sell signal
        if (BB_buy and Momentum_buy) or (MACD_buy and Momentum_buy):
            action = 0  # Buy Signal
        df_holdings.iloc[day_idx]['Holdings'] = self.take_action(
            df_holdings.iloc[day_idx-1]['Holdings'], action)
    # Flatten on the last day, then convert holdings to daily trades.
    df_holdings.iloc[-1]['Holdings'] = 0
    df_trades = df_holdings.diff()
    df_trades['Trades'] = df_trades['Holdings']
    del df_trades['Holdings']
    df_trades.iloc[0]['Trades'] = 0
    return df_trades