def addEvidence(self, symbol = "IBM", \ sd=dt.datetime(2008,1,1), \ ed=dt.datetime(2009,1,1), \ sv = 1000000): # add your code to do learning here (normalized_values, bbp, moving_avarage, rsi_val, rsi_spy, momentum, sma_cross) = indicators(sd=sd, ed=ed, syms=[symbol], allocs=[1], sv=sv, gen_plot=False) norm_val = normalized_values.copy() normalized_values = pd.DataFrame(data=pd.qcut(normalized_values, 10, labels=False), index=normalized_values.index) bbp = pd.DataFrame(data=pd.qcut(bbp, 10, labels=False), index=bbp.index) moving_avarage = pd.DataFrame(data=pd.qcut(moving_avarage, 10, labels=False), index=moving_avarage.index) rsi_val = pd.DataFrame(data=pd.qcut(rsi_val, 10, labels=False), index=rsi_val.index) rsi_spy = pd.DataFrame(data=pd.qcut(rsi_spy, 10, labels=False), index=rsi_spy.index) momentum = pd.DataFrame(data=pd.qcut(momentum, 10, labels=False), index=momentum.index) sma_cross = pd.DataFrame(data=pd.cut(sma_cross, 3, labels=False), index=sma_cross.index) states = pd.concat([ normalized_values, bbp, moving_avarage, rsi_val, rsi_spy, momentum, sma_cross ], axis=1).apply(lambda x: x.fillna(0)).iloc[13:, :] self.prices = normalized_values.copy() state_size = 7 action_size = 5 max_iter = 600 actions_df = pd.DataFrame(index=states.index, data=[0] * len(states)) iter_num = 0 converged = False dates = pd.date_range(sd, ed) self.prices_all = ut.get_data([symbol], dates)[symbol] agent = DQNAgent(state_size, action_size) batch_size = 32 for e in range(max_iter): print("Va por la iteracion ", e) holdings_actions = 0 syms = [symbol] X = np.array([states.iloc[0]]) action = 2 for key, row in states.iloc[1:].iterrows(): (reward, holdings_actions) = self.calculate_reward( holdings_actions_1=holdings_actions, action=action, key=key) #next_state, reward, done, _ = self.calculate_reward(action) #reward = reward if not done else -10 next_state = np.array([row]) agent.remember(X, action, reward, next_state) #if done: # agent.update_target_model() # print("episode: {}/{}, score: {}, e: {:.2}" # .format(e, EPISODES, time, agent.epsilon)) # break if len(agent.memory) > batch_size: agent.replay(batch_size) X = np.array([row]) holdings_actions_1 = holdings_actions action = agent.act(X) actions_df.loc[key, iter_num] = holdings_actions iter_num += 1 #Check convergence converged = False pdb.set_trace() previous_days = 13 trades = pd.DataFrame( data=actions_df.iloc[:, -1], index=actions_df.index).diff().shift(-1).fillna(0) #trades = pd.concat([pd.DataFrame(data=[[0]]*previous_days,index=normalized_values.index[0:previous_days],columns=trades.columns.tolist()),trades]) #trades = pd.concat([pd.DataFrame(data=actions_df.iloc[0,-1],index=normalized_values.index[0],columns=trades.columns.tolist()),trades]) #trades = trades.append(pd.DataFrame(data=[actions_df.iloc[:,-1][0]],columns=trades.columns.tolist(),index=[trades.index[0]])) trades.sort_index(axis=0, inplace=True) trades.iloc[-1] = -1 * trades.iloc[:-1].sum() trades.columns = ['Shares'] trades['Symbol'] = symbol trades['Order'] = trades['Shares'].to_frame().applymap( lambda x: { -2000: 'SELL', -1500: 'SELL', -1000: 'SELL', -500: 'SELL', 0: 0, 500: "BUY", 1000: "BUY", 1500: "BUY", 2000: "BUY" }[x]) trades['Shares'] = trades['Shares'].abs() trades['Date'] = trades.index self.agent = agent
def addEvidence(self, symbol = "IBM", \ sd=dt.datetime(2008,1,1), \ ed=dt.datetime(2009,1,1), \ sv = 1000000): # add your code to do learning here (normalized_values, bbp, moving_avarage, rsi_val, rsi_spy, momentum, sma_cross) = indicators(sd=sd, ed=ed, syms=[symbol], allocs=[1], sv=sv, gen_plot=False) norm_val = normalized_values.copy() normalized_values = pd.DataFrame(data=pd.qcut(normalized_values, 10, labels=False), index=normalized_values.index) bbp = pd.DataFrame(data=pd.qcut(bbp, 10, labels=False), index=bbp.index) moving_avarage = pd.DataFrame(data=pd.qcut(moving_avarage, 10, labels=False), index=moving_avarage.index) rsi_val = pd.DataFrame(data=pd.qcut(rsi_val, 10, labels=False), index=rsi_val.index) rsi_spy = pd.DataFrame(data=pd.qcut(rsi_spy, 10, labels=False), index=rsi_spy.index) momentum = pd.DataFrame(data=pd.qcut(momentum, 10, labels=False), index=momentum.index) sma_cross = pd.DataFrame( data=pd.cut(sma_cross, 3, labels=False), index=sma_cross.index) # REVISAR ESTE , CREO QUE ESTA MAL #start = pd.DataFrame(data=[[0] + [1]*(len(sma_cross)-1)][0],index=normalized_values.index) #states = pd.concat([normalized_values,bbp,moving_avarage,rsi_val,rsi_spy,momentum,sma_cross],axis=1).apply(lambda x : x.fillna(0).astype(int).astype(str).str.cat(),axis=1).to_frame().iloc[13:,:] states = pd.concat( [bbp, moving_avarage, rsi_val, rsi_spy, momentum, sma_cross], axis=1).apply( lambda x: x.fillna(0).astype(int).astype(str).str.cat(), axis=1).to_frame().iloc[13:, :] robot = Ql.QLearner( num_states=(10**6) + (10**5) + (10**4) + (10**3) + (10**2) + 3, num_actions=5 ) # PENDIENTE MEJORAR PARA QUE SEA DEL TAMANIO DE LAS BINS FILA 29 a la 35 max_iter = 5000 actions_df = pd.DataFrame(index=states.index, data=[0] * len(states)) iter_num = 0 converged = False dates = pd.date_range(sd, ed) prices_all = ut.get_data([symbol], dates)[symbol] while not converged and iter_num < max_iter: #pdb.set_trace() holdings_actions = 0 syms = [symbol] X = int(states.iloc[0]) action = 2 #robot.querysetstate(int(states.iloc[0])) holdings_actions_1 = 0 for key, row in states.iterrows(): #pdb.set_trace() #change = ((norm_val.loc[key]/norm_val.iloc[norm_val.index.get_loc(key)-1])-1).values[0] #holdings.append(holdings[-1] + holdings_actions*prices_all.loc[key]) #reward = ((holdings[-1]/(holdings[-2]+holdings_actions*prices_all[prices_all.index.get_loc(key)-1])-1)*1000) holdings_actions = { 0: 1000, 1: 500, 2: 0, 3: -500, 4: -1000 }[action] if (key == states.index[0]): holdings_diff = 0 else: price_t = prices_all.iloc[prices_all.index.get_loc(key) - 1] price_t_plus_1 = prices_all.loc[key] cash = -1 * (holdings_actions - holdings_actions_1) * price_t holdings_diff = holdings_actions * price_t_plus_1 - holdings_actions_1 * price_t + cash if (holdings_actions_1 - holdings_actions) != 0: holdings_diff = holdings_diff - self.commission reward = holdings_diff X = int(states.loc[key]) holdings_actions_1 = holdings_actions action = robot.query(X, reward) actions_df.loc[key, iter_num] = holdings_actions #holdings_actions[1] = holdings_actions[1]*(1-self.impact) iter_num += 1 #Check convergence converged = False pdb.set_trace() previous_days = 13 trades = pd.DataFrame( data=actions_df.iloc[:, -1], index=actions_df.index).diff().shift(-1).fillna(0) #trades = pd.concat([pd.DataFrame(data=[[0]]*previous_days,index=normalized_values.index[0:previous_days],columns=trades.columns.tolist()),trades]) pdb.set_trace() #trades = 
pd.concat([pd.DataFrame(data=actions_df.iloc[0,-1],index=normalized_values.index[0],columns=trades.columns.tolist()),trades]) #trades = trades.append(pd.DataFrame(data=[actions_df.iloc[:,-1][0]],columns=trades.columns.tolist(),index=[trades.index[0]])) trades.sort_index(axis=0, inplace=True) trades.iloc[-1] = -1 * trades.iloc[:-1].sum() trades.columns = ['Shares'] trades['Symbol'] = symbol trades['Order'] = trades['Shares'].to_frame().applymap( lambda x: { -2000: 'SELL', -1500: 'SELL', -1000: 'SELL', -500: 'SELL', 0: 0, 500: "BUY", 1000: "BUY", 1500: "BUY", 2000: "BUY" }[x]) trades['Shares'] = trades['Shares'].abs() trades['Date'] = trades.index self.robot = robot
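# --- Illustrative sketch (not part of the original file) ------------------------------
# How the tabular state index for Ql.QLearner is formed above: each indicator's bin label
# becomes one digit, the digits are concatenated with Series.str.cat(), and the resulting
# string is cast to int before being passed to robot.query(X, reward). The bin labels below
# are made up purely to show the mechanics.
import pandas as pd

row = pd.Series([3, 7, 2, 9, 1, 0])  # bins: bbp, moving_avarage, rsi_val, rsi_spy, momentum, sma_cross
state_string = row.fillna(0).astype(int).astype(str).str.cat()  # "372910"
state_index = int(state_string)                                 # 372910
print(state_string, state_index)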
def testPolicy(self, symbol = "IBM", \ sd=dt.datetime(2009,1,1), \ ed=dt.datetime(2010,1,1), \ sv = 10000): (normalized_values, bbp, moving_avarage, rsi_val, rsi_spy, momentum, sma_cross) = indicators(sd=sd, ed=ed, syms=[symbol], allocs=[1], sv=sv, gen_plot=False) norm_val = normalized_values.copy() normalized_values = pd.DataFrame(data=pd.qcut(normalized_values, 10, labels=False), index=normalized_values.index) bbp = pd.DataFrame(data=pd.qcut(bbp, 10, labels=False), index=bbp.index) moving_avarage = pd.DataFrame(data=pd.qcut(moving_avarage, 10, labels=False), index=moving_avarage.index) rsi_val = pd.DataFrame(data=pd.qcut(rsi_val, 10, labels=False), index=rsi_val.index) rsi_spy = pd.DataFrame(data=pd.qcut(rsi_spy, 10, labels=False), index=rsi_spy.index) momentum = pd.DataFrame(data=pd.qcut(momentum, 10, labels=False), index=momentum.index) sma_cross = pd.DataFrame(data=pd.cut(sma_cross, 3, labels=False), index=sma_cross.index) states = pd.concat([ normalized_values, bbp, moving_avarage, rsi_val, rsi_spy, momentum, sma_cross ], axis=1).apply(lambda x: x.fillna(0)).iloc[13:, :] holdings = pd.DataFrame(data=[0] * len(states), index=states.index) pdb.set_trace() for key, state in states.iterrows(): action = self.agent.act(np.array([state])) holdings.loc[key] = { 0: -1000, 1: -500, 2: 0, 3: 500, 4: 1000 }[action] pdb.set_trace() # here we build a fake set of trades # your code should return the same sort of data #dates = pd.date_range(sd, ed) #prices_all = ut.get_data([symbol], dates) # automatically adds SPY #trades = prices_all[[symbol,]] # only portfolio symbols #trades_SPY = prices_all['SPY'] # only SPY, for comparison later #trades.values[:,:] = 0 # set them all to nothing #trades.values[0,:] = 1000 # add a BUY at the start #trades.values[40,:] = -1000 # add a SELL #trades.values[41,:] = 1000 # add a BUY #trades.values[60,:] = -2000 # go short from long #trades.values[61,:] = 2000 # go long from short #trades.values[-1,:] = -1000 #exit on the last day #if self.verbose: print type(trades) # it better be a DataFrame! #if self.verbose: print trades if self.verbose: print(prices_all) return trades
def testPolicy(self, symbol, sd=dt.datetime(2010, 1, 1), ed=dt.datetime(2011, 12, 31),
               sv=1000000):
    (normalized_values, bbp, moving_avarage, rsi_val, rsi_spy, momentum,
     sma_cross) = indicators(sd=sd, ed=ed, syms=symbol,
                             allocs=[1 / (len(symbol))] * len(symbol),
                             sv=sv, gen_plot=False)
    norm_val2 = pd.DataFrame()
    for i in normalized_values.columns.tolist():
        norm_val = pd.DataFrame(data=normalized_values[i].copy(),
                                index=normalized_values.index)
        # Second difference of the normalized price, brought back one day via shift(-1),
        # gives a +/-1 direction signal that is scaled to target holdings of +/-1000 shares.
        norm_val['2'] = normalized_values[i].diff().fillna(0).diff().fillna(0).shift(-1)
        norm_val.loc[norm_val['2'] > 0, '3'] = 1
        norm_val.loc[norm_val['2'] < 0, '3'] = -1
        norm_val['4'] = norm_val['3'] * 1000
        norm_val['5'] = norm_val['4'].diff().fillna(0)
        # .ix is removed in current pandas; positional row access via .iloc instead.
        norm_val.iloc[0, norm_val.columns.get_loc('5')] = norm_val.iloc[1, norm_val.columns.get_loc('4')]
        norm_val.iloc[-1, norm_val.columns.get_loc('5')] = -1 * norm_val['5'].sum()

        inicial = 1
        nueva = pd.DataFrame(columns=[i, 'Dates'])
        for key, row in norm_val.iterrows():
            if abs(row['5']) == 2000:
                # A position flip (+1000 -> -1000 or vice versa) is split into two
                # 1000-share orders on the same date.
                nueva.loc[inicial - 1, i] = row['5'] / 2
                nueva.loc[inicial - 1, 'Dates'] = key
                nueva.loc[inicial, i] = 0
                nueva.loc[inicial, 'Dates'] = key
                nueva.loc[inicial + 1, i] = row['5'] / 2
                nueva.loc[inicial + 1, 'Dates'] = key
                inicial = inicial + 2
            else:
                nueva.loc[inicial - 1, i] = row['5']
                nueva.loc[inicial - 1, 'Dates'] = key
                inicial = inicial + 1
        nueva.set_index('Dates', inplace=True)
        nueva.rename_axis(None)
        norm_val2 = pd.concat([norm_val2, nueva])

    pdb.set_trace()  # debug breakpoint
    orders = pd.DataFrame(data=norm_val2[symbol], index=norm_val2.index, columns=symbol)
    orders.columns = ['Shares']
    orders['Date'] = orders.index
    orders['Order'] = 0
    orders.loc[orders['Shares'] == -1000, 'Order'] = 'SELL'
    orders.loc[orders['Shares'] == 1000, 'Order'] = 'BUY'
    orders.loc[orders['Shares'] == -1000, 'Shares'] = 1000
    orders.loc[orders['Shares'] == 1000, 'Shares'] = 1000
    orders['Symbol'] = symbol[0]
    orders.index = range(len(orders))
    market = compute_portvals(orders, start_val=100000, commission=0, impact=0.0)
    assess_portfolio(portfolio=market, sd=sd, ed=ed, syms=symbol, gen_plot=True,
                     allocs=[1], sv=1000000)
    return orders
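# --- Illustrative sketch (not part of the original file) ------------------------------
# Why the loop above splits |diff| == 2000 rows: a flip from +1000 to -1000 shares is a
# 2000-share change, which is written out as two 1000-share orders on the same date so
# no single order exceeds 1000 shares. Toy data; column names chosen for illustration.
import pandas as pd

flips = pd.DataFrame({"5": [1000, 0, -2000, 0, 1000]},
                     index=pd.date_range("2010-01-04", periods=5, freq="B"))
rows = []
for date, change in flips["5"].items():
    if abs(change) == 2000:
        rows.append((date, change / 2))  # first half of the flip
        rows.append((date, change / 2))  # second half, same date
    elif change != 0:
        rows.append((date, change))
orders = pd.DataFrame(rows, columns=["Dates", "Shares"]).set_index("Dates")
print(orders)  # the -2000 flip appears as two -1000 rows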
def testPolicy(self, symbol, sd=dt.datetime(2010, 1, 1), ed=dt.datetime(2011, 12, 31),
               sv=1000000):
    (normalized_values, bbp, moving_avarage, rsi_val, rsi_spy, momentum,
     sma_cross) = indicators(sd=sd, ed=ed, syms=symbol, allocs=[1],
                             sv=sv, gen_plot=False)
    orders = bbp.copy() * 0
    # Rule-based signals: go long when price sits below its SMA and the lower Bollinger
    # band and is oversold (RSI < 30) while SPY is not oversold; mirror the logic for
    # shorts. SPY's RSI is taken as a plain (n, 1) array so it compares elementwise
    # (the original compared a Python list against an int, which fails in Python 3).
    spy_rsi = rsi_spy[['SPY']].values
    orders[(moving_avarage < 0.95) & (bbp < 0) & (rsi_val < 30) & (spy_rsi > 30)] = 1000
    orders[(moving_avarage > 1.05) & (bbp > 1) & (rsi_val > 70) & (spy_rsi < 70)] = -1000
    orders[(sma_cross != 0)] = 0
    orders.ffill(inplace=True)
    orders.fillna(0, inplace=True)

    norm_val = normalized_values.copy()
    norm_val['2'] = normalized_values.diff().fillna(0).diff().fillna(0)
    norm_val.loc[norm_val['2'] > 0, '3'] = 1
    norm_val.loc[norm_val['2'] < 0, '3'] = -1
    norm_val['4'] = norm_val['3'] * 1000
    norm_val['5'] = norm_val['4'].diff().fillna(0)
    # .ix is removed in current pandas; positional row access via .iloc instead.
    norm_val.iloc[0, norm_val.columns.get_loc('5')] = norm_val.iloc[1, norm_val.columns.get_loc('4')]
    norm_val.iloc[-1, norm_val.columns.get_loc('5')] = -1 * norm_val['5'].sum()

    inicial = 0
    nueva = pd.DataFrame(columns=['1', 'Dates'])
    for key, row in norm_val.iterrows():
        if abs(row['5']) == 2000:
            nueva.loc[inicial, '1'] = row['5'] / 2
            nueva.loc[inicial, 'Dates'] = key
            nueva.loc[inicial + 1, '1'] = row['5'] / 2
            nueva.loc[inicial + 1, 'Dates'] = key
            inicial = inicial + 1
        else:
            nueva.loc[inicial, '1'] = row['5']
            nueva.loc[inicial, 'Dates'] = key
            inicial = inicial + 1
    pdb.set_trace()  # debug breakpoint
    nueva.set_index('Dates', inplace=True)

    orders[1:] = orders.diff()
    orders.iloc[0] = 0
    orders.columns = ['Shares']
    orders['Date'] = orders.index
    orders['Order'] = 0
    orders.loc[orders['Shares'] == -1000, 'Order'] = 'SELL'
    orders.loc[orders['Shares'] == 1000, 'Order'] = 'BUY'
    orders.loc[orders['Shares'] == -1000, 'Shares'] = 1000
    orders.loc[orders['Shares'] == 1000, 'Shares'] = 1000
    orders['Symbol'] = symbol[0]
    orders.index = range(len(orders))
    market = compute_portvals(orders, start_val=100000, commission=9.95, impact=0.005)
    pdb.set_trace()  # debug breakpoint
    assess_portfolio(portfolio=market, sd=sd, ed=ed, syms=symbol, gen_plot=True,
                     allocs=[1], sv=1000000)
    return orders
def addEvidence(self, symbol = "IBM", \ sd=dt.datetime(2015,1,1), \ ed=dt.datetime(2017,1,1), \ sv = 1000000): btc = pd.read_csv('COINBASE_FILTERED.CSV') size = int(len(btc) * 0.005) btc = btc.iloc[-3 * size:-size] btc[btc.columns.values] = btc[btc.columns.values].ffill() btc['TR'] = 0 a = btc['High'] - btc['Low'] b = btc['Low'] - btc['Close'].shift(-1) c = btc['High'] - btc['Close'].shift(-1) btc['TR'] = pd.concat([a, b, c], axis=1).max(axis=1) btc['ATR'] = btc['TR'].ewm(span=10).mean() btc['Delta'] = btc['Close'] - btc['Open'] btc['to_predict'] = btc['Delta'].apply(lambda x: 1 if (x > 0) else 0) btc.index = pd.to_datetime(btc['Timestamp'], infer_datetime_format=True, unit='s') (normalized_values, bbp, moving_avarage, rsi_val, momentum, sma_cross) = indicators(data=btc) norm_val = normalized_values.copy() states = pd.concat([ normalized_values, bbp, moving_avarage, rsi_val, momentum, sma_cross ], axis=1).apply(lambda x: x.fillna(0)).iloc[13:, :] self.prices = normalized_values.copy() state_size = 6 # Tamanio del vector de estados action_size = 5 max_iter = 10 # Iteraciones Maximas para el aprendizaje actions_df = pd.DataFrame(index=states.index, data=[0] * len(states)) iter_num = 0 converged = False dates = pd.date_range(sd, ed) self.prices_all = btc[ 'Weighted_Price'] #ut.get_data([symbol], dates)[symbol] agent = DQNAgent(state_size, action_size) batch_size = 2 #64/32 comienzo = time.time() for e in range(max_iter): print("Va por la iteracion ", e) holdings_actions = 0 syms = [symbol] X = np.array([states.iloc[0]]) action = 2 for key, row in states.iloc[1:].iterrows(): (reward, holdings_actions) = self.calculate_reward( holdings_actions_1=holdings_actions, action=action, key=key) next_state = np.array([row]) agent.remember(X, action, reward, next_state) if len(agent.memory) > batch_size: agent.replay(batch_size) X = np.array([row]) holdings_actions_1 = holdings_actions action = agent.act(X) actions_df.loc[key, iter_num] = holdings_actions iter_num += 1 converged = False print(time.time() - comienzo) previous_days = 13 trades = pd.DataFrame( data=actions_df.iloc[:, -1], index=actions_df.index).diff().shift(-1).fillna(0) trades.sort_index(axis=0, inplace=True) trades.iloc[-1] = -1 * trades.iloc[:-1].sum() trades.columns = ['Shares'] trades['Symbol'] = symbol trades['Order'] = trades['Shares'].to_frame().applymap( lambda x: { -2: 'SELL', -1.5: 'SELL', -1: 'SELL', -0.5: 'SELL', 0: 0, 0.5: "BUY", 1: "BUY", 1.5: "BUY", 2: "BUY" }[x]) trades['Shares'] = trades['Shares'].abs() trades['Date'] = trades.index compute_portvals(trades) pdb.set_trace() self.agent = agent
def testPolicy(self, symbol = "IBM", \ sd=dt.datetime(2009,1,1), \ ed=dt.datetime(2010,1,1), \ sv = 10000): btc = pd.read_csv("COINBASE_FILTERED.CSV") size = int(len(btc) * 0.005) btc = btc.iloc[-size:] btc[btc.columns.values] = btc[btc.columns.values].ffill() btc['TR'] = 0 a = btc['High'] - btc['Low'] b = btc['Low'] - btc['Close'].shift(-1) c = btc['High'] - btc['Close'].shift(-1) btc['TR'] = pd.concat([a, b, c], axis=1).max(axis=1) btc['ATR'] = btc['TR'].ewm(span=10).mean() btc['Delta'] = btc['Close'] - btc['Open'] btc['to_predict'] = btc['Delta'].apply(lambda x: 1 if (x > 0) else 0) btc.index = pd.to_datetime(btc['Timestamp'], infer_datetime_format=True, unit='s') self.prices_all = btc[ 'Weighted_Price'] #ut.get_data([symbol], dates)[symbol] (normalized_values, bbp, moving_avarage, rsi_val, momentum, sma_cross) = indicators(data=btc) norm_val = normalized_values.copy() states = pd.concat([ normalized_values, bbp, moving_avarage, rsi_val, momentum, sma_cross ], axis=1).apply(lambda x: x.fillna(0)).iloc[13:, :] actions_df = pd.DataFrame(index=states.index, data=[0] * len(states)) holdings = pd.DataFrame(data=[0] * len(states), index=states.index) for key, state in states.iterrows(): action = self.agent.act(np.array([state])) holdings.loc[key] = {0: -1, 1: -0.5, 2: 0, 3: 0.5, 4: 1}[action] pdb.set_trace() previous_days = 13 trades = pd.DataFrame( data=actions_df.iloc[:, -1], index=actions_df.index).diff().shift(-1).fillna(0) trades.sort_index(axis=0, inplace=True) trades.iloc[-1] = -1 * trades.iloc[:-1].sum() trades.columns = ['Shares'] trades['Symbol'] = symbol trades['Order'] = trades['Shares'].to_frame().applymap( lambda x: { -2: 'SELL', -1.5: 'SELL', -1: 'SELL', -0.5: 'SELL', 0: 0, 0.5: "BUY", 1: "BUY", 1.5: "BUY", 2: "BUY" }[x]) trades['Shares'] = trades['Shares'].abs() trades['Date'] = trades.index self.agent = agent if self.verbose: print(prices_all) return trades
import datetime as dt

import pandas as pd

import util as ut
import random
from indicators_fun import indicators
import pdb
import QLearner as Ql
from marketsimcode import *

sd = dt.datetime(2008, 1, 1)
ed = dt.datetime(2009, 1, 1)
symbol = 'JPM'
sv = 1000000

(normalized_values, bbp, moving_avarage, rsi_val, rsi_spy, momentum,
 sma_cross) = indicators(sd=sd, ed=ed, syms=[symbol], allocs=[1],
                         sv=sv, gen_plot=False)

# Label: 1 if the normalized price rises on the next day, 0 otherwise.
nuevo = normalized_values.diff().shift(-1).fillna(0).applymap(lambda x: 1 if x > 0 else 0)
nuevo.columns = ['Y']

data = pd.concat([normalized_values, bbp, moving_avarage, rsi_val, rsi_spy,
                  momentum, sma_cross, nuevo], axis=1)
data.columns = ['normalized_values', 'bbp', 'moving_avarage', 'rsi_val', 'rsi_spy',
                'momentum', 'sma_cross', 'Y']
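# --- Illustrative sketch (not part of the original file) ------------------------------
# What the Y label built above encodes: 1 when the normalized price rises on the next day,
# 0 otherwise (the last day has no next-day move, so fillna(0) labels it 0). Toy numbers.
toy = pd.DataFrame({'JPM': [1.00, 1.02, 1.01, 1.05]},
                   index=pd.date_range('2008-01-02', periods=4, freq='B'))
labels = toy.diff().shift(-1).fillna(0).applymap(lambda x: 1 if x > 0 else 0)
print(labels)  # 1, 0, 1, 0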
def addEvidence(self, symbol = "IBM", \ sd=dt.datetime(2015,1,1), \ ed=dt.datetime(2017,1,1), \ sv = 1000000): # add your code to do learning here btc = pd.read_csv( "coinbaseUSD_1-min_data_2014-12-01_to_2018-06-27.csv") size = int(len(btc) * 0.1) btc = btc.iloc[-3 * size:-size] btc[btc.columns.values] = btc[btc.columns.values].ffill() btc['TR'] = 0 a = btc['High'] - btc['Low'] b = btc['Low'] - btc['Close'].shift(-1) c = btc['High'] - btc['Close'].shift(-1) btc['TR'] = pd.concat([a, b, c], axis=1).max(axis=1) btc['ATR'] = btc['TR'].ewm(span=10).mean() btc['Delta'] = btc['Close'] - btc['Open'] btc['to_predict'] = btc['Delta'].apply(lambda x: 1 if (x > 0) else 0) btc.index = pd.to_datetime(btc['Timestamp'], infer_datetime_format=True, unit='s') (normalized_values, bbp, moving_avarage, rsi_val, momentum, sma_cross) = indicators(data=btc) norm_val = normalized_values.copy() #normalized_values = pd.DataFrame(data=pd.qcut(normalized_values,10,labels=False),index=normalized_values.index) #bbp = pd.DataFrame(data=pd.qcut(bbp,10,labels=False),index=bbp.index) #moving_avarage = pd.DataFrame(data=pd.qcut(moving_avarage,10,labels=False),index=moving_avarage.index) #rsi_val = pd.DataFrame(data=pd.qcut(rsi_val,10,labels=False),index=rsi_val.index) # # rsi_spy = pd.DataFrame(data=pd.qcut(rsi_spy,10,labels=False),index=rsi_spy.index) #momentum = pd.DataFrame(data=pd.qcut(momentum,10,labels=False),index=momentum.index) #sma_cross = pd.DataFrame(data=pd.cut(sma_cross,3,labels=False),index=sma_cross.index) states = pd.concat([ normalized_values, bbp, moving_avarage, rsi_val, momentum, sma_cross ], axis=1).apply(lambda x: x.fillna(0)).iloc[13:, :] self.prices = normalized_values.copy() state_size = 6 action_size = 5 max_iter = 1 actions_df = pd.DataFrame(index=states.index, data=[0] * len(states)) iter_num = 0 converged = False dates = pd.date_range(sd, ed) self.prices_all = btc[ 'Weighted_Price'] #ut.get_data([symbol], dates)[symbol] agent = DQNAgent(state_size, action_size) batch_size = 64 for e in range(max_iter): print("Va por la iteracion ", e) holdings_actions = 0 syms = [symbol] X = np.array([states.iloc[0]]) action = 2 for key, row in states.iloc[1:].iterrows(): (reward, holdings_actions) = self.calculate_reward( holdings_actions_1=holdings_actions, action=action, key=key) #next_state, reward, done, _ = self.calculate_reward(action) #reward = reward if not done else -10 next_state = np.array([row]) agent.remember(X, action, reward, next_state) #if done: # agent.update_target_model() # print("episode: {}/{}, score: {}, e: {:.2}" # .format(e, EPISODES, time, agent.epsilon)) # break if len(agent.memory) > batch_size: agent.replay(batch_size) X = np.array([row]) holdings_actions_1 = holdings_actions action = agent.act(X) actions_df.loc[key, iter_num] = holdings_actions iter_num += 1 #Check convergence converged = False pdb.set_trace() previous_days = 13 trades = pd.DataFrame( data=actions_df.iloc[:, -1], index=actions_df.index).diff().shift(-1).fillna(0) trades.sort_index(axis=0, inplace=True) trades.iloc[-1] = -1 * trades.iloc[:-1].sum() trades.columns = ['Shares'] trades['Symbol'] = symbol trades['Order'] = trades['Shares'].to_frame().applymap( lambda x: { -2000: 'SELL', -1500: 'SELL', -1000: 'SELL', -500: 'SELL', 0: 0, 500: "BUY", 1000: "BUY", 1500: "BUY", 2000: "BUY" }[x]) trades['Shares'] = trades['Shares'].abs() trades['Date'] = trades.index self.agent = agent
def testPolicy(self, symbol = "IBM", \ sd=dt.datetime(2009,1,1), \ ed=dt.datetime(2010,1,1), \ sv = 10000): btc = pd.read_csv( "coinbaseUSD_1-min_data_2014-12-01_to_2018-06-27.csv") size = int(len(btc) * 0.1) btc = btc.iloc[-size:] btc[btc.columns.values] = btc[btc.columns.values].ffill() btc['TR'] = 0 a = btc['High'] - btc['Low'] b = btc['Low'] - btc['Close'].shift(-1) c = btc['High'] - btc['Close'].shift(-1) btc['TR'] = pd.concat([a, b, c], axis=1).max(axis=1) btc['ATR'] = btc['TR'].ewm(span=10).mean() btc['Delta'] = btc['Close'] - btc['Open'] btc['to_predict'] = btc['Delta'].apply(lambda x: 1 if (x > 0) else 0) btc.index = pd.to_datetime(btc['Timestamp'], infer_datetime_format=True, unit='s') self.prices_all = btc[ 'Weighted_Price'] #ut.get_data([symbol], dates)[symbol] (normalized_values, bbp, moving_avarage, rsi_val, momentum, sma_cross) = indicators(data=btc) #(normalized_values ,bbp,moving_avarage,rsi_val,rsi_spy,momentum,sma_cross) = indicators(sd=sd,ed=ed,syms=[symbol],allocs=[1],sv=sv,gen_plot=False) norm_val = normalized_values.copy() #normalized_values = pd.DataFrame(data=pd.qcut(normalized_values,10,labels=False),index=normalized_values.index) #bbp = pd.DataFrame(data=pd.qcut(bbp,10,labels=False),index=bbp.index) #moving_avarage = pd.DataFrame(data=pd.qcut(moving_avarage,10,labels=False),index=moving_avarage.index) #rsi_val = pd.DataFrame(data=pd.qcut(rsi_val,10,labels=False),index=rsi_val.index) ##rsi_spy = pd.DataFrame(data=pd.qcut(rsi_spy,10,labels=False),index=rsi_spy.index) #momentum = pd.DataFrame(data=pd.qcut(momentum,10,labels=False),index=momentum.index) #sma_cross = pd.DataFrame(data=pd.cut(sma_cross,3,labels=False),index=sma_cross.index) states = pd.concat([ normalized_values, bbp, moving_avarage, rsi_val, momentum, sma_cross ], axis=1).apply(lambda x: x.fillna(0)).iloc[13:, :] holdings = pd.DataFrame(data=[0] * len(states), index=states.index) pdb.set_trace() for key, state in states.iterrows(): action = self.agent.act(np.array([state])) holdings.loc[key] = { 0: -1000, 1: -500, 2: 0, 3: 500, 4: 1000 }[action] pdb.set_trace() # here we build a fake set of trades # your code should return the same sort of data #dates = pd.date_range(sd, ed) #prices_all = ut.get_data([symbol], dates) # automatically adds SPY #trades = prices_all[[symbol,]] # only portfolio symbols #trades_SPY = prices_all['SPY'] # only SPY, for comparison later #trades.values[:,:] = 0 # set them all to nothing #trades.values[0,:] = 1000 # add a BUY at the start #trades.values[40,:] = -1000 # add a SELL #trades.values[41,:] = 1000 # add a BUY #trades.values[60,:] = -2000 # go short from long #trades.values[61,:] = 2000 # go long from short #trades.values[-1,:] = -1000 #exit on the last day #if self.verbose: print type(trades) # it better be a DataFrame! #if self.verbose: print trades if self.verbose: print(prices_all) return trades