def bt(data, num_features, use_existing_model, model_name):
    dqn = Dqn()
    agent = Agent(num_features, use_existing_model, model_name)
    state = dqn.get_state(data, num_features, num_features)
    total_profits = 0
    total_holds = 0
    total_buys = 1
    total_sells = 0
    l = len(data) - 1
    for t in range(num_features, l):
        action = agent.choose_best_action(state)  # it will always predict
        reward, total_profits, total_holds, total_buys, total_sells = dqn.execute_action(
            action, data[t], t, total_profits, total_holds, total_buys, total_sells)
        done = True if t == l - 1 else False
        next_state = dqn.get_state(data, t + 1, num_features)
        print(f'row #{t} {agent.actions[action]} @{data[t]}, state1={state}, state2={next_state}, reward={reward}')
        state = next_state
        if done:
            print("-----------------------------------------")
            print(f'Total Profit: {formatPrice(total_profits)} , Total hold/buy/exit trades: {total_holds} / {total_buys} / {total_sells}')
            print("-----------------------------------------")
def learn(self, data, episodes, num_features, batch_size, use_existing_model,
          random_action_min=0.1, random_action_decay=0.99995, num_neurons=64,
          future_reward_importance=0.95):
    agent = Agent(num_features, use_existing_model, '', random_action_min,
                  random_action_decay, num_neurons, future_reward_importance)
    l = len(data) - 1
    rewards_vs_episode = []
    profit_vs_episode = []
    trades_vs_episode = []
    epsilon_vs_episode = []
    for episode in range(1, episodes + 1):
        # print("Episode " + str(episode) + "/" + str(episodes))
        state = self.get_state(data, num_features, num_features)
        total_profits = 0
        total_holds = 0
        total_buys = 1
        total_sells = 0
        # total_rewards = 0
        self.open_orders = [data[0]]
        for t in range(num_features, l):
            action = agent.choose_best_action(state)  # tradeoff between predict and random
            # print(f'state={state}')
            reward, total_profits, total_holds, total_buys, total_sells = self.execute_action(
                action, data[t], t, total_profits, total_holds, total_buys, total_sells)
            done = True if t == l - 1 else False
            next_state = self.get_state(data, t + 1, num_features)
            print(f'row #{t} {agent.actions[action]} @{data[t]}, state1={state}, state2={next_state}, reward={reward}')
            agent.remember(state, action, reward, next_state, done)  # store contents of memory in buffer for future learning
            state = next_state
            if done:
                eps = np.round(agent.epsilon, 3)
                print(f'Episode {episode}/{episodes} Total Profit: {formatPrice(total_profits)} , Total trades: {total_buys}, probability of random action: {eps}')
                print("---------------------------------------")
                # rewards_vs_episode.append(total_rewards)
                profit_vs_episode.append(np.round(total_profits, 4))
                trades_vs_episode.append(total_buys)
                epsilon_vs_episode.append(eps)
            if len(agent.memory) > batch_size:  # if memory of agent gets full
                agent.experience_replay(batch_size)  # fit
                # clean memory ?
        # if episode % 1000 == 0:
        #     model_name = "files/output/model_ep" + str(episode)
        #     agent.model.save(model_name)
        #     print(f'{model_name} saved')
    model_name = "files/output/model_ep" + str(episodes)
    agent.model.save(model_name)
    print(f'{model_name} saved')
    return profit_vs_episode, trades_vs_episode, epsilon_vs_episode, model_name, agent.num_trains, agent.epsilon
def bt(data, n_features, use_existing_model, name_model):
    dqn = Dqn()
    dqn.open_orders = [data[0]]
    agent = Agent(n_features, use_existing_model, name_model)
    state = dqn.get_state(data, n_features, n_features)
    total_profits = 0
    total_holds = 0
    total_buys = 1
    total_sells = 0
    total_notvalid = 0
    l = len(data) - 1
    for t in range(n_features, l):
        action = agent.choose_best_action(state)  # it will always predict
        reward, total_profits, total_holds, total_buys, total_sells, total_notvalid = \
            dqn.execute_action(action, data[t], t, total_profits, total_holds,
                               total_buys, total_sells, total_notvalid)
        done = True if t == l - 1 else False
        next_state = dqn.get_state(data, t + 1, n_features)
        # print(f'row #{t} {agent.actions[action]} @{data[t]}, state1={state}, state2={next_state}, reward={reward}')
        state = next_state
        if done:
            # sell position at end of episode
            reward, total_profits, total_holds, total_buys, total_sells, total_notvalid = \
                dqn.execute_action(2, data[t + 1], t + 1, total_profits, total_holds,
                                   total_buys, total_sells, total_notvalid)
            print("-----------------------------------------")
            print(
                f'Total Profit: {formatPrice(total_profits * 100)} ,'
                f' Total hold/buy/sell/notvalid trades: {total_holds} / {total_buys} / {total_sells} / {total_notvalid}'
            )
            print("-----------------------------------------")
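The Dqn helpers called above (get_state and execute_action) are not shown in these listings. Below is a minimal sketch of what they might look like, assuming the state is a window of sigmoid-squashed one-bar price differences and actions 0/1/2 map to hold/buy/sell against a FIFO open_orders list; the exact reward and position logic is an assumption, not the project's actual code.

import numpy as np

class Dqn:
    """Sketch of the environment helper used by learn()/bt() (assumed implementation)."""

    def __init__(self):
        self.open_orders = []  # prices of currently open long positions

    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    def get_state(self, data, t, n_features):
        # n_features consecutive one-bar price differences ending at bar t, squashed to (0, 1)
        block = data[t - n_features: t + 1]
        state = [self.sigmoid(block[i + 1] - block[i]) for i in range(n_features)]
        return np.array([state])

    def execute_action(self, action, price, t, total_profits,
                       total_holds, total_buys, total_sells, total_notvalid):
        reward = 0
        if action == 1:                            # buy: open a position
            self.open_orders.append(price)
            total_buys += 1
        elif action == 2 and self.open_orders:     # sell: close the oldest open position
            bought_price = self.open_orders.pop(0)
            profit = price - bought_price
            reward = max(profit, 0)
            total_profits += profit
            total_sells += 1
        elif action == 2:                          # sell without an open position
            total_notvalid += 1
        else:                                      # hold
            total_holds += 1
        return reward, total_profits, total_holds, total_buys, total_sells, total_notvalid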
def learn(self, data, n_episodes, n_features, batch_size, use_existing_model,
          random_action_min=0.1, random_action_decay=0.99995, n_neurons=64,
          future_reward_importance=0.95):
    agent = Agent(n_features, use_existing_model, '', random_action_min,
                  random_action_decay, n_neurons, future_reward_importance)
    l = len(data) - 1
    rewards_vs_episode = []
    profit_vs_episode = []
    trades_vs_episode = []
    epsilon_vs_episode = []
    for episode in range(1, n_episodes + 1):
        state = self.get_state(data, n_features, n_features)
        total_profits = 0
        total_holds = 0
        total_buys = 1
        total_sells = 0
        total_notvalid = 0  # add-on buys or sells without previous buy
        # total_rewards = 0
        self.open_orders = [data[0]]
        for t in range(n_features, l):
            action = agent.choose_best_action(state)  # tradeoff between predict and random
            # print(f'state={state}')
            reward, total_profits, total_holds, total_buys, total_sells, total_notvalid = \
                self.execute_action(action, data[t], t, total_profits, total_holds,
                                    total_buys, total_sells, total_notvalid)
            done = True if t == l - 1 else False
            next_state = self.get_state(data, t + 1, n_features)
            # if len(self.open_orders) > 0:  # if long, add next state return as reward
            # print(action, agent.actions[action])
            if agent.actions[action] == 'buy':
                immediate_reward = next_state[0][-1]
            elif agent.actions[action] == 'sell':
                immediate_reward = -next_state[0][-1]
            else:
                immediate_reward = 0
            # print("Immediate reward:{0:.5f} Reward:{1:.5f} Time:{2} Price:{3} Action:{4}".
            #       format(immediate_reward, reward, t, data[t], agent.actions[action]))
            # reward = reward + immediate_reward
            reward = immediate_reward
            # print(f'row #{t} {agent.actions[action]} @{data[t]}, state1={state}, state2={next_state}, reward={reward}')
            # store contents of memory in buffer for future learning
            agent.remember(state, action, reward, next_state, done)
            state = next_state
            if done:
                # sell position at end of episode
                reward, total_profits, total_holds, total_buys, total_sells, total_notvalid = \
                    self.execute_action(2, data[t + 1], t + 1, total_profits, total_holds,
                                        total_buys, total_sells, total_notvalid)
                eps = np.round(agent.epsilon, 3)
                print(
                    f'Episode {episode}/{n_episodes} Total Profit: {formatPrice(total_profits * 100)},'
                    f' Total hold/buy/sell/notvalid trades: {total_holds} / {total_buys} / {total_sells} / {total_notvalid},'
                    f' probability of random action: {eps}')
                print("---------------------------------------")
                # rewards_vs_episode.append(total_rewards)
                profit_vs_episode.append(np.round(total_profits, 4))
                trades_vs_episode.append(total_buys)
                epsilon_vs_episode.append(eps)
            if len(agent.memory) >= batch_size:  # if enough recorded memory available
                agent.experience_replay(batch_size)  # fit
                # clean memory ?
    model_name = "files/output/model_ep" + str(n_episodes)
    agent.model.save(model_name)
    print(f'{model_name} saved')
    return profit_vs_episode, trades_vs_episode, epsilon_vs_episode, model_name, agent.num_trains, agent.epsilon
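The Agent referenced throughout is also not part of these listings. Here is a rough sketch of the interface the loops rely on: an epsilon-greedy choose_best_action, a remember method that appends to a replay buffer, and an experience_replay that fits a Keras model on a sampled mini-batch. The network shape, optimizer, and epsilon-decay placement below are assumptions, not the project's actual code.

import random
from collections import deque

import numpy as np
from keras.models import Sequential, load_model
from keras.layers import Dense
from keras.optimizers import Adam


class Agent:
    """Sketch of the DQN agent interface used above (3 actions: hold/buy/sell)."""

    def __init__(self, n_features, use_existing_model, model_name,
                 random_action_min=0.1, random_action_decay=0.99995,
                 n_neurons=64, future_reward_importance=0.95):
        self.n_features = n_features
        self.actions = ['hold', 'buy', 'sell']
        self.memory = deque(maxlen=10000)       # replay buffer
        self.epsilon = 1.0                      # probability of a random action
        self.epsilon_min = random_action_min
        self.epsilon_decay = random_action_decay
        self.gamma = future_reward_importance   # discount of future rewards
        self.num_trains = 0
        if use_existing_model:
            self.model = load_model("files/output/" + model_name)
        else:
            self.model = self._build_model(n_neurons)

    def _build_model(self, n_neurons):
        model = Sequential()
        model.add(Dense(n_neurons, input_dim=self.n_features, activation='relu'))
        model.add(Dense(n_neurons, activation='relu'))
        model.add(Dense(len(self.actions), activation='linear'))
        model.compile(loss='mse', optimizer=Adam(learning_rate=0.001))
        return model

    def choose_best_action(self, state):
        # explore with probability epsilon, otherwise exploit the model's Q-values
        if np.random.rand() <= self.epsilon:
            return random.randrange(len(self.actions))
        q_values = self.model.predict(state, verbose=0)
        return int(np.argmax(q_values[0]))

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def experience_replay(self, batch_size):
        # fit the network on a random mini-batch of stored transitions
        batch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in batch:
            target = reward
            if not done:
                target = reward + self.gamma * np.amax(self.model.predict(next_state, verbose=0)[0])
            target_q = self.model.predict(state, verbose=0)
            target_q[0][action] = target
            self.model.fit(state, target_q, epochs=1, verbose=0)
        self.num_trains += 1
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay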
print('time is')
print(datetime.now().strftime('%H:%M:%S'))
np.set_printoptions(precision=4)
np.set_printoptions(suppress=True)  # prevent numpy exponential notation on print (default False)
start_time = time.time()
seed()

stock_name = '^GSPC_2011'  # ^GSPC ^GSPC_2011
window_size = 10           # (t) 10 super simple features
episodes = 100             # minimum 200 episodes for results; each episode trades and learns on all the data
batch_size = 15            # train the model on mini-batches of this size
use_existing_model = False

dqn = Dqn()
data = getStockDataVec(stock_name)
l = len(data) - 1
print(f'Running {episodes} episodes on {stock_name} ({l} bars), window of {window_size}, batch of {batch_size}')

profit_vs_episode, trades_vs_episode, epsilon_vs_episode, model_name, num_trains, epsilon = \
    dqn.learn(data, episodes, window_size, batch_size, use_existing_model)

print('finished learning the model. now you can backtest the model created in files/output/ on any stock:')
print('python backtest.py')
print(f'see plot of profit_vs_episode = {profit_vs_episode}')
plot_barchart(profit_vs_episode, "profit per episode", "total profit", "episode", 'green')
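getStockDataVec, formatPrice and plot_barchart are small helpers not shown above. Plausible implementations are sketched below, assuming prices are read from the Close column of a CSV under files/input/ (the path and column name are assumptions).

import csv

import matplotlib.pyplot as plt


def getStockDataVec(stock_name):
    # read closing prices from files/input/<stock_name>.csv (assumed location and format)
    prices = []
    with open("files/input/" + stock_name + ".csv") as f:
        reader = csv.DictReader(f)
        for row in reader:
            prices.append(float(row["Close"]))
    return prices


def formatPrice(n):
    # e.g. -12.3456 -> "-$12.35"
    return ("-$" if n < 0 else "$") + "{0:.2f}".format(abs(n))


def plot_barchart(values, title, ylabel, xlabel, color):
    plt.bar(range(len(values)), values, color=color)
    plt.title(title)
    plt.ylabel(ylabel)
    plt.xlabel(xlabel)
    plt.show()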
done = True if t == l - 1 else False
next_state = env.get_state(data, t + 1, window_size + 1)
agent.remember(state, action, reward, next_state, done)
state = next_state
if done:
    print("-----------------------------------------")
    print(f'Total Profit: {formatPrice(total_profit)} , Total trades: {trade_count}')
    print("-----------------------------------------")


stock_name = '^GSPC_2011'  # ^GSPC ^GSPC_2011
model_name = 'model_ep20'  # model_ep0, model_ep10, model_ep20, model_ep30
model = load_model("files/output/" + model_name)
window_size = model.layers[0].input.shape.as_list()[1]
use_existing_model = True
agent = Agent(window_size, use_existing_model, model_name)
data = getStockDataVec(stock_name)
l = len(data) - 1
trading_fee = 0
agent.open_orders = []
print(f'starting back-testing model {model_name} on {stock_name} (has {l} bars), window of {window_size} bars')
bt(data, window_size, use_existing_model, model_name)
print(f'finished back-testing model {model_name} on {stock_name} (has {l} bars), window of {window_size} bars')