import matplotlib.pyplot as plt

from agent import Agent            # assumed module locations for the
from market_env import MarketEnv   # Agent and MarketEnv classes used below


def main():
    nb_actions = 3
    obs_size = 9
    window_size = 10
    batch_size = 2048
    stock = "BAC"
    episode = 35
    total_spent = 0
    total_sold = 0

    agent = Agent(window_size=window_size, action_size=nb_actions,
                  batch_size=batch_size, gamma=0.95, epsilon=1.0,
                  epsilon_decay=0.99, epsilon_min=0.001, learning_rate=0.001,
                  is_eval=True, stock_name=stock, episode=episode)
    env = MarketEnv(stock, window_size=window_size, state_size=obs_size,
                    account_balance=1000000, is_eval=True, shares_to_buy=100,
                    max_positions=1000, train_test_split=.8)

    state = env.reset()
    for time in range(env.l):
        action = agent.act(state)[0]
        # Map the actor's continuous output to a discrete trade:
        # negative -> hold (2), between 0 and 1 -> buy (0), above 1 -> sell (1).
        if action < 0:
            choice = 2
        elif 0 < action < 1:
            choice = 0
        elif action > 1:
            choice = 1
        next_state, reward, done = env.step(choice, time)
        agent.remember(state, action, reward, next_state, done)
        state = next_state

    # Plot the closing prices, with buys marked in green and sells in red.
    prices = [line[3] for line in env.prices]
    dates = list(range(len(env.prices)))
    plt.plot(dates, prices)
    for line in env.buy:
        plt.plot(line[0], line[1], 'go', markersize=2)
        total_spent += line[1]
    for line in env.sell:
        plt.plot(line[0], line[1], 'ro', markersize=2)
        total_sold += line[1]

    percentage_gain = ((env.account_balance - env.starting_balance)
                       / env.starting_balance) * 100
    print("Profitable Trades: " + str(env.profitable_trades))
    print("Unprofitable Trades: " + str(env.unprofitable_trades))
    print("Percentage Gain: " + str(percentage_gain))
    print("Amount Spent: " + str(total_spent))
    print("Amount Sold: " + str(total_sold))
    # Save the figure before show(); once the window is closed the figure
    # is gone and savefig() would write an empty image.
    plt.savefig("models/{}/{}-{}/{}".format(stock, stock, str(episode), stock))
    plt.show()
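One note on the mapping above: when the actor outputs exactly 0 or 1, none of the branches fire and choice is left unassigned, raising a NameError at the step() call. A small hypothetical helper (not in the original code) that closes those gaps and could be shared with the training script further down:

def map_action_to_choice(action):
    # Thresholds mirror the scripts in this section:
    # negative -> hold, up to 1 -> buy, above 1 -> sell.
    if action < 0:
        return 2  # hold
    if action <= 1:
        return 0  # buy
    return 1      # sell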
import numpy as np

# Training loop excerpt: `env`, `model`, `exp_replay`, `epsilon`, `epoch`,
# and `batch_size` are assumed to be defined earlier in the script.
for e in range(epoch):
    # loss = 0.
    game_over = False
    input_t = env.reset()
    print("run status {}".format(e))
    while not game_over:
        input_prev = input_t
        # print(input_prev)
        isRandom = False
        # Epsilon-greedy exploration: act randomly with probability epsilon,
        # otherwise act greedily on the model's Q-value estimates.
        if np.random.rand() <= epsilon:
            action = np.random.randint(0, env.action_space.n, size=1)[0]
            isRandom = True
        else:
            q = model.predict(np.array([input_prev]))
            action = np.argmax(q[0])
        input_t, reward, game_over, info = env.step(action)
        if game_over:
            print("total reward : ", env.cur_reward)
        # Store the transition and draw a training batch from the buffer.
        exp_replay.remember([input_prev, action, reward, input_t], game_over)
        batch = exp_replay.get_batch(model, batch_size=batch_size)
        # loss = model.train_on_batch(batch[0], batch[1])

# model_json = model.to_json()
# with open("model.json", "w") as json_file:
#     json_file.write(model_json)
# model.save_weights("model.h5")
# model.save_weights("model_bk.h5")
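The loop above leans on an ExperienceReplay object that is not shown. Below is a minimal sketch of what it is assumed to do, following the common Keras Q-learning pattern; the max_memory and discount parameters are illustrative and the real class may differ.

import numpy as np

class ExperienceReplay:
    def __init__(self, max_memory=100000, discount=0.95):
        self.max_memory = max_memory  # cap on stored transitions
        self.discount = discount      # gamma in the Q-learning target
        self.memory = []

    def remember(self, experience, game_over):
        # experience = [state, action, reward, next_state]
        self.memory.append([experience, game_over])
        if len(self.memory) > self.max_memory:
            del self.memory[0]

    def get_batch(self, model, batch_size=32):
        # Assumes 1-D numpy state vectors.
        len_memory = len(self.memory)
        num_actions = model.output_shape[-1]
        state_dim = self.memory[0][0][0].shape[0]
        n = min(len_memory, batch_size)
        inputs = np.zeros((n, state_dim))
        targets = np.zeros((n, num_actions))
        for i, idx in enumerate(np.random.randint(0, len_memory, size=n)):
            state, action, reward, next_state = self.memory[idx][0]
            game_over = self.memory[idx][1]
            inputs[i] = state
            # Start from the current estimates so only the taken action's
            # target changes.
            targets[i] = model.predict(np.array([state]))[0]
            if game_over:
                targets[i, action] = reward
            else:
                q_next = np.max(model.predict(np.array([next_state]))[0])
                targets[i, action] = reward + self.discount * q_next
        return inputs, targets

The (inputs, targets) pair is exactly what the commented-out model.train_on_batch(batch[0], batch[1]) call expects.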
import os


def main():
    window_size = 10
    batch_size = 2048
    episodes = 10000
    max_episode_len = 39000 * 3  # one year of trading, in minutes
    stock = "BAC"
    args = {
        'tau': .001,
        'gamma': .99,
        'lr_actor': .0001,
        'lr_critic': .001,
        'batch_size': max_episode_len
    }
    env = MarketEnv(stock, buy_position=3, window_size=window_size,
                    account_balance=1000000, shares_to_buy=100,
                    train_test_split=.8, max_episode_len=max_episode_len)
    agent = Agent(args, state_size=env.state_size, window_size=env.window_size,
                  action_size=env.action_size, action_bound=env.action_bound[1],
                  is_eval=False, stock_name=stock)

    episode_ave_max_q = 0
    ep_reward = 0
    for i in range(episodes):
        state = env.reset()
        for time in range(env.l):
            action = agent.act(state)[0]
            # Same continuous-to-discrete mapping as the evaluation script.
            if action < 0:
                choice = 2
            elif 0 < action < 1:
                choice = 0
            elif action > 1:
                choice = 1
            next_state, reward, done = env.step(choice, time)
            agent.remember(state, action, reward, next_state, done)
            state = next_state
            # if agent.replay_buffer.size() == batch_size:
            #     print("Replaying")
            #     episode_ave_max_q += agent.replay(time, i, episode_ave_max_q)
            ep_reward += reward
            # range(env.l) never reaches env.l itself, so compare against
            # env.l - 1 to train on the final step of a full episode.
            if done or time == env.l - 1:
                episode_ave_max_q += agent.replay(time, i, episode_ave_max_q)
                break

        model_name = "{}-{}".format(stock, str(i))
        path = "models/{}/{}/".format(stock, model_name)
        if i % 5 == 0:
            if not os.path.exists(path):
                os.makedirs(path)
            # Touch an empty marker file next to the checkpoint.
            with open(os.path.join(path, 'LTYP.mif'), 'w'):
                pass
            agent.saver.save(agent.sess, path + model_name, global_step=i)

        summary_str = agent.sess.run(agent.summary_ops, feed_dict={
            agent.summary_vars[0]: ep_reward,
            agent.summary_vars[1]: episode_ave_max_q
        })
        agent.writer.add_summary(summary_str, i)
        agent.writer.flush()

        # Print before zeroing the accumulators; otherwise the logged
        # reward and Qmax are always 0.
        print('| Reward: {:d} | Episode: {:d} | Qmax: {:.4f}'.format(
            int(ep_reward), i, episode_ave_max_q))
        episode_ave_max_q = 0
        ep_reward = 0
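The args dict carries the standard DDPG hyperparameters: gamma discounts future rewards, the two learning rates drive the actor and critic optimizers, and tau controls how slowly the target networks track the online networks inside agent.replay. A minimal sketch of that soft update, assuming TensorFlow 1.x-style variable lists as the agent's use of sess and saver suggests (the actual Agent internals may differ):

import tensorflow as tf

def soft_update_ops(online_vars, target_vars, tau=0.001):
    # theta_target <- tau * theta_online + (1 - tau) * theta_target
    # With tau = .001 the targets trail the online networks slowly, which
    # stabilizes the critic's bootstrapped targets.
    return [t.assign(tau * o + (1.0 - tau) * t)
            for o, t in zip(online_vars, target_vars)]

Build these ops once at graph-construction time and sess.run() them after each training step; rebuilding them every step would grow the graph without bound.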