Example #1
import matplotlib.pyplot as plt

# Agent and MarketEnv are the example project's own classes.

def main():
    nb_actions = 3
    obs_size = 9
    window_size = 10
    batch_size = 2048
    stock = "BAC"
    episode = 35
    total_spent = 0
    total_sold = 0

    agent = Agent(window_size=window_size,
                  action_size=nb_actions,
                  batch_size=batch_size,
                  gamma=0.95,
                  epsilon=1.0,
                  epsilon_decay=0.99,
                  epsilon_min=0.001,
                  learning_rate=0.001,
                  is_eval=True,
                  stock_name=stock,
                  episode=episode)
    env = MarketEnv(stock,
                    window_size=window_size,
                    state_size=obs_size,
                    account_balance=1000000,
                    is_eval=True,
                    shares_to_buy=100,
                    max_positions=1000,
                    train_test_split=.8)

    state = env.reset()

    for time in range(env.l):
        action = agent.act(state)[0]

        # Map the continuous policy output onto the environment's three
        # discrete choices.
        if action < 0:
            choice = 2
        elif action < 1:
            choice = 0
        else:
            choice = 1

        next_state, action, reward, done = env.step(choice, time)

        agent.remember(state, action, reward, next_state, done)
        state = next_state

    prices = [line[3] for line in env.prices]
    dates = [i for i in range(len(env.prices))]
    plt.plot(dates, prices)

    for line in env.buy:
        plt.plot(line[0], line[1], "go", markersize=2)
        total_spent += line[1]

    for line in env.sell:
        plt.plot(line[0], line[1], "ro", markersize=2)
        total_sold += line[1]

    percentage_gain = ((env.account_balance - env.starting_balance) /
                       env.starting_balance) * 100

    print("Profitable Trades: " + str(env.profitable_trades))
    print("Unprofitable Trades: " + str(env.unprofitable_trades))
    print("Percentage Gain: " + str(percentage_gain))
    print("Amount Spent: " + str(total_spent))
    print("Amount Sold: " + str(total_sold))

    # Save the figure before plt.show(), which can clear the current figure.
    plt.savefig("models/{}/{}-{}/{}".format(stock, stock, str(episode), stock))
    plt.show()
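
# main() is only defined above, never called; a standard entry-point guard
# (not part of the original snippet) is needed to actually run the evaluation.
if __name__ == "__main__":
    main()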
Example #2
import numpy as np
from keras.optimizers import RMSprop

# MarketEnv, NerualModel and ExperienceReplay are the example project's own classes.
env = MarketEnv("data")
epoch = 1000000
epsilon = 0.5
batch_size = 30

Neural = NerualModel()
model = Neural.getModel()

rms = RMSprop()
model.compile(loss='mse', optimizer=rms)
exp_replay = ExperienceReplay()

for e in range(epoch):
    #loss = 0.
    game_over = False
    input_t = env.reset()
    print("run status {}", format(e))
    while not game_over:
        input_prev = input_t
        #print(input_prev)
        isRandom = False
        if np.random.rand() <= epsilon:
            action = np.random.randint(0, env.action_space.n, size=1)[0]
            isRandom = True
        else:
            q = model.predict(np.array([input_prev]))
            action = np.argmax(q[0])

        input_t, reward, game_over, info = env.step(action)
        if game_over:
            print("total reward : ", env.cur_reward)
Example #3
import os

# Agent and MarketEnv are again the example project's own classes; this
# variant configures an actor-critic style agent (tau, lr_actor, lr_critic).

def main():
    window_size = 10
    batch_size = 2048
    episodes = 10000
    max_episode_len = 39000 * 3  # One Year of trading in minutes
    stock = "BAC"

    args = {
        'tau': .001,
        'gamma': .99,
        'lr_actor': .0001,
        'lr_critic': .001,
        'batch_size': max_episode_len
    }

    env = MarketEnv(stock,
                    buy_position=3,
                    window_size=window_size,
                    account_balance=1000000,
                    shares_to_buy=100,
                    train_test_split=.8,
                    max_episode_len=max_episode_len)
    agent = Agent(args,
                  state_size=env.state_size,
                  window_size=env.window_size,
                  action_size=env.action_size,
                  action_bound=env.action_bound[1],
                  is_eval=False,
                  stock_name=stock)

    episode_ave_max_q = 0
    ep_reward = 0

    for i in range(episodes):
        state = env.reset()

        for time in range(env.l):

            action = agent.act(state)[0]

            # Map the continuous policy output onto the environment's three
            # discrete choices.
            if action < 0:
                choice = 2
            elif action < 1:
                choice = 0
            else:
                choice = 1

            next_state, reward, done = env.step(choice, time)

            agent.remember(state, action, reward, next_state, done)
            state = next_state

            # if agent.replay_buffer.size() == batch_size:
            #     print("Replaying")
            #     episode_ave_max_q += agent.replay(time, i, episode_ave_max_q)

            ep_reward += reward

            if done or time == env.l - 1:
                episode_ave_max_q += agent.replay(time, i, episode_ave_max_q)
                break

        model_name = "{}-{}".format(stock, str(i))
        path = "models/{}/{}/".format(stock, model_name)

        if i % 5 == 0:
            if not os.path.exists(path):
                os.makedirs(path)

            # Touch an empty placeholder file alongside the checkpoint.
            with open(os.path.join(path, 'LTYP.mif'), 'w'):
                pass
            agent.saver.save(agent.sess, path + model_name, global_step=i)
            summary_str = agent.sess.run(agent.summary_ops,
                                         feed_dict={
                                             agent.summary_vars[0]:
                                             ep_reward,
                                             agent.summary_vars[1]:
                                             episode_ave_max_q
                                         })
            agent.writer.add_summary(summary_str, i)
            agent.writer.flush()

            episode_ave_max_q = 0
            ep_reward = 0

        print('| Reward: {:d} | Episode: {:d} | Qmax: {:.4f}'.format(
            int(ep_reward), i, (episode_ave_max_q)))
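
# The summary block above assumes the agent exposes summary_ops and
# summary_vars. A rough sketch of how such TF1-style scalar summaries are
# commonly built (an assumption about this project, not its actual code):
import tensorflow as tf  # TF1.x graph mode, matching agent.sess / agent.saver


def build_summaries():
    # Graph-side scalars whose values the training loop supplies via feed_dict
    # whenever it runs summary_ops.
    episode_reward = tf.Variable(0.)
    tf.summary.scalar("Reward", episode_reward)
    episode_ave_max_q = tf.Variable(0.)
    tf.summary.scalar("Qmax", episode_ave_max_q)
    summary_ops = tf.summary.merge_all()
    summary_vars = [episode_reward, episode_ave_max_q]
    return summary_ops, summary_vars

# agent.summary_ops and agent.summary_vars would correspond to the two return
# values above, and agent.writer to a tf.summary.FileWriter pointing at the
# model directory.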