Exemple #1
0
def bt(data, num_features, use_existing_model, model_name):
    """Step an agent through ``data`` once, printing every move and a summary.

    Args:
        data: Indexable series; ``data[t]`` is handed to
            ``Dqn.execute_action`` on each step.
        num_features: Forwarded to ``Agent`` and ``Dqn.get_state``; it is also
            the first index the loop visits.
        use_existing_model: Forwarded unchanged to ``Agent``.
        model_name: Forwarded unchanged to ``Agent``.

    Returns:
        None. All results are written to stdout.
    """
    dqn = Dqn()
    agent = Agent(num_features, use_existing_model, model_name)
    state = dqn.get_state(data, num_features, num_features)

    # Running tallies: execute_action receives them and returns updated values.
    # NOTE(review): the buy counter starts at 1, yet dqn.open_orders is not
    # seeded in this function -- confirm Dqn() sets it up appropriately.
    total_profits, total_holds, total_buys, total_sells = 0, 0, 1, 0
    last_index = len(data) - 1
    rule = "-----------------------------------------"

    for t in range(num_features, last_index):
        action = agent.choose_best_action(state)
        outcome = dqn.execute_action(
            action, data[t], t, total_profits, total_holds, total_buys, total_sells)
        reward, total_profits, total_holds, total_buys, total_sells = outcome

        next_state = dqn.get_state(data, t + 1, num_features)
        print(f'row #{t} {agent.actions[action]} @{data[t]}, state1={state}, state2={next_state}, reward={reward}')
        state = next_state

        # Only the last step of the range prints the summary.
        if t != last_index - 1:
            continue
        print(rule)
        print(f'Total Profit: {formatPrice(total_profits)} , Total hold/buy/exit trades: {total_holds} / {total_buys} / {total_sells}')
        print(rule)
Exemple #2
0
    def learn(self, data, episodes, num_features, batch_size,
              use_existing_model, random_action_min=0.1,
              random_action_decay=0.99995, num_neurons=64,
              future_reward_importance=0.95):
        """Train a fresh ``Agent`` over ``data`` for ``episodes`` passes and save it.

        Each episode resets the tallies, sets ``self.open_orders`` to
        ``[data[0]]`` and walks the indices ``num_features .. len(data) - 2``.
        Every step is stored through ``agent.remember``; once the agent's
        memory holds more than ``batch_size`` entries, ``experience_replay``
        runs after each step.

        Args:
            data: Indexable series; ``data[t]`` is passed to ``execute_action``.
            episodes: Number of full passes over ``data``.
            num_features: Forwarded to ``Agent`` and ``get_state``; also the
                first index visited in each episode.
            batch_size: Memory threshold and argument for ``experience_replay``.
            use_existing_model: Forwarded to ``Agent``.
            random_action_min: Forwarded to ``Agent``.
            random_action_decay: Forwarded to ``Agent``.
            num_neurons: Forwarded to ``Agent``.
            future_reward_importance: Forwarded to ``Agent``.

        Returns:
            Tuple ``(profit_vs_episode, trades_vs_episode, epsilon_vs_episode,
            model_name, agent.num_trains, agent.epsilon)``; the three lists
            gain one entry per episode that reaches its final step.
        """
        agent = Agent(num_features, use_existing_model, '', random_action_min,
                      random_action_decay, num_neurons, future_reward_importance)
        last_index = len(data) - 1
        profit_vs_episode = []
        trades_vs_episode = []
        epsilon_vs_episode = []

        for episode in range(1, episodes + 1):
            state = self.get_state(data, num_features, num_features)
            total_profits, total_holds, total_buys, total_sells = 0, 0, 1, 0
            self.open_orders = [data[0]]

            for t in range(num_features, last_index):
                # Original note: trade-off between predicting and acting randomly.
                action = agent.choose_best_action(state)
                outcome = self.execute_action(
                    action, data[t], t, total_profits, total_holds, total_buys, total_sells)
                reward, total_profits, total_holds, total_buys, total_sells = outcome

                done = t == last_index - 1
                next_state = self.get_state(data, t + 1, num_features)

                print(f'row #{t} {agent.actions[action]} @{data[t]}, state1={state}, state2={next_state}, reward={reward}')
                # Keep the transition so experience_replay can use it later.
                agent.remember(state, action, reward, next_state, done)
                state = next_state

                if done:
                    # Epsilon is sampled before this step's replay runs.
                    eps = np.round(agent.epsilon, 3)
                    print(f'Episode {episode}/{episodes} Total Profit: {formatPrice(total_profits)} , Total trades: {total_buys}, probability of random action: {eps}')
                    print("---------------------------------------")
                    profit_vs_episode.append(np.round(total_profits, 4))
                    trades_vs_episode.append(total_buys)
                    epsilon_vs_episode.append(eps)

                # Fit only once memory holds more than batch_size entries.
                if len(agent.memory) > batch_size:
                    agent.experience_replay(batch_size)

        model_name = f"files/output/model_ep{episodes}"
        agent.model.save(model_name)
        print(f'{model_name} saved')
        return (profit_vs_episode, trades_vs_episode, epsilon_vs_episode,
                model_name, agent.num_trains, agent.epsilon)
def bt(data, n_features, use_existing_model, name_model):
    """Back-test an agent over ``data`` and print one summary at the end.

    ``dqn.open_orders`` is seeded with ``data[0]`` and the buy counter starts
    at 1. After the last step of the loop, action index 2 is executed on the
    following element (the original comment describes this as selling the
    position at the end of the episode).

    Args:
        data: Indexable series; ``data[t]`` is passed to
            ``Dqn.execute_action``.
        n_features: Forwarded to ``Agent`` and ``Dqn.get_state``; also the
            first index the loop visits.
        use_existing_model: Forwarded unchanged to ``Agent``.
        name_model: Forwarded unchanged to ``Agent``.

    Returns:
        None. The summary is written to stdout.
    """
    dqn = Dqn()
    dqn.open_orders = [data[0]]
    agent = Agent(n_features, use_existing_model, name_model)
    state = dqn.get_state(data, n_features, n_features)

    # Tallies threaded through execute_action on every step.
    total_profits, total_holds, total_buys, total_sells, total_notvalid = 0, 0, 1, 0, 0
    last_index = len(data) - 1
    rule = "-----------------------------------------"

    for t in range(n_features, last_index):
        action = agent.choose_best_action(state)
        outcome = dqn.execute_action(
            action, data[t], t,
            total_profits, total_holds, total_buys, total_sells, total_notvalid)
        _, total_profits, total_holds, total_buys, total_sells, total_notvalid = outcome

        state = dqn.get_state(data, t + 1, n_features)

        if t != last_index - 1:
            continue

        # Final step: run action 2 on the next element before reporting.
        closing = dqn.execute_action(
            2, data[t + 1], t + 1,
            total_profits, total_holds, total_buys, total_sells, total_notvalid)
        _, total_profits, total_holds, total_buys, total_sells, total_notvalid = closing

        print(rule)
        print(
            f'Total Profit: {formatPrice(total_profits*100)} ,'
            f' Total hold/buy/sell/notvalid trades: {total_holds} / {total_buys} / {total_sells} / {total_notvalid}'
        )
        print(rule)
    def learn(self, data, n_episodes, n_features, batch_size,
              use_existing_model, random_action_min=0.1,
              random_action_decay=0.99995, n_neurons=64,
              future_reward_importance=0.95):
        """Train a fresh ``Agent`` over ``data`` for ``n_episodes`` passes and save it.

        The reward stored in the agent's memory is not the one returned by
        ``execute_action``. It is taken from ``next_state[0][-1]``: as-is for a
        'buy', negated for a 'sell', and 0 for any other action. After the last
        step of an episode, action index 2 is executed on the following element
        (the original comment calls this selling the position at the end of
        the episode) before the episode statistics are recorded.

        Args:
            data: Indexable series; ``data[t]`` is passed to ``execute_action``.
            n_episodes: Number of full passes over ``data``.
            n_features: Forwarded to ``Agent`` and ``get_state``; also the
                first index visited in each episode.
            batch_size: Memory threshold and argument for ``experience_replay``.
            use_existing_model: Forwarded to ``Agent``.
            random_action_min: Forwarded to ``Agent``.
            random_action_decay: Forwarded to ``Agent``.
            n_neurons: Forwarded to ``Agent``.
            future_reward_importance: Forwarded to ``Agent``.

        Returns:
            Tuple ``(profit_vs_episode, trades_vs_episode, epsilon_vs_episode,
            model_name, agent.num_trains, agent.epsilon)``.
        """
        agent = Agent(n_features, use_existing_model, '', random_action_min,
                      random_action_decay, n_neurons, future_reward_importance)
        last_index = len(data) - 1
        profit_vs_episode = []
        trades_vs_episode = []
        epsilon_vs_episode = []

        for episode in range(1, n_episodes + 1):
            state = self.get_state(data, n_features, n_features)
            # total_notvalid: add-on buys, or sells without a previous buy
            # (per the original comment).
            total_profits, total_holds, total_buys, total_sells, total_notvalid = 0, 0, 1, 0, 0
            self.open_orders = [data[0]]

            for t in range(n_features, last_index):
                # Original note: trade-off between predicting and acting randomly.
                action = agent.choose_best_action(state)
                outcome = self.execute_action(
                    action, data[t], t,
                    total_profits, total_holds, total_buys, total_sells, total_notvalid)
                _, total_profits, total_holds, total_buys, total_sells, total_notvalid = outcome

                done = t == last_index - 1
                next_state = self.get_state(data, t + 1, n_features)

                # Shaped reward; presumably next_state[0][-1] is the most
                # recent return in the state window -- TODO confirm.
                action_name = agent.actions[action]
                if action_name == 'buy':
                    reward = next_state[0][-1]
                elif action_name == 'sell':
                    reward = -next_state[0][-1]
                else:
                    reward = 0

                # Keep the transition so experience_replay can use it later.
                agent.remember(state, action, reward, next_state, done)
                state = next_state

                if done:
                    # Final step: run action 2 on the next element first.
                    closing = self.execute_action(
                        2, data[t + 1], t + 1,
                        total_profits, total_holds, total_buys, total_sells, total_notvalid)
                    _, total_profits, total_holds, total_buys, total_sells, total_notvalid = closing

                    # Epsilon is sampled before this step's replay runs.
                    eps = np.round(agent.epsilon, 3)
                    print(
                        f'Episode {episode}/{n_episodes} Total Profit: {formatPrice(total_profits * 100)},'
                        f' Total hold/buy/sell/notvalid trades: {total_holds} / {total_buys} / {total_sells} / {total_notvalid},'
                        f' probability of random action: {eps}')
                    print("---------------------------------------")
                    profit_vs_episode.append(np.round(total_profits, 4))
                    trades_vs_episode.append(total_buys)
                    epsilon_vs_episode.append(eps)

                # Fit as soon as at least batch_size transitions are stored.
                if len(agent.memory) >= batch_size:
                    agent.experience_replay(batch_size)

        model_name = f"files/output/model_ep{n_episodes}"
        agent.model.save(model_name)
        print(f'{model_name} saved')
        return (profit_vs_episode, trades_vs_episode, epsilon_vs_episode,
                model_name, agent.num_trains, agent.epsilon)
# Training driver: configure a run, call learn(), then plot profit per episode.
print('time is')
print(datetime.now().strftime('%H:%M:%S'))

# Print numpy values with 4 decimals and without exponential notation.
np.set_printoptions(precision=4, suppress=True)

start_time = time.time()
seed()

# Run configuration.
stock_name = '^GSPC_2011'  # other data set noted by the author: ^GSPC
window_size = 10  # passed to Agent as its first argument
# Author's note: at least 200 episodes are needed for results; one episode
# trades and learns over all of the data.
episodes = 100
batch_size = 15  # batch size the model learns on
use_existing_model = False

agent = Agent(window_size, use_existing_model, '')
data = getStockDataVec(stock_name)
l = len(data) - 1

print(f'Running {episodes} episodes, on {stock_name} has {l} bars, window of {window_size}, batch of {batch_size}')
profit_vs_episode, trades_vs_episode = learn()
print('finished learning the model. now u can backtest the model created in files/output/ on any stock')
print('python backtest.py ')

print(f'see plot of profit_vs_episode = {profit_vs_episode}')
plot_barchart(profit_vs_episode, "profit per episode", "total profit", "episode", 'green')
        done = True if t == l - 1 else False
        next_state = env.get_state(data, t + 1, window_size + 1)
        agent.remember(state, action, reward, next_state, done)
        state = next_state

        if done:
            print("-----------------------------------------")
            print(
                f'Total Profit: {formatPrice(total_profit)} , Total trades: {trade_count}'
            )
            print("-----------------------------------------")


# Back-test driver: load a saved model, rebuild the agent around it, run bt().
stock_name = '^GSPC_2011'  # other data set noted by the author: ^GSPC
model_name = 'model_ep20'  # author lists model_ep0 / model_ep10 / model_ep20 / model_ep30
model = load_model(f"files/output/{model_name}")

# The window size is read from the second dimension of the first layer's
# input shape rather than being configured by hand.
window_size = model.layers[0].input.shape.as_list()[1]
use_existing_model = True
agent = Agent(window_size, use_existing_model, model_name)

data = getStockDataVec(stock_name)
l = len(data) - 1
trading_fee = 0
agent.open_orders = []

print(f'starting back-testing model {model_name} on {stock_name} (has {l} bars), window of {window_size} bars')
bt()
print(f'finished back-testing model {model_name} on {stock_name} (has {l} bars), window of {window_size} bars')