예제 #1
0
    def learn(self, data, episodes, num_features, batch_size, use_existing_model,
              random_action_min=0.1, random_action_decay=0.99995, num_neurons=64,
              future_reward_importance=0.95):
        """Train a freshly constructed ``Agent`` on ``data`` and save its model.

        For every episode the method walks ``t`` from ``num_features`` up to
        ``len(data) - 2``. At each step it asks the agent for an action,
        applies it through ``self.execute_action``, stores the transition
        with ``agent.remember`` and, whenever the agent's memory holds more
        than ``batch_size`` entries, calls ``agent.experience_replay``.
        Each step and each finished episode is reported with ``print``.

        Args:
            data: Indexable sequence; ``data[t]`` is handed to
                ``self.execute_action`` and ``data[0]`` seeds
                ``self.open_orders`` (presumably prices -- confirm).
            episodes: Number of passes over ``data``.
            num_features: Passed to ``Agent`` and used as the window argument
                of ``self.get_state``; also the first value of ``t``.
            batch_size: Size passed to ``agent.experience_replay``; replay
                only runs once the memory holds more than this many items.
            use_existing_model: Forwarded unchanged to ``Agent``.
            random_action_min: Forwarded unchanged to ``Agent``.
            random_action_decay: Forwarded unchanged to ``Agent``.
            num_neurons: Forwarded unchanged to ``Agent``.
            future_reward_importance: Forwarded unchanged to ``Agent``.

        Returns:
            A tuple ``(profit_vs_episode, trades_vs_episode,
            epsilon_vs_episode, model_name, agent.num_trains,
            agent.epsilon)``. The three lists receive one entry per episode
            whose inner loop ran at least once.
        """
        agent = Agent(num_features, use_existing_model, '', random_action_min,
                      random_action_decay, num_neurons, future_reward_importance)
        last_index = len(data) - 1
        profit_vs_episode = []
        trades_vs_episode = []
        epsilon_vs_episode = []

        for episode in range(1, episodes + 1):
            state = self.get_state(data, num_features, num_features)
            total_profits = 0
            total_holds = 0
            # Starts at 1 alongside the pre-seeded open order below
            # (presumably counting that initial position -- confirm).
            total_buys = 1
            total_sells = 0
            self.open_orders = [data[0]]

            for t in range(num_features, last_index):
                # The agent decides between its prediction and a random action.
                action = agent.choose_best_action(state)
                reward, total_profits, total_holds, total_buys, total_sells = self.execute_action(
                    action, data[t], t, total_profits, total_holds, total_buys, total_sells)

                # True only on the final step of the episode.
                done = t == last_index - 1

                next_state = self.get_state(data, t + 1, num_features)

                print(f'row #{t} {agent.actions[action]} @{data[t]}, state1={state}, state2={next_state}, reward={reward}')
                # Keep the transition in the agent's memory for later replay.
                agent.remember(state, action, reward, next_state, done)
                state = next_state

                if done:
                    eps = np.round(agent.epsilon, 3)
                    print(f'Episode {episode}/{episodes} Total Profit: {formatPrice(total_profits)} , Total trades: {total_buys}, probability of random action: {eps}')
                    print("---------------------------------------")
                    profit_vs_episode.append(np.round(total_profits, 4))
                    trades_vs_episode.append(total_buys)
                    epsilon_vs_episode.append(eps)

                # Fit on a sampled batch once more than batch_size
                # transitions have been remembered.
                if len(agent.memory) > batch_size:
                    agent.experience_replay(batch_size)

        model_name = "files/output/model_ep" + str(episodes)
        agent.model.save(model_name)
        print(f'{model_name} saved')
        return profit_vs_episode, trades_vs_episode, epsilon_vs_episode, model_name, agent.num_trains, agent.epsilon
예제 #2
0
    def learn(self,
              data,
              n_episodes,
              n_features,
              batch_size,
              use_existing_model,
              random_action_min=0.1,
              random_action_decay=0.99995,
              n_neurons=64,
              future_reward_importance=0.95):
        """Train a freshly constructed ``Agent`` on ``data`` and save its model.

        For every episode the method walks ``t`` from ``n_features`` up to
        ``len(data) - 2``. At each step it asks the agent for an action and
        applies it through ``self.execute_action`` to update the running
        counters. The reward stored with ``agent.remember`` is NOT the one
        returned by ``execute_action``; it is derived from the last element
        of the next state: ``+next_state[0][-1]`` for ``'buy'``,
        ``-next_state[0][-1]`` for ``'sell'`` and ``0`` otherwise. On the
        final step one extra action ``2`` is executed at ``data[t + 1]``
        before the episode summary is printed.

        Args:
            data: Indexable sequence; ``data[t]`` is handed to
                ``self.execute_action`` and ``data[0]`` seeds
                ``self.open_orders`` (presumably prices -- confirm).
            n_episodes: Number of passes over ``data``.
            n_features: Passed to ``Agent`` and used as the window argument
                of ``self.get_state``; also the first value of ``t``.
            batch_size: Size passed to ``agent.experience_replay``; replay
                runs once the memory holds at least this many items.
            use_existing_model: Forwarded unchanged to ``Agent``.
            random_action_min: Forwarded unchanged to ``Agent``.
            random_action_decay: Forwarded unchanged to ``Agent``.
            n_neurons: Forwarded unchanged to ``Agent``.
            future_reward_importance: Forwarded unchanged to ``Agent``.

        Returns:
            A tuple ``(profit_vs_episode, trades_vs_episode,
            epsilon_vs_episode, model_name, agent.num_trains,
            agent.epsilon)``. The three lists receive one entry per episode
            whose inner loop ran at least once.
        """
        agent = Agent(n_features, use_existing_model, '', random_action_min,
                      random_action_decay, n_neurons, future_reward_importance)
        last_index = len(data) - 1
        profit_vs_episode = []
        trades_vs_episode = []
        epsilon_vs_episode = []

        # Action index executed at the end of every episode; assumed to map
        # to 'sell' in agent.actions -- TODO confirm against Agent.
        closing_action = 2

        for episode in range(1, n_episodes + 1):
            state = self.get_state(data, n_features, n_features)
            total_profits = 0
            total_holds = 0
            # Starts at 1 alongside the pre-seeded open order below
            # (presumably counting that initial position -- confirm).
            total_buys = 1
            total_sells = 0
            total_notvalid = 0  # add-on buys or sells without previous buy
            self.open_orders = [data[0]]

            for t in range(n_features, last_index):
                # The agent decides between its prediction and a random action.
                action = agent.choose_best_action(state)

                # Only the counters are kept; the reward returned here is
                # replaced by the next-state based reward computed below.
                _, total_profits, total_holds, total_buys, total_sells, total_notvalid = \
                    self.execute_action(action, data[t], t, total_profits, total_holds, total_buys, total_sells,
                                        total_notvalid)

                # True only on the final step of the episode.
                done = t == last_index - 1

                next_state = self.get_state(data, t + 1, n_features)

                # Reward is the signed last entry of the next state.
                action_name = agent.actions[action]
                if action_name == 'buy':
                    reward = next_state[0][-1]
                elif action_name == 'sell':
                    reward = -next_state[0][-1]
                else:
                    reward = 0

                # Keep the transition in the agent's memory for later replay.
                agent.remember(state, action, reward, next_state, done)
                state = next_state

                if done:
                    # Sell position at end of episode; its reward is unused.
                    _, total_profits, total_holds, total_buys, total_sells, total_notvalid = \
                        self.execute_action(closing_action, data[t + 1], t + 1, total_profits, total_holds,
                                            total_buys, total_sells, total_notvalid)
                    eps = np.round(agent.epsilon, 3)
                    print(
                        f'Episode {episode}/{n_episodes} Total Profit: {formatPrice(total_profits * 100)},'
                        f' Total hold/buy/sell/notvalid trades: {total_holds} / {total_buys} / {total_sells} / {total_notvalid},'
                        f' probability of random action: {eps}')
                    print("---------------------------------------")
                    profit_vs_episode.append(np.round(total_profits, 4))
                    trades_vs_episode.append(total_buys)
                    epsilon_vs_episode.append(eps)

                # Fit on a sampled batch once enough transitions are recorded.
                if len(agent.memory) >= batch_size:
                    agent.experience_replay(batch_size)

        model_name = "files/output/model_ep" + str(n_episodes)
        agent.model.save(model_name)
        print(f'{model_name} saved')
        return profit_vs_episode, trades_vs_episode, epsilon_vs_episode, model_name, agent.num_trains, agent.epsilon