Example #1
import os
import time

# Agent and Market are the project's own classes, defined elsewhere in the material.

def main():

    window_size = 5
    episode_count = 10
    stock_name = "^GSPC_2011"

    agent = Agent(window_size)
    market = Market(window_size=window_size, stock_name=stock_name)

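    # number of transitions sampled per experience-replay update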
    batch_size = 32

    start_time = time.time()
    for e in range(episode_count + 1):
        print("Episodio" + str(e) + "/" + str(episode_count))
        agent.reset()
        state, price_data = market.reset()  # get the initial state and first price from the market

        for t in range(market.last_data_index):
            # ask the agent for an action given the current state
            action, bought_price = agent.act(state, price_data)

            # get the next state and the reward from the market
            next_state, next_price_data, reward, done =\
                market.get_next_state_reward(action, bought_price)

            # store the transition in the replay memory
            agent.memory.append((state, action, reward, next_state, done))
            # learn from past experience once enough transitions are stored
            if len(agent.memory) > batch_size:
                agent.experience_replay(batch_size)

            state = next_state
            price_data = next_price_data

            if done:
                print("--------------------------------")
                print("Ganancias totales: {0}".format(
                    agent.get_total_profit()))
                print("--------------------------------")

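        # checkpoint the model every 10 episodes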
        if e % 10 == 0:
            os.makedirs("models", exist_ok=True)
            agent.model.save("models/model_rl" + str(e))

    end_time = time.time()
    training_time = round(end_time - start_time)
    print("Entrenamiento tomó {0} segundos.".format(training_time))