Beispiel #1
0
def learning(best_agent):
    """Replay ``best_agent``'s recorded episodes into a fresh ``QLearning`` agent.

    The reference agent's experience and Q-values are printed first. A new
    ``QLearning(12, 4)`` instance then receives the same ``episodes`` and
    accumulates them into its ``history`` one by one, calling ``learn()``
    followed by ``reset()`` whenever the episode index is a positive multiple
    of 10. Finally the new agent's Q-values and experience are printed.

    Notes:
        * The first learning step fires at index 10, so it covers indices
          0..10; later steps each cover the following 10 indices.
        * Episodes after the last positive multiple of 10 are added to
          ``history`` but no ``learn()`` call follows them in this function.
        * NOTE(review): a second definition of ``learning`` appears later in
          this file and replaces this one when the module is loaded.

    Args:
        best_agent: Object exposing ``print_experience()``, ``get_q_values``
            and an iterable ``episodes`` attribute.
    """
    # Show the reference agent's state before replaying anything.
    best_agent.print_experience()
    # `get_q_values` is passed without call parentheses -- presumably a
    # property; confirm against the agent class.
    print_state(best_agent.get_q_values)

    # Presumably (number of states, number of actions) -- TODO confirm.
    learner = QLearning(12, 4)

    # Before learning: hand the recorded episodes to the new agent.
    learner.episodes = best_agent.episodes

    # Learning: accumulate episodes, training at every 10th index (except 0).
    for index, steps in enumerate(best_agent.episodes):
        learner.history += steps
        if index == 0 or index % 10:
            continue
        learner.learn()
        learner.reset()

    # After learning: show what the new agent ended up with.
    print_state(learner.get_q_values)
    learner.print_experience()
def learning(best_agent):
    """Train a new ``QLearning`` agent from the episodes stored on ``best_agent``.

    Steps, in order:
      1. Print ``best_agent``'s experience and pass its ``get_q_values`` to
         ``print_state``.
      2. Create ``QLearning(12, 4)`` and assign it ``best_agent.episodes``.
      3. Add each episode to the new agent's ``history``; at every episode
         index that is a non-zero multiple of 10, call ``learn()`` and then
         ``reset()``.
      4. Pass the new agent's ``get_q_values`` to ``print_state`` and print
         its experience.

    Episodes that come after the last non-zero multiple of 10 are appended to
    ``history`` without a subsequent ``learn()`` call here.

    NOTE(review): this redefines a ``learning`` function declared earlier in
    the same file, so this is the definition that takes effect.

    Args:
        best_agent: Source agent; must provide ``print_experience()``,
            ``get_q_values`` and ``episodes``.
    """
    # Reference output from the source agent.
    best_agent.print_experience()
    print_state(best_agent.get_q_values)

    # Constructor arguments are presumably state/action counts -- TODO confirm.
    student = QLearning(12, 4)

    # Before learning: share the recorded episodes with the new agent.
    student.episodes = best_agent.episodes

    # Learning: replay every episode, training once per completed batch.
    for count, recorded in enumerate(best_agent.episodes):
        student.history += recorded
        # Index 0 is excluded, so the first batch spans indices 0..10.
        batch_complete = count != 0 and count % 10 == 0
        if batch_complete:
            student.learn()
            student.reset()

    # After learning: report the new agent's Q-values and experience.
    print_state(student.get_q_values)
    student.print_experience()