Esempio n. 1
0
# Take one observation from a fresh environment reset; it is handed to the
# agent below and its length determines the agent's input shape.
obs = env.reset()

# Build the DQN agent in training mode.
# NOTE(review): the *_interval values are presumably counted in agent steps
# -- confirm against the DQNAgent implementation.
agent = DQNAgent(
    actions=actions,
    policy=policy,
    memory=memory,
    memory_interval=1,
    batch_size=32,
    train_interval=1,
    update_interval=500,
    observation=obs,
    input_shape=[len(obs)],  # one-element shape: the observation's length
    training=True,
)
agent.compile()

# Training loop: each episode resets the agent and the environment, then
# alternates agent.act() / env.step() until the environment reports `done`
# or the per-episode step limit is reached.
result = []  # not filled in within the visible part of this loop
for episode in range(500):  # run 500 episodes
    agent.reset()
    observation = env.reset()  # initialise the environment
    # observation, _, _, _ = env.step(env.action_space.sample())
    # NOTE(review): the copy presumably keeps the agent from holding a
    # reference to an object the environment may mutate later -- confirm.
    observation = deepcopy(observation)
    # Give the agent the initial observation (no reward/done yet).
    agent.observe(observation)
    for t in range(250):  # at most 250 steps per episode
        # env.render()  # display
        action = agent.act()
        # env.step returns the state resulting from the action, the reward,
        # whether the game was cleared, and any other information.
        observation, reward, done, info = env.step(
            action)
        observation = deepcopy(observation)
        # Report the outcome of the step to the agent.
        agent.observe(observation, reward, done)
        if done:
            # Episode ended before the step limit.
            break

    # test