# Reset once up front to obtain a sample observation; its length is used
# as the agent's input shape.
obs = env.reset()
agent = DQNAgent(
    actions=actions,
    memory=memory,
    update_interval=500,
    train_interval=1,
    batch_size=32,
    memory_interval=1,
    observation=obs,
    input_shape=[len(obs)],
    training=True,
    policy=policy,
)
agent.compile()

result = []
for episode in range(500):  # run 500 episodes
    agent.reset()

    # Re-initialise the environment and give the agent a deep copy of the
    # first observation (presumably so it never aliases env-owned state --
    # confirm against DQNAgent.observe).
    # NOTE: a random warm-up step via env.action_space.sample() was tried
    # here previously and is currently disabled.
    observation = deepcopy(env.reset())
    agent.observe(observation)

    for t in range(250):  # at most 250 steps per episode
        # Call env.render() here to display the environment while stepping.
        action = agent.act()

        # step() yields the resulting observation, the reward, whether the
        # episode has finished, and additional info.
        observation, reward, done, info = env.step(action)
        observation = deepcopy(observation)
        agent.observe(observation, reward, done)

        if done:
            break

# test