# --- Training rollout: let the agent interact with the environment, learning enabled ---
observation = env.reset()  # initialize the environment
observation = deepcopy(observation)
agent.observe(observation)
for step in range(250):  # run for at most 250 timesteps
    # env.render()  # display
    action = agent.act()
    # env.step returns: next state, reward, whether the episode terminated,
    # and auxiliary info
    observation, reward, done, info = env.step(action)
    observation = deepcopy(observation)
    agent.observe(observation, reward, done)
    if done:
        break

# --- Evaluation rollout: same interaction loop with learning switched off ---
agent.training = False
observation = env.reset()  # initialize the environment
agent.observe(observation)
for step in range(250):
    # env.render()  # display
    action = agent.act()
    observation, reward, done, info = env.step(action)
    # NOTE(review): unlike the training loop, reward/done are not passed and the
    # observation is not deep-copied here — presumably intentional for eval; confirm.
    agent.observe(observation)
    if done:
        print("Episode {}, maintain {} timesteps".format(episode, step))
        result.append(step)
        break
agent.training = True

x = np.arange(len(result))  # x-axis indices for plotting per-episode results