def run_episode(environment: gym.Env, agent: DQNAgent, render: bool, max_length: int):
    """Play a single episode with the agent and return the total reward.

    Arguments:
        environment {`gym.Env`} -- Environment representing the Markov Decision Process
        agent {`DQNAgent`} -- Reinforcement learning agent that acts in the environment
        render {`bool`} -- Whether to draw each frame of the episode on screen
        max_length {`int`} -- Step budget after which the episode is cut off

    Returns:
        `float` -- Cumulative reward the agent received during the episode
    """
    total_reward = 0
    state = environment.reset()
    for _ in range(max_length):
        if render:
            environment.render()
        action = agent.act(state)
        successor, reward, terminal, _ = environment.step(action)
        # A terminal transition carries no successor state.
        agent.observe(
            Transition(state, action, reward, None if terminal else successor))
        total_reward += reward
        if terminal:
            break
        state = successor
    return total_reward
train_interval=1, batch_size=32, memory_interval=1, observation=obs, input_shape=[len(obs)], training=True, policy=policy) agent.compile() result = [] for episode in range(500): # 1000エピソード回す agent.reset() observation = env.reset() # 環境の初期化 # observation, _, _, _ = env.step(env.action_space.sample()) observation = deepcopy(observation) agent.observe(observation) for t in range(250): # n回試行する # env.render() # 表示 action = agent.act() observation, reward, done, info = env.step( action) # アクションを実行した結果の状態、報酬、ゲームをクリアしたかどうか、その他の情報を返す observation = deepcopy(observation) agent.observe(observation, reward, done) if done: break # test agent.training = False observation = env.reset() # 環境の初期化 agent.observe(observation) for t in range(250):