def train(env=None):
    """Train a SARSA agent on FrozenLake and report the result.

    Args:
        env: optional pre-built gym environment; when omitted, a fresh
            "FrozenLakeEasy-v0" instance is created.
    """
    if env is None:
        env = gym.make("FrozenLakeEasy-v0")
    agent = SARSAAgent()
    agent.learn(env, episode_count=500)
    # Visualize the learned action values, then the reward history.
    show_q_value(agent.Q)
    agent.show_reward_log()
def train(env=None):
    """Train an epsilon-greedy Monte Carlo agent on FrozenLake.

    Args:
        env: optional pre-built gym environment; when omitted, a fresh
            "FrozenLakeEasy-v0" instance is created.
    """
    if env is None:
        env = gym.make("FrozenLakeEasy-v0")
    agent = MonteCarloAgent(epsilon=0.1)
    agent.learn(env, episode_count=500)
    # Visualize the learned action values, then the reward history.
    show_q_value(agent.Q)
    agent.show_reward_log()
def train(env=None):
    """Run Actor-Critic training on FrozenLake and inspect the actor.

    Args:
        env: optional pre-built gym environment; when omitted, a fresh
            "FrozenLakeEasy-v0" instance is created.
    """
    if env is None:
        env = gym.make("FrozenLakeEasy-v0")
    trainer = ActorCritic(Actor, Critic)
    # Only the actor is needed for visualization; discard the critic.
    actor, _ = trainer.train(env, episode_count=3000)
    print(actor.Q)
    show_q_value(actor.Q)
    actor.show_reward_log()
def train(gamma=0.9):
    """Train a Q-learning agent on FrozenLake over a long horizon.

    Args:
        gamma: discount factor forwarded to the agent's learning loop.
            The original body referenced a module-level ``gamma`` that is
            never defined here, which raised NameError; it is now an
            explicit keyword parameter (default 0.9), keeping the
            zero-argument call signature backward compatible.
    """
    agent = QLearningAgent()
    env = gym.make("FrozenLakeEasy-v0")
    agent.learn(env, episode_count=300000, render=False,
                gamma=gamma, report_interval=100000)
    show_q_value(agent.Q)
    agent.show_reward_log()
def train_gamma(env, name, gamma):
    """Train Q-learning with a given discount factor, then play once.

    The learned value map is saved to ``<name>_stage.png`` and the
    reward curve to ``<name>_figure.png``; finally one playthrough is
    run and its outcome printed alongside ``gamma``.

    Args:
        env: gym environment to train and play in.
        name: file-name prefix for the saved figures.
        gamma: discount factor forwarded to the learning loop.
    """
    agent = QLearningAgent()
    agent.learn(env, episode_count=300000, render=False,
                gamma=gamma, report_interval=100000)
    show_q_value(agent.Q, name=name + "_stage.png")
    agent.show_reward_log(name=name + "_figure.png")
    print("game start")
    outcome = "goal" if play(agent, env) else "game over"
    print("gamma={0} : {1}".format(gamma, outcome))
def train_stg(count=10, name=None):
    """Train ``count`` independent Q-learning agents and play each once.

    Fixes two defects in the original:
      * the ``count`` parameter was ignored (the loop was hard-coded to
        ``range(10)``) — it now controls the number of runs;
      * the default ``name=None`` crashed with TypeError on
        ``name + "..."`` — figures are now only written to files when a
        prefix is supplied.

    Args:
        count: number of independent training runs.
        name: optional file-name prefix; when given, run ``i`` saves
            ``<name>_<i>_stage.png`` and ``<name>_<i>_figure.png``.
    """
    for i in range(count):
        print("now: {}th".format(i))
        agent = QLearningAgent()
        env = gym.make("FrozenLakeEasy-v0")
        env.render()
        agent.learn(env, episode_count=500000, render=False,
                    report_interval=100000)
        if name is None:
            # No prefix: just display instead of saving to disk.
            show_q_value(agent.Q)
            agent.show_reward_log()
        else:
            show_q_value(agent.Q, name=name + "_{}_stage.png".format(i))
            agent.show_reward_log(name=name + "_{}_figure.png".format(i))
        print("game start")
        print("{0}th: {1}".format(
            i + 1, "goal" if play(agent, env) else "game over"))
def train():
    """Train a Q-learning agent briefly on FrozenLake and show results."""
    env = gym.make("FrozenLakeEasy-v0")
    agent = QLearningAgent()
    agent.learn(env, episode_count=500)
    # Visualize the learned action values, then the reward history.
    show_q_value(agent.Q)
    agent.show_reward_log()
gain = reward + gamma * self.Q[n_state][n_action] estimated = self.Q[s][a] self.Q[s][a] += learning_rate * (gain - estimated) s = n_state if self.q_learning: a = self.policy(s, actions) else: a = n_action else: self.log(reward) if e != 0 and e % report_interval == 0: pass # self.show_reward_log(episode=e) def train(q_learning): env = gym.make("FrozenLakeEasy-v0") agent = CompareAgent(q_learning=q_learning) agent.learn(env, episode_count=100000) return dict(agent.Q) if __name__ == "__main__": with Pool() as pool: results = pool.map(train, ([True, False])) for r in results: show_q_value(r)
def train():
    """Train an Actor-Critic pair on FrozenLake and show the actor's values.

    Fixes a NameError in the original: the critic class was misspelled
    ``Critc`` (the sibling Actor-Critic trainer in this file uses
    ``Critic``).
    """
    trainer = ActorCritic(Actor, Critic)
    env = gym.make("FrozenLakeEasy-v0")
    # Only the actor is needed for visualization; discard the critic.
    actor, _ = trainer.train(env, episode_count=3000)
    show_q_value(actor.Q)
    actor.show_reward_log()
def train():
    """Train a SARSA agent on FrozenLake with default settings."""
    env = gym.make("FrozenLakeEasy-v0")
    agent = SARSAAgent()
    agent.learn(env)
    # Visualize the learned action values, then the reward history.
    show_q_value(agent.Q)
    agent.show_reward_log()
def train():
    """Train an epsilon-greedy Monte Carlo agent with default settings."""
    env = gym.make("FrozenLakeEasy-v0")
    agent = MonteCarloAgent(epsilon=0.1)
    agent.learn(env)
    # Visualize the learned action values, then the reward history.
    show_q_value(agent.Q)
    agent.show_reward_log()