def run_expert(arguments, configuration): agent_dir = Path(arguments.output_dir) / "expert" if arguments.overwrite: shutil.rmtree(arguments.output_dir, ignore_errors=True) agent_dir.mkdir(parents=True, exist_ok=False) #case if arguments.case == 1: ball_enabled_ = False fire_enabled_ = True else: ball_enabled_ = False fire_enabled_ = True config = BreakoutConfiguration( brick_rows=arguments.rows, brick_cols=arguments.cols, brick_reward=arguments.brick_reward, step_reward=arguments.step_reward, ball_enabled=ball_enabled_, fire_enabled=fire_enabled_, secondlearner=False) #ball_enabled=False, fire_enabled=True) env = make_env(config, arguments.output_dir, arguments.goal_reward) np.random.seed(arguments.seed) env.seed(arguments.seed) policy = AutomataPolicy((-2, ), nb_steps=configuration.nb_exploration_steps, value_max=1.0, value_min=configuration.min_eps) algorithm = Sarsa if configuration.algorithm == "sarsa" else QLearning agent = Agent(algorithm(None, env.action_space, gamma=configuration.gamma, alpha=configuration.alpha, lambda_=configuration.lambda_), policy=policy, test_policy=EpsGreedyQPolicy(eps=0.01)) history = agent.fit( env, nb_steps=configuration.nb_steps, visualize=configuration.visualize_training, callbacks=[ ModelCheckpoint(str(agent_dir / "checkpoints" / "agent-{}.pkl")), TrainEpisodeLogger() ]) history.save(agent_dir / "history.json") agent.save(Path(agent_dir, "checkpoints", "agent.pkl")) plot_history(history, agent_dir) agent = Agent.load(agent_dir / "checkpoints" / "agent.pkl") agent.test(Monitor(env, agent_dir / "videos"), nb_episodes=5, visualize=True) env.close()
def run_learner(arguments, configuration, dfa: pythomata.dfa.DFA): agent_dir = Path(arguments.output_dir) / "learner" shutil.rmtree(agent_dir, ignore_errors=True) agent_dir.mkdir(parents=True, exist_ok=False) config = BreakoutConfiguration(brick_rows=arguments.rows, brick_cols=arguments.cols, brick_reward=arguments.brick_reward, step_reward=arguments.step_reward, fire_enabled=False, ball_enabled=True) env = make_env_from_dfa(config, dfa) np.random.seed(arguments.seed) env.seed(arguments.seed) policy = AutomataPolicy((-1, ), nb_steps=configuration.nb_exploration_steps, value_max=0.8, value_min=configuration.min_eps) algorithm = Sarsa if configuration.algorithm == "sarsa" else QLearning agent = Agent(algorithm(None, env.action_space, gamma=configuration.gamma, alpha=configuration.alpha, lambda_=configuration.lambda_), policy=policy, test_policy=EpsGreedyQPolicy(eps=0.001)) history = agent.fit( env, nb_steps=configuration.nb_steps, visualize=configuration.visualize_training, callbacks=[ ModelCheckpoint(str(agent_dir / "checkpoints" / "agent-{}.pkl")), TrainEpisodeLogger() ]) history.save(agent_dir / "history.json") agent.save(agent_dir / "checkpoints" / "agent.pkl") plot_history(history, agent_dir) agent = Agent.load(agent_dir / "checkpoints" / "agent.pkl") agent.test(Monitor(env, agent_dir / "videos"), nb_episodes=5, visualize=True) env.close()
def run_expert(arguments, configuration): agent_dir = Path(arguments.output_dir) / "expert" if arguments.overwrite: shutil.rmtree(arguments.output_dir, ignore_errors=True) agent_dir.mkdir(parents=True, exist_ok=False) config = MinecraftConfiguration(horizon=100, nb_goals=nb_tasks, action_space_type=ActionSpaceType.NORMAL) env = make_env(config, arguments.output_dir, arguments.goal_reward, reward_shaping=configuration.reward_shaping) np.random.seed(arguments.seed) env.seed(arguments.seed) policy = AutomataPolicy(tuple(-idx - 2 for idx in range(config.nb_goals)), nb_steps=configuration.nb_exploration_steps, value_max=1.0, value_min=configuration.min_eps) agent = Agent(Sarsa(None, env.action_space, gamma=configuration.gamma, alpha=configuration.alpha, lambda_=configuration.lambda_), policy=policy, test_policy=GreedyQPolicy()) history = agent.fit( env, nb_steps=configuration.nb_steps, visualize=configuration.visualize_training, callbacks=[ ModelCheckpoint(str(agent_dir / "checkpoints" / "agent-{}.pkl")), TrainEpisodeLogger() ]) history.save(agent_dir / "history.json") agent.save(Path(agent_dir, "checkpoints", "agent.pkl")) plot_history(history, agent_dir) agent = Agent.load(agent_dir / "checkpoints" / "agent.pkl") agent.test(Monitor(env, agent_dir / "videos"), nb_episodes=5, visualize=True) env.close()
def run_learner(arguments, configuration, dfa: pythomata.dfa.DFA): agent_dir = Path(arguments.output_dir) / "learner" shutil.rmtree(agent_dir, ignore_errors=True) agent_dir.mkdir(parents=True, exist_ok=False) config = MinecraftConfiguration( horizon=100, nb_goals=1, action_space_type=ActionSpaceType.DIFFERENTIAL) env = make_env_from_dfa(config, dfa, goal_reward=arguments.goal_reward) np.random.seed(arguments.seed) env.seed(arguments.seed) policy = AutomataPolicy((-1, ), nb_steps=configuration.nb_exploration_steps, value_max=1.0, value_min=configuration.min_eps) algorithm = Sarsa if configuration.algorithm == "sarsa" else QLearning agent = Agent(algorithm(None, env.action_space, gamma=configuration.gamma, alpha=configuration.alpha, lambda_=configuration.lambda_), policy=policy, test_policy=GreedyQPolicy()) history = agent.fit( env, nb_steps=configuration.nb_steps, visualize=configuration.visualize_training, callbacks=[ ModelCheckpoint(str(agent_dir / "checkpoints" / "agent-{}.pkl")), TrainEpisodeLogger() ]) history.save(agent_dir / "history.json") agent.save(agent_dir / "checkpoints" / "agent.pkl") plot_history(history, agent_dir) agent = Agent.load(agent_dir / "checkpoints" / "agent.pkl") agent.test(Monitor(env, agent_dir / "videos"), nb_episodes=5, visualize=True) env.close()