def run_expert(arguments, configuration):
    """Train, checkpoint and evaluate the expert agent on Breakout.

    Everything this function writes itself (checkpoints, ``history.json``,
    the output of ``plot_history`` and the test videos) goes under
    ``<arguments.output_dir>/expert``.

    :param arguments: parsed options; the attributes read here are
        ``output_dir``, ``overwrite``, ``rows``, ``cols``, ``brick_reward``,
        ``step_reward``, ``goal_reward`` and ``seed``.
    :param configuration: training settings; the attributes read here are
        ``nb_exploration_steps``, ``min_eps``, ``algorithm``, ``gamma``,
        ``alpha``, ``lambda_``, ``nb_steps`` and ``visualize_training``.
    :raises FileExistsError: if the expert directory already exists, which
        happens when ``arguments.overwrite`` is false and a previous run
        left its output in place.
    """
    agent_dir = Path(arguments.output_dir) / "expert"
    if arguments.overwrite:
        # Removes the whole output directory, not only the expert folder.
        shutil.rmtree(arguments.output_dir, ignore_errors=True)
    # exist_ok=False: refuse to mix artifacts with those of an earlier run.
    agent_dir.mkdir(parents=True, exist_ok=False)

    # NOTE(review): this function used to branch on ``arguments.case``, but
    # both branches assigned ball_enabled=False / fire_enabled=True, so the
    # option had no effect. The values are kept as they were; confirm whether
    # another case was meant to use a different combination.
    config = BreakoutConfiguration(
        brick_rows=arguments.rows,
        brick_cols=arguments.cols,
        brick_reward=arguments.brick_reward,
        step_reward=arguments.step_reward,
        ball_enabled=False,
        fire_enabled=True,
        secondlearner=False)
    env = make_env(config, arguments.output_dir, arguments.goal_reward)

    # Close the environment even when training or evaluation raises.
    try:
        np.random.seed(arguments.seed)
        env.seed(arguments.seed)

        policy = AutomataPolicy((-2, ),
                                nb_steps=configuration.nb_exploration_steps,
                                value_max=1.0,
                                value_min=configuration.min_eps)

        # Any value other than "sarsa" selects Q-learning.
        algorithm = Sarsa if configuration.algorithm == "sarsa" else QLearning
        agent = Agent(algorithm(None,
                                env.action_space,
                                gamma=configuration.gamma,
                                alpha=configuration.alpha,
                                lambda_=configuration.lambda_),
                      policy=policy,
                      test_policy=EpsGreedyQPolicy(eps=0.01))

        checkpoint_dir = agent_dir / "checkpoints"
        history = agent.fit(
            env,
            nb_steps=configuration.nb_steps,
            visualize=configuration.visualize_training,
            callbacks=[
                ModelCheckpoint(str(checkpoint_dir / "agent-{}.pkl")),
                TrainEpisodeLogger()
            ])
        history.save(agent_dir / "history.json")
        agent.save(checkpoint_dir / "agent.pkl")
        plot_history(history, agent_dir)

        # Evaluate the agent as reloaded from disk, not the in-memory one.
        agent = Agent.load(checkpoint_dir / "agent.pkl")
        agent.test(Monitor(env, agent_dir / "videos"),
                   nb_episodes=5,
                   visualize=True)
    finally:
        env.close()
def run_learner(arguments, configuration, dfa: pythomata.dfa.DFA):
    """Train, checkpoint and evaluate the learner agent on Breakout.

    The environment is built from ``dfa`` through ``make_env_from_dfa``.
    Everything this function writes itself (checkpoints, ``history.json``,
    the output of ``plot_history`` and the test videos) goes under
    ``<arguments.output_dir>/learner``.

    :param arguments: parsed options; the attributes read here are
        ``output_dir``, ``rows``, ``cols``, ``brick_reward``, ``step_reward``
        and ``seed``.
    :param configuration: training settings; the attributes read here are
        ``nb_exploration_steps``, ``min_eps``, ``algorithm``, ``gamma``,
        ``alpha``, ``lambda_``, ``nb_steps`` and ``visualize_training``.
    :param dfa: automaton handed to ``make_env_from_dfa``.
    """
    agent_dir = Path(arguments.output_dir) / "learner"
    # The learner directory is always recreated from scratch; unlike the
    # expert run there is no ``overwrite`` check here.
    shutil.rmtree(agent_dir, ignore_errors=True)
    agent_dir.mkdir(parents=True, exist_ok=False)

    config = BreakoutConfiguration(brick_rows=arguments.rows,
                                   brick_cols=arguments.cols,
                                   brick_reward=arguments.brick_reward,
                                   step_reward=arguments.step_reward,
                                   fire_enabled=False,
                                   ball_enabled=True)
    env = make_env_from_dfa(config, dfa)

    # Close the environment even when training or evaluation raises.
    try:
        np.random.seed(arguments.seed)
        env.seed(arguments.seed)

        policy = AutomataPolicy((-1, ),
                                nb_steps=configuration.nb_exploration_steps,
                                value_max=0.8,
                                value_min=configuration.min_eps)

        # Any value other than "sarsa" selects Q-learning.
        algorithm = Sarsa if configuration.algorithm == "sarsa" else QLearning
        agent = Agent(algorithm(None,
                                env.action_space,
                                gamma=configuration.gamma,
                                alpha=configuration.alpha,
                                lambda_=configuration.lambda_),
                      policy=policy,
                      test_policy=EpsGreedyQPolicy(eps=0.001))

        checkpoint_dir = agent_dir / "checkpoints"
        history = agent.fit(
            env,
            nb_steps=configuration.nb_steps,
            visualize=configuration.visualize_training,
            callbacks=[
                ModelCheckpoint(str(checkpoint_dir / "agent-{}.pkl")),
                TrainEpisodeLogger()
            ])

        history.save(agent_dir / "history.json")
        agent.save(checkpoint_dir / "agent.pkl")
        plot_history(history, agent_dir)

        # Evaluate the agent as reloaded from disk, not the in-memory one.
        agent = Agent.load(checkpoint_dir / "agent.pkl")
        agent.test(Monitor(env, agent_dir / "videos"),
                   nb_episodes=5,
                   visualize=True)
    finally:
        env.close()
def run_expert(arguments, configuration):
    """Train, checkpoint and evaluate the expert agent on Minecraft.

    Everything this function writes itself (checkpoints, ``history.json``,
    the output of ``plot_history`` and the test videos) goes under
    ``<arguments.output_dir>/expert``.

    :param arguments: parsed options; the attributes read here are
        ``output_dir``, ``overwrite``, ``goal_reward`` and ``seed``.
    :param configuration: training settings; the attributes read here are
        ``reward_shaping``, ``nb_exploration_steps``, ``min_eps``, ``gamma``,
        ``alpha``, ``lambda_``, ``nb_steps`` and ``visualize_training``.
    :raises FileExistsError: if the expert directory already exists, which
        happens when ``arguments.overwrite`` is false and a previous run
        left its output in place.
    """
    agent_dir = Path(arguments.output_dir) / "expert"
    if arguments.overwrite:
        # Removes the whole output directory, not only the expert folder.
        shutil.rmtree(arguments.output_dir, ignore_errors=True)
    # exist_ok=False: refuse to mix artifacts with those of an earlier run.
    agent_dir.mkdir(parents=True, exist_ok=False)

    # NOTE(review): ``nb_tasks`` is not defined in this function; presumably
    # it is a module-level name -- confirm it exists where this is used.
    config = MinecraftConfiguration(horizon=100,
                                    nb_goals=nb_tasks,
                                    action_space_type=ActionSpaceType.NORMAL)
    env = make_env(config,
                   arguments.output_dir,
                   arguments.goal_reward,
                   reward_shaping=configuration.reward_shaping)

    # Close the environment even when training or evaluation raises.
    try:
        np.random.seed(arguments.seed)
        env.seed(arguments.seed)

        # One entry per goal: -2, -3, ..., -(nb_goals + 1).
        policy = AutomataPolicy(
            tuple(-idx - 2 for idx in range(config.nb_goals)),
            nb_steps=configuration.nb_exploration_steps,
            value_max=1.0,
            value_min=configuration.min_eps)

        # Sarsa is hard-coded here; ``configuration.algorithm`` is not read.
        agent = Agent(Sarsa(None,
                            env.action_space,
                            gamma=configuration.gamma,
                            alpha=configuration.alpha,
                            lambda_=configuration.lambda_),
                      policy=policy,
                      test_policy=GreedyQPolicy())

        checkpoint_dir = agent_dir / "checkpoints"
        history = agent.fit(
            env,
            nb_steps=configuration.nb_steps,
            visualize=configuration.visualize_training,
            callbacks=[
                ModelCheckpoint(str(checkpoint_dir / "agent-{}.pkl")),
                TrainEpisodeLogger()
            ])
        history.save(agent_dir / "history.json")
        agent.save(checkpoint_dir / "agent.pkl")
        plot_history(history, agent_dir)

        # Evaluate the agent as reloaded from disk, not the in-memory one.
        agent = Agent.load(checkpoint_dir / "agent.pkl")
        agent.test(Monitor(env, agent_dir / "videos"),
                   nb_episodes=5,
                   visualize=True)
    finally:
        env.close()
# Example no. 4
# 0
def run_learner(arguments, configuration, dfa: pythomata.dfa.DFA):
    """Train, checkpoint and evaluate the learner agent on Minecraft.

    The environment is built from ``dfa`` through ``make_env_from_dfa``.
    Everything this function writes itself (checkpoints, ``history.json``,
    the output of ``plot_history`` and the test videos) goes under
    ``<arguments.output_dir>/learner``.

    :param arguments: parsed options; the attributes read here are
        ``output_dir``, ``goal_reward`` and ``seed``.
    :param configuration: training settings; the attributes read here are
        ``nb_exploration_steps``, ``min_eps``, ``algorithm``, ``gamma``,
        ``alpha``, ``lambda_``, ``nb_steps`` and ``visualize_training``.
    :param dfa: automaton handed to ``make_env_from_dfa``.
    """
    agent_dir = Path(arguments.output_dir) / "learner"
    # The learner directory is always recreated from scratch; unlike the
    # expert run there is no ``overwrite`` check here.
    shutil.rmtree(agent_dir, ignore_errors=True)
    agent_dir.mkdir(parents=True, exist_ok=False)

    config = MinecraftConfiguration(
        horizon=100,
        nb_goals=1,
        action_space_type=ActionSpaceType.DIFFERENTIAL)
    env = make_env_from_dfa(config, dfa, goal_reward=arguments.goal_reward)

    # Close the environment even when training or evaluation raises.
    try:
        np.random.seed(arguments.seed)
        env.seed(arguments.seed)

        policy = AutomataPolicy((-1, ),
                                nb_steps=configuration.nb_exploration_steps,
                                value_max=1.0,
                                value_min=configuration.min_eps)

        # Any value other than "sarsa" selects Q-learning.
        algorithm = Sarsa if configuration.algorithm == "sarsa" else QLearning
        agent = Agent(algorithm(None,
                                env.action_space,
                                gamma=configuration.gamma,
                                alpha=configuration.alpha,
                                lambda_=configuration.lambda_),
                      policy=policy,
                      test_policy=GreedyQPolicy())

        checkpoint_dir = agent_dir / "checkpoints"
        history = agent.fit(
            env,
            nb_steps=configuration.nb_steps,
            visualize=configuration.visualize_training,
            callbacks=[
                ModelCheckpoint(str(checkpoint_dir / "agent-{}.pkl")),
                TrainEpisodeLogger()
            ])

        history.save(agent_dir / "history.json")
        agent.save(checkpoint_dir / "agent.pkl")
        plot_history(history, agent_dir)

        # Evaluate the agent as reloaded from disk, not the in-memory one.
        agent = Agent.load(checkpoint_dir / "agent.pkl")
        agent.test(Monitor(env, agent_dir / "videos"),
                   nb_episodes=5,
                   visualize=True)
    finally:
        env.close()