def evaluate(experiment):
    # Prepare workspace
    seed, accuracy, agent_config, env_config, path = experiment
    gym.logger.set_level(gym.logger.DISABLED)
    path = Path(path)
    path.parent.mkdir(parents=True, exist_ok=True)

    # Make environment
    env = load_environment(env_config)

    # Make agent
    agent_name, agent_config = agent_config
    agent_config["accuracy"] = float(accuracy)
    agent_config["budget"] = 10**9
    agent = agent_factory(env, agent_config)

    logger.debug("Evaluating agent {} with budget {} on seed {}".format(agent_name, budget, seed))

    # Compute true value
    env.seed(seed)
    observation = env.reset()
    vi = agent_factory(env, agent_configs()["value_iteration"])
    best_action = vi.act(observation)
    action = agent.act(observation)
    q = vi.state_action_value
    simple_regret = q[vi.mdp.state, best_action] - q[vi.mdp.state, action]
    gap = q[vi.mdp.state, best_action] - np.sort(q[vi.mdp.state, :])[-2]

    # Report the budget actually consumed by the planner, falling back to the configured budget
    budget = agent_config["budget"]
    if hasattr(agent.planner, "budget_used"):
        budget = agent.planner.budget_used

    # Save results
    result = {
        "agent": agent_name,
        "budget": budget,
        "accuracy": agent.planner.config["accuracy"],
        "horizon": agent.planner.config["horizon"],
        "seed": seed,
        "simple_regret": simple_regret,
        "gap": gap
    }

    df = pd.DataFrame.from_records([result])
    with open(path, 'a') as f:
        # Write the header only when the file is still empty
        df.to_csv(f, sep=',', encoding='utf-8', header=f.tell() == 0, index=False)
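
A minimal usage sketch for this experiment-tuple variant of evaluate(); the agent name "olop", the env_config, the seed/accuracy grids and the output path are placeholders, and running the tuples through multiprocessing.Pool is an assumption rather than part of the original code.

# Hypothetical driver: every name marked "assumed" below is a placeholder.
from itertools import product
from multiprocessing import Pool

env_config = "configs/env.json"                # assumed config accepted by load_environment
agent = ("olop", agent_configs()["olop"])      # assumed planner key in agent_configs()
seeds = range(10)                              # assumed number of evaluation seeds
accuracies = [1e-1, 1e-2, 1e-3]                # assumed accuracy levels to sweep
experiments = [(seed, accuracy, agent, env_config, "out/results.csv")
               for seed, accuracy in product(seeds, accuracies)]

with Pool(processes=4) as pool:
    pool.map(evaluate, experiments)
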
def evaluate(env, agent_name, budget=2000, seed=None):
    print("Evaluating", agent_name)
    agent_config = agents[agent_name]
    agent_config["budget"] = budget
    agent = agent_factory(env, agent_config)
    if seed is not None:
        agent.seed(seed)
    agent.act(env)
    return agent
def evaluate(env, agent_name, budget, seed=None):
    print("Evaluating", agent_name, "with budget", budget)
    agent_config = agents[agent_name]
    agent_config["budget"] = budget
    agent = agent_factory(env, agent_config)
    if seed is not None:
        env.seed(seed)
        agent.seed(seed)
    obs = env.reset()
    agent.act(obs)
    return agent
def evaluate(experiment):
    # Prepare workspace
    seed, budget, agent_config, env_config, path = experiment
    gym.logger.set_level(gym.logger.DISABLED)
    path = Path(path)
    path.parent.mkdir(parents=True, exist_ok=True)

    # Make environment
    env_name, env_config = env_config
    env = load_environment(env_config)

    # Make agent
    agent_name, agent_config = agent_config
    agent_config["budget"] = int(budget)
    agent = agent_factory(env, agent_config)

    # Evaluate
    print("Evaluating agent {} with budget {} on seed {}".format(
        agent_name, budget, seed))
    evaluation = Evaluation(env,
                            agent,
                            directory=Path("out") / "planners" / agent_name,
                            num_episodes=1,
                            sim_seed=seed,
                            display_env=False,
                            display_agent=False,
                            display_rewards=False)
    evaluation.test()
    rewards = evaluation.monitor.stats_recorder.episode_rewards_[0]
    length = evaluation.monitor.stats_recorder.episode_lengths[0]
    total_reward = np.sum(rewards)
    # gamma is assumed to be a module-level discount factor
    return_ = np.sum([gamma**t * rewards[t] for t in range(len(rewards))])

    # Save results
    result = {
        "env": env_name,
        "agent": agent_name,
        "budget": budget,
        "seed": seed,
        "total_reward": total_reward,
        "return": return_,
        "length": length
    }
    if race_strategy:  # race_strategy: assumed module-level flag for race-strategy environments
        result["pit_count"] = evaluation.pits

    df = pd.DataFrame.from_records([result])
    with open(path, 'a') as f:
        df.to_csv(f,
                  sep=',',
                  encoding='utf-8',
                  header=f.tell() == 0,
                  index=False)
def __init__(self, env, config=None):
    """
    :param env: The environment
    :param config: The agent configuration. It has to contain the field:
                   - prior_agent: the config used to create the prior agent, whose class is
                     specified in its __class__ field.
    """
    super(AbstractAgent, self).__init__(config)
    self.prior_agent = agent_factory(env, config['prior_agent'])
    # Load the prior agent from a file, if one is set
    if 'model_save' in config['prior_agent']:
        self.prior_agent.load(config['prior_agent']['model_save'])
    super(MCTSWithPriorPolicyAgent, self).__init__(env, self.config)
    self.planner.prior_policy = self.agent_policy_available
    self.planner.rollout_policy = self.agent_policy_available
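
Based on the docstring above, a hedged sketch of the kind of configuration this constructor expects; the prior agent's class path and checkpoint path are assumptions for illustration, not taken from the original code.

# Illustrative config only: the class path and checkpoint path are assumed.
config = {
    "budget": 100,
    "gamma": 0.9,
    "prior_agent": {
        "__class__": "<class 'rl_agents.agents.deep_q_network.pytorch.DQNAgent'>",
        "model_save": "out/dqn/checkpoint.tar",  # hypothetical saved model, loaded since the key is present
    },
}
agent = MCTSWithPriorPolicyAgent(env, config)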
Example 6
    },
    # "double": False,
    "gamma": 0.75, #0.8
    "n_steps": 1,
    "batch_size": 32, #32
    "memory_capacity": 15000,
    "target_update": 50,
    "exploration": {
        "method": "EpsilonGreedy",
        "tau": 6000,
        "temperature": 1.0,
        "final_temperature": 0.05
    },
    "loss_function": "l2"
}
agent = agent_factory(env, agent_config)


print(type(env.observation_space))
obs, done = env.reset(), False
# print(obs)
print("main")
print(type(env.action_space))
action = agent.act(obs)
print("main2")
print(action)
observation, reward, done, info = env.step(action)
# print("main3")

# print(observation)
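
The snippet above plans and executes a single step; a minimal sketch of rolling out a full episode with the same env and agent objects:

# Full-episode rollout with the agent created above.
obs, done = env.reset(), False
total_reward = 0.0
while not done:
    action = agent.act(obs)                      # plan an action from the current observation
    obs, reward, done, info = env.step(action)   # advance the environment
    total_reward += reward
print("Episode finished with total reward", total_reward)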
Example 7
def evaluate(experiment):
    # Prepare workspace
    seed, budget, agent_config, env_config, path = experiment
    gym.logger.set_level(gym.logger.DISABLED)
    path = Path(path)
    path.parent.mkdir(parents=True, exist_ok=True)

    # Make environment
    env = load_environment(env_config)

    # Make agent
    agent_name, agent_config = agent_config
    agent_config["budget"] = int(budget)
    agent = agent_factory(env, agent_config)

    logger.debug("Evaluating agent {} with budget {} on seed {}".format(
        agent_name, budget, seed))

    # Compute true value
    compute_regret = True
    compute_return = False
    if compute_regret:
        env.seed(seed)
        observation = env.reset()
        vi = agent_factory(env, agent_configs()["value_iteration"])
        best_action = vi.act(observation)
        action = agent.act(observation)
        q = vi.state_action_value
        simple_regret = q[vi.mdp.state, best_action] - q[vi.mdp.state, action]
        gap = q[vi.mdp.state, best_action] - np.sort(q[vi.mdp.state, :])[-2]
    else:
        simple_regret = 0
        gap = 0

    if compute_return:
        # Evaluate
        evaluation = Evaluation(env,
                                agent,
                                directory=Path("out") / "planners" /
                                agent_name,
                                num_episodes=1,
                                sim_seed=seed,
                                display_env=False,
                                display_agent=False,
                                display_rewards=False)
        evaluation.test()
        rewards = evaluation.monitor.stats_recorder.episode_rewards_[0]
        length = evaluation.monitor.stats_recorder.episode_lengths[0]
        total_reward = np.sum(rewards)
        # gamma is assumed to be a module-level discount factor
        cum_discount = lambda signal: np.sum(
            [gamma**t * signal[t] for t in range(len(signal))])
        return_ = cum_discount(rewards)
        mean_return = np.mean(
            [cum_discount(rewards[t:]) for t in range(len(rewards))])
    else:
        length = 0
        total_reward = 0
        return_ = 0
        mean_return = 0

    # Save results
    result = {
        "agent": agent_name,
        "budget": budget,
        "seed": seed,
        "total_reward": total_reward,
        "return": return_,
        "mean_return": mean_return,
        "length": length,
        "simple_regret": simple_regret,
        "gap": gap
    }

    df = pd.DataFrame.from_records([result])
    with open(path, 'a') as f:
        df.to_csv(f,
                  sep=',',
                  encoding='utf-8',
                  header=f.tell() == 0,
                  index=False)
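
Each call appends a single row to the CSV, so results from many seeds and budgets accumulate in one file; a minimal sketch for loading them back, where "out/results.csv" is a placeholder for the path passed in the experiment tuple.

# Hypothetical analysis step on the accumulated results file.
import pandas as pd

results = pd.read_csv("out/results.csv")
print(results.groupby(["agent", "budget"])[["simple_regret", "gap"]].mean())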
Example 8
if args.env_name in ['highway-v0']:
    import highway_env
    from rl_agents.agents.common.factory import agent_factory

    env = make_vec_envs(args.env_name, seed, 1, 0.99, f'{args.emo_data_dir}/tmp/gym', device,
                        True, stats_path=stats_path, hyperparams=hyperparams, time=time,
                        atari_max_steps=args.atari_max_steps)

    # Make agent
    agent_config = {
        "__class__": "<class 'rl_agents.agents.tree_search.deterministic.DeterministicPlannerAgent'>",
        "budget": 50,
        "gamma": 0.7,
    }
    th_model = agent_factory(gym.make(args.env_name), agent_config)
    time = False
elif args.env_name in ['duckietown']:
    from a2c_ppo_acktr.duckietown.env import launch_env
    from a2c_ppo_acktr.duckietown.wrappers import (
        NormalizeWrapper, ImgWrapper, DtRewardWrapper, ActionWrapper, ResizeWrapper)
    from a2c_ppo_acktr.duckietown.teacher import PurePursuitExpert
    env = launch_env()
    env = ResizeWrapper(env)
    env = NormalizeWrapper(env)
    env = ImgWrapper(env)
    env = ActionWrapper(env)
    env = DtRewardWrapper(env)

    # Create an imperfect demonstrator
    expert = PurePursuitExpert(env=env)