def evaluate(environment_config, agent_config, options):
    """
    Run an agent in an environment, in training or testing mode.

    :param environment_config: the path of the environment configuration file
    :param agent_config: the path of the agent configuration file
    :param options: the evaluation options. The keys read here are '--verbose',
                    '--name-from-config', '--seed', '--episodes', '--recover',
                    '--recover-from', '--no-display', '--train' and '--test'.
                    options['--seed'] is overwritten in place with its integer value
                    (or left as None).
    :return: the monitor directory of the evaluation, relative to the current directory
    """
    # Logging: the verbose configuration is applied on top of the default one.
    logger.configure(LOGGING_CONFIG)
    if options['--verbose']:
        logger.configure(VERBOSE_CONFIG)

    env = load_environment(environment_config)
    agent = load_agent(agent_config, env)

    # Optionally name the run after the agent configuration file (extension stripped).
    if options['--name-from-config']:
        run_directory = Path(agent_config).with_suffix('').name
    else:
        run_directory = None

    # Normalise the seed inside the options mapping itself.
    raw_seed = options['--seed']
    options['--seed'] = None if raw_seed is None else int(raw_seed)

    episode_count = int(options['--episodes'])
    recover = options['--recover'] or options['--recover-from']
    # A single flag drives all three display switches.
    show = not options['--no-display']

    evaluation = Evaluation(env,
                            agent,
                            run_directory=run_directory,
                            num_episodes=episode_count,
                            sim_seed=options['--seed'],
                            recover=recover,
                            display_env=show,
                            display_agent=show,
                            display_rewards=show)

    # Training takes precedence over testing; with neither, the evaluation is just closed.
    if options['--train']:
        evaluation.train()
    elif options['--test']:
        evaluation.test()
    else:
        evaluation.close()

    return os.path.relpath(evaluation.monitor.directory)
def evaluate(environment_config, agent_config, options):
    """
    Run an agent in an environment and optionally analyze the resulting run.

    :param environment_config: the path of the environment configuration file
    :param agent_config: the path of the agent configuration file
    :param options: the evaluation options. The keys read here are '--verbose',
                    '--name-from-config', '--seed', '--episodes', '--recover',
                    '--no-display', '--train', '--test', '--analyze' and '<benchmark>'.
                    options['--seed'] is overwritten in place with its integer value
                    (or left as None).
    :return: the monitor directory of the evaluation, relative to the current directory
    """
    if options['--verbose']:
        log_level = gym.logger.DEBUG
    else:
        log_level = gym.logger.INFO
    gym.logger.set_level(log_level)

    env = load_environment(environment_config)
    agent = load_agent(agent_config, env)

    # Optionally name the run after the agent configuration file (extension stripped).
    run_directory = None
    if options['--name-from-config']:
        run_directory = Path(agent_config).with_suffix('').name

    # Normalise the seed inside the options mapping itself.
    seed = options['--seed']
    options['--seed'] = int(seed) if seed is not None else None

    episode_count = int(options['--episodes'])
    recover = options['--recover']
    # A single flag drives all three display switches.
    show = not options['--no-display']

    evaluation = Evaluation(env,
                            agent,
                            run_directory=run_directory,
                            num_episodes=episode_count,
                            sim_seed=options['--seed'],
                            recover=recover,
                            display_env=show,
                            display_agent=show,
                            display_rewards=show)

    # Training takes precedence over testing; with neither, the evaluation is just closed.
    if options['--train']:
        evaluation.train()
    elif options['--test']:
        evaluation.test()
    else:
        evaluation.close()

    # The single-run analysis is skipped when '<benchmark>' is set.
    if options['--analyze'] and not options['<benchmark>']:
        RunAnalyzer([evaluation.monitor.directory])

    return os.path.relpath(evaluation.monitor.directory)
def evaluate(agent_config, environment_config='configs/FiniteMDPEnv/haystack/env3.json'):
    """
    Test an agent for a single episode, without any display, and return its episode reward.

    :param agent_config: the agent configuration handed to agent_factory
    :param environment_config: the path of the environment configuration file; defaults to
                               the haystack FiniteMDPEnv configuration
    :return: the first entry of the monitor's recorded episode rewards
    """
    gym.logger.set_level(gym.logger.INFO)
    env = load_environment(environment_config)
    agent = agent_factory(env, agent_config)
    evaluation = Evaluation(env,
                            agent,
                            directory=None,
                            num_episodes=1,
                            display_env=False,
                            display_agent=False,
                            display_rewards=False)
    try:
        evaluation.test()
    finally:
        # Close the evaluation even when the test episode raises.
        evaluation.close()
    return evaluation.monitor.stats_recorder.episode_rewards[0]
def evaluate(environment_config, agent_config, options):
    """
    Run an agent in an environment and optionally analyze the resulting run.

    :param environment_config: the path of the environment configuration file
    :param agent_config: the path of the agent configuration file
    :param options: the evaluation options. The keys read here are '--name-from-config',
                    '--seed', '--episodes', '--no-display', '--train', '--test',
                    '--analyze' and '<benchmark>'. options['--seed'] is overwritten in
                    place with its integer value (or left as None).
    :return: the monitor directory of the evaluation, relative to the current directory
    """
    gym.logger.set_level(gym.logger.INFO)
    env = load_environment(environment_config)
    agent = load_agent(agent_config, env)

    # Optional output directory: <OUTPUT_FOLDER>/<environment name>/<agent name>, where each
    # name is the configuration file name cut at its first dot.
    directory = None
    if options['--name-from-config']:
        output_root = Evaluation.OUTPUT_FOLDER
        env_name = os.path.basename(environment_config).split('.')[0]
        agent_name = os.path.basename(agent_config).split('.')[0]
        directory = os.path.join(output_root, env_name, agent_name)

    # Normalise the seed inside the options mapping itself.
    seed = options['--seed']
    options['--seed'] = None if seed is None else int(seed)

    episode_count = int(options['--episodes'])
    # A single flag drives all three display switches.
    show = not options['--no-display']

    evaluation = Evaluation(env,
                            agent,
                            directory=directory,
                            num_episodes=episode_count,
                            sim_seed=options['--seed'],
                            display_env=show,
                            display_agent=show,
                            display_rewards=show)

    # Training takes precedence over testing; with neither, the evaluation is just closed.
    if options['--train']:
        evaluation.train()
    elif options['--test']:
        evaluation.test()
    else:
        evaluation.close()

    # The single-run analysis is skipped when '<benchmark>' is set.
    if options['--analyze'] and not options['<benchmark>']:
        RunAnalyzer([evaluation.monitor.directory])

    return os.path.relpath(evaluation.monitor.directory)
def evaluate(experiment):
    """
    Evaluate one agent on one seed and append the resulting statistics to a CSV file.

    :param experiment: a tuple (seed, agent_config, env_config, path), where agent_config is
                       itself a pair (agent_name, agent configuration) and path is the CSV
                       file the results are appended to (its parent directory is created
                       if needed)
    """
    def discounted_sum(signal, gamma):
        # Sum of gamma**t * signal[t] over the whole signal.
        return np.sum([gamma**t * value for t, value in enumerate(signal)])

    # Prepare workspace
    seed, agent_config, env_config, path = experiment
    gym.logger.set_level(gym.logger.DISABLED)
    path = Path(path)
    path.parent.mkdir(parents=True, exist_ok=True)

    # Make environment
    env = load_environment(env_config)

    # Make agent
    agent_name, agent_config = agent_config
    agent = load_agent(agent_config, env)

    # Evaluate
    print("Evaluating agent {} on seed {}".format(agent_name, seed))
    evaluation = Evaluation(env,
                            agent,
                            directory=path.parent / agent_name,
                            num_episodes=1,
                            sim_seed=seed,
                            display_env=True,
                            display_agent=True,
                            display_rewards=False)

    # Hard-coded switch: the per-step value estimation below is currently disabled.
    estimate_value = False
    if estimate_value:
        rewards, values, terminal = [], [], False
        evaluation.seed(episode=0)
        evaluation.reset()
        evaluation.training = False
        # Fixed discount factor; agent.config["gamma"] is not consulted.
        # NOTE(review): confirm whether the agent's own gamma should be used instead.
        gamma = 0.99
        while not terminal:
            # Estimate state value by rolling out an oracle agent on a copy of the environment
            oracle_env = safe_deepcopy_env(agent.env)
            oracle = load_agent(agent_configs()["oracle"], oracle_env)
            oracle_done, oracle_rewards = False, []
            while not oracle_done:
                action = oracle.act(None)
                _, oracle_reward, oracle_done, _ = oracle_env.step(action)
                oracle_rewards.append(oracle_reward)
            values.append(discounted_sum(oracle_rewards, gamma))

            reward, terminal = evaluation.step()
            rewards.append(reward)
        evaluation.close()

        # Discounted return from each time step k to the end of the episode
        returns = [discounted_sum(rewards[k:], gamma) for k in range(len(rewards))]

        # Save intermediate results
        df = pd.DataFrame({
            "agent": agent_name,
            "time": range(len(rewards)),
            "seed": [seed] * len(rewards),
            "reward": rewards,
            "return": returns,
            "value": values
        })
    else:
        evaluation.test()
        rewards = evaluation.monitor.stats_recorder.episode_rewards_[0]
        length = evaluation.monitor.stats_recorder.episode_lengths[0]
        total_reward = np.sum(rewards)

        return_ = discounted_sum(rewards, 0.9)
        # NOTE(review): despite its name, this column is discounted with gamma = 0.99.
        return_undisc = discounted_sum(rewards, 0.99)
        result = {
            "agent": agent_name,
            "seed": seed,
            "total_reward": total_reward,
            "return": return_,
            "return_undisc": return_undisc,
            "length": length,
        }
        df = pd.DataFrame.from_records([result])

    # pandas expects file objects to be opened with newline='' (otherwise rows get
    # double-spaced on Windows); the encoding is set on the handle that does the writing.
    with open(path, 'a', newline='', encoding='utf-8') as f:
        # Write the header only when the file is still empty.
        df.to_csv(f, sep=',', encoding='utf-8', header=f.tell() == 0, index=False)
def evaluate(environment_config, agent_config, options):
    """
    Train and/or test an agent interacting with an environment.

    :param environment_config: the path of the environment configuration file
    :param agent_config: the path of the agent configuration file, or the string "None" to
                         use the "agent_config" entry of the environment configuration
    :param options: the evaluation options. options['--seed'] is overwritten in place with
                    its integer value (or left as None).
    """
    logger.configure(LOGGING_CONFIG)
    if options['--verbose']:
        logger.configure(VERBOSE_CONFIG)

    # Optional run name: <agent config name>_<timestamp>_<pid>
    run_directory = None
    if options['--name-from-config']:
        run_directory = "{}_{}_{}".format(
            Path(agent_config).with_suffix('').name,
            datetime.datetime.now().strftime('%Y%m%d-%H%M%S'),
            os.getpid())
    options['--seed'] = int(options['--seed']) if options['--seed'] is not None else None

    env = load_environment(environment_config)
    if agent_config == "None":
        agent_config = env.config["agent_config"]
        exploration = agent_config["exploration"]
        if "auto_tau" in exploration and exploration["auto_tau"]:
            # Convert the episode count BEFORE multiplying: the option may be a string, and
            # str * int would repeat its text ('100' * 2 == '100100') instead of scaling it.
            num_episodes = int(options['--episodes'])
            exploration["tau"] = (env.config["policy_frequency"] * env.config["duration"]
                                  * int(num_episodes * env.config["controlled_vehicles"]) / 50)
    agent = load_agent(agent_config, env)

    # TODO different display options for agent, env, rewards
    if options['--offscreen_rendering']:
        env.config['offscreen_rendering'] = True

    evaluation_train = Evaluation(env,
                                  agent,
                                  run_directory=run_directory,
                                  num_episodes=int(options['--episodes']),
                                  sim_seed=options['--seed'],
                                  recover=options['--recover'] or options['--recover-from'],
                                  display_env=not options['--no-display'],
                                  display_agent=not options['--no-display'],
                                  display_rewards=not options['--no-display'],
                                  training=options['--train'],
                                  options=options)
    if options['--train']:
        evaluation_train.train()
    else:
        evaluation_train.close()

    if options['--test']:
        # Test the freshly trained agent when training ran; otherwise load one from the
        # configuration (no throw-away agent is built in the former case).
        if options['--train']:
            agent_test = evaluation_train.agent
        else:
            agent_test = load_agent(agent_config, env)
        evaluation_test = Evaluation(env,
                                     agent_test,
                                     run_directory=run_directory,
                                     num_episodes=int(options['--episodes_test']),
                                     sim_seed=options['--seed'],
                                     recover=options['--recover'] or options['--recover-from'],
                                     display_env=not options['--no-display'],
                                     display_agent=not options['--no-display'],
                                     display_rewards=not options['--no-display'],
                                     training=False,
                                     options=options)
        evaluation_test.test()