def run_experiment():
    total_step = 0
    agent, env, spec = prepare_test_env_agent(headless=True)
    timestep_sec = env.timestep_sec
    policy_class = "ultra.baselines.sac:sac-v0"
    log_dir = "tests/output_eval_check_logs"

    for episode in episodes(1, etag=policy_class, log_dir=log_dir):
        observations = env.reset()
        state = observations[AGENT_ID]
        dones, infos = {"__all__": False}, None
        episode.reset()
        experiment_dir = episode.experiment_dir

        if not os.path.exists(f"{experiment_dir}/spec.pkl"):
            if not os.path.exists(experiment_dir):
                os.makedirs(experiment_dir)
            with open(f"{experiment_dir}/spec.pkl", "wb") as spec_output:
                dill.dump(spec, spec_output, pickle.HIGHEST_PROTOCOL)

        while not dones["__all__"]:
            evaluation_check(
                agent=agent,
                agent_id=AGENT_ID,
                episode=episode,
                eval_rate=10,
                eval_episodes=1,
                max_episode_steps=2,
                policy_class=policy_class,
                scenario_info=("00", "eval_test"),
                timestep_sec=0.1,
                headless=True,
                log_dir=log_dir,
            )
            action = agent.act(state, explore=True)
            observations, rewards, dones, infos = env.step({AGENT_ID: action})
            next_state = observations[AGENT_ID]

            # Retrieve some relevant information from the reward processor.
            # observations[AGENT_ID]["ego"].update(rewards[AGENT_ID]["log"])
            loss_output = agent.step(
                state=state,
                action=action,
                reward=rewards[AGENT_ID],
                next_state=next_state,
                done=dones[AGENT_ID],
            )
            episode.record_step(
                agent_id=AGENT_ID,
                infos=infos,
                rewards=rewards,
                total_step=total_step,
                loss_output=loss_output,
            )
            total_step += 1
            state = next_state

    env.close()
def run_experiment():
    agent, env = prepare_test_env_agent()
    log_dir = os.path.join(EpisodeTest.OUTPUT_DIRECTORY, "logs/")
    episode = Episode(0)

    for episode in episodes(2, etag="Train", log_dir=log_dir):
        observations = env.reset()
        total_step = 0
        episode.reset()
        dones, infos = {"__all__": False}, None
        state = observations[AGENT_ID]

        while not dones["__all__"]:
            action = agent.act(state, explore=True)
            observations, rewards, dones, infos = env.step({AGENT_ID: action})
            next_state = observations[AGENT_ID]

            # observations[AGENT_ID].update(rewards[AGENT_ID])
            loss_output = agent.step(
                state=state,
                action=action,
                reward=rewards[AGENT_ID],
                next_state=next_state,
                done=dones[AGENT_ID],
                info=infos[AGENT_ID],
            )
            episode.record_step(
                agent_ids_to_record=AGENT_ID,
                infos=infos,
                rewards=rewards,
                total_step=total_step,
                loss_outputs=loss_output,
            )
            state = next_state
            total_step += 1

    env.close()
    return episode.index
def run_experiment():
    agent, env = prepare_test_env_agent()
    episode_count = 0
    log_dir = "tests/logs"

    for episode in episodes(2, etag="Train", log_dir=log_dir):
        observations = env.reset()
        total_step = 0
        episode.reset()
        dones, infos = {"__all__": False}, None
        state = observations[AGENT_ID]

        while not dones["__all__"]:
            action = agent.act(state, explore=True)
            observations, rewards, dones, infos = env.step({AGENT_ID: action})
            next_state = observations[AGENT_ID]

            # observations[AGENT_ID].update(rewards[AGENT_ID])
            loss_output = agent.step(
                state=state,
                action=action,
                reward=rewards[AGENT_ID],
                next_state=next_state,
                done=dones[AGENT_ID],
            )
            episode.record_step(
                agent_id=AGENT_ID,
                infos=infos,
                rewards=rewards,
                total_step=total_step,
                loss_output=loss_output,
            )
            state = next_state
            total_step += 1

        # Count each completed episode.
        episode_count += 1

    env.close()
    return episode_count
def run_experiment():
    agent, env = prepare_test_env_agent()
    result = {
        "episode_reward": 0,
        "dist_center": 0,
        "goal_dist": 0,
        "speed": 0,
        "ego_num_violations": 0,
        "linear_jerk": 0,
        "angular_jerk": 0,
        "collision": 0,
        "off_road": 0,
        "off_route": 0,
        "reached_goal": 0,
    }

    for episode in episodes(1, etag="Train"):
        observations = env.reset()
        total_step = 0
        episode.reset()
        dones, infos = {"__all__": False}, None
        state = observations[AGENT_ID]

        while not dones["__all__"] and total_step < 4:
            action = agent.act(state, explore=True)
            observations, rewards, dones, infos = env.step({AGENT_ID: action})
            next_state = observations[AGENT_ID]

            # observations[AGENT_ID]["ego"].update(rewards[AGENT_ID]["log"])
            loss_output = agent.step(
                state=state,
                action=action,
                reward=rewards[AGENT_ID],
                next_state=next_state,
                done=dones[AGENT_ID],
            )
            for key in result.keys():
                if key in observations[AGENT_ID]:
                    if key == "goal_dist":
                        # Track the latest distance to goal rather than a running sum.
                        result[key] = observations[AGENT_ID][key]
                    else:
                        result[key] += observations[AGENT_ID][key]
                elif key == "episode_reward":
                    result[key] += rewards[AGENT_ID]
            episode.record_step(
                agent_id=AGENT_ID,
                infos=infos,
                rewards=rewards,
                total_step=total_step,
                loss_output=loss_output,
            )
            state = next_state
            total_step += 1

    env.close()
    episode.record_episode()
    return result, episode
def train(
    scenario_info,
    num_episodes,
    policy_classes,
    max_episode_steps,
    eval_info,
    timestep_sec,
    headless,
    seed,
    log_dir,
    policy_ids=None,
):
    torch.set_num_threads(1)
    total_step = 0
    finished = False

    # Make agent IDs in the form of 000, 001, ..., 010, 011, ..., 999, 1000, ...;
    # or use the provided policy IDs if available.
    agent_ids = (
        ["0" * max(0, 3 - len(str(i))) + str(i) for i in range(len(policy_classes))]
        if not policy_ids
        else policy_ids
    )
    # Ensure there is an ID for each policy, and a policy for each ID.
    assert len(agent_ids) == len(policy_classes), (
        "The number of agent IDs provided ({}) must be equal to "
        "the number of policy classes provided ({}).".format(
            len(agent_ids), len(policy_classes)
        )
    )
    # Assign the policy classes to their associated IDs.
    agent_classes = {
        agent_id: policy_class
        for agent_id, policy_class in zip(agent_ids, policy_classes)
    }
    # Create the agent specifications matched with their associated IDs.
    agent_specs = {
        agent_id: make(locator=policy_class, max_episode_steps=max_episode_steps)
        for agent_id, policy_class in agent_classes.items()
    }
    # Create the agents matched with their associated IDs.
    agents = {
        agent_id: agent_spec.build_agent()
        for agent_id, agent_spec in agent_specs.items()
    }

    # Create the environment.
    env = gym.make(
        "ultra.env:ultra-v0",
        agent_specs=agent_specs,
        scenario_info=scenario_info,
        headless=headless,
        timestep_sec=timestep_sec,
        seed=seed,
    )

    # Define an 'etag' for this experiment's data directory based off policy_classes.
    # E.g. from a ["ultra.baselines.dqn:dqn-v0", "ultra.baselines.ppo:ppo-v0"]
    # policy_classes list, transform it to an etag of "dqn-v0:ppo-v0".
    etag = ":".join([policy_class.split(":")[-1] for policy_class in policy_classes])

    for episode in episodes(num_episodes, etag=etag, log_dir=log_dir):
        # Reset the environment and retrieve the initial observations.
        observations = env.reset()
        dones = {"__all__": False}
        infos = None
        episode.reset()
        experiment_dir = episode.experiment_dir

        # Save relevant agent metadata.
        if not os.path.exists(f"{experiment_dir}/agent_metadata.pkl"):
            if not os.path.exists(experiment_dir):
                os.makedirs(experiment_dir)
            with open(f"{experiment_dir}/agent_metadata.pkl", "wb") as metadata_file:
                dill.dump(
                    {
                        "agent_ids": agent_ids,
                        "agent_classes": agent_classes,
                        "agent_specs": agent_specs,
                    },
                    metadata_file,
                    pickle.HIGHEST_PROTOCOL,
                )

        while not dones["__all__"]:
            # Break if any of the agents' step counts is 1000000 or greater.
            if any([episode.get_itr(agent_id) >= 1000000 for agent_id in agents]):
                finished = True
                break

            # Perform the evaluation check.
            evaluation_check(
                agents=agents,
                agent_ids=agent_ids,
                policy_classes=agent_classes,
                episode=episode,
                log_dir=log_dir,
                max_episode_steps=max_episode_steps,
                **eval_info,
                **env.info,
            )

            # Request and perform actions on each agent that received an observation.
            actions = {
                agent_id: agents[agent_id].act(observation, explore=True)
                for agent_id, observation in observations.items()
            }
            next_observations, rewards, dones, infos = env.step(actions)

            # Active agents are those that receive observations in this step and the
            # next step. Step each active agent (obtaining their network loss if
            # applicable).
            active_agent_ids = observations.keys() & next_observations.keys()
            loss_outputs = {
                agent_id: agents[agent_id].step(
                    state=observations[agent_id],
                    action=actions[agent_id],
                    reward=rewards[agent_id],
                    next_state=next_observations[agent_id],
                    done=dones[agent_id],
                    info=infos[agent_id],
                )
                for agent_id in active_agent_ids
            }

            # Record the data from this step.
            episode.record_step(
                agent_ids_to_record=active_agent_ids,
                infos=infos,
                rewards=rewards,
                total_step=total_step,
                loss_outputs=loss_outputs,
            )

            # Update variables for the next step.
            total_step += 1
            observations = next_observations

        # Normalize the data and record this episode on TensorBoard.
        episode.record_episode()
        episode.record_tensorboard()

        if finished:
            break

    env.close()
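

# Example (assumption): a minimal sketch of how this training entry point might be
# invoked. The scenario tuple, policy locator, `eval_info` keys, and hyperparameter
# values below are illustrative placeholders, not values fixed by the source.
if __name__ == "__main__":
    train(
        scenario_info=("1", "easy"),  # assumed (task, level) tuple
        num_episodes=100,
        policy_classes=["ultra.baselines.sac:sac-v0"],
        max_episode_steps=200,
        eval_info={"eval_rate": 10, "eval_episodes": 5},  # assumed keys
        timestep_sec=0.1,
        headless=True,
        seed=2,
        log_dir="logs",
    )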
def evaluate_saved_models(
    experiment_dir: str,
    log_dir: str,
    headless: bool,
    max_episode_steps: int,
    agents: Sequence[str],
    num_episodes: int,
    scenario_info: Tuple[str, str],
    timestep: float,
    models_to_evaluate: Optional[Sequence[str]] = None,
):
    # If no agents are explicitly given, then by default all agents are
    # enabled for evaluation.
    if not agents:
        agents = os.listdir(os.path.join(experiment_dir, "models"))

    # Model path for each agent ID.
    model_paths = [
        os.path.join(experiment_dir, "models", agent) for agent in agents
    ]

    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    if not all([os.path.exists(model_path) for model_path in model_paths]):
        raise ValueError("At least one path to a model is invalid.")
    if not all([os.listdir(model_path) for model_path in model_paths]):
        raise ValueError(
            "There are no models to evaluate in at least one model path."
        )

    # Get agent IDs from the models to be evaluated.
    agent_ids_from_models = [
        os.path.basename(os.path.normpath(model_path)) for model_path in model_paths
    ]

    # Load relevant agent metadata.
    with open(
        os.path.join(experiment_dir, "agent_metadata.pkl"), "rb"
    ) as metadata_file:
        agent_metadata = pickle.load(metadata_file)

    # Extract the agent IDs and policy classes from the metadata and given models.
    agent_ids = [
        agent_id
        for agent_id in agent_metadata["agent_ids"]
        if agent_id in agent_ids_from_models
    ]
    policy_classes = {
        agent_id: agent_metadata["agent_classes"][agent_id] for agent_id in agent_ids
    }

    # From a base model directory such as logs/<experiment_name>/models/*, assign each
    # agent ID its checkpoint directories, sorted by checkpoint iteration. The agent
    # IDs are obtained from the direct child folders of the given model directory.
    # An example result:
    # {
    #     '000': ['logs/<experiment_name>/models/000/1042', 'logs/<experiment_name>/models/000/2062'],
    #     '001': ['logs/<experiment_name>/models/001/999', 'logs/<experiment_name>/models/001/1999'],
    #     '003': ['logs/<experiment_name>/models/003/1009', 'logs/<experiment_name>/models/003/2120'],
    #     '002': ['logs/<experiment_name>/models/002/1053', 'logs/<experiment_name>/models/002/2041'],
    # }
    agent_checkpoint_directories = {
        agent_id: sorted(
            glob.glob(os.path.join(experiment_dir, "models", agent_id, "*")),
            key=lambda x: int(x.split("/")[-1]),
        )
        for agent_id in agent_ids
    }

    # If models are explicitly given through the CLI, then their respective model
    # directory paths are calculated.
    if models_to_evaluate:
        custom_checkpoint_directories = {}
        # Iterate through each model to be evaluated (models that do not exist will
        # not be included).
        for model in models_to_evaluate:
            agent_id = model.split("/")[0]
            model_observation_number = model.split("/")[-1]
            if agent_id in agent_checkpoint_directories.keys():
                model_directories = {
                    model_directory.split("/")[-1]: model_directory
                    for model_directory in agent_checkpoint_directories[agent_id]
                }
                if model_observation_number in model_directories:
                    if agent_id in custom_checkpoint_directories:
                        custom_checkpoint_directories[agent_id].append(
                            model_directories[model_observation_number]
                        )
                    else:
                        custom_checkpoint_directories[agent_id] = [
                            model_directories[model_observation_number]
                        ]
                else:
                    raise Exception(
                        f"The agent with ID {agent_id} does not contain the provided "
                        f"observation number: {model_observation_number}"
                    )
            else:
                raise Exception(
                    f"The agent ID {agent_id} is not in the specified agent IDs."
                )
        # The agent checkpoint directories now contain only the specified model
        # directories for the specified agents.
        agent_checkpoint_directories = custom_checkpoint_directories

    etag = (
        ":".join([policy_classes[agent_id].split(":")[-1] for agent_id in agent_ids])
        + "-evaluation"
    )

    for agent_id, checkpoint_directories in agent_checkpoint_directories.items():
        num_of_checkpoints = len(checkpoint_directories)
        ray.init()
        try:
            for episode in episodes(num_of_checkpoints, etag=etag, log_dir=log_dir):
                # Obtain the checkpoint directory for this agent.
                checkpoint_directory = {
                    agent_id: checkpoint_directories[episode.index]
                }
                episode.eval_mode()
                episode.info[episode.active_tag] = ray.get(
                    [
                        evaluate.remote(
                            experiment_dir=experiment_dir,
                            agent_ids=[agent_id],
                            policy_classes=policy_classes,
                            seed=episode.eval_count,
                            checkpoint_dirs=checkpoint_directory,
                            scenario_info=scenario_info,
                            num_episodes=num_episodes,
                            max_episode_steps=max_episode_steps,
                            timestep_sec=timestep,
                            headless=headless,
                            log_dir=log_dir,
                        )
                    ]
                )[0]
                episode.record_tensorboard(recording_step=episode.index)
                episode.eval_count += 1
        finally:
            time.sleep(1)
            ray.shutdown()
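

# Example (assumption): the parsing above implies each entry of `models_to_evaluate`
# is an "<agent_id>/<checkpoint>" string. A hypothetical call using that format; the
# experiment directory, scenario tuple, and checkpoint numbers are placeholders only.
evaluate_saved_models(
    experiment_dir="logs/experiment",
    log_dir="logs/evaluation",
    headless=True,
    max_episode_steps=200,
    agents=["000", "001"],
    num_episodes=10,
    scenario_info=("1", "easy"),
    timestep=0.1,
    models_to_evaluate=["000/1042", "001/999"],  # "<agent_id>/<checkpoint>"
)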
# Assumed decorator: `evaluate` is invoked with `evaluate.remote(...)` above, which
# requires it to be registered as a Ray remote function.
@ray.remote
def evaluate(
    experiment_dir,
    seed,
    agent_ids,
    policy_classes,
    checkpoint_dirs,
    scenario_info,
    num_episodes,
    max_episode_steps,
    headless,
    timestep_sec,
    log_dir,
    eval_mode=True,
):
    torch.set_num_threads(1)

    # Create the agent specifications matched with their associated IDs.
    agent_specs = {
        agent_id: make(
            locator=policy_classes[agent_id],
            checkpoint_dir=checkpoint_dirs[agent_id],
            experiment_dir=experiment_dir,
            max_episode_steps=max_episode_steps,
            agent_id=agent_id,
        )
        for agent_id in agent_ids
    }

    # Create the environment with the specified agents.
    env = gym.make(
        "ultra.env:ultra-v0",
        agent_specs=agent_specs,
        scenario_info=scenario_info,
        headless=headless,
        timestep_sec=timestep_sec,
        seed=seed,
        eval_mode=eval_mode,
    )

    # Build each agent from its specification.
    agents = {
        agent_id: agent_spec.build_agent()
        for agent_id, agent_spec in agent_specs.items()
    }

    # A dictionary to hold the evaluation data for each agent.
    summary_log = {agent_id: LogInfo() for agent_id in agent_ids}

    # Define an 'etag' for this experiment's data directory based off policy_classes.
    # E.g. from a {"000": "ultra.baselines.dqn:dqn-v0", "001": "ultra.baselines.ppo:ppo-v0"}
    # policy_classes mapping, transform it to an etag of "dqn-v0:ppo-v0". Note that
    # policy_classes is a dict here, so we iterate over its values, not its keys.
    etag = ":".join(
        [policy_class.split(":")[-1] for policy_class in policy_classes.values()]
    )

    for episode in episodes(num_episodes, etag=etag, log_dir=log_dir):
        # Reset the environment and retrieve the initial observations.
        observations = env.reset()
        dones = {"__all__": False}
        infos = None
        episode.reset(mode="Evaluation")

        while not dones["__all__"]:
            # Get and perform the available agents' actions.
            actions = {
                agent_id: agents[agent_id].act(observation, explore=False)
                for agent_id, observation in observations.items()
            }
            observations, rewards, dones, infos = env.step(actions)

            # Record the data from this step.
            episode.record_step(
                agent_ids_to_record=infos.keys(), infos=infos, rewards=rewards
            )

        episode.record_episode()

        for agent_id, agent_data in episode.info[episode.active_tag].items():
            for key, value in agent_data.data.items():
                if not isinstance(value, (list, tuple, np.ndarray)):
                    summary_log[agent_id].data[key] += value

    # Normalize by the number of evaluation episodes.
    for agent_id, agent_data in summary_log.items():
        for key, value in agent_data.data.items():
            if not isinstance(value, (list, tuple, np.ndarray)):
                summary_log[agent_id].data[key] /= num_episodes

    env.close()
    return summary_log
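

# Example (assumption): a minimal sketch of driving this remote evaluation directly
# with Ray. The paths, policy locator, and argument values are illustrative
# placeholders only.
if __name__ == "__main__":
    ray.init()
    try:
        summary = ray.get(
            evaluate.remote(
                experiment_dir="logs/experiment",
                seed=0,
                agent_ids=["000"],
                policy_classes={"000": "ultra.baselines.sac:sac-v0"},
                checkpoint_dirs={"000": "logs/experiment/models/000/1042"},
                scenario_info=("1", "easy"),
                num_episodes=5,
                max_episode_steps=200,
                headless=True,
                timestep_sec=0.1,
                log_dir="logs/evaluation",
            )
        )
    finally:
        ray.shutdown()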
# Assumed decorator: this evaluate variant is also called with `evaluate.remote(...)`
# elsewhere, so it is presumably registered as a Ray remote function as well.
@ray.remote
def evaluate(
    experiment_dir,
    seed,
    agent_id,
    policy_class,
    itr_count,
    checkpoint_dir,
    scenario_info,
    num_episodes,
    headless,
    timestep_sec,
):
    torch.set_num_threads(1)

    spec = make(
        locator=policy_class,
        checkpoint_dir=checkpoint_dir,
        experiment_dir=experiment_dir,
    )
    env = gym.make(
        "ultra.env:ultra-v0",
        agent_specs={agent_id: spec},
        scenario_info=scenario_info,
        headless=headless,
        timestep_sec=timestep_sec,
        seed=seed,
        eval_mode=True,
    )
    agent = spec.build_agent()

    summary_log = LogInfo()
    logs = []

    for episode in episodes(num_episodes):
        observations = env.reset()
        state = observations[agent_id]
        dones, infos = {"__all__": False}, None
        episode.reset(mode="Evaluation")

        while not dones["__all__"]:
            action = agent.act(state, explore=False)
            observations, rewards, dones, infos = env.step({agent_id: action})
            next_state = observations[agent_id]
            state = next_state
            episode.record_step(agent_id=agent_id, infos=infos, rewards=rewards)

        episode.record_episode()
        logs.append(episode.info[episode.active_tag].data)

        for key, value in episode.info[episode.active_tag].data.items():
            if not isinstance(value, (list, tuple, np.ndarray)):
                summary_log.data[key] += value

    # Normalize by the number of evaluation episodes.
    for key, val in summary_log.data.items():
        if not isinstance(val, (list, tuple, np.ndarray)):
            summary_log.data[key] /= num_episodes

    env.close()
    return summary_log
if args.policy in data["agents"].keys():
    policy_path = data["agents"][args.policy]["path"]
    policy_locator = data["agents"][args.policy]["locator"]
else:
    raise ImportError("Invalid policy name. Please try again.")

# Required string for SMARTS' class registry.
policy_class = str(policy_path) + ":" + str(policy_locator)

num_cpus = max(
    1, psutil.cpu_count(logical=False) - 1
)  # Remove `logical=False` to use all CPUs.
ray_kwargs = default_ray_kwargs(num_cpus=num_cpus, num_gpus=num_gpus)
ray.init(**ray_kwargs)
try:
    agent_id = "AGENT_008"
    for episode in episodes(len(sorted_models), etag=args.policy):
        model = sorted_models[episode.index]
        print("model: ", model)
        episode_count = model.split("/")[-1]
        episode.eval_mode()
        episode.info[episode.active_tag] = ray.get(
            [
                evaluate.remote(
                    experiment_dir=args.experiment_dir,
                    agent_id=agent_id,
                    policy_class=policy_class,
                    seed=episode.eval_count,
                    itr_count=0,
                    checkpoint_dir=model,
                    scenario_info=(args.task, args.level),
                    num_episodes=int(args.episodes),
def train(
    scenario_info,
    num_episodes,
    max_episode_steps,
    policy_class,
    eval_info,
    timestep_sec,
    headless,
    seed,
    log_dir,
):
    torch.set_num_threads(1)
    total_step = 0
    finished = False

    AGENT_ID = "007"
    spec = make(locator=policy_class, max_episode_steps=max_episode_steps)
    env = gym.make(
        "ultra.env:ultra-v0",
        agent_specs={AGENT_ID: spec},
        scenario_info=scenario_info,
        headless=headless,
        timestep_sec=timestep_sec,
        seed=seed,
    )
    agent = spec.build_agent()

    for episode in episodes(num_episodes, etag=policy_class, log_dir=log_dir):
        observations = env.reset()
        state = observations[AGENT_ID]
        dones, infos = {"__all__": False}, None
        episode.reset()
        experiment_dir = episode.experiment_dir

        # Save the entire spec (policy_params, reward_adapter, observation_adapter).
        if not os.path.exists(f"{experiment_dir}/spec.pkl"):
            if not os.path.exists(experiment_dir):
                os.makedirs(experiment_dir)
            with open(f"{experiment_dir}/spec.pkl", "wb") as spec_output:
                dill.dump(spec, spec_output, pickle.HIGHEST_PROTOCOL)

        while not dones["__all__"]:
            if episode.get_itr(AGENT_ID) >= 1000000:
                finished = True
                break
            evaluation_check(
                agent=agent,
                agent_id=AGENT_ID,
                policy_class=policy_class,
                episode=episode,
                log_dir=log_dir,
                max_episode_steps=max_episode_steps,
                **eval_info,
                **env.info,
            )
            action = agent.act(state, explore=True)
            observations, rewards, dones, infos = env.step({AGENT_ID: action})
            next_state = observations[AGENT_ID]

            loss_output = agent.step(
                state=state,
                action=action,
                reward=rewards[AGENT_ID],
                next_state=next_state,
                done=dones[AGENT_ID],
            )
            episode.record_step(
                agent_id=AGENT_ID,
                infos=infos,
                rewards=rewards,
                total_step=total_step,
                loss_output=loss_output,
            )
            total_step += 1
            state = next_state

        episode.record_episode()
        episode.record_tensorboard(agent_id=AGENT_ID)

        if finished:
            break

    env.close()
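

# Example (assumption): a hypothetical invocation of this single-agent entry point.
# The scenario tuple and `eval_info` keys mirror how evaluation_check is called above
# and are placeholders only.
if __name__ == "__main__":
    train(
        scenario_info=("1", "easy"),  # assumed (task, level) tuple
        num_episodes=100,
        max_episode_steps=200,
        policy_class="ultra.baselines.sac:sac-v0",
        eval_info={"eval_rate": 10, "eval_episodes": 5},  # assumed keys
        timestep_sec=0.1,
        headless=True,
        seed=2,
        log_dir="logs",
    )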
def run_experiment(scenario_info, num_agents, log_dir, headless=True, seed=2):
    # NOTE: `seed` was referenced but never defined in the original snippet; it is
    # exposed here as a parameter with an illustrative default.
    agent_ids = [
        "0" * max(0, 3 - len(str(i))) + str(i) for i in range(num_agents)
    ]
    agent_classes = {
        agent_id: "ultra.baselines.sac:sac-v0" for agent_id in agent_ids
    }
    agent_specs = {
        agent_id: BaselineAgentSpec(policy_class=SACPolicy, max_episode_steps=2)
        for agent_id in agent_ids
    }

    env = gym.make(
        "ultra.env:ultra-v0",
        agent_specs=agent_specs,
        scenario_info=scenario_info,
        headless=headless,
        timestep_sec=0.1,
        seed=seed,
    )

    agents = {
        agent_id: agent_spec.build_agent()
        for agent_id, agent_spec in agent_specs.items()
    }

    total_step = 0
    # Build the etag from the policy-class values (not the agent-ID keys).
    etag = ":".join(
        [policy_class.split(":")[-1] for policy_class in agent_classes.values()]
    )
    evaluation_task_ids = dict()

    for episode in episodes(1, etag=etag, log_dir=log_dir):
        observations = env.reset()
        dones = {"__all__": False}
        infos = None
        episode.reset()
        experiment_dir = episode.experiment_dir

        if not os.path.exists(f"{experiment_dir}/agent_metadata.pkl"):
            if not os.path.exists(experiment_dir):
                os.makedirs(experiment_dir)
            with open(f"{experiment_dir}/agent_metadata.pkl", "wb") as metadata_file:
                dill.dump(
                    {
                        "agent_ids": agent_ids,
                        "agent_classes": agent_classes,
                        "agent_specs": agent_specs,
                    },
                    metadata_file,
                    pickle.HIGHEST_PROTOCOL,
                )

        while not dones["__all__"]:
            evaluation_check(
                agents=agents,
                agent_ids=agent_ids,
                episode=episode,
                eval_rate=10,
                eval_episodes=1,
                max_episode_steps=2,
                policy_classes=agent_classes,
                scenario_info=scenario_info,
                evaluation_task_ids=evaluation_task_ids,
                timestep_sec=0.1,
                headless=True,
                log_dir=log_dir,
            )
            collect_evaluations(evaluation_task_ids=evaluation_task_ids)

            actions = {
                agent_id: agents[agent_id].act(observation, explore=True)
                for agent_id, observation in observations.items()
            }
            next_observations, rewards, dones, infos = env.step(actions)

            active_agent_ids = observations.keys() & next_observations.keys()
            loss_outputs = {
                agent_id: agents[agent_id].step(
                    state=observations[agent_id],
                    action=actions[agent_id],
                    reward=rewards[agent_id],
                    next_state=next_observations[agent_id],
                    done=dones[agent_id],
                    info=infos[agent_id],
                )
                for agent_id in active_agent_ids
            }

            episode.record_step(
                agent_ids_to_record=active_agent_ids,
                infos=infos,
                rewards=rewards,
                total_step=total_step,
                loss_outputs=loss_outputs,
            )
            total_step += 1
            observations = next_observations

    # Wait on the remaining evaluations to finish.
    while collect_evaluations(evaluation_task_ids):
        time.sleep(0.1)

    env.close()
def train(
    scenario_info,
    num_episodes,
    policy_classes,
    max_episode_steps,
    max_steps,
    eval_info,
    timestep_sec,
    headless,
    seed,
    log_dir,
    policy_ids=None,
):
    torch.set_num_threads(1)
    total_step = 0
    finished = False
    evaluation_task_ids = dict()

    agent_ids, agent_classes, agent_specs, agents, etag = build_agents(
        policy_classes, policy_ids, max_episode_steps
    )

    # Create the environment.
    env = gym.make(
        "ultra.env:ultra-v0",
        agent_specs=agent_specs,
        scenario_info=scenario_info,
        headless=headless,
        timestep_sec=timestep_sec,
        seed=seed,
    )

    for episode in episodes(num_episodes, etag=etag, log_dir=log_dir):
        # Reset the environment and retrieve the initial observations.
        observations = env.reset()
        dones = {"__all__": False}
        infos = None
        episode.reset()
        experiment_dir = episode.experiment_dir

        # Name of the agent metadata pickle file.
        filename = "agent_metadata.pkl"
        if not os.path.exists(os.path.join(experiment_dir, filename)):
            _save_agent_metadata(
                experiment_dir,
                filename,
                agent_ids,
                agent_classes,
                agent_specs,
            )

        while not dones["__all__"]:
            # Break if any of the agents' step counts is max_steps (default 1000000)
            # or greater.
            if any([episode.get_itr(agent_id) >= max_steps for agent_id in agents]):
                finished = True
                break

            # Perform the evaluation check, and collect any finished evaluations.
            evaluation_check(
                agents=agents,
                agent_ids=agent_ids,
                policy_classes=agent_classes,
                episode=episode,
                log_dir=log_dir,
                max_episode_steps=max_episode_steps,
                evaluation_task_ids=evaluation_task_ids,
                **eval_info,
                **env.info,
            )
            collect_evaluations(evaluation_task_ids=evaluation_task_ids)

            # Request and perform actions on each agent that received an observation.
            actions = {
                agent_id: agents[agent_id].act(observation, explore=True)
                for agent_id, observation in observations.items()
            }
            next_observations, rewards, dones, infos = env.step(actions)

            # Active agents are those that receive observations in this step and the
            # next step. Step each active agent (obtaining their network loss if
            # applicable).
            active_agent_ids = observations.keys() & next_observations.keys()
            loss_outputs = {
                agent_id: agents[agent_id].step(
                    state=observations[agent_id],
                    action=actions[agent_id],
                    reward=rewards[agent_id],
                    next_state=next_observations[agent_id],
                    done=dones[agent_id],
                    info=infos[agent_id],
                )
                for agent_id in active_agent_ids
            }

            # Record the data from this step.
            episode.record_step(
                agent_ids_to_record=active_agent_ids,
                infos=infos,
                rewards=rewards,
                total_step=total_step,
                loss_outputs=loss_outputs,
            )

            # Update variables for the next step.
            total_step += 1
            observations = next_observations

        episode.record_episode()
        episode.record_tensorboard(recording_step=episode.index)

        if finished:
            break

    # Wait on the remaining evaluations to finish.
    while collect_evaluations(evaluation_task_ids):
        time.sleep(0.1)

    env.close()
    raise ValueError("Path to model is invalid.")
if not os.listdir(args.models):
    raise ValueError("No models to evaluate.")

sorted_models = sorted(
    glob.glob(f"{args.models}/*"), key=lambda x: int(x.split("/")[-1])
)
base_dir = os.path.dirname(__file__)
pool_path = os.path.join(base_dir, "agent_pool.json")

ray.init()
try:
    agent_id = "AGENT_008"
    for episode in episodes(
        len(sorted_models), etag=policy_class, log_dir=args.log_dir
    ):
        model = sorted_models[episode.index]
        print("model: ", model)
        episode_count = model.split("/")[-1]
        episode.eval_mode()
        episode.info[episode.active_tag] = ray.get(
            [
                evaluate.remote(
                    experiment_dir=args.experiment_dir,
                    agent_id=agent_id,
                    policy_class=policy_class,
                    seed=episode.eval_count,
                    itr_count=0,
                    checkpoint_dir=model,
                    scenario_info=(args.task, args.level),
assert all(
    len(checkpoint_directory) == number_of_checkpoints
    for checkpoint_directory in directories_iterator
), "Not all agents have the same number of checkpoints saved"

# Define an 'etag' for this experiment's data directory based off policy_classes.
# E.g. from a {"000": "ultra.baselines.dqn:dqn-v0", "001": "ultra.baselines.ppo:ppo-v0"}
# policy_classes dict, transform it to an etag of "dqn-v0:ppo-v0-evaluation".
etag = (
    ":".join([policy_classes[agent_id].split(":")[-1] for agent_id in agent_ids])
    + "-evaluation"
)

ray.init()
try:
    for episode in episodes(
        number_of_checkpoints,
        etag=etag,
        log_dir=args.log_dir,
    ):
        # Obtain a checkpoint directory for each agent.
        current_checkpoint_directories = {
            agent_id: agent_directories[episode.index]
            for agent_id, agent_directories in agent_checkpoint_directories.items()
        }
        episode.eval_mode()
        episode.info[episode.active_tag] = ray.get(
            [
                evaluate.remote(
                    experiment_dir=args.experiment_dir,
                    agent_ids=agent_ids,
                    policy_classes=policy_classes,
                    seed=episode.eval_count,
def tune_train(
    config,
    scenario_info,
    num_episodes,
    policy_classes,
    max_episode_steps,
    save_rate,
    timestep_sec,
    headless,
    seed,
    log_dir,
    metric,
):
    torch.set_num_threads(1)
    total_step = 0
    finished = False

    assert len(policy_classes) == 1, "Can only tune with single agent experiments."

    # Make agent IDs in the form of 000, 001, ..., 010, 011, ..., 999, 1000, ...
    agent_ids = [
        "0" * max(0, 3 - len(str(i))) + str(i) for i in range(len(policy_classes))
    ]
    # Assign the policy classes to their associated IDs.
    agent_classes = {
        agent_id: policy_class
        for agent_id, policy_class in zip(agent_ids, policy_classes)
    }
    # Create the agent specifications matched with their associated IDs.
    agent_specs = {
        agent_id: make(
            locator=policy_class,
            agent_params=config,
            max_episode_steps=max_episode_steps,
        )
        for agent_id, policy_class in agent_classes.items()
    }
    # Create the agents matched with their associated IDs.
    agents = {
        agent_id: agent_spec.build_agent()
        for agent_id, agent_spec in agent_specs.items()
    }

    # Create the environment.
    env = gym.make(
        "ultra.env:ultra-v0",
        agent_specs=agent_specs,
        scenario_info=scenario_info,
        headless=headless,
        timestep_sec=timestep_sec,
        seed=seed,
    )

    # Define an 'etag' for this experiment's data directory based off policy_classes.
    # E.g. from a ["ultra.baselines.dqn:dqn-v0", "ultra.baselines.ppo:ppo-v0"]
    # policy_classes list, transform it to an etag of "dqn-v0:ppo-v0".
    etag = ":".join([policy_class.split(":")[-1] for policy_class in policy_classes])

    for episode in episodes(num_episodes, etag=etag, log_dir=log_dir):
        # Reset the environment and retrieve the initial observations.
        observations = env.reset()
        dones = {"__all__": False}
        infos = None
        episode.reset()
        experiment_dir = episode.experiment_dir

        # Save relevant agent metadata.
        if not os.path.exists(f"{experiment_dir}/agent_metadata.pkl"):
            if not os.path.exists(experiment_dir):
                os.makedirs(experiment_dir)
            with open(f"{experiment_dir}/agent_metadata.pkl", "wb") as metadata_file:
                dill.dump(
                    {
                        "agent_ids": agent_ids,
                        "agent_classes": agent_classes,
                        "agent_specs": agent_specs,
                    },
                    metadata_file,
                    pickle.HIGHEST_PROTOCOL,
                )

        while not dones["__all__"]:
            # Break if any of the agents' step counts is 1000000 or greater.
            if any([episode.get_itr(agent_id) >= 1000000 for agent_id in agents]):
                finished = True
                break

            # Request and perform actions on each agent that received an observation.
            actions = {
                agent_id: agents[agent_id].act(observation, explore=True)
                for agent_id, observation in observations.items()
            }
            next_observations, rewards, dones, infos = env.step(actions)

            # Active agents are those that receive observations in this step and the
            # next step. Step each active agent (obtaining their network loss if
            # applicable).
            active_agent_ids = observations.keys() & next_observations.keys()
            loss_outputs = {
                agent_id: agents[agent_id].step(
                    state=observations[agent_id],
                    action=actions[agent_id],
                    reward=rewards[agent_id],
                    next_state=next_observations[agent_id],
                    done=dones[agent_id],
                    info=infos[agent_id],
                )
                for agent_id in active_agent_ids
            }

            # Record the data from this step.
            episode.record_step(
                agent_ids_to_record=active_agent_ids,
                infos=infos,
                rewards=rewards,
                total_step=total_step,
                loss_outputs=loss_outputs,
            )

            # Update variables for the next step.
            total_step += 1
            observations = next_observations

        # Normalize the data and record this episode on TensorBoard.
        episode.record_episode()
        episode.record_tensorboard(recording_step=episode.index)

        # Save the agents if we have reached the save rate.
        if (episode.index + 1) % save_rate == 0:
            for agent_id in agent_ids:
                checkpoint_directory = episode.checkpoint_dir(agent_id, episode.index)
                agents[agent_id].save(checkpoint_directory)

        # Average the metric over the number of agents (here, a single agent).
        tune_value = sum(
            [
                episode.info[episode.active_tag][agent_id].data[metric]
                for agent_id in agent_ids
            ]
        ) / len(agent_ids)
        tune.report(**{metric: tune_value})

        if finished:
            break

    env.close()
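

# Example (assumption): a minimal sketch of launching tune_train with Ray Tune, since
# it reports results via tune.report. The search space, metric name, and argument
# values are illustrative placeholders, not values fixed by the source.
if __name__ == "__main__":
    from functools import partial

    from ray import tune

    analysis = tune.run(
        partial(
            tune_train,
            scenario_info=("1", "easy"),  # assumed (task, level) tuple
            num_episodes=100,
            policy_classes=["ultra.baselines.sac:sac-v0"],
            max_episode_steps=200,
            save_rate=10,
            timestep_sec=0.1,
            headless=True,
            seed=2,
            log_dir="logs",
            metric="episode_reward",
        ),
        config={"lr": tune.loguniform(1e-4, 1e-2)},  # assumed tunable hyperparameter
        metric="episode_reward",
        mode="max",
    )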