def replay_entrypoint(
    save_directory,
    id,
    wrapped_agent_locator,
    wrapped_agent_params=None,
    read=False,
):
    if wrapped_agent_params is None:
        wrapped_agent_params = {}
    from .replay_agent import ReplayAgent

    internal_spec = make(wrapped_agent_locator, **wrapped_agent_params)
    global social_index
    global replay_save_dir
    global replay_read
    spec = AgentSpec(
        interface=internal_spec.interface,
        agent_params={
            "save_directory": replay_save_dir,
            "id": f"{id}_{social_index}",
            "internal_spec": internal_spec,
            "wrapped_agent_params": wrapped_agent_params,
            "read": replay_read,
        },
        agent_builder=ReplayAgent,
    )
    social_index += 1
    return spec
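# Usage sketch (hypothetical argument values, not from this repo): wrapping an
# existing agent locator so the wrapped agent's experience can be saved for
# replay. Note that the save_directory and read arguments are superseded by the
# module-level replay_save_dir and replay_read globals, which this sketch
# assumes have been initialized beforehand (along with social_index).
#
# spec = replay_entrypoint(
#     save_directory="./replays",  # ignored in favor of the replay_save_dir global
#     id="agent_007",
#     wrapped_agent_locator="ultra.baselines.ppo:ppo-v0",
# )
# agent = spec.build_agent()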
def _agent_spec_callback(self, ros_agent_spec: AgentSpec):
    assert (
        len(ros_agent_spec.tasks) == 1
    ), "more than 1 task per agent is not yet supported"
    task = ros_agent_spec.tasks[0]
    task_params = json.loads(task.params_json) if task.params_json else {}
    task_version = task.task_ver or "latest"
    agent_locator = f"{self._zoo_module}:{task.task_ref}-{task_version}"
    agent_spec = None
    try:
        agent_spec = registry.make(agent_locator, **task_params)
    except ImportError as ie:
        rospy.logerr(f"Unable to locate agent with locator={agent_locator}: {ie}")
    if not agent_spec:
        rospy.logwarn(
            f"got unknown task_ref '{task.task_ref}' in AgentSpec message with params='{task.params_json}'. ignoring."
        )
        return
    if (
        ros_agent_spec.end_pose.position.x != 0.0
        or ros_agent_spec.end_pose.position.y != 0.0
    ):
        goal = PositionalGoal(
            (
                ros_agent_spec.end_pose.position.x,
                ros_agent_spec.end_pose.position.y,
            ),
            ros_agent_spec.veh_length,
        )
    else:
        goal = EndlessGoal()
    mission = Mission(
        start=Start.from_pose(ROSDriver._pose_from_ros(ros_agent_spec.start_pose)),
        goal=goal,
        # TODO: how to prevent spawning on top of another existing vehicle?
        # (see how it's done in SUMO traffic)
        entry_tactic=default_entry_tactic(ros_agent_spec.start_speed),
        vehicle_spec=VehicleSpec(
            veh_id=f"veh_for_agent_{ros_agent_spec.agent_id}",
            veh_config_type=ROSDriver._decode_vehicle_type(ros_agent_spec.veh_type),
            dimensions=Dimensions(
                ros_agent_spec.veh_length,
                ros_agent_spec.veh_width,
                ros_agent_spec.veh_height,
            ),
        ),
    )
    with self._reset_lock:
        if (
            ros_agent_spec.agent_id in self._agents
            or ros_agent_spec.agent_id in self._agents_to_add
        ):
            rospy.logwarn(
                f"trying to add new agent with existing agent_id '{ros_agent_spec.agent_id}'. ignoring."
            )
            return
        self._agents_to_add[ros_agent_spec.agent_id] = (agent_spec, mission)
def test_check_agents_from_pool(self):
    with open("ultra/agent_pool.json") as f:
        data = json.load(f)
        for policy in data["agents"].keys():
            policy_path = data["agents"][policy]["path"]
            policy_locator = data["agents"][policy]["locator"]
            policy_class = str(policy_path) + ":" + str(policy_locator)
            try:
                spec = make(locator=policy_class)
                agent = spec.build_agent()
            except ImportError as err:
                self.fail(f"Could not build agent for '{policy_class}': {err}")
def prepare_test_env_agent(headless=True):
    timestep_sec = 0.1
    # Action space: [throttle, brake, steering]
    policy_class = "ultra.baselines.ppo:ppo-v0"
    spec = make(locator=policy_class)
    env = gym.make(
        "ultra.env:ultra-v0",
        agent_specs={AGENT_ID: spec},  # AGENT_ID is a module-level constant.
        scenario_info=("00", "easy"),
        headless=headless,
        timestep_sec=timestep_sec,
        seed=seed,  # seed is a module-level constant in this test module.
    )
    agent = spec.build_agent()
    return agent, env
def build_agents(policy_classes, policy_ids, max_episode_steps):
    # Make agent IDs in the form of 000, 001, ..., 999, 1000, ...;
    # or use the provided policy_ids if available.
    agent_ids = (
        [str(i).zfill(3) for i in range(len(policy_classes))]
        if not policy_ids
        else policy_ids
    )
    # Ensure there is an ID for each policy, and a policy for each ID.
    assert len(agent_ids) == len(policy_classes), (
        "The number of agent IDs provided ({}) must be equal to "
        "the number of policy classes provided ({}).".format(
            len(agent_ids), len(policy_classes)
        )
    )
    # Assign the policy classes to their associated ID.
    agent_classes = {
        agent_id: policy_class
        for agent_id, policy_class in zip(agent_ids, policy_classes)
    }
    # Create the agent specifications matched with their associated ID.
    agent_specs = {
        agent_id: make(locator=policy_class, max_episode_steps=max_episode_steps)
        for agent_id, policy_class in agent_classes.items()
    }
    # Create the agents matched with their associated ID.
    agents = {
        agent_id: agent_spec.build_agent()
        for agent_id, agent_spec in agent_specs.items()
    }
    # Define an 'etag' for this experiment's data directory based off policy_classes.
    # E.g. a ["ultra.baselines.dqn:dqn-v0", "ultra.baselines.ppo:ppo-v0"]
    # policy_classes list becomes an etag of "dqn-v0:ppo-v0".
    etag = ":".join([policy_class.split(":")[-1] for policy_class in policy_classes])
    return agent_ids, agent_classes, agent_specs, agents, etag
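# Usage sketch for build_agents (assumed values: the two locators come from the
# etag example in the comments above, and max_episode_steps=200 is illustrative).
#
# agent_ids, agent_classes, agent_specs, agents, etag = build_agents(
#     policy_classes=["ultra.baselines.dqn:dqn-v0", "ultra.baselines.ppo:ppo-v0"],
#     policy_ids=None,  # fall back to the generated IDs "000" and "001"
#     max_episode_steps=200,
# )
# assert agent_ids == ["000", "001"]
# assert etag == "dqn-v0:ppo-v0"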
def test_agent_is_instance_policy(self):
    policy_class = "ultra.baselines.sac:sac-v0"
    spec = make(locator=policy_class)
    agent = spec.build_agent()
    self.assertIsInstance(agent, SACPolicy)
def test_spec_is_instance_agentspec(self):
    policy_class = "ultra.baselines.sac:sac-v0"
    spec = make(locator=policy_class)
    self.assertIsInstance(spec, AgentSpec)
def train(
    scenario_info,
    num_episodes,
    policy_classes,
    max_episode_steps,
    eval_info,
    timestep_sec,
    headless,
    seed,
    log_dir,
    policy_ids=None,
):
    torch.set_num_threads(1)
    total_step = 0
    finished = False

    # Make agent IDs in the form of 000, 001, ..., 999, 1000, ...;
    # or use the provided policy_ids if available.
    agent_ids = (
        [str(i).zfill(3) for i in range(len(policy_classes))]
        if not policy_ids
        else policy_ids
    )
    # Ensure there is an ID for each policy, and a policy for each ID.
    assert len(agent_ids) == len(policy_classes), (
        "The number of agent IDs provided ({}) must be equal to "
        "the number of policy classes provided ({}).".format(
            len(agent_ids), len(policy_classes)
        )
    )
    # Assign the policy classes to their associated ID.
    agent_classes = {
        agent_id: policy_class
        for agent_id, policy_class in zip(agent_ids, policy_classes)
    }
    # Create the agent specifications matched with their associated ID.
    agent_specs = {
        agent_id: make(locator=policy_class, max_episode_steps=max_episode_steps)
        for agent_id, policy_class in agent_classes.items()
    }
    # Create the agents matched with their associated ID.
    agents = {
        agent_id: agent_spec.build_agent()
        for agent_id, agent_spec in agent_specs.items()
    }
    # Create the environment.
    env = gym.make(
        "ultra.env:ultra-v0",
        agent_specs=agent_specs,
        scenario_info=scenario_info,
        headless=headless,
        timestep_sec=timestep_sec,
        seed=seed,
    )
    # Define an 'etag' for this experiment's data directory based off policy_classes.
    # E.g. a ["ultra.baselines.dqn:dqn-v0", "ultra.baselines.ppo:ppo-v0"]
    # policy_classes list becomes an etag of "dqn-v0:ppo-v0".
    etag = ":".join([policy_class.split(":")[-1] for policy_class in policy_classes])

    for episode in episodes(num_episodes, etag=etag, log_dir=log_dir):
        # Reset the environment and retrieve the initial observations.
        observations = env.reset()
        dones = {"__all__": False}
        infos = None
        episode.reset()
        experiment_dir = episode.experiment_dir

        # Save relevant agent metadata.
        if not os.path.exists(f"{experiment_dir}/agent_metadata.pkl"):
            if not os.path.exists(experiment_dir):
                os.makedirs(experiment_dir)
            with open(f"{experiment_dir}/agent_metadata.pkl", "wb") as metadata_file:
                dill.dump(
                    {
                        "agent_ids": agent_ids,
                        "agent_classes": agent_classes,
                        "agent_specs": agent_specs,
                    },
                    metadata_file,
                    pickle.HIGHEST_PROTOCOL,
                )

        while not dones["__all__"]:
            # Break if any of the agents' step counts is 1000000 or greater.
            if any([episode.get_itr(agent_id) >= 1000000 for agent_id in agents]):
                finished = True
                break

            # Perform the evaluation check.
            evaluation_check(
                agents=agents,
                agent_ids=agent_ids,
                policy_classes=agent_classes,
                episode=episode,
                log_dir=log_dir,
                max_episode_steps=max_episode_steps,
                **eval_info,
                **env.info,
            )

            # Request and perform actions on each agent that received an observation.
            actions = {
                agent_id: agents[agent_id].act(observation, explore=True)
                for agent_id, observation in observations.items()
            }
            next_observations, rewards, dones, infos = env.step(actions)

            # Active agents are those that receive observations in this step and the
            # next step. Step each active agent (obtaining their network loss if
            # applicable).
            active_agent_ids = observations.keys() & next_observations.keys()
            loss_outputs = {
                agent_id: agents[agent_id].step(
                    state=observations[agent_id],
                    action=actions[agent_id],
                    reward=rewards[agent_id],
                    next_state=next_observations[agent_id],
                    done=dones[agent_id],
                    info=infos[agent_id],
                )
                for agent_id in active_agent_ids
            }

            # Record the data from this step.
            episode.record_step(
                agent_ids_to_record=active_agent_ids,
                infos=infos,
                rewards=rewards,
                total_step=total_step,
                loss_outputs=loss_outputs,
            )

            # Update variables for the next step.
            total_step += 1
            observations = next_observations

        # Normalize the data and record this episode on tensorboard.
        episode.record_episode()
        episode.record_tensorboard()

        if finished:
            break

    env.close()
def evaluate(
    experiment_dir,
    seed,
    agent_ids,
    policy_classes,
    checkpoint_dirs,
    scenario_info,
    num_episodes,
    max_episode_steps,
    headless,
    timestep_sec,
    log_dir,
    eval_mode=True,
):
    torch.set_num_threads(1)

    # Create the agent specifications matched with their associated ID.
    agent_specs = {
        agent_id: make(
            locator=policy_classes[agent_id],
            checkpoint_dir=checkpoint_dirs[agent_id],
            experiment_dir=experiment_dir,
            max_episode_steps=max_episode_steps,
            agent_id=agent_id,
        )
        for agent_id in agent_ids
    }
    # Create the environment with the specified agents.
    env = gym.make(
        "ultra.env:ultra-v0",
        agent_specs=agent_specs,
        scenario_info=scenario_info,
        headless=headless,
        timestep_sec=timestep_sec,
        seed=seed,
        eval_mode=eval_mode,
    )
    # Build each agent from its specification.
    agents = {
        agent_id: agent_spec.build_agent()
        for agent_id, agent_spec in agent_specs.items()
    }
    # A dictionary to hold the evaluation data for each agent.
    summary_log = {agent_id: LogInfo() for agent_id in agent_ids}
    # Define an 'etag' for this experiment's data directory based off policy_classes.
    # E.g. a ["ultra.baselines.dqn:dqn-v0", "ultra.baselines.ppo:ppo-v0"]
    # policy_classes list becomes an etag of "dqn-v0:ppo-v0".
    # Here policy_classes maps agent IDs to locators, so iterate over its values.
    etag = ":".join(
        [policy_class.split(":")[-1] for policy_class in policy_classes.values()]
    )

    for episode in episodes(num_episodes, etag=etag, log_dir=log_dir):
        # Reset the environment and retrieve the initial observations.
        observations = env.reset()
        dones = {"__all__": False}
        infos = None
        episode.reset(mode="Evaluation")

        while not dones["__all__"]:
            # Get and perform the available agents' actions.
            actions = {
                agent_id: agents[agent_id].act(observation, explore=False)
                for agent_id, observation in observations.items()
            }
            observations, rewards, dones, infos = env.step(actions)

            # Record the data from this step.
            episode.record_step(
                agent_ids_to_record=infos.keys(), infos=infos, rewards=rewards
            )

        episode.record_episode()
        for agent_id, agent_data in episode.info[episode.active_tag].items():
            for key, value in agent_data.data.items():
                if not isinstance(value, (list, tuple, np.ndarray)):
                    summary_log[agent_id].data[key] += value

    # Normalize by the number of evaluation episodes.
    for agent_id, agent_data in summary_log.items():
        for key, value in agent_data.data.items():
            if not isinstance(value, (list, tuple, np.ndarray)):
                summary_log[agent_id].data[key] /= num_episodes

    env.close()
    return summary_log
def evaluate(
    experiment_dir,
    seed,
    agent_id,
    policy_class,
    itr_count,
    checkpoint_dir,
    scenario_info,
    num_episodes,
    headless,
    timestep_sec,
):
    torch.set_num_threads(1)
    spec = make(
        locator=policy_class,
        checkpoint_dir=checkpoint_dir,
        experiment_dir=experiment_dir,
    )
    env = gym.make(
        "ultra.env:ultra-v0",
        agent_specs={agent_id: spec},
        scenario_info=scenario_info,
        headless=headless,
        timestep_sec=timestep_sec,
        seed=seed,
        eval_mode=True,
    )
    agent = spec.build_agent()
    summary_log = LogInfo()
    logs = []

    for episode in episodes(num_episodes):
        observations = env.reset()
        state = observations[agent_id]
        dones, infos = {"__all__": False}, None
        episode.reset(mode="Evaluation")

        while not dones["__all__"]:
            action = agent.act(state, explore=False)
            observations, rewards, dones, infos = env.step({agent_id: action})
            state = observations[agent_id]
            episode.record_step(agent_id=agent_id, infos=infos, rewards=rewards)

        episode.record_episode()
        logs.append(episode.info[episode.active_tag].data)
        for key, value in episode.info[episode.active_tag].data.items():
            if not isinstance(value, (list, tuple, np.ndarray)):
                summary_log.data[key] += value

    # Normalize by the number of evaluation episodes.
    for key, val in summary_log.data.items():
        if not isinstance(val, (list, tuple, np.ndarray)):
            summary_log.data[key] /= num_episodes

    env.close()
    return summary_log
def train(
    scenario_info,
    num_episodes,
    max_episode_steps,
    policy_class,
    eval_info,
    timestep_sec,
    headless,
    seed,
    log_dir,
):
    torch.set_num_threads(1)
    total_step = 0
    finished = False
    AGENT_ID = "007"

    spec = make(locator=policy_class, max_episode_steps=max_episode_steps)
    env = gym.make(
        "ultra.env:ultra-v0",
        agent_specs={AGENT_ID: spec},
        scenario_info=scenario_info,
        headless=headless,
        timestep_sec=timestep_sec,
        seed=seed,
    )
    agent = spec.build_agent()

    for episode in episodes(num_episodes, etag=policy_class, log_dir=log_dir):
        observations = env.reset()
        state = observations[AGENT_ID]
        dones, infos = {"__all__": False}, None
        episode.reset()
        experiment_dir = episode.experiment_dir

        # Save the entire spec (policy_params, reward_adapter, observation_adapter).
        if not os.path.exists(f"{experiment_dir}/spec.pkl"):
            if not os.path.exists(experiment_dir):
                os.makedirs(experiment_dir)
            with open(f"{experiment_dir}/spec.pkl", "wb") as spec_output:
                dill.dump(spec, spec_output, pickle.HIGHEST_PROTOCOL)

        while not dones["__all__"]:
            if episode.get_itr(AGENT_ID) >= 1000000:
                finished = True
                break
            evaluation_check(
                agent=agent,
                agent_id=AGENT_ID,
                policy_class=policy_class,
                episode=episode,
                log_dir=log_dir,
                max_episode_steps=max_episode_steps,
                **eval_info,
                **env.info,
            )
            action = agent.act(state, explore=True)
            observations, rewards, dones, infos = env.step({AGENT_ID: action})
            next_state = observations[AGENT_ID]

            loss_output = agent.step(
                state=state,
                action=action,
                reward=rewards[AGENT_ID],
                next_state=next_state,
                done=dones[AGENT_ID],
            )
            episode.record_step(
                agent_id=AGENT_ID,
                infos=infos,
                rewards=rewards,
                total_step=total_step,
                loss_output=loss_output,
            )
            total_step += 1
            state = next_state

        episode.record_episode()
        episode.record_tensorboard(agent_id=AGENT_ID)

        if finished:
            break

    env.close()
def to_agent_spec(self) -> AgentSpec:
    """Generate an agent spec."""
    return make(locator=self.agent_locator, **self.policy_kwargs)
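# Minimal sketch (hypothetical class, not from this repo) of the kind of config
# holder to_agent_spec() could live on: all it needs is an agent_locator string
# and a policy_kwargs dict to forward to make().
from dataclasses import dataclass, field
from typing import Any, Dict


@dataclass
class AgentConfig:
    agent_locator: str = "ultra.baselines.ppo:ppo-v0"
    policy_kwargs: Dict[str, Any] = field(default_factory=dict)

    def to_agent_spec(self) -> AgentSpec:
        """Generate an agent spec from the stored locator and kwargs."""
        return make(locator=self.agent_locator, **self.policy_kwargs)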
def tune_train(
    config,
    scenario_info,
    num_episodes,
    policy_classes,
    max_episode_steps,
    save_rate,
    timestep_sec,
    headless,
    seed,
    log_dir,
    metric,
):
    torch.set_num_threads(1)
    total_step = 0
    finished = False

    assert len(policy_classes) == 1, "Can only tune with single agent experiments."

    # Make agent IDs in the form of 000, 001, ..., 999, 1000, ...
    agent_ids = [str(i).zfill(3) for i in range(len(policy_classes))]
    # Assign the policy classes to their associated ID.
    agent_classes = {
        agent_id: policy_class
        for agent_id, policy_class in zip(agent_ids, policy_classes)
    }
    # Create the agent specifications matched with their associated ID.
    agent_specs = {
        agent_id: make(
            locator=policy_class,
            agent_params=config,
            max_episode_steps=max_episode_steps,
        )
        for agent_id, policy_class in agent_classes.items()
    }
    # Create the agents matched with their associated ID.
    agents = {
        agent_id: agent_spec.build_agent()
        for agent_id, agent_spec in agent_specs.items()
    }
    # Create the environment.
    env = gym.make(
        "ultra.env:ultra-v0",
        agent_specs=agent_specs,
        scenario_info=scenario_info,
        headless=headless,
        timestep_sec=timestep_sec,
        seed=seed,
    )
    # Define an 'etag' for this experiment's data directory based off policy_classes.
    # E.g. a ["ultra.baselines.dqn:dqn-v0", "ultra.baselines.ppo:ppo-v0"]
    # policy_classes list becomes an etag of "dqn-v0:ppo-v0".
    etag = ":".join([policy_class.split(":")[-1] for policy_class in policy_classes])

    for episode in episodes(num_episodes, etag=etag, log_dir=log_dir):
        # Reset the environment and retrieve the initial observations.
        observations = env.reset()
        dones = {"__all__": False}
        infos = None
        episode.reset()
        experiment_dir = episode.experiment_dir

        # Save relevant agent metadata.
        if not os.path.exists(f"{experiment_dir}/agent_metadata.pkl"):
            if not os.path.exists(experiment_dir):
                os.makedirs(experiment_dir)
            with open(f"{experiment_dir}/agent_metadata.pkl", "wb") as metadata_file:
                dill.dump(
                    {
                        "agent_ids": agent_ids,
                        "agent_classes": agent_classes,
                        "agent_specs": agent_specs,
                    },
                    metadata_file,
                    pickle.HIGHEST_PROTOCOL,
                )

        while not dones["__all__"]:
            # Break if any of the agents' step counts is 1000000 or greater.
            if any([episode.get_itr(agent_id) >= 1000000 for agent_id in agents]):
                finished = True
                break

            # Request and perform actions on each agent that received an observation.
            actions = {
                agent_id: agents[agent_id].act(observation, explore=True)
                for agent_id, observation in observations.items()
            }
            next_observations, rewards, dones, infos = env.step(actions)

            # Active agents are those that receive observations in this step and the
            # next step. Step each active agent (obtaining their network loss if
            # applicable).
            active_agent_ids = observations.keys() & next_observations.keys()
            loss_outputs = {
                agent_id: agents[agent_id].step(
                    state=observations[agent_id],
                    action=actions[agent_id],
                    reward=rewards[agent_id],
                    next_state=next_observations[agent_id],
                    done=dones[agent_id],
                    info=infos[agent_id],
                )
                for agent_id in active_agent_ids
            }

            # Record the data from this step.
            episode.record_step(
                agent_ids_to_record=active_agent_ids,
                infos=infos,
                rewards=rewards,
                total_step=total_step,
                loss_outputs=loss_outputs,
            )

            # Update variables for the next step.
            total_step += 1
            observations = next_observations

        # Normalize the data and record this episode on tensorboard.
        episode.record_episode()
        episode.record_tensorboard(recording_step=episode.index)

        # Save the agents if we have reached the save rate.
        if (episode.index + 1) % save_rate == 0:
            for agent_id in agent_ids:
                checkpoint_directory = episode.checkpoint_dir(agent_id, episode.index)
                agents[agent_id].save(checkpoint_directory)

        # Report the metric to Ray Tune, averaged over the number of agents (1 here).
        tune_value = sum(
            [
                episode.info[episode.active_tag][agent_id].data[metric]
                for agent_id in agent_ids
            ]
        ) / len(agent_ids)
        tune.report(**{metric: tune_value})

        if finished:
            break

    env.close()
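# Usage sketch (assumed values) for driving tune_train with Ray Tune. The keys in
# the search space depend on what the chosen baseline's agent_params accepts, so
# the "lr" entry below is purely illustrative, as is the "episode_reward" metric
# name; both are assumptions, not part of this repo.
#
# from ray import tune
#
# analysis = tune.run(
#     tune.with_parameters(
#         tune_train,
#         scenario_info=("00", "easy"),
#         num_episodes=100,
#         policy_classes=["ultra.baselines.ppo:ppo-v0"],  # single agent only
#         max_episode_steps=200,
#         save_rate=10,
#         timestep_sec=0.1,
#         headless=True,
#         seed=2,
#         log_dir="logs",
#         metric="episode_reward",
#     ),
#     config={"lr": tune.loguniform(1e-5, 1e-3)},
# )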