def __new__(
    cls,
    policy_class,
    action_type,
    checkpoint_dir=None,
    task=None,
    max_episode_steps=1200,
    experiment_dir=None,
):
    if experiment_dir:
        print(f"LOADING SPEC from {experiment_dir}/spec.pkl")
        # Rebuild the spec saved by a previous experiment, pointing it at the
        # given checkpoint directory.
        with open(f"{experiment_dir}/spec.pkl", "rb") as spec_file:
            spec = dill.load(spec_file)
        new_spec = AgentSpec(
            interface=spec.interface,
            agent_params=dict(
                policy_params=spec.agent_params["policy_params"],
                checkpoint_dir=checkpoint_dir,
            ),
            agent_builder=spec.policy_builder,
            observation_adapter=spec.observation_adapter,
            reward_adapter=spec.reward_adapter,
        )
        spec = new_spec
    else:
        adapter = BaselineAdapter()
        # Load the policy's hyperparameters from the params.yaml that lives
        # next to the policy's source file.
        policy_dir = "/".join(inspect.getfile(policy_class).split("/")[:-1])
        policy_params = load_yaml(f"{policy_dir}/params.yaml")
        spec = AgentSpec(
            interface=AgentInterface(
                waypoints=Waypoints(lookahead=20),
                neighborhood_vehicles=NeighborhoodVehicles(200),
                action=action_type,
                rgb=False,
                max_episode_steps=max_episode_steps,
                debug=True,
            ),
            agent_params=dict(
                policy_params=policy_params, checkpoint_dir=checkpoint_dir
            ),
            agent_builder=policy_class,
            observation_adapter=adapter.observation_adapter,
            reward_adapter=adapter.reward_adapter,
        )
    return spec
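# A minimal, non-authoritative usage sketch for the spec builder above.
# `MyPolicy` stands in for any policy class with a params.yaml beside its
# source file; the directory paths are assumptions, not repository defaults.
def _example_build_specs(MyPolicy):
    # Fresh spec, with hyperparameters read from the policy's params.yaml:
    fresh_spec = BaselineAgentSpec(
        policy_class=MyPolicy,
        action_type=ActionSpaceType.Continuous,
    )
    # Rebuild the spec saved by a previous run, resuming from its checkpoint:
    resumed_spec = BaselineAgentSpec(
        policy_class=MyPolicy,
        action_type=ActionSpaceType.Continuous,
        experiment_dir="logs/experiment",  # assumed; must contain spec.pkl
        checkpoint_dir="logs/experiment/models",  # assumed checkpoint location
    )
    return fresh_spec, resumed_spec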
def __new__(
    cls,
    policy_class,
    action_type,
    checkpoint_dir=None,
    task=None,
    max_episode_steps=1200,
    experiment_dir=None,
    agent_id="",
):
    if experiment_dir:
        print(
            f"Loading spec for {agent_id} from {experiment_dir}/agent_metadata.pkl"
        )
        # Rebuild this agent's spec from the saved metadata, pointing it at
        # the given checkpoint directory.
        with open(f"{experiment_dir}/agent_metadata.pkl", "rb") as metadata_file:
            agent_metadata = dill.load(metadata_file)
        spec = agent_metadata["agent_specs"][agent_id]
        new_spec = AgentSpec(
            interface=spec.interface,
            agent_params=dict(
                policy_params=spec.agent_params["policy_params"],
                checkpoint_dir=checkpoint_dir,
            ),
            agent_builder=spec.policy_builder,
            observation_adapter=spec.observation_adapter,
            reward_adapter=spec.reward_adapter,
        )
        spec = new_spec
    else:
        # Resolve the baseline's name from its class by searching agent_pool.json.
        base_dir = os.path.join(os.path.dirname(__file__), "../")
        pool_path = os.path.join(base_dir, "agent_pool.json")

        policy_class_name = policy_class.__name__
        agent_name = None

        with open(pool_path, "r") as f:
            data = json.load(f)
            for agent in data["agents"]:
                if data["agents"][agent]["class"] == policy_class_name:
                    agent_name = data["agents"][agent]["name"]
                    break

        assert agent_name is not None, f"{policy_class_name} not found in agent pool"

        adapter = BaselineAdapter(agent_name)
        spec = AgentSpec(
            interface=AgentInterface(
                waypoints=Waypoints(lookahead=20),
                neighborhood_vehicles=NeighborhoodVehicles(200),
                action=action_type,
                rgb=False,
                max_episode_steps=max_episode_steps,
                debug=True,
            ),
            agent_params=dict(
                policy_params=adapter.policy_params, checkpoint_dir=checkpoint_dir
            ),
            agent_builder=policy_class,
            observation_adapter=adapter.observation_adapter,
            reward_adapter=adapter.reward_adapter,
        )
    return spec
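# The lookup above only relies on each entry of agent_pool.json exposing
# "class" and "name" keys. A minimal pool consistent with that access pattern
# might look like the following (the specific entries and class names are
# illustrative assumptions, not taken from the repository):
#
#   {
#     "agents": {
#       "sac": {"name": "sac", "class": "SACPolicy"},
#       "ppo": {"name": "ppo", "class": "PPOPolicy"}
#     }
#   }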
# THE SOFTWARE.

import unittest

import gym
import numpy as np
import ray

from smarts.core.controllers import ActionSpaceType
from smarts.zoo.registry import make
from ultra.baselines.adapter import BaselineAdapter
from ultra.baselines.agent_spec import BaselineAgentSpec

AGENT_ID = "001"
seed = 2
adapter = BaselineAdapter()


class AdapterTest(unittest.TestCase):
    def test_observation_features(self):
        @ray.remote(max_calls=1, num_gpus=0, num_cpus=1)
        def run_experiment():
            agent, env = prepare_test_env_agent()
            observations = env.reset()
            env.close()
            return observations

        ray.init(ignore_reinit_error=True)
        observations = ray.get(run_experiment.remote())
        ray.shutdown()
        print(observations[AGENT_ID])
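# The test above depends on a `prepare_test_env_agent` helper that is not shown
# in this snippet. A minimal sketch under stated assumptions: `policy_class` is
# any baseline policy registered in agent_pool.json, "ultra.env:ultra-v0" is
# assumed to be the gym id under which UltraEnv is registered, and the
# task/level pair is illustrative.
def prepare_test_env_agent(policy_class, headless=True):
    timestep_sec = 0.1
    spec = BaselineAgentSpec(
        policy_class=policy_class,
        action_type=ActionSpaceType.Continuous,
        max_episode_steps=2,
    )
    env = gym.make(
        "ultra.env:ultra-v0",  # assumed gym id
        agent_specs={AGENT_ID: spec},
        scenario_info=("00", "easy"),  # assumed task/level pair
        headless=headless,
        timestep_sec=timestep_sec,
        seed=seed,
    )
    agent = spec.build_agent()
    return agent, env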
class UltraEnv(HiWayEnv):
    def __init__(
        self,
        agent_specs,
        scenario_info,
        headless,
        timestep_sec,
        seed,
        eval_mode=False,
        ordered_scenarios=False,
    ):
        self.timestep_sec = timestep_sec
        self.headless = headless
        self.scenario_info = scenario_info
        self.scenarios = self.get_task(scenario_info[0], scenario_info[1])
        if not eval_mode:
            _scenarios = glob.glob(f"{self.scenarios['train']}")
        else:
            _scenarios = glob.glob(f"{self.scenarios['test']}")

        self.ultra_scores = BaselineAdapter()

        super().__init__(
            scenarios=_scenarios,
            agent_specs=agent_specs,
            headless=headless,
            timestep_sec=timestep_sec,
            seed=seed,
            visdom=False,
        )

        if ordered_scenarios:
            scenario_roots = []
            for root in _scenarios:
                if Scenario.is_valid_scenario(root):
                    # The case that this is a scenario root
                    scenario_roots.append(root)
                else:
                    # The case that this is a directory of scenarios: find each of the roots
                    scenario_roots.extend(Scenario.discover_scenarios(root))
            # Also see `smarts.env.HiwayEnv`
            self._scenarios_iterator = cycle(
                Scenario.variations_for_all_scenario_roots(
                    scenario_roots, list(agent_specs.keys())
                )
            )

    def generate_logs(self, observation, highwayenv_score):
        ego_state = observation.ego_vehicle_state
        start = observation.ego_vehicle_state.mission.start
        goal = observation.ego_vehicle_state.mission.goal
        path = get_path_to_goal(
            goal=goal, paths=observation.waypoint_paths, start=start
        )
        closest_wp, _ = get_closest_waypoint(
            num_lookahead=100,
            goal_path=path,
            ego_position=ego_state.position,
            ego_heading=ego_state.heading,
        )
        signed_dist_from_center = closest_wp.signed_lateral_error(ego_state.position)
        lane_width = closest_wp.lane_width * 0.5
        ego_dist_center = signed_dist_from_center / lane_width

        linear_jerk = np.linalg.norm(ego_state.linear_jerk)
        angular_jerk = np.linalg.norm(ego_state.angular_jerk)

        # Distance to goal
        ego_2d_position = ego_state.position[0:2]
        goal_dist = distance.euclidean(ego_2d_position, goal.position)

        angle_error = closest_wp.relative_heading(
            ego_state.heading
        )  # relative heading in radians, [-pi, pi]

        # Number of safety violations
        (
            ego_num_violations,
            social_num_violations,
        ) = ego_social_safety(
            observation,
            d_min_ego=1.0,
            t_c_ego=1.0,
            d_min_social=1.0,
            t_c_social=1.0,
            ignore_vehicle_behind=True,
        )

        info = dict(
            position=ego_state.position,
            speed=ego_state.speed,
            steering=ego_state.steering,
            heading=ego_state.heading,
            dist_center=abs(ego_dist_center),
            start=start,
            goal=goal,
            closest_wp=closest_wp,
            events=observation.events,
            ego_num_violations=ego_num_violations,
            social_num_violations=social_num_violations,
            goal_dist=goal_dist,
            linear_jerk=linear_jerk,
            angular_jerk=angular_jerk,
            env_score=self.ultra_scores.reward_adapter(observation, highwayenv_score),
        )
        return info

    def step(self, agent_actions):
        agent_actions = {
            agent_id: self._agent_specs[agent_id].action_adapter(action)
            for agent_id, action in agent_actions.items()
        }
        observations, rewards, agent_dones, extras = self._smarts.step(agent_actions)

        infos = {
            agent_id: {"score": value, "env_obs": observations[agent_id]}
            for agent_id, value in extras["scores"].items()
        }

        for agent_id in observations:
            agent_spec = self._agent_specs[agent_id]
            observation = observations[agent_id]
            reward = rewards[agent_id]
            info = infos[agent_id]
            rewards[agent_id] = agent_spec.reward_adapter(observation, reward)
            observations[agent_id] = agent_spec.observation_adapter(observation)
            infos[agent_id] = agent_spec.info_adapter(observation, reward, info)
            infos[agent_id]["logs"] = self.generate_logs(observation, reward)

        for done in agent_dones.values():
            self._dones_registered += 1 if done else 0
        agent_dones["__all__"] = self._dones_registered == len(self._agent_specs)

        return observations, rewards, agent_dones, infos

    def get_task(self, task_id, task_level):
        with open("ultra/config.yaml", "r") as task_file:
            scenarios = yaml.safe_load(task_file)["tasks"]
            task = scenarios[f"task{task_id}"][task_level]
        return task

    @property
    def info(self):
        return {
            "scenario_info": self.scenario_info,
            "timestep_sec": self.timestep_sec,
            "headless": self.headless,
        }
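# A minimal, non-authoritative driving loop for UltraEnv. It assumes an
# `agent_spec` built elsewhere (e.g. via BaselineAgentSpec), and that task "00"
# at level "easy" exists in ultra/config.yaml:
def _example_episode(agent_spec):
    env = UltraEnv(
        agent_specs={"AGENT-001": agent_spec},
        scenario_info=("00", "easy"),
        headless=True,
        timestep_sec=0.1,
        seed=2,
    )
    agent = agent_spec.build_agent()

    observations = env.reset()
    dones = {"__all__": False}
    while not dones["__all__"]:
        # The action adapter registered on the spec is applied inside step().
        action = agent.act(observations["AGENT-001"])
        observations, rewards, dones, infos = env.step({"AGENT-001": action})
    env.close()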
def train(
    task,
    num_episodes,
    max_episode_steps,
    rollout_fragment_length,
    policy,
    eval_info,
    timestep_sec,
    headless,
    seed,
    train_batch_size,
    sgd_minibatch_size,
    log_dir,
):
    agent_name = policy
    adapter = BaselineAdapter(agent_name)
    ModelCatalog.register_custom_model("fc_model", CustomFCModel)
    config = RllibAgent.rllib_default_config(agent_name)

    rllib_policies = {
        "default_policy": (
            None,
            adapter.observation_space,
            adapter.action_space,
            {
                "model": {
                    "custom_model": "fc_model",
                    "custom_model_config": {"adapter": adapter},
                }
            },
        )
    }
    agent_specs = {
        "AGENT-007": AgentSpec(
            interface=AgentInterface(
                waypoints=Waypoints(lookahead=20),
                neighborhood_vehicles=NeighborhoodVehicles(200),
                action=ActionSpaceType.Continuous,
                rgb=False,
                max_episode_steps=max_episode_steps,
                debug=True,
            ),
            agent_params={},
            agent_builder=None,
            observation_adapter=adapter.observation_adapter,
            reward_adapter=adapter.reward_adapter,
            # action_adapter=adapter.action_adapter,
        )
    }

    tune_config = {
        "env": RLlibUltraEnv,
        "log_level": "WARN",
        "callbacks": Callbacks,
        "framework": "torch",
        "num_workers": 1,
        "train_batch_size": train_batch_size,
        "sgd_minibatch_size": sgd_minibatch_size,
        "rollout_fragment_length": rollout_fragment_length,
        "in_evaluation": True,
        "evaluation_num_episodes": eval_info["eval_episodes"],
        # Evaluation occurs every `eval_rate` training iterations (episodes).
        "evaluation_interval": eval_info["eval_rate"],
        "evaluation_config": {
            "env_config": {
                "seed": seed,
                "scenario_info": task,
                "headless": headless,
                "eval_mode": True,
                "ordered_scenarios": False,
                "agent_specs": agent_specs,
                "timestep_sec": timestep_sec,
            },
            "explore": False,
        },
        "env_config": {
            "seed": seed,
            "scenario_info": task,
            "headless": headless,
            "eval_mode": False,
            "ordered_scenarios": False,
            "agent_specs": agent_specs,
            "timestep_sec": timestep_sec,
        },
        "multiagent": {"policies": rllib_policies},
    }
    config.update(tune_config)

    agent = RllibAgent(
        agent_name=agent_name,
        env=RLlibUltraEnv,
        config=tune_config,
        logger_creator=log_creator(log_dir),
    )

    # The iteration value in trainer.py (self._iterations) is technically the
    # number of episodes.
    for _ in range(num_episodes):
        results = agent.train()
        # Evaluation metrics will now be displayed on TensorBoard.
        agent.log_evaluation_metrics(results)
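# A hedged example invocation of train(); every argument value below is an
# assumption for illustration, not a default taken from the repository:
if __name__ == "__main__":
    train(
        task=("00", "easy"),
        num_episodes=100,
        max_episode_steps=1200,
        rollout_fragment_length=200,
        policy="ppo",  # assumed to name a baseline known to RllibAgent
        eval_info={"eval_episodes": 2, "eval_rate": 10},
        timestep_sec=0.1,
        headless=True,
        seed=2,
        train_batch_size=2000,
        sgd_minibatch_size=64,
        log_dir="logs",
    )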