def _intrfcs_init():
    return [
        [{"accelerometer": True}, {"accelerometer": False}],
        [{"drivable_area_grid_map": True}] * 2,
        [{"lidar": True}] * 2,
        [{"neighborhood_vehicles": True}] * 2,
        [{"ogm": True}] * 2,
        [{"rgb": True}] * 2,
        [{"waypoints": Waypoints(lookahead=1)}] * 2,
        [{"neighborhood_vehicles": True, "waypoints": Waypoints(lookahead=1)}] * 2,
    ]
def _intrfcs_obs():
    base_intrfc = {
        "accelerometer": True,
        "drivable_area_grid_map": True,
        "lidar": True,
        "neighborhood_vehicles": True,
        "ogm": True,
        "rgb": True,
        "waypoints": Waypoints(lookahead=1),
    }
    return [
        [base_intrfc] * 2,
        [dict(base_intrfc, **{"accelerometer": False})] * 2,
        [dict(base_intrfc, **{"waypoints": Waypoints(lookahead=50)})] * 2,
    ]
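# A minimal sketch (not from the original test file) of how fixtures such as
# _intrfcs_init() and _intrfcs_obs() are commonly consumed: each entry is a pair of
# keyword-argument dicts, and pytest parametrization builds one AgentInterface per
# dict. The test name and the equality assertion below are illustrative assumptions,
# not the repository's actual assertions.
import pytest
from smarts.core.agent_interface import AgentInterface


@pytest.mark.parametrize("intrfcs", _intrfcs_init())
def test_interface_pair_construction(intrfcs):
    kwargs_a, kwargs_b = intrfcs
    # Both dicts should construct valid interfaces without raising.
    interface_a = AgentInterface(**kwargs_a)
    interface_b = AgentInterface(**kwargs_b)
    # Pairs built from identical kwargs are expected to compare equal.
    if kwargs_a == kwargs_b:
        assert interface_a == interface_b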
def __new__(
    self,
    policy_class,
    action_type,
    checkpoint_dir=None,
    task=None,
    max_episode_steps=1200,
    experiment_dir=None,
):
    if experiment_dir:
        print(f"LOADING SPEC from {experiment_dir}/spec.pkl")
        with open(f"{experiment_dir}/spec.pkl", "rb") as spec_file:
            spec = dill.load(spec_file)
            new_spec = AgentSpec(
                interface=spec.interface,
                agent_params=dict(
                    policy_params=spec.agent_params["policy_params"],
                    checkpoint_dir=checkpoint_dir,
                ),
                agent_builder=spec.policy_builder,
                observation_adapter=spec.observation_adapter,
                reward_adapter=spec.reward_adapter,
            )
            spec = new_spec
    else:
        adapter = BaselineAdapter()
        policy_dir = "/".join(inspect.getfile(policy_class).split("/")[:-1])
        policy_params = load_yaml(f"{policy_dir}/params.yaml")
        spec = AgentSpec(
            interface=AgentInterface(
                waypoints=Waypoints(lookahead=20),
                neighborhood_vehicles=NeighborhoodVehicles(200),
                action=action_type,
                rgb=False,
                max_episode_steps=max_episode_steps,
                debug=True,
            ),
            agent_params=dict(
                policy_params=policy_params, checkpoint_dir=checkpoint_dir
            ),
            agent_builder=policy_class,
            observation_adapter=adapter.observation_adapter,
            reward_adapter=adapter.reward_adapter,
        )

    return spec
def prepare_test_agent_and_environment(
    required_interface: Dict[str, Any],
    action_adapter: Callable = lambda action: action,
    info_adapter: Callable = lambda observation, reward, info: info,
    observation_adapter: Callable = lambda observation: observation,
    reward_adapter: Callable = lambda _, reward: reward,
    headless: bool = True,
) -> Tuple[Agent, UltraEnv]:
    if "waypoints" not in required_interface:
        required_interface["waypoints"] = Waypoints(lookahead=20)
    if "neighborhood_vehicles" not in required_interface:
        required_interface["neighborhood_vehicles"] = NeighborhoodVehicles(radius=200)
    if "action" not in required_interface:
        required_interface["action"] = ActionSpaceType.Lane

    agent_spec = AgentSpec(
        interface=AgentInterface(**required_interface),
        agent_builder=RandomAgent,
        agent_params={"action_type": required_interface["action"]},
        action_adapter=action_adapter,
        info_adapter=info_adapter,
        observation_adapter=observation_adapter,
        reward_adapter=reward_adapter,
    )
    agent = agent_spec.build_agent()

    environment = gym.make(
        "ultra.env:ultra-v0",
        agent_specs={AGENT_ID: agent_spec},
        scenario_info=("00", "easy"),
        headless=headless,
        timestep_sec=TIMESTEP_SEC,
        seed=SEED,
    )

    return agent, environment
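# A hedged usage sketch for prepare_test_agent_and_environment: a test can pass only
# the interface pieces it cares about (here an illustrative RGB camera) and let the
# helper fill in the waypoint, neighborhood-vehicle, and action defaults. The
# reset/step/close calls mirror the gym-style loop used elsewhere in these tests;
# the specific RGB dimensions are assumptions.
from smarts.core.agent_interface import RGB

required_interface = {"rgb": RGB(width=64, height=64, resolution=50 / 64)}
agent, environment = prepare_test_agent_and_environment(required_interface)

observations = environment.reset()
action = agent.act(observations[AGENT_ID])
observations, rewards, dones, infos = environment.step({AGENT_ID: action})
environment.close()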
def train(
    task,
    num_episodes,
    max_episode_steps,
    rollout_fragment_length,
    policy,
    eval_info,
    timestep_sec,
    headless,
    seed,
    train_batch_size,
    sgd_minibatch_size,
    log_dir,
):
    agent_name = policy
    policy_params = load_yaml(f"ultra/baselines/{agent_name}/{agent_name}/params.yaml")

    action_type = adapters.type_from_string(policy_params["action_type"])
    observation_type = adapters.type_from_string(policy_params["observation_type"])
    reward_type = adapters.type_from_string(policy_params["reward_type"])

    if action_type != adapters.AdapterType.DefaultActionContinuous:
        raise Exception(
            f"RLlib training only supports the "
            f"{adapters.AdapterType.DefaultActionContinuous} action type."
        )
    if observation_type != adapters.AdapterType.DefaultObservationVector:
        # NOTE: The SMARTS observations adaptation that is done in ULTRA's Gym
        # environment is not done in ULTRA's RLlib environment. If other
        # observation adapters are used, they may raise an Exception.
        raise Exception(
            f"RLlib training only supports the "
            f"{adapters.AdapterType.DefaultObservationVector} observation type."
        )

    action_space = adapters.space_from_type(adapter_type=action_type)
    observation_space = adapters.space_from_type(adapter_type=observation_type)

    action_adapter = adapters.adapter_from_type(adapter_type=action_type)
    info_adapter = adapters.adapter_from_type(
        adapter_type=adapters.AdapterType.DefaultInfo
    )
    observation_adapter = adapters.adapter_from_type(adapter_type=observation_type)
    reward_adapter = adapters.adapter_from_type(adapter_type=reward_type)

    params_seed = policy_params["seed"]
    encoder_key = policy_params["social_vehicles"]["encoder_key"]
    num_social_features = observation_space["social_vehicles"].shape[1]
    social_capacity = observation_space["social_vehicles"].shape[0]
    social_policy_hidden_units = int(
        policy_params["social_vehicles"].get("social_policy_hidden_units", 0)
    )
    social_policy_init_std = int(
        policy_params["social_vehicles"].get("social_policy_init_std", 0)
    )
    social_vehicle_config = get_social_vehicle_configs(
        encoder_key=encoder_key,
        num_social_features=num_social_features,
        social_capacity=social_capacity,
        seed=params_seed,
        social_policy_hidden_units=social_policy_hidden_units,
        social_policy_init_std=social_policy_init_std,
    )

    ModelCatalog.register_custom_model("fc_model", CustomFCModel)
    config = RllibAgent.rllib_default_config(agent_name)

    rllib_policies = {
        "default_policy": (
            None,
            observation_space,
            action_space,
            {
                "model": {
                    "custom_model": "fc_model",
                    "custom_model_config": {
                        "social_vehicle_config": social_vehicle_config
                    },
                }
            },
        )
    }
    agent_specs = {
        "AGENT-007": AgentSpec(
            interface=AgentInterface(
                waypoints=Waypoints(lookahead=20),
                neighborhood_vehicles=NeighborhoodVehicles(200),
                action=ActionSpaceType.Continuous,
                rgb=False,
                max_episode_steps=max_episode_steps,
                debug=True,
            ),
            agent_params={},
            agent_builder=None,
            action_adapter=action_adapter,
            info_adapter=info_adapter,
            observation_adapter=observation_adapter,
            reward_adapter=reward_adapter,
        )
    }

    tune_config = {
        "env": RLlibUltraEnv,
        "log_level": "WARN",
        "callbacks": Callbacks,
        "framework": "torch",
        "num_workers": 1,
        "train_batch_size": train_batch_size,
        "sgd_minibatch_size": sgd_minibatch_size,
        "rollout_fragment_length": rollout_fragment_length,
        "in_evaluation": True,
        "evaluation_num_episodes": eval_info["eval_episodes"],
        # Evaluation occurs every `eval_rate` training iterations (episodes).
        "evaluation_interval": eval_info["eval_rate"],
        "evaluation_config": {
            "env_config": {
                "seed": seed,
                "scenario_info": task,
                "headless": headless,
                "eval_mode": True,
                "ordered_scenarios": False,
                "agent_specs": agent_specs,
                "timestep_sec": timestep_sec,
            },
            "explore": False,
        },
        "env_config": {
            "seed": seed,
            "scenario_info": task,
            "headless": headless,
            "eval_mode": False,
            "ordered_scenarios": False,
            "agent_specs": agent_specs,
            "timestep_sec": timestep_sec,
        },
        "multiagent": {"policies": rllib_policies},
    }
    config.update(tune_config)

    agent = RllibAgent(
        agent_name=agent_name,
        env=RLlibUltraEnv,
        config=tune_config,
        logger_creator=log_creator(log_dir),
    )

    # The iteration value in trainer.py (self._iterations) is technically the
    # number of episodes.
    for i in range(num_episodes):
        results = agent.train()
        # Evaluation metrics will now be displayed on TensorBoard.
        agent.log_evaluation_metrics(results)
def make_config(**kwargs):
    use_stacked_observation = kwargs.get("use_stacked_observation", False)
    use_rgb = kwargs.get("use_rgb", False)
    closest_neighbor_num = kwargs.get("max_observed_neighbors", 8)
    img_resolution = 24
    observe_lane_num = 3
    look_ahead = 10
    stack_size = 3 if use_stacked_observation else 1

    subscribed_features = dict(
        goal_relative_pos=(stack_size, 2),
        distance_to_center=(stack_size, 1),
        speed=(stack_size, 1),
        steering=(stack_size, 1),
        heading_errors=(stack_size, look_ahead),
        neighbor=(stack_size, closest_neighbor_num * 5),  # dist, speed, ttc
        img_gray=(stack_size, img_resolution, img_resolution) if use_rgb else False,
    )

    action_space = common.ActionSpace.from_type(1)
    observation_space = gym.spaces.Dict(
        common.subscribe_features(**subscribed_features)
    )

    policy_config = (
        None,
        observation_space,
        action_space,
        dict(
            model=dict(
                custom_model_config=dict(obs_space_dict=observation_space),
            )
        ),
    )

    interface_config = dict(
        obs_stack_size=stack_size,
        neighborhood_vehicles=NeighborhoodVehicles(radius=100),
        waypoints=Waypoints(lookahead=10),
        rgb=RGB(width=256, height=256, resolution=img_resolution / 256)
        if use_rgb
        else None,
        action=ActionSpaceType.Lane,
        road_waypoints=None,
        drivable_area_grid_map=None,
        ogm=None,
        lidar=None,
        debug=False,
    )

    observation_adapter = common.get_observation_adapter(
        observation_space,
        look_ahead=look_ahead,
        observe_lane_num=observe_lane_num,
        resize=(img_resolution, img_resolution),
        closest_neighbor_num=closest_neighbor_num,
    )

    agent_config = dict(
        observation_adapter=observation_adapter,
        reward_adapter=get_reward_adapter(observation_adapter),
        action_adapter=common.ActionAdapter.from_type(1),
        info_adapter=common.default_info_adapter,
    )

    learning_config = dict()

    other_config = dict(
        stop={"time_total_s": 2 * 60 * 60},
        checkpoint_freq=40,
        checkpoint_at_end=True,
        max_failures=1000,
    )

    return common.Config(
        name=NAME,
        agent=agent_config,
        interface=interface_config,
        policy=policy_config,
        learning=learning_config,
        other=other_config,
        trainer=DQNTrainer,
        spec={"obs": observation_space, "act": action_space},
    )
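# A brief usage sketch for make_config, assuming it is invoked from a benchmark entry
# point and that common.Config exposes its constructor keywords as attributes (for
# example, a namedtuple or dataclass). The keyword arguments shown are the only ones
# the function reads via kwargs.get; everything else is fixed inside the function.
config = make_config(
    use_stacked_observation=True,  # stack_size becomes 3 instead of 1
    use_rgb=False,                 # no img_gray feature and no RGB sensor
    max_observed_neighbors=8,      # closest_neighbor_num
)
print(config.spec["obs"])  # the gym.spaces.Dict observation space
print(config.spec["act"])  # the action space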
from typing import Any, Dict

import numpy as np
from scipy.spatial import distance

from smarts.core.agent_interface import NeighborhoodVehicles, Waypoints
from smarts.core.sensors import Observation

from ultra.adapters.constants import DEFAULT_RADIUS, DEFAULT_WAYPOINTS
import ultra.adapters.default_reward_adapter as default_reward_adapter
from ultra.utils.common import (
    ego_social_safety,
    get_closest_waypoint,
    get_path_to_goal,
)

_WAYPOINTS = DEFAULT_WAYPOINTS
_RADIUS = DEFAULT_RADIUS

required_interface = {
    "waypoints": Waypoints(lookahead=_WAYPOINTS),
    "neighborhood_vehicles": NeighborhoodVehicles(radius=_RADIUS),
}


def adapt(
    observation: Observation, reward: float, info: Dict[str, Any]
) -> Dict[str, Any]:
    """Adapts a raw environment observation, an environment reward, and info about
    the agent from the environment into custom information about the agent.

    The raw observation from the environment must include the ego vehicle's state,
    events, waypoint paths, and neighborhood vehicles. See smarts.core.sensors for
    more information on the Observation type.

    Args:
def _make_rllib_config(config, mode="training"): """Generate agent configuration. `mode` can be `train` or 'evaluation', and the only difference on the generated configuration is the agent info adapter. """ agent_config = config["agent"] interface_config = config["interface"] """ Parse the state configuration for agent """ state_config = agent_config["state"] # initialize environment wrapper if the wrapper config is not None wrapper_config = state_config.get("wrapper", {"name": "Simple"}) features_config = state_config["features"] # only for one frame, not really an observation frame_space = gym.spaces.Dict(common.subscribe_features(**features_config)) action_type = ActionSpaceType(agent_config["action"]["type"]) env_action_space = common.ActionSpace.from_type(action_type) wrapper_cls = getattr(rllib_wrappers, wrapper_config["name"]) """ Parse policy configuration """ policy_obs_space = wrapper_cls.get_observation_space( frame_space, wrapper_config) policy_action_space = wrapper_cls.get_action_space(env_action_space, wrapper_config) observation_adapter = wrapper_cls.get_observation_adapter( policy_obs_space, feature_configs=features_config, wrapper_config=wrapper_config) action_adapter = wrapper_cls.get_action_adapter(action_type, policy_action_space, wrapper_config) # policy observation space is related to the wrapper usage policy_config = ( None, policy_obs_space, policy_action_space, config["policy"].get( "config", {"custom_preprocessor": wrapper_cls.get_preprocessor()}), ) """ Parse agent interface configuration """ if interface_config.get("neighborhood_vehicles"): interface_config["neighborhood_vehicles"] = NeighborhoodVehicles( **interface_config["neighborhood_vehicles"]) if interface_config.get("waypoints"): interface_config["waypoints"] = Waypoints( **interface_config["waypoints"]) if interface_config.get("rgb"): interface_config["rgb"] = RGB(**interface_config["rgb"]) if interface_config.get("ogm"): interface_config["ogm"] = OGM(**interface_config["ogm"]) interface_config["action"] = ActionSpaceType(action_type) """ Pack environment configuration """ config["run"]["config"].update({"env": wrapper_cls}) config["env_config"] = { "custom_config": { **wrapper_config, "reward_adapter": wrapper_cls.get_reward_adapter(observation_adapter), "observation_adapter": observation_adapter, "action_adapter": action_adapter, "info_adapter": metrics.agent_info_adapter if mode == "evaluation" else None, "observation_space": policy_obs_space, "action_space": policy_action_space, }, } config["agent"] = {"interface": AgentInterface(**interface_config)} config["trainer"] = _get_trainer(**config["policy"]["trainer"]) config["policy"] = policy_config print(format.pretty_dict(config)) return config
def __new__(
    self,
    policy_class,
    action_type,
    checkpoint_dir=None,
    task=None,
    max_episode_steps=1200,
    experiment_dir=None,
    agent_id="",
):
    if experiment_dir:
        print(
            f"Loading spec for {agent_id} from {experiment_dir}/agent_metadata.pkl"
        )
        with open(f"{experiment_dir}/agent_metadata.pkl", "rb") as metadata_file:
            agent_metadata = dill.load(metadata_file)
            spec = agent_metadata["agent_specs"][agent_id]
            new_spec = AgentSpec(
                interface=spec.interface,
                agent_params=dict(
                    policy_params=spec.agent_params["policy_params"],
                    checkpoint_dir=checkpoint_dir,
                ),
                agent_builder=spec.policy_builder,
                observation_adapter=spec.observation_adapter,
                reward_adapter=spec.reward_adapter,
            )
            spec = new_spec
    else:
        base_dir = os.path.join(os.path.dirname(__file__), "../")
        pool_path = os.path.join(base_dir, "agent_pool.json")

        policy_class_name = policy_class.__name__
        agent_name = None

        with open(pool_path, "r") as f:
            data = json.load(f)
            agents = data["agents"].keys()
            for agent in agents:
                if data["agents"][agent]["class"] == policy_class_name:
                    agent_name = data["agents"][agent]["name"]
                    break

        assert agent_name is not None

        adapter = BaselineAdapter(agent_name)
        spec = AgentSpec(
            interface=AgentInterface(
                waypoints=Waypoints(lookahead=20),
                neighborhood_vehicles=NeighborhoodVehicles(200),
                action=action_type,
                rgb=False,
                max_episode_steps=max_episode_steps,
                debug=True,
            ),
            agent_params=dict(
                policy_params=adapter.policy_params, checkpoint_dir=checkpoint_dir
            ),
            agent_builder=policy_class,
            observation_adapter=adapter.observation_adapter,
            reward_adapter=adapter.reward_adapter,
        )

    return spec
import math

import numpy as np
from scipy.spatial import distance

from smarts.core.agent_interface import Waypoints
from smarts.core.sensors import Observation

from ultra.adapters.constants import DEFAULT_WAYPOINTS
from ultra.utils.common import get_closest_waypoint, get_path_to_goal

_WAYPOINTS = DEFAULT_WAYPOINTS

# This adapter requires SMARTS to pass the next _WAYPOINTS waypoints in the agent's
# observation.
required_interface = {"waypoints": Waypoints(lookahead=_WAYPOINTS)}


def adapt(observation: Observation, reward: float) -> float:
    """Adapts a raw environment observation and an environment reward to a custom
    reward of type float.

    The raw observation from the environment must include the ego vehicle's state,
    events, and waypoint paths. See smarts.core.sensors for more information on the
    Observation type.

    Args:
        observation (Observation): The raw environment observation received from
            SMARTS.
        reward (float): The environment reward received from SMARTS.

    Returns:
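# A minimal wiring sketch, under the assumption that the module above is ULTRA's
# ultra.adapters.default_reward_adapter (its import path is suggested by the
# info-adapter excerpt earlier) and that the import locations follow the older
# SMARTS layout used throughout these excerpts. The adapter module contributes both
# its interface requirements and its adapt function to the AgentSpec.
import ultra.adapters.default_reward_adapter as reward_adapter_module
from smarts.core.agent import AgentSpec
from smarts.core.agent_interface import AgentInterface
from smarts.core.controllers import ActionSpaceType

# Merge the adapter's sensor requirements with an action space choice.
interface_kwargs = dict(reward_adapter_module.required_interface)
interface_kwargs["action"] = ActionSpaceType.Lane

agent_spec = AgentSpec(
    interface=AgentInterface(**interface_kwargs),
    agent_builder=None,  # Supplied by whichever policy class is being trained.
    reward_adapter=reward_adapter_module.adapt,
)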
def _make_rllib_config(config, mode="train"): """ Generate agent configuration. `mode` can be `train` or 'evaluation', and the only difference on the generated configuration is the agent info adapter. """ agent = config["agent"] state_config = agent["state"] # initialize environment wrapper if the wrapper config is not None wrapper_config = state_config["wrapper"] wrapper = ( getattr(rllib_wrappers, wrapper_config["name"]) if wrapper_config else None ) features = state_config["features"] # only for one frame, not really an observation frame_space = gym.spaces.Dict(common.subscribe_features(**features)) action_type = agent["action"]["type"] action_space = common.ActionSpace.from_type(action_type) # policy observation space is related to the wrapper usage policy_obs_space = _get_policy_observation_space( wrapper, frame_space, wrapper_config ) policy_config = ( None, policy_obs_space, action_space, config["policy"].get("config", {}), ) interface = config["interface"] if interface.get("neighborhood_vehicles"): interface["neighborhood_vehicles"] = NeighborhoodVehicles( **interface["neighborhood_vehicles"] ) if interface.get("waypoints"): interface["waypoints"] = Waypoints(**interface["waypoints"]) if interface.get("rgb"): interface["rgb"] = RGB(**interface["rgb"]) if interface.get("ogm"): interface["ogm"] = OGM(**interface["ogm"]) interface["action"] = ActionSpaceType(action_type) adapter_type = "vanilla" if wrapper == rllib_wrappers.FrameStack else "single_frame" agent_config = dict( action_adapter=common.ActionAdapter.from_type(action_type), info_adapter=metrics.agent_info_adapter if mode == "evaluate" else common.default_info_adapter, ) adapter_type = ( "stack_frame" if wrapper == rllib_wrappers.FrameStack else "single_frame" ) if agent.get("adapter_type", {}) != {}: adapter_type = agent.get("adapter_type", {}) observation_adapter = common.get_observation_adapter( frame_space, adapter_type, wrapper=wrapper, feature_configs=features ) env_config = dict() wrapper_config["parameters"] = { "observation_adapter": observation_adapter, "reward_adapter": common.get_reward_adapter(observation_adapter, adapter_type), "base_env_cls": RLlibHiWayEnv, } env_config.update(**wrapper_config["parameters"]) config["run"]["config"].update({"env": wrapper}) config["env_config"] = env_config config["agent"] = agent_config config["interface"] = interface config["trainer"] = _get_trainer(**config["policy"]["trainer"]) config["policy"] = policy_config pprint.pprint(config) return config
def test_observations_stacking(self):
    EPISODES = 3
    WIDTH = 64
    HEIGHT = WIDTH
    RESOLUTION = 50 / WIDTH
    ENVIRONMENT_STACK_SIZE = 4

    agent_spec = AgentSpec(
        interface=AgentInterface(
            waypoints=Waypoints(lookahead=1),
            neighborhood_vehicles=NeighborhoodVehicles(radius=10.0),
            rgb=RGB(width=WIDTH, height=HEIGHT, resolution=RESOLUTION),
            action=ActionSpaceType.Lane,
        ),
        agent_builder=TestLaneAgent,
    )
    agent = agent_spec.build_agent()
    environment = gym.make(
        "ultra.env:ultra-v0",
        agent_specs={AGENT_ID: agent_spec},
        scenario_info=("00", "easy"),
        headless=True,
        timestep_sec=0.1,
        seed=2,
    )

    def check_environment_observations_stack(environment):
        self.assertIsInstance(environment.smarts_observations_stack, deque)
        self.assertEqual(
            len(environment.smarts_observations_stack), ENVIRONMENT_STACK_SIZE
        )
        self.assertIsInstance(environment.smarts_observations_stack[0], dict)
        self.assertTrue(
            all(
                str(environment.smarts_observations_stack[0]) == str(observations)
                for observations in environment.smarts_observations_stack
            )
        )

    def check_stacked_observations(environment, observations):
        self.assertIn(AGENT_ID, observations)
        self.assertIsNotNone(observations[AGENT_ID].top_down_rgb)
        self.assertIsInstance(observations[AGENT_ID].top_down_rgb, TopDownRGB)
        self.assertEqual(
            observations[AGENT_ID].top_down_rgb.metadata,
            environment.smarts_observations_stack[-1][AGENT_ID].top_down_rgb.metadata,
        )
        self.assertEqual(
            observations[AGENT_ID].top_down_rgb.data.shape,
            (ENVIRONMENT_STACK_SIZE, HEIGHT, WIDTH, 3),
        )
        # Ensure the stacked observation's TopDownRGB data is in the same order and
        # contains the same NumPy arrays as the environment's observation stack.
        self.assertTrue(
            all(
                np.array_equal(
                    observations_from_stack[AGENT_ID].top_down_rgb.data,
                    observations[AGENT_ID].top_down_rgb.data[i],
                )
                for i, observations_from_stack in enumerate(
                    environment.smarts_observations_stack
                )
            )
        )

    for _ in range(EPISODES):
        dones = {"__all__": False}

        observations = environment.reset()
        check_environment_observations_stack(environment)
        check_stacked_observations(environment, observations)

        while not dones["__all__"]:
            action = agent.act(observations[AGENT_ID])
            observations, _, dones, _ = environment.step({AGENT_ID: action})
            check_stacked_observations(environment, observations)

    environment.close()
def train(
    task,
    num_episodes,
    max_episode_steps,
    rollout_fragment_length,
    policy,
    eval_info,
    timestep_sec,
    headless,
    seed,
    train_batch_size,
    sgd_minibatch_size,
    log_dir,
):
    agent_name = policy
    adapter = BaselineAdapter(agent_name)

    ModelCatalog.register_custom_model("fc_model", CustomFCModel)
    config = RllibAgent.rllib_default_config(agent_name)

    rllib_policies = {
        "default_policy": (
            None,
            adapter.observation_space,
            adapter.action_space,
            {
                "model": {
                    "custom_model": "fc_model",
                    "custom_model_config": {"adapter": adapter},
                }
            },
        )
    }
    agent_specs = {
        "AGENT-007": AgentSpec(
            interface=AgentInterface(
                waypoints=Waypoints(lookahead=20),
                neighborhood_vehicles=NeighborhoodVehicles(200),
                action=ActionSpaceType.Continuous,
                rgb=False,
                max_episode_steps=max_episode_steps,
                debug=True,
            ),
            agent_params={},
            agent_builder=None,
            observation_adapter=adapter.observation_adapter,
            reward_adapter=adapter.reward_adapter,
            # action_adapter=adapter.action_adapter,
        )
    }

    tune_config = {
        "env": RLlibUltraEnv,
        "log_level": "WARN",
        "callbacks": Callbacks,
        "framework": "torch",
        "num_workers": 1,
        "train_batch_size": train_batch_size,
        "sgd_minibatch_size": sgd_minibatch_size,
        "rollout_fragment_length": rollout_fragment_length,
        "in_evaluation": True,
        "evaluation_num_episodes": eval_info["eval_episodes"],
        # Evaluation occurs every `eval_rate` training iterations (episodes).
        "evaluation_interval": eval_info["eval_rate"],
        "evaluation_config": {
            "env_config": {
                "seed": seed,
                "scenario_info": task,
                "headless": headless,
                "eval_mode": True,
                "ordered_scenarios": False,
                "agent_specs": agent_specs,
                "timestep_sec": timestep_sec,
            },
            "explore": False,
        },
        "env_config": {
            "seed": seed,
            "scenario_info": task,
            "headless": headless,
            "eval_mode": False,
            "ordered_scenarios": False,
            "agent_specs": agent_specs,
            "timestep_sec": timestep_sec,
        },
        "multiagent": {"policies": rllib_policies},
    }
    config.update(tune_config)

    agent = RllibAgent(
        agent_name=agent_name,
        env=RLlibUltraEnv,
        config=tune_config,
        logger_creator=log_creator(log_dir),
    )

    # The iteration value in trainer.py (self._iterations) is technically the
    # number of episodes.
    for i in range(num_episodes):
        results = agent.train()
        # Evaluation metrics will now be displayed on TensorBoard.
        agent.log_evaluation_metrics(results)