def _make_agent_specs(num_agent):
    agent_specs = {
        "AGENT_" + str(agent_id): AgentSpec(
            interface=AgentInterface(
                rgb=RGB(),
                action=ActionSpaceType.Lane,
            ),
            agent_builder=lambda: Agent.from_function(lambda _: "keep_lane"),
            observation_adapter=lambda obs: obs.top_down_rgb.data,
            reward_adapter=lambda obs, reward: reward,
            info_adapter=lambda obs, reward, info: info["score"],
        )
        for agent_id in range(num_agent)
    }

    obs_space = gym.spaces.Dict(
        {
            "AGENT_" + str(agent_id): gym.spaces.Box(
                low=0,
                high=255,
                shape=(
                    agent_specs["AGENT_" + str(agent_id)].interface.rgb.width,
                    agent_specs["AGENT_" + str(agent_id)].interface.rgb.height,
                    3,
                ),
                dtype=np.uint8,
            )
            for agent_id in range(num_agent)
        }
    )

    return agent_specs, obs_space
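A hedged usage sketch (not part of the snippet above): wiring the helper into a SMARTS environment through the standard "smarts.env:hiway-v0" entry point. The scenario directory is a placeholder, not taken from the original source.

# Hypothetical usage of _make_agent_specs; the scenario path is a placeholder.
agent_specs, obs_space = _make_agent_specs(num_agent=2)
env = gym.make(
    "smarts.env:hiway-v0",
    scenarios=["scenarios/loop"],  # placeholder scenario directory
    agent_specs=agent_specs,
    headless=True,
)
observations = env.reset()  # dict of adapted top-down RGB arrays, keyed by agent id
observations, rewards, dones, infos = env.step(
    {agent_id: "keep_lane" for agent_id in agent_specs}  # Lane action space
)
env.close()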
def agent_specs():
    def observation_adapter(env_observation):
        return env_observation.top_down_rgb.data

    return {
        "AGENT_" + agent_id: AgentSpec(
            interface=AgentInterface(
                rgb=RGB(),
                action=ActionSpaceType.Lane,
            ),
            agent_builder=lambda: Agent.from_function(lambda _: "keep_lane"),
            observation_adapter=observation_adapter,
        )
        for agent_id in ["001", "002"]
    }
def agent_specs():
    return {
        "Agent_" + agent_id: AgentSpec(
            interface=AgentInterface(
                rgb=RGB(width=256, height=256, resolution=50 / 256),
                action=ActionSpaceType.Lane,
                max_episode_steps=3,
            ),
            agent_builder=lambda: Agent.from_function(lambda _: "keep_lane"),
            observation_adapter=lambda obs: obs.top_down_rgb.data,
            reward_adapter=lambda obs, reward: reward,
            info_adapter=lambda obs, reward, info: info["score"],
        )
        for agent_id in ["1", "2"]
    }
def _make_agent_specs(topdown_rgb):
    if topdown_rgb == "rgb":
        rgb = RGB()
    elif topdown_rgb == "false":
        rgb = False

    return {
        "AGENT_" + agent_id: AgentSpec(
            interface=AgentInterface(
                rgb=rgb,
                action=ActionSpaceType.Lane,
            ),
            agent_builder=lambda: Agent.from_function(lambda _: "keep_lane"),
        )
        for agent_id in ["001", "002"]
    }
def agent_spec():
    return AgentSpec(
        interface=AgentInterface(
            road_waypoints=RoadWaypoints(40),
            neighborhood_vehicles=NeighborhoodVehicles(
                radius=max(MAP_WIDTH * MAP_RESOLUTION, MAP_HEIGHT * MAP_RESOLUTION) * 0.5
            ),
            drivable_area_grid_map=DrivableAreaGridMap(
                width=MAP_WIDTH, height=MAP_HEIGHT, resolution=MAP_RESOLUTION
            ),
            ogm=OGM(width=MAP_WIDTH, height=MAP_HEIGHT, resolution=MAP_RESOLUTION),
            rgb=RGB(width=MAP_WIDTH, height=MAP_HEIGHT, resolution=MAP_RESOLUTION),
            action=ActionSpaceType.Lane,
        ),
        policy_builder=lambda: AgentPolicy.from_function(lambda _: "keep_lane"),
    )
def make_config(**kwargs):
    use_stacked_observation = kwargs.get("use_stacked_observation", False)
    use_rgb = kwargs.get("use_rgb", False)
    closest_neighbor_num = kwargs.get("max_observed_neighbors", 8)

    img_resolution = 24
    observe_lane_num = 3
    look_ahead = 10
    stack_size = 3 if use_stacked_observation else 1

    subscribed_features = dict(
        goal_relative_pos=(stack_size, 2),
        distance_to_center=(stack_size, 1),
        speed=(stack_size, 1),
        steering=(stack_size, 1),
        heading_errors=(stack_size, look_ahead),
        neighbor=(stack_size, closest_neighbor_num * 5),  # dist, speed, ttc
        img_gray=(stack_size, img_resolution, img_resolution) if use_rgb else False,
    )

    action_space = common.ActionSpace.from_type(1)
    observation_space = gym.spaces.Dict(
        common.subscribe_features(**subscribed_features)
    )

    policy_config = (
        None,
        observation_space,
        action_space,
        dict(model=dict(custom_model_config=dict(obs_space_dict=observation_space))),
    )

    interface_config = dict(
        obs_stack_size=stack_size,
        neighborhood_vehicles=NeighborhoodVehicles(radius=100),
        waypoints=Waypoints(lookahead=10),
        rgb=RGB(width=256, height=256, resolution=img_resolution / 256)
        if use_rgb
        else None,
        action=ActionSpaceType.Lane,
        road_waypoints=None,
        drivable_area_grid_map=None,
        ogm=None,
        lidar=None,
        debug=False,
    )

    observation_adapter = common.get_observation_adapter(
        observation_space,
        look_ahead=look_ahead,
        observe_lane_num=observe_lane_num,
        resize=(img_resolution, img_resolution),
        closest_neighbor_num=closest_neighbor_num,
    )

    agent_config = dict(
        observation_adapter=observation_adapter,
        reward_adapter=get_reward_adapter(observation_adapter),
        action_adapter=common.ActionAdapter.from_type(1),
        info_adapter=common.default_info_adapter,
    )

    learning_config = dict()

    other_config = dict(
        stop={"time_total_s": 2 * 60 * 60},
        checkpoint_freq=40,
        checkpoint_at_end=True,
        max_failures=1000,
    )

    return common.Config(
        name=NAME,
        agent=agent_config,
        interface=interface_config,
        policy=policy_config,
        learning=learning_config,
        other=other_config,
        trainer=DQNTrainer,
        spec={"obs": observation_space, "act": action_space},
    )
from smarts.core.sensors import Observation

_WIDTH = 64
_HEIGHT = 64
_STACK = 4
_RESOLUTION = 50 / 64

# The space of the adapted observation.
gym_space: gym.Space = gym.spaces.Box(
    low=0.0, high=1.0, shape=(_STACK, _HEIGHT, _WIDTH), dtype=np.float32
)
# This adapter requires SMARTS to pass the top-down RGB image in the agent's
# observation.
required_interface = {"rgb": RGB(width=_WIDTH, height=_HEIGHT, resolution=_RESOLUTION)}


def adapt(observation: Observation) -> np.ndarray:
    """Adapts a raw environment observation into a numpy.ndarray.

    The raw observation from the environment must include the top-down RGB
    image. See smarts.core.sensors for more information on the Observation
    type.

    Args:
        observation (Observation): The raw environment observation received
            from SMARTS.

    Returns:
        np.ndarray: A numpy.ndarray of size (_STACK, _HEIGHT, _WIDTH) that is
            the gray-scale image of the top-down RGB image. The gray-scale
            value for each pixel lies in the range [0.0, 1.0].
    """
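The snippet above is cut off at the docstring. Below is a minimal sketch of what such an adapter body could look like, assuming a channel-mean gray-scale conversion and a module-level frame buffer; the `_frames` deque and `_adapt_sketch` name are hypothetical and rely on `_STACK`, `Observation`, and `np` from the module above.

import collections

import numpy as np

_frames = collections.deque(maxlen=_STACK)  # hypothetical module-level frame buffer


def _adapt_sketch(observation: Observation) -> np.ndarray:
    # Normalize the top-down RGB image to [0.0, 1.0] and average the channels
    # to obtain a gray-scale frame of shape (_HEIGHT, _WIDTH).
    gray = observation.top_down_rgb.data.astype(np.float32) / 255.0
    gray = gray.mean(axis=-1)
    if not _frames:
        _frames.extend([gray] * _STACK)  # seed the stack with the first frame
    else:
        _frames.append(gray)
    return np.stack(_frames, axis=0)  # (_STACK, _HEIGHT, _WIDTH), float32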
def _make_rllib_config(config, mode="training"): """Generate agent configuration. `mode` can be `train` or 'evaluation', and the only difference on the generated configuration is the agent info adapter. """ agent_config = config["agent"] interface_config = config["interface"] """ Parse the state configuration for agent """ state_config = agent_config["state"] # initialize environment wrapper if the wrapper config is not None wrapper_config = state_config.get("wrapper", {"name": "Simple"}) features_config = state_config["features"] # only for one frame, not really an observation frame_space = gym.spaces.Dict(common.subscribe_features(**features_config)) action_type = ActionSpaceType(agent_config["action"]["type"]) env_action_space = common.ActionSpace.from_type(action_type) wrapper_cls = getattr(rllib_wrappers, wrapper_config["name"]) """ Parse policy configuration """ policy_obs_space = wrapper_cls.get_observation_space( frame_space, wrapper_config) policy_action_space = wrapper_cls.get_action_space(env_action_space, wrapper_config) observation_adapter = wrapper_cls.get_observation_adapter( policy_obs_space, feature_configs=features_config, wrapper_config=wrapper_config) action_adapter = wrapper_cls.get_action_adapter(action_type, policy_action_space, wrapper_config) # policy observation space is related to the wrapper usage policy_config = ( None, policy_obs_space, policy_action_space, config["policy"].get( "config", {"custom_preprocessor": wrapper_cls.get_preprocessor()}), ) """ Parse agent interface configuration """ if interface_config.get("neighborhood_vehicles"): interface_config["neighborhood_vehicles"] = NeighborhoodVehicles( **interface_config["neighborhood_vehicles"]) if interface_config.get("waypoints"): interface_config["waypoints"] = Waypoints( **interface_config["waypoints"]) if interface_config.get("rgb"): interface_config["rgb"] = RGB(**interface_config["rgb"]) if interface_config.get("ogm"): interface_config["ogm"] = OGM(**interface_config["ogm"]) interface_config["action"] = ActionSpaceType(action_type) """ Pack environment configuration """ config["run"]["config"].update({"env": wrapper_cls}) config["env_config"] = { "custom_config": { **wrapper_config, "reward_adapter": wrapper_cls.get_reward_adapter(observation_adapter), "observation_adapter": observation_adapter, "action_adapter": action_adapter, "info_adapter": metrics.agent_info_adapter if mode == "evaluation" else None, "observation_space": policy_obs_space, "action_space": policy_action_space, }, } config["agent"] = {"interface": AgentInterface(**interface_config)} config["trainer"] = _get_trainer(**config["policy"]["trainer"]) config["policy"] = policy_config print(format.pretty_dict(config)) return config
def _make_rllib_config(config, mode="train"): """ Generate agent configuration. `mode` can be `train` or 'evaluation', and the only difference on the generated configuration is the agent info adapter. """ agent = config["agent"] state_config = agent["state"] # initialize environment wrapper if the wrapper config is not None wrapper_config = state_config["wrapper"] wrapper = ( getattr(rllib_wrappers, wrapper_config["name"]) if wrapper_config else None ) features = state_config["features"] # only for one frame, not really an observation frame_space = gym.spaces.Dict(common.subscribe_features(**features)) action_type = agent["action"]["type"] action_space = common.ActionSpace.from_type(action_type) # policy observation space is related to the wrapper usage policy_obs_space = _get_policy_observation_space( wrapper, frame_space, wrapper_config ) policy_config = ( None, policy_obs_space, action_space, config["policy"].get("config", {}), ) interface = config["interface"] if interface.get("neighborhood_vehicles"): interface["neighborhood_vehicles"] = NeighborhoodVehicles( **interface["neighborhood_vehicles"] ) if interface.get("waypoints"): interface["waypoints"] = Waypoints(**interface["waypoints"]) if interface.get("rgb"): interface["rgb"] = RGB(**interface["rgb"]) if interface.get("ogm"): interface["ogm"] = OGM(**interface["ogm"]) interface["action"] = ActionSpaceType(action_type) adapter_type = "vanilla" if wrapper == rllib_wrappers.FrameStack else "single_frame" agent_config = dict( action_adapter=common.ActionAdapter.from_type(action_type), info_adapter=metrics.agent_info_adapter if mode == "evaluate" else common.default_info_adapter, ) adapter_type = ( "stack_frame" if wrapper == rllib_wrappers.FrameStack else "single_frame" ) if agent.get("adapter_type", {}) != {}: adapter_type = agent.get("adapter_type", {}) observation_adapter = common.get_observation_adapter( frame_space, adapter_type, wrapper=wrapper, feature_configs=features ) env_config = dict() wrapper_config["parameters"] = { "observation_adapter": observation_adapter, "reward_adapter": common.get_reward_adapter(observation_adapter, adapter_type), "base_env_cls": RLlibHiWayEnv, } env_config.update(**wrapper_config["parameters"]) config["run"]["config"].update({"env": wrapper}) config["env_config"] = env_config config["agent"] = agent_config config["interface"] = interface config["trainer"] = _get_trainer(**config["policy"]["trainer"]) config["policy"] = policy_config pprint.pprint(config) return config
    return ret


def action_adapter(model_action):
    assert len(model_action) == 3
    return np.asarray(model_action)


def info_adapter(reward, info):
    return info


agent_interface = AgentInterface(
    max_episode_steps=None,
    waypoints=True,
    # neighborhood vehicles within a radius of 60 m
    neighborhood_vehicles=NeighborhoodVehicles(radius=60),
    # OGM spans 64 * 0.25 = 16 m
    ogm=OGM(64, 64, 0.25),
    rgb=RGB(256, 256, 50 / 256),
    action=ActionSpaceType.ActuatorDynamic,
)

agent_spec = AgentSpec(
    interface=agent_interface,
    observation_adapter=observation_adapter,
    reward_adapter=reward_adapter,
    action_adapter=action_adapter,
    info_adapter=info_adapter,
)
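A hedged usage sketch (not in the original source) showing how the spec above might be exercised: `AgentSpec.build_agent` constructs the agent, and the spec's adapters translate between raw SMARTS observations and model actions. The `policy_step` helper is hypothetical.

# Hypothetical helper built on top of agent_spec above.
agent = agent_spec.build_agent()


def policy_step(raw_observation):
    """Adapt a raw SMARTS observation, query the agent, and adapt its action."""
    adapted_observation = agent_spec.observation_adapter(raw_observation)
    model_action = agent.act(adapted_observation)
    return agent_spec.action_adapter(model_action)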
def test_observations_stacking(self):
    EPISODES = 3
    WIDTH = 64
    HEIGHT = WIDTH
    RESOLUTION = 50 / WIDTH
    ENVIRONMENT_STACK_SIZE = 4

    agent_spec = AgentSpec(
        interface=AgentInterface(
            waypoints=Waypoints(lookahead=1),
            neighborhood_vehicles=NeighborhoodVehicles(radius=10.0),
            rgb=RGB(width=WIDTH, height=HEIGHT, resolution=RESOLUTION),
            action=ActionSpaceType.Lane,
        ),
        agent_builder=TestLaneAgent,
    )
    agent = agent_spec.build_agent()
    environment = gym.make(
        "ultra.env:ultra-v0",
        agent_specs={AGENT_ID: agent_spec},
        scenario_info=("00", "easy"),
        headless=True,
        timestep_sec=0.1,
        seed=2,
    )

    def check_environment_observations_stack(environment):
        self.assertIsInstance(environment.smarts_observations_stack, deque)
        self.assertEqual(
            len(environment.smarts_observations_stack), ENVIRONMENT_STACK_SIZE
        )
        self.assertIsInstance(environment.smarts_observations_stack[0], dict)
        self.assertTrue(
            all(
                str(environment.smarts_observations_stack[0]) == str(observations)
                for observations in environment.smarts_observations_stack
            )
        )

    def check_stacked_observations(environment, observations):
        self.assertIn(AGENT_ID, observations)
        self.assertIsNotNone(observations[AGENT_ID].top_down_rgb)
        self.assertIsInstance(observations[AGENT_ID].top_down_rgb, TopDownRGB)
        self.assertEqual(
            observations[AGENT_ID].top_down_rgb.metadata,
            environment.smarts_observations_stack[-1][AGENT_ID].top_down_rgb.metadata,
        )
        self.assertEqual(
            observations[AGENT_ID].top_down_rgb.data.shape,
            (ENVIRONMENT_STACK_SIZE, HEIGHT, WIDTH, 3),
        )
        # Ensure the stacked observation's TopDownRGB data is in the same order and
        # contains the same NumPy arrays as the environment's observation stack.
        self.assertTrue(
            all(
                np.array_equal(
                    observations_from_stack[AGENT_ID].top_down_rgb.data,
                    observations[AGENT_ID].top_down_rgb.data[i],
                )
                for i, observations_from_stack in enumerate(
                    environment.smarts_observations_stack
                )
            )
        )

    for _ in range(EPISODES):
        dones = {"__all__": False}

        observations = environment.reset()
        check_environment_observations_stack(environment)
        check_stacked_observations(environment, observations)

        while not dones["__all__"]:
            action = agent.act(observations[AGENT_ID])
            observations, _, dones, _ = environment.step({AGENT_ID: action})
            check_stacked_observations(environment, observations)

    environment.close()