Example 1
def _make_agent_specs(num_agent):
    agent_specs = {
        "AGENT_" + str(agent_id): AgentSpec(
            interface=AgentInterface(
                rgb=RGB(),
                action=ActionSpaceType.Lane,
            ),
            agent_builder=lambda: Agent.from_function(lambda _: "keep_lane"),
            observation_adapter=lambda obs: obs.top_down_rgb.data,
            reward_adapter=lambda obs, reward: reward,
            info_adapter=lambda obs, reward, info: info["score"],
        )
        for agent_id in range(num_agent)
    }

    obs_space = gym.spaces.Dict({
        "AGENT_" + str(agent_id): gym.spaces.Box(
            low=0,
            high=255,
            shape=(
                agent_specs["AGENT_" + str(agent_id)].interface.rgb.width,
                agent_specs["AGENT_" + str(agent_id)].interface.rgb.height,
                3,
            ),
            dtype=np.uint8,
        )
        for agent_id in range(num_agent)
    })

    return agent_specs, obs_space
Example 2
def agent_specs():
    def observation_adapter(env_observation):
        return env_observation.top_down_rgb.data

    return {
        "AGENT_" + agent_id: AgentSpec(
            interface=AgentInterface(
                rgb=RGB(),
                action=ActionSpaceType.Lane,
            ),
            agent_builder=lambda: Agent.from_function(lambda _: "keep_lane"),
            observation_adapter=observation_adapter,
        )
        for agent_id in ["001", "002"]
    }
Example 3
def agent_specs():
    return {
        "Agent_" + agent_id: AgentSpec(
            interface=AgentInterface(
                rgb=RGB(width=256, height=256, resolution=50 / 256),
                action=ActionSpaceType.Lane,
                max_episode_steps=3,
            ),
            agent_builder=lambda: Agent.from_function(lambda _: "keep_lane"),
            observation_adapter=lambda obs: obs.top_down_rgb.data,
            reward_adapter=lambda obs, reward: reward,
            info_adapter=lambda obs, reward, info: info["score"],
        )
        for agent_id in ["1", "2"]
    }
Example 4
def _make_agent_specs(topdown_rgb):
    if topdown_rgb == "rgb":
        rgb = RGB()
    elif topdown_rgb == "false":
        rgb = False

    return {
        "AGENT_" + agent_id: AgentSpec(
            interface=AgentInterface(
                rgb=rgb,
                action=ActionSpaceType.Lane,
            ),
            agent_builder=lambda: Agent.from_function(lambda _: "keep_lane"),
        )
        for agent_id in ["001", "002"]
    }
Example 5
def agent_spec():
    return AgentSpec(
        interface=AgentInterface(
            road_waypoints=RoadWaypoints(40),
            neighborhood_vehicles=NeighborhoodVehicles(
                radius=max(MAP_WIDTH * MAP_RESOLUTION, MAP_HEIGHT * MAP_RESOLUTION)
                * 0.5
            ),
            drivable_area_grid_map=DrivableAreaGridMap(
                width=MAP_WIDTH, height=MAP_HEIGHT, resolution=MAP_RESOLUTION
            ),
            ogm=OGM(width=MAP_WIDTH, height=MAP_HEIGHT, resolution=MAP_RESOLUTION),
            rgb=RGB(width=MAP_WIDTH, height=MAP_HEIGHT, resolution=MAP_RESOLUTION),
            action=ActionSpaceType.Lane,
        ),
        policy_builder=lambda: AgentPolicy.from_function(lambda _: "keep_lane"),
    )
Example 6
def make_config(**kwargs):
    use_stacked_observation = kwargs.get("use_stacked_observation", False)
    use_rgb = kwargs.get("use_rgb", False)
    closest_neighbor_num = kwargs.get("max_observed_neighbors", 8)

    img_resolution = 24
    observe_lane_num = 3
    look_ahead = 10
    stack_size = 3 if use_stacked_observation else 1
    subscribed_features = dict(
        goal_relative_pos=(stack_size, 2),
        distance_to_center=(stack_size, 1),
        speed=(stack_size, 1),
        steering=(stack_size, 1),
        heading_errors=(stack_size, look_ahead),
        neighbor=(stack_size, closest_neighbor_num * 5),  # 5 features per neighbor (dist, speed, ttc, ...)
        img_gray=(stack_size, img_resolution,
                  img_resolution) if use_rgb else False,
    )

    action_space = common.ActionSpace.from_type(1)
    observation_space = gym.spaces.Dict(
        common.subscribe_features(**subscribed_features))

    policy_config = (
        None,
        observation_space,
        action_space,
        dict(model=dict(custom_model_config=dict(
            obs_space_dict=observation_space), )),
    )

    interface_config = dict(
        obs_stack_size=stack_size,
        neighborhood_vehicles=NeighborhoodVehicles(radius=100),
        waypoints=Waypoints(lookahead=10),
        rgb=RGB(width=256, height=256, resolution=img_resolution /
                256) if use_rgb else None,
        action=ActionSpaceType.Lane,
        road_waypoints=None,
        drivable_area_grid_map=None,
        ogm=None,
        lidar=None,
        debug=False,
    )

    observation_adapter = common.get_observation_adapter(
        observation_space,
        look_ahead=look_ahead,
        observe_lane_num=observe_lane_num,
        resize=(img_resolution, img_resolution),
        closest_neighbor_num=closest_neighbor_num,
    )

    agent_config = dict(
        observation_adapter=observation_adapter,
        reward_adapter=get_reward_adapter(observation_adapter),
        action_adapter=common.ActionAdapter.from_type(1),
        info_adapter=common.default_info_adapter,
    )

    learning_config = dict()

    other_config = dict(
        stop={"time_total_s": 2 * 60 * 60},
        checkpoint_freq=40,
        checkpoint_at_end=True,
        max_failures=1000,
    )

    return common.Config(
        name=NAME,
        agent=agent_config,
        interface=interface_config,
        policy=policy_config,
        learning=learning_config,
        other=other_config,
        trainer=DQNTrainer,
        spec={
            "obs": observation_space,
            "act": action_space
        },
    )
Example 7
from smarts.core.sensors import Observation

_WIDTH = 64
_HEIGHT = 64
_STACK = 4
_RESOLUTION = 50 / 64

# The space of the adapted observation.
gym_space: gym.Space = gym.spaces.Box(low=0.0,
                                      high=1.0,
                                      shape=(_STACK, _HEIGHT, _WIDTH),
                                      dtype=np.float32)
# This adapter requires SMARTS to pass the top-down RGB image in the agent's
# observation.
required_interface = {
    "rgb": RGB(width=_WIDTH, height=_HEIGHT, resolution=_RESOLUTION)
}


def adapt(observation: Observation) -> np.ndarray:
    """Adapts a raw environment observation into a numpy.ndarray.

    The raw observation from the environment must include the top-down RGB image. See
    smarts.core.sensors for more information on the Observation type.

    Args:
        observation (Observation): The raw environment observation received from SMARTS.

    Returns:
        np.ndarray: A numpy.ndarray of size (_STACK, _HEIGHT, _WIDTH) that is the
            gray-scale image of the top-down RGB image. The gray-scale value for each
            pixel lies in the range [0.0, 1.0].
    """
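
As a rough usage sketch (an assumption, not part of the original excerpt), the adapter module above can be wired into an agent definition in the same style as the earlier examples; the import paths and the keep-lane placeholder policy are assumed and may differ between SMARTS releases:

# Hedged sketch: plug the adapter's required interface and `adapt` function into
# an AgentSpec.  Import paths follow the SMARTS version used by the surrounding
# examples and may differ in other releases.
from smarts.core.agent import Agent, AgentSpec
from smarts.core.agent_interface import AgentInterface
from smarts.core.controllers import ActionSpaceType

example_agent_spec = AgentSpec(
    interface=AgentInterface(
        action=ActionSpaceType.Lane,
        # Expands to rgb=RGB(width=_WIDTH, height=_HEIGHT, resolution=_RESOLUTION).
        **required_interface,
    ),
    agent_builder=lambda: Agent.from_function(lambda _: "keep_lane"),
    # `adapt` turns the raw Observation into a (_STACK, _HEIGHT, _WIDTH) float32 array.
    observation_adapter=adapt,
)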
Example 8
def _make_rllib_config(config, mode="training"):
    """Generate agent configuration. `mode` can be `train` or 'evaluation', and the
    only difference on the generated configuration is the agent info adapter.
    """

    agent_config = config["agent"]
    interface_config = config["interface"]
    """ Parse the state configuration for agent """
    state_config = agent_config["state"]

    # initialize environment wrapper if the wrapper config is not None
    wrapper_config = state_config.get("wrapper", {"name": "Simple"})
    features_config = state_config["features"]
    # only for one frame, not really an observation
    frame_space = gym.spaces.Dict(common.subscribe_features(**features_config))
    action_type = ActionSpaceType(agent_config["action"]["type"])
    env_action_space = common.ActionSpace.from_type(action_type)
    wrapper_cls = getattr(rllib_wrappers, wrapper_config["name"])
    """ Parse policy configuration """
    policy_obs_space = wrapper_cls.get_observation_space(
        frame_space, wrapper_config)
    policy_action_space = wrapper_cls.get_action_space(env_action_space,
                                                       wrapper_config)

    observation_adapter = wrapper_cls.get_observation_adapter(
        policy_obs_space,
        feature_configs=features_config,
        wrapper_config=wrapper_config)
    action_adapter = wrapper_cls.get_action_adapter(action_type,
                                                    policy_action_space,
                                                    wrapper_config)
    # policy observation space is related to the wrapper usage
    policy_config = (
        None,
        policy_obs_space,
        policy_action_space,
        config["policy"].get(
            "config", {"custom_preprocessor": wrapper_cls.get_preprocessor()}),
    )
    """ Parse agent interface configuration """
    if interface_config.get("neighborhood_vehicles"):
        interface_config["neighborhood_vehicles"] = NeighborhoodVehicles(
            **interface_config["neighborhood_vehicles"])

    if interface_config.get("waypoints"):
        interface_config["waypoints"] = Waypoints(
            **interface_config["waypoints"])

    if interface_config.get("rgb"):
        interface_config["rgb"] = RGB(**interface_config["rgb"])

    if interface_config.get("ogm"):
        interface_config["ogm"] = OGM(**interface_config["ogm"])

    interface_config["action"] = ActionSpaceType(action_type)
    """ Pack environment configuration """
    config["run"]["config"].update({"env": wrapper_cls})
    config["env_config"] = {
        "custom_config": {
            **wrapper_config,
            "reward_adapter":
            wrapper_cls.get_reward_adapter(observation_adapter),
            "observation_adapter":
            observation_adapter,
            "action_adapter":
            action_adapter,
            "info_adapter":
            metrics.agent_info_adapter if mode == "evaluation" else None,
            "observation_space":
            policy_obs_space,
            "action_space":
            policy_action_space,
        },
    }
    config["agent"] = {"interface": AgentInterface(**interface_config)}
    config["trainer"] = _get_trainer(**config["policy"]["trainer"])
    config["policy"] = policy_config

    print(format.pretty_dict(config))

    return config
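
For orientation, a minimal sketch of how this helper might be invoked; the config file path and YAML layout (agent/state, interface, policy, run, ...) are assumptions inferred from the keys accessed above, not part of the original code:

# Hypothetical usage: only the info adapter packed into env_config differs
# between the two modes.
import copy
import yaml

with open("config.yaml") as f:  # assumed path and layout
    base_config = yaml.safe_load(f)

train_config = _make_rllib_config(copy.deepcopy(base_config), mode="training")
eval_config = _make_rllib_config(copy.deepcopy(base_config), mode="evaluation")

assert train_config["env_config"]["custom_config"]["info_adapter"] is None
assert eval_config["env_config"]["custom_config"]["info_adapter"] is metrics.agent_info_adapter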
Example 9
def _make_rllib_config(config, mode="train"):
    """ Generate agent configuration. `mode` can be `train` or 'evaluation', and the
    only difference on the generated configuration is the agent info adapter.
    """

    agent = config["agent"]
    state_config = agent["state"]

    # initialize environment wrapper if the wrapper config is not None
    wrapper_config = state_config["wrapper"]
    wrapper = (
        getattr(rllib_wrappers, wrapper_config["name"]) if wrapper_config else None
    )

    features = state_config["features"]
    # only for one frame, not really an observation
    frame_space = gym.spaces.Dict(common.subscribe_features(**features))

    action_type = agent["action"]["type"]
    action_space = common.ActionSpace.from_type(action_type)

    # policy observation space is related to the wrapper usage
    policy_obs_space = _get_policy_observation_space(
        wrapper, frame_space, wrapper_config
    )
    policy_config = (
        None,
        policy_obs_space,
        action_space,
        config["policy"].get("config", {}),
    )

    interface = config["interface"]
    if interface.get("neighborhood_vehicles"):
        interface["neighborhood_vehicles"] = NeighborhoodVehicles(
            **interface["neighborhood_vehicles"]
        )

    if interface.get("waypoints"):
        interface["waypoints"] = Waypoints(**interface["waypoints"])

    if interface.get("rgb"):
        interface["rgb"] = RGB(**interface["rgb"])

    if interface.get("ogm"):
        interface["ogm"] = OGM(**interface["ogm"])

    interface["action"] = ActionSpaceType(action_type)

    adapter_type = "vanilla" if wrapper == rllib_wrappers.FrameStack else "single_frame"

    agent_config = dict(
        action_adapter=common.ActionAdapter.from_type(action_type),
        info_adapter=metrics.agent_info_adapter
        if mode == "evaluate"
        else common.default_info_adapter,
    )

    adapter_type = (
        "stack_frame" if wrapper == rllib_wrappers.FrameStack else "single_frame"
    )
    if agent.get("adapter_type", {}) != {}:
        adapter_type = agent.get("adapter_type", {})
    observation_adapter = common.get_observation_adapter(
        frame_space, adapter_type, wrapper=wrapper, feature_configs=features
    )

    env_config = dict()
    wrapper_config["parameters"] = {
        "observation_adapter": observation_adapter,
        "reward_adapter": common.get_reward_adapter(observation_adapter, adapter_type),
        "base_env_cls": RLlibHiWayEnv,
    }
    env_config.update(**wrapper_config["parameters"])
    config["run"]["config"].update({"env": wrapper})

    config["env_config"] = env_config
    config["agent"] = agent_config
    config["interface"] = interface
    config["trainer"] = _get_trainer(**config["policy"]["trainer"])
    config["policy"] = policy_config

    pprint.pprint(config)

    return config
Example 10
def action_adapter(model_action):
    assert len(model_action) == 3
    return np.asarray(model_action)


def info_adapter(reward, info):
    return info


agent_interface = AgentInterface(
    max_episode_steps=None,
    waypoints=True,
    # neighborhood < 60m
    neighborhood_vehicles=NeighborhoodVehicles(radius=60),
    # OGM within 64 * 0.25 = 16
    ogm=OGM(64, 64, 0.25),
    rgb=RGB(256, 256, 50 / 256),
    action=ActionSpaceType.ActuatorDynamic,
)

agent_spec = AgentSpec(
    interface=agent_interface,
    observation_adapter=observation_adapter,
    reward_adapter=reward_adapter,
    action_adapter=action_adapter,
    info_adapter=info_adapter,
)
Example 11
    def test_observations_stacking(self):
        EPISODES = 3
        WIDTH = 64
        HEIGHT = WIDTH
        RESOLUTION = 50 / WIDTH
        ENVIRONMENT_STACK_SIZE = 4

        agent_spec = AgentSpec(
            interface=AgentInterface(
                waypoints=Waypoints(lookahead=1),
                neighborhood_vehicles=NeighborhoodVehicles(radius=10.0),
                rgb=RGB(width=WIDTH, height=HEIGHT, resolution=RESOLUTION),
                action=ActionSpaceType.Lane,
            ),
            agent_builder=TestLaneAgent,
        )
        agent = agent_spec.build_agent()

        environment = gym.make(
            "ultra.env:ultra-v0",
            agent_specs={AGENT_ID: agent_spec},
            scenario_info=("00", "easy"),
            headless=True,
            timestep_sec=0.1,
            seed=2,
        )

        def check_environment_observations_stack(environment):
            self.assertIsInstance(environment.smarts_observations_stack, deque)
            self.assertEqual(
                len(environment.smarts_observations_stack), ENVIRONMENT_STACK_SIZE
            )
            self.assertIsInstance(environment.smarts_observations_stack[0], dict)
            self.assertTrue(
                all(
                    str(environment.smarts_observations_stack[0]) == str(observations)
                    for observations in environment.smarts_observations_stack
                )
            )

        def check_stacked_observations(environment, observations):
            self.assertIn(AGENT_ID, observations)
            self.assertIsNotNone(observations[AGENT_ID].top_down_rgb)
            self.assertIsInstance(observations[AGENT_ID].top_down_rgb, TopDownRGB)
            self.assertEqual(
                observations[AGENT_ID].top_down_rgb.metadata,
                environment.smarts_observations_stack[-1][
                    AGENT_ID
                ].top_down_rgb.metadata,
            )
            self.assertEqual(
                observations[AGENT_ID].top_down_rgb.data.shape,
                (ENVIRONMENT_STACK_SIZE, HEIGHT, WIDTH, 3),
            )
            # Ensure the stacked observation's TopDownRGB data is in the same order
            # and contains the same NumPy arrays as the environment's observation stack.
            self.assertTrue(
                all(
                    np.array_equal(
                        observations_from_stack[AGENT_ID].top_down_rgb.data,
                        observations[AGENT_ID].top_down_rgb.data[i],
                    )
                    for i, observations_from_stack in enumerate(
                        environment.smarts_observations_stack
                    )
                )
            )

        for _ in range(EPISODES):
            dones = {"__all__": False}
            observations = environment.reset()

            check_environment_observations_stack(environment)
            check_stacked_observations(environment, observations)

            while not dones["__all__"]:
                action = agent.act(observations[AGENT_ID])
                observations, _, dones, _ = environment.step({AGENT_ID: action})
                check_stacked_observations(environment, observations)

        environment.close()