Example #1
# Assumed SMARTS imports for this snippet (module paths may vary by version):
from smarts.core.agent import AgentPolicy, AgentSpec
from smarts.core.agent_interface import (
    AgentInterface,
    DrivableAreaGridMap,
    NeighborhoodVehicles,
    OGM,
    RGB,
    RoadWaypoints,
)
from smarts.core.controllers import ActionSpaceType

# MAP_WIDTH, MAP_HEIGHT, MAP_RESOLUTION are constants defined elsewhere in the
# original file.


def agent_spec():
    return AgentSpec(
        interface=AgentInterface(
            road_waypoints=RoadWaypoints(40),
            neighborhood_vehicles=NeighborhoodVehicles(
                radius=max(MAP_WIDTH * MAP_RESOLUTION, MAP_HEIGHT * MAP_RESOLUTION)
                * 0.5
            ),
            drivable_area_grid_map=DrivableAreaGridMap(
                width=MAP_WIDTH, height=MAP_HEIGHT, resolution=MAP_RESOLUTION
            ),
            ogm=OGM(width=MAP_WIDTH, height=MAP_HEIGHT, resolution=MAP_RESOLUTION),
            rgb=RGB(width=MAP_WIDTH, height=MAP_HEIGHT, resolution=MAP_RESOLUTION),
            action=ActionSpaceType.Lane,
        ),
        policy_builder=lambda: AgentPolicy.from_function(lambda _: "keep_lane"),
    )
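A minimal usage sketch for a spec like this, assuming the classic SMARTS `hiway-v0` Gym entry point and `AgentSpec.build_agent()` API; the agent id and scenario path are hypothetical:

import gym

spec = agent_spec()
env = gym.make(
    "smarts.env:hiway-v0",
    scenarios=["scenarios/loop"],     # hypothetical scenario path
    agent_specs={"Agent-007": spec},
)
agent = spec.build_agent()
observations = env.reset()
action = agent.act(observations["Agent-007"])  # always "keep_lane" for this policy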
Example #2

# Assumed imports for this snippet (module paths may vary by SMARTS version):
import numpy as np

from smarts.core.agent import AgentSpec
from smarts.core.agent_interface import AgentInterface, NeighborhoodVehicles, OGM
from smarts.core.controllers import ActionSpaceType


def action_adapter(model_action):
    # model_action = [acceleration, steering], with acceleration in [-1, 1]:
    # the positive part becomes throttle, the magnitude of the negative part
    # becomes brake.
    assert len(model_action) == 2
    throttle = np.clip(model_action[0], 0, 1)
    brake = np.abs(np.clip(model_action[0], -1, 0))
    return np.asarray([throttle, brake, model_action[1]])


def info_adapter(reward, info):
    return info


agent_interface = AgentInterface(
    max_episode_steps=None,
    waypoints=True,
    # consider neighborhood vehicles within a 60 m radius
    neighborhood_vehicles=NeighborhoodVehicles(radius=60),
    # OGM covers 64 * 0.25 = 16 m per side
    ogm=OGM(64, 64, 0.25),
    action=ActionSpaceType.Continuous,
)

# `observation_adapter` and `reward_adapter` are defined elsewhere in the
# original file.
agent_spec = AgentSpec(
    interface=agent_interface,
    observation_adapter=observation_adapter,
    reward_adapter=reward_adapter,
    action_adapter=action_adapter,
    info_adapter=info_adapter,
)
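To make the throttle/brake split concrete, a quick check of the adapter above with hypothetical model outputs:

print(action_adapter(np.array([0.7, -0.2])))  # -> [0.7, 0.0, -0.2] (throttle only)
print(action_adapter(np.array([-0.5, 0.1])))  # -> [0.0, 0.5, 0.1] (brake only)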
Example #3
# From a larger training script: `common`, `metrics`, `rllib_wrappers`, `format`,
# and `_get_trainer` are project-local helpers imported at module level.
def _make_rllib_config(config, mode="training"):
    """Generate the agent configuration. `mode` can be "training" or
    "evaluation"; the only difference in the generated configuration is the
    agent info adapter.
    """

    agent_config = config["agent"]
    interface_config = config["interface"]
    """ Parse the state configuration for agent """
    state_config = agent_config["state"]

    # resolve the environment wrapper; fall back to the "Simple" wrapper
    wrapper_config = state_config.get("wrapper", {"name": "Simple"})
    features_config = state_config["features"]
    # only for one frame, not really an observation
    frame_space = gym.spaces.Dict(common.subscribe_features(**features_config))
    action_type = ActionSpaceType(agent_config["action"]["type"])
    env_action_space = common.ActionSpace.from_type(action_type)
    wrapper_cls = getattr(rllib_wrappers, wrapper_config["name"])
    """ Parse policy configuration """
    policy_obs_space = wrapper_cls.get_observation_space(
        frame_space, wrapper_config)
    policy_action_space = wrapper_cls.get_action_space(env_action_space,
                                                       wrapper_config)

    observation_adapter = wrapper_cls.get_observation_adapter(
        policy_obs_space,
        feature_configs=features_config,
        wrapper_config=wrapper_config)
    action_adapter = wrapper_cls.get_action_adapter(action_type,
                                                    policy_action_space,
                                                    wrapper_config)
    # policy observation space is related to the wrapper usage
    policy_config = (
        None,
        policy_obs_space,
        policy_action_space,
        config["policy"].get(
            "config", {"custom_preprocessor": wrapper_cls.get_preprocessor()}),
    )
    """ Parse agent interface configuration """
    if interface_config.get("neighborhood_vehicles"):
        interface_config["neighborhood_vehicles"] = NeighborhoodVehicles(
            **interface_config["neighborhood_vehicles"])

    if interface_config.get("waypoints"):
        interface_config["waypoints"] = Waypoints(
            **interface_config["waypoints"])

    if interface_config.get("rgb"):
        interface_config["rgb"] = RGB(**interface_config["rgb"])

    if interface_config.get("ogm"):
        interface_config["ogm"] = OGM(**interface_config["ogm"])

    interface_config["action"] = ActionSpaceType(action_type)
    """ Pack environment configuration """
    config["run"]["config"].update({"env": wrapper_cls})
    config["env_config"] = {
        "custom_config": {
            **wrapper_config,
            "reward_adapter":
            wrapper_cls.get_reward_adapter(observation_adapter),
            "observation_adapter":
            observation_adapter,
            "action_adapter":
            action_adapter,
            "info_adapter":
            metrics.agent_info_adapter if mode == "evaluation" else None,
            "observation_space":
            policy_obs_space,
            "action_space":
            policy_action_space,
        },
    }
    config["agent"] = {"interface": AgentInterface(**interface_config)}
    config["trainer"] = _get_trainer(**config["policy"]["trainer"])
    config["policy"] = policy_config

    print(format.pretty_dict(config))

    return config
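For orientation, a hypothetical skeleton of the `config` dict this function reads. The keys are taken from the accesses above, but every value shown is an illustrative placeholder, not from the original project:

config = {
    "agent": {
        "state": {
            "wrapper": {"name": "Simple"},  # optional; falls back to "Simple"
            "features": {},                 # kwargs for common.subscribe_features
        },
        "action": {"type": "lane"},         # placeholder; must be a valid ActionSpaceType value
    },
    "interface": {
        "neighborhood_vehicles": {"radius": 60},  # optional; kwargs for NeighborhoodVehicles
        "waypoints": {"lookahead": 50},           # optional; kwargs for Waypoints
        "rgb": {},                                # optional; kwargs for RGB
        "ogm": {},                                # optional; kwargs for OGM
    },
    "policy": {
        "trainer": {},  # kwargs for _get_trainer
        "config": {},   # optional RLlib policy config
    },
    "run": {"config": {}},
}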
Example #4
# From a larger training script: `common`, `metrics`, `rllib_wrappers`,
# `RLlibHiWayEnv`, `_get_policy_observation_space`, `_get_trainer`, and `pprint`
# are imported at module level in the original file.
def _make_rllib_config(config, mode="train"):
    """Generate the agent configuration. `mode` can be "train" or "evaluate";
    the only difference in the generated configuration is the agent info
    adapter.
    """

    agent = config["agent"]
    state_config = agent["state"]

    # initialize environment wrapper if the wrapper config is not None
    wrapper_config = state_config["wrapper"]
    wrapper = (
        getattr(rllib_wrappers, wrapper_config["name"]) if wrapper_config else None
    )

    features = state_config["features"]
    # only for one frame, not really an observation
    frame_space = gym.spaces.Dict(common.subscribe_features(**features))

    action_type = agent["action"]["type"]
    action_space = common.ActionSpace.from_type(action_type)

    # policy observation space is related to the wrapper usage
    policy_obs_space = _get_policy_observation_space(
        wrapper, frame_space, wrapper_config
    )
    policy_config = (
        None,
        policy_obs_space,
        action_space,
        config["policy"].get("config", {}),
    )

    interface = config["interface"]
    if interface.get("neighborhood_vehicles"):
        interface["neighborhood_vehicles"] = NeighborhoodVehicles(
            **interface["neighborhood_vehicles"]
        )

    if interface.get("waypoints"):
        interface["waypoints"] = Waypoints(**interface["waypoints"])

    if interface.get("rgb"):
        interface["rgb"] = RGB(**interface["rgb"])

    if interface.get("ogm"):
        interface["ogm"] = OGM(**interface["ogm"])

    interface["action"] = ActionSpaceType(action_type)

    agent_config = dict(
        action_adapter=common.ActionAdapter.from_type(action_type),
        info_adapter=metrics.agent_info_adapter
        if mode == "evaluate"
        else common.default_info_adapter,
    )

    # The default adapter type depends on the wrapper; an explicit
    # `adapter_type` entry in the agent config takes precedence.
    default_adapter_type = (
        "stack_frame" if wrapper == rllib_wrappers.FrameStack else "single_frame"
    )
    adapter_type = agent.get("adapter_type") or default_adapter_type
    observation_adapter = common.get_observation_adapter(
        frame_space, adapter_type, wrapper=wrapper, feature_configs=features
    )

    env_config = dict()
    # Note: this assumes `wrapper_config` is not None, i.e. a wrapper was configured.
    wrapper_config["parameters"] = {
        "observation_adapter": observation_adapter,
        "reward_adapter": common.get_reward_adapter(observation_adapter, adapter_type),
        "base_env_cls": RLlibHiWayEnv,
    }
    env_config.update(**wrapper_config["parameters"])
    config["run"]["config"].update({"env": wrapper})

    config["env_config"] = env_config
    config["agent"] = agent_config
    config["interface"] = interface
    config["trainer"] = _get_trainer(**config["policy"]["trainer"])
    config["policy"] = policy_config

    pprint.pprint(config)

    return config
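A hedged usage sketch for either variant of `_make_rllib_config`; the YAML path is hypothetical, and note that the function mutates `config` in place, so reload the raw dict before building a second configuration:

import yaml

with open("configs/ppo_baseline.yaml") as f:  # hypothetical experiment file
    experiment_config = yaml.safe_load(f)

rllib_config = _make_rllib_config(experiment_config, mode="evaluate")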