Example #1
def _intrfcs_init():
    return [
        [{"accelerometer": True}, {"accelerometer": False}],
        [{"drivable_area_grid_map": True}] * 2,
        [{"lidar": True}] * 2,
        [{"neighborhood_vehicles": True}] * 2,
        [{"ogm": True}] * 2,
        [{"rgb": True}] * 2,
        [{"waypoints": Waypoints(lookahead=1)}] * 2,
        [{"neighborhood_vehicles": True, "waypoints": Waypoints(lookahead=1)}] * 2,
    ]
Example #2
def _intrfcs_obs():
    base_intrfc = {
        "accelerometer": True,
        "drivable_area_grid_map": True,
        "lidar": True,
        "neighborhood_vehicles": True,
        "ogm": True,
        "rgb": True,
        "waypoints": Waypoints(lookahead=1),
    }

    return [
        [base_intrfc] * 2,
        [dict(base_intrfc, **{"accelerometer": False})] * 2,
        [dict(base_intrfc, **{"waypoints": Waypoints(lookahead=50)})] * 2,
    ]
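Each entry in these fixtures is a dict of AgentInterface keyword arguments, where a value is either a boolean flag or a sensor configuration object such as Waypoints. A minimal sketch of how one such dict is typically consumed, mirroring Example #4's AgentInterface(**required_interface) pattern (exact sensor defaults depend on the SMARTS version):

from smarts.core.agent_interface import AgentInterface, Waypoints

# Illustrative only: expand one fixture entry into an agent interface.
interface_kwargs = {"neighborhood_vehicles": True, "waypoints": Waypoints(lookahead=1)}
interface = AgentInterface(**interface_kwargs)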
Example #3
 def __new__(
     self,
     policy_class,
     action_type,
     checkpoint_dir=None,
     task=None,
     max_episode_steps=1200,
     experiment_dir=None,
 ):
     if experiment_dir:
         print(f"LOADING SPEC from {experiment_dir}/spec.pkl")
         with open(f"{experiment_dir}/spec.pkl", "rb") as spec_file:
             spec = dill.load(spec_file)
             new_spec = AgentSpec(
                 interface=spec.interface,
                 agent_params=dict(
                     policy_params=spec.agent_params["policy_params"],
                     checkpoint_dir=checkpoint_dir,
                 ),
                 agent_builder=spec.policy_builder,
                 observation_adapter=spec.observation_adapter,
                 reward_adapter=spec.reward_adapter,
             )
             spec = new_spec
     else:
         adapter = BaselineAdapter()
         policy_dir = "/".join(
             inspect.getfile(policy_class).split("/")[:-1])
         policy_params = load_yaml(f"{policy_dir}/params.yaml")
         spec = AgentSpec(
             interface=AgentInterface(
                 waypoints=Waypoints(lookahead=20),
                 neighborhood_vehicles=NeighborhoodVehicles(200),
                 action=action_type,
                 rgb=False,
                 max_episode_steps=max_episode_steps,
                 debug=True,
             ),
             agent_params=dict(policy_params=policy_params,
                               checkpoint_dir=checkpoint_dir),
             agent_builder=policy_class,
             observation_adapter=adapter.observation_adapter,
             reward_adapter=adapter.reward_adapter,
         )
     return spec
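Because __new__ here returns an AgentSpec rather than an instance of the defining class, calling that class acts as a spec factory. A hedged usage sketch; the class name BaselineAgentSpec and the policy class MyPolicy are assumptions for illustration:

from smarts.core.controllers import ActionSpaceType

# MyPolicy is a hypothetical policy class with a params.yaml next to its module,
# as the else-branch above expects.
spec = BaselineAgentSpec(
    policy_class=MyPolicy,
    action_type=ActionSpaceType.Continuous,
    max_episode_steps=1200,
)
agent = spec.build_agent()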
Example #4
def prepare_test_agent_and_environment(
    required_interface: Dict[str, Any],
    action_adapter: Callable = lambda action: action,
    info_adapter: Callable = lambda observation, reward, info: info,
    observation_adapter: Callable = lambda observation: observation,
    reward_adapter: Callable = lambda _, reward: reward,
    headless: bool = True,
) -> Tuple[Agent, UltraEnv]:
    if "waypoints" not in required_interface:
        required_interface["waypoints"] = Waypoints(lookahead=20)
    if "neighborhood_vehicles" not in required_interface:
        required_interface["neighborhood_vehicles"] = NeighborhoodVehicles(
            radius=200)
    if "action" not in required_interface:
        required_interface["action"] = ActionSpaceType.Lane

    agent_spec = AgentSpec(
        interface=AgentInterface(**required_interface),
        agent_builder=RandomAgent,
        agent_params={"action_type": required_interface["action"]},
        action_adapter=action_adapter,
        info_adapter=info_adapter,
        observation_adapter=observation_adapter,
        reward_adapter=reward_adapter,
    )
    agent = agent_spec.build_agent()

    environment = gym.make(
        "ultra.env:ultra-v0",
        agent_specs={AGENT_ID: agent_spec},
        scenario_info=("00", "easy"),
        headless=headless,
        timestep_sec=TIMESTEP_SEC,
        seed=SEED,
    )

    return agent, environment
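A hedged usage sketch for the helper above: build the pair and drive one episode, the same loop shape used by Example #12 later in this collection. RGB comes from smarts.core.agent_interface; AGENT_ID is the module-level constant the helper already uses:

from smarts.core.agent_interface import RGB

agent, environment = prepare_test_agent_and_environment(
    required_interface={"rgb": RGB(width=64, height=64, resolution=50 / 64)},
)

dones = {"__all__": False}
observations = environment.reset()
while not dones["__all__"]:
    action = agent.act(observations[AGENT_ID])
    observations, rewards, dones, infos = environment.step({AGENT_ID: action})
environment.close()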
Example #5
def train(
    task,
    num_episodes,
    max_episode_steps,
    rollout_fragment_length,
    policy,
    eval_info,
    timestep_sec,
    headless,
    seed,
    train_batch_size,
    sgd_minibatch_size,
    log_dir,
):
    agent_name = policy
    policy_params = load_yaml(
        f"ultra/baselines/{agent_name}/{agent_name}/params.yaml")

    action_type = adapters.type_from_string(policy_params["action_type"])
    observation_type = adapters.type_from_string(
        policy_params["observation_type"])
    reward_type = adapters.type_from_string(policy_params["reward_type"])

    if action_type != adapters.AdapterType.DefaultActionContinuous:
        raise Exception(
            f"RLlib training only supports the "
            f"{adapters.AdapterType.DefaultActionContinuous} action type.")
    if observation_type != adapters.AdapterType.DefaultObservationVector:
        # NOTE: The SMARTS observations adaptation that is done in ULTRA's Gym
        #       environment is not done in ULTRA's RLlib environment. If other
        #       observation adapters are used, they may raise an Exception.
        raise Exception(
            f"RLlib training only supports the "
            f"{adapters.AdapterType.DefaultObservationVector} observation type."
        )

    action_space = adapters.space_from_type(adapter_type=action_type)
    observation_space = adapters.space_from_type(adapter_type=observation_type)

    action_adapter = adapters.adapter_from_type(adapter_type=action_type)
    info_adapter = adapters.adapter_from_type(
        adapter_type=adapters.AdapterType.DefaultInfo)
    observation_adapter = adapters.adapter_from_type(
        adapter_type=observation_type)
    reward_adapter = adapters.adapter_from_type(adapter_type=reward_type)

    params_seed = policy_params["seed"]
    encoder_key = policy_params["social_vehicles"]["encoder_key"]
    num_social_features = observation_space["social_vehicles"].shape[1]
    social_capacity = observation_space["social_vehicles"].shape[0]
    social_policy_hidden_units = int(policy_params["social_vehicles"].get(
        "social_policy_hidden_units", 0))
    social_policy_init_std = int(policy_params["social_vehicles"].get(
        "social_policy_init_std", 0))
    social_vehicle_config = get_social_vehicle_configs(
        encoder_key=encoder_key,
        num_social_features=num_social_features,
        social_capacity=social_capacity,
        seed=params_seed,
        social_policy_hidden_units=social_policy_hidden_units,
        social_policy_init_std=social_policy_init_std,
    )

    ModelCatalog.register_custom_model("fc_model", CustomFCModel)
    config = RllibAgent.rllib_default_config(agent_name)

    rllib_policies = {
        "default_policy": (
            None,
            observation_space,
            action_space,
            {
                "model": {
                    "custom_model": "fc_model",
                    "custom_model_config": {
                        "social_vehicle_config": social_vehicle_config
                    },
                }
            },
        )
    }
    agent_specs = {
        "AGENT-007":
        AgentSpec(
            interface=AgentInterface(
                waypoints=Waypoints(lookahead=20),
                neighborhood_vehicles=NeighborhoodVehicles(200),
                action=ActionSpaceType.Continuous,
                rgb=False,
                max_episode_steps=max_episode_steps,
                debug=True,
            ),
            agent_params={},
            agent_builder=None,
            action_adapter=action_adapter,
            info_adapter=info_adapter,
            observation_adapter=observation_adapter,
            reward_adapter=reward_adapter,
        )
    }

    tune_config = {
        "env": RLlibUltraEnv,
        "log_level": "WARN",
        "callbacks": Callbacks,
        "framework": "torch",
        "num_workers": 1,
        "train_batch_size": train_batch_size,
        "sgd_minibatch_size": sgd_minibatch_size,
        "rollout_fragment_length": rollout_fragment_length,
        "in_evaluation": True,
        "evaluation_num_episodes": eval_info["eval_episodes"],
        "evaluation_interval": eval_info[
            "eval_rate"],  # Evaluation occurs after # of eval-intervals (episodes)
        "evaluation_config": {
            "env_config": {
                "seed": seed,
                "scenario_info": task,
                "headless": headless,
                "eval_mode": True,
                "ordered_scenarios": False,
                "agent_specs": agent_specs,
                "timestep_sec": timestep_sec,
            },
            "explore": False,
        },
        "env_config": {
            "seed": seed,
            "scenario_info": task,
            "headless": headless,
            "eval_mode": False,
            "ordered_scenarios": False,
            "agent_specs": agent_specs,
            "timestep_sec": timestep_sec,
        },
        "multiagent": {
            "policies": rllib_policies
        },
    }

    config.update(tune_config)
    agent = RllibAgent(
        agent_name=agent_name,
        env=RLlibUltraEnv,
        config=tune_config,
        logger_creator=log_creator(log_dir),
    )

    # The iteration value in trainer.py (self._iterations) is technically the number of episodes
    for i in range(num_episodes):
        results = agent.train()
        agent.log_evaluation_metrics(
            results)  # Evaluation metrics will now be displayed on Tensorboard
Example #6
def make_config(**kwargs):
    use_stacked_observation = kwargs.get("use_stacked_observation", False)
    use_rgb = kwargs.get("use_rgb", False)
    closest_neighbor_num = kwargs.get("max_observed_neighbors", 8)

    img_resolution = 24
    observe_lane_num = 3
    look_ahead = 10
    stack_size = 3 if use_stacked_observation else 1
    subscribed_features = dict(
        goal_relative_pos=(stack_size, 2),
        distance_to_center=(stack_size, 1),
        speed=(stack_size, 1),
        steering=(stack_size, 1),
        heading_errors=(stack_size, look_ahead),
        neighbor=(stack_size, closest_neighbor_num * 5),  # dist, speed, ttc
        img_gray=(stack_size, img_resolution,
                  img_resolution) if use_rgb else False,
    )

    action_space = common.ActionSpace.from_type(1)
    observation_space = gym.spaces.Dict(
        common.subscribe_features(**subscribed_features))

    policy_config = (
        None,
        observation_space,
        action_space,
        dict(model=dict(custom_model_config=dict(
            obs_space_dict=observation_space), )),
    )

    interface_config = dict(
        obs_stack_size=stack_size,
        neighborhood_vehicles=NeighborhoodVehicles(radius=100),
        waypoints=Waypoints(lookahead=10),
        rgb=RGB(width=256, height=256, resolution=img_resolution /
                256) if use_rgb else None,
        action=ActionSpaceType.Lane,
        road_waypoints=None,
        drivable_area_grid_map=None,
        ogm=None,
        lidar=None,
        debug=False,
    )

    observation_adapter = common.get_observation_adapter(
        observation_space,
        look_ahead=look_ahead,
        observe_lane_num=observe_lane_num,
        resize=(img_resolution, img_resolution),
        closest_neighbor_num=closest_neighbor_num,
    )

    agent_config = dict(
        observation_adapter=observation_adapter,
        reward_adapter=get_reward_adapter(observation_adapter),
        action_adapter=common.ActionAdapter.from_type(1),
        info_adapter=common.default_info_adapter,
    )

    learning_config = dict()

    other_config = dict(
        stop={"time_total_s": 2 * 60 * 60},
        checkpoint_freq=40,
        checkpoint_at_end=True,
        max_failures=1000,
    )

    return common.Config(
        name=NAME,
        agent=agent_config,
        interface=interface_config,
        policy=policy_config,
        learning=learning_config,
        other=other_config,
        trainer=DQNTrainer,
        spec={
            "obs": observation_space,
            "act": action_space
        },
    )
Example #7
from typing import Any, Dict

import numpy as np
from scipy.spatial import distance

from smarts.core.agent_interface import NeighborhoodVehicles, Waypoints
from smarts.core.sensors import Observation
from ultra.adapters.constants import DEFAULT_RADIUS, DEFAULT_WAYPOINTS
import ultra.adapters.default_reward_adapter as default_reward_adapter
from ultra.utils.common import ego_social_safety, get_closest_waypoint, get_path_to_goal


_WAYPOINTS = DEFAULT_WAYPOINTS
_RADIUS = DEFAULT_RADIUS


required_interface = {
    "waypoints": Waypoints(lookahead=_WAYPOINTS),
    "neighborhood_vehicles": NeighborhoodVehicles(radius=_RADIUS),
}


def adapt(
    observation: Observation, reward: float, info: Dict[str, Any]
) -> Dict[str, Any]:
    """Adapts a raw environment observation, an environment reward, and info about the
    agent from the environment into custom information about the agent.

    The raw observation from the environment must include the ego vehicle's state,
    events, waypoint paths, and neighborhood vehicles. See smarts.core.sensors for more
    information on the Observation type.

    Args:
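# Hedged wiring sketch (not part of this module): merge the adapter's required_interface
# into the AgentInterface keyword arguments and register adapt as the info_adapter of an
# AgentSpec, following the pattern of the other examples in this collection. Import paths
# assume the SMARTS version these examples target.
from smarts.core.agent import AgentSpec
from smarts.core.agent_interface import AgentInterface
from smarts.core.controllers import ActionSpaceType

spec = AgentSpec(
    interface=AgentInterface(action=ActionSpaceType.Lane, **required_interface),
    info_adapter=adapt,
)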
Example #8
def _make_rllib_config(config, mode="training"):
    """Generate agent configuration. `mode` can be `train` or 'evaluation', and the
    only difference on the generated configuration is the agent info adapter.
    """

    agent_config = config["agent"]
    interface_config = config["interface"]
    """ Parse the state configuration for agent """
    state_config = agent_config["state"]

    # initialize environment wrapper if the wrapper config is not None
    wrapper_config = state_config.get("wrapper", {"name": "Simple"})
    features_config = state_config["features"]
    # only for one frame, not really an observation
    frame_space = gym.spaces.Dict(common.subscribe_features(**features_config))
    action_type = ActionSpaceType(agent_config["action"]["type"])
    env_action_space = common.ActionSpace.from_type(action_type)
    wrapper_cls = getattr(rllib_wrappers, wrapper_config["name"])
    """ Parse policy configuration """
    policy_obs_space = wrapper_cls.get_observation_space(
        frame_space, wrapper_config)
    policy_action_space = wrapper_cls.get_action_space(env_action_space,
                                                       wrapper_config)

    observation_adapter = wrapper_cls.get_observation_adapter(
        policy_obs_space,
        feature_configs=features_config,
        wrapper_config=wrapper_config)
    action_adapter = wrapper_cls.get_action_adapter(action_type,
                                                    policy_action_space,
                                                    wrapper_config)
    # policy observation space is related to the wrapper usage
    policy_config = (
        None,
        policy_obs_space,
        policy_action_space,
        config["policy"].get(
            "config", {"custom_preprocessor": wrapper_cls.get_preprocessor()}),
    )
    """ Parse agent interface configuration """
    if interface_config.get("neighborhood_vehicles"):
        interface_config["neighborhood_vehicles"] = NeighborhoodVehicles(
            **interface_config["neighborhood_vehicles"])

    if interface_config.get("waypoints"):
        interface_config["waypoints"] = Waypoints(
            **interface_config["waypoints"])

    if interface_config.get("rgb"):
        interface_config["rgb"] = RGB(**interface_config["rgb"])

    if interface_config.get("ogm"):
        interface_config["ogm"] = OGM(**interface_config["ogm"])

    interface_config["action"] = ActionSpaceType(action_type)
    """ Pack environment configuration """
    config["run"]["config"].update({"env": wrapper_cls})
    config["env_config"] = {
        "custom_config": {
            **wrapper_config,
            "reward_adapter":
            wrapper_cls.get_reward_adapter(observation_adapter),
            "observation_adapter":
            observation_adapter,
            "action_adapter":
            action_adapter,
            "info_adapter":
            metrics.agent_info_adapter if mode == "evaluation" else None,
            "observation_space":
            policy_obs_space,
            "action_space":
            policy_action_space,
        },
    }
    config["agent"] = {"interface": AgentInterface(**interface_config)}
    config["trainer"] = _get_trainer(**config["policy"]["trainer"])
    config["policy"] = policy_config

    print(format.pretty_dict(config))

    return config
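The 4-tuple stored under config["policy"] has the same shape as the "default_policy" entries built in Examples #5 and #13. A hedged sketch of how it might be consumed downstream; the surrounding run configuration is an assumption:

rllib_config = _make_rllib_config(config, mode="training")

tune_config = {
    "multiagent": {
        "policies": {"default_policy": rllib_config["policy"]},
        "policy_mapping_fn": lambda agent_id: "default_policy",
    },
}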
Example #9
    def __new__(
        self,
        policy_class,
        action_type,
        checkpoint_dir=None,
        task=None,
        max_episode_steps=1200,
        experiment_dir=None,
        agent_id="",
    ):
        if experiment_dir:
            print(
                f"Loading spec for {agent_id} from {experiment_dir}/agent_metadata.pkl"
            )
            with open(f"{experiment_dir}/agent_metadata.pkl",
                      "rb") as metadata_file:
                agent_metadata = dill.load(metadata_file)
                spec = agent_metadata["agent_specs"][agent_id]

                new_spec = AgentSpec(
                    interface=spec.interface,
                    agent_params=dict(
                        policy_params=spec.agent_params["policy_params"],
                        checkpoint_dir=checkpoint_dir,
                    ),
                    agent_builder=spec.policy_builder,
                    observation_adapter=spec.observation_adapter,
                    reward_adapter=spec.reward_adapter,
                )

                spec = new_spec
        else:
            base_dir = os.path.join(os.path.dirname(__file__), "../")
            pool_path = os.path.join(base_dir, "agent_pool.json")

            policy_class_name = policy_class.__name__
            agent_name = None

            with open(pool_path, "r") as f:
                data = json.load(f)
                agents = data["agents"].keys()
                for agent in agents:
                    if data["agents"][agent]["class"] == policy_class_name:
                        agent_name = data["agents"][agent]["name"]
                        break

            assert agent_name is not None

            adapter = BaselineAdapter(agent_name)
            spec = AgentSpec(
                interface=AgentInterface(
                    waypoints=Waypoints(lookahead=20),
                    neighborhood_vehicles=NeighborhoodVehicles(200),
                    action=action_type,
                    rgb=False,
                    max_episode_steps=max_episode_steps,
                    debug=True,
                ),
                agent_params=dict(policy_params=adapter.policy_params,
                                  checkpoint_dir=checkpoint_dir),
                agent_builder=policy_class,
                observation_adapter=adapter.observation_adapter,
                reward_adapter=adapter.reward_adapter,
            )
        return spec
Example #10
import math

import numpy as np
from scipy.spatial import distance

from smarts.core.agent_interface import Waypoints
from smarts.core.sensors import Observation
from ultra.adapters.constants import DEFAULT_WAYPOINTS
from ultra.utils.common import get_closest_waypoint, get_path_to_goal

_WAYPOINTS = DEFAULT_WAYPOINTS

# This adapter requires SMARTS to pass the next _WAYPOINTS waypoints in the agent's
# observation.
required_interface = {"waypoints": Waypoints(lookahead=_WAYPOINTS)}


def adapt(observation: Observation, reward: float) -> float:
    """Adapts a raw environment observation and an environment reward to a custom reward
    of type float.

    The raw observation from the environment must include the ego vehicle's state,
    events, and waypoint paths. See smarts.core.sensors for more information on the
    Observation type.

    Args:
        observation (Observation): The raw environment observation received from SMARTS.
        reward (float): The environment reward received from SMARTS.

    Returns:
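# Hedged wiring sketch, mirroring the info-adapter sketch earlier in this collection:
# register adapt as the reward_adapter of an AgentSpec whose AgentInterface satisfies
# required_interface. Import paths assume the SMARTS version these examples target.
from smarts.core.agent import AgentSpec
from smarts.core.agent_interface import AgentInterface
from smarts.core.controllers import ActionSpaceType

spec = AgentSpec(
    interface=AgentInterface(action=ActionSpaceType.Lane, **required_interface),
    reward_adapter=adapt,
)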
Example #11
def _make_rllib_config(config, mode="train"):
    """ Generate agent configuration. `mode` can be `train` or 'evaluation', and the
    only difference on the generated configuration is the agent info adapter.
    """

    agent = config["agent"]
    state_config = agent["state"]

    # initialize environment wrapper if the wrapper config is not None
    wrapper_config = state_config["wrapper"]
    wrapper = (
        getattr(rllib_wrappers, wrapper_config["name"]) if wrapper_config else None
    )

    features = state_config["features"]
    # only for one frame, not really an observation
    frame_space = gym.spaces.Dict(common.subscribe_features(**features))

    action_type = agent["action"]["type"]
    action_space = common.ActionSpace.from_type(action_type)

    # policy observation space is related to the wrapper usage
    policy_obs_space = _get_policy_observation_space(
        wrapper, frame_space, wrapper_config
    )
    policy_config = (
        None,
        policy_obs_space,
        action_space,
        config["policy"].get("config", {}),
    )

    interface = config["interface"]
    if interface.get("neighborhood_vehicles"):
        interface["neighborhood_vehicles"] = NeighborhoodVehicles(
            **interface["neighborhood_vehicles"]
        )

    if interface.get("waypoints"):
        interface["waypoints"] = Waypoints(**interface["waypoints"])

    if interface.get("rgb"):
        interface["rgb"] = RGB(**interface["rgb"])

    if interface.get("ogm"):
        interface["ogm"] = OGM(**interface["ogm"])

    interface["action"] = ActionSpaceType(action_type)

    agent_config = dict(
        action_adapter=common.ActionAdapter.from_type(action_type),
        info_adapter=metrics.agent_info_adapter
        if mode == "evaluate"
        else common.default_info_adapter,
    )

    adapter_type = (
        "stack_frame" if wrapper == rllib_wrappers.FrameStack else "single_frame"
    )
    if agent.get("adapter_type", {}) != {}:
        adapter_type = agent.get("adapter_type", {})
    observation_adapter = common.get_observation_adapter(
        frame_space, adapter_type, wrapper=wrapper, feature_configs=features
    )

    env_config = dict()
    wrapper_config["parameters"] = {
        "observation_adapter": observation_adapter,
        "reward_adapter": common.get_reward_adapter(observation_adapter, adapter_type),
        "base_env_cls": RLlibHiWayEnv,
    }
    env_config.update(**wrapper_config["parameters"])
    config["run"]["config"].update({"env": wrapper})

    config["env_config"] = env_config
    config["agent"] = agent_config
    config["interface"] = interface
    config["trainer"] = _get_trainer(**config["policy"]["trainer"])
    config["policy"] = policy_config

    pprint.pprint(config)

    return config
Example #12
    def test_observations_stacking(self):
        EPISODES = 3
        WIDTH = 64
        HEIGHT = WIDTH
        RESOLUTION = 50 / WIDTH
        ENVIRONMENT_STACK_SIZE = 4

        agent_spec = AgentSpec(
            interface=AgentInterface(
                waypoints=Waypoints(lookahead=1),
                neighborhood_vehicles=NeighborhoodVehicles(radius=10.0),
                rgb=RGB(width=WIDTH, height=HEIGHT, resolution=RESOLUTION),
                action=ActionSpaceType.Lane,
            ),
            agent_builder=TestLaneAgent,
        )
        agent = agent_spec.build_agent()

        environment = gym.make(
            "ultra.env:ultra-v0",
            agent_specs={AGENT_ID: agent_spec},
            scenario_info=("00", "easy"),
            headless=True,
            timestep_sec=0.1,
            seed=2,
        )

        def check_environment_observations_stack(environment):
            self.assertIsInstance(environment.smarts_observations_stack, deque)
            self.assertEqual(
                len(environment.smarts_observations_stack), ENVIRONMENT_STACK_SIZE
            )
            self.assertIsInstance(environment.smarts_observations_stack[0], dict)
            self.assertTrue(
                all(
                    str(environment.smarts_observations_stack[0]) == str(observations)
                    for observations in environment.smarts_observations_stack
                )
            )

        def check_stacked_observations(environment, observations):
            self.assertIn(AGENT_ID, observations)
            self.assertIsNotNone(observations[AGENT_ID].top_down_rgb)
            self.assertIsInstance(observations[AGENT_ID].top_down_rgb, TopDownRGB)
            self.assertEqual(
                observations[AGENT_ID].top_down_rgb.metadata,
                environment.smarts_observations_stack[-1][
                    AGENT_ID
                ].top_down_rgb.metadata,
            )
            self.assertEqual(
                observations[AGENT_ID].top_down_rgb.data.shape,
                (ENVIRONMENT_STACK_SIZE, HEIGHT, WIDTH, 3),
            )
            # Ensure the stacked observation's TopDownRGB data is in the same order and
            # contains the same NumPy arrays as the environment's observation stack.
            self.assertTrue(
                all(
                    np.array_equal(
                        observations_from_stack[AGENT_ID].top_down_rgb.data,
                        observations[AGENT_ID].top_down_rgb.data[i],
                    )
                    for i, observations_from_stack in enumerate(
                        environment.smarts_observations_stack
                    )
                )
            )

        for _ in range(EPISODES):
            dones = {"__all__": False}
            observations = environment.reset()

            check_environment_observations_stack(environment)
            check_stacked_observations(environment, observations)

            while not dones["__all__"]:
                action = agent.act(observations[AGENT_ID])
                observations, _, dones, _ = environment.step({AGENT_ID: action})
                check_stacked_observations(environment, observations)

        environment.close()
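A minimal, framework-free sketch of the stacking behaviour this test asserts; it is an illustration only, not ULTRA's implementation. The last STACK_SIZE frames are kept in a deque and stacked into a single (stack, height, width, 3) array whose slices preserve the deque's order:

from collections import deque

import numpy as np

STACK_SIZE, HEIGHT, WIDTH = 4, 64, 64

frames = deque(maxlen=STACK_SIZE)
for step in range(6):  # only the most recent STACK_SIZE frames are kept
    frames.append(np.full((HEIGHT, WIDTH, 3), step, dtype=np.uint8))

stacked = np.stack(frames, axis=0)
assert stacked.shape == (STACK_SIZE, HEIGHT, WIDTH, 3)
assert all(np.array_equal(stacked[i], frame) for i, frame in enumerate(frames))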
Example #13
def train(
    task,
    num_episodes,
    max_episode_steps,
    rollout_fragment_length,
    policy,
    eval_info,
    timestep_sec,
    headless,
    seed,
    train_batch_size,
    sgd_minibatch_size,
    log_dir,
):

    agent_name = policy
    adapter = BaselineAdapter(agent_name)
    ModelCatalog.register_custom_model("fc_model", CustomFCModel)
    config = RllibAgent.rllib_default_config(agent_name)

    rllib_policies = {
        "default_policy": (
            None,
            adapter.observation_space,
            adapter.action_space,
            {
                "model": {
                    "custom_model": "fc_model",
                    "custom_model_config": {
                        "adapter": adapter
                    },
                }
            },
        )
    }
    agent_specs = {
        "AGENT-007":
        AgentSpec(
            interface=AgentInterface(
                waypoints=Waypoints(lookahead=20),
                neighborhood_vehicles=NeighborhoodVehicles(200),
                action=ActionSpaceType.Continuous,
                rgb=False,
                max_episode_steps=max_episode_steps,
                debug=True,
            ),
            agent_params={},
            agent_builder=None,
            observation_adapter=adapter.observation_adapter,
            reward_adapter=adapter.reward_adapter,
            # action_adapter=adapter.action_adapter,
        )
    }

    tune_config = {
        "env": RLlibUltraEnv,
        "log_level": "WARN",
        "callbacks": Callbacks,
        "framework": "torch",
        "num_workers": 1,
        "train_batch_size": train_batch_size,
        "sgd_minibatch_size": sgd_minibatch_size,
        "rollout_fragment_length": rollout_fragment_length,
        "in_evaluation": True,
        "evaluation_num_episodes": eval_info["eval_episodes"],
        "evaluation_interval": eval_info[
            "eval_rate"],  # Evaluation occurs after # of eval-intervals (episodes)
        "evaluation_config": {
            "env_config": {
                "seed": seed,
                "scenario_info": task,
                "headless": headless,
                "eval_mode": True,
                "ordered_scenarios": False,
                "agent_specs": agent_specs,
                "timestep_sec": timestep_sec,
            },
            "explore": False,
        },
        "env_config": {
            "seed": seed,
            "scenario_info": task,
            "headless": headless,
            "eval_mode": False,
            "ordered_scenarios": False,
            "agent_specs": agent_specs,
            "timestep_sec": timestep_sec,
        },
        "multiagent": {
            "policies": rllib_policies
        },
    }

    config.update(tune_config)
    agent = RllibAgent(
        agent_name=agent_name,
        env=RLlibUltraEnv,
        config=tune_config,
        logger_creator=log_creator(log_dir),
    )

    # The iteration value in trainer.py (self._iterations) is technically the number of episodes
    for i in range(num_episodes):
        results = agent.train()
        agent.log_evaluation_metrics(
            results)  # Evaluation metrics will now be displayed on Tensorboard