Code Example #1
File: ultra_env.py  Project: peter112520/SMARTS
    def __init__(
        self,
        agent_specs,
        scenario_info,
        headless,
        timestep_sec,
        seed,
        eval_mode=False,
        ordered_scenarios=False,
    ):
        self.timestep_sec = timestep_sec
        self.headless = headless
        self.scenario_info = scenario_info
        self.scenarios = self.get_task(scenario_info[0], scenario_info[1])
        if not eval_mode:
            _scenarios = glob.glob(f"{self.scenarios['train']}")
        else:
            _scenarios = glob.glob(f"{self.scenarios['test']}")

        self.ultra_scores = BaselineAdapter()
        super().__init__(
            scenarios=_scenarios,
            agent_specs=agent_specs,
            headless=headless,
            timestep_sec=timestep_sec,
            seed=seed,
            visdom=False,
        )

        if ordered_scenarios:
            scenario_roots = []
            for root in _scenarios:
                if Scenario.is_valid_scenario(root):
                    # The case that this is a scenario root
                    scenario_roots.append(root)
                else:
                    # The case that this is a directory of scenarios: find each of the roots
                    scenario_roots.extend(Scenario.discover_scenarios(root))
            # Also see `smarts.env.HiwayEnv`
            self._scenarios_iterator = cycle(
                Scenario.variations_for_all_scenario_roots(
                    scenario_roots, list(agent_specs.keys())))
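
For orientation, a constructor with this signature would be driven roughly as follows. This is a minimal sketch: the task id/level pair, agent id, and policy class are illustrative placeholders, not values taken from the source.

agent_spec = BaselineAgentSpec(
    policy_class=MyPolicy,  # hypothetical policy class
    action_type=ActionSpaceType.Continuous,
)
env = UltraEnv(
    agent_specs={"AGENT-007": agent_spec},
    scenario_info=("1", "easy"),  # assumed (task_id, task_level), resolved by get_task()
    headless=True,
    timestep_sec=0.1,
    seed=2,
)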
Code Example #2
    def __new__(
        self,
        policy_class,
        action_type,
        checkpoint_dir=None,
        task=None,
        max_episode_steps=1200,
        experiment_dir=None,
    ):
        if experiment_dir:
            print(f"LOADING SPEC from {experiment_dir}/spec.pkl")
            with open(f"{experiment_dir}/spec.pkl", "rb") as spec_file:
                spec = dill.load(spec_file)
                new_spec = AgentSpec(
                    interface=spec.interface,
                    agent_params=dict(
                        policy_params=spec.agent_params["policy_params"],
                        checkpoint_dir=checkpoint_dir,
                    ),
                    agent_builder=spec.policy_builder,
                    observation_adapter=spec.observation_adapter,
                    reward_adapter=spec.reward_adapter,
                )
                spec = new_spec
        else:
            adapter = BaselineAdapter()
            policy_dir = "/".join(inspect.getfile(policy_class).split("/")[:-1])
            policy_params = load_yaml(f"{policy_dir}/params.yaml")
            spec = AgentSpec(
                interface=AgentInterface(
                    waypoints=Waypoints(lookahead=20),
                    neighborhood_vehicles=NeighborhoodVehicles(200),
                    action=action_type,
                    rgb=False,
                    max_episode_steps=max_episode_steps,
                    debug=True,
                ),
                agent_params=dict(
                    policy_params=policy_params,
                    checkpoint_dir=checkpoint_dir,
                ),
                agent_builder=policy_class,
                observation_adapter=adapter.observation_adapter,
                reward_adapter=adapter.reward_adapter,
            )
        return spec
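
Both construction paths of this spec factory can be exercised as below. A minimal sketch, assuming the class is BaselineAgentSpec (as imported in Code Example #4) and using a hypothetical SACPolicy and hypothetical paths:

# Fresh spec, built from the params.yaml next to the policy class:
spec = BaselineAgentSpec(
    policy_class=SACPolicy,  # hypothetical policy class
    action_type=ActionSpaceType.Continuous,
)

# Spec restored from a previous experiment, re-pointed at a checkpoint:
spec = BaselineAgentSpec(
    policy_class=SACPolicy,  # hypothetical policy class
    action_type=ActionSpaceType.Continuous,
    experiment_dir="logs/experiment_xyz",  # hypothetical dir containing spec.pkl
    checkpoint_dir="logs/experiment_xyz/models/000800",  # hypothetical
)
agent = spec.build_agent()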
Code Example #3
File: agent_spec.py  Project: zbzhu99/SMARTS
    def __new__(
        self,
        policy_class,
        action_type,
        checkpoint_dir=None,
        task=None,
        max_episode_steps=1200,
        experiment_dir=None,
        agent_id="",
    ):
        if experiment_dir:
            print(
                f"Loading spec for {agent_id} from {experiment_dir}/agent_metadata.pkl"
            )
            with open(f"{experiment_dir}/agent_metadata.pkl",
                      "rb") as metadata_file:
                agent_metadata = dill.load(metadata_file)
                spec = agent_metadata["agent_specs"][agent_id]

                new_spec = AgentSpec(
                    interface=spec.interface,
                    agent_params=dict(
                        policy_params=spec.agent_params["policy_params"],
                        checkpoint_dir=checkpoint_dir,
                    ),
                    agent_builder=spec.policy_builder,
                    observation_adapter=spec.observation_adapter,
                    reward_adapter=spec.reward_adapter,
                )

                spec = new_spec
        else:
            base_dir = os.path.join(os.path.dirname(__file__), "../")
            pool_path = os.path.join(base_dir, "agent_pool.json")

            policy_class_name = policy_class.__name__
            agent_name = None

            with open(pool_path, "r") as f:
                data = json.load(f)
                agents = data["agents"].keys()
                for agent in agents:
                    if data["agents"][agent]["class"] == policy_class_name:
                        agent_name = data["agents"][agent]["name"]
                        break

            assert agent_name is not None

            adapter = BaselineAdapter(agent_name)
            spec = AgentSpec(
                interface=AgentInterface(
                    waypoints=Waypoints(lookahead=20),
                    neighborhood_vehicles=NeighborhoodVehicles(200),
                    action=action_type,
                    rgb=False,
                    max_episode_steps=max_episode_steps,
                    debug=True,
                ),
                agent_params=dict(policy_params=adapter.policy_params,
                                  checkpoint_dir=checkpoint_dir),
                agent_builder=policy_class,
                observation_adapter=adapter.observation_adapter,
                reward_adapter=adapter.reward_adapter,
            )
        return spec
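
The lookup in the else-branch implies a particular shape for agent_pool.json: each entry under "agents" carries at least a "class" string matching the policy class name and a "name" that is fed to BaselineAdapter. A sketch of that structure as a Python literal, with illustrative keys and values:

agent_pool = {
    "agents": {
        "sac": {"name": "sac", "class": "SACPolicy"},  # illustrative entry
        "ppo": {"name": "ppo", "class": "PPOPolicy"},  # illustrative entry
    }
}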
Code Example #4
import unittest

import gym
import numpy as np
import ray

from smarts.core.controllers import ActionSpaceType
from smarts.zoo.registry import make
from ultra.baselines.adapter import BaselineAdapter
from ultra.baselines.agent_spec import BaselineAgentSpec

AGENT_ID = "001"
seed = 2

adapter = BaselineAdapter()


class AdapterTest(unittest.TestCase):
    def test_observation_features(self):
        @ray.remote(max_calls=1, num_gpus=0, num_cpus=1)
        def run_experiment():
            agent, env = prepare_test_env_agent()
            observations = env.reset()
            env.close()
            return observations

        ray.init(ignore_reinit_error=True)
        observations = ray.get(run_experiment.remote())
        ray.shutdown()
        print(observations[AGENT_ID])
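
The test relies on a prepare_test_env_agent helper that the excerpt omits. A minimal sketch of what it might look like, assuming ULTRA registers its environment under the gym id "ultra.env:ultra-v0" and that a baseline policy class such as SACPolicy is importable (both are assumptions, not shown in the source):

def prepare_test_env_agent(headless=True):
    # Hypothetical reconstruction; reuses AGENT_ID and seed defined above.
    spec = BaselineAgentSpec(
        policy_class=SACPolicy,  # hypothetical policy class
        action_type=ActionSpaceType.Continuous,
    )
    env = gym.make(
        "ultra.env:ultra-v0",  # assumed gym registration id
        agent_specs={AGENT_ID: spec},
        scenario_info=("00", "easy"),  # assumed (task_id, task_level)
        headless=headless,
        timestep_sec=0.1,
        seed=seed,
    )
    agent = spec.build_agent()
    return agent, env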
Code Example #5
File: ultra_env.py  Project: peter112520/SMARTS
class UltraEnv(HiWayEnv):
    def __init__(
        self,
        agent_specs,
        scenario_info,
        headless,
        timestep_sec,
        seed,
        eval_mode=False,
        ordered_scenarios=False,
    ):
        self.timestep_sec = timestep_sec
        self.headless = headless
        self.scenario_info = scenario_info
        self.scenarios = self.get_task(scenario_info[0], scenario_info[1])
        if not eval_mode:
            _scenarios = glob.glob(f"{self.scenarios['train']}")
        else:
            _scenarios = glob.glob(f"{self.scenarios['test']}")

        self.ultra_scores = BaselineAdapter()
        super().__init__(
            scenarios=_scenarios,
            agent_specs=agent_specs,
            headless=headless,
            timestep_sec=timestep_sec,
            seed=seed,
            visdom=False,
        )

        if ordered_scenarios:
            scenario_roots = []
            for root in _scenarios:
                if Scenario.is_valid_scenario(root):
                    # The case that this is a scenario root
                    scenario_roots.append(root)
                else:
                    # The case that this is a directory of scenarios: find each of the roots
                    scenario_roots.extend(Scenario.discover_scenarios(root))
            # Also see `smarts.env.HiwayEnv`
            self._scenarios_iterator = cycle(
                Scenario.variations_for_all_scenario_roots(
                    scenario_roots, list(agent_specs.keys())))

    def generate_logs(self, observation, highwayenv_score):
        ego_state = observation.ego_vehicle_state
        start = observation.ego_vehicle_state.mission.start
        goal = observation.ego_vehicle_state.mission.goal
        path = get_path_to_goal(goal=goal,
                                paths=observation.waypoint_paths,
                                start=start)
        closest_wp, _ = get_closest_waypoint(
            num_lookahead=100,
            goal_path=path,
            ego_position=ego_state.position,
            ego_heading=ego_state.heading,
        )
        signed_dist_from_center = closest_wp.signed_lateral_error(
            ego_state.position)
        lane_width = closest_wp.lane_width * 0.5
        ego_dist_center = signed_dist_from_center / lane_width

        linear_jerk = np.linalg.norm(ego_state.linear_jerk)
        angular_jerk = np.linalg.norm(ego_state.angular_jerk)

        # Distance to goal
        ego_2d_position = ego_state.position[0:2]
        goal_dist = distance.euclidean(ego_2d_position, goal.position)

        angle_error = closest_wp.relative_heading(
            ego_state.heading)  # relative heading radians [-pi, pi]

        # number of violations
        (
            ego_num_violations,
            social_num_violations,
        ) = ego_social_safety(
            observation,
            d_min_ego=1.0,
            t_c_ego=1.0,
            d_min_social=1.0,
            t_c_social=1.0,
            ignore_vehicle_behind=True,
        )

        info = dict(
            position=ego_state.position,
            speed=ego_state.speed,
            steering=ego_state.steering,
            heading=ego_state.heading,
            dist_center=abs(ego_dist_center),
            start=start,
            goal=goal,
            closest_wp=closest_wp,
            events=observation.events,
            ego_num_violations=ego_num_violations,
            social_num_violations=social_num_violations,
            goal_dist=goal_dist,
            linear_jerk=linear_jerk,
            angular_jerk=angular_jerk,
            env_score=self.ultra_scores.reward_adapter(observation,
                                                       highwayenv_score),
        )
        return info

    def step(self, agent_actions):
        agent_actions = {
            agent_id: self._agent_specs[agent_id].action_adapter(action)
            for agent_id, action in agent_actions.items()
        }

        observations, rewards, agent_dones, extras = self._smarts.step(
            agent_actions)

        infos = {
            agent_id: {
                "score": value,
                "env_obs": observations[agent_id]
            }
            for agent_id, value in extras["scores"].items()
        }

        for agent_id in observations:
            agent_spec = self._agent_specs[agent_id]
            observation = observations[agent_id]
            reward = rewards[agent_id]
            info = infos[agent_id]

            rewards[agent_id] = agent_spec.reward_adapter(observation, reward)
            observations[agent_id] = agent_spec.observation_adapter(
                observation)
            infos[agent_id] = agent_spec.info_adapter(observation, reward,
                                                      info)
            infos[agent_id]["logs"] = self.generate_logs(observation, reward)

        for done in agent_dones.values():
            self._dones_registered += 1 if done else 0

        agent_dones["__all__"] = self._dones_registered == len(
            self._agent_specs)

        return observations, rewards, agent_dones, infos

    def get_task(self, task_id, task_level):
        with open("ultra/config.yaml", "r") as task_file:
            scenarios = yaml.safe_load(task_file)["tasks"]
            task = scenarios[f"task{task_id}"][task_level]
        return task
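
    # Note: `get_task` assumes ultra/config.yaml maps task ids and levels to
    # train/test scenario glob patterns, roughly like this (the paths are
    # illustrative guesses, not taken from the source):
    #
    #   tasks:
    #     task1:
    #       easy:
    #         train: "ultra/scenarios/task1/train*easy*"
    #         test: "ultra/scenarios/task1/test*easy*"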

    @property
    def info(self):
        return {
            "scenario_info": self.scenario_info,
            "timestep_sec": self.timestep_sec,
            "headless": self.headless,
        }
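
End to end, the overridden reset/step pair drives a standard gym-style rollout. A minimal sketch, reusing the env construction from Code Example #1 and assuming a single agent keyed by AGENT_ID:

agent = agent_spec.build_agent()
observations = env.reset()
dones = {"__all__": False}
while not dones["__all__"]:
    action = agent.act(observations[AGENT_ID])
    observations, rewards, dones, infos = env.step({AGENT_ID: action})
    logs = infos[AGENT_ID]["logs"]  # per-step metrics attached by generate_logs()
env.close()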
Code Example #6
def train(
    task,
    num_episodes,
    max_episode_steps,
    rollout_fragment_length,
    policy,
    eval_info,
    timestep_sec,
    headless,
    seed,
    train_batch_size,
    sgd_minibatch_size,
    log_dir,
):

    agent_name = policy
    adapter = BaselineAdapter(agent_name)
    ModelCatalog.register_custom_model("fc_model", CustomFCModel)
    config = RllibAgent.rllib_default_config(agent_name)

    rllib_policies = {
        "default_policy": (
            None,
            adapter.observation_space,
            adapter.action_space,
            {
                "model": {
                    "custom_model": "fc_model",
                    "custom_model_config": {
                        "adapter": adapter
                    },
                }
            },
        )
    }
    agent_specs = {
        "AGENT-007":
        AgentSpec(
            interface=AgentInterface(
                waypoints=Waypoints(lookahead=20),
                neighborhood_vehicles=NeighborhoodVehicles(200),
                action=ActionSpaceType.Continuous,
                rgb=False,
                max_episode_steps=max_episode_steps,
                debug=True,
            ),
            agent_params={},
            agent_builder=None,
            observation_adapter=adapter.observation_adapter,
            reward_adapter=adapter.reward_adapter,
            # action_adapter=adapter.action_adapter,
        )
    }

    tune_config = {
        "env": RLlibUltraEnv,
        "log_level": "WARN",
        "callbacks": Callbacks,
        "framework": "torch",
        "num_workers": 1,
        "train_batch_size": train_batch_size,
        "sgd_minibatch_size": sgd_minibatch_size,
        "rollout_fragment_length": rollout_fragment_length,
        "in_evaluation": True,
        "evaluation_num_episodes": eval_info["eval_episodes"],
        "evaluation_interval": eval_info[
            "eval_rate"],  # Evaluation occurs after # of eval-intervals (episodes)
        "evaluation_config": {
            "env_config": {
                "seed": seed,
                "scenario_info": task,
                "headless": headless,
                "eval_mode": True,
                "ordered_scenarios": False,
                "agent_specs": agent_specs,
                "timestep_sec": timestep_sec,
            },
            "explore": False,
        },
        "env_config": {
            "seed": seed,
            "scenario_info": task,
            "headless": headless,
            "eval_mode": False,
            "ordered_scenarios": False,
            "agent_specs": agent_specs,
            "timestep_sec": timestep_sec,
        },
        "multiagent": {
            "policies": rllib_policies
        },
    }

    config.update(tune_config)
    agent = RllibAgent(
        agent_name=agent_name,
        env=RLlibUltraEnv,
        config=config,  # merged default + tune configuration
        logger_creator=log_creator(log_dir),
    )

    # The iteration value in trainer.py (self._iterations) is technically the number of episodes
    for i in range(num_episodes):
        results = agent.train()
        agent.log_evaluation_metrics(
            results)  # Evaluation metrics will now be displayed on TensorBoard
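
A call matching this signature might look as follows; every argument value here is an illustrative guess rather than a setting from the source.

train(
    task=("1", "easy"),  # assumed (task_id, task_level)
    num_episodes=100,
    max_episode_steps=1200,
    rollout_fragment_length=200,
    policy="ppo",  # hypothetical agent name understood by BaselineAdapter
    eval_info={"eval_episodes": 10, "eval_rate": 10},
    timestep_sec=0.1,
    headless=True,
    seed=2,
    train_batch_size=4000,
    sgd_minibatch_size=128,
    log_dir="logs",
)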