Example #1
def agent_interface(request):
    if request.param == "laner":
        return AgentInterface.from_type(AgentType.Laner)
    if request.param == "rgb":
        return AgentInterface(rgb=True, action=ActionSpaceType.Lane)
    if request.param == "lidar":
        return AgentInterface(lidar=True, action=ActionSpaceType.Lane)
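This fixture reads request.param, which implies a pytest parametrization; a minimal sketch of how it could be declared and consumed (the decorator and test name below are assumptions, not shown in the snippet):

import pytest

@pytest.fixture(params=["laner", "rgb", "lidar"])
def agent_interface(request):
    ...  # body as in the example above

def test_agent_interface(agent_interface):
    # pytest invokes this once per interface variant.
    assert agent_interface is not None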
Example #2
def smarts():
    laner = AgentInterface(max_episode_steps=1000, action=ActionSpaceType.Lane,)
    buddha = AgentInterface(max_episode_steps=1000, action=ActionSpaceType.Lane,)
    agents = {AGENT_1: laner, AGENT_2: buddha}
    smarts = SMARTS(
        agents, traffic_sim=SumoTrafficSimulation(headless=True), envision=None,
    )

    yield smarts
    smarts.destroy()
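A hedged sketch of a test consuming this fixture; the scenario fixture and the "keep_lane" actions are assumptions consistent with ActionSpaceType.Lane:

def test_smarts_runs(smarts, scenario):
    # `scenario` is assumed to be provided by another fixture.
    observations = smarts.reset(scenario)
    for _ in range(10):
        observations, rewards, dones, infos = smarts.step(
            {AGENT_1: "keep_lane", AGENT_2: "keep_lane"}
        )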
Example #3
def smarts_two_agents():
    smarts_ = SMARTS(
        agent_interfaces={
            AGENT_ID: AgentInterface.from_type(AgentType.Laner, max_episode_steps=30),
            AGENT_ID_2: AgentInterface.from_type(AgentType.Laner, max_episode_steps=30),
        },
        traffic_sim=SumoTrafficSimulation(time_resolution=0.1),
    )
    yield smarts_
    smarts_.destroy()
Example #4
def main(scenarios, headless, seed):
    scenarios_iterator = Scenario.scenario_variations(scenarios, [])
    for _ in scenarios:
        scenario = next(scenarios_iterator)
        agent_missions = scenario.discover_missions_of_traffic_histories()

        for agent_id, mission in agent_missions.items():
            scenario.set_ego_missions({agent_id: mission})

            agent_spec = AgentSpec(
                interface=AgentInterface.from_type(AgentType.Laner,
                                                   max_episode_steps=None),
                agent_builder=KeepLaneAgent,
            )

            agent = agent_spec.build_agent()

            smarts = SMARTS(
                agent_interfaces={agent_id: agent_spec.interface},
                traffic_sim=SumoTrafficSimulation(headless=True,
                                                  auto_start=True),
                envision=Envision(),
            )
            observations = smarts.reset(scenario)

            dones = {agent_id: False}
            while not dones[agent_id]:
                agent_obs = observations[agent_id]
                agent_action = agent.act(agent_obs)

                observations, rewards, dones, infos = smarts.step(
                    {agent_id: agent_action})
Example #5
def agent_spec():
    return AgentSpec(
        interface=AgentInterface.from_type(AgentType.TrajectoryInterpolator,
                                           neighborhood_vehicles=True),
        agent_builder=WithTimeTrajectoryAgent,
        agent_params=None,
    )
Example #6
def entrypoint(
    gains={
        "theta": 3.0,
        "position": 4.0,
        "obstacle": 3.0,
        "u_accel": 0.1,
        "u_yaw_rate": 1.0,
        "terminal": 0.01,
        "impatience": 0.01,
        "speed": 0.01,
        "rate": 1,
    },
    debug=False,
    aggressiveness=0,
    max_episode_steps=None,
):
    from .agent import OpEnAgent

    return AgentSpec(
        interface=AgentInterface(
            action=ActionSpaceType.Trajectory,
            waypoints=True,
            neighborhood_vehicles=True,
            max_episode_steps=max_episode_steps,
            agent_behavior=AgentBehavior(aggressiveness=aggressiveness),
        ),
        agent_params={
            "gains": gains,
            "debug": debug,
        },
        agent_builder=OpEnAgent,
    )
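Agent entrypoints like this are typically registered with the SMARTS agent zoo so they can be referenced by locator; a sketch assuming the smarts.zoo.registry.register helper, with an illustrative locator name:

from smarts.zoo.registry import register

# The locator string is illustrative; entrypoint is the function defined above.
register(locator="open_agent-v0", entry_point=entrypoint)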
Example #7
def main(
    scenarios,
    headless,
    num_episodes,
    seed,
):
    agent_spec = AgentSpec(
        interface=AgentInterface.from_type(
            AgentType.StandardWithAbsoluteSteering, max_episode_steps=3000),
        policy_builder=HumanKeyboardPolicy,
    )

    env = gym.make(
        "smarts.env:hiway-v0",
        scenarios=scenarios,
        agent_specs={AGENT_ID: agent_spec},
        headless=headless,
        timestep_sec=0.1,
        seed=seed,
    )

    for episode in episodes(n=num_episodes):
        agent = agent_spec.build_agent()
        observations = env.reset()
        episode.record_scenario(env.scenario_log)

        dones = {"__all__": False}
        while not dones["__all__"]:
            agent_obs = observations[AGENT_ID]
            agent_action = agent.act(agent_obs)
            observations, rewards, dones, infos = env.step(
                {AGENT_ID: agent_action})
            episode.record_step(observations, rewards, dones, infos)

    env.close()
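Examples like this are normally launched from the command line; a minimal argparse wrapper (argument names and defaults are assumptions):

if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser("human-in-the-loop")
    parser.add_argument("scenarios", nargs="+", help="Paths to scenario directories")
    parser.add_argument("--headless", action="store_true")
    parser.add_argument("--episodes", type=int, default=3)
    parser.add_argument("--seed", type=int, default=42)
    args = parser.parse_args()

    main(
        scenarios=args.scenarios,
        headless=args.headless,
        num_episodes=args.episodes,
        seed=args.seed,
    )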
Example #8
def entrypoint(
    gains={
        "theta": 3.0,
        "position": 4.0,
        "obstacle": 3.0,
        "u_accel": 0.1,
        "u_yaw_rate": 1.0,
        "terminal": 0.01,
        "impatience": 0.01,
        "speed": 0.01,
    },
    debug=False,
    max_episode_steps=600,
):
    from .policy import Policy

    return AgentSpec(
        interface=AgentInterface(
            action=ActionSpaceType.Trajectory,
            waypoints=True,
            neighborhood_vehicles=True,
            max_episode_steps=max_episode_steps,
        ),
        policy_params={"gains": gains, "debug": debug,},
        policy_builder=Policy,
        perform_self_test=False,
    )
Example #9
    def __init__(self, **kwargs):
        print(kwargs)
        self.episode_limit = kwargs['episode_limit']
        self.n_agents = kwargs['agent_num']
        self.observation_space = [
            gym.spaces.Box(low=-1e10, high=1e10, shape=(10, ))
        ] * self.n_agents
        self.action_space = [gym.spaces.Discrete(4)] * self.n_agents
        self.agent_ids = ["Agent %i" % i for i in range(self.n_agents)]
        self.n_actions = 4
        self.scenarios = [kwargs['scenarios']]

        self.headless = kwargs['headless']
        num_episodes = 100
        self.seed = kwargs['seed']

        self.agent_specs = {
            agent_id: AgentSpec(
                interface=AgentInterface.from_type(AgentType.Laner,
                                                   max_episode_steps=5000),
                observation_adapter=observation_adapter,
                reward_adapter=reward_adapter,
                action_adapter=action_adapter,
            )
            for agent_id in self.agent_ids
        }

        self.base_env = gym.make(
            "smarts.env:hiway-v0",
            scenarios=self.scenarios,
            agent_specs=self.agent_specs,
            headless=self.headless,
            seed=self.seed,
        )
        self.current_observations = self.base_env.reset()
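Based on the keys read from kwargs above, constructing this wrapper could look roughly like the following; the class name SmartsPyMARLEnv is hypothetical, since the snippet only shows __init__, and the values are illustrative:

env = SmartsPyMARLEnv(  # hypothetical name for the class this __init__ belongs to
    episode_limit=1000,
    agent_num=2,
    scenarios="scenarios/loop",  # illustrative scenario path
    headless=True,
    seed=42,
)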
Example #10
    def __init__(self, config):
        self._config = config

        # XXX: These are intentionally left public at PyMARL's request
        self.n_agents = config.get("n_agents", 1)
        self.episode_limit = config.get("episode_limit", 1000)
        self.observation_space = config.get("observation_space",
                                            DEFAULT_OBSERVATION_SPACE)
        self.action_space = config.get("action_space", DEFAULT_ACTION_SPACE)
        self.state_space = config.get("state_space", DEFAULT_STATE_SPACE)

        self._agent_ids = ["Agent %i" % i for i in range(self.n_agents)]

        self._reward_adapter = config.get("reward_adapter",
                                          default_reward_adapter)
        self._observation_adapter = config.get("observation_adapter",
                                               default_obs_adapter)
        self._action_adapter = config.get("action_adapter",
                                          default_action_adapter)
        self._done_adapter = config.get("done_adapter",
                                        lambda dones: list(dones.values()))
        self._state_adapter = config.get("state_adapter",
                                         default_state_adapter)

        self._headless = config.get("headless", False)
        self._timestep_sec = config.get("timestep_sec", 0.01)
        self._observations = None
        self._state = None
        self._steps = 0

        seed = self._config.get("seed", 42)
        smarts.core.seed(seed)

        self._scenarios_iterator = Scenario.scenario_variations(
            config["scenarios"], self._agent_ids)

        agent_interfaces = {
            agent_id: AgentInterface.from_type(
                config.get("agent_type", AgentType.Laner),
                max_episode_steps=self.episode_limit,
                debug=config.get("debug", False),
            )
            for agent_id in self._agent_ids
        }

        envision = None
        if not self._headless:
            envision = Envision(
                endpoint=config.get("envision_endpoint", None),
                output_dir=config.get("envision_record_data_replay_path",
                                      None),
            )

        self._smarts = SMARTS(
            agent_interfaces=agent_interfaces,
            traffic_sim=SumoTrafficSimulation(
                time_resolution=self._timestep_sec),
            envision=envision,
            timestep_sec=self._timestep_sec,
        )
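The config consumed by this constructor could look roughly like the dict below; every key appears in a config.get call above, and the values are illustrative:

config = {
    "scenarios": ["scenarios/loop"],
    "n_agents": 2,
    "episode_limit": 1000,
    "agent_type": AgentType.Laner,
    "headless": True,
    "timestep_sec": 0.1,
    "seed": 42,
}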
Example #11
def agent_spec(agent_and_agent_type):
    return AgentSpec(
        interface=AgentInterface.from_type(
            agent_and_agent_type[1], max_episode_steps=5000
        ),
        agent_builder=agent_and_agent_type[0],
    )
Example #12
def main(scenarios, headless, num_episodes, max_episode_steps=None):
    agent_spec = AgentSpec(
        interface=AgentInterface.from_type(
            AgentType.LanerWithSpeed, max_episode_steps=max_episode_steps
        ),
        agent_builder=ChaseViaPointsAgent,
    )

    env = gym.make(
        "smarts.env:hiway-v0",
        scenarios=scenarios,
        agent_specs={"SingleAgent": agent_spec},
        headless=headless,
        sumo_headless=True,
    )

    # Convert `env.step()` and `env.reset()` from multi-agent interface to
    # single-agent interface.
    env = SingleAgent(env=env)

    for episode in episodes(n=num_episodes):
        agent = agent_spec.build_agent()
        observation = env.reset()
        episode.record_scenario(env.scenario_log)

        done = False
        while not done:
            agent_action = agent.act(observation)
            observation, reward, done, info = env.step(agent_action)
            episode.record_step(observation, reward, done, info)

    env.close()
Example #13
def main(scenarios, headless, num_episodes, seed):
    agent_spec = AgentSpec(
        interface=AgentInterface.from_type(AgentType.Laner, max_episode_steps=None),
        agent_builder=KeepLaneAgent,
    )

    env = gym.make(
        "smarts.env:hiway-v0",
        scenarios=scenarios,
        agent_specs={AGENT_ID: agent_spec},
        headless=headless,
        visdom=False,
        timestep_sec=0.1,
        sumo_headless=True,
        seed=seed,
        # envision_record_data_replay_path="./data_replay",
    )

    for episode in episodes(n=num_episodes):
        agent = agent_spec.build_agent()
        observations = env.reset()
        episode.record_scenario(env.scenario_log)

        dones = {"__all__": False}
        while not dones["__all__"]:
            agent_obs = observations[AGENT_ID]
            agent_action = agent.act(agent_obs)
            observations, rewards, dones, infos = env.step({AGENT_ID: agent_action})
            episode.record_step(observations, rewards, dones, infos)

    env.close()
Example #14
def _make_agent_specs(num_agent):
    agent_specs = {
        "AGENT_" + str(agent_id): AgentSpec(
            interface=AgentInterface(
                rgb=RGB(),
                action=ActionSpaceType.Lane,
            ),
            agent_builder=lambda: Agent.from_function(lambda _: "keep_lane"),
            observation_adapter=lambda obs: obs.top_down_rgb.data,
            reward_adapter=lambda obs, reward: reward,
            info_adapter=lambda obs, reward, info: info["score"],
        )
        for agent_id in range(num_agent)
    }

    obs_space = gym.spaces.Dict({
        "AGENT_" + str(agent_id): gym.spaces.Box(
            low=0,
            high=255,
            shape=(
                agent_specs["AGENT_" + str(agent_id)].interface.rgb.width,
                agent_specs["AGENT_" + str(agent_id)].interface.rgb.height,
                3,
            ),
            dtype=np.uint8,
        )
        for agent_id in range(num_agent)
    })

    return agent_specs, obs_space
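Illustrative use of the helper above, feeding its outputs to the hiway-v0 environment used throughout these examples (the scenario path is an assumption):

agent_specs, obs_space = _make_agent_specs(num_agent=2)
env = gym.make(
    "smarts.env:hiway-v0",
    scenarios=["scenarios/loop"],
    agent_specs=agent_specs,
    headless=True,
)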
Example #15
        def init_env():
            if True:
                agent_spec = AgentSpec(
                    interface=AgentInterface.from_type(
                        AgentType.Laner, max_episode_steps=all_args.episode_length
                    )
                )
                AGENT_ID = [str(i) for i in range(all_args.num_agents)]

                env = gym.make(
                    "smarts.env:hiway-v0",
                    scenarios=all_args.scenarios,
                    agent_specs={i: agent_spec for i in AGENT_ID},
                    headless=all_args.headless,
                    visdom=False,
                    timestep_sec=0.1,
                    sumo_headless=True,
                    seed=all_args.seed + rank * 1000,
                    # zoo_workers=[("143.110.210.157", 7432)], # Distribute social agents across these workers
                    auth_key=all_args.auth_key,
                    # envision_record_data_replay_path="./data_replay",
                )
                env = SmartWrapper(env, all_args.num_agents)
            else:
                print("Can not support the " +
                      all_args.env_name + "environment.")
                raise NotImplementedError
            # env.seed(all_args.seed + rank * 1000)
            return env
Example #16
def env_and_spec(action,
                 agent_type,
                 max_episode_steps,
                 scenarios,
                 seed=42,
                 agent_id="Agent-006"):
    class Policy(AgentPolicy):
        def act(self, obs):
            return action

    agent_spec = AgentSpec(
        interface=AgentInterface.from_type(
            agent_type, max_episode_steps=max_episode_steps),
        policy_builder=Policy,
    )
    env = gym.make(
        "smarts.env:hiway-v0",
        scenarios=scenarios,
        agent_specs={agent_id: agent_spec},
        headless=True,
        visdom=False,
        timestep_sec=TIMESTEP_SEC,
        sumo_headless=True,
        seed=seed,
    )

    return (env, agent_spec)
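A hedged usage sketch for this helper; the scenario path and the "keep_lane" action (valid for AgentType.Laner) are assumptions:

env, agent_spec = env_and_spec(
    action="keep_lane",
    agent_type=AgentType.Laner,
    max_episode_steps=100,
    scenarios=["scenarios/loop"],
)
agent = agent_spec.build_agent()
observations = env.reset()
observations, rewards, dones, infos = env.step(
    {"Agent-006": agent.act(observations["Agent-006"])}
)
env.close()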
Example #17
def main(scenario):
    scenario_path = Path(scenario).absolute()
    agent_mission_count = Scenario.discover_agent_missions_count(scenario_path)

    assert agent_mission_count > 0, "agent mission count should be larger than 0"

    agent_ids = [f"AGENT-{i}" for i in range(agent_mission_count)]

    agent_specs = {
        agent_id: AgentSpec(
            interface=AgentInterface.from_type(AgentType.Laner, max_episode_steps=None),
            agent_builder=RuleBasedAgent,
        )
        for agent_id in agent_ids
    }

    agents = {aid: agent_spec.build_agent() for aid, agent_spec in agent_specs.items()}

    env = HiWayEnv(scenarios=[scenario_path], agent_specs=agent_specs)

    while True:
        observations = env.reset()
        done = False
        while not done:
            agent_ids = list(observations.keys())
            actions = {aid: agents[aid].act(observations[aid]) for aid in agent_ids}
            observations, _, dones, _ = env.step(actions)
            done = dones["__all__"]
Example #18
    def get_agent(self, ego, policy, max_episode_steps):
        observation_adapter = None
        if ego:
            config = get_agent_config_by_type(policy)
            agent_spec = AgentSpec(
                interface=config["interface"],
                agent_params=dict(config["policy"], checkpoint_dir=ego_model),
                agent_builder=config["policy_class"],
            )
            agent_spec.interface.max_episode_steps = max_episode_steps
            observation_adapter = IntersectionAdapter(
                agent_id="AGENT_007",
                social_vehicle_config=config["social_vehicle_config"],
                timestep_sec=config["env"]["timestep_sec"],
                **config["other"],
            )
        else:
            # Lane Following agent
            agent_spec = AgentSpec(
                interface=AgentInterface(
                    max_episode_steps=max_episode_steps,  # 10 mins
                    waypoints=True,
                    action=ActionSpaceType.Lane,
                    debug=False,
                    neighborhood_vehicles=NeighborhoodVehicles(radius=2000),
                ),
                agent_builder=DefaultPolicy,
            )
        return agent_spec, observation_adapter
Example #19
def main(scenarios, headless, seed):
    agent_spec = AgentSpec(
        interface=AgentInterface.from_type(AgentType.Laner,
                                           max_episode_steps=None),
        agent_builder=None,
        observation_adapter=None,
    )

    smarts = SMARTS(
        agent_interfaces={},
        traffic_sim=SumoTrafficSimulation(headless=True, auto_start=True),
        envision=Envision(),
    )
    scenarios_iterator = Scenario.scenario_variations(
        scenarios,
        list([]),
    )

    smarts.reset(next(scenarios_iterator))

    for _ in range(5000):
        smarts.step({})
        smarts.attach_sensors_to_vehicles(
            agent_spec, smarts.vehicle_index.social_vehicle_ids())
        obs, _, _, _ = smarts.observe_from(
            smarts.vehicle_index.social_vehicle_ids())
Example #20
def test_graceful_interrupt(monkeypatch):
    """SMARTS should only throw a KeyboardInterript exception."""

    agent_spec = AgentSpec(
        interface=AgentInterface.from_type(AgentType.Laner),
        agent_builder=lambda: Agent.from_function(lambda _: "keep_lane"),
    )
    agent = agent_spec.build_agent()
    env = build_env(agent_spec)

    with pytest.raises(KeyboardInterrupt):
        obs = env.reset()

        # To simulate a user interrupting the sim (e.g. ctrl-c), we just need to
        # hook into some function that SMARTS calls internally (like this one).
        with mock.patch(
            "smarts.core.sensors.Sensors.observe", side_effect=KeyboardInterrupt
        ):
            for episode in range(10):
                obs, _, _, _ = env.step({AGENT_ID: agent.act(obs)})

        assert episode == 0, "SMARTS should have been interrupted, ending early"

    with pytest.raises(SMARTSNotSetupError):
        env.step({AGENT_ID: agent.act(obs)})
Example #21
def main(scenarios, sim_name, headless, num_episodes, seed, max_episode_steps=None):
    agent_spec = AgentSpec(
        interface=AgentInterface.from_type(
            AgentType.LanerWithSpeed, max_episode_steps=max_episode_steps
        ),
        agent_builder=ChaseViaPointsAgent,
    )

    env = gym.make(
        "smarts.env:hiway-v0",
        scenarios=scenarios,
        agent_specs={AGENT_ID: agent_spec},
        sim_name=sim_name,
        headless=headless,
        visdom=False,
        timestep_sec=0.1,
        sumo_headless=True,
        seed=seed,
        # zoo_addrs=[("10.193.241.236", 7432)], # Sample server address (ip, port), to distribute social agents in remote server.
        # envision_record_data_replay_path="./data_replay",
    )

    for episode in episodes(n=num_episodes):
        agent = agent_spec.build_agent()
        observations = env.reset()
        episode.record_scenario(env.scenario_log)

        dones = {"__all__": False}
        while not dones["__all__"]:
            agent_obs = observations[AGENT_ID]
            agent_action = agent.act(agent_obs)
            observations, rewards, dones, infos = env.step({AGENT_ID: agent_action})
            episode.record_step(observations, rewards, dones, infos)

    env.close()
Example #22
def main(_args):
    scenario_path = Path(_args.scenario).absolute()
    mission_num = get_submission_num(scenario_path)

    if mission_num == -1:
        mission_num = 1

    AGENT_IDS = [f"AGENT-{i}" for i in range(mission_num)]

    agent_interface = AgentInterface.from_type(AgentType.Laner)

    agent_specs = [
        AgentSpec(interface=agent_interface,
                  policy_builder=lambda: KeeplanePolicy())
        for _ in range(mission_num)
    ]

    agents = dict(zip(AGENT_IDS, agent_specs))

    env = gym.make(
        "smarts.env:hiway-v0",
        scenarios=[scenario_path],
        agent_specs=agents,
        headless=_args.headless,
        visdom=False,
        seed=42,
    )

    agents = {
        _id: agent_spec.build_agent()
        for _id, agent_spec in agents.items()
    }

    import webbrowser
    webbrowser.open('http://localhost:8081/')

    for ie in range(30):
        step = 0
        print(f"\n---- Starting episode: {ie}...")
        observations = env.reset()
        total_reward = 0.0
        dones = {"__all__": False}

        while not dones["__all__"]:
            step += 1
            agent_actions = {
                _id: agents[_id].act(obs)
                for _id, obs in observations.items()
            }
            observations, rewards, dones, _ = env.step(agent_actions)
            total_reward += sum(rewards.values())

            if (step + 1) % 10 == 0:
                print(
                    f"* Episode: {ie} * step: {step} * acc-Reward: {total_reward}"
                )
        print("Accumulated reward:", total_reward)

    env.close()
Example #23
def run_experiment(log_path, experiment_name, training_iteration=100):
    model_path = Path(__file__).parent / "model"
    agent_spec = AgentSpec(
        interface=AgentInterface.from_type(AgentType.Standard,
                                           max_episode_steps=5000),
        policy=RLlibTFSavedModelAgent(
            model_path.absolute(),
            OBSERVATION_SPACE,
        ),
        observation_adapter=observation_adapter,
        reward_adapter=reward_adapter,
        action_adapter=action_adapter,
    )

    rllib_policies = {
        "policy": (
            None,
            OBSERVATION_SPACE,
            ACTION_SPACE,
            {
                "model": {
                    "custom_model": TrainingModel.NAME
                }
            },
        )
    }

    scenario_path = Path(__file__).parent / "../../scenarios/loop"
    scenario_path = str(scenario_path.absolute())

    tune_config = {
        "env": RLlibHiWayEnv,
        "env_config": {
            "scenarios": [scenario_path],
            "seed": 42,
            "headless": True,
            "agent_specs": {
                "Agent-007": agent_spec
            },
        },
        "multiagent": {
            "policies": rllib_policies,
            "policy_mapping_fn": lambda _: "policy",
        },
        "log_level": "WARN",
        "num_workers": multiprocessing.cpu_count() - 1,
        "horizon": HORIZON,
    }

    analysis = tune.run(
        "PPO",
        name=experiment_name,
        stop={"training_iteration": training_iteration},
        max_failures=10,
        local_dir=log_path,
        config=tune_config,
    )

    return analysis
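run_experiment drives Ray Tune, so Ray is typically initialized before tune.run is reached; a minimal sketch with an illustrative log path and experiment name:

import ray

ray.init()
analysis = run_experiment(
    log_path="./ray_results",          # illustrative output directory
    experiment_name="rllib_ppo_example",  # illustrative experiment name
    training_iteration=10,
)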
Example #24
def rllib_agent():
    def observation_adapter(env_observation):
        ego = env_observation.ego_vehicle_state
        waypoint_paths = env_observation.waypoint_paths
        wps = [path[0] for path in waypoint_paths]

        # distance of vehicle from center of lane
        closest_wp = min(wps, key=lambda wp: wp.dist_to(ego.position))
        signed_dist_from_center = closest_wp.signed_lateral_error(ego.position)
        lane_hwidth = closest_wp.lane_width * 0.5
        norm_dist_from_center = signed_dist_from_center / lane_hwidth

        return {
            "distance_from_center": np.array([norm_dist_from_center]),
            "angle_error": np.array([closest_wp.relative_heading(ego.heading)]),
            "speed": np.array([ego.speed]),
            "steering": np.array([ego.steering]),
        }

    def reward_adapter(env_obs, env_reward):
        return env_reward

    def action_adapter(model_action):
        throttle, brake, steering = model_action
        return np.array([throttle, brake, steering])

    def info_adapter(env_obs, env_reward, env_info):
        env_info[INFO_EXTRA_KEY] = "blah"
        return env_info

    # This action space should match the input to the action_adapter(..) function above.
    ACTION_SPACE = gym.spaces.Box(
        low=np.array([0.0, 0.0, -1.0]), high=np.array([1.0, 1.0, 1.0]), dtype=np.float32
    )

    # This observation space should match the output of observation_adapter(..) above.
    OBSERVATION_SPACE = gym.spaces.Dict(
        {
            "distance_from_center": gym.spaces.Box(low=-1e10, high=1e10, shape=(1,)),
            "angle_error": gym.spaces.Box(low=-np.pi, high=np.pi, shape=(1,)),
            "speed": gym.spaces.Box(low=-1e10, high=1e10, shape=(1,)),
            "steering": gym.spaces.Box(low=-1e10, high=1e10, shape=(1,)),
        }
    )

    return {
        "agent_spec": AgentSpec(
            interface=AgentInterface.from_type(
                AgentType.Standard, max_episode_steps=500
            ),
            observation_adapter=observation_adapter,
            reward_adapter=reward_adapter,
            action_adapter=action_adapter,
            info_adapter=info_adapter,
        ),
        "observation_space": OBSERVATION_SPACE,
        "action_space": ACTION_SPACE,
    }
Example #25
def klws_entrypoint(speed):
    from .keep_left_with_speed_agent import KeepLeftWithSpeedAgent

    return AgentSpec(
        interface=AgentInterface.from_type(AgentType.LanerWithSpeed,
                                           max_episode_steps=20000),
        agent_params={"speed": speed * 0.01},
        agent_builder=KeepLeftWithSpeedAgent,
    )
Example #26
def smarts(traffic_sim):
    buddha = AgentInterface(
        max_episode_steps=1000, neighborhood_vehicles=True, action=ActionSpaceType.Lane,
    )
    agents = {"Agent-007": buddha}
    smarts = SMARTS(agents, traffic_sim=traffic_sim)

    yield smarts
    smarts.destroy()
Example #27
def human_keyboard_entrypoint(*arg, **kwargs):
    from .human_in_the_loop import HumanKeyboardAgent

    spec = AgentSpec(
        interface=AgentInterface.from_type(
            AgentType.StandardWithAbsoluteSteering, max_episode_steps=3000),
        agent_builder=HumanKeyboardAgent,
    )
    return spec
Example #28
def agent_spec():
    class Policy(AgentPolicy):
        def act(self, obs):
            return "keep_lane"

    return AgentSpec(
        interface=AgentInterface.from_type(AgentType.Laner,
                                           max_episode_steps=MAX_STEPS),
        policy_builder=Policy,
    )
Example #29
def agent_spec():
    class KeepLaneAgent(Agent):
        def act(self, obs):
            return "keep_lane"

    return AgentSpec(
        interface=AgentInterface.from_type(AgentType.Laner,
                                           max_episode_steps=MAX_STEPS),
        agent_builder=KeepLaneAgent,
    )
Example #30
def main(args):
    scenarios = [Path(e).absolute() for e in args.scenarios]
    agent_interface = AgentInterface.from_type(
        AgentType.StandardWithAbsoluteSteering)
    agent_num_list = [get_submission_num(s) for s in scenarios]

    min_v, max_v = min(agent_num_list), max(agent_num_list)
    assert min_v == max_v, "Mission number mismatch."
    AGENT_IDS = [f"AGENT-{i}" for i in range(max_v)]
    agents = {
        _id: AgentSpec(interface=agent_interface,
                       policy_builder=lambda: RandomPolicy())
        for _id in AGENT_IDS
    }

    env = gym.make(
        "smarts.env:hiway-v0",
        scenarios=scenarios,
        agent_specs=agents,
        headless=args.headless,
        visdom=False,
        seed=42,
    )

    agents = {
        _id: agent_spec.build_agent()
        for _id, agent_spec in agents.items()
    }

    for ie in range(10):
        step = 0
        print(f"\n---- Starting episode: {ie}...")
        observations = env.reset()
        total_reward = 0.0
        dones = {"__all__": False}

        print(f"load scenario={env.scenario_log['scenario_map']}")

        while not dones["__all__"]:
            step += 1
            agent_actions = {
                _id: agents[_id].act(obs)
                for _id, obs in observations.items()
            }
            observations, rewards, dones, _ = env.step(agent_actions)
            total_reward += sum(rewards.values())

            if (step + 1) % 10 == 0:
                print(
                    f"* Episode: {ie} * step: {step} * acc-Reward: {total_reward}"
                )

        print("Accumulated reward:", total_reward)

    env.close()