def agent_interface(request):
    """Build the agent interface selected by ``request.param``.

    Recognised values are ``"laner"``, ``"rgb"`` and ``"lidar"``. Any other
    value matches no branch and ``None`` is returned.
    (Presumably a parametrized pytest fixture -- the decorator is not visible
    here.)
    """
    kind = request.param

    if kind == "laner":
        return AgentInterface.from_type(AgentType.Laner)

    # The remaining variants differ only in which sensor flag is switched on.
    if kind == "rgb":
        sensor_flags = {"rgb": True}
    elif kind == "lidar":
        sensor_flags = {"lidar": True}
    else:
        return None

    return AgentInterface(**sensor_flags, action=ActionSpaceType.Lane)
def smarts():
    """Yield a two-agent SMARTS simulation, then destroy it.

    Both agents (``AGENT_1`` and ``AGENT_2``) use a lane action space and are
    limited to 1000 episode steps. SUMO runs headless and Envision is
    disabled. Code after the ``yield`` tears the simulation down once the
    generator is resumed.
    """

    def lane_interface():
        # Each agent gets its own interface object with identical settings.
        return AgentInterface(
            max_episode_steps=1000,
            action=ActionSpaceType.Lane,
        )

    interfaces = {AGENT_1: lane_interface(), AGENT_2: lane_interface()}
    simulation = SMARTS(
        interfaces,
        traffic_sim=SumoTrafficSimulation(headless=True),
        envision=None,
    )

    yield simulation
    simulation.destroy()
def smarts_two_agents():
    """Yield a SMARTS simulation with two Laner agents, then destroy it.

    The agents are registered under ``AGENT_ID`` and ``AGENT_ID_2``, each
    capped at 30 episode steps; SUMO is stepped with a 0.1 time resolution.
    """
    interfaces = {
        agent_key: AgentInterface.from_type(AgentType.Laner, max_episode_steps=30)
        for agent_key in (AGENT_ID, AGENT_ID_2)
    }
    simulation = SMARTS(
        agent_interfaces=interfaces,
        traffic_sim=SumoTrafficSimulation(time_resolution=0.1),
    )

    yield simulation
    simulation.destroy()
def main(scenarios, headless, seed):
    """Run a lane-keeping agent through every traffic-history mission.

    For each scenario variation, every mission discovered in the traffic
    histories is assigned to the ego agent in turn, and a fresh SMARTS
    instance is stepped until that agent is done.

    Args:
        scenarios: Scenario sources passed to ``Scenario.scenario_variations``;
            one variation is consumed per entry.
        headless: Unused by this function (SUMO is always started headless).
        seed: Unused by this function.
    """
    scenarios_iterator = Scenario.scenario_variations(scenarios, [])
    for _ in scenarios:
        scenario = next(scenarios_iterator)
        agent_missions = scenario.discover_missions_of_traffic_histories()

        for agent_id, mission in agent_missions.items():
            scenario.set_ego_missions({agent_id: mission})

            agent_spec = AgentSpec(
                interface=AgentInterface.from_type(
                    AgentType.Laner, max_episode_steps=None
                ),
                agent_builder=KeepLaneAgent,
            )
            agent = agent_spec.build_agent()

            smarts = SMARTS(
                agent_interfaces={agent_id: agent_spec.interface},
                traffic_sim=SumoTrafficSimulation(headless=True, auto_start=True),
                envision=Envision(),
            )
            # A new simulator is created for every mission, so it must be torn
            # down each time -- otherwise instances pile up across iterations.
            try:
                observations = smarts.reset(scenario)

                dones = {agent_id: False}
                while not dones[agent_id]:
                    agent_obs = observations[agent_id]
                    agent_action = agent.act(agent_obs)
                    observations, rewards, dones, infos = smarts.step(
                        {agent_id: agent_action}
                    )
            finally:
                smarts.destroy()
def agent_spec():
    """Return the spec for a ``WithTimeTrajectoryAgent``.

    The interface is derived from ``AgentType.TrajectoryInterpolator`` with
    neighborhood vehicles enabled; no agent parameters are supplied.
    """
    interface = AgentInterface.from_type(
        AgentType.TrajectoryInterpolator,
        neighborhood_vehicles=True,
    )
    return AgentSpec(
        interface=interface,
        agent_builder=WithTimeTrajectoryAgent,
        agent_params=None,
    )
def entrypoint(
    gains={
        "theta": 3.0,
        "position": 4.0,
        "obstacle": 3.0,
        "u_accel": 0.1,
        "u_yaw_rate": 1.0,
        "terminal": 0.01,
        "impatience": 0.01,
        "speed": 0.01,
        "rate": 1,
    },
    debug=False,
    aggressiveness=0,
    max_episode_steps=None,
):
    """Create the ``AgentSpec`` for an ``OpEnAgent``.

    Args:
        gains: Mapping of gain names to values, forwarded to the agent as the
            ``"gains"`` agent parameter.
        debug: Forwarded to the agent as the ``"debug"`` agent parameter.
        aggressiveness: Passed to ``AgentBehavior`` on the interface.
        max_episode_steps: Episode step limit set on the interface.

    Returns:
        An ``AgentSpec`` using the trajectory action space with waypoints and
        neighborhood vehicles enabled.
    """
    from .agent import OpEnAgent

    return AgentSpec(
        interface=AgentInterface(
            action=ActionSpaceType.Trajectory,
            waypoints=True,
            neighborhood_vehicles=True,
            max_episode_steps=max_episode_steps,
            agent_behavior=AgentBehavior(aggressiveness=aggressiveness),
        ),
        agent_params={
            # Hand over a copy: the default ``gains`` dict is created once and
            # shared by every call, so passing it by reference would let one
            # agent's mutation leak into all later specs.
            "gains": dict(gains),
            "debug": debug,
        },
        agent_builder=OpEnAgent,
    )
def main(
    scenarios,
    headless,
    num_episodes,
    seed,
):
    """Drive the ``hiway-v0`` environment with a keyboard-controlled policy.

    Args:
        scenarios: Scenarios passed to the environment.
        headless: Forwarded to the environment's ``headless`` option.
        num_episodes: Number of episodes to run.
        seed: Seed forwarded to the environment.
    """
    agent_spec = AgentSpec(
        interface=AgentInterface.from_type(
            AgentType.StandardWithAbsoluteSteering, max_episode_steps=3000
        ),
        policy_builder=HumanKeyboardPolicy,
    )

    env = gym.make(
        "smarts.env:hiway-v0",
        scenarios=scenarios,
        agent_specs={AGENT_ID: agent_spec},
        headless=headless,
        timestep_sec=0.1,
        seed=seed,
    )

    # Close the environment even if an episode raises or the user interrupts.
    try:
        for episode in episodes(n=num_episodes):
            agent = agent_spec.build_agent()
            observations = env.reset()
            episode.record_scenario(env.scenario_log)

            dones = {"__all__": False}
            while not dones["__all__"]:
                agent_obs = observations[AGENT_ID]
                agent_action = agent.act(agent_obs)
                observations, rewards, dones, infos = env.step(
                    {AGENT_ID: agent_action}
                )
                episode.record_step(observations, rewards, dones, infos)
    finally:
        env.close()
def entrypoint(
    gains={
        "theta": 3.0,
        "position": 4.0,
        "obstacle": 3.0,
        "u_accel": 0.1,
        "u_yaw_rate": 1.0,
        "terminal": 0.01,
        "impatience": 0.01,
        "speed": 0.01,
    },
    debug=False,
    max_episode_steps=600,
):
    """Create the ``AgentSpec`` for the trajectory ``Policy``.

    Args:
        gains: Mapping of gain names to values, forwarded as the ``"gains"``
            policy parameter.
        debug: Forwarded as the ``"debug"`` policy parameter.
        max_episode_steps: Episode step limit set on the interface.

    Returns:
        An ``AgentSpec`` using the trajectory action space with waypoints and
        neighborhood vehicles enabled, and with the self test switched off.
    """
    from .policy import Policy

    return AgentSpec(
        interface=AgentInterface(
            action=ActionSpaceType.Trajectory,
            waypoints=True,
            neighborhood_vehicles=True,
            max_episode_steps=max_episode_steps,
        ),
        policy_params={
            # Copy so the shared default dict can never be mutated through a
            # policy instance and silently change later calls.
            "gains": dict(gains),
            "debug": debug,
        },
        policy_builder=Policy,
        perform_self_test=False,
    )
def __init__(self, **kwargs):
    """Set up a multi-agent wrapper around the ``hiway-v0`` environment.

    Required keyword arguments (a missing one raises ``KeyError``):
        episode_limit: Stored as ``self.episode_limit``.
        agent_num: Number of agents to create.
        scenarios: A single scenario entry; wrapped in a one-element list.
        headless: Forwarded to the environment.
        seed: Forwarded to the environment.

    The base environment is created and reset immediately; the first
    observations are kept in ``self.current_observations``.
    """
    print(kwargs)
    self.episode_limit = kwargs['episode_limit']
    self.n_agents = kwargs['agent_num']

    # List multiplication repeats the *same* space object for every agent.
    self.observation_space = [
        gym.spaces.Box(low=-1e10, high=1e10, shape=(10, ))
    ] * self.n_agents
    self.action_space = [gym.spaces.Discrete(4)] * self.n_agents
    self.agent_ids = ["Agent %i" % i for i in range(self.n_agents)]
    self.n_actions = 4

    self.scenarios = [kwargs['scenarios']]
    self.headless = kwargs['headless']
    self.seed = kwargs['seed']

    self.agent_specs = {
        agent_id: AgentSpec(
            interface=AgentInterface.from_type(
                AgentType.Laner, max_episode_steps=5000
            ),
            observation_adapter=observation_adapter,
            reward_adapter=reward_adapter,
            action_adapter=action_adapter,
        )
        for agent_id in self.agent_ids
    }

    self.base_env = gym.make(
        "smarts.env:hiway-v0",
        scenarios=self.scenarios,
        agent_specs=self.agent_specs,
        headless=self.headless,
        seed=self.seed,
    )
    self.current_observations = self.base_env.reset()
def __init__(self, config):
    """Configure the environment from ``config`` and create the simulator.

    Every setting except ``config["scenarios"]`` is optional and falls back
    to the default shown below. The global SMARTS seed is set before the
    scenario iterator is created. An Envision client is only created when
    the environment is not headless.
    """
    self._config = config

    # XXX: These are intentionally left public at PyMARL's request
    self.n_agents = config.get("n_agents", 1)
    self.episode_limit = config.get("episode_limit", 1000)
    self.observation_space = config.get(
        "observation_space", DEFAULT_OBSERVATION_SPACE
    )
    self.action_space = config.get("action_space", DEFAULT_ACTION_SPACE)
    self.state_space = config.get("state_space", DEFAULT_STATE_SPACE)

    self._agent_ids = ["Agent %i" % i for i in range(self.n_agents)]

    # Adapters translating between SMARTS data and what the learner expects.
    self._reward_adapter = config.get("reward_adapter", default_reward_adapter)
    self._observation_adapter = config.get(
        "observation_adapter", default_obs_adapter
    )
    self._action_adapter = config.get("action_adapter", default_action_adapter)
    self._done_adapter = config.get(
        "done_adapter", lambda dones: list(dones.values())
    )
    self._state_adapter = config.get("state_adapter", default_state_adapter)

    self._headless = config.get("headless", False)
    self._timestep_sec = config.get("timestep_sec", 0.01)
    self._observations = None
    self._state = None
    self._steps = 0

    seed = self._config.get("seed", 42)
    smarts.core.seed(seed)

    self._scenarios_iterator = Scenario.scenario_variations(
        config["scenarios"], self._agent_ids
    )

    # Every agent gets its own interface built from the same settings.
    agent_interfaces = {
        agent_id: AgentInterface.from_type(
            config.get("agent_type", AgentType.Laner),
            max_episode_steps=self.episode_limit,
            debug=config.get("debug", False),
        )
        for agent_id in self._agent_ids
    }

    envision = (
        None
        if self._headless
        else Envision(
            endpoint=config.get("envision_endpoint", None),
            output_dir=config.get("envision_record_data_replay_path", None),
        )
    )

    self._smarts = SMARTS(
        agent_interfaces=agent_interfaces,
        traffic_sim=SumoTrafficSimulation(time_resolution=self._timestep_sec),
        envision=envision,
        timestep_sec=self._timestep_sec,
    )
def agent_spec(agent_and_agent_type):
    """Build an ``AgentSpec`` from an ``(agent builder, agent type)`` pair.

    Index 1 of the argument selects the interface type (capped at 5000
    episode steps); index 0 is used as the agent builder.
    """
    interface = AgentInterface.from_type(
        agent_and_agent_type[1],
        max_episode_steps=5000,
    )
    builder = agent_and_agent_type[0]
    return AgentSpec(interface=interface, agent_builder=builder)
def main(scenarios, headless, num_episodes, max_episode_steps=None):
    """Run a ``ChaseViaPointsAgent`` through a single-agent ``hiway-v0`` env.

    Args:
        scenarios: Scenarios passed to the environment.
        headless: Forwarded to the environment's ``headless`` option.
        num_episodes: Number of episodes to run.
        max_episode_steps: Episode step limit for the agent interface.
    """
    agent_spec = AgentSpec(
        interface=AgentInterface.from_type(
            AgentType.LanerWithSpeed, max_episode_steps=max_episode_steps
        ),
        agent_builder=ChaseViaPointsAgent,
    )

    env = gym.make(
        "smarts.env:hiway-v0",
        scenarios=scenarios,
        agent_specs={"SingleAgent": agent_spec},
        headless=headless,
        sumo_headless=True,
    )

    # Close the environment even if wrapping or an episode raises.
    try:
        # Convert `env.step()` and `env.reset()` from multi-agent interface to
        # single-agent interface.
        env = SingleAgent(env=env)

        for episode in episodes(n=num_episodes):
            agent = agent_spec.build_agent()
            observation = env.reset()
            episode.record_scenario(env.scenario_log)

            done = False
            while not done:
                agent_action = agent.act(observation)
                observation, reward, done, info = env.step(agent_action)
                episode.record_step(observation, reward, done, info)
    finally:
        env.close()
def main(scenarios, headless, num_episodes, seed):
    """Run a ``KeepLaneAgent`` in the ``hiway-v0`` environment.

    Args:
        scenarios: Scenarios passed to the environment.
        headless: Forwarded to the environment's ``headless`` option.
        num_episodes: Number of episodes to run.
        seed: Seed forwarded to the environment.
    """
    agent_spec = AgentSpec(
        interface=AgentInterface.from_type(AgentType.Laner, max_episode_steps=None),
        agent_builder=KeepLaneAgent,
    )

    env = gym.make(
        "smarts.env:hiway-v0",
        scenarios=scenarios,
        agent_specs={AGENT_ID: agent_spec},
        headless=headless,
        visdom=False,
        timestep_sec=0.1,
        sumo_headless=True,
        seed=seed,
        # envision_record_data_replay_path="./data_replay",
    )

    # Close the environment even if an episode raises or the user interrupts.
    try:
        for episode in episodes(n=num_episodes):
            agent = agent_spec.build_agent()
            observations = env.reset()
            episode.record_scenario(env.scenario_log)

            dones = {"__all__": False}
            while not dones["__all__"]:
                agent_obs = observations[AGENT_ID]
                agent_action = agent.act(agent_obs)
                observations, rewards, dones, infos = env.step(
                    {AGENT_ID: agent_action}
                )
                episode.record_step(observations, rewards, dones, infos)
    finally:
        env.close()
def _make_agent_specs(num_agent):
    """Create ``num_agent`` RGB lane-keeping agent specs and their obs space.

    Agents are named ``AGENT_0`` .. ``AGENT_<num_agent - 1>``. Each one always
    answers ``"keep_lane"``, observes the top-down RGB data, passes the reward
    through unchanged and reports ``info["score"]`` as its info.

    Returns:
        A ``(agent_specs, obs_space)`` tuple: the dict of specs keyed by agent
        name and a ``gym.spaces.Dict`` holding one uint8 ``Box`` per agent.
    """
    agent_names = [f"AGENT_{index}" for index in range(num_agent)]

    agent_specs = {}
    for name in agent_names:
        agent_specs[name] = AgentSpec(
            interface=AgentInterface(
                rgb=RGB(),
                action=ActionSpaceType.Lane,
            ),
            agent_builder=lambda: Agent.from_function(lambda _: "keep_lane"),
            observation_adapter=lambda obs: obs.top_down_rgb.data,
            reward_adapter=lambda obs, reward: reward,
            info_adapter=lambda obs, reward, info: info["score"],
        )

    image_boxes = {}
    for name, spec in agent_specs.items():
        camera = spec.interface.rgb
        # NOTE(review): shape is ordered (width, height, 3) -- confirm this
        # matches the layout of the RGB data the simulator actually returns.
        image_boxes[name] = gym.spaces.Box(
            low=0,
            high=255,
            shape=(camera.width, camera.height, 3),
            dtype=np.uint8,
        )

    obs_space = gym.spaces.Dict(image_boxes)
    return agent_specs, obs_space
def init_env():
    """Create a wrapped ``hiway-v0`` environment with identical Laner agents.

    Reads ``all_args`` and ``rank`` from the enclosing scope. Agents are
    named ``"0"`` .. ``str(num_agents - 1)`` and all share one ``AgentSpec``.
    The seed is offset by ``rank * 1000``.

    Returns:
        The environment wrapped in ``SmartWrapper``.
    """
    # The former ``if True: ... else: raise NotImplementedError`` guard had an
    # unreachable else-branch; only the live path is kept.
    agent_spec = AgentSpec(
        interface=AgentInterface.from_type(
            AgentType.Laner, max_episode_steps=all_args.episode_length
        )
    )

    agent_ids = [str(i) for i in range(all_args.num_agents)]

    env = gym.make(
        "smarts.env:hiway-v0",
        scenarios=all_args.scenarios,
        agent_specs={agent_id: agent_spec for agent_id in agent_ids},
        headless=all_args.headless,
        visdom=False,
        timestep_sec=0.1,
        sumo_headless=True,
        seed=all_args.seed + rank * 1000,
        # zoo_workers=[("143.110.210.157", 7432)], # Distribute social agents across these workers
        auth_key=all_args.auth_key,
        # envision_record_data_replay_path="./data_replay",
    )
    env = SmartWrapper(env, all_args.num_agents)
    return env
def env_and_spec(action, agent_type, max_episode_steps, scenarios, seed=42,
                 agent_id="Agent-006"):
    """Create a headless ``hiway-v0`` env whose agent always returns ``action``.

    Args:
        action: The fixed action returned on every ``act`` call.
        agent_type: Agent type used to derive the interface.
        max_episode_steps: Episode step limit for the interface.
        scenarios: Scenarios passed to the environment.
        seed: Seed forwarded to the environment.
        agent_id: Key under which the agent spec is registered.

    Returns:
        An ``(env, agent_spec)`` tuple.
    """

    class Policy(AgentPolicy):
        # Ignores the observation entirely.
        def act(self, obs):
            return action

    interface = AgentInterface.from_type(
        agent_type,
        max_episode_steps=max_episode_steps,
    )
    agent_spec = AgentSpec(interface=interface, policy_builder=Policy)

    env = gym.make(
        "smarts.env:hiway-v0",
        scenarios=scenarios,
        agent_specs={agent_id: agent_spec},
        headless=True,
        visdom=False,
        timestep_sec=TIMESTEP_SEC,
        sumo_headless=True,
        seed=seed,
    )
    return (env, agent_spec)
def main(scenario):
    """Run rule-based agents on ``scenario`` forever, one per agent mission.

    The loop never terminates on its own; it ends only when an exception
    (for example ``KeyboardInterrupt``) propagates, at which point the
    environment is closed.

    Args:
        scenario: Path to the scenario; resolved to an absolute path.
    """
    scenario_path = Path(scenario).absolute()
    agent_mission_count = Scenario.discover_agent_missions_count(scenario_path)

    assert agent_mission_count > 0, "agent mission count should larger than 0"

    agent_ids = [f"AGENT-{i}" for i in range(agent_mission_count)]

    agent_specs = {
        agent_id: AgentSpec(
            interface=AgentInterface.from_type(
                AgentType.Laner, max_episode_steps=None
            ),
            agent_builder=RuleBasedAgent,
        )
        for agent_id in agent_ids
    }

    # Agents are built once and reused across all episodes.
    agents = {aid: agent_spec.build_agent() for aid, agent_spec in agent_specs.items()}

    env = HiWayEnv(scenarios=[scenario_path], agent_specs=agent_specs)

    # The episode loop is infinite, so the only way out is an exception;
    # make sure the environment is released when that happens.
    try:
        while True:
            observations = env.reset()
            done = False
            while not done:
                # Only agents that currently have an observation act.
                active_ids = list(observations.keys())
                actions = {
                    aid: agents[aid].act(observations[aid]) for aid in active_ids
                }
                observations, _, dones, _ = env.step(actions)
                done = dones["__all__"]
    finally:
        env.close()
def get_agent(self, ego, policy, max_episode_steps):
    """Return ``(agent_spec, observation_adapter)`` for an ego or social agent.

    Args:
        ego: When truthy, build the spec from the configuration looked up for
            ``policy``; otherwise build a lane-following ``DefaultPolicy`` spec.
        policy: Policy identifier passed to ``get_agent_config_by_type``
            (only used when ``ego`` is truthy).
        max_episode_steps: Episode step limit applied to the interface.

    Returns:
        The agent spec and, for ego agents, an ``IntersectionAdapter``; the
        adapter is ``None`` for the lane-following agent.
    """
    if not ego:
        # Lane Following agent
        lane_follower_spec = AgentSpec(
            interface=AgentInterface(
                # Annotated as "10 mins" by the original author.
                max_episode_steps=max_episode_steps,
                waypoints=True,
                action=ActionSpaceType.Lane,
                debug=False,
                neighborhood_vehicles=NeighborhoodVehicles(radius=2000),
            ),
            agent_builder=DefaultPolicy,
        )
        return lane_follower_spec, None

    config = get_agent_config_by_type(policy)
    # `ego_model` is not defined in this method; it is resolved from an
    # enclosing/module scope.
    agent_params = dict(config["policy"], checkpoint_dir=ego_model)
    agent_spec = AgentSpec(
        interface=config["interface"],
        agent_params=agent_params,
        agent_builder=config["policy_class"],
    )
    agent_spec.interface.max_episode_steps = max_episode_steps

    observation_adapter = IntersectionAdapter(
        agent_id="AGENT_007",
        social_vehicle_config=config["social_vehicle_config"],
        timestep_sec=config["env"]["timestep_sec"],
        **config["other"],
    )
    return agent_spec, observation_adapter
def main(scenarios, headless, seed):
    """Step an ego-less simulation and observe through social vehicles.

    No agents are registered with SMARTS. On each of 5000 steps, sensors
    described by a Laner interface are attached to all social vehicles and
    observations are collected from them.

    Args:
        scenarios: Scenario sources passed to ``Scenario.scenario_variations``;
            only the first variation is used.
        headless: Unused by this function (SUMO is always started headless).
        seed: Unused by this function.
    """
    agent_spec = AgentSpec(
        interface=AgentInterface.from_type(AgentType.Laner, max_episode_steps=None),
        agent_builder=None,
        observation_adapter=None,
    )

    smarts = SMARTS(
        agent_interfaces={},
        traffic_sim=SumoTrafficSimulation(headless=True, auto_start=True),
        envision=Envision(),
    )

    # Tear the simulator down even if a step fails or the run is interrupted.
    try:
        scenarios_iterator = Scenario.scenario_variations(scenarios, [])
        smarts.reset(next(scenarios_iterator))

        for _ in range(5000):
            smarts.step({})
            smarts.attach_sensors_to_vehicles(
                agent_spec, smarts.vehicle_index.social_vehicle_ids()
            )
            # The observations are collected but not used further here.
            obs, _, _, _ = smarts.observe_from(
                smarts.vehicle_index.social_vehicle_ids()
            )
    finally:
        smarts.destroy()
def test_graceful_interrupt(monkeypatch):
    """SMARTS should only throw a KeyboardInterrupt exception."""

    # A trivial agent that always answers "keep_lane".
    agent_spec = AgentSpec(
        interface=AgentInterface.from_type(AgentType.Laner),
        agent_builder=lambda: Agent.from_function(lambda _: "keep_lane"),
    )
    agent = agent_spec.build_agent()
    env = build_env(agent_spec)

    with pytest.raises(KeyboardInterrupt):
        obs = env.reset()

        # To simulate a user interrupting the sim (e.g. ctrl-c). We just need to
        # hook in to some function that SMARTS calls internally (like this one).
        with mock.patch(
            "smarts.core.sensors.Sensors.observe", side_effect=KeyboardInterrupt
        ):
            for episode in range(10):
                obs, _, _, _ = env.step({AGENT_ID: agent.act(obs)})

        # Only reached when no interrupt propagated out of the loop; in that
        # case the loop ran to completion and this assertion fails the test.
        # NOTE(review): nesting reconstructed from a collapsed source line --
        # confirm this assert belongs inside the outer `with`.
        assert episode == 0, "SMARTS should have been interrupted, ending early"

    # After the interrupt, stepping again is expected to raise
    # SMARTSNotSetupError.
    with pytest.raises(SMARTSNotSetupError):
        env.step({AGENT_ID: agent.act(obs)})
def main(scenarios, sim_name, headless, num_episodes, seed, max_episode_steps=None):
    """Run a ``ChaseViaPointsAgent`` in the ``hiway-v0`` environment.

    Args:
        scenarios: Scenarios passed to the environment.
        sim_name: Simulation name forwarded to the environment.
        headless: Forwarded to the environment's ``headless`` option.
        num_episodes: Number of episodes to run.
        seed: Seed forwarded to the environment.
        max_episode_steps: Episode step limit for the agent interface.
    """
    agent_spec = AgentSpec(
        interface=AgentInterface.from_type(
            AgentType.LanerWithSpeed, max_episode_steps=max_episode_steps
        ),
        agent_builder=ChaseViaPointsAgent,
    )

    env = gym.make(
        "smarts.env:hiway-v0",
        scenarios=scenarios,
        agent_specs={AGENT_ID: agent_spec},
        sim_name=sim_name,
        headless=headless,
        visdom=False,
        timestep_sec=0.1,
        sumo_headless=True,
        seed=seed,
        # zoo_addrs=[("10.193.241.236", 7432)], # Sample server address (ip, port), to distribute social agents in remote server.
        # envision_record_data_replay_path="./data_replay",
    )

    # Close the environment even if an episode raises or the user interrupts.
    try:
        for episode in episodes(n=num_episodes):
            agent = agent_spec.build_agent()
            observations = env.reset()
            episode.record_scenario(env.scenario_log)

            dones = {"__all__": False}
            while not dones["__all__"]:
                agent_obs = observations[AGENT_ID]
                agent_action = agent.act(agent_obs)
                observations, rewards, dones, infos = env.step(
                    {AGENT_ID: agent_action}
                )
                episode.record_step(observations, rewards, dones, infos)
    finally:
        env.close()
def main(_args):
    """Run 30 episodes of lane-keeping agents on a single scenario.

    One agent is created per mission reported by ``get_submission_num``; a
    result of ``-1`` is treated as a single mission. A browser tab is opened
    on ``http://localhost:8081/`` before the episodes start, and the
    accumulated reward is printed periodically and at the end of each episode.

    Args:
        _args: Parsed arguments providing ``scenario`` and ``headless``.
    """
    # Read the scenario from the parameter, not from a module-level ``args``
    # that merely happens to exist when the file is run as a script.
    scenario_path = Path(_args.scenario).absolute()

    mission_num = get_submission_num(scenario_path)
    if mission_num == -1:
        mission_num = 1

    agent_ids = [f"AGENT-{i}" for i in range(mission_num)]

    # All specs share the same interface object.
    agent_interface = AgentInterface.from_type(AgentType.Laner)
    spec_list = [
        AgentSpec(interface=agent_interface, policy_builder=lambda: KeeplanePolicy())
        for _ in range(mission_num)
    ]
    agent_specs = dict(zip(agent_ids, spec_list))

    env = gym.make(
        "smarts.env:hiway-v0",
        scenarios=[scenario_path],
        agent_specs=agent_specs,
        headless=_args.headless,
        visdom=False,
        seed=42,
    )

    # Close the environment even if an episode raises or the user interrupts.
    try:
        agents = {
            _id: agent_spec.build_agent() for _id, agent_spec in agent_specs.items()
        }

        import webbrowser
        webbrowser.open('http://localhost:8081/')

        for ie in range(30):
            step = 0
            print(f"\n---- Starting episode: {ie}...")
            observations = env.reset()
            total_reward = 0.0
            dones = {"__all__": False}

            while not dones["__all__"]:
                step += 1
                agent_actions = {
                    _id: agents[_id].act(obs) for _id, obs in observations.items()
                }
                observations, rewards, dones, _ = env.step(agent_actions)
                total_reward += sum(rewards.values())
                if (step + 1) % 10 == 0:
                    print(
                        f"* Episode: {ie} * step: {step} * acc-Reward: {total_reward}"
                    )
            print("Accumulated reward:", total_reward)
    finally:
        env.close()
def run_experiment(log_path, experiment_name, training_iteration=100):
    """Launch a PPO tuning run on the ``loop`` scenario and return its analysis.

    Args:
        log_path: Directory passed to ``tune.run`` as ``local_dir``.
        experiment_name: Name of the tune experiment.
        training_iteration: Stop criterion for the run.

    Returns:
        The object returned by ``tune.run``.
    """
    here = Path(__file__).parent

    saved_model_dir = here / "model"
    agent_spec = AgentSpec(
        interface=AgentInterface.from_type(AgentType.Standard, max_episode_steps=5000),
        policy=RLlibTFSavedModelAgent(
            saved_model_dir.absolute(),
            OBSERVATION_SPACE,
        ),
        observation_adapter=observation_adapter,
        reward_adapter=reward_adapter,
        action_adapter=action_adapter,
    )

    # A single shared policy; every agent id maps onto it.
    model_config = {"model": {"custom_model": TrainingModel.NAME}}
    rllib_policies = {
        "policy": (None, OBSERVATION_SPACE, ACTION_SPACE, model_config),
    }

    scenario_path = str((here / "../../scenarios/loop").absolute())

    env_config = {
        "scenarios": [scenario_path],
        "seed": 42,
        "headless": True,
        "agent_specs": {"Agent-007": agent_spec},
    }
    multiagent_config = {
        "policies": rllib_policies,
        "policy_mapping_fn": lambda _: "policy",
    }
    tune_config = {
        "env": RLlibHiWayEnv,
        "env_config": env_config,
        "multiagent": multiagent_config,
        "log_level": "WARN",
        # Uses one worker fewer than the reported CPU count.
        "num_workers": multiprocessing.cpu_count() - 1,
        "horizon": HORIZON,
    }

    return tune.run(
        "PPO",
        name=experiment_name,
        stop={"training_iteration": training_iteration},
        max_failures=10,
        local_dir=log_path,
        config=tune_config,
    )
def rllib_agent():
    """Return an agent spec plus matching observation and action spaces.

    Returns:
        A dict with the keys ``"agent_spec"``, ``"observation_space"`` and
        ``"action_space"``.
    """

    def observation_adapter(env_observation):
        """Reduce an observation to lane-relative features."""
        ego = env_observation.ego_vehicle_state
        first_waypoints = [path[0] for path in env_observation.waypoint_paths]

        # distance of vehicle from center of lane
        nearest = min(first_waypoints, key=lambda wp: wp.dist_to(ego.position))
        lateral_error = nearest.signed_lateral_error(ego.position)
        half_lane_width = nearest.lane_width * 0.5
        normalized_error = lateral_error / half_lane_width

        return {
            "distance_from_center": np.array([normalized_error]),
            "angle_error": np.array([nearest.relative_heading(ego.heading)]),
            "speed": np.array([ego.speed]),
            "steering": np.array([ego.steering]),
        }

    def reward_adapter(env_obs, env_reward):
        """Pass the environment reward through unchanged."""
        return env_reward

    def action_adapter(model_action):
        """Unpack a three-element action into a numpy array."""
        throttle, brake, steering = model_action
        return np.array([throttle, brake, steering])

    def info_adapter(env_obs, env_reward, env_info):
        """Tag the info dict with an extra key and return it."""
        env_info[INFO_EXTRA_KEY] = "blah"
        return env_info

    # This action space should match the input to the action_adapter(..)
    # function above.
    action_space = gym.spaces.Box(
        low=np.array([0.0, 0.0, -1.0]),
        high=np.array([1.0, 1.0, 1.0]),
        dtype=np.float32,
    )

    # This observation space should match the output of
    # observation_adapter(..) above.
    def wide_box():
        return gym.spaces.Box(low=-1e10, high=1e10, shape=(1,))

    observation_space = gym.spaces.Dict(
        {
            "distance_from_center": wide_box(),
            "angle_error": gym.spaces.Box(low=-np.pi, high=np.pi, shape=(1,)),
            "speed": wide_box(),
            "steering": wide_box(),
        }
    )

    spec = AgentSpec(
        interface=AgentInterface.from_type(
            AgentType.Standard, max_episode_steps=500
        ),
        observation_adapter=observation_adapter,
        reward_adapter=reward_adapter,
        action_adapter=action_adapter,
        info_adapter=info_adapter,
    )

    return {
        "agent_spec": spec,
        "observation_space": observation_space,
        "action_space": action_space,
    }
def klws_entrypoint(speed):
    """Create the ``AgentSpec`` for a ``KeepLeftWithSpeedAgent``.

    Args:
        speed: Scaled by 0.01 before being passed to the agent as its
            ``"speed"`` parameter.
    """
    from .keep_left_with_speed_agent import KeepLeftWithSpeedAgent

    interface = AgentInterface.from_type(
        AgentType.LanerWithSpeed,
        max_episode_steps=20000,
    )
    agent_params = {"speed": speed * 0.01}
    return AgentSpec(
        interface=interface,
        agent_params=agent_params,
        agent_builder=KeepLeftWithSpeedAgent,
    )
def smarts(traffic_sim):
    """Yield a single-agent SMARTS simulation, then destroy it.

    The agent ``"Agent-007"`` uses a lane action space with neighborhood
    vehicles enabled and a 1000-step episode limit.

    Args:
        traffic_sim: Traffic simulation handed to ``SMARTS``.
    """
    interfaces = {
        "Agent-007": AgentInterface(
            max_episode_steps=1000,
            neighborhood_vehicles=True,
            action=ActionSpaceType.Lane,
        ),
    }
    simulation = SMARTS(interfaces, traffic_sim=traffic_sim)

    yield simulation
    simulation.destroy()
def human_keyboard_entrypoint(*arg, **kwargs):
    """Create the ``AgentSpec`` for a ``HumanKeyboardAgent``.

    Positional and keyword arguments are accepted but ignored.
    """
    from .human_in_the_loop import HumanKeyboardAgent

    interface = AgentInterface.from_type(
        AgentType.StandardWithAbsoluteSteering,
        max_episode_steps=3000,
    )
    return AgentSpec(interface=interface, agent_builder=HumanKeyboardAgent)
def agent_spec():
    """Return a Laner ``AgentSpec`` whose policy always keeps its lane.

    The episode length is capped at ``MAX_STEPS``.
    """

    class Policy(AgentPolicy):
        # The observation is ignored; the answer is always the same.
        def act(self, obs):
            return "keep_lane"

    interface = AgentInterface.from_type(
        AgentType.Laner,
        max_episode_steps=MAX_STEPS,
    )
    return AgentSpec(interface=interface, policy_builder=Policy)
def agent_spec():
    """Return a Laner ``AgentSpec`` whose agent always keeps its lane.

    The episode length is capped at ``MAX_STEPS``.
    """

    class KeepLaneAgent(Agent):
        # The observation is ignored; the answer is always the same.
        def act(self, obs):
            return "keep_lane"

    interface = AgentInterface.from_type(
        AgentType.Laner,
        max_episode_steps=MAX_STEPS,
    )
    return AgentSpec(interface=interface, agent_builder=KeepLaneAgent)
def main(args):
    """Run 10 episodes of random-policy agents over the given scenarios.

    Every scenario must report the same number of missions via
    ``get_submission_num``; one agent is created per mission. The accumulated
    reward is printed periodically and at the end of each episode.

    Args:
        args: Parsed arguments providing ``scenarios`` and ``headless``.

    Raises:
        AssertionError: If the scenarios disagree on the mission count.
    """
    scenarios = [Path(e).absolute() for e in args.scenarios]

    # All specs share the same interface object.
    agent_interface = AgentInterface.from_type(AgentType.StandardWithAbsoluteSteering)

    agent_num_list = [get_submission_num(s) for s in scenarios]
    # The names now match the values (they used to be bound the other way round).
    min_v, max_v = min(agent_num_list), max(agent_num_list)
    assert min_v == max_v, "Mission number mismatch."

    agent_ids = [f"AGENT-{i}" for i in range(max_v)]

    agent_specs = {
        _id: AgentSpec(interface=agent_interface, policy_builder=lambda: RandomPolicy())
        for _id in agent_ids
    }

    env = gym.make(
        "smarts.env:hiway-v0",
        scenarios=scenarios,
        agent_specs=agent_specs,
        headless=args.headless,
        visdom=False,
        seed=42,
    )

    # Close the environment even if an episode raises or the user interrupts.
    try:
        agents = {
            _id: agent_spec.build_agent() for _id, agent_spec in agent_specs.items()
        }

        for ie in range(10):
            step = 0
            print(f"\n---- Starting episode: {ie}...")
            observations = env.reset()
            total_reward = 0.0
            dones = {"__all__": False}

            print(f"load scenario={env.scenario_log['scenario_map']}")
            while not dones["__all__"]:
                step += 1
                agent_actions = {
                    _id: agents[_id].act(obs) for _id, obs in observations.items()
                }
                observations, rewards, dones, _ = env.step(agent_actions)
                total_reward += sum(rewards.values())
                if (step + 1) % 10 == 0:
                    print(
                        f"* Episode: {ie} * step: {step} * acc-Reward: {total_reward}"
                    )
            print("Accumulated reward:", total_reward)
    finally:
        env.close()