def main(scenarios, headless, seed):
    """Replay every traffic-history mission with a lane-keeping ego agent.

    For each scenario variation, the missions discovered in the scenario's
    traffic histories are run one at a time: the mission is installed as the
    ego mission, a fresh SMARTS instance is created for it, and a
    ``KeepLaneAgent`` is stepped until SMARTS reports that agent as done.

    Args:
        scenarios: Scenario sources handed to ``Scenario.scenario_variations``.
        headless: When true, SUMO is started headless and no Envision client
            is created.
        seed: Accepted for interface compatibility; not used here.
    """
    scenarios_iterator = Scenario.scenario_variations(scenarios, [])
    for _ in scenarios:
        scenario = next(scenarios_iterator)
        agent_missions = scenario.discover_missions_of_traffic_histories()

        for agent_id, mission in agent_missions.items():
            scenario.set_ego_missions({agent_id: mission})

            agent_spec = AgentSpec(
                interface=AgentInterface.from_type(
                    AgentType.Laner, max_episode_steps=None
                ),
                agent_builder=KeepLaneAgent,
            )
            agent = agent_spec.build_agent()

            smarts = SMARTS(
                agent_interfaces={agent_id: agent_spec.interface},
                traffic_sim=SumoTrafficSimulation(
                    headless=headless, auto_start=True
                ),
                envision=None if headless else Envision(),
            )
            # Always tear the simulation down, even when an episode fails,
            # so no simulator instance is left behind.
            try:
                observations = smarts.reset(scenario)

                dones = {agent_id: False}
                # A missing entry is treated as "done" so that an agent that
                # has been dropped from `dones` ends the episode instead of
                # raising KeyError.
                while not dones.get(agent_id, True):
                    agent_obs = observations[agent_id]
                    agent_action = agent.act(agent_obs)
                    observations, _, dones, _ = smarts.step(
                        {agent_id: agent_action}
                    )
            finally:
                smarts.destroy()
def main(scenarios, headless, seed):
    """Run a traffic-only simulation and observe every social vehicle.

    The first scenario variation is loaded into a SMARTS instance that has no
    ego agents. On each of 5000 steps, sensors described by a ``Laner``
    interface are attached to the current social vehicles and observations
    are pulled from them.

    Args:
        scenarios: Scenario sources handed to ``Scenario.scenario_variations``.
        headless: When true, SUMO is started headless and no Envision client
            is created.
        seed: Accepted for interface compatibility; not used here.
    """
    agent_spec = AgentSpec(
        interface=AgentInterface.from_type(AgentType.Laner, max_episode_steps=None),
        agent_builder=None,
        observation_adapter=None,
    )

    smarts = SMARTS(
        agent_interfaces={},
        traffic_sim=SumoTrafficSimulation(headless=headless, auto_start=True),
        envision=None if headless else Envision(),
    )
    # Guarantee teardown even if a step raises.
    try:
        scenarios_iterator = Scenario.scenario_variations(scenarios, [])
        smarts.reset(next(scenarios_iterator))

        for _ in range(5000):
            smarts.step({})

            # Query the vehicle ids once so that sensors are attached to, and
            # observations read from, the same set of vehicles.
            vehicle_ids = smarts.vehicle_index.social_vehicle_ids()
            smarts.attach_sensors_to_vehicles(agent_spec, vehicle_ids)
            obs, _, _, _ = smarts.observe_from(vehicle_ids)
            # TODO: save observations for imitation learning
    finally:
        smarts.destroy()
def smarts(agent_spec):
    """Yield a SMARTS instance for ``AGENT_ID`` and destroy it afterwards.

    The single agent interface is taken from ``agent_spec`` and traffic is
    provided by a default-configured ``SumoTrafficSimulation``.
    Presumably consumed as a pytest yield-fixture (decorator not visible).
    """
    interfaces = {AGENT_ID: agent_spec.interface}
    sim = SMARTS(
        agent_interfaces=interfaces,
        traffic_sim=SumoTrafficSimulation(),
    )
    yield sim
    # Teardown: runs when the consumer resumes the generator.
    sim.destroy()
def test_data_replay(agent_spec, scenarios_iterator, data_replay_path, monkeypatch):
    """Check that an Envision replay sends the same frames as the live run.

    The websocket that the Envision client writes to is stubbed out and the
    sent data is stored. The same is done under the Envision client's replay
    feature, and the two recordings are compared frame by frame.
    """

    def step_through_episodes(agent_spec, smarts, scenarios_iterator):
        # Run NUM_EPISODES full episodes, each on the next scenario variation.
        for _ in range(NUM_EPISODES):
            agent = agent_spec.build_agent()
            obs = smarts.reset(next(scenarios_iterator))

            while True:
                agent_obs = agent_spec.observation_adapter(obs[AGENT_ID])
                action = agent_spec.action_adapter(agent.act(agent_obs))
                obs, _, dones, _ = smarts.step({AGENT_ID: action})
                if dones[AGENT_ID]:
                    break

    # 1. Inspect sent data during SMARTS simulation

    # Mock WebSocketApp so we can inspect the websocket frames being sent
    FakeWebSocketApp, original_sent_data = fake_websocket_app_class()
    monkeypatch.setattr(websocket, "WebSocketApp", FakeWebSocketApp)
    assert original_sent_data.qsize() == 0

    recorder = Envision(output_dir=data_replay_path)
    sim = SMARTS(
        agent_interfaces={AGENT_ID: agent_spec.interface},
        traffic_sim=SumoTrafficSimulation(time_resolution=TIMESTEP_SEC),
        envision=recorder,
        timestep_sec=TIMESTEP_SEC,
    )
    step_through_episodes(agent_spec, sim, scenarios_iterator)
    sim.destroy()

    # Exactly one run directory holding exactly one .jsonl recording is expected.
    replay_root = Path(data_replay_path)
    run_dirs = [entry for entry in replay_root.iterdir() if entry.is_dir()]
    assert len(run_dirs) == 1

    jsonl_paths = list(run_dirs[0].glob("*.jsonl"))
    assert len(jsonl_paths) == 1
    assert original_sent_data.qsize() > 0

    # 2. Inspect replay data

    # Mock WebSocketApp so we can inspect the websocket frames being sent
    FakeWebSocketApp, new_sent_data = fake_websocket_app_class()
    monkeypatch.setattr(websocket, "WebSocketApp", FakeWebSocketApp)
    assert new_sent_data.qsize() == 0

    # Now read data replay
    Envision.read_and_send(jsonl_paths[0], timestep_sec=TIMESTEP_SEC)

    # Verify the new data matches the original data
    assert original_sent_data.qsize() == new_sent_data.qsize()
    frame_count = new_sent_data.qsize()
    for _ in range(frame_count):
        assert original_sent_data.get() == new_sent_data.get()
def smarts(scenarios, mock_provider):
    """Yield a reset, agent-less SMARTS instance with ``mock_provider`` added.

    The instance uses SUMO traffic at a 0.1 time resolution and is reset onto
    the next item of ``scenarios`` before being yielded; it is destroyed once
    the consumer resumes the generator.
    """
    traffic = SumoTrafficSimulation(time_resolution=0.1)
    sim = SMARTS(agent_interfaces={}, traffic_sim=traffic)
    sim.add_provider(mock_provider)
    sim.reset(next(scenarios))
    yield sim
    sim.destroy()
def smarts():
    """Yield a SMARTS instance with one ``Laner`` agent, then destroy it.

    The agent registered under ``AGENT_ID`` is limited to 30 episode steps and
    traffic comes from SUMO at a 0.1 time resolution.
    """
    laner_interface = AgentInterface.from_type(AgentType.Laner, max_episode_steps=30)
    sim = SMARTS(
        agent_interfaces={AGENT_ID: laner_interface},
        traffic_sim=SumoTrafficSimulation(time_resolution=0.1),
    )
    yield sim
    sim.destroy()
def smarts(traffic_sim):
    """Yield a SMARTS instance for "Agent-007" on the given traffic simulation.

    The agent uses the lane action space, observes neighborhood vehicles and
    is limited to 1000 episode steps. The instance is destroyed once the
    consumer resumes the generator.
    """
    agent_interface = AgentInterface(
        max_episode_steps=1000,
        neighborhood_vehicles=True,
        action=ActionSpaceType.Lane,
    )
    sim = SMARTS({"Agent-007": agent_interface}, traffic_sim=traffic_sim)
    yield sim
    sim.destroy()
def smarts():
    """Yield a two-agent SMARTS instance on headless SUMO, then destroy it.

    ``AGENT_1`` and ``AGENT_2`` each receive their own lane-action interface
    limited to 1000 episode steps. No Envision client is attached.
    """
    # One distinct interface object per agent, created in agent order.
    agents = {
        agent_id: AgentInterface(max_episode_steps=1000, action=ActionSpaceType.Lane)
        for agent_id in (AGENT_1, AGENT_2)
    }
    sim = SMARTS(
        agents,
        traffic_sim=SumoTrafficSimulation(headless=True),
        envision=None,
    )
    yield sim
    sim.destroy()
def smarts():
    """Yield a SMARTS instance for "Agent-007" on headless SUMO, then destroy it.

    The agent uses the lane action space, observes neighborhood vehicles and
    is limited to 1000 episode steps. No Envision client is attached.
    """
    agent_interface = AgentInterface(
        max_episode_steps=1000,
        neighborhood_vehicles=True,
        action=ActionSpaceType.Lane,
    )
    sim = SMARTS(
        agent_interfaces={"Agent-007": agent_interface},
        traffic_sim=SumoTrafficSimulation(headless=True),
        envision=None,
    )
    yield sim
    sim.destroy()
def main(scenarios: Sequence[str], headless: bool, seed: int):
    """Record observations of social cars and pickle them per vehicle.

    The first scenario variation is simulated without ego agents. After every
    step, sensors described by a ``Laner`` interface are attached to the
    current social vehicles of type "car", their observations are passed to
    ``_record_data``, and the loop ends once data has been collected and no
    such vehicles remain. One pickle file per recorded car is then written to
    the current working directory.

    Args:
        scenarios: Scenario sources handed to ``Scenario.scenario_variations``.
        headless: When true, SUMO is started headless and no Envision client
            is created.
        seed: Accepted for interface compatibility; not used here.
    """
    agent_spec = AgentSpec(
        interface=AgentInterface.from_type(AgentType.Laner, max_episode_steps=None),
        agent_builder=None,
        observation_adapter=None,
    )

    smarts = SMARTS(
        agent_interfaces={},
        traffic_sim=SumoTrafficSimulation(headless=headless, auto_start=True),
        envision=None if headless else Envision(),
    )
    # Guarantee teardown even when stepping or saving fails.
    try:
        scenarios_iterator = Scenario.scenario_variations(scenarios, [])
        scenario = next(scenarios_iterator)
        obs = smarts.reset(scenario)

        collected_data = {}
        _record_data(smarts.elapsed_sim_time, obs, collected_data)

        # could also include "motorcycle" or "truck" in this set if desired
        vehicle_types = frozenset({"car"})

        while True:
            smarts.step({})

            current_vehicles = smarts.vehicle_index.social_vehicle_ids(
                vehicle_types=vehicle_types
            )

            # Only stop after something was recorded; the simulation may
            # start with no vehicles on the road yet.
            if collected_data and not current_vehicles:
                print("no more vehicles. exiting...")
                break

            smarts.attach_sensors_to_vehicles(agent_spec, current_vehicles)
            obs, _, _, _ = smarts.observe_from(current_vehicles)

            _record_data(smarts.elapsed_sim_time, obs, collected_data)

        # an example of how we might save the data per car
        for car, data in collected_data.items():
            outfile = f"data_{scenario.name}_{scenario.traffic_history.name}_{car}.pkl"
            with open(outfile, "wb") as of:
                pickle.dump(data, of)
    finally:
        smarts.destroy()
def sim(request):
    """Yield a three-agent SMARTS instance that shares one agent interface.

    ``request.param`` is used as the ``agents_alive`` done criterion of the
    single ``AgentInterface`` shared by ``AGENT1``, ``AGENT2`` and ``AGENT3``.
    SUMO runs headless and no Envision client is attached. The instance is
    destroyed once the consumer resumes the generator.
    """
    done_criteria = DoneCriteria(agents_alive=request.param)
    shared_interface = AgentInterface(done_criteria=done_criteria)
    # All three agents deliberately reference the same interface object.
    agents = dict.fromkeys((AGENT1, AGENT2, AGENT3), shared_interface)
    simulation = SMARTS(
        agents,
        traffic_sim=SumoTrafficSimulation(headless=True),
        envision=None,
    )
    yield simulation
    simulation.destroy()
def main(scenarios, headless, seed):
    """Replace each traffic-history vehicle in turn with a lane-keeping agent.

    One SMARTS instance (without a traffic simulation) is reused for all
    episodes. For every mission discovered in a scenario's traffic histories,
    the matching vehicle is handed to a ``KeepLaneAgent``, the traffic history
    provider is told to start at the mission's start time, the mission itself
    is shifted to start at 0.0, and the episode is stepped until the agent is
    done or no longer reported in ``dones``.

    Args:
        scenarios: Scenario sources handed to ``Scenario.scenario_variations``.
        headless: When true, no Envision client is created.
        seed: Accepted for interface compatibility; not used here.
    """
    scenarios_iterator = Scenario.scenario_variations(scenarios, [])

    smarts = SMARTS(
        agent_interfaces={},
        traffic_sim=None,
        envision=None if headless else Envision(),
    )
    # Guarantee teardown even if an episode raises.
    try:
        for _ in scenarios:
            scenario = next(scenarios_iterator)
            agent_missions = scenario.discover_missions_of_traffic_histories()

            for agent_id, mission in agent_missions.items():
                agent_spec = AgentSpec(
                    interface=AgentInterface.from_type(
                        AgentType.LanerWithSpeed, max_episode_steps=None
                    ),
                    agent_builder=KeepLaneAgent,
                    agent_params=scenario.traffic_history_target_speed,
                )
                agent = agent_spec.build_agent()

                # Take control of vehicle with corresponding agent_id
                smarts.switch_ego_agent({agent_id: agent_spec.interface})

                # tell the traffic history provider to start traffic
                # at the point when this agent enters...
                traffic_history_provider = smarts.get_provider_by_type(
                    TrafficHistoryProvider
                )
                assert traffic_history_provider
                traffic_history_provider.start_time = mission.start_time

                # agent vehicle will enter right away...
                modified_mission = replace(mission, start_time=0.0)
                scenario.set_ego_missions({agent_id: modified_mission})

                observations = smarts.reset(scenario)

                dones = {agent_id: False}
                # A missing entry counts as done, ending the episode.
                while not dones.get(agent_id, True):
                    agent_obs = observations[agent_id]
                    agent_action = agent.act(agent_obs)

                    observations, _, dones, _ = smarts.step(
                        {agent_id: agent_action}
                    )
    finally:
        smarts.destroy()
def main(scenarios, headless, seed):
    """Observe social vehicles for 5000 steps and flag those that left.

    The first scenario variation is simulated without ego agents. After each
    step, sensors described by a ``Laner`` interface are attached to the
    current social vehicles and observations are pulled from them. Vehicles
    present on the previous step but absent now are marked done under the
    key ``"Agent-<vehicle id>"``.

    Args:
        scenarios: Scenario sources handed to ``Scenario.scenario_variations``.
        headless: When true, SUMO is started headless and no Envision client
            is created.
        seed: Accepted for interface compatibility; not used here.
    """
    agent_spec = AgentSpec(
        interface=AgentInterface.from_type(AgentType.Laner, max_episode_steps=None),
        agent_builder=None,
        observation_adapter=None,
    )

    smarts = SMARTS(
        agent_interfaces={},
        traffic_sim=SumoTrafficSimulation(headless=headless, auto_start=True),
        envision=None if headless else Envision(),
    )
    # Guarantee teardown even if a step raises.
    try:
        scenarios_iterator = Scenario.scenario_variations(scenarios, [])
        smarts.reset(next(scenarios_iterator))

        prev_vehicles = set()
        for _ in range(5000):
            smarts.step({})

            current_vehicles = smarts.vehicle_index.social_vehicle_ids()
            # We explicitly watch for which agent/vehicles left the simulation
            # here since we don't have a "done criteria" that detects when a
            # vehicle's traffic history has played itself out.
            done_vehicles = prev_vehicles - current_vehicles
            prev_vehicles = current_vehicles

            smarts.attach_sensors_to_vehicles(agent_spec, current_vehicles)
            obs, _, _, dones = smarts.observe_from(current_vehicles)
            # The `dones` returned above should be empty for traffic histories
            # where all vehicles are assumed to stay on the road and not
            # collide.
            # TODO: add the following assert once the maps are accurate enough
            # that we don't have any agents accidentally go off-road.
            # assert not done
            for v in done_vehicles:
                dones[f"Agent-{v}"] = True
            # TODO: save observations for imitation learning
    finally:
        smarts.destroy()
def main(scenarios, headless, seed):
    """Replace each traffic-history vehicle in turn with a lane-keeping agent.

    One SMARTS instance backed by SUMO is reused for all episodes. For every
    mission discovered in a scenario's traffic histories, the matching vehicle
    is handed to a ``KeepLaneAgent``, the traffic history provider is offset
    to the mission's start time, the mission itself is shifted to start at
    0.0, and the episode is stepped until the agent is done.

    Args:
        scenarios: Scenario sources handed to ``Scenario.scenario_variations``.
        headless: When true, SUMO is started headless and no Envision client
            is created.
        seed: Accepted for interface compatibility; not used here.
    """
    scenarios_iterator = Scenario.scenario_variations(scenarios, [])

    smarts = SMARTS(
        agent_interfaces={},
        traffic_sim=SumoTrafficSimulation(headless=headless, auto_start=True),
        envision=None if headless else Envision(),
    )
    # Guarantee teardown even if an episode raises.
    try:
        for _ in scenarios:
            scenario = next(scenarios_iterator)
            agent_missions = scenario.discover_missions_of_traffic_histories()

            for agent_id, mission in agent_missions.items():
                agent_spec = AgentSpec(
                    interface=AgentInterface.from_type(
                        AgentType.Laner, max_episode_steps=None
                    ),
                    agent_builder=KeepLaneAgent,
                )
                agent = agent_spec.build_agent()

                smarts.switch_ego_agent({agent_id: agent_spec.interface})

                # required: get traffic_history_provider and set time offset
                traffic_history_provider = smarts.get_provider_by_type(
                    TrafficHistoryProvider
                )
                assert traffic_history_provider
                traffic_history_provider.set_start_time(mission.start_time)

                modified_mission = replace(mission, start_time=0.0)
                scenario.set_ego_missions({agent_id: modified_mission})

                observations = smarts.reset(scenario)

                dones = {agent_id: False}
                # A missing entry is treated as "done" so that an agent that
                # has been dropped from `dones` ends the episode instead of
                # raising KeyError.
                while not dones.get(agent_id, True):
                    agent_obs = observations[agent_id]
                    agent_action = agent.act(agent_obs)

                    observations, _, dones, _ = smarts.step(
                        {agent_id: agent_action}
                    )
    finally:
        smarts.destroy()
class HiWayEnv(gym.Env):
    """A complete gym environment that wraps a SMARTS simulation.

    Args:
        scenarios: a list of directories of the scenarios that will be run
        agent_specs: a dictionary of agent ids to the AgentSpec of each agent
            that will run in the environment
        sim_name: a string that gives this simulation a name
        shuffle_scenarios: forwarded to `Scenario.scenario_variations`
        headless: true|false envision disabled
        visdom: true|false visdom integration
        fixed_timestep_sec: the step length for all components of the
            simulation (may be None if time deltas are externally-driven);
            falls back to `timestep_sec`, then to 0.1
        seed: the seed for random number generation
        num_external_sumo_clients: the number of SUMO clients beyond SMARTS
        sumo_headless: true|false for SUMO visualization disabled [sumo-gui|sumo]
        sumo_port: used to specify a specific sumo port
        sumo_auto_start: true|false sumo will start automatically
        endless_traffic: forwarded to `SumoTrafficSimulation`
        envision_endpoint: used to specify envision's uri
        envision_record_data_replay_path: used to specify envision's data
            replay output directory
        zoo_addrs: List of (ip, port) tuples of zoo server, used to
            instantiate remote social agents
        timestep_sec: deprecated alias of `fixed_timestep_sec`
    """

    # Metadata for gym's use
    metadata = {"render.modes": ["human"]}

    def __init__(
        self,
        scenarios: Sequence[str],
        agent_specs: Dict[str, AgentSpec],
        sim_name=None,
        shuffle_scenarios=True,
        headless=False,
        visdom=False,
        fixed_timestep_sec=None,
        seed=42,
        num_external_sumo_clients=0,
        sumo_headless=True,
        sumo_port=None,
        sumo_auto_start=True,
        endless_traffic=True,
        envision_endpoint=None,
        envision_record_data_replay_path=None,
        zoo_addrs=None,
        timestep_sec=None,  # for backwards compatibility (deprecated)
    ):
        self._log = logging.getLogger(self.__class__.__name__)
        self.seed(seed)

        if timestep_sec and not fixed_timestep_sec:
            warnings.warn(
                "timestep_sec has been deprecated in favor of fixed_timestep_sec. Please update your code.",
                category=DeprecationWarning,
            )
        if not fixed_timestep_sec:
            fixed_timestep_sec = timestep_sec or 0.1

        self._agent_specs = agent_specs
        # Number of `True` done flags seen since the last reset().
        self._dones_registered = 0

        self._scenarios_iterator = Scenario.scenario_variations(
            scenarios,
            list(agent_specs.keys()),
            shuffle_scenarios,
        )

        agent_interfaces = {
            agent_id: agent.interface for agent_id, agent in agent_specs.items()
        }

        # Envision is also needed in headless mode when a replay is recorded.
        envision_client = None
        if not headless or envision_record_data_replay_path:
            envision_client = Envision(
                endpoint=envision_endpoint,
                sim_name=sim_name,
                output_dir=envision_record_data_replay_path,
                headless=headless,
            )

        visdom_client = None
        if visdom:
            visdom_client = VisdomClient()

        all_sumo = Scenario.supports_traffic_simulation(scenarios)
        traffic_sim = None
        if not all_sumo:
            # We currently only support the Native SUMO Traffic Provider and
            # Social Agents for SUMO maps
            if zoo_addrs:
                warnings.warn("`zoo_addrs` can only be used with SUMO scenarios")
                zoo_addrs = None
            warnings.warn(
                "We currently only support the Native SUMO Traffic Provider and Social Agents for SUMO maps. "
                "All scenarios passed need to be of SUMO, to enable SUMO Traffic Simulation and Social Agents."
            )
        else:
            # Imported lazily so the SUMO module is only loaded when used.
            from smarts.core.sumo_traffic_simulation import SumoTrafficSimulation

            traffic_sim = SumoTrafficSimulation(
                headless=sumo_headless,
                time_resolution=fixed_timestep_sec,
                num_external_sumo_clients=num_external_sumo_clients,
                sumo_port=sumo_port,
                auto_start=sumo_auto_start,
                endless_traffic=endless_traffic,
            )

        self._smarts = SMARTS(
            agent_interfaces=agent_interfaces,
            traffic_sim=traffic_sim,
            envision=envision_client,
            visdom=visdom_client,
            fixed_timestep_sec=fixed_timestep_sec,
            zoo_addrs=zoo_addrs,
        )

    @property
    def agent_specs(self):
        """Agent specs currently in use for this simulation.

        Returns:
            The dictionary of agent ids to AgentSpec given at construction.
        """
        return self._agent_specs

    @property
    def scenario_log(self):
        """Simulation step logs.

        Returns:
            A dictionary with the following:
                fixed_timestep_sec: The timestep of the simulation.
                scenario_map: The name of the current scenario.
                scenario_routes: The routes in the map.
                mission_hash: The hash identifier for the current scenario.
        """
        scenario = self._smarts.scenario
        return {
            "fixed_timestep_sec": self._smarts.fixed_timestep_sec,
            "scenario_map": scenario.name,
            "scenario_routes": scenario.route or "",
            "mission_hash": str(hash(frozenset(scenario.missions.items()))),
        }

    def seed(self, seed: int) -> int:
        """Set the seed of this environment."""
        smarts_seed(seed)
        return seed

    def step(self, agent_actions):
        """Step and return observations, rewards, dones, and infos.

        Actions are passed through each agent's action adapter before the
        simulation step; the resulting observations, rewards and infos are
        passed through the corresponding adapters afterwards. `dones` gains
        an `"__all__"` entry that is true once at least as many done flags
        have been registered since the last reset as there are agent specs.
        """
        agent_actions = {
            agent_id: self._agent_specs[agent_id].action_adapter(action)
            for agent_id, action in agent_actions.items()
        }

        with timeit("SMARTS Simulation/Scenario Step", self._log):
            observations, rewards, dones, extras = self._smarts.step(agent_actions)

        infos = {
            agent_id: {"score": value, "env_obs": observations[agent_id]}
            for agent_id, value in extras["scores"].items()
        }

        for agent_id in observations:
            agent_spec = self._agent_specs[agent_id]
            observation = observations[agent_id]
            reward = rewards[agent_id]
            info = infos[agent_id]

            # All adapters receive the raw (un-adapted) observation/reward.
            rewards[agent_id] = agent_spec.reward_adapter(observation, reward)
            observations[agent_id] = agent_spec.observation_adapter(observation)
            infos[agent_id] = agent_spec.info_adapter(observation, reward, info)

        for done in dones.values():
            self._dones_registered += 1 if done else 0

        dones["__all__"] = self._dones_registered >= len(self._agent_specs)

        return observations, rewards, dones, infos

    def reset(self):
        """Reset the environment and reinitialize to the next scenario.

        Returns:
            A dictionary of agent ids to adapted initial observations.
        """
        scenario = next(self._scenarios_iterator)

        self._dones_registered = 0
        env_observations = self._smarts.reset(scenario)

        observations = {
            agent_id: self._agent_specs[agent_id].observation_adapter(obs)
            for agent_id, obs in env_observations.items()
        }

        return observations

    def render(self, mode="human"):
        """Does nothing."""
        pass

    def close(self):
        """Clean up all remaining resources."""
        if self._smarts is not None:
            self._smarts.destroy()
            # Dropping the reference makes repeated close() calls a no-op.
            self._smarts = None
class HiWayEnv(gym.Env):
    """A complete gym environment that wraps a SMARTS simulation.

    Args:
        scenarios: a list of directories of the scenarios that will be run
        agent_specs: a mapping of agent ids to the agent specs that will run
            in the environment (its `.keys()` and `.items()` are used)
        sim_name: a string that gives this simulation a name
        shuffle_scenarios: forwarded to `Scenario.scenario_variations`
        headless: true|false envision disabled
        visdom: true|false visdom integration
        timestep_sec: the step length for all components of the simulation
        seed: the seed for random number generation
        num_external_sumo_clients: the number of SUMO clients beyond SMARTS
        sumo_headless: true|false for SUMO visualization disabled [sumo-gui|sumo]
        sumo_port: used to specify a specific sumo port
        sumo_auto_start: true|false sumo will start automatically
        endless_traffic: forwarded to `SumoTrafficSimulation`
        envision_endpoint: used to specify envision's uri
        envision_record_data_replay_path: used to specify envision's data
            replay output directory
        zoo_addrs: List of (ip, port) tuples of zoo server, used to
            instantiate remote social agents
    """

    # Metadata for gym's use
    metadata = {"render.modes": ["human"]}

    def __init__(
        self,
        scenarios: Sequence[str],
        agent_specs,
        sim_name=None,
        shuffle_scenarios=True,
        headless=False,
        visdom=False,
        timestep_sec=0.1,
        seed=42,
        num_external_sumo_clients=0,
        sumo_headless=True,
        sumo_port=None,
        sumo_auto_start=True,
        endless_traffic=True,
        envision_endpoint=None,
        envision_record_data_replay_path=None,
        zoo_addrs=None,
    ):
        self._log = logging.getLogger(self.__class__.__name__)
        smarts.core.seed(seed)

        self._agent_specs = agent_specs
        # Number of `True` done flags seen since the last reset().
        self._dones_registered = 0

        self._scenarios_iterator = Scenario.scenario_variations(
            scenarios,
            list(agent_specs.keys()),
            shuffle_scenarios,
        )

        agent_interfaces = {
            agent_id: agent.interface for agent_id, agent in agent_specs.items()
        }

        envision_client = None
        if not headless:
            envision_client = Envision(
                endpoint=envision_endpoint,
                sim_name=sim_name,
                output_dir=envision_record_data_replay_path,
            )

        visdom_client = None
        if visdom:
            visdom_client = VisdomClient()

        self._smarts = SMARTS(
            agent_interfaces=agent_interfaces,
            traffic_sim=SumoTrafficSimulation(
                headless=sumo_headless,
                time_resolution=timestep_sec,
                num_external_sumo_clients=num_external_sumo_clients,
                sumo_port=sumo_port,
                auto_start=sumo_auto_start,
                endless_traffic=endless_traffic,
            ),
            envision=envision_client,
            visdom=visdom_client,
            timestep_sec=timestep_sec,
            zoo_addrs=zoo_addrs,
        )

    @property
    def scenario_log(self):
        """Simulation step logs.

        Returns:
            A dictionary with the following:
                timestep_sec: The timestep of the simulation.
                scenario_map: The name of the current scenario.
                scenario_routes: The routes in the map.
                mission_hash: The hash identifier for the current scenario.
        """
        scenario = self._smarts.scenario
        return {
            "timestep_sec": self._smarts.timestep_sec,
            "scenario_map": scenario.name,
            "scenario_routes": scenario.route or "",
            "mission_hash": str(hash(frozenset(scenario.missions.items()))),
        }

    def step(self, agent_actions):
        """Step and return observations, rewards, dones, and infos.

        Actions are passed through each agent's action adapter before the
        simulation step; the resulting observations, rewards and infos are
        passed through the corresponding adapters afterwards. `dones` gains
        an `"__all__"` entry that is true once at least as many done flags
        have been registered since the last reset as there are agent specs.
        """
        agent_actions = {
            agent_id: self._agent_specs[agent_id].action_adapter(action)
            for agent_id, action in agent_actions.items()
        }

        observations, rewards, agent_dones, extras = self._smarts.step(agent_actions)

        infos = {
            agent_id: {"score": value, "env_obs": observations[agent_id]}
            for agent_id, value in extras["scores"].items()
        }

        for agent_id in observations:
            agent_spec = self._agent_specs[agent_id]
            observation = observations[agent_id]
            reward = rewards[agent_id]
            info = infos[agent_id]

            # All adapters receive the raw (un-adapted) observation/reward.
            rewards[agent_id] = agent_spec.reward_adapter(observation, reward)
            observations[agent_id] = agent_spec.observation_adapter(observation)
            infos[agent_id] = agent_spec.info_adapter(observation, reward, info)

        for done in agent_dones.values():
            self._dones_registered += 1 if done else 0

        # `>=` rather than `==`: the counter only grows between resets, so an
        # exact-equality test would stay false forever once it overshoots.
        agent_dones["__all__"] = self._dones_registered >= len(self._agent_specs)

        return observations, rewards, agent_dones, infos

    def reset(self):
        """Advance to the next scenario and return adapted observations.

        Returns:
            A dictionary of agent ids to adapted initial observations.
        """
        scenario = next(self._scenarios_iterator)

        self._dones_registered = 0
        env_observations = self._smarts.reset(scenario)

        observations = {
            agent_id: self._agent_specs[agent_id].observation_adapter(obs)
            for agent_id, obs in env_observations.items()
        }

        return observations

    def render(self, mode="human"):
        """Does nothing."""
        pass

    def close(self):
        """Destroy the wrapped SMARTS instance; safe to call repeatedly."""
        if self._smarts is not None:
            self._smarts.destroy()
            # Dropping the reference makes repeated close() calls a no-op.
            self._smarts = None
def smarts():
    """Yield an agent-less SMARTS instance on default SUMO, then destroy it."""
    traffic = SumoTrafficSimulation()
    sim = SMARTS({}, traffic_sim=traffic)
    yield sim
    sim.destroy()
class PyMARLHiWayEnv:
    """This class adheres to the PyMARL MultiAgentEnv so it can be run by PyMARL.
    See: https://git.io/JvMb9

    The environment is configured through a single dictionary.

    config: a dictionary with the environment configuration; the keys read
    by this class are:
        scenarios: a list of directories of the scenarios that will be run
            (required)
        n_agents: the number of agents; ids are "Agent 0", "Agent 1", ...
            (default 1)
        episode_limit: the maximum number of steps per episode (default 1000)
        observation_space, action_space, state_space: spaces exposed to
            PyMARL (default to the module-level DEFAULT_* spaces)
        reward_adapter, observation_adapter, action_adapter, done_adapter,
            state_adapter: adapter callables (module-level defaults;
            done_adapter defaults to listing the done values)
        agent_type: the agent type passed to `AgentInterface.from_type`
            (default AgentType.Laner)
        debug: forwarded to `AgentInterface.from_type` (default False)
        sim_name: a string that gives this simulation a name (default None)
        envision_record_data_replay_path: used to specify envision's data
            replay output directory (default None)
        envision_endpoint: used to specify envision's uri (default None)
        headless: true|false envision disabled (default False)
        seed: the seed for random number generation (default 42)
        timestep_sec: the step length for all components of the simulation
            (default 0.01)
    """

    def __init__(self, config):
        self._config = config

        # XXX: These are intentionally left public at PyMARL's request
        self.n_agents = config.get("n_agents", 1)
        self.episode_limit = config.get("episode_limit", 1000)
        self.observation_space = config.get(
            "observation_space", DEFAULT_OBSERVATION_SPACE
        )
        self.action_space = config.get("action_space", DEFAULT_ACTION_SPACE)
        self.state_space = config.get("state_space", DEFAULT_STATE_SPACE)

        self._agent_ids = ["Agent %i" % i for i in range(self.n_agents)]

        self._reward_adapter = config.get("reward_adapter", default_reward_adapter)
        self._observation_adapter = config.get(
            "observation_adapter", default_obs_adapter
        )
        self._action_adapter = config.get("action_adapter", default_action_adapter)
        self._done_adapter = config.get(
            "done_adapter", lambda dones: list(dones.values())
        )
        self._state_adapter = config.get("state_adapter", default_state_adapter)

        self._headless = config.get("headless", False)
        self._timestep_sec = config.get("timestep_sec", 0.01)
        self._observations = None
        self._state = None
        self._steps = 0

        seed = self._config.get("seed", 42)
        smarts.core.seed(seed)

        self._scenarios_iterator = Scenario.scenario_variations(
            config["scenarios"], self._agent_ids
        )

        # Every agent gets its own interface built from the same settings.
        agent_interfaces = {
            agent_id: AgentInterface.from_type(
                config.get("agent_type", AgentType.Laner),
                max_episode_steps=self.episode_limit,
                debug=config.get("debug", False),
            )
            for agent_id in self._agent_ids
        }

        # Envision is also needed in headless mode when a replay is recorded.
        envision = None
        if not self._headless or config.get("envision_record_data_replay_path", None):
            envision = Envision(
                endpoint=config.get("envision_endpoint", None),
                sim_name=config.get("sim_name", None),
                output_dir=config.get("envision_record_data_replay_path", None),
            )

        self._smarts = SMARTS(
            agent_interfaces=agent_interfaces,
            traffic_sim=SumoTrafficSimulation(time_resolution=self._timestep_sec),
            envision=envision,
            timestep_sec=self._timestep_sec,
        )

    def get_obs(self):
        """Return the observations stored by the last reset() or step()."""
        return self._observations

    def get_obs_agent(self, agent_id):
        """Return the stored observation at index `agent_id`."""
        return self._observations[agent_id]

    def get_obs_size(self):
        """Return the flattened size of one agent's observation.

        Sums the element count of every Box sub-space and `n` of every
        Discrete sub-space in `observation_space`; other kinds are skipped.
        """
        obs_size = 0
        for obs in self.observation_space.spaces.values():
            if isinstance(obs, Box):
                obs_size += np.prod(obs.shape)
            elif isinstance(obs, Discrete):
                obs_size += obs.n
        return obs_size

    def get_state(self):
        """Return all stored agent observations concatenated into one array."""
        return np.concatenate(self._observations)

    def get_state_size(self):
        """Return the size of the state: observation size times agent count."""
        return self.get_obs_size() * self.n_agents

    def get_avail_actions(self):
        """Return an all-ones availability vector per agent."""
        return [np.ones((N_ACTIONS,)) for _ in range(self.n_agents)]

    def get_avail_agent_actions(self, agent_id):
        """Return an all-ones availability vector; `agent_id` is not used."""
        return np.ones((N_ACTIONS,))

    def get_total_actions(self):
        """Return the number of actions, `N_ACTIONS`."""
        return N_ACTIONS

    def render(self):
        """Does nothing."""
        pass

    def save_replay(self):
        """Does nothing."""
        pass

    def step(self, agent_actions):
        """Apply one action per agent and advance the simulation.

        Args:
            agent_actions: actions in the same order as the agent ids.

        Returns:
            A tuple of (mean adapted reward, dones array, infos dictionary).
            Once `episode_limit` steps have been taken, the dones array is
            replaced by a single `True`.
        """
        agent_actions = {
            agent_id: self._action_adapter(action)
            for agent_id, action in zip(self._agent_ids, agent_actions)
        }

        observations, rewards, dones, extras = self._smarts.step(agent_actions)

        infos = {
            f"score_{i}": score for i, score in enumerate(extras["scores"].values())
        }

        # Ensure observations contain the same keys as rewards
        assert observations.keys() == rewards.keys()
        self._observations = np.asarray(
            [
                np.concatenate(list(self._observation_adapter(obs).values()))
                for obs in observations.values()
            ]
        )
        rewards = [
            self._reward_adapter(obs, rew)
            for obs, rew in zip(observations.values(), rewards.values())
        ]

        infos["rewards_list"] = rewards

        self._steps += 1
        infos["dones_list"] = np.array(list(dones.values()))
        dones = infos["dones_list"]
        if self._steps >= self.episode_limit:
            infos["episode_steps"] = self._steps
            dones = np.array([True])

        return np.mean(rewards), dones, infos

    def reset(self):
        """Load the next scenario and return the initial observations array."""
        self._steps = 0

        scenario = next(self._scenarios_iterator)
        observations = self._smarts.reset(scenario)
        self._observations = np.asarray(
            [
                np.concatenate(list(self._observation_adapter(obs).values()))
                for obs in observations.values()
            ]
        )
        return self._observations

    def close(self):
        """Destroy the wrapped SMARTS instance; safe to call repeatedly."""
        if self._smarts is not None:
            self._smarts.destroy()
            # Dropping the reference makes repeated close() calls a no-op.
            self._smarts = None

    def get_env_info(self):
        """Return the environment description dictionary PyMARL asks for."""
        return {
            "state_shape": self.get_state_size(),
            "obs_shape": self.get_obs_size(),
            "n_actions": self.get_total_actions(),
            "n_agents": self.n_agents,
            "episode_limit": self.episode_limit,
        }
def main(
    script: str,
    scenarios: Sequence[str],
    headless: bool,
    envision_record_data_replay_path: str,
    seed: int,
    vehicles_to_replace_randomly: int,
    min_timestep_count: int,
    positional_radius: int,
    episodes: int,
):
    """Replay a traffic history and hand some vehicles over to agents.

    The first scenario variation is run for `episodes` episodes on a SMARTS
    instance without a traffic simulation. In each episode a trigger fires
    once the elapsed simulation time exceeds 2; it picks vehicles from the
    moving traffic-history vehicles that have at least `min_timestep_count`
    trajectory rows, builds a `BasicAgent` for each, and switches control of
    the vehicle to that agent. The episode runs until every entry in `dones`
    is true.

    Args:
        script: Name used for the logger.
        scenarios: Scenario sources handed to `Scenario.scenario_variations`.
        headless: When true (and no replay path is given), no Envision client
            is created.
        envision_record_data_replay_path: Output directory passed to Envision;
            a truthy value forces an Envision client even when headless.
        seed: Seed passed to `random_seed`.
        vehicles_to_replace_randomly: Number of candidates to sample; zero or
            negative replaces all candidates, and values above the number of
            candidates are clamped to it.
        min_timestep_count: Minimum trajectory length of a candidate vehicle.
        positional_radius: Passed to
            `scenario.create_dynamic_traffic_history_mission`.
        episodes: Number of episodes to run; must be positive.
    """
    assert episodes > 0
    logger = logging.getLogger(script)
    logger.setLevel(logging.INFO)

    logger.debug("initializing SMARTS")

    envision_client = None
    if not headless or envision_record_data_replay_path:
        envision_client = Envision(output_dir=envision_record_data_replay_path)

    smarts = SMARTS(
        agent_interfaces={},
        traffic_sim=None,
        envision=envision_client,
    )
    # Guarantee teardown even if an episode raises.
    try:
        random_seed(seed)

        scenarios_iterator = Scenario.scenario_variations(scenarios, [])
        scenario = next(scenarios_iterator)

        def should_trigger(ctx: Dict[str, Any]) -> bool:
            return ctx["elapsed_sim_time"] > 2

        def on_trigger(ctx: Dict[str, Any]):
            # Define agent specs to be assigned
            agent_spec = AgentSpec(
                interface=AgentInterface(waypoints=True, action=ActionSpaceType.Lane),
                agent_builder=BasicAgent,
            )

            # Select a random sample from candidates
            k = ctx.get("vehicles_to_replace_randomly", 0)
            if k <= 0:
                logger.warning(
                    "default (0) or negative value specified for replacement. Replacing all valid vehicle candidates."
                )
                sample = ctx["vehicle_candidates"]
            else:
                logger.info(
                    f"Choosing {k} vehicles randomly from {len(ctx['vehicle_candidates'])} valid vehicle candidates."
                )
                sample = random.sample(ctx["vehicle_candidates"], k)
            assert len(sample) != 0

            for veh_id in sample:
                # Map selected vehicles to agent ids & specs
                agent_id = f"agent-{veh_id}"
                ctx["agents"][agent_id] = agent_spec.build_agent()

                # Create missions based on current state and traffic history
                positional, traverse = scenario.create_dynamic_traffic_history_mission(
                    veh_id, ctx["elapsed_sim_time"], ctx["positional_radius"]
                )

                # Take control of vehicles immediately
                try:
                    # Try to assign a PositionalGoal at the last recorded timestep
                    smarts.add_agent_and_switch_control(
                        veh_id, agent_id, agent_spec.interface, positional
                    )
                except PlanningError:
                    logger.warning(
                        f"Unable to create PositionalGoal for vehicle {veh_id}, falling back to TraverseGoal"
                    )
                    smarts.add_agent_and_switch_control(
                        veh_id, agent_id, agent_spec.interface, traverse
                    )

        for episode in range(episodes):
            logger.info(f"starting episode {episode}...")

            # Create a table of vehicle trajectory lengths, filtering out
            # non-moving vehicles
            vehicle_candidates = []
            for raw_id in scenario.traffic_history.all_vehicle_ids():
                v_id = str(raw_id)
                traj = list(scenario.traffic_history.vehicle_trajectory(v_id))
                # Find moving vehicles with more than the minimum number of
                # timesteps
                if len(traj) >= min_timestep_count and any(
                    row.speed != 0 for row in traj
                ):
                    vehicle_candidates.append(v_id)

            assert len(vehicle_candidates) > 0

            k = vehicles_to_replace_randomly
            if k > len(vehicle_candidates):
                logger.warning(
                    f"vehicles_to_replace_randomly={k} is greater than the number of vehicle candidates ({len(vehicle_candidates)})."
                )
                k = len(vehicle_candidates)

            # Initialize trigger and define initial context
            context = {
                "agents": {},
                "elapsed_sim_time": 0.0,
                "vehicle_candidates": vehicle_candidates,
                "vehicles_to_replace_randomly": k,
                "positional_radius": positional_radius,
            }
            trigger = Trigger(should_trigger, on_trigger)

            dones = {}
            observations = smarts.reset(scenario)
            # An empty `dones` means no agent has been stepped yet, so keep going.
            while not dones or not all(dones.values()):
                # Update context
                context["elapsed_sim_time"] = smarts.elapsed_sim_time

                # Step trigger to further update context
                trigger.update(context)

                # Get agents from current context
                agents = context["agents"]

                # Step simulation
                actions = {
                    agent_id: agents[agent_id].act(agent_obs)
                    for agent_id, agent_obs in observations.items()
                }
                logger.debug(
                    f"stepping @ sim_time={smarts.elapsed_sim_time} for agents={list(observations.keys())}..."
                )
                observations, _, dones, _ = smarts.step(actions)

                for agent_id in agents:
                    if dones.get(agent_id, False):
                        if not observations[agent_id].events.reached_goal:
                            logger.warning(
                                f"agent_id={agent_id} exited @ sim_time={smarts.elapsed_sim_time}"
                            )
                            logger.warning(f" ... with {observations[agent_id].events}")
                        else:
                            logger.info(
                                f"agent_id={agent_id} reached goal @ sim_time={smarts.elapsed_sim_time}"
                            )
                            logger.debug(f" ... with {observations[agent_id].events}")
                        # Finished agents must not be asked to act again.
                        del observations[agent_id]
    finally:
        smarts.destroy()
class HiWayEnv(gym.Env):
    """A generic environment for various driving tasks simulated by SMARTS."""

    # Metadata for gym's use.
    metadata = {"render.modes": ["human"]}

    def __init__(
        self,
        scenarios: Sequence[str],
        agent_specs: Dict[str, AgentSpec],
        sim_name: Optional[str] = None,
        shuffle_scenarios: bool = True,
        headless: bool = True,
        visdom: bool = False,
        fixed_timestep_sec: Optional[float] = None,
        seed: int = 42,
        num_external_sumo_clients: int = 0,
        sumo_headless: bool = True,
        sumo_port: Optional[str] = None,
        sumo_auto_start: bool = True,
        endless_traffic: bool = True,
        envision_endpoint: Optional[str] = None,
        envision_record_data_replay_path: Optional[str] = None,
        zoo_addrs: Optional[str] = None,
        timestep_sec: Optional[
            float
        ] = None,  # for backwards compatibility (deprecated)
    ):
        """Create the SMARTS simulation that backs this environment.

        Args:
            scenarios (Sequence[str]): A list of scenario directories that
                will be simulated.
            agent_specs (Dict[str, AgentSpec]): Specification of the agents
                that will run in the environment.
            sim_name (Optional[str], optional): Simulation name. Defaults to
                None.
            shuffle_scenarios (bool, optional): If true, order of scenarios
                will be randomized, else it will be maintained. Defaults to
                True.
            headless (bool, optional): If True, disables visualization in
                Envision. Defaults to True.
            visdom (bool, optional): If True, enables visualization of
                observed RGB images in Visdom. Defaults to False.
            fixed_timestep_sec (Optional[float], optional): Step duration for
                all components of the simulation. When not given (or falsy),
                `timestep_sec` is used if set, otherwise 0.1. Defaults to
                None.
            seed (int, optional): Random number generator seed. Defaults to
                42.
            num_external_sumo_clients (int, optional): Number of SUMO clients
                beyond SMARTS. Defaults to 0.
            sumo_headless (bool, optional): If True, disables visualization
                in SUMO GUI. Defaults to True.
            sumo_port (Optional[str], optional): SUMO port. Defaults to None.
            sumo_auto_start (bool, optional): Automatic starting of SUMO.
                Defaults to True.
            endless_traffic (bool, optional): SUMO's endless traffic setting.
                Defaults to True.
            envision_endpoint (Optional[str], optional): Envision's uri.
                Defaults to None.
            envision_record_data_replay_path (Optional[str], optional):
                Envision's data replay output directory. An Envision client
                is created when this is set, even if `headless` is True.
                Defaults to None.
            zoo_addrs (Optional[str], optional): List of (ip, port) tuples of
                zoo server, used to instantiate remote social agents. Ignored
                (with a warning) unless all scenarios support SUMO traffic
                simulation. Defaults to None.
            timestep_sec (Optional[float], optional): Deprecated alias of
                `fixed_timestep_sec`; only consulted when
                `fixed_timestep_sec` is not given. Defaults to None.
        """
        self._log = logging.getLogger(self.__class__.__name__)
        self.seed(seed)

        if timestep_sec and not fixed_timestep_sec:
            warnings.warn(
                "timestep_sec has been deprecated in favor of fixed_timestep_sec. Please update your code.",
                category=DeprecationWarning,
            )
        if not fixed_timestep_sec:
            fixed_timestep_sec = timestep_sec or 0.1

        self._agent_specs = agent_specs
        # Running count of `True` done flags seen since the last reset.
        self._dones_registered = 0

        self._scenarios_iterator = Scenario.scenario_variations(
            scenarios,
            list(agent_specs.keys()),
            shuffle_scenarios,
        )

        agent_interfaces = {
            agent_id: agent.interface for agent_id, agent in agent_specs.items()
        }

        # Envision is needed either for live visualization or for recording.
        envision_client = None
        if not headless or envision_record_data_replay_path:
            envision_client = Envision(
                endpoint=envision_endpoint,
                sim_name=sim_name,
                output_dir=envision_record_data_replay_path,
                headless=headless,
            )

        visdom_client = None
        if visdom:
            visdom_client = VisdomClient()

        traffic_sim = None
        if Scenario.supports_traffic_simulation(scenarios):
            # Imported lazily so SUMO is only required for SUMO scenarios.
            from smarts.core.sumo_traffic_simulation import SumoTrafficSimulation

            traffic_sim = SumoTrafficSimulation(
                headless=sumo_headless,
                time_resolution=fixed_timestep_sec,
                num_external_sumo_clients=num_external_sumo_clients,
                sumo_port=sumo_port,
                auto_start=sumo_auto_start,
                endless_traffic=endless_traffic,
            )
        else:
            # We currently only support the Native SUMO Traffic Provider and
            # Social Agents for SUMO maps.
            if zoo_addrs:
                warnings.warn("`zoo_addrs` can only be used with SUMO scenarios")
                zoo_addrs = None
            warnings.warn(
                "We currently only support the Native SUMO Traffic Provider and Social Agents for SUMO maps. "
                "All scenarios passed need to be of SUMO, to enable SUMO Traffic Simulation and Social Agents."
            )

        self._smarts = SMARTS(
            agent_interfaces=agent_interfaces,
            traffic_sim=traffic_sim,
            envision=envision_client,
            visdom=visdom_client,
            fixed_timestep_sec=fixed_timestep_sec,
            zoo_addrs=zoo_addrs,
        )

    @property
    def agent_specs(self) -> Dict[str, AgentSpec]:
        """Agents' specifications used in this simulation.

        Returns:
            (Dict[str, AgentSpec]): Agents' specifications.
        """
        return self._agent_specs

    @property
    def scenario_log(self) -> Dict[str, Union[float, str]]:
        """Simulation steps log.

        Returns:
            Dict[str, Union[float,str]]: A dictionary with the following keys.
                fixed_timestep_sec - Simulation timestep.
                scenario_map - Name of the current scenario.
                scenario_routes - Routes in the map.
                mission_hash - Hash identifier for the current scenario.
        """
        scenario = self._smarts.scenario
        return {
            "fixed_timestep_sec": self._smarts.fixed_timestep_sec,
            "scenario_map": scenario.name,
            "scenario_routes": scenario.route or "",
            "mission_hash": str(hash(frozenset(scenario.missions.items()))),
        }

    def seed(self, seed: int) -> int:
        """Sets random number generator seed number.

        Args:
            seed (int): Seed number.

        Returns:
            int: Seed number.
        """
        smarts_seed(seed)
        return seed

    def step(
        self, agent_actions
    ) -> Tuple[
        Dict[str, Observation], Dict[str, float], Dict[str, bool], Dict[str, Any]
    ]:
        """Steps the environment.

        Args:
            agent_actions (Dict[str, Any]): Action taken for each agent.

        Returns:
            Tuple[ Dict[str, Observation], Dict[str, float], Dict[str, bool], Dict[str, Any] ]:
                Observations, rewards, dones, and infos for active agents.
                `dones` additionally carries an "__all__" entry that is True
                once at least as many done flags have been registered since
                the last reset as there are agent specs.
        """
        # Validate before the actions are consumed by the adapters below.
        assert isinstance(agent_actions, dict) and all(
            isinstance(key, str) for key in agent_actions.keys()
        ), "Expected Dict[str, any]"

        agent_actions = {
            agent_id: self._agent_specs[agent_id].action_adapter(action)
            for agent_id, action in agent_actions.items()
        }

        with timeit("SMARTS Simulation/Scenario Step", self._log):
            observations, rewards, dones, extras = self._smarts.step(agent_actions)

        infos = {
            agent_id: {"score": value, "env_obs": observations[agent_id]}
            for agent_id, value in extras["scores"].items()
        }

        for agent_id in observations:
            agent_spec = self._agent_specs[agent_id]
            observation = observations[agent_id]
            reward = rewards[agent_id]
            info = infos[agent_id]

            # All adapters receive the raw (un-adapted) observation and reward.
            rewards[agent_id] = agent_spec.reward_adapter(observation, reward)
            observations[agent_id] = agent_spec.observation_adapter(observation)
            infos[agent_id] = agent_spec.info_adapter(observation, reward, info)

        self._dones_registered += sum(1 for done in dones.values() if done)
        dones["__all__"] = self._dones_registered >= len(self._agent_specs)

        return observations, rewards, dones, infos

    def reset(self) -> Dict[str, Observation]:
        """Reset the environment and initialize to the next scenario.

        Returns:
            Dict[str, Observation]: Agents' observation.
        """
        scenario = next(self._scenarios_iterator)

        self._dones_registered = 0
        env_observations = self._smarts.reset(scenario)

        return {
            agent_id: self._agent_specs[agent_id].observation_adapter(obs)
            for agent_id, obs in env_observations.items()
        }

    def render(self, mode="human"):
        """Does nothing."""
        pass

    def close(self):
        """Closes the environment and releases all resources."""
        if self._smarts is not None:
            self._smarts.destroy()
            # Drop the reference so a repeated close() is a no-op.
            self._smarts = None
def main(script: str, scenarios: Sequence[str], headless: bool, seed: int):
    """Collect per-vehicle observations from social vehicles and pickle them.

    For every scenario variation the simulation is stepped without any ego
    agents. On each step, sensors are attached to the current social vehicles
    of the selected types, their observations are read and passed to
    `_record_data`. When no such vehicles remain, the data collected for each
    car is written to
    `collected_observations/<scenario>_<traffic history>_<car>.pkl`.

    Args:
        script: Name used for this script's logger.
        scenarios: Scenario locations handed to `Scenario.get_scenario_list`.
        headless: If True, SUMO runs headless and no Envision client is made.
        seed: Accepted for interface compatibility; not used by this function.
    """
    logger = logging.getLogger(script)
    logger.setLevel(logging.INFO)

    agent_spec = AgentSpec(
        interface=AgentInterface.from_type(AgentType.Laner, max_episode_steps=None),
        agent_builder=None,
        observation_adapter=None,
    )

    smarts = SMARTS(
        agent_interfaces={},
        traffic_sim=SumoTrafficSimulation(headless=headless, auto_start=True),
        envision=None if headless else Envision(),
    )

    # The simulator is torn down even if a scenario fails part-way through.
    try:
        scenario_list = Scenario.get_scenario_list(scenarios)
        scenarios_iterator = Scenario.variations_for_all_scenario_roots(
            scenario_list, []
        )
        for scenario in scenarios_iterator:
            obs = smarts.reset(scenario)

            collected_data = {}
            _record_data(smarts.elapsed_sim_time, obs, collected_data)

            # could also include "motorcycle" or "truck" in this set if desired
            vehicle_types = frozenset({"car"})

            # filter off-road vehicles from observations
            vehicles_off_road = set()

            while True:
                smarts.step({})
                current_vehicles = smarts.vehicle_index.social_vehicle_ids(
                    vehicle_types=vehicle_types
                )

                # Only stop once something was recorded, so an initially
                # empty road does not end the episode immediately.
                if collected_data and not current_vehicles:
                    print("no more vehicles. exiting...")
                    break

                for veh_id in current_vehicles:
                    try:
                        smarts.attach_sensors_to_vehicles(
                            agent_spec.interface, {veh_id}
                        )
                    except ControllerOutOfLaneException:
                        logger.warning(
                            f"{veh_id} out of lane, skipped attaching sensors"
                        )
                        vehicles_off_road.add(veh_id)

                valid_vehicles = {
                    v for v in current_vehicles if v not in vehicles_off_road
                }
                obs, _, _, _ = smarts.observe_from(valid_vehicles)
                _record_data(smarts.elapsed_sim_time, obs, collected_data)

            # an example of how we might save the data per car
            observation_folder = "collected_observations"
            # exist_ok avoids the check-then-create race of exists()+makedirs().
            os.makedirs(observation_folder, exist_ok=True)
            for car, data in collected_data.items():
                outfile = f"{observation_folder}/{scenario.name}_{scenario.traffic_history.name}_{car}.pkl"
                with open(outfile, "wb") as of:
                    pickle.dump(data, of)
    finally:
        smarts.destroy()
class BenchmarkServer:
    """A complete remote benchmark environment that wraps a SMARTS simulation.

    Args:
        scenarios: a list of directories of the scenarios that will be run
        agent_specs: a dict of agentspecs that will run in the environment;
            must contain an entry for `EGO_ID`
        shuffle_scenarios: forwarded to `Scenario.scenario_variations`
        headless: true|false envision disabled
        visdom: true|false visdom integration
        timestep_sec: the step length for all components of the simulation
        seed: the seed for random number generation
        num_external_sumo_clients: the number of SUMO clients beyond SMARTS
        sumo_headless: true|false for SUMO visualization disabled [sumo-gui|sumo]
        sumo_port: used to specify a specific sumo port
        sumo_auto_start: true|false sumo will start automatically
        endless_traffic: forwarded to `SumoTrafficSimulation`
        envision_endpoint: used to specify envision's uri
        envision_record_data_replay_path: used to specify envision's data
            replay output directory
        zoo_addrs: List of (ip, port) tuples of Zoo Workers, used to
            instantiate remote social agents
    """

    def __init__(
        self,
        scenarios: Sequence[str],
        agent_specs: Dict,
        shuffle_scenarios=True,
        headless=False,
        visdom=False,
        timestep_sec=0.1,
        seed=42,
        num_external_sumo_clients=0,
        sumo_headless=True,
        sumo_port=None,
        sumo_auto_start=True,
        endless_traffic=True,
        envision_endpoint=None,
        envision_record_data_replay_path=None,
        zoo_addrs=None,
    ):
        # NOTE: the attribute name keeps its historical spelling so that any
        # code reaching into the instance keeps working.
        self._metircs = Metric(1)
        self.has_connection = False

        self._log = logging.getLogger(self.__class__.__name__)
        smarts.core.seed(seed)  # Set seed for np and random module.

        self._agent_specs = agent_specs
        # Running count of `True` done flags seen since the last reset.
        self._dones_registered = 0
        # True right after reset(), False after any step(); defined here so
        # it exists before the first reset.
        self._is_reset = False

        # Setup ego.
        self._ego = agent_specs[EGO_ID].build_agent()

        # Setup scenarios for benchmark.
        self._scenarios_iterator = Scenario.scenario_variations(
            scenarios,
            list(agent_specs.keys()),
            shuffle_scenarios,
        )

        # Setup envision and visdom.
        envision_client = None
        if not headless:
            envision_client = Envision(
                endpoint=envision_endpoint,
                output_dir=envision_record_data_replay_path,
            )

        visdom_client = None
        if visdom:
            visdom_client = VisdomClient()

        # Setup SMARTS
        agent_interfaces = {
            agent_id: agent.interface for agent_id, agent in agent_specs.items()
        }

        self._smarts = SMARTS(
            agent_interfaces=agent_interfaces,
            traffic_sim=SumoTrafficSimulation(
                headless=sumo_headless,
                time_resolution=timestep_sec,
                num_external_sumo_clients=num_external_sumo_clients,
                sumo_port=sumo_port,
                auto_start=sumo_auto_start,
                endless_traffic=endless_traffic,
            ),
            envision=envision_client,
            visdom=visdom_client,
            timestep_sec=timestep_sec,
            zoo_addrs=zoo_addrs,
        )

    @property
    def scenario_log(self):
        """Simulation step logs.

        Returns:
            A dictionary with the following:
                timestep_sec: The timestep of the simulation.
                scenario_map: The name of the current scenario.
                scenario_routes: The routes in the map.
                mission_hash: The hash identifier for the current scenario.
        """
        scenario = self._smarts.scenario
        return {
            "timestep_sec": self._smarts.timestep_sec,
            "scenario_map": scenario.name,
            "scenario_routes": scenario.route or "",
            "mission_hash": str(hash(frozenset(scenario.missions.items()))),
        }

    def run_benchmark(self):
        """Main procedure of benchmarking.

        Resets to the first scenario, waits for the ego to connect, then
        alternates between receiving an ego action, stepping the simulation
        and sending the ego its observation. When all agents are done the
        metrics are computed and the next scenario is loaded. Returns once
        the scenario iterator is exhausted.
        """
        try:
            proto_obs = self.reset()
        except StopIteration:
            print("All specified scenarios has been tested!")
            return

        self._ego.block_for_connection(proto_obs[EGO_ID])

        while True:
            proto_ego_act = self._ego.recv_action()
            proto_obs, dones = self.step({EGO_ID: proto_ego_act})

            if dones["__all__"]:
                # Result is currently not consumed; the call is kept in case
                # compute() has side effects on the metric object.
                self._metircs.compute()
                try:
                    proto_obs = self.reset()
                except StopIteration:
                    print("All sceanrios has been tested!")
                    return

            self._ego.send_observation(proto_obs[EGO_ID], self._is_reset)

    def step(self, agent_actions):
        """Adapt and apply the given actions, then return adapted observations.

        Args:
            agent_actions: Mapping of agent id to that agent's (serialized)
                action; each is passed through the agent's `action_adapter`.

        Returns:
            A tuple `(obs, agent_dones)`: observations passed through each
            agent's `observation_adapter`, and the done flags from SMARTS
            extended with an "__all__" entry.
        """
        agent_actions = {
            agent_id: self._agent_specs[agent_id].action_adapter(action)
            for agent_id, action in agent_actions.items()
        }

        observations, rewards, agent_dones, infos = self._smarts.step(agent_actions)

        # TODO: (kls)Enable benchmark
        # self._metircs.log_step(observations, rewards, agent_dones, infos, 0)

        obs = {
            agent_id: self._agent_specs[agent_id].observation_adapter(observation)
            for agent_id, observation in observations.items()
        }

        self._dones_registered += sum(1 for done in agent_dones.values() if done)
        # `>=` rather than `==`: if a done flag is counted more than once the
        # counter overshoots and an equality test would never become true.
        agent_dones["__all__"] = self._dones_registered >= len(self._agent_specs)

        self._is_reset = False
        return obs, agent_dones

    def reset(self) -> Dict:
        """Reset metrics, load the next scenario and return adapted observations.

        Raises:
            StopIteration: When the scenario iterator is exhausted.
        """
        self._metircs.reset()
        scenario = next(self._scenarios_iterator)

        self._dones_registered = 0
        env_observations = self._smarts.reset(scenario)
        obs = {
            agent_id: self._agent_specs[agent_id].observation_adapter(observation)
            for agent_id, observation in env_observations.items()
        }

        self._is_reset = True
        # NOTE(review): this overwrites the x position of the ego's first
        # moving object with a constant; it looks like a debugging leftover -
        # confirm whether it is still intended.
        obs[EGO_ID].mov_objs[0].pose2d.pos.x = 30.0
        print('++++++++++++++RESET++++++++++++')
        return obs

    def close(self):
        """Destroy the underlying SMARTS instance; safe to call repeatedly."""
        if self._smarts is not None:
            self._smarts.destroy()
            self._smarts = None
def main(
    script: str,
    scenarios: Sequence[str],
    headless: bool,
    seed: int,
    vehicles_to_replace: int,
    episodes: int,
):
    """Replay traffic histories with some recorded vehicles replaced by agents.

    For each scenario variation, and for each episode, a sample of recorded
    vehicles is chosen, an agent is built for each one, the traffic history
    is started at the earliest start time among the sampled vehicles, and the
    simulation is stepped until every agent is done.

    Args:
        script: Name used for this script's logger.
        scenarios: Scenario locations handed to `Scenario.get_scenario_list`.
        headless: If True, no Envision client is created.
        seed: Seed passed to `random_seed`.
        vehicles_to_replace: Number of recorded vehicles to hand over to
            agents per episode; must be positive. Capped at the number of
            vehicle missions found in the scenario.
        episodes: Number of episodes to run per scenario; must be positive.
    """
    assert vehicles_to_replace > 0
    assert episodes > 0
    logger = logging.getLogger(script)
    logger.setLevel(logging.INFO)

    logger.debug("initializing SMARTS")

    smarts = SMARTS(
        agent_interfaces={},
        traffic_sim=None,
        envision=None if headless else Envision(),
    )

    # The simulator is torn down even if a scenario fails part-way through.
    try:
        random_seed(seed)
        traffic_history_provider = smarts.get_provider_by_type(
            TrafficHistoryProvider
        )
        assert traffic_history_provider

        scenario_list = Scenario.get_scenario_list(scenarios)
        scenarios_iterator = Scenario.variations_for_all_scenario_roots(
            scenario_list, []
        )
        for scenario in scenarios_iterator:
            logger.debug("working on scenario {}".format(scenario.name))

            veh_missions = scenario.discover_missions_of_traffic_histories()
            if not veh_missions:
                logger.warning(
                    "no vehicle missions found for scenario {}.".format(scenario.name)
                )
                continue
            veh_start_times = {
                v_id: mission.start_time for v_id, mission in veh_missions.items()
            }

            k = vehicles_to_replace
            if k > len(veh_missions):
                logger.warning(
                    "vehicles_to_replace={} is greater than the number of vehicle missions ({}).".format(
                        vehicles_to_replace, len(veh_missions)
                    )
                )
                k = len(veh_missions)

            # XXX replace with AgentSpec appropriate for IL model
            agent_spec = AgentSpec(
                interface=AgentInterface.from_type(AgentType.Imitation),
                agent_builder=ReplayCheckerAgent,
                agent_params=smarts.fixed_timestep_sec,
            )

            for episode in range(episodes):
                logger.info(f"starting episode {episode}...")
                agentid_to_vehid = {}
                agent_interfaces = {}

                # Build the Agents for the to-be-hijacked vehicles
                # and gather their missions
                agents = {}
                dones = {}
                ego_missions = {}
                sample = set()

                if scenario.traffic_history.dataset_source == "Waymo":
                    # For Waymo, we only hijack the vehicle that was autonomous in the dataset
                    waymo_ego_id = scenario.traffic_history.ego_vehicle_id
                    if waymo_ego_id is not None:
                        assert (
                            k == 1
                        ), f"do not specify -k > 1 when just hijacking Waymo ego vehicle (it was {k})"
                        sample = {str(waymo_ego_id)}
                    else:
                        logger.warning(
                            "Waymo ego vehicle id not mentioned in the dataset. Hijacking a random vehicle."
                        )

                if not sample:
                    # For other datasets, hijack a sample of the recorded vehicles
                    # Pick k vehicle missions to hijack with agent
                    # and figure out which one starts the earliest
                    sample = scenario.traffic_history.random_overlapping_sample(
                        veh_start_times, k
                    )
                    if len(sample) < k:
                        logger.warning(
                            f"Unable to choose {k} overlapping missions. allowing non-overlapping."
                        )
                        leftover = set(veh_start_times.keys()) - sample
                        # random.sample() needs a sequence (sets are rejected
                        # on Python 3.11+); sorting also makes the draw
                        # reproducible for a given seed.
                        sample.update(
                            random.sample(
                                sorted(leftover, key=str), k - len(sample)
                            )
                        )

                # NOTE(review): 0.1 looks like a hard-coded step duration;
                # confirm whether smarts.fixed_timestep_sec should be used.
                agent_spec.interface.max_episode_steps = max(
                    [
                        scenario.traffic_history.vehicle_final_exit_time(veh_id) / 0.1
                        for veh_id in sample
                    ]
                )

                logger.info(f"chose vehicles: {sample}")

                # Earliest start among the sampled vehicles. Computed with
                # min() so that a legitimate start time of 0.0 is not
                # mistaken for "not yet set".
                history_start_time = min(
                    veh_start_times[veh_id] for veh_id in sample
                )

                for veh_id in sample:
                    agent_id = f"ego-agent-IL-{veh_id}"
                    agentid_to_vehid[agent_id] = veh_id
                    agent_interfaces[agent_id] = agent_spec.interface

                for agent_id in agent_interfaces.keys():
                    agent = agent_spec.build_agent()
                    veh_id = agentid_to_vehid[agent_id]
                    agent.load_data_for_vehicle(veh_id, scenario, history_start_time)
                    agents[agent_id] = agent
                    dones[agent_id] = False
                    mission = veh_missions[veh_id]
                    ego_missions[agent_id] = replace(
                        mission, start_time=mission.start_time - history_start_time
                    )

                # Tell the traffic history provider to start traffic
                # at the point when the earliest agent enters...
                traffic_history_provider.start_time = history_start_time
                # and all the other agents to offset their missions by this much too
                scenario.set_ego_missions(ego_missions)
                logger.info(f"offsetting sim_time by: {history_start_time}")

                # Take control of vehicles with corresponding agent_ids
                smarts.switch_ego_agents(agent_interfaces)

                # Finally start the simulation loop...
                logger.info("starting simulation loop...")
                observations = smarts.reset(scenario)
                while not all(dones.values()):
                    actions = {
                        agent_id: agents[agent_id].act(agent_obs)
                        for agent_id, agent_obs in observations.items()
                    }
                    logger.debug(
                        "stepping @ sim_time={} for agents={}...".format(
                            smarts.elapsed_sim_time, list(observations.keys())
                        )
                    )
                    observations, rewards, dones, infos = smarts.step(actions)

                    for agent_id in agents.keys():
                        if not dones.get(agent_id, False):
                            continue
                        events = observations[agent_id].events
                        if not events.reached_goal:
                            logger.warning(
                                "agent_id={} exited @ sim_time={}".format(
                                    agent_id, smarts.elapsed_sim_time
                                )
                            )
                            logger.warning(" ... with {}".format(events))
                        else:
                            logger.info(
                                "agent_id={} reached goal @ sim_time={}".format(
                                    agent_id, smarts.elapsed_sim_time
                                )
                            )
                            logger.debug(" ... with {}".format(events))
                        # Finished agents must not be asked to act again.
                        del observations[agent_id]
    finally:
        smarts.destroy()
class SMARTSEnv(gym.Env):
    """Multi-agent gym environment over SMARTS with list-based inputs/outputs.

    Observations, rewards, dones and infos are exchanged as lists ordered
    like `self.agent_ids`; each agent's observation is a flat vector of
    length `self.obs_dim`.
    """

    # Metadata for gym's use.
    metadata = {"render.modes": ["human"]}

    def __init__(self, all_args):
        """Read the configuration from `all_args` and build the simulation.

        Args:
            all_args: Configuration object; the attributes read here and in
                `_build_smarts` must be present on it.

        Raises:
            NotImplementedError: If `all_args.use_discrete` is falsy
                (continuous actions are not supported yet).
        """
        self.all_args = all_args

        # Running count of `True` done flags seen since the last reset.
        self._dones_registered = 0

        self.neighbor_num = all_args.neighbor_num
        self.rews_mode = all_args.rews_mode
        self.n_agents = all_args.num_agents
        self.use_proximity = all_args.use_proximity
        self.use_discrete = all_args.use_discrete  # default True
        self.use_centralized_V = all_args.use_centralized_V

        self.scenarios = [(all_args.scenario_path + all_args.scenario_name)]

        self.agent_ids = ["Agent %i" % i for i in range(self.n_agents)]

        self.obs_space_dict = self.get_obs_space_dict()
        self.obs_dim = self.get_obs_dim()
        # ! TODO:
        self.share_obs_dim = (
            self.get_state_dim() if self.use_centralized_V else self.get_obs_dim()
        )
        self.observation_space = [
            gym.spaces.Box(low=-1e10, high=1e10, shape=(self.obs_dim,))
        ] * self.n_agents
        self.share_observation_space = [
            gym.spaces.Box(low=-1e10, high=1e10, shape=(self.share_obs_dim,))
        ] * self.n_agents

        if self.use_discrete:
            self.act_dim = 4
            self.action_space = [gym.spaces.Discrete(self.act_dim)] * self.n_agents
            self.agent_type = (
                AgentType.Vulner_with_proximity
                if self.use_proximity
                else AgentType.Vulner
            )
        else:
            # TODO Add continous action space
            self.agent_type = (
                AgentType.VulnerCon_with_proximity
                if self.use_proximity
                else AgentType.VulnerCon
            )
            raise NotImplementedError

        self._agent_specs = {
            agent_id: AgentSpec(
                interface=AgentInterface.from_type(
                    self.agent_type, max_episode_steps=all_args.horizon
                ),
                observation_adapter=self.get_obs_adapter(),
                reward_adapter=self.get_rew_adapter(self.rews_mode, self.neighbor_num),
                action_adapter=self.get_act_adapter(),
            )
            for agent_id in self.agent_ids
        }

        self._scenarios_iterator = Scenario.scenario_variations(
            self.scenarios,
            list(self._agent_specs.keys()),
            all_args.shuffle_scenarios,
        )

        self.agent_interfaces = {
            agent_id: agent.interface for agent_id, agent in self._agent_specs.items()
        }

        self.envision_client = None
        if not all_args.headless:
            self.envision_client = Envision(
                endpoint=all_args.envision_endpoint,
                output_dir=all_args.envision_record_data_replay_path,
            )

        self.visdom_client = None
        if all_args.visdom:
            self.visdom_client = VisdomClient()

        self._smarts = self._build_smarts()

    def _build_smarts(self):
        """Create a SMARTS instance from the stored configuration and clients."""
        args = self.all_args
        return SMARTS(
            agent_interfaces=self.agent_interfaces,
            traffic_sim=SumoTrafficSimulation(
                headless=args.sumo_headless,
                time_resolution=args.timestep_sec,
                num_external_sumo_clients=args.num_external_sumo_clients,
                sumo_port=args.sumo_port,
                auto_start=args.sumo_auto_start,
                endless_traffic=args.endless_traffic,
            ),
            envision=self.envision_client,
            visdom=self.visdom_client,
            timestep_sec=args.timestep_sec,
            zoo_workers=args.zoo_workers,
            auth_key=args.auth_key,
        )

    def seed(self, seed):
        """Seed the SMARTS random number generators.

        The value is remembered on `self._seed`. It must not be stored on
        `self.seed`: that would replace this method on the instance and make
        any later `seed()` call fail.
        """
        self._seed = seed
        smarts.core.seed(seed)

    def get_obs_space_dict(self):
        """Return the `gym.spaces.Dict` describing one agent's observation."""
        obs_config = {
            "distance_to_center": gym.spaces.Box(low=-1e10, high=1e10, shape=(1,)),
            "angle_error": gym.spaces.Box(low=-np.pi, high=np.pi, shape=(1,)),
            "speed": gym.spaces.Box(low=-1e10, high=1e10, shape=(1,)),
            "steering": gym.spaces.Box(low=-1e10, high=1e10, shape=(1,)),
            "ego_lane_dist": gym.spaces.Box(low=-1e10, high=1e10, shape=(3,)),
            "ego_ttc": gym.spaces.Box(low=-1e10, high=1e10, shape=(3,)),
            "neighbor": gym.spaces.Box(
                low=-1e3, high=1e3, shape=(self.neighbor_num * 5,)
            ),
        }
        if self.use_proximity:
            obs_config.update(
                {"proximity": gym.spaces.Box(low=-1e10, high=1e10, shape=(8,))}
            )
        return gym.spaces.Dict(obs_config)

    def get_obs_dim(self):
        """Return the total number of scalars across all observation entries."""
        return sum(
            int(np.prod(space.shape)) for space in self.obs_space_dict.spaces.values()
        )

    def get_obs_adapter(self):
        """Return a callable that flattens a SMARTS observation to one vector."""

        def obs_adapter(env_observation):
            adapter = Adapter(
                space=self.obs_space_dict,
                transform=observation_adapter(self.neighbor_num, self.use_proximity),
            )
            obs = adapter.transform(env_observation)
            # Concatenate the per-key arrays in the dict's iteration order.
            return np.concatenate(list(obs.values()), axis=0)

        return obs_adapter

    def get_act_adapter(self):
        """Return a callable mapping a policy action to a lane-action name."""
        lane_actions = [
            "keep_lane",
            "slow_down",
            "change_lane_left",
            "change_lane_right",
        ]

        def action_adapter(policy_action):
            # Sequences are treated as per-action scores and reduced with
            # argmax; anything else is used directly as the index.
            # NOTE(review): a length-1 array holding an index (e.g. [2])
            # therefore always maps to index 0 - confirm callers never pass
            # that form.
            if isinstance(policy_action, (list, tuple, np.ndarray)):
                action = np.argmax(policy_action)
            else:
                action = policy_action
            return lane_actions[action]

        return action_adapter

    def get_rew_adapter(self, adapter_type="vanilla", neighbor_num=3):
        """Return the reward adapter built by `reward_adapter`."""
        return reward_adapter(adapter_type, neighbor_num)

    def _reset(self):
        """Load the next scenario and return adapted observations by agent id."""
        scenario = next(self._scenarios_iterator)

        self._dones_registered = 0
        env_observations = self._smarts.reset(scenario)
        # Raw observations are kept for reward modes that need the previous one.
        self.last_obs = env_observations
        return {
            agent_id: self._agent_specs[agent_id].observation_adapter(obs)
            for agent_id, obs in env_observations.items()
        }

    def reset(self, choose=True):
        """Reset the environment, or return zero observations.

        Args:
            choose: If falsy, nothing is reset and a list of zero vectors
                (one per agent) is returned.

        Returns:
            A list of observations ordered like `self.agent_ids`.
        """
        if not choose:
            return [np.zeros(self.obs_dim) for _ in self.agent_ids]

        try:
            self.current_observations = self._reset()
        except Exception:
            # Best-effort recovery: rebuild the simulation once and retry.
            # `Exception` (not a bare except) so Ctrl-C and interpreter exit
            # are not swallowed.
            self.close()
            self._smarts = self._build_smarts()
            self.current_observations = self._reset()
        return self.get_obs()

    def _step(self, agent_actions):
        """Step SMARTS with a dict of actions and adapt the results.

        Returns:
            `(observations, rewards, agent_dones, infos)` keyed by agent id;
            `agent_dones` also carries an "__all__" entry.
        """
        agent_actions = {
            agent_id: self._agent_specs[agent_id].action_adapter(action)
            for agent_id, action in agent_actions.items()
        }

        observations, rewards, agent_dones, extras = self._smarts.step(agent_actions)

        infos = {
            agent_id: {"scores": value} for agent_id, value in extras["scores"].items()
        }

        for agent_id in observations:
            agent_spec = self._agent_specs[agent_id]
            observation = observations[agent_id]
            reward = rewards[agent_id]
            info = infos[agent_id]

            if self.rews_mode in ("vanilla", "cruising"):
                rewards[agent_id] = agent_spec.reward_adapter(observation, reward)
            elif self.rews_mode == "standard":
                # This mode also receives the previous raw observation.
                rewards[agent_id] = agent_spec.reward_adapter(
                    self.last_obs[agent_id], observation, reward
                )
            # Any other mode leaves the raw reward untouched.

            self.last_obs[agent_id] = observation

            observations[agent_id] = agent_spec.observation_adapter(observation)
            infos[agent_id] = agent_spec.info_adapter(observation, reward, info)

        self._dones_registered += sum(1 for done in agent_dones.values() if done)
        # `>=` rather than `==`: if a done flag is counted more than once the
        # counter overshoots and an equality test would never become true.
        agent_dones["__all__"] = self._dones_registered >= len(self._agent_specs)

        return observations, rewards, agent_dones, infos

    def step(self, action_n):
        """Step with one action per agent, ordered like `self.agent_ids`.

        If every entry of `action_n` equals -1 the simulation is not stepped
        and placeholders are returned (zero observations, `[0]` rewards,
        `None` dones, empty infos).

        Returns:
            `(obs_n, r_n, d_n, info_n)` lists ordered like `self.agent_ids`.
            Agents missing from the simulation's results get a zero
            observation, reward `[0.]`, done `True` and info `{'scores': 0.}`.
        """
        # Compared against the scalar so this works without the removed
        # `np.int` alias and for any shape of `action_n`.
        if np.all(np.asarray(action_n) == -1):
            obs_n = [np.zeros(self.obs_dim) for _ in self.agent_ids]
            r_n = [[0] for _ in self.agent_ids]
            d_n = [None for _ in self.agent_ids]
            info_n = [{} for _ in self.agent_ids]
            return obs_n, r_n, d_n, info_n

        actions = dict(zip(self.agent_ids, action_n))
        self.current_observations, rewards, dones, infos = self._step(actions)

        obs_n = []
        r_n = []
        d_n = []
        info_n = []
        for agent_id in self.agent_ids:
            obs_n.append(
                self.current_observations.get(agent_id, np.zeros(self.obs_dim))
            )
            r_n.append([rewards.get(agent_id, 0.)])
            d_n.append(dones.get(agent_id, True))
            info_n.append(infos.get(agent_id, {'scores': 0.}))
        return obs_n, r_n, d_n, info_n

    def get_obs(self):
        """Returns all agent observations in a list"""
        return [
            self.current_observations.get(agent_id, np.zeros(self.obs_dim))
            for agent_id in self.agent_ids
        ]

    def get_obs_agent(self, agent_id):
        """Returns observation for agent_id

        `agent_id` is used to index the list returned by `get_obs()`, i.e. it
        is a position in `self.agent_ids`, not one of the id strings.
        """
        return self.get_obs()[agent_id]

    def get_state(self):
        """Return the per-agent observations as the state (same as get_obs)."""
        return self.get_obs()

    def get_state_dim(self):
        """Returns the shape of the state"""
        return self.obs_dim

    def render(self, mode="human"):
        """Does nothing."""
        pass

    def close(self):
        """Destroy the underlying SMARTS instance; safe to call repeatedly."""
        if self._smarts is not None:
            self._smarts.destroy()
            self._smarts = None