def main(scenarios, headless, seed): agent_spec = AgentSpec( interface=AgentInterface.from_type(AgentType.Laner, max_episode_steps=None), agent_builder=None, observation_adapter=None, ) smarts = SMARTS( agent_interfaces={}, traffic_sim=SumoTrafficSimulation(headless=True, auto_start=True), envision=Envision(), ) scenarios_iterator = Scenario.scenario_variations( scenarios, list([]), ) smarts.reset(next(scenarios_iterator)) for _ in range(5000): smarts.step({}) smarts.attach_sensors_to_vehicles( agent_spec, smarts.vehicle_index.social_vehicle_ids()) obs, _, _, _ = smarts.observe_from( smarts.vehicle_index.social_vehicle_ids())
def test_no_recapture_agent(smarts_two_agents: SMARTS, two_agent_capture_offset_tenth_of_second): smarts_two_agents.reset(next(two_agent_capture_offset_tenth_of_second)) for i in range(3): smarts_two_agents.step({}) assert len(smarts_two_agents.agent_manager.pending_agent_ids) == 0 assert len(smarts_two_agents.agent_manager.active_agents) == 2 assert len(smarts_two_agents.vehicle_index.agent_vehicle_ids()) == 2
def smarts(scenarios, mock_provider): smarts_ = SMARTS( agent_interfaces={}, traffic_sim=SumoTrafficSimulation(time_resolution=0.1), ) smarts_.add_provider(mock_provider) smarts_.reset(next(scenarios)) yield smarts_ smarts_.destroy()
def test_no_renderer(smarts_wo_renderer: SMARTS, scenario): assert not smarts_wo_renderer.is_rendering smarts_wo_renderer.reset(scenario) assert not smarts_wo_renderer.is_rendering for _ in range(10): smarts_wo_renderer.step({AGENT_ID: "keep_lane"}) assert not smarts_wo_renderer.is_rendering
def test_capture_vehicle(smarts: SMARTS, scenarios): vehicle_prefix = "car-flow-route-west_1_0-east_1_max" smarts.reset(next(scenarios)) vehicle_id = list(smarts.vehicle_index.agent_vehicle_ids())[0] assert vehicle_id.startswith(vehicle_prefix) assert smarts.elapsed_sim_time < 1 assert len(smarts.vehicle_index.agent_vehicle_ids()) == 1 assert smarts.vehicle_index.actor_id_from_vehicle_id( vehicle_id).startswith(AGENT_ID)
def smarts(): smarts_ = SMARTS( agent_interfaces={}, traffic_sim=mock.Mock(), ) scenario = next( Scenario.scenario_variations(["scenarios/intersections/4lane_t"], ["Agent-007"])) smarts_.reset(scenario) return smarts_
def test_optional_renderer(smarts: SMARTS, scenario): assert not smarts.is_rendering renderer = smarts.renderer assert renderer smarts._renderer = None smarts.reset(scenario) assert smarts.is_rendering smarts._renderer = None for _ in range(10): smarts.step({AGENT_ID: "keep_lane"}) assert not smarts.is_rendering
def main(scenarios, headless, seed): scenarios_iterator = Scenario.scenario_variations(scenarios, []) for _ in scenarios: scenario = next(scenarios_iterator) agent_missions = scenario.discover_missions_of_traffic_histories() for agent_id, mission in agent_missions.items(): scenario.set_ego_missions({agent_id: mission}) agent_spec = AgentSpec( interface=AgentInterface.from_type(AgentType.Laner, max_episode_steps=None), agent_builder=KeepLaneAgent, ) agent = agent_spec.build_agent() smarts = SMARTS( agent_interfaces={agent_id: agent_spec.interface}, traffic_sim=SumoTrafficSimulation(headless=True, auto_start=True), envision=Envision(), ) observations = smarts.reset(scenario) dones = {agent_id: False} while not dones[agent_id]: agent_obs = observations[agent_id] agent_action = agent.act(agent_obs) observations, rewards, dones, infos = smarts.step( {agent_id: agent_action})
def main(scenarios, headless, seed): agent_spec = AgentSpec( interface=AgentInterface.from_type(AgentType.Laner, max_episode_steps=None), agent_builder=None, observation_adapter=None, ) smarts = SMARTS( agent_interfaces={}, traffic_sim=SumoTrafficSimulation(headless=headless, auto_start=True), envision=None if headless else Envision(), ) scenarios_iterator = Scenario.scenario_variations( scenarios, list([]), ) smarts.reset(next(scenarios_iterator)) prev_vehicles = set() done_vehicles = set() for _ in range(5000): smarts.step({}) current_vehicles = smarts.vehicle_index.social_vehicle_ids() # We explicitly watch for which agent/vehicles left the simulation here # since we don't have a "done criteria" that detects when a vehicle's # traffic history has played itself out. done_vehicles = prev_vehicles - current_vehicles prev_vehicles = current_vehicles smarts.attach_sensors_to_vehicles(agent_spec, current_vehicles) obs, _, _, dones = smarts.observe_from(current_vehicles) # The `dones` returned above should be empty for traffic histories # where all vehicles are assumed to stay on the road and not collide. # TODO: add the following assert once the maps are accurate enough that # we don't have any agents accidentally go off-road. # assert not done for v in done_vehicles: dones[f"Agent-{v}"] = True # TODO: save observations for imitation learning smarts.destroy()
def main(scenarios: Sequence[str], headless: bool, seed: int): agent_spec = AgentSpec( interface=AgentInterface.from_type(AgentType.Laner, max_episode_steps=None), agent_builder=None, observation_adapter=None, ) smarts = SMARTS( agent_interfaces={}, traffic_sim=SumoTrafficSimulation(headless=headless, auto_start=True), envision=None if headless else Envision(), ) scenarios_iterator = Scenario.scenario_variations( scenarios, list([]), ) scenario = next(scenarios_iterator) obs = smarts.reset(scenario) collected_data = {} _record_data(smarts.elapsed_sim_time, obs, collected_data) # could also include "motorcycle" or "truck" in this set if desired vehicle_types = frozenset({"car"}) while True: smarts.step({}) current_vehicles = smarts.vehicle_index.social_vehicle_ids( vehicle_types=vehicle_types) if collected_data and not current_vehicles: print("no more vehicles. exiting...") break smarts.attach_sensors_to_vehicles(agent_spec, current_vehicles) obs, _, _, dones = smarts.observe_from(current_vehicles) _record_data(smarts.elapsed_sim_time, obs, collected_data) # an example of how we might save the data per car for car, data in collected_data.items(): outfile = f"data_{scenario.name}_{scenario.traffic_history.name}_{car}.pkl" with open(outfile, "wb") as of: pickle.dump(data, of) smarts.destroy()
def main(scenarios, headless, seed): scenarios_iterator = Scenario.scenario_variations(scenarios, []) smarts = SMARTS( agent_interfaces={}, traffic_sim=None, envision=None if headless else Envision(), ) for _ in scenarios: scenario = next(scenarios_iterator) agent_missions = scenario.discover_missions_of_traffic_histories() for agent_id, mission in agent_missions.items(): agent_spec = AgentSpec( interface=AgentInterface.from_type(AgentType.LanerWithSpeed, max_episode_steps=None), agent_builder=KeepLaneAgent, agent_params=scenario.traffic_history_target_speed, ) agent = agent_spec.build_agent() # Take control of vehicle with corresponding agent_id smarts.switch_ego_agent({agent_id: agent_spec.interface}) # tell the traffic history provider to start traffic # at the point when this agent enters... traffic_history_provider = smarts.get_provider_by_type( TrafficHistoryProvider) assert traffic_history_provider traffic_history_provider.start_time = mission.start_time # agent vehicle will enter right away... modified_mission = replace(mission, start_time=0.0) scenario.set_ego_missions({agent_id: modified_mission}) observations = smarts.reset(scenario) dones = {agent_id: False} while not dones.get(agent_id, True): agent_obs = observations[agent_id] agent_action = agent.act(agent_obs) observations, rewards, dones, infos = smarts.step( {agent_id: agent_action}) smarts.destroy()
def main(scenarios, headless, seed): scenarios_iterator = Scenario.scenario_variations(scenarios, []) smarts = SMARTS( agent_interfaces={}, traffic_sim=SumoTrafficSimulation(headless=True, auto_start=True), envision=Envision(), ) for _ in scenarios: scenario = next(scenarios_iterator) agent_missions = scenario.discover_missions_of_traffic_histories() for agent_id, mission in agent_missions.items(): agent_spec = AgentSpec( interface=AgentInterface.from_type(AgentType.Laner, max_episode_steps=None), agent_builder=KeepLaneAgent, ) agent = agent_spec.build_agent() smarts.switch_ego_agent({agent_id: agent_spec.interface}) # required: get traffic_history_provider and set time offset traffic_history_provider = smarts.get_provider_by_type( TrafficHistoryProvider) assert traffic_history_provider traffic_history_provider.set_start_time(mission.start_time) modified_mission = replace(mission, start_time=0.0) scenario.set_ego_missions({agent_id: modified_mission}) observations = smarts.reset(scenario) dones = {agent_id: False} while not dones[agent_id]: agent_obs = observations[agent_id] agent_action = agent.act(agent_obs) observations, rewards, dones, infos = smarts.step( {agent_id: agent_action}) smarts.destroy()
def test_emit_on_default(smarts: SMARTS, empty_scenarios): smarts.reset(next(empty_scenarios)) assert smarts.elapsed_sim_time == 3 assert len(smarts.vehicle_index.agent_vehicle_ids()) == 1 assert len(smarts.vehicle_index.vehicle_ids_by_actor_id(AGENT_ID)) == 1
def main(script: str, scenarios: Sequence[str], headless: bool, seed: int): logger = logging.getLogger(script) logger.setLevel(logging.INFO) agent_spec = AgentSpec( interface=AgentInterface.from_type(AgentType.Laner, max_episode_steps=None), agent_builder=None, observation_adapter=None, ) smarts = SMARTS( agent_interfaces={}, traffic_sim=SumoTrafficSimulation(headless=headless, auto_start=True), envision=None if headless else Envision(), ) scenario_list = Scenario.get_scenario_list(scenarios) scenarios_iterator = Scenario.variations_for_all_scenario_roots(scenario_list, []) for scenario in scenarios_iterator: obs = smarts.reset(scenario) collected_data = {} _record_data(smarts.elapsed_sim_time, obs, collected_data) # could also include "motorcycle" or "truck" in this set if desired vehicle_types = frozenset({"car"}) # filter off-road vehicles from observations vehicles_off_road = set() while True: smarts.step({}) current_vehicles = smarts.vehicle_index.social_vehicle_ids( vehicle_types=vehicle_types ) if collected_data and not current_vehicles: print("no more vehicles. exiting...") break for veh_id in current_vehicles: try: smarts.attach_sensors_to_vehicles(agent_spec.interface, {veh_id}) except ControllerOutOfLaneException: logger.warning(f"{veh_id} out of lane, skipped attaching sensors") vehicles_off_road.add(veh_id) valid_vehicles = {v for v in current_vehicles if v not in vehicles_off_road} obs, _, _, dones = smarts.observe_from(valid_vehicles) _record_data(smarts.elapsed_sim_time, obs, collected_data) # an example of how we might save the data per car observation_folder = "collected_observations" if not os.path.exists(observation_folder): os.makedirs(observation_folder) for car, data in collected_data.items(): outfile = f"{observation_folder}/{scenario.name}_{scenario.traffic_history.name}_{car}.pkl" with open(outfile, "wb") as of: pickle.dump(data, of) smarts.destroy()
class HiWayEnv(gym.Env): """A complete gym environment that wraps a SMARTS simulation. Args: scenarios: a list of directories of the scenarios that will be run agent_specs: a list of agents that will run in the environment sim_name: a string that gives this simulation a name headless: true|false envision disabled visdom: true|false visdom integration timestep_sec: the step length for all components of the simulation seed: the seed for random number generation num_external_sumo_clients: the number of SUMO clients beyond SMARTS sumo_headless: true|false for SUMO visualization disabled [sumo-gui|sumo] sumo_port: used to specify a specific sumo port sumo_auto_start: true|false sumo will start automatically envision_endpoint: used to specify envision's uri envision_record_data_replay_path: used to specify envision's data replay output directory zoo_addrs: List of (ip, port) tuples of zoo server, used to instantiate remote social agents """ metadata = {"render.modes": ["human"]} """Metadata for gym's use""" def __init__( self, scenarios: Sequence[str], agent_specs, sim_name=None, shuffle_scenarios=True, headless=False, visdom=False, timestep_sec=0.1, seed=42, num_external_sumo_clients=0, sumo_headless=True, sumo_port=None, sumo_auto_start=True, endless_traffic=True, envision_endpoint=None, envision_record_data_replay_path=None, zoo_addrs=None, ): self._log = logging.getLogger(self.__class__.__name__) smarts.core.seed(seed) self._agent_specs = agent_specs self._dones_registered = 0 self._scenarios_iterator = Scenario.scenario_variations( scenarios, list(agent_specs.keys()), shuffle_scenarios, ) agent_interfaces = { agent_id: agent.interface for agent_id, agent in agent_specs.items() } envision_client = None if not headless: envision_client = Envision( endpoint=envision_endpoint, sim_name=sim_name, output_dir=envision_record_data_replay_path, ) visdom_client = None if visdom: visdom_client = VisdomClient() self._smarts = SMARTS( agent_interfaces=agent_interfaces, traffic_sim=SumoTrafficSimulation( headless=sumo_headless, time_resolution=timestep_sec, num_external_sumo_clients=num_external_sumo_clients, sumo_port=sumo_port, auto_start=sumo_auto_start, endless_traffic=endless_traffic, ), envision=envision_client, visdom=visdom_client, timestep_sec=timestep_sec, zoo_addrs=zoo_addrs, ) @property def scenario_log(self): """Simulation step logs. Returns: A dictionary with the following: timestep_sec: The timestep of the simulation. scenario_map: The name of the current scenario. scenario_routes: The routes in the map. mission_hash: The hash identifier for the current scenario. """ scenario = self._smarts.scenario return { "timestep_sec": self._smarts.timestep_sec, "scenario_map": scenario.name, "scenario_routes": scenario.route or "", "mission_hash": str(hash(frozenset(scenario.missions.items()))), } def step(self, agent_actions): agent_actions = { agent_id: self._agent_specs[agent_id].action_adapter(action) for agent_id, action in agent_actions.items() } observations, rewards, agent_dones, extras = self._smarts.step( agent_actions) infos = { agent_id: { "score": value, "env_obs": observations[agent_id] } for agent_id, value in extras["scores"].items() } for agent_id in observations: agent_spec = self._agent_specs[agent_id] observation = observations[agent_id] reward = rewards[agent_id] info = infos[agent_id] rewards[agent_id] = agent_spec.reward_adapter(observation, reward) observations[agent_id] = agent_spec.observation_adapter( observation) infos[agent_id] = agent_spec.info_adapter(observation, reward, info) for done in agent_dones.values(): self._dones_registered += 1 if done else 0 agent_dones["__all__"] = self._dones_registered == len( self._agent_specs) return observations, rewards, agent_dones, infos def reset(self): scenario = next(self._scenarios_iterator) self._dones_registered = 0 env_observations = self._smarts.reset(scenario) observations = { agent_id: self._agent_specs[agent_id].observation_adapter(obs) for agent_id, obs in env_observations.items() } return observations def render(self, mode="human"): """Does nothing.""" pass def close(self): if self._smarts is not None: self._smarts.destroy()
class PyMARLHiWayEnv: """This class adheres to the PyMARL MultiAgentEnv so it can be run by PyMARL. See: https://git.io/JvMb9 This environment will want a specific configuration: config: a dictionary with the environment configuration agent_specs: a dictionary of agent_ids to agents that will run in the environment (required) scenarios: a list of directories of the scenarios that will be run (required) sim_name: a string that gives this simulation a name (default None) envision_record_data_replay_path: used to specify envision's data replay output directory (default None) envision_endpoint: used to specify envision's uri (default None) headless: true|false envision disabled (default True) num_external_sumo_clients: the number of SUMO clients beyond SMARTS (default 0) seed: the seed for random number generation (default 42) sumo_auto_start: true|false sumo will start automatically (default False) sumo_headless: true|false for sumo|sumo-gui (default False) sumo_port: used to specify a specific sumo port (default None) timestep_sec: the step length for all components of the simulation (default 0.1) """ def __init__(self, config): self._config = config # XXX: These are intentionally left public at PyMARL's request self.n_agents = config.get("n_agents", 1) self.episode_limit = config.get("episode_limit", 1000) self.observation_space = config.get("observation_space", DEFAULT_OBSERVATION_SPACE) self.action_space = config.get("action_space", DEFAULT_ACTION_SPACE) self.state_space = config.get("state_space", DEFAULT_STATE_SPACE) self._agent_ids = ["Agent %i" % i for i in range(self.n_agents)] self._reward_adapter = config.get("reward_adapter", default_reward_adapter) self._observation_adapter = config.get("observation_adapter", default_obs_adapter) self._action_adapter = config.get("action_adapter", default_action_adapter) self._done_adapter = config.get("done_adapter", lambda dones: list(dones.values())) self._state_adapter = config.get("state_adapter", default_state_adapter) self._headless = config.get("headless", False) self._timestep_sec = config.get("timestep_sec", 0.01) self._observations = None self._state = None self._steps = 0 seed = self._config.get("seed", 42) smarts.core.seed(seed) self._scenarios_iterator = Scenario.scenario_variations( config["scenarios"], self._agent_ids) agent_interfaces = { agent_id: AgentInterface.from_type( config.get("agent_type", AgentType.Laner), max_episode_steps=self.episode_limit, debug=config.get("debug", False), ) for i, agent_id, in enumerate(self._agent_ids) } envision = None if not self._headless or config.get("envision_record_data_replay_path", None): envision = Envision( endpoint=config.get("envision_endpoint", None), sim_name=config.get("sim_name", None), output_dir=config.get("envision_record_data_replay_path", None), ) self._smarts = SMARTS( agent_interfaces=agent_interfaces, traffic_sim=SumoTrafficSimulation( time_resolution=self._timestep_sec), envision=envision, timestep_sec=self._timestep_sec, ) def get_obs(self): return self._observations def get_obs_agent(self, agent_id): return self._observations[agent_id] def get_obs_size(self): obs_size = 0 for obs in self.observation_space.spaces.values(): if type(obs) is Box: obs_size += np.prod(obs.shape) elif type(obs) is Discrete: obs_size += obs.n return obs_size def get_state(self): return np.concatenate(self._observations) def get_state_size(self): return self.get_obs_size() * self.n_agents def get_avail_actions(self): return [np.ones((N_ACTIONS, )) for _ in range(self.n_agents)] def get_avail_agent_actions(self, agent_id): return np.ones((N_ACTIONS, )) def get_total_actions(self): return N_ACTIONS def render(self): pass def save_replay(self): pass def step(self, agent_actions): agent_actions = { agent_id: self._action_adapter(action) for agent_id, action in zip(self._agent_ids, agent_actions) } observations, rewards, dones, extras = self._smarts.step(agent_actions) infos = { f"score_{i}": score for i, score in enumerate(extras["scores"].values()) } # Ensure observations contain the same keys as rewards assert observations.keys() == rewards.keys() self._observations = np.asarray([ np.concatenate(list(self._observation_adapter(obs).values())) for obs in observations.values() ]) rewards = [ self._reward_adapter(obs, rew) for obs, rew in zip(observations.values(), rewards.values()) ] infos["rewards_list"] = rewards self._steps += 1 infos["dones_list"] = np.array(list(dones.values())) dones = infos["dones_list"] if self._steps >= self.episode_limit: infos["episode_steps"] = self._steps dones = np.array([True]) return np.mean(rewards), dones, infos def reset(self): self._steps = 0 scenario = next(self._scenarios_iterator) observations = self._smarts.reset(scenario) self._observations = np.asarray([ np.concatenate(list(self._observation_adapter(obs).values())) for obs in observations.values() ]) return self._observations def close(self): if self._smarts is not None: self._smarts.destroy() def get_env_info(self): return { "state_shape": self.get_state_size(), "obs_shape": self.get_obs_size(), "n_actions": self.get_total_actions(), "n_agents": self.n_agents, "episode_limit": self.episode_limit, }
class HiWayEnv(gym.Env): """A generic environment for various driving tasks simulated by SMARTS.""" metadata = {"render.modes": ["human"]} """Metadata for gym's use""" def __init__( self, scenarios: Sequence[str], agent_specs: Dict[str, AgentSpec], sim_name: Optional[str] = None, shuffle_scenarios: bool = True, headless: bool = True, visdom: bool = False, fixed_timestep_sec: Optional[float] = None, seed: int = 42, num_external_sumo_clients: int = 0, sumo_headless: bool = True, sumo_port: Optional[str] = None, sumo_auto_start: bool = True, endless_traffic: bool = True, envision_endpoint: Optional[str] = None, envision_record_data_replay_path: Optional[str] = None, zoo_addrs: Optional[str] = None, timestep_sec: Optional[ float ] = None, # for backwards compatibility (deprecated) ): """ Args: scenarios (Sequence[str]): A list of scenario directories that will be simulated. agent_specs (Dict[str, AgentSpec]): Specification of the agents that will run in the environment. sim_name (Optional[str], optional): Simulation name. Defaults to None. shuffle_scenarios (bool, optional): If true, order of scenarios will be randomized, else it will be maintained. Defaults to True. headless (bool, optional): If True, disables visualization in Envision. Defaults to False. visdom (bool, optional): If True, enables visualization of observed RGB images in Visdom. Defaults to False. fixed_timestep_sec (Optional[float], optional): Step duration for all components of the simulation. May be None if time deltas are externally-driven. Defaults to None. seed (int, optional): Random number generator seed. Defaults to 42. num_external_sumo_clients (int, optional): Number of SUMO clients beyond SMARTS. Defaults to 0. sumo_headless (bool, optional): If True, disables visualization in SUMO GUI. Defaults to True. sumo_port (Optional[str], optional): SUMO port. Defaults to None. sumo_auto_start (bool, optional): Automatic starting of SUMO. Defaults to True. endless_traffic (bool, optional): SUMO's endless traffic setting. Defaults to True. envision_endpoint (Optional[str], optional): Envision's uri. Defaults to None. envision_record_data_replay_path (Optional[str], optional): Envision's data replay output directory. Defaults to None. zoo_addrs (Optional[str], optional): List of (ip, port) tuples of zoo server, used to instantiate remote social agents. Defaults to None. timestep_sec (Optional[float], optional): [description]. Defaults to None. """ self._log = logging.getLogger(self.__class__.__name__) self.seed(seed) if timestep_sec and not fixed_timestep_sec: warnings.warn( "timestep_sec has been deprecated in favor of fixed_timestep_sec. Please update your code.", category=DeprecationWarning, ) if not fixed_timestep_sec: fixed_timestep_sec = timestep_sec or 0.1 self._agent_specs = agent_specs self._dones_registered = 0 self._scenarios_iterator = Scenario.scenario_variations( scenarios, list(agent_specs.keys()), shuffle_scenarios, ) agent_interfaces = { agent_id: agent.interface for agent_id, agent in agent_specs.items() } envision_client = None if not headless or envision_record_data_replay_path: envision_client = Envision( endpoint=envision_endpoint, sim_name=sim_name, output_dir=envision_record_data_replay_path, headless=headless, ) visdom_client = None if visdom: visdom_client = VisdomClient() all_sumo = Scenario.supports_traffic_simulation(scenarios) traffic_sim = None if not all_sumo: # We currently only support the Native SUMO Traffic Provider and Social Agents for SUMO maps if zoo_addrs: warnings.warn("`zoo_addrs` can only be used with SUMO scenarios") zoo_addrs = None warnings.warn( "We currently only support the Native SUMO Traffic Provider and Social Agents for SUMO maps." "All scenarios passed need to be of SUMO, to enable SUMO Traffic Simulation and Social Agents." ) pass else: from smarts.core.sumo_traffic_simulation import SumoTrafficSimulation traffic_sim = SumoTrafficSimulation( headless=sumo_headless, time_resolution=fixed_timestep_sec, num_external_sumo_clients=num_external_sumo_clients, sumo_port=sumo_port, auto_start=sumo_auto_start, endless_traffic=endless_traffic, ) zoo_addrs = zoo_addrs self._smarts = SMARTS( agent_interfaces=agent_interfaces, traffic_sim=traffic_sim, envision=envision_client, visdom=visdom_client, fixed_timestep_sec=fixed_timestep_sec, zoo_addrs=zoo_addrs, ) @property def agent_specs(self) -> Dict[str, AgentSpec]: """Agents' specifications used in this simulation. Returns: (Dict[str, AgentSpec]): Agents' specifications. """ return self._agent_specs @property def scenario_log(self) -> Dict[str, Union[float, str]]: """Simulation steps log. Returns: Dict[str, Union[float,str]]: A dictionary with the following keys. fixed_timestep_sec - Simulation timestep. scenario_map - Name of the current scenario. scenario_routes - Routes in the map. mission_hash - Hash identifier for the current scenario. """ scenario = self._smarts.scenario return { "fixed_timestep_sec": self._smarts.fixed_timestep_sec, "scenario_map": scenario.name, "scenario_routes": scenario.route or "", "mission_hash": str(hash(frozenset(scenario.missions.items()))), } def seed(self, seed: int) -> int: """Sets random number generator seed number. Args: seed (int): Seed number. Returns: int: Seed number. """ smarts_seed(seed) return seed def step( self, agent_actions ) -> Tuple[ Dict[str, Observation], Dict[str, float], Dict[str, bool], Dict[str, Any] ]: """Steps the environment. Args: agent_actions (Dict[str, Any]): Action taken for each agent. Returns: Tuple[ Dict[str, Observation], Dict[str, float], Dict[str, bool], Dict[str, Any] ]: Observations, rewards, dones, and infos for active agents. """ agent_actions = { agent_id: self._agent_specs[agent_id].action_adapter(action) for agent_id, action in agent_actions.items() } assert isinstance(agent_actions, dict) and all( isinstance(key, str) for key in agent_actions.keys() ), "Expected Dict[str, any]" observations, rewards, dones, extras = None, None, None, None with timeit("SMARTS Simulation/Scenario Step", self._log): observations, rewards, dones, extras = self._smarts.step(agent_actions) infos = { agent_id: {"score": value, "env_obs": observations[agent_id]} for agent_id, value in extras["scores"].items() } for agent_id in observations: agent_spec = self._agent_specs[agent_id] observation = observations[agent_id] reward = rewards[agent_id] info = infos[agent_id] rewards[agent_id] = agent_spec.reward_adapter(observation, reward) observations[agent_id] = agent_spec.observation_adapter(observation) infos[agent_id] = agent_spec.info_adapter(observation, reward, info) for done in dones.values(): self._dones_registered += 1 if done else 0 dones["__all__"] = self._dones_registered >= len(self._agent_specs) return observations, rewards, dones, infos def reset(self) -> Dict[str, Observation]: """Reset the environment and initialize to the next scenario. Returns: Dict[str, Observation]: Agents' observation. """ scenario = next(self._scenarios_iterator) self._dones_registered = 0 env_observations = self._smarts.reset(scenario) observations = { agent_id: self._agent_specs[agent_id].observation_adapter(obs) for agent_id, obs in env_observations.items() } return observations def render(self, mode="human"): """Does nothing.""" pass def close(self): """Closes the environment and releases all resources.""" if self._smarts is not None: self._smarts.destroy() self._smarts = None
def main( script: str, scenarios: Sequence[str], headless: bool, envision_record_data_replay_path: str, seed: int, vehicles_to_replace_randomly: int, min_timestep_count: int, positional_radius: int, episodes: int, ): assert episodes > 0 logger = logging.getLogger(script) logger.setLevel(logging.INFO) logger.debug("initializing SMARTS") envision_client = None if not headless or envision_record_data_replay_path: envision_client = Envision(output_dir=envision_record_data_replay_path) smarts = SMARTS( agent_interfaces={}, traffic_sim=None, envision=envision_client, ) random_seed(seed) scenarios_iterator = Scenario.scenario_variations(scenarios, []) scenario = next(scenarios_iterator) for episode in range(episodes): logger.info(f"starting episode {episode}...") def should_trigger(ctx: Dict[str, Any]) -> bool: return ctx["elapsed_sim_time"] > 2 def on_trigger(ctx: Dict[str, Any]): # Define agent specs to be assigned agent_spec = AgentSpec( interface=AgentInterface(waypoints=True, action=ActionSpaceType.Lane), agent_builder=BasicAgent, ) # Select a random sample from candidates k = ctx.get("vehicles_to_replace_randomly", 0) if k <= 0: logger.warning( "default (0) or negative value specified for replacement. Replacing all valid vehicle candidates." ) sample = ctx["vehicle_candidates"] else: logger.info( f"Choosing {k} vehicles randomly from {len(ctx['vehicle_candidates'])} valid vehicle candidates." ) sample = random.sample(ctx["vehicle_candidates"], k) assert len(sample) != 0 for veh_id in sample: # Map selected vehicles to agent ids & specs agent_id = f"agent-{veh_id}" ctx["agents"][agent_id] = agent_spec.build_agent() # Create missions based on current state and traffic history positional, traverse = scenario.create_dynamic_traffic_history_mission( veh_id, ctx["elapsed_sim_time"], ctx["positional_radius"] ) # Take control of vehicles immediately try: # Try to assign a PositionalGoal at the last recorded timestep smarts.add_agent_and_switch_control( veh_id, agent_id, agent_spec.interface, positional ) except PlanningError: logger.warning( f"Unable to create PositionalGoal for vehicle {veh_id}, falling back to TraverseGoal" ) smarts.add_agent_and_switch_control( veh_id, agent_id, agent_spec.interface, traverse ) # Create a table of vehicle trajectory lengths, filtering out non-moving vehicles vehicle_candidates = [] for v_id in (str(id) for id in scenario.traffic_history.all_vehicle_ids()): traj = list(scenario.traffic_history.vehicle_trajectory(v_id)) # Find moving vehicles with more than the minimum number of timesteps if [row for row in traj if row.speed != 0] and len( traj ) >= min_timestep_count: vehicle_candidates.append(v_id) assert len(vehicle_candidates) > 0 k = vehicles_to_replace_randomly if k > len(vehicle_candidates): logger.warning( f"vehicles_to_replace_randomly={k} is greater than the number of vehicle candidates ({len(vehicle_candidates)})." ) k = len(vehicle_candidates) # Initialize trigger and define initial context context = { "agents": {}, "elapsed_sim_time": 0.0, "vehicle_candidates": vehicle_candidates, "vehicles_to_replace_randomly": k, "positional_radius": positional_radius, } trigger = Trigger(should_trigger, on_trigger) dones = {} observations = smarts.reset(scenario) while not dones or not all(dones.values()): # Update context context["elapsed_sim_time"] = smarts.elapsed_sim_time # Step trigger to further update context trigger.update(context) # Get agents from current context agents = context["agents"] # Step simulation actions = { agent_id: agents[agent_id].act(agent_obs) for agent_id, agent_obs in observations.items() } logger.debug( f"stepping @ sim_time={smarts.elapsed_sim_time} for agents={list(observations.keys())}..." ) observations, rewards, dones, infos = smarts.step(actions) for agent_id in agents.keys(): if dones.get(agent_id, False): if not observations[agent_id].events.reached_goal: logger.warning( f"agent_id={agent_id} exited @ sim_time={smarts.elapsed_sim_time}" ) logger.warning(f" ... with {observations[agent_id].events}") else: logger.info( f"agent_id={agent_id} reached goal @ sim_time={smarts.elapsed_sim_time}" ) logger.debug(f" ... with {observations[agent_id].events}") del observations[agent_id] smarts.destroy()
class BenchmarkServer: '''A complete remote benchmark environment that warps a SMARTS simulation. scenarios: a list of directories of the scenarios that will be run agent_specs: a dict of agentspecs that will run in the environment headless: true|false envision disabled visdom: true|false visdom integration timestep_sec: the step length for all components of the simulation seed: the seed for random number generation num_external_sumo_clients: the number of SUMO clients beyond SMARTS sumo_headless: true|false for SUMO visualization disabled [sumo-gui|sumo] sumo_port: used to specify a specific sumo port sumo_auto_start: true|false sumo will start automatically envision_endpoint: used to specify envision's uri envision_record_data_replay_path: used to specify envision's data replay output directory zoo_addrs: List of (ip, port) tuples of Zoo Workers, used to instantiate remote social agents auth_key: Authentication key of type string for communication with Zoo Workers ''' def __init__(self, scenarios: Sequence[str], agent_specs: Dict, shuffle_scenarios=True, headless=False, visdom=False, timestep_sec=0.1, seed=42, num_external_sumo_clients=0, sumo_headless=True, sumo_port=None, sumo_auto_start=True, endless_traffic=True, envision_endpoint=None, envision_record_data_replay_path=None, zoo_addrs=None): self._metircs = Metric(1) self.has_connection = False self._log = logging.getLogger(self.__class__.__name__) smarts.core.seed(seed) # Set seed for np and random module. self._agent_specs = agent_specs self._dones_registered = 0 # Setup ego. self._ego = agent_specs[EGO_ID].build_agent() # Setup sceanrios for benchmark. self._scenarios_iterator = Scenario.scenario_variations( scenarios, list(agent_specs.keys()), shuffle_scenarios, ) # Setup envision and visdom. envision_client = None if not headless: envision_client = Envision( endpoint=envision_endpoint, output_dir=envision_record_data_replay_path) visdom_client = None if visdom: visdom_client = VisdomClient() # Setup SMARTS agent_interfaces = { agent_id: agent.interface for agent_id, agent in agent_specs.items() } self._smarts = SMARTS( agent_interfaces=agent_interfaces, traffic_sim=SumoTrafficSimulation( headless=sumo_headless, time_resolution=timestep_sec, num_external_sumo_clients=num_external_sumo_clients, sumo_port=sumo_port, auto_start=sumo_auto_start, endless_traffic=endless_traffic, ), envision=envision_client, visdom=visdom_client, timestep_sec=timestep_sec, zoo_addrs=zoo_addrs) @property def scenario_log(self): """Simulation step logs. Returns: A dictionary with the following: timestep_sec: The timestep of the simulation. scenario_map: The name of the current scenario. scenario_routes: The routes in the map. mission_hash: The hash identifier for the current scenario. """ scenario = self._smarts.scenario return { "timestep_sec": self._smarts.timestep_sec, "scenario_map": scenario.name, "scenario_routes": scenario.route or "", "mission_hash": str(hash(frozenset(scenario.missions.items()))), } def run_benchmark(self): '''Main processdure of benchmarking.''' proto_obs = dict() try: proto_obs = self.reset() except StopIteration: print("All specified scenarios has been tested!") return self._ego.block_for_connection(proto_obs[EGO_ID]) while True: proto_ego_act = self._ego.recv_action() proto_obs, dones = self.step({EGO_ID: proto_ego_act}) if dones["__all__"]: metric_res = self._metircs.compute() try: proto_obs = self.reset() except StopIteration: print("All sceanrios has been tested!") return dones = {"__all__": False} proto_ego_obs = proto_obs[EGO_ID] self._ego.send_observation(proto_ego_obs, self._is_reset) def step(self, agent_actions): '''Input serilized action and out serilized observation as well''' agent_actions = { agent_id: self._agent_specs[agent_id].action_adapter(action) for agent_id, action in agent_actions.items() } observations, rewards, agent_dones, infos = self._smarts.step( agent_actions) # TODO: (kls)Enable benchmark # self._metircs.log_step(observations, rewards, agent_dones, infos, 0) obs = dict() for agent_id in observations: agent_spec = self._agent_specs[agent_id] observation = observations[agent_id] obs[agent_id] = agent_spec.observation_adapter(observation) for done in agent_dones.values(): self._dones_registered += 1 if done else 0 agent_dones["__all__"] = self._dones_registered == len( self._agent_specs) self._is_reset = False return obs, agent_dones def reset(self) -> Dict: self._metircs.reset() scenario = next(self._scenarios_iterator) self._dones_registered = 0 env_observations = self._smarts.reset(scenario) obs = { agent_id: self._agent_specs[agent_id].observation_adapter(observation) for agent_id, observation in env_observations.items() } self._is_reset = True obs[EGO_ID].mov_objs[0].pose2d.pos.x = 30.0 print('++++++++++++++RESET++++++++++++') return obs def close(self): if self._smarts is not None: self._smarts.destroy()
class ROSDriver: """Wraps SMARTS as a ROS (v1) node. See the README.md in `examples/ros` for instructions on how to setup and run.""" def __init__(self): self._state_lock = Lock() self._reset_lock = Lock() self._smarts = None self._reset() def _reset(self): if self._smarts: self._smarts.destroy() self._smarts = None self._target_freq = None self._state_topic = None self._state_publisher = None self._agents_publisher = None self._most_recent_state_sent = None self._warned_about_freq = False with self._state_lock: self._recent_state = deque(maxlen=3) with self._reset_lock: self._reset_msg = None self._scenario_path = None self._agents = {} self._agents_to_add = {} def setup_ros( self, node_name: str = "SMARTS", def_namespace: str = "SMARTS/", buffer_size: int = 3, target_freq: float = None, time_ratio: float = 1.0, pub_queue_size: int = 10, ): """Set up the SMARTS ros node.""" assert not self._state_publisher # enforce only one SMARTS instance per ROS network... # NOTE: The node name specified here may be overridden by ROS # remapping arguments from the command line invocation. rospy.init_node(node_name, anonymous=False) # If the namespace is aready set in the environment, we use it, # otherwise we use our default. namespace = def_namespace if not os.environ.get( "ROS_NAMESPACE") else "" self._service_name = f"{namespace}{node_name}_info" self._state_publisher = rospy.Publisher(f"{namespace}entities_out", EntitiesStamped, queue_size=pub_queue_size) self._agents_publisher = rospy.Publisher(f"{namespace}agents_out", AgentsStamped, queue_size=pub_queue_size) rospy.Subscriber(f"{namespace}reset", SmartsReset, self._smarts_reset_callback) self._state_topic = f"{namespace}entities_in" rospy.Subscriber(self._state_topic, EntitiesStamped, self._entities_callback) rospy.Subscriber(f"{namespace}agent_spec", AgentSpec, self._agent_spec_callback) buffer_size = rospy.get_param("~buffer_size", buffer_size) if buffer_size and buffer_size != self._recent_state.maxlen: assert buffer_size > 0 self._recent_state = deque(maxlen=buffer_size) # If target_freq is not specified, SMARTS is allowed to # run as quickly as it can with no delay between steps. target_freq = rospy.get_param("~target_freq", target_freq) if target_freq: assert target_freq > 0.0 self._target_freq = target_freq log_everything_to_ROS(level=logging.WARNING) def setup_smarts(self, headless: bool = True, seed: int = 42, time_ratio: float = 1.0): """Do the setup of the underlying SMARTS instance.""" assert not self._smarts if not self._state_publisher: raise RuntimeError("must call setup_ros() first.") self._zoo_module = rospy.get_param("~zoo_module", "zoo") headless = rospy.get_param("~headless", headless) seed = rospy.get_param("~seed", seed) time_ratio = rospy.get_param("~time_ratio", time_ratio) assert time_ratio > 0.0 self._time_ratio = time_ratio self._smarts = SMARTS( agent_interfaces={}, traffic_sim=None, fixed_timestep_sec=None, envision=None if headless else Envision(), external_provider=True, ) assert self._smarts.external_provider self._last_step_time = None with self._reset_lock: self._reset_msg = None self._scenario_path = None self._agents = {} self._agents_to_add = {} def _smarts_reset_callback(self, reset_msg: SmartsReset): with self._reset_lock: self._reset_msg = reset_msg self._agents = {} self._agents_to_add = {} for ros_agent_spec in reset_msg.initial_agents: self._agent_spec_callback(ros_agent_spec) def _get_smarts_info(self, req: SmartsInfoRequest) -> SmartsInfoResponse: resp = SmartsInfoResponse() resp.header.stamp = rospy.Time.now() if not self._smarts: rospy.logwarn("get_smarts_info() called before SMARTS set up.") return resp resp.version = self._smarts.version resp.step_count = self._smarts.step_count resp.elapsed_sim_time = self._smarts.elapsed_sim_time if self._smarts.scenario: resp.current_scenario_path = self._smarts.scenario.root_filepath return resp def _entities_callback(self, entities: EntitiesStamped): # note: push/pop is thread safe on a deque but # in our smoothing we are accessing all elements # so we still need to protect it. with self._state_lock: self._recent_state.append(entities) @staticmethod def _decode_entity_type(entity_type: int) -> str: if entity_type == EntityState.ENTITY_TYPE_CAR: return "passenger" if entity_type == EntityState.ENTITY_TYPE_TRUCK: return "truck" if entity_type == EntityState.ENTITY_TYPE_TRAILER: return "trailer" if entity_type == EntityState.ENTITY_TYPE_BUS: return "bus" if entity_type == EntityState.ENTITY_TYPE_COACH: return "coach" if entity_type == EntityState.ENTITY_TYPE_PEDESTRIAN: return "pedestrian" if entity_type == EntityState.ENTITY_TYPE_MOTORCYCLE: return "motorcycle" if entity_type == EntityState.ENTITY_TYPE_UNSPECIFIED: return "passenger" rospy.logwarn( f"unsupported entity_type {entity_type}. defaulting to passenger car." ) return "passenger" @staticmethod def _encode_entity_type(entity_type: str) -> int: if entity_type in ["passenger", "car"]: return EntityState.ENTITY_TYPE_CAR if entity_type == "truck": return EntityState.ENTITY_TYPE_TRUCK if entity_type == "trailer": return EntityState.ENTITY_TYPE_TRAILER if entity_type == "bus": return EntityState.ENTITY_TYPE_BUS if entity_type == "coach": return EntityState.ENTITY_TYPE_COACH if entity_type == "pedestrian": return EntityState.ENTITY_TYPE_PEDESTRIAN if entity_type == "motorcycle": return EntityState.ENTITY_TYPE_MOTORCYCLE if entity_type is None: return EntityState.ENTITY_TYPE_UNSPECIFIED rospy.logwarn( f"unsupported entity_type {entity_type}. defaulting to 'car'.") return EntityState.ENTITY_TYPE_CAR @staticmethod def _decode_vehicle_type(vehicle_type: int) -> str: if vehicle_type == AgentSpec.VEHICLE_TYPE_CAR: return "passenger" if vehicle_type == AgentSpec.VEHICLE_TYPE_TRUCK: return "truck" if vehicle_type == AgentSpec.VEHICLE_TYPE_TRAILER: return "trailer" if vehicle_type == AgentSpec.VEHICLE_TYPE_BUS: return "bus" if vehicle_type == AgentSpec.VEHICLE_TYPE_COACH: return "coach" if vehicle_type == AgentSpec.VEHICLE_TYPE_PEDESTRIAN: return "pedestrian" if vehicle_type == AgentSpec.VEHICLE_TYPE_MOTORCYCLE: return "motorcycle" if vehicle_type == AgentSpec.VEHICLE_TYPE_UNSPECIFIED: return "passenger" rospy.logwarn( f"unsupported vehicle_type {vehicle_type}. defaulting to passenger car." ) return "passenger" @staticmethod def _pose_from_ros(ros_pose) -> Pose: return Pose( position=(ros_pose.position.x, ros_pose.position.y, ros_pose.position.z), orientation=( ros_pose.orientation.x, ros_pose.orientation.y, ros_pose.orientation.z, ros_pose.orientation.w, ), ) def _agent_spec_callback(self, ros_agent_spec: AgentSpec): assert (len(ros_agent_spec.tasks) == 1 ), "more than 1 task per agent is not yet supported" task = ros_agent_spec.tasks[0] task_params = json.loads(task.params_json) if task.params_json else {} task_version = task.task_ver or "latest" agent_locator = f"{self._zoo_module}:{task.task_ref}-{task_version}" agent_spec = None try: agent_spec = registry.make(agent_locator, **task_params) except ImportError as ie: rospy.logerr( f"Unable to locate agent with locator={agent_locator}: {ie}") if not agent_spec: rospy.logwarn( f"got unknown task_ref '{task.task_ref}' in AgentSpec message with params='{task.param_json}'. ignoring." ) return if (ros_agent_spec.end_pose.position.x != 0.0 or ros_agent_spec.end_pose.position.y != 0.0): goal = PositionalGoal( ( ros_agent_spec.end_pose.position.x, ros_agent_spec.end_pose.position.y, ), ros_agent_spec.veh_length, ) else: goal = EndlessGoal() mission = Mission( start=Start.from_pose( ROSDriver._pose_from_ros(ros_agent_spec.start_pose)), goal=goal, # TODO: how to prevent them from spawning on top of another existing vehicle? (see how it's done in SUMO traffic) entry_tactic=default_entry_tactic(ros_agent_spec.start_speed), vehicle_spec=VehicleSpec( veh_id=f"veh_for_agent_{ros_agent_spec.agent_id}", veh_config_type=ROSDriver._decode_vehicle_type( ros_agent_spec.veh_type), dimensions=Dimensions( ros_agent_spec.veh_length, ros_agent_spec.veh_width, ros_agent_spec.veh_height, ), ), ) with self._reset_lock: if (ros_agent_spec.agent_id in self._agents or ros_agent_spec.agent_id in self._agents_to_add): rospy.logwarn( f"trying to add new agent with existing agent_id '{ros_agent_spec.agent_id}'. ignoring." ) return self._agents_to_add[ros_agent_spec.agent_id] = (agent_spec, mission) @staticmethod def _xyz_to_vect(xyz) -> np.ndarray: return np.array((xyz.x, xyz.y, xyz.z)) @staticmethod def _xyzw_to_vect(xyzw) -> np.ndarray: return np.array((xyzw.x, xyzw.y, xyzw.z, xyzw.w)) @staticmethod def _entity_to_vs(entity: EntityState) -> VehicleState: veh_id = entity.entity_id veh_type = ROSDriver._decode_entity_type(entity.entity_type) veh_dims = Dimensions(entity.length, entity.width, entity.height) vs = VehicleState( source="EXTERNAL", vehicle_id=veh_id, vehicle_config_type=veh_type, pose=Pose( ROSDriver._xyz_to_vect(entity.pose.position), ROSDriver._xyzw_to_vect(entity.pose.orientation), ), dimensions=veh_dims, linear_velocity=ROSDriver._xyz_to_vect(entity.velocity.linear), angular_velocity=ROSDriver._xyz_to_vect(entity.velocity.angular), linear_acceleration=ROSDriver._xyz_to_vect( entity.acceleration.linear), angular_acceleration=ROSDriver._xyz_to_vect( entity.acceleration.angular), ) vs.set_privileged() vs.speed = np.linalg.norm(vs.linear_velocity) return vs class _VehicleStateVector: def __init__(self, vs: VehicleState, stamp: float): self.vs = vs self.vector = np.concatenate(( np.array((stamp, )), vs.pose.position, np.array((vs.pose.heading, )), np.array(vs.dimensions.as_lwh), vs.linear_velocity, vs.angular_velocity, vs.linear_acceleration, vs.angular_acceleration, )) @property def stamp(self): """The estimated timestamp of this vehicle state.""" return self.vector[0] def average_with(self, other_vect: np.ndarray): """Update this vehicle state with the average between this state and the given new state. """ self.vector += other_vect self.vector /= 2 self.update_vehicle_state() def update_vehicle_state(self): """Update this vehicle state.""" assert len(self.vector) == 20 self.vs.pose = Pose.from_center( self.vector[1:4], Heading(self.vector[4] % (2 * math.pi))) self.vs.dimensions = Dimensions(*self.vector[5:8]) self.linear_velocity = self.vector[8:11] self.angular_velocity = self.vector[11:14] self.linear_acceleration = self.vector[14:17] self.angular_acceleration = self.vector[17:] self.vs.speed = np.linalg.norm(self.linear_velocity) @staticmethod def _moving_average( vehicle_id: str, states: Sequence[EntitiesStamped] ) -> Tuple[VehicleState, float, float, float]: prev_states = [] for s in range(len(states)): prev_state = states[-1 - s] for entity in prev_state.entities: if entity.entity_id == vehicle_id: vs = ROSDriver._entity_to_vs(entity) stamp = prev_state.header.stamp.to_sec() prev_states.append(ROSDriver._VehicleStateVector( vs, stamp)) assert prev_states if len(prev_states) == 1: return (prev_states[0].vs, prev_states[0].stamp, 0, 0) prev_states[0].average_with(prev_states[1].vector) if len(prev_states) == 2: return (prev_states[0].vs, prev_states[0].stamp, 0, 0) prev_states[1].average_with(prev_states[2].vector) dt = prev_states[0].stamp - prev_states[1].stamp lin_acc_slope = (prev_states[0].vs.linear_acceleration - prev_states[1].vs.linear_acceleration) / dt ang_acc_slope = (prev_states[0].vs.angular_acceleration - prev_states[1].vs.angular_acceleration) / dt return (prev_states[0].vs, prev_states[0].stamp, lin_acc_slope, ang_acc_slope) @staticmethod def _extrapolate_to_now(vs: VehicleState, staleness: float, lin_acc_slope: float, ang_acc_slope: float): """Here we just linearly extrapolate the acceleration to "now" from the previous state for each vehicle and then use standard kinematics to project the velocity and position from that.""" # The following ~10 lines are a hack b/c I'm too stupid to figure out # how to do calculus on quaternions... heading = vs.pose.heading heading_delta_vec = staleness * ( vs.angular_velocity + 0.5 * vs.angular_acceleration * staleness + ang_acc_slope * staleness**2 / 6.0) heading += vec_to_radians(heading_delta_vec[:2]) + (0.5 * math.pi) heading %= 2 * math.pi vs.pose.orientation = fast_quaternion_from_angle(heading) # XXX: also need to remove the cached heading_ since we've changed orientation vs.pose.heading_ = None # I assume the following should be updated based on changing # heading from above, but I'll leave that for now... vs.pose.position += staleness * ( vs.linear_velocity + 0.5 * vs.linear_acceleration * staleness + lin_acc_slope * staleness**2 / 6.0) vs.linear_velocity += staleness * (vs.linear_acceleration + 0.5 * lin_acc_slope * staleness) vs.speed = np.linalg.norm(vs.linear_velocity) vs.angular_velocity += staleness * (vs.angular_acceleration + 0.5 * ang_acc_slope * staleness) vs.linear_acceleration += staleness * lin_acc_slope vs.angular_acceleration += staleness * ang_acc_slope def _update_smarts_state(self) -> float: with self._state_lock: if (not self._recent_state or self._most_recent_state_sent == self._recent_state[-1]): rospy.logdebug( f"No messages received on topic {self._state_topic} yet to send to SMARTS." ) states = None else: states = [s for s in self._recent_state] rosnow = rospy.get_rostime() if self._last_step_time: step_delta = (rosnow - self._last_step_time).to_sec() else: step_delta = None self._last_step_time = rosnow if not states: return step_delta # Note: when the source of these states is a co-simulator # running on another machine across the network, for accurate # extrapolation and staleness-related computations, it is # a good idea to either use an external time server or a # ROS /clock node (in which case the /use_sim_time parameter # shoule be set to True). entities = [] most_recent_state = states[-1] for entity in most_recent_state.entities: vs, stamp, lin_acc_slope, ang_acc_slope = ROSDriver._moving_average( entity.entity_id, states) entity_staleness = rosnow.to_sec() - stamp if entity_staleness > 0: ROSDriver._extrapolate_to_now(vs, entity_staleness, lin_acc_slope, ang_acc_slope) entities.append(vs) rospy.logdebug( f"sending state to SMARTS w/ step_delta={step_delta}, approximate staleness={(rosnow - most_recent_state.header.stamp).to_sec()}..." ) self._smarts.external_provider.state_update(entities, step_delta) self._most_recent_state_sent = most_recent_state return step_delta @staticmethod def _vector_to_xyz(v, xyz): xyz.x, xyz.y, xyz.z = v[0], v[1], v[2] @staticmethod def _vector_to_xyzw(v, xyzw): xyzw.x, xyzw.y, xyzw.z, xyzw.w = v[0], v[1], v[2], v[3] def _publish_state(self): smarts_state = self._smarts.external_provider.all_vehicle_states entities = EntitiesStamped() entities.header.stamp = rospy.Time.now() for vehicle in smarts_state: entity = EntityState() entity.entity_id = vehicle.vehicle_id entity.entity_type = ROSDriver._encode_entity_type( vehicle.vehicle_type) entity.length = vehicle.dimensions.length entity.width = vehicle.dimensions.width entity.height = vehicle.dimensions.height ROSDriver._vector_to_xyz(vehicle.pose.position, entity.pose.position) ROSDriver._vector_to_xyzw(vehicle.pose.orientation, entity.pose.orientation) ROSDriver._vector_to_xyz(vehicle.linear_velocity, entity.velocity.linear) ROSDriver._vector_to_xyz(vehicle.angular_velocity, entity.velocity.angular) if vehicle.linear_acceleration: ROSDriver._vector_to_xyz(vehicle.linear_acceleration, entity.acceleration.linear) if vehicle.angular_acceleration: ROSDriver._vector_to_xyz(vehicle.angular_acceleration, entity.acceleration.angular) entities.entities.append(entity) self._state_publisher.publish(entities) def _publish_agents(self, observations: Dict[str, Observation], dones: Dict[str, bool]): agents = AgentsStamped() agents.header.stamp = rospy.Time.now() for agent_id, agent_obs in observations.items(): veh_state = agent_obs.ego_vehicle_state pose = Pose.from_center(veh_state.position, veh_state.heading) agent = AgentReport() agent.agent_id = agent_id ROSDriver._vector_to_xyz(pose.position, agent.pose.position) ROSDriver._vector_to_xyzw(pose.orientation, agent.pose.orientation) agent.speed = veh_state.speed agent.distance_travelled = agent_obs.distance_travelled agent.is_done = dones[agent_id] agent.reached_goal = agent_obs.events.reached_goal agent.did_collide = bool(agent_obs.events.collisions) agent.is_wrong_way = agent_obs.events.wrong_way agent.is_off_route = agent_obs.events.off_route agent.is_off_road = agent_obs.events.off_road agent.is_on_shoulder = agent_obs.events.on_shoulder agent.is_not_moving = agent_obs.events.not_moving agents.agents.append(agent) self._agents_publisher.publish(agents) def _do_agents(self, observations: Dict[str, Observation]) -> Dict[str, Any]: with self._reset_lock: actions = { agent_id: self._agents[agent_id].act(agent_obs) for agent_id, agent_obs in observations.items() } for agent_id, agent in self._agents_to_add.items(): spec, mission = agent[0], agent[1] assert agent_id not in self._agents agent = spec.build_agent() # XXX: hack! in the future this injection must be removed or done another way... agent.sim = self._smarts self._agents[agent_id] = agent self._smarts.add_agent_with_mission(agent_id, spec.interface, mission) self._agents_to_add = {} return actions def _get_map_spec(self) -> MapSpec: """SMARTS ROS nodes can extend from this ROSDriver base class and implement this method to return an alternative MapSpec object designating the map builder to use in the Scenario (returning None indicates to use the default for the current Scenario). self._scenario_path can be used to construct a MapSpec object.""" return None def _check_reset(self) -> Dict[str, Observation]: with self._reset_lock: if self._reset_msg: self._scenario_path = self._reset_msg.scenario rospy.loginfo( f"resetting SMARTS w/ scenario={self._scenario_path}") self._reset_msg = None self._last_step_time = None self._recent_state = deque(maxlen=3) self._most_recent_state_sent = None self._warned_about_freq = False map_spec = self._get_map_spec() return self._smarts.reset( Scenario(self._scenario_path, map_spec=map_spec)) return None def run_forever(self): """Publish the SMARTS ros node and run indefinitely.""" if not self._state_publisher: raise RuntimeError("must call setup_ros() first.") if not self._smarts: raise RuntimeError("must call setup_smarts() first.") rospy.Service(self._service_name, SmartsInfo, self._get_smarts_info) warned_scenario = False observations = {} step_delta = None if self._target_freq: rate = rospy.Rate(self._target_freq) rospy.loginfo(f"starting to spin") try: while not rospy.is_shutdown(): obs = self._check_reset() if not self._scenario_path: if not warned_scenario: rospy.loginfo( "waiting for scenario on reset channel...") warned_scenario = True elif self._last_step_time: rospy.loginfo("no more scenarios. exiting...") break continue if obs is not None: observations = obs try: actions = self._do_agents(observations) step_delta = self._update_smarts_state() observations, _, dones, _ = self._smarts.step( actions, step_delta) self._publish_state() self._publish_agents(observations, dones) except Exception as e: if isinstance(e, rospy.ROSInterruptException): raise e batch_mode = rospy.get_param("~batch_mode", False) if not batch_mode: raise e import traceback rospy.logerr(f"SMARTS raised exception: {e}") rospy.logerr(traceback.format_exc()) rospy.logerr("Will wait for next reset...") self._smarts = None self.setup_smarts() if self._target_freq: if rate.remaining().to_sec() <= 0.0: msg = f"SMARTS unable to maintain requested target_freq of {self._target_freq} Hz." if self._warned_about_freq: rospy.loginfo(msg) else: rospy.logwarn(msg) self._warned_about_freq = True rate.sleep() except rospy.ROSInterruptException: rospy.loginfo("ROS interrupted. exiting...") self._reset() # cleans up the SMARTS instance...
class HiWayEnv(gym.Env): """A complete gym environment that wraps a SMARTS simulation. Args: scenarios: a list of directories of the scenarios that will be run agent_specs: a list of agents that will run in the environment sim_name: a string that gives this simulation a name headless: true|false envision disabled visdom: true|false visdom integration fixed_timestep_sec: the step length for all components of the simulation (may be None if time deltas are externally-driven) seed: the seed for random number generation num_external_sumo_clients: the number of SUMO clients beyond SMARTS sumo_headless: true|false for SUMO visualization disabled [sumo-gui|sumo] sumo_port: used to specify a specific sumo port sumo_auto_start: true|false sumo will start automatically envision_endpoint: used to specify envision's uri envision_record_data_replay_path: used to specify envision's data replay output directory zoo_addrs: List of (ip, port) tuples of zoo server, used to instantiate remote social agents """ metadata = {"render.modes": ["human"]} """Metadata for gym's use""" def __init__( self, scenarios: Sequence[str], agent_specs: Dict[str, AgentSpec], sim_name=None, shuffle_scenarios=True, headless=False, visdom=False, fixed_timestep_sec=None, seed=42, num_external_sumo_clients=0, sumo_headless=True, sumo_port=None, sumo_auto_start=True, endless_traffic=True, envision_endpoint=None, envision_record_data_replay_path=None, zoo_addrs=None, timestep_sec=None, # for backwards compatibility (deprecated) ): self._log = logging.getLogger(self.__class__.__name__) self.seed(seed) if timestep_sec and not fixed_timestep_sec: warnings.warn( "timestep_sec has been deprecated in favor of fixed_timestep_sec. Please update your code.", category=DeprecationWarning, ) if not fixed_timestep_sec: fixed_timestep_sec = timestep_sec or 0.1 self._agent_specs = agent_specs self._dones_registered = 0 self._scenarios_iterator = Scenario.scenario_variations( scenarios, list(agent_specs.keys()), shuffle_scenarios, ) agent_interfaces = { agent_id: agent.interface for agent_id, agent in agent_specs.items() } envision_client = None if not headless or envision_record_data_replay_path: envision_client = Envision( endpoint=envision_endpoint, sim_name=sim_name, output_dir=envision_record_data_replay_path, headless=headless, ) visdom_client = None if visdom: visdom_client = VisdomClient() all_sumo = Scenario.supports_traffic_simulation(scenarios) traffic_sim = None if not all_sumo: # We currently only support the Native SUMO Traffic Provider and Social Agents for SUMO maps if zoo_addrs: warnings.warn( "`zoo_addrs` can only be used with SUMO scenarios") zoo_addrs = None warnings.warn( "We currently only support the Native SUMO Traffic Provider and Social Agents for SUMO maps." "All scenarios passed need to be of SUMO, to enable SUMO Traffic Simulation and Social Agents." ) pass else: from smarts.core.sumo_traffic_simulation import SumoTrafficSimulation traffic_sim = SumoTrafficSimulation( headless=sumo_headless, time_resolution=fixed_timestep_sec, num_external_sumo_clients=num_external_sumo_clients, sumo_port=sumo_port, auto_start=sumo_auto_start, endless_traffic=endless_traffic, ) zoo_addrs = zoo_addrs self._smarts = SMARTS( agent_interfaces=agent_interfaces, traffic_sim=traffic_sim, envision=envision_client, visdom=visdom_client, fixed_timestep_sec=fixed_timestep_sec, zoo_addrs=zoo_addrs, ) @property def agent_specs(self): """Agent specs currently in use for this simulation. Returns: A list of AgentSpec. """ return self._agent_specs @property def scenario_log(self): """Simulation step logs. Returns: A dictionary with the following: fixed_timestep_sec: The timestep of the simulation. scenario_map: The name of the current scenario. scenario_routes: The routes in the map. mission_hash: The hash identifier for the current scenario. """ scenario = self._smarts.scenario return { "fixed_timestep_sec": self._smarts.fixed_timestep_sec, "scenario_map": scenario.name, "scenario_routes": scenario.route or "", "mission_hash": str(hash(frozenset(scenario.missions.items()))), } def seed(self, seed: int) -> int: """Set the seed of this environment.""" smarts_seed(seed) return seed def step(self, agent_actions): """Step and return observations, rewards, dones, and infos.""" agent_actions = { agent_id: self._agent_specs[agent_id].action_adapter(action) for agent_id, action in agent_actions.items() } observations, rewards, dones, extras = None, None, None, None with timeit("SMARTS Simulation/Scenario Step", self._log): observations, rewards, dones, extras = self._smarts.step( agent_actions) infos = { agent_id: { "score": value, "env_obs": observations[agent_id] } for agent_id, value in extras["scores"].items() } for agent_id in observations: agent_spec = self._agent_specs[agent_id] observation = observations[agent_id] reward = rewards[agent_id] info = infos[agent_id] rewards[agent_id] = agent_spec.reward_adapter(observation, reward) observations[agent_id] = agent_spec.observation_adapter( observation) infos[agent_id] = agent_spec.info_adapter(observation, reward, info) for done in dones.values(): self._dones_registered += 1 if done else 0 dones["__all__"] = self._dones_registered >= len(self._agent_specs) return observations, rewards, dones, infos def reset(self): """Reset the environment and reinitialize to the next scenario.""" scenario = next(self._scenarios_iterator) self._dones_registered = 0 env_observations = self._smarts.reset(scenario) observations = { agent_id: self._agent_specs[agent_id].observation_adapter(obs) for agent_id, obs in env_observations.items() } return observations def render(self, mode="human"): """Does nothing.""" pass def close(self): """Clean up all remaining resources.""" if self._smarts is not None: self._smarts.destroy() self._smarts = None
def main( script: str, scenarios: Sequence[str], headless: bool, seed: int, vehicles_to_replace: int, episodes: int, ): assert vehicles_to_replace > 0 assert episodes > 0 logger = logging.getLogger(script) logger.setLevel(logging.INFO) logger.debug("initializing SMARTS") smarts = SMARTS( agent_interfaces={}, traffic_sim=None, envision=None if headless else Envision(), ) random_seed(seed) traffic_history_provider = smarts.get_provider_by_type( TrafficHistoryProvider) assert traffic_history_provider scenario_list = Scenario.get_scenario_list(scenarios) scenarios_iterator = Scenario.variations_for_all_scenario_roots( scenario_list, []) for scenario in scenarios_iterator: logger.debug("working on scenario {}".format(scenario.name)) veh_missions = scenario.discover_missions_of_traffic_histories() if not veh_missions: logger.warning("no vehicle missions found for scenario {}.".format( scenario.name)) continue veh_start_times = { v_id: mission.start_time for v_id, mission in veh_missions.items() } k = vehicles_to_replace if k > len(veh_missions): logger.warning( "vehicles_to_replace={} is greater than the number of vehicle missions ({})." .format(vehicles_to_replace, len(veh_missions))) k = len(veh_missions) # XXX replace with AgentSpec appropriate for IL model agent_spec = AgentSpec( interface=AgentInterface.from_type(AgentType.Imitation), agent_builder=ReplayCheckerAgent, agent_params=smarts.fixed_timestep_sec, ) for episode in range(episodes): logger.info(f"starting episode {episode}...") agentid_to_vehid = {} agent_interfaces = {} # Build the Agents for the to-be-hijacked vehicles # and gather their missions agents = {} dones = {} ego_missions = {} sample = {} if scenario.traffic_history.dataset_source == "Waymo": # For Waymo, we only hijack the vehicle that was autonomous in the dataset waymo_ego_id = scenario.traffic_history.ego_vehicle_id if waymo_ego_id is not None: assert ( k == 1 ), f"do not specify -k > 1 when just hijacking Waymo ego vehicle (it was {k})" veh_id = str(waymo_ego_id) sample = {veh_id} else: logger.warning( f"Waymo ego vehicle id not mentioned in the dataset. Hijacking a random vehicle." ) if not sample: # For other datasets, hijack a sample of the recorded vehicles # Pick k vehicle missions to hijack with agent # and figure out which one starts the earliest sample = scenario.traffic_history.random_overlapping_sample( veh_start_times, k) if len(sample) < k: logger.warning( f"Unable to choose {k} overlapping missions. allowing non-overlapping." ) leftover = set(veh_start_times.keys()) - sample sample.update(set(random.sample(leftover, k - len(sample)))) agent_spec.interface.max_episode_steps = max([ scenario.traffic_history.vehicle_final_exit_time(veh_id) / 0.1 for veh_id in sample ]) history_start_time = None logger.info(f"chose vehicles: {sample}") for veh_id in sample: agent_id = f"ego-agent-IL-{veh_id}" agentid_to_vehid[agent_id] = veh_id agent_interfaces[agent_id] = agent_spec.interface if (not history_start_time or veh_start_times[veh_id] < history_start_time): history_start_time = veh_start_times[veh_id] for agent_id in agent_interfaces.keys(): agent = agent_spec.build_agent() veh_id = agentid_to_vehid[agent_id] agent.load_data_for_vehicle(veh_id, scenario, history_start_time) agents[agent_id] = agent dones[agent_id] = False mission = veh_missions[veh_id] ego_missions[agent_id] = replace( mission, start_time=mission.start_time - history_start_time) # Tell the traffic history provider to start traffic # at the point when the earliest agent enters... traffic_history_provider.start_time = history_start_time # and all the other agents to offset their missions by this much too scenario.set_ego_missions(ego_missions) logger.info(f"offsetting sim_time by: {history_start_time}") # Take control of vehicles with corresponding agent_ids smarts.switch_ego_agents(agent_interfaces) # Finally start the simulation loop... logger.info(f"starting simulation loop...") observations = smarts.reset(scenario) while not all(done for done in dones.values()): actions = { agent_id: agents[agent_id].act(agent_obs) for agent_id, agent_obs in observations.items() } logger.debug("stepping @ sim_time={} for agents={}...".format( smarts.elapsed_sim_time, list(observations.keys()))) observations, rewards, dones, infos = smarts.step(actions) for agent_id in agents.keys(): if dones.get(agent_id, False): if not observations[agent_id].events.reached_goal: logger.warning( "agent_id={} exited @ sim_time={}".format( agent_id, smarts.elapsed_sim_time)) logger.warning(" ... with {}".format( observations[agent_id].events)) else: logger.info( "agent_id={} reached goal @ sim_time={}". format(agent_id, smarts.elapsed_sim_time)) logger.debug(" ... with {}".format( observations[agent_id].events)) del observations[agent_id] smarts.destroy()
class SMARTSEnv(gym.Env): metadata = {"render.modes": ["human"]} """Metadata for gym's use""" def __init__(self, all_args): self.all_args = all_args self._dones_registered = 0 self.neighbor_num = all_args.neighbor_num self.rews_mode = all_args.rews_mode self.n_agents = all_args.num_agents self.use_proximity = all_args.use_proximity self.use_discrete = all_args.use_discrete # default True self.use_centralized_V = all_args.use_centralized_V self.scenarios = [(all_args.scenario_path + all_args.scenario_name)] self.agent_ids = ["Agent %i" % i for i in range(self.n_agents)] self.obs_space_dict = self.get_obs_space_dict() self.obs_dim = self.get_obs_dim() # ! TODO: self.share_obs_dim = self.get_state_dim( ) if self.use_centralized_V else self.get_obs_dim() self.observation_space = [ gym.spaces.Box(low=-1e10, high=1e10, shape=(self.obs_dim, )) ] * self.n_agents self.share_observation_space = [ gym.spaces.Box(low=-1e10, high=1e10, shape=(self.share_obs_dim, )) ] * self.n_agents if self.use_discrete: self.act_dim = 4 self.action_space = [gym.spaces.Discrete(self.act_dim) ] * self.n_agents self.agent_type = AgentType.Vulner_with_proximity if self.use_proximity else AgentType.Vulner else: # TODO Add continous action space self.agent_type = AgentType.VulnerCon_with_proximity if self.use_proximity else AgentType.VulnerCon raise NotImplementedError self._agent_specs = { agent_id: AgentSpec( interface=AgentInterface.from_type( self.agent_type, max_episode_steps=all_args.horizon), observation_adapter=self.get_obs_adapter(), reward_adapter=self.get_rew_adapter(self.rews_mode, self.neighbor_num), action_adapter=self.get_act_adapter(), ) for agent_id in self.agent_ids } self._scenarios_iterator = Scenario.scenario_variations( self.scenarios, list(self._agent_specs.keys()), all_args.shuffle_scenarios, ) self.agent_interfaces = { agent_id: agent.interface for agent_id, agent in self._agent_specs.items() } self.envision_client = None if not all_args.headless: self.envision_client = Envision( endpoint=all_args.envision_endpoint, output_dir=all_args.envision_record_data_replay_path) self.visdom_client = None if all_args.visdom: self.visdom_client = VisdomClient() self._smarts = SMARTS( agent_interfaces=self.agent_interfaces, traffic_sim=SumoTrafficSimulation( headless=all_args.sumo_headless, time_resolution=all_args.timestep_sec, num_external_sumo_clients=all_args.num_external_sumo_clients, sumo_port=all_args.sumo_port, auto_start=all_args.sumo_auto_start, endless_traffic=all_args.endless_traffic, ), envision=self.envision_client, visdom=self.visdom_client, timestep_sec=all_args.timestep_sec, zoo_workers=all_args.zoo_workers, auth_key=all_args.auth_key, ) def seed(self, seed): self.seed = seed smarts.core.seed(seed) def get_obs_space_dict(self): obs_config = { "distance_to_center": gym.spaces.Box(low=-1e10, high=1e10, shape=(1, )), "angle_error": gym.spaces.Box(low=-np.pi, high=np.pi, shape=(1, )), "speed": gym.spaces.Box(low=-1e10, high=1e10, shape=(1, )), "steering": gym.spaces.Box(low=-1e10, high=1e10, shape=(1, )), "ego_lane_dist": gym.spaces.Box(low=-1e10, high=1e10, shape=(3, )), "ego_ttc": gym.spaces.Box(low=-1e10, high=1e10, shape=(3, )), "neighbor": gym.spaces.Box(low=-1e3, high=1e3, shape=(self.neighbor_num * 5, )), } if self.use_proximity: obs_config.update({ "proximity": gym.spaces.Box(low=-1e10, high=1e10, shape=(8, )) }) obs_space_dict = gym.spaces.Dict(obs_config) return obs_space_dict def get_obs_dim(self): dim = 0 for key in self.obs_space_dict.spaces.keys(): space = list(self.obs_space_dict[key].shape) dim += reduce(lambda x, y: x * y, space) return dim def get_obs_adapter(self): def obs_adapter(env_observation): adapter = Adapter(space=self.obs_space_dict, transform=observation_adapter( self.neighbor_num, self.use_proximity)) obs = adapter.transform(env_observation) obs_flatten = np.concatenate(list(obs.values()), axis=0) return obs_flatten return obs_adapter def get_act_adapter(self): def action_adapter(policy_action): if isinstance(policy_action, (list, tuple, np.ndarray)): action = np.argmax(policy_action) else: action = policy_action action_dict = [ "keep_lane", "slow_down", "change_lane_left", "change_lane_right" ] return action_dict[action] return action_adapter def get_rew_adapter(self, adapter_type="vanilla", neighbor_num=3): return reward_adapter(adapter_type, neighbor_num) def _reset(self): scenario = next(self._scenarios_iterator) self._dones_registered = 0 env_observations = self._smarts.reset(scenario) self.last_obs = env_observations observations = { agent_id: self._agent_specs[agent_id].observation_adapter(obs) for agent_id, obs in env_observations.items() } return observations def reset(self, choose=True): if choose: try: self.current_observations = self._reset() except: self.close() self._smarts = SMARTS( agent_interfaces=self.agent_interfaces, traffic_sim=SumoTrafficSimulation( headless=self.all_args.sumo_headless, time_resolution=self.all_args.timestep_sec, num_external_sumo_clients=self.all_args. num_external_sumo_clients, sumo_port=self.all_args.sumo_port, auto_start=self.all_args.sumo_auto_start, endless_traffic=self.all_args.endless_traffic, ), envision=self.envision_client, visdom=self.visdom_client, timestep_sec=self.all_args.timestep_sec, zoo_workers=self.all_args.zoo_workers, auth_key=self.all_args.auth_key, ) self.current_observations = self._reset() return self.get_obs() else: return [np.zeros(self.obs_dim) for agent_id in self.agent_ids] def _step(self, agent_actions): agent_actions = { agent_id: self._agent_specs[agent_id].action_adapter(action) for agent_id, action in agent_actions.items() } observations, rewards, agent_dones, extras = self._smarts.step( agent_actions) infos = { agent_id: { "scores": value } for agent_id, value in extras["scores"].items() } for agent_id in observations: agent_spec = self._agent_specs[agent_id] observation = observations[agent_id] reward = rewards[agent_id] info = infos[agent_id] if self.rews_mode == "vanilla": rewards[agent_id] = agent_spec.reward_adapter( observation, reward) elif self.rews_mode == "standard": rewards[agent_id] = agent_spec.reward_adapter( self.last_obs[agent_id], observation, reward) elif self.rews_mode == "cruising": rewards[agent_id] = agent_spec.reward_adapter( observation, reward) self.last_obs[agent_id] = observation observations[agent_id] = agent_spec.observation_adapter( observation) infos[agent_id] = agent_spec.info_adapter(observation, reward, info) for done in agent_dones.values(): self._dones_registered += 1 if done else 0 agent_dones["__all__"] = self._dones_registered == len( self._agent_specs) return observations, rewards, agent_dones, infos def step(self, action_n): if not np.all(action_n == np.ones((self.n_agents, )).astype(np.int) * (-1)): actions = dict(zip(self.agent_ids, action_n)) self.current_observations, rewards, dones, infos = self._step( actions) obs_n = [] r_n = [] d_n = [] info_n = [] for agent_id in self.agent_ids: obs_n.append( self.current_observations.get(agent_id, np.zeros(self.obs_dim))) r_n.append([rewards.get(agent_id, 0.)]) d_n.append(dones.get(agent_id, True)) info_n.append(infos.get(agent_id, {'scores': 0.})) return obs_n, r_n, d_n, info_n else: obs_n = [np.zeros(self.obs_dim) for agent_id in self.agent_ids] r_n = [[0] for agent_id in self.agent_ids] d_n = [None for agent_id in self.agent_ids] info_n = [{} for agent_id in self.agent_ids] return obs_n, r_n, d_n, info_n def get_obs(self): """ Returns all agent observations in a list """ obs_n = [] for i, agent_id in enumerate(self.agent_ids): obs_n.append( self.current_observations.get(agent_id, np.zeros(self.obs_dim))) return obs_n def get_obs_agent(self, agent_id): """ Returns observation for agent_id """ return self.get_obs()[agent_id] def get_state(self): obs_n = [] for i, agent_id in enumerate(self.agent_ids): obs_n.append( self.current_observations.get(agent_id, np.zeros(self.obs_dim))) return obs_n def get_state_dim(self): """ Returns the shape of the state""" return self.obs_dim def render(self, mode="human"): """Does nothing.""" pass def close(self): if self._smarts is not None: self._smarts.destroy()