def __init__(self, env, lower_lim=None, upper_lim=None, prob_per_step_to_stop=None):
    """Wrapper exposing episode-timing observations (fraction done, horizon, timestep).

    Args:
        env: environment to wrap.
        lower_lim, upper_lim: horizon bounds; both must be given when
            prob_per_step_to_stop is not. (Presumably used to sample a
            horizon elsewhere in the class — TODO confirm.)
        prob_per_step_to_stop (float): per-step stopping probability,
            strictly between 0 and 1.
    """
    super().__init__(env)
    # Either both horizon limits or a stopping probability must be supplied.
    assert (lower_lim is not None and upper_lim is not None) or prob_per_step_to_stop is not None
    if prob_per_step_to_stop is not None:
        assert 0 < prob_per_step_to_stop < 1
    # Bug fix: the configuration was accepted but never stored.
    self.lower_lim = lower_lim
    self.upper_lim = upper_lim
    self.prob_per_step_to_stop = prob_per_step_to_stop
    n_agents = self.metadata['n_agents']
    self.observation_space = update_obs_space(self, {
        'fraction_episode_done': [n_agents, 1],
        'horizon': [n_agents, 1],
        'timestep': [n_agents, 1]
    })
    # NOTE(review): the original re-assigned observation_space a second time
    # with an empty update dict; that call was redundant and was removed.
def __init__(self, env, eat_thresh=0.5, max_food_health=10, respawn_time=np.inf,
             food_rew_type='selfish', reward_scale=1.0, reward_scale_obs=False):
    """Food wrapper: adds food observations and a per-agent eat action.

    Args:
        env: environment to wrap.
        eat_thresh (float): distance threshold for eating — assumed; the
            consuming logic is not visible here, TODO confirm.
        max_food_health (int): starting health of each food item.
        respawn_time (float): steps before eaten food respawns (inf = never).
        food_rew_type (str): reward-sharing scheme identifier.
        reward_scale (float or sequence): scalar s is expanded to [s, s];
            a list/tuple/ndarray is used as-is.
        reward_scale_obs (bool): if True reserve one extra food-obs dim
            (food_dim becomes 5 instead of 4).
    """
    super().__init__(env)
    self.eat_thresh = eat_thresh
    self.max_food_health = max_food_health
    self.respawn_time = respawn_time
    self.food_rew_type = food_rew_type
    self.n_agents = self.metadata['n_agents']
    # Fix: use isinstance instead of a type() membership test so subclasses
    # of list/tuple/ndarray are also accepted as (low, high) ranges.
    if not isinstance(reward_scale, (list, tuple, np.ndarray)):
        reward_scale = [reward_scale, reward_scale]
    self.reward_scale = reward_scale
    self.reward_scale_obs = reward_scale_obs

    # Reset obs/action space to match
    self.max_n_food = self.metadata['max_n_food']
    self.curr_n_food = self.metadata['curr_n_food']
    self.max_food_size = self.metadata['food_size']
    food_dim = 5 if self.reward_scale_obs else 4
    self.observation_space = update_obs_space(
        self.env, {
            'food_obs': (self.max_n_food, food_dim),
            'food_health': (self.max_n_food, 1),
            'food_eat': (self.max_n_food, 1)
        })
    # One binary eat head per food item, per agent.
    self.action_space.spaces['action_eat_food'] = Tuple([
        MultiDiscrete([2] * self.max_n_food) for _ in range(self.n_agents)
    ])
def __init__(self, env, n_lidar_per_agent=30, lidar_range=6.0,
             compress_lidar_scale=None, visualize_lidar=False):
    """Adds a per-agent lidar observation of evenly spaced horizontal rays."""
    super().__init__(env)
    self.n_lidar_per_agent = n_lidar_per_agent
    self.lidar_range = lidar_range
    self.compress_lidar_scale = compress_lidar_scale
    self.visualize_lidar = visualize_lidar
    self.n_agents = self.unwrapped.n_agents
    self.observation_space = update_obs_space(
        env, {'lidar': (self.n_agents, self.n_lidar_per_agent, 1)})
    # Precompute ray directions: evenly spaced angles around the full circle,
    # scaled to the lidar range; the z component is zero (rays are horizontal).
    angles = np.linspace(0, 2 * np.pi, num=self.n_lidar_per_agent, endpoint=False)
    self.lidar_angles = angles
    directions = np.stack(
        [np.cos(angles), np.sin(angles), np.zeros_like(angles)], axis=-1)
    # Leading singleton axis so the rays broadcast across agents.
    self.lidar_rays = (self.lidar_range * directions)[None, :]
def __init__(self, env, body_names, radius_multiplier=1.5,
             agent_idx_allowed_to_lock=None, lock_type="any_lock",
             ac_obs_prefix='', obj_in_game_metadata_keys=None,
             agent_allowed_to_lock_keys=None):
    """Adds a per-agent glue (lock) action over the given bodies plus
    lock-state observations, all namespaced by ac_obs_prefix."""
    super().__init__(env)
    self.n_agents = self.unwrapped.n_agents
    self.n_obj = len(body_names)
    self.body_names = body_names
    if agent_idx_allowed_to_lock is None:
        # Default: every agent may lock.
        agent_idx_allowed_to_lock = np.arange(self.n_agents)
    self.agent_idx_allowed_to_lock = agent_idx_allowed_to_lock
    self.lock_type = lock_type
    self.ac_obs_prefix = ac_obs_prefix
    self.obj_in_game_metadata_keys = obj_in_game_metadata_keys
    self.agent_allowed_to_lock_keys = agent_allowed_to_lock_keys
    # One binary glue head per object, per agent.
    glue_heads = [MultiDiscrete([2] * self.n_obj) for _ in range(self.n_agents)]
    self.action_space.spaces[f'action_{ac_obs_prefix}glue'] = Tuple(glue_heads)
    new_obs = {
        f'{ac_obs_prefix}obj_lock': (self.n_obj, 1),
        f'{ac_obs_prefix}you_lock': (self.n_agents, self.n_obj, 1),
        f'{ac_obs_prefix}team_lock': (self.n_agents, self.n_obj, 1),
    }
    self.observation_space = update_obs_space(env, new_obs)
    self.lock_radius = radius_multiplier * self.metadata['box_size']
    self.obj_locked = np.zeros((self.n_obj,), dtype=int)
def __init__(self, env, prep_fraction=.2):
    """Exposes a preparation-phase observation covering the first
    prep_fraction of the episode horizon."""
    super().__init__(env)
    self.prep_fraction = prep_fraction
    # Number of steps in the preparation phase.
    self.prep_time = self.prep_fraction * self.unwrapped.horizon
    self.n_agents = self.metadata['n_agents']
    self.step_counter = 0
    new_spaces = {'prep_obs': [self.n_agents, 1]}
    self.observation_space = update_obs_space(self, new_spaces)
def __init__(self, env, action_keys, defaults, do_nothing_option=True):
    """Lets each agent pick exactly one of several action groups per step.

    Args:
        env: environment to wrap.
        action_keys (list of str): names of the selectable action groups.
        defaults: default values for non-chosen groups — assumed; the use
            in step() is not visible here, TODO confirm.
        do_nothing_option (bool): if True, append an extra 'do_nothing' choice.
    """
    super().__init__(env)
    # Bug fix: the constructor arguments were never stored, so the reads of
    # self.do_nothing_option / self.action_keys below either fell through
    # gym.Wrapper.__getattr__ to the wrapped env or raised AttributeError.
    self.action_keys = list(action_keys)  # copy so we never mutate the caller's list
    self.defaults = defaults
    self.do_nothing_option = do_nothing_option
    if self.do_nothing_option:
        self.action_keys.append('do_nothing')
    self.n_agents = self.metadata['n_agents']
    self.action_space.spaces['action_choose_option'] = Tuple(
        [Discrete(n=len(self.action_keys)) for _ in range(self.n_agents)])
    self.observation_space = update_obs_space(
        self, {'previous_choice': [self.n_agents, len(self.action_keys)]})
def __init__(self, env, pos_obs_key, mask_obs_key, cone_angle=3/8 * np.pi):
    """Adds a per-agent visibility mask over the objects in pos_obs_key,
    written into the observation under mask_obs_key."""
    super().__init__(env)
    self.cone_angle = cone_angle
    self.n_agents = self.unwrapped.n_agents
    spaces = self.observation_space.spaces
    # Sanity check: the agent-position obs must have one row per agent.
    assert self.n_agents == spaces['agent_pos'].shape[0]
    self.n_objects = spaces[pos_obs_key].shape[0]
    self.observation_space = update_obs_space(
        env, {mask_obs_key: (self.n_agents, self.n_objects)})
    self.pos_obs_key = pos_obs_key
    self.mask_obs_key = mask_obs_key
def __init__(self, env, payoff_matrix):
    """Matrix-game wrapper: adds a binary cooperate/defect action and
    previous-action observations.

    Args:
        env: environment to wrap.
        payoff_matrix: payoff structure of the game — presumably consumed
            by the reward logic elsewhere in the class, TODO confirm.
    """
    super().__init__(env)
    # Bug fix: payoff_matrix was accepted but never stored.
    self.payoff_matrix = payoff_matrix
    self.n_agents = self.unwrapped.n_agents
    # 0 means to cooperate, 1 means to defect
    self.action_space.spaces['action_defect'] = Tuple(
        [Discrete(n=2) for _ in range(self.n_agents)])
    self.observation_space = update_obs_space(
        self, {
            'prev_ac': [self.n_agents, 1],
            'prev_ac_while_playing': [self.n_agents, 1]
        })
def __init__(self, env, last_step_first_agent_vs_last_agent: bool,
             last_agent_always_plays: bool,
             last_doesnt_play_until_t: int = None,
             last_must_play_at_t: bool = False):
    """Controls which agents are active each step and exposes who-played
    observations.

    Args:
        env: environment to wrap.
        last_step_first_agent_vs_last_agent: force first-vs-last matchup on
            the final step — assumed from the name, TODO confirm in step().
        last_agent_always_plays: include the last agent in every round.
        last_doesnt_play_until_t: step before which the last agent sits out
            (None = no restriction).
        last_must_play_at_t: force the last agent to play at step t.
    """
    super().__init__(env)
    self.n_agents = self.unwrapped.n_agents
    # Bug fix: the scheduling options were accepted but never stored.
    self.last_step_first_agent_vs_last_agent = last_step_first_agent_vs_last_agent
    self.last_agent_always_plays = last_agent_always_plays
    self.last_doesnt_play_until_t = last_doesnt_play_until_t
    self.last_must_play_at_t = last_must_play_at_t
    self.observation_space = update_obs_space(self, {
        'you_played': [self.n_agents, 1],
        'youre_playing': [self.n_agents, 1]
    })
def __init__(self, env, attack_damage=-5, attack_range=0.7,
             mask_eat_if_attacked=True, only_attack_in_front=True):
    """Adds an attack-agent action and attacked-by observations.

    Args:
        env: environment to wrap.
        attack_damage (int): health delta applied to the target (<= 0).
        attack_range (float): maximum attack distance (>= 0).
        mask_eat_if_attacked (bool): presumably blocks eating for attacked
            agents — TODO confirm in step(), not visible here.
        only_attack_in_front (bool): restrict attacks to targets in front.
    """
    assert attack_damage <= 0
    assert attack_range >= 0
    super().__init__(env, 'action_attack_agent')
    # Bug fix: the configuration was validated but never stored.
    self.attack_damage = attack_damage
    self.attack_range = attack_range
    self.mask_eat_if_attacked = mask_eat_if_attacked
    self.only_attack_in_front = only_attack_in_front
    self.observation_space = update_obs_space(
        self, {
            'attacked_me': [self.n_agents, self.n_agents, 1],
            'n_attacked_me': [self.n_agents, 1]
        })
def __init__(self, env, choosing_period, agent_identity_dim=4,
             mutual_cooperate_rew=2, defected_against_rew=-1,
             successful_defect_rew=1):
    """Adds a periodic partner-choosing action with prisoner's-dilemma-style
    payoffs and the associated choice/reward observations.

    Args:
        env: environment to wrap.
        choosing_period (int): presumably steps between real choices —
            TODO confirm in step(), not visible here.
        agent_identity_dim (int): size of the per-agent identity vector.
        mutual_cooperate_rew / defected_against_rew / successful_defect_rew:
            payoff values for the corresponding outcomes.
    """
    super().__init__(env, 'action_choose_agent')
    # Bug fix: the game configuration was accepted but never stored.
    self.choosing_period = choosing_period
    self.agent_identity_dim = agent_identity_dim
    self.mutual_cooperate_rew = mutual_cooperate_rew
    self.defected_against_rew = defected_against_rew
    self.successful_defect_rew = successful_defect_rew
    self.observation_space = update_obs_space(
        self, {
            'chose_me': [self.n_agents, self.n_agents, 1],
            'i_chose': [self.n_agents, self.n_agents, 1],
            'chose_me_rew': [self.n_agents, self.n_agents, 1],
            'i_chose_rew': [self.n_agents, self.n_agents, 1],
            'i_chose_any_rew': [self.n_agents, 1],
            'previous_choice_identity': [self.n_agents, agent_identity_dim],
            'next_choice_is_real': [self.n_agents, 1],
        })
def __init__(self, env, max_health=20, death_rew=-100, steps_freeze_on_death=100, life_rew=1):
    """Gives agents health, a death penalty, and a post-death freeze period.

    Args:
        env: environment to wrap.
        max_health (int): maximum / starting agent health.
        death_rew (int): reward on death (<= 0).
        steps_freeze_on_death (int): steps an agent stays frozen after dying.
        life_rew (int): per-step reward while alive (>= 0).
    """
    super().__init__(env)
    assert death_rew <= 0
    assert life_rew >= 0
    assert steps_freeze_on_death >= 0
    # Bug fix: the health/reward configuration was validated but never stored.
    self.max_health = max_health
    self.death_rew = death_rew
    self.steps_freeze_on_death = steps_freeze_on_death
    self.life_rew = life_rew
    n_agents = self.metadata['n_agents']
    self.observation_space = update_obs_space(
        self, {
            'agent_health': [n_agents, 1],
            'is_dead': [n_agents, 1],
            'time_to_alive': [n_agents, 1]
        })
    # Cached no-op action, applied to agents while they are dead/frozen.
    self.zero_action = zero_action(self.action_space)
    # Typo fix in the log message ("zerod" -> "zeroed").
    logging.info(
        f"Only {self.zero_action.keys()} will be zeroed during death")
def __init__(self, env, **graph_kwargs):
    """Samples a reward-sharing/uncertainty graph (via RUSPGenerator) and
    exposes its per-agent observation keys."""
    RUSPGenerator.__init__(self, **graph_kwargs)
    gym.Wrapper.__init__(self, env)
    n_a = self.metadata['n_agents']
    # Every base key comes in three variants: the exact value, a noisy
    # version of it, and the noise level of that noisy version.
    base_shapes = {
        'self_rew_value': [n_a, 1],
        'other_rew_value_s': [n_a, n_a, 1],
        'rew_share_so_s': [n_a, n_a, 1],
        'rew_share_os_o': [n_a, n_a, 1],
    }
    self.obs_keys_with_shapes = {
        base + suffix: list(shape)
        for base, shape in base_shapes.items()
        for suffix in ['', '_noisy', '_noise_level']
    }
    self.observation_space = update_obs_space(self, self.obs_keys_with_shapes)
def __init__(self, env, team_index=None, n_teams=2):
    """Assigns agents to teams and exposes a team_size observation.

    team_index may be given explicitly (one entry per agent); otherwise the
    agents are split into n_teams near-equal contiguous groups, e.g.
    5 agents / 3 teams -> [0, 0, 1, 1, 2].
    """
    super().__init__(env)
    self.n_agents = self.metadata['n_actors']
    if team_index is None:
        assert n_teams >= 1, "Number of teams must be at least 1"
        # split teams: 5 agents and 3 teams will result in team_index = [0,0,1,1,2]
        chunks = np.array_split(np.arange(self.n_agents), n_teams)
        team_index = np.concatenate(
            [np.ones_like(chunk) * team for team, chunk in enumerate(chunks)])
    assert len(team_index) == self.n_agents, (
        "team_index parameter length must be equal to number of agents")
    if isinstance(team_index, np.ndarray):
        assert team_index.ndim == 1, (
            "team_index parameter must be numpy array of dimension 1")
    # Store a copy in metadata so child wrappers inherit it, and keep our own
    # copy so later mutation of the caller's array cannot affect us.
    self.metadata['team_index'] = np.array(team_index)
    self.team_idx = np.array(team_index)
    self.observation_space = update_obs_space(
        env, {'team_size': (self.n_agents, 1)})
def __init__(self, env, body_names, radius_multiplier=1.7, grab_dist=None,
             grab_exclusive=False, obj_in_game_metadata_keys=None):
    """Adds a per-agent pull (grab) action over the given bodies plus
    pull-state observations."""
    super().__init__(env)
    self.n_agents = self.unwrapped.n_agents
    self.body_names = body_names
    self.n_obj = len(body_names)
    self.obj_in_game_metadata_keys = obj_in_game_metadata_keys
    # One binary pull head per object, per agent.
    pull_heads = [MultiDiscrete([2] * self.n_obj) for _ in range(self.n_agents)]
    self.action_space.spaces['action_pull'] = Tuple(pull_heads)
    self.observation_space = update_obs_space(env, {
        'obj_pull': (self.n_obj, 1),
        'you_pull': (self.n_obj, self.n_agents),
    })
    self.grab_radius = radius_multiplier * self.metadata['box_size']
    self.grab_dist = grab_dist
    self.grab_exclusive = grab_exclusive
def __init__(self, env, cone_angle=3/8 * np.pi):
    """Adds an agent-to-agent visibility mask based on a viewing cone."""
    super().__init__(env)
    self.cone_angle = cone_angle
    self.n_agents = self.unwrapped.n_agents
    mask_shape = (self.n_agents, self.n_agents)
    self.observation_space = update_obs_space(env, {'mask_aa_obs': mask_shape})
def __init__(self, env, vector_dim=16):
    """Gives every agent an identity-vector observation of size vector_dim.

    Args:
        env: environment to wrap.
        vector_dim (int): dimensionality of the per-agent identity vector.
    """
    super().__init__(env)
    # Bug fix: the original read self.vector_dim without ever assigning it
    # from the vector_dim parameter (AttributeError / wrapper fall-through).
    self.vector_dim = vector_dim
    self.observation_space = update_obs_space(
        self, {'agent_identity': [self.metadata['n_agents'], self.vector_dim]})
def __init__(self, env, payoff_matrix):
    """Matrix-game wrapper: adds a binary cooperate/defect action and a
    previous-action observation.

    Args:
        env: environment to wrap.
        payoff_matrix: payoff structure of the game — presumably consumed
            by the reward logic elsewhere in the class, TODO confirm.
    """
    super().__init__(env)
    # Bug fix: payoff_matrix was accepted but never stored.
    self.payoff_matrix = payoff_matrix
    self.n_agents = self.metadata['n_agents']
    # 0 = cooperate, 1 = defect.
    self.action_space.spaces['action_defect'] = Tuple(
        [Discrete(n=2) for _ in range(self.n_agents)])
    self.observation_space = update_obs_space(
        self, {'prev_ac': [self.n_agents, 1]})