Example #1

import logging
from typing import Optional

import gym
import numpy as np
from gym.spaces import Discrete, MultiDiscrete, Tuple

# `update_obs_space`, `zero_action`, and `RUSPGenerator` are helpers from the
# surrounding codebase and are assumed to be importable alongside these wrappers.

def __init__(self, env, lower_lim=None, upper_lim=None, prob_per_step_to_stop=None):
    super().__init__(env)
    # Require either a fixed horizon range or a per-step stop probability.
    assert (lower_lim is not None and upper_lim is not None) or prob_per_step_to_stop is not None
    if prob_per_step_to_stop is not None:
        assert 0 < prob_per_step_to_stop < 1
    self.observation_space = update_obs_space(self, {
        'fraction_episode_done': [self.metadata['n_agents'], 1],
        'horizon': [self.metadata['n_agents'], 1],
        'timestep': [self.metadata['n_agents'], 1]
    })
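A minimal sketch of how the two horizon modes could be realized at episode reset (illustrative names, not the wrapper's actual methods):

import numpy as np

def sample_horizon(lower_lim=None, upper_lim=None, prob_per_step_to_stop=None):
    # Geometric horizon with mean 1 / prob_per_step_to_stop, or a uniform
    # draw from the closed range [lower_lim, upper_lim].
    if prob_per_step_to_stop is not None:
        return np.random.geometric(prob_per_step_to_stop)
    return np.random.randint(lower_lim, upper_lim + 1)

print(sample_horizon(lower_lim=50, upper_lim=100))
print(sample_horizon(prob_per_step_to_stop=0.01))  # ~100 steps on average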
Example #2

    def __init__(self,
                 env,
                 eat_thresh=0.5,
                 max_food_health=10,
                 respawn_time=np.inf,
                 food_rew_type='selfish',
                 reward_scale=1.0,
                 reward_scale_obs=False):
        super().__init__(env)
        self.eat_thresh = eat_thresh
        self.max_food_health = max_food_health
        self.respawn_time = respawn_time
        self.food_rew_type = food_rew_type
        self.n_agents = self.metadata['n_agents']

        if not isinstance(reward_scale, (list, tuple, np.ndarray)):
            reward_scale = [reward_scale, reward_scale]
        self.reward_scale = reward_scale
        self.reward_scale_obs = reward_scale_obs

        # Reset obs/action space to match
        self.max_n_food = self.metadata['max_n_food']
        self.curr_n_food = self.metadata['curr_n_food']
        self.max_food_size = self.metadata['food_size']
        food_dim = 5 if self.reward_scale_obs else 4
        self.observation_space = update_obs_space(
            self.env, {
                'food_obs': (self.max_n_food, food_dim),
                'food_health': (self.max_n_food, 1),
                'food_eat': (self.max_n_food, 1)
            })
        self.action_space.spaces['action_eat_food'] = Tuple([
            MultiDiscrete([2] * self.max_n_food) for _ in range(self.n_agents)
        ])
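The eat_thresh parameter implies a proximity test between agents and food items. A self-contained sketch of one plausible form of that mask (positions, sizes, and the exact way eat_thresh combines with food size are illustrative assumptions):

import numpy as np

eat_thresh, food_size = 0.5, 0.1
agent_pos = np.array([[0.0, 0.0], [1.0, 1.0]])             # (n_agents, 2)
food_pos = np.array([[0.1, 0.0], [5.0, 5.0], [1.0, 1.2]])  # (n_food, 2)

# (n_agents, n_food) matrix of agent-to-food distances
dist = np.linalg.norm(agent_pos[:, None, :] - food_pos[None, :, :], axis=-1)
can_eat = dist < eat_thresh + food_size
print(can_eat)  # agent 0 can reach food 0; agent 1 can reach food 2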
Example #3

    def __init__(self,
                 env,
                 n_lidar_per_agent=30,
                 lidar_range=6.0,
                 compress_lidar_scale=None,
                 visualize_lidar=False):
        super().__init__(env)
        self.n_lidar_per_agent = n_lidar_per_agent
        self.lidar_range = lidar_range
        self.compress_lidar_scale = compress_lidar_scale
        self.visualize_lidar = visualize_lidar
        self.n_agents = self.unwrapped.n_agents

        self.observation_space = update_obs_space(
            env, {'lidar': (self.n_agents, self.n_lidar_per_agent, 1)})

        # generate evenly spaced lidar rays radiating from the origin
        self.lidar_angles = np.linspace(0,
                                        2 * np.pi,
                                        num=self.n_lidar_per_agent,
                                        endpoint=False)
        self.lidar_rays = self.lidar_range * np.array([
            np.cos(self.lidar_angles),
            np.sin(self.lidar_angles),
            np.zeros_like(self.lidar_angles)
        ]).T
        self.lidar_rays = self.lidar_rays[None, :]  # add a leading dim to broadcast over agents
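To see the ray layout this constructor builds, the same computation can be run standalone with a small number of rays:

import numpy as np

n_lidar_per_agent, lidar_range = 4, 6.0
angles = np.linspace(0, 2 * np.pi, num=n_lidar_per_agent, endpoint=False)
rays = lidar_range * np.array([np.cos(angles),
                               np.sin(angles),
                               np.zeros_like(angles)]).T
print(rays.shape)     # (4, 3): one (x, y, z) endpoint per ray
print(rays.round(2))  # endpoints at +x, +y, -x, -y, all at z = 0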
Example #4

 def __init__(self,
              env,
              body_names,
              radius_multiplier=1.5,
              agent_idx_allowed_to_lock=None,
              lock_type="any_lock",
              ac_obs_prefix='',
              obj_in_game_metadata_keys=None,
              agent_allowed_to_lock_keys=None):
     super().__init__(env)
     self.n_agents = self.unwrapped.n_agents
     self.n_obj = len(body_names)
     self.body_names = body_names
     self.agent_idx_allowed_to_lock = np.arange(
         self.n_agents
     ) if agent_idx_allowed_to_lock is None else agent_idx_allowed_to_lock
     self.lock_type = lock_type
     self.ac_obs_prefix = ac_obs_prefix
     self.obj_in_game_metadata_keys = obj_in_game_metadata_keys
     self.agent_allowed_to_lock_keys = agent_allowed_to_lock_keys
     self.action_space.spaces[f'action_{ac_obs_prefix}glue'] = (Tuple(
         [MultiDiscrete([2] * self.n_obj) for _ in range(self.n_agents)]))
     self.observation_space = update_obs_space(
         env, {
             f'{ac_obs_prefix}obj_lock': (self.n_obj, 1),
             f'{ac_obs_prefix}you_lock': (self.n_agents, self.n_obj, 1),
             f'{ac_obs_prefix}team_lock': (self.n_agents, self.n_obj, 1)
         })
     self.lock_radius = radius_multiplier * self.metadata['box_size']
     self.obj_locked = np.zeros((self.n_obj, ), dtype=int)
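The glue action added above is a Tuple of per-agent MultiDiscrete spaces, one binary lock flag per object. Constructing and sampling such a space standalone:

from gym.spaces import MultiDiscrete, Tuple

n_agents, n_obj = 2, 3
glue_space = Tuple([MultiDiscrete([2] * n_obj) for _ in range(n_agents)])
print(glue_space.sample())  # e.g. (array([0, 1, 1]), array([1, 0, 0]))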
Example #5
 def __init__(self, env, prep_fraction=.2):
     super().__init__(env)
     self.prep_fraction = prep_fraction
     self.prep_time = self.prep_fraction * self.unwrapped.horizon
     self.n_agents = self.metadata['n_agents']
     self.step_counter = 0
     self.observation_space = update_obs_space(
         self, {'prep_obs': [self.n_agents, 1]})
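The snippet only shows the constructor; a plausible sketch of the observation it implies, i.e. a per-agent flag that stays on while the preparation phase is running (illustrative, not the wrapper's actual method):

import numpy as np

def prep_obs(step_counter, prep_time, n_agents):
    # 1.0 while still inside the preparation phase, 0.0 afterwards
    return np.full((n_agents, 1), float(step_counter < prep_time))

print(prep_obs(step_counter=10, prep_time=16.0, n_agents=3).ravel())  # [1. 1. 1.]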
Example #6

 def __init__(self, env, action_keys, defaults, do_nothing_option=True):
     super().__init__(env)
     # Store the option list and per-key defaults before optionally
     # appending the extra 'do_nothing' option.
     self.action_keys = action_keys
     self.defaults = defaults
     self.do_nothing_option = do_nothing_option
     if self.do_nothing_option:
         self.action_keys.append('do_nothing')
     self.n_agents = self.metadata['n_agents']
     self.action_space.spaces['action_choose_option'] = Tuple([Discrete(n=len(self.action_keys))
                                                               for _ in range(self.metadata['n_agents'])])
     self.observation_space = update_obs_space(self, {'previous_choice': [self.metadata['n_agents'], len(self.action_keys)]})
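A hedged sketch of how a chosen option index could be resolved against action_keys and defaults, with 'do_nothing' falling back to defaults for every key (the wrapper's real step logic is not shown in the snippet):

action_keys = ['action_movement', 'action_eat_food', 'do_nothing']
defaults = {'action_movement': 0, 'action_eat_food': 0}

def resolve_choice(choice_idx, agent_action='<agent action>'):
    # Keep the agent's action only for the chosen key; all others use defaults.
    chosen = action_keys[choice_idx]
    return {k: (agent_action if k == chosen else defaults[k])
            for k in action_keys if k != 'do_nothing'}

print(resolve_choice(0))  # movement passes through, eating is defaulted
print(resolve_choice(2))  # 'do_nothing': everything defaulted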
Example #7
 def __init__(self, env, pos_obs_key, mask_obs_key, cone_angle=3/8 * np.pi):
     super().__init__(env)
     self.cone_angle = cone_angle
     self.n_agents = self.unwrapped.n_agents
     assert self.n_agents == self.observation_space.spaces['agent_pos'].shape[0]
     self.n_objects = self.observation_space.spaces[pos_obs_key].shape[0]
     self.observation_space = update_obs_space(env, {mask_obs_key: (self.n_agents, self.n_objects)})
     self.pos_obs_key = pos_obs_key
     self.mask_obs_key = mask_obs_key
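A self-contained sketch of the cone test implied by cone_angle, assuming it is the half-angle of the vision cone measured against each agent's facing direction:

import numpy as np

cone_angle = 3 / 8 * np.pi
agent_pos = np.array([[0.0, 0.0]])      # one agent at the origin
agent_facing = np.array([0.0])          # facing along +x (radians)
obj_pos = np.array([[1.0, 0.5], [-1.0, 0.0]])

vec = obj_pos - agent_pos               # agent-to-object vectors
ang = np.arctan2(vec[:, 1], vec[:, 0])  # absolute bearing of each object
diff = (ang - agent_facing + np.pi) % (2 * np.pi) - np.pi  # wrap to [-pi, pi)
mask = np.abs(diff) <= cone_angle
print(mask)  # [ True False]: the object behind the agent is masked out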
Example #8

 def __init__(self, env, payoff_matrix):
     super().__init__(env)
     self.payoff_matrix = payoff_matrix  # indexed by both agents' actions
     self.n_agents = self.unwrapped.n_agents
     # 0 means to cooperate, 1 means to defect
     self.action_space.spaces['action_defect'] = Tuple(
         [Discrete(n=2) for _ in range(self.n_agents)])
     self.observation_space = update_obs_space(
         self, {
             'prev_ac': [self.n_agents, 1],
             'prev_ac_while_playing': [self.n_agents, 1]
         })
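The payoff_matrix parameter suggests standard matrix-game indexing by both agents' defect flags. A worked prisoner's-dilemma example (the payoff values here are chosen purely for illustration):

import numpy as np

# payoff_matrix[my_action, their_action] -> my reward
# 0 = cooperate, 1 = defect (as in the constructor's comment)
payoff_matrix = np.array([[2, -1],   # I cooperate
                          [3,  0]])  # I defect
actions = [1, 0]  # agent 0 defects, agent 1 cooperates
rews = [payoff_matrix[actions[i], actions[1 - i]] for i in range(2)]
print(rews)  # [3, -1]: successful defection vs. being defected against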
Example #9

 def __init__(self,
              env,
              last_step_first_agent_vs_last_agent: bool,
              last_agent_always_plays: bool,
              last_doesnt_play_until_t: Optional[int] = None,
              last_must_play_at_t: bool = False):
     super().__init__(env)
     # Store the scheduling flags that determine which agents play each step.
     self.last_step_first_agent_vs_last_agent = last_step_first_agent_vs_last_agent
     self.last_agent_always_plays = last_agent_always_plays
     self.last_doesnt_play_until_t = last_doesnt_play_until_t
     self.last_must_play_at_t = last_must_play_at_t
     self.n_agents = self.unwrapped.n_agents
     self.observation_space = update_obs_space(self, {
         'you_played': [self.n_agents, 1],
         'youre_playing': [self.n_agents, 1]
     })
Example #10
 def __init__(self,
              env,
              attack_damage=-5,
              attack_range=0.7,
              mask_eat_if_attacked=True,
              only_attack_in_front=True):
     assert attack_damage <= 0
     assert attack_range >= 0
     super().__init__(env, 'action_attack_agent')
     # Store attack settings for use when resolving attacks each step.
     self.attack_damage = attack_damage
     self.attack_range = attack_range
     self.mask_eat_if_attacked = mask_eat_if_attacked
     self.only_attack_in_front = only_attack_in_front
     self.observation_space = update_obs_space(
         self, {
             'attacked_me': [self.n_agents, self.n_agents, 1],
             'n_attacked_me': [self.n_agents, 1]
         })
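A hedged sketch of how attack_range and attack_damage could combine (the snippet only shows the constructor; this is illustrative, not the wrapper's actual step code):

import numpy as np

attack_damage, attack_range = -5, 0.7
agent_pos = np.array([[0.0, 0.0], [0.5, 0.0], [3.0, 0.0]])
health = np.array([20.0, 20.0, 20.0])

# agent 0 attacks agent 1: the hit only lands if the target is in range
attacker, target = 0, 1
if np.linalg.norm(agent_pos[attacker] - agent_pos[target]) <= attack_range:
    health[target] += attack_damage  # attack_damage is negative
print(health)  # [20. 15. 20.]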
Example #11

 def __init__(self,
              env,
              choosing_period,
              agent_identity_dim=4,
              mutual_cooperate_rew=2,
              defected_against_rew=-1,
              successful_defect_rew=1):
     super().__init__(env, 'action_choose_agent')
     # Store the choosing schedule and payoff parameters.
     self.choosing_period = choosing_period
     self.agent_identity_dim = agent_identity_dim
     self.mutual_cooperate_rew = mutual_cooperate_rew
     self.defected_against_rew = defected_against_rew
     self.successful_defect_rew = successful_defect_rew
     self.observation_space = update_obs_space(
         self, {
             'chose_me': [self.n_agents, self.n_agents, 1],
             'i_chose': [self.n_agents, self.n_agents, 1],
             'chose_me_rew': [self.n_agents, self.n_agents, 1],
             'i_chose_rew': [self.n_agents, self.n_agents, 1],
             'i_chose_any_rew': [self.n_agents, 1],
             'previous_choice_identity':
             [self.n_agents, agent_identity_dim],
             'next_choice_is_real': [self.n_agents, 1],
         })
Example #12
 def __init__(self,
              env,
              max_health=20,
              death_rew=-100,
              steps_freeze_on_death=100,
              life_rew=1):
     super().__init__(env)
     assert death_rew <= 0
     assert life_rew >= 0
     assert steps_freeze_on_death >= 0
     # Store health/death settings for use during step().
     self.max_health = max_health
     self.death_rew = death_rew
     self.steps_freeze_on_death = steps_freeze_on_death
     self.life_rew = life_rew
     self.observation_space = update_obs_space(
         self, {
             'agent_health': [self.metadata['n_agents'], 1],
             'is_dead': [self.metadata['n_agents'], 1],
             'time_to_alive': [self.metadata['n_agents'], 1]
         })
     self.zero_action = zero_action(self.action_space)
     logging.info(
         f"Only {self.zero_action.keys()} will be zeroed during death")
Example #13

 def __init__(self, env, **graph_kwargs):
     RUSPGenerator.__init__(self, **graph_kwargs)
     gym.Wrapper.__init__(self, env)
     n_a = self.metadata['n_agents']
     self.obs_keys_with_shapes = {
         'self_rew_value': [n_a, 1],
         'self_rew_value_noisy': [n_a, 1],
         'self_rew_value_noise_level': [n_a, 1],
         'other_rew_value_s': [n_a, n_a, 1],
         'other_rew_value_s_noisy': [n_a, n_a, 1],
         'other_rew_value_s_noise_level': [n_a, n_a, 1],
         'rew_share_so_s': [n_a, n_a, 1],
         'rew_share_so_s_noisy': [n_a, n_a, 1],
         'rew_share_so_s_noise_level': [n_a, n_a, 1],
         'rew_share_os_o': [n_a, n_a, 1],
         'rew_share_os_o_noisy': [n_a, n_a, 1],
         'rew_share_os_o_noise_level': [n_a, n_a, 1],
     }
     self.observation_space = update_obs_space(self, self.obs_keys_with_shapes)
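`update_obs_space` is used by every wrapper here but defined elsewhere; a plausible minimal version, inferred purely from how it is called (an assumption, not the codebase's actual helper):

import numpy as np
from gym.spaces import Box, Dict

def update_obs_space_sketch(env, delta):
    # Copy the env's Dict observation space and add/overwrite one
    # unbounded Box entry per (key, shape) pair in `delta`.
    spaces = dict(env.observation_space.spaces)
    for key, shape in delta.items():
        spaces[key] = Box(-np.inf, np.inf, shape=tuple(shape), dtype=np.float32)
    return Dict(spaces)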
Example #14

    def __init__(self, env, team_index=None, n_teams=2):
        super().__init__(env)
        self.n_agents = self.metadata['n_actors']

        if team_index is None:
            assert n_teams >= 1, "Number of teams must be at least 1"
            # split teams: 5 agents and 3 teams will result in team_index = [0,0,1,1,2]
            team_index = np.array_split(np.arange(self.n_agents), n_teams)
            team_index = np.concatenate(
                [np.ones_like(ar) * i for i, ar in enumerate(team_index)])

        assert len(team_index) == self.n_agents, (
            "team_index parameter length must be equal to number of agents")
        if isinstance(team_index, np.ndarray):
            assert team_index.ndim == 1, (
                "team_index parameter must be numpy array of dimension 1")

        # store in metadata property that gets automatically inherited
        # make sure we copy value of team_index if it's a numpy array
        self.metadata['team_index'] = np.array(team_index)
        self.team_idx = np.array(team_index)
        self.observation_space = update_obs_space(
            env, {'team_size': (self.n_agents, 1)})
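The split in the team_index branch is worth seeing concretely; with 5 agents and 3 teams it reproduces the [0, 0, 1, 1, 2] assignment from the comment:

import numpy as np

n_agents, n_teams = 5, 3
parts = np.array_split(np.arange(n_agents), n_teams)
print(parts)  # [array([0, 1]), array([2, 3]), array([4])]
team_index = np.concatenate([np.ones_like(ar) * i for i, ar in enumerate(parts)])
print(team_index)  # [0 0 1 1 2]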
Example #15

    def __init__(self,
                 env,
                 body_names,
                 radius_multiplier=1.7,
                 grab_dist=None,
                 grab_exclusive=False,
                 obj_in_game_metadata_keys=None):
        super().__init__(env)
        self.n_agents = self.unwrapped.n_agents
        self.body_names = body_names
        self.n_obj = len(body_names)
        self.obj_in_game_metadata_keys = obj_in_game_metadata_keys
        self.action_space.spaces['action_pull'] = (Tuple(
            [MultiDiscrete([2] * self.n_obj) for _ in range(self.n_agents)]))

        self.observation_space = update_obs_space(env, {
            'obj_pull': (self.n_obj, 1),
            'you_pull': (self.n_obj, self.n_agents)
        })

        self.grab_radius = radius_multiplier * self.metadata['box_size']
        self.grab_dist = grab_dist
        self.grab_exclusive = grab_exclusive
Example #16
 def __init__(self, env, cone_angle=3/8 * np.pi):
     super().__init__(env)
     self.cone_angle = cone_angle
     self.n_agents = self.unwrapped.n_agents
     self.observation_space = update_obs_space(env, {'mask_aa_obs': (self.n_agents, self.n_agents)})
Example #17

 def __init__(self, env, vector_dim=16):
     super().__init__(env)
     self.vector_dim = vector_dim
     self.observation_space = update_obs_space(
         self, {'agent_identity': [self.metadata['n_agents'], self.vector_dim]})
Example #18
 def __init__(self, env, payoff_matrix):
     super().__init__(env)
     self.payoff_matrix = payoff_matrix
     self.n_agents = self.metadata['n_agents']
     self.action_space.spaces['action_defect'] = Tuple([Discrete(n=2) for _ in range(self.n_agents)])
     self.observation_space = update_obs_space(self, {'prev_ac': [self.n_agents, 1]})