# NOTE (editor's assumption): these make_env factories are excerpted from module
# files that import numpy as np together with the Base env, the world modules
# (Agents, Boxes, WallScenarios, ...) and the wrappers referenced below; the
# import blocks are elided here.
def make_env(n_substeps=15, horizon=80, deterministic_mode=False,
             floor_size=6.0, grid_size=30,
             n_agents=1,
             objective_diameter=[1, 1], objective_placement='center',
             num_rays_per_side=25, shelter_reward_scale=1,
             n_boxes=2, n_elongated_boxes=0, box_size=0.5, box_only_z_rot=False,
             lock_box=True, grab_box=True, grab_selective=False,
             lock_grab_radius=0.25, lock_type='any_lock_specific', grab_exclusive=False,
             grab_out_of_vision=False, lock_out_of_vision=True,
             box_floor_friction=0.2, other_friction=0.01, gravity=[0, 0, -50],
             action_lims=(-0.9, 0.9), polar_obs=True,
             n_lidar_per_agent=0, visualize_lidar=False, compress_lidar_scale=None,
             boxid_obs=True, boxsize_obs=True, team_size_obs=False,
             additional_obs={}):
    grab_radius_multiplier = lock_grab_radius / box_size
    lock_radius_multiplier = lock_grab_radius / box_size

    env = Base(n_agents=n_agents, n_substeps=n_substeps, horizon=horizon,
               floor_size=floor_size, grid_size=grid_size,
               action_lims=action_lims, deterministic_mode=deterministic_mode)

    env.add_module(WallScenarios(grid_size=grid_size, door_size=2,
                                 scenario='empty', friction=other_friction))

    if objective_placement == 'center':
        objective_placement_fn = center_placement
    elif objective_placement == 'uniform_away_from_walls':
        objective_placement_fn = uniform_placement_middle(0.7)

    env.add_module(Cylinders(1, diameter=objective_diameter, height=box_size,
                             make_static=True, placement_fn=objective_placement_fn))

    env.add_module(Agents(n_agents,
                          placement_fn=uniform_placement,
                          color=[np.array((66., 235., 244., 255.)) / 255] * n_agents,
                          friction=other_friction,
                          polar_obs=polar_obs))
    if np.max(n_boxes) > 0:
        env.add_module(Boxes(n_boxes=n_boxes, placement_fn=uniform_placement,
                             friction=box_floor_friction, polar_obs=polar_obs,
                             n_elongated_boxes=n_elongated_boxes,
                             boxid_obs=boxid_obs, boxsize_obs=boxsize_obs,
                             box_size=box_size, box_only_z_rot=box_only_z_rot))
    if n_lidar_per_agent > 0 and visualize_lidar:
        env.add_module(LidarSites(n_agents=n_agents, n_lidar_per_agent=n_lidar_per_agent))

    env.add_module(AgentManipulation())
    if box_floor_friction is not None:
        env.add_module(FloorAttributes(friction=box_floor_friction))
    env.add_module(WorldConstants(gravity=gravity))
    env.reset()

    keys_self = ['agent_qpos_qvel', 'hider', 'prep_obs']
    keys_mask_self = ['mask_aa_obs']
    keys_external = ['agent_qpos_qvel']
    keys_copy = ['you_lock', 'team_lock', 'ramp_you_lock', 'ramp_team_lock']
    keys_mask_external = []

    env = AddConstantObservationsWrapper(env, new_obs=additional_obs)
    keys_external += list(additional_obs)
    keys_mask_external += [ob for ob in additional_obs if 'mask' in ob]

    env = ShelterRewardWrapper(env, num_rays_per_side=num_rays_per_side,
                               reward_scale=shelter_reward_scale)
    env = SplitMultiAgentActions(env)

    if team_size_obs:
        keys_self += ['team_size']
    env = TeamMembership(env, np.zeros((n_agents,)))
    env = AgentAgentObsMask2D(env)
    env = DiscretizeActionWrapper(env, 'action_movement')

    if np.max(n_boxes) > 0:
        env = AgentGeomObsMask2D(env, pos_obs_key='box_pos',
                                 mask_obs_key='mask_ab_obs',
                                 geom_idxs_obs_key='box_geom_idxs')
        keys_external += ['mask_ab_obs', 'box_obs']
        keys_mask_external.append('mask_ab_obs')
    if lock_box and np.max(n_boxes) > 0:
        env = LockObjWrapper(env,
                             body_names=[f'moveable_box{i}' for i in range(n_boxes)],
                             agent_idx_allowed_to_lock=np.arange(n_agents),
                             lock_type=lock_type,
                             radius_multiplier=lock_radius_multiplier,
                             obj_in_game_metadata_keys=["curr_n_boxes"],
                             agent_allowed_to_lock_keys=None if lock_out_of_vision else ["mask_ab_obs"])
    if grab_box and np.max(n_boxes) > 0:
        env = GrabObjWrapper(env,
                             [f'moveable_box{i}' for i in range(n_boxes)],
                             radius_multiplier=grab_radius_multiplier,
                             grab_exclusive=grab_exclusive,
                             obj_in_game_metadata_keys=['curr_n_boxes'])

    if n_lidar_per_agent > 0:
        env = Lidar(env, n_lidar_per_agent=n_lidar_per_agent,
                    visualize_lidar=visualize_lidar,
                    compress_lidar_scale=compress_lidar_scale)
        keys_copy += ['lidar']
        keys_external += ['lidar']

    env = SplitObservations(env, keys_self + keys_mask_self, keys_copy=keys_copy)
    if n_agents == 1:
        env = SpoofEntityWrapper(env, 2, ['agent_qpos_qvel', 'hider', 'prep_obs'], ['mask_aa_obs'])
    env = SpoofEntityWrapper(env, n_boxes,
                             ['box_obs', 'you_lock', 'team_lock', 'obj_lock'],
                             ['mask_ab_obs'])
    keys_mask_external += ['mask_ab_obs_spoof']
    env = LockAllWrapper(env, remove_object_specific_lock=True)
    if not grab_out_of_vision and grab_box:
        env = MaskActionWrapper(env, 'action_pull', ['mask_ab_obs'])  # Can only pull if in vision
    if not grab_selective and grab_box:
        env = GrabClosestWrapper(env)
    env = DiscardMujocoExceptionEpisodes(env)
    env = ConcatenateObsWrapper(env, {'agent_qpos_qvel': ['agent_qpos_qvel', 'hider', 'prep_obs'],
                                      'box_obs': ['box_obs', 'you_lock', 'team_lock', 'obj_lock']})
    env = SelectKeysWrapper(env, keys_self=keys_self,
                            keys_other=keys_external + keys_mask_self + keys_mask_external)
    return env
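
# Hypothetical usage sketch (not part of the original source): builds the shelter
# env above and runs a short random rollout, assuming the gym-style
# reset()/step()/action_space API that these mujoco-worldgen wrappers expose.
def _demo_shelter_env(n_steps=10):
    env = make_env(n_agents=2, n_boxes=4, shelter_reward_scale=2)
    env.reset()
    for _ in range(n_steps):
        obs, rew, done, info = env.step(env.action_space.sample())
        if done:
            env.reset()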
def make_env(n_substeps=15, n_agents=3,
             floor_size=[1.5, 6], action_lims=(-0.9, 0.9), grid_size=60,
             other_friction=0.01, box_floor_friction=0.2, gravity=[0, 0, -50],
             horizon=1000, horizon_lower=None, horizon_upper=None,
             prob_per_step_to_stop=0.001,
             # Food
             n_food=1, n_food_cluster=1, food_radius=0.4,
             food_respawn_time=0, max_food_health=5, food_together_radius=0.4,
             food_rew_type='selfish', food_reward_scale=0.0,
             # Health
             max_agent_health=20, health_per_food_bounds=[2.1, 2.7],
             health_per_step=-1.0,
             # Attacking
             attack_range=0.7, attack_damage=-5.0, only_attack_in_front=True,
             # Death
             life_rew=1, death_rew=-100, steps_freeze_on_death=100,
             # Random Teams
             rusp_args={},
             # ID
             id_dim=16,
             # Action Masking
             mask_all_when_dead=True):
    env = Base(n_agents=n_agents, n_substeps=n_substeps, floor_size=floor_size,
               horizon=99999999999999,  # Just a big number so actual horizon is done by RandomizedHorizonWrapper
               action_lims=action_lims, deterministic_mode=False,
               grid_size=grid_size)

    if box_floor_friction is not None:
        env.add_module(FloorAttributes(friction=box_floor_friction))
    env.add_module(WorldConstants(gravity=gravity))

    env.add_module(Agents(n_agents,
                          placement_fn=uniform_placement,
                          friction=other_friction))

    # Food
    env.metadata['food_together_radius'] = food_together_radius
    assert n_food % n_food_cluster == 0
    cluster_assignments = np.repeat(np.arange(0, n_food, n_food // n_food_cluster),
                                    n_food // n_food_cluster)
    food_placement = [close_to_other_object_placement("food", i, "food_together_radius")
                      for i in cluster_assignments]
    # The first food item of each cluster is placed uniformly at random.
    food_placement[::n_food // n_food_cluster] = [uniform_placement] * n_food_cluster
    env.add_module(Food(n_food, placement_fn=food_placement))

    env.reset()

    keys_self = ['agent_qpos_qvel', 'agent_identity', 'agent_health',
                 'is_dead', 'time_to_alive', 'timestep']
    keys_additional_self_vf = ['fraction_episode_done', 'horizon']
    keys_copy = ['mask_is_dead']
    keys_other_agents = ['agent_qpos_qvel', 'agent_identity', 'agent_health',
                         'is_dead', 'time_to_alive']
    keys_additional_other_agents_vf = []
    keys_self_matrices = []

    add_rew_share_observation_keys(keys_self=keys_self,
                                   keys_additional_self_vf=keys_additional_self_vf,
                                   keys_other_agents=keys_other_agents,
                                   keys_additional_other_agents_vf=keys_additional_other_agents_vf,
                                   keys_self_matrices=keys_self_matrices,
                                   **rusp_args)

    keys_external = ['other_agents', 'other_agents_vf', 'additional_self_vf_obs']
    keys_self_masks = ['mask_aa_obs']

    env = SplitMultiAgentActions(env)
    env = DiscretizeActionWrapper(env, 'action_movement')
    env = AgentAgentObsMask2D(env)
    env = ZeroRews(env)

    env = RandomizedHorizonWrapper(env, lower_lim=horizon_lower or horizon,
                                   upper_lim=horizon_upper or horizon,
                                   prob_per_step_to_stop=prob_per_step_to_stop)

    env = FoodHealthWrapper(env,
                            respawn_time=(np.inf if food_respawn_time is None else food_respawn_time),
                            eat_thresh=(np.inf if food_radius is None else food_radius),
                            max_food_health=max_food_health,
                            food_rew_type=food_rew_type,
                            reward_scale=food_reward_scale,
                            split_eat_between_agents=True)
    keys_external += ['mask_af_obs', 'food_obs']
    keys_copy.append('close_enough_to_food')

    env = FoodIncreaseHealth(env, health_per_food_bounds=health_per_food_bounds)
    env = TimeDecreaseHealth(env, health_per_step=health_per_step)

    # Attack action should go before Food Health wrapper, since it masks eat action
    env = AttackAction(env, attack_damage=attack_damage, attack_range=attack_range,
                       only_attack_in_front=only_attack_in_front)
    env = ActionOptionsWrapper(env,
                               ['action_attack_agent', 'action_eat_food'],
                               {'action_attack_agent': -1, 'action_eat_food': 0})
    env = ColorAgentsByOption(env, 'action_choose_option',
                              ['action_attack_agent', 'action_eat_food', 'do_nothing'])
    keys_self.append('previous_choice')
    keys_other_agents.append('previous_choice')
    keys_self_matrices.append('attacked_me')
    keys_self.append('n_attacked_me')
    keys_other_agents += ['attacked_me', 'n_attacked_me']

    env = AgentHealthWrapper(env, max_health=max_agent_health,
                             death_rew=death_rew,
                             steps_freeze_on_death=steps_freeze_on_death,
                             life_rew=life_rew)

    # This needs to come before options wrapper, so we can't group it above
    env = AlwaysEatWrapper(env, agent_idx_allowed=np.arange(n_agents))

    env = RUSPWrapper(env, **rusp_args)
    env = RandomIdentityVector(env, vector_dim=id_dim)

    env = SplitObservations(env, keys_self + keys_additional_self_vf,
                            keys_copy=keys_copy,
                            keys_self_matrices=keys_self_matrices + keys_self_masks)
    env = ConcatenateObsWrapper(env,
                                {'other_agents': keys_other_agents,
                                 'other_agents_vf': ['other_agents'] + keys_additional_other_agents_vf,
                                 'additional_self_vf_obs': [k + '_self' for k in keys_additional_self_vf]})
    env = DiscardMujocoExceptionEpisodes(env)
    env = SelectKeysWrapper(env, keys_self=keys_self,
                            keys_other=keys_external + keys_copy + keys_self_masks)
    env = OasisActionMasks(env, mask_all_when_dead=mask_all_when_dead)
    return env
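
# Hypothetical smoke test (not in the original source): exercises the factory
# above, whose episode length is governed by RandomizedHorizonWrapper rather
# than the Base horizon. rusp_args is left at its default; the rollout assumes
# the usual gym-style API.
def _demo_oasis_env(max_steps=200):
    env = make_env(n_agents=3, horizon=100, horizon_lower=50, horizon_upper=150)
    env.reset()
    for _ in range(max_steps):
        obs, rew, done, info = env.step(env.action_space.sample())
        if done:
            break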
def make_env(n_substeps=15, horizon=80, deterministic_mode=False,
             floor_size=6.0, grid_size=30, door_size=2,
             n_hiders=1, n_seekers=1, max_n_agents=None,
             n_boxes=1, n_ramps=0, n_elongated_boxes=0,
             rand_num_elongated_boxes=False, n_min_boxes=None,
             box_size=0.5, boxid_obs=False, box_only_z_rot=True,
             rew_type='joint_zero_sum',
             lock_box=True, grab_box=True, lock_ramp=True,
             lock_type='any_lock_specific',
             lock_grab_radius=0.25, lock_out_of_vision=True,
             grab_exclusive=False, grab_out_of_vision=False, grab_selective=False,
             box_floor_friction=0.2, other_friction=0.01, gravity=[0, 0, -50],
             action_lims=(-0.9, 0.9), polar_obs=True,
             scenario='quadrant', quadrant_game_hider_uniform_placement=False,
             p_door_dropout=0.0,
             n_rooms=4, random_room_number=True, prob_outside_walls=1.0,
             n_lidar_per_agent=0, visualize_lidar=False, compress_lidar_scale=None,
             hiders_together_radius=None, seekers_together_radius=None,
             prep_fraction=0.4, prep_obs=False,
             team_size_obs=False,
             restrict_rect=None, penalize_objects_out=False,
             n_food=0, food_radius=None, food_respawn_time=None, max_food_health=1,
             food_together_radius=None, food_rew_type='selfish',
             eat_when_caught=False, food_reward_scale=1.0,
             food_normal_centered=False, food_box_centered=False,
             n_food_cluster=1):
    grab_radius_multiplier = lock_grab_radius / box_size
    lock_radius_multiplier = lock_grab_radius / box_size

    env = Base(n_agents=n_hiders + n_seekers, n_substeps=n_substeps, horizon=horizon,
               floor_size=floor_size, grid_size=grid_size,
               action_lims=action_lims, deterministic_mode=deterministic_mode)

    if scenario == 'randomwalls':
        env.add_module(RandomWalls(grid_size=grid_size, num_rooms=n_rooms,
                                   random_room_number=random_room_number,
                                   min_room_size=6, door_size=door_size,
                                   prob_outside_walls=prob_outside_walls,
                                   gen_door_obs=False))
        box_placement_fn = uniform_placement
        ramp_placement_fn = uniform_placement
        cell_size = env.floor_size / grid_size

        first_hider_placement = uniform_placement
        if hiders_together_radius is not None:
            htr_in_cells = np.ceil(hiders_together_radius / cell_size).astype(int)
            env.metadata['hiders_together_radius'] = htr_in_cells
            close_to_first_hider_placement = close_to_other_object_placement(
                "agent", 0, "hiders_together_radius")
            agent_placement_fn = [first_hider_placement] + \
                [close_to_first_hider_placement] * (n_hiders - 1)
        else:
            agent_placement_fn = [first_hider_placement] * n_hiders

        first_seeker_placement = uniform_placement
        if seekers_together_radius is not None:
            str_in_cells = np.ceil(seekers_together_radius / cell_size).astype(int)
            env.metadata['seekers_together_radius'] = str_in_cells
            close_to_first_seeker_placement = close_to_other_object_placement(
                "agent", n_hiders, "seekers_together_radius")
            agent_placement_fn += [first_seeker_placement] + \
                [close_to_first_seeker_placement] * (n_seekers - 1)
        else:
            agent_placement_fn += [first_seeker_placement] * n_seekers
    elif scenario == 'quadrant':
        env.add_module(WallScenarios(grid_size=grid_size, door_size=door_size,
                                     scenario=scenario, friction=other_friction,
                                     p_door_dropout=p_door_dropout))
        box_placement_fn = quadrant_placement
        ramp_placement_fn = uniform_placement
        hider_placement = uniform_placement if quadrant_game_hider_uniform_placement else quadrant_placement
        agent_placement_fn = [hider_placement] * n_hiders + [outside_quadrant_placement] * n_seekers
    else:
        raise ValueError(f"Scenario {scenario} not supported.")

    env.add_module(Agents(n_hiders + n_seekers,
                          placement_fn=agent_placement_fn,
                          color=[np.array((66., 235., 244., 255.)) / 255] * n_hiders
                                + [(1., 0., 0., 1.)] * n_seekers,
                          friction=other_friction,
                          polar_obs=polar_obs))
    if np.max(n_boxes) > 0:
        env.add_module(Boxes(n_boxes=n_boxes, placement_fn=box_placement_fn,
                             friction=box_floor_friction, polar_obs=polar_obs,
                             n_elongated_boxes=n_elongated_boxes,
                             boxid_obs=boxid_obs, box_only_z_rot=box_only_z_rot))
    if n_ramps > 0:
        env.add_module(Ramps(n_ramps=n_ramps, placement_fn=ramp_placement_fn,
                             friction=other_friction, polar_obs=polar_obs,
                             pad_ramp_size=(np.max(n_elongated_boxes) > 0)))
    if n_lidar_per_agent > 0 and visualize_lidar:
        env.add_module(LidarSites(n_agents=n_hiders + n_seekers,
                                  n_lidar_per_agent=n_lidar_per_agent))

    if n_food > 0:
        if scenario == 'quadrant':
            first_food_placement = quadrant_placement
        elif food_box_centered:
            first_food_placement = uniform_placement_middle(0.25)
        else:
            first_food_placement = uniform_placement
        if food_together_radius is not None:
            cell_size = env.floor_size / grid_size
            ftr_in_cells = np.ceil(food_together_radius / cell_size).astype(int)
            env.metadata['food_together_radius'] = ftr_in_cells

            assert n_food % n_food_cluster == 0
            cluster_assignments = np.repeat(np.arange(0, n_food, n_food // n_food_cluster),
                                            n_food // n_food_cluster)
            food_placement = [close_to_other_object_placement(
                "food", i, "food_together_radius") for i in cluster_assignments]
            food_placement[::n_food // n_food_cluster] = [first_food_placement] * n_food_cluster
        else:
            food_placement = first_food_placement
        env.add_module(Food(n_food, placement_fn=food_placement))

    env.add_module(AgentManipulation())
    if box_floor_friction is not None:
        env.add_module(FloorAttributes(friction=box_floor_friction))
    env.add_module(WorldConstants(gravity=gravity))
    env.reset()

    keys_self = ['agent_qpos_qvel', 'hider', 'prep_obs']
    keys_mask_self = ['mask_aa_obs']
    keys_external = ['agent_qpos_qvel']
    keys_copy = ['you_lock', 'team_lock', 'ramp_you_lock', 'ramp_team_lock']
    keys_mask_external = []

    env = SplitMultiAgentActions(env)
    if team_size_obs:
        keys_self += ['team_size']
    env = TeamMembership(env, np.append(np.zeros((n_hiders,)), np.ones((n_seekers,))))
    env = AgentAgentObsMask2D(env)
    hider_obs = np.array([[1]] * n_hiders + [[0]] * n_seekers)
    env = AddConstantObservationsWrapper(env, new_obs={'hider': hider_obs})
    env = HideAndSeekRewardWrapper(env, n_hiders=n_hiders, n_seekers=n_seekers,
                                   rew_type=rew_type)
    if restrict_rect is not None:
        env = RestrictAgentsRect(env, restrict_rect=restrict_rect,
                                 penalize_objects_out=penalize_objects_out)
    env = PreparationPhase(env, prep_fraction=prep_fraction)
    env = DiscretizeActionWrapper(env, 'action_movement')
    if np.max(n_boxes) > 0:
        env = AgentGeomObsMask2D(env, pos_obs_key='box_pos',
                                 mask_obs_key='mask_ab_obs',
                                 geom_idxs_obs_key='box_geom_idxs')
        keys_external += ['mask_ab_obs', 'box_obs']
        keys_mask_external.append('mask_ab_obs')
    if n_food:
        env = AgentSiteObsMask2D(env, pos_obs_key='food_pos',
                                 mask_obs_key='mask_af_obs')
        env = FoodHealthWrapper(env,
                                respawn_time=(np.inf if food_respawn_time is None else food_respawn_time),
                                eat_thresh=(np.inf if food_radius is None else food_radius),
                                max_food_health=max_food_health,
                                food_rew_type=food_rew_type,
                                reward_scale=food_reward_scale)
        env = MaskActionWrapper(env, 'action_eat_food', ['mask_af_obs'])  # Can only eat if in vision
        if prep_obs:
            env = MaskPrepPhaseAction(env, 'action_eat_food')
        if not eat_when_caught:
            env = MaskUnseenAction(env, 0, 'action_eat_food')
        eat_agents = np.arange(n_hiders)
        env = AlwaysEatWrapper(env, agent_idx_allowed=eat_agents)
        keys_external += ['mask_af_obs', 'food_obs']
        keys_mask_external.append('mask_af_obs')
    if lock_box and np.max(n_boxes) > 0:
        env = LockObjWrapper(env,
                             body_names=[f'moveable_box{i}' for i in range(np.max(n_boxes))],
                             agent_idx_allowed_to_lock=np.arange(n_hiders + n_seekers),
                             lock_type=lock_type,
                             radius_multiplier=lock_radius_multiplier,
                             obj_in_game_metadata_keys=["curr_n_boxes"],
                             agent_allowed_to_lock_keys=None if lock_out_of_vision else ["mask_ab_obs"])
    if n_ramps > 0:
        env = AgentGeomObsMask2D(env, pos_obs_key='ramp_pos',
                                 mask_obs_key='mask_ar_obs',
                                 geom_idxs_obs_key='ramp_geom_idxs')
        if lock_ramp:
            env = LockObjWrapper(env,
                                 body_names=[f'ramp{i}:ramp' for i in range(n_ramps)],
                                 agent_idx_allowed_to_lock=np.arange(n_hiders + n_seekers),
                                 lock_type=lock_type, ac_obs_prefix='ramp_',
                                 radius_multiplier=lock_radius_multiplier,
                                 obj_in_game_metadata_keys=['curr_n_ramps'],
                                 agent_allowed_to_lock_keys=None if lock_out_of_vision else ["mask_ar_obs"])
        keys_external += ['ramp_obs']
        keys_mask_external.append('mask_ar_obs')
    if grab_box and (np.max(n_boxes) > 0 or n_ramps > 0):
        env = GrabObjWrapper(env,
                             [f'moveable_box{i}' for i in range(np.max(n_boxes))]
                             + [f"ramp{i}:ramp" for i in range(n_ramps)],
                             radius_multiplier=grab_radius_multiplier,
                             grab_exclusive=grab_exclusive,
                             obj_in_game_metadata_keys=['curr_n_boxes', 'curr_n_ramps'])

    if n_lidar_per_agent > 0:
        env = Lidar(env, n_lidar_per_agent=n_lidar_per_agent,
                    visualize_lidar=visualize_lidar,
                    compress_lidar_scale=compress_lidar_scale)
        keys_copy += ['lidar']
        keys_external += ['lidar']

    if prep_obs:
        env = TrackStatWrapper(env, np.max(n_boxes), n_ramps, n_food)
    env = SplitObservations(env, keys_self + keys_mask_self, keys_copy=keys_copy)
    env = SpoofEntityWrapper(env, np.max(n_boxes),
                             ['box_obs', 'you_lock', 'team_lock', 'obj_lock'],
                             ['mask_ab_obs'])
    if n_food:
        env = SpoofEntityWrapper(env, n_food, ['food_obs'], ['mask_af_obs'])
    keys_mask_external += ['mask_ab_obs_spoof', 'mask_af_obs_spoof']
    if max_n_agents is not None:
        env = SpoofEntityWrapper(env, max_n_agents,
                                 ['agent_qpos_qvel', 'hider', 'prep_obs'],
                                 ['mask_aa_obs'])
    env = LockAllWrapper(env, remove_object_specific_lock=True)
    if not grab_out_of_vision and grab_box:
        # Can only pull if in vision
        env = MaskActionWrapper(env, 'action_pull',
                                ['mask_ab_obs'] + (['mask_ar_obs'] if n_ramps > 0 else []))
    if not grab_selective and grab_box:
        env = GrabClosestWrapper(env)
    env = NoActionsInPrepPhase(env, np.arange(n_hiders, n_hiders + n_seekers))
    env = DiscardMujocoExceptionEpisodes(env)
    env = ConcatenateObsWrapper(env, {'agent_qpos_qvel': ['agent_qpos_qvel', 'hider', 'prep_obs'],
                                      'box_obs': ['box_obs', 'you_lock', 'team_lock', 'obj_lock'],
                                      'ramp_obs': ['ramp_obs'] + (['ramp_you_lock', 'ramp_team_lock', 'ramp_obj_lock'] if lock_ramp else [])})
    env = SelectKeysWrapper(env, keys_self=keys_self,
                            keys_external=keys_external,
                            keys_mask=keys_mask_self + keys_mask_external,
                            flatten=False)
    return env
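
# Hypothetical usage sketch (not in the original source): the two wall scenarios
# the hide-and-seek factory above supports, assuming the gym-style API.
def _demo_hide_and_seek():
    # Quadrant game: hiders spawn inside the walled quadrant, seekers outside.
    quadrant_env = make_env(scenario='quadrant', n_hiders=2, n_seekers=2,
                            n_boxes=2, n_ramps=1)
    # Random-walls game with hiders spawned close to each other.
    randomwalls_env = make_env(scenario='randomwalls', n_rooms=4,
                               hiders_together_radius=1.0)
    for env in (quadrant_env, randomwalls_env):
        env.reset()
        env.step(env.action_space.sample())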
def make_env(n_substeps=15, horizon=80, deterministic_mode=False,
             floor_size=6.0, grid_size=30,
             n_agents=1,
             n_rooms=4, random_room_number=True, scenario='empty', door_size=2,
             n_sites=3, n_elongated_sites=0, site_placement='uniform_away_from_walls',
             reward_infos=[{'type': 'construction_dense'}],
             n_boxes=1, n_elongated_boxes=0, n_min_boxes=None,
             box_size=0.5, box_only_z_rot=False,
             lock_box=True, grab_box=True, grab_selective=False,
             lock_grab_radius=0.25, lock_type='any_lock_specific', grab_exclusive=False,
             grab_out_of_vision=False, lock_out_of_vision=True,
             box_floor_friction=0.2, other_friction=0.01, gravity=[0, 0, -50],
             action_lims=(-0.9, 0.9), polar_obs=True,
             n_lidar_per_agent=0, visualize_lidar=False, compress_lidar_scale=None,
             boxid_obs=True, boxsize_obs=True, team_size_obs=False,
             additional_obs={}):
    grab_radius_multiplier = lock_grab_radius / box_size
    lock_radius_multiplier = lock_grab_radius / box_size

    if type(n_sites) not in [list, np.ndarray]:
        n_sites = [n_sites, n_sites]

    env = Base(n_agents=n_agents, n_substeps=n_substeps, horizon=horizon,
               floor_size=floor_size, grid_size=grid_size,
               action_lims=action_lims, deterministic_mode=deterministic_mode)

    if scenario == 'randomwalls':
        env.add_module(RandomWalls(grid_size=grid_size, num_rooms=n_rooms,
                                   random_room_number=random_room_number,
                                   min_room_size=6, door_size=door_size,
                                   gen_door_obs=False))
    elif scenario == 'empty':
        env.add_module(WallScenarios(grid_size=grid_size, door_size=door_size,
                                     scenario='empty', friction=other_friction))

    env.add_module(Agents(n_agents,
                          placement_fn=uniform_placement,
                          color=[np.array((66., 235., 244., 255.)) / 255] * n_agents,
                          friction=other_friction,
                          polar_obs=polar_obs))
    if np.max(n_boxes) > 0:
        env.add_module(Boxes(n_boxes=n_boxes, placement_fn=uniform_placement,
                             friction=box_floor_friction, polar_obs=polar_obs,
                             n_elongated_boxes=n_elongated_boxes,
                             boxid_obs=boxid_obs, boxsize_obs=boxsize_obs,
                             box_size=box_size, box_only_z_rot=box_only_z_rot,
                             mark_box_corners=True))
    if n_sites[1] > 0:
        if site_placement == 'center':
            site_placement_fn = center_placement
        elif site_placement == 'uniform':
            site_placement_fn = uniform_placement
        elif site_placement == 'uniform_away_from_walls':
            site_placement_fn = uniform_placement_middle(0.85)
        else:
            raise ValueError(f'Site placement option: {site_placement} not implemented.'
                             ' Please choose from center, uniform and uniform_away_from_walls.')
        env.add_module(ConstructionSites(n_sites, placement_fn=site_placement_fn,
                                         site_size=box_size, site_height=box_size / 2,
                                         n_elongated_sites=n_elongated_sites))
    if n_lidar_per_agent > 0 and visualize_lidar:
        env.add_module(LidarSites(n_agents=n_agents, n_lidar_per_agent=n_lidar_per_agent))
    if np.max(n_boxes) > 0 and grab_box:
        env.add_module(AgentManipulation())
    if box_floor_friction is not None:
        env.add_module(FloorAttributes(friction=box_floor_friction))
    env.add_module(WorldConstants(gravity=gravity))
    env.reset()

    keys_self = ['agent_qpos_qvel']
    keys_mask_self = ['mask_aa_obs']
    keys_external = ['agent_qpos_qvel', 'construction_site_obs']
    keys_copy = ['you_lock', 'team_lock', 'ramp_you_lock', 'ramp_team_lock']
    keys_mask_external = []

    env = AddConstantObservationsWrapper(env, new_obs=additional_obs)
    keys_external += list(additional_obs)
    keys_mask_external += [ob for ob in additional_obs if 'mask' in ob]

    env = SplitMultiAgentActions(env)
    if team_size_obs:
        keys_self += ['team_size']
    env = TeamMembership(env, np.zeros((n_agents,)))
    env = AgentAgentObsMask2D(env)
    env = DiscretizeActionWrapper(env, 'action_movement')
    if np.max(n_boxes) > 0:
        env = AgentGeomObsMask2D(env, pos_obs_key='box_pos',
                                 mask_obs_key='mask_ab_obs',
                                 geom_idxs_obs_key='box_geom_idxs')
        keys_external += ['mask_ab_obs', 'box_obs']
        keys_mask_external.append('mask_ab_obs')
    if lock_box and np.max(n_boxes) > 0:
        agent_allowed_to_lock_keys = None if lock_out_of_vision else ["mask_ab_obs"]
        env = LockObjWrapper(env,
                             body_names=[f'moveable_box{i}' for i in range(n_boxes)],
                             agent_idx_allowed_to_lock=np.arange(n_agents),
                             lock_type=lock_type,
                             radius_multiplier=lock_radius_multiplier,
                             obj_in_game_metadata_keys=["curr_n_boxes"],
                             agent_allowed_to_lock_keys=agent_allowed_to_lock_keys)
    if grab_box and np.max(n_boxes) > 0:
        env = GrabObjWrapper(env,
                             [f'moveable_box{i}' for i in range(n_boxes)],
                             radius_multiplier=grab_radius_multiplier,
                             grab_exclusive=grab_exclusive,
                             obj_in_game_metadata_keys=['curr_n_boxes'])
    if n_lidar_per_agent > 0:
        env = Lidar(env, n_lidar_per_agent=n_lidar_per_agent,
                    visualize_lidar=visualize_lidar,
                    compress_lidar_scale=compress_lidar_scale)
        keys_copy += ['lidar']
        keys_external += ['lidar']

    env = ConstructionDistancesWrapper(env)
    env = NumpyArrayRewardWrapper(env)
    reward_wrappers = {
        'construction_dense': ConstructionDenseRewardWrapper,
        'construction_completed': ConstructionCompletedRewardWrapper,
    }
    for rew_info in reward_infos:
        # Pop 'type' from a copy so the caller's dicts (including the mutable
        # default argument above) are not mutated across repeated calls.
        rew_kwargs = dict(rew_info)
        rew_type = rew_kwargs.pop('type')
        env = reward_wrappers[rew_type](env, **rew_kwargs)

    env = SplitObservations(env, keys_self + keys_mask_self, keys_copy=keys_copy)
    if n_agents == 1:
        env = SpoofEntityWrapper(env, 2, ['agent_qpos_qvel'], ['mask_aa_obs'])
    env = SpoofEntityWrapper(env, n_boxes,
                             ['box_obs', 'you_lock', 'team_lock', 'obj_lock'],
                             ['mask_ab_obs'])
    env = SpoofEntityWrapper(env, n_sites[1], ['construction_site_obs'], ['mask_acs_obs'])
    keys_mask_external += ['mask_ab_obs_spoof', 'mask_acs_obs_spoof']
    env = LockAllWrapper(env, remove_object_specific_lock=True)
    if not grab_out_of_vision and grab_box:
        env = MaskActionWrapper(env, 'action_pull', ['mask_ab_obs'])  # Can only pull if in vision
    if not grab_selective and grab_box:
        env = GrabClosestWrapper(env)
    env = DiscardMujocoExceptionEpisodes(env)
    env = ConcatenateObsWrapper(env, {'agent_qpos_qvel': ['agent_qpos_qvel'],
                                      'box_obs': ['box_obs', 'you_lock', 'team_lock', 'obj_lock']})
    env = SelectKeysWrapper(env, keys_self=keys_self,
                            keys_external=keys_external,
                            keys_mask=keys_mask_self + keys_mask_external,
                            flatten=False)
    return env
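
# Hypothetical usage sketch (not in the original source): stacks both reward
# wrappers registered in the factory above. Any keys besides 'type' in each dict
# are forwarded to that wrapper's constructor, so extra kwargs would be
# assumptions about those wrappers; only 'type' is used here.
def _demo_construction_env():
    env = make_env(n_agents=2, n_boxes=4, n_sites=2,
                   reward_infos=[{'type': 'construction_dense'},
                                 {'type': 'construction_completed'}])
    env.reset()
    env.step(env.action_space.sample())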
def make_env(n_substeps=15, horizon=80, deterministic_mode=False,
             floor_size=6.0, grid_size=30, door_size=2,
             n_agents=1, fixed_agent_spawn=False,
             lock_box=True, grab_box=True, grab_selective=False,
             lock_type='any_lock_specific',
             lock_grab_radius=0.25, grab_exclusive=False,
             grab_out_of_vision=False, lock_out_of_vision=True,
             box_floor_friction=0.2, other_friction=0.01, gravity=[0, 0, -50],
             action_lims=(-0.9, 0.9), polar_obs=True,
             scenario='quadrant', p_door_dropout=0.0,
             n_rooms=4, random_room_number=True,
             n_lidar_per_agent=0, visualize_lidar=False, compress_lidar_scale=None,
             n_boxes=2, box_size=0.5, box_only_z_rot=False,
             boxid_obs=True, boxsize_obs=True, pad_ramp_size=True,
             additional_obs={},
             # lock-box task
             task_type='all', lock_reward=5.0, unlock_penalty=7.0,
             shaped_reward_scale=0.25, return_threshold=0.1,
             # ramps
             n_ramps=0):
    grab_radius_multiplier = lock_grab_radius / box_size
    lock_radius_multiplier = lock_grab_radius / box_size

    env = Base(n_agents=n_agents, n_substeps=n_substeps, floor_size=floor_size,
               horizon=horizon, action_lims=action_lims,
               deterministic_mode=deterministic_mode, grid_size=grid_size)

    if scenario == 'randomwalls':
        env.add_module(RandomWalls(grid_size=grid_size, num_rooms=n_rooms,
                                   random_room_number=random_room_number,
                                   min_room_size=6, door_size=door_size,
                                   gen_door_obs=False))
        box_placement_fn = uniform_placement
        ramp_placement_fn = uniform_placement
        agent_placement_fn = uniform_placement if not fixed_agent_spawn else center_placement
    elif scenario == 'quadrant':
        env.add_module(WallScenarios(grid_size=grid_size, door_size=door_size,
                                     scenario=scenario, friction=other_friction,
                                     p_door_dropout=p_door_dropout))
        box_placement_fn = uniform_placement
        ramp_placement_fn = uniform_placement
        agent_placement_fn = quadrant_placement if not fixed_agent_spawn else center_placement
    elif scenario == 'empty':
        env.add_module(WallScenarios(grid_size=grid_size, door_size=2, scenario='empty'))
        box_placement_fn = uniform_placement
        ramp_placement_fn = uniform_placement
        agent_placement_fn = center_placement
    elif 'var_tri' in scenario:
        env.add_module(WallScenarios(grid_size=grid_size, door_size=door_size,
                                     scenario='var_tri'))
        ramp_placement_fn = [tri_placement(i % 3) for i in range(n_ramps)]
        agent_placement_fn = center_placement if fixed_agent_spawn else \
            (uniform_placement if 'uniform' in scenario else rotate_tri_placement)
        box_placement_fn = uniform_placement if 'uniform' in scenario else rotate_tri_placement
    else:
        raise ValueError(f"Scenario {scenario} not supported.")

    env.add_module(Agents(n_agents,
                          placement_fn=agent_placement_fn,
                          color=[np.array((66., 235., 244., 255.)) / 255] * n_agents,
                          friction=other_friction,
                          polar_obs=polar_obs))
    if np.max(n_boxes) > 0:
        env.add_module(Boxes(n_boxes=n_boxes, placement_fn=box_placement_fn,
                             friction=box_floor_friction, polar_obs=polar_obs,
                             n_elongated_boxes=0,
                             boxid_obs=boxid_obs, box_only_z_rot=box_only_z_rot,
                             boxsize_obs=boxsize_obs))
    if n_ramps > 0:
        env.add_module(Ramps(n_ramps=n_ramps, placement_fn=ramp_placement_fn,
                             friction=other_friction, polar_obs=polar_obs,
                             pad_ramp_size=pad_ramp_size))
    if n_lidar_per_agent > 0 and visualize_lidar:
        env.add_module(LidarSites(n_agents=n_agents, n_lidar_per_agent=n_lidar_per_agent))
    if np.max(n_boxes) > 0 and grab_box:
        env.add_module(AgentManipulation())
    if box_floor_friction is not None:
        env.add_module(FloorAttributes(friction=box_floor_friction))
    env.add_module(WorldConstants(gravity=gravity))
    env.reset()

    keys_self = ['agent_qpos_qvel', 'hider', 'prep_obs']
    keys_mask_self = ['mask_aa_obs']
    keys_external = ['agent_qpos_qvel']
    keys_copy = ['you_lock', 'team_lock']
    keys_mask_external = []

    env = SplitMultiAgentActions(env)
    env = TeamMembership(env, np.zeros((n_agents,)))
    env = AgentAgentObsMask2D(env)
    env = DiscretizeActionWrapper(env, 'action_movement')
    env = NumpyArrayRewardWrapper(env)
    if np.max(n_boxes) > 0:
        env = AgentGeomObsMask2D(env, pos_obs_key='box_pos',
                                 mask_obs_key='mask_ab_obs',
                                 geom_idxs_obs_key='box_geom_idxs')
        keys_external += ['mask_ab_obs', 'box_obs']
        keys_mask_external.append('mask_ab_obs')
    if lock_box and np.max(n_boxes) > 0:
        env = LockObjWrapper(env,
                             body_names=[f'moveable_box{i}' for i in range(n_boxes)],
                             agent_idx_allowed_to_lock=np.arange(n_agents),
                             lock_type=lock_type,
                             radius_multiplier=lock_radius_multiplier,
                             obj_in_game_metadata_keys=["curr_n_boxes"],
                             agent_allowed_to_lock_keys=None if lock_out_of_vision else ["mask_ab_obs"])
    if n_ramps > 0:
        env = AgentGeomObsMask2D(env, pos_obs_key='ramp_pos',
                                 mask_obs_key='mask_ar_obs',
                                 geom_idxs_obs_key='ramp_geom_idxs')
        env = LockObjWrapper(env,
                             body_names=[f"ramp{i}:ramp" for i in range(n_ramps)],
                             agent_idx_allowed_to_lock=np.arange(n_agents),
                             lock_type=lock_type, ac_obs_prefix='ramp_',
                             radius_multiplier=lock_radius_multiplier,
                             agent_allowed_to_lock_keys=None if lock_out_of_vision else ["mask_ar_obs"])
        keys_external += ['ramp_obs']
        keys_mask_external += ['mask_ar_obs']
        keys_copy += ['ramp_you_lock', 'ramp_team_lock']
    if grab_box and np.max(n_boxes) > 0:
        body_names = ([f'moveable_box{i}' for i in range(n_boxes)]
                      + [f"ramp{i}:ramp" for i in range(n_ramps)])
        obj_in_game_meta_keys = ['curr_n_boxes'] + (['curr_n_ramps'] if n_ramps > 0 else [])
        env = GrabObjWrapper(env, body_names=body_names,
                             radius_multiplier=grab_radius_multiplier,
                             grab_exclusive=grab_exclusive,
                             obj_in_game_metadata_keys=obj_in_game_meta_keys)
    if n_lidar_per_agent > 0:
        env = Lidar(env, n_lidar_per_agent=n_lidar_per_agent,
                    visualize_lidar=visualize_lidar,
                    compress_lidar_scale=compress_lidar_scale)
        keys_copy += ['lidar']
        keys_external += ['lidar']

    env = AddConstantObservationsWrapper(env, new_obs=additional_obs)
    keys_external += list(additional_obs)
    keys_mask_external += [ob for ob in additional_obs if 'mask' in ob]

    # Lock-box task reward
    env = LockObjectsTask(env, n_objs=n_boxes, task=task_type, fixed_order=True,
                          obj_lock_obs_key='obj_lock', obj_pos_obs_key='box_pos',
                          act_lock_key='action_glue', agent_pos_key='agent_pos',
                          lock_reward=lock_reward, unlock_penalty=unlock_penalty,
                          shaped_reward_scale=shaped_reward_scale,
                          return_threshold=return_threshold)

    env = SplitObservations(env, keys_self + keys_mask_self, keys_copy=keys_copy)
    env = SpoofEntityWrapper(env, n_boxes,
                             ['box_obs', 'you_lock', 'team_lock', 'obj_lock'],
                             ['mask_ab_obs'])
    keys_mask_external += ['mask_ab_obs_spoof']
    if n_agents < 2:
        env = SpoofEntityWrapper(env, 1, ['agent_qpos_qvel', 'hider', 'prep_obs'], ['mask_aa_obs'])
    env = LockAllWrapper(env, remove_object_specific_lock=True)
    if not grab_out_of_vision and grab_box:
        # Can only pull if in vision
        mask_keys = ['mask_ab_obs'] + (['mask_ar_obs'] if n_ramps > 0 else [])
        env = MaskActionWrapper(env, action_key='action_pull', mask_keys=mask_keys)
    if not grab_selective and grab_box:
        env = GrabClosestWrapper(env)
    env = DiscardMujocoExceptionEpisodes(env)
    env = ConcatenateObsWrapper(env, {'agent_qpos_qvel': ['agent_qpos_qvel', 'hider', 'prep_obs'],
                                      'box_obs': ['box_obs', 'you_lock', 'team_lock', 'obj_lock'],
                                      'ramp_obs': ['ramp_obs', 'ramp_you_lock', 'ramp_team_lock', 'ramp_obj_lock']})
    env = SelectKeysWrapper(env, keys_self=keys_self,
                            keys_external=keys_external,
                            keys_mask=keys_mask_self + keys_mask_external,
                            flatten=False)
    return env
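
# Hypothetical usage sketch (not in the original source): builds the lock-box
# task above with its default task_type='all' (other task_type values would be
# assumptions about LockObjectsTask, so only 'all' is shown) and runs one
# random step, assuming the gym-style API.
def _demo_lock_box_env():
    env = make_env(scenario='quadrant', n_agents=1, n_boxes=2,
                   task_type='all', lock_reward=5.0, unlock_penalty=7.0)
    env.reset()
    env.step(env.action_space.sample())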