def make_env(n_substeps=5, horizon=250, deterministic_mode=False,
             n_agents=2, n_boxes=2, n_ramps=1):
    '''
        This make_env function is not used anywhere; it exists to provide a simple,
        bare-bones example of how to construct a multi-agent environment using the
        modules framework.
    '''
    env = Base(n_agents=n_agents, n_substeps=n_substeps, horizon=horizon,
               deterministic_mode=deterministic_mode)
    env.add_module(RandomWalls(grid_size=30, num_rooms=4,
                               min_room_size=6, door_size=2))
    if n_boxes > 0:
        env.add_module(Boxes(n_boxes=n_boxes))
    if n_ramps > 0:
        env.add_module(Ramps(n_ramps=n_ramps))
    env.add_module(Agents(n_agents))
    env.reset()
    keys_self = ['agent_qpos_qvel']
    keys_mask_self = ['mask_aa_obs']
    keys_external = ['agent_qpos_qvel']
    keys_mask_external = []
    env = SplitMultiAgentActions(env)
    env = DiscretizeActionWrapper(env, 'action_movement')
    env = AgentAgentObsMask2D(env)
    env = SplitObservations(env, keys_self + keys_mask_self)
    env = SelectKeysWrapper(env, keys_self=keys_self,
                            keys_external=keys_external,
                            keys_mask=keys_mask_self + keys_mask_external,
                            flatten=False)
    env = DiscardMujocoExceptionEpisodes(env)
    return env
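
# Illustrative usage sketch (not part of the original module): assuming the
# wrapped env preserves the standard gym interface, a short random rollout
# looks like this. The helper name below is hypothetical.
def _example_random_rollout():
    env = make_env(n_agents=2, n_boxes=2, n_ramps=1)
    obs = env.reset()
    for _ in range(10):
        # Sample a random multi-agent action and step the environment.
        obs, rew, done, info = env.step(env.action_space.sample())
        if done:
            obs = env.reset()
    return env
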
def BoxLockingEnv(args, n_substeps=15, horizon=120, deterministic_mode=False,
                  floor_size=6.0, grid_size=30, door_size=2,
                  n_agents=1, fixed_agent_spawn=False,
                  lock_box=True, grab_box=True, grab_selective=False,
                  lock_type='all_lock_team_specific',
                  lock_grab_radius=0.25, grab_exclusive=False,
                  grab_out_of_vision=False, lock_out_of_vision=False,
                  box_floor_friction=0.2, other_friction=0.01,
                  gravity=[0, 0, -50],
                  action_lims=(-0.9, 0.9), polar_obs=True,
                  scenario='quadrant', p_door_dropout=0.5,
                  n_rooms=2, random_room_number=False,
                  n_lidar_per_agent=0, visualize_lidar=False,
                  compress_lidar_scale=None,
                  n_boxes=3, box_size=0.5, box_only_z_rot=True,
                  boxid_obs=False, boxsize_obs=False,
                  pad_ramp_size=True, additional_obs={},
                  # lock-box task
                  task_type='order-return', lock_reward=5.0,
                  unlock_penalty=5.0, shaped_reward_scale=0.5,
                  return_threshold=0.1,
                  # ramps
                  n_ramps=0):
    scenario = args.scenario_name
    n_agents = args.num_agents
    task_type = args.task_type
    n_boxes = args.num_boxes
    # assert n_agents==1, ("only 1 agents is supported, check the config.py.")

    grab_radius_multiplier = lock_grab_radius / box_size
    lock_radius_multiplier = lock_grab_radius / box_size

    env = Base(n_agents=n_agents, n_substeps=n_substeps, floor_size=floor_size,
               horizon=horizon, action_lims=action_lims,
               deterministic_mode=deterministic_mode, grid_size=grid_size)

    if scenario == 'randomwalls':
        env.add_module(RandomWalls(n_agents=n_agents, grid_size=grid_size,
                                   num_rooms=n_rooms,
                                   random_room_number=random_room_number,
                                   min_room_size=6, door_size=door_size,
                                   gen_door_obs=False))
        box_placement_fn = uniform_placement
        ramp_placement_fn = uniform_placement
        agent_placement_fn = uniform_placement if not fixed_agent_spawn else center_placement
    elif scenario == 'quadrant':
        env.add_module(WallScenarios(n_agents=n_agents, grid_size=grid_size,
                                     door_size=door_size, scenario=scenario,
                                     friction=other_friction,
                                     p_door_dropout=p_door_dropout))
        box_placement_fn = uniform_placement
        ramp_placement_fn = uniform_placement
        agent_placement_fn = quadrant_placement if not fixed_agent_spawn else center_placement
    elif scenario == 'empty':
        env.add_module(WallScenarios(n_agents=n_agents, grid_size=grid_size,
                                     door_size=2, scenario='empty'))
        box_placement_fn = uniform_placement
        ramp_placement_fn = uniform_placement
        agent_placement_fn = center_placement
    elif 'var_tri' in scenario:
        env.add_module(WallScenarios(n_agents=n_agents, grid_size=grid_size,
                                     door_size=door_size, scenario='var_tri'))
        ramp_placement_fn = [tri_placement(i % 3) for i in range(n_ramps)]
        agent_placement_fn = center_placement if fixed_agent_spawn else \
            (uniform_placement if 'uniform' in scenario else rotate_tri_placement)
        box_placement_fn = uniform_placement if 'uniform' in scenario else rotate_tri_placement
    else:
        raise ValueError(f"Scenario {scenario} not supported.")

    env.add_module(Agents(n_agents,
                          placement_fn=agent_placement_fn,
                          color=[np.array((66., 235., 244., 255.)) / 255] * n_agents,
                          friction=other_friction,
                          polar_obs=polar_obs))
    if np.max(n_boxes) > 0:
        env.add_module(Boxes(n_boxes=n_boxes, placement_fn=box_placement_fn,
                             friction=box_floor_friction, polar_obs=polar_obs,
                             n_elongated_boxes=0,
                             boxid_obs=boxid_obs,
                             box_only_z_rot=box_only_z_rot,
                             boxsize_obs=boxsize_obs))
    if n_ramps > 0:
        env.add_module(Ramps(n_ramps=n_ramps, placement_fn=ramp_placement_fn,
                             friction=other_friction, polar_obs=polar_obs,
                             pad_ramp_size=pad_ramp_size))
    if n_lidar_per_agent > 0 and visualize_lidar:
        env.add_module(LidarSites(n_agents=n_agents,
                                  n_lidar_per_agent=n_lidar_per_agent))
    if np.max(n_boxes) > 0 and grab_box:
        env.add_module(AgentManipulation())
    if box_floor_friction is not None:
        env.add_module(FloorAttributes(friction=box_floor_friction))
    env.add_module(WorldConstants(gravity=gravity))
    env.reset()

    if 'var_tri' in scenario or "randomwalls" in scenario:
        keys_self = ['agent_qpos_qvel', 'current_step']
    else:
        keys_self = ['agent_qpos_qvel', 'current_step', 'vector_door_obs']
    keys_mask_self = ['mask_aa_obs']
    keys_external = ['agent_qpos_qvel']
    keys_copy = ['you_lock', 'team_lock']
    keys_mask_external = []

    env = SplitMultiAgentActions(env)
    env = TeamMembership(env, np.zeros((n_agents,)))
    env = AgentAgentObsMask2D(env)
    env = DiscretizeActionWrapper(env, 'action_movement')
    env = NumpyArrayRewardWrapper(env)
    env = TimeWrapper(env, horizon)
    if np.max(n_boxes) > 0:
        env = AgentGeomObsMask2D(env, pos_obs_key='box_pos',
                                 mask_obs_key='mask_ab_obs',
                                 geom_idxs_obs_key='box_geom_idxs')
        keys_external += ['mask_ab_obs', 'box_obs']
        keys_mask_external.append('mask_ab_obs')
    if lock_box and np.max(n_boxes) > 0:
        env = LockObjWrapper(env,
                             body_names=[f'moveable_box{i}' for i in range(n_boxes)],
                             agent_idx_allowed_to_lock=np.arange(n_agents),
                             lock_type=lock_type,
                             radius_multiplier=lock_radius_multiplier,
                             obj_in_game_metadata_keys=["curr_n_boxes"],
                             agent_allowed_to_lock_keys=None if lock_out_of_vision else ["mask_ab_obs"])
    if n_ramps > 0:
        env = AgentGeomObsMask2D(env, pos_obs_key='ramp_pos',
                                 mask_obs_key='mask_ar_obs',
                                 geom_idxs_obs_key='ramp_geom_idxs')
        env = LockObjWrapper(env,
                             body_names=[f"ramp{i}:ramp" for i in range(n_ramps)],
                             agent_idx_allowed_to_lock=np.arange(n_agents),
                             lock_type=lock_type, ac_obs_prefix='ramp_',
                             radius_multiplier=lock_radius_multiplier,
                             agent_allowed_to_lock_keys=None if lock_out_of_vision else ["mask_ar_obs"])
        keys_external += ['ramp_obs']
        keys_mask_external += ['mask_ar_obs']
        keys_copy += ['ramp_you_lock', 'ramp_team_lock']
    if grab_box and np.max(n_boxes) > 0:
        body_names = ([f'moveable_box{i}' for i in range(n_boxes)] +
                      [f"ramp{i}:ramp" for i in range(n_ramps)])
        obj_in_game_meta_keys = ['curr_n_boxes'] + (['curr_n_ramps'] if n_ramps > 0 else [])
        env = GrabObjWrapper(env, body_names=body_names,
                             radius_multiplier=grab_radius_multiplier,
                             grab_exclusive=grab_exclusive,
                             obj_in_game_metadata_keys=obj_in_game_meta_keys)
    if n_lidar_per_agent > 0:
        env = Lidar(env, n_lidar_per_agent=n_lidar_per_agent,
                    visualize_lidar=visualize_lidar,
                    compress_lidar_scale=compress_lidar_scale)
        keys_copy += ['lidar']
        keys_external += ['lidar']

    env = AddConstantObservationsWrapper(env, new_obs=additional_obs)
    keys_external += list(additional_obs)
    keys_mask_external += [ob for ob in additional_obs if 'mask' in ob]

    #############################################
    # lock-box task reward
    env = LockObjectsTask(env, n_objs=n_boxes, task=task_type, fixed_order=True,
                          obj_lock_obs_key='obj_lock', obj_pos_obs_key='box_pos',
                          act_lock_key='action_glue', agent_pos_key='agent_pos',
                          lock_reward=lock_reward, unlock_penalty=unlock_penalty,
                          shaped_reward_scale=shaped_reward_scale,
                          return_threshold=return_threshold)
    #############################################

    env = SplitObservations(env, keys_self + keys_mask_self, keys_copy=keys_copy)
    env = SpoofEntityWrapper(env, n_boxes,
                             ['box_obs', 'you_lock', 'team_lock', 'obj_lock'],
                             ['mask_ab_obs'])
    keys_mask_external += ['mask_ab_obs_spoof']
    if n_agents < 2:
        env = SpoofEntityWrapper(env, 1, ['agent_qpos_qvel'], ['mask_aa_obs'])
    env = LockAllWrapper(env, remove_object_specific_lock=True)
    if not grab_out_of_vision and grab_box:
        # Can only pull if in vision
        mask_keys = ['mask_ab_obs'] + (['mask_ar_obs'] if n_ramps > 0 else [])
        env = MaskActionWrapper(env, action_key='action_pull', mask_keys=mask_keys)
    if not grab_selective and grab_box:
        env = GrabClosestWrapper(env)
    env = DiscardMujocoExceptionEpisodes(env, n_agents)
    if n_ramps > 0:
        env = ConcatenateObsWrapper(env, {
            'agent_qpos_qvel': ['agent_qpos_qvel'],
            'box_obs': ['box_obs', 'you_lock', 'team_lock', 'obj_lock'],
            'ramp_obs': ['ramp_obs', 'ramp_you_lock', 'ramp_team_lock', 'ramp_obj_lock']})
    else:
        env = ConcatenateObsWrapper(env, {
            'agent_qpos_qvel': ['agent_qpos_qvel'],
            'box_obs': ['box_obs', 'you_lock', 'team_lock', 'obj_lock']})
    env = SelectKeysWrapper(env, keys_self=keys_self,
                            keys_external=keys_external,
                            keys_mask=keys_mask_self + keys_mask_external,
                            flatten=False)
    return env
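
# Illustrative usage sketch (not part of the original module): BoxLockingEnv
# reads scenario_name / num_agents / task_type / num_boxes off `args`, so any
# attribute container works. The Namespace values below are assumptions chosen
# to match the defaults above; the helper name is hypothetical.
def _example_box_locking():
    from argparse import Namespace
    args = Namespace(scenario_name='quadrant', num_agents=1,
                     task_type='order-return', num_boxes=3)
    env = BoxLockingEnv(args)
    obs = env.reset()
    return env, obs
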
def HideAndSeekEnv(n_substeps=15, horizon=80, deterministic_mode=False,
                   floor_size=6.0, grid_size=30, door_size=2,
                   n_hiders=2, n_seekers=2, max_n_agents=None,
                   n_boxes=2, n_ramps=1, n_elongated_boxes=0,
                   rand_num_elongated_boxes=False, n_min_boxes=None,
                   box_size=2.0, boxid_obs=False, box_only_z_rot=True,
                   rew_type='joint_zero_sum',
                   lock_box=True, grab_box=True, lock_ramp=False,
                   lock_type='all_lock_team_specific',
                   lock_grab_radius=0.25, lock_out_of_vision=True,
                   grab_exclusive=False, grab_out_of_vision=False,
                   grab_selective=False,
                   box_floor_friction=0.2, other_friction=0.01,
                   gravity=[0, 0, -50],
                   action_lims=(-0.9, 0.9), polar_obs=True,
                   scenario='quadrant',
                   quadrant_game_hider_uniform_placement=False,
                   p_door_dropout=0.5,
                   n_rooms=4, random_room_number=True, prob_outside_walls=1.0,
                   n_lidar_per_agent=0, visualize_lidar=False,
                   compress_lidar_scale=None,
                   hiders_together_radius=None, seekers_together_radius=None,
                   prep_fraction=0.4, prep_obs=True,
                   team_size_obs=False,
                   restrict_rect=[0.1, 0.1, 5.9, 5.9],
                   penalize_objects_out=True,
                   n_food=0, food_radius=None, food_respawn_time=None,
                   max_food_health=1, food_together_radius=None,
                   food_rew_type='selfish', eat_when_caught=False,
                   food_reward_scale=1.0, food_normal_centered=False,
                   food_box_centered=False, n_food_cluster=1):
    grab_radius_multiplier = lock_grab_radius / box_size
    lock_radius_multiplier = lock_grab_radius / box_size

    env = Base(n_agents=n_hiders + n_seekers, n_substeps=n_substeps,
               horizon=horizon, floor_size=floor_size, grid_size=grid_size,
               action_lims=action_lims, deterministic_mode=deterministic_mode)

    if scenario == 'randomwalls':
        env.add_module(RandomWalls(n_agents=n_hiders + n_seekers,
                                   grid_size=grid_size, num_rooms=n_rooms,
                                   random_room_number=random_room_number,
                                   min_room_size=6, door_size=door_size,
                                   prob_outside_walls=prob_outside_walls,
                                   gen_door_obs=False))
        box_placement_fn = uniform_placement
        ramp_placement_fn = uniform_placement
        cell_size = env.floor_size / grid_size

        first_hider_placement = uniform_placement
        if hiders_together_radius is not None:
            htr_in_cells = np.ceil(hiders_together_radius / cell_size).astype(int)
            env.metadata['hiders_together_radius'] = htr_in_cells
            close_to_first_hider_placement = close_to_other_object_placement(
                "agent", 0, "hiders_together_radius")
            agent_placement_fn = [first_hider_placement] + \
                [close_to_first_hider_placement] * (n_hiders - 1)
        else:
            agent_placement_fn = [first_hider_placement] * n_hiders

        first_seeker_placement = uniform_placement
        if seekers_together_radius is not None:
            str_in_cells = np.ceil(seekers_together_radius / cell_size).astype(int)
            env.metadata['seekers_together_radius'] = str_in_cells
            close_to_first_seeker_placement = close_to_other_object_placement(
                "agent", n_hiders, "seekers_together_radius")
            agent_placement_fn += [first_seeker_placement] + \
                [close_to_first_seeker_placement] * (n_seekers - 1)
        else:
            agent_placement_fn += [first_seeker_placement] * n_seekers
    elif scenario == 'quadrant':
        env.add_module(WallScenarios(n_agents=n_hiders + n_seekers,
                                     grid_size=grid_size, door_size=door_size,
                                     scenario=scenario, friction=other_friction,
                                     p_door_dropout=p_door_dropout))
        box_placement_fn = quadrant_placement
        ramp_placement_fn = nearwall_placement  # uniform_placement
        hider_placement = uniform_placement if quadrant_game_hider_uniform_placement else quadrant_placement
        agent_placement_fn = [hider_placement] * n_hiders + \
            [outside_quadrant_placement] * n_seekers
    else:
        raise ValueError(f"Scenario {scenario} not supported.")

    env.add_module(Agents(n_hiders + n_seekers,
                          placement_fn=agent_placement_fn,
                          color=[np.array((66., 235., 244., 255.)) / 255] * n_hiders +
                                [(1., 0., 0., 1.)] * n_seekers,
                          friction=other_friction,
                          polar_obs=polar_obs))
    if np.max(n_boxes) > 0:
        env.add_module(Boxes(n_boxes=n_boxes, placement_fn=box_placement_fn,
                             friction=box_floor_friction, polar_obs=polar_obs,
                             n_elongated_boxes=n_elongated_boxes,
                             boxid_obs=boxid_obs, box_only_z_rot=box_only_z_rot))
    if n_ramps > 0:
        env.add_module(Ramps(n_ramps=n_ramps, placement_fn=ramp_placement_fn,
                             friction=other_friction, polar_obs=polar_obs,
                             pad_ramp_size=(np.max(n_elongated_boxes) > 0)))
    if n_lidar_per_agent > 0 and visualize_lidar:
        env.add_module(LidarSites(n_agents=n_hiders + n_seekers,
                                  n_lidar_per_agent=n_lidar_per_agent))
    if n_food > 0:
        if scenario == 'quadrant':
            first_food_placement = quadrant_placement
        elif food_box_centered:
            first_food_placement = uniform_placement_middle(0.25)
        else:
            first_food_placement = uniform_placement
        if food_together_radius is not None:
            cell_size = env.floor_size / grid_size
            ftr_in_cells = np.ceil(food_together_radius / cell_size).astype(int)
            env.metadata['food_together_radius'] = ftr_in_cells
            assert n_food % n_food_cluster == 0
            cluster_assignments = np.repeat(
                np.arange(0, n_food, n_food // n_food_cluster),
                n_food // n_food_cluster)
            food_placement = [close_to_other_object_placement(
                "food", i, "food_together_radius") for i in cluster_assignments]
            food_placement[::n_food // n_food_cluster] = \
                [first_food_placement] * n_food_cluster
        else:
            food_placement = first_food_placement
        env.add_module(Food(n_food, placement_fn=food_placement))
    env.add_module(AgentManipulation())
    if box_floor_friction is not None:
        env.add_module(FloorAttributes(friction=box_floor_friction))
    env.add_module(WorldConstants(gravity=gravity))
    env.reset()

    keys_self = ['agent_qpos_qvel', 'hider', 'prep_obs',
                 'current_step', 'vector_door_obs']
    keys_mask_self = ['mask_aa_obs']
    keys_external = ['agent_qpos_qvel']
    keys_copy = ['you_lock', 'team_lock', 'ramp_you_lock', 'ramp_team_lock']
    keys_mask_external = []

    env = SplitMultiAgentActions(env)
    if team_size_obs:
        keys_self += ['team_size']
    env = TeamMembership(env, np.append(np.zeros((n_hiders,)),
                                        np.ones((n_seekers,))))
    env = AgentAgentObsMask2D(env)
    hider_obs = np.array([[1]] * n_hiders + [[0]] * n_seekers)
    env = AddConstantObservationsWrapper(env, new_obs={'hider': hider_obs})
    env = HideAndSeekRewardWrapper(env, n_hiders=n_hiders,
                                   n_seekers=n_seekers, rew_type=rew_type)
    env = TimeWrapper(env, horizon)
    if restrict_rect is not None:
        env = RestrictAgentsRect(env, restrict_rect=restrict_rect,
                                 penalize_objects_out=penalize_objects_out)
    env = PreparationPhase(env, prep_fraction=prep_fraction)
    env = DiscretizeActionWrapper(env, 'action_movement')
    if np.max(n_boxes) > 0:
        env = AgentGeomObsMask2D(env, pos_obs_key='box_pos',
                                 mask_obs_key='mask_ab_obs',
                                 geom_idxs_obs_key='box_geom_idxs')
        keys_external += ['mask_ab_obs', 'box_obs']
        keys_mask_external.append('mask_ab_obs')
    if n_food:
        env = AgentSiteObsMask2D(env, pos_obs_key='food_pos',
                                 mask_obs_key='mask_af_obs')
        env = FoodHealthWrapper(env,
                                respawn_time=(np.inf if food_respawn_time is None
                                              else food_respawn_time),
                                eat_thresh=(np.inf if food_radius is None
                                            else food_radius),
                                max_food_health=max_food_health,
                                food_rew_type=food_rew_type,
                                reward_scale=food_reward_scale)
        env = MaskActionWrapper(env, 'action_eat_food', ['mask_af_obs'])  # Can only eat if in vision
        if prep_obs:
            env = MaskPrepPhaseAction(env, 'action_eat_food')
        if not eat_when_caught:
            env = MaskUnseenAction(env, 0, 'action_eat_food')
        eat_agents = np.arange(n_hiders)
        env = AlwaysEatWrapper(env, agent_idx_allowed=eat_agents)
        keys_external += ['mask_af_obs', 'food_obs']
        keys_mask_external.append('mask_af_obs')
    if lock_box and np.max(n_boxes) > 0:
        env = LockObjWrapper(env,
                             body_names=[f'moveable_box{i}' for i in range(np.max(n_boxes))],
                             agent_idx_allowed_to_lock=np.arange(n_hiders + n_seekers),
                             lock_type=lock_type,
                             radius_multiplier=lock_radius_multiplier,
                             obj_in_game_metadata_keys=["curr_n_boxes"],
                             agent_allowed_to_lock_keys=None if lock_out_of_vision else ["mask_ab_obs"])
    if n_ramps > 0:
        env = AgentGeomObsMask2D(env, pos_obs_key='ramp_pos',
                                 mask_obs_key='mask_ar_obs',
                                 geom_idxs_obs_key='ramp_geom_idxs')
        if lock_ramp:
            env = LockObjWrapper(env,
                                 body_names=[f'ramp{i}:ramp' for i in range(n_ramps)],
                                 agent_idx_allowed_to_lock=np.arange(n_hiders + n_seekers),
                                 lock_type=lock_type, ac_obs_prefix='ramp_',
                                 radius_multiplier=lock_radius_multiplier,
                                 obj_in_game_metadata_keys=['curr_n_ramps'],
                                 agent_allowed_to_lock_keys=None if lock_out_of_vision else ["mask_ar_obs"])
        keys_external += ['ramp_obs']
        keys_mask_external.append('mask_ar_obs')
    if grab_box and (np.max(n_boxes) > 0 or n_ramps > 0):
        env = GrabObjWrapper(env,
                             [f'moveable_box{i}' for i in range(np.max(n_boxes))] +
                             [f"ramp{i}:ramp" for i in range(n_ramps)],
                             radius_multiplier=grab_radius_multiplier,
                             grab_exclusive=grab_exclusive,
                             obj_in_game_metadata_keys=['curr_n_boxes', 'curr_n_ramps'])
    if n_lidar_per_agent > 0:
        env = Lidar(env, n_lidar_per_agent=n_lidar_per_agent,
                    visualize_lidar=visualize_lidar,
                    compress_lidar_scale=compress_lidar_scale)
        keys_copy += ['lidar']
        keys_external += ['lidar']
    if prep_obs:
        env = TrackStatWrapper(env, np.max(n_boxes), n_ramps, n_food)
    env = SplitObservations(env, keys_self + keys_mask_self, keys_copy=keys_copy)
    if n_boxes:
        env = SpoofEntityWrapper(env, np.max(n_boxes),
                                 ['box_obs', 'you_lock', 'team_lock', 'obj_lock'],
                                 ['mask_ab_obs'])
    if n_food:
        env = SpoofEntityWrapper(env, n_food, ['food_obs'], ['mask_af_obs'])
    keys_mask_external += ['mask_ab_obs_spoof', 'mask_af_obs_spoof']
    if max_n_agents is not None:
        env = SpoofEntityWrapper(env, max_n_agents,
                                 ['agent_qpos_qvel', 'hider', 'prep_obs'],
                                 ['mask_aa_obs'])
    env = LockAllWrapper(env, remove_object_specific_lock=True)
    if not grab_out_of_vision and grab_box and n_boxes > 0:
        env = MaskActionWrapper(env, 'action_pull',
                                ['mask_ab_obs'] + (['mask_ar_obs'] if n_ramps > 0 else []))
    if not grab_selective and grab_box and n_boxes > 0:
        env = GrabClosestWrapper(env)
    env = NoActionsInPrepPhase(env, np.arange(n_hiders, n_hiders + n_seekers))
    env = DiscardMujocoExceptionEpisodes(env, n_hiders + n_seekers)
    if n_boxes > 0 and n_ramps > 0:
        env = ConcatenateObsWrapper(env, {
            'agent_qpos_qvel': ['agent_qpos_qvel', 'hider', 'prep_obs'],
            'box_obs': ['box_obs', 'you_lock', 'team_lock', 'obj_lock'],
            'ramp_obs': ['ramp_obs'] + (['ramp_you_lock', 'ramp_team_lock', 'ramp_obj_lock']
                                        if lock_ramp else [])})
    elif n_boxes > 0:
        env = ConcatenateObsWrapper(env, {
            'agent_qpos_qvel': ['agent_qpos_qvel', 'hider', 'prep_obs'],
            'box_obs': ['box_obs', 'you_lock', 'team_lock', 'obj_lock']})
    elif n_ramps > 0:
        env = ConcatenateObsWrapper(env, {
            'agent_qpos_qvel': ['agent_qpos_qvel', 'hider', 'prep_obs'],
            'ramp_obs': ['ramp_obs'] + (['ramp_you_lock', 'ramp_team_lock', 'ramp_obj_lock']
                                        if lock_ramp else [])})
    else:
        env = ConcatenateObsWrapper(env, {
            'agent_qpos_qvel': ['agent_qpos_qvel', 'hider', 'prep_obs']})
    env = SelectKeysWrapper(env, keys_self=keys_self,
                            keys_external=keys_external,
                            keys_mask=keys_mask_self + keys_mask_external,
                            flatten=False)
    return env
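
# Illustrative usage sketch (not part of the original module): the quadrant
# hide-and-seek game with the defaults above. Team membership is fixed at
# construction, so hiders occupy agent indices [0, n_hiders); the helper name
# is hypothetical.
def _example_hide_and_seek():
    env = HideAndSeekEnv(n_hiders=2, n_seekers=2, scenario='quadrant',
                         n_boxes=2, n_ramps=1)
    obs = env.reset()
    return env, obs
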
def BlueprintConstructionEnv(n_substeps=15, horizon=200, deterministic_mode=False,
                             floor_size=4.0, grid_size=30,
                             n_agents=2,
                             n_rooms=2, random_room_number=False,
                             scenario='empty', door_size=2,
                             n_sites=2, n_elongated_sites=0,
                             site_placement='uniform_away_from_walls',
                             reward_infos=[{'type': 'construction_dense'},
                                           {'type': 'construction_completed'}],
                             n_boxes=4, n_elongated_boxes=0,
                             n_min_boxes=None, box_size=0.5,
                             box_only_z_rot=False,
                             lock_box=True, grab_box=True, grab_selective=False,
                             lock_grab_radius=0.25,
                             lock_type='all_lock_team_specific',
                             grab_exclusive=False,
                             grab_out_of_vision=True, lock_out_of_vision=True,
                             box_floor_friction=0.2, other_friction=0.01,
                             gravity=[0, 0, -50],
                             action_lims=(-0.9, 0.9), polar_obs=True,
                             n_lidar_per_agent=0, visualize_lidar=False,
                             compress_lidar_scale=None,
                             boxid_obs=False, boxsize_obs=False,
                             team_size_obs=False,
                             additional_obs={}):
    grab_radius_multiplier = lock_grab_radius / box_size
    lock_radius_multiplier = lock_grab_radius / box_size

    if type(n_sites) not in [list, np.ndarray]:
        n_sites = [n_sites, n_sites]

    env = Base(n_agents=n_agents, n_substeps=n_substeps, horizon=horizon,
               floor_size=floor_size, grid_size=grid_size,
               action_lims=action_lims, deterministic_mode=deterministic_mode)

    if scenario == 'randomwalls':
        env.add_module(RandomWalls(n_agents=n_agents, grid_size=grid_size,
                                   num_rooms=n_rooms,
                                   random_room_number=random_room_number,
                                   min_room_size=6, door_size=door_size,
                                   gen_door_obs=False))
    elif scenario == 'empty':
        env.add_module(WallScenarios(n_agents=n_agents, grid_size=grid_size,
                                     door_size=door_size, scenario='empty',
                                     friction=other_friction))
    else:
        raise ValueError(f"Scenario {scenario} not supported.")

    env.add_module(Agents(n_agents,
                          placement_fn=uniform_placement,
                          color=[np.array((66., 235., 244., 255.)) / 255] * n_agents,
                          friction=other_friction,
                          polar_obs=polar_obs))
    if np.max(n_boxes) > 0:
        env.add_module(Boxes(n_boxes=n_boxes, placement_fn=uniform_placement,
                             friction=box_floor_friction, polar_obs=polar_obs,
                             n_elongated_boxes=n_elongated_boxes,
                             boxid_obs=boxid_obs, boxsize_obs=boxsize_obs,
                             box_size=box_size, box_only_z_rot=box_only_z_rot,
                             mark_box_corners=True))
    if n_sites[1] > 0:
        if site_placement == 'center':
            site_placement_fn = center_placement
        elif site_placement == 'uniform':
            site_placement_fn = uniform_placement
        elif site_placement == 'uniform_away_from_walls':
            site_placement_fn = uniform_placement_middle(0.85)
        else:
            raise ValueError(
                f'Site placement option: {site_placement} not implemented.'
                ' Please choose from center, uniform and uniform_away_from_walls.')
        env.add_module(ConstructionSites(n_sites,
                                         placement_fn=site_placement_fn,
                                         site_size=box_size,
                                         site_height=box_size / 2,
                                         n_elongated_sites=n_elongated_sites))
    if n_lidar_per_agent > 0 and visualize_lidar:
        env.add_module(LidarSites(n_agents=n_agents,
                                  n_lidar_per_agent=n_lidar_per_agent))
    if np.max(n_boxes) > 0 and grab_box:
        env.add_module(AgentManipulation())
    if box_floor_friction is not None:
        env.add_module(FloorAttributes(friction=box_floor_friction))
    env.add_module(WorldConstants(gravity=gravity))
    env.reset()

    keys_self = ['agent_qpos_qvel', 'current_step']
    keys_mask_self = ['mask_aa_obs']
    keys_external = ['agent_qpos_qvel', 'construction_site_obs']
    keys_copy = ['you_lock', 'team_lock', 'ramp_you_lock', 'ramp_team_lock']
    keys_mask_external = []

    env = AddConstantObservationsWrapper(env, new_obs=additional_obs)
    keys_external += list(additional_obs)
    keys_mask_external += [ob for ob in additional_obs if 'mask' in ob]

    env = SplitMultiAgentActions(env)
    if team_size_obs:
        keys_self += ['team_size']
    env = TeamMembership(env, np.zeros((n_agents,)))
    env = AgentAgentObsMask2D(env)
    env = DiscretizeActionWrapper(env, 'action_movement')
    if np.max(n_boxes) > 0:
        env = AgentGeomObsMask2D(env, pos_obs_key='box_pos',
                                 mask_obs_key='mask_ab_obs',
                                 geom_idxs_obs_key='box_geom_idxs')
        keys_external += ['mask_ab_obs', 'box_obs']
        keys_mask_external.append('mask_ab_obs')
    if lock_box and np.max(n_boxes) > 0:
        agent_allowed_to_lock_keys = None if lock_out_of_vision else ["mask_ab_obs"]
        env = LockObjWrapper(env,
                             body_names=[f'moveable_box{i}' for i in range(n_boxes)],
                             agent_idx_allowed_to_lock=np.arange(n_agents),
                             lock_type=lock_type,
                             radius_multiplier=lock_radius_multiplier,
                             obj_in_game_metadata_keys=["curr_n_boxes"],
                             agent_allowed_to_lock_keys=agent_allowed_to_lock_keys)
    if grab_box and np.max(n_boxes) > 0:
        env = GrabObjWrapper(env,
                             [f'moveable_box{i}' for i in range(n_boxes)],
                             radius_multiplier=grab_radius_multiplier,
                             grab_exclusive=grab_exclusive,
                             obj_in_game_metadata_keys=['curr_n_boxes'])
    if n_lidar_per_agent > 0:
        env = Lidar(env, n_lidar_per_agent=n_lidar_per_agent,
                    visualize_lidar=visualize_lidar,
                    compress_lidar_scale=compress_lidar_scale)
        keys_copy += ['lidar']
        keys_external += ['lidar']

    env = ConstructionDistancesWrapper(env)
    env = NumpyArrayRewardWrapper(env)
    reward_wrappers = {
        'construction_dense': ConstructionDenseRewardWrapper,
        'construction_completed': ConstructionCompletedRewardWrapper,
    }
    for rew_info in reward_infos:
        # Copy before popping 'type' so the (mutable) default reward_infos
        # dicts are not destructively modified across repeated calls.
        rew_info = dict(rew_info)
        rew_type = rew_info.pop('type')
        env = reward_wrappers[rew_type](env, **rew_info)
    env = TimeWrapper(env, horizon)
    env = SplitObservations(env, keys_self + keys_mask_self, keys_copy=keys_copy)
    if n_agents == 1:
        env = SpoofEntityWrapper(env, 2, ['agent_qpos_qvel'], ['mask_aa_obs'])
    env = SpoofEntityWrapper(env, n_boxes,
                             ['box_obs', 'you_lock', 'team_lock', 'obj_lock'],
                             ['mask_ab_obs'])
    env = SpoofEntityWrapper(env, n_sites[1],
                             ['construction_site_obs'], ['mask_acs_obs'])
    keys_mask_external += ['mask_ab_obs_spoof', 'mask_acs_obs_spoof']
    env = LockAllWrapper(env, remove_object_specific_lock=True)
    if not grab_out_of_vision and grab_box:
        env = MaskActionWrapper(env, 'action_pull', ['mask_ab_obs'])  # Can only pull if in vision
    if not grab_selective and grab_box:
        env = GrabClosestWrapper(env)
    env = DiscardMujocoExceptionEpisodes(env, n_agents)
    env = ConcatenateObsWrapper(env, {
        'agent_qpos_qvel': ['agent_qpos_qvel'],
        'box_obs': ['box_obs', 'you_lock', 'team_lock', 'obj_lock']})
    env = SelectKeysWrapper(env, keys_self=keys_self,
                            keys_external=keys_external,
                            keys_mask=keys_mask_self + keys_mask_external,
                            flatten=False)
    return env
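
# Illustrative usage sketch (not part of the original module): reward shaping
# is configured through reward_infos; each dict names a wrapper via its 'type'
# key and passes its remaining keys through as wrapper kwargs. The helper name
# is hypothetical.
def _example_blueprint_construction():
    env = BlueprintConstructionEnv(
        n_agents=2, n_boxes=4, n_sites=2,
        reward_infos=[{'type': 'construction_dense'},
                      {'type': 'construction_completed'}])
    obs = env.reset()
    return env, obs
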
def ShelterConstructionEnv(args, n_substeps=15, horizon=80, deterministic_mode=False,
                           floor_size=6.0, grid_size=30,
                           n_agents=1,
                           objective_diameter=[1, 1],
                           objective_placement='center',
                           num_rays_per_side=25,
                           shelter_reward_scale=1,
                           n_boxes=2, n_elongated_boxes=0,
                           box_size=0.5, box_only_z_rot=False,
                           lock_box=True, grab_box=True, grab_selective=False,
                           lock_grab_radius=0.25, lock_type='any_lock_specific',
                           grab_exclusive=False,
                           grab_out_of_vision=False, lock_out_of_vision=True,
                           box_floor_friction=0.2, other_friction=0.01,
                           gravity=[0, 0, -50],
                           action_lims=(-0.9, 0.9), polar_obs=True,
                           n_lidar_per_agent=0, visualize_lidar=False,
                           compress_lidar_scale=None,
                           boxid_obs=True, boxsize_obs=True,
                           team_size_obs=False,
                           additional_obs={}):
    assert n_agents == 1, ("only 1 agent is supported, check the config.py.")

    grab_radius_multiplier = lock_grab_radius / box_size
    lock_radius_multiplier = lock_grab_radius / box_size

    env = Base(n_agents=n_agents, n_substeps=n_substeps, horizon=horizon,
               floor_size=floor_size, grid_size=grid_size,
               action_lims=action_lims, deterministic_mode=deterministic_mode)

    env.add_module(WallScenarios(n_agents=n_agents, grid_size=grid_size,
                                 door_size=2, scenario='empty',
                                 friction=other_friction))

    if objective_placement == 'center':
        objective_placement_fn = center_placement
    elif objective_placement == 'uniform_away_from_walls':
        objective_placement_fn = uniform_placement_middle(0.7)
    else:
        raise ValueError(f"Objective placement {objective_placement} not supported.")

    env.add_module(Cylinders(1, diameter=objective_diameter, height=box_size,
                             make_static=True,
                             placement_fn=objective_placement_fn))
    env.add_module(Agents(n_agents,
                          placement_fn=uniform_placement,
                          color=[np.array((66., 235., 244., 255.)) / 255] * n_agents,
                          friction=other_friction,
                          polar_obs=polar_obs))
    if np.max(n_boxes) > 0:
        env.add_module(Boxes(n_boxes=n_boxes, placement_fn=uniform_placement,
                             friction=box_floor_friction, polar_obs=polar_obs,
                             n_elongated_boxes=n_elongated_boxes,
                             boxid_obs=boxid_obs, boxsize_obs=boxsize_obs,
                             box_size=box_size, box_only_z_rot=box_only_z_rot))
    if n_lidar_per_agent > 0 and visualize_lidar:
        env.add_module(LidarSites(n_agents=n_agents,
                                  n_lidar_per_agent=n_lidar_per_agent))
    env.add_module(AgentManipulation())
    if box_floor_friction is not None:
        env.add_module(FloorAttributes(friction=box_floor_friction))
    env.add_module(WorldConstants(gravity=gravity))
    env.reset()

    keys_self = ['agent_qpos_qvel', 'current_step']
    keys_mask_self = ['mask_aa_obs']
    keys_external = ['agent_qpos_qvel', 'vector_door_obs']
    keys_copy = ['you_lock', 'team_lock', 'ramp_you_lock', 'ramp_team_lock']
    keys_mask_external = []

    env = AddConstantObservationsWrapper(env, new_obs=additional_obs)
    keys_external += list(additional_obs)
    keys_mask_external += [ob for ob in additional_obs if 'mask' in ob]

    env = ShelterRewardWrapper(env, num_rays_per_side=num_rays_per_side,
                               reward_scale=shelter_reward_scale)
    env = SplitMultiAgentActions(env)
    if team_size_obs:
        keys_self += ['team_size']
    env = TeamMembership(env, np.zeros((n_agents,)))
    env = AgentAgentObsMask2D(env)
    env = DiscretizeActionWrapper(env, 'action_movement')
    if np.max(n_boxes) > 0:
        env = AgentGeomObsMask2D(env, pos_obs_key='box_pos',
                                 mask_obs_key='mask_ab_obs',
                                 geom_idxs_obs_key='box_geom_idxs')
        keys_external += ['mask_ab_obs', 'box_obs']
        keys_mask_external.append('mask_ab_obs')
    if lock_box and np.max(n_boxes) > 0:
        env = LockObjWrapper(env,
                             body_names=[f'moveable_box{i}' for i in range(n_boxes)],
                             agent_idx_allowed_to_lock=np.arange(n_agents),
                             lock_type=lock_type,
                             radius_multiplier=lock_radius_multiplier,
                             obj_in_game_metadata_keys=["curr_n_boxes"],
                             agent_allowed_to_lock_keys=None if lock_out_of_vision else ["mask_ab_obs"])
    if grab_box and np.max(n_boxes) > 0:
        env = GrabObjWrapper(env,
                             [f'moveable_box{i}' for i in range(n_boxes)],
                             radius_multiplier=grab_radius_multiplier,
                             grab_exclusive=grab_exclusive,
                             obj_in_game_metadata_keys=['curr_n_boxes'])
    if n_lidar_per_agent > 0:
        env = Lidar(env, n_lidar_per_agent=n_lidar_per_agent,
                    visualize_lidar=visualize_lidar,
                    compress_lidar_scale=compress_lidar_scale)
        keys_copy += ['lidar']
        keys_external += ['lidar']
    env = TimeWrapper(env, horizon)
    env = SplitObservations(env, keys_self + keys_mask_self, keys_copy=keys_copy)
    if n_agents == 1:
        env = SpoofEntityWrapper(env, 2, ['agent_qpos_qvel'], ['mask_aa_obs'])
    env = SpoofEntityWrapper(env, n_boxes,
                             ['box_obs', 'you_lock', 'team_lock', 'obj_lock'],
                             ['mask_ab_obs'])
    keys_mask_external += ['mask_ab_obs_spoof']
    env = LockAllWrapper(env, remove_object_specific_lock=True)
    if not grab_out_of_vision and grab_box:
        env = MaskActionWrapper(env, 'action_pull', ['mask_ab_obs'])  # Can only pull if in vision
    if not grab_selective and grab_box:
        env = GrabClosestWrapper(env)
    env = DiscardMujocoExceptionEpisodes(env, n_agents)
    env = ConcatenateObsWrapper(env, {
        'agent_qpos_qvel': ['agent_qpos_qvel'],
        'box_obs': ['box_obs', 'you_lock', 'team_lock', 'obj_lock']})
    env = SelectKeysWrapper(env, keys_self=keys_self,
                            keys_external=keys_external,
                            keys_mask=keys_mask_self + keys_mask_external,
                            flatten=False)
    return env
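
# Illustrative usage sketch (not part of the original module): the body shown
# above never reads attributes off `args`, so passing None is an assumption
# that holds only for this version of the constructor; the helper name is
# hypothetical.
def _example_shelter_construction():
    env = ShelterConstructionEnv(args=None, n_boxes=2)
    obs = env.reset()
    return env, obs
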