def BoxLockingEnv(args, n_substeps=15, horizon=120, deterministic_mode=False,
                  floor_size=6.0, grid_size=30, door_size=2,
                  n_agents=1, fixed_agent_spawn=False,
                  lock_box=True, grab_box=True, grab_selective=False,
                  lock_type='all_lock_team_specific', lock_grab_radius=0.25,
                  grab_exclusive=False, grab_out_of_vision=False,
                  lock_out_of_vision=False,
                  box_floor_friction=0.2, other_friction=0.01,
                  gravity=[0, 0, -50], action_lims=(-0.9, 0.9), polar_obs=True,
                  scenario='quadrant', p_door_dropout=0.5,
                  n_rooms=2, random_room_number=False,
                  n_lidar_per_agent=0, visualize_lidar=False,
                  compress_lidar_scale=None,
                  n_boxes=3, box_size=0.5, box_only_z_rot=True,
                  boxid_obs=False, boxsize_obs=False, pad_ramp_size=True,
                  additional_obs={},
                  # lock-box task
                  task_type='order-return', lock_reward=5.0,
                  unlock_penalty=5.0, shaped_reward_scale=0.5,
                  return_threshold=0.1,
                  # ramps
                  n_ramps=0):
    # Values from the shared config override the keyword defaults above.
    scenario = args.scenario_name
    n_agents = args.num_agents
    task_type = args.task_type
    n_boxes = args.num_boxes
    # assert n_agents == 1, "Only 1 agent is supported; check config.py."

    grab_radius_multiplier = lock_grab_radius / box_size
    lock_radius_multiplier = lock_grab_radius / box_size

    env = Base(n_agents=n_agents, n_substeps=n_substeps, floor_size=floor_size,
               horizon=horizon, action_lims=action_lims,
               deterministic_mode=deterministic_mode, grid_size=grid_size)

    # Walls and placement functions depend on the scenario.
    if scenario == 'randomwalls':
        env.add_module(RandomWalls(n_agents=n_agents, grid_size=grid_size,
                                   num_rooms=n_rooms,
                                   random_room_number=random_room_number,
                                   min_room_size=6, door_size=door_size,
                                   gen_door_obs=False))
        box_placement_fn = uniform_placement
        ramp_placement_fn = uniform_placement
        agent_placement_fn = uniform_placement if not fixed_agent_spawn else center_placement
    elif scenario == 'quadrant':
        env.add_module(WallScenarios(n_agents=n_agents, grid_size=grid_size,
                                     door_size=door_size, scenario=scenario,
                                     friction=other_friction,
                                     p_door_dropout=p_door_dropout))
        box_placement_fn = uniform_placement
        ramp_placement_fn = uniform_placement
        agent_placement_fn = quadrant_placement if not fixed_agent_spawn else center_placement
    elif scenario == 'empty':
        env.add_module(WallScenarios(n_agents=n_agents, grid_size=grid_size,
                                     door_size=2, scenario='empty'))
        box_placement_fn = uniform_placement
        ramp_placement_fn = uniform_placement
        agent_placement_fn = center_placement
    elif 'var_tri' in scenario:
        env.add_module(WallScenarios(n_agents=n_agents, grid_size=grid_size,
                                     door_size=door_size, scenario='var_tri'))
        ramp_placement_fn = [tri_placement(i % 3) for i in range(n_ramps)]
        agent_placement_fn = center_placement if fixed_agent_spawn else \
            (uniform_placement if 'uniform' in scenario else rotate_tri_placement)
        box_placement_fn = uniform_placement if 'uniform' in scenario else rotate_tri_placement
    else:
        raise ValueError(f"Scenario {scenario} not supported.")

    env.add_module(Agents(n_agents,
                          placement_fn=agent_placement_fn,
                          color=[np.array((66., 235., 244., 255.)) / 255] * n_agents,
                          friction=other_friction,
                          polar_obs=polar_obs))
    if np.max(n_boxes) > 0:
        env.add_module(Boxes(n_boxes=n_boxes, placement_fn=box_placement_fn,
                             friction=box_floor_friction, polar_obs=polar_obs,
                             n_elongated_boxes=0, boxid_obs=boxid_obs,
                             box_only_z_rot=box_only_z_rot,
                             boxsize_obs=boxsize_obs))
    if n_ramps > 0:
        env.add_module(Ramps(n_ramps=n_ramps, placement_fn=ramp_placement_fn,
                             friction=other_friction, polar_obs=polar_obs,
                             pad_ramp_size=pad_ramp_size))
    if n_lidar_per_agent > 0 and visualize_lidar:
        env.add_module(LidarSites(n_agents=n_agents,
                                  n_lidar_per_agent=n_lidar_per_agent))
    if np.max(n_boxes) > 0 and grab_box:
        env.add_module(AgentManipulation())
    if box_floor_friction is not None:
        env.add_module(FloorAttributes(friction=box_floor_friction))
    env.add_module(WorldConstants(gravity=gravity))
    env.reset()

    # Observation keys consumed by SplitObservations / SelectKeysWrapper below.
    if 'var_tri' in scenario or 'randomwalls' in scenario:
        keys_self = ['agent_qpos_qvel', 'current_step']
    else:
        keys_self = ['agent_qpos_qvel', 'current_step', 'vector_door_obs']
    keys_mask_self = ['mask_aa_obs']
    keys_external = ['agent_qpos_qvel']
    keys_copy = ['you_lock', 'team_lock']
    keys_mask_external = []

    env = SplitMultiAgentActions(env)
    env = TeamMembership(env, np.zeros((n_agents,)))
    env = AgentAgentObsMask2D(env)
    env = DiscretizeActionWrapper(env, 'action_movement')
    env = NumpyArrayRewardWrapper(env)
    env = TimeWrapper(env, horizon)

    if np.max(n_boxes) > 0:
        env = AgentGeomObsMask2D(env, pos_obs_key='box_pos',
                                 mask_obs_key='mask_ab_obs',
                                 geom_idxs_obs_key='box_geom_idxs')
        keys_external += ['mask_ab_obs', 'box_obs']
        keys_mask_external.append('mask_ab_obs')
    if lock_box and np.max(n_boxes) > 0:
        env = LockObjWrapper(env,
                             body_names=[f'moveable_box{i}' for i in range(n_boxes)],
                             agent_idx_allowed_to_lock=np.arange(n_agents),
                             lock_type=lock_type,
                             radius_multiplier=lock_radius_multiplier,
                             obj_in_game_metadata_keys=["curr_n_boxes"],
                             agent_allowed_to_lock_keys=None if lock_out_of_vision else ["mask_ab_obs"])
    if n_ramps > 0:
        env = AgentGeomObsMask2D(env, pos_obs_key='ramp_pos',
                                 mask_obs_key='mask_ar_obs',
                                 geom_idxs_obs_key='ramp_geom_idxs')
        env = LockObjWrapper(env,
                             body_names=[f"ramp{i}:ramp" for i in range(n_ramps)],
                             agent_idx_allowed_to_lock=np.arange(n_agents),
                             lock_type=lock_type, ac_obs_prefix='ramp_',
                             radius_multiplier=lock_radius_multiplier,
                             agent_allowed_to_lock_keys=None if lock_out_of_vision else ["mask_ar_obs"])
        keys_external += ['ramp_obs']
        keys_mask_external += ['mask_ar_obs']
        keys_copy += ['ramp_you_lock', 'ramp_team_lock']
    if grab_box and np.max(n_boxes) > 0:
        body_names = ([f'moveable_box{i}' for i in range(n_boxes)] +
                      [f"ramp{i}:ramp" for i in range(n_ramps)])
        obj_in_game_meta_keys = ['curr_n_boxes'] + (['curr_n_ramps'] if n_ramps > 0 else [])
        env = GrabObjWrapper(env, body_names=body_names,
                             radius_multiplier=grab_radius_multiplier,
                             grab_exclusive=grab_exclusive,
                             obj_in_game_metadata_keys=obj_in_game_meta_keys)
    if n_lidar_per_agent > 0:
        env = Lidar(env, n_lidar_per_agent=n_lidar_per_agent,
                    visualize_lidar=visualize_lidar,
                    compress_lidar_scale=compress_lidar_scale)
        keys_copy += ['lidar']
        keys_external += ['lidar']

    env = AddConstantObservationsWrapper(env, new_obs=additional_obs)
    keys_external += list(additional_obs)
    keys_mask_external += [ob for ob in additional_obs if 'mask' in ob]

    #############################################
    # Lock-box task reward
    env = LockObjectsTask(env, n_objs=n_boxes, task=task_type, fixed_order=True,
                          obj_lock_obs_key='obj_lock', obj_pos_obs_key='box_pos',
                          act_lock_key='action_glue', agent_pos_key='agent_pos',
                          lock_reward=lock_reward, unlock_penalty=unlock_penalty,
                          shaped_reward_scale=shaped_reward_scale,
                          return_threshold=return_threshold)
    #############################################

    env = SplitObservations(env, keys_self + keys_mask_self, keys_copy=keys_copy)
    env = SpoofEntityWrapper(env, n_boxes,
                             ['box_obs', 'you_lock', 'team_lock', 'obj_lock'],
                             ['mask_ab_obs'])
    keys_mask_external += ['mask_ab_obs_spoof']
    if n_agents < 2:
        env = SpoofEntityWrapper(env, 1, ['agent_qpos_qvel'], ['mask_aa_obs'])
    env = LockAllWrapper(env, remove_object_specific_lock=True)
    if not grab_out_of_vision and grab_box:
        # Can only pull objects that are in vision.
        mask_keys = ['mask_ab_obs'] + (['mask_ar_obs'] if n_ramps > 0 else [])
        env = MaskActionWrapper(env, action_key='action_pull', mask_keys=mask_keys)
    if not grab_selective and grab_box:
        env = GrabClosestWrapper(env)
    env = DiscardMujocoExceptionEpisodes(env, n_agents)
    if n_ramps > 0:
        env = ConcatenateObsWrapper(env, {
            'agent_qpos_qvel': ['agent_qpos_qvel'],
            'box_obs': ['box_obs', 'you_lock', 'team_lock', 'obj_lock'],
            'ramp_obs': ['ramp_obs', 'ramp_you_lock', 'ramp_team_lock', 'ramp_obj_lock']})
    else:
        env = ConcatenateObsWrapper(env, {
            'agent_qpos_qvel': ['agent_qpos_qvel'],
            'box_obs': ['box_obs', 'you_lock', 'team_lock', 'obj_lock']})
    env = SelectKeysWrapper(env, keys_self=keys_self,
                            keys_external=keys_external,
                            keys_mask=keys_mask_self + keys_mask_external,
                            flatten=False)
    return env
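# A minimal usage sketch for BoxLockingEnv; this example is an assumption, not
# part of the original file. The factory only reads scenario_name, num_agents,
# task_type and num_boxes off `args`, so any namespace with those attributes
# works; SimpleNamespace below stands in for the real config/argparse object,
# and the gym-style step/action_space interface is assumed from the wrappers.
def _box_locking_example():
    from types import SimpleNamespace
    box_args = SimpleNamespace(scenario_name='quadrant', num_agents=1,
                               task_type='order-return', num_boxes=3)
    env = BoxLockingEnv(box_args)
    obs = env.reset()
    # Sample a random per-agent action and take one step.
    obs, rew, done, info = env.step(env.action_space.sample())
    return obs, rew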
def BlueprintConstructionEnv(n_substeps=15, horizon=200, deterministic_mode=False,
                             floor_size=4.0, grid_size=30, n_agents=2,
                             n_rooms=2, random_room_number=False,
                             scenario='empty', door_size=2,
                             n_sites=2, n_elongated_sites=0,
                             site_placement='uniform_away_from_walls',
                             reward_infos=[{'type': 'construction_dense'},
                                           {'type': 'construction_completed'}],
                             n_boxes=4, n_elongated_boxes=0, n_min_boxes=None,
                             box_size=0.5, box_only_z_rot=False,
                             lock_box=True, grab_box=True, grab_selective=False,
                             lock_grab_radius=0.25,
                             lock_type='all_lock_team_specific',
                             grab_exclusive=False,
                             grab_out_of_vision=True, lock_out_of_vision=True,
                             box_floor_friction=0.2, other_friction=0.01,
                             gravity=[0, 0, -50], action_lims=(-0.9, 0.9),
                             polar_obs=True,
                             n_lidar_per_agent=0, visualize_lidar=False,
                             compress_lidar_scale=None,
                             boxid_obs=False, boxsize_obs=False,
                             team_size_obs=False, additional_obs={}):
    grab_radius_multiplier = lock_grab_radius / box_size
    lock_radius_multiplier = lock_grab_radius / box_size

    # Allow n_sites to be given as a scalar or as a [min, max] range.
    if type(n_sites) not in [list, np.ndarray]:
        n_sites = [n_sites, n_sites]

    env = Base(n_agents=n_agents, n_substeps=n_substeps, horizon=horizon,
               floor_size=floor_size, grid_size=grid_size,
               action_lims=action_lims, deterministic_mode=deterministic_mode)

    if scenario == 'randomwalls':
        env.add_module(RandomWalls(n_agents=n_agents, grid_size=grid_size,
                                   num_rooms=n_rooms,
                                   random_room_number=random_room_number,
                                   min_room_size=6, door_size=door_size,
                                   gen_door_obs=False))
    elif scenario == 'empty':
        env.add_module(WallScenarios(n_agents=n_agents, grid_size=grid_size,
                                     door_size=door_size, scenario='empty',
                                     friction=other_friction))
    else:
        raise ValueError(f"Scenario {scenario} not supported.")

    env.add_module(Agents(n_agents,
                          placement_fn=uniform_placement,
                          color=[np.array((66., 235., 244., 255.)) / 255] * n_agents,
                          friction=other_friction,
                          polar_obs=polar_obs))
    if np.max(n_boxes) > 0:
        env.add_module(Boxes(n_boxes=n_boxes, placement_fn=uniform_placement,
                             friction=box_floor_friction, polar_obs=polar_obs,
                             n_elongated_boxes=n_elongated_boxes,
                             boxid_obs=boxid_obs, boxsize_obs=boxsize_obs,
                             box_size=box_size, box_only_z_rot=box_only_z_rot,
                             mark_box_corners=True))
    if n_sites[1] > 0:
        if site_placement == 'center':
            site_placement_fn = center_placement
        elif site_placement == 'uniform':
            site_placement_fn = uniform_placement
        elif site_placement == 'uniform_away_from_walls':
            site_placement_fn = uniform_placement_middle(0.85)
        else:
            raise ValueError(
                f'Site placement option: {site_placement} not implemented.'
                ' Please choose from center, uniform and uniform_away_from_walls.')
        env.add_module(ConstructionSites(n_sites,
                                         placement_fn=site_placement_fn,
                                         site_size=box_size,
                                         site_height=box_size / 2,
                                         n_elongated_sites=n_elongated_sites))
    if n_lidar_per_agent > 0 and visualize_lidar:
        env.add_module(LidarSites(n_agents=n_agents,
                                  n_lidar_per_agent=n_lidar_per_agent))
    if np.max(n_boxes) > 0 and grab_box:
        env.add_module(AgentManipulation())
    if box_floor_friction is not None:
        env.add_module(FloorAttributes(friction=box_floor_friction))
    env.add_module(WorldConstants(gravity=gravity))
    env.reset()

    keys_self = ['agent_qpos_qvel', 'current_step']
    keys_mask_self = ['mask_aa_obs']
    keys_external = ['agent_qpos_qvel', 'construction_site_obs']
    keys_copy = ['you_lock', 'team_lock', 'ramp_you_lock', 'ramp_team_lock']
    keys_mask_external = []

    env = AddConstantObservationsWrapper(env, new_obs=additional_obs)
    keys_external += list(additional_obs)
    keys_mask_external += [ob for ob in additional_obs if 'mask' in ob]

    env = SplitMultiAgentActions(env)
    if team_size_obs:
        keys_self += ['team_size']
    env = TeamMembership(env, np.zeros((n_agents,)))
    env = AgentAgentObsMask2D(env)
    env = DiscretizeActionWrapper(env, 'action_movement')
    if np.max(n_boxes) > 0:
        env = AgentGeomObsMask2D(env, pos_obs_key='box_pos',
                                 mask_obs_key='mask_ab_obs',
                                 geom_idxs_obs_key='box_geom_idxs')
        keys_external += ['mask_ab_obs', 'box_obs']
        keys_mask_external.append('mask_ab_obs')
    if lock_box and np.max(n_boxes) > 0:
        agent_allowed_to_lock_keys = None if lock_out_of_vision else ["mask_ab_obs"]
        env = LockObjWrapper(env,
                             body_names=[f'moveable_box{i}' for i in range(n_boxes)],
                             agent_idx_allowed_to_lock=np.arange(n_agents),
                             lock_type=lock_type,
                             radius_multiplier=lock_radius_multiplier,
                             obj_in_game_metadata_keys=["curr_n_boxes"],
                             agent_allowed_to_lock_keys=agent_allowed_to_lock_keys)
    if grab_box and np.max(n_boxes) > 0:
        env = GrabObjWrapper(env,
                             [f'moveable_box{i}' for i in range(n_boxes)],
                             radius_multiplier=grab_radius_multiplier,
                             grab_exclusive=grab_exclusive,
                             obj_in_game_metadata_keys=['curr_n_boxes'])
    if n_lidar_per_agent > 0:
        env = Lidar(env, n_lidar_per_agent=n_lidar_per_agent,
                    visualize_lidar=visualize_lidar,
                    compress_lidar_scale=compress_lidar_scale)
        keys_copy += ['lidar']
        keys_external += ['lidar']

    env = ConstructionDistancesWrapper(env)
    env = NumpyArrayRewardWrapper(env)
    reward_wrappers = {
        'construction_dense': ConstructionDenseRewardWrapper,
        'construction_completed': ConstructionCompletedRewardWrapper,
    }
    for rew_info in reward_infos:
        # Copy before popping 'type' so the (mutable) default argument is not
        # destructively modified across repeated calls to this factory.
        rew_info = dict(rew_info)
        rew_type = rew_info.pop('type')
        env = reward_wrappers[rew_type](env, **rew_info)

    env = TimeWrapper(env, horizon)
    env = SplitObservations(env, keys_self + keys_mask_self, keys_copy=keys_copy)
    if n_agents == 1:
        env = SpoofEntityWrapper(env, 2, ['agent_qpos_qvel'], ['mask_aa_obs'])
    env = SpoofEntityWrapper(env, n_boxes,
                             ['box_obs', 'you_lock', 'team_lock', 'obj_lock'],
                             ['mask_ab_obs'])
    env = SpoofEntityWrapper(env, n_sites[1],
                             ['construction_site_obs'], ['mask_acs_obs'])
    keys_mask_external += ['mask_ab_obs_spoof', 'mask_acs_obs_spoof']
    env = LockAllWrapper(env, remove_object_specific_lock=True)
    if not grab_out_of_vision and grab_box:
        # Can only pull boxes that are in vision.
        env = MaskActionWrapper(env, 'action_pull', ['mask_ab_obs'])
    if not grab_selective and grab_box:
        env = GrabClosestWrapper(env)
    env = DiscardMujocoExceptionEpisodes(env, n_agents)
    env = ConcatenateObsWrapper(env, {
        'agent_qpos_qvel': ['agent_qpos_qvel'],
        'box_obs': ['box_obs', 'you_lock', 'team_lock', 'obj_lock']})
    env = SelectKeysWrapper(env, keys_self=keys_self,
                            keys_external=keys_external,
                            keys_mask=keys_mask_self + keys_mask_external,
                            flatten=False)
    return env
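# A minimal usage sketch for BlueprintConstructionEnv; this example is an
# assumption, not part of the original file. All arguments have defaults, so
# the factory can be called directly; reward_infos is passed explicitly here
# only to show the expected shape of the reward configuration.
def _blueprint_construction_example():
    env = BlueprintConstructionEnv(
        n_agents=2, n_boxes=4, n_sites=2,
        reward_infos=[{'type': 'construction_dense'},
                      {'type': 'construction_completed'}])
    obs = env.reset()
    obs, rew, done, info = env.step(env.action_space.sample())
    return obs, rew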
def HideAndSeekEnv(n_substeps=15, horizon=80, deterministic_mode=False,
                   floor_size=6.0, grid_size=30, door_size=2,
                   n_hiders=2, n_seekers=2, max_n_agents=None,
                   n_boxes=2, n_ramps=1, n_elongated_boxes=0,
                   rand_num_elongated_boxes=False, n_min_boxes=None,
                   box_size=2.0, boxid_obs=False, box_only_z_rot=True,
                   rew_type='joint_zero_sum',
                   lock_box=True, grab_box=True, lock_ramp=False,
                   lock_type='all_lock_team_specific',
                   lock_grab_radius=0.25, lock_out_of_vision=True,
                   grab_exclusive=False, grab_out_of_vision=False,
                   grab_selective=False,
                   box_floor_friction=0.2, other_friction=0.01,
                   gravity=[0, 0, -50], action_lims=(-0.9, 0.9), polar_obs=True,
                   scenario='quadrant',
                   quadrant_game_hider_uniform_placement=False,
                   p_door_dropout=0.5,
                   n_rooms=4, random_room_number=True, prob_outside_walls=1.0,
                   n_lidar_per_agent=0, visualize_lidar=False,
                   compress_lidar_scale=None,
                   hiders_together_radius=None, seekers_together_radius=None,
                   prep_fraction=0.4, prep_obs=True,
                   team_size_obs=False,
                   restrict_rect=[0.1, 0.1, 5.9, 5.9],
                   penalize_objects_out=True,
                   n_food=0, food_radius=None, food_respawn_time=None,
                   max_food_health=1, food_together_radius=None,
                   food_rew_type='selfish', eat_when_caught=False,
                   food_reward_scale=1.0, food_normal_centered=False,
                   food_box_centered=False, n_food_cluster=1):
    grab_radius_multiplier = lock_grab_radius / box_size
    lock_radius_multiplier = lock_grab_radius / box_size

    env = Base(n_agents=n_hiders + n_seekers, n_substeps=n_substeps,
               horizon=horizon, floor_size=floor_size, grid_size=grid_size,
               action_lims=action_lims, deterministic_mode=deterministic_mode)

    if scenario == 'randomwalls':
        env.add_module(RandomWalls(n_agents=n_hiders + n_seekers,
                                   grid_size=grid_size, num_rooms=n_rooms,
                                   random_room_number=random_room_number,
                                   min_room_size=6, door_size=door_size,
                                   prob_outside_walls=prob_outside_walls,
                                   gen_door_obs=False))
        box_placement_fn = uniform_placement
        ramp_placement_fn = uniform_placement
        cell_size = env.floor_size / grid_size

        # The first hider spawns uniformly; optionally the remaining hiders
        # spawn within hiders_together_radius of it.
        first_hider_placement = uniform_placement
        if hiders_together_radius is not None:
            htr_in_cells = np.ceil(hiders_together_radius / cell_size).astype(int)
            env.metadata['hiders_together_radius'] = htr_in_cells
            close_to_first_hider_placement = close_to_other_object_placement(
                "agent", 0, "hiders_together_radius")
            agent_placement_fn = [first_hider_placement] + \
                [close_to_first_hider_placement] * (n_hiders - 1)
        else:
            agent_placement_fn = [first_hider_placement] * n_hiders

        # Same scheme for the seekers.
        first_seeker_placement = uniform_placement
        if seekers_together_radius is not None:
            str_in_cells = np.ceil(seekers_together_radius / cell_size).astype(int)
            env.metadata['seekers_together_radius'] = str_in_cells
            close_to_first_seeker_placement = close_to_other_object_placement(
                "agent", n_hiders, "seekers_together_radius")
            agent_placement_fn += [first_seeker_placement] + \
                [close_to_first_seeker_placement] * (n_seekers - 1)
        else:
            agent_placement_fn += [first_seeker_placement] * n_seekers
    elif scenario == 'quadrant':
        env.add_module(WallScenarios(n_agents=n_hiders + n_seekers,
                                     grid_size=grid_size, door_size=door_size,
                                     scenario=scenario, friction=other_friction,
                                     p_door_dropout=p_door_dropout))
        box_placement_fn = quadrant_placement
        ramp_placement_fn = nearwall_placement  # alternative: uniform_placement
        hider_placement = uniform_placement if quadrant_game_hider_uniform_placement else quadrant_placement
        agent_placement_fn = [hider_placement] * n_hiders + \
            [outside_quadrant_placement] * n_seekers
    else:
        raise ValueError(f"Scenario {scenario} not supported.")

    env.add_module(Agents(n_hiders + n_seekers,
                          placement_fn=agent_placement_fn,
                          color=[np.array((66., 235., 244., 255.)) / 255] * n_hiders +
                                [(1., 0., 0., 1.)] * n_seekers,
                          friction=other_friction,
                          polar_obs=polar_obs))
    if np.max(n_boxes) > 0:
        env.add_module(Boxes(n_boxes=n_boxes, placement_fn=box_placement_fn,
                             friction=box_floor_friction, polar_obs=polar_obs,
                             n_elongated_boxes=n_elongated_boxes,
                             boxid_obs=boxid_obs, box_only_z_rot=box_only_z_rot))
    if n_ramps > 0:
        env.add_module(Ramps(n_ramps=n_ramps, placement_fn=ramp_placement_fn,
                             friction=other_friction, polar_obs=polar_obs,
                             pad_ramp_size=(np.max(n_elongated_boxes) > 0)))
    if n_lidar_per_agent > 0 and visualize_lidar:
        env.add_module(LidarSites(n_agents=n_hiders + n_seekers,
                                  n_lidar_per_agent=n_lidar_per_agent))
    if n_food > 0:
        if scenario == 'quadrant':
            first_food_placement = quadrant_placement
        elif food_box_centered:
            first_food_placement = uniform_placement_middle(0.25)
        else:
            first_food_placement = uniform_placement
        if food_together_radius is not None:
            cell_size = env.floor_size / grid_size
            ftr_in_cells = np.ceil(food_together_radius / cell_size).astype(int)
            env.metadata['food_together_radius'] = ftr_in_cells
            assert n_food % n_food_cluster == 0
            cluster_assignments = np.repeat(
                np.arange(0, n_food, n_food // n_food_cluster),
                n_food // n_food_cluster)
            food_placement = [close_to_other_object_placement(
                "food", i, "food_together_radius") for i in cluster_assignments]
            # The first food item of each cluster is placed independently.
            food_placement[::n_food // n_food_cluster] = \
                [first_food_placement] * n_food_cluster
        else:
            food_placement = first_food_placement
        env.add_module(Food(n_food, placement_fn=food_placement))
    env.add_module(AgentManipulation())
    if box_floor_friction is not None:
        env.add_module(FloorAttributes(friction=box_floor_friction))
    env.add_module(WorldConstants(gravity=gravity))
    env.reset()

    keys_self = ['agent_qpos_qvel', 'hider', 'prep_obs', 'current_step',
                 'vector_door_obs']
    keys_mask_self = ['mask_aa_obs']
    keys_external = ['agent_qpos_qvel']
    keys_copy = ['you_lock', 'team_lock', 'ramp_you_lock', 'ramp_team_lock']
    keys_mask_external = []

    env = SplitMultiAgentActions(env)
    if team_size_obs:
        keys_self += ['team_size']
    env = TeamMembership(env, np.append(np.zeros((n_hiders,)),
                                        np.ones((n_seekers,))))
    env = AgentAgentObsMask2D(env)
    hider_obs = np.array([[1]] * n_hiders + [[0]] * n_seekers)
    env = AddConstantObservationsWrapper(env, new_obs={'hider': hider_obs})
    env = HideAndSeekRewardWrapper(env, n_hiders=n_hiders, n_seekers=n_seekers,
                                   rew_type=rew_type)
    env = TimeWrapper(env, horizon)
    if restrict_rect is not None:
        env = RestrictAgentsRect(env, restrict_rect=restrict_rect,
                                 penalize_objects_out=penalize_objects_out)
    env = PreparationPhase(env, prep_fraction=prep_fraction)
    env = DiscretizeActionWrapper(env, 'action_movement')
    if np.max(n_boxes) > 0:
        env = AgentGeomObsMask2D(env, pos_obs_key='box_pos',
                                 mask_obs_key='mask_ab_obs',
                                 geom_idxs_obs_key='box_geom_idxs')
        keys_external += ['mask_ab_obs', 'box_obs']
        keys_mask_external.append('mask_ab_obs')
    if n_food:
        env = AgentSiteObsMask2D(env, pos_obs_key='food_pos',
                                 mask_obs_key='mask_af_obs')
        env = FoodHealthWrapper(env,
                                respawn_time=(np.inf if food_respawn_time is None
                                              else food_respawn_time),
                                eat_thresh=(np.inf if food_radius is None
                                            else food_radius),
                                max_food_health=max_food_health,
                                food_rew_type=food_rew_type,
                                reward_scale=food_reward_scale)
        # Can only eat food that is in vision.
        env = MaskActionWrapper(env, 'action_eat_food', ['mask_af_obs'])
        if prep_obs:
            env = MaskPrepPhaseAction(env, 'action_eat_food')
        if not eat_when_caught:
            env = MaskUnseenAction(env, 0, 'action_eat_food')
        eat_agents = np.arange(n_hiders)
        env = AlwaysEatWrapper(env, agent_idx_allowed=eat_agents)
        keys_external += ['mask_af_obs', 'food_obs']
        keys_mask_external.append('mask_af_obs')
    if lock_box and np.max(n_boxes) > 0:
        env = LockObjWrapper(env,
                             body_names=[f'moveable_box{i}' for i in range(np.max(n_boxes))],
                             agent_idx_allowed_to_lock=np.arange(n_hiders + n_seekers),
                             lock_type=lock_type,
                             radius_multiplier=lock_radius_multiplier,
                             obj_in_game_metadata_keys=["curr_n_boxes"],
                             agent_allowed_to_lock_keys=None if lock_out_of_vision else ["mask_ab_obs"])
    if n_ramps > 0:
        env = AgentGeomObsMask2D(env, pos_obs_key='ramp_pos',
                                 mask_obs_key='mask_ar_obs',
                                 geom_idxs_obs_key='ramp_geom_idxs')
        if lock_ramp:
            env = LockObjWrapper(env,
                                 body_names=[f'ramp{i}:ramp' for i in range(n_ramps)],
                                 agent_idx_allowed_to_lock=np.arange(n_hiders + n_seekers),
                                 lock_type=lock_type, ac_obs_prefix='ramp_',
                                 radius_multiplier=lock_radius_multiplier,
                                 obj_in_game_metadata_keys=['curr_n_ramps'],
                                 agent_allowed_to_lock_keys=None if lock_out_of_vision else ["mask_ar_obs"])
        keys_external += ['ramp_obs']
        keys_mask_external.append('mask_ar_obs')
    if grab_box and (np.max(n_boxes) > 0 or n_ramps > 0):
        env = GrabObjWrapper(env,
                             [f'moveable_box{i}' for i in range(np.max(n_boxes))] +
                             [f"ramp{i}:ramp" for i in range(n_ramps)],
                             radius_multiplier=grab_radius_multiplier,
                             grab_exclusive=grab_exclusive,
                             obj_in_game_metadata_keys=['curr_n_boxes', 'curr_n_ramps'])
    if n_lidar_per_agent > 0:
        env = Lidar(env, n_lidar_per_agent=n_lidar_per_agent,
                    visualize_lidar=visualize_lidar,
                    compress_lidar_scale=compress_lidar_scale)
        keys_copy += ['lidar']
        keys_external += ['lidar']
    if prep_obs:
        env = TrackStatWrapper(env, np.max(n_boxes), n_ramps, n_food)
    env = SplitObservations(env, keys_self + keys_mask_self, keys_copy=keys_copy)
    if np.max(n_boxes) > 0:
        env = SpoofEntityWrapper(env, np.max(n_boxes),
                                 ['box_obs', 'you_lock', 'team_lock', 'obj_lock'],
                                 ['mask_ab_obs'])
    if n_food:
        env = SpoofEntityWrapper(env, n_food, ['food_obs'], ['mask_af_obs'])
    keys_mask_external += ['mask_ab_obs_spoof', 'mask_af_obs_spoof']
    if max_n_agents is not None:
        env = SpoofEntityWrapper(env, max_n_agents,
                                 ['agent_qpos_qvel', 'hider', 'prep_obs'],
                                 ['mask_aa_obs'])
    env = LockAllWrapper(env, remove_object_specific_lock=True)
    if not grab_out_of_vision and grab_box and np.max(n_boxes) > 0:
        # Can only pull objects that are in vision.
        env = MaskActionWrapper(env, 'action_pull',
                                ['mask_ab_obs'] + (['mask_ar_obs'] if n_ramps > 0 else []))
    if not grab_selective and grab_box and np.max(n_boxes) > 0:
        env = GrabClosestWrapper(env)
    env = NoActionsInPrepPhase(env, np.arange(n_hiders, n_hiders + n_seekers))
    env = DiscardMujocoExceptionEpisodes(env, n_hiders + n_seekers)
    if np.max(n_boxes) > 0 and n_ramps > 0:
        env = ConcatenateObsWrapper(env, {
            'agent_qpos_qvel': ['agent_qpos_qvel', 'hider', 'prep_obs'],
            'box_obs': ['box_obs', 'you_lock', 'team_lock', 'obj_lock'],
            'ramp_obs': ['ramp_obs'] + (['ramp_you_lock', 'ramp_team_lock', 'ramp_obj_lock']
                                        if lock_ramp else [])})
    elif np.max(n_boxes) > 0:
        env = ConcatenateObsWrapper(env, {
            'agent_qpos_qvel': ['agent_qpos_qvel', 'hider', 'prep_obs'],
            'box_obs': ['box_obs', 'you_lock', 'team_lock', 'obj_lock']})
    elif n_ramps > 0:
        env = ConcatenateObsWrapper(env, {
            'agent_qpos_qvel': ['agent_qpos_qvel', 'hider', 'prep_obs'],
            'ramp_obs': ['ramp_obs'] + (['ramp_you_lock', 'ramp_team_lock', 'ramp_obj_lock']
                                        if lock_ramp else [])})
    else:
        env = ConcatenateObsWrapper(env, {
            'agent_qpos_qvel': ['agent_qpos_qvel', 'hider', 'prep_obs']})
    env = SelectKeysWrapper(env, keys_self=keys_self,
                            keys_external=keys_external,
                            keys_mask=keys_mask_self + keys_mask_external,
                            flatten=False)
    return env
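# A minimal usage sketch for HideAndSeekEnv; this example is an assumption,
# not part of the original file. Every argument has a default, so the factory
# can be called directly; the values below simply restate the defaults.
def _hide_and_seek_example():
    env = HideAndSeekEnv(n_hiders=2, n_seekers=2, scenario='quadrant',
                         n_boxes=2, n_ramps=1)
    obs = env.reset()
    # Seeker actions are masked during the preparation phase
    # (NoActionsInPrepPhase), so early steps only move the hiders.
    obs, rew, done, info = env.step(env.action_space.sample())
    return obs, rew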
def ShelterConstructionEnv(args, n_substeps=15, horizon=80,
                           deterministic_mode=False,
                           floor_size=6.0, grid_size=30, n_agents=1,
                           objective_diameter=[1, 1],
                           objective_placement='center',
                           num_rays_per_side=25, shelter_reward_scale=1,
                           n_boxes=2, n_elongated_boxes=0,
                           box_size=0.5, box_only_z_rot=False,
                           lock_box=True, grab_box=True, grab_selective=False,
                           lock_grab_radius=0.25, lock_type='any_lock_specific',
                           grab_exclusive=False,
                           grab_out_of_vision=False, lock_out_of_vision=True,
                           box_floor_friction=0.2, other_friction=0.01,
                           gravity=[0, 0, -50], action_lims=(-0.9, 0.9),
                           polar_obs=True,
                           n_lidar_per_agent=0, visualize_lidar=False,
                           compress_lidar_scale=None,
                           boxid_obs=True, boxsize_obs=True,
                           team_size_obs=False, additional_obs={}):
    assert n_agents == 1, "Only 1 agent is supported; check config.py."
    grab_radius_multiplier = lock_grab_radius / box_size
    lock_radius_multiplier = lock_grab_radius / box_size

    env = Base(n_agents=n_agents, n_substeps=n_substeps, horizon=horizon,
               floor_size=floor_size, grid_size=grid_size,
               action_lims=action_lims, deterministic_mode=deterministic_mode)

    env.add_module(WallScenarios(n_agents=n_agents, grid_size=grid_size,
                                 door_size=2, scenario='empty',
                                 friction=other_friction))
    if objective_placement == 'center':
        objective_placement_fn = center_placement
    elif objective_placement == 'uniform_away_from_walls':
        objective_placement_fn = uniform_placement_middle(0.7)
    else:
        raise ValueError(f"Objective placement {objective_placement} not supported.")

    env.add_module(Cylinders(1, diameter=objective_diameter, height=box_size,
                             make_static=True,
                             placement_fn=objective_placement_fn))
    env.add_module(Agents(n_agents,
                          placement_fn=uniform_placement,
                          color=[np.array((66., 235., 244., 255.)) / 255] * n_agents,
                          friction=other_friction,
                          polar_obs=polar_obs))
    if np.max(n_boxes) > 0:
        env.add_module(Boxes(n_boxes=n_boxes, placement_fn=uniform_placement,
                             friction=box_floor_friction, polar_obs=polar_obs,
                             n_elongated_boxes=n_elongated_boxes,
                             boxid_obs=boxid_obs, boxsize_obs=boxsize_obs,
                             box_size=box_size, box_only_z_rot=box_only_z_rot))
    if n_lidar_per_agent > 0 and visualize_lidar:
        env.add_module(LidarSites(n_agents=n_agents,
                                  n_lidar_per_agent=n_lidar_per_agent))
    env.add_module(AgentManipulation())
    if box_floor_friction is not None:
        env.add_module(FloorAttributes(friction=box_floor_friction))
    env.add_module(WorldConstants(gravity=gravity))
    env.reset()

    keys_self = ['agent_qpos_qvel', 'current_step']
    keys_mask_self = ['mask_aa_obs']
    keys_external = ['agent_qpos_qvel', 'vector_door_obs']
    keys_copy = ['you_lock', 'team_lock', 'ramp_you_lock', 'ramp_team_lock']
    keys_mask_external = []

    env = AddConstantObservationsWrapper(env, new_obs=additional_obs)
    keys_external += list(additional_obs)
    keys_mask_external += [ob for ob in additional_obs if 'mask' in ob]

    env = ShelterRewardWrapper(env, num_rays_per_side=num_rays_per_side,
                               reward_scale=shelter_reward_scale)
    env = SplitMultiAgentActions(env)
    if team_size_obs:
        keys_self += ['team_size']
    env = TeamMembership(env, np.zeros((n_agents,)))
    env = AgentAgentObsMask2D(env)
    env = DiscretizeActionWrapper(env, 'action_movement')
    if np.max(n_boxes) > 0:
        env = AgentGeomObsMask2D(env, pos_obs_key='box_pos',
                                 mask_obs_key='mask_ab_obs',
                                 geom_idxs_obs_key='box_geom_idxs')
        keys_external += ['mask_ab_obs', 'box_obs']
        keys_mask_external.append('mask_ab_obs')
    if lock_box and np.max(n_boxes) > 0:
        env = LockObjWrapper(env,
                             body_names=[f'moveable_box{i}' for i in range(n_boxes)],
                             agent_idx_allowed_to_lock=np.arange(n_agents),
                             lock_type=lock_type,
                             radius_multiplier=lock_radius_multiplier,
                             obj_in_game_metadata_keys=["curr_n_boxes"],
                             agent_allowed_to_lock_keys=None if lock_out_of_vision else ["mask_ab_obs"])
    if grab_box and np.max(n_boxes) > 0:
        env = GrabObjWrapper(env,
                             [f'moveable_box{i}' for i in range(n_boxes)],
                             radius_multiplier=grab_radius_multiplier,
                             grab_exclusive=grab_exclusive,
                             obj_in_game_metadata_keys=['curr_n_boxes'])
    if n_lidar_per_agent > 0:
        env = Lidar(env, n_lidar_per_agent=n_lidar_per_agent,
                    visualize_lidar=visualize_lidar,
                    compress_lidar_scale=compress_lidar_scale)
        keys_copy += ['lidar']
        keys_external += ['lidar']

    env = TimeWrapper(env, horizon)
    env = SplitObservations(env, keys_self + keys_mask_self, keys_copy=keys_copy)
    if n_agents == 1:
        env = SpoofEntityWrapper(env, 2, ['agent_qpos_qvel'], ['mask_aa_obs'])
    env = SpoofEntityWrapper(env, n_boxes,
                             ['box_obs', 'you_lock', 'team_lock', 'obj_lock'],
                             ['mask_ab_obs'])
    keys_mask_external += ['mask_ab_obs_spoof']
    env = LockAllWrapper(env, remove_object_specific_lock=True)
    if not grab_out_of_vision and grab_box:
        # Can only pull boxes that are in vision.
        env = MaskActionWrapper(env, 'action_pull', ['mask_ab_obs'])
    if not grab_selective and grab_box:
        env = GrabClosestWrapper(env)
    env = DiscardMujocoExceptionEpisodes(env, n_agents)
    env = ConcatenateObsWrapper(env, {
        'agent_qpos_qvel': ['agent_qpos_qvel'],
        'box_obs': ['box_obs', 'you_lock', 'team_lock', 'obj_lock']})
    env = SelectKeysWrapper(env, keys_self=keys_self,
                            keys_external=keys_external,
                            keys_mask=keys_mask_self + keys_mask_external,
                            flatten=False)
    return env
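# A minimal usage sketch for ShelterConstructionEnv; this example is an
# assumption, not part of the original file. The factory accepts an `args`
# object for interface consistency with BoxLockingEnv but never reads it,
# so None is passed here.
def _shelter_construction_example():
    env = ShelterConstructionEnv(args=None, n_agents=1, n_boxes=2)
    obs = env.reset()
    obs, rew, done, info = env.step(env.action_space.sample())
    return obs, rew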