def make_env(n_substeps=15,
             horizon=80,
             deterministic_mode=False,
             floor_size=6.0,
             grid_size=30,
             n_agents=1,
             objective_diameter=[1, 1],
             objective_placement='center',
             num_rays_per_side=25,
             shelter_reward_scale=1,
             n_boxes=2,
             n_elongated_boxes=0,
             box_size=0.5,
             box_only_z_rot=False,
             lock_box=True,
             grab_box=True,
             grab_selective=False,
             lock_grab_radius=0.25,
             lock_type='any_lock_specific',
             grab_exclusive=False,
             grab_out_of_vision=False,
             lock_out_of_vision=True,
             box_floor_friction=0.2,
             other_friction=0.01,
             gravity=[0, 0, -50],
             action_lims=(-0.9, 0.9),
             polar_obs=True,
             n_lidar_per_agent=0,
             visualize_lidar=False,
             compress_lidar_scale=None,
             boxid_obs=True,
             boxsize_obs=True,
             team_size_obs=False,
             additional_obs={}):
    """Assemble the shelter-construction environment.

    Builds a ``Base`` environment with an empty ``WallScenarios`` layout, one
    static ``Cylinders`` objective, ``n_agents`` agents and (when
    ``np.max(n_boxes) > 0``) boxes, then stacks the observation, reward and
    action wrappers on top of it.

    Args:
        n_substeps, horizon, floor_size, grid_size, action_lims,
            deterministic_mode: forwarded unchanged to ``Base``.
        n_agents: number of agents; every agent gets team index 0 in
            ``TeamMembership``.
        objective_diameter: forwarded to ``Cylinders`` as ``diameter``.
        objective_placement: ``'center'`` or ``'uniform_away_from_walls'``.
        num_rays_per_side, shelter_reward_scale: forwarded to
            ``ShelterRewardWrapper``.
        n_boxes: box count forwarded to ``Boxes``. The function only relies on
            ``np.max(n_boxes)``, so a scalar or a sequence is accepted here.
        n_elongated_boxes, box_only_z_rot, boxid_obs, boxsize_obs: forwarded
            to ``Boxes``.
        box_size: forwarded to ``Boxes``; also the objective cylinder height
            and the divisor for the lock/grab radius multipliers.
        lock_box, grab_box: enable ``LockObjWrapper`` / ``GrabObjWrapper``
            (only when there are boxes).
        grab_selective: when false (and ``grab_box``), ``GrabClosestWrapper``
            is applied.
        lock_grab_radius: divided by ``box_size`` to obtain the radius
            multiplier passed to both the lock and the grab wrapper.
        lock_type, grab_exclusive: forwarded to the lock / grab wrapper.
        grab_out_of_vision: when false (and ``grab_box``), ``action_pull`` is
            masked with ``mask_ab_obs``.
        lock_out_of_vision: when false, locking is gated on ``mask_ab_obs``.
        box_floor_friction: friction for ``Boxes`` and, unless ``None``, for
            ``FloorAttributes``.
        other_friction: friction for walls and agents.
        gravity: forwarded to ``WorldConstants``.
        polar_obs: forwarded to ``Agents`` and ``Boxes``.
        n_lidar_per_agent, visualize_lidar, compress_lidar_scale: lidar
            configuration; lidar is only added when ``n_lidar_per_agent > 0``.
        team_size_obs: adds ``'team_size'`` to the self observation keys.
        additional_obs: constant observations added through
            ``AddConstantObservationsWrapper``; keys containing ``'mask'`` are
            also treated as external mask keys. Not mutated here.

    Returns:
        The fully wrapped environment.

    Raises:
        ValueError: if ``objective_placement`` is not a supported option.
    """
    # Fail fast: previously an unknown option left the placement function
    # unbound and surfaced as an UnboundLocalError after the simulation had
    # already been partially built.
    if objective_placement not in ('center', 'uniform_away_from_walls'):
        raise ValueError(
            f'Objective placement option: {objective_placement} not '
            'implemented. Please choose from center and '
            'uniform_away_from_walls.')

    grab_radius_multiplier = lock_grab_radius / box_size
    lock_radius_multiplier = lock_grab_radius / box_size

    # The rest of this function gates on np.max(n_boxes), so n_boxes may be a
    # sequence; body names and spoofed entities must therefore be sized by the
    # maximum count rather than by n_boxes itself.
    max_n_boxes = int(np.max(n_boxes))
    has_boxes = max_n_boxes > 0
    box_names = [f'moveable_box{i}' for i in range(max_n_boxes)]

    env = Base(n_agents=n_agents,
               n_substeps=n_substeps,
               horizon=horizon,
               floor_size=floor_size,
               grid_size=grid_size,
               action_lims=action_lims,
               deterministic_mode=deterministic_mode)

    env.add_module(
        WallScenarios(grid_size=grid_size,
                      door_size=2,
                      scenario='empty',
                      friction=other_friction))

    if objective_placement == 'center':
        objective_placement_fn = center_placement
    else:  # 'uniform_away_from_walls' (validated above)
        objective_placement_fn = uniform_placement_middle(0.7)

    env.add_module(
        Cylinders(1,
                  diameter=objective_diameter,
                  height=box_size,
                  make_static=True,
                  placement_fn=objective_placement_fn))

    env.add_module(
        Agents(n_agents,
               placement_fn=uniform_placement,
               color=[np.array((66., 235., 244., 255.)) / 255] * n_agents,
               friction=other_friction,
               polar_obs=polar_obs))
    if has_boxes:
        env.add_module(
            Boxes(n_boxes=n_boxes,
                  placement_fn=uniform_placement,
                  friction=box_floor_friction,
                  polar_obs=polar_obs,
                  n_elongated_boxes=n_elongated_boxes,
                  boxid_obs=boxid_obs,
                  boxsize_obs=boxsize_obs,
                  box_size=box_size,
                  box_only_z_rot=box_only_z_rot))
    if n_lidar_per_agent > 0 and visualize_lidar:
        env.add_module(
            LidarSites(n_agents=n_agents, n_lidar_per_agent=n_lidar_per_agent))

    env.add_module(AgentManipulation())
    if box_floor_friction is not None:
        env.add_module(FloorAttributes(friction=box_floor_friction))
    env.add_module(WorldConstants(gravity=gravity))
    env.reset()

    # Observation key bookkeeping consumed by the wrappers further down.
    keys_self = ['agent_qpos_qvel', 'hider', 'prep_obs']
    keys_mask_self = ['mask_aa_obs']
    keys_external = ['agent_qpos_qvel']
    keys_copy = ['you_lock', 'team_lock', 'ramp_you_lock', 'ramp_team_lock']
    keys_mask_external = []

    env = AddConstantObservationsWrapper(env, new_obs=additional_obs)
    keys_external += list(additional_obs)
    keys_mask_external += [ob for ob in additional_obs if 'mask' in ob]

    env = ShelterRewardWrapper(env,
                               num_rays_per_side=num_rays_per_side,
                               reward_scale=shelter_reward_scale)
    env = SplitMultiAgentActions(env)

    if team_size_obs:
        keys_self += ['team_size']
    env = TeamMembership(env, np.zeros((n_agents, )))
    env = AgentAgentObsMask2D(env)
    env = DiscretizeActionWrapper(env, 'action_movement')
    if has_boxes:
        env = AgentGeomObsMask2D(env,
                                 pos_obs_key='box_pos',
                                 mask_obs_key='mask_ab_obs',
                                 geom_idxs_obs_key='box_geom_idxs')
        keys_external += ['mask_ab_obs', 'box_obs']
        keys_mask_external.append('mask_ab_obs')
    if lock_box and has_boxes:
        env = LockObjWrapper(
            env,
            body_names=box_names,
            agent_idx_allowed_to_lock=np.arange(n_agents),
            lock_type=lock_type,
            radius_multiplier=lock_radius_multiplier,
            obj_in_game_metadata_keys=["curr_n_boxes"],
            agent_allowed_to_lock_keys=None
            if lock_out_of_vision else ["mask_ab_obs"])

    if grab_box and has_boxes:
        env = GrabObjWrapper(env, box_names,
                             radius_multiplier=grab_radius_multiplier,
                             grab_exclusive=grab_exclusive,
                             obj_in_game_metadata_keys=['curr_n_boxes'])

    if n_lidar_per_agent > 0:
        env = Lidar(env,
                    n_lidar_per_agent=n_lidar_per_agent,
                    visualize_lidar=visualize_lidar,
                    compress_lidar_scale=compress_lidar_scale)
        keys_copy += ['lidar']
        keys_external += ['lidar']

    env = SplitObservations(env,
                            keys_self + keys_mask_self,
                            keys_copy=keys_copy)
    if n_agents == 1:
        env = SpoofEntityWrapper(env, 2,
                                 ['agent_qpos_qvel', 'hider', 'prep_obs'],
                                 ['mask_aa_obs'])
    env = SpoofEntityWrapper(env, max_n_boxes,
                             ['box_obs', 'you_lock', 'team_lock', 'obj_lock'],
                             ['mask_ab_obs'])
    keys_mask_external += ['mask_ab_obs_spoof']
    env = LockAllWrapper(env, remove_object_specific_lock=True)
    if not grab_out_of_vision and grab_box:
        env = MaskActionWrapper(env, 'action_pull',
                                ['mask_ab_obs'])  # Can only pull if in vision
    if not grab_selective and grab_box:
        env = GrabClosestWrapper(env)
    env = DiscardMujocoExceptionEpisodes(env)
    env = ConcatenateObsWrapper(
        env, {
            'agent_qpos_qvel': ['agent_qpos_qvel', 'hider', 'prep_obs'],
            'box_obs': ['box_obs', 'you_lock', 'team_lock', 'obj_lock']
        })
    env = SelectKeysWrapper(env,
                            keys_self=keys_self,
                            keys_other=keys_external + keys_mask_self +
                            keys_mask_external)
    return env
# Example #2
def make_env(
        n_substeps=15,
        n_agents=3,
        floor_size=[1.5, 6],
        action_lims=(-0.9, 0.9),
        grid_size=60,
        other_friction=0.01,
        box_floor_friction=0.2,
        gravity=[0, 0, -50],
        horizon=1000,
        horizon_lower=None,
        horizon_upper=None,
        prob_per_step_to_stop=0.001,
        # Food
        n_food=1,
        n_food_cluster=1,
        food_radius=0.4,
        food_respawn_time=0,
        max_food_health=5,
        food_together_radius=0.4,
        food_rew_type='selfish',
        food_reward_scale=0.0,
        # Health
        max_agent_health=20,
        health_per_food_bounds=[2.1, 2.7],
        health_per_step=-1.0,
        # Attacking
        attack_range=0.7,
        attack_damage=-5.0,
        only_attack_in_front=True,
        # Death
        life_rew=1,
        death_rew=-100,
        steps_freeze_on_death=100,
        # Random Teams
        rusp_args={},
        # ID
        id_dim=16,
        # Action Masking
        mask_all_when_dead=True):
    """Assemble the environment with food, agent health and attack actions.

    Builds a ``Base`` environment containing agents and food sites, then
    stacks the health, attack, option-selection, RUSP, identity and
    observation-selection wrappers in the order written below. The wrapper
    order is significant (see the inline comments) and must not be shuffled.

    Args:
        n_substeps, n_agents, floor_size, action_lims, grid_size: forwarded to
            ``Base``.
        other_friction: friction passed to ``Agents``.
        box_floor_friction: friction passed to ``FloorAttributes``; the module
            is skipped when this is ``None``.
        gravity: forwarded to ``WorldConstants``.
        horizon, horizon_lower, horizon_upper, prob_per_step_to_stop:
            configure ``RandomizedHorizonWrapper``. ``horizon`` is used for a
            limit whenever the corresponding bound is falsy (``None`` or 0).
            ``Base`` itself is given a very large fixed horizon.
        n_food: number of food items passed to ``Food``.
        n_food_cluster: number of food clusters; ``n_food`` must be divisible
            by it (checked with ``assert``).
        food_radius: ``eat_thresh`` of ``FoodHealthWrapper`` (``np.inf`` when
            ``None``).
        food_respawn_time: ``respawn_time`` of ``FoodHealthWrapper``
            (``np.inf`` when ``None``).
        max_food_health, food_rew_type, food_reward_scale: forwarded to
            ``FoodHealthWrapper``.
        food_together_radius: stored in ``env.metadata`` under
            ``'food_together_radius'``, the key handed to
            ``close_to_other_object_placement``.
        max_agent_health, death_rew, steps_freeze_on_death, life_rew:
            forwarded to ``AgentHealthWrapper``.
        health_per_food_bounds: forwarded to ``FoodIncreaseHealth``.
        health_per_step: forwarded to ``TimeDecreaseHealth``.
        attack_range, attack_damage, only_attack_in_front: forwarded to
            ``AttackAction``.
        rusp_args: keyword arguments passed both to
            ``add_rew_share_observation_keys`` and to ``RUSPWrapper``.
        id_dim: ``vector_dim`` of ``RandomIdentityVector``.
        mask_all_when_dead: forwarded to ``OasisActionMasks``.

    Returns:
        The fully wrapped environment.
    """
    env = Base(
        n_agents=n_agents,
        n_substeps=n_substeps,
        floor_size=floor_size,
        horizon=
        99999999999999,  # Just a big number so actual horizon is done by RandomizedHorizonWrapper
        action_lims=action_lims,
        deterministic_mode=False,
        grid_size=grid_size)
    if box_floor_friction is not None:
        env.add_module(FloorAttributes(friction=box_floor_friction))
    env.add_module(WorldConstants(gravity=gravity))

    env.add_module(
        Agents(n_agents,
               placement_fn=uniform_placement,
               friction=other_friction))

    # Food
    env.metadata['food_together_radius'] = food_together_radius

    assert n_food % n_food_cluster == 0
    # Split the food items into n_food_cluster equally sized groups. Each
    # entry of cluster_assignments is the index of the first food item of the
    # group that item belongs to (e.g. 6 items / 2 clusters -> 0,0,0,3,3,3).
    cluster_assignments = np.repeat(
        np.arange(0, n_food, n_food // n_food_cluster),
        n_food // n_food_cluster)
    # By default every item is placed relative to its group's first item ...
    food_placement = [
        close_to_other_object_placement("food", i, "food_together_radius")
        for i in cluster_assignments
    ]
    # ... while the first item of each group uses uniform_placement.
    food_placement[::n_food //
                   n_food_cluster] = [uniform_placement] * n_food_cluster

    env.add_module(Food(n_food, placement_fn=food_placement))

    env.reset()

    # Observation key bookkeeping. These lists are extended in place further
    # down (and are also handed to add_rew_share_observation_keys) before
    # being consumed by SplitObservations / ConcatenateObsWrapper /
    # SelectKeysWrapper.
    keys_self = [
        'agent_qpos_qvel', 'agent_identity', 'agent_health', 'is_dead',
        'time_to_alive', 'timestep'
    ]
    keys_additional_self_vf = ['fraction_episode_done', 'horizon']
    keys_copy = ['mask_is_dead']
    keys_other_agents = [
        'agent_qpos_qvel',
        'agent_identity',
        'agent_health',
        'is_dead',
        'time_to_alive',
    ]
    keys_additional_other_agents_vf = []
    keys_self_matrices = []

    # NOTE(review): presumably appends the RUSP-related keys to the lists in
    # place (its return value is ignored) -- confirm against the helper.
    add_rew_share_observation_keys(
        keys_self=keys_self,
        keys_additional_self_vf=keys_additional_self_vf,
        keys_other_agents=keys_other_agents,
        keys_additional_other_agents_vf=keys_additional_other_agents_vf,
        keys_self_matrices=keys_self_matrices,
        **rusp_args)

    keys_external = [
        'other_agents', 'other_agents_vf', 'additional_self_vf_obs'
    ]

    keys_self_masks = ['mask_aa_obs']

    env = SplitMultiAgentActions(env)
    env = DiscretizeActionWrapper(env, 'action_movement')
    env = AgentAgentObsMask2D(env)

    env = ZeroRews(env)

    # A falsy lower/upper bound falls back to the fixed `horizon`.
    env = RandomizedHorizonWrapper(env,
                                   lower_lim=horizon_lower or horizon,
                                   upper_lim=horizon_upper or horizon,
                                   prob_per_step_to_stop=prob_per_step_to_stop)

    # None for the respawn time / eat radius is translated to infinity.
    env = FoodHealthWrapper(
        env,
        respawn_time=(np.inf
                      if food_respawn_time is None else food_respawn_time),
        eat_thresh=(np.inf if food_radius is None else food_radius),
        max_food_health=max_food_health,
        food_rew_type=food_rew_type,
        reward_scale=food_reward_scale,
        split_eat_between_agents=True)
    keys_external += ['mask_af_obs', 'food_obs']
    keys_copy.append('close_enough_to_food')

    env = FoodIncreaseHealth(env,
                             health_per_food_bounds=health_per_food_bounds)
    env = TimeDecreaseHealth(env, health_per_step=health_per_step)

    # Attack action should go before Food Health wrapper, since it masks eat action
    env = AttackAction(env,
                       attack_damage=attack_damage,
                       attack_range=attack_range,
                       only_attack_in_front=only_attack_in_front)
    env = ActionOptionsWrapper(env, ['action_attack_agent', 'action_eat_food'],
                               {
                                   'action_attack_agent': -1,
                                   'action_eat_food': 0
                               })
    env = ColorAgentsByOption(
        env, 'action_choose_option',
        ['action_attack_agent', 'action_eat_food', 'do_nothing'])
    # Keys for the option / attack observations registered for self and for
    # the other-agents concatenation.
    keys_self.append('previous_choice')
    keys_other_agents.append('previous_choice')
    keys_self_matrices.append('attacked_me')
    keys_self.append('n_attacked_me')
    keys_other_agents += ['attacked_me', 'n_attacked_me']

    env = AgentHealthWrapper(env,
                             max_health=max_agent_health,
                             death_rew=death_rew,
                             steps_freeze_on_death=steps_freeze_on_death,
                             life_rew=life_rew)

    # This needs to come before options wrapper, so we can't group it above
    env = AlwaysEatWrapper(env, agent_idx_allowed=np.arange(n_agents))

    env = RUSPWrapper(env, **rusp_args)

    env = RandomIdentityVector(env, vector_dim=id_dim)

    env = SplitObservations(env,
                            keys_self + keys_additional_self_vf,
                            keys_copy=keys_copy,
                            keys_self_matrices=keys_self_matrices +
                            keys_self_masks)
    # 'other_agents_vf' is built from the already concatenated 'other_agents'
    # entry plus the additional value-function-only keys.
    env = ConcatenateObsWrapper(
        env, {
            'other_agents':
            keys_other_agents,
            'other_agents_vf':
            ['other_agents'] + keys_additional_other_agents_vf,
            'additional_self_vf_obs':
            [k + '_self' for k in keys_additional_self_vf]
        })
    env = DiscardMujocoExceptionEpisodes(env)
    env = SelectKeysWrapper(env,
                            keys_self=keys_self,
                            keys_other=keys_external + keys_copy +
                            keys_self_masks)
    env = OasisActionMasks(env, mask_all_when_dead=mask_all_when_dead)
    return env
def make_env(n_substeps=15, horizon=80, deterministic_mode=False,
             floor_size=6.0, grid_size=30, door_size=2,
             n_hiders=1, n_seekers=1, max_n_agents=None,
             n_boxes=1, n_ramps=0, n_elongated_boxes=0,
             rand_num_elongated_boxes=False, n_min_boxes=None,
             box_size=0.5, boxid_obs=False, box_only_z_rot=True,
             rew_type='joint_zero_sum',
             lock_box=True, grab_box=True, lock_ramp=True,
             lock_type='any_lock_specific',
             lock_grab_radius=0.25, lock_out_of_vision=True, grab_exclusive=False,
             grab_out_of_vision=False, grab_selective=False,
             box_floor_friction=0.2, other_friction=0.01, gravity=[0, 0, -50],
             action_lims=(-0.9, 0.9), polar_obs=True,
             scenario='quadrant', quadrant_game_hider_uniform_placement=False,
             p_door_dropout=0.0,
             n_rooms=4, random_room_number=True, prob_outside_walls=1.0,
             n_lidar_per_agent=0, visualize_lidar=False, compress_lidar_scale=None,
             hiders_together_radius=None, seekers_together_radius=None,
             prep_fraction=0.4, prep_obs=False,
             team_size_obs=False,
             restrict_rect=None, penalize_objects_out=False,
             n_food=0, food_radius=None, food_respawn_time=None, max_food_health=1,
             food_together_radius=None, food_rew_type='selfish', eat_when_caught=False,
             food_reward_scale=1.0, food_normal_centered=False, food_box_centered=False,
             n_food_cluster=1):
    """Assemble the hide-and-seek environment.

    Builds a ``Base`` environment with ``n_hiders + n_seekers`` agents (hiders
    occupy the first ``n_hiders`` agent indices), walls chosen by ``scenario``,
    and optional boxes, ramps, lidar sites and food. The observation, reward,
    lock/grab and masking wrappers are then stacked in the order written
    below.

    Args:
        n_substeps, horizon, floor_size, grid_size, action_lims,
            deterministic_mode: forwarded to ``Base``.
        door_size: forwarded to the wall module.
        n_hiders, n_seekers: team sizes. Hiders get team index 0 and a
            ``'hider'`` observation of 1; seekers get team index 1 and 0.
        max_n_agents: when not ``None``, agent observations are passed through
            ``SpoofEntityWrapper`` with this entity count.
        n_boxes: forwarded to ``Boxes``; everything else here uses
            ``np.max(n_boxes)``.
        n_ramps: number of ramps (``Ramps`` module and related wrappers).
        n_elongated_boxes, boxid_obs, box_only_z_rot: forwarded to ``Boxes``.
        rand_num_elongated_boxes, n_min_boxes, food_normal_centered: accepted
            but not referenced in this function body.
        box_size: only used as the divisor for the lock/grab radius
            multipliers; it is not forwarded to ``Boxes`` here.
        rew_type: forwarded to ``HideAndSeekRewardWrapper``.
        lock_box, grab_box, lock_ramp: enable the box lock, grab and ramp lock
            wrappers.
        lock_type, grab_exclusive: forwarded to the lock / grab wrappers.
        lock_grab_radius: divided by ``box_size`` to get the radius
            multipliers.
        lock_out_of_vision: when false, locking is gated on the vision mask
            keys (``mask_ab_obs`` / ``mask_ar_obs``).
        grab_out_of_vision: when false (and ``grab_box``), ``action_pull`` is
            masked by the vision mask keys.
        grab_selective: when false (and ``grab_box``), ``GrabClosestWrapper``
            is applied.
        box_floor_friction: friction for boxes and, unless ``None``, for
            ``FloorAttributes``.
        other_friction: friction for walls (quadrant scenario), agents and
            ramps.
        gravity: forwarded to ``WorldConstants``.
        polar_obs: forwarded to ``Agents``, ``Boxes`` and ``Ramps``.
        scenario: ``'randomwalls'`` or ``'quadrant'``.
        quadrant_game_hider_uniform_placement: in the quadrant scenario,
            place hiders with ``uniform_placement`` instead of
            ``quadrant_placement``.
        p_door_dropout: forwarded to ``WallScenarios`` (quadrant only).
        n_rooms, random_room_number, prob_outside_walls: forwarded to
            ``RandomWalls`` (randomwalls only).
        n_lidar_per_agent, visualize_lidar, compress_lidar_scale: lidar
            configuration; lidar is only added when ``n_lidar_per_agent > 0``.
        hiders_together_radius, seekers_together_radius: only read in the
            randomwalls scenario. When set, the value is converted to grid
            cells, stored in ``env.metadata`` and all but the first agent of
            the team are placed with ``close_to_other_object_placement``
            relative to the team's first agent.
        prep_fraction: forwarded to ``PreparationPhase``.
        prep_obs: enables ``TrackStatWrapper`` and, with food,
            ``MaskPrepPhaseAction`` on ``'action_eat_food'``.
        team_size_obs: adds ``'team_size'`` to the self observation keys.
        restrict_rect, penalize_objects_out: when ``restrict_rect`` is not
            ``None`` both are forwarded to ``RestrictAgentsRect``.
        n_food, n_food_cluster: number of food items and clusters.
        food_radius, food_respawn_time: ``eat_thresh`` / ``respawn_time`` of
            ``FoodHealthWrapper`` (``np.inf`` when ``None``).
        max_food_health, food_rew_type, food_reward_scale: forwarded to
            ``FoodHealthWrapper``.
        food_together_radius: when set, food is clustered and the radius (in
            grid cells) is stored in ``env.metadata``.
        eat_when_caught: when false, ``MaskUnseenAction`` is applied to
            ``'action_eat_food'``.
        food_box_centered: outside the quadrant scenario, place the first food
            item with ``uniform_placement_middle(0.25)``.

    Returns:
        The fully wrapped environment.

    Raises:
        ValueError: if ``scenario`` is neither ``'randomwalls'`` nor
            ``'quadrant'``.
    """

    grab_radius_multiplier = lock_grab_radius / box_size
    lock_radius_multiplier = lock_grab_radius / box_size

    env = Base(n_agents=n_hiders + n_seekers, n_substeps=n_substeps, horizon=horizon,
               floor_size=floor_size, grid_size=grid_size,
               action_lims=action_lims,
               deterministic_mode=deterministic_mode)

    if scenario == 'randomwalls':
        env.add_module(RandomWalls(
            grid_size=grid_size, num_rooms=n_rooms,
            random_room_number=random_room_number, min_room_size=6,
            door_size=door_size,
            prob_outside_walls=prob_outside_walls, gen_door_obs=False))
        box_placement_fn = uniform_placement
        ramp_placement_fn = uniform_placement
        cell_size = env.floor_size / grid_size

        # Hiders: the first one is placed uniformly; the others either follow
        # it within the given radius or are placed uniformly as well.
        first_hider_placement = uniform_placement
        if hiders_together_radius is not None:
            # The radius is converted to (rounded-up) grid cells.
            htr_in_cells = np.ceil(hiders_together_radius / cell_size).astype(int)

            env.metadata['hiders_together_radius'] = htr_in_cells

            close_to_first_hider_placement = close_to_other_object_placement(
                                                "agent", 0, "hiders_together_radius")

            agent_placement_fn = [first_hider_placement] + \
                                 [close_to_first_hider_placement] * (n_hiders - 1)
        else:
            agent_placement_fn = [first_hider_placement] * n_hiders

        # Seekers: same scheme, anchored on the first seeker, which has agent
        # index n_hiders.
        first_seeker_placement = uniform_placement

        if seekers_together_radius is not None:
            str_in_cells = np.ceil(seekers_together_radius / cell_size).astype(int)

            env.metadata['seekers_together_radius'] = str_in_cells

            close_to_first_seeker_placement = close_to_other_object_placement(
                                                "agent", n_hiders, "seekers_together_radius")

            agent_placement_fn += [first_seeker_placement] + \
                                  [close_to_first_seeker_placement] * (n_seekers - 1)
        else:
            agent_placement_fn += [first_seeker_placement] * (n_seekers)

    elif scenario == 'quadrant':
        env.add_module(WallScenarios(grid_size=grid_size, door_size=door_size,
                                     scenario=scenario, friction=other_friction,
                                     p_door_dropout=p_door_dropout))
        box_placement_fn = quadrant_placement
        ramp_placement_fn = uniform_placement
        hider_placement = uniform_placement if quadrant_game_hider_uniform_placement else quadrant_placement
        agent_placement_fn = [hider_placement] * n_hiders + [outside_quadrant_placement] * n_seekers
    else:
        raise ValueError(f"Scenario {scenario} not supported.")

    # Agents are ordered hiders first, then seekers; the colour list follows
    # the same ordering.
    env.add_module(Agents(n_hiders + n_seekers,
                          placement_fn=agent_placement_fn,
                          color=[np.array((66., 235., 244., 255.)) / 255] * n_hiders + [(1., 0., 0., 1.)] * n_seekers,
                          friction=other_friction,
                          polar_obs=polar_obs))
    if np.max(n_boxes) > 0:
        env.add_module(Boxes(n_boxes=n_boxes, placement_fn=box_placement_fn,
                             friction=box_floor_friction, polar_obs=polar_obs,
                             n_elongated_boxes=n_elongated_boxes,
                             boxid_obs=boxid_obs, box_only_z_rot=box_only_z_rot))
    if n_ramps > 0:
        env.add_module(Ramps(n_ramps=n_ramps, placement_fn=ramp_placement_fn, friction=other_friction, polar_obs=polar_obs,
                             pad_ramp_size=(np.max(n_elongated_boxes) > 0)))
    if n_lidar_per_agent > 0 and visualize_lidar:
        env.add_module(LidarSites(n_agents=n_hiders + n_seekers, n_lidar_per_agent=n_lidar_per_agent))
    if n_food > 0:
        if scenario == 'quadrant':
            first_food_placement = quadrant_placement
        elif food_box_centered:
            first_food_placement = uniform_placement_middle(0.25)
        else:
            first_food_placement = uniform_placement
        if food_together_radius is not None:
            cell_size = env.floor_size / grid_size
            ftr_in_cells = np.ceil(food_together_radius / cell_size).astype(int)

            env.metadata['food_together_radius'] = ftr_in_cells

            assert n_food % n_food_cluster == 0
            # Each entry is the index of the first food item of the cluster
            # that item belongs to; cluster leaders get first_food_placement,
            # the remaining items are placed close to their leader.
            cluster_assignments = np.repeat(np.arange(0, n_food, n_food // n_food_cluster), n_food // n_food_cluster)
            food_placement = [close_to_other_object_placement(
                "food", i, "food_together_radius") for i in cluster_assignments]
            food_placement[::n_food // n_food_cluster] = [first_food_placement] * n_food_cluster
        else:
            food_placement = first_food_placement
        env.add_module(Food(n_food, placement_fn=food_placement))

    env.add_module(AgentManipulation())
    if box_floor_friction is not None:
        env.add_module(FloorAttributes(friction=box_floor_friction))
    env.add_module(WorldConstants(gravity=gravity))
    env.reset()
    # Observation key bookkeeping consumed by the wrappers further down.
    keys_self = ['agent_qpos_qvel', 'hider', 'prep_obs']
    keys_mask_self = ['mask_aa_obs']
    keys_external = ['agent_qpos_qvel']
    keys_copy = ['you_lock', 'team_lock', 'ramp_you_lock', 'ramp_team_lock']
    keys_mask_external = []
    env = SplitMultiAgentActions(env)
    if team_size_obs:
        keys_self += ['team_size']
    # Team index 0 for hiders, 1 for seekers.
    env = TeamMembership(env, np.append(np.zeros((n_hiders,)), np.ones((n_seekers,))))
    env = AgentAgentObsMask2D(env)
    # Constant per-agent flag: 1 for hiders, 0 for seekers.
    hider_obs = np.array([[1]] * n_hiders + [[0]] * n_seekers)
    env = AddConstantObservationsWrapper(env, new_obs={'hider': hider_obs})
    env = HideAndSeekRewardWrapper(env, n_hiders=n_hiders, n_seekers=n_seekers,
                                   rew_type=rew_type)
    if restrict_rect is not None:
        env = RestrictAgentsRect(env, restrict_rect=restrict_rect, penalize_objects_out=penalize_objects_out)
    env = PreparationPhase(env, prep_fraction=prep_fraction)
    env = DiscretizeActionWrapper(env, 'action_movement')
    if np.max(n_boxes) > 0:
        env = AgentGeomObsMask2D(env, pos_obs_key='box_pos', mask_obs_key='mask_ab_obs',
                                 geom_idxs_obs_key='box_geom_idxs')
        keys_external += ['mask_ab_obs', 'box_obs']
        keys_mask_external.append('mask_ab_obs')
    if n_food:
        env = AgentSiteObsMask2D(env, pos_obs_key='food_pos', mask_obs_key='mask_af_obs')
        env = FoodHealthWrapper(env, respawn_time=(np.inf if food_respawn_time is None else food_respawn_time),
                                eat_thresh=(np.inf if food_radius is None else food_radius),
                                max_food_health=max_food_health, food_rew_type=food_rew_type,
                                reward_scale=food_reward_scale)
        env = MaskActionWrapper(env, 'action_eat_food', ['mask_af_obs'])  # Can only eat if in vision
        if prep_obs:
            env = MaskPrepPhaseAction(env, 'action_eat_food')
        if not eat_when_caught:
            env = MaskUnseenAction(env, 0, 'action_eat_food')
        # Only the hiders (agent indices 0 .. n_hiders - 1) are passed as
        # allowed agents.
        eat_agents = np.arange(n_hiders)
        env = AlwaysEatWrapper(env, agent_idx_allowed=eat_agents)
        keys_external += ['mask_af_obs', 'food_obs']
        keys_mask_external.append('mask_af_obs')
    if lock_box and np.max(n_boxes) > 0:
        env = LockObjWrapper(env, body_names=[f'moveable_box{i}' for i in range(np.max(n_boxes))],
                             agent_idx_allowed_to_lock=np.arange(n_hiders+n_seekers),
                             lock_type=lock_type, radius_multiplier=lock_radius_multiplier,
                             obj_in_game_metadata_keys=["curr_n_boxes"],
                             agent_allowed_to_lock_keys=None if lock_out_of_vision else ["mask_ab_obs"])
    if n_ramps > 0:
        env = AgentGeomObsMask2D(env, pos_obs_key='ramp_pos', mask_obs_key='mask_ar_obs',
                                 geom_idxs_obs_key='ramp_geom_idxs')
        if lock_ramp:
            env = LockObjWrapper(env, body_names=[f'ramp{i}:ramp' for i in range(n_ramps)],
                                 agent_idx_allowed_to_lock=np.arange(n_hiders+n_seekers),
                                 lock_type=lock_type, ac_obs_prefix='ramp_',
                                 radius_multiplier=lock_radius_multiplier,
                                 obj_in_game_metadata_keys=['curr_n_ramps'],
                                 agent_allowed_to_lock_keys=None if lock_out_of_vision else ["mask_ar_obs"])
        keys_external += ['ramp_obs']
        keys_mask_external.append('mask_ar_obs')
    if grab_box and (np.max(n_boxes) > 0 or n_ramps > 0):
        # A single grab wrapper covers both boxes and ramps.
        env = GrabObjWrapper(env, [f'moveable_box{i}' for i in range(np.max(n_boxes))] + ([f"ramp{i}:ramp" for i in range(n_ramps)]),
                             radius_multiplier=grab_radius_multiplier,
                             grab_exclusive=grab_exclusive,
                             obj_in_game_metadata_keys=['curr_n_boxes', 'curr_n_ramps'])

    if n_lidar_per_agent > 0:
        env = Lidar(env, n_lidar_per_agent=n_lidar_per_agent, visualize_lidar=visualize_lidar,
                    compress_lidar_scale=compress_lidar_scale)
        keys_copy += ['lidar']
        keys_external += ['lidar']

    if prep_obs:
        env = TrackStatWrapper(env, np.max(n_boxes), n_ramps, n_food)
    env = SplitObservations(env, keys_self + keys_mask_self, keys_copy=keys_copy)
    env = SpoofEntityWrapper(env, np.max(n_boxes), ['box_obs', 'you_lock', 'team_lock', 'obj_lock'], ['mask_ab_obs'])
    if n_food:
        env = SpoofEntityWrapper(env, n_food, ['food_obs'], ['mask_af_obs'])
    # NOTE(review): 'mask_af_obs_spoof' is registered even when n_food == 0 and
    # no food spoofing wrapper was added -- confirm SelectKeysWrapper tolerates
    # keys that are absent from the observation.
    keys_mask_external += ['mask_ab_obs_spoof', 'mask_af_obs_spoof']
    if max_n_agents is not None:
        env = SpoofEntityWrapper(env, max_n_agents, ['agent_qpos_qvel', 'hider', 'prep_obs'], ['mask_aa_obs'])
    env = LockAllWrapper(env, remove_object_specific_lock=True)
    if not grab_out_of_vision and grab_box:
        # Pulling is masked by box visibility and, when ramps exist, ramp
        # visibility.
        env = MaskActionWrapper(env, 'action_pull',
                                ['mask_ab_obs'] + (['mask_ar_obs'] if n_ramps > 0 else []))
    if not grab_selective and grab_box:
        env = GrabClosestWrapper(env)
    # The seeker indices are passed to NoActionsInPrepPhase.
    env = NoActionsInPrepPhase(env, np.arange(n_hiders, n_hiders + n_seekers))
    env = DiscardMujocoExceptionEpisodes(env)
    # The ramp lock keys are only concatenated into 'ramp_obs' when lock_ramp
    # is set.
    env = ConcatenateObsWrapper(env, {'agent_qpos_qvel': ['agent_qpos_qvel', 'hider', 'prep_obs'],
                                      'box_obs': ['box_obs', 'you_lock', 'team_lock', 'obj_lock'],
                                     'ramp_obs': ['ramp_obs'] + (['ramp_you_lock', 'ramp_team_lock', 'ramp_obj_lock'] if lock_ramp else [])})
    env = SelectKeysWrapper(env, keys_self=keys_self,
                            keys_external=keys_external,
                            keys_mask=keys_mask_self + keys_mask_external,
                            flatten=False)
    return env
def make_env(n_substeps=15, horizon=80, deterministic_mode=False,
             floor_size=6.0, grid_size=30,
             n_agents=1,
             n_rooms=4, random_room_number=True, scenario='empty', door_size=2,
             n_sites=3, n_elongated_sites=0, site_placement='uniform_away_from_walls',
             reward_infos=[{'type': 'construction_dense'}],
             n_boxes=1, n_elongated_boxes=0,
             n_min_boxes=None, box_size=0.5, box_only_z_rot=False,
             lock_box=True, grab_box=True, grab_selective=False, lock_grab_radius=0.25,
             lock_type='any_lock_specific', grab_exclusive=False,
             grab_out_of_vision=False, lock_out_of_vision=True,
             box_floor_friction=0.2, other_friction=0.01, gravity=[0, 0, -50],
             action_lims=(-0.9, 0.9), polar_obs=True,
             n_lidar_per_agent=0, visualize_lidar=False, compress_lidar_scale=None,
             boxid_obs=True, boxsize_obs=True, team_size_obs=False, additional_obs={}):
    """Assemble the box-construction environment.

    Builds a ``Base`` environment with walls chosen by ``scenario``,
    ``n_agents`` agents, optional boxes and construction sites, then stacks
    the observation, lock/grab, reward and masking wrappers on top of it.

    Args:
        n_substeps, horizon, floor_size, grid_size, action_lims,
            deterministic_mode: forwarded to ``Base``.
        n_agents: number of agents; every agent gets team index 0 in
            ``TeamMembership``.
        n_rooms, random_room_number: forwarded to ``RandomWalls``
            (``scenario='randomwalls'`` only).
        scenario: ``'randomwalls'`` or ``'empty'``. Any other value adds no
            wall module at all (behaviour kept from the original code).
        door_size: forwarded to the wall module.
        n_sites: a scalar is expanded to ``[n_sites, n_sites]``; the pair is
            forwarded to ``ConstructionSites`` and ``n_sites[1]`` is used as
            the maximum site count.
        n_elongated_sites: forwarded to ``ConstructionSites``.
        site_placement: ``'center'``, ``'uniform'`` or
            ``'uniform_away_from_walls'``; only checked when
            ``n_sites[1] > 0``.
        reward_infos: list of dicts, each with a ``'type'`` key naming the
            reward wrapper (``'construction_dense'`` or
            ``'construction_completed'``); the remaining items are passed to
            that wrapper as keyword arguments. The dicts are not modified.
        n_boxes: forwarded to ``Boxes``; everything else here uses
            ``np.max(n_boxes)``, so a scalar or a sequence is accepted.
        n_elongated_boxes, box_only_z_rot, boxid_obs, boxsize_obs: forwarded
            to ``Boxes``.
        n_min_boxes: accepted but not referenced in this function body.
        box_size: forwarded to ``Boxes``; also sets the construction site
            size/height and is the divisor for the lock/grab radius
            multipliers.
        lock_box, grab_box: enable ``LockObjWrapper`` / ``GrabObjWrapper``
            (only when there are boxes).
        grab_selective: when false (and ``grab_box``), ``GrabClosestWrapper``
            is applied.
        lock_grab_radius: divided by ``box_size`` to get the radius
            multipliers.
        lock_type, grab_exclusive: forwarded to the lock / grab wrapper.
        grab_out_of_vision: when false (and ``grab_box``), ``action_pull`` is
            masked with ``mask_ab_obs``.
        lock_out_of_vision: when false, locking is gated on ``mask_ab_obs``.
        box_floor_friction: friction for ``Boxes`` and, unless ``None``, for
            ``FloorAttributes``.
        other_friction: friction for walls (empty scenario) and agents.
        gravity: forwarded to ``WorldConstants``.
        polar_obs: forwarded to ``Agents`` and ``Boxes``.
        n_lidar_per_agent, visualize_lidar, compress_lidar_scale: lidar
            configuration; lidar is only added when ``n_lidar_per_agent > 0``.
        team_size_obs: adds ``'team_size'`` to the self observation keys.
        additional_obs: constant observations added through
            ``AddConstantObservationsWrapper``; keys containing ``'mask'`` are
            also treated as external mask keys. Not mutated here.

    Returns:
        The fully wrapped environment.

    Raises:
        ValueError: if construction sites are requested and
            ``site_placement`` is not a supported option.
        KeyError: if a ``reward_infos`` entry lacks ``'type'`` or names an
            unknown reward wrapper.
    """

    grab_radius_multiplier = lock_grab_radius / box_size
    lock_radius_multiplier = lock_grab_radius / box_size

    if not isinstance(n_sites, (list, np.ndarray)):
        n_sites = [n_sites, n_sites]

    # Validate before any simulation object is built so a typo fails fast.
    if n_sites[1] > 0 and site_placement not in (
            'center', 'uniform', 'uniform_away_from_walls'):
        raise ValueError(f'Site placement option: {site_placement} not implemented.'
                         ' Please choose from center, uniform and uniform_away_from_walls.')

    # The function gates on np.max(n_boxes), so n_boxes may be a sequence;
    # body names and spoofed entities are sized by the maximum count.
    max_n_boxes = int(np.max(n_boxes))
    has_boxes = max_n_boxes > 0
    box_names = [f'moveable_box{i}' for i in range(max_n_boxes)]

    env = Base(n_agents=n_agents, n_substeps=n_substeps, horizon=horizon,
               floor_size=floor_size, grid_size=grid_size,
               action_lims=action_lims, deterministic_mode=deterministic_mode)

    if scenario == 'randomwalls':
        env.add_module(RandomWalls(grid_size=grid_size, num_rooms=n_rooms,
                                   random_room_number=random_room_number, min_room_size=6,
                                   door_size=door_size, gen_door_obs=False))
    elif scenario == 'empty':
        env.add_module(WallScenarios(grid_size=grid_size, door_size=door_size,
                                     scenario='empty',
                                     friction=other_friction))

    env.add_module(Agents(n_agents,
                          placement_fn=uniform_placement,
                          color=[np.array((66., 235., 244., 255.)) / 255] * n_agents,
                          friction=other_friction,
                          polar_obs=polar_obs))
    if has_boxes:
        env.add_module(Boxes(n_boxes=n_boxes, placement_fn=uniform_placement,
                             friction=box_floor_friction, polar_obs=polar_obs,
                             n_elongated_boxes=n_elongated_boxes,
                             boxid_obs=boxid_obs, boxsize_obs=boxsize_obs,
                             box_size=box_size,
                             box_only_z_rot=box_only_z_rot,
                             mark_box_corners=True))
    if n_sites[1] > 0:
        if site_placement == 'center':
            site_placement_fn = center_placement
        elif site_placement == 'uniform':
            site_placement_fn = uniform_placement
        else:  # 'uniform_away_from_walls' (validated above)
            site_placement_fn = uniform_placement_middle(0.85)

        env.add_module(ConstructionSites(n_sites, placement_fn=site_placement_fn,
                                         site_size=box_size, site_height=box_size / 2,
                                         n_elongated_sites=n_elongated_sites))
    if n_lidar_per_agent > 0 and visualize_lidar:
        env.add_module(LidarSites(n_agents=n_agents, n_lidar_per_agent=n_lidar_per_agent))
    if has_boxes and grab_box:
        env.add_module(AgentManipulation())
    if box_floor_friction is not None:
        env.add_module(FloorAttributes(friction=box_floor_friction))
    env.add_module(WorldConstants(gravity=gravity))
    env.reset()

    # Observation key bookkeeping consumed by the wrappers further down.
    keys_self = ['agent_qpos_qvel']
    keys_mask_self = ['mask_aa_obs']
    keys_external = ['agent_qpos_qvel', 'construction_site_obs']
    keys_copy = ['you_lock', 'team_lock', 'ramp_you_lock', 'ramp_team_lock']
    keys_mask_external = []

    env = AddConstantObservationsWrapper(env, new_obs=additional_obs)
    keys_external += list(additional_obs)
    keys_mask_external += [ob for ob in additional_obs if 'mask' in ob]

    env = SplitMultiAgentActions(env)
    if team_size_obs:
        keys_self += ['team_size']
    env = TeamMembership(env, np.zeros((n_agents,)))
    env = AgentAgentObsMask2D(env)
    env = DiscretizeActionWrapper(env, 'action_movement')
    if has_boxes:
        env = AgentGeomObsMask2D(env, pos_obs_key='box_pos', mask_obs_key='mask_ab_obs',
                                 geom_idxs_obs_key='box_geom_idxs')
        keys_external += ['mask_ab_obs', 'box_obs']
        keys_mask_external.append('mask_ab_obs')
    if lock_box and has_boxes:
        agent_allowed_to_lock_keys = None if lock_out_of_vision else ["mask_ab_obs"]
        env = LockObjWrapper(env, body_names=box_names,
                             agent_idx_allowed_to_lock=np.arange(n_agents),
                             lock_type=lock_type,
                             radius_multiplier=lock_radius_multiplier,
                             obj_in_game_metadata_keys=["curr_n_boxes"],
                             agent_allowed_to_lock_keys=agent_allowed_to_lock_keys)
    if grab_box and has_boxes:
        env = GrabObjWrapper(env, box_names,
                             radius_multiplier=grab_radius_multiplier,
                             grab_exclusive=grab_exclusive,
                             obj_in_game_metadata_keys=['curr_n_boxes'])

    if n_lidar_per_agent > 0:
        env = Lidar(env, n_lidar_per_agent=n_lidar_per_agent, visualize_lidar=visualize_lidar,
                    compress_lidar_scale=compress_lidar_scale)
        keys_copy += ['lidar']
        keys_external += ['lidar']

    env = ConstructionDistancesWrapper(env)
    env = NumpyArrayRewardWrapper(env)

    reward_wrappers = {
        'construction_dense': ConstructionDenseRewardWrapper,
        'construction_completed': ConstructionCompletedRewardWrapper,
    }

    for rew_info in reward_infos:
        # Work on a copy: deleting 'type' from the caller's dict (or from the
        # shared default argument) made every later call fail with KeyError.
        rew_kwargs = dict(rew_info)
        rew_type = rew_kwargs.pop('type')
        env = reward_wrappers[rew_type](env, **rew_kwargs)

    env = SplitObservations(env, keys_self + keys_mask_self, keys_copy=keys_copy)
    if n_agents == 1:
        env = SpoofEntityWrapper(env, 2, ['agent_qpos_qvel'], ['mask_aa_obs'])
    env = SpoofEntityWrapper(env, max_n_boxes,
                             ['box_obs', 'you_lock', 'team_lock', 'obj_lock'],
                             ['mask_ab_obs'])
    env = SpoofEntityWrapper(env, n_sites[1], ['construction_site_obs'], ['mask_acs_obs'])
    keys_mask_external += ['mask_ab_obs_spoof', 'mask_acs_obs_spoof']
    env = LockAllWrapper(env, remove_object_specific_lock=True)
    if not grab_out_of_vision and grab_box:
        env = MaskActionWrapper(env, 'action_pull', ['mask_ab_obs'])  # Can only pull if in vision
    if not grab_selective and grab_box:
        env = GrabClosestWrapper(env)
    env = DiscardMujocoExceptionEpisodes(env)
    env = ConcatenateObsWrapper(env, {'agent_qpos_qvel': ['agent_qpos_qvel'],
                                      'box_obs': ['box_obs', 'you_lock', 'team_lock', 'obj_lock']})
    env = SelectKeysWrapper(env, keys_self=keys_self,
                            keys_external=keys_external,
                            keys_mask=keys_mask_self + keys_mask_external,
                            flatten=False)
    return env
# Example #5
def make_env(
        n_substeps=15,
        horizon=80,
        deterministic_mode=False,
        floor_size=6.0,
        grid_size=30,
        door_size=2,
        n_agents=1,
        fixed_agent_spawn=False,
        lock_box=True,
        grab_box=True,
        grab_selective=False,
        lock_type='any_lock_specific',
        lock_grab_radius=0.25,
        grab_exclusive=False,
        grab_out_of_vision=False,
        lock_out_of_vision=True,
        box_floor_friction=0.2,
        other_friction=0.01,
        gravity=[0, 0, -50],
        action_lims=(-0.9, 0.9),
        polar_obs=True,
        scenario='quadrant',
        p_door_dropout=0.0,
        n_rooms=4,
        random_room_number=True,
        n_lidar_per_agent=0,
        visualize_lidar=False,
        compress_lidar_scale=None,
        n_boxes=2,
        box_size=0.5,
        box_only_z_rot=False,
        boxid_obs=True,
        boxsize_obs=True,
        pad_ramp_size=True,
        additional_obs={},
        # lock-box task
        task_type='all',
        lock_reward=5.0,
        unlock_penalty=7.0,
        shaped_reward_scale=0.25,
        return_threshold=0.1,
        # ramps
        n_ramps=0):
    """Build the box-locking environment: a ``Base`` env plus modules and wrappers.

    The function (1) creates a ``Base`` environment, (2) adds a wall layout
    chosen by ``scenario`` together with agents, boxes, optional ramps and
    lidar sites, (3) resets the environment once, and (4) stacks the
    action / observation / reward wrappers, finishing with
    ``SelectKeysWrapper``.

    Args:
        n_substeps, horizon, floor_size, grid_size, action_lims,
        deterministic_mode: forwarded unchanged to ``Base``.
        door_size: forwarded to ``RandomWalls`` / ``WallScenarios``. The
            ``'empty'`` scenario ignores it and always passes ``2``.
        n_agents: number of agents; also sizes the team-membership array and
            the list of agents allowed to lock.
        fixed_agent_spawn: when true, agents are placed with
            ``center_placement`` instead of the scenario's own placement
            function (``'empty'`` always uses ``center_placement``).
        lock_box: when true (and there are boxes), wrap with
            ``LockObjWrapper`` for the boxes.
        grab_box: when true (and there are boxes), add ``AgentManipulation``
            and wrap with ``GrabObjWrapper``; it also gates the pull-mask and
            ``GrabClosestWrapper`` steps.
        grab_selective: when false (and ``grab_box``), wrap with
            ``GrabClosestWrapper``.
        lock_type: forwarded to ``LockObjWrapper``.
        lock_grab_radius: divided by ``box_size`` to obtain the
            ``radius_multiplier`` given to both lock and grab wrappers.
        grab_exclusive: forwarded to ``GrabObjWrapper``.
        grab_out_of_vision: when false (and ``grab_box``), ``action_pull`` is
            masked with the box (and ramp) visibility masks.
        lock_out_of_vision: when false, the visibility mask key is passed as
            ``agent_allowed_to_lock_keys``; when true ``None`` is passed.
        box_floor_friction: friction for ``Boxes``; also applied through
            ``FloorAttributes`` unless it is ``None``.
        other_friction: friction for ``Agents``, ``Ramps`` and the
            ``'quadrant'`` walls.
        gravity: forwarded to ``WorldConstants``.
        polar_obs: forwarded to ``Agents``, ``Boxes`` and ``Ramps``.
        scenario: ``'randomwalls'``, ``'quadrant'``, ``'empty'``, or any
            string containing ``'var_tri'`` (optionally also ``'uniform'``).
        p_door_dropout: forwarded to ``WallScenarios`` for ``'quadrant'``.
        n_rooms, random_room_number: forwarded to ``RandomWalls``.
        n_lidar_per_agent, visualize_lidar, compress_lidar_scale: lidar
            settings; ``Lidar`` is applied only if ``n_lidar_per_agent > 0``
            and ``LidarSites`` only if ``visualize_lidar`` is also true.
        n_boxes: box count. Passed as-is to ``Boxes``; everywhere else the
            maximum (``np.max``) is used, so a scalar or any value accepted
            by ``np.max`` works.
        box_size: used here only to derive the radius multipliers.
        box_only_z_rot, boxid_obs, boxsize_obs: forwarded to ``Boxes``.
        pad_ramp_size: forwarded to ``Ramps``.
        additional_obs: mapping forwarded to
            ``AddConstantObservationsWrapper``. Its keys are appended to the
            external observation keys, and keys containing ``'mask'`` are
            additionally treated as mask keys. It is not mutated here.
        task_type, lock_reward, unlock_penalty, shaped_reward_scale,
        return_threshold: forwarded to ``LockObjectsTask``.
        n_ramps: number of ramps; ``0`` disables all ramp modules/wrappers.

    Returns:
        The fully wrapped environment.

    Raises:
        ValueError: if ``scenario`` is not one of the supported values.
    """

    grab_radius_multiplier = lock_grab_radius / box_size
    lock_radius_multiplier = lock_grab_radius / box_size

    # ``n_boxes`` is handed to ``Boxes`` untouched, but every place below that
    # needs a *count* (``range``, entity padding, task size) must use a plain
    # integer. Previously those spots used ``n_boxes`` directly, which raised
    # ``TypeError`` whenever ``n_boxes`` was not a scalar even though the
    # surrounding guards already used ``np.max(n_boxes)``.
    max_n_boxes = int(np.max(n_boxes))
    has_boxes = max_n_boxes > 0
    box_body_names = [f'moveable_box{i}' for i in range(max_n_boxes)]

    env = Base(n_agents=n_agents,
               n_substeps=n_substeps,
               floor_size=floor_size,
               horizon=horizon,
               action_lims=action_lims,
               deterministic_mode=deterministic_mode,
               grid_size=grid_size)

    # --- Wall layout and the placement functions that go with it ---
    if scenario == 'randomwalls':
        env.add_module(
            RandomWalls(grid_size=grid_size,
                        num_rooms=n_rooms,
                        random_room_number=random_room_number,
                        min_room_size=6,
                        door_size=door_size,
                        gen_door_obs=False))
        box_placement_fn = uniform_placement
        ramp_placement_fn = uniform_placement
        agent_placement_fn = uniform_placement if not fixed_agent_spawn else center_placement
    elif scenario == 'quadrant':
        env.add_module(
            WallScenarios(grid_size=grid_size,
                          door_size=door_size,
                          scenario=scenario,
                          friction=other_friction,
                          p_door_dropout=p_door_dropout))
        box_placement_fn = uniform_placement
        ramp_placement_fn = uniform_placement
        agent_placement_fn = quadrant_placement if not fixed_agent_spawn else center_placement
    elif scenario == 'empty':
        env.add_module(
            WallScenarios(grid_size=grid_size, door_size=2, scenario='empty'))
        box_placement_fn = uniform_placement
        ramp_placement_fn = uniform_placement
        agent_placement_fn = center_placement
    elif 'var_tri' in scenario:
        env.add_module(
            WallScenarios(grid_size=grid_size,
                          door_size=door_size,
                          scenario='var_tri'))
        # One placement function per ramp, cycling through indices 0..2.
        ramp_placement_fn = [tri_placement(i % 3) for i in range(n_ramps)]
        agent_placement_fn = center_placement if fixed_agent_spawn else \
            (uniform_placement if 'uniform' in scenario else rotate_tri_placement)
        box_placement_fn = uniform_placement if 'uniform' in scenario else rotate_tri_placement
    else:
        raise ValueError(f"Scenario {scenario} not supported.")

    # --- World modules ---
    env.add_module(
        Agents(n_agents,
               placement_fn=agent_placement_fn,
               color=[np.array((66., 235., 244., 255.)) / 255] * n_agents,
               friction=other_friction,
               polar_obs=polar_obs))
    if has_boxes:
        # NOTE(review): ``box_size`` is not forwarded to ``Boxes`` here, so the
        # radius multipliers above assume ``Boxes`` uses the same size -- confirm.
        env.add_module(
            Boxes(n_boxes=n_boxes,
                  placement_fn=box_placement_fn,
                  friction=box_floor_friction,
                  polar_obs=polar_obs,
                  n_elongated_boxes=0,
                  boxid_obs=boxid_obs,
                  box_only_z_rot=box_only_z_rot,
                  boxsize_obs=boxsize_obs))

    if n_ramps > 0:
        env.add_module(
            Ramps(n_ramps=n_ramps,
                  placement_fn=ramp_placement_fn,
                  friction=other_friction,
                  polar_obs=polar_obs,
                  pad_ramp_size=pad_ramp_size))

    if n_lidar_per_agent > 0 and visualize_lidar:
        env.add_module(
            LidarSites(n_agents=n_agents, n_lidar_per_agent=n_lidar_per_agent))

    if has_boxes and grab_box:
        env.add_module(AgentManipulation())
    if box_floor_friction is not None:
        env.add_module(FloorAttributes(friction=box_floor_friction))
    env.add_module(WorldConstants(gravity=gravity))
    # Reset once after all modules are registered and before any wrapper is applied.
    env.reset()

    # Observation-key bookkeeping consumed by SplitObservations /
    # SelectKeysWrapper at the end of the function.
    # NOTE(review): nothing in this function produces 'hider' or 'prep_obs';
    # presumably they are supplied through ``additional_obs`` -- confirm.
    keys_self = ['agent_qpos_qvel', 'hider', 'prep_obs']
    keys_mask_self = ['mask_aa_obs']
    keys_external = ['agent_qpos_qvel']
    keys_copy = ['you_lock', 'team_lock']
    keys_mask_external = []

    # --- Wrappers (order matters: each one wraps the previous env) ---
    env = SplitMultiAgentActions(env)
    # All agents are assigned team index 0.
    env = TeamMembership(env, np.zeros((n_agents, )))
    env = AgentAgentObsMask2D(env)
    env = DiscretizeActionWrapper(env, 'action_movement')
    env = NumpyArrayRewardWrapper(env)

    if has_boxes:
        env = AgentGeomObsMask2D(env,
                                 pos_obs_key='box_pos',
                                 mask_obs_key='mask_ab_obs',
                                 geom_idxs_obs_key='box_geom_idxs')
        keys_external += ['mask_ab_obs', 'box_obs']
        keys_mask_external.append('mask_ab_obs')
    if lock_box and has_boxes:
        env = LockObjWrapper(
            env,
            body_names=box_body_names,
            agent_idx_allowed_to_lock=np.arange(n_agents),
            lock_type=lock_type,
            radius_multiplier=lock_radius_multiplier,
            obj_in_game_metadata_keys=["curr_n_boxes"],
            agent_allowed_to_lock_keys=None
            if lock_out_of_vision else ["mask_ab_obs"])

    if n_ramps > 0:
        env = AgentGeomObsMask2D(env,
                                 pos_obs_key='ramp_pos',
                                 mask_obs_key='mask_ar_obs',
                                 geom_idxs_obs_key='ramp_geom_idxs')
        env = LockObjWrapper(
            env,
            body_names=[f"ramp{i}:ramp" for i in range(n_ramps)],
            agent_idx_allowed_to_lock=np.arange(n_agents),
            lock_type=lock_type,
            ac_obs_prefix='ramp_',
            radius_multiplier=lock_radius_multiplier,
            agent_allowed_to_lock_keys=None
            if lock_out_of_vision else ["mask_ar_obs"])

        keys_external += ['ramp_obs']
        keys_mask_external += ['mask_ar_obs']
        keys_copy += ['ramp_you_lock', 'ramp_team_lock']

    if grab_box and has_boxes:
        # Boxes first, then ramps; the metadata keys follow the same order.
        body_names = (box_body_names +
                      [f"ramp{i}:ramp" for i in range(n_ramps)])
        obj_in_game_meta_keys = ['curr_n_boxes'
                                 ] + (['curr_n_ramps'] if n_ramps > 0 else [])
        env = GrabObjWrapper(env,
                             body_names=body_names,
                             radius_multiplier=grab_radius_multiplier,
                             grab_exclusive=grab_exclusive,
                             obj_in_game_metadata_keys=obj_in_game_meta_keys)

    if n_lidar_per_agent > 0:
        env = Lidar(env,
                    n_lidar_per_agent=n_lidar_per_agent,
                    visualize_lidar=visualize_lidar,
                    compress_lidar_scale=compress_lidar_scale)
        keys_copy += ['lidar']
        keys_external += ['lidar']

    env = AddConstantObservationsWrapper(env, new_obs=additional_obs)
    keys_external += list(additional_obs)
    keys_mask_external += [ob for ob in additional_obs if 'mask' in ob]

    #############################################
    # lock Box Task Reward
    ###
    env = LockObjectsTask(env,
                          n_objs=max_n_boxes,
                          task=task_type,
                          fixed_order=True,
                          obj_lock_obs_key='obj_lock',
                          obj_pos_obs_key='box_pos',
                          act_lock_key='action_glue',
                          agent_pos_key='agent_pos',
                          lock_reward=lock_reward,
                          unlock_penalty=unlock_penalty,
                          shaped_reward_scale=shaped_reward_scale,
                          return_threshold=return_threshold)
    ###
    #############################################

    env = SplitObservations(env,
                            keys_self + keys_mask_self,
                            keys_copy=keys_copy)
    env = SpoofEntityWrapper(env, max_n_boxes,
                             ['box_obs', 'you_lock', 'team_lock', 'obj_lock'],
                             ['mask_ab_obs'])
    keys_mask_external += ['mask_ab_obs_spoof']

    if n_agents < 2:
        env = SpoofEntityWrapper(env, 1,
                                 ['agent_qpos_qvel', 'hider', 'prep_obs'],
                                 ['mask_aa_obs'])

    env = LockAllWrapper(env, remove_object_specific_lock=True)
    if not grab_out_of_vision and grab_box:
        # Can only pull if in vision
        mask_keys = ['mask_ab_obs'] + (['mask_ar_obs'] if n_ramps > 0 else [])
        env = MaskActionWrapper(env,
                                action_key='action_pull',
                                mask_keys=mask_keys)
    if not grab_selective and grab_box:
        env = GrabClosestWrapper(env)
    env = DiscardMujocoExceptionEpisodes(env)
    env = ConcatenateObsWrapper(
        env, {
            'agent_qpos_qvel': ['agent_qpos_qvel', 'hider', 'prep_obs'],
            'box_obs': ['box_obs', 'you_lock', 'team_lock', 'obj_lock'],
            'ramp_obs':
            ['ramp_obs', 'ramp_you_lock', 'ramp_team_lock', 'ramp_obj_lock']
        })
    env = SelectKeysWrapper(env,
                            keys_self=keys_self,
                            keys_external=keys_external,
                            keys_mask=keys_mask_self + keys_mask_external,
                            flatten=False)

    return env