Example 1
    def __init__(self, 
                 n_legs=4,
                 ts=0.02,
                 integrator='RK4',
                 leg_length=0.282,
                 out_file="multi_ant.xml",
                 base_file="ant_og.xml",
                 reward_mech='local',
                 pos_noise=1e-3,
                 vel_noise=1e-3,
                 force_noise=1e-3
                 ):
        EzPickle.__init__(self, n_legs, ts, integrator, leg_length,
                          out_file, base_file, reward_mech,
                          pos_noise, vel_noise, force_noise)
        self.n_legs = n_legs
        self.ts = ts
        self.integrator = integrator
        self.leg_length = leg_length
        self.out_file = out_file
        self.base_file = base_file
        self._reward_mech = reward_mech
        
        self.pos_noise = pos_noise
        self.vel_noise = vel_noise
        self.force_noise = force_noise

        self.legs = None
        self.out_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), self.out_file)
        self.base_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), self.base_file)

        # Generate the multi-leg model XML from the base ant file before loading it.
        self.gen_xml(out_file=self.out_file_path, og_file=self.base_file_path)

        # Load the generated model (frame skip of 5) and build one AntLeg interface per leg.
        mujoco_env.MujocoEnv.__init__(self, self.out_file_path, 5)
        self.legs = [
            AntLeg(self.model, i, n_legs, pos_noise=pos_noise,
                   vel_noise=vel_noise, force_noise=force_noise)
            for i in range(self.n_legs)
        ]
Example 2
    def __setstate__(self, d):
        # restore curriculum attributes here for the parallel sampler
        EzPickle.__setstate__(self, d)
        self.constraint_window = d['constraint_window']
        self.n_evaders = d['n_evaders']
        self.n_pursuers = d['n_pursuers']
        self.catchr = d['catchr']
Example 3
    def __init__(self, n_walkers=2, position_noise=1e-3, angle_noise=1e-3, reward_mech='local',
                 forward_reward=1.0, fall_reward=-100.0, drop_reward=-100.0, terminate_on_fall=True,
                 one_hot=False):
        EzPickle.__init__(self, n_walkers, position_noise, angle_noise, reward_mech, forward_reward,
                          fall_reward, drop_reward, terminate_on_fall, one_hot)

        self.n_walkers = n_walkers
        self.position_noise = position_noise
        self.angle_noise = angle_noise
        self._reward_mech = reward_mech
        self.forward_reward = forward_reward
        self.fall_reward = fall_reward
        self.drop_reward = drop_reward
        self.terminate_on_fall = terminate_on_fall
        self.one_hot = one_hot
        self.setup()
Example 4
    def __getstate__(self):
        d = EzPickle.__getstate__(self)
        d['constraint_window'] = self.constraint_window
        d['n_evaders'] = self.n_evaders
        d['n_pursuers'] = self.n_pursuers
        d['catchr'] = self.catchr
        return d
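
Examples 2 and 4 are the two halves of one pattern: EzPickle records the constructor arguments, while the __getstate__/__setstate__ overrides add attributes (constraint_window, catchr, ...) that may change after construction, e.g. under a curriculum, so a parallel sampler can rebuild an identical environment. Below is a minimal sketch of that round trip, assuming gym's EzPickle; the TinyEnv class and its fields are hypothetical stand-ins, not part of the examples above.

import pickle

from gym.utils import EzPickle  # gymnasium.utils.EzPickle in newer code


class TinyEnv(EzPickle):
    """Toy stand-in for the pursuit environment of Examples 2 and 4."""

    def __init__(self, n_pursuers=1):
        EzPickle.__init__(self, n_pursuers)  # record constructor args for pickling
        self.n_pursuers = n_pursuers
        self.catchr = 0.01                   # attribute that may be updated later

    def __getstate__(self):
        d = EzPickle.__getstate__(self)      # the saved constructor args
        d['catchr'] = self.catchr            # plus the mutable extras
        return d

    def __setstate__(self, d):
        EzPickle.__setstate__(self, d)       # re-runs __init__ with the saved args
        self.catchr = d['catchr']            # then restores the extras


env = TinyEnv(n_pursuers=3)
env.catchr = 0.5                             # changed after construction
restored = pickle.loads(pickle.dumps(env))
assert restored.n_pursuers == 3 and restored.catchr == 0.5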
Example 5
    def __init__(self, n_pursuers, n_evaders, n_coop=2, n_poison=10, radius=0.015,
                 obstacle_radius=0.2, obstacle_loc=np.array([0.5, 0.5]), ev_speed=0.01,
                 poison_speed=0.01, n_sensors=30, sensor_range=0.2, action_scale=0.01,
                 poison_reward=-1., food_reward=1., encounter_reward=.05, control_penalty=-.5,
                 reward_mech='local', addid=True, speed_features=True, **kwargs):
        EzPickle.__init__(self, n_pursuers, n_evaders, n_coop, n_poison, radius, obstacle_radius,
                          obstacle_loc, ev_speed, poison_speed, n_sensors, sensor_range,
                          action_scale, poison_reward, food_reward, encounter_reward,
                          control_penalty, reward_mech, addid, speed_features, **kwargs)
        self.n_pursuers = n_pursuers
        self.n_evaders = n_evaders
        self.n_coop = n_coop
        self.n_poison = n_poison
        self.obstacle_radius = obstacle_radius
        self.obstacle_loc = obstacle_loc
        self.poison_speed = poison_speed
        self.radius = radius
        self.ev_speed = ev_speed
        self.n_sensors = n_sensors
        self.sensor_range = np.ones(self.n_pursuers) * sensor_range
        self.action_scale = action_scale
        self.poison_reward = poison_reward
        self.food_reward = food_reward
        self.control_penalty = control_penalty
        self.encounter_reward = encounter_reward

        self.n_obstacles = 1
        self._reward_mech = reward_mech
        self._addid = addid
        self._speed_features = speed_features
        self.seed()
        self._pursuers = [
            Archea(npu + 1, self.radius, self.n_sensors, self.sensor_range[npu], addid=self._addid,
                   speed_features=self._speed_features) for npu in range(self.n_pursuers)
        ]
        self._evaders = [
            Archea(nev + 1, self.radius * 2, self.n_pursuers, self.sensor_range.mean() / 2)
            for nev in range(self.n_evaders)
        ]
        self._poisons = [
            Archea(npo + 1, self.radius * 3 / 4, self.n_poison, 0) for npo in range(self.n_poison)
        ]
Example 6
    def __init__(self, map_pool, **kwargs):
        """
        In evade pursuit a set of pursuers must 'tag' a set of evaders.

        Required arguments:
        - map_pool: list of maps (matrices) on which agents interact

        Optional arguments:
        - ally_layer: list of pursuers
        - opponent_layer: list of evaders
        - pursuer_controller: stationary policy of the ally pursuers
        - evader_controller: stationary policy of the opponent evaders
        - catchr: reward for 'tagging' a single evader
        - caughtr: reward for getting 'tagged' by a pursuer
        - train_pursuit: flag indicating whether we are simulating pursuers or evaders
        - initial_config: dictionary of the form
            initial_config['allies']: the initial ally configuration (matrix)
            initial_config['opponents']: the initial opponent configuration (matrix)
        """
        EzPickle.__init__(self, map_pool, **kwargs)

        self.sample_maps = kwargs.pop('sample_maps', False)

        self.map_pool = map_pool
        map_matrix = map_pool[0]
        self.map_matrix = map_matrix
        xs, ys = self.map_matrix.shape
        self.xs = xs
        self.ys = ys

        self._reward_mech = kwargs.pop('reward_mech', 'global')

        self.n_evaders = kwargs.pop('n_evaders', 1)
        self.n_pursuers = kwargs.pop('n_pursuers', 1)

        self.obs_range = kwargs.pop(
            'obs_range', 3)  # each agent observes a 3x3 window around itself by default
        #assert self.obs_range % 2 != 0, "obs_range should be odd"
        self.obs_offset = int((self.obs_range - 1) / 2)

        self.flatten = kwargs.pop('flatten', True)

        self.pursuers = agent_utils.create_agents(self.n_pursuers,
                                                  map_matrix,
                                                  self.obs_range,
                                                  flatten=self.flatten)
        self.evaders = agent_utils.create_agents(self.n_evaders,
                                                 map_matrix,
                                                 self.obs_range,
                                                 flatten=self.flatten)

        self.pursuer_layer = kwargs.pop('ally_layer',
                                        AgentLayer(xs, ys, self.pursuers))
        self.evader_layer = kwargs.pop('opponent_layer',
                                       AgentLayer(xs, ys, self.evaders))

        self.layer_norm = kwargs.pop('layer_norm', 10)

        self.n_catch = kwargs.pop('n_catch', 2)

        self.random_opponents = kwargs.pop('random_opponents', False)
        self.max_opponents = kwargs.pop('max_opponents', 10)

        n_act_purs = self.pursuer_layer.get_nactions(0)
        n_act_ev = self.evader_layer.get_nactions(0)

        self.evader_controller = kwargs.pop('evader_controller',
                                            RandomPolicy(n_act_ev))
        self.pursuer_controller = kwargs.pop('pursuer_controller',
                                             RandomPolicy(n_act_purs))

        self.current_agent_layer = np.zeros((xs, ys), dtype=np.int32)

        self.catchr = kwargs.pop('catchr', 0.01)
        self.caughtr = kwargs.pop('caughtr', -0.01)

        self.term_pursuit = kwargs.pop('term_pursuit', 5.0)
        self.term_evade = kwargs.pop('term_evade', -5.0)

        self.urgency_reward = kwargs.pop('urgency_reward', 0.0)

        self.include_id = kwargs.pop('include_id', True)

        self.ally_actions = np.zeros(n_act_purs, dtype=np.int32)
        self.opponent_actions = np.zeros(n_act_ev, dtype=np.int32)

        self.train_pursuit = kwargs.pop('train_pursuit', True)

        if self.train_pursuit:
            self.low = np.zeros(3 * self.obs_range**2)
            self.high = np.ones(3 * self.obs_range**2)
            if self.include_id:
                self.low = np.append(self.low, 0.0)
                self.high = np.append(self.high, 1.0)
            self.action_space = spaces.Discrete(n_act_purs)
            if self.flatten:
                self.observation_space = spaces.Box(self.low, self.high)
            else:
                self.observation_space = spaces.Box(low=-np.inf,
                                                    high=np.inf,
                                                    shape=(4, self.obs_range,
                                                           self.obs_range))
            self.local_obs = np.zeros(
                (self.n_pursuers, 4, self.obs_range,
                 self.obs_range))  # n_pursuers x 4 x obs_range x obs_range
            self.act_dims = [n_act_purs for i in range(self.n_pursuers)]
        else:
            self.low = np.zeros(3 * self.obs_range**2)
            self.high = np.ones(3 * self.obs_range**2)
            if self.include_id:
                self.low = np.append(self.low, 0.0)
                self.high = np.append(self.high, 1.0)
            self.action_space = spaces.Discrete(n_act_ev)
            if self.flatten:
                self.observation_space = spaces.Box(self.low, self.high)
            else:
                self.observation_space = spaces.Box(low=-np.inf,
                                                    high=np.inf,
                                                    shape=(4, self.obs_range,
                                                           self.obs_range))
            self.local_obs = np.zeros(
                (self.n_evaders, 4, self.obs_range,
                 self.obs_range))  # n_evaders x 4 x obs_range x obs_range
            self.act_dims = [n_act_ev for i in range(self.n_evaders)]
        self.pursuers_gone = np.array([False for i in range(self.n_pursuers)])
        self.evaders_gone = np.array([False for i in range(self.n_evaders)])

        self.initial_config = kwargs.pop('initial_config', {})

        self.surround = kwargs.pop('surround', True)

        self.constraint_window = kwargs.pop('constraint_window', 1.0)

        self.curriculum_remove_every = kwargs.pop('curriculum_remove_every',
                                                  500)
        self.curriculum_constrain_rate = kwargs.pop(
            'curriculum_constrain_rate', 0.0)
        self.curriculum_turn_off_shaping = kwargs.pop(
            'curriculum_turn_off_shaping', np.inf)

        self.surround_mask = np.array([[-1, 0], [1, 0], [0, 1], [0, -1]])

        self.model_state = np.zeros((4, ) + map_matrix.shape, dtype=np.float32)
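
Example 6 applies the same idea to a keyword-heavy constructor: the untouched **kwargs are handed to EzPickle before any kwargs.pop(...) consumes them, so a pickled copy is rebuilt with exactly the options the caller passed. A compressed sketch under the same assumptions (KwargsEnv and its fields are hypothetical):

import pickle

from gym.utils import EzPickle


class KwargsEnv(EzPickle):
    def __init__(self, map_pool, **kwargs):
        # Capture the untouched kwargs first; the pops below do not affect the saved copy.
        EzPickle.__init__(self, map_pool, **kwargs)
        self.map_pool = map_pool
        self.n_evaders = kwargs.pop('n_evaders', 1)
        self.n_pursuers = kwargs.pop('n_pursuers', 1)


env = KwargsEnv(map_pool=[None], n_pursuers=4)
clone = pickle.loads(pickle.dumps(env))
assert clone.n_pursuers == 4 and clone.n_evaders == 1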