def __init__(self, n_legs=4, ts=0.02, integrator='RK4', leg_length=0.282,
             out_file="multi_ant.xml", base_file="ant_og.xml", reward_mech='local',
             pos_noise=1e-3, vel_noise=1e-3, force_noise=1e-3):
    EzPickle.__init__(self, n_legs, ts, integrator, leg_length, out_file, base_file,
                      reward_mech, pos_noise, vel_noise, force_noise)
    self.n_legs = n_legs
    self.ts = ts
    self.integrator = integrator
    self.leg_length = leg_length
    self.out_file = out_file
    self.base_file = base_file
    self._reward_mech = reward_mech
    self.pos_noise = pos_noise
    self.vel_noise = vel_noise
    self.force_noise = force_noise
    self.legs = None

    # Generate the n-legged ant XML from the base model, then build the MuJoCo env from it.
    self.out_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), self.out_file)
    self.base_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), self.base_file)
    self.gen_xml(out_file=self.out_file_path, og_file=self.base_file_path)

    mujoco_env.MujocoEnv.__init__(self, self.out_file_path, 5)

    # One AntLeg agent per leg, each observing its own joints with the configured noise.
    self.legs = [AntLeg(self.model, i, n_legs, pos_noise=pos_noise, vel_noise=vel_noise,
                        force_noise=force_noise) for i in range(self.n_legs)]
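# A minimal usage sketch. Assumptions not confirmed by this file: the enclosing class is
# a Gym-style multi-ant environment (here called `MultiAnt`) exposing the usual
# reset()/step() interface with a sample()-able action_space.
#
#     env = MultiAnt(n_legs=4, ts=0.02, reward_mech='local')
#     obs = env.reset()
#     obs, rewards, done, info = env.step(env.action_space.sample())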
def __setstate__(self, d):
    # Restore curriculum-related attributes here so the parallel sampler picks up updates.
    EzPickle.__setstate__(self, d)
    self.constraint_window = d['constraint_window']
    self.n_evaders = d['n_evaders']
    self.n_pursuers = d['n_pursuers']
    self.catchr = d['catchr']
def __init__(self, n_walkers=2, position_noise=1e-3, angle_noise=1e-3, reward_mech='local',
             forward_reward=1.0, fall_reward=-100.0, drop_reward=-100.0,
             terminate_on_fall=True, one_hot=False):
    EzPickle.__init__(self, n_walkers, position_noise, angle_noise, reward_mech,
                      forward_reward, fall_reward, drop_reward, terminate_on_fall, one_hot)
    self.n_walkers = n_walkers
    self.position_noise = position_noise
    self.angle_noise = angle_noise
    self._reward_mech = reward_mech
    self.forward_reward = forward_reward
    self.fall_reward = fall_reward
    self.drop_reward = drop_reward
    self.terminate_on_fall = terminate_on_fall
    self.one_hot = one_hot
    self.setup()
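# A minimal usage sketch. Assumptions not confirmed by this file: the enclosing class is
# named `MultiWalkerEnv` and follows the Gym reset()/step() interface; setup() (called
# above) is what builds the walkers and the observation/action spaces.
#
#     env = MultiWalkerEnv(n_walkers=3, reward_mech='global', terminate_on_fall=False)
#     obs = env.reset()
#     obs, rewards, done, info = env.step(env.action_space.sample())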
def __getstate__(self):
    d = EzPickle.__getstate__(self)
    d['constraint_window'] = self.constraint_window
    d['n_evaders'] = self.n_evaders
    d['n_pursuers'] = self.n_pursuers
    d['catchr'] = self.catchr
    return d
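# __getstate__/__setstate__ extend EzPickle's state with the curriculum-related attributes,
# so a pickled copy of the environment sent to a parallel sampler worker round-trips them.
# A sketch of that round trip (the class name `PursuitEvade` is an assumption, not shown here):
#
#     import pickle
#     env = PursuitEvade([map_matrix], n_pursuers=4, n_evaders=2)
#     env.constraint_window = 0.5          # curriculum update made on the driver process
#     clone = pickle.loads(pickle.dumps(env))
#     assert clone.constraint_window == 0.5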
def __init__(self, n_pursuers, n_evaders, n_coop=2, n_poison=10, radius=0.015,
             obstacle_radius=0.2, obstacle_loc=np.array([0.5, 0.5]), ev_speed=0.01,
             poison_speed=0.01, n_sensors=30, sensor_range=0.2, action_scale=0.01,
             poison_reward=-1., food_reward=1., encounter_reward=.05, control_penalty=-.5,
             reward_mech='local', addid=True, speed_features=True, **kwargs):
    EzPickle.__init__(self, n_pursuers, n_evaders, n_coop, n_poison, radius, obstacle_radius,
                      obstacle_loc, ev_speed, poison_speed, n_sensors, sensor_range,
                      action_scale, poison_reward, food_reward, encounter_reward,
                      control_penalty, reward_mech, addid, speed_features, **kwargs)
    self.n_pursuers = n_pursuers
    self.n_evaders = n_evaders
    self.n_coop = n_coop
    self.n_poison = n_poison
    self.obstacle_radius = obstacle_radius
    self.obstacle_loc = obstacle_loc
    self.poison_speed = poison_speed
    self.radius = radius
    self.ev_speed = ev_speed
    self.n_sensors = n_sensors
    self.sensor_range = np.ones(self.n_pursuers) * sensor_range  # per-pursuer sensor range
    self.action_scale = action_scale
    self.poison_reward = poison_reward
    self.food_reward = food_reward
    self.control_penalty = control_penalty
    self.encounter_reward = encounter_reward
    self.n_obstacles = 1
    self._reward_mech = reward_mech
    self._addid = addid
    self._speed_features = speed_features
    self.seed()

    # Pursuers, evaders, and poison particles are all Archea: evaders use twice the
    # pursuer radius, poison particles three quarters of it.
    self._pursuers = [
        Archea(npu + 1, self.radius, self.n_sensors, self.sensor_range[npu],
               addid=self._addid, speed_features=self._speed_features)
        for npu in range(self.n_pursuers)
    ]
    self._evaders = [
        Archea(nev + 1, self.radius * 2, self.n_pursuers, self.sensor_range.mean() / 2)
        for nev in range(self.n_evaders)
    ]
    self._poisons = [
        Archea(npo + 1, self.radius * 3 / 4, self.n_poison, 0)
        for npo in range(self.n_poison)
    ]
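# A minimal usage sketch. Assumptions not confirmed by this file: the enclosing class is
# the waterworld-style cooperative pursuit environment (here called `MAWaterWorld`) with
# the Gym reset()/step() interface.
#
#     env = MAWaterWorld(n_pursuers=3, n_evaders=5, n_coop=2, reward_mech='global')
#     obs = env.reset()
#     obs, rewards, done, info = env.step(env.action_space.sample())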
def __init__(self, map_pool, **kwargs):
    """
    In pursuit evasion, a set of pursuers must 'tag' a set of evaders.

    Required arguments:
        map_matrix: the map on which agents interact

    Optional arguments:
        ally_layer: list of pursuers
        opponent_layer: list of evaders
        ally_controller: stationary policy of ally pursuers
        opponent_controller: stationary policy of opponent evaders
        catchr: reward for 'tagging' a single evader
        caughtr: reward for getting 'tagged' by a pursuer
        train_pursuit: flag indicating whether we are simulating pursuers or evaders
        initial_config: dictionary of the form
            initial_config['allies']: the initial ally configuration (matrix)
            initial_config['opponents']: the initial opponent configuration (matrix)
    """
    EzPickle.__init__(self, map_pool, **kwargs)

    self.sample_maps = kwargs.pop('sample_maps', False)
    self.map_pool = map_pool
    map_matrix = map_pool[0]
    self.map_matrix = map_matrix
    xs, ys = self.map_matrix.shape
    self.xs = xs
    self.ys = ys
    self._reward_mech = kwargs.pop('reward_mech', 'global')

    self.n_evaders = kwargs.pop('n_evaders', 1)
    self.n_pursuers = kwargs.pop('n_pursuers', 1)

    self.obs_range = kwargs.pop('obs_range', 3)  # agents can see 3 grid cells around them by default
    # assert self.obs_range % 2 != 0, "obs_range should be odd"
    self.obs_offset = int((self.obs_range - 1) / 2)

    self.flatten = kwargs.pop('flatten', True)

    self.pursuers = agent_utils.create_agents(self.n_pursuers, map_matrix, self.obs_range,
                                              flatten=self.flatten)
    self.evaders = agent_utils.create_agents(self.n_evaders, map_matrix, self.obs_range,
                                             flatten=self.flatten)

    self.pursuer_layer = kwargs.pop('ally_layer', AgentLayer(xs, ys, self.pursuers))
    self.evader_layer = kwargs.pop('opponent_layer', AgentLayer(xs, ys, self.evaders))

    self.layer_norm = kwargs.pop('layer_norm', 10)
    self.n_catch = kwargs.pop('n_catch', 2)

    self.random_opponents = kwargs.pop('random_opponents', False)
    self.max_opponents = kwargs.pop('max_opponents', 10)

    n_act_purs = self.pursuer_layer.get_nactions(0)
    n_act_ev = self.evader_layer.get_nactions(0)

    self.evader_controller = kwargs.pop('evader_controller', RandomPolicy(n_act_purs))
    self.pursuer_controller = kwargs.pop('pursuer_controller', RandomPolicy(n_act_ev))

    self.current_agent_layer = np.zeros((xs, ys), dtype=np.int32)

    self.catchr = kwargs.pop('catchr', 0.01)
    self.caughtr = kwargs.pop('caughtr', -0.01)

    self.term_pursuit = kwargs.pop('term_pursuit', 5.0)
    self.term_evade = kwargs.pop('term_evade', -5.0)

    self.urgency_reward = kwargs.pop('urgency_reward', 0.0)

    self.include_id = kwargs.pop('include_id', True)

    self.ally_actions = np.zeros(n_act_purs, dtype=np.int32)
    self.opponent_actions = np.zeros(n_act_ev, dtype=np.int32)

    self.train_pursuit = kwargs.pop('train_pursuit', True)

    if self.train_pursuit:
        self.low = np.array([0.0 for i in range(3 * self.obs_range**2)])
        self.high = np.array([1.0 for i in range(3 * self.obs_range**2)])
        if self.include_id:
            self.low = np.append(self.low, 0.0)
            self.high = np.append(self.high, 1.0)
        self.action_space = spaces.Discrete(n_act_purs)
        if self.flatten:
            self.observation_space = spaces.Box(self.low, self.high)
        else:
            self.observation_space = spaces.Box(low=-np.inf, high=np.inf,
                                                shape=(4, self.obs_range, self.obs_range))
        self.local_obs = np.zeros(
            (self.n_pursuers, 4, self.obs_range, self.obs_range))  # n_agents x 4 x obs_range x obs_range
        self.act_dims = [n_act_purs for i in range(self.n_pursuers)]
    else:
        self.low = np.array([0.0 for i in range(3 * self.obs_range**2)])
        self.high = np.array([1.0 for i in range(3 * self.obs_range**2)])
        if self.include_id:
            # Assign the result: np.append returns a new array and does not modify in place.
            self.low = np.append(self.low, 0.0)
            self.high = np.append(self.high, 1.0)
        self.action_space = spaces.Discrete(n_act_ev)
        if self.flatten:
            self.observation_space = spaces.Box(self.low, self.high)
        else:
            self.observation_space = spaces.Box(low=-np.inf, high=np.inf,
                                                shape=(4, self.obs_range, self.obs_range))
        self.local_obs = np.zeros(
            (self.n_evaders, 4, self.obs_range, self.obs_range))  # n_agents x 4 x obs_range x obs_range
        self.act_dims = [n_act_purs for i in range(self.n_evaders)]

    self.pursuers_gone = np.array([False for i in range(self.n_pursuers)])
    self.evaders_gone = np.array([False for i in range(self.n_evaders)])

    self.initial_config = kwargs.pop('initial_config', {})

    self.surround = kwargs.pop('surround', True)

    self.constraint_window = kwargs.pop('constraint_window', 1.0)

    self.curriculum_remove_every = kwargs.pop('curriculum_remove_every', 500)
    self.curriculum_constrain_rate = kwargs.pop('curriculum_constrain_rate', 0.0)
    self.curriculum_turn_off_shaping = kwargs.pop('curriculum_turn_off_shaping', np.inf)

    self.surround_mask = np.array([[-1, 0], [1, 0], [0, 1], [0, -1]])

    self.model_state = np.zeros((4,) + map_matrix.shape, dtype=np.float32)
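# A minimal usage sketch. Assumptions not confirmed by this file: the enclosing class is
# named `PursuitEvade`, map_pool is a list of 2-D numpy arrays describing the grid map,
# and step() takes one discrete action per controlled agent and returns the usual
# Gym-style (obs, rewards, done, info) tuple.
#
#     map_pool = [np.zeros((16, 16), dtype=np.int32)]
#     env = PursuitEvade(map_pool, n_pursuers=4, n_evaders=2, obs_range=5,
#                        n_catch=2, reward_mech='global')
#     obs = env.reset()
#     obs, rewards, done, info = env.step(np.zeros(env.n_pursuers, dtype=np.int32))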