Example #1
 def __setstate__(self, d):
     # curriculum update attributes here for parallel sampler
     EzPickle.__setstate__(self, d)
     self.constraint_window = d['constraint_window']
     self.n_targets = d['n_targets']
     self.n_snipers = d['n_snipers']
     self.catchr = d['catchr']
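Example #1 only restores these attributes on unpickling; the complementary __getstate__ has to put the same keys into the dict returned by EzPickle.__getstate__ (Example #12 shows the same pattern for a pursuit environment). A minimal sketch, assuming the class stores exactly the four attributes restored above:
 def __getstate__(self):
     # Sketch of the counterpart to the __setstate__ above; the attribute
     # names are assumed to mirror the keys it restores.
     d = EzPickle.__getstate__(self)
     d['constraint_window'] = self.constraint_window
     d['n_targets'] = self.n_targets
     d['n_snipers'] = self.n_snipers
     d['catchr'] = self.catchr
     return d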
Example #2
    def __init__(self,
                 env,
                 scale_reward=1.,
                 enable_obsnorm=False,
                 enable_rewnorm=False,
                 obs_alpha=0.001,
                 rew_alpha=0.001,
                 eps=1e-8):
        EzPickle.__init__(self, env, scale_reward, enable_obsnorm,
                          enable_rewnorm, obs_alpha, rew_alpha, eps)
        self._unwrapped = env
        self._scale_reward = scale_reward
        self._enable_obsnorm = enable_obsnorm
        self._enable_rewnorm = enable_rewnorm
        self._obs_alpha = obs_alpha
        self._rew_alpha = rew_alpha
        self._eps = eps
        self._flatobs_shape = [None for _ in env.agents]
        self._obs_mean = [None for _ in env.agents]
        self._obs_var = [None for _ in env.agents]
        self._rew_mean = [None for _ in env.agents]
        self._rew_var = [None for _ in env.agents]

        for agid, agent in enumerate(env.agents):
            if isinstance(agent.observation_space, spaces.Box):
                self._flatobs_shape[agid] = np.prod(
                    agent.observation_space.shape)
            elif isinstance(agent.observation_space, spaces.Discrete):
                self._flatobs_shape[agid] = agent.observation_space.n

            self._obs_mean[agid] = np.zeros(self._flatobs_shape[agid])
            self._obs_var[agid] = np.ones(self._flatobs_shape[agid])
            self._rew_mean[agid] = 0.
            self._rew_var[agid] = 1.
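Example #2 only initializes the per-agent running statistics (_obs_mean, _obs_var, _rew_mean, _rew_var); the update logic is not part of the snippet. A minimal sketch of how such a wrapper typically maintains them with an exponential moving average controlled by obs_alpha (the method names below are assumptions, not part of the original class):
    def update_obs_estimate(self, agid, obs):
        # Hypothetical helper: exponential-moving-average update of the per-agent
        # observation statistics, assuming a flattened Box observation.
        flat_obs = np.asarray(obs).ravel()
        self._obs_mean[agid] = (1 - self._obs_alpha) * self._obs_mean[agid] + self._obs_alpha * flat_obs
        self._obs_var[agid] = (1 - self._obs_alpha) * self._obs_var[agid] + \
            self._obs_alpha * np.square(flat_obs - self._obs_mean[agid])

    def standardize_obs(self, agid, obs):
        # Normalized observation; eps guards against division by zero.
        flat_obs = np.asarray(obs).ravel()
        return (flat_obs - self._obs_mean[agid]) / (np.sqrt(self._obs_var[agid]) + self._eps)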
Example #3
 def __init__(self, observation_space, action_space, hidden_spec, enable_obsnorm, varscope_name):
     EzPickle.__init__(self, observation_space, action_space, hidden_spec, enable_obsnorm,
                       varscope_name)
     self.hidden_spec = hidden_spec
     self._dist = Categorical(action_space.n)
     super(CategoricalMLPPolicy, self).__init__(observation_space, action_space, action_space.n,
                                                enable_obsnorm, varscope_name)
Example #4
    def __init__(self,
                 n_walkers=2,
                 position_noise=1e-3,
                 angle_noise=1e-3,
                 reward_mech='local',
                 forward_reward=1.0,
                 fall_reward=-100.0,
                 drop_reward=-100.0,
                 terminate_on_fall=True,
                 one_hot=False,
                 **kwargs):
        EzPickle.__init__(self, n_walkers, position_noise, angle_noise,
                          reward_mech, forward_reward, fall_reward,
                          drop_reward, terminate_on_fall, one_hot, **kwargs)

        self.n_walkers = n_walkers
        self.position_noise = position_noise
        self.angle_noise = angle_noise
        self._reward_mech = reward_mech
        self.forward_reward = forward_reward
        self.fall_reward = fall_reward
        self.drop_reward = drop_reward
        self.terminate_on_fall = terminate_on_fall
        self.one_hot = one_hot
        self.setup()
        self.action_space, self.observation_space = [], []
        for a_i in range(self.n_walkers):
            self.action_space.append(self.agents[a_i].action_space)
            self.observation_space.append(self.agents[a_i].observation_space)
Example #5
    def __init__(self, 
                 n_legs=4,
                 ts=0.02,
                 integrator='RK4',
                 leg_length=0.282,
                 out_file="multi_ant.xml",
                 base_file="ant_og.xml",
                 reward_mech='local',
                 pos_noise=1e-3,
                 vel_noise=1e-3,
                 force_noise=1e-3
                 ):
        EzPickle.__init__(self, n_legs, ts, integrator, leg_length,
                                out_file, base_file, reward_mech,
                                pos_noise, vel_noise, force_noise)
        self.n_legs = n_legs
        self.ts = ts
        self.integrator = integrator
        self.leg_length = leg_length
        self.out_file = out_file
        self.base_file = base_file
        self._reward_mech = reward_mech
        
        self.pos_noise = pos_noise
        self.vel_noise = vel_noise
        self.force_noise = force_noise

        self.legs = None
        self.out_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), self.out_file)
        self.base_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), self.base_file)

        self.gen_xml(out_file=self.out_file_path, og_file=self.base_file_path)

        mujoco_env.MujocoEnv.__init__(self, self.out_file_path, 5)
        self.legs = [AntLeg(self.model, i, n_legs, pos_noise=pos_noise, vel_noise=vel_noise, force_noise=force_noise) for i in range(self.n_legs)]
Example #6
    def __init__(self, env, scale_reward=1., enable_obsnorm=False, enable_rewnorm=False,
                 obs_alpha=0.001, rew_alpha=0.001, eps=1e-8):
        EzPickle.__init__(self, env, scale_reward, enable_obsnorm, enable_rewnorm, obs_alpha,
                          rew_alpha, eps)
        self._unwrapped = env
        self._scale_reward = scale_reward
        self._enable_obsnorm = enable_obsnorm
        self._enable_rewnorm = enable_rewnorm
        self._obs_alpha = obs_alpha
        self._rew_alpha = rew_alpha
        self._eps = eps
        self._flatobs_shape = [None for _ in env.agents]
        self._obs_mean = [None for _ in env.agents]
        self._obs_var = [None for _ in env.agents]
        self._rew_mean = [None for _ in env.agents]
        self._rew_var = [None for _ in env.agents]

        for agid, agent in enumerate(env.agents):
            if isinstance(agent.observation_space, spaces.Box):
                self._flatobs_shape[agid] = np.prod(agent.observation_space.shape)
            elif isinstance(agent.observation_space, spaces.Discrete):
                self._flatobs_shape[agid] = agent.observation_space.n

            self._obs_mean[agid] = np.zeros(self._flatobs_shape[agid])
            self._obs_var[agid] = np.ones(self._flatobs_shape[agid])
            self._rew_mean[agid] = 0.
            self._rew_var[agid] = 1.
Example #7
    def __init__(self, n_pursuers1, n_pursuers2, n_evaders1, n_evaders2, n_coop=2, n_poison=10, radius=0.015,
                 obstacle_radius=0.2, obstacle_loc=np.array([0.5, 0.5]), ev_speed=0.01,
                 poison_speed=0.01, n_sensors=30, sensor_range=0.2, action_scale=0.01,
                 poison_reward=-1., food_reward=1., encounter_reward=.05, control_penalty=-.5,
                 collision_penalty=-1, reward_mech='local', addid=True, speed_features=True, **kwargs):
        EzPickle.__init__(self, n_pursuers1, n_pursuers2, n_evaders1, n_evaders2, n_coop, n_poison, radius, obstacle_radius,
                          obstacle_loc, ev_speed, poison_speed, n_sensors, sensor_range,
                          action_scale, poison_reward, food_reward, encounter_reward,
                          control_penalty, reward_mech, addid, speed_features, **kwargs)
        self.n_pursuers1 = n_pursuers1
        self.n_pursuers2 = n_pursuers2
        self.n_evaders1 = n_evaders1
        self.n_evaders2 = n_evaders2
        self.n_coop = n_coop
        self.n_poison = n_poison
        self.obstacle_radius = obstacle_radius
        self.obstacle_loc = obstacle_loc
        self.poison_speed = poison_speed
        self.radius = radius
        self.ev_speed = ev_speed
        self.n_sensors = n_sensors
        self.sensor_range1 = np.ones(self.n_pursuers1) * sensor_range
        self.sensor_range2 = np.ones(self.n_pursuers2) * sensor_range
        self.action_scale = action_scale
        self.poison_reward = poison_reward
        self.food_reward = food_reward
        self.control_penalty = control_penalty
        self.collision_penalty = collision_penalty
        self.encounter_reward = encounter_reward

        self.n_obstacles = 1
        self._reward_mech = reward_mech
        self._addid = addid
        self._speed_features = speed_features
        self.seed()
        self._pursuers1 = [
            Archea(npu + 1, self.radius, self.n_sensors, self.sensor_range1[npu], addid=self._addid,
                   speed_features=self._speed_features) for npu in range(self.n_pursuers1)
        ]
        self._pursuers2 = [
            Archea(npu + 1, self.radius, self.n_sensors, self.sensor_range2[npu], addid=self._addid,
                   speed_features=self._speed_features) for npu in range(self.n_pursuers2)
        ]
        self._evaders1 = [
            Archea(nev + 1, self.radius * 2, self.n_pursuers1, self.sensor_range1.mean() / 2)
            for nev in range(self.n_evaders1)
        ]
        self._evaders2 = [
            Archea(nev + 1, self.radius * 2, self.n_pursuers1, self.sensor_range1.mean() / 2)
            for nev in range(self.n_evaders2)
        ]
        self._poisons = [
            Archea(npo + 1, self.radius * 3 / 4, self.n_poison, 0) for npo in range(self.n_poison)
        ]
Example #8
    def __init__(self):
        num_agents = 1
        self.env_agents = [SimpleAgent() for _ in range(num_agents)]  # NEEDED
        # Internal
        self.n_agents = len(self.env_agents)

        EzPickle.__init__(self)

        self.seed()

        self.reset()
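Example #8 passes no extra arguments to EzPickle.__init__. EzPickle works by recording the positional and keyword arguments given to it; on unpickling it re-runs the class's __init__ with those recorded arguments and copies the resulting attributes over. A small usage sketch (the class name SimpleEnv is assumed for illustration):
import pickle

env = SimpleEnv()  # hypothetical name for the class that defines Example #8's __init__
restored = pickle.loads(pickle.dumps(env))  # unpickling calls __init__ again with the recorded args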
Example #9
    def __init__(self,
                 n_walkers=2,
                 position_noise=1e-3,
                 angle_noise=1e-3,
                 reward_mech='local',
                 forward_reward=1.0,
                 fall_reward=-100.0,
                 drop_reward=-100.0,
                 terminate_on_fall=True):
        EzPickle.__init__(self, n_walkers, position_noise, angle_noise,
                          reward_mech, forward_reward, fall_reward,
                          drop_reward, terminate_on_fall)

        self.seed()
        self.viewer = None

        self.world = Box2D.b2World()
        self.terrain = None

        self.n_walkers = n_walkers
        init_x = TERRAIN_STEP * TERRAIN_STARTPAD / 2
        init_y = TERRAIN_HEIGHT + 2 * LEG_H
        self.start_x = [
            init_x + WALKER_SEPERATION * i * TERRAIN_STEP
            for i in range(self.n_walkers)
        ]
        self.walkers = [
            BipedalWalker(self.world, init_x=sx, init_y=init_y)
            for sx in self.start_x
        ]

        self.package_scale = n_walkers / 1.75
        self.package_length = PACKAGE_LENGTH / SCALE * self.package_scale

        self.total_agents = n_walkers

        self.prev_shaping = np.zeros(self.n_walkers)
        self.prev_package_shaping = 0.0

        self.position_noise = position_noise
        self.angle_noise = angle_noise
        self._reward_mech = reward_mech

        self.terrain_length = int(TERRAIN_LENGTH * n_walkers * 1 / 8.)

        self.forward_reward = forward_reward
        self.fall_reward = fall_reward
        self.drop_reward = drop_reward

        self.terminate_on_fall = terminate_on_fall

        self.reset()
Example #10
	def __init__(self):
		self.env_agents = [SimpleAgent() for _ in range(3)] # NEEDED
		# Internal
		self.n_agents = len(self.env_agents)
		self.time_to_event_generator = lambda: np.random.weibull(1.5,1)[0]
		self.time_to_event = np.array([self.time_to_event_generator() for _ in self.env_agents])
		self.sojourn_time = np.array([ 0. for i in self.time_to_event])
		self.global_time = 0.

		EzPickle.__init__(self)

		self.seed()

		self.reset()
Example #11
 def __init__(self, observation_space, action_space, hidden_spec,
              enable_obsnorm, min_stdev, init_logstdev, varscope_name):
     EzPickle.__init__(self, observation_space, action_space, hidden_spec,
                       enable_obsnorm, min_stdev, init_logstdev,
                       varscope_name)
     self.hidden_spec = hidden_spec
     self.min_stdev = min_stdev
     self.init_logstdev = init_logstdev
     self._dist = Gaussian(action_space.shape[0])
     super(GaussianMLPPolicy, self).__init__(
         observation_space,
         action_space,
         action_space.shape[0] * 2,  # Mean and diagonal stdev
         enable_obsnorm,
         varscope_name)
Example #12
 def __getstate__(self):
     d = EzPickle.__getstate__(self)
     d['constraint_window'] = self.constraint_window
     d['n_evaders'] = self.n_evaders
     d['n_pursuers'] = self.n_pursuers
     d['catchr'] = self.catchr
     return d
Example #13
    def __init__(self, n_good, n_hostages, n_bad, n_coop_save, n_coop_avoid, radius=0.015,
                 key_loc=None, bad_speed=0.01, n_sensors=30, sensor_range=0.2, action_scale=0.01,
                 save_reward=5., hit_reward=-1., encounter_reward=0.01, not_saved_reward=-3,
                 bomb_reward=-5., bomb_radius=0.05, key_radius=0.0075, control_penalty=-.1,
                 reward_mech='global', addid=True, **kwargs):
        """
        The environment consists of a square world with hostages behind gates. One of the
        good agents has to find the key; only then can the gates be opened. Once the gates
        are open, the good agents need to find the hostages in order to save them. They also
        need to avoid the bomb and the bad agents. Coming across a bomb terminates the game
        and gives a large negative reward.
        """
        EzPickle.__init__(self, n_good, n_hostages, n_bad, n_coop_save, n_coop_avoid, radius,
                          key_loc, bad_speed, n_sensors, sensor_range, action_scale, save_reward,
                          hit_reward, encounter_reward, not_saved_reward, bomb_reward, bomb_radius,
                          key_radius, control_penalty, reward_mech, addid, **kwargs)
        self.n_good = n_good
        self.n_hostages = n_hostages
        self.n_bad = n_bad
        self.n_coop_save = n_coop_save
        self.n_coop_avoid = n_coop_avoid
        self.radius = radius
        self.key_loc = key_loc
        self.key_radius = key_radius
        self.bad_speed = bad_speed
        self.n_sensors = n_sensors
        self.sensor_range = np.ones(self.n_good) * sensor_range if isinstance(
            sensor_range, float) else sensor_range
        self.action_scale = action_scale
        self.save_reward = save_reward
        self.hit_reward = hit_reward
        self.encounter_reward = encounter_reward
        self.not_saved_reward = not_saved_reward
        self.bomb_reward = bomb_reward
        self.bomb_radius = bomb_radius
        self.control_penalty = control_penalty
        self._reward_mech = reward_mech
        self._addid = addid
        self.seed()

        self._rescuers = [CircAgent(agid + 1, self.radius, self.n_sensors, self.sensor_range[agid],
                                    addid=self._addid) for agid in range(self.n_good)]

        self._criminals = [
            CircAgent(agid + 1, self.radius, self.n_sensors, self.sensor_range.mean())
            for agid in range(self.n_bad)
        ]

        self._hostages = [CircAgent(agid + 1, self.radius * 2, self.n_sensors,
                                    self.sensor_range.min()) for agid in range(self.n_hostages)]
Example #14
 def __init__(self, observation_space, action_space, hidden_spec,
              enable_obsnorm, min_stdev, init_logstdev,
              state_include_action, varscope_name):
     EzPickle.__init__(self, observation_space, action_space, hidden_spec,
                       enable_obsnorm, min_stdev, init_logstdev,
                       state_include_action, varscope_name)
     self.hidden_spec = hidden_spec
     self.min_stdev = min_stdev
     self.init_logstdev = init_logstdev
     self.state_include_action = state_include_action  # TODO add to stochastic policy
     self._dist = RecurrentGaussian(action_space.shape[0])
     self.prev_actions = None
     self.prev_hiddens = None
     super(GaussianGRUPolicy, self).__init__(
         observation_space,
         action_space,
         action_space.shape[0] * 2,  # Mean and diagonal stdev
         enable_obsnorm,
         varscope_name)
Example #15
    def __init__(self, n_pursuers, n_evaders, n_coop=2, n_poison=10, radius=0.015,
                 obstacle_radius=0.2, obstacle_loc=np.array([0.5, 0.5]), ev_speed=0.01,
                 poison_speed=0.01, n_sensors=30, sensor_range=0.2, action_scale=0.01,
                 poison_reward=-1., food_reward=1., encounter_reward=.05, control_penalty=-.5,
                 reward_mech='local', addid=True, speed_features=True, **kwargs):
        EzPickle.__init__(self, n_pursuers, n_evaders, n_coop, n_poison, radius, obstacle_radius,
                          obstacle_loc, ev_speed, poison_speed, n_sensors, sensor_range,
                          action_scale, poison_reward, food_reward, encounter_reward,
                          control_penalty, reward_mech, addid, speed_features, **kwargs)
        self.n_pursuers = n_pursuers
        self.n_evaders = n_evaders
        self.n_coop = n_coop
        self.n_poison = n_poison
        self.obstacle_radius = obstacle_radius
        self.obstacle_loc = obstacle_loc
        self.poison_speed = poison_speed
        self.radius = radius
        self.ev_speed = ev_speed
        self.n_sensors = n_sensors
        self.sensor_range = np.ones(self.n_pursuers) * sensor_range
        self.action_scale = action_scale
        self.poison_reward = poison_reward
        self.food_reward = food_reward
        self.control_penalty = control_penalty
        self.encounter_reward = encounter_reward

        self.n_obstacles = 1
        self._reward_mech = reward_mech
        self._addid = addid
        self._speed_features = speed_features
        self.seed()
        self._pursuers = [
            Archea(npu + 1, self.radius, self.n_sensors, self.sensor_range[npu], addid=self._addid,
                   speed_features=self._speed_features) for npu in range(self.n_pursuers)
        ]
        self._evaders = [
            Archea(nev + 1, self.radius * 2, self.n_pursuers, self.sensor_range.mean() / 2)
            for nev in range(self.n_evaders)
        ]
        self._poisons = [
            Archea(npo + 1, self.radius * 3 / 4, self.n_poison, 0) for npo in range(self.n_poison)
        ]
Example #16
    def __init__(self):

        self.discount = CT_DISCOUNT_RATE

        num_row_col = 1

        self.n_agents = num_row_col**2
        self.max_stop_time = 100  # seconds
        self.min_stop_time = 2  # seconds
        # specify connectivity as East to West across row, North to South across column
        self.connectivity = np.array(list(range(self.n_agents))).reshape(
            (num_row_col, num_row_col))

        # Assigned on reset()
        self.env_agents = [None for _ in range(self.n_agents)]  # NEEDED
        self.simpy_env = None
        self.agent_event_list = [None] * self.n_agents

        EzPickle.__init__(self)
        self.seed()
        self.reset()
Example #17
    def __init__(self,
                 n_walkers=2,
                 position_noise=1e-3,
                 angle_noise=1e-3,
                 reward_mech='local',
                 forward_reward=1.0,
                 fall_reward=-100.0,
                 drop_reward=-100.0,
                 terminate_on_fall=True,
                 one_hot=False):
        EzPickle.__init__(self, n_walkers, position_noise, angle_noise,
                          reward_mech, forward_reward, fall_reward,
                          drop_reward, terminate_on_fall, one_hot)

        self.n_walkers = n_walkers
        self.position_noise = position_noise
        self.angle_noise = angle_noise
        self._reward_mech = reward_mech
        self.forward_reward = forward_reward
        self.fall_reward = fall_reward
        self.drop_reward = drop_reward
        self.terminate_on_fall = terminate_on_fall
        self.one_hot = one_hot
        self.setup()
Example #18
    def __init__(self, 
                 continuous_action_space=True,
                 n_agents=MIN_AGENTS,
                 constant_n_agents=True,
                 training_mode='circle', 
                 sensor_mode='closest',
                 sensor_capacity=SENSOR_CAPACITY,
                 max_time_steps=MAX_TIME_STEPS,
                 one_hot=False,
                 render_option=False,
                 speed_noise=1e-3,
                 position_noise=1e-3, 
                 angle_noise=1e-3, 
                 reward_mech='local',
                 rew_arrival=15,
                 rew_closing=2.5,
                 rew_nmac=-15,
                 rew_large_turnrate=-0.1,
                 rew_large_acc=-1,
                 pen_action_heavy=True,
                 random_mode=True):

        EzPickle.__init__(self, continuous_action_space, n_agents, constant_n_agents,
                 training_mode, sensor_mode,sensor_capacity, max_time_steps, one_hot,
                 render_option, speed_noise, position_noise, angle_noise, reward_mech,
                 rew_arrival, rew_closing, rew_nmac, rew_large_turnrate, rew_large_acc,
                 pen_action_heavy, random_mode)

        self.t = 0
        self.aircraft = []
        self.n_agents = n_agents
        self.continuous_action_space = continuous_action_space
        self.constant_n_agents = constant_n_agents
        self.training_mode = training_mode
        self.sensor_mode = sensor_mode
        self.sensor_capacity = sensor_capacity
        self.max_time_steps = max_time_steps
        self.one_hot = one_hot
        self.render_option = render_option
        self.circle_radius = random.choice(range(MIN_CIRCLE_RADIUS, MAX_CIRCLE_RADIUS))
        # Observation noises:
        self.speed_noise = speed_noise
        self.position_noise = position_noise
        self.angle_noise = angle_noise
        # Reward settings:
        self._reward_mech = reward_mech
        self.rew_arrival = rew_arrival
        self.rew_closing = rew_closing
        self.rew_nmac = rew_nmac
        self.rew_large_turnrate = rew_large_turnrate
        self.rew_large_acc = rew_large_acc
        self.pen_action_heavy = pen_action_heavy
        self.random_mode = random_mode

        self.observation_space = \
            spaces.Box(low=-1, high=1, shape=(OWN_OBS_DIM + PAIR_OBS_DIM * self.sensor_capacity, ))
        if self.continuous_action_space:
            self.action_space = spaces.Box(low=-1, high=1, shape=(ACTION_DIM,))
        else:
            self.action_space = spaces.Discrete(DISC_ACTION_DIM)

        self.seed()
Example #19
    def __init__(self, map_pool, **kwargs):
        """
        In evade-pursuit, a set of pursuers must 'tag' a set of evaders.

        Required arguments:
        - map_matrix: the map on which agents interact

        Optional arguments:
        - ally_layer: list of pursuers
        - opponent_layer: list of evaders
        - pursuer_controller: stationary policy of the ally pursuers
        - evader_controller: stationary policy of the opponent evaders
        - catchr: reward for 'tagging' a single evader
        - caughtr: reward for getting 'tagged' by a pursuer
        - train_pursuit: flag indicating whether we are simulating pursuers or evaders
        - initial_config: dictionary of the form
            initial_config['allies']: the initial ally configuration (matrix)
            initial_config['opponents']: the initial opponent configuration (matrix)
        """
        EzPickle.__init__(self, map_pool, **kwargs)

        self.sample_maps = kwargs.pop('sample_maps', False)

        self.map_pool = map_pool
        map_matrix = map_pool
        self.map_matrix = map_matrix
        zs, xs, ys = self.map_matrix.shape
        self.xs = xs
        self.ys = ys
        self.zs = zs

        self._reward_mech = kwargs.pop('reward_mech', 'global')

        self.n_evaders = kwargs.pop('n_evaders', 1)
        self.n_pursuers = kwargs.pop('n_pursuers', 1)

        self.obs_range = kwargs.pop('obs_range', 3)  # can see 3 grids around them by default
        #assert self.obs_range % 2 != 0, "obs_range should be odd"
        self.obs_offset = int((self.obs_range - 1) / 2)

        self.flatten = kwargs.pop('flatten', True)

        self.pursuers = agent_utils.create_agents(self.n_pursuers, map_matrix, self.obs_range,
                                                  flatten=self.flatten)
        self.evaders = agent_utils.create_agents(self.n_evaders, map_matrix, self.obs_range,
                                                 flatten=self.flatten)

        self.pursuer_layer = kwargs.pop('ally_layer', AgentLayer(xs, ys, zs, self.pursuers))
        self.evader_layer = kwargs.pop('opponent_layer', AgentLayer(xs, ys, zs, self.evaders))

        self.layer_norm = kwargs.pop('layer_norm', 10)

        self.n_catch = kwargs.pop('n_catch', 2)

        self.random_opponents = kwargs.pop('random_opponents', False)
        self.max_opponents = kwargs.pop('max_opponents', 10)

        n_act_purs = self.pursuer_layer.get_nactions(0)
        n_act_ev = self.evader_layer.get_nactions(0)

        self.evader_controller = kwargs.pop('evader_controller', RandomPolicy(n_act_purs))
        self.pursuer_controller = kwargs.pop('pursuer_controller', RandomPolicy(n_act_ev))

        self.current_agent_layer = np.zeros((zs, xs, ys), dtype=np.int32)

        self.catchr = kwargs.pop('catchr', 0.01)
        self.caughtr = kwargs.pop('caughtr', -0.01)

        self.term_pursuit = kwargs.pop('term_pursuit', 5.0)
        self.term_evade = kwargs.pop('term_evade', -5.0)

        self.urgency_reward = kwargs.pop('urgency_reward', 0.0)

        self.include_id = kwargs.pop('include_id', True)

        self.ally_actions = np.zeros(n_act_purs, dtype=np.int32)
        self.opponent_actions = np.zeros(n_act_ev, dtype=np.int32)

        self.train_pursuit = kwargs.pop('train_pursuit', True)

        if self.train_pursuit:
            self.low = np.array([0.0 for i in range(3 * self.obs_range**2)])
            self.high = np.array([1.0 for i in range(3 * self.obs_range**2)])
            if self.include_id:
                self.low = np.append(self.low, 0.0)
                self.high = np.append(self.high, 1.0)
            self.action_space = spaces.Discrete(n_act_purs)
            if self.flatten:
                self.observation_space = spaces.Box(self.low, self.high)
            else:
                self.observation_space = spaces.Box(low=-np.inf, high=np.inf,
                                                    shape=(4, self.obs_range, self.obs_range))
            self.local_obs = np.zeros(
                (self.n_pursuers, 4, self.obs_range, self.obs_range))  # Nagents X 4 X xsize X ysize
            self.act_dims = [n_act_purs for i in range(self.n_pursuers)]
        else:
            self.low = np.array([0.0 for i in range(3 * self.obs_range**2)])
            self.high = np.array([1.0 for i in range(3 * self.obs_range**2)])
            if self.include_id:
                self.low = np.append(self.low, 0.0)
                self.high = np.append(self.high, 1.0)
            self.action_space = spaces.Discrete(n_act_ev)
            if self.flatten:
                self.observation_space = spaces.Box(self.low, self.high)
            else:
                self.observation_space = spaces.Box(low=-np.inf, high=np.inf,
                                                    shape=(4, self.obs_range, self.obs_range))
            self.local_obs = np.zeros(
                (self.n_evaders, 4, self.obs_range, self.obs_range))  # Nagents X 4 X xsize X ysize
            self.act_dims = [n_act_purs for i in range(self.n_evaders)]
        self.pursuers_gone = np.array([False for i in range(self.n_pursuers)])
        self.evaders_gone = np.array([False for i in range(self.n_evaders)])

        self.initial_config = kwargs.pop('initial_config', {})

        self.surround = kwargs.pop('surround', True)

        self.constraint_window = kwargs.pop('constraint_window', 1.0)

        self.curriculum_remove_every = kwargs.pop('curriculum_remove_every', 500)
        self.curriculum_constrain_rate = kwargs.pop('curriculum_constrain_rate', 0.0)
        self.curriculum_turn_off_shaping = kwargs.pop('curriculum_turn_off_shaping', np.inf)

        self.surround_mask = np.array([[-1, 0, 0], [1, 0, 0], [0, 1, 0], [0, -1, 0]])

        self.model_state = np.zeros((4,) + map_matrix.shape, dtype=np.float32)
Example #20
 def __setstate__(self, d):
     EzPickle.__setstate__(self, d)
     self._obs_mean = d['_obs_mean']
     self._obs_var = d['_obs_var']
Example #21
 def __getstate__(self):
     d = EzPickle.__getstate__(self)
     d['_obs_mean'] = self._obs_mean
     d['_obs_var'] = self._obs_var
     return d
Example #22
 def __getstate__(self):
     d = EzPickle.__getstate__(self)
     d['_obs_mean'] = self._obs_mean
     d['_obs_var'] = self._obs_var
     return d
Example #23
    def __init__(self, map_pool, **kwargs):
        # kwargs = dictionary where you can pop key of size 1 off to define term
        # if present, assign value and if not use default
        EzPickle.__init__(self, map_pool, **kwargs)

        #initialize map, observation, reward
        self.sample_maps = kwargs.pop('sample_maps', False)
        self.map_pool = map_pool
        map_matrix = map_pool[0]
        self.map_matrix = map_matrix
        xs, ys = self.map_matrix.shape
        self.xs = xs
        self.ys = ys
        self._reward_mech = kwargs.pop('reward_mech', 'global')
        self.obs_range = kwargs.pop(
            'obs_range', 3)  # can see 3 grids around them by default

        #assert self.obs_range % 2 != 0, "obs_range should be odd"
        self.obs_offset = int((self.obs_range - 1) / 2)
        self.flatten = kwargs.pop('flatten', True)

        # initialize agents
        self.n_surveillances = kwargs.pop('n_surveillances', 1)
        self.n_snipers = kwargs.pop('n_snipers', 1)
        self.n_targets = kwargs.pop('n_targets', 1)

        #self.agents = list of single agent entities that define how it should move given inputs
        #helper function for creating list
        self.surveillances = agent_utils.create_agents(self.n_surveillances,
                                                       map_matrix,
                                                       self.obs_range,
                                                       flatten=self.flatten)
        self.snipers = agent_utils.create_agents(self.n_snipers,
                                                 map_matrix,
                                                 self.obs_range,
                                                 flatten=self.flatten)
        self.targets = agent_utils.create_agents(self.n_targets,
                                                 map_matrix,
                                                 self.obs_range,
                                                 flatten=self.flatten)

        self.surveillance_layer = AgentLayer(xs, ys, self.surveillances)
        self.sniper_layer = AgentLayer(xs, ys, self.snipers)
        self.target_layer = AgentLayer(xs, ys, self.targets)

        n_act = self.sniper_layer.get_nactions(0)
        self.sniper_controller = kwargs.pop('sniper_controller',
                                            RandomPolicy(n_act))
        self.target_controller = kwargs.pop('target_controller',
                                            RandomPolicy(n_act))

        self.sniper_r = kwargs.pop('term_sniper', -1.0)
        self.target_r = kwargs.pop('term_evade', 0.1)
        self.urgency_reward = kwargs.pop('urgency_reward', 0.0)

        # initialize remainder of state
        self.layer_norm = kwargs.pop('layer_norm', 10)

        self.current_agent_layer = np.zeros((xs, ys), dtype=np.int32)
        self.include_id = kwargs.pop('include_id', True)
        self.surveillance_actions = np.zeros(n_act, dtype=np.int32)
        self.sniper_actions = np.zeros(n_act, dtype=np.int32)
        self.target_actions = np.zeros(n_act, dtype=np.int32)

        # set up the action and observation spaces
        self.low = np.array([0.0 for i in range(4 * self.obs_range**2)])
        self.high = np.array([1.0 for i in range(4 * self.obs_range**2)])
        if self.include_id:
            self.low = np.append(self.low, 0.0)
            self.high = np.append(self.high, 1.0)
        self.action_space = spaces.Discrete(n_act)
        if self.flatten:
            self.observation_space = spaces.Box(self.low, self.high)
        else:
            self.observation_space = spaces.Box(low=-np.inf,
                                                high=np.inf,
                                                shape=(5, self.obs_range,
                                                       self.obs_range))
        self.local_obs = np.zeros(
            (self.n_surveillances, 5, self.obs_range,
             self.obs_range))  # Nagents X 5 X xsize X ysize
        self.act_dims = [n_act for i in range(self.n_surveillances)]

        #more state set up
        self.initial_config = kwargs.pop('initial_config', {})
        self.constraint_window = kwargs.pop('constraint_window', 1.0)
        self.curriculum_remove_every = kwargs.pop('curriculum_remove_every',
                                                  500)
        self.curriculum_constrain_rate = kwargs.pop(
            'curriculum_constrain_rate', 0.0)
        self.curriculum_turn_off_shaping = kwargs.pop(
            'curriculum_turn_off_shaping', np.inf)
        self.surround = kwargs.pop('surround', True)
        self.surround_mask = np.array([[-1, 0], [1, 0], [0, 1], [0, -1]])

        #layers of state
        #layer 1: buildings
        #layer 2: snipers
        #layer 3: targets
        #layer 4: surveillance
        #layer 5: irrelevant
        self.model_state = np.zeros((5, ) + map_matrix.shape, dtype=np.float32)

    ##################################################################
    # The functions below are the interface with MultiAgentSimulator #
    ##################################################################

    @property
    def agents(self):
        return self.surveillances
Example #24
    def __init__(self,
                 radius=0.015,
                 obstacle_radius=0.2,
                 obstacle_loc=np.array([0.5, 0.5]),
                 ev_speed=0.01,
                 n_sensors=20,
                 sensor_range=2,
                 action_scale=0.01,
                 food_reward=10,
                 encounter_reward=.05,
                 control_penalty=-0.1,
                 evader_params=np.array([0.1, 0.05]),
                 speed_features=True,
                 is_observability_full=False,
                 max_velocity_pursuer=0.05,
                 meta_learning=False,
                 **kwargs):
        EzPickle.__init__(self, radius, obstacle_radius, obstacle_loc,
                          ev_speed, n_sensors, sensor_range, action_scale,
                          food_reward, encounter_reward, control_penalty,
                          evader_params, speed_features, is_observability_full,
                          max_velocity_pursuer, meta_learning, **kwargs)
        self.obstacle_radius = obstacle_radius
        self.obstacle_loc = obstacle_loc
        self.ev_speed = 0.05 * (1 - evader_params[0])
        self.n_sensors = n_sensors
        self.sensor_range = np.ones(1) * sensor_range
        self.radius = radius
        self.action_scale = action_scale
        self.food_reward = food_reward
        self.encounter_reward = encounter_reward
        self.control_penalty = control_penalty
        self.n_obstacles = 1
        self._speed_features = speed_features
        self.seed()
        self._pursuer = Archea(1,
                               self.radius,
                               self.n_sensors,
                               self.sensor_range,
                               is_observability_full,
                               speed_features=True)
        self._evader = Archea(1,
                              self.radius,
                              self.n_sensors,
                              self.sensor_range,
                              is_observability_full,
                              speed_features=True)
        self._food = Archea(1,
                            self.radius * 0.75,
                            self.n_sensors,
                            self.sensor_range,
                            is_observability_full,
                            speed_features=True)
        self._pursuers = [self._pursuer]
        self.evader_params = np.array(evader_params, dtype=float)  # copy so the default array is not mutated below
        self._meta_learning = meta_learning
        self.max_velocity_pursuer = max_velocity_pursuer
        if self._meta_learning:
            self.evader_params[0] = truncnorm.rvs(-2, 2, loc=0.5, scale=0.25)
            while self.evader_params[0] == 0:
                self.evader_params[0] = truncnorm.rvs(-2,
                                                      2,
                                                      loc=0.5,
                                                      scale=0.25)

        self.is_observability_full = is_observability_full
        self._evader_move = False
Example #25
 def __setstate__(self, d):
     EzPickle.__setstate__(self, d)
     self._obs_mean = d['_obs_mean']
     self._obs_var = d['_obs_var']
Example #26
    def __init__(self,
                 n_pursuers,
                 n_evaders,
                 n_coop=2,
                 n_poison=10,
                 radius=0.015,
                 obstacle_radius=0.2,
                 obstacle_loc=np.array([0.5, 0.5]),
                 ev_speed=0.01,
                 poison_speed=0.01,
                 n_sensors=30,
                 sensor_range=0.2,
                 action_scale=0.01,
                 poison_reward=-1.,
                 food_reward=1.,
                 encounter_reward=.05,
                 control_penalty=-.5,
                 reward_mech='local',
                 addid=True,
                 speed_features=True,
                 **kwargs):
        EzPickle.__init__(self, n_pursuers, n_evaders, n_coop, n_poison,
                          radius, obstacle_radius, obstacle_loc, ev_speed,
                          poison_speed, n_sensors, sensor_range, action_scale,
                          poison_reward, food_reward, encounter_reward,
                          control_penalty, reward_mech, addid, speed_features,
                          **kwargs)
        self.n_pursuers = n_pursuers
        self.n_evaders = n_evaders
        self.n_coop = n_coop
        self.n_poison = n_poison
        self.obstacle_radius = obstacle_radius
        self.obstacle_loc = obstacle_loc
        self.poison_speed = poison_speed
        self.radius = radius
        self.ev_speed = ev_speed
        self.n_sensors = n_sensors
        self.sensor_range = np.ones(self.n_pursuers) * sensor_range
        self.action_scale = action_scale
        self.poison_reward = poison_reward
        self.food_reward = food_reward
        self.control_penalty = control_penalty
        self.encounter_reward = encounter_reward

        self.n_obstacles = 1
        self._reward_mech = reward_mech
        self._addid = addid
        self._speed_features = speed_features
        self.seed()
        if kwargs.get('seed') is not None:
            self.seed(kwargs['seed'])
        self._pursuers = [
            Archea(npu + 1,
                   self.radius * 1.,
                   self.n_sensors,
                   self.sensor_range[npu],
                   addid=self._addid,
                   speed_features=self._speed_features,
                   ally=self.n_pursuers) for npu in range(self.n_pursuers)
        ]
        self._evaders = [
            Archea(nev + 1, self.radius * 1., self.n_pursuers,
                   self.sensor_range.mean() / 2)
            for nev in range(self.n_evaders)
        ]
        self._poisons = [
            Archea(npo + 1, self.radius * 1., self.n_poison, 0)
            for npo in range(self.n_poison)
        ]
        self.action_space, self.observation_space = [], []
        for a_i in range(self.n_pursuers):
            self.action_space.append(self.agents[a_i].action_space)
            self.observation_space.append(self.agents[a_i].observation_space)