def observation(self, agent, world): def communications_observed(other_agent): ''' fill in information communicated between agents ''' comms = delta_pos(other_agent, agent).tolist() comms += [other_agent.state.c] # will only work with zero-padding formatting # TODO: I think non-communication should send None instead of zero, because zero has real meaning # however this causes a problem with action function if distance(agent, other_agent) > world.max_communication_distance: comms = [0] * len(comms) return comms landmark_positions = format_observation( observe=lambda landmark: delta_pos(landmark, agent).tolist(), objects=world.landmarks, num_observations=len(world.landmarks), observation_size=world.dim_p) communications = format_observation( observe=communications_observed, objects=[ a for a in world.agents if (a is not agent and not a.terminated) ], num_observations=self.num_agents, observation_size=world.dim_p + 1, sort_key=lambda o: distance(agent, o)) return np.asarray(agent.state.p_pos.tolist() + landmark_positions + communications)
def observation(self, agent, world): # get positions of all entities in this agent's reference frame def observe_agents(other_agent): ''' fill in information communicated/observed between agents ''' ag_obs = delta_pos(other_agent, agent).tolist() # check if within observation range if distance(agent, other_agent) > world.max_communication_distance: ag_obs = [0] * len(ag_obs) return ag_obs def observe_landmarks(landmark): ''' fill in information observed about landmarks ''' ld_obs = delta_pos(landmark, agent).tolist() # check if within observation range and is observable if (distance(landmark, agent) > world.max_communication_distance or not landmark.is_observable()): ld_obs = [0] * len(ld_obs) return ld_obs landmark_positions = format_observation(observe=observe_landmarks, objects=world.landmarks, num_observations=len( world.landmarks), observation_size=world.dim_p) communications = format_observation( observe=observe_agents, objects=[ a for a in world.agents if (a is not agent and not a.terminated) ], num_observations=self.num_agents, observation_size=world.dim_p, sort_key=lambda o: distance(agent, o)) obs = np.asarray(agent.state.p_pos.tolist() + landmark_positions + communications) if agent.terminated: # if agent is terminated, return all zeros for observation # TODO: make this more efficient. Right now it does a lot of unnecessary calcs which are all # then set to zero. Done this way to ensure consistant array size obs = 0.0 * obs return obs
def observation(self, agent, world): # get positions of all entities in this agent's reference frame def observe_landmarks(landmark): ''' fill in information observed about landmarks ''' # ld_obs = delta_pos(landmark, agent).tolist() ld_obs = landmark.state.p_pos.tolist() # check if landmark is giving reward or hazard warning d = distance(landmark, agent) if d < landmark.size: if landmark.is_hazard: landmark.hazard_tag = 1.0 landmark.color = np.array([1.1, 0, 0]) ld_obs += [landmark.hazard_tag] assert (len(ld_obs) == _LANDMARK_OBSERVATION_LEN) return ld_obs landmark_positions = format_observation( observe=observe_landmarks, objects=world.landmarks, num_observations=len(world.landmarks), observation_size=_LANDMARK_OBSERVATION_LEN) # new_obs = np.asarray(agent.state.p_vel.tolist() + agent.state.p_pos.tolist() + landmark_positions + agent_observations) new_obs = np.asarray(agent.state.p_vel.tolist() + agent.state.p_pos.tolist() + landmark_positions) return new_obs
def observation(self, agent, world): # get positions of all entities in this agent's reference frame def communications_observed(other_comm_node): ''' Communication between agents is just the conductance Notes: - inverse of comm resistance (i.e. conductance) used so that we can use zero for out of range comms - noisy measurement of heading - TODO: observation of failures ''' # check if node is terminated is_terminated = 0 if isinstance(other_comm_node, MortalAgent) and other_comm_node.terminated: is_terminated = 1 dx = dy = dvx = dvy = 0. if not is_terminated: dx, dy = delta_pos(other_comm_node, agent) dvx, dvy = delta_vel(other_comm_node, agent) comms = [is_terminated, dx, dy, dvx, dvy] # dx_noisy = dx + np.random.normal(0.0, 0.01) # dy_noisy = dy + np.random.normal(0.0, 0.01) # comms = [dx_noisy, dy_noisy] # heading = np.arctan2(dy,dx) + np.random.normal(0.0, 0.1) # conductance = 1.0/self._calculate_resistance(agent, other_comm_node, world) # comms = [heading, conductance] # set comms to zero if out for range # if distance(agent, other_comm_node) > agent.max_observation_distance: # comms = [0] * len(comms) return comms # comm_nodes are origin and destination terminals and unterminated agents comm_nodes = world.landmarks[0:2] comm_nodes.extend([a for a in world.agents if a is not agent]) communications = format_observation(observe = communications_observed, objects = comm_nodes, num_observations = (self.num_agents-1) + 2, observation_size = 2*world.dim_p + 1) # observe velocity if not terminated vel_obs = [0, 0] if not agent.terminated: vel_obs = agent.state.p_vel.tolist() # package observation obs = np.asarray(vel_obs + agent.state.p_pos.tolist() + communications) if agent.terminated: # if agent is terminated, return all zeros for observation # TODO: make this more efficient. Right now it does a lot of unnecessary calcs which are all # then set to zero. Done this way to ensure consistant array size obs = 0.0*obs return obs
def observation(self, agent, world): # get positions of all entities in this agent's reference frame landmark_positions = format_observation(observe = lambda landmark: delta_pos(agent, landmark).tolist(), objects = world.landmarks, num_observations = len(world.landmarks), observation_size = world.dim_p) obs = np.asarray(agent.state.p_pos.tolist() + landmark_positions) return obs
def observation(self, agent, world): # get positions of all entities in this agent's reference frame def communications_observed(other_agent): ''' fill in information communicated between agents ''' comms = delta_pos(other_agent, agent).tolist() comms += [ self._calculate_landmark_resistance_gain( agent, other_agent, world) ] # will only work with zero-padding formatting # TODO: I think non-communication should send None instead of zero, because zero has real meaning # however this causes a problem with action function if distance(agent, other_agent) > world.max_communication_distance: comms = [0] * len(comms) return comms landmark_positions = format_observation( observe=lambda landmark: delta_pos(landmark, agent).tolist(), objects=world.landmarks, num_observations=len(world.landmarks), observation_size=world.dim_p) communications = format_observation( observe=communications_observed, objects=[ a for a in world.agents if (a is not agent and not a.terminated) ], num_observations=self.num_agents, observation_size=world.dim_p + 1, sort_key=lambda o: distance(agent, o)) obs = np.asarray(agent.state.p_pos.tolist() + landmark_positions + communications) if agent.terminated: # if agent is terminated, return all zeros for observation # TODO: make this more efficient. Right now it does a lot of unnecessary calcs which are all # then set to zero. Done this way to ensure consistant array size obs = 0.0 * obs return obs
def _direct_observation(self, agent, world): # get positions of all entities in this agent's reference frame def communications_observed(other_comm_node): ''' Communication between agents is just the conductance Notes: - inverse of comm resistance (i.e. conductance) used so that we can use zero for out of range comms - noisy measurement of heading - TODO: observation of failures ''' # check if node is terminated is_terminated = 0 if isinstance(other_comm_node, MortalAgent) and other_comm_node.terminated: is_terminated = 1 dx = dy = dvx = dvy = 0. if not is_terminated: dx, dy = delta_pos(other_comm_node, agent) dvx, dvy = delta_vel(other_comm_node, agent) comms = [is_terminated, dx, dy, dvx, dvy] # set comms to zero if out for range # if distance(agent, other_comm_node) > agent.max_observation_distance: if not check_2way_communicability(agent, other_comm_node): comms = [0] * len(comms) return comms # Observe communication terminals terminals = (world.origin_terminal_landmark.state.p_pos.tolist() + world.destination_terminal_landmark.state.p_pos.tolist()) # comm_nodes are origin and destination terminals and unterminated agents comm_nodes = [] comm_nodes.extend([a for a in world.agents if a is not agent]) communications = format_observation( observe=communications_observed, objects=comm_nodes, num_observations=self.num_agents - 1, observation_size=2 * world.dim_p + 1) # package observation obs = np.asarray([agent.terminated] + agent.state.p_vel.tolist() + agent.state.p_pos.tolist() + terminals + communications) return obs
def test_format_observation_1(self): ''' test observation formatting basic functionality ''' # trial 1 ag = [0,0] a1 = [1,1] a2 = [-1.0, 1.0] observe = lambda a: [a[0]-ag[0], a[1]-ag[1]] objects = [a1, a2] num_observations = 2 observation_size = 2 observation = ol.format_observation(observe=observe, objects=objects, num_observations=num_observations, observation_size=observation_size) self.assertEqual(len(observation), 4) self.assertAlmostEqual(observation[0], 1.0) self.assertAlmostEqual(observation[1], 1.0) self.assertAlmostEqual(observation[2], -1.0) self.assertAlmostEqual(observation[3], 1.0)
def test_format_observation_2(self): ''' test observation formatting for padding ''' # trial 1 ag = [0,0] a1 = [1,1] a2 = [-1.0, 1.0] observe = lambda a: [a[0]-ag[0], a[1]-ag[1]] objects = [a1, a2] num_observations = 5 observation_size = 2 observation = ol.format_observation(observe=observe, objects=objects, num_observations=num_observations, observation_size=observation_size) self.assertEqual(len(observation), 10) self.assertAlmostEqual(observation[0], 1.0) self.assertAlmostEqual(observation[1], 1.0) self.assertAlmostEqual(observation[2], -1.0) self.assertAlmostEqual(observation[3], 1.0) for i in range(4, len(observation)): self.assertAlmostEqual(observation[i], 0.0)
def _direct_observation(self, agent, world): # get positions of all entities in this agent's reference frame def observe_agents(other_agent): ''' fill in information communicated/observed between agents ''' # check if node is terminated is_terminated = 0 if isinstance(other_agent, MortalAgent) and other_agent.terminated: is_terminated = 1 # relative speed and position of other agent # dx = dy = dvx = dvy = 0. # if not is_terminated: dx, dy = delta_pos(other_agent, agent) dvx, dvy = delta_vel(other_agent, agent) # get local reward function at position of other agent other_landmark_sensor_reading = 0.0 for lm in world.landmarks: other_landmark_sensor_reading += lm.reward_fn.get_value( *other_agent.state.p_pos) ag_obs = [ is_terminated, dx, dy, dvx, dvy, other_landmark_sensor_reading ] assert (len(ag_obs) == _AGENT_OBSERVATION_LEN) return ag_obs def observe_landmarks(landmark): ''' fill in information observed about landmarks ''' # ld_obs = delta_pos(landmark, agent).tolist() # # check if within observation range and is observable # d = distance(landmark, agent) # if d > world.max_communication_distance: # ld_obs = [0.0]*len(ld_obs) ld_obs = [] # check reward signal received from landmark landmark_sensor_reading = 0.0 for lm in world.landmarks: landmark_sensor_reading += lm.reward_fn.get_value( *agent.state.p_pos) ld_obs += [landmark_sensor_reading] assert (len(ld_obs) == _LANDMARK_OBSERVATION_LEN) return ld_obs landmark_observations = format_observation( observe=observe_landmarks, objects=world.landmarks, num_observations=len(world.landmarks), observation_size=_LANDMARK_OBSERVATION_LEN) agent_observations = format_observation( observe=observe_agents, objects=[a for a in world.agents if a is not agent], num_observations=self.num_agents - 1, observation_size=_AGENT_OBSERVATION_LEN) new_obs = np.asarray([agent.terminated] + agent.state.p_vel.tolist() + agent.state.p_pos.tolist() + landmark_observations + agent_observations) # append previous observation for velocity estimation if agent.previous_observation is None: agent.previous_observation = 0.0 * new_obs obs = np.append(new_obs, agent.previous_observation) agent.previous_observation = new_obs return obs
def _direct_observation(self, agent, world): ''' observation where each entity's state has it's own component of observe vector ''' # get positions of all entities in this agent's reference frame def observe_agents(other_agent): ''' fill in information communicated/observed between agents ''' # check if node is terminated is_terminated = 0 if isinstance(other_agent, MortalAgent) and other_agent.terminated: is_terminated = 1 dx = dy = dvx = dvy = 0. if not is_terminated: dx, dy = delta_pos(other_agent, agent) dvx, dvy = delta_vel(other_agent, agent) ag_obs = [is_terminated, dx, dy, dvx, dvy] assert (len(ag_obs) == _AGENT_OBSERVATION_LEN) return ag_obs def observe_landmarks(landmark): ''' fill in information observed about landmarks ''' ld_obs = delta_pos(landmark, agent).tolist() # check if within observation range and is observable d = distance(landmark, agent) if d > world.max_communication_distance: ld_obs = [0.0] * len(ld_obs) # check if landmark is giving reward or hazard warning if d < landmark.size: if landmark.is_hazard: landmark.hazard_tag = 1.0 landmark.color = np.array([1.1, 0, 0]) ld_obs += [landmark.hazard_tag] assert (len(ld_obs) == _LANDMARK_OBSERVATION_LEN) return ld_obs landmark_positions = format_observation( observe=observe_landmarks, objects=world.landmarks, num_observations=len(world.landmarks), observation_size=_LANDMARK_OBSERVATION_LEN) agent_observations = format_observation( observe=observe_agents, objects=[a for a in world.agents if (a is not agent)], num_observations=self.num_agents - 1, observation_size=_AGENT_OBSERVATION_LEN) new_obs = np.asarray([agent.terminated] + agent.state.p_vel.tolist() + agent.state.p_pos.tolist() + landmark_positions + agent_observations) # append previous observation for velocity estimation # if agent.previous_observation is None: # agent.previous_observation = 0.0*new_obs # obs = np.append(new_obs, agent.previous_observation) # agent.previous_observation = new_obs return new_obs
def observation(self, agent, world): # get positions of all entities in this agent's reference frame def observe_agents(other_agent): ''' fill in information communicated/observed between agents ''' # check if node is terminated is_terminated = 0 if isinstance(other_agent, MortalAgent) and other_agent.terminated: is_terminated = 1 # relative speed and position of other agent # dx = dy = dvx = dvy = 0. # if not is_terminated: dx, dy = delta_pos(other_agent, agent) dvx, dvy = delta_vel(other_agent, agent) # get local reward function at position of other agent other_sensed_reward = 0.0 for lm in world.landmarks: other_sensed_reward += lm.reward_fn.get_value( *other_agent.state.p_pos) ag_obs = [is_terminated, dx, dy, dvx, dvy, other_sensed_reward] assert (len(ag_obs) == _AGENT_OBSERVATION_LEN) return ag_obs def observe_landmarks(landmark): ''' fill in information observed about landmarks ''' # ld_obs = delta_pos(landmark, agent).tolist() # # check if within observation range and is observable # d = distance(landmark, agent) # if d > world.max_communication_distance: # ld_obs = [0.0]*len(ld_obs) ld_obs = [] # check reward signal received from landmark sensed_reward = 0.0 for lm in world.landmarks: sensed_reward += lm.reward_fn.get_value(*agent.state.p_pos) ld_obs += [sensed_reward] assert (len(ld_obs) == _LANDMARK_OBSERVATION_LEN) return ld_obs landmark_observations = format_observation( observe=observe_landmarks, objects=world.landmarks, num_observations=len(world.landmarks), observation_size=_LANDMARK_OBSERVATION_LEN) agent_observations = format_observation( observe=observe_agents, objects=[ a for a in world.agents if (a is not agent and not a.terminated) ], num_observations=self.num_agents - 1, observation_size=_AGENT_OBSERVATION_LEN) new_obs = np.asarray(agent.state.p_vel.tolist() + agent.state.p_pos.tolist() + landmark_observations + agent_observations) if agent.terminated: # if agent is terminated, return all zeros for observation # TODO: make this more efficient. Right now it does a lot of unnecessary calcs which are all # then set to zero. Done this way to ensure consistant array size new_obs = 0.0 * new_obs # append previous observation for velocity estimation if agent.previous_observation is None: agent.previous_observation = 0.0 * new_obs obs = np.append(new_obs, agent.previous_observation) agent.previous_observation = new_obs return obs