def update_vis(self): agent_pos_array = self.build_agentPositionArray() all_posarray = [self.prey_loc] + agent_pos_array vis_obs = defs.obs(all_posarray, -1, 0) self.update_event = pursuit_vis.pygame.event.Event( self.visualizer.update_event_type, {'obs': vis_obs}) pursuit_vis.pygame.event.post(self.update_event)
def step(self): agent_actions = [] agent_approvedActions = [] agent_probs = [] #retrieve what the agent wants to do allPos = [self.prey_loc] + self.build_agentPositionArray() preyInd = 0 for (adx, agent) in enumerate(self.agents): #Check what the agent wants to do myInd = adx + 1 curr_obs = defs.obs(allPos, myInd, preyInd) action_probs = agent.behave(curr_obs) agent_probs.append(action_probs) #Get actions for adx in range(len(self.agents)): action = np.random.choice(defs.Actions, p=agent_probs[adx]) agent_actions.append(action) #Resolve collisions random_ordering = range(len(self.agents)) np.random.shuffle(random_ordering) for adx in range(len(self.agents)): agent_idx = random_ordering[adx] agent = self.agents[agent_idx] agent_requestAction = agent_actions[agent_idx] agent_requestMovement = const.ACTION_TO_MOVES[agent_requestAction] agent_requestPosition = autils.movePosition( agent.pos, agent_requestMovement) #Check if it collides, move if it doesn't. curr_agentPosList = self.build_agentPositionArray() all_posList = [self.prey_loc] + curr_agentPosList col = autils.getCollision(all_posList, agent_requestPosition) if (col >= 0): agent_requestAction = defs.Actions.NOOP #NOOP else: agent.behave_act(agent_requestAction) agent_approvedActions.append(agent_requestAction) if self.visualize: self.update_vis() self.prey_step() if config.SIM_DELAY: time.sleep(config.SIM_DELAY / 3) if self.visualize: self.update_vis() # self.world_center() if self.visualize: self.update_vis() return agent_actions, agent_probs
def generate_observation(self, agent_index): """ TODO: Change function to generate the new type of observation (see global_defs.py) """ agent_locs = [agent.pos for agent in self.agents] station_locs = [pos for pos in self.sttn_pos] all_locs = agent_locs + station_locs load_indices = range(len(self.agents), len(all_locs)) obs = global_defs.obs(all_locs, agent_index, load_indices) return obs
def __init__(self, prey_loc, agents_list, visualize): self.agents = [] self.visualize = visualize self.terminal = False self.init_add_prey(prey_loc) self.init_add_agents(agents_list) if self.visualize: allPos = [self.prey_loc] + [self.build_agentPositionArray()] obs = defs.obs(self.build_agentPositionArray(), -1, 0) self.init_visualization(obs) self.center_point = defs.Point2D(0, 0) self.center_point.x = int(config.DIMENSIONS / 2) self.center_point.y = int(config.DIMENSIONS / 2)
def test_agent_respond_sanity(self): """ Testing that agent probabilites always sum to one and that the agent always takes actions within bounds. :return: """ for i in range(1000): with self.subTest(i=i): random_pos_array = np.random.randint( 0, 10, (20, 2)) #Generating 20 random locations random_pos_list = [ global_defs.Point2D(ele[0], ele[1]) for ele in random_pos_array ] a = agent_random.agent_random( global_defs.Point2D(random_pos_list[0][0], random_pos_list[0][1])) allPos = random_pos_list myInd = 0 loadIndices = range(4, 8) random_observation = global_defs.obs(allPos, myInd, loadIndices) (action_probs, action_idx) = a.respond(random_observation) self.assertTrue(len(action_probs) == 6) np.testing.assert_approx_equal(np.sum(action_probs), 1) self.assertTrue(action_idx < 6) print(action_probs, action_idx) if action_idx == global_defs.Actions.LOAD: is_neighbor = False for loadidx in (loadIndices): loadPos = allPos[loadidx] is_neighbor = is_neighbor or utils.is_neighbor( loadPos, a.pos) self.assertTrue(is_neighbor) is_neighbor = False for loadidx in (loadIndices): loadPos = allPos[loadidx] is_neighbor = is_neighbor or utils.is_neighbor( loadPos, a.pos) if is_neighbor: msg_str = "Is neighbor {} {}".format( a.pos, [allPos[i] for i in range(4, 8)]) self.assertTrue(action_probs[-1] > 0, msg=msg_str)
def test_agent_tp_1_respond(self): random_pos_array = np.random.randint( 0, 10, (20, 2)) #Generating 20 random locations random_pos_list = [ global_defs.Point2D(ele[0], ele[1]) for ele in random_pos_array ] a = agent_lifter.agent_lifter( global_defs.Point2D(random_pos_list[0][0], random_pos_list[0][1]), 1) allPos = random_pos_list myInd = 0 loadIndices = range(4, 8) random_observation = global_defs.obs(allPos, myInd, loadIndices) (action_probs, action_idx) = a.respond(random_observation) self.assertTrue(len(action_probs) == 6) np.testing.assert_approx_equal(np.sum(action_probs), 1) self.assertTrue(action_idx < 6)
def create_observation_for_agent(self, ind): allpos = [self.prey_loc] + self.build_agentPositionArray() preyInd = 0 myInd = ind + 1 return defs.obs(allpos, myInd, preyInd)