def demo_populate_scene(self):
    # Spawn three demo agents with fixed start positions and waypoint routes.
    self.agents.add(
        Agent(agent_id=0, screen=self.screen, game=self,
              agent_image=self.agent_image, field=self.field_rect,
              init_position=(3, 1), init_direction=(1, 1), max_speed=1.8,
              waypoints=[self.waypoints['stop'], self.waypoints['start']]))
    self.agents.add(
        Agent(agent_id=1, screen=self.screen, game=self,
              agent_image=self.agent_image, field=self.field_rect,
              init_position=(3, 5), init_direction=(1, 1), max_speed=1.8,
              waypoints=[self.waypoints['start'], self.waypoints['stop']]))
    self.agents.add(
        Agent(agent_id=2, screen=self.screen, game=self,
              agent_image=self.agent_image, field=self.field_rect,
              init_position=(2, 2), init_direction=(1, 1), max_speed=1.14,
              waypoints=[self.waypoints['start'], self.waypoints['stop']]))
def __init__(self, width, height, n_agents):
    self.width = width
    self.height = height
    self.n_agents = n_agents
    self.agent1 = Agent()
    self.agent2 = Agent()
    self.target = Target()
    self.grid = np.zeros([self.width, self.height])
    self.set_agent_pos()
    self.set_target_pos()
    self.obs = []
def add_agents(self, agent_dict):
    for agent in agent_dict:
        dx, dy = float(agent['dx']), float(agent['dy'])
        x, y = float(agent['x']), float(agent['y'])
        num = int(agent['n'])
        a_type = int(agent['type'])
        # Spawn each agent with a random direction and a random position
        # within (dx, dy) of (x, y).
        direction = (randint(-1, 1), randint(-1, 1))
        position = random_position(x, y, dx, dy)
        waypoints = [awp['id'] for awp in agent['addwaypoint']]
        rd = 0.3
        for _ in range(num):
            self.agents.add(
                Agent(agent_id=self._agent_count, atype=a_type,
                      screen=self.screen, game=self,
                      agent_image=self.agent_image, field=self.field_rect,
                      init_position=position, init_direction=direction,
                      max_speed=normalvariate(1.34, 0.26), radius=rd,
                      waypoints=[self.waypoints[wp] for wp in waypoints]))
            self._agent_count += 1
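For reference, `add_agents` expects an iterable of dicts, e.g. from a parsed scenario file. A minimal sketch of one entry follows; the `scene` object, the string-typed values (implied by the `float`/`int` casts above), and the waypoint ids are assumptions for illustration:

# Hypothetical spawn-group record matching the keys add_agents() reads.
agent_dict = [
    {
        'x': '10.0', 'y': '4.0',    # spawn-area center
        'dx': '2.0', 'dy': '2.0',   # spawn-area extents
        'n': '5',                   # number of agents in this group
        'type': '0',                # agent type id
        'addwaypoint': [{'id': 'start'}, {'id': 'stop'}],
    },
]
scene.add_agents(agent_dict)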
def __init__(self, width, height, n_agents):
    self.width = width
    self.height = height
    self.n_agents = n_agents
    # Initialize multiple agents.
    self.agents = [Agent() for _ in range(n_agents)]
    self.target = Target()
    self.grid = np.zeros([self.width, self.height])
    self.timestep = 0
    self.set_agent_pos()
    self.set_target_pos()
    self.obs = []
class Gridworld:
    def __init__(self, width, height, n_agents):
        self.width = width
        self.height = height
        self.n_agents = n_agents
        self.agent1 = Agent()
        self.agent2 = Agent()
        self.target = Target()
        self.grid = np.zeros([self.width, self.height])
        self.set_agent_pos()
        self.set_target_pos()
        self.obs = []

    def set_agent_pos(self):
        # Mark each agent's cell on the grid (1 for agent 1, 2 for agent 2).
        self.grid[self.agent1.pos_x][self.agent1.pos_y] = 1
        self.grid[self.agent2.pos_x][self.agent2.pos_y] = 2

    def set_target_pos(self, random_pos=False):
        if random_pos:
            # randint is inclusive on both ends, so sample up to
            # width - 1 / height - 1 to stay inside the grid.
            pos_x = random.randint(0, self.width - 1)
            pos_y = random.randint(0, self.height - 1)
            self.target.pos_x = pos_x
            self.target.pos_y = pos_y
        else:
            pos_x = self.target.pos_x
            pos_y = self.target.pos_y
        self.grid[pos_x][pos_y] = 8

    def step(self, action, action2):
        done = False
        # Individual rewards: -1 per step to encourage short paths.
        reward = -1
        reward_2 = -1

        # Apply the chosen actions.
        self.agent1.update_pos(action)
        self.agent2.update_pos(action2)

        # Update positions in the grid world.
        self.grid_reset()
        self.set_agent_pos()

        # dist = math.sqrt((self.agent1.pos_x - self.target.pos_x) ** 2
        #                  + (self.agent1.pos_y - self.target.pos_y) ** 2)
        # reward = reward - abs(0.1 * dist)

        done1 = False
        done2 = False
        # Agent 1 reaches the target.
        if self.agent1.pos_x == self.target.pos_x and self.agent1.pos_y == self.target.pos_y:
            reward += 20
            done1 = True
        # Agent 2 reaches the target.
        if self.agent2.pos_x == self.target.pos_x and self.agent2.pos_y == self.target.pos_y:
            reward_2 += 20
            done2 = True
        if done1 or done2:
            done = True

        # Observation vector: agent positions followed by the target position.
        observations = [
            self.agent1.pos_x, self.agent1.pos_y,  # agent 1
            self.agent2.pos_x, self.agent2.pos_y,  # agent 2
            self.target.pos_x, self.target.pos_y,  # target
        ]

        # self.target.move_random()  # uncomment for a moving target
        self.set_target_pos(random_pos=False)
        return observations, reward, reward_2, done

    def reset(self):
        self.agent1.reset()
        # Start agent 2 from the same location as agent 1.
        self.agent2.reset(self.agent1.pos_x, self.agent1.pos_y,
                          use_given_start=True)
        self.target.reset()
        return [
            self.agent1.pos_x, self.agent1.pos_y,  # agent 1
            self.agent2.pos_x, self.agent2.pos_y,  # agent 2
            self.target.pos_x, self.target.pos_y,  # target
        ]

    def grid_reset(self):
        self.grid = np.zeros([self.width, self.height])
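A minimal episode loop for this two-agent variant, assuming `Agent` and `Target` provide the `reset`/`update_pos` methods used above and that actions are small integers; the grid size, the random stand-in policies, and the four-action space are assumptions:

import random

env = Gridworld(width=10, height=10, n_agents=2)
obs = env.reset()
done = False
while not done:
    # Random policies stand in for learned ones; 4 actions is an assumption.
    action1 = random.randint(0, 3)
    action2 = random.randint(0, 3)
    obs, reward, reward_2, done = env.step(action1, action2)

Note that `step` returns a separate reward per agent, so each agent can be trained on its own signal even though the episode ends as soon as either one reaches the target.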
class Gridworld:
    def __init__(self, width, height, n_agents):
        self.width = width
        self.height = height
        self.n_agents = n_agents
        self.agent1 = Agent()
        self.target = Target()
        self.grid = np.zeros([self.width, self.height])
        self.timestep = 0
        self.set_agent_pos()
        self.set_target_pos()
        self.obs = []

    def set_agent_pos(self):
        # Mark the agent's cell on the grid.
        self.grid[self.agent1.pos_x][self.agent1.pos_y] = 1

    def set_target_pos(self, random_pos=False):
        if random_pos:
            # randint is inclusive on both ends, so sample up to
            # width - 1 / height - 1 to stay inside the grid.
            pos_x = random.randint(0, self.width - 1)
            pos_y = random.randint(0, self.height - 1)
        else:
            pos_x = self.target.pos_x
            pos_y = self.target.pos_y
        self.grid[pos_x][pos_y] = 8

    def step(self, action):
        done = False
        reward = -1  # negative reward for each step

        # Apply the action; the target takes a random walk.
        self.agent1.update_pos(action)
        self.target.move_random()  # TODO: remove this for a stationary target

        # Update positions in the grid world.
        self.grid_reset()
        self.set_agent_pos()
        self.set_target_pos(random_pos=False)

        # dist = math.sqrt((self.agent1.pos_x - self.target.pos_x) ** 2
        #                  + (self.agent1.pos_y - self.target.pos_y) ** 2)
        # reward = reward - abs(0.1 * dist)

        if self.agent1.pos_x == self.target.pos_x and self.agent1.pos_y == self.target.pos_y:
            reward += 20
            done = True
        self.timestep += 1

        # Observation vector: agent position followed by the target position.
        observations = [
            self.agent1.pos_x, self.agent1.pos_y,
            self.target.pos_x, self.target.pos_y,
        ]
        return observations, reward, done

    def reset(self):
        self.timestep = 0
        self.agent1.reset()
        self.target.reset()
        return [
            self.agent1.pos_x, self.agent1.pos_y,
            self.target.pos_x, self.target.pos_y,
        ]

    def grid_reset(self):
        self.grid = np.zeros([self.width, self.height])
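The single-agent variant differs from the two-agent one in its `step` signature (one action in, one reward out) and in its `timestep` counter. A sketch of an episode that accumulates the return, under the same assumptions as above (random stand-in policy, four actions):

import random

env = Gridworld(width=10, height=10, n_agents=1)
obs = env.reset()
done, episode_return = False, 0
while not done:
    action = random.randint(0, 3)  # random policy; 4 actions assumed
    obs, reward, done = env.step(action)
    episode_return += reward
print(episode_return, env.timestep)

With the -1 step penalty and +20 terminal bonus, the return equals 20 minus the number of steps taken, so `episode_return` and `env.timestep` together summarize the episode.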