Example #1
    def demo_populate_scene(self):
        # three demo agents with fixed spawn points and waypoint routes
        self.agents.add(
            Agent(agent_id=0,
                  screen=self.screen,
                  game=self,
                  agent_image=self.agent_image,
                  field=self.field_rect,
                  init_position=(3, 1),
                  init_direction=(1, 1),
                  max_speed=1.8,
                  waypoints=[self.waypoints['stop'], self.waypoints['start']]))

        self.agents.add(
            Agent(agent_id=1,
                  screen=self.screen,
                  game=self,
                  agent_image=self.agent_image,
                  field=self.field_rect,
                  init_position=(3, 5),
                  init_direction=(1, 1),
                  max_speed=1.8,
                  waypoints=[self.waypoints['start'], self.waypoints['stop']]))

        self.agents.add(
            Agent(agent_id=2,
                  screen=self.screen,
                  game=self,
                  agent_image=self.agent_image,
                  field=self.field_rect,
                  init_position=(2, 2),
                  init_direction=(1, 1),
                  max_speed=1.14,
                  waypoints=[self.waypoints['start'], self.waypoints['stop']]))
Example #2
    def __init__(self, width, height, n_agents):
        self.width = width
        self.height = height
        self.n_agents = n_agents
        self.agent1 = Agent()
        self.agent2 = Agent()
        self.target = Target()
        self.grid = np.zeros([self.width, self.height])

        self.set_agent_pos()
        self.set_target_pos()

        self.obs = []
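The Gridworld snippets in this listing (Examples #2 and #4 through #6) reference Agent and Target classes that live in the surrounding project. A minimal stub, inferred purely from how those snippets use them (pos_x/pos_y attributes, update_pos(), reset(), move_random()), could look like the sketch below; the action encoding and start positions are assumptions, not the project's actual values.

import random


class Agent:
    # Stand-in inferred from usage; the real project class differs.
    def __init__(self):
        self.reset()

    def update_pos(self, action):
        # assumed encoding: 0=up, 1=down, 2=left, 3=right
        # (no boundary clamping in this sketch)
        dx, dy = [(0, -1), (0, 1), (-1, 0), (1, 0)][action]
        self.pos_x += dx
        self.pos_y += dy

    def reset(self, start_x=0, start_y=0, use_given_start=False):
        self.pos_x = start_x if use_given_start else 0
        self.pos_y = start_y if use_given_start else 0


class Target:
    # Stationary unless move_random() is called.
    def __init__(self):
        self.reset()

    def move_random(self):
        self.pos_x += random.choice([-1, 0, 1])
        self.pos_y += random.choice([-1, 0, 1])

    def reset(self):
        self.pos_x, self.pos_y = 4, 4  # assumed fixed target cell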
Example #3
    def add_agents(self, agent_dict):
        # agent_dict: an iterable of spec dicts, one per spawn group
        # (requires: from random import randint, normalvariate)
        for agent in agent_dict:
            dx, dy = float(agent['dx']), float(agent['dy'])
            x, y = float(agent['x']), float(agent['y'])
            num = int(agent['n'])
            a_type = int(agent['type'])
            waypoints = [awp['id'] for awp in agent['addwaypoint']]

            rd = 0.3  # agent radius

            for _ in range(num):
                # spawn each agent at a random position within (dx, dy) of
                # (x, y), heading in a random direction; randomised inside
                # the loop so spawned agents do not all stack on one spot
                direction = (randint(-1, 1), randint(-1, 1))
                position = random_position(x, y, dx, dy)
                self.agents.add(
                    Agent(agent_id=self._agent_count,
                          atype=a_type,
                          screen=self.screen,
                          game=self,
                          agent_image=self.agent_image,
                          field=self.field_rect,
                          init_position=position,
                          init_direction=direction,
                          max_speed=normalvariate(1.34, 0.26),
                          radius=rd,
                          waypoints=[self.waypoints[wp] for wp in waypoints]))
                self._agent_count += 1
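The structure add_agents() expects is only implied by the keys it reads: an iterable of spec dicts, each describing one spawn group. A hypothetical spec (string values, as they would arrive from a parsed scene file; the waypoint ids 'start' and 'stop' echo Example #1) might look like:

agent_specs = [
    {
        'x': '10.0', 'y': '5.0',   # centre of the spawn area
        'dx': '2.0', 'dy': '2.0',  # spawn-area extents
        'n': '5',                  # number of agents to spawn
        'type': '0',               # agent type id
        'addwaypoint': [{'id': 'start'}, {'id': 'stop'}],
    },
]
game.add_agents(agent_specs)  # 'game' stands for whatever object owns add_agents()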
Example #4
    def __init__(self, width, height, n_agents):
        self.width = width
        self.height = height
        self.n_agents = n_agents
        self.agents = [Agent()
                       for _ in range(n_agents)]  # initialize multiple agents
        self.target = Target()
        self.grid = np.zeros([self.width, self.height])
        self.timestep = 0

        self.set_agent_pos()
        self.set_target_pos()

        self.obs = []
Example #5
import random

import numpy as np

# Agent and Target come from the surrounding project
# (see the stub sketched after Example #2)


class Gridworld:
    def __init__(self, width, height, n_agents):
        self.width = width
        self.height = height
        self.n_agents = n_agents
        self.agent1 = Agent()
        self.agent2 = Agent()
        self.target = Target()
        self.grid = np.zeros([self.width, self.height])

        self.set_agent_pos()
        self.set_target_pos()

        self.obs = []

    def set_agent_pos(self):
        # mark the agents on the grid (agent1 -> 1, agent2 -> 2)
        self.grid[self.agent1.pos_x][self.agent1.pos_y] = 1
        self.grid[self.agent2.pos_x][self.agent2.pos_y] = 2

    def set_target_pos(self, random_pos=False):
        if random_pos:
            # randint is inclusive on both ends, so clamp to width-1/height-1
            pos_x = random.randint(0, self.width - 1)
            pos_y = random.randint(0, self.height - 1)
            # keep the Target object in sync with the grid
            self.target.pos_x = pos_x
            self.target.pos_y = pos_y
        else:
            pos_x = self.target.pos_x
            pos_y = self.target.pos_y
        self.grid[pos_x][pos_y] = 8

    def step(self, action, action2):
        done = False
        # individual rewards: each agent pays a -1 penalty per step
        reward = -1
        reward_2 = -1

        # apply each agent's chosen action
        self.agent1.update_pos(action)
        self.agent2.update_pos(action2)

        # update positions in the grid world
        self.grid_reset()
        self.set_agent_pos()

        # dist = math.sqrt((self.agent1.pos_x - self.target.pos_x) ** 2 + (self.agent1.pos_y - self.target.pos_y) ** 2)
        # reward = reward - abs(0.1*dist)

        done1 = False
        done2 = False

        # if agent 1 reaches the target
        if self.agent1.pos_x == self.target.pos_x and self.agent1.pos_y == self.target.pos_y:
            reward += 20
            done1 = True

        # if agent2 reaches the target
        if self.agent2.pos_x == self.target.pos_x and self.agent2.pos_y == self.target.pos_y:
            reward_2 += 20
            done2 = True

        if done1 or done2:
            done = True

        # observation vector: agent positions followed by the target position
        observations = [
            self.agent1.pos_x, self.agent1.pos_y,  # agent1
            self.agent2.pos_x, self.agent2.pos_y,  # agent2
            self.target.pos_x, self.target.pos_y,  # target
        ]

        # self.target.move_random()  # uncomment for a moving target
        self.set_target_pos(random_pos=False)

        return observations, reward, reward_2, done

    def reset(self):
        self.agent1.reset()
        self.agent2.reset(
            self.agent1.pos_x, self.agent1.pos_y,
            use_given_start=True)  # true to use same starting location
        self.target.reset()

        observations = [
            self.agent1.pos_x, self.agent1.pos_y,  # agent1
            self.agent2.pos_x, self.agent2.pos_y,  # agent2
            self.target.pos_x, self.target.pos_y,  # target
        ]
        return observations

    def grid_reset(self):
        self.grid = np.zeros([self.width, self.height])
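A minimal rollout sketch for the two-agent class above; the discrete 0-3 action range is an assumption about what Agent.update_pos() accepts, not something the snippet specifies.

import random

env = Gridworld(width=10, height=10, n_agents=2)
obs = env.reset()
done = False
while not done:
    a1 = random.randint(0, 3)  # assumed discrete action encoding
    a2 = random.randint(0, 3)
    obs, r1, r2, done = env.step(a1, a2)
print('final observation:', obs)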
Example #6
import random

import numpy as np

# Agent and Target come from the surrounding project
# (see the stub sketched after Example #2)


class Gridworld:
    def __init__(self, width, height, n_agents):
        self.width = width
        self.height = height
        self.n_agents = n_agents
        self.agent1 = Agent()
        self.target = Target()
        self.grid = np.zeros([self.width, self.height])
        self.timestep = 0

        self.set_agent_pos()
        self.set_target_pos()

        self.obs = []

    def set_agent_pos(self):
        # mark the agent on the grid
        self.grid[self.agent1.pos_x][self.agent1.pos_y] = 1

    def set_target_pos(self, random_pos=False):
        if random_pos:
            # randint is inclusive on both ends, so clamp to width-1/height-1
            pos_x = random.randint(0, self.width - 1)
            pos_y = random.randint(0, self.height - 1)
            # keep the Target object in sync with the grid
            self.target.pos_x = pos_x
            self.target.pos_y = pos_y
        else:
            pos_x = self.target.pos_x
            pos_y = self.target.pos_y
        self.grid[pos_x][pos_y] = 8

    def step(self, action):
        done = False
        reward = -1  # negative reward for each step
        # apply the agent's chosen action
        self.agent1.update_pos(action)

        self.target.move_random()  # TODO: remove this call for a stationary target

        # update positions in the grid world
        self.grid_reset()
        self.set_agent_pos()
        self.set_target_pos(random_pos=False)

        # dist = math.sqrt((self.agent1.pos_x - self.target.pos_x) ** 2 + (self.agent1.pos_y - self.target.pos_y) ** 2)
        # reward = reward - abs(0.1*dist)

        if self.agent1.pos_x == self.target.pos_x and self.agent1.pos_y == self.target.pos_y:
            reward += 20
            done = True

        self.timestep += 1
        #print("reward : "+str(reward)+str("\n"))

        # observation vector: agent position followed by the target position
        observations = [
            self.agent1.pos_x, self.agent1.pos_y,
            self.target.pos_x, self.target.pos_y,
        ]

        return observations, reward, done

    def reset(self):
        self.timestep = 0
        self.agent1.reset()
        self.target.reset()

        observations = [
            self.agent1.pos_x, self.agent1.pos_y,
            self.target.pos_x, self.target.pos_y,
        ]
        return observations

    def grid_reset(self):
        self.grid = np.zeros([self.width, self.height])
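Because set_agent_pos() writes 1 and set_target_pos() writes 8 into self.grid, the grid itself doubles as a quick render target; a small sketch for eyeballing the state:

env = Gridworld(width=5, height=5, n_agents=1)
for row in env.grid.T:  # transpose so y runs down the printout
    print(' '.join('.' if cell == 0 else str(int(cell)) for cell in row))
# '1' marks the agent, '8' the target, '.' an empty cell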