Ejemplo n.º 1
0
    def shortest_random_path(self, mazemap):
        """Walk from the source to the target with a noisy greedy policy.

        A breadth-first search rooted at the target labels every reachable
        Empty/Source cell with its distance to the target.  The walker then
        starts at the source and, on each step, moves to the walkable
        neighbour with the smallest label with probability 0.8 and shares
        the remaining probability evenly between the other walkable
        neighbours.

        Args:
            mazemap: Maze array indexed as ``mazemap[x, y, cell_type]``.

        Returns:
            The number of steps taken, capped at 200; 200 is also returned
            when the walker has no walkable neighbour at all.  -1 is
            returned when ``utils.findSourceAndTarget`` reports a missing
            source or target.
        """
        [sx, sy, tx, ty] = utils.findSourceAndTarget(mazemap)
        if sx == -1 or sy == -1 or tx == -1 or ty == -1:
            return -1

        # Distance-to-target table.  Zero doubles as "unvisited"; the target
        # keeps its zero because the search never re-enters a Target cell.
        # The builtin ``int`` replaces ``np.int``, which NumPy has removed.
        distance = np.zeros([config.Map.Height, config.Map.Width], dtype=int)
        queue = deque()
        queue.append([tx, ty])

        while len(queue):
            [cx, cy] = queue.popleft()
            cur_path_len = distance[cx][cy]

            for k in range(len(utils.dirs)):
                nx = cx + utils.dirs[k][0]
                ny = cy + utils.dirs[k][1]
                if not utils.inMap(nx, ny):
                    continue
                if mazemap[nx, ny, utils.Cell.Empty] or mazemap[nx, ny, utils.Cell.Source]:
                    if distance[nx][ny] == 0 or distance[nx][ny] > cur_path_len + 1:
                        queue.append([nx, ny])
                        distance[nx][ny] = cur_path_len + 1

        # Take the optimal direction with probability ``optimal_dir_prob``.
        step = 0
        max_step = 200
        optimal_dir_prob = 0.8
        invalid_distance = config.Map.Height * config.Map.Width
        while (sx != tx or sy != ty) and step < max_step:
            distance_dirs = []
            walkable = []
            for i in range(len(utils.dirs)):
                dx = sx + utils.dirs[i][0]
                dy = sy + utils.dirs[i][1]
                if utils.inMap(dx, dy) and not mazemap[dx, dy, utils.Cell.Wall]:
                    distance_dirs.append(distance[dx][dy])
                    walkable.append(True)
                else:
                    distance_dirs.append(invalid_distance)
                    walkable.append(False)

            valid_dir_n = sum(walkable)
            if valid_dir_n == 0:
                # Boxed in: the target can never be reached, so report the
                # same value the step cap would produce.
                return max_step

            best_dir = int(np.argmin(distance_dirs))
            if valid_dir_n == 1:
                # With a single legal move the probabilities must still sum
                # to 1; np.random.multinomial hands any missing mass to the
                # last outcome, which may be a wall or outside the map.
                best_prob = 1.0
                other_prob = 0.0
            else:
                best_prob = optimal_dir_prob
                other_prob = (1 - optimal_dir_prob) / (valid_dir_n - 1)

            prob_dirs = []
            for i in range(len(utils.dirs)):
                if i == best_dir:
                    prob_dirs.append(best_prob)
                elif walkable[i]:
                    prob_dirs.append(other_prob)
                else:
                    prob_dirs.append(0.)

            selected_dir = np.argmax(np.random.multinomial(1, prob_dirs))
            sx += utils.dirs[selected_dir][0]
            sy += utils.dirs[selected_dir][1]
            step += 1
        return step
Ejemplo n.º 2
0
    def dfs_path(self, mazemap):
        """Count the moves a depth-first explorer makes before reaching the target.

        The explorer keeps its current route on a stack.  Each loop
        iteration either advances to the first unvisited, non-wall
        neighbour (in ``utils.dirs`` order) or backtracks one cell; both
        cost one step.

        Args:
            mazemap: Maze array indexed as ``mazemap[x, y, cell_type]``.

        Returns:
            The number of advance/backtrack moves performed when the target
            is reached or the stack runs empty, or -1 when
            ``utils.findSourceAndTarget`` reports a missing source or target.
        """
        [sx, sy, tx, ty] = utils.findSourceAndTarget(mazemap)
        if sx == -1 or sy == -1 or tx == -1 or ty == -1:
            return -1

        stack = [[sx, sy]]
        step = 0
        # Zero means unvisited.  The builtin ``int`` replaces ``np.int``,
        # which NumPy has removed.
        visited = np.zeros([config.Map.Height, config.Map.Width], dtype=int)
        visited[sx][sy] = 1

        while stack:
            x, y = stack[-1]
            if x == tx and y == ty:
                break
            for direction in utils.dirs:
                dx = x + direction[0]
                dy = y + direction[1]
                if utils.inMap(dx, dy) and not mazemap[dx, dy, utils.Cell.Wall] and visited[dx][dy] == 0:
                    visited[dx][dy] = 1
                    stack.append([dx, dy])
                    break
            else:
                # Dead end: no unvisited neighbour, so backtrack.
                stack.pop()
            # Advancing and backtracking each cost one step.
            step += 1

        return step
Ejemplo n.º 3
0
    def shortest_path(mazemap):
        """Return the breadth-first distance from the source to the target.

        NOTE(review): the signature has no ``self`` although the block is
        indented like a method; presumably it is a static method or nested
        helper. Confirm against the enclosing scope.

        Args:
            mazemap: Maze array indexed as ``mazemap[x, y, cell_type]``.

        Returns:
            The number of moves on a shortest source-to-target route through
            Empty cells.  -1 is returned when the target was never reached
            and also when ``utils.findSourceAndTarget`` reports a missing
            source or target.
        """
        [sx, sy, tx, ty] = utils.findSourceAndTarget(mazemap)
        if sx == -1 or sy == -1 or tx == -1 or ty == -1:
            return -1

        # Labels are stored as distance + 1 so that zero can mean
        # "unvisited".  The builtin ``int`` replaces ``np.int``, which NumPy
        # has removed.
        labels = np.zeros([config.Map.Height, config.Map.Width], dtype=int)
        labels[sx][sy] = 1
        queue = deque()
        queue.append([sx, sy])

        while len(queue):
            [cx, cy] = queue.popleft()
            cur_path_len = labels[cx][cy]

            for k in range(len(utils.dirs)):
                nx = cx + utils.dirs[k][0]
                ny = cy + utils.dirs[k][1]
                if not utils.inMap(nx, ny):
                    continue
                if mazemap[nx, ny, utils.Cell.Empty] or mazemap[nx, ny, utils.Cell.Target]:
                    if labels[nx][ny] == 0 or labels[nx][ny] > cur_path_len + 1:
                        queue.append([nx, ny])
                        labels[nx][ny] = cur_path_len + 1

        # Undo the +1 offset; an unreached target (label 0) yields -1.
        return labels[tx][ty] - 1
Ejemplo n.º 4
0
    def right_hand_path(self, mazemap):
        """Follow a fixed-turn-order wall-following walk from source to target.

        On every step the walker tries the headings ``cur_dir + 1``,
        ``cur_dir``, ``cur_dir + 3`` and ``cur_dir + 2`` (mod 4), in that
        order, and takes the first one that leads to an in-map Empty or
        Target cell.  The source cell is temporarily rewritten to
        ``utils.Cell.EmptyV`` so the walker may cross it again, and is set
        to ``utils.Cell.SourceV`` before returning.

        Args:
            mazemap: Maze array indexed as ``mazemap[x, y, cell_type]``;
                modified in place only for the duration of the call.

        Returns:
            The number of steps taken to reach the target.  -1 is returned
            when ``utils.findSourceAndTarget`` reports a missing source or
            target, or when the walk is detected to be cycling.
        """
        [sx, sy, tx, ty] = utils.findSourceAndTarget(mazemap)
        if sx == -1 or sy == -1 or tx == -1 or ty == -1:
            return -1

        dirs = np.array([[1, 0], [0, -1], [-1, 0], [0, 1]])
        turn_order = [1, 0, 3, 2]

        count = 0
        cx, cy = sx, sy
        cur_dir = 0
        path = []
        # The walk is a deterministic function of (cell, heading) on a fixed
        # map, so seeing the same state twice proves it will loop forever.
        seen_states = set()

        mazemap[sx, sy] = utils.Cell.EmptyV
        try:
            while cx != tx or cy != ty:
                state = (int(cx), int(cy), cur_dir)
                if state in seen_states:
                    return -1
                seen_states.add(state)

                for i in turn_order:
                    next_dir = (cur_dir + i) % 4
                    nx = cx + dirs[next_dir][0]
                    ny = cy + dirs[next_dir][1]
                    if utils.inMap(nx, ny):
                        if mazemap[nx, ny, utils.Cell.Empty] or mazemap[nx, ny, utils.Cell.Target]:
                            cx, cy = nx, ny
                            cur_dir = next_dir
                            break
                count += 1
                path.append([cx, cy])
        finally:
            # Always put the source marker back, even on an early exit.
            mazemap[sx, sy] = utils.Cell.SourceV

        print(count, path)

        return count
Ejemplo n.º 5
0
    def getDirections(self):
        """Build one direction table per source and per hole.

        For each start cell (sources first, then holes) a breadth-first
        flood is run over the transitions whose ``self.trans`` entry is 0.
        The first time a cell is reached through move index ``j`` it is
        labelled ``(j + 2) % 4``; cells never reached keep -1.  Only cells
        whose ``self.source_hole_map`` value is -2 are expanded further.

        Returns:
            An ``int32`` array of shape
            ``(Source_num + Hole_num, Map.Width, Map.Height)``.
        """
        source_count = config.Source_num
        table_count = source_count + config.Hole_num
        directions = -np.ones(
            (table_count, config.Map.Width, config.Map.Height),
            dtype=np.int32)
        frontier = queue.Queue(maxsize=config.Map.Width * config.Map.Height)

        for table_idx in range(table_count):
            if table_idx < source_count:
                start = self.source_pos[table_idx]
            else:
                start = self.hole_pos[table_idx - source_count]
            frontier.put(start)

            while not frontier.empty():
                cell = frontier.get()
                for j in range(4):
                    if self.trans[cell[0]][cell[1]][j] != 0:
                        continue
                    step = trans_to_action[j]
                    next_pos = [cell[0] + step[0], cell[1] + step[1]]
                    if not utils.inMap(next_pos):
                        continue
                    if directions[table_idx][next_pos[0]][next_pos[1]] != -1:
                        # Already labelled by an earlier visit.
                        continue
                    directions[table_idx][next_pos[0]][next_pos[1]] = (j + 2) % 4
                    if self.source_hole_map[next_pos[0]][next_pos[1]] == -2:
                        frontier.put(next_pos)
        return directions
Ejemplo n.º 6
0
 def rightdown_path(self, mazemap):
     """Walk using only the first two directions of ``utils.dirs``.

     Each step tries ``utils.dirs[0]`` (commented as "right" in the
     original) and, if that cell is blocked, ``utils.dirs[1]`` ("down").
     When both are blocked the walker stays put but the step still counts.

     Returns:
         The number of steps taken, capped at 200, or -1 when
         ``utils.findSourceAndTarget`` reports a missing source or target.
     """
     [sx, sy, tx, ty] = utils.findSourceAndTarget(mazemap)
     if -1 in (sx, sy, tx, ty):
         return -1

     step, max_step = 0, 200
     while step < max_step and (sx != tx or sy != ty):
         # Preference order: right first, then down.
         for move in (utils.dirs[0], utils.dirs[1]):
             dx = sx + move[0]
             dy = sy + move[1]
             if utils.inMap(dx, dy) and not mazemap[dx, dy, utils.Cell.Wall]:
                 sx, sy = dx, dy
                 break
         step += 1
     return step
Ejemplo n.º 7
0
 def isValidPath(self, agent_id, pos, t, agent_pos, end_pos):
     """Tell whether ``agent_id`` may occupy ``pos`` at time ``t``.

     A position is rejected when it is outside the map, when it is
     reserved at time ``t`` by a different agent, or when it is a source
     or hole cell other than ``end_pos``.  ``agent_pos`` is not used.

     Returns:
         True if the position is acceptable, False otherwise.
     """
     if not utils.inMap(pos):
         return False

     # NOTE(review): the loop variable is never used, so the same
     # reservation slot ``t`` is re-checked once per interval entry (and
     # not at all when ``self.reserve_interval`` is empty). Possibly
     # ``t + offset`` was intended; behaviour is kept as-is.
     for _ in self.reserve_interval:
         if t >= len(self.reserve):
             continue
         key = encode(pos)
         slot = self.reserve[t]
         if key in slot and slot[key][0] != agent_id:
             return False

     if pos == end_pos:
         return True
     # Sources and holes may only be entered as the destination.
     return not (pos in self.source_pos or pos in self.hole_pos)
Ejemplo n.º 8
0
    def _step(self, action):
        """Apply one action to the maze and return the environment tuple.

        Action 4 draws a uniformly random cell and relocates the source
        there only if that cell is the target.  Any other action is used as
        an index into ``utils.dirs``; the source moves when the destination
        is inside the map and is a Target or Empty cell.

        Args:
            action: 4 for the random jump, otherwise an index into
                ``utils.dirs``.

        Returns:
            ``(self.mazemap, reward, done, {})`` where ``reward`` is always
            -1 and ``done`` is True once the source has reached the target.
        """
        done = False
        reward = -1

        def relocate(destination):
            # Clear the old source cell, mark the new one, remember it.
            self.mazemap[self.source[0], self.source[1]] = utils.Cell.EmptyV
            self.mazemap[destination[0], destination[1]] = utils.Cell.SourceV
            self.source = destination

        if action == 4:
            # randint excludes its upper bound, so this draws from the same
            # inclusive range as the deprecated random_integers(0, n - 1).
            new_source = [
                np.random.randint(0, config.Map.Height),
                np.random.randint(0, config.Map.Width)
            ]
            if self.mazemap[new_source[0], new_source[1], utils.Cell.Target]:
                done = True
                relocate(new_source)
        else:
            new_source = self.source + utils.dirs[action]
            if utils.inMap(new_source[0], new_source[1]):
                if self.mazemap[new_source[0], new_source[1], utils.Cell.Target]:
                    done = True
                    relocate(new_source)
                elif self.mazemap[new_source[0], new_source[1], utils.Cell.Empty]:
                    relocate(new_source)

        return self.mazemap, reward, done, {}
Ejemplo n.º 9
0
 def rightdownupleft_path(self, mazemap):
     """Walk by always taking the first open direction in ``utils.dirs``.

     The directions are tried in their stored order (described in the
     original as right, down, up, left).  A step is counted even when no
     direction is open.

     Returns:
         The number of steps taken, capped at 200, or -1 when
         ``utils.findSourceAndTarget`` reports a missing source or target.
     """
     [sx, sy, tx, ty] = utils.findSourceAndTarget(mazemap)
     if -1 in (sx, sy, tx, ty):
         return -1

     step, max_step = 0, 200
     while step < max_step and (sx != tx or sy != ty):
         # Deterministic preference order given by utils.dirs.
         for offset in utils.dirs:
             dx = sx + offset[0]
             dy = sy + offset[1]
             if utils.inMap(dx, dy) and not mazemap[dx, dy, utils.Cell.Wall]:
                 sx, sy = dx, dy
                 break
         step += 1
     return step
Ejemplo n.º 10
0
 def rightdown_random_path(self, mazemap):
     """Random walk biased toward the first two entries of ``utils.dirs``.

     Each step repeatedly samples a direction with probabilities
     0.4, 0.4, 0.1, 0.1 (right, down, up, left per the original comment)
     until the sampled neighbour is inside the map and not a wall.

     Returns:
         The number of steps taken, capped at 200; 200 is also returned
         when the walker has no walkable neighbour.  -1 is returned when
         ``utils.findSourceAndTarget`` reports a missing source or target.
     """
     [sx, sy, tx, ty] = utils.findSourceAndTarget(mazemap)
     if sx == -1 or sy == -1 or tx == -1 or ty == -1:
         return -1
     step = 0
     max_step = 200
     # right 0.4, down 0.4, up 0.1, left 0.1
     dir_probs = [0.4, 0.4, 0.1, 0.1]

     def walkable(dir_index):
         nx = sx + utils.dirs[dir_index][0]
         ny = sy + utils.dirs[dir_index][1]
         return utils.inMap(nx, ny) and not mazemap[nx, ny, utils.Cell.Wall]

     while (sx != tx or sy != ty) and step < max_step:
         if not any(walkable(i) for i in range(len(dir_probs))):
             # Boxed in: the rejection loop below would never terminate,
             # so report what the step cap would have produced.
             return max_step
         while True:
             selected_dir = np.argmax(np.random.multinomial(1, dir_probs))
             if walkable(selected_dir):
                 sx = sx + utils.dirs[selected_dir][0]
                 sy = sy + utils.dirs[selected_dir][1]
                 break
         step += 1
     return step
Ejemplo n.º 11
0
 def random_path(self, mazemap):
     """Uniform random walk over walkable neighbours until the target is hit.

     On every step one of the in-map, non-wall neighbours is chosen with
     equal probability.

     Returns:
         The number of steps taken, capped at 200; 200 is also returned
         when the walker has no walkable neighbour.  -1 is returned when
         ``utils.findSourceAndTarget`` reports a missing source or target.
     """
     [sx, sy, tx, ty] = utils.findSourceAndTarget(mazemap)
     if sx == -1 or sy == -1 or tx == -1 or ty == -1:
         return -1
     step = 0
     max_step = 200
     while (sx != tx or sy != ty) and step < max_step:
         valid_dirs = []
         for i in range(len(utils.dirs)):
             dx = sx + utils.dirs[i][0]
             dy = sy + utils.dirs[i][1]
             if utils.inMap(dx, dy) and not mazemap[dx, dy, utils.Cell.Wall]:
                 valid_dirs.append(i)
         if not valid_dirs:
             # Boxed in: np.random.randint(0) would raise ValueError, and
             # the target is unreachable anyway, so report the step cap.
             return max_step
         selected_dir = valid_dirs[np.random.randint(len(valid_dirs))]
         sx += utils.dirs[selected_dir][0]
         sy += utils.dirs[selected_dir][1]
         step += 1
     return step
Ejemplo n.º 12
0
    def _step(self, action):
        """Move every agent once and collect the delivery reward.

        Agents are processed in index order.  A move onto a source succeeds
        only for an agent whose city is -1 (it then receives a new city from
        ``self.genCity``); a move onto a hole succeeds only when the agent's
        city matches the hole's city (the city is then reset to -1 and the
        reward counters are incremented).  A move onto any other in-map cell
        succeeds when no agent currently occupies it.

        Args:
            action: Per-agent ``[dx, dy]`` offsets.

        Returns:
            ``(state, reward, done, {})`` where ``state`` lists the agent
            positions, agent cities and the three reward tables, ``reward``
            counts the deliveries made this step, and ``done`` is True when
            ``self.time`` has reached ``self.total_time``.
        """
        reward = 0

        for i in range(self.agent_num):
            pos_x, pos_y = self.agent_pos[i]
            a_x, a_y = action[i]
            pos = [pos_x + a_x, pos_y + a_y]
            if not utils.inMap(pos):
                continue

            if pos in self.source_pos:  # source
                source_idx = self.source_pos.index(pos)
                print([i, 'source'])
                if self.agent_city[i] != -1:
                    continue
                self.agent_pos[i] = pos
                self.agent_city[i] = self.genCity(self.city_dis)
                self.source_reward[source_idx] += 1
            elif pos in self.hole_pos:  # hole
                hole_idx = self.hole_pos.index(pos)
                print([i, 'hole'])
                print(self.agent_city[i], self.hole_city[hole_idx])
                if self.agent_city[i] != self.hole_city[hole_idx]:
                    continue
                self.agent_pos[i] = pos
                self.agent_city[i] = -1
                self.agent_reward[i] += 1
                self.hole_reward[hole_idx] += 1
                reward += 1
            elif pos in self.agent_pos:  # occupied by an agent
                print([i, 'agent'])
            else:  # free path cell
                self.agent_pos[i] = pos

        self.time += 1
        done = self.time == self.total_time

        state = [
            self.agent_pos, self.agent_city, self.agent_reward,
            self.hole_reward, self.source_reward
        ]
        return state, reward, done, {}
Ejemplo n.º 13
0
 def get_one_distance(self, start):
     """Breadth-first distances from ``start`` over the open transitions.

     A transition in direction ``i`` out of a cell is followed when
     ``self.trans[x][y][i] == 1``.  Cells that are never reached keep the
     sentinel value ``Map.Width * Map.Height``.

     Args:
         start: ``[x, y]`` cell to measure from.

     Returns:
         A float array of shape ``(Map.Width, Map.Height)`` holding the
         step count from ``start`` to every cell.
     """
     unreached = config.Map.Width * config.Map.Height
     distance = unreached * np.ones((config.Map.Width, config.Map.Height))
     distance[start[0]][start[1]] = 0

     frontier = Queue.Queue(maxsize=config.Map.Width * config.Map.Height)
     frontier.put(start)
     offsets = [[1, 0], [0, 1], [-1, 0], [0, -1]]

     while not frontier.empty():
         current = frontier.get()
         for i, offset in enumerate(offsets):
             if self.trans[current[0]][current[1]][i] != 1:
                 continue
             pos = [current[0] + offset[0], current[1] + offset[1]]
             # Expand only in-map cells that still carry the sentinel.
             if utils.inMap(pos) and distance[pos[0]][pos[1]] == unreached:
                 frontier.put(pos)
                 distance[pos[0]][pos[1]] = distance[current[0]][current[1]] + 1
     return distance
Ejemplo n.º 14
0
    def isvalid_mazemap(self, mazemap):
        """Check that the target can be reached from the source.

        A breadth-first flood starts at the source and spreads through
        Empty cells; the maze is valid as soon as the target is found next
        to a flooded cell.

        Args:
            mazemap: Maze array indexed as ``mazemap[x, y, cell_type]``.

        Returns:
            True when the target is reachable, False when it is not or when
            ``utils.findSourceAndTarget`` reports a missing source or target.
        """
        [sx, sy, tx, ty] = utils.findSourceAndTarget(mazemap)
        if sx == -1 or sy == -1 or tx == -1 or ty == -1:
            return False

        # The builtin ``int`` replaces ``np.int``, which NumPy has removed.
        visited = np.zeros([config.Map.Height, config.Map.Width], dtype=int)
        visited[sx][sy] = 1
        queue = deque()
        queue.append([sx, sy])

        while queue:
            cx, cy = queue.popleft()

            for k in range(len(utils.dirs)):
                nx = cx + utils.dirs[k][0]
                ny = cy + utils.dirs[k][1]
                if not utils.inMap(nx, ny) or visited[nx][ny]:
                    continue
                if nx == tx and ny == ty:
                    return True
                if mazemap[nx, ny, utils.Cell.Empty]:
                    # Mark on enqueue so a cell is never queued twice.
                    visited[nx][ny] = 1
                    queue.append([nx, ny])

        return False
Ejemplo n.º 15
0
    def _step(self, action):
        """Advance all agents one tick using the A* planner's joint action.

        The moves actually executed come from ``self.astar.getJointAction``;
        the ``action`` argument is only used for its length (the line that
        would index ``dir`` with it is commented out).  The method then
        resolves conflicts in three passes ("invalid", "circle", "line"),
        randomly nudges some agents that ended up standing still, commits
        the new positions, and finally handles pick-ups at sources and
        drop-offs at holes.

        Returns:
            ``([1], rewards, done, info)`` where ``rewards`` is a NumPy array
            with one entry per agent and ``done`` is True when ``self.time``
            equals ``self.total_time``.  ``info`` is ``{}`` on ordinary
            steps; on the final step it is a list holding a copy of
            ``self.hole_city``, the sum of ``self.agent_reward`` and a copy
            of ``self.therm``.
        """
        # Move offsets; the first four entries line up with the four
        # per-cell entries of self.trans, the fifth is "stay".
        # NOTE(review): this local shadows the builtin ``dir``.
        dir = [[1, 0], [0, 1], [-1, 0], [0, -1], [0, 0]]
        rewards = [0.0] * self.agent_num
        # Reward magnitudes; the two penalties are currently zero.
        pick_drop = 1
        hit_wall = 0
        illegal = 0

        agent_next_pos = []
        # done[i] becomes True once agent i's next position is final.
        done = [False] * len(action)
        # astar_action = self.whca.getJointAction(self.agent_pos, self.agent_city, [[-1,-1]]*len(action))
        astar_action = self.astar.getJointAction(self.agent_pos,
                                                 self.agent_city)

        # NOTE(review): self.steps is incremented here and again near the
        # end of this method, i.e. twice per call - confirm this is intended.
        self.steps += 1
        # invalid
        for i in range(self.agent_num):
            pos = self.agent_pos[i]
            a = astar_action[i]
            # a = dir[action[i]]
            # A non-stay move through a transition whose trans entry is 0
            # is reported as illegal (the penalty itself is zero).
            if a != [0, 0] and self.trans[self.agent_pos[i][0]][
                    self.agent_pos[i][1]][dir.index(a)] == 0:
                print "illegal"
                rewards[i] -= illegal
            # TODO simple resolution
            # The move is kept only if the destination is neither already
            # claimed this tick nor currently occupied; otherwise stay.
            next_pos = [pos[0] + a[0], pos[1] + a[1]]
            if next_pos not in agent_next_pos and next_pos not in self.agent_pos:
                agent_next_pos.append(next_pos)
            else:
                agent_next_pos.append(pos)
            if pos == agent_next_pos[i]:
                done[i] = True
            elif not utils.inMap(agent_next_pos[i]):
                # Leaving the map: stay in place.
                agent_next_pos[i] = self.agent_pos[i]
                done[i] = True
                rewards[i] -= hit_wall
            elif agent_next_pos[
                    i] in self.source_pos and self.agent_city[i] != -1:
                # Entering a source while agent_city is not -1: stay.
                agent_next_pos[i] = self.agent_pos[i]
                done[i] = True
                rewards[i] -= hit_wall
            elif agent_next_pos[i] in self.hole_pos and self.agent_city[
                    i] != self.hole_city[self.hole_pos.index(
                        agent_next_pos[i])]:
                # Entering a hole whose city differs from the agent's: stay.
                agent_next_pos[i] = self.agent_pos[i]
                done[i] = True
                rewards[i] -= hit_wall

        # circle
        # Follow "agent j wants the cell of agent k" links; if they lead
        # back to the starting agent the group forms a cycle.
        for i in range(self.agent_num):
            if done[i]:
                continue
            circle = []
            j = i
            while not done[j] and j not in circle and agent_next_pos[
                    j] in self.agent_pos:
                circle.append(j)
                j = self.agent_pos.index(agent_next_pos[j])
            if len(circle) > 0 and j == circle[0]:
                if len(circle) == 1:
                    print 'error: len(circle) == 1'
                if len(circle) == 2:
                    # Two agents swapping cells: both stay where they are.
                    agent_next_pos[circle[0]] = self.agent_pos[circle[0]]
                    agent_next_pos[circle[1]] = self.agent_pos[circle[1]]
                    done[circle[0]] = True
                    done[circle[1]] = True
                else:
                    # Other cycle lengths keep their moves and are finalised.
                    for k in range(len(circle)):
                        done[circle[k]] = True

        # line
        # Chains of agents each stepping into the cell of the next one.
        for i in range(self.agent_num):
            if done[i]:
                continue
            line = []
            j = i
            while not done[j] and agent_next_pos[j] in self.agent_pos:
                if j in line:
                    # NOTE(review): only prints diagnostics; the loop is not
                    # left here, so a repeated index keeps being followed.
                    print 'error: duplicate in line'
                    print i, j
                    print line
                    print self.agent_pos
                    print agent_next_pos
                    print done
                line.append(j)
                j = self.agent_pos.index(agent_next_pos[j])
            if not done[j]:
                line.append(j)
                # The chain is cancelled when an already-finalised agent
                # has claimed the cell the last agent j wants.
                collision = False
                for k in range(self.agent_num):
                    if done[k] and agent_next_pos[k] == agent_next_pos[j]:
                        collision = True
                        break
                for k in range(len(line)):
                    if collision:
                        agent_next_pos[line[k]] = self.agent_pos[line[k]]
                    done[line[k]] = True
            # NOTE(review): when the chain ends at an agent that is already
            # done, nothing is finalised here; the check below reports it.

        if False in done:
            print 'error: False in done'
            print self.agent_pos
            print agent_next_pos
            print done

        # Random nudge: an agent that would stand still moves, with
        # probability 0.5, along the first direction whose trans entry is 1
        # and whose destination is in the map and not yet claimed.
        for i in range(self.agent_num):
            if self.agent_pos[i] == agent_next_pos[i]:
                if np.random.uniform() < 0.5:
                    tran = self.trans[self.agent_pos[i][0]][self.agent_pos[i]
                                                            [1]]
                    for j in range(4):
                        if tran[j] == 1:
                            direction = dir[j]
                            next_pos = [
                                agent_next_pos[i][0] + direction[0],
                                agent_next_pos[i][1] + direction[1]
                            ]
                            if next_pos not in agent_next_pos and utils.inMap(
                                    next_pos):
                                agent_next_pos[i] = [
                                    agent_next_pos[i][0] + direction[0],
                                    agent_next_pos[i][1] + direction[1]
                                ]
                                # self.astar.end_update[i] = True
                                break

        # Commit the resolved positions.
        self.agent_pos = agent_next_pos

        # NOTE(review): pack_count is filled in but never read or returned
        # within this method.
        pack_count = []

        for i in range(self.agent_num):
            pack_count.append(0)
            pos = self.agent_pos[i]

            # Count a visit to the agent's cell in self.therm.
            self.therm[self.agent_pos[i][0]][self.agent_pos[i][1]] += 1
            # a = action[i]
            # if a == [0, 0]:
            #     continue
            if pos in self.source_pos and self.agent_city[i] == -1:  # source
                # source_idx is computed but not used below.
                source_idx = self.source_pos.index(pos)
                self.agent_city[i] = self.genCity(self.city_dis)
                rewards[i] += pick_drop
                self.astar.end_update[i] = True
            elif pos in self.hole_pos and self.agent_city[i] != -1:  # hole
                # hole_idx is computed but not used below.
                hole_idx = self.hole_pos.index(pos)
                self.agent_city[i] = -1
                pack_count[-1] = 1
                rewards[i] += pick_drop
                self.astar.end_update[i] = True
            self.agent_reward[i] += rewards[i]

        # for r0 in rewards:
        #     if r0 > 0:
        #         self.end_count = 0
        # self.end_count += 1
        self.time += 1
        self.steps += 1
        if self.time == self.total_time:
            # From here on ``done`` is a single bool, not the per-agent list.
            done = True
            # for i in range(self.therm.shape[0]):
            #     self.therm[i] = self.therm[i]/np.max(self.therm[i])
            # ther_log = open(
            #     "environment/result/thers/thermal" + str(config.Map.Width) + '_' + str(config.episode) + '_' + str(
            #         config.epi_of_epi), 'w')
            # ther_log.write(str(self.hole_city) + '\n')
            # ther_log.write(str(sum(self.agent_reward)) + '\n')
            # ther_log.write(str(self.therm.tolist()) + '\n')
            # ther_log.close()
            # config.data.append([copy.deepcopy(self.hole_city), sum(self.agent_reward), copy.deepcopy(self.therm)])
            # print [self.hole_city, sum(self.agent_reward)]
            # Final step: return the episode summary in place of the info dict.
            return [1], np.array(rewards), done, [
                copy.deepcopy(self.hole_city),
                sum(self.agent_reward),
                copy.deepcopy(self.therm)
            ]
        else:
            done = False

        return [1], np.array(rewards), done, {}
Ejemplo n.º 16
0
    def _step(self, action):
        """Move every agent once, resolving conflicts between their moves.

        The requested moves are settled in three passes:

        1. *invalid* - a move is cancelled when it leaves the map, enters a
           source while the agent's city is not -1, or enters a hole whose
           city differs from the agent's.
        2. *circle* - agents whose moves form a closed cycle: a two-agent
           swap is cancelled, other cycle lengths are allowed to rotate.
        3. *line* - chains of agents each stepping into the cell of the
           next one advance together unless the cell wanted by the head of
           the chain is claimed by an already-settled agent.

        Afterwards positions are committed and pick-ups (at sources) and
        drop-offs (at holes) are applied.

        Args:
            action: Per-agent ``[dx, dy]`` offsets.

        Returns:
            ``(state, rewards, done, {})`` where ``state`` lists the agent
            positions, agent cities and the three reward tables,
            ``rewards`` holds 1 for each agent that delivered this step and
            0 otherwise, and ``done`` is True when ``self.time`` has reached
            ``self.total_time``.
        """
        agent_next_pos = []
        # done[i] becomes True once agent i's next position is final.
        done = [False] * len(action)

        # invalid
        for i in range(self.agent_num):
            pos = self.agent_pos[i]
            a = action[i]
            target = [pos[0] + a[0], pos[1] + a[1]]
            agent_next_pos.append(target)
            if target == pos:
                done[i] = True
            elif not utils.inMap(target):
                agent_next_pos[i] = pos
                done[i] = True
            elif target in self.source_pos and self.agent_city[i] != -1:
                agent_next_pos[i] = pos
                done[i] = True
            elif (target in self.hole_pos and self.agent_city[i] !=
                  self.hole_city[self.hole_pos.index(target)]):
                agent_next_pos[i] = pos
                done[i] = True

        # circle
        for i in range(self.agent_num):
            if done[i]:
                continue
            circle = []
            j = i
            while not done[j] and j not in circle and agent_next_pos[j] in self.agent_pos:
                circle.append(j)
                j = self.agent_pos.index(agent_next_pos[j])
            if len(circle) > 0 and j == circle[0]:
                if len(circle) == 1:
                    print('error: len(circle) == 1')
                if len(circle) == 2:
                    # Two agents swapping cells: both stay where they are.
                    for member in circle:
                        agent_next_pos[member] = self.agent_pos[member]
                        done[member] = True
                else:
                    for member in circle:
                        done[member] = True

        # line
        for i in range(self.agent_num):
            if done[i]:
                continue
            line = []
            j = i
            while not done[j] and agent_next_pos[j] in self.agent_pos:
                if j in line:
                    print('error: duplicate in line')
                    print(line)
                    print(self.agent_pos)
                    print(agent_next_pos)
                    print(done)
                    # Following the chain further would never terminate.
                    break
                line.append(j)
                j = self.agent_pos.index(agent_next_pos[j])
            if not done[j]:
                # j heads the chain and wants a currently unoccupied cell.
                line.append(j)
            # The chain may advance only if no settled agent has claimed the
            # cell its head wants.  When the chain ended at a settled agent
            # that stays put, that agent itself is the claimant.
            head = line[-1]
            collision = False
            for k in range(self.agent_num):
                if done[k] and agent_next_pos[k] == agent_next_pos[head]:
                    collision = True
                    break
            for member in line:
                if collision:
                    agent_next_pos[member] = self.agent_pos[member]
                done[member] = True

        if False in done:
            print('error: False in done')
            print(self.agent_pos)
            print(agent_next_pos)
            print(done)

        self.agent_pos = agent_next_pos

        rewards = []

        for i in range(self.agent_num):
            rewards.append(0)
            pos = self.agent_pos[i]
            a = action[i]
            if a == [0, 0]:
                continue
            # The city guards stop an agent whose move was cancelled while
            # standing on a source/hole from being served a second time.
            if pos in self.source_pos and self.agent_city[i] == -1:  # source
                source_idx = self.source_pos.index(pos)
                self.agent_city[i] = self.genCity(self.city_dis)
                self.source_reward[source_idx] += 1
            elif pos in self.hole_pos and self.agent_city[i] != -1:  # hole
                hole_idx = self.hole_pos.index(pos)
                self.agent_city[i] = -1
                self.agent_reward[i] += 1
                self.hole_reward[hole_idx] += 1
                rewards[-1] = 1

        self.time += 1
        # From here on ``done`` is the episode flag, not the per-agent list.
        done = self.time == self.total_time

        state = [
            self.agent_pos, self.agent_city, self.agent_reward,
            self.hole_reward, self.source_reward
        ]
        return state, rewards, done, {}