def shortest_random_path(self, mazemap):
    """Simulate a noisy walk from the source towards the target and count its steps.

    A breadth-first search rooted at the target first labels every reachable
    empty/source cell with its distance to the target.  The walker then
    starts at the source and, at every step, takes the legal move with the
    smallest label with probability 0.8 and spreads the remaining
    probability evenly over the other legal moves.

    Args:
        mazemap: Maze array indexed as ``mazemap[x, y, cell_type]``.

    Returns:
        -1 when ``utils.findSourceAndTarget`` reports a missing source or
        target; otherwise the number of steps taken, capped at 200.  When the
        walker has no legal move at all the cap (200) is returned.
    """
    sx, sy, tx, ty = utils.findSourceAndTarget(mazemap)
    if sx == -1 or sy == -1 or tx == -1 or ty == -1:
        return -1

    # Distance-to-target labels; 0 doubles as "not labelled yet".
    distance = np.zeros([config.Map.Height, config.Map.Width], dtype=int)
    frontier = deque([[tx, ty]])
    while frontier:
        cx, cy = frontier.popleft()
        candidate = distance[cx][cy] + 1
        for move in utils.dirs:
            # Component-wise addition works whether the rows of utils.dirs
            # are lists or arrays.
            nx, ny = cx + move[0], cy + move[1]
            if not utils.inMap(nx, ny):
                continue
            if mazemap[nx, ny, utils.Cell.Empty] or mazemap[nx, ny, utils.Cell.Source]:
                if distance[nx][ny] == 0 or distance[nx][ny] > candidate:
                    frontier.append([nx, ny])
                    distance[nx][ny] = candidate

    # Follow the best direction with probability optimal_dir_prob.
    step = 0
    max_step = 200
    optimal_dir_prob = 0.8
    # Larger than any real label, so illegal moves never win the argmin.
    invalid_distance = config.Map.Height * config.Map.Width
    while (sx != tx or sy != ty) and step < max_step:
        distance_dirs = []
        legal_dirs = []
        for i, move in enumerate(utils.dirs):
            dx, dy = sx + move[0], sy + move[1]
            if utils.inMap(dx, dy) and not mazemap[dx, dy, utils.Cell.Wall]:
                distance_dirs.append(distance[dx][dy])
                legal_dirs.append(i)
            else:
                distance_dirs.append(invalid_distance)

        if not legal_dirs:
            # Boxed in: the walker can never move, so the budget is spent.
            return max_step

        best_dir = int(np.argmin(distance_dirs))
        prob_dirs = [0.0] * len(distance_dirs)
        if len(legal_dirs) == 1:
            # The vector must sum to 1: np.random.multinomial hands any
            # missing mass to the LAST entry, which may be an illegal move.
            prob_dirs[best_dir] = 1.0
        else:
            share = (1 - optimal_dir_prob) / (len(legal_dirs) - 1)
            for i in legal_dirs:
                prob_dirs[i] = share
            prob_dirs[best_dir] = optimal_dir_prob

        selected_dir = np.argmax(np.random.multinomial(1, prob_dirs))
        sx += utils.dirs[selected_dir][0]
        sy += utils.dirs[selected_dir][1]
        step += 1
    return step
def dfs_path(self, mazemap):
    """Count the moves a depth-first explorer makes before standing on the target.

    The explorer keeps its current route on a stack.  Moving forward to an
    unvisited non-wall neighbour and backtracking one cell each cost one
    step.

    Args:
        mazemap: Maze array indexed as ``mazemap[x, y, cell_type]``.

    Returns:
        -1 when ``utils.findSourceAndTarget`` reports a missing source or
        target; otherwise the number of forward and backtracking moves made.
        If the stack empties without reaching the target, the count
        accumulated so far is returned.
    """
    sx, sy, tx, ty = utils.findSourceAndTarget(mazemap)
    if sx == -1 or sy == -1 or tx == -1 or ty == -1:
        return -1

    # np.int was removed from NumPy; a boolean mask is all that is needed.
    visited = np.zeros([config.Map.Height, config.Map.Width], dtype=bool)
    visited[sx][sy] = True
    stack = [[sx, sy]]
    step = 0
    while stack:
        x, y = stack[-1]
        if x == tx and y == ty:
            break
        for move in utils.dirs:
            dx, dy = x + move[0], y + move[1]
            if (utils.inMap(dx, dy)
                    and not mazemap[dx, dy, utils.Cell.Wall]
                    and not visited[dx][dy]):
                visited[dx][dy] = True
                stack.append([dx, dy])
                step += 1
                break
        else:
            # Dead end: no unvisited neighbour, so step back one cell.
            stack.pop()
            step += 1
    return step
def shortest_path(mazemap):
    """Return the length of the shortest source-to-target route in the maze.

    Runs a breadth-first search from the source over empty cells and the
    target cell.  Labels are stored one-based (the source is 1) so that 0
    can mean "not reached".

    Args:
        mazemap: Maze array indexed as ``mazemap[x, y, cell_type]``.

    Returns:
        -1 when ``utils.findSourceAndTarget`` reports a missing source or
        target, or when the target is never reached (its label stays 0);
        otherwise the number of moves on a shortest route.
    """
    sx, sy, tx, ty = utils.findSourceAndTarget(mazemap)
    if sx == -1 or sy == -1 or tx == -1 or ty == -1:
        return -1

    # Named path_len rather than shortest_path so the local does not shadow
    # this function; np.int was removed from NumPy, plain int is equivalent.
    path_len = np.zeros([config.Map.Height, config.Map.Width], dtype=int)
    path_len[sx][sy] = 1
    frontier = deque([[sx, sy]])
    while frontier:
        cx, cy = frontier.popleft()
        candidate = path_len[cx][cy] + 1
        for move in utils.dirs:
            # Component-wise addition works whether the rows of utils.dirs
            # are lists or arrays.
            nx, ny = cx + move[0], cy + move[1]
            if not utils.inMap(nx, ny):
                continue
            if mazemap[nx, ny, utils.Cell.Empty] or mazemap[nx, ny, utils.Cell.Target]:
                if path_len[nx][ny] == 0 or path_len[nx][ny] > candidate:
                    frontier.append([nx, ny])
                    path_len[nx][ny] = candidate
    # Undo the one-based offset; an unreached target yields 0 - 1 == -1.
    return path_len[tx][ty] - 1
def right_hand_path(self, mazemap):
    """Walk from source to target with a fixed turn-preference rule.

    On every iteration the walker tries four headings, expressed relative to
    its current heading in the order +1, 0, +3, +2 (mod 4), and takes the
    first one that leads to an in-map empty or target cell.  The iteration is
    counted and the current cell recorded even when no move was possible.

    The source cell is temporarily overwritten with ``utils.Cell.EmptyV`` for
    the duration of the walk and restored to ``utils.Cell.SourceV`` afterwards.
    The loop has no exit other than reaching the target.

    Args:
        mazemap: Maze array indexed as ``mazemap[x, y, cell_type]``; modified
            in place as described above.

    Returns:
        -1 when ``utils.findSourceAndTarget`` reports a missing source or
        target; otherwise the number of iterations performed.  The count and
        the visited cells are also printed.
    """
    sx, sy, tx, ty = utils.findSourceAndTarget(mazemap)
    if sx == -1 or sy == -1 or tx == -1 or ty == -1:
        return -1

    offsets = np.array([[1, 0], [0, -1], [-1, 0], [0, 1]])
    turn_order = [1, 0, 3, 2]

    # Let the walker treat its own starting cell as free space.
    mazemap[sx, sy] = utils.Cell.EmptyV

    heading = 0
    here_x, here_y = sx, sy
    path = []
    count = 0
    while not (here_x == tx and here_y == ty):
        for turn in turn_order:
            trial = (heading + turn) % 4
            nx = here_x + offsets[trial][0]
            ny = here_y + offsets[trial][1]
            if utils.inMap(nx, ny) and (
                    mazemap[nx, ny, utils.Cell.Empty]
                    or mazemap[nx, ny, utils.Cell.Target]):
                here_x, here_y = nx, ny
                heading = trial
                break
        count += 1
        path.append([here_x, here_y])

    mazemap[sx, sy] = utils.Cell.SourceV
    print(count, path)
    return count
def getDirections(self):
    """Build one direction map per source and per hole by breadth-first search.

    The result has one layer for each source followed by one layer for each
    hole.  Every layer starts filled with -1.  A search is started from the
    layer's source/hole position; whenever a neighbour in direction ``j`` is
    reached for the first time its entry is set to ``(j + 2) % 4``
    (presumably the direction pointing back towards the start -- confirm
    against the ordering of ``trans_to_action``).  Only neighbours whose
    ``self.source_hole_map`` entry is -2 are expanded further.

    Returns:
        An int32 array of shape
        ``(config.Source_num + config.Hole_num, config.Map.Width, config.Map.Height)``.
    """
    layer_count = config.Source_num + config.Hole_num
    directions = -np.ones(
        (layer_count, config.Map.Width, config.Map.Height), dtype=np.int32)
    pending = queue.Queue(maxsize=config.Map.Width * config.Map.Height)

    for layer in range(layer_count):
        # Source layers come first, hole layers after them.
        if layer < config.Source_num:
            origin = self.source_pos[layer]
        else:
            origin = self.hole_pos[layer - config.Source_num]
        pending.put(origin)

        while not pending.empty():
            cell = pending.get()
            for j in range(4):
                if self.trans[cell[0]][cell[1]][j] != 0:
                    continue
                nxt = [
                    cell[0] + trans_to_action[j][0],
                    cell[1] + trans_to_action[j][1],
                ]
                if not utils.inMap(nxt):
                    continue
                if directions[layer][nxt[0]][nxt[1]] != -1:
                    continue
                directions[layer][nxt[0]][nxt[1]] = (j + 2) % 4
                if self.source_hole_map[nxt[0]][nxt[1]] == -2:
                    pending.put(nxt)
    return directions
def rightdown_path(self, mazemap):
    """Count steps of a walker that only ever tries its first two directions.

    Each step the walker tries ``utils.dirs[0]`` (labelled "right" in the
    original code) and, only if that move is blocked, ``utils.dirs[1]``
    ("down").  A step is counted even when neither move is possible.

    Args:
        mazemap: Maze array indexed as ``mazemap[x, y, cell_type]``.

    Returns:
        -1 when ``utils.findSourceAndTarget`` reports a missing source or
        target; otherwise the number of steps taken, capped at 200.
    """
    sx, sy, tx, ty = utils.findSourceAndTarget(mazemap)
    if sx == -1 or sy == -1 or tx == -1 or ty == -1:
        return -1

    max_step = 200
    step = 0
    while step < max_step and not (sx == tx and sy == ty):
        # Preference order: right first, then down.
        for choice in (0, 1):
            dx = sx + utils.dirs[choice][0]
            dy = sy + utils.dirs[choice][1]
            if utils.inMap(dx, dy) and not mazemap[dx, dy, utils.Cell.Wall]:
                sx, sy = dx, dy
                break
        step += 1
    return step
def isValidPath(self, agent_id, pos, t, agent_pos, end_pos):
    """Decide whether ``agent_id`` may occupy ``pos`` at time ``t``.

    A position is rejected when it is outside the map, when the reservation
    table entry for time ``t`` holds it for a different agent, or when it is
    a source or hole cell other than the agent's own ``end_pos``.

    Args:
        agent_id: Identifier compared against the first element of a
            reservation entry.
        pos: Candidate position.
        t: Time index into ``self.reserve``.
        agent_pos: Unused by this method.
        end_pos: The agent's destination; always allowed if not reserved.

    Returns:
        True if the position is acceptable, False otherwise.
    """
    if not utils.inMap(pos):
        return False

    # NOTE(review): the loop variable is never used, so the identical check
    # at time ``t`` is repeated once per element of reserve_interval --
    # possibly ``t`` plus the offset was intended; confirm before changing.
    for _ in self.reserve_interval:
        if t >= len(self.reserve):
            continue
        if encode(pos) not in self.reserve[t]:
            continue
        if self.reserve[t][encode(pos)][0] != agent_id:
            return False

    if pos == end_pos:
        return True
    # Sources and holes are off limits unless they are the destination.
    blocked = pos in self.source_pos or pos in self.hole_pos
    return not blocked
def _step(self, action):
    """Apply one action to the single-agent maze and report the outcome.

    Action 4 draws a uniformly random cell; the agent is relocated there (and
    the episode ends) only if that cell is the target.  Any other action is
    an index into ``utils.dirs``; the agent moves if the destination is inside
    the map and is either the target (episode ends) or an empty cell.

    Args:
        action: 4 for the random jump, otherwise an index into ``utils.dirs``.

    Returns:
        A tuple ``(mazemap, reward, done, info)`` where ``reward`` is always
        -1 and ``info`` is an empty dict.
    """
    done = False
    reward = -1

    def relocate(cell):
        # Vacate the old source cell and mark the new one.
        self.mazemap[self.source[0], self.source[1]] = utils.Cell.EmptyV
        self.mazemap[cell[0], cell[1]] = utils.Cell.SourceV
        self.source = cell

    if action == 4:
        # randint's upper bound is exclusive, so this covers the same
        # inclusive range the deprecated random_integers(0, size - 1) did.
        new_source = [
            np.random.randint(0, config.Map.Height),
            np.random.randint(0, config.Map.Width),
        ]
        if self.mazemap[new_source[0], new_source[1], utils.Cell.Target]:
            done = True
            relocate(new_source)
    else:
        new_source = self.source + utils.dirs[action]
        if utils.inMap(new_source[0], new_source[1]):
            if self.mazemap[new_source[0], new_source[1], utils.Cell.Target]:
                done = True
                relocate(new_source)
            elif self.mazemap[new_source[0], new_source[1], utils.Cell.Empty]:
                relocate(new_source)
    return self.mazemap, reward, done, {}
def rightdownupleft_path(self, mazemap):
    """Count steps of a walker that always takes the first legal direction.

    Directions are tried in the order they appear in ``utils.dirs`` (the
    original comment names this order right, down, up, left).  A step is
    counted even when no direction is legal.

    Args:
        mazemap: Maze array indexed as ``mazemap[x, y, cell_type]``.

    Returns:
        -1 when ``utils.findSourceAndTarget`` reports a missing source or
        target; otherwise the number of steps taken, capped at 200.
    """
    sx, sy, tx, ty = utils.findSourceAndTarget(mazemap)
    if sx == -1 or sy == -1 or tx == -1 or ty == -1:
        return -1

    max_step = 200
    step = 0
    while step < max_step and not (sx == tx and sy == ty):
        for choice in range(len(utils.dirs)):
            offset = utils.dirs[choice]
            dx, dy = sx + offset[0], sy + offset[1]
            if not utils.inMap(dx, dy):
                continue
            if mazemap[dx, dy, utils.Cell.Wall]:
                continue
            sx, sy = dx, dy
            break
        step += 1
    return step
def rightdown_random_path(self, mazemap):
    """Count steps of a random walker biased towards its first two directions.

    Each step a direction index is sampled with probabilities
    0.4, 0.4, 0.1, 0.1 (labelled right, down, up, left in the original code)
    and re-sampled until it leads to an in-map, non-wall cell.  The
    re-sampling loop has no other exit.

    Args:
        mazemap: Maze array indexed as ``mazemap[x, y, cell_type]``.

    Returns:
        -1 when ``utils.findSourceAndTarget`` reports a missing source or
        target; otherwise the number of steps taken, capped at 200.
    """
    sx, sy, tx, ty = utils.findSourceAndTarget(mazemap)
    if sx == -1 or sy == -1 or tx == -1 or ty == -1:
        return -1

    max_step = 200
    step = 0
    while step < max_step and not (sx == tx and sy == ty):
        moved = False
        while not moved:
            # right 0.4, down 0.4, up 0.1, left 0.1
            draw = np.random.multinomial(1, [0.4, 0.4, 0.1, 0.1])
            choice = np.argmax(draw)
            dx = sx + utils.dirs[choice][0]
            dy = sy + utils.dirs[choice][1]
            if utils.inMap(dx, dy) and not mazemap[dx, dy, utils.Cell.Wall]:
                sx, sy = dx, dy
                moved = True
        step += 1
    return step
def random_path(self, mazemap):
    """Count steps of a uniform random walker from source to target.

    Each step the walker picks uniformly among the directions in
    ``utils.dirs`` that lead to an in-map, non-wall cell.

    Args:
        mazemap: Maze array indexed as ``mazemap[x, y, cell_type]``.

    Returns:
        -1 when ``utils.findSourceAndTarget`` reports a missing source or
        target; otherwise the number of steps taken, capped at 200.
    """
    sx, sy, tx, ty = utils.findSourceAndTarget(mazemap)
    if sx == -1 or sy == -1 or tx == -1 or ty == -1:
        return -1

    max_step = 200
    step = 0
    while step < max_step and not (sx == tx and sy == ty):
        open_dirs = [
            idx for idx in range(len(utils.dirs))
            if utils.inMap(sx + utils.dirs[idx][0], sy + utils.dirs[idx][1])
            and not mazemap[sx + utils.dirs[idx][0],
                            sy + utils.dirs[idx][1],
                            utils.Cell.Wall]
        ]
        pick = open_dirs[np.random.randint(len(open_dirs))]
        sx += utils.dirs[pick][0]
        sy += utils.dirs[pick][1]
        step += 1
    return step
def _step(self, action):
    """Move every agent once, sequentially, and advance the clock.

    For each agent the requested displacement is added to its position.  If
    the destination is inside the map:

    * on a source cell, an agent whose city is -1 moves there, is assigned a
      city via ``self.genCity`` and the source's counter is incremented;
    * on a hole cell, an agent whose city equals the hole's city moves there,
      has its city reset to -1, and the agent, hole and step rewards are each
      incremented;
    * on any other cell, the agent moves there unless the cell equals some
      agent's position.

    Diagnostic lines are printed for source, hole and blocked-by-agent cases.

    Args:
        action: Per-agent ``[dx, dy]`` displacements.

    Returns:
        ``(state, reward, done, info)`` where ``state`` is the list
        ``[agent_pos, agent_city, agent_reward, hole_reward, source_reward]``,
        ``reward`` is the number of deliveries made this step, ``done`` is
        True once ``self.time`` equals ``self.total_time`` and ``info`` is an
        empty dict.
    """
    reward = 0
    for i in range(self.agent_num):
        cur_x, cur_y = self.agent_pos[i]
        step_x, step_y = action[i]
        pos = [cur_x + step_x, cur_y + step_y]
        if not utils.inMap(pos):
            continue

        if pos in self.source_pos:  # source
            source_idx = self.source_pos.index(pos)
            print([i, 'source'])
            if self.agent_city[i] == -1:
                self.agent_pos[i] = pos
                self.agent_city[i] = self.genCity(self.city_dis)
                self.source_reward[source_idx] += 1
            continue

        if pos in self.hole_pos:  # hole
            hole_idx = self.hole_pos.index(pos)
            print([i, 'hole'])
            print(self.agent_city[i], self.hole_city[hole_idx])
            if self.agent_city[i] == self.hole_city[hole_idx]:
                self.agent_pos[i] = pos
                self.agent_city[i] = -1
                self.agent_reward[i] += 1
                self.hole_reward[hole_idx] += 1
                reward += 1
            continue

        if pos in self.agent_pos:
            # Destination coincides with an agent position: stay put.
            print([i, 'agent'])
        else:  # path (not agent)
            self.agent_pos[i] = pos

    self.time += 1
    done = self.time == self.total_time
    state = [
        self.agent_pos, self.agent_city, self.agent_reward, self.hole_reward,
        self.source_reward
    ]
    return state, reward, done, {}
def get_one_distance(self, start):
    """Compute breadth-first distances from ``start`` to every cell.

    A move from a cell in direction ``i`` (order ``[1, 0], [0, 1], [-1, 0],
    [0, -1]``) is followed only when ``self.trans[x][y][i]`` equals 1.

    Args:
        start: Starting position, indexed as ``start[0]``, ``start[1]``.

    Returns:
        A float array of shape ``(config.Map.Width, config.Map.Height)``
        holding the number of moves from ``start``; cells never reached keep
        the sentinel ``config.Map.Width * config.Map.Height``.
    """
    all_dir = [[1, 0], [0, 1], [-1, 0], [0, -1]]
    scale = config.Map.Width * config.Map.Height

    # Every cell starts at the sentinel value, meaning "not reached yet".
    distance = np.ones((config.Map.Width, config.Map.Height)) * scale
    distance[start[0]][start[1]] = 0

    pending = Queue.Queue(maxsize=config.Map.Width * config.Map.Height)
    pending.put(start)
    while not pending.empty():
        cell = pending.get()
        for idx, offset in enumerate(all_dir):
            if self.trans[cell[0]][cell[1]][idx] != 1:
                continue
            nxt = [cell[0] + offset[0], cell[1] + offset[1]]
            # Skip cells outside the map and cells already labelled.
            if utils.inMap(nxt) and distance[nxt[0]][nxt[1]] == scale:
                pending.put(nxt)
                distance[nxt[0]][nxt[1]] = distance[cell[0]][cell[1]] + 1
    return distance
def isvalid_mazemap(self, mazemap):
    """Check that the target can be reached from the source through empty cells.

    Runs a breadth-first search from the source that expands only empty
    cells and succeeds as soon as the target is adjacent to an expanded cell.

    Args:
        mazemap: Maze array indexed as ``mazemap[x, y, cell_type]``.

    Returns:
        False when ``utils.findSourceAndTarget`` reports a missing source or
        target, or when the search ends without meeting the target; True
        otherwise.
    """
    sx, sy, tx, ty = utils.findSourceAndTarget(mazemap)
    if sx == -1 or sy == -1 or tx == -1 or ty == -1:
        return False

    # np.int was removed from NumPy; a boolean mask is all that is needed.
    seen = np.zeros([config.Map.Height, config.Map.Width], dtype=bool)
    seen[sx][sy] = True
    frontier = deque([[sx, sy]])
    while frontier:
        cx, cy = frontier.popleft()
        for move in utils.dirs:
            # Component-wise addition works whether the rows of utils.dirs
            # are lists or arrays.
            nx, ny = cx + move[0], cy + move[1]
            if not utils.inMap(nx, ny) or seen[nx][ny]:
                continue
            if nx == tx and ny == ty:
                return True
            if mazemap[nx, ny, utils.Cell.Empty]:
                # Mark when enqueuing so a cell is never queued twice.
                seen[nx][ny] = True
                frontier.append([nx, ny])
    return False
def _step(self, action):
    # Advance all agents by one tick using the joint action produced by
    # self.astar, resolve conflicts between their moves, then apply
    # pick-up / drop-off rewards.
    #
    # `action` is only used for its length here; the moves actually applied
    # come from self.astar.getJointAction (the line that would decode
    # `action` is commented out below).
    #
    # Returns (observation, rewards, done, info): the observation is the
    # placeholder list [1]; rewards is a numpy array with one entry per
    # agent; done is True once self.time reaches self.total_time; info is
    # [hole_city copy, total agent reward, therm copy] on the final tick and
    # {} otherwise.
    #
    # NOTE(review): self.steps is incremented twice per call (once near the
    # top, once near the bottom) -- confirm this is intended.
    # NOTE(review): print statements use Python 2 syntax.
    dir = [[1, 0], [0, 1], [-1, 0], [0, -1], [0, 0]]
    rewards = [0.0] * self.agent_num
    pick_drop = 1
    # Both penalties are currently zero, so the "-=" lines below are no-ops.
    hit_wall = 0
    illegal = 0
    agent_next_pos = []
    # Per-agent flag: True once that agent's move for this tick is settled.
    done = [False] * len(action)
    # astar_action = self.whca.getJointAction(self.agent_pos, self.agent_city, [[-1,-1]]*len(action))
    astar_action = self.astar.getJointAction(self.agent_pos, self.agent_city)
    self.steps += 1

    # Phase 1: propose a next cell per agent and settle the invalid ones.
    for i in range(self.agent_num):
        pos = self.agent_pos[i]
        a = astar_action[i]
        # a = dir[action[i]]
        # A non-zero move whose transition entry is 0 is reported as illegal.
        if a != [0, 0] and self.trans[self.agent_pos[i][0]][self.agent_pos[i][1]][dir.index(a)] == 0:
            print "illegal"
            rewards[i] -= illegal

        # TODO simple resolution
        # An agent keeps its cell when the proposed cell is already claimed
        # by an earlier agent or is currently occupied by any agent.
        next_pos = [pos[0] + a[0], pos[1] + a[1]]
        if next_pos not in agent_next_pos and next_pos not in self.agent_pos:
            agent_next_pos.append(next_pos)
        else:
            agent_next_pos.append(pos)

        if pos == agent_next_pos[i]:
            # Staying in place needs no further resolution.
            done[i] = True
        elif not utils.inMap(agent_next_pos[i]):
            # Leaving the map: stay.
            agent_next_pos[i] = self.agent_pos[i]
            done[i] = True
            rewards[i] -= hit_wall
        elif agent_next_pos[i] in self.source_pos and self.agent_city[i] != -1:
            # Entering a source while agent_city is not -1: stay.
            agent_next_pos[i] = self.agent_pos[i]
            done[i] = True
            rewards[i] -= hit_wall
        elif agent_next_pos[i] in self.hole_pos and self.agent_city[i] != self.hole_city[self.hole_pos.index(agent_next_pos[i])]:
            # Entering a hole whose city differs from the agent's: stay.
            agent_next_pos[i] = self.agent_pos[i]
            done[i] = True
            rewards[i] -= hit_wall

    # Phase 2 ("circle"): follow the chain "agent j wants the cell agent k
    # occupies" and detect chains that close on their first member.
    for i in range(self.agent_num):
        if done[i]:
            continue
        circle = []
        j = i
        while not done[j] and j not in circle and agent_next_pos[j] in self.agent_pos:
            circle.append(j)
            j = self.agent_pos.index(agent_next_pos[j])
        if len(circle) > 0 and j == circle[0]:
            if len(circle) == 1:
                print 'error: len(circle) == 1'
            if len(circle) == 2:
                # Two agents swapping cells: both stay.
                agent_next_pos[circle[0]] = self.agent_pos[circle[0]]
                agent_next_pos[circle[1]] = self.agent_pos[circle[1]]
                done[circle[0]] = True
                done[circle[1]] = True
            else:
                # Longer cycles keep their proposed moves.
                for k in range(len(circle)):
                    done[circle[k]] = True

    # Phase 3 ("line"): settle open chains of agents following one another.
    for i in range(self.agent_num):
        if done[i]:
            continue
        line = []
        j = i
        while not done[j] and agent_next_pos[j] in self.agent_pos:
            if j in line:
                print 'error: duplicate in line'
                print i, j
                print line
                print self.agent_pos
                print agent_next_pos
                print done
            line.append(j)
            j = self.agent_pos.index(agent_next_pos[j])
        if not done[j]:
            line.append(j)
        # The chain is blocked when a settled agent ends up on the cell the
        # head of the chain (agent j) is heading for.
        collision = False
        for k in range(self.agent_num):
            if done[k] and agent_next_pos[k] == agent_next_pos[j]:
                collision = True
                break
        for k in range(len(line)):
            if collision:
                agent_next_pos[line[k]] = self.agent_pos[line[k]]
            done[line[k]] = True

    # Sanity check: every agent should be settled by now.
    if False in done:
        print 'error: False in done'
        print self.agent_pos
        print agent_next_pos
        print done

    # Phase 4: an agent that would stay in place is, with probability 0.5,
    # nudged to the first direction whose transition entry is 1 and whose
    # cell is inside the map and not already claimed.
    for i in range(self.agent_num):
        if self.agent_pos[i] == agent_next_pos[i]:
            if np.random.uniform() < 0.5:
                tran = self.trans[self.agent_pos[i][0]][self.agent_pos[i][1]]
                for j in range(4):
                    if tran[j] == 1:
                        direction = dir[j]
                        next_pos = [agent_next_pos[i][0] + direction[0], agent_next_pos[i][1] + direction[1]]
                        if next_pos not in agent_next_pos and utils.inMap(next_pos):
                            agent_next_pos[i] = [agent_next_pos[i][0] + direction[0], agent_next_pos[i][1] + direction[1]]
                            # self.astar.end_update[i] = True
                            break

    # Commit the resolved moves.
    self.agent_pos = agent_next_pos

    # Phase 5: visit counters, pick-ups at sources and drop-offs at holes.
    pack_count = []
    for i in range(self.agent_num):
        pack_count.append(0)
        pos = self.agent_pos[i]
        # self.therm accumulates how often each cell has been occupied.
        self.therm[self.agent_pos[i][0]][self.agent_pos[i][1]] += 1
        # a = action[i]
        # if a == [0, 0]:
        #     continue
        if pos in self.source_pos and self.agent_city[i] == -1:  # source
            source_idx = self.source_pos.index(pos)
            self.agent_city[i] = self.genCity(self.city_dis)
            rewards[i] += pick_drop
            self.astar.end_update[i] = True
        elif pos in self.hole_pos and self.agent_city[i] != -1:  # hole
            hole_idx = self.hole_pos.index(pos)
            self.agent_city[i] = -1
            pack_count[-1] = 1
            rewards[i] += pick_drop
            self.astar.end_update[i] = True
        self.agent_reward[i] += rewards[i]

    # for r0 in rewards:
    #     if r0 > 0:
    #         self.end_count = 0
    # self.end_count += 1
    self.time += 1
    self.steps += 1

    # From here on `done` is rebound from the per-agent list to the
    # episode-level boolean that is returned.
    if self.time == self.total_time:
        done = True
        # for i in range(self.therm.shape[0]):
        #     self.therm[i] = self.therm[i]/np.max(self.therm[i])
        # ther_log = open(
        #     "environment/result/thers/thermal" + str(config.Map.Width) + '_' + str(config.episode) + '_' + str(
        #         config.epi_of_epi), 'w')
        # ther_log.write(str(self.hole_city) + '\n')
        # ther_log.write(str(sum(self.agent_reward)) + '\n')
        # ther_log.write(str(self.therm.tolist()) + '\n')
        # ther_log.close()
        # config.data.append([copy.deepcopy(self.hole_city), sum(self.agent_reward), copy.deepcopy(self.therm)])
        # print [self.hole_city, sum(self.agent_reward)]
        return [1], np.array(rewards), done, [copy.deepcopy(self.hole_city), sum(self.agent_reward), copy.deepcopy(self.therm)]
    else:
        done = False
        return [1], np.array(rewards), done, {}
def _step(self, action):
    """Move all agents simultaneously, resolving conflicts, then score the tick.

    The step runs in four phases:

    1. Every agent proposes ``position + action``.  Proposals that stay in
       place, leave the map, enter a source while the agent's city is not -1,
       or enter a hole whose city differs from the agent's are settled
       immediately as "stay".
    2. Closed chains of agents each wanting the next one's cell are
       detected.  A two-agent swap is cancelled; longer cycles go ahead.
    3. Open chains are settled: if the chain ends at a settled agent that is
       not moving, or its head is heading for a cell a settled agent also
       ends up on, the whole chain stays; otherwise it moves.
    4. Positions are committed; agents that asked to move and now stand on a
       source without a city are assigned one, and agents standing on a hole
       matching their city deliver and earn a reward of 1.

    Args:
        action: Per-agent ``[dx, dy]`` displacements.

    Returns:
        ``(state, rewards, done, info)`` where ``state`` is the list
        ``[agent_pos, agent_city, agent_reward, hole_reward, source_reward]``,
        ``rewards`` is a per-agent list of 0/1, ``done`` is True once
        ``self.time`` equals ``self.total_time`` and ``info`` is an empty
        dict.
    """
    agent_next_pos = []
    # Per-agent flag: True once that agent's move for this tick is settled.
    settled = [False] * len(action)

    # Phase 1: invalid proposals.
    for i in range(self.agent_num):
        pos = self.agent_pos[i]
        a = action[i]
        agent_next_pos.append([pos[0] + a[0], pos[1] + a[1]])
        if agent_next_pos[i] == pos:
            settled[i] = True
        elif not utils.inMap(agent_next_pos[i]):
            agent_next_pos[i] = pos
            settled[i] = True
        elif agent_next_pos[i] in self.source_pos and self.agent_city[i] != -1:
            agent_next_pos[i] = pos
            settled[i] = True
        elif (agent_next_pos[i] in self.hole_pos
              and self.agent_city[i] != self.hole_city[
                  self.hole_pos.index(agent_next_pos[i])]):
            agent_next_pos[i] = pos
            settled[i] = True

    # Phase 2: circles.
    for i in range(self.agent_num):
        if settled[i]:
            continue
        circle = []
        j = i
        while (not settled[j] and j not in circle
               and agent_next_pos[j] in self.agent_pos):
            circle.append(j)
            j = self.agent_pos.index(agent_next_pos[j])
        if len(circle) > 0 and j == circle[0]:
            if len(circle) == 1:
                print('error: len(circle) == 1')
            if len(circle) == 2:
                # Two agents cannot swap cells: both stay.
                agent_next_pos[circle[0]] = self.agent_pos[circle[0]]
                agent_next_pos[circle[1]] = self.agent_pos[circle[1]]
                settled[circle[0]] = True
                settled[circle[1]] = True
            else:
                # A rotation of three or more agents can go ahead.
                for member in circle:
                    settled[member] = True

    # Phase 3: lines.
    for i in range(self.agent_num):
        if settled[i]:
            continue
        line = []
        j = i
        while not settled[j] and agent_next_pos[j] in self.agent_pos:
            if j in line:
                print('error: duplicate in line')
                print(line)
                print(self.agent_pos)
                print(agent_next_pos)
                print(settled)
            line.append(j)
            j = self.agent_pos.index(agent_next_pos[j])

        if settled[j]:
            # The chain runs into a settled agent; it is blocked only if
            # that agent is not vacating its cell.
            blocked = agent_next_pos[j] == self.agent_pos[j]
        else:
            # The head of the chain moves to a free cell; it is blocked only
            # if a settled agent ends up on that same cell.
            line.append(j)
            blocked = False
            for k in range(self.agent_num):
                if settled[k] and agent_next_pos[k] == agent_next_pos[j]:
                    blocked = True
                    break

        for member in line:
            if blocked:
                agent_next_pos[member] = self.agent_pos[member]
            settled[member] = True

    if False in settled:
        print('error: False in done')
        print(self.agent_pos)
        print(agent_next_pos)
        print(settled)

    self.agent_pos = agent_next_pos

    # Phase 4: pick-ups and deliveries.
    rewards = []
    for i in range(self.agent_num):
        rewards.append(0)
        pos = self.agent_pos[i]
        a = action[i]
        if a == [0, 0]:
            continue
        if pos in self.source_pos:  # source
            # Only an agent without a city picks up; one that was blocked
            # while standing on a source must not be re-assigned a city.
            if self.agent_city[i] == -1:
                source_idx = self.source_pos.index(pos)
                self.agent_city[i] = self.genCity(self.city_dis)
                self.source_reward[source_idx] += 1
        elif pos in self.hole_pos:  # hole
            hole_idx = self.hole_pos.index(pos)
            # Only a matching city counts as a delivery.
            if self.agent_city[i] == self.hole_city[hole_idx]:
                self.agent_city[i] = -1
                self.agent_reward[i] += 1
                self.hole_reward[hole_idx] += 1
                rewards[-1] = 1

    self.time += 1
    done = self.time == self.total_time
    state = [
        self.agent_pos, self.agent_city, self.agent_reward, self.hole_reward,
        self.source_reward
    ]
    return state, rewards, done, {}