def randomGame():
    """Run a two-round demo on a randomly populated 20x20 map.

    Creates the game, fills it with a random map, configures scoring and
    then plays the same three movement commands for two rounds, printing
    the console info and the score after each round.
    """
    print("\n== init random game ==")
    game = Game(20, 20)  # height, width
    game.setRandomMap(3, 3, 4)  # numbers of agents, targets, obstacles
    game.setScore(100, 20, 10, -2, -0.04, -20)
    game.printGodInfo()

    # (agent id, dx, dy) -- the same moves are issued in both rounds.
    moves = ((0, 1, 1), (1, -1, 1), (2, 1, -1))
    for banner in ("\n== 1st round ==", "\n== 2ed round =="):
        print(banner)
        commands = [Command(agent_id, dx, dy) for agent_id, dx, dy in moves]
        game.runOneRound(commands)
        game.printConsoleInfo()
        print("Score: " + str(game.outputScore()))
def testManualGameImageOutput2():
    """Demo: cluster visible targets by nearest agent and pick one per agent.

    Sets up a 20x20 game with 50 random targets and three manually placed
    agents, plays one round, assigns every target currently listed in
    ``game.consolemap.targets`` to the closest agent (ties go to the
    lower agent id) and prints, for each agent, the nearest target of its
    cluster.  Agents whose cluster is empty are skipped and stay in
    "no target" mode instead of raising ``IndexError``.
    """
    print("\n== init manual setting game ==")
    height = 20
    width = 20
    # mode[i] becomes True once agent i has been given a target
    mode = {0: False, 1: False, 2: False}
    # belongs[i] collects the targets that are closest to agent i
    belongs = {0: [], 1: [], 2: []}

    game = Game(height, width)
    game.setRandomMap(0, 50, 0)  # numbers of agents, targets, obstacles
    game.setScore(100, 10, -0.01, -100)
    game.printGodMap()

    agents = {
        0: Agent(0, 0, 0, height, width, r=5),  # id, x, y, height, width
        1: Agent(1, width - 1, 0, height, width, r=5),
        2: Agent(2, int(width / 2), height - 1, height, width, r=5),
    }
    game.setAgents(agents)
    game.printConsoleMap()
    game.runOneRound([Command(0, 1, 1), Command(1, -1, 1), Command(2, -1, -1)])

    # Cluster: each target goes to the agent with the smallest distance.
    for item in game.consolemap.targets:
        index = 0
        for rival in (1, 2):
            if target_agent_len(item, agents[index]) > target_agent_len(
                    item, agents[rival]):
                index = rival
        belongs[index].append(item)
    print(belongs)

    for i in range(3):
        # Nothing to do if the agent already has a target, and nothing to
        # choose from if no target was clustered to it.
        if mode[i] or not belongs[i]:
            continue
        # min() returns the first minimal element, matching the original
        # strict "<" scan that started from belongs[i][0].
        target_find = min(
            belongs[i],
            key=lambda candidate, agent=agents[i]: target_agent_len(
                candidate, agent))
        print(target_find)
        mode[i] = True

    game.printConsoleMap()
    # NOTE(review): the original text continued with an unterminated
    # triple-quoted block starting "for i in range(0, 10):".  It was never
    # closed, so it is left out here to keep the file parseable.
def testManualGameImageOutput1():
    """Step a single agent through three scripted rounds on a 10x10 map.

    Obstacles and targets are laid out manually, one agent (r=3) is placed
    at (7, 6), and the console map is printed after setup and after each
    of the three rounds.
    """
    print("\n== init manual setting game ==")
    height = 10
    width = 10
    game = Game(height, width)

    obstacles = [{"x": k, "y": k} for k in (1, 2, 3)]
    targets = [{"x": k, "y": k} for k in (4, 5, 6)]
    game.setObstacles(obstacles)
    game.setTargets(targets)
    #game.setScore(100, 10, -0.01, -100)
    game.printGodMap()

    agents = {
        0: Agent(0, 7, 6, height, width, r=3),  # id, x, y, height, width
    }
    game.setAgents(agents)
    game.printConsoleMap()

    # (banner, dx, dy); per the original notes the agent is expected to
    # move (7, 6) -> (6, 5) -> (5, 5) -> (4, 5).
    script = (
        ("\n== 1st round ==", -1, -1),
        ("\n== 2st round ==", -1, 0),
        ("\n== 3st round ==", -1, 0),
    )
    for banner, dx, dy in script:
        print(banner)
        game.runOneRound([Command(0, dx, dy)])
        game.printConsoleMap()
def manualGame():
    """Run a two-round demo on a hand-built 20x20 map with three agents.

    Obstacles, targets and agents are placed explicitly, scoring is left
    at the game's defaults (``setScore()`` with no arguments), and the
    same three movement commands are issued for two rounds with the
    console info printed after each.
    """
    print("\n== init manual setting game ==")
    height = 20
    width = 20
    game = Game(height, width)

    obstacles = [{"x": k, "y": k} for k in (1, 2, 3)]
    targets = [{"x": k, "y": k} for k in (10, 12, 13)]
    game.setObstacles(obstacles)
    game.setTargets(targets)

    agents = {
        0: Agent(0, 4, 4, height, width),  # id, x, y, height, width
        1: Agent(1, 15, 17, height, width),
        2: Agent(2, 12, 15, height, width),
    }
    game.setAgents(agents)
    game.setScore()
    game.printGodInfo()

    # (agent id, dx, dy) -- identical in both rounds.
    moves = ((0, 1, 1), (1, -1, 1), (2, 1, -1))
    for banner in ("\n== 1st round ==", "\n== 2ed round =="):
        print(banner)
        commands = [Command(agent_id, dx, dy) for agent_id, dx, dy in moves]
        game.runOneRound(commands)
        game.printConsoleInfo()
def testManualGameImageOutput2():
    """Greedy three-agent exploration demo on a 40x40 map with 200 targets.

    Each round the currently visible targets are clustered by nearest
    agent; an agent without a target picks the closest one of its cluster
    and walks towards it, otherwise it follows the direction suggested by
    ``no_target_walk``.  The loop ends when no target is listed in
    ``game.consolemap.targets`` and ``haveunseenspace`` reports nothing
    left.  Rounds in which two agents share a cell are counted and the
    total is printed at the end.
    """
    print("\n== init manual setting game ==")
    height = 40
    width = 40
    crash = 0
    # mode[i] is True while agent i has a target assigned right now
    mode = {0: False, 1: False, 2: False}
    found_target = []
    now_target = {0: [], 1: [], 2: []}
    belongs = {0: [], 1: [], 2: []}
    cmd = []
    # direction code returned by no_target_walk -> movement delta
    no_target_command = {
        0: {"dx": 0, "dy": 0},
        1: {"dx": 1, "dy": -1},
        2: {"dx": -1, "dy": -1},
        3: {"dx": -1, "dy": 1},
        4: {"dx": 1, "dy": 1},
    }

    game = Game(height, width)
    game.setRandomMap(0, 200, 0)  # numbers of agents, targets, obstacles
    #game.setScore(100, 10, -0.01, -100)
    game.printGodMap()

    agents = {
        0: Agent(0, 0, 0, height, width, r=5),  # id, x, y, height, width
        1: Agent(1, width - 1, 0, height, width, r=5),
        2: Agent(2, int(width / 2), height - 1, height, width, r=5),
    }
    game.setAgents(agents)

    game.runOneRoundwithoutMovement()
    game.printConsoleMap()

    round_no = 1
    while (game.consolemap.targets != []
           or haveunseenspace(game.consolemap.areas, height, width)):
        print("====the %d round" % round_no)
        found_target = game.consolemap.targets

        # cluster the targets: each one goes to its nearest agent
        for item in found_target:
            owner = 0
            for rival in (1, 2):
                if target_agent_len(item, agents[owner]) > target_agent_len(
                        item, agents[rival]):
                    owner = rival
            belongs[owner].append(item)

        cmd = []  # the new commands for this round
        for i in agents:
            if not mode[i]:
                # assign the closest target of the agent's cluster
                now_target[i] = []
                for candidate in belongs[i]:
                    if now_target[i] == [] and candidate != []:
                        now_target[i] = candidate
                        mode[i] = True
                    elif target_agent_len(candidate, agents[i]) < target_agent_len(
                            now_target[i], agents[i]):
                        now_target[i] = candidate

            if not mode[i]:
                # still no target: follow the exploration heuristic
                direction = no_target_walk(game.getmap(), agents[i])
                step = no_target_command[direction]
                cmd.append(Command(agents[i].id, step["dx"], step["dy"]))
                print("agent %d goes" % agents[i].id, step["dx"], "and",
                      step["dy"])
            else:
                cmd.append(walk(now_target[i], agents[i]))
                # the target is re-selected from scratch next round
                mode[i] = False

        game.runOneRound(cmd)

        # count the pairs of agents that ended up on the same cell
        for i in agents:
            for j in range(i + 1, len(agents)):
                if agents[i].x == agents[j].x and agents[i].y == agents[j].y:
                    crash += 1

        print(found_target)
        game.printConsoleMap()
        belongs = {0: [], 1: [], 2: []}
        round_no += 1

    print("crush time: %d" % crash)
    print("finish")
class ReplayBuffer():
    """Plays the route-planning game with a recurrent model and dumps transitions.

    The buffer owns a game instance and a model with an (hx, cx) recurrent
    state.  ``collect`` repeatedly lets the model act, records one
    transition per round and writes every transition to its own ``.pkl``
    file inside ``path``.
    """

    def __init__(self, agent_num, model_structure, model_weight, height, width, path = "./buffer", cuda=False):
        """Store the configuration, load the weights and create the buffer folder.

        Args:
            agent_num: number of agents placed in every game.
            model_structure: the model object; it must provide
                ``load_state_dict`` and be callable as
                ``model(observation, hx, cx)``.
            model_weight: path of the weight file read by ``loadWeight``.
            height, width: map size passed to ``Game``.
            path: folder that receives the pickled transitions.
            cuda: run the model on the GPU when True.
        """
        # Must be known before loadWeight() so that a checkpoint can be
        # mapped onto the CPU when CUDA is not used.
        self.cuda = cuda

        # the model held by the replay buffer (defined with pytorch)
        self.model = model_structure
        self.model_weight = model_weight
        self.loadWeight()

        # the game is the environment of the route planning project
        self.height = height
        self.width = width
        self.game = None
        self.hx = None
        self.cx = None
        self.bot_observe = None
        # self.agent_numbers = int(((self.height + self.width) / 2) * 0.3)
        self.agent_numbers = agent_num

        # default score setting in the game
        self.acquisition_sum = 700
        self.explored_target_sum = 70
        self.explored_sum = 40
        self.time_decrease = -0.00005
        self.crash_time_penalty = -0.0001
        self.crash_sum = -20

        # path is the folder of the replay buffer
        self.path = path
        if not os.path.exists(path):
            os.makedirs(path)
        self.IOlimit = 20  # transitions collected per write batch
        self.buffer_limit = 1000
        print(cuda)

    def _observe(self):
        """Return the stacked per-agent images of the current game as uint8."""
        frames = [self.game.outputAgentImage(i) for i in range(self.agent_numbers)]
        return np.array(frames).astype(np.uint8)

    def collect(self, game_round):
        """Collect transitions forever, writing ``IOlimit`` files per batch.

        A game is played until it becomes inactive or ``game.state`` reaches
        ``game_round``; then a fresh game is started.  After each batch the
        model weights are reloaded from ``model_weight``.  This method does
        not return.

        Every stored record is a list with the layout
        ``[obs, hx, cx, action, score, next_obs, next_hx, next_cx,
        next_score]`` (indices 0-8).
        """
        if self.game is None:
            self.initialGame()
        if self.cuda:
            self.model.cuda()

        while True:
            data = []
            collect_iter = 0
            while collect_iter < self.IOlimit:
                if not (self.game.active and self.game.state < game_round):
                    # only occurs when the game is over: start a new one
                    self.initialGame()
                    continue

                if self.bot_observe is None:
                    # The original referenced an undefined local here
                    # (NameError); the observation lives on self.
                    self.bot_observe = self._observe()

                temp = [
                    self.bot_observe,  # [0] numpy uint8 observation
                    self.hx,           # [1] recurrent state (tensor)
                    self.cx,           # [2] recurrent state (tensor)
                ]

                # model forward pass
                # NOTE(review): not wrapped in torch.no_grad(); hx/cx may
                # carry autograd history across rounds -- confirm intent.
                self.bot_observe = torch.from_numpy(self.bot_observe).float()
                if self.cuda:
                    self.hx = self.hx.cuda()
                    self.cx = self.cx.cuda()
                    self.bot_observe = self.bot_observe.cuda()
                critic_score, bot_command, (self.hx, self.cx) = self.model(self.bot_observe, self.hx, self.cx)

                action_cpu = bot_command.cpu()
                temp.append(action_cpu)                 # [3] action tensor
                temp.append(self.game.outputScore())    # [4] score before the move

                # feed the model's commands to the game
                self.game.runOneRound(self.interpretAction(action_cpu))

                self.bot_observe = self._observe()
                temp.append(self.bot_observe)           # [5] next observation
                temp.append(self.hx.cpu())              # [6] next hx
                temp.append(self.cx.cpu())              # [7] next cx
                temp.append(self.game.outputScore())    # [8] score after the move

                data.append(temp)
                collect_iter += 1
                if collect_iter % 5 == 0:
                    print('Collecting Progress: ', collect_iter, ' / ', self.IOlimit, ' | Score: %02.5f' % self.game.outputScore())

            # one pickle per transition, named after the write time
            for record in data:
                filename = self.path + '/' + datetime.datetime.now().isoformat() + '.pkl'
                save_object(filename, record)
            del data
            self.loadWeight()

    def loadWeight(self):
        """Load ``model_weight`` into the model, or report that it is missing."""
        if os.path.exists(self.model_weight):
            # Without CUDA, remap the storages to the CPU so that a
            # checkpoint written on a GPU machine can still be loaded.
            location = None if self.cuda else 'cpu'
            self.model.load_state_dict(torch.load(self.model_weight, map_location=location))
        else:
            print('Model weight [{0}] not found'.format(self.model_weight))

    def initialGame(self):
        """Create a fresh random game and reset the recurrent state."""
        t = (self.height + self.width) / 2
        self.game = Game(self.height, self.width)
        self.game.setRandomMap(self.agent_numbers, int(t * 0.3) ** 2, 1)
        self.game.setScore(self.acquisition_sum, self.explored_target_sum, self.explored_sum,
                           self.time_decrease, self.crash_time_penalty, self.crash_sum)
        # one round of "stay" commands before the first observation is read
        commands = [Command(i, 0, 0) for i in range(self.agent_numbers)]
        self.game.runOneRound(commands)
        # the memory tensors of the model's lstm
        self.hx = torch.zeros([self.agent_numbers, 512], dtype=torch.float32)
        self.cx = torch.zeros([self.agent_numbers, 512], dtype=torch.float32)
        self.bot_observe = self._observe()
        print('New Game!')

    def resetGameScore(self, acquisition_sum, explored_target_sum, explored_sum, time_decrease, crash_time_penalty, crash_sum):
        """Replace the score parameters used for games created afterwards."""
        self.acquisition_sum = acquisition_sum
        self.explored_target_sum = explored_target_sum
        self.explored_sum = explored_sum
        self.time_decrease = time_decrease
        self.crash_time_penalty = crash_time_penalty
        self.crash_sum = crash_sum

    def countFileNum(self, path):
        """Return how many ``.pkl`` files are directly inside ``path``."""
        return len(self.grabFileName(path))

    def grabFileName(self, path):
        """Return the names of the ``.pkl`` files directly inside ``path``."""
        return [name for name in os.listdir(path) if name.endswith('.pkl')]

    def resetIOBatch(self, new_limit):
        """Set the number of transitions collected per write batch."""
        self.IOlimit = new_limit

    def resetBufferLimit(self, new_limit):
        """Set ``buffer_limit``."""
        self.buffer_limit = new_limit

    def interpretAction(self, command_tensor):
        """Turn the model output into one ``Command`` per row.

        The tensor is viewed as rows of 5 values and the index of the
        largest value in each row is the chosen action.
        """
        command_tensor = command_tensor.view(-1, 5)
        command = torch.max(command_tensor, 1)[1].tolist()
        return [self.intoCommand(i, choice) for i, choice in enumerate(command)]

    def intoCommand(self, i, command):
        """Map action index 0-3 to a unit move for agent ``i``; anything else stays."""
        if command == 0:
            return Command(i, 0, 1)
        if command == 1:
            return Command(i, 1, 0)
        if command == 2:
            return Command(i, 0, -1)
        if command == 3:
            return Command(i, -1, 0)
        return Command(i, 0, 0)
class ReplayBuffer():
    """In-memory replay buffer filled by playing the game with random actions.

    ``collect`` advances the owned game by one round per call and stores a
    ``Data(state, action, next_state)`` record in ``memory`` (oldest records
    are dropped beyond ``buffer_limit``).  ``play`` runs a given model on
    the game using a rolling window of recent frames.
    """

    def __init__(self, agent_num, height, width, model, modelpath, game_round, cuda=True):
        """Store the configuration and start the first game.

        Args:
            agent_num: number of agents placed in every game.
            height, width: map size passed to ``Game``.
            model: local model that receives the weights handed to
                ``collect`` / loaded by ``loadWeight``.
            modelpath: path of the weight file read by ``loadWeight``.
            game_round: round limit; passed to ``Game`` and checked in
                ``collect``.
            cuda: move the model to the GPU when True.
        """
        # the game is the environment of the route planning project
        self.height = height
        self.width = width
        self.game_round = game_round
        self.agent_num = agent_num
        self.model = model
        self.modelpath = modelpath
        self.cuda = cuda
        self.state = torch.zeros([self.agent_num, 3, self.height, self.width], dtype=torch.float32)
        # Rolling window of 4 frames used by play(); it was read there but
        # never created, which raised AttributeError.
        self.states = torch.zeros([4, self.agent_num, 3, self.height, self.width], dtype=torch.float32)

        # default score setting in the game
        self.acquisition_sum = 400
        self.explored_target_sum = 70
        self.explored_sum = 40
        self.total_score = self.acquisition_sum + self.explored_target_sum + self.explored_sum
        self.time_decrease = -0.00005
        self.crash_time_penalty = -0.0001
        self.crash_sum = -400
        self.reg_val = 1

        # self.action = torch.tensor([random.randint(0, 4) for i in range(self.agent_num)])
        self.memory = []
        self.buffer_limit = 1000
        self.game = None
        self.score = None
        self.initialGame()

    def collect(self, model, verbose=1):
        """Sync weights from ``model`` and record one transition.

        When the current game is over (inactive or round limit reached) a
        new game is started instead and nothing is recorded.

        Args:
            model: source of the weights copied into ``self.model``.
            verbose: print the game score after the round when equal to 1.
        """
        if self.cuda:
            self.model.cuda()
        self.model.load_state_dict(model.state_dict())

        if not (self.game.active and self.game.state < self.game_round):
            # only occurs when the game is over
            self.initialGame()
            return

        action = self.select_action()
        self.game.runOneRound([intoCommand(i, action[i]) for i in range(self.agent_num)])
        next_state = np.array([self.game.outputAgentImage(i) for i in range(self.agent_num)]).astype(np.float32)
        next_state = torch.from_numpy(next_state)
        if verbose == 1:
            print(self.game.outputScore())

        self.memory.append(Data(self.state, action, next_state))
        self.state = next_state
        if len(self.memory) > self.buffer_limit:
            # drop the oldest record by position rather than by an
            # equality search through the list
            del self.memory[0]

    def select_action(self):
        """Return one uniformly random action index in [0, 4] per agent."""
        return torch.tensor([random.randint(0, 4) for _ in range(self.agent_num)])

    def save_state(self, path):
        """Show every agent's current image in one matplotlib figure.

        NOTE(review): ``path`` is accepted but not used; the figure is only
        displayed, not written to disk.
        """
        # Keep the original 2x2 layout for up to four agents and grow the
        # grid for larger teams instead of failing in plt.subplot.
        cols = max(2, int(np.ceil(np.sqrt(self.agent_num))))
        rows = max(2, int(np.ceil(self.agent_num / cols)))
        for i in range(self.agent_num):
            arr = np.asarray(self.game.outputAgentImage(i))
            # channel-first -> channel-last; size taken from the image
            # itself rather than assuming 32x32
            img = np.zeros(tuple(arr.shape[1:3]) + (3,), dtype=np.uint8)
            for channel in range(3):
                img[:, :, channel] = arr[channel, :, :]
            plt.subplot(rows, cols, i + 1)
            plt.imshow(img)
        plt.show()

    def play(self, model, game_round):
        """Let ``model`` control the agents until the game ends.

        Each round the frame window is shifted by one, the newest
        observation is appended, and the last three frames are fed to the
        model; the index of the largest output per agent is the action.
        """
        while self.game.active and self.game.state < game_round:
            # shift the window: the oldest frame falls out
            for slot in range(3):
                self.states[slot] = self.states[slot + 1]
            s = np.array([self.game.outputAgentImage(i) for i in range(self.agent_num)]).astype(np.float32)
            self.states[3] = torch.from_numpy(s)

            action = model(self.states[1:].unsqueeze(0)).max(2)[1].view(self.agent_num)
            print(action)
            action = [intoCommand(i, action[i]) for i in range(self.agent_num)]
            self.game.runOneRound(action)
            print(self.game.outputScore())
            sleep(0.1)

    def initialGame(self):
        """Create a fresh random game and read the initial score and state."""
        t = (self.height + self.width) / 2
        self.game = Game(self.height, self.width, self.game_round)
        self.game.setRandomMap(self.agent_num, int(t * 0.3) ** 2, int(t * 0.1) ** 2)
        self.game.setScore(self.acquisition_sum, self.explored_target_sum, self.explored_sum,
                           self.time_decrease, self.crash_time_penalty, self.crash_sum, self.reg_val)
        # one round of "stay" commands before the first observation is read
        self.game.runOneRound([Command(i, 0, 0) for i in range(self.agent_num)])
        self.score = np.array([self.game.outputScore() for i in range(self.agent_num)])
        self.state = np.array([self.game.outputAgentImage(i) for i in range(self.agent_num)]).astype(np.float32)
        self.state = torch.from_numpy(self.state)
        print('New Game!')

    def loadWeight(self):
        """Load ``modelpath`` into the model, or report that it is missing."""
        if os.path.exists(self.modelpath):
            # without CUDA the storages are remapped to the CPU
            location = None if self.cuda else 'cpu'
            self.model.load_state_dict(torch.load(self.modelpath, map_location=location))
        else:
            print('Model weight [{0}] not found'.format(self.modelpath))