class ReplayBuffer():
    """Runs a model inside the route planning game and pickles every step to disk.

    Each collected step is written as its own ``.pkl`` file inside ``path``.
    ``Game``, ``Command`` and ``save_object`` are expected to be provided by the
    surrounding module.
    """

    def __init__(self, agent_num, model_structure, model_weight, height, width, path="./buffer", cuda=False):
        """Store the configuration, try to load the weights and create the buffer folder.

        Args:
            agent_num: number of agents placed in the game.
            model_structure: the pytorch model used to pick the actions.
            model_weight: path of the weight file read by ``loadWeight``.
            height: height handed to ``Game``.
            width: width handed to ``Game``.
            path: folder that receives the pickled steps (created if missing).
            cuda: when true the model and its inputs are moved to the GPU.
        """
        # the model is defined using the pytorch lib
        self.model = model_structure
        self.model_weight = model_weight
        # must be known before loadWeight() so it can choose the map_location
        self.cuda = cuda
        self.loadWeight()

        # the game is the environment of the route planning game in the project
        self.height = height
        self.width = width
        self.game = None
        self.hx = None
        self.cx = None
        self.bot_observe = None
        self.agent_numbers = agent_num

        # default score setting in the game
        self.acquisition_sum = 700
        self.explored_target_sum = 70
        self.explored_sum = 40
        self.time_decrease = -0.00005
        self.crash_time_penalty = -0.0001
        self.crash_sum = -20

        # path is the folder of the replay buffer
        self.path = path
        os.makedirs(path, exist_ok=True)

        self.IOlimit = 20
        self.buffer_limit = 1000
        print(cuda)

    def _observeAgents(self):
        """Return the stacked per-agent images of the current game as a uint8 array."""
        frames = [self.game.outputAgentImage(i) for i in range(self.agent_numbers)]
        return np.array(frames).astype(np.uint8)

    def collect(self, game_round):
        """Collect steps forever, flushing them to disk every ``IOlimit`` steps.

        This method never returns. After each flush the weights are reloaded
        from ``model_weight``. A new game is started whenever the current one is
        no longer active or has reached ``game_round`` rounds.

        Every stored step is a list:
            [0] observation before the step (numpy uint8 array)
            [1] hx before the step (tensor)
            [2] cx before the step (tensor)
            [3] model action output, moved to the cpu (tensor)
            [4] game score before the step
            [5] observation after the step (numpy uint8 array)
            [6] hx after the step, moved to the cpu (tensor)
            [7] cx after the step, moved to the cpu (tensor)
            [8] game score after the step

        Args:
            game_round: round count at which a game is abandoned and restarted.
        """
        if self.game is None:
            self.initialGame()
        if self.cuda:
            self.model.cuda()

        while True:
            data = []
            collect_iter = 0
            while collect_iter < self.IOlimit:
                if not (self.game.active and self.game.state < game_round):
                    # the game is over (or ran out of rounds): start a new one
                    self.initialGame()
                    continue

                if self.bot_observe is None:
                    self.bot_observe = self._observeAgents()

                temp = [self.bot_observe, self.hx, self.cx]

                # model output process
                observation = torch.from_numpy(self.bot_observe).float()
                if self.cuda:
                    self.hx = self.hx.cuda()
                    self.cx = self.cx.cuda()
                    observation = observation.cuda()
                # NOTE(review): this call is not wrapped in torch.no_grad() and
                # hx/cx are fed back without detach(), so the autograd graph
                # presumably keeps growing during a game - confirm this is wanted.
                critic_score, bot_command, (self.hx, self.cx) = self.model(observation, self.hx, self.cx)

                temp.append(bot_command.cpu())
                temp.append(self.game.outputScore())

                # input the commands of the model to the game
                commands = self.interpretAction(bot_command.cpu())
                self.game.runOneRound(commands)

                self.bot_observe = self._observeAgents()
                temp.append(self.bot_observe)
                temp.append(self.hx.cpu())
                temp.append(self.cx.cpu())
                temp.append(self.game.outputScore())

                data.append(temp)
                collect_iter += 1
                # collect_iter is already >= 1 here, so no separate zero check is needed
                if collect_iter % 5 == 0:
                    print('Collecting Progress: ', collect_iter, ' / ', self.IOlimit, ' | Score: %02.5f' % self.game.outputScore())

            # one file per step, named after the time of writing
            for record in data:
                filename = self.path + '/' + datetime.datetime.now().isoformat() + '.pkl'
                save_object(filename, record)

            self.loadWeight()

    def loadWeight(self):
        """Load ``model_weight`` into the model, or print a notice if the file is missing."""
        if not os.path.exists(self.model_weight):
            print('Model weight [{0}] not found'.format(self.model_weight))
            return
        if self.cuda:
            weights = torch.load(self.model_weight)
        else:
            # lets a checkpoint saved from the GPU be read on a machine without one
            weights = torch.load(self.model_weight, map_location='cpu')
        self.model.load_state_dict(weights)

    def initialGame(self):
        """Start a fresh random game and reset the lstm memory and the observation."""
        t = (self.height + self.width) / 2
        self.game = Game(self.height, self.width)
        self.game.setRandomMap(self.agent_numbers, int(t * 0.3) ** 2, 1)
        self.game.setScore(self.acquisition_sum, self.explored_target_sum, self.explored_sum,
                           self.time_decrease, self.crash_time_penalty, self.crash_sum)
        # one round in which no agent moves
        self.game.runOneRound([Command(i, 0, 0) for i in range(self.agent_numbers)])

        # the memory tensors of the lstm in the model
        self.hx = torch.zeros([self.agent_numbers, 512], dtype=torch.float32)
        self.cx = torch.zeros([self.agent_numbers, 512], dtype=torch.float32)
        self.bot_observe = self._observeAgents()
        print('New Game!')

    def resetGameScore(self, acquisition_sum, explored_target_sum, explored_sum, time_decrease, crash_time_penalty, crash_sum):
        """Replace the score settings; they are handed to the game by the next ``initialGame``."""
        self.acquisition_sum = acquisition_sum
        self.explored_target_sum = explored_target_sum
        self.explored_sum = explored_sum
        self.time_decrease = time_decrease
        self.crash_time_penalty = crash_time_penalty
        self.crash_sum = crash_sum

    def countFileNum(self, path):
        """Return how many entries of ``path`` end with ``.pkl``."""
        return sum(1 for name in os.listdir(path) if name.endswith('.pkl'))

    def grabFileName(self, path):
        """Return the names of the entries of ``path`` that end with ``.pkl``."""
        return [name for name in os.listdir(path) if name.endswith('.pkl')]

    def resetIOBatch(self, new_limit):
        """Set how many steps ``collect`` gathers before writing them to disk."""
        self.IOlimit = new_limit

    def resetBufferLimit(self, new_limit):
        """Set ``buffer_limit``."""
        self.buffer_limit = new_limit

    def interpretAction(self, command_tensor):
        """Turn the model output into one ``Command`` per row.

        The tensor is viewed as rows of 5 values and the index of the largest
        value of each row selects the command.
        """
        command_tensor = command_tensor.view(-1, 5)
        command = torch.max(command_tensor, 1)[1].tolist()
        return [self.intoCommand(i, choice) for i, choice in enumerate(command)]

    def intoCommand(self, i, command):
        """Map an action index to the ``Command`` of agent ``i``.

        Indices 0 to 3 select (0, 1), (1, 0), (0, -1) and (-1, 0); any other
        value selects (0, 0).
        """
        if command == 0:
            return Command(i, 0, 1)
        elif command == 1:
            return Command(i, 1, 0)
        elif command == 2:
            return Command(i, 0, -1)
        elif command == 3:
            return Command(i, -1, 0)
        return Command(i, 0, 0)
class ReplayBuffer():
    """Keeps the most recent transitions of a randomly played game in memory.

    ``Game``, ``Command``, ``Data``, ``intoCommand``, ``plt`` and ``sleep`` are
    expected to be provided by the surrounding module.
    """

    def __init__(self, agent_num, height, width, model, modelpath, game_round, cuda=True):
        """Store the configuration and start the first game.

        Args:
            agent_num: number of agents placed in the game.
            height: height handed to ``Game``.
            width: width handed to ``Game``.
            model: the pytorch model kept in sync by ``collect``.
            modelpath: path of the weight file read by ``loadWeight``.
            game_round: round count handed to ``Game``; ``collect`` restarts the
                game once it is reached.
            cuda: when true the model is moved to the GPU.
        """
        # the game is the environment of the route planning game in the project
        self.height = height
        self.width = width
        self.game_round = game_round
        self.agent_num = agent_num
        self.model = model
        self.modelpath = modelpath
        self.cuda = cuda
        # placeholder, replaced by the first observation in initialGame()
        self.state = torch.zeros([self.agent_num, 3, self.height, self.width], dtype=torch.float32)
        # frame history used by play(), allocated on first use
        self.states = None

        # default score setting in the game
        self.acquisition_sum = 400
        self.explored_target_sum = 70
        self.explored_sum = 40
        self.total_score = self.acquisition_sum + self.explored_target_sum + self.explored_sum
        self.time_decrease = -0.00005
        self.crash_time_penalty = -0.0001
        self.crash_sum = -400
        self.reg_val = 1

        self.memory = []
        self.buffer_limit = 1000
        self.game = None
        self.score = None
        self.initialGame()

    def _observe(self):
        """Return the stacked per-agent images of the current game as a float32 tensor."""
        frames = [self.game.outputAgentImage(i) for i in range(self.agent_num)]
        return torch.from_numpy(np.array(frames).astype(np.float32))

    def collect(self, model, verbose=1):
        """Play one round with random actions and remember the transition.

        The weights of ``model`` are copied into the internal model first. If
        the game is no longer active, or has reached ``game_round`` rounds, a new
        game is started instead and nothing is stored.

        Args:
            model: model whose ``state_dict`` is copied into ``self.model``.
            verbose: when equal to 1 the game score is printed after the round.
        """
        if self.cuda:
            self.model.cuda()
        self.model.load_state_dict(model.state_dict())

        if not (self.game.active and self.game.state < self.game_round):
            # the game is over (or ran out of rounds): start a new one
            self.initialGame()
            return

        action = self.select_action()
        self.game.runOneRound([intoCommand(i, action[i]) for i in range(self.agent_num)])
        next_state = self._observe()
        if verbose == 1:
            print(self.game.outputScore())

        # only (state, action, next_state) is stored; no end-of-game flag is kept
        self.memory.append(Data(self.state, action, next_state))
        self.state = next_state
        if len(self.memory) > self.buffer_limit:
            # drop the oldest transition
            del self.memory[0]

    def select_action(self):
        """Return a tensor holding one random action index in [0, 4] per agent."""
        return torch.tensor([random.randint(0, 4) for _ in range(self.agent_num)])

    def save_state(self, path):
        """Show the current image of every agent in one grid of subplots.

        The first three planes of each agent image become the three colour
        channels of the picture. ``path`` is not used by this method: the figure
        is shown, nothing is written.
        """
        # smallest square grid holding all agents, never smaller than 2 x 2
        side = 2
        while side * side < self.agent_num:
            side += 1
        for i in range(self.agent_num):
            arr = np.asarray(self.game.outputAgentImage(i))
            # (3, rows, cols) -> (rows, cols, 3)
            img = np.transpose(arr[:3], (1, 2, 0)).astype(np.uint8)
            plt.subplot(side, side, i + 1)
            plt.imshow(img)
        plt.show()

    def play(self, model, game_round):
        """Let ``model`` drive the game until it ends or reaches ``game_round`` rounds.

        A history of four frames is kept in ``self.states``; each round the
        history is shifted by one, the newest frame is put last and the three
        most recent frames are handed to the model. The chosen actions and the
        score are printed every round.

        Args:
            model: callable returning a tensor with at least three dimensions;
                the index of the largest value along dimension 2 is the action.
            game_round: round count at which playing stops.
        """
        while self.game.active and self.game.state < game_round:
            frame = self._observe()
            if getattr(self, 'states', None) is None:
                # first use: four empty frames shaped like the observation
                self.states = torch.zeros((4,) + tuple(frame.shape), dtype=frame.dtype)

            self.states[0] = self.states[1]
            self.states[1] = self.states[2]
            self.states[2] = self.states[3]
            self.states[3] = frame

            action = model(self.states[1:].unsqueeze(0)).max(2)[1].view(self.agent_num)
            print(action)
            action = [intoCommand(i, action[i]) for i in range(self.agent_num)]
            self.game.runOneRound(action)
            print(self.game.outputScore())
            sleep(0.1)

    def initialGame(self):
        """Start a fresh random game and take the first score and observation."""
        t = (self.height + self.width) / 2
        self.game = Game(self.height, self.width, self.game_round)
        self.game.setRandomMap(self.agent_num, int(t * 0.3) ** 2, int(t * 0.1) ** 2)
        self.game.setScore(self.acquisition_sum, self.explored_target_sum, self.explored_sum,
                           self.time_decrease, self.crash_time_penalty, self.crash_sum, self.reg_val)
        # one round in which no agent moves
        self.game.runOneRound([Command(i, 0, 0) for i in range(self.agent_num)])
        self.score = np.array([self.game.outputScore() for i in range(self.agent_num)])
        self.state = self._observe()
        print('New Game!')

    def loadWeight(self):
        """Load ``modelpath`` into the model, or print a notice if the file is missing."""
        if not os.path.exists(self.modelpath):
            print('Model weight [{0}] not found'.format(self.modelpath))
            return
        if self.cuda:
            weights = torch.load(self.modelpath)
        else:
            weights = torch.load(self.modelpath, map_location='cpu')
        self.model.load_state_dict(weights)