def train_by_records(self, train_epochs, mini_batch_size=8, save_file_name='./autojump_rec.npz'):
    """Train the DQN offline from previously recorded samples.

    Loads recorded (state, choice, reward) tuples via ``self.__read_samples()``,
    one-hot encodes the recorded choices, then runs ``train_epochs`` rounds of
    mini-batch training and saves the resulting weights.

    Args:
        train_epochs: Number of training rounds (one mini-batch per round).
        mini_batch_size: Samples drawn (without replacement) per round.
        save_file_name: Path the trained weights are written to.
    """
    self.__read_samples()
    sample_num = len(self.choice)
    # DQN.train expects rewards shaped (N, 1), not a flat vector.
    self.rewards = self.rewards.reshape((sample_num, 1))
    # Trainable network; input shape taken from the recorded states
    # (drop the leading batch axis).
    self.dqn = DQN.ZeroGamaDQN(True, self.sample_states.shape[1:])
    # One-hot matrix of the recorded decisions: row i has a 1 in
    # column self.choice[i].
    choice_matrix = np.zeros((sample_num, self.dqn.decision_size))
    choice_matrix[np.arange(sample_num), self.choice] = 1
    for epoch in range(train_epochs):
        print('round %d' % epoch)
        # random.sample accepts a range directly — no need to build a list.
        mini_batch_index = random.sample(range(sample_num), mini_batch_size)
        test_node = self.dqn.train(self.sample_states[mini_batch_index],
                                   choice_matrix[mini_batch_index],
                                   self.rewards[mini_batch_index], 1)
        print(test_node)
    self.dqn.save_weights(save_file_name)
    return
def decide_and_jump(self, jump_time, trainable_flag, save_flag, weights_file_name=None):
    """Play the jump game for ``jump_time`` rounds, deciding press times with the DQN.

    Each round: capture the current screen state, detect death, optionally
    train the network online on the previous (state, decision) pair —
    negative reward on death, score-delta reward on survival — then ask the
    DQN for the next press time and perform the jump.

    Args:
        jump_time: Number of jump rounds to play.
        trainable_flag: If True, train the DQN online while playing.
        save_flag: If True, save weights to 'autojump.npz' at the end.
        weights_file_name: Optional weights file to initialize the DQN from.
    """
    self.press_time = 400  # initial press time before the first DQN decision
    self.getNextState()
    # Probe one preprocessed frame just to learn the network's input shape.
    resize_state = self.__preprocess_state(self.state)
    resize_width = resize_state.shape[0]
    resize_height = resize_state.shape[1]
    channel = resize_state.shape[2]
    # NOTE(review): width/height are swapped here relative to the probe's
    # axes ((height, width, channel) vs shape[0]=width) — presumably the
    # preprocessed frame is square so it doesn't matter; confirm.
    self.dqn = DQN.ZeroGamaDQN(trainable_flag, (resize_height, resize_width, channel), weights_file_name)
    # train_flag: True only once we have a valid (last_state, last_decision)
    # pair to learn from; reset after a death.
    train_flag = False
    die_flag = False
    last_score = 0
    for _ in range(jump_time):
        print('trainging_round:', _)
        self.getNextState()
        # Preprocess the captured frame into a batch-of-1 network input.
        self.resize_state = self.__preprocess_state(self.state).reshape(
            (1, resize_height, resize_width, channel))
        if die_flag:
            # Previous round ended in death: the stored last_state/decision
            # belong to the finished game, so don't train on them this round.
            train_flag = False
        die_flag = self.__is_died(self.state)  # basic (non-NN) death check
        if die_flag:
            if trainable_flag and train_flag:
                # Punish the decision that led to death: one-hot label on the
                # fatal decision with reward -1, trained with extra weight.
                train_degree = 6
                label = np.zeros((1, self.dqn.decision_size))
                label[0, self.last_decision] = 1
                reward = np.array([[-1]])
                self.dqn.train(self.last_state, label, reward, train_degree)
            # Fixed press time used to tap the restart button.
            self.press_time = 500
        else:
            # Survived: reinforce the last decision, rewarded by score gain.
            cur_score = self.score_recognizer.recognize(self.state)
            if trainable_flag and train_flag:
                # TODO: set label and training degree by score change.
                train_degree = 3
                # NOTE(review): reward is (score delta) mod 10 — presumably
                # each successful jump scores in steps where this maps to a
                # small positive reward; confirm the scoring scheme.
                reward = np.array([[(cur_score - last_score) % 10]])
                print(reward)
                label = np.zeros((1, self.dqn.decision_size))
                label[0, self.last_decision] = 1
                self.dqn.train(self.last_state, label, reward, train_degree)
            # Ask the network for the next action; also remember the
            # decision and its probabilities for the next training step.
            self.press_time, self.last_decision, self.last_d_prob = self.dqn.run(
                self.resize_state)
            self.last_state = self.resize_state
            if not train_flag:
                # From the next round on we have a (state, decision) pair.
                train_flag = True
            last_score = cur_score
        self.__set_button_position(self.state, die_flag)
        self.__press(die_flag)
        print('decision:', self.last_decision, 'press_time:', self.press_time)
        print()
        time.sleep(1)  # wait for the jump animation to settle
    if save_flag:
        self.dqn.save_weights('autojump.npz')
    return