Example #1
    # Requires: import random, numpy as np, and the project's DQN module.
    def train_by_records(self,
                         train_epochs,
                         mini_batch_size=8,
                         save_file_name='./autojump_rec.npz'):
        self.__read_samples()
        sample_num = len(self.choice)
        self.rewards = self.rewards.reshape((sample_num, 1))
        self.dqn = DQN.ZeroGamaDQN(True, self.sample_states.shape[1:])
        # One-hot encode the recorded action of each sample.
        choice_matrix = np.zeros((sample_num, self.dqn.decision_size))
        choice_matrix[np.arange(sample_num), self.choice] = 1
        test_node = None  # guards the print below when train_epochs == 0
        for epoch in range(train_epochs):
            print('round %d' % epoch)
            # Draw a random mini-batch of sample indices without replacement.
            mini_batch_index = random.sample(range(sample_num),
                                             mini_batch_size)
            test_node = self.dqn.train(self.sample_states[mini_batch_index],
                                       choice_matrix[mini_batch_index],
                                       self.rewards[mini_batch_index], 1)
        print(test_node)
        self.dqn.save_weights(save_file_name)
        return
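
A quick illustration of the one-hot fancy-indexing trick used to build
choice_matrix above, with made-up numbers (3 recorded samples, 4 possible
actions). This is a standalone demo, not part of the project code:

import numpy as np

choice = np.array([2, 0, 3])       # recorded action index per sample
one_hot = np.zeros((3, 4))
one_hot[np.arange(3), choice] = 1  # row i gets a 1 in column choice[i]
# one_hot is now:
# [[0. 0. 1. 0.]
#  [1. 0. 0. 0.]
#  [0. 0. 0. 1.]]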
Example #2
    # Requires: import time, numpy as np, and the project's DQN module.
    def decide_and_jump(self,
                        jump_time,
                        trainable_flag,
                        save_flag,
                        weights_file_name=None):
        self.press_time = 400
        self.getNextState()
        resize_state = self.__preprocess_state(self.state)
        # numpy image arrays are (height, width, channels)
        resize_height = resize_state.shape[0]
        resize_width = resize_state.shape[1]
        channel = resize_state.shape[2]
        self.dqn = DQN.ZeroGamaDQN(trainable_flag,
                                   (resize_height, resize_width, channel),
                                   weights_file_name)
        train_flag = False
        die_flag = False
        last_score = 0
        # Default so the print below cannot fail if the very first frame
        # is already judged dead.
        self.last_decision = None
        for jump_round in range(jump_time):
            print('training_round:', jump_round)
            self.getNextState()
            # Let the network decide; a basic heuristic judges whether the
            # player died, then the press time is set accordingly.
            self.resize_state = self.__preprocess_state(self.state).reshape(
                (1, resize_height, resize_width, channel))
            if die_flag:
                train_flag = False

            die_flag = self.__is_died(self.state)  # death check by the basic heuristic

            if die_flag:
                if trainable_flag and train_flag:
                    # Punish the decision that led to death with reward -1.
                    train_degree = 6
                    label = np.zeros((1, self.dqn.decision_size))
                    label[0, self.last_decision] = 1
                    reward = np.array([[-1]])
                    self.dqn.train(self.last_state, label, reward,
                                   train_degree)

                self.press_time = 500

            else:
                # Survived, so reinforce the last decision taken in the
                # previous state.
                cur_score = self.score_recognizer.recognize(self.state)
                if trainable_flag and train_flag:
                    # TODO: derive the label and training degree from the
                    # score change.
                    train_degree = 3
                    # Reward is the score gained by this jump (mod 10).
                    reward = np.array([[(cur_score - last_score) % 10]])
                    print(reward)
                    label = np.zeros((1, self.dqn.decision_size))
                    label[0, self.last_decision] = 1
                    self.dqn.train(self.last_state, label, reward,
                                   train_degree)
                self.press_time, self.last_decision, self.last_d_prob = self.dqn.run(
                    self.resize_state)
                self.last_state = self.resize_state
                train_flag = True
                last_score = cur_score

            self.__set_button_position(self.state, die_flag)
            self.__press(die_flag)
            print('decision:', self.last_decision, 'press_time:',
                  self.press_time)
            print()
            time.sleep(1)

        if save_flag:
            self.dqn.save_weights('autojump.npz')

        return
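
For reference, here is the DQN.ZeroGamaDQN interface that both examples
rely on, reconstructed from the call sites above. The signatures are
assumptions inferred from usage, not the actual implementation:

class ZeroGamaDQN:
    # Assumed interface only; bodies are omitted.
    def __init__(self, trainable_flag, state_shape, weights_file_name=None):
        # Also exposes self.decision_size, the number of possible
        # decisions (the one-hot width used in both examples).
        pass

    def train(self, states, choice_matrix, rewards, train_degree):
        # Fits the network on a batch; returns a diagnostic value
        # (printed as test_node in Example #1).
        pass

    def run(self, state):
        # Returns (press_time, decision_index, decision_probabilities).
        pass

    def save_weights(self, file_name):
        pass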