Exemple #1
0
    def train_policies(self, load_best_policy=False, load_reinforcement=False):
        """Fill ``self.policies`` with policy entries.

        Every entry appended is a four-item list ``[policy, info, 0, 0]``.
        ``info`` is whatever the corresponding load/train call returned: the
        value of ``load_reinforcement_model(i)``, of ``load_best_model()``,
        or of ``import_data_and_train(...)`` (``0`` when a policy is given
        no training cases).

        Args:
            load_best_policy: When true, the first entry is a policy restored
                with ``load_best_model()`` and only the remaining
                ``self.K - 1`` policies are trained.
            load_reinforcement: When true, ``self.K`` policies are restored
                with ``load_reinforcement_model(i)`` and nothing is trained;
                ``load_best_policy`` is ignored in that case.

        Returns:
            None. Results are appended to ``self.policies``.
        """
        if load_reinforcement:
            # The original iterated over a bare ``game_setting.K``, a name
            # that is neither a parameter nor a local; use the same ``self.K``
            # that the training loops below rely on.
            for index in range(self.K):
                policy = Policy(self.game_setting)
                file_name = policy.load_reinforcement_model(index)
                self.policies.append([policy, file_name, 0, 0])
            return

        if load_best_policy:
            # Slot 0 is taken by the restored best model, so training
            # starts at index 1.
            start = 1
            best_policy = Policy(self.game_setting)
            nr_of_training_cases = best_policy.load_best_model()
            self.policies.append([best_policy, nr_of_training_cases, 0, 0])
        else:
            start = 0

        # Throwaway policy used only to ask how many cases are available
        # (``test_nr_of_cases=True``); the budget is capped at self.max_cases.
        probe = Policy(self.game_setting)
        max_cases = min(
            probe.import_data_and_train(max_cases=self.max_cases,
                                        test_nr_of_cases=True),
            self.max_cases)

        if self.negative_training_power > 0:
            # Power-law schedule: policy i gets max_cases / (i + 1) ** power.
            for i in range(start, self.K):
                # int() keeps the count integral even when the exponent is a
                # float (``//`` would otherwise yield a float).
                nr_of_cases = max(
                    0,
                    int(max_cases // ((i + 1) ** self.negative_training_power)))

                if nr_of_cases > 0:
                    policy = Policy(self.game_setting)
                    actual_nr_of_cases = policy.import_data_and_train(
                        max_cases=nr_of_cases)
                else:
                    policy = Policy(self.game_setting, no_model=True)
                    actual_nr_of_cases = 0

                self.policies.append([policy, actual_nr_of_cases, 0, 0])
        else:
            # Linear schedule: policy 0 gets the full budget, the last policy
            # gets none.
            span = self.K - 1
            for i in range(start, self.K):
                policy = Policy(self.game_setting)
                if span > 0:
                    nr_of_cases = max(int(max_cases * (span - i) / span), 0)
                else:
                    # Only one policy in total: dividing by (K - 1) would
                    # raise ZeroDivisionError, so give it the full budget
                    # (same as the power-law schedule does for i == 0).
                    nr_of_cases = max(int(max_cases), 0)

                if nr_of_cases > 0:
                    actual_nr_of_cases = policy.import_data_and_train(
                        max_cases=nr_of_cases)
                else:
                    actual_nr_of_cases = 0
                self.policies.append([policy, actual_nr_of_cases, 0, 0])