def train_policies(self, load_best_policy=False, load_reinforcement=False):
    """Populate ``self.policies`` with ``self.K`` policies.

    Every entry appended to ``self.policies`` is a four-element list
    ``[policy, info, 0, 0]``. ``info`` is the value returned by the
    policy's load/train call (a file name for reinforcement models,
    otherwise a number of training cases). The two trailing slots are
    initialised to 0; their meaning is not visible from this method.

    Args:
        load_best_policy: If true, slot 0 is filled via
            ``Policy.load_best_model()`` and only slots ``1..K-1`` are
            trained from data.
        load_reinforcement: If true, all ``K`` policies are loaded via
            ``Policy.load_reinforcement_model(i)`` and nothing is trained;
            ``load_best_policy`` is then ignored.

    Training-size schedule (when not loading reinforcement models):
        * ``self.negative_training_power > 0``: policy ``i`` gets
          ``max_cases // (i + 1) ** negative_training_power`` cases.
        * otherwise: a linear ramp from ``max_cases`` (i = 0) down to
          0 (i = K - 1).
    """
    if load_reinforcement:
        # Use self.K like the rest of the method; the previous bare
        # ``game_setting.K`` was not bound to the instance.
        for i in range(self.K):
            policy = Policy(self.game_setting)
            file_name = policy.load_reinforcement_model(i)
            self.policies.append([policy, file_name, 0, 0])
        return

    start = 0
    if load_best_policy:
        policy = Policy(self.game_setting)
        nr_of_training_cases = policy.load_best_model()
        self.policies.append([policy, nr_of_training_cases, 0, 0])
        # Slot 0 is taken by the loaded model; train the remaining ones.
        start = 1

    # Probe call: presumably reports how many cases are available without
    # training (test_nr_of_cases=True) -- TODO confirm against Policy.
    probe = Policy(self.game_setting)
    available = probe.import_data_and_train(
        max_cases=self.max_cases, test_nr_of_cases=True)
    max_cases = min(available, self.max_cases)

    if self.negative_training_power > 0:
        for i in range(start, self.K):
            # int() keeps the case count integral even when the exponent
            # is a float (``//`` would otherwise yield a float).
            nr_of_cases = max(
                0, int(max_cases // ((i + 1) ** self.negative_training_power)))
            if nr_of_cases > 0:
                policy = Policy(self.game_setting)
                actual_nr_of_cases = policy.import_data_and_train(
                    max_cases=nr_of_cases)
            else:
                policy = Policy(self.game_setting, no_model=True)
                actual_nr_of_cases = 0
            self.policies.append([policy, actual_nr_of_cases, 0, 0])
        return

    # Linear schedule. With a single policy the ramp has no length and the
    # original formula divided by zero; give that policy the full budget.
    span = self.K - 1
    for i in range(start, self.K):
        policy = Policy(self.game_setting)
        if span > 0:
            nr_of_cases = max(int(max_cases * (span - i) / span), 0)
        else:
            nr_of_cases = max(max_cases, 0)
        actual_nr_of_cases = 0
        if nr_of_cases > 0:
            actual_nr_of_cases = policy.import_data_and_train(
                max_cases=nr_of_cases)
        self.policies.append([policy, actual_nr_of_cases, 0, 0])