Example #1
    def initialise_rule_based_players(self):
        self.players = {}
        for strat in self.strategy_types:
            self.players[strat] = get_player("computer", None, strat)

        self.players_rand = {1: [], 2: []}
        for p, strat in product([1, 2], self.strategy_types):
            self.players_rand[p].append(get_player("computer", None, strat))
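A minimal follow-on sketch, assuming the pools built above are meant to supply rule-based opponents; the helper below is hypothetical and does not appear in the original example.

    def sample_rule_based_opponents(self):
        # Hypothetical helper (an assumption): draw one independent
        # rule-based opponent per side from the pools built above.
        import random  # local import only to keep the sketch self-contained
        opponent_1 = random.choice(self.players_rand[1])
        opponent_2 = random.choice(self.players_rand[2])
        return opponent_1, opponent_2
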
Example #2
def play_game(gameplay, params_1, params_2):
    board, tiles = Board(), Tiles()
    player_1 = get_player(params_1["player_type"],
                          board,
                          params_1["strategy_type"],
                          params=params_1)
    player_2 = get_player(params_2["player_type"],
                          board,
                          params_2["strategy_type"],
                          params=params_2)
    winner, _ = gameplay.play_test_game(player_1, player_2)
    return winner
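A possible call site for this variant, as a sketch: only the "player_type" and "strategy_type" keys come from the example above, while the concrete values and the gameplay object (assumed to already exist and expose play_test_game) are assumptions.

params_1 = {"player_type": "computer", "strategy_type": "random"}
params_2 = {"player_type": "computer", "strategy_type": "random"}
winner = play_game(gameplay, params_1, params_2)
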
Example #3
    def initialise_game(self, player_to_start):
        super().initialise_game()
        strat_1, strat_2 = get_strategy_types(self.params)
        self.players[1] = get_player(self.params[1]["player_type"],
                                     self.board,
                                     strat_1,
                                     params=self.params[1])
        self.players[2] = get_player(self.params[2]["player_type"],
                                     self.board,
                                     strat_2,
                                     params=self.params[2])

        self.turn_of = get_other_player(player_to_start)
        self.other = get_other_player(self.turn_of)
        self.representation = RepresentationGenerator(self.params)
Example #4
    def initialise_game(self, _):
        super().initialise_game()
        strat_1, strat_2 = get_strategy_types(self.params)
        self.players[1] = get_player(self.params[1]["player_type"],
                                     self.board,
                                     strat_1,
                                     params=self.params[1])
        self.players[2] = get_player(self.params[2]["player_type"],
                                     self.board,
                                     strat_2,
                                     params=self.params[2])

        self.turn_of = np.random.choice([1, 2])
        self.other = get_other_player(self.turn_of)
        self.players[self.other].pick_up(self.tiles)
        self.players[self.turn_of].pick_up(self.tiles)
        self.representation = RepresentationGenerator()
Example #5
def play_game(gameplay, params):
    strat_1, strat_2 = get_strategy_types(params)
    player_1 = get_player(params[1]["player_type"], None, strat_1, params=params[1])
    player_2 = get_player(params[2]["player_type"], None, strat_2, params=params[2])
    winner, _ = gameplay.play_test_game(player_1, player_2)
    return winner
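Unlike the two-argument variant above, this one takes a single dict keyed by player number. A sketch of that layout follows; the "strategy_type" key and the concrete values are assumptions, since the strategies are actually resolved through get_strategy_types.

params = {
    1: {"player_type": "computer", "strategy_type": "random"},
    2: {"player_type": "computer", "strategy_type": "random"},
}
winner = play_game(gameplay, params)
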
Example #6
    def train(self):
        self.initialise_rule_based_players()

        self.training_p1 = get_player(
            "computer",
            None,
            "rl",
            params={"max_eval_batch_size": self.p.max_eval_batch_size})
        self.training_p2 = get_player(
            "computer",
            None,
            "rl",
            params={"max_eval_batch_size": self.p.max_eval_batch_size})
        self.training_p1.strategy.set_model(self.get_new_network())
        self.training_p2.strategy.set_model(self.get_new_network())

        self.test_player = get_player(
            "computer",
            None,
            "rl",
            params={"max_eval_batch_size": self.p.max_eval_batch_size})
        self.test_player.strategy.set_model(self.get_new_network())

        if self.p.restore_ckpt_dir is not None:
            print("Loading checkpoint")
            load_ckpt_path = os.path.join(self.p.restore_ckpt_dir,
                                          "latest.pth")
            self.load_model(load_ckpt_path)

            # self.net.load_state_dict(torch.load(load_ckpt_path, map_location=self.device))
            # set_model_to_float(self.net)

            self.load_optimiser(self.p.restore_ckpt_dir)
            set_optimizer_params(self.optimizer,
                                 lr=self.lr_tracker,
                                 weight_decay=self.p.weight_decay)

        self.save_model(self.latest_ckpt_path)

        if self.p.restore_ckpt_dir is not None:
            load_ckpt_path = os.path.join(self.p.restore_ckpt_dir,
                                          "best_self.pth")
        else:
            load_ckpt_path = self.latest_ckpt_path

        self.training_p1.strategy.load_model(load_ckpt_path)
        self.training_p2.strategy.load_model(load_ckpt_path)

        if self.p.restore_ckpt_dir is not None:
            p1 = self.training_p1
            p2 = self.training_p2
        else:
            p1 = get_player("computer", None, "random")
            p2 = get_player("computer", None, "random")

        self.fill_replay_buffer(p1, p2)
        self.add_n_games_to_replay_buffer(self.training_p1, self.training_p2,
                                          2)
        self.net = self.net.to(self.device)

        running_loss, running_error = 0.0, 0.0
        best_win_rate_rule = 0.0
        self.training_finished = False
        self.add_graph_to_logs()

        print("Start training")
        while not self.training_finished:
            print("Step", self.current_step)

            self.add_n_games_to_replay_buffer(self.training_p1,
                                              self.training_p2,
                                              self.p.episodes_per_step)
            avg_loss, abs_error, vis_inputs = self.apply_n_learning_updates(
                self.p.updates_per_step)
            running_loss += avg_loss
            running_error += abs_error

            if self.current_step > 0 and self.current_step % int(
                    self.p.log_every_n_steps) == 0:
                avg_running_loss = running_loss / float(
                    self.p.log_every_n_steps)
                mean_abs_error = running_error / float(
                    self.p.log_every_n_steps)
                self.write_metrics_to_tensorboard(avg_running_loss,
                                                  mean_abs_error)
                running_loss, running_error = 0.0, 0.0

            if self.current_step % int(self.p.vis_every_n_steps) == 0:
                vis_figs = generate_debug_visualisation(vis_inputs)
                self.writer.add_figure('examples',
                                       vis_figs,
                                       global_step=self.current_step)

            if self.current_step % int(self.p.test_every_n_steps) == 0:
                self.save_model(self.latest_ckpt_path)
                copyfile(
                    self.latest_ckpt_path,
                    os.path.join(self.logs_dir,
                                 f"ckpt-{self.current_step}.pth"))
                self.test_player.strategy.load_model(self.latest_ckpt_path)
                self.save_optimiser()

                print(f"Playing {self.p.n_test_games} test games against self")
                self_win_rate, _, _ = self.play_n_test_games(
                    self.test_player,
                    self.training_p1,
                    self.p.n_test_games,
                    learn=False)
                self.writer.add_scalar('win_rates/rl', self_win_rate,
                                       self.current_step)
                print("Win rate: {:.2f}".format(self_win_rate))

                if self_win_rate > self.p.improvement_threshold:
                    print("Best self model improved!")
                    self.training_p1.strategy.load_model(self.latest_ckpt_path)
                    self.training_p2.strategy.load_model(self.latest_ckpt_path)
                    copyfile(self.latest_ckpt_path, self.best_self_ckpt_path)
                    self.best_self_model_step = self.current_step

                win_rate_rule = 0.0
                for strat in self.strategy_types:
                    print(
                        f"Playing {self.p.n_other_games} test games against {strat}"
                    )
                    win_rate, _, _ = self.play_n_test_games(
                        self.test_player,
                        self.players[strat],
                        self.p.n_other_games,
                        learn=False)
                    self.writer.add_scalar(f'win_rates/{strat}', win_rate,
                                           self.current_step)
                    print("Win rate: {:.2f}".format(win_rate))
                    win_rate_rule += win_rate

                if win_rate_rule >= best_win_rate_rule:
                    best_win_rate_rule = win_rate_rule
                    print("Best rule model improved!")
                    copyfile(self.latest_ckpt_path, self.best_rule_ckpt_path)
                    self.best_rule_model_step = self.current_step

                self.save_training_state()

            self.step_learning_rate_scheduling()
            self.current_step += 1

        print(f"Training finished after {self.current_step} steps...")
        print(f"Final best self model was at step {self.best_self_model_step}")
        print(f"Final best rule model was at step {self.best_rule_model_step}")
        self.writer.close()