Example 1
def __init__(
    self,
    g,
    p,
    verbose,
    k,
    print_parameters=False,
    save_interval=10,
    actor_net_parameters=None,
    mcts_parameters=None,
):
    self.number_of_episodes_to_play = g
    self.starting_player_option = p
    self.k = k
    self.verbose = verbose
    self.state_manager = None
    self.current_state = None
    self.winner_stats = np.zeros((2, 2))
    self.mcts_parameters = mcts_parameters if mcts_parameters else {}
    if actor_net_parameters:
        self.actor_net_parameters = actor_net_parameters
        self.actor_network = ANET(k, **actor_net_parameters)
    else:
        self.actor_network = ANET(k)
    self.save_interval = save_interval
    if print_parameters:
        self.print_all_parameters()
Example 2
def model_match(models_directory, player1, player2, starting_player=1):
    models_dict = {}
    models = ANET.load_models(models_directory)
    board_size = ANET.infer_board_size_from_model(models[0].model)
    for model in models:
        models_dict[model.episode_number] = model
    player1 = models_dict.get(player1)
    player2 = models_dict.get(player2)
    game = GameVisualizer(
        board_size, player1=player1, player2=player2, starting_player=starting_player
    )
    game.run()
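
For illustration, a minimal sketch of how model_match from Example 2 might be invoked; the directory name and the episode numbers used to look the players up are placeholders, not values taken from the source.

# Hypothetical call to model_match; player1/player2 are episode numbers,
# since model_match keys its lookup dict by model.episode_number.
# "trained_models", 0 and 50 are assumptions for illustration only.
model_match("trained_models", player1=0, player2=50, starting_player=1)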
Example 3
def model_perform_action(self, model: ANET):
    print(self.state_manager.get_state())
    distribution = model.predict(self.state_manager.get_state())
    print(distribution)
    argmax_distribution_index = int(
        np.argmax(distribution)
    )  # Greedy best from distribution
    action = self.state_manager.get_action_from_flattened_board_index(
        argmax_distribution_index, self.state_manager.get_state()
    )
    self.perform_action(GameVisualizer.preprocess_action(action))
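
Example 3 picks the greedy move by taking the argmax over the network's distribution on the flattened board. Below is a minimal sketch of that step, assuming a k * k flattened output; the divmod mapping back to board coordinates is an illustration only, since the project itself delegates this to StateManager.get_action_from_flattened_board_index.

import numpy as np

k = 4                                                # hypothetical board size
distribution = np.random.dirichlet(np.ones(k * k))   # stand-in for model.predict(state)
flat_index = int(np.argmax(distribution))            # greedy best from distribution
row, col = divmod(flat_index, k)                     # assumed row-major flattening
print(f"greedy move: cell ({row}, {col})")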
Example 4
def train_from_cases_and_show_loss():
    cases_directory = "/Users/svoss/KODE/AI-Prog/runs/overnight/cases"
    actor_net_parameters = {
        "buffer_batch_size": 350,
        "max_size_buffer": 3000,
        "replay_buffer_cutoff_rate": 0.3,
        "epochs": 200,
        "verbose": 2,  # 2: one line per epoch
        "save_directory": "trained_models",
        "hidden_layers_structure": [200, 200],
        "learning_rate": 0.05,
    }
    anet, history = ANET.train_network_from_cases(cases_directory, actor_net_parameters)
    anet.save_model(32)
    plt.plot(history.history["loss"])
    plt.plot(history.history["val_loss"])
    plt.title("Model loss")
    plt.ylabel("Loss")
    plt.xlabel("Epoch")
    plt.legend(["Train", "Test"], loc="upper left")
    plt.show()
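
A hedged follow-up to Example 4: the model saved with anet.save_model(32) could presumably be reloaded with the same ANET.load_models and infer_board_size_from_model calls used in Examples 2 and 5. That save_model(32) writes into the "trained_models" directory named by save_directory is an assumption.

# Hypothetical reload of the model trained above (assumed save location).
models = ANET.load_models("trained_models")
board_size = ANET.infer_board_size_from_model(models[0].model)
print(f"loaded {len(models)} model(s) for a {board_size}x{board_size} board")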
Example 5
    def __init__(self, path: str, verbose=False):
        self.models = ANET.load_models(path)
        self.state_manager = None
        self.board_size = ANET.infer_board_size_from_model(self.models[0].model)
        self.verbose = verbose
Example 6
def __init__(self, model_path: str, IP_address=None, verbose=True):
    self.series_id = -1
    BasicClientActorAbs.__init__(self, IP_address, verbose=verbose)
    self.model = ANET.load_model(model_path)
Example 7
class GameSimulator:
    def __init__(
        self,
        g,
        p,
        verbose,
        k,
        print_parameters=False,
        save_interval=10,
        actor_net_parameters=None,
        mcts_parameters=None,
    ):
        self.number_of_episodes_to_play = g
        self.starting_player_option = p
        self.k = k
        self.verbose = verbose
        self.state_manager = None
        self.current_state = None
        self.winner_stats = np.zeros((2, 2))
        self.mcts_parameters = mcts_parameters if mcts_parameters else {}
        if actor_net_parameters:
            self.actor_net_parameters = actor_net_parameters
            self.actor_network = ANET(k, **actor_net_parameters)
        else:
            self.actor_network = ANET(k)
        self.save_interval = save_interval
        if print_parameters:
            self.print_all_parameters()

    def print_all_parameters(self):
        print("===================================")
        print("            PARAMETERS             ")
        print("===================================")
        print("number of games in a batch:", self.number_of_episodes_to_play)
        print("starting-player option:", self.starting_player_option)
        print("Verbose:", self.verbose)
        print("k:", self.k)
        print("save interval:", self.save_interval)
        print("===================================")
        self.print_parameters(self.actor_net_parameters,
                              "          ANET-PARAMETERS          ")
        self.print_parameters(self.mcts_parameters,
                              "          MCTS-PARAMETERS          ")

    @staticmethod
    def print_parameters(parameters, header):
        if parameters:
            print(header)
            print("===================================")
            print("".join(
                [f"{key}: {parameters[key]} \n" for key in parameters.keys()]))
            print("===================================")

    def print_start_state(self, i, timer):
        if self.verbose:
            print(f"--- Starting game {i} ---")
            print(f"Start state: {self.state_manager.pretty_state_string()}")
        else:
            print_loader(
                i,
                self.number_of_episodes_to_play,
                10,
                timer,
                self.number_of_episodes_to_play,
            )

    def print_action(self, action: str):
        if self.verbose:
            x_pos, y_pos, player = self.state_manager.check_and_extract_action_string(
                action, check_player_turn=False)
            print(f"Player {player} placed a piece at ({x_pos}, {y_pos})"
                  f" : {self.state_manager.pretty_state_string()}")

    def print_winner_of_batch_game(self):
        if self.verbose:
            print(
                f"Player {2 if self.state_manager.current_player() == 1 else 1} wins the game"
            )

    def print_run_summary(self):
        print("\n------------- SUMMARY -------------")
        header = ["winning player \ starting player", "1", "2"]
        t = PrettyTable(header)
        for index, row in enumerate(self.winner_stats):
            line = [str(index + 1)]
            for cell in row:
                line.append(cell)
            t.add_row(line)
        print(t)

    def save_loss_graph(self, loss, val_loss, id):
        loss = np.array(loss)
        val_loss = np.array(val_loss)
        plt.clf()
        fig, ax1 = plt.subplots()
        ax1.set_xlabel("Games")
        ax1.set_ylabel("Loss")
        ax1.plot(loss, label="Train")
        ax1.plot(val_loss, label="Test")
        ax1.legend(loc="upper right")
        ax1.set_title("Model loss")

        ax2 = ax1.twinx()
        color = "tab:green"
        ax2.set_ylabel("Delta train test", color=color, alpha=0.5)
        ax2.plot(np.abs(loss - val_loss),  # |train loss - val loss| per game
                 color=color,
                 alpha=0.5)
        fig.tight_layout()
        if not os.path.exists("loss_graphs"):
            os.mkdir("loss_graphs")
        plt.savefig(f"loss_graphs/{id}.png")

    def update_winner_stats(self, starting_player: int) -> None:
        second_index = starting_player - 1
        winning_player = 1 if self.state_manager.current_player() == 2 else 2
        first_index = winning_player - 1
        self.winner_stats[first_index][second_index] += 1

    def run(self):
        starting_player = StartingPlayerOptions.get_starting_player(
            self.starting_player_option)
        self.actor_network.save_model(episode_number=0)
        loss = []
        val_loss = []
        timer = Timer()
        for i in range(1, self.number_of_episodes_to_play + 1):
            self.state_manager = StateManager(self.k, starting_player)
            self.print_start_state(i, timer)
            timer.start()
            mcts = MCTS(
                self.state_manager,
                self.actor_network,
                random_simulation_rate=math.tanh(
                    i / self.number_of_episodes_to_play) * 1.2,
                **self.mcts_parameters,
            )
            while not self.state_manager.is_end_state():
                action = mcts.run(self.state_manager.get_state(),
                                  i / self.number_of_episodes_to_play)
                self.state_manager.perform_action(action)
                self.print_action(action)
            self.update_winner_stats(starting_player)
            self.print_winner_of_batch_game()
            history = self.actor_network.train()
            loss.append(np.average(history.history["loss"]))
            val_loss.append(np.average(history.history["val_loss"]))
            if self.starting_player_option == StartingPlayerOptions.ALTERNATING:
                starting_player = StateManager.get_opposite_player(
                    starting_player)
            if i % self.save_interval == 0:
                self.save_loss_graph(loss, val_loss, i)
                self.actor_network.save_model(episode_number=i)
            timer.stop()
            if i % 50 == 0:
                self.actor_network.save_buffer_to_file(
                    i, self.k, self.mcts_parameters["number_of_simulations"])
        self.print_run_summary()
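
A minimal driver sketch for the GameSimulator class in Example 7. Every value below is a placeholder chosen for illustration; the actor_net_parameters keys are borrowed from Example 4, and mcts_parameters includes "number_of_simulations" because run() reads that key when it saves the replay buffer every 50 episodes.

# Hypothetical usage of GameSimulator; all argument values are assumptions.
simulator = GameSimulator(
    g=200,                                # number of episodes to play
    p=StartingPlayerOptions.ALTERNATING,  # starting-player option
    verbose=False,
    k=4,                                  # board size
    print_parameters=True,
    save_interval=10,
    actor_net_parameters={"learning_rate": 0.05, "epochs": 200},
    mcts_parameters={"number_of_simulations": 500},
)
simulator.run()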