Example no. 1
    def run(
        self,
        max_rounds: int = -1,
        initial_game_state: QuartoGameState = QuartoGameState()
    ) -> 'Tuple[float]':
        round_id = 0

        score_history = np.array((0, 0, 0))
        while round_id < max_rounds or max_rounds == -1:  # max_rounds == -1 means play indefinitely
            gs = initial_game_state.copy_game_state()
            terminal = False
            while not terminal:
                current_player = gs.get_current_player_id()
                action_ids = gs.get_available_actions_id_for_player(
                    current_player)
                print(action_ids)
                info_state = gs.get_information_state_for_player(
                    current_player)
                action = self.agents[current_player].act(
                    current_player, info_state, action_ids)

                (gs, score, terminal) = gs.step(current_player, action)
                self.agents[current_player].observe(
                    (1 if current_player == 0 else -1) * score, terminal)

                if terminal:
                    score_history += (1 if score == 1 else 0,
                                      1 if score == -1 else 0,
                                      1 if score == 0 else 0)
                    other_player = (current_player + 1) % 2
                    self.agents[other_player].observe(
                        (1 if other_player == 0 else -1) * score, terminal)

            if round_id != -1:
                round_id += 1
                if self.print_and_reset_score_history_threshold is not None and \
                        round_id % self.print_and_reset_score_history_threshold == 0:
                    print(score_history /
                          self.print_and_reset_score_history_threshold)
                    if self.prev_history is not None and \
                            score_history[0] == self.prev_history[0] and \
                            score_history[1] == self.prev_history[1] and \
                            score_history[2] == self.prev_history[2]:
                        self.stuck_on_same_score += 1
                    else:
                        self.prev_history = score_history
                        self.stuck_on_same_score = 0
                    if (self.replace_player1_with_commandline_after_similar_results is not None and
                            self.stuck_on_same_score >= self.replace_player1_with_commandline_after_similar_results):
                        self.agents = (CommandLineAgent(), self.agents[1])
                        self.stuck_on_same_score = 0
                    score_history = np.array((0, 0, 0))
        return tuple(score_history)
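Every run() variant in these examples drives the same two-method agent protocol: act(...) returns one of the offered action ids, and observe(...) receives the score after each step, with the sign flipped for player 1 under the zero-sum assumption. Below is a minimal sketch of an agent satisfying that protocol, inferred from the call sites above; the RandomAgent name and the type hints are illustrative and not taken from the original code.

import random
from typing import Any, List


class RandomAgent:
    """Hypothetical agent implementing the interface the runners call."""

    def act(self, player_id: int, information_state: Any,
            available_action_ids: List[int]) -> int:
        # The runner supplies the acting player's id, its information state
        # and the legal action ids; the agent must return one of those ids.
        return random.choice(available_action_ids)

    def observe(self, reward: float, terminal: bool) -> None:
        # Called after every step with the reward from this agent's point of
        # view; a learning agent would accumulate it here, a random one ignores it.
        pass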
Example no. 2
    def run(
        self,
        max_rounds: int = -1,
        initial_game_state: WindJammersGameState = WindJammersGameState()
    ) -> 'Tuple[float]':
        round_id = 0

        score_history = np.array((0, 0, 0.0))
        while round_id < max_rounds or max_rounds == -1:
            gs = initial_game_state.copy_game_state()
            terminal = False
            while not terminal:
                sleep(0.016)
                print(gs)
                current_player = gs.get_current_player_id()
                action = 0
                if current_player != -1:
                    action_ids = gs.get_available_actions_id_for_player(
                        current_player)
                    info_state = gs.get_information_state_for_player(
                        current_player)
                    action = self.agents[current_player].act(
                        current_player, info_state, action_ids)

                # WARNING : Two Players Zero Sum Game Hypothesis
                (gs, score, terminal) = gs.step(current_player, action)
                self.agents[0].observe(score, terminal)
                self.agents[1].observe(-score, terminal)

                if not terminal:
                    score_history += (score if score > 0 else 0.0,
                                      -score if score < 0 else 0.0, 0)

            if round_id != -1:
                round_id += 1
                if self.print_and_reset_score_history_threshold is not None and \
                        round_id % self.print_and_reset_score_history_threshold == 0:
                    print(score_history /
                          self.print_and_reset_score_history_threshold)
                    if self.prev_history is not None and \
                            score_history[0] == self.prev_history[0] and \
                            score_history[1] == self.prev_history[1] and \
                            score_history[2] == self.prev_history[2]:
                        self.stuck_on_same_score += 1
                    else:
                        self.prev_history = score_history
                        self.stuck_on_same_score = 0
                    if (self.replace_player1_with_commandline_after_similar_results is not None and
                            self.stuck_on_same_score >= self.replace_player1_with_commandline_after_similar_results):
                        self.agents = (CommandLineAgent(), self.agents[1])
                        self.stuck_on_same_score = 0
                    score_history = np.array((0, 0, 0.0))
        return tuple(score_history)
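A note on the pacing in this example: sleep(0.016) throttles the loop to roughly 60 iterations per second so the printed WindJammers state is watchable in the terminal. If slower act() calls should not add to that delay, a fixed-timestep variant along these lines could be used instead (illustrative, standard library only; not from the original code).

import time


def pace_frame(frame_start: float, target_dt: float = 1 / 60) -> float:
    # Sleep only the remainder of the frame budget, so slow frames do not
    # accumulate extra delay on top of the work already done.
    elapsed = time.monotonic() - frame_start
    if elapsed < target_dt:
        time.sleep(target_dt - elapsed)
    return time.monotonic()  # start time for the next frame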
Example no. 3
    def run(self, max_rounds: int = -1,
            initial_game_state: TicTacToeGameState = TicTacToeGameState()) -> 'Tuple[float]':
        round_id = 0

        score_history = np.array((0, 0, 0))
        while round_id < max_rounds or max_rounds == -1:
            if round_id == 0:
                gs = initial_game_state.copy_game_state()
            else:
                gs.newGameState()
            terminal = False
            while not terminal:
                current_player = gs.get_current_player_id()
                action_ids = gs.get_available_actions_id_for_player(current_player)
                info_state = gs.get_information_state_for_player(current_player)
                action_time = time()
                action = self.agents[current_player].act(current_player,
                                                         info_state,
                                                         action_ids)
                self.mean_action_duration_sum[current_player] += time() - action_time

                # WARNING : Two Players Zero Sum Game Hypothesis
                (gs, score, terminal) = gs.step(current_player, action)
                self.agents[current_player].observe(
                    (1 if current_player == 0 else -1) * score,
                    terminal)

                if terminal:
                    score_history += (1 if score == 1 else 0, 1 if score == -1 else 0, 1 if score == 0 else 0)
                    other_player = (current_player + 1) % 2
                    self.agents[other_player].observe(
                        (1 if other_player == 0 else -1) * score,
                        terminal)

            if round_id != -1:
                round_id += 1
                if self.log_and_reset_score_history_threshold is not None and \
                        round_id % self.log_and_reset_score_history_threshold == 0:
                    # print(score_history / self.log_and_reset_score_history_threshold)
                    if self.prev_history is not None and \
                            score_history[0] == self.prev_history[0] and \
                            score_history[1] == self.prev_history[1] and \
                            score_history[2] == self.prev_history[2]:
                        self.stuck_on_same_score += 1
                    else:
                        self.prev_history = score_history
                        self.stuck_on_same_score = 0
                    if (self.replace_player1_with_commandline_after_similar_results is not None and
                            self.stuck_on_same_score >= self.replace_player1_with_commandline_after_similar_results):
                        self.agents = (CommandLineAgent(), self.agents[1])
                        self.stuck_on_same_score = 0

                    self.writerTimeAgent1.add_summary(tf.Summary(
                        value=[tf.Summary.Value(tag="Time",
                                                simple_value=self.mean_action_duration_sum[0] / round_id)],
                    ), round_id)
                    self.writerTimeAgent1.flush()
                    self.writerTimeAgent2.add_summary(tf.Summary(
                        value=[tf.Summary.Value(tag="Time",
                                                simple_value=self.mean_action_duration_sum[1] / round_id)],
                    ), round_id)
                    self.writerTimeAgent2.flush()
                    self.writerAgent1.add_summary(tf.Summary(
                        value=[tf.Summary.Value(tag="Score", simple_value=score_history[0] / self.log_and_reset_score_history_threshold)],
                    ), round_id)
                    self.writerAgent1.flush()
                    self.writerAgent2.add_summary(tf.Summary(
                        value=[tf.Summary.Value(tag="Score", simple_value=score_history[1] / self.log_and_reset_score_history_threshold)],
                    ), round_id)
                    self.writerAgent2.flush()
                    self.writerDraw.add_summary(tf.Summary(
                        value=[tf.Summary.Value(tag="Score", simple_value=score_history[2] / self.log_and_reset_score_history_threshold)],
                    ), round_id)
                    self.writerDraw.flush()

                    self.mean_action_duration_sum = {0: 0.0, 1: 0.0}
                    self.mean_accumulated_reward_sum = {0: 0.0, 1: 0.0}
                    score_history = np.array((0, 0, 0))
        return tuple(score_history)
Example no. 4
    def run(self, max_rounds: int = -1,
            initial_game_state: QuartoGameState = QuartoGameState()) -> 'Tuple[float]':
        round_id = 0
        self.round_duration_sum = 0.0
        self.mean_action_duration_sum = np.array((0.0, 0.0))
        self.score_history = np.array((0, 0, 0))
        while round_id < max_rounds or max_rounds == -1:
            gs = initial_game_state.copy_game_state()
            terminal = False
            round_time = time.time()  # round start, used for round_duration after the inner loop
            round_step = 0
            self.action_duration_sum = [0.0, 0.0]
            self.mean_action_duration = np.array((0.0, 0.0))
            self.accumulated_reward_sum = [0.0, 0.0]

            while not terminal:
                # print(gs)
                current_player = gs.get_current_player_id()
                action = 0
                if current_player != -1:
                    action_ids = gs.get_available_actions_id_for_player(current_player)
                    info_state = gs.get_information_state_for_player(current_player)
                    action_time = time.time()
                    action = self.agents[current_player].act(current_player,
                                                             info_state,
                                                             action_ids)
                    action_time = time.time() - action_time
                    self.action_duration_sum[current_player] += action_time

                (gs, score, terminal) = gs.step(current_player, action)
                self.agents[current_player].observe(
                    (1 if current_player == 0 else -1) * score,
                    terminal)

                round_step += 1

            self.round_duration = time.time() - round_time
            self.round_duration_sum += self.round_duration
            self.mean_action_duration = (
                self.action_duration_sum[0] / round_step, self.action_duration_sum[1] / round_step)
            self.mean_action_duration_sum += (self.mean_action_duration[0], self.mean_action_duration[1])
            self.score_history += (1 if score == 1 else 0, 1 if score == -1 else 0, 1 if score == 0 else 0)
            other_player = (current_player + 1) % 2
            self.agents[other_player].observe(
                (1 if other_player == 0 else -1) * score,
                terminal)

            self.writer.add_summary(tf.Summary(
                value=[
                    tf.Summary.Value(tag="agent1_action_mean_duration",
                                     simple_value=self.mean_action_duration[0]),

                    tf.Summary.Value(tag="agent2_action_mean_duration",
                                     simple_value=self.mean_action_duration[1]),

                    tf.Summary.Value(tag="round_duration",
                                     simple_value=self.round_duration),

                    tf.Summary.Value(tag="agent1_accumulated_reward",
                                     simple_value=self.accumulated_reward_sum[0]),

                    tf.Summary.Value(tag="agent2_accumulated_reward",
                                     simple_value=self.accumulated_reward_sum[1])

                ],
            ), round_id)

            if round_id != -1:
                round_id += 1
                if self.print_and_reset_score_history_threshold is not None and \
                        round_id % self.print_and_reset_score_history_threshold == 0:
                    if self.prev_history is not None and \
                            self.score_history[0] == self.prev_history[0] and \
                            self.score_history[1] == self.prev_history[1] and \
                            self.score_history[2] == self.prev_history[2]:
                        self.stuck_on_same_score += 1
                    else:
                        self.prev_history = self.score_history
                        self.stuck_on_same_score = 0
                    if (self.replace_player1_with_commandline_after_similar_results is not None and
                            self.stuck_on_same_score >= self.replace_player1_with_commandline_after_similar_results):
                        self.agents = (CommandLineAgent(), self.agents[1])
                        self.stuck_on_same_score = 0
        return tuple(self.score_history), self.round_duration_sum, self.mean_action_duration_sum
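The summary writers used here (self.writer) and in example no. 3 (writerAgent1, writerTimeAgent1, writerDraw, ...) are never created inside run(); the tf.Summary / add_summary calls imply the TensorFlow 1.x API. A possible setup in the runner's __init__, assuming TensorFlow 1.x and with purely illustrative log-directory names, could look like this.

import tensorflow as tf  # TensorFlow 1.x assumed (tf.Summary / tf.summary.FileWriter)


def build_summary_writers(log_dir: str = "logs/quarto"):
    # One FileWriter per curve so TensorBoard shows them as separate runs;
    # the directory layout is an assumption, not taken from the original code.
    return {
        "run": tf.summary.FileWriter(log_dir + "/run"),
        "agent1": tf.summary.FileWriter(log_dir + "/agent1"),
        "agent2": tf.summary.FileWriter(log_dir + "/agent2"),
        "draw": tf.summary.FileWriter(log_dir + "/draw"),
    }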
Example no. 5
                if self.print_and_reset_score_history_threshold is not None and \
                        round_id % self.print_and_reset_score_history_threshold == 0:
                    print(score_history /
                          self.print_and_reset_score_history_threshold)
                    if self.prev_history is not None and \
                            score_history[0] == self.prev_history[0] and \
                            score_history[1] == self.prev_history[1] and \
                            score_history[2] == self.prev_history[2]:
                        self.stuck_on_same_score += 1
                    else:
                        self.prev_history = score_history
                        self.stuck_on_same_score = 0
                    if (self.replace_player1_with_commandline_after_similar_results is not None and
                            self.stuck_on_same_score >= self.replace_player1_with_commandline_after_similar_results):
                        self.agents = (CommandLineAgent(), self.agents[1])
                        self.stuck_on_same_score = 0
                    score_history = np.array((0, 0, 0))
        return tuple(score_history)


if __name__ == "__main__":
    print("MOISMCTSWithRandomRolloutsAgent VS RandomAgent")
    print(
        BasicQuartoRunner(
            CommandLineAgent(),
            CommandLineAgent(),
            print_and_reset_score_history_threshold=1000).run(1000))
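The block in example no. 5 is the score-stagnation check repeated verbatim in every run() above: when the (wins, losses, draws) tally is identical across consecutive reporting windows, player 0 is swapped for a CommandLineAgent. That logic could be factored into a small helper; the ScoreWatcher name and API below are illustrative, not part of the original code.

from typing import Optional

import numpy as np


class ScoreWatcher:
    """Hypothetical helper for the repeated 'stuck on same score' check."""

    def __init__(self, swap_after: Optional[int]):
        self.swap_after = swap_after  # identical windows required before swapping
        self.prev_history = None
        self.stuck_on_same_score = 0

    def should_swap_player0(self, score_history: np.ndarray) -> bool:
        # True once the (wins, losses, draws) tally has stayed identical for
        # `swap_after` consecutive reporting windows.
        if self.prev_history is not None and np.array_equal(score_history, self.prev_history):
            self.stuck_on_same_score += 1
        else:
            self.prev_history = score_history.copy()
            self.stuck_on_same_score = 0
        if self.swap_after is not None and self.stuck_on_same_score >= self.swap_after:
            self.stuck_on_same_score = 0
            return True
        return False

With such a helper, the threshold block would reduce to a single call, e.g. if watcher.should_swap_player0(score_history): self.agents = (CommandLineAgent(), self.agents[1]).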
Example no. 6
    def run(self, max_rounds: int = -1,
            initial_game_state: TicTacToeGameState = TicTacToeGameState()) -> 'Tuple[float]':
        round_id = 0

        score_history = np.array((0, 0, 0))
        while round_id < max_rounds or max_rounds == -1:
            gs = initial_game_state.copy_game_state()
            terminal = False
            tour = 0
            execution_time = np.array((0.0, 0.0))
            while not terminal:
                current_player = gs.get_current_player_id()
                action_ids = gs.get_available_actions_id_for_player(current_player)
                info_state = gs.get_information_state_for_player(current_player)
                begin = time()
                action = self.agents[current_player].act(current_player,
                                                         info_state,
                                                         action_ids)
                end = time()
                # WARNING : Two Players Zero Sum Game Hypothesis
                (gs, score, terminal) = gs.step(current_player, action)
                self.agents[current_player].observe(
                    (1 if current_player == 0 else -1) * score,
                    terminal)

                execution_time[current_player] += end - begin
                tour += 1  # count moves so the per-move average below is well defined

                if terminal:
                    score_history += (1 if score == 1 else 0, 1 if score == -1 else 0, 1 if score == 0 else 0)
                    other_player = (current_player + 1) % 2
                    self.agents[other_player].observe(
                        (1 if other_player == 0 else -1) * score,
                        terminal)

            self.execution_time += execution_time / (tour * 0.5)  # mean action time per move for each player this round

            if round_id != -1:
                round_id += 1
                if self.print_and_reset_score_history_threshold is not None and \
                        round_id % self.print_and_reset_score_history_threshold == 0:
                    print(score_history / self.print_and_reset_score_history_threshold)
                    if self.file is not None:
                        score_to_print = score_history / self.print_and_reset_score_history_threshold
                        execution_time_to_print = self.execution_time/self.print_and_reset_score_history_threshold
                        self.file.write(str(score_to_print[0]) + ";" + str(execution_time_to_print[0]) + ";"
                                        + str(score_to_print[1]) + ";" + str(execution_time_to_print[1]) + ";"
                                        + str(score_to_print[2]) + "\n")
                    if self.prev_history is not None and \
                            score_history[0] == self.prev_history[0] and \
                            score_history[1] == self.prev_history[1] and \
                            score_history[2] == self.prev_history[2]:
                        self.stuck_on_same_score += 1
                    else:
                        self.prev_history = score_history
                        self.stuck_on_same_score = 0
                    if (self.replace_player1_with_commandline_after_similar_results is not None and
                            self.stuck_on_same_score >= self.replace_player1_with_commandline_after_similar_results):
                        self.agents = (CommandLineAgent(), self.agents[1])
                        self.stuck_on_same_score = 0
                    score_history = np.array((0, 0, 0))
                    self.execution_time = np.array((0.0, 0.0))
        return tuple(score_history)
Example no. 7
    def run(
        self,
        max_rounds: int = -1,
        initial_game_state: Power4GameState = Power4GameState()
    ) -> 'Tuple[float]':
        round_id = 0

        # ADDED: LOG #
        filename = "../logs/" + type(self.agents[0]).__name__ + "_VS_" + type(
            self.agents[1]).__name__ + str(time.time()) + ".txt"
        logs_scores_file = open(filename, "w")
        logs_scores_file.close()
        print(type(self.agents[1]).__name__)
        #####

        score_history = np.array((0, 0, 0))
        while round_id < max_rounds or max_rounds == -1:
            gs = initial_game_state.copy_game_state()
            terminal = False
            while not terminal:
                current_player = gs.get_current_player_id()
                action_ids = gs.get_available_actions_id_for_player(
                    current_player)
                info_state = gs.get_information_state_for_player(
                    current_player)

                #### ADDED: TIMER
                if round_id in (1000, 10000, 100000, 1000000):
                    timer = time.time()
                ######

                action = self.agents[current_player].act(
                    current_player, info_state, action_ids)
                ### ADDED
                if round_id in (1000, 10000, 100000, 1000000):
                    logs_scores_file = open(filename, "a")
                    logs_scores_file.write("TIMER " + str(round_id) +
                                           " PLAYER " + str(current_player) +
                                           " == " + str(time.time() - timer) +
                                           "\n")
                    logs_scores_file.close()
                ####

                # WARNING : Two Players Zero Sum Game Hypothesis
                (gs, score, terminal) = gs.step(current_player, action)
                self.agents[current_player].observe(
                    (1 if current_player == 0 else -1) * score, terminal)

                if terminal:
                    score_history += (1 if score == 1 else 0,
                                      1 if score == -1 else 0,
                                      1 if score == 0 else 0)
                    other_player = (current_player + 1) % 2
                    self.agents[other_player].observe(
                        (1 if other_player == 0 else -1) * score, terminal)

            if round_id != -1:
                round_id += 1
                if round_id in (1000, 10000, 100000, 1000000):
                    logs_scores_file = open(filename, "a")
                    logs_scores_file.write(
                        "CHECKPOINT " + str(round_id) + " == " +
                        str(score_history /
                            self.print_and_reset_score_history_threshold) +
                        "\n")
                    logs_scores_file.close()
                if self.print_and_reset_score_history_threshold is not None and \
                        round_id % self.print_and_reset_score_history_threshold == 0:
                    print(score_history /
                          self.print_and_reset_score_history_threshold)
                    if self.prev_history is not None and \
                            score_history[0] == self.prev_history[0] and \
                            score_history[1] == self.prev_history[1] and \
                            score_history[2] == self.prev_history[2]:
                        self.stuck_on_same_score += 1
                    else:
                        self.prev_history = score_history
                        self.stuck_on_same_score = 0
                    if (self.replace_player1_with_commandline_after_similar_results is not None and
                            self.stuck_on_same_score >= self.replace_player1_with_commandline_after_similar_results):
                        self.agents = (CommandLineAgent(), self.agents[1])
                        self.stuck_on_same_score = 0
                    score_history = np.array((0, 0, 0))
        logs_scores_file.close()
        return tuple(score_history)
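A closing note on the tally maintained by the turn-based examples (Quarto, TicTacToe, Power4): the terminal score is +1 / -1 / 0 from player 0's point of view, and score_history counts (player-0 wins, player-1 wins, draws). The inline tuple addition repeated in each terminal branch could be expressed as a helper; outcome_vector is an illustrative name, not from the original code.

import numpy as np


def outcome_vector(score: float) -> np.ndarray:
    # Map a terminal score (+1: player 0 wins, -1: player 1 wins, 0: draw)
    # to a one-hot (wins_p0, wins_p1, draws) increment.
    return np.array((1 if score == 1 else 0,
                     1 if score == -1 else 0,
                     1 if score == 0 else 0))

With it, the terminal branch becomes score_history += outcome_vector(score).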