Example no. 1
    def run(
        self,
        max_rounds: int = -1,
        initial_game_state: TicTacToeGameState = TicTacToeGameState()
    ) -> 'Tuple[float]':
        round_id = 0

        accumulated_scores = np.array([0.0, 0.0])
        while round_id < max_rounds or max_rounds == -1:  # max_rounds == -1 means run indefinitely
            gs = initial_game_state.copy_game_state()
            scores, terminal = gs.get_current_scores()

            while not terminal:
                current_player = gs.get_current_player_id()
                action_ids = gs.get_available_actions_id_for_player(
                    current_player)
                info_state = gs.get_information_state_for_player(
                    current_player)
                action = self.agents[current_player].act(
                    current_player, info_state, action_ids)

                (gs, score, terminal) = gs.step(current_player, action)
                self.agents[current_player].observe(
                    (1 if current_player == 0 else -1) * score, terminal)

                scores, terminal = gs.get_current_scores()

            accumulated_scores += scores
            round_id += 1

        return tuple(accumulated_scores)
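
For orientation, below is a minimal sketch of how a run() method like the one above might be driven. The GameRunner name, the agents tuple, and RandomAgent are illustrative assumptions, not part of the original code; a real agent would implement act() and observe() exactly as the loop calls them.

import numpy as np

class RandomAgent:
    """Illustrative stand-in for a learning agent: picks a uniformly random legal action."""

    def act(self, player_id, information_state, available_action_ids):
        return int(np.random.choice(available_action_ids))

    def observe(self, reward, terminal):
        pass  # a learning agent would update its internal state here

# Hypothetical usage, assuming a runner class exposing the run() method above:
# runner = GameRunner(agents=(RandomAgent(), RandomAgent()))
# player0_score, player1_score = runner.run(max_rounds=1000)
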
    def run(
        self,
        max_rounds: int = -1,
        initial_game_state: TicTacToeGameState = TicTacToeGameState()
    ) -> 'Tuple[float]':
        round_id = 0

        score_history = np.array((0, 0, 0))
        while round_id < max_rounds or max_rounds == -1:
            gs = initial_game_state.copy_game_state()
            terminal = False
            while not terminal:
                current_player = gs.get_current_player_id()
                action_ids = gs.get_available_actions_id_for_player(
                    current_player)
                info_state = gs.get_information_state_for_player(
                    current_player)
                action = self.agents[current_player].act(
                    current_player, info_state, action_ids)

                (gs, score, terminal) = gs.step(current_player, action)
                self.agents[current_player].observe(
                    (1 if current_player == 0 else -1) * score, terminal)

                if terminal:
                    score_history += (1 if score == 1 else 0,
                                      1 if score == -1 else 0,
                                      1 if score == 0 else 0)
                    other_player = (current_player + 1) % 2
                    self.agents[other_player].observe(
                        (1 if other_player == 0 else -1) * score, terminal)

            if round_id != -1:
                round_id += 1
                if self.print_and_reset_score_history_threshold is not None and \
                        round_id % self.print_and_reset_score_history_threshold == 0:
                    print(score_history /
                          self.print_and_reset_score_history_threshold)
                    if self.prev_history is not None and \
                            score_history[0] == self.prev_history[0] and \
                            score_history[1] == self.prev_history[1] and \
                            score_history[2] == self.prev_history[2]:
                        self.stuck_on_same_score += 1
                    else:
                        self.prev_history = score_history
                        self.stuck_on_same_score = 0
                    if (self.replace_player1_with_commandline_after_similar_results is not None and
                            self.stuck_on_same_score >=
                            self.replace_player1_with_commandline_after_similar_results):
                        self.agents = (CommandLineAgent(), self.agents[1])
                        self.stuck_on_same_score = 0
                    # reset the per-window counters, as the attribute name implies
                    score_history = np.array((0, 0, 0))
        return tuple(score_history)
    def test_diagonal_wins_for_player_1(self):
        gs = self.gs
        gs.step(0, 1)
        gs.step(1, 0)
        gs.step(0, 2)
        gs.step(1, 4)
        gs.step(0, 5)
        _, rew, term = gs.step(1, 8)
        assert (rew, term) == (-1, True)

        gs = TicTacToeGameState()
        gs.step(0, 0)
        gs.step(1, 2)
        gs.step(0, 1)
        gs.step(1, 4)
        gs.step(0, 5)
        _, rew, term = gs.step(1, 6)
        assert (rew, term) == (-1, True)

    def test_lines_wins_for_player_0(self):
        gs = self.gs
        for i in range(3):
            gs = TicTacToeGameState()
            gs.step(0, i * 3 + 0)
            gs.step(1, ((i + 1) % 3) * 3 + 0)
            gs.step(0, i * 3 + 1)
            gs.step(1, ((i + 1) % 3) * 3 + 1)
            _, rew, term = gs.step(0, i * 3 + 2)
            assert (rew, term) == (1, True)

    def setUp(self):
        self.gs = TicTacToeGameState()

    def test_columns_wins_for_player_1(self):
        gs = self.gs
        for i in range(3):
            gs = TicTacToeGameState()
            gs.step(0, i + 0)
            gs.step(1, ((i + 1) % 3) + 0)
            gs.step(0, i + 1 * 3)
            gs.step(1, ((i + 1) % 3) + 1 * 3)
            gs.step(0, (i + 2) % 3 + 0)
            _, rew, term = gs.step(1, ((i + 1) % 3) + 2 * 3)
            assert (rew, term) == (-1, True)
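
The test methods above read as fragments of a unittest-style test case; a minimal sketch of the scaffolding that would hold them is shown below. The class name is an assumption, and the import path is taken from Example no. 9.

import unittest

from games.tictactoe.TicTacToeGameState import TicTacToeGameState

class TicTacToeGameStateTest(unittest.TestCase):  # class name is illustrative
    def setUp(self):
        self.gs = TicTacToeGameState()

    # ... the test_* methods listed above would live here ...

if __name__ == '__main__':
    unittest.main()
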
Example no. 7
    def run(self, max_rounds: int = -1,
            initial_game_state: TicTacToeGameState = TicTacToeGameState()) -> 'Tuple[float]':
        round_id = 0

        score_history = np.array((0, 0, 0))
        while round_id < max_rounds or max_rounds == -1:
            if round_id == 0:
                gs = initial_game_state.copy_game_state()
            else:
                gs.newGameState()
            terminal = False
            while not terminal:
                current_player = gs.get_current_player_id()
                action_ids = gs.get_available_actions_id_for_player(current_player)
                info_state = gs.get_information_state_for_player(current_player)
                action_time = time()
                action = self.agents[current_player].act(current_player,
                                                         info_state,
                                                         action_ids)
                self.mean_action_duration_sum[current_player] += time() - action_time

                # WARNING : Two Players Zero Sum Game Hypothesis
                (gs, score, terminal) = gs.step(current_player, action)
                self.agents[current_player].observe(
                    (1 if current_player == 0 else -1) * score,
                    terminal)

                if terminal:
                    score_history += (1 if score == 1 else 0, 1 if score == -1 else 0, 1 if score == 0 else 0)
                    other_player = (current_player + 1) % 2
                    self.agents[other_player].observe(
                        (1 if other_player == 0 else -1) * score,
                        terminal)

            if round_id != -1:
                round_id += 1
                if self.log_and_reset_score_history_threshold is not None and \
                        round_id % self.log_and_reset_score_history_threshold == 0:
                    # print(score_history / self.log_and_reset_score_history_threshold)
                    if self.prev_history is not None and \
                            score_history[0] == self.prev_history[0] and \
                            score_history[1] == self.prev_history[1] and \
                            score_history[2] == self.prev_history[2]:
                        self.stuck_on_same_score += 1
                    else:
                        self.prev_history = score_history
                        self.stuck_on_same_score = 0
                    if (self.replace_player1_with_commandline_after_similar_results is not None and
                            self.stuck_on_same_score >= self.replace_player1_with_commandline_after_similar_results):
                        self.agents = (CommandLineAgent(), self.agents[1])
                        self.stuck_on_same_score = 0

                    self.writerTimeAgent1.add_summary(tf.Summary(
                        value=[tf.Summary.Value(tag="Time",
                                                simple_value=self.mean_action_duration_sum[0] / round_id)],
                    ), round_id)
                    self.writerTimeAgent1.flush()
                    self.writerTimeAgent2.add_summary(tf.Summary(
                        value=[tf.Summary.Value(tag="Time",
                                                simple_value=self.mean_action_duration_sum[1] / round_id)],
                    ), round_id)
                    self.writerTimeAgent2.flush()
                    self.writerAgent1.add_summary(tf.Summary(
                        value=[tf.Summary.Value(tag="Score", simple_value=score_history[0] / self.log_and_reset_score_history_threshold)],
                    ), round_id)
                    self.writerAgent1.flush()
                    self.writerAgent2.add_summary(tf.Summary(
                        value=[tf.Summary.Value(tag="Score", simple_value=score_history[1] / self.log_and_reset_score_history_threshold)],
                    ), round_id)
                    self.writerAgent2.flush()
                    self.writerDraw.add_summary(tf.Summary(
                        value=[tf.Summary.Value(tag="Score", simple_value=score_history[2] / self.log_and_reset_score_history_threshold)],
                    ), round_id)
                    self.writerDraw.flush()

                    self.mean_action_duration_sum = {0: 0.0, 1: 0.0}
                    self.mean_accumulated_reward_sum = {0: 0.0, 1: 0.0}
                    score_history = np.array((0, 0, 0))
        return tuple(score_history)
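
Example no. 7 logs scalars through the TensorFlow 1.x summary API (tf.Summary protos and add_summary). A minimal sketch of how the writer attributes it relies on (writerAgent1, writerTimeAgent1, and so on) might be created is given below; the log directories are illustrative, and under TensorFlow 2 the same calls live under tf.compat.v1.

import tensorflow as tf  # TensorFlow 1.x API assumed

# Hypothetical writer setup matching the attributes used in the run() method above.
writerAgent1 = tf.summary.FileWriter("./logs/agent1_score")
writerAgent2 = tf.summary.FileWriter("./logs/agent2_score")
writerDraw = tf.summary.FileWriter("./logs/draws")
writerTimeAgent1 = tf.summary.FileWriter("./logs/agent1_time")
writerTimeAgent2 = tf.summary.FileWriter("./logs/agent2_time")

# One scalar point, written the same way as inside the loop above:
writerAgent1.add_summary(
    tf.Summary(value=[tf.Summary.Value(tag="Score", simple_value=0.5)]), 100)
writerAgent1.flush()
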
Example no. 8
    def run(self, max_rounds: int = -1,
            initial_game_state: TicTacToeGameState = TicTacToeGameState()) -> 'Tuple[float]':
        round_id = 0

        score_history = np.array((0, 0, 0))
        while round_id < max_rounds or max_rounds == -1:
            gs = initial_game_state.copy_game_state()
            terminal = False
            tour = 0
            execution_time = np.array((0.0, 0.0))
            while not terminal:
                current_player = gs.get_current_player_id()
                action_ids = gs.get_available_actions_id_for_player(current_player)
                info_state = gs.get_information_state_for_player(current_player)
                begin = time()
                action = self.agents[current_player].act(current_player,
                                                         info_state,
                                                         action_ids)
                end = time()
                # WARNING : Two Players Zero Sum Game Hypothesis
                (gs, score, terminal) = gs.step(current_player, action)
                self.agents[current_player].observe(
                    (1 if current_player == 0 else -1) * score,
                    terminal)

                execution_time[current_player] += end - begin

                if terminal:
                    score_history += (1 if score == 1 else 0, 1 if score == -1 else 0, 1 if score == 0 else 0)
                    other_player = (current_player + 1) % 2
                    self.agents[other_player].observe(
                        (1 if other_player == 0 else -1) * score,
                        terminal)

            # self.execution_time += execution_time / (tour * 0.5)

            if round_id != -1:
                round_id += 1
                if self.print_and_reset_score_history_threshold is not None and \
                        round_id % self.print_and_reset_score_history_threshold == 0:
                    print(score_history / self.print_and_reset_score_history_threshold)
                    if self.file is not None:
                        score_to_print = score_history / self.print_and_reset_score_history_threshold
                        execution_time_to_print = self.execution_time/self.print_and_reset_score_history_threshold
                        self.file.write(str(score_to_print[0]) + ";" + str(execution_time_to_print[0]) + ";"
                                        + str(score_to_print[1]) + ";" + str(execution_time_to_print[1]) + ";"
                                        + str(score_to_print[2]) + "\n")
                    if self.prev_history is not None and \
                            score_history[0] == self.prev_history[0] and \
                            score_history[1] == self.prev_history[1] and \
                            score_history[2] == self.prev_history[2]:
                        self.stuck_on_same_score += 1
                    else:
                        self.prev_history = score_history
                        self.stuck_on_same_score = 0
                    if (self.replace_player1_with_commandline_after_similar_results is not None and
                            self.stuck_on_same_score >= self.replace_player1_with_commandline_after_similar_results):
                        self.agents = (CommandLineAgent(), self.agents[1])
                        self.stuck_on_same_score = 0
                    score_history = np.array((0, 0, 0))
                    self.execution_time = np.array((0.0, 0.0))
        return tuple(score_history)
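
Example no. 8 additionally appends one semicolon-separated line to self.file per logging window. A small sketch of reading such a file back is given below; the column order is inferred from the write call above, and the file name is an assumption.

# Inferred column order per line:
# player0_win_rate; player0_mean_time; player1_win_rate; player1_mean_time; draw_rate
with open("scores.csv") as log_file:  # file name is illustrative
    for line in log_file:
        p0_win, p0_time, p1_win, p1_time, draws = map(float, line.strip().split(";"))
        print(f"P0 {p0_win:.2f}  P1 {p1_win:.2f}  draws {draws:.2f}")
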
Example no. 9
    def create_game_state_from_information_state(self):
        from games.tictactoe.TicTacToeGameState import TicTacToeGameState
        gs = TicTacToeGameState()
        gs.board = self.board.copy()
        gs.current_player = self.current_player
        return gs
    def run(
        self,
        max_rounds: int = -1,
        initial_game_state: TicTacToeGameState = TicTacToeGameState()
    ) -> 'Tuple[float]':
        round_id = 0
        self.round_duration_sum = 0.0
        self.mean_action_duration_sum = np.array((0.0, 0.0))
        self.score_history = np.array((0, 0, 0))
        score = 0
        current_player = 0

        while round_id < max_rounds or max_rounds == -1:
            gs = initial_game_state.copy_game_state()
            terminal = False
            round_time = time.time()  # time the whole round, not just the last step
            round_step = 0
            self.mean_action_duration = {0: 0.0, 1: 0.0}
            self.action_duration_sum = {0: 0.0, 1: 0.0}
            self.accumulated_reward_sum = {0: 0.0, 1: 0.0}

            while not terminal:
                # print(gs)
                current_player = gs.get_current_player_id()
                action = 0
                if current_player != -1:
                    action_ids = gs.get_available_actions_id_for_player(
                        current_player)
                    info_state = gs.get_information_state_for_player(
                        current_player)
                    action_time = time.time()
                    action = self.agents[current_player].act(
                        current_player, info_state, action_ids)
                    action_time = time.time() - action_time
                    self.action_duration_sum[current_player] += action_time

                (gs, score, terminal) = gs.step(current_player, action)

                self.agents[current_player].observe(
                    (1 if current_player == 0 else -1) * score, terminal)
                round_step += 1

                if terminal:
                    self.round_duration = time.time() - round_time
                    self.round_duration_sum += self.round_duration
                    self.mean_action_duration = (self.action_duration_sum[0] /
                                                 round_step,
                                                 self.action_duration_sum[1] /
                                                 round_step)
                    self.mean_action_duration_sum += (
                        self.mean_action_duration[0],
                        self.mean_action_duration[1])
                    self.score_history += (1 if score == 1 else 0,
                                           1 if score == -1 else 0,
                                           1 if score == 0 else 0)
                    other_player = (current_player + 1) % 2
                    self.agents[other_player].observe(
                        (1 if other_player == 0 else -1) * score, terminal)

            self.writer.add_summary(
                tf.Summary(value=[
                    tf.Summary.Value(
                        tag="agent1_action_mean_duration",
                        simple_value=self.mean_action_duration[0]),
                    tf.Summary.Value(
                        tag="agent2_action_mean_duration",
                        simple_value=self.mean_action_duration[1]),
                    tf.Summary.Value(tag="round_duration",
                                     simple_value=self.round_duration),
                    tf.Summary.Value(tag="agent1_accumulated_reward",
                                     simple_value=self.score_history[0]),
                    tf.Summary.Value(tag="agent2_accumulated_reward",
                                     simple_value=self.score_history[1])
                ], ), round_id)

            if round_id != -1:
                round_id += 1
                if self.print_and_reset_score_history_threshold is not None and \
                        round_id % self.print_and_reset_score_history_threshold == 0:
                    # print(self.score_history / self.print_and_reset_score_history_threshold)
                    if self.prev_history is not None and \
                            self.score_history[0] == self.prev_history[0] and \
                            self.score_history[1] == self.prev_history[1] and \
                            self.score_history[2] == self.prev_history[2]:
                        self.stuck_on_same_score += 1
                    else:
                        self.prev_history = self.score_history
                        self.stuck_on_same_score = 0
                    if (self.replace_player1_with_commandline_after_similar_results is not None and
                            self.stuck_on_same_score >=
                            self.replace_player1_with_commandline_after_similar_results):
                        self.agents = (CommandLineAgent(), self.agents[1])
                        self.stuck_on_same_score = 0
        return (tuple(self.score_history), self.round_duration_sum,
                self.mean_action_duration_sum)