예제 #1
0
def train(debug, iterations, table):
    """Let two agents play TicTacToe against each other for a step budget.

    Both agents are built on the same ``table`` object and take turns on a
    single game. Every loop step is either one move or, when the game has
    ended, one reward/reset round; both kinds of step consume one unit of
    ``iterations``.

    Args:
        debug: When true, print the board hash and the board after every
            move and sleep 0.1 s. Also forwarded as ``print_q`` to player
            one's agent.
        iterations: Number of loop steps to run. Values <= 0 run no steps
            (the table is still saved).
        table: Shared table object; must provide ``save_q_table()`` and a
            sized ``table`` attribute.

    Raises:
        SystemExit: After saving the table when interrupted with Ctrl+C.
    """
    game = TicTacToe()
    ai1 = Agent(True, table)
    ai2 = Agent(True, table)

    try:
        # `> 0` instead of the former `== 0` / `> 0` pair: a negative budget
        # used to match neither test and spin forever without doing work.
        while iterations > 0:
            iterations -= 1
            if iterations % 100_000 == 0:
                print(iterations)
                print("q_table len", len(table.table))

            winner = game.get_winner()
            if game.is_board_full() or winner:
                if winner is Player.ONE:
                    ai1.reward(1)
                    ai2.reward(0)
                elif winner is Player.TWO:
                    ai1.reward(0)
                    ai2.reward(1)
                else:
                    # Neither player won: player one gets 0.1, player
                    # two gets 0.5.
                    ai1.reward(0.1)
                    ai2.reward(0.5)

                game.reset()
                ai1.reset_history()
                ai2.reset_history()
                continue

            # NOTE(review): neither call passes a `train` argument, so the
            # agents run with whatever default `iterate` declares -- confirm
            # that is intended for a training loop.
            if game.get_player() == Player.ONE:
                ai1.iterate(game, print_q=debug)
            else:
                ai2.iterate(game)

            if debug:
                print()
                print(game.get_hash())
                game.print_board()
                time.sleep(0.1)

        table.save_q_table()
    except KeyboardInterrupt:
        # Keep what has been learned so far, then terminate the program.
        # `raise SystemExit` is what the site-provided `exit()` does, without
        # relying on the `site` module having installed that helper.
        table.save_q_table()
        raise SystemExit
예제 #2
0
    def iterate(self,
                game: TicTacToe,
                train: bool = False,
                print_q: bool = False):
        """Choose one move for this agent and play it on ``game``.

        When ``self.ai`` is set, the move comes from
        ``self.get_optimal_move`` and the (board hash, move) pair is
        recorded at the front of ``self.history``. Otherwise the move comes
        from ``self.random_move`` and the history is left untouched.

        Args:
            game: Game to act on; must provide ``get_legal_moves()``,
                ``get_hash()`` and ``input(move)``.
            train: Forwarded unchanged to ``get_optimal_move``.
            print_q: Forwarded unchanged to ``get_optimal_move``.
        """
        moves = game.get_legal_moves()

        if self.ai:
            field_hash = game.get_hash()
            move = self.get_optimal_move(moves, field_hash, print_q, train)
            # Newest entry first. A new list is built (rather than mutating
            # in place) so other references to the old list are unaffected.
            entry = {"hash": field_hash, "move": str(move)}
            self.history = [entry] + self.history
        else:
            # Only draw a random move when it is actually going to be
            # played; previously it was drawn and discarded for AI agents.
            move = self.random_move(moves)

        game.input(int(move))