Code example #1
File: qlearning.py Project: ydincer/Python-Library
    def train_and_save(self):

        # ______________________________
        # Play random games

        for tgi in range(self._TRAIN_GAME_COUNT):  # TGI = Train Game Index

            # Play
            dojo = Board()
            random_pupil = RandomPlayer(self)
            ai_pupil = AIPlayer(self)
            game_is_active = True
            while game_is_active:
                next_player = dojo.get_next_player()

                if random.randint(1, 10) < 8:  # 70% of moves: random pupil
                    next_move = random_pupil.get_next_move(dojo)
                else:  # 30% of moves: AI pupil
                    next_move = ai_pupil.get_next_move(dojo)

                dojo.play(next_player, next_move)
                game_is_active = not dojo.is_game_complete()

            # Evaluate
            self._learn_from_board(dojo)

        # ______________________________
        # Save

        self._save_to_file()
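
The 70/30 split between random and AI moves above acts as a fixed exploration rate. A minimal sketch (names hypothetical, not from the project) of making that rate an explicit parameter:

import random

# Hypothetical refactor: expose the exploration rate instead of
# hard-coding random.randint(1, 10) < 8 (a 70% chance).
EXPLORATION_RATE = 0.7  # fraction of moves taken by the random pupil

def pick_pupil(random_pupil, ai_pupil):
    # With probability EXPLORATION_RATE, explore via a random move;
    # otherwise exploit the AI's current policy.
    if random.random() < EXPLORATION_RATE:
        return random_pupil
    return ai_pupil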
Code example #2
def test_rd_vs_ql_exact_all_fireblast_deck():
    """
    If start_health set to 7, 9 or 15, test player1 win rate should be 0.

    1. if start_health set to 15, Q-learning should learn to
    use three heropowers in turn 1 - 3 and then use all fireblasts.
    or: not use Coin in the first turn, use hero power in second turn, and start to use
    Coin + fireblast in the third turn, and only firefblast afterwards
    2. if start_health set to 7, Q-learning should learn to not use Coin in the
     2nd turn, use heropower in the 4th turn, and use Coin then Fireblast in
     the 6th turn
    3. if start_health set to 9, win rate should also be 0.
    4. if start_health set to 8, test player1 win rate should be around 0.1 - 0.3. No matter how Q-learning
    learns, player1 can play two heropowers in the first three turns by chance, and then
    use fireblast in the fourth turn
    """
    start_health = 8
    gamma = 1.0  # discounting factor
    epsilon = 0.2  # epsilon-greedy
    alpha = 1.0  # learning rate
    deck = constant.all_fireblast_deck
    logger = logging.getLogger('hearthstone')
    logger.addHandler(logging.StreamHandler())
    logger.setLevel(logging.WARNING)
    player1 = RandomPlayer(cls=HeroClass.MAGE,
                           name='player1',
                           start_health=start_health,
                           first_player=True,
                           fix_deck=deck)
    player2 = QLearningPlayer(
        cls=HeroClass.MAGE,
        name='player2',
        start_health=start_health,
        first_player=False,
        fix_deck=deck,
        method='exact',
        gamma=gamma,
        epsilon=epsilon,
        alpha=alpha,
        test=False,
        annotation='all_fireblast_deck_strthl{0}'.format(start_health),
    )
    # train
    match = Match(player1, player2)
    match.play_n_match(n=1000)
    # test
    logger.setLevel(logging.INFO)
    player1.reset(test=True)
    player2.reset(test=True)
    match = Match(player1, player2)
    match.play_n_match(n=0)
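
For reference, method='exact' above denotes tabular Q-learning. A minimal sketch of the epsilon-greedy tabular update these tests rely on (dictionary-based; names hypothetical, and the project's own implementation may differ):

import random
from collections import defaultdict

# Hypothetical tabular Q-learning sketch; gamma, epsilon, alpha correspond
# to the hyperparameters configured in the test above.
q = defaultdict(float)  # maps (state, action) -> estimated return

def choose_action(state, actions, epsilon):
    # epsilon-greedy: explore with probability epsilon, else exploit.
    if random.random() < epsilon:
        return random.choice(actions)
    return max(actions, key=lambda a: q[(state, a)])

def update(state, action, reward, next_state, next_actions, alpha, gamma):
    # Standard Q-learning backup: Q <- Q + alpha * (target - Q).
    best_next = max((q[(next_state, a)] for a in next_actions), default=0.0)
    target = reward + gamma * best_next
    q[(state, action)] += alpha * (target - q[(state, action)])

With alpha=1.0, as in the test, each backup overwrites the old estimate, which is reasonable when transitions are near-deterministic.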
Code example #3
File: test_multiprocess_env.py Project: czxttkl/X-AI
    def output(self, match_num=100):
        start_health = 30
        deck = constant.mage_fix_deck
        logger = logging.getLogger('hearthstone')
        logger.addHandler(logging.StreamHandler())
        logger.setLevel(logging.WARNING)
        player1 = RandomPlayer(cls=HeroClass.MAGE, name='player1', first_player=True,
                               start_health=start_health, fix_deck=deck)
        player2 = RandomPlayer(cls=HeroClass.MAGE, name='player2', first_player=False,
                               start_health=start_health, fix_deck=deck)
        # test
        # logger.setLevel(logging.INFO)
        player1.reset(test=True)
        player2.reset(test=True)
        match = Match(player1, player2)

        start_time = time.time()
        win_results = []
        p = multiprocessing.Pool()
        for win_player in p.imap_unordered(match.play_one_match, range(match_num)):
            win_results.append(win_player.name)
        duration = time.time() - start_time
        player1_win_rate = numpy.mean(numpy.array(win_results) == "player1")

        # print("win result:", win_results)
        print("player1 win result:", player1_win_rate)
        print("duration:", duration)
        return player1_win_rate
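
imap_unordered returns results as matches finish rather than in submission order, which keeps all workers busy regardless of individual game length; order is irrelevant for a win-rate estimate. One caveat: the bound method match.play_one_match (and everything it references) must be picklable to cross process boundaries, which the code above evidently satisfies. A sketch of the same pattern with explicit pool cleanup, under that assumption (function name hypothetical):

import multiprocessing
import numpy

def parallel_win_rate(match, match_num=100):
    # The context manager closes and joins the pool automatically.
    with multiprocessing.Pool() as p:
        names = [w.name for w in p.imap_unordered(match.play_one_match,
                                                  range(match_num))]
    return numpy.mean(numpy.array(names) == "player1")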
Code example #4
def test_rd_vs_ql_dqn_all_fireblast_deck():
    """ test q learningdqn with Deep Q-Network"""
    start_health = 15
    gamma = 1.0  # discounting factor
    epsilon = 0.3  # epsilon-greedy
    alpha = 0.01  # learning rate
    hidden_dim = 50  # hidden-unit dimension for a 2-hidden-layer NN
    deck = constant.all_fireblast_deck
    logger = logging.getLogger('hearthstone')
    logger.addHandler(logging.StreamHandler())
    logger.setLevel(logging.WARNING)
    player1 = RandomPlayer(cls=HeroClass.MAGE,
                           name='player1',
                           first_player=True,
                           start_health=start_health,
                           fix_deck=deck)
    player2 = QLearningPlayer(
        cls=HeroClass.MAGE,
        name='player2',
        first_player=False,
        start_health=start_health,
        fix_deck=deck,
        method='dqn',
        annotation='all_fireblast_deck_strthl{0}'.format(start_health),
        hidden_dim=hidden_dim,
        gamma=gamma,
        epsilon=epsilon,
        alpha=alpha,
        test=False)
    # train
    match = Match(player1, player2)
    match.play_n_match(n=500000)
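
The hidden_dim comment above implies a two-hidden-layer network for the DQN value function. A minimal sketch of such a Q-network in PyTorch (an assumption for illustration; the project may define its network differently and need not use PyTorch):

import torch.nn as nn

def make_q_network(state_dim, n_actions, hidden_dim=50):
    # Two hidden layers of hidden_dim units each, one Q-value per action.
    return nn.Sequential(
        nn.Linear(state_dim, hidden_dim),
        nn.ReLU(),
        nn.Linear(hidden_dim, hidden_dim),
        nn.ReLU(),
        nn.Linear(hidden_dim, n_actions),
    )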
Code example #5
def create_players():
    players = []
    num_rand = 3
    rand_player = RandomPlayer()  # one shared instance fills all random seats
    for _ in range(num_rand):
        players.append(rand_player)
    for _ in range(4 - num_rand):
        players.append(AiPlayer.fromRandom())
    return players
Code example #6
def test_rd_vs_ql_la_all_fireblast_deck():
    """
    test q learning linear approximation with deck=all_fireblast deck.
    However, always observe weight update explosion.
    """
    start_health = 8
    gamma = 0.95  # discounting factor
    epsilon = 0.2  # epsilon-greedy
    alpha = 0.1  # learning rate
    deck = constant.all_fireblast_deck
    logger = logging.getLogger('hearthstone')
    logger.addHandler(logging.StreamHandler())
    logger.setLevel(logging.INFO)
    player1 = RandomPlayer(cls=HeroClass.MAGE,
                           name='player1',
                           first_player=True,
                           start_health=start_health,
                           fix_deck=deck)
    player2 = QLearningPlayer(
        cls=HeroClass.MAGE,
        name='player2',
        first_player=False,
        start_health=start_health,
        fix_deck=deck,
        method='linear',
        annotation='_all_fireblast_deck_strthl{0}'.format(start_health),
        degree=1,
        gamma=gamma,
        epsilon=epsilon,
        alpha=alpha,
        test=False)
    # train
    match = Match(player1, player2)
    match.play_n_match(n=10)
    # test
    logger.setLevel(logging.INFO)
    player1.reset(test=True)
    player2.reset(test=True)
    match = Match(player1, player2)
    match.play_n_match(n=2)
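
The weight explosion noted in the docstring is a common failure mode of semi-gradient linear Q-learning: unscaled features combined with a large step size let updates diverge. Typical mitigations, sketched below (hypothetical helper, not the project's code), are feature normalization and a clipped TD error:

import numpy as np

def linear_q_update(w, phi, reward, phi_next_best, alpha, gamma, clip=1.0):
    # Semi-gradient Q-learning update for Q(s, a) = w . phi(s, a).
    td_error = reward + gamma * (w @ phi_next_best) - (w @ phi)
    # Clipping the TD error bounds each step and damps divergence.
    td_error = np.clip(td_error, -clip, clip)
    w += alpha * td_error * phi
    return w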
Code example #7
def test_rd_vs_ql_exact_mage_fix_deck():
    """
    the test for real game with mage_fix_deck. exact method will failed
    because """
    start_health = 30
    gamma = 1.0  # discounting factor
    epsilon = 0.2  # epsilon-greedy
    alpha = 1.0  # learning rate
    deck = constant.mage_fix_deck
    logger = logging.getLogger('hearthstone')
    logger.addHandler(logging.StreamHandler())
    logger.setLevel(logging.WARNING)
    player1 = RandomPlayer(cls=HeroClass.MAGE,
                           name='player1',
                           first_player=True,
                           start_health=start_health,
                           fix_deck=deck)
    player2 = QLearningPlayer(
        cls=HeroClass.MAGE,
        name='player2',
        first_player=False,
        start_health=start_health,
        fix_deck=deck,
        method='exact',
        annotation='mage_fix_deck_strthl{0}'.format(start_health),
        gamma=gamma,
        epsilon=epsilon,
        alpha=alpha,
        test=False)
    # train
    match = Match(player1, player2)
    match.play_n_match(n=99999999999)  # effectively: train until interrupted
    # test
    logger.setLevel(logging.INFO)
    player1.reset(test=True)
    player2.reset(test=True)
    match = Match(player1, player2)
    match.play_n_match(n=100)
Code example #8
    def get_next_move(self, board: Board) -> Coordinate:

        # Ask the AI for a move
        output = self.ai.get_next_move(board)

        # Fallback: play a random move if the AI has none
        if output is None:
            output = RandomPlayer().get_next_move(board)

        return output
Code example #9
def test_rd_vs_rd_all_fireblast_deck():
    """ test random vs. random """
    start_health = 30
    deck = constant.mage_fix_deck  # note: mage_fix_deck despite the function name
    logger = logging.getLogger('hearthstone')
    logger.addHandler(logging.StreamHandler())
    logger.setLevel(logging.WARNING)
    player1 = RandomPlayer(cls=HeroClass.MAGE,
                           name='player1',
                           first_player=True,
                           start_health=start_health,
                           fix_deck=deck)
    player2 = RandomPlayer(cls=HeroClass.MAGE,
                           name='player2',
                           first_player=False,
                           start_health=start_health,
                           fix_deck=deck)
    # test
    # logger.setLevel(logging.INFO)
    player1.reset(test=True)
    player2.reset(test=True)
    match = Match(player1, player2)
    match.play_n_match(n=100)
Code example #10
def matchmake_with_randoms(non_random_players):
    # Copy so the caller's list is not mutated, then pad to 4 players.
    players = list(non_random_players)
    while len(players) < 4:
        players.append(RandomPlayer())
    return players
Code example #11
def play_against_randoms(special_player):
    rand_player = RandomPlayer()  # one shared instance fills the other seats
    players = [special_player, rand_player, rand_player, rand_player]
    # play() returns the winning player's index; seat 0 is special_player.
    special_wins = play(players) == 0
    return special_wins
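
A possible usage of play_against_randoms, estimating a win rate over repeated games (my_player is a placeholder for any player instance):

n = 1000
wins = sum(play_against_randoms(my_player) for _ in range(n))
print("win rate:", wins / n)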
Code example #12
def play_with_randomizer():
    # Launch the Tic-Tac-Toe GUI against a random opponent.
    TicTacToeGUI(RandomPlayer())
Code example #13
    """ test random vs. random """
    match, idx = arg
    return match.play_one_match(idx).name


if __name__ == "__main__":
    match_num = 6000

    start_health = 30
    deck = constant.mage_fix_deck
    logger = logging.getLogger('hearthstone')
    logger.addHandler(logging.StreamHandler())
    logger.setLevel(logging.WARNING)
    player1 = RandomPlayer(cls=HeroClass.MAGE,
                           name='player1',
                           first_player=True,
                           start_health=start_health,
                           fix_deck=deck)
    player2 = RandomPlayer(cls=HeroClass.MAGE,
                           name='player2',
                           first_player=False,
                           start_health=start_health,
                           fix_deck=deck)
    # test
    # logger.setLevel(logging.INFO)
    player1.reset(test=True)
    player2.reset(test=True)
    match = Match(player1, player2)

    start_time = time.time()
    win_results = []