def train_and_save(self):
    # ______________________________
    # Play random games
    for tgi in range(self._TRAIN_GAME_COUNT):  # tgi = train game index
        # Play
        dojo = Board()
        random_pupil = RandomPlayer(self)
        ai_pupil = AIPlayer(self)
        game_is_active = True
        while game_is_active:
            next_player = dojo.get_next_player()
            # Mix the training data: ~70% random moves, ~30% AI moves
            if random.randint(1, 10) < 8:
                next_move = random_pupil.get_next_move(dojo)
            else:
                next_move = ai_pupil.get_next_move(dojo)
            dojo.play(next_player, next_move)
            game_is_active = not dojo.is_game_complete()
        # Evaluate
        self._learn_from_board(dojo)
    # ______________________________
    # Save
    self._save_to_file()
def test_rd_vs_ql_exact_all_fireblast_deck(): """ If start_health set to 7, 9 or 15, test player1 win rate should be 0. 1. if start_health set to 15, Q-learning should learn to use three heropowers in turn 1 - 3 and then use all fireblasts. or: not use Coin in the first turn, use hero power in second turn, and start to use Coin + fireblast in the third turn, and only firefblast afterwards 2. if start_health set to 7, Q-learning should learn to not use Coin in the 2nd turn, use heropower in the 4th turn, and use Coin then Fireblast in the 6th turn 3. if start_health set to 9, win rate should also be 0. 4. if start_health set to 8, test player1 win rate should be around 0.1 - 0.3. No matter how Q-learning learns, player1 can play two heropowers in the first three turns by chance, and then use fireblast in the fourth turn """ start_health = 8 gamma = 1.0 # discounting factor epsilon = 0.2 # epsilon-greedy alpha = 1.0 # learning rate deck = constant.all_fireblast_deck logger = logging.getLogger('hearthstone') logger.addHandler(logging.StreamHandler()) logger.setLevel(logging.WARNING) player1 = RandomPlayer(cls=HeroClass.MAGE, name='player1', start_health=start_health, first_player=True, fix_deck=deck) player2 = QLearningPlayer( cls=HeroClass.MAGE, name='player2', start_health=start_health, first_player=False, fix_deck=deck, method='exact', gamma=gamma, epsilon=epsilon, alpha=alpha, test=False, annotation='all_fireblast_deck_strthl{0}'.format(start_health), ) # train match = Match(player1, player2) match.play_n_match(n=1000) # test logger.setLevel(logging.INFO) player1.reset(test=True) player2.reset(test=True) match = Match(player1, player2) match.play_n_match(n=0)
def output(self, match_num=100):
    start_health = 30
    deck = constant.mage_fix_deck

    logger = logging.getLogger('hearthstone')
    logger.addHandler(logging.StreamHandler())
    logger.setLevel(logging.WARNING)

    player1 = RandomPlayer(cls=HeroClass.MAGE, name='player1', first_player=True,
                           start_health=start_health, fix_deck=deck)
    player2 = RandomPlayer(cls=HeroClass.MAGE, name='player2', first_player=False,
                           start_health=start_health, fix_deck=deck)

    # test
    # logger.setLevel(logging.INFO)
    player1.reset(test=True)
    player2.reset(test=True)
    match = Match(player1, player2)

    start_time = time.time()
    win_results = []
    p = multiprocessing.Pool()
    for win_player in p.imap_unordered(match.play_one_match, range(match_num)):
        win_results.append(win_player.name)
    duration = time.time() - start_time

    player1_win_rate = numpy.mean(numpy.array(win_results) == "player1")
    # print("win result:", win_results)
    print("player1 win result:", player1_win_rate)
    print("duration:", duration)
    return player1_win_rate
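# A note on the parallel evaluation above: imap_unordered yields each result as
# soon as its worker finishes, so win_results is in arbitrary order; that is
# fine here because only the mean is used. Below is a sketch of the same
# pattern using the pool as a context manager so the worker processes are
# always cleaned up. parallel_winner_names is a hypothetical helper, not part
# of the repo.

import multiprocessing

def parallel_winner_names(match, match_num):
    # the with-block closes and joins the worker processes on exit
    with multiprocessing.Pool() as pool:
        return [p.name for p in pool.imap_unordered(match.play_one_match, range(match_num))]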
def test_rd_vs_ql_dqn_all_fireblast_deck():
    """ test Q-learning with a Deep Q-Network (DQN) """
    start_health = 15
    gamma = 1.0      # discounting factor
    epsilon = 0.3    # epsilon-greedy
    alpha = 0.01     # learning rate
    hidden_dim = 50  # hidden unit dimension for a 2-hidden-layer NN
    deck = constant.all_fireblast_deck

    logger = logging.getLogger('hearthstone')
    logger.addHandler(logging.StreamHandler())
    logger.setLevel(logging.WARNING)

    player1 = RandomPlayer(cls=HeroClass.MAGE, name='player1', first_player=True,
                           start_health=start_health, fix_deck=deck)
    player2 = QLearningPlayer(
        cls=HeroClass.MAGE, name='player2', first_player=False,
        start_health=start_health, fix_deck=deck, method='dqn',
        annotation='all_fireblast_deck_strthl{0}'.format(start_health),
        hidden_dim=hidden_dim, gamma=gamma, epsilon=epsilon, alpha=alpha, test=False)

    # train
    match = Match(player1, player2)
    match.play_n_match(n=500000)
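# A minimal sketch of a Q-network matching the two-hidden-layer, hidden_dim=50
# description above. PyTorch is an assumption here; the repo's 'dqn' method may
# be implemented differently, and state_dim/action_dim are placeholders.

import torch.nn as nn

def build_q_network(state_dim, action_dim, hidden_dim=50):
    # maps a state feature vector to one Q-value per action
    return nn.Sequential(
        nn.Linear(state_dim, hidden_dim),
        nn.ReLU(),
        nn.Linear(hidden_dim, hidden_dim),
        nn.ReLU(),
        nn.Linear(hidden_dim, action_dim),
    )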
def create_players():
    players = []
    num_rand = 3
    # one shared RandomPlayer instance fills all random seats
    rand_player = RandomPlayer()
    for _ in range(num_rand):
        players.append(rand_player)
    # fill the remaining seats with AI players
    for _ in range(4 - num_rand):
        players.append(AiPlayer.fromRandom())
    return players
def test_rd_vs_ql_la_all_fireblast_deck():
    """ test Q-learning with linear approximation on the all_fireblast deck.
    However, weight updates consistently explode. """
    start_health = 8
    gamma = 0.95   # discounting factor
    epsilon = 0.2  # epsilon-greedy
    alpha = 0.1    # learning rate
    deck = constant.all_fireblast_deck

    logger = logging.getLogger('hearthstone')
    logger.addHandler(logging.StreamHandler())
    logger.setLevel(logging.INFO)

    player1 = RandomPlayer(cls=HeroClass.MAGE, name='player1', first_player=True,
                           start_health=start_health, fix_deck=deck)
    player2 = QLearningPlayer(
        cls=HeroClass.MAGE, name='player2', first_player=False,
        start_health=start_health, fix_deck=deck, method='linear',
        annotation='_all_fireblast_deck_strthl{0}'.format(start_health),
        degree=1, gamma=gamma, epsilon=epsilon, alpha=alpha, test=False)

    # train
    match = Match(player1, player2)
    match.play_n_match(n=10)

    # test
    logger.setLevel(logging.INFO)
    player1.reset(test=True)
    player2.reset(test=True)
    match = Match(player1, player2)
    match.play_n_match(n=2)
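# The weight explosion noted in the docstring is a known failure mode of
# semi-gradient Q-learning with linear function approximation. Below is a
# hedged sketch of one common mitigation, clipping the TD error before the
# weight update; w, features, and the function name are hypothetical, not the
# repo's internals.

import numpy as np

def linear_q_update(w, features, reward, next_best_features, alpha, gamma, clip=1.0):
    # semi-gradient step for Q(s, a) = w . phi(s, a)
    td_error = reward + gamma * np.dot(w, next_best_features) - np.dot(w, features)
    # clip the TD error to damp divergent updates
    td_error = float(np.clip(td_error, -clip, clip))
    return w + alpha * td_error * features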
def test_rd_vs_ql_exact_mage_fix_deck():
    """ the test for the real game with mage_fix_deck. The exact (tabular)
    method is expected to fail because the state space is far too large to
    enumerate. """
    start_health = 30
    gamma = 1.0    # discounting factor
    epsilon = 0.2  # epsilon-greedy
    alpha = 1.0    # learning rate
    deck = constant.mage_fix_deck

    logger = logging.getLogger('hearthstone')
    logger.addHandler(logging.StreamHandler())
    logger.setLevel(logging.WARNING)

    player1 = RandomPlayer(cls=HeroClass.MAGE, name='player1', first_player=True,
                           start_health=start_health, fix_deck=deck)
    player2 = QLearningPlayer(
        cls=HeroClass.MAGE, name='player2', first_player=False,
        start_health=start_health, fix_deck=deck, method='exact',
        annotation='mage_fix_deck_strthl{0}'.format(start_health),
        gamma=gamma, epsilon=epsilon, alpha=alpha, test=False)

    # train
    match = Match(player1, player2)
    match.play_n_match(n=99999999999)  # effectively train until interrupted

    # test
    logger.setLevel(logging.INFO)
    player1.reset(test=True)
    player2.reset(test=True)
    match = Match(player1, player2)
    match.play_n_match(n=100)
def get_next_move(self, board: Board) -> Coordinate:
    # AI response
    output = self.ai.get_next_move(board)
    # Fallback: random move if the AI has no answer
    if output is None:
        output = RandomPlayer().get_next_move(board)
    return output
def test_rd_vs_rd_mage_fix_deck():
    """ test random vs. random """
    start_health = 30
    deck = constant.mage_fix_deck

    logger = logging.getLogger('hearthstone')
    logger.addHandler(logging.StreamHandler())
    logger.setLevel(logging.WARNING)

    player1 = RandomPlayer(cls=HeroClass.MAGE, name='player1', first_player=True,
                           start_health=start_health, fix_deck=deck)
    player2 = RandomPlayer(cls=HeroClass.MAGE, name='player2', first_player=False,
                           start_health=start_health, fix_deck=deck)

    # test
    # logger.setLevel(logging.INFO)
    player1.reset(test=True)
    player2.reset(test=True)
    match = Match(player1, player2)
    match.play_n_match(n=100)
def matchmake_with_randoms(non_random_players):
    # copy so the caller's list is not mutated
    players = list(non_random_players)
    # fill the remaining seats (up to four) with random players
    while len(players) < 4:
        players.append(RandomPlayer())
    return players
def play_against_randoms(special_player):
    rand_player = RandomPlayer()
    # special_player takes seat 0; the other three seats share one random player
    players = [special_player, rand_player, rand_player, rand_player]
    # play() returns the index of the winning seat
    special_wins = play(players) == 0
    return special_wins
def play_with_randomizer():
    TicTacToeGUI(RandomPlayer())
""" test random vs. random """ match, idx = arg return match.play_one_match(idx).name if __name__ == "__main__": match_num = 6000 start_health = 30 deck = constant.mage_fix_deck logger = logging.getLogger('hearthstone') logger.addHandler(logging.StreamHandler()) logger.setLevel(logging.WARNING) player1 = RandomPlayer(cls=HeroClass.MAGE, name='player1', first_player=True, start_health=start_health, fix_deck=deck) player2 = RandomPlayer(cls=HeroClass.MAGE, name='player2', first_player=False, start_health=start_health, fix_deck=deck) # test # logger.setLevel(logging.INFO) player1.reset(test=True) player2.reset(test=True) match = Match(player1, player2) start_time = time.time() win_results = []