def __init__(self, init_model=None):
    """Build the game environment, the network and the training settings.

    Args:
        init_model: Accepted for interface compatibility; this constructor
            does not read it.
    """
    # Collaborators: create the game state and the network first, then hand
    # both to the Blackjack environment.
    game_state = Game_State()
    network = PolicyValueNet()
    self.state = game_state
    self.policy_value_net = network
    self.game = Blackjack(game_state, network)

    # Optimisation settings.
    self.learn_rate = 2e-3
    self.lr_multiplier = 1
    self.kl_targ = 0.02

    # Schedule and batching settings.
    self.game_batch_num = 30  # roughly the number of update rounds
    self.play_batch_size = 1  # self-play games run per data collection
    self.epochs = 32  # training steps per policy update
    self.batch_size = 8

    # Bounded replay buffer: the oldest samples drop out once it is full.
    self.buffer_size = 512
    self.data_buffer = deque(maxlen=self.buffer_size)
def __init__(self):
    """Create an empty Blackjack session, a blank player and the menu tables."""
    self.quit_without_save = False
    self.BJ = Blackjack(None)
    self.player = Player(None, None, None)

    # Menu number -> bound handler; numbering starts at 1 to match what the
    # user types.
    login_handlers = (self._registration, self._sign_in, self._quit)
    game_handlers = (
        self._play,
        self._deposit,
        self._info,
        self._sign_out,
        self._delete_account,
        self._quit,
    )
    self.login_options = dict(enumerate(login_handlers, start=1))
    self.game_options = dict(enumerate(game_handlers, start=1))
class TestBlackjack(unittest.TestCase):
    """Tests for the initial state and ``restart()`` of ``Blackjack``."""

    def setUp(self):
        # Build fresh fixtures for every test. test_restart mutates the
        # Blackjack object (restart() clears its deck), so sharing one
        # instance across tests would make the results depend on the order
        # in which the tests run.
        self.deck = Deck(BuildDeck().create_deck())
        self.blackjack = Blackjack(self.deck)

    def test_data(self):
        """A new session keeps the given deck and starts with zeroed counters."""
        self.assertEqual(self.blackjack.deck, self.deck)
        self.assertEqual(self.blackjack.draw_count, 0)
        self.assertEqual(self.blackjack.bet, 0)

    def test_restart(self):
        """restart() drops the deck and resets draw_count and bet to zero."""
        self.blackjack.draw_count = 5
        self.blackjack.bet = 10
        # Sanity check that the values really changed before restarting.
        self.assertEqual(self.blackjack.draw_count, 5)
        self.assertEqual(self.blackjack.bet, 10)

        self.blackjack.restart()

        self.assertIsNone(self.blackjack.deck)
        self.assertEqual(self.blackjack.draw_count, 0)
        self.assertEqual(self.blackjack.bet, 0)
def main():
    """Play blackjack rounds on the console until the player stops.

    Each round runs the betting and player phases, then the dealer phase
    unless the player's hand value is non-positive or exactly 21, and
    finally the end phase. Answering anything other than ``y`` to the
    prompt ends the loop.
    """
    game = Blackjack()
    while True:
        game.betting_fase()
        game.player_fase()

        hand_value = game.player.hand.value
        dealer_skipped = hand_value <= 0 or hand_value == 21
        if not dealer_skipped:
            game.dealer_fase()
        game.end_fase()

        print('Do you want to keep playing? y/n')
        if raw_input().lower() != 'y':
            break
        game.reset()
# averageOptima = sum(optima) / numGames
# averageRounds = total / numGames
# print "Average win rate: {0:.2f}%".format(100.0 * wins / total)
# print "Average number rounds before bust: {0:.2f}".format(1.0 * averageRounds)
# print "Average maximum money over games: {0:.2f}".format(1.0 * averageOptima)
# print "S.D. in maximum money over games: {0:.2f}".format((1.0 * sum([(i - averageOptima)**2 for i in optima]) / numGames)**0.5)
# print "S.D. in number of rounds over games: {0:.2f}".format((1.0 * sum([(i - averageRounds)**2 for i in numRounds]) / numGames)**0.5)

## Getting win rate for qlearner
# Configuration shared by the agent and the game.
trainingRounds = 1000000
file = "qLearningMitBetting1000"
args = dict(flags=["-np", "-cd"], file=file)
# args = {"flags": ["-np"]}

# The agent is created first, handed to the game, then put in training mode.
player = QLearningAgent(0.8, 0.1, 1, **args)
game = Blackjack(8, player, **args)
player.setTraining(True)

# Counters; this loop only advances `rounds`.
wins = 0
total = 0
rounds = 0
while rounds < trainingRounds:
    result = game.playRound()
    # Progress output every 1000 rounds, including round 0.
    if not rounds % 1000:
        print(rounds)
    rounds += 1

# # Writes best action to file
# s = ""
# for j in xrange(20, 3, -1):
#     for i in xrange(2, 12):
#         # print j, i, player.qVals[(((j, False), i), 1)], player.qVals[(((j, False), i), 2)], player.getPolicy(((j, False), i), {1: "hit", 2: "stand"})
def main(self):
    """Run the pygame loop that drives the three screens of the game.

    ``state`` selects the active screen: 0 is the home screen where a card
    back is chosen, 1 is the betting screen, 2 is the game table. Each pass
    of the outer loop first handles the pending events, then draws the
    screen for the current state and refreshes the display. The loop ends
    once a ``pygame.QUIT`` event has been seen.
    """
    # Buttons for cards in home screen
    blue = Button(black, 145, 295, 110, 160)
    gray = Button(black, 295, 295, 110, 160)
    green = Button(black, 445, 295, 110, 160)
    # Custom Bet Input
    slider = Slider(self.win, 250, 250, 200, 40, min=1, max=500, step=1, handleRadius=25)
    outputText = TextBox(self.win, 315, 325, 70, 50, fontSize=30)
    # Buttons for Bet Selection
    minButton = Button(white, 190, 400, 100, 50, 'MIN')
    maxButton = Button(white, 410, 400, 100, 50, 'MAX')
    customButton = Button(white, 300, 400, 100, 50, "CUSTOM")
    # Buttons for Game Selection
    hitButton = Button(white, 600, 150, 90, 50, 'HIT')
    standButton = Button(white, 600, 250, 90, 50, 'STAND')
    splitButton = Button(white, 600, 350, 90, 50, 'SPLIT')
    doubleButton = Button(white, 600, 450, 90, 50, 'DOUBLE')
    # Card-back image; stays an empty string until one is picked in state 0.
    back = ''
    # Active screen: 0 = home, 1 = betting, 2 = game table.
    state = 0
    # Game Class
    blackjack = Blackjack()
    user = blackjack.user
    dealer = blackjack.dealer
    run = True
    while run:
        events = pygame.event.get()
        for event in events:
            pos = pygame.mouse.get_pos()
            if event.type == pygame.QUIT:
                # Only clears the flag; the rest of this frame still runs.
                run = False
            self.checkHover(blue, gray, green, minButton, maxButton,
                            customButton, hitButton, standButton,
                            splitButton, doubleButton, pos)
            if event.type == pygame.MOUSEBUTTONDOWN:
                # In every state below, a click that hits none of the
                # buttons reaches `break`, which leaves the event loop and
                # drops any events still queued for this frame.
                if state == 0:
                    # Home screen: the clicked card picks the card back.
                    if blue.isOver(pos):
                        back = pygame.transform.scale(blueCard, (80, 110))
                    elif gray.isOver(pos):
                        back = pygame.transform.scale(grayCard, (80, 110))
                    elif green.isOver(pos):
                        back = pygame.transform.scale(greenCard, (80, 110))
                    else:
                        break
                    state = 1
                    self.fade()
                    blackjack.deckOfCards.shuffle()
                elif state == 1:
                    # Betting screen: fixed minimum, fixed maximum, or the
                    # value currently shown by the slider.
                    bet = 0
                    if minButton.isOver(pos):
                        bet = 1
                    elif maxButton.isOver(pos):
                        bet = 500
                    elif customButton.isOver(pos):
                        bet = slider.getValue()
                    else:
                        break
                    state = 2
                    blackjack.place_bet(bet)
                    blackjack.deal_start_cards()
                    self.display_first_cards(user, dealer, back, blackjack)
                elif state == 2:
                    # Game table: forward the chosen action to the game.
                    if hitButton.isOver(pos):
                        blackjack.hit()
                    elif standButton.isOver(pos):
                        blackjack.stand()
                    elif doubleButton.isOver(pos):
                        blackjack.double()
                    elif splitButton.isOver(pos):
                        # Clicking SPLIT is accepted but does nothing here.
                        pass
                    else:
                        break
        # Draw the screen for the current state.
        if state == 0:
            self.display_homescreen(blue, gray, green)
        elif state == 1:
            slider.listen(events)
            # The status is set back to 'user' on every betting frame.
            blackjack.set_status('user')
            self.display_betting(user.balance, slider, outputText, minButton, maxButton, customButton, back)
        elif state == 2:
            self.display_game(blackjack, user, dealer, back, hitButton, standButton, splitButton, doubleButton)
            self.display_status(blackjack)
            stat = blackjack.get_status()
            if stat == 'user':
                blackjack.check_blackjack()
            elif stat == 'dealer':
                self.display_status(blackjack)
                self.dealer_turn(blackjack)
            elif stat == 'reset' or stat == 'over' or stat == 'dealerbust' or stat == 'won' or stat == 'lost':
                # Round finished: go back to betting, show the outcome,
                # reset the game, then pause (pygame.time.delay takes
                # milliseconds) before the next frame is drawn.
                state = 1
                self.display_status(blackjack)
                pygame.display.update()
                blackjack.reset_game()
                pygame.time.delay(2500)
        pygame.display.update()
class TrainPipeline():
    """Self-play training pipeline for the Blackjack policy-value network.

    The pipeline collects samples by letting the game play against itself,
    stores them in a bounded replay buffer and repeatedly trains the
    network on random mini-batches from that buffer.
    """

    def __init__(self, init_model=None):
        """Create the game, the network and the training hyper-parameters.

        Args:
            init_model: Accepted for interface compatibility; this
                constructor does not read it.
        """
        self.state = Game_State()
        self.policy_value_net = PolicyValueNet()
        self.game = Blackjack(self.state, self.policy_value_net)
        self.game_batch_num = 30  # roughly the number of update rounds
        self.play_batch_size = 1  # self-play games run per data collection
        self.batch_size = 8
        self.buffer_size = 512
        self.epochs = 32  # training steps per policy update
        self.data_buffer = deque(maxlen=self.buffer_size)
        self.learn_rate = 2e-3
        self.lr_multiplier = 1
        self.kl_targ = 0.02
        # Length of the most recent self-play episode. Defined here so that
        # run() can report it even when no game has been collected yet.
        self.episode_len = 0

    def collect_selfplay_data(self, n_games=1):
        """Run ``n_games`` self-play games and append their samples to the buffer.

        Args:
            n_games: Number of self-play games to run.
        """
        for _ in range(n_games):
            # Materialise once so the length can be taken even when
            # start_self_play() returns a one-shot iterable.
            play_data = list(self.game.start_self_play())
            self.episode_len = len(play_data)
            self.data_buffer.extend(play_data)

    def run(self, ep):
        """Alternate data collection and network updates for ``ep`` iterations.

        Args:
            ep: Number of collect/update iterations.
        """
        # self.policy_value_net.load_ckpt()  # resume from saved parameters
        for i in range(ep):
            self.collect_selfplay_data(self.play_batch_size)
            print("batch i:{}, episode_len:{}".format(i + 1,
                                                      self.episode_len))
            # Train only once the buffer holds more than one mini-batch.
            if len(self.data_buffer) > self.batch_size:
                self.policy_update()
                print(
                    '============================No%i update network SUCCESS==========================================='
                    % (i))
                # NOTE(review): checkpoint is written after each successful
                # update; confirm this matches the intended save frequency.
                self.policy_value_net.save_ckpt()

    @staticmethod
    def _explained_variance(targets, predictions):
        """Return 1 - Var(targets - predictions) / Var(targets), or NaN.

        NaN is returned when the targets have zero variance (for example
        when every sampled game had the same outcome), where the ratio is
        undefined and would otherwise trigger a division by zero.
        """
        targets = np.array(targets)
        target_var = np.var(targets)
        if target_var == 0:
            return float("nan")
        return 1 - np.var(targets - predictions) / target_var

    def policy_update(self):
        """Train the policy-value net on one random mini-batch.

        Runs up to ``self.epochs`` training steps on the same mini-batch,
        stops early when the KL divergence from the pre-update policy gets
        too large, and then adapts the learning-rate multiplier.

        Returns:
            The total loss reported by the last training step.
        """
        # ---- unpack the sampled data ----
        mini_batch = random.sample(self.data_buffer, self.batch_size)
        state_batch = [data[0] for data in mini_batch]  # state
        mcts_probs_batch = [data[1] for data in mini_batch]  # probs
        winner_batch = [data[2] for data in mini_batch]  # winner

        # Reference outputs taken before training; the KL divergence below
        # is measured against these.
        # NOTE(review): update_param() presumably syncs the "old" network
        # with the current weights -- confirm in PolicyValueNet.
        self.policy_value_net.update_param()
        old_probs, old_v = self.policy_value_net.policy_value_old(state_batch)

        for _ in range(self.epochs):
            loss, value_loss, policy_loss, l2_penalty = self.policy_value_net.train_step(
                state_batch, mcts_probs_batch, winner_batch,
                self.learn_rate * self.lr_multiplier)
            print(
                'total_loss: %f , value_loss: %f , policy_loss: %f , l2_penalty: %f'
                % (loss, value_loss, policy_loss, l2_penalty))
            new_probs, new_v, _ = self.policy_value_net.policy_value(
                state_batch)
            # The small epsilon keeps log() finite for zero probabilities.
            kl = np.mean(
                np.sum(old_probs * (np.log(old_probs + 1e-10) -
                                    np.log(new_probs + 1e-10)),
                       axis=1))
            if kl > self.kl_targ * 4:  # early stopping if D_KL diverges badly
                break

        # Adapt the learning rate to the size of the last KL divergence.
        if kl > self.kl_targ * 2 and self.lr_multiplier > 0.1:
            self.lr_multiplier /= 1.5
        elif kl < self.kl_targ / 2 and self.lr_multiplier < 10:
            self.lr_multiplier *= 1.5

        # NOTE(review): assumes old_v / new_v line up element-wise with
        # winner_batch (shape (batch,)); a (batch, 1) network output would
        # silently broadcast to (batch, batch) -- confirm.
        explained_var_old = self._explained_variance(winner_batch, old_v)
        explained_var_new = self._explained_variance(winner_batch, new_v)
        print(("kl:{:.5f},"
               "lr_multiplier:{:.3f},"
               "loss:{},"
               "explained_var_old:{:.3f},"
               "explained_var_new:{:.3f}").format(kl, self.lr_multiplier,
                                                  loss, explained_var_old,
                                                  explained_var_new))
        return loss

    def play_game(self, playtime=1):
        """Play ``playtime`` evaluation games and print the tally.

        A result of 0 from ``start_game()`` is counted for the pure tree
        player; any other result is counted for the MCTS tree player.

        Args:
            playtime: Number of games to play.

        Returns:
            Number of games counted for the MCTS tree player.
        """
        self.policy_value_net.load_ckpt()  # load the saved network parameters
        pure_tree_playre_win_count = 0
        tree_player_win_count = 0
        for _ in range(playtime):
            winner = self.game.start_game()
            if winner == 0:
                print('pure_tree_playre_win!!!')
                pure_tree_playre_win_count += 1
            else:
                print('MCTS_tree_player_win!!!')
                tree_player_win_count += 1
        print('===========Result============')
        print('pure_tree_playre_win: %i ' % (pure_tree_playre_win_count))
        print('MCTS_tree_player_win: %i ' % (tree_player_win_count))
        return tree_player_win_count

    def playgame_with_human(self, playtime=1):
        """Play ``playtime`` games against a human and print the tally.

        A result of 0 from ``start_game_human()`` is counted for the human;
        any other result is counted for the MCTS player.

        Args:
            playtime: Number of games to play.

        Returns:
            Number of games counted for the MCTS player, mirroring
            ``play_game``.
        """
        self.policy_value_net.load_ckpt()  # load the saved network parameters
        human_win_count = 0
        msts_player_win_count = 0
        for _ in range(playtime):
            winner = self.game.start_game_human()
            if winner == 0:
                print('Human_playre_win!!!')
                human_win_count += 1
            else:
                print('MCTS_tree_player_win!!!')
                msts_player_win_count += 1
        print('===========Result============')
        print('Human_playre_win: %i ' % (human_win_count))
        print('MCTS_player_win: %i ' % (msts_player_win_count))
        return msts_player_win_count
class Menu:
    """Console front end: login menu, game menu and the actions behind them."""

    def __init__(self):
        """Create an empty Blackjack session, a blank player and the menu tables."""
        self.BJ = Blackjack(None)
        self.player = Player(None, None, None)
        self.quit_without_save = False
        # Menu number -> handler for the start (login) menu.
        self.login_options = {
            1: self._registration,
            2: self._sign_in,
            3: self._quit
        }
        # Menu number -> handler for the game menu.
        self.game_options = {
            1: self._play,
            2: self._deposit,
            3: self._info,
            4: self._sign_out,
            5: self._delete_account,
            6: self._quit
        }

    def run(self):
        """Run the program: start menu first, then the game menu until sign-out.

        The inner loop returns to the start menu after "Sign out" (4) or
        "Delete account" (5). The program itself ends through ``_quit``.
        """
        print("\n\t\t\t| Welcome to Console-Blackjack-21. |\n")
        while True:
            self._start_menu()
            choice = self._menu_choice(1, 3)
            system('cls||clear')
            self.login_options[choice]()
            while True:
                self._game_menu()
                choice = self._menu_choice(1, 6)
                system('cls||clear')
                self.game_options[choice]()
                if choice in (4, 5):
                    break

    def _start_menu(self):
        """Print the start menu."""
        print("""
        1. Register
        2. Sign in
        3. Quit
        """)

    def _game_menu(self):
        """Print the game menu."""
        print("""
        1. Play
        2. Make deposit
        3. Account information
        4. Sign out
        5. Delete account
        6. Quit
        """)

    @staticmethod
    def _read_int(prompt, lowest, highest, error_message):
        """Prompt until an integer within the given bounds is entered.

        Args:
            prompt: Text shown by ``input``.
            lowest: Smallest accepted value (inclusive).
            highest: Largest accepted value (inclusive), or ``None`` for no
                upper bound.
            error_message: Printed after every rejected entry.

        Returns:
            The accepted integer.

        Raises:
            EOFError: When the input stream is closed. It is deliberately
                not caught: retrying on a closed stream would loop forever.
        """
        while True:
            try:
                # Converting text with int() fails with ValueError only, so
                # one handler covers every malformed entry.
                value = int(input(prompt))
            except ValueError:
                print(error_message)
                continue
            if value >= lowest and (highest is None or value <= highest):
                return value
            print(error_message)

    def _menu_choice(self, min, max):
        """Ask for a menu number between ``min`` and ``max`` (inclusive).

        Returns:
            The chosen number.
        """
        return self._read_int(
            "$ ", min, max,
            "Your choice must be between {0} and {1}.".format(min, max))

    def _registration(self):
        """Register a new account and make it the current player."""
        self.quit_without_save = False
        self.player = Account().make_registration()

    def _sign_in(self):
        """Sign in to an existing account and make it the current player."""
        self.quit_without_save = False
        self.player = Account().make_sign_in()

    def _quit(self):
        """Say goodbye, save the player unless saving is disabled, and exit."""
        # sys.exit is used instead of the interactive helper exit(), which
        # is injected by the site module and is not guaranteed to exist.
        import sys

        print("\nThank you for playing Blackjack!\n")
        # NOTE(review): quitting from the start menu before signing in
        # saves the placeholder player created in __init__ -- confirm that
        # Account.save copes with it.
        if self.quit_without_save is False:
            Account().save(self.player)
        sys.exit()

    def _play(self):
        """Play one distribution and settle the bet.

        On the first call of a session (no deck yet) the bet per
        distribution and the number of decks are requested. The player is
        asked to deposit until the bet is affordable.
        """
        # A bet is at least 1, so a new session (bet still 0) needs a
        # balance of at least 1; otherwise no bet could ever be accepted.
        required = max(self.BJ.bet, 1)
        while self.player.money_balance < required:
            self._deposit()
        if self.BJ.deck is None:
            self.BJ.bet = self._read_int(
                "Make your bet per distribution: ", 1,
                self.player.money_balance, "Enter valid bet.")
            # Asked separately so that a wrong deck count does not force
            # the bet to be typed again.
            deck_num = self._read_int(
                "Select number of decks[1-6]: ", 1, 6,
                "Numbers of decks must be between 1 and 6.")
            self.BJ.deck = Deck(BuildDeck().create_deck(deck_num))
            self.BJ.deck.stir()
        self.player.games += 1
        if self.BJ.play():
            self.player.wins += 1
            self.player.money_balance += self.BJ.bet
            print("\nYou win.")
        else:
            # The balance never drops below zero.
            self.player.money_balance = max(
                self.player.money_balance - self.BJ.bet, 0)
            print("\nYou lose.")

    def _deposit(self):
        """Ask for a positive whole amount and deposit it for the player."""
        deposit = self._read_int(
            "Deposit: ", 1, None, "Enter valid deposit bigger than zero.")
        self.player.deposit(deposit)

    def _info(self):
        """Print the player's account information."""
        self.player.print()

    def _sign_out(self):
        """Save the player and reset the Blackjack session."""
        Account().save(self.player)
        self.BJ.restart()

    def _delete_account(self):
        """Delete the current account and reset the Blackjack session."""
        # The deleted account must not be written back on quit.
        self.quit_without_save = True
        Account().delete(self.player.username)
        self.BJ.restart()