Esempio n. 1
0
    def __init__(self, init_model=None):
        """Build the game objects and set the training hyper-parameters.

        ``init_model`` is accepted but is not used by this constructor.
        """
        # Collaborators, created in dependency order: the game needs both
        # the state and the network.
        self.state = Game_State()
        self.policy_value_net = PolicyValueNet()
        self.game = Blackjack(self.state, self.policy_value_net)

        # Optimisation settings.
        self.learn_rate = 2e-3
        self.lr_multiplier = 1
        self.kl_targ = 0.02
        self.epochs = 32  # how many updates to perform
        self.batch_size = 8

        # Self-play and replay-buffer settings.
        self.game_batch_num = 30  # equivalent to the number of updates
        self.play_batch_size = 1  # how many runs are played to obtain a batch
        self.buffer_size = 512
        self.data_buffer = deque(maxlen=self.buffer_size)
Esempio n. 2
0
    def __init__(self):
        """Start with a placeholder session and player and build the menu tables."""
        self.BJ = Blackjack(None)
        self.player = Player(None, None, None)
        self.quit_without_save = False

        # Handlers listed in menu order; each table maps the 1-based menu
        # number to the bound method that handles it.
        login_handlers = (
            self._registration,
            self._sign_in,
            self._quit,
        )
        game_handlers = (
            self._play,
            self._deposit,
            self._info,
            self._sign_out,
            self._delete_account,
            self._quit,
        )
        self.login_options = dict(enumerate(login_handlers, start=1))
        self.game_options = dict(enumerate(game_handlers, start=1))
Esempio n. 3
0
class TestBlackjack(unittest.TestCase):
    """Tests for the initial state of ``Blackjack`` and for ``restart()``."""

    def setUp(self):
        # Fresh fixtures for every test: test_restart mutates the session
        # (restart() clears the deck and counters), so sharing one instance
        # at class level would make the outcome of test_data depend on the
        # order in which the tests run.
        self.deck = Deck(BuildDeck().create_deck())
        self.blackjack = Blackjack(self.deck)

    def test_data(self):
        """A new session keeps the given deck and starts with zeroed counters."""
        self.assertEqual(self.blackjack.deck, self.deck)
        self.assertEqual(self.blackjack.draw_count, 0)
        self.assertEqual(self.blackjack.bet, 0)

    def test_restart(self):
        """restart() drops the deck and resets draw_count and bet to zero."""
        self.blackjack.draw_count = 5
        self.blackjack.bet = 10
        self.assertEqual(self.blackjack.draw_count, 5)
        self.assertEqual(self.blackjack.bet, 10)

        self.blackjack.restart()

        self.assertIsNone(self.blackjack.deck)
        self.assertEqual(self.blackjack.draw_count, 0)
        self.assertEqual(self.blackjack.bet, 0)
Esempio n. 4
0
def main():
    """Play console blackjack rounds until the player declines another one.

    This is Python 2 code (it reads the answer with ``raw_input``).
    """
    game = Blackjack()

    while True:
        game.betting_fase()
        game.player_fase()

        # The dealer only plays when the player's hand value is positive
        # and is not exactly 21.
        hand_value = game.player.hand.value
        if hand_value > 0 and hand_value != 21:
            game.dealer_fase()
        game.end_fase()

        print('Do you want to keep playing? y/n')
        if raw_input().lower() != 'y':
            break
        game.reset()
Esempio n. 5
0
# averageOptima = sum(optima) / numGames
# averageRounds = total / numGames
# print "Average win rate: {0:.2f}%".format(100.0 * wins / total)
# print "Average number rounds before bust: {0:.2f}".format(1.0 * averageRounds)
# print "Average maximum money over games: {0:.2f}".format(1.0 * averageOptima)
# print "S.D. in maximum money over games: {0:.2f}".format((1.0 * sum([(i - averageOptima)**2 for i in optima]) / numGames)**0.5)
# print "S.D. in number of rounds over games: {0:.2f}".format((1.0 * sum([(i - averageRounds)**2 for i in numRounds]) / numGames)**0.5)

## Q-learner run: play `trainingRounds` rounds with training switched on
## (Python 2 script section)
trainingRounds = 1000000
file = "qLearningMitBetting1000"
args = dict(flags=["-np", "-cd"], file=file)
# args = {"flags": ["-np"]}
player = QLearningAgent(0.8, 0.1, 1, **args)
game = Blackjack(8, player, **args)
player.setTraining(True)

# Counters; only `rounds` is advanced by the loop below.
wins = 0
total = 0
rounds = 0
while rounds < trainingRounds:
    result = game.playRound()
    # Progress marker every 1000 rounds.
    if not rounds % 1000:
        print(rounds)
    rounds += 1

# # Writes best action to file
# s = ""
# for j in xrange(20, 3, -1):
# 	for i in xrange(2, 12):
# 		# print j, i, player.qVals[(((j, False), i), 1)], player.qVals[(((j, False), i), 2)], player.getPolicy(((j, False), i), {1: "hit", 2: "stand"})
Esempio n. 6
0
    def main(self):
        """Run the pygame event and render loop of the blackjack UI.

        The screen is chosen by the local ``state`` value: 0 is the home
        screen where a card back is picked, 1 is the betting screen and 2 is
        the game table.  The loop ends once a ``pygame.QUIT`` event is seen.
        """

        # Buttons for cards in home screen
        blue = Button(black, 145, 295, 110, 160)
        gray = Button(black, 295, 295, 110, 160)
        green = Button(black, 445, 295, 110, 160)

        # Custom Bet Input
        slider = Slider(self.win, 250, 250, 200, 40, min=1, max=500, step=1, handleRadius=25)
        outputText = TextBox(self.win, 315, 325, 70, 50, fontSize=30)

        # Buttons for Bet Selection
        minButton = Button(white, 190, 400, 100, 50, 'MIN')
        maxButton = Button(white, 410, 400, 100, 50, 'MAX')
        customButton = Button(white, 300, 400, 100, 50, "CUSTOM")

        # Buttons for Game Selection
        hitButton = Button(white, 600, 150, 90, 50, 'HIT')
        standButton = Button(white, 600, 250, 90, 50, 'STAND')
        splitButton = Button(white, 600, 350, 90, 50, 'SPLIT')
        doubleButton = Button(white, 600, 450, 90, 50, 'DOUBLE')

        # Card-back image chosen on the home screen (empty until picked).
        back = ''
        state = 0

        # Game Class
        blackjack = Blackjack()
        user = blackjack.user
        dealer = blackjack.dealer

        # Statuses that finish the current round and return to betting.
        round_over = ('reset', 'over', 'dealerbust', 'won', 'lost')

        run = True
        while run:
            events = pygame.event.get()
            for event in events:
                pos = pygame.mouse.get_pos()
                if event.type == pygame.QUIT:
                    run = False

                self.checkHover(blue, gray, green, minButton, maxButton, customButton, hitButton, standButton, splitButton, doubleButton, pos)

                if event.type == pygame.MOUSEBUTTONDOWN:
                    # A click that hits no button is ignored with `continue`
                    # rather than `break`: the events were already removed
                    # from the queue, so leaving the loop would silently drop
                    # the remaining ones (a pending QUIT included).
                    if state == 0:
                        if blue.isOver(pos):
                            back = pygame.transform.scale(blueCard, (80, 110))
                        elif gray.isOver(pos):
                            back = pygame.transform.scale(grayCard, (80, 110))
                        elif green.isOver(pos):
                            back = pygame.transform.scale(greenCard, (80, 110))
                        else:
                            continue
                        state = 1
                        self.fade()
                        blackjack.deckOfCards.shuffle()
                    elif state == 1:
                        bet = 0
                        if minButton.isOver(pos):
                            bet = 1
                        elif maxButton.isOver(pos):
                            bet = 500
                        elif customButton.isOver(pos):
                            bet = slider.getValue()
                        else:
                            continue
                        state = 2
                        blackjack.place_bet(bet)
                        blackjack.deal_start_cards()
                        self.display_first_cards(user, dealer, back, blackjack)
                    elif state == 2:
                        if hitButton.isOver(pos):
                            blackjack.hit()
                        elif standButton.isOver(pos):
                            blackjack.stand()
                        elif doubleButton.isOver(pos):
                            blackjack.double()
                        elif splitButton.isOver(pos):
                            # Split has no handler here: the click is ignored.
                            pass
                        else:
                            continue

            # Draw the screen that matches the current state.
            if state == 0:
                self.display_homescreen(blue, gray, green)
            elif state == 1:
                slider.listen(events)
                blackjack.set_status('user')
                self.display_betting(user.balance, slider, outputText, minButton, maxButton, customButton, back)
            elif state == 2:
                self.display_game(blackjack, user, dealer, back, hitButton, standButton, splitButton, doubleButton)
                self.display_status(blackjack)

            # React to the game status reported by the Blackjack object.
            stat = blackjack.get_status()
            if stat == 'user':
                blackjack.check_blackjack()
            elif stat == 'dealer':
                self.display_status(blackjack)
                self.dealer_turn(blackjack)
            elif stat in round_over:
                # Show the result, reset the game, then pause for 2.5 s
                # before the betting screen is drawn again.
                state = 1
                self.display_status(blackjack)
                pygame.display.update()
                blackjack.reset_game()
                pygame.time.delay(2500)

            pygame.display.update()
Esempio n. 7
0
class TrainPipeline():
    """Self-play training loop around a policy-value network for Blackjack."""

    def __init__(self, init_model=None):
        """Build the game objects and set the training hyper-parameters.

        ``init_model`` is accepted but is not used by this constructor.
        """
        self.state = Game_State()
        self.policy_value_net = PolicyValueNet()
        self.game = Blackjack(self.state, self.policy_value_net)

        self.game_batch_num = 30  # equivalent to the number of updates
        self.play_batch_size = 1  # how many runs are played to obtain a batch
        self.batch_size = 8
        self.buffer_size = 512
        self.epochs = 32  # how many updates to perform
        self.data_buffer = deque(maxlen=self.buffer_size)
        self.learn_rate = 2e-3
        self.lr_multiplier = 1
        self.kl_targ = 0.02
        # Length of the most recent self-play episode; defined up front so
        # it can be read before any data has been collected.
        self.episode_len = 0

    # Original author's note: start_self_play seems to be the most important
    # part; the other saved pieces appear to be fine.
    def collect_selfplay_data(self, n_games=1):
        """Play ``n_games`` self-play games and append their data to the buffer.

        ``episode_len`` is set to the number of samples of the last game.
        """
        for _ in range(n_games):
            # Materialise the episode once so it can be measured and stored.
            play_data = list(self.game.start_self_play())
            self.episode_len = len(play_data)
            self.data_buffer.extend(play_data)

    def run(self, ep):
        """Run ``ep`` iterations of self-play, network update and checkpoint.

        The network is only updated once the buffer holds more samples than
        ``batch_size``; a checkpoint is saved on every iteration.
        """
        # self.policy_value_net.load_ckpt()  # load earlier parameters to continue training
        for i in range(ep):
            self.collect_selfplay_data(self.play_batch_size)

            print("batch i:{}, episode_len:{}".format(i + 1, self.episode_len))
            if len(self.data_buffer) > self.batch_size:
                self.policy_update()
                print(
                    '============================No%i update network SUCCESS==========================================='
                    % (i))
            self.policy_value_net.save_ckpt()

    def policy_update(self):
        """Update the policy-value net on one mini-batch and return the loss.

        Trains for up to ``epochs`` steps on a random sample of the buffer,
        stops early when the KL divergence between the old and new policy
        exceeds four times ``kl_targ``, then adapts ``lr_multiplier``.

        Returns:
            The total loss reported by the last training step.
        """
        # ======== unpack the data ========
        mini_batch = random.sample(self.data_buffer, self.batch_size)
        state_batch = [data[0] for data in mini_batch]  # state
        mcts_probs_batch = [data[1] for data in mini_batch]  # probs
        winner_batch = [data[2] for data in mini_batch]  # winner

        # NOTE(review): update_param() presumably copies the current weights
        # into the "old" network used as the KL reference - confirm.
        self.policy_value_net.update_param()
        old_probs, old_v = self.policy_value_net.policy_value_old(state_batch)

        # Train for up to `epochs` steps.
        for _ in range(self.epochs):
            loss, value_loss, policy_loss, l2_penalty = self.policy_value_net.train_step(
                state_batch, mcts_probs_batch, winner_batch,
                self.learn_rate * self.lr_multiplier)
            print(
                'total_loss: %f , value_loss: %f , policy_loss: %f , l2_penalty: %f'
                % (loss, value_loss, policy_loss, l2_penalty))
            new_probs, new_v, _ = self.policy_value_net.policy_value(
                state_batch)
            # Mean KL(old || new) over the batch; 1e-10 avoids log(0).
            kl = np.mean(
                np.sum(old_probs *
                       (np.log(old_probs + 1e-10) - np.log(new_probs + 1e-10)),
                       axis=1))
            if kl > self.kl_targ * 4:  # early stopping if D_KL diverges badly
                break

        # Adaptively adjust the learning rate from the last KL divergence,
        # keeping the multiplier roughly within [0.1, 10].
        if kl > self.kl_targ * 2 and self.lr_multiplier > 0.1:
            self.lr_multiplier /= 1.5
        elif kl < self.kl_targ / 2 and self.lr_multiplier < 10:
            self.lr_multiplier *= 1.5

        # Share of the outcome variance explained by the value head before
        # and after the update (diagnostic output only).
        winners = np.array(winner_batch)
        explained_var_old = (1 - np.var(winners - old_v) / np.var(winners))
        explained_var_new = (1 - np.var(winners - new_v) / np.var(winners))

        print(("kl:{:.5f},"
               "lr_multiplier:{:.3f},"
               "loss:{},"
               "explained_var_old:{:.3f},"
               "explained_var_new:{:.3f}").format(kl, self.lr_multiplier, loss,
                                                  explained_var_old,
                                                  explained_var_new))
        return loss

    def play_game(self, playtime=1):
        """Play ``playtime`` evaluation games and return the MCTS win count.

        A game counts for the pure-tree player when ``start_game()`` returns
        0 and for the MCTS player otherwise.
        """
        self.policy_value_net.load_ckpt()  # load the network parameters
        pure_tree_playre_win_count = 0
        tree_player_win_count = 0

        for _ in range(playtime):
            winner = self.game.start_game()
            if winner == 0:
                print('pure_tree_playre_win!!!')
                pure_tree_playre_win_count += 1
            else:
                print('MCTS_tree_player_win!!!')
                tree_player_win_count += 1

        print('===========Result============')
        print('pure_tree_playre_win: %i  ' % (pure_tree_playre_win_count))
        print('MCTS_tree_player_win: %i  ' % (tree_player_win_count))
        return tree_player_win_count

    def playgame_with_human(self, playtime=1):
        """Play ``playtime`` games against a human and return the MCTS win count.

        A game counts for the human when ``start_game_human()`` returns 0 and
        for the MCTS player otherwise.  The count is returned so this method
        mirrors ``play_game``.
        """
        self.policy_value_net.load_ckpt()  # load the network parameters
        human_win_count = 0
        msts_player_win_count = 0
        for _ in range(playtime):
            winner = self.game.start_game_human()
            if winner == 0:
                print('Human_playre_win!!!')
                human_win_count += 1
            else:
                print('MCTS_tree_player_win!!!')
                msts_player_win_count += 1
        print('===========Result============')
        print('Human_playre_win: %i  ' % (human_win_count))
        print('MCTS_player_win: %i  ' % (msts_player_win_count))
        return msts_player_win_count
Esempio n. 8
0
class Menu:
    # Console front end: shows the login and game menus and dispatches the
    # chosen entry to the matching handler method.

    # Creates a placeholder Blackjack session and Player, plus the tables
    # that map a menu number to its handler.
    def __init__(self):
        self.BJ = Blackjack(None)
        self.player = Player(None, None, None)
        self.quit_without_save = False

        self.login_options = {
            1: self._registration,
            2: self._sign_in,
            3: self._quit
        }
        self.game_options = {
            1: self._play,
            2: self._deposit,
            3: self._info,
            4: self._sign_out,
            5: self._delete_account,
            6: self._quit
        }

    # Runs the game: login menu first, then the game menu until the user
    # signs out or deletes the account (choices 4 and 5), which leads back
    # to the login menu. Quitting leaves the program from inside _quit().
    def run(self):
        print("\n\t\t\t| Welcome to Console-Blackjack-21. |\n")

        while True:
            self._start_menu()
            choice = self._menu_choice(1, 3)

            system('cls||clear')
            action = self.login_options[choice]
            action()

            while True:
                self._game_menu()
                choice = self._menu_choice(1, 6)

                system('cls||clear')
                action = self.game_options[choice]
                action()

                if choice == 4 or choice == 5:
                    break

    # View of "start menu"
    def _start_menu(self):
        print("""
        1. Register
        2. Sign in
        3. Quit
        """)

    # View of "game menu"
    def _game_menu(self):
        print("""
        1. Play
        2. Make deposit
        3. Account information
        4. Sign out
        5. Delete account
        6. Quit
        """)

    # Keeps prompting until the user types an integer that is >= low and,
    # when high is not None, <= high; prints error_message after every
    # rejected entry and returns the accepted value.
    # Only ValueError (non-numeric input) is handled: anything else, such as
    # EOFError on a closed stdin, propagates instead of being swallowed,
    # which would otherwise make the loop spin forever.
    @staticmethod
    def _prompt_int(prompt, low, high, error_message):
        while True:
            try:
                value = int(input(prompt))
            except ValueError:
                print(error_message)
                continue

            if value >= low and (high is None or value <= high):
                return value
            print(error_message)

    # User can make choice between "min" and "max" (both inclusive)
    def _menu_choice(self, min, max):
        return self._prompt_int(
            "$ ", min, max,
            "Your choice must be between {0} and {1}.".format(min, max))

    # Registers a new account and makes it the current player
    def _registration(self):
        self.quit_without_save = False
        self.player = Account().make_registration()

    # Signs in to an existing account and makes it the current player
    def _sign_in(self):
        self.quit_without_save = False
        self.player = Account().make_sign_in()

    # Quits the program, saving the player unless the account was deleted
    def _quit(self):
        # Imported locally so this class does not depend on the file's
        # import block; sys.exit() is used because the bare exit() helper
        # is only installed by the site module.
        import sys

        print("\nThank you for playing Blackjack!\n")

        if self.quit_without_save is False:
            Account().save(self.player)

        sys.exit()

    # Start Blackjack session: plays one game and settles the bet.
    # The bet and the number of decks are asked once, when the session has
    # no deck yet.
    def _play(self):
        # The player must be able to cover the bet already chosen.
        while self.player.money_balance < self.BJ.bet:
            self._deposit()

        if self.BJ.deck is None:
            # A valid bet lies between 1 and the balance, so an empty
            # balance would make every bet invalid: ask for a deposit first.
            while self.player.money_balance <= 0:
                self._deposit()

            # Each value has its own prompt loop, so a wrong deck count does
            # not force the bet to be typed again, and BJ.bet is only
            # assigned once the value has been validated.
            self.BJ.bet = self._prompt_int(
                "Make your bet per distribution: ",
                1, self.player.money_balance,
                "Enter valid bet.")
            deck_num = self._prompt_int(
                "Select number of decks[1-6]: ",
                1, 6,
                "Numbers of decks must be between 1 and 6.")

            self.BJ.deck = Deck(BuildDeck().create_deck(deck_num))
            self.BJ.deck.stir()

        self.player.games += 1
        if self.BJ.play():
            self.player.wins += 1
            self.player.money_balance += self.BJ.bet
            print("\nYou win.")
        else:
            self.player.money_balance -= self.BJ.bet
            # The balance never goes below zero.
            if self.player.money_balance < 0:
                self.player.money_balance = 0
            print("\nYou lose.")

    # Asks for a positive integer amount and passes it to Player.deposit()
    def _deposit(self):
        deposit = self._prompt_int(
            "Deposit: ", 1, None,
            "Enter valid deposit bigger than zero.")

        self.player.deposit(deposit)

    # Gives information about the player
    def _info(self):
        self.player.print()

    # Signing out: saves the player and restarts the Blackjack session
    def _sign_out(self):
        Account().save(self.player)
        self.BJ.restart()

    # Delete the account; the flag stops _quit() from saving it again
    def _delete_account(self):
        self.quit_without_save = True
        Account().delete(self.player.username)
        self.BJ.restart()