def _newGame(self):
    """Start a new round: create a fresh model and redraw both card panes.

    Instantiates a new ``Blackjack`` model, shows the player's and the
    dealer's cards as image labels in their panes, re-enables the Hit and
    Pass buttons and clears the status field.
    """
    # Destroy the card labels created by an earlier round. Without this,
    # labels for cards beyond the opening hand stay gridded in the panes
    # when the next round starts. On the very first call the attributes do
    # not exist yet, hence the getattr defaults.
    for stale in (list(getattr(self, "_playerLabels", ())) +
                  list(getattr(self, "_dealerLabels", ()))):
        stale.destroy()

    self._model = Blackjack()

    # Player cards: one image and one label per card, left to right.
    self._playerImages = [PhotoImage(file=card.getFilename())
                          for card in self._model.getPlayerCards()]
    self._playerLabels = [Label(self._playerPane, image=image)
                          for image in self._playerImages]
    for col, label in enumerate(self._playerLabels):
        label.grid(row=0, column=col)

    # Dealer cards, laid out the same way in the dealer pane.
    self._dealerImages = [PhotoImage(file=card.getFilename())
                          for card in self._model.getDealerCards()]
    self._dealerLabels = [Label(self._dealerPane, image=image)
                          for image in self._dealerImages]
    for col, label in enumerate(self._dealerLabels):
        label.grid(row=0, column=col)

    # Re-enable the buttons and clear the status field.
    self._hitButton["state"] = NORMAL
    self._passButton["state"] = NORMAL
    self._statusVar.set("")
def __init__(self, pcard1=0, pcard2=0, pcard3=0, pcard4=0, pcard5=0, dcard1=0, dcard2=0, dcard3=0, dcard4=0, dcard5=0, qdecks=1):
    """Store the card slots and deck count, then start the session.

    The ten card arguments and ``qdecks`` are saved on the instance,
    ``self.main()`` is run, a ``Blackjack`` object is built from the stored
    values and finally ``self.inicio_partida()`` is called.
    (Presumably ``pcard*`` are the player's cards and ``dcard*`` the
    dealer's -- confirm against the Blackjack class.)
    """
    (self.pcard1, self.pcard2, self.pcard3, self.pcard4, self.pcard5) = (
        pcard1, pcard2, pcard3, pcard4, pcard5)
    (self.dcard1, self.dcard2, self.dcard3, self.dcard4, self.dcard5) = (
        dcard1, dcard2, dcard3, dcard4, dcard5)
    self.qdecks = qdecks

    self.main()

    # Read the attributes only after main() has run, as the values are
    # taken from the instance rather than from the arguments.
    game_args = [
        self.pcard1, self.pcard2, self.pcard3, self.pcard4, self.pcard5,
        self.dcard1, self.dcard2, self.dcard3, self.dcard4, self.dcard5,
        self.qdecks,
    ]
    self.blackjack = Blackjack(*game_args)
    self.inicio_partida()
def test_stand():
    """After stand() the player still has two cards and the dealer at least three."""
    game = Blackjack()
    game.player, game.dealer = BlackjackHand([0, 11]), BlackjackHand([2, 3])

    game.stand()

    assert len(game.player) == 2, "Player didn't stand"
    assert len(game.dealer) >= 3, "Dealer didn't hit"
def start_game(bot, update):
    """Start a new game: draw the first card and ask whether to continue.

    Rebinds the module-level ``bjack`` (a new ``Blackjack``) and
    ``user_cards`` (a list holding the card just drawn), then sends two
    replies to the message carried by ``update``.

    Args:
        bot: unused by this handler.
        update: incoming update; only ``update.message.reply_text`` is used.
    """
    global bjack, user_cards

    bjack = Blackjack()
    first_card = bjack.pop_card()
    user_cards = [first_card]

    update.message.reply_text("Hai tirato su %s" % str(first_card))
    # The prompt used to be misspelled ("contuinuare").
    update.message.reply_text("Vuoi continuare?")
def test_score_21_with_as_worth_11(self):
    """An ace, an 8 and a 2 of spades must score 21."""
    hand = [Card(rank=rank, suit='♠') for rank in ('A', '8', '2')]
    game = Blackjack()
    self.assertEqual(21, game.score(hand))
def test_score_21_with_only_numbers(self):
    """A 10, a 5 and a 6 of spades must score 21."""
    hand = [Card(rank=rank, suit='♠') for rank in ('10', '5', '6')]
    game = Blackjack()
    self.assertEqual(21, game.score(hand))
def test_hit():
    """After hit() both hands hold three cards and the new cards left the deck."""
    game = Blackjack()
    game.player, game.dealer = BlackjackHand([0, 11]), BlackjackHand([2, 3])

    game.hit()

    assert len(game.player) == 3, "Player didn't hit"
    assert len(game.dealer) == 3, "Dealer didn't hit"
    # The third card of each hand must no longer be in the deck
    # (dealer checked first, then player).
    for hand in (game.dealer, game.player):
        assert hand[2] not in game.deck
def _newGame(self):
    """Destroy this frame, rebuild every widget and redraw the card panes."""
    # Throw the old frame away and initialise a new one in its place.
    self.destroy()
    Frame.__init__(self)
    self.master.title("Blackjack")
    self.grid()

    # Command buttons, placed left to right on row 0.
    self._hitButton = Button(self, text="Hit", command=self._hit)
    self._passButton = Button(self, text="Pass", command=self._pass)
    self._newGameButton = Button(self, text="New Game", command=self._newGame)
    for column, button in enumerate(
            (self._hitButton, self._passButton, self._newGameButton)):
        button.grid(row=0, column=column)

    # Status field spanning the three button columns.
    self._statusVar = StringVar()
    self._statusField = Entry(self, textvariable=self._statusVar)
    self._statusField.grid(row=1, column=0, columnspan=3)

    # One pane per hand: player on row 2, dealer on row 3.
    self._playerPane = Frame(self)
    self._playerPane.grid(row=2, column=0, columnspan=3)
    self._dealerPane = Frame(self)
    self._dealerPane.grid(row=3, column=0, columnspan=3)

    # NOTE(review): the model creation below appears to be commented out,
    # so the cards are read from the already existing self._model --
    # confirm this is intended.
##    self._model = Blackjack()

    # Player cards: one image and one label per card.
    self._playerImages = [getCardImage(card)
                          for card in self._model.getPlayerCards()]
    self._playerLabels = [Label(self._playerPane, image=image)
                          for image in self._playerImages]
    for column, label in enumerate(self._playerLabels):
        label.grid(row=0, column=column)

    # Dealer cards, same layout in the dealer pane.
    self._dealerImages = [getCardImage(card)
                          for card in self._model.getDealerCards()]
    self._dealerLabels = [Label(self._dealerPane, image=image)
                          for image in self._dealerImages]
    for column, label in enumerate(self._dealerLabels):
        label.grid(row=0, column=column)

    # Re-enable the play buttons and clear the status field.
    for button in (self._hitButton, self._passButton):
        button["state"] = NORMAL
    self._statusVar.set("")
def __init__(self):
    """Create the game model and the Tk window, build the UI, run the loop.

    The constructor ends by calling ``self.root.mainloop()``.
    """
    self.bj = Blackjack()
    self.root = Tk()

    # 900x600 canvas with a green background.
    canvas = Canvas(self.root, width=900, height=600)
    self.canvas = canvas
    canvas.pack()
    canvas.configure(background='green')

    # Build the individual parts of the interface in their original order.
    self.addInputs()
    self.addButtons()
    self.addLables()
    self.addDeck()

    self.root.mainloop()
def inicio_partida(self):
    """Ask for the opening cards and deck count, then rebuild the game.

    The four answers are stored as typed (strings) in ``pcard1``,
    ``pcard2``, ``dcard1`` and ``qdecks``. A new ``Blackjack`` is created
    from all stored card slots, after which ``self.main()`` and
    ``self.escolher_sugestao()`` are called.
    """
    questions = (
        ('pcard1', 'Digite a sua primeira carta: '),
        ('pcard2', 'Digite a sua segunda carta: '),
        ('dcard1', 'Digite a carta revelada do Dealer: '),
        ('qdecks', 'Digite a quantidade de baralhos: '),
    )
    for attribute, question in questions:
        setattr(self, attribute, input(question))

    game_args = [
        self.pcard1, self.pcard2, self.pcard3, self.pcard4, self.pcard5,
        self.dcard1, self.dcard2, self.dcard3, self.dcard4, self.dcard5,
        self.qdecks,
    ]
    self.blackjack = Blackjack(*game_args)
    self.main()
    self.escolher_sugestao()
def test_check_draw(self):
    """The game must stop running when both hands hold an ace and a jack."""
    # Testing if the game ends when there's a draw
    deck = Deck()

    player = Player("Mikkel")
    for card in (Card("Hearts", "Ace", 11), Card("Spades", "Jack", 10)):
        player.hand.append(card)

    dealer = Player("dealer")
    for card in (Card("Clubs", "Ace", 11), Card("Diamonds", "Jack", 10)):
        dealer.hand.append(card)

    game = Blackjack(player, dealer, deck)
    game.check_who_won()
    self.assertFalse(game.game_running,
                     "Game should be over since the game drawed")
def _ask_menu_choice(prompt, valid_choices, invalid_message):
    """Prompt until the user types an integer contained in valid_choices."""
    while True:
        try:
            choice = int(input(prompt))
        except ValueError:
            print("Not an integer! Try again.")
            continue
        if choice in valid_choices:
            return choice
        print(invalid_message)


def run():
    """Train the AI, then let the user alternate between the play modes.

    After training, the user picks AI play or self play once; afterwards
    the scores are shown and a menu (AI play, self play, improve model,
    exit) is offered until "Exit" is chosen.

    Returns:
        0 when the user exits.
    """
    game = Blackjack()
    print('This is a simple reinforcement learning based blackjack game')
    train_ai(game)

    first_choice = _ask_menu_choice(
        "Great ! Would you like to play yourself or let the AI do the work ? \n1. AI\n2. Self\n",
        (1, 2),
        "Not a valid option ! Please enter 1 or 2",
    )
    if first_choice == 1:
        game.play_ai()
    else:
        game.play()

    while True:
        print("\n\nCurrent scores:")
        print("Player: {}".format(game.player_score))
        print("Dealer: {}\n\n".format(game.dealer_score))

        # The range check used to be ``>= 1 or <= 4`` (always true), so any
        # integer was accepted; the error text also named the wrong range.
        choice = _ask_menu_choice(
            "What do we do now ? \n1. AI Play\n2. Self Play\n3. Improve Model\n4. Exit\n",
            (1, 2, 3, 4),
            "Not a valid option ! Please enter a number from 1 to 4",
        )
        if choice == 1:
            game.play_ai()
        elif choice == 2:
            game.play()
        elif choice == 3:
            train_ai(game)
        else:
            break
    return 0
def test_check_dealer_win(self):
    """The game must stop running when the dealer holds ace+7 against ace+2."""
    # Testing if the game ends if the dealer wins
    deck = Deck()

    player = Player("Mikkel")
    for card in (Card("Hearts", "Ace", 11), Card("Spades", 2, 2)):
        player.hand.append(card)

    dealer = Player("dealer")
    for card in (Card("Clubs", "Ace", 11), Card("Diamonds", 7, 7)):
        dealer.hand.append(card)

    game = Blackjack(player, dealer, deck)
    game.check_who_won()
    self.assertFalse(game.game_running,
                     "Game should be over since the dealer won")
def test_check_keeps_going(self):
    """The game must keep running while the dealer holds ace+4 against ace+2."""
    # Testing if the game keeps running even if the dealer has more value
    # than the player (dealer has to keep hitting till he hits at least 17)
    deck = Deck()

    player = Player("Mikkel")
    for card in (Card("Hearts", "Ace", 11), Card("Spades", 2, 2)):
        player.hand.append(card)

    dealer = Player("dealer")
    for card in (Card("Clubs", "Ace", 11), Card("Diamonds", 4, 4)):
        dealer.hand.append(card)

    game = Blackjack(player, dealer, deck)
    game.check_who_won()
    self.assertTrue(
        game.game_running,
        "Game should keep running since the dealer hasn't hit atleast 17 value yet"
    )
def iniciarServidor():
    """Load the language dictionaries and serve Blackjack clients forever.

    Every file in the ``lenguaje`` directory is parsed as JSON and stored
    in the module-level ``diccionario`` under the file's stem. A single
    ``Blackjack`` instance is then shared by all client threads; one thread
    is started per accepted connection on TCP port 3039.

    The accept loop never ends on its own; the listening socket is closed
    when the loop is left through an exception (e.g. KeyboardInterrupt).
    """
    # Load the dictionary from the language files.
    for f in os.listdir("lenguaje"):
        with open(os.path.join("lenguaje", f)) as json_file:
            name = Path(f).resolve().stem
            diccionario[name] = json.load(json_file)

    puerto = 3039

    # The Blackjack game instance is shared by all threads.
    blackGame = Blackjack(diccionario)

    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    # The close() call used to sit after the infinite loop, where it could
    # never run; try/finally releases the socket on every exit path.
    try:
        sock.bind(('', puerto))
        print(crearMensajeLog("Socket bindeado"))
        sock.listen(5)
        print(crearMensajeLog("Socket escuchando"))
        while True:
            cliente, direccionCliente = sock.accept()
            print(cliente)
            print(crearMensajeLog("Nuevo jugador desde: " + direccionCliente[0]))
            # One thread per connected client; each thread has access to
            # the shared blackjack instance.
            start_new_thread(inicializarCliente, (cliente, blackGame))
    finally:
        sock.close()
def setUp(self):
    """Create a game and keep its deck plus the suit and rank labels."""
    game = Blackjack()
    self.blackjack = game
    self.deck = self.blackjack.deck
    self.suits = "♣ ♦ ♥ ♠".split()
    # Ace, the pip cards 2-10, then Q, J, K (in that order).
    self.numbers = ["A"] + [str(pip) for pip in range(2, 11)] + ["Q", "J", "K"]
def test_player_hand(self):
    """Two PlayerHand objects must each keep their own list of cards."""
    first_hand = Blackjack.PlayerHand()
    for card in (1, 2):
        first_hand.cards.append(card)
    for actual, expected in zip(first_hand.cards, [1, 2]):
        self.assertEqual(actual, expected)

    second_hand = Blackjack.PlayerHand()
    for card in (3, 4):
        second_hand.cards.append(card)
    # There was a bug where the cards were shared between hands due
    # to the cards in the PlayerHand arguments defaulting to '[]' instead
    # of None.
    for actual, expected in zip(second_hand.cards, [3, 4]):
        self.assertEqual(actual, expected)
def func(args):
    """Play one episode and record its reward under every visited state.

    ``args`` is only used in the log line. Returns a ``Returns`` object in
    which the episode reward has been appended to
    ``returns[state.ace][state]`` for each (state, _) pair of the episode.
    """
    # Generate the episode.
    game = Blackjack()
    episode, reward = game.play()
    logging.info('{1} -> {2:2d} [ {0} ]'.format(args, game, reward))

    # Calculate the returns.
    collected = Returns()
    for visited_state, _ in episode:
        collected[visited_state.ace][visited_state].append(reward)
    return collected
class BlackjackIntegreationTests(unittest.TestCase):
    """Integration tests for bust and blackjack detection on both hands."""

    def setUp(self):
        """Give each test a fresh game with empty player and dealer hands."""
        self.blackjack = Blackjack()
        self.blackjack.player = Hand()
        self.blackjack.dealer = Hand()

    def tearDown(self):
        """Drop the game; unittest runs this even when the test fails."""
        self.blackjack = None

    # The fixtures used to be called by hand from every test under these
    # names; the aliases keep any external caller working.
    setup = setUp
    cleanup = tearDown

    def test_player_busted(self):
        """Two aces are not a bust; adding a king and a queen is."""
        player = self.blackjack.player
        player.add_to_hand(Card("Spades", "A"))
        player.add_to_hand(Card("Hearts", "A"))
        self.assertFalse(self.blackjack.player_busted())
        player.add_to_hand(Card("Hearts", "K"))
        player.add_to_hand(Card("Hearts", "Q"))
        self.assertTrue(self.blackjack.player_busted())

    def test_dealer_busted(self):
        """Same bust check as for the player, applied to the dealer hand."""
        dealer = self.blackjack.dealer
        dealer.add_to_hand(Card("Spades", "A"))
        dealer.add_to_hand(Card("Hearts", "A"))
        self.assertFalse(self.blackjack.dealer_busted())
        dealer.add_to_hand(Card("Hearts", "K"))
        dealer.add_to_hand(Card("Hearts", "Q"))
        self.assertTrue(self.blackjack.dealer_busted())

    def test_check_blackjack_player(self):
        """check_blackjack reports (True, False) once the player holds A+Q."""
        player = self.blackjack.player
        player.add_to_hand(Card("Spades", "A"))
        self.assertEqual((False, False), self.blackjack.check_blackjack())
        player.add_to_hand(Card("Hearts", "Q"))
        self.assertEqual((True, False), self.blackjack.check_blackjack())

    def test_check_blackjack_dealer(self):
        """check_blackjack reports (False, True) once the dealer holds A+Q."""
        dealer = self.blackjack.dealer
        dealer.add_to_hand(Card("Clubs", "A"))
        self.assertEqual((False, False), self.blackjack.check_blackjack())
        dealer.add_to_hand(Card("Diamonds", "Q"))
        self.assertEqual((False, True), self.blackjack.check_blackjack())
def run_Black_Jack_environment(self, q_t, q_c, mode):
    """Play one game with epsilon-greedy actions and update the Q table.

    Args:
        q_t: Q table, passed to ``get_q_reward`` and ``update_table``.
        q_c: visit counter per (state, decision) pair; incremented in place.
        mode: forwarded to the ``Blackjack`` constructor.

    Returns:
        The tuple ``(q_t, q_c)`` where ``q_t`` is the result of
        ``update_table``.
    """
    game = Blackjack(mode)
    state = game.get_state()
    rl_state = self.get_rl_state(state, game)  # condensed RL state

    # (state, decision) pairs seen during this game, mapped to their reward.
    visited = {}

    # The loop runs while the game reports status 1 (not terminal).
    while game.get_status() == 1:
        action_probs = self.get_q_reward(rl_state, q_t)

        # Epsilon-greedy: explore with a random move, otherwise take the
        # action with the highest value.
        explore = random.random() < EPSILON
        if explore:
            decision = random.randint(0, 1)
        else:
            decision = np.argmax(action_probs)

        pair = (rl_state, decision)
        visited[pair] = 0  # placeholder until the game is over
        q_c[pair] += 1     # counter used for averaging

        game.play_game(decision)
        state = game.get_state()
        rl_state = self.get_rl_state(state, game)

    # Once the game is finished, every pair seen gets the final reward.
    for pair in visited:
        visited[pair] = self.get_reward(state[2])

    q_t = self.update_table(q_t, q_c, visited)
    return q_t, q_c
class Run:
    """Console game between a human and a CPU player, seated in random order."""

    def __init__(self):
        """Create the pack and the players, deal two cards each and play."""
        self.pack = Pack()
        self.blackjack = Blackjack()
        self.players = []

        # random.sample over both classes with k=2 yields them in random order.
        for player_cls in random.sample([HumanPlayer, CpuPlayer], k=2):
            player = player_cls()
            player.cards = self.init_deck()
            self.players.append({
                'player': player,
                'cards': player.cards,
                'score': player.score,
                'name': str(uuid.uuid4()),
            })

        self.main()

    def init_deck(self):
        """Return a starting hand of two cards taken from the pack."""
        first = self.pack.next()
        second = self.pack.next()
        return [first, second]

    def bust_card(self, cards):
        """Return True when the score of ``cards`` is above 21."""
        return True if self.blackjack.score(cards) > 21 else False

    def show_scores(self):
        """Print name, cards and score of every player, then a divider."""
        for entry in self.players:
            print(f"Player: {entry['name']}")
            print([f'{card.rank} {card.suit}' for card in entry['cards']])
            print(f"Score é: {entry['player'].score}\n")
        print("/\/\/\/\//\/\/\/\/\//\/\/\/\/\/\/\/\/\/\/\\")
        print("VVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVV")

    def main(self):
        """Let each player in turn draw until they stop or go over 21."""
        for position, entry in enumerate(self.players):
            player = entry['player']
            hand = entry['cards']

            # Every player but the first gets to see the previous hand.
            if position != 0:
                player.opponent_cards = self.players[position - 1]['cards']

            while True:
                self.show_scores()
                if self.bust_card(cards=hand):
                    print('Passou do limite de 21 pontos')
                    break
                player.cards = hand
                if not player.will_continue():
                    break
                hand.append(self.pack.next())
class TestBlackjack(unittest.TestCase):
    """Tests for ``Blackjack.game_over`` using hands drawn by one player."""

    def setUp(self):
        """Create a fresh game and a fresh player for every test."""
        self.blackjack = Blackjack()
        self.player = Player()

    def test_game_over_end_game(self):
        """game_over returns True after the player drew all four cards."""
        deck = ["8♥", "6♣", "2♦", "10♦"]
        # One draw per card initially in the deck. The count is taken up
        # front because hit_card presumably removes cards from the deck.
        for _ in range(len(deck)):
            self.player.hit_card(deck)
        self.assertTrue(self.blackjack.game_over([self.player]))

    def test_game_over_continue_game(self):
        """game_over returns False (the game continues) after three cards."""
        deck = ["8♥", "6♣", "2♦"]
        for _ in range(len(deck)):
            self.player.hit_card(deck)
        self.assertFalse(self.blackjack.game_over([self.player]))

    def tearDown(self):
        """Nothing to clean up."""
        pass
def __init__(self):
    """Create the pack and the players, deal two cards each and play.

    A human and a CPU player are instantiated in random order; each one is
    recorded in ``self.players`` as a dict with the player object, its
    cards, its score at creation time and a random UUID as name. The
    constructor finishes by calling ``self.main()``.
    """
    self.pack = Pack()
    self.blackjack = Blackjack()
    self.players = []

    # random.sample over both classes with k=2 yields them in random order.
    for player_cls in random.sample([HumanPlayer, CpuPlayer], k=2):
        player = player_cls()
        player.cards = self.init_deck()
        self.players.append({
            'player': player,
            'cards': player.cards,
            'score': player.score,
            'name': str(uuid.uuid4()),
        })

    self.main()
def test_hand_value(self):
    """Blackjack.hand_value must return the expected total for each hand."""
    # Card numbers: 0, 1, 12 -> Ace, 2, King
    cases = [
        ([9, 9], 20),
        ([0, 9], 21),
        ([0, 9, 9], 21),
        ([0, 0, 9], 12),
        ([0, 0, 0, 0, 5], 20),
        ([13, 27, 51], 13),
    ]
    for hand, expected in cases:
        # The message names the hand so a failure points at the exact case.
        self.assertEqual(Blackjack.hand_value(hand), expected,
                         "hand %r" % (hand,))
def __init__(self, master=None):
    """Build the basic-strategy window and draw the initial scene.

    Args:
        master: parent widget; with the default ``None`` tkinter falls
            back to its default root window.
    """
    super().__init__(master)
    # tkinter stores the effective parent on self.master during widget
    # construction (the default root when ``master`` is None). The former
    # ``self.master = master`` replaced it with None in that case, which
    # made the title() call below fail.
    self.master.title("Blackjack - Basic Strategy")
    # self.master.iconbitmap('out3_nr3_icon.ico')
    self.master.geometry("900x500")
    self.pack(fill="both", expand=1)

    self.game = Blackjack()

    # Colours used by the widgets.
    self.bg_color = "#006644"
    self.t_color = "#fcf403"
    self.h_color = "#005634"

    # Card image size.
    self.card_width = 100
    self.card_height = 145

    # Initial display state.
    self.display_hidden_house = False
    self.game_finished = True

    self.images_load()
    self.widgets_create()
    self.scene_update()
def test_blackjack_push():
    """With hands [13, 12] and [0, 11], _check_blackjack sets status 'push'."""
    game = Blackjack()
    game.status = 'playing'
    game.player, game.dealer = BlackjackHand([13, 12]), BlackjackHand([0, 11])

    game._check_blackjack()

    assert game.status == 'push'
def test_dealer_blackjack():
    """With hands [1, 2] and [0, 11], _check_blackjack sets status 'house'."""
    game = Blackjack()
    game.status = 'playing'
    game.player, game.dealer = BlackjackHand([1, 2]), BlackjackHand([0, 11])

    game._check_blackjack()

    assert game.status == 'house'
def func(args):
    """Play ``games`` episodes from a fixed state and return two estimates.

    Args:
        args: tuple ``(games, gamma)`` -- number of episodes to play and
            the discount factor applied to the return.

    Returns:
        A generator yielding two values: the sum of the weighted returns
        divided by the number of episodes, and the same sum divided by the
        sum of the weights (presumably the ordinary and the weighted
        importance-sampling estimates -- confirm with the caller).
    """
    (games, gamma) = args
    state = State(13, 2, True)
    actions = (True, False)
    b = 1 / len(actions)  # probability of each action under the random player

    Q = cl.defaultdict(float)
    C = cl.defaultdict(int)

    ordinary = []
    weighted = []

    for _ in range(games):
        blackjack = Blackjack(state, RandomPlayer)
        (episode, reward) = blackjack.play()

        G = 0
        W = 1
        # Walk the episode backwards for as long as the weight is non-zero.
        for e in it.takewhile(lambda _: W, reversed(episode)):
            G = gamma * G + reward
            C[e] += W
            Q[e] += (W / C[e]) * (G - Q[e])

            (s, a) = e
            # player = Player(s.player, 2, s.ace)
            # action = player.hit(s.dealer)
            action = bj.fairmax(Q, s)
            if a != action:
                break
            W *= 1 / b

        ordinary.append(W * G)
        weighted.append(W)

    # The code referenced an undefined name ``rewards`` here; the list that
    # is built above and was otherwise unused is ``ordinary``.
    r = sum(ordinary)
    return (r / x for x in (len(ordinary), sum(weighted)))
def main():
    """Create a banker and a player (each with 100) and play rounds forever."""
    participants = [Player(name, 100) for name in ("Banker", "Player")]
    game = Blackjack(participants)
    game.init_game()
    while True:
        game.run_one_round()
def runPlays(times):
    """Play ``times`` rounds on one shuffled game and print the game at the end.

    The loop index is printed every 100000 rounds as a progress marker.
    """
    game = Blackjack(1, 1)
    game.shuffle()
    for round_index in range(times):
        if not round_index % 100000:
            print(round_index)
        game.play()
    print(game)
def do_PUBLISH(self):
    """Handle PUBLISH: the shuffled and encrypted deck has been published.

    Stores the deck from the request body in ``data.deck``, applies this
    side's round of decryption, answers 200 OK and then creates the game
    mechanics in ``data.mechanics``.
    """
    published = self.body_json()['deck']
    data.deck = [Card(encoded) for encoded in published]

    # Our round of decryption: one key per card, pairwise.
    for card_key, card in zip(data.card_keys, data.deck):
        card.decrypt(card_key)

    self.send_response(HTTPStatus.OK)
    self.end_headers()

    # Initialize game mechanics.
    data.mechanics = Blackjack(
        data.players, data.deck, data.key_pair, data.name)
def test_state(self):
    """The player's stat goes None -> 'blackjack' -> 'bust' as cards are drawn."""
    game = Blackjack('You')
    assert game.player.stat is None

    # Cards 0 and 10 must be reported as a blackjack.
    for card in (0, 10):
        game.player.draw_card(card)
    game._check_points(game.player)
    assert game.player.stat == 'blackjack'

    # Two more cards (11 and 12) must be reported as a bust.
    for card in (11, 12):
        game.player.draw_card(card)
    game._check_points(game.player)
    assert game.player.stat == 'bust'
from blackjack import Blackjack

# Small helper module: prints which card each number from 0 to 51 maps to,
# one "number card" pair per line.
card_map = list(zip(range(52), map(Blackjack.number_to_card, range(52))))

for key, value in card_map:
    print(key, value)
def __init__(self):
    """Create the ``Blackjack`` game instance used by this object."""
    # NOTE(review): the meaning of the argument 6 is not visible here --
    # presumably the number of decks; confirm against Blackjack.__init__.
    self.game = Blackjack(6)
print("DEALER BLACKJACK!") return bank - bet def dealerBusted(bank, bet): print("DEALER BUSTED!") return bank + bet def playerBusted(bank, bet): print("PLAYER BUSTED!") return bank - bet ################################################################ # Game logic ################################################################ game = Blackjack() while True: ############################################################ # Reset game ############################################################ print("Welcome to Blackjack!") playerBank = 100.0 while playerBank > 0: ######################################################## # Play next hand ########################################################
def play(delay = 1.25, n_decks = 1):
    """
    Interactive gameplay script built on the Blackjack class and user input.

    Args:
        delay: seconds to pause between steps (added suspense).
        n_decks: passed to the Blackjack constructor.
    """
    # Initialise the game and print its documentation.
    game = Blackjack(n_decks)
    print(game.__doc__)
    print("Gameplay attributes:\n {} deck(s)\n {} sec deal delay".format(n_decks, delay))

    # Deal loop: one iteration per round, while the bank is positive.
    action = "d"
    rounds_played = 0
    while action == "d" and game.bank > 0:
        action = prompt("\n[d]eal, [q]uit: ", accept = "dq")
        if action == "q":
            break

        rounds_played += 1
        print("\n - Round {} - ".format(rounds_played))

        # New round: fresh hands, then the bet.
        game.new_hand()
        player, dealer = game.player, game.dealer
        game.place_bet()

        # Deal and show the hands with the dealer's hand hidden.
        game.init_deal()
        game.show(dealer_hide = True)

        # Player / mutual blackjack ends the round here.
        if game.rulecheck():
            continue

        # Player hit loop: keep asking until "stay" or a total of 21+.
        while prompt("[h]it or [s]tay: ", accept = "hs") == "h":
            print("Player hits...")
            sleep(delay)
            player.hit(game.deck)
            game.show(dealer_hide = True)
            if player.total() >= 21:
                break

        # Player bust ends the round here.
        if game.rulecheck():
            continue

        # Reveal the dealer's hand.
        print("Dealer reveal...")
        game.show()
        sleep(delay)

        # Dealer hits below 17 while not ahead of the player.
        while dealer.total() < 17 and dealer.total() <= player.total():
            print("Dealer hits...")
            dealer.hit(game.deck)
            game.show()
            sleep(delay)

        # Dealer bust ends the round here.
        if game.rulecheck():
            continue

        # Otherwise decide the winner.
        game.wincheck()
        print(" - End of Round - ")

    # Out of the deal loop: print the end-of-game information.
    print("\n - End of Game - ")
    if game.bank < 1:
        print("\nOut of PyChips. The House always wins!\n")
    else:
        print("\nFinal bank = {} PyChips".format(game.bank))
        print("total won = {}\ntotal lost = {}\n".format(game.net_wins, game.net_loss))
def player_win(bank, bet):
    """Announce the player's win and return the bank plus ``Blackjack.payWin(bet)``."""
    print("PLAYER WINS!")
    payout = Blackjack.payWin(bet)
    return bank + payout
from blackjack import Blackjack


def always_stand(distribution, dealer_hand, player_hand):
    """Strategy callback that ignores its arguments and always stands."""
    return Blackjack.STAND


def always_hit(distribution, dealer_hand, player_hand):
    """Strategy callback that ignores its arguments and always hits."""
    return Blackjack.HIT


# Always hit: run 1000 games and accumulate the won/lost/tied counts
# returned by each run.
total_won = 0
total_lost = 0
total_tied = 0
for i in range(1000):
    bj = Blackjack(always_hit)
    won, lost, tied = bj.run()
    total_won += won
    total_lost += lost
    total_tied += tied
# Win rate as a percentage of all counted results, rounded to two decimals.
print("Win rate, always hit: " + str(round(total_won * 100.0/(total_won + total_lost + total_tied), 2)) + "%")

# Always stand: counters are reset before the second experiment.
total_won = 0
total_lost = 0
total_tied = 0
for i in range(1000):
    bj = Blackjack(always_stand)
def playerBlackjack(bank, bet):
    """Announce a blackjack and return the bank plus ``Blackjack.payBlackjack(bet)``."""
    print("BLACKJACK!")
    payout = Blackjack.payBlackjack(bet)
    return bank + payout
def simulate(self, verbose=False):
    '''
    Simulation: play one game, then update the action values and the policy.

    The first action of the game is chosen at random (exploring starts);
    every later action comes from ``self.policy``. With ``verbose`` the
    game state and the result are printed along the way.
    '''
    game = Blackjack()
    if verbose:
        print("[blackjack simulate]:")
        game.output()
        # Was a Python 2 print statement (SyntaxError on Python 3); the
        # call form prints the same value on both versions.
        print(game.dealer_face_value())

    player_total_queue = deque()
    player_has_ace_queue = deque()
    player_hit_queue = deque()

    dealer_face_value = game.dealer_face_value()

    while True:
        player_total = game.player_total
        player_has_ace = game.player_has_ace()

        if not player_hit_queue:
            # First action: exploring starts (random choice).
            player_hit = random.choice([True, False])
        else:
            # Later actions: follow the current policy.
            player_hit = self.policy.hit(player_total, player_has_ace, dealer_face_value)

        # Remember the state and the action taken in it.
        player_total_queue.append(player_total)
        player_has_ace_queue.append(player_has_ace)
        player_hit_queue.append(player_hit)

        if player_hit:
            game.player_hit()
            if verbose:
                game.output()
            if game.finish:
                break
        else:
            game.player_stand()
            if verbose:
                game.output()
            break

    # Outcome of the game (the reward).
    result = game.judgement_result()
    if verbose:
        print("result: %s" % (result))

    # Update the action values with this game's states, actions and reward.
    for player_total, player_has_ace, player_hit in zip(player_total_queue, player_has_ace_queue, player_hit_queue):
        self.action_value.update(player_total, player_has_ace, dealer_face_value, player_hit, result)

    # Update the policy (once per game) for every state that was visited.
    for player_total, player_has_ace in zip(player_total_queue, player_has_ace_queue):
        # Value of hitting in this state.
        hit_value = self.action_value.get(player_total, player_has_ace, dealer_face_value, True)
        # Value of standing in this state.
        stand_value = self.action_value.get(player_total, player_has_ace, dealer_face_value, False)
        if hit_value > stand_value:
            # Hitting is worth more: the policy becomes "hit".
            self.policy.set(player_total, player_has_ace, dealer_face_value, True)
        elif hit_value < stand_value:
            # Hitting is worth less: the policy becomes "stand".
            self.policy.set(player_total, player_has_ace, dealer_face_value, False)
def setUp(self):
    """Create a fresh Blackjack game and a fresh Player for the test."""
    self.blackjack, self.player = Blackjack(), Player()
def simulate(self, verbose=False, train=True):
    '''
    Simulation: play one game, then update the action values and the policy.

    Actions come from ``self.policy``; when ``train`` is true an
    epsilon-greedy step may replace the policy's action with a random one.
    With ``verbose`` the game state and the result are printed along the way.
    '''
    game = Blackjack()
    if verbose:
        print("[blackjack simulate]:")
        game.output()
        # Was a Python 2 print statement (SyntaxError on Python 3); the
        # call form prints the same value on both versions.
        print(game.dealer_face_value())

    player_total_queue = deque()
    player_has_ace_queue = deque()
    player_hit_queue = deque()

    dealer_face_value = game.dealer_face_value()

    while True:
        player_total = game.player_total
        player_has_ace = game.player_has_ace()

        # On-policy: start from the action the policy considers best.
        player_hit = self.policy.hit(player_total, player_has_ace, dealer_face_value)

        # During training another action may be selected instead.
        if train:
            # Epsilon-greedy: a draw below epsilon/2 forces True, a draw
            # below epsilon forces False, anything else keeps the policy's
            # action.
            select = random.random()
            for i, action in enumerate((True, False)):
                if select < ((self.epsilon / 2) * (i+1)):
                    player_hit = action
                    break

        # Remember the state and the action taken in it.
        player_total_queue.append(player_total)
        player_has_ace_queue.append(player_has_ace)
        player_hit_queue.append(player_hit)

        if player_hit:
            game.player_hit()
            if verbose:
                game.output()
            if game.finish:
                break
        else:
            game.player_stand()
            if verbose:
                game.output()
            break

    # Outcome of the game (the reward).
    result = game.judgement_result()
    if verbose:
        print("result: %s" % (result))

    # Update the action values with this game's states, actions and reward.
    for player_total, player_has_ace, player_hit in zip(player_total_queue, player_has_ace_queue, player_hit_queue):
        self.action_value.update(player_total, player_has_ace, dealer_face_value, player_hit, result)

    # Update the policy (once per game) for every state that was visited.
    for player_total, player_has_ace in zip(player_total_queue, player_has_ace_queue):
        # Value of hitting in this state.
        hit_value = self.action_value.get(player_total, player_has_ace, dealer_face_value, True)
        # Value of standing in this state.
        stand_value = self.action_value.get(player_total, player_has_ace, dealer_face_value, False)
        if hit_value > stand_value:
            # Hitting is worth more: the policy becomes "hit".
            self.policy.set(player_total, player_has_ace, dealer_face_value, True)
        elif hit_value < stand_value:
            # Hitting is worth less: the policy becomes "stand".
            self.policy.set(player_total, player_has_ace, dealer_face_value, False)