def _apply_card(total, card):
    """Return ``total`` after drawing ``card``.

    ``card[0]`` is the colour and ``card[1]`` the value: a RED card is
    subtracted from the running sum, a BLACK card is added to it.
    """
    if card[0] == RED:
        return total - card[1]
    assert card[0] == BLACK
    return total + card[1]


def _dealer_playout(player_sum, dealer_sum):
    """Simulate the dealer's turn and return the player's reward.

    The dealer keeps drawing while its sum is in ``[1, 17)``.  A dealer sum
    outside ``[1, 21]`` is a bust (player wins); otherwise the two sums are
    compared.
    """
    while 1 <= dealer_sum < 17:
        dealer_sum = _apply_card(dealer_sum, get_card())
    if dealer_sum < 1 or dealer_sum > 21:
        return WIN
    if player_sum == dealer_sum:
        return DRAW
    if player_sum > dealer_sum:
        return WIN
    assert player_sum < dealer_sum
    return LOSE


def reinforce_learning():
    """Learn action values for every (player sum, dealer first card) state.

    Repeatedly picks a random state, tries both actions from it (index 0 is
    Hit, index 1 is Stick) and blends the observed target into the stored
    value with learning rate ALPHA.  A state is flagged once the gap between
    its two action values exceeds CONVERGENCE_CONDITION (the flag is never
    cleared); learning stops when every state has been flagged.

    Returns:
        dict mapping ``(s1, s2)`` to ``[Q_hit, Q_stick]``, where ``s1`` is
        the sum of the player's cards and ``s2`` the dealer's first card.
    """
    Qsa, Csa = {}, {}
    for s1 in PLAYER_STATES:
        for s2 in DEALER_STATES:
            Qsa[(s1, s2)] = [0, 0]
            Csa[(s1, s2)] = False

    while True:
        s1, s2 = choice(PLAYER_STATES), choice(DEALER_STATES)
        q_values = Qsa[(s1, s2)]

        # Action 0 (Hit): draw one card for the player.
        new_s1 = _apply_card(s1, get_card())
        if new_s1 < 1 or new_s1 > 21:
            target = LOSE
        elif new_s1 == 21:
            target = WIN
        else:
            # Hitting does not change the dealer's first card, so the
            # successor state is (new_s1, s2); bootstrap from the best
            # action value of that state only, with no immediate reward.
            target = GAMMA * max(Qsa[(new_s1, s2)])
        q_values[0] = (1 - ALPHA) * q_values[0] + ALPHA * target

        # Action 1 (Stick): the dealer plays out its hand.
        target = _dealer_playout(s1, s2)
        q_values[1] = (1 - ALPHA) * q_values[1] + ALPHA * target

        # Flag the state once one action is clearly preferred.
        if abs(q_values[0] - q_values[1]) > CONVERGENCE_CONDITION:
            Csa[(s1, s2)] = True

        # Stop as soon as every state has been flagged.
        if all(Csa.values()):
            break
    return Qsa
def play_round(way_to_decision):
    """Play one round and return its outcome from the player's side.

    NOTE: a second ``play_round`` with the same signature is defined later
    in this file and rebinds the name; keep the two in sync or remove one.

    Args:
        way_to_decision: callable invoked as
            ``way_to_decision(player_cards, dealer_cards)`` on each player
            turn; it must return HIT or STICK.

    Returns:
        WIN, LOSE or DRAW.
    """
    def draw_black():
        # Opening cards must not be red: redraw until a non-red card shows.
        while True:
            card = get_card()
            if card[0] != RED:
                return card

    def announce(card):
        if DISPLAY:
            print('New card is (%s, %d)' % ('Red' if card[0] == RED else 'Black', card[1]))

    # Initialize a game: the player draws first, then the dealer.
    player_cards = [draw_black()]
    dealer_cards = [draw_black()]

    # Player's turn
    while True:
        if DISPLAY:
            print("Player's turn")
            display_current(player_cards, dealer_cards)
        player_choice = way_to_decision(player_cards, dealer_cards)
        if player_choice != HIT:
            assert player_choice == STICK, player_choice
            break
        nc = get_card()
        announce(nc)
        player_cards.append(nc)
        s = get_sum(player_cards)
        if s < 1 or s > 21:
            # Going bust loses immediately; the dealer does not play.
            return LOSE
        if s == 21:
            return WIN

    # Dealer's turn: draw until the sum reaches 17 or goes bust.
    while True:
        if DISPLAY:
            print("Dealer's turn")
            display_current(player_cards, dealer_cards)
        dealer_sum = get_sum(dealer_cards)
        if dealer_sum < 1 or dealer_sum > 21:
            return WIN
        if dealer_sum >= 17:
            break
        nc = get_card()
        announce(nc)
        dealer_cards.append(nc)

    # Both sides stuck: the higher sum wins.
    player_sum = get_sum(player_cards)
    if player_sum == dealer_sum:
        return DRAW
    return WIN if player_sum > dealer_sum else LOSE
def play_round(way_to_decision):
    """Play a single round and report the result for the player.

    NOTE: this definition rebinds an earlier ``play_round`` with the same
    signature in this file; this later one is the one in effect.

    Args:
        way_to_decision: callable taking ``(player_cards, dealer_cards)``
            and returning HIT or STICK; it is consulted before every
            player move.

    Returns:
        WIN, LOSE or DRAW.
    """
    def first_card():
        # The opening card of each hand is redrawn while it comes up red.
        card = get_card()
        while card[0] == RED:
            card = get_card()
        return card

    def show_new_card(card):
        if DISPLAY:
            print('New card is (%s, %d)' % ('Red' if card[0] == RED else 'Black', card[1]))

    def show_table(title):
        if DISPLAY:
            print(title)
            display_current(player_cards, dealer_cards)

    # Initialize a game: player's opening card is drawn before the dealer's.
    player_cards = [first_card()]
    dealer_cards = [first_card()]

    # Player's turn: keep asking the policy until it sticks or the hand ends.
    while True:
        show_table("Player's turn")
        player_choice = way_to_decision(player_cards, dealer_cards)
        if player_choice == HIT:
            nc = get_card()
            show_new_card(nc)
            player_cards.append(nc)
            s = get_sum(player_cards)
            if s < 1 or s > 21:
                # A bust ends the round as a loss without a dealer turn.
                return LOSE
            if s == 21:
                return WIN
        else:
            assert player_choice == STICK, player_choice
            break

    # Dealer's turn: hit below 17, stick at 17 or more, bust outside [1, 21].
    while True:
        show_table("Dealer's turn")
        dealer_sum = get_sum(dealer_cards)
        if dealer_sum < 1 or dealer_sum > 21:
            return WIN
        if dealer_sum >= 17:
            break
        nc = get_card()
        show_new_card(nc)
        dealer_cards.append(nc)

    # Neither side bust: compare the final sums.
    player_sum = get_sum(player_cards)
    if player_sum == dealer_sum:
        return DRAW
    return WIN if player_sum > dealer_sum else LOSE