def test_hit():
    """hit() must report GOOD/STAND/BUST and ignore hits after the round ends."""
    game = BJ.BlackJack()

    # Scenario 1: stacked deck walks the player to exactly 21 -> STAND,
    # and a further hit() must change nothing.
    game.deck = ['A', '4', '2', '5', '3', 'A']
    game.deal()
    assert max(game.get_hand()) == 16
    for expected_status, expected_total in [
        (BJ.Status.GOOD, 20),
        (BJ.Status.STAND, 21),
        (BJ.Status.STAND, 21),  # player can't hit once standing
    ]:
        assert game.hit() == expected_status
        assert max(game.get_hand()) == expected_total

    # Scenario 2: stacked deck drives the player over 21 -> BUST,
    # and a further hit() must change nothing.
    game.deck = ['7', '3', '9', '2', '5', '2', 'A']
    game.deal()
    assert max(game.get_hand()) == 16
    for expected_status, expected_total in [
        (BJ.Status.GOOD, 15),
        (BJ.Status.GOOD, 18),
        (BJ.Status.BUST, 25),
        (BJ.Status.BUST, 25),  # player can't hit once busted
    ]:
        assert game.hit() == expected_status
        assert max(game.get_hand()) == expected_total
def test_count_hand():
    """count_hand() returns every achievable total; an ace adds a second value."""
    game = BJ.BlackJack()
    cases = [
        (['5', '2'], [7]),                        # plain number cards
        (['A', 'A'], [2, 12]),                    # two aces: 1+1 or 1+11
        (['Q', 'A'], [11, 21]),                   # natural blackjack
        (['2', '3', '4', '5', '6', '7', '8', '9',
          '10', 'J', 'Q', 'K', 'A'], [85]),       # one of every rank
    ]
    for hand, expected in cases:
        hand_sum = game.count_hand(hand)
        assert len(hand_sum) == len(expected)
        for position, value in enumerate(expected):
            assert hand_sum[position] == value
def play(learning_table, episodes):
    """Play `episodes` rounds following `learning_table` greedily.

    Returns a (win, tie, lose) tuple of outcome counts.  A natural
    blackjack on the deal counts as an immediate win.
    """
    game = BJ.BlackJack()
    wins, ties, losses = 0, 0, 0
    for _ in range(episodes):
        if game.deal() is BJ.Status.BLACKJACK:
            wins += 1
            continue
        # Query the learned policy (exploration disabled) until the round ends.
        while game.round is None:
            chosen = learning_table.get_action(game.get_state(), False)
            if chosen == BJ.Action.HIT:
                game.hit()
            else:
                game.stand()
        if game.round == BJ.Round.WIN:
            wins += 1
        elif game.round == BJ.Round.TIE:
            ties += 1
        else:
            losses += 1
    return (wins, ties, losses)
def test_train_ql():
    """Train a Q-learning agent over MAX_TRAIN_EPISODE blackjack games,
    then dump the learned table and report its play performance."""
    learning_agent = QL.QLearning()
    for i in range(1, MAX_TRAIN_EPISODE):
        game = BJ.BlackJack()
        status = game.deal()
        # Number of decisions taken this episode; used to find the final
        # transition in game_history so only it receives the real reward.
        step = 0
        if (
                status is BJ.Status.BLACKJACK
        ):  # Game ends right after the deal; nothing to learn from.
            continue
        # Each entry: [state before action, action, state after action, step].
        game_history = []
        # Agent turn
        while (game.round is None):
            # Loop exits once an action leaves the game in STAND or BUST.
            step += 1
            previous_state = game.get_state()
            action = learning_agent.get_action(previous_state)
            if (action == BJ.Action.HIT):
                status = game.hit()
                game_history.append(
                    [previous_state, action, game.get_state(), step])
                if (status is BJ.Status.GOOD):  # non-terminal state
                    continue
            else:
                status = game.stand()
                game_history.append(
                    [previous_state, action, game.get_state(), step])
                if (status is not BJ.Status.STAND):  # non-terminal state
                    continue
        # Terminal reward from the round outcome (+1 win / -1 lose / 0 tie).
        if (game.round == BJ.Round.WIN):
            reward = 1
        elif (game.round == BJ.Round.LOSE):
            reward = -1
        elif (game.round == BJ.Round.TIE):
            reward = 0
        else:
            raise ValueError('Error in handling the game status')
        # Replay the episode: the reward is credited only to the last
        # transition (step == ele[3]); earlier transitions learn with 0.
        for ele in game_history:
            if (step == ele[3]):
                reward_recalculated = reward
            else:
                reward_recalculated = 0
            learning_agent.learn(ele[0], ele[1], ele[2], reward_recalculated)
    print_state_table(learning_agent)
    print(learning_agent._Q)
    report(play(learning_agent, MAX_RUNIN_EPISODE))
def play_exp(self, EPS):
    """Play EPS evaluation episodes with the greedy policy (epsilon = 0).

    Returns a (win, tie, lose) tuple of outcome counts.  State/action
    visit counters on self are still incremented, as in training.

    Fix: removed dead code copied from train() — an eligibility trace
    and a reward that were computed but never read, plus commented-out
    history bookkeeping.  Behavior is otherwise unchanged.
    """
    win = 0
    tie = 0
    lose = 0
    for episode in range(EPS):
        game = BJ.BlackJack()
        status = game.deal()
        if status is BJ.Status.BLACKJACK:
            # Round is over right after the deal; count it as a win.
            win += 1
            continue
        current_state = game.get_state()
        current_action = self.get_action(0, current_state, False)
        while (game.round is None):
            self._counter_state[current_state] += 1
            self._counter_state_action[(current_state, current_action)] += 1
            if (current_action == BJ.Action.HIT):
                status = game.hit()
                if (status is BJ.Status.GOOD):
                    # NOTE(review): non-terminal, but current_state/action are
                    # not refreshed here, so the same action is repeated until
                    # a terminal status — confirm this mirrors train() intent.
                    continue
            else:
                status = game.stand()
                if (status is not BJ.Status.STAND):
                    continue
            # Terminal status reached: advance bookkeeping one last time
            # (the while condition will fail on the next check).
            next_state = game.get_state()
            next_action = self.get_action(0, next_state, False)
            current_state = next_state
            current_action = next_action
        if game.round == BJ.Round.WIN:
            win += 1
        elif game.round == BJ.Round.TIE:
            tie += 1
        else:
            lose += 1
    return (win, tie, lose)
def test_dealer_reveal():
    """_dealer_reveal() settles the round for every player/dealer outcome."""
    game = BJ.BlackJack()

    # Player already busted: an immediate LOSE, hands irrelevant.
    game.status = BJ.Status.BUST
    game._dealer_reveal()
    assert game.round == BJ.Round.LOSE

    def settle(player, dealer, status, deck=None):
        """Install a scenario, reveal the dealer, and return the outcome."""
        game.player_hand = player
        game.dealer_hand = dealer
        if deck is not None:
            game.deck = deck
        game.status = status
        game._dealer_reveal()
        return game.round

    # Dealer 20 beats player 18.
    assert settle(['10', '8'], ['Q', 'K'], BJ.Status.STAND) == BJ.Round.LOSE
    # Dealer draws the stacked 'J' and busts -> player wins.
    assert settle(['10', '8'], ['10', '7'], BJ.Status.STAND,
                  deck=['J']) == BJ.Round.WIN
    # Equal totals tie.
    assert settle(['10', '8'], ['J', '8'], BJ.Status.STAND) == BJ.Round.TIE
    # Player's natural blackjack beats a dealer 15.
    assert settle(['A', 'Q'], ['5', 'J'],
                  BJ.Status.BLACKJACK) == BJ.Round.WIN
    # Dealer's natural blackjack beats a composed player 21.
    assert settle(['5', '6', 'J'], ['K', 'A'],
                  BJ.Status.STAND) == BJ.Round.LOSE
    # Both natural blackjacks tie.
    assert settle(['A', 'Q'], ['K', 'A'],
                  BJ.Status.BLACKJACK) == BJ.Round.TIE
def test_deal():
    """deal() detects a natural blackjack and reports the opening state."""
    game = BJ.BlackJack()

    # Stacked deck gives the player 21 with a usable ace -> BLACKJACK.
    # get_state() is (player total, usable ace?, dealer face-up card).
    game.deck = ['2', 'Q', '3', 'A']
    assert game.deal() == BJ.Status.BLACKJACK
    assert game.get_state() == (21, True, 3)

    # An ordinary opening hand just reports GOOD.
    game.deck = ['3', '2', 'Q', '2']
    assert game.deal() == BJ.Status.GOOD
    assert game.get_state() == (4, False, 10)
async def blackjack_instantiate(self, ctx, *args):
    """Discord command: join (creating if needed) the channel's blackjack
    game with a bet of args[0][0] coins.

    Rejects a non-numeric bet, and a bet that is not covered by the
    author's balance or is not positive.
    """
    try:
        bet_amount = int(args[0][0])
    except ValueError:
        await self.bot.send_message(ctx.message.channel,
                                    "that's not a number")
        return
    # Chained comparison: balance >= bet and bet > 0.
    if self.economy_manager.get_balance_of_player(
            ctx.message.author.id) >= bet_amount > 0:
        try:
            # EAFP: probe for an existing game in this channel.
            temp = self.blackjack_items[ctx.message.channel.id]
        except KeyError:
            # No game yet — create one and announce it.
            self.blackjack_items[ctx.message.channel.id] = bj.BlackJack(
                self.economy_manager)
            # presumably a background worker driving the games — start it
            # only once; TODO confirm thread ownership.
            if not self.thread.is_alive():
                self.thread.start()
            message = await self.bot.send_message(ctx.message.channel,
                                                  "starting")
            mess2 = (self.blackjack_items[ctx.message.channel.id]).start()
            embed = discord.Embed()
            embed.title = "BlackJack"
            await self.bot.edit_message(message, new_content=mess2,
                                        embed=embed)
            # Reaction used as the game's control button.
            await self.bot.add_reaction(message, "🇽")
        finally:
            # Runs for both the existing-game and new-game paths:
            # add the author as a player with their bet.
            message = (
                self.blackjack_items[ctx.message.channel.id]).addPlayer(
                    ctx.message.author, int(args[0][0]))
            # Show the join confirmation briefly, then clean up both it
            # and the invoking command message.
            mess = await self.bot.send_message(ctx.message.channel, message)
            await asyncio.sleep(1)
            await self.bot.delete_message(mess)
            # item = blackjackItems[ctx.message.channel.id]
            # await my_bot.edit_message(item.message, embed=item.build_embed())
            await self.bot.delete_message(ctx.message)
    else:
        await self.bot.send_message(
            ctx.message.channel, "you don't have enough moulah :angry:")
def marginal_probability(agent_class=BasicAgent):
    """Estimate the final-score distribution for stand thresholds 11..19.

    For each threshold, simulates one million hands with an agent built
    from `agent_class`, bins every final total (anything above 22 is
    clamped to 22), and prints the resulting probability table.
    """
    NUM_HANDS = 1000000
    probabilities = {score: [] for score in range(11, 23)}
    for threshold in range(11, 20):
        # The agent's decision point: it stands at `threshold`.
        agent = agent_class(threshold)
        tallies = {score: 0 for score in range(11, 23)}
        for _ in range(NUM_HANDS):
            game = blackjack.BlackJack()
            game.start_game()
            while game.state.terminate == 0:
                game.act(agent)
            game.final()
            # Clamp busts so every outcome lands in an existing bin.
            score = min(game.calc(game.state.player_hand), 22)
            tallies[score] += 1
        for score, count in tallies.items():
            probabilities[score].append(count / NUM_HANDS)
    print_probabilities(probabilities)
def train(self, EPS):
    """Train for EPS episodes with an eligibility-trace TD update
    (SARSA(lambda)-style, judging from delta/trace usage — confirm).

    Epsilon decays with state visits: n0 / (n0 + N(s)).  Returns a
    (win, tie, lose) tuple of episode outcomes.
    """
    win = 0
    tie = 0
    lose = 0
    for episode in range(EPS):
        # Fresh trace per episode; defaults to 0 for unseen pairs.
        eligibility_trace = defaultdict(float)
        game = BJ.BlackJack()
        status = game.deal()
        if status is BJ.Status.BLACKJACK:
            # Game ends right after the deal; not useful for training.
            continue
        current_state = game.get_state()
        # Visit-count-based exploration rate for this starting state.
        epsilon = self._n_zero / float(self._n_zero +
                                       self._counter_state[current_state])
        current_action = self.get_action(epsilon, current_state)
        while (game.round is None):
            self._counter_state[current_state] += 1
            self._counter_state_action[(current_state, current_action)] += 1
            if (current_action == BJ.Action.HIT):
                status = game.hit()
                if (status is BJ.Status.GOOD):  # non-terminal state
                    continue
            else:
                status = game.stand()
                if (status is not BJ.Status.STAND):  # non-terminal state
                    continue
            # Terminal reward: +1 win, -1 lose, 0 tie.
            if (game.round == BJ.Round.WIN):
                reward = 1
            elif (game.round == BJ.Round.LOSE):
                reward = -1
            else:
                reward = 0
            # next action
            next_state = game.get_state()
            next_action = self.get_action(epsilon, next_state)
            # TD error for the (s, a) -> (s', a') transition.
            delta = reward + self._gamma * self._value_function[(next_state, next_action)] - \
                self._value_function[(current_state, current_action)]
            # Step size decays with the pair's visit count.
            alpha = 1.0 / self._counter_state_action[(current_state,
                                                      current_action)]
            eligibility_trace[(current_state, current_action)] += 1
            # update table: every entry moves by alpha*delta*trace,
            # then its trace decays by gamma*lambda.
            for key in self._value_function:
                self._value_function[
                    key] += alpha * delta * eligibility_trace[key]
                eligibility_trace[key] *= self._gamma * self._lambda
            current_state = next_state
            current_action = next_action
        if game.round == BJ.Round.WIN:
            win += 1
        elif game.round == BJ.Round.TIE:
            tie += 1
        else:
            lose += 1
    return (win, tie, lose)
import blackjack

# Print the rules recap before asking the player to start.
print("Welcome to Blackjack.")
print("This is a simplified version of Blackjack. The rules are simple:")
print("1. Both players are dealt 2 cards facing up.")
print(
    "2. Either players can choose to either\n\tstand[s] (not do anything) \n\tor \n\thit[h] (get another card)."
)
print(
    "3. Number cards' values are their numbers, 10s and face cards' values are 10, and ace's value is 11."
)
print("4. A player automatically wins when their cards' values add up to 21.")
print("5. A player automatically loses when their cards' values goes over 21.")
print(
    "Note: Both players can choose stand unlimited amount of times in this game.\n"
)
print("Are you ready? [y/n]")
answer = input()
game_is_on = answer == "y"
if game_is_on:
    # Bind the game to a fresh name instead of rebinding `blackjack`,
    # which shadowed the imported module.
    game = blackjack.BlackJack()
    game.play()
def process(prob='Forest Management'):
    """Run PI/VI and Q-learning experiments for one MDP and plot the results.

    Three sweeps: (1) PI vs VI over problem sizes at gamma=0.9,
    (2) PI vs VI over discount factors, (3) Q-learning over sizes and
    discount factors.  Each configuration is averaged over 10 runs.

    Fix: row labels were assigned with the wrong frame's length
    (`df_v_tmp.loc[len(df_time_tmp)]`, `df_v.loc[len(df_time)]`), a
    copy-paste slip that skipped/offset row labels; harmless only
    because set_index() later overwrote them.  Each frame now indexes
    with its own length.
    """
    if prob == 'Forest Management':
        n_states = [50, 75, 100, 125, 150, 175, 200, 225, 250, 275, 300]
    else:
        n_states = [723]  # blackjack has a fixed state-space size
    df_iter = pd.DataFrame(columns=algos)
    df_time = pd.DataFrame(columns=algos)
    df_v = pd.DataFrame(columns=algos)
    # Sweep 1: PI vs VI across problem sizes at a fixed discount of 0.9.
    for n_state in n_states:
        print(n_state)
        if prob == 'Forest Management':
            P, R = mdptoolbox.example.forest(S=n_state,
                                             r1=n_state + 1,
                                             r2=2,
                                             p=0.1,
                                             is_sparse=False)
        else:
            bj = blackjack.BlackJack()
            P, R = bj.get_matrices()
        df_iter_tmp = pd.DataFrame(columns=algos)
        df_time_tmp = pd.DataFrame(columns=algos)
        df_v_tmp = pd.DataFrame(columns=algos)
        for i in range(10):
            # run
            pi, vi = util.run(P, R, 0.9)
            # stats
            df_iter_tmp.loc[len(df_iter_tmp)] = [pi.iter, vi.iter]
            df_time_tmp.loc[len(df_time_tmp)] = [pi.time, vi.time]
            df_v_tmp.loc[len(df_v_tmp)] = [pi.V[0], vi.V[0]]
            # Flag any disagreement between the two solvers' policies.
            if pi.policy != vi.policy:
                print(n_state, pi.policy, vi.policy)
        df_iter.loc[len(df_iter)] = df_iter_tmp.mean(axis=0)
        df_time.loc[len(df_time)] = df_time_tmp.mean(axis=0)
        df_v.loc[len(df_v)] = df_v_tmp.mean(axis=0)
    # plot
    df_iter.set_index(pd.Index(n_states), inplace=True)
    df_time.set_index(pd.Index(n_states), inplace=True)
    df_v.set_index(pd.Index(n_states), inplace=True)
    util.plot(df_iter, df_time, df_v)
    df_iter = pd.DataFrame(columns=algos)
    df_time = pd.DataFrame(columns=algos)
    df_v = pd.DataFrame(columns=algos)
    # Sweep 2: PI vs VI across discount factors at a fixed size of 100.
    for decay in ql_decays:
        if prob == 'Forest Management':
            P, R = mdptoolbox.example.forest(S=100,
                                             r1=100 + 1,
                                             r2=2,
                                             p=0.1,
                                             is_sparse=False)
        else:
            bj = blackjack.BlackJack()
            P, R = bj.get_matrices()
        df_iter_tmp = pd.DataFrame(columns=algos)
        df_time_tmp = pd.DataFrame(columns=algos)
        df_v_tmp = pd.DataFrame(columns=algos)
        for i in range(10):
            # run
            pi, vi = util.run(P, R, decay)
            # stats
            df_iter_tmp.loc[len(df_iter_tmp)] = [pi.iter, vi.iter]
            df_time_tmp.loc[len(df_time_tmp)] = [pi.time, vi.time]
            df_v_tmp.loc[len(df_v_tmp)] = [pi.V[0], vi.V[0]]
            if pi.policy != vi.policy:
                print(100, pi.policy, vi.policy)
        df_iter.loc[len(df_iter)] = df_iter_tmp.mean(axis=0)
        df_time.loc[len(df_time)] = df_time_tmp.mean(axis=0)
        df_v.loc[len(df_v)] = df_v_tmp.mean(axis=0)
    # plot
    df_iter.set_index(pd.Index(ql_decays), inplace=True)
    df_time.set_index(pd.Index(ql_decays), inplace=True)
    df_v.set_index(pd.Index(ql_decays), inplace=True)
    util.plot(df_iter, df_time, df_v, xlabel='Discount Factor')
    # Q learning: sweep sizes x discount factors (forest problem only).
    df_time = pd.DataFrame(columns=ql_decays)
    df_v = pd.DataFrame(columns=ql_decays)
    for n_state in n_states:
        print(n_state)
        P, R = mdptoolbox.example.forest(S=n_state,
                                         r1=n_state + 1,
                                         r2=2,
                                         p=0.1,
                                         is_sparse=False)
        v_list, time_list = [], []
        for decay in ql_decays:
            v_list_tmp, time_list_tmp = [], []
            for i in range(10):
                v, time = util.run_ql(P, R, decay)
                v_list_tmp.append(v)
                time_list_tmp.append(time)
            # Best value over runs, mean wall-clock time.
            v_list.append(np.max(v_list_tmp))
            time_list.append(np.mean(time_list_tmp))
        df_time.loc[len(df_time)] = time_list
        df_v.loc[len(df_v)] = v_list
    # plot
    df_time.set_index(pd.Index(n_states), inplace=True)
    df_v.set_index(pd.Index(n_states), inplace=True)
    util.plot(None, df_time=df_time, df_v=df_v)
    return
def main():
    """Play TOTAL_ROUNDS rounds of blackjack, then print the tally."""
    table = blackjack.BlackJack()
    for _ in range(TOTAL_ROUNDS):
        table.PlayRound()
    table.PrintResults()