Example no. 1
def test_hit():
    game = BJ.BlackJack()

    # Test for STAND on 21
    game.deck = ['A', '4', '2', '5', '3', 'A']
    game.deal()
    assert (max(game.get_hand()) == 16)
    assert (game.hit() == BJ.Status.GOOD)
    assert (max(game.get_hand()) == 20)
    assert (game.hit() == BJ.Status.STAND)
    assert (max(game.get_hand()) == 21)

    # Test when player can't hit on STAND
    assert (game.hit() == BJ.Status.STAND)
    assert (max(game.get_hand()) == 21)

    # Test for BUST
    game.deck = ['7', '3', '9', '2', '5', '2', 'A']
    game.deal()
    assert (max(game.get_hand()) == 16)
    assert (game.hit() == BJ.Status.GOOD)
    assert (max(game.get_hand()) == 15)
    assert (game.hit() == BJ.Status.GOOD)
    assert (max(game.get_hand()) == 18)
    assert (game.hit() == BJ.Status.BUST)
    assert (max(game.get_hand()) == 25)

    # Test when player can't hit on BUST
    assert (game.hit() == BJ.Status.BUST)
    assert (max(game.get_hand()) == 25)
Example no. 2
def test_count_hand():
    game = BJ.BlackJack()

    # Test normal hand
    hand = ['5', '2']
    hand_sum = game.count_hand(hand)
    assert (len(hand_sum) == 1)
    assert (hand_sum[0] == 7)

    # Test ace hand
    hand = ['A', 'A']
    hand_sum = game.count_hand(hand)
    assert (len(hand_sum) == 2)
    assert (hand_sum[0] == 2)
    assert (hand_sum[1] == 12)

    # Test natural blackjack
    hand = ['Q', 'A']
    hand_sum = game.count_hand(hand)
    assert (len(hand_sum) == 2)
    assert (hand_sum[0] == 11)
    assert (hand_sum[1] == 21)

    # Test a hand containing every card rank
    hand = ['2', '3', '4', '5', '6', '7', '8', '9', '10', 'J', 'Q', 'K', 'A']
    hand_sum = game.count_hand(hand)
    assert (len(hand_sum) == 1)
    assert (hand_sum[0] == 85)
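
The BJ module itself is not part of these excerpts. A minimal count_hand consistent with the assertions above (every ace counted as 1 first, and one ace optionally promoted to 11 when that does not bust the hand) might look like the following sketch; it is an assumption, not the actual implementation.

def count_hand(hand):
    # Assumed sketch of BJ.BlackJack.count_hand's behaviour, written as a free function
    base = 0
    for card in hand:
        if card == 'A':
            base += 1                     # count every ace as 1 first
        elif card in ('J', 'Q', 'K'):
            base += 10
        else:
            base += int(card)
    totals = [base]
    # Promote one ace from 1 to 11 if that does not exceed 21
    if 'A' in hand and base + 10 <= 21:
        totals.append(base + 10)
    return totals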
Example no. 3
def play(learning_table, episodes):
    game = BJ.BlackJack()
    win = 0
    tie = 0
    lose = 0

    for i in range(episodes):
        status = game.deal()
        if (status is BJ.Status.BLACKJACK):
            win += 1
            continue
        while game.round is None:
            cur_state = game.get_state()
            action = learning_table.get_action(cur_state, False)

            if (action == BJ.Action.HIT):
                game.hit()
            else:
                game.stand()
        if game.round == BJ.Round.WIN:
            win += 1
        elif game.round == BJ.Round.TIE:
            tie += 1
        else:
            lose += 1
    return (win, tie, lose)
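
play returns a raw (win, tie, lose) tuple. The report helper called in the next example is not defined in these excerpts; a hypothetical version turning that tuple into rates could be as simple as:

def report(results):
    # Hypothetical helper; the original report() implementation is not shown
    win, tie, lose = results
    total = win + tie + lose
    print(f"win {win / total:.1%}  tie {tie / total:.1%}  lose {lose / total:.1%}")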
Example no. 4
def test_train_ql():
    learning_agent = QL.QLearning()
    for i in range(1, MAX_TRAIN_EPISODE):
        game = BJ.BlackJack()
        status = game.deal()
        step = 0

        if (status is BJ.Status.BLACKJACK):
            # Game is over right after the deal, so this episode is not useful for training
            continue

        game_history = []

        # Agent turn
        while (game.round is None):
            # When an action returns STAND or BUST, the loop exits
            step += 1
            previous_state = game.get_state()
            action = learning_agent.get_action(previous_state)

            if (action == BJ.Action.HIT):
                status = game.hit()
                game_history.append(
                    [previous_state, action,
                     game.get_state(), step])
                if (status is BJ.Status.GOOD):  # non-terminal state
                    continue
            else:
                status = game.stand()
                game_history.append(
                    [previous_state, action,
                     game.get_state(), step])
                if (status is not BJ.Status.STAND):  # non-terminal state
                    continue

            if (game.round == BJ.Round.WIN):
                reward = 1
            elif (game.round == BJ.Round.LOSE):
                reward = -1
            elif (game.round == BJ.Round.TIE):
                reward = 0
            else:
                raise ValueError('Error in handling the game status')

            for ele in game_history:
                if (step == ele[3]):
                    reward_recalculated = reward
                else:
                    reward_recalculated = 0
                learning_agent.learn(ele[0], ele[1], ele[2],
                                     reward_recalculated)

    print_state_table(learning_agent)
    print(learning_agent._Q)
    report(play(learning_agent, MAX_RUNIN_EPISODE))
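
QL.QLearning is also not included here. The learn(previous_state, action, next_state, reward) calls above match a standard tabular Q-learning update, Q(s,a) += alpha * (r + gamma * max_a' Q(s',a') - Q(s,a)); a hypothetical sketch over a plain dict, with assumed alpha and gamma, is:

def q_learning_update(Q, state, action, next_state, reward,
                      actions, alpha=0.1, gamma=0.9):
    # Hypothetical tabular update; alpha, gamma and the action set are assumptions
    best_next = max(Q.get((next_state, a), 0.0) for a in actions)
    old_value = Q.get((state, action), 0.0)
    Q[(state, action)] = old_value + alpha * (reward + gamma * best_next - old_value)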
Example no. 5
    def play_exp(self, EPS):
        win = 0
        tie = 0
        lose = 0
        for episode in range(EPS):
            # Trace is accumulated below but never applied; play_exp only evaluates the policy
            eligibility_trace = defaultdict(float)
            game = BJ.BlackJack()
            status = game.deal()
            if status is BJ.Status.BLACKJACK:  # Game ends right after the deal; count the natural blackjack as a win
                win += 1
                continue
            current_state = game.get_state()
            current_action = self.get_action(0, current_state, False)

            while (game.round is None):
                self._counter_state[current_state] += 1
                self._counter_state_action[(current_state,
                                            current_action)] += 1

                if (current_action == BJ.Action.HIT):
                    status = game.hit()
                    # game_history.append([previous_state, action, game.get_state(), step])
                    if (status is BJ.Status.GOOD):  # non-terminal state
                        continue
                else:
                    status = game.stand()
                    # game_history.append([previous_state, action, game.get_state(), step])
                    if (status is not BJ.Status.STAND):  # non-terminal state
                        continue

                if (game.round == BJ.Round.WIN):
                    reward = 1
                elif (game.round == BJ.Round.LOSE):
                    reward = -1
                else:
                    reward = 0

                # next action
                next_state = game.get_state()
                next_action = self.get_action(0, next_state, False)
                eligibility_trace[(current_state, current_action)] += 1

                current_state = next_state
                current_action = next_action

            if game.round == BJ.Round.WIN:
                win += 1
            elif game.round == BJ.Round.TIE:
                tie += 1
            else:
                lose += 1
        return (win, tie, lose)
Example no. 6
def test_dealer_reveal():
    game = BJ.BlackJack()

    # Test LOSE on BUST
    game.status = BJ.Status.BUST
    game._dealer_reveal()
    assert (game.round == BJ.Round.LOSE)

    # Test LOSE
    game.player_hand = ['10', '8']
    game.dealer_hand = ['Q', 'K']
    game.status = BJ.Status.STAND
    game._dealer_reveal()
    assert (game.round == BJ.Round.LOSE)

    # Test WIN
    game.player_hand = ['10', '8']
    game.dealer_hand = ['10', '7']
    game.deck = ['J']
    game.status = BJ.Status.STAND
    game._dealer_reveal()
    assert (game.round == BJ.Round.WIN)

    # Test TIE
    game.player_hand = ['10', '8']
    game.dealer_hand = ['J', '8']
    game.status = BJ.Status.STAND
    game._dealer_reveal()
    assert (game.round == BJ.Round.TIE)

    # Test WIN on player natural blackjack
    game.player_hand = ['A', 'Q']
    game.dealer_hand = ['5', 'J']
    game.status = BJ.Status.BLACKJACK
    game._dealer_reveal()
    assert (game.round == BJ.Round.WIN)

    # Test LOSE on dealer natural blackjack
    game.player_hand = ['5', '6', 'J']
    game.dealer_hand = ['K', 'A']
    game.status = BJ.Status.STAND
    game._dealer_reveal()
    assert (game.round == BJ.Round.LOSE)

    # Test TIE on both natural blackjack
    game.player_hand = ['A', 'Q']
    game.dealer_hand = ['K', 'A']
    game.status = BJ.Status.BLACKJACK
    game._dealer_reveal()
    assert (game.round == BJ.Round.TIE)
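
These assertions constrain _dealer_reveal fairly tightly. A sketch reconstructed from the test data alone is shown below; the real method is not in these excerpts, and the "dealer draws to 17" rule is an assumption that happens to be consistent with every case above.

    def _dealer_reveal(self):
        # Sketch inferred from test_dealer_reveal; not the actual implementation
        if self.status is BJ.Status.BUST:        # player already busted
            self.round = BJ.Round.LOSE
            return
        player_natural = self.status is BJ.Status.BLACKJACK
        dealer_natural = (len(self.dealer_hand) == 2
                          and max(self.count_hand(self.dealer_hand)) == 21)
        if player_natural or dealer_natural:
            if player_natural and dealer_natural:
                self.round = BJ.Round.TIE
            elif player_natural:
                self.round = BJ.Round.WIN
            else:
                self.round = BJ.Round.LOSE
            return
        player = max(self.count_hand(self.player_hand))
        dealer = max(self.count_hand(self.dealer_hand))
        while dealer < 17:                       # assumed standard "draw to 17" rule
            self.dealer_hand.append(self.deck.pop())
            dealer = max(self.count_hand(self.dealer_hand))
        if dealer > 21 or player > dealer:
            self.round = BJ.Round.WIN
        elif player == dealer:
            self.round = BJ.Round.TIE
        else:
            self.round = BJ.Round.LOSE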
Example no. 7
def test_deal():
    game = BJ.BlackJack()

    # Test for natural blackjack
    game.deck = ['2', 'Q', '3', 'A']
    assert (game.deal() == BJ.Status.BLACKJACK)

    # Player has 21 total, a usable ace, and the dealer shows a '3' face-up
    assert (game.get_state() == (21, True, 3))

    # Test for good hand
    game.deck = ['3', '2', 'Q', '2']
    assert (game.deal() == BJ.Status.GOOD)
    assert (game.get_state() == (4, False, 10))
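
The asserted state tuple reads as (best player total, usable-ace flag, value of the dealer's face-up card). A hypothetical get_state consistent with both assertions, assuming the dealer's first card is the one face-up, would be:

    def get_state(self):
        # Assumed sketch; the real BJ.BlackJack.get_state is not in these excerpts
        totals = self.count_hand(self.player_hand)
        usable_ace = len(totals) == 2            # an ace currently countable as 11
        dealer_up = max(self.count_hand([self.dealer_hand[0]]))
        return (max(totals), usable_ace, dealer_up)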
Example no. 8
    async def blackjack_instantiate(self, ctx, *args):
        try:
            bet_amount = int(args[0][0])
        except ValueError:
            await self.bot.send_message(ctx.message.channel,
                                        "that's not a number")
            return

        if self.economy_manager.get_balance_of_player(
                ctx.message.author.id) >= bet_amount > 0:

            try:
                # Lookup only checks whether a game already exists for this channel
                temp = self.blackjack_items[ctx.message.channel.id]
            except KeyError:
                self.blackjack_items[ctx.message.channel.id] = bj.BlackJack(
                    self.economy_manager)
                if not self.thread.is_alive():
                    self.thread.start()
                message = await self.bot.send_message(ctx.message.channel,
                                                      "starting")
                mess2 = (self.blackjack_items[ctx.message.channel.id]).start()
                embed = discord.Embed()
                embed.title = "BlackJack"
                await self.bot.edit_message(message,
                                            new_content=mess2,
                                            embed=embed)
                await self.bot.add_reaction(message, "🇽")
            finally:
                message = (
                    self.blackjack_items[ctx.message.channel.id]).addPlayer(
                        ctx.message.author, int(args[0][0]))
                mess = await self.bot.send_message(ctx.message.channel,
                                                   message)
                await asyncio.sleep(1)
                await self.bot.delete_message(mess)
                # item = blackjackItems[ctx.message.channel.id]
                # await my_bot.edit_message(item.message, embed=item.build_embed())
                await self.bot.delete_message(ctx.message)
        else:
            await self.bot.send_message(
                ctx.message.channel, "you don't have enough moulah :angry:")
Example no. 9
def marginal_probability(agent_class=BasicAgent):
    probabilities = {i: [] for i in range(11, 23)}
    for i in range(11, 20):
        agent = agent_class(i)  # decision point for the basic agent
        num_hands = 1000000
        points_total = {i: 0 for i in range(11, 23)}

        for hand in range(num_hands):
            game = blackjack.BlackJack()
            game.start_game()
            while game.state.terminate == 0:
                game.act(agent)
            game.final()
            points = game.calc(game.state.player_hand)
            if (points > 22):
                points = 22
            points_total[points] += 1

        for p, total in points_total.items():
            probabilities[p].append(total / num_hands)
    print_probabilities(probabilities)
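
print_probabilities is not defined in this excerpt. A hypothetical formatter for the table built above (rows are final point totals from 11 to 22, with 22 standing in for any bust; each row holds one probability per decision threshold from 11 to 19) might be:

def print_probabilities(probabilities):
    # Hypothetical pretty-printer; not part of the original code
    for points in sorted(probabilities):
        label = 'bust' if points == 22 else str(points)
        row = '  '.join(f'{p:.4f}' for p in probabilities[points])
        print(f'{label:>4}: {row}')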
Example no. 10
    def train(self, EPS):
        win = 0
        tie = 0
        lose = 0
        for episode in range(EPS):
            eligibility_trace = defaultdict(float)
            game = BJ.BlackJack()
            status = game.deal()
            if status is BJ.Status.BLACKJACK:  # Game is over right after the deal and this episode is not useful for training
                continue
            current_state = game.get_state()
            epsilon = self._n_zero / float(self._n_zero +
                                           self._counter_state[current_state])
            current_action = self.get_action(epsilon, current_state)

            while (game.round is None):
                self._counter_state[current_state] += 1
                self._counter_state_action[(current_state,
                                            current_action)] += 1

                if (current_action == BJ.Action.HIT):
                    status = game.hit()
                    if (status is BJ.Status.GOOD):  # non-terminal state
                        continue
                else:
                    status = game.stand()
                    if (status is not BJ.Status.STAND):  # non-terminal state
                        continue

                if (game.round == BJ.Round.WIN):
                    reward = 1
                elif (game.round == BJ.Round.LOSE):
                    reward = -1
                else:
                    reward = 0

                # next action
                next_state = game.get_state()
                next_action = self.get_action(epsilon, next_state)
                delta = reward + self._gamma * self._value_function[(next_state, next_action)] - \
                        self._value_function[(current_state, current_action)]

                alpha = 1.0 / self._counter_state_action[(current_state,
                                                          current_action)]
                eligibility_trace[(current_state, current_action)] += 1

                # update table
                for key in self._value_function:
                    self._value_function[
                        key] += alpha * delta * eligibility_trace[key]
                    eligibility_trace[key] *= self._gamma * self._lambda

                current_state = next_state
                current_action = next_action

            if game.round == BJ.Round.WIN:
                win += 1
            elif game.round == BJ.Round.TIE:
                tie += 1
            else:
                lose += 1

        return (win, tie, lose)
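
get_action is called here as self.get_action(epsilon, state) and in play_exp as self.get_action(0, state, False), but its body is not shown. An epsilon-greedy sketch matching those call sites follows; the third parameter toggling exploration and a module-level import of random are assumptions.

    def get_action(self, epsilon, state, explore=True):
        # Hypothetical epsilon-greedy policy over the agent's value table
        actions = (BJ.Action.HIT, BJ.Action.STAND)
        if explore and random.random() < epsilon:
            return random.choice(actions)
        return max(actions, key=lambda a: self._value_function[(state, a)])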
Example no. 11
import blackjack

print("Welcome to Blackjack.")
print("This is a simplied version of Blackjack. The rules are simple:")
print("1. Both players are dealt 2 cards facing up.")
print(
    "2. Either players can choose to either\n\tstand[s] (not do anything) \n\tor \n\thit[h] (get another card)."
)
print(
    "3. Number cards' values are their numbers, 10s and face cards' values are 10, and ace's value is 11."
)
print("4. A player automatically wins when their cards' values add up to 21.")
print("5. A player automatically loses when their cards' values goes over 21.")
print(
    "Note: Both players can choose stand unlimited amount of times in this game.\n"
)
print("Are you ready? [y/n]")
main = input()

gameIsOn = main == "y"

if gameIsOn:
    game = blackjack.BlackJack()  # avoid shadowing the blackjack module
    game.play()
Example no. 12
def process(prob='Forest Management'):
    if prob == 'Forest Management':
        n_states = [50, 75, 100, 125, 150, 175, 200, 225, 250, 275, 300]
    else:
        n_states = [723]

    df_iter = pd.DataFrame(columns=algos)
    df_time = pd.DataFrame(columns=algos)
    df_v = pd.DataFrame(columns=algos)
    for n_state in n_states:
        print(n_state)

        if prob == 'Forest Management':
            P, R = mdptoolbox.example.forest(S=n_state,
                                             r1=n_state + 1,
                                             r2=2,
                                             p=0.1,
                                             is_sparse=False)
        else:
            bj = blackjack.BlackJack()
            P, R = bj.get_matrices()

        df_iter_tmp = pd.DataFrame(columns=algos)
        df_time_tmp = pd.DataFrame(columns=algos)
        df_v_tmp = pd.DataFrame(columns=algos)
        for i in range(10):
            # run
            pi, vi = util.run(P, R, 0.9)
            # stats
            df_iter_tmp.loc[len(df_iter_tmp)] = [pi.iter, vi.iter]
            df_time_tmp.loc[len(df_time_tmp)] = [pi.time, vi.time]
            df_v_tmp.loc[len(df_v_tmp)] = [pi.V[0], vi.V[0]]
            if pi.policy != vi.policy:
                print(n_state, pi.policy, vi.policy)

        df_iter.loc[len(df_iter)] = df_iter_tmp.mean(axis=0)
        df_time.loc[len(df_time)] = df_time_tmp.mean(axis=0)
        df_v.loc[len(df_v)] = df_v_tmp.mean(axis=0)

    # plot
    df_iter.set_index(pd.Index(n_states), inplace=True)
    df_time.set_index(pd.Index(n_states), inplace=True)
    df_v.set_index(pd.Index(n_states), inplace=True)
    util.plot(df_iter, df_time, df_v)

    df_iter = pd.DataFrame(columns=algos)
    df_time = pd.DataFrame(columns=algos)
    df_v = pd.DataFrame(columns=algos)
    for decay in ql_decays:
        if prob == 'Forest Management':
            P, R = mdptoolbox.example.forest(S=100,
                                             r1=100 + 1,
                                             r2=2,
                                             p=0.1,
                                             is_sparse=False)
        else:
            bj = blackjack.BlackJack()
            P, R = bj.get_matrices()

        df_iter_tmp = pd.DataFrame(columns=algos)
        df_time_tmp = pd.DataFrame(columns=algos)
        df_v_tmp = pd.DataFrame(columns=algos)
        for i in range(10):
            # run
            pi, vi = util.run(P, R, decay)
            # stats
            df_iter_tmp.loc[len(df_iter_tmp)] = [pi.iter, vi.iter]
            df_time_tmp.loc[len(df_time_tmp)] = [pi.time, vi.time]
            df_v_tmp.loc[len(df_v_tmp)] = [pi.V[0], vi.V[0]]
            if pi.policy != vi.policy:
                print(100, pi.policy, vi.policy)

        df_iter.loc[len(df_iter)] = df_iter_tmp.mean(axis=0)
        df_time.loc[len(df_time)] = df_time_tmp.mean(axis=0)
        df_v.loc[len(df_v)] = df_v_tmp.mean(axis=0)

    # plot
    df_iter.set_index(pd.Index(ql_decays), inplace=True)
    df_time.set_index(pd.Index(ql_decays), inplace=True)
    df_v.set_index(pd.Index(ql_decays), inplace=True)
    util.plot(df_iter, df_time, df_v, xlabel='Discount Factor')

    # Q learning
    df_time = pd.DataFrame(columns=ql_decays)
    df_v = pd.DataFrame(columns=ql_decays)
    for n_state in n_states:
        print(n_state)
        P, R = mdptoolbox.example.forest(S=n_state,
                                         r1=n_state + 1,
                                         r2=2,
                                         p=0.1,
                                         is_sparse=False)
        v_list, time_list = [], []
        for decay in ql_decays:
            v_list_tmp, time_list_tmp = [], []
            for i in range(10):
                v, time = util.run_ql(P, R, decay)
                v_list_tmp.append(v)
                time_list_tmp.append(time)
            v_list.append(np.max(v_list_tmp))
            time_list.append(np.mean(time_list_tmp))
        df_time.loc[len(df_time)] = time_list
        df_v.loc[len(df_v)] = v_list

    # plot
    df_time.set_index(pd.Index(n_states), inplace=True)
    df_v.set_index(pd.Index(n_states), inplace=True)
    util.plot(None, df_time=df_time, df_v=df_v)

    return
Example no. 13
def main():
    game = blackjack.BlackJack()
    for i in range(TOTAL_ROUNDS):
        game.PlayRound()
    game.PrintResults()