Code example #1
0
def reinforce_learning():
    # Tabular Q-learning over a simplified card game.
    # Returns Qsa: dict mapping (player_sum, dealer_first_card) -> [Q_hit, Q_stick].
    Qsa, Csa = {}, {}
    for s1 in PLAYER_STATES:
        # s1 means sum of player's cards
        for s2 in DEALER_STATES:
            # s2 means the first card of dealer
            # about action
            # k=0  -> Hit, k=1  -> Stick
            Qsa[(s1, s2)] = [0, 0]
            # Per-state convergence flag; set once below and never cleared.
            Csa[(s1, s2)] = False
    while True:
        # Exploring starts: sample a state uniformly at random each sweep.
        s1, s2 = choice(PLAYER_STATES), choice(DEALER_STATES)
        for a in xrange(2):
            if a == 0:
                # HIT
                new_card = get_card()
                if new_card[0] == RED:
                    # Red cards subtract their value from the player's sum.
                    new_s1 = s1 - new_card[1]
                else:
                    assert new_card[0] == BLACK
                    # Black cards add their value.
                    new_s1 = s1 + new_card[1]
                if new_s1 < 1 or new_s1 > 21:
                    # Player went out of range -> bust.
                    qrs = LOSE
                elif new_s1 == 21:
                    qrs = WIN
                else:
                    # Bootstrapped target: max Q over entries with the new
                    # player sum.  NOTE(review): this maxes over ALL dealer
                    # cards _s2, not just the current s2 — looks like it
                    # should be max(Qsa[(new_s1, s2)]); confirm intent.
                    new_Q = []
                    for k, v in Qsa.iteritems():
                        _s1, _s2 = k
                        if _s1 == new_s1:
                            new_Q += v
                    qrs = 0 + GAMMA * max(new_Q)
            elif a == 1:
                # STICK
                # Dealer's turn: dealer hits until its sum reaches 17+ or busts.
                new_s2 = s2
                while True:
                    if new_s2 < 1 or new_s2 > 21:
                        # Dealer bust -> player wins.
                        qrs = WIN
                        break
                    elif new_s2 >= 17:
                        # Dealer sticks; compare the two totals.
                        if s1 == new_s2:
                            qrs = DRAW
                        elif s1 > new_s2:
                            qrs = WIN
                        else:
                            assert s1 < new_s2
                            qrs = LOSE
                        break
                    else:
                        # HIT
                        new_card = get_card()
                        if new_card[0] == RED:
                            new_s2 -= new_card[1]
                        else:
                            assert new_card[0] == BLACK
                            new_s2 += new_card[1]
            else:
                assert False
            # Exponential-moving-average Q update with step size ALPHA.
            Qsa[(s1, s2)][a] = (1 - ALPHA) * Qsa[(s1, s2)][a] + ALPHA * qrs
        # Check where the state is converged or not
        # NOTE(review): the state is flagged "converged" once the two action
        # values SEPARATE by more than CONVERGENCE_CONDITION (a clear action
        # preference), and the flag is never reset — confirm this criterion.
        if abs(Qsa[(s1, s2)][0] - Qsa[(s1, s2)][1]) > CONVERGENCE_CONDITION:
            Csa[(s1, s2)] = True
        # If all states are converged at once, end reinforce learning
        # for/else: the else runs only when no flag was False -> stop learning.
        for v in Csa.itervalues():
            if not v:
                break
        else:
            break
    return Qsa
Code example #2
0
File: game_play.py  Project: jerryhan88/workspace_SMU
def play_round(way_to_decision):
    # Initialize a game
    player_cards, dealer_cards = [], []
    while True:
        new_card = get_card()
        if new_card[0] == RED:
            continue
        else:
            break
    player_cards.append(new_card)
    while True:
        new_card = get_card()
        if new_card[0] == RED:
            continue
        else:
            break
    dealer_cards.append(new_card)
    #
    rv_player = None
    while True:
        if DISPLAY:
            print "Player's turn"
            display_current(player_cards, dealer_cards)
        player_choice = way_to_decision(player_cards, dealer_cards)
        if player_choice == HIT:
            nc = get_card()
            if DISPLAY:
                print 'New card is (%s, %d)' % ('Red' if nc[0] == RED else
                                                'Black', nc[1])
            player_cards.append(nc)
            s = get_sum(player_cards)
            if s < 1 or s > 21:
                rv_player = BUST
                break
            if s == 21:
                return WIN
        else:
            assert player_choice == STICK, player_choice
            rv_player = STICK
            break
    #
    if rv_player != BUST:
        # the player did not go bust
        rv_dealer = None
        while True:
            if DISPLAY:
                print "Dealer's turn"
                display_current(player_cards, dealer_cards)
            s = get_sum(dealer_cards)
            if s < 1 or s > 21:
                rv_dealer = BUST
                break
            elif s >= 17:
                rv_dealer = STICK
                break
            if s == 21:
                return LOSE
            else:
                # HIT
                nc = get_card()
                if DISPLAY:
                    print 'New card is (%s, %d)' % ('Red' if nc[0] == RED else
                                                    'Black', nc[1])
                dealer_cards.append(nc)
        if rv_dealer == BUST:
            return WIN
    player_sum, dealer_sum = get_sum(player_cards), get_sum(dealer_cards)
    if player_sum == dealer_sum:
        return DRAW
    return WIN if player_sum > dealer_sum else LOSE
Code example #3
0
File: game_play.py  Project: jerryhan88/workspace_SMU
def play_round(way_to_decision):
    # Initialize a game
    player_cards, dealer_cards = [], []
    while True:
        new_card = get_card()
        if new_card[0] == RED:
            continue
        else:
            break
    player_cards.append(new_card)
    while True:
        new_card = get_card()
        if new_card[0] == RED:
            continue
        else:
            break 
    dealer_cards.append(new_card)
    #
    rv_player = None
    while True:
        if DISPLAY:
            print "Player's turn"
            display_current(player_cards, dealer_cards)
        player_choice = way_to_decision(player_cards, dealer_cards)
        if player_choice == HIT:
            nc = get_card()
            if DISPLAY:
                print 'New card is (%s, %d)' %('Red' if nc[0]==RED else 'Black', nc[1])
            player_cards.append(nc)
            s = get_sum(player_cards) 
            if s < 1  or s > 21:
                rv_player = BUST
                break
            if s == 21:
                return WIN
        else:
            assert player_choice == STICK, player_choice 
            rv_player = STICK
            break
    #
    if rv_player != BUST:
        # the player did not go bust
        rv_dealer = None
        while True:
            if DISPLAY:
                print "Dealer's turn"
                display_current(player_cards, dealer_cards)
            s = get_sum(dealer_cards)
            if s < 1  or s > 21:
                rv_dealer = BUST
                break
            elif s >= 17:
                rv_dealer = STICK
                break
            if s == 21:
                return LOSE
            else:
                # HIT
                nc = get_card()
                if DISPLAY:
                    print 'New card is (%s, %d)' %('Red' if nc[0]==RED else 'Black', nc[1])
                dealer_cards.append(nc)
        if rv_dealer == BUST:
            return WIN
    player_sum, dealer_sum = get_sum(player_cards), get_sum(dealer_cards)
    if player_sum == dealer_sum:
        return DRAW
    return WIN if player_sum > dealer_sum else LOSE
Code example #4
0
def reinforce_learning():
    """Learn action values by Q-learning with uniformly sampled start states.

    Returns Qsa, a dict mapping (player_sum, dealer_first_card) to a
    two-element list [Q(hit), Q(stick)].  Learning stops once every
    state's two action values differ by more than CONVERGENCE_CONDITION.
    """
    # Q table over all (player sum, dealer first card) states plus a
    # per-state "converged" flag (set once, never cleared).
    Qsa = {}
    Csa = {}
    for p_sum in PLAYER_STATES:
        for d_card in DEALER_STATES:
            Qsa[(p_sum, d_card)] = [0, 0]   # index 0 -> Hit, 1 -> Stick
            Csa[(p_sum, d_card)] = False
    while True:
        # Exploring starts: pick a state uniformly at random.
        s1, s2 = choice(PLAYER_STATES), choice(DEALER_STATES)
        for action in xrange(2):
            if action == 0:
                # HIT: draw one card; red subtracts its value, black adds it.
                card = get_card()
                if card[0] == RED:
                    hit_sum = s1 - card[1]
                else:
                    assert card[0] == BLACK
                    hit_sum = s1 + card[1]
                if hit_sum < 1 or hit_sum > 21:
                    target = LOSE           # player went bust
                elif hit_sum == 21:
                    target = WIN
                else:
                    # Bootstrap from every stored value whose player sum
                    # matches the new sum (any dealer card).
                    successors = []
                    for (p_sum, _d_card), values in Qsa.iteritems():
                        if p_sum == hit_sum:
                            successors.extend(values)
                    target = GAMMA * max(successors)
            else:
                # STICK: play out the dealer, who hits while below 17.
                dealer_sum = s2
                while 1 <= dealer_sum <= 21 and dealer_sum < 17:
                    card = get_card()
                    if card[0] == RED:
                        dealer_sum -= card[1]
                    else:
                        assert card[0] == BLACK
                        dealer_sum += card[1]
                if dealer_sum < 1 or dealer_sum > 21:
                    target = WIN            # dealer went bust
                elif s1 > dealer_sum:
                    target = WIN
                elif s1 == dealer_sum:
                    target = DRAW
                else:
                    target = LOSE
            # Exponential moving-average update with step size ALPHA.
            previous = Qsa[(s1, s2)][action]
            Qsa[(s1, s2)][action] = (1 - ALPHA) * previous + ALPHA * target
        # Flag the state converged once its action values clearly separate.
        q_hit, q_stick = Qsa[(s1, s2)]
        if abs(q_hit - q_stick) > CONVERGENCE_CONDITION:
            Csa[(s1, s2)] = True
        # Stop once every state has been flagged converged.
        if all(Csa.itervalues()):
            break
    return Qsa