class Dealer(Player):
    """House-side participant that plays with a fixed drawing rule.

    The dealer keeps a reference to the running game (used to reach the
    deck) and owns a ``Hand``.  Attributes such as ``victories`` and the
    ``get_card`` method are expected to come from ``Player``.
    """

    def __init__(self, game):
        """Attach the dealer to *game* and start with a fresh hand."""
        self.hand = Hand()
        self.game = game

    def calculate_value(self):
        """Return the current value of the dealer's hand.

        When the hand is above 21 and the hand reports an ace, the hand is
        asked to turn one ace into a 1 before the value is read again.
        """
        hand = self.hand
        over_limit = hand.calculate_value() > 21
        if over_limit and hand.have_an_ace():
            hand.set_an_ace_1()
        return hand.calculate_value()

    def make_move(self, player_value):
        """Draw cards until the dealer stands.

        The dealer keeps drawing while *player_value* is at most 21 and the
        dealer's own value is below 17; every drawn card is printed.  The
        stand message is always printed at the end.
        """
        while True:
            # Nothing to draw for if the player is already above 21, and the
            # dealer stops as soon as its own hand reaches 17.
            if player_value > 21 or self.calculate_value() >= 17:
                break
            print('\n Dealer Action: Asks for a card ')
            drawn = self.game.get_deck().give_a_card()
            self.get_card(drawn)
            print(str(drawn.rank) + ' of ' + str(drawn.suit))

        print('\nDealer Action: Stand ')

    def print_victories(self):
        """Print how many times the dealer has won."""
        print('Dealer won ' + str(self.victories) + ' times')

    def print_hand(self):
        """Print only the first card of the dealer's hand."""
        first_card = self.hand.cards[0]
        print(str(first_card.rank) + ' of ' + str(first_card.suit))
class HumanPlayer(Player):
    """Automated player that learns a blackjack policy with a Q-table.

    The table (``fg_values_matrix``) maps ``(state, action)`` to a float,
    where ``state`` is ``(hand_status, dealer_value)`` and ``action`` is one
    of ``self.actions``.  While training, decisions are random and every
    recorded ``(state, action)`` pair is kept in ``temp_state_action`` so
    that ``update_fg_values`` can propagate the round's result backwards.
    Outside training the action with the highest table value is chosen.

    ``get_card`` and ``victories`` are expected to come from ``Player``.
    """

    # Class-level defaults, kept so the names still exist on the class.
    # Every instance gets its OWN containers in __init__; mutating these
    # shared objects would leak state between players.
    coins = 0
    actions = []
    states = []
    fg_values_matrix = {}
    temp_state_action = []

    def __init__(self, game, coins):
        """Create a player bound to *game* with *coins* coins and a zeroed Q-table."""
        self.game = game
        self.hand = Hand()
        self.coins = coins
        self.actions = ['continue', 'stand', 'double bet', 'split']

        # The player only takes decisions with a hand value between 1 and 20
        # (above 20 the round is decided without a choice), and the dealer
        # value ranges from 1 to 11.  Each player value exists in an 's' and
        # an 'h' variant (presumably soft/hard hands, matching what
        # Hand.calculate_status() returns -- TODO confirm).
        self.states = []
        for player_value in range(1, 21):
            for dealer_value in range(1, 12):
                self.states.append((str(player_value) + 's', dealer_value))
                self.states.append((str(player_value) + 'h', dealer_value))

        # Q-table: key is (state, action), value starts at 0.0.
        self.fg_values_matrix = {}
        for state in self.states:
            for action in self.actions:
                self.fg_values_matrix[state, action] = 0.0

        # (state, action) pairs recorded during the current round.
        self.temp_state_action = []

    def bet(self, training_flag):
        """Return the bet for a round (always 1 coin).

        The coin is only deducted from ``self.coins`` when not training.
        """
        if not training_flag:
            self.coins -= 1
        return 1  # The automated player only bets 1 coin

    def double_bet(self):
        """Double the game's current player bet."""
        self.game.current_player_bet *= 2

    def calculate_value(self):
        """Return the value of the player's hand."""
        return self.hand.calculate_value()

    def get_prize(self, prize):
        """Add *prize* coins to the player's balance."""
        self.coins += prize

    def print_victories(self):
        """Print how many times the player has won."""
        print('Player won ' + str(self.victories) + ' times')

    def has_two_equal_ranked_cards(self):
        """Delegate to the hand's check for two equally ranked cards."""
        return self.hand.has_two_equal_ranked_cards()

    def have_an_ace(self):
        """Delegate to the hand's ace check."""
        return self.hand.have_an_ace()

    def have_more_than_1_ace(self):
        """Delegate to the hand's check for more than one ace."""
        return self.hand.have_more_than_1_ace()

    def _remember(self, dealer_original_value, action):
        """Record the current state together with *action* for later learning."""
        state = (self.hand.calculate_status(), dealer_original_value)
        self.temp_state_action.append((state, action))

    def _draw_card(self):
        """Take one card from the game's deck into the hand and print it."""
        print('Action: Asks for a card')
        new_card = self.game.get_deck().give_a_card()
        self.get_card(new_card)
        print(str(new_card.rank) + ' of ' + str(new_card.suit))

    def take_decision(self, dealer_original_value, training_flag):
        """Choose the next action for the current hand.

        Returns one of 'continue', 'stand', 'double bet' or 'split'.  With a
        hand value above 20 the answer is always 'stand' and nothing is
        recorded.

        Training: the action is random (double bet only with exactly two
        cards, split only when ``can_split()``), it is recorded in
        ``temp_state_action`` and a double bet doubles the game's bet
        immediately.

        Not training: the allowed action with the highest Q-value wins.
        """
        if training_flag:
            random_number = randint(0, 9)
            if random_number < 2 and len(self.hand.cards) == 2:
                # double bet, only at the first decision
                action = 'double bet'
            elif random_number >= 5 and self.can_split():
                action = 'split'
            elif 0 <= random_number < 5:
                action = 'continue'
            else:
                action = 'stand'

            if self.calculate_value() > 20:
                return 'stand'

            self._remember(dealer_original_value, action)
            if action == 'double bet':
                self.double_bet()
            return action

        # Typically only reached above 20, where the round is already decided.
        if self.calculate_value() > 20:
            return 'stand'

        state = (self.hand.calculate_status(), dealer_original_value)
        candidates = ['stand', 'continue']
        if len(self.hand.cards) == 2:
            # Same rule as in training: doubling is a first-decision move.
            candidates.append('double bet')
        if self.can_split():
            candidates.append('split')

        best_action = None
        best_value = None
        for action in candidates:
            value = self.fg_values_matrix[state, action]
            # '>=' keeps the historical tie-break: among equal values the
            # later candidate in the list wins.
            if best_action is None or value >= best_value:
                best_action = action
                best_value = value

        # NOTE(review): unlike the training branch, choosing 'double bet'
        # here does not call double_bet(); confirm against the game's coin
        # accounting whether the bet should be doubled in real play too.
        return best_action

    def make_move(self, dealer_original_value, training_flag):
        """Play the hand until the player stands.

        Returns 'split' as soon as a split is chosen (the game is expected
        to handle the split hands); otherwise returns '' once the player
        stands.  After a double bet exactly one card is drawn and the player
        stands.
        """
        decision = ''

        while decision != 'stand':
            decision = self.take_decision(dealer_original_value, training_flag)

            if decision == 'continue':
                self._draw_card()

            elif decision == 'stand':
                print('Action: Stand')

            elif decision == 'split':
                # Back to the game, where the split hand begins.
                return 'split'

            elif decision == 'double bet':
                print('Player doubles the bet\n')
                self._draw_card()

                if self.calculate_value() <= 20:
                    self._remember(dealer_original_value, 'stand')

                # After doubling, the player MUST stand with one more card.
                decision = 'stand'

        return ''

    def update_fg_values(self, result):
        """Update the Q-table from the round's recorded (state, action) pairs.

        *result* is 'win', 'lose' or anything else (no reward).  The last
        recorded pair receives the reward (scaled by the current bet); the
        earlier pairs receive a reward of 0 plus the discounted best value of
        the state that followed them.  Does nothing when no decision was
        recorded in this round.
        """
        history = self.temp_state_action
        if not history:
            # No decision was recorded (e.g. the hand was above 20 from the
            # start), so there is nothing to learn from.
            return

        alpha = 0.8  # learning rate, it can be modified
        gamma = 0.5  # discount factor, between 0 and 1
        reward = 0.0

        if result == 'win':
            reward = 0.2 * self.game.current_player_bet
        elif result == 'lose':
            reward = -0.2 * self.game.current_player_bet

        q_table = self.fg_values_matrix

        # Terminal pair: max Q(s', a') is 0 because there is no next state.
        terminal_s_a = history[-1]
        q_table[terminal_s_a] = (
            (1 - alpha) * q_table[terminal_s_a] + alpha * (reward + gamma * 0))

        # Non-terminal pairs get no immediate reward; walk backwards so each
        # one sees the already-updated values of its successor state.
        reward = 0.0
        for index in range(len(history) - 2, -1, -1):
            s_a = history[index]
            next_state = history[index + 1][0]

            # max Q(s', a') over the actions of the NEXT state.
            best_next = max(
                q_table[next_state, action]
                for action in ('stand', 'continue', 'double bet'))

            q_table[s_a] = (
                (1 - alpha) * q_table[s_a]
                + alpha * (reward + gamma * best_next))

    def restart_temp_state_action(self):
        """Forget the (state, action) pairs recorded so far."""
        self.temp_state_action = []

    def can_split(self):
        """Return True only with exactly two cards that the hand reports as equally ranked."""
        if len(self.hand.cards) != 2:
            return False
        return bool(self.has_two_equal_ranked_cards())

    def print_hand(self):
        """Print the first two cards of the hand (the hand must hold at least two)."""
        first_card = self.hand.cards[0]
        second_card = self.hand.cards[1]
        print(str(first_card.rank) + ' of ' + str(first_card.suit))
        print(str(second_card.rank) + ' of ' + str(second_card.suit) + '\n')

    def calculate_maximum_from_vector(self, vector):
        """Return the largest item obtained by iterating *vector*.

        For a dict this is the largest key.  Returns -9999999 when *vector*
        is empty or holds nothing larger than that.
        """
        largest = -9999999
        for item in vector:
            if item > largest:
                largest = item
        return largest