def __init__(self, restore_from=None):
    """Create the brain, optionally restoring a previously saved Q-network.

    Args:
        restore_from: Handed to ``load_model`` when truthy. It is also
            concatenated onto a log message, so a string (presumably a
            file path) is expected. When falsy (the default) a fresh
            network is built via ``self.initialize_network()``.
    """
    # The default used to be a shared mutable list (``[]``); ``None`` is
    # equally falsy, so existing callers see no difference.
    self.hand_evaluator = HandEvaluator()
    self.Q_gamma = 0.9
    self.epsilon = 0.995
    self.new_state = []
    if restore_from:
        print("Restoring from: " + restore_from)
        self.Q = load_model(restore_from)
    else:
        self.Q = self.initialize_network()
def __init__(self, identificador, fichas, nombre, p=0.5):
    """Bot constructor.

    Args:
        identificador: Stored as ``self.id``.
        fichas: Stored as ``self.fichas``.
        nombre: Stored as ``self.nombre``.
        p: Stored as ``self.p``.
    """
    self.handEval = HandEvaluator()
    self.bot = True

    # Values supplied by the caller.
    self.nombre, self.fichas, self.id = nombre, fichas, identificador
    self.p = p

    # Initial per-game state: two empty card slots, nothing bet, no play.
    self.mano = [None, None]
    self.apuesta_actual = 0
    self.jugada = None
    self.dealer = self.esperar = self.allin = False
class Bot(Jugador):
    """Computer-controlled player; extends ``Jugador`` and redefines its methods.

    ``inicializar_estrategia`` must be called before ``obtener_jugada``,
    because it is the only place that creates ``self.cerebro``.
    """

    # Rank character -> numeric value used for adjacency checks ("d" maps to 10).
    _VALOR_CARTA = {"1": 1, "2": 2, "3": 3, "4": 4, "5": 5, "6": 6, "7": 7,
                    "8": 8, "9": 9, "d": 10, "j": 11, "q": 12, "k": 13}

    # Rank characters that count as a "high card".
    _CARTAS_ALTAS = ("7", "8", "9", "d", "j", "q", "k", "1")

    def __init__(self, identificador, fichas, nombre, p=0.5):
        """Bot constructor.

        Args:
            identificador: Stored as ``self.id``.
            fichas: Stored as ``self.fichas``.
            nombre: Stored as ``self.nombre``.
            p: Stored as ``self.p`` and later passed to
                ``establecer_estrategia``.
        """
        # NOTE(review): the base-class constructor is not called, exactly as
        # in the original; confirm Jugador.__init__ is not needed.
        self.handEval = HandEvaluator()
        self.bot = True
        self.nombre = nombre
        self.fichas = fichas
        self.id = identificador
        self.mano = [None, None]
        self.apuesta_actual = 0
        self.dealer = False
        self.jugada = None
        self.esperar = False
        self.allin = False
        self.p = p

    def inicializar_estrategia(self):
        """Pick a strategy from ``self.p`` and create ``self.cerebro``."""
        estrategia = self.establecer_estrategia(self.p)
        self.cerebro = Cerebro(estrategia[0], estrategia[1])

    def obtener_jugada(self, ronda, comunitarias):
        """Return the play chosen for the current round.

        Args:
            ronda: Round object, used to know the round type and the bets
                made so far (1 pre-flop, 2 flop, 3 turn, 4 river).
            comunitarias: List of 5 strings for the community cards; on the
                flop it would be ``[card1, card2, card3, None, None]``.

        Returns:
            Whatever ``self.cerebro.elegir_accion`` returns (documented as
            a string by the original author). It receives the hand, the
            community cards, the round, the odds dictionary and the dealer
            flag.
        """
        return self.cerebro.elegir_accion(
            self.mano, comunitarias, ronda,
            self.calcular_odds(ronda, comunitarias), self.dealer)

    def calcular_odds(self, ronda, comunitarias):
        """Build the odds dictionary for the current hand.

        Each value is a two-item list ``[odds, flag]``:

        * ``odds`` is ``None`` when nothing was computed, ``0`` when the
          hand is already made, otherwise ``remaining / outs - 1``.
        * ``flag`` starts as ``True`` and is set to ``False`` by
          ``comprobar_jugada_en_mesa`` when the community cards alone
          already make that hand.

        Args:
            ronda: Round object; ``ronda.tipo`` (1-4) selects the
                remaining-cards figure.
            comunitarias: Community cards.

        Returns:
            dict: hand name -> ``[odds, flag]``.
        """
        # Indexed by ronda.tipo; index 0 is a placeholder.
        cartas_restantes = [1, 50, 47, 46, 45]
        odds = {"carta alta": [None, True], "par": [None, True],
                "doble par": [None, True], "trio": [None, True],
                "escalera interna": [None, True],
                "escalera abierta": [None, True],
                "color": [None, True], "full": [None, True],
                "poker": [None, True], "escalera color": [None, True]}
        numero, colores = self.handEval.gobisificar(self.mano, comunitarias)
        # NOTE(review): "/" is kept exactly as written; it truncates under
        # Python 2 and is true division under Python 3 - confirm which the
        # project targets.
        restantes = cartas_restantes[ronda.tipo]

        if ronda.tipo == 1:  # pre-flop only
            if self.tiene_cartas_consecutivas():
                odds["escalera interna"][0] = (restantes / 12) - 1
                odds["escalera abierta"][0] = (restantes / 12) - 1
            if self.tiene_cartas_del_mismo_color():
                odds["color"][0] = (restantes / 11) - 1
            if self.tiene_carta_alta():
                odds["carta alta"][0] = 0

        # Pair
        tipo, jugada = self.handEval.comprobar_par(numero, colores)
        if tipo:
            odds["trio"][0] = (restantes / 2) - 1
            odds["doble par"][0] = (restantes / 3) - 1
            odds["par"][0] = 0
        else:
            odds["par"][0] = (restantes / 6) - 1

        # Two pair
        tipo, jugada = self.handEval.comprobar_doble_par(numero, colores)
        if tipo:
            odds["doble par"][0] = 0
            odds["full"][0] = (restantes / 4) - 1

        # Three of a kind
        tipo, jugada = self.handEval.comprobar_trio(numero, colores)
        if tipo:
            odds["trio"][0] = 0
            odds["full"][0] = (restantes / 3) - 1
            odds["poker"][0] = (restantes / 1) - 1

        # Straight (made, open-ended draw, or inside draw)
        tipo, jugada = self.handEval.comprobar_escalera(numero, colores)
        if tipo:
            odds["escalera abierta"][0] = 0
            odds["escalera interna"][0] = 0
        elif self.handEval.posible_escalera_abierta(numero, colores):
            odds["escalera abierta"][0] = (restantes / 8) - 1
        elif self.handEval.posible_escalera_interna(numero, colores):
            odds["escalera interna"][0] = (restantes / 4) - 1

        # Flush
        tipo, jugada = self.handEval.comprobar_color(numero, colores)
        if tipo:
            odds["color"][0] = 0
        elif self.handEval.posible_color(numero, colores):
            odds["color"][0] = (restantes / 9) - 1

        # Full house
        tipo, jugada = self.handEval.comprobar_full(numero, colores)
        if tipo:
            odds["full"][0] = 0

        # Four of a kind
        tipo, jugada = self.handEval.comprobar_poker(numero, colores)
        if tipo:
            odds["poker"][0] = 0

        # Straight flush is flagged whenever both a straight and a flush are
        # made; the code does not check that they use the same cards.
        tiene_escalera = (odds["escalera interna"][0] == 0
                          or odds["escalera abierta"][0] == 0)
        if tiene_escalera and odds["color"][0] == 0:
            odds["escalera color"][0] = 0

        if not ronda.tipo == 1:
            self.comprobar_jugada_en_mesa(odds, comunitarias)
        return odds

    def comprobar_jugada_en_mesa(self, odds, comunitarias):
        """Clear the flag of every hand the community cards make on their own.

        Mutates ``odds`` in place by setting ``odds[name][1] = False``.

        Args:
            odds: Dictionary produced by ``calcular_odds``.
            comunitarias: Community cards, evaluated with an empty hand.
        """
        numero, colores = self.handEval.gobisificar([], comunitarias)
        comprobaciones = (
            (self.handEval.comprobar_par, ("par",)),
            (self.handEval.comprobar_doble_par, ("doble par",)),
            (self.handEval.comprobar_trio, ("trio",)),
            (self.handEval.comprobar_escalera,
             ("escalera interna", "escalera abierta")),
            (self.handEval.comprobar_color, ("color",)),
            (self.handEval.comprobar_full, ("full",)),
            # Fixed: four of a kind on the board used to clear "full".
            (self.handEval.comprobar_poker, ("poker",)),
        )
        for comprobar, claves in comprobaciones:
            tipo, jugada = comprobar(numero, colores)
            if tipo:
                for clave in claves:
                    odds[clave][1] = False

    def tiene_cartas_consecutivas(self):
        """Return True when the two hole cards have adjacent ranks.

        A difference of 12 between "k" (13) and "1" (1) also counts as
        adjacent. The original accepted that wrap in one card order only;
        it is now symmetric.
        """
        carta1 = self.mano[0][0]
        carta2 = self.mano[1][0]
        diferencia = abs(self._VALOR_CARTA[carta1] - self._VALOR_CARTA[carta2])
        return diferencia in (1, 12)

    def tiene_cartas_del_mismo_color(self):
        """Return True when both hole cards share the same second character.

        Fixed: the original compared the first card's second character with
        the second card's first character (suit against rank).
        """
        return self.mano[0][1] == self.mano[1][1]

    def tiene_carta_alta(self):
        """Return True when the first hole card's rank is 7 or higher, or "1"."""
        # NOTE(review): only the first card is inspected, as in the original;
        # confirm whether the second card should count too.
        return self.mano[0][0] in self._CARTAS_ALTAS

    def tiene_par_en_mano(self):
        """Return True when both hole cards have the same rank character.

        Fixed: the original compared the first card with itself and so
        always returned True.
        """
        return self.mano[0][0] == self.mano[1][0]

    def establecer_estrategia(self, numero):
        """Randomly choose a strategy tuple biased by ``numero``.

        A uniform draw in ``[0, 1)`` is compared against a band of
        half-width 0.1 around ``numero`` (clamped to ``[0, 1]``):

        * below the band -> ``(2, 0)``
        * inside the band -> ``(3, random.randint(1, 4))``
        * above the band -> ``(1, i)``, where ``i`` in 1..4 is the quarter
          of the remaining range that holds the draw.

        Args:
            numero: Centre of the band.

        Returns:
            tuple: The two arguments later passed to ``Cerebro``.
        """
        aleatorio = random.random()
        limite_superior = numero + 0.1 if numero < 0.9 else 1
        limite_inferior = numero - 0.1 if numero > 0.1 else 0
        rango_mentira = 1 - limite_superior if limite_superior < 1 else 0

        if aleatorio < limite_inferior:
            return (2, 0)
        if limite_inferior <= aleatorio <= limite_superior:
            return (3, random.randint(1, 4))

        rango_mentira = rango_mentira / 4
        for i in range(1, 5):
            if aleatorio <= limite_superior + rango_mentira * i:
                return (1, i)
        # Float rounding can leave the last threshold a hair below the draw;
        # without this the method returned None and crashed the caller.
        return (1, 4)
class AdaptiveBrain:
    """Q-learning decision maker backed by a Keras network (``self.Q``).

    The network takes two inputs: a temporal sequence of 5-feature rows and
    a single static feature (a showdown probability). It outputs one scalar
    Q-value per candidate action.
    """

    def __init__(self, restore_from=None):
        """Create the brain, optionally restoring a previously saved Q-network.

        Args:
            restore_from: Handed to ``load_model`` when truthy. It is also
                concatenated onto a log message, so a string (presumably a
                file path) is expected. When falsy (the default) a fresh
                network is built.
        """
        # The default used to be a shared mutable list (``[]``); ``None`` is
        # equally falsy, so existing callers see no difference.
        self.hand_evaluator = HandEvaluator()
        self.Q_gamma = 0.9  # multiplier applied to max Q in update_Q_function
        self.epsilon = 0.995  # decayed on every get_epsilon_value() call
        self.new_state = []  # network input for the last action chosen
        if restore_from:
            print("Restoring from: " + restore_from)
            self.Q = load_model(restore_from)
        else:
            self.Q = self.initialize_network()

    def initialize_network(self):
        """Build and compile the two-branch Q-network.

        Returns:
            The compiled combined model (``mse`` loss, ``adam`` optimizer).
        """
        TIMESTEPS = None
        TEMPORAL_DATADIM = 5
        STATIC_DATADIM = 1

        # Learns over the temporal feature matrix.
        temporal_model = Sequential()
        # Returns a sequence of vectors of dimension 12.
        temporal_model.add(
            LSTM(12, return_sequences=True,
                 input_shape=(TIMESTEPS, TEMPORAL_DATADIM)))
        # Returns a sequence of vectors of dimension 12.
        temporal_model.add(LSTM(12, return_sequences=True))
        # Returns a single vector of dimension 12.
        temporal_model.add(LSTM(12))

        # Learns over static features.
        static_model = Sequential()
        static_model.add(Dense(5, input_dim=STATIC_DATADIM, activation="relu"))

        combined_model = Sequential()
        combined_model.add(Merge([temporal_model, static_model], mode='concat'))
        combined_model.add(Dense(10, activation='relu'))
        combined_model.add(Dense(3, activation='relu'))
        combined_model.add(Dense(1, activation='linear'))

        combined_model.compile(loss='mse', optimizer='adam')
        return combined_model

    @staticmethod
    def _action_input(first, villain, street, discard_flag, static_value):
        """Pack one candidate action as ``[temporal row (1x5), static (1x1)]``."""
        temporal = np.asarray(
            [first, villain, street, discard_flag, 0]).reshape((1, -1))
        return [temporal, np.asarray([[static_value]])]

    def enumerate_next_action_vectors(self, bot):
        """List every candidate action together with its network input.

        Args:
            bot: Player object; this method reads ``possible_actions``,
                ``temporal_feature_matrix``, ``hand``, ``get_street()`` and
                ``get_max_of_prev_street()``.

        Returns:
            tuple: ``(inputs, action_strs)``, two parallel lists.
            ``inputs[k]`` is ``[temporal_row, static_value]`` for the
            action string ``action_strs[k]``.
        """
        BET_INCREMENT = 5
        STACKSIZE = 200.0

        possible_actions = bot.possible_actions
        last_in_pot_hero = bot.temporal_feature_matrix[0, -1]
        last_in_pot_villain = bot.temporal_feature_matrix[1, -1]
        street = bot.get_street()
        hole1, hole2 = bot.hand['hole1'], bot.hand['hole2']
        board = bot.hand['board']
        evaluate = self.hand_evaluator.evaluate_showdown_probabilities

        if is_discard_round(possible_actions):
            # Showdown probabilities for keeping both cards or discarding one.
            win_pct_discard_none = evaluate([hole1, hole2], board, 100)
            win_pct_discard_1 = evaluate([hole2], board, 100)
            win_pct_discard_2 = evaluate([hole1], board, 100)

            inputs = [
                self._action_input(last_in_pot_hero, last_in_pot_villain,
                                   street, 0, win_pct_discard_none),
                self._action_input(last_in_pot_hero, last_in_pot_villain,
                                   street, 1, win_pct_discard_1),
                self._action_input(last_in_pot_hero, last_in_pot_villain,
                                   street, 1, win_pct_discard_2),
            ]
            action_strs = ['CHECK', 'DISCARD:' + hole1, 'DISCARD:' + hole2]
            return inputs, action_strs

        # Available actions should be one of the following:
        #   BET:minBet:maxBet, CALL, CHECK, FOLD, RAISE:minRaise:maxRaise
        showdown_prob = evaluate([hole1, hole2], board, 100)
        inputs = []
        action_strs = []

        # Checking, or folding when a check is not available. Both use the
        # same "put nothing more in" feature row.
        inputs.append(self._action_input(
            last_in_pot_hero, last_in_pot_villain, street, 0, showdown_prob))
        action_strs.append("CHECK" if can_check(possible_actions) else "FOLD")

        # Calling: the hero's amount is set to the villain's last amount.
        if can_call(possible_actions):
            call_amt = bot.temporal_feature_matrix[1, -1]
            inputs.append(self._action_input(
                call_amt, last_in_pot_villain, street, 0, showdown_prob))
            action_strs.append("CALL")

        # Betting or raising.
        cpmi, action_type, min_bet, max_bet = can_put_money_in(possible_actions)
        if cpmi:
            max_of_prev_street = bot.get_max_of_prev_street(0)
            bet_sizes = list(range(min_bet, max_bet + 1, BET_INCREMENT))
            # Tack on max_bet when the stepping skipped it. The original
            # read the loop variable after the loop, which raised NameError
            # when the range was empty.
            if bet_sizes and bet_sizes[-1] != max_bet:
                bet_sizes.append(max_bet)
            for amount in bet_sizes:
                inputs.append(self._action_input(
                    float(amount) / STACKSIZE + max_of_prev_street,
                    last_in_pot_villain, street, 0, showdown_prob))
                action_strs.append(action_type + ":" + str(amount))

        return inputs, action_strs

    def evaluate_Q_function(self, S, a):
        """Predict a Q-value for every candidate action.

        Args:
            S: Current state matrix; each action row is stacked under it.
            a: Non-empty list of ``[temporal_row, static_value]`` pairs, as
                returned by ``enumerate_next_action_vectors``.

        Returns:
            tuple: ``(predictions, possible_states)`` where
            ``possible_states`` is ``[temporal_batch, static_batch]``, the
            exact input given to ``self.Q.predict``.
        """
        Q_in_temporal = np.vstack((S, a[0][0])).reshape((1, -1, S.shape[1]))
        Q_in_static = a[0][1]
        for action in a[1:]:
            q_i = np.vstack((S, action[0])).reshape((1, -1, S.shape[1]))
            Q_in_temporal = np.vstack((Q_in_temporal, q_i))
            Q_in_static = np.vstack((Q_in_static, action[1]))
        possible_states = [Q_in_temporal, Q_in_static]
        return self.Q.predict(possible_states), possible_states

    def update_Q_function(self, bot):
        """Fit the network towards the target for the previously chosen action.

        Does nothing until ``make_decision`` has stored ``self.new_state``.
        The target is ``bot.hand['winnings']`` when that is non-zero
        (treated as the end of the hand), otherwise ``Q_gamma`` times the
        best predicted Q-value of the current state.
        """
        # self.new_state is the move we made at the last decision point.
        if len(self.new_state) > 0:  # only update if tfm is initialized.
            print("YO!! Updating Q-function")
            actions, action_strs = self.enumerate_next_action_vectors(bot)
            Qvals, possible_states = self.evaluate_Q_function(
                bot.temporal_feature_matrix.T, actions)

            reward = bot.hand['winnings']
            if reward != 0:  # hand is over -> terminal state.
                target = reward
            else:
                # Still playing the hand: the reward is zero, so the target
                # is simply the discounted expected future reward.
                maxQ = np.max(Qvals)
                target = self.Q_gamma * maxQ
            print(target)
            self.Q.fit(self.new_state,
                       np.asarray(target).reshape((1, 1)),
                       batch_size=1,
                       nb_epoch=5,
                       verbose=0)

    def learn_from_last_action(self, bot):
        """Alias for ``update_Q_function``."""
        self.update_Q_function(bot)

    def get_reward_for_folding(self, bot):
        """Return minus the hero's last in-pot fraction scaled by 200.0.

        Fixed: the original read ``self.bot``, an attribute this class never
        sets, instead of the ``bot`` argument.
        """
        return -bot.temporal_feature_matrix[0, -1] * 200.0

    def get_epsilon_value(self):
        """Decay epsilon by a factor of 0.995 and return the new value."""
        self.epsilon = 0.995 * self.epsilon
        return self.epsilon

    def make_decision(self, bot):
        """Choose an action epsilon-greedily and remember its network input.

        Returns:
            str: The chosen entry of the action strings.
        """
        print("Making Q decision")
        actions, action_strs = self.enumerate_next_action_vectors(bot)
        # Evaluate the Q-function over all enumerated actions.
        Qvals, possible_states = self.evaluate_Q_function(
            bot.temporal_feature_matrix.T, actions)

        if random.random() > self.get_epsilon_value():
            best_idx = np.argmax(Qvals)
        else:
            print("woowowow taking random action!!")
            best_idx = random.randint(0, Qvals.shape[0] - 1)

        best_temporal_in = possible_states[0][best_idx]
        best_static_in = possible_states[1][best_idx]
        # Stored with a leading batch axis so it can be fed straight to fit().
        self.new_state = [
            best_temporal_in.reshape(
                (1, best_temporal_in.shape[0], best_temporal_in.shape[1])),
            best_static_in.reshape((1, 1))
        ]
        return action_strs[best_idx]
def __init__(self):
    """Create the ``HandEvaluator`` stored as ``self.hand_evaluator``."""
    evaluator = HandEvaluator()
    self.hand_evaluator = evaluator
class RationalBrain:
    """Rule-based decision maker driven by showdown probabilities."""

    def __init__(self):
        """Create the ``HandEvaluator`` used for every decision."""
        self.hand_evaluator = HandEvaluator()

    def make_decision(self, bot):
        """Return the action string for the current decision point.

        Args:
            bot: Poker player bot. Passing it gives this method access to
                its internals: ``possible_actions``, ``hand`` and
                ``temporal_feature_matrix``.

        Returns:
            str: ``"CHECK"``, ``"FOLD"``, ``"CALL"``, ``"DISCARD:<card>"``
            or ``"<BET|RAISE>:<amount>"``.
        """
        if is_discard_round(bot.possible_actions):
            action_str = self._decide_discard(bot)
        else:
            action_str = self._decide_bet(bot)
        print("ACTION_TAKEN -> " + action_str)
        return action_str

    def _decide_discard(self, bot):
        """Keep both cards or discard one, whichever has the best showdown odds."""
        print("This is a discard round")
        evaluate = self.hand_evaluator.evaluate_showdown_probabilities
        hole1, hole2 = bot.hand['hole1'], bot.hand['hole2']
        board = bot.hand['board']
        # Naive showdown probabilities: keep both, drop hole1, drop hole2.
        prob_vec = np.asarray([
            evaluate([hole1, hole2], board, 100),
            evaluate([hole2], board, 100),
            evaluate([hole1], board, 100),
        ])
        print(prob_vec)
        choices = ("CHECK", "DISCARD:" + hole1, "DISCARD:" + hole2)
        return choices[np.argmax(prob_vec)]

    @staticmethod
    def _stake_for(showdown_prob):
        """Map a showdown probability to a randomised stake.

        Fixed: the original bands excluded the exact boundary values 0.8,
        0.6 and 0.5, which fell through to a stake of 0.
        """
        if showdown_prob > 0.8:
            return random.randint(170, 200)
        if showdown_prob > 0.6:
            return random.randint(100, 169)
        if showdown_prob > 0.5:
            return random.randint(50, 100)
        if showdown_prob > 0.3:
            return random.randint(10, 50)
        return 0

    def _decide_bet(self, bot):
        """Decide how much money, if any, to put in on a betting round."""
        print("This is a Q-bet round")
        showdown_prob = self.hand_evaluator.evaluate_showdown_probabilities(
            [bot.hand['hole1'], bot.hand['hole2']], bot.hand['board'], 100)
        print("showdown prob:" + str(showdown_prob))
        stake_is_worth = self._stake_for(showdown_prob)

        # Available actions should be one of the following:
        #   BET:minBet:maxBet, CALL, CHECK, FOLD, RAISE:minRaise:maxRaise
        current_stake = np.sum(bot.temporal_feature_matrix[0]) * 200
        villain_hero_differential = bot.hand['pot_size'][-1] - current_stake
        stake_difference = stake_is_worth - villain_hero_differential
        print("Current stake: " + str(current_stake))
        print("Stake worth: " + str(stake_is_worth))
        print("Stake diff: " + str(stake_difference))

        checking_allowed = can_check(bot.possible_actions)
        if stake_difference < 0:
            return "CHECK" if checking_allowed else "FOLD"
        if stake_difference < 30:
            return "CHECK" if checking_allowed else "CALL"

        # Fixed: the original unpacked into a local named ``can_put_money_in``,
        # which shadowed the helper and made this call raise
        # UnboundLocalError.
        cpmi, action_type, min_bet, max_bet = can_put_money_in(
            bot.possible_actions)
        if not cpmi:
            # No BET/RAISE on offer: fall back to the passive action rather
            # than building a bet string from unusable values.
            return "CHECK" if checking_allowed else "CALL"

        if action_type == "RAISE":
            wanted = stake_difference + current_stake
        else:
            wanted = stake_difference
        bet_val = max(min(wanted, max_bet), min_bet)  # clamp to legal range
        # NOTE(review): bet_val can be a float here (current_stake comes from
        # np.sum); confirm the consumer accepts e.g. "RAISE:57.0".
        return action_type + ":" + str(bet_val)
def setUp(self, hand=None, eval=None):
    """Give each test a fresh ``Hand`` and ``HandEvaluator``.

    The ``hand`` and ``eval`` parameters are accepted but never read.
    """
    self.hand, self.eval = Hand(), HandEvaluator()
class HandEvaluator_Tester(unittest.TestCase):
    """Checks the category name returned by ``HandEvaluator.evaluate_hand``.

    Cards are built as ``Card(a, b)``; in these fixtures ``a`` ranges over
    0-3 and ``b`` over 1-13 (presumably suit and rank - confirm against
    ``Card``).
    """

    def setUp(self, hand=None, eval=None):
        """Give each test a fresh ``Hand`` and ``HandEvaluator``.

        The ``hand`` and ``eval`` parameters are accepted but never read.
        """
        self.hand = Hand()
        self.eval = HandEvaluator()

    def _deal(self, cards):
        """Replace the hand's first five cards with ``cards`` (``(a, b)`` pairs)."""
        for _ in range(5):
            self.hand.pop(0)
        for a, b in cards:
            self.hand.append(Card(a, b))

    def _timed_evaluate(self):
        """Evaluate the current hand, print the elapsed time, return the result."""
        # time.clock() no longer exists on current Python 3;
        # timeit.default_timer is available on both Python 2 and 3.
        from timeit import default_timer
        start = default_timer()
        result = self.eval.evaluate_hand(self.hand)
        elapsed_time = default_timer() - start
        # Parenthesised single argument prints the same on Python 2 and 3.
        print("Elapsed Time = %f" % elapsed_time)
        return result

    def test_hand_fullhouse(self):
        self._deal([(1, 1), (2, 1), (1, 2), (0, 2), (3, 1)])
        self.assertEqual(self._timed_evaluate(), "FullHouse")

    def test_hand_straight(self):
        self._deal([(1, 1), (2, 2), (1, 5), (0, 4), (3, 3)])
        self.assertEqual(self.eval.evaluate_hand(self.hand), "Straight")

    def test_hand_two_pair(self):
        self._deal([(1, 2), (2, 2), (1, 4), (0, 4), (3, 10)])
        self.assertEqual(self.eval.evaluate_hand(self.hand), "TwoPair")

    def test_hand_pair(self):
        self._deal([(1, 12), (2, 12), (1, 6), (0, 4), (3, 9)])
        self.assertEqual(self.eval.evaluate_hand(self.hand), "Pair")

    def test_hand_pair_not(self):
        self._deal([(1, 1), (2, 2), (1, 6), (0, 4), (3, 10)])
        self.assertNotEqual(self.eval.evaluate_hand(self.hand), "Pair")

    def test_hand_trips(self):
        # Fixed: the original dealt Card(1, 2) twice; the third card is now
        # distinct.
        self._deal([(1, 2), (2, 2), (3, 2), (0, 4), (3, 10)])
        self.assertEqual(self.eval.evaluate_hand(self.hand), "Trips")

    def test_hand_quads(self):
        self._deal([(1, 2), (2, 2), (3, 2), (0, 2), (3, 10)])
        self.assertEqual(self.eval.evaluate_hand(self.hand), "Quads")

    def test_hand_flush(self):
        self._deal([(2, 2), (2, 4), (2, 6), (2, 8), (2, 10)])
        self.assertEqual(self.eval.evaluate_hand(self.hand), "Flush")

    def test_hand_straight_flush(self):
        self._deal([(2, 2), (2, 3), (2, 4), (2, 5), (2, 6)])
        self.assertEqual(self.eval.evaluate_hand(self.hand), "StraightFlush")

    def test_hand_nothing(self):
        self._deal([(0, 7), (1, 6), (2, 3), (2, 13), (3, 10)])
        # NOTE(review): this asserts the result is NOT "Nothing", as in the
        # original; confirm what the evaluator returns for an unmade hand.
        self.assertNotEqual(self._timed_evaluate(), "Nothing")

    def test_hand_not_straight_flush(self):
        self._deal([(2, 1), (2, 10), (2, 11), (2, 12), (2, 13)])
        self.assertNotEqual(self.eval.evaluate_hand(self.hand), "StraightFlush")

    def test_hand_royal_flush(self):
        self._deal([(2, 1), (2, 10), (2, 11), (2, 12), (2, 13)])
        self.assertEqual(self.eval.evaluate_hand(self.hand), "RoyalFlush")

    def test_hand_wheel_straight(self):
        # NOTE(review): despite the name, this hand is 1-10-11-12-13 rather
        # than 1-2-3-4-5; the name is kept so test selection by name still
        # works.
        self._deal([(2, 1), (3, 10), (0, 11), (2, 12), (1, 13)])
        self.assertEqual(self.eval.evaluate_hand(self.hand), "Straight")