def build_street_start_message(self, state):
    """Build the notification sent when a street starts.

    The payload carries ``STREET_START_MESSAGE`` as its type, the encoded
    round state, and whatever keys ``DataEncoder.encode_street`` returns for
    ``state["street"]``. The result is wrapped by the notification helper.
    """
    payload = {"message_type": self.STREET_START_MESSAGE}
    payload["round_state"] = DataEncoder.encode_round_state(state)
    street_fields = DataEncoder.encode_street(state["street"])
    payload.update(street_fields)
    return self.__build_notification_message(payload)
def test_street_start_message(self):
    """The street-start message is a notification carrying street and round state."""
    game_state = self.__setup_state()
    envelope = MessageBuilder.build_street_start_message(game_state)
    body = envelope["message"]

    self.eq(MessageBuilder.STREET_START_MESSAGE, body["message_type"])
    self.eq("notification", envelope["type"])
    self.eq("flop", body["street"])
    expected_round_state = DataEncoder.encode_round_state(game_state)
    self.eq(expected_round_state, body["round_state"])
def test_street_start_message(self):
    """The street-start message is a notification carrying street and round state."""
    game_state = self.__setup_state()
    envelope = MessageBuilder.build_street_start_message(game_state)
    body = envelope['message']

    self.eq(MessageBuilder.STREET_START_MESSAGE, body['message_type'])
    self.eq('notification', envelope['type'])
    self.eq('flop', body['street'])
    expected_round_state = DataEncoder.encode_round_state(game_state)
    self.eq(expected_round_state, body['round_state'])
def build_round_result_message(self, round_count, winners, hand_info, state):
    """Build the notification that reports the result of a round.

    The payload holds the message type, ``round_count``, ``hand_info`` as
    given, the encoded round state, and the keys produced by
    ``DataEncoder.encode_winners(winners)``.
    """
    payload = dict(
        message_type=self.ROUND_RESULT_MESSAGE,
        round_count=round_count,
        hand_info=hand_info,
        round_state=DataEncoder.encode_round_state(state),
    )
    winner_fields = DataEncoder.encode_winners(winners)
    payload.update(winner_fields)
    return self.__build_notification_message(payload)
def build_game_update_message(self, player_pos, action, amount, state):
    """Build the notification describing an action a player just took.

    ``player_pos`` indexes ``state["table"].seats.players``; the selected
    player is encoded with ``action`` and ``amount``. The payload also holds
    the encoded round state and the table's encoded action histories.
    """
    acting_player = state["table"].seats.players[player_pos]
    payload = {"message_type": self.GAME_UPDATE_MESSAGE}
    payload["action"] = DataEncoder.encode_action(acting_player, action, amount)
    payload["round_state"] = DataEncoder.encode_round_state(state)
    payload["action_histories"] = DataEncoder.encode_action_histories(
        state["table"])
    return self.__build_notification_message(payload)
def build_round_result_message(self, round_count, winners, hand_info, state):
    """Build the notification that reports the result of a round.

    The round state is encoded with ``True`` as the second positional
    argument of ``DataEncoder.encode_round_state``. The keys produced by
    ``DataEncoder.encode_winners(winners)`` are merged into the payload.
    """
    payload = {"message_type": self.ROUND_RESULT_MESSAGE}
    payload["round_count"] = round_count
    payload["hand_info"] = hand_info
    payload["round_state"] = DataEncoder.encode_round_state(state, True)
    payload.update(DataEncoder.encode_winners(winners))
    return self.__build_notification_message(payload)
def build_round_result_message(self, round_count, winners, hand_info, state):
    """Build the notification that reports the result of a round.

    Combines the message type, round count, hand info and encoded round
    state with the keys from ``DataEncoder.encode_winners(winners)``, then
    wraps the result with the notification helper.
    """
    result_body = {
        'message_type': self.ROUND_RESULT_MESSAGE,
        'round_count': round_count,
        'hand_info': hand_info,
        'round_state': DataEncoder.encode_round_state(state),
    }
    encoded_winners = DataEncoder.encode_winners(winners)
    result_body.update(encoded_winners)
    return self.__build_notification_message(result_body)
def build_game_update_message(self, player_pos, action, amount, state):
    """Build the notification describing an action a player just took.

    The player at ``player_pos`` in ``state['table'].seats.players`` is
    encoded together with ``action`` and ``amount``. The encoded round state
    and the table's action histories are attached.
    """
    acting_player = state['table'].seats.players[player_pos]
    encoded_action = DataEncoder.encode_action(acting_player, action, amount)
    update_body = {
        'message_type': self.GAME_UPDATE_MESSAGE,
        'action': encoded_action,
        'round_state': DataEncoder.encode_round_state(state),
        'action_histories': DataEncoder.encode_action_histories(state['table']),
    }
    return self.__build_notification_message(update_body)
def test_game_update_message(self):
    """The game-update message echoes the encoded action, round state and histories."""
    game_state = self.__setup_state()
    game_table = game_state['table']
    caller = game_table.seats.players[1]

    envelope = MessageBuilder.build_game_update_message(
        1, 'call', 10, game_state)
    body = envelope['message']

    self.eq('notification', envelope['type'])
    self.eq(MessageBuilder.GAME_UPDATE_MESSAGE, body['message_type'])
    self.eq(DataEncoder.encode_action(caller, 'call', 10), body['action'])
    self.eq(DataEncoder.encode_round_state(game_state), body['round_state'])
    self.eq(
        DataEncoder.encode_action_histories(game_table),
        body['action_histories'])
def test_ask_message(self):
    """The ask message for seat 1 exposes its hole cards, valid actions and state."""
    game_state = self.__setup_state()
    game_table = game_state["table"]

    envelope = MessageBuilder.build_ask_message(1, game_state)
    body = envelope["message"]

    self.eq("ask", envelope["type"])
    self.eq(MessageBuilder.ASK_MESSAGE, body["message_type"])
    self.eq(["CA", "C2"], body["hole_card"])
    self.eq(3, len(body["valid_actions"]))
    self.eq(DataEncoder.encode_round_state(game_state), body["round_state"])
    self.eq(
        DataEncoder.encode_action_histories(game_table),
        body["action_histories"])
def _visualize_action_log(self, task, value_function, experience):
    """Render a human-readable log of one step taken by the agent.

    Args:
        task: Object providing ``generate_possible_actions(state)``.
        value_function: Object providing ``predict_value(state, action)``.
            When ``self.show_weights`` is true, its ``delegate`` is also
            inspected for features and model weights.
        experience: 4-tuple ``(state, action, next_state, reward)``; only
            ``state`` and ``action`` are used.

    Returns:
        A newline-joined string made of the visualized state, the action
        the agent took, and the predicted value of every possible action
        (followed by a per-action weight breakdown when
        ``self.show_weights`` is set).
    """
    state, action, _next_state, _reward = experience
    players = state["table"].seats.players
    # The agent is identified by the hard-coded uuid "uuid-0".
    me = [p for p in players if p.uuid == "uuid-0"][0]
    me_pos = players.index(me)
    sb_amount = state["small_blind_amount"]
    valid_actions = ActionChecker.legal_actions(players, me_pos, sb_amount)
    hole = [str(card) for card in me.hole_card]
    round_state = DataEncoder.encode_round_state(state)
    visualized_state = visualize_declare_action(valid_actions, hole, round_state)
    action_log = "Agent took action [ %s: %s (%s) ] at round %d" % (
        action["action"], action["amount"], action["name"],
        state["round_count"])

    actions = task.generate_possible_actions(state)
    act_vals = [value_function.predict_value(state, act) for act in actions]
    act_names = [act["name"] for act in actions]
    # Materialize the pairs: on Python 3 a bare zip object would be formatted
    # as "<zip object at 0x...>" instead of the (name, value) list.
    action_value_log = " => %s" % list(zip(act_names, act_vals))

    if self.show_weights:
        weights_log = ["** weights and features in detail **"]
        delegate = value_function.delegate
        features_title = delegate.generate_features_title()
        # NOTE(review): `blind_structure` is not bound inside this function;
        # presumably a module-level name -- confirm.
        features = delegate.construct_poker_features(
            "dummy", "dummy", round_state, me.uuid, hole,
            delegate.handicappers, blind_structure)
        # Presumably the first weight matrix transposed so each row belongs
        # to one action -- confirm against the model definition.
        w_for_acts = delegate.model.get_weights()[0].T
        bias = delegate.model.layers[0].b.get_value()
        weights_log.append("features : %s" % features)
        weights_log.append("bias : %s" % bias)
        for act in actions:
            weights_log.append("")
            act_idx = actions.index(act)
            act_val = act_vals[act_idx]
            weights = w_for_acts[act_idx].tolist()
            linear_comb = [
                (f * w, f, w, features_title[idx])
                for idx, (f, w) in enumerate(zip(features, weights))
            ]
            linear_comb.append((bias[act_idx], 0, 0, "bias"))
            # Largest absolute contribution first.
            linear_comb = sorted(
                linear_comb, key=lambda item: abs(item[0]))[::-1]
            # Keep only as many leading terms as are needed to reproduce the
            # predicted value to within 0.01.
            display_items = []
            for item in linear_comb:
                display_items.append(item)
                if abs(act_val - sum(tpl[0] for tpl in display_items)) < 0.01:
                    break
            weights_log.append("linear combination for %s :" % act["name"])
            for comb in display_items:
                weights_log.append(" %s" % str(comb))
        action_value_log += "\n" + "\n".join(weights_log)

    return "\n".join([visualized_state, action_log, action_value_log])
def test_game_update_message(self):
    """The game-update message echoes the encoded action, round state and histories."""
    game_state = self.__setup_state()
    game_table = game_state["table"]
    caller = game_table.seats.players[1]

    envelope = MessageBuilder.build_game_update_message(
        1, "call", 10, game_state)
    body = envelope["message"]

    self.eq("notification", envelope["type"])
    self.eq(MessageBuilder.GAME_UPDATE_MESSAGE, body["message_type"])
    self.eq(DataEncoder.encode_action(caller, "call", 10), body["action"])
    self.eq(DataEncoder.encode_round_state(game_state), body["round_state"])
    self.eq(
        DataEncoder.encode_action_histories(game_table),
        body["action_histories"])
def test_ask_message(self):
    """The ask message for seat 1 exposes its hole cards, valid actions and state."""
    game_state = self.__setup_state()
    game_table = game_state['table']

    envelope = MessageBuilder.build_ask_message(1, game_state)
    body = envelope['message']

    self.eq('ask', envelope['type'])
    self.eq(MessageBuilder.ASK_MESSAGE, body['message_type'])
    self.eq(['2d', '3d'], body['hole_card'])
    self.eq(3, len(body['valid_actions']))
    self.eq(DataEncoder.encode_round_state(game_state), body['round_state'])
    self.eq(
        DataEncoder.encode_action_histories(game_table),
        body['action_histories'])
def test_transit_state_till_round_finish(self):
    """Calling on every street walks flop -> turn -> river -> terminal state.

    All nine opponents use a mocked value function that scores "call" as 1
    and every other action as 0. The agent always picks the action at
    index 1 of the possible actions.
    """
    self.task = TexasHoldemTask(final_round=1)

    def recommend_call(state, action):
        return 1 if action["action"] == "call" else 0

    opponent_value_func = Mock()
    opponent_value_func.predict_value.side_effect = recommend_call
    self.task.set_opponent_value_functions([opponent_value_func] * 9)

    state = self.task.generate_initial_state()

    # First call: the round should reach the flop.
    chosen = self.task.generate_possible_actions(state)[1]
    state = self.task.transit_state(state, chosen)
    players = state["table"].seats.players
    encoded = DataEncoder.encode_round_state(state)
    self.eq(1, state["round_count"])
    self.eq(0, state["table"].dealer_btn)
    self.eq(6, state["next_player"])
    self.eq("flop", encoded["street"])
    self.eq(500, encoded["pot"]["main"]["amount"])

    # Each further call advances one street with the pot unchanged.
    for expected_street in ("turn", "river"):
        chosen = self.task.generate_possible_actions(state)[1]
        state = self.task.transit_state(state, chosen)
        encoded = DataEncoder.encode_round_state(state)
        self.eq(6, state["next_player"])
        self.eq(expected_street, encoded["street"])
        self.eq(500, encoded["pot"]["main"]["amount"])

    # The final call ends the round, and with final_round=1 the task.
    chosen = self.task.generate_possible_actions(state)[1]
    state = self.task.transit_state(state, chosen)
    self.true(self.task.is_terminal_state(state))
def test_round_result_message(self):
    """The round-result message carries round count, hand info, winners and state."""
    game_state = self.__setup_state()
    round_winners = game_state['table'].seats.players[1:2]
    dummy_hand_info = ['dummy', 'info']

    envelope = MessageBuilder.build_round_result_message(
        7, round_winners, dummy_hand_info, game_state)
    body = envelope['message']

    self.eq('notification', envelope['type'])
    self.eq(MessageBuilder.ROUND_RESULT_MESSAGE, body['message_type'])
    self.eq(7, body['round_count'])
    self.eq(dummy_hand_info, body['hand_info'])
    expected_winners = DataEncoder.encode_winners(round_winners)['winners']
    self.eq(expected_winners, body['winners'])
    self.eq(DataEncoder.encode_round_state(game_state), body['round_state'])
def build_round_result_message(self, round_count, winners, hand_info, state):
    """Build the round-result notification, keeping only the winners' hand info.

    Entries of ``hand_info`` are kept when their ``'uuid'`` matches the
    ``uuid`` of one of ``winners``. The round state is encoded with ``True``
    as the second positional argument of ``DataEncoder.encode_round_state``.
    """
    winner_ids = [winner.uuid for winner in winners]
    winners_hand_info = [
        entry for entry in hand_info if entry['uuid'] in winner_ids
    ]
    payload = {"message_type": self.ROUND_RESULT_MESSAGE}
    payload["round_count"] = round_count
    payload["hand_info"] = winners_hand_info
    payload["round_state"] = DataEncoder.encode_round_state(state, True)
    payload.update(DataEncoder.encode_winners(winners))
    return self.__build_notification_message(payload)
def test_round_result_message(self):
    """The round-result message carries round count, hand info, winners and state."""
    game_state = self.__setup_state()
    round_winners = game_state["table"].seats.players[1:2]
    dummy_hand_info = ["dummy", "info"]

    envelope = MessageBuilder.build_round_result_message(
        7, round_winners, dummy_hand_info, game_state)
    body = envelope["message"]

    self.eq("notification", envelope["type"])
    self.eq(MessageBuilder.ROUND_RESULT_MESSAGE, body["message_type"])
    self.eq(7, body["round_count"])
    self.eq(dummy_hand_info, body["hand_info"])
    expected_winners = DataEncoder.encode_winners(round_winners)["winners"]
    self.eq(expected_winners, body["winners"])
    self.eq(DataEncoder.encode_round_state(game_state), body["round_state"])
def construct_features(self, state, action):
    """Build the feature vector for ``action`` from the next player's viewpoint.

    The acting player is the one at index ``state["next_player"]``; the hole
    cards of the first player sharing that uuid are stringified and passed,
    with the encoded round state, to ``self.construct_poker_features``.

    Returns:
        Tuple ``(features, action)``.
    """
    my_uuid = state["table"].seats.players[state["next_player"]].uuid
    matching_players = [
        player for player in state["table"].seats.players
        if player.uuid == my_uuid
    ]
    my_hole_card = matching_players[0].hole_card
    hole_as_text = [str(card) for card in my_hole_card]
    encoded_state = DataEncoder.encode_round_state(state)
    feature_vector = self.construct_poker_features(
        state, action, encoded_state, my_uuid, hole_as_text,
        self.handicappers, self.blind_structure)
    return feature_vector, action
def build_ask_message(self, player_pos, state):
    """Build the message asking the player at ``player_pos`` to act.

    The payload contains the player's encoded hole cards, the legal actions
    from ``ActionChecker.legal_actions``, the encoded round state and the
    table's encoded action histories.
    """
    seated_players = state["table"].seats.players
    asked_player = seated_players[player_pos]
    encoded_player = DataEncoder.encode_player(asked_player, holecard=True)
    hole_card = encoded_player["hole_card"]
    legal_actions = ActionChecker.legal_actions(
        seated_players, player_pos, state["small_blind_amount"])
    payload = {"message_type": self.ASK_MESSAGE}
    payload["hole_card"] = hole_card
    payload["valid_actions"] = legal_actions
    payload["round_state"] = DataEncoder.encode_round_state(state)
    payload["action_histories"] = DataEncoder.encode_action_histories(
        state["table"])
    return self.__build_ask_message(payload)
def test_encode_round_state(self):
    """``encode_round_state`` exposes every round-level field of the state."""
    state = setup_round_state()
    state["table"].set_blind_pos(1, 3)
    table = state["table"]

    encoded = DataEncoder.encode_round_state(state)

    self.eq("flop", encoded["street"])
    self.eq(DataEncoder.encode_pot(table.seats.players), encoded["pot"])
    self.eq(DataEncoder.encode_seats(table.seats)["seats"], encoded["seats"])
    self.eq(["CA"], encoded["community_card"])
    self.eq(table.dealer_btn, encoded["dealer_btn"])
    self.eq(state["next_player"], encoded["next_player"])
    self.eq(1, encoded["small_blind_pos"])
    self.eq(3, encoded["big_blind_pos"])
    expected_histories = DataEncoder.encode_action_histories(
        table)["action_histories"]
    self.eq(expected_histories, encoded["action_histories"])
    self.eq(state["round_count"], encoded["round_count"])
    self.eq(state["small_blind_amount"], encoded["small_blind_amount"])
def build_ask_message(self, player_pos, state):
    """Build the message asking the player at ``player_pos`` to act.

    The payload contains the player's encoded hole cards, the legal actions
    from ``ActionChecker.legal_actions``, the encoded round state and the
    table's encoded action histories.
    """
    seated_players = state['table'].seats.players
    asked_player = seated_players[player_pos]
    encoded_player = DataEncoder.encode_player(asked_player, holecard=True)
    ask_body = {
        'message_type': self.ASK_MESSAGE,
        'hole_card': encoded_player['hole_card'],
        'valid_actions': ActionChecker.legal_actions(
            seated_players, player_pos, state['small_blind_amount']),
        'round_state': DataEncoder.encode_round_state(state),
        'action_histories': DataEncoder.encode_action_histories(state['table']),
    }
    return self.__build_ask_message(ask_body)
def build_ask_message(self, player_pos, state):
    """Build the message asking the player at ``player_pos`` to act.

    Hole cards come from ``DataEncoder.encode_player(..., holecard=True)``
    and valid actions from ``ActionChecker.legal_actions``. The encoded
    round state and action histories complete the payload.
    """
    seated_players = state["table"].seats.players
    asked_player = seated_players[player_pos]
    hole_card = DataEncoder.encode_player(
        asked_player, holecard=True)["hole_card"]
    small_blind = state["small_blind_amount"]
    legal_actions = ActionChecker.legal_actions(
        seated_players, player_pos, small_blind)
    ask_body = dict(
        message_type=self.ASK_MESSAGE,
        hole_card=hole_card,
        valid_actions=legal_actions,
        round_state=DataEncoder.encode_round_state(state),
        action_histories=DataEncoder.encode_action_histories(state["table"]),
    )
    return self.__build_ask_message(ask_body)
def test_encode_round_state(self):
    """``encode_round_state`` exposes every round-level field of the state."""
    state = setup_round_state()
    state['table'].set_blind_pos(1, 3)
    table = state['table']

    encoded = DataEncoder.encode_round_state(state)

    self.eq('flop', encoded['street'])
    self.eq(DataEncoder.encode_pot(table.seats.players), encoded['pot'])
    self.eq(DataEncoder.encode_seats(table.seats)['seats'], encoded['seats'])
    self.eq(['2d'], encoded['community_card'])
    self.eq(table.dealer_btn, encoded['dealer_btn'])
    self.eq(state['next_player'], encoded['next_player'])
    self.eq(1, encoded['small_blind_pos'])
    self.eq(3, encoded['big_blind_pos'])
    expected_histories = DataEncoder.encode_action_histories(
        table)['action_histories']
    self.eq(expected_histories, encoded['action_histories'])
    self.eq(state['round_count'], encoded['round_count'])
    self.eq(state['small_blind_amount'], encoded['small_blind_amount'])
def test_encode_round_state(self):
    """``encode_round_state`` exposes every round-level field of the state."""
    round_fixture = setup_round_state()
    round_fixture["table"].set_blind_pos(1, 3)
    fixture_table = round_fixture["table"]

    result = DataEncoder.encode_round_state(round_fixture)

    self.eq("flop", result["street"])
    self.eq(DataEncoder.encode_pot(fixture_table.seats.players), result["pot"])
    expected_seats = DataEncoder.encode_seats(fixture_table.seats)["seats"]
    self.eq(expected_seats, result["seats"])
    self.eq(["CA"], result["community_card"])
    self.eq(fixture_table.dealer_btn, result["dealer_btn"])
    self.eq(round_fixture["next_player"], result["next_player"])
    self.eq(1, result["small_blind_pos"])
    self.eq(3, result["big_blind_pos"])
    expected_histories = DataEncoder.encode_action_histories(
        fixture_table)["action_histories"]
    self.eq(expected_histories, result["action_histories"])
    self.eq(round_fixture["round_count"], result["round_count"])
    self.eq(round_fixture["small_blind_amount"], result["small_blind_amount"])
def test_construct_scaled_scalar_features_with_action_record(self):
    """The feature vector with action records has ``35 + 10 * 4`` entries.

    All nine opponents use a mocked value function that returns 1 for every
    action. The round state is encoded from the initial state, before the
    single transition is applied.
    """
    task = T.TexasHoldemTask(final_round=10, action_record=True)

    def recommend_random_action(state, action):
        return 1

    opponent_value_func = Mock()
    opponent_value_func.predict_value.side_effect = recommend_random_action
    task.set_opponent_value_functions([opponent_value_func] * 9)

    state = task.generate_initial_state()
    round_state = DataEncoder.encode_round_state(state)
    blind_structure = {1: "dummy", 3: "dummy", 5: "dummy", 10: "dummy"}

    # Only one transition is applied; a second one and a debugger stop were
    # left disabled in the original test.
    chosen = task.generate_possible_actions(state)[1]
    state = task.transit_state(state, chosen)

    feature_vec = F.construct_scaled_scalar_features_with_action_record(
        state, round_state, T.my_uuid, ["S2", "D4"], blind_structure,
        "dummy_action", algorithm="simulation")
    self.size(35 + 10 * 4, feature_vec)