Example #1
    def _set_call_matrix(self, board):
        street = card_tools.board_to_street(board)
        self.equity_matrix = arguments.Tensor(
            game_settings.card_count, game_settings.card_count).zero_()

        if street == 0:
            # iterate through all possible next-round boards
            next_round_boards = card_tools.get_second_round_boards()
            boards_count = next_round_boards.size(0)
            next_round_equity_matrix = arguments.Tensor(
                game_settings.card_count, game_settings.card_count)
            for board_idx in range(boards_count):
                self.get_last_round_call_matrix(next_round_boards[board_idx],
                                                next_round_equity_matrix)
                self.equity_matrix.add_(next_round_equity_matrix)
            # average the accumulated call values: the weight is 1 over the number
            # of possible next-round boards once the two private cards are excluded
            weight_constant = (1 / (game_settings.card_count - 2)
                               if game_settings.board_card_count == 1
                               else 2 / ((game_settings.card_count - 2) *
                                         (game_settings.card_count - 3)))
            self.equity_matrix.mul_(weight_constant)
        elif street == 1:
            #for last round we just return the matrix
            self.get_last_round_call_matrix(board, self.equity_matrix)
        else:
            # impossible street
            assert False, 'impossible street'
Example #2
    def _get_terminal_value(self, state, player):
        # the opponent's action led to this terminal node

        node = state.node
        assert (node.terminal)
        value = arguments.Tensor([0, 0])

        if node.type == constants.node_types.terminal_fold:
            # terminal fold
            value[node.current_player] = node.bets.sum()

        elif node.type == constants.node_types.terminal_call:
            # showdown
            player_hand = arguments.Tensor(
                state.private[node.current_player].tolist() +
                node.board.tolist())
            player_strength = evaluator.evaluate(player_hand, -1)
            oppo_hand = arguments.Tensor(
                state.private[1 - node.current_player].tolist() +
                node.board.tolist())
            oppo_strength = evaluator.evaluate(oppo_hand, -1)

            # the whole pot goes to the hand with the lower evaluator value; ties split
            if player_strength < oppo_strength:
                value[node.current_player] = node.bets.sum()
            elif player_strength > oppo_strength:
                value[1 - node.current_player] = node.bets.sum()
            else:
                value = node.bets.clone()
        else:
            assert False, 'not a valid terminal node'

        return value[player]
Example #3
def dfs_fill_table(node, table, dqnmodel, builder):
    if node.terminal:
        return


#    if node.current_player == constants.players.chance:
#        node.table = arguments.Tensor([])
#        node.rl = arguments.Tensor([])
#        children = node.children
#        for child in children:
#            dfs_fill_table(child,table, dqnmodel, builder)
#        return

    # sl: normalize the accumulated action counts into an average strategy
    all_table = table[node.node_id, :, :]
    #    print(node.node_id)
    for i in range(all_table.size(0)):
        all_table[i, :] = all_table[i, :] / all_table[i, :].sum()

    node.table = all_table

    # rl: query the DQN for every possible private card and stack the outputs
    node.rl = arguments.Tensor([])  # start empty; torch.cat below appends one row per card
    for i in range(game_settings.card_count):
        state = GameState()
        state.node = node
        state.private = [arguments.Tensor([i]), arguments.Tensor([i])]
        state_tensor = builder.statenode_to_tensor(state)
        node.rl = torch.cat(
            (node.rl, dqnmodel(Variable(state_tensor, volatile=True)).data), 0)

    children = node.children
    for child in children:
        dfs_fill_table(child, table, dqnmodel, builder)
Example #4
    def dfs_fill_strategy(self, agent_sl, node, builder):
        if node.terminal:
            return
        if node.current_player == constants.players.chance:
            node.table = arguments.Tensor([])
            node.rl = arguments.Tensor([])
            children = node.children
            for child in children:
                self.dfs_fill_strategy(agent_sl, child, builder)
            return
            
        # sl: fill one strategy column per possible private card
        for card in range(game_settings.card_count):
            state = GameState()
            for player in range(game_settings.player_count):
                state.private.append(arguments.Tensor([card]))
            state.node = node
            tensor = builder.statenode_to_tensor(state)
            strategy = agent_sl.model(Variable(tensor)).data[0][0:len(node.children)]
            if isinstance(agent_sl, DQNOptim):
                # a DQN outputs Q-values, so turn them into a near-greedy strategy:
                # small mass on every non-maximal action, the rest on the argmax
                # print(strategy)
                max_ix = strategy.lt(strategy.max())
                strategy[max_ix] = 0.0001
                strategy[1 - max_ix] = 1
            strategy.div_(strategy.sum())
            node.strategy[:,card] = strategy

        children = node.children
        for child in children:
            self.dfs_fill_strategy(agent_sl, child, builder)
Example #5
    def dfs_fill_table(self, node, table, builder):
        if node.terminal:
            return
        if node.current_player == constants.players.chance:
            node.table = arguments.Tensor([])
            node.rl = arguments.Tensor([])
            children = node.children
            for child in children:
                self.dfs_fill_table(child,table, builder)
            return
                
        # sl: slice this node's table and transpose it to (actions, cards)
        all_table = table[node.node_id, :, 0:len(node.children)]
        node.table = torch.transpose(all_table.clone(), 0, 1)

    #    print(node.node_id)

        # normalize each card's column into a probability distribution
        for i in range(node.table.size(1)):
            node.table[:, i].div_(node.table[:, i].sum())
        
        node.strategy = node.table.clone()

    
#        print(node.strategy)
        children = node.children
        for child in children:
            self.dfs_fill_table(child,table, builder)
Example #6
    def state2tensor(self, state):
        if state is None:
            return None
        # print(state.action_string)

        # street one-hot, 4 slots per street  # 4 /32 0-31
        street_tensor = arguments.Tensor(constants.streets_count * 4).fill_(0)
        street_tensor[int(state.street) * 4:int(state.street + 1) * 4] = 1

        # position one-hot, 4 slots per player  # /48 32-80
        position_tensor = arguments.Tensor(game_settings.player_count *
                                           4).fill_(0)
        position_tensor[state.current_player * 4:(state.current_player + 1) *
                        4] = 1

        # active-player mask  # /6 81-86
        active_tensor = arguments.Tensor(game_settings.player_count)
        active_tensor[state.active] = 1

        # bet buckets, one-hot per player  # /60 87-146
        bet_tensor = arguments.Tensor(arguments.bet_bucket *
                                      game_settings.player_count).fill_(0)
        for i in range(game_settings.player_count):
            bet_tensor[i * arguments.bet_bucket +
                       int((state.bets[i] - 1) / arguments.bet_bucket_len)] = 1

        # transform pot fractions, one-hot per player  # 60 87-146
        pot_size = state.bets.max().item()
        pot_tensor = arguments.Tensor(
            len(arguments.pot_times) * game_settings.player_count).fill_(0)
        for i in range(game_settings.player_count):
            for j in range(len(arguments.pot_times)):
                if state.bets[i] < arguments.pot_times[j] * pot_size:
                    pot_tensor[i * len(arguments.pot_times) + j] = 1
                    break


#      print(node.bets)
#      print(bet_player_tensor)
#      print(bet_oppo_tensor)

# transform hand(private and board) 52
#      print(len(state.private)) 52
        private_tensor = self._cards_to_tensor(
            state.hole[state.current_player])
        board_tensor = self._cards_to_tensor(state.board)

        # concatenate all feature blocks into a single (1, N) row tensor
        return_tensor = torch.unsqueeze(
            torch.cat((
                street_tensor,
                position_tensor,
                active_tensor,
                bet_tensor,
                pot_tensor,
                private_tensor,
                board_tensor,
            ), 0), 0)
        return return_tensor
Example #7
    def statenode_to_tensor(self, state):
        #      tensor = arguments.Tensor(constants.player_count, \
        #                                constants.streets_count, \
        #                                constants.raises_count, \
        #                                constants.acions_count, \
        #                                constants.card_count * 2).fill_(0)
        if state is None:
            return torch.unsqueeze(arguments.Tensor(20), 0)

        # street one-hot (node.street is 1-based here, hence the -1)
        street_tensor = arguments.Tensor(constants.streets_count).fill_(0)
        street_tensor[state.node.street - 1] = 1

        # bets normalized by the stack size
        bets_tensor = state.node.bets / arguments.stack

        # transform hand(private and board)
        assert (len(state.private) == 2)
        private_tensor = card_tools.hand_to_tensor(
            state.private[state.node.current_player])
        board_tensor = card_tools.hand_to_tensor(state.node.board)

        return torch.unsqueeze(
            torch.cat(
                (street_tensor, bets_tensor, private_tensor, board_tensor), 0),
            0)
Example #8
def string_to_board(card_string):

  if card_string == '':
    return arguments.Tensor([])

  return arguments.Tensor([string_to_card(card_string)])
Example #9
    def step(self, state, action, is_rl=False):
        pot_size = state.bets.sum()
        current_player = state.current_player
        current_bet = state.bets[current_player]

        vaild_action = self.get_vaild_action(state)

        action_taken = action
        # if the requested action index is invalid, clamp to the last valid action
        if action_taken >= len(vaild_action):
            action_taken = len(vaild_action) - 1
#            print(action)
        action_tuple = vaild_action[action_taken]

        # deep-copy the current state (may be slow)
        #        print(state.action_string)
        next_state = copy.deepcopy(state)
        state.next = next_state
        next_state.prev = state

        next_state.do_action(action_tuple)

        reward = arguments.Tensor([
            current_bet - next_state.bets[current_player]
        ]) if not self.distributed else arguments.Tensor([0])

        terminal = next_state.terminal

        # TODO !!!!! here we store action not action_taken
        #        self.store_memory(current_player, state, action, next_state, reward)
        action[0][0] = action_taken
        if is_rl:
            self.store_memory(current_player, state, action, reward)
#        assert(reward[0] < 10 and reward[0] > -10)
# only for debug
#        self.store_memory(current_player, state, action_tuple, next_state, reward)

        if next_state.terminal:
            terminal_value = next_state.get_terminal_value()
            for record in self.memory:
                if len(record) > 0:
                    record_player = record[-1].state.current_player
                    if self.distributed:
                        record[-1].reward.add_(terminal_value[record_player] -
                                               next_state.bets[record_player])
                    else:
                        record[-1].reward.add_(terminal_value[record_player])

            # correct the rewards for the small and big blinds
            if len(self.memory[0]) > 0 and len(
                    self.memory[1]) > 0 and not self.distributed:
                self.memory[0][-1].reward.sub_(50)
                self.memory[1][-1].reward.sub_(100)
#                self.memory[0][-1].reward.sub_(0.3)
#                self.memory[1][-1].reward.sub_(0.6)
            next_state = None

        return next_state, terminal, action_taken
Example #10
 def get_possible_hand_indexes(self, board):
     out = arguments.Tensor(game_settings.card_count).fill_(0)
     if board.dim() == 0:
         out.fill_(1)
         return out
     whole_hand = arguments.Tensor(board.size(0) + 1)
     # mjb: the leading entries are the board cards, the last slot is the candidate hand
     whole_hand[0:-1].copy_(board)
     for card in range(game_settings.card_count):
         whole_hand[-1] = card
         if self.hand_is_possible(whole_hand):
             out[card] = 1
     return out
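
A minimal usage sketch (illustrative only, not from the repo): it assumes `card_tools` is the module-level instance through which this method is called elsewhere in these snippets (e.g. Example #29), and that `board` is a 1-D tensor of already dealt board cards.

    # hypothetical usage: build a uniform range over the hands still possible on this board
    possible = card_tools.get_possible_hand_indexes(board)  # 1 where the hand does not clash with the board
    uniform_range = possible / possible.sum()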
Example #11
    def statenode_to_tensor(self, state):
        #      tensor = arguments.Tensor(constants.player_count, \
        #                                constants.streets_count, \
        #                                constants.raises_count, \
        #                                constants.acions_count, \
        #                                constants.card_count * 2).fill_(0)
        if state is None:
            return None
        node = state.node

        # street one-hot (node.street is 0-based here)
        street_tensor = arguments.Tensor(constants.streets_count).fill_(0)
        street_tensor[int(node.street)] = 1

        # position: four copies of the current player index
        position_tensor = arguments.Tensor(4).fill_(node.current_player)

        # bet buckets, one-hot per player
        bet_player_tensor = arguments.Tensor(arguments.bet_bucket).fill_(0)
        bet_player_tensor[int((node.bets[node.current_player] - 1) /
                              arguments.bet_bucket_len)] = 1
        bet_oppo_tensor = arguments.Tensor(arguments.bet_bucket).fill_(0)
        bet_oppo_tensor[int((node.bets[1 - node.current_player] - 1) /
                            arguments.bet_bucket_len)] = 1
        #      print(node.bets)
        #      print(bet_player_tensor)
        #      print(bet_oppo_tensor)

        # transform hand(private and board)
        #      print(len(state.private))
        assert (len(state.private) == 2)
        private_tensor = card_tools.hand_to_tensor(
            arguments.Tensor(state.private[node.current_player].tolist()))
        board_tensor = card_tools.hand_to_tensor(node.board)

        # transform hand strength (left commented out below)
        #      player_hand = arguments.Tensor(state.private[node.current_player].tolist() + node.board.tolist())
        #      evaluator = Evaluator()
        #      player_strength = evaluator.evaluate(player_hand, -1)
        #      strength_tensor = arguments.Tensor([player_strength])

        # street: 1-2 position 3 bets 4-5 private
        return_tensor = torch.unsqueeze(
            torch.cat((street_tensor, position_tensor, bet_player_tensor,
                       bet_oppo_tensor, private_tensor, board_tensor), 0), 0)
        #      print("private:" + str(state.private[node.current_player]))
        #      print("board:" + node.board_string)

        #      print(return_tensor)
        return return_tensor
Example #12
    def get_possible_bets(self, node):
        current_player = node.current_player
        assert 0 <= current_player < game_settings.player_count, \
            'Wrong player for bet size computation'
        opponent_bet = node.bets.max()

        assert (node.bets[current_player] <= opponent_bet)

        #compute min possible raise size
        max_raise_size = arguments.stack - opponent_bet
        min_raise_size = opponent_bet - node.bets[current_player]
        min_raise_size = max(min_raise_size, arguments.ante)
        min_raise_size = min(max_raise_size, min_raise_size)

        if min_raise_size == 0:
            return arguments.Tensor()
        elif min_raise_size == max_raise_size:
            out = arguments.Tensor(1, game_settings.player_count)
            out[0] = node.bets.clone()
            out[0][current_player] = opponent_bet + min_raise_size
            return out
        else:
            #iterate through all bets and check if they are possible
            max_possible_bets_count = self.pot_fractions.size(
                0) + 1  #we can always go allin
            #         out = arguments.Tensor(max_possible_bets_count,game_settings.player_count).copy_(node.bets)
            out = arguments.Tensor(max_possible_bets_count,
                                   game_settings.player_count)
            for i in range(max_possible_bets_count):
                out[i] = node.bets.clone()

            #take pot size after opponent bet is called
            pot = opponent_bet * 2
            used_bets_count = 0
            #try all pot fractions bet and see if we can use them
            for i in range(self.pot_fractions.size(0)):
                raise_size = pot * self.pot_fractions[i]
                if raise_size >= min_raise_size and raise_size < max_raise_size:
                    used_bets_count = used_bets_count + 1
                    out[used_bets_count - 1,
                        current_player] = opponent_bet + raise_size

            #adding allin
            used_bets_count = used_bets_count + 1
            assert (used_bets_count <= max_possible_bets_count)
            out[used_bets_count - 1,
                current_player] = opponent_bet + max_raise_size
            return out[0:used_bets_count, :]
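
For context, Example #28 consumes this return value by slicing the acting player's column; a small sketch under that assumption (`bet_sizing` being a BetSizing instance as constructed there):

    # hypothetical usage: list the raise amounts available to the acting player
    possible_bets = bet_sizing.get_possible_bets(node)
    if possible_bets.dim() != 0:
        raise_amounts = possible_bets[:, node.current_player]  # one amount per allowed raise, all-in last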
Example #13
    def batch_eval(self, board, impossible_hand_value = -1):

        hand_values = arguments.Tensor(game_settings.card_count).fill_(-1)
        if board.dim() == 0:
            # before the board is dealt, a hand's value is its rank: consecutive
            # suit_count card ids share the same rank (card ids are 0-based)
            for hand in range(game_settings.card_count):
                hand_values[hand] = math.floor(hand / game_settings.suit_count) + 1
        else:
            board_size = board.size(0)
            assert board_size == 1 or board_size == 2, 'Incorrect board size for Leduc'
            whole_hand = arguments.Tensor(board_size + 1)
            whole_hand[0:-1].copy_(board)
            for card in range(game_settings.card_count):
                whole_hand[-1] = card
                hand_values[card] = self.evaluate(whole_hand, impossible_hand_value)
        return hand_values
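
As a quick arithmetic check of the pre-board branch (a sketch assuming a standard 6-card, 2-suit Leduc deck, which may differ from the repo's game_settings):

    # hypothetical check: with card_count = 6 and suit_count = 2 the pre-board values are
    # floor(hand / 2) + 1 for hand = 0..5  ->  [1, 1, 2, 2, 3, 3], one value per rank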
Example #14
   def parsed_state_to_nodestate(self, processed_state):
       node = Node()
       node.street = processed_state['current_street']
       node.board = card_to_string.string_to_board(processed_state['board'])
       node.current_player = processed_state['acting_player']
       node.bets = arguments.Tensor([processed_state['bet1'], processed_state['bet2']])
       
       state = GameState()
       state.node = node
       
       # TODO(mjb): the private card is hard-coded here
       state.private = [-1 for i in range(game_settings.player_count)]
       state.private[node.current_player] = arguments.Tensor([processed_state['hand_id']])
 
       return state
Example #15
    def finish_episode(self, env_memory):
        self.model.train()
        self.steps_done += 1

        policy_loss = []

        for i_agent in range(len(env_memory)):
            if len(env_memory[i_agent]) == 0:
                continue
            env_reward = reinf_tran(*zip(*env_memory[i_agent])).reward
            rewards = []
            R = 0
            for r in env_reward:
                R = r + arguments.gamma * R
                rewards.insert(0, R)
            rewards = arguments.Tensor(rewards)
            # rewards = (rewards - rewards.mean()) / (rewards.std().item() + np.finfo(np.float32).eps)
            rewards = rewards / arguments.stack
            for log_prob, reward in zip(self.model.saved_log_probs[i_agent],
                                        rewards):
                policy_loss.append(-log_prob * reward)

        self.optimizer.zero_grad()
        policy_loss = torch.cat(policy_loss).sum()
        policy_loss.backward()
        for param in self.model.parameters():
            param.grad.data.clamp_(0, 1)
        self.optimizer.step()
        for i_policy, _ in enumerate(self.model.saved_log_probs):
            self.model.saved_log_probs[i_policy] = []
Example #16
 def __init__(self):
     self.current_player = -2
     self.node_type = ""
     self.type = ""
     self.street = -1
     self.board = ""
     self.board_string = ""
     self.bets = arguments.Tensor(2)
     self.pot = 0
     self.parent = Node
     self.children = []
     self.terminal = False
     self.actions = []
     self.bet_sizing = []
     self.node_id = 0
     self.strategy = arguments.Tensor([])
Example #17
    def build_tree(self, params):
        root = Node()
        # copy the necessary fields from root_node so we do not modify the input
        root.street = params['root_node']['street']
        root.bets = params['root_node']['bets'].clone()
        root.current_player = params['root_node']['current_player']
        root.board = params['root_node']['board'].clone()
        root.board_string = card_to_string.cards_to_string(root.board)

        params['bet_sizing'] = (params['bet_sizing'] if 'bet_sizing' in params
                                else BetSizing(arguments.Tensor(arguments.bet_sizing)))

        assert (params['bet_sizing'])

        self.bet_sizing = params['bet_sizing']
        self.limit_to_street = params['limit_to_street']

        self._build_tree_dfs(root)

        #      mjb
        #      strategy_filling = StrategyFilling()
        #      strategy_filling.fill_uniform(root)

        return root
Example #18
 def _set_fold_matrix(self, board):
     self.fold_matrix = arguments.Tensor(game_settings.card_count,
                                         game_settings.card_count)
     self.fold_matrix.fill_(1)
     # set cards that block each other to zero - exactly the diagonal elements in Leduc variants
     self.fold_matrix.sub_(
         torch.eye(game_settings.card_count).type_as(self.fold_matrix))
     self._handle_blocking_cards(self.fold_matrix, board)
Example #19
    def hand_to_tensor(self, hand):
        hand_tensor = arguments.Tensor(game_settings.card_count).fill_(0)

        if hand.dim() == 0:
            return hand_tensor

        for card in hand:
            hand_tensor[int(card)] = 1
        return hand_tensor
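
A tiny usage sketch (illustrative; it assumes `card_tools` exposes this method at module level, as it is called in Examples #7 and #11):

    # hypothetical usage: one-hot encode a two-card hand over the whole deck
    hand = arguments.Tensor([0, 3])
    hand_mask = card_tools.hand_to_tensor(hand)  # 1.0 at indices 0 and 3, 0.0 elsewhere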
Example #20
    def get_random_range(self, board, seed=None):
        # a random range, normalized over the hands that are possible on the board
        gen = torch.Generator()
        if seed is not None:
            gen.manual_seed(seed)
        else:
            gen.seed()

        out = torch.rand(game_settings.card_count,
                         generator=gen).type_as(arguments.Tensor())
        out.mul_(self.get_possible_hand_indexes(board))
        out.div_(out.sum())

        return out
Example #21
    def get_second_round_boards(self):
        boards_count = self.get_boards_count()
        if game_settings.board_card_count == 1:
            out = arguments.Tensor(boards_count, 1)
            for card in range(game_settings.card_count):
                out[card, 0] = card
            return out
        elif game_settings.board_card_count == 2:
            out = arguments.Tensor(boards_count, 2)
            board_idx = 0
            for card_1 in range(game_settings.card_count):
                for card_2 in range(card_1 + 1, game_settings.card_count):
                    # fill row board_idx first, then advance (rows are 0-based)
                    out[board_idx, 0] = card_1
                    out[board_idx, 1] = card_2
                    board_idx = board_idx + 1

            assert board_idx == boards_count, 'wrong boards count!'
            return out
        else:
            assert False, 'unsupported board size'
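
A small sanity check for the two-board-card branch (a sketch; it assumes get_boards_count() returns the number of unordered card pairs, card_count choose 2):

    # hypothetical check: every unordered pair of distinct cards appears exactly once
    boards = card_tools.get_second_round_boards()
    expected = game_settings.card_count * (game_settings.card_count - 1) // 2
    assert boards.size(0) == expected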
Example #22
def main():
    import time
    time_start = time.time()
    total_reward = 0.0

    for i_episode in range(arguments.epoch_count):
        # choose the acting policy: 0 = supervised (sl), 1 = reinforcement (rl)
        flag = 0 if random.random() > arguments.eta else 1

        # Initialize the environment and state
        env.reset()
        state = env.state
        for t in count():
            state_tensor = builder.statenode_to_tensor(state)
            # Select and perform an action
            assert (state_tensor.size(1) == 20)

            if flag == 0:
                # sl
                action = table_sl.select_action(state)
            else:
                #rl
                action = dqn_optim.select_action(state_tensor)

            next_state, reward, done = env.step(state, int(action[0][0]))

            # transform to tensor
            next_state_tensor = builder.statenode_to_tensor(next_state)
            reward_tensor = arguments.Tensor([reward])
            action_tensor = action

            # store the transition in the reinforcement-learning memory Mrl
            dqn_optim.memory.push(state_tensor, action_tensor,
                                  next_state_tensor, reward_tensor)
            if flag == 1:
                # the action came from the rl policy, so store (s, a) in the supervised-learning memory Msl
                table_sl.store(state, action)

            # perform one step of the DQN optimization
            dqn_optim.optimize_model()
            # Move to the next state
            state = next_state

            # accumulate the reward
            total_reward = total_reward + reward

            if done:
                dqn_optim.episode_durations.append(t + 1)
                #                dqn_optim.plot_durations()
                break

    print('Complete')
    print((time.time() - time_start))
    print(total_reward / arguments.epoch_count)
Example #23
    def test(self, table_sl):
    
        builder = PokerTreeBuilder()
        
        params = {}
        
        params['root_node'] = {}
        params['root_node']['board'] = card_to_string.string_to_board('')
        params['root_node']['street'] = 0
        params['root_node']['current_player'] = constants.players.P1
        params['root_node']['bets'] = arguments.Tensor([100, 100])
        params['limit_to_street'] = False
        
        tree = builder.build_tree(params)
        
#        table_sl = torch.load('/home/mjb/Nutstore/deepStack/Data/Model/Iter:' + str(model_num) + '.sl')

        # construct the starting ranges
        filling = StrategyFilling()

        range1 = card_tools.get_uniform_range(params['root_node']['board'])
        range2 = card_tools.get_uniform_range(params['root_node']['board'])

        filling.fill_uniform(tree)


        starting_ranges = arguments.Tensor(game_settings.player_count, game_settings.card_count)
        starting_ranges[0].copy_(range1)
        starting_ranges[1].copy_(range2)
        
        table_sl.model.eval()
#        self.dfs_fill_table(tree, table_sl,builder)
        self.dfs_fill_strategy(table_sl,tree, builder)
        
        tree_values = TreeValues()
        tree_values.compute_values(tree, starting_ranges)
        
        
        
        print('Exploitability: ' + str(tree.exploitability.item()) + '[chips]' )
        return tree.exploitability.item()
Example #24
    def _fill_uniformly(self, node):
        assert (node.current_player == constants.players.P1
                or node.current_player == constants.players.P2)

        if (node.terminal):
            return

#      assert(node.current_player >= 0 )

        node.strategy = arguments.Tensor(len(node.children),
                                         game_settings.card_count).fill_(
                                             1.0 / len(node.children))
Example #25
 def __init__(self):
     params = {}
     params['root_node'] = {}
     params['root_node']['board'] = card_to_string.string_to_board('')
     params['root_node']['street'] = 0
     params['root_node']['current_player'] = constants.players.P1
     params['root_node']['bets'] = arguments.Tensor([100, 100])
     params['limit_to_street'] = False
     builder = PokerTreeBuilder()
     self.root_node = builder.build_tree(params)
     #        print(self.builder.node_id_acc)
     filling.fill_uniform(self.root_node)
     self.state = GameState()
     self._cached_terminal_equities = {}
Example #26
def generate_cards( count ):
  #marking all used cards
  used_cards = torch.ByteTensor(game_settings.card_count).zero_()
  
  out = arguments.Tensor(count)
  #counter for generated cards
  generated_cards_count = 0
  while(generated_cards_count < count):
    card = random.randint(0, game_settings.card_count - 1)
    if ( used_cards[card] == 0 ): 
      out[generated_cards_count] = card
      generated_cards_count = generated_cards_count + 1
      used_cards[card] = 1
  return out
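
A brief usage sketch (illustrative; note that separate calls share no state, so a later call can repeat a card dealt by an earlier one):

    # hypothetical usage: deal two distinct private cards in one call
    private_cards = generate_cards(2)
    # an independent call for the board card, which may collide with the private cards
    board = generate_cards(1)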
Example #27
 def _init_board_index_table(self):
     if game_settings.board_card_count == 1:
         self._board_index_table = torch.arange(
             0, game_settings.card_count).float()
     elif game_settings.board_card_count == 2:
         self._board_index_table = arguments.Tensor(
             game_settings.card_count, game_settings.card_count).fill_(-1)
         board_idx = 0
         for card_1 in range(game_settings.card_count):
             for card_2 in range(card_1 + 1, game_settings.card_count):
                 # assign the 0-based index first, then advance, so the indices match
                 # the row order produced by get_second_round_boards
                 self._board_index_table[card_1][card_2] = board_idx
                 self._board_index_table[card_2][card_1] = board_idx
                 board_idx = board_idx + 1
     else:
         assert False, 'unsupported board size'
Example #28
    def compute_action(self, state):

        # convert tensor for rl
        builder = PokerTreeBuilder()
        state_tensor = builder.statenode_to_tensor(state)

        # NOTE: select_action returns a LongTensor shaped [[action_id]]
        #        action_id = (self.table_sl.select_action(state) if random.random() > arguments.eta \
        #                 else self.dqn_optim.select_action(state_tensor))[0][0]
        #        action_id = self.table_sl.select_action(state)[0][0]
        action_id = self.net_sl[state.node.current_player].select_action(
            state_tensor).item()
        #        print('_____________')
        #        print(action_id)
        #        print('_____________')
        # action['action:  ,'raise_amount':  ]
        action = {}

        #fold
        if action_id == 0:
            action['action'] = constants.acpc_actions.fold
        # call
        elif action_id == 1 or action_id >= game_settings.actions_count:
            action['action'] = constants.acpc_actions.ccall
        #raise
        elif action_id > 1:
            # get the possible bets to determine the raise size
            bet_sizing = BetSizing(arguments.Tensor(arguments.bet_sizing))
            possible_bets = bet_sizing.get_possible_bets(state.node)
            if possible_bets.dim() != 0:
                possible_bet = possible_bets[:, state.node.current_player]
            else:
                # no possible bet in this state, so just call
                action['action'] = constants.acpc_actions.ccall
                return action

            raise_action_id = action_id - 2  # offset for the fold and call actions
            action['action'] = constants.acpc_actions.rraise
            if len(possible_bet) <= raise_action_id:
                # the requested raise bucket does not exist, fall back to the largest bet
                action['raise_amount'] = possible_bet[-1].item()
            else:
                action['raise_amount'] = possible_bet[raise_action_id].item()
        else:
            assert False, 'invalid action'

        return action
Example #29
    def _fill_chance(self, node):
        assert (not node.terminal)

        # fill the strategy with a uniform probability, which must be zero for
        # hands that are not possible on the corresponding board
        node.strategy = arguments.Tensor(len(node.children),
                                         game_settings.card_count).fill_(0)
        #setting probability of impossible hands to 0
        for i in range(len(node.children)):
            child_node = node.children[i]
            mask = card_tools.get_possible_hand_indexes(
                child_node.board).byte()
            node.strategy[i].fill_(0)
            # subtract 2 because each player holds one card
            node.strategy[i][mask] = 1.0 / (game_settings.card_count - 2)
Example #30
    def _get_terminal_value(self, state):
        node = state.node
        assert node.terminal
        value = arguments.Tensor(2).fill_(-1)
        value[node.current_player] = 1
        if node.node_type == constants.node_types.terminal_fold:
            # terminal fold
            value.mul_(node.bets[1 - node.current_player])
        else:
            # showdown
            player_hand = self.private[
                node.current_player].tolist() + node.board.tolist()
            player_strength = evaluator.evaluate(player_hand, -1)
            oppo_hand = self.private[
                1 - node.current_player].tolist() + node.board.tolist()
            oppo_strength = evaluator.evaluate(oppo_hand, -1)

            if player_strength > oppo_strength:
                value.mul_(node.bets[1 - node.current_player])
            else:
                value.mul_(-node.bets[1 - node.current_player])
        return value