Esempio n. 1
0
 def __init__(self):
     """Build the game tree from a fixed starting position and reset state.

     The root node handed to ``PokerTreeBuilder.build_tree`` uses the board
     parsed from an empty string, street 0, ``constants.players.P1`` to act
     and bets of ``[100, 100]``; ``limit_to_street`` is False. The resulting
     tree is passed through ``filling.fill_uniform`` before a fresh
     ``GameState`` and an empty terminal-equity cache are attached.
     """
     root_spec = {
         'board': card_to_string.string_to_board(''),
         'street': 0,
         'current_player': constants.players.P1,
         'bets': arguments.Tensor([100, 100]),
     }
     tree_params = {'root_node': root_spec, 'limit_to_street': False}

     self.root_node = PokerTreeBuilder().build_tree(tree_params)
     filling.fill_uniform(self.root_node)

     self.state = GameState()
     # starts empty; presumably filled lazily elsewhere in the class
     self._cached_terminal_equities = {}
Esempio n. 2
0
    def compute_action(self, state):
        """Choose an ACPC action for ``state`` with the acting player's network.

        The state is converted with ``PokerTreeBuilder.statenode_to_tensor``
        and fed to ``self.net_sl[state.node.current_player].select_action``;
        the scalar id it yields is mapped as follows:

        * ``0`` -> fold
        * ``1`` or any id ``>= game_settings.actions_count`` -> call
        * any other id ``> 1`` -> raise, where ``id - 2`` indexes the acting
          player's column of ``BetSizing.get_possible_bets`` (clamped to the
          last available bet). If no bet is available the action is a call.

        Args:
            state: object exposing ``node`` (with ``current_player``); it is
                also what ``statenode_to_tensor`` accepts.

        Returns:
            dict with key ``'action'`` (a ``constants.acpc_actions`` value)
            and, for raises only, ``'raise_amount'``.

        Raises:
            AssertionError: if the network yields an id that is not a valid
                action (e.g. negative).
        """
        # convert the state into the tensor form the network consumes
        builder = PokerTreeBuilder()
        state_tensor = builder.statenode_to_tensor(state)

        # select_action returns a tensor; .item() unwraps the scalar id
        action_id = self.net_sl[state.node.current_player].select_action(
            state_tensor).item()

        action = {}

        # fold
        if action_id == 0:
            action['action'] = constants.acpc_actions.fold
            return action

        # call; ids past the configured action count are treated as call too
        if action_id == 1 or action_id >= game_settings.actions_count:
            action['action'] = constants.acpc_actions.ccall
            return action

        # Anything that is not a raise id at this point is invalid. Raise
        # explicitly rather than `assert False`, which `python -O` strips and
        # would let an empty action dict escape.
        if not action_id > 1:
            raise AssertionError('invalid action id: {!r}'.format(action_id))

        # raise: look up the bets that are legal in this node
        bet_sizing = BetSizing(arguments.Tensor(arguments.bet_sizing))
        possible_bets = bet_sizing.get_possible_bets(state.node)

        # No bet available -> fall back to call. An empty tensor may still
        # report a non-zero dim, so check the element count as well; indexing
        # it below would otherwise raise IndexError.
        if possible_bets.dim() == 0 or possible_bets.numel() == 0:
            action['action'] = constants.acpc_actions.ccall
            return action

        possible_bet = possible_bets[:, state.node.current_player]

        # ids 0 and 1 are fold/call, so raise sizes start at id 2; clamp to
        # the largest available bet when the id overshoots
        raise_index = min(action_id - 2, len(possible_bet) - 1)

        action['action'] = constants.acpc_actions.rraise
        action['raise_amount'] = possible_bet[raise_index].item()
        return action
Esempio n. 3
0
    def test(self, table_sl):
        """Print and return the exploitability of ``table_sl``'s strategy.

        A tree is built from a fixed root (board parsed from an empty string,
        street 0, ``constants.players.P1`` to act, bets ``[100, 100]``, not
        limited to the street), filled via ``StrategyFilling.fill_uniform``
        and then via ``self.dfs_fill_strategy``. ``TreeValues.compute_values``
        is run with one ``card_tools.get_uniform_range`` per player, after
        which ``tree.exploitability`` is read.

        Args:
            table_sl: object with a ``model`` supporting ``eval()``; it is
                switched to eval mode here and not switched back.

        Returns:
            ``tree.exploitability.item()``.
        """
        builder = PokerTreeBuilder()

        board = card_to_string.string_to_board('')
        params = {
            'root_node': {
                'board': board,
                'street': 0,
                'current_player': constants.players.P1,
                'bets': arguments.Tensor([100, 100]),
            },
            'limit_to_street': False,
        }
        tree = builder.build_tree(params)

        # construct the starting ranges: one range per player on this board
        filling = StrategyFilling()
        player_ranges = (
            card_tools.get_uniform_range(board),
            card_tools.get_uniform_range(board),
        )

        filling.fill_uniform(tree)

        starting_ranges = arguments.Tensor(game_settings.player_count,
                                           game_settings.card_count)
        for player, player_range in enumerate(player_ranges):
            starting_ranges[player].copy_(player_range)

        # replace the tree's strategy with the one produced from table_sl
        table_sl.model.eval()
        self.dfs_fill_strategy(table_sl, tree, builder)

        TreeValues().compute_values(tree, starting_ranges)

        exploitability = tree.exploitability.item()
        print('Exploitability: ' + str(exploitability) + '[chips]')
        return exploitability
Esempio n. 4
0
import torch
import numpy as np
import Settings.arguments as arguments
import Settings.constants as constants
import Settings.game_settings as game_settings
from itertools import count
from nn.env import Env
from nn.dqn import DQN
from nn.dqn import DQNOptim
from nn.table_sl import TableSL
from nn.Q_learning import QLearning
from nn.state import GameState
from Tree.tree_builder import PokerTreeBuilder
from Tree.Tests.test_tree_values import ValuesTester
from collections import namedtuple
# Module-level training setup: shared tree builder, environment, evaluator
# and the two agents.
builder = PokerTreeBuilder()

num_episodes = 10
env = Env()
value_tester = ValuesTester()
# action ids 0..4 handed to each QLearning instance
action = np.arange(5)

# Each agent pairs an RL learner (`rl`) with a table-based SL model (`sl`).
Agent = namedtuple('Agent', ('rl', 'sl'))

agents = [Agent(rl=QLearning(action), sl=TableSL()) for _ in range(2)]
agent0, agent1 = agents
# convenience alias for the first agent's SL table
table_sl = agent0.sl


def load_model(dqn_optim, iter_time):
Esempio n. 5
0
 def __init__(self, verbose=0):
     """Store the verbosity setting and create a ``PokerTreeBuilder``.

     Args:
         verbose: stored as-is on ``self.verbose`` (default 0).
     """
     self.verbose = verbose
     self.tree_builder = PokerTreeBuilder()