def __init__(self):
    """Build the game tree, fill it uniformly and create fresh game state.

    The root node uses the board parsed from the empty string, street 0,
    ``constants.players.P1`` as the player to act and a bets tensor of
    ``[100, 100]``; ``limit_to_street`` is disabled.

    ``filling`` is not defined inside this method; it is resolved from the
    enclosing scope.
    """
    root_spec = {
        'board': card_to_string.string_to_board(''),
        'street': 0,
        'current_player': constants.players.P1,
        'bets': arguments.Tensor([100, 100]),
    }
    tree_params = {'root_node': root_spec, 'limit_to_street': False}

    self.root_node = PokerTreeBuilder().build_tree(tree_params)
    filling.fill_uniform(self.root_node)

    self.state = GameState()
    # Starts empty; nothing in this method populates it.
    self._cached_terminal_equities = {}
def compute_action(self, state):
    """Choose an ACPC action for ``state`` with the acting player's network.

    The state is encoded with ``PokerTreeBuilder.statenode_to_tensor`` and
    handed to ``self.net_sl[state.node.current_player].select_action``. The
    integer id it yields is decoded as:

    * ``0`` -> fold
    * ``1`` or any id ``>= game_settings.actions_count`` -> call
    * any other id ``> 1`` -> raise; ``id - 2`` indexes the acting player's
      column of ``BetSizing.get_possible_bets(state.node)``. An index past
      the end is clamped to the last entry, and a 0-dimensional result is
      treated as "no bet possible" and turned into a call.

    Args:
        state: object exposing ``node`` (which has ``current_player``).

    Returns:
        dict: ``{'action': ...}`` holding a ``constants.acpc_actions``
        member, plus ``'raise_amount'`` when the action is a raise.

    Raises:
        AssertionError: if the id matches none of the cases above (for
            example a negative id).
    """
    state_tensor = PokerTreeBuilder().statenode_to_tensor(state)
    player = state.node.current_player
    action_id = self.net_sl[player].select_action(state_tensor).item()

    if action_id == 0:
        return {'action': constants.acpc_actions.fold}

    if action_id == 1 or action_id >= game_settings.actions_count:
        return {'action': constants.acpc_actions.ccall}

    if action_id > 1:
        bet_sizing = BetSizing(arguments.Tensor(arguments.bet_sizing))
        possible_bets = bet_sizing.get_possible_bets(state.node)
        if possible_bets.dim() == 0:
            # No bet is possible in this state, so call instead.
            return {'action': constants.acpc_actions.ccall}

        player_bets = possible_bets[:, player]
        # Ids 0 and 1 are fold/call, so raise ids start at 2. Clamp to the
        # last available bet when the id points past the end.
        raise_index = min(action_id - 2, len(player_bets) - 1)
        return {
            'action': constants.acpc_actions.rraise,
            'raise_amount': player_bets[raise_index].item(),
        }

    # An explicit raise (instead of ``assert False``) keeps this check alive
    # under ``python -O``, where assert statements are removed and the
    # original code would have returned an empty dict.
    raise AssertionError('invalid action id: {!r}'.format(action_id))
def test(self, table_sl):
    """Evaluate ``table_sl`` on the full game tree and report exploitability.

    Builds the tree from the same root parameters as used elsewhere (board
    parsed from the empty string, street 0, P1 to act, bets ``[100, 100]``,
    not limited to a street), fills it uniformly, switches
    ``table_sl.model`` to eval mode and calls ``self.dfs_fill_strategy``
    (presumably to write the learned strategy into the tree - confirm
    against that method). ``TreeValues.compute_values`` is then run with a
    uniform starting range for each of the two players.

    Args:
        table_sl: object exposing ``model`` (with ``eval()``); forwarded to
            ``self.dfs_fill_strategy``.

    Returns:
        ``tree.exploitability.item()``, which is also printed.
    """
    builder = PokerTreeBuilder()
    params = {
        'root_node': {
            'board': card_to_string.string_to_board(''),
            'street': 0,
            'current_player': constants.players.P1,
            'bets': arguments.Tensor([100, 100]),
        },
        'limit_to_street': False,
    }
    tree = builder.build_tree(params)

    # Construct the starting ranges: one uniform range per player.
    filling = StrategyFilling()
    board = params['root_node']['board']
    uniform_ranges = [card_tools.get_uniform_range(board) for _ in range(2)]
    filling.fill_uniform(tree)

    starting_ranges = arguments.Tensor(
        game_settings.player_count, game_settings.card_count)
    for player, player_range in enumerate(uniform_ranges):
        starting_ranges[player].copy_(player_range)

    table_sl.model.eval()
    self.dfs_fill_strategy(table_sl, tree, builder)

    TreeValues().compute_values(tree, starting_ranges)

    exploitability = tree.exploitability.item()
    print('Exploitability: ' + str(exploitability) + '[chips]')
    return exploitability
import torch
import numpy as np
import Settings.arguments as arguments
import Settings.constants as constants
import Settings.game_settings as game_settings
from itertools import count
from nn.env import Env
from nn.dqn import DQN
from nn.dqn import DQNOptim
from nn.table_sl import TableSL
from nn.Q_learning import QLearning
from nn.state import GameState
from Tree.tree_builder import PokerTreeBuilder
from Tree.Tests.test_tree_values import ValuesTester
from collections import namedtuple

# Module-level objects, created once when this module is imported.
builder = PokerTreeBuilder()
num_episodes = 10
env = Env()
value_tester = ValuesTester()

# The array [0, 1, 2, 3, 4]; handed to both QLearning instances below.
action = np.arange(5)

# Each agent pairs a QLearning instance (``rl``) with a TableSL instance (``sl``).
Agent = namedtuple('Agent', ['rl', 'sl'])
agent0 = Agent(rl=QLearning(action), sl=TableSL())
agent1 = Agent(rl=QLearning(action), sl=TableSL())

# Alias for agent0's TableSL instance.
table_sl = agent0.sl
agents = [agent0, agent1]


def load_model(dqn_optim, iter_time):
def __init__(self, verbose=0):
    """Store the verbosity setting and create a tree builder.

    Args:
        verbose: value kept on ``self.verbose``; defaults to ``0``.
    """
    self.verbose = verbose
    self.tree_builder = PokerTreeBuilder()