def __init__(self):
    """
    Builds the game tree from the root description below, passes it to
    `filling.fill_uniform`, and starts from a fresh `GameState`.
    """
    # Root description: empty board string, street 0, P1 to act,
    # bets of [100, 100]; `limit_to_street` is switched off.
    params = {
        'root_node': {
            'board': card_to_string.string_to_board(''),
            'street': 0,
            'current_player': constants.players.P1,
            'bets': arguments.Tensor([100, 100]),
        },
        'limit_to_street': False,
    }

    tree_builder = PokerTreeBuilder()
    self.root_node = tree_builder.build_tree(params)
    filling.fill_uniform(self.root_node)

    self.state = GameState()
    # starts empty; nothing in this method populates it
    self._cached_terminal_equities = {}
def compute_action(self, state):
    """
    Chooses an action for the player to act at `state.node`.

    The state is encoded with `PokerTreeBuilder.statenode_to_tensor` and
    handed to `self.net_sl[<current player>].select_action`; the returned
    id is then translated into an action dict.

    @param: state the game state; `state.node` is the node being acted on
    @return a dict with key 'action' (a value from `constants.acpc_actions`)
    and, for a raise, an additional key 'raise_amount'
    """
    # convert the state into a tensor for the network
    tree_builder = PokerTreeBuilder()
    encoded_state = tree_builder.statenode_to_tensor(state)
    policy = self.net_sl[state.node.current_player]
    action_id = policy.select_action(encoded_state).item()

    # id 0: fold
    if action_id == 0:
        return {'action': constants.acpc_actions.fold}

    # id 1: call; ids at or past the action count are mapped to a call too
    if action_id == 1 or action_id >= game_settings.actions_count:
        return {'action': constants.acpc_actions.ccall}

    # anything that is not above 1 at this point is an invalid id
    if not action_id > 1:
        assert False  # invalid action
        return {}

    # remaining ids are raises: look up the bets available in this node
    sizing = BetSizing(arguments.Tensor(arguments.bet_sizing))
    bets_for_node = sizing.get_possible_bets(state.node)
    if bets_for_node.dim() == 0:
        # no bet is possible in this state, so call
        return {'action': constants.acpc_actions.ccall}

    own_bets = bets_for_node[:, state.node.current_player]
    chosen = {'action': constants.acpc_actions.rraise}
    # ids 0 and 1 are fold and call, so raise sizes start at id 2;
    # an id past the last available size falls back to the last one
    bet_index = min(action_id - 2, len(own_bets) - 1)
    chosen['raise_amount'] = own_bets[bet_index].item()
    return chosen
def test(self, table_sl):
    """
    Fills the game tree with the strategy obtained from `table_sl` and
    reports the tree's exploitability.

    @param: table_sl the strategy source; its `model` is switched with
    `.eval()` before `self.dfs_fill_strategy` is run
    @return the value of `tree.exploitability.item()`
    """
    builder = PokerTreeBuilder()
    root_spec = {
        'board': card_to_string.string_to_board(''),
        'street': 0,
        'current_player': constants.players.P1,
        'bets': arguments.Tensor([100, 100]),
    }
    params = {'root_node': root_spec, 'limit_to_street': False}
    tree = builder.build_tree(params)

    # construct the starting ranges: two uniform ranges over the root board
    filling = StrategyFilling()
    board = params['root_node']['board']
    player_ranges = (
        card_tools.get_uniform_range(board),
        card_tools.get_uniform_range(board),
    )
    filling.fill_uniform(tree)
    starting_ranges = arguments.Tensor(game_settings.player_count,
                                       game_settings.card_count)
    for player, player_range in enumerate(player_ranges):
        starting_ranges[player].copy_(player_range)

    table_sl.model.eval()
    self.dfs_fill_strategy(table_sl, tree, builder)

    tree_values = TreeValues()
    tree_values.compute_values(tree, starting_ranges)

    exploitability = tree.exploitability.item()
    print('Exploitability: ' + str(exploitability) + '[chips]')
    return exploitability
import torch
import numpy as np
import Settings.arguments as arguments
import Settings.constants as constants
import Settings.game_settings as game_settings
from itertools import count
from nn.env import Env
from nn.dqn import DQN
from nn.dqn import DQNOptim
from nn.table_sl import TableSL
from nn.Q_learning import QLearning
from nn.state import GameState
from Tree.tree_builder import PokerTreeBuilder
from Tree.Tests.test_tree_values import ValuesTester
from collections import namedtuple

# Module-level objects created once at import time.
builder = PokerTreeBuilder()
num_episodes = 10
env = Env()
value_tester = ValuesTester()

# The integers 0..4, passed to each QLearning instance below.
action = np.arange(5)

# An agent is a pair: a QLearning object (`rl`) and a TableSL object (`sl`).
Agent = namedtuple('Agent', ['rl', 'sl'])
agent0 = Agent(rl=QLearning(action), sl=TableSL())
agent1 = Agent(rl=QLearning(action), sl=TableSL())
# Alias for the first agent's `sl` member.
table_sl = agent0.sl
agents = [agent0, agent1]

def load_model(dqn_optim, iter_time):
@author: mjb
"""
import Settings.game_settings as game_settings
import Game.card_to_string as card_to_string
from Game.card_tools import card_tools
# Rebinds the imported name `card_tools` to the result of calling it.
card_tools = card_tools()
import Settings.arguments as arguments
from Tree.tree_builder import PokerTreeBuilder
from Tree.tree_visualiser import TreeVisualiser
from Tree.tree_builder import Node
from Tree.strategy_filling import StrategyFilling
import Settings.constants as constants

builder = PokerTreeBuilder()

# Root description handed to the tree builder: board string 'Ks',
# street 1, P1 to act, bets of [100, 100], `limit_to_street` enabled.
params = {}
params['root_node'] = {}
params['root_node']['board'] = card_to_string.string_to_board('Ks')
params['root_node']['street'] = 1
params['root_node']['current_player'] = constants.players.P1
params['root_node']['bets'] = arguments.Tensor([100, 100])
params['limit_to_street'] = True
tree = builder.build_tree(params)

filling = StrategyFilling()

# Two ranges from `get_uniform_range` over the root board
# (presumably one per player -- confirm against the code that uses them).
range1 = card_tools.get_uniform_range(params['root_node']['board'])
range2 = card_tools.get_uniform_range(params['root_node']['board'])
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Builds the tree rooted at street 2 with board 'Ks' and prints the largest
value gathered by `builder.acc_node` followed by `builder.node_id_acc`.

Created on Wed Aug 23 00:27:06 2017

@author: mjb
"""
import Settings.game_settings as game_settings
import Game.card_to_string as card_to_string
import Settings.arguments as arguments
from Tree.tree_builder import PokerTreeBuilder
from Tree.tree_visualiser import TreeVisualiser
import Settings.constants as constants

builder = PokerTreeBuilder()

# root description: P1 to act, bets of [300, 300], tree not limited to the street
params = {
    'root_node': {
        'board': card_to_string.string_to_board('Ks'),
        'street': 2,
        'current_player': constants.players.P1,
        'bets': arguments.Tensor([300, 300]),
    },
    'limit_to_street': False,
}
tree = builder.build_tree(params)

# `acc_node` fills the list passed to it while visiting the tree
acc_list = []
builder.acc_node(tree, acc_list)

print(max(acc_list))
print(builder.node_id_acc)
#rl for i in range(game_settings.card_count): state = GameState() state.node = node state.private = [arguments.Tensor([i]), arguments.Tensor([i])] state_tensor = builder.statenode_to_tensor(state) node.rl = torch.cat( (node.rl, dqnmodel(Variable(state_tensor, volatile=True)).data), 0) children = node.children for child in children: dfs_fill_table(child, table, dqnmodel, builder) model_num = 25000 builder = PokerTreeBuilder() params = {} params['root_node'] = {} params['root_node']['board'] = card_to_string.string_to_board('') params['root_node']['street'] = 0 params['root_node']['current_player'] = constants.players.P1 params['root_node']['bets'] = arguments.Tensor([100, 100]) params['limit_to_street'] = False tree = builder.build_tree(params) table_sl = torch.load('/home/mjb/Nutstore/deepStack/Data/Model/Iter:' + str(model_num) + '.sl') dqn = DQN()
#!/usr/bin/env python2 # -*- coding: utf-8 -*- """ Created on Tue Aug 22 01:30:35 2017 @author: mjb """ import Settings.game_settings as game_settings import Game.card_to_string as card_to_string import Settings.arguments as arguments from Tree.tree_builder import PokerTreeBuilder from Tree.tree_visualiser import TreeVisualiser import Settings.constants as constants from nn.state import GameState from nn.env import Env builder = PokerTreeBuilder() env = Env() env.reset() tensor = builder.statenode_to_tensor(env.state) print(tensor.size(0))
def __init__(self, verbose=0):
    '''
    Records the verbosity level and creates the `PokerTreeBuilder`
    kept on the instance.

    @param: verbose the verbosity level stored as `self.verbose`
    (defaults to 0)
    '''
    self.verbose = verbose
    self.tree_builder = PokerTreeBuilder()
class Resolving():
    '''
    Re-solves a depth-limited lookahead rooted at a public game node and
    exposes the resulting strategy and counterfactual values.

    A node must be re-solved with @{resolve} or @{resolve_first_node}
    before any of the getters is used.
    '''

    def __init__(self, verbose=0):
        '''
        @param: verbose when greater than 0, @{resolve_first_node}
        additionally runs CFR on the lookahead tree and prints its
        exploitability
        '''
        self.tree_builder = PokerTreeBuilder()
        self.verbose = verbose

    def _create_lookahead_tree(self, node):
        ''' Builds a depth-limited public tree rooted at a given game node.

        The tree is stored in `self.lookahead_tree`.
        @param: node the root of the tree
        '''
        build_tree_params = TreeParams()
        build_tree_params.root_node = node
        build_tree_params.limit_to_street = True
        self.lookahead_tree = self.tree_builder.build_tree(build_tree_params)

    def resolve_first_node(self, node, player_range, opponent_range):
        ''' Re-solves a depth-limited lookahead using input ranges.

        Uses the input range for the opponent instead of a gadget range,
        so only appropriate for re-solving the root node of the game tree
        (where ranges are fixed).
        @param: node the public node at which to re-solve
        @param: player_range a range vector for the re-solving player
        @param: opponent_range a range vector for the opponent
        @return the results object produced by the lookahead
        '''
        self._create_lookahead_tree(node)
        self.lookahead = Lookahead()
        self.lookahead.build_lookahead(self.lookahead_tree)
        self.lookahead.resolve_first_node(player_range, opponent_range)
        self.resolve_results = self.lookahead.get_results()
        if self.verbose > 0:
            # Diagnostic only: run CFR on the same lookahead tree with the
            # given ranges and report the exploitability it computes.
            PC, CC = constants.players_count, game_settings.card_count
            starting_ranges = np.zeros([PC, CC], dtype=arguments.dtype)
            starting_ranges[0] = player_range
            starting_ranges[1] = opponent_range
            tree_cfr = TreeCFR()
            tree_cfr.run_cfr(self.lookahead_tree, starting_ranges)
            tree_values = TreeValues()
            tree_values.compute_values(self.lookahead_tree, starting_ranges)
            print('Exploitability: ' + str(self.lookahead_tree.exploitability) + ' [chips]')
        return self.resolve_results

    def resolve(self, node, player_range, opponent_cfvs):
        ''' Re-solves a depth-limited lookahead using an input range for the
        player and the @{cfrd_gadget|CFRDGadget} to generate ranges for the
        opponent.
        @param: node the public node at which to re-solve
        @param: player_range a range vector for the re-solving player
        @param: opponent_cfvs a vector of cfvs achieved by the opponent
        before re-solving
        @return the results object produced by the lookahead
        '''
        assert (card_tools.is_valid_range(player_range, node.board))
        self._create_lookahead_tree(node)
        self.lookahead = Lookahead()
        self.lookahead.build_lookahead(self.lookahead_tree)
        self.lookahead.resolve(player_range, opponent_cfvs)
        self.resolve_results = self.lookahead.get_results()
        return self.resolve_results

    def _action_to_action_id(self, action):
        ''' Gives the index of the given action at the node being re-solved.

        The node must first be re-solved with @{resolve} or
        @{resolve_first_node}.
        @param: action a legal action at the node
        @return the index of the action
        @raise AssertionError if the action is not among the node's actions
        '''
        actions = self.get_possible_actions()
        action_id = -1
        for i in range(actions.shape[0]):
            if action == actions[i]:
                action_id = i
        if action_id == -1:
            # Raised explicitly rather than with an `assert` statement: under
            # `python -O` an assert is removed and the -1 sentinel would be
            # returned, silently selecting the LAST action in the getters.
            raise AssertionError(
                'action %r is not a possible action at the re-solved node'
                % (action,))
        return action_id

    def get_possible_actions(self):
        ''' Gives a list of possible actions at the node being re-solved.

        The node must first be re-solved with @{resolve} or
        @{resolve_first_node}.
        @return a list of legal actions
        '''
        return self.lookahead_tree.actions

    def get_root_cfv(self):
        ''' Gives the average counterfactual values that the re-solve player
        received at the node during re-solving.

        The node must first be re-solved with @{resolve_first_node}.
        @return a vector of cfvs
        '''
        return self.resolve_results.root_cfvs

    def get_root_cfv_both_players(self):
        ''' Gives the average counterfactual values that each player
        received at the node during re-solving.

        Useful for data generation for neural net training.
        The node must first be re-solved with @{resolve_first_node}.
        @return a (2,K) tensor of cfvs, where K is the range size
        '''
        return self.resolve_results.root_cfvs_both_players

    def get_action_cfv(self, action):
        ''' Gives the average counterfactual values that the opponent
        received during re-solving after the re-solve player took a given
        action.

        Used during continual re-solving to track opponent cfvs. The node
        must first be re-solved with @{resolve} or @{resolve_first_node}.
        @param: action the action taken by the re-solve player at the node
        being re-solved
        @return a vector of cfvs
        '''
        action_id = self._action_to_action_id(action)
        return self.resolve_results.children_cfvs[action_id]

    def get_chance_action_cfv(self, action, board):
        ''' Gives the average counterfactual values that the opponent
        received during re-solving after a chance event (the betting round
        changes and more cards are dealt).

        Used during continual re-solving to track opponent cfvs. The node
        must first be re-solved with @{resolve} or @{resolve_first_node}.
        @param: action the action taken by the re-solve player at the node
        being re-solved
        @param: board a vector of board cards which were updated by the
        chance event
        @return a vector of cfvs
        '''
        action_id = self._action_to_action_id(action)
        return self.lookahead.get_chance_action_cfv(action_id, board)

    def get_action_strategy(self, action):
        ''' Gives the probability that the re-solved strategy takes a given
        action.

        The node must first be re-solved with @{resolve} or
        @{resolve_first_node}.
        @param: action a legal action at the re-solve node
        @return a vector giving the probability of taking the action with
        each private hand
        '''
        action_id = self._action_to_action_id(action)
        return self.resolve_results.strategy[action_id]