Example #1
 def __init__(self):
     params = {}
     params['root_node'] = {}
     params['root_node']['board'] = card_to_string.string_to_board('')
     params['root_node']['street'] = 0
     params['root_node']['current_player'] = constants.players.P1
     params['root_node']['bets'] = arguments.Tensor([100, 100])
     params['limit_to_street'] = False
     builder = PokerTreeBuilder()
     self.root_node = builder.build_tree(params)
     #        print(self.builder.node_id_acc)
     filling.fill_uniform(self.root_node)
     self.state = GameState()
     self._cached_terminal_equities = {}
Example #2
    def compute_action(self, state):

        # convert the game state to a tensor for the networks
        builder = PokerTreeBuilder()
        state_tensor = builder.statenode_to_tensor(state)

        # NOTE: select_action returns a LongTensor of shape [[1]], hence .item() below
        #        action_id = (self.table_sl.select_action(state) if random.random() > arguments.eta \
        #                 else self.dqn_optim.select_action(state_tensor))[0][0]
        #        action_id = self.table_sl.select_action(state)[0][0]
        action_id = self.net_sl[state.node.current_player].select_action(
            state_tensor).item()
        #        print('_____________')
        #        print(action_id)
        #        print('_____________')
        # action = {'action': ..., 'raise_amount': ...}
        action = {}

        # fold
        if action_id == 0:
            action['action'] = constants.acpc_actions.fold
        # call
        elif action_id == 1 or action_id >= game_settings.actions_count:
            action['action'] = constants.acpc_actions.ccall
        # raise
        elif action_id > 1:
            # enumerate the possible bet sizes at this node
            bet_sizing = BetSizing(arguments.Tensor(arguments.bet_sizing))
            possible_bets = bet_sizing.get_possible_bets(state.node)
            if possible_bets.dim() != 0:
                possible_bet = possible_bets[:, state.node.current_player]
            else:
                # no bet is possible in this state, so fall back to call
                action['action'] = constants.acpc_actions.ccall
                return action

            # raise ids start after the fold and call ids
            raise_action_id = action_id - 2
            action['action'] = constants.acpc_actions.rraise
            if len(possible_bet) <= raise_action_id:
                # id is past the available sizes: clamp to the largest bet
                action['raise_amount'] = possible_bet[-1].item()
            else:
                action['raise_amount'] = possible_bet[raise_action_id].item()
        else:
            assert False  # invalid action id

        return action
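
Note: the mapping from the network's action id to an ACPC action used above can be summarized in plain Python. The helper name and the string labels below are illustrative (the real code uses constants.acpc_actions), and actions_count stands in for game_settings.actions_count.

def describe_action_id(action_id, actions_count, possible_bets):
    # 0 -> fold, 1 (or any id past the action table) -> call,
    # otherwise index into the bet-sizing table, offset by fold/call
    if action_id == 0:
        return {'action': 'fold'}
    if action_id == 1 or action_id >= actions_count:
        return {'action': 'call'}
    bet_index = min(action_id - 2, len(possible_bets) - 1)
    return {'action': 'raise', 'raise_amount': possible_bets[bet_index]}

# e.g. describe_action_id(3, 5, [200, 300, 20000]) -> {'action': 'raise', 'raise_amount': 300}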
Example #3
    def test(self, table_sl):
    
        builder = PokerTreeBuilder()
        
        params = {}
        
        params['root_node'] = {}
        params['root_node']['board'] = card_to_string.string_to_board('')
        params['root_node']['street'] = 0
        params['root_node']['current_player'] = constants.players.P1
        params['root_node']['bets'] = arguments.Tensor([100, 100])
        params['limit_to_street'] = False
        
        tree = builder.build_tree(params)
        
#        table_sl = torch.load('/home/mjb/Nutstore/deepStack/Data/Model/Iter:' + str(model_num) + '.sl')

        # construct the starting ranges
        filling = StrategyFilling()

        range1 = card_tools.get_uniform_range(params['root_node']['board'])
        range2 = card_tools.get_uniform_range(params['root_node']['board'])

        filling.fill_uniform(tree)


        starting_ranges = arguments.Tensor(game_settings.player_count, game_settings.card_count)
        starting_ranges[0].copy_(range1)
        starting_ranges[1].copy_(range2)
        
        table_sl.model.eval()
#        self.dfs_fill_table(tree, table_sl,builder)
        self.dfs_fill_strategy(table_sl,tree, builder)
        
        tree_values = TreeValues()
        tree_values.compute_values(tree, starting_ranges)

        print('Exploitability: ' + str(tree.exploitability.item()) + ' [chips]')
        return tree.exploitability.item()
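
A hypothetical driver for the test() method above, assuming it belongs to the ValuesTester class imported in Example #4 and reusing the checkpoint path shown in Example #7:

import torch
from Tree.Tests.test_tree_values import ValuesTester

model_num = 25000
table_sl = torch.load('/home/mjb/Nutstore/deepStack/Data/Model/Iter:' +
                      str(model_num) + '.sl')

tester = ValuesTester()
exploitability = tester.test(table_sl)  # prints and returns the exploitability in chips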
Example #4
import torch
import numpy as np
import Settings.arguments as arguments
import Settings.constants as constants
import Settings.game_settings as game_settings
from itertools import count
from nn.env import Env
from nn.dqn import DQN
from nn.dqn import DQNOptim
from nn.table_sl import TableSL
from nn.Q_learning import QLearning
from nn.state import GameState
from Tree.tree_builder import PokerTreeBuilder
from Tree.Tests.test_tree_values import ValuesTester
from collections import namedtuple
builder = PokerTreeBuilder()

num_episodes = 10
env = Env()
value_tester = ValuesTester()
action = np.arange(5)

Agent = namedtuple('Agent', ['rl', 'sl'])

agent0 = Agent(rl=QLearning(action), sl=TableSL())
agent1 = Agent(rl=QLearning(action), sl=TableSL())
table_sl = agent0.sl
agents = [agent0, agent1]


def load_model(dqn_optim, iter_time):
Example #5
"""
@author: mjb
"""

import Settings.game_settings as game_settings
import Game.card_to_string as card_to_string
from Game.card_tools import card_tools
card_tools = card_tools()
import Settings.arguments as arguments
from Tree.tree_builder import PokerTreeBuilder
from Tree.tree_visualiser import TreeVisualiser
from Tree.tree_builder import Node
from Tree.strategy_filling import StrategyFilling
import Settings.constants as constants


builder = PokerTreeBuilder()

params = {}
params['root_node'] = {}
params['root_node']['board'] = card_to_string.string_to_board('Ks')
params['root_node']['street'] = 1
params['root_node']['current_player'] = constants.players.P1
params['root_node']['bets'] = arguments.Tensor([100, 100])
params['limit_to_street'] = True

tree = builder.build_tree(params)

filling = StrategyFilling()

range1 = card_tools.get_uniform_range(params['root_node']['board'])
range2 = card_tools.get_uniform_range(params['root_node']['board'])
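
The snippet above stops after building the uniform ranges; a plausible continuation, mirroring the flow of Example #3 (the TreeValues import path is an assumption), would be:

from Tree.tree_values import TreeValues  # assumed module path

filling.fill_uniform(tree)

starting_ranges = arguments.Tensor(game_settings.player_count,
                                   game_settings.card_count)
starting_ranges[0].copy_(range1)
starting_ranges[1].copy_(range2)

tree_values = TreeValues()
tree_values.compute_values(tree, starting_ranges)
print('Exploitability: ' + str(tree.exploitability.item()) + ' [chips]')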
Example #6
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Wed Aug 23 00:27:06 2017

@author: mjb
"""

import Settings.game_settings as game_settings
import Game.card_to_string as card_to_string
import Settings.arguments as arguments
from Tree.tree_builder import PokerTreeBuilder
from Tree.tree_visualiser import TreeVisualiser
import Settings.constants as constants

builder = PokerTreeBuilder()

params = {}

params['root_node'] = {}
params['root_node']['board'] = card_to_string.string_to_board('Ks')
params['root_node']['street'] = 2
params['root_node']['current_player'] = constants.players.P1
params['root_node']['bets'] = arguments.Tensor([300, 300])
params['limit_to_street'] = False
tree = builder.build_tree(params)

acc_list = []
builder.acc_node(tree, acc_list)
print(max(acc_list))
print(builder.node_id_acc)
Example #7
    # rl: query the DQN for every private card and append its output to node.rl
    for i in range(game_settings.card_count):
        state = GameState()
        state.node = node
        state.private = [arguments.Tensor([i]), arguments.Tensor([i])]
        state_tensor = builder.statenode_to_tensor(state)
        node.rl = torch.cat(
            (node.rl, dqnmodel(Variable(state_tensor, volatile=True)).data), 0)

    children = node.children
    for child in children:
        dfs_fill_table(child, table, dqnmodel, builder)

model_num = 25000

builder = PokerTreeBuilder()

params = {}

params['root_node'] = {}
params['root_node']['board'] = card_to_string.string_to_board('')
params['root_node']['street'] = 0
params['root_node']['current_player'] = constants.players.P1
params['root_node']['bets'] = arguments.Tensor([100, 100])
params['limit_to_street'] = False
tree = builder.build_tree(params)

table_sl = torch.load('/home/mjb/Nutstore/deepStack/Data/Model/Iter:' +
                      str(model_num) + '.sl')

dqn = DQN()
Example #8
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Tue Aug 22 01:30:35 2017

@author: mjb
"""

import Settings.game_settings as game_settings
import Game.card_to_string as card_to_string
import Settings.arguments as arguments
from Tree.tree_builder import PokerTreeBuilder
from Tree.tree_visualiser import TreeVisualiser
import Settings.constants as constants
from nn.state import GameState

from nn.env import Env

builder = PokerTreeBuilder()
env = Env()
env.reset()

tensor = builder.statenode_to_tensor(env.state)

print(tensor.size(0))
Example #9
 def __init__(self, verbose=0):
     self.tree_builder = PokerTreeBuilder()
     self.verbose = verbose
Example #10
class Resolving():
    def __init__(self, verbose=0):
        self.tree_builder = PokerTreeBuilder()
        self.verbose = verbose

    def _create_lookahead_tree(self, node):
        ''' Builds a depth-limited public tree rooted at a given game node.
		@param: node the root of the tree
		'''
        build_tree_params = TreeParams()
        build_tree_params.root_node = node
        build_tree_params.limit_to_street = True
        self.lookahead_tree = self.tree_builder.build_tree(build_tree_params)

    def resolve_first_node(self, node, player_range, opponent_range):
        ''' Re-solves a depth-limited lookahead using input ranges.
			Uses the input range for the opponent instead of a gadget range,
			so only appropriate for re-solving the root node of the game tree
			(where ranges are fixed).
		@param: node the public node at which to re-solve
		@param: player_range a range vector for the re-solving player
		@param: opponent_range a range vector for the opponent
		'''
        self._create_lookahead_tree(node)
        self.lookahead = Lookahead()
        self.lookahead.build_lookahead(self.lookahead_tree)
        self.lookahead.resolve_first_node(player_range, opponent_range)
        self.resolve_results = self.lookahead.get_results()
        if self.verbose > 0:
            PC, CC = constants.players_count, game_settings.card_count
            starting_ranges = np.zeros([PC, CC], dtype=arguments.dtype)
            starting_ranges[0] = player_range
            starting_ranges[1] = opponent_range
            tree_cfr = TreeCFR()
            tree_cfr.run_cfr(self.lookahead_tree, starting_ranges)
            tree_values = TreeValues()
            tree_values.compute_values(self.lookahead_tree, starting_ranges)
            print('Exploitability: ' +
                  str(self.lookahead_tree.exploitability) + ' [chips]')
            # debugging
            # print(np.array2string(self.lookahead_tree.cf_values[self.lookahead_tree.current_player].reshape([-1,2]), suppress_small=True, precision=2))
            # print()
            # print(np.array2string(self.resolve_results.root_cfvs.reshape([-1,2]), suppress_small=True, precision=2))
            # print(np.array2string(self.lookahead_tree.strategy.reshape([-1,6]), suppress_small=True, precision=2))
            # print()
            # print(np.array2string(self.resolve_results.strategy.reshape([-1,6]), suppress_small=True, precision=2))
        return self.resolve_results

    def resolve(self, node, player_range, opponent_cfvs):
        ''' Re-solves a depth-limited lookahead using an input range for the player
			and the @{cfrd_gadget|CFRDGadget} to generate ranges for the opponent.
			@param: node the public node at which to re-solve
			@param: player_range a range vector for the re-solving player
			@param: opponent_cfvs a vector of cfvs achieved by the opponent
					before re-solving
		'''
        assert (card_tools.is_valid_range(player_range, node.board))
        self._create_lookahead_tree(node)
        self.lookahead = Lookahead()
        self.lookahead.build_lookahead(self.lookahead_tree)
        self.lookahead.resolve(player_range, opponent_cfvs)
        self.resolve_results = self.lookahead.get_results()
        return self.resolve_results

    def _action_to_action_id(self, action):
        ''' Gives the index of the given action at the node being re-solved.
			The node must first be re-solved with @{resolve} or @{resolve_first_node}.
		@param: action a legal action at the node
		@return the index of the action
		'''
        actions = self.get_possible_actions()
        action_id = -1
        for i in range(actions.shape[0]):
            if action == actions[i]:
                action_id = i
        assert (action_id != -1)
        return action_id

    def get_possible_actions(self):
        ''' Gives a list of possible actions at the node being re-solved.
			 The node must first be re-solved with @{resolve} or @{resolve_first_node}.
		@return a list of legal actions
		'''
        return self.lookahead_tree.actions

    def get_root_cfv(self):
        ''' Gives the average counterfactual values that the re-solve player
			received at the node during re-solving.
			The node must first be re-solved with @{resolve_first_node}.
		@return a vector of cfvs
		'''
        return self.resolve_results.root_cfvs

    def get_root_cfv_both_players(self):
        ''' Gives the average counterfactual values that each player received
			at the node during re-solving.
			Useful for data generation for neural net training.
			The node must first be re-solved with @{resolve_first_node}.
		@return a (2,K) tensor of cfvs, where K is the range size
		'''
        return self.resolve_results.root_cfvs_both_players

    def get_action_cfv(self, action):
        ''' Gives the average counterfactual values that the opponent received
			during re-solving after the re-solve player took a given action.
			Used during continual re-solving to track opponent cfvs. The node must
			first be re-solved with @{resolve} or @{resolve_first_node}.
		@param: action the action taken by the re-solve player
				at the node being re-solved
		@return a vector of cfvs
		'''
        action_id = self._action_to_action_id(action)
        return self.resolve_results.children_cfvs[action_id]

    def get_chance_action_cfv(self, action, board):
        ''' Gives the average counterfactual values that the opponent received
			during re-solving after a chance event (the betting round changes and
			more cards are dealt).
			Used during continual re-solving to track opponent cfvs.
			The node must first be re-solved with @{resolve} or @{resolve_first_node}.
		@param: action the action taken by the re-solve player
				at the node being re-solved
		@param: board a vector of board cards
				which were updated by the chance event
		@return a vector of cfvs
		'''
        action_id = self._action_to_action_id(action)
        return self.lookahead.get_chance_action_cfv(action_id, board)

    def get_action_strategy(self, action):
        ''' Gives the probability that the re-solved strategy takes a given action.
			The node must first be re-solved with @{resolve} or @{resolve_first_node}.
		@param action a legal action at the re-solve node
		@return a vector giving the probability of taking the action
				with each private hand
		'''
        action_id = self._action_to_action_id(action)
        return self.resolve_results.strategy[action_id]
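
A minimal usage sketch for the Resolving class, assuming node is a public tree node built elsewhere (e.g. with PokerTreeBuilder as in the other examples) and that ranges are numpy vectors of length game_settings.card_count, as suggested by the verbose branch in resolve_first_node:

import numpy as np
import Settings.arguments as arguments
import Settings.game_settings as game_settings

def resolve_root(node):
    # re-solve the root with uniform ranges for both players
    resolving = Resolving(verbose=1)
    uniform = np.full(game_settings.card_count,
                      1.0 / game_settings.card_count, dtype=arguments.dtype)
    results = resolving.resolve_first_node(node, uniform, uniform.copy())

    # continual re-solving bookkeeping: pick an action, then track the
    # opponent cfvs and our strategy for that action
    action = resolving.get_possible_actions()[0]
    opponent_cfvs = resolving.get_action_cfv(action)
    action_strategy = resolving.get_action_strategy(action)
    return results, opponent_cfvs, action_strategy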