Example #1
def dfs_fill_table(node, table, dqnmodel, builder):
    if node.terminal:
        return


    # chance-node handling was disabled here; kept for reference:
    #    if node.current_player == constants.players.chance:
    #        node.table = arguments.Tensor([])
    #        node.rl = arguments.Tensor([])
    #        children = node.children
    #        for child in children:
    #            dfs_fill_table(child, table, dqnmodel, builder)
    #        return

    # sl: normalize each row of this node's slice of the strategy table
    all_table = table[node.node_id, :, :]
    #    print(node.node_id)
    for i in range(all_table.size(0)):
        all_table[i, :] = all_table[i, :] / all_table[i, :].sum()

    node.table = all_table

    # rl: evaluate the DQN for every possible private card and stack the
    # resulting action values into node.rl
    node.rl = arguments.Tensor([])  # assumed empty; mirrors the disabled chance branch
    for i in range(game_settings.card_count):
        state = GameState()
        state.node = node
        state.private = [arguments.Tensor([i]), arguments.Tensor([i])]
        state_tensor = builder.statenode_to_tensor(state)
        node.rl = torch.cat(
            (node.rl, dqnmodel(Variable(state_tensor, volatile=True)).data), 0)

    children = node.children
    for child in children:
        dfs_fill_table(child, table, dqnmodel, builder)
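
The row normalization above turns each per-card row of the table into a probability distribution over actions. The same operation as a standalone sketch in plain PyTorch (the tensor contents are made up):

import torch

# one row per private card, one column per action (counts are hypothetical)
counts = torch.tensor([[4.0, 1.0, 5.0],
                       [2.0, 2.0, 6.0]])
strategy = counts / counts.sum(dim=1, keepdim=True)  # each row now sums to 1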
Example #2
File: env.py Project: mjbmjb/DeepS
    def step(self, state, action):
        parent_node = state.node
        if parent_node.terminal:
            # TODO compute the real terminal value here
            #            terminal_value =
            # placeholder payoff until the terminal value is implemented
            return None, parent_node.bets[
                parent_node.current_player] + 99, True
        # TODO reject invalid actions properly; for now clamp to the last child
        if action >= len(state.node.children):
            action = len(state.node.children) - 1

        assert action < 4
        next_node = state.node.children[action]
        if next_node.current_player == constants.players.chance:
            # sample the chance outcome according to the chance strategy
            rannum = random.random()
            hand_id = int(state.private[parent_node.current_player][0])
            chance_strategy = parent_node.strategy[:, hand_id]
            for i in range(len(chance_strategy)):
                if rannum <= sum(chance_strategy[0:i + 1]):
                    next_node = parent_node.children[i]
                    break

        # build the next state and compute the step reward

        next_state = GameState()
        next_state.node = next_node
        next_state.private = state.private
        reward = parent_node.bets[parent_node.current_player] - next_node.bets[
            parent_node.current_player]
        terminal = False

        return next_state, reward, terminal
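
A minimal driver loop for this step function might look like the sketch below; Env, reset(), and the random fallback action are assumptions based on the surrounding examples:

import random

env = Env()                 # hypothetical environment exposing reset()/step()
env.reset()
state, terminal = env.state, False
while not terminal:
    action = random.randint(0, 3)       # actions are asserted to be < 4 above
    state, reward, terminal = env.step(state, action)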
Example #3
    def dfs_fill_strategy(self, agent_sl, node, builder):
        if node.terminal:
            return
        if node.current_player == constants.players.chance:
            node.table = arguments.Tensor([])
            node.rl = arguments.Tensor([])
            children = node.children
            for child in children:
                self.dfs_fill_strategy(agent_sl, child, builder)
            return
            
        # sl: fill node.strategy with the agent's action distribution per card
        for card in range(game_settings.card_count):
            state = GameState()
            for player in range(game_settings.player_count):
                state.private.append(arguments.Tensor([card]))
            state.node = node
            tensor = builder.statenode_to_tensor(state)
            strategy = agent_sl.model(Variable(tensor)).data[0][0:len(node.children)]
            if isinstance(agent_sl, DQNOptim):
                # print(strategy)
                # a DQN outputs Q-values, not probabilities: put almost all
                # of the probability mass on the greedy action
                max_ix = strategy.lt(strategy.max())  # entries below the max
                strategy[max_ix] = 0.0001
                strategy[1 - max_ix] = 1              # the argmax entries
            strategy.div_(strategy.sum())
            node.strategy[:, card] = strategy

        children = node.children
        for child in children:
            self.dfs_fill_strategy(agent_sl, child, builder)
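
The byte-mask idiom above (1 - max_ix inverts an old-style PyTorch mask) turns a Q-value vector into a near-deterministic distribution. The same idea as a standalone sketch with modern boolean masks, hypothetical values:

import torch

q = torch.tensor([0.3, 1.2, 0.7])   # hypothetical Q-values per action
below_max = q < q.max()             # mask of non-greedy actions
dist = torch.empty_like(q)
dist[below_max] = 0.0001            # tiny mass on non-greedy actions
dist[~below_max] = 1.0              # bulk of the mass on the argmax
dist /= dist.sum()                  # normalize into a distribution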
Example #4
    def parsed_state_to_nodestate(self, processed_state):
        node = Node()
        node.street = processed_state['current_street']
        node.board = card_to_string.string_to_board(processed_state['board'])
        node.current_player = processed_state['acting_player']
        node.bets = arguments.Tensor([processed_state['bet1'], processed_state['bet2']])

        state = GameState()
        state.node = node

        # TODO (mjb): the opponent's private card is hardcoded to -1
        state.private = [-1 for _ in range(game_settings.player_count)]
        state.private[node.current_player] = arguments.Tensor([processed_state['hand_id']])

        return state
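
The dictionary keys the parser expects can be read off the code above; a hypothetical input showing the expected shape (values made up, and `parser` stands for whatever object owns the method):

processed_state = {
    'current_street': 0,
    'board': '',          # empty string for a preflop board
    'acting_player': 0,
    'bet1': 100,
    'bet2': 100,
    'hand_id': 2,
}
state = parser.parsed_state_to_nodestate(processed_state)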
Example #5
    def reset(self):
        self.state = GameState()
        # deal private cards to both players
        pri_card = random_card_generator.generate_cards(
            game_settings.private_count * 2)
        self.state.private.append(pri_card[0:game_settings.private_count])
        self.state.private.append(pri_card[game_settings.private_count:])
        self.state.node = self.root_node
Example #6
    def __init__(self):
        # parameters for the root of the public game tree
        params = {}
        params['root_node'] = {}
        params['root_node']['board'] = card_to_string.string_to_board('')
        params['root_node']['street'] = 0
        params['root_node']['current_player'] = constants.players.P1
        params['root_node']['bets'] = arguments.Tensor([100, 100])
        params['limit_to_street'] = False
        builder = PokerTreeBuilder()
        self.root_node = builder.build_tree(params)
        #        print(self.builder.node_id_acc)
        # start every node with a uniform strategy
        filling.fill_uniform(self.root_node)
        self.state = GameState()
        self._cached_terminal_equities = {}
Example #7
    def step(self, agent, state, action):
        parent_node = state.node

        # TODO reject invalid actions properly; for now clamp to the last child
        if action[0][0] >= len(state.node.children):
            action[0][0] = len(state.node.children) - 1
        # folding in the first round is invalid (check currently disabled):
        #        if action[0][0] == 0 and parent_node.bets[0] == 100 and parent_node.bets[1] == 100:
        #            action[0][0] = 1

        #        assert (action < 4)
        next_node = state.node.children[action[0][0]]
        if next_node.current_player == constants.players.chance:
            next_node = self._chance_next(next_node, state)

        # build the next state and compute the step reward
        next_state = GameState()
        next_state.node = next_node
        next_state.private = state.private
        reward = parent_node.bets[parent_node.current_player] - next_node.bets[
            parent_node.current_player]
        # if the chosen action reaches a terminal node, observe the terminal value
        terminal = False

        # if the same player acts again (after a chance node, player 0 acts
        # first in every round), return without sampling an opponent action
        if next_node.current_player == parent_node.current_player:
            return next_state, next_state, reward - arguments.blind, terminal
        if next_node.terminal:
            reward = reward + self._get_terminal_value(
                next_state, parent_node.current_player)
            terminal = True
            # no observable state beyond a terminal node
            next_state = None
            real_next_state = None
        else:
            next_state_tensor = builder.statenode_to_tensor(next_state)
            #            oppo_action = int((agent.sl.select_action(next_state) if random.random() > arguments.eta \
            #                          else agent.rl.select_action(next_state_tensor))[0][0])
            # the opponent acts according to its supervised-learning policy
            oppo_action = int(agent.sl.select_action(next_state_tensor)[0][0])
            if oppo_action >= len(next_node.children):
                oppo_action = 1  # fall back to a default action when invalid
            real_next_node = next_node.children[oppo_action]

            if real_next_node.current_player == constants.players.chance:
                real_next_node = self._chance_next(real_next_node, state)

            assert real_next_node is not None
            real_next_state = GameState()
            real_next_state.node = real_next_node
            real_next_state.private = state.private
            if real_next_node.terminal:
                reward = reward + self._get_terminal_value(
                    real_next_state, parent_node.current_player)
                real_next_state = None

        #        print(parent_node.node_id)
        #        print(next_node.node_id)
        #        if real_next_node:
        #            print(real_next_node.node_id)
        #        else:
        #            print('None')
        #        print(reward)
        #        self.process_log(state, real_next_node, action, reward)
        return next_state, real_next_state, reward - arguments.blind, terminal
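
Unlike Example #2, this variant returns both the immediate next state and the state after the opponent has replied. A caller would unpack the four-tuple along these lines (env and the surrounding training loop are assumptions):

next_state, real_next_state, reward, terminal = env.step(agent, state, action)
if not terminal and real_next_state is not None:
    state = real_next_state   # continue from the post-opponent state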
Example #8
params['root_node']['board'] = card_to_string.string_to_board('')
params['root_node']['street'] = 0
params['root_node']['current_player'] = constants.players.P1
params['root_node']['bets'] = arguments.Tensor([100, 100])
params['limit_to_street'] = False
tree = builder.build_tree(params)

# load the supervised-learning strategy table for the chosen iteration
table_sl = torch.load('/home/mjb/Nutstore/deepStack/Data/Model/Iter:' +
                      str(model_num) + '.sl')

# rebuild the DQN and load the trained weights for the same iteration
dqn = DQN()
if torch.cuda.is_available():
    dqn.cuda()

dqn.load_state_dict(
    torch.load('/home/mjb/Nutstore/deepStack/Data/Model/Iter:' +
               str(model_num) + '.rl'))

#dfs_fill_table(tree, table_sl, dqn, builder)

# index every tree node by id, then evaluate the DQN at one specific node
acc_list = []
acc_node = {}
builder.acc_node(tree, acc_node, acc_list)
state = GameState()
state.private = [arguments.Tensor([0]), arguments.Tensor([2])]
state.node = acc_node['858']
print(dqn(Variable(builder.statenode_to_tensor(state))))

#visualiser = TreeVisualiser()
#visualiser.graphviz(tree,"table_sl:" + str(model_num))