def dfs_fill_table(node, table, dqnmodel, builder):
    """Depth-first walk of the game tree, attaching per-node strategy data.

    For every non-terminal node this sets:
      - node.table: the node's slice of `table`, with each row normalized to
        sum to 1 (a per-hand action distribution);
      - node.rl: DQN outputs concatenated for every possible private card.

    Args:
        node: game-tree node (has .terminal, .node_id, .children).
        table: tensor indexed by node_id holding raw strategy counts.
        dqnmodel: trained DQN; called on a state tensor.
        builder: provides statenode_to_tensor(state).
    """
    if node.terminal:
        return

    # SL part: normalize each row of this node's strategy slice in place.
    node_table = table[node.node_id, :, :]
    for row in range(node_table.size(0)):
        node_table[row, :] = node_table[row, :] / node_table[row, :].sum()
    node.table = node_table

    # RL part: evaluate the DQN once per private card and stack the outputs.
    for card in range(game_settings.card_count):
        query = GameState()
        query.node = node
        query.private = [arguments.Tensor([card]), arguments.Tensor([card])]
        query_tensor = builder.statenode_to_tensor(query)
        # NOTE(review): assumes node.rl already holds a tensor to concatenate
        # onto — confirm it is initialized before this walk.
        node.rl = torch.cat(
            (node.rl, dqnmodel(Variable(query_tensor, volatile=True)).data), 0)

    for child in node.children:
        dfs_fill_table(child, table, dqnmodel, builder)
def step(self, state, action):
    """Advance the environment by one agent action.

    Args:
        state: GameState; state.node is the current tree node, state.private
            holds both players' private-card tensors.
        action: integer index into state.node.children; clamped if too large.

    Returns:
        (next_state, reward, terminal) — reward is the acting player's chip
        delta (bets before minus bets after); at a terminal node returns
        (None, bets + 99, True) where +99 is a placeholder terminal value.
    """
    parent_node = state.node
    if parent_node.terminal:
        # TODO(review): terminal value is stubbed — the +99 offset is a placeholder.
        return None, parent_node.bets[
            parent_node.current_player] + 99, True

    # Clamp an out-of-range action to the last legal child.
    # TODO(review): silently clamping invalid actions may hide agent bugs.
    if action >= len(state.node.children):
        action = len(state.node.children) - 1
    assert (action < 4)
    next_node = state.node.children[action]

    if next_node.current_player == constants.players.chance:
        # Sample a chance outcome from the parent's strategy column for our hand.
        rannum = random.random()
        hand_id = int(state.private[parent_node.current_player][0])
        chance_strategy = parent_node.strategy[:, hand_id]
        # Running cumulative sum instead of sum(chance_strategy[0:i+1]) per
        # iteration: O(n) instead of accidental O(n^2), identical result.
        cumulative = 0
        for i in range(len(chance_strategy)):
            cumulative = cumulative + chance_strategy[i]
            if rannum <= cumulative:
                next_node = parent_node.children[i]
                break
        # NOTE(review): selecting parent_node.children[i] (a sibling action)
        # rather than a child of the chance node looks suspicious — confirm.

    # Build the successor state and the immediate chip reward.
    next_state = GameState()
    next_state.node = next_node
    next_state.private = state.private
    reward = parent_node.bets[parent_node.current_player] - next_node.bets[
        parent_node.current_player]
    terminal = False
    return next_state, reward, terminal
def parsed_state_to_nodestate(self, processed_state):
    """Build a GameState (wrapping a fresh Node) from a parsed state dict.

    Args:
        processed_state: dict with keys 'current_street', 'board',
            'acting_player', 'bet1', 'bet2', 'hand_id'.

    Returns:
        GameState whose node mirrors the parsed fields; only the acting
        player's private card is filled in, all others are -1.
    """
    node = Node()
    node.street = processed_state['current_street']
    node.board = card_to_string.string_to_board(processed_state['board'])
    node.current_player = processed_state['acting_player']
    node.bets = arguments.Tensor(
        [processed_state['bet1'], processed_state['bet2']])

    state = GameState()
    state.node = node
    # TODO(mjb): private cards are hard-coded; opponents' hands stay unknown (-1).
    state.private = [-1] * game_settings.player_count
    state.private[node.current_player] = arguments.Tensor(
        [processed_state['hand_id']])
    return state
def step(self, agent, state, action): parent_node = state.node # TODO grasp if action if invaild if action[0][0] >= len(state.node.children): action[0][0] = len(state.node.children) - 1 # fold in first round is invaild # if action[0][0] == 0 and parent_node.bets[0] == 100 and parent_node.bets[1] == 100: # action[0][0] = 1 # assert (action < 4) next_node = state.node.children[action[0][0]] if next_node.current_player == constants.players.chance: next_node = self._chance_next(next_node, state) # next_state reward next_state = GameState() next_state.node = next_node next_state.private = state.private reward = parent_node.bets[parent_node.current_player] - next_node.bets[ parent_node.current_player] # if we choose to reach the terminal we should observe the ternimal value terminal = False #if chance node and the acting player is player 0 (who is act first at every round) if next_node.current_player == parent_node.current_player: return next_state, next_state, reward - arguments.blind, terminal if next_node.terminal: reward = reward + self._get_terminal_value( next_state, parent_node.current_player) terminal = True next_state = None real_next_state = next_state else: next_state_tensor = builder.statenode_to_tensor(next_state) # oppo_action = int((agent.sl.select_action(next_state) if random.random() > arguments.eta \ # else agent.rl.select_action(next_state_tensor))[0][0]) oppo_action = int(agent.sl.select_action(next_state_tensor)[0][0]) if oppo_action >= len(next_node.children): oppo_action = 1 real_next_node = next_node.children[oppo_action] if real_next_node.current_player == constants.players.chance: real_next_node = self._chance_next(real_next_node, state) assert (real_next_node != None) real_next_state = GameState() real_next_state.node = real_next_node real_next_state.private = state.private if real_next_node.terminal: reward = reward + self._get_terminal_value( real_next_state, parent_node.current_player) real_next_state = None # print(parent_node.node_id) # 
print(next_node.node_id) # if real_next_node: # print(real_next_node.node_id) # else: # print('None') # print(reward) # self.process_log(state, real_next_node, action, reward) return next_state, real_next_state, reward - arguments.blind, terminal
# Evaluation script: build the game tree from the root, load the trained
# SL table and RL (DQN) weights for iteration `model_num`, then probe the
# DQN at one specific node.
# NOTE(review): `params`, `builder`, `model_num`, `DQN`, `GameState`, etc.
# are defined earlier in the file / in imports not visible in this chunk.
params['root_node']['board'] = card_to_string.string_to_board('')  # empty board
params['root_node']['street'] = 0
params['root_node']['current_player'] = constants.players.P1
params['root_node']['bets'] = arguments.Tensor([100, 100])  # both players start at 100
params['limit_to_street'] = False
tree = builder.build_tree(params)

# Load the supervised-learning strategy table for this iteration.
table_sl = torch.load('/home/mjb/Nutstore/deepStack/Data/Model/Iter:' +
                      str(model_num) + '.sl')
dqn = DQN()
if torch.cuda.is_available():
    dqn.cuda()
# Load the matching RL (DQN) weights.
dqn.load_state_dict(
    torch.load('/home/mjb/Nutstore/deepStack/Data/Model/Iter:' +
               str(model_num) + '.rl'))

#dfs_fill_table(tree, table_sl, dqn, builder)

# Index every tree node by node_id for direct lookup.
acc_list = []
acc_node = {}
builder.acc_node(tree, acc_node, acc_list)

# Probe the DQN at node '858' with hard-coded private cards 0 and 2.
state = GameState()
state.private = [arguments.Tensor([0]), arguments.Tensor([2])]
state.node = acc_node['858']
print(dqn(Variable(builder.statenode_to_tensor(state))))

#visualiser = TreeVisualiser()
#visualiser.graphviz(tree,"table_sl:" + str(model_num))