def store_memory(env, agents):
    """Flush every recorded transition from env.memory into the agent's
    RL replay memory.

    env    -- environment; provides state2tensor() and env.memory, a list
              of per-episode record lists of (state, action, reward) tuples.
    agents -- agent object exposing agents.rl.memory.push(...).

    Rewards are normalized by arguments.stack before being stored.
    """
    for record_list in env.memory:
        for i, (state, action, reward) in enumerate(record_list):
            # The next state is the state observed when the agent acts the
            # next time; the last record of an episode has no successor.
            if i + 1 < len(record_list):
                next_state, _, _ = record_list[i + 1]
            else:
                next_state = None
            state_tensor = env.state2tensor(state)
            action_tensor = arguments.LongTensor([[action]])
            # NOTE(review): state2tensor is assumed to accept None for a
            # terminal successor — confirm against env implementation.
            next_state_tensor = env.state2tensor(next_state)
            agents.rl.memory.push(state_tensor, action_tensor,
                                  next_state_tensor,
                                  reward.unsqueeze(1) / arguments.stack)
def test_five_card(state):
    """Drive a scripted six-player hand to showdown on a fixed
    five-card board (street 3)."""
    # Scripted actions for the hand.
    call_action = Action(atype=constants.actions.ccall, amount=0)
    raise_action = Action(atype=constants.actions.rraise, amount=20000)
    fold_action = Action(atype=constants.actions.fold, amount=0)
    # Fixed deal: two hole cards per player plus the full board.
    state.hole = torch.LongTensor([[40, 41], [50, 51], [4, 5],
                                   [8, 9], [44, 45], [48, 49]])
    state.board = torch.LongTensor([6, 30, 31, 39, 43])
    state.bets = arguments.LongTensor(
        [10000, 10000, 10000, 10000, 10000, 10000])
    state.street = 3
    state.current_player = 0
    # One raise, three folds, then a call closes the action.
    state.do_action(raise_action)
    state.do_action(fold_action)
    state.do_action(fold_action)
    state.do_action(fold_action)
    state.do_action(call_action)
    state.street = 3
def select_action(self, state):
    """Sample an action from the policy network's output distribution.

    Returns a 1x1 LongTensor holding the sampled action index.
    """
    # eval() so batch-norm layers use their running statistics.
    self.model.eval()
    log_probs = self.model(Variable(state)).data
    # The network outputs log-softmax; exponentiate back to probabilities.
    probs = torch.exp(log_probs)
    sampled = Categorical(probs).sample()
    action = arguments.LongTensor(1, 1)
    action[0] = sampled
    return action
def select_action(self, state):
    """Sample an action from the normalized state-action table row for
    the current (node, private-card) pair via inverse-CDF sampling.

    Returns a 1x1 LongTensor holding the sampled action index.
    """
    #TODO count the num of node
    # Row index combines the node id with the current player's private card.
    state_id = int((state.node.node_id * 4
                    + state.private[state.node.current_player])[0])
    row = self.s_a_table[state_id, :]
    policy = row / row.sum()
    # Inverse-CDF sampling: subtract probabilities until the draw crosses 0.
    random_num = torch.rand(1)
    for i in range(game_settings.actions_count):
        random_num.sub_(policy[i])
        if random_num[0] <= 0:
            return arguments.LongTensor([[i]])
    # Floating-point rounding can leave random_num slightly positive after
    # all probabilities have been subtracted; previously this fell through
    # and returned None. Fall back to the last action instead.
    return arguments.LongTensor([[game_settings.actions_count - 1]])
def select_action(self, state):
    """Sample an action from the stored strategy for the current
    (node, hand) pair.

    Returns a 1x1 LongTensor holding the sampled action index.
    """
    #TODO count the num of node
    # node ids start from 1
    node_id = state.node.node_id
    hand_id = int(state.private[state.node.current_player][0])
    row = self.strategy[node_id, hand_id, :]
    # Renormalize the stored row so it is a valid probability vector.
    probs = row / row.sum()
    choice = np.random.choice(np.arange(game_settings.actions_count), 1,
                              replace=False, p=probs.numpy())
    return arguments.LongTensor([choice])
def select_action(self, state, *useless):
    """Epsilon-greedy action selection for the Q-network.

    With probability eps_threshold (decaying as 1/sqrt(steps_done)) draw
    a random action from arguments.dqn_init_policy; otherwise return the
    greedy action from the model. Returns a 1x1 LongTensor.
    """
    # eval() so batch-norm layers use their running statistics.
    self.model.eval()
    sample = random.random()
    # NOTE(review): steps_done == 0 would make this infinite (always
    # explore); presumably steps_done is advanced elsewhere — confirm.
    eps_threshold = 0.06 / np.sqrt(self.steps_done)
    if sample <= eps_threshold:
        # Explore: sample from the initial policy distribution.
        m = Categorical(arguments.dqn_init_policy)
        action = arguments.LongTensor(1, 1)
        action[0] = m.sample()
        return action
    # Exploit: greedy action with the highest Q-value.
    return self.model(Variable(state)).data.max(1)[1].view(1, 1)
def make_data(size=10000):
    """Build `size` random labeled state tensors.

    Mutates the module-level `state` in place and reads the module-level
    `env` / `arguments` globals. Returns (labels, tensors) where each
    label is (hole-card value, cloned bets tensor).
    """
    labels = []
    tensors = []
    for _ in range(size):
        # Randomize the fields state2tensor reads.
        state.street = np.random.randint(2)
        state.current_player = np.random.randint(3)
        state.terminal = True
        state.hole = torch.LongTensor(3, 1).fill_(0)
        state.hole[state.current_player][0] = np.random.randint(10)
        state.board = torch.LongTensor([6])
        state.bets = arguments.LongTensor(
            np.random.randint(arguments.stack, size=3))
        encoded = env.state2tensor(state)
        # Clone bets: the tensor is rewritten on the next iteration.
        labels.append(
            (state.hole[state.current_player].item(), state.bets.clone()))
        tensors.append(encoded)
    return labels, tensors
#for i in range(game_settings.player_count): # net_sl[i].model.load_state_dict(torch.load(arguments.WORK_PATH+'/Data/Model/Iter:' + iter_str + '_' + str(i) +'_' + '.sl')) # net_sl[i].model.eval() # net_rl[i].model.load_state_dict(torch.load(arguments.WORK_PATH+'/Data/Model/Iter:' + iter_str + '_' + str(i) +'_' + '.rl')) # net_rl[i].model.eval() state = GameState() call = Action(atype=constants.actions.ccall, amount=0) rrasie = Action(atype=constants.actions.rraise, amount=1000) fold = Action(atype=constants.actions.fold, amount=0) hole = torch.LongTensor([[0], [1], [2], [3], [4], [5]]) #board = torch.LongTensor([6,30,31,38,43]) board = torch.LongTensor([6]) state.bets = arguments.LongTensor([1000, 1000, 1000, 1000, 1000, 1000]) state.street = 1 state.current_player = 0 state.hole = hole state.board = board state.train = False state.do_action(rrasie) state.do_action(fold) state.do_action(fold) state.do_action(fold) state.do_action(call) #state.do_action(call) state.street = 1 state.terminal = True