Example #1
def store_memory(env, agents):
    # Walk each recorded episode and push (state, action, next_state, reward)
    # transitions into the agent's replay memory.
    for record_list in env.memory:
        for i in range(len(record_list)):
            state, action, reward = record_list[i]
            # The next state is the state in which the agent acts next;
            # the last record of an episode has no successor.
            if i + 1 >= len(record_list):
                next_state = None
            else:
                next_state, _, _ = record_list[i + 1]
            # Convert to tensors.
            state_tensor = env.state2tensor(state)
            action_tensor = arguments.LongTensor([[action]])
            next_state_tensor = env.state2tensor(next_state)
            # Normalize the reward by the stack size before storing it.
            agents.rl.memory.push(state_tensor, action_tensor,
                                  next_state_tensor,
                                  reward.unsqueeze(1) / arguments.stack)
Example #2
def test_five_card(state):
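    # Drive a six-player state onto the final street with a five-card board,
    # then play a short raise/fold/call sequence over it.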
    call = Action(atype=constants.actions.ccall, amount=0)
    rraise = Action(atype=constants.actions.rraise, amount=20000)
    fold = Action(atype=constants.actions.fold, amount=0)

    hole = torch.LongTensor([[40, 41], [50, 51], [4, 5], [8, 9], [44, 45],
                             [48, 49]])
    board = torch.LongTensor([6, 30, 31, 39, 43])

    state.bets = arguments.LongTensor(
        [10000, 10000, 10000, 10000, 10000, 10000])
    state.street = 3
    state.current_player = 0

    state.hole = hole
    state.board = board

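    # One raise, three folds, then a call.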
    state.do_action(rraise)
    state.do_action(fold)
    state.do_action(fold)
    state.do_action(fold)
    state.do_action(call)
    state.street = 3
Example #3
    def select_action(self, state):
        self.model.eval()  # eval mode so batch norm uses running statistics
        policy = self.model(Variable(state)).data
        # The network outputs log-softmax values; exponentiate to recover
        # the action probabilities.
        policy = torch.exp(policy)
        m = Categorical(policy)
        action = arguments.LongTensor(1, 1)
        action[0] = m.sample()
        return action
Example #4
    def select_action(self, state):
        # TODO: count the number of nodes
        # Combine the node id and the current player's private card into a
        # single row index, then normalize that row of the state-action
        # table into a policy.
        state_id = int((state.node.node_id * 4 +
                        state.private[state.node.current_player])[0])
        row = self.s_a_table[state_id, :]
        policy = row / row.sum()

        # Inverse-CDF sampling: subtract each action's probability from a
        # uniform draw and stop once the remainder crosses zero.
        random_num = torch.rand(1)
        for i in range(game_settings.actions_count):
            if random_num.sub_(policy[i])[0] <= 0:
                return arguments.LongTensor([[i]])
        # Fall back to the last action if rounding leaves a tiny remainder.
        return arguments.LongTensor([[game_settings.actions_count - 1]])
Example #5
    def select_action(self, state):
        # TODO: count the number of nodes
        # Node ids start from 1; index the strategy table by
        # (node id, private hand) and normalize the row into a policy.
        state_id = state.node.node_id
        hand_id = int(state.private[state.node.current_player][0])
        row = self.strategy[state_id, hand_id, :]
        policy = row / row.sum()

        # Sample a single action index according to the policy.
        action = arguments.LongTensor(
            [np.random.choice(np.arange(game_settings.actions_count),
                              1, replace=False, p=policy.numpy())])
        return action
Example #6
    def select_action(self, state, *useless):
        self.model.eval()  # eval mode so batch norm uses running statistics
        sample = random.random()
        # The exploration rate decays with the square root of the step
        # count; before any steps have been taken, always explore.
        if self.steps_done > 0:
            eps_threshold = 0.06 / np.sqrt(self.steps_done)
        else:
            eps_threshold = 1.0
        if sample > eps_threshold:
            # Exploit: take the greedy action under the network's Q-values.
            return self.model(Variable(state)).data.max(1)[1].view(1, 1)
        else:
            # Explore: sample from the initial policy distribution.
            m = Categorical(arguments.dqn_init_policy)
            action = arguments.LongTensor(1, 1)
            action[0] = m.sample()
            return action
Example #7
def make_data(size=10000):
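    # Generate `size` random terminal states; return (hole card, bets) labels
    # together with the corresponding state tensors.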
    cat = []
    data = []

    for _ in range(size):
        state.street = np.random.randint(2)
        state.current_player = np.random.randint(3)
        state.terminal = True

        state.hole = torch.LongTensor(3, 1).fill_(0)
        state.hole[state.current_player][0] = np.random.randint(10)
        state.board = torch.LongTensor([6])

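        # Random bet sizes, one per player, bounded by the stack.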
        state.bets = arguments.LongTensor(
            np.random.randint(arguments.stack, size=3))

        state_tensor = env.state2tensor(state)

        cat.append(
            (state.hole[state.current_player].item(), state.bets.clone()))
        data.append(state_tensor)

    return cat, data
Example #8
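# Build a six-player toy hand: equal bets of 1000, one raise, three folds,
# and a call, then mark the hand terminal.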
state = GameState()
call = Action(atype=constants.actions.ccall, amount=0)
rraise = Action(atype=constants.actions.rraise, amount=1000)
fold = Action(atype=constants.actions.fold, amount=0)

hole = torch.LongTensor([[0], [1], [2], [3], [4], [5]])
board = torch.LongTensor([6])

state.bets = arguments.LongTensor([1000, 1000, 1000, 1000, 1000, 1000])
state.street = 1
state.current_player = 0

state.hole = hole
state.board = board

state.train = False
state.do_action(rraise)
state.do_action(fold)
state.do_action(fold)
state.do_action(fold)
state.do_action(call)
state.street = 1
state.terminal = True