# Example #1
class OmahaActor(Network):
    """Actor (policy) network for Omaha poker.

    Preprocesses the game state, encodes the action history with a
    bidirectional LSTM, and maps the result to a single categorical
    distribution over the combined action/betsize output space.
    Supports epsilon-greedy exploration when ``target=True``.
    """

    def __init__(self,
                 seed,
                 nS,
                 nA,
                 nB,
                 params,
                 hidden_dims=(64, 64),
                 activation=F.leaky_relu):
        """
        Args:
            seed: RNG seed. NOTE: accepted but not applied (the
                ``torch.manual_seed`` call was disabled upstream).
            nS: state-space size.
            nA: number of action categories.
            nB: number of discrete bet sizes.
            params: dict; reads 'maxlen', 'device', 'epsilon',
                'epsilon_weights', 'state_mapping', 'lstm_in', 'lstm_out'.
            hidden_dims: unused placeholder kept for interface compatibility.
            activation: activation function (stored, not used in forward).
        """
        super().__init__()
        self.activation = activation
        self.nS = nS
        self.nA = nA
        self.nB = nB
        # Betting actions fan out into nB sizes; the remaining nA - 2
        # actions (presumably fold/check-style) do not — TODO confirm.
        self.combined_output = nA - 2 + nB
        self.helper_functions = NetworkFunctions(self.nA, self.nB)
        self.maxlen = params['maxlen']
        self.device = params['device']
        self.epsilon = params['epsilon']
        self.epsilon_weights = params['epsilon_weights'].to(self.device)
        self.process_input = PreProcessLayer(params)

        self.state_mapping = params['state_mapping']
        self.action_emb = Embedder(Action.UNOPENED, 64)
        self.betsize_emb = Embedder(self.nB, 64)
        self.noise = GaussianNoise(self.device)
        self.emb = 1248
        self.lstm = nn.LSTM(params['lstm_in'],
                            params['lstm_out'],
                            bidirectional=True)
        # Normalizes over the padded time dimension (length self.maxlen).
        self.batchnorm = nn.BatchNorm1d(self.maxlen)
        self.fc_final = nn.Linear(5120, self.combined_output)

    def set_device(self, device):
        """Point subsequent tensor allocations (and preprocessing) at `device`."""
        self.device = device
        self.process_input.set_device(device)

    def forward(self, state, action_mask, betsize_mask, target=False):
        """Sample an action for the given state.

        Args:
            state: (B, M, 39) tensor (or array-like) of game-state history.
            action_mask: mask of legal action categories.
            betsize_mask: mask of legal bet sizes.
            target: when True, with probability ``self.epsilon`` pick a
                random legal move instead of sampling from the policy.

        Returns:
            dict with 'action', 'action_category', 'action_prob',
            'action_probs', 'betsize'; values are unwrapped via ``.item()``
            when B == 1 (playing a single hand).
        """
        if not isinstance(state, torch.Tensor):
            state = torch.tensor(state, dtype=torch.float32).to(self.device)
            action_mask = torch.tensor(action_mask,
                                       dtype=torch.float32).to(self.device)
            betsize_mask = torch.tensor(betsize_mask,
                                        dtype=torch.float32).to(self.device)
        mask = combined_masks(action_mask, betsize_mask)
        if target and np.random.random() < self.epsilon:
            B = state.size(0)
            # Exploration: pick a random legal move, weighted by epsilon_weights.
            action_masked = self.epsilon_weights * mask
            action_probs = action_masked / action_masked.sum(-1).unsqueeze(-1)
            action = action_probs.multinomial(num_samples=1, replacement=False)
            # FIX: allocate on self.device (was implicitly CPU, which breaks
            # any downstream op that combines this with GPU tensors).
            action_prob = torch.zeros(B, 1, device=self.device)
        else:
            out = self.process_input(state)
            B, M, c = state.size()
            # Left-pad (or truncate) the history to exactly self.maxlen steps.
            n_padding = self.maxlen - M
            if n_padding < 0:
                h = out[:, -self.maxlen:, :]
            else:
                padding = torch.zeros(B, n_padding,
                                      out.size(-1)).to(self.device)
                h = torch.cat((padding, out), dim=1)
            lstm_out, hidden_states = self.lstm(h)
            norm = self.batchnorm(lstm_out)
            t_logits = self.fc_final(norm.view(B, -1))
            # Gaussian noise on the logits for extra exploration stochasticity.
            category_logits = self.noise(t_logits)
            action_soft = F.softmax(category_logits, dim=-1)
            # Renormalize probability mass over legal moves only.
            action_probs = norm_frequencies(action_soft, mask)
            m = Categorical(action_probs)
            action = m.sample()
            action_prob = m.log_prob(action)
        # NOTE(review): `action` is (B, 1) on the epsilon branch but (B,)
        # from Categorical.sample(); batch_unwrap_action presumably accepts
        # both shapes — verify.
        previous_action = torch.as_tensor(
            state[:, -1, self.state_mapping['last_action']]).to(self.device)
        action_category, betsize_category = self.helper_functions.batch_unwrap_action(
            action, previous_action)
        if B > 1:
            # batch training: keep tensor-valued outputs
            outputs = {
                'action': action,
                'action_category': action_category,
                'action_prob': action_prob,
                'action_probs': action_probs,
                'betsize': betsize_category
            }
        else:
            # playing a hand: unwrap to python scalars
            outputs = {
                'action': action.item(),
                'action_category': action_category.item(),
                'action_prob': action_prob,
                'action_probs': action_probs,
                'betsize': betsize_category.item()
            }
        return outputs
# Example #2
class OmahaActor(Network):
    """Omaha poker actor network.

    Runs the raw state through a preprocessing layer, encodes the padded
    action history with a bidirectional LSTM, and samples from a masked
    categorical distribution over the combined action/betsize outputs.
    """

    def __init__(self,
                 seed,
                 nS,
                 nA,
                 nB,
                 params,
                 hidden_dims=(64, 64),
                 activation=F.leaky_relu):
        super().__init__()
        self.activation = activation
        self.nS, self.nA, self.nB = nS, nA, nB
        # Betting actions expand into nB sizes; the other nA - 2 do not.
        self.combined_output = nA - 2 + nB
        self.helper_functions = NetworkFunctions(self.nA, self.nB)
        self.maxlen = params['maxlen']
        self.device = params['device']
        self.process_input = PreProcessLayer(params)

        # self.seed = torch.manual_seed(seed)
        self.state_mapping = params['state_mapping']
        self.action_emb = Embedder(Action.UNOPENED, 64)
        self.betsize_emb = Embedder(self.nB, 64)
        self.noise = GaussianNoise(self.device)
        self.emb = 1248
        n_heads = 8
        depth = 2
        self.lstm = nn.LSTM(params['lstm_in'],
                            params['lstm_out'],
                            bidirectional=True)
        self.batchnorm = nn.BatchNorm1d(self.maxlen)
        self.fc_final = nn.Linear(2560, self.combined_output)
        self.dropout = nn.Dropout(0.5)

    def forward(self, state, action_mask, betsize_mask):
        """Sample an action for `state` (shape B, M, 39).

        Returns a dict with keys 'action', 'action_category',
        'action_prob', 'action_probs' and 'betsize'; scalar-valued
        (via .item()) when B == 1, tensor-valued otherwise.
        """
        x = state
        if not isinstance(x, torch.Tensor):
            x = torch.tensor(x, dtype=torch.float32).to(self.device)
            action_mask = torch.tensor(action_mask,
                                       dtype=torch.float).to(self.device)
            betsize_mask = torch.tensor(betsize_mask,
                                        dtype=torch.float).to(self.device)
        mask = combined_masks(action_mask, betsize_mask)
        features = self.process_input(x)
        B, M, _ = features.size()
        # Left-pad the history up to self.maxlen, or keep only the most
        # recent self.maxlen steps when it is already longer.
        deficit = self.maxlen - M
        if deficit >= 0:
            pad = torch.zeros(B, deficit, features.size(-1)).to(self.device)
            seq = torch.cat((pad, features), dim=1)
        else:
            seq = features[:, -self.maxlen:, :]
        encoded, _hidden = self.lstm(seq)
        normed = self.batchnorm(encoded)
        # Flatten the sequence, project to logits, then add Gaussian noise.
        logits = self.noise(self.fc_final(normed.view(B, -1)))
        soft = F.softmax(logits, dim=-1)
        # Renormalize probability mass over the legal moves only.
        action_probs = norm_frequencies(soft, mask)
        previous_action = torch.as_tensor(
            state[:, -1, self.state_mapping['last_action']]).to(self.device)
        dist = Categorical(action_probs)
        action = dist.sample()
        action_category, betsize_category = \
            self.helper_functions.batch_unwrap_action(action, previous_action)
        log_prob = dist.log_prob(action)
        if B == 1:
            # playing a single hand: unwrap tensors to python scalars
            return {
                'action': action.item(),
                'action_category': action_category.item(),
                'action_prob': log_prob,
                'action_probs': action_probs,
                'betsize': betsize_category.item()
            }
        # batch training: keep everything as tensors
        return {
            'action': action,
            'action_category': action_category,
            'action_prob': log_prob,
            'action_probs': action_probs,
            'betsize': betsize_category
        }