Esempio n. 1
0
 def __init__(self,
              seed,
              nO,
              nA,
              nB,
              params,
              hidden_dims=(64, 64),
              activation=F.leaky_relu):
     """Assemble the preprocessing layer, transformer and two linear heads.

     ``seed`` and ``hidden_dims`` are accepted but never read here.
     ``params`` is indexed with the keys ``'maxlen'``, ``'state_mapping'``,
     ``'device'``, ``'transformer_in'`` and ``'transformer_out'`` and is
     also handed to ``PreProcessLayer``.
     """
     super().__init__()
     self.activation = activation
     self.nO, self.nA = nO, nA
     # Output width of the advantage head.
     self.combined_output = nA - 2 + nB
     self.process_input = PreProcessLayer(params, critic=True)
     self.maxlen = params['maxlen']
     self.mapping = params['state_mapping']
     self.device = params['device']
     embed_dim = params['transformer_in']
     head_in = params['transformer_out']
     num_heads, block_depth = 8, 2
     self.transformer = CTransformer(embed_dim, num_heads, block_depth,
                                     self.maxlen, head_in)
     self.dropout = nn.Dropout(0.5)
     # Both heads read the transformer output.
     self.value_output = nn.Linear(head_in, 1)
     self.advantage_output = nn.Linear(head_in, self.combined_output)
Esempio n. 2
0
class OmahaQCritic(Network):
    """Q-value critic: preprocessing layer -> transformer -> value/advantage heads.

    ``forward`` combines the two heads as ``Q = V + A - mean(A)``.
    """

    def __init__(self,
                 seed,
                 nO,
                 nA,
                 nB,
                 params,
                 hidden_dims=(64, 64),
                 activation=F.leaky_relu):
        """Build the sub-modules.

        ``seed`` and ``hidden_dims`` are accepted but not read here.
        ``params`` is indexed with ``'maxlen'``, ``'state_mapping'``,
        ``'device'``, ``'transformer_in'`` and ``'transformer_out'`` and is
        also passed to ``PreProcessLayer``.
        """
        super().__init__()
        self.activation = activation
        self.nO = nO
        self.nA = nA
        # Output width of the advantage head.
        self.combined_output = nA - 2 + nB
        self.process_input = PreProcessLayer(params)
        self.maxlen = params['maxlen']
        self.mapping = params['state_mapping']
        self.device = params['device']
        emb = params['transformer_in']
        n_heads = 8
        depth = 2
        self.transformer = CTransformer(emb, n_heads, depth, self.maxlen,
                                        params['transformer_out'])
        self.dropout = nn.Dropout(0.5)
        self.value_output = nn.Linear(params['transformer_out'], 1)
        self.advantage_output = nn.Linear(params['transformer_out'],
                                          self.combined_output)

    def set_device(self, device):
        """Record ``device`` and forward it to the preprocessing layer."""
        self.device = device
        self.process_input.set_device(device)

    def forward(self, state):
        """Return ``{'value': q}`` for ``state``.

        ``state`` may be a tensor or anything ``torch.tensor`` accepts. The
        returned tensor has its leading dimension squeezed away when that
        dimension has size 1.
        """
        if isinstance(state, torch.Tensor):
            # Avoid torch.tensor(tensor): it copy-constructs (with a warning)
            # and detaches the input from its autograd graph.
            x = state.to(device=self.device, dtype=torch.float32)
        else:
            x = torch.tensor(state, dtype=torch.float32).to(self.device)
        out = self.process_input(x)
        q_input = self.transformer(out)
        a = self.advantage_output(q_input)
        v = self.value_output(q_input)
        # v and the mean broadcast over the last axis of a.
        q = v + a - a.mean(-1, keepdim=True)
        outputs = {'value': q.squeeze(0)}
        return outputs
Esempio n. 3
0
 def __init__(self,
              seed,
              nO,
              nA,
              nB,
              params,
              hidden_dims=(64, 64),
              activation=F.leaky_relu):
     """Create the LSTM, policy layer, noise layer, transformer and linear heads.

     ``seed`` and ``hidden_dims`` are accepted but never read here.
     ``params`` is indexed with the keys ``'maxlen'``, ``'state_mapping'``,
     ``'device'``, ``'transformer_in'`` and ``'transformer_out'`` and is
     also handed to ``PreProcessLayer``.
     """
     super().__init__()
     self.activation = activation
     self.nO, self.nA, self.nB = nO, nA, nB
     # Output width shared by the policy layer and the advantage head.
     self.combined_output = nA - 2 + nB
     self.maxlen = params['maxlen']
     self.mapping = params['state_mapping']
     self.device = params['device']
     self.helper_functions = NetworkFunctions(self.nA, self.nB)
     self.process_input = PreProcessLayer(params)
     self.lstm = nn.LSTM(1280, 128)
     self.policy_out = nn.Linear(1280, self.combined_output)
     self.noise = GaussianNoise(self.device)
     embed_dim = params['transformer_in']
     head_in = params['transformer_out']
     num_heads, block_depth = 8, 2
     self.transformer = CTransformer(embed_dim, num_heads, block_depth,
                                     self.maxlen, head_in)
     self.dropout = nn.Dropout(0.5)
     # Both heads read the transformer output.
     self.value_output = nn.Linear(head_in, 1)
     self.advantage_output = nn.Linear(head_in, self.combined_output)
Esempio n. 4
0
    def __init__(self,
                 seed,
                 nS,
                 nA,
                 nB,
                 params,
                 hidden_dims=(64, 64),
                 activation=F.leaky_relu):
        """Create the embedders, noise layer, LSTM and fully connected layers.

        ``seed`` is accepted but not read here. ``params`` is indexed with
        the keys ``'maxlen'`` and ``'mapping'`` and is also passed to
        ``PreProcessLayer``. ``hidden_dims`` supplies the output widths of
        ``fc1`` and ``fc2``.
        """
        super().__init__()
        self.activation = activation
        self.nS = nS
        self.nA = nA
        self.nB = nB
        # Output width of the final linear layer.
        self.combined_output = nA - 2 + nB
        self.helper_functions = NetworkFunctions(self.nA, self.nB)
        self.maxlen = params['maxlen']
        self.process_input = PreProcessLayer(params)

        self.mapping = params['mapping']
        self.hand_emb = Embedder(5, 64)
        self.action_emb = Embedder(6, 64)
        self.betsize_emb = Embedder(self.nB, 64)
        self.noise = GaussianNoise()
        self.emb = 1248
        self.lstm = nn.LSTM(self.emb, 128)

        self.fc1 = nn.Linear(528, hidden_dims[0])
        self.fc2 = nn.Linear(hidden_dims[0], hidden_dims[1])
        # NOTE(review): fc3 takes 1280 inputs rather than hidden_dims[1], so it
        # presumably does not follow fc2 directly -- confirm against forward().
        self.fc3 = nn.Linear(1280, self.combined_output)
        self.dropout = nn.Dropout(0.5)
Esempio n. 5
0
class OmahaObsQCritic(Network):
    """Q-value critic over observations.

    Pipeline: preprocessing layer -> transformer -> value and advantage
    heads, combined as ``Q = V + A - mean(A)``.
    """

    def __init__(self,
                 seed,
                 nO,
                 nA,
                 nB,
                 params,
                 hidden_dims=(64, 64),
                 activation=F.leaky_relu):
        """Build the sub-modules.

        ``seed`` and ``hidden_dims`` are accepted but not read here.
        ``params`` is indexed with ``'maxlen'``, ``'state_mapping'``,
        ``'device'``, ``'transformer_in'`` and ``'transformer_out'`` and is
        also passed to ``PreProcessLayer``.
        """
        super().__init__()
        self.activation = activation
        self.nO, self.nA = nO, nA
        # Output width of the advantage head.
        self.combined_output = nA - 2 + nB
        self.process_input = PreProcessLayer(params, critic=True)
        self.maxlen = params['maxlen']
        self.mapping = params['state_mapping']
        self.device = params['device']
        embed_dim = params['transformer_in']
        head_in = params['transformer_out']
        num_heads, block_depth = 8, 2
        self.transformer = CTransformer(embed_dim, num_heads, block_depth,
                                        self.maxlen, head_in)
        self.dropout = nn.Dropout(0.5)
        self.value_output = nn.Linear(head_in, 1)
        self.advantage_output = nn.Linear(head_in, self.combined_output)

    def set_device(self, device):
        """Store ``device`` and pass it on to the preprocessing layer."""
        self.device = device
        self.process_input.set_device(device)

    def forward(self, obs):
        """Return ``{'value': q}`` for ``obs``.

        Non-tensor input is converted to a float32 tensor on ``self.device``;
        tensor input is used as given. The result has its leading dimension
        squeezed away when that dimension has size 1.
        """
        if isinstance(obs, torch.Tensor):
            features = obs
        else:
            features = torch.tensor(obs, dtype=torch.float32).to(self.device)
        encoded = self.transformer(self.process_input(features))
        advantage = self.advantage_output(encoded)
        value = self.value_output(encoded)
        # value and the mean broadcast across the last axis of advantage.
        q_values = value + advantage - advantage.mean(-1, keepdim=True)
        return {'value': q_values.squeeze(0)}
Esempio n. 6
0
    def __init__(self,
                 seed,
                 nS,
                 nA,
                 nB,
                 params,
                 hidden_dims=(64, 64),
                 activation=F.leaky_relu):
        """Create the embedders, noise layer, bidirectional LSTM and output layer.

        ``seed`` and ``hidden_dims`` are accepted but not read here.
        ``params`` is indexed with the keys ``'maxlen'``, ``'device'``,
        ``'epsilon'``, ``'epsilon_weights'``, ``'state_mapping'``,
        ``'lstm_in'`` and ``'lstm_out'`` and is also passed to
        ``PreProcessLayer``. ``params['epsilon_weights']`` must support
        ``.to(device)``.
        """
        super().__init__()
        self.activation = activation
        self.nS = nS
        self.nA = nA
        self.nB = nB
        # Output width of the final linear layer.
        self.combined_output = nA - 2 + nB
        self.helper_functions = NetworkFunctions(self.nA, self.nB)
        self.maxlen = params['maxlen']
        self.device = params['device']
        self.epsilon = params['epsilon']
        self.epsilon_weights = params['epsilon_weights'].to(self.device)
        self.process_input = PreProcessLayer(params)

        self.state_mapping = params['state_mapping']
        self.action_emb = Embedder(Action.UNOPENED, 64)
        self.betsize_emb = Embedder(self.nB, 64)
        self.noise = GaussianNoise(self.device)
        self.emb = 1248
        self.lstm = nn.LSTM(params['lstm_in'],
                            params['lstm_out'],
                            bidirectional=True)
        self.batchnorm = nn.BatchNorm1d(self.maxlen)
        # NOTE(review): 5120 is presumably maxlen * 2 * lstm_out (flattened
        # bidirectional LSTM output) -- confirm against forward().
        self.fc_final = nn.Linear(5120, self.combined_output)
Esempio n. 7
0
    def __init__(self,
                 seed,
                 nO,
                 nA,
                 nB,
                 params,
                 hidden_dims=(64, 64),
                 activation=F.leaky_relu):
        """Create the preprocessing layer, three linear layers and two heads.

        ``seed`` and ``nB`` are accepted but never read here. ``params`` is
        indexed with the key ``'mapping'`` and is also handed to
        ``PreProcessLayer``. ``hidden_dims`` supplies the widths of ``fc1``
        and ``fc2``.
        """
        super().__init__()
        self.activation = activation
        self.nO, self.nA = nO, nA

        self.mapping = params['mapping']

        self.process_input = PreProcessLayer(params, critic=True)
        first_width = hidden_dims[0]
        self.fc1 = nn.Linear(304, first_width)
        second_width = hidden_dims[1]
        self.fc2 = nn.Linear(first_width, second_width)
        self.fc3 = nn.Linear(second_width, nA)
        self.dropout = nn.Dropout(0.5)
        # NOTE(review): the heads take a fixed 64 inputs, which matches the
        # default hidden_dims[1] only -- confirm what feeds them in forward().
        head_in = 64
        self.value_output = nn.Linear(head_in, 1)
        self.advantage_output = nn.Linear(head_in, self.nA)
Esempio n. 8
0
    def __init__(self,
                 seed,
                 nO,
                 nA,
                 nB,
                 params,
                 hidden_dims=(64, 64),
                 activation=F.leaky_relu):
        """Create the preprocessing layer, transformer and two linear heads.

        ``seed``, ``nB`` and ``hidden_dims`` are accepted but never read
        here. ``params`` is indexed with the keys ``'maxlen'`` and
        ``'mapping'`` and is also handed to ``PreProcessLayer``.
        """
        super().__init__()
        self.activation = activation
        self.nO, self.nA = nO, nA

        self.process_input = PreProcessLayer(params)
        self.maxlen = params['maxlen']
        self.mapping = params['mapping']
        embed_dim, num_heads, block_depth = 1248, 8, 2
        self.transformer = CTransformer(embed_dim, num_heads, block_depth,
                                        self.maxlen, self.nA)
        self.dropout = nn.Dropout(0.5)
        # NOTE(review): the heads take a fixed 5 inputs while the transformer
        # is built with self.nA as its last argument -- presumably nA == 5;
        # confirm against forward() and the callers.
        head_in = 5
        self.value_output = nn.Linear(head_in, 1)
        self.advantage_output = nn.Linear(head_in, self.nA)
Esempio n. 9
0
class OmahaActor(Network):
    """Stochastic policy network.

    Pipeline in ``forward``: preprocessing layer -> bidirectional LSTM ->
    batch norm -> linear logits -> noise -> softmax -> masked categorical
    sample. When ``target`` is set, a random legal action is drawn instead
    with probability ``epsilon``.
    """

    def __init__(self,
                 seed,
                 nS,
                 nA,
                 nB,
                 params,
                 hidden_dims=(64, 64),
                 activation=F.leaky_relu):
        """Build the sub-modules.

        ``seed`` and ``hidden_dims`` are accepted but not read here.
        ``params`` is indexed with the keys ``'maxlen'``, ``'device'``,
        ``'epsilon'``, ``'epsilon_weights'``, ``'state_mapping'``,
        ``'lstm_in'`` and ``'lstm_out'`` and is also passed to
        ``PreProcessLayer``. ``params['epsilon_weights']`` must support
        ``.to(device)``.
        """
        super().__init__()
        self.activation = activation
        self.nS = nS
        self.nA = nA
        self.nB = nB
        # Number of logits produced by fc_final.
        self.combined_output = nA - 2 + nB
        self.helper_functions = NetworkFunctions(self.nA, self.nB)
        self.maxlen = params['maxlen']
        self.device = params['device']
        self.epsilon = params['epsilon']
        self.epsilon_weights = params['epsilon_weights'].to(self.device)
        self.process_input = PreProcessLayer(params)

        self.state_mapping = params['state_mapping']
        self.action_emb = Embedder(Action.UNOPENED, 64)
        self.betsize_emb = Embedder(self.nB, 64)
        self.noise = GaussianNoise(self.device)
        self.emb = 1248
        self.lstm = nn.LSTM(params['lstm_in'],
                            params['lstm_out'],
                            bidirectional=True)
        self.batchnorm = nn.BatchNorm1d(self.maxlen)
        # forward() flattens the (B, maxlen, 2 * lstm_out) LSTM output to
        # (B, maxlen * 2 * lstm_out); derive the width instead of hard-coding
        # 5120 so the layer follows the configuration.
        self.fc_final = nn.Linear(self.maxlen * 2 * params['lstm_out'],
                                  self.combined_output)

    def set_device(self, device):
        """Record ``device`` and forward it to the preprocessing layer."""
        self.device = device
        self.process_input.set_device(device)

    def forward(self, state, action_mask, betsize_mask, target=False):
        """Sample an action for ``state`` under the given legality masks.

        state: B,M,39 (per the original docstring); the masks are combined
        with ``combined_masks``. Inputs that are not tensors are converted to
        float32 tensors on ``self.device``; tensor inputs are used as given.

        Returns a dict with keys ``'action'``, ``'action_category'``,
        ``'action_prob'``, ``'action_probs'`` and ``'betsize'``. For a batch
        of one, ``'action'``, ``'action_category'`` and ``'betsize'`` are
        Python scalars; otherwise they are tensors.
        """
        # Convert each input independently: previously the masks were only
        # converted when `state` itself was not a tensor.
        if not isinstance(state, torch.Tensor):
            state = torch.tensor(state, dtype=torch.float32).to(self.device)
        if not isinstance(action_mask, torch.Tensor):
            action_mask = torch.tensor(action_mask,
                                       dtype=torch.float32).to(self.device)
        if not isinstance(betsize_mask, torch.Tensor):
            betsize_mask = torch.tensor(betsize_mask,
                                        dtype=torch.float32).to(self.device)
        mask = combined_masks(action_mask, betsize_mask)
        if target and np.random.random() < self.epsilon:
            B = state.size(0)
            # Pick a random legal move, weighted by epsilon_weights.
            # NOTE(review): multinomial yields shape (B, 1) here while the
            # sampling branch below yields (B,) -- confirm callers accept both.
            action_masked = self.epsilon_weights * mask
            action_probs = action_masked / action_masked.sum(-1).unsqueeze(-1)
            action = action_probs.multinomial(num_samples=1, replacement=False)
            # Keep the placeholder on the same device as the other outputs.
            action_prob = torch.zeros(B, 1, device=self.device)
        else:
            out = self.process_input(state)
            B, M, _ = state.size()
            n_padding = self.maxlen - M
            if n_padding < 0:
                # Too long: keep only the most recent maxlen steps.
                h = out[:, -self.maxlen:, :]
            else:
                # Too short: left-pad with zeros up to maxlen.
                padding = torch.zeros(B, n_padding,
                                      out.size(-1)).to(self.device)
                h = torch.cat((padding, out), dim=1)
            # NOTE(review): the LSTM is built without batch_first, so dim 0 of
            # h (B) is treated as the sequence axis -- confirm this is intended.
            lstm_out, hidden_states = self.lstm(h)
            norm = self.batchnorm(lstm_out)
            t_logits = self.fc_final(norm.view(B, -1))
            category_logits = self.noise(t_logits)
            action_soft = F.softmax(category_logits, dim=-1)
            action_probs = norm_frequencies(action_soft, mask)
            m = Categorical(action_probs)
            action = m.sample()
            action_prob = m.log_prob(action)
        previous_action = torch.as_tensor(
            state[:, -1, self.state_mapping['last_action']]).to(self.device)
        action_category, betsize_category = self.helper_functions.batch_unwrap_action(
            action, previous_action)
        if B > 1:
            # batch training
            outputs = {
                'action': action,
                'action_category': action_category,
                'action_prob': action_prob,
                'action_probs': action_probs,
                'betsize': betsize_category
            }
        else:
            # playing hand
            outputs = {
                'action': action.item(),
                'action_category': action_category.item(),
                'action_prob': action_prob,
                'action_probs': action_probs,
                'betsize': betsize_category.item()
            }
        return outputs