def __init__(self, seed, nO, nA, nB, params, hidden_dims=(64, 64), activation=F.leaky_relu):
    """Assemble the input preprocessor, transformer encoder and dueling heads.

    Args:
        seed: Accepted for interface compatibility; not used in this constructor.
        nO: Stored as ``self.nO``.
        nA: Stored as ``self.nA``; together with ``nB`` it sizes the advantage head.
        nB: Combined with ``nA`` to size the advantage head.
        params: Configuration mapping. Keys read here: ``'maxlen'``,
            ``'state_mapping'``, ``'device'``, ``'transformer_in'`` and
            ``'transformer_out'``. The mapping is also passed to
            ``PreProcessLayer`` (with ``critic=True``).
        hidden_dims: Accepted for interface compatibility; not used in this constructor.
        activation: Stored as ``self.activation``.
    """
    super().__init__()
    self.activation = activation
    self.nO, self.nA = nO, nA
    # Width of the advantage head.
    # NOTE(review): presumably two of the nA action categories are replaced
    # by the nB bet-size categories -- confirm against the action encoding.
    self.combined_output = nA - 2 + nB

    self.process_input = PreProcessLayer(params, critic=True)
    self.maxlen = params['maxlen']
    self.mapping = params['state_mapping']
    self.device = params['device']

    # Transformer trunk: 8 heads, depth 2.
    encoder_in = params['transformer_in']
    encoder_out = params['transformer_out']
    attention_heads, encoder_depth = 8, 2
    self.transformer = CTransformer(
        encoder_in, attention_heads, encoder_depth, self.maxlen, encoder_out
    )
    self.dropout = nn.Dropout(0.5)

    # Dueling heads: a scalar value and one advantage per combined action.
    self.value_output = nn.Linear(encoder_out, 1)
    self.advantage_output = nn.Linear(encoder_out, self.combined_output)
class OmahaQCritic(Network):
    """Dueling Q-value critic: preprocessing layer -> transformer -> value/advantage heads.

    The Q estimate is combined as ``Q = V + A - mean(A)`` over the last axis.
    """

    def __init__(self, seed, nO, nA, nB, params, hidden_dims=(64, 64), activation=F.leaky_relu):
        """Build the critic.

        Args:
            seed: Accepted for interface compatibility; not used here.
            nO: Stored as ``self.nO``.
            nA: Stored as ``self.nA``; together with ``nB`` it sizes the advantage head.
            nB: Combined with ``nA`` to size the advantage head.
            params: Configuration mapping. Keys read here: ``'maxlen'``,
                ``'state_mapping'``, ``'device'``, ``'transformer_in'`` and
                ``'transformer_out'``. Also passed to ``PreProcessLayer``.
            hidden_dims: Accepted for interface compatibility; not used here.
            activation: Stored as ``self.activation``.
        """
        super().__init__()
        self.activation = activation
        self.nO = nO
        self.nA = nA
        # Width of the advantage head.
        # NOTE(review): presumably two of the nA action categories are
        # replaced by the nB bet-size categories -- confirm.
        self.combined_output = nA - 2 + nB
        self.process_input = PreProcessLayer(params)
        self.maxlen = params['maxlen']
        self.mapping = params['state_mapping']
        self.device = params['device']
        emb = params['transformer_in']
        n_heads = 8
        depth = 2
        self.transformer = CTransformer(emb, n_heads, depth, self.maxlen, params['transformer_out'])
        self.dropout = nn.Dropout(0.5)
        self.value_output = nn.Linear(params['transformer_out'], 1)
        self.advantage_output = nn.Linear(params['transformer_out'], self.combined_output)

    def set_device(self, device):
        """Record ``device`` on this module and forward it to the preprocessing layer."""
        self.device = device
        self.process_input.set_device(device)

    def forward(self, state):
        """Compute dueling Q-values for ``state``.

        Args:
            state: Array-like or ``torch.Tensor``. It is converted to a
                float32 tensor on ``self.device`` before preprocessing.

        Returns:
            dict: ``{'value': q}`` where ``q`` has its leading dimension
            squeezed away when that dimension has size 1.
        """
        if isinstance(state, torch.Tensor):
            # torch.tensor(tensor) copy-constructs and warns on every call;
            # detach().clone() is the documented equivalent without the warning.
            x = state.detach().clone().to(device=self.device, dtype=torch.float32)
        else:
            x = torch.tensor(state, dtype=torch.float32).to(self.device)
        out = self.process_input(x)
        q_input = self.transformer(out)
        a = self.advantage_output(q_input)
        v = self.value_output(q_input)
        v = v.expand_as(a)
        # Dueling combination: centre the advantages so V carries the state value.
        q = v + a - a.mean(-1, keepdim=True).expand_as(a)
        outputs = {'value': q.squeeze(0)}
        return outputs
def __init__(self, seed, nO, nA, nB, params, hidden_dims=(64, 64), activation=F.leaky_relu):
    """Create the preprocessing layer, LSTM/policy layers, transformer and dueling heads.

    Args:
        seed: Accepted for interface compatibility; not used in this constructor.
        nO: Stored as ``self.nO``.
        nA: Stored as ``self.nA``.
        nB: Stored as ``self.nB``.
        params: Configuration mapping. Keys read here: ``'maxlen'``,
            ``'state_mapping'``, ``'device'``, ``'transformer_in'`` and
            ``'transformer_out'``. Also passed to ``PreProcessLayer``.
        hidden_dims: Accepted for interface compatibility; not used in this constructor.
        activation: Stored as ``self.activation``.
    """
    super().__init__()
    self.activation = activation
    self.nO, self.nA, self.nB = nO, nA, nB
    # Width shared by the policy and advantage outputs.
    # NOTE(review): presumably two of the nA action categories are replaced
    # by the nB bet-size categories -- confirm.
    self.combined_output = nA - 2 + nB
    self.maxlen = params['maxlen']
    self.mapping = params['state_mapping']
    self.device = params['device']

    self.helper_functions = NetworkFunctions(self.nA, self.nB)
    self.process_input = PreProcessLayer(params)

    # Recurrent / policy layers.
    self.lstm = nn.LSTM(1280, 128)
    self.policy_out = nn.Linear(1280, self.combined_output)
    self.noise = GaussianNoise(self.device)

    # Transformer trunk: 8 heads, depth 2.
    encoder_in = params['transformer_in']
    encoder_out = params['transformer_out']
    attention_heads, encoder_depth = 8, 2
    self.transformer = CTransformer(
        encoder_in, attention_heads, encoder_depth, self.maxlen, encoder_out
    )
    self.dropout = nn.Dropout(0.5)

    # Dueling heads on top of the transformer output width.
    self.value_output = nn.Linear(encoder_out, 1)
    self.advantage_output = nn.Linear(encoder_out, self.combined_output)
def __init__(self, seed, nS, nA, nB, params, hidden_dims=(64, 64), activation=F.leaky_relu):
    """Create the embedders, LSTM and fully connected layers of this network.

    Args:
        seed: Accepted for interface compatibility; seeding is not performed here.
        nS: Stored as ``self.nS``.
        nA: Stored as ``self.nA``.
        nB: Stored as ``self.nB``; also sizes the bet-size embedder.
        params: Configuration mapping. Keys read here: ``'maxlen'`` and
            ``'mapping'``. Also passed to ``PreProcessLayer``.
        hidden_dims: Output widths of ``fc1`` and ``fc2``.
        activation: Stored as ``self.activation``.
    """
    super().__init__()
    self.activation = activation
    self.nS = nS
    self.nA = nA
    self.nB = nB
    # Width of the final output layer.
    # NOTE(review): presumably two of the nA action categories are replaced
    # by the nB bet-size categories -- confirm.
    self.combined_output = nA - 2 + nB
    self.helper_functions = NetworkFunctions(self.nA, self.nB)
    self.maxlen = params['maxlen']
    self.process_input = PreProcessLayer(params)
    self.mapping = params['mapping']
    self.hand_emb = Embedder(5, 64)
    self.action_emb = Embedder(6, 64)
    self.betsize_emb = Embedder(self.nB, 64)
    self.noise = GaussianNoise()
    # Input feature width of the LSTM.
    self.emb = 1248
    self.lstm = nn.LSTM(self.emb, 128)
    self.fc1 = nn.Linear(528, hidden_dims[0])
    self.fc2 = nn.Linear(hidden_dims[0], hidden_dims[1])
    self.fc3 = nn.Linear(1280, self.combined_output)
    self.dropout = nn.Dropout(0.5)
class OmahaObsQCritic(Network):
    """Dueling Q-value critic fed with observations.

    Pipeline: ``PreProcessLayer(critic=True)`` -> ``CTransformer`` ->
    value and advantage heads combined as ``Q = V + A - mean(A)``.
    """

    def __init__(self, seed, nO, nA, nB, params, hidden_dims=(64, 64), activation=F.leaky_relu):
        """Build the critic.

        Args:
            seed: Accepted for interface compatibility; not used here.
            nO: Stored as ``self.nO``.
            nA: Stored as ``self.nA``; together with ``nB`` it sizes the advantage head.
            nB: Combined with ``nA`` to size the advantage head.
            params: Configuration mapping. Keys read here: ``'maxlen'``,
                ``'state_mapping'``, ``'device'``, ``'transformer_in'`` and
                ``'transformer_out'``. Also passed to ``PreProcessLayer``.
            hidden_dims: Accepted for interface compatibility; not used here.
            activation: Stored as ``self.activation``.
        """
        super().__init__()
        self.activation = activation
        self.nO, self.nA = nO, nA
        # Width of the advantage head.
        # NOTE(review): presumably two of the nA action categories are
        # replaced by the nB bet-size categories -- confirm.
        self.combined_output = nA - 2 + nB

        self.process_input = PreProcessLayer(params, critic=True)
        self.maxlen = params['maxlen']
        self.mapping = params['state_mapping']
        self.device = params['device']

        # Transformer trunk: 8 heads, depth 2.
        encoder_in = params['transformer_in']
        encoder_out = params['transformer_out']
        attention_heads, encoder_depth = 8, 2
        self.transformer = CTransformer(
            encoder_in, attention_heads, encoder_depth, self.maxlen, encoder_out
        )
        self.dropout = nn.Dropout(0.5)
        self.value_output = nn.Linear(encoder_out, 1)
        self.advantage_output = nn.Linear(encoder_out, self.combined_output)

    def set_device(self, device):
        """Remember ``device`` and propagate it to the preprocessing layer."""
        self.device = device
        self.process_input.set_device(device)

    def forward(self, obs):
        """Return dueling Q-values for ``obs``.

        Args:
            obs: A ``torch.Tensor`` (used as given) or array-like data, which
                is converted to a float32 tensor on ``self.device``.

        Returns:
            dict: ``{'value': q}`` where ``q`` has its leading dimension
            squeezed away when that dimension has size 1.
        """
        if isinstance(obs, torch.Tensor):
            features = obs
        else:
            features = torch.tensor(obs, dtype=torch.float32).to(self.device)

        encoded = self.transformer(self.process_input(features))
        advantage = self.advantage_output(encoded)
        value = self.value_output(encoded)
        # Broadcasting expands the single value column and the advantage mean
        # across the action axis.
        q_values = value + advantage - advantage.mean(-1, keepdim=True)
        return {'value': q_values.squeeze(0)}
def __init__(self, seed, nS, nA, nB, params, hidden_dims=(64, 64), activation=F.leaky_relu):
    """Create the preprocessing layer, bidirectional LSTM, batch norm and output layer.

    Args:
        seed: Accepted for interface compatibility; seeding is not performed here.
        nS: Stored as ``self.nS``.
        nA: Stored as ``self.nA``.
        nB: Stored as ``self.nB``; also sizes the bet-size embedder.
        params: Configuration mapping. Keys read here: ``'maxlen'``,
            ``'device'``, ``'epsilon'``, ``'epsilon_weights'``,
            ``'state_mapping'``, ``'lstm_in'`` and ``'lstm_out'``. Also
            passed to ``PreProcessLayer``.
        hidden_dims: Accepted for interface compatibility; not used in this constructor.
        activation: Stored as ``self.activation``.
    """
    super().__init__()
    self.activation = activation
    self.nS = nS
    self.nA = nA
    self.nB = nB
    # Width of the final output layer.
    # NOTE(review): presumably two of the nA action categories are replaced
    # by the nB bet-size categories -- confirm.
    self.combined_output = nA - 2 + nB
    self.helper_functions = NetworkFunctions(self.nA, self.nB)
    self.maxlen = params['maxlen']
    self.device = params['device']
    self.epsilon = params['epsilon']
    # Plain tensor attribute (not a registered buffer), so it is moved to the
    # device explicitly here.
    self.epsilon_weights = params['epsilon_weights'].to(self.device)
    self.process_input = PreProcessLayer(params)
    self.state_mapping = params['state_mapping']
    self.action_emb = Embedder(Action.UNOPENED, 64)
    self.betsize_emb = Embedder(self.nB, 64)
    self.noise = GaussianNoise(self.device)
    self.emb = 1248
    self.lstm = nn.LSTM(params['lstm_in'], params['lstm_out'], bidirectional=True)
    self.batchnorm = nn.BatchNorm1d(self.maxlen)
    # NOTE(review): 5120 input features presumably equals
    # maxlen * 2 * params['lstm_out'] -- confirm against the config.
    self.fc_final = nn.Linear(5120, self.combined_output)
def __init__(self, seed, nO, nA, nB, params, hidden_dims=(64, 64), activation=F.leaky_relu):
    """Create the preprocessing layer, a three-layer MLP and dueling heads.

    Args:
        seed: Accepted for interface compatibility; seeding is not performed here.
        nO: Stored as ``self.nO``.
        nA: Stored as ``self.nA``; output width of ``fc3`` and the advantage head.
        nB: Accepted for interface compatibility; not used in this constructor.
        params: Configuration mapping. Key read here: ``'mapping'``. Also
            passed to ``PreProcessLayer`` (with ``critic=True``).
        hidden_dims: Output widths of ``fc1`` and ``fc2``.
        activation: Stored as ``self.activation``.
    """
    super().__init__()
    self.activation = activation
    self.nO, self.nA = nO, nA
    self.mapping = params['mapping']
    self.process_input = PreProcessLayer(params, critic=True)

    first_width, second_width = hidden_dims[0], hidden_dims[1]
    self.fc1 = nn.Linear(304, first_width)
    self.fc2 = nn.Linear(first_width, second_width)
    self.fc3 = nn.Linear(second_width, nA)
    self.dropout = nn.Dropout(0.5)

    # NOTE(review): both heads take 64 input features, which matches the
    # default hidden_dims only -- confirm which layer feeds them.
    head_in = 64
    self.value_output = nn.Linear(head_in, 1)
    self.advantage_output = nn.Linear(head_in, self.nA)
def __init__(self, seed, nO, nA, nB, params, hidden_dims=(64, 64), activation=F.leaky_relu):
    """Create the preprocessing layer, transformer and dueling heads.

    Args:
        seed: Accepted for interface compatibility; not used in this constructor.
        nO: Stored as ``self.nO``.
        nA: Stored as ``self.nA``; last argument to ``CTransformer`` and
            output width of the advantage head.
        nB: Accepted for interface compatibility; not used in this constructor.
        params: Configuration mapping. Keys read here: ``'maxlen'`` and
            ``'mapping'``. Also passed to ``PreProcessLayer``.
        hidden_dims: Accepted for interface compatibility; not used in this constructor.
        activation: Stored as ``self.activation``.
    """
    super().__init__()
    self.activation = activation
    self.nO, self.nA = nO, nA
    self.process_input = PreProcessLayer(params)
    self.maxlen = params['maxlen']
    self.mapping = params['mapping']

    # Transformer trunk: embedding width 1248, 8 heads, depth 2.
    embedding_width, attention_heads, encoder_depth = 1248, 8, 2
    self.transformer = CTransformer(
        embedding_width, attention_heads, encoder_depth, self.maxlen, self.nA
    )
    self.dropout = nn.Dropout(0.5)

    # NOTE(review): the heads take 5 input features while the transformer's
    # last argument is nA -- presumably nA == 5; confirm.
    head_in = 5
    self.value_output = nn.Linear(head_in, 1)
    self.advantage_output = nn.Linear(head_in, self.nA)
class OmahaActor(Network):
    """Stochastic policy over the combined action / bet-size categories.

    A state goes through ``PreProcessLayer``, is padded or truncated to
    ``maxlen`` steps, encoded by a bidirectional LSTM with batch norm, and
    projected to ``combined_output`` logits. With ``target=True`` the policy
    is epsilon-greedy: with probability ``epsilon`` a legal action is drawn
    from ``epsilon_weights`` instead.
    """

    def __init__(self, seed, nS, nA, nB, params, hidden_dims=(64, 64), activation=F.leaky_relu):
        """Build the actor.

        Args:
            seed: Accepted for interface compatibility; seeding is not performed here.
            nS: Stored as ``self.nS``.
            nA: Stored as ``self.nA``.
            nB: Stored as ``self.nB``; also sizes the bet-size embedder.
            params: Configuration mapping. Keys read here: ``'maxlen'``,
                ``'device'``, ``'epsilon'``, ``'epsilon_weights'``,
                ``'state_mapping'``, ``'lstm_in'`` and ``'lstm_out'``. Also
                passed to ``PreProcessLayer``.
            hidden_dims: Accepted for interface compatibility; not used here.
            activation: Stored as ``self.activation``.
        """
        super().__init__()
        self.activation = activation
        self.nS = nS
        self.nA = nA
        self.nB = nB
        # Width of the policy output.
        # NOTE(review): presumably two of the nA action categories are
        # replaced by the nB bet-size categories -- confirm.
        self.combined_output = nA - 2 + nB
        self.helper_functions = NetworkFunctions(self.nA, self.nB)
        self.maxlen = params['maxlen']
        self.device = params['device']
        self.epsilon = params['epsilon']
        # Plain tensor attribute (not a registered buffer); see set_device.
        self.epsilon_weights = params['epsilon_weights'].to(self.device)
        self.process_input = PreProcessLayer(params)
        self.state_mapping = params['state_mapping']
        self.action_emb = Embedder(Action.UNOPENED, 64)
        self.betsize_emb = Embedder(self.nB, 64)
        self.noise = GaussianNoise(self.device)
        self.emb = 1248
        # NOTE(review): nn.LSTM defaults to batch_first=False, yet forward()
        # feeds it a (B, maxlen, C) tensor, so the recurrence runs over the
        # batch axis. Left unchanged to keep trained-model behavior; confirm
        # whether batch_first=True was intended.
        self.lstm = nn.LSTM(params['lstm_in'], params['lstm_out'], bidirectional=True)
        self.batchnorm = nn.BatchNorm1d(self.maxlen)
        # forward() flattens the (B, maxlen, 2 * lstm_out) LSTM output, so
        # maxlen * 2 * lstm_out must equal 5120.
        self.fc_final = nn.Linear(5120, self.combined_output)

    def set_device(self, device):
        """Record ``device``, forward it to the preprocessing layer and move ``epsilon_weights``."""
        self.device = device
        self.process_input.set_device(device)
        # epsilon_weights is not a buffer, so Module.to() never moves it.
        self.epsilon_weights = self.epsilon_weights.to(device)
        # NOTE(review): self.noise was constructed with the old device and is
        # not updated here -- confirm whether GaussianNoise needs the same.

    def _as_mask(self, mask):
        """Return ``mask`` as a float32 tensor on ``self.device``."""
        if isinstance(mask, torch.Tensor):
            return mask.to(device=self.device, dtype=torch.float32)
        return torch.tensor(mask, dtype=torch.float32).to(self.device)

    def forward(self, state, action_mask, betsize_mask, target=False):
        """Sample an action for ``state`` restricted to the legal moves.

        Args:
            state: Array-like or tensor; ``B,M,39`` per the original docstring.
                Non-tensor input is converted to float32 on ``self.device``.
            action_mask: Mask over action categories (array-like or tensor).
            betsize_mask: Mask over bet-size categories (array-like or tensor).
            target: When true, with probability ``self.epsilon`` the action is
                drawn from ``epsilon_weights`` restricted to legal moves
                instead of from the network.

        Returns:
            dict with keys ``'action'``, ``'action_category'``,
            ``'action_prob'``, ``'action_probs'`` and ``'betsize'``. For a
            batch of one, ``'action'``, ``'action_category'`` and
            ``'betsize'`` are Python scalars; otherwise they are tensors.
        """
        # Normalise each input on its own: a tensor state must not cause the
        # masks to be skipped.
        if not isinstance(state, torch.Tensor):
            state = torch.tensor(state, dtype=torch.float32).to(self.device)
        action_mask = self._as_mask(action_mask)
        betsize_mask = self._as_mask(betsize_mask)
        mask = combined_masks(action_mask, betsize_mask)
        B = state.size(0)

        if target and np.random.random() < self.epsilon:
            # Exploration: pick a random legal move weighted by epsilon_weights.
            action_masked = self.epsilon_weights * mask
            action_probs = action_masked / action_masked.sum(-1).unsqueeze(-1)
            action = action_probs.multinomial(num_samples=1, replacement=False)
            # Zero placeholder in place of a log-probability, created on the
            # model's device like the sampled branch's log_prob.
            action_prob = torch.zeros(B, 1, device=self.device)
        else:
            out = self.process_input(state)
            M = state.size(1)
            n_padding = self.maxlen - M
            if n_padding < 0:
                # Too long: keep only the most recent maxlen steps.
                h = out[:, -self.maxlen:, :]
            else:
                # Too short: left-pad with zeros so the latest step stays last.
                padding = torch.zeros(B, n_padding, out.size(-1), device=self.device)
                h = torch.cat((padding, out), dim=1)
            lstm_out, _ = self.lstm(h)
            norm = self.batchnorm(lstm_out)
            t_logits = self.fc_final(norm.view(B, -1))
            category_logits = self.noise(t_logits)
            action_soft = F.softmax(category_logits, dim=-1)
            # Apply the legality mask to the softmax output.
            action_probs = norm_frequencies(action_soft, mask)
            m = Categorical(action_probs)
            action = m.sample()
            action_prob = m.log_prob(action)

        previous_action = state[:, -1, self.state_mapping['last_action']].to(self.device)
        action_category, betsize_category = self.helper_functions.batch_unwrap_action(
            action, previous_action)
        if B > 1:
            # batch training
            outputs = {
                'action': action,
                'action_category': action_category,
                'action_prob': action_prob,
                'action_probs': action_probs,
                'betsize': betsize_category
            }
        else:
            # playing hand
            outputs = {
                'action': action.item(),
                'action_category': action_category.item(),
                'action_prob': action_prob,
                'action_probs': action_probs,
                'betsize': betsize_category.item()
            }
        return outputs