class DuelingC51Head(nn.Module):
    """Dueling head that outputs a categorical distribution over atoms per leaf.

    A value stream and an advantage stream are each projected to
    ``num_atoms`` logits. They are combined as ``V + A - mean(A)``, where the
    mean is taken over the leaves selected by ``leaves_mask``, and the result
    is normalised over the atom dimension with a (log-)softmax.

    Args:
        max_num_leaves: number of leaf slots per sample; used to reshape the
            advantage stream.
        in_dim: feature size of both input streams.
        use_noisylayers: build the projections with ``NoisyLinear`` (defined
            elsewhere in the project) instead of ``nn.Linear``.
        num_atoms: number of atoms in the output distribution.
    """

    def __init__(self, max_num_leaves, in_dim, use_noisylayers, num_atoms):
        super(DuelingC51Head, self).__init__()
        self._max_num_leaves = max_num_leaves
        # Must be stored before the layers are built: both the layer widths
        # below and the reshapes in forward() read it.
        self._num_atoms = num_atoms
        if use_noisylayers:
            self._V_out = NoisyLinear(in_dim, self._num_atoms)
            self._A_out = NoisyLinear(in_dim, self._num_atoms)
        else:
            self._V_out = nn.Linear(in_dim, self._num_atoms)
            self._A_out = nn.Linear(in_dim, self._num_atoms)

    def forward(self, h, leaves_mask, log=False):
        """Compute per-leaf atom (log-)probabilities.

        Args:
            h: pair ``(h_V, h_A)`` of value-stream and advantage-stream
                features, each with ``in_dim`` as last dimension.
            leaves_mask: ``[m, max_num_leaves]`` mask weighting the leaves
                that take part in the advantage mean.
            log: return log-probabilities instead of probabilities.

        Returns:
            ``(P, leaves_mask)`` where ``P`` is ``[m, max_num_leaves,
            num_atoms]`` and ``leaves_mask`` is passed through unchanged.

        NOTE: a row of ``leaves_mask`` that sums to zero makes the advantage
        mean 0/0, so that row of the output becomes NaN.
        """
        h_V, h_A = h
        # [m, 1, num_atoms]; broadcasts over the leaf dimension below.
        V = self._V_out(h_V).view(-1, 1, self._num_atoms)
        # [m, max_num_leaves, num_atoms]
        leaves_mask_expanded = leaves_mask.unsqueeze(2).expand(
            -1, -1, self._num_atoms)
        # [m, max_num_leaves, num_atoms]
        A = self._A_out(h_A).view(-1, self._max_num_leaves, self._num_atoms)
        # Mean advantage over the masked leaves, per atom: [m, 1, num_atoms]
        A_mean = ((A * leaves_mask_expanded).sum(1, keepdim=True)
                  / leaves_mask_expanded.sum(1, keepdim=True))
        # [m, max_num_leaves, num_atoms]
        Y = V + A - A_mean
        if log:
            P = F.log_softmax(Y, dim=2)
        else:
            P = F.softmax(Y, dim=2)
        return P, leaves_mask

    def reset_noise(self):
        """Resample noise in both projections (requires NoisyLinear layers)."""
        self._V_out.reset_noise()
        self._A_out.reset_noise()
class DuelingHead(nn.Module):
    """Dueling head producing one scalar Q-value per leaf.

    The value and advantage streams are each projected to a single scalar and
    combined as ``V + A - mean(A)``, with the mean taken over the leaves
    selected by ``leaves_mask``. Leaves whose mask entry is 0 end up at
    ``-inf``.

    Args:
        max_num_leaves: number of leaf slots per sample.
        in_dim: feature size of both input streams.
        use_noisylayers: build the projections with ``NoisyLinear`` (defined
            elsewhere in the project) instead of ``nn.Linear``.
    """

    def __init__(self, max_num_leaves, in_dim, use_noisylayers):
        super(DuelingHead, self).__init__()
        self._max_num_leaves = max_num_leaves
        # Only the selected class name is evaluated.
        projection = NoisyLinear if use_noisylayers else nn.Linear
        self._V_out = projection(in_dim, 1)
        self._A_out = projection(in_dim, 1)

    def forward(self, h, leaves_mask, _):
        """Return masked Q-values of shape ``[m, max_num_leaves]``.

        Args:
            h: pair ``(h_V, h_A)`` of value-stream and advantage-stream
                features.
            leaves_mask: ``[m, max_num_leaves]`` mask; 1 marks a usable leaf,
                0 a leaf to be suppressed.
            _: ignored.
        """
        value_feats, adv_feats = h
        # [m, 1]
        value = self._V_out(value_feats)
        # [m, max_num_leaves]
        advantage = self._A_out(adv_feats).view(-1, self._max_num_leaves)
        # Mean advantage over the masked leaves: [m, 1]
        masked_total = (advantage * leaves_mask).sum(1, keepdim=True)
        masked_count = leaves_mask.sum(1, keepdim=True)
        # [m, max_num_leaves]
        q_values = value + advantage - (masked_total / masked_count)
        # Penalty is +inf where the mask is 0 and 0 where it is 1; any other
        # mask value is carried through as-is.
        penalty = leaves_mask.masked_fill(leaves_mask == 0, float('inf'))
        penalty = penalty.masked_fill(leaves_mask == 1, 0)
        q_values -= penalty
        return q_values

    def reset_noise(self):
        """Resample noise in both projections (requires NoisyLinear layers)."""
        for layer in (self._V_out, self._A_out):
            layer.reset_noise()
class C51Head(nn.Module):
    """Single-stream head that outputs a distribution over atoms per leaf.

    Args:
        max_num_leaves: number of leaf slots per sample; used to reshape the
            projection output.
        in_dim: feature size of the input.
        use_noisylayers: build the projection with ``NoisyLinear`` (defined
            elsewhere in the project) instead of ``nn.Linear``.
        num_atoms: number of atoms in the output distribution.
    """

    def __init__(self, max_num_leaves, in_dim, use_noisylayers, num_atoms):
        super(C51Head, self).__init__()
        self._max_num_leaves = max_num_leaves
        self._num_atoms = num_atoms
        # Only the selected class name is evaluated.
        projection = NoisyLinear if use_noisylayers else nn.Linear
        self._out = projection(in_dim, self._num_atoms)

    def forward(self, h, leaves_mask, log=False):
        """Return ``(P, leaves_mask)``.

        ``P`` is ``[m, max_num_leaves, num_atoms]`` and holds probabilities,
        or log-probabilities when ``log`` is true, normalised over the atom
        dimension. ``leaves_mask`` (``[m, max_num_leaves]``) is passed
        through untouched.
        """
        logits = self._out(h).view(
            -1, self._max_num_leaves, self._num_atoms)
        normalise = F.log_softmax if log else F.softmax
        return normalise(logits, dim=2), leaves_mask

    def reset_noise(self):
        """Resample noise in the projection (requires a NoisyLinear layer)."""
        self._out.reset_noise()
class Head(nn.Module):
    """Three-way output head: DOM-leaf scores, mode scores, goal-token scores.

    Args:
        max_num_leaves: number of leaf slots per sample.
        max_num_goal_tokens: number of goal-token slots per sample.
        dom_stream_dim: feature size of the DOM stream; the mode and token
            streams are expected at a quarter and a half of it respectively.
        use_noisylayers: build the projections with ``NoisyLinear`` (defined
            elsewhere in the project) instead of ``nn.Linear``.
    """

    def __init__(self, max_num_leaves, max_num_goal_tokens, dom_stream_dim,
                 use_noisylayers):
        super(Head, self).__init__()
        self._max_num_leaves = max_num_leaves
        self._max_num_goal_tokens = max_num_goal_tokens
        mode_stream_dim = int(dom_stream_dim / 4)
        token_stream_dim = int(dom_stream_dim / 2)
        # Only the selected class name is evaluated.
        projection = NoisyLinear if use_noisylayers else nn.Linear
        self._out_dom = projection(dom_stream_dim, 1)
        self._out_mode = projection(mode_stream_dim, 2)
        self._out_token = projection(token_stream_dim, 1)

    @staticmethod
    def _suppress_masked(scores, valid_mask):
        """Subtract +inf from ``scores`` wherever ``valid_mask`` is 0.

        Entries with mask value 1 are left unchanged; any other mask value is
        subtracted as-is. ``scores`` is modified in place and returned.
        """
        penalty = valid_mask.masked_fill(valid_mask == 0, float('inf'))
        penalty = penalty.masked_fill(valid_mask == 1, 0)
        scores -= penalty
        return scores

    def forward(self, h_list, leaves_mask, goal_mask, _):
        """Score every DOM leaf, both modes and every goal token.

        Args:
            h_list: triple ``(h_dom, h_mode, h_token)`` of stream features.
            leaves_mask: ``[m, max_num_leaves]`` mask, 0 for leaves to
                suppress.
            goal_mask: ``[m, max_num_goal_tokens]`` mask, 0 for tokens to
                suppress.
            _: ignored.

        Returns:
            ``(Q_dom, Q_mode, Q_token)``: Q_dom over DOM leaves
            ``[m, max_num_leaves]``, Q_mode over the binary click/type choice
            ``[m, 2]``, and Q_token over goal-token indices
            ``[m, max_num_goal_tokens]``.
        """
        dom_feats, mode_feats, token_feats = h_list

        # 1. [m, max_num_leaves] <= [m*max_num_leaves, 1]
        dom_scores = self._out_dom(dom_feats).view(-1, self._max_num_leaves)
        dom_scores = self._suppress_masked(dom_scores, leaves_mask)

        # 2. [m, 2]
        mode_scores = self._out_mode(mode_feats)

        # 3. [m, max_num_goal_tokens] <= [m*max_num_goal_tokens, 1]
        token_scores = self._out_token(token_feats).view(
            -1, self._max_num_goal_tokens)
        token_scores = self._suppress_masked(token_scores, goal_mask)

        return dom_scores, mode_scores, token_scores

    def reset_noise(self):
        """Resample noise in all projections (requires NoisyLinear layers)."""
        for layer in (self._out_dom, self._out_mode, self._out_token):
            layer.reset_noise()
class Body(nn.Module):
    """Shared trunk producing hidden features for the DOM, mode and token streams.

    Each stream is a two-layer ReLU MLP. The DOM stream consumes the
    per-leaf local and/or neighbour embeddings, optionally concatenated with
    the global embedding broadcast to every leaf. The mode stream consumes
    the global embedding; the token stream consumes the goal-token
    embeddings.

    Args:
        in_dom_global_dim: feature size of the global embedding.
        max_num_leaves: number of leaf slots per sample.
        max_num_goal_tokens: number of goal-token slots per sample.
        in_dom_dim: feature size of each per-leaf (local / neighbour)
            embedding.
        in_token_dim: feature size of each goal-token embedding.
        h_dom_dim: hidden size of the DOM stream; the mode and token streams
            use a quarter and a half of it respectively.
        use_noisylayers: build the layers with ``NoisyLinear`` (defined
            elsewhere in the project) instead of ``nn.Linear``.
        use_local: feed the local embedding to the DOM stream.
        use_neighbor: feed the neighbour embedding to the DOM stream.
        use_global: also feed the global embedding to the DOM stream.

    At least one of ``use_local`` / ``use_neighbor`` must be true.
    """

    def __init__(self, in_dom_global_dim, max_num_leaves, max_num_goal_tokens,
                 in_dom_dim, in_token_dim, h_dom_dim, use_noisylayers,
                 use_local, use_neighbor, use_global):
        super(Body, self).__init__()
        assert use_local or use_neighbor
        self._use_local = use_local
        self._use_neighbor = use_neighbor
        self._use_global = use_global

        # Width of the concatenated DOM input: one or two per-leaf
        # embeddings, plus the global embedding when it is enabled.
        num_leaf_parts = 2 if (use_local and use_neighbor) else 1
        self._in_cat_dom_dim = in_dom_dim * num_leaf_parts
        if use_global:
            self._in_cat_dom_dim += in_dom_global_dim

        self._in_mode_dim = in_dom_global_dim
        self._in_token_dim = in_token_dim
        self._max_num_leaves = max_num_leaves
        self._max_num_goal_tokens = max_num_goal_tokens
        h_mode_dim, h_token_dim = int(h_dom_dim / 4), int(h_dom_dim / 2)

        # Plain linear layers are built when noisy layers are disabled, so
        # forward() works in both configurations. Only the selected class
        # name is evaluated.
        layer_cls = NoisyLinear if use_noisylayers else nn.Linear
        self._fc1_dom = layer_cls(self._in_cat_dom_dim, h_dom_dim)
        self._fc2_dom = layer_cls(h_dom_dim, h_dom_dim)
        self._fc1_mode = layer_cls(self._in_mode_dim, h_mode_dim)
        self._fc2_mode = layer_cls(h_mode_dim, h_mode_dim)
        self._fc1_token = layer_cls(self._in_token_dim, h_token_dim)
        self._fc2_token = layer_cls(h_token_dim, h_token_dim)

    def forward(self, e_local, e_neighbor, e_global, e_tokens):
        """Run the three streams.

        Args:
            e_local: ``[m, max_num_leaves, in_dom_dim]`` local embeddings
                (unused when ``use_local`` is false).
            e_neighbor: ``[m, max_num_leaves, in_dom_dim]`` neighbour
                embeddings (unused when ``use_neighbor`` is false).
            e_global: ``[m, in_dom_global_dim]`` global embedding.
            e_tokens: ``[m, max_num_goal_tokens, in_token_dim]`` goal-token
                embeddings.

        Returns:
            ``(h_dom, h_mode, h_tokens)`` with the leaf and token dimensions
            folded into the batch dimension for ``h_dom`` and ``h_tokens``.
        """
        # Collect the enabled DOM inputs in the fixed order
        # (local, neighbour, global).
        dom_parts = []
        if self._use_local:
            dom_parts.append(e_local)
        if self._use_neighbor:
            dom_parts.append(e_neighbor)
        if self._use_global:
            # [m, max_num_leaves (copied), d] <- [m, d]
            dom_parts.append(
                e_global.unsqueeze(1).expand(-1, self._max_num_leaves, -1))
        if len(dom_parts) == 1:
            e_dom = dom_parts[0]
        else:
            e_dom = torch.cat(dom_parts, dim=2)

        # [1] DOM stream
        # [m*max_num_leaves, in_cat_dom_dim] <- [m, max_num_leaves, in_cat_dom_dim]
        e_dom = e_dom.view(-1, self._in_cat_dom_dim)
        h_dom = F.relu(self._fc1_dom(e_dom))
        h_dom = F.relu(self._fc2_dom(h_dom))

        # [2] Mode stream, fed by the un-expanded global embedding [m, d]
        h_mode = F.relu(self._fc1_mode(e_global))
        h_mode = F.relu(self._fc2_mode(h_mode))

        # [3] Token stream
        # [m*max_num_tokens, in_token_dim] <- [m, max_num_tokens, in_token_dim]
        e_tokens = e_tokens.view(-1, self._in_token_dim)
        h_tokens = F.relu(self._fc1_token(e_tokens))
        h_tokens = F.relu(self._fc2_token(h_tokens))
        return h_dom, h_mode, h_tokens

    def reset_noise(self):
        """Resample noise in every layer.

        Only valid when the layers were built as ``NoisyLinear``;
        ``nn.Linear`` has no ``reset_noise`` method.
        """
        self._fc1_dom.reset_noise()
        self._fc2_dom.reset_noise()
        self._fc1_mode.reset_noise()
        self._fc2_mode.reset_noise()
        self._fc1_token.reset_noise()
        self._fc2_token.reset_noise()