Beispiel #1
0
class DuelingC51Head(nn.Module):
    """Dueling output head that yields a categorical distribution per leaf.

    A value stream and an advantage stream are each projected to
    ``num_atoms`` logits.  The advantage logits are centred by their
    mask-weighted mean over the leaf axis, added to the value logits, and
    normalised with a (log-)softmax over the atom axis.

    Args:
        max_num_leaves: size of the leaf axis the advantage output is
            reshaped to.
        in_dim: input feature size of both linear layers.
        use_noisylayers: if truthy, build ``NoisyLinear`` layers, otherwise
            plain ``nn.Linear`` layers.
        num_atoms: number of atoms (logits) per distribution.
    """

    def __init__(self, max_num_leaves, in_dim, use_noisylayers, num_atoms):
        super(DuelingC51Head, self).__init__()
        self._max_num_leaves = max_num_leaves
        # Bug fix: this used to be the bare expression ``self._num_atoms``,
        # which raised AttributeError before any layer was created.
        self._num_atoms = num_atoms
        # The conditional expression only evaluates the selected name.
        layer_cls = NoisyLinear if use_noisylayers else nn.Linear
        self._V_out = layer_cls(in_dim, self._num_atoms)
        self._A_out = layer_cls(in_dim, self._num_atoms)

    def forward(self, h, leaves_mask, log=False):
        """Compute per-leaf atom distributions.

        Args:
            h: pair ``(h_V, h_A)`` of value-stream and advantage-stream
                features.  Presumably ``h_V`` is ``[m, in_dim]`` and ``h_A``
                is ``[m * max_num_leaves, in_dim]`` -- TODO confirm with
                the caller.
            leaves_mask: ``[m, max_num_leaves]`` mask multiplied into the
                advantages when averaging; returned unchanged.
            log: if truthy, return log-probabilities instead of
                probabilities.

        Returns:
            Tuple ``(P, leaves_mask)`` where ``P`` has shape
            ``[m, max_num_leaves, num_atoms]``.
        """
        h_V, h_A = h
        # [m, 1, num_atoms]
        V = self._V_out(h_V).view(-1, 1, self._num_atoms)
        # [m, max_num_leaves, num_atoms]
        leaves_mask_expanded = leaves_mask.unsqueeze(2).expand(
            -1, -1, self._num_atoms)
        # [m, max_num_leaves, num_atoms]
        A = self._A_out(h_A).view(-1, self._max_num_leaves, self._num_atoms)
        # Mask-weighted mean of the advantages over the leaf axis.
        # NOTE(review): a mask row that sums to zero makes the denominator
        # zero here -- confirm callers never pass such a row.
        masked_sum = (A * leaves_mask_expanded).sum(1, keepdim=True)
        mask_count = leaves_mask_expanded.sum(1, keepdim=True)
        # [m, max_num_leaves, num_atoms]
        Y = V + A - masked_sum / mask_count
        if log:
            P = F.log_softmax(Y, dim=2)
        else:
            P = F.softmax(Y, dim=2)
        return P, leaves_mask

    def reset_noise(self):
        """Call ``reset_noise()`` on both output layers.

        ``nn.Linear`` has no ``reset_noise`` method, so this is only usable
        when the head was built with ``use_noisylayers`` enabled.
        """
        self._V_out.reset_noise()
        self._A_out.reset_noise()
Beispiel #2
0
class DuelingHead(nn.Module):
    """Dueling head producing one scalar score per leaf.

    The value stream and the advantage stream are each projected to a single
    output.  Advantages are centred by their mask-weighted mean over the
    leaf axis and added to the value; a mask-derived penalty is then
    subtracted so that entries whose mask is 0 become ``-inf``.
    """

    def __init__(self, max_num_leaves, in_dim, use_noisylayers):
        super(DuelingHead, self).__init__()
        self._max_num_leaves = max_num_leaves
        # Only the selected class name is evaluated.
        make_layer = NoisyLinear if use_noisylayers else nn.Linear
        self._V_out = make_layer(in_dim, 1)
        self._A_out = make_layer(in_dim, 1)

    def forward(self, h, leaves_mask, _):
        """Return the masked scores, shape ``[m, max_num_leaves]``.

        ``h`` is the pair (value features, advantage features); the third
        positional argument is accepted and ignored.
        """
        value_feats, adv_feats = h
        # [m, 1]
        value = self._V_out(value_feats)
        # [m, max_num_leaves]
        advantage = self._A_out(adv_feats).view(-1, self._max_num_leaves)
        masked_total = (advantage * leaves_mask).sum(1, keepdim=True)
        valid_count = leaves_mask.sum(1, keepdim=True)
        # [m, max_num_leaves]
        scores = value + advantage - masked_total / valid_count
        # Penalty tensor: inf where the mask is 0, 0 where the mask is 1.
        penalty = leaves_mask.clone()
        penalty[leaves_mask == 0] = float('inf')
        penalty[leaves_mask == 1] = 0
        scores.sub_(penalty)
        return scores

    def reset_noise(self):
        """Invoke ``reset_noise()`` on the value and advantage layers."""
        for layer in (self._V_out, self._A_out):
            layer.reset_noise()
Beispiel #3
0
class C51Head(nn.Module):
    """Single-stream head emitting a distribution over atoms for each leaf.

    One linear layer maps the input features to ``num_atoms`` logits; the
    result is reshaped to ``[-1, max_num_leaves, num_atoms]`` and normalised
    over the atom axis.
    """

    def __init__(self, max_num_leaves, in_dim, use_noisylayers, num_atoms):
        super(C51Head, self).__init__()
        self._max_num_leaves = max_num_leaves
        self._num_atoms = num_atoms
        # Only the selected class name is evaluated.
        layer_cls = NoisyLinear if use_noisylayers else nn.Linear
        self._out = layer_cls(in_dim, self._num_atoms)

    def forward(self, h, leaves_mask, log=False):
        """Return ``(P, leaves_mask)``.

        ``P`` holds log-probabilities when ``log`` is truthy, otherwise
        probabilities; ``leaves_mask`` is passed through untouched.
        """
        logits = self._out(h).view(
            -1, self._max_num_leaves, self._num_atoms)
        normalise = F.log_softmax if log else F.softmax
        # [m, max_num_leaves, num_atoms], [m, max_num_leaves]
        return normalise(logits, dim=2), leaves_mask

    def reset_noise(self):
        """Invoke ``reset_noise()`` on the output layer."""
        self._out.reset_noise()
Beispiel #4
0
class Head(nn.Module):
    """Three-way output head: DOM scores, mode scores and goal-token scores.

    The mode stream is sized at a quarter, and the token stream at half, of
    ``dom_stream_dim``.
    """

    def __init__(self, max_num_leaves, max_num_goal_tokens, dom_stream_dim,
                 use_noisylayers):
        super(Head, self).__init__()
        self._max_num_leaves = max_num_leaves
        self._max_num_goal_tokens = max_num_goal_tokens
        mode_stream_dim = int(dom_stream_dim / 4)
        token_stream_dim = int(dom_stream_dim / 2)
        # Only the selected class name is evaluated.
        make_layer = NoisyLinear if use_noisylayers else nn.Linear
        self._out_dom = make_layer(dom_stream_dim, 1)
        self._out_mode = make_layer(mode_stream_dim, 2)
        self._out_token = make_layer(token_stream_dim, 1)

    @staticmethod
    def _penalty_from(mask):
        """Copy ``mask`` with 0 entries replaced by inf and 1 entries by 0."""
        penalty = mask.clone()
        penalty[mask == 0] = float('inf')
        penalty[mask == 1] = 0
        return penalty

    def forward(self, h_list, leaves_mask, goal_mask, _):
        """
        Outputs: Q_dom for dom, Q_mode for click or type (binary),
                 Q_token for selected goal token idx

        Entries whose mask value is 0 end up as ``-inf`` in Q_dom and
        Q_token.  The final positional argument is ignored.
        """
        dom_feats, mode_feats, token_feats = h_list

        # 1. [m, max_num_leaves] <= [m*max_num_leaves]
        dom_scores = self._out_dom(dom_feats).view(-1, self._max_num_leaves)
        dom_scores.sub_(self._penalty_from(leaves_mask))

        # 2. [m, 2]
        mode_scores = self._out_mode(mode_feats)

        # 3. [m, max_num_tokens] <= [m*max_num_tokens]
        token_scores = self._out_token(token_feats).view(
            -1, self._max_num_goal_tokens)
        token_scores.sub_(self._penalty_from(goal_mask))

        return dom_scores, mode_scores, token_scores

    def reset_noise(self):
        """Invoke ``reset_noise()`` on all three output layers."""
        for layer in (self._out_dom, self._out_mode, self._out_token):
            layer.reset_noise()
Beispiel #5
0
class Body(nn.Module):
    """Shared trunk producing hidden features for three streams.

    * DOM stream: two ReLU layers over the concatenation of the enabled
      per-leaf embeddings (local, neighbor, and optionally the global
      embedding copied to every leaf).
    * Mode stream: two ReLU layers over the global embedding.
    * Token stream: two ReLU layers over the goal-token embeddings.

    Args:
        in_dom_global_dim: feature size of the global embedding.
        max_num_leaves: size of the leaf axis.
        max_num_goal_tokens: stored on the instance; not used by
            ``forward``.
        in_dom_dim: feature size of each of the local/neighbor embeddings.
        in_token_dim: feature size of a goal-token embedding.
        h_dom_dim: hidden size of the DOM stream; the mode and token streams
            use a quarter and a half of it respectively.
        use_noisylayers: if truthy, build ``NoisyLinear`` layers, otherwise
            plain ``nn.Linear`` layers.
        use_local: include the local embedding in the DOM stream input.
        use_neighbor: include the neighbor embedding in the DOM stream input.
        use_global: include the global embedding in the DOM stream input.
    """

    def __init__(self, in_dom_global_dim, max_num_leaves, max_num_goal_tokens,
                 in_dom_dim, in_token_dim, h_dom_dim, use_noisylayers,
                 use_local, use_neighbor, use_global):
        super(Body, self).__init__()
        assert use_local or use_neighbor
        self._use_local = use_local
        self._use_neighbor = use_neighbor
        self._use_global = use_global

        # Width of the concatenated DOM input: one ``in_dom_dim`` slice per
        # enabled per-leaf source, plus the global slice when requested.
        num_leaf_sources = int(bool(use_local)) + int(bool(use_neighbor))
        self._in_cat_dom_dim = in_dom_dim * num_leaf_sources
        if use_global:
            self._in_cat_dom_dim += in_dom_global_dim

        self._in_mode_dim = in_dom_global_dim
        self._in_token_dim = in_token_dim
        self._max_num_leaves = max_num_leaves
        self._max_num_goal_tokens = max_num_goal_tokens
        h_mode_dim, h_token_dim = int(h_dom_dim / 4), int(h_dom_dim / 2)

        # Bug fix: the non-noisy branch used to be ``pass``, leaving the
        # module without any layers so ``forward`` could not run.  Plain
        # ``nn.Linear`` layers are now built in that case.
        layer_cls = NoisyLinear if use_noisylayers else nn.Linear
        self._fc1_dom = layer_cls(self._in_cat_dom_dim, h_dom_dim)
        self._fc2_dom = layer_cls(h_dom_dim, h_dom_dim)
        self._fc1_mode = layer_cls(self._in_mode_dim, h_mode_dim)
        self._fc2_mode = layer_cls(h_mode_dim, h_mode_dim)
        self._fc1_token = layer_cls(self._in_token_dim, h_token_dim)
        self._fc2_token = layer_cls(h_token_dim, h_token_dim)

    def forward(self, e_local, e_neighbor, e_global, e_tokens):
        """Run the three streams.

        Args:
            e_local: per-leaf local embeddings, concatenated on dim 2;
                presumably ``[m, max_num_leaves, in_dom_dim]`` -- TODO
                confirm.
            e_neighbor: per-leaf neighbor embeddings, same layout as
                ``e_local``.
            e_global: global embedding ``[m, in_dom_global_dim]``.
            e_tokens: goal-token embeddings, flattened to
                ``[-1, in_token_dim]``.

        Returns:
            Tuple ``(h_dom, h_mode, h_tokens)`` with last dimensions
            ``h_dom_dim``, ``h_dom_dim / 4`` and ``h_dom_dim / 2``.
        """
        # Collect the enabled DOM sources in the fixed order
        # local, neighbor, global.
        dom_parts = []
        if self._use_local:
            dom_parts.append(e_local)
        if self._use_neighbor:
            dom_parts.append(e_neighbor)
        if self._use_global:
            # [m, max_num_leaves(copied), d] <- [m, d]
            dom_parts.append(
                e_global.unsqueeze(1).expand(-1, self._max_num_leaves, -1))
        # Bug fix: the neighbor + global case referenced the misspelled
        # name ``e_glboal`` and raised NameError.
        if len(dom_parts) == 1:
            e_dom = dom_parts[0]
        else:
            e_dom = torch.cat(dom_parts, dim=2)

        # [1] DOM stream
        # [m*V_size, in_dom_dim] <- [m, V_size, in_dom_dim]
        e_dom = e_dom.view(-1, self._in_cat_dom_dim)
        h_dom = F.relu(self._fc1_dom(e_dom))
        h_dom = F.relu(self._fc2_dom(h_dom))
        # [2] Mode stream (uses the un-expanded global embedding)
        h_mode = F.relu(self._fc1_mode(e_global))
        h_mode = F.relu(self._fc2_mode(h_mode))
        # [3] Token Stream
        # [m*max_num_tokens, in_token_dim] <- [m, max_num_tokens, in_token_dim]
        e_tokens = e_tokens.view(-1, self._in_token_dim)
        h_tokens = F.relu(self._fc1_token(e_tokens))
        h_tokens = F.relu(self._fc2_token(h_tokens))
        return h_dom, h_mode, h_tokens

    def reset_noise(self):
        """Call ``reset_noise()`` on every layer of the three streams.

        ``nn.Linear`` has no ``reset_noise`` method, so this is only usable
        when the body was built with ``use_noisylayers`` enabled.
        """
        self._fc1_dom.reset_noise()
        self._fc2_dom.reset_noise()
        self._fc1_mode.reset_noise()
        self._fc2_mode.reset_noise()
        self._fc1_token.reset_noise()
        self._fc2_token.reset_noise()