Example no. 1
    def __init__(
        self,
        out_embed_dims,
        vocab_size,
        vocab_reduction_module=None,
        hidden_layer_size=256,
    ):
        super().__init__(out_embed_dims, vocab_size, vocab_reduction_module)
        # Nonlinear transform of the (pretrained) LM output distribution.
        self.hidden_layer = NonlinearLayer(
            vocab_size, hidden_layer_size, bias=False, activation_fn=nn.ReLU
        )
        trans_dim = sum(out_embed_dims[1:])
        # Sigmoid gate computed from the transformed LM state concatenated
        # with the translation decoder states.
        self.gating_network = NonlinearLayer(
            hidden_layer_size + trans_dim,
            hidden_layer_size,
            bias=True,
            activation_fn=nn.Sigmoid,
        )

        # output_projections is [LM projection, Joint projection]. Keeping
        # both in a ModuleList is a trick that makes it possible to load a
        # pretrained LM projection.
        self.output_projections = nn.ModuleList([
            OutputProjection(out_embed_dims[0], vocab_size),
            OutputProjection(hidden_layer_size + trans_dim, vocab_size,
                             vocab_reduction_module),
        ])
        self.pre_softmax_activation = nn.ReLU()
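
A minimal self-contained sketch (an assumption, not the source's actual forward pass; plain nn.Linear stands in for the project's NonlinearLayer and OutputProjection helpers) of how these modules could be wired together:

import torch
import torch.nn as nn

# Hypothetical shapes: batch of 4, vocab 1000, translation state dim 512.
vocab_size, trans_dim, hidden = 1000, 512, 256
hidden_layer = nn.Sequential(nn.Linear(vocab_size, hidden, bias=False), nn.ReLU())
gating_network = nn.Sequential(nn.Linear(hidden + trans_dim, hidden), nn.Sigmoid())
joint_projection = nn.Linear(hidden + trans_dim, vocab_size)

lm_logits = torch.randn(4, vocab_size)    # stand-in for the pretrained LM output
trans_state = torch.randn(4, trans_dim)   # stand-in for the translation decoder state

h = hidden_layer(lm_logits)               # nonlinear transform of the LM output
g = gating_network(torch.cat([h, trans_state], dim=-1))   # elementwise gate over h
fused = torch.cat([g * h, trans_state], dim=-1)
logits = torch.relu(joint_projection(fused))   # pre-softmax ReLU, as in the example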
Example no. 2
    def __init__(self, out_embed_dims, vocab_size, vocab_reduction_module=None):
        super().__init__(out_embed_dims, vocab_size, vocab_reduction_module)
        out_embed_dim = out_embed_dims[0]
        # All decoders must share the same output embedding size so their
        # states can be combined elementwise before one shared projection.
        assert all(d == out_embed_dim for d in out_embed_dims)
        self.output_projection = OutputProjection(
            out_embed_dim, vocab_size, vocab_reduction_module
        )
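
A hypothetical usage sketch for this strategy, assuming two decoders whose equal-sized states are averaged before the shared projection (the averaging rule is an assumption):

import torch
import torch.nn as nn

dim, vocab_size = 256, 1000
projection = nn.Linear(dim, vocab_size)            # stand-in for OutputProjection

states = [torch.randn(4, dim) for _ in range(2)]   # equal dims, per the assert
logits = projection(torch.stack(states).mean(dim=0))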
Example no. 3
    def __init__(self, out_embed_dims, vocab_size, vocab_reduction_module=None):
        super().__init__(out_embed_dims, vocab_size, vocab_reduction_module)
        dim = out_embed_dims[0]
        # Bottleneck: project the concatenated decoder states down to the
        # first decoder's embedding size before the vocabulary projection.
        self.bottleneck = Linear(sum(out_embed_dims), dim)
        self.output_projection = OutputProjection(
            dim, vocab_size, vocab_reduction_module
        )
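
A short usage sketch (hypothetical dims; nn.Linear stands in for Linear and OutputProjection) of the concatenate-then-bottleneck pattern this constructor sets up:

import torch
import torch.nn as nn

dims, vocab_size = [512, 256], 1000
bottleneck = nn.Linear(sum(dims), dims[0])
projection = nn.Linear(dims[0], vocab_size)

states = [torch.randn(4, d) for d in dims]     # one state per decoder
logits = projection(bottleneck(torch.cat(states, dim=-1)))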
Example no. 4
    def __init__(self, out_embed_dims, vocab_size, vocab_reduction_module=None):
        super().__init__(out_embed_dims, vocab_size, vocab_reduction_module)
        # Project the concatenation of all decoder states directly to the
        # vocabulary.
        self.output_projection = OutputProjection(
            sum(out_embed_dims), vocab_size, vocab_reduction_module
        )
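
The same pattern without a bottleneck, as a hypothetical sketch (dims and the forward pass are assumptions): concatenate all decoder states and project once.

import torch
import torch.nn as nn

dims, vocab_size = [512, 256], 1000
projection = nn.Linear(sum(dims), vocab_size)

states = [torch.randn(4, d) for d in dims]
logits = projection(torch.cat(states, dim=-1))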
Example no. 5
    def __init__(
        self, out_embed_dims, vocab_size, vocab_reduction_module=None, prob_space=False
    ):
        super().__init__(out_embed_dims, vocab_size)
        # Vocabulary reduction is not supported by this strategy.
        assert vocab_reduction_module is None
        # One independent projection per decoder; prob_space configures
        # whether their outputs are combined in probability space downstream.
        self.output_projections = nn.ModuleList(
            [OutputProjection(dim, vocab_size) for dim in out_embed_dims]
        )
        self.prob_space = prob_space
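
One plausible reading of prob_space, as a hedged sketch (the actual combination rule is not shown here): average the per-decoder outputs either as raw logits or as probabilities.

import torch
import torch.nn.functional as F

vocab_size = 1000
per_model_logits = [torch.randn(4, vocab_size) for _ in range(2)]

prob_space = True
if prob_space:
    # Average in probability space, then return log-probabilities.
    probs = torch.stack([F.softmax(l, dim=-1) for l in per_model_logits]).mean(dim=0)
    combined = torch.log(probs)
else:
    # Average raw logits.
    combined = torch.stack(per_model_logits).mean(dim=0)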
Example no. 6
    def __init__(self, out_embed_dims, vocab_size, vocab_reduction_module=None):
        super().__init__(out_embed_dims, vocab_size, vocab_reduction_module)
        # Scalar sigmoid gate computed from the first decoder's state.
        self.gating_network = NonlinearLayer(
            out_embed_dims[0], 1, bias=True, activation_fn=nn.Sigmoid
        )
        self.output_projection = OutputProjection(
            sum(out_embed_dims), vocab_size, vocab_reduction_module
        )
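
A minimal sketch (the forward pass is assumed, not taken from the source) of how a scalar gate from the first decoder's state could weight the concatenated states before projection:

import torch
import torch.nn as nn

dims, vocab_size = [256, 256], 1000
gate = nn.Sequential(nn.Linear(dims[0], 1), nn.Sigmoid())
projection = nn.Linear(sum(dims), vocab_size)

h1, h2 = torch.randn(4, dims[0]), torch.randn(4, dims[1])
g = gate(h1)                                   # one weight per position, in (0, 1)
logits = projection(torch.cat([g * h1, (1 - g) * h2], dim=-1))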
Example no. 7
    def __init__(
        self,
        out_embed_dims,
        vocab_size,
        vocab_reduction_module=None,
        norm_fn=None,
        to_log=False,
    ):
        super().__init__(out_embed_dims, vocab_size)
        # Vocabulary reduction is not supported by this strategy.
        assert vocab_reduction_module is None
        # One independent projection per decoder; norm_fn and to_log
        # configure how the outputs are normalized and combined downstream.
        self.output_projections = nn.ModuleList(
            [OutputProjection(dim, vocab_size) for dim in out_embed_dims]
        )
        self.to_log = to_log
        self.norm_fn = norm_fn
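
A hedged sketch of one way norm_fn and to_log could be applied downstream (the choice of softmax and the averaging rule are assumptions): normalize each decoder's output, average, and optionally move to log space.

import torch
import torch.nn.functional as F

vocab_size = 1000
outputs = [torch.randn(4, vocab_size) for _ in range(2)]

norm_fn = lambda t: F.softmax(t, dim=-1)   # hypothetical choice of norm_fn
to_log = True

normalized = [norm_fn(o) for o in outputs]
combined = torch.stack(normalized).mean(dim=0)
if to_log:
    combined = torch.log(combined)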
Example no. 8
    def __init__(
        self,
        out_embed_dims,
        vocab_size,
        vocab_reduction_module=None,
        activation_fn=torch.nn.ReLU,
    ):
        super().__init__(out_embed_dims, vocab_size, vocab_reduction_module)
        dim = out_embed_dims[0]
        # Two-layer bottleneck MLP: project the concatenated decoder states
        # down to a single embedding size, then apply a second linear layer.
        self.bottleneck = nn.Sequential(
            Linear(sum(out_embed_dims), dim, bias=True),
            activation_fn(),
            Linear(dim, dim, bias=True),
        )
        self.output_projection = OutputProjection(
            dim, vocab_size, vocab_reduction_module
        )
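
A usage sketch for this deeper bottleneck, with hypothetical dims and plain nn.Linear in place of the project's Linear and OutputProjection helpers:

import torch
import torch.nn as nn

dims, vocab_size = [512, 256], 1000
dim = dims[0]
bottleneck = nn.Sequential(
    nn.Linear(sum(dims), dim, bias=True),
    nn.ReLU(),
    nn.Linear(dim, dim, bias=True),
)
projection = nn.Linear(dim, vocab_size)

states = [torch.randn(4, d) for d in dims]
logits = projection(bottleneck(torch.cat(states, dim=-1)))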