Example no. 1
    def __init__(
        self,
        out_embed_dims,
        vocab_size,
        vocab_reduction_module=None,
        hidden_layer_size=256,
    ):
        super().__init__(out_embed_dims, vocab_size, vocab_reduction_module)
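        # Non-linear bottleneck from a vocab_size-dimensional input down to hidden_layer_size.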
        self.hidden_layer = NonlinearLayer(vocab_size,
                                           hidden_layer_size,
                                           bias=False,
                                           activation_fn=nn.ReLU)
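        # trans_dim: combined width of the output embeddings of all models after the first.
        # The sigmoid gate maps (hidden_layer_size + trans_dim) inputs to hidden_layer_size gate values.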
        trans_dim = sum(out_embed_dims[1:])
        self.gating_network = NonlinearLayer(
            hidden_layer_size + trans_dim,
            hidden_layer_size,
            bias=True,
            activation_fn=nn.Sigmoid,
        )

        # output_projections is [LM projection, Joint projection]. This layout is a
        # trick that makes it possible to load a pretrained LM projection.
        self.output_projections = nn.ModuleList([
            OutputProjection(out_embed_dims[0], vocab_size),
            OutputProjection(hidden_layer_size + trans_dim, vocab_size,
                             vocab_reduction_module),
        ])
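        # Non-linearity applied to the combined scores before the softmax.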
        self.pre_softmax_activation = nn.ReLU()
Example no. 2
    def __init__(self,
                 out_embed_dims,
                 vocab_size,
                 vocab_reduction_module=None):
        super().__init__(out_embed_dims, vocab_size, vocab_reduction_module)
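        # Scalar (single-output) sigmoid gate computed from the first model's output embedding.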
        self.gating_network = NonlinearLayer(out_embed_dims[0],
                                             1,
                                             bias=True,
                                             activation_fn=nn.Sigmoid)
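        # One projection over the concatenated output embeddings of all models.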
        self.output_projection = OutputProjection(sum(out_embed_dims),
                                                  vocab_size,
                                                  vocab_reduction_module)
Example no. 3
    def __init__(
        self,
        src_dict,
        dst_dict,
        n=4,
        encoder_hidden_dim=512,
        embed_dim=512,
        freeze_embed=False,
        hidden_dim=512,
        out_embed_dim=512,
        num_layers=1,
        dropout_in=0.1,
        dropout_out=0.1,
        attention_type="dot",
        residual_level=None,
        activation_fn=nn.ReLU,
    ):
        super().__init__(dst_dict)
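        # The decoder conditions on a fixed window of the previous n - 1 target tokens.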
        self.history_len = n - 1
        self.encoder_hidden_dim = encoder_hidden_dim
        self.embed_dim = embed_dim
        self.hidden_dim = hidden_dim
        self.out_embed_dim = out_embed_dim
        self.dropout_in = dropout_in
        self.dropout_out = dropout_out
        self.attention_type = attention_type
        self.residual_level = residual_level
        self.dst_dict = dst_dict
        self.activation_fn = activation_fn

        num_embeddings = len(dst_dict)
        padding_idx = dst_dict.pad()
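        # Target-side token embedding table (optionally frozen via freeze_embed).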
        self.embed_tokens = Embedding(
            num_embeddings=num_embeddings,
            embedding_dim=embed_dim,
            padding_idx=padding_idx,
            freeze_embed=freeze_embed,
        )

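        # Conv1d with kernel size history_len combines the embedded token history
        # into hidden_dim features, followed by the chosen activation.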
        self.history_conv = nn.Sequential(
            torch.nn.Conv1d(embed_dim, hidden_dim, self.history_len),
            activation_fn())

        self.hidden_dim = hidden_dim
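        # num_layers NonlinearLayer blocks, each hidden_dim -> hidden_dim.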
        self.layers = nn.ModuleList([
            NonlinearLayer(hidden_dim, hidden_dim, activation_fn=activation_fn)
            for _ in range(num_layers)
        ])

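        # Attention over encoder outputs (encoder_hidden_dim wide), queried by the
        # hidden_dim decoder state.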
        self.attention = attention.build_attention(
            attention_type=attention_type,
            decoder_hidden_state_dim=hidden_dim,
            encoder_output_dim=encoder_hidden_dim,
            force_projection=True,
        )
        self.combined_output_and_context_dim = (
            self.attention.encoder_output_dim + hidden_dim)
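        # Extra projection used only when the combined size differs from out_embed_dim.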
        if self.combined_output_and_context_dim != out_embed_dim:
            self.additional_fc = Linear(self.combined_output_and_context_dim,
                                        out_embed_dim)

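        # Output projection weights and bias: uniform(-0.1, 0.1) and zero initialization.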
        self.output_projection_w = nn.Parameter(
            torch.FloatTensor(num_embeddings,
                              out_embed_dim).uniform_(-0.1, 0.1))
        self.output_projection_b = nn.Parameter(
            torch.FloatTensor(num_embeddings).zero_())
Example no. 4
    def __init__(
        self,
        src_dict,
        dst_dict,
        vocab_reduction_params=None,
        n=4,
        encoder_hidden_dim=512,
        embed_dim=512,
        freeze_embed=False,
        hidden_dim=512,
        out_embed_dim=512,
        num_layers=1,
        dropout_in=0.1,
        dropout_out=0.1,
        attention_type="dot",
        residual_level=None,
        activation_fn=nn.ReLU,
        project_output=True,
        pretrained_embed=None,
        projection_pretrained_embed=None,
    ):
        super().__init__(
            src_dict,
            dst_dict,
            vocab_reduction_params,
            out_embed_dim,
            project_output=project_output,
            pretrained_embed=projection_pretrained_embed,
        )
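        # Same fixed-window (n - 1 token) decoder as Example no. 3, extended with
        # vocabulary reduction and optional pretrained embeddings.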
        self.history_len = n - 1
        self.encoder_hidden_dim = encoder_hidden_dim
        self.embed_dim = embed_dim
        self.hidden_dim = hidden_dim
        self.out_embed_dim = out_embed_dim
        self.dropout_in = dropout_in
        self.dropout_out = dropout_out
        self.attention_type = attention_type
        self.residual_level = residual_level
        self.dst_dict = dst_dict
        self.activation_fn = activation_fn

        num_embeddings = len(dst_dict)
        padding_idx = dst_dict.pad()
        self.embed_tokens = Embedding(
            num_embeddings=num_embeddings,
            embedding_dim=embed_dim,
            padding_idx=padding_idx,
            freeze_embed=freeze_embed,
        )
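        # Optionally load pretrained weights into the embedding table.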
        pytorch_translate_utils.load_embedding(
            embedding=self.embed_tokens,
            dictionary=dst_dict,
            pretrained_embed=pretrained_embed,
        )

        self.history_conv = nn.Sequential(
            torch.nn.Conv1d(embed_dim, hidden_dim, self.history_len),
            activation_fn())

        self.hidden_dim = hidden_dim
        self.layers = nn.ModuleList([
            NonlinearLayer(hidden_dim, hidden_dim, activation_fn=activation_fn)
            for _ in range(num_layers)
        ])

        self.attention = attention.build_attention(
            attention_type=attention_type,
            decoder_hidden_state_dim=hidden_dim,
            context_dim=encoder_hidden_dim,
            force_projection=True,
        )
        self.combined_output_and_context_dim = self.attention.context_dim + hidden_dim
        if self.combined_output_and_context_dim != out_embed_dim:
            self.additional_fc = Linear(self.combined_output_and_context_dim,
                                        out_embed_dim)