def __init__(
    self,
    src_dict,
    dst_dict,
    vocab_reduction_params=None,
    n=4,
    encoder_hidden_dim=512,
    embed_dim=512,
    freeze_embed=False,
    hidden_dim=512,
    out_embed_dim=512,
    num_layers=1,
    dropout_in=0.1,
    dropout_out=0.1,
    attention_type="dot",
    residual_level=None,
    activation_fn=nn.ReLU,
    project_output=True,
    pretrained_embed=None,
    projection_pretrained_embed=None,
):
    super().__init__(
        src_dict,
        dst_dict,
        vocab_reduction_params,
        out_embed_dim,
        project_output=project_output,
        pretrained_embed=projection_pretrained_embed,
    )
    self.history_len = n - 1
    self.encoder_hidden_dim = encoder_hidden_dim
    self.embed_dim = embed_dim
    self.hidden_dim = hidden_dim
    self.out_embed_dim = out_embed_dim
    self.dropout_in = dropout_in
    self.dropout_out = dropout_out
    self.attention_type = attention_type
    self.residual_level = residual_level
    self.dst_dict = dst_dict
    self.activation_fn = activation_fn

    # Target-side token embeddings, optionally initialized from a
    # pretrained embedding file and optionally frozen.
    num_embeddings = len(dst_dict)
    padding_idx = dst_dict.pad()
    self.embed_tokens = Embedding(
        num_embeddings=num_embeddings,
        embedding_dim=embed_dim,
        padding_idx=padding_idx,
        freeze_embed=freeze_embed,
    )
    pytorch_translate_utils.load_embedding(
        embedding=self.embed_tokens,
        dictionary=dst_dict,
        pretrained_embed=pretrained_embed,
    )

    # Collapse the embeddings of the previous n - 1 target tokens into a
    # single hidden vector with a 1D convolution of kernel size n - 1.
    self.history_conv = nn.Sequential(
        torch.nn.Conv1d(embed_dim, hidden_dim, self.history_len),
        activation_fn(),
    )

    self.layers = nn.ModuleList(
        [
            NonlinearLayer(hidden_dim, hidden_dim, activation_fn=activation_fn)
            for _ in range(num_layers)
        ]
    )

    self.attention = attention.build_attention(
        attention_type=attention_type,
        decoder_hidden_state_dim=hidden_dim,
        context_dim=encoder_hidden_dim,
        force_projection=True,
    )
    self.combined_output_and_context_dim = self.attention.context_dim + hidden_dim

    if self.combined_output_and_context_dim != out_embed_dim:
        self.additional_fc = Linear(
            self.combined_output_and_context_dim, out_embed_dim
        )
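# --- Illustration only (not part of the library code above) ---
# A minimal, self-contained sketch of what history_conv does: with
# kernel_size == history_len (= n - 1), a Conv1d over the time axis
# collapses the embeddings of the previous n - 1 target tokens into a
# single hidden vector. Names and sizes below are assumptions chosen to
# mirror the constructor defaults above.
import torch
import torch.nn as nn

embed_dim, hidden_dim, n = 512, 512, 4
history_len = n - 1
history_conv = nn.Sequential(nn.Conv1d(embed_dim, hidden_dim, history_len), nn.ReLU())

# (batch, embed_dim, history_len): embeddings of the last n - 1 tokens.
prev_token_embeds = torch.randn(8, embed_dim, history_len)
collapsed = history_conv(prev_token_embeds)
assert collapsed.shape == (8, hidden_dim, 1)  # one hidden vector per example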
def __init__(
    self,
    src_dict,
    dst_dict,
    vocab_reduction_params=None,
    encoder_hidden_dim=512,
    embed_dim=512,
    freeze_embed=False,
    hidden_dim=512,
    out_embed_dim=512,
    cell_type="lstm",
    num_layers=1,
    dropout_in=0.1,
    dropout_out=0.1,
    attention_type="dot",
    residual_level=None,
    averaging_encoder=False,
    project_output=True,
):
    super().__init__(
        src_dict,
        dst_dict,
        vocab_reduction_params,
        out_embed_dim,
        project_output=project_output,
    )
    self.encoder_hidden_dim = encoder_hidden_dim
    self.embed_dim = embed_dim
    self.hidden_dim = hidden_dim
    self.out_embed_dim = out_embed_dim
    self.dropout_in = dropout_in
    self.dropout_out = dropout_out
    self.attention_type = attention_type
    self.residual_level = residual_level

    # Target-side token embeddings.
    num_embeddings = len(dst_dict)
    padding_idx = dst_dict.pad()
    self.embed_tokens = Embedding(
        num_embeddings=num_embeddings,
        embedding_dim=embed_dim,
        padding_idx=padding_idx,
        freeze_embed=freeze_embed,
    )
    self.averaging_encoder = averaging_encoder

    # Recurrent cell variant: vanilla LSTM, multiplicative-integration
    # LSTM, or layer-normalized LSTM.
    if cell_type == "lstm":
        cell_class = rnn_cell.LSTMCell
    elif cell_type == "milstm":
        cell_class = rnn_cell.MILSTMCell
    elif cell_type == "layer_norm_lstm":
        cell_class = rnn_cell.LayerNormLSTMCell

    # When encoder and decoder hidden sizes differ, bridge the final
    # encoder state into each decoder layer's initial hidden/cell state.
    if hidden_dim != encoder_hidden_dim:
        hidden_init_fc_list = []
        cell_init_fc_list = []
        for _ in range(num_layers):
            hidden_init_fc_list.append(Linear(encoder_hidden_dim, hidden_dim))
            cell_init_fc_list.append(Linear(encoder_hidden_dim, hidden_dim))
        self.hidden_init_fc_list = nn.ModuleList(hidden_init_fc_list)
        self.cell_init_fc_list = nn.ModuleList(cell_init_fc_list)

    self.initial_attn_context = nn.Parameter(
        torch.Tensor(encoder_hidden_dim).zero_()
    )

    if attention_type is not None:
        self.attention = attention.build_attention(
            attention_type=attention_type,
            decoder_hidden_state_dim=hidden_dim,
            encoder_output_dim=encoder_hidden_dim,
        )
        self.combined_output_and_context_dim = encoder_hidden_dim + hidden_dim
    else:
        self.attention = None
        self.combined_output_and_context_dim = hidden_dim

    # The first layer consumes the token embedding (plus the attention
    # context when attention is enabled); deeper layers consume the
    # previous layer's hidden state.
    layers = []
    for layer in range(num_layers):
        if layer == 0:
            if self.attention is not None:
                cell_input_dim = encoder_hidden_dim + embed_dim
            else:
                cell_input_dim = embed_dim
        else:
            cell_input_dim = hidden_dim
        layers.append(cell_class(input_dim=cell_input_dim, hidden_dim=hidden_dim))
    self.layers = nn.ModuleList(layers)

    if self.combined_output_and_context_dim != out_embed_dim:
        self.additional_fc = Linear(
            self.combined_output_and_context_dim, out_embed_dim
        )
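# --- Illustration only (not part of the library code above) ---
# A minimal, self-contained sketch of the hidden_init_fc_list /
# cell_init_fc_list idea: when the decoder hidden size differs from the
# encoder hidden size, a per-layer Linear maps an encoder summary state
# into each decoder layer's initial hidden (and, analogously, cell)
# state. All names and sizes here are assumptions for illustration.
import torch
import torch.nn as nn

encoder_hidden_dim, hidden_dim, num_layers, batch = 256, 512, 2, 8
hidden_init_fc_list = nn.ModuleList(
    [nn.Linear(encoder_hidden_dim, hidden_dim) for _ in range(num_layers)]
)

encoder_summary = torch.randn(batch, encoder_hidden_dim)  # e.g. a final or averaged encoder state
initial_hidden = [fc(encoder_summary) for fc in hidden_init_fc_list]
assert all(h.shape == (batch, hidden_dim) for h in initial_hidden)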
def __init__(
    self,
    src_dict,
    dst_dict,
    vocab_reduction_params=None,
    encoder_hidden_dim=512,
    embed_dim=512,
    freeze_embed=False,
    hidden_dim=512,
    out_embed_dim=512,
    cell_type="lstm",
    num_layers=1,
    dropout_in=0.1,
    dropout_out=0.1,
    attention_type="dot",
    residual_level=None,
    averaging_encoder=False,
    project_output=True,
    tie_embeddings=False,
    pretrained_embed=None,
    projection_pretrained_embed=None,
):
    super().__init__(
        src_dict,
        dst_dict,
        vocab_reduction_params,
        out_embed_dim,
        project_output=project_output,
        pretrained_embed=projection_pretrained_embed,
    )
    encoder_hidden_dim = max(1, encoder_hidden_dim)
    self.encoder_hidden_dim = encoder_hidden_dim
    self.embed_dim = embed_dim
    self.hidden_dim = hidden_dim
    self.out_embed_dim = out_embed_dim
    self.dropout_in = dropout_in
    self.dropout_out = dropout_out
    self.attention_type = attention_type
    self.residual_level = residual_level
    self.tie_embeddings = tie_embeddings

    # Target-side token embeddings: reuse an already-constructed module if
    # one is passed in, otherwise build one (optionally initialized from a
    # pretrained embedding file).
    num_embeddings = len(dst_dict)
    padding_idx = dst_dict.pad()
    if pretrained_embed is not None and type(pretrained_embed) is not str:
        self.embed_tokens = pretrained_embed
    else:
        self.embed_tokens = Embedding(
            num_embeddings=num_embeddings,
            embedding_dim=embed_dim,
            padding_idx=padding_idx,
            freeze_embed=freeze_embed,
            pretrained_embed=pretrained_embed,
        )
    if self.tie_embeddings:
        # Share one weight matrix between the input embeddings and the
        # output projection.
        assert self.embed_dim == self.out_embed_dim, (
            "Input embeddings and output projections must have the same "
            "dimension for the weights to be tied"
        )
        self.embed_tokens.weight = self.output_projection_w

    self.averaging_encoder = averaging_encoder

    # Recurrent cell variant: vanilla LSTM, multiplicative-integration
    # LSTM, or layer-normalized LSTM.
    if cell_type == "lstm":
        cell_class = rnn_cell.LSTMCell
    elif cell_type == "milstm":
        cell_class = rnn_cell.MILSTMCell
    elif cell_type == "layer_norm_lstm":
        cell_class = rnn_cell.LayerNormLSTMCell

    # When encoder and decoder hidden sizes differ, bridge the final
    # encoder state into each decoder layer's initial hidden/cell state.
    if hidden_dim != encoder_hidden_dim:
        hidden_init_fc_list = []
        cell_init_fc_list = []
        for _ in range(num_layers):
            hidden_init_fc_list.append(Linear(encoder_hidden_dim, hidden_dim))
            cell_init_fc_list.append(Linear(encoder_hidden_dim, hidden_dim))
        self.hidden_init_fc_list = nn.ModuleList(hidden_init_fc_list)
        self.cell_init_fc_list = nn.ModuleList(cell_init_fc_list)

    self.attention = attention.build_attention(
        attention_type=attention_type,
        decoder_hidden_state_dim=hidden_dim,
        context_dim=encoder_hidden_dim,
    )
    if self.attention.context_dim:
        self.initial_attn_context = nn.Parameter(
            torch.Tensor(self.attention.context_dim).zero_()
        )
    self.combined_output_and_context_dim = self.attention.context_dim + hidden_dim

    # The first layer consumes the token embedding plus the attention
    # context; deeper layers consume the previous layer's hidden state.
    layers = []
    for layer in range(num_layers):
        if layer == 0:
            cell_input_dim = embed_dim + self.attention.context_dim
        else:
            cell_input_dim = hidden_dim
        layers.append(cell_class(input_dim=cell_input_dim, hidden_dim=hidden_dim))
    self.layers = nn.ModuleList(layers)

    if self.combined_output_and_context_dim != out_embed_dim:
        self.additional_fc = Linear(
            self.combined_output_and_context_dim, out_embed_dim
        )
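# --- Illustration only (not part of the library code above) ---
# A minimal, self-contained sketch of the tie_embeddings branch: the
# input embedding table and the output projection weight must share the
# shape (vocab_size, dim), and tying makes them literally the same
# Parameter, so both uses train one matrix. Plain PyTorch modules are
# used here in place of the library's Embedding and output projection.
import torch
import torch.nn as nn

vocab_size, dim = 1000, 512
embed_tokens = nn.Embedding(vocab_size, dim, padding_idx=0)
output_projection_w = nn.Parameter(torch.empty(vocab_size, dim).uniform_(-0.1, 0.1))

embed_tokens.weight = output_projection_w  # tie: one shared weight matrix
assert embed_tokens.weight is output_projection_w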
def __init__(
    self,
    src_dict,
    dst_dict,
    n=4,
    encoder_hidden_dim=512,
    embed_dim=512,
    freeze_embed=False,
    hidden_dim=512,
    out_embed_dim=512,
    num_layers=1,
    dropout_in=0.1,
    dropout_out=0.1,
    attention_type="dot",
    residual_level=None,
    activation_fn=nn.ReLU,
):
    super().__init__(dst_dict)
    self.history_len = n - 1
    self.encoder_hidden_dim = encoder_hidden_dim
    self.embed_dim = embed_dim
    self.hidden_dim = hidden_dim
    self.out_embed_dim = out_embed_dim
    self.dropout_in = dropout_in
    self.dropout_out = dropout_out
    self.attention_type = attention_type
    self.residual_level = residual_level
    self.dst_dict = dst_dict
    self.activation_fn = activation_fn

    # Target-side token embeddings.
    num_embeddings = len(dst_dict)
    padding_idx = dst_dict.pad()
    self.embed_tokens = Embedding(
        num_embeddings=num_embeddings,
        embedding_dim=embed_dim,
        padding_idx=padding_idx,
        freeze_embed=freeze_embed,
    )

    # Collapse the embeddings of the previous n - 1 target tokens into a
    # single hidden vector with a 1D convolution of kernel size n - 1.
    self.history_conv = nn.Sequential(
        torch.nn.Conv1d(embed_dim, hidden_dim, self.history_len),
        activation_fn(),
    )

    self.layers = nn.ModuleList(
        [
            NonlinearLayer(hidden_dim, hidden_dim, activation_fn=activation_fn)
            for _ in range(num_layers)
        ]
    )

    self.attention = attention.build_attention(
        attention_type=attention_type,
        decoder_hidden_state_dim=hidden_dim,
        encoder_output_dim=encoder_hidden_dim,
        force_projection=True,
    )
    self.combined_output_and_context_dim = self.attention.context_dim + hidden_dim

    if self.combined_output_and_context_dim != out_embed_dim:
        self.additional_fc = Linear(
            self.combined_output_and_context_dim, out_embed_dim
        )

    # Output projection parameters mapping decoder features of size
    # out_embed_dim to a score for each target-vocabulary entry.
    self.output_projection_w = nn.Parameter(
        torch.FloatTensor(num_embeddings, out_embed_dim).uniform_(-0.1, 0.1)
    )
    self.output_projection_b = nn.Parameter(
        torch.FloatTensor(num_embeddings).zero_()
    )
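# --- Illustration only (not part of the library code above) ---
# A minimal, self-contained sketch of one common way parameters shaped
# like output_projection_w / output_projection_b are applied (the actual
# forward pass is not shown in this section): F.linear maps decoder
# features of size out_embed_dim to one logit per target-vocabulary
# entry. All names and sizes here are assumptions for illustration.
import torch
import torch.nn.functional as F

num_embeddings, out_embed_dim, batch = 1000, 512, 8
output_projection_w = torch.empty(num_embeddings, out_embed_dim).uniform_(-0.1, 0.1)
output_projection_b = torch.zeros(num_embeddings)

decoder_features = torch.randn(batch, out_embed_dim)
logits = F.linear(decoder_features, output_projection_w, output_projection_b)
assert logits.shape == (batch, num_embeddings)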