# Imports assumed by these snippets. `Vocabulary`, `Embedding`, `Linear`,
# `LSTMCell`, `AttentionLayer`, and `AdaptiveSoftmax` are project-local
# helpers and are not reproduced here.
from typing import List, Optional

import torch.nn as nn

from fairseq.data import Dictionary


def __init__(self, bpe, dictionary: Dictionary):
    self.bpe = bpe
    # Build the vocabulary over the dictionary's symbols, carrying the
    # dictionary's special tokens across by their string form.
    self.vocab = Vocabulary(
        dictionary.symbols,
        pad_token=str(dictionary[dictionary.pad()]),
        bos_token=str(dictionary[dictionary.bos()]),
        eos_token=str(dictionary[dictionary.eos()]),
    )
    self.bos = self.vocab.bos_token
    self.eos = self.vocab.eos_token
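# A minimal sketch (an addition, not from the source) of what the
# special-token lookups above resolve to for a stock fairseq Dictionary,
# whose defaults are pad="<pad>", bos="<s>", eos="</s>". The helper name
# `_demo_special_tokens` is hypothetical.
def _demo_special_tokens():
    d = Dictionary()
    # Dictionary.__getitem__ maps an index back to its symbol string.
    return str(d[d.pad()]), str(d[d.bos()]), str(d[d.eos()])
    # -> ("<pad>", "<s>", "</s>")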
def __init__(
    self,
    dictionary: Dictionary,
    embed_dim: int = 512,
    hidden_size: int = 512,
    out_embed_dim: int = 512,
    num_layers: int = 1,
    dropout_in: float = 0.1,
    dropout_out: float = 0.1,
    attention: bool = True,
    encoder_embed_dim: int = 512,  # unused in this constructor; kept for API compatibility
    encoder_output_units: int = 512,
    pretrained_embed: Optional[nn.Embedding] = None,
    share_input_output_embed: bool = False,
    adaptive_softmax_cutoff: Optional[List[int]] = None,
):
    super().__init__(dictionary)
    self.dropout_in = dropout_in
    self.dropout_out = dropout_out
    self.hidden_size = hidden_size
    self.share_input_output_embed = share_input_output_embed
    self.need_attn = True
    self.adaptive_softmax = None

    num_embeddings = len(dictionary)
    padding_idx = dictionary.pad()
    if pretrained_embed is None:
        self.embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx)
    else:
        self.embed_tokens = pretrained_embed

    self.encoder_output_units = encoder_output_units

    # The first layer consumes the token embedding concatenated with the
    # input-feed vector (size hidden_size); later layers take the previous
    # layer's hidden state.
    self.layers = nn.ModuleList([
        LSTMCell(
            input_size=hidden_size + embed_dim if layer == 0 else hidden_size,
            hidden_size=hidden_size,
        )
        for layer in range(num_layers)
    ])
    self.attention = (
        AttentionLayer(hidden_size, encoder_output_units, hidden_size)
        if attention
        else None
    )
    if hidden_size != out_embed_dim:
        self.additional_fc = Linear(hidden_size, out_embed_dim)
    if adaptive_softmax_cutoff is not None:
        # setting adaptive_softmax dropout to dropout_out for now but can be redefined;
        # the input dim passed here must match the features fed to it in forward()
        self.adaptive_softmax = AdaptiveSoftmax(
            num_embeddings, embed_dim, adaptive_softmax_cutoff, dropout=dropout_out
        )
    elif not self.share_input_output_embed:
        self.fc_out = Linear(out_embed_dim, num_embeddings, dropout=dropout_out)
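# A minimal construction sketch (an addition, not from the source). It
# assumes the enclosing class is fairseq's LSTMDecoder, whose signature
# this constructor matches; the helper name `_demo_build_decoder` is
# hypothetical, and sizes are kept small for illustration.
def _demo_build_decoder():
    from fairseq.models.lstm import LSTMDecoder  # assumed source of the class

    d = Dictionary()
    for tok in ("hello", "world"):
        d.add_symbol(tok)

    return LSTMDecoder(
        d,
        embed_dim=32,
        hidden_size=32,
        out_embed_dim=32,
        num_layers=2,
        attention=True,
        encoder_output_units=32,
    )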