    def __init__(self, langpair: str, is_base: bool = True) -> None:
        super().__init__()
        configs = Config()
        configs.add_tokenizer(langpair)  # register tokenizer settings for the language pair
        configs.add_model(is_base)  # pick transformer-base or transformer-big hyperparameters
        dim_model: int = configs.model.model_params.dim_model
        vocab_size = configs.tokenizer.vocab_size

        # Encoder-decoder stack, followed by a linear projection from the
        # model dimension to vocabulary logits
        self.encoder = Encoder(langpair)
        self.decoder = Decoder(langpair)
        self.linear = nn.Linear(dim_model, vocab_size)
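The Config, Encoder, and Decoder classes above live elsewhere in the repository, so the excerpt is not runnable on its own. Below is a minimal, self-contained sketch of the same encoder-decoder-plus-projection pattern; it swaps in PyTorch's built-in nn.Transformer for the repository's Encoder/Decoder stacks, and the class name and sizes (SketchTransformer, dim_model=512, vocab_size=32000) are illustrative, not taken from the source.

# Self-contained sketch: embed tokens, run them through an encoder-decoder
# transformer, and project decoder states to vocabulary logits. Uses PyTorch's
# nn.Transformer instead of the repository's Encoder/Decoder; names and sizes
# are illustrative assumptions.
import torch
import torch.nn as nn

class SketchTransformer(nn.Module):
    def __init__(self, vocab_size: int = 32000, dim_model: int = 512) -> None:
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, dim_model)
        self.transformer = nn.Transformer(d_model=dim_model, batch_first=True)
        self.linear = nn.Linear(dim_model, vocab_size)  # hidden states -> vocab logits

    def forward(self, src: torch.Tensor, tgt: torch.Tensor) -> torch.Tensor:
        hidden = self.transformer(self.embedding(src), self.embedding(tgt))
        return self.linear(hidden)  # (batch, tgt_len, vocab_size)

model = SketchTransformer()
logits = model(torch.randint(0, 32000, (2, 7)),   # source token ids
               torch.randint(0, 32000, (2, 5)))   # target token ids
print(logits.shape)  # torch.Size([2, 5, 32000])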
Example 2
    def __init__(self, langpair: str, is_base: bool = True) -> None:
        super().__init__()
        # TODO: support transformer-base and transformer-big
        configs = Config()
        configs.add_model(is_base)
        configs.add_tokenizer(langpair)
        tokenizer = load_tokenizer(langpair)
        padding_idx = tokenizer.token_to_id("<pad>")  # keep <pad> embeddings at zero

        self.dim_model: int = configs.model.model_params.dim_model
        self.vocab_size = configs.tokenizer.vocab_size
        self.embedding_matrix = nn.Embedding(self.vocab_size,
                                             self.dim_model,
                                             padding_idx=padding_idx)
        # Embedding outputs are scaled by sqrt(dim_model) before the
        # positional encoding is added, as in "Attention Is All You Need"
        self.scale = self.dim_model ** 0.5
        self.max_len = configs.model.model_params.max_len
        self.positional_encoding = PositionalEncoding(self.max_len,
                                                      self.dim_model)
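This second module pairs an embedding table with positional encodings, but the repository's PositionalEncoding class is not shown. The sketch below assumes the standard sinusoidal variant from "Attention Is All You Need" and illustrative sizes, and demonstrates the forward pass this __init__ prepares: look up embeddings, scale by sqrt(dim_model) (the self.scale factor above), then add the positional encoding.

# Hedged sketch of the embedding forward pass. PositionalEncodingSketch and all
# sizes are illustrative assumptions; the sinusoidal formulation follows
# "Attention Is All You Need", not necessarily the repository's implementation.
import math
import torch
import torch.nn as nn

class PositionalEncodingSketch(nn.Module):
    def __init__(self, max_len: int, dim_model: int) -> None:
        super().__init__()
        position = torch.arange(max_len).unsqueeze(1)  # (max_len, 1)
        div_term = torch.exp(torch.arange(0, dim_model, 2)
                             * (-math.log(10000.0) / dim_model))
        pe = torch.zeros(max_len, dim_model)
        pe[:, 0::2] = torch.sin(position * div_term)  # even dimensions
        pe[:, 1::2] = torch.cos(position * div_term)  # odd dimensions
        self.register_buffer("pe", pe)  # not a learned parameter

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return x + self.pe[: x.size(1)]  # broadcast over the batch dimension

dim_model, vocab_size, max_len = 512, 32000, 128
embedding = nn.Embedding(vocab_size, dim_model, padding_idx=0)
pos_enc = PositionalEncodingSketch(max_len, dim_model)

tokens = torch.randint(0, vocab_size, (2, 10))       # (batch, seq_len)
embedded = embedding(tokens) * dim_model ** 0.5      # scale, as in the snippet
out = pos_enc(embedded)
print(out.shape)  # torch.Size([2, 10, 512])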