Example #1
0
    def __init__(self, args, dictionary):
        """Build the decoder's embeddings, layer stack and output projection.

        Args:
            args: Configuration object. This constructor reads ``dropout``,
                ``decoder_embed_dim``, ``max_tgt_positions``,
                ``no_scale_embedding`` and ``decoder_layers`` from it and
                hands the whole object to every ``TransformerDecoderLayer``.
            dictionary: Vocabulary object. It is forwarded to the parent
                constructor and must support ``len()`` and expose
                ``pad_idx``.
        """
        super().__init__(dictionary)

        # Values looked up once and reused below.
        model_dim = args.decoder_embed_dim
        pad_idx = dictionary.pad_idx
        vocab_size = len(dictionary)

        # Plain (non-module) configuration attributes.
        self.dropout = args.dropout
        self.embed_dim = model_dim
        self.output_embed_dim = model_dim
        self.padding_idx = pad_idx
        self.max_tgt_positions = args.max_tgt_positions

        # Token embedding table; embed_scale is sqrt(embed_dim) unless
        # scaling has been switched off through args.no_scale_embedding.
        self.embedding = generate_embedding(vocab_size, model_dim, pad_idx)
        if args.no_scale_embedding:
            self.embed_scale = 1.0
        else:
            self.embed_scale = math.sqrt(model_dim)

        # The positional table is sized max positions + padding index + 1.
        # NOTE(review): presumably positions are numbered past padding_idx;
        # confirm against PositionalEmbedding.
        position_table_size = self.max_tgt_positions + pad_idx + 1
        self.embed_positions = PositionalEmbedding(
            model_dim, padding_idx=pad_idx, init_size=position_table_size)

        # Stack of args.decoder_layers independently constructed layers.
        decoder_stack = [
            TransformerDecoderLayer(args) for _ in range(args.decoder_layers)
        ]
        self.layers = nn.ModuleList(decoder_stack)

        # Linear map from the model dimension to one output per dictionary
        # entry, with weights re-drawn from N(0, output_embed_dim ** -0.5).
        projection = nn.Linear(model_dim, vocab_size)
        nn.init.normal_(projection.weight, mean=0, std=model_dim**-0.5)
        self.embed_out = projection
Example #2
0
    def __init__(self, args, dictionary):
        """Build the encoder's embeddings and layer stack.

        Args:
            args: Configuration object. This constructor reads ``dropout``,
                ``encoder_embed_dim``, ``max_src_positions``,
                ``no_scale_embedding`` and ``encoder_layers`` from it and
                hands the whole object to every ``TransformerEncoderLayer``.
            dictionary: Vocabulary object. It is forwarded to the parent
                constructor and must support ``len()`` and expose
                ``pad_idx``.
        """
        super().__init__(dictionary)

        # Values looked up once and reused below.
        model_dim = args.encoder_embed_dim
        pad_idx = dictionary.pad_idx

        # Plain (non-module) configuration attributes.
        self.dropout = args.dropout
        self.embed_dim = model_dim
        self.padding_idx = pad_idx
        self.max_src_positions = args.max_src_positions

        # Token embedding table; embed_scale is sqrt(embed_dim) unless
        # scaling has been switched off through args.no_scale_embedding.
        self.embedding = generate_embedding(len(dictionary), model_dim,
                                            pad_idx)
        if args.no_scale_embedding:
            self.embed_scale = 1.0
        else:
            self.embed_scale = math.sqrt(model_dim)

        # The positional table is sized max positions + padding index + 1.
        # NOTE(review): presumably positions are numbered past padding_idx;
        # confirm against PositionalEmbedding.
        position_table_size = self.max_src_positions + pad_idx + 1
        self.embed_positions = PositionalEmbedding(
            model_dim, padding_idx=pad_idx, init_size=position_table_size)

        # Stack of args.encoder_layers independently constructed layers.
        encoder_stack = [
            TransformerEncoderLayer(args) for _ in range(args.encoder_layers)
        ]
        self.layers = nn.ModuleList(encoder_stack)