Example 1
    def __init__(self, args):
        super(TransformerDecoder, self).__init__()
        self.layers_num = args.layers_num
        self.layernorm_positioning = args.layernorm_positioning
        self.relative_position_embedding = args.relative_position_embedding
        self.transformer_decoder = nn.ModuleList(
            [TransformerDecoderLayer(args) for _ in range(self.layers_num)]
        )
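        # Use DeepSpeed activation checkpointing only when the corresponding options are present in args.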
        if "deepspeed_checkpoint_activations" in args:
            self.deepspeed_checkpoint_activations = args.deepspeed_checkpoint_activations
            self.deepspeed_checkpoint_layers_num = args.deepspeed_checkpoint_layers_num
        else:
            self.deepspeed_checkpoint_activations = False

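        # Whether parameter bias terms are kept (disabled when args.remove_transformer_bias is set).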
        has_bias = not args.remove_transformer_bias

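        # Pre-layernorm models build a final LayerNorm; the "t5" variant uses T5LayerNorm.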
        if self.layernorm_positioning == "pre":
            if args.layernorm == "t5":
                self.layer_norm = T5LayerNorm(args.hidden_size)
            else:
                self.layer_norm = LayerNorm(args.hidden_size)

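        # Relative position embedding for decoder self-attention (unidirectional, hence bidirectional=False).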
        if self.relative_position_embedding:
            self.self_pos_emb = RelativePositionEmbedding(bidirectional=False, heads_num=args.heads_num,
                                                          num_buckets=args.relative_attention_buckets_num)
Example 2
    def __init__(self, args):
        super(TransformerDecoder, self).__init__()
        self.layers_num = args.layers_num
        self.layernorm_positioning = args.layernorm_positioning
        self.transformer_decoder = nn.ModuleList(
            [TransformerDecoderLayer(args) for _ in range(self.layers_num)])
        if self.layernorm_positioning == "pre":
            self.layer_norm = LayerNorm(args.hidden_size)
Example 3
    def __init__(self, args):
        super(TransformerDecoder, self).__init__()
        self.layers_num = args.layers_num
        self.layernorm_positioning = args.layernorm_positioning
        self.relative_position_embedding = args.relative_position_embedding
        self.transformer_decoder = nn.ModuleList(
            [TransformerDecoderLayer(args) for _ in range(self.layers_num)])
        if self.layernorm_positioning == "pre":
            self.layer_norm = LayerNorm(args.hidden_size)

        if self.relative_position_embedding:
            self.relative_pos_emb = RelativePositionEmbedding(
                bidirectional=False, heads_num=args.heads_num)
Example 4
    def __init__(self, args):
        super(TransformerDecoder, self).__init__()
        self.layers_num = args.layers_num
        self.transformer_decoder = nn.ModuleList([
            TransformerDecoderLayer(args) for _ in range(self.layers_num)
        ])