Example 1
    def __init__(self, h, d_model, p, d_ff, position_encoder, time_encoder, attn_p=0.1, version=1.0):
        super(UniversalDecoderLayer, self).__init__()
        self.version = version
        self.position_encoder = position_encoder
        self.time_encoder = time_encoder

        # Each sublayer gets a pre-processing step (sequence='n') and a
        # post-processing step (sequence='da') wrapped around it.
        self.preprocess_attn = PrePostProcessing(d_model, p, sequence='n')
        self.postprocess_attn = PrePostProcessing(d_model, p, sequence='da', static=onmt.constants.static)

        self.preprocess_src_attn = PrePostProcessing(d_model, p, sequence='n')
        self.postprocess_src_attn = PrePostProcessing(d_model, p, sequence='da', static=onmt.constants.static)

        self.preprocess_ffn = PrePostProcessing(d_model, p, sequence='n')
        self.postprocess_ffn = PrePostProcessing(d_model, p, sequence='da', static=onmt.constants.static)

        # Decoder self-attention and encoder-decoder (source) attention.
        self.multihead_tgt = MultiHeadAttention(h, d_model, attn_p=attn_p, static=onmt.constants.static)
        self.multihead_src = MultiHeadAttention(h, d_model, attn_p=attn_p, static=onmt.constants.static)

        if onmt.constants.activation_layer == 'linear_relu_linear':
            ff_p = p
            feedforward = FeedForward(d_model, d_ff, ff_p, static=onmt.constants.static)
        elif onmt.constants.activation_layer == 'maxout':
            k = int(math.ceil(d_ff / d_model))
            feedforward = MaxOut(d_model, d_model, k)
        else:
            # Avoid an unbound `feedforward` for unrecognized settings
            # (mirrors the explicit check in Example 3).
            raise NotImplementedError
        self.feedforward = Bottle(feedforward)
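
All four examples wrap every sublayer in a PrePostProcessing pair, driven by the sequence codes 'n' and 'da'. Below is a minimal, self-contained sketch of one plausible reading of those codes ('n' = layer norm, 'd' = dropout, 'a' = add the residual input); the class name and semantics here are assumptions for illustration, not the project's actual PrePostProcessing.

import torch
import torch.nn as nn

class PrePostProcessingSketch(nn.Module):
    """Hypothetical stand-in for PrePostProcessing: interpret each
    character of `sequence` as one processing step."""
    def __init__(self, d_model, p, sequence='n'):
        super().__init__()
        self.sequence = sequence
        self.norm = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(p)

    def forward(self, x, residual=None):
        for step in self.sequence:
            if step == 'n':      # normalize (used before a sublayer)
                x = self.norm(x)
            elif step == 'd':    # dropout (used after a sublayer)
                x = self.dropout(x)
            elif step == 'a':    # add the residual connection
                x = x + residual
        return x

# Usage mirroring the preprocess/postprocess pairs above:
pre = PrePostProcessingSketch(512, 0.1, sequence='n')
post = PrePostProcessingSketch(512, 0.1, sequence='da')
x = torch.randn(10, 512)
y = post(pre(x), residual=x)  # sublayer omitted for brevity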
Example 2
    def __init__(
        self,
        h,
        d_model,
        p,
        d_ff,
        attn_p=0.1,
    ):
        super(LMDecoderLayer, self).__init__()

        self.preprocess_attn = PrePostProcessing(d_model, p, sequence='n')
        self.postprocess_attn = PrePostProcessing(d_model,
                                                  p,
                                                  sequence='da',
                                                  static=onmt.constants.static)

        self.preprocess_ffn = PrePostProcessing(d_model, p, sequence='n')
        self.postprocess_ffn = PrePostProcessing(d_model,
                                                 p,
                                                 sequence='da',
                                                 static=onmt.constants.static)

        self.multihead_tgt = MultiHeadAttention(h,
                                                d_model,
                                                attn_p=attn_p,
                                                static=onmt.constants.static,
                                                share=1)

        ff_p = p
        feedforward = FeedForward(d_model,
                                  d_ff,
                                  ff_p,
                                  static=onmt.constants.static)
        self.feedforward = Bottle(feedforward)
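
The constructor above only registers modules; the composition happens in the layer's forward pass, which is not shown. As a hedged illustration, here is a self-contained pre-norm layer built from stock torch.nn modules that follows the same wiring (norm, self-attention, dropout + residual; then norm, feed-forward, dropout + residual). None of these classes are the project's own MultiHeadAttention or FeedForward.

import torch
import torch.nn as nn

class LMLayerSketch(nn.Module):
    """Stand-in for LMDecoderLayer's presumed data flow."""
    def __init__(self, h, d_model, p, d_ff, attn_p=0.1):
        super().__init__()
        self.norm_attn = nn.LayerNorm(d_model)
        self.norm_ffn = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(p)
        self.self_attn = nn.MultiheadAttention(d_model, h, dropout=attn_p)
        self.ffn = nn.Sequential(
            nn.Linear(d_model, d_ff), nn.ReLU(), nn.Dropout(p),
            nn.Linear(d_ff, d_model))

    def forward(self, x, causal_mask=None):
        # Pre-norm self-attention, then dropout + residual ('n' ... 'da').
        h_ = self.norm_attn(x)
        a, _ = self.self_attn(h_, h_, h_, attn_mask=causal_mask)
        x = x + self.dropout(a)
        # Pre-norm position-wise feed-forward, same post-processing.
        h_ = self.norm_ffn(x)
        return x + self.dropout(self.ffn(h_))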
Example 3
    def __init__(self, h, d_model, p, d_ff, attn_p=0.1, version=1.0, ignore_source=False,
                 variational=False, death_rate=0.0):
        super(TransformerXLDecoderLayer, self).__init__()
        self.version = version
        self.ignore_source = ignore_source
        self.variational = variational
        self.death_rate = death_rate

        self.preprocess_attn = PrePostProcessing(d_model, p, sequence='n')
        self.postprocess_attn = PrePostProcessing(d_model, p, sequence='da', variational=self.variational)

        self.preprocess_ffn = PrePostProcessing(d_model, p, sequence='n')
        self.postprocess_ffn = PrePostProcessing(d_model, p, sequence='da', variational=self.variational)

        d_head = d_model // h
        self.multihead_tgt = RelPartialLearnableMultiHeadAttn(h, d_model, d_head, dropatt=attn_p)

        if onmt.constants.activation_layer == 'linear_relu_linear':
            ff_p = p
            feedforward = FeedForward(d_model, d_ff, ff_p, variational=self.variational)
        elif onmt.constants.activation_layer == 'maxout':
            k = int(math.ceil(d_ff / d_model))
            feedforward = MaxOut(d_model, d_model, k)
        elif onmt.constants.activation_layer == 'linear_swish_linear':
            ff_p = p
            feedforward = FeedForwardSwish(d_model, d_ff, ff_p)
        else:
            raise NotImplementedError
        self.feedforward = Bottle(feedforward)
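
Example 3 is the only variant that takes a death_rate, which suggests stochastic depth: randomly skipping the whole residual branch during training. A minimal sketch of the usual recipe follows; the rescaling convention (dividing by 1 - death_rate at train time) is an assumption, and how this layer actually consumes death_rate in its forward pass is not visible in the snippet.

import torch

def stochastic_residual(x, sublayer, death_rate, training):
    """Stochastic-depth sketch (Huang et al., 2016): with probability
    `death_rate` the sublayer is skipped entirely during training;
    otherwise its output is rescaled to keep the expectation matched."""
    if training:
        if torch.rand(1).item() < death_rate:
            return x  # the layer "dies": identity shortcut only
        return x + sublayer(x) / (1.0 - death_rate)
    return x + sublayer(x)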
Example 4
    def __init__(self, h, d_model, p, d_ff, attn_p=0.1):
        super(FCTDecoderLayer, self).__init__()

        self.preprocess_attn = PrePostProcessing(d_model, p, sequence='n')
        self.postprocess_attn = PrePostProcessing(d_model,
                                                  p,
                                                  sequence='da',
                                                  static=True)

        self.preprocess_src_attn = PrePostProcessing(d_model, p, sequence='n')
        self.postprocess_src_attn = PrePostProcessing(d_model,
                                                      p,
                                                      sequence='da',
                                                      static=True)

        self.preprocess_ffn = PrePostProcessing(d_model, p, sequence='n')
        self.postprocess_ffn = PrePostProcessing(d_model,
                                                 p,
                                                 sequence='da',
                                                 static=True)

        #~ self.multihead_tgt = HierarchicalMultiHeadAttention(h, d_model, attn_p=attn_p)
        self.multihead_tgt = UniformMultiHeadAttention(h,
                                                       d_model,
                                                       attn_p=attn_p)
        #~ self.multihead_src = MultiHeadAttention(h, d_model, attn_p=attn_p)
        self.multihead_src = UniformMultiHeadAttention(h,
                                                       d_model,
                                                       attn_p=attn_p)

        if onmt.constants.activation_layer == 'linear_relu_linear':
            ff_p = p
            feedforward = FeedForward(d_model, d_ff, ff_p)
        elif onmt.constants.activation_layer == 'maxout':
            k = int(math.ceil(d_ff / d_model))
            feedforward = MaxOut(d_model, d_model, k)
        else:
            # Avoid an unbound `feedforward` for unrecognized settings
            # (mirrors the explicit check in Example 3).
            raise NotImplementedError
        self.feedforward = Bottle(feedforward)
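
In the 'maxout' branch used by Examples 1, 3, and 4, the feed-forward is replaced by MaxOut(d_model, d_model, k) with pool size k = ceil(d_ff / d_model). Below is a hypothetical maxout unit consistent with that call signature, following Goodfellow et al. (2013); the project's real MaxOut class may differ.

import math
import torch
import torch.nn as nn

class MaxOutSketch(nn.Module):
    """Project to d_out * pool_size features, then keep the max
    over each pool of `pool_size` candidates."""
    def __init__(self, d_in, d_out, pool_size):
        super().__init__()
        self.d_out, self.pool_size = d_out, pool_size
        self.linear = nn.Linear(d_in, d_out * pool_size)

    def forward(self, x):
        shape = x.shape[:-1] + (self.d_out, self.pool_size)
        return self.linear(x).view(*shape).max(dim=-1)[0]

# Pool size as in the constructors above: k = ceil(d_ff / d_model).
d_model, d_ff = 512, 2048
maxout = MaxOutSketch(d_model, d_model, int(math.ceil(d_ff / d_model)))
out = maxout(torch.randn(10, d_model))  # -> shape (10, 512)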