Example 1
    def __init__(self, opt, dicts, positional_encoder, language_embeddings=None, ignore_source=False):

        self.death_rate = opt.death_rate
        self.n_heads = opt.n_heads
        self.checkpointing = opt.checkpointing
        self.absolute_position_encoding = opt.absolute_position_encoding
        self.late_emb_scale = opt.decoder_late_emb_scale
        self.learnable_position_encoding = opt.learnable_position_encoding
        self.max_pos_length = opt.max_pos_length
        self.reversible = opt.tgt_reversible

        # build_modules will be called from the inherited constructor
        super(RelativeTransformerDecoder, self).__init__(opt, dicts,
                                                         positional_encoder,
                                                         language_embeddings,
                                                         ignore_source,
                                                         allocate_positions=False)

        if self.learnable_position_encoding:
            self.positional_encoder = None
        else:
            if not self.absolute_position_encoding:
                # or using pre-set sinusoidal
                self.positional_encoder = SinusoidalPositionalEmbedding(opt.model_size)
            else:
                self.positional_encoder = FastSinusoidalPositionalEncoding(opt.model_size)
        self.d_head = self.model_size // self.n_heads

        if opt.rezero or opt.post_norm:
            self.postprocess_layer = Identity()
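
The attributes above are assigned before the call to super().__init__ because, as the comment notes, the base constructor invokes build_modules, which already needs them. A minimal sketch of that ordering constraint (the class names here are illustrative, not the actual base classes):

class BaseSketch:
    def __init__(self):
        # The base constructor triggers module construction ...
        self.build_modules()

    def build_modules(self):
        raise NotImplementedError

class ChildSketch(BaseSketch):
    def __init__(self, n_heads):
        # ... so anything build_modules reads must exist before super().__init__().
        self.n_heads = n_heads
        super().__init__()

    def build_modules(self):
        # Reads self.n_heads; assigned after super().__init__() this would raise AttributeError.
        self.d_head = 512 // self.n_heads

ChildSketch(n_heads=8)  # works; moving the assignment below super().__init__() fails
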
Example 2
    def __init__(self, opt, dicts, positional_encoder, language_embeddings=None, ignore_source=False):

        self.death_rate = opt.death_rate
        self.max_memory_size = opt.max_memory_size
        self.stream_context = opt.stream_context
        self.extra_context_size = opt.extra_context_size
        self.n_heads = opt.n_heads
        self.fast_self_attn = opt.fast_self_attention
        self.mpw = opt.multilingual_partitioned_weights
        self.learnable_position_encoding = opt.learnable_position_encoding
        self.max_pos_length = opt.max_pos_length

        # build_modules will be called from the inherited constructor
        super().__init__(opt, dicts, positional_encoder, language_embeddings,
                         ignore_source,
                         allocate_positions=False)
        if self.learnable_position_encoding:
            self.positional_encoder = None
        else:
            self.positional_encoder = SinusoidalPositionalEmbedding(opt.model_size)
        self.d_head = self.model_size // self.n_heads
        # Parameters for the position biases (deprecated; kept for backward compatibility):
        # self.r_w_bias = nn.Parameter(torch.Tensor(self.n_heads, self.d_head))
        # self.r_r_bias = nn.Parameter(torch.Tensor(self.n_heads, self.d_head))

        self.mln = opt.multilingual_layer_norm
        if not opt.rezero:
            self.postprocess_layer = PrePostProcessing(opt.model_size, opt.dropout, sequence='n', multilingual=self.mln,
                                                       n_languages=opt.n_languages)
        else:
            self.postprocess_layer = Identity()
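
For reference, a sketch of the kind of table a sinusoidal positional embedding such as SinusoidalPositionalEmbedding produces; the real module's interface is not shown in these snippets, so the function below is an assumption (model_size is assumed even):

import math
import torch

def sinusoidal_table(length, model_size):
    # Phase for position i at frequency j is pos[i] * inv_freq[j].
    pos = torch.arange(length, dtype=torch.float).unsqueeze(1)
    inv_freq = torch.exp(torch.arange(0, model_size, 2, dtype=torch.float)
                         * (-math.log(10000.0) / model_size))
    table = torch.zeros(length, model_size)
    table[:, 0::2] = torch.sin(pos * inv_freq)  # even dimensions: sine
    table[:, 1::2] = torch.cos(pos * inv_freq)  # odd dimensions: cosine
    return table  # [length, model_size]
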
Example 3
    def __init__(self, opt, dicts, positional_encoder, encoder_type='text', language_embeddings=None):
        self.death_rate = opt.death_rate
        self.learnable_position_encoding = opt.learnable_position_encoding
        self.layer_modules = list()
        self.asynchronous = opt.asynchronous
        self.max_memory_size = opt.max_memory_size
        self.extra_context_size = opt.extra_context_size
        self.experimental = opt.experimental
        self.unidirectional = opt.unidirectional
        self.reversible = opt.src_reversible
        self.n_heads = opt.n_heads
        self.fast_self_attn = opt.fast_self_attention
        self.checkpointing = opt.checkpointing
        self.mpw = opt.multilingual_partitioned_weights
        self.multilingual_linear_projection = opt.multilingual_linear_projection
        self.mln = opt.multilingual_layer_norm
        self.no_input_scale = opt.no_input_scale
        self.max_pos_length = opt.max_pos_length

        # TODO: multilingual linear transformation

        # build_modules will be called from the inherited constructor
        super().__init__(opt, dicts, positional_encoder, encoder_type, language_embeddings)

        # learnable position encoding
        if self.learnable_position_encoding:
            # raise NotImplementedError
            self.positional_encoder = None
        else:
            # or using pre-set sinusoidal
            self.positional_encoder = SinusoidalPositionalEmbedding(opt.model_size)

        self.d_head = self.model_size // self.n_heads

        if self.multilingual_linear_projection:
            self.linear_proj = nn.Parameter(torch.Tensor(opt.n_languages, self.model_size, self.model_size))

            std_ = math.sqrt(2.0 / (self.model_size + self.model_size))
            torch.nn.init.normal_(self.linear_proj, 0.0, std_)

        if not opt.rezero:
            self.postprocess_layer = PrePostProcessing(opt.model_size, opt.dropout, sequence='n', multilingual=self.mln,
                                                       n_languages=opt.n_languages)
        else:
            self.postprocess_layer = Identity()
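
linear_proj above allocates one model_size x model_size matrix per language, initialized Xavier-style with std = sqrt(2 / (fan_in + fan_out)). A sketch of how such a per-language weight could be selected and applied; the actual forward pass is not shown here, so the application convention is an assumption:

import math
import torch

n_languages, model_size = 4, 16
linear_proj = torch.nn.Parameter(torch.Tensor(n_languages, model_size, model_size))
torch.nn.init.normal_(linear_proj, 0.0, math.sqrt(2.0 / (model_size + model_size)))

x = torch.randn(10, 2, model_size)        # [seq_len, batch, model_size]
lang = 1                                  # hypothetical language index for this batch
out = torch.matmul(x, linear_proj[lang])  # language-specific projection
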
Example 4
    def __init__(self,
                 opt,
                 dicts,
                 positional_encoder,
                 encoder_type='text',
                 language_embeddings=None):
        self.death_rate = opt.death_rate
        self.learnable_position_encoding = opt.learnable_position_encoding
        self.layer_modules = list()
        self.unidirectional = opt.unidirectional
        self.n_heads = opt.n_heads
        self.n_languages = opt.n_languages
        self.checkpointing = opt.checkpointing
        self.absolute_position_encoding = opt.absolute_position_encoding
        self.early_emb_scale = opt.encoder_early_emb_scale
        self.max_pos_length = opt.max_pos_length
        self.reversible = opt.src_reversible

        # build_modules will be called from the inherited constructor
        super(RelativeTransformerEncoder,
              self).__init__(opt, dicts, positional_encoder, encoder_type,
                             language_embeddings)

        if not self.early_emb_scale and (self.use_language_embedding
                                         or self.absolute_position_encoding):
            print(
                "[INFO] Embeddings will be scaled after the language embedding "
                "and position encoding have been added."
                "\n[INFO] For multilingual models it is advisable to use -encoder_early_emb_scale"
            )

        # learnable position encoding
        if self.learnable_position_encoding:
            self.positional_encoder = None
        else:
            if not self.absolute_position_encoding:
                # or using pre-set sinusoidal
                self.positional_encoder = SinusoidalPositionalEmbedding(
                    opt.model_size)
            else:
                self.positional_encoder = FastSinusoidalPositionalEncoding(
                    opt.model_size)

        if opt.rezero or opt.post_norm:
            self.postprocess_layer = Identity()

        self.d_head = self.model_size // self.n_heads
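
The learnable/relative/absolute branching on the positional encoder recurs in every constructor above. It could be consolidated into a small factory, in the same spirit as the preprocessing helper below (a sketch, assuming the same module classes used in the snippets):

def build_positional_encoder(opt):
    if opt.learnable_position_encoding:
        return None  # positions come from learnable embeddings allocated elsewhere
    if getattr(opt, 'absolute_position_encoding', False):
        return FastSinusoidalPositionalEncoding(opt.model_size)
    # default: a sinusoidal table, used here for relative positions
    return SinusoidalPositionalEmbedding(opt.model_size)
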

def preprocessing(rezero, *args, **kwargs):
    """Return Identity() when rezero is enabled, otherwise a PrePostProcessing layer."""
    if rezero:
        return Identity()
    return PrePostProcessing(*args, **kwargs)
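
With this helper, the repeated rezero branches in the constructors above collapse to a single call (the encoder/decoder variants that also check opt.post_norm would pass opt.rezero or opt.post_norm as the first argument):

self.postprocess_layer = preprocessing(
    opt.rezero, opt.model_size, opt.dropout,
    sequence='n', multilingual=self.mln, n_languages=opt.n_languages)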