def __init__(self, opt, dicts, positional_encoder, language_embeddings=None, ignore_source=False):
    self.death_rate = opt.death_rate
    self.max_memory_size = opt.max_memory_size
    self.stream_context = opt.stream_context
    self.extra_context_size = opt.extra_context_size
    self.n_heads = opt.n_heads
    self.fast_self_attn = opt.fast_self_attention
    self.mpw = opt.multilingual_partitioned_weights
    self.learnable_position_encoding = opt.learnable_position_encoding
    self.max_pos_length = opt.max_pos_length

    # build_modules will be called from the inherited constructor
    super().__init__(opt, dicts, positional_encoder, language_embeddings,
                     ignore_source, allocate_positions=False)

    if self.learnable_position_encoding:
        self.positional_encoder = None
    else:
        self.positional_encoder = SinusoidalPositionalEmbedding(opt.model_size)

    self.d_head = self.model_size // self.n_heads

    # Parameters for the position biases: deprecated, kept for backward compatibility.
    # self.r_w_bias = nn.Parameter(torch.Tensor(self.n_heads, self.d_head))
    # self.r_r_bias = nn.Parameter(torch.Tensor(self.n_heads, self.d_head))

    self.mln = opt.multilingual_layer_norm

    if not opt.rezero:
        self.postprocess_layer = PrePostProcessing(opt.model_size, opt.dropout, sequence='n',
                                                   multilingual=self.mln, n_languages=opt.n_languages)
    else:
        self.postprocess_layer = Identity()
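# SinusoidalPositionalEmbedding is assumed to produce the fixed encoding of
# Vaswani et al. (2017); a minimal self-contained sketch for an even
# model_size (the repo's module may differ in caching and signature):
import math
import torch


def sinusoidal_positions(length, model_size):
    position = torch.arange(length, dtype=torch.float).unsqueeze(1)
    div_term = torch.exp(torch.arange(0, model_size, 2, dtype=torch.float)
                         * (-math.log(10000.0) / model_size))
    pe = torch.zeros(length, model_size)
    pe[:, 0::2] = torch.sin(position * div_term)   # even dimensions
    pe[:, 1::2] = torch.cos(position * div_term)   # odd dimensions
    return pe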
def __init__(self, opt, death_rate=0.0, **kwargs):
    super(EncoderLayer, self).__init__()
    self.variational = opt.variational_dropout
    self.death_rate = death_rate
    self.fast_self_attention = opt.fast_self_attention
    self.macaron = opt.macaron
    self.ffn_scale = 0.5 if self.macaron else 1

    if self.macaron:
        self.preprocess_mcr_ffn = preprocessing(opt.rezero, opt.model_size, opt.dropout, sequence='n')
        self.postprocess_mcr_ffn = PrePostProcessing(opt.model_size, opt.dropout, sequence='da',
                                                     variational=self.variational)
        self.mcr_feedforward = PositionWiseFeedForward(opt.model_size, opt.inner_size, opt.dropout,
                                                       variational=self.variational,
                                                       activation=opt.ffn_activation, glu=opt.ffn_glu)

    self.preprocess_attn = PrePostProcessing(opt.model_size, opt.dropout, sequence='n')
    self.postprocess_attn = PrePostProcessing(opt.model_size, opt.dropout, sequence='da',
                                              variational=self.variational)
    self.preprocess_ffn = PrePostProcessing(opt.model_size, opt.dropout, sequence='n')
    self.postprocess_ffn = PrePostProcessing(opt.model_size, opt.dropout, sequence='da',
                                             variational=self.variational)

    if opt.fast_self_attention:
        self.multihead = SelfMultiheadAttn(opt.model_size, opt.n_heads, opt.attn_dropout)
    else:
        self.multihead = MultiHeadAttention(opt.n_heads, opt.model_size, attn_p=opt.attn_dropout, share=1)

    if not opt.fast_feed_forward:
        feedforward = FeedForward(opt.model_size, opt.inner_size, opt.dropout,
                                  variational=self.variational)
        self.feedforward = Bottle(feedforward)
    else:
        self.feedforward = PositionWiseFeedForward(opt.model_size, opt.inner_size, opt.dropout,
                                                   variational=self.variational,
                                                   activation=opt.ffn_activation, glu=opt.ffn_glu)
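# ffn_scale = 0.5 signals the macaron ordering of Lu et al. (2019): the layer
# is bracketed by two half-weighted feed-forward sub-layers around attention.
# A self-contained sketch of that ordering (illustrative modules, not the
# repo's PrePostProcessing/PositionWiseFeedForward):
import torch
import torch.nn as nn


class MacaronLayerSketch(nn.Module):
    def __init__(self, model_size, inner_size, n_heads):
        super().__init__()
        self.ffn_scale = 0.5

        def ffn():
            return nn.Sequential(nn.LayerNorm(model_size),
                                 nn.Linear(model_size, inner_size), nn.ReLU(),
                                 nn.Linear(inner_size, model_size))

        self.mcr_feedforward = ffn()
        self.feedforward = ffn()
        self.attn_norm = nn.LayerNorm(model_size)
        self.attn = nn.MultiheadAttention(model_size, n_heads)

    def forward(self, x):                                  # x: [time, batch, model_size]
        x = x + self.ffn_scale * self.mcr_feedforward(x)   # first half-FFN
        a = self.attn_norm(x)
        x = x + self.attn(a, a, a, need_weights=False)[0]
        x = x + self.ffn_scale * self.feedforward(x)       # second half-FFN
        return x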
def __init__(self, opt, dicts, positional_encoder, encoder_type='text', language_embeddings=None):
    self.death_rate = opt.death_rate
    self.learnable_position_encoding = opt.learnable_position_encoding
    self.layer_modules = list()
    self.asynchronous = opt.asynchronous
    self.max_memory_size = opt.max_memory_size
    self.extra_context_size = opt.extra_context_size
    self.experimental = opt.experimental
    self.unidirectional = opt.unidirectional
    self.reversible = opt.src_reversible
    self.n_heads = opt.n_heads
    self.fast_self_attn = opt.fast_self_attention
    self.checkpointing = opt.checkpointing
    self.mpw = opt.multilingual_partitioned_weights
    self.multilingual_linear_projection = opt.multilingual_linear_projection
    self.mln = opt.multilingual_layer_norm
    self.no_input_scale = opt.no_input_scale
    self.max_pos_length = opt.max_pos_length

    # TODO: multilingual linear transformation

    # build_modules will be called from the inherited constructor
    super().__init__(opt, dicts, positional_encoder, encoder_type, language_embeddings)

    # learnable position encoding ...
    if self.learnable_position_encoding:
        self.positional_encoder = None
    else:
        # ... or the pre-set sinusoidal encoding
        self.positional_encoder = SinusoidalPositionalEmbedding(opt.model_size)

    self.d_head = self.model_size // self.n_heads

    if self.multilingual_linear_projection:
        self.linear_proj = nn.Parameter(torch.Tensor(opt.n_languages, self.model_size, self.model_size))
        # Xavier-style initialization: std = sqrt(2 / (fan_in + fan_out))
        std_ = math.sqrt(2.0 / (self.model_size + self.model_size))
        torch.nn.init.normal_(self.linear_proj, 0.0, std_)

    if not opt.rezero:
        self.postprocess_layer = PrePostProcessing(opt.model_size, opt.dropout, sequence='n',
                                                   multilingual=self.mln, n_languages=opt.n_languages)
    else:
        self.postprocess_layer = Identity()
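# The per-language projection above is a [n_languages, model_size, model_size]
# tensor; presumably the encoder output is projected with the matrix of the
# current language id. The indexing and einsum below are an illustrative
# assumption, not the repo's actual application code.
import torch

n_languages, model_size = 4, 8
linear_proj = torch.randn(n_languages, model_size, model_size)

x = torch.randn(10, 3, model_size)                     # [time, batch, model_size]
lang = 2                                               # language id of this batch
x = torch.einsum('tbd,de->tbe', x, linear_proj[lang])  # -> [10, 3, 8]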
def __init__(self, opt, death_rate=0.0, lid_net=None):
    super(RelativeTransformerDecoderLayer, self).__init__()
    self.ignore_source = opt.ignore_source
    self.variational = opt.variational_dropout
    self.death_rate = death_rate
    self.mfw = opt.multilingual_factorized_weights
    self.mpw = opt.multilingual_partitioned_weights
    self.mln = opt.multilingual_layer_norm
    self.weight_drop = opt.weight_drop
    self.multilingual_adapter = opt.multilingual_adapter
    self.adapter_bottleneck_size = opt.adapter_bottleneck_size

    self.preprocess_attn = PrePostProcessing(opt.model_size, opt.dropout, sequence='n',
                                             multilingual=self.mln, n_languages=opt.n_languages)
    self.postprocess_attn = PrePostProcessing(opt.model_size, opt.dropout, sequence='da',
                                              variational=self.variational)

    if not self.ignore_source:
        self.preprocess_src_attn = PrePostProcessing(opt.model_size, opt.dropout, sequence='n',
                                                     multilingual=self.mln, n_languages=opt.n_languages)
        self.postprocess_src_attn = PrePostProcessing(opt.model_size, opt.dropout, sequence='da',
                                                      variational=self.variational)

        if self.mfw:
            self.multihead_src = MFWEncdecMultiheadAttn(opt.n_heads, opt.model_size, opt.attn_dropout,
                                                        n_languages=opt.n_languages, rank=opt.mfw_rank,
                                                        use_multiplicative=opt.mfw_multiplicative,
                                                        weight_drop=self.weight_drop,
                                                        mfw_activation=opt.mfw_activation)
        elif self.mpw:
            self.multihead_src = MPEncdecMultiheadAttn(opt.n_heads, opt.model_size, opt.attn_dropout,
                                                       factor_size=opt.mpw_factor_size)
        else:
            self.multihead_src = EncdecMultiheadAttn(opt.n_heads, opt.model_size, opt.attn_dropout)

    self.preprocess_ffn = PrePostProcessing(opt.model_size, opt.dropout, sequence='n',
                                            multilingual=self.mln, n_languages=opt.n_languages)
    self.postprocess_ffn = PrePostProcessing(opt.model_size, opt.dropout, sequence='da',
                                             variational=self.variational)

    d_head = opt.model_size // opt.n_heads

    if self.mfw:
        self.feedforward = MFWPositionWiseFeedForward(opt.model_size, opt.inner_size, opt.dropout,
                                                      variational=self.variational,
                                                      n_languages=opt.n_languages, rank=opt.mfw_rank,
                                                      use_multiplicative=opt.mfw_multiplicative,
                                                      weight_drop=self.weight_drop,
                                                      mfw_activation=opt.mfw_activation)
        self.multihead_tgt = MFWRelativeSelfMultiheadAttn(opt.model_size, opt.n_heads, opt.attn_dropout,
                                                          n_languages=opt.n_languages, rank=opt.mfw_rank,
                                                          use_multiplicative=opt.mfw_multiplicative,
                                                          weight_drop=self.weight_drop,
                                                          mfw_activation=opt.mfw_activation)
    elif self.mpw:
        self.feedforward = MPPositionWiseFeedForward(opt.model_size, opt.inner_size, opt.dropout,
                                                     variational=self.variational,
                                                     factor_size=opt.mpw_factor_size)
        self.multihead_tgt = MPRelativeSelfMultiheadAttn(opt.model_size, opt.n_heads, opt.attn_dropout,
                                                         factor_size=opt.mpw_factor_size)
    else:
        self.multihead_tgt = RelativeSelfMultiheadAttn(opt.model_size, opt.n_heads, opt.attn_dropout)
        self.feedforward = PositionWiseFeedForward(opt.model_size, opt.inner_size, opt.dropout,
                                                   variational=self.variational)

    self.lfv_multilingual = opt.lfv_multilingual

    if opt.lfv_multilingual:
        self.lid_net = lid_net
        self.lfv_mapper = nn.Linear(opt.bottleneck_size, opt.model_size)
    else:
        self.lid_net = None
        self.lfv_mapper = None

    if self.multilingual_adapter:
        from onmt.modules.multilingual_factorized.multilingual_adapters import MultilingualAdapter
        self.adapters = MultilingualAdapter(opt.model_size, opt.adapter_bottleneck_size,
                                            n_languages=opt.n_languages, dropout=opt.dropout)
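# death_rate implements stochastic depth (Huang et al., 2016): during training
# each residual layer is dropped entirely with probability death_rate, which is
# why every layer constructor here stores it. A minimal sketch of the pattern,
# under the assumption that this repo rescales the surviving residual branch
# during training (the alternative convention rescales at inference instead):
import torch


def stochastic_residual(x, sublayer, death_rate, training):
    if training and torch.rand(1).item() < death_rate:
        return x                                   # layer skipped this step
    out = sublayer(x)
    if training:
        out = out / (1.0 - death_rate)             # compensate for dropped steps
    return x + out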
def __init__(self, opt, death_rate=0.0, **kwargs):
    super(RelativeTransformerEncoderLayer, self).__init__()
    self.variational = opt.variational_dropout
    self.death_rate = death_rate
    self.fast_self_attention = opt.fast_self_attention
    self.depthwise_conv = opt.depthwise_conv
    self.mfw = opt.multilingual_factorized_weights
    self.mpw = opt.multilingual_partitioned_weights
    self.mln = opt.multilingual_layer_norm
    self.no_ffn = opt.no_ffn
    self.weight_drop = opt.weight_drop
    self.multilingual_adapter = opt.multilingual_adapter
    self.adapter_bottleneck_size = opt.adapter_bottleneck_size

    if self.mfw:
        assert not self.mpw, "[ERROR] factorized and partitioned weights cannot be used at the same time."

    self.preprocess_attn = PrePostProcessing(opt.model_size, opt.dropout, sequence='n',
                                             multilingual=self.mln, n_languages=opt.n_languages)
    self.postprocess_attn = PrePostProcessing(opt.model_size, opt.dropout, sequence='da',
                                              variational=self.variational)

    if not self.no_ffn:
        self.preprocess_ffn = PrePostProcessing(opt.model_size, opt.dropout, sequence='n',
                                                multilingual=self.mln, n_languages=opt.n_languages)
        self.postprocess_ffn = PrePostProcessing(opt.model_size, opt.dropout, sequence='da',
                                                 variational=self.variational)

    d_head = opt.model_size // opt.n_heads

    if self.mfw:
        if not self.no_ffn:
            self.feedforward = MFWPositionWiseFeedForward(opt.model_size, opt.inner_size, opt.dropout,
                                                          variational=self.variational,
                                                          n_languages=opt.n_languages, rank=opt.mfw_rank,
                                                          use_multiplicative=opt.mfw_multiplicative,
                                                          weight_drop=self.weight_drop,
                                                          mfw_activation=opt.mfw_activation)
        self.multihead = MFWRelativeSelfMultiheadAttn(opt.model_size, opt.n_heads, opt.attn_dropout,
                                                      n_languages=opt.n_languages, rank=opt.mfw_rank,
                                                      use_multiplicative=opt.mfw_multiplicative,
                                                      weight_drop=self.weight_drop,
                                                      mfw_activation=opt.mfw_activation)
    elif self.mpw:
        if not self.no_ffn:
            self.feedforward = MPPositionWiseFeedForward(opt.model_size, opt.inner_size, opt.dropout,
                                                         variational=self.variational,
                                                         factor_size=opt.mpw_factor_size)
        self.multihead = MPRelativeSelfMultiheadAttn(opt.model_size, opt.n_heads, opt.attn_dropout,
                                                     factor_size=opt.mpw_factor_size)
    else:
        if not self.no_ffn:
            self.feedforward = PositionWiseFeedForward(opt.model_size, opt.inner_size, opt.dropout,
                                                       variational=self.variational)
        self.multihead = RelativeSelfMultiheadAttn(opt.model_size, opt.n_heads, opt.attn_dropout)

    if self.depthwise_conv:
        self.preprocess_conv = PrePostProcessing(opt.model_size, opt.dropout, sequence='n',
                                                 multilingual=self.mln, n_languages=opt.n_languages)
        self.postprocess_conv = PrePostProcessing(opt.model_size, opt.dropout, sequence='da',
                                                  variational=self.variational)
        # note: the boolean flag is overwritten here with the actual conv module
        self.depthwise_conv = ConformerConvBlock(opt.model_size, opt.conv_kernel, bias=True)
    else:
        self.depthwise_conv = None

    if self.multilingual_adapter:
        from onmt.modules.multilingual_factorized.multilingual_adapters import MultilingualAdapter
        self.adapters = MultilingualAdapter(opt.model_size, opt.adapter_bottleneck_size,
                                            n_languages=opt.n_languages, dropout=opt.dropout)
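# ConformerConvBlock is assumed to follow the convolution module of Gulati
# et al. (2020): pointwise conv + GLU gating, depthwise conv, normalization,
# Swish, and a final pointwise conv. A self-contained sketch under that
# assumption (odd kernel sizes preserve the sequence length):
import torch
import torch.nn as nn
import torch.nn.functional as F


class ConformerConvSketch(nn.Module):
    def __init__(self, model_size, kernel_size, bias=True):
        super().__init__()
        self.pointwise1 = nn.Conv1d(model_size, 2 * model_size, 1, bias=bias)
        self.depthwise = nn.Conv1d(model_size, model_size, kernel_size,
                                   padding=kernel_size // 2,
                                   groups=model_size, bias=bias)
        self.norm = nn.BatchNorm1d(model_size)
        self.pointwise2 = nn.Conv1d(model_size, model_size, 1, bias=bias)

    def forward(self, x):                          # x: [batch, model_size, time]
        x = F.glu(self.pointwise1(x), dim=1)       # gate halves the channels
        x = F.silu(self.norm(self.depthwise(x)))   # Swish == SiLU
        return self.pointwise2(x)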
def preprocessing(rezero, *args, **kwargs):
    if rezero:
        return Identity()
    else:
        return PrePostProcessing(*args, **kwargs)
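# Usage sketch for the factory above: under ReZero (Bachlechner et al., 2020)
# the pre-sublayer normalization is dropped, since the learned residual gate
# takes its place; otherwise a standard pre-norm module is built. sequence='n'
# is assumed to denote layer normalization, consistent with the 'da'
# (dropout + add) and 'dz' (dropout + rezero-add) post-processing sequences
# used throughout these constructors.
model_size = 512                                             # illustrative value
norm = preprocessing(True, model_size, 0.0, sequence='n')    # -> Identity()
norm = preprocessing(False, model_size, 0.0, sequence='n')   # -> PrePostProcessing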
def __init__(self, opt, death_rate=0.0, lid_net=None):
    super(RelativeTransformerDecoderLayer, self).__init__()
    self.ignore_source = opt.ignore_source
    self.variational = opt.variational_dropout
    self.death_rate = death_rate
    self.mfw = opt.multilingual_factorized_weights
    self.mpw = opt.multilingual_partitioned_weights
    self.mln = opt.multilingual_layer_norm
    self.weight_drop = opt.weight_drop
    self.multilingual_adapter = opt.multilingual_adapter
    self.adapter_bottleneck_size = opt.adapter_bottleneck_size
    self.macaron = opt.macaron
    self.ffn_scale = 0.5 if self.macaron else 1
    self.rezero = opt.rezero
    self.learnable_pos = opt.learnable_position_encoding
    # fall back to the global dropout rate when the specific rates are unset (< 0)
    self.residual_dropout = opt.residual_dropout if opt.residual_dropout >= 0 else opt.dropout
    self.ffn_dropout = opt.ffn_dropout if opt.ffn_dropout >= 0 else opt.dropout

    self.preprocess_attn = preprocessing(self.rezero, opt.model_size, 0.0, sequence='n',
                                         multilingual=self.mln, n_languages=opt.n_languages)
    self.postprocess_attn = PrePostProcessing(opt.model_size, self.residual_dropout,
                                              sequence='dz' if self.rezero else 'da',
                                              variational=self.variational)

    if self.macaron:
        self.preprocess_mcr_ffn = preprocessing(self.rezero, opt.model_size, 0.0, sequence='n',
                                                multilingual=self.mln, n_languages=opt.n_languages)
        self.postprocess_mcr_ffn = PrePostProcessing(opt.model_size, self.residual_dropout,
                                                     sequence='dz' if self.rezero else 'da',
                                                     variational=self.variational)

        if self.mfw:
            # fixed typo: was self.ffn_dropoutt
            self.mcr_feedforward = MFWPositionWiseFeedForward(opt.model_size, opt.inner_size,
                                                              self.ffn_dropout,
                                                              variational=self.variational,
                                                              n_languages=opt.n_languages, rank=opt.mfw_rank,
                                                              use_multiplicative=opt.mfw_multiplicative,
                                                              activation=opt.ffn_activation, glu=opt.ffn_glu)
        else:
            self.mcr_feedforward = PositionWiseFeedForward(opt.model_size, opt.inner_size, self.ffn_dropout,
                                                           variational=self.variational,
                                                           activation=opt.ffn_activation, glu=opt.ffn_glu)

    if not self.ignore_source:
        self.preprocess_src_attn = preprocessing(self.rezero, opt.model_size, 0.0, sequence='n',
                                                 multilingual=self.mln, n_languages=opt.n_languages)
        self.postprocess_src_attn = PrePostProcessing(opt.model_size, self.residual_dropout,
                                                      sequence='dz' if self.rezero else 'da',
                                                      variational=self.variational)

        if self.mfw:
            self.multihead_src = MFWEncdecMultiheadAttn(opt.n_heads, opt.model_size, opt.attn_dropout,
                                                        n_languages=opt.n_languages, rank=opt.mfw_rank,
                                                        use_multiplicative=opt.mfw_multiplicative,
                                                        weight_drop=self.weight_drop,
                                                        mfw_activation=opt.mfw_activation)
        elif self.mpw:
            self.multihead_src = MPEncdecMultiheadAttn(opt.n_heads, opt.model_size, opt.attn_dropout,
                                                       factor_size=opt.mpw_factor_size)
        else:
            self.multihead_src = EncdecMultiheadAttn(opt.n_heads, opt.model_size, opt.attn_dropout)

    self.preprocess_ffn = preprocessing(self.rezero, opt.model_size, 0.0, sequence='n',
                                        multilingual=self.mln, n_languages=opt.n_languages)
    self.postprocess_ffn = PrePostProcessing(opt.model_size, self.residual_dropout,
                                             sequence='dz' if self.rezero else 'da',
                                             variational=self.variational)

    d_head = opt.model_size // opt.n_heads

    if self.mfw:
        self.feedforward = MFWPositionWiseFeedForward(opt.model_size, opt.inner_size, self.ffn_dropout,
                                                      variational=self.variational,
                                                      n_languages=opt.n_languages, rank=opt.mfw_rank,
                                                      use_multiplicative=opt.mfw_multiplicative,
                                                      weight_drop=self.weight_drop,
                                                      mfw_activation=opt.mfw_activation,
                                                      activation=opt.ffn_activation, glu=opt.ffn_glu)
        self.multihead_tgt = MFWRelativeSelfMultiheadAttn(opt.model_size, opt.n_heads, opt.attn_dropout,
                                                          n_languages=opt.n_languages, rank=opt.mfw_rank,
                                                          use_multiplicative=opt.mfw_multiplicative,
                                                          weight_drop=self.weight_drop,
                                                          mfw_activation=opt.mfw_activation)
    elif self.mpw:
        self.feedforward = MPPositionWiseFeedForward(opt.model_size, opt.inner_size, self.ffn_dropout,
                                                     variational=self.variational,
                                                     factor_size=opt.mpw_factor_size)
        self.multihead_tgt = MPRelativeSelfMultiheadAttn(opt.model_size, opt.n_heads, opt.attn_dropout,
                                                         factor_size=opt.mpw_factor_size)
    else:
        self.multihead_tgt = RelativeSelfMultiheadAttn(opt.model_size, opt.n_heads, opt.attn_dropout,
                                                       learnable_pos=self.learnable_pos,
                                                       max_pos=opt.max_pos_length)
        self.feedforward = PositionWiseFeedForward(opt.model_size, opt.inner_size, self.ffn_dropout,
                                                   variational=self.variational,
                                                   activation=opt.ffn_activation, glu=opt.ffn_glu)

    # self.lfv_multilingual = opt.lfv_multilingual
    #
    # if opt.lfv_multilingual:
    #     self.lid_net = lid_net
    #     self.lfv_mapper = nn.Linear(opt.bottleneck_size, opt.model_size)
    # else:
    #     self.lid_net = None
    #     self.lfv_mapper = None

    if self.multilingual_adapter:
        from onmt.modules.multilingual_factorized.multilingual_adapters import MultilingualAdapter
        self.adapters = MultilingualAdapter(opt.model_size, opt.adapter_bottleneck_size,
                                            n_languages=opt.n_languages, dropout=opt.dropout)
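# MultilingualAdapter is assumed to be a per-language bottleneck adapter in
# the style of Bapna & Firat (2019): down-project, non-linearity, up-project,
# residual, with one adapter per language. A minimal sketch under that
# assumption:
import torch
import torch.nn as nn


class MultilingualAdapterSketch(nn.Module):
    def __init__(self, model_size, bottleneck_size, n_languages, dropout=0.0):
        super().__init__()
        self.adapters = nn.ModuleList([
            nn.Sequential(nn.LayerNorm(model_size),
                          nn.Linear(model_size, bottleneck_size), nn.ReLU(),
                          nn.Linear(bottleneck_size, model_size),
                          nn.Dropout(dropout))
            for _ in range(n_languages)])

    def forward(self, x, lang):
        return x + self.adapters[lang](x)          # residual around the bottleneck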