Example #1
    def __init__(self,
                 isize,
                 ncomb=2,
                 hsize=None,
                 dropout=0.0,
                 use_GeLU=use_adv_act_default,
                 enable_bias=enable_prev_ln_bias_default):

        super(ResidueCombiner, self).__init__()

        _hsize = isize * 2 * ncomb if hsize is None else hsize

        # should dropout be in front of sigmoid or not?
        self.net = nn.Sequential(
            Linear(isize * ncomb, _hsize),
            GeLU() if use_GeLU else nn.Sigmoid(),
            Dropout(dropout, inplace=inplace_after_GeLU),
            Linear(_hsize, isize, bias=enable_bias),
            Dropout(dropout, inplace=True),
        ) if dropout > 0.0 else nn.Sequential(
            Linear(isize * ncomb, _hsize),
            GeLU() if use_GeLU else nn.Sigmoid(),
            Linear(_hsize, isize, bias=enable_bias),
        )

        self.out_normer = nn.LayerNorm(isize,
                                       eps=ieps_ln_default,
                                       elementwise_affine=enable_ln_parameters)
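
The constructor alone tells us that ResidueCombiner concatenates ncomb feature vectors of width isize, passes them through a two-layer feed-forward block, and layer-normalizes an isize-wide result. Below is a minimal, self-contained sketch of how such a combiner is typically driven; the forward pass, the summed residual term, and the use of plain torch.nn layers in place of the repository's Linear/GeLU/Dropout wrappers and configuration constants are assumptions, not taken from the original.

import torch
from torch import nn

class ResidueCombinerSketch(nn.Module):

    # combines ncomb hidden states of width isize into one isize-wide state
    def __init__(self, isize, ncomb=2, hsize=None, dropout=0.0):
        super().__init__()
        _hsize = isize * 2 * ncomb if hsize is None else hsize
        layers = [nn.Linear(isize * ncomb, _hsize), nn.Sigmoid()]
        if dropout > 0.0:
            layers.append(nn.Dropout(dropout))
        layers.append(nn.Linear(_hsize, isize))
        if dropout > 0.0:
            layers.append(nn.Dropout(dropout))
        self.net = nn.Sequential(*layers)
        self.out_normer = nn.LayerNorm(isize)

    def forward(self, *xl):
        # concatenate the ncomb inputs, project back to isize, then normalize;
        # adding the element-wise sum of the inputs as a residual is an
        # assumption suggested by the class name
        return self.out_normer(self.net(torch.cat(xl, dim=-1)) + sum(xl))

h1, h2 = torch.randn(2, 7, 512), torch.randn(2, 7, 512)
print(ResidueCombinerSketch(512, ncomb=2)(h1, h2).shape)  # torch.Size([2, 7, 512])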
Example #2
    def __init__(self,
                 isize,
                 hsize=None,
                 dropout=0.0,
                 num_pos=cache_len_default,
                 use_GeLU=use_adv_act_default):

        super(AverageAttn, self).__init__()

        _hsize = isize if hsize is None else hsize

        self.num_pos = num_pos
        self.register_buffer('w', torch.Tensor(num_pos, num_pos))

        self.ffn = nn.Sequential(
            Linear(isize, _hsize),
            Dropout(dropout, inplace=True),
            GeLU() if use_GeLU else nn.ReLU(inplace=True),
            Linear(_hsize, isize),
            Dropout(dropout, inplace=True),
        ) if dropout > 0.0 else nn.Sequential(
            Linear(isize, _hsize),
            GeLU() if use_GeLU else nn.ReLU(inplace=True),
            Linear(_hsize, isize),
        )

        self.gw = Linear(isize * 2, isize * 2)

        self.reset_parameters()
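
The w buffer of shape (num_pos, num_pos) and the gw projection point at an average attention network (AAN) layer: w holds cumulative-average weights and gw looks like the gating projection. The sketch below shows the standard cumulative-average construction; the exact contents of reset_parameters and the way w is applied in the forward pass are assumptions.

import torch

num_pos = 5
# row i of the AAN weight matrix averages positions 0..i uniformly, i.e. a
# row-normalized lower-triangular matrix (assumed to match reset_parameters)
w = torch.tril(torch.ones(num_pos, num_pos))
w = w / w.sum(dim=-1, keepdim=True)

# applying it to states v of shape (bsize, num_pos, isize) yields, at each
# position i, the running average of v[:, :i + 1]
v = torch.randn(2, num_pos, 8)
avg = torch.matmul(w, v)
print(avg[:, 2].allclose(v[:, :3].mean(dim=1)))  # True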
Example #3
    def __init__(self,
                 isize,
                 hsize=None,
                 dropout=0.0,
                 norm_residual=norm_residual_default,
                 use_GeLU=use_adv_act_default,
                 enable_bias=enable_prev_ln_bias_default):

        super(PositionwiseFF, self).__init__()

        _hsize = isize * 4 if hsize is None else hsize

        self.net = nn.Sequential(
            Linear(isize, _hsize),
            GeLU() if use_GeLU else nn.ReLU(inplace=True),
            Dropout(dropout, inplace=inplace_after_GeLU),
            Linear(_hsize, isize, bias=enable_bias),
            Dropout(dropout, inplace=True),
        ) if dropout > 0.0 else nn.Sequential(
            Linear(isize, _hsize),
            GeLU() if use_GeLU else nn.ReLU(inplace=True),
            Linear(_hsize, isize, bias=enable_bias),
        )

        self.normer = nn.LayerNorm(isize,
                                   eps=ieps_ln_default,
                                   elementwise_affine=enable_ln_parameters)

        self.norm_residual = norm_residual
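
PositionwiseFF is the familiar Transformer feed-forward sub-layer with a LayerNorm applied before the network, and norm_residual apparently selects which tensor feeds the residual branch. A runnable sketch under those assumptions follows; plain torch.nn layers stand in for the repository's wrappers and configuration defaults.

import torch
from torch import nn

class PositionwiseFFSketch(nn.Module):

    def __init__(self, isize, hsize=None, dropout=0.0, norm_residual=True):
        super().__init__()
        _hsize = isize * 4 if hsize is None else hsize
        layers = [nn.Linear(isize, _hsize), nn.ReLU(inplace=True)]
        if dropout > 0.0:
            layers.append(nn.Dropout(dropout))
        layers.append(nn.Linear(_hsize, isize))
        if dropout > 0.0:
            layers.append(nn.Dropout(dropout))
        self.net = nn.Sequential(*layers)
        self.normer = nn.LayerNorm(isize)
        self.norm_residual = norm_residual

    def forward(self, x):
        _x = self.normer(x)  # pre-norm: normalize before the feed-forward net
        # norm_residual picks whether the residual branch carries the
        # normalized or the raw input (an assumption about the flag's meaning)
        return self.net(_x) + (_x if self.norm_residual else x)

x = torch.randn(2, 7, 512)
print(PositionwiseFFSketch(512, dropout=0.1)(x).shape)  # torch.Size([2, 7, 512])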
Example #4
    def __init__(self,
                 isize,
                 hsize=None,
                 dropout=0.0,
                 use_GeLU=use_adv_act_default):

        super(DATTNCombiner, self).__init__()

        _hsize = isize * 4 if hsize is None else hsize

        self.net = nn.Sequential(
            Linear(isize * 2, _hsize),
            Dropout(dropout, inplace=True),
            GeLU() if use_GeLU else nn.Sigmoid(),
            Scorer(_hsize, bias=False),
        ) if dropout > 0.0 else nn.Sequential(
            Linear(isize * 2, _hsize),
            GeLU() if use_GeLU else nn.Sigmoid(),
            Scorer(_hsize, bias=False),
        )
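
DATTNCombiner reduces the concatenation of two isize-wide representations to a score per position via Scorer. The sketch below assumes Scorer is a bias-free linear projection to one scalar and that the score is used as a sigmoid gate for a convex combination of the two inputs; both points are assumptions, not confirmed by the constructor.

import torch
from torch import nn

isize, hsize = 512, 2048
# hypothetical stand-in for the repository's Scorer: a bias-free projection to 1
net = nn.Sequential(
    nn.Linear(isize * 2, hsize),
    nn.Sigmoid(),
    nn.Linear(hsize, 1, bias=False),
)

a, b = torch.randn(2, 7, isize), torch.randn(2, 7, isize)
gate = torch.sigmoid(net(torch.cat((a, b), dim=-1)))  # shape (2, 7, 1)
combined = gate * a + (1.0 - gate) * b                # assumed gated combination
print(combined.shape)  # torch.Size([2, 7, 512])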
Example #5
    def __init__(self,
                 isize,
                 osize=None,
                 dropout=0.0,
                 use_GeLU=use_adv_act_default,
                 enable_bias=enable_residual_bias_default):

        super(LSTMCell4RNMT, self).__init__()

        _osize = isize if osize is None else osize

        # layer normalization is also applied to the computation of the candidate hidden state for efficiency; the Linear bias may be disabled when LayerNorm already provides one
        self.trans = Linear(isize + _osize, _osize * 4, bias=enable_bias)
        self.normer = nn.LayerNorm((4, _osize),
                                   eps=ieps_ln_default,
                                   elementwise_affine=enable_ln_parameters)

        self.act = GeLU() if use_GeLU else nn.Tanh()
        self.drop = Dropout(
            dropout, inplace=inplace_after_GeLU) if dropout > 0.0 else None

        self.osize = _osize
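
The fused isize + _osize -> _osize * 4 projection together with LayerNorm((4, _osize)) implies that the gate pre-activations are reshaped to (..., 4, osize) and normalized per gate before being split. Here is a self-contained sketch of such a layer-normalized LSTM cell; the exact gate wiring of LSTMCell4RNMT is an assumption, and plain torch.nn layers replace the repository's wrappers.

import torch
from torch import nn

class LayerNormLSTMCellSketch(nn.Module):

    def __init__(self, isize, osize=None, dropout=0.0):
        super().__init__()
        _osize = isize if osize is None else osize
        # one fused projection for all four gates, normalized per gate
        self.trans = nn.Linear(isize + _osize, _osize * 4)
        self.normer = nn.LayerNorm((4, _osize))
        self.act = nn.Tanh()
        self.drop = nn.Dropout(dropout) if dropout > 0.0 else None
        self.osize = _osize

    def forward(self, x, state):
        hx, cx = state
        # project the concatenated input/hidden once, reshape to (..., 4, osize)
        # so LayerNorm((4, osize)) normalizes the stacked gates, then split
        gates = self.normer(
            self.trans(torch.cat((x, hx), dim=-1)).view(*x.size()[:-1], 4, self.osize))
        ig, fg, og, cand = gates.unbind(dim=-2)
        cand = self.act(cand)
        if self.drop is not None:
            cand = self.drop(cand)
        # standard LSTM update; whether LSTMCell4RNMT applies a nonlinearity to
        # the new cell before the output gate is an assumption
        cy = fg.sigmoid() * cx + ig.sigmoid() * cand
        hy = og.sigmoid() * cy
        return hy, cy

cell = LayerNormLSTMCellSketch(512)
x, h, c = torch.randn(2, 512), torch.zeros(2, 512), torch.zeros(2, 512)
hy, cy = cell(x, (h, c))
print(hy.shape, cy.shape)  # torch.Size([2, 512]) torch.Size([2, 512])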
Example #6
    def __init__(self,
                 isize,
                 osize=None,
                 dropout=0.0,
                 use_GeLU=use_adv_act_default,
                 enable_bias=enable_residual_bias_default):

        super(GRUCell4RNMT, self).__init__()

        _osize = isize if osize is None else osize

        self.trans = Linear(isize + _osize, _osize * 2, bias=enable_bias)
        self.transi = Linear(isize, _osize)
        self.transh = Linear(_osize, _osize)

        self.normer = nn.LayerNorm((2, _osize),
                                   eps=ieps_ln_default,
                                   elementwise_affine=enable_ln_parameters)

        self.act = GeLU() if use_GeLU else nn.Tanh()
        self.drop = Dropout(
            dropout, inplace=inplace_after_GeLU) if dropout > 0.0 else None

        self.osize = _osize
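
GRUCell4RNMT fuses the two gates into one layer-normalized projection while computing the candidate state from the separate transi/transh projections. Below is a sketch along the lines of a standard GRU update; gating the hidden contribution with the reset gate and interpolating with the update gate are assumptions about how these layers are wired.

import torch
from torch import nn

class LayerNormGRUCellSketch(nn.Module):

    def __init__(self, isize, osize=None, dropout=0.0):
        super().__init__()
        _osize = isize if osize is None else osize
        self.trans = nn.Linear(isize + _osize, _osize * 2)  # reset + update gates, fused
        self.transi = nn.Linear(isize, _osize)              # candidate: input part
        self.transh = nn.Linear(_osize, _osize)             # candidate: hidden part
        self.normer = nn.LayerNorm((2, _osize))
        self.act = nn.Tanh()
        self.drop = nn.Dropout(dropout) if dropout > 0.0 else None
        self.osize = _osize

    def forward(self, x, hx):
        # fused gate projection, reshaped so LayerNorm((2, osize)) normalizes each gate
        gates = self.normer(
            self.trans(torch.cat((x, hx), dim=-1)).view(*x.size()[:-1], 2, self.osize)).sigmoid()
        rg, ug = gates.unbind(dim=-2)
        # candidate from the separate transi/transh projections, with the hidden
        # contribution scaled by the reset gate (the usual GRU recipe)
        cand = self.act(self.transi(x) + rg * self.transh(hx))
        if self.drop is not None:
            cand = self.drop(cand)
        return ug * hx + (1.0 - ug) * cand

cell = LayerNormGRUCellSketch(512)
x, h = torch.randn(2, 512), torch.zeros(2, 512)
print(cell(x, h).shape)  # torch.Size([2, 512])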