Example 1
    def __init__(self,
                 isize,
                 ncomb=2,
                 hsize=None,
                 dropout=0.0,
                 custom_act=use_adv_act_default,
                 enable_bias=enable_prev_ln_bias_default):

        super(ResidueCombiner, self).__init__()

        _hsize = isize * 2 * ncomb if hsize is None else hsize

        # should dropout be in front of sigmoid or not?
        self.net = nn.Sequential(
            Linear(isize * ncomb, _hsize),
            Custom_Act() if custom_act else nn.Sigmoid(),
            Dropout(dropout, inplace=inplace_after_Custom_Act),
            Linear(_hsize, isize, bias=enable_bias),
            Dropout(dropout, inplace=True)
        ) if dropout > 0.0 else nn.Sequential(
            Linear(isize * ncomb, _hsize),
            Custom_Act() if custom_act else nn.Sigmoid(),
            Linear(_hsize, isize, bias=enable_bias))

        self.out_normer = nn.LayerNorm(isize,
                                       eps=ieps_ln_default,
                                       elementwise_affine=enable_ln_parameters)
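
A minimal instantiation sketch, assuming ResidueCombiner and the project-level defaults above are importable; the sizes are illustrative:

    # combine 2 hidden states of width 512; the hidden layer defaults to 512 * 2 * 2 = 2048
    combiner = ResidueCombiner(512, ncomb=2, dropout=0.1)
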
Example 2
    def __init__(self,
                 isize,
                 hsize=None,
                 dropout=0.0,
                 num_pos=cache_len_default,
                 custom_act=use_adv_act_default):

        super(AverageAttn, self).__init__()

        _hsize = isize if hsize is None else hsize

        self.num_pos = num_pos
        self.register_buffer('w', torch.Tensor(num_pos, 1))

        self.ffn = nn.Sequential(
            Linear(isize, _hsize),
            Custom_Act() if custom_act else nn.ReLU(inplace=True),
            Dropout(dropout, inplace=inplace_after_Custom_Act),
            Linear(_hsize, isize),
            Dropout(dropout, inplace=True)
        ) if dropout > 0.0 else nn.Sequential(
            Linear(isize, _hsize),
            Custom_Act() if custom_act else nn.ReLU(inplace=True),
            Linear(_hsize, isize))

        self.gw = Linear(isize * 2, isize * 2)

        self.reset_parameters()
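
A minimal instantiation sketch, assuming AverageAttn as defined above is importable; the position-weight buffer w has shape (num_pos, 1) and is filled by reset_parameters (whose definition is not shown here), and the values below are illustrative:

    # hidden size defaults to isize when hsize is None
    attn = AverageAttn(512, dropout=0.1, num_pos=256)
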
Example 3
    def __init__(self,
                 isize,
                 hsize=None,
                 dropout=0.0,
                 norm_residual=norm_residual_default,
                 custom_act=use_adv_act_default,
                 enable_bias=enable_prev_ln_bias_default):

        super(PositionwiseFF, self).__init__()

        _hsize = isize * 4 if hsize is None else hsize

        self.net = nn.Sequential(
            Linear(isize, _hsize),
            Custom_Act() if custom_act else nn.ReLU(inplace=True),
            Dropout(dropout, inplace=inplace_after_Custom_Act),
            Linear(_hsize, isize, bias=enable_bias),
            Dropout(dropout, inplace=True)
        ) if dropout > 0.0 else nn.Sequential(
            Linear(isize, _hsize),
            Custom_Act() if custom_act else nn.ReLU(inplace=True),
            Linear(_hsize, isize, bias=enable_bias))

        self.normer = nn.LayerNorm(isize,
                                   eps=ieps_ln_default,
                                   elementwise_affine=enable_ln_parameters)

        self.norm_residual = norm_residual
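
For reference, a self-contained plain-PyTorch sketch of the same position-wise feed-forward stack, with the project-specific Linear, Custom_Act and Dropout wrappers swapped for standard torch.nn layers; the sizes and dropout are illustrative, and the pre-norm/residual handling of the full module is not reproduced:

    import torch
    import torch.nn as nn

    isize, dropout = 512, 0.1
    hsize = isize * 4  # same default expansion as above

    # build the dropout-free variant and insert Dropout only when dropout > 0,
    # mirroring the conditional construction in the snippet above
    layers = [nn.Linear(isize, hsize), nn.ReLU(inplace=True)]
    if dropout > 0.0:
        layers.append(nn.Dropout(dropout))
    layers.append(nn.Linear(hsize, isize))
    if dropout > 0.0:
        layers.append(nn.Dropout(dropout))
    net = nn.Sequential(*layers)

    x = torch.randn(8, 20, isize)  # (batch, length, model dimension)
    y = net(x)                     # same shape as x
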
Example 4
    def __init__(self,
                 isize,
                 osize=None,
                 dropout=0.0,
                 custom_act=use_adv_act_default,
                 enable_bias=enable_prev_ln_bias_default):

        super(GRUCell4RNMT, self).__init__()

        _osize = isize if osize is None else osize

        self.trans = Linear(isize + _osize, _osize * 2, bias=enable_bias)
        self.transi = Linear(isize, _osize)
        self.transh = Linear(_osize, _osize)

        self.normer = nn.LayerNorm((2, _osize),
                                   eps=ieps_ln_default,
                                   elementwise_affine=enable_ln_parameters)

        self.act = Custom_Act() if custom_act else nn.Tanh()
        self.drop = Dropout(
            dropout,
            inplace=inplace_after_Custom_Act) if dropout > 0.0 else None

        self.osize = _osize
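
A minimal instantiation sketch, assuming GRUCell4RNMT and its defaults are importable; trans emits _osize * 2 features, which the LayerNorm over shape (2, _osize) presumably normalizes gate-wise (values below are illustrative):

    cell = GRUCell4RNMT(512, dropout=0.1)  # osize defaults to isize
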
Example 5
    def __init__(self,
                 isize,
                 hsize=None,
                 dropout=0.0,
                 custom_act=use_adv_act_default):

        super(DATTNCombiner, self).__init__()

        _hsize = isize * 4 if hsize is None else hsize

        self.net = nn.Sequential(
            Linear(isize * 2, _hsize),
            Dropout(dropout, inplace=True),
            Custom_Act() if custom_act else nn.Sigmoid(),
            Scorer(_hsize, bias=False)
        ) if dropout > 0.0 else nn.Sequential(
            Linear(isize * 2, _hsize),
            Custom_Act() if custom_act else nn.Sigmoid(),
            Scorer(_hsize, bias=False))
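
A minimal instantiation sketch, assuming DATTNCombiner and the project's Scorer are importable; judging from Linear(isize * 2, _hsize) feeding into Scorer, the net scores the concatenation of two isize-dimensional inputs (values below are illustrative):

    combiner = DATTNCombiner(512, dropout=0.1)  # hidden size defaults to 512 * 4 = 2048
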
Example 6
	def __init__(self, isize, num_head=8, osize=None, dropout=0.0, custom_act=use_adv_act_default, enable_bias=enable_prev_ln_bias_default):

		super(MHPLSTMCore, self).__init__()

		_osize = isize if osize is None else osize

		i_head_dim = float2odd(float(isize) / num_head)
		i_hsize = i_head_dim * num_head
		o_head_dim = float2odd(float(_osize) / num_head)
		o_hsize = o_head_dim * num_head

		self.trans_hid = GroupLinear(i_hsize + i_hsize, o_hsize * 3, num_head, bias=enable_bias,
			shuffle=False, trans_input=False, flatten_output=False)
		self.trans_og = nn.Sequential(
			GroupLinear(i_hsize + o_hsize, o_hsize, num_head, bias=enable_bias,
				shuffle=False, trans_input=False, flatten_output=False),
			nn.LayerNorm((num_head, o_head_dim), eps=ieps_ln_default, elementwise_affine=enable_ln_parameters))

		self.normer_csum = nn.LayerNorm((num_head, i_head_dim), eps=ieps_ln_default, elementwise_affine=enable_ln_parameters)
		self.normer_hid = nn.LayerNorm((num_head, 3, o_head_dim), eps=ieps_ln_default, elementwise_affine=enable_ln_parameters)

		self.act = Custom_Act() if custom_act else nn.ReLU()#Tanh()
		self.drop = Dropout(dropout, inplace=inplace_after_Custom_Act) if dropout > 0.0 else None
		self.init_cx = nn.Parameter(torch.zeros(1, num_head, o_head_dim))
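
A minimal instantiation sketch, assuming MHPLSTMCore, GroupLinear and the float2odd helper are importable from the surrounding codebase; per-head dimensions are rounded by the project-specific float2odd, so the internal i_hsize and o_hsize may differ slightly from isize and osize (values below are illustrative):

    core = MHPLSTMCore(512, num_head=8, dropout=0.1)  # osize defaults to isize
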
Example 7
    def __init__(self,
                 isize,
                 osize=None,
                 dropout=0.0,
                 custom_act=use_adv_act_default,
                 enable_bias=enable_prev_ln_bias_default):

        super(LSTMCell4RNMT, self).__init__()

        _osize = isize if osize is None else osize

        # layer normalization is also applied to the computation of the hidden state for efficiency; the bias can be disabled here since LayerNorm already provides one
        self.trans = Linear(isize + _osize, _osize * 4, bias=enable_bias)
        self.normer = nn.LayerNorm((4, _osize),
                                   eps=ieps_ln_default,
                                   elementwise_affine=enable_ln_parameters)

        self.act = Custom_Act() if custom_act else nn.Tanh()
        self.drop = Dropout(
            dropout,
            inplace=inplace_after_Custom_Act) if dropout > 0.0 else None

        self.osize = _osize
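
A minimal instantiation sketch, assuming LSTMCell4RNMT is importable as defined above; trans emits _osize * 4 features, matching the LayerNorm over shape (4, _osize), presumably one slice per LSTM gate (values below are illustrative):

    cell = LSTMCell4RNMT(512, dropout=0.1)  # osize defaults to isize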