Example 1
    def __init__(self,
                 isize,
                 num_head=8,
                 osize=None,
                 fhsize=None,
                 dropout=0.0,
                 **kwargs):

        _osize = isize if osize is None else osize

        super(BiHPLSTM, self).__init__(isize,
                                       num_head=num_head,
                                       osize=_osize,
                                       dropout=dropout,
                                       **kwargs)

        # round the per-head width with float2odd, then scale back up so every
        # hidden size divides evenly across the heads
        i_hsize = float2odd(float(isize) / num_head) * num_head
        o_hsize = float2odd(float(_osize) / num_head) * num_head
        _fhsize = float2odd(
            float(o_hsize * 4 if fhsize is None else fhsize) /
            num_head) * num_head

        # every size is doubled so that a single MHPLSTMCore serves both
        # directions of this bidirectional layer
        self.net = MHPLSTMCore(i_hsize + i_hsize,
                               num_head=self.num_head + self.num_head,
                               osize=o_hsize + o_hsize,
                               fhsize=_fhsize + _fhsize,
                               dropout=dropout)
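
The hidden sizes above are first normalized per head with float2odd and then multiplied back by the head count, so they always divide evenly across the heads; the final call doubles the input width, the head count, the output width and the feed-forward width, presumably so that one fused MHPLSTMCore can cover both directions of the bidirectional layer. A standalone sketch of that sizing arithmetic, with ceiling division standing in for float2odd (whose exact rounding rule lives elsewhere in the repository):

isize, osize, num_head = 300, 300, 8

i_head_dim = -(-isize // num_head)                  # 38: per-head width, rounded up (stand-in for float2odd)
i_hsize = i_head_dim * num_head                     # 304: total width, divisible by num_head
o_hsize = -(-osize // num_head) * num_head
fhsize = -(-(o_hsize * 4) // num_head) * num_head   # feed-forward width defaults to 4 * o_hsize

# BiHPLSTM then instantiates one core with every size doubled:
#   MHPLSTMCore(i_hsize * 2, num_head=num_head * 2, osize=o_hsize * 2,
#               fhsize=fhsize * 2, dropout=dropout)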
Example 2
	def __init__(self, isize, num_head=8, osize=None, dropout=0.0, enable_proj_bias=enable_proj_bias_default):

		super(HPLSTM, self).__init__()

		_osize = isize if osize is None else osize
		o_hsize = float2odd(float(_osize) / num_head) * num_head

		self.head_dim = float2odd(float(isize) / num_head)
		i_hsize = self.head_dim * num_head
		self.num_head = num_head

		# project into a head-divisible width, run the multi-head LSTM core,
		# then project back to the requested output size
		self.trans_input = Linear(isize, i_hsize, bias=enable_proj_bias)
		self.net = MHPLSTMCore(i_hsize, num_head=self.num_head, osize=o_hsize, dropout=dropout)
		self.trans_output = Linear(o_hsize, _osize, bias=enable_proj_bias)
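
HPLSTM is a projection sandwich: trans_input lifts the isize features to a head-divisible width, MHPLSTMCore runs the multi-head LSTM over the sequence, and trans_output maps back to the requested output size. A minimal, self-contained shape walk-through with plain torch.nn.Linear standing in for the repository's Linear wrapper (the sizes here are hypothetical):

import torch
from torch import nn

isize, num_head = 300, 8
head_dim = -(-isize // num_head)           # stand-in rounding for float2odd
i_hsize = head_dim * num_head              # 304, evenly divisible by num_head

trans_input = nn.Linear(isize, i_hsize, bias=False)
x = torch.randn(4, 20, isize)              # (batch, sequence, feature)
h = trans_input(x)                         # (4, 20, 304)
heads = h.view(4, 20, num_head, head_dim)  # split into 8 heads of width 38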
Example 3
	def __init__(self, isize, num_head=8, osize=None, dropout=0.0, custom_act=use_adv_act_default, enable_bias=enable_prev_ln_bias_default):

		super(MHPLSTMCore, self).__init__()

		_osize = isize if osize is None else osize

		i_head_dim = float2odd(float(isize) / num_head)
		i_hsize = i_head_dim * num_head
		o_head_dim = float2odd(float(_osize) / num_head)
		o_hsize = o_head_dim * num_head

		# trans_hid maps its i_hsize + i_hsize input to three o_head_dim-wide
		# blocks per head; trans_og produces the per-head output gate
		self.trans_hid = GroupLinear(i_hsize + i_hsize, o_hsize * 3, num_head, bias=enable_bias, shuffle=False, trans_input=False, flatten_output=False)
		self.trans_og = nn.Sequential(GroupLinear(i_hsize + o_hsize, o_hsize, num_head, bias=enable_bias, shuffle=False, trans_input=False, flatten_output=False), nn.LayerNorm((num_head, o_head_dim), eps=ieps_ln_default, elementwise_affine=enable_ln_parameters))

		self.normer_csum = nn.LayerNorm((num_head, i_head_dim), eps=ieps_ln_default, elementwise_affine=enable_ln_parameters)
		self.normer_hid = nn.LayerNorm((num_head, 3, o_head_dim), eps=ieps_ln_default, elementwise_affine=enable_ln_parameters)

		self.act = Custom_Act() if custom_act else nn.ReLU() # Tanh()
		self.drop = Dropout(dropout, inplace=inplace_after_Custom_Act) if dropout > 0.0 else None
		# learnable initial cell state (zeros), broadcast across the batch
		self.init_cx = nn.Parameter(torch.zeros(1, num_head, o_head_dim))
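
The forward computation is not part of this snippet, but the shapes lay out the structure: the i_hsize + i_hsize input of trans_hid suggests the current input is concatenated with the normalized cumulative summary handled by normer_csum, the result is projected to three o_head_dim-wide blocks per head, normer_hid normalizes them as a (num_head, 3, o_head_dim) group, trans_og yields the output gate, and init_cx is a learnable initial cell state. A hedged sketch of how such a 3x-wide projection can be split into gate tensors (the gate names and the update rule below are standard-LSTM assumptions, not taken from the repository):

import torch

bsize, seql, num_head, o_head_dim = 4, 20, 8, 64
hid = torch.randn(bsize, seql, num_head, 3, o_head_dim)   # shaped as normer_hid expects
i_gate, f_gate, cell_cand = hid.unbind(dim=-2)            # three (4, 20, 8, 64) tensors
i_gate, f_gate = i_gate.sigmoid(), f_gate.sigmoid()
cell = f_gate * torch.zeros(1, num_head, o_head_dim) + i_gate * cell_cand.tanh()  # one step from a zero initial cell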