def __init__(self, isize, num_head=8, osize=None, fhsize=None, dropout=0.0, **kwargs):

	_osize = isize if osize is None else osize

	super(BiHPLSTM, self).__init__(isize, num_head=num_head, osize=_osize, dropout=dropout, **kwargs)

	# float2odd fixes the integer per-head dimension, so the totals below are exact multiples of num_head
	i_hsize = float2odd(float(isize) / num_head) * num_head
	o_hsize = float2odd(float(_osize) / num_head) * num_head
	# hidden size passed to the core; defaults to 4 * o_hsize when fhsize is not given
	_fhsize = float2odd(float(o_hsize * 4 if fhsize is None else fhsize) / num_head) * num_head

	# replace the unidirectional core built by HPLSTM.__init__ with a single shared core whose
	# sizes and head count are doubled, so the forward and reverse directions run in parallel
	self.net = MHPLSTMCore(i_hsize + i_hsize, num_head=self.num_head + self.num_head, osize=o_hsize + o_hsize, fhsize=_fhsize + _fhsize, dropout=dropout)
def __init__(self, isize, num_head=8, osize=None, dropout=0.0, enable_proj_bias=enable_proj_bias_default):

	super(HPLSTM, self).__init__()

	_osize = isize if osize is None else osize
	o_hsize = float2odd(float(_osize) / num_head) * num_head
	self.head_dim = float2odd(float(isize) / num_head)
	i_hsize = self.head_dim * num_head
	self.num_head = num_head

	# input projection from isize to the head-aligned hidden size
	self.trans_input = Linear(isize, i_hsize, bias=enable_proj_bias)
	# multi-head parallel LSTM core operating on num_head groups of head_dim features
	self.net = MHPLSTMCore(i_hsize, num_head=self.num_head, osize=o_hsize, dropout=dropout)
	# output projection back to the requested output size
	self.trans_output = Linear(o_hsize, _osize, bias=enable_proj_bias)
def __init__(self, isize, num_head=8, osize=None, dropout=0.0, custom_act=use_adv_act_default, enable_bias=enable_prev_ln_bias_default):

	super(MHPLSTMCore, self).__init__()

	_osize = isize if osize is None else osize

	# integer per-head dimensions; the *_hsize totals are exact multiples of num_head
	i_head_dim = float2odd(float(isize) / num_head)
	i_hsize = i_head_dim * num_head
	o_head_dim = float2odd(float(_osize) / num_head)
	o_hsize = o_head_dim * num_head

	# head-wise projection from the 2 * i_hsize concatenated inputs to 3 * o_hsize features
	# (the three chunks are normalized per head by normer_hid below)
	self.trans_hid = GroupLinear(i_hsize + i_hsize, o_hsize * 3, num_head, bias=enable_bias, shuffle=False, trans_input=False, flatten_output=False)
	# output-gate transform: head-wise projection from i_hsize + o_hsize to o_hsize, then layer normalization
	self.trans_og = nn.Sequential(GroupLinear(i_hsize + o_hsize, o_hsize, num_head, bias=enable_bias, shuffle=False, trans_input=False, flatten_output=False), nn.LayerNorm((num_head, o_head_dim), eps=ieps_ln_default, elementwise_affine=enable_ln_parameters))

	# head-wise layer normalization of the cumulative-sum (csum) input summary
	self.normer_csum = nn.LayerNorm((num_head, i_head_dim), eps=ieps_ln_default, elementwise_affine=enable_ln_parameters)
	# head-wise layer normalization over the three o_head_dim-sized chunks produced by trans_hid
	self.normer_hid = nn.LayerNorm((num_head, 3, o_head_dim), eps=ieps_ln_default, elementwise_affine=enable_ln_parameters)

	self.act = Custom_Act() if custom_act else nn.ReLU() # alternatively nn.Tanh()
	self.drop = Dropout(dropout, inplace=inplace_after_Custom_Act) if dropout > 0.0 else None

	# learnable initial cell state, shaped (1, num_head, o_head_dim) for broadcasting over the batch
	self.init_cx = nn.Parameter(torch.zeros(1, num_head, o_head_dim))
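
# Usage sketch (not from the original source): HPLSTM wraps MHPLSTMCore between input and
# output projections, and BiHPLSTM extends HPLSTM, replacing its core with a doubled
# MHPLSTMCore (note that the fhsize keyword it forwards assumes a core variant that accepts it).
# The calls below use hypothetical sizes and assume the surrounding package provides the
# referenced helpers (float2odd, Linear, GroupLinear, Dropout, Custom_Act and the *_default
# constants):
#
#	hplstm = HPLSTM(512, num_head=8, dropout=0.1)
#	bi_hplstm = BiHPLSTM(512, num_head=8, dropout=0.1)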