Esempio n. 1
0
 def __init__(self, conf: AttentionPlainConf, **kwargs):
     """Create the sub-nodes of the plain attention layer.

     All sizes are taken from ``self.conf`` (re-read after the parent
     constructor has run). The layer owns: query/key/value input affines,
     an optional relative-distance node, an optional affine mapping the
     ``nh_qk`` score heads to ``nh_v`` value heads, an attention dropout
     and the final output affine.
     """
     super().__init__(conf, **kwargs)
     conf: AttentionPlainConf = self.conf
     # input sizes of the query / key / value streams
     dim_q = conf.dim_q
     dim_k = conf.dim_k
     dim_v = conf.dim_v
     # number of heads and per-head size, for the score side and the value side
     nh_qk, d_qk = conf.nh_qk, conf.d_qk
     nh_v, d_v = conf.nh_v, conf.d_v
     qk_size = nh_qk * d_qk  # all score heads concatenated
     v_size = nh_v * d_v  # all value heads concatenated
     # --
     # scale for the attention score: sqrt of the per-head q/k size
     self._att_scale = math.sqrt(d_qk)
     # input affines, deliberately without dropout.
     # The q/k init factor is the xavier-uniform value for a single-head shape
     # divided by the one for the all-heads shape -- presumably so each head is
     # initialized as if it were standalone (TODO confirm against BK).
     gain_q = BK.get_inita_xavier_uniform((d_qk, dim_q)) / BK.get_inita_xavier_uniform((qk_size, dim_q))
     self.affine_q = AffineNode(
         None, isize=dim_q, osize=qk_size, no_drop=True,
         init_scale=gain_q*conf.init_scale_hin,
     )
     gain_k = BK.get_inita_xavier_uniform((d_qk, dim_k)) / BK.get_inita_xavier_uniform((qk_size, dim_k))
     self.affine_k = AffineNode(
         None, isize=dim_k, osize=qk_size, no_drop=True,
         init_scale=gain_k*conf.init_scale_hin,
     )
     self.affine_v = AffineNode(None, isize=dim_v, osize=v_size, no_drop=True)
     # relative-distance keys (only when enabled in the conf)
     if conf.use_rposi:
         self.rposi = RelDistNode(conf.rel, _dim=d_qk)
     else:
         self.rposi = None
     # attention & output
     if conf.useaff_qk2v:
         # affine from score heads to value heads
         self.aff_qk2v = AffineNode(None, isize=nh_qk, osize=nh_v)
     elif nh_qk != nh_v:
         # no head-mapping affine and the head counts differ: warn instead of failing
         zwarn(f"Possible problems with AttNode since hin({nh_qk}) != hout({nh_v})")
     self.adrop = DropoutNode(None, drop_rate=conf.att_drop, fix_drop=False)
     # todo(note): with drops(y) & act(?) & bias(y)?
     self.final_linear = AffineNode(None, isize=v_size, osize=dim_v, out_act=conf.out_act)
Esempio n. 2
0
 def __init__(self, conf: PairwiseBlockConf, **kwargs):
     """Create the sub-nodes of the pairwise block.

     All sizes are taken from ``self.conf`` (re-read after the parent
     constructor has run). The block owns: query/key/value input affines,
     two prediction affines over the ``nhead_in`` score heads (label scores
     and a single pairwise confidence score), an attention dropout, two
     final affines and a layer norm.
     """
     super().__init__(conf, **kwargs)
     conf: PairwiseBlockConf = self.conf
     # model size and number of labels
     ndim, nlab = conf.ndim, conf.nlab
     # number of heads and per-head size, for the score side and the value side
     nhead_in, dim_qk = conf.nhead_in, conf.dim_qk
     nhead_out, dim_v = conf.nhead_out, conf.dim_v
     qk_size = nhead_in * dim_qk  # all score heads concatenated
     v_size = nhead_out * dim_v  # all value heads concatenated
     # --
     # sqrt of the per-head q/k size, stored as the score scale.
     # NOTE(review): the previous comment also said "no scale here since already
     # small"; whether this value is actually applied is not visible here -- confirm.
     self._att_scale = math.sqrt(dim_qk)
     # input affines, deliberately without dropout.
     # q and k share one init factor: the xavier-uniform value for a single-head
     # shape divided by the one for the all-heads shape -- presumably so each head
     # is initialized as if it were standalone (TODO confirm against BK).
     head_gain = BK.get_inita_xavier_uniform((dim_qk, ndim)) / BK.get_inita_xavier_uniform((qk_size, ndim))
     self.affine_q = AffineNode(
         None, isize=ndim, osize=qk_size, no_drop=True,
         init_scale=head_gain*conf.hin_init_scale,
     )
     self.affine_k = AffineNode(
         None, isize=ndim, osize=qk_size, no_drop=True,
         init_scale=head_gain*conf.hin_init_scale,
     )
     self.affine_v = AffineNode(None, isize=ndim, osize=v_size, no_drop=True)
     # prediction: score heads -> label scores
     self.pred_in = AffineNode(None, isize=nhead_in, osize=nlab, no_drop=True)
     # prediction: score heads -> one pairwise confidence score
     self.aff_cf = AffineNode(
         None, isize=nhead_in, osize=1, no_drop=True,
         init_scale=conf.cf_init_scale,
     )
     # final layers
     self.adrop = DropoutNode(None, drop_rate=conf.att_drop, fix_drop=False)
     # label scores -> one value per output head
     self.fl_score = AffineNode(None, isize=nlab, osize=nhead_out, no_drop=True)
     # concatenated value heads -> model size
     self.fl_expr = AffineNode(None, isize=v_size, osize=ndim, out_act=conf.out_act)
     # add & norm
     self.norm = LayerNormNode(None, osize=ndim)