Example no. 1
 def __init__(self, pc, conf: NodeExtractorConfHead, vocab: HLabelVocab,
              extract_type: str):
     super().__init__(pc, conf, vocab, extract_type)
     # node selector
     conf.sel_conf._input_dim = conf._input_dim  # make dims fit
     self.sel: NodeSelector = self.add_sub_node(
         "sel", NodeSelector(pc, conf.sel_conf))
     # encoding
     self.dmxnn = conf.dmxnn
     self.posi_embed = self.add_sub_node(
         "pe", RelPosiEmbedding(pc, conf.posi_dim, max=conf.posi_cut))
     if self.dmxnn:
         conf.e_enc._input_dim = conf._input_dim + conf.posi_dim
     else:
         conf.e_enc._input_dim = conf._input_dim
     self.e_encoder = self.add_sub_node("ee", MyEncoder(pc, conf.e_enc))
     e_enc_dim = self.e_encoder.get_output_dims()[0]
     # decoding
     # todo(note): dropout after pooling; todo(+N): cannot go to previous layers if there are no encoders
     self.special_drop = self.add_sub_node("sd", Dropout(pc, (e_enc_dim, )))
     self.use_lab_f = conf.use_lab_f
     self.lab_f_use_lexi = conf.lab_f_use_lexi
     if self.use_lab_f:
         lab_f_input_dims = [e_enc_dim] * 3 if self.dmxnn else [e_enc_dim]
         if self.lab_f_use_lexi:
             lab_f_input_dims.append(conf._lexi_dim)
         self.lab_f = self.add_sub_node(
             "lab",
             Affine(pc,
                    lab_f_input_dims,
                    conf.lab_conf.n_dim,
                    act=conf.lab_f_act))
     else:
         self.lab_f = lambda x: x[0]  # only use the first one
     # secondary type
     self.use_secondary_type = conf.use_secondary_type
     if self.use_secondary_type:
         # todo(note): re-use vocab; or totally reuse the predictor?
         if conf.sectype_reuse_hl:
             self.hl2: HLabelNode = self.hl
         else:
             new_lab_conf = deepcopy(conf.lab_conf)
             new_lab_conf.zero_nil = False  # todo(note): not zero_nil here!
             self.hl2: HLabelNode = self.add_sub_node(
                 "hl", HLabelNode(pc, new_lab_conf, vocab))
         # enc+t1 -> t2
         self.t1tot2 = self.add_sub_node(
             "1to2", Embedding(pc, self.hl_output_size,
                               conf.lab_conf.n_dim))
     else:
         self.hl2 = None
         self.t1tot2 = None
Example no. 2
 def __init__(self, pc: BK.ParamCollection, sconf: ScorerConf):
     super().__init__(pc, None, None)
     # options
     input_dim = sconf._input_dim
     arc_space = sconf.arc_space
     lab_space = sconf.lab_space
     ff_hid_size = sconf.ff_hid_size
     ff_hid_layer = sconf.ff_hid_layer
     use_biaffine = sconf.use_biaffine
     use_ff = sconf.use_ff
     use_ff2 = sconf.use_ff2
     biaffine_div = sconf.biaffine_div
     biaffine_init_ortho = sconf.biaffine_init_ortho
     transform_act = sconf.transform_act
     #
     self.input_dim = input_dim
     self.num_label = sconf._num_label
     # attach/arc
     self.arc_m = self.add_sub_node("am", Affine(pc, input_dim, arc_space, act=transform_act))
     self.arc_h = self.add_sub_node("ah", Affine(pc, input_dim, arc_space, act=transform_act))
     self.arc_scorer = self.add_sub_node(
         "as", BiAffineScorer(pc, arc_space, arc_space, 1, ff_hid_size, ff_hid_layer=ff_hid_layer,
                              use_biaffine=use_biaffine, use_ff=use_ff, use_ff2=use_ff2,
                              biaffine_div=biaffine_div, biaffine_init_ortho=biaffine_init_ortho))
     # only add distance for arc
     if sconf.arc_dist_clip > 0:
         # todo(+N): how to include dist feature?
         # self.dist_helper = self.add_sub_node("dh", AttDistHelper(pc, sconf.get_dist_aconf(), arc_space))
         self.dist_helper = None
         raise NotImplementedError("TODO")
     else:
         self.dist_helper = None
     # labeling
     self.lab_m = self.add_sub_node("lm", Affine(pc, input_dim, lab_space, act=transform_act))
     self.lab_h = self.add_sub_node("lh", Affine(pc, input_dim, lab_space, act=transform_act))
     self.lab_scorer = self.add_sub_node(
         "ls", BiAffineScorer(pc, lab_space, lab_space, self.num_label, ff_hid_size, ff_hid_layer=ff_hid_layer,
                              use_biaffine=use_biaffine, use_ff=use_ff, use_ff2=use_ff2,
                              biaffine_div=biaffine_div, biaffine_init_ortho=biaffine_init_ortho))
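
The two scorers above follow the standard biaffine parsing recipe: head and modifier representations are first projected into arc_space / lab_space, then every (modifier, head) pair is scored with a bilinear term plus per-side linear terms. A minimal NumPy sketch of that arc-scoring form (illustrative only; the project's BiAffineScorer additionally supports the ff/ff2 terms, biaffine_div scaling, and orthogonal init seen in the config):

import numpy as np

def biaffine_arc_scores(mod, head, W, u_mod, u_head, b=0.0):
    # mod, head: [n, arc_space] projected representations
    # returns an [n, n] matrix where entry (m, h) scores attaching modifier m under head h
    bilinear = mod @ W @ head.T                                  # pairwise bilinear term
    linear = mod @ u_mod[:, None] + (head @ u_head[:, None]).T   # per-side linear terms
    return bilinear + linear + b

n, arc_space = 5, 8
rng = np.random.default_rng(0)
scores = biaffine_arc_scores(rng.normal(size=(n, arc_space)), rng.normal(size=(n, arc_space)),
                             rng.normal(size=(arc_space, arc_space)),
                             rng.normal(size=arc_space), rng.normal(size=arc_space))
print(scores.shape)  # (5, 5)

The label scorer is the same computation with num_label output channels instead of a single arc score.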
Example no. 3
 def __init__(self, pc: BK.ParamCollection, pname: str, input_dim: int,
              conf: SeqCrfNodeConf, inputter: Inputter):
     super().__init__(pc, conf, name="CRF")
     self.conf = conf
     self.inputter = inputter
     self.input_dim = input_dim
     # this step is performed at the embedder, thus still does not influence the inputter
     self.add_root_token = self.inputter.embedder.add_root_token
     # --
     self.pname = pname
     self.attr_name = pname + "_seq"  # attribute name in Instance
     self.vocab = inputter.vpack.get_voc(pname)
     # todo(note): we must make sure that 0 means the non/NA tag
     assert self.vocab.non == 0
     # models
     if conf.hid_dim <= 0:  # no hidden layer
         self.hid_layer = None
         self.pred_input_dim = input_dim
     else:
         self.hid_layer = self.add_sub_node(
             "hid", Affine(pc, input_dim, conf.hid_dim, act=conf.hid_act))
         self.pred_input_dim = conf.hid_dim
     self.tagset_size = self.vocab.unk  # todo(note): UNK is the prediction boundary
     self.pred_layer = self.add_sub_node(
         "pr",
         Affine(pc,
                self.pred_input_dim,
                self.tagset_size + 2,
                init_rop=NoDropRop()))
     # transition matrix
     init_transitions = np.zeros(
         [self.tagset_size + 2, self.tagset_size + 2])
     init_transitions[:, START_TAG] = -10000.0
     init_transitions[STOP_TAG, :] = -10000.0
     init_transitions[:, 0] = -10000.0
     init_transitions[0, :] = -10000.0
     self.transitions = self.add_param(
         "T", (self.tagset_size + 2, self.tagset_size + 2),
         init=init_transitions)
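
The -10000.0 entries above act as minus infinity on the log scale: they keep START usable only at the beginning of a sequence, STOP only at the end, and the reserved index 0 (see the assert on vocab.non) out of predicted paths entirely, while all ordinary tag-to-tag transitions start at 0.0 and are learned. A small self-contained rebuild of that matrix, with assumed values for the sizes and the two extra indices (not the project's CRF code):

import numpy as np

tagset_size = 4                                      # assumed: real tags at 1..tagset_size-1, 0 reserved
START_TAG, STOP_TAG = tagset_size, tagset_size + 1   # assumed: the two extra indices

trans = np.zeros((tagset_size + 2, tagset_size + 2))
trans[:, START_TAG] = -10000.0   # block the whole START column
trans[STOP_TAG, :] = -10000.0    # block the whole STOP row
trans[:, 0] = -10000.0           # block the reserved tag 0 in both directions
trans[0, :] = -10000.0

print(trans[1, 2], trans[1, START_TAG], trans[STOP_TAG, 2])  # 0.0 -10000.0 -10000.0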
Example no. 4
 def __init__(self, pc, conf: NodeExtractorConfGene0, vocab: HLabelVocab,
              extract_type: str):
     super().__init__(pc, conf, vocab, extract_type)
     # decoding
     # -----
     # the two parts: actually in biaffine attention forms
     # transform embeddings for attention match (token evidence)
     self.T_tok = self.add_sub_node(
         "at",
         Affine(pc,
                conf.lab_conf.n_dim,
                conf._input_dim,
                init_rop=NoDropRop()))
     # transform embeddings for global match (sent evidence)
     self.T_sent = self.add_sub_node(
         "as",
         Affine(pc,
                conf.lab_conf.n_dim,
                conf._input_dim,
                init_rop=NoDropRop()))
     # to be refreshed
     self.query_tok = None  # [L, D]
     self.query_sent = None  # [L, D]
     # -----
     # how to combine the two parts: a fixed lambda or a dynamic gate (computed from the input features)
     self.lambda_score_tok = conf.lambda_score_tok
     if self.lambda_score_tok < 0.:  # auto mode: using an MLP (make hidden size equal to input//4)
         self.score_gate = self.add_sub_node(
             "mix",
             get_mlp(pc, [conf._input_dim] * 4,
                     1,
                     conf._input_dim,
                     hidden_act="elu",
                     final_act="sigmoid",
                     final_init_rop=NoDropRop(),
                     hidden_which_affine=3))
     else:
         self.score_gate = None
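
lambda_score_tok above selects between a fixed interpolation weight and an input-dependent gate (an MLP with elu hidden layers and a sigmoid output). A simplified PyTorch sketch of that mixing, assuming the gate's output is used to interpolate the token-evidence and sentence-evidence scores; names are illustrative, and the real get_mlp gate takes four input features rather than one:

import torch
import torch.nn as nn

input_dim = 64
gate_mlp = nn.Sequential(nn.Linear(input_dim, input_dim), nn.ELU(),
                         nn.Linear(input_dim, 1), nn.Sigmoid())

def mix_scores(score_tok, score_sent, feats, lambda_score_tok):
    if lambda_score_tok >= 0.:                  # fixed mode: constant interpolation weight
        lam = lambda_score_tok
    else:                                       # "auto" mode: predict a weight per item
        lam = gate_mlp(feats).squeeze(-1)       # values in (0, 1)
    return lam * score_tok + (1. - lam) * score_sent

mixed = mix_scores(torch.randn(2, 5), torch.randn(2, 5), torch.randn(2, 5, input_dim), -1.)
print(mixed.shape)  # torch.Size([2, 5])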
Example no. 5
 def __init__(self, pc, input_dim: int, inputp_dim: int,
              conf: DparG1DecoderConf, inputter: Inputter):
     super().__init__(pc, conf, name="dp")
     self.conf = conf
     self.inputter = inputter
     self.input_dim = input_dim
     self.inputp_dim = inputp_dim
     # checkout and assign vocab
     self._check_vocab()
     # -----
     # this step is performed at the embedder, thus still does not influence the inputter
     self.add_root_token = self.inputter.embedder.add_root_token
     assert self.add_root_token, "Currently assert this one!!"  # todo(+N)
     # -----
     # transform dp space
     if conf.pre_dp_space > 0:
         dp_space = conf.pre_dp_space
         self.pre_aff_m = self.add_sub_node(
             "pm", Affine(pc, input_dim, dp_space, act=conf.pre_dp_act))
         self.pre_aff_h = self.add_sub_node(
             "ph", Affine(pc, input_dim, dp_space, act=conf.pre_dp_act))
     else:
         dp_space = input_dim
         self.pre_aff_m = self.pre_aff_h = lambda x: x
     # dep pairwise scorer: output includes [0, r1) -> [non]+valid_words
     self.dps_node = self.add_sub_node(
         "dps",
         PairScorer(pc,
                    dp_space,
                    dp_space,
                    self.dlab_r1,
                    conf=conf.dps_conf,
                    in_size_pair=inputp_dim))
     self.dps_s0_mask = np.array([1.] + [0.] * (self.dlab_r1 - 1))  # [1, 0, ..., 0] (only index 0 is non-zero)
     # whether detach input?
     self.no_detach_input = ScheduledValue("dpar:no_detach", conf.no_detach_input)
Example no. 6
 def __init__(self, pc, conf: CandidateExtractorConf, input_enc_dims):
     super().__init__(pc, None, None)
     self.conf = conf
     # scorer
     self.adp = self.add_sub_node(
         'adp', TaskSpecAdp(pc, input_enc_dims, [], conf.hidden_dim))
     adp_hidden_size = self.adp.get_output_dims()[0]
     self.predictor = self.add_sub_node(
         'pred', Affine(pc, adp_hidden_size, 2,
                        init_rop=NoDropRop()))  # 0 as nil
     # others
     self.id_counter = defaultdict(
         int)  # docid->ef-count (make sure unique ef-id)
     self.valid_hlidx = HLabelIdx(["unk"], [1])
Example no. 7
 def __init__(self, pc: BK.ParamCollection, slconf: SL1Conf):
     super().__init__(pc, None, None)
     self.dim = slconf._input_dim
     self.use_par = slconf.use_par
     self.use_chs = slconf.use_chs
     # parent and children attentional senc
     self.node_par = self.add_sub_node(
         "npar",
         MultiHeadAttention(pc, self.dim, self.dim, self.dim,
                            slconf.sl_par_att))
     self.node_chs = self.add_sub_node(
         "nchs",
         MultiHeadAttention(pc, self.dim, self.dim, self.dim,
                            slconf.sl_chs_att))
     self.ff_par = self.add_sub_node(
         "par_ff", Affine(pc, self.dim, self.dim, act="tanh"))
     self.ff_chs = self.add_sub_node(
         "chs_ff", Affine(pc, self.dim, self.dim, act="tanh"))
     # todo(note): currently simply sum them!
     self.mix_marginals_head_count = slconf.mix_marginals_head_count
     self.mix_marginals_rate = slconf.mix_marginals_rate
     if slconf.zero_extra_output_params:
         self.ff_par.zero_params()
         self.ff_chs.zero_params()
Example no. 8
 def __init__(self, pc: BK.ParamCollection, rconf: SL0Conf):
     super().__init__(pc, None, None)
     self.dim = rconf._input_dim  # both input/output dim
     # padders for child nodes
     self.chs_start_posi = -rconf.chs_num
     self.ch_idx_padder = DataPadder(2, pad_vals=0,
                                     mask_range=2)  # [*, num-ch]
     self.ch_label_padder = DataPadder(2, pad_vals=0)
     #
     self.label_embeddings = self.add_sub_node(
         "label",
         Embedding(pc, rconf._num_label, rconf.dim_label, fix_row0=False))
     self.dim_label = rconf.dim_label
     # todo(note): now adopting flattened groupings for the basic features; that is all, no more recurrent features
     # group 1: [cur, chs, par] -> head_pre_size
     self.use_chs = rconf.use_chs
     self.use_par = rconf.use_par
     self.use_label_feat = rconf.use_label_feat
     # components (add the parameters anyway)
     # todo(note): children features: children + (label of mod->children)
     self.chs_reprer = self.add_sub_node("chs", ChsReprer(pc, rconf))
     self.chs_ff = self.add_sub_node(
         "chs_ff",
         Affine(pc,
                self.chs_reprer.get_output_dims()[0],
                self.dim,
                act="tanh"))
     # todo(note): parent features: parent + (label of parent->mod)
     # todo(warn): always add label related params
     par_ff_inputs = [self.dim, rconf.dim_label]
     self.par_ff = self.add_sub_node(
         "par_ff", Affine(pc, par_ff_inputs, self.dim, act="tanh"))
     # no other groups anymore!
     if rconf.zero_extra_output_params:
         self.par_ff.zero_params()
         self.chs_ff.zero_params()
Example no. 9
 def __init__(self, pc: BK.ParamCollection, conf: EmbedderNodeConf, vpack: VocabPackage):
     super().__init__(pc, None, None)
     self.conf = conf
     self.vpack = vpack
     self.add_root_token = conf.add_root_token
     # -----
     self.nodes = []  # params
     self.comp_names = []
     self.comp_dims = []  # real dims
     self.berter: Berter2 = None
     for comp_name, comp_conf in conf.ec_dict.items():
         if comp_conf.comp_dim > 0:
             # directly get the nodes
             one_node = InputEmbedNode.get_input_embed_node(comp_name, pc, comp_name, comp_conf, conf, vpack)
             comp_dim = one_node.get_output_dims()[0]  # fix dim
             # especially for berter
             if comp_name == "bert":
                 assert self.berter is None
                 self.berter = one_node.berter
             # general steps
             self.comp_names.append(comp_name)
             self.nodes.append(self.add_sub_node(f"EC{comp_name}", one_node))
             self.comp_dims.append(comp_dim)
     # final projection?
     self.has_proj = (conf.emb_proj_dim > 0)
     if self.has_proj:
         proj_layer = Affine(self.pc, sum(self.comp_dims), conf.emb_proj_dim,
                             act=conf.emb_proj_act, init_scale=conf.emb_proj_init_scale)
         if conf.emb_proj_norm:
             norm_layer = LayerNorm(self.pc, conf.emb_proj_dim)
             self.final_layer = self.add_sub_node("fl", Sequential(self.pc, [proj_layer, norm_layer]))
         else:
             self.final_layer = self.add_sub_node("fl", proj_layer)
         self.output_dim = conf.emb_proj_dim
     else:
         self.final_layer = None
         self.output_dim = sum(self.comp_dims)
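
The final projection above simply concatenates all component outputs along the last dimension and, if emb_proj_dim > 0, passes the result through an Affine (optionally followed by LayerNorm). A hedged stand-in with plain PyTorch modules, assuming the project's Affine/LayerNorm/Sequential wrappers behave like their torch.nn counterparts; the dimensions are made up:

import torch
import torch.nn as nn

comp_dims = [300, 50, 64]          # e.g. word, char-CNN, posi components
emb_proj_dim = 512

components = [torch.randn(2, 7, d) for d in comp_dims]   # one [batch, seq, dim] tensor per component
concat = torch.cat(components, dim=-1)                    # [2, 7, sum(comp_dims)]

final_layer = nn.Sequential(
    nn.Linear(sum(comp_dims), emb_proj_dim),   # the projection
    nn.LayerNorm(emb_proj_dim),                # the optional normalization (emb_proj_norm)
)
print(final_layer(concat).shape)  # torch.Size([2, 7, 512])

Without the projection, output_dim is just sum(comp_dims), exactly as in the else branch.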
Example no. 10
 def __init__(self, pc: BK.ParamCollection, econf: EmbedConf,
              vpack: VocabPackage):
     super().__init__(pc, None, None)
     self.conf = econf
     #
     repr_sizes = []
     # word
     self.has_word = (econf.dim_word > 0)
     if self.has_word:
         npvec = vpack.get_emb(
             "word") if econf.init_words_from_pretrain else None
         self.word_embed = self.add_sub_node(
             "ew",
             Embedding(self.pc,
                       len(vpack.get_voc("word")),
                       econf.dim_word,
                       npvec=npvec,
                       name="word",
                       freeze=econf.word_freeze))
         repr_sizes.append(econf.dim_word)
     # char
     self.has_char = (econf.dim_char > 0)
     if self.has_char:
         # todo(warn): cnns will also use emb's drop?
         self.char_embed = self.add_sub_node(
             "ec",
             Embedding(self.pc,
                       len(vpack.get_voc("char")),
                       econf.dim_char,
                       name="char"))
         per_cnn_size = econf.char_cnn_hidden // len(econf.char_cnn_windows)
         self.char_cnns = [
             self.add_sub_node(
                 "cnnc",
                 CnnLayer(self.pc,
                          econf.dim_char,
                          per_cnn_size,
                          z,
                          pooling="max",
                          act="tanh")) for z in econf.char_cnn_windows
         ]
         repr_sizes.append(econf.char_cnn_hidden)
     # posi: absolute positional embeddings
     self.has_posi = (econf.dim_posi > 0)
     if self.has_posi:
         self.posi_embed = self.add_sub_node(
             "ep",
             PosiEmbedding(self.pc, econf.dim_posi, econf.posi_clip,
                           econf.posi_fix_sincos, econf.posi_freeze))
         repr_sizes.append(econf.dim_posi)
     # extras: like POS, ...
     self.dim_extras = econf.dim_extras
     self.extra_names = econf.extra_names
     zcheck(
         len(self.dim_extras) == len(self.extra_names),
         "Unmatched dims and names!")
     self.extra_embeds = []
     for one_extra_dim, one_name in zip(self.dim_extras, self.extra_names):
         self.extra_embeds.append(
             self.add_sub_node(
                 "ext",
                 Embedding(self.pc,
                           len(vpack.get_voc(one_name)),
                           one_extra_dim,
                           npvec=vpack.get_emb(one_name, None),
                           name="extra:" + one_name)))
         repr_sizes.append(one_extra_dim)
     # auxes
     self.dim_auxes = econf.dim_auxes
     self.fold_auxes = econf.fold_auxes
     self.aux_overall_gammas = []
     self.aux_fold_lambdas = []
     for one_aux_dim, one_aux_fold in zip(self.dim_auxes, self.fold_auxes):
         repr_sizes.append(one_aux_dim)
         # aux gamma and fold trainable lambdas
         self.aux_overall_gammas.append(self.add_param("AG", (),
                                                       1.))  # scalar
         self.aux_fold_lambdas.append(
             self.add_param(
                 "AL", (), [1. / one_aux_fold
                            for _ in range(one_aux_fold)]))  # [#fold]
     # =====
     # another projection layer? & set final dim
     if len(repr_sizes) <= 0:
         zwarn("No inputs??")
     # zcheck(len(repr_sizes)>0, "No inputs?")
     self.repr_sizes = repr_sizes
     self.has_proj = (econf.emb_proj_dim > 0)
     if self.has_proj:
         proj_layer = Affine(self.pc, sum(repr_sizes), econf.emb_proj_dim)
         if econf.emb_proj_norm:
             norm_layer = LayerNorm(self.pc, econf.emb_proj_dim)
             self.final_layer = self.add_sub_node(
                 "fl", Sequential(self.pc, [proj_layer, norm_layer]))
         else:
             self.final_layer = self.add_sub_node("fl", proj_layer)
         self.output_dim = econf.emb_proj_dim
     else:
         self.final_layer = None
         self.output_dim = sum(repr_sizes)
     # =====
     # special MdDropout: dropout the entire last dim (for word, char, extras, but not posi)
     self.dropmd_word = self.add_sub_node("md", DropoutLastN(pc, lastn=1))
     self.dropmd_char = self.add_sub_node("md", DropoutLastN(pc, lastn=1))
     self.dropmd_extras = [
         self.add_sub_node("md", DropoutLastN(pc, lastn=1))
         for _ in self.extra_names
     ]
     # dropouts for aux
     self.drop_auxes = [
         self.add_sub_node("aux", Dropout(pc, (one_aux_dim, )))
         for one_aux_dim in self.dim_auxes
     ]
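
The per-aux gamma and per-fold lambdas above (a scalar initialized to 1 and a vector initialized to 1/#fold) are presumably used for an ELMo-style scalar mix of the #fold layers of each auxiliary representation; that usage is not shown here, so the sketch below is an assumption, with illustrative names and sizes:

import torch

fold, dim = 3, 16
layers = [torch.randn(2, 5, dim) for _ in range(fold)]   # [batch, seq, dim] per fold
fold_lambdas = torch.full((fold,), 1. / fold)            # matches the 1/#fold init
overall_gamma = torch.tensor(1.)                         # matches the scalar init of 1.

mixed = overall_gamma * sum(lam * layer for lam, layer in zip(fold_lambdas, layers))
print(mixed.shape)  # torch.Size([2, 5, 16])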
Example no. 11
 def __init__(self, pc: BK.ParamCollection, input_dim: int,
              conf: PlainLMNodeConf, inputter: Inputter):
     super().__init__(pc, conf, name="PLM")
     self.conf = conf
     self.inputter = inputter
     self.input_dim = input_dim
     self.split_input_blm = conf.split_input_blm
     # this step is performed at the embedder, thus still does not influence the inputter
     self.add_root_token = self.inputter.embedder.add_root_token
     # vocab and padder
     vpack = inputter.vpack
     vocab_word = vpack.get_voc("word")
     # models
     real_input_dim = input_dim // 2 if self.split_input_blm else input_dim
     if conf.hid_dim <= 0:  # no hidden layer
         self.l2r_hid_layer = self.r2l_hid_layer = None
         self.pred_input_dim = real_input_dim
     else:
         self.l2r_hid_layer = self.add_sub_node(
             "l2r_h",
             Affine(pc, real_input_dim, conf.hid_dim, act=conf.hid_act))
         self.r2l_hid_layer = self.add_sub_node(
             "r2l_h",
             Affine(pc, real_input_dim, conf.hid_dim, act=conf.hid_act))
         self.pred_input_dim = conf.hid_dim
     # todo(note): unk is the first one above real words
     self.pred_size = min(conf.max_pred_rank + 1, vocab_word.unk)
     if conf.tie_input_embeddings:
         zwarn("Tie all preds in plm with input embeddings!!")
         self.l2r_pred = self.r2l_pred = None
         self.inputter_embed_node = self.inputter.embedder.get_node("word")
     else:
         self.l2r_pred = self.add_sub_node(
             "l2r_p",
             Affine(pc,
                    self.pred_input_dim,
                    self.pred_size,
                    init_rop=NoDropRop()))
         if conf.tie_bidirect_pred:
             self.r2l_pred = self.l2r_pred
         else:
             self.r2l_pred = self.add_sub_node(
                 "r2l_p",
                 Affine(pc,
                        self.pred_input_dim,
                        self.pred_size,
                        init_rop=NoDropRop()))
         self.inputter_embed_node = None
         if conf.init_pred_from_pretrain:
             npvec = vpack.get_emb("word")
             if npvec is None:
                 zwarn(
                     "Pretrained vector not provided, skip init pred embeddings!!"
                 )
             else:
                 with BK.no_grad_env():
                     self.l2r_pred.ws[0].copy_(
                         BK.input_real(npvec[:self.pred_size].T))
                     self.r2l_pred.ws[0].copy_(
                         BK.input_real(npvec[:self.pred_size].T))
                 zlog(
                     f"Init pred embeddings from pretrained vectors (size={self.pred_size})."
                 )
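
The init_pred_from_pretrain branch copies the first pred_size pretrained word vectors into the prediction weights inside a no-grad context; the .T is there because the project's Affine apparently stores its weight as [in, out]. A minimal PyTorch sketch of the same copy with a plain nn.Linear (which stores [out, in], so no transpose); npvec and the sizes are stand-ins:

import numpy as np
import torch
import torch.nn as nn

pred_input_dim, pred_size = 128, 1000
npvec = np.random.randn(50000, pred_input_dim).astype(np.float32)   # stand-in pretrained matrix

pred = nn.Linear(pred_input_dim, pred_size)
with torch.no_grad():
    pred.weight.copy_(torch.from_numpy(npvec[:pred_size]))   # rows = output words, cols = dims
print(pred.weight.shape)  # torch.Size([1000, 128])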
Example no. 12
 def __init__(self,
              pc,
              conf: HLabelNodeConf,
              hl_vocab: HLabelVocab,
              eff_max_layer=None):
     super().__init__(pc, None, None)
     self.conf = conf
     self.hl_vocab = hl_vocab
     assert self.hl_vocab.nil_as_zero  # for each layer, the idx=0 is the full-NIL
     # basic pool embeddings
     npvec = hl_vocab.pool_init_vec
     if not conf.pool_init_hint:
         npvec = None
     else:
         assert npvec is not None, "pool-init not provided by the Vocab!"
     n_dim, n_pool = conf.n_dim, len(hl_vocab.pools_k)
     self.pool_pred = self.add_sub_node(
         "pp",
         Embedding(
             pc,
             n_pool,
             n_dim,
             fix_row0=conf.zero_nil,
             npvec=npvec,
             init_rop=(NoDropRop() if conf.nodrop_pred_embeds else None)))
     if conf.tie_embeds:
         self.pool_lookup = self.pool_pred
     else:
         self.pool_lookup = self.add_sub_node(
             "pl",
             Embedding(pc,
                       n_pool,
                       n_dim,
                       fix_row0=conf.zero_nil,
                       npvec=npvec,
                       init_rop=(NoDropRop()
                                 if conf.nodrop_lookup_embeds else None)))
     # layered labels embeddings (to be refreshed)
     self.max_layer = hl_vocab.max_layer
     self.layered_embeds_pred = [None] * self.max_layer
     self.layered_embeds_lookup = [None] * self.max_layer
     self.layered_prei = [None] * self.max_layer  # previous layer i, for score combining
     self.layered_isnil = [None] * self.max_layer  # whether is nil(None)
     self.zero_nil = conf.zero_nil
     # lookup summer
     assert conf.strategy_predict == "sum"
     self.lookup_is_sum, self.lookup_is_ff = [
         conf.strategy_lookup == z for z in ["sum", "ff"]
     ]
     if self.lookup_is_ff:
         self.lookup_summer = self.add_sub_node(
             "summer",
             Affine(pc, [n_dim] * self.max_layer, n_dim, act="tanh"))
     elif self.lookup_is_sum:
         self.sum_dropout = self.add_sub_node("sdrop",
                                              Dropout(pc, (n_dim, )))
         self.lookup_summer = lambda embeds: self.sum_dropout(
             BK.stack(embeds, 0).sum(0))
     else:
         raise NotImplementedError(
             f"UNK strategy_lookup: {conf.strategy_lookup}")
     # bias for prediction
     self.prediction_sizes = [
         len(hl_vocab.layered_pool_links_padded[i])
         for i in range(self.max_layer)
     ]
     if conf.bias_predict:
         self.biases_pred = [
             self.add_param(name="B", shape=(x, ))
             for x in self.prediction_sizes
         ]
     else:
         self.biases_pred = [None] * self.max_layer
     # =====
     # training
     self.is_hinge_loss, self.is_prob_loss = [
         conf.loss_function == z for z in ["hinge", "prob"]
     ]
     self.loss_lambdas = conf.loss_lambdas + [1.] * (
         self.max_layer - len(conf.loss_lambdas))  # loss scale
     self.margin_lambdas = conf.margin_lambdas + [0.] * (
         self.max_layer - len(conf.margin_lambdas))  # margin scale
     self.lookup_soft_alphas = conf.lookup_soft_alphas + [1.] * (
         self.max_layer - len(conf.lookup_soft_alphas))
     self.loss_fullnil_weight = conf.loss_fullnil_weight
     # ======
     # set current effective max_layer
     self.eff_max_layer = self.max_layer
     if eff_max_layer is not None:
         self.set_eff_max_layer(eff_max_layer)
Example no. 13
 def __init__(self, pc, dim: int, conf: VRecConf):
     super().__init__(pc, None, None)
     self.conf = conf
     self.dim = dim
     # =====
     # Feat
     if conf.feat_mod == "matt":
         self.feat_node = self.add_sub_node(
             "feat", MAttNode(pc, dim, dim, dim, conf.matt_conf))
         self.attn_count = conf.matt_conf.head_count
     elif conf.feat_mod == "fcomb":
         self.feat_node = self.add_sub_node(
             "feat", FCombNode(pc, dim, dim, dim, conf.fc_conf))
         self.attn_count = conf.fc_conf.fc_count
     else:
         raise NotImplementedError()
     feat_out_dim = self.feat_node.get_output_dims()[0]
     # =====
     # Combiner
     if conf.comb_mode == "affine":
         self.comb_aff = self.add_sub_node(
             "aff",
             AffineCombiner(pc, [dim, feat_out_dim],
                            [conf.comb_affine_q, conf.comb_affine_v],
                            dim,
                            out_act=conf.comb_affine_act,
                            out_drop=conf.comb_affine_drop))
         self.comb_f = lambda q, v, c: (self.comb_aff([q, v]), None)
     elif conf.comb_mode == "lstm":
         self.comb_lstm = self.add_sub_node(
             "lstm", LstmNode2(pc, feat_out_dim, dim))
         self.comb_f = self._call_lstm
     else:
         raise NotImplementedError()
     # =====
     # ff
     if conf.ff_dim > 0:
         self.has_ff = True
         self.linear1 = self.add_sub_node(
             "l1",
             Affine(pc,
                    dim,
                    conf.ff_dim,
                    act=conf.ff_act,
                    init_rop=NoDropRop()))
         self.dropout1 = self.add_sub_node(
             "d1", Dropout(pc, (conf.ff_dim, ), fix_rate=conf.ff_drop))
         self.linear2 = self.add_sub_node(
             "l2",
             Affine(pc,
                    conf.ff_dim,
                    dim,
                    act="linear",
                    init_rop=NoDropRop()))
         self.dropout2 = self.add_sub_node(
             "d2", Dropout(pc, (dim, ), fix_rate=conf.ff_drop))
     else:
         self.has_ff = False
     # layer norms
     if conf.use_pre_norm:
         self.att_pre_norm = self.add_sub_node("aln1", LayerNorm(pc, dim))
         self.ff_pre_norm = self.add_sub_node("fln1", LayerNorm(pc, dim))
     else:
         self.att_pre_norm = self.ff_pre_norm = None
     if conf.use_post_norm:
         self.att_post_norm = self.add_sub_node("aln2", LayerNorm(pc, dim))
         self.ff_post_norm = self.add_sub_node("fln2", LayerNorm(pc, dim))
     else:
         self.att_post_norm = self.ff_post_norm = None
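
use_pre_norm and use_post_norm above add LayerNorms before and/or after each sublayer. A hedged sketch of the usual way such flags are applied around a sublayer with a residual connection (illustrative names, not the project's forward code):

import torch
import torch.nn as nn

dim = 32
sublayer = nn.Linear(dim, dim)     # stand-in for the attention/combiner/FF sublayer
pre_norm = nn.LayerNorm(dim)
post_norm = nn.LayerNorm(dim)

def run(x, use_pre_norm, use_post_norm):
    h = pre_norm(x) if use_pre_norm else x             # pre-norm: normalize the sublayer input
    out = x + sublayer(h)                              # residual connection
    return post_norm(out) if use_post_norm else out    # post-norm: normalize the residual output

x = torch.randn(2, 5, dim)
print(run(x, use_pre_norm=True, use_post_norm=False).shape)  # torch.Size([2, 5, 32])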
Example no. 14
 def __init__(self, pc, conf: LinkerConf, vocab: HLabelVocab):
     super().__init__(pc, None, None)
     self.conf = conf
     self.vocab = vocab
     assert vocab.nil_as_zero
     assert len(
         vocab.layered_hlidx) == 1, "Currently we only allow one layer role"
     self.hl_output_size = len(
         vocab.layered_hlidx[0])  # num of output labels
     # -----
     # models
     sconf = conf
     input_dim = sconf._input_dim
     dim_label = sconf.dim_label
     arc_space = sconf.arc_space
     lab_space = sconf.lab_space
     ff_hid_size = sconf.ff_hid_size
     ff_hid_layer = sconf.ff_hid_layer
     use_biaffine = sconf.use_biaffine
     use_ff = sconf.use_ff
     use_ff2 = sconf.use_ff2
     biaffine_div = sconf.biaffine_div
     biaffine_init_ortho = sconf.biaffine_init_ortho
     transform_act = sconf.transform_act
     #
     self.input_dim = input_dim
     self.num_label = self.hl_output_size
     # label embeddings
     self.emb_ef = self.add_sub_node(
         "eef",
         Embedding(pc,
                   conf._num_ef_label,
                   dim_label,
                   fix_row0=sconf.zero_unk_lemb))
     self.emb_evt = self.add_sub_node(
         "eevt",
         Embedding(pc,
                   conf._num_evt_label,
                   dim_label,
                   fix_row0=sconf.zero_unk_lemb))
     # attach/arc
     self.arc_m = self.add_sub_node(
         "am",
         Affine(pc, [input_dim, dim_label], arc_space, act=transform_act))
     self.arc_h = self.add_sub_node(
         "ah",
         Affine(pc, [input_dim, dim_label], arc_space, act=transform_act))
     self.arc_scorer = self.add_sub_node(
         "as",
         BiAffineScorer(pc,
                        arc_space,
                        arc_space,
                        1,
                        ff_hid_size,
                        ff_hid_layer=ff_hid_layer,
                        use_biaffine=use_biaffine,
                        use_ff=use_ff,
                        use_ff2=use_ff2,
                        biaffine_div=biaffine_div,
                        biaffine_init_ortho=biaffine_init_ortho))
     # labeling
     self.lab_m = self.add_sub_node(
         "lm",
         Affine(pc, [input_dim, dim_label], lab_space, act=transform_act))
     self.lab_h = self.add_sub_node(
         "lh",
         Affine(pc, [input_dim, dim_label], lab_space, act=transform_act))
     self.lab_scorer = self.add_sub_node(
         "ls",
         BiAffineScorer(pc,
                        lab_space,
                        lab_space,
                        self.num_label,
                        ff_hid_size,
                        ff_hid_layer=ff_hid_layer,
                        use_biaffine=use_biaffine,
                        use_ff=use_ff,
                        use_ff2=use_ff2,
                        biaffine_div=biaffine_div,
                        biaffine_init_ortho=biaffine_init_ortho))
     #
     self.nil_mask = None
Example no. 15
 def __init__(self, pc: BK.ParamCollection, input_dim: int,
              conf: MaskLMNodeConf, inputter: Inputter):
     super().__init__(pc, conf, name="MLM")
     self.conf = conf
     self.inputter = inputter
     self.input_dim = input_dim
     # this step is performed at the embedder, thus still does not influence the inputter
     self.add_root_token = self.inputter.embedder.add_root_token
     # vocab and padder
     vpack = inputter.vpack
     vocab_word, vocab_pos = vpack.get_voc("word"), vpack.get_voc("pos")
     # no mask fields
     self.nomask_names_set = set(conf.nomask_names)
     # models
     if conf.hid_dim <= 0:  # no hidden layer
         self.hid_layer = None
         self.pred_input_dim = input_dim
     else:
         self.hid_layer = self.add_sub_node(
             "hid", Affine(pc, input_dim, conf.hid_dim, act=conf.hid_act))
         self.pred_input_dim = conf.hid_dim
     # todo(note): unk is the first one above real words
     self.pred_word_size = min(conf.max_pred_rank + 1, vocab_word.unk)
     self.pred_pos_size = vocab_pos.unk
     if conf.tie_input_embeddings:
         zwarn("Tie all preds in mlm with input embeddings!!")
         self.pred_word_layer = self.pred_pos_layer = None
         self.inputter_word_node = self.inputter.embedder.get_node("word")
         self.inputter_pos_node = self.inputter.embedder.get_node("pos")
     else:
         self.inputter_word_node, self.inputter_pos_node = None, None
         self.pred_word_layer = self.add_sub_node(
             "pw",
             Affine(pc,
                    self.pred_input_dim,
                    self.pred_word_size,
                    init_rop=NoDropRop()))
         self.pred_pos_layer = self.add_sub_node(
             "pp",
             Affine(pc,
                    self.pred_input_dim,
                    self.pred_pos_size,
                    init_rop=NoDropRop()))
         if conf.init_pred_from_pretrain:
             npvec = vpack.get_emb("word")
             if npvec is None:
                 zwarn(
                     "Pretrained vector not provided, skip init pred embeddings!!"
                 )
             else:
                 with BK.no_grad_env():
                     self.pred_word_layer.ws[0].copy_(
                         BK.input_real(npvec[:self.pred_word_size].T))
                 zlog(
                     f"Init pred embeddings from pretrained vectors (size={self.pred_word_size})."
                 )
     # =====
     COMBINE_METHOD_FS = {
         "sum": lambda xs: BK.stack(xs, -1).sum(-1),
         "avg": lambda xs: BK.stack(xs, -1).mean(-1),
         "min": lambda xs: BK.stack(xs, -1).min(-1)[0],
         "max": lambda xs: BK.stack(xs, -1).max(-1)[0],
     }
     self.loss_comb_f = COMBINE_METHOD_FS[conf.loss_comb_method]
     self.score_comb_f = COMBINE_METHOD_FS[conf.score_comb_method]