Example #1: arc and label scorer (FpScorerConf): an optional hidden Affine plus a linear scorer for each branch.
 def __init__(self, pc: BK.ParamCollection, sconf: FpScorerConf):
     super().__init__(pc, None, None)
     # options
     input_dim = sconf._input_dim
     arc_space = sconf.arc_space
     lab_space = sconf.lab_space
     transform_act = sconf.transform_act
     #
     self.input_dim = input_dim
     self.num_label = sconf._num_label
     self.mask_value = Constants.REAL_PRAC_MIN
     # attach/arc
     if arc_space > 0:
         self.arc_f = self.add_sub_node(
             "af", Affine(pc, input_dim, arc_space, act=transform_act))
     else:
         self.arc_f = None
         arc_space = input_dim
     self.arc_scorer = self.add_sub_node(
         "as", Affine(pc, arc_space, 1, init_rop=NoDropRop()))
     # labeling
     if lab_space > 0:
         self.lab_f = self.add_sub_node(
             "lf", Affine(pc, input_dim, lab_space, act=transform_act))
     else:
         self.lab_f = None
         lab_space = input_dim
     self.lab_scorer = self.add_sub_node(
         "ls", Affine(pc, lab_space, self.num_label, init_rop=NoDropRop()))
Example #2: realis/type predictor (RealisTypePredictorConf): a task-specific adapter feeding a realis head over EVENT_REALIS_LIST and an auxiliary type head.
 def __init__(self, pc, conf: RealisTypePredictorConf, vocab: HLabelVocab,
              input_enc_dims):
     super().__init__(pc, None, None)
     self.conf = conf
     self.vocab = vocab
     assert vocab.nil_as_zero
     VOCAB_LAYER = -1  # todo(note): simply use final largest layer
     self.lidx2hlidx = vocab.layered_hlidx[
         VOCAB_LAYER]  # int-idx -> HLabelIdx
     # scorer
     self.adp = self.add_sub_node(
         'adp', TaskSpecAdp(pc, input_enc_dims, [], conf.hidden_dim))
     adp_hidden_size = self.adp.get_output_dims()[0]
     # fixed types for realis
     self.realis_predictor = self.add_sub_node(
         'pr',
         Affine(pc,
                adp_hidden_size,
                len(EVENT_REALIS_LIST),
                init_rop=NoDropRop()))
     # type predictor as a possible aux task
     self.type_predictor = self.add_sub_node(
         'pt',
         Affine(pc,
                adp_hidden_size,
                len(self.lidx2hlidx),
                init_rop=NoDropRop()))
Example #3: multi-input combiner: an optional Affine per input projecting to out_dim, followed by an output activation and dropout.
 def __init__(self,
              pc,
              input_dims: List[int],
              use_affs: List[bool],
              out_dim: int,
              out_act='linear',
              out_drop=0.,
              param_init_scale=1.):
     super().__init__(pc, None, None)
     # -----
     self.input_dims = input_dims
     self.use_affs = use_affs
     self.out_dim = out_dim
     # =====
     assert len(input_dims) == len(use_affs)
     self.aff_nodes = []
     for d, use in zip(input_dims, use_affs):
         if use:
             one_aff = self.add_sub_node(
                 "aff",
                 Affine(pc,
                        d,
                        out_dim,
                        init_scale=param_init_scale,
                        init_rop=NoDropRop()))
         else:
             assert d == out_dim, f"Dimension mismatch for skipping affine: {d} vs {out_dim}!"
             one_aff = None
         self.aff_nodes.append(one_aff)
     self.out_act_f = ActivationHelper.get_act(out_act)
     self.out_drop = self.add_sub_node(
         "drop", Dropout(pc, (out_dim, ), fix_rate=out_drop))
Example #4: sequence-labeling node (SeqLabNodeConf): an optional hidden layer plus a prediction layer sized by the vocabulary.
 def __init__(self, pc: BK.ParamCollection, pname: str, input_dim: int,
              conf: SeqLabNodeConf, inputter: Inputter):
     super().__init__(pc, conf, name="SLB")
     self.conf = conf
     self.inputter = inputter
     self.input_dim = input_dim
     # this step is performed at the embedder, thus still does not influence the inputter
     self.add_root_token = self.inputter.embedder.add_root_token
     # --
     self.pname = pname
     self.attr_name = pname + "_seq"  # attribute name in Instance
     self.vocab = inputter.vpack.get_voc(pname)
     # todo(note): we must make sure that index 0 is the non/NIL tag
     assert self.vocab.non == 0
     # models
     if conf.hid_dim <= 0:  # no hidden layer
         self.hid_layer = None
         self.pred_input_dim = input_dim
     else:
         self.hid_layer = self.add_sub_node(
             "hid", Affine(pc, input_dim, conf.hid_dim, act=conf.hid_act))
         self.pred_input_dim = conf.hid_dim
     self.pred_out_dim = self.vocab.unk  # todo(note): UNK is the prediction boundary
     self.pred_layer = self.add_sub_node(
         "pr",
         Affine(pc,
                self.pred_input_dim,
                self.pred_out_dim,
                init_rop=NoDropRop()))
Example #5: masked-LM node (MaskLMNodeConf): hidden and prediction layers, with optional initialization of the prediction weights from pretrained word vectors.
 def __init__(self, pc: BK.ParamCollection, conf: MaskLMNodeConf,
              vpack: VocabPackage):
     super().__init__(pc, None, None)
     self.conf = conf
     # vocab and padder
     self.word_vocab = vpack.get_voc("word")
     self.padder = DataPadder(
         2, pad_vals=self.word_vocab.pad,
         mask_range=2)  # todo(note): <pad>-id is very large
     # models
     self.hid_layer = self.add_sub_node(
         "hid", Affine(pc, conf._input_dim, conf.hid_dim, act=conf.hid_act))
     self.pred_layer = self.add_sub_node(
         "pred",
         Affine(pc,
                conf.hid_dim,
                conf.max_pred_rank + 1,
                init_rop=NoDropRop()))
     if conf.init_pred_from_pretrain:
         npvec = vpack.get_emb("word")
         if npvec is None:
             zwarn(
                 "Pretrained vector not provided, skip init pred embeddings!!"
             )
         else:
             with BK.no_grad_env():
                 self.pred_layer.ws[0].copy_(
                     BK.input_real(npvec[:conf.max_pred_rank + 1].T))
             zlog(
                 f"Init pred embeddings from pretrained vectors (size={conf.max_pred_rank+1})."
             )
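For illustration, the same pretrained-initialization step in plain PyTorch; the sizes and the random npvec below are hypothetical stand-ins for vpack.get_emb("word"). Note that nn.Linear stores its weight as [out, in], so unlike the Affine in the example no transpose is needed.

import numpy as np
import torch
import torch.nn as nn

hid_dim, max_pred_rank = 300, 9999                      # hypothetical sizes
npvec = np.random.rand(50000, 300).astype(np.float32)   # stands in for vpack.get_emb("word")

pred_layer = nn.Linear(hid_dim, max_pred_rank + 1)
with torch.no_grad():
    # copy the first (max_pred_rank + 1) pretrained rows into the output projection
    pred_layer.weight.copy_(torch.from_numpy(npvec[:max_pred_rank + 1]))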
Example #6: joint POS node (JPosConf): its own encoder, a POS prediction layer, and optional stacking weights.
 def __init__(self, pc: BK.ParamCollection, jconf: JPosConf, pos_vocab):
     super().__init__(pc, None, None)
     self.jpos_stacking = jconf.jpos_stacking
     self.jpos_multitask = jconf.jpos_multitask
     self.jpos_lambda = jconf.jpos_lambda
     self.jpos_decode = jconf.jpos_decode
     # encoder0
     jconf.jpos_enc._input_dim = jconf._input_dim
     self.enc = self.add_sub_node("enc0", MyEncoder(self.pc,
                                                    jconf.jpos_enc))
     self.enc_output_dim = self.enc.get_output_dims()[0]
     # output
     # todo(warn): here, include some other things for convenience
     num_labels = len(pos_vocab)
     self.pred = self.add_sub_node(
         "pred",
         Affine(self.pc,
                self.enc_output_dim,
                num_labels,
                init_rop=NoDropRop()))
     # further stacking (if not, then simply multi-task learning)
     if jconf.jpos_stacking:
         self.pos_weights = self.add_param(
             "w", (num_labels, self.enc_output_dim))  # [n, dim] to be added
     else:
         self.pos_weights = None
Example #7: a hidden layer plus an optional output layer (both Affine), with configurable activation, dropout, and piecewise initialization.
 def __init__(self,
              pc,
              input_dim,
              hid_dim: int,
              hid_act='linear',
              hid_drop=0.,
              hid_piece4init=1,
              out_dim=0,
              out_fbias=0.,
              out_fact="linear",
              out_piece4init=1,
              init_scale=1.):
     super().__init__(pc, None, None)
     # -----
     # hidden layer
     self.hid_aff = self.add_sub_node(
         "hidden",
         Affine(pc,
                input_dim,
                hid_dim,
                n_piece4init=hid_piece4init,
                init_scale=init_scale,
                init_rop=NoDropRop()))
     self.hid_act_f = ActivationHelper.get_act(hid_act)
     self.hid_drop = self.add_sub_node(
         "drop", Dropout(pc, (hid_dim, ), fix_rate=hid_drop))
     # -----
     # output layer (optional)
     self.final_output_dim = hid_dim
     # todo(+N): how about split hidden layers for each specific output
     self.out_fbias = out_fbias  # fixed extra bias
     self.out_act_f = ActivationHelper.get_act(out_fact)
     # no output dropouts
     if out_dim > 0:
         assert hid_act != "linear", "Please use non-linear activation for mlp!"
         assert hid_piece4init == 1, "Strange hid_piece4init for hidden layer with out_dim>0"
         self.final_aff = self.add_sub_node(
             "final",
             Affine(pc,
                    hid_dim,
                    out_dim,
                    n_piece4init=out_piece4init,
                    init_scale=init_scale,
                    init_rop=NoDropRop()))
         self.final_output_dim = out_dim
     else:
         self.final_aff = None
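A compact PyTorch sketch of this hidden-plus-optional-output pattern, illustrative only: ELU replaces the configurable hid_act, and the piecewise-init and fixed-output-bias options are omitted.

import torch
import torch.nn as nn
import torch.nn.functional as F

class HiddenThenOutput(nn.Module):
    def __init__(self, input_dim: int, hid_dim: int, out_dim: int = 0, hid_drop: float = 0.):
        super().__init__()
        self.hid_aff = nn.Linear(input_dim, hid_dim)
        self.hid_drop = nn.Dropout(hid_drop)
        self.final_aff = nn.Linear(hid_dim, out_dim) if out_dim > 0 else None
        self.final_output_dim = out_dim if out_dim > 0 else hid_dim

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        h = self.hid_drop(F.elu(self.hid_aff(x)))
        return self.final_aff(h) if self.final_aff is not None else h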
Example #8: argument-span expander (ArgSpanExpanderConf): left/right boundary scorers, either LSTM-based or MLP-based.
 def __init__(self, pc, conf: ArgSpanExpanderConf, input_enc_dims):
     super().__init__(pc, None, None)
     self.conf = conf
     # assert not conf.use_binary_scorer, "this mode seems problematic!!"
     # todo(note): only using bert's ones, simply flatten
     bert_input_dims, _ = input_enc_dims
     bert_dim, bert_fold = bert_input_dims
     flatten_bert_dim = bert_dim * bert_fold
     self.flatten_bert_dim = flatten_bert_dim
     # scoring params
     self.use_lstm_scorer = conf.use_lstm_scorer
     if self.use_lstm_scorer:
         self.llstm = self.add_sub_node(
             "llstm",
             RnnNode.get_rnn_node("lstm2", pc, flatten_bert_dim,
                                  conf.hid_dim))
         self.rlstm = self.add_sub_node(
             "rlstm",
             RnnNode.get_rnn_node("lstm2", pc, flatten_bert_dim,
                                  conf.hid_dim))
         self.lscorer = self.add_sub_node(
             "ls", Affine(pc, conf.hid_dim, 1, init_rop=NoDropRop()))
         self.rscorer = self.add_sub_node(
             "rs", Affine(pc, conf.hid_dim, 1, init_rop=NoDropRop()))
     else:
         self.lscorer = self.add_sub_node(
             "ls",
             get_mlp(pc, [flatten_bert_dim, flatten_bert_dim],
                     1,
                     hidden_which_affine=3,
                     n_hidden=conf.hid_dim,
                     n_hidden_layer=1,
                     hidden_act='elu',
                     final_act="linear",
                     final_bias=False,
                     final_init_rop=NoDropRop()))
         self.rscorer = self.add_sub_node(
             "rs",
             get_mlp(pc, [flatten_bert_dim, flatten_bert_dim],
                     1,
                     hidden_which_affine=3,
                     n_hidden=conf.hid_dim,
                     n_hidden_layer=1,
                     hidden_act='elu',
                     final_act="linear",
                     final_bias=False,
                     final_init_rop=NoDropRop()))
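An illustrative PyTorch sketch of the LSTM branch. The original uses a step-wise RnnNode ("lstm2"); a full nn.LSTM over the flattened BERT features is substituted here, so this is a sketch of the idea rather than the toolkit's implementation.

import torch
import torch.nn as nn

class BoundaryScorer(nn.Module):
    def __init__(self, flatten_bert_dim: int, hid_dim: int):
        super().__init__()
        self.lstm = nn.LSTM(flatten_bert_dim, hid_dim, batch_first=True)
        self.scorer = nn.Linear(hid_dim, 1)

    def forward(self, feats: torch.Tensor) -> torch.Tensor:
        # feats: [batch, seq_len, flatten_bert_dim] -> one boundary score per position
        hidden, _ = self.lstm(feats)
        return self.scorer(hidden).squeeze(-1)  # [batch, seq_len]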
Example #9: node extractor (Gene0): token-level and sentence-level attention transforms plus an optional gating MLP for mixing the two scores.
 def __init__(self, pc, conf: NodeExtractorConfGene0, vocab: HLabelVocab,
              extract_type: str):
     super().__init__(pc, conf, vocab, extract_type)
     # decoding
     # -----
     # the two parts: actually in biaffine attention forms
     # transform embeddings for attention match (token evidence)
     self.T_tok = self.add_sub_node(
         "at",
         Affine(pc,
                conf.lab_conf.n_dim,
                conf._input_dim,
                init_rop=NoDropRop()))
     # transform embeddings for global match (sent evidence)
     self.T_sent = self.add_sub_node(
         "as",
         Affine(pc,
                conf.lab_conf.n_dim,
                conf._input_dim,
                init_rop=NoDropRop()))
     # to be refreshed
     self.query_tok = None  # [L, D]
     self.query_sent = None  # [L, D]
     # -----
     # how to combine the two parts: fix lambda or dynamic gated (with the input features)
     self.lambda_score_tok = conf.lambda_score_tok
     if self.lambda_score_tok < 0.:  # auto mode: using an MLP (make hidden size equal to input//4)
         self.score_gate = self.add_sub_node(
             "mix",
             get_mlp(pc, [conf._input_dim] * 4,
                     1,
                     conf._input_dim,
                     hidden_act="elu",
                     final_act="sigmoid",
                     final_init_rop=NoDropRop(),
                     hidden_which_affine=3))
     else:
         self.score_gate = None
Example #10: candidate extractor (CandidateExtractorConf): a task-specific adapter plus a binary keep/nil predictor.
 def __init__(self, pc, conf: CandidateExtractorConf, input_enc_dims):
     super().__init__(pc, None, None)
     self.conf = conf
     # scorer
     self.adp = self.add_sub_node(
         'adp', TaskSpecAdp(pc, input_enc_dims, [], conf.hidden_dim))
     adp_hidden_size = self.adp.get_output_dims()[0]
     self.predictor = self.add_sub_node(
         'pred', Affine(pc, adp_hidden_size, 2,
                        init_rop=NoDropRop()))  # 0 as nil
     # others
     self.id_counter = defaultdict(
         int)  # docid->ef-count (make sure unique ef-id)
     self.valid_hlidx = HLabelIdx(["unk"], [1])
Example #11: node selector (NodeSelectorConf): an MLP scorer and loss-mode flags.
 def __init__(self, pc, conf: NodeSelectorConf):
     super().__init__(pc, None, None)
     self.conf = conf
     self.input_dim = conf._input_dim
     self.scorer = self.add_sub_node(
         "sc",
         get_mlp(pc,
                 self.input_dim,
                 1,
                 conf.mlp_hidden_dim,
                 n_hidden_layer=conf.mlp_hidden_layer,
                 hidden_act=conf.mlp_hidden_act,
                 final_init_rop=NoDropRop()))
     # loss function
     self.loss_prob, self.loss_hinge = [
         conf.ns_loss == z for z in ["prob", "hinge"]
     ]
Example #12: input-embedding component, with optional pretrained initialization and a rare-word mask for UNK replacement.
 def __init__(self, pc: BK.ParamCollection, comp_name: str, ec_conf: EmbedderCompConf,
              conf: EmbedderNodeConf, vpack: VocabPackage):
     super().__init__(pc, comp_name, ec_conf, conf, vpack)
     # -----
     # get embeddings
     npvec = None
     if self.ec_conf.comp_init_from_pretrain:
         npvec = vpack.get_emb(comp_name)
         zlog(f"Try to init InputEmbedNode {comp_name} with npvec.shape={npvec.shape if (npvec is not None) else None}")
         if npvec is None:
             zwarn("Warn: cannot get pre-trained embeddings to init!!")
     # get rare unk range
     # - get freq vals, make sure special ones will not be pruned; todo(note): directly use that field
     voc_rare_mask = [float(z is not None and z<=ec_conf.comp_rare_thr) for z in self.voc.final_vals]
     self.rare_mask = BK.input_real(voc_rare_mask)
     self.use_rare_unk = (ec_conf.comp_rare_unk>0. and ec_conf.comp_rare_thr>0)
     # --
     # dropout outside explicitly
     self.E = self.add_sub_node(f"E{self.comp_name}", Embedding(
         pc, len(self.voc), self.comp_dim, fix_row0=conf.embed_fix_row0, npvec=npvec, name=comp_name,
         init_rop=NoDropRop(), init_scale=self.comp_init_scale))
     self.create_dropout_node()
Example #13: CRF sequence-labeling node (SeqCrfNodeConf): emission layers plus a constrained transition matrix.
 def __init__(self, pc: BK.ParamCollection, pname: str, input_dim: int,
              conf: SeqCrfNodeConf, inputter: Inputter):
     super().__init__(pc, conf, name="CRF")
     self.conf = conf
     self.inputter = inputter
     self.input_dim = input_dim
     # this step is performed at the embedder, thus still does not influence the inputter
     self.add_root_token = self.inputter.embedder.add_root_token
     # --
     self.pname = pname
     self.attr_name = pname + "_seq"  # attribute name in Instance
     self.vocab = inputter.vpack.get_voc(pname)
     # todo(note): we must make sure that index 0 is the non/NIL tag
     assert self.vocab.non == 0
     # models
     if conf.hid_dim <= 0:  # no hidden layer
         self.hid_layer = None
         self.pred_input_dim = input_dim
     else:
         self.hid_layer = self.add_sub_node(
             "hid", Affine(pc, input_dim, conf.hid_dim, act=conf.hid_act))
         self.pred_input_dim = conf.hid_dim
     self.tagset_size = self.vocab.unk  # todo(note): UNK is the prediction boundary
     self.pred_layer = self.add_sub_node(
         "pr",
         Affine(pc,
                self.pred_input_dim,
                self.tagset_size + 2,
                init_rop=NoDropRop()))
     # transition matrix
     init_transitions = np.zeros(
         [self.tagset_size + 2, self.tagset_size + 2])
     init_transitions[:, START_TAG] = -10000.0
     init_transitions[STOP_TAG, :] = -10000.0
     init_transitions[:, 0] = -10000.0
     init_transitions[0, :] = -10000.0
     self.transitions = self.add_param(
         "T", (self.tagset_size + 2, self.tagset_size + 2),
         init=init_transitions)
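The transition-matrix initialization above, restated in plain numpy/PyTorch for illustration; placing START_TAG and STOP_TAG as the two extra appended states is an assumption about the surrounding code.

import numpy as np
import torch

tagset_size = 20
START_TAG, STOP_TAG = tagset_size, tagset_size + 1   # assumed layout of the two extra states
NEG = -10000.0

init_transitions = np.zeros((tagset_size + 2, tagset_size + 2), dtype=np.float32)
init_transitions[:, START_TAG] = NEG   # never transition *into* START
init_transitions[STOP_TAG, :] = NEG    # never transition *out of* STOP
init_transitions[:, 0] = NEG           # index 0 is the reserved non/NIL tag
init_transitions[0, :] = NEG
transitions = torch.nn.Parameter(torch.from_numpy(init_transitions))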
Example #14: hierarchical-label node (HLabelNodeConf): pooled label embeddings, layered predictors, and loss settings.
 def __init__(self,
              pc,
              conf: HLabelNodeConf,
              hl_vocab: HLabelVocab,
              eff_max_layer=None):
     super().__init__(pc, None, None)
     self.conf = conf
     self.hl_vocab = hl_vocab
     assert self.hl_vocab.nil_as_zero  # for each layer, the idx=0 is the full-NIL
     # basic pool embeddings
     npvec = hl_vocab.pool_init_vec
     if not conf.pool_init_hint:
         npvec = None
     else:
         assert npvec is not None, "pool-init not provided by the Vocab!"
     n_dim, n_pool = conf.n_dim, len(hl_vocab.pools_k)
     self.pool_pred = self.add_sub_node(
         "pp",
         Embedding(
             pc,
             n_pool,
             n_dim,
             fix_row0=conf.zero_nil,
             npvec=npvec,
             init_rop=(NoDropRop() if conf.nodrop_pred_embeds else None)))
     if conf.tie_embeds:
         self.pool_lookup = self.pool_pred
     else:
         self.pool_lookup = self.add_sub_node(
             "pl",
             Embedding(pc,
                       n_pool,
                       n_dim,
                       fix_row0=conf.zero_nil,
                       npvec=npvec,
                       init_rop=(NoDropRop()
                                 if conf.nodrop_lookup_embeds else None)))
     # layered labels embeddings (to be refreshed)
     self.max_layer = hl_vocab.max_layer
     self.layered_embeds_pred = [None] * self.max_layer
     self.layered_embeds_lookup = [None] * self.max_layer
     self.layered_prei = [
         None
     ] * self.max_layer  # previous layer i, for score combining
     self.layered_isnil = [None] * self.max_layer  # whether is nil(None)
     self.zero_nil = conf.zero_nil
     # lookup summer
     assert conf.strategy_predict == "sum"
     self.lookup_is_sum, self.lookup_is_ff = [
         conf.strategy_lookup == z for z in ["sum", "ff"]
     ]
     if self.lookup_is_ff:
         self.lookup_summer = self.add_sub_node(
             "summer",
             Affine(pc, [n_dim] * self.max_layer, n_dim, act="tanh"))
     elif self.lookup_is_sum:
         self.sum_dropout = self.add_sub_node("sdrop",
                                              Dropout(pc, (n_dim, )))
         self.lookup_summer = lambda embeds: self.sum_dropout(
             BK.stack(embeds, 0).sum(0))
     else:
         raise NotImplementedError(
             f"UNK strategy_lookup: {conf.strategy_lookup}")
     # bias for prediction
     self.prediction_sizes = [
         len(hl_vocab.layered_pool_links_padded[i])
         for i in range(self.max_layer)
     ]
     if conf.bias_predict:
         self.biases_pred = [
             self.add_param(name="B", shape=(x, ))
             for x in self.prediction_sizes
         ]
     else:
         self.biases_pred = [None] * self.max_layer
     # =====
     # training
     self.is_hinge_loss, self.is_prob_loss = [
         conf.loss_function == z for z in ["hinge", "prob"]
     ]
     self.loss_lambdas = conf.loss_lambdas + [1.] * (
         self.max_layer - len(conf.loss_lambdas))  # loss scale
     self.margin_lambdas = conf.margin_lambdas + [0.] * (
         self.max_layer - len(conf.margin_lambdas))  # margin scale
     self.lookup_soft_alphas = conf.lookup_soft_alphas + [1.] * (
         self.max_layer - len(conf.lookup_soft_alphas))
     self.loss_fullnil_weight = conf.loss_fullnil_weight
     # ======
     # set current effective max_layer
     self.eff_max_layer = self.max_layer
     if eff_max_layer is not None:
         self.set_eff_max_layer(eff_max_layer)
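For illustration, the "sum" lookup summer above in PyTorch terms: stack the per-layer label embeddings, sum over the layer axis, then apply dropout. The shapes and dropout rate below are hypothetical.

import torch
import torch.nn as nn

sum_dropout = nn.Dropout(0.1)                          # hypothetical rate
embeds = [torch.randn(4, 8) for _ in range(3)]         # 3 layers of [batch, n_dim] label embeddings
summed = sum_dropout(torch.stack(embeds, 0).sum(0))    # -> [4, 8]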
Example #15: recurrent/attention block (VRecConf): a feature (attention) node, a combiner, an optional feed-forward sublayer, and layer norms.
 def __init__(self, pc, dim: int, conf: VRecConf):
     super().__init__(pc, None, None)
     self.conf = conf
     self.dim = dim
     # =====
     # Feat
     if conf.feat_mod == "matt":
         self.feat_node = self.add_sub_node(
             "feat", MAttNode(pc, dim, dim, dim, conf.matt_conf))
         self.attn_count = conf.matt_conf.head_count
     elif conf.feat_mod == "fcomb":
         self.feat_node = self.add_sub_node(
             "feat", FCombNode(pc, dim, dim, dim, conf.fc_conf))
         self.attn_count = conf.fc_conf.fc_count
     else:
         raise NotImplementedError()
     feat_out_dim = self.feat_node.get_output_dims()[0]
     # =====
     # Combiner
     if conf.comb_mode == "affine":
         self.comb_aff = self.add_sub_node(
             "aff",
             AffineCombiner(pc, [dim, feat_out_dim],
                            [conf.comb_affine_q, conf.comb_affine_v],
                            dim,
                            out_act=conf.comb_affine_act,
                            out_drop=conf.comb_affine_drop))
         self.comb_f = lambda q, v, c: (self.comb_aff([q, v]), None)
     elif conf.comb_mode == "lstm":
         self.comb_lstm = self.add_sub_node(
             "lstm", LstmNode2(pc, feat_out_dim, dim))
         self.comb_f = self._call_lstm
     else:
         raise NotImplementedError()
     # =====
     # ff
     if conf.ff_dim > 0:
         self.has_ff = True
         self.linear1 = self.add_sub_node(
             "l1",
             Affine(pc,
                    dim,
                    conf.ff_dim,
                    act=conf.ff_act,
                    init_rop=NoDropRop()))
         self.dropout1 = self.add_sub_node(
             "d1", Dropout(pc, (conf.ff_dim, ), fix_rate=conf.ff_drop))
         self.linear2 = self.add_sub_node(
             "l2",
             Affine(pc,
                    conf.ff_dim,
                    dim,
                    act="linear",
                    init_rop=NoDropRop()))
         self.dropout2 = self.add_sub_node(
             "d2", Dropout(pc, (dim, ), fix_rate=conf.ff_drop))
     else:
         self.has_ff = False
     # layer norms
     if conf.use_pre_norm:
         self.att_pre_norm = self.add_sub_node("aln1", LayerNorm(pc, dim))
         self.ff_pre_norm = self.add_sub_node("fln1", LayerNorm(pc, dim))
     else:
         self.att_pre_norm = self.ff_pre_norm = None
     if conf.use_post_norm:
         self.att_post_norm = self.add_sub_node("aln2", LayerNorm(pc, dim))
         self.ff_post_norm = self.add_sub_node("fln2", LayerNorm(pc, dim))
     else:
         self.att_post_norm = self.ff_post_norm = None
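An illustrative PyTorch sketch of the feed-forward sublayer with the optional pre/post LayerNorm wiring. ReLU replaces the configurable ff_act, and the residual connection is an assumption, since it is not visible in the constructor above.

import torch
import torch.nn as nn

class FFSubLayer(nn.Module):
    def __init__(self, dim: int, ff_dim: int, ff_drop: float = 0.1,
                 use_pre_norm: bool = False, use_post_norm: bool = True):
        super().__init__()
        self.linear1, self.dropout1 = nn.Linear(dim, ff_dim), nn.Dropout(ff_drop)
        self.linear2, self.dropout2 = nn.Linear(ff_dim, dim), nn.Dropout(ff_drop)
        self.pre_norm = nn.LayerNorm(dim) if use_pre_norm else None
        self.post_norm = nn.LayerNorm(dim) if use_post_norm else None

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        h = self.pre_norm(x) if self.pre_norm is not None else x
        h = self.dropout2(self.linear2(self.dropout1(torch.relu(self.linear1(h)))))
        out = x + h  # residual connection (assumed)
        return self.post_norm(out) if self.post_norm is not None else out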
Example #16: char-CNN embedding component: one max-pooling CnnLayer per window size.
 def __init__(self, pc: BK.ParamCollection, comp_name: str, ec_conf: EmbedderCompConf,
              conf: EmbedderNodeConf, vpack: VocabPackage):
     super().__init__(pc, comp_name, ec_conf, conf, vpack)
     # -----
     per_cnn_size = conf.char_cnn_hidden // len(conf.char_cnn_windows)
     self.char_cnns = [self.add_sub_node("char_cnn", CnnLayer(
         self.pc, self.comp_dim, per_cnn_size, z, pooling="max", act="tanh", init_rop=NoDropRop()))
                       for z in conf.char_cnn_windows]
     self.output_dim = conf.char_cnn_hidden
     self.create_dropout_node()
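An illustrative PyTorch sketch of the per-window char CNN with max pooling. nn.Conv1d stands in for CnnLayer, and the padding and tanh details are assumptions.

import torch
import torch.nn as nn

class CharCnnEncoder(nn.Module):
    def __init__(self, char_dim: int, char_cnn_hidden: int, char_cnn_windows=(3, 5)):
        super().__init__()
        per_cnn_size = char_cnn_hidden // len(char_cnn_windows)
        self.convs = nn.ModuleList(
            [nn.Conv1d(char_dim, per_cnn_size, w, padding=w // 2) for w in char_cnn_windows])

    def forward(self, chars: torch.Tensor) -> torch.Tensor:
        # chars: [batch, char_dim, word_len]; tanh then max-pool over the character axis
        outs = [torch.tanh(conv(chars)).max(dim=-1)[0] for conv in self.convs]
        return torch.cat(outs, dim=-1)  # [batch, char_cnn_hidden] when evenly divisible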
Example #17: masked-LM node with word and POS heads, optional embedding tying or pretrained initialization, and configurable score/loss combination.
 def __init__(self, pc: BK.ParamCollection, input_dim: int,
              conf: MaskLMNodeConf, inputter: Inputter):
     super().__init__(pc, conf, name="MLM")
     self.conf = conf
     self.inputter = inputter
     self.input_dim = input_dim
     # this step is performed at the embedder, thus still does not influence the inputter
     self.add_root_token = self.inputter.embedder.add_root_token
     # vocab and padder
     vpack = inputter.vpack
     vocab_word, vocab_pos = vpack.get_voc("word"), vpack.get_voc("pos")
     # no mask fields
     self.nomask_names_set = set(conf.nomask_names)
     # models
     if conf.hid_dim <= 0:  # no hidden layer
         self.hid_layer = None
         self.pred_input_dim = input_dim
     else:
         self.hid_layer = self.add_sub_node(
             "hid", Affine(pc, input_dim, conf.hid_dim, act=conf.hid_act))
         self.pred_input_dim = conf.hid_dim
     # todo(note): unk is the first one above real words
     self.pred_word_size = min(conf.max_pred_rank + 1, vocab_word.unk)
     self.pred_pos_size = vocab_pos.unk
     if conf.tie_input_embeddings:
         zwarn("Tie all preds in mlm with input embeddings!!")
         self.pred_word_layer = self.pred_pos_layer = None
         self.inputter_word_node = self.inputter.embedder.get_node("word")
         self.inputter_pos_node = self.inputter.embedder.get_node("pos")
     else:
         self.inputter_word_node, self.inputter_pos_node = None, None
         self.pred_word_layer = self.add_sub_node(
             "pw",
             Affine(pc,
                    self.pred_input_dim,
                    self.pred_word_size,
                    init_rop=NoDropRop()))
         self.pred_pos_layer = self.add_sub_node(
             "pp",
             Affine(pc,
                    self.pred_input_dim,
                    self.pred_pos_size,
                    init_rop=NoDropRop()))
         if conf.init_pred_from_pretrain:
             npvec = vpack.get_emb("word")
             if npvec is None:
                 zwarn(
                     "Pretrained vector not provided, skip init pred embeddings!!"
                 )
             else:
                 with BK.no_grad_env():
                     self.pred_word_layer.ws[0].copy_(
                         BK.input_real(npvec[:self.pred_word_size].T))
                 zlog(
                     f"Init pred embeddings from pretrained vectors (size={self.pred_word_size})."
                 )
     # =====
     COMBINE_METHOD_FS = {
         "sum": lambda xs: BK.stack(xs, -1).sum(-1),
         "avg": lambda xs: BK.stack(xs, -1).mean(-1),
         "min": lambda xs: BK.stack(xs, -1).min(-1)[0],
         "max": lambda xs: BK.stack(xs, -1).max(-1)[0],
     }
     self.loss_comb_f = COMBINE_METHOD_FS[conf.loss_comb_method]
     self.score_comb_f = COMBINE_METHOD_FS[conf.score_comb_method]
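For illustration, the combination functions above applied to two loss tensors; torch.stack replaces BK.stack and the numbers are arbitrary.

import torch

COMBINE_METHOD_FS = {
    "sum": lambda xs: torch.stack(xs, -1).sum(-1),
    "avg": lambda xs: torch.stack(xs, -1).mean(-1),
    "min": lambda xs: torch.stack(xs, -1).min(-1)[0],
    "max": lambda xs: torch.stack(xs, -1).max(-1)[0],
}
word_loss, pos_loss = torch.tensor([1.0, 2.0]), torch.tensor([3.0, 1.0])
print(COMBINE_METHOD_FS["avg"]([word_loss, pos_loss]))  # tensor([2.0000, 1.5000])
print(COMBINE_METHOD_FS["max"]([word_loss, pos_loss]))  # tensor([3., 2.])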
Example #18: bidirectional plain-LM node (PlainLMNodeConf): left-to-right and right-to-left heads with optional tying and pretrained initialization.
 def __init__(self, pc: BK.ParamCollection, input_dim: int,
              conf: PlainLMNodeConf, inputter: Inputter):
     super().__init__(pc, conf, name="PLM")
     self.conf = conf
     self.inputter = inputter
     self.input_dim = input_dim
     self.split_input_blm = conf.split_input_blm
     # this step is performed at the embedder, thus still does not influence the inputter
     self.add_root_token = self.inputter.embedder.add_root_token
     # vocab and padder
     vpack = inputter.vpack
     vocab_word = vpack.get_voc("word")
     # models
     real_input_dim = input_dim // 2 if self.split_input_blm else input_dim
     if conf.hid_dim <= 0:  # no hidden layer
         self.l2r_hid_layer = self.r2l_hid_layer = None
         self.pred_input_dim = real_input_dim
     else:
         self.l2r_hid_layer = self.add_sub_node(
             "l2r_h",
             Affine(pc, real_input_dim, conf.hid_dim, act=conf.hid_act))
         self.r2l_hid_layer = self.add_sub_node(
             "r2l_h",
             Affine(pc, real_input_dim, conf.hid_dim, act=conf.hid_act))
         self.pred_input_dim = conf.hid_dim
     # todo(note): unk is the first one above real words
     self.pred_size = min(conf.max_pred_rank + 1, vocab_word.unk)
     if conf.tie_input_embeddings:
         zwarn("Tie all preds in plm with input embeddings!!")
         self.l2r_pred = self.r2l_pred = None
         self.inputter_embed_node = self.inputter.embedder.get_node("word")
     else:
         self.l2r_pred = self.add_sub_node(
             "l2r_p",
             Affine(pc,
                    self.pred_input_dim,
                    self.pred_size,
                    init_rop=NoDropRop()))
         if conf.tie_bidirect_pred:
             self.r2l_pred = self.l2r_pred
         else:
             self.r2l_pred = self.add_sub_node(
                 "r2l_p",
                 Affine(pc,
                        self.pred_input_dim,
                        self.pred_size,
                        init_rop=NoDropRop()))
         self.inputter_embed_node = None
         if conf.init_pred_from_pretrain:
             npvec = vpack.get_emb("word")
             if npvec is None:
                 zwarn(
                     "Pretrained vector not provided, skip init pred embeddings!!"
                 )
             else:
                 with BK.no_grad_env():
                     self.l2r_pred.ws[0].copy_(
                         BK.input_real(npvec[:self.pred_size].T))
                     self.r2l_pred.ws[0].copy_(
                         BK.input_real(npvec[:self.pred_size].T))
                 zlog(
                     f"Init pred embeddings from pretrained vectors (size={self.pred_size})."
                 )
Example #19: node extractor (Gene1): attention key/query Affines, a repository parameter, an LSTM cell, and a labeling Affine.
 def __init__(self, pc, conf: NodeExtractorConfGene1, vocab: HLabelVocab, extract_type: str):
     super().__init__(pc, conf, vocab, extract_type)
     # -----
     # decoding
     # 1. attention for selecting token
     self.affine_k = self.add_sub_node("ak", Affine(pc, [conf._input_dim, 1], conf.hid_att,
                                                    bias=False, which_affine=3, init_rop=NoDropRop()))
     self.affine_q = self.add_sub_node("aq", Affine(pc, [conf.hid_repo, conf.hid_state], conf.hid_att,
                                                    bias=False, which_affine=3, init_rop=NoDropRop()))
     self.repos = self.add_param("r", [conf.num_repo, conf.hid_repo], lookup=True)
     # input is (last_hid_layer + lab_embed)
     self.rnn_unit = self.add_sub_node("rnn", RnnNode.get_rnn_node("lstm2", pc, 2*conf.lab_conf.n_dim, conf.hid_state))
     # 2. labeling
     self.lab_f = self.add_sub_node("lab", Affine(pc, [conf._input_dim, 1, conf.hid_repo, conf.hid_state],
                                                  conf.lab_conf.n_dim, which_affine=3, act="elu"))