Code Example #1
File: base.py Project: ValentinaPy/zmsp
 def __init__(self,
              pc,
              input_dims: List[int],
              use_affs: List[bool],
              out_dim: int,
              out_act='linear',
              out_drop=0.,
              param_init_scale=1.):
     super().__init__(pc, None, None)
     # -----
     self.input_dims = input_dims
     self.use_affs = use_affs
     self.out_dim = out_dim
     # =====
     assert len(input_dims) == len(use_affs)
     self.aff_nodes = []
     for d, use in zip(input_dims, use_affs):
         if use:
             one_aff = self.add_sub_node(
                 "aff",
                 Affine(pc,
                        d,
                        out_dim,
                        init_scale=param_init_scale,
                        init_rop=NoDropRop()))
         else:
             assert d == out_dim, f"Dimension mismatch for skipping affine: {d} vs {out_dim}!"
             one_aff = None
         self.aff_nodes.append(one_aff)
     self.out_act_f = ActivationHelper.get_act(out_act)
     self.out_drop = self.add_sub_node(
         "drop", Dropout(pc, (out_dim, ), fix_rate=out_drop))
Code Example #2
File: embedder.py Project: ValentinaPy/zmsp
 def create_dropout_node(self):
     if self.comp_dropout > 0.:
         self.dropout = self.add_sub_node(
             f"D{self.comp_name}", Dropout(self.pc, (self.output_dim,), fix_rate=self.comp_dropout))
     else:
         self.dropout = lambda x: x
     return self.dropout
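
The method above returns a real Dropout node only when the component's dropout rate is positive, and a no-op lambda otherwise, so callers can always write self.dropout(x). A hedged standalone sketch of the same fallback (toy_dropout_factory is a hypothetical helper, not zmsp code):

    import numpy as np

    def toy_dropout_factory(rate: float, rng=np.random.default_rng(0)):
        # Return a callable with a uniform interface: real (inverted) dropout when
        # rate > 0, otherwise the identity function.
        if rate > 0.:
            def drop(x):
                mask = (rng.random(x.shape) >= rate) / (1. - rate)
                return x * mask
            return drop
        return lambda x: x  # dropout disabled: pass through

    # drop = toy_dropout_factory(0.3); y = drop(np.ones((2, 4)))
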
Code Example #3
File: head.py Project: ValentinaPy/zmsp
 def __init__(self, pc, conf: NodeExtractorConfHead, vocab: HLabelVocab,
              extract_type: str):
     super().__init__(pc, conf, vocab, extract_type)
     # node selector
     conf.sel_conf._input_dim = conf._input_dim  # make dims fit
     self.sel: NodeSelector = self.add_sub_node(
         "sel", NodeSelector(pc, conf.sel_conf))
     # encoding
     self.dmxnn = conf.dmxnn
     self.posi_embed = self.add_sub_node(
         "pe", RelPosiEmbedding(pc, conf.posi_dim, max=conf.posi_cut))
     if self.dmxnn:
         conf.e_enc._input_dim = conf._input_dim + conf.posi_dim
     else:
         conf.e_enc._input_dim = conf._input_dim
     self.e_encoder = self.add_sub_node("ee", MyEncoder(pc, conf.e_enc))
     e_enc_dim = self.e_encoder.get_output_dims()[0]
     # decoding
     # todo(note): dropout after pooling; todo(+N): cannot go to previous layers if there are no encoders
     self.special_drop = self.add_sub_node("sd", Dropout(pc, (e_enc_dim, )))
     self.use_lab_f = conf.use_lab_f
     self.lab_f_use_lexi = conf.lab_f_use_lexi
     if self.use_lab_f:
         lab_f_input_dims = [e_enc_dim] * 3 if self.dmxnn else [e_enc_dim]
         if self.lab_f_use_lexi:
             lab_f_input_dims.append(conf._lexi_dim)
         self.lab_f = self.add_sub_node(
             "lab",
             Affine(pc,
                    lab_f_input_dims,
                    conf.lab_conf.n_dim,
                    act=conf.lab_f_act))
     else:
         self.lab_f = lambda x: x[0]  # only use the first one
     # secondary type
     self.use_secondary_type = conf.use_secondary_type
     if self.use_secondary_type:
         # todo(note): re-use vocab; or totally reuse the predictor?
         if conf.sectype_reuse_hl:
             self.hl2: HLabelNode = self.hl
         else:
             new_lab_conf = deepcopy(conf.lab_conf)
             new_lab_conf.zero_nil = False  # todo(note): not zero_nil here!
             self.hl2: HLabelNode = self.add_sub_node(
                 "hl", HLabelNode(pc, new_lab_conf, vocab))
         # enc+t1 -> t2
         self.t1tot2 = self.add_sub_node(
             "1to2", Embedding(pc, self.hl_output_size,
                               conf.lab_conf.n_dim))
     else:
         self.hl2 = None
         self.t1tot2 = None
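
One detail worth noting above is how the input widths of the label feature lab_f are assembled: three encoder-sized slots when dmxnn is on, one otherwise, plus an optional lexical vector. A small hypothetical helper mirroring just that logic (not a zmsp API):

    def lab_f_input_dims(e_enc_dim, dmxnn, use_lexi, lexi_dim=None):
        # three pooled pieces when dmxnn is enabled, a single pooled vector otherwise
        dims = [e_enc_dim] * 3 if dmxnn else [e_enc_dim]
        if use_lexi:
            dims.append(lexi_dim)
        return dims

    # lab_f_input_dims(512, dmxnn=True, use_lexi=True, lexi_dim=300) -> [512, 512, 512, 300]
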
Code Example #4
File: base.py Project: ValentinaPy/zmsp
 def __init__(self,
              pc,
              input_dim,
              hid_dim: int,
              hid_act='linear',
              hid_drop=0.,
              hid_piece4init=1,
              out_dim=0,
              out_fbias=0.,
              out_fact="linear",
              out_piece4init=1,
              init_scale=1.):
     super().__init__(pc, None, None)
     # -----
     # hidden layer
     self.hid_aff = self.add_sub_node(
         "hidden",
         Affine(pc,
                input_dim,
                hid_dim,
                n_piece4init=hid_piece4init,
                init_scale=init_scale,
                init_rop=NoDropRop()))
     self.hid_act_f = ActivationHelper.get_act(hid_act)
     self.hid_drop = self.add_sub_node(
         "drop", Dropout(pc, (hid_dim, ), fix_rate=hid_drop))
     # -----
     # output layer (optional)
     self.final_output_dim = hid_dim
     # todo(+N): how about split hidden layers for each specific output
     self.out_fbias = out_fbias  # fixed extra bias
     self.out_act_f = ActivationHelper.get_act(out_fact)
     # no output dropouts
     if out_dim > 0:
         assert hid_act != "linear", "Please use non-linear activation for mlp!"
         assert hid_piece4init == 1, "Strange hid_piece4init for hidden layer with out_dim>0"
         self.final_aff = self.add_sub_node(
             "final",
             Affine(pc,
                    hid_dim,
                    out_dim,
                    n_piece4init=out_piece4init,
                    init_scale=init_scale,
                    init_rop=NoDropRop()))
         self.final_output_dim = out_dim
     else:
         self.final_aff = None
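
The layer above always builds a hidden Affine and only optionally a final one, tracking final_output_dim as whichever comes last; the assert guards against a linear hidden layer, which would let the two affines collapse into one. A compact sketch of that wiring (ToyLayer and build_mlp are hypothetical stand-ins):

    class ToyLayer:
        def __init__(self, d_in, d_out):
            self.d_in, self.d_out = d_in, d_out

    def build_mlp(input_dim, hid_dim, hid_act="tanh", out_dim=0):
        hid = ToyLayer(input_dim, hid_dim)
        final, final_output_dim = None, hid_dim
        if out_dim > 0:
            # two stacked linear maps are equivalent to one, so require a non-linearity
            assert hid_act != "linear", "Please use non-linear activation for mlp!"
            final = ToyLayer(hid_dim, out_dim)
            final_output_dim = out_dim
        return hid, final, final_output_dim
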
Code Example #5
File: c2_norm.py Project: ValentinaPy/zmsp
 def __init__(self, pc: BK.ParamCollection, head_count,
              conf: MAttNormerConf):
     super().__init__(pc, None, None)
     self.conf = conf
     self.head_count = head_count
     # -----
     self._norm_f = getattr(self, "_norm_" + conf.norm_mode)  # shortcut
     self._norm_dims = {
         'flatten': [-1],
         'head': [-1],
         'cand': [-2],
         'head_cand': [-1, -2],
         'binary': [-1]
     }[conf.norm_mode]
     self.norm_prune = conf.norm_prune
     # cnode: special attention
     self.cnode = self.add_sub_node("cn", ConcreteNode(pc, conf.cconf))
     # attention dropout: no-fix dropout rate (not elegant here)
     rr = NoFixRop()
     self.adrop = self.add_sub_node(
         "adrop", Dropout(pc, (), init_rop=rr, fix_rate=conf.attn_dropout))
Code Example #6
File: mtl.py Project: ValentinaPy/zmsp
 def __init__(self, conf: MtlMlmModelConf, vpack: VocabPackage):
     super().__init__(conf)
     # for easier checking
     self.word_vocab = vpack.get_voc("word")
     # components
     self.embedder = self.add_node("emb", EmbedderNode(self.pc, conf.emb_conf, vpack))
     self.inputter = Inputter(self.embedder, vpack)  # not a node
     self.emb_out_dim = self.embedder.get_output_dims()[0]
     self.enc_attn_count = conf.default_attn_count
     if conf.enc_choice == "vrec":
         self.encoder = self.add_component("enc", VRecEncoder(self.pc, self.emb_out_dim, conf.venc_conf))
         self.enc_attn_count = self.encoder.attn_count
     elif conf.enc_choice == "original":
         conf.oenc_conf._input_dim = self.emb_out_dim
         self.encoder = self.add_node("enc", MyEncoder(self.pc, conf.oenc_conf))
     else:
         raise NotImplementedError()
     zlog(f"Finished building model's encoder {self.encoder}, all size is {self.encoder.count_allsize_parameters()}")
     self.enc_out_dim = self.encoder.get_output_dims()[0]
     # --
     conf.rprep_conf._rprep_vr_conf.matt_conf.head_count = self.enc_attn_count  # make head-count agree
     self.rpreper = self.add_node("rprep", RPrepNode(self.pc, self.enc_out_dim, conf.rprep_conf))
     # --
     self.lambda_agree = self.add_scheduled_value(ScheduledValue(f"agr:lambda", conf.lambda_agree))
     self.agree_loss_f = EntropyHelper.get_method(conf.agree_loss_f)
     # --
     self.masklm = self.add_component("mlm", MaskLMNode(self.pc, self.enc_out_dim, conf.mlm_conf, self.inputter))
     self.plainlm = self.add_component("plm", PlainLMNode(self.pc, self.enc_out_dim, conf.plm_conf, self.inputter))
     # todo(note): here we use attn as dim_pair, do not use pair if not using vrec!!
     self.orderpr = self.add_component("orp", OrderPredNode(
         self.pc, self.enc_out_dim, self.enc_attn_count, conf.orp_conf, self.inputter))
     # =====
     # pre-training pre-load point!!
     if conf.load_pretrain_model_name:
         zlog(f"At preload_pretrain point: Loading from {conf.load_pretrain_model_name}")
         self.pc.load(conf.load_pretrain_model_name, strict=False)
     # =====
     self.dpar = self.add_component("dpar", DparG1Decoder(
         self.pc, self.enc_out_dim, self.enc_attn_count, conf.dpar_conf, self.inputter))
     self.upos = self.add_component("upos", SeqLabNode(
         self.pc, "pos", self.enc_out_dim, self.conf.upos_conf, self.inputter))
     if conf.do_ner:
         if conf.ner_use_crf:
             self.ner = self.add_component("ner", SeqCrfNode(
                 self.pc, "ner", self.enc_out_dim, self.conf.ner_conf, self.inputter))
         else:
             self.ner = self.add_component("ner", SeqLabNode(
                 self.pc, "ner", self.enc_out_dim, self.conf.ner_conf, self.inputter))
     else:
         self.ner = None
     # for pairwise reprs (no trainable params here!)
     self.rel_dist_embed = self.add_node("oremb", PosiEmbedding2(self.pc, n_dim=self.enc_attn_count, max_val=100))
     self._prepr_f_attn_sum = lambda cache, rdist: BK.stack(cache.list_attn, 0).sum(0) if len(cache.list_attn) > 0 else None
     self._prepr_f_attn_avg = lambda cache, rdist: BK.stack(cache.list_attn, 0).mean(0) if len(cache.list_attn) > 0 else None
     self._prepr_f_attn_max = lambda cache, rdist: BK.stack(cache.list_attn, 0).max(0)[0] if len(cache.list_attn) > 0 else None
     self._prepr_f_attn_last = lambda cache, rdist: cache.list_attn[-1] if len(cache.list_attn) > 0 else None
     self._prepr_f_rdist = lambda cache, rdist: self._get_rel_dist_embed(rdist, False)
     self._prepr_f_rdist_abs = lambda cache, rdist: self._get_rel_dist_embed(rdist, True)
     self.prepr_f = getattr(self, "_prepr_f_"+conf.prepr_choice)  # shortcut
     # --
     self.testing_rand_gen = Random.create_sep_generator(conf.testing_rand_gen_seed)  # separate gen for testing
     # =====
     if conf.orp_loss_special:
         self.orderpr.add_node_special(self.masklm)
     # =====
     # extra one!!
     self.aug_word2 = self.aug_encoder = self.aug_mixturer = None
     if conf.aug_word2:
         self.aug_word2 = self.add_node("aug2", AugWord2Node(self.pc, conf.emb_conf, vpack,
                                                             "word2", conf.aug_word2_dim, self.emb_out_dim))
         if conf.aug_word2_aug_encoder:
             assert conf.enc_choice == "vrec"
             self.aug_detach_drop = self.add_node("dd", Dropout(self.pc, (self.enc_out_dim,), fix_rate=conf.aug_detach_dropout))
             self.aug_encoder = self.add_component("Aenc", VRecEncoder(self.pc, self.emb_out_dim, conf.venc_conf))
             self.aug_mixturer = self.add_node("Amix", BertFeaturesWeightLayer(self.pc, conf.aug_detach_numlayer))
Code Example #7
File: embedder.py Project: zzsfornlp/zmsp
 def __init__(self, pc: BK.ParamCollection, econf: EmbedConf,
              vpack: VocabPackage):
     super().__init__(pc, None, None)
     self.conf = econf
     #
     repr_sizes = []
     # word
     self.has_word = (econf.dim_word > 0)
     if self.has_word:
         npvec = vpack.get_emb(
             "word") if econf.init_words_from_pretrain else None
         self.word_embed = self.add_sub_node(
             "ew",
             Embedding(self.pc,
                       len(vpack.get_voc("word")),
                       econf.dim_word,
                       npvec=npvec,
                       name="word",
                       freeze=econf.word_freeze))
         repr_sizes.append(econf.dim_word)
     # char
     self.has_char = (econf.dim_char > 0)
     if self.has_char:
         # todo(warn): cnns will also use emb's drop?
         self.char_embed = self.add_sub_node(
             "ec",
             Embedding(self.pc,
                       len(vpack.get_voc("char")),
                       econf.dim_char,
                       name="char"))
         per_cnn_size = econf.char_cnn_hidden // len(econf.char_cnn_windows)
         self.char_cnns = [
             self.add_sub_node(
                 "cnnc",
                 CnnLayer(self.pc,
                          econf.dim_char,
                          per_cnn_size,
                          z,
                          pooling="max",
                          act="tanh")) for z in econf.char_cnn_windows
         ]
         repr_sizes.append(econf.char_cnn_hidden)
     # posi: absolute positional embeddings
     self.has_posi = (econf.dim_posi > 0)
     if self.has_posi:
         self.posi_embed = self.add_sub_node(
             "ep",
             PosiEmbedding(self.pc, econf.dim_posi, econf.posi_clip,
                           econf.posi_fix_sincos, econf.posi_freeze))
         repr_sizes.append(econf.dim_posi)
     # extras: like POS, ...
     self.dim_extras = econf.dim_extras
     self.extra_names = econf.extra_names
     zcheck(
         len(self.dim_extras) == len(self.extra_names),
         "Unmatched dims and names!")
     self.extra_embeds = []
     for one_extra_dim, one_name in zip(self.dim_extras, self.extra_names):
         self.extra_embeds.append(
             self.add_sub_node(
                 "ext",
                 Embedding(self.pc,
                           len(vpack.get_voc(one_name)),
                           one_extra_dim,
                           npvec=vpack.get_emb(one_name, None),
                           name="extra:" + one_name)))
         repr_sizes.append(one_extra_dim)
     # auxes
     self.dim_auxes = econf.dim_auxes
     self.fold_auxes = econf.fold_auxes
     self.aux_overall_gammas = []
     self.aux_fold_lambdas = []
     for one_aux_dim, one_aux_fold in zip(self.dim_auxes, self.fold_auxes):
         repr_sizes.append(one_aux_dim)
         # aux gamma and fold trainable lambdas
         self.aux_overall_gammas.append(self.add_param("AG", (),
                                                       1.))  # scalar
         self.aux_fold_lambdas.append(
             self.add_param(
                 "AL", (), [1. / one_aux_fold
                            for _ in range(one_aux_fold)]))  # [#fold]
     # =====
     # another projection layer? & set final dim
     if len(repr_sizes) <= 0:
         zwarn("No inputs??")
     # zcheck(len(repr_sizes)>0, "No inputs?")
     self.repr_sizes = repr_sizes
     self.has_proj = (econf.emb_proj_dim > 0)
     if self.has_proj:
         proj_layer = Affine(self.pc, sum(repr_sizes), econf.emb_proj_dim)
         if econf.emb_proj_norm:
             norm_layer = LayerNorm(self.pc, econf.emb_proj_dim)
             self.final_layer = self.add_sub_node(
                 "fl", Sequential(self.pc, [proj_layer, norm_layer]))
         else:
             self.final_layer = self.add_sub_node("fl", proj_layer)
         self.output_dim = econf.emb_proj_dim
     else:
         self.final_layer = None
         self.output_dim = sum(repr_sizes)
     # =====
     # special MdDropout: dropout the entire last dim (for word, char, extras, but not posi)
     self.dropmd_word = self.add_sub_node("md", DropoutLastN(pc, lastn=1))
     self.dropmd_char = self.add_sub_node("md", DropoutLastN(pc, lastn=1))
     self.dropmd_extras = [
         self.add_sub_node("md", DropoutLastN(pc, lastn=1))
         for _ in self.extra_names
     ]
     # dropouts for aux
     self.drop_auxes = [
         self.add_sub_node("aux", Dropout(pc, (one_aux_dim, )))
         for one_aux_dim in self.dim_auxes
     ]
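
The embedder's final width follows directly from which inputs are enabled: every active component appends its size to repr_sizes, and output_dim is either their sum or the projection width when emb_proj_dim is set. A hypothetical helper that mirrors just that bookkeeping:

    def embedder_output_dim(dim_word=0, char_cnn_hidden=0, dim_posi=0,
                            dim_extras=(), dim_auxes=(), emb_proj_dim=0):
        repr_sizes = [d for d in (dim_word, char_cnn_hidden, dim_posi) if d > 0]
        repr_sizes += list(dim_extras) + list(dim_auxes)
        return emb_proj_dim if emb_proj_dim > 0 else sum(repr_sizes)

    # embedder_output_dim(dim_word=300, char_cnn_hidden=50, emb_proj_dim=512) -> 512
    # embedder_output_dim(dim_word=300, char_cnn_hidden=50)                   -> 350
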
Code Example #8
 def __init__(self,
              pc,
              conf: HLabelNodeConf,
              hl_vocab: HLabelVocab,
              eff_max_layer=None):
     super().__init__(pc, None, None)
     self.conf = conf
     self.hl_vocab = hl_vocab
     assert self.hl_vocab.nil_as_zero  # for each layer, the idx=0 is the full-NIL
     # basic pool embeddings
     npvec = hl_vocab.pool_init_vec
     if not conf.pool_init_hint:
         npvec = None
     else:
         assert npvec is not None, "pool-init not provided by the Vocab!"
     n_dim, n_pool = conf.n_dim, len(hl_vocab.pools_k)
     self.pool_pred = self.add_sub_node(
         "pp",
         Embedding(
             pc,
             n_pool,
             n_dim,
             fix_row0=conf.zero_nil,
             npvec=npvec,
             init_rop=(NoDropRop() if conf.nodrop_pred_embeds else None)))
     if conf.tie_embeds:
         self.pool_lookup = self.pool_pred
     else:
         self.pool_lookup = self.add_sub_node(
             "pl",
             Embedding(pc,
                       n_pool,
                       n_dim,
                       fix_row0=conf.zero_nil,
                       npvec=npvec,
                       init_rop=(NoDropRop()
                                 if conf.nodrop_lookup_embeds else None)))
     # layered labels embeddings (to be refreshed)
     self.max_layer = hl_vocab.max_layer
     self.layered_embeds_pred = [None] * self.max_layer
     self.layered_embeds_lookup = [None] * self.max_layer
     self.layered_prei = [
         None
     ] * self.max_layer  # previous layer i, for score combining
     self.layered_isnil = [None] * self.max_layer  # whether is nil(None)
     self.zero_nil = conf.zero_nil
     # lookup summer
     assert conf.strategy_predict == "sum"
     self.lookup_is_sum, self.lookup_is_ff = [
         conf.strategy_lookup == z for z in ["sum", "ff"]
     ]
     if self.lookup_is_ff:
         self.lookup_summer = self.add_sub_node(
             "summer",
             Affine(pc, [n_dim] * self.max_layer, n_dim, act="tanh"))
     elif self.lookup_is_sum:
         self.sum_dropout = self.add_sub_node("sdrop",
                                              Dropout(pc, (n_dim, )))
         self.lookup_summer = lambda embeds: self.sum_dropout(
             BK.stack(embeds, 0).sum(0))
     else:
         raise NotImplementedError(
             f"UNK strategy_lookup: {conf.strategy_lookup}")
     # bias for prediction
     self.prediction_sizes = [
         len(hl_vocab.layered_pool_links_padded[i])
         for i in range(self.max_layer)
     ]
     if conf.bias_predict:
         self.biases_pred = [
             self.add_param(name="B", shape=(x, ))
             for x in self.prediction_sizes
         ]
     else:
         self.biases_pred = [None] * self.max_layer
     # =====
     # training
     self.is_hinge_loss, self.is_prob_loss = [
         conf.loss_function == z for z in ["hinge", "prob"]
     ]
     self.loss_lambdas = conf.loss_lambdas + [1.] * (
         self.max_layer - len(conf.loss_lambdas))  # loss scale
     self.margin_lambdas = conf.margin_lambdas + [0.] * (
         self.max_layer - len(conf.margin_lambdas))  # margin scale
     self.lookup_soft_alphas = conf.lookup_soft_alphas + [1.] * (
         self.max_layer - len(conf.lookup_soft_alphas))
     self.loss_fullnil_weight = conf.loss_fullnil_weight
     # ======
     # set current effective max_layer
     self.eff_max_layer = self.max_layer
     if eff_max_layer is not None:
         self.set_eff_max_layer(eff_max_layer)
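
The per-layer hyperparameter lists above (loss_lambdas, margin_lambdas, lookup_soft_alphas) are padded with a default so every layer up to max_layer gets a value. A one-function sketch of that padding (pad_per_layer is hypothetical):

    def pad_per_layer(values, max_layer, default):
        return list(values) + [default] * (max_layer - len(values))

    # pad_per_layer([0.5], 3, 1.) -> [0.5, 1.0, 1.0]   (loss_lambdas style)
    # pad_per_layer([], 3, 0.)    -> [0.0, 0.0, 0.0]   (margin_lambdas style)
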
Code Example #9
 def __init__(self, pc, dim: int, conf: VRecConf):
     super().__init__(pc, None, None)
     self.conf = conf
     self.dim = dim
     # =====
     # Feat
     if conf.feat_mod == "matt":
         self.feat_node = self.add_sub_node(
             "feat", MAttNode(pc, dim, dim, dim, conf.matt_conf))
         self.attn_count = conf.matt_conf.head_count
     elif conf.feat_mod == "fcomb":
         self.feat_node = self.add_sub_node(
             "feat", FCombNode(pc, dim, dim, dim, conf.fc_conf))
         self.attn_count = conf.fc_conf.fc_count
     else:
         raise NotImplementedError()
     feat_out_dim = self.feat_node.get_output_dims()[0]
     # =====
     # Combiner
     if conf.comb_mode == "affine":
         self.comb_aff = self.add_sub_node(
             "aff",
             AffineCombiner(pc, [dim, feat_out_dim],
                            [conf.comb_affine_q, conf.comb_affine_v],
                            dim,
                            out_act=conf.comb_affine_act,
                            out_drop=conf.comb_affine_drop))
         self.comb_f = lambda q, v, c: (self.comb_aff([q, v]), None)
     elif conf.comb_mode == "lstm":
         self.comb_lstm = self.add_sub_node(
             "lstm", LstmNode2(pc, feat_out_dim, dim))
         self.comb_f = self._call_lstm
     else:
         raise NotImplementedError()
     # =====
     # ff
     if conf.ff_dim > 0:
         self.has_ff = True
         self.linear1 = self.add_sub_node(
             "l1",
             Affine(pc,
                    dim,
                    conf.ff_dim,
                    act=conf.ff_act,
                    init_rop=NoDropRop()))
         self.dropout1 = self.add_sub_node(
             "d1", Dropout(pc, (conf.ff_dim, ), fix_rate=conf.ff_drop))
         self.linear2 = self.add_sub_node(
             "l2",
             Affine(pc,
                    conf.ff_dim,
                    dim,
                    act="linear",
                    init_rop=NoDropRop()))
         self.dropout2 = self.add_sub_node(
             "d2", Dropout(pc, (dim, ), fix_rate=conf.ff_drop))
     else:
         self.has_ff = False
     # layer norms
     if conf.use_pre_norm:
         self.att_pre_norm = self.add_sub_node("aln1", LayerNorm(pc, dim))
         self.ff_pre_norm = self.add_sub_node("fln1", LayerNorm(pc, dim))
     else:
         self.att_pre_norm = self.ff_pre_norm = None
     if conf.use_post_norm:
         self.att_post_norm = self.add_sub_node("aln2", LayerNorm(pc, dim))
         self.ff_post_norm = self.add_sub_node("fln2", LayerNorm(pc, dim))
     else:
         self.att_post_norm = self.ff_post_norm = None
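
Both combiner branches above are wrapped so the caller sees a single interface, comb_f(q, v, cache) -> (output, new_cache); the affine branch is stateless and simply returns None as its cache. A hedged sketch of that wrapping, with toy callables standing in for AffineCombiner and LstmNode2 (the exact signatures here are assumptions, not zmsp's):

    def make_comb_f(mode, affine=None, lstm=None):
        if mode == "affine":
            # stateless: combine [q, v] and carry no recurrent cache
            return lambda q, v, c: (affine([q, v]), None)
        elif mode == "lstm":
            # stateful: feed the feature through the LSTM cell with its previous state
            return lambda q, v, c: lstm(v, c)
        raise NotImplementedError(mode)

    # comb_f = make_comb_f("affine", affine=lambda xs: sum(xs))
    # comb_f(1.0, 2.0, None) -> (3.0, None)
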