def __init__(self, pc: BK.ParamCollection, conf: MaskLMNodeConf, vpack: VocabPackage):
    super().__init__(pc, None, None)
    self.conf = conf
    # vocab and padder
    self.word_vocab = vpack.get_voc("word")
    self.padder = DataPadder(2, pad_vals=self.word_vocab.pad, mask_range=2)  # todo(note): <pad>-id is very large
    # models
    self.hid_layer = self.add_sub_node("hid", Affine(pc, conf._input_dim, conf.hid_dim, act=conf.hid_act))
    self.pred_layer = self.add_sub_node("pred", Affine(pc, conf.hid_dim, conf.max_pred_rank + 1, init_rop=NoDropRop()))
    if conf.init_pred_from_pretrain:
        npvec = vpack.get_emb("word")
        if npvec is None:
            zwarn("Pretrained vector not provided, skip init pred embeddings!!")
        else:
            with BK.no_grad_env():
                self.pred_layer.ws[0].copy_(BK.input_real(npvec[:conf.max_pred_rank + 1].T))
            zlog(f"Init pred embeddings from pretrained vectors (size={conf.max_pred_rank+1}).")
def __init__(self, pc: BK.ParamCollection, conf: FpEncConf, vpack: VocabPackage):
    super().__init__(pc, None, None)
    self.conf = conf
    # ===== Vocab =====
    self.word_vocab = vpack.get_voc("word")
    self.char_vocab = vpack.get_voc("char")
    self.pos_vocab = vpack.get_voc("pos")
    # avoid no params error
    self._tmp_v = self.add_param("nope", (1,))
    # ===== Model =====
    # embedding
    self.emb = self.add_sub_node("emb", MyEmbedder(self.pc, conf.emb_conf, vpack))
    self.emb_output_dim = self.emb.get_output_dims()[0]
    # bert
    self.bert = self.add_sub_node("bert", Berter2(self.pc, conf.bert_conf))
    self.bert_output_dim = self.bert.get_output_dims()[0]
    # make sure there are inputs
    assert self.emb_output_dim > 0 or self.bert_output_dim > 0
    # middle?
    if conf.middle_dim > 0:
        self.middle_node = self.add_sub_node("mid", Affine(self.pc, self.emb_output_dim + self.bert_output_dim,
                                                           conf.middle_dim, act="elu"))
        self.enc_input_dim = conf.middle_dim
    else:
        self.middle_node = None
        self.enc_input_dim = self.emb_output_dim + self.bert_output_dim  # concat the two parts (if needed)
    # encoder?
    # todo(note): feed compute-on-the-fly hp
    conf.enc_conf._input_dim = self.enc_input_dim
    self.enc = self.add_sub_node("enc", MyEncoder(self.pc, conf.enc_conf))
    self.enc_output_dim = self.enc.get_output_dims()[0]
    # ===== Input Specification =====
    # inputs (word, char, pos) and vocabulary
    self.need_word = self.emb.has_word
    self.need_char = self.emb.has_char
    # todo(warn): currently only allow extra fields for POS
    self.need_pos = False
    if len(self.emb.extra_names) > 0:
        assert len(self.emb.extra_names) == 1 and self.emb.extra_names[0] == "pos"
        self.need_pos = True
    #
    self.word_padder = DataPadder(2, pad_vals=self.word_vocab.pad, mask_range=2)
    self.char_padder = DataPadder(3, pad_lens=(0, 0, conf.char_max_length), pad_vals=self.char_vocab.pad)
    self.pos_padder = DataPadder(2, pad_vals=self.pos_vocab.pad)
def __init__(self, pc: BK.ParamCollection, bconf: BTConf, vpack: VocabPackage):
    super().__init__(pc, None, None)
    self.bconf = bconf
    # ===== Vocab =====
    self.word_vocab = vpack.get_voc("word")
    self.char_vocab = vpack.get_voc("char")
    self.pos_vocab = vpack.get_voc("pos")
    # ===== Model =====
    # embedding
    self.emb = self.add_sub_node("emb", MyEmbedder(self.pc, bconf.emb_conf, vpack))
    emb_output_dim = self.emb.get_output_dims()[0]
    # encoder0 for jpos
    # todo(note): will do nothing if not use_jpos
    bconf.jpos_conf._input_dim = emb_output_dim
    self.jpos_enc = self.add_sub_node("enc0", JPosModule(self.pc, bconf.jpos_conf, self.pos_vocab))
    enc0_output_dim = self.jpos_enc.get_output_dims()[0]
    # encoder
    # todo(0): feed compute-on-the-fly hp
    bconf.enc_conf._input_dim = enc0_output_dim
    self.enc = self.add_sub_node("enc", MyEncoder(self.pc, bconf.enc_conf))
    self.enc_output_dim = self.enc.get_output_dims()[0]
    # ===== Input Specification =====
    # inputs (word, char, pos) and vocabulary
    self.need_word = self.emb.has_word
    self.need_char = self.emb.has_char
    # todo(warn): currently only allow extra fields for POS
    self.need_pos = False
    if len(self.emb.extra_names) > 0:
        assert len(self.emb.extra_names) == 1 and self.emb.extra_names[0] == "pos"
        self.need_pos = True
    # todo(warn): currently only allow one aux field
    self.need_aux = False
    if len(self.emb.dim_auxes) > 0:
        assert len(self.emb.dim_auxes) == 1
        self.need_aux = True
    #
    self.word_padder = DataPadder(2, pad_vals=self.word_vocab.pad, mask_range=2)
    self.char_padder = DataPadder(3, pad_lens=(0, 0, bconf.char_max_length), pad_vals=self.char_vocab.pad)
    self.pos_padder = DataPadder(2, pad_vals=self.pos_vocab.pad)
    #
    self.random_sample_stream = Random.stream(Random.random_sample)
def __init__(self, conf: FpParserConf, vpack: VocabPackage):
    self.conf = conf
    self.vpack = vpack
    tconf = conf.tconf
    # ===== Vocab =====
    self.label_vocab = vpack.get_voc("label")
    # ===== Model =====
    self.pc = BK.ParamCollection(True)
    # bottom-part: input + encoder
    self.enc = FpEncoder(self.pc, conf.encoder_conf, vpack)
    self.enc_output_dim = self.enc.get_output_dims()[0]
    self.enc_lrf_sv = ScheduledValue("enc_lrf", tconf.enc_lrf)
    self.pc.optimizer_set(tconf.enc_optim.optim, self.enc_lrf_sv, tconf.enc_optim,
                          params=self.enc.get_parameters(), check_repeat=True, check_full=True)
    # middle-part: structured layer at the middle (build later for convenient re-loading)
    self.slayer = self.build_slayer()
    self.mid_lrf_sv = ScheduledValue("mid_lrf", tconf.mid_lrf)
    if self.slayer is not None:
        self.pc.optimizer_set(tconf.mid_optim.optim, self.mid_lrf_sv, tconf.mid_optim,
                              params=self.slayer.get_parameters(), check_repeat=True, check_full=True)
    # upper-part: decoder
    self.dec = self.build_decoder()
    self.dec_lrf_sv = ScheduledValue("dec_lrf", tconf.dec_lrf)
    self.pc.optimizer_set(tconf.dec_optim.optim, self.dec_lrf_sv, tconf.dec_optim,
                          params=self.dec.get_parameters(), check_repeat=True, check_full=True)
    # extra aux loss
    conf.masklm_conf._input_dim = self.enc_output_dim
    self.masklm = MaskLMNode(self.pc, conf.masklm_conf, vpack)
    self.pc.optimizer_set(tconf.dec_optim.optim, self.dec_lrf_sv, tconf.dec_optim,
                          params=self.masklm.get_parameters(), check_repeat=True, check_full=True)
    # ===== For training =====
    # schedule values
    self.margin = ScheduledValue("margin", tconf.margin)
    self.lambda_parse = ScheduledValue("lambda_parse", conf.lambda_parse)
    self.lambda_masklm = ScheduledValue("lambda_masklm", conf.lambda_masklm)
    self._scheduled_values = [self.margin, self.enc_lrf_sv, self.mid_lrf_sv, self.dec_lrf_sv,
                              self.lambda_parse, self.lambda_masklm]
    # for refreshing dropouts
    self.previous_refresh_training = True
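# ---------------------------------------------------------------------------
# Note added for clarity (not in the original source): lambda_parse and
# lambda_masklm above are ScheduledValue weights for the parsing loss and the
# auxiliary masked-LM loss. A hypothetical weighted combination in a training
# step would look roughly like the commented line below; the exact accessor on
# ScheduledValue (written here as `.value`) is an assumption, not a confirmed API.
#
#   total_loss = self.lambda_parse.value * parse_loss + self.lambda_masklm.value * masklm_loss
# ---------------------------------------------------------------------------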
def __init__(self, conf: BaseParserConf, vpack: VocabPackage):
    self.conf = conf
    self.vpack = vpack
    tconf = conf.tconf
    # ===== Vocab =====
    self.label_vocab = vpack.get_voc("label")
    # ===== Model =====
    self.pc = BK.ParamCollection(conf.new_name_conv)
    # bottom-part: input + encoder
    self.bter = ParserBT(self.pc, conf.bt_conf, vpack)
    self.enc_output_dim = self.bter.get_output_dims()[0]
    self.enc_lrf_sv = ScheduledValue("enc_lrf", tconf.enc_lrf)
    self.pc.optimizer_set(tconf.enc_optim.optim, self.enc_lrf_sv, tconf.enc_optim,
                          params=self.bter.get_parameters(), check_repeat=True, check_full=True)
    # upper-part: decoder
    # todo(+2): very ugly here!
    self.scorer = self.build_decoder()
    self.dec_lrf_sv = ScheduledValue("dec_lrf", tconf.dec_lrf)
    self.dec2_lrf_sv = ScheduledValue("dec2_lrf", tconf.dec2_lrf)
    try:
        params, params2 = self.scorer.get_split_params()
        self.pc.optimizer_set(tconf.dec_optim.optim, self.dec_lrf_sv, tconf.dec_optim,
                              params=params, check_repeat=True, check_full=False)
        self.pc.optimizer_set(tconf.dec2_optim.optim, self.dec2_lrf_sv, tconf.dec2_optim,
                              params=params2, check_repeat=True, check_full=True)
    except:
        self.pc.optimizer_set(tconf.dec_optim.optim, self.dec_lrf_sv, tconf.dec_optim,
                              params=self.scorer.get_parameters(), check_repeat=True, check_full=True)
    # middle-part: structured layer at the middle (build later for convenient re-loading)
    self.slayer = None
    self.mid_lrf_sv = ScheduledValue("mid_lrf", tconf.mid_lrf)
    # ===== For training =====
    # schedule values
    self.margin = ScheduledValue("margin", tconf.margin)
    self.sched_sampling = ScheduledValue("ss", tconf.sched_sampling)
    self._scheduled_values = [self.margin, self.sched_sampling, self.enc_lrf_sv,
                              self.dec_lrf_sv, self.dec2_lrf_sv, self.mid_lrf_sv]
    self.reg_scores_lambda = conf.tconf.reg_scores_lambda
    # for refreshing dropouts
    self.previous_refresh_training = True
def __init__(self, pc: BK.ParamCollection, comp_name: str, ec_conf: EmbedderCompConf,
             conf: EmbedderNodeConf, vpack: VocabPackage):
    super().__init__(pc, None, None)
    # -----
    self.ec_conf = ec_conf
    self.conf = conf
    self.comp_name, self.comp_dim, self.comp_dropout, self.comp_init_scale = \
        comp_name, ec_conf.comp_dim, ec_conf.comp_drop, ec_conf.comp_init_scale
    self.voc = vpack.get_voc(comp_name, None)
    self.output_dim = self.comp_dim  # by default the input size (may be changed later)
    self.dropout = None  # created later (after deciding output shape)
def __init__(self, pc: BK.ParamCollection, comp_name: str, ec_conf: EmbedderCompConf,
             conf: EmbedderNodeConf, vpack: VocabPackage):
    super().__init__(pc, comp_name, ec_conf, conf, vpack)
    # -----
    # get embeddings
    npvec = None
    if self.ec_conf.comp_init_from_pretrain:
        npvec = vpack.get_emb(comp_name)
        zlog(f"Try to init InputEmbedNode {comp_name} with npvec.shape={npvec.shape if (npvec is not None) else None}")
        if npvec is None:
            zwarn("Warn: cannot get pre-trained embeddings to init!!")
    # get rare unk range
    # - get freq vals, make sure special ones will not be pruned; todo(note): directly use that field
    voc_rare_mask = [float(z is not None and z <= ec_conf.comp_rare_thr) for z in self.voc.final_vals]
    self.rare_mask = BK.input_real(voc_rare_mask)
    self.use_rare_unk = (ec_conf.comp_rare_unk > 0. and ec_conf.comp_rare_thr > 0)
    # --
    # dropout outside explicitly
    self.E = self.add_sub_node(f"E{self.comp_name}", Embedding(
        pc, len(self.voc), self.comp_dim, fix_row0=conf.embed_fix_row0, npvec=npvec, name=comp_name,
        init_rop=NoDropRop(), init_scale=self.comp_init_scale))
    self.create_dropout_node()
def __init__(self, pc: BK.ParamCollection, bconf: BTConf, tconf: 'BaseTrainingConf', vpack: VocabPackage):
    super().__init__(pc, None, None)
    self.bconf = bconf
    # ===== Vocab =====
    self.word_vocab = vpack.get_voc("word")
    self.char_vocab = vpack.get_voc("char")
    self.lemma_vocab = vpack.get_voc("lemma")
    self.upos_vocab = vpack.get_voc("upos")
    self.ulabel_vocab = vpack.get_voc("ulabel")
    # ===== Model =====
    # embedding
    self.emb = self.add_sub_node("emb", MyEmbedder(self.pc, bconf.emb_conf, vpack))
    emb_output_dim = self.emb.get_output_dims()[0]
    self.emb_output_dim = emb_output_dim
    # doc hint
    self.use_doc_hint = bconf.use_doc_hint
    self.dh_combine_method = bconf.dh_combine_method
    if self.use_doc_hint:
        assert len(bconf.emb_conf.dim_auxes) > 0
        # todo(note): currently use the concat of them if input multiple layers
        bconf.dh_conf._input_dim = bconf.emb_conf.dim_auxes[0]  # same as input bert dim
        bconf.dh_conf._output_dim = emb_output_dim  # same as emb_output_dim
        self.dh_node = self.add_sub_node("dh", DocHintModule(pc, bconf.dh_conf))
    else:
        self.dh_node = None
    # encoders
    # shared
    # todo(note): feed compute-on-the-fly hp
    bconf.enc_conf._input_dim = emb_output_dim
    self.enc = self.add_sub_node("enc", MyEncoder(self.pc, bconf.enc_conf))
    tmp_enc_output_dim = self.enc.get_output_dims()[0]
    # privates
    bconf.enc_ef_conf._input_dim = tmp_enc_output_dim
    self.enc_ef = self.add_sub_node("enc_ef", MyEncoder(self.pc, bconf.enc_ef_conf))
    self.enc_ef_output_dim = self.enc_ef.get_output_dims()[0]
    bconf.enc_evt_conf._input_dim = tmp_enc_output_dim
    self.enc_evt = self.add_sub_node("enc_evt", MyEncoder(self.pc, bconf.enc_evt_conf))
    self.enc_evt_output_dim = self.enc_evt.get_output_dims()[0]
    # ===== Input Specification =====
    # inputs (word, lemma, char, upos, ulabel) and vocabulary
    self.need_word = self.emb.has_word
    self.need_char = self.emb.has_char
    # extra fields
    # todo(warn): need to
    self.need_lemma = False
    self.need_upos = False
    self.need_ulabel = False
    for one_extra_name in self.emb.extra_names:
        if one_extra_name == "lemma":
            self.need_lemma = True
        elif one_extra_name == "upos":
            self.need_upos = True
        elif one_extra_name == "ulabel":
            self.need_ulabel = True
        else:
            raise NotImplementedError("UNK extra input name: " + one_extra_name)
    # todo(warn): currently only allow one aux field
    self.need_aux = False
    if len(self.emb.dim_auxes) > 0:
        assert len(self.emb.dim_auxes) == 1
        self.need_aux = True
    # padders
    self.word_padder = DataPadder(2, pad_vals=self.word_vocab.pad, mask_range=2)
    self.char_padder = DataPadder(3, pad_lens=(0, 0, bconf.char_max_length), pad_vals=self.char_vocab.pad)
    self.lemma_padder = DataPadder(2, pad_vals=self.lemma_vocab.pad)
    self.upos_padder = DataPadder(2, pad_vals=self.upos_vocab.pad)
    self.ulabel_padder = DataPadder(2, pad_vals=self.ulabel_vocab.pad)
    #
    self.random_sample_stream = Random.stream(Random.random_sample)
    self.train_skip_noevt_rate = tconf.train_skip_noevt_rate
    self.train_skip_length = tconf.train_skip_length
    self.train_min_length = tconf.train_min_length
    self.test_min_length = tconf.test_min_length
    self.test_skip_noevt_rate = tconf.test_skip_noevt_rate
    self.train_sent_based = tconf.train_sent_based
    # assert not self.train_sent_based, "The basic model should not use this sent-level mode!"
def __init__(self, conf: MtlMlmModelConf, vpack: VocabPackage):
    super().__init__(conf)
    # for easier checking
    self.word_vocab = vpack.get_voc("word")
    # components
    self.embedder = self.add_node("emb", EmbedderNode(self.pc, conf.emb_conf, vpack))
    self.inputter = Inputter(self.embedder, vpack)  # not a node
    self.emb_out_dim = self.embedder.get_output_dims()[0]
    self.enc_attn_count = conf.default_attn_count
    if conf.enc_choice == "vrec":
        self.encoder = self.add_component("enc", VRecEncoder(self.pc, self.emb_out_dim, conf.venc_conf))
        self.enc_attn_count = self.encoder.attn_count
    elif conf.enc_choice == "original":
        conf.oenc_conf._input_dim = self.emb_out_dim
        self.encoder = self.add_node("enc", MyEncoder(self.pc, conf.oenc_conf))
    else:
        raise NotImplementedError()
    zlog(f"Finished building model's encoder {self.encoder}, all size is {self.encoder.count_allsize_parameters()}")
    self.enc_out_dim = self.encoder.get_output_dims()[0]
    # --
    conf.rprep_conf._rprep_vr_conf.matt_conf.head_count = self.enc_attn_count  # make head-count agree
    self.rpreper = self.add_node("rprep", RPrepNode(self.pc, self.enc_out_dim, conf.rprep_conf))
    # --
    self.lambda_agree = self.add_scheduled_value(ScheduledValue(f"agr:lambda", conf.lambda_agree))
    self.agree_loss_f = EntropyHelper.get_method(conf.agree_loss_f)
    # --
    self.masklm = self.add_component("mlm", MaskLMNode(self.pc, self.enc_out_dim, conf.mlm_conf, self.inputter))
    self.plainlm = self.add_component("plm", PlainLMNode(self.pc, self.enc_out_dim, conf.plm_conf, self.inputter))
    # todo(note): here we use attn as dim_pair, do not use pair if not using vrec!!
    self.orderpr = self.add_component("orp", OrderPredNode(
        self.pc, self.enc_out_dim, self.enc_attn_count, conf.orp_conf, self.inputter))
    # =====
    # pre-training pre-load point!!
    if conf.load_pretrain_model_name:
        zlog(f"At preload_pretrain point: Loading from {conf.load_pretrain_model_name}")
        self.pc.load(conf.load_pretrain_model_name, strict=False)
    # =====
    self.dpar = self.add_component("dpar", DparG1Decoder(
        self.pc, self.enc_out_dim, self.enc_attn_count, conf.dpar_conf, self.inputter))
    self.upos = self.add_component("upos", SeqLabNode(
        self.pc, "pos", self.enc_out_dim, self.conf.upos_conf, self.inputter))
    if conf.do_ner:
        if conf.ner_use_crf:
            self.ner = self.add_component("ner", SeqCrfNode(
                self.pc, "ner", self.enc_out_dim, self.conf.ner_conf, self.inputter))
        else:
            self.ner = self.add_component("ner", SeqLabNode(
                self.pc, "ner", self.enc_out_dim, self.conf.ner_conf, self.inputter))
    else:
        self.ner = None
    # for pairwise reprs (no trainable params here!)
    self.rel_dist_embed = self.add_node("oremb", PosiEmbedding2(self.pc, n_dim=self.enc_attn_count, max_val=100))
    self._prepr_f_attn_sum = lambda cache, rdist: BK.stack(cache.list_attn, 0).sum(0) if len(cache.list_attn) > 0 else None
    self._prepr_f_attn_avg = lambda cache, rdist: BK.stack(cache.list_attn, 0).mean(0) if len(cache.list_attn) > 0 else None
    self._prepr_f_attn_max = lambda cache, rdist: BK.stack(cache.list_attn, 0).max(0)[0] if len(cache.list_attn) > 0 else None
    self._prepr_f_attn_last = lambda cache, rdist: cache.list_attn[-1] if len(cache.list_attn) > 0 else None
    self._prepr_f_rdist = lambda cache, rdist: self._get_rel_dist_embed(rdist, False)
    self._prepr_f_rdist_abs = lambda cache, rdist: self._get_rel_dist_embed(rdist, True)
    self.prepr_f = getattr(self, "_prepr_f_" + conf.prepr_choice)  # shortcut
    # --
    self.testing_rand_gen = Random.create_sep_generator(conf.testing_rand_gen_seed)  # especial gen for testing
    # =====
    if conf.orp_loss_special:
        self.orderpr.add_node_special(self.masklm)
    # =====
    # extra one!!
    self.aug_word2 = self.aug_encoder = self.aug_mixturer = None
    if conf.aug_word2:
        self.aug_word2 = self.add_node("aug2", AugWord2Node(self.pc, conf.emb_conf, vpack,
                                                            "word2", conf.aug_word2_dim, self.emb_out_dim))
        if conf.aug_word2_aug_encoder:
            assert conf.enc_choice == "vrec"
            self.aug_detach_drop = self.add_node("dd", Dropout(self.pc, (self.enc_out_dim,), fix_rate=conf.aug_detach_dropout))
            self.aug_encoder = self.add_component("Aenc", VRecEncoder(self.pc, self.emb_out_dim, conf.venc_conf))
            self.aug_mixturer = self.add_node("Amix", BertFeaturesWeightLayer(self.pc, conf.aug_detach_numlayer))
def __init__(self, pc: BK.ParamCollection, econf: EmbedConf, vpack: VocabPackage):
    super().__init__(pc, None, None)
    self.conf = econf
    #
    repr_sizes = []
    # word
    self.has_word = (econf.dim_word > 0)
    if self.has_word:
        npvec = vpack.get_emb("word") if econf.init_words_from_pretrain else None
        self.word_embed = self.add_sub_node("ew", Embedding(self.pc, len(vpack.get_voc("word")), econf.dim_word,
                                                            npvec=npvec, name="word", freeze=econf.word_freeze))
        repr_sizes.append(econf.dim_word)
    # char
    self.has_char = (econf.dim_char > 0)
    if self.has_char:
        # todo(warn): cnns will also use emb's drop?
        self.char_embed = self.add_sub_node("ec", Embedding(self.pc, len(vpack.get_voc("char")), econf.dim_char, name="char"))
        per_cnn_size = econf.char_cnn_hidden // len(econf.char_cnn_windows)
        self.char_cnns = [self.add_sub_node("cnnc", CnnLayer(self.pc, econf.dim_char, per_cnn_size, z,
                                                             pooling="max", act="tanh"))
                          for z in econf.char_cnn_windows]
        repr_sizes.append(econf.char_cnn_hidden)
    # posi: absolute positional embeddings
    self.has_posi = (econf.dim_posi > 0)
    if self.has_posi:
        self.posi_embed = self.add_sub_node("ep", PosiEmbedding(self.pc, econf.dim_posi, econf.posi_clip,
                                                                econf.posi_fix_sincos, econf.posi_freeze))
        repr_sizes.append(econf.dim_posi)
    # extras: like POS, ...
    self.dim_extras = econf.dim_extras
    self.extra_names = econf.extra_names
    zcheck(len(self.dim_extras) == len(self.extra_names), "Unmatched dims and names!")
    self.extra_embeds = []
    for one_extra_dim, one_name in zip(self.dim_extras, self.extra_names):
        self.extra_embeds.append(self.add_sub_node("ext", Embedding(
            self.pc, len(vpack.get_voc(one_name)), one_extra_dim,
            npvec=vpack.get_emb(one_name, None), name="extra:" + one_name)))
        repr_sizes.append(one_extra_dim)
    # auxes
    self.dim_auxes = econf.dim_auxes
    self.fold_auxes = econf.fold_auxes
    self.aux_overall_gammas = []
    self.aux_fold_lambdas = []
    for one_aux_dim, one_aux_fold in zip(self.dim_auxes, self.fold_auxes):
        repr_sizes.append(one_aux_dim)
        # aux gamma and fold trainable lambdas
        self.aux_overall_gammas.append(self.add_param("AG", (), 1.))  # scalar
        self.aux_fold_lambdas.append(self.add_param("AL", (), [1. / one_aux_fold for _ in range(one_aux_fold)]))  # [#fold]
    # =====
    # another projection layer? & set final dim
    if len(repr_sizes) <= 0:
        zwarn("No inputs??")
    # zcheck(len(repr_sizes)>0, "No inputs?")
    self.repr_sizes = repr_sizes
    self.has_proj = (econf.emb_proj_dim > 0)
    if self.has_proj:
        proj_layer = Affine(self.pc, sum(repr_sizes), econf.emb_proj_dim)
        if econf.emb_proj_norm:
            norm_layer = LayerNorm(self.pc, econf.emb_proj_dim)
            self.final_layer = self.add_sub_node("fl", Sequential(self.pc, [proj_layer, norm_layer]))
        else:
            self.final_layer = self.add_sub_node("fl", proj_layer)
        self.output_dim = econf.emb_proj_dim
    else:
        self.final_layer = None
        self.output_dim = sum(repr_sizes)
    # =====
    # special MdDropout: dropout the entire last dim (for word, char, extras, but not posi)
    self.dropmd_word = self.add_sub_node("md", DropoutLastN(pc, lastn=1))
    self.dropmd_char = self.add_sub_node("md", DropoutLastN(pc, lastn=1))
    self.dropmd_extras = [self.add_sub_node("md", DropoutLastN(pc, lastn=1)) for _ in self.extra_names]
    # dropouts for aux
    self.drop_auxes = [self.add_sub_node("aux", Dropout(pc, (one_aux_dim,))) for one_aux_dim in self.dim_auxes]
def main():
    np.random.seed(1234)
    NUM_POS = 10
    # build vocabs
    reader = TextReader("./test_utils.py")
    vb_word = VocabBuilder("w")
    vb_char = VocabBuilder("c")
    for one in reader:
        vb_word.feed_stream(one.tokens)
        vb_char.feed_stream((c for w in one.tokens for c in w))
    voc_word = vb_word.finish()
    voc_char = vb_char.finish()
    voc_pos = VocabBuilder.build_from_stream(range(NUM_POS), name="pos")
    vpack = VocabPackage({"word": voc_word, "char": voc_char, "pos": voc_pos}, {"word": None})
    # build model
    pc = BK.ParamCollection()
    conf_emb = EmbedConf().init_from_kwargs(init_words_from_pretrain=False, dim_char=10, dim_posi=10,
                                            emb_proj_dim=400, dim_extras="50", extra_names="pos")
    conf_emb.do_validate()
    mod_emb = MyEmbedder(pc, conf_emb, vpack)
    conf_enc = EncConf().init_from_kwargs(enc_rnn_type="lstm2", enc_cnn_layer=1, enc_att_layer=1)
    conf_enc._input_dim = mod_emb.get_output_dims()[0]
    mod_enc = MyEncoder(pc, conf_enc)
    enc_output_dim = mod_enc.get_output_dims()[0]
    mod_scorer = BiAffineScorer(pc, enc_output_dim, enc_output_dim, 10)
    # build data
    word_padder = DataPadder(2, pad_lens=(0, 50), mask_range=2)
    char_padder = DataPadder(3, pad_lens=(0, 50, 20))
    word_idxes = []
    char_idxes = []
    pos_idxes = []
    for toks in reader:
        one_words = []
        one_chars = []
        for w in toks.tokens:
            one_words.append(voc_word.get_else_unk(w))
            one_chars.append([voc_char.get_else_unk(c) for c in w])
        word_idxes.append(one_words)
        char_idxes.append(one_chars)
        pos_idxes.append(np.random.randint(voc_pos.trg_len(), size=len(one_words)) + 1)  # pred->trg
    word_arr, word_mask_arr = word_padder.pad(word_idxes)
    pos_arr, _ = word_padder.pad(pos_idxes)
    char_arr, _ = char_padder.pad(char_idxes)
    #
    # run
    rop = layers.RefreshOptions(hdrop=0.2, gdrop=0.2, fix_drop=True)
    for _ in range(5):
        mod_emb.refresh(rop)
        mod_enc.refresh(rop)
        mod_scorer.refresh(rop)
        #
        expr_emb = mod_emb(word_arr, char_arr, [pos_arr])
        zlog(BK.get_shape(expr_emb))
        expr_enc = mod_enc(expr_emb, word_mask_arr)
        zlog(BK.get_shape(expr_enc))
        #
        mask_expr = BK.input_real(word_mask_arr)
        score0 = mod_scorer.paired_score(expr_enc, expr_enc, mask_expr, mask_expr)
        score1 = mod_scorer.plain_score(expr_enc.unsqueeze(-2), expr_enc.unsqueeze(-3),
                                        mask_expr.unsqueeze(-1), mask_expr.unsqueeze(-2))
        #
        zmiss = float(BK.avg(score0 - score1))
        assert zmiss < 0.0001
    zlog("OK")
    pass
def __init__(self, comp_name, vpack: VocabPackage):
    self.comp_name = comp_name
    self.comp_seq_name = f"{comp_name}_seq"
    self.voc = vpack.get_voc(comp_name)
    self.padder = DataPadder(2, pad_vals=0)  # pad 0
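# ---------------------------------------------------------------------------
# Illustrative sketch (added, not part of the original sources): how the
# DataPadder objects constructed in these __init__ methods are typically
# driven, following the call pattern in main() above where
# `word_padder.pad(word_idxes)` returns a padded id array plus a mask array.
# The helper name below is hypothetical and only demonstrates the call shape.
def _demo_pad(padder, idx_lists):
    # idx_lists: variable-length lists of ids, e.g. [[3, 5, 7], [2, 9]]
    arr, mask_arr = padder.pad(idx_lists)
    # arr: rectangular array of ids, shorter rows filled with the configured pad value
    # mask_arr: same leading shape, marking real positions vs. padded ones
    return arr, mask_arr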