def __init__(self, conf: MyIEModelConf, vpack: VocabPackage):
    """Build the IE model: a shared bottom encoder plus task decoders, with a
    separate optimizer / learning-rate-factor schedule for each part."""
    self.conf = conf
    self.vpack = vpack
    tconf = conf.tconf
    # ===== Vocab =====
    # ===== Model =====
    self.pc = BK.ParamCollection(True)
    # bottom-part: input + encoder
    self.bter: MyIEBT = self.build_encoder()
    self.lexi_output_dim = self.bter.emb_output_dim
    # first entry of the encoder's output dims holds the (ef, evt) pair
    self.enc_ef_output_dim, self.enc_evt_output_dim = self.bter.get_output_dims()[0]
    self.enc_lrf_sv = ScheduledValue("enc_lrf", tconf.enc_lrf)
    self.pc.optimizer_set(tconf.enc_optim.optim, self.enc_lrf_sv, tconf.enc_optim,
                          params=self.bter.get_parameters(), check_repeat=True, check_full=True)
    # upper-parts: the decoders, all sharing one optimizer group
    self.decoders: List = self.build_decoders()
    self.dec_lrf_sv = ScheduledValue("dec_lrf", tconf.dec_lrf)
    all_decoder_params = Helper.join_list(z.get_parameters() for z in self.decoders)
    self.pc.optimizer_set(tconf.dec_optim.optim, self.dec_lrf_sv, tconf.dec_optim,
                          params=all_decoder_params, check_repeat=True, check_full=True)
    # ===== For training =====
    # schedule values
    self.margin = ScheduledValue("margin", tconf.margin)
    self._scheduled_values = [self.margin, self.enc_lrf_sv, self.dec_lrf_sv]
    # for refreshing dropouts
    self.previous_refresh_training = True
    # =====
    # others: optional event-type constraints for train/test
    # (presumably KBP17_TYPES is the KBP-2017 event-type set -- confirm)
    type_constraints = {"": None, "kbp17": KBP17_TYPES}
    self.train_constrain_evt_types = type_constraints[conf.tconf.constrain_evt_types]
    self.test_constrain_evt_types = type_constraints[conf.iconf.constrain_evt_types]
def main():
    """Smoke test: run one random batch through RNN / CNN / Transformer encoders
    plus a trailing dropout module, then report completion."""
    pc = BK.ParamCollection()
    n_batch, n_seq = 8, 4
    n_hidden, n_layer = 5, 3
    n_input = n_hidden
    n_ff = 10
    # encoders
    rnn_encoder = layers.RnnLayerBatchFirstWrapper(
        pc, layers.RnnLayer(pc, n_input, n_hidden, n_layer, bidirection=True))
    cnn_encoder = layers.Sequential(
        pc, [layers.CnnLayer(pc, n_input, n_hidden, 3, act="relu") for _ in range(n_layer)])
    att_encoder = layers.Sequential(
        pc, [layers.TransformerEncoderLayer(pc, n_input, n_ff) for _ in range(n_layer)])
    dropout_md = layers.DropoutLastN(pc)
    # refresh dropout settings on every module with the same options
    rop = layers.RefreshOptions(hdrop=0.2, gdrop=0.2, dropmd=0.2, fix_drop=True)
    for module in (rnn_encoder, cnn_encoder, att_encoder, dropout_md):
        module.refresh(rop)
    # random inputs with right-padded masks; valid lengths drawn from [n_seq//2, n_seq)
    x = BK.input_real(np.random.randn(n_batch, n_seq, n_input))
    lengths = np.random.randint(n_seq // 2, n_seq, n_batch)
    x_mask = np.asarray([[1.] * z + [0.] * (n_seq - z) for z in lengths])
    # one forward pass per encoder
    y_rnn = rnn_encoder(x, x_mask)
    y_cnn = cnn_encoder(x, x_mask)
    y_att = att_encoder(x, x_mask)
    zz = dropout_md(y_att)
    print("The end.")
def __init__(self, conf: FpParserConf, vpack: VocabPackage):
    """Build the FP parser: encoder, optional structured middle layer, decoder,
    and an auxiliary masked-LM head; each part registers its own optimizer group."""
    self.conf = conf
    self.vpack = vpack
    tconf = conf.tconf
    # ===== Vocab =====
    self.label_vocab = vpack.get_voc("label")
    # ===== Model =====
    self.pc = BK.ParamCollection(True)
    # bottom-part: input + encoder
    self.enc = FpEncoder(self.pc, conf.encoder_conf, vpack)
    self.enc_output_dim = self.enc.get_output_dims()[0]
    self.enc_lrf_sv = ScheduledValue("enc_lrf", tconf.enc_lrf)
    self.pc.optimizer_set(tconf.enc_optim.optim, self.enc_lrf_sv, tconf.enc_optim,
                          params=self.enc.get_parameters(), check_repeat=True, check_full=True)
    # middle-part: structured layer (built later for convenient re-loading); may be absent
    self.slayer = self.build_slayer()
    self.mid_lrf_sv = ScheduledValue("mid_lrf", tconf.mid_lrf)
    if self.slayer is not None:
        self.pc.optimizer_set(tconf.mid_optim.optim, self.mid_lrf_sv, tconf.mid_optim,
                              params=self.slayer.get_parameters(), check_repeat=True, check_full=True)
    # upper-part: decoder
    self.dec = self.build_decoder()
    self.dec_lrf_sv = ScheduledValue("dec_lrf", tconf.dec_lrf)
    self.pc.optimizer_set(tconf.dec_optim.optim, self.dec_lrf_sv, tconf.dec_optim,
                          params=self.dec.get_parameters(), check_repeat=True, check_full=True)
    # extra aux loss: masked-LM over encoder outputs, sharing the decoder's
    # optimizer settings and lrate schedule
    conf.masklm_conf._input_dim = self.enc_output_dim
    self.masklm = MaskLMNode(self.pc, conf.masklm_conf, vpack)
    self.pc.optimizer_set(tconf.dec_optim.optim, self.dec_lrf_sv, tconf.dec_optim,
                          params=self.masklm.get_parameters(), check_repeat=True, check_full=True)
    # ===== For training =====
    # schedule values
    self.margin = ScheduledValue("margin", tconf.margin)
    self.lambda_parse = ScheduledValue("lambda_parse", conf.lambda_parse)
    self.lambda_masklm = ScheduledValue("lambda_masklm", conf.lambda_masklm)
    self._scheduled_values = [self.margin, self.enc_lrf_sv, self.mid_lrf_sv,
                              self.dec_lrf_sv, self.lambda_parse, self.lambda_masklm]
    # for refreshing dropouts
    self.previous_refresh_training = True
def __init__(self, conf: BaseParserConf, vpack: VocabPackage):
    """Build the base parser: encoder plus decoder, with scheduled training values.

    Fix: the split-vs-single optimizer registration previously used a bare
    ``except:``, which (a) also caught KeyboardInterrupt/SystemExit and
    (b) guarded the ``optimizer_set`` calls themselves, so a failure there
    would silently re-register the full parameter set via the fallback.
    Now only the ``get_split_params()`` probe is guarded, catching only
    ``Exception``, and the success path runs in ``else``.
    """
    self.conf = conf
    self.vpack = vpack
    tconf = conf.tconf
    # ===== Vocab =====
    self.label_vocab = vpack.get_voc("label")
    # ===== Model =====
    self.pc = BK.ParamCollection(conf.new_name_conv)
    # bottom-part: input + encoder
    self.bter = ParserBT(self.pc, conf.bt_conf, vpack)
    self.enc_output_dim = self.bter.get_output_dims()[0]
    self.enc_lrf_sv = ScheduledValue("enc_lrf", tconf.enc_lrf)
    self.pc.optimizer_set(tconf.enc_optim.optim, self.enc_lrf_sv, tconf.enc_optim,
                          params=self.bter.get_parameters(), check_repeat=True, check_full=True)
    # upper-part: decoder
    # todo(+2): very ugly here!
    self.scorer = self.build_decoder()
    self.dec_lrf_sv = ScheduledValue("dec_lrf", tconf.dec_lrf)
    self.dec2_lrf_sv = ScheduledValue("dec2_lrf", tconf.dec2_lrf)
    try:
        # some scorers expose two parameter groups with separate lrate schedules
        params, params2 = self.scorer.get_split_params()
    except Exception:
        # scorer does not support split params -> register everything as one group
        self.pc.optimizer_set(tconf.dec_optim.optim, self.dec_lrf_sv, tconf.dec_optim,
                              params=self.scorer.get_parameters(), check_repeat=True, check_full=True)
    else:
        self.pc.optimizer_set(tconf.dec_optim.optim, self.dec_lrf_sv, tconf.dec_optim,
                              params=params, check_repeat=True, check_full=False)
        self.pc.optimizer_set(tconf.dec2_optim.optim, self.dec2_lrf_sv, tconf.dec2_optim,
                              params=params2, check_repeat=True, check_full=True)
    # middle-part: structured layer at the middle (build later for convenient re-loading)
    self.slayer = None
    self.mid_lrf_sv = ScheduledValue("mid_lrf", tconf.mid_lrf)
    # ===== For training =====
    # schedule values
    self.margin = ScheduledValue("margin", tconf.margin)
    self.sched_sampling = ScheduledValue("ss", tconf.sched_sampling)
    self._scheduled_values = [self.margin, self.sched_sampling, self.enc_lrf_sv,
                              self.dec_lrf_sv, self.dec2_lrf_sv, self.mid_lrf_sv]
    self.reg_scores_lambda = conf.tconf.reg_scores_lambda
    # for refreshing dropouts
    self.previous_refresh_training = True
def __init__(self, conf: BaseModelConf):
    """Base model skeleton: one parameter collection, one main learning-rate-factor
    schedule, and ordered registries for sub-nodes and components."""
    self.conf = conf
    # ===== Model =====
    self.pc = BK.ParamCollection()
    self.main_lrf = ScheduledValue("main:lrf", conf.main_lrf)
    self._scheduled_values = [self.main_lrf]
    # -----
    # named sub-nodes and higher-level modules, kept in insertion order
    self.nodes: Dict[str, BasicNode] = OrderedDict()
    self.components: Dict[str, BaseModule] = OrderedDict()
    # for refreshing dropouts
    self.previous_refresh_training = True
def main():
    """Smoke test: build vocabs from this file's own tokens, run the
    embed -> encode -> biaffine-scorer pipeline, and check that paired and
    plain scoring agree.

    Fix: the final check asserted ``zmiss < 0.0001`` on the *signed* mean
    difference, which also passes for arbitrarily large negative
    discrepancies; it now bounds the absolute value.
    """
    np.random.seed(1234)
    NUM_POS = 10
    # build vocabs from the tokens of a local source file
    reader = TextReader("./test_utils.py")
    vb_word = VocabBuilder("w")
    vb_char = VocabBuilder("c")
    for one in reader:
        vb_word.feed_stream(one.tokens)
        vb_char.feed_stream((c for w in one.tokens for c in w))
    voc_word = vb_word.finish()
    voc_char = vb_char.finish()
    voc_pos = VocabBuilder.build_from_stream(range(NUM_POS), name="pos")
    vpack = VocabPackage({"word": voc_word, "char": voc_char, "pos": voc_pos}, {"word": None})
    # build model: embedder -> encoder -> biaffine scorer
    pc = BK.ParamCollection()
    conf_emb = EmbedConf().init_from_kwargs(init_words_from_pretrain=False, dim_char=10, dim_posi=10,
                                            emb_proj_dim=400, dim_extras="50", extra_names="pos")
    conf_emb.do_validate()
    mod_emb = MyEmbedder(pc, conf_emb, vpack)
    conf_enc = EncConf().init_from_kwargs(enc_rnn_type="lstm2", enc_cnn_layer=1, enc_att_layer=1)
    conf_enc._input_dim = mod_emb.get_output_dims()[0]
    mod_enc = MyEncoder(pc, conf_enc)
    enc_output_dim = mod_enc.get_output_dims()[0]
    mod_scorer = BiAffineScorer(pc, enc_output_dim, enc_output_dim, 10)
    # build data
    word_padder = DataPadder(2, pad_lens=(0, 50), mask_range=2)
    char_padder = DataPadder(3, pad_lens=(0, 50, 20))
    word_idxes = []
    char_idxes = []
    pos_idxes = []
    # NOTE(review): assumes TextReader supports a second full iteration -- confirm
    for toks in reader:
        one_words = []
        one_chars = []
        for w in toks.tokens:
            one_words.append(voc_word.get_else_unk(w))
            one_chars.append([voc_char.get_else_unk(c) for c in w])
        word_idxes.append(one_words)
        char_idxes.append(one_chars)
        pos_idxes.append(np.random.randint(voc_pos.trg_len(), size=len(one_words)) + 1)  # pred->trg
    word_arr, word_mask_arr = word_padder.pad(word_idxes)
    pos_arr, _ = word_padder.pad(pos_idxes)
    char_arr, _ = char_padder.pad(char_idxes)
    #
    # run: several refresh cycles, a forward pass and the agreement check each time
    rop = layers.RefreshOptions(hdrop=0.2, gdrop=0.2, fix_drop=True)
    for _ in range(5):
        mod_emb.refresh(rop)
        mod_enc.refresh(rop)
        mod_scorer.refresh(rop)
        #
        expr_emb = mod_emb(word_arr, char_arr, [pos_arr])
        zlog(BK.get_shape(expr_emb))
        expr_enc = mod_enc(expr_emb, word_mask_arr)
        zlog(BK.get_shape(expr_enc))
        #
        mask_expr = BK.input_real(word_mask_arr)
        score0 = mod_scorer.paired_score(expr_enc, expr_enc, mask_expr, mask_expr)
        score1 = mod_scorer.plain_score(expr_enc.unsqueeze(-2), expr_enc.unsqueeze(-3),
                                        mask_expr.unsqueeze(-1), mask_expr.unsqueeze(-2))
        # paired and plain scoring must agree within tolerance (absolute bound)
        zmiss = float(BK.avg(score0 - score1))
        assert abs(zmiss) < 0.0001
    zlog("OK")