def __init__(self, conf: MyIEModelConf, vpack: VocabPackage):
    """Assemble the model: parameter collection, encoder, decoders and optimizers.

    Args:
        conf: model configuration; its ``tconf`` (training) and ``iconf``
            (inference) sub-configs are read here.
        vpack: vocabulary package, stored on the instance as-is.
    """
    self.conf = conf
    self.vpack = vpack
    tconf = conf.tconf
    # ===== Model =====
    self.pc = BK.ParamCollection(True)
    # --- bottom part: input + encoder
    self.bter: MyIEBT = self.build_encoder()
    self.lexi_output_dim = self.bter.emb_output_dim
    # only the first entry of the reported output dims is used: (ef-dim, evt-dim)
    ef_dim, evt_dim = self.bter.get_output_dims()[0]
    self.enc_ef_output_dim = ef_dim
    self.enc_evt_output_dim = evt_dim
    self.enc_lrf_sv = ScheduledValue("enc_lrf", tconf.enc_lrf)
    self.pc.optimizer_set(
        tconf.enc_optim.optim,
        self.enc_lrf_sv,
        tconf.enc_optim,
        params=self.bter.get_parameters(),
        check_repeat=True,
        check_full=True,
    )
    # --- upper parts: the decoders
    self.decoders: List = self.build_decoders()
    self.dec_lrf_sv = ScheduledValue("dec_lrf", tconf.dec_lrf)
    self.pc.optimizer_set(
        tconf.dec_optim.optim,
        self.dec_lrf_sv,
        tconf.dec_optim,
        params=Helper.join_list(dec.get_parameters() for dec in self.decoders),
        check_repeat=True,
        check_full=True,
    )
    # ===== For training =====
    # values that follow a schedule during training
    self.margin = ScheduledValue("margin", tconf.margin)
    self._scheduled_values = [self.margin, self.enc_lrf_sv, self.dec_lrf_sv]
    # remembers the last training flag used when refreshing dropouts
    self.previous_refresh_training = True
    # ===== others =====
    # optional restriction of event types, selected by name in the train/test configs
    evt_type_constraints = {"": None, "kbp17": KBP17_TYPES}
    self.train_constrain_evt_types = evt_type_constraints[tconf.constrain_evt_types]
    self.test_constrain_evt_types = evt_type_constraints[conf.iconf.constrain_evt_types]
def do_join(self, insts_target: str, jcode: str) -> List:
    """Evaluate ``jcode`` once per instance and join the per-instance results.

    The expression in ``jcode`` can refer to the current instance as ``d`` and
    to ``self.vars`` as ``vs`` (``self`` and ``insts`` are exposed as well).

    Args:
        insts_target: name handed to ``self.get_and_check_type`` together with
            ``list`` (presumably the name of a stored list variable -- confirm
            against that helper).
        jcode: Python expression source; its per-instance values are passed to
            ``Helper.join_list``.

    Returns:
        The result of ``Helper.join_list`` over all per-instance values.
    """
    vs = self.vars
    # SECURITY: ``jcode`` is executed as arbitrary Python code; it must only
    # ever come from a trusted source (e.g. the interactive user).
    _ff = compile(jcode, "", "eval")
    insts = self.get_and_check_type(insts_target, list)
    # Evaluate with an explicit namespace.  A bare ``eval(_ff)`` inside a list
    # comprehension only sees the comprehension's own scope on Python < 3.12,
    # so names such as ``vs`` were not reachable from ``jcode`` there.
    scope = {"self": self, "vs": vs, "insts": insts}
    module_globals = globals()
    ret = []
    for d in insts:
        scope["d"] = d
        ret.append(eval(_ff, module_globals, scope))
    ret = Helper.join_list(ret)
    zlog(f"Join-list by {jcode}: from {len(insts)} to {len(ret)}")
    return ret
def set_children_info(self, oracle_strategy, label_ranking_dict: Dict = None, free_dist_alpha: float = 0.):
    """Compute ``self.children_list`` and ``self.descendant_list`` from the heads.

    Args:
        oracle_strategy: ordering of each node's children; one of ``"i2o"``,
            ``"label"``, ``"n2f"``, ``"free"`` or ``"l2r"`` (anything else
            trips an assertion).
        label_ranking_dict: sort rank per first-level label; only read for
            the ``"label"`` strategy.
        free_dist_alpha: stored as ``self.free_dist_alpha`` for the ``"free"``
            strategy before ``self.shuffle_children_free()`` is called.
    """
    heads = self.heads.vals
    num_nodes = len(heads)
    self.children_list = [[] for _ in range(num_nodes)]
    # per node: list of layers (children, grand-children, ...), filled below
    tmp_descendant_list = [None] * num_nodes
    # position 0 (root) is skipped; modifiers are appended in left-to-right order
    for mod, head in enumerate(heads[1:], 1):
        self.children_list[head].append(mod)
    # re-arrange the children order according to the strategy
    if oracle_strategy == "i2o":
        # children left of the head get negated keys, right children keep theirs
        for head, kids in enumerate(self.children_list):
            kids.sort(key=lambda m, head=head: -m if m < head else m)
    elif oracle_strategy == "label":
        # todo(warn): only use first level!
        level0_labels = [lab.split(":")[0] for lab in self.labels.vals]
        for kids in self.children_list:
            kids.sort(key=lambda m: label_ranking_dict[level0_labels[m]])
    elif oracle_strategy == "n2f":
        self.shuffle_children_n2f()
    elif oracle_strategy == "free":
        self.free_dist_alpha = free_dist_alpha
        self.shuffle_children_free()
    else:
        # plain left-to-right order is what the loop above already produced
        assert oracle_strategy == "l2r"
    # todo(+N): does the order of descendant list matter?
    # todo(+N): depth-first or breadth-first? (currently select the latter)

    # =====
    def _recursive_add(cur_n):
        # re-read the attribute: the shuffle helpers above may have changed it
        kids = self.children_list[cur_n]  # List[int]
        for kid in kids:
            _recursive_add(kid)

        def _gather_layer(depth):
            # merge what every child holds at this depth (nothing if it is shallower)
            return Helper.join_list(
                tmp_descendant_list[kid][depth] if depth < len(tmp_descendant_list[kid]) else []
                for kid in kids)

        layers = [kids]
        depth = 0
        merged = _gather_layer(depth)
        while len(merged) != 0:
            layers.append(merged)
            depth += 1
            merged = _gather_layer(depth)
        tmp_descendant_list[cur_n] = layers
    # =====

    _recursive_add(0)
    # flatten the layers of every node into a single descendant list
    self.descendant_list = [Helper.join_list(layers) for layers in tmp_descendant_list]
def _recursive_add(cur_n):
    """Fill ``tmp_descendant_list[cur_n]`` with the layered descendants of ``cur_n``.

    Layer 0 is the node's own children list; each further layer merges, via
    ``Helper.join_list``, what the children hold one layer up.  Children are
    processed first so that their entries exist when they are read.
    """
    kids = self.children_list[cur_n]  # List[int]
    for kid in kids:
        _recursive_add(kid)

    def _gather_layer(depth):
        # a child with fewer layers than `depth` contributes an empty list
        return Helper.join_list(
            tmp_descendant_list[kid][depth] if depth < len(tmp_descendant_list[kid]) else []
            for kid in kids)

    layers = [kids]
    depth = 0
    merged = _gather_layer(depth)
    # stop at the first depth where no child has anything left
    while len(merged) != 0:
        layers.append(merged)
        depth += 1
        merged = _gather_layer(depth)
    tmp_descendant_list[cur_n] = layers
def main():
    """Smoke-test the streamer classes by iterating each of them ten times.

    Every pass over the cached streams must yield the same set of items as
    the plain stream; the flattened batches of the arranger must do so as
    well once ``48`` is added back (presumably it is dropped through the
    ``x == 48`` predicate -- confirm against ``BatchArranger``).
    """
    plain = IterStreamer(range(200))
    shuffled_cache = InstCacher(range(200), shuffle=True)
    concatenated = MultiCatStreamer([IterStreamer(range(100, 200)), IterStreamer(range(100))])
    cat_cache = InstCacher(concatenated)
    batches = BatchArranger(
        InstCacher(IterStreamer(range(200))),
        8,
        10,
        None,
        lambda x: x == 48,
        None,
        lambda x: (x - 24)**2,
        True,
    )
    #
    nums = set(plain)
    for _ in range(10):
        assert nums == set(shuffled_cache)
        assert nums == set(cat_cache)
        zz = list(batches)
        assert nums == set(Helper.join_list(zz) + [48])
def inference_on_batch(self, insts: List[ParseInstance], **kwargs):
    """Decode one batch without gradients and store predictions on the instances.

    Args:
        insts: the batch; each instance receives ``pred_heads`` and
            ``pred_labels`` values.
        **kwargs: accepted but not read here.

    Returns:
        Dict with the number of sentences (``"sent"``) and the summed
        instance lengths (``"tok"``).
    """
    with BK.no_grad_env():
        self.refresh_batch(False)
        # pruning mask and scores from g1
        valid_mask, go1_pack = self._get_g1_pack(
            insts, self.lambda_g1_arc_testing, self.lambda_g1_lab_testing)
        # encode
        input_repr, enc_repr, jpos_pack, mask_arr = self.bter.run(insts, False)
        mask_expr = BK.input_real(mask_arr)
        # decode
        final_valid_expr = self._make_final_valid(valid_mask, mask_expr)
        ret_heads, ret_labels, _, _ = self.dl.decode(
            insts, enc_repr, final_valid_expr, go1_pack, False, 0.)
        # collect the results together
        all_heads = Helper.join_list(ret_heads)
        if ret_labels is not None:
            all_labels = np.concatenate(ret_labels, 0)
        else:
            # todo(note): simply get labels from the go1-label classifier; must provide g1parser
            if go1_pack is None:
                _, go1_pack = self._get_g1_pack(insts, 1., 1.)
            label_scores = go1_pack[1]
            _, best_label_idxes = label_scores.max(-1)  # [bs, slen, slen]
            padded_heads, _ = self.predict_padder.pad(all_heads)  # [bs, slen]
            heads_expr = BK.input_idx(padded_heads)
            # pick, for every token, the best label at its predicted head
            labels_expr = BK.gather_one_lastdim(best_label_idxes, heads_expr).squeeze(-1)
            all_labels = BK.get_value(labels_expr)  # [bs, slen]
        # ===== assign, todo(warn): here, the labels are directly original idx, no need to change
        for idx, inst in enumerate(insts):
            # one extra leading position on top of the instance length
            upto = len(inst) + 1
            inst.pred_heads.set_vals(all_heads[idx][:upto])  # directly int-val for heads
            inst.pred_labels.build_vals(all_labels[idx][:upto], self.label_vocab)
        # =====
        # put jpos result (possibly)
        self.jpos_decode(insts, jpos_pack)
        # -----
        return {"sent": len(insts), "tok": sum(len(inst) for inst in insts)}
def get_split_params(self):
    """Return the parameters in two groups.

    Returns:
        A pair ``(params0, params1)``: the joined parameters of ``arc_m``,
        ``arc_h``, ``lab_m`` and ``lab_h``, and the joined parameters of
        ``arc_scorer`` and ``lab_scorer``.
    """
    first_group = [self.arc_m, self.arc_h, self.lab_m, self.lab_h]
    params0 = Helper.join_list(node.get_parameters() for node in first_group)
    second_group = [self.arc_scorer, self.lab_scorer]
    params1 = Helper.join_list(node.get_parameters() for node in second_group)
    return params0, params1
def pred_events(self):
    """Return the ``pred_events`` of all sentences in ``self.sents``, joined."""
    per_sentence = (sent.pred_events for sent in self.sents)
    return Helper.join_list(per_sentence)
def pred_entity_fillers(self):
    """Return the ``pred_entity_fillers`` of all sentences in ``self.sents``, joined."""
    per_sentence = (sent.pred_entity_fillers for sent in self.sents)
    return Helper.join_list(per_sentence)
def subword_typeids(self):
    """Return the joined ``self.cur_typeids``, or ``None`` when they are unset."""
    typeids = self.cur_typeids
    return None if typeids is None else Helper.join_list(typeids)
def subword_is_start(self):
    """Return ``self.cur_is_starts`` joined through ``Helper.join_list``."""
    is_starts = self.cur_is_starts
    return Helper.join_list(is_starts)
def subword_ids(self):
    """Return ``self.cur_ids`` joined through ``Helper.join_list``."""
    ids = self.cur_ids
    return Helper.join_list(ids)
def _split_into_pieces(items, num_pieces):
    """Cut ``items`` into exactly ``num_pieces`` contiguous, near-equal slices.

    The first ``len(items) % num_pieces`` slices are one element longer than
    the rest; concatenating the slices in order gives back ``items``.

    Raises:
        ValueError: if ``num_pieces`` is smaller than 1.
    """
    if num_pieces < 1:
        raise ValueError(f"num_pieces must be >= 1, got {num_pieces}")
    base, extra = divmod(len(items), num_pieces)
    pieces = []
    start = 0
    for piece_id in range(num_pieces):
        end = start + base + (1 if piece_id < extra else 0)
        pieces.append(items[start:end])
        start = end
    return pieces


def main(args):
    """Score dev/test with a model trained on everything, and train by leave-one-piece-out.

    Steps:
      1. train on the full training set (this run also builds the dictionary)
         and score the dev and test files;
      2. shuffle the training instances, split them into ``conf.pieces``
         pieces and, for every piece, train on the other pieces and score the
         held-out one;
      3. put the held-out scores back into the original instance order (by
         ``inst_idx``) and write them to ``train.scores.pkl``.

    Args:
        args: command-line style arguments used to update ``PsConf``.
    """
    conf = PsConf()
    conf.update_from_args(args)
    # read the data
    path_train, path_dev, path_test = [get_data(z) for z in [conf.train, conf.dev, conf.test]]
    pretrain_file = get_data(conf.pretrain_file)
    train_insts = list(get_data_reader(path_train, "conllu", "", False, ""))
    dev_insts = list(get_data_reader(path_dev, "conllu", "", False, ""))
    test_insts = list(get_data_reader(path_test, "conllu", "", False, ""))
    use_pos = conf.use_pos
    num_pieces = conf.pieces
    max_epoch = conf.max_epoch
    reg_scores_lambda = conf.reg_scores_lambda
    cur_run = conf.cur_run
    zlog(f"Read from train/dev/test: {len(train_insts)}/{len(dev_insts)}/{len(test_insts)}, split train into {num_pieces}")
    # others
    RGPU = os.getenv("RGPU", "")
    # first train on all: 1. get dict (only build once), 2: score dev/test
    with Timer("train", "Train-ALL"):
        cur_conf, cur_model = "_conf.all", "_model.all"
        cur_load_model = cur_model + ".best"
        cur_base_opt = get_base_opt(cur_conf, cur_model, use_pos, True, max_epoch, reg_scores_lambda, cur_run)
        system(get_train_cmd(RGPU, cur_base_opt, path_train, path_dev, path_test, pretrain_file), pp=True)
        system(get_score_cmd(RGPU, cur_conf, cur_load_model, path_dev, "dev.scores.pkl"), pp=True)
        system(get_score_cmd(RGPU, cur_conf, cur_load_model, path_test, "test.scores.pkl"), pp=True)
    # then training on the pieces (leaving one out)
    # first split into pieces
    Random.shuffle(train_insts)
    # Balanced split: chunking by ceil(len / num_pieces) could produce fewer
    # than num_pieces chunks (e.g. 9 instances into 4 pieces gives only 3),
    # which tripped the assertion below after the expensive run on all data.
    train_pieces = _split_into_pieces(train_insts, num_pieces)
    zlog(f"Split training into {num_pieces}: {[len(x) for x in train_pieces]}")
    assert len(train_pieces) == num_pieces
    # next train each of the pieces
    for piece_id in range(num_pieces):
        with Timer("train", f"Train-{piece_id}"):
            # get current training pieces: everything except the held-out one
            cur_training_insts = Helper.join_list([train_pieces[x] for x in range(num_pieces) if x != piece_id])
            cur_testing_insts = train_pieces[piece_id]
            # write files
            cur_path_train, cur_path_test = f"tmp.train.{piece_id}.conllu", f"tmp.test.{piece_id}.conllu"
            write_insts(cur_path_train, cur_training_insts)
            write_insts(cur_path_test, cur_testing_insts)
            cur_conf, cur_model = f"_conf.{piece_id}", f"_model.{piece_id}"
            cur_load_model = cur_model + ".best"
            # no build dict, reuse previous
            cur_base_opt = get_base_opt(cur_conf, cur_model, use_pos, False, max_epoch, reg_scores_lambda, cur_run)
            system(get_train_cmd(RGPU, cur_base_opt, cur_path_train, path_dev, cur_path_test, pretrain_file), pp=True)
            system(get_score_cmd(RGPU, cur_conf, cur_load_model, cur_path_test, f"tmp.test.{piece_id}.scores.pkl"), pp=True)
    # finally put them in order
    # pieces are contiguous slices of the shuffled list, so reading them back
    # in piece order lines the results up with the shuffled `train_insts`
    all_results = []
    for piece_id in range(num_pieces):
        all_results.extend(read_results(f"tmp.test.{piece_id}.scores.pkl"))
    # reorder to the original order
    orig_indexes = [z.inst_idx for z in train_insts]
    orig_results = [None] * len(orig_indexes)
    for new_idx, orig_idx in enumerate(orig_indexes):
        assert orig_results[orig_idx] is None
        orig_results[orig_idx] = all_results[new_idx]
    # saving
    write_results("train.scores.pkl", orig_results)
    zlog("The end.")
def get_scheduled_values(self):
    """Return this object's scheduled values followed by those of its components."""
    own_values = self._scheduled_values
    component_values = Helper.join_list(
        comp.get_scheduled_values() for comp in self.components.values())
    return own_values + component_values