def _run_end(self):
    x = self.test_recorder.summary()
    res = self.res_manager.end()
    x.update(res)
    MltDevResult.calc_acc(x)
    Helper.printd(x, sep=" || ")
    return x

def _run_train_report(self):
    x = self.train_recorder.summary()
    y = GLOBAL_RECORDER.summary()
    if len(y) > 0:
        x.update(y)
    MltDevResult.calc_acc(x)
    Helper.printd(x, " || ")
    return RecordResult(x, score=x.get("res", 0.))

def _run_train_report(self):
    x = self.train_recorder.summary()
    y = GLOBAL_RECORDER.summary()
    # todo(warn): get loss/tok
    x["loss_tok"] = x.get("loss_sum", 0.) / x["tok"]
    if len(y) > 0:
        x.update(y)
    # zlog(x, "report")
    Helper.printd(x, " || ")
    return RecordResult(x)

def _run_train_report(self):
    x = self.train_recorder.summary()
    y = GLOBAL_RECORDER.summary()
    # todo(warn): get loss/tok
    # todo(note): too complex to div here, only accumulating the sums.
    # x["loss_tok"] = x.get("loss_sum", 0.)/x["tok"]
    if len(y) > 0:
        x.update(y)
    Helper.printd(x, " || ")
    return RecordResult(x)

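# A minimal sketch (not part of the original trainer) of what the todo(note)
# above defers: the recorder only accumulates "loss_sum" and "tok", so the
# per-token average is left to whoever consumes the summary. The key names
# follow the commented-out line above.
def loss_per_token(summary: dict) -> float:
    # guard against an empty report; 0 tokens means nothing was recorded
    tok = summary.get("tok", 0)
    return summary.get("loss_sum", 0.) / tok if tok > 0 else 0.

# e.g. loss_per_token({"loss_sum": 12.5, "tok": 50}) -> 0.25
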
def main(args):
    conf: DecodeAConf = init_everything(args, DecodeAConf)
    dconf, mconf = conf.dconf, conf.mconf
    iconf = mconf.iconf
    # vocab
    vpack = IEVocabPackage.build_by_reading(conf)
    # prepare data
    test_streamer = get_data_reader(dconf.test, dconf.input_format, dconf.use_label0, dconf.noef_link0,
                                    dconf.aux_repr_test, max_evt_layers=dconf.max_evt_layers)
    # model
    model = build_model(conf.model_type, conf, vpack)
    model.load(dconf.model_load_name)
    # use bert?
    if dconf.use_bert:
        bmodel = get_berter(dconf.bconf)
        test_streamer = BerterDataAuger(test_streamer, bmodel, "aux_repr")
    # finally prepare iter (No Cache!!, actually no batch_stream)
    test_inst_preparer = model.get_inst_preper(False)
    test_iter = index_stream(test_streamer, vpack, False, False, test_inst_preparer)
    # =====
    # run
    decoder = ArgAugDecoder(conf.aconf, model)
    all_docs = []
    stat_recorder = StatRecorder(False)
    with Timer(tag="Decode", info="Decoding", print_date=True):
        with zopen(dconf.output_file, 'w') as fd:
            data_writer = get_data_writer(fd, dconf.output_format)
            for one_doc in test_iter:
                info = decoder.decode(one_doc)
                stat_recorder.record(info)
                if conf.verbose:
                    zlog(f"Decode one doc, id={one_doc.doc_id} info={info}")
                # release resources
                for one_sent in one_doc.sents:
                    one_sent.extra_features["aux_repr"] = None  # todo(note): special name!
                # write output
                data_writer.write([one_doc])
                # keep the docs only when they are needed for evaluation later
                if conf.do_eval:
                    all_docs.append(one_doc)
    if conf.verbose:
        zlog(f"Finish decoding, overall: {stat_recorder.summary()}")
    # eval?
    if conf.do_eval:
        evaler = MyIEEvaler(MyIEEvalerConf())
        result = evaler.eval(all_docs, all_docs)
        Helper.printd(result)
    zlog("The end.")

def main():
    utils.init("zlog", 1234)
    z = StatRecorder(True)
    times = Random.randint(100)
    for _ in range(times):
        with z.go():
            z.record_kv("distr_n", Random.randint(10))
    Helper.printd(z.summary(), "\n")
    #
    cc = Conf0()
    cc.update_from_args(["a:10", "y:www", "z.x:1"])

def yield_data(self, files):
    #
    if not isinstance(files, (list, tuple)):
        files = [files]
    #
    cur_num = 0
    for f in files:
        cur_num += 1
        zlog("-----\nDataReader: [#%d] Start reading file %s." % (cur_num, f))
        with zopen(f) as fd:
            for z in self._yield_tokens(fd):
                yield z
        if cur_num % self.report_freq == 0:
            zlog("** DataReader: [#%d] Summary till now:" % cur_num)
            Helper.printd(self.stats)
    zlog("=====\nDataReader: End reading ALL (#%d) ==> Summary ALL:" % cur_num)
    Helper.printd(self.stats)

def main(args):
    conf, model, vpack, test_iter = prepare_test(args, AnalyzeConf)
    # make sure the model is an order-1 graph model, otherwise cannot run through
    assert isinstance(model, G1Parser) and isinstance(conf.pconf, G1ParserConf)
    # =====
    # helpers
    all_stater = StatRecorder(False)

    def _stat(k, v):
        all_stater.record_kv(k, v)

    # check agreement
    def _agree2(a, b, name):
        agreement = (np.asarray(a) == np.asarray(b))
        num_agree = int(agreement.sum())
        _stat(name, num_agree)

    # do not care about efficiency here!
    step2_pack = []
    for cur_insts in test_iter:
        # score and prune
        valid_mask, arc_score, label_score, mask_expr, marginals = model.prune_on_batch(cur_insts, conf.zprune)
        # greedy on raw scores
        greedy_label_scores, greedy_label_mat_idxes = label_score.max(-1)  # [*, m, h]
        greedy_all_scores, greedy_arc_idxes = (arc_score + greedy_label_scores).max(-1)  # [*, m]
        greedy_label_idxes = greedy_label_mat_idxes.gather(-1, greedy_arc_idxes.unsqueeze(-1)).squeeze(-1)  # [*, m]
        # greedy on marginals (arc only)
        greedy_marg_arc_scores, greedy_marg_arc_idxes = marginals.max(-1)  # [*, m]
        entropy_marg = -(marginals * (marginals + 1e-10 * (marginals == 0.).float()).log()).sum(-1)  # [*, m]
        # decode
        model.inference_on_batch(cur_insts)
        # =====
        # snapshot all local tensors into numpy arrays on a ZObject for the
        # per-instance analysis below (non-tensor locals are skipped)
        z = ZObject()
        keys = list(locals().keys())
        for k in keys:
            v = locals()[k]
            try:
                setattr(z, k, v.cpu().detach().numpy())
            except:
                pass
        # =====
        for idx in range(len(cur_insts)):
            one_inst: ParseInstance = cur_insts[idx]
            one_len = len(one_inst) + 1  # [1, len)
            _stat("all_edges", one_len - 1)
            arc_gold = one_inst.heads.vals[1:]
            arc_mst = one_inst.pred_heads.vals[1:]
            arc_gma = z.greedy_marg_arc_idxes[idx][1:one_len]
            # step 1: decoding agreement, how many edges agree: gold, mst-decode, greedy-marginal
            arcs = {"gold": arc_gold, "mst": arc_mst, "gma": arc_gma}
            cmp_keys = sorted(arcs.keys())
            for i in range(len(cmp_keys)):
                for j in range(i + 1, len(cmp_keys)):
                    n1, n2 = cmp_keys[i], cmp_keys[j]
                    _agree2(arcs[n1], arcs[n2], f"{n1}_{n2}")
            # step 2: confidence
            arc_agree = (np.asarray(arc_gold) == np.asarray(arc_mst))
            arc_marginals_mst = z.marginals[idx][range(1, one_len), arc_mst]
            arc_marginals_gold = z.marginals[idx][range(1, one_len), arc_gold]
            arc_entropy = z.entropy_marg[idx][1:one_len]
            for tidx in range(one_len - 1):
                step2_pack.append([int(arc_agree[tidx]),
                                   min(1., float(arc_marginals_mst[tidx])),
                                   min(1., float(arc_marginals_gold[tidx])),
                                   float(arc_entropy[tidx])])
    # step 2: bucket by marginals
    if True:
        NUM_BUCKET = 10
        df = pd.DataFrame(step2_pack, columns=['agree', 'm_mst', 'm_gold', 'entropy'])
        z = df.sort_values(by='m_mst', ascending=False)
        z.to_csv('res.csv')
        for cur_b in range(NUM_BUCKET):
            interval = 1. / NUM_BUCKET
            r0, r1 = cur_b * interval, (cur_b + 1) * interval
            cur_v = df[(df.m_mst >= r0) & (df.m_mst < r1)]
            zlog(f"#===== [{r0}, {r1}): {cur_v.shape}\n" + str(cur_v.describe()))
    # =====
    d = all_stater.summary(get_v=False, get_str=True)
    Helper.printd(d, "\n\n")

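# A self-contained numpy sketch (not the torch code above) of the marginal
# entropy used in the confidence analysis: the 1e-10 term only pads exact
# zeros so log(0) is never taken, and since the matching marginal is 0 the
# padded entry contributes nothing to the sum.
import numpy as np

def marginal_entropy(marginals: np.ndarray) -> np.ndarray:
    # marginals: [..., h], each row a head distribution summing to 1
    safe = marginals + 1e-10 * (marginals == 0.)
    return -(marginals * np.log(safe)).sum(-1)

# a confident row has low entropy, a uniform row has high entropy:
print(marginal_entropy(np.array([[0.97, 0.01, 0.01, 0.01],
                                 [0.25, 0.25, 0.25, 0.25]])))  # ~[0.17, 1.39]
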
def _run_end(self):
    x = self.test_recorder.summary()
    res = self.res_manager.end()
    x.update(res)
    Helper.printd(x, sep=" ")
    return x

def main_loop(conf: SDBasicConf, sp: SentProcessor):
    np.seterr(all='raise')
    nn_init(conf.niconf)
    np.random.seed(conf.rand_seed)
    records = defaultdict(int)
    # skip the Featurer if scores are pre-computed: it would trigger an error otherwise, and this saves model-loading time
    featurer = None if conf.already_pre_computed else Featurer(conf.fconf)
    output_pic_fd = zopen(conf.output_pic, 'wb') if conf.output_pic else None
    all_insts = []
    vocab = Vocab.read(conf.vocab_file) if conf.vocab_file else None
    unk_repl_upos_set = set(conf.unk_repl_upos)
    with BK.no_grad_env():
        input_stream = yield_data(conf.input_file)
        if conf.rand_input:
            inputs = list(input_stream)
            np.random.shuffle(inputs)
            input_stream = inputs
        for one_inst in input_stream:
            # -----
            # wipe gold annotations (results should stay the same); this checks whether we mistakenly use gold information in the analysis
            if conf.debug_no_gold:
                one_inst.heads.vals = [0] * len(one_inst.heads.vals)
                if len(one_inst.heads.vals) > 2:
                    one_inst.heads.vals[2] = 1  # avoid err in certain analysis
                one_inst.labels.vals = ["_"] * len(one_inst.labels.vals)
            # -----
            if len(one_inst) >= conf.min_len and len(one_inst) <= conf.max_len:
                folded_distances = one_inst.extra_features.get("sd2_scores")
                if folded_distances is None:
                    if conf.fake_scores:
                        one_inst.extra_features["sd2_scores"] = np.zeros(featurer.output_shape(len(one_inst.words.vals[1:])))
                    else:
                        # =====
                        # replace certain words?
                        word_seq = one_inst.words.vals[1:]
                        upos_seq = one_inst.poses.vals[1:]
                        if conf.unk_repl_thresh > 0:
                            word_seq = [(conf.unk_repl_token if (u in unk_repl_upos_set and vocab.getval(w, 0) <= conf.unk_repl_thresh) else w)
                                        for w, u in zip(word_seq, upos_seq)]
                        if conf.unk_repl_split_thresh < 10:
                            berter_toker = featurer.berter.tokenizer
                            word_seq = [conf.unk_repl_token if (u in unk_repl_upos_set and len(berter_toker.tokenize(w)) > conf.unk_repl_split_thresh) else w
                                        for w, u in zip(word_seq, upos_seq)]
                        # =====
                        # auto repl by bert?
                        sent_repls = [word_seq]
                        sent_fixed = [np.zeros(len(word_seq)).astype(bool)]  # np.bool is deprecated; plain bool behaves the same
                        for _ in range(conf.sent_repl_times):
                            new_sent, new_fixed = featurer.repl_sent(sent_repls[-1], sent_fixed[-1])
                            sent_repls.append(new_sent)
                            sent_fixed.append(new_fixed)  # once fixed, always fixed
                        one_inst.extra_features["sd3_repls"] = sent_repls
                        one_inst.extra_features["sd3_fixed"] = sent_fixed
                        # =====
                        # score
                        folded_distances = featurer.get_scores(sent_repls[-1])
                        assert len(sent_repls[-1]) == len(word_seq)
                        # ---
                        records["repl_count"] += len(word_seq)
                        records["repl_repl"] += sum(a != b for a, b in zip(sent_repls[-1], word_seq))
                        # ---
                        one_inst.extra_features["sd2_scores"] = folded_distances
                        one_inst.extra_features["feat_seq"] = word_seq
                if output_pic_fd is not None:
                    pickle.dump(one_inst, output_pic_fd)
                if conf.processing:
                    one_info = sp.test_one_sent(one_inst)
                    # put prediction
                    one_inst.pred_heads.set_vals([0] + list(one_info["output"][0]))
                    one_inst.pred_labels.set_vals(["_"] * len(one_inst.labels.vals))
                    #
                    phrase_tree_string = one_info.get("phrase_tree")
                    if phrase_tree_string is not None:
                        one_inst.extra_pred_misc["phrase_tree"] = phrase_tree_string
                all_insts.append(one_inst)
    if output_pic_fd is not None:
        output_pic_fd.close()
    if conf.output_file:
        with zopen(conf.output_file, 'w') as wfd:
            data_writer = get_data_writer(wfd, "conllu")
            data_writer.write(all_insts)
    if conf.output_file_ptree:
        with zopen(conf.output_file_ptree, 'w') as wfd:
            for one_inst in all_insts:
                phrase_tree_string = one_inst.extra_pred_misc.get("phrase_tree")
                wfd.write(str(phrase_tree_string) + "\n")
    # -----
    Helper.printd(records)
    Helper.printd(sp.summary())

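# A standalone sketch (hypothetical names) of the rare-word replacement rule
# in main_loop above: a token becomes the UNK placeholder only when its UPOS
# is in the targeted set AND its vocabulary count falls at or below the
# threshold, mirroring the first list comprehension.
def unk_replace(words, upos_tags, vocab_counts, upos_set, thresh, unk="[UNK]"):
    return [unk if (u in upos_set and vocab_counts.get(w, 0) <= thresh) else w
            for w, u in zip(words, upos_tags)]

# e.g.
print(unk_replace(["the", "blorptastic", "dog"], ["DET", "ADJ", "NOUN"],
                  {"the": 9000, "dog": 120}, {"ADJ", "NOUN"}, thresh=1))
# -> ['the', '[UNK]', 'dog']
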
def main(args):
    conf, model, vpack, test_iter = prepare_test(args, SDConf)
    # make sure the model is an order-1 graph model, otherwise cannot run through
    assert isinstance(model, G1Parser) and isinstance(conf.pconf, G1ParserConf)
    # =====
    # helpers
    all_stater = StatRecorder(False)

    def _stat(k, v):
        all_stater.record_kv(k, v)

    # =====
    # explicitly doing decoding here
    if conf.smodel:
        zlog(f"Load StatModel from {conf.smodel}")
        smodel: StatModel = load_model(conf.smodel)
    else:
        zlog("Blank model for debug")
        dummy_vocab = StatVocab()
        dummy_vocab.sort_and_cut()
        smodel: StatModel = StatModel(StatConf([]), dummy_vocab)
    aconf = conf.aconf
    # other options
    apply_pruning = conf.apply_pruning
    combine_marginals = conf.combine_marginals
    all_insts = []
    for cur_insts in test_iter:
        # score and prune
        valid_mask, arc_score, label_score, mask_expr, marginals = model.prune_on_batch(cur_insts, conf.zprune)
        # only modifying arc score!
        valid_mask_arr = BK.get_value(valid_mask)  # [bs, slen, slen]
        arc_score_arr = BK.get_value(arc_score)  # [bs, slen, slen]
        label_score_arr = BK.get_value(label_score)  # [bs, slen, slen, L]
        marginals_arr = BK.get_value(marginals)  # [bs, slen, slen]
        # for each inst
        for one_idx, one_inst in enumerate(cur_insts):
            tokens = one_inst.words.vals
            if smodel.lc:
                tokens = [str.lower(z) for z in tokens]
            cur_len = len(tokens)
            cur_arange = np.arange(cur_len)
            # get current things: [slen, slen]
            one_valid_mask_arr = valid_mask_arr[one_idx, :cur_len, :cur_len]
            one_arc_score_arr = arc_score_arr[one_idx, :cur_len, :cur_len]
            one_label_score_arr = label_score_arr[one_idx, :cur_len, :cur_len]
            one_marginals_arr = marginals_arr[one_idx, :cur_len, :cur_len]
            # get scores from smodel
            one_sd_scores = smodel.apply_sent(tokens, aconf)  # [slen, slen]
            if apply_pruning:
                one_sd_scores *= one_valid_mask_arr  # todo(warn): 0 or -inf?
            orig_arc_score = (one_marginals_arr if combine_marginals else one_arc_score_arr)
            final_arc_score = one_sd_scores + orig_arc_score
            # first decoding with arc scores
            mst_heads_arr, _, _ = mst_unproj(np.expand_dims(final_arc_score, axis=0),
                                             np.array([cur_len], dtype=np.int32), labeled=False)
            mst_heads_arr = mst_heads_arr[0]  # [slen]
            # then get each one's argmax label
            argmax_label_arr = one_label_score_arr[cur_arange, mst_heads_arr].argmax(-1)  # [slen]
            # put in the results
            one_inst.pred_heads.set_vals(mst_heads_arr)  # directly int-val for heads
            one_inst.pred_labels.build_vals(argmax_label_arr, model.label_vocab)
            # extra output
            one_inst.extra_pred_misc["orig_score"] = orig_arc_score[cur_arange, mst_heads_arr].tolist()
            one_inst.extra_pred_misc["sd_score"] = one_sd_scores[cur_arange, mst_heads_arr].tolist()
            # =====
            # special analyzing with the results and the gold (only for analyzing)
            gold_heads = one_inst.heads.vals
            _stat("num_sent", 1)
            _stat("num_token", (cur_len - 1))
            _stat("num_pairs", (cur_len - 1) * (cur_len - 1))
            _stat("num_pairs_valid", one_valid_mask_arr.sum())  # remaining ones after the pruning (pruning rate)
            _stat("num_gold_valid", one_valid_mask_arr[cur_arange, gold_heads][1:].sum())  # pruning coverage
            # about the sd scores
            _stat("num_sd_nonzero", (one_sd_scores > 0.).sum())
            _stat("num_sd_correct", (one_sd_scores > 0.)[cur_arange, gold_heads][1:].sum())
        # =====
        all_insts.extend(cur_insts)
    # =====
    # write and eval
    # sorting by idx of reading
    all_insts.sort(key=lambda x: x.inst_idx)
    # write
    dconf = conf.dconf
    if dconf.output_file:
        with zopen(dconf.output_file, "w") as fd:
            data_writer = get_data_writer(fd, dconf.output_format)
            data_writer.write(all_insts)
    # eval
    evaler = ParserEvaler()
    eval_arg_names = ["poses", "heads", "labels", "pred_poses", "pred_heads", "pred_labels"]
    for one_inst in all_insts:
        # todo(warn): exclude the ROOT symbol; the model should assign pred_*
        real_values = one_inst.get_real_values_select(eval_arg_names)
        evaler.eval_one(*real_values)
    report_str, res = evaler.summary()
    zlog(report_str, func="result")
    zlog("zzzzztest: testing result is " + str(res))
    # =====
    d = all_stater.summary(get_v=True, get_str=False)
    d["z_prune_rate"] = d["num_pairs_valid"] / d["num_pairs"]
    d["z_prune_coverage"] = d["num_gold_valid"] / d["num_token"]
    d["z_sd_precision"] = d["num_sd_correct"] / d["num_sd_nonzero"]
    d["z_sd_recall"] = d["num_sd_correct"] / d["num_token"]
    Helper.printd(d, "\n\n")

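# A toy numpy sketch of the score-combination step above, with assumed
# shapes; the real code decodes with mst_unproj to guarantee a tree, whereas
# the row-wise argmax here is only the greedy approximation and may yield
# non-tree head assignments.
import numpy as np

slen = 5
rng = np.random.default_rng(0)
arc_score = rng.normal(size=(slen, slen))  # [m, h] parser arc scores (or marginals)
sd_scores = rng.random(size=(slen, slen))  # [m, h] stat-model scores
final_arc_score = arc_score + sd_scores    # additive combination, as above
greedy_heads = final_arc_score.argmax(-1)  # best head per modifier
print(greedy_heads)
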