Example #1
 def execute(self, env_prefix: str):
     conf = self.conf
     # --
     dir_name = self.dir_name
     system(f"mkdir -p {dir_name}")
     # train
     system(
         f"cd {dir_name}; {env_prefix} PYTHONPATH=../src:../../src/:../../../src python3 -m msp2.tasks.{conf.task_name}.main.train {conf.conf_infile} {self.arg_str} conf_output:{conf.conf_outfile} >_log_train 2>&1;"
     )
     rr = self._read_results(dir_name + "/_log_train")
     # test
     # system(f"cd {dir_name}; {env_prefix} python3 PYTHONPATH=../../src/ python3 -m msp2.tasks.zsfp.main.test {conf.conf_outfile} device:0 {self.arg_str} >_log_test 2>&1")
     return rr
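Every example on this page funnels its shell commands through the msp2.utils.system helper rather than calling subprocess directly. Its implementation is not shown here; the following is a minimal sketch inferred from the call sites, assuming pp echoes the command before running it, ass asserts a zero exit status, and popen captures and returns stdout:

import subprocess

def system(cmd: str, pp: bool = False, ass: bool = False, popen: bool = False):
    # hypothetical reconstruction of msp2.utils.system, inferred from usage only
    if pp:
        print(f"RUN: {cmd}")
    if popen:
        proc = subprocess.run(cmd, shell=True, capture_output=True, text=True)
        output = proc.stdout  # callers like _read_results consume this string
    else:
        proc = subprocess.run(cmd, shell=True)
        output = None
    if ass:
        assert proc.returncode == 0, f"command failed ({proc.returncode}): {cmd}"
    return output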
Example #2
def get_eval_script():
    this_dir = os.path.dirname(os.path.abspath(__file__))
    srl_eval_script = os.path.join(this_dir, "srl-eval.pl")
    if not os.path.exists(srl_eval_script):
        srl_eval_gz_file = os.path.join(this_dir, "srl-eval.pl.gz")
        if os.path.isfile(srl_eval_gz_file):
            # decompress
            system(f"gzip -c -d {srl_eval_gz_file} >{srl_eval_script}",
                   ass=True)
        else:
            raise RuntimeError("Cannot find srl_eval!!")
    # --
    return srl_eval_script
Example #3
 def run(self):
     conf = self.conf
     # --
     train_base_opt = self.get_train_base_opt()
     test_base_opt = self.get_test_base_opt()
     _L_PRE = conf.log_prefix
     DEBUG_OPTION = "-m pdb" if conf.debug else ""
     PRINT_OPTION = "log_stderr:0" if conf.quite else ""
     DEVICE_OPTION = "nn.device:0" if self._get_gpus_from_env_prefix(
         conf._env_prefix) else ""
     # --
     # special mode for convenience
     if conf.do_test2:
         # test without logging?
         TEST2_CMD = f"cd {conf.run_dir}; {conf._env_prefix} PYTHONPATH={conf.src_dir}:$PYTHONPATH python3 {DEBUG_OPTION} -m {conf._module}.test {conf.conf_output} {test_base_opt} log_file: log_stderr:1 {DEVICE_OPTION} {conf.test_extras} {conf._test_extras}"
         system(TEST2_CMD, pp=(not conf.quite))
     else:
         # train?
         TRAIN_CMD = f"cd {conf.run_dir}; {conf._env_prefix} PYTHONPATH={conf.src_dir}:$PYTHONPATH python3 {conf.train_pyopts} {DEBUG_OPTION} -m {conf._module}.train {conf.conf_input} {train_base_opt} log_file:{_L_PRE}_train {PRINT_OPTION} {DEVICE_OPTION} conf_output:{conf.conf_output} {conf.train_extras} {conf._train_extras}"
         if conf.do_train:
             system(TRAIN_CMD, pp=(not conf.quite))
         # test?
         TEST_CMD = f"cd {conf.run_dir}; {conf._env_prefix} PYTHONPATH={conf.src_dir}:$PYTHONPATH python3 {DEBUG_OPTION} -m {conf._module}.test {conf.conf_output} {test_base_opt} log_file:{_L_PRE}_test {PRINT_OPTION} {DEVICE_OPTION} {conf.test_extras} {conf._test_extras}"
         if conf.do_test:
             system(TEST_CMD, pp=(not conf.quite))
         # test-all?
         if conf.do_test_all:
             for extras in self.get_all_dt_opts():
                 # note: mainly input/output/log
                 _TMP_CMD = TEST_CMD + f" {extras}"
                 system(_TMP_CMD, pp=True)
Example #4
 def get_result(self):
     output = system(
         f"cat {self.conf.run_dir}/_log_train | grep zzzzzfinal",
         popen=True)
     result_res, result_dict = re.search(r"\"Result\(([0-9.]+)\): (.*)\"",
                                         output).groups()
     result_res, result_dict = eval(result_res), eval(result_dict)
     return MyResult(result_res, result_dict)
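The eval calls above execute whatever text follows the zzzzzfinal marker, which is only safe for fully trusted logs. A sketch of a stricter variant (not the repository's code) that confines parsing to Python literals via ast.literal_eval:

import ast
import re

def parse_result_line(output: str):
    # expects a log line like: Result(0.8712): {'f1': 0.87, 'acc': 0.91}
    score_str, dict_str = re.search(r"Result\(([0-9.]+)\): (\{.*\})",
                                    output).groups()
    return float(score_str), ast.literal_eval(dict_str)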
Example #5
 def annotate(self, insts: List):
     conf: AnnotatorP2DConf = self.conf
     # --
     # get all sentences and run in batch
     all_sents = list(yield_sents(insts))
     tmp_input = os.path.join(conf.p2d_tmp_dir, "_input.penn")
     with zopen(tmp_input, 'w') as fd:
         for sent in all_sents:
             fd.write(sent2tree(sent) + "\n")
     # run
     tmp_output = os.path.join(conf.p2d_tmp_dir, "_output.conllu")
     log_cmd = f'2>{conf.p2d_log}' if conf.p2d_log else ''
     system(f"{self.cmd} -treeFile {tmp_input} >{tmp_output} {log_cmd}")
     # read output and add back
     conll_reader_conf = ReaderGetterConf()
     conll_reader_conf.input_conf.use_multiline = True
     conll_reader_conf.input_conf.mtl_ignore_f = "'ignore_#'"
     conll_reader_conf.input_format = "conllu"
     conll_reader_conf.input_path = tmp_output
     conll_reader = get_reader(conll_reader_conf)
     new_sents = list(conll_reader)
     # --
     assert len(all_sents) == len(new_sents)
     for s0, s1 in zip(all_sents, new_sents):
         assert len(s0) == len(s1)
         mismatched_tokens = [
             (v1, v2) for v1, v2 in zip(s0.seq_word.vals, s1.seq_word.vals)
             if v1 != v2
         ]
         if len(mismatched_tokens) > 0:
             zwarn(
                 f"Mismatch token NUM={len(mismatched_tokens)}: {mismatched_tokens}"
             )
             if conf.p2d_change_words:
                 s0.build_words(s1.seq_word.vals)  # use the other one!!
             # breakpoint()
         # note: build again!
         s0.build_dep_tree(s1.tree_dep.seq_head.vals, [
             self.p2d_udp_converter(z) for z in s1.tree_dep.seq_label.vals
         ])
         if conf.p2d_use_xpos:
          trg_pos_list = s1.info.get("xpos")
         else:
             trg_pos_list = s1.seq_upos.vals
         s0.build_uposes([self.p2d_upos_converter(z) for z in trg_pos_list])
Example #6
 def eval(self, gold_insts: List, pred_insts: List):
     # --
     conf: PbEvalConf = self.conf
     tmp_gold, tmp_pred = conf.tmp_file_prefix + ".gold.props", conf.tmp_file_prefix + ".pred.props"
     with zopen(tmp_gold, 'w') as fd:
         fd.write(insts2props(gold_insts))
     with zopen(tmp_pred, 'w') as fd:
         fd.write(insts2props(pred_insts))
     # --
     precision_output = system(
         f"{self.cmd} {tmp_pred} {tmp_gold} 2>/dev/null",
         ass=True,
         popen=True)
     recall_output = system(f"{self.cmd} {tmp_gold} {tmp_pred} 2>/dev/null",
                            ass=True,
                            popen=True)
     # parse output
     res = PbEvalResult(precision_output, recall_output)
     return res
Example #7
def run(aa):
    fin, args = aa
    # --
    fout = eval(args.output_pattern)(fin)
    CMD = args.cmd
    CMD = CMD.replace("[IN]", fin)
    CMD = CMD.replace("[OUT]", fout)
    with _global_lock:  # get resource
        cur_idx = list(global_resources.keys())[0]
        del global_resources[cur_idx]
    try:
        CMD = CMD.replace("[IDX]", str(cur_idx))
        system(CMD, pp=True)
    except Exception:  # avoid a bare except so KeyboardInterrupt is not swallowed
        import traceback
        traceback.print_exc()
        raise RuntimeError(f"Command failed: {CMD}")
    finally:  # put resource back
        with _global_lock:
            global_resources[cur_idx] = 1
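The scaffolding around run is not shown: it pops a free resource id (e.g. a GPU index) from global_resources under a lock, substitutes it for [IDX], and returns it in the finally block. One plausible driver (hypothetical; global_resources, _global_lock, and the file list are assumptions) caps the worker count at the pool size so the pop never sees an empty dict:

import threading
from concurrent.futures import ThreadPoolExecutor

_global_lock = threading.Lock()
global_resources = {0: 1, 1: 1}  # free device ids -> dummy placeholder

def drive_all(args, input_files):
    # at most len(global_resources) commands run concurrently
    with ThreadPoolExecutor(max_workers=len(global_resources)) as ex:
        list(ex.map(run, [(fin, args) for fin in input_files]))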
Example #8
def prepare_from_treebanks(wset: str, section: str):
    system(f"mkdir -p {CONLL05}/{wset}/words")
    system(f"mkdir -p {CONLL05}/{wset}/synt")
    # --
    if "brown" in wset:
        assert section is None
        trees = [f"{PTB3}/PARSED/MRG/BROWN/CK/CK0{z}.MRG" for z in "123"]
        _tmp_in_cmd = " | grep -v '^\\*x\\*' "
        sec_infix = ""
    else:
        trees = sorted(zglob(f"{PTB3}/PARSED/MRG/WSJ/{section}/*.MRG"))
        _tmp_in_cmd = ""
        sec_infix = f".{section}" if "test" not in wset else ""
    # --
    # remove files if existing!
    system(f"mkdir -p {CONLL05}/{wset}/docids/")
    f_word, f_docid, f_synt = f"{CONLL05}/{wset}/words/{wset}{sec_infix}.words.gz", \
                              f"{CONLL05}/{wset}/docids/{wset}{sec_infix}.docids.gz", \
                              f"{CONLL05}/{wset}/synt/{wset}{sec_infix}.synt.gz"
    for f in [f_word, f_docid, f_synt]:
        if os.path.exists(f):
            os.remove(f)
    # add all
    for one_tree in trees:
        one_doc_id = one_tree.split("/")[-1].split(".")[0]
        my_system(
            f"cat {one_tree} {_tmp_in_cmd} | {GO_PERL} {SRLCONLL}/bin/wsj-removetraces.pl | {GO_PERL} {SRLCONLL}/bin/wsj-to-se.pl -w 1 | awk '{{print $1}}' | gzip >>{f_word}"
        )
        my_system(
            f"cat {one_tree} {_tmp_in_cmd} | {GO_PERL} {SRLCONLL}/bin/wsj-removetraces.pl | {GO_PERL} {SRLCONLL}/bin/wsj-to-se.pl -w 1 | awk '{{print $1}}' | sed 's/^.\\+$/{one_doc_id}/' | gzip >>{f_docid}"
        )
        my_system(
            f"cat {one_tree} {_tmp_in_cmd} | {GO_PERL} {SRLCONLL}/bin/wsj-removetraces.pl | {GO_PERL} {SRLCONLL}/bin/wsj-to-se.pl -w 0 -p 1 | gzip >>{f_synt}"
        )
Example #9
 def annotate(self, insts: List[DataInstance]):
     conf: AnnotatorSemaforConf = self.conf
     # --
     # get all sentences and run in batch
     all_sents = list(yield_sents(insts))
     # run all in batch
     # step 1: prepare input
     tmp_input = os.path.join(f"{conf.semafor_tmp_dir}", "_input.txt")
     tmp_input = os.path.abspath(tmp_input)  # require absolute path
     tmp_input = self.delete_and_get_file(tmp_input)
     with zopen(tmp_input, 'w') as fd:
         for sent in all_sents:  # write one line per sent
             fd.write(" ".join(sent.seq_word.vals) + "\n")
     # step 2: run semafor
     tmp_output = os.path.join(f"{conf.semafor_tmp_dir}", "_output.json")
     tmp_output = os.path.abspath(tmp_output)  # require absolute path
     tmp_output = self.delete_and_get_file(
         tmp_output, delete=(not conf.semafor_use_cached))
     if not conf.semafor_use_cached:  # otherwise simply skip running
         _semafor_log = conf.semafor_log if conf.semafor_log else "/dev/null"  # append to log!
         system(
             f"bash {self.semafor_sh} {tmp_input} {tmp_output} {conf.semafor_num_threads} >>{_semafor_log} 2>&1",
             ass=True)
     # step 3: read output and put them in sents
     semafor_results = default_json_serializer.load_list(tmp_output)
     assert len(semafor_results) == len(
         all_sents), "Error: predict inst number mismatch!"
     for one_res, one_sent in zip(semafor_results, all_sents):
         one_semafor_sent: Sent = SemaforHelper.semafor2sent(one_res)
         one_idx_map = SemaforHelper.find_sent_map(one_semafor_sent,
                                                   one_sent)
         # put them back
         one_sent.clear_events()
         one_sent.clear_entity_fillers()
         # add them all
         for evt in one_semafor_sent.events:
             evt_widx, evt_wlen = evt.mention.widx, evt.mention.wlen
             mapped_posi = SemaforHelper.map_span(evt_widx, evt_wlen,
                                                  one_idx_map)
             if mapped_posi is None:
                 zwarn(
                     f"Failed mapping evt of {evt}: {evt.mention} to {one_sent.seq_word}"
                 )
                 continue
             evt2 = one_sent.make_event(mapped_posi[0],
                                        mapped_posi[1],
                                        type=evt.type)
             for alink in evt.args:
                 ef = alink.arg
                 ef_widx, ef_wlen = ef.mention.widx, ef.mention.wlen
                 mapped_posi = SemaforHelper.map_span(
                     ef_widx, ef_wlen, one_idx_map)
                 if mapped_posi is None:
                     zwarn(
                         f"Failed mapping arg of {alink}: {ef.mention} to {one_sent.seq_word}"
                     )
                     continue
                 ef2 = one_sent.make_entity_filler(
                     mapped_posi[0],
                     mapped_posi[1])  # make new ef for each arg
                 evt2.add_arg(ef2, role=alink.role)
     # --
     self.count += 1
Example #10
 def _read_results(self, file):
     output = system(f"cat {file} | grep zzzzzfinal", popen=True)
     result_res, result_dict = re.search(r"Result\(([0-9.]+)\): (\{.*\})",
                                         output).groups()
     result_res, result_dict = eval(result_res), eval(result_dict)
     return MyResult(result_res, result_dict)
Example #11
File: go.py Project: zzsfornlp/zmsp
def main(*args):
    conf: RunConf = init_everything(RunConf(),
                                    args,
                                    add_utils=False,
                                    add_nn=False)
    # =====
    # get paths
    RUN_DIR = conf.run_dir
    if RUN_DIR:
        mkdir_p(RUN_DIR, raise_error=True)
        os.chdir(RUN_DIR)  # change to it!!
    SRC_DIR = zglob1(conf.src_dir, check_prefix="..", check_iter=10)
    VOC_DIR = zglob1(conf.voc_dir, check_prefix="..", check_iter=10)
    DATA_DIR = zglob1(conf.dataset.data_dir, check_prefix="..", check_iter=10)
    zlog(
        f"RUN with RUN={RUN_DIR}, SRC={SRC_DIR}, VOC={VOC_DIR}, DATA={DATA_DIR}"
    )
    # =====
    # modes
    dataset_choice = conf.dataset._choice
    is_pb, is_fn = [dataset_choice.startswith(z) for z in ["pb", "fn"]]
    assert is_pb or is_fn
    # =====
    # options
    # --
    # base ones
    base_opt = "conf_output:_conf"
    # eval
    if is_pb:
        base_opt += f" eval_conf:pb"
    elif is_fn:
        base_opt += f" dict_frame_file:{DATA_DIR}/{conf.dataset.frame_file}"
        base_opt += f" eval_conf:fn eval_conf.frame_file:{DATA_DIR}/{conf.dataset.frame_file}"  # eval
    # --
    # =====
    # modeling
    if conf.use_word_input:
        base_opt += " ec_word.dim:300 ec_word.drop_rate:0.2 ec_word.init_from_pretrain:1 ec_word.rare_unk_thr:2"  # word
    # base_opt += " ec_posi.dim:512"  # posi?
    # base_opt += " ec_char.dim:50 ec_char.init_scale:5."  # char?
    if conf.use_bert_input:
        base_opt += " ec_bert.dim:768 bert_model:bert-base-cased bert_output_layers:7,8,9"  # bert features?
    base_opt += " eproj_dim:512"  # --
    if conf.use_rel_posi:
        base_opt += " enc_conf.enc_att.n_layers:2 enc_conf.enc_att.use_posi:0 enc_conf.clip_dist:16"  # enc1
    else:
        base_opt += " enc_conf.enc_att.n_layers:2 enc_conf.enc_att.use_posi:1 enc_conf.clip_dist:0"  # enc1
    # base_opt += " enc_conf.enc_tatt.n_layers:2 enc_conf.enc_tatt.use_posi:1"  # enc1
    # base_opt += " enc_conf.enc_rnn.n_layers:1 enc_conf.enc_hidden:1024"  # enc1
    # --
    # frame
    base_opt += " loss_evt:0.5 pred_evt:1"  # with evts
    base_opt += " evt_conf.cand_label_smoothing:0.05 evt_conf.label_smoothing:0.1"  # label smooth
    base_opt += " evt_conf.lookup_conf.use_emb:0"  # no adding frame embeddings?
    base_opt += " evt_conf.span_conf.sconf.hid_nlayer:1"  # pred scorer?
    if conf.assume_frame:  # no need for the evt module!!
        base_opt += " loss_evt:0 pred_evt:0 eval_conf.weight_frame:0."
    elif conf.assume_trg:  # no need for cand, but still need to identify frame types
        base_opt += " evt_conf.loss_cand:0. evt_conf.loss_use_posi:1 evt_conf.pred_use_posi:1"  # use-posi for evt
        base_opt += " evt_conf.pred_addition_non_score:-100000."  # NEGINF-non
        if is_fn:  # further use cons for fn
            base_opt += f" evt_cons_lex_file:{VOC_DIR}/cons_lex_{dataset_choice}.json evt_conf.pred_use_cons:1 evt_conf.pred_use_lu:1 evt_conf.loss_use_cons:0 evt_conf.loss_use_lu:0"  # cons & use-lu for evt
    else:
        # evt_conf -> direct
        base_opt += " evt_conf.loss_cand:1.0 evt_conf.loss_lab:1.0"  # loss_cand
        base_opt += " evt_conf.span_train_sample_rate:1.0 evt_conf.span_topk_rate:1.0 evt_conf.span_train_sample:1"  # some rates
        # --
        if is_pb:  # lab is aux for pb
            base_opt += " evt_conf.loss_lab:0.5 evt_conf.pred_score_prune:0. evt_conf.pred_addition_non_score:-100000."
        elif is_fn:  # lab is essential for fn
            base_opt += " loss_evt:1 evt_conf.loss_cand:0.5 evt_conf.span_train_sample_rate:0.33 evt_conf.span_topk_rate:0.4 evt_conf.span_train_sample:1"
        # --
        if conf.no_frame_label:
            base_opt += " evt_conf.loss_lab:0. evt_conf.pred_score_prune:0. evt_conf.pred_addition_non_score:-100000."
    # --
    # arg
    base_opt += " arg_use_finput:0"
    base_opt += f" fenc_conf.enc_att.n_layers:8 fenc_conf.clip_dist:{16 if conf.use_rel_posi else 0}"  # fenc
    # base_opt += " fenc_conf.enc_tatt.n_layers:6"  # fenc
    # base_opt += " fenc_conf.enc_rnn.n_layers:3 fenc_conf.enc_hidden:1024"  # enc1
    base_opt += " loss_arg:1. pred_arg:1"  # with args
    base_opt += " arg_conf.label_smoothing:0.1"  # label smooth
    if conf.arg_mode in ["span", "head"]:
        # arg_conf -> direct
        base_opt += " arg_conf.loss_cand:0.5"  # loss_cand
        # base_opt+=" arg_conf.span_train_sample_rate:0.33 arg_conf.span_topk_rate:0.4"  # some rates
        base_opt += " arg_conf.span_topk_rate:1. arg_conf.span_topk_count:10 arg_conf.span_train_sample:0"  # some rates
        base_opt += " arg_conf.loss_weight_non:1."  # less penalizing this?
        base_opt += " arg_conf.pp_check_more:1"  # check non-overlapping
        if conf.arg_mode == "span":
            base_opt += " arg_conf.max_width:30 arg_conf.softhead_topk:5 arg_conf.pred_non_overlapping:1"  # span
        elif conf.arg_mode == "head":
            base_opt += " arg_conf.core_span_mode:shead arg_conf.max_width:1"  # head
            # extender
            base_opt += " arg_conf.loss_ext:0.5 arg_conf.pred_ext:1 arg_conf.ext_use_finput:0"
            base_opt += f" arg_conf.ext_conf.eenc_conf.enc_att.n_layers:1 arg_conf.ext_conf.eenc_conf.enc_att.aconf.clip_dist:{16 if conf.use_rel_posi else 0}"
        else:
            raise NotImplementedError()
    elif conf.arg_mode == "soft":
        base_opt += " arg_conf:soft"
        base_opt += " arg_conf.loss_ext:0.5 arg_conf.pred_ext:1 arg_conf.ext_use_finput:0"
        base_opt += f" arg_conf.ext_conf.eenc_conf.enc_att.n_layers:1 arg_conf.ext_conf.eenc_conf.enc_att.aconf.clip_dist:{16 if conf.use_rel_posi else 0}"
        base_opt += " arg_conf.pp_check_more:1"
    elif conf.arg_mode in ["anchor", "anchor2"]:
        base_opt += " arg_conf:anchor"
        if conf.arg_mode == "anchor2":  # yet another head mode!
            base_opt += " arg_conf.core_span_mode:shead"
        base_opt += " arg_conf.loss_ext:0.5 arg_conf.pred_ext:1 arg_conf.ext_use_finput:0"
        base_opt += f" arg_conf.ext_conf.eenc_conf.enc_att.n_layers:1 arg_conf.ext_conf.eenc_conf.enc_att.aconf.clip_dist:{16 if conf.use_rel_posi else 0}"
        base_opt += " arg_conf.pp_check_more:1"
    elif conf.arg_mode in ["seq", "seq0"]:
        # arg_conf -> seq
        base_opt += " arg_conf:seq arg_conf.seq_scheme:BIO"  # use seq mode!
        base_opt += " arg_conf.loss_weight_non:1."  # less penalizing this?
        # --
        if conf.arg_mode == "seq":
            base_opt += " arg_conf.beam_k:150 arg_conf.use_bigram:0 arg_conf.pred_use_seq_cons:1"  # viterbi with constraints
            if conf.arg_seq_mod == "crf":  # crf-mode
                base_opt += " arg_conf.loss_mode:crf arg_conf.use_bigram:1 arg_conf.local_normalize:0"
        elif conf.arg_mode == "seq0":  # greedy mode: no crf and no viterbi
            base_opt += " arg_conf.pred_use_seq_cons:0 arg_conf.loss_mode:mle arg_conf.use_bigram:0 arg_conf.local_normalize:1"
        else:
            raise NotImplementedError()
    else:
        raise NotImplementedError()
    # --
    # =====
    # training
    base_opt += " ema_decay:0. ema_update_step:1"  # ema
    if 1:
        UPE = 1000  # 1000 update as one epoch
        base_opt += " lrate.val:0.0002 anneal_times:10 anneal_patience:10 lrate.m:0.75"
        base_opt += f" valid_ufreq:{UPE} valid_epoch:0 max_uidx:{UPE*150} lrate_warmup_uidx:{8*UPE} lrate_decrease_alpha:0."
        if conf.use_rel_posi:
            base_opt += " train_count_mode:ftok train_batch_size:4096 accu_batch:1"  # actually bs=bs*accu
            base_opt += " test_count_mode:ftok test_batch_size:2048"
        else:
            base_opt += " train_count_mode:ftok train_batch_size:4096 accu_batch:1"  # actually bs=bs*accu
            base_opt += " test_count_mode:ftok test_batch_size:2048"
        base_opt += " df_hdrop:0.2"  # general dropout
    else:  # possibly for rnn
        base_opt += " lrate.val:0.002 anneal_times:10 anneal_patience:10"
        base_opt += " train_count_mode:frame max_eidx:100 train_batch_size:32"
        base_opt += " df_hdrop:0.33"  # general dropout
    if is_pb:
        base_opt += " train_skip_noevt_rate:0.0"
    elif is_fn:
        base_opt += " train_skip_noevt_rate:1.0"  # skip sents where no targets!
    # data
    base_opt += " " + conf.dataset.get_data_str(DATA_DIR, conf.do_ms_train)
    base_opt += f" pretrain_wv_file:{VOC_DIR}/hits_{dataset_choice}.vec pretrain_scale:10."  # filtered pretrain file
    # nn
    base_opt += f" nn.device:0 nn.random_seed:9347{conf.cur_run} nn.random_cuda_seed:9349{conf.cur_run}"
    # =====
    # note: base_opt is only for training!!
    _L_PRE = conf.log_prefix
    DEBUG_OPTION = "-m pdb" if conf.debug else ""
    TRAIN_CMD = f"CUDA_VISIBLE_DEVICES={conf.rgpu} PYTHONPATH={SRC_DIR}:$PYTHONPATH python3 {DEBUG_OPTION} -m msp2.tasks.zsfp.main.train {base_opt} log_file:{_L_PRE}_train {conf.train_extras}"
    # --
    TEST_CMD = f"CUDA_VISIBLE_DEVICES={conf.rgpu} PYTHONPATH={SRC_DIR}:$PYTHONPATH python3 {DEBUG_OPTION} -m msp2.tasks.zsfp.main.test {conf.conf_output} log_file:{_L_PRE}_test {conf.test_extras}"
    # --
    if conf.do_train:
        system(TRAIN_CMD, pp=True)
    # --
    if conf.do_test:
        system(TEST_CMD, pp=True)
    # --
    if conf.do_test_all:
        for tfile in conf.dataset.all_dt_files:
            _TMP_CMD = f"CUDA_VISIBLE_DEVICES={conf.rgpu} PYTHONPATH={SRC_DIR}:$PYTHONPATH python3 {DEBUG_OPTION} -m msp2.tasks.zsfp.main.test {conf.conf_output} test:{DATA_DIR}/{tfile} output:{conf.out_prefix}.{tfile} log_file:{_L_PRE}.{tfile} test_extra_pretrain_wv_files:{VOC_DIR}/hits_{dataset_choice}.vec {conf.test_extras}"
            system(_TMP_CMD, pp=True)
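All of the key:value fragments accumulated into base_opt above follow one convention: whitespace-separated items, each a dot-separated config path, a colon, and a value. A minimal parser sketch (hypothetical, not the repository's actual config loader) makes the format concrete:

def parse_opts(opt_str: str) -> dict:
    # "evt_conf.loss_cand:0.5 nn.device:0" -> {"evt_conf.loss_cand": "0.5", ...}
    out = {}
    for item in opt_str.split():
        key, _, value = item.partition(":")  # split on the first colon only
        out[key] = value
    return out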
Example #12
#

# prepare data for conll05
# reference: https://github.com/strubell/preprocess-conll05

import sys
import os
from typing import List
from msp2.utils import system, zlog, zglob

# --
my_system = lambda *args, **kwargs: system(*args, **kwargs, pp=True)
# --

# first collect the data
"""
# -----
# step0: data & paths
wget https://www.cs.upc.edu/~srlconll/conll05st-release.tar.gz
tar -xzvf conll05st-release.tar.gz
wget https://www.cs.upc.edu/~srlconll/conll05st-tests.tar.gz
tar -xzvf conll05st-tests.tar.gz
wget https://www.cs.upc.edu/~srlconll/srlconll-1.1.tgz
tar -xzvf srlconll-1.1.tgz
# --
SRLCONLL="`pwd`/srlconll-1.1"
CONLL05="`pwd`/conll05st-release"
PTB3="`pwd`/TREEBANK_3"
export PERL5LIB=$SRLCONLL/lib:$PERL5LIB
"""