def generate_robust_all_seg_for_predict():
    """Launch Robust prediction-data generation jobs (all-segment-as-doc encoding).

    Command line: argv[1] = max sequence length.
    """
    seq_len = int(sys.argv[1])
    doc_encoder = AllSegmentAsDoc(seq_len)
    # Each job gets a RobustWorker wrapping the same predict-generator config.
    make_worker = partial(
        RobustWorker,
        RobustPredictGen(doc_encoder, seq_len, 100, "desc"))
    job_name = "robust_predict_desc_{}".format(seq_len)
    JobRunner(job_man_dir, 4, job_name, make_worker).auto_runner()
def main():
    """Launch Robust training-data generation jobs that carry data ids.

    Command line: argv[1] = max sequence length.
    """
    seq_len = int(sys.argv[1])
    doc_encoder = AllSegmentAsDoc(seq_len)
    # Each job gets a RobustWorkerWDataID wrapping the same train-generator config.
    make_worker = partial(
        RobustWorkerWDataID,
        RobustTrainGenWDataID(doc_encoder, seq_len, "desc"))
    job_name = "robust_w_data_id_desc_{}".format(seq_len)
    JobRunner(job_man_dir, 4, job_name, make_worker).auto_runner()
def run(args):
    """Run msmarco prediction jobs over 696 job slots.

    Each worker loads the model from ``args.model_path`` once before
    serving its assigned jobs; outputs go under ``args.save_dir``.
    """
    tprint("msmarco run")
    hp = Hyperparam()
    nli_setting = ExTrainConfig()

    def worker_factory(out_dir):
        # Build a worker bound to out_dir and eagerly load the model.
        w = PredictWorker(args.input_dir, out_dir)
        w.load_model(hp, nli_setting, args.model_path, "co")
        return w

    JobRunner(args.save_dir, 696, "pc_tfrecord_ex", worker_factory).auto_runner()
def main(config):
    """Launch QK-generation jobs reading ranked lists from a DB-backed source.

    Expects ``config`` with keys: 'q_res_path', 'split', 'job_name'.
    Job count is determined by the claim count for the given split.
    """
    q_res_path = config['q_res_path']
    split = config['split']
    query_d: Dict[int, str] = get_all_claim_d()

    def make_worker(out_dir):
        return QKGenFromDB(q_res_path, query_d, out_dir)

    n_jobs = d_n_claims_per_split2[split]
    JobRunner(job_man_dir, n_jobs, config['job_name'], make_worker).auto_runner()
def generate_robust_sero_for_train():
    """Launch Robust training-data generation with the multi-window (Sero) encoder.

    Command line: argv[1] = window size, argv[2] = number of windows.
    Total sequence length is window_size * n_window.
    """
    window_size = int(sys.argv[1])
    n_window = int(sys.argv[2])
    total_sequence_length = window_size * n_window
    # Fix: dropped the redundant alias `src_window_size = window_size`
    # and pass window_size directly.
    encoder = MultiWindow(window_size, total_sequence_length)
    worker_factory = partial(
        RobustWorker,
        RobustPointwiseTrainGenEx(encoder, total_sequence_length, "desc"))
    runner = JobRunner(job_man_dir, 4,
                       "RobustSero_{}_{}".format(window_size, n_window),
                       worker_factory)
    runner.auto_runner()
def main(config):
    """Launch KDP paragraph-worker jobs, one per query in the ranked list.

    Expects ``config`` with keys: 'max_seq_length', 'reverse',
    'q_res_path', 'job_name'.
    """
    def get_worker(out_dir):
        writer = Writer(max_seq_length=config['max_seq_length'],
                        reverse=config['reverse'])
        return KDPParaWorker(config, writer, out_dir)

    ranked_list: Dict[str, List[SimpleRankedListEntry]] = \
        load_galago_ranked_list(config['q_res_path'])
    # Job ids are 0-based over the queries, hence len - 1 as the max job id.
    n_jobs = len(ranked_list) - 1
    JobRunner(job_man_dir, n_jobs, config['job_name'], get_worker).auto_runner()
def start_generate_jobs_for_sub_split(generator: InstanceGenerator,
                                      qk_candidate_name,
                                      name_prefix,
                                      sub_split):
    """Generate QCK instances for one sub-split (claim ids split to train/val).

    Loads pickled QK candidates, keeps only those whose query id belongs to
    the sub-split's claims, then fans the generation out over JobRunner.
    """
    print("Loading data ....")
    claims = load_claims_for_sub_split(sub_split)
    cids = {str(t['cId']) for t in claims}
    qk_candidate: List[QKUnit] = load_from_pickle(qk_candidate_name)
    print("Generate instances : ", sub_split)
    # Fix: `list([...])` around a list comprehension is redundant (C411);
    # the re-annotation on reassignment was also dropped.
    qk_candidate = [qk for qk in qk_candidate if qk[0].query_id in cids]

    def worker_factory(out_dir):
        return QCKWorker(qk_candidate, generator, out_dir)

    num_jobs = d_n_claims_per_subsplit[sub_split]
    runner = JobRunner(job_man_dir, num_jobs, name_prefix + "_" + sub_split,
                       worker_factory)
    runner.auto_runner()
def main(config):
    """Launch jsonl-processing jobs via JobRunner.

    Expects ``config`` with keys: 'jsonl_path', 'job_name', 'num_jobs'.
    """
    jsonl_path = config['jsonl_path']
    job_name = config['job_name']
    num_jobs = config['num_jobs']

    def make_worker(out_dir):
        return JsonlWorker2(jsonl_path, out_dir)

    JobRunner(job_man_dir, num_jobs, job_name, make_worker).auto_runner()