Esempio n. 1
0
def generate_robust_all_seg_for_predict():
    """Run the ``robust_predict_desc_<max_seq_length>`` jobs.

    The maximum sequence length is read from ``sys.argv[1]``.  A
    ``RobustPredictGen`` is built over an ``AllSegmentAsDoc`` encoder and
    bound as the first argument of ``RobustWorker``; the resulting factory
    is handed to ``JobRunner``, whose ``auto_runner`` is then invoked.

    Raises:
        IndexError: if no command-line argument was supplied.
        ValueError: if the argument cannot be parsed as an integer.
    """
    seq_len = int(sys.argv[1])

    # NOTE(review): the meaning of the literal 100 passed to RobustPredictGen
    # and of the 4 passed to JobRunner is not visible from here - confirm.
    generator = RobustPredictGen(
        AllSegmentAsDoc(seq_len), seq_len, 100, "desc")
    job_name = "robust_predict_desc_{}".format(seq_len)

    JobRunner(
        job_man_dir, 4, job_name, partial(RobustWorker, generator),
    ).auto_runner()
Esempio n. 2
0
def main():
    """Run the ``robust_w_data_id_desc_<max_seq_length>`` jobs.

    The maximum sequence length is read from ``sys.argv[1]``.  A
    ``RobustTrainGenWDataID`` is built over an ``AllSegmentAsDoc`` encoder
    and bound as the first argument of ``RobustWorkerWDataID``; the
    resulting factory is handed to ``JobRunner``, whose ``auto_runner`` is
    then invoked.

    Raises:
        IndexError: if no command-line argument was supplied.
        ValueError: if the argument cannot be parsed as an integer.
    """
    seq_len = int(sys.argv[1])

    generator = RobustTrainGenWDataID(
        AllSegmentAsDoc(seq_len), seq_len, "desc")
    job_name = "robust_w_data_id_desc_{}".format(seq_len)

    # NOTE(review): the meaning of the 4 passed to JobRunner is not visible
    # from here - confirm against JobRunner.
    JobRunner(
        job_man_dir, 4, job_name, partial(RobustWorkerWDataID, generator),
    ).auto_runner()
Esempio n. 3
0
def run(args):
    """Run the ``pc_tfrecord_ex`` jobs with model-backed predict workers.

    Args:
        args: object exposing ``input_dir``, ``model_path`` and ``save_dir``
            attributes (all three are read below).
    """
    tprint("msmarco run")
    hyper_params = Hyperparam()
    train_config = ExTrainConfig()

    def build_worker(out_dir):
        # load_model is invoked on every worker this factory creates.
        predictor = PredictWorker(args.input_dir, out_dir)
        predictor.load_model(
            hyper_params, train_config, args.model_path, "co")
        return predictor

    # NOTE(review): the meaning of the literal 696 is not visible from
    # here - confirm against JobRunner.
    JobRunner(
        args.save_dir, 696, "pc_tfrecord_ex", build_worker,
    ).auto_runner()
Esempio n. 4
0
def main(config):
    """Run ``QKGenFromDB`` workers under the job name given in ``config``.

    Args:
        config: mapping providing the keys ``'q_res_path'``, ``'split'``
            and ``'job_name'``.

    Raises:
        KeyError: if ``config`` lacks one of the keys above, or if the
            split is not present in ``d_n_claims_per_split2``.
    """
    res_path = config['q_res_path']
    split_name = config['split']
    queries: Dict[int, str] = get_all_claim_d()

    def make_worker(out_dir):
        return QKGenFromDB(res_path, queries, out_dir)

    # The value handed to JobRunner as its second argument is looked up
    # per split.
    job_count = d_n_claims_per_split2[split_name]
    JobRunner(
        job_man_dir, job_count, config['job_name'], make_worker,
    ).auto_runner()
Esempio n. 5
0
def generate_robust_sero_for_train():
    """Run the ``RobustSero_<window_size>_<n_window>`` jobs.

    ``sys.argv[1]`` supplies the window size and ``sys.argv[2]`` the number
    of windows; their product is used as the total sequence length for both
    the ``MultiWindow`` encoder and ``RobustPointwiseTrainGenEx``.

    Raises:
        IndexError: if fewer than two command-line arguments were supplied.
        ValueError: if either argument cannot be parsed as an integer.
    """
    window_size = int(sys.argv[1])
    n_window = int(sys.argv[2])
    total_len = window_size * n_window

    generator = RobustPointwiseTrainGenEx(
        MultiWindow(window_size, total_len), total_len, "desc")
    job_name = "RobustSero_{}_{}".format(window_size, n_window)

    # NOTE(review): the meaning of the 4 passed to JobRunner is not visible
    # from here - confirm against JobRunner.
    JobRunner(
        job_man_dir, 4, job_name, partial(RobustWorker, generator),
    ).auto_runner()
Esempio n. 6
0
def main(config):
    """Run ``KDPParaWorker`` jobs sized from a Galago ranked list.

    Args:
        config: mapping providing the keys ``'q_res_path'`` and
            ``'job_name'``; ``'max_seq_length'`` and ``'reverse'`` are read
            each time a worker is created.  The whole mapping is also
            passed on to ``KDPParaWorker``.
    """
    rankings: Dict[str, List[SimpleRankedListEntry]] = \
        load_galago_ranked_list(config['q_res_path'])

    # NOTE(review): the value passed to JobRunner is one less than the
    # number of ranked-list entries - presumably JobRunner treats it as an
    # inclusive upper job index; confirm.
    last_job = len(rankings) - 1

    def make_worker(out_dir):
        seq_len = config['max_seq_length']
        is_reversed = config['reverse']
        return KDPParaWorker(
            config,
            Writer(max_seq_length=seq_len, reverse=is_reversed),
            out_dir,
        )

    JobRunner(
        job_man_dir, last_job, config['job_name'], make_worker,
    ).auto_runner()
Esempio n. 7
0
def start_generate_jobs_for_sub_split(generator: InstanceGenerator,
                                      qk_candidate_name, name_prefix,
                                      sub_split):
    """Run ``QCKWorker`` jobs over the QK candidates of one sub-split.

    The pickled candidate list is restricted to units whose query id
    (``qk[0].query_id``) matches the stringified ``cId`` of a claim loaded
    for ``sub_split``; the filtered list is shared by every worker.

    Args:
        generator: instance generator handed to every ``QCKWorker``.
        qk_candidate_name: name passed to ``load_from_pickle`` to obtain
            the full candidate list.
        name_prefix: job-name prefix; the job is named
            ``name_prefix + "_" + sub_split``.
        sub_split: sub-split identifier used to load the claims and to
            look up the value in ``d_n_claims_per_subsplit``.

    Raises:
        KeyError: if ``sub_split`` is missing from
            ``d_n_claims_per_subsplit``.
    """
    print("Loading data ....")
    claims = load_claims_for_sub_split(sub_split)
    # Claim ids are stringified because they are compared with query_id.
    cids = {str(t['cId']) for t in claims}
    qk_candidate: List[QKUnit] = load_from_pickle(qk_candidate_name)
    print("Generate instances : ", sub_split)
    # Rebind the same name so the unfiltered list can be released; the
    # comprehension already yields a list, so no extra list() copy is made.
    qk_candidate = [qk for qk in qk_candidate if qk[0].query_id in cids]

    def worker_factory(out_dir):
        return QCKWorker(qk_candidate, generator, out_dir)

    num_jobs = d_n_claims_per_subsplit[sub_split]
    runner = JobRunner(job_man_dir, num_jobs, name_prefix + "_" + sub_split,
                       worker_factory)
    runner.auto_runner()
Esempio n. 8
0
def main(config):
    """Run ``JsonlWorker2`` jobs as described by ``config``.

    Args:
        config: mapping providing the keys ``'jsonl_path'``, ``'job_name'``
            and ``'num_jobs'``.

    Raises:
        KeyError: if ``config`` lacks one of the keys above.
    """
    source_path = config['jsonl_path']
    name = config['job_name']
    job_count = config['num_jobs']

    def make_worker(out_dir):
        return JsonlWorker2(source_path, out_dir)

    JobRunner(job_man_dir, job_count, name, make_worker).auto_runner()