Beispiel #1
0
def rank2records_in_batch():
    """Dump retrieval records for a sweep of filter values.

    The sweep covers ``filter_var`` = 20, 30, ..., 150. For each value a new
    directory is created under ``path_parser.summary_rank`` (named via
    ``ensemble_config.QA_RECORD_DIR_NAME_PATTERN``) and one record per id in
    the module-level ``cids`` is written into it.

    Raises:
        ValueError: if the directory for a filter value already exists.
            Directories created for earlier values in the sweep are left
            in place.
    """
    step = 10
    sweep = range(20, 150 + step, step)  # upper bound padded so 150 is included

    for threshold in tqdm(sweep):
        dir_name = ensemble_config.QA_RECORD_DIR_NAME_PATTERN.format(
            ensemble_config.MODEL_NAME, threshold, ensemble_config.FILTER)
        out_dp = join(path_parser.summary_rank, dir_name)

        # Refuse to write into a directory left over from an earlier run.
        if exists(out_dp):
            raise ValueError('qa_rec_dp exists: {}'.format(out_dp))
        os.mkdir(out_dp)

        for cid in cids:
            items = ir_tools.retrieve(
                model_name=ensemble_config.MODEL_NAME,
                cid=cid,
                filter_var=threshold,
                filter=ensemble_config.FILTER,
                deduplicate=None,
            )
            ir_tools.dump_retrieval(fp=join(out_dp, cid), retrieved_items=items)
Beispiel #2
0
def qa_rank2records_in_batch():
    """Dump QA retrieval records for a sweep of filter values.

    The sweep depends on the configured filters:

    * ``qa_config.FILTER == 'conf'``: 0.05-spaced values from
      ``np.arange(0.05, 1.05, 0.05)``.
    * otherwise, with ``qa_config.ir_config.FILTER == 'topK'``: multiples of
      10 from 10 up to ``qa_config.ir_config.FILTER_VAR`` (inclusive when it
      is itself a multiple of 10).
    * otherwise: 40, 50, ..., 150.

    For each value a new directory is created under
    ``path_parser.summary_rank`` and one record per id in the module-level
    ``cids`` is written into it.

    Raises:
        ValueError: if the directory for a filter value already exists.
    """
    step = 10
    if qa_config.FILTER == 'conf':
        sweep = np.arange(0.05, 1.05, 0.05)
    elif qa_config.ir_config.FILTER == 'topK':
        # Do not sweep past what the IR stage was configured to keep.
        sweep = range(step, qa_config.ir_config.FILTER_VAR + step, step)
    else:
        sweep = range(40, 150 + step, step)

    for threshold in tqdm(sweep):
        dir_name = qa_config.QA_RECORD_DIR_NAME_PATTERN.format(
            qa_config.QA_MODEL_NAME_BERT,
            threshold,
            qa_config.FILTER,
        )
        out_dp = join(path_parser.summary_rank, dir_name)

        # Refuse to write into a directory left over from an earlier run.
        if exists(out_dp):
            raise ValueError('qa_rec_dp exists: {}'.format(out_dp))
        os.mkdir(out_dp)

        for cid in cids:
            items = ir_tools.retrieve(
                model_name=qa_config.QA_MODEL_NAME_BERT,
                cid=cid,
                filter_var=threshold,
                filter=qa_config.FILTER,
                deduplicate=None,
            )
            ir_tools.dump_retrieval(fp=join(out_dp, cid), retrieved_items=items)
Beispiel #3
0
def rank2records():
    """Dump retrieval records for the configured ensemble model.

    Creates ``ensemble_config.QA_RECORD_DIR_NAME`` under
    ``path_parser.summary_rank`` and writes one record per id in the
    module-level ``cids``, using ``ensemble_config.FILTER`` and
    ``ensemble_config.FILTER_VAR`` as the retrieval filter settings.

    Raises:
        ValueError: if the record directory already exists.
    """
    out_dp = join(path_parser.summary_rank, ensemble_config.QA_RECORD_DIR_NAME)

    # Refuse to write into a directory left over from an earlier run.
    if exists(out_dp):
        raise ValueError('rec_dp exists: {}'.format(out_dp))
    os.mkdir(out_dp)

    for cid in tqdm(cids):
        items = ir_tools.retrieve(
            model_name=ensemble_config.MODEL_NAME,
            cid=cid,
            filter_var=ensemble_config.FILTER_VAR,
            filter=ensemble_config.FILTER,
            deduplicate=None,
        )
        ir_tools.dump_retrieval(fp=join(out_dp, cid), retrieved_items=items)
Beispiel #4
0
def ir_rank2records():
    """Dump IR retrieval records for every id in the module-level ``cids``.

    Creates ``ir_config.IR_RECORDS_DIR_NAME_TF`` under
    ``path_parser.summary_rank`` and writes one record file per id, named
    after the id. Retrieval is run with the filter and deduplication
    settings from ``ir_config`` and with ``prune=True``.

    Raises:
        ValueError: if the record directory already exists.
    """
    ir_rec_dp = join(path_parser.summary_rank,
                     ir_config.IR_RECORDS_DIR_NAME_TF)

    # Refuse to write into a directory left over from an earlier run.
    # The message names the IR path being checked.
    if exists(ir_rec_dp):
        raise ValueError('ir_rec_dp exists: {}'.format(ir_rec_dp))
    os.mkdir(ir_rec_dp)

    for cid in tqdm(cids):
        retrieval_params = {
            'model_name': ir_config.IR_MODEL_NAME_TF,
            'cid': cid,
            'filter_var': ir_config.FILTER_VAR,
            'filter': ir_config.FILTER,
            'deduplicate': ir_config.DEDUPLICATE,
            'prune': True,
        }

        retrieved_items = ir_tools.retrieve(**retrieval_params)
        ir_tools.dump_retrieval(fp=join(ir_rec_dp, cid),
                                retrieved_items=retrieved_items)
Beispiel #5
0
def ir_rank2records():
    """Dump IR retrieval records for every id in ``test_cid_query_dicts``.

    Creates ``ir_config.IR_RECORDS_DIR_NAME_TF`` under
    ``path_parser.summary_rank`` and writes one record file per ``'cid'``
    entry found in the module-level ``test_cid_query_dicts``. Retrieval is
    run with the filter and deduplication settings from ``ir_config`` and
    with ``prune=False``.

    Raises:
        ValueError: if the record directory already exists.
    """
    ir_rec_dp = join(path_parser.summary_rank,
                     ir_config.IR_RECORDS_DIR_NAME_TF)

    # Explicit raise rather than `assert`: assertions are stripped under
    # `python -O`, which would silently remove this guard.
    if exists(ir_rec_dp):
        raise ValueError(f'ir_rec_dp exists: {ir_rec_dp}')
    os.mkdir(ir_rec_dp)

    cids = [c_q_dict['cid'] for c_q_dict in test_cid_query_dicts]
    for cid in tqdm(cids):
        retrieval_params = {
            'model_name': ir_config.IR_MODEL_NAME_TF,
            'cid': cid,
            'filter_var': ir_config.FILTER_VAR,
            'filter': ir_config.FILTER,
            'deduplicate': ir_config.DEDUPLICATE,
            'prune': False,
        }

        retrieved_items = ir_tools.retrieve(**retrieval_params)
        ir_tools.dump_retrieval(fp=join(ir_rec_dp, cid),
                                retrieved_items=retrieved_items)