def rank2records_in_batch():
    """Dump ensemble retrieval records for a sweep of filter values.

    One record directory is created under ``path_parser.summary_rank`` for
    each filter value in 20, 30, ..., 150. For every entry of ``cids`` (a
    name this function reads from the enclosing scope) the retrieval result
    is handed to ``ir_tools.dump_retrieval`` with the target path
    ``<record dir>/<cid>``.

    Raises:
        ValueError: If the record directory for a filter value already
            exists. Directories made for earlier values are left in place.
    """
    step = 10
    first, last = 20, 150

    for filter_value in tqdm(range(first, last + step, step)):
        record_dir_name = ensemble_config.QA_RECORD_DIR_NAME_PATTERN.format(
            ensemble_config.MODEL_NAME,
            filter_value,
            ensemble_config.FILTER,
        )
        record_dir = join(path_parser.summary_rank, record_dir_name)

        # Never write into a directory that is already present.
        if exists(record_dir):
            raise ValueError('qa_rec_dp exists: {}'.format(record_dir))
        os.mkdir(record_dir)

        for cid in cids:
            items = ir_tools.retrieve(
                model_name=ensemble_config.MODEL_NAME,
                cid=cid,
                filter_var=filter_value,
                filter=ensemble_config.FILTER,
                deduplicate=None,
            )
            ir_tools.dump_retrieval(fp=join(record_dir, cid), retrieved_items=items)
def qa_rank2records_in_batch():
    """Dump QA retrieval records for a sweep of filter values.

    The sweep depends on the configured filters:

    * ``qa_config.FILTER == 'conf'``: ``np.arange(0.05, 1.05, 0.05)``.
    * otherwise, with ``qa_config.ir_config.FILTER == 'topK'``: 10, 20, ...
      up to ``qa_config.ir_config.FILTER_VAR``.
    * otherwise: 40, 50, ..., 150.

    For each sweep value a record directory is created under
    ``path_parser.summary_rank`` and, for every entry of ``cids`` (read from
    the enclosing scope), the retrieval result is handed to
    ``ir_tools.dump_retrieval`` with the target path ``<record dir>/<cid>``.

    Raises:
        ValueError: If the record directory for a sweep value already
            exists. Directories made for earlier values are left in place.
    """
    if qa_config.FILTER == 'conf':
        # NOTE(review): the float values from np.arange are formatted into
        # the directory name as-is; confirm the name pattern rounds them.
        sweep = np.arange(0.05, 1.05, 0.05)
    else:  # topK
        step = 10
        if qa_config.ir_config.FILTER == 'topK':
            lowest, highest = step, qa_config.ir_config.FILTER_VAR
        else:
            lowest, highest = 40, 150
        # Add one step so that `highest` itself is part of the sweep.
        sweep = range(lowest, highest + step, step)

    for filter_value in tqdm(sweep):
        record_dir_name = qa_config.QA_RECORD_DIR_NAME_PATTERN.format(
            qa_config.QA_MODEL_NAME_BERT,
            filter_value,
            qa_config.FILTER,
        )
        record_dir = join(path_parser.summary_rank, record_dir_name)

        # Never write into a directory that is already present.
        if exists(record_dir):
            raise ValueError('qa_rec_dp exists: {}'.format(record_dir))
        os.mkdir(record_dir)

        for cid in cids:
            items = ir_tools.retrieve(
                model_name=qa_config.QA_MODEL_NAME_BERT,
                cid=cid,
                filter_var=filter_value,
                filter=qa_config.FILTER,
                deduplicate=None,
            )
            ir_tools.dump_retrieval(fp=join(record_dir, cid), retrieved_items=items)
def rank2records():
    """Dump ensemble retrieval records for the configured filter value.

    Creates the directory ``ensemble_config.QA_RECORD_DIR_NAME`` under
    ``path_parser.summary_rank``. For every entry of ``cids`` (read from the
    enclosing scope) it retrieves items with ``ensemble_config.FILTER_VAR``
    and hands them to ``ir_tools.dump_retrieval`` with the target path
    ``<record dir>/<cid>``.

    Raises:
        ValueError: If the record directory already exists.
    """
    record_dir = join(path_parser.summary_rank, ensemble_config.QA_RECORD_DIR_NAME)

    # Never write into a directory that is already present.
    if exists(record_dir):
        raise ValueError('rec_dp exists: {}'.format(record_dir))
    os.mkdir(record_dir)

    for cid in tqdm(cids):
        items = ir_tools.retrieve(
            model_name=ensemble_config.MODEL_NAME,
            cid=cid,
            filter_var=ensemble_config.FILTER_VAR,
            filter=ensemble_config.FILTER,
            deduplicate=None,
        )
        ir_tools.dump_retrieval(fp=join(record_dir, cid), retrieved_items=items)
def ir_rank2records():
    """Dump IR retrieval records for the configured filter value.

    Creates the directory ``ir_config.IR_RECORDS_DIR_NAME_TF`` under
    ``path_parser.summary_rank``. For every entry of ``cids`` (read from the
    enclosing scope) it retrieves items with pruning enabled and hands them
    to ``ir_tools.dump_retrieval`` with the target path
    ``<record dir>/<cid>``.

    NOTE(review): another ``ir_rank2records`` definition appears later in
    this source; if both live in the same module the later one replaces this
    one at import time. Confirm which of the two is intended.

    Raises:
        ValueError: If the record directory already exists.
    """
    ir_rec_dp = join(path_parser.summary_rank, ir_config.IR_RECORDS_DIR_NAME_TF)

    # Never write into a directory that is already present. The message
    # names the IR record directory (it previously said 'qa_rec_dp').
    if exists(ir_rec_dp):
        raise ValueError('ir_rec_dp exists: {}'.format(ir_rec_dp))
    os.mkdir(ir_rec_dp)

    for cid in tqdm(cids):
        retrieval_params = {
            'model_name': ir_config.IR_MODEL_NAME_TF,
            'cid': cid,
            'filter_var': ir_config.FILTER_VAR,
            'filter': ir_config.FILTER,
            'deduplicate': ir_config.DEDUPLICATE,
            'prune': True,
        }
        retrieved_items = ir_tools.retrieve(**retrieval_params)
        ir_tools.dump_retrieval(fp=join(ir_rec_dp, cid), retrieved_items=retrieved_items)
def ir_rank2records():
    """Dump IR retrieval records for the test queries.

    Creates the directory ``ir_config.IR_RECORDS_DIR_NAME_TF`` under
    ``path_parser.summary_rank``. For the ``'cid'`` value of every dict in
    ``test_cid_query_dicts`` (read from the enclosing scope) it retrieves
    items with pruning disabled and hands them to
    ``ir_tools.dump_retrieval`` with the target path ``<record dir>/<cid>``.

    Raises:
        ValueError: If the record directory already exists.
    """
    ir_rec_dp = join(path_parser.summary_rank, ir_config.IR_RECORDS_DIR_NAME_TF)

    # An explicit raise instead of `assert`: assertions are stripped under
    # `python -O`, and the sibling record dumpers raise ValueError here too.
    if exists(ir_rec_dp):
        raise ValueError(f'ir_rec_dp exists: {ir_rec_dp}')
    os.mkdir(ir_rec_dp)

    # The ids come from the test query dicts (an earlier revision used
    # tools.get_test_cc_ids() instead).
    cids = [c_q_dict['cid'] for c_q_dict in test_cid_query_dicts]

    for cid in tqdm(cids):
        retrieval_params = {
            'model_name': ir_config.IR_MODEL_NAME_TF,
            'cid': cid,
            'filter_var': ir_config.FILTER_VAR,
            'filter': ir_config.FILTER,
            'deduplicate': ir_config.DEDUPLICATE,
            # Pruning is switched off here (an earlier revision passed True).
            'prune': False,
        }
        retrieved_items = ir_tools.retrieve(**retrieval_params)
        ir_tools.dump_retrieval(fp=join(ir_rec_dp, cid), retrieved_items=retrieved_items)