def Main():
    """Incomplete driver stub for the e2e evaluation pipeline.

    NOTE(review): this block was collapsed onto a single physical line; the
    trailing ``# TODO`` comment swallowed every statement after ``eval_dir``,
    so only the first assignment actually executed. The intended line
    structure is restored below — the TODOs themselves are unchanged.

    NOTE(review): ``load_configs``, ``load_q_schema_data``, ``data_size``,
    ``max_len``, ``n_emb`` and ``PN`` are not defined in this view — they are
    presumably module-level imports/constants; confirm before running.
    """
    eval_dir = ''  # TODO
    config_dict = load_configs(eval_dir + '/param_config')  # TODO: Define parameters
    np_list = load_q_schema_data(data_size, max_len, n_emb, PN)  # TODO: parameters
    # Train / valid / test boundaries (cumulative question counts).
    Tvt_split_list = [0, 3022, 3778, 5810]  # TODO: Webq
    for idx, Tvt in enumerate(['T', 'v', 't']):
        # Index range of the current split; the loop body looks truncated
        # here — ``indices`` and ``np_list`` are never consumed in this view.
        indices = range(Tvt_split_list[idx], Tvt_split_list[idx + 1])
# NOTE(review): this physical line is a corrupted paste. It fuses (a) the TAIL
# of a batch-slicing helper whose ``def`` line is missing from this file view —
# it fills ``batch_data`` slices ('coherence', 'context', 'corrupt') from
# ``data`` and returns it — with (b) an ``if __name__ == '__main__'`` script
# block that loads a param_config (paths, model selection, coherence_norm,
# candidate_num, epochs defaults). Because the helper's signature is not
# visible, the code is left byte-identical; restoring it requires the original
# ``def`` line. Also note the inline ``# data path`` / ``# model selection`` /
# ``# table settings`` comments swallow the statements that follow them on
# this single physical line — confirm intended line breaks against the
# original source before executing.
data['entity'][idx * PN * rows * cols: (idx + real_batch_size) * PN * rows * cols] batch_data['coherence'] = \ data['coherence'][idx * PN: (idx + real_batch_size) * PN] batch_data['context'] = \ data['context'][idx * PN * rows * cols: (idx + real_batch_size) * PN * rows * cols] batch_data['corrupt'] = \ data['corrupt'][idx: idx + real_batch_size] return batch_data if __name__ == '__main__': setting_dir = sys.argv[1] try_dir = sys.argv[2] root_path = 'runnings/tabel/e2e/%s/%s' % (setting_dir, try_dir) config_fp = '%s/param_config' % root_path config_dict = load_configs(config_fp) # data path joint_data_fp = config_dict['joint_data_fp'] # model selection model_num = int(config_dict['model']) trans = config_dict['translation_setting'] nonj = config_dict['non_joint_setting'] coherence_norm = 0 if 'coherence_norm' in config_dict: coherence_norm = int(config_dict['coherence_norm']) # table settings candidate_num = 50 if 'candidate_num' in config_dict: candidate_num = int(config_dict['candidate_num']) epochs = int(config_dict['epochs'])
def main():
    """Run end-to-end candidate generation over the WebQuestions dataset.

    Reads a ``param_config`` selected by ``sys.argv[1]``, builds a
    ``CandidateGenerator``, and generates + displays candidate schemas for
    every question surface form.

    NOTE(review): this block was collapsed onto single physical lines; the
    commented-out ComplexQuestions section had swallowed the ``import json``
    and WebQuestions-loading code that followed it. Line structure restored;
    all runtime strings (including the '%d WebQuesetions loaded.' log typo)
    are preserved byte-for-byte.
    """
    LogInfo.begin_track('[cand_gen] starts ... ')

    import sys
    from kangqi.util.config import load_configs

    # Locate and parse the experiment's parameter configuration.
    root_path = '/home/kangqi/workspace/PythonProject'
    try_dir = sys.argv[1]
    config_fp = '%s/runnings/candgen/%s/param_config' % (root_path, try_dir)
    config_dict = load_configs(config_fp)

    parser_ip = config_dict['parser_ip']
    parser_port = int(config_dict['parser_port'])
    # Config values are strings; compare against 'True' directly instead of
    # the ``True if ... else False`` anti-idiom.
    use_sparql_cache = config_dict['use_sparql_cache'] == 'True'
    cache_dir = config_dict['cache_dir']
    check_only = config_dict['check_only'] == 'True'
    use_ext_sk = config_dict['use_ext_sk'] == 'True'
    s_mart = config_dict['S-MART'] == 'True'
    min_ratio = float(config_dict['min_ratio'])
    min_surface_score = float(config_dict['min_surface_score'])
    min_pop = int(config_dict['min_pop'])

    # from ..data_prepare.u import load_complex_questions
    # train_qa_list, test_qa_list = load_complex_questions()
    # compq_list = train_qa_list + test_qa_list
    # LogInfo.logs('%d ComplexQuestions loaded.', len(compq_list))
    # surface_list = [qa.q for qa in compq_list]

    # Load WebQuestions and extract the raw question surfaces.
    import json
    webq_fp = '/home/kangqi/Webquestions/Json/webquestions.examples.json'
    with open(webq_fp, 'r') as br:
        webq_data = json.load(br)
    surface_list = [webq['utterance'] for webq in webq_data]
    LogInfo.logs('%d WebQuesetions loaded.', len(webq_data))

    # cand_gen = CandidateGenerator(use_sparql_cache=False, check_only=True)
    # q_list = [qa.q for qa in train_qa_list]
    # check_el_quality(cand_gen, q_list)

    cand_gen = CandidateGenerator(use_sparql_cache=use_sparql_cache,
                                  cache_dir=cache_dir,
                                  check_only=check_only,
                                  parser_ip=parser_ip,
                                  parser_port=parser_port,
                                  k_hop=1, max_hops=2)

    for q_idx, q in enumerate(surface_list):
        LogInfo.begin_track('Checking Q %d / %d: ', q_idx + 1, len(surface_list))
        LogInfo.logs('Surface: %s', q)
        schema_list = cand_gen.run_candgen(q_idx=q_idx, q=q,
                                           min_surface_score=min_surface_score,
                                           min_pop=min_pop,
                                           use_ext_sk=use_ext_sk,
                                           min_ratio=min_ratio,
                                           s_mart=s_mart, vb=1)
        LogInfo.logs('Finally: generated %d schemas.', len(schema_list))
        # Display each generated schema under its own log track.
        sc_sz = len(schema_list)
        for idx in range(sc_sz):
            LogInfo.begin_track('Showing schema %d / %d: ', idx + 1, sc_sz)
            sc = schema_list[idx]
            sc.display()
            # sc.display_sparql()
            LogInfo.end_track()
        LogInfo.end_track()

    # cand_gen = CandidateGenerator(use_sparql=False)
    # q = 'what language do most people speak in afghanistan?'
    # cand_gen.linking(q, vb=0)
    LogInfo.end_track()