def xQuAD_clean(topics=EBOLA_TOPICS, suggestor=None, if_use_clean_text=True, boost_params=1, if_stem=True, candidate_doc_cnt=700, tot_itr_times=2, every_itr_doc_cnt=5, use_subquery_cnt=5, lm_lmd=1.0, xquad_lmd=0.6, idf_dict=None, jig=None, irsys=None): # from src.utils.data_utils import basic_preprocess # logging.info("loading... LMD...") lm = LMDirichlet(lmd=lm_lmd) # if if_use_clean_text: # print "load:", LMDirichlet_without_stem_lower # lm.load(LMDirichlet_clean_Json) # else: # print "load:", LMDirichlet_without_stem_lower # # lm.load(LMDirichlet_Json) # lm.load(LMDirichlet_without_stem_lower) logging.info("initing xQuAD...") xquad = xQuAD(lm, lmd=xquad_lmd, alpha=1.0) # logging.info("get all solrs...") # solrs = get_all_ebola_solrs() # print "solr cnt:", len(solrs) # # w = [1] * len(solrs) # # w = [3, 1, 1, 1, 1] #提高1.5% # irsys = IRSys(solrs, ws=w) # # jig = JigClient(tot_itr_times=tot_itr_times) for tid, topic in topics: print "tot_itr_times:", tot_itr_times print "every_itr_doc_cnt:", every_itr_doc_cnt print "use_subquery_cnt:", use_subquery_cnt print "lm_lmd:", lm_lmd print "xquad_lmd:", xquad_lmd print "if_stem:", if_stem print "if_use_clean_text:", if_use_clean_text print "candidate doc cnt:", candidate_doc_cnt # already_select_key_set表示的是 已经选的key set, D表示的是已经选的文章,文章的格式是{}这种而不是IRSys的 already_select_key_set = set() D = [] logging.info("search for topic %s %s" % (tid, topic)) logging.info("preprocess data...") # query_word_list = basic_preprocess(topic, if_lower=True, if_stem=if_stem) query_word_list = basic_preprocess_for_query(topic, if_lower=True, if_stem=if_stem) print "===> !!!! query_word_list:", query_word_list for _ in query_word_list: if not idf_dict.has_key(_): print "!!!!==> idf_dict not has key:", _ docs_list = irsys.retrieve_docs( [topic], with_query_field=True)[0:candidate_doc_cnt] docs_list = preproces_docs_list(docs_list, if_stem=if_stem) logging.info("cal dcs...") dc_dict = cal_dc_dicts(docs_list) check_cnt = 0 print "??????????++++++!!!!!!!!!>>>>>>>>>CHECK DC DICT :" for k in dc_dict.keys(): print "dc k,v:", k, dc_dict[k] check_cnt += 1 if check_cnt >= 1: break subquerys = suggestor.get_subquery_by_topic_id( tid, if_related=False)[0:use_subquery_cnt] # subquerys = clean_subquerys_to_query_lists(subquerys, lm, if_stem=if_stem) subquerys = clean_subquery_list(subquerys, idf_dict, if_stem=if_stem, query_words=query_word_list) # subquerys = clean_subquerys_to_query_lists_and_filter_query(subquerys, lm, if_stem=if_stem, # query_words=query_word_list) print "===> subqueries:", subquerys file_ptr = 0 for i in range(tot_itr_times): print "itr:", i, " tid:", tid this_itr_select_docs = [] if i == 0 or len(subquerys) == 0: if len(subquerys) == 0: print "======@@@@@@@@@@@@> subquery cnt is zero, tid, topic:", tid, topic print docs_list[0] while len(this_itr_select_docs) < 5 and file_ptr < len( docs_list): if docs_list[file_ptr][0] in already_select_key_set: continue this_itr_select_docs.append(docs_list[file_ptr][1][2]) already_select_key_set.add( docs_list[file_ptr][1][2]['key']) #TODO CHECK:D # D.append( docs_list[file_ptr][1][2] ) file_ptr += 1 jig_format_docs = irsys.items2jigdocs( docs_list)[i * every_itr_doc_cnt:i * every_itr_doc_cnt + every_itr_doc_cnt] iresult = jig.run_itr(jig_format_docs, topic_id=tid) if iresult is not None: print "itr result , i:", i if type(iresult) is list: for _ in iresult: print _ else: print iresult # elif i == 1: else: #use xQuAD to select best docs R_left = get_R_left(docs_list, already_select_key_set) this_itr_select_docs = [] for ixquad_selected in range(every_itr_doc_cnt): print "==== [INFO] R_left cnt:", len(R_left) ranked_docs = xquad.select_doc_u_cos( query_word_list, R_left, D, subquerys, dc_dicts=dc_dict, ret_rel_div_score=True) ptr_ = 0 while ranked_docs[ptr_][0][KEY] in already_select_key_set: ptr_ += 1 continue d = ranked_docs[ ptr_] # 这个d的格式是[doc{}, xquad score, rel_score, div_score格式] if d[0][KEY] in already_select_key_set: print "############!!!!!!!!!ERROR >>>>>>>>>>>> SELECT DUP:", d[ KEY] # if i == 0: print "-----CHECK SCORE SELECTED, [ xquad score, rel_score, div_score格式]->>>:", d[ 0][KEY], d[1:] this_itr_select_docs.append(d) D.append(d[0]) D[-1][SCORE] = d[1] already_select_key_set.add(d[0][KEY]) R_left.remove(d[0]) print "len R_left, D, this_itr_select_docs, already_select_keys:", len( R_left), len(D), len(this_itr_select_docs), len( already_select_key_set) # ranked_docs = [] # for ixquad_selected in range(every_itr_doc_cnt): # print "==== [INFO] R_left cnt:", len(R_left) # ranked_docs = xquad.select_doc_u_cos(query_word_list, R_left, D, subquerys, dc_dicts=dc_dict, # ret_rel_div_score=True) # d = ranked_docs[0] #这个d的格式是[doc{}, xquad score, rel_score, div_score格式] # if d[0][KEY] in already_select_key_set: # print "############!!!!!!!!!ERROR >>>>>>>>>>>> SELECT DUP:", d[KEY] # # if i == 0: # print "-----CHECK SCORE SELECTED, [ xquad score, rel_score, div_score格式]->>>:", d[1:] # D.append(d[0]) # D[-1][SCORE] = d[1] # already_select_key_set.add(d[0][KEY]) # R_left.remove(d[0]) # print "len R_left, D, this_itr_select_docs, already_select_keys:", len(R_left), len(D), len( # this_itr_select_docs), len(already_select_key_set) # # # this_itr_select_docs = ranked_docs[0:every_itr_doc_cnt] # this_itr_select_docs = [] # for i,_ in enumerate(ranked_docs): # if _[0][KEY] in already_select_key_set:continue # this_itr_select_docs.append(_) # if len(this_itr_select_docs) >= 5: # if i >= 5: # print "^^^^^^^^^ [ERROR] ThErE must be DUP......!!, i:", i # break jig_format_docs = [] for d in this_itr_select_docs: #TODO:需要检查一下,这里的score,因为第一轮的score和这里太不一样了,需要考虑下怎么处理,需要验证一下score随便设置是不是可以的... jig_format_docs.append((0, d[0][KEY], d[1] * boost_params)) iresult = jig.run_itr(jig_format_docs, topic_id=tid) if iresult is not None: print "itr result , i:", i if type(iresult) is list: for _ in iresult: print _ else: print iresult print "======== CHECK DUP:", len( already_select_key_set), tot_itr_times * 5 if tot_itr_times * 5 != len(already_select_key_set): print "[ERROR] F**K" jig.judge()
def xQuAD_by_IRSys_ebola_without_query_feedback(topics=EBOLA_TOPICS, w=None, suggestor=None, if_use_clean_text=False, boost_params=1e11): logging.info("loading... LMD...") lm = LMDirichlet() if if_use_clean_text: lm.load(LMDirichlet_clean_Json) else: lm.load(LMDirichlet_Json) logging.info("initing xQuAD...") xquad = xQuAD(lm, lmd=0.5, alpha=0.5) logging.info("get all solrs...") solrs = get_all_ebola_solrs() print "solr cnt:", len(solrs) # w = [1] * len(solrs) # w = [3, 1, 1, 1, 1] #提高1.5% irsys = IRSys(solrs, ws=w) tot_itr_times = 5 every_itr_doc_cnt = 5 jig = JigClient(tot_itr_times=tot_itr_times) # already_select_key_set表示的是 已经选的key set, D表示的是已经选的文章,文章的格式是{}这种而不是IRSys的 already_select_key_set = set() D = [] for tid, topic in topics: logging.info("search for topic %s %s" % (tid, topic)) logging.info("preprocess data...") query_word_list = basic_preprocess(topic) print "query_word_list:", query_word_list docs_list = irsys.retrieve_docs([topic], with_query_field=True) docs_list = preproces_docs_list(docs_list)[0:1000] logging.info("cal dcs...") dcs_dict = cal_dc_dicts(docs_list) key_set = set() #强制再搞一次去重 logging.info("======> STRICT REMOVE DUP") print "before remove dup by key:", len(docs_list) new_docs_list = [] for d in docs_list: key = d[0].strip() if key not in key_set: new_docs_list.append(d) print "after remove dup by key:", len(new_docs_list) logging.info("======> REMOVE DUP END") docs_list = new_docs_list file_ptr = 0 for i in range(tot_itr_times): print "itr:", i, " tid:", tid this_itr_select_docs = [] if i == 0: print docs_list[0] while len(this_itr_select_docs) < 5 and file_ptr < len( docs_list): if docs_list[file_ptr][0] in already_select_key_set: continue this_itr_select_docs.append(docs_list[file_ptr][1][2]) already_select_key_set.add( docs_list[file_ptr][1][2]['key']) file_ptr += 1 jig_format_docs = irsys.items2jigdocs( docs_list)[i * every_itr_doc_cnt:i * every_itr_doc_cnt + every_itr_doc_cnt] jig.run_itr(jig_format_docs, topic_id=tid) # elif i == 1: else: #use xQuAD to select best docs docs_left = docs_list[file_ptr:] R_left = get_R_left(docs_left, already_select_key_set) subquerys = suggestor.get_subquery_by_topic_id( tid, if_related=False)[0:5] subquerys = clean_subquerys_to_query_lists(subquerys) print "===> subqueries:", subquerys ranked_docs = xquad.select_doc_u(query_word_list, R_left, D, subquerys) for d in ranked_docs[0:5]: D.append(d[0]) D[-1][SCORE] = d[1] this_itr_select_docs = ranked_docs[0:every_itr_doc_cnt] jig_format_docs = [] for d in this_itr_select_docs: #TODO:需要检查一下,这里的score,因为第一轮的score和这里太不一样了,需要考虑下怎么处理,需要验证一下score随便设置是不是可以的... jig_format_docs.append((0, d[0][KEY], d[1] * boost_params)) jig.run_itr(jig_format_docs, topic_id=tid) jig.judge()
def OLD_xQuAD__without_query_feedback_select_one_by_one_cos_sim_wc( topics=EBOLA_TOPICS, w=None, suggestor=None, if_use_clean_text=False, boost_params=1, if_stem=True, candidate_doc_cnt=700): tot_itr_times = 2 every_itr_doc_cnt = 5 use_subquery_cnt = 5 lm_lmd = 1.0 xquad_lmd = 0.6 logging.info("loading idf dict") idf_dict = json.load(codecs.open(STEM_IDF_DICT_EBOLA, 'r', 'utf-8')) print "tot word BEFORE to str cnt:", len(idf_dict.items()) err_cnt = 0 for k in idf_dict.keys(): v = idf_dict[k] idf_dict.pop(k) try: k = str(k) idf_dict[k] = v except: err_cnt += 1 # print "UNICODE TO STR ERR:", k print "UNICODE TO STR ERR CNT:", err_cnt print "tot word after to str cnt:", len(idf_dict.items()) # from src.utils.data_utils import basic_preprocess logging.info("loading... LMD...") lm = LMDirichlet(lmd=lm_lmd) if if_use_clean_text: print "load:", LMDirichlet_without_stem_lower lm.load(LMDirichlet_clean_Json) else: print "load:", LMDirichlet_without_stem_lower # lm.load(LMDirichlet_Json) lm.load(LMDirichlet_without_stem_lower) logging.info("initing xQuAD...") xquad = xQuAD(lm, lmd=xquad_lmd, alpha=1.0) logging.info("get all solrs...") solrs = get_all_ebola_solrs() print "solr cnt:", len(solrs) # w = [1] * len(solrs) # w = [3, 1, 1, 1, 1] #提高1.5% irsys = IRSys(solrs, ws=w) # jig = JigClient(tot_itr_times=tot_itr_times) jig = JigClient_OLD(tot_itr_times=2, base_jig_dir=EBOLA_POLAR_JIG_DIR) for tid, topic in topics: print "tot_itr_times:", tot_itr_times print "every_itr_doc_cnt:", every_itr_doc_cnt print "use_subquery_cnt:", use_subquery_cnt print "lm_lmd:", lm_lmd print "xquad_lmd:", xquad_lmd print "if_stem:", if_stem print "candidate doc cnt:", candidate_doc_cnt # already_select_key_set表示的是 已经选的key set, D表示的是已经选的文章,文章的格式是{}这种而不是IRSys的 already_select_key_set = set() D = [] logging.info("search for topic %s %s" % (tid, topic)) logging.info("preprocess data...") # query_word_list = basic_preprocess(topic, if_lower=True, if_stem=if_stem) query_word_list = basic_preprocess_for_query(topic, if_lower=True, if_stem=if_stem) print "===> !!!! query_word_list:", query_word_list for _ in query_word_list: if not lm.C.has_key(_): print "!!!!==> LM not has key:", _ docs_list = irsys.retrieve_docs( [topic], with_query_field=True)[0:candidate_doc_cnt] docs_list = preproces_docs_list(docs_list, if_stem=if_stem) logging.info("cal dcs...") dc_dict = cal_dc_dicts(docs_list) check_cnt = 0 print "??????????++++++!!!!!!!!!>>>>>>>>>CHECK DC DICT :" for k in dc_dict.keys(): print "dc k,v:", k, dc_dict[k] check_cnt += 1 if check_cnt >= 1: break subquerys = suggestor.get_subquery_by_topic_id( tid, if_related=False)[0:use_subquery_cnt] # subquerys = clean_subquerys_to_query_lists(subquerys, lm, if_stem=if_stem) subquerys = clean_subquerys_to_query_lists_and_filter_query( subquerys, lm, if_stem=if_stem, query_words=query_word_list) print "===> subqueries:", subquerys file_ptr = 0 for i in range(tot_itr_times): print "itr:", i, " tid:", tid this_itr_select_docs = [] if i == 0 or len(subquerys) == 0: if len(subquerys) == 0: print "======@@@@@@@@@@@@> subquery cnt is zero, tid, topic:", tid, topic print docs_list[0] while len(this_itr_select_docs) < 5 and file_ptr < len( docs_list): if docs_list[file_ptr][0] in already_select_key_set: continue this_itr_select_docs.append(docs_list[file_ptr][1][2]) already_select_key_set.add( docs_list[file_ptr][1][2]['key']) # D.append( docs_list[file_ptr][1][2] ) file_ptr += 1 jig_format_docs = irsys.items2jigdocs( docs_list)[i * every_itr_doc_cnt:i * every_itr_doc_cnt + every_itr_doc_cnt] jig.run_itr(jig_format_docs, topic_id=tid) # elif i == 1: else: #use xQuAD to select best docs R_left = get_R_left(docs_list, already_select_key_set) ranked_docs = [] for ixquad_selected in range(every_itr_doc_cnt): ranked_docs = xquad.select_doc_u_cos( query_word_list, R_left, D, subquerys, dc_dicts=dc_dict, ret_rel_div_score=True) d = ranked_docs[ 0] #这个d的格式是[doc{}, xquad score, rel_score, div_score格式] # if i == 0: if d[0][KEY] in already_select_key_set: print "############!!!!!!!!!ERROR >>>>>>>>>>>> SELECT DUP:", d[ KEY] print "-----CHECK SCORE SELECTED, [ xquad score, rel_score, div_score格式]->>>:", d[ 1:] #TODO:这里需要检查一下要不要加D D.append(d[0]) D[-1][SCORE] = d[1] already_select_key_set.add(d[0][KEY]) R_left.remove(d[0]) # this_itr_select_docs = ranked_docs[0:every_itr_doc_cnt] # this_itr_select_docs = ranked_docs[0:every_itr_doc_cnt] this_itr_select_docs = [] for i, _ in enumerate(ranked_docs): if _[0][KEY] in already_select_key_set: continue this_itr_select_docs.append(_) if len(this_itr_select_docs) >= 5: if i >= 5: print "^^^^^^^^^ [ERROR] ThErE must be DUP......!!, i:", i break jig_format_docs = [] for d in this_itr_select_docs: #TODO:需要检查一下,这里的score,因为第一轮的score和这里太不一样了,需要考虑下怎么处理,需要验证一下score随便设置是不是可以的... jig_format_docs.append((0, d[0][KEY], d[1] * boost_params)) iresult = jig.run_itr(jig_format_docs, topic_id=tid) if iresult is not None: print "itr result , i:", i if type(iresult) == list: for _ in iresult: print _ else: print iresult print "======== CHECK DUP:", len( already_select_key_set), tot_itr_times * 5 jig.judge()
def xQuAD_clean_use_local_data_without_feedback_div_from1( topics=EBOLA_TOPICS, suggestor=None, if_use_clean_text=True, boost_params=1, if_stem=True, candidate_doc_cnt=700, tot_itr_times=2, every_itr_doc_cnt=5, use_subquery_cnt=5, lm_lmd=1.0, xquad_lmd=0.6, idf_dict=None, jig=None, irsys=None, data_dir=EBOLA_CLEAN_FULL_DATA_DIR, data_field='content', key2id_dict={}, use_jig_feedback_cnt_limit=2, ret_words=10, ): lm = LMDirichlet(lmd=lm_lmd) logging.info("initing xQuAD...") xquad = xQuAD(lm, lmd=xquad_lmd, alpha=1.0) subqueries_statics = { } # key: topic_id, v:[ 使用suggested subquery的次数, 使用jig feedback的次数 ] for tid, topic in topics: subqueries_statics[tid] = [0, 0] logging.info("search for topic %s %s" % (tid, topic)) print "tot_itr_times:", tot_itr_times print "every_itr_doc_cnt:", every_itr_doc_cnt print "use_subquery_cnt:", use_subquery_cnt print "lm_lmd:", lm_lmd print "xquad_lmd:", xquad_lmd print "if_stem:", if_stem print "if_use_clean_text:", if_use_clean_text print "candidate doc cnt:", candidate_doc_cnt print "use_jig_feedback_cnt_limit:", use_jig_feedback_cnt_limit # already_select_key_set表示的是 已经选的key set, D表示的是已经选的文章,文章的格式是{}这种而不是IRSys的 already_select_key_set = set() D = [] # already_cover_topic_dict # already_cover_topic_dict,格式 key:subtopic_id, # v: dict形式, key是相关度, v [这个subtopic下已经有的文章个数, [passage_text], 筛选出来的词的list, ] already_cover_topic_dict = {} logging.info("preprocess data...") query_word_list = basic_preprocess_for_query(topic, if_lower=True, if_stem=if_stem) print "===> !!!! query_word_list:", query_word_list for _ in query_word_list: if not idf_dict.has_key(_): print "!!!!==> idf_dict not has key:", _ docs_list = irsys.retrieve_docs( [topic], query_field=data_field, with_query_field=False)[0:candidate_doc_cnt] docs_keys = get_doc_keys_from_doc_list(docs_list) # print docs_list[0] print "CHECK docs_list, docs_keys cnt:", len(docs_list), len(docs_keys) corpus = get_corpus_by_keys(data_dir, key2id_dict, docs_keys, if_stem=if_stem, field=data_field) # print "CHECK corpus[0]:", corpus[0] docs_list = append_docs_to_doc_list(docs_list, docs_keys, corpus, field=data_field, if_filter_null=True) logging.info("cal dcs...") dc_dict = cal_dc_dicts(docs_list) google_subquerys = suggestor.get_subquery_by_topic_id( tid, if_related=False)[0:use_subquery_cnt] google_subquerys = clean_subquery_list(google_subquerys, idf_dict, if_stem=if_stem, query_words=query_word_list) print "===> google_subquerys:", google_subquerys file_ptr = 0 for i in range(tot_itr_times): print "itr:", i, " tid:", tid if i == 0: if_use_jig_feedback = False else: # 先决定用suggestor还是用jig返回的, 然后取top2的相关度, 除非没有3-4 already_get_subtopics = 0 for subtopic_id, info in already_cover_topic_dict.items(): for rating in RATES: if info.has_key(rating): already_get_subtopics += 1 break # 有use_jig_feedback_cnt_limit个以上的子话题已经拿到了 if already_get_subtopics >= use_jig_feedback_cnt_limit: if_use_jig_feedback = True else: if_use_jig_feedback = False if if_use_jig_feedback: print "USE JIG PASSAGE TEXT AS subquery, itr:", i # TODO:这里要试试是不是要做筛选词处理,另外要考虑passage_text是不是已经用过 subqueries = passage_text_to_subqueries( already_cover_topic_dict) subqueries = clean_subquery_list(subqueries, idf_dict, if_stem=if_stem, query_words=query_word_list) subqueries = cut_off_jig_feedback(subqueries, idf_dict, ret_words) subqueries_statics[tid][1] += 1 else: print "USE Suggested query as subquery, itr:", i subqueries = google_subquerys subqueries_statics[tid][0] += 1 print "if_use_jig_feedback:", if_use_jig_feedback print "USE SUBQUERIES:", subqueries this_itr_select_docs = [] if not if_use_jig_feedback and len(subqueries) == 0: if len(subqueries) == 0: print "======@@@@@@@@@@@@> subquery cnt is zero, tid, topic:", tid, topic # print docs_list[0] while len(this_itr_select_docs) < 5 and file_ptr < len( docs_list): if docs_list[file_ptr][1][2][ 'key'] in already_select_key_set: continue this_itr_select_docs.append(docs_list[file_ptr]) already_select_key_set.add( docs_list[file_ptr][1][2]['key']) #TODO CHECK:D # D.append( docs_list[file_ptr][1][2] ) file_ptr += 1 # jig_format_docs = irsys.items2jigdocs(docs_list)[i * every_itr_doc_cnt:i * every_itr_doc_cnt + every_itr_doc_cnt] # print "already_select_key_set:", already_select_key_set jig_format_docs = irsys.items2jigdocs(this_itr_select_docs) iresult = jig.run_itr(jig_format_docs, topic_id=tid) print "i itr:", i if iresult is not None: for _ in iresult: print _ update_state(already_cover_topic_dict, iresult) continue else: #use xQuAD to select best docs R_left = get_R_left(docs_list, already_select_key_set) this_itr_select_docs = [] for ixquad_selected in range(every_itr_doc_cnt): print "==== [INFO] R_left cnt:", len(R_left) ranked_docs = xquad.select_doc_u_cos( query_word_list, R_left, D, subqueries, dc_dicts=dc_dict, ret_rel_div_score=True) ptr_ = 0 while ranked_docs[ptr_][0][KEY] in already_select_key_set: ptr_ += 1 continue d = ranked_docs[ ptr_] #这个d的格式是[doc{}, xquad score, rel_score, div_score格式] if d[0][KEY] in already_select_key_set: print "############!!!!!!!!!ERROR >>>>>>>>>>>> SELECT DUP:", d[ KEY] # if i == 0: print "-----CHECK SCORE SELECTED, [ xquad score, rel_score, div_score格式]->>>:", d[ 1:], d[0] this_itr_select_docs.append(d) D.append(d[0]) D[-1][SCORE] = d[1] already_select_key_set.add(d[0][KEY]) R_left.remove(d[0]) print "len R_left, D, this_itr_select_docs, already_select_keys:", len( R_left), len(D), len(this_itr_select_docs), len( already_select_key_set) # this_itr_select_docs = ranked_docs[0:every_itr_doc_cnt] # for idx_,_ in enumerate(ranked_docs): # if _[0][KEY] in already_select_key_set:continue # this_itr_select_docs.append(_) # if len(this_itr_select_docs) >= 5:break jig_format_docs = [] for d in this_itr_select_docs: #TODO:需要检查一下,这里的score,因为第一轮的score和这里太不一样了,需要考虑下怎么处理,需要验证一下score随便设置是不是可以的... jig_format_docs.append((0, d[0][KEY], d[1] * boost_params)) iresult = jig.run_itr(jig_format_docs, topic_id=tid) if iresult is not None: print "itr result , i:", if type(iresult) is list: for _ in iresult: print _ update_state(already_cover_topic_dict, iresult) else: print iresult print "======== CHECK DUP:", len( already_select_key_set), tot_itr_times * 5 if tot_itr_times * 5 != len(already_select_key_set): print "[ERROR] F**K, tid, itr:", tid, i exit(-1) jig.judge() for tid, v in subqueries_statics.items(): print "tid:", tid, "suggested, jig feedback:", v