Exemple #1
0
def nyt_seg_LMD(topic_id, query):
    tot_itr_times = 1
    solr = SolrClient(SOLR_SEG_nyt_LMD_URL)
    jig = JigClient(topic_id=topic_id,
                    tot_itr_times=tot_itr_times,
                    base_jig_dir=NYT_JIG_DIR)
    print "jig dir:", jig.base_dir
    query_range = [
        # '',
        'content_full_text',
    ]

    docs = retrieval_top_k_doc_full(query,
                                    solr,
                                    k=1000,
                                    query_range=query_range)
    # interact_with_jig(jig, docs, tot_itr_times)

    st_ptr = 0

    for i in range(tot_itr_times):
        rslt = jig.run_itr(docs[st_ptr:st_ptr + 5], topic_id=topic_id)
        st_ptr += 5
        print "itr:", i  # , " rslt:", rslt
        if rslt is not None:
            for _ in rslt:
                print _
        else:
            print "None"
Exemple #2
0
def rejudege(all_subs, iter_counts, max_iter_count):
    """Replay stored submissions through the jig for several iteration budgets.

    For every value in ``iter_counts`` a new ``JigClient`` is created, each
    topic's submission list is fed to it five documents per iteration, and
    ``jig.judge()`` is called once if at least one batch was submitted.

    Args:
        all_subs: mapping of topic id -> ordered list of documents to submit.
        iter_counts: iterable of iteration budgets to replay.
        max_iter_count: bound used only for the sanity-check error log.
    """
    for iter_count in iter_counts:
        jig = JigClient(tot_itr_times=iter_count,
                        topic_id=None,
                        base_jig_dir=EBOLA_NYT_JIG_DIR)
        should_judge = False
        if iter_count > max_iter_count:
            # The format string has two placeholders, so both values must be
            # supplied; with one argument logging reports a formatting error
            # instead of the intended message.
            logging.error("iter_count: %s is larger than max_iter_count: %s",
                          iter_count, max_iter_count)
        for topic_id in all_subs:
            sub_list = all_subs[topic_id]
            # Topics that do not hold more than iter_count * 5 documents are
            # skipped entirely.
            if len(sub_list) <= iter_count * 5:
                continue
            for i in range(iter_count):
                should_judge = True
                jig_result = jig.run_itr(sub_list[:5], topic_id=topic_id)

                print_jig_result(jig_result)
                sub_list = sub_list[5:]
        if should_judge:
            try:
                jig.judge()
            except Exception:
                # Judging stays best-effort, but the failure is recorded and
                # KeyboardInterrupt/SystemExit are no longer swallowed.
                logging.exception("jig.judge() failed for iter_count %s",
                                  iter_count)
                print "shit judge error"
Exemple #3
0
def nyt_irsys_blending():
    tot_itr_times = 1
    topic_id = 'dd17-1'
    ws = [3, 1, 1, 1, 1]
    jig = JigClient(topic_id=topic_id,
                    tot_itr_times=tot_itr_times,
                    base_jig_dir=NYT_JIG_DIR)
    solrs = get_all_nyt_solrs()
    irsys = IRSys(solrs, ws=ws)

    for topic_id, topic_name in NYT_TOPICS:
        print "topic_id, topic name:", topic_id, topic_name
        query = [topic_name]
        query_range = [
            # '',
            # 'content_full_text',
            'content',
        ]
        for i in range(tot_itr_times):
            docs = irsys.retrieve_docs(query,
                                       query_field='content',
                                       with_query_field=False)
            print "docs 0~3:", docs[0:3]
            jig_format_docs = irsys.items2jigdocs(docs)
            iresult = jig.run_itr(jig_format_docs[i * 5:i * 5 + 5],
                                  topic_id=topic_id)
            print "iresult, i:", i
            if iresult is not None:
                for _ in iresult:
                    print _
            else:
                logging.error("[ERROR] iresult None ")
        jig.judge()
Exemple #4
0
def test_6():
    logging.info("read idf dic... ")
    idf_dic = json.load(codecs.open(idf_dic_path, 'r'))
    logging.info("load idf dic end...")

    solr = SolrClient(BASE_SOLR_URL)
    tot_itr_times = 1
    topic_id = "DD16-1"
    jig = JigClient(topic_id, tot_itr_times=tot_itr_times)
    # topic_query = ["US Military Crisis Response"]
    #

    for topic in EBOLA_TOPICS:
        topic_query = [preprocess_query(topic[1])]
        topic_id = topic[0]

        print "topic query:", topic_query
        print "topic id:", topic_id

        # interact_with_jig_to_change_vec(jig, solr, query, idf_dic, itr_cnt=5)
        #interact_with_jig_to_change_vec(jig, solr, topic_query, idf_dic, itr_cnt=tot_itr_times)

        interact_with_jig_to_change_vec_use_jig_ret(
            jig,
            solr,
            topic_query,
            idf_dic,
            itr_cnt=tot_itr_times,
            topic_id=topic_id,
            query_range=[
                "content_p", "content_h1", "content_h2", "content_h3",
                "content_h4", "content_h5"
            ])

        jig.judge()
def use_full():
    topic_query = ["US Military Crisis Response"]
    # topic_query = ["Crisis Response"]

    # topic_query = [ "US Military Crisis Response ebola outbreak epidemic"  ]
    # topic_query = ["US Military Crisis Response ebola"] #fight combat commits against seeks" ]
    # topic_query = ["US Military Crisis Response outbreak"]

    # topic_query = ["Who are key leaders (field grade officers and senior NCO’s) in charge of U.S. military units combating the ebola epidemic in Africa, what are the protocols for personnel safety, and what is their mission"]

    # topic_query = ["US Military Crisis Response fight combat"]

    topic_query = [','.join(topic_query[0].split())]
    topic_id = "DD16-1"

    # topic_id, topic_query = ('DD16-26', ' African Culture') #('DD16-24', 'Olu-Ibukun Koye Spread EVD to Port Harcourt') #('DD16-3', 'healthcare impacts of ebola')
    # topic_query = [','.join(topic_query[0].split())]

    tot_itr_times = 1

    print "topic query:", topic_query

    solr = SolrClient(FULL_SOLR_URL)
    jig = JigClient(topic_id, tot_itr_times=tot_itr_times)

    docs = retrieval_top_k_doc_full(topic_query,
                                    solr,
                                    1000,
                                    query_range=['title', 'content'])

    interact_with_jig(jig, docs, tot_itr_times)

    jig.judge()
Exemple #6
0
def test_12():
    logging.info("read idf dic... ")
    idf_dic = json.load(codecs.open(idf_dic_path, 'r'))
    logging.info("load idf dic end...")

    topic_query = ["US Military Crisis Response"]

    topic_id = "DD16-1"

    print "topic query:", topic_query

    solr = SolrClient(BASE_SOLR_URL)
    tot_itr_times = 5
    jig = JigClient(topic_id, tot_itr_times=tot_itr_times)

    for tid, topic_query in EBOLA_TOPICS:

        #interact_with_jig_to_change_vec(jig, solr, query, idf_dic, itr_cnt=5)
        interact_with_jig_to_change_vec(jig,
                                        solr, [topic_query],
                                        idf_dic,
                                        topic_id=tid,
                                        itr_cnt=tot_itr_times)

        jig.judge()
def use_psg():
    topic_query = ["US Military Crisis Response"]
    # topic_query = ["US Military Crisis Response fight combat commits against seeks"]
    # topic_query = [ "US Military Crisis Response ebola outbreak epidemic"  ]
    # topic_query = ["US Military Crisis Response ebola"] #fight combat commits against seeks" ]
    # topic_query = ["US Military Crisis Response outbreak"]
    # topic_query = [','.join(topic_query[0].split() )]
    # topic_query = [ ','.join(topic_query[0].split()  + [ "US Military", "Military Crisis", "Crisis Response", ] ) ]

    # topic_query = ["Who are key leaders (field grade officers and senior NCO’s) in charge of U.S. military units combating the ebola epidemic in Africa, what are the protocols for personnel safety, and what is their mission"]

    topic_id = "DD16-1"

    tot_itr_times = 1
    print "topic query:", topic_query

    query_range = [
        "content_title", "content_p", "content_h1", "content_h2", "content_h3",
        "content_h4", "content_h5"
    ]
    query_range = ["content_p", "content_h3", "content_h4", "content_h5"]

    solr = SolrClient(BASE_SOLR_URL)
    jig = JigClient(topic_id, tot_itr_times=tot_itr_times)

    docs = retrieval_top_k_doc(topic_query,
                               solr,
                               1000,
                               query_range=query_range)
    interact_with_jig(jig, docs, tot_itr_times)

    jig.judge()
Exemple #8
0
def test_full_irsys(w=None, topics=EBOLA_TOPICS):

    logging.info("get all solrs...")
    solrs = get_all_ebola_solrs()
    print "solr cnt:", len(solrs)
    # w = [1] * len(solrs)
    # solrs += [SolrClient(solr_url=SOLR_EBOLA_LMD2500)]
    w = [
        3,
        1,
        1,
        1,
        1,
    ]  #提高1.5%
    irsys = IRSys(solrs, ws=w)

    tot_itr_times = 1
    every_itr_doc_cnt = 5

    jig = JigClient(tot_itr_times=tot_itr_times)

    for tid, topic in topics:

        logging.info("search for topic %s %s" % (tid, topic))
        #
        docs_list = irsys.retrieve_docs([topic])
        # docs_list = irsys.retrieve_docs(topic.split())

        print " =====>>>  CHECK:", docs_list[0]

        key_set = set()
        # 强制再搞一次去重
        logging.info("======> STRICT REMOVE DUP")
        print "before remove dup by key:", len(docs_list)
        new_docs_list = []
        for d in docs_list:
            key = d[0].strip()
            if key not in key_set:
                new_docs_list.append(d)
        print "after remove dup by key:", len(new_docs_list)

        docs_list = new_docs_list

        logging.info("======> REMOVE DUP END")
        for i in range(tot_itr_times):
            jig_format_docs = irsys.items2jigdocs(
                docs_list)[i * every_itr_doc_cnt:i * every_itr_doc_cnt +
                           every_itr_doc_cnt]

            print "itr:", i, " tid:", tid
            irslt = jig.run_itr(jig_format_docs, topic_id=tid)
            print "itr i:", i, " rslt:"
            if irslt is not None:
                for _ in irslt:
                    print _
            else:
                print None

        jig.judge()
Exemple #9
0
def base_nyt_seg_data():
    # NYT_TOPICS = [
    #     ('dd17-2', 'Who Outed Valerie Plame?'),
    # ]
    topic_id = 'dd17-1'
    # topic_name = "Return of Klimt paintings to Maria Altmann"#.split(' ')
    tot_itr_times = 1
    solr_url = SOLR_SEG_nyt_LMD768_URL
    solr = SolrClient(solr_url)

    jig = JigClient(topic_id=topic_id,
                    tot_itr_times=tot_itr_times,
                    base_jig_dir=NYT_JIG_DIR)
    print "jig dir:", jig.base_dir

    for topic_id, topic_name in NYT_TOPICS:
        # print topic_name
        print "topic id, topic name:", topic_id, topic_name

        query = [topic_name]
        query_range = [
            # '',
            'content_full_text',
        ]

        docs = retrieval_top_k_doc_full(query,
                                        solr,
                                        k=1000,
                                        query_range=query_range)
        # interact_with_jig(jig, docs, tot_itr_times)
        print "BEFORE REMOVE DUP:", len(docs)
        docs = remove_dup(docs)
        print "AFTER REMOVE DUP:", len(docs)

        st_ptr = 0

        for i in range(tot_itr_times):
            rslt = jig.run_itr(docs[st_ptr:st_ptr + 5], topic_id=topic_id)
            st_ptr += 5
            print "itr:", i  # , " rslt:", rslt
            if rslt is not None:
                for _ in rslt:
                    print _
            else:
                print "None"

        jig.judge()
Exemple #10
0
def test_10():
    """Run ``interact_with_jig_to_change_vec`` for five iterations per topic.

    Loads the IDF dictionary from ``idf_dic_path`` and, for every
    ``(topic_id, topic_text)`` pair in ``EBOLA_TOPICS``, preprocesses the
    text into a query, runs the interaction over the paragraph/heading
    fields and calls ``jig.judge()``.
    """
    logging.info("read idf dic... ")
    # Use a context manager so the file handle is closed deterministically
    # instead of being left to the garbage collector.
    with codecs.open(idf_dic_path, 'r') as idf_file:
        idf_dic = json.load(idf_file)
    logging.info("load idf dic end...")

    solr = SolrClient(BASE_SOLR_URL)
    tot_itr_times = 5
    # The client is created with a placeholder topic; the real topic id is
    # passed per call below.
    jig = JigClient("DD16-1", tot_itr_times=tot_itr_times)

    for topic_id, topic_text in EBOLA_TOPICS:
        topic_query = [preprocess_query(topic_text)]

        logging.info("qe end...")

        interact_with_jig_to_change_vec(jig,
                                        solr,
                                        topic_query,
                                        idf_dic,
                                        itr_cnt=tot_itr_times,
                                        topic_id=topic_id,
                                        query_range=[
                                            "content_p", "content_h1",
                                            "content_h2", "content_h3",
                                            "content_h4", "content_h5"
                                        ])

        jig.judge()
def use_psg_all_topic():
    tot_itr_times = 1
    jig = JigClient(topic_id="DD16-1", tot_itr_times=tot_itr_times)
    solr = SolrClient(BASE_SOLR_URL)
    for topic_id, topic_query in EBOLA_TOPICS:

        print "topic query:", topic_query

        query_range = [
            "content_title", "content_p", "content_h1", "content_h2",
            "content_h3", "content_h4", "content_h5"
        ]
        # query_range = ["content_h5", ]
        query_range = ["content_p", "content_h2"]

        print "!!!!!+++++++++++> query_range:", query_range

        docs = retrieval_top_k_doc([topic_query],
                                   solr,
                                   1000,
                                   query_range=query_range)
        interact_with_jig_by_topic(jig, docs, tot_itr_times, tid=topic_id)

        jig.judge()
Exemple #12
0
def test_13():
    logging.info("read idf dic... ")
    idf_dic = json.load(codecs.open(idf_dic_path, 'r'))
    logging.info("load idf dic end...")

    topic_query = ["US Military Crisis Response"]

    topic_id = "DD16-1"

    print "topic query:", topic_query

    solr = SolrClient(BASE_SOLR_URL)
    tot_itr_times = 5
    jig = JigClient(topic_id, tot_itr_times=tot_itr_times)

    for tid, topic_query in EBOLA_TOPICS:
        pass
Exemple #13
0
def rejudege(process_count,
             iter_counts,
             max_iter_count,
             out_dir,
             dtype="ebola"):
    import time
    frames = []
    for i in range(process_count):
        ps = "{}-{}".format(process_count, i)
        try:
            frames.extend(
                json.load(codecs.open(out_dir.format(ps), "r", "utf-8")))
        except:
            pass
    print "queue len:", len(frames)
    json.dump(frames,
              codecs.open(out_dir.format(int(time.time())), "w", "utf-8"))

    rank = {frame["topic_id"]: frame["sub_list"] for frame in frames}
    json.dump(rank,
              codecs.open("frames/{}_rank.json".format(dtype), "w", "utf-8"))

    for iter_count in iter_counts:
        jig = JigClient(tot_itr_times=iter_count,
                        topic_id=None,
                        base_jig_dir=EBOLA_NYT_JIG_DIR)
        if iter_count > max_iter_count:
            logging.error("iter_count: %s is larger than max_iter_count: %s",
                          max_iter_count)
        for frame in frames:
            topic_id = frame["topic_id"]
            sub_list = frame["sub_list"]
            ddocs = frame["ddocs"]
            for i in range(iter_count):
                jig.run_itr([ddocs[str(did)] for did in sub_list[:5]],
                            topic_id=topic_id)
                sub_list = sub_list[5:]
        jig.judge()
Exemple #14
0
    st_ptr = 0

    for i in range(interact_times):
        rslt = jig.run_itr(docs[st_ptr:st_ptr+5])
        st_ptr += 5
        print "itr:", i #, " rslt:", rslt
        for _ in rslt:
            print _

if __name__ == '__main__':

    topic_query = ["US Military Crisis Response"]
    # topic_query = [ "US Military Crisis Response ebola outbreak epidemic fight seeks spread"  ]
    # topic_query = ["US Military Crisis Response ebola"] #fight combat commits against seeks" ]
    topic_query = ["US Military Crisis Response outbreak"]

    # topic_query = ["Who are key leaders (field grade officers and senior NCO’s) in charge of U.S. military units combating the ebola epidemic in Africa, what are the protocols for personnel safety, and what is their mission"]

    topic_id = "DD16-1"

    print "topic query:", topic_query

    solr = SolrClient(BASE_SOLR_URL)
    jig = JigClient(topic_id)

    docs = retrieval_top_k_doc(topic_query, solr, 1000)
    interact_with_jig(jig, docs, 5)

    jig.judge()

__END__ = True
Exemple #15
0
def _thread_main(process_id,
                 process_count,
                 in_dir,
                 out_dir,
                 dtype,
                 se_name,
                 iter_count,
                 return_count,
                 likehood,
                 test=False):
    """Build the submission list for this worker's share of the topics.

    For every topic assigned to this worker the function retrieves documents
    from Solr, loads or extracts their content from ``in_dir``, scores them
    with ``get_se_sim`` against the stored search-engine results, and builds
    ``sub_list``: up to five document ids per iteration, preferring ids whose
    similarity exceeds ``likehood``. The resulting frames are dumped as JSON
    to ``out_dir.format("<process_count>-<process_id>")``.

    Args:
        process_id: index of this worker; also the start offset of its
            topic stride.
        process_count: total number of workers; the topic stride.
        in_dir: format string with one placeholder for a document id,
            naming the per-document input file.
        out_dir: format string with one placeholder, naming the output file.
        dtype: dataset tag; the substrings "ebola" / "nytimes" select the
            Solr core, topic list and loading path.
        se_name: passed to ``du.load_se_results``.
        iter_count: number of five-document iterations to prepare.
        return_count: number of document ids whose content is loaded; also
            the ``rows`` argument of the non-ebola Solr query.
        likehood: similarity threshold; also forwarded to ``get_se_sim``.
        test: when true, restrict to four fixed ebola topics, raise the root
            log level to WARNING, and judge/re-judge at the end.
    """
    if "nytimes" in dtype:
        solr = SolrClient(SOLR_SEG_nyt_LMD_URL)
    else:
        solr = SolrClient(FULL_SOLR_URL)
    key2id = du.load_ebola_map("key2id")
    se_results = du.load_se_results(se_name=se_name, dtype=dtype)
    # id2key = du.load_ebola_map("id2key")
    # jig = JigClient(tot_itr_times=iter_count, topic_id=None)
    jig = JigClient(tot_itr_times=iter_count,
                    topic_id=None,
                    base_jig_dir=EBOLA_NYT_JIG_DIR)
    # NOTE(review): if dtype contains neither "ebola" nor "nytimes" and
    # test is false, `topics` is never bound and the slice below raises
    # NameError.
    if "ebola" in dtype:
        topics = eb_topics
    if "nytimes" in dtype:
        topics = ny_topics
    if test:
        topics = [eb_topics[2], eb_topics[3], eb_topics[5], eb_topics[24]]
        logging.root.setLevel(logging.WARNING)
    frame_list = []
    # Each worker takes every process_count-th topic, starting at process_id.
    topics = topics[process_id::process_count]
    print topics
    for tid, topic in topics:
        logging.info("[#] id: %s, topic: %s\n", tid, topic)
        if "ebola" in dtype:
            solr_docs = retrieval_top_k_doc_full([topic],
                                                 solr,
                                                 600,
                                                 query_range=['content'],
                                                 key2id=key2id)
        else:
            solr_docs = solr.query_fields(keywords=[topic],
                                          fl="title,key,date",
                                          rows=return_count)
            # Normalise to (id, key, score) tuples; the key is used for both
            # of the first two slots here.
            solr_docs = [(d["key"], d["key"], d["score"]) for d in solr_docs]
        # Index the retrieved tuples by their first element (document id).
        ddocs = {d[0]: d for d in solr_docs}
        # NOTE(review): the slicing below needs a list, i.e. Python 2
        # dict.keys(). The key order is the dict's order, which is presumably
        # not the Solr rank order - confirm that this is intended.
        doc_ids = ddocs.keys()
        if "ebola" in dtype:
            jdocs = {
                did: exu.extract_ebola(in_file=in_dir.format(did))
                for did in doc_ids[:return_count]
            }
        else:
            jdocs = {}
            for did in doc_ids[:return_count]:
                try:
                    jdocs[did] = json.load(
                        codecs.open(in_dir.format(did), "r", "utf-8"))
                except:
                    # NOTE(review): any failure to load a document is
                    # silently ignored and the handle is never closed
                    # explicitly.
                    pass
        sim_docs = get_se_sim(jdocs, se_results[tid], likehood, dtype=dtype)
        # (doc id, similarity, Solr score) triples.
        sim_ratings = [(did, (sim_docs[did]), ddocs[did][2])
                       for did in sim_docs]
        limit_score = likehood  # (likehood + 0.1) * solr_docs[return_count][2]
        # Highest similarity first; ties are ordered by Solr score.
        sim_ratings = sorted(sim_ratings, key=itemgetter(1, 2), reverse=True)
        logging.info("[#] sim_ratings: %s", sim_ratings)
        frame = {
            "solr_docs": solr_docs,
            "return_count": return_count,
            "sim_docs": sim_docs,
            "ddocs": ddocs,  # "jdocs": jdocs
            "sim_ratings": sim_ratings,
            "iter_count": iter_count,
            "topic_id": tid,
            "topic": topic
        }
        # Candidate ids whose similarity is strictly above the threshold.
        sim_ids = [item[0] for item in sim_ratings if item[1] > limit_score]
        sub_list, sub_set = [], set()
        # solr_p is never changed, so the two slices below always yield an
        # empty seed set and an unchanged doc_ids.
        solr_p = 0
        for i in range(iter_count):
            sub_ids = set(doc_ids[:solr_p])
            doc_ids = doc_ids[solr_p:]
            # Fill the batch up to five ids: take similarity candidates
            # first, then fall back to the remaining retrieved ids.
            while len(sub_ids) < 5:
                if len(sim_ids) > 0:
                    sub_ids.add(sim_ids[0])
                    sim_ids = sim_ids[1:]
                elif len(doc_ids) > 0:
                    sub_ids.add(doc_ids[0])
                    doc_ids = doc_ids[1:]
                else:
                    break
                # Drop ids already submitted in an earlier iteration.
                sub_ids = set([did for did in sub_ids if did not in sub_set])
            sub_set.update(sub_ids)
            # sub_ids is a set, so the order inside one batch is not defined.
            sub_list.extend(list(sub_ids))
            # result = jig.run_itr(
            #     [ddocs[did] for did in sub_ids], topic_id=tid)
            # print_jig_result(result)
        frame["sub_list"] = sub_list
        frame_list.append(frame)
        # jig.judge()
    ps = "{}-{}".format(process_count, process_id)
    # NOTE(review): the output handle is not closed explicitly.
    json.dump(frame_list, codecs.open(out_dir.format(ps), "w", "utf-8"))
    if test:
        jig.judge()
        rejudege(process_count,
                 iter_counts=[1, 2, 3, 5],
                 max_iter_count=iter_count,
                 out_dir=out_dir)
Exemple #16
0
def test_lm_weight_field(w=None, topics=EBOLA_TOPICS):

    logging.info("get all solrs...")
    solrs = get_all_ebola_solrs()
    solrs = [SolrClient(solr_url=SOLR_EBOLA_CLEAN_FULL_WITH_A)]
    print "solr cnt:", len(solrs)
    w = [1] * len(solrs)
    # w = [3, 1, 1, 1, 1] #提高1.5%
    # irsys = IRSys(solrs, ws=w)
    solr = SolrClient(solr_url=SOLR_EBOLA_CLEAN_FULL_WITH_A)

    tot_itr_times = 1
    every_itr_doc_cnt = 5

    jig = JigClient(tot_itr_times=tot_itr_times)

    for tid, topic in topics:

        logging.info("search for topic %s %s" % (tid, topic))
        #
        docs_list = solr.query_fields_by_weight(
            keywords=[topic],
            query_fields=['title', 'content', 'a'],
            ws=[0.3, 0.7, 0.1],
            fl='key')
        # docs_list = irsys.retrieve_docs(topic.split())

        print " =====>>>  CHECK:", docs_list[0]

        key_set = set()
        # 强制再搞一次去重
        logging.info("======> STRICT REMOVE DUP")
        print "before remove dup by key:", len(docs_list)
        new_docs_list = []
        for d in docs_list:
            key = d['key'].strip()
            if key not in key_set:
                new_docs_list.append(d)
        print "after remove dup by key:", len(new_docs_list)

        docs_list = new_docs_list

        logging.info("======> REMOVE DUP END")
        for i in range(tot_itr_times):
            st = i * every_itr_doc_cnt
            en = i * every_itr_doc_cnt + every_itr_doc_cnt
            jig_format_docs = []
            for j_ in range(st, en):
                jig_format_docs.append(
                    (0, docs_list[j_]['key'], docs_list[j_]['score']))
            # jig_format_docs = irsys.items2jigdocs(docs_list)[i*every_itr_doc_cnt:i*every_itr_doc_cnt + every_itr_doc_cnt]

            print "itr:", i, " tid:", tid
            irslt = jig.run_itr(jig_format_docs, topic_id=tid)
            print "itr i:", i, " rslt:"
            if irslt is not None:
                for _ in irslt:
                    print _
            else:
                print None

        jig.judge()
Exemple #17
0
        rslt = jig.run_itr(docs[st_ptr:st_ptr+5])
        st_ptr += 5
        print ("itr:", i) #, " rslt:", rslt
        for _ in rslt:
            print (_)
            
if __name__ == '__main__':
    # Script entry point: retrieve 1000 documents for one fixed query from
    # the paragraph index, run one jig iteration on topic DD16-1, judge, and
    # print the jig's result dictionary.

    topic_query = ["US,Military,Crisis,Response"]
    # The query contains no whitespace, so this split/join leaves it as is.
    topic_query = [','.join(topic_query[0].split() )]
    topic_id = "DD16-1"

    tot_itr_times = 1

    #solr = SolrClient("http://172.22.0.11:8983/solr/ebola_extract/select?")
    solr = SolrClient("http://10.61.2.168:8989/solr/ebola_paragraph/select?")
    jig = JigClient(topic_id, tot_itr_times=tot_itr_times)

    docs = retrieval_top_k_doc(topic_query, solr, 1000)
    interact_with_jig(jig, docs, tot_itr_times)

    jig.judge()
    # Named result_dict so the builtin ``dict`` is not shadowed at module
    # level.
    result_dict = jig.get_result_dict()

    #fl = codecs.open("dict.txt", "w", "utf-8")
    #fl.write(json.dumps(result_dict))

    print(result_dict)

# Module-level flag assigned unconditionally once the module body has run.
__END__ = True
Exemple #18
0
def test_8_tf_idf():

    logging.info("read idf dic... ")
    idf_dic = json.load(codecs.open(idf_dic_path, 'r'))
    logging.info("load idf dic end...")

    solr = SolrClient(BASE_SOLR_URL)
    tot_itr_times = 1
    topic_id = "DD16-1"
    jig = JigClient(topic_id, tot_itr_times=tot_itr_times)
    # topic_query = ["US Military Crisis Response"]
    #
    # vecutils = VecUtils()

    pseudo_cnt = 5
    pseudo_word_cnt = 0

    for topic in EBOLA_TOPICS:
        topic_query = [preprocess_query(topic[1])]
        init_query_words = topic_query[0].split()
        topic_id = topic[0]

        print "pseudo_cnt, pseudo_word_cnt:", pseudo_cnt, pseudo_word_cnt
        print "topic query:", topic_query
        print "topic id:", topic_id

        logging.info("qe...")
        # qe = QE_w2v(topic_query[0], vecutils)
        Query_Range = ["content_p"]
        pre_docs = retrieval_top_k_doc_with_content(topic_query,
                                                    solr,
                                                    k=pseudo_cnt,
                                                    query_range=Query_Range)

        pseudo_top_doc = []
        for d in pre_docs:
            pseudo_top_doc.append(d[3])

        pseudo_top_doc = ' '.join(pseudo_top_doc)

        pseudo_top_doc = pseudo_top_doc.split()
        pseudo_top_words = expand_by_tfidf_candidate_words(
            idf_dic,
            init_words=init_query_words,
            cwords=pseudo_top_doc,
            ret_cnt=pseudo_word_cnt)

        pseudoquery = word2query_by_sim(pseudo_top_words)
        pseudoquery = topic_query[0] + ' ' + pseudoquery
        print "init query before pseudoquery:", topic_query[0]
        print "form pseudoquery:", pseudoquery

        logging.info("qe end...")

        # interact_with_jig_to_change_vec(jig, solr, query, idf_dic, itr_cnt=5)
        # interact_with_jig_to_change_vec(jig, solr, topic_query, idf_dic, itr_cnt=tot_itr_times)

        interact_with_jig_to_change_vec_use_jig_ret_use_pseudo_query(
            jig,
            solr,
            topic_query,
            idf_dic,
            itr_cnt=tot_itr_times,
            topic_id=topic_id,
            query_range=Query_Range,
            use_pseudo=True,
            pseudo_query=pseudoquery)

        jig.judge()