def test_cluster_terms_bow(self): return mysearcher = searcher("../data/index/inv", "../data/index/docvec", in_memory=True, score="bow", filter_type="cluster") while True: query = input("query input: ") if query == "q": break print(mysearcher.search(query))
def test_high_idf_tf_idf(self): return mysearcher = searcher("../data/index/inv", "../data/index/docvec", in_memory=True, score="tf-idf", filter_type="high-idf") while True: query = input("query input: ") if query == "q": break print(mysearcher.search(query)) break
def test_heap_bow(self): return mysearcher = searcher("../data/index/inv", "../data/index/docvec", in_memory=True, score="bow") while True: query = input("query input: ") if query == "q": break print(mysearcher.search(query)) break
def test_new_searcher(self):
    """Interactive query loop against a searcher built with the meta and stopword files.

    Builds the searcher from the inverted index, document vectors,
    ``meta.json`` and the stopword list, then repeatedly reads a query
    from standard input and prints the result of searching it with
    ``score="tf-idf"`` and ``filter_type="cluster"``. Entering ``q``
    ends the loop.
    """
    # return  (left commented out, so this method is active)
    engine = searcher(
        "../data/index/inv",
        "../data/index/docvec",
        "../data/index/meta.json",
        "../resources/stopwords/cn_stopwords.txt",
        in_memory=True,
        cluster_load=1,
        tf_idf=1,
    )
    # Keep prompting and printing search results until the user enters "q".
    for text in iter(lambda: input("query input: "), "q"):
        print(engine.search(text, score="tf-idf", filter_type="cluster"))
for s in score: begin = time.time() result, words = my_searcher.search(query, score=s, filter_type=t) end = time.time() - begin res[t][s] = {'result': result, 'time': end} res['time'] = time.time() - begin query_result_file.write(json.dumps(res, ensure_ascii=False) + "\n") query_result_file.close() if __name__ == "__main__": title_query = read_file("./title_query.txt") content_query = read_file("./content_query.txt") print("load searcher") begin = time.time() my_searcher = searcher("../data/index/inv", "../data/index/docvec", "../data/index/meta.json", "../resources/stopwords/cn_stopwords.txt", in_memory=True, proc_num=8, cluster_load=1, tf_idf=1) print("load done, use: %fs" % (time.time() - begin)) print("run title query") run_and_write("./title_query_result.json", title_query) print("run content query") run_and_write("./content_query_result.json", content_query)