def main():
    """Load metadata via ``load_all_metainfo`` and call ``mk_index()``.

    Everything after the first ``return`` is unreachable debugging
    scratch (index loading, a sample search query, ad-hoc document
    additions).
    """
    # NOTE(review): the statement layout of this function was reconstructed;
    # both ``return`` statements are read as bare early exits -- confirm
    # against the original file.
    # zz = parse_all_metainfo(POSTS_PATH, save_to=METADATA_PATH)
    # NOTE(review): ``xx`` is bound but never read in this function.
    xx = load_all_metainfo(METADATA_PATH)
    mk_index()
    return

    # --- unreachable: manual search experiment against a saved index ---
    lucifer.loadIndex(INDEX_PATH)
    ss = "Goethe saw the sea for the first time in his life when he"
    raw_res = lucifer.searchQuery(ss, 17, 1)
    print(raw_res)
    # res = json.loads(raw_res)
    # trunc = sorted(res.items(), key= lambda x: x[1], reverse=True)[:10]
    # pprint(trunc)
    # print("res size =", len(res))
    # print("index size = %d kB" % (os.path.getsize(INDEX_PATH) // 1024))
    # return
    return

    # --- unreachable: ``doc2`` / ``doc3`` are not defined in this function ---
    lucifer.addDocument(doc2)
    lucifer.addDocument(doc3)
    lucifer.showIndex("c++")
    lucifer.showIndex("garbage")
def proc_file(path):
    """Add every chunk of the file at *path* to the ``lucifer`` index.

    The file is read in full, its base URL is obtained with ``get_url``,
    and each ``(_id, doc)`` pair yielded by ``split_text`` is added under
    the identifier ``url + "index.xhtml#" + _id``.

    Args:
        path: Path of the text file to read.

    Returns:
        The number of documents passed to ``lucifer.addDocument``.
    """
    # Context manager closes the handle even if read() raises.
    with open(path) as f:
        text = f.read()

    url = get_url(text)
    count = 0
    for _id, doc in split_text(text):
        chapter_id = url + "index.xhtml#" + _id
        # NOTE(review): the header is only consumed by the debug prints
        # below; the call is kept in case get_header validates or raises --
        # confirm it is side-effect free before removing it.
        hh = get_header(doc)
        # print(chapter_id)
        # print(">> ", hh)
        lucifer.addDocument(chapter_id, doc)
        count += 1
    return count