コード例 #1
0
ファイル: miner.py プロジェクト: spetz911/lucifer
def main():
    """Load the saved metadata and build the search index.

    Calls ``load_all_metainfo(METADATA_PATH)`` followed by ``mk_index()``
    and returns ``None``.

    The function used to carry unreachable scratch code after an early
    ``return`` (an index-loading / sample-query smoke test and a few
    ``lucifer.addDocument`` / ``lucifer.showIndex`` calls). It could never
    run, so it has been removed.
    """
    # NOTE(review): a commented-out
    # ``parse_all_metainfo(POSTS_PATH, save_to=METADATA_PATH)`` call preceded
    # this line; presumably it regenerates the metadata file -- confirm
    # before relying on it.
    # The loaded value is not used here; the call is kept in case loading
    # has side effects that mk_index() depends on -- TODO confirm.
    load_all_metainfo(METADATA_PATH)
    mk_index()
コード例 #2
0
ファイル: miner.py プロジェクト: obask/lucifer
def proc_file(path):
    """Add every chapter of the file at *path* to the index.

    The whole file is read as text, a base URL is obtained from that text
    with ``get_url``, and each ``(_id, doc)`` pair produced by
    ``split_text`` is passed to ``lucifer.addDocument`` under the key
    ``url + "index.xhtml#" + _id``.

    Args:
        path: Path of the file to read.

    Returns:
        The number of documents added (0 when ``split_text`` yields nothing).
    """
    # The context manager closes the file even if read() raises.
    with open(path) as src:
        text = src.read()

    url = get_url(text)
    count = 0  # stays 0 when there are no chapters
    for count, (_id, doc) in enumerate(split_text(text), start=1):
        chapter_id = url + "index.xhtml#" + _id
        # NOTE(review): the header is computed but never used. The call is
        # kept in case get_header() validates the chapter or has side
        # effects -- confirm and drop it if it is pure.
        get_header(doc)
        lucifer.addDocument(chapter_id, doc)
    return count
コード例 #3
0
ファイル: miner.py プロジェクト: spetz911/lucifer
def proc_file(path):
    """Index each chapter contained in the file at *path*.

    Reads the file's full text, derives a base URL from it via ``get_url``,
    then registers every ``(_id, doc)`` pair yielded by ``split_text`` with
    ``lucifer.addDocument`` using ``url + "index.xhtml#" + _id`` as the
    document identifier.

    Args:
        path: Path of the file to read.

    Returns:
        How many documents were added (0 if ``split_text`` yields none).
    """
    # Use a context manager so the handle is released even when read() fails.
    with open(path) as handle:
        contents = handle.read()

    url = get_url(contents)
    added = 0  # remains 0 for a file without chapters
    for added, (_id, doc) in enumerate(split_text(contents), start=1):
        chapter_id = url + "index.xhtml#" + _id
        # NOTE(review): result is unused; the call is retained because
        # get_header() may raise on malformed chapters or have side
        # effects -- verify, then remove if it is pure.
        get_header(doc)
        lucifer.addDocument(chapter_id, doc)
    return added
コード例 #4
0
ファイル: miner.py プロジェクト: obask/lucifer
def main():
    """Load the stored metadata, then build the search index.

    Runs ``load_all_metainfo(METADATA_PATH)`` and ``mk_index()`` in that
    order and returns ``None``.

    Statements that previously followed an unconditional ``return`` (a
    ``lucifer.loadIndex`` / ``lucifer.searchQuery`` smoke test and some
    ``lucifer.addDocument`` / ``lucifer.showIndex`` calls) were unreachable
    and have been deleted.
    """
    # NOTE(review): the original had a commented-out
    # ``parse_all_metainfo(POSTS_PATH, save_to=METADATA_PATH)`` call here;
    # presumably it rebuilds the metadata file -- confirm before use.
    # The return value is discarded; the call itself stays in case it has
    # side effects that mk_index() needs -- TODO confirm.
    load_all_metainfo(METADATA_PATH)
    mk_index()