예제 #1
0
if __name__ == "__main__":
    # Batch question answering: read one entry per line from INPUT_FILE, ask
    # the Search engine for answers to each query, and write one
    # "<id>\t<answer>" line per entry to "<INPUT_FILE>-output.txt".

    # Pickle streams are binary, so the dump is opened in "rb" mode and the
    # handle is closed as soon as loading finishes.
    # NOTE(review): pickle.load can execute arbitrary code embedded in the
    # file -- only load dumps produced by a trusted source.
    with open("dump/database-simplified.dump", "rb") as dump_file:
        db = pickle.load(dump_file)

    # Files whose name starts with "test" carry the query in the second
    # whitespace-separated column; for every other file the first column is
    # used both as the id and as the query.
    query_in_second_column = INPUT_FILE.startswith("test")

    ids = []
    queries = []
    with codecs.open(INPUT_FILE, "r", "utf-8") as fin:
        for line in fin:
            parts = line.split()
            if not parts:
                # Blank lines hold neither an id nor a query; skip them
                # instead of failing on parts[0].
                continue
            ids.append(parts[0])
            queries.append(parts[1] if query_in_second_column else parts[0])

    # The engine is built the same way in both modes; the KNN mode only adds
    # the word-embedding model on top of it.
    search = Search(database=db)
    if USE_KNN:
        search.embedding = Word2Vec.load("dump/w2v-model-simplified.dump")

    # The context manager guarantees the output is flushed and closed even if
    # answering one of the queries raises.
    with codecs.open(INPUT_FILE + "-output.txt", "w", "utf-8") as fout:
        for query_id, query in zip(ids, queries):
            results = search.answer(query, length=2, open=OPEN_MODE)
            if results:
                # Only the first result is reported; each result unpacks as
                # (score, <unused>, answer, sentence).
                _score, _, answer, _sentence = results[0]
                fout.write("%s\t%s\n" % (query_id, answer))
            else:
                # No result for this query: emit the "none" placeholder.
                fout.write("%s\t%s\n" % (query_id, "none"))
예제 #2
0
파일: test2.py 프로젝트: cjx3721/QA
#coding=utf-8

import codecs
from data import Database
from search import Search
from question import parseQuestion

if __name__ == "__main__":
    # Answer every query listed in sample.txt and dump all results returned
    # for each query to output-50.txt, one "score<TAB>answer<TAB>sentence"
    # line per result, with a dashed separator after each query.

    # The database can alternatively be built from the extracted wiki dump
    # (kept for reference; presumably how the pickled dump was produced):
    # db = Database("data/zhwiki-extracted/", conditions = [
    #     Database.cond_length(50),
    #     Database.cond_title(lambda t: not t.startswith("Wikipedia:")),
    #     Database.cond_title(lambda t: not t.startswith("File:")),
    #     Database.cond_title(lambda t: not t.startswith("Draft:"))
    # ])
    import pickle

    # Pickle streams are binary, so the dump is opened in "rb" mode and the
    # handle is closed as soon as loading finishes.
    # NOTE(review): pickle.load can execute arbitrary code embedded in the
    # file -- only load dumps produced by a trusted source.
    with open("dump/database-simplified.dump", "rb") as dump_file:
        db = pickle.load(dump_file)

    # The query is the first whitespace-separated token of each line; blank
    # lines are skipped instead of failing on the [0] lookup.
    queries = []
    with codecs.open("sample.txt", "r", "utf-8") as fin:
        for line in fin:
            tokens = line.split()
            if tokens:
                queries.append(tokens[0])

    search = Search(db)

    # The context manager guarantees the output is flushed and closed even if
    # answering one of the queries raises.
    with codecs.open("output-50.txt", "w", "utf-8") as fout:
        for query in queries:
            fout.write("%s\n" % query)
            results = search.answer(query, length=2, open=True)
            # Each result unpacks as (score, <unused>, answer, sentence).
            for score, _, answer, sentence in results:
                # "%d" writes the score as an integer (a fractional score
                # would be truncated in the output).
                fout.write("%d\t%s\t%s\n" % (score, answer, sentence))
            fout.write("---------------------------------\n")