if __name__ == "__main__": db = pickle.load(open("dump/database-simplified.dump")) fin = codecs.open(INPUT_FILE, "r", "utf-8") lines = map(lambda l: l.strip(), fin.readlines()) fin.close() ids = [] queries = [] for line in lines: parts = line.strip().split() ids.append(parts[0]) if INPUT_FILE.startswith("test"): queries.append(parts[1]) else: queries.append(parts[0]) if USE_KNN: search = Search(database=db) search.embedding = Word2Vec.load("dump/w2v-model-simplified.dump") else: search = Search(database=db) fout = codecs.open(INPUT_FILE + "-output.txt", "w", "utf-8") for id, query in zip(ids, queries): results = search.answer(query, length=2, open=OPEN_MODE) if results: score, _, answer, sentence = results[0] # result = (score, sentence, answer) fout.write("%s\t%s\n" % (id, answer)) else: fout.write("%s\t%s\n" % (id, "none"))
#coding=utf-8 import codecs from data import Database from search import Search from question import parseQuestion if __name__ == "__main__": #db = Database("data/zhwiki-extracted/", conditions = [ # Database.cond_length(50), # Database.cond_title(lambda t: not t.startswith("Wikipedia:")), # Database.cond_title(lambda t: not t.startswith("File:")), # Database.cond_title(lambda t: not t.startswith("Draft:")) #]) import pickle db = pickle.load(open("dump/database-simplified.dump")) fin = codecs.open("sample.txt", "r", "utf-8") lines = map(lambda l: l.strip(), fin.readlines()) fin.close() queries = map(lambda l: l.split()[0], lines) fout = codecs.open("output-50.txt", "w", "utf-8") search = Search(db) for query in queries: fout.write("%s\n" % query) results = search.answer(query, length=2, open=True) for score, _, answer, sentence in results: # result = (score, sentence, answer) fout.write("%d\t%s\t%s\n" % (score, answer, sentence)) fout.write("---------------------------------\n")