Example #1
0
            doc.add(Field("answer", ii['Answer'], t1))
            doc.add(Field("qid", ii['Question ID'], t1))
            doc.add(Field("category", ii['category'], t1))
            doc.add(Field("position", ii['Sentence Position'], t1))
            doc.add(Field("question", ii['Question Text'], t2))
            doc.add(Field("wiki_plain",
                          self.wiki_reader.get_text(ii['Answer']), t2))
            writer.addDocument(doc)


if __name__ == '__main__':
    # Script entry point.
    #
    # Reads the corpus file named by the first command-line argument, splits
    # it with Corpus.train_test_split(), and hands the first part of the split
    # to IndexDocs together with a StandardAnalyzer. The index directory is
    # INDEX_DIR resolved relative to the directory containing this script.
    if len(sys.argv) < 2:
        # No corpus path given: print the IndexDocs docstring and exit with
        # a non-zero status.
        print(IndexDocs.__doc__)
        sys.exit(1)

    # Start the JVM, passing -Djava.awt.headless=true as a VM argument.
    lucene.initVM(vmargs=['-Djava.awt.headless=true'])
    # Single-argument print(...) with %-formatting produces the same output
    # as the statement form `print 'lucene', lucene.VERSION` on Python 2 and
    # is also valid syntax on Python 3.
    print('lucene %s' % (lucene.VERSION,))

    start = datetime.now()
    try:
        train_path = sys.argv[1]
        train_set = Corpus()
        train_set.read(train_path)
        # Only the first element of the split is indexed; test_bench is not
        # used by this script.
        train_bench, test_bench = train_set.train_test_split()
        # Anchor the index directory to the script's own location rather
        # than the current working directory.
        base_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
        IndexDocs(train_bench, os.path.join(base_dir, INDEX_DIR),
                  StandardAnalyzer(Version.LUCENE_CURRENT))
        end = datetime.now()
        # Report elapsed wall-clock time as a timedelta.
        print(end - start)
    except Exception as e:
        # Two spaces after the colon reproduce the original statement-form
        # output (`print "Failed: ", e` inserts a separator space).
        print("Failed:  %s" % (e,))
        # Bare `raise` re-raises the active exception with its original
        # traceback; `raise e` would restart the traceback at this line on
        # Python 2 and hide where the failure actually happened.
        raise