# NOTE(review): the statements down to writer.addDocument(doc) are the tail of
# a definition that starts before this chunk (they use `self`, `ii`, `t1`,
# `t2`, `doc` and `writer` from that scope); re-indent them to match the
# enclosing body. They are reproduced unchanged.
# One Lucene field is added per attribute of the question record `ii`.
doc.add(Field("answer", ii['Answer'], t1))
doc.add(Field("qid", ii['Question ID'], t1))
doc.add(Field("category", ii['category'], t1))
doc.add(Field("position", ii['Sentence Position'], t1))
doc.add(Field("question", ii['Question Text'], t2))
# The text returned by the wiki reader for the answer is stored with the question.
doc.add(Field("wiki_plain", self.wiki_reader.get_text(ii['Answer']), t2))
writer.addDocument(doc)


if __name__ == '__main__':
    # Script entry point: index the training split of the corpus given as the
    # first command-line argument.
    if len(sys.argv) < 2:
        # No corpus path supplied: show the IndexDocs docstring as usage text.
        print(IndexDocs.__doc__)
        sys.exit(1)

    # The JVM has to be started before any Lucene class is used.
    lucene.initVM(vmargs=['-Djava.awt.headless=true'])
    print("lucene %s" % (lucene.VERSION,))

    start = datetime.now()
    try:
        train_path = sys.argv[1]
        train_set = Corpus()
        train_set.read(train_path)
        # Only the training split is indexed; test_bench is not used here.
        train_bench, test_bench = train_set.train_test_split()

        # The index is written to INDEX_DIR next to this script.
        base_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
        IndexDocs(train_bench, os.path.join(base_dir, INDEX_DIR),
                  StandardAnalyzer(Version.LUCENE_CURRENT))

        end = datetime.now()
        # Elapsed wall-clock time for reading and indexing.
        print(end - start)
    except Exception as e:
        # Two spaces after the colon keep the output identical to the former
        # `print "Failed: ", e` statement.
        print("Failed:  %s" % (e,))
        # Bare `raise` re-raises with the original traceback; `raise e` would
        # replace it with one pointing at this handler on Python 2.
        raise