# coding: utf-8
"""
created by artemkorkhov at 2016/01/19
"""
import argparse

import utils

# Script entry point: when this file is executed directly, call
# utils.get_wiki_docs() with no arguments (presumably this fetches the wiki
# documents with the helper's default settings -- confirm against utils).
# NOTE(review): this guard runs unconditionally and before the
# argument-parsing entry point further down in the file; confirm that both
# entry points are meant to coexist.
if __name__ == '__main__':
    utils.get_wiki_docs()
if __name__ == '__main__':
    # Command-line interface.
    # --get_data and --index are integer flags (0 = off, non-zero = on); they
    # are kept as integers so existing invocations such as `--index 1` still
    # work.
    parser = argparse.ArgumentParser()
    parser.add_argument('--keywords', type=str, default='data/keywords.txt',
                        help='keywords filename')
    parser.add_argument('--get_data', type=int, default=0,
                        help='flag to get wiki data for IR')
    parser.add_argument('--index', type=int, default=0,
                        help='flag to (re)build the Lucene index from the data directory')
    parser.add_argument('--qname', type=str, default='data/validation_set.tsv',
                        help='file name for validation')
    args = parser.parse_args()

    # Start the embedded JVM in headless mode; the Lucene classes below are
    # only used after this call.
    lucene.initVM(vmargs=['-Djava.awt.headless=true'])
    # Single-argument form prints the same text under Python 2 and Python 3.
    print('lucene ' + str(lucene.VERSION))

    # The index lives next to the script, not in the current working directory.
    base_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
    index_path = os.path.join(base_dir, INDEX_DIR)

    # Optional step 1: collect the wiki documents for the given keywords file.
    if args.get_data:
        utils.get_wiki_docs(args.keywords)

    # Optional step 2: index the files under DATA_DIR into index_path.
    if args.index:
        IndexFiles.IndexFiles(DATA_DIR, index_path,
                              StandardAnalyzer(Version.LUCENE_CURRENT))

    # Open the index for searching and run the evaluation on the question file.
    directory = SimpleFSDirectory(File(index_path))
    reader = DirectoryReader.open(directory)
    searcher = IndexSearcher(reader)
    analyzer = StandardAnalyzer(Version.LUCENE_CURRENT)
    try:
        run_method2(searcher, analyzer, args.qname)
    finally:
        # Drop the searcher and close the reader even when the run fails, so
        # the index files are not left open.
        del searcher
        reader.close()