def init_lucene_search(): lucene.initVM(vmargs=['-Djava.awt.headless=true']) print 'lucene', lucene.VERSION print 'Index ', INDEX_DIR base_dir = os.path.dirname(os.path.abspath(sys.argv[0])) # current dir directory = SimpleFSDirectory(File(INDEX_DIR)) searcher = IndexSearcher(DirectoryReader.open(directory)) analyzer = SmartChineseAnalyzer(Version.LUCENE_CURRENT, SmartChineseAnalyzer.getDefaultStopSet()) return searcher, analyzer
def __init__(self, root, storeDir, analyzer): if not os.path.exists(storeDir): os.mkdir(storeDir) store = SimpleFSDirectory(File(storeDir)) # analyzer = LimitTokenCountAnalyzer(analyzer, 1048576) # use smart chinese analyzer analyzer = SmartChineseAnalyzer(Version.LUCENE_CURRENT, SmartChineseAnalyzer.getDefaultStopSet()) config = IndexWriterConfig(Version.LUCENE_CURRENT, analyzer) config.setOpenMode(IndexWriterConfig.OpenMode.CREATE) writer = IndexWriter(store, config) self.indexDocs(root, writer) ticker = Ticker() print 'commit index', threading.Thread(target=ticker.run).start() writer.commit() writer.close() ticker.tick = False print 'done'
def __init__(self, root, storeDir, analyzer): if not os.path.exists(storeDir): os.mkdir(storeDir) store = SimpleFSDirectory(File(storeDir)) # analyzer = LimitTokenCountAnalyzer(analyzer, 1048576) # use smart chinese analyzer analyzer = SmartChineseAnalyzer( Version.LUCENE_CURRENT, SmartChineseAnalyzer.getDefaultStopSet()) config = IndexWriterConfig(Version.LUCENE_CURRENT, analyzer) config.setOpenMode(IndexWriterConfig.OpenMode.CREATE) writer = IndexWriter(store, config) self.indexDocs(root, writer) ticker = Ticker() print 'commit index', threading.Thread(target=ticker.run).start() writer.commit() writer.close() ticker.tick = False print 'done'
doc = Document() doc.add(Field("name", filename, t1)) doc.add(Field("path", root, t1)) if len(contents) > 0: doc.add(Field("contents", contents, t2)) else: print "warning: no content in %s" % filename writer.addDocument(doc) except Exception, e: print "Failed in indexDocs:", e if __name__ == '__main__': if len(sys.argv) < 2: print IndexFiles.__doc__ sys.exit(1) lucene.initVM(vmargs=['-Djava.awt.headless=true']) print 'lucene', lucene.VERSION start = datetime.now() try: # base_dir = os.path.dirname(os.path.abspath(sys.argv[0])) IndexFiles( sys.argv[1], INDEX_DIR, SmartChineseAnalyzer(Version.LUCENE_CURRENT, SmartChineseAnalyzer.getDefaultStopSet())) end = datetime.now() print end - start except Exception, e: print "Failed: ", e raise e
contents = unicode(file.read(), 'utf-8') file.close() doc = Document() doc.add(Field("name", filename, t1)) doc.add(Field("path", root, t1)) if len(contents) > 0: doc.add(Field("contents", contents, t2)) else: print "warning: no content in %s" % filename writer.addDocument(doc) except Exception, e: print "Failed in indexDocs:", e if __name__ == '__main__': if len(sys.argv) < 2: print IndexFiles.__doc__ sys.exit(1) lucene.initVM(vmargs=['-Djava.awt.headless=true']) print 'lucene', lucene.VERSION start = datetime.now() try: # base_dir = os.path.dirname(os.path.abspath(sys.argv[0])) IndexFiles(sys.argv[1], INDEX_DIR, SmartChineseAnalyzer(Version.LUCENE_CURRENT, SmartChineseAnalyzer.getDefaultStopSet())) end = datetime.now() print end - start except Exception, e: print "Failed: ", e raise e