def func_pic(command):
    """Run *command* against the picture index and return the hits.

    Attaches the calling thread to the Lucene JVM, opens the index stored
    in the "index for pic" directory and delegates the query to
    ``run_pic``.

    Args:
        command: Query string, forwarded unchanged to ``run_pic``.

    Returns:
        The ``(imgurl, url, urltitle)`` triple produced by ``run_pic``.
    """
    global vm_env
    # PyLucene requires a thread to be attached to the JVM before it calls
    # into Lucene; the environment handle is kept in the module global.
    vm_env = lucene.getVMEnv()
    vm_env.attachCurrentThread()

    STORE_DIR = "index for pic"
    directory = SimpleFSDirectory(File(STORE_DIR))
    searcher = IndexSearcher(directory, True)
    try:
        analyzer = lucene.WhitespaceAnalyzer(Version.LUCENE_CURRENT)
        imgurl, url, urltitle = run_pic(command, searcher, analyzer)
    finally:
        # Close the searcher even when the search raises, so the index
        # reader is not leaked.
        searcher.close()
    return imgurl, url, urltitle
def func(command):
    """Run *command* against the website index and return the hits.

    Attaches the calling thread to the Lucene JVM, opens the index stored
    in the "index for website" directory and delegates the query to
    ``run``.

    Args:
        command: Query string, forwarded unchanged to ``run``.

    Returns:
        The ``(title, url, surround)`` triple produced by ``run``.
    """
    global vm_env
    # PyLucene requires a thread to be attached to the JVM before it calls
    # into Lucene; the environment handle is kept in the module global.
    vm_env = lucene.getVMEnv()
    vm_env.attachCurrentThread()

    STORE_DIR = "index for website"
    directory = SimpleFSDirectory(File(STORE_DIR))
    searcher = IndexSearcher(directory, True)
    try:
        analyzer = lucene.WhitespaceAnalyzer(Version.LUCENE_CURRENT)
        title, url, surround = run(searcher, analyzer, command)
    finally:
        # Close the searcher even when the search raises, so the index
        # reader is not leaked.
        searcher.close()
    return title, url, surround
print "Searching for:", command query = QueryParser(Version.LUCENE_CURRENT, "contents", analyzer).parse(command) #用analyzer来对查询语句进行词法分析和语言处理。 #QueryParser调用parser进行语法分析,形成查询语法树,放到Query中。 scoreDocs = searcher.search(query, 50).scoreDocs #IndexSearcher调用search对查询语法树Query进行搜索,得到结果 print "%s total matching documents." % len(scoreDocs), '\n' for scoreDoc in scoreDocs: doc = searcher.doc(scoreDoc.doc) print "------------------------" print 'path:', doc.get("path") print 'title:', doc.get("title") print 'url:', doc.get("url") print 'name:', doc.get("name") if __name__ == '__main__': STORE_DIR = "index" initVM() print 'lucene', VERSION directory = SimpleFSDirectory(File(STORE_DIR)) #索引文件存放的位置 searcher = IndexSearcher(directory, True) #索引信息读入到内存,创建IndexSearcher准备进行搜索 analyzer = lucene.WhitespaceAnalyzer( Version.LUCENE_CURRENT ) #analyzer用来对查询语句进行词法分析和语言处理的,和IndexFiles.py中使用同样的analyzer。 run(searcher, analyzer) searcher.close()
lucene.Field.Store.YES, lucene.Field.Index.NOT_ANALYZED)) qestion.add( lucene.Field("qst_follow", qst_follow, lucene.Field.Store.YES, lucene.Field.Index.NOT_ANALYZED)) qestion.add( lucene.Field("qst_ans", qst_ans, lucene.Field.Store.YES, lucene.Field.Index.NOT_ANALYZED)) qestion.add( lucene.Field("qst_num", qst_num, lucene.Field.Store.YES, lucene.Field.Index.NOT_ANALYZED)) writer.addDocument(qestion) except Exception, e: print "Failed in indexDocs:", e f.close()


# Script entry point: start the JVM, run IndexFiles and print how long the
# run took.
if __name__ == '__main__':
    lucene.initVM()
    print 'lucene', lucene.VERSION
    start = datetime.now()
    try:
        # NOTE(review): presumably 'analyzed_zhihu' is the input location and
        # "index_qst" the index directory -- confirm against IndexFiles.
        IndexFiles('analyzed_zhihu', "index_qst", lucene.WhitespaceAnalyzer(lucene.Version.LUCENE_CURRENT))
        end = datetime.now()
        # The elapsed time is only printed when IndexFiles did not raise.
        print end - start
    except Exception, e:
        # Top-level boundary: report the failure message and fall through.
        print "Failed: ", e
lucene.Field.Index.ANALYZED)) doc.add(lucene.Field("url", url, lucene.Field.Store.YES, lucene.Field.Index.NOT_ANALYZED)) doc.add(lucene.Field("urltitle", title, lucene.Field.Store.YES, lucene.Field.Index.NOT_ANALYZED)) writer.addDocument(doc) #IndexWriter调用函数addDocument将索引写到索引文件夹中 print "----------------------------------------------------" except Exception, e: print "Failed in indexDocs:", e else: break t.close()


# Script entry point: start the JVM, run IndexFiles and print how long the
# run took.
if __name__ == '__main__':
    # The sys.argv-driven invocation is disabled; the arguments passed to
    # IndexFiles below are hard-coded.
    lucene.initVM()  # initialize the Java virtual machine
    print 'lucene', lucene.VERSION
    start = datetime.now()
    try:
        # NOTE(review): presumably 'html' is the input location and
        # "index for pic" the index directory -- confirm against IndexFiles.
        IndexFiles('html', "index for pic", lucene.WhitespaceAnalyzer(lucene.Version.LUCENE_CURRENT))
        end = datetime.now()
        # The elapsed time is only printed when IndexFiles did not raise.
        print end - start
    except Exception, e:
        # Top-level boundary: report the failure message and fall through.
        print "Failed: ", e