def main(argv):
    """Parse command-line options and run one query against a Lucene index.

    Recognised options:
        -i, --lucene_index  directory of the index to open (required)
        -q, --query         query string handed to doQuery (default: '')
        -s, --start         integer handed to doQuery as `start` (default: 0)
        -r, --nb_results    integer handed to doQuery as `nb_results`
                            (default: 10)

    Prints the usage text and exits when the options cannot be parsed, when
    no option is given at all, or when no index directory was supplied.
    """
    # defaults
    searcher = None
    query = ''
    start = 0
    nb_results = 10

    try:
        opts, args = getopt.getopt(
            argv, "i:q:s:r:",
            ["lucene_index=", "query=", "start=", "nb_results="])
    except getopt.GetoptError:
        usage()
        sys.exit(2)

    if not opts:
        usage()
        sys.exit()

    for opt, arg in opts:
        if opt in ("-i", "--lucene_index"):
            fsDir = FSDirectory.getDirectory(arg, False)
            searcher = IndexSearcher(fsDir)
        elif opt in ("-q", "--query"):
            query = arg
        elif opt in ("-s", "--start"):
            start = int(arg)
        elif opt in ("-r", "--nb_results"):
            nb_results = int(arg)

    # Without -i there is nothing to search.  Previously this case fell
    # through to doQuery and died with an UnboundLocalError on `searcher`.
    if searcher is None:
        usage()
        sys.exit(2)

    doQuery(searcher, query, start, nb_results)
def searchDocuments(self, version, query, attribute=None):
    """Search the "contents" field and return the newest hit per owner.

    Only hits whose 'version' is not greater than `version` are considered.
    For each owner the hit with the highest such version is a candidate; a
    candidate is recorded only if `attribute` is None or equals the hit's
    'attribute' value.

    Returns a dict mapping owner UUID to a (version, attribute) tuple.
    The searcher is closed before returning, including when the query
    fails to parse or the search raises.
    """
    directory = DbDirectory(self.store.txn, self._db,
                            self.store._blocks._db, self._flags)
    searcher = IndexSearcher(directory)

    try:
        parsed = QueryParser.parse(query, "contents", StandardAnalyzer())

        docs = {}
        for _, doc in searcher.search(parsed):
            ver = long(doc['version'])
            if ver > version:
                continue

            uuid = UUID(doc['owner'])
            known = docs.get(uuid)
            # Keep what we have unless this hit is strictly newer.
            if known is not None and known[0] >= ver:
                continue

            docAttr = doc['attribute']
            if attribute is None or attribute == docAttr:
                docs[uuid] = (ver, docAttr)
    finally:
        # Release the searcher even when parsing or searching raised.
        searcher.close()

    return docs
def _getdoc(self, filename, args): doc = "0 |" m = self.__class__.p.search(args) if m and filename == '/search': # doc = doQuery(m.group(1).split('+'), int(m.group(2)), int(m.group(3))) doc = doQuery(urllib.unquote(m.group(1)), int(m.group(2)), int(m.group(3))) return doc def do_GET(self): url = urlparse.urlsplit(self.path) doc = self._getdoc(url[2], url[3]) self._writeheaders(doc) self.wfile.write(doc) class SearchServer(HTTPServer): allow_reuse_address = 1 if __name__ == '__main__': if len(sys.argv) != 2: print "Usage: python lucene_server.py index_dir" else: indexDir = sys.argv[1] fsDir = FSDirectory.getDirectory(indexDir, False) searcher = IndexSearcher(fsDir) serveraddr = ('', SERVER_PORT) srvr = SearchServer(serveraddr, SearchRequestHandler) print "Ready to serve search queries" srvr.serve_forever()
#!/usr/bin/python from sys import argv from PyLucene import FSDirectory, IndexSearcher, TermQuery, Term id = argv[1].strip() directory = FSDirectory.getDirectory( 'chipy-index', False ) searcher = IndexSearcher( directory ) query = TermQuery( Term( 'id', id ) ) hits = searcher.search( query ) doc = hits.doc(0) print "ID: %s" % doc.getField('id').stringValue() print "From: %s" % doc.getField('from').stringValue() print "Subject: %s" % doc.getField('subject').stringValue() print "Date: %s" % doc.getField('date').stringValue() print doc.getField('body').stringValue() print
results = [] for i, doc in hits: results.append([doc.get("name"), doc.get("owner").encode('gbk'), doc.get("title").encode('gbk')]) # sort result results.sort(lambda x,y: cmp(x[0],y[0])) for name,owner,title in results: print name, owner, title def test_fixture(): global BOARDSPATH BOARDSPATH = './' if __name__ == '__main__': #test_fixture() board = sys.argv[1] querystr = sys.argv[2].decode('gbk').strip() path = BOARDSPATH+board+'/'+RECENT_INDEX if not os.path.exists(path) or len(querystr) == 0: sys.exit(-1) directory = FSDirectory.getDirectory(path, False) searcher = IndexSearcher(directory) analyzer = StandardAnalyzer() run(searcher, analyzer, querystr) searcher.close()
'path' and 'name' fields for each of the hits it finds in the index. Note that search.close() is currently commented out because it causes a stack overflow in some cases. """ def run(searcher, analyzer): while True: print print "Hit enter with no input to quit." command = raw_input("Query:") if command == '': return print print "Searching for:", command query = QueryParser("contents", analyzer).parse(command) hits = searcher.search(query) print "%s total matching documents" % hits.length() for i, doc in hits: print 'path:', doc.get("path"), 'name:', doc.get("name"), 100*hits.score(i) if __name__ == '__main__': STORE_DIR = "index" print 'PyLucene', VERSION, 'Lucene', LUCENE_VERSION directory = FSDirectory.getDirectory(STORE_DIR, False) searcher = IndexSearcher(directory) analyzer = StandardAnalyzer() run(searcher, analyzer) searcher.close()
def getIndexSearcher(self):
    """Return a new IndexSearcher built on self.getIndexReader().

    A fresh searcher is created on every call.  An earlier variant, now
    disabled, kept one searcher per index path in IndexSupport.searchers and
    replaced it whenever its index reader was no longer current.
    """
    reader = self.getIndexReader()
    return IndexSearcher(reader)
#!/usr/bin/env python2.4 from sys import argv from PyLucene import FSDirectory, IndexSearcher, QueryParser, StandardAnalyzer string = argv[1].strip() directory = FSDirectory.getDirectory( 'chipy-index', False ) searcher = IndexSearcher( directory ) query = QueryParser.parse( string, 'all', StandardAnalyzer() ) hits = searcher.search( query ) for i in range(0,hits.length()): doc = hits.doc(i) print "ID: %s" % doc.getField('id').stringValue() print "From: %s" % doc.getField('from').stringValue() print "Subject: %s" % doc.getField('subject').stringValue() print "Date: %s" % doc.getField('date').stringValue() print
def _prepareSearcher(self):
    """Replace self._searcher with a fresh IndexSearcher on self._directory.

    A searcher that is already held is closed before the new one is opened.
    """
    # TODO: test index.currentVersion to update searcher,
    # allowing concurrence in search and write
    previous = self._searcher
    if previous:
        previous.close()
    self._searcher = IndexSearcher(self._directory)
def __init__(self, directory, shaManager):
    """Keep `directory` and `shaManager` and open a searcher on `directory`."""
    self._shaManager = shaManager
    self._directory = directory
    self._searcher = IndexSearcher(directory)
class SearchAppService: def __init__(self, directory, shaManager): self._directory = directory self._searcher = IndexSearcher(self._directory) self._shaManager = shaManager def _prepareSearcher(self): # TODO: test index.currentVersion to update searcher, # allowing concurrence in search and write if self._searcher: self._searcher.close() self._searcher = IndexSearcher(self._directory) def search(self, query): print "Searching ", query import re if re.match("^[a-f0-9]{40}$", query): return self.searchBySHA(query) elif query.startswith("http://"): return self.searchByURI(query) elif '@' in query: if not query.startswith('mailto'): query = "mailto:" + query return self.searchBySHA(sha.new(query).hexdigest()) else: return self.searchByName(query) def searchByURI(self, query): #print "Searching by URI" query = "\"" + query + "\"" parser = QueryParser("uri", KeywordAnalyzer()) return self._performSearch(parser, query) def searchByName(self, query): #print "Preguntando por nombre" parser = QueryParser("name", StandardAnalyzer()) return self._performSearch(parser, query) def searchBySHA(self, query): print "Preguntando por SHA" uris = self._shaManager.searchSha(query) if uris == None or len(uris) == 0 : return [] def rebuildFoafs(uri): foaf = self.searchByURI(uri) if foaf == []: return [{'sha':query, 'uri':uri}] else: return foaf return reduce(operator.concat, map(rebuildFoafs, uris)) def _performSearch(self, queryParser,query): q = queryParser.parse(query) self._prepareSearcher() hits = self._searcher.search(q) result = [] for i in range(0, hits.length()): d = hits.doc(i) result.append(FoafDocumentFactory.getFOAFFromDocument(d)) return result def close(self): self._shaManager.close()