def main():
    """An example of using the Indexer wrapper.

    Builds a fresh index over the files in the ``aesop`` directory,
    indexes each file's text and title, then runs a handful of demo
    queries and prints the hits.

    NOTE(review): this definition was whitespace-collapsed onto a single
    physical line in the original, which turned everything after the
    first ``#`` into a comment — the body below is the reconstructed,
    properly formatted code, ported to Python 3 ``print()`` calls.
    """
    # TODO Command line argument passing
    # TODO e.g.
    # TODO -d directory to store index in
    # TODO -i directory to recursively index
    import time
    tt = time.time()
    filedir = 'aesop'
    indexName = 'aesopind'

    # Remove results of previous runs so every run starts from a clean index.
    if os.path.exists(indexName):
        for f in os.listdir(indexName):
            os.remove(os.path.join(indexName, f))
        os.rmdir(indexName)

    # Create a new Index
    index = Index(indexName, create=True)
    index.setMergeFactor(20)

    # Index every regular file in filedir, skipping subdirectories and symlinks.
    for name in os.listdir(filedir):
        f = os.path.join(filedir, name)
        if os.path.isdir(f) or os.path.islink(f):
            continue
        # 'with' guarantees the handle is closed even if decode() raises
        # (the original leaked the file object).
        with open(f, 'rb') as fh:
            text = fh.read().decode('latin-1')
        # presumably the title is everything before the first triple
        # newline in these files — TODO confirm against the corpus
        title = text.split('\n\n\n')[0]
        print('indexing:', f)
        # The next line creates a Document with fields named text, title
        # and filename.  'filename' is created as Keyword since the
        # keyword argument is preceded by '_'.  Naughty but expedient.
        # (The '__' prefix on __title presumably selects yet another
        # field type — confirm in the Indexer wrapper.)
        index.index(text=text, __title=title, _filename=f)

    # Uncomment the following line to optimize the index.
    # Have a look in the index dir before you optimize.
    # You will probably see dozens of files from several segments.
    # optimize() merges all the segments into one.  It can be quite an
    # expensive operation, but it can save space and speed up searches.
    #index.optimize()

    queries = [
        'fox',
        u'int\xf4',
        'python',
        'fox python',
        '"the Fox and the"',
        'the fox and python',
    ]
    for q in queries:
        hits = index.find(q)
        print(q, hits)
        for h in hits:
            print('\tFound in %s (%s)' % (h.get('filename'), h.get('title')))

    index.close()
    print('Elapsed time:', time.time() - tt)
def main():
    """An example of using the Indexer wrapper.

    Re-creates the ``aesopind`` index from the files in ``aesop``, then
    demonstrates a few searches.

    NOTE(review): this is a near-byte duplicate of the preceding
    ``main()`` definition (only incidental spacing differed); being the
    later definition, it silently shadows the earlier one at import
    time.  Consider deleting one of the two.  Like its twin, it had been
    whitespace-collapsed onto one physical line, leaving everything
    after the first ``#`` commented out; the body below is the
    reconstructed code, ported to Python 3 ``print()`` calls.
    """
    # TODO Command line argument passing
    # TODO e.g.
    # TODO -d directory to store index in
    # TODO -i directory to recursively index
    import time
    tt = time.time()
    filedir = 'aesop'
    indexName = 'aesopind'

    # Remove results of previous runs
    if os.path.exists(indexName):
        for f in os.listdir(indexName):
            os.remove(os.path.join(indexName, f))
        os.rmdir(indexName)

    # Create a new Index
    index = Index(indexName, create=True)
    index.setMergeFactor(20)

    # Get the files; skip directories and symlinks.
    files = os.listdir(filedir)
    for name in files:
        f = os.path.join(filedir, name)
        if os.path.isdir(f) or os.path.islink(f):
            continue
        # Close the handle deterministically (the original leaked it).
        with open(f, 'rb') as fh:
            text = fh.read().decode('latin-1')
        # presumably the title precedes the first triple newline — TODO confirm
        title = text.split('\n\n\n')[0]
        print('indexing:', f)
        # The next line creates a Document whose fields are text, title
        # and filename.  The latter is created as Keyword since the
        # name is preceded by '_'.  Naughty but expedient.
        index.index(text=text, __title=title, _filename=f)

    # Uncomment the following line to optimize the index.
    # Have a look in the index dir before you optimize.
    # You will probably see dozens of files from several segments.
    # optimize() merges all the segments into one.  It can be quite an
    # expensive operation, but it can save space and speed up searches.
    #index.optimize()

    queries = [
        'fox',
        u'int\xf4',
        'python',
        'fox python',
        '"the Fox and the"',
        'the fox and python',
    ]
    for q in queries:
        hits = index.find(q)
        print(q, hits)
        for h in hits:
            print('\tFound in %s (%s)' % (h.get('filename'), h.get('title')))

    index.close()
    print('Elapsed time:', time.time() - tt)
# NOTE(review): the line below was whitespace-collapsed onto one physical line.
# Because it begins with '#', Python treats the ENTIRE line as a comment, so
# none of the embedded statements (index.index(...), index.optimize(),
# index.commit(), the delete/find demo, index.close()) actually execute.
# It appears to be the tail of a separate mail-indexing example: the names
# 'body', 'subj', 'frm' and the counter 'i' — and the loop that drives the
# index.index(...) call — are defined outside this chunk, so the fragment
# cannot be safely reconstructed here.  Recover it from the original example
# script before re-enabling; left byte-identical below.
# you can ask the index to delete all docs containing # that Term in that Field. index.index(text = body, _uid = str(i), __subject=subj, __frm=frm) i += 1 index.optimize() index.commit() # pretend it never happened del(index) # re-open it for search index = Index(indexName, create=False) # search for the word 'today' hits = index.find('you') print 'Finding you', hits for h in hits: print h index.delete(uid='41') hits = index.find('you') print 'Finding you', hits for h in hits: print h index.close() print time.time() - tt