예제 #1
0
def main():
    """An example of using the Indexer wrapper.
    """

    # TODO Command line argument passing
    # TODO e.g.
    # TODO -d directory to store index in
    # TODO -i directory to recursively index

    import time
    tt = time.time()

    filedir = 'aesop'
    indexName = 'aesopind'

    if os.path.exists(indexName):
        for f in os.listdir(indexName):
            os.remove(os.path.join(indexName, f))
        # Remove results of previous runs
        os.rmdir(indexName)

    # Create a new Index
    index = Index(indexName, create=True)
    index.setMergeFactor(20)
    # Get the files
    files = os.listdir(filedir)
    for name in files:
        f = os.path.join(filedir, name)
        if os.path.isdir(f) or os.path.islink(f):
            continue
        text = open(f, 'rb').read().decode("latin-1")
        title = text.split('\n\n\n')[0]
        print 'indexing:', f
        # the next line creates a Document with 2 fields
        # one field is named text and the other is named
        # filename. The latter is created as Keyword since
        # the name is preceded by '_'. Naughty but expdient.
        index.index(text=text, __title=title, _filename=f)

    # Uncomment the following line to optimize the index.
    # Have a look in the index dir before you optimize.
    # You will probably see a dozens of files from
    # several segments. optimize() merges all the segments
    # into one. It can be quite an expensive operation, but
    # it can save space and speed up searches.

    #index.optimize()

    queries = [
        'fox', u'intô', 'python', 'fox python', '"the Fox and the"',
        'the fox and python'
    ]
    for q in queries:
        hits = index.find(q)
        print q.encode('utf8'), hits
        for h in hits:
            print '\tFound in %s (%s)' % (h.get('filename'), h.get('title'))
    index.close()
    print 'Elapsed time:', time.time() - tt
예제 #2
0
파일: simple.py 프로젝트: Atom66/tain335
def main():
    """An example of using the Indexer wrapper.
    """
    
    # TODO Command line argument passing
    # TODO e.g.
    # TODO -d directory to store index in
    # TODO -i directory to recursively index
    
    import time
    tt = time.time()

    filedir = 'aesop'
    indexName = 'aesopind'

    if os.path.exists(indexName):
        for f in os.listdir(indexName):
            os.remove(os.path.join(indexName, f))
        # Remove results of previous runs
        os.rmdir(indexName)

    # Create a new Index
    index = Index(indexName, create = True)
    index.setMergeFactor(20)
    # Get the files
    files = os.listdir(filedir)
    for name in files:
        f = os.path.join(filedir, name)
        if os.path.isdir(f) or os.path.islink(f):
            continue
        text = open(f, 'rb').read().decode("latin-1")
        title = text.split('\n\n\n')[0]
        print 'indexing:', f
        # the next line creates a Document with 2 fields
        # one field is named text and the other is named
        # filename. The latter is created as Keyword since
        # the name is preceded by '_'. Naughty but expdient.
        index.index(text=text, __title=title, _filename=f)
        
    # Uncomment the following line to optimize the index.
    # Have a look in the index dir before you optimize.
    # You will probably see a dozens of files from
    # several segments. optimize() merges all the segments
    # into one. It can be quite an expensive operation, but
    # it can save space and speed up searches.
    
    #index.optimize()

    queries = ['fox', u'intô', 'python', 'fox python',
               '"the Fox and the"',
               'the fox and python']
    for q in queries:
        hits = index.find(q)
        print q.encode('utf8'), hits
        for h in hits:
            print '\tFound in %s (%s)' % (h.get('filename'), h.get('title'))
    index.close()
    print 'Elapsed time:', time.time() - tt
예제 #3
0
파일: indexemail.py 프로젝트: ythvg/tain335
    # you can ask the index to delete all docs containing
    # that Term in that Field.
    index.index(text = body, _uid = str(i), __subject=subj, __frm=frm)
    i += 1

index.optimize()
index.commit()

# pretend it never happened
del(index)

# re-open it for search
index = Index(indexName, create=False)

# search for the word 'today'
hits = index.find('you')
print 'Finding you', hits
for h in hits:
    print h

index.delete(uid='41')
hits = index.find('you')
print 'Finding you', hits
for h in hits:
    print h

index.close()
print time.time() - tt