예제 #1
0
    def getIndexWriter(self):
        """Build and return an IndexWriter over this container's directory.

        The DbDirectory is constructed from the store's transaction, this
        container's db handle, the store's blocks db handle and this
        container's flags.  The writer is created with a StandardAnalyzer
        and False as its third argument (presumably "do not create a new
        index" -- confirm against the IndexWriter API), and compound-file
        mode is switched off before the writer is handed back.
        """
        directory = DbDirectory(self.store.txn, self._db,
                                self.store._blocks._db, self._flags)
        analyzer = StandardAnalyzer()

        indexWriter = IndexWriter(directory, analyzer, False)
        indexWriter.setUseCompoundFile(False)
        return indexWriter
예제 #2
0
    def open(self, name, txn, **kwds):
        """Open the container and, on creation, initialise its index.

        The call is first delegated to the parent class's open() with the
        same arguments.  When the 'create' keyword is truthy, an
        IndexWriter is then constructed over a DbDirectory for this
        container (using the given txn) and closed straight away.
        """
        super(IndexContainer, self).open(name, txn, **kwds)

        # Nothing more to do unless the caller asked for creation.
        if not kwds.get('create', False):
            return

        # The third argument is True here; opening and immediately closing
        # such a writer presumably writes out an empty index -- confirm
        # against the IndexWriter API.
        initialWriter = IndexWriter(
            DbDirectory(txn, self._db, self.store._blocks._db, self._flags),
            StandardAnalyzer(), True)
        initialWriter.close()
예제 #3
0
#!/usr/bin/env python2.4

from mailbox import UnixMailbox
from PyLucene import StandardAnalyzer, FSDirectory, IndexWriter
from email import EmailDoc

# Index every message of the 'chipy.mbox' Unix mailbox into the
# 'chipy-index' directory, one EmailDoc per message.
store = FSDirectory.getDirectory('chipy-index', True)
writer = IndexWriter(store, StandardAnalyzer(), True)
try:
    mbox_file = open('chipy.mbox')
    try:
        mailbox = UnixMailbox(mbox_file)
        while True:
            msg = mailbox.next()
            # The loop ends when next() hands back None; compare by
            # identity so a message object with a custom __eq__ cannot
            # end the loop early.
            if msg is None:
                break
            writer.addDocument(EmailDoc(msg))
    finally:
        # Release the mailbox file handle even if a message fails to index.
        mbox_file.close()
finally:
    # Close the writer on every path so a failure part-way through does
    # not leave it open.  try/finally (not `with`) keeps this runnable on
    # the python2.4 interpreter named in the shebang.
    writer.close()
예제 #4
0
                      Field.Index.UN_TOKENIZED))
            doc.add(
                Field("pmid", pmid, Field.Store.YES, Field.Index.UN_TOKENIZED))
            doc.add(
                Field("text", span_text, Field.Store.YES,
                      Field.Index.TOKENIZED))
            addAnnotations(doc, span_id)
            writer.addDocument(doc)
        except Exception, e:
            sys.stderr.write("error: %s pmid: %s span_id: %s\n" %
                             (e, pmid, span_id))
        i += 2


if __name__ == '__main__':
    # Command line: data_norm index_dir [annotation_files ...]
    # data_norm and index_dir are both read unconditionally below, so
    # require at least two arguments; testing only for "no arguments"
    # let a single-argument invocation crash with IndexError on
    # sys.argv[2] instead of showing the usage line.
    if len(sys.argv) < 3:
        print("Usage: python index_spans.py data_norm index_dir annotation_files")
    else:
        data_norm, index_dir = sys.argv[1], sys.argv[2]
        annotation_files = sys.argv[3:]
        print("Loading annotations ...")
        load(annotation_files)
        print("Making the index ...")
        # `writer` stays a module-level name: indexData() is called with
        # only data_norm, so the indexing code presumably reaches the
        # writer through this global -- verify before making it local.
        writer = IndexWriter(index_dir, StandardAnalyzer(), True)
        try:
            writer.setMaxFieldLength(7 * 1000 * 1000 * 10)
            indexData(data_norm)
            print("Optimizing index ...")
            writer.optimize()
            print("Indexing complete")
        finally:
            # Close the writer even when indexing or optimizing raises.
            writer.close()