def getIndexWriter(self):
    """Return an IndexWriter opened on this container's index.

    The writer is built over a DbDirectory that uses the store's current
    transaction (self.store.txn), this container's db handle, the store's
    blocks db handle and this container's flags. False is passed as
    IndexWriter's third argument (Lucene's ``create`` flag), and compound
    files are switched off before the writer is handed back.

    The caller receives the open writer; nothing here closes it.
    """
    directory = DbDirectory(self.store.txn, self._db,
                            self.store._blocks._db, self._flags)
    indexWriter = IndexWriter(directory, StandardAnalyzer(), False)
    indexWriter.setUseCompoundFile(False)

    return indexWriter
def open(self, name, txn, **kwds):
    """Open the container and, when asked to create it, set up the index.

    name, txn and any keyword arguments are forwarded unchanged to the
    superclass ``open``. If the ``create`` keyword is truthy, an
    IndexWriter is opened over a DbDirectory bound to ``txn`` with True as
    its third argument (Lucene's ``create`` flag) and closed right away.
    """
    super(IndexContainer, self).open(name, txn, **kwds)

    # Nothing more to do unless the caller asked for creation.
    if not kwds.get('create', False):
        return

    directory = DbDirectory(txn, self._db,
                            self.store._blocks._db, self._flags)
    # The writer is only opened in create mode and closed again; no
    # documents are added here.
    IndexWriter(directory, StandardAnalyzer(), True).close()
#!/usr/bin/env python2.4
# Index every message of the 'chipy.mbox' mailbox into a PyLucene index
# stored in the 'chipy-index' directory.
#
# Only try/finally is used for cleanup (no `with` statement) so the script
# stays valid for the python2.4 interpreter named in the shebang.

from mailbox import UnixMailbox

from PyLucene import StandardAnalyzer, FSDirectory, IndexWriter

# NOTE(review): the standard-library `email` package has no `EmailDoc`;
# presumably a project-local module named `email` shadows it -- confirm.
from email import EmailDoc

# Both the directory and the writer are opened with True as their final
# argument (the `create` flag in the Lucene API).
store = FSDirectory.getDirectory('chipy-index', True)
writer = IndexWriter(store, StandardAnalyzer(), True)
try:
    mbox_file = open('chipy.mbox')
    try:
        mailbox = UnixMailbox(mbox_file)
        while True:
            msg = mailbox.next()
            # next() yields None once the mailbox is exhausted.
            if msg is None:
                break
            writer.addDocument(EmailDoc(msg))
    finally:
        # Release the mbox file handle even if indexing fails part-way.
        mbox_file.close()
finally:
    # Always close the writer, including when reading or indexing raised.
    writer.close()
Field.Index.UN_TOKENIZED)) doc.add( Field("pmid", pmid, Field.Store.YES, Field.Index.UN_TOKENIZED)) doc.add( Field("text", span_text, Field.Store.YES, Field.Index.TOKENIZED)) addAnnotations(doc, span_id) writer.addDocument(doc) except Exception, e: sys.stderr.write("error: %s pmid: %s span_id: %s\n" % (e, pmid, span_id)) i += 2 if __name__ == '__main__': if len(sys.argv) == 1: print "Usage: python index_spans.py data_norm index_dir annotation_files" else: (data_norm, index_dir, annotation_files) = \ (sys.argv[1], sys.argv[2], sys.argv[3:]) print "Loading annotations ..." load(annotation_files) print "Making the index ..." writer = IndexWriter(index_dir, StandardAnalyzer(), True) writer.setMaxFieldLength(7 * 1000 * 1000 * 10) indexData(data_norm) print "Optimizing index ..." writer.optimize() print "Indexing complete" writer.close()