def main(cls, argv): if len(argv) < 5: print "Usage: python IndexTuningDemo.py <numDocs> <mergeFactor> <maxMergeDocs> <maxBufferedDocs>" return docsInIndex = int(argv[1]) # create an index called 'index-dir' in a temp directory indexDir = os.path.join(System.getProperty('java.io.tmpdir', 'tmp'), 'index-dir') dir = FSDirectory.getDirectory(indexDir, True) analyzer = SimpleAnalyzer() writer = IndexWriter(dir, analyzer, True) # set variables that affect speed of indexing writer.setMergeFactor(int(argv[2])) writer.setMaxMergeDocs(int(argv[3])) writer.setMaxBufferedDocs(int(argv[4])) # writer.infoStream = System.out print "Merge factor: ", writer.getMergeFactor() print "Max merge docs:", writer.getMaxMergeDocs() print "Max buffered docs:", writer.getMaxBufferedDocs() start = time() for i in xrange(docsInIndex): doc = Document() doc.add( Field("fieldname", "Bibamus", Field.Store.YES, Field.Index.TOKENIZED)) writer.addDocument(doc) writer.close() print "Time: ", timedelta(seconds=time() - start)
def main(cls, argv): if len(argv) < 5: print "Usage: python IndexTuningDemo.py <numDocs> <mergeFactor> <maxMergeDocs> <maxBufferedDocs>" return docsInIndex = int(argv[1]) # create an index called 'index-dir' in a temp directory indexDir = os.path.join(tempfile.gettempdir(), 'index-dir') dir = FSDirectory.open(indexDir,) analyzer = SimpleAnalyzer() writer = IndexWriter(dir, analyzer, True) # set variables that affect speed of indexing writer.setMergeFactor(int(argv[2])) writer.setMaxMergeDocs(int(argv[3])) writer.setMaxBufferedDocs(int(argv[4])) # writer.infoStream = tempfile.out print "Merge factor: ", writer.getMergeFactor() print "Max merge docs:", writer.getMaxMergeDocs() print "Max buffered docs:", writer.getMaxBufferedDocs() start = time() for i in xrange(docsInIndex): doc = Document() doc.add(Field("fieldname", "Bibamus", Field.Store.YES, Field.Index.ANALYZED)) writer.addDocument(doc) writer.close() print "Time: ", timedelta(seconds=time() - start)