def test_numDocs_AND_maxDoc(self):
    """Check numDocs()/maxDoc() on the test index, open and closed.

    While the reader is open, numDocs() must match the number of files
    that were destined for the test index, and maxDoc() must equal
    numDocs().  After close(), both calls must raise IOError.
    """
    indLoc = self.extractTestIndex()
    r = lucene.IndexReader.open(indLoc)
    try:
        totalDocsInFileSystem = len(test_base.listFilesDestinedForTestIndex())
        self.assertEqual(r.numDocs(), totalDocsInFileSystem)

        # maxDoc is one greater than the greatest document identifier in
        # the index, and document identifiers start at 0.  This test
        # expects an index with no gaps in its doc ids, so the greatest
        # identifier is numDocs - 1 and maxDoc equals numDocs.
        self.assertEqual(r.maxDoc(), r.numDocs())
    finally:
        # Close the reader even when an assertion above fails, so a
        # failing test does not leave the index open.
        r.close()

    # A closed reader must refuse further queries.
    self.assertRaises(IOError, r.numDocs)
    self.assertRaises(IOError, r.maxDoc)
def createTestIndexArchive():
    """Index pyclene's own source code and archive the index as tar files.

    A temporary on-disk index is built from the files reported by
    test_base.listFilesDestinedForTestIndex().  The index directory is then
    archived twice: once as written ('UNOPTIMIZED') and once after
    IndexWriter.optimize() has been run on it ('OPTIMIZED').  Test code
    that needs a sample index can unpack these archives (typically via one
    of the test_base.CommonBaseTest.extractTestIndex* methods).

    Returns:
        A list with the two tar filenames, in the order
        [UNOPTIMIZED, OPTIMIZED].  The names come from
        test_base.generateTempFilename (presumably inside the temp
        directory -- confirm against test_base).

    Raises:
        Whatever the indexing or archiving steps raise.  On failure every
        tar file created so far is deleted before the error propagates.
        The temporary index directory is removed in all cases.
    """
    pv('--- CREATING TEST INDEX OF OWN SOURCE CODE ---')

    allFilenames = test_base.listFilesDestinedForTestIndex()
    # Only used to shorten the paths shown in the progress output.
    commonPrefixLen = len(os.path.commonprefix(allFilenames))

    tempIndexDir = test_base.generateTempFilename(suffix='_pyclene_test_index')
    assert not os.path.exists(tempIndexDir)
    fsDir = lucene.FSDirectory(tempIndexDir, True)
    assert os.path.isdir(tempIndexDir)

    indexFNs = []
    try:
        try:
            # Phase 1: build the index.
            w = lucene.IndexWriter(fsDir, lucene.StandardAnalyzer(), True)
            try:
                w.maxFieldLength = sys.maxint
                assert w.docCount() == 0
                for filename in allFilenames:
                    doc = test_index.FileDocument(filename)
                    pv('Indexing [%s]' % filename[commonPrefixLen:])
                    w.addDocument(doc)
                assert w.docCount() == len(allFilenames)
            finally:
                # Release the writer even if indexing failed, so the
                # directory can be removed below.
                w.close()
            pv('\n')

            # Phase 2: archive the index before and after optimization.
            for optFlag in ('UNOPTIMIZED', 'OPTIMIZED'):
                tarFilename = test_base.generateTempFilename(
                    '.pyclene_test_index-%s.tar' % optFlag
                )
                # Record the name before creating the file so the cleanup
                # handler below knows about it if anything goes wrong.
                indexFNs.append(tarFilename)

                tarFile = tarfile.open(tarFilename, 'w')
                try:
                    for indexComponent in fsDir:
                        fullIndexComponent = os.path.join(
                            fsDir.name, indexComponent
                        )
                        # Store each component under its bare name so the
                        # archive unpacks flat.
                        tarFile.add(fullIndexComponent, indexComponent)
                finally:
                    tarFile.close()

                # Optimize in place so that the next pass archives the
                # optimized form of the index.  (The second pass runs this
                # again on the already-optimized index.)
                w = lucene.IndexWriter(fsDir, lucene.StandardAnalyzer(), False)
                try:
                    w.optimize()
                finally:
                    w.close()

                pv('%s: %s KB' % (
                    ('Test index size (%s)' % optFlag).ljust(29),
                    str(os.path.getsize(tarFilename) // 1024).rjust(10)
                ))
        except:
            # Deliberately catch everything: this handler only deletes the
            # partial output and then re-raises.  The bare ``raise`` keeps
            # the original traceback intact.
            for tempFN in indexFNs:
                if os.path.isfile(tempFN):
                    os.remove(tempFN)
            raise
    finally:
        # The on-disk index is only scratch space; the tar files are the
        # real product.
        shutil.rmtree(tempIndexDir)

    return indexFNs