class Indexer:
    """Walk a directory tree and hand every file to the indexing helpers.

    Each file found under the directory is given a sequential document id
    (starting at 1), passed to the tokenizer, and recorded in the
    file-name-to-doc-id mapper. ``FileStorage``, ``FileNameToDocIdMapper``
    and ``Tokenizer`` are defined elsewhere; only the calls made on them
    here are documented.
    """

    def __init__(self, directoryToBeIndexed, indexStorageDirectory):
        """Remember the two directories and create the helper objects.

        Args:
            directoryToBeIndexed: Root of the directory tree to walk.
            indexStorageDirectory: Value passed unchanged to the helpers'
                pickle methods once the walk is finished.
        """
        self._directory = directoryToBeIndexed
        self._indexStorageDirectory = indexStorageDirectory
        self._filestorage = FileStorage()
        self._fileNameToDocIdMapper = FileNameToDocIdMapper()
        self._tokenizer = Tokenizer()

    def iterateThroughFilesInDirectory(self):
        """Tokenize every file under the directory, then persist the results.

        Files are visited top-down in sorted order, so a given tree always
        yields the same path-to-id assignment. For each file the tokenizer's
        ``tokenizeFiles(path, docId)`` and the mapper's
        ``storeFileNameToDictId(path, docId)`` are called with the same
        arguments. After the walk, ``pickleIndexFile`` and
        ``pickleFileNameToDocMapper`` are each called once with the index
        storage directory (presumably writing the index to disk -- confirm
        against the helper classes).
        """
        docId = 1
        for subdir, dirs, files in os.walk(self._directory):
            # os.walk lists entries in arbitrary order. Sorting ``dirs`` in
            # place fixes the order in which sub-directories are descended
            # into, which keeps document ids reproducible between runs.
            dirs.sort()
            for fileName in sorted(files):
                # os.path.join avoids the doubled separator that manual
                # concatenation produces when the root ends with os.sep.
                filepath = os.path.join(subdir, fileName)
                self._tokenizer.tokenizeFiles(filepath, docId)
                self._fileNameToDocIdMapper.storeFileNameToDictId(
                    filepath, docId)
                docId += 1
        self._filestorage.pickleIndexFile(self._indexStorageDirectory)
        self._fileNameToDocIdMapper.pickleFileNameToDocMapper(
            self._indexStorageDirectory)