# Code example #1
0
class Indexer:
    """Walk a directory tree, tokenize each file and persist the results.

    Every file found under the indexed directory is given a sequential
    document id (starting at 1), handed to the tokenizer, and recorded in
    the file-name-to-doc-id mapper. Once the walk is finished, the file
    storage and the mapper are both asked to pickle themselves into the
    index storage directory.
    """

    def __init__(self, directoryToBeIndexed, indexStorageDirectory):
        """Remember the two directories and create the collaborators.

        Args:
            directoryToBeIndexed: Root of the directory tree to walk.
            indexStorageDirectory: Location passed to the pickling helpers.
        """
        self._directory = directoryToBeIndexed
        self._indexStorageDirectory = indexStorageDirectory
        # NOTE(review): nothing in this class hands tokenizer output to this
        # FileStorage instance; presumably Tokenizer and FileStorage share
        # state elsewhere -- confirm against their definitions.
        self._filestorage = FileStorage()
        self._fileNameToDocIdMapper = FileNameToDocIdMapper()
        self._tokenizer = Tokenizer()

    def _iterFilePaths(self):
        """Yield the path of every file under the indexed directory.

        Directory and file names are visited in sorted order so that the
        sequence of paths (and therefore the doc ids derived from it) is
        reproducible; ``os.walk`` itself makes no ordering promise.
        """
        for subdir, dirs, files in os.walk(self._directory):
            # Sorting in place steers the order of the top-down traversal.
            dirs.sort()
            for name in sorted(files):
                # os.path.join avoids a doubled separator when the root
                # directory was given with a trailing slash.
                yield os.path.join(subdir, name)

    def iterateThroughFilesInDirectory(self):
        """Tokenize and register every file, then pickle the index data.

        Each file is tokenized first and only then recorded in the mapper,
        both under the same doc id. After all files have been processed the
        file storage and the mapper are pickled to the index storage
        directory.
        """
        for docId, filepath in enumerate(self._iterFilePaths(), start=1):
            self._tokenizer.tokenizeFiles(filepath, docId)
            self._fileNameToDocIdMapper.storeFileNameToDictId(
                filepath, docId)

        self._filestorage.pickleIndexFile(self._indexStorageDirectory)
        self._fileNameToDocIdMapper.pickleFileNameToDocMapper(
            self._indexStorageDirectory)