def indexFile(self, filepath, isPrivate=True): docId = None try: if not self.isIndexable(filepath): return state = docState(isPrivate) fileSize = os.path.getsize(filepath) lastModificationTime = os.path.getmtime(filepath) lastIdxTime, lastIdxState = self.getLastIndexationTimeAndState(filepath) if lastIdxState == state and lastIdxTime >= lastModificationTime: for obs in self.observers: obs.documentUntouched(filepath, state) print "%s didn't change since last indexation" % (safe_encode(filepath),) return try: title, text, _, _ = converter.extractWordsFromFile(filepath) except converter.IndexationFailure, exc: raise FileIndexationFailure(safe_encode(filepath), "converter thus complained : %s" % exc) docId = makeDocumentId(filepath) mime_type = mimetypes.guess_type(filepath)[0] doc = FutureDocument(filename=filepath, title=title, text=text, fileSize=fileSize, lastModificationTime=lastModificationTime, content_hash=docId, mime_type=mime_type, state=state) self.indexDocument(doc)
def start(self): existingFiles = Set() for filename in self.getFileIterator(): existingFiles.add(filename) if not self.isIndexable(filename): continue lastModificationTime = os.path.getmtime(filename) lastIndexationTime = self.getLastIndexationTime(filename) if lastIndexationTime >= lastModificationTime: if self.verbose: print "%s didn't change since last indexation" % filename else: fileSize = os.path.getsize(filename) try: title, text, links, offset = converter.extractWordsFromFile(filename) except converter.IndexationFailure, exc: if self.verbose: print exc continue docId = makeDocumentId(filename) mime_type = mimetypes.guess_type(filename)[0] self.indexDocument(filename, title, text, fileSize, lastModificationTime, docId, mime_type, Document.PUBLISHED_STATE)
def start(self): existingFiles = Set() for filename in self.getFileIterator(): existingFiles.add(filename) if not self.isIndexable(filename): continue lastModificationTime = os.path.getmtime(filename) lastIndexationTime = self.getLastIndexationTime(filename) if lastIndexationTime >= lastModificationTime: if self.verbose: print "%s didn't change since last indexation" % filename else: fileSize = os.path.getsize(filename) try: title, text, links, offset = converter.extractWordsFromFile( filename) except converter.IndexationFailure, exc: if self.verbose: print exc continue docId = makeDocumentId(filename) mime_type = mimetypes.guess_type(filename)[0] self.indexDocument(filename, title, text, fileSize, lastModificationTime, docId, mime_type, Document.PUBLISHED_STATE)
def runIndexer(self, isPrivate=True): existingFiles = Set() if isPrivate: state = Document.PRIVATE_STATE else: state = Document.PUBLISHED_STATE for filename in self.getFileIterator(isPrivate): existingFiles.add(filename) if not self.isIndexable(filename): continue lastModificationTime = os.path.getmtime(filename) lastIdxTime, lastIdxState = self.getLastIndexationTimeAndState( filename) if lastIdxState == state and lastIdxTime >= lastModificationTime: if self.verbose: print "%s didn't change since last indexation" % filename continue else: fileSize = os.path.getsize(filename) try: title, text, links, offset = converter.extractWordsFromFile( filename) except converter.IndexationFailure, exc: if self.verbose: print exc continue docId = makeDocumentId(filename) mime_type = mimetypes.guess_type(filename)[0] self.indexDocument( FutureDocument(filename=unicode(filename, self.filesystemEncoding), title=title, text=text, fileSize=fileSize, lastModificationTime=lastModificationTime, content_hash=docId, mime_type=mime_type, state=state))
def runIndexer(self, isPrivate=True): existingFiles = Set() if isPrivate: state = Document.PRIVATE_STATE else: state = Document.PUBLISHED_STATE for filename in self.getFileIterator(isPrivate): existingFiles.add(filename) if not self.isIndexable(filename): continue lastModificationTime = os.path.getmtime(filename) lastIdxTime, lastIdxState = self.getLastIndexationTimeAndState(filename) if lastIdxState == state and lastIdxTime >= lastModificationTime: if self.verbose: print "%s didn't change since last indexation" % filename continue else: fileSize = os.path.getsize(filename) try: title, text, links, offset = converter.extractWordsFromFile(filename) except converter.IndexationFailure, exc: if self.verbose: print exc continue docId = makeDocumentId(filename) mime_type = mimetypes.guess_type(filename)[0] self.indexDocument(FutureDocument(filename=unicode(filename, self.filesystemEncoding), title=title, text=text, fileSize=fileSize, lastModificationTime=lastModificationTime, content_hash=docId, mime_type=mime_type, state=state))