def load_ifilter(filename, log=log):
    """
    Load the IFilter for ``filename`` via ``ifilter.LoadIFilter``.

    filename -- path of the file to load a filter for
    log      -- object exposing ``warning(message)``; defaults to the
                module-level ``log``

    Returns whatever ``ifilter.LoadIFilter`` returns.

    Raises ``pythoncom.com_error``: any COM failure is logged as a warning
    (with a dedicated message for FILTER_E_UNKNOWNFORMAT) and then re-raised.
    """
    try:
        return ifilter.LoadIFilter(filename)
    except pythoncom.com_error as e:
        # Exceptions are not subscriptable on Python 3, so read the HRESULT
        # from the attribute instead of e[0].
        if e.hresult == FILTER_E_UNKNOWNFORMAT:
            log.warning("File %s is not a recognized format" % filename)
        else:
            log.warning("LoadIFilter for file %s, raised error %s, file cannot be processed" % (filename, str(e)))
        # NOTE(review): the re-raise is applied to both branches here; the
        # original layout was ambiguous -- confirm callers do not expect the
        # unknown-format case to be swallowed.
        raise
def _bind_to_filter(self, fileName):
    """
    See if the file is a structured storage file or a normal file
    and then bind an ifilter interface by calling the appropriate
    bind/load function.

    Nothing is returned; the results are stored on the instance:

    self.f   -- the filter obtained from ``ifilter.BindIFilterFromStorage``
                or ``ifilter.LoadIFilter``
    self.stg -- the storage opened for a structured storage file (it stays
                set even when binding falls back to ``LoadIFilter``), or
                None for a normal file

    Raises ``pythoncom.com_error`` for any bind failure other than
    E_NOINTERFACE, and for any failure of ``LoadIFilter`` itself.
    """
    E_NOINTERFACE = -2147467262  # 0x80004002

    if pythoncom.StgIsStorageFile(fileName):
        self.stg = pythoncom.StgOpenStorage(
            fileName, None,
            storagecon.STGM_READ | storagecon.STGM_SHARE_DENY_WRITE)
        try:
            self.f = ifilter.BindIFilterFromStorage(self.stg)
        except pythoncom.com_error as e:
            # Exceptions are not subscriptable on Python 3, so compare the
            # hresult attribute rather than e[0].
            if e.hresult != E_NOINTERFACE:
                raise
            # no interface, try the load interface
            # (this happens for some MSoft files)
            self.f = ifilter.LoadIFilter(fileName)
    else:
        self.f = ifilter.LoadIFilter(fileName)
        self.stg = None
def _bind_to_filter(self, fileName):
    """
    See if the file is a structured storage file or a normal file
    and then bind an ifilter interface by calling the appropriate
    bind/load function.

    Nothing is returned; the results are stored on the instance:

    self.f   -- the filter obtained from ``ifilter.BindIFilterFromStorage``
                or ``ifilter.LoadIFilter``
    self.stg -- the storage opened for a structured storage file (it stays
                set even when binding falls back to ``LoadIFilter``), or
                None for a normal file

    Raises ``pythoncom.com_error`` for any bind failure other than
    E_NOINTERFACE, and for any failure of ``LoadIFilter`` itself.
    """
    E_NOINTERFACE = -2147467262  # 0x80004002

    if pythoncom.StgIsStorageFile(fileName):
        self.stg = pythoncom.StgOpenStorage(
            fileName, None,
            storagecon.STGM_READ | storagecon.STGM_SHARE_DENY_WRITE)
        try:
            self.f = ifilter.BindIFilterFromStorage(self.stg)
        except pythoncom.com_error as e:
            # "except X, e" is a syntax error on Python 3 and exceptions are
            # not subscriptable there, so use "as e" and the hresult attribute.
            if e.hresult != E_NOINTERFACE:
                raise
            # no interface, try the load interface
            # (this happens for some MSoft files)
            self.f = ifilter.LoadIFilter(fileName)
    else:
        self.f = ifilter.LoadIFilter(fileName)
        self.stg = None


def _get_text(self, body_chunks):
    """
    Gets all the text for a particular chunk. We need to keep calling
    GetText till all the segments for this chunk are retrieved.

    body_chunks -- list that each value returned by ``self.f.GetText()`` is
                   appended to (modified in place)

    The loop ends when GetText raises a ``pythoncom.com_error`` whose hresult
    is FILTER_E_NO_MORE_TEXT or FILTER_E_NO_TEXT; any other com_error is
    re-raised to the caller.
    """
    # The original membership test listed FILTER_E_NO_MORE_TEXT twice; the
    # duplicate was redundant and is dropped here.
    end_of_text = (FILTER_E_NO_MORE_TEXT, FILTER_E_NO_TEXT)

    while True:
        try:
            body_chunks.append(self.f.GetText())
        except pythoncom.com_error as e:
            if e.hresult in end_of_text:
                break
            raise  # not one of the values we were expecting