Esempio n. 1
0
def load_ifilter(filename, log=log):
    """Load an IFilter for *filename*, logging a warning when loading fails.

    Args:
        filename: Path passed straight to ``ifilter.LoadIFilter``.
        log: Object with a ``warning`` method used for reporting; defaults
            to the module-level ``log``.

    Returns:
        The value returned by ``ifilter.LoadIFilter(filename)``.

    Raises:
        pythoncom.com_error: Re-raised unchanged after the warning has been
            logged, so callers still see the original failure.
    """
    try:
        return ifilter.LoadIFilter(filename)
    except pythoncom.com_error as e:
        # Read the HRESULT from the exception attribute; subscripting the
        # exception (``e[0]``) only works on Python 2.
        if e.hresult == FILTER_E_UNKNOWNFORMAT:
            log.warning("File %s is not a recognized format", filename)
        else:
            log.warning(
                "LoadIFilter for file %s, raised error %s, file cannot be processed",
                filename,
                e,
            )
        raise
Esempio n. 2
0
 def _bind_to_filter(self, fileName):
     """
     Bind an IFilter for ``fileName`` and store it on ``self.f``.

     If ``pythoncom.StgIsStorageFile`` reports a structured storage file, the
     storage is opened read-only (denying writers), kept on ``self.stg`` and
     the filter is bound from it. When that bind fails with the "no
     interface" HRESULT, the filter is loaded from the file name instead.
     For any other file the filter is loaded by name and ``self.stg`` is set
     to ``None``.

     Raises:
         pythoncom.com_error: Any COM failure other than the "no interface"
             error raised while binding from storage.
     """
     # 0x80004002: no interface (this happens for some MSoft files)
     no_interface = -2147467262

     if not pythoncom.StgIsStorageFile(fileName):
         self.f = ifilter.LoadIFilter(fileName)
         self.stg = None
         return

     self.stg = pythoncom.StgOpenStorage(
         fileName, None,
         storagecon.STGM_READ | storagecon.STGM_SHARE_DENY_WRITE)
     try:
         self.f = ifilter.BindIFilterFromStorage(self.stg)
     except pythoncom.com_error as e:
         # Use the hresult attribute: ``e[0]`` raises TypeError on Python 3
         # and would hide the real COM error.
         if e.hresult != no_interface:
             raise
         # No interface on the storage object; try the load interface.
         self.f = ifilter.LoadIFilter(fileName)
Esempio n. 3
0
    def _bind_to_filter(self, fileName):
        """
        Obtain an IFilter interface for ``fileName`` and keep it in ``self.f``.

        Structured storage files (as reported by ``pythoncom.StgIsStorageFile``)
        are opened with read access and write sharing denied; the storage is
        stored in ``self.stg`` and the filter is bound from it. If binding
        reports the "no interface" HRESULT, the filter is loaded by file name
        instead. Non-storage files are loaded by file name and ``self.stg`` is
        set to ``None``.

        Raises:
            pythoncom.com_error: Any COM error from binding other than
                "no interface".
        """
        # 0x80004002: no interface (this happens for some MSoft files)
        E_NOINTERFACE = -2147467262

        if pythoncom.StgIsStorageFile(fileName):
            open_mode = storagecon.STGM_READ | storagecon.STGM_SHARE_DENY_WRITE
            self.stg = pythoncom.StgOpenStorage(fileName, None, open_mode)
            try:
                self.f = ifilter.BindIFilterFromStorage(self.stg)
            except pythoncom.com_error as e:
                # ``except X, e`` and ``e[0]`` are Python 2 only; ``as`` and
                # the hresult attribute work on both major versions.
                if e.hresult == E_NOINTERFACE:
                    # Try the load interface instead.
                    self.f = ifilter.LoadIFilter(fileName)
                else:
                    raise
        else:
            self.f = ifilter.LoadIFilter(fileName)
            self.stg = None

    def _get_text(self, body_chunks):
        """
        Append every text segment of the current chunk to ``body_chunks``.

        ``self.f.GetText()`` is called repeatedly and each result is appended
        to ``body_chunks`` (mutated in place) until the filter raises a COM
        error whose HRESULT is ``FILTER_E_NO_MORE_TEXT`` or
        ``FILTER_E_NO_TEXT``, which ends the loop.

        Raises:
            pythoncom.com_error: Any COM error with a different HRESULT.
        """
        # The original list named FILTER_E_NO_MORE_TEXT twice; one entry is enough.
        stop_codes = (FILTER_E_NO_MORE_TEXT, FILTER_E_NO_TEXT)
        while True:
            try:
                body_chunks.append(self.f.GetText())
            except pythoncom.com_error as e:
                # Use the hresult attribute: exceptions are not subscriptable
                # on Python 3, so ``e[0]`` would raise TypeError there.
                if e.hresult in stop_codes:
                    break
                raise  # not one of the values we were expecting