Exemple #1
0
    def decompress_stream(self, key):
        """
        Decode the stream body of the object stored under ``key``.

        The text between the ``stream`` keyword and the ``endstream``
        keyword (or the end of the object when no usable ``endstream`` is
        found) is stripped and then run, in order, through every method
        returned by ``self.get_compMethod(key, data)``.  The result is left
        in ``self.data``; ``self.key``, ``self.buff``, ``self.methods`` and
        ``self.method`` are updated as side effects.

        When no method is reported, ``self.handle_evasion`` is called with
        the part of the object that precedes the ``stream`` keyword.

        The whole operation is best-effort: any exception (missing key,
        decoder failure, ...) is swallowed and nothing is returned.
        """
        self.key = key
        try:
            data = self.objects[self.key]
            start = data.find("stream")
            end = data.find("endstream")
            body_start = start + 6  # skip the "stream" keyword itself
            # Stop at "endstream" so the trailer is not fed to the decoders.
            # Fall back to the end of the object when the keyword is missing
            # or is the very occurrence that matched "stream" above.
            if end < body_start:
                end = len(data)
            self.buff = data[body_start:end].strip()
            if len(self.buff) < 2:
                return

            self.methods = self.get_compMethod(key, data)
            self.data = self.buff.strip()
            for self.method in self.methods:
                method_name = self.method.lower()
                if method_name == 'fl':
                    self.data = decompress(self.data)
                elif method_name == 'ascii85decode':
                    self.data = ascii85decode(self.data)
                elif method_name == 'asciihexdecode':
                    self.data = asciihexdecode(self.data)
                elif method_name == 'lzwdecode':
                    self.data = lzwdecode(self.data)

            if len(self.methods) == 0:
                self.handle_evasion(key, data[:start])

        except Exception:
            # Deliberately best-effort: a stream that cannot be decoded must
            # not abort processing of the remaining objects.
            pass
Exemple #2
0
 def decode(self):
   """
   Decode ``self.rawdata`` into ``self.data`` and drop the raw copy.

   The raw bytes are first passed through ``self.decipher`` (when set),
   then through each filter named by the ``Filter`` entry of ``self.dic``,
   in order.  After every filter the predictor described by ``DP`` (or
   ``DecodeParms``) is applied.  Only predictor 12 is implemented: rows
   tagged ``'\\x02'`` are added byte-wise (mod 256) to the previous output
   row, all other rows are copied unchanged.

   Raises:
     PDFNotImplementedError: for /Crypt, for an unknown filter, or for a
       non-zero predictor other than 12.
     PDFValueError: when predictor 12 is requested without ``Columns``.
   """
   assert self.data is None and self.rawdata is not None
   data = self.rawdata
   if self.decipher:
     # Handle encryption
     data = self.decipher(self.objid, self.genno, data)
   if 'Filter' not in self.dic:
     self.data = data
     self.rawdata = None
     return
   filters = self.dic['Filter']
   if not isinstance(filters, list):
     filters = [ filters ]
   for f in filters:
     if f in LITERALS_FLATE_DECODE:
       # will get errors if the document is encrypted.
       data = self.decomp(data)
     elif f in LITERALS_LZW_DECODE:
       try:
         from cStringIO import StringIO
       except ImportError:
         from StringIO import StringIO
       data = ''.join(LZWDecoder(StringIO(data)).run())
     elif f in LITERALS_ASCII85_DECODE:
       import ascii85
       data = ascii85.ascii85decode(data)
     elif f in LITERALS_ASCIIHEX_DECODE:
       import ascii85
       data = ascii85.asciihexdecode(data)
     elif f == LITERAL_CRYPT:
       raise PDFNotImplementedError('/Crypt filter is unsupported')
     else:
       raise PDFNotImplementedError('Unsupported filter: %r' % f)
     # apply predictors
     if 'DP' in self.dic:
       params = self.dic['DP']
     else:
       params = self.dic.get('DecodeParms', {})
     if 'Predictor' in params:
       pred = int_value(params['Predictor'])
       if pred:
         if pred != 12:
           raise PDFNotImplementedError('Unsupported predictor: %r' % pred)
         if 'Columns' not in params:
           raise PDFValueError('Columns undefined for predictor=12')
         columns = int_value(params['Columns'])
         # Each encoded row is one tag byte followed by `columns` data bytes.
         # Rows are collected in a list and joined once to avoid quadratic
         # string concatenation on large streams.
         rows = []
         prev_row = '\x00' * columns
         for i in xrange(0, len(data), columns+1):
           row_tag = data[i]
           row = data[i+1:i+1+columns]
           if row_tag == '\x02':
             row = ''.join( chr((ord(a)+ord(b)) & 255) for (a,b) in zip(prev_row,row) )
           rows.append(row)
           prev_row = row
         data = ''.join(rows)
   self.data = data
   self.rawdata = None
 def decode(self):
     """
     Decode ``self.rawdata`` into ``self.data`` and drop the raw copy.

     The raw bytes are first passed through ``self.decipher`` (when set),
     then through each filter returned by ``self.get_filters()``, in order.
     A Flate stream that zlib rejects is replaced by an empty string rather
     than raising.  After every filter, the parameters found under ``DP``,
     ``DecodeParms`` or ``FDecodeParms`` (via ``self.get_any``) are checked:
     when both ``Predictor`` and ``Columns`` are present and the predictor
     is 12, rows tagged ``'\\x02'`` are added byte-wise (mod 256) to the
     previous output row and all other rows are copied unchanged.

     Raises:
         PDFNotImplementedError: for CCITTFax, /Crypt, an unknown filter,
             or a non-zero predictor other than 12.
     """
     assert self.data is None and self.rawdata is not None
     data = self.rawdata
     if self.decipher:
         # Handle encryption
         data = self.decipher(self.objid, self.genno, data)
     filters = self.get_filters()
     if not filters:
         self.data = data
         self.rawdata = None
         return
     for f in filters:
         if f in LITERALS_FLATE_DECODE:
             # will get errors if the document is encrypted.
             try:
                 data = zlib.decompress(data)
             except zlib.error:
                 data = ''
         elif f in LITERALS_LZW_DECODE:
             data = lzwdecode(data)
         elif f in LITERALS_ASCII85_DECODE:
             data = ascii85decode(data)
         elif f in LITERALS_ASCIIHEX_DECODE:
             data = asciihexdecode(data)
         elif f in LITERALS_RUNLENGTH_DECODE:
             data = rldecode(data)
         elif f in LITERALS_CCITTFAX_DECODE:
             # CCITT fax decoding is not implemented here.
             raise PDFNotImplementedError('Unsupported filter: %r' % f)
         elif f == LITERAL_CRYPT:
             # not yet..
             raise PDFNotImplementedError('/Crypt filter is unsupported')
         else:
             raise PDFNotImplementedError('Unsupported filter: %r' % f)
         # apply predictors
         params = self.get_any(('DP', 'DecodeParms', 'FDecodeParms'), {})
         if 'Predictor' in params and 'Columns' in params:
             pred = int_value(params['Predictor'])
             columns = int_value(params['Columns'])
             if pred:
                 if pred != 12:
                     raise PDFNotImplementedError(
                         'Unsupported predictor: %r' % pred)
                 # Each encoded row is one tag byte followed by `columns`
                 # data bytes.  Collect rows and join once instead of
                 # growing a string with += (quadratic on large streams).
                 rows = []
                 prev_row = '\x00' * columns
                 for i in xrange(0, len(data), columns + 1):
                     row_tag = data[i]
                     row = data[i + 1:i + 1 + columns]
                     if row_tag == '\x02':
                         row = ''.join(
                             chr((ord(a) + ord(b)) & 255)
                             for (a, b) in zip(prev_row, row))
                     rows.append(row)
                     prev_row = row
                 data = ''.join(rows)
     self.data = data
     self.rawdata = None
Exemple #4
0
 def decode(self):
     """
     Turn ``self.rawdata`` into decoded bytes stored on ``self.data``.

     Steps, in order: optional decryption through ``self.decipher``; then,
     for each filter from ``self.get_filters()``, the matching decoder
     followed by the predictor named in the decode parameters (looked up
     under ``DP``, ``DecodeParms`` or ``FDecodeParms``).  DCT streams are
     passed through untouched.  Predictor 1 is a no-op, predictors of 10
     and above are handed to ``apply_png_predictor``.

     Raises:
         PDFException: zlib rejected a Flate stream while ``STRICT`` is set
             (otherwise the payload silently becomes ``''``).
         PDFNotImplementedError: /Crypt, an unknown filter, or a predictor
             that is neither 1 nor >= 10.
     """
     assert self.data is None and self.rawdata is not None
     payload = self.rawdata
     if self.decipher:
         # Encrypted document: decrypt before any filter runs.
         payload = self.decipher(self.objid, self.genno, payload, self.attrs)
     filter_chain = self.get_filters()
     # A falsy filter list means the payload is stored as-is.
     for flt in filter_chain or ():
         parms = self.get_any(('DP', 'DecodeParms', 'FDecodeParms'), {})
         if flt in LITERALS_FLATE_DECODE:
             # Decompression fails on still-encrypted data.
             try:
                 payload = zlib.decompress(payload)
             except zlib.error as exc:
                 if STRICT:
                     raise PDFException('Invalid zlib bytes: %r, %r' % (exc, payload))
                 payload = ''
         elif flt in LITERALS_LZW_DECODE:
             payload = lzwdecode(payload)
         elif flt in LITERALS_ASCII85_DECODE:
             payload = ascii85decode(payload)
         elif flt in LITERALS_ASCIIHEX_DECODE:
             payload = asciihexdecode(payload)
         elif flt in LITERALS_RUNLENGTH_DECODE:
             payload = rldecode(payload)
         elif flt in LITERALS_CCITTFAX_DECODE:
             payload = ccittfaxdecode(payload, parms)
         elif flt in LITERALS_DCT_DECODE:
             # Most likely JPEG data; hand it back to the caller undecoded.
             pass
         elif flt == LITERAL_CRYPT:
             # Not supported yet.
             raise PDFNotImplementedError('/Crypt filter is unsupported')
         else:
             raise PDFNotImplementedError('Unsupported filter: %r' % flt)

         # Predictor stage.
         if 'Predictor' not in parms:
             continue
         predictor = int_value(parms['Predictor'])
         if predictor == 1:
             # Predictor 1 means "no prediction".
             continue
         if predictor < 10:
             raise PDFNotImplementedError('Unsupported predictor: %r' % predictor)
         # PNG family of predictors.
         n_colors = int_value(parms.get('Colors', 1))
         n_columns = int_value(parms.get('Columns', 1))
         bits = int_value(parms.get('BitsPerComponent', 8))
         payload = apply_png_predictor(predictor, n_colors, n_columns, bits, payload)
     self.data = payload
     self.rawdata = None
Exemple #5
0
 def decode(self):
     """
     Decode ``self.rawdata`` into ``self.data`` and drop the raw copy.

     The raw bytes are first passed through ``self.decipher`` (when set),
     then through each filter returned by ``self.get_filters()``, in order.
     After every filter, the parameters found under ``DP``, ``DecodeParms``
     or ``FDecodeParms`` (via ``self.get_any``) are checked: when both
     ``Predictor`` and ``Columns`` are present and the predictor is 12,
     rows tagged ``"\\x02"`` are added byte-wise (mod 256) to the previous
     output row and all other rows are copied unchanged.

     Raises:
         PDFException: zlib rejected a Flate stream while ``STRICT`` is set
             (otherwise the data silently becomes ``""``).
         PDFNotImplementedError: for CCITTFax, /Crypt, an unknown filter,
             or a non-zero predictor other than 12.
     """
     assert self.data is None and self.rawdata is not None
     data = self.rawdata
     if self.decipher:
         # Handle encryption
         data = self.decipher(self.objid, self.genno, data)
     filters = self.get_filters()
     if not filters:
         self.data = data
         self.rawdata = None
         return
     for f in filters:
         if f in LITERALS_FLATE_DECODE:
             # will get errors if the document is encrypted.
             try:
                 data = zlib.decompress(data)
             except zlib.error as e:
                 if STRICT:
                     raise PDFException("Invalid zlib bytes: %r, %r" % (e, data))
                 data = ""
         elif f in LITERALS_LZW_DECODE:
             data = lzwdecode(data)
         elif f in LITERALS_ASCII85_DECODE:
             data = ascii85decode(data)
         elif f in LITERALS_ASCIIHEX_DECODE:
             data = asciihexdecode(data)
         elif f in LITERALS_RUNLENGTH_DECODE:
             data = rldecode(data)
         elif f in LITERALS_CCITTFAX_DECODE:
             # CCITT fax decoding is not implemented here.
             raise PDFNotImplementedError("Unsupported filter: %r" % f)
         elif f == LITERAL_CRYPT:
             # not yet..
             raise PDFNotImplementedError("/Crypt filter is unsupported")
         else:
             raise PDFNotImplementedError("Unsupported filter: %r" % f)
         # apply predictors
         params = self.get_any(("DP", "DecodeParms", "FDecodeParms"), {})
         if "Predictor" in params and "Columns" in params:
             pred = int_value(params["Predictor"])
             columns = int_value(params["Columns"])
             if pred:
                 if pred != 12:
                     raise PDFNotImplementedError("Unsupported predictor: %r" % pred)
                 # Each encoded row is one tag byte followed by `columns`
                 # data bytes.  Collect rows and join once instead of
                 # growing a string with += (quadratic on large streams).
                 rows = []
                 prev_row = "\x00" * columns
                 for i in xrange(0, len(data), columns + 1):
                     row_tag = data[i]
                     row = data[i + 1 : i + 1 + columns]
                     if row_tag == "\x02":
                         row = "".join(chr((ord(a) + ord(b)) & 255) for (a, b) in zip(prev_row, row))
                     rows.append(row)
                     prev_row = row
                 data = "".join(rows)
     self.data = data
     self.rawdata = None
Exemple #6
0
 def decode(self):
     """
     Decode ``self.rawdata`` into ``self.data`` and drop the raw copy.

     The raw bytes are first passed through ``self.decipher`` (when set),
     then through each filter returned by ``self.get_filters()``, in order.
     A Flate stream that zlib rejects is replaced by an empty string rather
     than raising.  After every filter, the parameters found under ``DP``,
     ``DecodeParms`` or ``FDecodeParms`` (via ``self.get_any``) are checked:
     when both ``Predictor`` and ``Columns`` are present and the predictor
     is 12, rows tagged ``'\\x02'`` are added byte-wise (mod 256) to the
     previous output row and all other rows are copied unchanged.

     Raises:
         PDFNotImplementedError: for CCITTFax, /Crypt, an unknown filter,
             or a non-zero predictor other than 12.
     """
     assert self.data is None and self.rawdata is not None
     data = self.rawdata
     if self.decipher:
         # Handle encryption
         data = self.decipher(self.objid, self.genno, data)
     filters = self.get_filters()
     if not filters:
         self.data = data
         self.rawdata = None
         return
     for f in filters:
         if f in LITERALS_FLATE_DECODE:
             # will get errors if the document is encrypted.
             try:
                 data = zlib.decompress(data)
             except zlib.error:
                 data = ''
         elif f in LITERALS_LZW_DECODE:
             data = lzwdecode(data)
         elif f in LITERALS_ASCII85_DECODE:
             data = ascii85decode(data)
         elif f in LITERALS_ASCIIHEX_DECODE:
             data = asciihexdecode(data)
         elif f in LITERALS_RUNLENGTH_DECODE:
             data = rldecode(data)
         elif f in LITERALS_CCITTFAX_DECODE:
             # CCITT fax decoding is not implemented here.
             raise PDFNotImplementedError('Unsupported filter: %r' % f)
         elif f == LITERAL_CRYPT:
             # not yet..
             raise PDFNotImplementedError('/Crypt filter is unsupported')
         else:
             raise PDFNotImplementedError('Unsupported filter: %r' % f)
         # apply predictors
         params = self.get_any(('DP', 'DecodeParms', 'FDecodeParms'), {})
         if 'Predictor' in params and 'Columns' in params:
             pred = int_value(params['Predictor'])
             columns = int_value(params['Columns'])
             if pred:
                 if pred != 12:
                     raise PDFNotImplementedError('Unsupported predictor: %r' % pred)
                 # Each encoded row is one tag byte followed by `columns`
                 # data bytes.  Collect rows and join once instead of
                 # growing a string with += (quadratic on large streams).
                 rows = []
                 prev_row = '\x00' * columns
                 for i in xrange(0, len(data), columns+1):
                     row_tag = data[i]
                     row = data[i+1:i+1+columns]
                     if row_tag == '\x02':
                         row = ''.join( chr((ord(a)+ord(b)) & 255) for (a,b) in zip(prev_row,row) )
                     rows.append(row)
                     prev_row = row
                 data = ''.join(rows)
     self.data = data
     self.rawdata = None
 for f in filters:
     params = self.get_any(('DP', 'DecodeParms', 'FDecodeParms'), {})
     if f in LITERALS_FLATE_DECODE:
         # will get errors if the document is encrypted.
         try:
             data = zlib.decompress(data)
         except zlib.error, e:
             if STRICT:
                 raise PDFException('Invalid zlib bytes: %r, %r' % (e, data))
             data = ''
     elif f in LITERALS_LZW_DECODE:
         data = lzwdecode(data)
     elif f in LITERALS_ASCII85_DECODE:
         data = ascii85decode(data)
     elif f in LITERALS_ASCIIHEX_DECODE:
         data = asciihexdecode(data)
     elif f in LITERALS_RUNLENGTH_DECODE:
         data = rldecode(data)
     elif f in LITERALS_CCITTFAX_DECODE:
         data = ccittfaxdecode(data, params)
     elif f == LITERAL_CRYPT:
         # not yet..
         raise PDFNotImplementedError('/Crypt filter is unsupported')
     else:
         raise PDFNotImplementedError('Unsupported filter: %r' % f)
     # apply predictors
     if 'Predictor' in params:
         pred = int_value(params['Predictor'])
         if pred == 1:
             # no predictor
             pass