def decompress_stream(self, key):
    """Decompress the stream payload of the object stored under ``key``.

    The raw object text is read from ``self.objects[key]``.  The text between
    the first occurrence of ``stream`` and the next ``endstream`` after it is
    stripped and kept in ``self.buff``.  The filter names returned by
    ``self.get_compMethod(key, data)`` are then applied in order and the
    result is left in ``self.data``.  When no filter name is returned, the
    text in front of ``stream`` is handed to ``self.handle_evasion``.

    Side effects: sets ``self.key``, ``self.buff``, ``self.methods``,
    ``self.method`` (the last filter name visited) and ``self.data``.

    This is deliberately best-effort: any exception raised while locating or
    decoding the payload is logged at debug level and swallowed.

    :param key: key of the object inside ``self.objects``.
    :returns: ``None``.
    """
    import logging

    self.key = key
    try:
        data = self.objects[self.key]
        start = data.find("stream")
        payload_from = start + 6  # skip over the 6 characters of "stream"
        # Cut the payload at the closing keyword so that "endstream" and
        # whatever follows it is not fed to the decoders.
        end = data.find("endstream", payload_from)
        if end == -1:
            payload = data[payload_from:]
        else:
            payload = data[payload_from:end]
        self.buff = payload.strip()
        if len(self.buff) < 2:
            # Nothing worth decoding.
            return
        self.methods = self.get_compMethod(key, data)
        self.data = self.buff.strip()
        # The loop variable is kept on the instance, as other code may read
        # self.method afterwards.
        for self.method in self.methods:
            name = self.method.lower()
            if name == 'fl':
                self.data = decompress(self.data)
            elif name == 'ascii85decode':
                self.data = ascii85decode(self.data)
            elif name == 'asciihexdecode':
                self.data = asciihexdecode(self.data)
            elif name == 'lzwdecode':
                self.data = lzwdecode(self.data)
        if len(self.methods) == 0:
            # No filter declared: let the evasion handler inspect the part of
            # the object that precedes the stream keyword.
            self.handle_evasion(key, data[:start])
    except Exception as err:
        # Malformed input is expected here; record it instead of hiding it.
        logging.getLogger(__name__).debug(
            "decompress_stream failed for %r: %s", key, err)
def decode(self):
    """Decode ``self.rawdata`` and store the result in ``self.data``.

    Steps, in order:

    1. If ``self.decipher`` is set, it is called with
       ``(self.objid, self.genno, data)`` to handle encryption.
    2. If the stream dictionary ``self.dic`` has no ``Filter`` entry, the
       data is stored unchanged.
    3. Otherwise every filter (a single value is treated as a one-element
       list) is applied in order.  After each filter the predictor
       described by ``DP`` (or, failing that, ``DecodeParms``) is applied.

    Only predictor value 12 is supported: the data is read as rows of
    ``Columns`` bytes, each preceded by a one-byte tag.  Rows tagged
    ``'\\x02'`` are rebuilt by adding the previous output row bytewise
    (modulo 256); rows with any other tag are copied as they are.

    On return ``self.rawdata`` is ``None``.

    Raises:
        PDFNotImplementedError: for the /Crypt filter, an unknown filter,
            or a non-zero predictor other than 12.
        PDFValueError: if predictor 12 is requested without ``Columns``.
    """
    assert self.data is None and self.rawdata is not None
    data = self.rawdata
    if self.decipher:
        # Handle encryption
        data = self.decipher(self.objid, self.genno, data)
    if 'Filter' not in self.dic:
        self.data = data
        self.rawdata = None
        return
    filters = self.dic['Filter']
    if not isinstance(filters, list):
        filters = [filters]
    for f in filters:
        if f in LITERALS_FLATE_DECODE:
            # will get errors if the document is encrypted.
            data = self.decomp(data)
        elif f in LITERALS_LZW_DECODE:
            try:
                from cStringIO import StringIO
            except ImportError:
                from StringIO import StringIO
            data = ''.join(LZWDecoder(StringIO(data)).run())
        elif f in LITERALS_ASCII85_DECODE:
            import ascii85
            data = ascii85.ascii85decode(data)
        elif f in LITERALS_ASCIIHEX_DECODE:
            import ascii85
            data = ascii85.asciihexdecode(data)
        elif f == LITERAL_CRYPT:
            raise PDFNotImplementedError('/Crypt filter is unsupported')
        else:
            raise PDFNotImplementedError('Unsupported filter: %r' % f)
        # apply predictors
        if 'DP' in self.dic:
            params = self.dic['DP']
        else:
            params = self.dic.get('DecodeParms', {})
        if 'Predictor' in params:
            pred = int_value(params['Predictor'])
            if pred:
                if pred != 12:
                    raise PDFNotImplementedError(
                        'Unsupported predictor: %r' % pred)
                if 'Columns' not in params:
                    raise PDFValueError('Columns undefined for predictor=12')
                columns = int_value(params['Columns'])
                # Collect rows and join once instead of growing a string.
                rows = []
                prev_row = '\x00' * columns
                for i in xrange(0, len(data), columns + 1):
                    row_tag = data[i]
                    row = data[i + 1:i + 1 + columns]
                    if row_tag == '\x02':
                        row = ''.join(chr((ord(a) + ord(b)) & 255)
                                      for (a, b) in zip(prev_row, row))
                    rows.append(row)
                    prev_row = row
                data = ''.join(rows)
    self.data = data
    self.rawdata = None
def decode(self):
    """Decode ``self.rawdata`` and store the result in ``self.data``.

    If ``self.decipher`` is set it is first called with
    ``(self.objid, self.genno, data)``.  The filters returned by
    ``self.get_filters()`` are then applied in order; with no filters the
    data is stored unchanged.  A Flate stream that zlib rejects decodes to
    an empty string rather than raising.

    After each filter, the parameters found under ``DP`` / ``DecodeParms`` /
    ``FDecodeParms`` are consulted.  When both ``Predictor`` and ``Columns``
    are present and the predictor is 12, the data is read as rows of
    ``Columns`` bytes, each preceded by a one-byte tag.  Rows tagged
    ``'\\x02'`` are rebuilt by adding the previous output row bytewise
    (modulo 256); other rows are copied unchanged.

    On return ``self.rawdata`` is ``None``.

    Raises:
        PDFNotImplementedError: for CCITTFax, /Crypt or unknown filters, and
            for a non-zero predictor other than 12.
    """
    assert self.data is None and self.rawdata is not None
    data = self.rawdata
    if self.decipher:
        # Handle encryption
        data = self.decipher(self.objid, self.genno, data)
    filters = self.get_filters()
    if not filters:
        self.data = data
        self.rawdata = None
        return
    for f in filters:
        if f in LITERALS_FLATE_DECODE:
            # will get errors if the document is encrypted.
            try:
                data = zlib.decompress(data)
            except zlib.error:
                data = ''
        elif f in LITERALS_LZW_DECODE:
            data = lzwdecode(data)
        elif f in LITERALS_ASCII85_DECODE:
            data = ascii85decode(data)
        elif f in LITERALS_ASCIIHEX_DECODE:
            data = asciihexdecode(data)
        elif f in LITERALS_RUNLENGTH_DECODE:
            data = rldecode(data)
        elif f in LITERALS_CCITTFAX_DECODE:
            # No CCITT fax decoder is hooked up in this version.
            raise PDFNotImplementedError('Unsupported filter: %r' % f)
        elif f == LITERAL_CRYPT:
            # not yet..
            raise PDFNotImplementedError('/Crypt filter is unsupported')
        else:
            raise PDFNotImplementedError('Unsupported filter: %r' % f)
        # apply predictors
        params = self.get_any(('DP', 'DecodeParms', 'FDecodeParms'), {})
        if 'Predictor' in params and 'Columns' in params:
            pred = int_value(params['Predictor'])
            columns = int_value(params['Columns'])
            if pred:
                if pred != 12:
                    raise PDFNotImplementedError(
                        'Unsupported predictor: %r' % pred)
                # Collect rows and join once instead of growing a string.
                rows = []
                prev_row = '\x00' * columns
                for i in xrange(0, len(data), columns + 1):
                    row_tag = data[i]
                    row = data[i + 1:i + 1 + columns]
                    if row_tag == '\x02':
                        row = ''.join(chr((ord(a) + ord(b)) & 255)
                                      for (a, b) in zip(prev_row, row))
                    rows.append(row)
                    prev_row = row
                data = ''.join(rows)
    self.data = data
    self.rawdata = None
def decode(self):
    """Turn ``self.rawdata`` into decoded ``self.data``.

    The optional ``self.decipher`` callback is applied first, receiving
    ``(self.objid, self.genno, data, self.attrs)``.  Each filter returned by
    ``self.get_filters()`` is then applied in turn, followed by the
    predictor named in the ``DP`` / ``DecodeParms`` / ``FDecodeParms``
    parameters.  Predictor 1 means "no predictor"; values of 10 and above
    are handed to ``apply_png_predictor``.  DCT streams are passed through
    untouched.  ``self.rawdata`` is cleared afterwards.

    Raises:
        PDFException: if zlib rejects a Flate stream while ``STRICT`` is on
            (otherwise the stream decodes to an empty string).
        PDFNotImplementedError: for the /Crypt filter, an unknown filter, or
            a predictor that is neither 1 nor at least 10.
    """
    assert self.data is None and self.rawdata is not None
    payload = self.rawdata
    if self.decipher:
        # Handle encryption
        payload = self.decipher(self.objid, self.genno, payload, self.attrs)
    filter_chain = self.get_filters()
    if filter_chain:
        for flt in filter_chain:
            params = self.get_any(('DP', 'DecodeParms', 'FDecodeParms'), {})
            if flt in LITERALS_FLATE_DECODE:
                # will get errors if the document is encrypted.
                try:
                    payload = zlib.decompress(payload)
                except zlib.error as exc:
                    if STRICT:
                        raise PDFException(
                            'Invalid zlib bytes: %r, %r' % (exc, payload))
                    payload = ''
            elif flt in LITERALS_LZW_DECODE:
                payload = lzwdecode(payload)
            elif flt in LITERALS_ASCII85_DECODE:
                payload = ascii85decode(payload)
            elif flt in LITERALS_ASCIIHEX_DECODE:
                payload = asciihexdecode(payload)
            elif flt in LITERALS_RUNLENGTH_DECODE:
                payload = rldecode(payload)
            elif flt in LITERALS_CCITTFAX_DECODE:
                payload = ccittfaxdecode(payload, params)
            elif flt in LITERALS_DCT_DECODE:
                # This is probably a JPG stream - it does not need to be
                # decoded twice.  Just return the stream to the user.
                pass
            elif flt == LITERAL_CRYPT:
                # not yet..
                raise PDFNotImplementedError('/Crypt filter is unsupported')
            else:
                raise PDFNotImplementedError('Unsupported filter: %r' % flt)

            # apply predictors
            if 'Predictor' not in params:
                continue
            predictor = int_value(params['Predictor'])
            if predictor == 1:
                # no predictor
                continue
            if not 10 <= predictor:
                raise PDFNotImplementedError(
                    'Unsupported predictor: %r' % predictor)
            # PNG predictor
            colors = int_value(params.get('Colors', 1))
            columns = int_value(params.get('Columns', 1))
            bitspercomponent = int_value(params.get('BitsPerComponent', 8))
            payload = apply_png_predictor(
                predictor, colors, columns, bitspercomponent, payload)
    self.data = payload
    self.rawdata = None
    return
def decode(self):
    """Decode ``self.rawdata`` and store the result in ``self.data``.

    If ``self.decipher`` is set it is first called with
    ``(self.objid, self.genno, data)``.  The filters returned by
    ``self.get_filters()`` are then applied in order; with no filters the
    data is stored unchanged.

    After each filter, the parameters found under ``DP`` / ``DecodeParms`` /
    ``FDecodeParms`` are consulted.  When both ``Predictor`` and ``Columns``
    are present and the predictor is 12, the data is read as rows of
    ``Columns`` bytes, each preceded by a one-byte tag.  Rows tagged
    ``"\\x02"`` are rebuilt by adding the previous output row bytewise
    (modulo 256); other rows are copied unchanged.

    On return ``self.rawdata`` is ``None``.

    Raises:
        PDFException: if zlib rejects a Flate stream while ``STRICT`` is on
            (otherwise the stream decodes to an empty string).
        PDFNotImplementedError: for CCITTFax, /Crypt or unknown filters, and
            for a non-zero predictor other than 12.
    """
    assert self.data is None and self.rawdata is not None
    data = self.rawdata
    if self.decipher:
        # Handle encryption
        data = self.decipher(self.objid, self.genno, data)
    filters = self.get_filters()
    if not filters:
        self.data = data
        self.rawdata = None
        return
    for f in filters:
        if f in LITERALS_FLATE_DECODE:
            # will get errors if the document is encrypted.
            try:
                data = zlib.decompress(data)
            except zlib.error as e:
                if STRICT:
                    raise PDFException("Invalid zlib bytes: %r, %r" % (e, data))
                data = ""
        elif f in LITERALS_LZW_DECODE:
            data = lzwdecode(data)
        elif f in LITERALS_ASCII85_DECODE:
            data = ascii85decode(data)
        elif f in LITERALS_ASCIIHEX_DECODE:
            data = asciihexdecode(data)
        elif f in LITERALS_RUNLENGTH_DECODE:
            data = rldecode(data)
        elif f in LITERALS_CCITTFAX_DECODE:
            # No CCITT fax decoder is hooked up in this version.
            raise PDFNotImplementedError("Unsupported filter: %r" % f)
        elif f == LITERAL_CRYPT:
            # not yet..
            raise PDFNotImplementedError("/Crypt filter is unsupported")
        else:
            raise PDFNotImplementedError("Unsupported filter: %r" % f)
        # apply predictors
        params = self.get_any(("DP", "DecodeParms", "FDecodeParms"), {})
        if "Predictor" in params and "Columns" in params:
            pred = int_value(params["Predictor"])
            columns = int_value(params["Columns"])
            if pred:
                if pred != 12:
                    raise PDFNotImplementedError("Unsupported predictor: %r" % pred)
                # Collect rows and join once instead of growing a string.
                rows = []
                prev_row = "\x00" * columns
                for i in xrange(0, len(data), columns + 1):
                    row_tag = data[i]
                    row = data[i + 1 : i + 1 + columns]
                    if row_tag == "\x02":
                        row = "".join(
                            chr((ord(a) + ord(b)) & 255)
                            for (a, b) in zip(prev_row, row)
                        )
                    rows.append(row)
                    prev_row = row
                data = "".join(rows)
    self.data = data
    self.rawdata = None
def decode(self):
    """Decode ``self.rawdata`` and store the result in ``self.data``.

    If ``self.decipher`` is set it is first called with
    ``(self.objid, self.genno, data)``.  The filters returned by
    ``self.get_filters()`` are then applied in order; with no filters the
    data is stored unchanged.  A Flate stream that zlib rejects decodes to
    an empty string rather than raising.

    After each filter, the parameters found under ``DP`` / ``DecodeParms`` /
    ``FDecodeParms`` are consulted; when both ``Predictor`` and ``Columns``
    are present and the predictor is 12, the row predictor is undone.

    On return ``self.rawdata`` is ``None``.

    Raises:
        PDFNotImplementedError: for CCITTFax, /Crypt or unknown filters, and
            for a non-zero predictor other than 12.
    """

    def undo_row_predictor(raw, columns):
        # Read `raw` as rows of `columns` bytes, each preceded by a one-byte
        # tag.  Rows tagged '\x02' are rebuilt by adding the previous output
        # row bytewise (modulo 256); other rows are copied unchanged.
        rows = []
        previous = '\x00' * columns
        for offset in xrange(0, len(raw), columns + 1):
            tag = raw[offset]
            row = raw[offset + 1:offset + 1 + columns]
            if tag == '\x02':
                row = ''.join(chr((ord(a) + ord(b)) & 255)
                              for (a, b) in zip(previous, row))
            rows.append(row)
            previous = row
        # A single join avoids repeated string reallocation.
        return ''.join(rows)

    assert self.data is None and self.rawdata is not None
    data = self.rawdata
    if self.decipher:
        # Handle encryption
        data = self.decipher(self.objid, self.genno, data)
    filters = self.get_filters()
    if not filters:
        self.data = data
        self.rawdata = None
        return
    for f in filters:
        if f in LITERALS_FLATE_DECODE:
            # will get errors if the document is encrypted.
            try:
                data = zlib.decompress(data)
            except zlib.error:
                data = ''
        elif f in LITERALS_LZW_DECODE:
            data = lzwdecode(data)
        elif f in LITERALS_ASCII85_DECODE:
            data = ascii85decode(data)
        elif f in LITERALS_ASCIIHEX_DECODE:
            data = asciihexdecode(data)
        elif f in LITERALS_RUNLENGTH_DECODE:
            data = rldecode(data)
        elif f in LITERALS_CCITTFAX_DECODE:
            # No CCITT fax decoder is hooked up in this version.
            raise PDFNotImplementedError('Unsupported filter: %r' % f)
        elif f == LITERAL_CRYPT:
            # not yet..
            raise PDFNotImplementedError('/Crypt filter is unsupported')
        else:
            raise PDFNotImplementedError('Unsupported filter: %r' % f)
        # apply predictors
        params = self.get_any(('DP', 'DecodeParms', 'FDecodeParms'), {})
        if 'Predictor' in params and 'Columns' in params:
            pred = int_value(params['Predictor'])
            columns = int_value(params['Columns'])
            if pred:
                if pred != 12:
                    raise PDFNotImplementedError(
                        'Unsupported predictor: %r' % pred)
                data = undo_row_predictor(data, columns)
    self.data = data
    self.rawdata = None
for f in filters: params = self.get_any(('DP', 'DecodeParms', 'FDecodeParms'), {}) if f in LITERALS_FLATE_DECODE: # will get errors if the document is encrypted. try: data = zlib.decompress(data) except zlib.error, e: if STRICT: raise PDFException('Invalid zlib bytes: %r, %r' % (e, data)) data = '' elif f in LITERALS_LZW_DECODE: data = lzwdecode(data) elif f in LITERALS_ASCII85_DECODE: data = ascii85decode(data) elif f in LITERALS_ASCIIHEX_DECODE: data = asciihexdecode(data) elif f in LITERALS_RUNLENGTH_DECODE: data = rldecode(data) elif f in LITERALS_CCITTFAX_DECODE: data = ccittfaxdecode(data, params) elif f == LITERAL_CRYPT: # not yet.. raise PDFNotImplementedError('/Crypt filter is unsupported') else: raise PDFNotImplementedError('Unsupported filter: %r' % f) # apply predictors if 'Predictor' in params: pred = int_value(params['Predictor']) if pred == 1: # no predictor pass