Exemple #1
0
    def decompress_stream(self, key):
        """
        Decode the stream payload of the object stored under ``key``.

        The object text is taken from ``self.objects[key]``.  Everything after
        the first ``stream`` keyword is stripped and stored in ``self.buff``;
        if fewer than two characters remain the method returns early.  The
        decoder names returned by ``self.get_compMethod(key, data)`` are then
        applied in order and the result is kept in ``self.data``.  When no
        decoder names are returned, ``self.handle_evasion`` is called with the
        text that precedes the ``stream`` keyword.

        Side effects: sets ``self.key``, ``self.buff``, ``self.methods``,
        ``self.method`` (the loop variable) and ``self.data``.

        Decoding is best effort: any exception is logged at DEBUG level and
        swallowed, leaving whatever attributes were set so far.  Always
        returns ``None``.
        """
        self.key = key
        try:
            data = self.objects[self.key]
            start = data.find("stream")
            # NOTE(review): the payload keeps everything after the first
            # "stream" keyword, including any trailing "endstream" marker --
            # confirm the decoders tolerate that.
            self.buff = data[start + 6:].strip()
            if len(self.buff) < 2:
                return

            self.methods = self.get_compMethod(key, data)
            self.data = self.buff.strip()
            for self.method in self.methods:
                # Only one name can match per iteration, so compare once.
                name = self.method.lower()
                if name == 'fl':
                    self.data = decompress(self.data)
                elif name == 'ascii85decode':
                    self.data = ascii85decode(self.data)
                elif name == 'asciihexdecode':
                    self.data = asciihexdecode(self.data)
                elif name == 'lzwdecode':
                    self.data = lzwdecode(self.data)

            if len(self.methods) == 0:
                self.handle_evasion(key, data[:start])

        except Exception as err:
            # Deliberately non-fatal: a broken stream must not abort the scan,
            # but leave a trace for debugging instead of hiding it completely.
            import logging
            logging.getLogger(__name__).debug(
                "decompress_stream failed for %r: %s", key, err)
Exemple #2
0
 def decode(self):
     """Decrypt ``self.rawdata`` and start running it through its filters.

     Requires ``self.data`` to be ``None`` and ``self.rawdata`` to be set.
     If ``self.decipher`` is truthy it is called with the object id,
     generation number and raw data first.  When ``self.get_filters()``
     returns nothing, the (possibly decrypted) data is stored in
     ``self.data`` and ``self.rawdata`` is cleared.

     Raises ``PDFException`` when zlib rejects a Flate stream and ``STRICT``
     is truthy; otherwise the data is replaced by an empty string.

     NOTE(review): as shown, the method ends after the LZW branch, so with a
     non-empty filter list the decoded data is never stored on ``self.data``.
     The remaining branches are presumably cut off in this excerpt --
     confirm against the full source.
     """
     assert self.data is None and self.rawdata is not None
     data = self.rawdata
     if self.decipher:
         # Handle encryption
         data = self.decipher(self.objid, self.genno, data)
     filters = self.get_filters()
     if not filters:
         self.data = data
         self.rawdata = None
         return
     for f in filters:
         if isinstance(f, PDFObjRef):
             # Extends the list being iterated in place, so the resolved
             # entries are visited later in this same loop.
             filters += f.resolve()
             continue
         # Not used in the visible part of the method.
         params = self.get_any(('DP', 'DecodeParms', 'FDecodeParms'), {})
         if f in LITERALS_FLATE_DECODE:
             # will get errors if the document is encrypted.
             try:
                 data = zlib.decompress(data)
             except zlib.error as e:
                 if STRICT:
                     raise PDFException('Invalid zlib bytes: %r, %r' % (e, data))
                 data = ''
         elif f in LITERALS_LZW_DECODE:
             data = lzwdecode(data)
Exemple #3
0
 def decode(self):
     """Decrypt ``self.rawdata`` and begin applying the stream filters.

     Preconditions (asserted): ``self.data`` is ``None`` and
     ``self.rawdata`` is not ``None``.  A truthy ``self.decipher`` is called
     with ``(self.objid, self.genno, data)`` before any filter runs.  With
     no filters the data is stored in ``self.data`` and ``self.rawdata`` is
     reset to ``None``.

     Raises ``PDFException`` for invalid zlib input when ``STRICT`` is
     truthy; when it is falsy the data becomes an empty string instead.

     NOTE(review): the code shown stops after the LZW branch and does not
     store the result of a non-empty filter chain; the rest of the method
     appears to be missing from this excerpt -- confirm against the full
     source.
     """
     assert self.data is None and self.rawdata is not None
     data = self.rawdata
     if self.decipher:
         # Handle encryption
         data = self.decipher(self.objid, self.genno, data)
     filters = self.get_filters()
     if not filters:
         self.data = data
         self.rawdata = None
         return
     for f in filters:
         if isinstance(f, PDFObjRef):
             # The list is extended while it is being iterated, so the
             # resolved filters are processed by later iterations.
             filters += f.resolve()
             continue
         # Fetched here but not consumed in the visible part of the method.
         params = self.get_any(('DP', 'DecodeParms', 'FDecodeParms'), {})
         if f in LITERALS_FLATE_DECODE:
             # will get errors if the document is encrypted.
             try:
                 data = zlib.decompress(data)
             except zlib.error as e:
                 if STRICT:
                     raise PDFException('Invalid zlib bytes: %r, %r' %
                                        (e, data))
                 data = ''
         elif f in LITERALS_LZW_DECODE:
             data = lzwdecode(data)
 def decode(self):
     """Decrypt and decode ``self.rawdata`` into ``self.data``.

     Requires ``self.data`` to be ``None`` and ``self.rawdata`` to be set.
     A truthy ``self.decipher`` is applied first, then every filter
     returned by ``self.get_filters()`` in order (Flate, LZW, ASCII85,
     ASCIIHex, RunLength).  After each filter, the predictor described by
     the ``DP`` / ``DecodeParms`` / ``FDecodeParms`` entry is undone when
     both ``Predictor`` and ``Columns`` are present.  On success the result
     is stored in ``self.data`` and ``self.rawdata`` is cleared.

     A Flate stream that zlib rejects decodes to empty data.

     Raises ``PDFNotImplementedError`` for CCITTFax, Crypt or unknown
     filters and for any non-zero predictor other than 12.
     """
     assert self.data is None and self.rawdata is not None
     data = self.rawdata
     if self.decipher:
         # Handle encryption
         data = self.decipher(self.objid, self.genno, data)
     filters = self.get_filters()
     if not filters:
         self.data = data
         self.rawdata = None
         return
     for f in filters:
         if f in LITERALS_FLATE_DECODE:
             # will get errors if the document is encrypted.
             try:
                 data = zlib.decompress(data)
             except zlib.error:
                 data = b''
         elif f in LITERALS_LZW_DECODE:
             data = lzwdecode(data)
         elif f in LITERALS_ASCII85_DECODE:
             data = ascii85decode(data)
         elif f in LITERALS_ASCIIHEX_DECODE:
             data = asciihexdecode(data)
         elif f in LITERALS_RUNLENGTH_DECODE:
             data = rldecode(data)
         elif f in LITERALS_CCITTFAX_DECODE:
             raise PDFNotImplementedError('Unsupported filter: %r' % f)
         elif f == LITERAL_CRYPT:
             # not yet..
             raise PDFNotImplementedError('/Crypt filter is unsupported')
         else:
             raise PDFNotImplementedError('Unsupported filter: %r' % f)
         # apply predictors
         # NOTE(review): this runs after every filter in the chain, using
         # the same parameters each time -- confirm that is intended for
         # multi-filter streams.
         params = self.get_any(('DP', 'DecodeParms', 'FDecodeParms'), {})
         if 'Predictor' in params and 'Columns' in params:
             pred = int_value(params['Predictor'])
             columns = int_value(params['Columns'])
             if pred:
                 if pred != 12:
                     raise PDFNotImplementedError(
                         'Unsupported predictor: %r' % pred)
                 # Work on byte values so the same code handles Python 2
                 # str and Python 3 bytes, and grows the output in place.
                 raw = bytearray(data)
                 decoded = bytearray()
                 prev_row = bytearray(b'\x00' * columns)
                 for offset in range(0, len(raw), columns + 1):
                     # Each row is one tag byte followed by `columns` bytes.
                     row = raw[offset + 1:offset + 1 + columns]
                     if raw[offset] == 2:
                         # Tag 2: add the previous row byte-wise, mod 256.
                         row = bytearray(
                             (a + b) & 255 for (a, b) in zip(prev_row, row))
                     decoded += row
                     prev_row = row
                 data = bytes(decoded)
     self.data = data
     self.rawdata = None
     return
Exemple #5
0
 def decode(self):
     """Decrypt and decode ``self.rawdata``, storing the result in ``self.data``.

     Requires ``self.data`` to be ``None`` and ``self.rawdata`` to be set.
     A truthy ``self.decipher`` is called with the object id, generation
     number, data and ``self.attrs``.  Each filter from
     ``self.get_filters()`` is then applied in order; DCT streams are passed
     through unchanged.  After each filter a ``Predictor`` of 10 or more in
     the decode parameters is undone with ``apply_png_predictor``.

     Raises ``PDFException`` for invalid zlib input when ``STRICT`` is
     truthy, and ``PDFNotImplementedError`` for Crypt or unknown filters and
     for predictors other than 1 that are below 10.
     """
     assert self.data is None and self.rawdata is not None
     payload = self.rawdata
     if self.decipher:
         # Handle encryption
         payload = self.decipher(self.objid, self.genno, payload, self.attrs)
     # An empty/missing filter list simply skips the loop.
     for filt in (self.get_filters() or ()):
         parms = self.get_any(('DP', 'DecodeParms', 'FDecodeParms'), {})
         if filt in LITERALS_FLATE_DECODE:
             # will get errors if the document is encrypted.
             try:
                 payload = zlib.decompress(payload)
             except zlib.error as exc:
                 if STRICT:
                     raise PDFException('Invalid zlib bytes: %r, %r' % (exc, payload))
                 payload = ''
         elif filt in LITERALS_LZW_DECODE:
             payload = lzwdecode(payload)
         elif filt in LITERALS_ASCII85_DECODE:
             payload = ascii85decode(payload)
         elif filt in LITERALS_ASCIIHEX_DECODE:
             payload = asciihexdecode(payload)
         elif filt in LITERALS_RUNLENGTH_DECODE:
             payload = rldecode(payload)
         elif filt in LITERALS_CCITTFAX_DECODE:
             payload = ccittfaxdecode(payload, parms)
         elif filt in LITERALS_DCT_DECODE:
             # This is probably a JPG stream - it does not need to be decoded twice.
             # Just return the stream to the user.
             pass
         elif filt == LITERAL_CRYPT:
             # not yet..
             raise PDFNotImplementedError('/Crypt filter is unsupported')
         else:
             raise PDFNotImplementedError('Unsupported filter: %r' % filt)
         # apply predictors
         if 'Predictor' not in parms:
             continue
         predictor = int_value(parms['Predictor'])
         if predictor == 1:
             # no predictor
             continue
         if predictor < 10:
             raise PDFNotImplementedError('Unsupported predictor: %r' % predictor)
         # PNG predictor
         n_colors = int_value(parms.get('Colors', 1))
         n_columns = int_value(parms.get('Columns', 1))
         bits = int_value(parms.get('BitsPerComponent', 8))
         payload = apply_png_predictor(predictor, n_colors, n_columns, bits, payload)
     self.data = payload
     self.rawdata = None
Exemple #6
0
 def decode(self):
     """Decrypt and decode ``self.rawdata`` into ``self.data``.

     Requires ``self.data`` to be ``None`` and ``self.rawdata`` to be set.
     A truthy ``self.decipher`` is applied first, then each filter from
     ``self.get_filters()`` in order (Flate, LZW, ASCII85, ASCIIHex,
     RunLength).  After each filter, the predictor described by the
     ``DP`` / ``DecodeParms`` / ``FDecodeParms`` entry is undone when both
     ``Predictor`` and ``Columns`` are present.  The result is stored in
     ``self.data`` and ``self.rawdata`` is cleared.

     Raises ``PDFException`` for invalid zlib input when ``STRICT`` is
     truthy (otherwise the data becomes empty), and
     ``PDFNotImplementedError`` for CCITTFax, Crypt or unknown filters and
     for any non-zero predictor other than 12.
     """
     assert self.data is None and self.rawdata is not None
     data = self.rawdata
     if self.decipher:
         # Handle encryption
         data = self.decipher(self.objid, self.genno, data)
     filters = self.get_filters()
     if not filters:
         self.data = data
         self.rawdata = None
         return
     for f in filters:
         if f in LITERALS_FLATE_DECODE:
             # will get errors if the document is encrypted.
             try:
                 data = zlib.decompress(data)
             except zlib.error as e:
                 if STRICT:
                     raise PDFException("Invalid zlib bytes: %r, %r" % (e, data))
                 data = b""
         elif f in LITERALS_LZW_DECODE:
             data = lzwdecode(data)
         elif f in LITERALS_ASCII85_DECODE:
             data = ascii85decode(data)
         elif f in LITERALS_ASCIIHEX_DECODE:
             data = asciihexdecode(data)
         elif f in LITERALS_RUNLENGTH_DECODE:
             data = rldecode(data)
         elif f in LITERALS_CCITTFAX_DECODE:
             raise PDFNotImplementedError("Unsupported filter: %r" % f)
         elif f == LITERAL_CRYPT:
             # not yet..
             raise PDFNotImplementedError("/Crypt filter is unsupported")
         else:
             raise PDFNotImplementedError("Unsupported filter: %r" % f)
         # apply predictors
         # NOTE(review): executed after every filter with the same
         # parameters -- confirm this is intended for chained filters.
         params = self.get_any(("DP", "DecodeParms", "FDecodeParms"), {})
         if "Predictor" in params and "Columns" in params:
             pred = int_value(params["Predictor"])
             columns = int_value(params["Columns"])
             if pred:
                 if pred != 12:
                     raise PDFNotImplementedError("Unsupported predictor: %r" % pred)
                 # Byte-value arithmetic works for both Python 2 str and
                 # Python 3 bytes; the bytearray grows in place instead of
                 # re-copying an immutable string on every row.
                 raw = bytearray(data)
                 decoded = bytearray()
                 prev_row = bytearray(b"\x00" * columns)
                 row_size = columns + 1  # one tag byte + `columns` data bytes
                 for offset in range(0, len(raw), row_size):
                     row = raw[offset + 1 : offset + row_size]
                     if raw[offset] == 2:
                         # Tag 2: add the previous row byte-wise, mod 256.
                         row = bytearray((a + b) & 255 for (a, b) in zip(prev_row, row))
                     decoded += row
                     prev_row = row
                 data = bytes(decoded)
     self.data = data
     self.rawdata = None
     return
Exemple #7
0
 def decode(self):
     """Decrypt and decode ``self.rawdata`` into ``self.data``.

     Requires ``self.data`` to be ``None`` and ``self.rawdata`` to be set.
     A truthy ``self.decipher`` is applied first, then each filter from
     ``self.get_filters()`` in order (Flate, LZW, ASCII85, ASCIIHex,
     RunLength).  After each filter, the predictor described by the
     ``DP`` / ``DecodeParms`` / ``FDecodeParms`` entry is undone when both
     ``Predictor`` and ``Columns`` are present.  The result is stored in
     ``self.data`` and ``self.rawdata`` is cleared.

     A Flate stream that zlib rejects decodes to empty data.

     Raises ``PDFNotImplementedError`` for CCITTFax, Crypt or unknown
     filters and for any non-zero predictor other than 12.
     """
     assert self.data is None and self.rawdata is not None
     data = self.rawdata
     if self.decipher:
         # Handle encryption
         data = self.decipher(self.objid, self.genno, data)
     filters = self.get_filters()
     if not filters:
         self.data = data
         self.rawdata = None
         return
     for f in filters:
         if f in LITERALS_FLATE_DECODE:
             # will get errors if the document is encrypted.
             try:
                 data = zlib.decompress(data)
             except zlib.error:
                 data = b''
         elif f in LITERALS_LZW_DECODE:
             data = lzwdecode(data)
         elif f in LITERALS_ASCII85_DECODE:
             data = ascii85decode(data)
         elif f in LITERALS_ASCIIHEX_DECODE:
             data = asciihexdecode(data)
         elif f in LITERALS_RUNLENGTH_DECODE:
             data = rldecode(data)
         elif f in LITERALS_CCITTFAX_DECODE:
             raise PDFNotImplementedError('Unsupported filter: %r' % f)
         elif f == LITERAL_CRYPT:
             # not yet..
             raise PDFNotImplementedError('/Crypt filter is unsupported')
         else:
             raise PDFNotImplementedError('Unsupported filter: %r' % f)
         # apply predictors
         # NOTE(review): applied after each filter in the chain with the
         # same parameters -- confirm for streams with several filters.
         params = self.get_any(('DP', 'DecodeParms', 'FDecodeParms'), {})
         if 'Predictor' in params and 'Columns' in params:
             pred = int_value(params['Predictor'])
             columns = int_value(params['Columns'])
             if pred:
                 if pred != 12:
                     raise PDFNotImplementedError('Unsupported predictor: %r' % pred)
                 # Use byte values so Python 2 str and Python 3 bytes behave
                 # alike, and extend one buffer instead of concatenating
                 # immutable strings row by row.
                 raw = bytearray(data)
                 decoded = bytearray()
                 prev_row = bytearray(b'\x00' * columns)
                 for offset in range(0, len(raw), columns + 1):
                     # Row layout: one tag byte, then `columns` data bytes.
                     tag = raw[offset]
                     row = raw[offset + 1:offset + 1 + columns]
                     if tag == 2:
                         # Tag 2: add the previous row byte-wise, mod 256.
                         row = bytearray((a + b) & 255 for (a, b) in zip(prev_row, row))
                     decoded.extend(row)
                     prev_row = row
                 data = bytes(decoded)
     self.data = data
     self.rawdata = None
     return
Exemple #8
0
def lzwDecode(stream, parameters):
    '''
        Method to decode streams using the LZW algorithm

        @param stream: A PDF stream
        @param parameters: None, an empty dict, or a dict of decode parameters whose values expose getRawValue(); the keys read are /Predictor, /Columns, /Colors and /BitsPerComponent
        @return: A tuple (status,statusContent), where statusContent is the decoded PDF stream in case status = 0 or an error in case status = -1. When a predictor other than 1 is given, the result of post_prediction is returned as is.
    '''
    try:
        decodedStream = lzw.lzwdecode(stream)
    except Exception:
        return (-1, 'Error decompressing string')

    if parameters is None or parameters == {}:
        return (0, decodedStream)

    if '/Predictor' in parameters:
        predictor = parameters['/Predictor'].getRawValue()
    else:
        predictor = 1
    # Columns = number of samples per row
    if '/Columns' in parameters:
        columns = parameters['/Columns'].getRawValue()
    else:
        columns = 1
    # Colors = number of components per sample
    if '/Colors' in parameters:
        colors = parameters['/Colors'].getRawValue()
        if colors < 1:
            colors = 1
    else:
        colors = 1
    # BitsPerComponent: number of bits per color component
    if '/BitsPerComponent' in parameters:
        bits = parameters['/BitsPerComponent'].getRawValue()
        if bits not in [1, 2, 4, 8, 16]:
            bits = 8
    else:
        bits = 8
    # NOTE: /EarlyChange is not honoured; lzw.lzwdecode is called with the
    # stream only, so the value was read but never used.
    if predictor is None or predictor == 1:
        return (0, decodedStream)
    return post_prediction(decodedStream, predictor, columns, colors, bits)
Exemple #9
0
def lzwDecode(stream, parameters):
    '''
        Method to decode streams using the LZW algorithm

        @param stream: A PDF stream
        @param parameters: None, an empty dict, or a dict of decode parameters whose values expose getRawValue(); the keys read are /Predictor, /Columns, /Colors and /BitsPerComponent
        @return: A tuple (status,statusContent), where statusContent is the decoded PDF stream in case status = 0 or an error in case status = -1. When a predictor other than 1 is given, the result of post_prediction is returned as is.
    '''
    try:
        decodedStream = lzw.lzwdecode(stream)
    except Exception:
        return (-1, 'Error decompressing string')

    if parameters is None or parameters == {}:
        return (0, decodedStream)

    def rawValue(key, default):
        # Raw value stored under `key`, or `default` when the key is absent.
        if key in parameters:
            return parameters[key].getRawValue()
        return default

    predictor = rawValue('/Predictor', 1)
    # Columns = number of samples per row
    columns = rawValue('/Columns', 1)
    # Colors = number of components per sample
    colors = rawValue('/Colors', 1)
    if colors < 1:
        colors = 1
    # BitsPerComponent: number of bits per color component
    bits = rawValue('/BitsPerComponent', 8)
    if bits not in [1, 2, 4, 8, 16]:
        bits = 8
    # NOTE: /EarlyChange is not honoured; lzw.lzwdecode only receives the
    # stream, so the value was read but never used.
    if predictor is not None and predictor != 1:
        return post_prediction(decodedStream, predictor, columns, colors, bits)
    return (0, decodedStream)
Exemple #10
0
def lzwDecode(stream, parameters):
    """
    Method to decode streams using the LZW algorithm

    @param stream: A PDF stream
    @param parameters: None, an empty dict, or a dict of decode parameters whose values expose getRawValue(); the keys read are /Predictor, /Columns, /Colors and /BitsPerComponent
    @return: A tuple (status,statusContent), where statusContent is the decoded PDF stream in case status = 0 or an error in case status = -1. When a predictor other than 1 is given, the result of post_prediction is returned as is.
    """
    try:
        decodedStream = lzw.lzwdecode(stream)
    except Exception:
        return (-1, "Error decompressing string")

    if parameters is None or parameters == {}:
        return (0, decodedStream)

    if "/Predictor" in parameters:
        predictor = parameters["/Predictor"].getRawValue()
    else:
        predictor = 1
    if predictor is None or predictor == 1:
        # No prediction step requested; the other parameters are irrelevant.
        return (0, decodedStream)

    # Columns = number of samples per row
    if "/Columns" in parameters:
        columns = parameters["/Columns"].getRawValue()
    else:
        columns = 1
    # Colors = number of components per sample
    if "/Colors" in parameters:
        colors = parameters["/Colors"].getRawValue()
        if colors < 1:
            colors = 1
    else:
        colors = 1
    # BitsPerComponent: number of bits per color component
    if "/BitsPerComponent" in parameters:
        bits = parameters["/BitsPerComponent"].getRawValue()
        if bits not in [1, 2, 4, 8, 16]:
            bits = 8
    else:
        bits = 8
    # NOTE: /EarlyChange is not honoured; lzw.lzwdecode is called with the
    # stream only, so the value was read but never used.
    return post_prediction(decodedStream, predictor, columns, colors, bits)
Exemple #11
0
 def decode(self):
     """Decrypt ``self.rawdata`` and start running it through its filters.

     Requires ``self.data`` to be ``None`` and ``self.rawdata`` to be set.
     A truthy ``self.decipher`` is called with the object id, generation
     number and raw data first.  When ``self.get_filters()`` returns
     nothing, the data is stored in ``self.data`` and ``self.rawdata`` is
     cleared.

     Raises ``PDFException`` when zlib rejects a Flate stream and ``STRICT``
     is truthy; otherwise the data is replaced by an empty string.

     NOTE(review): as shown, the method ends after the LZW branch, so with a
     non-empty filter list the decoded data is never stored on ``self.data``.
     The remaining branches are presumably cut off in this excerpt --
     confirm against the full source.
     """
     assert self.data is None and self.rawdata is not None
     data = self.rawdata
     if self.decipher:
         # Handle encryption
         data = self.decipher(self.objid, self.genno, data)
     filters = self.get_filters()
     if not filters:
         self.data = data
         self.rawdata = None
         return
     for f in filters:
         if f in LITERALS_FLATE_DECODE:
             # will get errors if the document is encrypted.
             try:
                 data = zlib.decompress(data)
             except zlib.error as e:
                 if STRICT:
                     raise PDFException('Invalid zlib bytes: %r, %r' % (e, data))
                 data = ''
         elif f in LITERALS_LZW_DECODE:
             data = lzwdecode(data)
Exemple #12
0
 def decode(self):
     """Decrypt ``self.rawdata`` and begin applying the stream filters.

     Preconditions (asserted): ``self.data`` is ``None`` and
     ``self.rawdata`` is not ``None``.  A truthy ``self.decipher`` is called
     with ``(self.objid, self.genno, data)`` before any filter runs.  With
     no filters the data is stored in ``self.data`` and ``self.rawdata`` is
     reset to ``None``.

     Raises ``PDFException`` for invalid zlib input when ``STRICT`` is
     truthy; when it is falsy the data becomes an empty string instead.

     NOTE(review): the code shown stops after the LZW branch and does not
     store the result of a non-empty filter chain; the rest of the method
     appears to be missing from this excerpt -- confirm against the full
     source.
     """
     assert self.data is None and self.rawdata is not None
     data = self.rawdata
     if self.decipher:
         # Handle encryption
         data = self.decipher(self.objid, self.genno, data)
     filters = self.get_filters()
     if not filters:
         self.data = data
         self.rawdata = None
         return
     for f in filters:
         if f in LITERALS_FLATE_DECODE:
             # will get errors if the document is encrypted.
             try:
                 data = zlib.decompress(data)
             except zlib.error as e:
                 if STRICT:
                     raise PDFException('Invalid zlib bytes: %r, %r' %
                                        (e, data))
                 data = ''
         elif f in LITERALS_LZW_DECODE:
             data = lzwdecode(data)
Exemple #13
0
def getdictionary(file, followlinks=False):
    """Read a PDF dictionary, and the stream that may follow it, from ``file``.

    Key/object pairs are read with ``readobject`` until either the key or the
    object is ``'>'``; that terminating pair is stored in the result as well.
    If the next word is ``stream``, ``Length`` bytes are read and passed
    through each filter named under ``Filter`` (FlateDecode, ASCII85Decode
    and LZWDecode are handled; others are only reported).  A failed
    FlateDecode is reported and leaves the stream bytes unchanged.  The stream
    is then stored under ``"Stream"`` -- raw for ``Type == "XRef"``, decoded
    as UTF-8 (errors ignored) otherwise -- except for ``Type == "ObjStm"``,
    whose content is written to the scratch file ``.pdfaudit`` and handed to
    ``iterateobjstm``.

    ``followlinks`` is accepted but not used by this function.

    Returns the dictionary after passing it to ``checkdictionary``.
    """
    # sequence of key - object pairs, of which at least the key is a /name (without slash)
    # it may be followed by a stream, which is encapsulated by the words "stream" and "endstream"
    # TODO: handling of specific keys and return values (Length, Size): from global to variables to returns from this function (as this function may be called recursively)
    # TODO: the stream itself needs to be checked "in the context to which it is referred to"
    global globaldictvaluesize  # TODO: from global variable to return'd value from this function
    vprint("[DICT]", 2, '')
    getkey = ""
    getobject = ""
    dictionary = {}
    while getkey != '>' and getobject != '>':
        getkey = readobject(file)
        getobject = readobject(file, getkey in followlinkslist)
        dictionary[getkey] = getobject
    nword, _nnword = getnexttwowords(file)
    if nword == 'stream':
        # TODO: followsymlinks handling in case stream can have symlinks
        length = num(dictionary.get("Length"))
        getword(file)  # actually read the word 'stream' (including trailing delimiter)
        file.seek(-1, 1)  # stream follows after 'stream\r\n' or 'stream\n'
        if ord(nextchar(file)) == 13:  # \r
            file.read(2)  # read 'stream\r\n'
        else:
            file.read(1)  # read 'stream\n'
        stream = file.read(length)
        vprint(" ", 2)
        vprint("[STREAM] " + str(length) + " bytes", 2)
        if "Filter" in dictionary:
            filterlist = dictionary.get("Filter")
            if isinstance(filterlist, str):
                filterlist = [filterlist]
            for streamfilter in filterlist:
                vprint("[DECODE]: " + streamfilter + " ", 2, '')
                if streamfilter == "FlateDecode":
                    # TODO: Predictor filters not implemented yet, see getxrefstream(). It is possible we encounter an xrefstream as we might not explicitly have searched and parsed one.
                    try:
                        stream = zlib.decompress(stream)
                        vprint(makeprintable(stream), 3)
                    except Exception:
                        # Best effort: report the failure and carry on with
                        # the stream as it is.
                        try:
                            vprint(
                                "zlib error; streamlength: " +
                                str(len(stream)) + ", firstbyte: " +
                                str(stream[0]) + ", lastbyte: " +
                                str(stream[-1]) +
                                ", ZLIB runtime version: " +
                                zlib.ZLIB_RUNTIME_VERSION, 2)
                        except Exception:
                            # Building the diagnostic itself failed (for
                            # example on an empty stream).
                            vprint("No return stream given by zlib", 2)
                elif streamfilter == 'ASCII85Decode':
                    stream = ascii85decode(stream)
                elif streamfilter == 'LZWDecode':
                    stream = lzwdecode(stream)
                # TODO: CCITTFaxDecode is not handled yet; ccittfaxdecode(stream) needs additional arguments
                elif streamfilter == '/':
                    pass
                else:
                    vprint(
                        "Filter not implemented: " + streamfilter +
                        ", found in object: " + str(currentobject[0]) + " " +
                        str(currentobject[1]), 1)
                    # TODO: use counttable instead to give list of unimplemented filters with objects at the end of the scan.
                    # TODO: need to break here if multiple compressions are used of which one fails to prevent error out.
        getword(file)  # the word endstream
        vprint("[STREAM: end]", 2)
        if dictionary.get("Type") == "XRef":
            vprint("[XRef]", 2)
            dictionary["Stream"] = stream
        elif dictionary.get("Type") == "ObjStm":
            vprint("[STREAM]: open ObjStm", 2)
            # The context manager closes the scratch file even when
            # iterateobjstm raises.
            with open(".pdfaudit", "w+b") as f:
                f.write(stream)
                f.write(bytearray([13, 13, 13]))
                f.seek(0)
                iterateobjstm(f, num(dictionary.get("N")))
            # TODO: delete file
            vprint("[STREAM]: close ObjStm", 2)
        else:
            dictionary["Stream"] = stream.decode('utf-8', 'ignore')
        # TODO: followsymlinks handling in case stream can have symlinks
    vprint("[DICT: end]", 2, '')
    checkdictionary(dictionary)
    return dictionary  # TODO: check if we can return more here