Example #1
0
 def readFromStream(stream, pdf):
     """
     Parse an indirect reference of the form ``<idnum> <generation> R``.

     Two whitespace-terminated fields are read from ``stream`` one byte at
     a time, followed by a single byte that must be ``R``.

     Returns IndirectObject(int(idnum), int(generation), pdf).

     Raises PdfStreamError if the stream ends inside either field, and
     utils.PdfReadError if the byte after the second field is not ``R``.
     int() raises ValueError if a field is not a valid integer.
     """
     def next_field():
         # Collect bytes up to the next whitespace byte; the whitespace
         # byte itself is consumed but not returned.
         collected = b_("")
         ch = stream.read(1)
         while ch and not ch.isspace():
             collected += ch
             ch = stream.read(1)
         if not ch:
             # stream has truncated prematurely
             raise PdfStreamError("Stream has ended unexpectedly")
         return collected

     idnum = next_field()
     generation = next_field()
     marker = stream.read(1)
     if marker == b_("R"):
         return IndirectObject(int(idnum), int(generation), pdf)
     raise utils.PdfReadError(
         "Error reading indirect object reference at byte %s" %
         utils.hexStr(stream.tell()))
Example #2
0
 def readFromStream(stream, pdf):
     """
     Parse an indirect reference of the form ``<idnum> <generation> R``.

     Two whitespace-terminated fields are read from ``stream`` one byte at
     a time, followed by a single byte that must be ``R``.

     Returns IndirectObject(int(idnum), int(generation), pdf).

     Raises utils.PdfReadError if the stream ends inside either field or
     if the byte after the second field is not ``R``. int() raises
     ValueError if a field is not a valid integer.
     """
     idnum = b_("")
     while True:
         tok = stream.read(1)
         if not tok:
             # read() returns an empty value at end of input and isspace()
             # on an empty value is False, so without this check a
             # truncated stream would make the loop spin forever.
             raise utils.PdfReadError("Stream has ended unexpectedly")
         if tok.isspace():
             break
         idnum += tok
     generation = b_("")
     while True:
         tok = stream.read(1)
         if not tok:
             # same end-of-input guard as for the first field
             raise utils.PdfReadError("Stream has ended unexpectedly")
         if tok.isspace():
             break
         generation += tok
     r = stream.read(1)
     if r != b_("R"):
         raise utils.PdfReadError(
             "Error reading indirect object reference at byte %s" %
             utils.hexStr(stream.tell()))
     return IndirectObject(int(idnum), int(generation), pdf)
Example #3
0
 def readFromStream(stream, pdf):
     """
     Parse an indirect reference of the form ``<idnum> <generation> R``.

     Two whitespace-terminated fields are read from ``stream`` one byte at
     a time, followed by a single byte that must be ``R``.

     Returns IndirectObject(int(idnum), int(generation), pdf).

     Raises utils.PdfReadError if the stream ends inside either field or
     if the byte after the second field is not ``R``. int() raises
     ValueError if a field is not a valid integer.
     """
     def read_field():
         # Accumulate bytes up to the next whitespace byte (which is
         # consumed but not returned).
         field = b_("")
         while True:
             tok = stream.read(1)
             if not tok:
                 # End of input: read() yields an empty value whose
                 # isspace() is False, so the loop would otherwise never
                 # terminate on a truncated stream.
                 raise utils.PdfReadError("Stream has ended unexpectedly")
             if tok.isspace():
                 return field
             field += tok

     idnum = read_field()
     generation = read_field()
     r = stream.read(1)
     if r != b_("R"):
         raise utils.PdfReadError(
             "Error reading indirect object reference at byte %s" %
             utils.hexStr(stream.tell()))
     return IndirectObject(int(idnum), int(generation), pdf)
Example #4
0
    def readFromStream(stream, pdf):
        """
        Read a dictionary ("<< ... >>") from ``stream`` and, when it is
        followed by the ``stream`` keyword, the stream payload as well.

        Keys and values are parsed with readObject(). When a key occurs
        more than once, the first value is kept and later ones are
        ignored. NUL tokens between entries are skipped.

        Returns StreamObject.initializeFromDictionary(data) when a payload
        was read, otherwise a DictionaryObject updated with the entries.

        Raises utils.PdfReadError if the input does not begin with "<<",
        or if no "endstream" marker is found and ``pdf.strict == False``
        does not hold (in that lenient case a warning is issued instead).
        Raises PdfStreamError if the input ends inside the dictionary.
        Raises AssertionError if the ``stream`` keyword is not followed by
        an end-of-line byte or the dictionary has no "/Length" entry.
        """
        tmp = stream.read(2)
        if tmp != b_("<<"):
            raise utils.PdfReadError(
                "Dictionary read error at byte %s: stream must begin with '<<'" % utils.hexStr(stream.tell()))
        data = {}
        while True:
            tok = readNonWhitespace(stream)
            if tok == b_('\x00'):
                continue
            if not tok:
                # stream has truncated prematurely
                raise PdfStreamError("Stream has ended unexpectedly")

            if tok == b_(">"):
                # consume the byte after the first '>' (expected to be the
                # second '>' of the closing delimiter; it is not verified)
                stream.read(1)
                break
            # put the peeked byte back so readObject sees the whole token
            stream.seek(-1, 1)
            key = readObject(stream, pdf)
            tok = readNonWhitespace(stream)
            stream.seek(-1, 1)
            value = readObject(stream, pdf)
            if key not in data:
                data[key] = value
        pos = stream.tell()
        s = readNonWhitespace(stream)
        if s == b_('s') and stream.read(5) == b_('tream'):
            eol = stream.read(1)
            # odd PDF file output has spaces after 'stream' keyword but before EOL.
            # patch provided by Danial Sandler
            while eol == b_(' '):
                eol = stream.read(1)
            assert eol in (b_("\n"), b_("\r"))
            if eol == b_("\r"):
                # Swallow the LF of a CRLF pair. The comparison must use
                # b_() like every other comparison against stream data;
                # a native '\n' literal never equals a bytes value, which
                # would leave the LF at the start of the payload.
                following = stream.read(1)
                if following and following != b_("\n"):
                    stream.seek(-1, 1)
            # this is a stream object, not a dictionary
            assert "/Length" in data
            length = data["/Length"]
            if isinstance(length, IndirectObject):
                # resolving the reference may move the stream position,
                # so remember it and restore it afterwards
                t = stream.tell()
                length = pdf.getObject(length)
                stream.seek(t, 0)
            data["__streamdata__"] = stream.read(length)
            e = readNonWhitespace(stream)
            ndstream = stream.read(8)
            if (e + ndstream) != b_("endstream"):
                # (sigh) - the odd PDF file has a length that is too long, so
                # we need to read backwards to find the "endstream" ending.
                # ReportLab (unknown version) generates files with this bug,
                # and Python users into PDF files tend to be our audience.
                # we need to do this to correct the streamdata and chop off
                # an extra character.
                pos = stream.tell()
                # re-read a 9-byte window that starts one byte earlier
                stream.seek(-10, 1)
                end = stream.read(9)
                if end == b_("endstream"):
                    # we found it by looking back one character further.
                    data["__streamdata__"] = data["__streamdata__"][:-1]
                else:
                    if pdf.strict == False:
                        warnings.warn("Ignoring missing endstream. This could affect PDF output.")
                    else:
                        stream.seek(pos, 0)
                        raise utils.PdfReadError(
                            "Unable to find 'endstream' marker after stream at byte %s." % utils.hexStr(stream.tell()))
        else:
            # not a stream object: rewind to just after the dictionary
            stream.seek(pos, 0)
        if "__streamdata__" in data:
            return StreamObject.initializeFromDictionary(data)
        else:
            retval = DictionaryObject()
            retval.update(data)
            return retval
Example #5
0
 def readFromStream(stream, pdf):
     """
     Parse an indirect reference of the form ``<idnum> <generation> R``.

     Reads two whitespace-terminated fields from ``stream`` byte by byte,
     then one more byte that has to be ``R``.

     Returns IndirectObject(int(idnum), int(generation), pdf).

     Raises PdfStreamError when the stream runs out inside a field, and
     utils.PdfReadError when the byte after the fields is not ``R``.
     int() raises ValueError if a field is not a valid integer.
     """
     fields = []
     # The object number and the generation number are read the same way.
     for _ in range(2):
         digits = b_("")
         byte = stream.read(1)
         while byte and not byte.isspace():
             digits += byte
             byte = stream.read(1)
         if not byte:
             # stream has truncated prematurely
             raise PdfStreamError("Stream has ended unexpectedly")
         fields.append(digits)
     marker = stream.read(1)
     if marker != b_("R"):
         raise utils.PdfReadError("Error reading indirect object reference at byte %s" % utils.hexStr(stream.tell()))
     idnum, generation = fields
     return IndirectObject(int(idnum), int(generation), pdf)
Example #6
0
 def readFromStream(stream, pdf):
     """
     Read a dictionary ("<< ... >>") from ``stream`` and, when it is
     followed by the ``stream`` keyword, the stream payload as well.

     Keys and values are parsed with readObject(); a key that appears
     more than once is rejected.

     Returns StreamObject.initializeFromDictionary(data) when a payload
     was read, otherwise a DictionaryObject updated with the entries.

     Raises utils.PdfReadError if the input does not begin with "<<", if
     it ends inside the dictionary, if a key is defined twice, or if no
     "endstream" marker is found after the payload.
     Raises AssertionError if the ``stream`` keyword is not followed by
     an end-of-line byte or the dictionary has no "/Length" entry.
     """
     tmp = stream.read(2)
     if tmp != b_("<<"):
         raise utils.PdfReadError(
             "Dictionary read error at byte %s: stream must begin with '<<'" % utils.hexStr(stream.tell()))
     data = {}
     while True:
         tok = readNonWhitespace(stream)
         if not tok:
             # Input ended before the closing '>>'; fail here rather than
             # seeking backwards and parsing from a bogus position.
             raise utils.PdfReadError("Stream has ended unexpectedly")
         if tok == b_(">"):
             # consume the byte after the first '>' (expected to be the
             # second '>' of the closing delimiter; it is not verified)
             stream.read(1)
             break
         # put the peeked byte back so readObject sees the whole token
         stream.seek(-1, 1)
         key = readObject(stream, pdf)
         tok = readNonWhitespace(stream)
         stream.seek(-1, 1)
         value = readObject(stream, pdf)
         if key in data:
             # multiple definitions of key not permitted
             raise utils.PdfReadError(
                 "Multiple definitions in dictionary at byte %s for key %s"
                 % (utils.hexStr(stream.tell()), key))
         data[key] = value
     pos = stream.tell()
     s = readNonWhitespace(stream)
     if s == b_('s') and stream.read(5) == b_('tream'):
         eol = stream.read(1)
         # odd PDF file output has spaces after 'stream' keyword but before EOL.
         # patch provided by Danial Sandler
         while eol == b_(' '):
             eol = stream.read(1)
         assert eol in (b_("\n"), b_("\r"))
         if eol == b_("\r"):
             # Only swallow the next byte when it is the LF of a CRLF
             # pair. Some files end the keyword with a lone CR, and
             # consuming the byte unconditionally would drop the first
             # byte of the payload.
             following = stream.read(1)
             if following and following != b_("\n"):
                 stream.seek(-1, 1)
         # this is a stream object, not a dictionary
         assert "/Length" in data
         length = data["/Length"]
         if isinstance(length, IndirectObject):
             # resolving the reference may move the stream position, so
             # remember it and restore it afterwards
             t = stream.tell()
             length = pdf.getObject(length)
             stream.seek(t, 0)
         data["__streamdata__"] = stream.read(length)
         e = readNonWhitespace(stream)
         ndstream = stream.read(8)
         if (e + ndstream) != b_("endstream"):
             # (sigh) - the odd PDF file has a length that is too long, so
             # we need to read backwards to find the "endstream" ending.
             # ReportLab (unknown version) generates files with this bug,
             # and Python users into PDF files tend to be our audience.
             # we need to do this to correct the streamdata and chop off
             # an extra character.
             pos = stream.tell()
             # re-read a 9-byte window that starts one byte earlier
             stream.seek(-10, 1)
             end = stream.read(9)
             if end == b_("endstream"):
                 # we found it by looking back one character further.
                 data["__streamdata__"] = data["__streamdata__"][:-1]
             else:
                 stream.seek(pos, 0)
                 raise utils.PdfReadError(
                     "Unable to find 'endstream' marker after stream at byte %s." % utils.hexStr(stream.tell()))
     else:
         # not a stream object: rewind to just after the dictionary
         stream.seek(pos, 0)
     if "__streamdata__" in data:
         return StreamObject.initializeFromDictionary(data)
     else:
         retval = DictionaryObject()
         retval.update(data)
         return retval
Example #7
0
 def readFromStream(stream, pdf):
     """
     Read a dictionary ("<< ... >>") from ``stream`` and, when it is
     followed by the ``stream`` keyword, the stream payload as well.

     Keys and values are parsed with readObject(); a key that appears
     more than once is rejected.

     Returns StreamObject.initializeFromDictionary(data) when a payload
     was read, otherwise a DictionaryObject updated with the entries.

     Raises utils.PdfReadError if the input does not begin with "<<", if
     it ends inside the dictionary, if a key is defined twice, or if no
     "endstream" marker is found after the payload.
     Raises AssertionError if the ``stream`` keyword is not followed by
     an end-of-line character or the dictionary has no "/Length" entry.
     """
     tmp = stream.read(2)
     if tmp != "<<":
         raise utils.PdfReadError(
             "Dictionary read error at byte %s: stream must begin with '<<'" % utils.hexStr(stream.tell())
         )
     data = {}
     while True:
         tok = readNonWhitespace(stream)
         if not tok:
             # Input ended before the closing '>>'; fail here rather than
             # seeking backwards and parsing from a bogus position.
             raise utils.PdfReadError("Stream has ended unexpectedly")
         if tok == ">":
             # consume the character after the first '>' (expected to be
             # the second '>' of the closing delimiter; it is not verified)
             stream.read(1)
             break
         # put the peeked character back so readObject sees the whole token
         stream.seek(-1, 1)
         key = readObject(stream, pdf)
         tok = readNonWhitespace(stream)
         stream.seek(-1, 1)
         value = readObject(stream, pdf)
         if key in data:
             # multiple definitions of key not permitted
             raise utils.PdfReadError(
                 "Multiple definitions in dictionary at byte %s for key %s" % (utils.hexStr(stream.tell()), key)
             )
         data[key] = value
     pos = stream.tell()
     s = readNonWhitespace(stream)
     if s == "s" and stream.read(5) == "tream":
         eol = stream.read(1)
         # odd PDF file output has spaces after 'stream' keyword but before EOL.
         # patch provided by Danial Sandler
         while eol == " ":
             eol = stream.read(1)
         assert eol in ("\n", "\r")
         if eol == "\r":
             # Only swallow the next character when it is the LF of a
             # CRLF pair. Some files end the keyword with a lone CR, and
             # consuming the character unconditionally would drop the
             # first character of the payload.
             following = stream.read(1)
             if following and following != "\n":
                 stream.seek(-1, 1)
         # this is a stream object, not a dictionary
         assert "/Length" in data
         length = data["/Length"]
         if isinstance(length, IndirectObject):
             # resolving the reference may move the stream position, so
             # remember it and restore it afterwards
             t = stream.tell()
             length = pdf.getObject(length)
             stream.seek(t, 0)
         data["__streamdata__"] = stream.read(length)
         e = readNonWhitespace(stream)
         ndstream = stream.read(8)
         if (e + ndstream) != "endstream":
             # (sigh) - the odd PDF file has a length that is too long, so
             # we need to read backwards to find the "endstream" ending.
             # ReportLab (unknown version) generates files with this bug,
             # and Python users into PDF files tend to be our audience.
             # we need to do this to correct the streamdata and chop off
             # an extra character.
             pos = stream.tell()
             # re-read a 9-character window that starts one position earlier
             stream.seek(-10, 1)
             end = stream.read(9)
             if end == "endstream":
                 # we found it by looking back one character further.
                 data["__streamdata__"] = data["__streamdata__"][:-1]
             else:
                 stream.seek(pos, 0)
                 raise utils.PdfReadError(
                     "Unable to find 'endstream' marker after stream at byte %s." % utils.hexStr(stream.tell())
                 )
     else:
         # not a stream object: rewind to just after the dictionary
         stream.seek(pos, 0)
     if "__streamdata__" in data:
         return StreamObject.initializeFromDictionary(data)
     else:
         retval = DictionaryObject()
         retval.update(data)
         return retval
Example #8
0
 def readFromStream(stream, pdf):
     """
     Read a dictionary ("<< ... >>") from ``stream`` and, when it is
     followed by the ``stream`` keyword, the stream payload as well.

     Keys and values are parsed with readObject(); a key that appears
     more than once is rejected.

     Returns StreamObject.initializeFromDictionary(data) when a payload
     was read, otherwise a DictionaryObject updated with the entries.

     Raises utils.PdfReadError if the input does not begin with "<<", if
     a key is defined twice, or if no "endstream" marker is found after
     the payload.
     Raises utils.PdfStreamError if the input ends inside the dictionary.
     Raises AssertionError if the ``stream`` keyword is not followed by
     an end-of-line byte or the dictionary has no "/Length" entry.
     """
     tmp = stream.read(2)
     if tmp != b_("<<"):
         raise utils.PdfReadError(
             ("Dictionary read error at byte %s: "
              "stream must begin with '<<'" %
                 utils.hexStr(stream.tell())))
     data = {}
     while True:
         tok = readNonWhitespace(stream)
         if not tok:
             # stream has truncated prematurely
             raise utils.PdfStreamError("Stream has ended unexpectedly")
         if tok == b_(">"):
             # consume the byte after the first '>' (expected to be the
             # second '>' of the closing delimiter; it is not verified)
             stream.read(1)
             break
         # put the peeked byte back so readObject sees the whole token
         stream.seek(-1, 1)
         key = readObject(stream, pdf)
         tok = readNonWhitespace(stream)
         stream.seek(-1, 1)
         value = readObject(stream, pdf)
         if key in data:
             # multiple definitions of key not permitted
             raise utils.PdfReadError("Multiple definitions in "
                                      "dictionary at byte %s for key %s"
                                      % (utils.hexStr(stream.tell()),
                                         key))
         data[key] = value
     pos = stream.tell()
     s = readNonWhitespace(stream)
     if s == b_('s') and stream.read(5) == b_('tream'):
         eol = stream.read(1)
         # odd PDF file output has spaces after 'stream'
         # keyword but before EOL.
         # patch provided by Danial Sandler
         while eol == b_(' '):
             eol = stream.read(1)
         assert eol in (b_("\n"), b_("\r"))
         if eol == b_("\r"):
             # Swallow the LF of a CRLF pair. The comparison must use
             # b_() like every other comparison against stream data; a
             # native '\n' literal never equals a bytes value, which
             # would leave the LF at the start of the payload.
             following = stream.read(1)
             if following and following != b_("\n"):
                 stream.seek(-1, 1)
         # this is a stream object, not a dictionary
         assert "/Length" in data
         length = data["/Length"]
         if isinstance(length, IndirectObject):
             # resolving the reference may move the stream position, so
             # remember it and restore it afterwards
             t = stream.tell()
             length = pdf.getObject(length)
             stream.seek(t, 0)
         data["__streamdata__"] = stream.read(length)
         e = readNonWhitespace(stream)
         ndstream = stream.read(8)
         if (e + ndstream) != b_("endstream"):
             # (sigh) - the odd PDF file has a length that is too long, so
             # we need to read backwards to find the "endstream" ending.
             # ReportLab (unknown version) generates files with this bug,
             # and Python users into PDF files tend to be our audience.
             # we need to do this to correct the streamdata and chop off
             # an extra character.
             pos = stream.tell()
             # re-read a 9-byte window that starts one byte earlier
             stream.seek(-10, 1)
             end = stream.read(9)
             if end == b_("endstream"):
                 # we found it by looking back one character further.
                 data["__streamdata__"] = data["__streamdata__"][:-1]
             else:
                 stream.seek(pos, 0)
                 raise utils.PdfReadError(
                     "Unable to find 'endstream' marker after "
                     "stream at byte %s." % utils.hexStr(stream.tell()))
     else:
         # not a stream object: rewind to just after the dictionary
         stream.seek(pos, 0)
     if "__streamdata__" in data:
         return StreamObject.initializeFromDictionary(data)
     else:
         retval = DictionaryObject()
         retval.update(data)
         return retval