def readFromStream(stream, pdf): idnum = b_("") while True: tok = stream.read(1) if not tok: # stream has truncated prematurely raise PdfStreamError("Stream has ended unexpectedly") if tok.isspace(): break idnum += tok generation = b_("") while True: tok = stream.read(1) if not tok: # stream has truncated prematurely raise PdfStreamError("Stream has ended unexpectedly") if tok.isspace(): break generation += tok r = stream.read(1) if r != b_("R"): raise utils.PdfReadError( "Error reading indirect object reference at byte %s" % utils.hexStr(stream.tell())) return IndirectObject(int(idnum), int(generation), pdf)
def readFromStream(stream, pdf): idnum = b_("") while True: tok = stream.read(1) if tok.isspace(): break idnum += tok generation = b_("") while True: tok = stream.read(1) if tok.isspace(): break generation += tok r = stream.read(1) if r != b_("R"): raise utils.PdfReadError( "Error reading indirect object reference at byte %s" % utils.hexStr(stream.tell())) return IndirectObject(int(idnum), int(generation), pdf)
def readFromStream(stream, pdf): debug = False tmp = stream.read(2) if tmp != b_("<<"): raise utils.PdfReadError, \ ("Dictionary read error at byte %s: stream must begin with '<<'" % utils.hexStr(stream.tell())) data = {} while True: tok = readNonWhitespace(stream) if tok == b_('\x00'): continue if not tok: # stream has truncated prematurely raise PdfStreamError("Stream has ended unexpectedly") if debug: print "Tok:",tok if tok == b_(">"): stream.read(1) break stream.seek(-1, 1) key = readObject(stream, pdf) tok = readNonWhitespace(stream) stream.seek(-1, 1) value = readObject(stream, pdf) if not data.has_key(key): data[key] = value pos = stream.tell() s = readNonWhitespace(stream) if s == b_('s') and stream.read(5) == b_('tream'): eol = stream.read(1) # odd PDF file output has spaces after 'stream' keyword but before EOL. # patch provided by Danial Sandler while eol == b_(' '): eol = stream.read(1) assert eol in (b_("\n"), b_("\r")) if eol == b_("\r"): # read \n after if stream.read(1) != '\n': stream.seek(-1, 1) # this is a stream object, not a dictionary assert data.has_key("/Length") length = data["/Length"] if debug: print data if isinstance(length, IndirectObject): t = stream.tell() length = pdf.getObject(length) stream.seek(t, 0) data["__streamdata__"] = stream.read(length) if debug: print "here" #if debug: print debugging.printAsHex(data["__streamdata__"]) e = readNonWhitespace(stream) ndstream = stream.read(8) if (e + ndstream) != b_("endstream"): # (sigh) - the odd PDF file has a length that is too long, so # we need to read backwards to find the "endstream" ending. # ReportLab (unknown version) generates files with this bug, # and Python users into PDF files tend to be our audience. # we need to do this to correct the streamdata and chop off # an extra character. pos = stream.tell() stream.seek(-10, 1) end = stream.read(9) if end == b_("endstream"): # we found it by looking back one character further. data["__streamdata__"] = data["__streamdata__"][:-1] else: if pdf.strict == False: warnings.warn("Ignoring missing endstream. This could affect PDF output.") pass else: if debug: print "E", e, ndstream, debugging.toHex(end) stream.seek(pos, 0) raise utils.PdfReadError, \ ("Unable to find 'endstream' marker after stream at byte %s." % utils.hexStr(stream.tell())) else: stream.seek(pos, 0) if data.has_key("__streamdata__"): return StreamObject.initializeFromDictionary(data) else: retval = DictionaryObject() retval.update(data) return retval
def readFromStream(stream, pdf): idnum = b_("") while True: tok = stream.read(1) if not tok: # stream has truncated prematurely raise PdfStreamError("Stream has ended unexpectedly") if tok.isspace(): break idnum += tok generation = b_("") while True: tok = stream.read(1) if not tok: # stream has truncated prematurely raise PdfStreamError("Stream has ended unexpectedly") if tok.isspace(): break generation += tok r = stream.read(1) if r != b_("R"): raise utils.PdfReadError("Error reading indirect object reference at byte %s" % utils.hexStr(stream.tell())) return IndirectObject(int(idnum), int(generation), pdf)
def readFromStream(stream, pdf): debug = False tmp = stream.read(2) if tmp != b_("<<"): raise utils.PdfReadError, \ ("Dictionary read error at byte %s: stream must begin with '<<'" % utils.hexStr(stream.tell())) data = {} while True: tok = readNonWhitespace(stream) if debug: print "Tok:", tok if tok == b_(">"): stream.read(1) break stream.seek(-1, 1) key = readObject(stream, pdf) tok = readNonWhitespace(stream) stream.seek(-1, 1) value = readObject(stream, pdf) if data.has_key(key): # multiple definitions of key not permitted raise utils.PdfReadError, ("Multiple definitions in dictionary at byte %s for key %s" \ % (utils.hexStr(stream.tell()), key)) data[key] = value pos = stream.tell() s = readNonWhitespace(stream) if s == b_('s') and stream.read(5) == b_('tream'): eol = stream.read(1) # odd PDF file output has spaces after 'stream' keyword but before EOL. # patch provided by Danial Sandler while eol == b_(' '): eol = stream.read(1) assert eol in (b_("\n"), b_("\r")) if eol == b_("\r"): # read \n after stream.read(1) # this is a stream object, not a dictionary assert data.has_key("/Length") length = data["/Length"] if debug: print data if isinstance(length, IndirectObject): t = stream.tell() length = pdf.getObject(length) stream.seek(t, 0) data["__streamdata__"] = stream.read(length) if debug: print "here" #if debug: print debugging.printAsHex(data["__streamdata__"]) e = readNonWhitespace(stream) ndstream = stream.read(8) if (e + ndstream) != b_("endstream"): # (sigh) - the odd PDF file has a length that is too long, so # we need to read backwards to find the "endstream" ending. # ReportLab (unknown version) generates files with this bug, # and Python users into PDF files tend to be our audience. # we need to do this to correct the streamdata and chop off # an extra character. pos = stream.tell() stream.seek(-10, 1) end = stream.read(9) if end == b_("endstream"): # we found it by looking back one character further. data["__streamdata__"] = data["__streamdata__"][:-1] else: # if debug: print "E", e, ndstream, debugging.toHex(end) stream.seek(pos, 0) raise utils.PdfReadError, \ ("Unable to find 'endstream' marker after stream at byte %s." % utils.hexStr(stream.tell())) else: stream.seek(pos, 0) if data.has_key("__streamdata__"): return StreamObject.initializeFromDictionary(data) else: retval = DictionaryObject() retval.update(data) return retval
def readFromStream(stream, pdf): debug = False tmp = stream.read(2) if tmp != "<<": raise utils.PdfReadError, ( "Dictionary read error at byte %s: stream must begin with '<<'" % utils.hexStr(stream.tell()) ) data = {} while True: tok = readNonWhitespace(stream) if debug: print "Tok:", tok if tok == ">": stream.read(1) break stream.seek(-1, 1) key = readObject(stream, pdf) tok = readNonWhitespace(stream) stream.seek(-1, 1) value = readObject(stream, pdf) if data.has_key(key): # multiple definitions of key not permitted raise utils.PdfReadError, ( "Multiple definitions in dictionary at byte %s for key %s" % (utils.hexStr(stream.tell()), key) ) data[key] = value pos = stream.tell() s = readNonWhitespace(stream) if s == "s" and stream.read(5) == "tream": eol = stream.read(1) # odd PDF file output has spaces after 'stream' keyword but before EOL. # patch provided by Danial Sandler while eol == " ": eol = stream.read(1) assert eol in ("\n", "\r") if eol == "\r": # read \n after stream.read(1) # this is a stream object, not a dictionary assert data.has_key("/Length") length = data["/Length"] if debug: print data if isinstance(length, IndirectObject): t = stream.tell() length = pdf.getObject(length) stream.seek(t, 0) data["__streamdata__"] = stream.read(length) if debug: print "here" # if debug: print debugging.printAsHex(data["__streamdata__"]) e = readNonWhitespace(stream) ndstream = stream.read(8) if (e + ndstream) != "endstream": # (sigh) - the odd PDF file has a length that is too long, so # we need to read backwards to find the "endstream" ending. # ReportLab (unknown version) generates files with this bug, # and Python users into PDF files tend to be our audience. # we need to do this to correct the streamdata and chop off # an extra character. pos = stream.tell() stream.seek(-10, 1) end = stream.read(9) if end == "endstream": # we found it by looking back one character further. data["__streamdata__"] = data["__streamdata__"][:-1] else: # if debug: print "E", e, ndstream, debugging.toHex(end) stream.seek(pos, 0) raise utils.PdfReadError, ( "Unable to find 'endstream' marker after stream at byte %s." % utils.hexStr(stream.tell()) ) else: stream.seek(pos, 0) if data.has_key("__streamdata__"): return StreamObject.initializeFromDictionary(data) else: retval = DictionaryObject() retval.update(data) return retval
def readFromStream(stream, pdf): tmp = stream.read(2) if tmp != b_("<<"): raise utils.PdfReadError( ("Dictionary read error at byte %s: " "stream must begin with '<<'" % utils.hexStr(stream.tell()))) data = {} while True: tok = readNonWhitespace(stream) if not tok: # stream has truncated prematurely raise utils.PdfStreamError("Stream has ended unexpectedly") if tok == b_(">"): stream.read(1) break stream.seek(-1, 1) key = readObject(stream, pdf) tok = readNonWhitespace(stream) stream.seek(-1, 1) value = readObject(stream, pdf) if key in data: # multiple definitions of key not permitted raise utils.PdfReadError, ("Multiple definitions in " "dictionary at byte %s for key %s" % (utils.hexStr(stream.tell()), key)) data[key] = value pos = stream.tell() s = readNonWhitespace(stream) if s == b_('s') and stream.read(5) == b_('tream'): eol = stream.read(1) # odd PDF file output has spaces after 'stream' # keyword but before EOL. # patch provided by Danial Sandler while eol == b_(' '): eol = stream.read(1) assert eol in (b_("\n"), b_("\r")) if eol == b_("\r"): # read \n after if stream.read(1) != '\n': stream.seek(-1, 1) # this is a stream object, not a dictionary assert "/Length" in data length = data["/Length"] if isinstance(length, IndirectObject): t = stream.tell() length = pdf.getObject(length) stream.seek(t, 0) data["__streamdata__"] = stream.read(length) e = readNonWhitespace(stream) ndstream = stream.read(8) if (e + ndstream) != b_("endstream"): # (sigh) - the odd PDF file has a length that is too long, so # we need to read backwards to find the "endstream" ending. # ReportLab (unknown version) generates files with this bug, # and Python users into PDF files tend to be our audience. # we need to do this to correct the streamdata and chop off # an extra character. pos = stream.tell() stream.seek(-10, 1) end = stream.read(9) if end == b_("endstream"): # we found it by looking back one character further. data["__streamdata__"] = data["__streamdata__"][:-1] else: stream.seek(pos, 0) raise utils.PdfReadError, \ ("Unable to find 'endstream' marker after " "stream at byte %s." % utils.hexStr(stream.tell())) else: stream.seek(pos, 0) if "__streamdata__" in data: return StreamObject.initializeFromDictionary(data) else: retval = DictionaryObject() retval.update(data) return retval