Ejemplo n.º 1
0
 def readFromStream(stream, pdf):
     idnum = b_("")
     while True:
         tok = stream.read(1)
         if not tok:
             # stream has truncated prematurely
             raise PdfStreamError("Stream has ended unexpectedly")
         if tok.isspace():
             break
         idnum += tok
     generation = b_("")
     while True:
         tok = stream.read(1)
         if not tok:
             # stream has truncated prematurely
             raise PdfStreamError("Stream has ended unexpectedly")
         if tok.isspace():
             break
         generation += tok
     r = stream.read(1)
     if r != b_("R"):
         raise utils.PdfReadError(
             "Error reading indirect object reference at byte %s" %
             utils.hexStr(stream.tell()))
     return IndirectObject(int(idnum), int(generation), pdf)
Ejemplo n.º 2
0
def readStringFromStream(stream):
    tok = stream.read(1)
    parens = 1
    txt = b_("")
    while True:
        tok = stream.read(1)
        if not tok:
            # stream has truncated prematurely
            raise PdfStreamError("Stream has ended unexpectedly")
        if tok == b_("("):
            parens += 1
        elif tok == b_(")"):
            parens -= 1
            if parens == 0:
                break
        elif tok == b_("\\"):
            tok = stream.read(1)
            if tok == b_("n"):
                tok = b_("\n")
            elif tok == b_("r"):
                tok = b_("\r")
            elif tok == b_("t"):
                tok = b_("\t")
            elif tok == b_("b"):
                tok = b_("\b")
            elif tok == b_("f"):
                tok = b_("\f")
            elif tok == b_("("):
                tok = b_("(")
            elif tok == b_(")"):
                tok = b_(")")
            elif tok == b_("\\"):
                tok = b_("\\")
            elif tok.isdigit():
                # "The number ddd may consist of one, two, or three
                # octal digits; high-order overflow shall be ignored.
                # Three octal digits shall be used, with leading zeros
                # as needed, if the next character of the string is also
                # a digit." (PDF reference 7.3.4.2, p 16)
                for i in range(2):
                    ntok = stream.read(1)
                    if ntok.isdigit():
                        tok += ntok
                    else:
                        break
                tok = b_(chr(int(tok, base=8)))
            elif tok in b_("\n\r"):
                # This case is  hit when a backslash followed by a line
                # break occurs.  If it's a multi-char EOL, consume the
                # second character:
                tok = stream.read(1)
                if not tok in b_("\n\r"):
                    stream.seek(-1, 1)
                # Then don't add anything to the actual string, since this
                # line break was escaped:
                tok = b_('')
            else:
                raise utils.PdfReadError("Unexpected escaped string")
        txt += tok
    return createStringObject(txt)
Ejemplo n.º 3
0
 def readFromStream(stream):
     debug = False
     if debug: print stream.tell()
     name = stream.read(1)
     if name != b_("/"):
         raise utils.PdfReadError, "name read error"
     while True:
         tok = stream.read(1)
         if not tok:
             # stream has truncated prematurely
             raise PdfStreamError("Stream has ended unexpectedly")
         if tok.isspace() or tok in NameObject.delimiterCharacters:
             stream.seek(-1, 1)
             break
         name += tok
     if debug: print name
     return NameObject(name.decode('utf-8'))
Ejemplo n.º 4
0
def readHexStringFromStream(stream):
    stream.read(1)
    txt = ""
    x = b_("")
    while True:
        tok = readNonWhitespace(stream)
        if not tok:
            # stream has truncated prematurely
            raise PdfStreamError("Stream has ended unexpectedly")
        if tok == b_(">"):
            break
        x += tok
        if len(x) == 2:
            txt += chr(int(x, base=16))
            x = b_("")
    if len(x) == 1:
        x += b_("0")
    if len(x) == 2:
        txt += chr(int(x, base=16))
    return createStringObject(b_(txt))
Ejemplo n.º 5
0
    def readFromStream(stream, pdf):
        debug = False
        tmp = stream.read(2)
        if tmp != b_("<<"):
            raise utils.PdfReadError, \
                ("Dictionary read error at byte %s: stream must begin with '<<'" % utils.hexStr(stream.tell()))
        data = {}
        while True:
            tok = readNonWhitespace(stream)
            if not tok:
                # stream has truncated prematurely
                raise PdfStreamError("Stream has ended unexpectedly")

            if debug: print "Tok:", tok
            if tok == b_(">"):
                stream.read(1)
                break
            stream.seek(-1, 1)
            key = readObject(stream, pdf)
            tok = readNonWhitespace(stream)
            stream.seek(-1, 1)
            value = readObject(stream, pdf)
            if data.has_key(key):
                # multiple definitions of key not permitted
                raise utils.PdfReadError, ("Multiple definitions in dictionary at byte %s for key %s" \
                                           % (utils.hexStr(stream.tell()), key))
            data[key] = value
        pos = stream.tell()
        s = readNonWhitespace(stream)
        if s == b_('s') and stream.read(5) == b_('tream'):
            eol = stream.read(1)
            # odd PDF file output has spaces after 'stream' keyword but before EOL.
            # patch provided by Danial Sandler
            while eol == b_(' '):
                eol = stream.read(1)
            assert eol in (b_("\n"), b_("\r"))
            if eol == b_("\r"):
                # read \n after
                if stream.read(1) != '\n':
                    stream.seek(-1, 1)
            # this is a stream object, not a dictionary
            assert data.has_key("/Length")
            length = data["/Length"]
            if debug: print data
            if isinstance(length, IndirectObject):
                t = stream.tell()
                length = pdf.getObject(length)
                stream.seek(t, 0)
            data["__streamdata__"] = stream.read(length)
            if debug: print "here"
            #if debug: print debugging.printAsHex(data["__streamdata__"])
            e = readNonWhitespace(stream)
            ndstream = stream.read(8)
            if (e + ndstream) != b_("endstream"):
                # (sigh) - the odd PDF file has a length that is too long, so
                # we need to read backwards to find the "endstream" ending.
                # ReportLab (unknown version) generates files with this bug,
                # and Python users into PDF files tend to be our audience.
                # we need to do this to correct the streamdata and chop off
                # an extra character.
                pos = stream.tell()
                stream.seek(-10, 1)
                end = stream.read(9)
                if end == b_("endstream"):
                    # we found it by looking back one character further.
                    data["__streamdata__"] = data["__streamdata__"][:-1]
                else:
                    # if debug: print "E", e, ndstream, debugging.toHex(end)
                    stream.seek(pos, 0)
                    raise utils.PdfReadError, \
                        ("Unable to find 'endstream' marker after stream at byte %s." % utils.hexStr(stream.tell()))
        else:
            stream.seek(pos, 0)
        if data.has_key("__streamdata__"):
            return StreamObject.initializeFromDictionary(data)
        else:
            retval = DictionaryObject()
            retval.update(data)
            return retval