Esempio n. 1
0
 def writeToStream(self, stream, encryption_key):
     """Write this object to ``stream`` as a hex string between ``<`` and ``>``.

     When ``encryption_key`` is truthy the content is first passed through
     ``RC4_encrypt`` with that key; otherwise it is hex-encoded as-is.
     """
     payload = RC4_encrypt(encryption_key, self) if encryption_key else self
     stream.write(b_("<"))
     stream.write(utils.hexencode(payload))
     stream.write(b_(">"))
Esempio n. 2
0
 def _decrypt(self, password):
     """Try ``password`` as the user password, then as the owner password.

     Returns:
         1 if ``password`` authenticates directly, 2 if the user password
         recovered from the ``/O`` entry (treating ``password`` as the owner
         password) authenticates, 0 otherwise.  On success the key is stored
         in ``self._decryption_key``.

     Raises:
         NotImplementedError: if ``/Filter`` is not ``/Standard`` or ``/V``
             is not 1 or 2.
     """
     encrypt = self.trailer['/Encrypt'].getObject()
     if encrypt['/Filter'] != '/Standard':
         raise NotImplementedError(
             "only Standard PDF encryption handler is available")
     if encrypt['/V'] not in (1, 2):
         raise NotImplementedError(
             "only algorithm code 1 and 2 are supported")

     # First attempt: the supplied password is the user password.
     user_ok, key = self._authenticateUserPassword(password)
     if user_ok:
         self._decryption_key = key
         return 1

     # Second attempt: recover the user password from /O using a key derived
     # from the supplied password, then authenticate with the result.
     rev = encrypt['/R'].getObject()
     keylen = 5 if rev == 2 else encrypt['/Length'].getObject() // 8
     key = _alg33_1(password, rev, keylen)
     real_O = encrypt["/O"].getObject()
     if rev == 2:
         userpass = utils.RC4_encrypt(key, real_O)
     else:
         userpass = real_O
         # 20 RC4 rounds, counter running from 19 down to 0; each round's key
         # is the base key with every byte XORed with the counter.
         for counter in range(19, -1, -1):
             round_key = b_('')
             for pos in range(len(key)):
                 round_key += b_(chr(utils.ord_(key[pos]) ^ counter))
             userpass = utils.RC4_encrypt(round_key, userpass)

     owner_ok, key = self._authenticateUserPassword(userpass)
     if owner_ok:
         self._decryption_key = key
         return 2
     return 0
Esempio n. 3
0
 def readFromStream(stream, pdf):
     """Parse an indirect reference (``<id> <generation> R``) from ``stream``.

     Returns:
         ``IndirectObject(int(id), int(generation), pdf)``.

     Raises:
         PdfStreamError: if the stream ends while reading either number.
         utils.PdfReadError: if the token after the numbers is not ``R``.
     """
     # Object id: every byte up to the first whitespace byte.
     id_bytes = b_("")
     while True:
         ch = stream.read(1)
         if not ch:
             # stream has truncated prematurely
             raise PdfStreamError("Stream has ended unexpectedly")
         if ch.isspace():
             break
         id_bytes += ch

     # Generation: whitespace seen before the first byte is skipped, the
     # first whitespace after at least one byte terminates it.
     gen_bytes = b_("")
     while True:
         ch = stream.read(1)
         if not ch:
             # stream has truncated prematurely
             raise PdfStreamError("Stream has ended unexpectedly")
         if ch.isspace():
             if gen_bytes:
                 break
         else:
             gen_bytes += ch

     marker = readNonWhitespace(stream)
     if marker != b_("R"):
         raise utils.PdfReadError("Error reading indirect object reference at byte %s" % utils.hexStr(stream.tell()))
     return IndirectObject(int(id_bytes), int(gen_bytes), pdf)
Esempio n. 4
0
 def readFromStream(stream):
     """Read a boolean literal from ``stream``.

     Four bytes are read.  ``true`` yields ``BooleanObject(True)``; ``fals``
     makes the reader consume one more byte (presumably the trailing ``e``)
     and yields ``BooleanObject(False)``.

     Raises:
         AssertionError: if the four bytes are neither ``true`` nor ``fals``.
     """
     word = stream.read(4)
     if word == b_("true"):
         return BooleanObject(True)
     elif word == b_("fals"):
         stream.read(1)
         return BooleanObject(False)
     # Raised explicitly instead of ``assert False``: assert statements are
     # removed under ``python -O``, which made this path return None silently.
     raise AssertionError("Could not read Boolean object")
Esempio n. 5
0
 def writeToStream(self, stream, encryption_key):
     """Serialize this mapping to ``stream`` between ``<<`` and ``>>``.

     Each entry is written on its own line as ``key value``; both the key and
     the value write themselves via their own ``writeToStream``.
     """
     write = stream.write
     write(b_("<<\n"))
     for name, obj in self.items():
         name.writeToStream(stream, encryption_key)
         write(b_(" "))
         obj.writeToStream(stream, encryption_key)
         write(b_("\n"))
     write(b_(">>"))
Esempio n. 6
0
def encode_pdfdocencoding(unicode_string):
    """Encode ``unicode_string`` using the ``_pdfDocEncoding_rev`` table.

    Raises:
        UnicodeEncodeError: if a character has no entry in the table.
    """
    encoded = b_('')
    for char in unicode_string:
        try:
            code = _pdfDocEncoding_rev[char]
        except KeyError:
            raise UnicodeEncodeError("pdfdocencoding", char, -1, -1,
                    "does not exist in translation table")
        encoded += b_(chr(code))
    return encoded
Esempio n. 7
0
 def readFromStream(stream):
     """Read a boolean literal (``true`` / ``false``) from ``stream``.

     Raises:
         utils.PdfReadError: if the first four bytes are neither ``true`` nor
             ``fals``.
     """
     word = stream.read(4)
     if word == b_("true"):
         return BooleanObject(True)
     if word != b_("fals"):
         raise utils.PdfReadError('Could not read Boolean object')
     # Consume the byte that follows "fals".
     stream.read(1)
     return BooleanObject(False)
Esempio n. 8
0
 def write_to_file(self, f: io.BufferedIOBase):
     """Write this object as ``<obj_no> <gen_no> obj`` ... ``endobj`` to ``f``.

     Args:
         f: a binary stream that supports ``write``.  The annotation used to
             be ``io.BufferedReader``, which cannot be written to.
     """
     # TODO: EOL is not needed in some cases
     f.write(b_(self.obj_no))
     f.write(b' ')
     f.write(b_(self.gen_no))
     f.write(b' obj')
     f.write(b'\n')
     # The wrapped value serializes itself between the header and the trailer.
     self.value.write_to_file(f)
     f.write(b'\n')
     f.write(b'endobj')
Esempio n. 9
0
 def writeToStream(self, stream, encryption_key):
     """Write the dictionary followed by ``stream`` ... ``endstream`` data.

     ``/Length`` is set to ``len(self._data)`` only while the dictionary part
     is being written and is removed again afterwards.  When
     ``encryption_key`` is truthy the data is passed through ``RC4_encrypt``
     before being written.
     """
     self[NameObject("/Length")] = NumberObject(len(self._data))
     try:
         DictionaryObject.writeToStream(self, stream, encryption_key)
     finally:
         # Remove the temporary entry even when serialization fails, so the
         # object is not left carrying a /Length it did not have before.
         del self["/Length"]
     stream.write(b_("\nstream\n"))
     data = self._data
     if encryption_key:
         data = RC4_encrypt(encryption_key, data)
     stream.write(data)
     stream.write(b_("\nendstream"))
Esempio n. 10
0
 def writeToStream(self, stream, encryption_key):
     """Write the dictionary part, then the data between stream markers.

     A ``/Length`` entry equal to ``len(self._data)`` exists only for the
     duration of the dictionary write.  The data is run through
     ``RC4_encrypt`` first when ``encryption_key`` is truthy.
     """
     self[NameObject("/Length")] = NumberObject(len(self._data))
     try:
         DictionaryObject.writeToStream(self, stream, encryption_key)
     finally:
         # Always drop the temporary /Length, including on failure, so an
         # exception does not leave this object modified.
         del self["/Length"]
     stream.write(b_("\nstream\n"))
     data = self._data
     if encryption_key:
         data = RC4_encrypt(encryption_key, data)
     stream.write(data)
     stream.write(b_("\nendstream"))
Esempio n. 11
0
 def readFromStream(stream):
     """Read a numeric token from ``stream``.

     Bytes are consumed while they are ``+``, ``-``, ``.`` or a digit.  The
     first other byte is pushed back.  A token containing ``.`` becomes a
     ``FloatObject``, anything else a ``NumberObject``.
     """
     name = b_("")
     while True:
         tok = stream.read(1)
         if not tok:
             # End of stream: nothing was read, so there is nothing to push
             # back.  Seeking here would un-read the last byte of the number.
             break
         if tok != b_('+') and tok != b_('-') and tok != b_('.') and not tok.isdigit():
             stream.seek(-1, 1)
             break
         name += tok
     if name.find(b_(".")) != -1:
         return FloatObject(name)
     else:
         return NumberObject(name)
Esempio n. 12
0
 def readFromStream(stream):
     """Read a numeric token and return a ``FloatObject`` or ``NumberObject``.

     Consumes ``+``, ``-``, ``.`` and digit bytes; the first byte that is none
     of these is pushed back onto the stream.  The presence of ``.`` selects
     ``FloatObject``.
     """
     num = b_("")
     while True:
         tok = stream.read(1)
         if not tok:
             # End of stream: the read consumed nothing, so seeking back one
             # byte would rewind over a byte that belongs to the number.
             break
         if tok != b_('+') and tok != b_('-') and tok != b_('.') and not tok.isdigit():
             stream.seek(-1, 1)
             break
         num += tok
     if num.find(b_(".")) != -1:
         return FloatObject(num)
     else:
         return NumberObject(num)
Esempio n. 13
0
    def writeToStream(self, stream, encryption_key):
        """Write a dictionary with ``/D`` (destination array) and ``/S /GoTo``.

        The ``/D`` value comes from ``self.getDestArray()``.
        """
        stream.write(b_("<<\n"))
        NameObject('/D').writeToStream(stream, encryption_key)
        stream.write(b_(" "))
        self.getDestArray().writeToStream(stream, encryption_key)

        NameObject("/S").writeToStream(stream, encryption_key)
        stream.write(b_(" "))
        NameObject("/GoTo").writeToStream(stream, encryption_key)

        stream.write(b_("\n"))
        stream.write(b_(">>"))
Esempio n. 14
0
 def __init__(self):
     """Create the header, the object list and the three base objects.

     The page tree root, the info dictionary and the catalog are each
     registered through ``self._addObject``; the returned values are kept in
     ``self._pages``, ``self._info`` and ``self._root``.
     """
     self._header = b_("%PDF-1.3")
     self._objects = []  # array of indirect objects

     # The root of our page tree node.
     page_tree = DictionaryObject()
     page_tree.update({NameObject("/Type"): NameObject("/Pages"),
                       NameObject("/Count"): NumberObject(0),
                       NameObject("/Kids"): ArrayObject()})
     self._pages = self._addObject(page_tree)

     # info object
     info_dict = DictionaryObject()
     info_dict.update({NameObject("/Producer"): createStringObject(
         u"Python PDF Library - http://pybrary.net/pyPdf/")})
     self._info = self._addObject(info_dict)

     # root object; it points at the page tree registered above
     catalog = DictionaryObject()
     catalog.update({NameObject("/Type"): NameObject("/Catalog"),
                     NameObject("/Pages"): self._pages})
     self._root = self._addObject(catalog)
Esempio n. 15
0
    def writeToStream(self, stream, encryption_key):
        """Emit ``<<``, a ``/D`` entry, an ``/S /GoTo`` entry and ``>>``.

        The value written for ``/D`` is whatever ``self.getDestArray()``
        returns.
        """
        out = stream.write
        out(b_("<<\n"))

        dest_key = NameObject('/D')
        dest_key.writeToStream(stream, encryption_key)
        out(b_(" "))
        dest_array = self.getDestArray()
        dest_array.writeToStream(stream, encryption_key)

        action_key = NameObject("/S")
        action_key.writeToStream(stream, encryption_key)
        out(b_(" "))
        action_type = NameObject("/GoTo")
        action_type.writeToStream(stream, encryption_key)

        out(b_("\n"))
        out(b_(">>"))
Esempio n. 16
0
def readHexStringFromStream(stream):
    """Read a ``<...>`` hex string from ``stream`` and return a string object.

    The opening byte is skipped, then non-whitespace bytes are read in pairs
    and each pair is decoded as one base-16 character code.  A trailing
    single digit is padded with ``0``.

    Raises:
        PdfStreamError: if the stream ends before the closing ``>``.
    """
    stream.read(1)
    txt = ""
    x = b_("")
    while True:
        tok = readNonWhitespace(stream)
        if not tok:
            # Without this check an unterminated hex string loops forever,
            # because an empty token never equals ">" and never grows ``x``.
            raise PdfStreamError("Stream has ended unexpectedly")
        if tok == b_(">"):
            break
        x += tok
        if len(x) == 2:
            txt += chr(int(x, base=16))
            x = b_("")
    if len(x) == 1:
        x += b_("0")
    if len(x) == 2:
        txt += chr(int(x, base=16))
    return createStringObject(b_(txt))
Esempio n. 17
0
 def readFromStream(stream, pdf):
     """Parse an indirect reference (``<id> <generation> R``) from ``stream``.

     Returns:
         ``IndirectObject(int(id), int(generation), pdf)``.

     Raises:
         utils.PdfReadError: if the stream ends while a number is being read,
             or if the byte after the generation number is not ``R``.
     """
     idnum = b_("")
     while True:
         tok = stream.read(1)
         if not tok:
             # An empty read is not whitespace, so without this check a
             # truncated stream would spin here forever.
             raise utils.PdfReadError("Stream has ended unexpectedly")
         if tok.isspace():
             break
         idnum += tok
     generation = b_("")
     while True:
         tok = stream.read(1)
         if not tok:
             raise utils.PdfReadError("Stream has ended unexpectedly")
         if tok.isspace():
             break
         generation += tok
     r = stream.read(1)
     if r != b_("R"):
         raise utils.PdfReadError("Error reading indirect object reference at byte %s" % utils.hexStr(stream.tell()))
     return IndirectObject(int(idnum), int(generation), pdf)
Esempio n. 18
0
def readHexStringFromStream(stream):
    """Decode a hex string delimited by ``<`` and ``>`` into a string object.

    After skipping the first byte, non-whitespace bytes are collected two at
    a time and converted with ``int(..., base=16)``.  An odd final digit is
    completed with ``0``.

    Raises:
        PdfStreamError: if no closing ``>`` is found before the stream ends.
    """
    stream.read(1)
    txt = ""
    x = b_("")
    while True:
        tok = readNonWhitespace(stream)
        if not tok:
            # An empty token means end of stream; looping on would never
            # terminate because nothing further can be read.
            raise PdfStreamError("Stream has ended unexpectedly")
        if tok == b_(">"):
            break
        x += tok
        if len(x) == 2:
            txt += chr(int(x, base=16))
            x = b_("")
    if len(x) == 1:
        x += b_("0")
    if len(x) == 2:
        txt += chr(int(x, base=16))
    return createStringObject(b_(txt))
Esempio n. 19
0
def readObject(stream, pdf):
    """Read the next object from ``stream``, dispatching on its first byte.

    The first byte is peeked and looked up in ``ObjectPrefix``; the resulting
    index selects the reader.  Comments are skipped and the following object
    is returned instead.  Anything else is treated as a number or, when the
    next bytes match ``IndirectPattern``, as an indirect reference.

    Raises:
        PdfStreamError: if the stream ends inside a comment.
    """
    tok = stream.read(1)
    stream.seek(-1, 1)  # reset to start
    idx = ObjectPrefix.find(tok)
    if idx == 0:
        # name object
        return NameObject.readFromStream(stream, pdf)
    elif idx == 1:
        # hexadecimal string OR dictionary
        peek = stream.read(2)
        stream.seek(-2, 1)  # reset to start
        if peek == b_('<<'):
            return DictionaryObject.readFromStream(stream, pdf)
        else:
            return readHexStringFromStream(stream)
    elif idx == 2:
        # array object
        return ArrayObject.readFromStream(stream, pdf)
    elif idx == 3 or idx == 4:
        # boolean object
        return BooleanObject.readFromStream(stream)
    elif idx == 5:
        # string object
        return readStringFromStream(stream)
    elif idx == 6:
        # null object
        return NullObject.readFromStream(stream)
    elif idx == 7:
        # comment: skip to the end of the line
        while tok not in (b_('\r'), b_('\n')):
            tok = stream.read(1)
            if not tok:
                # An empty read never equals CR or LF, so a comment that runs
                # to the end of the stream would otherwise loop forever.
                raise PdfStreamError("Stream has ended unexpectedly")
        # Skip whitespace, then step back onto the first byte of the object.
        readNonWhitespace(stream)
        stream.seek(-1, 1)
        return readObject(stream, pdf)
    else:
        # number object OR indirect reference
        if tok in NumberSigns:
            # number
            return NumberObject.readFromStream(stream)
        peek = stream.read(20)
        stream.seek(-len(peek), 1)  # reset to start
        if IndirectPattern.match(peek) is not None:
            return IndirectObject.readFromStream(stream, pdf)
        else:
            return NumberObject.readFromStream(stream)
Esempio n. 20
0
class NameObject(str, PdfObject):
    """A ``str`` subclass that is written via ``b_`` and parsed from ``/name`` tokens."""

    # Bytes that end a name token when encountered while reading.
    delimiterCharacters = b_("("), b_(")"), b_("<"), b_(">"), b_("["), b_("]"), b_("{"), b_("}"), b_("/"), b_("%")

    def __init__(self, data):
        str.__init__(data)

    def writeToStream(self, stream, encryption_key):
        """Write this name to ``stream``; ``encryption_key`` is not used."""
        stream.write(b_(self))

    @staticmethod
    def readFromStream(stream):
        """Read a name token starting with ``/`` from ``stream``.

        Bytes are consumed until whitespace or one of
        ``delimiterCharacters``; that terminating byte is pushed back.

        Raises:
            utils.PdfReadError: if the first byte is not ``/``.
            PdfStreamError: if the stream ends before a terminator is seen.
        """
        debug = False
        # print(...) with one argument behaves the same on Python 2 and 3.
        if debug: print(stream.tell())
        name = stream.read(1)
        if name != b_("/"):
            # Call form instead of ``raise E, "msg"``, which Python 3 rejects.
            raise utils.PdfReadError("name read error")
        while True:
            tok = stream.read(1)
            if not tok:
                # stream has truncated prematurely
                raise PdfStreamError("Stream has ended unexpectedly")
            if tok.isspace() or tok in NameObject.delimiterCharacters:
                stream.seek(-1, 1)
                break
            name += tok
        if debug: print(name)
        return NameObject(name.decode('utf-8'))
Esempio n. 21
0
def readObject(stream, pdf):
    """Parse and return the next object found at the current stream position.

    One byte is peeked and its index in ``ObjectPrefix`` chooses which reader
    handles the object.  A comment is skipped and parsing restarts after it.
    Unmatched prefixes are read as a number, or as an indirect reference when
    the upcoming bytes match ``IndirectPattern``.

    Raises:
        PdfStreamError: if a comment is not terminated before end of stream.
    """
    tok = stream.read(1)
    stream.seek(-1, 1) # reset to start
    idx = ObjectPrefix.find(tok)
    if idx == 0:
        # name object
        return NameObject.readFromStream(stream, pdf)
    elif idx == 1:
        # hexadecimal string OR dictionary
        peek = stream.read(2)
        stream.seek(-2, 1) # reset to start
        if peek == b_('<<'):
            return DictionaryObject.readFromStream(stream, pdf)
        else:
            return readHexStringFromStream(stream)
    elif idx == 2:
        # array object
        return ArrayObject.readFromStream(stream, pdf)
    elif idx == 3 or idx == 4:
        # boolean object
        return BooleanObject.readFromStream(stream)
    elif idx == 5:
        # string object
        return readStringFromStream(stream)
    elif idx == 6:
        # null object
        return NullObject.readFromStream(stream)
    elif idx == 7:
        # comment: consume bytes up to and including the line ending
        while tok not in (b_('\r'), b_('\n')):
            tok = stream.read(1)
            if not tok:
                # End of stream inside a comment; an empty read can never
                # match CR/LF, so stop instead of looping indefinitely.
                raise PdfStreamError("Stream has ended unexpectedly")
        # Advance past whitespace and back up one byte to the object start.
        readNonWhitespace(stream)
        stream.seek(-1, 1)
        return readObject(stream, pdf)
    else:
        # number object OR indirect reference
        if tok in NumberSigns:
            # number
            return NumberObject.readFromStream(stream)
        peek = stream.read(20)
        stream.seek(-len(peek), 1) # reset to start
        if IndirectPattern.match(peek) is not None:
            return IndirectObject.readFromStream(stream, pdf)
        else:
            return NumberObject.readFromStream(stream)
Esempio n. 22
0
 def readFromStream(stream, pdf):
     """Read a ``[ ... ]`` array from ``stream`` and return an ``ArrayObject``.

     Elements are parsed with ``readObject`` until the closing ``]``.

     Raises:
         utils.PdfReadError: if the first byte is not ``[`` or the stream
             ends before the closing ``]``.
     """
     arr = ArrayObject()
     tmp = stream.read(1)
     if tmp != b_("["):
         raise utils.PdfReadError("Could not read array")
     while True:
         # skip leading whitespace
         tok = stream.read(1)
         while tok.isspace():
             tok = stream.read(1)
         if not tok:
             # End of stream.  Seeking back here would re-read the last byte
             # on every pass and the array would grow without bound.
             raise utils.PdfReadError("Stream has ended unexpectedly")
         # check for array ending
         if tok == b_("]"):
             break
         # Not the end: push the byte back so readObject sees the whole token.
         stream.seek(-1, 1)
         # read and append obj
         arr.append(readObject(stream, pdf))
     return arr
Esempio n. 23
0
 def readFromStream(stream, pdf):
     """Read ``<id> <generation> R`` and return the matching ``IndirectObject``.

     Raises:
         utils.PdfReadError: if the stream ends inside either number, or the
             byte following the generation number is not ``R``.
     """
     idnum = b_("")
     while True:
         tok = stream.read(1)
         if not tok:
             # End of stream: an empty read is never whitespace, so this loop
             # would otherwise never terminate.
             raise utils.PdfReadError("Stream has ended unexpectedly")
         if tok.isspace():
             break
         idnum += tok
     generation = b_("")
     while True:
         tok = stream.read(1)
         if not tok:
             raise utils.PdfReadError("Stream has ended unexpectedly")
         if tok.isspace():
             break
         generation += tok
     r = stream.read(1)
     if r != b_("R"):
         raise utils.PdfReadError(
             "Error reading indirect object reference at byte %s" %
             utils.hexStr(stream.tell()))
     return IndirectObject(int(idnum), int(generation), pdf)
Esempio n. 24
0
 def readFromStream(stream, pdf):
     """Parse an array literal and return it as an ``ArrayObject``.

     Each element between ``[`` and ``]`` is read with ``readObject``.

     Raises:
         utils.PdfReadError: if the stream does not start with ``[`` or ends
             before ``]`` is found.
     """
     arr = ArrayObject()
     tmp = stream.read(1)
     if tmp != b_("["):
         # Call form instead of ``raise E, "msg"``, which Python 3 rejects.
         raise utils.PdfReadError("error reading array")
     while True:
         # skip leading whitespace
         tok = stream.read(1)
         while tok.isspace():
             tok = stream.read(1)
         if not tok:
             # End of stream: stepping back would hand the final byte to
             # readObject again and again, so fail instead.
             raise utils.PdfReadError("Stream has ended unexpectedly")
         # check for array ending
         if tok == b_("]"):
             break
         # Push the byte back so the element reader starts on it.
         stream.seek(-1, 1)
         # read and append obj
         arr.append(readObject(stream, pdf))
     return arr
Esempio n. 25
0
 def readFromStream(stream):
     """Read a name token starting with ``/`` and return a ``NameObject``.

     Bytes are consumed until whitespace, one of
     ``NameObject.delimiterCharacters`` or the end of the stream.  A
     terminating byte is pushed back.

     Raises:
         utils.PdfReadError: if the first byte is not ``/``.
     """
     name = stream.read(1)
     if name != b_("/"):
         # Call form instead of ``raise E, "msg"``, which Python 3 rejects.
         raise utils.PdfReadError("name read error")
     while True:
         tok = stream.read(1)
         if not tok:
             # End of stream also ends the name.  Nothing was read, so there
             # is nothing to push back; without this the loop never ends.
             break
         if tok.isspace() or tok in NameObject.delimiterCharacters:
             stream.seek(-1, 1)
             break
         name += tok
     return NameObject(name.decode('utf-8'))
Esempio n. 26
0
 def readFromStream(stream):
     """Parse a ``/name`` token from ``stream`` into a ``NameObject``.

     Reading stops at whitespace, at any of
     ``NameObject.delimiterCharacters`` (that byte is pushed back), or at the
     end of the stream.

     Raises:
         utils.PdfReadError: if the token does not begin with ``/``.
     """
     name = stream.read(1)
     if name != b_("/"):
         # ``raise E, "msg"`` is Python-2-only; the call form works on both.
         raise utils.PdfReadError("name read error")
     while True:
         tok = stream.read(1)
         if not tok:
             # Empty read means end of stream: finish the name here rather
             # than looping forever on reads that return nothing.
             break
         if tok.isspace() or tok in NameObject.delimiterCharacters:
             stream.seek(-1, 1)
             break
         name += tok
     return NameObject(name.decode('utf-8'))
Esempio n. 27
0
 def writeToStream(self, stream, encryption_key):
     """Write this text as a string object to ``stream``.

     ``encode_pdfdocencoding`` is tried first; when it raises
     ``UnicodeEncodeError`` the text is written as UTF-16BE with a BOM.  With
     a truthy ``encryption_key`` the bytes are RC4-encrypted and delegated to
     a ``ByteStringObject``; otherwise a ``( ... )`` literal is written.
     """
     try:
         encoded = encode_pdfdocencoding(self)
     except UnicodeEncodeError:
         encoded = codecs.BOM_UTF16_BE + self.encode("utf-16be")

     if encryption_key:
         encrypted = ByteStringObject(RC4_encrypt(encryption_key, encoded))
         encrypted.writeToStream(stream, None)
         return

     stream.write(b_("("))
     for c in encoded:
         # Alphanumerics (and anything comparing equal to b_(' ')) are
         # written verbatim; everything else as a three-digit octal escape.
         if chr_(c).isalnum() or c == b_(' '):
             stream.write(b_(chr_(c)))
         else:
             stream.write(b_("\\%03o" % ord_(c)))
     stream.write(b_(")"))
Esempio n. 28
0
def readHexStringFromStream(stream):
    """Read a ``<...>`` hex string from ``stream`` and return a string object.

    Non-whitespace bytes are grouped in pairs and each pair is decoded as a
    base-16 character code.  A lone trailing digit is padded with ``0``.

    Raises:
        PdfStreamError: if the stream ends before the closing ``>``.
    """
    stream.read(1)  # skip the opening byte
    decoded = ""
    pending = b_("")
    while True:
        digit = readNonWhitespace(stream)
        if not digit:
            # stream has truncated prematurely
            raise PdfStreamError("Stream has ended unexpectedly")
        if digit == b_(">"):
            break
        pending += digit
        if len(pending) == 2:
            decoded += chr(int(pending, 16))
            pending = b_("")
    # An odd number of digits leaves one pending; complete it with "0".
    if len(pending) == 1:
        pending += b_("0")
    if len(pending) == 2:
        decoded += chr(int(pending, 16))
    return createStringObject(b_(decoded))
Esempio n. 29
0
 def writeToStream(self, stream, encryption_key):
     """Serialize this text string to ``stream``.

     The text is encoded with ``encode_pdfdocencoding`` when possible and as
     BOM-prefixed UTF-16BE otherwise.  If ``encryption_key`` is truthy the
     encoded bytes are encrypted and written through ``ByteStringObject``;
     if not, they are written as a parenthesised literal with octal escapes.
     """
     try:
         raw = encode_pdfdocencoding(self)
     except UnicodeEncodeError:
         raw = codecs.BOM_UTF16_BE + self.encode("utf-16be")

     if not encryption_key:
         write = stream.write
         write(b_("("))
         for c in raw:
             if chr_(c).isalnum() or c == b_(' '):
                 write(b_(chr_(c)))
             else:
                 # Anything else is emitted as a backslash octal escape.
                 write(b_("\\%03o" % ord_(c)))
         write(b_(")"))
     else:
         raw = RC4_encrypt(encryption_key, raw)
         # The encrypted bytes are written without a key (None).
         ByteStringObject(raw).writeToStream(stream, None)
Esempio n. 30
0
def readHexStringFromStream(stream):
    """Decode a hex string terminated by ``>`` into a string object.

    The first byte is skipped.  Every two non-whitespace bytes form one
    base-16 character code; a single leftover digit is padded with ``0``.

    Raises:
        PdfStreamError: if the stream runs out before ``>`` is read.
    """
    stream.read(1)
    chars = ""
    pair = b_("")
    while True:
        nibble = readNonWhitespace(stream)
        if not nibble:
            # stream has truncated prematurely
            raise PdfStreamError("Stream has ended unexpectedly")
        if nibble == b_(">"):
            break
        pair += nibble
        if len(pair) == 2:
            chars += chr(int(pair, 16))
            pair = b_("")
    if len(pair) == 1:
        # Pad a dangling digit so it can be decoded like a full pair.
        pair += b_("0")
    if len(pair) == 2:
        chars += chr(int(pair, 16))
    return createStringObject(b_(chars))
Esempio n. 31
0
 def __new__(cls, s):
     """Create the name from ``bytes``/``bytearray`` (used as-is) or ``str``.

     A ``str`` is UTF-8 encoded, then every byte in 0-32 and 127-255 is
     replaced by ``#`` followed by its two-digit uppercase hex code.

     Raises:
         ValueError: if ``s`` is not ``bytes``, ``bytearray`` or ``str``.
     """
     if isinstance(s, (bytes, bytearray)):
         b = s
     elif isinstance(s, str):
         from itertools import chain
         b = bytearray(s, 'utf_8')
         for c in chain(range(33), range(127, 256)):
             if c in b:
                 # Always two hex digits: ``hex(c)[2:]`` gave a single digit
                 # for bytes below 0x10 (e.g. "#9" instead of "#09").
                 b = b.replace(bytes([c]), b_('#%02X' % c))
     else:
         raise ValueError()
     return PdfNameObjectBase.__new__(cls, b)
Esempio n. 32
0
 def readFromStream(stream, pdf):
     """Parse ``<id> <generation> R`` and return an ``IndirectObject``.

     Raises:
         PdfStreamError: if the stream ends while either number is read.
         utils.PdfReadError: if the byte after the generation is not ``R``.
     """
     # Object id: bytes up to the first whitespace byte.
     id_bytes = b_("")
     while True:
         ch = stream.read(1)
         if not ch:
             # stream has truncated prematurely
             raise PdfStreamError("Stream has ended unexpectedly")
         if ch.isspace():
             break
         id_bytes += ch

     # Generation number: same rule, starting right after that whitespace.
     gen_bytes = b_("")
     while True:
         ch = stream.read(1)
         if not ch:
             # stream has truncated prematurely
             raise PdfStreamError("Stream has ended unexpectedly")
         if ch.isspace():
             break
         gen_bytes += ch

     marker = stream.read(1)
     if marker != b_("R"):
         raise utils.PdfReadError("Error reading indirect object reference at byte %s" % utils.hexStr(stream.tell()))
     return IndirectObject(int(id_bytes), int(gen_bytes), pdf)
Esempio n. 33
0
class NameObject(str, PdfObject):
    """A ``str`` subclass that can write itself to and be read from a stream."""

    # Bytes that terminate a name token while reading.
    delimiterCharacters = b_("("), b_(")"), b_("<"), b_(">"), b_("["), \
        b_("]"), b_("{"), b_("}"), b_("/"), b_("%")

    def __init__(self, data):
        str.__init__(data)

    def writeToStream(self, stream, encryption_key):
        """Write this name to ``stream``; ``encryption_key`` is not used."""
        stream.write(b_(self))

    @staticmethod
    def readFromStream(stream):
        """Read a name token starting with ``/`` from ``stream``.

        Reading stops at whitespace, at one of ``delimiterCharacters`` (that
        byte is pushed back), or at the end of the stream.

        Raises:
            utils.PdfReadError: if the first byte is not ``/``.
        """
        name = stream.read(1)
        if name != b_("/"):
            # Call form instead of ``raise E, "msg"``, which Python 3 rejects.
            raise utils.PdfReadError("name read error")
        while True:
            tok = stream.read(1)
            if not tok:
                # End of stream terminates the name.  An empty read is
                # neither whitespace nor a delimiter, so without this check
                # the loop would never exit.
                break
            if tok.isspace() or tok in NameObject.delimiterCharacters:
                stream.seek(-1, 1)
                break
            name += tok
        return NameObject(name.decode('utf-8'))
Esempio n. 34
0
def _alg35(password, rev, keylen,
           owner_entry, p_entry, id1_entry, metadata_encrypt):
    """Compute the value for the encryption dictionary's U entry.

    Returns:
        A ``(u_value, key)`` tuple: the 32-byte U value (16 bytes of RC4
        output followed by 16 null bytes) and the key obtained from
        ``_alg32``.

    NOTE(review): ``metadata_encrypt`` is accepted but not used here; it is
    not forwarded to ``_alg32``.  Confirm against the callers before changing.
    """
    # Step 1: encryption key derived from the user password (Algorithm 3.2).
    key = _alg32(password, rev, keylen, owner_entry, p_entry, id1_entry)

    # Steps 2-3: MD5 over the 32-byte padding string, then the first element
    # of the file identifier array.
    hasher = md5()
    hasher.update(_encryption_padding)
    hasher.update(id1_entry.original_bytes)
    digest = hasher.digest()

    # Step 4: RC4-encrypt the 16-byte hash with the key from step 1.
    result = utils.RC4_encrypt(key, digest)

    # Step 5: 19 more RC4 passes (counter 1..19); each pass uses the original
    # key with every byte XORed with the counter.
    for counter in range(1, 20):
        round_key = b_('')
        for pos in range(len(key)):
            round_key += b_(chr(utils.ord_(key[pos]) ^ counter))
        result = utils.RC4_encrypt(round_key, result)

    # Step 6: append 16 bytes of "arbitrary padding"; null bytes are used,
    # which the original author noted matches a few other implementations.
    padding = b_('\x00') * 16
    return result + padding, key
Esempio n. 35
0
 def readFromStream(stream):
     """Read a name token starting with ``/`` and return a ``NameObject``.

     Bytes are consumed until whitespace or one of
     ``NameObject.delimiterCharacters``; that byte is pushed back.

     Raises:
         utils.PdfReadError: if the first byte is not ``/``.
         PdfStreamError: if the stream ends before a terminator is seen.
     """
     debug = False
     # print(...) with a single argument behaves the same on Python 2 and 3.
     if debug: print(stream.tell())
     name = stream.read(1)
     if name != b_("/"):
         # Call form instead of ``raise E, "msg"``, which Python 3 rejects.
         raise utils.PdfReadError("name read error")
     while True:
         tok = stream.read(1)
         if not tok:
             # stream has truncated prematurely
             raise PdfStreamError("Stream has ended unexpectedly")
         if tok.isspace() or tok in NameObject.delimiterCharacters:
             stream.seek(-1, 1)
             break
         name += tok
     if debug: print(name)
     return NameObject(name.decode('utf-8'))
Esempio n. 36
0
 def readFromStream(stream):
     """Parse a ``/name`` token from ``stream`` into a ``NameObject``.

     The token ends at whitespace or at any of
     ``NameObject.delimiterCharacters``; the terminator is left unread.

     Raises:
         utils.PdfReadError: if the token does not begin with ``/``.
         PdfStreamError: if the stream ends before the token is terminated.
     """
     debug = False
     # Function-call print: valid on Python 3 and, for one argument, gives
     # the same output on Python 2.
     if debug: print(stream.tell())
     name = stream.read(1)
     if name != b_("/"):
         # ``raise E, "msg"`` is Python-2-only; the call form works on both.
         raise utils.PdfReadError("name read error")
     while True:
         tok = stream.read(1)
         if not tok:
             # stream has truncated prematurely
             raise PdfStreamError("Stream has ended unexpectedly")
         if tok.isspace() or tok in NameObject.delimiterCharacters:
             stream.seek(-1, 1)
             break
         name += tok
     if debug: print(name)
     return NameObject(name.decode('utf-8'))
Esempio n. 37
0
 def __init__(self):
     """Set up the header, the object list and the base document objects.

     Three dictionaries are registered via ``self._addObject``: the page tree
     root (``self._pages``), the info dictionary (``self._info``) and the
     catalog (``self._root``), which refers to the page tree.
     """
     self._header = b_("%PDF-1.3")
     self._objects = []  # array of indirect objects

     # The root of our page tree node.
     page_tree = DictionaryObject()
     page_tree.update({
         NameObject("/Type"): NameObject("/Pages"),
         NameObject("/Count"): NumberObject(0),
         NameObject("/Kids"): ArrayObject(),
     })
     self._pages = self._addObject(page_tree)

     # info object
     producer = createStringObject(
         u"Python PDF Library - http://pybrary.net/pyPdf/")
     info_dict = DictionaryObject()
     info_dict.update({NameObject("/Producer"): producer})
     self._info = self._addObject(info_dict)

     # root object
     catalog = DictionaryObject()
     catalog.update({
         NameObject("/Type"): NameObject("/Catalog"),
         NameObject("/Pages"): self._pages,
     })
     self._root = self._addObject(catalog)
Esempio n. 38
0
 def writeToStream(self, stream, encryption_key):
     """Write this item as a dictionary to ``stream``.

     Of ``/Title``, ``/Parent``, ``/First``, ``/Last``, ``/Next`` and
     ``/Prev``, those present in ``self`` are written first with the value
     from ``self.raw_get``.  A ``/Dest`` entry holding
     ``self.getDestArray()`` always follows.
     """
     stream.write(b_("<<\n"))
     # ``x in self`` replaces ``self.has_key(x)``; dict.has_key no longer
     # exists on Python 3, while ``in`` works on both 2 and 3.
     for key in [NameObject(x) for x in ['/Title', '/Parent', '/First', '/Last', '/Next', '/Prev'] if x in self]:
         key.writeToStream(stream, encryption_key)
         stream.write(b_(" "))
         value = self.raw_get(key)
         value.writeToStream(stream, encryption_key)
         stream.write(b_("\n"))
     key = NameObject('/Dest')
     key.writeToStream(stream, encryption_key)
     stream.write(b_(" "))
     value = self.getDestArray()
     value.writeToStream(stream, encryption_key)
     stream.write(b_("\n"))
     stream.write(b_(">>"))
Esempio n. 39
0
 def writeToStream(self, stream, encryption_key):
     """Serialize the present link entries plus ``/Dest`` as a dictionary.

     ``/Title``, ``/Parent``, ``/First``, ``/Last``, ``/Next`` and ``/Prev``
     are written only when ``self`` contains them, each on its own line with
     the value from ``self.raw_get``.  ``/Dest`` is always written, using
     ``self.getDestArray()``.
     """
     stream.write(b_("<<\n"))
     # Membership test via ``in``: ``has_key`` was removed from dict in
     # Python 3 and ``in`` is the equivalent that works on both versions.
     for key in [NameObject(x) for x in ['/Title', '/Parent', '/First', '/Last', '/Next', '/Prev'] if x in self]:
         key.writeToStream(stream, encryption_key)
         stream.write(b_(" "))
         value = self.raw_get(key)
         value.writeToStream(stream, encryption_key)
         stream.write(b_("\n"))
     key = NameObject('/Dest')
     key.writeToStream(stream, encryption_key)
     stream.write(b_(" "))
     value = self.getDestArray()
     value.writeToStream(stream, encryption_key)
     stream.write(b_("\n"))
     stream.write(b_(">>"))
Esempio n. 40
0
 def readNextEndLine(self, stream):
     """Collect the bytes of one line while walking backwards through ``stream``.

     Each pass reads one byte and then seeks back two, so the position moves
     one byte towards the start per pass.  Bytes that are not CR/LF are
     prepended to the result; the first CR/LF byte ends the scan after any
     adjacent CR/LF bytes have been stepped over.

     Returns the collected bytes, in stream order, without the line ending.

     NOTE(review): there is no check for reaching the start of the stream;
     what happens then depends on how the stream treats a seek to a negative
     position - TODO confirm.
     """
     line = b_("")
     while True:
         x = stream.read(1)
         # Undo the read and step one further byte back.
         stream.seek(-2, 1)
         if x == b_('\n') or x == b_('\r'):  # \n = LF; \r = CR
             crlf = False
             # Keep stepping back while the bytes are still CR/LF.
             while x == b_('\n') or x == b_('\r'):
                 x = stream.read(1)
                 if x == b_('\n') or x == b_('\r'):  # account for CR+LF
                     stream.seek(-1, 1)
                     crlf = True
                 stream.seek(-2, 1)
             # if using CR+LF, go back 2 bytes, else 1
             # NOTE(review): the offset here is positive, so relative to the
             # current position this seek moves forward by 2 or 1.
             stream.seek(2 if crlf else 1, 1)
             break
         else:
             # Prepend, because the bytes arrive in reverse order.
             line = x + line
     return line
Esempio n. 41
0
 def readNextEndLine(self, stream):
     """Read one line backwards from the current position of ``stream``.

     Every pass reads a byte and seeks back two, which moves the position
     one byte earlier.  Non-EOL bytes are prepended to ``line``.  On the
     first CR or LF the adjacent CR/LF bytes are stepped over, the position
     is adjusted and the collected line is returned.

     NOTE(review): nothing guards against running past the start of the
     stream; the outcome depends on the stream's handling of a negative
     seek - TODO confirm.
     """
     line = b_("")
     while True:
         x = stream.read(1)
         # Net effect of read(1) + seek(-2): one byte backwards.
         stream.seek(-2, 1)
         if x == b_('\n') or x == b_('\r'):  # \n = LF; \r = CR
             crlf = False
             # Step back over the remaining CR/LF bytes of this line ending.
             while x == b_('\n') or x == b_('\r'):
                 x = stream.read(1)
                 if x == b_('\n') or x == b_('\r'):  # account for CR+LF
                     stream.seek(-1, 1)
                     crlf = True
                 stream.seek(-2, 1)
             # if using CR+LF, go back 2 bytes, else 1
             # NOTE(review): with whence=1 and a positive offset this seek
             # advances the position by 2 or 1 from where the loop left it.
             stream.seek(2 if crlf else 1, 1)
             break
         else:
             # Bytes are seen last-to-first, so build the line by prepending.
             line = x + line
     return line
Esempio n. 42
0
def _alg32(password, rev, keylen,
           owner_entry, p_entry, id1_entry, metadata_encrypt=True):
    """Derive the encryption key from the password and document entries.

    Returns:
        The first ``keylen`` bytes of the final MD5 digest.
    """
    # 1. Pad with ``_encryption_padding`` and cut to exactly 32 bytes.
    padded_password = (password + _encryption_padding)[:32]

    # 2-5. Hash the padded password, the /O entry, the /P entry packed as a
    # little-endian 4-byte integer, and the first file identifier element.
    hasher = md5(padded_password)
    hasher.update(owner_entry.original_bytes)
    hasher.update(struct.pack('<i', p_entry))
    hasher.update(id1_entry.original_bytes)

    # 6. (Revision 3 or greater) When document metadata is not being
    # encrypted, four 0xFF bytes are added to the hash.
    if rev >= 3 and not metadata_encrypt:
        hasher.update(b_("\xff\xff\xff\xff"))

    # 7. Finish the hash.
    digest = hasher.digest()

    # 8. (Revision 3 or greater) Re-hash the first ``keylen`` bytes 50 times.
    if rev >= 3:
        for _ in range(50):
            digest = md5(digest[:keylen]).digest()

    # 9. The key is the first ``keylen`` bytes of the final digest.
    return digest[:keylen]
Esempio n. 43
0
 def as_numeric(self):
     """Return ``int`` of this object's ``repr`` after passing it through ``b_``."""
     text = repr(self)
     return int(b_(text))
Esempio n. 44
0
def readObject(stream, pdf):
    """Read the next object from ``stream``, chosen by its first byte.

    ``t``/``f`` select a boolean, ``(`` a string, ``/`` a name, ``[`` an
    array, ``n`` null, ``<`` a dictionary (``<<``) or hex string, and ``%`` a
    comment, which is skipped.  Anything else is read as a number, or as an
    indirect reference when the next bytes look like ``<int> <int> R``.

    Raises:
        PdfStreamError: if the stream ends inside a comment.
    """
    tok = stream.read(1)
    stream.seek(-1, 1) # reset to start
    if tok == b_('t') or tok == b_('f'):
        # boolean object
        return BooleanObject.readFromStream(stream)
    elif tok == b_('('):
        # string object
        return readStringFromStream(stream)
    elif tok == b_('/'):
        # name object
        return NameObject.readFromStream(stream)
    elif tok == b_('['):
        # array object
        return ArrayObject.readFromStream(stream, pdf)
    elif tok == b_('n'):
        # null object
        return NullObject.readFromStream(stream)
    elif tok == b_('<'):
        # hexadecimal string OR dictionary
        peek = stream.read(2)
        stream.seek(-2, 1) # reset to start
        if peek == b_('<<'):
            return DictionaryObject.readFromStream(stream, pdf)
        else:
            return readHexStringFromStream(stream)
    elif tok == b_('%'):
        # comment: skip to the end of the line
        while tok not in (b_('\r'), b_('\n')):
            tok = stream.read(1)
            if not tok:
                # An empty read never matches CR/LF; without this check a
                # comment running to the end of the stream loops forever.
                raise PdfStreamError("Stream has ended unexpectedly")
        # Skip whitespace, then step back onto the first byte of the object.
        readNonWhitespace(stream)
        stream.seek(-1, 1)
        return readObject(stream, pdf)
    else:
        # number object OR indirect reference
        if tok == b_('+') or tok == b_('-'):
            # number
            return NumberObject.readFromStream(stream)
        peek = stream.read(20)
        stream.seek(-len(peek), 1) # reset to start
        if re.match(b_(r"(\d+)\s(\d+)\sR[^a-zA-Z]"), peek) is not None:
            return IndirectObject.readFromStream(stream, pdf)
        else:
            return NumberObject.readFromStream(stream)
Esempio n. 45
0
 def writeToStream(self, stream, encryption_key):
     """Write ``true`` or ``false`` to ``stream`` according to ``self.value``.

     ``encryption_key`` is not used.
     """
     stream.write(b_("true") if self.value else b_("false"))
Esempio n. 46
0
def readStringFromStream(stream):
    """Read a parenthesised literal string from ``stream``.

    The opening parenthesis is consumed first.  Bytes are then collected
    up to the matching closing parenthesis (nested, unescaped parentheses
    are kept and counted), decoding backslash escapes along the way.

    :param stream: seekable binary stream positioned on the opening ``(``.
    :return: the result of ``createStringObject`` for the decoded bytes.
    :raises PdfStreamError: if the stream ends before the string is closed.
    :raises utils.PdfReadError: on an unrecognised escape sequence.
    """
    # Single-character escapes and the byte each one stands for.
    simple_escapes = {
        b_("n"): b_("\n"),
        b_("r"): b_("\r"),
        b_("t"): b_("\t"),
        b_("b"): b_("\b"),
        b_("f"): b_("\f"),
        b_("("): b_("("),
        b_(")"): b_(")"),
        b_("\\"): b_("\\"),
    }
    # odd/unnecessary escape sequences we have encountered; they decode to
    # the escaped character itself.
    tolerated_escapes = (b_(" "), b_("/"), b_("%"), b_("<"), b_(">"),
                         b_("["), b_("]"))

    stream.read(1)  # consume the opening parenthesis
    parens = 1
    txt = b_("")
    while True:
        tok = stream.read(1)
        if not tok:
            # stream has truncated prematurely
            raise PdfStreamError("Stream has ended unexpectedly")
        if tok == b_("("):
            parens += 1
        elif tok == b_(")"):
            parens -= 1
            if parens == 0:
                break
        elif tok == b_("\\"):
            tok = stream.read(1)
            if tok in simple_escapes:
                tok = simple_escapes[tok]
            elif tok in tolerated_escapes:
                pass
            elif tok.isdigit():
                # "The number ddd may consist of one, two, or three
                # octal digits; high-order overflow shall be ignored.
                # Three octal digits shall be used, with leading zeros
                # as needed, if the next character of the string is also
                # a digit." (PDF reference 7.3.4.2, p 16)
                for _ in range(2):
                    ntok = stream.read(1)
                    if ntok.isdigit():
                        tok += ntok
                    else:
                        # The byte that ended the escape belongs to the
                        # string (it may even be the closing parenthesis),
                        # so hand it back to the main loop.
                        if ntok:
                            stream.seek(-1, 1)
                        break
                # Mask to one byte: high-order overflow is ignored.
                tok = b_(chr(int(tok, base=8) & 0xFF))
            elif tok in b_("\n\r"):
                # This case is hit when a backslash followed by a line
                # break occurs.  If it's a multi-char EOL, consume the
                # second character:
                tok = stream.read(1)
                if not tok in b_("\n\r"):
                    stream.seek(-1, 1)
                # Then don't add anything to the actual string, since this
                # line break was escaped:
                tok = b_('')
            else:
                raise utils.PdfReadError("Unexpected escaped string")
        txt += tok
    return createStringObject(txt)
Esempio n. 47
0
 def readFromStream(stream, pdf):
     """Parse a ``<< ... >>`` dictionary, and any stream data after it.

     Key/value pairs are read with ``readObject`` until the closing ``>>``.
     If the ``stream`` keyword follows, ``/Length`` bytes of payload are
     read as well and a stream object is built from the result.

     :param stream: seekable binary stream positioned on the opening ``<<``.
     :param pdf: reader passed to ``readObject``; also used to resolve an
         indirect ``/Length``.
     :return: ``StreamObject.initializeFromDictionary(data)`` when stream
         data was read, otherwise a ``DictionaryObject`` with the entries.
     :raises utils.PdfReadError: if the input does not start with ``<<``,
         a key is defined twice, or ``endstream`` cannot be found.
     :raises AssertionError: if ``stream`` is not followed by an EOL or
         the dictionary has no ``/Length`` entry.
     """
     tmp = stream.read(2)
     if tmp != b_("<<"):
         raise utils.PdfReadError(
             "Dictionary read error at byte %s: stream must begin with '<<'"
             % utils.hexStr(stream.tell()))
     data = {}
     while True:
         tok = readNonWhitespace(stream)
         if tok == b_(">"):
             # consume the second '>' of the closing '>>'
             stream.read(1)
             break
         stream.seek(-1, 1)
         key = readObject(stream, pdf)
         # skip whitespace between key and value, then step back onto
         # the first byte of the value
         readNonWhitespace(stream)
         stream.seek(-1, 1)
         value = readObject(stream, pdf)
         if key in data:
             # multiple definitions of key not permitted
             raise utils.PdfReadError(
                 "Multiple definitions in dictionary at byte %s for key %s"
                 % (utils.hexStr(stream.tell()), key))
         data[key] = value
     pos = stream.tell()
     s = readNonWhitespace(stream)
     if s == b_('s') and stream.read(5) == b_('tream'):
         eol = stream.read(1)
         # odd PDF file output has spaces after 'stream' keyword but before EOL.
         # patch provided by Danial Sandler
         while eol == b_(' '):
             eol = stream.read(1)
         assert eol in (b_("\n"), b_("\r"))
         if eol == b_("\r"):
             # A CR is normally followed by LF.  If it is not, the byte
             # just read is already stream data and must be put back.
             following = stream.read(1)
             if following and following != b_("\n"):
                 stream.seek(-1, 1)
         # this is a stream object, not a dictionary
         assert "/Length" in data
         length = data["/Length"]
         if isinstance(length, IndirectObject):
             # resolving the reference moves the stream; restore afterwards
             t = stream.tell()
             length = pdf.getObject(length)
             stream.seek(t, 0)
         data["__streamdata__"] = stream.read(length)
         e = readNonWhitespace(stream)
         ndstream = stream.read(8)
         if (e + ndstream) != b_("endstream"):
             # (sigh) - the odd PDF file has a length that is too long, so
             # we need to read backwards to find the "endstream" ending.
             # ReportLab (unknown version) generates files with this bug,
             # and Python users into PDF files tend to be our audience.
             # we need to do this to correct the streamdata and chop off
             # an extra character.
             pos = stream.tell()
             stream.seek(-10, 1)
             end = stream.read(9)
             if end == b_("endstream"):
                 # we found it by looking back one character further.
                 data["__streamdata__"] = data["__streamdata__"][:-1]
             else:
                 stream.seek(pos, 0)
                 raise utils.PdfReadError(
                     "Unable to find 'endstream' marker after stream at byte %s."
                     % utils.hexStr(stream.tell()))
     else:
         # plain dictionary: undo the look-ahead
         stream.seek(pos, 0)
     if "__streamdata__" in data:
         return StreamObject.initializeFromDictionary(data)
     else:
         retval = DictionaryObject()
         retval.update(data)
         return retval
Esempio n. 48
0
 def writeToStream(self, stream, encryption_key):
     """Write this reference to ``stream`` as ``<idnum> <generation> R``.

     ``encryption_key`` is accepted but not used here.
     """
     reference = "%s %s R" % (self.idnum, self.generation)
     stream.write(b_(reference))
Esempio n. 49
0
 def writeToStream(self, stream, encryption_key):
     """Write every element between ``[`` and `` ]``, space-separated.

     Each element writes itself through its own ``writeToStream`` and is
     given the same ``encryption_key``.
     """
     separator = b_(" ")
     stream.write(b_("["))
     for element in self:
         stream.write(separator)
         element.writeToStream(stream, encryption_key)
     stream.write(b_(" ]"))
Esempio n. 50
0
 def writeToStream(self, stream, encryption_key):
     """Write the ``null`` keyword; ``encryption_key`` is not used."""
     keyword = b_("null")
     stream.write(keyword)
Esempio n. 51
0
    def readFromStream(stream, pdf):
        """Parse a ``<< ... >>`` dictionary, and any stream data after it.

        Key/value pairs are read with ``readObject`` until the closing
        ``>>``; NUL bytes between entries are skipped and only the first
        definition of a repeated key is kept.  If the ``stream`` keyword
        follows, ``/Length`` bytes of payload are read as well.

        :param stream: seekable binary stream positioned on the opening
            ``<<``.
        :param pdf: reader passed to ``readObject``; also used to resolve
            an indirect ``/Length``, and its ``strict`` flag decides
            whether a missing ``endstream`` is an error or a warning.
        :return: ``StreamObject.initializeFromDictionary(data)`` when
            stream data was read, otherwise a ``DictionaryObject``.
        :raises utils.PdfReadError: if the input does not start with
            ``<<``, or ``endstream`` is missing and ``pdf.strict`` is not
            ``False``.
        :raises PdfStreamError: if the stream ends inside the dictionary.
        :raises AssertionError: if ``stream`` is not followed by an EOL or
            the dictionary has no ``/Length`` entry.
        """
        tmp = stream.read(2)
        if tmp != b_("<<"):
            raise utils.PdfReadError(
                "Dictionary read error at byte %s: stream must begin with '<<'"
                % utils.hexStr(stream.tell()))
        data = {}
        while True:
            tok = readNonWhitespace(stream)
            if tok == b_('\x00'):
                continue
            if not tok:
                # stream has truncated prematurely
                raise PdfStreamError("Stream has ended unexpectedly")

            if tok == b_(">"):
                # consume the second '>' of the closing '>>'
                stream.read(1)
                break
            stream.seek(-1, 1)
            key = readObject(stream, pdf)
            # skip whitespace between key and value, then step back onto
            # the first byte of the value
            readNonWhitespace(stream)
            stream.seek(-1, 1)
            value = readObject(stream, pdf)
            if key not in data:
                # later duplicates of a key are silently ignored
                data[key] = value
        pos = stream.tell()
        s = readNonWhitespace(stream)
        if s == b_('s') and stream.read(5) == b_('tream'):
            eol = stream.read(1)
            # odd PDF file output has spaces after 'stream' keyword but before EOL.
            # patch provided by Danial Sandler
            while eol == b_(' '):
                eol = stream.read(1)
            assert eol in (b_("\n"), b_("\r"))
            if eol == b_("\r"):
                # A CR is normally followed by LF.  If it is not, the byte
                # just read is already stream data and must be put back.
                # The comparison has to go through b_(): on Python 3 a
                # bytes value never equals the str '\n'.
                following = stream.read(1)
                if following and following != b_("\n"):
                    stream.seek(-1, 1)
            # this is a stream object, not a dictionary
            assert "/Length" in data
            length = data["/Length"]
            if isinstance(length, IndirectObject):
                # resolving the reference moves the stream; restore afterwards
                t = stream.tell()
                length = pdf.getObject(length)
                stream.seek(t, 0)
            data["__streamdata__"] = stream.read(length)
            e = readNonWhitespace(stream)
            ndstream = stream.read(8)
            if (e + ndstream) != b_("endstream"):
                # (sigh) - the odd PDF file has a length that is too long, so
                # we need to read backwards to find the "endstream" ending.
                # ReportLab (unknown version) generates files with this bug,
                # and Python users into PDF files tend to be our audience.
                # we need to do this to correct the streamdata and chop off
                # an extra character.
                pos = stream.tell()
                stream.seek(-10, 1)
                end = stream.read(9)
                if end == b_("endstream"):
                    # we found it by looking back one character further.
                    data["__streamdata__"] = data["__streamdata__"][:-1]
                else:
                    if pdf.strict == False:
                        warnings.warn("Ignoring missing endstream. This could affect PDF output.")
                    else:
                        stream.seek(pos, 0)
                        raise utils.PdfReadError(
                            "Unable to find 'endstream' marker after stream at byte %s."
                            % utils.hexStr(stream.tell()))
        else:
            # plain dictionary: undo the look-ahead
            stream.seek(pos, 0)
        if "__streamdata__" in data:
            return StreamObject.initializeFromDictionary(data)
        else:
            retval = DictionaryObject()
            retval.update(data)
            return retval
Esempio n. 52
0
 def as_numeric(self):
     """Return this object's value as a built-in ``float``.

     The number is parsed from the ``b_``-converted ``repr`` of the object.
     """
     encoded = b_(repr(self))
     return float(encoded)
Esempio n. 53
0
 def writeToStream(self, stream, encryption_key):
     """Write the ``repr`` of this object to ``stream``.

     ``encryption_key`` is accepted but not used here.
     """
     rendered = b_(repr(self))
     stream.write(rendered)
Esempio n. 54
0
def readObject(stream, pdf):
    """Read the next PDF object from ``stream`` and return it.

    One byte is peeked (the stream is rewound afterwards) to pick the
    reader to dispatch to.  A comment is skipped and the object after it
    is returned instead.

    :param stream: seekable binary stream positioned at the start of an
        object.
    :param pdf: reader handed on to the array, dictionary and
        indirect-reference readers.
    :return: whatever the selected reader returns.
    :raises PdfStreamError: if the stream ends inside a comment.
    """
    tok = stream.read(1)
    stream.seek(-1, 1)  # reset to start
    if tok == b_('t') or tok == b_('f'):
        # boolean object
        return BooleanObject.readFromStream(stream)
    elif tok == b_('('):
        # string object
        return readStringFromStream(stream)
    elif tok == b_('/'):
        # name object
        return NameObject.readFromStream(stream)
    elif tok == b_('['):
        # array object
        return ArrayObject.readFromStream(stream, pdf)
    elif tok == b_('n'):
        # null object
        return NullObject.readFromStream(stream)
    elif tok == b_('<'):
        # hexadecimal string OR dictionary
        peek = stream.read(2)
        stream.seek(-2, 1)  # reset to start
        if peek == b_('<<'):
            return DictionaryObject.readFromStream(stream, pdf)
        else:
            return readHexStringFromStream(stream)
    elif tok == b_('%'):
        # comment: discard everything up to (and including) the first EOL
        while tok not in (b_('\r'), b_('\n')):
            tok = stream.read(1)
            if not tok:
                # At end of stream read() returns an empty value on every
                # call, so without this check the loop would never exit.
                raise PdfStreamError("Stream has ended unexpectedly")
        # skip whitespace, then step back onto the first significant byte
        readNonWhitespace(stream)
        stream.seek(-1, 1)
        return readObject(stream, pdf)
    else:
        # number object OR indirect reference
        if tok == b_('+') or tok == b_('-'):
            # a sign can only start a number
            return NumberObject.readFromStream(stream)
        peek = stream.read(20)
        stream.seek(-len(peek), 1)  # reset to start
        if re.match(b_(r"(\d+)\s(\d+)\sR[^a-zA-Z]"), peek) is not None:
            return IndirectObject.readFromStream(stream, pdf)
        else:
            return NumberObject.readFromStream(stream)
Esempio n. 55
0
    def write(self, stream):
        """Serialise the collected objects to ``stream`` as a PDF file.

        Writes, in order: the header, every entry of ``self._objects`` as
        ``<n> 0 obj ... endobj``, the cross-reference table, the trailer
        dictionary and the ``startxref`` / ``%%EOF`` footer.

        :param stream: writable binary stream that supports ``tell()``.
        """
        externalReferenceMap = {}

        # PDF objects sometimes have circular references to their /Page objects
        # inside their object tree (for example, annotations).  Those will be
        # indirect references to objects that we've recreated in this PDF.  To
        # address this problem, PageObject's store their original object
        # reference number, and we add it to the external reference map before
        # we sweep for indirect references.  This forces self-page-referencing
        # trees to reference the correct new object location, rather than
        # copying in a new copy of the page object.
        for objIndex, obj in enumerate(self._objects):
            if isinstance(obj, PageObject) and obj.indirectRef is not None:
                data = obj.indirectRef
                # map: source pdf -> generation -> idnum -> new reference
                by_generation = externalReferenceMap.setdefault(data.pdf, {})
                by_idnum = by_generation.setdefault(data.generation, {})
                by_idnum[data.idnum] = IndirectObject(objIndex + 1, 0, self)

        self.stack = []
        self._sweepIndirectReferences(externalReferenceMap, self._root)
        del self.stack

        # Begin writing:
        object_positions = []
        stream.write(self._header + b_("\n"))
        # object numbers are 1-based positions in self._objects
        for idnum, obj in enumerate(self._objects, 1):
            object_positions.append(stream.tell())
            stream.write(b_(str(idnum) + " 0 obj\n"))
            key = None
            # the encryption dictionary itself is written without a key
            if hasattr(self, "_encrypt") and idnum != self._encrypt.idnum:
                # per-object key: document key + low 3 bytes of the object
                # number + low 2 bytes of the generation (always 0 here),
                # hashed with MD5 and truncated
                pack1 = struct.pack("<i", idnum)[:3]
                pack2 = struct.pack("<i", 0)[:2]
                key = self._encrypt_key + pack1 + pack2
                assert len(key) == (len(self._encrypt_key) + 5)
                md5_hash = md5(key).digest()
                key = md5_hash[:min(16, len(self._encrypt_key) + 5)]
            if obj is not None:
                obj.writeToStream(stream, key)
                stream.write(b_("\nendobj\n"))

        # xref table
        xref_location = stream.tell()
        stream.write(b_("xref\n"))
        stream.write(b_("0 %s\n" % (len(self._objects) + 1)))
        stream.write(b_("%010d %05d f \n" % (0, 65535)))
        for offset in object_positions:
            stream.write(b_("%010d %05d n \n" % (offset, 0)))

        # trailer
        stream.write(b_("trailer\n"))
        trailer = DictionaryObject()
        trailer.update({
            NameObject("/Size"): NumberObject(len(self._objects) + 1),
            NameObject("/Root"): self._root,
            NameObject("/Info"): self._info,
        })
        if hasattr(self, "_ID"):
            trailer[NameObject("/ID")] = self._ID
        if hasattr(self, "_encrypt"):
            trailer[NameObject("/Encrypt")] = self._encrypt
        trailer.writeToStream(stream, None)

        # eof
        stream.write(b_("\nstartxref\n%s\n%%%%EOF\n" % (xref_location)))
Esempio n. 56
0
 def writeToStream(self, stream, encryption_key):
     """Emit this object's ``repr`` on ``stream``.

     The ``encryption_key`` argument is ignored.
     """
     text = repr(self)
     stream.write(b_(text))
Esempio n. 57
0
 def writeToStream(self, stream, encryption_key):
     """Emit the ``null`` keyword on ``stream``.

     The ``encryption_key`` argument is ignored.
     """
     null_keyword = b_("null")
     stream.write(null_keyword)
Esempio n. 58
0
def readStringFromStream(stream):
    """Read a parenthesised literal string from ``stream``.

    The opening parenthesis is consumed first.  Bytes are then collected
    up to the matching closing parenthesis (nested, unescaped parentheses
    are kept and counted), decoding backslash escapes along the way.

    :param stream: seekable binary stream positioned on the opening ``(``.
    :return: the result of ``createStringObject`` for the decoded bytes.
    :raises PdfStreamError: if the stream ends before the string is closed.
    :raises utils.PdfReadError: on an unrecognised escape sequence.
    """
    stream.read(1)  # consume the opening parenthesis
    parens = 1
    txt = b_("")
    while True:
        tok = stream.read(1)
        if not tok:
            # stream has truncated prematurely; without this check the
            # loop would keep appending empty reads forever
            raise PdfStreamError("Stream has ended unexpectedly")
        if tok == b_("("):
            parens += 1
        elif tok == b_(")"):
            parens -= 1
            if parens == 0:
                break
        elif tok == b_("\\"):
            tok = stream.read(1)
            if tok == b_("n"):
                tok = b_("\n")
            elif tok == b_("r"):
                tok = b_("\r")
            elif tok == b_("t"):
                tok = b_("\t")
            elif tok == b_("b"):
                tok = b_("\b")
            elif tok == b_("f"):
                tok = b_("\f")
            elif tok == b_("("):
                tok = b_("(")
            elif tok == b_(")"):
                tok = b_(")")
            elif tok == b_("\\"):
                tok = b_("\\")
            elif tok.isdigit():
                # "The number ddd may consist of one, two, or three
                # octal digits; high-order overflow shall be ignored.
                # Three octal digits shall be used, with leading zeros
                # as needed, if the next character of the string is also
                # a digit." (PDF reference 7.3.4.2, p 16)
                for _ in range(2):
                    ntok = stream.read(1)
                    if ntok.isdigit():
                        tok += ntok
                    else:
                        # The byte that ended the escape belongs to the
                        # string (it may even be the closing parenthesis),
                        # so hand it back to the main loop.
                        if ntok:
                            stream.seek(-1, 1)
                        break
                # Mask to one byte: high-order overflow is ignored.
                tok = b_(chr(int(tok, base=8) & 0xFF))
            elif tok in b_("\n\r"):
                # This case is hit when a backslash followed by a line
                # break occurs.  If it's a multi-char EOL, consume the
                # second character:
                tok = stream.read(1)
                if not tok in b_("\n\r"):
                    stream.seek(-1, 1)
                # Then don't add anything to the actual string, since this
                # line break was escaped:
                tok = b_('')
            else:
                raise utils.PdfReadError("Unexpected escaped string")
        txt += tok
    return createStringObject(txt)