def initialize(self, password=''):
    """Authenticate *password* and set up decryption for the document.

    When ``self.encryption`` is empty, every permission flag is granted
    and the method returns immediately.  Otherwise the encryption
    dictionary is validated, the permission flags are read from ``P``,
    the key is derived from *password* (Algorithm 3.2) and checked
    against the stored ``U`` entry (Algorithm 3.4 for revision 2,
    Algorithm 3.5 for revision 3).  On success the key is stored in
    ``self.decrypt_key`` and ``self.decrypt_rc4`` becomes
    ``self.decipher``.

    Raises:
        PDFEncryptionError: unknown filter, algorithm (``V``) or
            revision (``R``).
        PDFNotImplementedError: revision 4 encryption.
        PDFPasswordIncorrect: the derived value does not match ``U``.
    """
    if not self.encryption:
        self.is_printable = self.is_modifiable = self.is_extractable = True
        self._initialized = True
        return
    (docid, param) = self.encryption
    if literal_name(param['Filter']) != 'Standard':
        raise PDFEncryptionError('Unknown filter: param=%r' % param)
    V = int_value(param.get('V', 0))
    if V not in (1, 2):
        raise PDFEncryptionError('Unknown algorithm: param=%r' % param)
    length = int_value(param.get('Length', 40))  # Key length (bits)
    O = str_value(param['O'])
    R = int_value(param['R'])  # Revision
    # Revisions below 2 have no authentication branch further down;
    # reject them here instead of failing later on an unbound local.
    if R < 2 or 5 <= R:
        raise PDFEncryptionError('Unknown revision: %r' % R)
    U = str_value(param['U'])
    P = int_value(param['P'])
    self.is_printable = bool(P & 4)
    self.is_modifiable = bool(P & 8)
    self.is_extractable = bool(P & 16)
    # Algorithm 3.2
    password = (password + self.PASSWORD_PADDING)[:32]  # 1
    md = md5.md5(password)  # 2
    md.update(O)  # 3
    # 4: P may be written as a signed or an unsigned 32-bit integer.
    # Masking gives the same four little-endian bytes for both forms
    # and avoids struct.error on values >= 2**31.
    md.update(struct.pack('<L', P & 0xffffffff))
    md.update(docid[0])  # 5
    if 4 <= R:
        # 6
        raise PDFNotImplementedError(
            'Revision 4 encryption is currently unsupported')
    # 9: the key is always 5 bytes for revision 2; for revision 3 its
    # size comes from the Length entry.  Floor division keeps the slice
    # index an integer.
    if R == 2:
        keysize = 5
    else:
        keysize = length // 8
    if 3 <= R:
        # 8
        for _ in xrange(50):
            md = md5.md5(md.digest()[:keysize])
    key = md.digest()[:keysize]
    if R == 2:
        # Algorithm 3.4
        u1 = Arcfour(key).process(self.PASSWORD_PADDING)
        is_authenticated = (u1 == U)
    else:
        # Algorithm 3.5 (R == 3 is the only value that reaches here)
        md = md5.md5(self.PASSWORD_PADDING)  # 2
        md.update(docid[0])  # 3
        x = Arcfour(key).process(md.digest()[:16])  # 4
        for i in xrange(1, 19 + 1):
            k = ''.join(chr(ord(c) ^ i) for c in key)
            x = Arcfour(k).process(x)
        u1 = x + x  # 32bytes total
        # Only the first 16 bytes of U are compared for revision 3.
        is_authenticated = (u1[:16] == U[:16])
    if not is_authenticated:
        raise PDFPasswordIncorrect
    self.decrypt_key = key
    self.decipher = self.decrypt_rc4  # XXX may be AES
    self._initialized = True
    return
def decrypt_rc4(self, objid, genno, data):
    """Run *data* through Arcfour with a key specific to one object.

    The key material is ``self.decrypt_key`` followed by the low three
    bytes of *objid* and the low two bytes of *genno* (both packed
    little-endian).  Its MD5 digest, truncated to the length of that
    material but never more than 16 bytes, is the Arcfour key.
    Presumably this is Algorithm 3.1 of the PDF reference -- confirm.
    """
    base_key = self.decrypt_key
    objid_part = struct.pack('<L', objid)[:3]
    genno_part = struct.pack('<L', genno)[:2]
    material = base_key + objid_part + genno_part
    size = min(len(material), 16)
    object_key = md5.md5(material).digest()[:size]
    cipher = Arcfour(object_key)
    return cipher.process(data)