def encryptionkey(password, OwnerKey, Permissions, FileId1, revision=2):
    """Derive the document encryption key from the user password.

    The key is an MD5 digest over, in order: the password padded with
    ``PadString`` and truncated to 32 bytes, ``OwnerKey``, the low four
    bytes of ``Permissions`` (low order byte first) and ``FileId1``
    (the first string of the file id array).

    Revision 2 returns the first 5 bytes of that digest.  Revision 3
    re-hashes the digest 50 times and returns the first 16 bytes.

    Raises ValueError for any other revision; previously such a call
    failed with an UnboundLocalError because no key was ever assigned.
    """
    if revision not in (2, 3):
        raise ValueError('Unknown algorithm revision %s' % revision)

    # add padding string, then truncate to 32 bytes
    password = (asBytes(password) + PadString)[:32]

    # translate permissions to string, low order byte first
    p = Permissions
    permissionsString = b""
    for i in range(4):
        # masking with 0xff already yields 0..255 (also for negative values)
        permissionsString += int2Byte(p & 0xff)
        p = p >> 8

    digest = md5(password)
    digest.update(asBytes(OwnerKey))
    digest.update(permissionsString)
    digest.update(asBytes(FileId1))
    md5output = digest.digest()

    if revision == 2:
        key = md5output[:5]
    else:
        # revision 3 algorithm - loop 50 times
        for x in range(50):
            md5output = md5(md5output).digest()
        key = md5output[:16]

    if DEBUG:
        print('encryptionkey(%s,%s,%s,%s,%s)==>%s' % tuple([hexText(str(x)) for x in (password, OwnerKey, Permissions, FileId1, revision, key)]))
    return key
def computeO(userPassword, ownerPassword, revision):
    """Compute the owner entry for revision 2 or 3 encryption.

    Both passwords are padded with ``PadString`` and cut to 32 bytes; an
    empty owner password falls back to the user password.  The padded
    user password is ArcIV-encoded with a key taken from the MD5 digest
    of the padded owner password: the first 5 digest bytes for revision
    2, or, for revision 3, the digest re-hashed 50 times and applied in
    20 rounds with ``xorKey(round, digest)``.
    """
    from reportlab.lib.arciv import ArcIV
    assert revision in (2,3), 'Unknown algorithm revision %s' % revision

    if not ownerPassword:
        ownerPassword = userPassword

    owner_padded = (asBytes(ownerPassword) + PadString)[0:32]
    user_full = asBytes(userPassword) + PadString
    user_padded = user_full[:32]
    owner_hash = md5(owner_padded).digest()

    if DEBUG:
        print('PadString=%s\nownerPad=%s\npassword=%s\nuserPad=%s\ndigest=%s\nrevision=%s' % (ascii(PadString),ascii(owner_padded),ascii(user_full),ascii(user_padded),ascii(owner_hash),revision))

    if revision == 2:
        O = ArcIV(owner_hash[:5]).encode(user_padded)
    elif revision == 3:
        for _ in range(50):
            owner_hash = md5(owner_hash).digest()
        owner_hash = owner_hash[:16]
        O = user_padded
        for round_no in range(20):
            O = ArcIV(xorKey(round_no, owner_hash)).encode(O)

    if DEBUG:
        print('computeO(%s,%s,%s)==>%s' % tuple([hexText(str(x)) for x in (userPassword, ownerPassword, revision,O)]))
    return O
def computeO(userPassword, ownerPassword, revision):
    """Compute the owner entry for revision 2 or 3 encryption.

    Both passwords are padded with ``PadString`` and cut to 32 bytes; an
    empty owner password falls back to the user password.  The padded
    user password is ArcIV-encoded with a key taken from the MD5 digest
    of the padded owner password: the first 5 digest bytes for revision
    2, or, for revision 3, the digest re-hashed 50 times and applied in
    20 rounds with ``xorKey(round, digest)``.
    """
    from reportlab.lib.arciv import ArcIV
    assert revision in (2, 3), 'Unknown algorithm revision %s' % revision

    if not ownerPassword:
        ownerPassword = userPassword

    owner_padded = (asBytes(ownerPassword) + PadString)[0:32]
    user_full = asBytes(userPassword) + PadString
    user_padded = user_full[:32]
    owner_hash = md5(owner_padded).digest()

    if DEBUG:
        print('PadString=%s\nownerPad=%s\npassword=%s\nuserPad=%s\ndigest=%s\nrevision=%s' % (
            ascii(PadString), ascii(owner_padded), ascii(user_full),
            ascii(user_padded), ascii(owner_hash), revision))

    if revision == 2:
        O = ArcIV(owner_hash[:5]).encode(user_padded)
    elif revision == 3:
        for _ in range(50):
            owner_hash = md5(owner_hash).digest()
        owner_hash = owner_hash[:16]
        O = user_padded
        for round_no in range(20):
            O = ArcIV(xorKey(round_no, owner_hash)).encode(O)

    if DEBUG:
        print('computeO(%s,%s,%s)==>%s' % tuple([
            hexText(str(x))
            for x in (userPassword, ownerPassword, revision, O)
        ]))
    return O
def encryptionkey(password, OwnerKey, Permissions, FileId1, revision=2):
    """Derive the document encryption key from the user password.

    The key is an MD5 digest over, in order: the password padded with
    ``PadString`` and truncated to 32 bytes, ``OwnerKey``, the low four
    bytes of ``Permissions`` (low order byte first) and ``FileId1``
    (the first string of the file id array).

    Revision 2 returns the first 5 bytes of that digest.  Revision 3
    re-hashes the digest 50 times and returns the first 16 bytes.

    Raises ValueError for any other revision; previously such a call
    failed with an UnboundLocalError because no key was ever assigned.
    """
    if revision not in (2, 3):
        raise ValueError('Unknown algorithm revision %s' % revision)

    # add padding string, then truncate to 32 bytes
    password = (asBytes(password) + PadString)[:32]

    # translate permissions to string, low order byte first
    p = Permissions
    permissionsString = b""
    for i in range(4):
        # masking with 0xff already yields 0..255 (also for negative values)
        permissionsString += int2Byte(p & 0xff)
        p = p >> 8

    digest = md5(password)
    digest.update(asBytes(OwnerKey))
    digest.update(permissionsString)
    digest.update(asBytes(FileId1))
    md5output = digest.digest()

    if revision == 2:
        key = md5output[:5]
    else:
        # revision 3 algorithm - loop 50 times
        for x in range(50):
            md5output = md5(md5output).digest()
        key = md5output[:16]

    if DEBUG:
        print('encryptionkey(%s,%s,%s,%s,%s)==>%s' % tuple([hexText(str(x)) for x in (password, OwnerKey, Permissions, FileId1, revision, key)]))
    return key
def test17(self):
    """Check asUnicode, asUnicodeEx and asBytes on text, bytes and bad input.

    Text and byte inputs must convert to the expected type; a list must
    raise AttributeError whose message names the failing helper.
    """
    self.assertEqual(asUnicode(u'abc'), u'abc')
    self.assertEqual(asUnicode(b'abc'), u'abc')
    self.assertRaises(AttributeError, asUnicode, ['abc'])
    self.myAssertRaisesRegex(AttributeError, r"asUnicode\(.*'list' object has no attribute 'decode'", asUnicode, ['abc'])
    self.assertEqual(asUnicodeEx(u'abc'), u'abc')
    self.assertEqual(asUnicodeEx(b'abc'), u'abc')
    self.assertEqual(asUnicodeEx(123), u'123')
    self.assertEqual(asBytes(u'abc'), b'abc')
    self.assertEqual(asBytes(b'abc'), b'abc')
    self.assertRaises(AttributeError, asBytes, ['abc'])
    # raw string: "\(" in a normal literal is an invalid escape sequence
    self.myAssertRaisesRegex(AttributeError, r"asBytes\(.*'list' object has no attribute 'encode'", asBytes, ['abc'])
def create_pdf(catalog, template):
    """Creates PDF as a binary stream in memory, and returns it

    This can then be used to write to disk from management commands or
    crons, or returned to caller via Django views.

    The generated RML is also written to ``latest.rml`` inside
    ``DATA_DIR`` before it is converted.
    """
    RML_DIR = 'rml'
    templateName = os.path.join(RML_DIR, template)
    template = preppy.getModule(templateName)
    namespace = {'products': catalog, 'RML_DIR': RML_DIR, 'IMG_DIR': 'img'}
    rml = template.getOutput(namespace, quoteFunc=preppy.stdQuote)
    rml_bytes = asBytes(rml)
    # use a context manager so the file handle is closed deterministically
    with open(os.path.join(DATA_DIR, 'latest.rml'), 'wb') as rml_file:
        rml_file.write(rml_bytes)
    buf = getBytesIO()
    rml2pdf.go(rml_bytes, outputFileName=buf)
    return buf.getvalue()
def pygments2xpre(s, language="python"):
    "Return markup suitable for XPreformatted"
    try:
        from pygments import highlight
        from pygments.formatters import HtmlFormatter
    except ImportError:
        # without pygments the text is handed back untouched
        return s
    from pygments.lexers import get_lexer_by_name

    # On the non-Py3 path unicode input is highlighted as bytes and the
    # final markup is converted back with asUnicode.
    convert_back = False
    if isPy3:
        out = getStringIO()
    else:
        if isUnicode(s):
            s = asBytes(s)
            convert_back = True
        out = getBytesIO()

    lexer = get_lexer_by_name(language)
    formatter = HtmlFormatter()
    highlight(s, lexer, formatter, out)

    # collect (css class, colour) pairs for every class whose style starts with a colour
    styles = []
    for cls, (style, ttype, level) in formatter.class2style.items():
        if cls and style and style.startswith('color:'):
            colour = style.split(';')[0].split(':')[1].strip()
            styles.append((cls, colour))

    markup = _2xpre(out.getvalue(), styles)
    if convert_back:
        return asUnicode(markup)
    return markup
def _getCacheFileName(self):
    """Base this on the directories...same set of directories
    should give same cache"""
    from reportlab.lib.utils import get_rl_tempfile
    encoding = self._fsEncoding
    # sort first so the digest does not depend on the order of self._dirs
    encoded_dirs = [asBytes(d,enc=encoding) for d in sorted(self._dirs)]
    digest = md5(b''.join(encoded_dirs)).hexdigest()
    return get_rl_tempfile('fonts_%s.dat' % digest)
def _getCacheFileName(self):
    """Base this on the directories...same set of directories
    should give same cache"""
    from reportlab.lib.utils import get_rl_tempfile
    encoding = self._fsEncoding
    # sort first so the digest does not depend on the order of self._dirs
    encoded_dirs = [asBytes(d, enc=encoding) for d in sorted(self._dirs)]
    digest = md5(b''.join(encoded_dirs)).hexdigest()
    return get_rl_tempfile('fonts_%s.dat' % digest)
def testAsciiBase85(self):
    "Check that ASCII-Base85 encoding followed by decoding returns the original data."
    msg = "Round-trip AsciiBase85 encoding failed."
    plain = 'What is the average velocity of a sparrow?'

    # the remainder block can be absent or from 1 to 4 bytes; growing the
    # text by one character per pass exercises every remainder size
    # (range rather than xrange: xrange is not a Python 3 builtin)
    for i in range(256):
        encoded = asciiBase85Encode(plain)
        decoded = asciiBase85Decode(encoded)
        assert decoded == asBytes(plain, 'latin1'), msg
        plain += chr(i)
def testAsciiBase85(self):
    "Check that ASCII-Base85 encoding followed by decoding returns the original data."
    msg = "Round-trip AsciiBase85 encoding failed."
    plain = 'What is the average velocity of a sparrow?'

    # the remainder block can be absent or from 1 to 4 bytes; growing the
    # text by one character per pass exercises every remainder size
    # (range rather than xrange: xrange is not a Python 3 builtin)
    for i in range(256):
        encoded = asciiBase85Encode(plain)
        decoded = asciiBase85Decode(encoded)
        assert decoded == asBytes(plain,'latin1'), msg
        plain += chr(i)
def _escape_and_limit(s):
    """Escape every byte of s via _ESCAPEDICT and break up long runs.

    Once the escaped text emitted since the last break reaches 200
    characters, a backslash followed by a newline is inserted and the
    count starts again.
    """
    pieces = []
    run_length = 0
    for raw in asBytes(s):
        escaped = _ESCAPEDICT[char2int(raw)]
        pieces.append(escaped)
        run_length += len(escaped)
        if n_limit_reached(run_length):
            pieces.append('\\\n')
            run_length = 0
    return ''.join(pieces)


def n_limit_reached(count):
    # the original threshold: 200 or more characters since the last break
    return count>=200
def _escape_and_limit(s):
    """Escape every byte of s via _ESCAPEDICT and break up long runs.

    Once the escaped text emitted since the last break reaches 200
    characters, a backslash followed by a newline is inserted and the
    count starts again.
    """
    pieces = []
    run_length = 0
    for raw in asBytes(s):
        escaped = _ESCAPEDICT[char2int(raw)]
        pieces.append(escaped)
        run_length += len(escaped)
        if run_length >= 200:
            pieces.append('\\\n')
            run_length = 0
    return ''.join(pieces)
def encodePDF(key, objectNumber, generationNumber, string, revision=None):
    "Encodes a string or stream"
    revision = checkRevision(revision)

    if revision in (2, 3):
        from reportlab.lib.arciv import ArcIV

        def low_bytes(number, count):
            # the 'count' least significant bytes of number, low byte first
            return b''.join(int2Byte((number >> (8 * k)) & 0xff) for k in range(count))

        # per-object key: document key + 3 bytes of the object number
        # + 2 bytes of the generation number, hashed with MD5
        salted = key
        salted += low_bytes(objectNumber, 3)
        salted += low_bytes(generationNumber, 2)
        md5output = md5(salted).digest()
        if revision == 2:
            key = md5output[:10]
        elif revision == 3:
            key = md5output  # all 16 bytes
        encrypted = ArcIV(key).encode(string)
    elif revision == 5:
        iv = os_urandom(16)
        encrypter = pyaes.Encrypter(pyaes.AESModeOfOperationCBC(key, iv=iv))

        # pkcs7 style padding so that the size of the encrypted block is multiple of 16
        # NOTE(review): a 16 character input gets no padding here and the
        # length is measured before any utf-8 encoding - confirm intended.
        string_len = len(string)
        if string_len > 16:
            padding_len = 16 - (string_len % 16)
        else:
            padding_len = 16 - string_len
        padding = chr(padding_len) * padding_len if padding_len > 0 else ""

        if isinstance(string, str):
            string = (string + padding).encode("utf-8")
        else:
            string += asBytes(padding)

        # the random iv is stored in front of the cipher text
        encrypted = iv + encrypter.feed(string)
        encrypted += encrypter.feed()

    if DEBUG:
        print('encodePDF(%s,%s,%s,%s,%s)==>%s' % tuple([
            hexText(str(x)) for x in (key, objectNumber, generationNumber,
                                      string, revision, encrypted)
        ]))
    return encrypted
def getFamilyNames(self):
    "Returns a list of the distinct font families found"
    by_family = self._fontsByFamily
    if not by_family:
        # first call: group the known fonts by family name, skipping
        # fonts that report no family
        for font in self._fonts:
            family = font.familyName
            if family is None:
                continue
            by_family.setdefault(family, []).append(font)
    encoding = self._fsEncoding
    return sorted(asBytes(name,enc=encoding) for name in by_family.keys())
def getFamilyNames(self):
    "Returns a list of the distinct font families found"
    by_family = self._fontsByFamily
    if not by_family:
        # first call: group the known fonts by family name, skipping
        # fonts that report no family
        for font in self._fonts:
            family = font.familyName
            if family is None:
                continue
            by_family.setdefault(family, []).append(font)
    encoding = self._fsEncoding
    return sorted(asBytes(name, enc=encoding) for name in by_family.keys())
def testAsciiBase85RoundTrip(self):
    """Round-trip text through every encoder/decoder pair from getFuncs.

    Each pair must reproduce the plain text, and every pair after the
    first must produce the same encoded and decoded values as the first.
    The text grows by one character per pass.
    """
    plain = 'What is the average velocity of a sparrow?'
    eFuncs = getFuncs('asciiBase85Encode')
    # range rather than xrange: xrange is not a Python 3 builtin
    for i in range(256):
        for j, (dfunc, kind) in enumerate(getFuncs('asciiBase85Decode')):
            efunc = eFuncs[j][0]
            encoded = efunc(plain)
            decoded = dfunc(encoded)
            assert decoded == asBytes(plain,'latin1'), "Round-trip AsciiBase85 failed for %s & %s\nplain=%s\nencoded=%s\ndecoded=%s" % (
                ascii(efunc),ascii(dfunc), ascii(plain), ascii(encoded), ascii(decoded))
            if not j:
                # remember the first implementation's results as the baseline
                enc0 = encoded
                dec0 = decoded
            else:
                assert encoded==enc0, " Python & C encodings differ failed for %s & %s\nplain=%s\nencode0=%s\nencoded=%s\ndecode0=%s\ndecoded=%s" % (
                    ascii(efunc),ascii(dfunc), ascii(plain), ascii(enc0), ascii(encoded), ascii(dec0), ascii(decoded))
                assert decoded==dec0, " Python & C decodings differ failed for %s & %s\nplain=%s\nencode0=%s\nencoded=%s\ndecode0=%s\ndecoded=%s" % (
                    ascii(efunc),ascii(dfunc), ascii(plain), ascii(enc0), ascii(encoded), ascii(dec0), ascii(decoded))
        plain += chr(i)
def unicodeToUTF8(tree):
    """Return a copy of a (tagName, attrs, children, spare) tree as UTF-8 bytes.

    The tag name, attribute keys and values, and text children are
    converted to byte strings; children that are neither text nor bytes
    are treated as sub-trees and converted recursively.  ``attrs`` and
    ``children`` may be None and are then kept as None; ``spare`` is
    passed through unchanged.

    The original tested the child types the wrong way round: byte
    strings were sent through ``.encode`` (which bytes objects do not
    have on Python 3) while text children were left unencoded.
    """
    tagName, attrs, children, spare = tree
    newTagName = asBytes(tagName)
    if attrs is None:
        newAttrs = None
    else:
        newAttrs = {}
        for key, value in list(attrs.items()):
            # asBytes leaves values that are already bytes untouched
            newAttrs[asBytes(key,enc='utf8')] = asBytes(value,enc='utf8')
    if children is None:
        newChildren = None
    else:
        newChildren = []
        for child in children:
            if isinstance(child,unicodeT):
                newChildren.append(child.encode('utf8'))
            elif isinstance(child,bytesT):
                # already encoded
                newChildren.append(child)
            else:
                newChildren.append(unicodeToUTF8(child))
    return (newTagName, newAttrs, newChildren, spare)
def unicodeToUTF8(tree):
    """Return a copy of a (tagName, attrs, children, spare) tree as UTF-8 bytes.

    The tag name, attribute keys and values, and text children are
    converted to byte strings; children that are neither text nor bytes
    are treated as sub-trees and converted recursively.  ``attrs`` and
    ``children`` may be None and are then kept as None; ``spare`` is
    passed through unchanged.

    The original tested the child types the wrong way round: byte
    strings were sent through ``.encode`` (which bytes objects do not
    have on Python 3) while text children were left unencoded.
    """
    tagName, attrs, children, spare = tree
    newTagName = asBytes(tagName)
    if attrs is None:
        newAttrs = None
    else:
        newAttrs = {}
        for key, value in list(attrs.items()):
            # asBytes leaves values that are already bytes untouched
            newAttrs[asBytes(key, enc='utf8')] = asBytes(value, enc='utf8')
    if children is None:
        newChildren = None
    else:
        newChildren = []
        for child in children:
            if isinstance(child, unicodeT):
                newChildren.append(child.encode('utf8'))
            elif isinstance(child, bytesT):
                # already encoded
                newChildren.append(child)
            else:
                newChildren.append(unicodeToUTF8(child))
    return (newTagName, newAttrs, newChildren, spare)
def _processLine(line, sep=',', conv=0):
    """Split one line of separated text into a list of fields.

    line  text or byte string; the fields have the same type
    sep   field separator, converted to the type of line
    conv  when true, each field is converted to int, else float, and is
          kept as is when neither conversion works

    Trailing spaces, carriage returns and line feeds are ignored.
    Unquoted spaces at the start of a field, and those directly before a
    separator or the end of the line, are dropped.  Double quotes
    delimit quoted text and a doubled quote inside quoted text stands
    for one quote character.  A separator at the very end adds a final
    empty field.

    Single characters are compared as 1-length slices: indexing a byte
    string on Python 3 yields an int that never equals b' ', so the
    space skipping silently failed for byte input.
    """
    if isUnicode(line):
        space = u' '
        dquot = u'"'
        empty = u''
        speol = u' \r\n'
        sep = asUnicode(sep)
    else:
        space = b' '
        dquot = b'"'
        empty = b''
        speol = b' \r\n'
        sep = asBytes(sep)
    fields = []
    p = 0
    ll = len(line)
    ls = len(sep)
    line += space
    while (ll > 0 and (line[ll-1] in speol)):
        ll -= 1

    while p < ll:
        #Skip unquoted space at the start of a field
        while p < ll and line[p:p+1] == space:
            p += 1

        field = empty
        ql = 0      # 1 while inside double quotes
        while p < ll:
            #Skip unquoted space at the end of a field
            if ql == 0 and line[p:p+1] == space:
                q = p
                while q < ll and line[q:q+1] == space:
                    q += 1
                if q >= ll:
                    break
                elif line[q:q+ls] == sep:
                    p = q
            if ql == 0 and line[p:p+ls] == sep:
                break
            elif line[p:p+1] == dquot:
                if ql == 0:
                    ql = 1
                elif line[p+1:p+2] == dquot:
                    # doubled quote inside quotes is a literal quote
                    field += dquot
                    p += 1
                else:
                    ql = 0
            else:
                field += line[p:p+1]
            p += 1
        p += ls
        if conv:
            try:
                fields.append(int(field))
            except ValueError:
                try:
                    fields.append(float(field))
                except ValueError:
                    fields.append(field)
        else:
            fields.append(field)
    if line[ll-ls:ll] == sep:
        fields.append(empty)    #extra field when there's a separator at the end

    return fields
def bytes3(x):
    """Return x as a byte string.

    Text and byte strings are converted with asBytes.  Anything else is
    treated as an iterable of integers in range(256) and packed into a
    byte string, one byte per integer.

    Works on Python 2 and 3: the original relied on ``basestring`` and
    on ``chr`` returning a byte string, which only hold on Python 2.
    """
    if isinstance(x, (bytes, type(u''))):
        return asBytes(x)
    # iter() keeps a bare integer an error instead of letting
    # bytearray(n) build n zero bytes
    return bytes(bytearray(iter(x)))
def prepare(self, document, overrideID=None):
    """Compute the encryption values for document; may only be called once.

    Sets self.P from permissionBits().  For revision 5 this creates a
    random 32 byte key and stores U, UE, O, OE and Perms; for revisions
    2 and 3 it stores O, key and U obtained from computeO,
    encryptionkey and computeU.  Finally objnum and version are reset
    to None and the instance is marked as prepared.

    Raises ValueError when the instance was already prepared.
    """
    # get ready to do encryption
    if DEBUG:
        print('StandardEncryption.prepare(...) - revision %d' % self.revision)
    if self.prepared:
        raise ValueError("encryption already prepared!")

    # get the unescaped string value of the document id (first array element).
    # we allow one to be passed in instead to permit reproducible tests
    # of our algorithm, but in real life overrideID will always be None
    if overrideID:
        internalID = overrideID
    else:
        document.ID()  # initialize it...
        internalID = document.signature.digest()
        #AR debugging
        if CLOBBERID:
            internalID = "xxxxxxxxxxxxxxxx"

    if DEBUG:
        print('userPassword = %r' % self.userPassword)
        print('ownerPassword = %r' % self.ownerPassword)
        print('internalID = %r' % internalID)

    self.P = int(self.permissionBits() - 2**31)
    if CLOBBERPERMISSIONS:
        self.P = -44  # AR hack
    if DEBUG:
        print("self.P = %s" % repr(self.P))

    if self.revision == 5:
        # Init vector for AES cipher (16 null bytes)
        iv = b'\x00' * 16

        def aes_cbc(aes_key, data):
            # AES-CBC encrypt data with the fixed iv above
            encrypter = pyaes.Encrypter(
                pyaes.AESModeOfOperationCBC(aes_key, iv=iv))
            result = encrypter.feed(data)
            result += encrypter.feed()
            return result

        # Random User salts
        uvs = os_urandom(8)
        uks = os_urandom(8)

        # the main encryption key
        self.key = asBytes(os_urandom(32))

        if DEBUG:
            print("uvs (hex) = %s" % hexText(uvs))
            print("uks (hex) = %s" % hexText(uks))
            print("self.key (hex) = %s" % hexText(self.key))

        # only the first 127 characters of a password are used
        user_pw = asBytes(self.userPassword[:127])

        # U: sha-256 of user password + first salt, followed by both salts
        self.U = sha256(user_pw + uvs).digest() + uvs + uks
        if DEBUG:
            print("self.U (hex) = %s" % hexText(self.U))

        # UE: the main key encrypted under sha-256(user password + second salt)
        self.UE = aes_cbc(sha256(user_pw + uks).digest(), self.key)
        if DEBUG:
            print("self.UE (hex) = %s" % hexText(self.UE))

        # Random Owner salts
        ovs = os_urandom(8)
        oks = os_urandom(8)

        owner_pw = asBytes(self.ownerPassword[:127])

        # O: sha-256 of owner password + first salt + U, followed by both salts
        self.O = sha256(owner_pw + ovs + self.U).digest() + ovs + oks
        if DEBUG:
            print("self.O (hex) = %s" % hexText(self.O))

        # OE: the main key encrypted under sha-256(owner password + second salt + U)
        self.OE = aes_cbc(sha256(owner_pw + oks + self.U).digest(), self.key)
        if DEBUG:
            print("self.OE (hex) = %s" % hexText(self.OE))

        # Compute permissions array
        # the permission value goes in the first 32 bits, low byte first
        permsarr = [(self.P >> shift) & 0xFF for shift in (0, 8, 16, 24)]
        permsarr += [0xFF] * 4
        # 'T' if EncryptMetaData is True (default), 'F' otherwise;
        # a, d, b are magic values
        permsarr += [ord(ch) for ch in 'Tadb']
        # four trailing 0x01 bytes (original note: trailing values are ignored)
        permsarr += [0x01] * 4

        # the permission array is stored encrypted in the Perms field
        self.Perms = aes_cbc(self.key, bytes3(permsarr))
        if DEBUG:
            print("self.Perms (hex) = %s" % hexText(self.Perms))

    elif self.revision in (2, 3):
        self.O = computeO(self.userPassword, self.ownerPassword,
                          self.revision)
        if DEBUG:
            print("self.O (as hex) = %s" % hexText(self.O))

        self.key = encryptionkey(self.userPassword, self.O, self.P,
                                 internalID, revision=self.revision)
        if DEBUG:
            print("self.key (hex) = %s" % hexText(self.key))

        self.U = computeU(self.key, revision=self.revision,
                          documentId=internalID)
        if DEBUG:
            print("self.U (as hex) = %s" % hexText(self.U))

    self.objnum = self.version = None
    self.prepared = 1
def _AsciiHexEncode(self, input):  # also based on piddlePDF
    "Helper function used by images"
    # two lower-case hex digits per byte of the input
    hex_pairs = ['%02x' % char2int(char) for char in asBytes(input)]
    return ''.join(hex_pairs)
def value(self, v):
    """Store v converted with asBytes and set the _recalc flag."""
    converted = asBytes(v)
    self._value = converted
    self._recalc = True
def asciiBase85Decode(input):
    """Decodes input using ASCII-Base85 coding.

    This is not normally used - Acrobat Reader decodes for you
    - but a round trip is essential for testing."""

    def split_word(number):
        # break number into four base-256 digits, most significant first
        rest, b4 = divmod(number,256)
        rest, b3 = divmod(rest,256)
        b1, b2 = divmod(rest, 256)
        assert number == 16777216 * b1 + 65536 * b2 + 256 * b3 + b4, 'dodgy code!'
        return b1, b2, b3, b4

    #strip all whitespace
    text = ''.join(asNative(input).split())
    #check end
    assert text[-2:] == '~>', 'Invalid terminator for Ascii Base 85 Stream'
    #chop off terminator, then expand the 'z' shorthand
    text = text[:-2].replace('z','!!!!!')

    # special rules apply if not a multiple of five bytes.
    full_groups, leftover = divmod(len(text), 5)
    cut = 5 * full_groups

    chars = []
    for start in range(0, cut, 5):
        d1, d2, d3, d4, d5 = [ord(ch) - 33 for ch in text[start:start+5]]
        b1, b2, b3, b4 = split_word((((85*d1+d2)*85+d3)*85+d4)*85 + d5)
        chars.extend([chr(b1), chr(b2), chr(b3), chr(b4)])

    #decode however many bytes we have as usual
    if leftover > 0:
        tail = text[cut:].ljust(5, '!')
        d1, d2, d3, d4, d5 = [ord(ch) - 33 for ch in tail]
        #the encoding loses data by rounding the number to x bytes, so a
        #correction depending on the number of leftover characters is added
        rounding = (0,0,0xFFFFFF,0xFFFF,0xFF)[leftover]
        b1, b2, b3, b4 = split_word((((85*d1+d2)*85+d3)*85+d4)*85 + (d5 + rounding))
        #n leftover characters carry n-1 bytes
        chars.append(''.join([chr(b) for b in (b1, b2, b3)[:leftover-1]]))

    return asBytes(''.join(chars),enc='latin1')
def asciiBase85Decode(input):
    """Decodes input using ASCII-Base85 coding.

    This is not normally used - Acrobat Reader decodes for you
    - but a round trip is essential for testing."""

    def split_word(number):
        # break number into four base-256 digits, most significant first
        rest, b4 = divmod(number, 256)
        rest, b3 = divmod(rest, 256)
        b1, b2 = divmod(rest, 256)
        assert number == 16777216 * b1 + 65536 * b2 + 256 * b3 + b4, 'dodgy code!'
        return b1, b2, b3, b4

    #strip all whitespace
    text = ''.join(asNative(input).split())
    #check end
    assert text[-2:] == '~>', 'Invalid terminator for Ascii Base 85 Stream'
    #chop off terminator, then expand the 'z' shorthand
    text = text[:-2].replace('z', '!!!!!')

    # special rules apply if not a multiple of five bytes.
    full_groups, leftover = divmod(len(text), 5)
    cut = 5 * full_groups

    chars = []
    for start in range(0, cut, 5):
        d1, d2, d3, d4, d5 = [ord(ch) - 33 for ch in text[start:start + 5]]
        b1, b2, b3, b4 = split_word(
            (((85 * d1 + d2) * 85 + d3) * 85 + d4) * 85 + d5)
        chars.extend([chr(b1), chr(b2), chr(b3), chr(b4)])

    #decode however many bytes we have as usual
    if leftover > 0:
        tail = text[cut:].ljust(5, '!')
        d1, d2, d3, d4, d5 = [ord(ch) - 33 for ch in tail]
        #the encoding loses data by rounding the number to x bytes, so a
        #correction depending on the number of leftover characters is added
        rounding = (0, 0, 0xFFFFFF, 0xFFFF, 0xFF)[leftover]
        b1, b2, b3, b4 = split_word(
            (((85 * d1 + d2) * 85 + d3) * 85 + d4) * 85 + (d5 + rounding))
        #n leftover characters carry n-1 bytes
        chars.append(''.join([chr(b) for b in (b1, b2, b3)[:leftover - 1]]))

    return asBytes(''.join(chars), enc='latin1')