def disassemble(aCode):
    """Disassemble a string of opcode bytes into readable instructions.

    Every byte at the program counter selects an entry of ``aCode_info``.
    An entry is read as ``(mnemonic, operand_format)`` where the operand
    format is ``0`` (no operands), ``-1`` (one count byte followed by that
    many unsigned bytes) or a ``struct`` format string.

    Args:
        aCode: the raw bytecode.

    Returns:
        A list of strings, one per instruction, formatted either as ``NAME``
        or as ``NAME(arg, ...)``.  If an instruction that expects operands is
        the very last byte of the input, decoding stops and the instructions
        decoded so far are returned.

    Raises:
        struct.error: if the operands of an instruction are truncated.
    """
    codelen = len(aCode)
    pc = 0
    res = []
    while pc < codelen:
        opcode = byteord(aCode[pc:pc + 1])
        # Unknown opcodes fall back to entry 0.  The test has to be ``>=``:
        # an opcode equal to the table length is already out of range and
        # would otherwise raise IndexError.
        if opcode >= len(aCode_info):
            instr = aCode_info[0]
        else:
            instr = aCode_info[opcode]
        name, operands = instr[0], instr[1]
        pc += 1
        if operands != 0 and pc >= codelen:
            # Operands expected but no data left.
            return res
        if operands == -1:
            # Variable-length operand list: a count byte, then `count` bytes.
            count = byteord(aCode[pc])
            fmt = "%dB" % count
            pc += 1
        elif operands == 0:
            fmt = ""
        else:
            fmt = operands
        if fmt == "":
            res.append(name)
            continue
        # Unpack in place instead of copying the tail of the buffer.
        parms = struct.unpack_from(fmt, aCode, pc)
        res.append(name + "(" + ", ".join(map(str, parms)) + ")")
        pc += struct.calcsize(fmt)
    return res
def readLWFN(path, onlyHeader=False):
    """Read an LWFN font file and return its raw data.

    The 'POST' resources of the file are visited in order.  Byte 0 of each
    resource is a chunk code and byte 1 must be zero.  Codes 1 and 2
    contribute their payload (everything after the two header bytes), code 4
    appends the complete content of the file at *path*, code 0 is skipped,
    and codes 3 and 5 end the scan.  With *onlyHeader* set, the scan also
    ends at the first chunk whose code is 2.

    Raises:
        T1Error: for a non-zero second header byte or an unknown chunk code.
    """
    from fontTools.misc.macRes import ResourceReader

    reader = ResourceReader(path)
    try:
        chunks = []
        for resource in reader.get('POST', []):
            payload = resource.data
            code = byteord(payload[0])
            if byteord(payload[1]) != 0:
                raise T1Error('corrupt LWFN file')
            if code == 0:
                # comment, ignore
                continue
            if code in (3, 5):
                break
            if code == 4:
                with open(path, "rb") as f:
                    chunks.append(f.read())
                continue
            if code not in (1, 2):
                raise T1Error('bad chunk code: ' + repr(code))
            if onlyHeader and code == 2:
                break
            chunks.append(payload[2:])
    finally:
        # Always release the resource reader, even on error.
        reader.close()
    data = bytesjoin(chunks)
    assertType1(data)
    return data
def decompile(self, data, ttFont):
    """Populate this table object from its binary *data*.

    The fixed-size header is unpacked onto ``self`` according to
    ``SINGFormat``; the bytes that follow it are the base glyph name.  The
    raw ``uniqueName``, ``nameLength`` and ``METAMD5`` fields are then
    replaced by readable values.  *ttFont* is not used here.
    """
    _, remainder = sstruct.unpack2(SINGFormat, data, self)
    self.uniqueName = self.decompileUniqueName(self.uniqueName)
    self.nameLength = byteord(self.nameLength)
    assert len(remainder) == self.nameLength
    self.baseGlyphName = tostr(remainder)

    # Render the digest as "[0x.., 0x.., ...]".  The first byte is indexed
    # explicitly, so an empty digest raises just as it always did.
    digest = self.METAMD5
    hexBytes = [hex(byteord(digest[0]))]
    hexBytes.extend(hex(byteord(value)) for value in digest[1:])
    self.METAMD5 = "[" + ", ".join(hexBytes) + "]"
def decompile(self, data, ttFont):
    """Populate ``self.hdmx`` from the binary table *data*.

    After the header (unpacked onto ``self`` per ``hdmxHeaderFormat``) the
    data holds ``self.numRecords`` records of ``self.recordSize`` bytes each:
    a ppem byte, a second byte, then one width byte per glyph.  Each record
    is stored in ``self.hdmx`` keyed by its ppem value.
    """
    numGlyphs = ttFont['maxp'].numGlyphs
    # The result is not used, but the call is kept.
    # NOTE(review): it may be relied on for a side effect - confirm before removing.
    ttFont.getGlyphOrder()
    _, remaining = sstruct.unpack2(hdmxHeaderFormat, data, self)
    self.hdmx = {}
    for _record in range(self.numRecords):
        ppem = byteord(remaining[0])
        # Second byte of the record: read (so a short record still fails
        # here) but not stored.
        byteord(remaining[1])
        reverseMap = ttFont.getReverseGlyphMap()
        rawWidths = array.array("B", remaining[2:2 + numGlyphs])
        self.hdmx[ppem] = _GlyphnamedList(reverseMap, rawWidths)
        remaining = remaining[self.recordSize:]
    assert len(remaining) == 0, "too much hdmx data"
def getRow(self, row, bitDepth=1, metrics=None, reverseBytes=False):
    """Extract one row of the bitmap from ``self.imageData``.

    Args:
        row: row number; must satisfy ``0 <= row < metrics.height``.
        bitDepth: forwarded to ``self._getBitRange``.
        metrics: metrics object to use; defaults to ``self.metrics``.
        reverseBytes: when false (the default) the assembled row is passed
            through ``_reverseBytes`` before being returned.

    Returns:
        The row data joined into a single byte string.
    """
    if metrics is None:
        metrics = self.metrics
    assert 0 <= row and row < metrics.height, "Illegal row access in bitmap"

    # Walk the row's bit range eight bits at a time.  Each output byte may
    # straddle two bytes of the stored data, or come from a single byte when
    # things happen to be aligned.  The last chunk can be shorter than eight
    # bits; it is trimmed to size and zero-padded.
    #
    # Example of data cut. Data cut represented in X's,
    # '|' represents a byte boundary.
    #   data = ...0XX|XXXXXX00|000... => XXXXXXXX
    #   data = ...0XX|XXXX0000|000... => XXXXXX00
    #   data = ...000|XXXXXXXX|000... => XXXXXXXX
    #   data = ...000|00XXXX00|000... => XXXX0000
    bitRange = self._getBitRange(row, bitDepth, metrics)
    rowBytes = []
    for startBit in range(*(bitRange + (8,))):
        stopBit = min(startBit + 8, bitRange[1])
        width = stopBit - startBit
        shift = startBit % 8
        loByteLoc = startBit // 8
        hiByteLoc = stopBit // 8
        spansTwoBytes = loByteLoc < hiByteLoc
        # Number of bits supplied by the first source byte.
        lowWidth = 8 - shift if spansTwoBytes else width
        lowBits = byteord(_reverseBytes(self.imageData[loByteLoc])) >> shift
        value = lowBits & ((1 << lowWidth) - 1)
        if spansTwoBytes and hiByteLoc < len(self.imageData):
            highBits = byteord(_reverseBytes(self.imageData[hiByteLoc])) << lowWidth
            value = (value | highBits) & ((1 << width) - 1)
        rowBytes.append(bytechr(value))

    # The way the data is kept is opposite the algorithm used.
    packed = bytesjoin(rowBytes)
    if reverseBytes:
        return packed
    return _reverseBytes(packed)
def setRows(self, dataRows, bitDepth=1, metrics=None, reverseBytes=False):
    """Pack *dataRows* into ``self.imageData``.

    Args:
        dataRows: sequence of per-row byte strings.
        bitDepth: forwarded to ``self._getBitRange``.
        metrics: metrics object to use; defaults to ``self.metrics``.
        reverseBytes: when false (the default) every row is first passed
            through ``_reverseBytes``.
    """
    if metrics is None:
        metrics = self.metrics
    if not reverseBytes:
        dataRows = [_reverseBytes(rowData) for rowData in dataRows]

    # Work on a list of integers, which is easier to modify than a byte
    # string; it is converted to bytes at the very end.
    totalBits = self._getBitRange(len(dataRows), bitDepth, metrics)[0]
    numBytes = (totalBits + 7) // 8
    packed = [0] * numBytes
    for row, rowData in enumerate(dataRows):
        bitRange = self._getBitRange(row, bitDepth, metrics)
        for startBit, rawByte in zip(range(*(bitRange + (8,))), rowData):
            stopBit = min(startBit + 8, bitRange[1])
            shift = startBit % 8
            loByteLoc = startBit // 8
            hiByteLoc = stopBit // 8
            spansTwoBytes = loByteLoc < hiByteLoc
            # Number of bits that land in the first destination byte.
            lowWidth = 8 - shift if spansTwoBytes else stopBit - startBit
            value = byteord(rawByte)
            packed[loByteLoc] |= (value & ((1 << lowWidth) - 1)) << shift
            if spansTwoBytes and hiByteLoc < numBytes:
                spill = (value >> lowWidth) & ((1 << (8 - lowWidth)) - 1)
                packed[hiByteLoc] |= spill

    # Save the image data with the bits going the correct way.
    self.imageData = _reverseBytes(bytesjoin(map(bytechr, packed)))
def hintOn(i, hintMaskBytes):
    """Return True when bit *i* of the T2 hintmask bytes is set.

    Used to add the active hints to the bez string when a T2 hintmask
    operator is encountered.  Bits are counted from the most significant
    bit of the first mask byte.
    """
    byteIndex, bitIndex = divmod(i, 8)
    maskByte = byteord(hintMaskBytes[byteIndex])
    return ((maskByte >> (7 - bitIndex)) & 1) == 1
def hexStr(s):
    """Return *s* as a string of two lowercase hex digits per item."""
    digits = string.hexdigits
    pairs = []
    for item in s:
        value = byteord(item)
        pairs.append(digits[(value >> 4) & 0xF] + digits[value & 0xF])
    return ''.join(pairs)
def stringToLong(s):
    """Interpret the 4-byte string *s* as an integer, first byte least significant.

    Raises:
        ValueError: if *s* is not exactly four items long.
    """
    if len(s) != 4:
        raise ValueError('string must be 4 bytes long')
    return sum(byteord(s[position]) << (position * 8) for position in range(4))
def doMask(self, index, bezCommand):
    """Read the mask bytes of a mask operator and emit the active hints.

    The first time through (``self.hintMaskBytes`` still falsy) any pending
    arguments are popped and recorded as vertical hints, and the size of the
    mask in bytes is derived from ``self.hintCount``.  The mask bytes are
    then fetched from the charstring on top of the calling stack.  When
    ``self.read_hints`` is set, the hints selected by the mask are appended
    to ``self.bezProgram``.

    Returns:
        ``(hintMaskString, index)`` with *index* advanced past the mask.
    """
    args = []
    if not self.hintMaskBytes:
        args = self.popallWidth()
        if args:
            self.vhints = []
            self.updateHints(args, self.vhints, "ry")
        self.hintMaskBytes = int((self.hintCount + 7) / 8)

    self.hintMaskString, index = self.callingStack[-1].getBytes(
        index, self.hintMaskBytes)

    if not self.read_hints:
        return self.hintMaskString, index

    curhhints, curvhints = self.getCurHints(self.hintMaskString)
    mask = [hex(byteord(ch)) for ch in self.hintMaskString]
    log.debug("%s %s %s %s %s", bezCommand, mask, curhhints, curvhints, args)

    program = self.bezProgram
    program.append("beginsubr snc\n")
    # Hint values come in pairs; the operator follows every second value.
    for hints, operator in ((curhhints, "rb\n"), (curvhints, "ry\n")):
        for position, hint in enumerate(hints):
            program.append("%s " % hint)
            if position % 2:
                program.append(operator)
    program.extend(["endsubr enc\n", "newcolors\n"])
    return self.hintMaskString, index
def doMask(self, index, bezCommand):
    """Read the mask bytes of a mask operator and emit the active hints.

    The first time through (``self.hintMaskBytes`` still falsy) any pending
    arguments are popped and recorded as vertical hints, and the size of the
    mask in bytes is derived from ``self.hintCount``.  The mask bytes are
    then fetched from the charstring on top of the calling stack.  Unless
    ``self.removeHints`` is set, the hints selected by the mask are appended
    to ``self.bezProgram``.

    Returns:
        ``(hintMaskString, index)`` with *index* advanced past the mask.
    """
    args = []
    if not self.hintMaskBytes:
        args = self.popallWidth()
        if args:
            self.vhints = []
            self.updateHints(args, self.vhints, "ry")
        self.hintMaskBytes = (self.hintCount + 7) // 8

    self.hintMaskString, index = self.callingStack[-1].getBytes(
        index, self.hintMaskBytes)

    if self.removeHints:
        return self.hintMaskString, index

    curhhints, curvhints = self.getCurHints(self.hintMaskString)
    mask = [hex(byteord(ch)) for ch in self.hintMaskString]
    debugMsg(bezCommand, mask, curhhints, curvhints, args)

    program = self.bezProgram
    program.append("beginsubr snc\n")
    # Hint values come in pairs; the operator follows every second value.
    for hints, operator in ((curhhints, "rb\n"), (curvhints, "ry\n")):
        for position, hint in enumerate(hints):
            program.append(str(hint))
            if position % 2:
                program.append(operator)
    program.extend(["endsubr enc\n", "newcolors\n"])
    return self.hintMaskString, index
def unpackPStrings(data, n):
    """Extract *n* Pascal strings (length byte + payload) from *data*.

    Strings for which there is not enough data come back as ``""``.  A
    warning is logged when bytes are left over after the last string, or
    when the data ran out early.
    """
    names = []
    cursor = 0
    total = len(data)
    for _ in range(n):
        length = byteord(data[cursor]) if cursor < total else 0
        cursor += 1
        end = cursor + length
        if end <= total:
            names.append(tostr(data[cursor:end], encoding="latin1"))
        else:
            # Payload would run past the end of the data.
            names.append("")
        cursor = end

    if cursor < total:
        log.warning("%d extra bytes in post.stringData array", total - cursor)
    elif total < cursor:
        log.warning("not enough data in post.stringData array")
    return names
def hintOn(i, hintMaskBytes):
    """Tell whether hint number *i* is active in the given hintmask bytes.

    Used to add the active hints to the bez string when a T2 hintmask
    operator is encountered.  Hint 0 is the most significant bit of the
    first byte.
    """
    selector = 0x80 >> (i % 8)
    return (byteord(hintMaskBytes[i // 8]) & selector) != 0
def hexStr(data): """Convert binary data to a hex string.""" h = string.hexdigits r = '' for c in data: i = byteord(c) r = r + h[(i >> 4) & 0xF] + h[i & 0xF] return r
def decompilePoints_(numPoints, data, offset, tableTag):
    """(numPoints, data, offset, tableTag) --> ([point1, point2, ...], newOffset)

    Decode packed point numbers starting at *offset* in *data*.  A count of
    zero stands for "all points" and yields ``range(numPoints)``.  Otherwise
    the points are stored as runs of byte or word deltas, which are summed
    into absolute point numbers.  Points outside ``0 <= p < numPoints`` are
    reported with a warning but still returned.
    """
    assert tableTag in ('cvar', 'gvar')
    pos = offset
    numPointsInData = byteord(data[pos])
    pos += 1
    if numPointsInData & POINTS_ARE_WORDS:
        # Two-byte count: remaining bits of the first byte are the high part.
        highBits = numPointsInData & POINT_RUN_COUNT_MASK
        numPointsInData = (highBits << 8) | byteord(data[pos])
        pos += 1
    if numPointsInData == 0:
        return (range(numPoints), pos)

    deltas = []
    while len(deltas) < numPointsInData:
        runHeader = byteord(data[pos])
        pos += 1
        runLength = (runHeader & POINT_RUN_COUNT_MASK) + 1
        if runHeader & POINTS_ARE_WORDS:
            typecode, runSize = "H", runLength * 2
        else:
            typecode, runSize = "B", runLength
        run = array.array(typecode)
        run.frombytes(data[pos:pos + runSize])
        if sys.byteorder != "big":
            run.byteswap()
        assert len(run) == runLength
        pos += runSize
        deltas.extend(run)

    # Convert relative to absolute
    points = []
    running = 0
    for delta in deltas:
        running += delta
        points.append(running)

    badPoints = {str(p) for p in points if p < 0 or p >= numPoints}
    if badPoints:
        log.warning("point %s out of range in '%s' table" % (",".join(sorted(badPoints)), tableTag))
    return (points, pos)
def _escapechar(c):
    """Helper function for tagToIdentifier().

    Lowercase letters and digits get a leading underscore, uppercase letters
    a trailing underscore, and anything else becomes its hex code without
    the ``0x`` prefix.
    """
    import re

    if re.match("[a-z0-9]", c):
        return "_" + c
    if re.match("[A-Z]", c):
        return c + "_"
    return hex(byteord(c))[2:]
def unpackBase128(data):
    r""" Read one to five bytes from UIntBase128-encoded input string, and return
    a tuple containing the decoded integer plus any leftover data.

    >>> unpackBase128(b'\x3f\x00\x00') == (63, b"\x00\x00")
    True
    >>> unpackBase128(b'\x8f\xff\xff\xff\x7f')[0] == 4294967295
    True
    >>> unpackBase128(b'\x80\x80\x3f')  # doctest: +IGNORE_EXCEPTION_DETAIL
    Traceback (most recent call last):
      File "<stdin>", line 1, in ?
    TTLibError: UIntBase128 value must not start with leading zeros
    >>> unpackBase128(b'\x8f\xff\xff\xff\xff\x7f')[0]  # doctest: +IGNORE_EXCEPTION_DETAIL
    Traceback (most recent call last):
      File "<stdin>", line 1, in ?
    TTLibError: UIntBase128-encoded sequence is longer than 5 bytes
    >>> unpackBase128(b'\x90\x80\x80\x80\x00')[0]  # doctest: +IGNORE_EXCEPTION_DETAIL
    Traceback (most recent call last):
      File "<stdin>", line 1, in ?
    TTLibError: UIntBase128 value exceeds 2**32-1
    """
    available = len(data)
    if available == 0:
        raise TTLibError('not enough data to unpack UIntBase128')
    if byteord(data[0]) == 0x80:
        # font must be rejected if UIntBase128 value starts with 0x80
        raise TTLibError('UIntBase128 value must not start with leading zeros')

    result = 0
    for consumed in range(woff2Base128MaxSize):
        if consumed >= available:
            raise TTLibError('not enough data to unpack UIntBase128')
        code = byteord(data[consumed])
        # if any of the top seven bits are set then we're about to overflow
        if result & 0xFE000000:
            raise TTLibError('UIntBase128 value exceeds 2**32-1')
        # each byte contributes its low seven bits
        result = (result << 7) | (code & 0x7f)
        # a clear high bit marks the last byte: return value plus leftover data
        if not code & 0x80:
            return result, data[consumed + 1:]
    # make sure not to exceed the size bound
    raise TTLibError('UIntBase128-encoded sequence is longer than 5 bytes')
def _AsciiHexEncode(input):
    """Hex-encode binary data for use within a PDF file.

    This is a verbose encoding: one byte of binary becomes two bytes of
    ASCII.  The output is terminated with '>'.  Helper function used by
    images.
    """
    encoded = ['%02x' % byteord(char) for char in input]
    encoded.append('>')
    return ''.join(encoded)
def unpackPStrings(data):
    """Split *data* into its Pascal strings (length byte followed by payload).

    Every string is decoded as latin1; the list of strings is returned.
    """
    names = []
    cursor = 0
    end = len(data)
    while cursor < end:
        start = cursor + 1
        stop = start + byteord(data[cursor])
        names.append(tostr(data[start:stop], encoding="latin1"))
        cursor = stop
    return names
def _reverseBytes(data):
    """Reverse the order of the bits inside every byte of *data*.

    Args:
        data: a byte string of any length, or a single byte.  A single byte
            may also be given as an integer, which is what indexing or
            iterating a ``bytes`` object produces on Python 3.

    Returns:
        A byte string with one output byte per input byte, each with its
        eight bits mirrored (bit 0 becomes bit 7 and so on).
    """
    # Integers have no len(); only sized inputs can need splitting.  Without
    # this guard the recursive map() below fails on Python 3, where iterating
    # bytes yields integers.
    if not isinstance(data, int) and len(data) != 1:
        return bytesjoin(map(_reverseBytes, data))
    byte = byteord(data)
    result = 0
    for _ in range(8):
        # Shift the lowest remaining input bit into the result.
        result = (result << 1) | (byte & 1)
        byte >>= 1
    return bytechr(result)
def bezDecrypt(bezDataBuffer):
    """Decrypt a hex-encoded, encrypted bez data string.

    The input is read two hex digits at a time, skipping whitespace.  Each
    resulting cipher value is XORed with the high byte of a running 16-bit
    key that starts at 11586 and is updated from every cipher value.  The
    first ``LEN_IV`` decrypted values are discarded.

    Args:
        bezDataBuffer: text string of hex digits, optionally with whitespace.

    Returns:
        The decrypted data as a text string, one character per decrypted
        value.  Input that ends in the middle of a digit pair is decoded up
        to the last complete pair.
    """
    r = 11586
    i = 0  # input buffer position
    lenBuffer = len(bezDataBuffer)
    byteCnt = 0  # number of values decrypted so far
    plainChars = []  # collected output; joined once at the end
    while True:
        cipher = 0
        # Combine the next two hex digits, skipping whitespace.
        for _ in range(2):
            try:
                while bezDataBuffer[i].isspace():
                    i += 1
                ch = bezDataBuffer[i]
            except IndexError:
                return "".join(plainChars)

            ch = ch.lower()
            if ch.isdigit():
                nibble = byteord(ch) - byteord('0')
            else:
                nibble = byteord(ch) - byteord('a') + 10
            cipher = ((cipher << 4) & 0xFFFF) | nibble
            i += 1

        plain = cipher ^ (r >> 8)
        r = (cipher + r) * 902381661 + 341529579
        if r > 0xFFFF:
            r = r & 0xFFFF
        byteCnt += 1
        if byteCnt > LEN_IV:
            # The output is text, so use chr(): a helper that produces a
            # bytes object cannot be concatenated to a str on Python 3.
            plainChars.append(chr(plain))
        if i >= lenBuffer:
            break

    return "".join(plainChars)
def unpack255UShort(data):
    """ Read one to three bytes from 255UInt16-encoded input string, and return a
    tuple containing the decoded integer plus any leftover data.

    >>> unpack255UShort(bytechr(252))[0]
    252

    Note that some numbers (e.g. 506) can have multiple encodings:
    >>> unpack255UShort(struct.pack("BB", 254, 0))[0]
    506
    >>> unpack255UShort(struct.pack("BB", 255, 253))[0]
    506
    >>> unpack255UShort(struct.pack("BBB", 253, 1, 250))[0]
    506

    Raises:
        TTLibError: if *data* is empty or ends before the value is complete.
    """
    # Report empty input the same way as every other short-data case below,
    # instead of failing inside byteord().
    if len(data) == 0:
        raise TTLibError('not enough data to unpack 255UInt16')
    code = byteord(data[:1])
    data = data[1:]
    if code == 253:
        # read two more bytes as an unsigned short
        if len(data) < 2:
            raise TTLibError('not enough data to unpack 255UInt16')
        result, = struct.unpack(">H", data[:2])
        data = data[2:]
    elif code == 254:
        # read another byte, plus 253 * 2
        if len(data) == 0:
            raise TTLibError('not enough data to unpack 255UInt16')
        result = byteord(data[:1]) + 506
        data = data[1:]
    elif code == 255:
        # read another byte, plus 253
        if len(data) == 0:
            raise TTLibError('not enough data to unpack 255UInt16')
        result = byteord(data[:1]) + 253
        data = data[1:]
    else:
        # leave as is if lower than 253
        result = code
    # return result plus left over data
    return result, data
def getnexttoken(
    self,
    # localize some stuff, for performance
    len=len,
    ps_special=ps_special,
    stringmatch=stringRE.match,
    hexstringmatch=hexstringRE.match,
    commentmatch=commentRE.match,
    endmatch=endofthingRE.match,
):
    """Return the next ``(tokentype, token)`` pair from the buffer.

    Leading whitespace is skipped first.  ``(None, None)`` is returned at
    the end of the buffer.  *tokentype* is one of 'do_special',
    'do_comment', 'do_string', 'do_hexstring', 'do_literal' or '' (any other
    token); *token* is decoded with ``self.encoding``.  ``self.pos`` is
    advanced past the token.

    Raises:
        PSTokenError: if no valid token starts at the current position.
    """
    self.skipwhite()
    if self.pos >= self.len:
        return None, None
    start = self.pos
    buf = self.buf
    char = bytechr(byteord(buf[start]))

    if char not in ps_special:
        # Ordinary token; a leading slash marks a literal name.
        if char == b'/':
            tokentype = 'do_literal'
            m = endmatch(buf, start + 1)
        else:
            tokentype = ''
            m = endmatch(buf, start)
        if m is None:
            raise PSTokenError('bad token at character %d' % start)
        token = buf[start:m.end()]
    elif char in b'{}[]':
        tokentype = 'do_special'
        token = char
    elif char == b'%':
        tokentype = 'do_comment'
        token = buf[start:commentmatch(buf, start).end()]
    elif char == b'(':
        tokentype = 'do_string'
        m = stringmatch(buf, start)
        if m is None:
            raise PSTokenError('bad string at character %d' % start)
        token = buf[start:m.end()]
    elif char == b'<':
        tokentype = 'do_hexstring'
        m = hexstringmatch(buf, start)
        if m is None:
            raise PSTokenError('bad hexstring at character %d' % start)
        token = buf[start:m.end()]
    else:
        raise PSTokenError('bad token at character %d' % start)

    self.pos = start + len(token)
    token = tostr(token, encoding=self.encoding)
    return tokentype, token
def decompile(self, data):
    """Decode *data* byte by byte, pushing operands onto ``self.stack``.

    Each leading byte selects a handler from ``self.operandEncoding``.  The
    handler is called as ``handler(self, b0, data, index)`` and returns the
    decoded value together with the index of the next unread byte.  Values
    that are not None are appended to the stack.
    """
    # Bound once, before any handler runs.
    pushOperand = self.stack.append
    cursor = 0
    end = len(data)
    while cursor < end:
        b0 = byteord(data[cursor])
        handler = self.operandEncoding[b0]
        value, cursor = handler(self, b0, data, cursor + 1)
        if value is not None:
            pushOperand(value)
def read_operator(self, b0, data, index):
    """Decode an operator and hand it to ``self.handle_operator``.

    A first byte of 12 introduces a two-byte operator, keyed as
    ``(12, next_byte)``; any other value is the key itself.  Keys missing
    from ``self.operators`` are ignored.

    Returns:
        ``(value, index)``: the result of ``handle_operator`` (None for an
        unknown operator) and the index of the next unread byte.
    """
    if b0 == 12:
        key = (b0, byteord(data[index]))
        index += 1
    else:
        key = b0
    try:
        operator = self.operators[key]
    except KeyError:
        return None, index
    return self.handle_operator(operator), index
def _data2binary(data, numBits):
    """Render the first *numBits* bits of *data* as a string of '0'/'1'.

    Within each byte the least significant bit comes first.
    """
    bits = []
    remaining = numBits
    for rawByte in data:
        value = byteord(rawByte)
        take = min(8, remaining)
        bits.extend(
            '1' if (value >> shift) & 0x1 else '0' for shift in range(take)
        )
        remaining -= take
    return strjoin(bits)
def toUnicode(self, errors='strict'):
    """
    If self.string is a Unicode string, return it; otherwise try decoding the
    bytes in self.string to a Unicode string using the encoding of this
    entry as returned by self.getEncoding(); Note that self.getEncoding()
    returns 'ascii' if the encoding is unknown to the library.

    Certain heuristics are performed to recover data from bytes that are
    ill-formed in the chosen encoding, or that otherwise look misencoded
    (mostly around bad UTF-16BE encoded bytes, or bytes that look like
    UTF-16BE but marked otherwise).  If the bytes are ill-formed and the
    heuristics fail, the error is handled according to the errors parameter
    to this function, which is passed to the underlying decode() function;
    by default it throws a UnicodeDecodeError exception.

    Note: The mentioned heuristics mean that roundtripping a font to XML and
    back to binary might recover some misencoded data whereas just loading
    the font and saving it back will not change them.
    """
    def isascii(b):
        # Printable ASCII, or tab / line feed / carriage return.
        return 0x20 <= b <= 0x7E or b in (0x09, 0x0A, 0x0D)

    encoding = self.getEncoding()
    string = self.string

    oddLengthUtf16 = (
        isinstance(string, bytes)
        and encoding == 'utf_16_be'
        and len(string) % 2 == 1
    )
    if oddLengthUtf16:
        # Recover badly encoded UTF-16 strings that have an odd number of bytes:
        # - If the last byte is zero, drop it.  Otherwise,
        # - If all the odd bytes are zero and all the even bytes are ASCII,
        #   prepend one zero byte.  Otherwise,
        # - If first byte is zero and all other bytes are ASCII, insert zero
        #   bytes between consecutive ASCII bytes.
        #
        # (Yes, I've seen all of these in the wild... sigh)
        if byteord(string[-1]) == 0:
            string = string[:-1]
        elif all(
            byteord(b) == 0 if i % 2 else isascii(byteord(b))
            for i, b in enumerate(string)
        ):
            string = b'\0' + string
        elif byteord(string[0]) == 0 and all(
            isascii(byteord(b)) for b in string[1:]
        ):
            string = bytesjoin(b'\0' + bytechr(byteord(b)) for b in string[1:])

    string = tostr(string, encoding=encoding, errors=errors)

    # If the decoded string still looks like UTF-16BE (NUL characters
    # alternating with ASCII), it suggests a double-encoding: keep only the
    # ASCII characters.
    looksDoubleEncoded = all(
        ord(c) == 0 if i % 2 == 0 else isascii(ord(c))
        for i, c in enumerate(string)
    )
    if looksDoubleEncoded:
        string = string[1::2]
    return string
def test_calcCodePageRanges(emptyufo, unicodes, expected):
    """Compile a font with one glyph per code point in *unicodes* and check
    both OS/2 code page range fields against the *expected* bit list."""
    for glyphIndex, char in enumerate(unicodes):
        codepoint = byteord(char)
        emptyufo.newGlyph("glyph%d" % glyphIndex).unicode = codepoint

    compiler = OutlineOTFCompiler(emptyufo)
    compiler.compile()

    os2 = compiler.otf["OS/2"]
    assert os2.ulCodePageRange1 == intListToNum(expected, start=0, length=32)
    assert os2.ulCodePageRange2 == intListToNum(expected, start=32, length=32)
def read_realNumber(self, b0, data, index):
    """Decode a nibble-packed real number starting at *index* in *data*.

    Each byte holds two nibbles, high nibble first.  Every nibble is mapped
    to text through ``realNibbles`` until a nibble of 0xf ends the number.

    Returns:
        ``(float_value, index)`` with *index* just past the last byte read.
    """
    pieces = []
    finished = False
    while not finished:
        b = byteord(data[index])
        index += 1
        for nibble in ((b & 0xf0) >> 4, b & 0x0f):
            if nibble == 0xf:
                # end-of-number marker
                finished = True
                break
            pieces.append(realNibbles[nibble])
    return float(''.join(pieces)), index
def block(char):
    """ Return the block property assigned to the Unicode character 'char'
    as a string.

    >>> block("a")
    'Basic Latin'
    >>> block(chr(0x060C))
    'Arabic'
    >>> block(chr(0xEFFFF))
    'No_Block'
    """
    # Locate the last range start that is <= the code point.
    position = bisect_right(Blocks.RANGES, byteord(char))
    return Blocks.VALUES[position - 1]
def getToken(self, index, len=len, byteord=byteord, isinstance=isinstance):
    """Fetch the token at *index* from the bytecode or, failing that, the program.

    When ``self.bytecode`` is set, the byte at *index* selects a handler
    from ``self.operandEncoding`` which decodes the token.  Otherwise the
    token is taken directly from ``self.program``.

    Returns:
        ``(token, isOperator, nextIndex)`` where *isOperator* tells whether
        the token is a ``str``; ``(None, 0, 0)`` when *index* is past the end.
    """
    bytecode = self.bytecode
    if bytecode is None:
        program = self.program
        if index >= len(program):
            return None, 0, 0
        token = program[index]
        index += 1
    else:
        if index >= len(bytecode):
            return None, 0, 0
        b0 = byteord(bytecode[index])
        index += 1
        token, index = self.operandEncoding[b0](self, b0, bytecode, index)
    return token, isinstance(token, str), index
def decompileUniqueName(self, data):
    """Convert the raw unique-name field into a readable string.

    Decoding stops at the first NUL byte.  Byte values 32..127 are copied
    as characters; every other byte is written as a backslash followed by
    its three-digit octal value.

    Args:
        data: the raw bytes of the name field.

    Returns:
        The decoded name as a text string.
    """
    pieces = []
    for char in data:
        val = byteord(char)
        if val == 0:
            break
        # Both bounds must hold.  With ``or`` the test was true for every
        # value, so the escape branch below could never run.
        if 31 < val < 128:
            pieces.append(chr(val))
        else:
            # Always three octal digits, independent of how oct() formats.
            pieces.append("\\%03o" % val)
    return "".join(pieces)
def script_extension(char):
    """ Return the script extension property assigned to the Unicode character
    'char' as a set of string.

    >>> script_extension("a") == {'Latn'}
    True
    >>> script_extension(chr(0x060C)) == {'Rohg', 'Syrc', 'Yezi', 'Arab', 'Thaa'}
    True
    >>> script_extension(chr(0x10FFFF)) == {'Zzzz'}
    True
    """
    position = bisect_right(ScriptExtensions.RANGES, byteord(char))
    extensions = ScriptExtensions.VALUES[position - 1]
    if extensions is not None:
        return extensions
    # code points not explicitly listed for Script Extensions
    # have as their value the corresponding Script property value
    return {script(char)}
def _AsciiBase85Encode(input):
    """Encode binary data as ASCII base-85 for use within a PDF file.

    This is a compact encoding: four bytes of binary data become five bytes
    of ASCII, and a group of four zero bytes is written as 'z'.  A final
    group of 1-3 bytes is zero-padded and written as one character more than
    it has bytes.  The output ends with the '~>' terminator.  This is the
    default method used for encoding images.
    """
    def encodeGroup(chunk):
        # Return (32-bit value, its five base-85 digit characters).
        b1, b2, b3, b4 = [byteord(chunk[k]) for k in range(4)]
        num = 16777216 * b1 + 65536 * b2 + 256 * b3 + b4
        rest, c5 = divmod(num, 85)
        rest, c4 = divmod(rest, 85)
        rest, c3 = divmod(rest, 85)
        c1, c2 = divmod(rest, 85)
        assert ((85**4) * c1) + ((85**3) * c2) + ((85**2) * c3) + (85*c4) + c5 == num, 'dodgy code!'
        return num, ''.join(chr(c + 33) for c in (c1, c2, c3, c4, c5))

    # special rules apply if not a multiple of four bytes.
    wholeWords, remainderSize = divmod(len(input), 4)
    cut = 4 * wholeWords
    body, tail = input[0:cut], input[cut:]

    pieces = []
    for offset in range(0, cut, 4):
        num, digits = encodeGroup(body[offset:offset + 4])
        pieces.append('z' if num == 0 else digits)

    if remainderSize > 0:
        # Pad the short final group with zero bytes, encode it as usual and
        # keep only the leading characters.
        while len(tail) < 4:
            tail = tail + b'\000'
        _, digits = encodeGroup(tail)
        pieces.append(digits[0:remainderSize + 1])

    # terminator code for ascii 85
    pieces.append('~>')
    return ''.join(pieces)
def _AsciiBase85Decode(input):
    """Decode an ASCII base-85 text stream back into a string of byte values.

    This is not used - Acrobat Reader decodes for you - but a round trip is
    essential for testing.

    Args:
        input: the encoded text, ending with the '~>' terminator.
            Whitespace anywhere in the stream is ignored and 'z' stands for
            four zero bytes.

    Returns:
        A text string with one character per decoded byte.

    Raises:
        AssertionError: if the terminator is missing or the final group
            consists of a single character.
    """
    # strip all whitespace
    stripped = ''.join(input.split())
    # check end
    assert stripped[-2:] == '~>', 'Invalid terminator for Ascii Base 85 Stream'
    stripped = stripped[:-2]  # chop off terminator

    # may have 'z' in it which complicates matters - expand them
    stripped = stripped.replace('z', '!!!!!')
    # special rules apply if not a multiple of five bytes.
    whole_word_count, remainder_size = divmod(len(stripped), 5)
    assert remainder_size != 1, 'invalid Ascii 85 stream!'
    cut = 5 * whole_word_count

    def decode_group(group):
        # Turn five base-85 digit characters into four byte characters.
        num = 0
        for char in group:
            num = num * 85 + (ord(char) - 33)
        return [
            chr(num >> 24),
            chr((num >> 16) & 0xFF),
            chr((num >> 8) & 0xFF),
            chr(num & 0xFF),
        ]

    out = []
    for offset in range(0, cut, 5):
        out.extend(decode_group(stripped[offset:offset + 5]))

    if remainder_size > 0:
        # A final group of n+1 characters carries n bytes; the encoder simply
        # dropped the trailing digits.  Padding with 'u' (the largest digit)
        # and keeping the first n bytes recovers them exactly.  Padding with
        # '!' and adding one to the last byte is wrong whenever the dropped
        # digits were zero or the last byte needs a carry.
        tail = stripped[cut:] + 'u' * (5 - remainder_size)
        out.extend(decode_group(tail)[:remainder_size - 1])

    return ''.join(out)