def ipv6_b85decode_naive(encoded, _base85_ords=RFC1924_ORDS):
    """
    Decodes an RFC1924 Base-85 encoded string to its 128-bit unsigned
    integral representation. Used to base85-decode IPv6 addresses or
    128-bit chunks.

    This is the straightforward variant: it folds the encoded characters
    into the result one at a time in a plain loop. Whitespace is ignored.

    :param encoded:
        RFC1924 Base85-encoded bytes. Must be exactly 20 bytes long once
        whitespace has been removed.
    :param _base85_ords:
        (Internal) Look up table, indexed by each element of ``encoded``,
        giving that character's base-85 digit value.
    :returns:
        A 128-bit unsigned integer.
    :raises TypeError:
        If ``encoded`` is not accepted by ``is_bytes``.
    :raises ValueError:
        If the whitespace-free input is not exactly 20 bytes long.
    :raises OverflowError:
        If a character is missing from the look up table, or if the decoded
        value is greater than ``UINT128_MAX``.
    """
    if not is_bytes(encoded):
        raise TypeError(
            "Encoded sequence must be bytes: got %r" % type(encoded).__name__
        )

    # Ignore whitespace.
    encoded = EMPTY_BYTE.join(encoded.split())
    if len(encoded) != 20:
        raise ValueError("Not 20 encoded bytes: %r" % encoded)

    uint128 = 0
    try:
        for char in encoded:
            uint128 = uint128 * 85 + _base85_ords[char]
    except KeyError:
        # A character outside the alphabet is reported with the same message
        # as an out-of-range result.
        raise OverflowError("Cannot decode `%r` -- may contain stray "
                            "ASCII bytes" % encoded)

    # 20 base-85 digits can express values larger than 128 bits, so the
    # result still has to be range checked.
    if uint128 > UINT128_MAX:
        raise OverflowError("Cannot decode `%r` -- may contain stray "
                            "ASCII bytes" % encoded)
    return uint128
def b85decode(encoded,
              prefix=None,
              suffix=None,
              _base85_bytes=ASCII85_BYTES,
              _base85_ords=ASCII85_ORDS,
              _uncompact_zero=True,
              _compact_char=ZERO_GROUP_CHAR):
    """
    Decodes an ASCII85-encoded string into raw bytes.

    Processing order: whitespace is removed, the prefix and suffix are
    stripped when they are given and actually present, compact zero-group
    characters are expanded (unless disabled), and the remaining data is
    handed to the chunk decoder.

    :param encoded:
        Encoded ASCII bytes.
    :param prefix:
        The prefix used by the encoded text. None by default; ``None`` or
        an empty value means no prefix is stripped.
    :param suffix:
        The suffix used by the encoded text. None by default; ``None`` or
        an empty value means no suffix is stripped.
    :param _base85_bytes:
        (Internal) Character set to use.
    :param _base85_ords:
        (Internal) Look up used to convert a base85 character to its
        ordinal value. It is passed through to the chunk decoder unchanged.
        You should not need to use this.
    :param _uncompact_zero:
        (Internal) Treats 'z' (a zero-group, i.e. four zero bytes) as
        '!!!!!' if ``True`` (default).
    :param _compact_char:
        (Internal) Character used to represent compact groups
        ('z' default).
    :returns:
        ASCII85-decoded raw bytes.
    :raises TypeError:
        If ``prefix``, ``suffix``, ``_compact_char`` or ``encoded`` is not
        accepted by ``is_bytes``.
    """
    prefix = prefix or EMPTY_BYTE
    suffix = suffix or EMPTY_BYTE

    if not (is_bytes(prefix) and is_bytes(suffix)):
        raise TypeError(
            "Prefix/suffix must be bytes: got prefix %r, %r" %
            (type(prefix).__name__, type(suffix).__name__)
        )
    if not is_bytes(_compact_char):
        raise TypeError("compact character must be raw byte: got %r" %
                        type(_compact_char).__name__)
    if not is_bytes(encoded):
        raise TypeError(
            "Encoded sequence must be bytes: got %r" % type(encoded).__name__
        )

    # ASCII-85 ignores whitespace.
    encoded = EMPTY_BYTE.join(encoded.split())

    # Strip the prefix and suffix. Both are optional in the input: they are
    # only removed when the data really starts/ends with them.
    if prefix and encoded.startswith(prefix):
        encoded = encoded[len(prefix):]
    if suffix and encoded.endswith(suffix):
        encoded = encoded[:-len(suffix)]

    # Replace all the 'z' occurrences with '!!!!!'. The occurrences are
    # checked by the helper before the expansion takes place.
    if _uncompact_zero:
        _check_compact_char_occurrence(encoded, _compact_char)
        encoded = encoded.replace(_compact_char, EXCLAMATION_CHUNK)

    return _b85decode_chunks(encoded, _base85_bytes, _base85_ords)
def base_decode(encoded, base, base_ords, base_zero, powers):
    """
    Decode from base to base 256.

    The whitespace-free input is first converted to a big integer with
    ``base_to_uint`` and that integer is then rendered as base-256 bytes by
    ``uint_to_base256``.

    :param encoded:
        Encoded bytes. Whitespace is ignored.
    :param base:
        The base of the encoding; forwarded to ``base_to_uint``.
    :param base_ords:
        Digit look up for the base; forwarded to ``base_to_uint``.
    :param base_zero:
        The zero digit of the base; forwarded to ``uint_to_base256``.
    :param powers:
        Powers of the base; forwarded to ``base_to_uint``.
    :returns:
        The result of ``uint_to_base256`` for the decoded number.
    :raises TypeError:
        If ``encoded`` is not accepted by ``is_bytes``.
    """
    if not is_bytes(encoded):
        type_name = type(encoded).__name__
        raise TypeError("encoded data must be bytes: got %r" % type_name)

    # Ignore whitespace.
    compact = EMPTY_BYTE.join(encoded.split())

    # Big integer first, then base-256. The whitespace-free text is passed
    # along with the number to the second step.
    return uint_to_base256(
        base_to_uint(compact, base, base_ords, powers),
        compact,
        base_zero,
    )
def uint_to_bytes_simple(num):
    """
    Simple uint to bytes converter.

    Produces the unpadded representation of ``num`` with the most
    significant byte first.

    :param num:
        A non-negative integer.
    :returns:
        ``ZERO_BYTE`` when ``num`` is 0, otherwise the per-byte values
        produced by ``byte`` joined together, most significant first.
    :raises ValueError:
        If ``num`` is negative.
    """
    # Validate explicitly instead of with ``assert``: assertions are removed
    # under ``python -O`` and a negative number never reaches zero through
    # ``>>= 8``, so the loop below would not terminate.
    if num < 0:
        raise ValueError("Negative numbers cannot be used: %d" % num)
    if num == 0:
        return ZERO_BYTE

    byte_array = []
    while num:
        # Least significant byte first; the order is flipped afterwards.
        byte_array.append(byte(num & 0xff))
        num >>= 8
    byte_array.reverse()
    return EMPTY_BYTE.join(byte_array)
def bin_decode(encoded):
    """
    Decodes binary-encoded bytes into raw bytes.

    The input is split by ``chunks(encoded, 4)``; every group is translated
    to a hexadecimal digit through ``_BIN_TO_HEX_LOOKUP`` and the resulting
    hex string is decoded with ``binascii.a2b_hex``.

    :param encoded:
        Binary representation.
    :returns:
        Raw bytes.
    :raises TypeError:
        If ``encoded`` is not accepted by ``is_bytes``.
    """
    if not is_bytes(encoded):
        raise TypeError("argument must be bytes: got %r" %
                        type(encoded).__name__)

    hex_digits = [_BIN_TO_HEX_LOOKUP[group] for group in chunks(encoded, 4)]
    return binascii.a2b_hex(EMPTY_BYTE.join(hex_digits))
def bin_encode(raw_bytes):
    """
    Encodes raw bytes into binary representation.

    Encode your Unicode strings to a byte encoding before binary-encoding
    them. The bytes are hex-encoded with ``binascii.b2a_hex`` and every hex
    digit is then translated through ``_HEX_TO_BIN_LOOKUP``.

    :param raw_bytes:
        Raw bytes.
    :returns:
        Binary representation.
    :raises TypeError:
        If ``raw_bytes`` is not accepted by ``is_bytes``.
    """
    if not is_bytes(raw_bytes):
        raise TypeError("argument must be raw bytes: got %r" %
                        type(raw_bytes).__name__)

    bit_groups = [_HEX_TO_BIN_LOOKUP[digit]
                  for digit in binascii.b2a_hex(raw_bytes)]
    return EMPTY_BYTE.join(bit_groups)
def uint_to_bytes_naive(number, block_size=0):
    """
    Naive slow and accurate implementation. Base for all our tests.

    Converts a number to a string of bytes, most significant byte first.

    :param number:
        The number to convert. Must not be negative.
    :param block_size:
        The number of bytes to output. If the number encoded to bytes is
        less than this, the block will be zero-padded. When not given (or
        not positive), the returned block is not padded.
    :returns:
        The bytes of ``number``, left-padded with ``ZERO_BYTE`` when a
        positive ``block_size`` is given.
    :raises ValueError:
        When ``number`` is negative.
    :raises OverflowError:
        When ``block_size`` is given and the number takes up more bytes
        than fit into the block.
    """
    if number < 0:
        raise ValueError('Negative numbers cannot be used: %d' % number)

    # Do some bounds checking.
    byte_count = integer_byte_length(number)
    wants_block = block_size > 0
    if wants_block and byte_count > block_size:
        raise OverflowError('Needed %i bytes for number, but block size '
                            'is %i' % (byte_count, block_size))

    # Convert the number to bytes.
    if number == 0:
        pieces = [ZERO_BYTE]
    else:
        pieces = []
        remaining = number
        while remaining > 0:
            # Collected least significant byte first, flipped below.
            pieces.append(byte(remaining & 0xFF))
            remaining >>= 8
        pieces.reverse()

    # Pad with zeroes to fill the block.
    if not wants_block:
        padding = EMPTY_BYTE
    else:
        pad_length = block_size - byte_count
        if number == 0:
            # Zero is emitted as one explicit ZERO_BYTE above, so one byte
            # less of padding is needed.
            # NOTE(review): presumably integer_byte_length(0) does not
            # already count that byte -- confirm against the helper.
            pad_length -= 1
        padding = ZERO_BYTE * pad_length

    return padding + EMPTY_BYTE.join(pieces)
def b36decode(encoded, base_bytes=ASCII36_BYTES):
    """
    Base-36 decodes a sequence of bytes into raw bytes.

    Leading, trailing, and internal whitespace is ignored. The case
    of the encoded byte string is also ignored. For example, you may pass
    in ``AbCd`` instead of ``ABCD``.

    :param encoded:
        Case-insensitive base-36 encoded bytes.
    :param base_bytes:
        (Internal) The character set to use. Defaults to ``ASCII36_BYTES``
        that uses natural ASCII order. Only its first element (the zero
        digit) is used here.
    :returns:
        Raw bytes.
    :raises TypeError:
        If ``encoded`` is not accepted by ``is_bytes``.
    :raises ValueError:
        If ``int`` cannot parse the whitespace-free input as a base-36
        number.
    """
    # Validate the input type up front, like the other decoders do, so that
    # callers get a clear TypeError instead of an incidental failure from
    # the join or the integer conversion below.
    if not is_bytes(encoded):
        raise TypeError(
            "Encoded sequence must be bytes: got %r" % type(encoded).__name__
        )

    # Ignore whitespace.
    encoded = EMPTY_BYTE.join(encoded.split())

    # ``int`` with base 36 accepts upper and lower case digits alike, which
    # is what makes the decoder case-insensitive.
    return uint_to_base256(int(encoded, 36), encoded, base_bytes[0])
def rfc1924_b85decode(encoded):
    """
    Base85 decodes using the RFC1924 character set.

    This is the encoding method used by Mercurial (and git) to generate
    binary diffs, for example. They chose the IPv6 character set and encode
    using the ASCII85 encoding method while not compacting zero-byte
    sequences.

    :see: http://tools.ietf.org/html/rfc1924
    :param encoded:
        RFC1924 Base85 encoded string. Whitespace is ignored.
    :returns:
        Decoded bytes.
    :raises TypeError:
        If ``encoded`` is not accepted by ``is_bytes``.
    """
    if not is_bytes(encoded):
        type_name = type(encoded).__name__
        raise TypeError("Encoded sequence must be bytes: got %r" % type_name)

    # Ignore whitespace.
    without_whitespace = EMPTY_BYTE.join(encoded.split())
    return _b85decode_chunks(without_whitespace, RFC1924_BYTES, RFC1924_ORDS)
def data_urlencode(raw_bytes,
                   mime_type=b('text/plain'),
                   charset=b('US-ASCII'),
                   encoder="base64"):
    """
    Encodes raw bytes into a data URL scheme string.

    The result is the concatenation of ``data:``, the mime type, the
    optional ``;charset=...`` component, the codec marker and the encoded
    payload.

    :param raw_bytes:
        Raw bytes
    :param mime_type:
        The mime type, e.g. b"text/css" or b"image/png". Default
        b"text/plain". A false value leaves the mime type out.
    :param charset:
        b"utf-8" if you want the data URL to contain a b"charset=utf-8"
        component. Default b'US-ASCII'. A false value leaves the component
        out. This does not mean however, that your raw_bytes will be
        encoded by this function. You must ensure that if you specify,
        b"utf-8" (or anything else) as the encoding, you have encoded your
        raw data appropriately.
    :param encoder:
        "base64" or None. Any value other than "base64" selects URL
        quoting of the payload.
    :returns:
        Data URL.
    :raises TypeError:
        If ``raw_bytes`` is not accepted by ``is_bytes``.
    """
    if not is_bytes(raw_bytes):
        raise TypeError(
            "only raw bytes can be encoded: got %r" % type(raw_bytes).__name__
        )

    use_base64 = encoder == "base64"
    codec = b(";base64,") if use_base64 else b(",")

    if not mime_type:
        mime_type = EMPTY_BYTE
    if charset:
        charset = b(";charset=") + charset
    else:
        charset = EMPTY_BYTE

    if use_base64:
        payload = base64_encode(raw_bytes)
    else:
        # We want ASCII bytes.
        payload = quote(raw_bytes).encode('ascii')

    return EMPTY_BYTE.join((b("data:"), mime_type, charset, codec, payload))
def ipv6_b85decode(encoded, _base85_ords=RFC1924_ORDS):
    """
    Decodes an RFC1924 Base-85 encoded string to its 128-bit unsigned
    integral representation. Used to base85-decode IPv6 addresses or
    128-bit chunks.

    Whitespace is ignored.

    :param encoded:
        RFC1924 Base85-encoded bytes. Must be exactly 20 bytes long once
        whitespace has been removed.
    :param _base85_ords:
        (Internal) Look up table, indexed by each element of ``encoded``,
        giving that character's base-85 digit value.
    :returns:
        A 128-bit unsigned integer.
    :raises TypeError:
        If ``encoded`` is not accepted by ``is_bytes``.
    :raises ValueError:
        If the whitespace-free input is not exactly 20 bytes long.
    :raises OverflowError:
        If a character is missing from the look up table, or if the decoded
        value is greater than ``UINT128_MAX``.
    """
    if not is_bytes(encoded):
        raise TypeError(
            "Encoded sequence must be bytes: got %r" % type(encoded).__name__
        )

    # Ignore whitespace.
    encoded = EMPTY_BYTE.join(encoded.split())
    if len(encoded) != 20:
        raise ValueError("Not 20 encoded bytes: %r" % encoded)

    # The arithmetic below is the loop
    #     uint128 = uint128 * 85 + _base85_ords[char]
    # over all 20 characters, unrolled into four groups of five characters.
    try:
        # encoded[0]..encoded[4]
        uint128 = ((((_base85_ords[encoded[0]] * 85 +
                      _base85_ords[encoded[1]]) * 85 +
                     _base85_ords[encoded[2]]) * 85 +
                    _base85_ords[encoded[3]]) * 85 +
                   _base85_ords[encoded[4]])
        # encoded[5]..encoded[9]
        uint128 = (((((uint128 * 85 +
                       _base85_ords[encoded[5]]) * 85 +
                      _base85_ords[encoded[6]]) * 85 +
                     _base85_ords[encoded[7]]) * 85 +
                    _base85_ords[encoded[8]]) * 85 +
                   _base85_ords[encoded[9]])
        # encoded[10]..encoded[14]
        uint128 = (((((uint128 * 85 +
                       _base85_ords[encoded[10]]) * 85 +
                      _base85_ords[encoded[11]]) * 85 +
                     _base85_ords[encoded[12]]) * 85 +
                    _base85_ords[encoded[13]]) * 85 +
                   _base85_ords[encoded[14]])
        # encoded[15]..encoded[19]
        uint128 = (((((uint128 * 85 +
                       _base85_ords[encoded[15]]) * 85 +
                      _base85_ords[encoded[16]]) * 85 +
                     _base85_ords[encoded[17]]) * 85 +
                    _base85_ords[encoded[18]]) * 85 +
                   _base85_ords[encoded[19]])
    except KeyError:
        # A character outside the alphabet is reported with the same message
        # as an out-of-range result.
        raise OverflowError("Cannot decode `%r` -- may contain stray "
                            "ASCII bytes" % encoded)

    # 20 base-85 digits can express values larger than 128 bits, so the
    # result still has to be range checked.
    if uint128 > UINT128_MAX:
        raise OverflowError("Cannot decode `%r` -- may contain stray "
                            "ASCII bytes" % encoded)
    return uint128