def base2ba(n, s, endian=None):
    """base2ba(n, asciistr, /, endian=None) -> bitarray

    Bitarray of the base `n` ASCII representation.
    Allowed values for `n` are 2, 4, 8, 16, 32 and 64.
    For `n=16` (hexadecimal), `hex2ba()` will be much faster, as `base2ba()`
    does not take advantage of byte level operations.
    For `n=32` the RFC 4648 Base32 alphabet is used, and for `n=64` the
    standard base 64 alphabet is used.

    Raises `TypeError` when `n` is not an integer or `asciistr` is not a
    string (or bytes) object, and `ValueError` when `n` is not one of the
    allowed bases.
    """
    if not isinstance(n, int):
        raise TypeError("integer expected")

    # Number of bits represented by a single digit in base n.  Using .get()
    # (rather than catching KeyError) keeps the ValueError free of an
    # implicitly chained KeyError in the traceback.
    bits_per_digit = {2: 1, 4: 2, 8: 3, 16: 4, 32: 5, 64: 6}.get(n)
    if bits_per_digit is None:
        raise ValueError("base must be 2, 4, 8, 16, 32 or 64")

    if not isinstance(s, (str, unicode if _is_py2 else bytes)):
        raise TypeError("str expected, got: '%s'" % type(s).__name__)

    # text input is converted to bytes; non-ASCII characters fail here
    if isinstance(s, unicode if _is_py2 else str):
        s = s.encode('ascii')
    assert isinstance(s, bytes)

    # allocate the result up front: every digit contributes a fixed
    # number of bits
    a = bitarray(bits_per_digit * len(s),
                 get_default_endian() if endian is None else endian)
    # NOTE(review): _base2ba is defined elsewhere; presumably it fills `a`
    # in place from the digits in `s` -- confirm against its definition
    _base2ba(n, a, s)
    return a
def int2ba(i, length=None, endian=None, signed=False):
    """int2ba(int, /, length=None, endian=None, signed=False) -> bitarray

    Convert the given integer to a bitarray (with given endianness).
    Without `length`, the result carries no leading (big-endian) /
    trailing (little-endian) zeros.  With `length`, the result has exactly
    that many bits, and an `OverflowError` is raised if the integer is not
    representable in them.

    `signed` selects two's complement representation and requires `length`
    to be given.  A negative integer with `signed=False` raises an
    `OverflowError`.
    """
    int_types = (int, long) if _is_py2 else int
    if not isinstance(i, int_types):
        raise TypeError("int expected, got '%s'" % type(i).__name__)

    if length is not None:
        if not isinstance(length, int):
            raise TypeError("int expected for length")
        if length <= 0:
            raise ValueError("integer larger than 0 expected for length")

    if signed and length is None:
        raise TypeError("signed requires length")

    if i == 0:
        # zero is handled separately, as it has special cases the general
        # code below would otherwise need to deal with
        return zeros(length or 1, endian)

    if signed:
        half = 1 << (length - 1)
        if not (-half <= i < half):
            raise OverflowError("signed integer out of range")
        if i < 0:
            # two's complement: map negative values into [2**(n-1), 2**n)
            i += 1 << length
    elif i < 0 or (length and i >= 1 << length):
        raise OverflowError("unsigned integer out of range")

    result = bitarray(0, get_default_endian() if endian is None else endian)
    byteorder = result.endian()
    big_endian = byteorder == 'big'

    if _is_py2:
        # no int.to_bytes() on Python 2: peel off bytes, least
        # significant first
        raw = bytearray()
        while i:
            i, low = divmod(i, 256)
            raw.append(low)
        if big_endian:
            raw.reverse()
        data = bytes(raw)
    else:  # py3
        data = i.to_bytes(bits2bytes(i.bit_length()), byteorder=byteorder)

    result.frombytes(data)

    if length is None:
        return strip(result, 'left' if big_endian else 'right')

    nbits = len(result)
    if nbits > length:
        # whole bytes were loaded; drop the surplus bits on the
        # most significant side
        result = result[-length:] if big_endian else result[:length]
    elif nbits < length:
        padding = zeros(length - nbits, endian)
        result = padding + result if big_endian else result + padding

    assert len(result) == length
    return result
def urandom(length, endian=None):
    """urandom(length, /, endian=None) -> bitarray

    Return a bitarray of `length` random bits (uses `os.urandom`).
    """
    if endian is None:
        endian = get_default_endian()
    result = bitarray(0, endian)

    # random data is obtained in whole bytes; the bits beyond `length`
    # are removed afterwards
    nbytes = bits2bytes(length)
    result.frombytes(os.urandom(nbytes))
    del result[length:]
    return result
def zeros(length, endian=None):
    """zeros(length, /, endian=None) -> bitarray

    Create a bitarray of `length` bits, all of them 0.  The optional
    endianness may be 'big' or 'little'; when omitted, the default
    endianness is used.
    """
    int_types = (int, long) if _is_py2 else int
    if not isinstance(length, int_types):
        raise TypeError("int expected, got '%s'" % type(length).__name__)

    if endian is None:
        endian = get_default_endian()

    result = bitarray(length, endian)
    result.setall(0)
    return result
def zeros(length, endian=None):
    """zeros(length, /, endian=None) -> bitarray

    Create a bitarray of `length` bits, with all values 0, and optional
    endianness, which may be 'big' or 'little'.  When `endian` is None,
    the default endianness is used.

    Raises `TypeError` when `length` is not an integer.
    """
    if not isinstance(length, (int, long) if _is_py2 else int):
        raise TypeError("integer expected")

    # Only None selects the default.  Any other value -- including falsy
    # ones such as '' -- is handed to bitarray() unchanged instead of being
    # silently replaced by the default endianness.
    a = bitarray(length, get_default_endian() if endian is None else endian)
    a.setall(0)
    return a
def vl_decode(__stream, endian=None):
    """vl_decode(stream, /, endian=None) -> bitarray

    Decode a binary stream (an iterator over integers, or a bytes object)
    and return the decoded bitarray.  Only a single bitarray is consumed
    from the stream; whatever follows is left untouched.
    `StopIteration` is raised when no terminating byte is found.
    Use `vl_encode()` for encoding.
    """
    if isinstance(__stream, bytes):
        __stream = iter(__stream)

    if endian is None:
        endian = get_default_endian()

    # start with an over-allocated buffer for the decoder to work on
    buf = bitarray(256, endian)
    _vl_decode(__stream, buf)

    # return a copy, which drops the previously over-allocated bitarray
    return bitarray(buf)
def hex2ba(s, endian=None):
    """hex2ba(hexstr, /, endian=None) -> bitarray

    Bitarray of hexadecimal representation.  The hexstr may contain any
    number of hex digits (upper or lower case); each digit contributes
    four bits to the result.
    """
    if _is_py2:
        accepted, text_type = (str, unicode), unicode
    else:
        accepted, text_type = (str, bytes), str

    if not isinstance(s, accepted):
        raise TypeError("str expected, got: '%s'" % type(s).__name__)

    # the low-level helper is given bytes, so text is encoded first
    if isinstance(s, text_type):
        s = s.encode('ascii')
    assert isinstance(s, bytes)

    if endian is None:
        endian = get_default_endian()

    result = bitarray(4 * len(s), endian)
    _hex2ba(result, s)
    return result
def hex2ba(s, endian=None):
    """hex2ba(hexstr, /, endian=None) -> bitarray

    Bitarray of hexadecimal representation.
    hexstr may contain any number of hex digits (upper or lower case).

    Raises `TypeError` when `hexstr` is not a string (or bytes) object.
    """
    if not isinstance(s, (str, unicode if _is_py2 else bytes)):
        # wrap in a 1-tuple: a bare `% s` would unpack `s` when it is
        # itself a tuple, giving a wrong or failing message
        raise TypeError("string expected, got: %r" % (s,))

    odd = len(s) % 2
    if odd:
        # unhexlify() only accepts an even number of digits: append a
        # padding digit here, and remove its four bits again below
        s = s + ('0' if isinstance(s, str) else b'0')

    # only None selects the default endianness (falsy values such as ''
    # are passed on to bitarray() rather than silently replaced)
    a = bitarray(0, get_default_endian() if endian is None else endian)
    b = binascii.unhexlify(s)
    if a.endian() == 'little':
        # NOTE(review): _swap_hilo_bytes is defined elsewhere; presumably a
        # translation table exchanging the two 4-bit halves of each byte
        b = b.translate(_swap_hilo_bytes)
    a.frombytes(b)

    if odd:
        del a[-4:]
    return a
def all_perm(n, k, endian=None):
    """all_perm(n, k, endian=None) -> iterator

    Return an iterator over all bitarrays of length `n` with `k` bits set
    to 1 in lexicographical order.

    As this is a generator, argument errors surface on the first
    iteration: `ValueError` is raised when `n` is negative or when `k` is
    not in `range(0, n + 1)`.
    """
    n = int(n)
    if n < 0:
        raise ValueError("length must be >= 0")

    k = int(k)
    if k < 0 or k > n:
        raise ValueError("number of set bits must be in range(0, n + 1)")

    if k == 0:
        # the update formula below divides by (v & -v), which is 0 for v = 0
        yield zeros(n, endian)
        return

    # resolve the endianness once, instead of on every iteration
    endian = get_default_endian() if endian is None else endian

    v = (1 << k) - 1  # smallest integer with k bits set
    for _ in range(binomial(n, k)):
        yield int2ba(v, length=n, endian=endian)
        # advance v to the next larger integer with the same number of
        # bits set
        t = (v | (v - 1)) + 1
        v = t | ((((t & -t) // (v & -v)) >> 1) - 1)
def hex2ba(s, endian=None):
    """hex2ba(hexstr, /, endian=None) -> bitarray

    Return a bitarray obtained by encoding the symbols of `hexstr` with
    the code table selected from `CODEDICT` by the endianness of the
    result.  When `endian` is None, the default endianness is used.
    """
    # only None selects the default endianness (falsy values such as ''
    # are passed on to bitarray() rather than silently replaced)
    a = bitarray(0, get_default_endian() if endian is None else endian)
    # NOTE(review): CODEDICT is defined elsewhere; presumably it maps each
    # endianness to a dict from hex digit to its 4-bit bitarray -- confirm
    a.encode(CODEDICT[a.endian()], s)
    return a
def huffman_code(freq_map, endian=None):
    """huffman_code(dict, /, endian=None) -> dict

    Given a frequency map, a dictionary mapping symbols to their frequency,
    calculate the Huffman code, i.e. a dict mapping those symbols to
    bitarrays (with given endianness).  Note that the symbols may be any
    hashable object (including `None`).

    When the frequency map contains only a single symbol, that symbol is
    assigned the one-bit code `0`.

    Raises `TypeError` when the argument is not a dict, and `ValueError`
    when it is empty.
    """
    import heapq

    if not isinstance(freq_map, dict):
        raise TypeError("dict expected, got '%s'" % type(freq_map).__name__)
    if len(freq_map) == 0:
        raise ValueError("non-empty dict expected")
    if endian is None:
        endian = get_default_endian()

    if len(freq_map) == 1:
        # With a single symbol the tree is just its root leaf, which would
        # give the symbol a code of zero bits.  Such a code cannot carry
        # any information about how often the symbol occurs, so the symbol
        # is represented by a single 0 bit instead.
        sym = next(iter(freq_map))
        return {sym: bitarray('0', endian)}

    class Node(object):
        # a Node object will have either .symbol or .child set below,
        # .freq will always be set
        def __lt__(self, other):
            # heapq needs to be able to compare the nodes
            return self.freq < other.freq

    def huff_tree(freq_map):
        # given a dictionary mapping symbols to their frequency,
        # construct a Huffman tree and return its root node
        minheap = []
        # create all the leaf nodes and push them onto the queue
        for sym, f in freq_map.items():
            nd = Node()
            nd.symbol = sym
            nd.freq = f
            heapq.heappush(minheap, nd)

        # repeat the process until only one node remains
        while len(minheap) > 1:
            # take the nodes with smallest frequencies from the queue
            child_0 = heapq.heappop(minheap)
            child_1 = heapq.heappop(minheap)
            # construct the new internal node and push it onto the queue
            parent = Node()
            parent.child = [child_0, child_1]
            parent.freq = child_0.freq + child_1.freq
            heapq.heappush(minheap, parent)

        # the single remaining node is the root of the Huffman tree
        return minheap[0]

    result = {}

    def traverse(nd, prefix):
        # walk the tree, extending the prefix by 0 for the first child and
        # by 1 for the second; the prefix at a leaf is that symbol's code
        if hasattr(nd, 'symbol'):  # leaf
            result[nd.symbol] = prefix
        else:  # parent, so traverse each of the children
            traverse(nd.child[0], prefix + bitarray([0]))
            traverse(nd.child[1], prefix + bitarray([1]))

    # the initial (empty) prefix is passed explicitly rather than kept as
    # a mutable default argument
    traverse(huff_tree(freq_map), bitarray(0, endian))
    return result
# Walk the tree below `nd`, recording the code of every leaf in `result`.
# `result` and `endian` are taken from the enclosing scope.  The default
# prefix is an empty bitarray; only `endian is None` selects the default
# endianness (falsy values such as '' are no longer silently replaced).
def traverse(nd, prefix=bitarray(0, get_default_endian()
                                 if endian is None else endian)):
    if hasattr(nd, 'symbol'):  # leaf
        # the prefix accumulated on the way down is this symbol's code
        result[nd.symbol] = prefix
    else:  # parent, so traverse each of the children
        # `prefix + ...` builds a new bitarray, so `prefix` itself is
        # never modified
        traverse(nd.child[0], prefix + bitarray([0]))
        traverse(nd.child[1], prefix + bitarray([1]))
def int2ba(i, length=None, endian=None):
    """int2ba(int, /, length=None, endian=None) -> bitarray

    Convert the given non-negative integer into a bitarray (with given
    endianness, and no leading (big-endian) / trailing (little-endian)
    zeros).  If length is provided, the result will be of this length, and
    an `OverflowError` will be raised, if the integer cannot be represented
    within length bits.

    Raises `TypeError` for arguments of the wrong type, and `ValueError`
    for a negative integer, a non-positive length or an endianness other
    than 'big' or 'little'.
    """
    if not isinstance(i, (int, long) if _is_py2 else int):
        raise TypeError("integer expected")
    if i < 0:
        raise ValueError("non-negative integer expected")

    if length is not None:
        if not isinstance(length, int):
            raise TypeError("integer expected for length")
        if length <= 0:
            raise ValueError("integer larger than 0 expected for length")

    if endian is None:
        endian = get_default_endian()
    if not isinstance(endian, str):
        raise TypeError("string expected for endian")
    if endian not in ('big', 'little'):
        raise ValueError("endian can only be 'big' or 'little'")

    if i == 0:
        # there are special cases for 0 which we'd rather not deal with below
        return zeros(length or 1, endian)

    big_endian = bool(endian == 'big')
    if _is_py2:
        # no int.to_bytes() on Python 2: peel off bytes, least
        # significant first
        c = bytearray()
        while i:
            i, r = divmod(i, 256)
            c.append(r)
        if big_endian:
            c.reverse()
        b = bytes(c)
    else:  # py3
        b = i.to_bytes(bits2bytes(i.bit_length()), byteorder=endian)

    a = bitarray(0, endian)
    a.frombytes(b)
    # len() is used instead of the bitarray .length() method
    la = len(a)
    if la == length:
        return a

    if length is None:
        return strip(a, 'left' if big_endian else 'right')

    if la > length:
        # number of bits up to and including the most significant 1 bit
        size = (la - a.index(1)) if big_endian else (rindex(a) + 1)
        if size > length:
            raise OverflowError("cannot represent %d bit integer in "
                                "%d bits" % (size, length))
        # drop the surplus zero bits on the most significant side
        a = a[la - length:] if big_endian else a[:length]

    if la < length:
        # pad with zeros on the most significant side
        if big_endian:
            a = zeros(length - la, 'big') + a
        else:
            a += zeros(length - la, 'little')

    assert len(a) == length
    return a