def ichunks(iterable, size, *args, **kwargs):
    """
    Lazily splits a sized sequence into iterators over consecutive chunks.

    :param iterable:
        The sequence to split. It must support ``len()``; use an ordered
        sequence if the order of the elements matters.
    :param size:
        Number of elements per chunk.
    :param padding:
        Optional fill value, given either as the third positional argument
        or as the ``padding`` keyword. When supplied, the final chunk is
        topped up with it so that every chunk has ``size`` elements::

            map(tuple, ichunks("aaabccd", 3, "-"))
            -> [("a", "a", "a"), ("b", "c", "c"), ("d", "-", "-")]

            map(tuple, ichunks("aaabccd", 3, None))
            -> [("a", "a", "a"), ("b", "c", "c"), ("d", None, None)]

        When omitted, nothing is appended and the last chunk may be shorter
        than ``size``.
    :yields:
        One ``islice`` iterator per chunk.
    """
    total = len(iterable)

    if not (args or kwargs):
        # No padding requested: slice the sequence as it is.
        for start in range(0, total, size):
            yield islice(iterable, start, start + size)
        return

    # Any keyword argument means the value is looked up under "padding";
    # otherwise the first extra positional argument is the fill value.
    padding = kwargs["padding"] if kwargs else args[0]
    for start in range(0, total, size):
        filler = repeat(padding, size - (total % size))
        yield islice(chain(iterable, filler), start, start + size)
def make_prime_sieve(max_n):
    """
    Returns a list of all primes strictly less than ``max_n``.

    Odd-only sieve of Eratosthenes (adapted from ``_rwh_primes1``): slot
    ``k`` of the sieve stands for the odd number ``2 * k + 1``.

    :param max_n:
        Exclusive upper bound for the primes to generate.
    :returns:
        List of primes ``p < max_n`` in ascending order; empty when
        ``max_n <= 2``.
    """
    if max_n <= 2:
        # There is no prime below 2. Without this guard the unconditional
        # ``[2] + ...`` below would wrongly report 2 as being < max_n.
        return []
    sieve = [True] * (max_n // 2)
    for i in _compat.range(3, int(max_n ** 0.5) + 1, 2):
        if sieve[i // 2]:
            # Strike out i*i, i*i + 2*i, ... (the odd multiples of i).
            sieve[i * i // 2::i] = (
                [False] * ((max_n - i * i - 1) // (2 * i) + 1)
            )
    return [2] + [
        2 * i + 1 for i in _compat.range(1, max_n // 2) if sieve[i]
    ]
def _rwh_primes1(n): # http://stackoverflow.com/questions/2068372/fastest-way-to-list-all-primes-below-n-in-python/3035188#3035188 """ Returns a list of primes < n """ sieve = [True] * (n//2) for i in range(3,int(n**0.5)+1,2): if sieve[i//2]: sieve[i*i//2::i] = [False] * ((n-i*i-1)//(2*i)+1) return [2] + [2*i+1 for i in range(1,n//2) if sieve[i]]
def make_prime_sieve(max_n):
    """
    Returns a list of all primes strictly less than ``max_n``.

    Odd-only sieve of Eratosthenes (adapted from ``_rwh_primes1``): slot
    ``k`` of the sieve stands for the odd number ``2 * k + 1``.

    :param max_n:
        Exclusive upper bound for the primes to generate.
    :returns:
        List of primes ``p < max_n`` in ascending order; empty when
        ``max_n <= 2``.
    """
    if max_n <= 2:
        # No prime is smaller than 2, so do not fall through to the
        # unconditional ``[2] + ...`` below.
        return []
    half = max_n // 2
    sieve = [True] * half
    for i in _compat.range(3, int(max_n**0.5) + 1, 2):
        if sieve[i // 2]:
            # Strike out i*i, i*i + 2*i, ... (the odd multiples of i).
            sieve[i * i // 2::i] = (
                [False] * ((max_n - i * i - 1) // (2 * i) + 1)
            )
    return [2] + [2 * i + 1 for i in _compat.range(1, half) if sieve[i]]
def permutations(iterable, r=None):
    """
    Yield successive `r`-length permutations of the elements of `iterable`.

    When `r` is omitted or ``None`` it defaults to the number of elements,
    so every full-length permutation is produced.

    Permutations come out in lexicographic order of element positions, so
    a sorted input produces sorted output tuples.

    Elements are told apart by position, not by value; if the input values
    are unique, no permutation repeats a value.

    ``n! / (n - r)!`` tuples are produced when ``0 <= r <= n`` and none
    when `r > n`.

    .. note:: Software and documentation for this function are taken from
              CPython, :ref:`license details <psf-license>`.
    """
    items = tuple(iterable)
    count = len(items)
    if r is None:
        r = count
    for picks in product(range(count), repeat=r):
        # Skip index tuples that use the same position more than once.
        if len(set(picks)) != r:
            continue
        yield tuple(items[k] for k in picks)
def uint_to_bytes_pycrypto(uint, blocksize=0):
    """long_to_bytes(n:long, blocksize:int) : string

    Convert a long integer to a big-endian byte string.

    If the optional blocksize is given and greater than zero, the front of
    the byte string is padded with binary zeros so that its length is a
    multiple of blocksize.
    """
    # After much testing, this algorithm was deemed to be the fastest.
    raw_bytes = EMPTY_BYTE
    remaining = int(uint)
    # Peel off 32 bits at a time, least-significant word first, and prepend
    # each packed word so the result ends up big-endian.
    while remaining > 0:
        raw_bytes = pack('>I', remaining & 0xffffffff) + raw_bytes
        remaining >>= 32

    # Find the first non-zero byte so leading zeros can be dropped.
    first_nonzero = 0
    for first_nonzero in range(len(raw_bytes)):
        if raw_bytes[first_nonzero] != ZERO_BYTE[0]:
            break
    else:
        # No non-zero byte was found (nothing was packed at all): emit a
        # single zero byte.
        raw_bytes = ZERO_BYTE
        first_nonzero = 0
    raw_bytes = raw_bytes[first_nonzero:]

    # Add back pad bytes up to the next multiple of blocksize. This could
    # be done more efficiently w.r.t. the de-padding above.
    if blocksize > 0:
        overhang = len(raw_bytes) % blocksize
        if overhang:
            raw_bytes = (blocksize - overhang) * ZERO_BYTE + raw_bytes
    return raw_bytes
def chunks(iterable, size, *args, **kwargs):
    """
    Splits a sequence into materialized chunks of the given size.

    :param iterable:
        The sequence to split. It must support ``len()``, slicing and
        concatenation; use an ordered sequence if order matters.
    :param size:
        Number of elements per chunk.
    :param padding:
        Optional; given as the third positional argument or as the
        ``padding`` keyword. It must be a sequence of the same type as
        ``iterable`` (so for a ``True`` filler use ``[True]`` or
        ``(True,)``), or ``None``. When supplied, enough copies of it are
        concatenated to the end so the last chunk is full::

            tuple(chunks("aaabccd", 3, "-"))
            -> ("aaa", "bcc", "d--")

            tuple(chunks((1, 1, 1, 2, 2), 3, (None,)))
            -> ((1, 1, 1, ), (2, 2, None))

        When omitted, nothing is appended and the last chunk may be shorter
        than ``size``.
    :yields:
        Each chunk as a slice of the (possibly padded) sequence.
    """
    total = len(iterable)

    if not (args or kwargs):
        # No padding requested: slice the sequence as it is.
        for start in range(0, total, size):
            yield iterable[start:start + size]
        return

    # Any keyword argument means the value is looked up under "padding";
    # otherwise the first extra positional argument is the fill value.
    padding = kwargs["padding"] if kwargs else args[0]
    if padding is None:
        # Pick a filler whose type can be concatenated to the input.
        if is_bytes_or_unicode(iterable):
            padding = ""
        elif isinstance(iterable, tuple):
            padding = (None,)
        else:
            iterable = list(iterable)
            padding = [None]

    padded = iterable + (padding * (size - (total % size)))
    for start in range(0, total, size):
        yield padded[start:start + size]
def make_prime_sieve(max_n):
    """
    Returns a NumPy array of the primes ``p`` with ``2 <= p < max_n``.

    Adapted from ``_numpy_primesfrom2to``. Slot ``i`` of the sieve stands
    for the number ``(3 * i + 1) | 1``; 2 and 3 are prepended explicitly.

    :param max_n:
        Exclusive upper bound. Input must satisfy ``max_n >= 6``.
    :returns:
        Array of primes in ascending order.
    """
    # Use the builtin ``bool`` as dtype: the ``np.bool`` alias was deprecated
    # in NumPy 1.20 and removed in 1.24, where it raises AttributeError.
    sieve = np.ones(max_n // 3 + (max_n % 6 == 2), dtype=bool)
    sieve[0] = False
    for i in _compat.range(int(max_n ** 0.5) // 3 + 1):
        if sieve[i]:
            k = 3 * i + 1 | 1
            sieve[((k * k) // 3)::2 * k] = False
            sieve[(k * k + 4 * k - 2 * k * (i & 1)) // 3::2 * k] = False
    return np.r_[2, 3, ((3 * np.nonzero(sieve)[0] + 1) | 1)]
def make_prime_sieve(max_n):
    """
    Returns a NumPy array of the primes ``p`` with ``2 <= p < max_n``.

    Adapted from ``_numpy_primesfrom2to``. Slot ``i`` of the sieve stands
    for the number ``(3 * i + 1) | 1``; 2 and 3 are prepended explicitly.

    :param max_n:
        Exclusive upper bound. Input must satisfy ``max_n >= 6``.
    :returns:
        Array of primes in ascending order.
    """
    # ``dtype=bool`` instead of ``np.bool``: that alias was deprecated in
    # NumPy 1.20 and removed in 1.24, where it raises AttributeError.
    sieve = np.ones(max_n // 3 + (max_n % 6 == 2), dtype=bool)
    sieve[0] = False
    for i in _compat.range(int(max_n**0.5) // 3 + 1):
        if sieve[i]:
            k = 3 * i + 1 | 1
            sieve[((k * k) // 3)::2 * k] = False
            sieve[(k * k + 4 * k - 2 * k * (i & 1)) // 3::2 * k] = False
    return np.r_[2, 3, ((3 * np.nonzero(sieve)[0] + 1) | 1)]
def _numpy_primesfrom2to(n): # http://stackoverflow.com/questions/2068372/fastest-way-to-list-all-primes-below-n-in-python/3035188#3035188 """ Input n>=6, Returns a array of primes, 2 <= p < n """ sieve = np.ones(n//3 + (n%6==2), dtype=np.bool) sieve[0] = False for i in range(int(n**0.5)//3+1): if sieve[i]: k=3*i+1|1 sieve[ ((k*k)//3) ::2*k] = False sieve[(k*k+4*k-2*k*(i&1))//3::2*k] = False return np.r_[2,3,((3*np.nonzero(sieve)[0]+1)|1)]
def random_shuffle(sequence, rand_func=generate_random_bytes):
    """
    Randomly shuffles the sequence in-place.

    :param sequence:
        Mutable sequence to shuffle in-place.
    :param rand_func:
        Random source handed through to ``generate_random_uint_between``.
    :returns:
        The shuffled sequence itself (for convenience).
    """
    remaining = list(sequence)
    # Fill each slot with an item drawn (without replacement) from the
    # items that have not been placed yet.
    for slot in range(len(sequence)):
        pick = generate_random_uint_between(0, len(remaining), rand_func)
        sequence[slot] = remaining.pop(pick)
    return sequence
def generate_random_sequence(length, pool, rand_func=generate_random_bytes):
    """
    Builds a list of ``length`` elements, each chosen at random from
    ``pool``.

    :param length:
        The length of the random sequence; must be a positive integer.
    :param pool:
        A sequence of elements from which random elements are chosen.
    :param rand_func:
        Random source handed through to ``random_choice``.
    :returns:
        A list of elements randomly chosen from the pool.
    :raises:
        ``TypeError`` if ``length`` is not an integer; ``ValueError`` if it
        is not positive.
    """
    if not is_integer(length):
        type_name = type(length).__name__
        raise TypeError(
            "Length must be a positive integer: got `%r`" % type_name
        )
    if length <= 0:
        raise ValueError("length must be a positive integer: got %d" % length)

    picks = []
    for _ in range(length):
        picks.append(random_choice(pool, rand_func))
    return picks
def find(predicate, iterable, start=0):
    """
    Finds the first index at or after ``start`` whose element satisfies the
    predicate.

    :param predicate:
        Predicate function of the form::

            f(x) -> bool
    :param iterable:
        Indexable sequence supporting ``len()``.
    :param start:
        Index at which the search begins.
    :returns:
        -1 if not found; index (>= start) if found.
    """
    matches = (
        index
        for index in range(start, len(iterable))
        if predicate(iterable[index])
    )
    # The generator is lazy, so the predicate is not evaluated past the
    # first hit.
    return next(matches, -1)
def _pure_is_prime(num, iterations=5, _sieve=sieve):
    """
    Determines whether a number is (probably) prime.

    Performs trial division by the entries of ``_sieve`` and then
    ``iterations`` rounds of the Rabin-Miller test.

    :param num:
        Number to test.
    :param iterations:
        Number of Rabin-Miller rounds.
    :param _sieve:
        (Internal) Small primes used for trial division; the early
        ``True`` return relies on them being in ascending order.
    :returns:
        ``True`` if prime; ``False`` otherwise.
    """
    # 0, 1 and negative numbers are not prime. Without this guard the first
    # sieve entry would already be >= num and ``True`` would be returned.
    if num < 2:
        return False

    # Trial division with sieve.
    for prime in _sieve:
        if prime >= num:
            return True
        if not num % prime:
            return False

    # Passed trial division, proceed to Rabin-Miller.
    # Rabin-Miller implemented per Ferguson & Schneier.
    # Compute s, t such that num - 1 == s * 2**t with s odd.
    s, t = num - 1, 0
    while not s % 2:
        s, t = s // 2, t + 1

    # Use 2 as a base for first iteration speedup, per HAC.
    base = 2
    for _ in range(iterations):
        v = _pure_pow_mod(base, s, num)
        if v != 1:
            i = 0
            while v != num - 1:
                if i == t - 1:
                    return False
                v, i = _pure_pow_mod(v, 2, num), i + 1
        # Always draw a fresh base. Previously a ``continue`` on ``v == 1``
        # skipped this line, so every remaining round re-tested the same
        # base and added no confidence.
        base = generate_random_uint_between(2, num)
    return True
def _pure_is_prime(num, iterations=5, _sieve=SIEVE):
    """
    Determines whether a number is (probably) prime.

    Performs trial division by the entries of ``_sieve`` and then
    ``iterations`` rounds of the Rabin-Miller test.

    :param num:
        Number to test.
    :param iterations:
        Number of Rabin-Miller rounds.
    :param _sieve:
        (Internal) Small primes used for trial division; the early
        ``True`` return relies on them being in ascending order.
    :returns:
        ``True`` if prime; ``False`` otherwise.
    """
    # 0, 1 and negative numbers are not prime. Without this guard the first
    # sieve entry would already be >= num and ``True`` would be returned.
    if num < 2:
        return False

    # Trial division with sieve.
    for prime_number in _sieve:
        if prime_number >= num:
            return True
        if not num % prime_number:
            return False

    # Passed trial division, proceed to Rabin-Miller.
    # Rabin-Miller implemented per Ferguson & Schneier.
    # Compute num_s, num_t such that num - 1 == num_s * 2**num_t, num_s odd.
    num_s, num_t = num - 1, 0
    while not num_s % 2:
        num_s, num_t = num_s // 2, num_t + 1

    # Use 2 as a base for first iteration speedup, per HAC.
    base = 2
    for _ in range(iterations):
        num_v = _pure_pow_mod(base, num_s, num)
        if num_v != 1:
            i = 0
            while num_v != num - 1:
                if i == num_t - 1:
                    return False
                num_v, i = _pure_pow_mod(num_v, 2, num), i + 1
        # Always draw a fresh base. Previously a ``continue`` on
        # ``num_v == 1`` skipped this line, so every remaining round
        # re-tested the same base and added no confidence.
        base = generate_random_uint_between(2, num)
    return True
def uint_to_bytes_naive_array_based(uint, chunk_size=0):
    """
    Converts an unsigned integer into big-endian bytes.

    :param uint:
        Unsigned integer value.
    :param chunk_size:
        If greater than zero, the result is left-padded with zero bytes up
        to ``chunk_size`` bytes.
    :returns:
        Bytes.
    :raises:
        ``ValueError`` if ``uint`` is negative; ``OverflowError`` if
        ``chunk_size`` is given and the number needs more bytes than that.
    """
    if uint < 0:
        raise ValueError('Negative numbers cannot be used: %i' % uint)
    if uint == 0:
        bytes_count = 1
    else:
        bytes_count = integer_byte_length(uint)

    # Fill the array from the last slot backwards, one byte at a time.
    byte_array = array('B', [0] * bytes_count)
    for count in range(bytes_count - 1, -1, -1):
        byte_array[count] = uint & 0xff
        uint >>= 8

    # ``array.tostring()`` was removed in Python 3.9; ``tobytes()`` is its
    # replacement. Fall back to ``tostring()`` where ``tobytes`` does not
    # exist (Python 2).
    try:
        serialize = byte_array.tobytes
    except AttributeError:
        serialize = byte_array.tostring
    raw_bytes = serialize()

    if chunk_size > 0:
        # Bounds checking. We're not doing this up-front because the
        # most common use case is not specifying a chunk size. In the worst
        # case, the number will already have been converted to bytes above.
        if bytes_count > chunk_size:
            raise OverflowError(
                "Need %d bytes for number, but chunk size is %d" %
                (bytes_count, chunk_size)
            )
        remainder = len(raw_bytes) % chunk_size
        if remainder:
            raw_bytes = (chunk_size - remainder) * ZERO_BYTE + raw_bytes
    return raw_bytes
def bytes_to_uint_naive(raw_bytes, _zero_byte=ZERO_BYTE):
    """
    Converts bytes (base-256 representation) to integer::

        bytes_to_integer(bytes) : integer

    This is (essentially) the inverse of integer_to_bytes().

    Encode your Unicode strings to a byte encoding before converting them.

    .. WARNING: Does not preserve leading zero bytes.

    :param raw_bytes:
        Raw bytes (base-256 representation), most significant byte first.
    :param _zero_byte:
        (Internal) Byte used to left-pad the input to a multiple of 4.
    :returns:
        Integer.
    :raises:
        ``TypeError`` if ``raw_bytes`` is not a byte string.
    """
    if not is_bytes(raw_bytes):
        raise TypeError("argument must be raw bytes: got %r" %
                        type(raw_bytes).__name__)

    length = len(raw_bytes)
    # Number of zero bytes needed in front to reach a multiple of 4.
    shortfall = (4 - length % 4) % 4
    if shortfall:
        raw_bytes = _zero_byte * shortfall + raw_bytes
        length += shortfall

    # Fold the big-endian 32-bit words into one integer.
    int_value = 0
    offset = 0
    while offset < length:
        (word,) = unpack('>I', raw_bytes[offset:offset + 4])
        int_value = (int_value << 32) + word
        offset += 4
    return int_value
def combinations_with_replacement(iterable, r):
    """
    Yield `r`-length sub-sequences of elements from the `iterable`, allowing
    an individual element to be used more than once.

    Combinations come out in lexicographic order of element positions, so
    a sorted input produces sorted output tuples.

    Elements are told apart by position, not by value; if the input values
    are unique, the generated combinations are unique too.

    ``(n + r - 1)! / r! / (n - 1)!`` tuples are produced when ``n > 0``.

    .. note:: Software and documentation for this function are taken from
              CPython, :ref:`license details <psf-license>`.
    """
    items = tuple(iterable)
    for picks in product(range(len(items)), repeat=r):
        # Keep only index tuples that are already in non-decreasing order;
        # every other ordering of the same indices is a duplicate.
        if list(picks) != sorted(picks):
            continue
        yield tuple(items[k] for k in picks)
def uint_to_bytes_array_based(number, chunk_size=0):
    """
    Convert a integer to bytes (base-256 representation)::

        integer_to_bytes(n:int, chunk_size:int) : string

    .. WARNING: Does not preserve leading zeros if you don't specify a
                chunk size.

    :param number:
        Integer value
    :param chunk_size:
        If optional chunk size is given and greater than zero, pad the
        front of the byte string with binary zeros up to ``chunk_size``
        bytes.
    :returns:
        Raw bytes (base-256 representation).
    :raises:
        ``ValueError`` when the number is negative; ``OverflowError`` when
        chunk_size is given and the number takes up more bytes than fit
        into the chunk.
    """
    # Machine word aligned byte array based implementation.
    if number < 0:
        raise ValueError('Number must be unsigned integer: %d' % number)

    raw_bytes = EMPTY_BYTE
    if not number:
        raw_bytes = ZERO_BYTE

    # Align packing to machine word size.
    num = number
    word_bits, word_bytes, max_uint, pack_type = get_word_alignment(num)
    pack_format = ">" + pack_type

    temp_buffer = array("B", [0] * word_bytes)
    byte_array = array("B", raw_bytes)
    while num > 0:
        # Pack the lowest word and prepend it so the result is big-endian.
        pack_into(pack_format, temp_buffer, 0, num & max_uint)
        byte_array = temp_buffer + byte_array
        num >>= word_bits

    # Count the number of zero prefix bytes.
    zero_leading = 0
    length = len(byte_array)
    for zero_leading in range(length):
        if byte_array[zero_leading]:
            break

    # ``array.tostring()`` was removed in Python 3.9; ``tobytes()`` is its
    # replacement. Fall back to ``tostring()`` where ``tobytes`` does not
    # exist (Python 2).
    stripped = byte_array[zero_leading:]
    try:
        serialize = stripped.tobytes
    except AttributeError:
        serialize = stripped.tostring
    raw_bytes = serialize()

    if chunk_size > 0:
        # Bounds checking. We're not doing this up-front because the
        # most common use case is not specifying a chunk size. In the worst
        # case, the number will already have been converted to bytes above.
        length = len(raw_bytes)
        if length > chunk_size:
            raise OverflowError(
                "Need %d bytes for number, but chunk size is %d" %
                (length, chunk_size)
            )
        remainder = length % chunk_size
        if remainder:
            raw_bytes = (chunk_size - remainder) * ZERO_BYTE + raw_bytes
    return raw_bytes
def _pure_pow_mod(base, power, modulus): """ Calculates: base**pow mod modulus Uses multi bit scanning with nBitScan bits at a time. From Bryan G. Olson's post to comp.lang.python Does left-to-right instead of pow()'s right-to-left, thus about 30% faster than the python built-in with small bases :param base: Base :param power: Power :param modulus: Modulus :returns: base**pow mod modulus """ n_bit_scan = 5 #TREV - Added support for negative exponents negative_result = False if power < 0: power *= -1 negative_result = True #exp2 = 2**n_bit_scan exp2 = 1 << n_bit_scan mask = exp2 - 1 # Break power into a list of digits of nBitScan bits. # The list is recursive so easy to read in reverse direction. nibbles = None while power: nibbles = int(power & mask), nibbles power >>= n_bit_scan # Make a table of powers of base up to 2**nBitScan - 1 low_powers = [1] for i in range(1, exp2): low_powers.append((low_powers[i-1] * base) % modulus) # To exponentiate by the first nibble, look it up in the table nib, nibbles = nibbles prod = low_powers[nib] # For the rest, square nBitScan times, then multiply by # base^nibble while nibbles: nib, nibbles = nibbles for i in range(n_bit_scan): prod = (prod * prod) % modulus if nib: prod = (prod * low_powers[nib]) % modulus #TREV - Added support for negative exponents if negative_result: prodInv = inverse_mod(prod, modulus) #Check to make sure the inverse is correct assert (prod * prodInv) % modulus == 1 return prodInv return prod
def b85decode(encoded, prefix=None, suffix=None,
              _base85_ords=ASCII85_ORDS,
              _base85_chars=ASCII85_CHARS,
              _ignore_pattern=WHITESPACE_PATTERN,
              _uncompact_zero=True,
              _compact_char=ZERO_GROUP_CHAR):
    """
    Decodes an ASCII85-encoded byte string into raw bytes.

    :param encoded:
        Encoded ASCII string (bytes).
    :param prefix:
        The prefix used by the encoded text. None by default.
    :param suffix:
        The suffix used by the encoded text. None by default.
    :param _base85_ords:
        (Internal) Mapping from a base85 character to its ordinal value.
        You should not need to use this.
    :param _base85_chars:
        (Internal) The base85 character set; entry 84 is used as the
        padding character. You should not need to use this.
    :param _ignore_pattern:
        (Internal) Pattern whose matches are removed before decoding; by
        default all whitespace is ignored. You should not need to use this.
    :param _uncompact_zero:
        (Internal) When ``True`` (default) every compact character ('z',
        standing for a group of four zero bytes) is expanded to '!!!!!'.
    :param _compact_char:
        (Internal) Character used to represent compact groups ('z' default)
    :returns:
        ASCII85-decoded raw bytes.
    :raises:
        ``TypeError`` if an argument is not bytes; ``OverflowError`` if a
        5-character group cannot be decoded.
    """
    prefix = prefix or b("")
    suffix = suffix or b("")

    if not is_bytes(prefix) or not is_bytes(suffix):
        raise TypeError(
            "Prefix/suffix must be bytes: got prefix %r, %r" %
            (type(prefix).__name__, type(suffix).__name__)
        )
    if not is_bytes(_compact_char):
        raise TypeError("compat character must be raw byte: got %r" %
                        type(_compact_char).__name__)
    if not is_bytes(encoded):
        raise TypeError(
            "Encoded sequence must be bytes: got %r" %
            type(encoded).__name__
        )

    # ASCII-85 ignores whitespace.
    if _ignore_pattern:
        encoded = re.sub(_ignore_pattern, b(''), encoded)

    # Strip the prefix and suffix.
    if prefix and encoded.startswith(prefix):
        encoded = encoded[len(prefix):]
    if suffix and encoded.endswith(suffix):
        encoded = encoded[:-len(suffix)]

    # Replace all the 'z' occurrences with '!!!!!'.
    if _uncompact_zero:
        _check_compact_char_occurrence(encoded, _compact_char, 5)
        encoded = encoded.replace(_compact_char, EXCLAMATION_CHUNK)

    # We want 5-tuple chunks, so pad with as many base85_ord == 84
    # characters as required to satisfy the length.
    total = len(encoded)
    group_count, leftover = divmod(total, 5)
    padding_size = 5 - leftover if leftover else 0
    if padding_size:
        # _base85_chars[84] is b'u' for ASCII85.
        encoded += _base85_chars[84] * padding_size
        group_count += 1
        total += padding_size

    ords = _base85_ords
    decoded_words = []
    for offset in range(0, total, 5):
        group = encoded[offset:offset + 5]
        c0, c1, c2, c3, c4 = group
        # Base-85 positional evaluation, unrolled for the five characters.
        try:
            word = ((((ords[c0] * 85 + ords[c1]) * 85 + ords[c2]) * 85 +
                     ords[c3]) * 85 + ords[c4])
        except KeyError:
            # Showing the chunk provides more context, which makes
            # debugging easier.
            raise OverflowError("Cannot decode chunk `%r`" % group)
        # Groups of characters that decode to a value greater than
        # 2**32 - 1 (encoded as "s8W-!") cause a decoding error.
        if word > UINT32_MAX:
            raise OverflowError("Cannot decode chunk `%r`" % group)
        decoded_words.append(word)

    raw_bytes = pack(">" + "L" * group_count, *decoded_words)
    if not padding_size:
        return raw_bytes
    # Only as much padding added before decoding is removed after decoding.
    return raw_bytes[:-padding_size]
"ASCII85_PREFIX", "ASCII85_SUFFIX", "ipv6_b85encode", "ipv6_b85decode", ] EXCLAMATION_CHUNK = b('!!!!!') ZERO_GROUP_CHAR = b('z') # Use this if you want the base85 codec to encode/decode including # ASCII85 prefixes/suffixes. ASCII85_PREFIX = b('<~') ASCII85_SUFFIX = b('~>') # ASCII85 characters. ASCII85_BYTES = array('B', [(num + 33) for num in range(85)]) # I've left this approach in here to warn you to NOT use it. # This results in a massive amount of calls to byte_ord inside # tight loops. Don't use the array. Use the dictionary. It # removes the need to convert to ords at runtime. #ASCII85_ORDS = array('B', [255] * 128) #for ordinal, _byte in enumerate(ASCII85_BYTES): # ASCII85_ORDS[_byte] = ordinal # http://tools.ietf.org/html/rfc1924 RFC1924_BYTES = array('B', (string.DIGITS + string.ASCII_UPPERCASE + string.ASCII_LOWERCASE + "!#$%&()*+-;<=>?@^_`{|}~").encode("ascii"))
def _b85decode_chunks(encoded, base85_bytes, base85_ords):
    """
    Base-85 decodes a byte string, five characters per 32-bit word.

    :param encoded:
        Encoded ASCII string.
    :param base85_bytes:
        Character set to use; entry 84 supplies the padding byte.
    :param base85_ords:
        Mapping from an element of ``encoded`` to its base-85 ordinal
        value. You should not need to use this.
    :returns:
        Base-85-decoded raw bytes.
    :raises:
        ``OverflowError`` if a 5-character group cannot be decoded.
    """
    # We want 5-tuple chunks, so pad with as many base85_ord == 84
    # characters as required to satisfy the length.
    total = len(encoded)
    word_count, leftover = divmod(total, 5)
    padding_size = 5 - leftover if leftover else 0
    if padding_size:
        # 'u' (ASCII85); '~' (RFC1924)
        encoded += byte(base85_bytes[84]) * padding_size
        word_count += 1
        total += padding_size

    words = array('I', [0] * word_count)
    chunk = EMPTY_BYTE
    try:
        for slot, offset in enumerate(range(0, total, 5)):
            chunk = encoded[offset:offset + 5]
            # Base-85 positional evaluation, unrolled for the five
            # characters. The ordinals are looked up directly from the
            # chunk elements; converting each one with byte_ord inside this
            # tight loop proved too slow.
            value = ((((base85_ords[chunk[0]] * 85 +
                        base85_ords[chunk[1]]) * 85 +
                       base85_ords[chunk[2]]) * 85 +
                      base85_ords[chunk[3]]) * 85 +
                     base85_ords[chunk[4]])
            # Groups of characters that decode to a value greater than
            # 2**32 - 1 (encoded as "s8W-!") cause a decoding error.
            if value > UINT32_MAX:
                raise OverflowError("Cannot decode chunk `%r`" % chunk)
            words[slot] = value
    except KeyError:
        raise OverflowError("Cannot decode chunk `%r`" % chunk)

    raw_bytes = pack(">" + "L" * word_count, *words)
    if not padding_size:
        return raw_bytes
    # Only as much padding added before decoding is removed after decoding.
    return raw_bytes[:-padding_size]
:param char: Base85 character :returns: Ordinal value. """ return ord(char) - 33 # Use this if you want the base85 codec to encode/decode including # ASCII85 prefixes/suffixes. ASCII85_PREFIX = b('<~') ASCII85_SUFFIX = b('~>') WHITESPACE_PATTERN = re.compile(b(r'(\s)*'), re.MULTILINE) # ASCII85 characters. ASCII85_CHARS = tuple(_ascii85_chr(num) for num in range(85)) #"".join(map(_ascii85_chr, range(85))) if have_python3: # Python 3 bytes when indexed return integers, not single-character # byte strings. ASCII85_ORDS = dict((ord(x), _ascii85_ord(x)) for x in ASCII85_CHARS) else: # Python 2 bytes when index return single-character byte strings. ASCII85_ORDS = dict((x, _ascii85_ord(x)) for x in ASCII85_CHARS) # http://tools.ietf.org/html/rfc1924 RFC1924_CHARS = (string.digits + string.ascii_uppercase + string.ascii_lowercase + "!#$%&()*+-;<=>?@^_`{|}~").encode("latin1")