def test_peek():
    """Check ``peek`` on a non-empty list and on an empty one.

    For a non-empty input, ``peek`` must hand back the first item together
    with an iterable that still produces every item of the input. For an
    empty input it must raise ``StopIteration``.
    """
    alist = ["Alice", "Bob", "Carol"]
    element, blist = peek(alist)

    # This comparison must be asserted; as a bare expression its result is
    # discarded and the check silently never runs.
    assert element == alist[0]
    # Peeking must not consume the first item from the returned iterable.
    assert list(blist) == alist

    assert raises(StopIteration, lambda: peek([]))
def _(xs):
    """
    Yield ``_dawg_with_bounds(chunk)`` for every chunk produced by
    ``partition_all(max_records_per_trie, ...)`` over ``xs``.

    ``xs`` is peeked first; if that (or anything else in the body) raises
    ``StopIteration``, the generator simply ends without yielding more.
    """
    try:
        _head, remaining = peek(xs)
        chunks = partition_all(max_records_per_trie, remaining)
        for batch in chunks:
            yield _dawg_with_bounds(batch)
    except StopIteration:
        # Nothing (more) to build: finish the generator quietly.
        return
def _dawg_with_bounds(xs, dawg_impl=dawg.IntDAWG):
    """
    Build ``dawg_impl`` from ``xs`` while recording the smallest and largest
    token that passes through.

    The first item decides the layout: if it is a ``str`` every item is its
    own token, otherwise the token is ``item[0]``.

    :param xs: Iterable[Tuple[str, T]] where T is a type acceptable as a key
        for dawg_impl (an iterable of plain ``str`` is handled as well)
    :param dawg_impl: Type[dawg.DAWG]
    :return: tuple ``(min_token, max_token, dawg)``; both bounds are ``None``
        when peeking ``xs`` raises ``StopIteration``
    """
    try:
        head, xs = peek(xs)
        has_index = not isinstance(head, str)
        lowest = highest = head[0] if has_index else head
    except StopIteration:
        has_index = False
        lowest = highest = None
        xs = []

    def _tracking(items):
        # Pass every item through unchanged while widening the bounds.
        # NOTE: the bounds are only complete once ``dawg_impl`` has consumed
        # this generator entirely.
        nonlocal lowest, highest
        for item in items:
            token = item[0] if has_index else item
            if item is not None:
                lowest = min(lowest, token)
                highest = max(highest, token)
            yield item

    built = dawg_impl(_tracking(xs))
    return lowest, highest, built
def _lookup_from_local(xs, has_index, sort, partition_mapper):
    """
    Build lookup / bounds list from local collection.

    The first item of ``xs`` is peeked to check that, when ``has_index`` is
    falsy, the items are plain strings. If peeking raises ``StopIteration``
    an empty list is used instead. The collection is then handed to
    ``DawgLookup._prepare_local``, the result is fed to ``partition_mapper``
    and whatever that produces is materialised into a list.
    """
    try:
        head, xs = peek(xs)
        assert has_index or isinstance(head, str)
    except StopIteration:
        xs = []

    prepared = DawgLookup._prepare_local(xs, has_index, sort)
    return list(partition_mapper(prepared))
def simple_partition_mapper(xs):
    """
    Yield a single ``_dawg_with_bounds(xs)`` result covering all of ``xs``.

    ``xs`` is peeked first; if that (or anything else in the body) raises
    ``StopIteration``, the generator ends without yielding.
    """
    try:
        _head, remaining = peek(xs)
        yield _dawg_with_bounds(remaining)
    except StopIteration:
        # Nothing to build: finish the generator quietly.
        return
def disperse(seq):
    """
    Yield the items of ``seq`` in a "dispersed" order, so that values that
    follow each other in the output are preferably not only non-adjacent in
    the input, but fairly far apart.

    The first item is yielded first. The remaining order is produced by
    recursively halving the sequence: the item at the midpoint is yielded,
    then the results for the back half and the front half are interleaved.
    This does not always work with small inputs, but works nicely with large
    ones.

    Args:
        seq: the sequence or iterable to reorder. When it does not support
            ``len()``, it is duplicated with ``itertools.tee`` and one copy
            is passed to ``count`` to determine the length.

    Returns:
        result(generator): a generator that can be used to iterate through
        the reordered items.

    Examples:

        >>> list(disperse(range(10)))
        [0, 5, 8, 3, 9, 4, 6, 1, 7, 2]
    """
    try:
        len_seq = len(seq)
    except TypeError:
        # No len(): duplicate the iterator so one copy can be handed to
        # ``count`` while the other is kept for the actual iteration.
        # NOTE(review): ``count`` is defined outside this block; presumably it
        # returns the number of items in an iterable -- confirm.
        seq, len_seq = itertools.tee(seq)
        len_seq = count(len_seq)

    # Recursive worker: ``b`` is the number of items expected in
    # ``part_seq_1``. It yields nothing when ``b`` is 0 or when the midpoints
    # do not fall strictly inside the range (e.g. b == 1).
    def disperse_helper(b, part_seq_1):
        if b != 0:
            # Lower and upper midpoint of b; they are equal when b is even
            # and differ by one when b is odd.
            half_diff = float(b) / 2.0
            mid_1 = int(math.floor(half_diff))
            mid_2 = int(math.ceil(half_diff))
            if 0 < mid_1 and b > mid_2:
                # Two independent copies so the sequence can be split at both
                # midpoints.
                part_seq_1, part_seq_2 = itertools.tee(part_seq_1)
                # NOTE(review): ``split`` is defined outside this block;
                # assumes split(n, seq) returns (items before index n, the
                # item at index n as an iterable, items after it) -- confirm.
                front_mid_1_seq, mid_1_val, _ = split(mid_1, part_seq_1)
                _, mid_2_val, back_mid_2_seq = split(mid_2, part_seq_2)
                # Drop the reference to the discarded split part.
                del _
                # The upper-midpoint item is needed twice: once to be yielded
                # now, and once as the leading item of the back part passed
                # to the recursive call.
                mid_2_val = itertools.tee(mid_2_val)
                back_mid_2_seq = concat([mid_2_val[0], back_mid_2_seq])
                mid_2_val = mid_2_val[1]
                yield (first(mid_2_val))
                # Interleave the recursive results, back part before front
                # part; zip stops as soon as either side is exhausted.
                for _1, _2 in zip(disperse_helper(mid_1 - 0, front_mid_1_seq), disperse_helper(b - mid_2, back_mid_2_seq)):
                    yield (_2)
                    yield (_1)
                # Odd b: the lower midpoint is a distinct item that has not
                # been yielded above, so emit it last.
                if mid_1 != mid_2:
                    yield (first(mid_1_val))

    if len_seq == 0:
        return
    # The first item is emitted here; ``peek`` also returns an iterable that
    # is used for the rest of the traversal.
    # NOTE(review): presumably that iterable still starts with ``val`` --
    # confirm against ``peek``.
    val, seq = peek(seq)
    yield (val)
    for each in disperse_helper(len_seq, seq):
        yield (each)