def test_EqualityHashKey_default_key():
    EqualityHashDefault = curry(EqualityHashKey, None)
    L1 = [1]
    L2 = [2]
    data1 = [L1, L1, L2, [], [], [1], [2], {}, ()]
    set1 = set(map(EqualityHashDefault, data1))
    set2 = set(map(EqualityHashDefault, [[], [1], [2], {}, ()]))
    assert set1 == set2
    assert len(set1) == 5

    # Test that ``EqualityHashDefault(item)`` is distinct from ``item``
    T0 = ()
    T1 = (1,)
    data2 = list(map(EqualityHashDefault, [T0, T0, T1, T1, (), (1,)]))
    data2.extend([T0, T1, (), (1,)])
    set3 = set(data2)
    assert set3 == {(), (1,), EqualityHashDefault(()), EqualityHashDefault((1,))}
    assert len(set3) == 4
    assert EqualityHashDefault(()) in set3
    assert EqualityHashDefault((1,)) in set3

    # Miscellaneous
    E1 = EqualityHashDefault(L1)
    E2 = EqualityHashDefault(L2)
    assert str(E1) == '=[1]='
    assert repr(E1) == '=[1]='

    assert E1 != E2
    assert not (E1 == E2)
    assert E1 == EqualityHashDefault(L1)
    assert not (E1 != EqualityHashDefault(L1))
    assert E1 != L1
    assert not (E1 == L1)
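
# A minimal usage sketch of why the default key matters: wrapping otherwise
# unhashable items (lists, dicts) in ``EqualityHashKey(None, item)`` lets
# them participate in sets by equality. Assumes ``curry`` and
# ``EqualityHashKey`` are importable from toolz as below.
from toolz import curry
from toolz.sandbox import EqualityHashKey

EqualityHashDefault = curry(EqualityHashKey, None)
seen = set()
for item in [[1], [1], [2], {'a': 1}]:
    seen.add(EqualityHashDefault(item))  # plain ``seen.add(item)`` would raise TypeError
assert len(seen) == 3  # the two [1] lists deduplicate by equality, not identity
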
def fold(binop, seq, default=no_default, map=map, chunksize=128, combine=None):
    """
    Reduce without guarantee of ordered reduction.

    inputs:

    ``binop``     - associative operator. The associative property allows us
                    to leverage a parallel map to perform reductions in
                    parallel.
    ``seq``       - a sequence to be aggregated
    ``default``   - an identity element like 0 for ``add`` or 1 for ``mul``
    ``map``       - an implementation of ``map``. This may be parallel and
                    determines how work is distributed.
    ``chunksize`` - Number of elements of ``seq`` that should be handled
                    within a single function call
    ``combine``   - Binary operator to combine two intermediate results.
                    If ``binop`` is of type (total, item) -> total
                    then ``combine`` is of type (total, total) -> total
                    Defaults to ``binop`` for common case of operators
                    like add

    Fold chunks up the collection into blocks of size ``chunksize`` and then
    feeds each of these to calls to ``reduce``. This work is distributed
    with a call to ``map``, gathered back and then refolded to finish the
    computation. In this way ``fold`` specifies only how to chunk up data
    but leaves the distribution of this work to an externally provided
    ``map`` function. This function can be sequential or rely on
    multithreading, multiprocessing, or even distributed solutions.

    If ``map`` intends to serialize functions it should be prepared to
    accept and serialize lambdas. Note that the standard ``pickle`` module
    fails here.

    Example
    -------

    >>> # Provide a parallel map to accomplish a parallel sum
    >>> from operator import add
    >>> fold(add, [1, 2, 3, 4], chunksize=2, map=map)
    10
    """
    if combine is None:
        combine = binop

    chunks = partition_all(chunksize, seq)

    # Evaluate sequence in chunks via map
    if default == no_default:
        results = map(lambda chunk: reduce(binop, chunk), chunks)
    else:
        results = map(lambda chunk: reduce(binop, chunk, default), chunks)

    results = list(results)  # TODO: Support complete laziness

    if len(results) == 1:    # Return completed result
        return results[0]
    else:                    # Recurse to reaggregate intermediate results
        return fold(combine, results, map=map, chunksize=chunksize)
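
# A hedged usage sketch: plugging a parallel ``map`` into ``fold``. A thread
# pool is used here because ``ThreadPoolExecutor.map`` happily accepts the
# lambdas that ``fold`` builds, which the standard ``pickle`` module (and
# hence naive multiprocessing) cannot serialize. Assumes ``fold`` is
# importable from ``toolz.sandbox.parallel``.
from concurrent.futures import ThreadPoolExecutor
from operator import add
from toolz.sandbox.parallel import fold

with ThreadPoolExecutor(max_workers=4) as pool:
    total = fold(add, range(1000), 0, map=pool.map, chunksize=100)
assert total == sum(range(1000))
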
def pluck(ind, seqs, default=no_default):
    """ plucks an element or several elements from each item in a sequence.

    ``pluck`` maps ``itertoolz.get`` over a sequence and returns one or more
    elements of each item in the sequence.

    This is equivalent to running `map(curried.get(ind), seqs)`

    ``ind`` can be either a single string/index or a list of
    strings/indices.
    ``seqs`` should be a sequence containing sequences or dicts.

    e.g.

    >>> data = [{'id': 1, 'name': 'Cheese'}, {'id': 2, 'name': 'Pies'}]
    >>> list(pluck('name', data))
    ['Cheese', 'Pies']

    >>> list(pluck([0, 1], [[1, 2, 3], [4, 5, 7]]))
    [(1, 2), (4, 5)]

    See Also:
        get
        map
    """
    if default == no_default:
        get = getter(ind)
        return map(get, seqs)
    elif isinstance(ind, list):
        return (tuple(_get(item, seq, default) for item in ind)
                for seq in seqs)
    return (_get(ind, seq, default) for seq in seqs)
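
# A short sketch of the ``default`` branches above: when some items lack the
# requested key, ``default`` substitutes a fill value instead of raising.
# Assumes ``pluck`` is importable from ``toolz``.
from toolz import pluck

data = [{'id': 1, 'name': 'Cheese'}, {'id': 2}]
assert list(pluck('name', data, default=None)) == ['Cheese', None]
# Multiple indices with a default yield padded tuples:
assert list(pluck(['id', 'name'], data, default='?')) == [(1, 'Cheese'), (2, '?')]
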
def _to_lists(seq, n=10):
    """iter of iters -> finite list of finite lists
    """
    def initial(s):
        return list(take(n, s))

    return initial(map(initial, seq))
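
# A hedged sketch of what this helper is for: truncating a potentially
# infinite iterator of infinite iterators so laziness-sensitive results can
# be asserted against concrete values. Uses ``_to_lists`` as defined above
# and ``itertools.count`` for the infinite inputs.
import itertools

seqs = (itertools.count(i) for i in itertools.count())
assert _to_lists(seqs, n=2) == [[0, 1], [1, 2]]
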
def mapcat(func, seqs):
    """ Apply func to each sequence in seqs, concatenating results.

    >>> list(mapcat(lambda s: [c.upper() for c in s],
    ...             [["a", "b"], ["c", "d", "e"]]))
    ['A', 'B', 'C', 'D', 'E']
    """
    return concat(map(func, seqs))
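
# A small supplementary sketch: ``mapcat`` is handy whenever ``func`` returns
# a variable number of results per input, e.g. flattening tokenized lines.
# Assumes ``mapcat`` is importable from ``toolz``.
from toolz import mapcat

lines = ["a b", "c d e", ""]
assert list(mapcat(str.split, lines)) == ['a', 'b', 'c', 'd', 'e']
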
def interleave(seqs):
    """ Interleave a sequence of sequences

    >>> list(interleave([[1, 2], [3, 4]]))
    [1, 3, 2, 4]

    >>> ''.join(interleave(('ABC', 'XY')))
    'AXBYC'

    Both the individual sequences and the sequence of sequences may be
    infinite

    Returns a lazy iterator
    """
    iters = itertools.cycle(map(iter, seqs))
    while True:
        try:
            for itr in iters:
                yield next(itr)
            return
        except StopIteration:
            # ``itr`` is exhausted: keep taking iterators from the cycle
            # until ``itr`` would come around again, then cycle over the
            # survivors. This drops only the exhausted iterator.
            predicate = partial(operator.is_not, itr)
            iters = itertools.cycle(itertools.takewhile(predicate, iters))
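
# A hedged sketch of the "may be infinite" claim: interleaving two unbounded
# generators still yields lazily, one element per source in turn. Assumes
# ``interleave`` and ``take`` are importable from ``toolz``.
import itertools
from toolz import interleave, take

evens = itertools.count(0, 2)
odds = itertools.count(1, 2)
assert list(take(6, interleave([evens, odds]))) == [0, 1, 2, 3, 4, 5]
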
def diff(*seqs, **kwargs):
    """ Return those items that differ between sequences

    >>> list(diff([1, 2, 3], [1, 2, 10, 100]))
    [(3, 10)]

    Shorter sequences may be padded with a ``default`` value:

    >>> list(diff([1, 2, 3], [1, 2, 10, 100], default=None))
    [(3, 10), (None, 100)]

    A ``key`` function may also be applied to each item to use during
    comparisons:

    >>> list(diff(['apples', 'bananas'], ['Apples', 'Oranges'], key=str.lower))
    [('bananas', 'Oranges')]
    """
    N = len(seqs)
    if N == 1 and isinstance(seqs[0], list):
        seqs = seqs[0]
        N = len(seqs)
    if N < 2:
        raise TypeError('Too few sequences given (min 2 required)')
    default = kwargs.get('default', no_default)
    if default == no_default:
        iters = zip(*seqs)
    else:
        iters = zip_longest(*seqs, fillvalue=default)
    key = kwargs.get('key', None)
    if key is None:
        for items in iters:
            if items.count(items[0]) != N:
                yield items
    else:
        for items in iters:
            vals = tuple(map(key, items))
            if vals.count(vals[0]) != N:
                yield items
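
# A short sketch of the single-argument form handled by the ``N == 1``
# branch: passing one list of sequences is treated the same as passing the
# sequences separately. Assumes ``diff`` is importable from ``toolz``.
from toolz import diff

seqs = [[1, 2, 3], [1, 2, 10], [1, 2, 3]]
assert list(diff(seqs)) == [(3, 10, 3)]
assert list(diff(*seqs)) == [(3, 10, 3)]
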
def test_map_filter_are_lazy():
    def bad(x):
        raise Exception()
    # Building the iterators must not call ``bad``; only consumption does.
    map(bad, [1, 2, 3])
    filter(bad, [1, 2, 3])
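
# A companion sketch: laziness means the exception only fires once the
# iterator is actually consumed. Assumes pytest is available, as in the
# surrounding test suite.
import pytest

def test_map_filter_raise_when_consumed():
    def bad(x):
        raise Exception()
    with pytest.raises(Exception):
        list(map(bad, [1, 2, 3]))
    with pytest.raises(Exception):
        list(filter(bad, [1, 2, 3]))
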