def fold(binop, seq, default=no_default, map=map, chunksize=128, combine=None): """ Reduce without guarantee of ordered reduction. inputs: ``binop`` - associative operator. The associative property allows us to leverage a parallel map to perform reductions in parallel. ``seq`` - a sequence to be aggregated ``default`` - an identity element like 0 for ``add`` or 1 for mul ``map`` - an implementation of ``map``. This may be parallel and determines how work is distributed. ``chunksize`` - Number of elements of ``seq`` that should be handled within a single function call ``combine`` - Binary operator to combine two intermediate results. If ``binop`` is of type (total, item) -> total then ``combine`` is of type (total, total) -> total Defaults to ``binop`` for common case of operators like add Fold chunks up the collection into blocks of size ``chunksize`` and then feeds each of these to calls to ``reduce``. This work is distributed with a call to ``map``, gathered back and then refolded to finish the computation. In this way ``fold`` specifies only how to chunk up data but leaves the distribution of this work to an externally provided ``map`` function. This function can be sequential or rely on multithreading, multiprocessing, or even distributed solutions. If ``map`` intends to serialize functions it should be prepared to accept and serialize lambdas. Note that the standard ``pickle`` module fails here. Example ------- >>> # Provide a parallel map to accomplish a parallel sum >>> from operator import add >>> fold(add, [1, 2, 3, 4], chunksize=2, map=map) 10 """ if combine is None: combine = binop chunks = partition_all(chunksize, seq) # Evaluate sequence in chunks via map if default is no_default: results = map(lambda chunk: reduce(binop, chunk), chunks) else: results = map(lambda chunk: reduce(binop, chunk, default), chunks) results = list(results) # TODO: Support complete laziness if len(results) == 1: # Return completed result return results[0] else: # Recurse to reaggregate intermediate results return fold(combine, results, map=map, chunksize=chunksize)
def get_in(keys, coll, default=None, no_default=False): """ Returns coll[i0][i1]...[iX] where [i0, i1, ..., iX]==keys. If coll[i0][i1]...[iX] cannot be found, returns ``default``, unless ``no_default`` is specified, then it raises KeyError or IndexError. ``get_in`` is a generalization of ``operator.getitem`` for nested data structures such as dictionaries and lists. >>> transaction = {'name': 'Alice', ... 'purchase': {'items': ['Apple', 'Orange'], ... 'costs': [0.50, 1.25]}, ... 'credit card': '5555-1234-1234-1234'} >>> get_in(['purchase', 'items', 0], transaction) 'Apple' >>> get_in(['name'], transaction) 'Alice' >>> get_in(['purchase', 'total'], transaction) >>> get_in(['purchase', 'items', 'apple'], transaction) >>> get_in(['purchase', 'items', 10], transaction) >>> get_in(['purchase', 'total'], transaction, 0) 0 >>> get_in(['y'], {}, no_default=True) # doctest: +ELLIPSIS Traceback (most recent call last): ... KeyError: 'y' >>> class Person(): ... def __init__(self, age): ... self.age = age >>> class Age(): ... def __init__(self, years, days): ... self.years = years ... self.days = days >>> alice = Person(Age(30, 100)) >>> get_in(['age', 'days'], alice) 100 >>> get_in(['age', 'hours'], alice) >>> get_in(['age', 'hours'], alice, 4) 4 >>> get_in(['occupation'], alice, no_default=True) Traceback (most recent call last): ... AttributeError: Person instance has no attribute 'occupation' See Also: itertoolz.get operator.getitem """ def get(d, key): return d[key] if hasattr(d, '__getitem__') else getattr(d, key) try: return reduce(get, keys, coll) except (KeyError, IndexError, AttributeError, TypeError): if no_default: raise return default
def select_keys(d, keys): """ Returns a new dictionary containing only specified keys. The collection keys is converted to a set, so any compatible collection will work. >>> select_keys({'a':1,'b':2}, ['a']) {'a': 1} """ def mutable_assoc(nd,k): nd[k] = d[k] return nd isec = set(d).intersection(set(keys)) return reduce(mutable_assoc, isec, {})
def get_in(keys, coll, default=None, no_default=False): """ Returns coll[i0][i1]...[iX] where [i0, i1, ..., iX]==keys. If coll[i0][i1]...[iX] cannot be found, returns ``default``, unless ``no_default`` is specified, then it raises KeyError or IndexError. ``get_in`` is a generalization of ``operator.getitem`` for nested data structures such as dictionaries and lists. >>> transaction = {'name': 'Alice', ... 'purchase': {'items': ['Apple', 'Orange'], ... 'costs': [0.50, 1.25]}, ... 'credit card': '5555-1234-1234-1234'} >>> get_in(['purchase', 'items', 0], transaction) 'Apple' >>> get_in(['name'], transaction) 'Alice' >>> get_in(['purchase', 'total'], transaction) >>> get_in(['purchase', 'items', 'apple'], transaction) >>> get_in(['purchase', 'items', 10], transaction) >>> get_in(['purchase', 'total'], transaction, 0) 0 >>> get_in(['y'], {}, no_default=True) Traceback (most recent call last): ... KeyError: 'y' >>> class C: ... def __init__(self, x): ... self.x = x >>> a = C(C(1)) >>> get_in(['x', 'x'], a) 1 >>> get_in(['x', 'b'], a, 2) 2 See Also: itertoolz.get operator.getitem """ reducer = flip( partial(get, default=(utils.no_default if no_default else default))) return reduce(reducer, keys, coll)
def get_in(keys, coll, default=None, no_default=False): """ Returns coll[i0][i1]...[iX] where [i0, i1, ..., iX]==keys. If coll[i0][i1]...[iX] cannot be found, returns ``default``, unless ``no_default`` is specified, then it raises KeyError or IndexError. ``get_in`` is a generalization of ``operator.getitem`` for nested data structures such as dictionaries and lists. >>> transaction = {'name': 'Alice', ... 'purchase': {'items': ['Apple', 'Orange'], ... 'costs': [0.50, 1.25]}, ... 'credit card': '5555-1234-1234-1234'} >>> get_in(['purchase', 'items', 0], transaction) 'Apple' >>> get_in(['name'], transaction) 'Alice' >>> get_in(['purchase', 'total'], transaction) >>> get_in(['purchase', 'items', 'apple'], transaction) >>> get_in(['purchase', 'items', 10], transaction) >>> get_in(['purchase', 'total'], transaction, 0) 0 >>> get_in(['y'], {}, no_default=True) Traceback (most recent call last): ... KeyError: 'y' See Also: itertoolz.get operator.getitem """ try: return reduce(operator.getitem, keys, coll) except (KeyError, IndexError, TypeError): if no_default: raise return default