def merge_with(func, *dicts):
    """ Merge dictionaries and apply function to combined values

    Every key found in any of the input dicts appears exactly once in the
    result.  The values mapped from that key are collected into a list, in
    the order the dicts were given, and the list is passed to ``func`` as
    ``func([val1, val2, ...])``.  Whatever ``func`` returns is stored under
    the key.

    The dicts may also be supplied as one single non-dict iterable of dicts.

    >>> merge_with(sum, {1: 1, 2: 2}, {1: 10, 2: 20})
    {1: 11, 2: 22}

    >>> merge_with(first, {1: 1, 2: 2}, {2: 20, 3: 30})  # doctest: +SKIP
    {1: 1, 2: 2, 3: 30}

    See Also:
        merge
    """
    # A lone non-dict argument is taken to be the collection of dicts itself.
    if len(dicts) == 1 and not isinstance(dicts[0], dict):
        dicts = dicts[0]

    collected = {}
    for mapping in dicts:
        for key, value in iteritems(mapping):
            collected.setdefault(key, []).append(value)

    merged = {}
    for key, values in iteritems(collected):
        merged[key] = func(values)
    return merged
def test_dict_iteration():
    """ The iter* helpers return non-list iterables matching the dict views """
    sample = {'a': 1, 'b': 2, 'c': 3}
    helpers = (iteritems, iterkeys, itervalues)
    methods = (sample.items, sample.keys, sample.values)

    # None of the helpers may hand back a list.
    for helper in helpers:
        assert not isinstance(helper(sample), list)

    # Each helper must produce the same elements as the matching dict method.
    for helper, method in zip(helpers, methods):
        assert set(helper(sample)) == set(method())
def groupby(key, seq):
    """ Group a collection by a key function

    Returns a plain dict that maps each distinct result of ``key(item)`` to
    the list of items of ``seq`` producing that result, in the order the
    items were encountered.

    >>> names = ['Alice', 'Bob', 'Charlie', 'Dan', 'Edith', 'Frank']
    >>> groupby(len, names)  # doctest: +SKIP
    {3: ['Bob', 'Dan'], 5: ['Alice', 'Edith', 'Frank'], 7: ['Charlie']}

    >>> iseven = lambda x: x % 2 == 0
    >>> groupby(iseven, [1, 2, 3, 4, 5, 6, 7, 8])  # doctest: +SKIP
    {False: [1, 3, 5, 7], True: [2, 4, 6, 8]}

    Non-callable keys imply grouping on a member.

    >>> groupby('gender', [{'name': 'Alice', 'gender': 'F'},
    ...                    {'name': 'Bob', 'gender': 'M'},
    ...                    {'name': 'Charlie', 'gender': 'M'}]) # doctest:+SKIP
    {'F': [{'gender': 'F', 'name': 'Alice'}],
     'M': [{'gender': 'M', 'name': 'Bob'},
           {'gender': 'M', 'name': 'Charlie'}]}

    See Also:
        countby
    """
    if not callable(key):
        key = getter(key)

    # Each new group stores the bound ``append`` of a fresh list, so adding an
    # item is one lookup plus one call, with no ``.append`` attribute access
    # per item.
    appenders = collections.defaultdict(lambda: [].append)
    for item in seq:
        appenders[key(item)](item)

    # ``__self__`` of a bound method is the object it is bound to, i.e. the
    # list that the stored ``append`` has been filling.
    groups = {}
    for group_key, append in iteritems(appenders):
        groups[group_key] = append.__self__
    return groups
def merge_frequencies(seqs):
    """ Combine several frequency mappings by summing the counts per key

    ``seqs`` must be a sized, indexable and sliceable collection (for example
    a list or tuple) of mappings from key to count.

    Returns:
        * an empty ``defaultdict(int)`` when ``seqs`` is empty;
        * the single mapping itself (the same object, not a copy) when
          ``seqs`` holds exactly one mapping;
        * otherwise a new ``defaultdict(int)`` holding, for every key, the
          sum of its counts over all mappings.
    """
    # Nothing to merge: return the additive identity instead of failing on
    # ``seqs[0]`` with an IndexError.
    if len(seqs) == 0:
        return defaultdict(int)

    first, rest = seqs[0], seqs[1:]
    if not rest:
        # Shortcut for a single input: hand it back unchanged.
        return first

    out = defaultdict(int)
    out.update(first)
    for d in rest:
        for k, v in iteritems(d):
            out[k] += v
    return out
def _unify(u, v, s):
    """ Unify two mappings entry by entry

    ``s`` is threaded through one ``unify`` call per key of ``u`` (presumably
    it is the substitution built so far -- confirm against ``unify``).

    Returns False when the mappings differ in size, when a key of ``u`` is
    absent from ``v``, or as soon as ``unify`` returns False for a pair of
    values.  Otherwise returns the result of the last ``unify`` call, or
    ``s`` unchanged if ``u`` is empty.
    """
    if len(u) != len(v):
        return False

    subst = s
    for name, left in iteritems(u):
        if name not in v:
            return False
        subst = unify(left, v[name], subst)
        if subst is False:
            return False
    return subst
def _unify(u, v, s):
    """ Unify two mappings key by key, threading ``s`` through ``unify``

    ``u`` and ``v`` are expected to be dicts; this is not checked here.

    The result is False if the two mappings have different sizes, if some
    key of ``u`` is missing from ``v``, or if ``unify`` returns False for any
    pair of corresponding values.  Otherwise it is the value returned by the
    final ``unify`` call (``s`` itself when ``u`` is empty).
    """
    same_size = len(u) == len(v)
    if not same_size:
        return False

    for key, uval in iteritems(u):
        if key in v:
            s = unify(uval, v[key], s)
        else:
            # A key with no counterpart can never unify.
            s = False
        if s is False:
            return False
    return s
def itemmap(func, d, factory=dict):
    """ Apply function to items of dictionary

    ``func`` is called with every ``(key, value)`` pair of ``d`` and must
    return a ``(key, value)`` pair for the result.

    ``factory`` is called with no arguments to create the mapping that
    receives the new pairs through its ``update`` method.  It defaults to
    ``dict``.  ``d`` itself is not modified.

    >>> accountids = {"Alice": 10, "Bob": 20}
    >>> itemmap(reversed, accountids)  # doctest: +SKIP
    {10: "Alice", 20: "Bob"}

    See Also:
        keymap
        valmap
    """
    rv = factory()
    rv.update(map(func, iteritems(d)))
    return rv
def itemmap(func, d, factory=dict):
    """ Apply function to items of dictionary

    Each ``(key, value)`` pair of ``d`` is passed to ``func``, which returns
    the pair to store in the result.  The result is a new mapping obtained
    from ``factory()`` and filled through its ``update`` method.

    >>> accountids = {"Alice": 10, "Bob": 20}
    >>> itemmap(reversed, accountids)  # doctest: +SKIP
    {10: "Alice", 20: "Bob"}

    See Also:
        keymap
        valmap
    """
    result = factory()
    new_pairs = (func(pair) for pair in iteritems(d))
    result.update(new_pairs)
    return result
def valfilter(predicate, d, factory=dict):
    """ Filter items in dictionary by value

    Keeps every item of ``d`` whose value makes ``predicate`` return a true
    value.  The kept items are stored in a new mapping created by calling
    ``factory()`` (a plain ``dict`` by default); ``d`` is not modified.

    >>> iseven = lambda x: x % 2 == 0
    >>> d = {1: 2, 2: 3, 3: 4, 4: 5}
    >>> valfilter(iseven, d)
    {1: 2, 3: 4}

    See Also:
        keyfilter
        valmap
    """
    rv = factory()
    for k, v in iteritems(d):
        if predicate(v):
            rv[k] = v
    return rv
def keyfilter(predicate, d, factory=dict):
    """ Filter items in dictionary by key

    Keeps every item of ``d`` whose key makes ``predicate`` return a true
    value.  The kept items are stored in a new mapping created by calling
    ``factory()`` (a plain ``dict`` by default); ``d`` is not modified.

    >>> iseven = lambda x: x % 2 == 0
    >>> d = {1: 2, 2: 3, 3: 4, 4: 5}
    >>> keyfilter(iseven, d)
    {2: 3, 4: 5}

    See Also:
        valfilter
        keymap
    """
    rv = factory()
    for k, v in iteritems(d):
        if predicate(k):
            rv[k] = v
    return rv
def groupby(func, seq):
    """ Group a collection by a key function

    Returns a plain dict mapping every distinct value of ``func(item)`` to
    the list of items of ``seq`` that produced it, in encounter order.

    >>> names = ['Alice', 'Bob', 'Charlie', 'Dan', 'Edith', 'Frank']
    >>> groupby(len, names)  # doctest: +SKIP
    {3: ['Bob', 'Dan'], 5: ['Alice', 'Edith', 'Frank'], 7: ['Charlie']}

    >>> iseven = lambda x: x % 2 == 0
    >>> groupby(iseven, [1, 2, 3, 4, 5, 6, 7, 8])
    {False: [1, 3, 5, 7], True: [2, 4, 6, 8]}

    See Also:
        countby
    """
    # Store the bound ``append`` of a new list for each group: adding an item
    # is then a single call on the looked-up value.
    bound_appends = collections.defaultdict(lambda: [].append)
    for element in seq:
        group = func(element)
        bound_appends[group](element)

    # Recover each underlying list from the bound method's ``__self__``.
    return dict((group, bound.__self__)
                for group, bound in iteritems(bound_appends))
def unpack_kwargs(kwargs):
    """ Extracts dask values from kwargs

    Currently only dask.bag.Item and python literal values are supported.

    Every ``Item`` value contributes its ``.dask`` graph to the merged graph
    and is replaced by its ``.key``.  Any other ``Base`` instance raises
    ``NotImplementedError``.  All remaining values are passed through as is.

    Returns a merged dask graph and a list of [key, val] pairs suitable for
    eventually constructing a dict.
    """
    graph = {}
    pairs = []
    for name, value in iteritems(kwargs):
        if isinstance(value, Item):
            graph.update(value.dask)
            pairs.append([name, value.key])
            continue
        # TODO: add a branch for isinstance(value, Value)
        if isinstance(value, Base):
            raise NotImplementedError(
                '%s not supported as kwarg value to Bag.map_partitions'
                % type(value).__name__)
        pairs.append([name, value])
    return graph, pairs
def itemfilter(predicate, d, factory=dict):
    """ Filter items in dictionary by item

    ``predicate`` receives each ``(key, value)`` pair of ``d``; pairs for
    which it returns a true value are copied into a new mapping created by
    ``factory()``.

    >>> def isvalid(item):
    ...     k, v = item
    ...     return k % 2 == 0 and v < 4

    >>> d = {1: 2, 2: 3, 3: 4, 4: 5}
    >>> itemfilter(isvalid, d)
    {2: 3}

    See Also:
        keyfilter
        valfilter
        itemmap
    """
    kept = factory()
    for pair in iteritems(d):
        if not predicate(pair):
            continue
        key, value = pair
        kept[key] = value
    return kept
def join(leftkey, leftseq, rightkey, rightseq,
         left_default=no_default, right_default=no_default):
    """ Join two sequences on common attributes

    This is a semi-streaming operation.  The LEFT sequence is fully evaluated
    and placed into memory.  The RIGHT sequence is evaluated lazily and so can
    be arbitrarily large.
    (Note: If right_default is defined, then unique keys of rightseq
        will also be stored in memory.)

    >>> friends = [('Alice', 'Edith'),
    ...            ('Alice', 'Zhao'),
    ...            ('Edith', 'Alice'),
    ...            ('Zhao', 'Alice'),
    ...            ('Zhao', 'Edith')]

    >>> cities = [('Alice', 'NYC'),
    ...           ('Alice', 'Chicago'),
    ...           ('Dan', 'Sydney'),
    ...           ('Edith', 'Paris'),
    ...           ('Edith', 'Berlin'),
    ...           ('Zhao', 'Shanghai')]

    >>> # Vacation opportunities
    >>> # In what cities do people have friends?
    >>> result = join(second, friends,
    ...               first, cities)
    >>> for ((a, b), (c, d)) in sorted(unique(result)):
    ...     print((a, d))
    ('Alice', 'Berlin')
    ('Alice', 'Paris')
    ('Alice', 'Shanghai')
    ('Edith', 'Chicago')
    ('Edith', 'NYC')
    ('Zhao', 'Chicago')
    ('Zhao', 'NYC')
    ('Zhao', 'Berlin')
    ('Zhao', 'Paris')

    Specify outer joins with keyword arguments ``left_default`` and/or
    ``right_default``.  Here is a full outer join in which unmatched elements
    are paired with None.

    >>> identity = lambda x: x
    >>> list(join(identity, [1, 2, 3],
    ...           identity, [2, 3, 4],
    ...           left_default=None, right_default=None))
    [(2, 2), (3, 3), (None, 4), (1, None)]

    The join is implemented as a hash join and the keys of leftseq must be
    hashable.  Additionally, if right_default is defined, then keys of
    rightseq must also be hashable.

    Usually the key arguments are callables to be applied to the sequences.
    If the keys are not obviously callable then it is assumed that indexing
    was intended, e.g. the following is a legal change.

    >>> # result = join(second, friends, first, cities)
    >>> result = join(1, friends, 0, cities)  # doctest: +SKIP
    """
    if not callable(leftkey):
        leftkey = getter(leftkey)
    if not callable(rightkey):
        rightkey = getter(rightkey)

    # Hash table of the left side: key -> list of left items with that key.
    left_groups = groupby(leftkey, leftseq)

    # Supplying a default for one side means that unmatched items of the
    # *other* side are still emitted, paired with that default:
    #   neither default -> inner join      left_default only  -> right join
    #   both defaults   -> full join       right_default only -> left join
    keep_unmatched_right = left_default != no_default
    keep_unmatched_left = right_default != no_default

    seen_right_keys = set()
    for right_item in rightseq:
        key = rightkey(right_item)
        if keep_unmatched_left:
            # Remember which keys occurred on the right so the left items
            # that never matched can be found afterwards.
            seen_right_keys.add(key)
        if key in left_groups:
            for left_item in left_groups[key]:
                yield (left_item, right_item)
        elif keep_unmatched_right:
            yield (left_default, right_item)

    if keep_unmatched_left:
        for key, left_items in iteritems(left_groups):
            if key not in seen_right_keys:
                for left_item in left_items:
                    yield (left_item, right_default)