Ejemplo n.º 1
0
def merge_with(func, *dicts):
    """ Combine dictionaries, reducing the values of each key with ``func``

    Every key present in any input dict appears in the result.  The values
    found for a key are collected, in input order, into a list, and that list
    is passed to ``func`` as ``func([val1, val2, ...])``; whatever ``func``
    returns becomes the value stored under the key.

    When exactly one positional argument is given and it is not a dict, it is
    treated as an iterable of dicts.

    >>> merge_with(sum, {1: 1, 2: 2}, {1: 10, 2: 20})
    {1: 11, 2: 22}

    >>> merge_with(first, {1: 1, 2: 2}, {2: 20, 3: 30})  # doctest: +SKIP
    {1: 1, 2: 2, 3: 30}

    See Also:
        merge
    """
    if len(dicts) == 1 and not isinstance(dicts[0], dict):
        dicts = dicts[0]

    collected = {}
    for mapping in dicts:
        for key, value in iteritems(mapping):
            # setdefault creates the per-key list on first sight of the key
            collected.setdefault(key, []).append(value)
    return dict((key, func(values)) for key, values in iteritems(collected))
Ejemplo n.º 2
0
def merge_with(func, *dicts):
    """ Combine dictionaries, reducing the values of each key with ``func``

    Each key that occurs in at least one input dict is kept.  All values
    mapped from that key are gathered into a list (in the order the dicts were
    given) and handed to ``func`` as ``func([val1, val2, ...])``.

    A single non-dict positional argument is interpreted as an iterable of
    dicts.

    >>> merge_with(sum, {1: 1, 2: 2}, {1: 10, 2: 20})
    {1: 11, 2: 22}

    >>> merge_with(first, {1: 1, 2: 2}, {2: 20, 3: 30})  # doctest: +SKIP
    {1: 1, 2: 2, 3: 30}

    See Also:
        merge
    """
    if len(dicts) == 1 and not isinstance(dicts[0], dict):
        dicts = dicts[0]

    values_by_key = {}
    for source in dicts:
        for key, value in iteritems(source):
            # First occurrence of a key starts its list; later ones extend it
            values_by_key.setdefault(key, []).append(value)
    return dict((key, func(vals)) for key, vals in iteritems(values_by_key))
Ejemplo n.º 3
0
def test_dict_iteration():
    # The iter* helpers must not hand back lists, yet must produce the same
    # elements as the matching dict method.
    sample = {'a': 1, 'b': 2, 'c': 3}
    helper_and_method = (
        (iteritems, sample.items),
        (iterkeys, sample.keys),
        (itervalues, sample.values),
    )
    for helper, _ in helper_and_method:
        assert not isinstance(helper(sample), list)
    for helper, method in helper_and_method:
        assert set(helper(sample)) == set(method())
def test_dict_iteration():
    # Check each iter* helper twice: it does not return a list, and it
    # yields exactly the contents of the corresponding dict method.
    data = {'a': 1, 'b': 2, 'c': 3}
    cases = (
        (iteritems, data.items),
        (iterkeys, data.keys),
        (itervalues, data.values),
    )
    for iter_func, _ in cases:
        assert not isinstance(iter_func(data), list)
    for iter_func, dict_method in cases:
        assert set(iter_func(data)) == set(dict_method())
Ejemplo n.º 5
0
def groupby(key, seq):
    """ Partition a collection into lists that share a key

    ``key`` is applied to every element of ``seq``; elements producing the
    same result end up in the same list, in their original order.  The return
    value is a plain dict mapping each key result to its list.

    >>> names = ['Alice', 'Bob', 'Charlie', 'Dan', 'Edith', 'Frank']
    >>> groupby(len, names)  # doctest: +SKIP
    {3: ['Bob', 'Dan'], 5: ['Alice', 'Edith', 'Frank'], 7: ['Charlie']}

    >>> iseven = lambda x: x % 2 == 0
    >>> groupby(iseven, [1, 2, 3, 4, 5, 6, 7, 8])  # doctest: +SKIP
    {False: [1, 3, 5, 7], True: [2, 4, 6, 8]}

    A ``key`` that is not callable is turned into a lookup via ``getter``,
    i.e. elements are grouped on a member.

    >>> groupby('gender', [{'name': 'Alice', 'gender': 'F'},
    ...                    {'name': 'Bob', 'gender': 'M'},
    ...                    {'name': 'Charlie', 'gender': 'M'}]) # doctest:+SKIP
    {'F': [{'gender': 'F', 'name': 'Alice'}],
     'M': [{'gender': 'M', 'name': 'Bob'},
           {'gender': 'M', 'name': 'Charlie'}]}

    See Also:
        countby
    """
    if not callable(key):
        key = getter(key)
    buckets = collections.defaultdict(list)
    for element in seq:
        buckets[key(element)].append(element)
    # Return an ordinary dict so callers do not get defaultdict behaviour
    return dict(buckets)
Ejemplo n.º 6
0
def groupby(key, seq):
    """ Partition a collection into lists that share a key

    Each element of ``seq`` is filed under ``key(element)``.  The result is a
    plain dict whose values are lists holding the elements in the order they
    were encountered.

    >>> names = ['Alice', 'Bob', 'Charlie', 'Dan', 'Edith', 'Frank']
    >>> groupby(len, names)  # doctest: +SKIP
    {3: ['Bob', 'Dan'], 5: ['Alice', 'Edith', 'Frank'], 7: ['Charlie']}

    >>> iseven = lambda x: x % 2 == 0
    >>> groupby(iseven, [1, 2, 3, 4, 5, 6, 7, 8])  # doctest: +SKIP
    {False: [1, 3, 5, 7], True: [2, 4, 6, 8]}

    If ``key`` is not callable it is wrapped with ``getter`` so that grouping
    happens on a member of each element.

    >>> groupby('gender', [{'name': 'Alice', 'gender': 'F'},
    ...                    {'name': 'Bob', 'gender': 'M'},
    ...                    {'name': 'Charlie', 'gender': 'M'}]) # doctest:+SKIP
    {'F': [{'gender': 'F', 'name': 'Alice'}],
     'M': [{'gender': 'M', 'name': 'Bob'},
           {'gender': 'M', 'name': 'Charlie'}]}

    See Also:
        countby
    """
    if not callable(key):
        key = getter(key)
    groups = collections.defaultdict(list)
    for entry in seq:
        groups[key(entry)].append(entry)
    # Copy into an ordinary dict before handing the groups back
    return dict(groups)
Ejemplo n.º 7
0
def merge_frequencies(seqs):
    """ Add up a sequence of count mappings key by key

    ``seqs`` must support indexing and slicing.  When it holds a single
    mapping, that very object is returned untouched.  Otherwise the result is
    a ``defaultdict(int)`` seeded from the first mapping, with the counts of
    every later mapping added on top.
    """
    head, tail = seqs[0], seqs[1:]
    if not tail:
        return head
    totals = defaultdict(int, head)
    for counts in tail:
        for key, count in iteritems(counts):
            totals[key] += count
    return totals
Ejemplo n.º 8
0
def _unify(u, v, s):
    """ Unify two keyed containers entry by entry

    ``u`` and ``v`` must have the same length and every key of ``u`` must be
    in ``v``; otherwise ``False`` is returned.  The substitution ``s`` is
    threaded through ``unify`` for each pair of values and the final
    substitution is returned, or ``False`` as soon as one pair fails.
    """
    if len(u) != len(v):
        return False
    subst = s
    for name, left in iteritems(u):
        if name not in v:
            return False
        subst = unify(left, v[name], subst)
        if subst is False:
            return False
    return subst
Ejemplo n.º 9
0
Archivo: core.py Proyecto: jcorbin/dask
def merge_frequencies(seqs):
    """ Add up a sequence of count mappings key by key

    ``seqs`` is indexed and sliced, so it must be a sequence.  A sequence of
    length one yields its only element as-is (the same object).  For longer
    sequences a ``defaultdict(int)`` is built from the first mapping and the
    values of all remaining mappings are accumulated into it.
    """
    leading, remaining = seqs[0], seqs[1:]
    if not remaining:
        return leading
    merged = defaultdict(int, leading)
    for freq in remaining:
        for key, amount in iteritems(freq):
            merged[key] += amount
    return merged
Ejemplo n.º 10
0
def _unify(u, v, s):
    """ Unify two keyed containers entry by entry

    NOTE(review): ``u`` and ``v`` are presumably dicts (a disabled assertion
    in the original said so) -- confirm against callers.

    Returns ``False`` if the lengths differ, if a key of ``u`` is missing
    from ``v``, or if ``unify`` reports ``False`` for any pair of values.
    Otherwise returns the substitution obtained by threading ``s`` through
    ``unify`` for every key.
    """
    if len(u) != len(v):
        return False
    subst = s
    for name, left in iteritems(u):
        if name not in v:
            return False
        subst = unify(left, v[name], subst)
        if subst is False:
            return False
    return subst
Ejemplo n.º 11
0
def _unify(u, v, s):
    """ Unify two keyed containers entry by entry

    NOTE(review): both arguments are presumably dicts, as a disabled
    assertion in the original indicated -- confirm against callers.

    The substitution ``s`` is passed through ``unify`` once per key of ``u``.
    ``False`` is returned when the lengths differ, when ``v`` lacks a key of
    ``u``, or when any ``unify`` call returns ``False``; otherwise the final
    substitution is returned.
    """
    if len(u) != len(v):
        return False
    current = s
    for key, u_value in iteritems(u):
        if key not in v:
            return False
        current = unify(u_value, v[key], current)
        if current is False:
            return False
    return current
Ejemplo n.º 12
0
def itemmap(func, d):
    """ Build a new dict by transforming every (key, value) pair

    ``func`` receives each item of ``d`` as a pair and must return a pair;
    the returned pairs form the new dictionary.

    >>> accountids = {"Alice": 10, "Bob": 20}
    >>> itemmap(reversed, accountids)  # doctest: +SKIP
    {10: "Alice", 20: "Bob"}

    See Also:
        keymap
        valmap
    """
    return dict(func(pair) for pair in iteritems(d))
Ejemplo n.º 13
0
def itemmap(func, d, factory=dict):
    """ Build a new mapping by transforming every (key, value) pair

    ``func`` receives each item of ``d`` as a pair and must return a pair.
    The output container is created by calling ``factory()`` and filled
    through its ``update`` method.

    >>> accountids = {"Alice": 10, "Bob": 20}
    >>> itemmap(reversed, accountids)  # doctest: +SKIP
    {10: "Alice", 20: "Bob"}

    See Also:
        keymap
        valmap
    """
    out = factory()
    transformed = map(func, iteritems(d))
    out.update(transformed)
    return out
Ejemplo n.º 14
0
def itemmap(func, d, factory=dict):
    """ Build a new mapping by transforming every (key, value) pair

    Each item of ``d`` is passed to ``func`` as a pair, and ``func`` must
    return a pair.  ``factory`` is called with no arguments to create the
    result, which is then populated via ``update``.

    >>> accountids = {"Alice": 10, "Bob": 20}
    >>> itemmap(reversed, accountids)  # doctest: +SKIP
    {10: "Alice", 20: "Bob"}

    See Also:
        keymap
        valmap
    """
    mapped = factory()
    new_items = map(func, iteritems(d))
    mapped.update(new_items)
    return mapped
Ejemplo n.º 15
0
def valfilter(predicate, d):
    """ Keep only the items whose value satisfies ``predicate``

    Returns a new dict; ``d`` itself is not modified.

    >>> iseven = lambda x: x % 2 == 0
    >>> d = {1: 2, 2: 3, 3: 4, 4: 5}
    >>> valfilter(iseven, d)
    {1: 2, 3: 4}

    See Also:
        keyfilter
        valmap
    """
    return dict(
        (key, value) for key, value in iteritems(d) if predicate(value)
    )
Ejemplo n.º 16
0
def valfilter(predicate, d):
    """ Keep only the items whose value satisfies ``predicate``

    ``predicate`` is called with each value; items for which it returns a
    true result are copied into a new dict.

    >>> iseven = lambda x: x % 2 == 0
    >>> d = {1: 2, 2: 3, 3: 4, 4: 5}
    >>> valfilter(iseven, d)
    {1: 2, 3: 4}

    See Also:
        keyfilter
        valmap
    """
    selected = ((k, val) for k, val in iteritems(d) if predicate(val))
    return dict(selected)
Ejemplo n.º 17
0
def keyfilter(predicate, d):
    """ Keep only the items whose key satisfies ``predicate``

    Returns a new dict; ``d`` itself is not modified.

    >>> iseven = lambda x: x % 2 == 0
    >>> d = {1: 2, 2: 3, 3: 4, 4: 5}
    >>> keyfilter(iseven, d)
    {2: 3, 4: 5}

    See Also:
        valfilter
        keymap
    """
    return dict(
        (key, value) for key, value in iteritems(d) if predicate(key)
    )
Ejemplo n.º 18
0
def keyfilter(predicate, d):
    """ Keep only the items whose key satisfies ``predicate``

    ``predicate`` is called with each key; items for which it returns a true
    result are copied into a new dict.

    >>> iseven = lambda x: x % 2 == 0
    >>> d = {1: 2, 2: 3, 3: 4, 4: 5}
    >>> keyfilter(iseven, d)
    {2: 3, 4: 5}

    See Also:
        valfilter
        keymap
    """
    selected = ((k, val) for k, val in iteritems(d) if predicate(k))
    return dict(selected)
Ejemplo n.º 19
0
def groupby(func, seq):
    """ Partition a collection into lists that share a key

    ``func`` is applied to every element of ``seq``; elements with equal
    results are collected, in order, into one list.  A plain dict mapping
    each result to its list is returned.

    >>> names = ['Alice', 'Bob', 'Charlie', 'Dan', 'Edith', 'Frank']
    >>> groupby(len, names)
    {3: ['Bob', 'Dan'], 5: ['Alice', 'Edith', 'Frank'], 7: ['Charlie']}

    >>> iseven = lambda x: x % 2 == 0
    >>> groupby(iseven, [1, 2, 3, 4, 5, 6, 7, 8])
    {False: [1, 3, 5, 7], True: [2, 4, 6, 8]}

    See Also:
        countby
    """
    buckets = collections.defaultdict(list)
    for element in seq:
        buckets[func(element)].append(element)
    # Return an ordinary dict so callers do not get defaultdict behaviour
    return dict(buckets)
Ejemplo n.º 20
0
def unpack_kwargs(kwargs):
    """ Separate dask values from plain values in keyword arguments

    Only ``Item`` instances and ordinary Python values are accepted; any other
    ``Base`` instance raises ``NotImplementedError``.

    For each ``Item`` its ``dask`` graph is merged into one dict and the value
    is replaced by the item's ``key``.  Returns that merged graph together
    with a list of ``[key, val]`` pairs suitable for eventually constructing a
    dict.
    """
    graph = {}
    pairs = []
    for name, value in iteritems(kwargs):
        if isinstance(value, Item):
            graph.update(value.dask)
            pairs.append([name, value.key])
            continue
        # TODO elif isinstance(val, Value):
        if isinstance(value, Base):
            raise NotImplementedError(
                '%s not supported as kwarg value to Bag.map_partitions'
                % type(value).__name__)
        pairs.append([name, value])
    return graph, pairs
Ejemplo n.º 21
0
def unpack_kwargs(kwargs):
    """ Separate dask values from plain values in keyword arguments

    ``Item`` values contribute their ``dask`` graph to a merged graph and are
    replaced by their ``key``.  Any other ``Base`` instance is rejected with
    ``NotImplementedError``.  Everything else is passed through unchanged.

    Returns the merged graph and a list of ``[key, val]`` pairs suitable for
    eventually constructing a dict.
    """
    merged_graph = {}
    kw_list = []
    for kw_name, kw_value in iteritems(kwargs):
        if isinstance(kw_value, Item):
            merged_graph.update(kw_value.dask)
            kw_list.append([kw_name, kw_value.key])
            continue
        # TODO elif isinstance(val, Value):
        if isinstance(kw_value, Base):
            type_name = type(kw_value).__name__
            raise NotImplementedError(
                '%s not supported as kwarg value to Bag.map_partitions' %
                type_name)
        kw_list.append([kw_name, kw_value])
    return merged_graph, kw_list
Ejemplo n.º 22
0
def groupby(func, seq):
    """ Partition a collection into lists that share a key

    Each element of ``seq`` is filed under ``func(element)``.  The result is a
    plain dict whose values are lists holding the elements in the order they
    were encountered.

    >>> names = ['Alice', 'Bob', 'Charlie', 'Dan', 'Edith', 'Frank']
    >>> groupby(len, names)
    {3: ['Bob', 'Dan'], 5: ['Alice', 'Edith', 'Frank'], 7: ['Charlie']}

    >>> iseven = lambda x: x % 2 == 0
    >>> groupby(iseven, [1, 2, 3, 4, 5, 6, 7, 8])
    {False: [1, 3, 5, 7], True: [2, 4, 6, 8]}

    See Also:
        countby
    """
    groups = collections.defaultdict(list)
    for entry in seq:
        groups[func(entry)].append(entry)
    # Copy into an ordinary dict before handing the groups back
    return dict(groups)
Ejemplo n.º 23
0
def itemfilter(predicate, d, factory=dict):
    """ Keep only the items that satisfy ``predicate``

    ``predicate`` is called with each item of ``d`` as a ``(key, value)``
    pair.  Accepted items are stored in a new container created by calling
    ``factory()``.

    >>> def isvalid(item):
    ...     k, v = item
    ...     return k % 2 == 0 and v < 4

    >>> d = {1: 2, 2: 3, 3: 4, 4: 5}
    >>> itemfilter(isvalid, d)
    {2: 3}

    See Also:
        keyfilter
        valfilter
        itemmap
    """
    kept = factory()
    for pair in iteritems(d):
        if not predicate(pair):
            continue
        key, value = pair
        kept[key] = value
    return kept
Ejemplo n.º 24
0
def itemfilter(predicate, d, factory=dict):
    """ Keep only the items that satisfy ``predicate``

    Every item of ``d`` is handed to ``predicate`` as a ``(key, value)`` pair.
    Those for which it returns a true result are assigned, key by key, into
    the container returned by ``factory()``.

    >>> def isvalid(item):
    ...     k, v = item
    ...     return k % 2 == 0 and v < 4

    >>> d = {1: 2, 2: 3, 3: 4, 4: 5}
    >>> itemfilter(isvalid, d)
    {2: 3}

    See Also:
        keyfilter
        valfilter
        itemmap
    """
    selected = factory()
    for entry in iteritems(d):
        if not predicate(entry):
            continue
        name, val = entry
        selected[name] = val
    return selected
Ejemplo n.º 25
0
def join(leftkey,
         leftseq,
         rightkey,
         rightseq,
         left_default=no_default,
         right_default=no_default):
    """ Join two sequences on common attributes

    This is a semi-streaming operation.  The LEFT sequence is fully evaluated
    and placed into memory.  The RIGHT sequence is evaluated lazily and so can
    be arbitrarily large.
    (Note: If right_default is defined, then unique keys of rightseq
        will also be stored in memory.)

    This function is a generator: matches are yielded one at a time as
    ``(left_item, right_item)`` tuples.

    >>> friends = [('Alice', 'Edith'),
    ...            ('Alice', 'Zhao'),
    ...            ('Edith', 'Alice'),
    ...            ('Zhao', 'Alice'),
    ...            ('Zhao', 'Edith')]

    >>> cities = [('Alice', 'NYC'),
    ...           ('Alice', 'Chicago'),
    ...           ('Dan', 'Sydney'),
    ...           ('Edith', 'Paris'),
    ...           ('Edith', 'Berlin'),
    ...           ('Zhao', 'Shanghai')]

    >>> # Vacation opportunities
    >>> # In what cities do people have friends?
    >>> result = join(second, friends,
    ...               first, cities)
    >>> for ((a, b), (c, d)) in sorted(unique(result)):
    ...     print((a, d))
    ('Alice', 'Berlin')
    ('Alice', 'Paris')
    ('Alice', 'Shanghai')
    ('Edith', 'Chicago')
    ('Edith', 'NYC')
    ('Zhao', 'Chicago')
    ('Zhao', 'NYC')
    ('Zhao', 'Berlin')
    ('Zhao', 'Paris')

    Specify outer joins with keyword arguments ``left_default`` and/or
    ``right_default``.  Here is a full outer join in which unmatched elements
    are paired with None.

    >>> identity = lambda x: x
    >>> list(join(identity, [1, 2, 3],
    ...           identity, [2, 3, 4],
    ...           left_default=None, right_default=None))
    [(2, 2), (3, 3), (None, 4), (1, None)]

    The join is implemented as a hash join and the keys of leftseq must be
    hashable. Additionally, if right_default is defined, then keys of rightseq
    must also be hashable.

    Usually the key arguments are callables to be applied to the sequences.  If
    the keys are not obviously callable then it is assumed that indexing was
    intended, e.g. the following is a legal change.

    >>> # result = join(second, friends, first, cities)
    >>> result = join(1, friends, 0, cities)  # doctest: +SKIP
    """
    # Non-callable keys are wrapped with ``getter`` so they can be applied
    # to items like any other key function.
    if not callable(leftkey):
        leftkey = getter(leftkey)
    if not callable(rightkey):
        rightkey = getter(rightkey)

    # Index the whole left sequence by key up front; right items are then
    # matched against this table one at a time.
    d = groupby(leftkey, leftseq)

    # A default still equal to the ``no_default`` sentinel means no fill
    # value was supplied for that side, which selects the kind of join.
    if left_default == no_default and right_default == no_default:
        # Inner Join
        for item in rightseq:
            key = rightkey(item)
            if key in d:
                for left_match in d[key]:
                    yield (left_match, item)
    elif left_default != no_default and right_default == no_default:
        # Right Join
        for item in rightseq:
            key = rightkey(item)
            if key in d:
                for left_match in d[key]:
                    yield (left_match, item)
            else:
                # Unmatched right item: pair it with the left fill value
                yield (left_default, item)
    elif right_default != no_default:
        # Record every key seen on the right so that unmatched left items
        # can be emitted once the right sequence is exhausted.
        seen_keys = set()
        seen = seen_keys.add

        if left_default == no_default:
            # Left Join
            for item in rightseq:
                key = rightkey(item)
                seen(key)
                if key in d:
                    for left_match in d[key]:
                        yield (left_match, item)
        else:
            # Full Join
            for item in rightseq:
                key = rightkey(item)
                seen(key)
                if key in d:
                    for left_match in d[key]:
                        yield (left_match, item)
                else:
                    yield (left_default, item)

        # Left items whose key never appeared on the right are paired with
        # the right fill value.
        for key, matches in iteritems(d):
            if key not in seen_keys:
                for match in matches:
                    yield (match, right_default)