Example #1
def test_EqualityHashKey_default_key():
    EqualityHashDefault = curry(EqualityHashKey, None)
    L1 = [1]
    L2 = [2]
    data1 = [L1, L1, L2, [], [], [1], [2], {}, ()]
    set1 = set(map(EqualityHashDefault, data1))
    set2 = set(map(EqualityHashDefault, [[], [1], [2], {}, ()]))
    assert set1 == set2
    assert len(set1) == 5

    # Test that ``EqualityHashDefault(item)`` is distinct from ``item``
    T0 = ()
    T1 = (1,)
    data2 = list(map(EqualityHashDefault, [T0, T0, T1, T1, (), (1,)]))
    data2.extend([T0, T1, (), (1,)])
    set3 = set(data2)
    assert set3 == set([(), (1,), EqualityHashDefault(()),
                        EqualityHashDefault((1,))])
    assert len(set3) == 4
    assert EqualityHashDefault(()) in set3
    assert EqualityHashDefault((1,)) in set3

    # Miscellaneous
    E1 = EqualityHashDefault(L1)
    E2 = EqualityHashDefault(L2)
    assert str(E1) == '=[1]='
    assert repr(E1) == '=[1]='
    assert E1 != E2
    assert not (E1 == E2)
    assert E1 == EqualityHashDefault(L1)
    assert not (E1 != EqualityHashDefault(L1))
    assert E1 != L1
    assert not (E1 == L1)
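
A minimal usage sketch for context (assuming ``EqualityHashKey`` from ``toolz.sandbox`` and ``curry``/``unique`` from ``toolz``): the wrapper hashes to a constant but compares by item equality, so unhashable items can take part in hash-based deduplication.

from toolz import curry, unique
from toolz.sandbox import EqualityHashKey

EqualityHashDefault = curry(EqualityHashKey, None)
data = [[1], [2], [1], [], []]
# unique() keeps a set of seen keys; the wrappers make the lists set-safe
print(list(unique(data, key=EqualityHashDefault)))  # [[1], [2], []]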
Example #2
def pluck(ind, seqs, default=no_default):
    """ plucks an element or several elements from each item in a sequence.

    ``pluck`` maps ``itertoolz.get`` over a sequence and returns one or more
    elements of each item in the sequence.

    This is equivalent to running ``map(curried.get(ind), seqs)``.

    ``ind`` can be either a single string/index or a sequence of
    strings/indices.
    ``seqs`` should be a sequence of sequences or dicts.

    e.g.

    >>> data = [{'id': 1, 'name': 'Cheese'}, {'id': 2, 'name': 'Pies'}]
    >>> list(pluck('name', data))
    ['Cheese', 'Pies']
    >>> list(pluck([0, 1], [[1, 2, 3], [4, 5, 7]]))
    [(1, 2), (4, 5)]

    See Also:
        get
        map
    """
    if default is no_default:
        if isinstance(ind, list):
            return map(operator.itemgetter(*ind), seqs)
        return map(operator.itemgetter(ind), seqs)
    elif isinstance(ind, list):
        return (tuple(_get(item, seq, default) for item in ind)
                for seq in seqs)
    return (_get(ind, seq, default) for seq in seqs)
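
A brief usage sketch of the ``default`` branch, using toy data (``pluck`` is the public ``toolz`` function shown above):

from toolz import pluck

data = [{'id': 1, 'name': 'Cheese'}, {'id': 2}]
# a missing key yields the fallback instead of raising
print(list(pluck('name', data, default=None)))           # ['Cheese', None]
# with a list of keys, each item becomes a tuple of values/fallbacks
print(list(pluck(['id', 'name'], data, default=None)))   # [(1, 'Cheese'), (2, None)]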
Example #3
def fold(binop, seq, default=no_default, map=map, chunksize=128, combine=None):
    """
    Reduce without guarantee of ordered reduction.

    inputs:

    ``binop``     - associative operator. The associative property allows us to
                    leverage a parallel map to perform reductions in parallel.
    ``seq``       - a sequence to be aggregated
    ``default``   - an identity element like 0 for ``add`` or 1 for ``mul``

    ``map``       - an implementation of ``map``. This may be parallel and
                    determines how work is distributed.
    ``chunksize`` - Number of elements of ``seq`` that should be handled
                    within a single function call
    ``combine``   - Binary operator to combine two intermediate results.
                    If ``binop`` is of type (total, item) -> total
                    then ``combine`` is of type (total, total) -> total
                    Defaults to ``binop`` for the common case of operators like ``add``

    Fold chunks up the collection into blocks of size ``chunksize`` and then
    feeds each of these to calls to ``reduce``. This work is distributed
    with a call to ``map``, gathered back and then refolded to finish the
    computation. In this way ``fold`` specifies only how to chunk up data but
    leaves the distribution of this work to an externally provided ``map``
    function. This function can be sequential or rely on multithreading,
    multiprocessing, or even distributed solutions.

    If ``map`` intends to serialize functions it should be prepared to accept
    and serialize lambdas. Note that the standard ``pickle`` module fails
    here.

    Example
    -------

    >>> # Provide a parallel map to accomplish a parallel sum
    >>> from operator import add
    >>> fold(add, [1, 2, 3, 4], chunksize=2, map=map)
    10
    """
    if combine is None:
        combine = binop

    chunks = partition_all(chunksize, seq)

    # Evaluate sequence in chunks via map
    if default is no_default:
        results = map(lambda chunk: reduce(binop, chunk), chunks)
    else:
        results = map(lambda chunk: reduce(binop, chunk, default), chunks)

    results = list(results)  # TODO: Support complete laziness

    if len(results) == 1:  # Return completed result
        return results[0]
    else:  # Recurse to reaggregate intermediate results
        return fold(combine, results, map=map, chunksize=chunksize)
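
Because ``fold`` wraps ``binop`` in a lambda before handing it to ``map``, the standard ``pickle`` module (and hence ``multiprocessing.Pool``) rejects it, as the docstring warns. A sketch with a thread pool avoids serialization entirely (assumes ``fold`` at its ``toolz.sandbox.parallel`` location):

from operator import add
from multiprocessing.pool import ThreadPool
from toolz.sandbox.parallel import fold

# threads share memory, so nothing is pickled
with ThreadPool(4) as pool:
    total = fold(add, range(1000), default=0, map=pool.map, chunksize=100)
print(total)  # 499500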
Example #4
def multihash(x):
    try:
        return hash(x)
    except TypeError:
        if isinstance(x, (list, tuple, set, frozenset)):
            return hash(tuple(map(multihash, x)))
        if type(x) is dict:
            return hash(frozenset(map(multihash, x.items())))
        if type(x) is slice:
            return hash((x.start, x.stop, x.step))
        raise TypeError("Hashing not covered for " + str(x))
Example #5
def test_load_from_dir_of_jsonlines(ctx):
    dfs = []
    dfc = df.copy()
    for i in range(3):
        dfc['id'] += i
        dfs.append(dfc.copy())
    expected = pd.concat(dfs, axis=0, ignore_index=True)
    with jslines() as d:
        result = odo(Directory(JSONLines)(d), ctx)
        assert (set(map(frozenset, odo(result, list))) ==
                set(map(frozenset, odo(expected, list))))
Example #6
    def _to_lists(seq, n=10):
        """iter of iters -> finite list of finite lists
        """
        def initial(s):
            return list(take(n, s))

        return initial(map(initial, seq))
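
A usage sketch with a toy infinite input (assumes the helper and ``toolz.take`` are in scope at module level):

import itertools
rows = (itertools.count(i) for i in itertools.count())
print(_to_lists(rows, n=3))  # [[0, 1, 2], [1, 2, 3], [2, 3, 4]]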
Example #7
def merge_sorted(*iters, **kwargs):
    """ Merge and sort a collection of sorted collections

    >>> list(merge_sorted([1, 3, 5], [2, 4, 6]))
    [1, 2, 3, 4, 5, 6]

    >>> ''.join(merge_sorted('abc', 'abc', 'abc'))
    'aaabbbccc'
    """
    key = kwargs.get('key', identity)
    iters = map(iter, iters)
    pq = Queue.PriorityQueue()

    def inject_first_element(it, tiebreaker=None):
        try:
            item = next(it)
            pq.put((key(item), item, tiebreaker, it))
        except StopIteration:
            pass

    # Initial population
    for i, it in enumerate(iters):
        inject_first_element(it, i)

    # Repeatedly yield and then repopulate from the same iterator
    while not pq.empty():
        _, item, tb, it = pq.get()
        yield item
        inject_first_element(it, tb)
Example #8
def attr_pluck(attr, objs, default=no_default):
    """ plucks an attribute or several attributes from each object in a sequence.

    ``attr_pluck`` maps ``itertoolz.attr_get`` over a sequence and returns one or more
    attributes of each object in the sequence.

    This is equivalent to running `map(curried.attr_get(attr), objs)`

    ``attr`` can be either a single string or a list of strings.
    ``objs`` should be a sequence of objects.

    e.g.

    >>> class A(object):
    ...     pass
    >>> a1 = A(); a1.id = 1; a1.name = "Cheese"
    >>> a2 = A(); a2.id = 2; a2.name = "Pies"
    >>> list(attr_pluck('name', [a1, a2]))
    ['Cheese', 'Pies']


    See Also:
        attr_get
        map
    """
    if default is no_default:
        get = attr_getter(attr)
        return map(get, objs)
    elif isinstance(attr, list):
        return (tuple(getattr(obj, item, default) for item in attr)
                for obj in objs)
    return (getattr(obj, attr, default) for obj in objs)
Example #9
def mapcat(func, seqs):
    """ Apply func to each sequence in seqs, concatenating results.

    >>> list(mapcat(lambda s: [c.upper() for c in s],
    ...             [["a", "b"], ["c", "d", "e"]]))
    ['A', 'B', 'C', 'D', 'E']
    """
    return concat(map(func, seqs))
Example #10
def merge_sorted(*seqs, **kwargs):
    """ Merge and sort a collection of sorted collections

    This works lazily and only keeps one value from each iterable in memory.

    >>> list(merge_sorted([1, 3, 5], [2, 4, 6]))
    [1, 2, 3, 4, 5, 6]

    >>> ''.join(merge_sorted('abc', 'abc', 'abc'))
    'aaabbbccc'

    The "key" function used to sort the input may be passed as a keyword.

    >>> list(merge_sorted([2, 3], [1, 3], key=lambda x: x // 3))
    [2, 1, 3, 3]
    """
    key = kwargs.get('key', None)
    if key is None:
        # heapq.merge does what we do below except by val instead of key(val)
        for item in heapq.merge(*seqs):
            yield item
    else:
        # The commented code below shows an alternative (slower) implementation
        # to apply a key function for sorting.
        #
        # mapper = lambda i, item: (key(item), i, item)
        # keyiters = [map(partial(mapper, i), itr) for i, itr in
        #             enumerate(seqs)]
        # return (item for (item_key, i, item) in heapq.merge(*keyiters))

        # binary heap as a priority queue
        pq = []

        # Initial population
        for itnum, it in enumerate(map(iter, seqs)):
            try:
                item = next(it)
                pq.append([key(item), itnum, item, it])
            except StopIteration:
                pass
        heapq.heapify(pq)

        # Repeatedly yield and then repopulate from the same iterator
        while True:
            try:
                while True:
                    # raises IndexError when pq is empty
                    _, itnum, item, it = s = pq[0]
                    yield item
                    item = next(it)  # raises StopIteration when exhausted
                    s[0] = key(item)
                    s[2] = item
                    heapq.heapreplace(pq, s)  # restore heap condition
            except StopIteration:
                heapq.heappop(pq)  # remove empty iterator
            except IndexError:
                return
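
Since this version is lazy and keeps only one value per input in memory, it can merge infinite sorted streams; a sketch:

import itertools
evens = itertools.count(0, 2)
odds = itertools.count(1, 2)
print(list(itertools.islice(merge_sorted(evens, odds), 6)))  # [0, 1, 2, 3, 4, 5]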
Example #11
def pprint(g):
    """ Pretty print a tree of goals """
    if callable(g) and hasattr(g, "__name__"):
        return g.__name__
    if isinstance(g, type):  # pragma: no cover
        return g.__name__
    if isinstance(g, tuple):
        return "(" + ", ".join(map(pprint, g)) + ")"
    return str(g)
Example #12
def freeze(d):
    """ Freeze container to hashable form

    >>> freeze(1)
    1

    >>> freeze([1, 2])
    (1, 2)

    >>> freeze({1: 2}) # doctest: +SKIP
    frozenset([(1, 2)])
    """
    if isinstance(d, dict):
        return frozenset(map(freeze, d.items()))
    if isinstance(d, set):
        return frozenset(map(freeze, d))
    if isinstance(d, (tuple, list)):
        return tuple(map(freeze, d))
    return d
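
A usage sketch: frozen values are hashable, so nested containers become usable as dict keys or set members, and equal inputs freeze to equal keys.

cache = {freeze({'x': [1, 2]}): 'computed'}
print(cache[freeze({'x': [1, 2]})])  # 'computed'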
Example #13
def keymap(func, d):
    """ Apply function to keys of dictionary

    >>> bills = {"Alice": [20, 15, 30], "Bob": [10, 35]}
    >>> keymap(str.lower, bills)  # doctest: +SKIP
    {'alice': [20, 15, 30], 'bob': [10, 35]}

    See Also:
        valmap
    """
    return dict(zip(map(func, iterkeys(d)), itervalues(d)))
Example #14
def valmap(func, d):
    """ Apply function to values of dictionary

    >>> bills = {"Alice": [20, 15, 30], "Bob": [10, 35]}
    >>> valmap(sum, bills)  # doctest: +SKIP
    {'Alice': 65, 'Bob': 45}

    See Also:
        keymap
    """
    return dict(zip(iterkeys(d), map(func, itervalues(d))))
Example #15
def interleave(seqs, pass_exceptions=()):
    iters = map(iter, seqs)
    while iters:
        newiters = []
        for itr in iters:
            try:
                yield next(itr)
                newiters.append(itr)
            except (StopIteration,) + tuple(pass_exceptions):
                pass
        iters = newiters
Example #16
def test_append_spark_df_to_json_lines(ctx):
    out = os.linesep.join(map(json.dumps, df.to_dict('records')))
    sdf = ctx.table('t')
    expected = pd.concat([df, df]).sort('amount').reset_index(drop=True).sort_index(axis=1)
    with tmpfile('.json') as fn:
        with open(fn, mode='w') as f:
            f.write(out + os.linesep)

        uri = 'jsonlines://%s' % fn
        odo(sdf, uri)
        result = odo(uri, pd.DataFrame).sort('amount').reset_index(drop=True).sort_index(axis=1)
        tm.assert_frame_equal(result, expected)
Example #17
def itemmap(func, d):
    """ Apply function to items of dictionary

    >>> accountids = {"Alice": 10, "Bob": 20}
    >>> itemmap(reversed, accountids)  # doctest: +SKIP
    {10: "Alice", 20: "Bob"}

    See Also:
        keymap
        valmap
    """
    return dict(map(func, iteritems(d)))
Example #18
def itemmap(func, d, factory=dict):
    """ Apply function to items of dictionary

    >>> accountids = {"Alice": 10, "Bob": 20}
    >>> itemmap(reversed, accountids)  # doctest: +SKIP
    {10: "Alice", 20: "Bob"}

    See Also:
        keymap
        valmap
    """
    rv = factory()
    rv.update(map(func, iteritems(d)))
    return rv
Example #19
def valmap(func, d, factory=dict):
    """ Apply function to values of dictionary

    >>> bills = {"Alice": [20, 15, 30], "Bob": [10, 35]}
    >>> valmap(sum, bills)  # doctest: +SKIP
    {'Alice': 65, 'Bob': 45}

    See Also:
        keymap
        itemmap
    """
    rv = factory()
    rv.update(zip(iterkeys(d), map(func, itervalues(d))))
    return rv
Example #20
def keymap(func, d, factory=dict):
    """ Apply function to keys of dictionary

    >>> bills = {"Alice": [20, 15, 30], "Bob": [10, 35]}
    >>> keymap(str.lower, bills)  # doctest: +SKIP
    {'alice': [20, 15, 30], 'bob': [10, 35]}

    See Also:
        valmap
        itemmap
    """
    rv = factory()
    rv.update(zip(map(func, iterkeys(d)), itervalues(d)))
    return rv
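
The ``factory`` parameter accepts any dict-like constructor; a sketch with ``collections.OrderedDict`` (key order follows the input dict's iteration order):

from collections import OrderedDict

bills = {"Alice": [20, 15, 30], "Bob": [10, 35]}
od = keymap(str.lower, bills, factory=OrderedDict)
print(type(od).__name__, list(od))  # OrderedDict ['alice', 'bob']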
Example #21
def deep_transitive_get(key, d):
    """ Transitive get that propagates within tuples

    >>> from logpy.util import transitive_get, deep_transitive_get
    >>> d = {1: (2, 3), 2: 12, 3: 13}
    >>> transitive_get(1, d)
    (2, 3)
    >>> deep_transitive_get(1, d)
    (12, 13)
    """

    key = transitive_get(key, d)
    if isinstance(key, tuple):
        return tuple(map(lambda k: deep_transitive_get(k, d), key))
    else:
        return key
Example #22
def _merge_sorted_key(seqs, key):
    # The commented code below shows an alternative (slower) implementation
    # to apply a key function for sorting.
    #
    # mapper = lambda i, item: (key(item), i, item)
    # keyiters = [map(partial(mapper, i), itr) for i, itr in
    #             enumerate(seqs)]
    # return (item for (item_key, i, item) in heapq.merge(*keyiters))

    # binary heap as a priority queue
    pq = []

    # Initial population
    for itnum, it in enumerate(map(iter, seqs)):
        try:
            item = next(it)
            pq.append([key(item), itnum, item, it])
        except StopIteration:
            pass
    heapq.heapify(pq)

    # Repeatedly yield and then repopulate from the same iterator
    heapreplace = heapq.heapreplace
    heappop = heapq.heappop
    while len(pq) > 1:
        try:
            while True:
                # raises IndexError when pq is empty
                _, itnum, item, it = s = pq[0]
                yield item
                item = next(it)  # raises StopIteration when exhausted
                s[0] = key(item)
                s[2] = item
                heapreplace(pq, s)  # restore heap condition
        except StopIteration:
            heappop(pq)  # remove empty iterator
    if pq:
        # Much faster when only a single iterable remains
        _, itnum, item, it = pq[0]
        yield item
        for item in it:
            yield item
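
Called directly (each input must already be sorted under ``key``; ``heapq`` imported as in the surrounding module), the helper reproduces the keyed doctest from ``merge_sorted``:

print(list(_merge_sorted_key([[2, 3], [1, 3]], key=lambda x: x // 3)))  # [2, 1, 3, 3]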
Example #23
def interleave(seqs):
    """ Interleave a sequence of sequences

    >>> list(interleave([[1, 2], [3, 4]]))
    [1, 3, 2, 4]

    >>> ''.join(interleave(('ABC', 'XY')))
    'AXBYC'

    Both the individual sequences and the sequence of sequences may be infinite

    Returns a lazy iterator
    """
    iters = itertools.cycle(map(iter, seqs))
    while True:
        try:
            for itr in iters:
                yield next(itr)
            return
        except StopIteration:
            predicate = partial(operator.is_not, itr)
            iters = itertools.cycle(itertools.takewhile(predicate, iters))
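
A sketch of the infinite case: the consumer's ``islice`` bounds the pull, so a cycled input never blocks (``interleave`` as defined above, with its ``itertools``/``operator``/``partial`` imports in scope):

import itertools
letters = itertools.cycle('ab')
print(list(itertools.islice(interleave([letters, '123']), 7)))
# ['a', '1', 'b', '2', 'a', '3', 'b']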
Example #24
def diff(*seqs, **kwargs):
    """ Return those items that differ between sequences

    >>> list(diff([1, 2, 3], [1, 2, 10, 100]))
    [(3, 10)]

    Shorter sequences may be padded with a ``default`` value:

    >>> list(diff([1, 2, 3], [1, 2, 10, 100], default=None))
    [(3, 10), (None, 100)]

    A ``key`` function may also be applied to each item to use during
    comparisons:

    >>> list(diff(['apples', 'bananas'], ['Apples', 'Oranges'], key=str.lower))
    [('bananas', 'Oranges')]
    """
    N = len(seqs)
    if N == 1 and isinstance(seqs[0], list):
        seqs = seqs[0]
        N = len(seqs)
    if N < 2:
        raise TypeError('Too few sequences given (min 2 required)')
    default = kwargs.get('default', no_default)
    if default == no_default:
        iters = zip(*seqs)
    else:
        iters = zip_longest(*seqs, fillvalue=default)
    key = kwargs.get('key', None)
    if key is None:
        for items in iters:
            if items.count(items[0]) != N:
                yield items
    else:
        for items in iters:
            vals = tuple(map(key, items))
            if vals.count(vals[0]) != N:
                yield items
Example #25
def interleave(seqs, pass_exceptions=()):
    """ Interleave a sequence of sequences

    >>> list(interleave([[1, 2], [3, 4]]))
    [1, 3, 2, 4]

    >>> ''.join(interleave(('ABC', 'XY')))
    'AXBYC'

    Both the individual sequences and the sequence of sequences may be infinite

    Returns a lazy iterator
    """
    iters = map(iter, seqs)
    while iters:
        newiters = []
        for itr in iters:
            try:
                yield next(itr)
                newiters.append(itr)
            except (StopIteration,) + tuple(pass_exceptions):
                pass
        iters = newiters
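
A sketch of ``pass_exceptions`` with a toy ``flaky`` generator: an iterator raising a listed exception is dropped from the rotation instead of aborting the whole interleave.

def flaky():
    yield 1
    raise ValueError("boom")

print(list(interleave([flaky(), [10, 20, 30]], pass_exceptions=(ValueError,))))
# [1, 10, 20, 30]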
Example #26
def test_pyspark_to_sparksql(ctx, people):
    sdf = odo(data, ctx, dshape=discover(df))
    assert isinstance(sdf, (SparkDataFrame, SchemaRDD))
    assert (list(map(set, odo(people, list))) ==
            list(map(set, odo(sdf, list))))
Example #27
def test_load_from_jsonlines(ctx):
    with tmpfile('.json') as fn:
        js = odo(df, 'jsonlines://%s' % fn)
        result = odo(js, ctx, name='r')
        assert (list(map(set, odo(result, list))) ==
                list(map(set, odo(df, list))))
Example #28
def test_map_filter_are_lazy():
    def bad(x):
        raise Exception()
    map(bad, [1, 2, 3])
    filter(bad, [1, 2, 3])
Example #29
def _reify(t, s):
    return map(partial(reify, s=s), t)
Example #30
def test_reduction_to_scalar(ctx):
    result = odo(ctx.sql('select sum(amount) from t'), float)
    assert isinstance(result, float)
    assert result == sum(map(toolz.second, data))