Beispiel #1
0
def test_group_by_greedy():
    """Check that group_by only reports the largest groups.

    Groups that are entirely contained in a bigger group must be absorbed
    by it instead of being returned on their own.
    """
    values = list(range(7))

    # A non-greedy grouping would additionally report the partial windows
    # at both ends of the sequence: [0, 1, 2], [0, 1, 2, 3], [3, 4, 5, 6]
    # and [4, 5, 6]. Only the three full-width windows may survive.
    expected = [
        [0, 1, 2, 3, 4],
        [1, 2, 3, 4, 5],
        [2, 3, 4, 5, 6],
    ]

    result = setops.group_by(values, 2)

    assert sorted(result) == expected
Beispiel #2
0
def test_group_by_unique():
    """Check that group_by never returns the same group twice."""
    values = [0, 1, 50, 51, 99, 100]

    result = setops.group_by(values, 10)

    # an empty result would make the duplicate check below vacuous
    assert len(result) > 0

    # Peel the groups off the end one at a time; a removed group must not
    # have an equal twin among the groups that are still left.
    for _ in range(len(result)):
        candidate = result.pop()
        assert candidate not in result
Beispiel #3
0
def test_group_by_lossless(seq, tolerance):
    """Check that grouping neither invents nor drops any item of seq."""
    result = setops.group_by(seq, tolerance)

    # direction 1: every grouped item originates from seq
    for members in result:
        for member in members:
            assert member in seq

    # flatten the groups into a single list of all grouped items
    flattened = [member for members in result for member in members]

    # direction 2: every item of seq shows up in at least one group
    for item in seq:
        assert item in flattened
Beispiel #4
0
def test_group_by_key():
    """Make sure group_by groups objects by the value that key returns.

    Ten objects carrying x = 0..9 are grouped with a tolerance of 3 on x.
    The result must consist of exactly the four expected windows of the
    input, and each window must hold the very same object instances in
    the same order.
    """
    class A(object):
        def __init__(self, x):
            self.x = x

    seq = [A(i) for i in range(10)]
    expected = [seq[0:7], seq[1:8], seq[2:9], seq[3:10]]

    groups = setops.group_by(seq, 3, key=lambda a: a.x)

    # zip() below stops at the shorter of its inputs, so without this
    # check a result with missing groups (even an empty one) would pass
    assert len(groups) == len(expected)

    # sort the received groups by their first element
    groups.sort(key=lambda i: i[0].x)

    # make sure that every element in every group is the same object as
    # the corresponding element in the expected group
    for group, expected_group in zip(groups, expected):
        # same reasoning as above: a truncated group must not slip through
        assert len(group) == len(expected_group)
        for a, expected_a in zip(group, expected_group):
            assert a is expected_a
Beispiel #5
0
def get_grouped_quadrants(img_descriptors, tolerance, nquads_x, nquads_y,
                          pool):
    """Compute the quadrants of every image and group them by similarity.

    Arguments:
        img_descriptors: the image descriptors to process; each one is
            handed to imagedescr.calc_quadrants.
        tolerance: passed to setops.group_by as the similarity tolerance
            (a value between 0 and 255, the allowed difference in average
            value between two quadrants).
        nquads_x: number of subdivisions along the x axis.
        nquads_y: number of subdivisions along the y axis.
        pool: an object with a map() method, such as a
            multiprocessing.Pool, used to spread the quadrant calculation
            over worker processes.

    Returns a list with nquads_x * nquads_y entries. Entry n is the result
    of setops.group_by over all images, keyed on each image's n-th
    quadrant, i.e. the (possibly overlapping) groups of images that are
    similar in that quadrant.

    """
    # pool.map can only ship a global or picklable callable to the
    # workers; functools.partial objects are picklable from Python 2.7 on,
    # so bind the subdivision counts this way instead of using a closure.
    quadrant_worker = functools.partial(imagedescr.calc_quadrants,
                                        n_x=nquads_x,
                                        n_y=nquads_y)

    per_image = pool.map(quadrant_worker, img_descriptors)

    # XXX: Going through the pool implies memory copies: arguments are
    # pickled and sent to the workers, and the results come back as copies
    # of the original objects, which wastes memory. A unification pass
    # could remove the duplicates: walk the quadrants, keep each object in
    # a set, and replace it with the instance already stored there.

    grouped = []
    for n in range(nquads_x * nquads_y):
        # group all images by the value of their n-th quadrant
        grouped.append(
            setops.group_by(per_image,
                            tolerance,
                            key=lambda q: q.quadrants[n]))

    return grouped
Beispiel #6
0
def test_group_by_bounds(seq, tolerance):
    """Check that no group spans more than twice the tolerance.

    For every group g returned by group_by(seq, tolerance), the distance
    between its largest and smallest item must be at most tolerance * 2.
    """
    for members in setops.group_by(seq, tolerance):
        spread = max(members) - min(members)
        assert spread <= tolerance * 2