def test_group_by_greedy():
    """Check that group_by folds subgroups into the larger groups."""
    values = list(range(7))
    # A non-greedy grouping would also report the smaller neighbourhoods:
    # [[0, 1, 2], [0, 1, 2, 3], [0, 1, 2, 3, 4], [1, 2, 3, 4, 5],
    #  [2, 3, 4, 5, 6], [3, 4, 5, 6], [4, 5, 6]]
    # Only the groups that are not contained in another one should remain.
    expected = [
        [0, 1, 2, 3, 4],
        [1, 2, 3, 4, 5],
        [2, 3, 4, 5, 6],
    ]
    result = setops.group_by(values, 2)
    assert sorted(result) == expected
def test_group_by_unique():
    """Check that group_by never returns the same group twice."""
    values = [0, 1, 50, 51, 99, 100]
    result = setops.group_by(values, 10)
    assert len(result) > 0
    # Walk from the last group to the first, comparing each group against
    # all the groups that precede it.
    for position in range(len(result) - 1, -1, -1):
        assert result[position] not in result[:position]
def test_group_by_lossless(seq, tolerance):
    """Check that group_by neither invents nor drops items.

    Every member of every returned group must come from seq, and every
    item of seq must show up in at least one returned group.
    """
    result = setops.group_by(seq, tolerance)

    # nothing in the output that was not in the input
    for members in result:
        for member in members:
            assert member in seq

    # nothing in the input that is missing from the output
    flattened = sum(result, [])
    for item in seq:
        assert item in flattened
def test_group_by_key():
    """group_by(seq, t, key) = e

    Groups objects through a ``key`` function that extracts the value to
    compare, and checks that the very same instances (identity, not
    equality) come back, in order, in the expected groups.
    """
    class A(object):
        def __init__(self, x):
            self.x = x

    seq = [A(i) for i in range(10)]
    expected = [seq[0:7], seq[1:8], seq[2:9], seq[3:10]]
    groups = setops.group_by(seq, 3, key=lambda a: a.x)
    # sort the received groups by their first element
    groups.sort(key=lambda i: i[0].x)
    # zip() stops at the shorter input, so a missing group or a truncated
    # group would go unnoticed without the explicit length checks
    assert len(groups) == len(expected)
    for group, expected_group in zip(groups, expected):
        assert len(group) == len(expected_group)
        # every element in every group must be the same object as the
        # corresponding element in the expected group
        for a, expected_a in zip(group, expected_group):
            assert a is expected_a
def get_grouped_quadrants(img_descriptors, tolerance, nquads_x, nquads_y,
                          pool):
    """Compute the quadrants of every image and group them by similarity.

    Arguments:
      img_descriptors -- list of ImageDescr
      tolerance -- value between 0 and 255; the difference in average value
          for two quadrants to be considered similar
      nquads_x -- number of subdivisions along the x axis
      nquads_y -- number of subdivisions along the y axis
      pool -- multiprocessing.Pool object used to parallelize the work

    Returns a list of grouped quadrants, of length nquads_x * nquads_y.
    Each grouped quadrant is a (possibly overlapping) list of sets of
    similar QuadrantAverages within that quadrant:
    [{a, b, c}, {f, g, c}, ...]
    """
    # calc_quadrants specialized for our nquads_x and nquads_y. This needs
    # Python >= 2.7: partial functions were not picklable in earlier
    # versions, and pool.map needs a global or picklable function.
    quadrant_worker = functools.partial(
        imagedescr.calc_quadrants, n_x=nquads_x, n_y=nquads_y)
    per_image_quads = pool.map(quadrant_worker, img_descriptors)

    # XXX: These pool operations imply memory copies. The data is pickled
    # and sent to the worker processes. The return values will contain
    # copies of the original objects, which wastes memory. We may want to
    # add a unification pass to eliminate duplicate objects: iterate the
    # quadrants, storing each object in a set, and replacing it with the
    # instance from the set.

    grouped = []
    for index in range(nquads_x * nquads_y):
        # The key closes over `index`; it is handed to group_by within this
        # same iteration, before `index` moves on to the next quadrant.
        def nth_quadrant(image_quads):
            return image_quads.quadrants[index]

        grouped.append(
            setops.group_by(per_image_quads, tolerance, key=nth_quadrant))
    return grouped
def test_group_by_bounds(seq, tolerance):
    """Check that no group spans more than twice the tolerance.

    group_by(seq, t) = x, for g in x, max(g) - min(g) <= t*2
    """
    for members in setops.group_by(seq, tolerance):
        spread = max(members) - min(members)
        assert spread <= tolerance * 2