def test_collisions(self):
    """Ensure that no pair of paths intersects."""
    paths = list()
    for p, w in self.word_cloud:
        paths.extend(BoxifyWord.cleaned_textpath(p))
    for p1, p2 in combinations(paths, 2):
        # The bounding-box overlap test is a cheap prefilter; only when the
        # boxes overlap do we pay for the exact path-intersection test.
        if (p1.get_extents().overlaps(p2.get_extents())
                and p1.intersects_path(p2, filled=True)):
            self.fail("Found a collision between two paths with holes")
def test_collisions_noloops(self):
    """Ensure that no pair of paths without holes intersects."""
    # Build a deterministic cloud from 100 words with decreasing weights.
    words = ["sun", "luck"] * 50
    weights = map(lambda w: 1.0/(w+1), range(100))
    word_cloud = WordCloud(zip(words, weights), seed=1)
    paths = []
    for path, _weight in word_cloud:
        paths.extend(BoxifyWord.cleaned_textpath(path))
    # Cheap bbox-overlap prefilter before the expensive intersection test.
    for first, second in combinations(paths, 2):
        boxes_touch = first.get_extents().overlaps(second.get_extents())
        if boxes_touch and first.intersects_path(second, filled=True):
            self.fail("Found a collision between two paths without holes")
def build_cloud(wordweights, loose=False, seed=None, split_limit=2**-3,
                pad=1.10, visual_limit=2**-5, highest_weight=None):
    """Convert a list of words and weights into a list of paths and weights.

    You should only use this function if you know what you're doing, or if
    you really don't want to cache the generated paths. Otherwise just use
    the WordCloud class.

    Args:
        wordweights: An iterator of the form
            [ (word, weight), (word, weight), ... ]
            such that the weights are in decreasing order.
        loose: If `true', words won't be broken up into rectangles after
            insertion. This results in a looser cloud, generated faster.
        seed: A random seed to use.
        split_limit: When words are approximated by rectangles, the
            rectangles will have dimensions less than split_limit. Higher
            values result in a tighter cloud, at a cost of more CPU time.
            The largest word has height 1.0.
        pad: Expand a word's bounding box by a factor of `pad' before
            inserting it. This can actually result in a tighter cloud if
            you have many small words by leaving space between large words.
        visual_limit: Words with height smaller than visual_limit will be
            discarded.
        highest_weight: Experimental feature. If you provide an upper bound
            on the weights that will be seen you don't have to provide
            words and weights sorted. The resulting word cloud will be
            noticeably uglier.

    Generates:
        Tuples of the form (path, weight) such that:
            * No two paths intersect
            * Paths are fairly densely packed around the origin
            * All weights are normalized to fall in the interval [0, 1]
    """
    if seed is not None:
        random.seed(seed)
    font_properties = font_manager.FontProperties(
        family="sans", weight="bold", stretch="condensed")
    xheight = TextPath(
        (0, 0), "x", prop=font_properties
    ).get_extents().expanded(pad, pad).height
    # These are magic numbers. Most wordclouds will not exceed these bounds.
    # If they do, it will have to re-index all of the bounding boxes.
    index_bounds = (-16, -16, 16, 16)
    index = BboxQuadtree(index_bounds)
    if highest_weight is None:
        # Attempt to pull the first word and weight. If we fail, the
        # wordweights list is empty and we should just quit. The chain()
        # dance puts the consumed pair back so arbitrary iterators work.
        iterwords = iter(wordweights)
        try:
            # next() builtin instead of .next() so this runs on both
            # Python 2.6+ and Python 3.
            first_word, first_weight = next(iterwords)
            iterwords = chain([(first_word, first_weight)], iterwords)
        except StopIteration:
            return
        # We'll scale all of the weights down by this much.
        weight_scale = 1.0/first_weight
    else:
        weight_scale = 1.0/highest_weight
        iterwords = iter(wordweights)
    bboxes = list()
    bounds = transforms.Bbox(((-0.5, -0.5), (-0.5, -0.5)))
    for tword, tweight in iterwords:
        weight = tweight*weight_scale
        if weight < visual_limit:
            # Too small to be seen. Skip it — but keep looping, because
            # when highest_weight is given the weights may be unsorted.
            continue
        word_path = TextPath((0, 0), tword, prop=font_properties)
        word_bbox = word_path.get_extents().expanded(pad, pad)
        word_scale = weight/float(xheight)
        # When we build a TextPath at (0,0) it doesn't necessarily have
        # its corner at (0,0). So we have to translate to the origin,
        # scale down, then translate to center it.
        word_trans = Affine2D.identity().translate(
            -word_bbox.xmin, -word_bbox.ymin
        ).scale(word_scale).translate(
            -0.5*abs(word_bbox.width)*word_scale,
            -0.5*abs(word_bbox.height)*word_scale
        )
        word_path = word_path.transformed(word_trans)
        word_bbox = word_path.get_extents().expanded(pad, pad)
        if weight > split_limit:
            # Big words we place carefully, trying to make the dimensions
            # of the cloud equal and center it around the origin: aim for
            # whichever edge is currently closest to the origin.
            gaps = (("left", bounds.xmin), ("bottom", bounds.ymin),
                    ("right", bounds.xmax), ("top", bounds.ymax))
            direction = min(gaps, key=lambda g: abs(g[1]))[0]
        else:
            # Small words we place randomly.
            direction = random.choice(["left", "bottom", "right", "top"])
        # Randomly place the word along an edge...
        if direction in ("top", "bottom"):
            center = random_position(bounds.xmin, bounds.xmax)
        elif direction in ("right", "left"):
            center = random_position(bounds.ymin, bounds.ymax)
        # ...and push it toward an axis.
        if direction == "top":
            bbox = word_bbox.translated(center, index_bounds[3])
            xpos, ypos = push_bbox_down(bbox, bboxes, index)
        elif direction == "right":
            bbox = word_bbox.translated(index_bounds[2], center)
            xpos, ypos = push_bbox_left(bbox, bboxes, index)
        elif direction == "bottom":
            bbox = word_bbox.translated(center, index_bounds[1])
            xpos, ypos = push_bbox_up(bbox, bboxes, index)
        elif direction == "left":
            bbox = word_bbox.translated(index_bounds[0], center)
            xpos, ypos = push_bbox_right(bbox, bboxes, index)
        # Now alternate pushing the word toward different axes until
        # either it stops moving or we get sick of it.
        max_moves = 2
        moves = 0
        while moves < max_moves and (
                moves == 0 or prev_xpos != xpos or prev_ypos != ypos):
            moves += 1
            prev_xpos = xpos
            prev_ypos = ypos
            if direction in ["top", "bottom", "vertical"]:
                if xpos > 0:
                    bbox = word_bbox.translated(xpos, ypos)
                    xpos, ypos = push_bbox_left(bbox, bboxes, index)
                elif xpos < 0:
                    bbox = word_bbox.translated(xpos, ypos)
                    xpos, ypos = push_bbox_right(bbox, bboxes, index)
                direction = "horizontal"
            elif direction in ["left", "right", "horizontal"]:
                if ypos > 0:
                    bbox = word_bbox.translated(xpos, ypos)
                    xpos, ypos = push_bbox_down(bbox, bboxes, index)
                elif ypos < 0:
                    bbox = word_bbox.translated(xpos, ypos)
                    xpos, ypos = push_bbox_up(bbox, bboxes, index)
                direction = "vertical"
        wordtrans = Affine2D.identity().translate(xpos, ypos)
        transpath = word_path.transformed(wordtrans)
        bbox = transpath.get_extents()
        # Swallow the new word into the bounding box for the word cloud.
        bounds = matplotlib.transforms.Bbox.union([bounds, bbox])
        # We need to check if we've expanded past the bounds of our quad
        # tree. If so we'll need to expand the bounds and then re-index.
        new_bounds = index_bounds
        while not BoxifyWord.bbox_covers(
            # FIXME: Why am I not just doing this with a couple of
            # logarithms?
            matplotlib.transforms.Bbox(((new_bounds[0], new_bounds[1]),
                                        (new_bounds[2], new_bounds[3]))),
            bounds
        ):
            # BUG FIX: double the running bounds, not the original
            # index_bounds — the old code re-doubled index_bounds each
            # pass, so this loop never terminated whenever a single
            # doubling wasn't enough to cover the cloud.
            new_bounds = tuple(2*x for x in new_bounds)
        if new_bounds != index_bounds:
            # We need to re-index.
            index_bounds = new_bounds
            index = BboxQuadtree(index_bounds)
            for i, b in enumerate(bboxes):
                index.add_bbox(i, b)
        # Approximate the new word by rectangles (unless it's too small)
        # and insert them into the index.
        if not loose and max(abs(bbox.width), abs(bbox.height)) > split_limit:
            for littlebox in BoxifyWord.splitword(bbox, transpath,
                                                  limit=split_limit):
                bboxes.append(littlebox)
                index.add_bbox(len(bboxes)-1, littlebox)
        else:
            bboxes.append(bbox)
            index.add_bbox(len(bboxes)-1, bbox)
        yield (transpath, weight)