Example #1
    def test_collisions(self):
        """Ensure that no pair of paths intersects."""

        # collided_paths = list()
        
        paths = list()
        for p, w in self.word_cloud:
            paths.extend(BoxifyWord.cleaned_textpath(p))

        for p1, p2 in combinations(paths, 2):
            if( p1.get_extents().overlaps(p2.get_extents())
                    and p1.intersects_path(p2, filled=True) ):
                # collided_paths.extend((p1, p2))
                self.fail("Found a collision between two paths with holes")
Example #2
    def test_collisions_noloops(self):
        """Ensure that no pair of paths without holes intersects."""

        words = [ "sun", "luck" ] * 50
        weights = map( lambda w: 1.0/(w+1), range(100) )
        word_weights = zip(words, weights)
        word_cloud = WordCloud(word_weights, seed=1)
        
        paths = list()
        for p, w in word_cloud:
            paths.extend(BoxifyWord.cleaned_textpath(p))

        for p1, p2 in combinations(paths, 2):
            if( p1.get_extents().overlaps(p2.get_extents())
                    and p1.intersects_path(p2, filled=True) ):
                self.fail("Found a collision between two paths without holes")
Example #3
def build_cloud(wordweights, 
        loose=False, seed=None, split_limit=2**-3, pad=1.10, visual_limit=2**-5,
        highest_weight=None ):
    """Convert a list of words and weights into a list of paths and weights.

    You should only use this function if you know what you're doing, or if
    you really don't want to cache the generated paths.  Otherwise just use
    the WordCloud class.

    Args:
        wordweights: An iterator of the form 
                [ (word, weight), (word, weight), ... ]
            such that the weights are in decreasing order.
        loose: If `True', words won't be broken up into rectangles after
            insertion.  This results in a looser cloud that is generated
            faster.
        seed: A random seed to use.
        split_limit: When words are approximated by rectangles, the rectangles
            will have dimensions less than split_limit.  Smaller values result
            in a tighter cloud, at the cost of more CPU time.  The largest word
            has height 1.0.
        pad: Expand a word's bounding box by a factor of `pad' before
            inserting it.  With many small words this can actually produce a
            tighter cloud, because it leaves gaps between the large words for
            the small ones to fill.
        visual_limit: Words with height smaller than visual_limit will be
            discarded.
        highest_weight: Experimental feature.  If you provide an upper bound
            on the weights that will be seen, the words and weights do not
            have to be provided in sorted order.  The resulting word cloud
            will be noticeably uglier.

    Generates:
        Tuples of the form (path, weight) such that:
            * No two paths intersect
            * Paths are fairly densely packed around the origin
            * All weights are normalized to fall in the interval [0, 1]
    """
    if seed is not None:
        random.seed(seed)

    font_properties = font_manager.FontProperties(
                family="sans", weight="bold", stretch="condensed")
    xheight = TextPath((0,0), "x", prop=font_properties).get_extents().expanded(pad,pad).height

    # These are magic numbers.  Most wordclouds will not exceed these bounds.
    # If they do, the quadtree will have to be rebuilt and all of the bounding
    # boxes re-indexed.
    index_bounds = (-16, -16, 16, 16)
    index = BboxQuadtree(index_bounds)

    if highest_weight is None:
        # Attempt to pull the first word and weight.  If we fail, the wordweights
        # list is empty and we should just quit.
        #
        # All this nonsense is to ensure it accepts an iterator of words
        # correctly.
        iterwords = iter(wordweights)
        try:
            first_word, first_weight = next(iterwords)
            iterwords = chain([(first_word, first_weight)], iterwords)
        except StopIteration:
            return

        # We'll scale all of the weights down by this much.
        weight_scale = 1.0/first_weight
    else:
        weight_scale = 1.0/highest_weight
        iterwords = iter(wordweights)

    bboxes = list()

    bounds = transforms.Bbox(((-0.5, -0.5), (-0.5, -0.5)))
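    # `bboxes` holds every rectangle placed so far (its indices double as the
    # quadtree keys), while `bounds` tracks the overall extent of the cloud.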
    for tword, tweight in iterwords:
        weight = tweight*weight_scale
        if weight < visual_limit:
            # The word would be too small to see, so skip it.  (We can't just
            # stop here: with highest_weight set, the input may not be sorted.)
            continue

        word_path = TextPath((0,0), tword, prop=font_properties)
        word_bbox = word_path.get_extents().expanded(pad, pad)
        # word_scale = weight/float(word_bbox.height)
        word_scale = weight/float(xheight)
        
        # When we build a TextPath at (0,0) it doesn't necessarily have
        # its corner at (0,0).  So we have to translate to the origin,
        # scale down, then translate to center it.  Feel free to simplify
        # this if you want.
        word_trans = Affine2D.identity().translate(
                                -word_bbox.xmin,
                                -word_bbox.ymin
                            ).scale(word_scale).translate(
                                -0.5*abs(word_bbox.width)*word_scale,
                                -0.5*abs(word_bbox.height)*word_scale )

        word_path = word_path.transformed(word_trans)

        word_bbox = word_path.get_extents().expanded(pad, pad)

        if weight > split_limit:
            # Big words we place carefully, trying to make the dimensions of
            # the cloud equal and center it around the origin.
            gaps = ( 
                    ("left", bounds.xmin), ("bottom", bounds.ymin), 
                    ("right", bounds.xmax), ("top", bounds.ymax) )
            direction = min(gaps, key=lambda g: abs(g[1]))[0]
        else:
            # Small words we place randomly.
            direction = random.choice( [ "left", "bottom", "right", "top" ] )

        # Randomly place the word along an edge...
        if direction in ( "top", "bottom" ):
            center = random_position(bounds.xmin, bounds.xmax)
        elif direction in ( "right", "left" ):
            center = random_position(bounds.ymin, bounds.ymax)

        # And push it toward an axis.
        if direction == "top":
            bbox = word_bbox.translated( center, index_bounds[3] )
            xpos, ypos = push_bbox_down( bbox, bboxes, index )
        elif direction == "right":
            bbox = word_bbox.translated( index_bounds[2], center )
            xpos, ypos = push_bbox_left( bbox, bboxes, index )
        elif direction == "bottom":
            bbox = word_bbox.translated( center, index_bounds[1] )
            xpos, ypos = push_bbox_up( bbox, bboxes, index )
        elif direction == "left":
            bbox = word_bbox.translated( index_bounds[0], center )
            xpos, ypos = push_bbox_right( bbox, bboxes, index )
    
        # Now alternate pushing the word toward different axes until either
        # it stops moving or we get sick of it.
        max_moves = 2
        moves = 0
        while moves < max_moves and (moves == 0 or prev_xpos != xpos or prev_ypos != ypos):
            moves += 1
            prev_xpos = xpos
            prev_ypos = ypos
            if direction in ["top", "bottom", "vertical"]:
                if xpos > 0:
                    bbox = word_bbox.translated( xpos, ypos )
                    xpos, ypos = push_bbox_left( bbox, bboxes, index )
                elif xpos < 0:
                    bbox = word_bbox.translated( xpos, ypos )
                    xpos, ypos = push_bbox_right( bbox, bboxes, index )
                direction = "horizontal"
            elif direction in ["left", "right", "horizontal"]:
                if ypos > 0:
                    bbox = word_bbox.translated( xpos, ypos )
                    xpos, ypos = push_bbox_down( bbox, bboxes, index )
                elif ypos < 0:
                    bbox = word_bbox.translated( xpos, ypos )
                    xpos, ypos = push_bbox_up( bbox, bboxes, index )
                direction = "vertical"

        wordtrans = Affine2D.identity().translate( xpos, ypos )
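        # Bake the settled (xpos, ypos) offset into a transformed copy of the
        # path, so the caller receives an already-positioned word.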

        transpath = word_path.transformed(wordtrans)
        bbox = transpath.get_extents()

        # Swallow the new word into the bounding box for the word cloud.
        bounds = matplotlib.transforms.Bbox.union( [ bounds, bbox ] )

        # We need to check if we've expanded past the bounds of our quad tree.
        # If so we'll need to expand the bounds and then re-index.
        # FIXME: Why am I not just doing this with a couple of logarithms?
        new_bounds = index_bounds
        while not BoxifyWord.bbox_covers(
                    matplotlib.transforms.Bbox(((new_bounds[0], new_bounds[1]),
                                                (new_bounds[2], new_bounds[3]))),
                    bounds ):
            new_bounds = tuple( map( lambda x: 2*x, new_bounds ) )

        if new_bounds != index_bounds:
            # We need to re-index.
            index_bounds = new_bounds
            index = BboxQuadtree(index_bounds)
            for i, b in enumerate(bboxes):
                index.add_bbox(i, b)

        # Approximate the new word by rectangles (unless it's too small) and
        # insert them into the index.
        if not loose and max(abs(bbox.width), abs(bbox.height)) > split_limit:
            for littlebox in BoxifyWord.splitword( 
                    bbox, transpath, limit=split_limit ):
                bboxes.append( littlebox )
                index.add_bbox( len(bboxes)-1, littlebox )
        else:
            bboxes.append( bbox )
            index.add_bbox( len(bboxes)-1, bbox )

        yield (transpath, weight)
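
A minimal usage sketch for build_cloud follows (not part of the original examples). The sample word list, the grayscale PathPatch rendering, and the axis limits are illustrative assumptions; build_cloud and its helpers are assumed to be importable or already in scope, and the generator itself only guarantees positioned (path, weight) tuples.

# Hedged sketch: the word list and rendering choices are assumptions, not part
# of the original code.  Weights are supplied in decreasing order, as the
# docstring requires.
import matplotlib.pyplot as plt
from matplotlib.patches import PathPatch

word_weights = [("alpha", 10.0), ("beta", 6.0), ("gamma", 3.0), ("delta", 1.0)]

fig, ax = plt.subplots()
for path, weight in build_cloud(word_weights, seed=1):
    # Heavier words are drawn darker; weight is already normalized to [0, 1].
    ax.add_patch(PathPatch(path, linewidth=0, color=str(1.0 - weight)))
ax.set_xlim(-2, 2)
ax.set_ylim(-2, 2)
ax.set_aspect("equal")
ax.set_axis_off()
plt.show()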