def to_image(self):
    """Render the generated layout into a PIL image.

    The canvas takes its size from ``self.mask`` when a mask is set and from
    ``self.height`` / ``self.width`` otherwise. Canvas size, font sizes and
    positions are all multiplied by ``self.scale`` and truncated to ``int``.

    Returns
    -------
    The value returned by ``self._draw_contour`` for the rendered image.
    """
    self._check_generated()

    if self.mask is None:
        height, width = self.height, self.width
    else:
        width = self.mask.shape[1]
        height = self.mask.shape[0]

    canvas_size = (int(width * self.scale), int(height * self.scale))
    canvas = Image.new(self.mode, canvas_size, self.background_color)
    painter = ImageDraw.Draw(canvas)

    for entry in self.layout_:
        (word, _count), font_size, position, orientation, color = entry
        base_font = ImageFont.truetype(self.font_path,
                                       int(font_size * self.scale))
        rotated_font = ImageFont.TransposedFont(base_font,
                                                orientation=orientation)
        # position[1] is used as the horizontal coordinate and position[0]
        # as the vertical one.
        xy = (int(position[1] * self.scale), int(position[0] * self.scale))
        painter.text(xy, word, fill=color, font=rotated_font)

    return self._draw_contour(img=canvas)
def select_orintation(font_size, font_path, canvas_size, word, margin, draw,
                      font):
    """Choose the orientation for a word.

    The word is measured with ``font`` as given and with a 90-degree
    transposed copy. If the transposed box (plus ``margin``) fails
    ``check_in_bound`` the upright variant is chosen; otherwise, if the
    upright box fails, the transposed variant is chosen. When both pass, the
    choice is random with a 9:1 preference for the upright variant.

    ``font_size`` and ``font_path`` are accepted but not used here. As a side
    effect ``draw`` is left with the transposed font selected.

    Returns
    -------
    (box_size, orientation) : the measured size of the chosen variant and
        either ``None`` (upright) or ``Image.ROTATE_90``.
    """
    width, height = canvas_size
    canvas = (width, height)

    draw.setfont(font)
    upright_size = draw.textsize(word)
    rotated_font = ImageFont.TransposedFont(font, orientation=Image.ROTATE_90)
    draw.setfont(rotated_font)
    rotated_size = draw.textsize(word)

    def _in_bounds(size):
        # The measured size is swapped before being handed to the helper.
        return check_in_bound(canvas, (size[1] + margin, size[0] + margin))

    if not _in_bounds(rotated_size):
        box_size, orientation = upright_size, None
    elif not _in_bounds(upright_size):
        box_size, orientation = rotated_size, Image.ROTATE_90
    else:
        box_size, orientation = None, None

    if debug >= 1:
        print('trans:', rotated_size, 'nontrans:', upright_size, orientation, box_size)

    # Neither variant was forced: pick at random, favouring upright text.
    if box_size is None:
        candidates = ([(upright_size, None)] * 9
                      + [(rotated_size, Image.ROTATE_90)])
        box_size, orientation = random.choice(candidates)
    return box_size, orientation
def generate_from_topics(self, topics):
    """Create a topic_cloud from topics.

    Every topic is drawn into its own circular sector of the canvas; the
    sector angle grows with the (smoothed) topic proportion.

    Parameters
    ----------
    topics : array of tuples
        Each topic: (proportion in the document,
        [ (word1, freq1), (word2, freq2), ... ] ). The input is not
        modified; lemmatised and normalised copies are used internally.

    Returns
    -------
    self
        ``self.layout_`` holds one ``((word, freq), font_size, position,
        orientation, color)`` tuple per word that was actually placed and
        ``self.sector_angles`` one entry per kept topic.
    """
    # Lemmatise: merge the frequencies of words sharing a lemma, then trim.
    prepared = []
    for topic in topics:
        merged = []
        index_of = {}
        for word, freq in topic[1]:
            lemma = lemmatize2(word)
            if lemma in index_of:
                merged[index_of[lemma]][1] += freq
            else:
                index_of[lemma] = len(merged)
                merged.append([lemma, freq])
        merged.sort(key=itemgetter(1), reverse=True)

        # Drop trailing words below the threshold; the top word is always
        # kept (when there is one), as before.
        keep = min(len(merged), 1)
        for pos in range(len(merged) - 1, -1, -1):
            if merged[pos][1] >= self.min_word_topic_prop:
                keep = pos + 1
                break
        prepared.append([topic[0], merged[:keep][:self.max_topic_words]])

    # make sure topics are sorted and normalized
    topics = sorted(prepared, key=itemgetter(0), reverse=True)
    if len(topics) > self.max_topic_num:
        topics = topics[:self.max_topic_num]

    min_topic_prop = topics[0][0] / self.max_topic_prop_ratio
    # NOTE(review): as in the previous implementation the scan stops at
    # index 1, so the second topic is kept even when it is below the
    # threshold - confirm whether that is intended.
    keep = min(len(topics), 2)
    for pos in range(len(topics) - 1, 0, -1):
        if topics[pos][0] >= min_topic_prop:
            keep = pos + 1
            break
    topics = topics[:keep]
    T = len(topics)

    # Smooth the proportions with a 0.8 power and renormalise to sum to 1.
    smoothed = [np.power(topic[0], 0.8) for topic in topics]
    total_props = sum(smoothed)
    for topic, value in zip(topics, smoothed):
        topic[0] = value / total_props

    avail_angles = 360 - T * self.min_sector_padding
    max_angle = avail_angles * topics[0][0]
    angle_scale = 1
    if max_angle > self.max_sector_angle:
        angle_scale = self.max_sector_angle / max_angle
    angle_list = [avail_angles * topic[0] * angle_scale for topic in topics]
    sector_padding = (360 - sum(angle_list)) / T
    topic_angles = np.array(angle_list)

    height, width = self.height, self.width
    # create image
    img_grey = Image.new("L", (width, height))
    draw = ImageDraw.Draw(img_grey)
    placed, font_sizes, positions, orientations, colors = [], [], [], [], []

    if self.random_state is not None:
        random_state = self.random_state
    else:
        random_state = Random()

    # Font sizes are relative to the top word of the largest topic that
    # has any words at all.
    reference_freq = next((t[1][0][1] for t in topics if t[1]), None)

    sector_angles = []
    for i, topic in enumerate(topics):
        if i == 0:
            # initial angle starts from the symmetric left side of the
            # y-axis to ensure first sector always at right above of the
            # canvas
            start_angle = 270 - topic_angles[0] / 2
            stop_angle = 270 + topic_angles[0] / 2
        else:
            start_angle = stop_angle + sector_padding
            stop_angle += sector_padding + topic_angles[i]

        # reverse sign to conform with pillow's measurement of angles
        sector_angles.append(clockwise(start_angle, stop_angle))
        sector_mask = genSectorMask(width, height, start_angle, stop_angle)
        occupancy = IntegralOccupancyMap(height, width, sector_mask)

        frequencies = sorted(topic[1][:self.max_words], key=itemgetter(1),
                             reverse=True)
        # Guard BEFORE indexing into the word list; the sector itself has
        # already been recorded so the angles stay aligned with the topics.
        if not frequencies:
            print("We need at least 1 word to plot a word cloud, got 0.")
            continue

        # largest entry will be 1
        max_frequency = float(frequencies[0][1])
        frequencies = [(word, freq / max_frequency)
                       for word, freq in frequencies]

        last_freq = 1.
        font_size = self.max_font_size * min(
            np.sqrt(topic[1][0][1] / float(reference_freq)), 2)

        drawn_words = []
        # start drawing grey image
        for word, freq in frequencies:
            # select the font size
            rs = self.relative_scaling
            if rs != 0:
                font_size = int(round(
                    (rs * (freq / float(last_freq)) + (1 - rs)) * font_size))

            result = None
            while True:
                # try to find a position
                font = ImageFont.truetype(self.font_path, font_size)
                # transpose font optionally
                if random_state.random() < self.prefer_horizontal:
                    orientation = None
                else:
                    orientation = Image.ROTATE_90
                transposed_font = ImageFont.TransposedFont(
                    font, orientation=orientation)
                # get size of resulting text
                box_size = draw.textsize(word, font=transposed_font)
                # find possible places using integral image:
                result = occupancy.sample_position(
                    box_size[1] + 2 * self.margin,
                    box_size[0] + 2 * self.margin, random_state)
                if result is not None or font_size <= self.min_font_size:
                    break
                # if we didn't find a place, make font smaller (but never
                # below the configured minimum)
                font_size = max(font_size - self.font_step,
                                self.min_font_size)

            if result is None:
                # No room left in this sector even at the smallest size:
                # stop here instead of retrying forever or crashing below.
                break

            drawn_words.append(word)
            x, y = np.array(result) + self.margin // 2
            # actually draw the text
            draw.text((y, x), word, fill="white", font=transposed_font)
            # Only placed words are recorded so all layout lists stay
            # the same length.
            placed.append((word, freq))
            positions.append((x, y))
            orientations.append(orientation)
            font_sizes.append(font_size)
            colors.append(d3_category20_rand(i))
            # Everything outside the sector counts as occupied as well.
            img_array = (np.asarray(img_grey) + sector_mask) > 0
            occupancy.update(img_array, x, y)
            last_freq = freq

        print("Topic %d (%.1f):" % (i + 1, topic_angles[i]))
        print(drawn_words)

    self.layout_ = list(
        zip(placed, font_sizes, positions, orientations, colors))
    self.sector_angles = sector_angles
    return self
def draw_text(self, font_offset, font, fillcolor, s, recursion=False):
    """Draw string ``s`` on ``self.draw`` with randomly selected effects.

    Three effects are rolled independently with ``np.random.rand()`` against
    the ratios in ``self.conf``: vertical text (90-degree transposed font),
    a one-pixel border in a contrasting colour, and random extra spacing
    between characters. Unless ``recursion`` is true, a fourth roll may add
    two "disturbance" lines (``self.shuffle_str(s)``) 1.2 text heights above
    and below the main text.

    Parameters
    ----------
    font_offset : (x, y)
        Top-left drawing position before the font offset correction.
    font : font object providing ``getsize`` and ``getoffset``.
    fillcolor : colour passed to ``self.draw.text``.
    s : str
        Text to draw. An empty string draws nothing.
    recursion : bool
        True for the disturbance lines; in that case no further lines are
        added and no attributes are updated.

    Side effects (only when ``recursion`` is false): sets
    ``self.is_chars_disturb``, ``self.s_width`` and ``self.s_height``.
    """
    if not s:
        # Nothing to measure or draw. Previously this either raised
        # IndexError or shifted the baseline by the 10**5 sentinel below.
        if not recursion:
            self.is_chars_disturb = False
            self.s_width = 0
            self.s_height = 0
        return

    # vertical font
    is_vertical = False
    if np.random.rand() < self.conf.vertical_ratio:
        font = ImageFont.TransposedFont(font, orientation=Image.ROTATE_90)
        is_vertical = True

    # border
    is_border = False
    if np.random.rand() < self.conf.border_ratio:
        shadowcolor = 'black' if fillcolor == (255, 255, 255) else 'white'
        is_border = True

    # random_space
    is_random_space = False
    if np.random.rand() < self.conf.random_space_ratio:
        is_random_space = True

    # Measure every character: total width, tallest glyph and the smallest
    # offset reported by the font.
    chars_size = []
    width = 0
    height = 0
    y_offset = 10**5
    for c in s:
        size = font.getsize(c)
        chars_size.append(size)
        width += size[0]
        if size[1] > height:
            height = size[1]
        if is_vertical:
            # The transposed wrapper is bypassed; the wrapped font's
            # offset is read and its first component is used.
            c_offset = font.font.getoffset(c)
            if c_offset[0] < y_offset:
                y_offset = c_offset[0]
        else:
            c_offset = font.getoffset(c)
            if c_offset[1] < y_offset:
                y_offset = c_offset[1]

    c_x, c_y = font_offset
    c_x_ori = c_x
    c_y_ori = c_y
    c_y -= y_offset

    char_space_width = int(height * np.random.uniform(
        self.conf.random_space_min,
        self.conf.random_space_max)) if is_random_space else 0
    width += (char_space_width * (len(s) - 1))
    height -= y_offset

    # Plain horizontal text is drawn in one call; the other modes draw
    # character by character.
    if not is_vertical and not is_random_space:
        s = [s]

    for i, c in enumerate(s):
        if is_border:
            # Stamp the shadow colour on the 3x3 neighbourhood first.
            for j in [c_x - 1, c_x + 1, c_x]:
                for k in [c_y - 1, c_y + 1, c_y]:
                    self.draw.text((j, k), c, font=font, fill=shadowcolor)
        self.draw.text((c_x, c_y), c, fillcolor, font=font)
        c_x += (chars_size[i][0] + char_space_width)

    if recursion:
        return

    self.is_chars_disturb = False
    if np.random.rand() < self.conf.chars_disturb_ratio:
        self.is_chars_disturb = True
        if is_vertical:
            # Unwrap so the recursive call can roll its own orientation.
            font = font.font
        # NOTE(review): in the single-call mode ``s`` is a one-element list
        # at this point, so shuffle_str receives a list - confirm that this
        # is what it expects.
        self.draw_text((c_x_ori, c_y_ori - (1.2 * height)), font, fillcolor,
                       self.shuffle_str(s), True)
        self.draw_text((c_x_ori, c_y_ori + (1.2 * height)), font, fillcolor,
                       self.shuffle_str(s), True)

    self.s_width = width
    self.s_height = height
def make_wordcloud(words, counts, fname=None, font_path=None, width=400,
                   height=200, margin=5, ranks_only=False,
                   backgroundweight=255):
    """Build word cloud using word counts, store in image.

    Parameters
    ----------
    words : numpy array of strings
        Words that will be drawn in the image.

    counts : numpy array of word counts
        Word counts or weighting of words. Determines the size of the word
        in the final image. Will be normalized so the largest is one.

    fname : string or None
        Output filename. Extension determines image type (written with
        PIL). With ``None`` nothing is written.

    font_path : string
        Font path to the font that will be used. Defaults to ``FONT_PATH``.

    width : int (default=400)
        Width of the word cloud image.

    height : int (default=200)
        Height of the word cloud image.

    margin : int (default=5)
        Extra space reserved around every word.

    ranks_only : boolean (default=False)
        Only use the rank of the words, not the actual counts.

    backgroundweight : int (default=255)
        Grey level used for all three RGB channels of the background.

    Returns
    -------
    The rendered RGB image.

    Raises
    ------
    ValueError
        If ``counts`` is empty or the font file does not exist.

    Notes
    -----
    Larger images make the code significantly slower. If you need a large
    image, run the algorithm at a lower resolution and draw the result at
    the desired resolution.

    Word colours are sampled at random: hue between 0 and 50 with fixed
    saturation (80%) and lightness (50%).
    """
    if len(counts) <= 0:
        # Previously this only printed and then failed inside max().
        raise ValueError("We need at least 1 word to plot a word cloud, "
                         "got %d." % len(counts))

    if font_path is None:
        font_path = FONT_PATH

    if not os.path.exists(font_path):
        raise ValueError("The provided font %s does not exist." % font_path)

    # normalize counts (the maximum is computed once, as a float, so the
    # division is a true division on every Python version)
    max_count = float(max(counts))
    counts = [count / max_count for count in counts]

    # sort words by counts
    inds = np.argsort(counts)[::-1]
    counts = [counts[i] for i in inds]
    words = [words[i] for i in inds]

    # create image
    img_grey = Image.new("L", (width, height))
    draw = ImageDraw.Draw(img_grey)
    integral = np.zeros((height, width), dtype=np.uint32)
    font_sizes, positions, orientations = [], [], []

    # initialize font size "large enough"
    font_size = 1000

    # start drawing grey image
    for word, count in zip(words, counts):
        # alternative way to set the font size
        if not ranks_only:
            font_size = min(font_size, int(100 * np.log(count + 100)))

        result = None
        # Shrink the font until the word fits; size 0 is never loaded.
        while font_size > 0:
            font = ImageFont.truetype(font_path, font_size, encoding='unic')
            # transpose font optionally
            orientation = random.choice([None, Image.ROTATE_90])
            transposed_font = ImageFont.TransposedFont(
                font, orientation=orientation)
            # get size of resulting text
            box_size = draw.textsize(word, font=transposed_font)
            # find possible places using integral image:
            result = query_integral_image(integral, box_size[1] + margin,
                                          box_size[0] + margin)
            if result is not None:
                break
            # if we didn't find a place, make font smaller
            font_size -= 1

        if result is None:
            # we were unable to draw any more
            break

        x, y = np.array(result) + margin // 2
        # actually draw the text
        draw.text((y, x), word, fill="white", font=transposed_font)
        positions.append((x, y))
        orientations.append(orientation)
        font_sizes.append(font_size)

        # Recompute only the part of the integral image below/right of the
        # new word.
        img_array = np.asarray(img_grey)
        partial_integral = np.cumsum(
            np.cumsum(img_array[x:, y:], axis=1), axis=0)
        # paste recomputed part into old image
        if x > 0:
            if y > 0:
                partial_integral += (integral[x - 1, y:]
                                     - integral[x - 1, y - 1])
            else:
                partial_integral += integral[x - 1, y:]
        if y > 0:
            partial_integral += integral[x:, y - 1][:, np.newaxis]
        integral[x:, y:] = partial_integral

    # redraw in color
    img = Image.new("RGB", (width, height),
                    (backgroundweight, backgroundweight, backgroundweight))
    draw = ImageDraw.Draw(img)
    layout = zip(words, font_sizes, positions, orientations)
    for word, font_size, position, orientation in layout:
        font = ImageFont.truetype(font_path, font_size)
        transposed_font = ImageFont.TransposedFont(
            font, orientation=orientation)
        draw.text((position[1], position[0]), word,
                  fill="hsl(%d, 80%%, 50%%)" % random.randint(0, 50),
                  font=transposed_font)

    # Saving stays best-effort, but failures are reported instead of being
    # silently swallowed, and no save is attempted without a filename.
    if fname is not None:
        try:
            img.save(fname)
        except (IOError, OSError, ValueError, KeyError) as err:
            print("Could not save word cloud to %r: %s" % (fname, err))
    return img
def fit_words(words, font_path=None, width=400, height=200, margin=5,
              ranks_only=False, prefer_horiz=0.90):
    """Generate the positions for words.

    Parameters
    ----------
    words : array of tuples
        A tuple contains the word and its frequency.

    font_path : string
        Font path to the font that will be used (OTF or TTF).
        Defaults to ``FONT_PATH``.

    width : int (default=400)
        Width of the canvas.

    height : int (default=200)
        Height of the canvas.

    margin : int (default=5)
        Extra space reserved around every word.

    ranks_only : boolean (default=False)
        Only use the rank of the words, not the actual counts.

    prefer_horiz : float (default=0.90)
        The ratio of times to try horizontal fitting as opposed to vertical.

    Returns
    -------
    list of tuples ``((word, frequency), font_size, (x, y), orientation)``,
    one per word that could be placed. A list is returned (rather than a
    lazy ``zip`` object) so the result can be indexed and iterated more
    than once on Python 3 as well.

    Raises
    ------
    ValueError
        If the font file does not exist.

    Notes
    -----
    Larger canvases make the code significantly slower. If you need a large
    word cloud, run this function with a lower canvas size, and draw it
    with a larger scale.
    """
    if len(words) <= 0:
        print("We need at least 1 word to plot a word cloud, got %d."
              % len(words))

    if font_path is None:
        font_path = FONT_PATH

    if not os.path.exists(font_path):
        raise ValueError("The font %s does not exist." % font_path)

    # create image
    img_grey = Image.new("L", (width, height))
    draw = ImageDraw.Draw(img_grey)
    integral = np.zeros((height, width), dtype=np.uint32)
    font_sizes, positions, orientations = [], [], []

    # initialize font size "large enough"
    font_size = height

    # start drawing grey image
    for word, count in words:
        # alternative way to set the font size
        if not ranks_only:
            font_size = min(font_size, int(100 * np.log(count + 100)))

        result = None
        # Shrink the font until the word fits; size 0 is never loaded.
        while font_size > 0:
            font = ImageFont.truetype(font_path, font_size)
            # transpose font optionally
            if random.random() < prefer_horiz:
                orientation = None
            else:
                orientation = Image.ROTATE_90
            transposed_font = ImageFont.TransposedFont(
                font, orientation=orientation)
            # get size of resulting text
            box_size = draw.textsize(word, font=transposed_font)
            # find possible places using integral image:
            result = query_integral_image(integral, box_size[1] + margin,
                                          box_size[0] + margin)
            if result is not None:
                break
            # if we didn't find a place, make font smaller
            font_size -= 1

        if result is None:
            # we were unable to draw any more
            break

        x, y = np.array(result) + margin // 2
        # actually draw the text
        draw.text((y, x), word, fill="white", font=transposed_font)
        positions.append((x, y))
        orientations.append(orientation)
        font_sizes.append(font_size)

        # Recompute only the part of the integral image below/right of the
        # new word.
        img_array = np.asarray(img_grey)
        partial_integral = np.cumsum(
            np.cumsum(img_array[x:, y:], axis=1), axis=0)
        # paste recomputed part into old image
        if x > 0:
            if y > 0:
                partial_integral += (integral[x - 1, y:]
                                     - integral[x - 1, y - 1])
            else:
                partial_integral += integral[x - 1, y:]
        if y > 0:
            partial_integral += integral[x:, y - 1][:, np.newaxis]
        integral[x:, y:] = partial_integral

    return list(zip(words, font_sizes, positions, orientations))
def _fit_words(self, words):
    """Generate the positions for words.

    Parameters
    ----------
    words : array of tuples
        A tuple contains the word and its frequency.

    Returns
    -------
    layout_ : list of tuples (string, int, (int, int), int, color))
        Encodes the fitted word cloud. Encodes for each word the string,
        font size, position, orientation and color. The same list is stored
        on ``self.layout_``; it is a real list (not a one-shot ``zip``
        iterator) so it can be consumed more than once.

    Notes
    -----
    Larger canvases make the code significantly slower. If you need a large
    word cloud, run this function with a lower canvas size, and draw it
    with a larger scale.
    """
    if self.random_state is not None:
        random_state = self.random_state
    else:
        random_state = Random()

    if len(words) <= 0:
        print("We need at least 1 word to plot a word cloud, got %d."
              % len(words))

    if self.mask is not None:
        width = self.mask.shape[1]
        height = self.mask.shape[0]
        # Masked pixels start out as occupied.
        integral = np.cumsum(np.cumsum(self.mask, axis=1),
                             axis=0).astype(np.uint32)
    else:
        height, width = self.height, self.width
        integral = np.zeros((height, width), dtype=np.uint32)

    # create image
    img_grey = Image.new("L", (width, height))
    draw = ImageDraw.Draw(img_grey)
    font_sizes, positions, orientations, colors = [], [], [], []

    font_size = self.max_font_size

    # start drawing grey image
    for word, count in words:
        # alternative way to set the font size
        if not self.ranks_only:
            font_size = min(font_size, int(100 * np.log(count + 100)))

        result = None
        # Shrink the font until the word fits; size 0 is never loaded.
        while font_size > 0:
            font = ImageFont.truetype(self.font_path, font_size)
            # transpose font optionally
            if random_state.random() < self.prefer_horizontal:
                orientation = None
            else:
                orientation = Image.ROTATE_90
            transposed_font = ImageFont.TransposedFont(
                font, orientation=orientation)
            # get size of resulting text
            box_size = draw.textsize(word, font=transposed_font)
            # find possible places using integral image:
            result = query_integral_image(integral,
                                          box_size[1] + self.margin,
                                          box_size[0] + self.margin,
                                          random_state)
            if result is not None:
                break
            # if we didn't find a place, make font smaller
            font_size -= 1

        if result is None:
            # we were unable to draw any more
            break

        x, y = np.array(result) + self.margin // 2
        # actually draw the text
        draw.text((y, x), word, fill="white", font=transposed_font)
        positions.append((x, y))
        orientations.append(orientation)
        font_sizes.append(font_size)
        colors.append(
            self.color_func(word, font_size, (x, y), orientation,
                            random_state=random_state))

        # recompute integral image
        if self.mask is None:
            img_array = np.asarray(img_grey)
        else:
            img_array = np.asarray(img_grey) + self.mask
        # Only the part below/right of the new word changes.
        partial_integral = np.cumsum(
            np.cumsum(img_array[x:, y:], axis=1), axis=0)
        # paste recomputed part into old image
        if x > 0:
            if y > 0:
                partial_integral += (integral[x - 1, y:]
                                     - integral[x - 1, y - 1])
            else:
                partial_integral += integral[x - 1, y:]
        if y > 0:
            partial_integral += integral[x:, y - 1][:, np.newaxis]
        integral[x:, y:] = partial_integral

    self.layout_ = list(
        zip(words, font_sizes, positions, orientations, colors))
    return self.layout_
def runExplosion(oledDisplays) :
    """Play an explosion animation on every display, then overlay text.

    Seven growing clouds of random points are drawn around the centre of
    each display (the centre is taken from the first display). Afterwards
    the lines "MAKE", "YOUR", "TIME" are drawn, one character per display:
    display ``k`` receives character ``k`` of each line. The function ends
    with a one second pause.
    """
    firstDisplay = oledDisplays[0]
    center = [math.floor(firstDisplay.width / 2), math.floor(firstDisplay.height / 2)]
    oledImages = []
    for oledDisplay in oledDisplays :
        oledImages.append(Image.new('1', (oledDisplay.width, oledDisplay.height)))
    drawObjects = [ImageDraw.Draw(oledImage) for oledImage in oledImages]

    # this starts to get a little less than perfectly performant
    # consider using sprites/etc, or creating some random ones and then re-drawing with those
    for i in range(1, 15, 2) :
        pointCount = 30 * i * math.floor(i/1.5)
        for displayIndex, oledDisplay in enumerate(oledDisplays) :
            plotPoints = [
                (random.randrange(-6*i, 6*i) + center[0],
                 random.randrange(math.floor(-5*i), math.floor(5*i)) + center[1])
                for _ in range(0, pointCount)
            ]
            drawObjects[displayIndex].point(plotPoints, fill=1)
            oledDisplay.image(oledImages[displayIndex])
            oledDisplay.show()
        # not an issue on Pi3B, but this should help keep timing consistent on faster systems
        time.sleep(.05)

    # Disabled earlier variant (two lines, "GAME" / "OVER", font size 60),
    # kept for reference:
    #
    # textLine1 = "GAME"
    # textLine2 = "OVER"
    # basicFont = ImageFont.truetype(font="/usr/share/fonts/truetype/dejavu/DejaVuSansMono-Bold.ttf", size=60)
    # # have to rotate the letters to fit the screen orientation!
    # fontObject = ImageFont.TransposedFont(basicFont, orientation = screenOrientation)
    # for displayIndex, oledDisplay in enumerate(oledDisplays) :
    #     (textWidth, textHeight) = fontObject.getsize(textLine1[displayIndex])
    #     drawObjects[displayIndex].text((center[0] + center[0] / 2 - textWidth / 2, center[1] - textHeight / 2), textLine1[displayIndex], font=fontObject, fill=0)
    #     oledDisplay.image(oledImages[displayIndex])
    #     oledDisplay.show()
    # time.sleep(.05)
    # for displayIndex, oledDisplay in enumerate(oledDisplays) :
    #     (textWidth, textHeight) = fontObject.getsize(textLine2[displayIndex])
    #     drawObjects[displayIndex].text((center[0] / 2 - textWidth / 2, center[1] - textHeight / 2), textLine2[displayIndex], font=fontObject, fill=0)
    #     oledDisplay.image(oledImages[displayIndex])
    #     oledDisplay.show()
    # time.sleep(.05)

    textLines = ["MAKE", "YOUR", "TIME"]
    basicFont = ImageFont.truetype(font="/usr/share/fonts/truetype/dejavu/DejaVuSansMono-Bold.ttf", size=40)
    # have to rotate the letters to fit the screen orientation!
    fontObject = ImageFont.TransposedFont(basicFont, orientation = screenOrientation)
    for lineIndex, textLine in enumerate(textLines) :
        for displayIndex, oledDisplay in enumerate(oledDisplays) :
            glyph = textLine[displayIndex]
            (textWidth, textHeight) = fontObject.getsize(glyph)
            # Lines are spread a third of the display width apart around
            # the centre; each glyph is centred on its own position.
            glyphX = center[0] + (oledDisplay.width / 3) * (1-lineIndex) - textWidth / 2
            glyphY = center[1] - textHeight / 2
            drawObjects[displayIndex].text((glyphX, glyphY), glyph, font=fontObject, fill=0)
            oledDisplay.image(oledImages[displayIndex])
            oledDisplay.show()
        time.sleep(.05)
    time.sleep(1)
def generate_from_frequencies(self, frequencies):
    """Create a word_cloud from words and frequencies.

    Words are placed one after another on a grey-scale scratch image; an
    integral image of the occupied pixels is queried for a free spot and
    the font is shrunk by one until the word fits. The first word that
    cannot be placed at any size ends the layout.

    Parameters
    ----------
    frequencies : array of tuples
        A tuple contains the word and its frequency.

    Returns
    -------
    self
        ``self.layout_`` is set to a list of ``((word, frequency),
        font_size, (x, y), orientation, color)`` tuples.

    Raises
    ------
    ValueError
        If ``self.mask`` is neither 2- nor 3-dimensional.
    """
    rng = self.random_state if self.random_state is not None else Random()

    if len(frequencies) <= 0:
        print("We need at least 1 word to plot a word cloud, got %d."
              % len(frequencies))

    if self.mask is None:
        height, width = self.height, self.width
        integral = np.zeros((height, width), dtype=np.uint32)
    else:
        mask = self.mask
        width = mask.shape[1]
        height = mask.shape[0]
        if mask.dtype.kind == 'f':
            warnings.warn(
                "mask image should be unsigned byte between 0 and"
                " 255. Got a float array")
        if mask.ndim == 2:
            boolean_mask = mask == 255
        elif mask.ndim == 3:
            # Count, per pixel, how many of the first three channels are 255.
            boolean_mask = np.sum(mask[:, :, :3] == 255, axis=-1)
        else:
            raise ValueError("Got mask of invalid shape: %s"
                             % str(mask.shape))
        # Masked pixels start out as occupied.
        row_sums = np.cumsum(boolean_mask * 255, axis=1)
        integral = np.cumsum(row_sums, axis=0).astype(np.uint32)

    # Scratch image the words are drawn on in white.
    img_grey = Image.new("L", (width, height))
    draw = ImageDraw.Draw(img_grey)

    def _refresh_integral(row, col):
        # Recompute the integral image for the region below/right of
        # (row, col) and stitch it onto the untouched part.
        if self.mask is None:
            pixels = np.asarray(img_grey)
        else:
            pixels = np.asarray(img_grey) + boolean_mask
        patch = np.cumsum(np.cumsum(pixels[row:, col:], axis=1), axis=0)
        if row > 0:
            if col > 0:
                patch += (integral[row - 1, col:]
                          - integral[row - 1, col - 1])
            else:
                patch += integral[row - 1, col:]
        if col > 0:
            patch += integral[row:, col - 1][:, np.newaxis]
        integral[row:, col:] = patch

    font_sizes, positions, orientations, colors = [], [], [], []
    font_size = self.max_font_size

    for word, count in frequencies:
        if not self.ranks_only:
            font_size = min(font_size, int(100 * np.log(count + 100)))

        while True:
            font = ImageFont.truetype(self.font_path, font_size)
            # A fresh orientation is rolled on every attempt.
            orientation = (None if rng.random() < self.prefer_horizontal
                           else Image.ROTATE_90)
            transposed_font = ImageFont.TransposedFont(
                font, orientation=orientation)
            draw.setfont(transposed_font)
            box_size = draw.textsize(word)
            result = query_integral_image(integral,
                                          box_size[1] + self.margin,
                                          box_size[0] + self.margin, rng)
            if result is not None or font_size == 0:
                break
            # No free spot at this size: try a smaller font.
            font_size -= 1

        if font_size == 0:
            # Nothing more can be drawn.
            break

        x, y = np.array(result) + self.margin // 2
        draw.text((y, x), word, fill="white")
        positions.append((x, y))
        orientations.append(orientation)
        font_sizes.append(font_size)
        colors.append(
            self.color_func(word, font_size=font_size, position=(x, y),
                            orientation=orientation, random_state=rng,
                            font_path=self.font_path))
        _refresh_integral(x, y)

    self.layout_ = list(
        zip(frequencies, font_sizes, positions, orientations, colors))
    return self
def acomodar_palabras(self):
    """Do the "heavy lifting" of arranging the words so they fit the image.

    Words from ``self.palabras`` are placed one after another on a
    grey-scale scratch image. For each word the font is shrunk by one until
    ``query_integral_image`` finds a free spot. When no spot is found even
    at the smallest size, the layout stops with the words placed so far.

    Side effects
    ------------
    ``self.distribucion`` is set to a list of ``((word, freq), font_size,
    (x, y), orientation, color)`` tuples, one per placed word, and
    ``self.imagen_generada`` is set to ``True``.
    """
    altura, largo = self.height, self.width
    integral = zeros((altura, largo), dtype=uint32)

    # Create the scratch image
    imagen = Image.new("L", (largo, altura))
    dibujo = ImageDraw.Draw(imagen)

    sizes, posiciones, orientaciones, colores = [], [], [], []
    font_actual = self.max_font_size

    # Start drawing
    for palabra, freq in self.palabras:
        resultado = None
        # Look for a position, shrinking the font until one is found.
        # Size 0 is never loaded.
        while font_actual > 0:
            fuente = ImageFont.truetype(self.font_path, font_actual)
            # Decide between horizontal and vertical text
            if self.R.random() < self.horizontales:
                orientacion = None
            else:
                orientacion = Image.ROTATE_90
            font_transpuesta = ImageFont.TransposedFont(
                fuente, orientation=orientacion)
            box = dibujo.textsize(palabra, font=font_transpuesta)
            # Look for possible places
            resultado = query_integral_image(
                integral, box[1] + self.margin, box[0] + self.margin,
                self.R)
            if resultado is not None:
                break
            # No room: make the font smaller
            font_actual -= 1

        if resultado is None:
            # Nothing more can be written. Previously execution fell
            # through to ``array(None) + margin`` and raised TypeError.
            break

        x, y = array(resultado) + self.margin // 2
        # Draw the result
        dibujo.text((y, x), palabra, fill="white", font=font_transpuesta)
        posiciones.append((x, y))
        orientaciones.append(orientacion)
        sizes.append(font_actual)
        colores.append(
            self.color_func(palabra, font_size=font_actual,
                            position=(x, y), orientation=orientacion))

        # Recompute the integral image below/right of the new word
        img_arreglo = asarray(imagen)
        integral_parcial = cumsum(cumsum(img_arreglo[x:, y:], axis=1),
                                  axis=0)
        # Stitch the recomputed part onto the accumulated image
        if x > 0:
            if y > 0:
                integral_parcial += (integral[x - 1, y:]
                                     - integral[x - 1, y - 1])
            else:
                integral_parcial += integral[x - 1, y:]
        if y > 0:
            integral_parcial += integral[x:, y - 1][:, newaxis]
        integral[x:, y:] = integral_parcial

    self.distribucion = list(
        zip(self.palabras, sizes, posiciones, orientaciones, colores))
    self.imagen_generada = True
def generate_from_frequencies(self, scores_in, max_font_size=None):
    """Create a word_cloud from words and frequencies.

    Parameters
    ----------
    scores_in : dict from tuple to float
        Maps ``(word, tag)`` lemmas to their score.

    max_font_size : int
        Use this font-size instead of self.max_font_size

    Returns
    -------
    self
        ``self.words_`` maps each word to its normalised score and
        ``self.layout_`` holds one ``((word, score), font_size, (x, y),
        orientation, color)`` tuple per word that was placed.

    Raises
    ------
    ValueError
        If ``scores_in`` is empty, or if no font size could be derived
        because nothing fits on the canvas.
    """
    # Check length of scores
    if len(scores_in) <= 0:
        raise ValueError("We need at least 1 word to plot a word cloud, "
                         "got %d." % len(scores_in))

    # Make sure scores are sorted and normalized
    scores_in = sorted(scores_in.items(), key=itemgetter(1), reverse=True)
    scores_in = scores_in[:self.max_words]  # Slice down to max words

    # Largest entry will be 1 after normalisation
    max_score = float(scores_in[0][1])
    scores_norm = [((word, tag), score / max_score)
                   for (word, tag), score in scores_in]

    # Set random state
    if self.random_state is not None:
        random_state = self.random_state
    else:
        random_state = Random()

    # Set boolean mask
    if self.mask is not None:
        boolean_mask = self._get_bolean_mask(self.mask)
        width = self.mask.shape[1]
        height = self.mask.shape[0]
    else:
        boolean_mask = None
        height, width = self.height, self.width
    occupancy = IntegralOccupancyMap(height, width, boolean_mask)

    # Create image
    img_grey = Image.new("L", (width, height))
    draw = ImageDraw.Draw(img_grey)
    placed, font_sizes, positions, orientations, colors = [], [], [], [], []
    last_score = 1.

    # If not provided use default font_size
    if max_font_size is None:
        max_font_size = self.max_font_size

    if max_font_size is None:
        # Figure out a good font size by trying to draw with just the
        # first two words
        if len(scores_norm) == 1:
            # We only have one word. We make it big!
            font_size = self.height
        else:
            # Recursive call: this sets layout_
            self.generate_from_frequencies(dict(scores_norm[:2]),
                                           max_font_size=self.height)
            # Find font sizes
            sizes = [x[1] for x in self.layout_]
            try:
                font_size = int(2 * sizes[0] * sizes[1]
                                / (sizes[0] + sizes[1]))
            # Quick fix for if self.layout_ contains less than 2 values
            # On very small images it can be empty
            except IndexError:
                try:
                    font_size = sizes[0]
                except IndexError:
                    raise ValueError(
                        "Couldn't find space to draw. Either the Canvas size"
                        " is too small or too much of the image is masked "
                        "out.")
    else:
        # Case font size has been manually set
        font_size = max_font_size

    # Set self.words_ here, because the recursive call above set it for
    # the two-word probe only.
    self.words_ = {word: score for (word, tag), score in scores_norm}

    # Check repetition
    if self.repeat and len(scores_norm) < self.max_words:
        # Pad frequencies with repeating words.
        times_extend = int(
            np.ceil(self.max_words / float(len(scores_norm)))) - 1
        # Get smallest frequency
        scores_org = list(scores_norm)
        downweight = scores_norm[-1][1]
        for i in range(times_extend):
            # The original score is scaled down a little more on every
            # repetition (this used to reference an undefined name).
            scores_norm.extend(
                [((word, tag), score * downweight ** (i + 1))
                 for (word, tag), score in scores_org])

    # start drawing grey image
    for (word, tag), score in scores_norm:
        # Do not show 0 score lemmas
        if score == 0:
            continue

        # Select the font size
        rs = self.relative_scaling
        if rs != 0:
            font_size = int(round(
                (rs * (score / float(last_score)) + (1 - rs)) * font_size))

        if random_state.random() < self.prefer_horizontal:
            orientation = None
        else:
            orientation = Image.ROTATE_90
        tried_other_orientation = False

        while True:
            # Try to find a position
            font = ImageFont.truetype(self.font_path, font_size)
            # Transpose font optionally
            transposed_font = ImageFont.TransposedFont(
                font, orientation=orientation)
            # Get size of resulting text
            box_size = draw.textsize(word, font=transposed_font)
            # Find possible places using integral image:
            result = occupancy.sample_position(box_size[1] + self.margin,
                                               box_size[0] + self.margin,
                                               random_state)
            # Either we found a place or font-size went too small
            if result is not None or font_size < self.min_font_size:
                break
            # If we didn't find a place, make font smaller, but first try
            # to rotate!
            if not tried_other_orientation and self.prefer_horizontal < 1:
                # NOTE(review): both branches yield ROTATE_90, so a word
                # that started rotated is never retried horizontally here.
                # Kept unchanged to preserve seeded layouts - confirm.
                orientation = (Image.ROTATE_90 if orientation is None
                               else Image.ROTATE_90)
                tried_other_orientation = True
            else:
                # Make font smaller
                font_size -= self.font_step
                orientation = None

        # Case we were unable to draw any more
        if font_size < self.min_font_size:
            break

        # Define position of the text
        x, y = np.array(result) + self.margin // 2
        # Actually draw the text
        draw.text((y, x), word, fill="white", font=transposed_font)
        # Only placed words are recorded, so skipped zero-score entries
        # cannot shift the layout columns against each other.
        placed.append((word, score))
        positions.append((x, y))
        orientations.append(orientation)
        font_sizes.append(font_size)
        # Color according to POS tag
        colors.append(color_pos_tag(tag))

        # recompute integral image
        if self.mask is None:
            img_array = np.asarray(img_grey)
        else:
            img_array = np.asarray(img_grey) + boolean_mask
        occupancy.update(img_array, x, y)
        last_score = score

    # Set layout
    self.layout_ = list(
        zip(placed, font_sizes, positions, orientations, colors))
    # Return object itself
    return self
# bounds.add_sprite(mask_sprite, 0, 0) # 颜色映射 color_mask = Image.open('color_mask.png').resize((width, height)) # 计算四叉树 sprites = [] for (word, size) in words: sprite = Sprite() sprite.text = word sprite.font_size = int(math.sqrt(size) * 4) if sprite.font_size < 10: sprite.font_size = 10 font = ImageFont.truetype(font_file, sprite.font_size) font = ImageFont.TransposedFont(font) size = font.getsize(word) # 绘制字符 img_txt = Image.new('L', (size[0] + 2, size[1] + 2)) # 留边距, 简化运算 draw_txt = ImageDraw.Draw(img_txt) draw_txt.text((1,1), word, font=font, fill=255) # 留边距, 简化运算 # 随机角度旋转 sprite.rotate = random.randint(-45, 45) img_txt = img_txt.rotate(sprite.rotate, resample=Image.NEAREST, expand=1) sprite.img = img_txt sprite.build_tree() sprites.append(sprite)
# NOTE(review): fragment - the enclosing function and the loop that the
# final `break` belongs to are outside the visible source.
if not ranks_only:
    # The font size never grows again from one word to the next.
    font_size = min(font_size, int(100 * np.log(count + 100)))
while True:
    try:
        # try to find a position
        font = ImageFont.truetype(font_path, font_size)
    except IOError:
        # NOTE(review): the message names FONT_PATH even though the font
        # actually loaded is `font_path` - confirm this is intended.
        fontfile = FONT_PATH.rsplit('/', 1)[-1]
        raise IOError("Font '%s' not found. Please change 'FONT_PATH' "
                      "to a valid font file path." % fontfile)
    # transpose font optionally
    if random.random() < prefer_horiz:
        orientation = None
    else:
        orientation = Image.ROTATE_90
    transposed_font = ImageFont.TransposedFont(font, orientation=orientation)
    draw.setfont(transposed_font)
    # get size of resulting text
    box_size = draw.textsize(word)
    # find possible places using integral image:
    result = query_integral_image(integral, box_size[1] + margin, box_size[0] + margin)
    # Stop when a spot was found or the font size has reached zero.
    if result is not None or font_size == 0:
        break
    # if we didn't find a place, make font smaller
    font_size -= 1
if font_size == 0:
    # we were unable to draw any more
    break
def generate_from_frequencies(self, frequencies, max_font_size=None):
    """Create a word cloud layout from words and frequencies.

    Every word is drawn with a font picked at random from
    ``self.font_paths``.  A key may carry a ``"_suffix"`` so that the same
    text can occur several times in ``frequencies``; only the part before
    the first ``"_"`` is drawn and recorded.

    Parameters
    ----------
    frequencies : dict from string to float
        Words and their associated frequencies.
    max_font_size : int, optional
        Use this font size instead of ``self.max_font_size``.

    Returns
    -------
    self
        ``self.layout_`` is a list of
        ``((word, freq), font_index, font_size, position, orientation,
        color)`` tuples, one per placed word; ``self.words_`` holds the
        normalized frequencies.

    Raises
    ------
    ValueError
        If ``frequencies`` is empty, if the mask has an unsupported number
        of dimensions, or if the probing run used to guess a font size
        could not place a single word.
    """
    if len(frequencies) <= 0:
        raise ValueError("We need at least 1 word to plot a word cloud, "
                         "got %d." % len(frequencies))

    # make sure frequencies are sorted and normalized
    frequencies = sorted(frequencies.items(), key=item1, reverse=True)
    frequencies = frequencies[:self.max_words]
    # largest entry will be 1
    max_frequency = float(frequencies[0][1])
    frequencies = [(word, freq / max_frequency)
                   for word, freq in frequencies]

    if self.random_state is not None:
        random_state = self.random_state
    else:
        random_state = Random()

    if self.mask is not None:
        mask = self.mask
        width = mask.shape[1]
        height = mask.shape[0]
        if mask.dtype.kind == 'f':
            warnings.warn("mask image should be unsigned byte between 0"
                          " and 255. Got a float array")
        if mask.ndim == 2:
            boolean_mask = mask == 255
        elif mask.ndim == 3:
            # if all channels are white, mask out
            boolean_mask = np.all(mask[:, :, :3] == 255, axis=-1)
        else:
            raise ValueError("Got mask of invalid shape: %s"
                             % str(mask.shape))
    else:
        boolean_mask = None
        height, width = self.height, self.width

    # Occupancy map built from the canvas height/width (in pixels) and the
    # boolean array of masked-out (pure white, 255) pixels.
    occupancy = IntegralOccupancyMap(height, width, boolean_mask)

    # create the greyscale working image
    img_grey = Image.new("L", (width, height))
    draw = ImageDraw.Draw(img_grey)

    # Layout information per placed word.  Compared with the single-font
    # layout this additionally records the index of the chosen font.
    font_types, font_sizes, positions, orientations, colors = \
        [], [], [], [], []

    last_freq = 1.

    if max_font_size is None:
        # if not provided use default font_size
        max_font_size = self.max_font_size

    if max_font_size is None:
        # figure out a good font size by trying to draw with
        # just the first two words
        if len(frequencies) == 1:
            # we only have one word. We make it big!
            font_size = self.height
        else:
            # lay out only the two most frequent words
            self.generate_from_frequencies(dict(frequencies[:2]),
                                           max_font_size=self.height)
            # layout_ entries are (word_freq, font_index, font_size, ...)
            sizes = [entry[2] for entry in self.layout_]
            try:
                font_size = int(2 * sizes[0] * sizes[1]
                                / (sizes[0] + sizes[1]))
            # layout_ can hold fewer than two entries (or none at all) on
            # very small or heavily masked canvases
            except IndexError:
                try:
                    font_size = sizes[0]
                except IndexError:
                    raise ValueError(
                        "Couldn't find space to draw. Either the Canvas size"
                        " is too small or too much of the image is masked "
                        "out.")
    else:
        font_size = max_font_size

    # we set self.words_ here because we called generate_from_frequencies
    # above... hurray for good design?
    self.words_ = dict(frequencies)

    fonts_len = len(self.font_paths)
    # (word, freq) pairs of the words actually placed; the drawn text may
    # repeat because the "_suffix" is stripped.
    my_frequencies = []

    # start drawing grey image
    for word, freq in frequencies:
        # select the font size; rs couples font size to relative frequency
        rs = self.relative_scaling
        if rs != 0:
            font_size = int(round((rs * (freq / float(last_freq))
                                   + (1 - rs)) * font_size))
        # prefer_horizontal is the probability of a horizontal placement
        if random_state.random() < self.prefer_horizontal:
            orientation = None
        else:
            orientation = Image.ROTATE_90
        tried_other_orientation = False

        # pick one of the configured fonts at random
        n = random_state.randint(0, fonts_len - 1)
        font_path = self.font_paths[n]
        # keep only the text in front of the first "_"
        word = word.split("_")[0]

        while True:
            # try to find a position
            font = ImageFont.truetype(font_path, font_size)
            # transpose font optionally
            transposed_font = ImageFont.TransposedFont(
                font, orientation=orientation)
            # get the pixel size (width, height) of the rendered text
            box_size = draw.textsize(word, font=transposed_font)
            # find possible places using integral image; result is an
            # (x, y) position or None
            result = occupancy.sample_position(box_size[1] + self.margin,
                                               box_size[0] + self.margin,
                                               random_state)
            if result is not None or font_size < self.min_font_size:
                # either we found a place or font-size went too small
                break
            # if we didn't find a place, make font smaller
            # but first try to rotate!
            if not tried_other_orientation and self.prefer_horizontal < 1:
                # Flip to the orientation that has not been tried yet.
                orientation = (Image.ROTATE_90 if orientation is None
                               else None)
                tried_other_orientation = True
            else:
                font_size -= self.font_step
                orientation = None

        if font_size < self.min_font_size:
            # we were unable to draw any more
            break

        x, y = np.array(result) + self.margin // 2
        # actually draw the text
        draw.text((y, x), word, fill="white", font=transposed_font)
        positions.append((x, y))
        orientations.append(orientation)
        font_types.append(n)
        font_sizes.append(font_size)
        colors.append(self.color_func(word, font_size=font_size,
                                      position=(x, y),
                                      orientation=orientation,
                                      random_state=random_state,
                                      font_path=font_path))
        my_frequencies.append((word, freq))

        # recompute integral image
        if self.mask is None:
            img_array = np.asarray(img_grey)
        else:
            img_array = np.asarray(img_grey) + boolean_mask
        # recompute bottom right
        # the order of the cumsum's is important for speed ?!
        occupancy.update(img_array, x, y)
        # remember the frequency handled last for relative scaling
        last_freq = freq

    self.layout_ = list(zip(my_frequencies, font_types, font_sizes,
                            positions, orientations, colors))
    return self
def generate_from_frequencies(self, frequencies, debug=False, max_font_size=None):  # noqa: C901
    """Create a word_cloud from words and frequencies.

    Words matched by ``self.emoji_regex`` are pasted as emoji images
    (fetched through ``self.download_emoji``) instead of being rendered as
    text.

    Parameters
    ----------
    frequencies : dict from string to float
        A contains words and associated frequency.
    debug : bool
        When true, the box of every placed word is outlined in red and the
        occupancy map, composited with those outlines, is written to
        ``debug.png`` (slow).
    max_font_size : int
        Use this font-size instead of self.max_font_size

    Returns
    -------
    self

    Raises
    ------
    ValueError
        If ``frequencies`` is empty or the probing run used to guess a
        font size could not place a single word.
    """
    if len(frequencies) <= 0:
        raise ValueError("We need at least 1 word to plot a word cloud, "
                         "got %d." % len(frequencies))

    # make sure frequencies are sorted and normalized
    frequencies = sorted(frequencies.items(), key=itemgetter(1),
                         reverse=True)
    frequencies = frequencies[:self.max_words]
    # largest entry will be 1
    max_frequency = float(frequencies[0][1])
    frequencies = [(word, freq / max_frequency)
                   for word, freq in frequencies]

    if self.random_state is not None:
        random_state = self.random_state
    else:
        random_state = Random()

    if self.mask is not None:
        boolean_mask = self._get_bolean_mask(self.mask)
        width = self.mask.shape[1]
        height = self.mask.shape[0]
    else:
        boolean_mask = None
        height, width = self.height, self.width
    occupancy = IntegralOccupancyMap(height, width, boolean_mask)

    # create image
    img_grey = Image.new("L", (width, height))
    draw = ImageDraw.Draw(img_grey)

    font_sizes, positions, orientations, colors = [], [], [], []

    if debug:
        img_debug = Image.new("RGBA", (width, height))
        debug_draw = ImageDraw.Draw(img_debug)

    last_freq = 1.

    if max_font_size is None:
        # if not provided use default font_size
        max_font_size = self.max_font_size

    if max_font_size is None:
        # figure out a good font size by trying to draw with
        # just the first two words
        if len(frequencies) == 1:
            # we only have one word. We make it big!
            font_size = self.height
        else:
            self.generate_from_frequencies(
                dict(frequencies[:2]),
                max_font_size=self.height * self.font_size_mod)
            # find font sizes
            sizes = [x[1] for x in self.layout_]
            try:
                font_size = int(2 * sizes[0] * sizes[1]
                                / (sizes[0] + sizes[1]))
            # quick fix for if self.layout_ contains less than 2 values
            # on very small images it can be empty
            except IndexError:
                try:
                    font_size = sizes[0]
                except IndexError:
                    raise ValueError(
                        "Couldn't find space to draw. Either the Canvas size"
                        " is too small or too much of the image is masked "
                        "out.")
    else:
        font_size = max_font_size

    # we set self.words_ here because we called generate_from_frequencies
    # above... hurray for good design?
    self.words_ = dict(frequencies)

    if self.repeat and len(frequencies) < self.max_words:
        # pad frequencies with repeating words.
        times_extend = int(np.ceil(self.max_words / len(frequencies))) - 1
        # get smallest frequency
        frequencies_org = list(frequencies)
        downweight = frequencies[-1][1]
        for i in range(times_extend):
            frequencies.extend([(word, freq * downweight ** (i + 1))
                                for word, freq in frequencies_org])

    # start drawing grey image
    for word, freq in frequencies:
        if freq == 0:
            continue

        m = self.emoji_regex.search(word)
        emoji = m is not None
        size = (0, 0)
        mask = 0
        if emoji:
            raw_size = self.get_emoji_size(m.group(1))
            longest = max(*raw_size)
            # aspect ratio of the emoji, longest side normalized to 1
            size = (raw_size[0] / longest, raw_size[1] / longest)
            name = self.download_emoji(m.group(1))
            # extract alpha channel to use as a mask
            with Image.open(name) as img:
                mask = Image.new("RGBA", img.size, (255, 255, 255, 0))
                img.load()
                mask.putalpha(img.split()[3])

        # select the font size
        rs = self.relative_scaling
        if rs != 0:
            font_size = int(round((rs * (freq / float(last_freq))
                                   + (1 - rs)) * font_size))
        if random_state.random() < self.prefer_horizontal:
            orientation = None
        else:
            orientation = Image.ROTATE_90
        tried_other_orientation = False

        # single-character words are drawn at half size
        mult = 0.5 if len(word) == 1 else 1.0

        while True:
            # try to find a position
            font = ImageFont.truetype(self.font_path, int(font_size * mult))
            # transpose font optionally
            transposed_font = ImageFont.TransposedFont(
                font, orientation=orientation)
            # get size of resulting text
            if emoji:
                # Derive the oriented ratio from the unrotated one on every
                # attempt; swapping ``size`` in place would flip it back and
                # forth across retries and desynchronize it from the mask
                # rotation applied after the loop.
                if orientation is not None and self.rotate_emoji:
                    ratio_w, ratio_h = size[1], size[0]
                else:
                    ratio_w, ratio_h = size
                # scale size by dimension ratio
                box_size = (int(font_size * ratio_w * mult),
                            int(font_size * ratio_h * mult))
            else:
                x, y = draw.textsize(word, font=transposed_font)
                o_x, o_y = font.getoffset(word)
                if orientation is not None:
                    o_x, o_y = o_y, o_x
                box_size = (x - o_x, y - o_y)
            # find possible places using integral image:
            result = occupancy.sample_position(box_size[1] + self.margin,
                                               box_size[0] + self.margin,
                                               random_state)
            if result is not None:
                if debug:
                    o = self.margin // 2
                    x, y = np.array(result) + 1
                    p = ((y - o, x - o),
                         (y + box_size[0] + 2 * o, x + box_size[1] + 2 * o))
                    debug_draw.rectangle(p, outline="red")
                # we found a place
                break
            if font_size < self.min_font_size:
                # font-size went too small
                break
            # if we didn't find a place, make font smaller
            # but first try to rotate!
            if not tried_other_orientation and self.prefer_horizontal < 1:
                # Flip to the orientation that has not been tried yet.
                orientation = (Image.ROTATE_90 if orientation is None
                               else None)
                tried_other_orientation = True
            else:
                font_size -= self.font_step
                orientation = None

        if font_size < self.min_font_size:
            # we were unable to draw any more
            break

        x, y = np.array(result) + 1 + self.margin // 2
        # actually draw the text
        if emoji:
            if orientation is not None and self.rotate_emoji:
                # rotate the mask before resizing: box_size is already
                # expressed in the rotated orientation
                mask = mask.rotate(90, expand=True)
            mask = mask.resize((int(box_size[0]), int(box_size[1])),
                               Image.NEAREST)
            img_grey.paste(mask, (y, x), mask)
        else:
            draw.text((y, x), word, fill="white", font=transposed_font,
                      stroke_width=int(font_size * mult * self.outline_mult))
        positions.append((x, y))
        orientations.append(orientation)
        font_sizes.append(int(font_size * mult))
        colors.append(self.color_func(word, font_size=int(font_size * mult),
                                      position=(x, y),
                                      orientation=orientation,
                                      random_state=random_state,
                                      font_path=self.font_path))
        # recompute integral image
        if self.mask is None:
            img_array = np.asarray(img_grey)
        else:
            img_array = np.asarray(img_grey) + boolean_mask
        # recompute bottom right
        # the order of the cumsum's is important for speed ?!
        occupancy.update(img_array, x, y)
        last_freq = freq

    # NOTE(review): zero-frequency entries are skipped above but still
    # present in ``frequencies``; zip() pairs by position, so this relies on
    # those entries sorting to the end of the list.
    self.layout_ = list(zip(frequencies, font_sizes, positions,
                            orientations, colors))

    if debug:
        # writes the mask to debug.png (slow)
        # this is what word cloud uses to determine where to place text
        i = Image.new("RGBA", (occupancy.width, occupancy.height),
                      (0, 0, 0, 255))
        for x in range(0, occupancy.width):
            for y in range(0, occupancy.height):
                a = int(max(min(occupancy.query_position(x, y), 255), 0))
                if a != 0:
                    i.putpixel((x, y), (255, 255, 255, 255))
        Image.alpha_composite(i, img_debug).save("debug.png")

    return self
def make_wordcloud(words, counts, fname="words.png", font_path=None,
                   width=400, height=200, margin=16):
    """Build a word cloud from word counts and return it as an image.

    Parameters
    ----------
    words : numpy array of strings
        Words that will be drawn in the image.

    counts : numpy array of word counts
        Word counts or weighting of words. Determines the size of the word
        in the final image. Will be normalized to lie between zero and one.

    font_path : string
        Font path to the font that will be used.
        Defaults to the module-level ``FONT_PATH``.

    fname : string
        Output filename. Currently unused: the call that saved the image is
        commented out, the image is only returned.

    width : int (default=400)
        Width of the word cloud image.

    height : int (default=200)
        Height of the word cloud image.

    margin : int (default=16)
        Extra pixels reserved around every word.

    Returns
    -------
    img : PIL image
        The colored ("RGB") word cloud.

    Raises
    ------
    ValueError
        If ``counts`` is empty.

    Notes
    -----
    Larger images make the code significantly slower. If you need a large
    image, you can try running the algorithm at a lower resolution and then
    drawing the result at the desired resolution.

    In the current form it actually just uses the rank of the counts,
    i.e. the relative differences don't matter.
    Play with setting the font_size in the main loop for different styles.

    Hues are spread evenly over the placed words, with fixed saturation and
    lightness. Adjusting the percentages at the very end gives different
    color ranges.
    """
    if len(counts) <= 0:
        raise ValueError("We need at least 1 word to plot a word cloud, "
                         "got %d." % len(counts))

    if font_path is None:
        font_path = FONT_PATH

    # normalize counts
    counts = counts / float(counts.max())
    # sort words by counts
    inds = np.argsort(counts)[::-1]
    counts = counts[inds]
    words = words[inds]

    # create image
    img_grey = Image.new("L", (width, height))
    draw = ImageDraw.Draw(img_grey)
    integral = np.zeros((height, width), dtype=np.uint32)
    font_sizes, positions, orientations = [], [], []

    # initialize font size "large enough"
    font_size = 1000

    # start drawing grey image
    for word, count in zip(words, counts):
        # alternative way to set the font size
        # font_size = min(font_size, int(100 * np.log(count + 100)))
        while True:
            # try to find a position
            font = ImageFont.truetype(font_path, font_size)
            # transpose font optionally
            orientation = random.choice([None, Image.ROTATE_90])
            transposed_font = ImageFont.TransposedFont(
                font, orientation=orientation)
            # get size of resulting text
            box_size = draw.textsize(word, font=transposed_font)
            # find possible places using integral image:
            result = query_integral_image(integral, box_size[1] + margin,
                                          box_size[0] + margin)
            if result is not None:
                break
            # if we didn't find a place, make font smaller; stop before a
            # zero-sized font is ever requested
            font_size -= 1
            if font_size <= 0:
                break

        if result is None:
            # we were unable to draw any more
            break

        x, y = np.array(result) + margin // 2
        # actually draw the text
        draw.text((y, x), word, fill="white", font=transposed_font)
        positions.append((x, y))
        orientations.append(orientation)
        font_sizes.append(font_size)

        # recompute integral image
        img_array = np.asarray(img_grey)
        # recompute bottom right
        # the order of the cumsum's is important for speed ?!
        partial_integral = np.cumsum(np.cumsum(img_array[x:, y:], axis=1),
                                     axis=0)
        # paste recomputed part into old image
        # if x or y is zero it is a bit annoying
        if x > 0:
            if y > 0:
                partial_integral += (integral[x - 1, y:]
                                     - integral[x - 1, y - 1])
            else:
                partial_integral += integral[x - 1, y:]
        if y > 0:
            partial_integral += integral[x:, y - 1][:, np.newaxis]

        integral[x:, y:] = partial_integral

    # redraw in color
    img = Image.new("RGB", (width, height), "white")
    draw = ImageDraw.Draw(img)
    # zip() stops at the shortest input, i.e. at the last placed word;
    # materialize it so that it can be counted and iterated.
    everything = list(zip(words, font_sizes, positions, orientations))
    e_length = len(everything)
    hues = np.round(np.linspace(1, 255 - e_length, e_length)).astype(int)
    for i, (word, font_size, position, orientation) in enumerate(everything):
        font = ImageFont.truetype(font_path, font_size)
        # transpose font optionally
        transposed_font = ImageFont.TransposedFont(font,
                                                   orientation=orientation)
        draw.text((position[1], position[0]), word,
                  fill="hsl(%d" % hues[i] + ", 55%, 35%)",
                  font=transposed_font)
    # img.save(fname)
    return img
def draw(self):
    """Lay out ``self.words`` on a canvas and return the colored image.

    Words are placed one after another on a greyscale scratch image, using
    an integral image of the already drawn pixels to find free positions.
    The font size starts at 1000 and only ever shrinks; layout stops as soon
    as a word no longer fits at any size.  The placed words are then redrawn
    on an "RGB" image, each with a random hue.

    Returns
    -------
    img : PIL image
        The colored word cloud of size ``(self.width, self.height)``.
    """
    # create image
    bwimg = Image.new("L", (self.width, self.height))
    draw = ImageDraw.Draw(bwimg)
    integral = np.zeros((self.height, self.width), dtype=np.uint32)
    font_sizes, positions, orientations = [], [], []
    font_size = 1000

    # start drawing grey image
    for word, count in zip(self.words, self.counts):
        while True:
            # try to find a position
            font = ImageFont.truetype(self.font_path, font_size)
            # transpose font optionally
            orientation = random.choice([None, Image.ROTATE_90])
            transposed_font = ImageFont.TransposedFont(
                font, orientation=orientation)
            # get size of resulting text
            box_size = draw.textsize(word, font=transposed_font)
            # find possible places using integral image:
            result = query_integral_image(integral,
                                          box_size[1] + self.margin,
                                          box_size[0] + self.margin)
            if result is not None:
                break
            # if we didn't find a place, make font smaller; stop before a
            # zero-sized font is ever requested
            font_size -= 1
            if font_size <= 0:
                break

        if result is None:
            # we were unable to draw any more
            break

        x, y = np.array(result) + self.margin // 2
        # actually draw the text
        draw.text((y, x), word, fill="white", font=transposed_font)
        positions.append((x, y))
        orientations.append(orientation)
        font_sizes.append(font_size)

        # recompute integral image
        img_array = np.asarray(bwimg)
        # recompute bottom right
        # the order of the cumsum's is important for speed ?!
        partial_integral = np.cumsum(np.cumsum(img_array[x:, y:], axis=1),
                                     axis=0)
        # paste recomputed part into old image
        # if x or y is zero it is a bit annoying
        if x > 0:
            if y > 0:
                partial_integral += (integral[x - 1, y:]
                                     - integral[x - 1, y - 1])
            else:
                partial_integral += integral[x - 1, y:]
        if y > 0:
            partial_integral += integral[x:, y - 1][:, np.newaxis]

        integral[x:, y:] = partial_integral

    # redraw in color
    img = Image.new("RGB", (self.width, self.height))
    draw = ImageDraw.Draw(img)
    # zip() stops at the shortest input, i.e. at the last placed word
    everything = zip(self.words, font_sizes, positions, orientations)
    for word, font_size, position, orientation in everything:
        font = ImageFont.truetype(self.font_path, font_size)
        transposed_font = ImageFont.TransposedFont(font,
                                                   orientation=orientation)
        draw.text((position[1], position[0]), word,
                  fill="hsl(%d" % random.randint(0, 255) + ", 80%, 50%)",
                  font=transposed_font)
    # img.show()
    return img
def generate_from_frequencies(self, frequencies, max_font_size=None, min_font_size=None):  # noqa: C901
    """Create a word_cloud from words and frequencies.

    The font size of a word is interpolated between ``min_font_size`` and
    ``max_font_size`` according to its normalized frequency.  The space a
    word needs is estimated as ``(len(word) * font_size, font_size)``
    instead of being measured.

    Parameters
    ----------
    frequencies : dict from string to float
        A contains words and associated frequency.

    max_font_size : int
        Use this font-size instead of self.max_font_size

    min_font_size : int
        Use this font-size instead of self.min_font_size as the lower end
        of the size interpolation.

    Returns
    -------
    self
        ``self.layout_`` is a list of ``((word, freq), font_size, position,
        bounding_box, orientation, color)`` tuples, one per placed word.

    Raises
    ------
    ValueError
        If ``frequencies`` is empty or the probing run used to guess a
        maximum font size could not place a single word.
    """
    if len(frequencies) <= 0:
        raise ValueError("We need at least 1 word to plot a word cloud, "
                         "got %d." % len(frequencies))

    # make sure frequencies are sorted and normalized
    frequencies = sorted(frequencies.items(), key=itemgetter(1),
                         reverse=True)
    frequencies = frequencies[:self.max_words]
    # largest entry will be 1
    max_frequency = float(frequencies[0][1])
    frequencies = [(word, freq / max_frequency)
                   for word, freq in frequencies]

    if self.random_state is not None:
        random_state = self.random_state
    else:
        random_state = Random()

    if self.mask is not None:
        boolean_mask = self._get_bolean_mask(self.mask)
        width = self.mask.shape[1]
        height = self.mask.shape[0]
    else:
        boolean_mask = None
        height, width = self.height, self.width
    occupancy = IntegralOccupancyMap(height, width, boolean_mask)

    # create image
    img_grey = Image.new("L", (width, height))
    draw = ImageDraw.Draw(img_grey)

    wordlist, font_sizes, positions = [], [], []
    orientations, colors, bounding_boxes = [], [], []

    last_freq = 1.

    # fall back to the instance defaults for sizes that were not provided
    if max_font_size is None:
        max_font_size = self.max_font_size
    if min_font_size is None:
        min_font_size = self.min_font_size

    if max_font_size is None:
        # figure out a good font size by trying to draw with
        # just the first two words
        if len(frequencies) == 1:
            # we only have one word. We make it big!
            font_size = self.height
        else:
            self.generate_from_frequencies(dict(frequencies[:2]),
                                           max_font_size=self.height,
                                           min_font_size=1)
            # find font sizes
            sizes = [x[1] for x in self.layout_]
            try:
                font_size = int(2 * sizes[0] * sizes[1]
                                / (sizes[0] + sizes[1]))
            # quick fix for if self.layout_ contains less than 2 values
            # on very small images it can be empty
            except IndexError:
                try:
                    font_size = sizes[0]
                except IndexError:
                    raise ValueError(
                        "Couldn't find space to draw. Either the Canvas size"
                        " is too small or too much of the image is masked "
                        "out.")
        # The per-word size formula below interpolates up to max_font_size,
        # so the probed size has to become the upper bound.
        max_font_size = font_size
    else:
        font_size = max_font_size

    # we set self.words_ here because we called generate_from_frequencies
    # above... hurray for good design?
    self.words_ = dict(frequencies)

    if self.repeat and len(frequencies) < self.max_words:
        # pad frequencies with repeating words.
        times_extend = int(np.ceil(self.max_words / len(frequencies))) - 1
        # get smallest frequency
        frequencies_org = list(frequencies)
        downweight = frequencies[-1][1]
        for i in range(times_extend):
            frequencies.extend([(word, freq * downweight ** (i + 1))
                                for word, freq in frequencies_org])

    # start drawing grey image
    for word, freq in frequencies:
        if freq == 0:
            continue
        # select the font size
        rs = self.relative_scaling
        base_size = min_font_size + freq * (max_font_size - min_font_size)
        if rs != 0:
            font_size = int(round((rs * (freq / float(last_freq))
                                   + (1 - rs)) * base_size))
        else:
            font_size = int(base_size)
        if random_state.random() < self.prefer_horizontal:
            orientation = None
        else:
            orientation = Image.ROTATE_90
        tried_other_orientation = False

        # try to find a position
        while True:
            font = ImageFont.truetype(self.font_path, font_size)
            # transpose font optionally
            transposed_font = ImageFont.TransposedFont(
                font, orientation=orientation)
            # get size of resulting text
            # box_size = draw.textsize(word, font=transposed_font)
            # NOTE(review): this estimate ignores the orientation, so a
            # rotated word reserves the same box as a horizontal one --
            # confirm whether that is intended.
            box_size = (len(word) * font_size, font_size)
            # find possible places using integral image:
            result = occupancy.sample_position(box_size[1] + self.margin,
                                               box_size[0] + self.margin,
                                               random_state)
            if result is not None or font_size < self.min_font_size:
                # either we found a place or font-size went too small
                break
            # if we didn't find a place, make font smaller
            # but first try to rotate!
            if not tried_other_orientation and self.prefer_horizontal < 1:
                # Flip to the orientation that has not been tried yet.
                orientation = (Image.ROTATE_90 if orientation is None
                               else None)
                tried_other_orientation = True
            else:
                font_size -= self.font_step
                orientation = None

        if font_size < self.min_font_size:
            # this word could not be drawn; sizes are recomputed per word,
            # so later words may still fit
            continue

        x, y = np.array(result) + self.margin // 2
        # actually draw the text
        draw.multiline_text((y, x), word, fill="white",
                            font=transposed_font)
        positions.append((x, y))
        orientations.append(orientation)
        font_sizes.append(font_size)
        bounding_boxes.append(box_size)
        wordlist.append((word, freq))
        colors.append(self.color_func(word, font_size=font_size,
                                      position=(x, y),
                                      orientation=orientation,
                                      random_state=random_state,
                                      font_path=self.font_path))
        # recompute integral image
        if self.mask is None:
            img_array = np.asarray(img_grey)
        else:
            img_array = np.asarray(img_grey) + boolean_mask
        # recompute bottom right
        # the order of the cumsum's is important for speed ?!
        occupancy.update(img_array, x, y)
        last_freq = freq

    self.layout_ = list(zip(wordlist, font_sizes, positions,
                            bounding_boxes, orientations, colors))
    return self
def get_wordCloud(self, freq):
    """Draw the words of ``freq`` onto a white RGBA canvas and return it.

    Words are processed from the highest to the lowest value.  For each word
    random positions are tried, first horizontally and then rotated by 90
    degrees; when none is accepted the shared font size is decreased by one
    and the search is repeated.  Occupied pixels are tracked in ``grid`` and
    its summed-area table ``integral_grid``.

    The global ``random`` generator is re-seeded with 42 on every call, and
    glyph metrics are printed to stdout while laying out.

    Parameters
    ----------
    freq : dict
        Maps each word to the value it is ranked by.

    Returns
    -------
    img : PIL image
        "RGBA" image of size ``(self.width, self.height)``.
    """
    sorted_freq = sorted(freq.items(), key=lambda kv: kv[1], reverse=True)
    #image = Image.open("background.png").convert('RGBA')
    img = Image.new("RGBA", (self.width, self.height), color="white").convert('RGBA')
    draw = ImageDraw.Draw(img)
    orientation = Image.ROTATE_90
    # initial size; shared by all words and only ever decreased
    size = self.height // 4 * 2
    # initial grid: 1.0 marks an occupied pixel, indexed [x][y]
    grid = np.zeros((self.width, self.height))
    # summed-area table of ``grid`` with an extra leading row and column
    integral_grid = np.zeros((self.width + 1, self.height + 1))
    # number of random positions tried for the vertical orientation
    random_count = 50
    random.seed(42)
    # loop through words
    for test, value in sorted_freq:
        # randomly choose a color
        color = (random.randint(0, 225), random.randint(0, 225), random.randint(0, 225))
        # font at the current size
        font = ImageFont.truetype('Roboto-Bold.ttf', size=size)
        # random offset
        (x, y) = (random.randint(0, self.width - 1), random.randint(0, self.height - 1))
        transposed = False
        ascent, descent = font.getmetrics()
        (w, baseline), (offset_x, offset_y) = font.font.getsize(test)
        print(w, baseline, offset_x, offset_y)
        # finds the right size
        while (True):
            text_size = font.getsize(test)
            # shrink until the text box covers at most a quarter of the canvas
            if (text_size[0] * text_size[1] * 4 > self.width * self.height):
                size -= 1
                font = ImageFont.truetype('Roboto-Bold.ttf', size=size)
                continue
            # boolean to quit loop
            new_size_working = False
            x_sum = 0
            y_sum = 0
            (width, baseline), (offset_x, offset_y) = font.font.getsize(test)
            # check initial font is okay: sum the per-character widths and
            # take the tallest character
            for char in test:
                char_size = font.getsize(char)
                x_sum += char_size[0]
                y_sum = max(y_sum, char_size[1])
            # presumably check_grid reports whether the area is still free --
            # TODO confirm against its definition
            if (x_sum + x < self.width and y + y_sum < self.height and self.check_grid(
                    integral_grid, font, x, y, offset_y, test)):
                new_size_working = True
                break
            ############# HORIZONTAL FONT ##########################
            for i in range(10):
                (x, y) = (random.randint(0, self.width - 1), random.randint(0, self.height - 1))
                # check right bottom is in range and no intersection
                x_sum = 0
                y_sum = 0
                for char in test:
                    char_size = font.getsize(char)
                    x_sum += char_size[0]
                    y_sum = max(y_sum, char_size[1])
                if (x_sum + x < self.width and y + y_sum < self.height and self.check_grid(
                        integral_grid, font, x, y, offset_y, test)):
                    new_size_working = True
                    break
            # if horizontal works, then fill text
            if (new_size_working):
                break
            ############# VERTICAL FONT ###############################
            font = ImageFont.TransposedFont(font, orientation=orientation)
            for i in range(random_count):
                (x, y) = (random.randint(0, self.width - 1), random.randint(0, self.height - 1))
                # check right bottom is in range and no intersection
                if (font.getsize(test)[0] + x <= self.width and font.getsize(test)[1] + y <= self.height and self.check_grid_vertical(
                        integral_grid, font, x, y, offset_y, test)):
                    new_size_working = True
                    transposed = True
                    break
            # decrease size if neither works
            if (new_size_working == False):
                size -= 1
                font = ImageFont.truetype('Roboto-Bold.ttf', size=size)
            else:
                break
        # mark the occupied blocks
        if (transposed == False):
            # horizontal: walk the characters left to right, tx is the
            # running x position
            tx = x
            for char in test:
                (_, _), (_, offset_y_) = font.font.getsize(char)
                for i in range(font.getsize(char)[0]):
                    for j in range(font.getsize(char)[1] - offset_y_):
                        grid[i + tx][y + offset_y_ + j] = 1.
                tx += font.getsize(char)[0]
            print(test, font.getsize(test)[0], font.getsize(test)[0] + x, font.getsize(test)[1], font.getsize(test)[1] + y)
            # draw text
            draw.text((x, y), test, fill=color, font=font)
        else:
            # vertical: ``font`` is the TransposedFont wrapper here, so the
            # metrics come from ``font.font.font``; characters are walked in
            # reverse, ty is the running y position
            ty = y
            (_, _), (_, offset_y_) = font.font.font.getsize(test)
            for char in test[::-1]:
                (_, _), (_, offset_y_local) = font.font.font.getsize(char)
                # print(test, char, w,b,offset_x_, offset_y_)
                for i in range(offset_y_local - offset_y_, font.getsize(char)[0] - offset_y_):
                    for j in range(font.getsize(char)[1]):
                        grid[i + x][ty + j] = 1.
                ty += font.getsize(char)[1]
            # for i in range(font.getsize(test)[0]-offset_y):
            #     for j in range(font.getsize(test)[1]):
            #         grid[i+x][j+y] = 1.
            print(test, font.getsize(test)[0], font.getsize(test)[0] + x, font.getsize(test)[1], font.getsize(test)[1] + y)
            draw.text((x, y), test, fill=color, font=font)
        # refresh the summed-area table for every cell to the right of and
        # below the placement origin
        for i in range(x + 1, self.width + 1):
            for j in range(y + 1, self.height + 1):
                integral_grid[i][j] = grid[i - 1][j - 1] + integral_grid[i][j - 1] + integral_grid[i - 1][j] - integral_grid[i - 1][j - 1]
    # pixels = img.load()
    # for i in range(img.size[0]):
    #     for j in range(img.size[1]):
    #         if (grid[i][j] == 0):
    #             pixels[i,j] = (0, 0, 0)
    # show image
    return img
def generate_from_frequencies(self, frequencies):
    """Create a word_cloud from words and frequencies.

    Parameters
    ----------
    frequencies : array of tuples
        A tuple contains the word and its frequency.

    Returns
    -------
    self
        ``self.words_`` holds the normalized ``(word, freq)`` list and
        ``self.layout_`` one ``((word, freq), font_size, position,
        orientation, color)`` tuple per placed word.

    Raises
    ------
    ValueError
        If ``frequencies`` is empty or the mask has an unsupported number
        of dimensions.
    """
    # Validate before touching frequencies[0]; an empty input used to fail
    # with an IndexError further down.
    if len(frequencies) <= 0:
        raise ValueError("We need at least 1 word to plot a word cloud, "
                         "got %d." % len(frequencies))

    # make sure frequencies are sorted and normalized
    frequencies = sorted(frequencies, key=item1, reverse=True)
    frequencies = frequencies[:self.max_words]
    # largest entry will be 1
    max_frequency = float(frequencies[0][1])

    frequencies = [(word, freq / max_frequency)
                   for word, freq in frequencies]

    self.words_ = frequencies

    if self.random_state is not None:
        random_state = self.random_state
    else:
        random_state = Random()

    if self.mask is not None:
        mask = self.mask
        width = mask.shape[1]
        height = mask.shape[0]
        if mask.dtype.kind == 'f':
            warnings.warn(
                "mask image should be unsigned byte between 0 and"
                " 255. Got a float array")
        if mask.ndim == 2:
            boolean_mask = mask == 255
        elif mask.ndim == 3:
            # if all channels are white, mask out
            boolean_mask = np.all(mask[:, :, :3] == 255, axis=-1)
        else:
            raise ValueError("Got mask of invalid shape: %s"
                             % str(mask.shape))
    else:
        boolean_mask = None
        height, width = self.height, self.width
    occupancy = IntegralOccupancyMap(height, width, boolean_mask)

    # create image
    img_grey = Image.new("L", (width, height))
    draw = ImageDraw.Draw(img_grey)

    font_sizes, positions, orientations, colors = [], [], [], []

    font_size = self.max_font_size
    last_freq = 1.

    # start drawing grey image
    for word, freq in frequencies:
        # select the font size
        rs = self.relative_scaling
        if rs != 0:
            font_size = int(
                round((rs * (freq / float(last_freq)) + (1 - rs))
                      * font_size))
        while True:
            # try to find a position
            font = ImageFont.truetype(self.font_path, font_size)
            # transpose font optionally
            if random_state.random() < self.prefer_horizontal:
                orientation = None
            else:
                orientation = Image.ROTATE_90
            transposed_font = ImageFont.TransposedFont(
                font, orientation=orientation)
            # get size of resulting text
            box_size = draw.textsize(word, font=transposed_font)
            # find possible places using integral image:
            result = occupancy.sample_position(box_size[1] + self.margin,
                                               box_size[0] + self.margin,
                                               random_state)
            # Stop at the minimum font size: testing for exactly 0 lets a
            # font_step > 1 jump past zero and keeps shrinking below
            # min_font_size.
            if result is not None or font_size < self.min_font_size:
                break
            # if we didn't find a place, make font smaller
            font_size -= self.font_step

        if font_size < self.min_font_size:
            # we were unable to draw any more
            break

        x, y = np.array(result) + self.margin // 2
        # actually draw the text
        draw.text((y, x), word, fill="white", font=transposed_font)
        positions.append((x, y))
        orientations.append(orientation)
        font_sizes.append(font_size)
        colors.append(
            self.color_func(word, font_size=font_size, position=(x, y),
                            orientation=orientation,
                            random_state=random_state,
                            font_path=self.font_path))
        # recompute integral image
        if self.mask is None:
            img_array = np.asarray(img_grey)
        else:
            img_array = np.asarray(img_grey) + boolean_mask
        # recompute bottom right
        # the order of the cumsum's is important for speed ?!
        occupancy.update(img_array, x, y)
        last_freq = freq

    self.layout_ = list(
        zip(frequencies, font_sizes, positions, orientations, colors))
    return self
def fit_words(words, font_path=None, width=80, height=40,
              margin=2, prefer_horiz=0.90, scale=5, file_name=None):
    """Generate the positions for words.

    Words are placed from heaviest to lightest on a grey canvas; placement
    stops at the first word for which no free position is found.

    Parameters
    ----------
    words : array of tuples
        A tuple contains the word and its frequency.

    font_path : string
        Font path to the font that will be used (OTF or TTF). When None,
        the path returned by ``get_default_font()`` is used.

    width : int (default=80)
        Width of the canvas.

    height : int (default=40)
        Height of the canvas.

    margin : int (default=2)
        Padding added to a word's box when searching for a free position.

    prefer_horiz : float (default=0.90)
        Not read by this function; the orientation is picked by
        ``select_orintation``.

    scale : int (default=5)
        Multiplier applied to ``weight * height`` to obtain the font size,
        in case the font would otherwise be too small.

    file_name : unused
        Not read by this function.

    Returns
    -------
    layout : zip of ((word, weight), font_size, (x, y), orientation)
        One entry per word that was placed.
    score : float
        ``show_rate * fill_rate``.
    fill_rate : float
        Fraction of canvas pixels that are non-zero after drawing.
    show_rate : float
        Number of placed words divided by the number of input words.

    Raises
    ------
    ValueError
        If ``font_path`` does not exist.
    """
    if len(words) <= 0:
        print("We need at least 1 word to plot a word cloud, got %d."
              % len(words))
        # Nothing to lay out. Falling through used to crash on the unbound
        # ``integral`` below, so report an empty layout instead.
        return zip([], [], [], []), 0.0, 0.0, 0.0

    if font_path is None:
        font_path = get_default_font()

    if not os.path.exists(font_path):
        raise ValueError("The font %s does not exist." % font_path)

    # create image
    img_grey = Image.new("L", (width, height))
    draw = ImageDraw.Draw(img_grey)

    valid_words, font_sizes, positions, orientations = [], [], [], []

    # normalize the weights so that they sum to 1
    sum_weight = sum(weight for word, weight in words)
    words = [(word, weight * 1.0 / sum_weight) for word, weight in words]

    # start drawing grey image, heaviest word first
    for word, weight in sorted(words, key=lambda x: x[1], reverse=True):
        # snapshot of the canvas as it is before this word is drawn
        integral = np.asarray(img_grey)

        # alternative way to set the font size
        font_size = int((weight * height * scale))
        font = ImageFont.truetype(font_path, font_size)

        box_size, orientation = select_orintation(
            font_size, font_path, (width, height), word, margin, draw, font)

        # find possible places using integral image:
        result = query_integral_image(
            integral, (box_size[0] + margin, box_size[1] + margin))

        if result is None:
            break

        if debug >= 1:
            print('font_size', font_size, word, weight, 'orientation:',
                  orientation, 'pos:', result, 'box_size:', box_size)

        x, y = np.array(result) + margin // 2

        # need to reset the font: select_orintation leaves the draw object
        # set to its last probe font
        transposed_font = ImageFont.TransposedFont(
            font, orientation=orientation)
        draw.setfont(transposed_font)
        draw.text((y, x), word, fill="white")

        # store the information
        valid_words.append((word, weight))
        positions.append((x, y))
        orientations.append(orientation)
        font_sizes.append(font_size)

    # Measure the *final* canvas. The per-iteration snapshot predates the
    # last word drawn, so reusing it under-reported the fill rate whenever
    # every word fitted.
    integral = np.asarray(img_grey)
    fill_rate = 1.0 * (integral != 0).sum() / (integral.shape[0] *
                                               integral.shape[1])
    show_rate = len(valid_words) * 1.0 / len(words)
    score = show_rate * fill_rate

    if debug >= 3:
        print(zip(valid_words, font_sizes, positions, orientations))
        print('size:', len(valid_words), 'all:', len(words))

    if debug >= 1:
        print('integral sum:', (integral != 0).sum(), 'show_rate:',
              show_rate, 'fille_rate:', fill_rate, 'score:', score)

    return (zip(valid_words, font_sizes, positions, orientations),
            score, fill_rate, show_rate)
layout[0][4] = dct[0][0] layout[0][5] = frequencies[0] pixels = set_pixels(pixels, w, h, X // 2 - 1 - w // 2, Y // 2 - 1 - h // 2, 0) for i in range(1, len(dct)): flag = 0 txt = dct[i][0] font_size = dct[i][1] #font_size=10*dct[i][1]+25 if font_size <= 25: font_size = font_size + 15 fnt = ImageFont.truetype("FORTE.ttf", font_size) w, h = get_text_dimensions(txt, fnt) hit = 0 flag = 0 if i % 4 == 0: fnt = ImageFont.TransposedFont(fnt, orientation=Image.ROTATE_90) while flag != 1: x_coordinate, y_coordinate = get_x_and_y(0, X, Y) if (x_coordinate + h) < X and (y_coordinate + w) < Y: flag = check_if_possible_rotate(pixels, x_coordinate, y_coordinate, h, w) if hit > 1000: flag = 1 break hit += 1 if flag == 1: draw.text((x_coordinate, y_coordinate), txt, fill=generate_random_color(), font=fnt) layout[i][0] = x_coordinate
def generate_word_cloud(self, num_terms_to_visualize=50, margin=5):
    '''
    Lay out the weighted terms on a canvas, then show and save the image.

    For each term we try to find a spot on the canvas where it fits,
    shrinking the font one point at a time until it does. We first build a
    black & white image and then redraw in color, as this is faster than
    calculating and drawing in color the first time.

    Parameters
    ----------
    num_terms_to_visualize : int (default=50)
        Only the first this-many entries of ``self.weighted_terms`` are
        considered.
    margin : int (default=5)
        Padding added to a term's box when searching for a free position.

    Returns
    -------
    0 when ``self.weighted_terms`` is empty or contains a weight outside
    [0, 1]; otherwise None, after the image has been shown and written to
    ``self.output_filename``.
    '''
    # check length of term dict -- must be GT 0
    if len(self.weighted_terms) <= 0:
        print("List of terms must contain at least one term and weight.")
        # report the count; this used to format the whole list object
        print("The current list contains {0} terms.".format(
            len(self.weighted_terms)))
        return 0

    # make sure all weights are 0 <= weight <= 1
    for term, weight in self.weighted_terms:
        if weight < 0 or 1 < weight:
            print("All weights must be between 0 and 1.")
            print("Term '{0}' has weight {1}.".format(term, weight))
            return 0

    # Reduce the term list down to length num_terms_to_visualize, then
    # sort by weight. sorted() works on a copy, so self.weighted_terms is
    # never reordered; the in-place sort used to mutate it, but only when
    # no truncation took place.
    term_list = sorted(self.weighted_terms[:num_terms_to_visualize],
                       key=lambda pair: pair[1], reverse=True)

    # create black&white image
    black_white_image = Image.new("L", (self.image_width,
                                        self.image_height))
    draw = ImageDraw.Draw(black_white_image)
    integral = numpy.zeros((self.image_height, self.image_width),
                           dtype=numpy.uint32)
    font_sizes, term_positions, term_orientations = [], [], []

    # set font_size to "large enough" value
    font_size = 200

    for term, weight in term_list:
        font_size = min(font_size, int(100 * numpy.log(weight + 100)))

        # Shrink until the term fits. The loop stops before a 0-point font
        # would be requested; the old loop built one and then discarded
        # whatever it found.
        location_result = None
        while font_size > 0:
            font = ImageFont.truetype(FONT_PATH, font_size)
            # optionally rotate orientation
            orientation = random.choice([None, Image.ROTATE_90])
            transposed_font = ImageFont.TransposedFont(
                font, orientation=orientation)
            draw.setfont(transposed_font)
            # get size of box for current term
            term_box_size = draw.textsize(term)
            # query_integral_image to get possible places for current term
            location_result = self.query_integral_image(
                integral,
                term_box_size[1] + margin,
                term_box_size[0] + margin)
            if location_result is not None:
                break
            font_size -= 1

        # font_size hit 0 without a fit: we cannot draw anymore
        if location_result is None:
            break

        # set x and y coords for placing term and then draw it
        x_coord, y_coord = numpy.array(location_result) + margin // 2
        draw.text((y_coord, x_coord), term, fill="white")
        term_positions.append((x_coord, y_coord))
        term_orientations.append(orientation)
        font_sizes.append(font_size)

        # recompute the integral image for the region at and below/right
        # of the newly drawn term
        image_array = numpy.asarray(black_white_image)
        temp_sum = numpy.cumsum(image_array[x_coord:, y_coord:], axis=1)
        partial_integral = numpy.cumsum(temp_sum, axis=0)
        # paste recomputed part into old image
        if x_coord > 0:
            if y_coord > 0:
                partial_integral += (integral[x_coord-1, y_coord:]
                                     - integral[x_coord-1, y_coord-1])
            else:
                partial_integral += integral[x_coord-1, y_coord:]
        if y_coord > 0:
            partial_integral += integral[x_coord:,
                                         y_coord-1][:, numpy.newaxis]
        integral[x_coord:, y_coord:] = partial_integral

    # now redraw entire image in color
    color_image = Image.new("RGB", (self.image_width, self.image_height),
                            "white")
    color_draw = ImageDraw.Draw(color_image)

    # build a list of big tuples with all the needed info for each term;
    # zip() truncates to the terms that were actually placed
    terms = [term for term, weight in term_list]
    term_data = zip(terms, font_sizes, term_positions, term_orientations)
    for term, font_size, term_position, term_orientation in term_data:
        font = ImageFont.truetype(FONT_PATH, font_size)
        transposed_font = ImageFont.TransposedFont(
            font, orientation=term_orientation)
        color_draw.setfont(transposed_font)
        color_draw.text((term_position[1], term_position[0]), term,
                        fill="hsl(%d" % random.randint(0, 255)
                        + ", 80%, 50%)")

    # display image
    color_image.show()

    # save image to file
    color_image.save(self.output_filename)
def generate_from_frequencies(self, frequencies, max_font_size=None):
    """Create a word_cloud from words and frequencies.

    Words are placed from most to least frequent. When a word does not
    fit, the other orientation is tried once before the font is shrunk by
    ``self.font_step``; layout stops at the first word that cannot be
    placed at ``self.min_font_size`` or larger.

    Parameters
    ----------
    frequencies : dict from string to float
        Maps each word to its frequency.

    max_font_size : int
        Use this font-size instead of self.max_font_size

    Returns
    -------
    self

    Raises
    ------
    ValueError
        If ``frequencies`` is empty, if the mask is neither 2- nor
        3-dimensional, or if not even one word could be placed while
        estimating the font size ("canvas size is too small").
    """
    # make sure frequencies are sorted and normalized
    frequencies = sorted(frequencies.items(), key=itemgetter(1),
                         reverse=True)
    if len(frequencies) <= 0:
        raise ValueError("We need at least 1 word to plot a word cloud, "
                         "got %d." % len(frequencies))
    frequencies = frequencies[:self.max_words]

    # largest entry will be 1
    max_frequency = float(frequencies[0][1])

    frequencies = [(word, freq / max_frequency)
                   for word, freq in frequencies]

    if self.random_state is not None:
        random_state = self.random_state
    else:
        random_state = Random()

    if self.mask is not None:
        mask = self.mask
        width = mask.shape[1]
        height = mask.shape[0]
        if mask.dtype.kind == 'f':
            warnings.warn("mask image should be unsigned byte between 0"
                          " and 255. Got a float array")
        if mask.ndim == 2:
            boolean_mask = mask == 255
        elif mask.ndim == 3:
            # if all channels are white, mask out
            boolean_mask = np.all(mask[:, :, :3] == 255, axis=-1)
        else:
            raise ValueError("Got mask of invalid shape: %s"
                             % str(mask.shape))
    else:
        boolean_mask = None
        height, width = self.height, self.width
    occupancy = IntegralOccupancyMap(height, width, boolean_mask)

    # create image
    img_grey = Image.new("L", (width, height))
    draw = ImageDraw.Draw(img_grey)
    font_sizes, positions, orientations, colors = [], [], [], []

    last_freq = 1.

    if max_font_size is None:
        # if not provided use default font_size
        max_font_size = self.max_font_size

    if max_font_size is None:
        # figure out a good font size by trying to draw with
        # just the first two words
        if len(frequencies) == 1:
            # we only have one word. We make it big!
            font_size = self.height
        else:
            self.generate_from_frequencies(dict(frequencies[:2]),
                                           max_font_size=self.height)
            # find font sizes
            sizes = [x[1] for x in self.layout_]
            try:
                # harmonic mean of the two trial sizes
                font_size = int(2 * sizes[0] * sizes[1]
                                / (sizes[0] + sizes[1]))
            # quick fix for if self.layout_ contains less than 2 values
            # on very small images it can be empty
            except IndexError:
                try:
                    font_size = sizes[0]
                except IndexError:
                    raise ValueError('canvas size is too small')
    else:
        font_size = max_font_size

    # we set self.words_ here because we called generate_from_frequencies
    # above, which overwrote it with the two-word trial run
    self.words_ = dict(frequencies)

    # start drawing grey image
    for word, freq in frequencies:
        # select the font size
        rs = self.relative_scaling
        if rs != 0:
            font_size = int(round((rs * (freq / float(last_freq))
                                   + (1 - rs)) * font_size))
        if random_state.random() < self.prefer_horizontal:
            orientation = None
        else:
            orientation = Image.ROTATE_90
        tried_other_orientation = False

        result = None
        # Checking the size limit before building the font avoids loading
        # a font that is already known to be too small (possibly size 0).
        while font_size >= self.min_font_size:
            # try to find a position
            font = ImageFont.truetype(self.font_path, font_size)
            # transpose font optionally
            transposed_font = ImageFont.TransposedFont(
                font, orientation=orientation)
            # get size of resulting text
            box_size = draw.textsize(word, font=transposed_font)
            # find possible places using integral image:
            result = occupancy.sample_position(box_size[1] + self.margin,
                                               box_size[0] + self.margin,
                                               random_state)
            if result is not None:
                # we found a place
                break
            # if we didn't find a place, make font smaller
            # but first try to rotate!
            if not tried_other_orientation and self.prefer_horizontal < 1:
                # Flip to the *other* orientation. Both branches used to
                # yield ROTATE_90, so a vertical word was simply retried
                # vertically.
                orientation = (Image.ROTATE_90 if orientation is None
                               else None)
                tried_other_orientation = True
            else:
                font_size -= self.font_step
                orientation = None

        if result is None:
            # font-size went too small: we were unable to draw any more
            break

        x, y = np.array(result) + self.margin // 2
        # actually draw the text; PIL wants (column, row)
        draw.text((y, x), word, fill="white", font=transposed_font)
        positions.append((x, y))
        orientations.append(orientation)
        font_sizes.append(font_size)
        colors.append(
            self.color_func(word, font_size=font_size,
                            position=(x, y),
                            orientation=orientation,
                            random_state=random_state,
                            font_path=self.font_path))
        # recompute integral image
        if self.mask is None:
            img_array = np.asarray(img_grey)
        else:
            img_array = np.asarray(img_grey) + boolean_mask
        # recompute bottom right
        # the order of the cumsum's is important for speed ?!
        occupancy.update(img_array, x, y)
        last_freq = freq

    # zip() truncates to the words that were actually placed
    self.layout_ = list(
        zip(frequencies, font_sizes, positions, orientations, colors))
    return self
def generate_from_frequencies(self, frequencies):
    """Create a word_cloud from words, frequencies and OCEAN scores.

    Words are placed from most to least frequent, each with a randomly
    chosen font from ``self.font_paths``. Layout stops at the first word
    that cannot be placed at ``self.min_font_size`` or larger.

    Parameters
    ----------
    frequencies : array of tuples
        Each tuple holds the word, its frequency, and its five ocean
        scores in OCEAN order:
        ``(word, freq, ope, con, ext, agr, neu)``.

    Returns
    -------
    self

    Raises
    ------
    ValueError
        If ``frequencies`` is empty, or if the mask is neither 2- nor
        3-dimensional.
    """
    # Validate up front: the emptiness check used to sit *after*
    # ``frequencies[0][1]``, so an empty input died with an IndexError and
    # the intended message was never reached.
    frequencies = list(frequencies)
    if len(frequencies) <= 0:
        raise ValueError("We need at least 1 word to plot a word cloud, "
                         "got %d." % len(frequencies))

    # make sure frequencies are sorted and normalized
    frequencies = sorted(frequencies, key=item1, reverse=True)
    frequencies = frequencies[:self.max_words]

    # largest entry will be 1 for freq and ocean scores
    # NOTE(review): a column whose maximum is 0 makes the division below
    # raise ZeroDivisionError -- confirm callers never pass all-zero scores.
    max_frequency = float(frequencies[0][1])
    max_ope = max(frequencies, key=itemgetter(2))[2]
    max_con = max(frequencies, key=itemgetter(3))[3]
    max_ext = max(frequencies, key=itemgetter(4))[4]
    max_agr = max(frequencies, key=itemgetter(5))[5]
    max_neu = max(frequencies, key=itemgetter(6))[6]

    frequencies = [
        (word, freq / max_frequency, ope / max_ope, con / max_con,
         ext / max_ext, agr / max_agr, neu / max_neu)
        for word, freq, ope, con, ext, agr, neu in frequencies
    ]

    self.words_ = frequencies

    # font size of the first word placed; 0 until that word exists
    max_actual_size = 0

    # gives self a random state object for random generation
    if self.random_state is not None:
        random_state = self.random_state
    else:
        random_state = Random()

    # if there is a mask, set the attributes
    if self.mask is not None:
        mask = self.mask
        width = mask.shape[1]
        height = mask.shape[0]
        if mask.dtype.kind == 'f':
            warnings.warn(
                "mask image should be unsigned byte between 0 and"
                " 255. Got a float array")
        if mask.ndim == 2:
            boolean_mask = mask == 255
        elif mask.ndim == 3:
            # if all channels are white, mask out
            boolean_mask = np.all(mask[:, :, :3] == 255, axis=-1)
        else:
            raise ValueError("Got mask of invalid shape: %s"
                             % str(mask.shape))
        # reset the max font size to a quarter of the mask height;
        # floor division keeps the font size an integer
        font_size = height // 4
    else:
        boolean_mask = None
        height, width = self.height, self.width
        font_size = self.max_font_size
    occupancy = IntegralOccupancyMap(height, width, boolean_mask)

    # create image
    img_grey = Image.new("L", (width, height))
    draw = ImageDraw.Draw(img_grey)
    font_sizes, positions, orientations = [], [], []
    colors, font_indecies, oceans = [], [], []

    last_freq = 1.

    # start drawing grey image
    for word, freq, ope, con, ext, agr, neu in frequencies:
        # select the font size
        rs = self.relative_scaling
        if rs != 0:
            # relative size heuristics. might not want to mess with this?
            font_size = int(round((rs * (freq / float(last_freq))
                                   + (1 - rs)) * font_size))

        # set max actual size once exactly one word has been placed
        if len(font_sizes) == 1:
            max_actual_size = font_sizes[0]

        # try to find a position
        result = None
        while True:
            # font size is in the middle, make it smaller
            # this check will be ignored if max_actual_size is 0 aka not
            # set yet; it also always fails if upper_font_filter is 0 aka
            # not set
            if (font_size < max_actual_size * self.upper_font_filter
                    and font_size > max_actual_size
                    * self.lower_font_filter):
                # truncate: the font loader needs an integer size
                font_size = int(max_actual_size * self.lower_font_filter)

            # Stop before loading a font that is too small. The old
            # ``font_size == 0`` test was skipped by any font_step that
            # jumps over 0 and let sizes below min_font_size be tried.
            if font_size <= 0 or font_size < self.min_font_size:
                break

            # pick one of the configured fonts at random
            font_index = random_state.randint(0, len(self.font_paths) - 1)
            font = ImageFont.truetype(self.font_paths[font_index],
                                      font_size)
            # transpose font optionally
            if random_state.random() < self.prefer_horizontal:
                orientation = None
            else:
                orientation = Image.ROTATE_90
            transposed_font = ImageFont.TransposedFont(
                font, orientation=orientation)
            # get size of resulting text
            box_size = draw.textsize(word, font=transposed_font)
            # find possible places using integral image:
            result = occupancy.sample_position(box_size[1] + self.margin,
                                               box_size[0] + self.margin,
                                               random_state)
            if result is not None:
                break
            # if we didn't find a place, make font smaller
            font_size -= self.font_step

        if result is None:
            # we were unable to draw any more
            break

        x, y = np.array(result) + self.margin // 2
        # actually draw the text; PIL wants (column, row)
        draw.text((y, x), word, fill="white", font=transposed_font)
        positions.append((x, y))
        orientations.append(orientation)
        font_sizes.append(font_size)
        font_indecies.append(font_index)
        ocean = (ope, con, ext, agr, neu)
        oceans.append(ocean)
        # NOTE(review): font_path receives the whole font_paths list, not
        # the font chosen above -- confirm this is what color_func expects.
        colors.append(
            self.color_func(word, font_size=font_size,
                            position=(x, y),
                            orientation=orientation,
                            random_state=random_state,
                            font_path=self.font_paths,
                            width=width,
                            height=height,
                            ocean=ocean,
                            personality_score=self.personality_score,
                            pos_score=self.pos_score,
                            neg_score=self.neg_score))
        # recompute integral image
        if self.mask is None:
            img_array = np.asarray(img_grey)
        else:
            img_array = np.asarray(img_grey) + boolean_mask
        # recompute bottom right
        # the order of the cumsum's is important for speed ?!
        occupancy.update(img_array, x, y)
        last_freq = freq

    # zip() truncates to the words that were actually placed
    self.layout_ = list(
        zip(frequencies, font_sizes, positions, orientations,
            font_indecies, oceans, colors))
    return self