def test_generate_from_frequencies():
    """generate_from_frequencies() accepts process_text() output and returns a TextCloud."""
    cloud = TextCloud(max_words=50)
    frequencies = cloud.process_text(THIS)
    generated = cloud.generate_from_frequencies(frequencies)
    assert isinstance(generated, TextCloud)
def test_zero_frequencies():
    """Only the word with a non-zero frequency ends up in the layout."""
    cloud = TextCloud()
    frequencies = {'test': 1, 'test1': 0, 'test2': 0}
    cloud.generate_from_frequencies(frequencies)

    layout = cloud.layout_
    assert len(layout) == 1
    # first layout entry -> (word, frequency) pair -> word
    assert layout[0][0][0] == 'test'
def test_coloring_black_works():
    """Generating with a color function built from an all-zero (black) image works."""
    black_image = np.zeros((50, 50, 3))
    color_func = ImageColorGenerator(black_image)
    cloud = TextCloud(
        width=50,
        height=50,
        random_state=42,
        color_func=color_func,
        min_font_size=1,
    )
    cloud.generate(THIS)
def test_recolor_too_small():
    """recolor() raises ValueError when the coloring image is smaller than the canvas."""
    small_image = np.array(Image.new('RGB', size=(20, 20)))
    cloud = TextCloud(width=30, height=30, random_state=0, min_font_size=1)
    cloud = cloud.generate(THIS)
    color_func = ImageColorGenerator(small_image)

    expected_message = 'ImageColorGenerator is smaller than the canvas'
    with pytest.raises(ValueError, match=expected_message):
        cloud.recolor(color_func=color_func)
def test_process_text():
    """process_text() returns a dict."""
    frequencies = TextCloud(max_words=50).process_text(THIS)
    # check for proper return type
    assert isinstance(frequencies, dict)
def test_collocation_stopwords():
    """Bigrams containing a stopword are dropped while non-stopword unigrams remain."""
    cloud = TextCloud(
        collocations=True,
        stopwords={"you", "very"},
        collocation_threshold=9,
    )
    cloud.generate(STOPWORDED_COLLOCATIONS)

    words = cloud.words_
    for bigram in ("thank you", "very much"):
        assert bigram not in words
    assert "thank" in words
    # a bigram made up only of stopwords is removed as well
    assert "you very" not in words
def make_cloud(self):
    """Build a TextCloud from the stored documents and render it.

    All documents returned by ``self.articledb.return_doc()`` are joined
    into one string, tokenized and stopword-filtered through ``DocProc``,
    and the first (only) processed document is re-joined with spaces and
    handed to ``TextCloud(...).make_cloud()``.
    """
    documents = self.articledb.return_doc()
    processor = DocProc()

    # DocProc works on a list of documents, so wrap the single merged string.
    corpus = [" ".join(documents)]
    cleaned = processor.remove_stopwords(processor.tokenize(corpus))

    cloud_text = " ".join(cleaned[0])
    TextCloud(cloud_text).make_cloud()
def test_plural_stopwords():
    """'wa' keeps a frequency below 1 both with and without collocations."""
    text = '''was was was was was was was was was was was was was was was wa hello hello hello hello hello hello hello hello goodbye good bye maybe yes no'''
    # first the default configuration, then with collocations disabled
    for options in ({}, {'collocations': False}):
        generated = TextCloud(**options).generate(text)
        assert generated.words_['wa'] < 1
def test_collocation_stopwords_uppercase():
    """Lower-case stopwords also filter capitalized words and bigrams."""
    cloud = TextCloud(
        collocations=True,
        stopwords={"thank", "hi", "there"},
        collocation_threshold=9,
    )
    cloud.generate(STOPWORDED_COLLOCATIONS_UPPERCASE)

    words = cloud.words_
    for phrase in ("Thank you", "thank you", "Thank"):
        assert phrase not in words
    # a bigram of all stopwords will be removed, whatever its casing
    for phrase in ("hi There", "Hi there", "Hi There"):
        assert phrase not in words
def makeImage(text):
    """Render a text cloud shaped by ``alice_mask.png`` and display it.

    Parameters
    ----------
    text
        Passed unchanged to ``TextCloud.generate_from_frequencies``; despite
        the name this is presumably a word -> frequency mapping (confirm
        against callers).
    """
    # Load the mask inside a context manager so the image file is closed
    # as soon as its pixels have been copied into the array.
    with Image.open("alice_mask.png") as mask_image:
        alice_mask = np.array(mask_image)

    wc = TextCloud(background_color="white", max_words=1000, mask=alice_mask)
    # generate word cloud
    wc.generate_from_frequencies(text)

    # show
    plt.imshow(wc, interpolation="bilinear")
    plt.axis("off")
    plt.show()
def test_multiple_s():
    """Plural normalization merges 'flos' into 'flo' but keeps 'floss' and 'flosss'."""
    text = 'flo flos floss flosss'

    normalized = TextCloud(stopwords=[]).generate(text).words_
    assert "flo" in normalized
    assert "flos" not in normalized
    assert "floss" in normalized
    assert "flosss" in normalized

    # not normalizing means that the one with just one s is kept
    raw = TextCloud(stopwords=[], normalize_plurals=False).generate(text).words_
    for word in ("flo", "flos", "floss", "flosss"):
        assert word in raw
def test_random_state():
    """Two clouds built with the same random_state render identically."""
    first = TextCloud(random_state=0)
    second = TextCloud(random_state=0)
    first.generate(THIS)
    second.generate(THIS)
    assert_array_equal(first, second)
def test_repeat():
    """Check that ``repeat=True`` pads the layout by repeating words."""
    short_text = "Some short text"
    wc = TextCloud(stopwords=[]).generate(short_text)
    assert len(wc.layout_) == 3

    wc = TextCloud(max_words=50, stopwords=[], repeat=True).generate(short_text)
    # multiple of word count larger than max_words
    assert len(wc.layout_) == 51
    # relative scaling doesn't work well with repeat
    assert wc.relative_scaling == 0
    # all frequencies are 1
    assert len(wc.words_) == 3
    assert_array_equal(list(wc.words_.values()), 1)
    frequencies = [w[0][1] for w in wc.layout_]
    assert_array_equal(frequencies, 1)

    repetition_text = "Some short text with text"
    wc = TextCloud(max_words=52, stopwords=[], repeat=True)
    wc.generate(repetition_text)
    assert len(wc.words_) == 4
    # normalized frequencies
    assert wc.words_['text'] == 1
    assert wc.words_['with'] == .5
    # 52 is a multiple of the four distinct words, so the layout must be
    # padded to exactly max_words.  Compare with ``==``: the form
    # ``assert x, y`` would use y as the failure message and pass for any
    # non-empty layout.
    assert len(wc.layout_) == wc.max_words
    frequencies = [w[0][1] for w in wc.layout_]
    # check that frequencies are sorted
    assert np.all(np.diff(frequencies) <= 0)
def test_empty_text():
    """generate() raises ValueError for empty and for fully-filtered text."""
    cases = (
        # text that is empty from the start
        ([], ''),
        # text that becomes empty once the stopwords are filtered out
        (['a', 'b'], 'a b a'),
    )
    for stopwords, text in cases:
        cloud = TextCloud(stopwords=stopwords)
        with pytest.raises(ValueError):
            cloud.generate(text)
def test_collocations():
    """Bigrams appear only when collocations are enabled."""
    unigram_cloud = TextCloud(collocations=False, stopwords=set())
    unigram_cloud.generate(THIS)

    bigram_cloud = TextCloud(collocations=True, stopwords=set())
    bigram_cloud.generate(THIS)

    assert "is better" in bigram_cloud.words_
    assert "is better" not in unigram_cloud.words_
    assert "way may" not in bigram_cloud.words_
def test_plurals_numbers():
    """Check plural and case handling on text that also contains numbers."""
    extra_line = "1 idea 2 ideas three ideas although many Ideas"
    text = THIS + "\n" + extra_line
    words = TextCloud(stopwords=[]).generate(text).words_

    # "Ideas" is usually not capitalized
    assert "Ideas" not in words
    # the plural form is removed
    assert "ideas" not in words
    # "although" is usually capitalized
    assert "although" not in words
    assert "idea" in words
    assert "Although" in words
    assert "better than" in words
def test_unicode_stopwords():
    """Unicode and str stopwords yield the same processed words."""
    unicode_cloud = TextCloud(stopwords=[u'Beautiful'])
    try:
        from_unicode = unicode_cloud.process_text(unicode(THIS))
    except NameError:
        # PY3: there is no ``unicode`` builtin, str is already unicode
        from_unicode = unicode_cloud.process_text(THIS)

    str_cloud = TextCloud(stopwords=['Beautiful'])
    from_str = str_cloud.process_text(str(THIS))

    assert from_unicode == from_str
def test_writing_to_file(tmpdir):
    """to_file() writes an image whose size matches the cloud's width and height.

    Parameters
    ----------
    tmpdir
        pytest's temporary-directory fixture; the PNG is written inside it.
    """
    wc = TextCloud()
    wc.generate(THIS)

    # check writing to file
    filename = str(tmpdir.join("word_cloud.png"))
    wc.to_file(filename)

    # Open through a context manager so the file handle is released
    # instead of being left to the garbage collector.
    with Image.open(filename) as loaded_image:
        assert loaded_image.size == (wc.width, wc.height)
def test_process_text_default_patterns():
    """min_word_length decides whether one-character tokens are kept."""
    min_two = TextCloud(stopwords=set(), include_numbers=True, min_word_length=2)
    words_min_two = min_two.process_text(THIS)

    min_one = TextCloud(stopwords=set(), include_numbers=True, min_word_length=1)
    words_min_one = min_one.process_text(THIS)

    single_char_tokens = ("a", "3")
    for token in single_char_tokens:
        assert token not in words_min_two
    for token in single_char_tokens:
        assert token in words_min_one
def test_check_errors():
    """An ungenerated cloud rejects to_html(), array conversion and recolor()."""
    wc = TextCloud()

    with pytest.raises(NotImplementedError):
        wc.to_html()

    # Both operations need a generated layout; the error message must tell
    # the user to call generate first.  ``match`` searches the message, so
    # this is the same check as ``"call generate" in str(e)``.
    with pytest.raises(ValueError, match="call generate"):
        np.array(wc)

    with pytest.raises(ValueError, match="call generate"):
        wc.recolor()
def gen_cloud():
    """Render a team member's strength sentences as a masked text cloud PNG.

    Reads one sentence per line from ``STRENGTHPATH + TEAMMEMBERNAME +
    '.txt'``, weights each sentence by its position (earlier lines weigh
    more), draws the cloud inside the shape loaded from ``CLOUDMASK`` and
    writes it to ``OFOLDERPATH + TEAMMEMBERNAME + '_cloud.png'``.
    """
    # Load team member details; the context manager closes the file even
    # if reading fails part-way.
    with open(STRENGTHPATH + TEAMMEMBERNAME + '.txt', "r") as a_file:
        list_of_sentences = [line.strip() for line in a_file]

    # Generate strength sentence frequency list.
    # The first sentence weighs 30.  ``next_weight[n]`` is the weight of the
    # sentence that follows the n-th one; after the table runs out the
    # weight is 7 - n, floored at 1 (sequence: 30, 22, 12, 7, 4, 2, 1, 1, ...).
    next_weight = {1: 22, 2: 12, 3: 7, 4: 4}
    strength_frequency = {}
    count = 30
    for iter_num, sentence in enumerate(list_of_sentences, start=1):
        strength_frequency[sentence] = count
        count = max(1, next_weight.get(iter_num, 7 - iter_num))

    # Select cloud shape (mask)
    # sentences = len(list_of_sentences)
    # maskname = 'jetfighter.jpg' if sentences > 25 else 'diamond.jpg' if sentences > 20 else 'oval.jpg' if sentences > 15 else 'diamond.jpg'
    with Image.open(CLOUDMASK) as mask_image:
        custom_mask = np.array(mask_image)

    # Generate cloud.  The input is a sentence -> weight mapping, so it goes
    # through generate_from_frequencies(); generate() is used with raw text
    # everywhere else in this file.
    textcloud = TextCloud(
        width=1000,
        height=1000,
        background_color="rgba(255, 255, 255, 0)",
        mode='RGBA',
        min_font_size=20,
        max_font_size=400,
        font_path=FONT,
        repeat=True,
        colormap='Set3',
        # color_func=partial(palette_color_func, palette=5)
        mask=custom_mask,
    ).generate_from_frequencies(strength_frequency)

    # Save to file
    strength_cloud = np.array(textcloud)
    # NOTE(review): cv2.imwrite presumably expects BGR(A) channel order while
    # this array comes from an RGBA cloud - confirm whether a channel swap
    # is needed before writing.
    cv2.imwrite(OFOLDERPATH + TEAMMEMBERNAME + '_cloud.png', strength_cloud)
def test_default():
    """Default cloud creation plus image and numpy conversions work."""
    cloud = TextCloud(max_words=50)
    cloud.generate(THIS)

    # word extraction and layout both reach the configured limit
    limit = cloud.max_words
    assert len(cloud.words_) == limit
    assert len(cloud.layout_) == limit

    # image export has the configured canvas size
    image = cloud.to_image()
    assert image.size == (cloud.width, cloud.height)

    # numpy conversion agrees with to_array() and yields an RGB array
    as_array = np.array(cloud)
    assert_array_equal(as_array, cloud.to_array())
    assert as_array.shape == (cloud.height, cloud.width, 3)
# subsample by factor of 3. Very lossy but for a wordcloud we don't really care. parrot_color = parrot_color[::3, ::3] # create mask white is "masked out" parrot_mask = parrot_color.copy() parrot_mask[parrot_mask.sum(axis=2) == 0] = 255 # some finesse: we enforce boundaries between colors so they get less washed out. # For that we do some edge detection in the image edges = np.mean([gaussian_gradient_magnitude(parrot_color[:, :, i] / 255., 2) for i in range(3)], axis=0) parrot_mask[edges > .08] = 255 # create wordcloud. A bit sluggish, you can subsample more strongly for quicker rendering # relative_scaling=0 means the frequencies in the data are reflected less # acurately but it makes a better picture wc = TextCloud(max_words=2000, mask=parrot_mask, max_font_size=40, random_state=42, relative_scaling=0) # generate word cloud wc.generate(text) plt.imshow(wc) # create coloring from image image_colors = ImageColorGenerator(parrot_color) wc.recolor(color_func=image_colors) plt.figure(figsize=(10, 10)) plt.imshow(wc, interpolation="bilinear") wc.to_file("parrot_new.png") plt.figure(figsize=(10, 10)) plt.title("Original Image") plt.imshow(parrot_color)
def test_mask_contour():
    """Check that a mask contour is drawn and reacts to width, scale and color.

    Learn more at:
    https://github.com/amueller/word_cloud/pull/348#issuecomment-370883873
    """
    # Use the builtin ``int``: the ``np.int`` alias was deprecated in
    # NumPy 1.20 and removed in 1.24, where it raises AttributeError.
    mask = np.zeros((234, 456), dtype=int)
    mask[100:150, 300:400] = 255

    sm = TextCloud(mask=mask, contour_width=1, contour_color='blue')
    sm.generate(THIS)
    sm_array = np.array(sm)
    sm_total = sm_array[100:150, 300:400].sum()

    lg = TextCloud(mask=mask, contour_width=20, contour_color='blue')
    lg.generate(THIS)
    lg_array = np.array(lg)
    lg_total = lg_array[100:150, 300:400].sum()

    sc = TextCloud(mask=mask, contour_width=1, scale=2, contour_color='blue')
    sc.generate(THIS)
    sc_array = np.array(sc)
    sc_total = sc_array[100:150, 300:400].sum()

    # test `contour_width`
    assert lg_total > sm_total

    # test contour varies with `scale`
    assert sc_total > sm_total

    # test `contour_color`: the corner pixel of the masked region is pure blue
    assert all(sm_array[100, 300] == [0, 0, 255])
def test_relative_scaling_zero():
    """Non-regression test for non-integer font size with relative_scaling=0."""
    cloud = TextCloud(relative_scaling=0)
    cloud.generate(THIS)
def test_process_text_regexp_parameter():
    """Word processing is influenced by the `regexp` parameter."""
    five_char_pattern = r'\w{5}'
    cloud = TextCloud(max_words=50, regexp=five_char_pattern)
    extracted = cloud.process_text(THIS)
    # a four-letter word cannot match the pattern
    assert 'than' not in extracted
def test_include_numbers():
    """With include_numbers=True the token '14' is kept by process_text()."""
    frequencies = TextCloud(include_numbers=True).process_text(THIS)
    assert '14' in frequencies.keys()
def test_min_word_length():
    """With min_word_length=5 the shortest processed word has five characters."""
    frequencies = TextCloud(min_word_length=5).process_text(THIS)
    shortest = min([len(word) for word in frequencies.keys()])
    assert shortest == 5
def test_recolor_too_small_set_default():
    """No exception is raised for a too-small coloring image when a default color is set."""
    small_image = np.array(Image.new('RGB', size=(20, 20)))
    cloud = TextCloud(max_words=50, width=30, height=30, min_font_size=1)
    cloud = cloud.generate(THIS)
    color_func = ImageColorGenerator(small_image, default_color=(0, 0, 0))
    cloud.recolor(color_func=color_func)
def test_small_canvas():
    """The font size fallback still places words on a small canvas."""
    cloud = TextCloud(max_words=50, width=21, height=21)
    cloud.generate(SMALL_CANVAS)
    placed = len(cloud.layout_)
    assert placed > 0
def test_tiny_canvas():
    """generate() raises ValueError when the canvas is too small for the fallback."""
    w = TextCloud(max_words=50, width=1, height=1)
    with pytest.raises(ValueError, match="Couldn't find space to draw"):
        w.generate(THIS)
    # Checked after the ``raises`` block on purpose: a statement placed
    # after the raising call inside the block would never execute.
    assert len(w.layout_) == 0