def make_tag_cloud(data, can_be_noun_arg, process_option='freqs'):
    """Render tag-cloud PNG image(s) from word weights computed over *data*.

    Parameters
    ----------
    data : passed through unchanged to the selected weighting function.
    can_be_noun_arg : flag forwarded to the weighting function; when falsy,
        the single-image filename is prefixed with ``not_``.
    process_option : one of ``'concatenate'``, ``'freqs'`` or ``'race'``.

    Raises
    ------
    KeyError
        If *process_option* is not one of the known options.

    If the weighting function returns a list of ``(word, weight)`` pairs, one
    image is written. If it returns a dict of such lists, one image per key
    is written. Any other return type produces no output.
    """
    stop_words = sw.words()
    process_f = {
        'concatenate': lambda: concatenate(data, can_be_noun_arg, stop_words),
        'freqs': lambda: freq_weight(data, can_be_noun_arg, stop_words),
        'race': lambda: race_tfidf(data, can_be_noun_arg, stop_words),
    }
    freqs = process_f[process_option]()

    def top_normalized(pairs, limit=30):
        # Keep the first `limit` pairs and rescale their weights to sum to 1,
        # in case the weights are raw counts. Builtin sum/float replace
        # np.sum-on-a-generator (deprecated) and np.float (removed).
        top = pairs[:limit]
        total = sum(weight for _, weight in top)
        if not total:
            # All-zero weights cannot be normalized; leave them untouched
            # instead of dividing by zero.
            return top
        return [(word, float(weight) / total) for word, weight in top]

    if isinstance(freqs, list):
        tags = make_tags(top_normalized(freqs), maxsize=80)
        fname = 'noun_last_words_{}.png'.format(process_option)
        if not can_be_noun_arg:
            fname = 'not_' + fname
        create_tag_image(tags, fname, size=(900, 600), fontname='Lobster')
    elif isinstance(freqs, dict):
        for k in freqs:
            top_freqs = top_normalized(freqs[k])
            print(top_freqs)
            tags = make_tags(top_freqs, maxsize=15)
            fname = 'noun_last_words_{}_{}.png'.format(process_option, k)
            create_tag_image(tags, fname, size=(900, 600), fontname='Lobster')
def wordcloud(self, OVERALLTEXT, NEGATIVETEXT, POSITIVETEXT, test_output, font):
    """Write overall, negative and positive tag-cloud images for every layout.

    Parameters
    ----------
    OVERALLTEXT, NEGATIVETEXT, POSITIVETEXT : paths of the text files to read.
    test_output : directory the PNG files are written into.
    font : font name forwarded to ``create_tag_image``.

    For each entry in ``LAYOUTS`` three files are produced:
    ``Overall_<layout>.png``, ``negative_<layout>.png`` and
    ``positive_<layout>.png``.
    """
    def read_text(path):
        # Context manager guarantees the handle is closed (the previous code
        # left all three files open).
        with open(path, 'r') as handle:
            return handle.read()

    # Read everything up front so a missing input fails before any image is
    # written, as before.
    overall_text = read_text(OVERALLTEXT)
    negative_text = read_text(NEGATIVETEXT)
    positive_text = read_text(POSITIVETEXT)

    # Overall
    tags = make_tags(get_tag_counts(overall_text)[:50], maxsize=90, minsize=15)
    for layout in LAYOUTS:
        create_tag_image(
            tags,
            os.path.join(test_output, 'Overall_%s.png' % layout),
            size=(900, 600),
            background=(255, 255, 255),
            layout=layout,
            fontname=font,
            rectangular=True,
        )

    # Negative
    tags = make_tags(get_tag_counts(negative_text)[:50], maxsize=90, minsize=15,
                     colors=COLOR_SCHEMES['audacity'])
    for layout in LAYOUTS:
        create_tag_image(
            tags,
            os.path.join(test_output, 'negative_%s.png' % layout),
            size=(900, 600),
            background=(205, 50, 50),
            layout=layout,
            fontname=font,
        )

    # Positive
    tags = make_tags(get_tag_counts(positive_text)[:50], maxsize=120, minsize=25,
                     colors=COLOR_SCHEMES['oldschool'])
    for layout in LAYOUTS:
        create_tag_image(
            tags,
            os.path.join(test_output, 'positive_%s.png' % layout),
            size=(900, 600),
            background=(0, 255, 15),
            layout=layout,
            fontname=font,
        )
def make_tag_cloud(data, can_be_noun_arg, process_option='freqs'):
    """Render tag-cloud PNG image(s) from word weights computed over *data*.

    Parameters
    ----------
    data : passed through unchanged to the selected weighting function.
    can_be_noun_arg : flag forwarded to the weighting function; when falsy,
        the single-image filename is prefixed with ``not_``.
    process_option : one of ``'concatenate'``, ``'freqs'`` or ``'race'``.

    Raises
    ------
    KeyError
        If *process_option* is not one of the known options.

    If the weighting function returns a list of ``(word, weight)`` pairs, one
    image is written. If it returns a dict of such lists, one image per key
    is written. Any other return type produces no output.
    """
    stop_words = sw.words()
    process_f = {
        'concatenate': lambda: concatenate(data, can_be_noun_arg, stop_words),
        'freqs': lambda: freq_weight(data, can_be_noun_arg, stop_words),
        'race': lambda: race_tfidf(data, can_be_noun_arg, stop_words),
    }
    freqs = process_f[process_option]()

    def top_normalized(pairs, limit=30):
        # Keep the first `limit` pairs and rescale their weights to sum to 1,
        # in case the weights are raw counts. Builtin sum/float replace
        # np.sum-on-a-generator (deprecated) and np.float (removed).
        top = pairs[:limit]
        total = sum(weight for _, weight in top)
        if not total:
            # All-zero weights cannot be normalized; leave them untouched
            # instead of dividing by zero.
            return top
        return [(word, float(weight) / total) for word, weight in top]

    if isinstance(freqs, list):
        tags = make_tags(top_normalized(freqs), maxsize=80)
        fname = 'noun_last_words_{}.png'.format(process_option)
        if not can_be_noun_arg:
            fname = 'not_' + fname
        create_tag_image(tags, fname, size=(900, 600), fontname='Lobster')
    elif isinstance(freqs, dict):
        for k in freqs:
            top_freqs = top_normalized(freqs[k])
            print(top_freqs)
            tags = make_tags(top_freqs, maxsize=15)
            fname = 'noun_last_words_{}_{}.png'.format(process_option, k)
            create_tag_image(tags, fname, size=(900, 600), fontname='Lobster')
def run(textpath):
    """Build two tag clouds from the UTF-8 text file at *textpath*.

    ``cloud.png`` is built from the raw tag counts and ``rcloud.png`` from
    the counts after ``process_tags``; each uses the first 100 entries. The
    elapsed time, including reading the file, is printed at the end.
    """
    # Binary read + explicit decode works on Python 2 and 3, and the context
    # manager closes the handle (it was previously leaked).
    with open(textpath, 'rb') as source:
        start = time.time()
        text = source.read().decode('utf8')

    taglist = get_tag_counts(text)
    cleantaglist = process_tags(taglist)

    tags = make_tags(taglist[0:100], colors=COLOR_MAP)
    create_tag_image(tags, 'cloud.png', size=(1280, 900),
                     background=(0, 0, 0, 255),
                     layout=LAYOUT_MOST_HORIZONTAL, crop=False,
                     fontname='Cuprum', fontzoom=2)

    tags2 = make_tags(cleantaglist[0:100], colors=COLOR_MAP)
    create_tag_image(tags2, 'rcloud.png', size=(1280, 900),
                     background=(0, 0, 0, 255),
                     layout=LAYOUT_MOST_HORIZONTAL, crop=False,
                     fontname='Cuprum', fontzoom=2)

    print("Duration: %d sec" % (time.time() - start))
def createTagCloud(self, wordline):
    """Create a tag cloud image from *wordline* and save it to ``self.png``.

    The first whitespace-separated token of *wordline* is discarded; the
    remaining tokens are counted and the ``self.wordCount`` most frequent
    ones are drawn using the colour scheme selected by ``self.color``.

    Returns
    -------
    bool
        ``False`` when *wordline* is empty or contains only whitespace (no
        image is written), ``True`` once the image has been created.
    """
    # A whitespace-only line tokenizes to nothing; previously that raised
    # IndexError when dropping the first token.
    if not wordline.strip():
        return False

    tokens = WhitespaceTokenizer().tokenize(wordline)
    wordstream = ' '.join(tokens[1:])  # skip the leading token

    thresh = self.wordCount
    colorS = self.colorSchemes[self.color]
    tags = make_tags(get_tag_counts(wordstream)[:thresh],
                     minsize=3, maxsize=40,
                     colors=COLOR_SCHEMES[colorS])
    create_tag_image(tags, self.png,
                     size=(960, 400),
                     background=(255, 255, 255, 255),
                     layout=LAYOUT_HORIZONTAL,
                     fontname='Neuton')
    return True
def action(counts):
    """Turn *counts* into tags and draw them to ``weibo_liu.png``."""
    rendered_tags = make_tags(counts, minsize=15, maxsize=120)
    create_tag_image(
        rendered_tags,
        'weibo_liu.png',
        background=(0, 0, 0, 0),
        size=(1200, 1200),
        fontname="simhei",
    )
def plot(game_name, game_id):
    """Draw a tag cloud of keywords extracted from a game's comments.

    Parameters
    ----------
    game_name : used to build the output path ``c:/<game_name>.png``.
    game_id : id interpolated into the comment query.

    Up to three keywords are extracted from column 2 of each comment row.
    Keywords shorter than two characters or present in ``stop`` are skipped.
    """
    # NOTE(review): the query is built with string interpolation; if game_id
    # can come from untrusted input, switch to a parameterized query once
    # DbUtil's API for that is confirmed.
    comments = DbUtil.getAllResult("select * from comment where game_id = %s" % game_id)

    word_counts = {}  # previously named `dict`, which shadowed the builtin
    for comment in comments:
        for word in jieba.analyse.extract_tags(comment[2], topK=3):
            if len(word) < 2 or word in stop:
                continue
            word_counts[word] = word_counts.get(word, 0) + 1
    print(word_counts)

    ranked = sorted(word_counts.items(), key=itemgetter(1), reverse=True)
    # NOTE(review): the slice starts at 1, so the single most frequent word
    # is dropped - confirm this is intentional.
    ranked = ranked[1:50]

    tags = make_tags(ranked, minsize=30, maxsize=120,
                     colors=random.choice(list(COLOR_SCHEMES.values())))
    create_tag_image(tags, 'c:/%s.png' % game_name,
                     background=(0, 0, 0, 255),
                     size=(900, 600),
                     fontname='SimHei')
    print('having saved file to disk')
def get_tag_cloud(request, region_code):
    """Render ``tag_cloud.html`` with a tag cloud of the tweets in a region.

    Parameters
    ----------
    request : the incoming request (not otherwise used here).
    region_code : code of the ``DataZone`` whose tweets are summarised.

    Returns
    -------
    The response produced by ``render_to_response`` with the ``tags`` markup
    and ``css`` rules in its context.
    """
    from django.utils.html import escape

    # Get all tweets in the region
    data_zone = DataZone.objects.get(code=region_code)
    tweet_locations = TweetLocation.objects.filter(zone=data_zone)
    # Join once instead of growing a string inside the loop.
    body_text = ''.join(x.tweet.body + ' ' for x in tweet_locations)

    tc = TagCloud()
    body_text = tc.filter_body(body_text)
    if body_text.strip() == '':
        body_text = "Region Empty"

    tags = make_tags(get_tag_counts(body_text)[:50], maxsize=50,
                     colors=COLOR_SCHEMES['audacity'])
    data = create_html_data(tags, (560, 450), layout=LAYOUT_HORIZONTAL,
                            fontname='PT Sans Regular')

    tags_template = (
        '<li class="cnt" style="top: %(top)dpx; left: %(left)dpx; height: %(height)dpx;">'
        '<a class="tag %(cls)s" href="#%(tag)s" style="top: %(top)dpx; '
        'left: %(left)dpx; font-size: %(size)dpx; height: %(height)dpx; line-height:%(lh)dpx;">'
        '%(tag)s</a></li>'
    )
    # Tag text originates from tweets, so escape it before it is placed into
    # markup.
    tags_html = ''.join(
        tags_template % dict(link, tag=escape(link['tag']))
        for link in data['links']
    )
    css = "".join(
        "a.%(cname)s{color:%(normal)s;}a.%(cname)s:hover{color:%(hover)s;}"
        % {'cname': k, 'normal': v[0], 'hover': v[1]}
        for k, v in data['css'].items()
    )
    return render_to_response('tag_cloud.html', {'tags': tags_html, 'css': css})
def word_cloud(final_object, cloud_object):
    """Count the words in *final_object* and draw them as a tag cloud.

    Parameters
    ----------
    final_object : iterable of words; ``"no_object"`` entries and words of
        length 1 or less are ignored.
    cloud_object : unused; kept so existing callers keep working.

    The image is written to ``cloud_1_All_Objects.png``.
    """
    from operator import itemgetter

    counted = {}
    for word in final_object:
        if word != "no_object" and len(word) > 1:
            counted[word] = counted.get(word, 0) + 1

    # .items() and `in`-style counting work on both Python 2 and 3, unlike
    # has_key()/iteritems().
    counts = sorted(counted.items(), key=itemgetter(1), reverse=True)
    print("Total count of Word Cloud List Items:  %s" % (counts,))

    words = make_tags(counts, maxsize=100)
    print("Word Cloud List items:  %s" % (words,))
    create_tag_image(words, 'cloud_1_All_Objects.png', size=(1280, 900),
                     fontname='Lobster')
def plot(game_name, game_id):
    """Draw a tag cloud of keywords extracted from a game's comments.

    Parameters
    ----------
    game_name : used to build the output path ``c:/<game_name>.png``.
    game_id : id interpolated into the comment query.

    Up to three keywords are extracted from column 2 of each comment row.
    Keywords shorter than two characters or present in ``stop`` are skipped.
    """
    # NOTE(review): the query is built with string interpolation; if game_id
    # can come from untrusted input, switch to a parameterized query once
    # DbUtil's API for that is confirmed.
    comments = DbUtil.getAllResult("select * from comment where game_id = %s" % game_id)

    word_counts = {}  # previously named `dict`, which shadowed the builtin
    for comment in comments:
        for word in jieba.analyse.extract_tags(comment[2], topK=3):
            if len(word) < 2 or word in stop:
                continue
            word_counts[word] = word_counts.get(word, 0) + 1
    print(word_counts)

    ranked = sorted(word_counts.items(), key=itemgetter(1), reverse=True)
    # NOTE(review): the slice starts at 1, so the single most frequent word
    # is dropped - confirm this is intentional.
    ranked = ranked[1:50]

    tags = make_tags(ranked, minsize=30, maxsize=120,
                     colors=random.choice(list(COLOR_SCHEMES.values())))
    create_tag_image(tags, 'c:/%s.png' % game_name,
                     background=(0, 0, 0, 255),
                     size=(900, 600),
                     fontname='SimHei')
    print('having saved file to disk')
def _create_image(self, text):
    """Draw a tag cloud for *text* into ``/tmp/cloud_large.png``.

    Exits the process with status -1 when ``get_tag_counts`` returns None.
    When ``self._repeat_tags`` is set, every tag is emitted three times with
    weights 5, 2 and 1. The image is sized to a 4:3 area derived from the
    shorter screen dimension. Returns 0.
    """
    tag_counts = get_tag_counts(text)
    if tag_counts is None:
        sys.exit(-1)

    if self._repeat_tags:
        # One full pass over the tags per weight, in the order 5, 2, 1.
        tag_counts = [(entry[0], weight)
                      for weight in (5, 2, 1)
                      for entry in tag_counts]

    tags = make_tags(tag_counts, maxsize=150, colors=self._color_scheme)
    path = os.path.join('/tmp/cloud_large.png')

    screen_height = Gdk.Screen.height()
    screen_width = Gdk.Screen.width()
    if screen_height < screen_width:
        height = screen_height
        width = int(height * 4 / 3)
    else:
        width = screen_width
        height = int(width * 3 / 4)

    # Only pass fontname when one is configured, so the library default
    # applies otherwise.
    font_kwargs = {}
    if self._font_name is not None:
        font_kwargs['fontname'] = self._font_name
    create_tag_image(tags, path, layout=self._layout, size=(width, height),
                     **font_kwargs)
    return 0
def make_wordcloud(self, *total):
    """Draw ``wordcloud.jpg`` with seven category names sized by rating.

    Parameters
    ----------
    *total : the first positional argument is a sequence of at least seven
        numeric rating totals, one per category in the fixed order below.

    Each rating is scaled by 300 (a rating of 0 is treated as 1) and used as
    the category's frequency; a category whose resulting frequency is not
    positive is left out.
    """
    ratings = total[0]
    scaled = [(1 if rating == 0 else rating) * 300 for rating in ratings]

    category_names = (
        "농작물 경작",
        "공예(만들기)",
        "음식체험",
        "전통문화",
        "자연생태",
        "건강레포츠",
        "산·어촌 생활",
    )

    # Record the frequencies directly instead of materialising thousands of
    # repeated strings just to count them. The -1 keeps the historical
    # weights, which came from `range(1, n)` yielding n - 1 repetitions.
    count = Counter()
    for idx, name in enumerate(category_names):
        repeats = int(scaled[idx]) - 1
        if repeats > 0:
            count[name] = repeats

    tags = count.most_common(7)
    taglist = pytagcloud.make_tags(tags, maxsize=45)
    pytagcloud.create_tag_image(taglist, 'wordcloud.jpg', size=(500, 200),
                                fontname='Noto Sans CJK',
                                layout=pytagcloud.LAYOUT_MOST_HORIZONTAL)
def finance_cloud(tag):
    """Count the tags in the text *tag* and draw them to ``cloud.png``."""
    tag_counts = get_tag_counts(tag)
    sized_tags = make_tags(tag_counts, maxsize=100)
    create_tag_image(
        sized_tags,
        "cloud.png",
        size=(1280, 800),
        background=(0, 0, 0, 255),
        fontname="SimHei",
    )
def generate_html(self, word_freq_tup_list, search_term):
    """Generate the HTML text of a word-cloud page.

    :param word_freq_tup_list: list of ``(word, frequency)`` tuples supplied
        by the caller; used for both the cloud and the plain frequency list
    :param search_term: search term shown in the page title
    :return: generated word cloud html text
    """
    tags = make_tags(word_freq_tup_list)
    cloud_html = create_html_data(tags[:100], (500, 500),
                                  layout=LAYOUT_HORIZONTAL,
                                  fontname='PT Sans Regular')

    temp_path = os.path.abspath(
        os.path.join(os.path.dirname(__file__), '..', '..', '..',
                     'templates', 'template.html'))
    # Context manager closes the template handle (previously leaked).
    with codecs.open(temp_path, mode='r', encoding='utf-8') as template_file:
        html_template = Template(template_file.read())

    context = {}

    # TODO: change href attribute in a tag to give link to data display
    tags_template = (
        '<li class="cnt" style="top: %(top)dpx; left: %(left)dpx; height: %(height)dpx;">'
        '<a class="tag %(cls)s" href="#%(tag)s" style="top: %(top)dpx; '
        'left: %(left)dpx; font-size: %(size)dpx; height: %(height)dpx; line-height:%(lh)dpx;">'
        '%(tag)s</a></li>'
    )
    context['tags'] = ''.join(
        [tags_template % link for link in cloud_html['links']])
    context['width'] = cloud_html['size'][0]
    context['height'] = cloud_html['size'][1]
    context['css'] = "".join(
        "a.%(cname)s{color:%(normal)s;} "
        "a.%(cname)s:hover{color:%(hover)s;}" % {
            'cname': k,
            'normal': v[0],
            'hover': v[1],
        } for k, v in cloud_html['css'].items())

    context['mycss'] = (
        "#word_f { "
        "height: 300px; "
        "-webkit-column-count: 5; "
        "-moz-column-count: 5; "
        "column-count: 5; "
        "} "
        "#word_f li { "
        "display: block; "
        "} "
        "#word_f li a { "
        "color: rgb(0, 162, 232); "
        "}"
    )

    context['word_freq'] = "".join(
        "<li> %(key)s => %(val)s </li>" % {
            'key': t[0],
            'val': str(t[1]),
        } for t in word_freq_tup_list)

    # Adjacent literals keep the title on one logical line without embedding
    # source indentation in the text.
    context['page_title'] = (
        'Search Term : %s - Frequency Based Word Cloud - Top 100 words from search '
        'results (length(word) >= 3)' % search_term
    )
    context['list_title'] = '300 features extracted and their corresponding frequencies'

    html_text = html_template.substitute(context)
    return html_text
def create_cloud(oname, words, maxsize=120, fontname='Lobster'):
    '''Draw a word cloud image, provided pytagcloud can be imported.

    Parameters
    ----------
    oname : output filename
    words : list of (value, str) pairs
    maxsize : int, optional
        Size of the largest word. Usually needs manual tuning per input.
    fontname : str, optional
        Font to use.

    When pytagcloud is missing, a notice is printed unless the module-level
    ``warned_of_error`` flag is truthy, and nothing is drawn.
    '''
    try:
        from pytagcloud import create_tag_image, make_tags
    except ImportError:
        if not warned_of_error:
            print("Could not import pytagcloud. Skipping cloud generation")
        return

    # gensim returns a weight between 0 and 1 for each word, whereas
    # pytagcloud takes word counts, so each weight is multiplied by a large
    # number and truncated to an int. gensim also yields (value, word) while
    # pytagcloud wants (word, value), hence the swap.
    scaled = []
    for value, word in words:
        scaled.append((word, int(value * 10000)))

    create_tag_image(make_tags(scaled, maxsize=maxsize), oname,
                     size=(1800, 1200), fontname=fontname)
def __generate_tag_cloud(self):
    """Extract weighted keywords from the comment file and draw a tag cloud.

    Custom words are first added to, and unwanted words removed from, the
    jieba dictionary. The top 100 weighted keywords are then read from
    ``self.__comment_file_name``, their weights are scaled by 1000 and
    truncated to ints, and the cloud is written to
    ``self.__tag_image_file_name``.
    """
    import jieba.analyse

    for word in ('氪金', '逼氪', '骗氪', '王者荣耀'):
        jieba.add_word(word)
    for word in ('...', '只能', '可能', '觉得', '而且', '然后', '还有', '游戏'):
        jieba.del_word(word)

    # Context manager closes the file even if keyword extraction raises.
    with open(self.__comment_file_name, 'r') as comments_file:
        tags = jieba.analyse.extract_tags(comments_file.read(), topK=100,
                                          withWeight=True)

    dd = []
    for i in tags:
        dd.append((i[0], int(i[1] * 1000)))
        print('i is  %s %s' % (i[0], i[1]))

    tags = make_tags(dd, minsize=10, maxsize=80,
                     colors=COLOR_SCHEMES['audacity'])
    create_tag_image(
        tags,
        self.__tag_image_file_name,
        size=(600, 600),
        layout=LAYOUT_HORIZONTAL,
        # NOTE: the font has to be installed/configured manually before it
        # takes effect; see the ReadMe.
        fontname='SimHei',
    )
    print(self.__tag_image_file_name)
def create_wordcloud_file(self, tags, output_file):
    """Draw the most frequent *tags* into the image file *output_file*.

    Tag count, font sizes, image size, font and background all come from
    ``self.configuration``. *tags* is truncated to ``num_tags`` entries, so
    it is expected to be ordered most-frequent first.
    """
    settings = self.configuration
    # Look up every setting before doing any work so a missing key fails
    # early.
    tag_limit = settings['num_tags']
    smallest = settings['min_tag_size']
    largest = settings['max_tag_size']
    dimensions = settings['image_size']
    font = settings['font']
    backdrop = settings['background']

    logging.info("Creating wordcloud image file: %s" % output_file)

    sized_tags = wc.make_tags(tags[:tag_limit], minsize=smallest,
                              maxsize=largest)
    wc.create_tag_image(sized_tags, output_file,
                        size=dimensions,
                        fontname=font,
                        layout=wc.LAYOUT_HORIZONTAL,
                        background=backdrop)

    done_message = "Created wordcloud image file: %s" % output_file
    logging.info(done_message)
    print(done_message)
def test_create_html_data(self):
    """HTML code sample: write a tag cloud page to ``cloud.html``.

    Reads the text from ``self.hound``, fills ``web/template.html`` (located
    next to this file) and writes the result into ``self.test_output``.
    """
    tags = make_tags(get_tag_counts(self.hound.read())[:100], maxsize=120,
                     colors=COLOR_SCHEMES['audacity'])
    data = create_html_data(tags, (440, 600), layout=LAYOUT_HORIZONTAL,
                            fontname='PT Sans Regular')

    template_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                 'web/template.html')
    # Context managers close both files (the template handle was leaked).
    with open(template_path, 'r') as template_file:
        html_template = Template(template_file.read())

    context = {}

    tags_template = (
        '<li class="cnt" style="top: %(top)dpx; left: %(left)dpx; height: %(height)dpx;">'
        '<a class="tag %(cls)s" href="#%(tag)s" style="top: %(top)dpx; '
        'left: %(left)dpx; font-size: %(size)dpx; height: %(height)dpx; line-height:%(lh)dpx;">'
        '%(tag)s</a></li>'
    )
    context['tags'] = ''.join([tags_template % link for link in data['links']])
    context['width'] = data['size'][0]
    context['height'] = data['size'][1]
    context['css'] = "".join(
        "a.%(cname)s{color:%(normal)s;} "
        "a.%(cname)s:hover{color:%(hover)s;}"
        % {'cname': k, 'normal': v[0], 'hover': v[1]}
        for k, v in data['css'].items())

    html_text = html_template.substitute(context)

    with open(os.path.join(self.test_output, 'cloud.html'), 'w') as html_file:
        html_file.write(html_text)
def make_pytagcloud_image(data, imagefilename, most_top_number=50):
    """Draw three tag clouds from *data*, which must be passed in as a list.

    The *most_top_number* most common items are drawn to *imagefilename*
    with the default layout, and again to ``<stem>_vertical<ext>`` and
    ``<stem>_horizontal<ext>`` with the matching layouts.
    """
    shared_options = {
        'size': (1000, 1000),
        'background': (255, 255, 255),
        'fontname': 'NotoSansCJKkr-Bold',
    }

    most_common = collections.Counter(data).most_common(most_top_number)
    tags = make_tags(most_common, maxsize=200)

    stem, extension = os.path.splitext(imagefilename)
    vertical_image = stem + "_vertical" + extension
    horizontal_image = stem + "_horizontal" + extension

    create_tag_image(tags, imagefilename, **shared_options)
    create_tag_image(tags, vertical_image, layout=LAYOUT_VERTICAL,
                     **shared_options)
    create_tag_image(tags, horizontal_image, layout=LAYOUT_HORIZONTAL,
                     **shared_options)
def create_wordcloud(topic_id):
    """Draw the top 20 words of LDA topic *topic_id* as a tag cloud.

    The image is written to ``cloud<topic_id>.png`` under
    ``WORDCLOUD_PATH``. Each word's weight is multiplied by 100 and
    truncated to an int to obtain its count.
    """
    word_tuples = LDA_MODEL.show_topic(topic_id, 20)

    # Parallel lists of words and their weights.
    words_arr = []
    freq_arr = []
    for word_tuple in word_tuples:
        try:
            word = str(word_tuple[0])
            freq = word_tuple[1]
        except Exception:
            # Skip entries that cannot be converted (e.g. encoding errors).
            continue
        # Append only after both values were read, so the two lists can
        # never get out of step.
        words_arr.append(word)
        freq_arr.append(freq)
    print(words_arr)

    # NOTE(review): the return value is discarded, so this only has an
    # effect if `normalize` mutates its argument in place - confirm.
    normalize(freq_arr)
    print(freq_arr)

    counts = [(word, int(freq * 100))
              for word, freq in zip(words_arr, freq_arr)]

    tags = make_tags(counts, minsize=20, maxsize=60,
                     colors=COLOR_SCHEMES['audacity'])
    output = join(WORDCLOUD_PATH, 'cloud' + str(topic_id) + '.png')
    create_tag_image(tags=tags, output=output,
                     size=(500, 333),
                     background=(255, 255, 255, 255),
                     layout=3, fontname='PT Sans Regular', rectangular=True)
def generate_word_cloud(counts, title):
    """Draw the 20 highest-count keywords of *counts* to ``<title>.png``.

    Parameters
    ----------
    counts : dict mapping keyword to count.
    title : output filename without the ``.png`` extension.
    """
    # Sort the keywords; .items() works on both Python 2 and 3, unlike
    # iteritems().
    sorted_wordscount = sorted(counts.items(), key=operator.itemgetter(1),
                               reverse=True)[:20]

    # Generate the word cloud image
    create_tag_image(make_tags(sorted_wordscount, minsize=50, maxsize=150),
                     title + '.png',
                     size=(1300, 1150),
                     background=(0, 0, 0, 255),
                     layout=LAYOUT_MIX,
                     fontname='Molengo',
                     rectangular=True)
def create_cloud(out_name, words, maxsize=120, fontname='Lobster'):
    """
    Draw a word cloud image, provided pytagcloud can be imported.

    :param out_name: output filename
    :param words: list of (value, str) pairs, the order gensim returns
    :param maxsize: int, optional. Size of the largest word; usually needs
        manual tuning per input.
    :param fontname: str, optional. Font to use.
    :return: None
    """
    try:
        from pytagcloud import create_tag_image, make_tags
    except ImportError:
        if not warned_error:
            print("Could not import pytagcloud. Skipping cloud generation!")
        return

    # gensim weights lie between 0 and 1 but pytagcloud expects integer
    # counts, so each weight is multiplied by a large factor and truncated;
    # this is close enough for a visualisation. The pair order is flipped
    # from (value, word) to the (word, value) that pytagcloud expects.
    scaled = []
    for value, word in words:
        scaled.append((word, int(value * 10000)))

    create_tag_image(make_tags(scaled, maxsize=maxsize), out_name,
                     size=(1800, 1200), fontname=fontname)
def interactive_wordcloud(all_texts):
    '''Write an HTML word cloud of the 200 most frequent words.

    :param all_texts: iterable of texts, each an iterable of words
    :return: None; the page is written to ``out/cloud.html`` under the
        current working directory
    '''
    flat_text = []
    for text in all_texts:
        for word in text:
            flat_text.append(word)
    print(flat_text)

    counts = Counter(flat_text).items()
    print(counts)

    # Sort by count, highest first, and keep the top 200. The previous
    # ascending sort kept the 200 least frequent words instead.
    sorted_wordscount = sorted(counts, key=lambda tup: tup[1],
                               reverse=True)[:200]
    print(sorted_wordscount)

    # Running get_tag_counts results in: UnicodeDecodeError: 'charmap' codec
    # can't decode byte 0xaa in position 90: character maps to <undefined>.
    # This is because stopwords.py, which counter.py (the home of
    # get_tag_counts) calls, does not read the stopwords as utf-8.
    tags = make_tags(sorted_wordscount, maxsize=100)
    print('tags', tags)

    data = create_html_data(tags, size=(1600, 800), layout=LAYOUT_MIX,
                            fontname='Philosopher', rectangular=True)
    print('data', data)

    # =========================================================================
    # Write wordcloud on HTML file
    # =========================================================================
    template_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                 'out/template.html')
    # Context managers close both files (the template handle was leaked).
    with open(template_path, 'r') as template_file:
        html_template = Template(template_file.read())

    context = {}

    tags_template = (
        '<li class="cnt" style="top: %(top)dpx; left: %(left)dpx; height: %(height)dpx;">'
        '<a class="tag %(cls)s" href="#%(tag)s" style="top: %(top)dpx; '
        'left: %(left)dpx; font-size: %(size)dpx; height: %(height)dpx; line-height:%(lh)dpx;">'
        '%(tag)s</a></li>'
    )
    context['tags'] = ''.join([tags_template % link for link in data['links']])
    context['width'] = data['size'][0]
    context['height'] = data['size'][1]
    context['css'] = "".join(
        "a.%(cname)s{color:%(normal)s;} "
        "a.%(cname)s:hover{color:%(hover)s;}" % {
            'cname': k,
            'normal': v[0],
            'hover': v[1],
        } for k, v in data['css'].items())

    html_text = html_template.substitute(context)

    test_output = os.path.join(os.getcwd(), 'out')
    with open(os.path.join(test_output, 'cloud.html'), 'w') as html_file:
        html_file.write(html_text)
def create_cloud(oname, words, maxsize=120, fontname='Lobster'):
    '''Draw a word cloud image, provided pytagcloud can be imported.

    Parameters
    ----------
    oname : output filename
    words : list of (value, str) pairs
    maxsize : int, optional
        Size of the largest word. Usually needs manual tuning per input.
    fontname : str, optional
        Font to use.

    When pytagcloud is missing, a notice is printed unless the module-level
    ``warned_of_error`` flag is truthy, and nothing is drawn.
    '''
    try:
        from pytagcloud import create_tag_image, make_tags
    except ImportError:
        if not warned_of_error:
            print("Could not import pytagcloud. Skipping cloud generation")
        return

    # gensim returns a weight between 0 and 1 for each word, whereas
    # pytagcloud takes word counts, so each weight is multiplied by a large
    # number and truncated to an int. gensim also yields (value, word) while
    # pytagcloud wants (word, value), hence the swap.
    scaled = []
    for value, word in words:
        scaled.append((word, int(value * 10000)))

    create_tag_image(make_tags(scaled, maxsize=maxsize), oname,
                     size=(1800, 1200), fontname=fontname)
def wordcloud(query, layout, font, max_words, verbosity=False):
    """Fetch tweets for *query* and draw their words to ``<query>.png``.

    Parameters
    ----------
    query : search query; also used as the output file stem.
    layout : layout forwarded to ``create_tag_image``.
    font : font name forwarded to ``create_tag_image``.
    max_words : maximum number of words in the cloud.
    verbosity : when truthy, the raw JSON response is printed.

    Stop words, words shorter than three characters and words starting with
    ``http`` are dropped. Counts are normalised to the largest count. If no
    word survives the filtering, no image is written.
    """
    my_oauth, complete_url, stop_words = twitter(query)
    # characters excluded from tweets
    punctuation = "#@!\"$%&'()*+,-./:;<=>?[\\]^_`{|}~'"

    r = requests.get(complete_url, auth=my_oauth)
    tweets = r.json()
    if verbosity:
        print(tweets)

    cleaned = []
    for tweet in tweets['statuses']:
        text = tweet['text'].lower()
        # exclude punctuation from tweets
        cleaned.append(''.join(ch for ch in text if ch not in punctuation))
    # Join with a space: plain concatenation fused the last word of one
    # tweet with the first word of the next.
    words = ' '.join(cleaned).split()

    counts = Counter(words)
    for word in stop_words:
        del counts[word]  # Counter ignores keys that are absent
    # Iterate over a snapshot; deleting while iterating the live key view
    # raises RuntimeError on Python 3.
    for key in list(counts):
        if len(key) < 3 or key.startswith('http'):
            del counts[key]

    final = counts.most_common(max_words)
    if not final:
        # Nothing left to draw; max() below would raise on an empty list.
        return

    max_count = max(final, key=operator.itemgetter(1))[1]
    final = [(name, count / float(max_count)) for name, count in final]

    tags = make_tags(final, maxsize=max_word_size)
    create_tag_image(tags, query + '.png', size=(width, height),
                     layout=layout, fontname=font,
                     background=background_color)
    print("new png created")
def create_cloud(oname, words, maxsize=120, fontname='Lobster'):
    '''Draw a word cloud image, provided pytagcloud can be imported.

    Parameters
    ----------
    oname : output filename
    words : list of (str, value) pairs
    maxsize : int, optional
        Size of the largest word. Usually needs manual tuning per input.
    fontname : str, optional
        Font to use.

    When pytagcloud is missing, a notice is printed unless the module-level
    ``warned_of_error`` flag is truthy, and nothing is drawn.
    '''
    try:
        from pytagcloud import create_tag_image, make_tags
    except ImportError:
        if not warned_of_error:
            print("Could not import pytagcloud. Skipping cloud generation")
        return

    # gensim weights lie between 0 and 1 but pytagcloud expects integer
    # counts, so each weight is multiplied by a large factor and truncated;
    # this is close enough for a visualisation.
    scaled = []
    for word, value in words:
        scaled.append((word, int(value * 10000)))

    create_tag_image(make_tags(scaled, maxsize=maxsize), oname,
                     size=(1800, 1200), fontname=fontname)
def make_html_data(tag_count, file_name, font_max_size=120, html_size=(900, 600)):
    """Write an HTML tag cloud for *tag_count* to ``../dist/<file_name>``.

    Parameters
    ----------
    tag_count : tag counts forwarded to ``make_tags``.
    file_name : name of the HTML file created under ``../dist``.
    font_max_size : maximum font size forwarded to ``make_tags``.
    html_size : ``(width, height)`` forwarded to ``create_html_data``.

    The page is produced from ``web/template.html`` located next to this
    file.
    """
    tag_list = make_tags(tag_count, maxsize=font_max_size)
    data = create_html_data(tag_list, size=html_size,
                            layout=LAYOUT_HORIZONTAL, fontname='Korean')

    template_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                 'web/template.html')
    # Context managers close both files (the template handle was leaked).
    with open(template_path, 'r') as template_file:
        html_template = Template(template_file.read())

    context = {}

    # Adjacent literals keep the %(lh)d format spec intact; it used to be
    # split across a backslash continuation.
    tags_template = (
        '<li class="cnt" style="top: %(top)dpx; left: %(left)dpx; height: %(height)dpx;">'
        '<a class="tag %(cls)s" href="#%(tag)s" '
        'style="top: %(top)dpx;left: %(left)dpx; font-size: %(size)dpx; '
        'height: %(height)dpx; line-height:%(lh)dpx;">'
        '%(tag)s</a></li>'
    )
    context['tags'] = ''.join([tags_template % link for link in data['links']])
    context['width'] = data['size'][0]
    context['height'] = data['size'][1]
    context['css'] = "".join(
        "a.%(cname)s{color:%(normal)s;} "
        "a.%(cname)s:hover{color:%(hover)s;}" % {
            'cname': k,
            'normal': v[0],
            'hover': v[1],
        } for k, v in data['css'].items())

    html_text = html_template.substitute(context)

    with open(os.path.join('../dist', file_name), 'w') as html_file:
        html_file.write(html_text)
def words_check(request):
    """Count the nouns in the posted ``data`` text and draw a word cloud.

    Returns a JSON response holding ``nouns`` (noun/count pairs, highest
    count first) and ``hashing`` (the random number used in the image name).
    """
    # Set up the required library objects and input.
    text = request.POST.get('data')
    komoran = Komoran()
    words = Counter(komoran.nouns(text))

    # Filter out one-character words.
    filtered = {noun: freq for noun, freq in words.items() if len(noun) != 1}
    nouns = sorted(filtered.items(), key=lambda pair: pair[1], reverse=True)

    hashing = random.choice(range(100))
    context = {'nouns': nouns, 'hashing': hashing}

    # Word cloud
    link = 'static/wordcloud/wordcloud' + str(hashing) + '.jpg'
    taglist = pytagcloud.make_tags(nouns, minsize=10, maxsize=60)
    pytagcloud.create_tag_image(taglist, link, size=(600, 600), layout=3,
                                fontname='CookieRun', rectangular=True)

    return HttpResponse(json.dumps(context), content_type='application/json')
def show_token_df():
    """Print document-frequency statistics and draw a cloud of common tokens.

    Loads the vocabulary from ``../data/vocabulary_all.dict``, prints summary
    statistics of the per-token document frequencies, and draws every token
    with a document frequency above 5000 to ``keyword_tag_cloud4.png``.
    """
    dic = _build_vocabulary(dictionary_path='../data/vocabulary_all.dict')
    id2token = dict(dic.items())
    token2df = {id2token[token_id]: doc_freq
                for token_id, doc_freq in dic.dfs.items()}

    frame = pd.DataFrame()
    frame['token'] = token2df.keys()
    frame['df'] = token2df.values()

    print(frame['df'].describe())
    # Sample output:
    # count 125156.000000 mean 63.621824 std 858.189270 min 1.000000
    # 25% 1.000000 50% 2.000000 75% 7.000000 max 39912.000000

    print({token: doc_freq for token, doc_freq in token2df.items()
           if doc_freq > 30000})
    # Sample output:
    # {'起诉书': 38442, '公诉': 39386, '现已': 39136, '参加': 38840,
    #  '检察员': 37974, '检': 37350, '机关': 39859, '元': 31317,
    #  '指控': 39265, '终结': 39468, '月': 39911, '证据': 37175,
    #  '年': 39912, '上述事实': 33553, '犯': 39459, '人民检察院': 39234,
    #  '号': 39814, '审理': 39629, '开庭审理': 35738, '到庭': 38301,
    #  '供述': 30093, '证实': 32083, '被告人': 39864, '提起公诉': 38118,
    #  '依法': 39123, '指派': 33070, '本案': 36616, '出庭': 34811,
    #  '支持': 35414, '公开': 38635, '中': 31875, '本院': 39852,
    #  '刑诉': 38329, '日': 39902, '诉讼': 38437} len 35

    in_range = (frame['df'] > 3) & (frame['df'] < 30000)
    print(frame[in_range].describe())

    filter_words = {token: doc_freq for token, doc_freq in token2df.items()
                    if doc_freq > 5000}
    print(filter_words, 'len %s' % len(filter_words))

    ranked = sorted(filter_words.items(), key=itemgetter(1), reverse=True)
    tags = make_tags(ranked, minsize=10, maxsize=50,
                     colors=COLOR_SCHEMES['goldfish'])
    create_tag_image(tags, 'keyword_tag_cloud4.png', size=(2400, 1000),
                     background=(240, 255, 255),
                     layout=LAYOUT_HORIZONTAL, fontname="SimHei")
def draw_wordcloud(self, tag, name):
    """Draw the tag counts in *tag* to the image file ``<name>.jpg``."""
    output_path = '%s.jpg' % name
    sized_tags = pytagcloud.make_tags(tag, maxsize=80)
    pytagcloud.create_tag_image(
        sized_tags,
        output_path,
        size=(900, 600),
        fontname='Korean',
        rectangular=False,
    )
def create_cloud(oname, words, maxsize=120, fontname='Lobster'):
    '''Draw a word cloud image, provided pytagcloud can be imported.

    Parameters
    ----------
    oname : output filename
    words : list of (str, value) pairs
    maxsize : int, optional
        Size of the largest word. Usually needs manual tuning per input.
    fontname : str, optional
        Font to use.

    When pytagcloud is missing, a notice is printed unless the module-level
    ``warned_of_error`` flag is truthy, and nothing is drawn.
    '''
    try:
        from pytagcloud import create_tag_image, make_tags
    except ImportError:
        if not warned_of_error:
            print("Could not import pytagcloud. Skipping cloud generation")
        return

    # gensim weights lie between 0 and 1 but pytagcloud expects integer
    # counts, so each weight is multiplied by a large factor and truncated;
    # this is close enough for a visualisation.
    scaled = []
    for word, value in words:
        scaled.append((word, int(value * 10000)))

    create_tag_image(make_tags(scaled, maxsize=maxsize), oname,
                     size=(1800, 1200), fontname=fontname)
def make_cloud(self, output_html):
    """Build a keyword cloud as HTML data or as a PNG image.

    When *output_html* is truthy, returns the value of ``create_html_data``
    for the top 30 tags. Otherwise draws the top 100 tags to the resolved
    ``jcuwords:keyword-cloud.png`` asset and returns ``{'image': <url>}``.
    """
    keywords = KeywordManager().all()
    text = ' '.join(kw.keyword for kw in keywords)

    max_tags, max_size = (30, 42) if output_html else (100, self.maxsize)
    tags = make_tags(get_tag_counts(text)[:max_tags],
                     minsize=self.minsize, maxsize=max_size)

    if output_html:
        return create_html_data(tags, size=(900, 300),
                                layout=LAYOUT_HORIZONTAL)

    cloud = self.resolver.resolve('jcuwords:keyword-cloud.png')
    create_tag_image(tags, cloud.abspath(),
                     size=(1024, 500),
                     fontname='IM Fell DW Pica', layout=LAYOUT_MIX)
    return {'image': self.request.resource_url(None, 'keyword-cloud.png')}
def search(query_word):
    """Draw a tag cloud of the words of businesses matching *query_word*.

    Names are searched with a wildcard query in the ``urban`` index, falling
    back to ``champ``. Word frequencies of every matching business are
    looked up in ``my_data``, summed, and drawn to
    ``static/cloud_large.jpg``.

    Returns
    -------
    ``0`` when neither index has a match, otherwise ``None``.
    """
    es = Elasticsearch()
    name_query = {"query": {"wildcard": {"name": {"value": "*" + query_word + "*"}}}}

    res = es.search(index="urban", body=name_query)
    if res['hits']['total'] == 0:
        res = es.search(index="champ", body=name_query)
        if res['hits']['total'] == 0:
            return 0

    totals = defaultdict(int)
    for hit in res['hits']['hits']:
        business_id = hit['_source']['business_id']
        business_query = {"query": {"match": {"business_id": business_id}}}
        business_hits = es.search(index="my_data",
                                  body=business_query)['hits']['hits']
        if not business_hits:
            # No word data stored for this business; indexing [0] would
            # raise IndexError.
            continue
        for entry in business_hits[0]['_source']['word_freq']:
            totals[entry[0]] += entry[1]

    words = list(totals.items())
    tags = make_tags(words, maxsize=80)
    create_tag_image(tags, 'static/cloud_large.jpg', size=(900, 600),
                     fontname='Lobster')
def create_file(res_id):
    """Fetch the reviews of restaurant *res_id* and draw them as a tag cloud.

    The image is written to ``static/img/<res_id>.png``. A short message is
    printed and nothing is drawn when the request fails, the API does not
    answer with status 200, or there are no reviews.
    """
    # NOTE(review): the API key is hard-coded in source; consider loading it
    # from configuration or the environment.
    api_key = 'db837d5e88fefd82d146b8e2e4e45c35'
    headers = {
        'Content-Type': 'application/json',
        'Access-Control-Allow-Origin': '*',
        'user-key': api_key,
    }
    url = "https://developers.zomato.com/api/v2.1/reviews?res_id=%s" % (res_id)

    try:
        response = requests.get(url, headers=headers)
    except requests.RequestException:
        # Catch only request failures instead of every exception.
        print('Network Issues!')
        return

    if response.status_code != 200:
        print('Api Issues')
        return

    data = response.json()
    if data["reviews_count"] == 0:
        print('No Reviews!')
        return

    # Each review text is followed by one space, as before, but joined in a
    # single pass.
    all_reviews = ''.join(entry["review"]["review_text"] + ' '
                          for entry in data["user_reviews"])
    all_reviews = convert(all_reviews)

    tags = make_tags(get_tag_counts(all_reviews), maxsize=50,
                     colors=COLOR_SCHEMES['goldfish'])
    create_tag_image(tags, 'static/img/' + res_id + '.png', size=(900, 600),
                     fontname='Lobster')
def _create_image(self, text):
    """Draw a tag cloud for *text* into ``/tmp/cloud_large.png``.

    Exits the process with status -1 when ``get_tag_counts`` returns None.
    When ``self._repeat_tags`` is set, every tag is emitted three times with
    weights 5, 2 and 1. The image is sized to a 4:3 area derived from the
    shorter screen dimension. Returns 0.
    """
    tag_counts = get_tag_counts(text)
    if tag_counts is None:
        sys.exit(-1)

    if self._repeat_tags:
        # One full pass over the tags per weight, in the order 5, 2, 1.
        tag_counts = [(entry[0], weight)
                      for weight in (5, 2, 1)
                      for entry in tag_counts]

    tags = make_tags(tag_counts, maxsize=150, colors=self._color_scheme)
    path = os.path.join('/tmp/cloud_large.png')

    screen_height = Gdk.Screen.height()
    screen_width = Gdk.Screen.width()
    if screen_height < screen_width:
        height = screen_height
        width = int(height * 4 / 3)
    else:
        width = screen_width
        height = int(width * 3 / 4)

    # Only pass fontname when one is configured, so the library default
    # applies otherwise.
    font_kwargs = {}
    if self._font_name is not None:
        font_kwargs['fontname'] = self._font_name
    create_tag_image(tags, path, layout=self._layout, size=(width, height),
                     **font_kwargs)
    return 0
def tagCloud(self):
    """Draw a tag cloud of all documents in ``self.docSet``.

    The documents are concatenated, each preceded by a space, and the result
    is written to ``filename.png``.
    """
    texts = "".join(" " + item for item in self.docSet)
    sized_tags = make_tags(get_tag_counts(texts), maxsize=120)
    create_tag_image(
        sized_tags,
        'filename.png',
        size=(2000, 1000),
        background=(0, 0, 0, 255),
        layout=LAYOUT_MIX,
        fontname='Lobster',
        rectangular=True,
    )
def saveWordCloud(wordinfo, filename):
    """Draw *wordinfo* to *filename* and open the result.

    *wordinfo* is converted with ``dict()`` first, so it may be a mapping or
    an iterable of ``(word, count)`` pairs. The file is opened through
    ``webbrowser.open``.
    """
    word_counts = dict(wordinfo).items()
    taglist = pytagcloud.make_tags(word_counts, maxsize=80)
    pytagcloud.create_tag_image(
        taglist,
        filename,
        size=(640, 480),
        fontname="korean",
    )
    webbrowser.open(filename)
def draw_pytagcloud(data_array, image_filename):
    """Save a tag cloud of the 50 most common items in ``data_array``.

    Occurrences are counted with ``collections.Counter`` and the image is
    written to ``image_filename`` at 900x600.
    """
    top_words = Counter(data_array).most_common(50)
    cloud_tags = make_tags(top_words, maxsize=50)
    create_tag_image(
        cloud_tags, image_filename, size=(900, 600), fontname='Nanum Gothic')
def make_image(tag_count, file_name, font_max_size=120, size=(900, 600)):
    """Render ``tag_count`` as a non-rectangular tag cloud in ``file_name``.

    ``font_max_size`` is forwarded to ``make_tags`` as ``maxsize`` and
    ``size`` is the image size. A banner line is printed once the image
    has been created.
    """
    cloud_tags = make_tags(tag_count, maxsize=font_max_size)
    create_tag_image(
        cloud_tags,
        file_name,
        size=size,
        fontname='Korean',
        rectangular=False,
    )
    rule = "-" * 6
    print(rule + "make wordcloud Image" + rule)
def create_word_claod(words, output_file_name, maxsize, fontname='Lobster'):
    """Render weighted words as an 1800x1200 tag cloud image.

    ``words`` is an iterable of (word, weight) pairs. Each weight is
    multiplied by 10000 and truncated to an int before being handed to
    ``make_tags``.

    If pytagcloud cannot be imported, a message is printed and the function
    returns ``None`` without creating an image.
    """
    words = [(w, int(v * 10000)) for w, v in words]
    try:
        from pytagcloud import create_tag_image, make_tags
    except ImportError:
        print("Could not import pytagcloud. Skipping cloud generation")
        # Actually skip: falling through would hit a NameError on make_tags.
        return
    tags = make_tags(words, maxsize=maxsize)
    create_tag_image(tags, output_file_name, size=(1800, 1200),
                     fontname=fontname)
def saveWordCloud(wordInfo):
    """Draw ``wordInfo`` as a tag cloud in ``wordcloud.png`` and open it.

    ``wordInfo`` is converted with ``dict(...)`` before its items are passed
    to pytagcloud. The type of the resulting tag list is printed.
    """
    output_name = 'wordcloud.png'
    word_weights = dict(wordInfo)
    cloud_tags = pytagcloud.make_tags(word_weights.items(), maxsize=80)
    print(type(cloud_tags))  # <class 'list'>
    pytagcloud.create_tag_image(
        cloud_tags,
        output_name,
        size=(640, 480),
        fontname='korean',
        rectangular=False,
    )
    webbrowser.open(output_name)
def saveWordCloud(wordInfo, filename):
    """Draw ``wordInfo`` as a 700x480 tag cloud, save it, and open it.

    ``wordInfo`` is converted with ``dict(...)`` before its items are passed
    to pytagcloud. The saved image is opened through ``webbrowser``.
    """
    word_weights = dict(wordInfo)
    cloud_tags = pytagcloud.make_tags(word_weights.items(), maxsize=100)
    pytagcloud.create_tag_image(
        cloud_tags,
        filename,
        size=(700, 480),
        fontname='korean',
        rectangular=False,
    )
    webbrowser.open(filename)
def _test_large_tag_image(self):
    """Render the 80 leading tag counts of ``self.hound`` and time it.

    Writes ``cloud_large.png`` into ``self.test_output`` and prints the
    elapsed wall-clock time in whole seconds.
    """
    started_at = time.time()
    counts = get_tag_counts(self.hound.read())
    tags = make_tags(counts[:80], maxsize=120,
                     colors=COLOR_SCHEMES['audacity'])
    target = os.path.join(self.test_output, 'cloud_large.png')
    create_tag_image(
        tags,
        target,
        ratio=0.75,
        background=(0, 0, 0, 255),
        layout=LAYOUT_HORIZONTAL,
        fontname='Lobster',
    )
    elapsed = time.time() - started_at
    print("Duration: %d sec" % elapsed)
def make_tag_cloud():
    """Build a tag cloud from standard input.

    Every line read from ``sys.stdin`` is stripped, lower-cased and appended
    to the text with a leading space. The 1024x768 image is written to
    ``sys.argv[1] + '.png'``.
    """
    # The accumulator was previously never initialised, so every "+=" raised
    # UnboundLocalError (hidden by a bare except) and the text was never built.
    text = ''.join(' ' + line.strip().lower() for line in sys.stdin)
    tags = make_tags(get_tag_counts(text), maxsize=150)
    create_tag_image(tags, sys.argv[1] + '.png', size=(1024, 768))
def make_tag_cloud():
    """Build a tag cloud from standard input.

    Every line read from ``sys.stdin`` is stripped, lower-cased and appended
    to the text with a leading space. The 1024x768 image is written to
    ``sys.argv[1] + '.png'``.
    """
    # Collect the pieces explicitly: the old loop appended to a variable that
    # was never defined and silently swallowed the resulting error.
    pieces = []
    for line in sys.stdin:
        pieces.append(' ' + line.strip().lower())
    text = ''.join(pieces)

    tags = make_tags(get_tag_counts(text), maxsize=150)
    create_tag_image(tags, sys.argv[1] + '.png', size=(1024, 768))
def create_cloud(counter, filename):
    '''
    Write a tag cloud image to ./img/<filename>.png.

    ``counter`` is passed straight to ``get_tag_counts``; only the first 80
    entries of its result are drawn.
    '''
    leading_counts = get_tag_counts(counter)[:80]
    cloud_tags = make_tags(leading_counts, maxsize=120,
                           colors=COLOR_SCHEMES['goldfish'])
    image_path = './img/' + filename + '.png'
    create_tag_image(
        cloud_tags,
        image_path,
        size=(900, 600),
        background=(0, 0, 0, 255),
        layout=LAYOUT_HORIZONTAL,
        fontname='Lobster',
    )
def make_wordcloud(text, image_name, width, height):
    """Render ``text`` as a ``width`` x ``height`` tag cloud in ``image_name``.

    The pairs produced by ``return_list_of_tuples(text)`` are converted to a
    tuple and used as the tag counts.
    """
    count_pairs = tuple(return_list_of_tuples(text))
    cloud_tags = pytagcloud.make_tags(count_pairs, maxsize=80, minsize=10)
    pytagcloud.create_tag_image(
        cloud_tags,
        image_name,
        size=(width, height),
        fontname='Nanum Gothic',
        rectangular=False,
    )
def build_pytag_cloud(self):
    """Render the highest-count entries of ``self.tagcloud`` and open the image.

    Entries are ordered by count, highest first, and the first
    ``self.wordcount`` of them are drawn. The 900x575 image is saved as
    ``<img_directory>/<state>.<city>.900.575.png`` and then opened through
    ``webbrowser``.
    """
    width, height = 900, 575
    fileName = '{0}/{1}.{2}.{3}.{4}.png'.format(
        self.img_directory, self.state, self.city, width, height)

    ranked = sorted(self.tagcloud.iteritems(), key=itemgetter(1), reverse=True)
    top_entries = ranked[:self.wordcount]
    tags = make_tags(top_entries, maxsize=80)
    create_tag_image(tags, fileName, size=(width, height),
                     fontname='Droid Sans')

    import webbrowser
    webbrowser.open(fileName)  # see results
def calAccuracy(self):
    """Prompt for a phone and write positive/negative review tag clouds.

    Lists the ``id`` and ``name`` columns of ``phone_id``, asks for a phone
    id and a name, then renders ``self.actual_review`` twice: after
    ``self.getReview(1)`` into ``positive.png`` and, once the review text
    has been reset, after ``self.getReview(0)`` into ``negative.png``.

    Both images are written into the per-product directory under the
    Features folder; the directory is created if it does not exist yet.
    """
    import os

    self.cursor1.execute("select id, name from phone_id;")
    for data in self.cursor1.fetchall():
        print('%s %s' % (data[0], data[1]))

    self.phone_id = raw_input("Enter phone id\n")
    self.name = raw_input("Enter name:")

    # Write the images into the directory that is created here. Previously
    # they went to a cwd-relative "<name>/" path that only coincided with
    # this directory when the process was started from the Features folder.
    out_dir = '/home/darshan-ubuntu/Project/Products/Features/' + self.name
    if not os.path.isdir(out_dir):
        # Tolerate re-runs for the same product name.
        os.makedirs(out_dir)

    self.getReview(1)
    tags = make_tags(get_tag_counts(self.actual_review), maxsize=120)
    create_tag_image(tags, os.path.join(out_dir, 'positive.png'),
                     size=(900, 600))

    self.actual_review = ""
    self.getReview(0)
    tags = make_tags(get_tag_counts(self.actual_review), maxsize=60)
    create_tag_image(tags, os.path.join(out_dir, 'negative.png'),
                     size=(900, 600))
def _test_make_tags(self):
    """Check that the tag 'sir' with size 40 is among the first 60 tags."""
    mtags = make_tags(get_tag_counts(self.hound.read())[:60])
    found = any(
        tag['tag'] == 'sir' and tag['size'] == 40
        for tag in mtags
    )
    self.assertTrue(found)
def test_make_tags(self):
    """Assert that make_tags yields the tag 'sir' at size 40.

    Only the first 60 tag counts of the text read from ``self.hound`` are
    considered.
    """
    counts = get_tag_counts(self.hound.read())
    generated = make_tags(counts[:60])
    has_expected_tag = any(
        entry['tag'] == 'sir' and entry['size'] == 40
        for entry in generated
    )
    self.assertTrue(has_expected_tag)
def semantic_cloud(topic):
    """Draw a tag cloud of the hashtags found for ``topic``.

    The hashtag list is built from the ``'statuses'`` entry of the topic
    list, joined with spaces, printed, and rendered to
    ``semantic_cloud.png``.
    """
    statuses = TopicList(topic).GetTopicList()['statuses']
    hashtags = HashtagsList(statuses, topic).GetHashtagsList()
    cadena = " ".join(hashtags)
    print(cadena)
    cloud_tags = make_tags(get_tag_counts(cadena), maxsize=120)
    create_tag_image(cloud_tags, 'semantic_cloud.png', size=(900, 600),
                     fontname='Lobster')
def test_layouts(self):
    """Render one cloud image per entry in ``LAYOUTS`` and time the run.

    Uses the first 80 tag counts of the text read from ``self.hound``.
    Images are written as ``cloud_<layout>.png`` into ``self.test_output``,
    and the elapsed time in whole seconds is printed at the end.
    """
    started_at = time.time()
    counts = get_tag_counts(self.hound.read())
    tags = make_tags(counts[:80], maxsize=120)
    for layout in LAYOUTS:
        target = os.path.join(self.test_output, 'cloud_%s.png' % layout)
        create_tag_image(
            tags,
            target,
            size=(900, 600),
            background=(255, 255, 255, 255),
            layout=layout,
            fontname='Lobster',
        )
    elapsed = time.time() - started_at
    print("Duration: %d sec" % elapsed)
def makeCloud(self, tagsCounts, name="tag_cloud.png", height=500,
              width=500, font="Droid Sans"):
    """Render ``tagsCounts`` as a rectangular tag cloud saved to ``name``.

    The counts are first passed through ``self.parseWords`` and coloured
    with ``self._colors``. ``width`` and ``height`` give the image size and
    ``font`` the font name.
    """
    # Get rid of unigrams contained in bigrams
    filtered_counts = self.parseWords(tagsCounts)
    cloud_tags = pytagcloud.make_tags(filtered_counts, colors=self._colors)
    pytagcloud.create_tag_image(
        cloud_tags,
        name,
        size=(width, height),
        fontname=font,
        rectangular=True,
    )
def make_cloud(text, fname):
    '''Create a word cloud image from ``text`` and save it as ``fname``.png.

    The text is lower-cased and the substrings 'http', 'rt ' and '.co' are
    removed before counting. The first 50 tag counts are drawn on a
    3000x3250 black canvas.
    '''
    # The bigram tally that used to be built here was never used for the
    # image, and it relied on the Python-2-only dict.iteritems().
    cleaned = (text.lower()
               .replace('http', '')
               .replace('rt ', '')
               .replace('.co', ''))
    tags = make_tags(get_tag_counts(cleaned)[:50], maxsize=350, minsize=100)
    create_tag_image(
        tags,
        fname + '.png',
        size=(3000, 3250),
        background=(0, 0, 0, 255),
        layout=LAYOUT_MIX,
        fontname='Lobster',
        rectangular=True,
    )
def __init__(self, raw_text, except_words):
    """Filter ``raw_text`` and build the tags for it.

    Stores the filtered text in ``self.filtered`` and the generated tags in
    ``self.tags``.
    """
    # Keep only words longer than two characters that are not in the
    # exception list.
    kept_words = []
    for word in raw_text.split():
        if len(word) > 2 and word not in except_words:
            kept_words.append(word)
    filtered = " ".join(kept_words)

    # Count occurrences of each remaining word.
    tag_counts = get_tag_counts(filtered)

    self.filtered = filtered
    self.tags = ptc.make_tags(tag_counts, maxsize=60, minsize=6)
def createTagCloud(self, rapper):
    """Create ``cloud_large.png`` from ``rapper.rawLyrics``.

    Only tags whose computed size is greater than 20 are drawn.
    """
    # For some reason these imports only work when placed in the function
    # but they do not if they are placed at the top of the document
    from pytagcloud import create_tag_image, make_tags
    from pytagcloud.lang.counter import get_tag_counts

    lyrics = rapper.rawLyrics
    all_tags = make_tags(get_tag_counts(lyrics), maxsize=100)
    large_tags = []
    for candidate in all_tags:
        if candidate['size'] > 20:
            large_tags.append(candidate)
    create_tag_image(
        large_tags,
        'cloud_large.png',
        size=(800, 400),
        background=(239, 101, 85, 255),
        fontname='PT Sans Regular',
    )
def cr_tagcloud(words, fn, minsize=17, maxsize=50, size=(680, 500),
                fontname='Nobile'):
    """Render a tag cloud to ``fn`` from indexable ``words`` records.

    Element 0 of each record is used as the tag and element 2 as its
    count; any other elements are ignored.
    """
    count_pairs = []
    for record in words:
        count_pairs.append((record[0], record[2]))
    cloud_tags = make_tags(count_pairs, minsize=minsize, maxsize=maxsize)
    create_tag_image(cloud_tags, fn, size=size, fontname=fontname)
def init():
    """Prepare the module-level ``test_output`` directory and ``tags``.

    ``test_output`` is set to ``<home>/pytagclouds``, where home comes from
    ``USERPROFILE`` or, failing that, ``HOME``; the directory is created if
    it is missing. ``tags`` is built from the first 50 tag counts of
    ``../test/pg2852.txt``, resolved relative to this file.
    """
    global tags
    global test_output

    home_folder = os.getenv('USERPROFILE') or os.getenv('HOME')
    test_output = os.path.join(home_folder, 'pytagclouds')
    if not os.path.exists(test_output):
        os.mkdir(test_output)

    corpus_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                               '../test/pg2852.txt')
    # Read through a context manager so the file handle is always closed.
    with open(corpus_path, 'r') as hound:
        corpus = hound.read()

    tags = make_tags(get_tag_counts(corpus)[:50], maxsize=120,
                     colors=COLOR_SCHEMES['audacity'])