def wordcloud(self, OVERALLTEXT, NEGATIVETEXT, POSITIVETEXT, test_output, font):
    """Render overall/negative/positive tag-cloud PNGs into `test_output`.

    OVERALLTEXT / NEGATIVETEXT / POSITIVETEXT are paths to text files.
    One image is written per layout in LAYOUTS for each sentiment bucket;
    `font` is passed straight through to pytagcloud.
    """
    # Read each corpus once and close the handles promptly — the original
    # leaked three open file objects.
    with open(OVERALLTEXT, 'r') as fh:
        overall_text = fh.read()
    with open(NEGATIVETEXT, 'r') as fh:
        negative_text = fh.read()
    with open(POSITIVETEXT, 'r') as fh:
        positive_text = fh.read()
    # Overall: top 50 tags, white background, rectangular packing.
    tags = make_tags(get_tag_counts(overall_text)[:50], maxsize=90, minsize=15)
    for layout in LAYOUTS:
        create_tag_image(tags, os.path.join(test_output, 'Overall_%s.png' % layout),
                         size=(900, 600), background=(255, 255, 255),
                         layout=layout, fontname=font, rectangular=True)
    # Negative: red background, 'audacity' colour scheme.
    tags = make_tags(get_tag_counts(negative_text)[:50], maxsize=90, minsize=15,
                     colors=COLOR_SCHEMES['audacity'])
    for layout in LAYOUTS:
        create_tag_image(tags, os.path.join(test_output, 'negative_%s.png' % layout),
                         size=(900, 600), background=(205, 50, 50),
                         layout=layout, fontname=font)
    # Positive: green background, larger glyphs, 'oldschool' scheme.
    tags = make_tags(get_tag_counts(positive_text)[:50], maxsize=120, minsize=25,
                     colors=COLOR_SCHEMES['oldschool'])
    for layout in LAYOUTS:
        create_tag_image(tags, os.path.join(test_output, 'positive_%s.png' % layout),
                         size=(900, 600), background=(0, 255, 15),
                         layout=layout, fontname=font)
def create_file(res_id): all_reviews = '' api_key = 'db837d5e88fefd82d146b8e2e4e45c35' headers = { 'Content-Type': 'application/json', 'Access-Control-Allow-Origin': '*', 'user-key': api_key } url = "https://developers.zomato.com/api/v2.1/reviews?res_id=%s" % (res_id) try: response = requests.get(url, headers=headers) except: print 'Network Issues!' return if response.status_code == 200: data = response.json() count = data["reviews_count"] if count == 0: print 'No Reviews!' else: for review in data["user_reviews"]: review = review["review"] all_reviews = all_reviews + review["review_text"] + ' ' all_reviews = convert(all_reviews) tags = make_tags(get_tag_counts(all_reviews), maxsize=50, colors=COLOR_SCHEMES['goldfish']) create_tag_image(tags, 'static/img/' + res_id + '.png', size=(900, 600), fontname='Lobster') else: print 'Api Issues'
def _create_image(self, text):
    """Render `text` as a tag cloud fitted to the screen; returns 0."""
    tag_counts = get_tag_counts(text)
    if tag_counts is None:
        sys.exit(-1)
    if self._repeat_tags:
        # Re-emit every tag at three descending fixed weights (5, 2, 1),
        # preserving the original ordering: all 5s, then 2s, then 1s.
        tag_counts = [(entry[0], weight)
                      for weight in (5, 2, 1)
                      for entry in tag_counts]
    tags = make_tags(tag_counts, maxsize=150, colors=self._color_scheme)
    path = os.path.join('/tmp/cloud_large.png')
    # Fit a 4:3 (or 3:4) image inside the current screen dimensions.
    if Gdk.Screen.height() < Gdk.Screen.width():
        height = Gdk.Screen.height()
        width = int(height * 4 / 3)
    else:
        width = Gdk.Screen.width()
        height = int(width * 3 / 4)
    render_kwargs = {'layout': self._layout, 'size': (width, height)}
    if self._font_name is not None:
        render_kwargs['fontname'] = self._font_name
    create_tag_image(tags, path, **render_kwargs)
    return 0
def get_tag_cloud(request, region_code):
    """Render tag_cloud.html from all tweets geolocated inside `region_code`."""
    zone = DataZone.objects.get(code=region_code)
    locations = TweetLocation.objects.filter(zone=zone)
    # Same string a manual += loop produced: each body followed by a space.
    body_text = ''.join(loc.tweet.body + ' ' for loc in locations)
    cloud = TagCloud()
    body_text = cloud.filter_body(body_text)
    if body_text.strip() == '':
        body_text = "Region Empty"
    tags = make_tags(get_tag_counts(body_text)[:50], maxsize=50,
                     colors=COLOR_SCHEMES['audacity'])
    data = create_html_data(tags, (560, 450), layout=LAYOUT_HORIZONTAL,
                            fontname='PT Sans Regular')
    tags_template = '<li class="cnt" style="top: %(top)dpx; left: %(left)dpx; height: %(height)dpx;"><a class="tag %(cls)s" href="#%(tag)s" style="top: %(top)dpx; left: %(left)dpx; font-size: %(size)dpx; height: %(height)dpx; line-height:%(lh)dpx;">%(tag)s</a></li>'
    context = {
        'tags': ''.join(tags_template % link for link in data['links']),
        'width': data['size'][0],
        'height': data['size'][1],
        'css': "".join(
            "a.%(cname)s{color:%(normal)s;}a.%(cname)s:hover{color:%(hover)s;}"
            % {'cname': k, 'normal': v[0], 'hover': v[1]}
            for k, v in data['css'].items()),
    }
    return render_to_response('tag_cloud.html',
                              {'tags': context['tags'], 'css': context['css']})
def test_create_html_data(self):
    """ HTML code sample: render the fixture text as cloud.html. """
    tags = make_tags(get_tag_counts(self.hound.read())[:100], maxsize=120,
                     colors=COLOR_SCHEMES['audacity'])
    data = create_html_data(tags, (440, 600), layout=LAYOUT_HORIZONTAL,
                            fontname='PT Sans Regular')
    # Close the template after reading it — the handle used to leak.
    template_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                 'web/template.html')
    with open(template_path, 'r') as template_file:
        html_template = Template(template_file.read())
    context = {}
    tags_template = '<li class="cnt" style="top: %(top)dpx; left: %(left)dpx; height: %(height)dpx;"><a class="tag %(cls)s" href="#%(tag)s" style="top: %(top)dpx; left: %(left)dpx; font-size: %(size)dpx; height: %(height)dpx; line-height:%(lh)dpx;">%(tag)s</a></li>'
    context['tags'] = ''.join([tags_template % link for link in data['links']])
    context['width'] = data['size'][0]
    context['height'] = data['size'][1]
    context['css'] = "".join(
        "a.%(cname)s{color:%(normal)s;} a.%(cname)s:hover{color:%(hover)s;}"
        % {'cname': k, 'normal': v[0], 'hover': v[1]}
        for k, v in data['css'].items())
    html_text = html_template.substitute(context)
    with open(os.path.join(self.test_output, 'cloud.html'), 'w') as html_file:
        html_file.write(html_text)
def make_cloud(self, output_html):
    """Build the keyword cloud: HTML layout data when `output_html`,
    otherwise a PNG on disk plus its URL in {'image': ...}."""
    keywords = KeywordManager().all()
    text = ' '.join(kw.keyword for kw in keywords)
    # HTML clouds are smaller so they fit on the page.
    if output_html:
        max_tags, max_size = 30, 42
    else:
        max_tags, max_size = 100, self.maxsize
    tags = make_tags(get_tag_counts(text)[:max_tags],
                     minsize=self.minsize, maxsize=max_size)
    if output_html:
        return create_html_data(tags, size=(900, 300), layout=LAYOUT_HORIZONTAL)
    cloud = self.resolver.resolve('jcuwords:keyword-cloud.png')
    filename = cloud.abspath()
    create_tag_image(tags, filename, size=(1024, 500),
                     fontname='IM Fell DW Pica', layout=LAYOUT_MIX)
    image_url = self.request.resource_url(None, 'keyword-cloud.png')
    return {'image': image_url}
def tagCloud(self):
    """Render every document in self.docSet as one mixed-layout PNG."""
    # One join is linear; the original's repeated += concatenation was
    # quadratic in the corpus size. Produces the identical string.
    texts = "".join(" " + item for item in self.docSet)
    tags = make_tags(get_tag_counts(texts), maxsize=120)
    create_tag_image(tags, 'filename.png', size=(2000, 1000),
                     background=(0, 0, 0, 255), layout=LAYOUT_MIX,
                     fontname='Lobster', rectangular=True)
def createTagCloud(self, wordline):
    """Create a tag-cloud PNG at self.png from `wordline`.

    The first whitespace token is discarded; the remaining tokens are
    counted and the top `self.wordCount` tags are drawn. Returns True on
    success, False when there is nothing to draw.
    """
    if wordline == '':
        return False
    tokens = WhitespaceTokenizer().tokenize(wordline)
    if not tokens:
        # Whitespace-only input used to raise IndexError; treat as empty.
        return False
    # Drop the first token by position; list.remove() deletes the first
    # *equal* element, which only matched index 0 by coincidence.
    tokens.pop(0)
    wordstream = ' '.join(tokens)
    thresh = self.wordCount
    colorS = self.colorSchemes[self.color]
    tags = make_tags(get_tag_counts(wordstream)[:thresh],
                     minsize=3, maxsize=40,
                     colors=COLOR_SCHEMES[colorS])
    create_tag_image(tags, self.png,
                     size=(960, 400),
                     background=(255, 255, 255, 255),
                     layout=LAYOUT_HORIZONTAL,
                     fontname='Neuton')
    return True
def finance_cloud(tag):
    """Write cloud.png (1280x800, black background, SimHei font) from `tag` text."""
    word_counts = get_tag_counts(tag)
    cloud_tags = make_tags(word_counts, maxsize=100)
    create_tag_image(
        cloud_tags,
        "cloud.png",
        size=(1280, 800),
        background=(0, 0, 0, 255),
        fontname="SimHei",
    )
def _test_large_tag_image(self): start = time.time() tags = make_tags(get_tag_counts(self.hound.read())[:80], maxsize=120, colors=COLOR_SCHEMES['audacity']) create_tag_image(tags, os.path.join(self.test_output, 'cloud_large.png'), ratio=0.75, background=(0, 0, 0, 255), layout=LAYOUT_HORIZONTAL, fontname='Lobster') print "Duration: %d sec" % (time.time() - start)
def _test_make_tags(self):
    """'sir' must appear among the top 60 tags with computed size 40."""
    mtags = make_tags(get_tag_counts(self.hound.read())[:60])
    found = any(entry['tag'] == 'sir' and entry['size'] == 40
                for entry in mtags)
    self.assertTrue(found)
def test_layouts(self): start = time.time() tags = make_tags(get_tag_counts(self.hound.read())[:80], maxsize=120) for layout in LAYOUTS: create_tag_image(tags, os.path.join(self.test_output, 'cloud_%s.png' % layout), size=(900, 600), background=(255, 255, 255, 255), layout=layout, fontname='Lobster') print "Duration: %d sec" % (time.time() - start)
def test_make_tags(self):
    """The word 'sir' is expected in the top 60 tags at size 40."""
    tag_objects = make_tags(get_tag_counts(self.hound.read())[:60])
    matches = [t for t in tag_objects
               if t['tag'] == 'sir' and t['size'] == 40]
    self.assertTrue(len(matches) > 0)
def make_tag_cloud():
    """Read text from stdin and write a tag cloud to '<argv[1]>.png'."""
    # `text` must exist before the first `+=`. The original relied on a
    # bare `except: pass` that silently swallowed the resulting NameError,
    # so every input line was skipped and make_tags then crashed anyway.
    text = ''
    for line in sys.stdin:
        text += ' ' + line.strip().lower()
    tags = make_tags(get_tag_counts(text), maxsize=150)
    create_tag_image(tags, sys.argv[1] + '.png', size=(1024, 768))
def semantic_cloud(topic): topic_list = TopicList(topic) tlist = topic_list.GetTopicList() htagsl = HashtagsList(tlist['statuses'], topic) hl = htagsl.GetHashtagsList() cadena = " ".join(hl) print cadena tags = make_tags(get_tag_counts(cadena), maxsize=120) create_tag_image(tags, 'semantic_cloud.png', size=(900, 600), fontname='Lobster')
def calAccuracy(self):
    """Interactively pick a phone from the DB, then render positive and
    negative review tag clouds for it.

    Side effects: prompts on stdin, creates a directory under a hard-coded
    absolute path, and resets self.actual_review between the two passes.
    """
    # List every known phone so the user can choose an id and name.
    self.cursor1.execute("select id, name from phone_id;")
    result = self.cursor1.fetchall()
    for data in result:
        print data[0],data[1]
    self.phone_id=raw_input("Enter phone id\n");
    self.name = raw_input("Enter name:")
    import os
    # NOTE(review): machine-specific absolute path — confirm before reuse.
    os.mkdir('/home/darshan-ubuntu/Project/Products/Features/'+self.name)
    # getReview(1) presumably fills self.actual_review with the positive
    # reviews (and 0 with negative ones) — TODO confirm against getReview.
    self.getReview(1)
    tags = make_tags(get_tag_counts(self.actual_review), maxsize=120)
    create_tag_image(tags, self.name+'/positive.png', size=(900, 600))
    self.actual_review=""
    self.getReview(0)
    tags = make_tags(get_tag_counts(self.actual_review), maxsize=60)
    create_tag_image(tags, self.name+'/negative.png', size=(900, 600))
def get_tag_cloud(startdate, enddate, max_tags=100, max_size=120, height=900,
                  width=600, filename='cloud_large.png'):
    """get_tag_cloud

    Generate a PNG tag-cloud image from the update notes collected between
    `startdate` and `enddate` (both required). max_tags, max_size, the
    image dimensions and the filename are optional.

    NOTE(review): size is passed as (height, width) with defaults (900, 600),
    so `height` is effectively the horizontal dimension — confirm before
    renaming the parameters.
    """
    count = 0
    progress_index = 1
    mem_file = StringIO.StringIO()
    try:
        notes = get_notes(startdate, enddate)
        n_notes = notes.count()
        logger.info("Progress 0 % of {} notes".format(n_notes))
        for note in notes:
            mem_file.write(note['update_note'])
            # Emit a progress line roughly every 10% of the notes.
            if count == int(progress_index * 0.10 * n_notes):
                logger.info("Progress {} % of {} notes".format(
                    int(100 * (float(count) / n_notes)), n_notes))
                progress_index += 1
            count += 1
        logger.info("Calculating tags")
        tags = make_tags(get_tag_counts(mem_file.getvalue()), maxsize=max_size)
        logger.info("Generating {}".format(filename))
        logger.info(
            "File specs: h={height};w={width}; max_tags={max_tags}; tag_max_size{max_size}"
            .format(height=height, width=width, max_tags=max_tags,
                    max_size=max_size))
        create_tag_image(tags[:max_tags], filename, size=(height, width))
        logger.info(
            "{} succesfully created. Procces finished.".format(filename))
    finally:
        # Release the buffer on success too; the original only closed it
        # on the exception path (and then re-raised).
        mem_file.close()
def run(textpath): text = open(textpath, 'r') start = time.time() taglist = get_tag_counts(text.read().decode('utf8')) cleantaglist = process_tags(taglist) tags = make_tags(taglist[0:100], colors=COLOR_MAP) create_tag_image(tags, 'cloud.png', size=(1280, 900), background=(0, 0, 0 , 255), layout=LAYOUT_MOST_HORIZONTAL, crop=False, fontname='Cuprum', fontzoom=2) tags2 = make_tags(cleantaglist[0:100], colors=COLOR_MAP) create_tag_image(tags2, 'rcloud.png', size=(1280, 900), background=(0, 0, 0, 255), layout=LAYOUT_MOST_HORIZONTAL, crop=False, fontname='Cuprum', fontzoom=2) print "Duration: %d sec" % (time.time() - start)
def make_cloud(text, fname):
    '''create the wordcloud from variable text, written to <fname>.png'''
    # Strip URL fragments and retweet markers before counting words.
    cleaned = text.lower().replace('http', '').replace('rt ', '').replace('.co', '')
    # Removed the dead bigram computation (two_words / wordscount /
    # sorted_wordscount): it was built every call but never used — the
    # cloud is drawn from the single-word counts of the cleaned text.
    tags = make_tags(get_tag_counts(cleaned)[:50], maxsize=350, minsize=100)
    create_tag_image(tags, fname + '.png', size=(3000, 3250),
                     background=(0, 0, 0, 255), layout=LAYOUT_MIX,
                     fontname='Lobster', rectangular=True)
def create_tags(tweet, out='out'):
    """Render the cleaned tweet as one tag-cloud PNG per layout under ./static/<out>/."""
    tweet = clean_tweet(tweet)
    # Removed the unused local `output = os.path.join(os.getcwd(), out)` —
    # the destination path below never referenced it.
    tags = make_tags(get_tag_counts(tweet)[:50], maxsize=50)
    for layout in LAYOUTS:
        create_tag_image(tags, './static/' + out + ('/cloud_%s.png' % layout),
                         size=(500, 500), background=(255, 255, 255, 255),
                         layout=layout, fontname='Lobster')
def __init__(self, raw_text, except_words):
    """Keep words longer than 2 chars that are not in `except_words`,
    then build the tag list from their counts."""
    kept = [word for word in raw_text.split()
            if len(word) > 2 and word not in except_words]
    filtered = " ".join(kept)
    self.filtered = filtered
    self.tags = ptc.make_tags(get_tag_counts(filtered), maxsize=60, minsize=6)
def createTagCloud(self, rapper):
    """Draw cloud_large.png from the artist's raw lyrics, keeping only tags sized > 20."""
    # pytagcloud imports stay function-local, matching the original author's
    # note that module-level imports did not work in this project.
    from pytagcloud import create_tag_image, make_tags
    from pytagcloud.lang.counter import get_tag_counts
    lyrics = rapper.rawLyrics
    all_tags = make_tags(get_tag_counts(lyrics), maxsize=100)
    big_tags = [entry for entry in all_tags if entry['size'] > 20]
    create_tag_image(big_tags, 'cloud_large.png', size=(800, 400),
                     background=(239, 101, 85, 255),
                     fontname='PT Sans Regular')
def _test_create_tag_image_rect(self): start = time.time() create_tag_image(make_tags(get_tag_counts(self.hound.read())[:30]), os.path.join(self.test_output, 'cloud_rect.png'), size=(300, 400), background=(255, 255, 255, 255), layout=LAYOUT_HORIZONTAL, crop=False, rectangular=True, fontname='Lobster', fontzoom=2) print "Duration: %d sec" % (time.time() - start)
def init():
    """Prepare the module-level `tags` and `test_output` used by the benchmarks."""
    global tags
    global test_output
    home_folder = os.getenv("USERPROFILE") or os.getenv("HOME")
    test_output = os.path.join(home_folder, "pytagclouds")
    if not os.path.exists(test_output):
        os.mkdir(test_output)
    # Read the fixture once and close it — the handle used to leak.
    hound_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                              "../test/pg2852.txt")
    with open(hound_path, "r") as hound:
        sample = hound.read()
    tags = make_tags(get_tag_counts(sample)[:50], maxsize=120,
                     colors=COLOR_SCHEMES["audacity"])
def test_large_tag_image(self): start = time.time() tags = make_tags(get_tag_counts(self.hound.read())[:80], maxsize=120, colors=COLOR_SCHEMES['audacity']) create_tag_image(tags, os.path.join(self.test_output, 'cloud_large.png'), size=(1500, 600), background=(0, 0, 0, 255), layout=LAYOUT_HORIZONTAL, fontname='Lobster') print "Duration: %d sec" % (time.time() - start)
def init():
    """Initialise the global `tags` and `test_output` for the test run."""
    global tags
    global test_output
    home_folder = os.getenv('USERPROFILE') or os.getenv('HOME')
    test_output = os.path.join(home_folder, 'pytagclouds')
    if not os.path.exists(test_output):
        os.mkdir(test_output)
    # Read the sample text once and close it; the file object used to
    # stay open for the life of the process.
    fixture = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                           '../test/pg2852.txt')
    with open(fixture, 'r') as hound:
        sample_text = hound.read()
    tags = make_tags(get_tag_counts(sample_text)[:50], maxsize=120,
                     colors=COLOR_SCHEMES['audacity'])
def get_cloud(self, freq, fname):
    """Create a tag-cloud PNG at `fname` from a {word: count} mapping.

    Words with non-positive counts, 8+ characters, or in the stop list are
    skipped; each remaining word is repeated `count` times before the text
    is re-counted by pytagcloud.
    """
    stop_words = ("https", "http")
    # Build the repeated-word text with a single join; the original's
    # += inside the loop was quadratic in the output size.
    txt_str = "".join(
        ("%s " % word) * count
        for word, count in freq.items()
        if count > 0 and len(word) < 8 and word not in stop_words)
    tags = make_tags(get_tag_counts(txt_str), minsize=1, maxsize=120)
    create_tag_image(tags, fname, size=(1200, 800), fontname='Inconsolata',
                     layout=LAYOUT_MIX)
def openWestCoastCloud():
    """Build and display a word cloud for several West Coast cities."""
    # Los Angeles, San Francisco, Seattle — joined with single spaces,
    # exactly as the original += chain produced.
    city_coords = [(34, 118), (37, 122), (47, 122)]
    TEXT = ' '.join(getTags(lat, lon) for lat, lon in city_coords)
    # Draw the word cloud and write it to disk.
    tags = make_tags(get_tag_counts(TEXT), maxsize=80)
    create_tag_image(tags, 'cloud_large.png', size=(900, 600),
                     fontname='Lobster')
    # Open the word-cloud file to see results.
    webbrowser.open('cloud_large.png')
def createTagCloud(self, rapper):
    """Generate cloud_large.png from rapper.rawLyrics, keeping tags sized over 20."""
    # These imports only work at function scope in this project (per the
    # original author's note), so they remain local.
    from pytagcloud import create_tag_image, make_tags
    from pytagcloud.lang.counter import get_tag_counts
    raw = rapper.rawLyrics
    sized = make_tags(get_tag_counts(raw), maxsize=100)
    sized = filter(lambda t: t['size'] > 20, sized)
    create_tag_image(sized, 'cloud_large.png', size=(800, 400),
                     background=(239, 101, 85, 255),
                     fontname='PT Sans Regular')
def _test_create_tag_image(self): start = time.time() tags = make_tags(get_tag_counts(self.hound.read())[:30]) for layout in LAYOUTS: create_tag_image(tags, os.path.join(self.test_output, 'cloud_%s.png' % layout), size=(600, 500), background=(255, 255, 255, 255), layout=layout, crop=True, fontname='Lobster', fontzoom=3) print "Duration: %d sec" % (time.time() - start)
def _test_large_tag_image(self): start = time.time() tags = make_tags(get_tag_counts(self.hound.read())[:120], maxsize=120, colors=COLOR_SCHEMES['audacity']) create_tag_image(tags, os.path.join(self.test_output, 'cloud_large.png'), size=(1280, 900), background=(0, 0, 0, 255), layout=LAYOUT_MIX, crop=True, fontname='Lobster', fontzoom=1) print "Duration: %d sec" % (time.time() - start)
def keys_cloud(): for i in range(9): f = file('../data_preprocess/Data/ftags_{}.pkl'.format(i), 'rb') fdist = pickle.load(f) tag = '' print fdist.most_common()[0][0], fdist.most_common()[0][1] for key, count in fdist.most_common(100): tag +=( key+" ")*count #text = "%s" % " ".join(tag) #tags = make_tags(get_tag_counts('cbb cbb xuxian xuxian keke keke keke'),maxsize=100) tags = make_tags(get_tag_counts(tag),maxsize=100) # Set your output filename create_tag_image(tags,"Data/word_cloud_{}.png".format(i), size=(600,400),background=(0, 0, 0, 255), fontname="SimHei")
def createHtmlData(wors):
    """Return the <li> markup for a 20-tag cloud built from `wors`."""
    cleaned = removeWords(wors)
    cloud = make_tags(get_tag_counts(cleaned)[:20], maxsize=80,
                      colors=COLOR_SCHEMES['oldschool'])
    layout_data = create_html_data(cloud, (800, 600),
                                   layout=LAYOUT_HORIZONTAL,
                                   fontname='PT Sans Regular')
    item = '<li class="cnt" style="top: %(top)dpx; left: %(left)dpx; height: %(height)dpx;"><a class="tag %(cls)s" href="#%(tag)s" style="top: %(top)dpx; left: %(left)dpx; font-size: %(size)dpx; height: %(height)dpx; line-height:%(lh)dpx;">%(tag)s</a></li>'
    return ''.join(item % link for link in layout_data['links'])
def test1():
    """Render the Beijing Python jobs text as cloud_large.png and open it."""
    from pytagcloud import create_tag_image, make_tags
    from pytagcloud.lang.counter import get_tag_counts
    f = file('./北京_python_30_jobs.json', 'r')
    job_text = f.read()
    word_tags = make_tags(get_tag_counts(job_text), maxsize=80)
    create_tag_image(word_tags, 'cloud_large.png', size=(900, 600),
                     fontname='Lobster')
    import webbrowser
    webbrowser.open('cloud_large.png')  # see results
    f.close()
def createHtmlData(wors):
    """Build and return the HTML <li> fragments for the top-20 tag cloud of `wors`."""
    words_kept = removeWords(wors)
    top20 = make_tags(get_tag_counts(words_kept)[:20], maxsize=80,
                      colors=COLOR_SCHEMES['oldschool'])
    html = create_html_data(top20, (800, 600), layout=LAYOUT_HORIZONTAL,
                            fontname='PT Sans Regular')
    # Adjacent string literals concatenate into the same single template.
    template = ('<li class="cnt" style="top: %(top)dpx; left: %(left)dpx; height: %(height)dpx;">'
                '<a class="tag %(cls)s" href="#%(tag)s" style="top: %(top)dpx; left: %(left)dpx; font-size: %(size)dpx; height: %(height)dpx; line-height:%(lh)dpx;">%(tag)s</a></li>')
    pieces = [template % link for link in html['links']]
    return ''.join(pieces)
def make_tag_cloud(labels_text, input_fname, output_dir):
    """
    Make tag cloud for each label/cluster from their text.

    labels_text: dict mapping label -> text.
    input_fname: string, used as the output filename prefix.
    output_dir: string, directory the PNGs are written into.
    return: None
    """
    print("Let's make tag cloud")
    for label, text in labels_text.iteritems():
        destination = output_dir + "/" + input_fname + "_label{}.png".format(label)
        word_tags = make_tags(get_tag_counts(text), maxsize=80)
        create_tag_image(word_tags, destination, size=(900, 600))
        print("label {} finished".format(label))
def openEastCoastCloud():
    '''
    Generates and displays East Coast word cloud.
    '''
    # Accumulate tags from every location. The original reassigned
    # `TEXT = getTags(...)` at Georgetown, silently discarding the NYC,
    # Harvard and MIT text gathered just above.
    TEXT = getTags(40, 73)  # New York City
    TEXT += ' ' + getTags(42.3744, 71.1169)  # Harvard
    TEXT += ' ' + getTags(42.3598, 71.0921)  # MIT
    TEXT += ' ' + getTags(38.9072, 77.0728)  # GeorgeTown University
    TEXT += ' ' + getTags(42.3496, 71.0997)  # Boston University
    # Draws Word Cloud
    tags = make_tags(get_tag_counts(TEXT), maxsize=80)
    # Creates Word Cloud File
    create_tag_image(tags, 'cloud_large.png', size=(900, 600), fontname='Lobster')
    # Opens Word Cloud File
    webbrowser.open('cloud_large.png')  # see results
def make_cloud(cloud_type, time, maxsize=80, cutoff=65, layout=4):
    """ Make word cloud. Returns the image path, or None when too few tags. """
    text = tb.build_text(cloud_type)
    filename = "images/" + cloud_type + "_" + time + ".png"
    tags = make_tags(get_tag_counts(text), minsize=30, maxsize=maxsize)
    # Require at least `cutoff` tags, then keep exactly the first `cutoff`.
    if len(tags) < cutoff:
        return None
    tags = tags[:cutoff]
    if not tags:
        return None
    create_tag_image(tags, filename, background=(0, 0, 0), size=(900, 600),
                     layout=layout)
    return filename
def _test_create_html_data(self): """ HTML code sample """ tags = make_tags(get_tag_counts(self.hound.read())[:100], maxsize=120, colors=COLOR_SCHEMES['audacity']) data, html_text = create_html_data(tags, size=(1280, 900), fontname='Lobster', fontzoom=1) html_file = open(os.path.join(self.test_output, 'cloud.html'), 'w') html_file.write(html_text) html_file.close print '\nCSS\n' for style in data['css']: print style print '\nHTML\n' for link in data['links']: print '<a class="tag %(cls)s" href="#" style="top: %(top)dpx; left: %(left)dpx; font-size: %(size)dpx;">%(tag)s</a>' % link
def word_cloud(text): """Function to get word cloud data.""" counts = get_tag_counts(text) text_data = [] stop_words = [ 'rt', 'ji', 'wish', 'wishes', 'wished', 'shri', 'pm', 'gv', 'duttyogi', 'srikidambi', 'tomar', 'amp' ] i = 0 while len(text_data) < 100: dic = {} if (counts[i][0] not in stop_words): dic[counts[i][0]] = counts[i][1] text_data.append(dic) i += 1 print text_data with open(handle + '_' + date + '_word_cloud.json', 'w') as f: json.dump(text_data, f, indent=1) f.close()
def main():
    """Cloud of the 30 largest categories read from cat_links.txt / cats.txt."""
    # Build category membership from the tab-separated links file.
    cat_dict = {}
    with open('cat_links.txt') as links_file:
        for row in links_file:
            first_id, second_id = row.strip().split('\t')
            make_page_cat(cat_dict, int(first_id), int(second_id))
    # Category size = number of linked entries.
    cat_count = {cat: len(members) for cat, members in cat_dict.items()}
    # Category id -> human-readable name.
    cat_name = {}
    with open('cats.txt') as names_file:
        for row in names_file:
            first_id, second_id = row.strip().split('\t')
            cat_name[int(first_id)] = second_id
    # Keep the 30 categories with the most pages.
    rank_num = 30
    ranked = sorted(cat_count.items(), key=lambda kv: kv[1], reverse=True)
    ranking_list = [cat for cat, _ in ranked[:rank_num]]
    YOUR_TEXT = ' '.join(cat_name[item] for item in ranking_list)
    tags = make_tags(get_tag_counts(YOUR_TEXT), maxsize=80)
    create_tag_image(tags, 'cloud_large.png', size=(900, 600), fontname='Lobster')
    import webbrowser
    webbrowser.open('cloud_large.png')  # see results
def get_m_tags(text, m):
    """
    Get tags from this text. tags are the most frequent words.
    text: not None or empty string.
    m: the no. of tags, m > 0
    return: a list of tags and their frequence.
    raises: AssertionError when m <= 0 or text is None/empty/whitespace.
    """
    if m <= 0:
        raise AssertionError("m should be bigger than 0.")
    # Validate explicitly rather than with `assert`, which is stripped
    # under `python -O`; AssertionError is kept so callers catching the
    # previous exception type still work.
    if not text:
        raise AssertionError("text is none or empty")
    text = text.strip()
    if not text:
        raise AssertionError("no word in text")
    tag_counts = get_tag_counts(text)
    sort_tag_counts(tag_counts)
    # Slicing already clamps at the list length, so no length branch needed.
    return tag_counts[:m]
def _create_image(self, text):
    """Render `text` as a tag-cloud PNG at /tmp/cloud_large.png; returns 0."""
    tag_counts = get_tag_counts(text)
    if tag_counts is None:
        sys.exit(-1)
    if self._repeat_tags:
        # Re-emit every tag three times with fixed weights 5, 2 and 1.
        expanded_tag_counts = []
        for tag in tag_counts:
            expanded_tag_counts.append((tag[0], 5))
        for tag in tag_counts:
            expanded_tag_counts.append((tag[0], 2))
        for tag in tag_counts:
            expanded_tag_counts.append((tag[0], 1))
        tag_counts = expanded_tag_counts
    tags = make_tags(tag_counts, maxsize=150, colors=self._color_scheme)
    path = os.path.join('/tmp/cloud_large.png')
    width, height = self.get_display_rectangle()
    # NOTE(review): with true division, `width = int(height * (width/height))`
    # leaves width unchanged (and likewise for height), so this branch is
    # effectively a no-op; under Python 2 integer division it merely
    # truncates. A 4:3 / 3:4 fit (as in the sibling Gdk-based variant)
    # looks intended — confirm before changing.
    if height < width:
        ratio = width / height
        width = int(height * ratio)
    else:
        ratio = height / width
        height = int(width * ratio)
    if self._font_name is not None:
        create_tag_image(tags, path, layout=self._layout,
                         size=(width, height), fontname=self._font_name)
    else:
        create_tag_image(tags, path, layout=self._layout,
                         size=(width, height))
    return 0
def make_cloud(text, fname):
    '''create the wordcloud from variable text'''
    # Strip URL fragments and retweet markers before counting.
    Data1 = text.lower().replace('http', '').replace('rt ', '').replace('.co', '')
    # Dropped the unused bigram frequency computation (two_words, wordscount,
    # sorted_wordscount): it was rebuilt on every call but nothing below
    # read it — the cloud is drawn from the unigram counts of Data1.
    tags = make_tags(get_tag_counts(Data1)[:50], maxsize=350, minsize=100)
    create_tag_image(tags, fname + '.png', size=(3000, 3250),
                     background=(0, 0, 0, 255), layout=LAYOUT_MIX,
                     fontname='Lobster', rectangular=True)
def create_html_data(self):
    """ HTML code sample: render self.content into templates/cloud.html. """
    cloud = make_tags(get_tag_counts(self.content), maxsize=120,
                      colors=COLOR_SCHEMES['audacity'])
    # FIXME: a segmentation-fault bug has been observed here.
    layout_data = create_html_data(cloud, (840, 1000),
                                   layout=LAYOUT_HORIZONTAL,
                                   fontname='PT Sans Regular')
    with open(os.path.join('templates/', 'web/template.html'), 'r') as tpl:
        html_template = Template(tpl.read())
    entry = '<li class="cnt" style="top: %(top)dpx; left: %(left)dpx; height: %(height)dpx;"><a class="tag %(cls)s" href="#%(tag)s" style="top: %(top)dpx; left: %(left)dpx; font-size: %(size)dpx; height: %(height)dpx; line-height:%(lh)dpx;">%(tag)s</a></li>'
    context = {
        'tags': ''.join(entry % link for link in layout_data['links']),
        'width': layout_data['size'][0],
        'height': layout_data['size'][1],
        'css': "".join(
            "a.%(cname)s{color:%(normal)s;} a.%(cname)s:hover{color:%(hover)s;}"
            % {'cname': k, 'normal': v[0], 'hover': v[1]}
            for k, v in layout_data['css'].items()),
    }
    html_text = html_template.substitute(context)
    with open('templates/cloud.html', 'w') as out:
        out.write(html_text.encode('utf-8'))
def makeTagCloud(self, counter = 0): """ function building the tag clouds and shutting down after the last one """ # get tag count tagCount = get_tag_counts(self.corpus) # cut out low entries, short words, the tracked word & some more stuff tagCount = [x for x in tagCount if x[1] >= self.minOcc and not(x[0] in [self.tracked, 'http', 'https']) and len(x[0])>2] tags = make_tags(tagCount, maxsize =150) create_tag_image(tags, self.tracked[0] + '_cloud_'+ str(counter) \ + '.png', size=(1000,1000)) print "tag cloud built..." print ("current corpus size: lenght = ",len(self.corpus), ", memory = ",sys.getsizeof(self.corpus)/1000) # after the last tag clodu has been completed, shut down if counter >= self.maxTweets: print 'enough, I am shutting down' sys.exit()
def test_create_html_data(self):
    """ HTML code sample: small goldfish-scheme cloud written to cloud.html. """
    tags = make_tags(get_tag_counts(self.hound.read())[:100], maxsize=36,
                     colors=COLOR_SCHEMES['goldfish'])
    data = create_html_data(tags, (220, 300), layout=LAYOUT_HORIZONTAL,
                            fontname='PT Sans Regular')
    # Close the template after reading — the handle used to leak.
    template_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                 'web/template.html')
    with open(template_path, 'r') as template_file:
        html_template = Template(template_file.read())
    context = {}
    tags_template = '<li class="cnt" style="top: %(top)dpx; left: %(left)dpx; height: %(height)dpx;"><a class="tag %(cls)s" href="/tag/%(tag)s" style="top: %(top)dpx; left: %(left)dpx; font-size: %(size)dpx; height: %(height)dpx; line-height:%(lh)dpx;">%(tag)s</a></li>'
    context['tags'] = ''.join(
        [tags_template % link for link in data['links']])
    context['width'] = data['size'][0]
    context['height'] = data['size'][1]
    context['css'] = "".join(
        "a.%(cname)s{color:%(normal)s;} a.%(cname)s:hover{color:%(hover)s;}"
        % {'cname': k, 'normal': v[0], 'hover': v[1]}
        for k, v in data['css'].items())
    html_text = html_template.substitute(context)
    with open(os.path.join(self.test_output, 'cloud.html'), 'w') as html_file:
        html_file.write(html_text)
def match():
    ''' returns labeled tweets '''
    Tweets = get_tweets()
    label_sentiments(Tweets)
    ana = analyze(Tweets)
    topics = extract_topics(Tweets)
    remove_repeated_tags(Tweets)
    # Build a cloud from every topic except the first two.
    tag_text = get_string(topics[2:])
    cloud = make_tags(get_tag_counts(tag_text))
    image_path = 'tweet_sentiment_vis/static/images/tagcloud.png'
    # Replace any stale image before rendering the new one.
    if os.path.isfile(image_path):
        os.remove(image_path)
    create_tag_image(cloud, image_path, size=(500, 500))
    return [Tweets, ana, topics]
def run(textpath): text = open(textpath, 'r') start = time.time() taglist = get_tag_counts(text.read().decode('utf8')) cleantaglist = process_tags(taglist) tags = make_tags(taglist[0:100], colors=COLOR_MAP) create_tag_image(tags, 'cloud.png', size=(1280, 900), background=(0, 0, 0, 255), layout=LAYOUT_MOST_HORIZONTAL, crop=False, fontname='Cuprum', fontzoom=2) tags2 = make_tags(cleantaglist[0:100], colors=COLOR_MAP) create_tag_image(tags2, 'rcloud.png', size=(1280, 900), background=(0, 0, 0, 255), layout=LAYOUT_MOST_HORIZONTAL, crop=False, fontname='Cuprum', fontzoom=2) print "Duration: %d sec" % (time.time() - start)
def wordcloud(direc):
    """Build tag-cloud HTML from the text file at `direc`.

    NOTE(review): this function looks unfinished and cannot run as written —
    see the inline notes on the template path and the `self` reference.
    """
    text = ""
    with open(direc) as f:
        lines = f.readlines()
        text = "".join(lines)
    # `words` is computed but never used afterwards.
    words = word_tokenize(text)
    tags = make_tags(get_tag_counts(text), maxsize=90)
    data = create_html_data(tags, (1600,1200), layout = LAYOUT_MIX, fontname='Philiosopher', rectangular=True)
    # NOTE(review): '../corpus/' is a directory — open() on it will raise;
    # presumably a template filename is missing here. Confirm intended path.
    template_file = open(os.path.join(os.path.dirname(os.path.abspath(__file__)), '../corpus/'), 'r')
    html_template = Template(template_file.read())
    context = {}
    tags_template = '<li class="cnt" style="top: %(top)dpx; left: %(left)dpx; height: %(height)dpx;"><a class="tag %(cls)s" href="#%(tag)s" style="top: %(top)dpx;\
 left: %(left)dpx; font-size: %(size)dpx; height: %(height)dpx; line-height:%(lh)dpx;">%(tag)s</a></li>'
    context['tags'] = ''.join([tags_template % link for link in data['links']])
    context['width'] = data['size'][0]
    context['height'] = data['size'][1]
    context['css'] = "".join("a.%(cname)s{color:%(normal)s;}\
 a.%(cname)s:hover{color:%(hover)s;}" % {'cname':k, 'normal': v[0], 'hover': v[1]} for k,v in data['css'].items())
    html_text = html_template.substitute(context)
    # NOTE(review): `self` is undefined in this plain function; test_output
    # was probably meant to be a parameter or module-level global — confirm.
    html_file = open(os.path.join(self.test_output, 'cloud.html'), 'w')
    html_file.write(html_text)
    html_file.close()
def test_tag_counter(self):
    """The fixture text must yield ('sir', 350) among its top 50 counts."""
    top_fifty = get_tag_counts(self.hound.read())[:50]
    self.assertTrue(('sir', 350) in top_fifty)
def run_pgm(page):
    """Classify live tweets as positive/negative/neutral, then render a pie
    chart and three tag clouds summarising the result.

    `page` is an HTML byte string; a "hold on" banner is appended to it.
    Results are published through module-level globals (zipP, zipN, zipNu,
    res) and image files under static/.
    NOTE(review): many names used here (res, positiveTweets, negativeTweets,
    neutralTweets, tweets_data_path, bio, stemmer, stopWords, removeStopWords,
    textblob, plt, pd, operator, nltk, json, re, time) are defined elsewhere
    in the file and are not visible in this excerpt -- their types are
    assumed from usage.
    """
    start_time = time.time()
    global zipP, zipN, zipNu, res
    # Banner shown while tweets are being mined.
    page += ''' <h3 class="hiddenmsg title col-md-12 col-xs-12">hold on...we're mining tweets...</h3>'''.encode(
        "utf-8")

    #*****************************************************PROCESSING TWEETS***********************************************************************
    def process_tweet(tweet):
        # Normalise a raw tweet: lowercase, collapse URLs/@users/hashtags,
        # squeeze whitespace and strip stop words.
        #print 'tweet before processing:',tweet,'after'
        #convert to lower case
        tweet = tweet.lower()
        tweet = replaceTwoOrMore(tweet)
        #convert www.* or http:* to url
        tweet = re.sub('((www\.[^\s]+)|((http|https)://[^\s]+))', 'url', tweet)
        #covert @username to at_user
        tweet = re.sub('@[^\s]+', 'at_user', tweet)
        #remove additional whitespaces
        tweet = re.sub('[\s]+', ' ', tweet)
        tweet = re.sub('[\']', '', tweet)
        #replace #word with word
        tweet = re.sub(r'#([^\s]+)', r'\1', tweet)
        #trim
        tweet = tweet.strip("'")
        tweet = tweet.strip()
        tweet = removeStopWords(tweet)
        return tweet

    def replaceTwoOrMore(s):
        # Collapse runs of repeated characters down to exactly two
        # (e.g. "coooool" -> "cool").
        #look for 2/more reps of the charc and replace with the charc itself
        pattern = re.compile(
            r"(.)\1{1,}",
            re.DOTALL)  # re.DOTALL makes '.' match anything INCL newline
        return pattern.sub(r"\1\1", s)

    #split tweet and get individual words which are not in the stopwords list
    def getFeatureVector(tweet):
        featureVector = []
        #split tweet into words
        words = tweet.split()
        for w in words:
            #replace 2/more
            w = replaceTwoOrMore(w)
            w = w.strip('\'"?,')
            w = re.sub('[.,]$', '', w)
            #stemming
            if (w.endswith('ing') or w.endswith('ed') or w.endswith('ses')):
                w = stemmer.stem(w)
            #check if word starts with alphabet or emoticon
            val = re.search(r"(^[a-zA-Z][a-zA-Z0-9]*$)|([:();@])", w)
            #ignore if it's a stop word
            if (w in stopWords or val is None):
                continue
            else:
                featureVector.append(w.lower())
        #print featureVector
        return featureVector

    #*************************************************EXTRACTING FEATURE VECTORS******************************************************************
    #return dictionary of words with true/false
    def extract_features(tweet):
        # Bag-of-words membership features over the enclosing featureList.
        tweet_words = tweet
        #print 'tweet words: ',tweet_words
        features = {}
        #print 'in extr feaures, featureList: ',featureList
        for word in featureList:
            features['contains(%s)' % word] = (word in tweet_words)
        return features

    def get_words_in_tweets(tweets):
        # Flatten (words, sentiment) training pairs into one word list.
        all_words = []
        for (words, sentiment) in tweets:
            all_words.extend(words)
        return all_words

    def get_word_features(wordlist):
        # Distinct words, via an nltk frequency distribution.
        # print 'wordlist: ',wordlist
        wordlist = nltk.FreqDist(wordlist)
        #print 'wordlist: '
        word_features = wordlist.keys()
        return word_features

    #*****************************************************TRAINING NAIVE BAYES CLASSIFIER*********************************************************
    #read tweets from training_set.txt
    #read featureList from featureList.txt
    tweets = []
    featureList = []
    tweets = json.load(open("training_set.txt", "rb"))
    featureList = json.load(open("featureList.txt", "rb"))
    #from file training/countValues.txt
    # File layout (one value per line): positive count, negative count,
    # positive prior, negative prior.
    fp = open('MODULES/training/countValues.txt', 'rb')
    positive_review_count = int(fp.readline())
    negative_review_count = int(fp.readline())
    prob_positive = float(fp.readline())
    prob_negative = float(fp.readline())
    #get freqDist of words
    positive_counts = {}
    negative_counts = {}
    #from file training/negCounts.json
    json1_file = open('MODULES/training/negCounts.json')
    str_data = json1_file.read()
    negative_counts = json.loads(str_data)
    #from file training/posCounts.json
    json1_file = open('MODULES/training/posCounts.json')
    str_data = json1_file.read()
    positive_counts = json.loads(str_data)

    def make_class_prediction(text, counts, class_prob, class_count):
        # Naive-Bayes style score of `text` for one class.
        #class_prob => prior probablity
        #counts => no of times a particular word appears in the main freqDist
        # NOTE(review): this function never returns `prediction` and never
        # uses `class_prob` -- a trailing `return` appears to be missing
        # (it is only called from commented-out code below).
        # NOTE(review): under Python 2 the division below is integer
        # division unless the counts are floats -- verify.
        prediction = 1
        text_counts = Counter(re.split("\s+", text))
        #print 'Text Counts: ', text_counts
        for word in text_counts:
            # For every word in the text, we get the number of times that word occured in the reviews for a given class, add 1 to smooth the value, and divide by the total number of words in the class (plus the class_count to also smooth the denominator).
            # Smoothing ensures that we don't multiply the prediction by 0 if the word didn't exist in the training data.
            # We also smooth the denominator counts to keep things even.
            value = text_counts.get(word) * (
                (counts.get(word, 0) + 1) /
                (sum(counts.values()) + class_count))
            prediction *= value
        # Now we multiply by the probability of the class existing in the documents.

    training_set = nltk.classify.util.apply_features(extract_features, tweets)
    # Train the classifier
    NBClassifier = nltk.NaiveBayesClassifier.train(training_set)
    print NBClassifier.show_most_informative_features(30)

    #******************************************************DYNAMIC EXECUTION BEGINS***************************************************************
    polarity = 0.0
    tweets_data = []
    # NOTE(review): tweets_file is never closed.
    tweets_file = open(tweets_data_path, "r")
    for line in tweets_file:
        try:
            tweet = json.loads(line)
            #tweets_data.append(tweet)
            tweet = process_tweet(tweet['text'])
            tweetblob = textblob.TextBlob(tweet)
            polarity = tweetblob.sentiment.polarity
            # neg_pred = make_class_prediction(tweet, negative_counts, prob_negative, negative_review_count)
            # pos_pred = make_class_prediction(tweet, positive_counts, prob_positive, positive_review_count)
            #print 'tweet: ',tweet
            result = NBClassifier.classify(extract_features(tweet))
            print 'Tweet: ', tweet, ' POLARITY: ', polarity, ' RESULT: ', result
            # Bucket the tweet by classifier result OR TextBlob polarity;
            # each bucket is de-duplicated via set() on every pass.
            if result == 'positive' or polarity >= 0.1:
                positiveTweets['tweet'].append(tweet)
                positiveTweets['tweet'] = list(set(positiveTweets['tweet']))
                positiveTweets['polarity'].append((polarity + 0.1))
                c1 = len(positiveTweets['tweet'])
            elif result == 'negative' or polarity <= -0.1:
                negativeTweets['tweet'].append(tweet)
                negativeTweets['polarity'].append((polarity - 0.1))
                negativeTweets['tweet'] = list(set(negativeTweets['tweet']))
                c2 = len(negativeTweets['tweet'])
            else:
                neutralTweets['tweet'].append(tweet)
                neutralTweets['polarity'].append((polarity))
                neutralTweets['tweet'] = list(set(neutralTweets['tweet']))
                c3 = len(neutralTweets['tweet'])
        # NOTE(review): bare except silently swallows every error in the
        # loop body (bad JSON, network, classifier failures alike).
        except:
            continue
    print 'Out of this LOOOOOOOOOOP!'
    #raw_input()

    #saving results in res
    # NOTE(review): c1/c2/c3 are only bound inside the loop -- if a class
    # received no tweets, the lines below raise a NameError.
    res['positive'] = [c1]
    res['negative'] = [c2]
    res['neutral'] = [c3]

    #sort the tweet lists by decreasing polarity
    zipP = zip(positiveTweets['tweet'], positiveTweets['polarity'])
    zipN = zip(negativeTweets['tweet'], negativeTweets['polarity'])
    zipNu = zip(neutralTweets['tweet'], neutralTweets['polarity'])
    zipP = sorted(zipP, key=operator.itemgetter(1), reverse=True)
    zipN = sorted(zipN, key=operator.itemgetter(1), reverse=True)
    zipNu = sorted(zipNu, key=operator.itemgetter(1), reverse=True)

    #Pie Chart
    df = pd.DataFrame(res, columns=['positive', 'negative', 'neutral'])
    fig, ax = plt.subplots()
    # NOTE(review): the label order here (positive/neutral/negative) must
    # match the iteration order of res.values() -- confirm, since res was
    # filled positive/negative/neutral above.
    plt.pie(res.values(),
            labels=['positive', 'neutral', 'negative'],
            colors=['aqua', 'grey', 'crimson'],
            autopct="%1.1f%%")
    plt.title('Sentiment analysis')
    plt.axis('equal')
    # `bio` is a module-level buffer/target defined outside this excerpt.
    plt.savefig(bio, format="png")

    #Tag Cloud
    pos_text = ''
    neg_text = ''
    neu_text = ''
    print 'Only 100 tweets!', positiveTweets['tweet'][:100]
    # Only the first 10 tweets of each bucket feed the clouds.
    for word in positiveTweets['tweet'][:10]:
        pos_text += word + ' '
    for word in negativeTweets['tweet'][:10]:
        neg_text += word + ' '
    for word in neutralTweets['tweet'][:10]:
        neu_text += word + ' '
    ptags = make_tags(get_tag_counts(pos_text), maxsize=150)
    ntags = make_tags(get_tag_counts(neg_text), maxsize=150)
    nutags = make_tags(get_tag_counts(neu_text), maxsize=150)
    print 'ptags', len(ptags)
    # NOTE(review): each raw_input() blocks until the user presses Enter.
    create_tag_image(ptags, 'static/pcloud.png', size=(900, 600), layout=LAYOUT_MIX, fontname='Molengo', rectangular=True)
    print 'created pcloud'
    raw_input()
    create_tag_image(ntags, 'static/negcloud.png', size=(900, 600), layout=LAYOUT_MIX, fontname='Molengo', rectangular=True)
    print 'created pcloud'
    raw_input()
    create_tag_image(nutags, 'static/nucloud.png', size=(900, 600), layout=LAYOUT_MIX, fontname='Molengo', rectangular=True)
    print 'created pcloud'
    raw_input()
    #print total time to execute
    print("--- %s seconds ---" % (time.time() - start_time))
Mr. Sherlock Holmes, who was usually very late in the mornings, save upon those not infrequent occasions when he was up all night, was seated at the breakfast table. I stood upon the hearth-rug and picked up the stick which our visitor had left behind him the night before. It was a fine, thick piece of wood, bulbous-headed, of the sort which is known as a "Penang lawyer." Just under the head was a broad silver band nearly an inch across. "To James Mortimer, M.R.C.S., from his friends of the C.C.H.," was engraved upon it, with the date "1884." It was just such a stick as the old-fashioned family practitioner used to carry--dignified, solid, and reassuring. ''' tags = make_tags( get_tag_counts(text, maxsize=120, colors=COLOR_SCHEMES['audacity'])) data = create_html_data(tags, (440, 600), layout=LAYOUT_HORIZONTAL, fontname='PT Sans Regular') template_file = open( os.path.join(os.path.dirname(os.path.abspath(__file__)), 'web/template.html'), 'r') html_template = Template(''' <html> <head> <meta http-equiv="Content-Type" content="text/html; utf-8"> <title>PyTagCloud</title> <style type="text/css"> body{
__author__ = 'tvsamartha'
from pytagcloud import create_tag_image, make_tags
from pytagcloud.lang.counter import get_tag_counts


def _file_as_line(path):
    """Read `path` and return its text with newlines flattened to spaces.

    Fix: the original used str(f.readlines()), which yields the *repr* of
    the line list (brackets, quotes, escaped newline sequences), so the
    subsequent split on a real newline was a no-op and the cloud was built
    from repr noise instead of the file's words. Reading the text directly
    gives the actual content; the with-block also closes the handle, which
    the original leaked.
    """
    with open(path, "r") as handle:
        return handle.read().replace("\n", " ") + " "


# "spam" corpus -> space_tagcloud.png
final_str = _file_as_line("space_output.txt")
tags = make_tags(get_tag_counts(final_str), maxsize=120)
create_tag_image(tags, 'space_tagcloud.png', size=(1000, 700), fontname='Neucha')

###
# "not spam" corpus: tags are computed but, as in the original, no image
# is rendered for it in this excerpt.
final_str = _file_as_line("hardware_output.txt")
tags = make_tags(get_tag_counts(final_str), maxsize=120)
import os
import sys
import sqlite3
from Num_Str.Numerical import *
from Num_Str.Strings_comp import *
from Num_Str.Numerical2 import *
from collections import Counter
from Database import *
import distance

# Locate the project's sqlite database via the Database helper.
strDB = data()
conn = sqlite3.connect(strDB)
cursor = conn.cursor()
# NOTE: the original also ran 'select * from mappingapp_sample' and
# discarded the result; that dead query has been removed.
cursor.execute('select * from mappingapp_coordinates')
allAttr = cursor.fetchall()
conn.close()  # fix: the connection was never closed

# Column 1 of mappingapp_coordinates holds the strings for the cloud;
# skip NULL rows. join() replaces the original quadratic += loop and the
# `!= None` comparison.
bng_str = " ".join(attr[1] for attr in allAttr if attr[1] is not None)
if bng_str:
    bng_str += " "  # preserve the trailing space the += loop produced

# Directory of this script, derived portably instead of via
# str.replace('Wordle_Coord.py', "") on the absolute path.
file_path = os.path.dirname(os.path.abspath(__file__))
tags = make_tags(get_tag_counts(bng_str), maxsize=120)
# os.path.join keeps the output path portable (the original hard-coded
# the Windows-only 'IMGS\\' separator).
create_tag_image(tags, os.path.join(file_path, 'IMGS', 'cloud_large.png'),
                 size=(900, 700), fontname='Lobster')