def get(self):
    """Render a combined tag cloud for every article that matches *term*
    on the requested day (April 2007)."""
    global term_freq
    term = self.get_argument("term")
    day = int(self.get_argument("day"))
    # Restrict the search to articles published on the requested day.
    datefilter = query.Term("pubdate", unicode("200704{0:02d}T000000".format(day)))
    day_articles = application.searcher_frequency.find(
        "content", term, limit=9999, filter=datefilter)
    # Merge per-document term frequencies and collect all article bodies.
    combined_tf = defaultdict(int)
    bodies = []
    for article in day_articles:
        doc_tf = get_term_freq_doc(article['id'], application.searcher_cosine)
        bodies.extend(extract_content_from_xml(get_relative_path(article['path'])))
        for word in doc_tf:
            combined_tf[word] += doc_tf[word]
    # Build the word cloud over the concatenated article text.
    tags = tagcloud.make_cloud(0, None, term_freq, ' '.join(bodies), combined_tf)
    self.render("termcloud.html", term=term, day=day, tagcloud=tags)
def get(self):
    """Stream the raw file of the document identified by the ``docid``
    query argument, then append a tag cloud rendered from its term
    frequencies.
    """
    docid = self.get_argument("docid")
    res = application.searcher_bm25f.find("id", unicode(docid))
    path = get_relative_path(res[0]['path'])
    # FIX: the original opened the file without ever closing it, leaking
    # a file handle on every request; a with-block guarantees release.
    with open(path, "r") as f:
        for line in f:
            self.write(line)
    searcher = application.searcher_frequency
    freqs = get_term_freq_doc(docid, searcher)
    pprint(freqs)  # debug dump of the raw frequency table
    self.write(tagcloud.make_cloud(freqs))
def get(self):
    """Render a single document page: title, body, tag cloud, related
    articles and a map of locations mentioned in the text."""
    global term_freq, loc
    docid = self.get_argument("docid")
    res = application.searcher_bm25f.find("id", unicode(docid))
    path = get_relative_path(res[0]['path'])
    searcher = application.searcher_cosine
    # Title comes from the index hit; the body from the XML on disk.
    title = res[0]['title']
    cont = extract_content_from_xml(path)
    full_text = ' '.join(cont)
    # Derived page elements: cloud, related list, geo locations + map.
    tags = tagcloud.make_cloud(docid, searcher, term_freq, full_text)
    rel = relatedarticles.find_related(docid, searcher, term_freq)
    (locs, map_link) = loc.find_locs_in_text(full_text, application.reader)
    self.render("document.html", related=rel, title=title, content=cont,
                tagcloud=tags, maploc=map_link, locations=locs)
tweet_list = [] x = 0 user = api.get_user(handle) print ("Basic information for", user.name) print ("Screen Name:", user.screen_name) if handle == "@pes_pse": for page in range(1, pages): timeline = api.user_timeline( user_id=user.id, max_id=handles[handle]["max"], since_id=handles[handle]["since"], count=500, page=page ) for tweet in timeline: x += 1 if hashtag in tweet.text: sheet1.write(5, 5, hashtag) for key in sheet_dict.keys(): if sheet_dict[key][2] != "twitter": sheet1.write(sheet_dict[key][0] + x, sheet_dict[key][1], eval(sheet_dict[key][2])) else: sheet1.write(sheet_dict[key][0] + x, sheet_dict[key][1], sheet_dict[key][2]) with open("hello.txt", "a") as file: file.write(tweet.text.encode("utf8") + "\n") # print "Text:", tweet.text tweet_list.append(tweet.text.encode("utf8") + "/n") print "ID:", tweet.id # print "User ID:", tweet.user.id print "Created:", tweet.created_at # print "Retweeted:", tweet.retweeted # print "Retweet count:", tweet.retweet_count make_cloud("".join(tweet_list), handle) wb.save("tweets2.xls")
__author__ = 'maikflow' import xlrd import tagcloud some_list = [] workbook = xlrd.open_workbook('hashtags.xls') worksheets = workbook.sheet_names() for worksheet_name in worksheets: worksheet = workbook.sheet_by_name(worksheet_name) num_rows = worksheet.nrows - 1 num_cells = worksheet.ncols - 1 curr_row = -2 while curr_row < num_rows: row = worksheet.row(curr_row) # print 'Row:', curr_row curr_row += 3 curr_cell = -1 while curr_cell < num_cells: curr_cell += 1 # Cell Types: 0=Empty, 1=Text, 2=Number, 3=Date, 4=Boolean, 5=Error, 6=Blank # cell_type = worksheet.cell_type(curr_row, curr_cell) try: cell_value = worksheet.cell_value(curr_row, curr_cell) print cell_value.encode('utf8') some_list.append(cell_value.encode('utf8')) except IndexError: pass # print type(some_list[0].encode('utf8')) tagcloud.make_cloud(''.join(some_list), '#epp2014')
# Scrape the @theprogressives timeline, record each tweet into the
# spreadsheet and hello.txt, and write a word cloud image for the
# handle. (Python 2; nesting reconstructed from collapsed source.)
x = 0
user = api.get_user(handle)
print ("Screen Name:", user.screen_name)
if handle == '@theprogressives':
    for page in range(1, pages):
        timeline = api.user_timeline(user_id=user.id,
                                     max_id=handles[handle]['max'],
                                     since_id=handles[handle]['since'],
                                     count=500,
                                     page=page)
        for tweet in timeline:
            x += 1
            if hashtag in tweet.text:
                sheet1.write(5, 5, hashtag)
            for key in sheet_dict.keys():
                # NOTE(review): eval() on sheet_dict entries runs
                # arbitrary expressions -- trusted input only.
                if sheet_dict[key][2] != 'twitter':
                    sheet1.write(sheet_dict[key][0] + x,
                                 sheet_dict[key][1],
                                 eval(sheet_dict[key][2]))
                else:
                    sheet1.write(sheet_dict[key][0] + x,
                                 sheet_dict[key][1],
                                 sheet_dict[key][2])
            with open("hello.txt", 'a') as file:
                file.write(tweet.text.encode('utf8') + '\n')
            tweet_list.append(tweet.text.encode('utf8'))
    make_cloud(''.join(tweet_list),
               '/home/maikflow/Documents/python/twitter/wordclouds/' + handle)
print("Screen Name:", user.screen_name) if handle == '@pes_pse': for page in range(1, pages): timeline = api.user_timeline(user_id=user.id, max_id=handles[handle]['max'], since_id=handles[handle]['since'], count=500, page=page) for tweet in timeline: x += 1 if hashtag in tweet.text: sheet1.write(5, 5, hashtag) for key in sheet_dict.keys(): if sheet_dict[key][2] != 'twitter': sheet1.write(sheet_dict[key][0] + x, sheet_dict[key][1], eval(sheet_dict[key][2])) else: sheet1.write(sheet_dict[key][0] + x, sheet_dict[key][1], sheet_dict[key][2]) with open("hello.txt", 'a') as file: file.write(tweet.text.encode('utf8') + '\n') # print "Text:", tweet.text tweet_list.append(tweet.text.encode('utf8') + '/n') print "ID:", tweet.id # print "User ID:", tweet.user.id print "Created:", tweet.created_at # print "Retweeted:", tweet.retweeted # print "Retweet count:", tweet.retweet_count make_cloud(''.join(tweet_list), handle) wb.save('tweets2.xls')
# Walk the user timeline page by page, writing every tweet to the
# spreadsheet and hello.txt, then render one word cloud for the handle.
# (Python 2; nesting reconstructed from collapsed source.)
for page in range(1, pages):
    timeline = api.user_timeline(user_id=user.id,
                                 max_id=handles[handle]['max'],
                                 since_id=handles[handle]['since'],
                                 count=500,
                                 page=page)
    for tweet in timeline:
        x += 1
        if hashtag in tweet.text:
            sheet1.write(5, 5, hashtag)
        for key in sheet_dict.keys():
            # NOTE(review): eval() runs arbitrary expressions taken from
            # sheet_dict -- safe only with a trusted mapping.
            if sheet_dict[key][2] != 'twitter':
                sheet1.write(sheet_dict[key][0] + x,
                             sheet_dict[key][1],
                             eval(sheet_dict[key][2]))
            else:
                sheet1.write(sheet_dict[key][0] + x,
                             sheet_dict[key][1],
                             sheet_dict[key][2])
        with open("hello.txt", 'a') as file:
            file.write(tweet.text.encode('utf8') + '\n')
        tweet_list.append(tweet.text.encode('utf8'))

make_cloud(''.join(tweet_list),
           '/home/maikflow/Documents/python/twitter/wordclouds/' + handle)
__author__ = 'maikflow' import xlrd import tagcloud some_list = [] workbook = xlrd.open_workbook('hashtags.xls') worksheets = workbook.sheet_names() for worksheet_name in worksheets: worksheet = workbook.sheet_by_name(worksheet_name) num_rows = worksheet.nrows - 1 num_cells = worksheet.ncols - 1 curr_row = -2 while curr_row < num_rows: row = worksheet.row(curr_row) # print 'Row:', curr_row curr_row += 3 curr_cell = -1 while curr_cell < num_cells: curr_cell += 1 # Cell Types: 0=Empty, 1=Text, 2=Number, 3=Date, 4=Boolean, 5=Error, 6=Blank # cell_type = worksheet.cell_type(curr_row, curr_cell) try: cell_value = worksheet.cell_value(curr_row, curr_cell) print cell_value.encode('utf8') some_list.append(cell_value.encode('utf8')) except IndexError: pass # print type(some_list[0].encode('utf8')) tagcloud.make_cloud(''.join(some_list),'#epp2014')