Exemple #1
0
 def get(self):
   """Render a tag cloud built from all articles of one day that match a term.

   Reads the ``term`` and ``day`` request arguments, searches the "content"
   field for the term (restricted to the requested day through a "pubdate"
   filter), merges the per-document term frequencies and the extracted
   article text of every hit, and renders ``termcloud.html`` with the
   resulting cloud.
   """
   global term_freq

   wanted_term = self.get_argument("term")
   day_number = int(self.get_argument("day"))

   # Only keep hits published on the requested day. The stamp always starts
   # with the literal "200704" (presumably April 2007) and ends at T000000.
   pubdate_stamp = unicode("200704{0:02d}T000000".format(day_number))
   only_that_day = query.Term("pubdate", pubdate_stamp)
   hits = application.searcher_frequency.find(
       "content", wanted_term, limit=9999, filter=only_that_day)

   # Accumulate term counts and text over every matching article.
   merged_freq = defaultdict(int)
   paragraphs = []
   for hit in hits:
     per_doc = get_term_freq_doc(hit['id'], application.searcher_cosine)
     article_path = get_relative_path(hit['path'])
     paragraphs.extend(extract_content_from_xml(article_path))
     for word in per_doc:
       merged_freq[word] += per_doc[word]

   # One cloud for the combined articles; no single doc id or searcher applies,
   # so 0 and None are passed in those positions.
   cloud = tagcloud.make_cloud(0, None, term_freq, ' '.join(paragraphs), merged_freq)

   self.render("termcloud.html", term=wanted_term, day=day_number, tagcloud=cloud)
Exemple #2
0
 def get(self):
   """Write a document's raw source file to the response, then its tag cloud.

   Reads the ``docid`` request argument, looks the document up by its "id"
   field, streams the file stored at the hit's path line by line into the
   response, and finally appends the cloud produced from the document's
   term frequencies.

   Raises whatever ``res[0]`` raises when the search returns no hit; that
   case is not handled here.
   """
   docid = self.get_argument("docid")
   res = application.searcher_bm25f.find("id", unicode(docid))
   path = get_relative_path(res[0]['path'])

   # The context manager guarantees the file handle is released, even if
   # reading fails part-way through.
   with open(path, "r") as f:
     lines = f.readlines()
   for l in lines:
     self.write(l)

   searcher = application.searcher_frequency
   x = get_term_freq_doc(docid, searcher)
   # Debug dump of the term frequencies to stdout.
   pprint(x)
   # NOTE(review): make_cloud receives only the frequency mapping here --
   # confirm this matches the signature of tagcloud.make_cloud in use.
   p = tagcloud.make_cloud(x)
   self.write(p)
Exemple #3
0
 def get(self):
   """Render the page for one document: body, tag cloud, related articles, map.

   Reads the ``docid`` request argument, looks the document up by its "id"
   field, extracts its content from the XML file at the hit's path, and
   renders ``document.html`` with the title, content, tag cloud, related
   articles, and the locations / map link found in the text.
   """
   global term_freq, loc

   wanted_id = self.get_argument("docid")
   hits = application.searcher_bm25f.find("id", unicode(wanted_id))
   xml_path = get_relative_path(hits[0]['path'])
   cosine_searcher = application.searcher_cosine

   # Title comes from the index hit, the body from the XML file on disk.
   headline = hits[0]['title']
   paragraphs = extract_content_from_xml(xml_path)

   # Tag cloud and related articles both use the cosine searcher.
   cloud_text = ' '.join(paragraphs)
   cloud = tagcloud.make_cloud(wanted_id, cosine_searcher, term_freq, cloud_text)
   similar = relatedarticles.find_related(wanted_id, cosine_searcher, term_freq)

   # Location lookup works on the joined text and returns (locations, map link).
   map_text = " ".join(paragraphs)
   places, map_url = loc.find_locs_in_text(map_text, application.reader)

   self.render(
       "document.html",
       related=similar,
       title=headline,
       content=paragraphs,
       tagcloud=cloud,
       maploc=map_url,
       locations=places)
Exemple #4
0
    tweet_list = []
    x = 0
    user = api.get_user(handle)
    print ("Basic information for", user.name)
    print ("Screen Name:", user.screen_name)
    if handle == "@pes_pse":
        for page in range(1, pages):
            timeline = api.user_timeline(
                user_id=user.id, max_id=handles[handle]["max"], since_id=handles[handle]["since"], count=500, page=page
            )
            for tweet in timeline:
                x += 1
                if hashtag in tweet.text:
                    sheet1.write(5, 5, hashtag)
                for key in sheet_dict.keys():
                    if sheet_dict[key][2] != "twitter":
                        sheet1.write(sheet_dict[key][0] + x, sheet_dict[key][1], eval(sheet_dict[key][2]))
                    else:
                        sheet1.write(sheet_dict[key][0] + x, sheet_dict[key][1], sheet_dict[key][2])
                with open("hello.txt", "a") as file:
                    file.write(tweet.text.encode("utf8") + "\n")
                # print "Text:", tweet.text
                tweet_list.append(tweet.text.encode("utf8") + "/n")
                print "ID:", tweet.id
                # print "User ID:", tweet.user.id
                print "Created:", tweet.created_at
                # print "Retweeted:", tweet.retweeted
                # print "Retweet count:", tweet.retweet_count
        make_cloud("".join(tweet_list), handle)
wb.save("tweets2.xls")
__author__ = 'maikflow'
import xlrd
import tagcloud

# Collect the text of every third row (row indexes 1, 4, 7, ...) from each
# sheet of hashtags.xls and build a tag cloud from the concatenated text.
some_list = []
workbook = xlrd.open_workbook('hashtags.xls')
worksheets = workbook.sheet_names()
for worksheet_name in worksheets:
    worksheet = workbook.sheet_by_name(worksheet_name)
    # Iterate over rows and columns that actually exist. Empty or single-row
    # sheets are simply skipped instead of raising IndexError, and no lookup
    # is attempted past the last row.
    for curr_row in range(1, worksheet.nrows, 3):
        for curr_cell in range(worksheet.ncols):
            cell_value = worksheet.cell_value(curr_row, curr_cell)
            # NOTE(review): assumes every visited cell holds text; a numeric
            # cell has no .encode and would raise AttributeError -- confirm.
            encoded = cell_value.encode('utf8')
            print(encoded)
            some_list.append(encoded)
tagcloud.make_cloud(''.join(some_list), '#epp2014')
    x =0
    user = api.get_user(handle)
    # print ("Basic information for", user.name)
    print ("Screen Name:", user.screen_name)
    if handle == '@theprogressives':
        for page in range(1,pages):
            timeline = api.user_timeline(user_id=user.id,
                                         max_id=handles[handle]['max'],
                                         since_id=handles[handle]['since'],
                                         count=500,
                                         page=page)
            for tweet in timeline:
                x += 1
                if hashtag in tweet.text:
                    sheet1.write(5,5,hashtag)
                for key in sheet_dict.keys():
                    if sheet_dict[key][2] != 'twitter':
                        sheet1.write(sheet_dict[key][0]+x,sheet_dict[key][1],eval(sheet_dict[key][2]))
                    else:
                        sheet1.write(sheet_dict[key][0]+x,sheet_dict[key][1],sheet_dict[key][2])
                with open("hello.txt",'a') as file:
                    file.write(tweet.text.encode('utf8')+'\n')
                # print "Text:", tweet.text
                tweet_list.append(tweet.text.encode('utf8'))
                # print "ID:", tweet.id
                # print "User ID:", tweet.user.id
                # print "Created:", tweet.created_at
                # print "Retweeted:", tweet.retweeted
                # print "Retweet count:", tweet.retweet_count
        make_cloud(''.join(tweet_list),'/home/maikflow/Documents/python/twitter/wordclouds/'+handle)
# wb.save('tweets3.xls')
Exemple #7
0
    print("Screen Name:", user.screen_name)
    if handle == '@pes_pse':
        for page in range(1, pages):
            timeline = api.user_timeline(user_id=user.id,
                                         max_id=handles[handle]['max'],
                                         since_id=handles[handle]['since'],
                                         count=500,
                                         page=page)
            for tweet in timeline:
                x += 1
                if hashtag in tweet.text:
                    sheet1.write(5, 5, hashtag)
                for key in sheet_dict.keys():
                    if sheet_dict[key][2] != 'twitter':
                        sheet1.write(sheet_dict[key][0] + x,
                                     sheet_dict[key][1],
                                     eval(sheet_dict[key][2]))
                    else:
                        sheet1.write(sheet_dict[key][0] + x,
                                     sheet_dict[key][1], sheet_dict[key][2])
                with open("hello.txt", 'a') as file:
                    file.write(tweet.text.encode('utf8') + '\n')
                # print "Text:", tweet.text
                tweet_list.append(tweet.text.encode('utf8') + '/n')
                print "ID:", tweet.id
                # print "User ID:", tweet.user.id
                print "Created:", tweet.created_at
                # print "Retweeted:", tweet.retweeted
                # print "Retweet count:", tweet.retweet_count
        make_cloud(''.join(tweet_list), handle)
wb.save('tweets2.xls')
        for page in range(1, pages):
            timeline = api.user_timeline(user_id=user.id,
                                         max_id=handles[handle]['max'],
                                         since_id=handles[handle]['since'],
                                         count=500,
                                         page=page)
            for tweet in timeline:
                x += 1
                if hashtag in tweet.text:
                    sheet1.write(5, 5, hashtag)
                for key in sheet_dict.keys():
                    if sheet_dict[key][2] != 'twitter':
                        sheet1.write(sheet_dict[key][0] + x,
                                     sheet_dict[key][1],
                                     eval(sheet_dict[key][2]))
                    else:
                        sheet1.write(sheet_dict[key][0] + x,
                                     sheet_dict[key][1], sheet_dict[key][2])
                with open("hello.txt", 'a') as file:
                    file.write(tweet.text.encode('utf8') + '\n')
                # print "Text:", tweet.text
                tweet_list.append(tweet.text.encode('utf8'))
                # print "ID:", tweet.id
                # print "User ID:", tweet.user.id
                # print "Created:", tweet.created_at
                # print "Retweeted:", tweet.retweeted
                # print "Retweet count:", tweet.retweet_count
        make_cloud(
            ''.join(tweet_list),
            '/home/maikflow/Documents/python/twitter/wordclouds/' + handle)
# wb.save('tweets3.xls')
__author__ = 'maikflow'
import xlrd
import tagcloud

# Collect the text of every third row (row indexes 1, 4, 7, ...) from each
# sheet of hashtags.xls and build a tag cloud from the concatenated text.
some_list = []
workbook = xlrd.open_workbook('hashtags.xls')
worksheets = workbook.sheet_names()
for worksheet_name in worksheets:
    worksheet = workbook.sheet_by_name(worksheet_name)
    # Iterate over rows and columns that actually exist. Empty or single-row
    # sheets are simply skipped instead of raising IndexError, and no lookup
    # is attempted past the last row.
    for curr_row in range(1, worksheet.nrows, 3):
        for curr_cell in range(worksheet.ncols):
            cell_value = worksheet.cell_value(curr_row, curr_cell)
            # NOTE(review): assumes every visited cell holds text; a numeric
            # cell has no .encode and would raise AttributeError -- confirm.
            encoded = cell_value.encode('utf8')
            print(encoded)
            some_list.append(encoded)
tagcloud.make_cloud(''.join(some_list), '#epp2014')