예제 #1
0
 def process(self, tweet):
     """Record the lowercased hashtags carried by one tweet.

     Runs the base ``Profiler.process`` first, then increments
     ``self.counts`` once per hashtag occurrence and appends this tweet's
     list of tags (possibly empty) to ``self.savetweets``.
     """
     Profiler.process(self, tweet)
     tags_in_tweet = []
     for hashtag in tweet["entities"]["hashtags"]:
         lowered = hashtag["text"].lower()
         # assumes self.counts yields 0 for unseen tags -- TODO confirm
         self.counts[lowered] += 1
         tags_in_tweet.append(lowered)
     # one entry per tweet, even when the tweet has no hashtags
     self.savetweets.append(tags_in_tweet)
예제 #2
0
 def process(self, tweet):
     """Tally hashtag usage for a single tweet.

     After delegating to ``Profiler.process``, every hashtag text is
     lowercased, counted in ``self.counts``, and collected into a per-tweet
     list that is then appended to ``self.savetweets``.
     """
     Profiler.process(self, tweet)
     seen = []
     hashtags = tweet['entities']['hashtags']
     for entry in hashtags:
         name = entry['text'].lower()
         # presumably self.counts is a defaulting mapping; verify in __init__
         self.counts[name] += 1
         seen.append(name)
     # keep the (possibly empty) tag list for this tweet
     self.savetweets.append(seen)
예제 #3
0
 def process(self, tweet):
     """Add the links implied by one tweet and count the tweet for its author.

     Runs the base ``Profiler.process`` first. Which link is recorded depends
     on ``self.mode``:

     * ``'mentions'`` -- one link per entry in ``entities.user_mentions``,
       from the author to the mentioned screen name.
     * ``'replies'`` -- one link from ``in_reply_to_screen_name`` to the
       author, when the tweet is a reply.
     * anything else (retweets) -- one link from the author to the author
       of ``retweeted_status``, when present.

     Finally the author's node is created if needed and its ``tweetcount``
     is incremented.
     """
     Profiler.process(self, tweet)
     user = tweet["user"]["screen_name"]
     if self.mode == 'mentions':
         if "user_mentions" in tweet["entities"]:
             for mention in tweet["entities"]["user_mentions"]:
                 self.addlink(user, str(mention["screen_name"]))
     elif self.mode == 'replies':
         reply_to = tweet["in_reply_to_screen_name"]
         # identity test is the idiomatic (and safe) way to check for None
         if reply_to is not None:
             # NOTE(review): argument order here is (replied-to, author),
             # the reverse of the other two modes -- confirm this is intended.
             self.addlink(reply_to, user)
     else:  # default mode: retweets
         if "retweeted_status" in tweet:
             self.addlink(user,
                          tweet["retweeted_status"]["user"]["screen_name"])
     # add to the tweet count for this user, creating the node on first sight
     if user not in self.nodes:
         self.addsingle(user)
     self.nodes[user]["tweetcount"] += 1
예제 #4
0
 def process(self, tweet):
     """Bucket one tweet by its localized creation time.

     Runs the base ``Profiler.process`` first, parses ``tweet["created_at"]``,
     converts it to ``self.tz``, optionally rounds it down to a multiple of
     ``self.intervalCount`` in the unit given by ``self.intervalUnit``
     ("S", "M" or "H"), and formats it with ``self.format``. The formatted
     value is either counted in ``self.items`` (when ``self.aggregate`` is
     true) or appended to it.
     """
     Profiler.process(self, tweet)
     created_at = dateutil.parser.parse(tweet["created_at"])
     # Use self.tz for the conversion as well as the normalization; the
     # original referenced a bare ``tz`` name that this method never defines.
     # assumes self.tz is a pytz-style timezone exposing normalize() -- TODO confirm
     local_dt = self.tz.normalize(created_at.astimezone(self.tz))
     if self.intervalStr != '':
         # Subtract the remainder so the chosen field lands on a multiple of
         # intervalCount; smaller fields are left to self.format to drop.
         if self.intervalUnit == "S":
             local_dt = local_dt - datetime.timedelta(
                 seconds=local_dt.second % int(self.intervalCount))
         elif self.intervalUnit == "M":
             local_dt = local_dt - datetime.timedelta(
                 minutes=local_dt.minute % int(self.intervalCount))
         elif self.intervalUnit == "H":
             local_dt = local_dt - datetime.timedelta(
                 hours=local_dt.hour % int(self.intervalCount))
     # otherwise use format to aggregate values - though this treats intervalCount as 1
     result = local_dt.strftime(self.format)
     if self.aggregate:
         # self.items is used as a mapping of bucket -> count in this mode
         self.items[result] = self.items.get(result, 0) + 1
     else:
         # self.items is used as a plain list of formatted timestamps
         self.items.append(result)
예제 #5
0
 def process(self, tweet):
     """Handle one tweet by delegating entirely to ``Profiler.process``."""
     Profiler.process(self, tweet)
예제 #6
0
 def process(self, tweet):
     """Pass the tweet straight through to the base ``Profiler.process``."""
     Profiler.process(self, tweet)
예제 #7
0
# Command-line driver: read line-delimited tweet JSON from the files named on
# the command line (or stdin), feed each tweet to a Profiler, and print the
# resulting report.
opt_parser = optparse.OptionParser()
opt_parser.add_option("-o",
                      "--output",
                      dest="output",
                      type="str",
                      help="text | json (default: json)",
                      default="json")
opts, args = opt_parser.parse_args()

profiler = Profiler({"extended": True})

for line in fileinput.input(args):
    try:
        tweet = json.loads(line)
        profiler.process(tweet)
    except ValueError as e:
        # report the bad line and keep going with the rest of the input
        sys.stderr.write("uhoh: %s\n" % e)

data = profiler.report()

# Single-argument print(...) produces the same output as the print statement
# on Python 2 and is also valid on Python 3.
if opts.output == "json":
    # NOTE(review): this prints the report object's repr, not json.dumps()
    # output -- confirm that is what "json" mode is meant to emit.
    print(data)
else:
    print("Count:            " + '{:>9}'.format(str(data["count"])))
    print("Users:            " + '{:>9}'.format(str(data["usercount"])))
    print("User percentiles: " + sparkline.sparkify(data["userspercentiles"]))
    print("                  " + str(data["userspercentiles"]))
    # Guard the percentage: with no tweets processed the count is 0 and the
    # unguarded division raised ZeroDivisionError.
    total = float(data["count"])
    if total:
        hashtag_pct = float(data["hashtagcount"]) / total * 100.0
    else:
        hashtag_pct = 0.0
    print("Has hashtag:      " + '{:>9}'.format(str(
        data["hashtagcount"])) + " (" + str("%.2f" % hashtag_pct) + "%)")