def process(self, tweet):
    """Tally this tweet's hashtags.

    Runs the base Profiler bookkeeping first, then collects the tweet's
    hashtag texts (lowercased), increments self.counts for each one, and
    appends the per-tweet tag list to self.savetweets.
    """
    Profiler.process(self, tweet)
    # lowercased hashtag texts for this single tweet
    tags = [entry["text"].lower() for entry in tweet["entities"]["hashtags"]]
    for tag in tags:
        self.counts[tag] += 1
    self.savetweets.append(tags)
def process(self, tweet):
    """Tally this tweet's hashtags (lowercased).

    Delegates shared bookkeeping to Profiler.process, bumps the counter
    for every hashtag seen, and records the tweet's tag list in
    self.savetweets.
    """
    Profiler.process(self, tweet)
    seen = []
    for hashtag in tweet['entities']['hashtags']:
        lowered = hashtag['text'].lower()
        # count each occurrence, including repeats within one tweet
        self.counts[lowered] += 1
        seen.append(lowered)
    self.savetweets.append(seen)
def process(self, tweet):
    """Add this tweet's edges to the interaction graph.

    Depending on self.mode:
      - 'mentions': link author -> each mentioned screen name
      - 'replies':  link replied-to user -> author
      - otherwise (retweets): link author -> retweeted author
    Always ensures a node exists for the author and increments that
    node's per-user tweet count.
    """
    Profiler.process(self, tweet)
    user = tweet["user"]["screen_name"]
    if self.mode == 'mentions':
        if "user_mentions" in tweet["entities"]:
            for mention in tweet["entities"]["user_mentions"]:
                self.addlink(user, str(mention["screen_name"]))
    elif self.mode == 'replies':
        # idiom fix: was `not (x == None)`; None checks use identity
        if tweet["in_reply_to_screen_name"] is not None:
            self.addlink(tweet["in_reply_to_screen_name"], user)
    else:
        # default mode: retweets
        if "retweeted_status" in tweet:
            self.addlink(user, tweet["retweeted_status"]["user"]["screen_name"])
    # ensure the author has a node, then bump its tweet count
    # (idiom fix: was `not user in self.nodes`)
    if user not in self.nodes:
        self.addsingle(user)
    self.nodes[user]["tweetcount"] += 1
def process(self, tweet):
    """Bucket this tweet's creation time into self.items.

    Parses created_at, converts it to the profiler's timezone, and —
    when an interval is configured — floors the local time to the start
    of the current intervalCount-sized bucket (seconds, minutes, or
    hours). The bucket label is local_dt formatted with self.format.
    In aggregate mode, labels are counted in a dict; otherwise each
    label is appended to the list.
    """
    Profiler.process(self, tweet)
    created_at = dateutil.parser.parse(tweet["created_at"])
    # BUG FIX: original called created_at.astimezone(tz) with a bare
    # name `tz` that is not defined in this scope (NameError unless a
    # stray global existed); the target timezone is self.tz, matching
    # the self.tz.normalize(...) wrapper on the same expression.
    local_dt = self.tz.normalize(created_at.astimezone(self.tz))
    if self.intervalStr != '':
        if self.intervalUnit == "S":
            local_dt = local_dt - datetime.timedelta(
                seconds=local_dt.second % int(self.intervalCount))
        elif self.intervalUnit == "M":
            local_dt = local_dt - datetime.timedelta(
                minutes=local_dt.minute % int(self.intervalCount))
        elif self.intervalUnit == "H":
            local_dt = local_dt - datetime.timedelta(
                hours=local_dt.hour % int(self.intervalCount))
        # otherwise use format to aggregate values - though this treats
        # intervalCount as 1
    result = local_dt.strftime(self.format)
    if self.aggregate:
        # dict of label -> occurrence count
        self.items[result] = self.items.get(result, 0) + 1
    else:
        # flat list of labels, one per tweet
        self.items.append(result)
def process(self, tweet):
    """Run only the base Profiler's per-tweet bookkeeping.

    This subclass adds no per-tweet state of its own.
    """
    Profiler.process(self, tweet)
def process(self, tweet):
    """Delegate per-tweet processing to the base Profiler unchanged."""
    Profiler.process(self, tweet)
# CLI driver: read tweet JSON (one object per line) from the files named
# in argv (or stdin), feed each tweet to a Profiler, and report either
# the raw report dict (json mode) or an aligned plain-text summary.
opt_parser = optparse.OptionParser()
opt_parser.add_option("-o", "--output", dest="output", type="str",
                      help="text | json (default: json)", default="json")
opts, args = opt_parser.parse_args()

profiler = Profiler({"extended": True})
for line in fileinput.input(args):
    try:
        tweet = json.loads(line)
        profiler.process(tweet)
    except ValueError as e:
        # malformed JSON line: warn on stderr and keep going
        sys.stderr.write("uhoh: %s\n" % e)

data = profiler.report()
# COMPAT FIX: single-argument print() is identical output under
# Python 2 (parens around one expression are redundant) and makes the
# script importable/runnable under Python 3 as well.
if opts.output == "json":
    print(data)
else:
    # labels padded to 18 characters so the right-aligned values line up
    print("Count:            " + '{:>9}'.format(str(data["count"])))
    print("Users:            " + '{:>9}'.format(str(data["usercount"])))
    print("User percentiles: " + sparkline.sparkify(data["userspercentiles"]))
    print("                  " + str(data["userspercentiles"]))
    print("Has hashtag:      " + '{:>9}'.format(str(
        data["hashtagcount"])) + " (" + str("%.2f" % (
        float(data["hashtagcount"]) / float(data["count"]) * 100.0)) + "%)")