def calculate():
    """Report how many followers each of SCREEN_NAME's followers has.

    Reads the follower ids stored in Redis for SCREEN_NAME, loads each
    follower's cached ``info.json`` record, groups the followers by their
    ``followers_count``, writes that grouping back to Redis as JSON under the
    ``follower_freqs`` key, and prints a table of the most-followed followers
    followed by the (integer) average follower count.

    Takes no arguments and returns None; results are printed and stored in
    Redis. ``pp`` is defined elsewhere -- presumably a number formatter.
    """
    r = redis.Redis()  # Default connection settings on localhost

    follower_ids = r.smembers(getRedisIdByScreenName(SCREEN_NAME, 'follower_ids'))
    info_keys = [getRedisIdByUserId(follower_id, 'info.json')
                 for follower_id in follower_ids]

    # MGET needs at least one key, so skip the call when there are no ids.
    raw_followers = r.mget(info_keys) if info_keys else []

    # Keys with no stored value come back as None; ignore those followers.
    followers = [json.loads(f) for f in raw_followers if f is not None]

    # Map followers_count -> list of the users that have exactly that count.
    freqs = {}
    for f in followers:
        freqs.setdefault(f['followers_count'], []).append({
            'screen_name': f['screen_name'],
            'user_id': f['id'],
        })

    # It could take a few minutes to calculate freqs, so store a snapshot for
    # later use
    r.set(getRedisIdByScreenName(SCREEN_NAME, 'follower_freqs'), json.dumps(freqs))

    print('The top 10 followers from the sample:')

    # The first column holds screen names, so label it accordingly.
    field_names = ['Screen Name', 'Count']
    pt = PrettyTable(field_names=field_names)
    pt.align = 'l'

    # Take the ten highest distinct counts and list every user at each of
    # them (so ties can yield more than ten rows), largest count first.
    top_counts = sorted(freqs)[-10:]
    rows = [(user['screen_name'], count)
            for count in top_counts
            for user in freqs[count]]
    for screen_name, count in reversed(rows):
        pt.add_row([screen_name, pp(count)])
    print(pt)

    if not followers:
        # Nothing to average; dividing by zero followers would raise.
        print("\nNo follower info available for %s; cannot compute an average"
              % (SCREEN_NAME,))
        return

    # Floor division keeps the integer average on both Python 2 and 3.
    total = sum(f['followers_count'] for f in followers)
    avg = total // len(followers)
    print("\nThe average number of followers for %s's followers: %s"
          % (SCREEN_NAME, pp(avg)))
def calculate():
    """Report how many followers each of SCREEN_NAME's followers has.

    Reads the follower ids stored in Redis for SCREEN_NAME, loads each
    follower's cached ``info.json`` record, groups the followers by their
    ``followers_count``, writes that grouping back to Redis as JSON under the
    ``follower_freqs`` key, and prints a table of the most-followed followers
    followed by the (integer) average follower count.

    Takes no arguments and returns None; results are printed and stored in
    Redis. ``pp`` is defined elsewhere -- presumably a number formatter.
    """
    r = redis.Redis()  # Default connection settings on localhost

    follower_ids = r.smembers(getRedisIdByScreenName(SCREEN_NAME, 'follower_ids'))
    info_keys = [getRedisIdByUserId(follower_id, 'info.json')
                 for follower_id in follower_ids]

    # MGET needs at least one key, so skip the call when there are no ids.
    raw_followers = r.mget(info_keys) if info_keys else []

    # Keys with no stored value come back as None; ignore those followers.
    followers = [json.loads(f) for f in raw_followers if f is not None]

    # Map followers_count -> list of the users that have exactly that count.
    freqs = {}
    for f in followers:
        freqs.setdefault(f['followers_count'], []).append({
            'screen_name': f['screen_name'],
            'user_id': f['id'],
        })

    # It could take a few minutes to calculate freqs, so store a snapshot for
    # later use
    r.set(getRedisIdByScreenName(SCREEN_NAME, 'follower_freqs'), json.dumps(freqs))

    print('The top 10 followers from the sample:')

    # The first column holds screen names, so label it accordingly.
    fields = ['Screen Name', 'Count']
    # NOTE(review): fields=, set_field_align() and printt() are the legacy
    # PrettyTable API this version relies on -- confirm the installed release
    # still provides them.
    pt = PrettyTable(fields=fields)
    for f in fields:
        pt.set_field_align(f, 'l')

    # Take the ten highest distinct counts and list every user at each of
    # them (so ties can yield more than ten rows), largest count first.
    top_counts = sorted(freqs)[-10:]
    rows = [(user['screen_name'], count)
            for count in top_counts
            for user in freqs[count]]
    for screen_name, count in reversed(rows):
        pt.add_row([screen_name, pp(count)])
    pt.printt()

    if not followers:
        # Nothing to average; dividing by zero followers would raise.
        print("\nNo follower info available for %s; cannot compute an average"
              % (SCREEN_NAME,))
        return

    # Floor division keeps the integer average on both Python 2 and 3.
    total = sum(follower['followers_count'] for follower in followers)
    avg = total // len(followers)
    print("\nThe average number of followers for %s's followers: %s"
          % (SCREEN_NAME, pp(avg)))
def friendsFollowersInCommon(screen_names):
    """Print how many friends and followers the given users share.

    For each of the two relationships, intersects the per-user Redis sets
    into a scratch key, prints the size of the intersection, and removes the
    scratch keys again.

    Relies on a Redis client named ``r`` (and the helpers
    ``getRedisIdByScreenName`` / ``pp``) being defined in the enclosing scope.

    Args:
        screen_names: iterable of screen-name strings to compare.
    """
    # Debug trace of the arguments (spacing matches the original output).
    print("method gets called with args:  %s" % json.dumps(screen_names))

    friends_key = 'temp$friends_in_common'
    followers_key = 'temp$followers_in_common'

    try:
        # 'friend_ids' follows the same singular naming as 'follower_ids'.
        # NOTE(review): confirm this matches the key the harvesting code writes.
        r.sinterstore(friends_key,
                      [getRedisIdByScreenName(screen_name, 'friend_ids')
                       for screen_name in screen_names])

        r.sinterstore(followers_key,
                      [getRedisIdByScreenName(screen_name, 'follower_ids')
                       for screen_name in screen_names])

        joined_names = ', '.join(screen_names)
        print('Friends in common for %s: %s'
              % (joined_names, pp(r.scard(friends_key))))
        print('Followers in common for %s: %s'
              % (joined_names, pp(r.scard(followers_key))))
    finally:
        # Always remove the scratch keys, even if a step above raised.
        r.delete(friends_key)
        r.delete(followers_key)
def friendsFollowersInCommon(screen_names):
    """Print how many friends and followers the given users share.

    For each of the two relationships, intersects the per-user Redis sets
    into a scratch key, prints the size of the intersection, and removes the
    scratch keys again.

    Relies on a Redis client named ``r`` (and the helpers
    ``getRedisIdByScreenName`` / ``pp``) being defined in the enclosing scope.

    Args:
        screen_names: iterable of screen-name strings to compare.
    """
    friends_key = 'temp$friends_in_common'
    followers_key = 'temp$followers_in_common'

    try:
        r.sinterstore(friends_key,
                      [getRedisIdByScreenName(screen_name, 'friend_ids')
                       for screen_name in screen_names])

        r.sinterstore(followers_key,
                      [getRedisIdByScreenName(screen_name, 'follower_ids')
                       for screen_name in screen_names])

        joined_names = ', '.join(screen_names)
        print('Friends in common for %s: %s'
              % (joined_names, pp(r.scard(friends_key))))
        print('Followers in common for %s: %s'
              % (joined_names, pp(r.scard(followers_key))))
    finally:
        # Clean up scratch workspace, even if a step above raised.
        r.delete(friends_key)
        r.delete(followers_key)
def calculate():
    """Report how many followers each of SCREEN_NAME's followers has.

    Reads the follower ids stored in Redis for SCREEN_NAME, loads each
    follower's cached ``info.json`` record, groups the followers by their
    ``followers_count``, writes that grouping back to Redis as JSON under the
    ``follower_freqs`` key, and prints a table of the most-followed followers
    followed by the (integer) average follower count.

    Takes no arguments and returns None; results are printed and stored in
    Redis. ``pp`` is defined elsewhere -- presumably a number formatter.
    """
    r = redis.Redis()  # Default connection settings on localhost

    follower_ids = r.smembers(getRedisIdByScreenName(SCREEN_NAME, "follower_ids"))
    info_keys = [getRedisIdByUserId(follower_id, "info.json")
                 for follower_id in follower_ids]

    # MGET needs at least one key, so skip the call when there are no ids.
    raw_followers = r.mget(info_keys) if info_keys else []

    # Keys with no stored value come back as None; ignore those followers.
    followers = [json.loads(f) for f in raw_followers if f is not None]

    # Map followers_count -> list of the users that have exactly that count.
    freqs = {}
    for f in followers:
        freqs.setdefault(f["followers_count"], []).append({
            "screen_name": f["screen_name"],
            "user_id": f["id"],
        })

    # It could take a few minutes to calculate freqs, so store a snapshot for
    # later use
    r.set(getRedisIdByScreenName(SCREEN_NAME, "follower_freqs"), json.dumps(freqs))

    print("The top 10 followers from the sample:")

    # The first column holds screen names, so label it accordingly.
    field_names = ["Screen Name", "Count"]
    pt = PrettyTable(field_names=field_names)
    pt.align = "l"

    # Take the ten highest distinct counts and list every user at each of
    # them (so ties can yield more than ten rows), largest count first.
    top_counts = sorted(freqs)[-10:]
    rows = [(user["screen_name"], count)
            for count in top_counts
            for user in freqs[count]]
    for screen_name, count in reversed(rows):
        pt.add_row([screen_name, pp(count)])
    print(pt)

    if not followers:
        # Nothing to average; dividing by zero followers would raise.
        print("\nNo follower info available for %s; cannot compute an average"
              % (SCREEN_NAME,))
        return

    # Floor division keeps the integer average on both Python 2 and 3.
    total = sum(f["followers_count"] for f in followers)
    avg = total // len(followers)
    print("\nThe average number of followers for %s's followers: %s"
          % (SCREEN_NAME, pp(avg)))
reverse=True): pt.add_row([k, v]) if k == "100+": retweet_total += 100 * v elif k == 0: num_zero_retweets += v else: retweet_total += k * v num_tweets += v print pt print '\n%s of %s authored tweets were retweeted at least once' % \ (pp(num_tweets - num_zero_retweets), pp(num_tweets),) print '\t(%s tweet/retweet ratio)\n' % \ (1.0*(num_tweets - num_zero_retweets)/num_tweets,) print 'Those %s authored tweets generated %s retweets' % ( pp(num_tweets), pp(retweet_total), ) # <markdowncell> # Example 5-13. Counting hashtag entities in tweets (the_tweet__avg_hashtags_per_tweet.py) # <codecell> import sys
# Publish the view definition to the database before querying it.
view.sync(db)

# Each row is [retweet count, number of tweets with that count], so the
# headers are listed in that same order.
fields = ["Retweet Count", "Num Tweets"]
# NOTE(review): fields=, set_field_align() and printt() are the legacy
# PrettyTable API this script relies on -- confirm the installed release
# still provides them.
pt = PrettyTable(fields=fields)
for f in fields:
    pt.set_field_align(f, "l")

retweet_total, num_tweets, num_zero_retweets = 0, 0, 0

rows = [(row.key, row.value)
        for row in db.view("index/retweets_by_id", group=True)
        if row.key is not None]

# Highest retweet count first, with the open-ended "100+" bucket on top.
# The explicit key avoids relying on Python 2's int-vs-str ordering.
rows.sort(key=lambda kv: float("inf") if kv[0] == "100+" else kv[0],
          reverse=True)

for (k, v) in rows:
    pt.add_row([k, v])

    if k == "100+":
        # The bucket has no exact count; treat each tweet as 100 retweets.
        retweet_total += 100 * v
    elif k == 0:
        num_zero_retweets += v
    else:
        retweet_total += k * v

    num_tweets += v

pt.printt()

num_retweeted = num_tweets - num_zero_retweets

print("\n%s of %s authored tweets were retweeted at least once"
      % (pp(num_retweeted), pp(num_tweets)))
if num_tweets:
    print("\t(%s tweet/retweet ratio)\n" % (1.0 * num_retweeted / num_tweets,))
else:
    # An empty view would otherwise cause a division by zero.
    print("\t(no tweets found, so no ratio can be computed)\n")
print("Those %s authored tweets generated %s retweets"
      % (pp(num_tweets), pp(retweet_total)))
key=lambda x: x[0], reverse=True): pt.add_row([k, v]) if k == "100+": retweet_total += 100*v elif k == 0: num_zero_retweets += v else: retweet_total += k*v num_tweets += v print pt print '\n%s of %s authored tweets were retweeted at least once' % \ (pp(num_tweets - num_zero_retweets), pp(num_tweets),) print '\t(%s tweet/retweet ratio)\n' % \ (1.0*(num_tweets - num_zero_retweets)/num_tweets,) print 'Those %s authored tweets generated %s retweets' % (pp(num_tweets), pp(retweet_total),) # <markdowncell> # Example 5-13. Counting hashtag entities in tweets (the_tweet__avg_hashtags_per_tweet.py) # <codecell> import sys import couchdb from couchdb.design import ViewDefinition