def calculate():
    """Report follower-count statistics for SCREEN_NAME's followers.

    Reads the follower ids stored in Redis for SCREEN_NAME, loads the
    ``info.json`` value stored for each id, and groups the decoded profiles
    by their ``followers_count``. The grouping is written back to Redis as
    JSON under the ``follower_freqs`` key, then a table for the ten highest
    distinct counts and the average count across the sample are printed.

    Returns nothing; results go to Redis and to standard output.
    """
    r = redis.Redis()  # Default connection settings on localhost

    follower_ids = list(
        r.smembers(getRedisIdByScreenName(SCREEN_NAME, 'follower_ids')))

    info_keys = [
        getRedisIdByUserId(follower_id, 'info.json')
        for follower_id in follower_ids
    ]
    # Do not issue an MGET with an empty key list.
    raw_profiles = r.mget(info_keys) if info_keys else []
    # Entries that are None are skipped rather than decoded.
    followers = [json.loads(f) for f in raw_profiles if f is not None]

    # Map each followers_count to the users that have exactly that count.
    freqs = {}
    for f in followers:
        freqs.setdefault(f['followers_count'], []).append({
            'screen_name': f['screen_name'],
            'user_id': f['id'],
        })

    # It could take a few minutes to calculate freqs, so store a snapshot for
    # later use

    r.set(getRedisIdByScreenName(SCREEN_NAME, 'follower_freqs'),
          json.dumps(freqs))

    keys = sorted(freqs)

    print('The top 10 followers from the sample:')

    # Each row is (screen name, follower count), so label the columns to match.
    field_names = ['Screen Name', 'Count']
    pt = PrettyTable(field_names=field_names)
    pt.align = 'l'

    # Highest counts first. Every user at one of the ten highest distinct
    # counts gets a row, so ties can produce more than ten rows.
    for k in reversed(keys[-10:]):
        for user in reversed(freqs[k]):
            pt.add_row([user['screen_name'], pp(k)])

    print(pt)

    # One entry per follower; integer division keeps the average a whole
    # number. An empty sample reports 0 instead of raising.
    all_freqs = [f['followers_count'] for f in followers]
    avg = sum(all_freqs) // len(all_freqs) if all_freqs else 0

    print("\nThe average number of followers for %s's followers: %s"
          % (SCREEN_NAME, pp(avg)))
def calculate():
    """Print follower-count statistics for the followers of SCREEN_NAME.

    The follower ids for SCREEN_NAME are read from Redis, the ``info.json``
    value stored for each id is decoded, and the resulting profiles are
    bucketed by ``followers_count``. The buckets are saved to Redis as JSON
    under ``follower_freqs``; a table for the ten highest distinct counts and
    the sample-wide average are then printed.

    Returns nothing; results go to Redis and to standard output.
    """
    r = redis.Redis()  # Default connection settings on localhost

    follower_ids = list(r.smembers(getRedisIdByScreenName(SCREEN_NAME,
                        'follower_ids')))

    info_keys = [getRedisIdByUserId(follower_id, 'info.json')
                 for follower_id in follower_ids]
    # Do not issue an MGET with an empty key list.
    raw_profiles = r.mget(info_keys) if info_keys else []
    # Entries that are None are skipped rather than decoded.
    followers = [json.loads(f) for f in raw_profiles if f is not None]

    # followers_count -> list of users having exactly that count
    freqs = {}
    for f in followers:
        cnt = f['followers_count']
        if cnt not in freqs:
            freqs[cnt] = []

        freqs[cnt].append({'screen_name': f['screen_name'], 'user_id': f['id']})

    # It could take a few minutes to calculate freqs, so store a snapshot for
    # later use

    r.set(getRedisIdByScreenName(SCREEN_NAME, 'follower_freqs'),
          json.dumps(freqs))

    keys = sorted(freqs)

    print('The top 10 followers from the sample:')

    # Rows hold (screen name, follower count); the headers say so.
    fields = ['Screen Name', 'Count']
    pt = PrettyTable(fields=fields)
    for field in fields:
        pt.set_field_align(field, 'l')

    # Highest counts first. All users at each of the ten highest distinct
    # counts are listed, so ties can produce more than ten rows.
    for k in reversed(keys[-10:]):
        for user in reversed(freqs[k]):
            pt.add_row([user['screen_name'], pp(k)])

    pt.printt()

    # One entry per follower; integer division keeps the average a whole
    # number. An empty sample reports 0 instead of raising.
    all_freqs = [f['followers_count'] for f in followers]
    avg = sum(all_freqs) // len(all_freqs) if all_freqs else 0

    print("\nThe average number of followers for %s's followers: %s"
          % (SCREEN_NAME, pp(avg)))
def friendsFollowersInCommon(screen_names):
    """Print how many friends and followers the given accounts share.

    For every name in ``screen_names`` the Redis sets of friend ids and of
    follower ids are intersected into two scratch keys, the sizes of the
    intersections are printed, and the scratch keys are deleted again.

    Uses the module-level ``r`` (not assigned in this function; presumably
    a Redis client created elsewhere).

    Args:
        screen_names: sequence of screen names to compare.
    """
    # Two spaces after the colon reproduce the output of the former
    # two-argument print statement.
    print('method gets called with args:  %s' % json.dumps(screen_names))

    # 'friend_ids' (singular) follows the same pattern as 'follower_ids';
    # the previous 'friends_ids' spelling did not match it.
    friend_keys = [getRedisIdByScreenName(screen_name, 'friend_ids')
                   for screen_name in screen_names]
    follower_keys = [getRedisIdByScreenName(screen_name, 'follower_ids')
                     for screen_name in screen_names]

    try:
        r.sinterstore('temp$friends_in_common', friend_keys)
        r.sinterstore('temp$followers_in_common', follower_keys)

        print('Friends in common for %s: %s' % (
            ', '.join(screen_names),
            pp(r.scard('temp$friends_in_common'))))

        print('Followers in common for %s: %s' % (
            ', '.join(screen_names),
            pp(r.scard('temp$followers_in_common'))))
    finally:
        # Clean up the scratch keys even if a step above failed.
        r.delete('temp$friends_in_common')
        r.delete('temp$followers_in_common')
def friendsFollowersInCommon(screen_names):
    """Report the number of friends and followers shared by several accounts.

    The friend-id sets and follower-id sets of all ``screen_names`` are
    intersected into two scratch Redis keys, the cardinality of each
    intersection is printed, and both scratch keys are removed afterwards.

    Uses the module-level ``r`` (not assigned in this function).

    Args:
        screen_names: sequence of screen names to compare.
    """
    friends_tmp = 'temp$friends_in_common'
    followers_tmp = 'temp$followers_in_common'

    # Intersect the friend sets of every account.
    friend_sets = []
    for name in screen_names:
        friend_sets.append(getRedisIdByScreenName(name, 'friend_ids'))
    r.sinterstore(friends_tmp, friend_sets)

    # Intersect the follower sets of every account.
    follower_sets = []
    for name in screen_names:
        follower_sets.append(getRedisIdByScreenName(name, 'follower_ids'))
    r.sinterstore(followers_tmp, follower_sets)

    print('Friends in common for %s: %s'
          % (', '.join(screen_names), pp(r.scard(friends_tmp))))

    print('Followers in common for %s: %s'
          % (', '.join(screen_names), pp(r.scard(followers_tmp))))

    # Clean up scratch workspace
    for scratch_key in (friends_tmp, followers_tmp):
        r.delete(scratch_key)
def calculate():
    """Summarize how many followers each follower of SCREEN_NAME has.

    Follower ids for SCREEN_NAME are read from Redis, the ``info.json``
    value stored for each id is decoded, and the profiles are grouped by
    ``followers_count``. The grouping is stored in Redis as JSON under
    ``follower_freqs``; a table for the ten highest distinct counts and the
    average count over the sample are printed.

    Returns nothing; results go to Redis and to standard output.
    """
    r = redis.Redis()  # Default connection settings on localhost

    follower_ids = list(r.smembers(getRedisIdByScreenName(SCREEN_NAME, "follower_ids")))

    info_keys = [getRedisIdByUserId(follower_id, "info.json") for follower_id in follower_ids]
    # Do not issue an MGET with an empty key list.
    raw_profiles = r.mget(info_keys) if info_keys else []
    # Entries that are None are skipped rather than decoded.
    followers = [json.loads(f) for f in raw_profiles if f is not None]

    # followers_count -> users having exactly that count
    freqs = {}
    for f in followers:
        bucket = freqs.setdefault(f["followers_count"], [])
        bucket.append({"screen_name": f["screen_name"], "user_id": f["id"]})

    # It could take a few minutes to calculate freqs, so store a snapshot for later use

    r.set(getRedisIdByScreenName(SCREEN_NAME, "follower_freqs"), json.dumps(freqs))

    keys = sorted(freqs)

    print("The top 10 followers from the sample:")

    # Rows are (screen name, follower count); label the columns accordingly.
    field_names = ["Screen Name", "Count"]
    pt = PrettyTable(field_names=field_names)
    pt.align = "l"

    # Highest counts first. All users at each of the ten highest distinct
    # counts are listed, so ties can produce more than ten rows.
    for k in reversed(keys[-10:]):
        for user in reversed(freqs[k]):
            pt.add_row([user["screen_name"], pp(k)])

    print(pt)

    # One entry per follower; integer division keeps the average a whole
    # number. An empty sample reports 0 instead of raising.
    all_freqs = [f["followers_count"] for f in followers]
    avg = sum(all_freqs) // len(all_freqs) if all_freqs else 0

    print("\nThe average number of followers for %s's followers: %s" % (SCREEN_NAME, pp(avg)))
コード例 #6
0
                     reverse=True):
    pt.add_row([k, v])

    if k == "100+":
        retweet_total += 100 * v
    elif k == 0:
        num_zero_retweets += v
    else:
        retweet_total += k * v

    num_tweets += v

# Show the table built by the loop above, then the summary figures.
print(pt)

print('\n%s of %s authored tweets were retweeted at least once' %
      (pp(num_tweets - num_zero_retweets), pp(num_tweets),))
# Share of tweets retweeted at least once; 0.0 when no tweets were counted,
# which avoids dividing by zero.
print('\t(%s tweet/retweet ratio)\n' %
      (1.0 * (num_tweets - num_zero_retweets) / num_tweets
       if num_tweets else 0.0,))

print('Those %s authored tweets generated %s retweets' % (
    pp(num_tweets),
    pp(retweet_total),
))

# <markdowncell>

# Example 5-13. Counting hashtag entities in tweets (the_tweet__avg_hashtags_per_tweet.py)

# <codecell>

import sys
コード例 #7
0
# NOTE(review): `view` and `db` are created earlier in the script, outside
# this excerpt.
view.sync(db)

# Each view row is (key, value). The arithmetic below treats the key as a
# retweet count and the value as the number of tweets with that count, so the
# headers are listed in that order to match the rows added to the table.
fields = ["Retweet Count", "Num Tweets"]
pt = PrettyTable(fields=fields)
for f in fields:
    pt.set_field_align(f, "l")

retweet_total, num_tweets, num_zero_retweets = 0, 0, 0
# NOTE(review): keys appear to mix integers with the string "100+"; sorting
# such a mix relies on Python 2 comparison rules.
for (k, v) in sorted(
    [(row.key, row.value) for row in db.view("index/retweets_by_id", group=True) if row.key is not None],
    key=lambda x: x[0],
    reverse=True,
):
    pt.add_row([k, v])

    if k == "100+":
        # The "100+" bucket is counted as exactly 100 retweets per tweet.
        retweet_total += 100 * v
    elif k == 0:
        num_zero_retweets += v
    else:
        retweet_total += k * v

    num_tweets += v

pt.printt()

print("\n%s of %s authored tweets were retweeted at least once" % (pp(num_tweets - num_zero_retweets), pp(num_tweets)))
# Share of tweets retweeted at least once; 0.0 when the view returned no rows,
# which avoids dividing by zero.
print("\t(%s tweet/retweet ratio)\n" % (1.0 * (num_tweets - num_zero_retweets) / num_tweets if num_tweets else 0.0,))

print("Those %s authored tweets generated %s retweets" % (pp(num_tweets), pp(retweet_total)))
コード例 #8
0
                 key=lambda x: x[0], reverse=True):
    pt.add_row([k, v])

    if k == "100+":
        retweet_total += 100*v
    elif k == 0:
        num_zero_retweets += v
    else:
        retweet_total += k*v

    num_tweets += v

# Show the table built by the loop above, then the summary figures.
print(pt)

print('\n%s of %s authored tweets were retweeted at least once' %
      (pp(num_tweets - num_zero_retweets), pp(num_tweets),))
# Share of tweets retweeted at least once; 0.0 when no tweets were counted,
# which avoids dividing by zero.
print('\t(%s tweet/retweet ratio)\n' %
      (1.0 * (num_tweets - num_zero_retweets) / num_tweets
       if num_tweets else 0.0,))

print('Those %s authored tweets generated %s retweets' % (pp(num_tweets), pp(retweet_total),))

# <markdowncell>

# Example 5-13. Counting hashtag entities in tweets (the_tweet__avg_hashtags_per_tweet.py)

# <codecell>

import sys
import couchdb
from couchdb.design import ViewDefinition
コード例 #9
0
ファイル: Chapter5.py プロジェクト: anishpurohit/dropbox
                 key=lambda x: x[0], reverse=True):
    pt.add_row([k, v])

    if k == "100+":
        retweet_total += 100*v
    elif k == 0:
        num_zero_retweets += v
    else:
        retweet_total += k*v

    num_tweets += v

# Show the table built by the loop above, then the summary figures.
print(pt)

print('\n%s of %s authored tweets were retweeted at least once' %
      (pp(num_tweets - num_zero_retweets), pp(num_tweets),))
# Share of tweets retweeted at least once; 0.0 when no tweets were counted,
# which avoids dividing by zero.
print('\t(%s tweet/retweet ratio)\n' %
      (1.0 * (num_tweets - num_zero_retweets) / num_tweets
       if num_tweets else 0.0,))

print('Those %s authored tweets generated %s retweets' % (pp(num_tweets), pp(retweet_total),))

# <markdowncell>

# Example 5-13. Counting hashtag entities in tweets (the_tweet__avg_hashtags_per_tweet.py)

# <codecell>

import sys
import couchdb
from couchdb.design import ViewDefinition