def crawlmapper(screen_name):
    """Collect friend/follower profiles for one user and sample their names.

    When Redis holds no ``crawled_in_60min`` marker for ``screen_name``, one
    batch of friends and one batch of followers is fetched, every returned
    ``id`` is added to the user's ``friend_ids`` / ``follower_ids`` set, and
    the marker is written with a 3600 second expiry if either batch came back
    as an empty list.  When the marker exists, the profiles are rebuilt from
    the ids already stored in those two sets.

    Args:
        screen_name: screen name of the user to crawl.

    Returns:
        A list with the ``screen_name`` of every sampled profile that is not
        None and has fewer than 1000 followers and fewer than 1000 friends.
    """
    # NOTE(review): r, friends_limit, followers_limit, friends_sample and
    # followers_sample are not defined in this function; they are presumably
    # supplied by an enclosing or module scope -- confirm.
    marker_key = getRedisIdByScreenName(screen_name, 'crawled_in_60min')
    friends_key = getRedisIdByScreenName(screen_name, 'friend_ids')
    followers_key = getRedisIdByScreenName(screen_name, 'follower_ids')

    if r.get(marker_key) is not None:
        # Marker present: work from the ids already stored in Redis.
        friends_info = [RedisUserId2UserInfoWraper(uid)
                        for uid in r.smembers(friends_key)]
        followers_info = [RedisUserId2UserInfoWraper(uid)
                          for uid in r.smembers(followers_key)]
    else:
        friends_info = getFriendsBatch(screen_name, friends_limit)
        for friend in friends_info:
            r.sadd(friends_key, friend['id'])
        scard = r.scard(friends_key)
        print >> sys.stderr, 'Fetched %s ids for %s' % (scard, screen_name)

        followers_info = getFollowersBatch(screen_name, followers_limit)
        for follower in followers_info:
            r.sadd(followers_key, follower['id'])
        scard = r.scard(followers_key)
        print >> sys.stderr, 'Fetched %s ids for %s' % (scard, screen_name)

        if friends_info == [] or followers_info == []:
            # An empty batch sets the marker; it expires after one hour.
            r.set(marker_key, '1')
            r.expire(marker_key, 3600)

    sampled = flat([samplemapper(friends_info, friends_sample),
                    samplemapper(followers_info, followers_sample)])

    # Drop missing profiles and accounts with 1000 or more followers/friends
    # (what the original author called "Public Intellectual and Zombie").
    return [info['screen_name'] for info in sampled
            if info is not None
            and info['followers_count'] < 1000
            and info['friends_count'] < 1000]
def calculate():
    """Summarise how many followers SCREEN_NAME's followers have.

    Reads the ids in SCREEN_NAME's ``follower_ids`` set, loads whichever
    ``info.json`` profiles are cached for them, groups the profiles by
    ``followers_count``, stores that grouping in Redis as JSON under
    ``follower_freqs``, and prints a ranking table plus the mean count.

    Returns:
        None.  All results are printed to stdout.
    """
    r = redis.Redis()  # Default connection settings on localhost

    follower_ids = list(
        r.smembers(getRedisIdByScreenName(SCREEN_NAME, 'follower_ids')))

    # MGET needs at least one key, so skip the call when there are no ids.
    info_keys = [
        getRedisIdByUserId(follower_id, 'info.json')
        for follower_id in follower_ids
    ]
    raw_infos = r.mget(info_keys) if info_keys else []
    followers = [json.loads(f) for f in raw_infos if f is not None]

    # follower count -> list of {'screen_name', 'user_id'} for that count
    freqs = {}
    for f in followers:
        freqs.setdefault(f['followers_count'], []).append({
            'screen_name': f['screen_name'],
            'user_id': f['id'],
        })

    # It could take a few minutes to calculate freqs, so store a snapshot for later use

    r.set(getRedisIdByScreenName(SCREEN_NAME, 'follower_freqs'),
          json.dumps(freqs))

    if not followers:
        # Nothing to rank or average; reduce() over an empty list used to
        # raise TypeError at this point.
        print('No follower info available for %s' % SCREEN_NAME)
        return

    keys = sorted(freqs)

    print('The top 10 followers from the sample:')

    # The first column holds screen names (it was mislabelled 'Date').
    field_names = ['Screen Name', 'Count']
    pt = PrettyTable(field_names=field_names)
    pt.align = 'l'

    # keys[-10:] selects the ten highest distinct counts, so ties can yield
    # more than ten rows.
    top = [(user['screen_name'], k) for k in keys[-10:] for user in freqs[k]]
    for (user, freq) in reversed(top):
        pt.add_row([user, pp(freq)])

    print(pt)

    counts = [f['followers_count'] for f in followers]
    # Floor division keeps the integer result Python 2's "/" produced.
    avg = sum(counts) // len(counts)

    print("\nThe average number of followers for %s's followers: %s"
          % (SCREEN_NAME, pp(avg)))
def calculate():
    """Print SCREEN_NAME's most-followed followers and their mean follower count.

    Loads the cached ``info.json`` profile of every id in SCREEN_NAME's
    ``follower_ids`` set, buckets the profiles by ``followers_count``, stores
    the buckets in Redis as JSON under ``follower_freqs``, then prints a
    table and the average.

    Returns:
        None; output goes to stdout.
    """
    r = redis.Redis()  # Default connection settings on localhost

    follower_ids = list(r.smembers(getRedisIdByScreenName(SCREEN_NAME,
                        'follower_ids')))

    followers = []
    if follower_ids:  # MGET requires at least one key
        cached = r.mget([getRedisIdByUserId(follower_id, 'info.json')
                         for follower_id in follower_ids])
        followers = [json.loads(f) for f in cached if f is not None]

    freqs = {}
    for f in followers:
        cnt = f['followers_count']
        if cnt not in freqs:
            freqs[cnt] = []

        freqs[cnt].append({'screen_name': f['screen_name'], 'user_id': f['id']})

    # It could take a few minutes to calculate freqs, so store a snapshot for later use

    r.set(getRedisIdByScreenName(SCREEN_NAME, 'follower_freqs'),
          json.dumps(freqs))

    if not freqs:
        # Without this guard the average below would fail on an empty sample.
        print('No follower info available for %s' % SCREEN_NAME)
        return

    keys = sorted(freqs)

    print('The top 10 followers from the sample:')

    # NOTE(review): fields=, set_field_align() and printt() look like an old
    # PrettyTable API -- confirm against the installed version.
    # The first column holds screen names (it was mislabelled 'Date').
    fields = ['Screen Name', 'Count']
    pt = PrettyTable(fields=fields)
    for field in fields:
        pt.set_field_align(field, 'l')

    # Rows for the ten highest distinct counts, largest count first.
    rows = [(user['screen_name'], k) for k in keys[-10:]
            for user in freqs[k]]
    rows.reverse()
    for (user, freq) in rows:
        pt.add_row([user, pp(freq)])

    pt.printt()

    # Each count k is weighted by how many profiles share it.
    total = sum(k * len(freqs[k]) for k in keys)
    n_profiles = sum(len(freqs[k]) for k in keys)
    avg = total // n_profiles  # integer average, as Python 2's "/" gave

    print("\nThe average number of followers for %s's followers: %s"
          % (SCREEN_NAME, pp(avg)))
def friendsFollowersInCommon(screen_names):
    print "method gets called with args: ", json.dumps(screen_names)
    r.sinterstore('temp$friends_in_common', 
    [getRedisIdByScreenName(screen_name, 'friends_ids')
        for screen_name in screen_names]
    )
    
    r.sinterstore("temp$followers_in_common", 
    [getRedisIdByScreenName(screen_name, 'follower_ids')
        for screen_name in screen_names]
    )
    
    print 'Friends in common for %s: %s' % (', '.join(screen_names),
    pp(r.scard('temp$friends_in_common')))
    
    print 'Followers in common for %s: %s' % (', '.join(screen_names), 
    pp(r.scard('temp$followers_in_common')))
    
    r.delete('temp$friends_in_common')
    r.delete('temp$followers_in_common')
Ejemplo n.º 5
0
    def crawlmapper(screen_name):
        """Fetch or reload a user's friends and followers, then sample them.

        Without a ``crawled_in_60min`` marker in Redis, a batch of friends
        and a batch of followers is fetched and each ``id`` is added to the
        matching Redis set; the marker is then set for 3600 seconds if either
        batch equals ``[]``.  With the marker present, profiles are rebuilt
        from the stored ``friend_ids`` / ``follower_ids`` sets instead.

        Returns the ``screen_name`` of each sampled profile that is not None
        and has both ``followers_count`` and ``friends_count`` below 1000.
        """

        def key_for(kind):
            # Redis key of this user's entry of the given kind.
            return getRedisIdByScreenName(screen_name, kind)

        def remember(kind, profiles):
            # Add every profile id to the set, then log the set's size.
            for profile in profiles:
                r.sadd(key_for(kind), profile['id'])
            scard = r.scard(key_for(kind))
            print >> sys.stderr, 'Fetched %s ids for %s' % (scard, screen_name)

        def reload(kind):
            # Turn the stored ids back into profiles.
            stored_ids = list(r.smembers(key_for(kind)))
            return map(RedisUserId2UserInfoWraper, stored_ids)

        def keep(info):
            # filter Public Intellectual and Zombie
            if info is None:
                return False
            return (info['followers_count'] < 1000 and
                    info['friends_count'] < 1000)

        if r.get(key_for('crawled_in_60min')) is None:
            friends_info = getFriendsBatch(screen_name, friends_limit)
            remember('friend_ids', friends_info)

            followers_info = getFollowersBatch(screen_name, followers_limit)
            remember('follower_ids', followers_info)

            if friends_info == [] or followers_info == []:
                r.set(key_for('crawled_in_60min'), '1')
                r.expire(key_for('crawled_in_60min'), 3600)
        else:
            friends_info = reload('friend_ids')
            followers_info = reload('follower_ids')

        sampled = flat(
            map(samplemapper, [friends_info, followers_info],
                [friends_sample, followers_sample]))
        return [info['screen_name'] for info in filter(keep, sampled)]
def friendsFollowersInCommon(screen_names):
    """Report the number of friends and followers shared by all given users.

    Intersects the users' ``friend_ids`` sets and their ``follower_ids`` sets
    into two scratch keys, prints the size of each intersection, and deletes
    the scratch keys afterwards.

    Args:
        screen_names: sequence of screen names to compare.
    """
    friends_scratch = 'temp$friends_in_common'
    followers_scratch = 'temp$followers_in_common'

    friend_keys = [getRedisIdByScreenName(screen_name, 'friend_ids')
                   for screen_name in screen_names]
    r.sinterstore(friends_scratch, friend_keys)

    follower_keys = [getRedisIdByScreenName(screen_name, 'follower_ids')
                     for screen_name in screen_names]
    r.sinterstore(followers_scratch, follower_keys)

    print('Friends in common for %s: %s'
          % (', '.join(screen_names), pp(r.scard(friends_scratch))))

    print('Followers in common for %s: %s'
          % (', '.join(screen_names), pp(r.scard(followers_scratch))))

    # Clean up scratch workspace
    for scratch_key in (friends_scratch, followers_scratch):
        r.delete(scratch_key)
def calculate():
    """Rank SCREEN_NAME's followers by follower count and print the average.

    The ids in SCREEN_NAME's ``follower_ids`` set are resolved to their
    cached ``info.json`` profiles; profiles are grouped by
    ``followers_count`` and the grouping is saved to Redis as JSON under
    ``follower_freqs``.  A table of the highest counts and the mean count
    are then printed to stdout.

    Returns:
        None.
    """
    r = redis.Redis()  # Default connection settings on localhost

    follower_ids = list(r.smembers(getRedisIdByScreenName(SCREEN_NAME, "follower_ids")))

    info_keys = [getRedisIdByUserId(follower_id, "info.json") for follower_id in follower_ids]
    # Only call MGET with at least one key.
    cached_infos = r.mget(info_keys) if info_keys else []
    followers = [json.loads(f) for f in cached_infos if f is not None]

    freqs = {}
    for f in followers:
        entry = {"screen_name": f["screen_name"], "user_id": f["id"]}
        freqs.setdefault(f["followers_count"], []).append(entry)

    # It could take a few minutes to calculate freqs, so store a snapshot for later use

    r.set(getRedisIdByScreenName(SCREEN_NAME, "follower_freqs"), json.dumps(freqs))

    if not followers:
        # An empty sample has no ranking and no average; the former
        # reduce() call raised TypeError on it.
        print("No follower info available for %s" % SCREEN_NAME)
        return

    keys = sorted(freqs)

    print("The top 10 followers from the sample:")

    # Column one lists screen names (the old header said 'Date').
    field_names = ["Screen Name", "Count"]
    pt = PrettyTable(field_names=field_names)
    pt.align = "l"

    # Walk the ten highest distinct counts from largest to smallest; users
    # sharing a count are emitted in reverse bucket order, as before.
    for k in reversed(keys[-10:]):
        for user in reversed(freqs[k]):
            pt.add_row([user["screen_name"], pp(k)])

    print(pt)

    all_freqs = [f["followers_count"] for f in followers]
    avg = sum(all_freqs) // len(all_freqs)  # integer mean, matching Python 2 "/"

    print("\nThe average number of followers for %s's followers: %s" % (SCREEN_NAME, pp(avg)))
Ejemplo n.º 8
0
    t = login()

    r = redis.Redis()
    getFriends = functools.partial(_getFriendsOrFollowersUsingFunc, t.friends.ids, 'friend_ids', t, r)
    getFollowers = functools.partial(_getFriendsOrFollowersUsingFunc, t.followers.ids, 'follower_ids', t, r)
    screen_name = SCREEN_NAME

    # data retrieval
    print >> sys.stderr, 'Getting friends for %s...' % (screen_name, )
    getFriends(screen_name, limit=MAXINT)
    print >> sys.stderr, 'Getting followers for %s...' % (screen_name, )
    getFollowers(screen_name, limit=MAXINT)

    # redis calculations
    n_friends = r.scard(getRedisIdByScreenName(screen_name, 'friend_ids'))
    n_followers = r.scard(getRedisIdByScreenName(screen_name, 'follower_ids'))

    n_friends_diff_followers = r.sdiffstore('temp', [getRedisIdByScreenName(screen_name, 'friend_ids'),
                                                     getRedisIdByScreenName(screen_name, 'follower_ids')])
    r.delete('temp')
    n_followers_diff_friends = r.sdiffstore('temp', [getRedisIdByScreenName(screen_name, 'follower_ids'),
                                                     getRedisIdByScreenName(screen_name, 'friend_ids')])
    r.delete('temp')
    n_friends_inter_followers = r.sinterstore('temp', [getRedisIdByScreenName(screen_name, 'follower_ids'),
                                                       getRedisIdByScreenName(screen_name, 'friend_ids')])
    r.delete('temp')

    print '%s is following %s' % (screen_name, locale.format('%d', n_friends, True))
    print '%s is being followed by %s' % (screen_name, locale.format('%d', n_followers, True))
    print '%s of %s are not following %s back' % (locale.format('%d', n_friends_diff_followers, True),
            response = makeTwitterRequest(t.friends.ids,
                                          screen_name=screen_name,
                                          cursor=cursor)
            ids += response['ids']
            cursor = response['next_cursor']
            print >> sys.stderr, 'Fetched %i ids for %s' % (len(ids),
                                                            screen_name)
        except twitter.api.TwitterHTTPError, e:
            if e.e.code == 404:
                print >> sys.stderr, "404 Error with screen_name '%s'. Continuing." % screen_name
                break

    # Store the ids into Redis

    [
        r.sadd(getRedisIdByScreenName(screen_name, 'friend_ids'), _id)
        for _id in ids
    ]

    count += 1
    print >> sys.stderr, '\t\tFetched friends for %s / %s' % (
        count, len(screen_names))

print >> sys.stderr, 'Done fetching friend ids...'

####################################
# Resolve screen_names for user_ids
####################################

while len(screen_names) > 0:
    (screen_names_str, screen_names) = (','.join(screen_names[:100]),
getFollowers = functools.partial(
    _getFriendsOrFollowersUsingFunc, t.followers.ids, 'follower_ids', t, r)

screen_name = SCREEN_NAME

# Builds the Redis key of one of screen_name's entries.
key_for = functools.partial(getRedisIdByScreenName, screen_name)

# get the data

print >> sys.stderr, 'Getting friends for %s...' % (screen_name, )
getFriends(screen_name, limit=MAXINT)

print >> sys.stderr, 'Getting followers for %s...' % (screen_name, )
getFollowers(screen_name, limit=MAXINT)

# use redis to compute the numbers

n_friends = r.scard(key_for('friend_ids'))

n_followers = r.scard(key_for('follower_ids'))

# Friends minus followers: the value returned by sdiffstore is kept and the
# 'temp' key it wrote is removed straight away.
n_friends_diff_followers = r.sdiffstore(
    'temp', [key_for('friend_ids'), key_for('follower_ids')])
r.delete('temp')

# Followers minus friends.
n_followers_diff_friends = r.sdiffstore(
    'temp', [key_for('follower_ids'), key_for('friend_ids')])
import sys
import redis
import json
from twitter__util import getRedisIdByScreenName
from twitter__util import getRedisIdByUserId

# Print each distinct lower-cased profile location found among EGO and the
# ids in EGO's friend set.
EGO = sys.argv[1]

r = redis.Redis()

friend_ids = list(r.smembers(getRedisIdByScreenName(EGO, 'friend_ids')))
ego_id = json.loads(r.get(getRedisIdByScreenName(EGO, 'info.json')))['id']

# EGO first, then every friend.
ids = [ego_id] + friend_ids

normalized_locations = []
for user_id in ids:
	location_json = r.get(getRedisIdByUserId(str(user_id), 'info.json'))
	if not location_json:
		continue  # nothing cached under this id
	location = json.loads(location_json)['location']
	if not location:
		continue  # profile carries no location text
	normalized_locations.append(location.lower().encode("utf-8"))

# Collapse duplicates before printing.
unique_locations = set(normalized_locations)

for ul in unique_locations:
	print(ul)
    while cursor != 0:
        try:
            response = makeTwitterRequest(t.friends.ids, 
                                          screen_name=screen_name, 
                                          cursor=cursor)
            ids += response['ids']
            cursor = response['next_cursor']
            print >> sys.stderr, 'Fetched %i ids for %s' % (len(ids), screen_name)
        except twitter.api.TwitterHTTPError, e:
            if e.e.code == 404:
                print >> sys.stderr, "404 Error with screen_name '%s'. Continuing." % screen_name
                break

    # Store the ids into Redis

    [r.sadd(getRedisIdByScreenName(screen_name, 'friend_ids'), _id) for _id in
     ids]

    count += 1
    print >> sys.stderr, '\t\tFetched friends for %s / %s' % (count, len(screen_names))

print >> sys.stderr, 'Done fetching friend ids...'

####################################
# Resolve screen_names for user_ids
####################################

while len(screen_names) > 0:
    (screen_names_str, screen_names) = (','.join(screen_names[:100]),
                                        screen_names[100:])
Ejemplo n.º 13
0
                                 t.followers.ids, 'follower_ids', t, r, limit=200)

screen_names = SCREEN_NAME
friends_ids = []
followers_ids = []
union = []
# get the data
for screen_name in screen_names:
	if screen_name != None:

		print >> sys.stderr, 'Getting friends for %s...' % (screen_name, )
		friends_ids = getFriends(screen_name, limit=200)
		print >> sys.stderr, 'Getting followers for %s...' % (screen_name, )
		followers_ids = getFollowers(screen_name, limit=200)
		# make union of friends and followers
		union = r.sunion([getRedisIdByScreenName(screen_name,'friends_ids'),getRedisIdByScreenName(screen_name, 'follower_ids')])
		# convert from set to list
		union = list(union)
		# we need just 200 of them
		union = union[1:200]

		# get info for all 200; needed for the location
		friends_info = getUserInfo(t, r, user_ids=union, sample=1.0)
		

		print "Now harvesting ", screen_name,"'s friends subgraphs"
		for current_friend in friends_info:
			if current_friend != None:

				print "+",current_friend['screen_name']," From ",
				if current_friend['location'] != None and current_friend['location']!= "" :
Ejemplo n.º 14
0
import sys
import redis
import networkx as nx
import json
import matplotlib.pyplot as plt
from twitter__util import getRedisIdByScreenName
from twitter__util import getRedisIdByUserId

r = redis.Redis()
graph = nx.Graph()

EGO = sys.argv[1]
EGO_ID = json.loads(r.get(getRedisIdByScreenName(EGO, 'info.json')))['id']
TEMP_UNION_KEY = 'temp$union'

# Store the union of EGO's friend and follower ids under a scratch key so it
# can be intersected inside Redis below.
r.sunionstore(TEMP_UNION_KEY, [getRedisIdByScreenName(EGO,'friend_ids'), getRedisIdByScreenName(EGO,'follower_ids')])
friend_follower_ids = list(r.smembers(TEMP_UNION_KEY))

# for each friend/follower :
for friend_follower_id in friend_follower_ids:
	# intersect friend/follower's friends with ego's union
	# NB: the getFriends function used for mining only uses screen_name keys
	try:
		screen_name = json.loads(r.get(getRedisIdByUserId(friend_follower_id, 'info.json')))['screen_name']
	except (TypeError, ValueError, KeyError):
		# No cached profile (r.get gave None), unparsable JSON, or no
		# 'screen_name' field: skip this id.  Anything else (Redis
		# failures, Ctrl-C) now propagates instead of being swallowed.
		continue
	intersecting_ids = list(r.sinter(TEMP_UNION_KEY, getRedisIdByScreenName(screen_name, 'friend_ids')))
	# add edge between EGO and the current friend/follower
	graph.add_edge(EGO_ID, friend_follower_id)
	# add edges between each id in the intersection and the id of the current friend/follower
import redis
import couchdb
import sys
from twitter__util import getRedisIdByScreenName
from twitter__util import getRedisIdByUserId

SCREEN_NAME = sys.argv[1]
THRESHOLD = int(sys.argv[2])

# Connect using default settings for localhost

r = redis.Redis()

# Compute screen_names for friends

friend_ids = r.smembers(getRedisIdByScreenName(SCREEN_NAME, 'friend_ids'))
friend_screen_names = []
for friend_id in friend_ids:
    try:
        friend_screen_names.append(json.loads(r.get(getRedisIdByUserId(friend_id,
                                   'info.json')))['screen_name'].lower())
    except TypeError, e:
        continue  # not locally available in Redis - look it up or skip it

# Pull the  list of (entity, frequency) tuples from CouchDB

server = couchdb.Server('http://localhost:5984')
db = server['tweets-user-timeline-' + SCREEN_NAME]

entities_freqs = sorted([(row.key, row.value) for row in
                        db.view('index/entity_count_by_doc', group=True)],
locale.setlocale(locale.LC_ALL, "")
t = login()
r = redis.Redis()

# Both fetchers wrap the same helper; only the API endpoint and the name of
# the id set differ.
getFriends = functools.partial(
    _getFriendsOrFollowersUsingFunc, t.friends.ids, "friend_ids", t, r
)
getFollowers = functools.partial(
    _getFriendsOrFollowersUsingFunc, t.followers.ids, "follower_ids", t, r
)

screen_name = SCREEN_NAME

print >> sys.stderr, "Getting friends for %s... " % (screen_name,)
getFriends(screen_name, limit=MAXINT)
print >> sys.stderr, "Getting followers for %s..." % (screen_name,)
getFollowers(screen_name, limit=MAXINT)


def _key(kind):
    # Redis key of one of screen_name's entries.
    return getRedisIdByScreenName(screen_name, kind)


# Number of Friends and Followers
n_friends = r.scard(_key("friend_ids"))
n_followers = r.scard(_key("follower_ids"))

# Friends minus followers; "temp" is scratch space removed right after.
n_friends_diff_followers = r.sdiffstore("temp", [_key("friend_ids"), _key("follower_ids")])

r.delete("temp")

# Followers minus friends.
n_followers_diff_friends = r.sdiffstore("temp", [_key("follower_ids"), _key("friend_ids")])

r.delete("temp")
import json
import os
import sys

import networkx as nx
import redis

from twitter__util import getRedisIdByScreenName
from twitter__util import getRedisIdByUserId

# Build a graph for SCREEN_NAME and pickle it under out/<SCREEN_NAME>.gpickle.
SCREEN_NAME = sys.argv[1]

g = nx.Graph()
r = redis.Redis()

# Compute all ids for nodes appearing in the graph

friend_ids = list(r.smembers(getRedisIdByScreenName(SCREEN_NAME, 'friend_ids')))
id_for_screen_name = json.loads(r.get(getRedisIdByScreenName(SCREEN_NAME,
                                'info.json')))['id']
ids = [id_for_screen_name] + friend_ids

# NOTE(review): nothing visible here adds nodes or edges to g before it is
# written out -- confirm the graph-building step is present.

# Pickle the graph to disk...

# Create the directory first and fall back to the existence check, so a
# directory that appears between check and creation is not an error.
# This relies on "import os" being present at the top of the script.
try:
    os.mkdir('out')
except OSError:
    if not os.path.isdir('out'):
        raise

filename = os.path.join('out', SCREEN_NAME + '.gpickle')
nx.write_gpickle(g, filename)

print('Pickle file stored in: %s' % filename)

# You can un-pickle like so...
Ejemplo n.º 18
0
        try:
            response = makeTwitterRequest(t, 
                                          t.friends.ids, 
                                          screen_name=screen_name, 
                                          cursor=cursor)
            ids += response['ids']
            cursor = response['next_cursor']
            print >> sys.stderr, 'Fetched %i ids for %s' % (len(ids), screen_name)
        except twitter.api.TwitterHTTPError, e:
            if e.e.code == 404:
                print >> sys.stderr, "404 Error with screen_name '%s'. Continuing." % screen_name
                break

    # Store the ids into Redis

    [r.sadd(getRedisIdByScreenName(screen_name, 'friend_ids'), _id) for _id in
     ids]

    count += 1
    print >> sys.stderr, '\t\tFetched friends for %s / %s' % (count, len(screen_names))

print >> sys.stderr, 'Done fetching friend ids...'

####################################
# Resolve screen_names for user_ids
####################################

while len(screen_names) > 0:
    (screen_names_str, screen_names) = (','.join(screen_names[:100]),
                                        screen_names[100:])
Ejemplo n.º 19
0
# Crawl SCREEN_NAME's friends and followers, then the friends of each of them.
MAXINT = sys.maxint

t = login()
r = redis.Redis()

# get info and friends for central user
getUserInfo(t,r,[SCREEN_NAME])
getFriends = functools.partial(_getFriendsOrFollowersUsingFunc, t.friends.ids, 'friend_ids', t, r)
getFollowers = functools.partial(_getFriendsOrFollowersUsingFunc, t.followers.ids, 'follower_ids', t, r)

# get friends and followers of central user
friend_ids = getFriends(SCREEN_NAME)
follower_ids = getFollowers(SCREEN_NAME)

# do union of friends and followers
ids = list(r.sunion(getRedisIdByScreenName(SCREEN_NAME,'friend_ids'), getRedisIdByScreenName(SCREEN_NAME,'follower_ids')))

# get user info for friends and followers
getUserInfo(t, r, user_ids=ids)

# get friends of friends and followers
for user_id in ids:
	raw_info = r.get(getRedisIdByUserId(user_id, 'info.json'))
	if raw_info is None:
		# No profile is cached under this id; json.loads(None) would raise
		# TypeError and end the whole crawl, so skip just this user.
		sys.stderr.write('No cached info for user id %s; skipping\n' % user_id)
		continue
	screen_name = json.loads(raw_info)['screen_name']
	try:
		getFriends(screen_name)
	except Exception as e:
		# Still best-effort per user, but Ctrl-C is no longer trapped and
		# the failure is reported instead of vanishing.
		sys.stderr.write('Could not fetch friends for %s: %s\n' % (screen_name, e))
		continue



Ejemplo n.º 20
0
getFollowers = functools.partial(
    _getFriendsOrFollowersUsingFunc,
    t.followers.ids,
    'follower_ids',
    t,
    r,
)

screen_name = SCREEN_NAME

# Redis key builder bound to screen_name.
redis_key = functools.partial(getRedisIdByScreenName, screen_name)

# get the data

print >> sys.stderr, 'Getting friends for %s...' % (screen_name, )
getFriends(screen_name, limit=MAXINT)

print >> sys.stderr, 'Getting followers for %s...' % (screen_name, )
getFollowers(screen_name, limit=MAXINT)

# use redis to compute the numbers

n_friends = r.scard(redis_key('friend_ids'))

n_followers = r.scard(redis_key('follower_ids'))

# Each difference is written to the scratch key 'temp', whose only purpose is
# to obtain the value sdiffstore returns; the key is deleted immediately.
friends_then_followers = [redis_key('friend_ids'), redis_key('follower_ids')]
n_friends_diff_followers = r.sdiffstore('temp', friends_then_followers)
r.delete('temp')

followers_then_friends = [redis_key('follower_ids'), redis_key('friend_ids')]
n_followers_diff_friends = r.sdiffstore('temp', followers_then_friends)
r.delete('temp')
Ejemplo n.º 21
0
getFollowers = functools.partial(_getFriendsOrFollowersUsingFunc,
                                 t.followers.ids, 'follower_ids',
                                 t, r)

screen_name = SCREEN_NAME

# get the data

print >> sys.stderr, 'Getting friends for %s...' % (screen_name, )
getFriends(screen_name, limit=MAXINT)

print >> sys.stderr, 'Getting followers for %s...' % (screen_name, )
getFollowers(screen_name, limit=MAXINT)

# use redis to compute the numbers

# Small wrapper so each lookup below reads as "set of <kind> for this user".
ids_key = lambda kind: getRedisIdByScreenName(screen_name, kind)

n_friends = r.scard(ids_key('friend_ids'))

n_followers = r.scard(ids_key('follower_ids'))

# friends minus followers, computed into the scratch key 'temp'
n_friends_diff_followers = r.sdiffstore(
    'temp',
    [ids_key('friend_ids'), ids_key('follower_ids')],
)
r.delete('temp')

# followers minus friends
n_followers_diff_friends = r.sdiffstore(
    'temp',
    [ids_key('follower_ids'), ids_key('friend_ids')],
)
Ejemplo n.º 22
0
import redis
import couchdb
import sys
from twitter__util import getRedisIdByScreenName
from twitter__util import getRedisIdByUserId

SCREEN_NAME = 'timoreilly'  # XXX: iPython Notebook cannot prompt for input
THRESHOLD = 15  # XXX: iPython Notebook cannot prompt for input

# Connect using default settings for localhost

r = redis.Redis()

# Compute screen_names for friends

friend_ids = r.smembers(getRedisIdByScreenName(SCREEN_NAME, 'friend_ids'))
friend_screen_names = []
for friend_id in friend_ids:
    try:
        friend_screen_names.append(
            json.loads(r.get(getRedisIdByUserId(
                friend_id, 'info.json')))['screen_name'].lower())
    except TypeError, e:
        continue  # not locally available in Redis - look it up or skip it

# Pull the  list of (entity, frequency) tuples from CouchDB

server = couchdb.Server('http://localhost:5984')
db = server['tweets-user-timeline-' + SCREEN_NAME]

entities_freqs = sorted(
import sys
import json
import networkx as nx
import redis

from twitter__util import getRedisIdByScreenName
from twitter__util import getRedisIdByUserId

SCREEN_NAME = sys.argv[1]

g = nx.Graph()
r = redis.Redis()

# Compute all ids for nodes appearing in the graph

friend_ids = list(r.smembers(getRedisIdByScreenName(SCREEN_NAME,
                                                    'friend_ids')))
id_for_screen_name = json.loads(
    r.get(getRedisIdByScreenName(SCREEN_NAME, 'info.json')))['id']
ids = [id_for_screen_name] + friend_ids

for current_id in ids:
    print >> sys.stderr, 'Processing user with id', current_id

    try:
        current_info = json.loads(
            r.get(getRedisIdByUserId(current_id, 'info.json')))
        current_screen_name = current_info['screen_name']
        friend_ids = list(
            r.smembers(
                getRedisIdByScreenName(current_screen_name, 'friend_ids')))
	for current_id in ids:
		print >> sys.stderr, 'Processing user with id', current_id

		try:
			raw_current_info = r.get(getRedisIdByUserId(current_id, 'info.json'
									  ))
			if not raw_current_info:
				# try to get it one more time
				print "Making req to Twitter API"
				os.system('python friends_followers__get_user_info_by_id.py ' + current_id)

			current_info = json.loads(r.get(getRedisIdByUserId(current_id, 'info.json'
									  )))

			current_screen_name = current_info['screen_name']
			friend_ids = list(r.smembers(getRedisIdByScreenName(current_screen_name,
							  'friend_ids')))

			# filter out ids for this person if they aren't also SCREEN_NAME's friends too, 
			# which is the basis of the query

			friend_ids = [fid for fid in friend_ids if fid in starting_ids] # TODO czy to nie dziala?
		except Exception, e:
			print >> sys.stderr, 'Problems with', current_id


		for friend_id in friend_ids:
			if friend_id in starting_ids:
				try:
					raw_friend_info = r.get(getRedisIdByUserId(friend_id, 'info.json'))

					if not raw_friend_info:
Ejemplo n.º 25
0
getFollowers = functools.partial(
    _getFriendsOrFollowersUsingFunc, t.followers.ids, 'follower_ids', t, r
)

screen_name = SCREEN_NAME

# get the data

print >> sys.stderr, 'Getting friends for %s...' % (screen_name, )
getFriends(screen_name, limit=MAXINT)

print >> sys.stderr, 'Getting followers for %s...' % (screen_name, )
getFollowers(screen_name, limit=MAXINT)

# use redis to compute the numbers


def _ids_key(kind):
    # Key under which screen_name's set of the given kind is looked up.
    return getRedisIdByScreenName(screen_name, kind)


n_friends = r.scard(_ids_key('friend_ids'))

n_followers = r.scard(_ids_key('follower_ids'))

# The order of the two keys decides which set is subtracted from which.
n_friends_diff_followers = r.sdiffstore(
    'temp', [_ids_key('friend_ids'), _ids_key('follower_ids')])
r.delete('temp')

n_followers_diff_friends = r.sdiffstore(
    'temp', [_ids_key('follower_ids'), _ids_key('friend_ids')])