def crawl_followers(t, r, follower_ids, limit=1000000, depth=2):
    """Breadth-first crawl of Twitter follower ids, persisting results to Redis.

    Parameters:
        t            -- an authenticated twitter API object (used via
                        t.followers.ids through make_twitter_request)
        r            -- a Redis client; each user's follower ids are stored
                        in the set keyed by get_redis_id('follower_ids', ...)
        follower_ids -- seed user ids to start crawling from
        limit        -- stop paging a single user's followers once this many
                        ids have been fetched (default 1,000,000)
        depth        -- how many levels of followers-of-followers to crawl
                        (default 2)

    Returns None; all output is side effects on Redis and stderr logging.
    """

    # Helper: page through one user's follower ids with the cursor API.
    def get_all_followers_ids(user_id, limit):
        cursor = -1
        ids = []
        # Twitter signals the last page with next_cursor == 0.
        while cursor != 0:
            response = make_twitter_request(t, t.followers.ids, user_id=user_id,
                                            cursor=cursor)

            if response is not None:
                ids += response['ids']
                cursor = response['next_cursor']

            print >> sys.stderr, 'Fetched %i total ids for %s' % (len(ids), user_id)

            # Consider storing the ids to disk during each iteration to provide
            # an additional layer of protection from exceptional circumstances.
            # A None response (hard failure) also terminates the paging loop.
            if len(ids) >= limit or response is None:
                break

        return ids

    for fid in follower_ids:
        next_queue = get_all_followers_ids(fid, limit)

        # Store a fid => next_queue mapping in Redis or other database of
        # choice. In Redis, it might look something like this:
        rid = get_redis_id('follower_ids', user_id=fid)
        for _id in next_queue:
            r.sadd(rid, _id)

        d = 1
        while d < depth:
            d += 1
            (queue, next_queue) = (next_queue, [])
            for _fid in queue:
                _follower_ids = get_all_followers_ids(user_id=_fid, limit=limit)

                # Store a _fid => _follower_ids mapping in Redis or other
                # database of choice.
                # BUG FIX: key on _fid (the user whose followers were just
                # fetched), not the outer seed fid — otherwise every deeper
                # level's followers get merged into the seed user's set.
                rid = get_redis_id('follower_ids', user_id=_fid)
                for _id in _follower_ids:
                    r.sadd(rid, _id)

                next_queue += _follower_ids
def crawl_followers(t, r, follower_ids, limit=1000000, depth=2):
    """Crawl followers (and followers-of-followers) for the given seed ids.

    t            -- authenticated twitter API object
    r            -- Redis client used to persist each user's follower-id set
    follower_ids -- iterable of seed user ids
    limit        -- cap on ids fetched per user (default 1,000,000)
    depth        -- number of follower levels to traverse (default 2)

    No return value; results are written to Redis sets keyed by
    get_redis_id('follower_ids', user_id=...).
    """

    # Helper function: fetch one user's complete follower-id list, paging
    # with Twitter's cursor protocol (next_cursor == 0 means last page).
    def get_all_followers_ids(user_id, limit):
        cursor = -1
        ids = []
        while cursor != 0:
            response = make_twitter_request(t, t.followers.ids, user_id=user_id,
                                            cursor=cursor)

            if response is not None:
                ids += response['ids']
                cursor = response['next_cursor']

            print >> sys.stderr, 'Fetched %i total ids for %s' % (len(ids), user_id)

            # Consider storing the ids to disk during each iteration to
            # provide an additional layer of protection from exceptional
            # circumstances. Also bail on a failed (None) response.
            if len(ids) >= limit or response is None:
                break

        return ids

    for fid in follower_ids:
        next_queue = get_all_followers_ids(fid, limit)

        # Store a fid => next_queue mapping in Redis or other database of
        # choice. In Redis, it might look something like this:
        rid = get_redis_id('follower_ids', user_id=fid)
        for _id in next_queue:
            r.sadd(rid, _id)

        d = 1
        while d < depth:
            d += 1
            (queue, next_queue) = (next_queue, [])
            for _fid in queue:
                _follower_ids = get_all_followers_ids(user_id=_fid, limit=limit)

                # Store a _fid => _follower_ids mapping in Redis.
                # BUG FIX: the original keyed this on the outer seed `fid`,
                # which lumped every deeper user's followers into one set;
                # the ids fetched here belong to `_fid`.
                rid = get_redis_id('follower_ids', user_id=_fid)
                for _id in _follower_ids:
                    r.sadd(rid, _id)

                next_queue += _follower_ids
# NOTE(review): Python 2 script fragment reconstructed from a whitespace-
# mangled paste. It is TRUNCATED mid-statement: the final `except` clause
# has no body in the visible source, so this chunk is not runnable as-is.
# Relies on names defined elsewhere: oauth_login, SCREEN_NAME, nx, redis,
# get_redis_id, sys.

# Authenticate and resolve SCREEN_NAME to a (string) user id.
t = oauth_login()
_id = str(t.users.show(screen_name=SCREEN_NAME)["id"])

g = nx.Graph()  # An undirected graph
r = redis.Redis()

# Compute all ids for nodes appearing in the graph. Let's assume you've
# adapted recipe__crawl to harvest all of the friends and friends' friends
# for a user so that you can build a graph to inspect how these
# friendships relate to one another

# Create a collection of ids for a person and all of this person's friends
ids = [_id] + list(r.smembers(get_redis_id("friend_ids", user_id=_id)))

# Process each id in the collection such that edges are added to the graph
# for each of current_id's friends if those friends are also
# friends of SCREEN_NAME. In the end, you get an ego graph of
# SCREEN_NAME and SCREEN_NAME's friends, but you also see connections that
# existing amongst SCREEN_NAME's friends as well
for current_id in ids:
    print >>sys.stderr, "Processing user with id", current_id

    try:
        # Keep only friends that are themselves in the ego set, so edges
        # stay within SCREEN_NAME's immediate network.
        friend_ids = list(r.smembers(get_redis_id("friend_ids", user_id=current_id)))
        friend_ids = [fid for fid in friend_ids if fid in ids]
    except Exception, e:
        # NOTE(review): handler body missing — the source is cut off here.
# NOTE(review): duplicate copy of the ego-graph script fragment (single-
# quoted variant), reconstructed from a whitespace-mangled paste. It is
# TRUNCATED: the trailing `except` clause has no body in the visible
# source. Depends on externally defined names: oauth_login, SCREEN_NAME,
# nx, redis, get_redis_id, sys.

# Authenticate and resolve SCREEN_NAME to a (string) user id.
t = oauth_login()
_id = str(t.users.show(screen_name=SCREEN_NAME)['id'])

g = nx.Graph()  # An undirected graph
r = redis.Redis()

# Compute all ids for nodes appearing in the graph. Let's assume you've
# adapted recipe__crawl to harvest all of the friends and friends' friends
# for a user so that you can build a graph to inspect how these
# friendships relate to one another

# Create a collection of ids for a person and all of this person's friends
ids = [_id] + list(r.smembers(get_redis_id('friend_ids', user_id=_id)))

# Process each id in the collection such that edges are added to the graph
# for each of current_id's friends if those friends are also
# friends of SCREEN_NAME. In the end, you get an ego graph of
# SCREEN_NAME and SCREEN_NAME's friends, but you also see connections that
# existing amongst SCREEN_NAME's friends as well
for current_id in ids:
    print >> sys.stderr, 'Processing user with id', current_id

    try:
        # Restrict to friends inside the ego set so edges stay within
        # SCREEN_NAME's immediate network.
        friend_ids = list(r.smembers(get_redis_id('friend_ids', user_id=current_id)))
        friend_ids = [fid for fid in friend_ids if fid in ids]
    except Exception, e:
        # NOTE(review): handler body missing — the source is cut off here.
queue = {'1417479073': False} #bangdduck's id limit_level = 2; current_level = 1; while current_level <= limit_level: current_targets = []; for key, value in queue.iteritems(): if (value == False): current_targets.append(key) #start traverse for target_ids in current_targets: friends = get_friends(api, target_ids) #database works rid = get_redis_id('friends_ids', user_id = str(target_ids)) print target_ids [ r.sadd(rid, _id) for _id in friends] #for each user for iden in friends: edges.append([iden, target_ids]) if not queue.has_key(iden): queue.update({iden: False}) print 'saving {} on queue'.format(iden) #works.. for a user #now we just save screen_name in memory #info = get_information(api, iden) #informations.update({iden: info}) queue.update({target_ids: True})