Beispiel #1
0
def crawl_followers(t, r, follower_ids, limit=1000000, depth=2):
    """Crawl follower relationships breadth-first and record them in Redis.

    For each seed id in ``follower_ids`` the seed's followers are fetched and
    stored, then the followers of those followers, and so on for ``depth``
    levels in total. Every fetched list is stored under the key of the user
    it belongs to, i.e. ``get_redis_id('follower_ids', user_id=<that user>)``.

    Args:
        t: Twitter API client; ``t.followers.ids`` is handed to
            ``make_twitter_request``.
        r: Redis client; only ``r.sadd(key, member)`` is called on it.
        follower_ids: Iterable of seed user ids to start crawling from.
        limit: Soft cap on the number of ids collected per user. Paging stops
            once the cap is reached, so the last page may overshoot it.
        depth: Number of levels to crawl; ``1`` fetches only the seeds' own
            followers.

    Returns:
        None. Results are written to ``r`` as a side effect.
    """

    def get_all_followers_ids(user_id, limit):
        """Page through the followers of ``user_id`` and return their ids.

        Stops when the cursor is exhausted (``next_cursor == 0``), when at
        least ``limit`` ids were collected, or when a request yields ``None``.
        """
        cursor = -1
        ids = []
        while cursor != 0:

            response = make_twitter_request(t,
                                            t.followers.ids,
                                            user_id=user_id,
                                            cursor=cursor)

            if response is not None:
                ids += response['ids']
                cursor = response['next_cursor']

            # Same output as the print statement, but valid on Python 2 and 3
            sys.stderr.write('Fetched %i total ids for %s\n' % (len(ids),
                                                                 user_id))

            # Consider storing the ids to disk during each iteration to
            # provide an additional layer of protection from exceptional
            # circumstances

            if response is None or len(ids) >= limit:
                break

        return ids

    def store_follower_ids(user_id, ids):
        """Add every id in ``ids`` to the Redis follower set of ``user_id``."""
        rid = get_redis_id('follower_ids', user_id=user_id)
        for _id in ids:
            r.sadd(rid, _id)

    for fid in follower_ids:

        # Level 1: the seed's direct followers
        next_queue = get_all_followers_ids(fid, limit)
        store_follower_ids(fid, next_queue)

        d = 1
        while d < depth:
            d += 1
            # Users found on the previous level become this level's work list
            (queue, next_queue) = (next_queue, [])
            for _fid in queue:
                _follower_ids = get_all_followers_ids(user_id=_fid,
                                                      limit=limit)

                # Key by _fid, the user these followers belong to. Keying by
                # the outer seed fid would merge all levels into one set and
                # lose the per-user mapping.
                store_follower_ids(_fid, _follower_ids)

                next_queue += _follower_ids
def crawl_followers(t, r, follower_ids, limit=1000000, depth=2):
    """Walk the follower graph level by level, saving each user's followers.

    Starting from every id in ``follower_ids``, fetch that user's followers,
    then the followers of each of those, repeating until ``depth`` levels
    have been fetched. Each list of followers is added to the Redis set
    named by ``get_redis_id('follower_ids', user_id=<owner of the list>)``.

    Args:
        t: Twitter API client; ``t.followers.ids`` is passed on to
            ``make_twitter_request``.
        r: Redis client; only ``r.sadd(key, member)`` is used.
        follower_ids: Iterable of user ids to start from.
        limit: Soft per-user cap on collected ids; checked after each page,
            so the result may exceed it by part of one page.
        depth: Total number of levels to fetch (``1`` = direct followers
            of the seeds only).

    Returns:
        None. All results are written to ``r``.
    """

    def get_all_followers_ids(user_id, limit):
        """Return the follower ids of ``user_id`` gathered over all pages.

        Paging ends on ``next_cursor == 0``, on reaching ``limit`` ids, or
        on a ``None`` response from ``make_twitter_request``.
        """
        cursor = -1
        ids = []
        while cursor != 0:

            response = make_twitter_request(t, t.followers.ids,
                                            user_id=user_id, cursor=cursor)

            if response is not None:
                ids += response['ids']
                cursor = response['next_cursor']

            # Equivalent to the former print statement; works on Python 2 and 3
            sys.stderr.write('Fetched %i total ids for %s\n' % (len(ids), user_id))

            # Consider storing the ids to disk during each iteration to
            # provide an additional layer of protection from exceptional
            # circumstances

            if response is None or len(ids) >= limit:
                break

        return ids

    def save_followers(owner_id, ids):
        """Add each id in ``ids`` to the follower set stored for ``owner_id``."""
        rid = get_redis_id('follower_ids', user_id=owner_id)
        for _id in ids:
            r.sadd(rid, _id)

    for fid in follower_ids:

        # First level: followers of the seed user itself
        next_queue = get_all_followers_ids(fid, limit)
        save_followers(fid, next_queue)

        d = 1
        while d < depth:
            d += 1
            # The ids discovered last round are expanded in this round
            (queue, next_queue) = (next_queue, [])
            for _fid in queue:
                _follower_ids = get_all_followers_ids(user_id=_fid, limit=limit)

                # Store under _fid (whose followers these are), not under the
                # seed fid, so every crawled user gets its own follower set.
                save_followers(_fid, _follower_ids)

                next_queue += _follower_ids
# Script fragment: sets up a graph of SCREEN_NAME and SCREEN_NAME's friends
# from friend-id sets that were previously stored in Redis.
t = oauth_login()

# Look up SCREEN_NAME's numeric id and keep it as a string
_id = str(t.users.show(screen_name=SCREEN_NAME)["id"])

g = nx.Graph()  # An undirected graph
r = redis.Redis()

# Compute all ids for nodes appearing in the graph. Let's assume you've
# adapted recipe__crawl to harvest all of the friends and friends' friends
# for a user so that you can build a graph to inspect how these
# friendships relate to one another

# Create a collection of ids for a person and all of this person's friends

ids = [_id] + list(r.smembers(get_redis_id("friend_ids", user_id=_id)))

# Process each id in the collection such that edges are added to the graph
# for each of current_id's friends if those friends are also
# friends of SCREEN_NAME. In the end, you get an ego graph of
# SCREEN_NAME and SCREEN_NAME's friends, but you also see the connections
# that exist among SCREEN_NAME's friends as well

for current_id in ids:

    print >>sys.stderr, "Processing user with id", current_id

    # Load current_id's friends and keep only those that are nodes of the
    # graph (i.e. present in ids).
    # NOTE(review): "fid in ids" scans a list on every test; a set built once
    # before the loop would avoid that -- confirm ids has no other list use.
    # NOTE(review): the except handler body and the code that adds edges to g
    # are not visible in this excerpt.
    try:
        friend_ids = list(r.smembers(get_redis_id("friend_ids", user_id=current_id)))
        friend_ids = [fid for fid in friend_ids if fid in ids]
    except Exception, e:
# Script fragment: prepares an ego graph for SCREEN_NAME using the friend-id
# sets that an earlier crawl stored in Redis.
t = oauth_login()

# Numeric id of SCREEN_NAME, converted to a string
_id = str(t.users.show(screen_name=SCREEN_NAME)['id'])

g = nx.Graph()      # An undirected graph
r = redis.Redis()

# Compute all ids for nodes appearing in the graph. Let's assume you've
# adapted recipe__crawl to harvest all of the friends and friends' friends
# for a user so that you can build a graph to inspect how these
# friendships relate to one another

# Create a collection of ids for a person and all of this person's friends

ids = [_id] + list(r.smembers(get_redis_id('friend_ids', user_id=_id)))

# Process each id in the collection such that edges are added to the graph
# for each of current_id's friends if those friends are also
# friends of SCREEN_NAME. In the end, you get an ego graph of
# SCREEN_NAME and SCREEN_NAME's friends, but you also see the connections
# that exist among SCREEN_NAME's friends as well

for current_id in ids:

    print >> sys.stderr, 'Processing user with id', current_id

    # Fetch the stored friends of current_id, then drop every friend that is
    # not itself one of the graph's nodes (not contained in ids).
    # NOTE(review): membership is tested against the list ids, which is a
    # linear scan per friend; a set would be cheaper -- confirm before changing.
    # NOTE(review): this excerpt ends at the except clause; its handler and
    # the edge-adding code are not shown.
    try:
        friend_ids = list(r.smembers(get_redis_id('friend_ids', user_id=current_id)))
        friend_ids = [fid for fid in friend_ids if fid in ids]
    except Exception, e:
Beispiel #5
0
	# Crawl state: maps a user id to a flag that is False while the user still
	# has to be expanded and True once its friends have been fetched.
	# NOTE(review): the enclosing function header is outside this excerpt;
	# api, r and edges are expected to be defined there.
	queue = {'1417479073': False} # seed user: bangdduck's id

	limit_level = 2;
	current_level = 1; 

	# NOTE(review): current_level is not incremented anywhere in the visible
	# code; the increment presumably follows this excerpt -- confirm, otherwise
	# this loop never terminates.
	while current_level <= limit_level:
		# Snapshot the ids that are still unexpanded; queue itself is modified
		# further down, so it cannot be iterated while expanding.
		current_targets = [];
		for key, value in queue.iteritems():
			if (value == False):
				current_targets.append(key)

		# Expand every pending user of this round
		for target_ids in current_targets:
			friends = get_friends(api, target_ids)
			# Persist the friends of target_ids as a Redis set, one sadd per id
			rid = get_redis_id('friends_ids', user_id = str(target_ids))
			print target_ids
			[ r.sadd(rid, _id) for _id in friends]
					
			# Record an edge per friend and enqueue friends not seen before
			for iden in friends:
				edges.append([iden, target_ids])
				if not queue.has_key(iden):
					queue.update({iden: False})
					print 'saving {} on queue'.format(iden)
					# Disabled: fetching per-user information (worked for a
					# single user); for now only ids are kept in memory.
					#info = get_information(api, iden)
					#informations.update({iden: info})

			# Mark this user as expanded so later rounds skip it
			queue.update({target_ids: True})