def insert_followers(uid, graph_db, client, version):
    """Fetch an author's followers from Twitter and store them in the graph.

    uid      : user id of the author whose followers are requested
    graph_db : graph handle; queried through get_indexed_node() and passed
               to neo4j.WriteBatch
    client   : client object forwarded to twitter.get_followers
    version  : API version; 1 reads the 'x-ratelimit-remaining' response
               header, any other value reads 'x-rate-limit-remaining'

    Returns [count, limit]. `limit` is the remaining-request count parsed
    from the response headers. `count` is len(entry['followers']) when the
    follower nodes and "follows" relationships were submitted, and 0 when
    the follower list is empty or the author node is not in the "users"
    index.
    """
    # NOTE(review): later definitions named insert_followers with a different
    # signature appear further down; if they live in the same module they
    # shadow this one -- confirm which is intended.
    entry = twitter.get_followers(uid, 0, version, client)
    if version == 1:
        limit = int(entry['response']['x-ratelimit-remaining'])
    else:
        limit = int(entry['response']['x-rate-limit-remaining'])

    followers = entry['followers']
    # Didn't get any followers.
    if followers == []:
        return [0, limit]

    batch = neo4j.WriteBatch(graph_db)
    author = graph_db.get_indexed_node("users", "uid", str(uid))
    # The author node is expected to exist because author profiles are
    # created first.  If it is missing nothing is written; report that
    # explicitly instead of failing on an unbound `followers`.
    if not author:
        return [0, limit]

    for fid in followers:
        batch.get_or_create_indexed_node(
            "users", "uid", fid, {"uid": fid, "is_author": "0"})
    nodes = batch.submit()

    # Create the relations.  The same batch object is reused for this second
    # phase -- presumably submit() leaves it empty; TODO confirm.
    for node in nodes:
        batch.get_or_create_relationship(node, "follows", author)
    batch.submit()

    return [len(followers), limit]
def insert_followers(uid, client, version):
    """Get the follower ids of an author from Twitter using the API.

    uid     : user id of the author
    client  : client object forwarded to twitter.get_followers
    version : API version; 1 reads the 'x-ratelimit-remaining' response
              header, any other value reads 'x-rate-limit-remaining'

    Returns [entry, limit, ok]:
      entry : the value returned by twitter.get_followers
      limit : remaining-request count parsed from the response headers
      ok    : 1 when the response status is 200, otherwise 0
    """
    entry = twitter.get_followers(uid, 0, version, client)
    response = entry['response']

    if version == 1:
        limit = int(response['x-ratelimit-remaining'])
    else:
        limit = int(response['x-rate-limit-remaining'])

    # Normalise with str() so a numeric status is not mistaken for a failure.
    if str(response['status']) != '200':
        return [entry, limit, 0]
    return [entry, limit, 1]
def insert_followers(uid, client, version):
    """Request the follower ids of one author from the Twitter API.

    uid     : user id of the author
    client  : client object handed through to twitter.get_followers
    version : API version; version 1 uses the 'x-ratelimit-remaining'
              header, every other value uses 'x-rate-limit-remaining'

    Returns a three-element list [entry, limit, flag] where `entry` is the
    raw result of twitter.get_followers, `limit` is the remaining-request
    count taken from the response headers, and `flag` is 1 for a status of
    200 and 0 for anything else.
    """
    entry = twitter.get_followers(uid, 0, version, client)
    headers = entry['response']

    header_name = ('x-ratelimit-remaining' if version == 1
                   else 'x-rate-limit-remaining')
    limit = int(headers[header_name])

    # str() keeps the comparison valid whether the status is stored as text
    # or as a number.
    flag = 1 if str(headers['status']) == '200' else 0
    return [entry, limit, flag]
def get_followers(version, app, fnewAuth, fdump):
    """Fetch followers for every author listed in a file and dump the replies.

    version  : API version; 1 reads the 'x-ratelimit-remaining' response
               header, any other value reads 'x-rate-limit-remaining'
    app      : mapping holding the OAuth credentials under the keys
               'c_key', 'c_sec', 'a_key' and 'a_sec'
    fnewAuth : open file-like object read with readline(); each line is a
               JSON object containing a 'user_id' key
    fdump    : open file-like object; each reply is written to it as one
               JSON line

    Returns [ret, limit, count]:
      ret   : 1 when a reply reported fewer than 3 remaining requests (that
              reply is NOT written to fdump), 2 when the input was exhausted
              with limit >= 3, 0 when no line was read at all
      limit : remaining-request count from the last reply (0 if none)
      count : number of replies written to fdump
    """
    consumer = oauth.Consumer(key=app['c_key'], secret=app['c_sec'])
    access_token = oauth.Token(key=app['a_key'], secret=app['a_sec'])
    client = oauth.Client(consumer, access_token)

    ret = 0
    count = 0
    limit = 0

    # readline() is used instead of iterating the file object so no
    # read-ahead happens on a handle the caller still owns.
    while True:
        line = fnewAuth.readline()
        if not line:
            break

        author = json.loads(line)
        uid = author['user_id']

        # Getting followers from Twitter by user_id.
        entry = twitter.get_followers(uid, 0, version, client)
        if version == 1:
            limit = int(entry['response']['x-ratelimit-remaining'])
        else:
            limit = int(entry['response']['x-rate-limit-remaining'])

        if limit < 3:
            ret = 1
            print("limit reached")
            break

        # Dump follower ids.
        fdump.write(json.dumps(entry) + "\n")
        count += 1

    # The loop ended because the file is complete (not because of the limit).
    if limit >= 3:
        ret = 2
    return [ret, limit, count]
def collect_followers(version, app, dump_path):
    """Fetch followers for the author of each tweet in a raw tweet file.

    version   : API version; 1 reads the 'x-ratelimit-remaining' response
                header, any other value reads 'x-rate-limit-remaining'
    app       : mapping holding the OAuth credentials under the keys
                'c_key', 'c_sec', 'a_key' and 'a_sec'
    dump_path : indexable collection of paths; [0] raw tweet file (read),
                [1] followers dump (appended), [2] author dump (appended),
                [4] run log (appended).  Index 3 is not used here.

    The first find_start(followers_dump, raw_tweet) lines of the raw file
    are skipped.  For every remaining line the follower reply (tagged with
    "tweet_id") and the author details are appended to their dumps.  One
    tab-separated record (raw path, start time, end time, last tweet id,
    ret, count) is appended to the run log on normal completion.

    Returns [ret, limit]: ret is 1 when a reply reported fewer than 3
    remaining requests, 2 when the file was exhausted with limit >= 3, and
    0 when no line was processed; limit is the last remaining-request count.
    """
    consumer = oauth.Consumer(key=app['c_key'], secret=app['c_sec'])
    access_token = oauth.Token(key=app['a_key'], secret=app['a_sec'])
    client = oauth.Client(consumer, access_token)

    raw_tweet = dump_path[0]
    followers_dump = dump_path[1]
    author_dump = dump_path[2]
    log_details = dump_path[4]

    # Track every handle that was opened so all of them are closed even when
    # a request or a JSON decode raises part-way through.
    handles = []
    try:
        for path, mode in ((raw_tweet, "r"), (log_details, "a"),
                           (followers_dump, "a"), (author_dump, "a")):
            handles.append(open(path, mode))
        fraw, flog, fdump, fauth = handles

        # Skipping the lines to start from the right place.
        start_from = find_start(followers_dump, raw_tweet)
        for _ in range(start_from):
            fraw.readline()

        starttime = datetime.now()
        endtweet = ""
        print("starting from the line number : %s" % (start_from,))

        # NOTE(review): count starts at 1 and is incremented before each
        # request, so the first request is reported as number 2 -- kept
        # as-is because the log format may be relied upon; confirm.
        count = 1
        limit = 0
        ret = 0
        while True:
            line = fraw.readline()
            if not line:
                break
            count += 1

            tweet = json.loads(line)
            uid = twitter.get_uid(tweet, "yahoo")
            tid = twitter.get_tweetid(tweet, "yahoo")
            endtweet = tid
            author_details = get_author_details(tweet, "yahoo", author_dump)

            # Getting followers from Twitter by user_id.
            entry = twitter.get_followers(uid, 0, version, client)
            if version == 1:
                limit = int(entry['response']['x-ratelimit-remaining'])
            else:
                limit = int(entry['response']['x-rate-limit-remaining'])

            sys.stdout.write("\rlimit x-ratelimit-remaining: %d The request number : %d" % (limit, count))
            sys.stdout.flush()

            if limit < 3:
                ret = 1
                print("limit reached\n")
                break

            # Dumping followers ids and author_details.
            entry["tweet_id"] = tid
            fdump.write(json.dumps(entry) + "\n")
            fauth.write(json.dumps(author_details) + "\n")

        if limit >= 3:
            ret = 2
        # Stamp the end time on every exit path so the log record never
        # carries an empty end-time field.
        endtime = datetime.now()

        flog.write(raw_tweet + "\t" + str(starttime) + "\t" + str(endtime) + "\t" + str(endtweet) + "\t" + str(ret) + "\t" + str(count) + "\n")
    finally:
        for handle in handles:
            handle.close()

    return [ret, limit]
def get_followers_parallel(conn, username):
    """Fetch the followers of *username* and push the result through *conn*.

    The value returned by twitter.get_followers(username) is handed unchanged
    to conn.send(); nothing is returned to the caller.
    """
    conn.send(twitter.get_followers(username))
def get_followers_friends(version, app, c, start = 0):
    """Crawl a list of user ids, issuing two requests per id and dumping replies.

    version : API version, forwarded to twitter.get_followers (the
              'x-rate-limit-remaining' header is read regardless of it)
    app     : mapping holding the OAuth credentials under the keys
              'c_key', 'c_sec', 'a_key' and 'a_sec'
    c       : indexable sequence of user ids
    start   : index in `c` at which to begin

    Files "friends.txt", "followers.txt", "crawled.txt" and "log.txt" are
    opened for append in the current working directory.  A reply with status
    200 is written as one JSON line to its dump and the uid is appended to
    "crawled.txt"; any other reply is appended to "log.txt".

    Returns [ret, limit, count]:
      ret   : 1 when a reply reported fewer than 3 remaining requests,
              2 when the loop finished with limit >= 3
      limit : remaining-request count from the last reply (20 if none)
      count : `start` plus the number of ids for which both requests were
              completed -- presumably usable as `start` for a resumed run
    """
    handles = []
    try:
        for name in ("friends.txt", "followers.txt", "crawled.txt", "log.txt"):
            handles.append(open(name, 'a'))
        fr_dump, fo_dump, f_crawled, f_log = handles

        consumer = oauth.Consumer(key=app['c_key'], secret=app['c_sec'])
        access_token = oauth.Token(key=app['a_key'], secret=app['a_sec'])
        client = oauth.Client(consumer, access_token)

        ret = 0
        count = start
        limit = 20
        for i in range(start, len(c)):
            uid = c[i]

            # Getting followers from Twitter by user_id.
            entry = twitter.get_followers(uid, 0, version, client)
            limit = int(entry['response']['x-rate-limit-remaining'])
            if str(entry['response']['status']) == '200':
                fo_dump.write(json.dumps(entry) + "\n")
                f_crawled.write(str(uid) + "\n")
            else:
                f_log.write("followers: " + json.dumps(entry))
            if limit < 3:
                ret = 1
                print("limit reached")
                break

            # NOTE(review): this second request feeds friends.txt but calls
            # twitter.get_followers again -- a friends lookup was presumably
            # intended; confirm against the twitter helper module.
            entry2 = twitter.get_followers(uid, 0, version, client)
            limit = int(entry2['response']['x-rate-limit-remaining'])
            # Record the second reply itself (entry2), not the first one.
            if str(entry2['response']['status']) == '200':
                fr_dump.write(json.dumps(entry2) + "\n")
                f_crawled.write(str(uid) + "\n")
            else:
                f_log.write("followers: " + json.dumps(entry2))
            if limit < 3:
                ret = 1
                print("limit reached")
                break

            count += 1

        # The loop ended because the list is complete (not because of the
        # limit).
        if limit >= 3:
            ret = 2
    finally:
        # Close every handle that was opened, even if a request raised.
        for handle in handles:
            handle.close()

    return [ret, limit, count]