def get_users_data(users):
    # safety parameter for changing credentials files
    safety = 0
    # getting access file
    access_file, safety = get_access_file(safety)
    # creating connection with Twitter API
    client = get_twitter_client(access_file)
    counter = 0
    for user in users:
        if counter % 50 == 0:
            print(str(counter) + " DONE")
        counter += 1
        try:
            # if the user details already exist in the database, skip the user
            users_collection.find_one({"_id": user})['Followers Count']
        except (KeyError, TypeError):
            # otherwise try to download them
            try:
                account = client.get_user(user)
                user_details = get_user_details(account)
                average_per_day, average_per_hour = get_user_tweets_number(
                    account, client)
                user_details['Tweets per day'] = average_per_day
                user_details['Tweets per hour'] = average_per_hour
                query = {"_id": user}
                # if the user exists in the database but without details,
                # append the details
                if users_collection.find_one({"_id": user}):
                    values = {"$set": user_details}
                    users_collection.update_one(query, values)
                # otherwise insert a new user
                else:
                    users_collection.insert_one(user_details)
            except tweepy.TweepError as e:
                # if the Twitter API limit is reached, change the access file
                if e.response.text == '{"errors":[{"message":"Rate limit exceeded","code":88}]}':
                    print(e.response.text)
                    print('CHANGING ACCESS FILE TO ' + str(safety) + '!')
                    access_file, safety = get_access_file(safety)
                    client = get_twitter_client(access_file)
                    # try to download with the new access file
                    try:
                        user_details = get_user_details(account)
                        average_per_day, average_per_hour = get_user_tweets_number(
                            account, client)
                        user_details['Tweets per day'] = average_per_day
                        user_details['Tweets per hour'] = average_per_hour
                        query = {"_id": user}
                        # if the user exists in the database but without details,
                        # append the details
                        if users_collection.find_one({"_id": user}):
                            values = {"$set": user_details}
                            users_collection.update_one(query, values)
                        # otherwise insert a new user
                        else:
                            users_collection.insert_one(user_details)
                    except tweepy.TweepError:
                        pass
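# The snippets in this collection call get_user_details / get_user_data helpers
# that are not shown here. The sketch below is only an assumption, reconstructed
# from the keys the snippets read back ('Followers Count', 'Verified', and an
# '_id' equal to the screen name); the real helpers almost certainly store more
# fields.
def get_user_details(account):
    # build the document stored in users_collection for one Twitter account
    return {
        "_id": account.screen_name,
        "Followers Count": account.followers_count,
        "Friends Count": account.friends_count,
        "Verified": account.verified,
        "Created at": account.created_at.isoformat(),
    }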
def get_single_user(screen_name):
    # safety parameter for changing credentials files
    safety = 0
    # getting access file
    access_file, safety = get_access_file(safety)
    # creating connection with Twitter API
    client = get_twitter_client(access_file)
    try:
        # if the user details already exist in the database, get them
        users_collection.find_one({"_id": screen_name})['Verified']
        user_details = users_collection.find_one({"_id": screen_name})
    except (KeyError, TypeError):
        # otherwise download the user details
        account = client.get_user(screen_name)
        user_details = get_user_details(account)
        average_per_day, average_per_hour = get_user_tweets_number(
            account, client)
        user_details['Tweets per day'] = average_per_day
        user_details['Tweets per hour'] = average_per_hour
        query = {"_id": screen_name}
        # if the user exists in the database but without details,
        # append the details
        if users_collection.find_one({"_id": screen_name}):
            values = {"$set": user_details}
            users_collection.update_one(query, values)
        # otherwise insert a new user
        else:
            users_collection.insert_one(user_details)
    return user_details
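# get_users_data and get_single_user above (and create_graph further down)
# rotate through several credential files via get_access_file(safety), which is
# not included in these snippets. A minimal sketch follows, under the assumption
# that the credential files are named access_0.json, access_1.json, ...; the
# real helper may use different names or another storage scheme.
ACCESS_FILES = ['access_0.json', 'access_1.json', 'access_2.json']  # assumed names


def get_access_file(safety):
    # pick the next credentials file and advance the counter, wrapping around
    access_file = ACCESS_FILES[safety % len(ACCESS_FILES)]
    return access_file, safety + 1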
def get_user_timeline(user):
    fname = 'temp_user_timeline_{}.jsonl'.format(user)
    client = get_twitter_client()
    print("Downloading timeline...")
    try:
        with open(fname, 'w') as f:
            for page in Cursor(client.user_timeline, screen_name=user,
                               count=200).pages(16):
                sys.stdout.write(next(spinner))
                sys.stdout.flush()
                sys.stdout.write('\b')
                for status in page:
                    f.write(json.dumps(status._json) + '\n')
        print("Done.")
        with open('temp_user.json', 'w') as f:
            t = datetime.now()
            t = t.strftime("%A, %d. %B %Y %I:%M%p")
            data_user = {'user': user, 'date': t}
            f.write(json.dumps(data_user))
        return True
    except Exception as e:
        print("Error:")
        print(e)
        return False
def get_all_tweets(screen_name):
    client = get_twitter_client()
    alltweets = []
    new_tweets = client.user_timeline(screen_name=screen_name, count=200)
    alltweets.extend(new_tweets)
    oldest = alltweets[-1].id - 1
    for i in range(0, 5):
        new_tweets = client.user_timeline(screen_name=screen_name, count=200,
                                          max_id=oldest)
        alltweets.extend(new_tweets)
        oldest = alltweets[-1].id - 1
    jsonResults = {
        "screen_name": client.get_user(screen_name=screen_name).screen_name,
        "tweets": []
    }
    for tweet in alltweets:
        tweet = {
            "id": tweet.id_str,
            "coordinates": tweet.coordinates,
            "text": tweet.text,
            "retweet_count": tweet.retweet_count,
            "created_at": tweet.created_at.isoformat(),
            "favorite_count": tweet.favorite_count,
        }
        jsonResults["tweets"].append(tweet)
    print(len(jsonResults["tweets"]))
    return jsonResults
def get_followers(username, max_followers, output_file):
    print("Getting <{}> followers".format(username))
    client = get_twitter_client()
    max_pages = math.ceil(max_followers / 5000)
    count = 0
    json_file = output_file.split('.')[0] + '_full.json'
    with open(json_file, 'w') as json_output:
        with progressbar.ProgressBar(
                max_value=progressbar.UnknownLength) as bar:
            for followers in Cursor(client.followers_ids,
                                    screen_name=username).pages(max_pages):
                for chunk in paginate(followers, 100):
                    try:
                        users = client.lookup_users(user_ids=chunk)
                        for user in users:
                            user_info = user._json
                            screen_name = user_info['screen_name']
                            with open(output_file, 'a') as txt_output:
                                txt_output.write(screen_name + '\n')
                            json_output.write(json.dumps(user._json) + '\n')
                            count += 1
                            bar.update(count)
                    except tweepy.TweepError:
                        pass
                if len(followers) == 5000:
                    time.sleep(60)
    print("<{}> followers completed".format(username))
    time.sleep(60)
def check_by_tweets(screen_name, access_file):
    client = get_twitter_client(access_file)
    tweets = []
    positive = []
    negative = []
    ids = []
    dates = []
    check = tweets_collection.count_documents({"_id": screen_name})
    if check == 1:
        print('Already in database!')
    else:
        try:
            current_user = client.get_user(screen_name)
            if not current_user.protected:
                for page in Cursor(client.user_timeline,
                                   screen_name=screen_name,
                                   count=200).pages(16):
                    for status in page:
                        ids.append(str(status.id))
                        dates.append(status.created_at)
                        tweets.append(status.text)
                tweet_data = {
                    "_id": screen_name,
                    screen_name: {
                        "_id": ids,
                        "Text": tweets,
                        "Created at": dates
                    }
                }
                tweets_collection.insert_one(tweet_data)
            else:
                tweet_data = {"_id": screen_name}
                tweets_collection.insert_one(tweet_data)
        except tweepy.error.TweepError:
            print(screen_name + ' DOES NOT EXIST!')
    for tweet in tweets:
        result = check_tweet(tweet)
        if result:
            positive.append(1)
        else:
            negative.append(1)
    if len(positive) > len(negative):
        return True
    else:
        return False
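# check_by_tweets above relies on a check_tweet helper that classifies a single
# tweet as positive or negative; its implementation is not part of this snippet.
# The sketch below is only an assumption, using TextBlob polarity (the same
# library as the sentiment example further down); the real check may apply a
# keyword list or a trained model instead.
from textblob import TextBlob


def check_tweet(text):
    # treat a tweet with non-negative polarity as a "positive" hit
    return TextBlob(text).sentiment.polarity >= 0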
def pull_timeline(user):
    client = get_twitter_client()
    file_name = "tweets_{}.json".format(user)
    with open(file_name, 'w') as f:
        for page in Cursor(client.user_timeline, screen_name=user,
                           count=200).pages():
            for status in page:
                f.write(json.dumps(status._json) + '\n')
    return file_name
def get_timeline():
    user = sys.argv[1]
    client = get_twitter_client()
    m = re.compile(
        r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
    )
    # store some of the recent tweets in a file in JSON Lines format
    fname = "user_timeline_{}.jsonl".format(user)
    with open(fname, 'w') as f:
        for page in Cursor(client.user_timeline, screen_name=user,
                           count=50).pages(4):
            for status in page:
                f.write(json.dumps(status._json) + "\n")
    with sqlite3.connect("tweet.db") as con:
        cur = con.cursor()
        cur.execute("DROP TABLE IF EXISTS tweeturl")
        cur.execute(
            "CREATE TABLE tweeturl(tid INT PRIMARY KEY NOT NULL, url TEXT NOT NULL, created_at TEXT)"
        )
        con.commit()
    # read the file back and store the tweet id, tweet URL and tweet time
    # in the database
    with open(fname, "r") as tweets_file:
        for line in tweets_file:
            try:
                tweet = json.loads(line.strip())
                if 'text' in tweet:
                    u = tweet['user']['id']
                    print(u)
                    i = tweet['id']
                    # print(i)
                    t = tweet['created_at']
                    # print(t)
                    p = m.findall(tweet['text'])
                    # print(p)
                    try:
                        with sqlite3.connect("tweet.db") as con:
                            cur = con.cursor()
                            cur.execute(
                                "INSERT INTO tweeturl(tid, url, created_at) VALUES(?,?,?)",
                                (i, p[0], t))
                            con.commit()
                            print("Record successfully added")
                    except Exception:
                        con.rollback()
                        print("error in insert operation")
                    con.close()
            except Exception:
                continue
def get_sinlge_user(screen_name, access_file):
    client = get_twitter_client(access_file)
    account = client.get_user(screen_name)
    users_data = get_user_data(account)
    try:
        users_collection.insert_one(users_data)
    except pymongo.errors.DuplicateKeyError:
        pass
    return users_data
def twitter_get_timeline2(user_list, numbers_of_tweets, dir_name):
    client = get_twitter_client()
    n = numbers_of_tweets
    dirname = "tweets/{}".format(dir_name)
    os.makedirs(dirname, mode=0o755, exist_ok=True)
    for x in range(0, len(user_list), 20):
        chunks = user_list[x:x + 20]
        for entry in chunks:
            fname = "{}/user_timeline_{}.jsonl".format(dirname, entry)
            with open(fname, 'w') as f:
                for tweet in Cursor(client.user_timeline,
                                    screen_name=entry).items(n):
                    f.write(json.dumps(tweet._json) + "\n")
            print(entry)
def getting_timeline(user):
    client = get_twitter_client()  # seems to be the rate-limiting step
    fname = "user_timeline_{}.jsonl".format(user)
    with open(fname, 'w') as f:
        for page in Cursor(client.user_timeline, screen_name=user,
                           count=200).pages(16):  # up to 3200 tweets (Twitter limit)
            counter = 0
            for status in page:
                if counter % 4 == 0:
                    # tried collecting smaller subsections of tweets
                    f.write(json.dumps(status._json) + "\n")
                    counter += 1
                else:
                    counter += 1
def user_search(user_list):
    client = get_twitter_client()
    # load user IDs
    dataset = pd.read_csv(user_list, encoding="ISO-8859-1")
    x = dataset.iloc[:, 1]
    ids = []
    for id in x:
        ids.append(id)
    # ACCESS THE LOOKUP_USER METHOD OF THE TWITTER API -- GRAB INFO ON UP TO
    # 100 IDS WITH EACH API CALL
    # THE VARIABLE USERS IS A JSON FILE WITH DATA ON THE 32 TWITTER USERS LISTED
    users = []  # collected user profiles
    for x in range(0, len(ids), 100):
        chunks = ids[x:x + 100]
        for user in chunks:
            profile = client.get_user(screen_name=user)
            users.append(profile._json)
            print(user)
    return users
def pull_timeline(user_name):
    client = get_twitter_client()
    file_name = "tweets_{}.json".format(user_name)
    path = os.path.join(root, file_name)
    if user_name not in handle_dict:
        print("You're going to want to add this to handle_file_dict:\n")
        print("'{}' : '{}'".format(user_name, file_name))
    with open(path, 'w') as f:
        for page in Cursor(client.user_timeline, screen_name=user_name,
                           tweet_mode='extended', count=200).pages():
            for status in page:
                f.write(json.dumps(status._json) + '\n')
    return file_name
def get_links(database, collection):
    api = get_twitter_client()
    client = MongoClient()
    db = client[database]
    col = db[collection]
    for user in col.find(no_cursor_timeout=True):
        current_user = api.get_user(user['_id'])
        friends = current_user.friends()
        for friend in friends:
            for existing_user in col.find():
                if friend.screen_name == existing_user['_id']:
                    print("Creating link between " + user['_id'] + " and " +
                          friend.screen_name)
                    new_link = LinksHS(user_screen_name=user['_id'],
                                       user_id=user['id_str'],
                                       friend_screen_name=friend.screen_name,
                                       friend_id=friend.id_str)
                    new_link.save()
        time.sleep(61)
def get_all_tweets(user_id):
    client = get_twitter_client()
    alltweets = []
    new_tweets = client.user_timeline(user_id=user_id, count=200)
    alltweets.extend(new_tweets)
    oldest = alltweets[-1].id - 1
    for i in range(0, 5):
        new_tweets = client.user_timeline(user_id=user_id, count=200,
                                          max_id=oldest)
        alltweets.extend(new_tweets)
        oldest = alltweets[-1].id - 1
    '''
    # for csv writing:
    outtweets = [[tweet.coordinates, tweet.text.encode("utf-8"),
                  tweet.favorited, tweet.place, tweet.id_str, tweet.created_at,
                  tweet.user.location.encode("utf-8")] for tweet in alltweets]
    '''
    # fetch the user profile once instead of once per field
    user = client.get_user(user_id=user_id)
    jsonResults = {
        "screen_name": user.screen_name,
        "total_tweets": len(alltweets),
        "followers_count": user.followers_count,
        "friends_count": user.friends_count,
        "total_favorite_count": user.favourites_count,
        "tweets": []
    }
    for tweet in alltweets:
        tweet = {
            "id": tweet.id_str,
            "coordinates": tweet.coordinates,
            "text": tweet.text,
            "retweet_count": tweet.retweet_count,
            "created_at": tweet.created_at.isoformat(),
            "favorite_count": tweet.favorite_count,
        }
        jsonResults["tweets"].append(tweet)
    with open('/Users/Eric/Documents/EE695/specialProject/jsonFiles/user_timelines/%s_tweets.json' % user_id, 'w') as f:
        f.write(json.dumps(jsonResults, indent=4))
def get_number(screen_name, access_file):
    client = get_twitter_client(access_file)
    isProtected = client.get_user(screen_name).protected
    max_pages = math.ceil(MAX_FRIENDS / 5000)
    # get data for a given user
    group1 = set()
    group2 = set()
    people = set()
    counter = 0
    if not isProtected:
        for followers in Cursor(client.followers_ids, screen_name=screen_name,
                                count=1500).pages(max_pages):
            for chunk in paginate(followers, 100):
                users = client.lookup_users(user_ids=chunk)
                for user in users:
                    people.add(json.dumps(user._json))
                    group1.add(json.dumps(user._json))
            counter += len(followers)
        for friends in Cursor(client.friends_ids, screen_name=screen_name,
                              count=1500).pages(max_pages):
            for chunk in paginate(friends, 100):
                users = client.lookup_users(user_ids=chunk)
                for user in users:
                    people.add(json.dumps(user._json))
                    group2.add(json.dumps(user._json) + "\n")
            counter += len(friends)
    return len(people)
def paginate(items, n):
    """Generate n-sized chunks from items"""
    for i in range(0, len(items), n):
        yield items[i:i + n]


if __name__ == '__main__':
    if len(sys.argv) != 2:
        usage()
        sys.exit(1)
    screen_name = sys.argv[1]
    client = get_twitter_client()
    dirname = "users/{}".format(screen_name)
    max_pages = math.ceil(MAX_FRIENDS / 5000)
    try:
        os.makedirs(dirname, mode=0o755, exist_ok=True)
    except OSError:
        print("Directory {} already exists".format(dirname))
    except Exception as e:
        print("Error while creating directory {}".format(dirname))
        print(e)
        sys.exit(1)

    # get followers for a given user
    fname = "users/{}/followers.jsonl".format(screen_name)
    with open(fname, 'w') as f:
        for followers in Cursor(client.followers_ids,
                                screen_name=screen_name).pages(max_pages):
            for chunk in paginate(followers, 100):
                users = client.lookup_users(user_ids=chunk)
                for user in users:
                    f.write(json.dumps(user._json) + "\n")
def fetch_tweets(kwd, since_id, channel, redis_conf):
    """
    :param kwd:
    :param since_id:
    :param channel:
    :param redis_conf:
    :return:
    """
    r = redis_conf['cursor']
    key = redis_conf['key']
    api, credential_id = get_twitter_client(r, key)
    if not api:
        logger.info(f"{credential_id} failed ...using another one ...")
        api, credential_id = get_twitter_client(r, key)
    keyword = kwd['kwd']
    keyword = f'"{keyword} "' + config.get('FETCHER', 'FILTER')
    page_remaining = int(config.get('FETCHER', 'PAGE_LIMIT'))
    tweets_cursor = Cursor(api.search,
                           q=keyword,
                           count=100,
                           since_id=since_id,
                           tweet_mode='extended').pages(page_remaining)
    page_index = 0
    retry = 0
    t_id = 0
    _sleep = 0
    sleep_delay = int(config.get('FETCHER', 'SLEEP'))
    retry_limit = int(config.get('FETCHER', 'RETRY_LIMIT'))
    while True:
        try:
            print(kwd, page_index)
            tweets, t_id = process_page(tweets_cursor.next(), kwd, page_index)
            feed_saver_new_keyword_tweets(channel, tweets)
            page_index += 1
            page_remaining = int(config.get('FETCHER', 'PAGE_LIMIT')) - page_index
            # sleep(1)
        except StopIteration:
            if page_index == 0:
                # no tweets found
                data = {'status': 404, 'k_id': kwd['k_id']}
                feed_saver_new_keyword_tweets(channel, data)
            else:
                # last packet for this kwd so that the saver can update scheduled_on
                data = {'status': 202, 'k_id': kwd['k_id']}
                feed_saver_new_keyword_tweets(channel, data)
            # change credential & lpush current credential id
            r.lpush(key, credential_id)
            return True
        except TweepError as error:
            logger.error(
                f"Tweepy Exception occurred for credential id {credential_id} : {error}"
            )
            # change credential & lpush current credential id
            r.lpush(key, credential_id)
            retry += 1
            if retry <= retry_limit:
                logger.info(f"Retrying for keyword {kwd['kwd']}")
                _sleep += sleep_delay
                sleep(_sleep)
                api, credential_id = get_twitter_client(r, key)
                tweets_cursor = Cursor(api.search,
                                       q=keyword,
                                       count=100,
                                       since_id=since_id,
                                       max_id=t_id,
                                       tweet_mode='extended').pages(page_remaining)
                continue
            # finally, after retries
            data = {'status': 500, 'k_id': kwd['k_id']}
            feed_saver_new_keyword_tweets(channel, data)
            return False
        except Exception as e:
            # push keyword in queue & maintain log
            logger.error(
                f"Exception occurred for keyword {kwd['kwd']}. Exception : {e}"
            )
            retry += 1
            # change credential & lpush current credential id
            r.lpush(key, credential_id)
            if retry <= retry_limit:
                _sleep += sleep_delay
                logger.info(f"Retrying for keyword {kwd['kwd']}")
                api, credential_id = get_twitter_client(r, key)
                tweets_cursor = Cursor(api.search,
                                       q=keyword,
                                       count=100,
                                       since_id=since_id,
                                       max_id=t_id,
                                       tweet_mode='extended').pages(page_remaining)
                continue
            data = {'status': 500, 'k_id': kwd['k_id']}
            feed_saver_new_keyword_tweets(channel, data)
            return False
import tweepy
from textblob import TextBlob

from twitter_client import get_twitter_client

api = get_twitter_client()

# retrieve tweets
public_tweets = api.search('#magicleap')
for tweet in public_tweets:
    print(tweet.text)
    analysis = TextBlob(tweet.text)
    print(analysis.sentiment)
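# Every snippet here imports get_twitter_client from a local twitter_client
# module that is not shown. Below is a minimal sketch of what such a helper
# might look like, assuming the four OAuth keys live in a JSON credentials file
# (the file name and key names are assumptions); the real project may load its
# keys differently or return extra values, as in the Redis-backed variant above.
import json

import tweepy


def get_twitter_auth(access_file='credentials.json'):
    # read the OAuth keys from a JSON file and build an auth handler
    with open(access_file) as f:
        creds = json.load(f)
    auth = tweepy.OAuthHandler(creds['consumer_key'], creds['consumer_secret'])
    auth.set_access_token(creds['access_token'], creds['access_token_secret'])
    return auth


def get_twitter_client(access_file='credentials.json'):
    # wrap the auth handler in an API client that waits out rate limits
    auth = get_twitter_auth(access_file)
    return tweepy.API(auth, wait_on_rate_limit=True)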
import json
import math
import os
import sys

from tweepy import Cursor

from twitter_client import get_twitter_client

MAX_FRIENDS = 15000


def usage():
    print("Usage:")
    print("python {} <username>".format(" aaa "))


def paginate(items, n):
    """Generate n-sized chunks from items"""
    for i in range(0, len(items), n):
        yield items[i:i + n]


if __name__ == '__main__':
    screen_name = "richarddeng88"  # "jtimberlake"
    client = get_twitter_client()
    dirname = "users/{}".format(screen_name)
    max_pages = math.ceil(MAX_FRIENDS / 5000)
    try:
        os.makedirs(dirname, mode=0o755, exist_ok=True)
    except OSError:
        print("Directory {} already exists".format(dirname))
    except Exception as e:
        print("Error while creating directory {}".format(dirname))
        print(e)
        sys.exit(1)

    # get followers for a given user
    fname = "users/{}/followers.jsonl".format(screen_name)
    print(fname)
    with open(fname, 'w') as f:
        for followers in Cursor(client.followers_ids,
                                screen_name=screen_name).pages(max_pages):
            for chunk in paginate(followers, 100):
                users = client.lookup_users(user_ids=chunk)
                for user in users:
                    f.write(json.dumps(user._json) + "\n")
def get_info(twitter_user):
    # screen_name = sys.argv[1]
    screen_name = twitter_user
    client = get_twitter_client()
    dirname = "users/{}".format(screen_name)
    max_pages = math.ceil(MAX_FRIENDS / 5000)
    try:
        os.makedirs(dirname, mode=0o755, exist_ok=True)
    except OSError:
        print("Directory {} already exists".format(dirname))
    except Exception as e:
        print("Error while creating directory {}".format(dirname))
        print(e)
        sys.exit(1)

    print('Extracting {} \n'.format(screen_name))

    # get followers for a given user
    fjson = "users/{}/followers.jsonl".format(screen_name)
    fcsv = "users/{}/followers.csv".format(screen_name)
    with open(fjson, 'w') as f1, open(fcsv, 'w') as f2:
        writer = csv.writer(f2)
        writer.writerow(["id", "screen_name"])  # write the header only once
        for followers in Cursor(client.followers_ids,
                                screen_name=screen_name).pages(max_pages):
            for chunk in paginate(followers, 100):
                users = client.lookup_users(user_ids=chunk)
                # out = [[user.created_at, user.id, user.screen_name, user.name,
                #         user.description, user.location] for user in users]
                out = [[user.id, user.screen_name] for user in users]
                writer.writerows(out)
                for user in users:
                    f1.write(json.dumps(user._json) + "\n")
            if len(followers) == 5000:
                print(
                    "Followers: More results available. Sleeping for 60 seconds to avoid rate limit"
                )
                time.sleep(60)

    # get friends for a given user
    fjson = "users/{}/friends.jsonl".format(screen_name)
    fcsv = "users/{}/friends.csv".format(screen_name)
    with open(fjson, 'w') as f1, open(fcsv, 'w') as f2:
        for friends in Cursor(client.friends_ids,
                              screen_name=screen_name).pages(max_pages):
            for chunk in paginate(friends, 100):
                users = client.lookup_users(user_ids=chunk)
                # out = [[user.created_at, user.id, user.screen_name, user.name,
                #         user.description, user.location] for user in users]
                # writer = csv.writer(f2)
                # writer.writerow(["id", "screen_name"])
                # writer.writerows(out)
                for user in users:
                    f1.write(json.dumps(user._json) + "\n")
            if len(friends) == 5000:
                print(
                    "Friends: More results available. Sleeping for 60 seconds to avoid rate limit"
                )
                time.sleep(60)

    # get the user's profile
    fname = "users/{}/user_profile.json".format(screen_name)
    with open(fname, 'w') as f:
        profile = client.get_user(screen_name=screen_name)
        f.write(json.dumps(profile._json, indent=4))
    except KeyError:
        store[value] = 1
        return


if __name__ == '__main__':
    fname = 'data/for_tweets.txt'
    tmpname = 'data/tmptwt.txt'
    access_file = sys.argv[1]
    screen_name = sys.argv[2]
    period = sys.argv[3]
    if period == 'all':
        client = get_twitter_client(access_file)
        counter = 0
        tweets = []
        dates = Counter()
        current_user = client.get_user(screen_name)
        if not current_user.protected:
            for page in Cursor(client.user_timeline, screen_name=screen_name,
                               count=200).pages(16):
                for status in page:
                    tweets.append(status.created_at.year)
                    counter += 1
            a = Counter(sorted(tweets))
def convert_valid(one_char):
    """Convert a character into '_' if "invalid".

    Return: string
    """
    valid_chars = "-_.%s%s" % (string.ascii_letters, string.digits)
    if one_char in valid_chars:
        return one_char
    else:
        return '_'


def get_tweets(query_fname, auth, max_time, location=None):
    stop = datetime.now() + max_time
    twitter_stream = Stream(auth, CustomListener(query_fname))
    while datetime.now() < stop:
        if location:
            twitter_stream.filter(locations=[11.94, -13.64, 30.54, 5.19],
                                  is_async=True)
        else:
            twitter_stream.filter(track=query, is_async=True)


if __name__ == '__main__':
    query = sys.argv[1:]  # list of CLI arguments
    query_fname = ' '.join(query)  # string
    auth = get_twitter_auth()
    location = get_location(get_twitter_client(), 'Congo')
    get_tweets(query_fname, auth, timedelta(minutes=30), location=location)
def find_reply_count(user_name, tweet_ids):
    client = get_twitter_client()
    # the original snippet breaks off inside this list comprehension; the
    # search query below is an assumed completion that collects recent tweets
    # addressed to the user
    searched_tweets = [status for status in Cursor(
        client.search, q='to:{}'.format(user_name), count=100).items(1000)]
    # count the collected tweets that reply to one of the given tweet ids
    return sum(1 for status in searched_tweets
               if status.in_reply_to_status_id_str in tweet_ids)
                return False
            else:
                sys.stderr.write("Error {}\n".format(status))
                return True

        tweet_listener = TweetListener()
        auth = get_twitter_auth()
        stream = Stream(auth, tweet_listener)
        stream.filter(track=topics, locations=locations)

    return Observable.create(observe_tweets).share()


topics = [
    'RDC',
    'RDCongo',
    'DRC',
    'DRCongo',
]
coordinates = get_location(get_twitter_client(), 'Congo').get('coordinates')
location_tweets = tweet_for(topics=[], locations=[11.94, -13.64, 30.54, 5.19])
hash_tag_tweets = tweet_for(topics=topics)
place_id_tweet = tweet_for(topics=topics, locations=coordinates)
combine_loc_hash_tag = Observable.merge(hash_tag_tweets)
(
    combine_loc_hash_tag.subscribe()
)
def get_user(screen_name, access_file):
    client = get_twitter_client(access_file)
    account = client.get_user(screen_name)
    max_pages = math.ceil(MAX_FRIENDS / 5000)
    # get data for a given user
    source = []
    target = []
    direction = []
    connections = set()
    users_data = get_user_data(account)
    try:
        users_collection.insert_one(users_data)
    except pymongo.errors.DuplicateKeyError:
        pass
    counter = 0
    if not account.protected:
        for followers in Cursor(client.followers_ids, screen_name=screen_name,
                                count=1500).pages(max_pages):
            for chunk in paginate(followers, 100):
                users = client.lookup_users(user_ids=chunk)
                for user in users:
                    users_data = get_user_data(user)
                    source.append(str(account.id))
                    target.append(str(user.id))
                    direction.append('Directed')
                    connections.add(user.screen_name)
                    try:
                        users_collection.insert_one(users_data)
                    except pymongo.errors.DuplicateKeyError:
                        continue
            counter += len(followers)
        for friends in Cursor(client.friends_ids, screen_name=screen_name,
                              count=1500).pages(max_pages):
            for chunk in paginate(friends, 100):
                users = client.lookup_users(user_ids=chunk)
                for user in users:
                    users_data = get_user_data(user)
                    source.append(str(user.id))
                    target.append(str(account.id))
                    direction.append('Directed')
                    connections.add(user.screen_name)
                    try:
                        users_collection.insert_one(users_data)
                    except pymongo.errors.DuplicateKeyError:
                        continue
            counter += len(friends)
    my_query = {"_id": str(account.id)}
    connections_data = {"$set": {"Connections": list(connections)}}
    users_collection.update_one(my_query, connections_data)
    graph_data = get_graph_data(account, source, target, direction)
    try:
        graph_collection.insert_one(graph_data)
    except pymongo.errors.DuplicateKeyError:
        pass
def create_graph(screen_name, deepth):
    # safety parameter for changing credentials files
    safety = 0
    # getting access file
    access_file, safety = get_access_file(safety)
    # creating connection with Twitter API
    client = get_twitter_client(access_file)
    # add the graph if it is not in the database
    if not graph_collection.find_one({"_id": screen_name}):
        graph_collection.insert_one({"_id": screen_name, "Deepth": deepth})
    # list of users to check
    to_do = []
    to_do.append(screen_name)
    counter = 1
    # set to keep connections
    edges = set()
    # loop over the graph depth
    for i in range(1, deepth + 1):
        # loop over all users
        for name in set(to_do):
            if counter % 50 == 0:
                print(str(counter) + " DONE")
            counter += 1
            account = client.get_user(name)
            # list of users for the next depth (stays empty if this user's
            # connections are already stored)
            temp = []
            # check whether friends and followers are already available
            try:
                users_collection.find_one({"_id": name})['Friends']
            # if not available, download them
            except (KeyError, TypeError) as e:
                if str(e) == "'NoneType' object is not subscriptable":
                    users_collection.insert_one({"_id": name})
                    # check friends and followers of the given user
                    try:
                        followers, friends = get_user_connections(
                            account, client)
                        for follower in followers:
                            record = follower + "," + name + ",Directed"
                            # save connection
                            edges.add(record)
                            my_query = {"_id": screen_name}
                            data = {"$addToSet": {"Edges": record}}
                            graph_collection.update_one(my_query, data)
                        for friend in friends:
                            record = name + "," + friend + ",Directed"
                            # save connection
                            edges.add(record)
                            my_query = {"_id": screen_name}
                            data = {"$addToSet": {"Edges": record}}
                            graph_collection.update_one(my_query, data)
                        # add users to do for the next depth
                        temp += followers
                        temp += friends
                    except tweepy.TweepError as e:
                        # if the Twitter API limit is reached, change the
                        # access file
                        if e.response.text == '{"errors":[{"message":"Rate limit exceeded","code":88}]}':
                            print('CHANGING ACCESS FILE TO ' + str(safety) + '!')
                            access_file, safety = get_access_file(safety)
                            # try to check friends and followers of the given
                            # user again with the new access file
                            try:
                                client = get_twitter_client(access_file)
                                account = client.get_user(screen_name)
                                followers, friends = get_user_connections(
                                    account, client)
                                for follower in followers:
                                    record = follower + "," + name + ",Directed"
                                    # save connection
                                    edges.add(record)
                                    my_query = {"_id": screen_name}
                                    data = {"$addToSet": {"Edges": record}}
                                    graph_collection.update_one(my_query, data)
                                for friend in friends:
                                    record = name + "," + friend + ",Directed"
                                    # save connection
                                    edges.add(record)
                                    my_query = {"_id": screen_name}
                                    data = {"$addToSet": {"Edges": record}}
                                    graph_collection.update_one(my_query, data)
                                # add users to do for the next depth
                                temp += followers
                                temp += friends
                            except tweepy.TweepError:
                                pass
                        # otherwise, probably a deleted user was found
                        else:
                            pass
                elif str(e) == "'Friends'":
                    # check friends and followers of the given user
                    try:
                        followers, friends = get_user_connections(
                            account, client)
                        for follower in followers:
                            record = follower + "," + name + ",Directed"
                            # save connection
                            edges.add(record)
                            my_query = {"_id": screen_name}
                            data = {"$addToSet": {"Edges": record}}
                            graph_collection.update_one(my_query, data)
                        for friend in friends:
                            record = name + "," + friend + ",Directed"
                            # save connection
                            edges.add(record)
                            my_query = {"_id": screen_name}
                            data = {"$addToSet": {"Edges": record}}
                            graph_collection.update_one(my_query, data)
                        # add users to do for the next depth
                        temp += followers
                        temp += friends
                    except tweepy.TweepError as e:
                        # if the Twitter API limit is reached, change the
                        # access file
                        if e.response.text == '{"errors":[{"message":"Rate limit exceeded","code":88}]}':
                            print('CHANGING ACCESS FILE TO ' + str(safety) + '!')
                            access_file, safety = get_access_file(safety)
                            # try to check friends and followers of the given
                            # user again with the new access file
                            try:
                                client = get_twitter_client(access_file)
                                account = client.get_user(screen_name)
                                followers, friends = get_user_connections(
                                    account, client)
                                for follower in followers:
                                    record = follower + "," + name + ",Directed"
                                    # save connection
                                    edges.add(record)
                                    my_query = {"_id": screen_name}
                                    data = {"$addToSet": {"Edges": record}}
                                    graph_collection.update_one(my_query, data)
                                for friend in friends:
                                    record = name + "," + friend + ",Directed"
                                    # save connection
                                    edges.add(record)
                                    my_query = {"_id": screen_name}
                                    data = {"$addToSet": {"Edges": record}}
                                    graph_collection.update_one(my_query, data)
                                temp += followers
                                temp += friends
                            except tweepy.TweepError:
                                pass
                        # otherwise, probably a deleted user was found
                        else:
                            pass
            if i == 1:
                to_do.remove(screen_name)
            to_do += temp
    # save connections to file
    filename = 'connections/{}.csv'.format(screen_name)
    with open(filename, 'w') as f:
        for edge in list(edges):
            f.write(edge + '\n')
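# create_graph above depends on get_user_connections(account, client), which is
# not included in these snippets. The sketch below is an assumption based only
# on how its return values are used (two lists of screen names); it reuses the
# paginate helper defined earlier in this collection, and the real helper may
# cache results or cap the number of connections it fetches.
from tweepy import Cursor


def get_user_connections(account, client):
    # collect follower and friend screen names for the given account
    followers, friends = [], []
    for ids in Cursor(client.followers_ids, user_id=account.id).pages():
        for chunk in paginate(ids, 100):
            followers += [u.screen_name for u in client.lookup_users(user_ids=chunk)]
    for ids in Cursor(client.friends_ids, user_id=account.id).pages():
        for chunk in paginate(ids, 100):
            friends += [u.screen_name for u in client.lookup_users(user_ids=chunk)]
    return followers, friends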