def get_follower_ids(tweepy_obj):
    """Collect all follower IDs for a user, one page at a time."""
    ids_list = []
    try:
        # hold a single pages() iterator; calling c.pages() anew on every
        # loop would restart from the first page each time
        pages = Cursor(api.followers_ids, tweepy_obj.id).pages()
    except TweepError as e:
        print('tweepy breaks!', e)
        return ids_list
    while True:
        try:
            print('new page...')
            page = next(pages)
            sleep(2)
        except TweepError as e:
            print('tweepy breaks!', e)
            continue
        except StopIteration:
            print('done with', tweepy_obj.id)
            break
        ids_list.extend(page)
        try:
            print('the remaining hit is', api.rate_limit_status()['remaining_hits'])
        except TweepError as e:
            print('tweepy breaks!', e)
    return ids_list
def findTweets(keywords):
    authentication = OAuthHandler(consumer_key, consumer_secret)
    authentication.set_access_token(access_token, access_token_secret)
    api = API(authentication)
    cursor = Cursor(api.search, q=keywords).items()
    while True:
        try:
            tweet = next(cursor)
            print(','.join([str(tweet.created_at), tweet.lang, tweet.text]))
        except StopIteration:  # no more results
            break
def testcursornext(self):
    """
    Test cursor.next() behavior, id being passed correctly.
    Regression test for issue #518.
    """
    cursor = Cursor(self.api.user_timeline, id='twitter').items(5)
    status = cursor.next()
    self.assertEqual(status.user.screen_name, 'twitter')
def query_tweets(api, query, limit=None, languages=None):
    """
    Queries the Twitter REST API for tweets matching the given search 'query'.
    Takes an authenticated api object (API or APIPool), a query string, an
    optional limit on the number of tweets returned, and an optional list of
    languages to further filter results.
    Returns a cursor (iterator) over Tweepy status objects (not native JSON docs).
    """
    cursor = Cursor(api.search, q=query, include_entities=True, lang=languages)
    if limit:
        return cursor.items(_check_limit(limit))
    return cursor.items()
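# --- Usage sketch (not from the original): assumes `api` is an authenticated
# Tweepy API object built elsewhere, e.g. API(OAuthHandler(...)); the query
# string is illustrative. ---
for status in query_tweets(api, "python -RT", limit=100):
    print(status.created_at, status.text)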
def get_tweets_by_cursor(query):
    api = API(auth)
    query = query + " -RT"  # exclude retweets from the search
    cursor = Cursor(api.search, q=query, lang="en").items(5000)
    while True:
        try:
            tweet = cursor.next()
            print(tweet._json)
            database.tweets.insert(tweet._json)
        except TweepError:
            time.sleep(60 * 15)  # wait out the 15-minute rate-limit window
            continue
        except StopIteration:
            break
def get_friends(self, user_id):
    logger.log(LOG_LEVEL, 'Getting friends for {}'.format(user_id))
    friends = []
    cursor = Cursor(self.api.friends, user_id=user_id)
    for user in cursor.items():
        if self.is_potential_target(user):
            friends.append(str(user.id))
    return friends
def get_tweets(self, user_id):
    logger.log(LOG_LEVEL, 'Getting tweets for {}'.format(user_id))
    statuses = []
    cursor = Cursor(self.api.user_timeline, user_id=user_id, count=PAGE_COUNT)
    for status in cursor.items(TWEET_COUNT):
        statuses.append(self.parse_api_tweet(status))
    return statuses
def save_user_followers(user):
    try:
        # hold a single pages() iterator so pagination actually advances
        pages = Cursor(api.followers, user.user_id).pages()
    except TweepError as e:
        print("tweepy breaks!")
        print(e)
        return
    while True:
        try:
            print('taking a rest before moving to the next page')
            sleep(10)
            page = next(pages)
            print("starting a new page for user", user.scrn_name, 'page', pages.count)
        except TweepError as e:
            print("tweepy breaks!")
            print(e)
            continue
        except StopIteration:
            print("Moving to the next unscanned user")
            break
        for tweepy_user in page:
            print("follower -----", tweepy_user.screen_name, "----- found......")
            if TwitterUser.get_by_id(tweepy_user.id) or is_in_no_chn(tweepy_user.id):
                print('ALREADY in DB, skipping')
                continue
            try:
                if not tweepy_user.protected or (tweepy_user.protected and tweepy_user.following):
                    if is_chn(tweepy_user):
                        print("and speaks Chinese! Saving....")
                        TwitterUser.save_tweepy_user(tweepy_user)
                    else:
                        save_non_chn(tweepy_user.id)
                        print("pity, s/he is not a Chinese speaker, next...")
            except TweepError as e:
                print("tweepy breaks!")
                print(e)
        try:
            print("the remaining hit is", api.rate_limit_status()['remaining_hits'])
        except TweepError as e:
            print("tweepy breaks!")
            print(e)
    user.update_scanned()
def get_tweets_for_feature_extraction(query, count):
    api = API(auth)
    query = query + " -RT"
    cursor = Cursor(api.search, q=query, lang="en").items(count)
    tweets = []
    while True:
        try:
            tweet = cursor.next()
            tweets.append(tweet._json)
        except TweepError as e:
            print(e)
            time.sleep(60 * 5)
            continue
        except StopIteration:
            break
    return tweets
def user_tweets(api, user_id=None, screen_name=None, limit=None):
    """
    Queries the Twitter REST API for a user's tweets. Returns as many as
    possible, or up to the given limit.
    Takes an authenticated API object (API or APIPool), one of user_id or
    screen_name (not both), and an optional limit on the number of tweets
    returned.
    Returns a cursor (iterator) over Tweepy status objects.
    """
    if not (user_id or screen_name):
        raise Exception("Must provide one of user_id or screen_name")
    if user_id:
        cursor = Cursor(api.user_timeline, user_id=user_id)
    elif screen_name:
        cursor = Cursor(api.user_timeline, screen_name=screen_name)
    if limit:
        return cursor.items(_check_limit(limit))
    return cursor.items()
def get_followers_ids(api, user_id):
    """
    Given a Tweepy/smappPy TweepyPool api, query Twitter's REST API for
    followers of the given user_id. Returns IDs only (much faster / more
    per request).
    Parameters:
        api     - fully authenticated Tweepy api or smappPy TweepyPool api
        user_id - twitter user id
    Returns tuple: return code, list of IDs or None (if the API call fails)
    """
    cursor = Cursor(api.followers_ids, user_id=user_id)
    user_list, ret_code = call_with_error_handling(list, cursor.items())

    if ret_code != 0:
        logger.warning("User {0}: Followers request failed".format(user_id))

    # Return user list from API or None (call_with_error_handling returns
    # None if the call fails)
    return ret_code, user_list
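# --- Usage sketch (not from the original): assumes `api` is an authenticated
# Tweepy API object; the user ID is just an illustrative value. ---
ret_code, follower_ids = get_followers_ids(api, user_id=783214)
if ret_code == 0:
    print("fetched {0} follower IDs".format(len(follower_ids)))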
def limit_handled(cursor: tweepy.Cursor):
    """Wrap cursor access with rate limiting

    :param cursor: The cursor to siphon
    :returns: Cursor items
    """
    while True:
        try:
            yield cursor.next()
        except tweepy.RateLimitError:
            time.sleep(15 * 60)
        except StopIteration:
            # under PEP 479, letting StopIteration escape a generator
            # raises RuntimeError, so exit explicitly
            return
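# --- Usage sketch (not from the original): assumes an authenticated `api`;
# this is the consumption pattern the wrapper is designed for. ---
for follower in limit_handled(tweepy.Cursor(api.followers, screen_name="twitter").items()):
    print(follower.screen_name)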
def insert_user_with_friends(graph_db, twitter_user, user_labels=None):
    # avoid a mutable default argument: build a fresh list per call
    user_labels = list(user_labels) if user_labels else []
    user_labels.append("SeedNode")
    if isinstance(twitter_user, str):
        try:
            twitter_user = api.get_user(twitter_user)
        except tweepy.TweepError:
            # most likely rate limited; wait out the window and retry
            time.sleep(60 * 16)
            twitter_user = api.get_user(twitter_user)
    create_or_get_node(graph_db, twitter_user, user_labels)
    friend_count = 0
    print(u"\nINSERTING FOR: {}".format(twitter_user.name))
    friends = Cursor(api.friends, user_id=twitter_user.id_str, count=200).items()
    try:
        while True:
            try:
                friend = next(friends)
            except tweepy.TweepError:
                print("exceeded rate limit. waiting")
                time.sleep(60 * 16)
                friend = next(friends)
            friend_count += 1
            # simple progress ticker: one dot per friend, grouped for readability
            sys.stdout.write('.')
            if friend_count % 10 == 0:
                sys.stdout.write(' ')
            if friend_count % 50 == 0:
                sys.stdout.write('| ')
            if friend_count % 100 == 0:
                print()
            create_or_get_node(graph_db, friend)
            query_string = """
                MATCH (user:User {id_str:{user_id_str}}), (friend:User {id_str:{friend_id_str}})
                CREATE UNIQUE (user)-[:FOLLOWS]->(friend)
                """
            data = {"user_id_str": twitter_user.id_str,
                    "friend_id_str": friend.id_str}
            graph_db.cypher.execute(query_string, data)
    except StopIteration:
        print(u"\nTotal Friend Count = {}".format(friend_count))
def ensure_users_edges_in_db(user, edges_collection, twitter_api):
    """Looks up a user's friends_ids and followers_ids on the Twitter API,
    and stores the edges in the db."""
    logging.info(".. Fetching followers_ids for user {0}.".format(user['id']))
    logging.info(".... user has {0} followers.".format(user['followers_count']))
    cursor = Cursor(twitter_api.followers_ids, id=user['id'])
    edges = [{'from': follower_id, 'to': user['id']} for follower_id in cursor.items()]
    store_edges(edges_collection, edges)
    followers_ids = [edge['from'] for edge in edges]

    logging.info(".. Fetching friends_ids for user {0}.".format(user['id']))
    logging.info(".... user has {0} friends.".format(user['friends_count']))
    cursor = Cursor(twitter_api.friends_ids, id=user['id'])
    edges = [{'to': friend_id, 'from': user['id']} for friend_id in cursor.items()]
    store_edges(edges_collection, edges)
    friends_ids = [edge['to'] for edge in edges]

    return friends_ids, followers_ids
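# --- Hypothetical helper (the real store_edges is defined elsewhere): a
# minimal sketch assuming edges_collection is a pymongo collection, upserting
# on the (from, to) pair so re-runs don't insert duplicate edges. ---
def store_edges(edges_collection, edges):
    for edge in edges:
        edges_collection.update_one(
            {'from': edge['from'], 'to': edge['to']},
            {'$set': edge},
            upsert=True,
        )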
def user_tweets(api, user_id=None, screen_name=None, limit=None, **kwargs):
    """
    Queries the Twitter REST API for a user's tweets. Returns as many as
    possible, or up to the given limit.
    Takes an authenticated API object (API or APIPool), one of user_id or
    screen_name (not both), and an optional limit on the number of tweets
    returned.
    Returns a cursor (iterator) over Tweepy status objects.
    Also takes a variable collection of keyword arguments to pass on to the
    Tweepy/APIPool query methods, to support full API call parameterization.
    """
    if not (user_id or screen_name):
        raise Exception("Must provide one of user_id or screen_name")
    if user_id:
        cursor = Cursor(api.user_timeline, user_id=user_id, count=200, **kwargs)
    elif screen_name:
        cursor = Cursor(api.user_timeline, screen_name=screen_name, count=200, **kwargs)
    if limit:
        return cursor.items(_check_limit(limit))
    return cursor.items()
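# --- Usage sketch (not from the original): keyword arguments such as
# tweet_mode and exclude_replies pass straight through to the underlying
# user_timeline call; `api` is assumed to be authenticated. ---
for status in user_tweets(api, screen_name="twitter", limit=500,
                          tweet_mode="extended", exclude_replies=True):
    print(status.full_text if hasattr(status, "full_text") else status.text)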
    'BBCWorld', 'NHKWORLD_News', 'trtworld', 'cnni', 'foxnews',
    'dwnews', 'ajenglish', 'FRANCE24', 'rt_com', 'cgtnofficial'
]
names = ['bbc', 'nhk', 'trt', 'cnn', 'foxnews', 'dw', 'aj', 'fr24', 'rt', 'cgtn']
accounts = accounts + names
tw = pd.DataFrame()
lists = [bbc, nhk, trt, cnn, foxnews, dw, aj, fr24, rt, cgtn]
accounts = accounts + lists

# Request tweets through a Cursor object and collect the favorite counts,
# retweet counts, and texts of each account's tweets. This may be slow,
# since it issues many API queries; fetching each account's pages once and
# reusing them avoids tripling the number of requests.
for i in accounts:
    statuses = [k for j in Cursor(api.user_timeline, screen_name=i).pages() for k in j]
    tw['fav_' + i] = [k.favorite_count for k in statuses]
    tw['rt_' + i] = [k.retweet_count for k in statuses]
    tw[i] = [k.text for k in statuses]

# Reduce the DataFrame to its first 3000 rows
tw = tw.iloc[:3000, :]

# Column index numbers of the text columns, kept in a variable for later
# text manipulation
text_places = [2, 5, 8, 11, 14, 17, 20, 23, 26, 29]
from tweepy import Stream
from tweepy import API
from tweepy import Cursor
from tweepy import OAuthHandler
from datetime import datetime, date, timedelta
from twitter_authentication_keys import get_account_credentials
import pprint
import random
import time
import hashlib
import base64
import sys
import re

if __name__ == '__main__':
    consumer_key, consumer_secret, access_token, access_token_secret = get_account_credentials()
    auth = OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    auth_api = API(auth)
    print("Signing in as: " + auth_api.me().name)
    for status in Cursor(auth_api.user_timeline, id="@r0zetta").items():
        if re.search("Android", status.source):
            source = "Android"
        elif re.search("iPhone", status.source):
            source = "iPhone"
        else:
            source = status.source
        print(source)
auth = tA.authorization()
api = API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)

# Obtain the geo code location of Palo Alto, California
# places = api.geo_search(query="USA", granularity="country")
# places = api.geo_search(query="Washington DC", granularity="city")
# place_id = places[0].id
# preventiveString, riskString, elderlyString, sentiments, misc = gA.returnSearchString()

searchString = placeSearch[index] + ' #COVID-19 OR "COVID-19" OR "pandemic" OR "Corona"'
cursor = Cursor(api.search, q=searchString, count=20, lang="en", tweet_mode='extended')

maxCount = 1000
count = 0
for tweet in cursor.items():
    count += 1
    fileName = "tweets_" + str(index) + "_" + str(datetime.datetime.now().date()).replace('-', '_')
    file = open(outputPath + fileName + '.txt', 'a')
    file.write(str(tweet) + '\n')
    print(count, end='')
    backline()
    (fullname, screenname) = line.split('\t')
    if fullname == 'Name':
        continue
    if screenname in members:
        continue
    members.append(screenname)

# open file for storing retweet counts
out = open("retweetsby.txt", 'w')
# store error messages
error_log = open('error_log.txt', 'w')

# retrieve tweets member by member
for member in members:
    print(member)
    try:
        # hold a single pages() iterator so pagination actually advances
        pages = Cursor(api.user_timeline, member).pages()
        tweets = []
        time_is_right = True
        while time_is_right:
            try:
                page = next(pages)
                tweets.extend(page)
                month = page[-1].created_at.month
                year = page[-1].created_at.year
                if year < 2012:
                    time_is_right = False
            except StopIteration:
                break
            time.sleep(1)
    except TweepError as e:
        # error capture: store log into file
        error_log.write("{0}\t{1}\n".format(member, e))
def testidcursorpages(self):
    pages = list(Cursor(self.api.user_timeline, count=1).pages(2))
    self.assertEqual(len(pages), 2)
def get_friend_list(self, num_friends):
    friend_list = []
    for friend in Cursor(self.twitter_client.friends,
                         id=self.twitter_user).items(num_friends):
        friend_list.append(friend)
    return friend_list
def get_search_cursor(self, q):
    temp = []
    for tweets in Cursor(self.api.search, q=q, count=200).pages(100):
        for tweet in tweets:
            temp.append(tweet._json)
    return temp
def get_home_timeline_tweets(self, num_tweets):
    home_timeline_tweets = []
    for tweet in Cursor(self.twitter_client.home_timeline).items(num_tweets):
        home_timeline_tweets.append(tweet)
    return home_timeline_tweets
def get_info(twitter_user):
    # screen_name = sys.argv[1]
    screen_name = twitter_user
    client = get_twitter_client()
    dirname = "users/{}".format(screen_name)
    max_pages = math.ceil(MAX_FRIENDS / 5000)
    try:
        os.makedirs(dirname, mode=0o755, exist_ok=True)
    except OSError:
        print("Directory {} already exists".format(dirname))
    except Exception as e:
        print("Error while creating directory {}".format(dirname))
        print(e)
        sys.exit(1)
    print('Extracting {}\n'.format(screen_name))

    # get followers for a given user
    fjson = "users/{}/followers.jsonl".format(screen_name)
    fcsv = "users/{}/followers.csv".format(screen_name)
    with open(fjson, 'w') as f1, open(fcsv, 'w') as f2:
        writer = csv.writer(f2)
        writer.writerow(["id", "screen_name"])  # write the header row once
        for followers in Cursor(client.followers_ids, screen_name=screen_name).pages(max_pages):
            for chunk in paginate(followers, 100):
                users = client.lookup_users(user_ids=chunk)
                out = [[user.id, user.screen_name] for user in users]
                writer.writerows(out)
                for user in users:
                    f1.write(json.dumps(user._json) + "\n")
            if len(followers) == 5000:
                print("Followers: more results available. Sleeping for 60 seconds to avoid the rate limit")
                time.sleep(60)

    # get friends for a given user
    fjson = "users/{}/friends.jsonl".format(screen_name)
    fcsv = "users/{}/friends.csv".format(screen_name)
    with open(fjson, 'w') as f1, open(fcsv, 'w') as f2:
        for friends in Cursor(client.friends_ids, screen_name=screen_name).pages(max_pages):
            for chunk in paginate(friends, 100):
                users = client.lookup_users(user_ids=chunk)
                # CSV output for friends is disabled; only the JSONL file is written
                for user in users:
                    f1.write(json.dumps(user._json) + "\n")
            if len(friends) == 5000:
                print("Friends: more results available. Sleeping for 60 seconds to avoid the rate limit")
                time.sleep(60)

    # get the user's profile
    fname = "users/{}/user_profile.json".format(screen_name)
    with open(fname, 'w') as f:
        profile = client.get_user(screen_name=screen_name)
        f.write(json.dumps(profile._json, indent=4))
def get_friend_list(self, number_of_friends):
    list_of_friends = []
    for friend in Cursor(self.twitter_client.friends,
                         id=self.twitter_user).items(number_of_friends):
        list_of_friends.append(friend)
    return list_of_friends
def get_user_timeline_tweets(self, number_of_tweets):
    tweets = []
    # Cursor pages through the chosen user's timeline; if no user is given,
    # it defaults to your own timeline. items() caps how many tweets we take.
    for tweet in Cursor(self.twitter_client.user_timeline,
                        id=self.twitter_user).items(number_of_tweets):
        tweets.append(tweet)
    return tweets
client = get_twitter_client()
dirname = "users/{}".format(screen_name)
max_pages = math.ceil(MAX_FRIENDS / 5000)
try:
    os.makedirs(dirname, mode=0o755, exist_ok=True)
except OSError:
    print("Directory {} already exists".format(dirname))
except Exception as e:
    print("Error while creating directory {}".format(dirname))
    print(e)
    sys.exit(1)

# get followers for a given user
fname = "users/{}/followers.jsonl".format(screen_name)
with open(fname, 'w') as f:
    for followers in Cursor(client.followers_ids, screen_name=screen_name).pages(max_pages):
        for chunk in paginate(followers, 100):
            users = client.lookup_users(user_ids=chunk)
            for user in users:
                f.write(json.dumps(user._json) + "\n")
        if len(followers) == 5000:
            print("More results available. Sleeping for 60 seconds to avoid the rate limit")
            time.sleep(60)

# get friends for a given user
fname = "users/{}/friends.jsonl".format(screen_name)
with open(fname, 'w') as f:
    for friends in Cursor(client.friends_ids, screen_name=screen_name).pages(max_pages):
        for chunk in paginate(friends, 100):
            users = client.lookup_users(user_ids=chunk)
            for user in users:
                f.write(json.dumps(user._json) + "\n")
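# --- Hypothetical helper (the real paginate is defined elsewhere in these
# projects): a minimal chunking generator matching how it is called above,
# i.e. paginate(ids, 100) yields 100-ID slices for client.lookup_users. ---
def paginate(items, n):
    """Yield successive n-sized slices of a list."""
    for i in range(0, len(items), n):
        yield items[i:i + n]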
        for file in files:
            done_list.append(file[:-4])
    except FileNotFoundError:
        pass
    done_list.extend(no_tweet_list)
    users = list(set(users) - set(done_list))
    print("City:", city)
    print("No. of users remaining:", len(users))
    if len(users) == 0:
        break
    for user_id in users:
        tweets = Cursor(api.user_timeline, user_id=int(user_id)).items()
        last_user_id = user_id
        tweet_count = 0
        try:
            print("User id:", user_id)
            for tweet in tweets:
                tweet_count += 1
                print("No. of tweets written:", tweet_count, end='\r')
                try:
                    f = open('tweets/' + city + '/' + user_id + '.txt', 'a+', encoding='utf-8')
                except FileNotFoundError:
                    os.mkdir('tweets/' + city)
                    f = open('tweets/' + city + '/' + user_id + '.txt', 'a+', encoding='utf-8')
                f.write('tweet:' + tweet.text + '\n')
def testidcursoritems(self):
    items = list(Cursor(self.api.user_timeline).items(2))
    self.assertEqual(len(items), 2)
# Prints the first ten items from the timeline to the terminal
from tweepy import Cursor
from twitter_client import get_twitter_client

if __name__ == '__main__':
    client = get_twitter_client()
    # status is an instance of tweepy.Status, the model used by Tweepy
    # to wrap statuses (tweets)
    for status in Cursor(client.home_timeline).items(10):
        print(status.text)
        print("-----------------")
def testcursorcursorpages(self):
    pages = list(Cursor(self.api.friends_ids).pages(1))
    self.assertTrue(len(pages) == 1)

    pages = list(Cursor(self.api.followers_ids, username).pages(1))
    self.assertTrue(len(pages) == 1)
def testcursorcursoritems(self):
    items = list(Cursor(self.api.friends_ids).items(2))
    self.assertEqual(len(items), 2)

    items = list(Cursor(self.api.followers_ids, username).items(1))
    self.assertEqual(len(items), 1)
def get_user_timeline_tweets(self, startDate=datetime.datetime(1, 1, 1, 0, 0),
                             endDate=datetime.datetime(9999, 1, 1, 0, 0)):
    tweets = []
    for tweet in Cursor(self.twitter_client.user_timeline, id=self.twitter_user,
                        tweet_mode='extended').items():
        if ((not tweet.retweeted) and ('RT' not in tweet.full_text)
                and (tweet.created_at < endDate) and (tweet.created_at > startDate)):
            tweets.append(tweet)
    return tweets
def get_user_timeline_tweets(self, num_tweets):
    tweets = []
    for tweet in Cursor(self.twitter_client.user_timeline,
                        id=self.twitter_user).items(num_tweets):
        tweets.append(tweet)
    return tweets
def testpagecursoritems(self):
    items = list(Cursor(self.api.user_timeline).items())
    self.assertTrue(len(items) > 0)

    items = list(Cursor(self.api.user_timeline, 'twitter').items(30))
    self.assertTrue(len(items) == 30)
    data.append(['Joined At ', user_profile.created_at])
    return data


def GetScreenName(api, user_id):
    user = api.get_user(id=user_id)
    screen_name = user.screen_name
    return screen_name


filename = "C:\\Python27\\tweet2.csv"
f = open(filename, "w+")
headers = "Name,friends,Positive,Negative,Neutral"
f.write(headers + "\n")
for follower_id in Cursor(api.followers_ids, screen_name='shreya73767208').items():
    sc = GetScreenName(api, follower_id)
    data = GetUserProfileDetails(api, sc)
    print(str(data[0]) + ' : ' + str(data[1]) + ' : ' + str(data[2]))
    l1 = str(data[1]).split(',')
    print(l1[1].replace('u', ''))
    data1 = getTweets(l1[1].replace('u', ''), 40)
    print(data1)
    l3 = str(data[6]).split(',')
    f.write(l1[1].replace('u', '') + "," + l3[1].replace(']', '') + "," +
            str(data1[0]) + "," + str(data1[1]) + "," + str(data1[2]) + "\n")
f.close()

# print friend list
filename = "C:\\Python27\\tweet.csv"
def testpagecursorpages(self):
    pages = list(Cursor(self.api.user_timeline).pages())
    self.assertTrue(len(pages) > 0)

    pages = list(Cursor(self.api.user_timeline, 'twitter').pages(5))
    self.assertTrue(len(pages) == 5)
def get_user_timeline_tweets(self, num_tweets):
    tweets = []
    # loop through every tweet provided
    for tweet in Cursor(self.twitter_client.user_timeline,
                        id=self.twitter_user).items(num_tweets):
        tweets.append(tweet)
    return tweets  # list of user timeline tweets
def testcursorcursoritems(self):
    items = list(Cursor(self.api.friends_ids).items())
    self.assertTrue(len(items) > 0)

    items = list(Cursor(self.api.followers_ids, 'twitter').items(30))
    self.assertTrue(len(items) == 30)
city = "Delhi"
access_token = ''
access_token_secret = ''
consumer_key = ''
consumer_secret = ''

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
print('authed')
api = tweepy.API(auth)

places = api.geo_search(query=city)
place_id = places[0].id
coords = (places[0].bounding_box.coordinates)[0]
location = coords[0] + coords[2]

# Cursor implementation
tweets = Cursor(api.search, q='place:' + place_id).items()
i = 0
for tweet in tweets:
    with open('tweets/location/' + city + '.txt', 'a+') as f:
        f.write('TWEET_TEXT: ' + tweet.text + '\n')

# Stream implementation
'''class StdOutListener(StreamListener):
    def on_data(self, tweet):
        tweet = json.loads(tweet)
        try:
            # print(tweet['place']['name'], tweet['user']['name'])
def testcursorcursorpages(self):
    pages = list(Cursor(self.api.friends_ids).pages())
    self.assertTrue(len(pages) > 0)

    pages = list(Cursor(self.api.followers_ids, 'twitter').pages(5))
    self.assertTrue(len(pages) == 5)
def downloadTweets(tweetData):
    # Read authentication keys from the .dat file
    print("trying to find twitter authentication keys in: " + os.getcwd() + "/keys.dat")
    keys = open("keys.dat", "r")

    # Authentication and connection to the Twitter API
    consumer_key = keys.readline()[:-1]
    consumer_secret = keys.readline()[:-1]
    access_key = keys.readline()[:-1]
    access_secret = keys.readline()[:-1]

    # Close the authentication file
    keys.close()

    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_key, access_secret)
    api = tweepy.API(auth)

    # Usernames whose tweets we want to gather
    users = ["realDonaldTrump", "tedcruz", "LindseyGrahamSC", "SpeakerRyan",
             "BarackObama", "GovGaryJohnson", "BernieSanders", "HillaryClinton",
             "DrJillStein"]

    with open(tweetData, 'wb') as file:
        writer = unicodecsv.writer(file, delimiter=',', quotechar='"')
        # Write the header row
        writer.writerow(["politician_name", "politician_username", "tweet_text",
                         "tweet_retweet_count", "tweet_favorite_count",
                         "tweet_hashtags", "tweet_hashtags_count", "tweet_urls",
                         "tweet_urls_count", "tweet_user_mentions",
                         "tweet_user_mentions_count", "tweet_by_trump"])

        # For each Twitter username in the users array
        for user in users:
            # Gather info specific to the current user
            user_obj = api.get_user(user)
            user_info = [user_obj.name, user_obj.screen_name]
            startProgress("Downloading tweets from: " + user)

            # Maximum number of tweets to retrieve
            max_tweets = 1000
            # Count of tweets retrieved so far
            count = 0

            # Get the 1000 most recent tweets for the current user
            for tweet in Cursor(api.user_timeline, screen_name=user).items(max_tweets):
                # Show progress
                progress(count / (max_tweets / 100))
                count += 1

                # Skip all retweets
                if tweet.text[0:3] == "RT ":
                    continue

                # Info specific to the current tweet of the current user
                tweet_info = [unidecode(tweet.text), tweet.retweet_count,
                              tweet.favorite_count]

                # The entities below are stored as variable-length dictionaries, if present
                hashtags = []
                hashtags_data = tweet.entities.get('hashtags', None)
                if hashtags_data is not None:
                    for i in range(len(hashtags_data)):
                        hashtags.append(unidecode(hashtags_data[i]['text']))

                urls = []
                urls_data = tweet.entities.get('urls', None)
                if urls_data is not None:
                    for i in range(len(urls_data)):
                        urls.append(unidecode(urls_data[i]['url']))

                user_mentions = []
                user_mentions_data = tweet.entities.get('user_mentions', None)
                if user_mentions_data is not None:
                    for i in range(len(user_mentions_data)):
                        user_mentions.append(unidecode(user_mentions_data[i]['screen_name']))

                tweet_by_trump = 0
                if user_obj.screen_name == 'realDonaldTrump':
                    tweet_by_trump = 1

                more_tweet_info = [', '.join(hashtags), len(hashtags),
                                   ', '.join(urls), len(urls),
                                   ', '.join(user_mentions), len(user_mentions),
                                   tweet_by_trump]

                # Write data to CSV
                writer.writerow(user_info + tweet_info + more_tweet_info)

            endProgress()
            print("Wrote tweets by %s to CSV." % user)
    tweets = item.statuses_count
    account_created_date = item.created_at
    delta = datetime.utcnow() - account_created_date
    account_age_days = delta.days
    # print("Account age (in days): " + str(account_age_days))
    tweets_per_day = 0
    if account_age_days > 0:
        tweets_per_day = "%.2f" % (float(tweets) / float(account_age_days))

    hashtags = []
    tweets_list = []
    tweet_count = 0
    end_date = datetime.utcnow() - timedelta(days=30)
    for status in Cursor(auth_api.user_timeline, id=target).items():
        tweet_count += 1
        if hasattr(status, "text"):
            tweets_list.append(status.text)
        if hasattr(status, "entities"):
            entities = status.entities
            if "hashtags" in entities:
                for ent in entities["hashtags"]:
                    if ent is not None and "text" in ent:
                        hashtag = ent["text"]
                        if hashtag is not None:
                            hashtags.append(hashtag)
        if status.created_at < end_date:
            break
# Source code for the "Mining Data on Twitter with Python" course by
# TigerStyle Code Academy
import sys
import json

from tweepy import Cursor
from twitter_client import get_twitter_client

if __name__ == '__main__':
    user = sys.argv[1]
    client = get_twitter_client()
    fname = "user_timeline_{}.jsonl".format(user)
    with open(fname, 'w') as f:
        for page in Cursor(client.user_timeline, screen_name=user, count=200).pages(16):
            for status in page:
                f.write(json.dumps(status._json) + "\n")
def testcursorsetstartcursor(self):
    c = Cursor(self.api.friends_ids, cursor=123456)
    self.assertEqual(c.iterator.next_cursor, 123456)
    self.assertFalse('cursor' in c.iterator.kwargs)
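# --- Usage sketch (not from the original): the cursor keyword this test
# exercises lets a later run resume pagination from a saved position;
# assumes an authenticated `api`. ---
iterator = Cursor(api.followers_ids, screen_name="twitter").pages()
first_page = next(iterator)
saved_cursor = iterator.next_cursor  # opaque position token from Twitter

# A later run can pick up where the first one stopped:
resumed = Cursor(api.followers_ids, screen_name="twitter", cursor=saved_cursor)
for page in resumed.pages(1):
    print(len(page))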
import tweepy
from tweepy import Cursor
import time
import json

consumer_key = 'p8rnniy2PVcnQR7I01s71g'
consumer_secret = 'tLaYYeiXzkq1wDmS2gEHTSEArNxk8tSd4D3bQPX6FNM'
access_token = '1196322271-BN4pBpveJuKSfUscrwss7T7KckX0Mgv75vJoVfp'
access_token_secret = 'A4yooP5jkdUfqI1xMi7wzVi9XtCh8uwScrPvOZyXR4nTz'

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)

rep_cursor = Cursor(api.followers, id="repcurrie")
for page in rep_cursor.pages():
    try:
        print(len(page))
        for user in page:
            # read the attribute directly instead of parsing str(user)
            username = user.screen_name
            print(username)
            time.sleep(5)
    except BaseException as e:
        print("failed: ", str(e))
        time.sleep(10)
print("done pulling")
import json

from tweepy import Cursor
from twitter_client import get_twitter_client

if __name__ == '__main__':
    client = get_twitter_client()
    # retrieve up to 200 statuses per page for 4 pages of the recent home timeline
    with open('home_timeline.jsonl', 'w') as f:
        for page in Cursor(client.home_timeline, count=200).pages(4):
            for status in page:
                f.write(json.dumps(status._json) + "\n")