def keywords_search(self, keywords, num_tweets, startDate, endDate):
    tweets = []
    data = Cursor(self.twitter_client.search,
                  q=keywords,
                  until=endDate,
                  lang="en").items(num_tweets)
    while True:
        try:
            tweet = data.next()
            if tweet.retweet_count > 0:
                if tweet not in tweets:
                    tweets.append(tweet)
            else:
                tweets.append(tweet)
        except tweepy.TweepError:
            # Exception for Twitter rate limits
            print("Twitter's API rate limit has been reached. More data can be "
                  "requested in fifteen minutes. Here is what we were able to pull:")
            break
        except Exception:
            # Includes StopIteration once the cursor is exhausted
            break
    return tweets
def tweet_gathering(api: API, query: str, date_since: str, lang: str = 'en'):
    try:
        logger.info("Retrieving Tweets ... ")
        # Collect tweets
        tweets = Cursor(api.search,
                        lang=lang,
                        q=query,
                        include_entities=True,
                        monitor_rate_limit=True,
                        wait_on_rate_limit_notify=True,
                        wait_on_rate_limit=True,
                        result_type="recent",
                        tweet_mode='extended').items()
        while True:
            try:
                tweet: Status = tweets.next()
                print(tweet)
                yield tweet
            except RateLimitError:
                time.sleep(60 * 15)
                continue
            except StopIteration:
                break
    except Exception as e:
        logger.error(e)
def get_tweets_between_date2(self, start, end):
    df = pd.DataFrame()
    c = Cursor(self.twitter_client.user_timeline, id=self.twitter_user).pages()
    analyzer = tweet_analyzer()
    while True:
        try:
            tweets_list = c.next()
            tweets = []
            for tweet in tweets_list:
                if tweet.created_at < end and tweet.created_at > start:
                    tweets.append(tweet)
                elif tweet.created_at < start:
                    # Timeline is newest-first, so anything older than `start` ends the search
                    return df
            if len(tweets) != 0:
                temp = analyzer.tweets_to_dataframe(tweets)
                if df.empty:
                    df = temp
                else:
                    # append() returns a new DataFrame; assign the result back
                    df = df.append(temp, ignore_index=True)
            time.sleep(0.15)
        except TweepError:
            time.sleep(60 * 15)
            continue
        except StopIteration:
            return df
def testcursornext(self):
    """
    Test cursor.next() behavior, id being passed correctly.
    Regression test for issue #518
    """
    cursor = Cursor(self.api.user_timeline, id='twitter').items(5)
    status = cursor.next()
    self.assertEqual(status.user.screen_name, 'twitter')
def get_image_tweet(self):
    c = Cursor(self.twitter_client.user_timeline,
               id=self.twitter_user,
               include_entities=True).pages()
    tweets = c.next()
    for tweet in tweets:
        if 'media' in tweet.entities:
            for image in tweet.entities['media']:
                print(image['media_url'])
def insert_user_with_friends(graph_db, twitter_user, user_labels=[]):
    user_labels.append("SeedNode")
    if isinstance(twitter_user, basestring):
        try:
            twitter_user = api.get_user(twitter_user)
        except:
            # Wait out the rate-limit window, then retry the user lookup
            time.sleep(60 * 16)
            twitter_user = api.get_user(twitter_user)
    create_or_get_node(graph_db, twitter_user, user_labels)
    friend_count = 0
    print u"\nINSERTING FOR: {}".format(twitter_user.name)
    friends = Cursor(api.friends, user_id=twitter_user.id_str, count=200).items()
    try:
        while True:
            try:
                friend = friends.next()
            except tweepy.TweepError:
                print "exceeded rate limit. waiting"
                time.sleep(60 * 16)
                friend = friends.next()
            # print u"  INSERTING: {}".format(friend.name)
            friend_count += 1
            sys.stdout.write('.')
            if friend_count % 10 == 0:
                sys.stdout.write(' ')
            if friend_count % 50 == 0:
                sys.stdout.write('| ')
            if friend_count % 100 == 0:
                print
            create_or_get_node(graph_db, friend)
            query_string = """
                MATCH (user:User {id_str:{user_id_str}}), (friend:User {id_str:{friend_id_str}})
                CREATE UNIQUE (user)-[:FOLLOWS]->(friend)
                """
            data = {
                "user_id_str": twitter_user.id_str,
                "friend_id_str": friend.id_str
            }
            n = graph_db.cypher.execute(query_string, data)
    except StopIteration:
        print u"\n Total Friend Count = {}".format(friend_count)
def limit_handled(cursor: tweepy.Cursor):
    """Wrap cursor access with rate limiting

    :param cursor: The cursor to siphon
    :returns: Cursor items
    """
    while True:
        try:
            yield cursor.next()
        except tweepy.RateLimitError:
            time.sleep(15 * 60)
        except StopIteration:
            # Cursor exhausted; end the generator cleanly (under PEP 479 the
            # StopIteration would otherwise surface as a RuntimeError)
            return
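# A minimal usage sketch for limit_handled above; `api` is assumed to be an
# authenticated tweepy.API instance and the screen name is only an example.
for follower in limit_handled(tweepy.Cursor(api.followers, screen_name="twitter").items()):
    print(follower.screen_name)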
def get_tweets_by_cursor(query):
    api = API(auth)
    query = query + " -RT"  # exclude retweets
    cursor = Cursor(api.search, q=query, lang="en").items(5000)
    while True:
        try:
            tweet = cursor.next()
            print(tweet._json)
            database.tweets.insert(tweet._json)
        except TweepError:
            # Wait out the rate-limit window, then resume
            time.sleep(60 * 15)
            continue
        except StopIteration:
            break
def rate_limit_handler(cursor: tweepy.Cursor):
    """
    Handler for tweepy Cursors that automatically pauses execution when the
    rate limit is reached.

    params:
        cursor (tweepy.Cursor) - cursor to handle
    """
    while True:
        try:
            yield cursor.next()
        except tweepy.RateLimitError:
            print("Oh no!! We hit the rate limit. Resuming in 15 mins.")
            time.sleep(15 * 60)
        except StopIteration:
            # Cursor exhausted; end the generator cleanly
            return
def handle_rate_limit(cursor: tweepy.Cursor):
    """
    If tweepy hits Twitter's API rate limit (180 calls in 15 minutes), wait for
    15 minutes before continuing the search.
    http://docs.tweepy.org/en/latest/code_snippet.html#handling-the-rate-limit-using-cursors

    :param cursor: Tweepy cursor iterator
    :return: Next iteration of cursor
    """
    while True:
        try:
            yield cursor.next()
        except tweepy.RateLimitError:
            # Sleep for 15 minutes
            logging.warning("Hit Twitter API's rate limit, sleeping for 15 minutes")
            time.sleep(15 * 60)
        except StopIteration:
            # Cursor exhausted; end the generator cleanly
            return
def get_tweets_between_date(self, start, end):
    tweets_during_florence = []
    c = Cursor(self.twitter_client.user_timeline, id=self.twitter_user).items()
    while True:
        try:
            tweet = c.next()
            if tweet.created_at < end and tweet.created_at > start:
                tweets_during_florence.append(tweet)
            elif tweet.created_at < start:
                # Timeline is newest-first, so anything older than `start` ends the search
                return tweets_during_florence
        except TweepError:
            time.sleep(60 * 15)
            continue
        except StopIteration:
            break
    return tweets_during_florence
def get_tweets_for_feature_extraction(query, count):
    api = API(auth)
    query = query + " -RT"  # exclude retweets
    cursor = Cursor(api.search, q=query, lang="en").items(count)
    tweets = []
    while True:
        try:
            tweet = cursor.next()
            tweets.append(tweet._json)
        except TweepError as e:
            print(e)
            time.sleep(60 * 5)
            continue
        except StopIteration:
            break
    return tweets
def handle_rate_limit(cursor: tweepy.Cursor):
    """
    If the Twitter API rate limit is exceeded (180 calls in 15 minutes), wait
    for 15 minutes before continuing.

    :param cursor: Tweepy cursor
    :return: Yields the next item from the cursor
    """
    while True:
        try:
            yield cursor.next()
        except tweepy.RateLimitError:
            # Pause for 15 minutes
            logging.warning(
                "Twitter API rate limit exceeded, waiting for 15 minutes before continuing."
            )
            time.sleep(15 * 60)
        except StopIteration:
            # Cursor exhausted; end the generator cleanly
            return
def fetch_tweets(self):
    cursor = Cursor(SentimentAnalysis.tweepy_api.search,
                    q=f'#{self.search_key} -filter:retweets',
                    count=100,
                    tweet_mode='extended',
                    lang='en').items(self.tweet_count)
    df = pd.DataFrame()
    i = 1
    while True:
        print(f'Running... {i}\r', end='')
        try:
            tweet = cursor.next()
            row = {
                'id': i,
                'tweet_id': tweet.id,
                'screen_name': tweet.user.screen_name,
                'name': tweet.user.name,
                'tweet_date': str(self.datetime_from_utc_to_local(tweet.created_at)),
                'location': tweet.user.location,
                'retweet_count': tweet.retweet_count,
                'like_count': tweet.favorite_count,
                'followers_count': tweet.user.followers_count,
                'following_count': tweet.user.friends_count,
                'text': tweet.full_text or tweet.text,
                'embed_url': f'https://twitter.com/{tweet.user.screen_name}/status/{tweet.id}'
            }
            polarity, polarity_score = self.calc_polarity(row)
            row['polarity'], row['polarity_score'] = polarity, polarity_score
            new_rows = pd.DataFrame([row], index=[i])
            df = pd.concat([df, new_rows])
            self.send_response(row)
        except TweepError:
            break
        except RateLimitError:
            break
        except StopIteration:
            break
        i = i + 1
    print('\nCompleted')
    self.save_files(df)
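# A minimal sketch of the calc_polarity helper assumed by fetch_tweets above,
# using TextBlob; the labels and thresholds are assumptions, not the original code.
from textblob import TextBlob

def calc_polarity(self, row):
    # Score the tweet text and map it to a coarse sentiment label
    score = TextBlob(row['text']).sentiment.polarity
    if score > 0:
        return 'positive', score
    if score < 0:
        return 'negative', score
    return 'neutral', score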
def fetch_tweets(search_key):
    os.mkdir(f'./{search_key}')
    cursor = Cursor(api.search,
                    q=f'#{search_key} -filter:retweets',
                    count=100,
                    tweet_mode='extended').items(15000)
    df = pd.DataFrame()
    i = 1
    while True:
        print(f'Running... {i}\r', end='')
        try:
            tweet = cursor.next()
            row = {
                'tweet_id': tweet.id,
                'screen_name': tweet.user.screen_name,
                'name': tweet.user.name,
                'tweet_date': datetime_from_utc_to_local(tweet.created_at),
                'location': tweet.user.location,
                'retweet_count': tweet.retweet_count,
                'like_count': tweet.favorite_count,
                'followers_count': tweet.user.followers_count,
                'following_count': tweet.user.friends_count,
                'text': tweet.full_text or tweet.text,
                'embed_url': f'https://twitter.com/{tweet.user.screen_name}/status/{tweet.id}?s=20'
            }
            df = pd.concat([df, pd.DataFrame([row], index=[i])])
        except TweepError:
            break
        except RateLimitError:
            break
        except StopIteration:
            break
        i = i + 1
    df = df.sort_values(by=['like_count', 'retweet_count', 'followers_count'], ascending=False)
    df.to_csv(path_or_buf=f'./{search_key}/{search_key}.csv')
    df['screen_name'].value_counts().to_csv(path_or_buf=f'./{search_key}/screen_name_freq.csv')
    print('\nCompleted.')
def fetch_tweets(kwd, since_id, channel, redis_conf):
    """
    :param kwd: keyword dict with 'kwd' (search term) and 'k_id' (keyword id)
    :param since_id: only fetch tweets newer than this tweet id
    :param channel: channel used to publish results to the saver
    :param redis_conf: dict with the Redis connection ('cursor') and the credential list key ('key')
    :return: True if the keyword was fully processed, False otherwise
    """
    r = redis_conf['cursor']
    key = redis_conf['key']
    api, credential_id = get_twitter_client(r, key)
    if not api:
        logger.info(f"{credential_id} failed ...using another one ...")
        api, credential_id = get_twitter_client(r, key)
    keyword = kwd['kwd']
    keyword = f'"{keyword} "' + config.get('FETCHER', 'FILTER')
    page_remaining = int(config.get('FETCHER', 'PAGE_LIMIT'))
    tweets_cursor = Cursor(api.search,
                           q=keyword,
                           count=100,
                           since_id=since_id,
                           tweet_mode='extended').pages(page_remaining)
    page_index = 0
    retry = 0
    t_id = 0
    _sleep = 0
    sleep_delay = int(config.get('FETCHER', 'SLEEP'))
    retry_limit = int(config.get('FETCHER', 'RETRY_LIMIT'))
    while True:
        try:
            print(kwd, page_index)
            tweets, t_id = process_page(tweets_cursor.next(), kwd, page_index)
            feed_saver_new_keyword_tweets(channel, tweets)
            page_index += 1
            page_remaining = int(config.get('FETCHER', 'PAGE_LIMIT')) - page_index
            # sleep(1)
        except StopIteration:
            if page_index == 0:
                # No tweets found
                data = {'status': 404, 'k_id': kwd['k_id']}
                feed_saver_new_keyword_tweets(channel, data)
            else:
                # Last packet for this keyword so that the saver can update scheduled_on
                data = {'status': 202, 'k_id': kwd['k_id']}
                feed_saver_new_keyword_tweets(channel, data)
            # Change credential & lpush current credential id
            r.lpush(key, credential_id)
            return True
        except TweepError as error:
            logger.error(
                f"Tweepy Exception occurred for credential id {credential_id} : {error}"
            )
            # Change credential & lpush current credential id
            r.lpush(key, credential_id)
            retry += 1
            if retry <= retry_limit:
                logger.info(f"Retrying for keyword {kwd['kwd']}")
                _sleep += sleep_delay
                sleep(_sleep)
                api, credential_id = get_twitter_client(r, key)
                tweets_cursor = Cursor(api.search,
                                       q=keyword,
                                       count=100,
                                       since_id=since_id,
                                       max_id=t_id,
                                       tweet_mode='extended').pages(page_remaining)
                continue
            # Finally, after exhausting retries
            data = {'status': 500, 'k_id': kwd['k_id']}
            feed_saver_new_keyword_tweets(channel, data)
            return False
        except Exception as e:
            # Push keyword in queue & maintain log
            logger.error(
                f"Exception occurred for keyword {kwd['kwd']}. Exception : {e}"
            )
            retry += 1
            # Change credential & lpush current credential id
            r.lpush(key, credential_id)
            if retry <= retry_limit:
                _sleep += sleep_delay
                logger.info(f"Retrying for keyword {kwd['kwd']}")
                api, credential_id = get_twitter_client(r, key)
                tweets_cursor = Cursor(api.search,
                                       q=keyword,
                                       count=100,
                                       since_id=since_id,
                                       max_id=t_id,
                                       tweet_mode='extended').pages(page_remaining)
                continue
            data = {'status': 500, 'k_id': kwd['k_id']}
            feed_saver_new_keyword_tweets(channel, data)
            return False
today = datetime.today().date()
week_ago = today - timedelta(7)
start_date = week_ago
while start_date < today:
    end_date = start_date + timedelta(1)
    c = Cursor(api.search,
               q=query,
               since=start_date.strftime('%Y-%m-%d'),
               until=end_date.strftime('%Y-%m-%d'),
               lang="en").items(400)
    while True:
        try:
            data = c.next()
            tweet = data._json
            print(tweet["created_at"], tweet["source"])
            csvWriter.writerow(process_tweet(tweet))
        except tweepy.TweepError:
            print("-------------------- GOT ERROR --------------------")
            time.sleep(60)
            continue
        except StopIteration:
            break
    start_date += timedelta(1)

# write to csv
# csvFile = open('data/' + since_date + ':' + until_date + '.csv', 'a')
# csvWriter = csv.writer(csvFile)
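# The day-by-day loop above assumes csvWriter and process_tweet already exist.
# A minimal sketch of that setup; the output path and the selected fields are
# assumptions for illustration, not the original code.
import csv

csvFile = open('data/search_results.csv', 'a', newline='')
csvWriter = csv.writer(csvFile)

def process_tweet(tweet):
    # Flatten the tweet's JSON dict into a CSV row (hypothetical field selection)
    return [tweet["id_str"], tweet["created_at"], tweet["source"], tweet["text"]]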
account_created_date = item.created_at
delta = datetime.utcnow() - account_created_date
account_age_days = delta.days
print("Account age (in days): " + str(account_age_days))
if account_age_days > 0:
    print("Average tweets per day: " + "%.2f" % (float(tweets) / float(account_age_days)))

end_date = datetime.utcnow() - timedelta(days=30)
cur = Cursor(auth_api.user_timeline, id=target).items()
while True:
    try:
        status = cur.next()
        # print(status)
        tweet_count += 1
        if hasattr(status, "text"):
            print(status.text)
            sentiment = get_tweet_sentiment(status.text)
        if hasattr(status, "id"):
            print(status.id)
            retweets_list = auth_api.retweets(status.id)
            # {id : vector<string>}
            for x in retweets_list:
                retweet.append(x.user.screen_name)
                # print(retweet.user.screen_name)
    except TweepError:
        # Wait out the rate-limit window, then resume
        time.sleep(60 * 15)
        continue
    except StopIteration:
        break