def main():
    """Entry point for the tweet fetcher.

    Runs one fetch pass via TweetsFetcher.handle_tweets_command(); when
    ``argsHandler.daemon`` is set, keeps looping with roughly ``sleeptime``
    seconds between iteration starts. Always logs the total processed count
    on exit (normal, break, or exception).

    Fixes vs. original: removed the unused iteration counter ``i`` (it was
    only read by commented-out log lines, which are deleted), and corrected
    the "gracefuly" typo in the exit messages.
    """
    print("Starting Tweet fetcher. \nConfig file should be [{}]\n".format(
        argsHandler.env))
    logger.info("[tweets_fetcher] started at {}".format(datetime.now()))
    tweets_fetch_stats = {'processed': 0}
    tweetsFetcher = TweetsFetcher()
    sleeptime = 300  # seconds between iteration starts in daemon mode
    try:
        while True:
            start_time = time.time()
            tweetsFetcher.handle_tweets_command()
            if not argsHandler.daemon:
                # Single-shot mode: one pass, then exit.
                logger.info("[tweets_fetcher]Exiting the program gracefully")
                print("[tweets_fetcher]Exiting the program gracefully")
                break
            # Sleep only for the remainder of the window, so each iteration
            # starts ~sleeptime seconds after the previous one started.
            elapsed_time = time.time() - start_time
            if elapsed_time < sleeptime:
                remaining_time = sleeptime - elapsed_time
                logger.info(
                    "[tweets_fetcher] next iterat {} seconds from {}".format(
                        remaining_time, datetime.now()))
                print("[tweets_fetcher] next iterat {} seconds from {}".format(
                    remaining_time, datetime.now()))
                time.sleep(remaining_time)
    except Exception as e:
        # Top-level boundary: log with traceback, then fall through to stats.
        logger.exception("[tweets_fetcher]Caught exception {}".format(e))
        print("[tweets_fetcher]Caught exception {}".format(e))
    finally:
        tweets_fetch_stats['processed'] = tweetsFetcher.grandtotal
        logger.info("[tweets_fetcher stats] {}".format(tweets_fetch_stats))
        logger.info("[tweets_fetcher] Ends at {}".format(datetime.now()))
def make_api_request(url, method='GET', headers=None):
    """Issue an API request through the shared OAuth session manager.

    Args:
        url: Full request URL.
        method: HTTP method name (default 'GET').
        headers: Optional dict of extra request headers. Defaults to an
            empty dict. (Fixed: the original used a mutable default
            ``headers={}``, which is shared across calls and can leak
            header state between requests.)

    Returns:
        Tuple of (response headers, parsed JSON body).

    Raises:
        Re-raises any exception from the request or JSON decoding after
        logging it with a traceback.
    """
    if headers is None:
        headers = {}
    try:
        response = oauthSessionManager.make_api_request(url, method, headers)
        json_response = response.json()
        return response.headers, json_response
    except Exception as e:
        logger.exception("Error {} while {} API with {} method".format(
            e, url, method))
        raise
def findDMForUsersInStore(self):
    """Daemon loop: drain DM-check buckets from the bucket manager.

    Repeatedly asks ``dmcheck_bucket_mgr`` for batches of buckets,
    processes each via ``__process_bucket`` and persists the result.
    When no buckets are available it waits 60s and asks again. The loop
    never terminates on its own (``find_dm`` is never cleared); it runs
    until the process is stopped or an unhandled condition occurs.

    Error handling:
      - TwitterRateLimitError: sleep 15 minutes (API quota window), retry.
      - TwitterUserInvalidOrExpiredToken / TwitterUserAccountLocked:
        unrecoverable for this user — log and return.
      - Any other exception: sleep 15 minutes, retry.
    """
    print("Finding DM between the users")
    find_dm = True
    try_count = 0
    buckets_batch_cnt = 2  # number of buckets requested per assignment call
    while find_dm:
        try:
            try_count = try_count + 1
            print("Retry count is {}".format(try_count))
            # Grab an initial batch; inner loop keeps refilling until the
            # manager has nothing left to hand out.
            buckets = self.dmcheck_bucket_mgr.assignBuckets(
                bucketscount=buckets_batch_cnt)
            while buckets:
                for bucket in buckets:
                    print("Processing {} bucket at {}Z".format(
                        bucket['bucket_id'], datetime.utcnow()))
                    self.__process_bucket(bucket)
                    print("Storing {} bucket user info at {}Z".format(
                        bucket['bucket_id'], datetime.utcnow()))
                    # Persist results before asking for the next batch so a
                    # crash does not lose finished work.
                    self.dmcheck_bucket_mgr.store_processed_data_for_bucket(
                        bucket)
                buckets = self.dmcheck_bucket_mgr.assignBuckets(
                    bucketscount=buckets_batch_cnt)
            # No buckets currently assignable — idle-wait for more.
            print(
                "Not Found any bucket for processing. So waiting for more buckets to be added"
            )
            time.sleep(60)
        except TwitterRateLimitError as e:
            logger.exception(e)
            print(traceback.format_exc())
            print(e)
            # Sleep for 15 minutes - twitter API rate limit
            print('Sleeping for 15 minutes due to quota. Current time={}'.
                  format(datetime.now()))
            time.sleep(900)
            continue
        except TwitterUserInvalidOrExpiredToken as e:
            # Credentials are unusable; retrying cannot help.
            logger.exception(e)
            print(traceback.format_exc())
            print(e)
            print('Exiting since user credential is invalid')
            return
        except TwitterUserAccountLocked as e:
            # Account lock is unrecoverable from here; stop the loop.
            logger.exception(e)
            print(traceback.format_exc())
            print(e)
            print('Exiting since Account is locked')
            return
        except Exception as e:
            # Unknown failure: back off generously, then retry the loop.
            logger.exception(e)
            print(traceback.format_exc())
            print(e)
            time.sleep(900)
            continue
def RefillBucketPools(self):
    """Daemon loop: keep the bucket pools topped up.

    Every cycle, reclaims dead buckets and adds fresh ones via
    ``bucket_mgr``, then sleeps 15 minutes. On any failure the error is
    logged and the loop retries after a short 30-second back-off. Never
    returns.
    """
    #tested
    print("Refilling buckets")
    while True:
        try:
            # Recover stuck work first, then grow the pool.
            for message, action in (
                ("Handling Dead buckets, if any at {}Z",
                 self.bucket_mgr.handle_dead_buckets),
                ("Trying to add more buckets at {}Z",
                 self.bucket_mgr.add_buckets),
            ):
                print(message.format(datetime.utcnow()))
                action()
            print("Sleeping for 15 mins at {}Z".format(datetime.utcnow()))
            time.sleep(900)
        except Exception as err:
            logger.exception(err)
            print(traceback.format_exc())
            print(err)
            # Short back-off, then keep the daemon alive.
            time.sleep(30)
def findDMForUsersInStore(self):
    """Check DMs for every not-yet-classified user in the data store.

    Each pass recomputes the working set: all users minus known-invalid
    users, known-DM users, and known-non-DM users. Remaining users are
    handed to ``__process_dm`` in chunks (chunk size 10). The loop ends
    when the working set is empty — i.e. every user has been classified.

    Error handling: rate-limit errors sleep 15 minutes (API quota
    window) and retry; any other exception sleeps 30 seconds and
    retries. Progress is not lost across retries because the working
    set is re-derived from the store each iteration.
    """
    print("Finding DM between the users")
    find_dm = True
    try_count = 0
    while find_dm:
        try:
            try_count = try_count + 1
            print("Retry count is {}".format(try_count))
            # Pull the current classification sets from the store.
            users = self.dataStoreIntf.get_all_users_list()
            print("Total number of users are {}".format(len(users)))
            nonexists_users = self.dataStoreIntf.get_nonexists_users_list()
            print("Total number of invalid users are {} and they are {}".format(len(nonexists_users), nonexists_users))
            dmusers = self.dataStoreIntf.get_dm_users_list()
            print("Total number of DM users are {}".format(len(dmusers)))
            nondmusers = self.dataStoreIntf.get_nondm_users_list()
            print("Total number of Non DM users are {}".format(len(nondmusers)))
            # Unchecked users = everyone not yet classified; sorted for a
            # deterministic processing order.
            users_wkg = sorted(set(users) - set(nonexists_users) - set(dmusers) - set(nondmusers))
            print('Processing with unchecked {} users'.format(len(users_wkg)))
            if(len(users_wkg)):
                self.__process_dm(users_wkg, 10)
            else:
                # Nothing left to classify — done.
                find_dm = False
        except TwitterRateLimitError as e:
            logger.exception(e)
            print(traceback.format_exc())
            print(e)
            # Sleep for 15 minutes - twitter API rate limit
            print('Sleeping for 15 minutes due to quota. Current time={}'.format(datetime.now()))
            time.sleep(900)
            continue
        except Exception as e:
            # Unknown failure: short back-off, then retry the full pass.
            logger.exception(e)
            print(traceback.format_exc())
            print(e)
            time.sleep(30)
            continue
def import_tweets_search(self, search_term, categories_list, sync_with_store,
                         tweet_filter):
    """Import all tweets matching a search term, paging backwards by ID.

    Pages through search results in batches of ``frequency`` (100),
    walking ``max_id`` downward so each call fetches strictly older
    tweets than the last. Optionally filters each batch before storing
    it via ``tweetStoreIntf``. Terminates when a search call returns no
    tweets. Adds the total imported count to ``self.grandtotal``.

    Args:
        search_term: Search query string.
        categories_list: Categories passed through to the tweet store.
        sync_with_store: If true, resume below the store's current
            minimum tweet id for this search instead of starting fresh.
        tweet_filter: Optional filter spec applied via ``filterhandler``;
            falsy means store all fetched tweets.
    """
    print(
        "Processing Tweets import for search key [{}]".format(search_term))
    frequency = 100  # tweets requested per search call
    tweets_to_import = True
    max_id = None  # upper id bound for the next page; None = newest first
    total_count = 0
    start_time = datetime.now()  # start of the current 15-min quota window
    search_term_query = self.tweetStoreIntf.util_get_search_term_query(
        search_term)
    if sync_with_store:
        # Resume just below what is already stored for this search.
        print("Syncing with store")
        min_id = self.tweetStoreIntf.get_tweets_min_id(search_term_query)
        if (min_id):
            max_id = int(min_id) - 1
    while tweets_to_import:
        try:
            # Proactively sleep out the quota window when the remaining
            # allowance cannot cover another full batch.
            curr_limit = get_reponse_header('x-rate-limit-remaining')
            if (curr_limit and int(curr_limit) <= frequency + 1):
                print("Sleeping as remaining x-rate-limit-remaining is {}".
                      format(curr_limit))
                time_diff = (datetime.now() - start_time).seconds
                remaining_time = (15 * 60) - time_diff
                sleeptime = remaining_time + 2  # +2s margin past the window
                print(
                    "sleeping for {} seconds to avoid threshold. Current time={}"
                    .format(sleeptime, datetime.now()))
                if (sleeptime > 0):
                    time.sleep(sleeptime)
                start_time = datetime.now()
                print("Continuing after threshold reset")
            tweets = self.__process_tweets_search(search_term=search_term,
                                                  max_id=max_id,
                                                  count=frequency)
            if len(tweets) > 0:
                tweets_to_import = True
                plural = "s." if len(tweets) > 1 else "."
                print("Found " + str(len(tweets)) + " tweet" + plural)
                total_count += len(tweets)
                print("Found total {} tweets for {} search\n".format(
                    total_count, search_term))
                # Track the smallest id seen so the next page fetches
                # strictly older tweets.
                if not max_id:
                    max_id = tweets[0]['id']
                for tweet in tweets:
                    max_id = min(max_id, tweet['id'])
                #decrement one less so that same tweet is not sent again in next call. 
                max_id = max_id - 1
                if tweet_filter:
                    filtered_tweets = self.filterhandler.apply_filters(
                        tweets, tweet_filter)
                else:
                    filtered_tweets = tweets
                print("{} Tweets to be stored out of {} tweets".format(
                    len(filtered_tweets), len(tweets)))
                if (len(filtered_tweets)):
                    self.tweetStoreIntf.store_tweets_info(
                        filtered_tweets, categories_list)
                    print("{} Search tweets added to graph for {}!".format(
                        len(filtered_tweets), search_term))
                else:
                    print("skipping as none found from {} total tweets".
                          format(len(tweets)))
            else:
                # Empty page: pagination is exhausted.
                print("No search tweets found for %s." % (search_term))
                if (not total_count):
                    logger.info("No search tweets found for -->> %s" %
                                (search_term))
                tweets_to_import = False
        except TwitterRateLimitError as e:
            logger.exception(e)
            print(traceback.format_exc())
            print(e)
            # Sleep for 15 minutes - twitter API rate limit
            print('Sleeping for 15 minutes due to quota. Current time={}'.
                  format(datetime.now()))
            time.sleep(900)
            continue
        except Exception as e:
            # Unknown failure: short back-off, then retry the same page.
            logger.exception(e)
            print(traceback.format_exc())
            print(e)
            time.sleep(30)
            continue
    logger.info("[stats] {} tweets for [{}]".format(
        total_count, search_term))
    self.grandtotal += total_count
def __import_tweets_by_tweet_id(self, tweet_id, fetch_retweet=False,
                                forced=False):
    """Import a single tweet (and optionally its retweets) into the store.

    Retries each fetch until it succeeds: rate-limit errors sleep 15
    minutes (API quota window), other errors sleep 30 seconds. Adds the
    number of stored tweets to ``self.grandtotal``.

    Args:
        tweet_id: ID of the tweet to import.
        fetch_retweet: Also fetch and store the tweet's retweets.
        forced: Import even if the tweet already exists in the store.

    Fixes vs. original: removed unused locals (``count``, ``lang``,
    ``max_id``, ``since_id``) and the non-idiomatic ``== True``
    comparison on the existence check.
    """
    print('Importing Tweet for {}'.format(tweet_id))
    tweets_to_import = True
    retweets_to_import = fetch_retweet
    total_count = 0
    # Skip work when the tweet is already stored, unless forced.
    if self.tweetStoreIntf.is_tweet_exists(tweet_id) and not forced:
        print("Skipping as there is already entry for {} tweet ID ".format(
            tweet_id))
        return
    print('Fetching tweet detail for ID:{}'.format(tweet_id))
    while tweets_to_import:
        try:
            print("Processing tweet fetch for {}".format(tweet_id))
            tweets = self.__process_tweets_fetch(tweet_id)
            if tweets:
                tweets_to_import = False
                print("{} Tweets to be added in DB".format(len(tweets)))
                self.tweetStoreIntf.store_tweets_info(tweets)
                total_count += len(tweets)
            else:
                print("No tweets found.")
                tweets_to_import = False
        except TwitterRateLimitError as e:
            logger.exception(e)
            print(traceback.format_exc())
            print(e)
            # Sleep for 15 minutes - twitter API rate limit
            print('Sleeping for 15 minutes due to quota')
            time.sleep(900)
            continue
        except Exception as e:
            # Unknown failure: short back-off, then retry the fetch.
            logger.exception(e)
            print(traceback.format_exc())
            print(e)
            time.sleep(30)
            continue
    while retweets_to_import:
        try:
            print("Processing retweet fetch for {}".format(tweet_id))
            re_tweets = self.__process_retweets_fetch(tweet_id)
            if re_tweets:
                retweets_to_import = False
                print("{} Retweets to be added in DB".format(
                    len(re_tweets)))
                self.tweetStoreIntf.store_tweets_info(re_tweets)
                total_count += len(re_tweets)
            else:
                print("No retweets found.")
                retweets_to_import = False
        except TwitterRateLimitError as e:
            logger.exception(e)
            print(traceback.format_exc())
            print(e)
            # Sleep for 15 minutes - twitter API rate limit
            print('Sleeping for 15 minutes due to quota')
            time.sleep(900)
            continue
        except Exception as e:
            # Unknown failure: short back-off, then retry the fetch.
            logger.exception(e)
            print(traceback.format_exc())
            print(e)
            time.sleep(30)
            continue
    logger.info("[stats] {} tweets for [{}]".format(total_count, tweet_id))
    self.grandtotal += total_count