def main():
    """Entry point: fetch tweets once, or repeatedly in daemon mode.

    In daemon mode, iterations are spaced at least ``sleeptime`` seconds
    apart; otherwise a single pass is made and the program exits.
    Processing stats are always logged on the way out.
    """
    print("Starting Tweet fetcher. \nConfig file should be [{}]\n".format(
        argsHandler.env))
    logger.info("[tweets_fetcher] started at {}".format(datetime.now()))
    tweets_fetch_stats = {'processed': 0}
    tweetsFetcher = TweetsFetcher()
    sleeptime = 300  # minimum seconds between fetch iterations in daemon mode
    try:
        while True:
            start_time = time.time()
            tweetsFetcher.handle_tweets_command()
            if not argsHandler.daemon:
                # Single-shot mode: one pass and out.
                # (fixed typo: "gracefuly" -> "gracefully")
                logger.info("[tweets_fetcher]Exiting the program gracefully")
                print("[tweets_fetcher]Exiting the program gracefully")
                break
            elapsed_time = time.time() - start_time
            if elapsed_time < sleeptime:
                # Sleep off the remainder so iterations stay spaced out.
                remaining_time = sleeptime - elapsed_time
                logger.info(
                    "[tweets_fetcher] next iterat {} seconds from {}".format(
                        remaining_time, datetime.now()))
                print("[tweets_fetcher] next iterat {} seconds from {}".format(
                    remaining_time, datetime.now()))
                time.sleep(remaining_time)
    except Exception as e:
        logger.exception("[tweets_fetcher]Caught exception {}".format(e))
        print("[tweets_fetcher]Caught exception {}".format(e))
    finally:
        tweets_fetch_stats['processed'] = tweetsFetcher.grandtotal
        logger.info("[tweets_fetcher stats] {}".format(tweets_fetch_stats))
        logger.info("[tweets_fetcher] Ends at {}".format(datetime.now()))
def main():
    """Entry point: register as DM-check client and scan stored users.

    Retries every ``sleepseconds`` while the backing service reports
    ServiceNotReady; any other exception ends the loop (now logged).
    Processing stats are logged before exit.
    """
    print("Starting service at {}".format(datetime.now()))
    # NOTE(review): the message says "client" but prints the *source*
    # account (TWITTER_ID/TWITTER_USER) — confirm whether CLIENT_* was
    # intended here, as in the Following-lookup entry point.
    print(
        "Starting DM lookup with {}/{} client. \nConfig file should be [config/{}]\n"
        .format(os.environ["TWITTER_ID"], os.environ["TWITTER_USER"], '.env'))
    stats_tracker = {'processed': 0}
    userRelations = UserRelations(
        client_id=os.environ["CLIENT_ID"],
        client_screen_name=os.environ["CLIENT_SCREEN_NAME"],
        source_id=os.environ["TWITTER_ID"],
        source_screen_name=os.environ["TWITTER_USER"])
    retry = True
    sleepseconds = 30  # backoff while the backing service warms up
    while retry:
        try:
            userRelations.register_as_dmcheck_client()
            userRelations.findDMForUsersInStore()
        except ServiceNotReady as e:
            print("caught exception {}".format(e))
            print("Retrying after {} seconds as service is not ready".format(
                sleepseconds))
            time.sleep(sleepseconds)
        except Exception as e:
            # Previously swallowed silently; log so failures are diagnosable.
            logger.exception("[DM] Unexpected error, exiting: {}".format(e))
            retry = False
    stats_tracker['processed'] = userRelations.grandtotal
    logger.info("[DM stats] {}".format(stats_tracker))
    print("Exiting program at {}".format(datetime.now()))
def main():
    """Entry point: register as following-check client and fetch followings.

    Retries every ``sleepseconds`` while the backing service reports
    ServiceNotReady; any other exception ends the loop (now logged).
    Processing stats are logged before exit.
    """
    print("Starting service at {}".format(datetime.now()))
    print(
        "Starting Following lookup with {}/{} client. \nConfig file should be [config/{}]\n"
        .format(os.environ["CLIENT_ID"], os.environ["CLIENT_SCREEN_NAME"],
                '.env'))
    stats_tracker = {'processed': 0}
    followingFetcher = FollowingFetcher(
        client_id=os.environ["CLIENT_ID"],
        client_screen_name=os.environ["CLIENT_SCREEN_NAME"])
    retry = True
    sleepseconds = 30  # backoff while the backing service warms up
    while retry:
        try:
            followingFetcher.register_as_followingcheck_client()
            followingFetcher.findFollowingForUsersInStore()
        except ServiceNotReady as e:
            print("caught exception {}".format(e))
            print("Retrying after {} seconds as service is not ready".format(
                sleepseconds))
            time.sleep(sleepseconds)
        except Exception as e:
            # Previously swallowed silently; log so failures are diagnosable.
            logger.exception(
                "[Following] Unexpected error, exiting: {}".format(e))
            retry = False
    stats_tracker['processed'] = followingFetcher.grandtotal
    logger.info("[Following stats] {}".format(stats_tracker))
    print("Exiting program at {}".format(datetime.now()))
def main():
    """One-shot entry point: run the tweet-fetch command and log totals.

    The processed-count is taken from the fetcher even when the command
    raises, so the stats line is always emitted.
    """
    print(
        "Starting Tweet fetcher. \nConfig file should be [config/{}]\n".format(
            config_file_name))
    fetch_stats = {'processed': 0}
    fetcher = TweetsFetcher()
    try:
        fetcher.handle_tweets_command()
    finally:
        fetch_stats['processed'] = fetcher.grandtotal
        logger.info("[tweets_fetcher stats] {}".format(fetch_stats))
def main():
    """One-shot entry point: look up DM reachability for stored users.

    Any failure is logged (previously swallowed); the stats line is
    always emitted on the way out.
    """
    print("Starting DM lookup. \nConfig file should be [config/{}]\n".format(
        config_file_name))
    stats_tracker = {'processed': 0}
    userRelations = UserRelations(os.environ["TWITTER_USER"])
    try:
        userRelations.findDMForUsersInStore()
    except Exception as e:
        # Was `pass`: a bare silent swallow hid every failure. Keep the
        # best-effort behavior but record what went wrong.
        logger.exception("[DM] caught exception {}".format(e))
    finally:
        stats_tracker['processed'] = userRelations.grandtotal
        logger.info("[DM stats] {}".format(stats_tracker))
        print("Exiting program")
def __check_follower_user_detail(self, users):  #tested
    """Fetch follower info for each user dict and attach it in place.

    Each entry in ``users`` gets a 'followers' key holding the result of
    the fetch. The trailing mismatch log is defensive: the loop body
    currently increments ``count`` for every user.
    """
    print("Finding follower users for {} users".format(len(users)))
    count = 0
    # presumably read elsewhere for rate-limit bookkeeping — TODO confirm
    self.twitter_query_start_time = datetime.now()
    for user in users:
        print("Fetching follower info for {} user".format(user))
        followers_user = self.__process_follower_fetch(user)
        count = count + 1
        user['followers'] = followers_user
        print("Processed {} out of {} users for follower Check".format(
            count, len(users)))
    if count != len(users):
        # fixed doubled word in message: "fetch fetch" -> "fetch follower"
        logger.info("Unable to fetch follower status for {} users".format(
            len(users) - count))
def __check_following_user_detail(self, users):
    """Fetch following info for each user dict and attach it in place.

    Each entry in ``users`` gets a 'followings' key holding the result
    of the fetch. The trailing mismatch log is defensive: the loop body
    currently increments ``count`` for every user.
    """
    print("Finding following users for {} users".format(len(users)))
    count = 0
    for user in users:
        print("Fetching following info for {} user".format(user))
        followings_user = self.__process_following_fetch(user)
        count = count + 1
        user['followings'] = followings_user
        print("Processed {} out of {} users for following Check".format(
            count, len(users)))
    if count != len(users):
        # fixed doubled word in message: "fetch fetch" -> "fetch following"
        logger.info("Unable to fetch following status for {} users".format(
            len(users) - count))
def __check_dm_status(self, users):
    """Annotate each user dict with whether the source account can DM it.

    For every user, fetches the friendship relation from
    ``self.source_screen_name`` and sets user['candm'] to "DM" /
    "NON_DM", or "UNKNOWN" when Twitter cannot find the user. Sleeps
    out the 15-minute window whenever the remaining rate limit drops to
    the threshold. Mutates ``users`` in place; returns None.
    """
    print("Finding relations between {} and {} users".format(
        self.source_screen_name, len(users)))
    friendships = []  # NOTE(review): never appended to — looks vestigial
    count = 0
    start_time = datetime.now()  # anchor of the current rate-limit window
    remaining_threshold = 0  # sleep once remaining calls hit this value
    for user in users:
        try:
            # Remaining-call budget reported by the last API response.
            curr_limit = get_reponse_header('x-rate-limit-remaining')
            if (curr_limit and int(curr_limit) <= remaining_threshold):
                print("Sleeping as remaining x-rate-limit-remaining is {}".
                      format(curr_limit))
                # Sleep for whatever is left of the 15-minute window,
                # plus 2s of slack for clock skew.
                time_diff = (datetime.now() - start_time).seconds
                remaining_time = (15 * 60) - time_diff
                sleeptime = remaining_time + 2
                print(
                    "sleeping for {} seconds to avoid threshold. Current time={}"
                    .format(sleeptime, datetime.now()))
                if (sleeptime > 0):
                    time.sleep(sleeptime)
                start_time = datetime.now()  # new window begins now
                print("Continuing after threshold reset")
            print("Fetching friendship info from {} to {} user".format(
                self.source_screen_name, user))
            friendship = self.__process_friendship_fetch(user)
        except TwitterUserNotFoundError as unf:
            # Deleted/suspended accounts: mark and keep going.
            logger.warning(
                "Twitter couldn't found user {} and so ignoring".format(
                    user))
            user['candm'] = "UNKNOWN"
            self.grandtotal += 1
            continue
        count = count + 1
        status = friendship['relationship']['source']['can_dm']
        if status:
            user['candm'] = "DM"
        else:
            user['candm'] = "NON_DM"
        print("Processed {} out of {} users for DM Check".format(
            count, len(users)))
    if count != len(users):
        # Users skipped via the not-found path above.
        logger.info("Unable to fetch DM status for {} users".format(
            len(users) - count))
def __retweeted_status_screen_name(self, tweet, filter_param):
    """Return True iff ``tweet`` is a retweet of ``filter_param``'s tweet.

    ``filter_param`` is the desired original author's screen name.
    Self-retweets are skipped (logged, returns False); tweets that are
    not retweets, or retweets of someone else, also return False.
    """
    status = False
    desired_screen_name = filter_param
    retweet_user_name = tweet['user']['screen_name']
    if 'retweeted_status' in tweet and 'user' in tweet['retweeted_status']:
        orig_user = tweet['retweeted_status']['user']
        if 'screen_name' in orig_user:
            orig_user_screen_name = orig_user['screen_name']
            if retweet_user_name == orig_user_screen_name:
                logger.info(
                    "skipping {} tweet as it is self retweet".format(
                        tweet['id']))
            elif orig_user_screen_name == desired_screen_name:
                status = True
        else:
            # fixed: `.fromat` -> `.format` (AttributeError at runtime),
            # and tweets are dicts here, so `tweet.id` -> `tweet['id']`.
            logger.error("Couldn't find screen name for {} Tweet".format(
                tweet['id']))
    return status
def get_nonexists_users_list(self):
    """Return screen names recorded as non-existent in the output file.

    Reads ``self.outfile`` as JSON-lines (one object per line) and
    collects 'target_screen_name' from entries whose 'exists' flag is 0.
    A read failure is logged and treated as an empty file.
    """
    print("Finding Non existing users from file")
    in_file = self.outfile
    json_data = []
    try:
        with open(in_file) as f:
            json_data = [json.loads(line) for line in f]
    except IOError as e:
        print("Info: Couldn't read file:({0}): {1}".format(
            e.errno, e.strerror))
        logger.info("File {} I/O error({}): {}".format(
            in_file, e.errno, e.strerror))
    users = [
        user['target_screen_name'] for user in json_data
        if 'exists' in user and user['exists'] == 0
    ]
    # fixed typo in log message: "existant" -> "existent"
    logger.debug("Got {} Non existent users".format(len(users)))
    return users
def import_tweets_search(self, search_term, categories_list,
                         sync_with_store, tweet_filter):
    """Import all tweets matching ``search_term`` into the tweet store.

    Pages backwards through search results ``frequency`` tweets at a
    time using ``max_id``, optionally resuming below the store's known
    minimum id (``sync_with_store``) and optionally filtering each page
    (``tweet_filter``) before storing under ``categories_list``.
    Sleeps through Twitter rate-limit windows; retries on any error.
    Adds the imported count to ``self.grandtotal``. Returns None.
    """
    print(
        "Processing Tweets import for search key [{}]".format(search_term))
    frequency = 100  # tweets requested per search call
    tweets_to_import = True
    max_id = None  # upper id bound for the next page; None = newest first
    total_count = 0
    start_time = datetime.now()  # anchor of the current rate-limit window
    search_term_query = self.tweetStoreIntf.util_get_search_term_query(
        search_term)
    if sync_with_store:
        # Resume below the oldest tweet already stored for this query.
        print("Syncing with store")
        min_id = self.tweetStoreIntf.get_tweets_min_id(search_term_query)
        if (min_id):
            max_id = int(min_id) - 1
    while tweets_to_import:
        try:
            # Remaining-call budget reported by the last API response;
            # sleep out the 15-minute window before it runs dry.
            curr_limit = get_reponse_header('x-rate-limit-remaining')
            if (curr_limit and int(curr_limit) <= frequency + 1):
                print("Sleeping as remaining x-rate-limit-remaining is {}".
                      format(curr_limit))
                time_diff = (datetime.now() - start_time).seconds
                remaining_time = (15 * 60) - time_diff
                sleeptime = remaining_time + 2  # +2s slack for clock skew
                print(
                    "sleeping for {} seconds to avoid threshold. Current time={}"
                    .format(sleeptime, datetime.now()))
                if (sleeptime > 0):
                    time.sleep(sleeptime)
                start_time = datetime.now()  # new window begins now
                print("Continuing after threshold reset")
            tweets = self.__process_tweets_search(search_term=search_term,
                                                  max_id=max_id,
                                                  count=frequency)
            if len(tweets) > 0:
                tweets_to_import = True
                plural = "s." if len(tweets) > 1 else "."
                print("Found " + str(len(tweets)) + " tweet" + plural)
                total_count += len(tweets)
                print("Found total {} tweets for {} search\n".format(
                    total_count, search_term))
                # Track the smallest id seen so the next page starts
                # strictly below everything already fetched.
                if not max_id:
                    max_id = tweets[0]['id']
                for tweet in tweets:
                    max_id = min(max_id, tweet['id'])
                #decrement one less so that same tweet is not sent again in next call.
                max_id = max_id - 1
                if tweet_filter:
                    filtered_tweets = self.filterhandler.apply_filters(
                        tweets, tweet_filter)
                else:
                    filtered_tweets = tweets
                print("{} Tweets to be stored out of {} tweets".format(
                    len(filtered_tweets), len(tweets)))
                if (len(filtered_tweets)):
                    self.tweetStoreIntf.store_tweets_info(
                        filtered_tweets, categories_list)
                    print("{} Search tweets added to graph for {}!".format(
                        len(filtered_tweets), search_term))
                else:
                    print("skipping as none found from {} total tweets".
                          format(len(tweets)))
            else:
                # Empty page: pagination is exhausted.
                print("No search tweets found for %s." % (search_term))
                if (not total_count):
                    logger.info("No search tweets found for -->> %s" %
                                (search_term))
                tweets_to_import = False
        except TwitterRateLimitError as e:
            logger.exception(e)
            print(traceback.format_exc())
            print(e)
            # Sleep for 15 minutes - twitter API rate limit
            print('Sleeping for 15 minutes due to quota. Current time={}'.
                  format(datetime.now()))
            time.sleep(900)
            continue
        except Exception as e:
            # Transient failure: brief backoff, then retry the same page.
            logger.exception(e)
            print(traceback.format_exc())
            print(e)
            time.sleep(30)
            continue
    logger.info("[stats] {} tweets for [{}]".format(
        total_count, search_term))
    self.grandtotal += total_count
def __import_tweets_by_tweet_id(self, tweet_id, fetch_retweet=False,
                                forced=False):
    """Import a single tweet (and optionally its retweets) into the store.

    Skips the fetch when the tweet already exists unless ``forced``.
    Each fetch loop retries until it succeeds: 15 minutes after a rate
    limit, 30 seconds after any other error. Adds the imported count to
    ``self.grandtotal``. Returns None.
    """
    print('Importing Tweet for {}'.format(tweet_id))
    # NOTE(review): count/lang/max_id/since_id are set but never used in
    # this method — presumably leftovers from a timeline-fetch variant.
    count = 200
    lang = "en"
    tweets_to_import = True
    retweets_to_import = fetch_retweet
    max_id = 0
    since_id = 0
    total_count = 0
    if self.tweetStoreIntf.is_tweet_exists(
            tweet_id) == True and not forced:
        print("Skipping as there is already entry for {} tweet ID ".format(
            tweet_id))
        return
    print('Fetching tweet detail for ID:{}'.format(tweet_id))
    # Retry the tweet fetch until it yields a definitive answer.
    while tweets_to_import:
        try:
            print("Processing tweet fetch for {}".format(tweet_id))
            tweets = self.__process_tweets_fetch(tweet_id)
            if tweets:
                tweets_to_import = False
                print("{} Tweets to be added in DB".format(len(tweets)))
                self.tweetStoreIntf.store_tweets_info(tweets)
                total_count += len(tweets)
            else:
                print("No tweets found.")
                tweets_to_import = False
        except TwitterRateLimitError as e:
            logger.exception(e)
            print(traceback.format_exc())
            print(e)
            # Sleep for 15 minutes - twitter API rate limit
            print('Sleeping for 15 minutes due to quota')
            time.sleep(900)
            continue
        except Exception as e:
            # Transient failure: brief backoff, then retry.
            logger.exception(e)
            print(traceback.format_exc())
            print(e)
            time.sleep(30)
            continue
    # Same retry pattern for the retweets of that tweet (when requested).
    while retweets_to_import:
        try:
            print("Processing retweet fetch for {}".format(tweet_id))
            re_tweets = self.__process_retweets_fetch(tweet_id)
            if re_tweets:
                retweets_to_import = False
                print("{} Retweets to be added in DB".format(
                    len(re_tweets)))
                self.tweetStoreIntf.store_tweets_info(re_tweets)
                total_count += len(re_tweets)
            else:
                print("No retweets found.")
                retweets_to_import = False
        except TwitterRateLimitError as e:
            logger.exception(e)
            print(traceback.format_exc())
            print(e)
            # Sleep for 15 minutes - twitter API rate limit
            print('Sleeping for 15 minutes due to quota')
            time.sleep(900)
            continue
        except Exception as e:
            # Transient failure: brief backoff, then retry.
            logger.exception(e)
            print(traceback.format_exc())
            print(e)
            time.sleep(30)
            continue
    logger.info("[stats] {} tweets for [{}]".format(total_count, tweet_id))
    self.grandtotal += total_count