def main():
    print("Starting Tweet fetcher. \nConfig file should be [{}]\n".format(
        argsHandler.env))
    logger.info("[tweets_fetcher] started at {}".format(datetime.now()))
    tweets_fetch_stats = {'processed': 0}
    tweetsFetcher = TweetsFetcher()
    i = 0
    sleeptime = 300
    try:
        while True:
            i = i + 1
            #logger.info("[tweets_fetcher] I-{} at {}".format(i, datetime.now()))
            #print("[tweets_fetcher] I-{} at {}".format(i, datetime.now()))
            start_time = time.time()
            tweetsFetcher.handle_tweets_command()
            if not argsHandler.daemon:
                logger.info("[tweets_fetcher]Exiting the program gracefuly")
                print("[tweets_fetcher]Exiting the program gracefuly")
                break
            elapsed_time = time.time() - start_time
            if (elapsed_time < sleeptime):
                remaining_time = sleeptime - elapsed_time
                logger.info(
                    "[tweets_fetcher] next iteration in {} seconds from {}".format(
                        remaining_time, datetime.now()))
                print("[tweets_fetcher] next iteration in {} seconds from {}".format(
                    remaining_time, datetime.now()))
                time.sleep(remaining_time)
    except Exception as e:
        logger.exception("[tweets_fetcher]Caught exception {}".format(e))
        print("[tweets_fetcher]Caught exception {}".format(e))
    finally:
        tweets_fetch_stats['processed'] = tweetsFetcher.grandtotal
        logger.info("[tweets_fetcher stats] {}".format(tweets_fetch_stats))
        logger.info("[tweets_fetcher] Ends at {}".format(datetime.now()))
Example #2
def make_api_request(url, method='GET', headers=None):
    # Avoid a mutable default argument; use a fresh dict when none is supplied.
    headers = headers or {}
    try:
        response = oauthSessionManager.make_api_request(url, method, headers)
        json_response = response.json()
        return response.headers, json_response
    except Exception as e:
        logger.exception("Error {} while {} API with {} method".format(
            e, url, method))
        raise
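# Hedged usage sketch (assumption: oauthSessionManager is already authenticated;
# the endpoint URL is illustrative only). The returned headers can be used to
# track the remaining API quota:
#
#     headers, body = make_api_request(
#         'https://api.twitter.com/1.1/application/rate_limit_status.json')
#     remaining = headers.get('x-rate-limit-remaining')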
    def findDMForUsersInStore(self):
        print("Finding DM between the users")
        find_dm = True
        try_count = 0
        buckets_batch_cnt = 2
        while find_dm:
            try:
                try_count = try_count + 1
                print("Retry count is {}".format(try_count))
                buckets = self.dmcheck_bucket_mgr.assignBuckets(
                    bucketscount=buckets_batch_cnt)
                while buckets:
                    for bucket in buckets:
                        print("Processing {} bucket at  {}Z".format(
                            bucket['bucket_id'], datetime.utcnow()))
                        self.__process_bucket(bucket)
                        print("Storing {} bucket user info at  {}Z".format(
                            bucket['bucket_id'], datetime.utcnow()))
                        self.dmcheck_bucket_mgr.store_processed_data_for_bucket(
                            bucket)
                    buckets = self.dmcheck_bucket_mgr.assignBuckets(
                        bucketscount=buckets_batch_cnt)
                print(
                    "No buckets found for processing; waiting for more buckets to be added"
                )
                time.sleep(60)
            except TwitterRateLimitError as e:
                logger.exception(e)
                print(traceback.format_exc())
                print(e)
                # Sleep for 15 minutes - twitter API rate limit
                print('Sleeping for 15 minutes due to quota. Current time={}'.
                      format(datetime.now()))
                time.sleep(900)
                continue
            except TwitterUserInvalidOrExpiredToken as e:
                logger.exception(e)
                print(traceback.format_exc())
                print(e)
                print('Exiting since user credentials are invalid')
                return

            except TwitterUserAccountLocked as e:
                logger.exception(e)
                print(traceback.format_exc())
                print(e)
                print('Exiting since the account is locked')
                return

            except Exception as e:
                logger.exception(e)
                print(traceback.format_exc())
                print(e)
                time.sleep(900)
                continue
    def RefillBucketPools(self):
        #tested
        print("Refilling buckets")
        while True:
            try:
                print("Handling Dead buckets, if any at {}Z".format(datetime.utcnow()))
                self.bucket_mgr.handle_dead_buckets()
                print("Trying to add more buckets at {}Z".format(datetime.utcnow()))
                self.bucket_mgr.add_buckets()
                print("Sleeping for 15 mins at {}Z".format(datetime.utcnow()))
                time.sleep(900)

            except Exception as e:
                logger.exception(e)
                print(traceback.format_exc())
                print(e)
                time.sleep(30)
                continue
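# Hedged sketch (not in the original example): both loops above are designed to
# run indefinitely, so a caller would typically run the refill loop on a
# background thread while the DM check runs in the foreground. 'checker' stands
# for an instance of the enclosing class; the name is an assumption.
import threading

def _example_run_workers(checker):
    refill = threading.Thread(target=checker.RefillBucketPools, daemon=True)
    refill.start()
    checker.findDMForUsersInStore()  # blocks in the calling thread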
Example #5
    def findDMForUsersInStore(self):
        print("Finding DM between the users")
        find_dm = True
        try_count = 0
        while find_dm:
            try:
                try_count = try_count + 1
                print("Retry count is {}".format(try_count))
                users = self.dataStoreIntf.get_all_users_list()
                print("Total number of users are {}".format(len(users)))
                nonexists_users = self.dataStoreIntf.get_nonexists_users_list()
                print("Total number of invalid users are {} and they are {}".format(len(nonexists_users), nonexists_users))
                dmusers = self.dataStoreIntf.get_dm_users_list()
                print("Total number of DM users are {}".format(len(dmusers)))
                nondmusers = self.dataStoreIntf.get_nondm_users_list()
                print("Total number of Non DM users are {}".format(len(nondmusers)))
                users_wkg = sorted(set(users) - set(nonexists_users) - set(dmusers) - set(nondmusers))
                print('Processing with unchecked {} users'.format(len(users_wkg)))
                if len(users_wkg):
                    self.__process_dm(users_wkg, 10)
                else:
                    find_dm = False
            except TwitterRateLimitError as e:
                logger.exception(e)
                print(traceback.format_exc())
                print(e)
                # Sleep for 15 minutes - twitter API rate limit
                print('Sleeping for 15 minutes due to quota. Current time={}'.format(datetime.now()))
                time.sleep(900)
                continue

            except Exception as e:
                logger.exception(e)
                print(traceback.format_exc())
                print(e)
                time.sleep(30)
                continue
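# Hedged illustration (made-up data, hypothetical helper name) of the set
# arithmetic used above to build the unchecked working set:
def _example_unchecked_users():
    users = ['a', 'b', 'c', 'd', 'e']   # everything in the store
    nonexists_users = ['e']             # invalid / deleted accounts
    dmusers = ['b']                     # DM already confirmed
    nondmusers = ['d']                  # confirmed no DM
    # Everyone not yet classified: ['a', 'c']
    return sorted(set(users) - set(nonexists_users) - set(dmusers) - set(nondmusers))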
Example #6
    def import_tweets_search(self, search_term, categories_list,
                             sync_with_store, tweet_filter):
        print(
            "Processing Tweets import for search key [{}]".format(search_term))
        frequency = 100
        tweets_to_import = True
        max_id = None
        total_count = 0
        start_time = datetime.now()
        search_term_query = self.tweetStoreIntf.util_get_search_term_query(
            search_term)
        if sync_with_store:
            print("Syncing with store")
            min_id = self.tweetStoreIntf.get_tweets_min_id(search_term_query)
            if (min_id):
                max_id = int(min_id) - 1

        while tweets_to_import:
            try:

                curr_limit = get_reponse_header('x-rate-limit-remaining')
                if (curr_limit and int(curr_limit) <= frequency + 1):
                    print("Sleeping as remaining x-rate-limit-remaining is {}".
                          format(curr_limit))
                    time_diff = (datetime.now() - start_time).seconds
                    remaining_time = (15 * 60) - time_diff
                    sleeptime = remaining_time + 2
                    print(
                        "sleeping for {} seconds to avoid threshold. Current time={}"
                        .format(sleeptime, datetime.now()))
                    if (sleeptime > 0):
                        time.sleep(sleeptime)
                    start_time = datetime.now()
                    print("Continuing after threshold reset")

                tweets = self.__process_tweets_search(search_term=search_term,
                                                      max_id=max_id,
                                                      count=frequency)
                if len(tweets) > 0:
                    tweets_to_import = True
                    plural = "s." if len(tweets) > 1 else "."
                    print("Found " + str(len(tweets)) + " tweet" + plural)
                    total_count += len(tweets)
                    print("Found total {} tweets for {} search\n".format(
                        total_count, search_term))

                    if not max_id:
                        max_id = tweets[0]['id']

                    for tweet in tweets:
                        max_id = min(max_id, tweet['id'])
                    # Decrement by one so the same tweet is not returned in the next call.
                    max_id = max_id - 1
                    if tweet_filter:
                        filtered_tweets = self.filterhandler.apply_filters(
                            tweets, tweet_filter)
                    else:
                        filtered_tweets = tweets
                    print("{} Tweets to be stored out of {} tweets".format(
                        len(filtered_tweets), len(tweets)))
                    if filtered_tweets:
                        self.tweetStoreIntf.store_tweets_info(
                            filtered_tweets, categories_list)
                        print("{} Search tweets added to graph for {}!".format(
                            len(filtered_tweets), search_term))
                    else:
                        print("skipping as none found from {} total tweets".
                              format(len(tweets)))
                else:
                    print("No search tweets found for %s." % (search_term))
                    if (not total_count):
                        logger.info("No search tweets found for -->> %s" %
                                    (search_term))
                    tweets_to_import = False

            except TwitterRateLimitError as e:
                logger.exception(e)
                print(traceback.format_exc())
                print(e)
                # Sleep for 15 minutes - twitter API rate limit
                print('Sleeping for 15 minutes due to quota. Current time={}'.
                      format(datetime.now()))
                time.sleep(900)
                continue

            except Exception as e:
                logger.exception(e)
                print(traceback.format_exc())
                print(e)
                time.sleep(30)
                continue
        logger.info("[stats] {} tweets for [{}]".format(
            total_count, search_term))
        self.grandtotal += total_count
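# Hedged sketch of the max_id pagination used above: Twitter search walks
# backwards in time, so the oldest id seen on a page (minus one) becomes the
# max_id for the next request. fetch_page is a hypothetical stand-in for
# __process_tweets_search.
def _example_paginate(fetch_page, page_size=100):
    max_id = None
    while True:
        page = fetch_page(max_id=max_id, count=page_size)
        if not page:
            break
        # Decrement by one so the oldest tweet is not returned again.
        max_id = min(tweet['id'] for tweet in page) - 1
        yield from page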
Example #7
    def __import_tweets_by_tweet_id(self,
                                    tweet_id,
                                    fetch_retweet=False,
                                    forced=False):
        print('Importing Tweet for {}'.format(tweet_id))
        count = 200
        lang = "en"
        tweets_to_import = True
        retweets_to_import = fetch_retweet
        max_id = 0
        since_id = 0
        total_count = 0

        if self.tweetStoreIntf.is_tweet_exists(tweet_id) and not forced:
            print("Skipping as there is already an entry for tweet ID {}".format(
                tweet_id))
            return

        print('Fetching tweet detail for ID:{}'.format(tweet_id))
        while tweets_to_import:
            try:
                print("Processing tweet fetch for {}".format(tweet_id))
                tweets = self.__process_tweets_fetch(tweet_id)
                if tweets:
                    tweets_to_import = False
                    print("{} Tweets to be added in DB".format(len(tweets)))
                    self.tweetStoreIntf.store_tweets_info(tweets)
                    total_count += len(tweets)
                else:
                    print("No tweets found.")
                    tweets_to_import = False

            except TwitterRateLimitError as e:
                logger.exception(e)
                print(traceback.format_exc())
                print(e)
                # Sleep for 15 minutes - twitter API rate limit
                print('Sleeping for 15 minutes due to quota')
                time.sleep(900)
                continue

            except Exception as e:
                logger.exception(e)
                print(traceback.format_exc())
                print(e)
                time.sleep(30)
                continue

        while retweets_to_import:
            try:
                print("Processing retweet fetch for {}".format(tweet_id))
                re_tweets = self.__process_retweets_fetch(tweet_id)

                if re_tweets:
                    retweets_to_import = False
                    print("{} Retweets to be added in DB".format(
                        len(re_tweets)))
                    self.tweetStoreIntf.store_tweets_info(re_tweets)
                    total_count += len(re_tweets)

                else:
                    print("No retweets found.")
                    retweets_to_import = False

            except TwitterRateLimitError as e:
                logger.exception(e)
                print(traceback.format_exc())
                print(e)
                # Sleep for 15 minutes - twitter API rate limit
                print('Sleeping for 15 minutes due to quota')
                time.sleep(900)
                continue

            except Exception as e:
                logger.exception(e)
                print(traceback.format_exc())
                print(e)
                time.sleep(30)
                continue
        logger.info("[stats] {} tweets for [{}]".format(total_count, tweet_id))
        self.grandtotal += total_count
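# Hedged sketch (not from the original code): the rate-limit and error backoff
# repeated in every loop above could be factored into a small retry helper.
# TwitterRateLimitError, logger and time are the same names the examples use;
# everything else here is illustrative.
def _example_with_backoff(action, rate_limit_sleep=900, error_sleep=30):
    while True:
        try:
            return action()
        except TwitterRateLimitError:
            logger.exception("Rate limited; sleeping %s seconds", rate_limit_sleep)
            time.sleep(rate_limit_sleep)
        except Exception:
            logger.exception("Unexpected error; retrying in %s seconds", error_sleep)
            time.sleep(error_sleep)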