Esempio n. 1
0
def main():
    """Entry point for the tweet-fetcher service.

    Runs one fetch pass, and when ``argsHandler.daemon`` is set keeps
    repeating a pass every ``sleeptime`` seconds. Relies on module-level
    ``argsHandler``, ``logger`` and ``TweetsFetcher``. Always logs the
    processed-tweet count on exit.
    """
    print("Starting Tweet fetcher. \nConfig file should be [{}]\n".format(
        argsHandler.env))
    logger.info("[tweets_fetcher] started at {}".format(datetime.now()))
    tweets_fetch_stats = {'processed': 0}
    tweetsFetcher = TweetsFetcher()
    sleeptime = 300  # seconds between daemon iterations
    try:
        while True:
            start_time = time.time()
            tweetsFetcher.handle_tweets_command()
            if not argsHandler.daemon:
                # Single-shot mode: one pass and out.
                logger.info("[tweets_fetcher]Exiting the program gracefully")
                print("[tweets_fetcher]Exiting the program gracefully")
                break
            # Sleep only for whatever part of the interval the fetch
            # did not already consume.
            elapsed_time = time.time() - start_time
            if elapsed_time < sleeptime:
                remaining_time = sleeptime - elapsed_time
                logger.info(
                    "[tweets_fetcher] next iteration in {} seconds from {}".format(
                        remaining_time, datetime.now()))
                print("[tweets_fetcher] next iteration in {} seconds from {}".format(
                    remaining_time, datetime.now()))
                time.sleep(remaining_time)
    except Exception as e:
        logger.exception("[tweets_fetcher]Caught exception {}".format(e))
        print("[tweets_fetcher]Caught exception {}".format(e))
    finally:
        # Record stats even when the loop exits via an exception.
        tweets_fetch_stats['processed'] = tweetsFetcher.grandtotal
        logger.info("[tweets_fetcher stats] {}".format(tweets_fetch_stats))
        logger.info("[tweets_fetcher] Ends at {}".format(datetime.now()))
Esempio n. 2
0
def main():
    """Entry point for the DM-lookup service.

    Registers as a DM-check client and looks up DM capability for users
    in the store, retrying every ``sleepseconds`` while the backing
    service reports ``ServiceNotReady``. Logs processed stats on exit.
    Reads TWITTER_ID/TWITTER_USER/CLIENT_ID/CLIENT_SCREEN_NAME from the
    environment.
    """
    print("Starting service at {}".format(datetime.now()))
    print(
        "Starting DM lookup with {}/{} client. \nConfig file should be [config/{}]\n"
        .format(os.environ["TWITTER_ID"], os.environ["TWITTER_USER"], '.env'))
    stats_tracker = {'processed': 0}
    userRelations = UserRelations(
        client_id=os.environ["CLIENT_ID"],
        client_screen_name=os.environ["CLIENT_SCREEN_NAME"],
        source_id=os.environ["TWITTER_ID"],
        source_screen_name=os.environ["TWITTER_USER"])
    retry = True
    sleepseconds = 30
    while retry:
        try:
            userRelations.register_as_dmcheck_client()
            userRelations.findDMForUsersInStore()
        except ServiceNotReady as e:
            # Transient: the dependent service is not up yet, wait and retry.
            print("caught exception {}".format(e))
            print("Retrying after {} seconds as service is not ready".format(
                sleepseconds))
            time.sleep(sleepseconds)
        except Exception as e:
            # Fix: the original dropped the exception silently; log it so
            # fatal errors are diagnosable, then stop retrying.
            logger.exception("Caught exception {}, exiting".format(e))
            retry = False
    stats_tracker['processed'] = userRelations.grandtotal
    logger.info("[DM stats] {}".format(stats_tracker))
    print("Exiting program at {}".format(datetime.now()))
Esempio n. 3
0
def main():
    """Entry point for the following-lookup service.

    Registers as a following-check client and fetches following lists for
    users in the store, retrying every ``sleepseconds`` while the backing
    service reports ``ServiceNotReady``. Logs processed stats on exit.
    Reads CLIENT_ID/CLIENT_SCREEN_NAME from the environment.
    """
    print("Starting service at {}".format(datetime.now()))
    print(
        "Starting Following lookup with {}/{} client. \nConfig file should be [config/{}]\n"
        .format(os.environ["CLIENT_ID"], os.environ["CLIENT_SCREEN_NAME"],
                '.env'))
    stats_tracker = {'processed': 0}
    followingFetcher = FollowingFetcher(
        client_id=os.environ["CLIENT_ID"],
        client_screen_name=os.environ["CLIENT_SCREEN_NAME"])
    retry = True
    sleepseconds = 30
    while retry:
        try:
            followingFetcher.register_as_followingcheck_client()
            followingFetcher.findFollowingForUsersInStore()
        except ServiceNotReady as e:
            # Transient: the dependent service is not up yet, wait and retry.
            print("caught exception {}".format(e))
            print("Retrying after {} seconds as service is not ready".format(
                sleepseconds))
            time.sleep(sleepseconds)
        except Exception as e:
            # Fix: the original dropped the exception silently; log it so
            # fatal errors are diagnosable, then stop retrying.
            logger.exception("Caught exception {}, exiting".format(e))
            retry = False
    stats_tracker['processed'] = followingFetcher.grandtotal
    logger.info("[Following stats] {}".format(stats_tracker))
    print("Exiting program at {}".format(datetime.now()))
Esempio n. 4
0
def main():
    """Run a single tweet-fetch pass; always log how many were processed."""
    print(
        "Starting Tweet fetcher. \nConfig file should be [config/{}]\n".format(
            config_file_name))
    stats = {'processed': 0}
    fetcher = TweetsFetcher()
    try:
        fetcher.handle_tweets_command()
    finally:
        # Record totals even when the fetch raises.
        stats['processed'] = fetcher.grandtotal
        logger.info("[tweets_fetcher stats] {}".format(stats))
Esempio n. 5
0
def main():
    """Entry point for a single best-effort DM lookup pass.

    Looks up DM status for users in the store via ``UserRelations``
    (built from the TWITTER_USER env var) and always logs the
    processed-user count on exit.
    """
    print("Starting DM lookup. \nConfig file should be [config/{}]\n".format(config_file_name))
    stats_tracker = {'processed': 0}
    userRelations = UserRelations(os.environ["TWITTER_USER"])
    try:
        userRelations.findDMForUsersInStore()
    except Exception as e:
        # Fix: was a bare `pass` that hid every failure; keep the
        # best-effort behavior but log the error for diagnosis.
        logger.exception("Caught exception {}".format(e))
    finally:
        stats_tracker['processed'] = userRelations.grandtotal
        logger.info("[DM stats] {}".format(stats_tracker))
        print("Exiting program")
Esempio n. 6
0
 def __check_follower_user_detail(self, users):
     """Fetch follower info for each user dict in `users` and store it
     under the 'followers' key.

     Records the query start time on self.twitter_query_start_time and
     logs when not every user could be processed.
     """
     #tested
     print("Finding follower users for {} users".format(len(users)))
     count = 0
     self.twitter_query_start_time = datetime.now()
     for user in users:
         print("Fetching follower info for {} user".format(user))
         followers_user = self.__process_follower_fetch(user)
         count = count + 1
         user['followers'] = followers_user
     print("Processed {} out of {} users for follower Check".format(count, len(users)))
     if count != len(users):
         # Fix: message previously read "fetch fetch status".
         logger.info("Unable to fetch follower status for {} users".format(len(users)-count))
Esempio n. 7
0
 def __check_following_user_detail(self, users):
     """Fetch following info for each user dict in `users` and store it
     under the 'followings' key; logs when not every user was processed.
     """
     print("Finding following users for {} users".format(len(users)))
     count = 0
     for user in users:
         print("Fetching following info for {} user".format(user))
         followings_user = self.__process_following_fetch(user)
         count = count + 1
         user['followings'] = followings_user
     print("Processed {} out of {} users for following Check".format(
         count, len(users)))
     if count != len(users):
         # Fix: message previously read "fetch fetch status".
         logger.info("Unable to fetch following status for {} users".format(
             len(users) - count))
Esempio n. 8
0
    def __check_dm_status(self, users):
        """For each user dict, query the friendship from the source account
        and record DM capability under user['candm']: "DM", "NON_DM", or
        "UNKNOWN" when Twitter cannot find the user.

        Sleeps out the remainder of the 15-minute rate-limit window when
        the remaining quota (x-rate-limit-remaining) hits the threshold.
        """
        print("Finding relations between {} and {} users".format(
            self.source_screen_name, len(users)))
        count = 0
        start_time = datetime.now()
        remaining_threshold = 0
        for user in users:
            try:
                curr_limit = get_reponse_header('x-rate-limit-remaining')
                if (curr_limit and int(curr_limit) <= remaining_threshold):
                    print("Sleeping as remaining x-rate-limit-remaining is {}".
                          format(curr_limit))
                    # Wait until the 15-minute window (plus a 2s cushion)
                    # since start_time has elapsed, then reset the window.
                    time_diff = (datetime.now() - start_time).seconds
                    remaining_time = (15 * 60) - time_diff
                    sleeptime = remaining_time + 2
                    print(
                        "sleeping for {} seconds to avoid threshold. Current time={}"
                        .format(sleeptime, datetime.now()))
                    if (sleeptime > 0):
                        time.sleep(sleeptime)
                    start_time = datetime.now()
                    print("Continuing after threshold reset")

                print("Fetching friendship info from {} to {} user".format(
                    self.source_screen_name, user))
                friendship = self.__process_friendship_fetch(user)
            except TwitterUserNotFoundError:
                # Fix: removed unused `as unf` binding and "couldn't found"
                # grammar in the log message.
                logger.warning(
                    "Twitter couldn't find user {} and so ignoring".format(
                        user))
                user['candm'] = "UNKNOWN"
                self.grandtotal += 1
                continue
            count = count + 1
            status = friendship['relationship']['source']['can_dm']
            if status:
                user['candm'] = "DM"
            else:
                user['candm'] = "NON_DM"
        print("Processed {} out of {} users for DM Check".format(
            count, len(users)))
        if count != len(users):
            logger.info("Unable to fetch DM status for {} users".format(
                len(users) - count))
 def __retweeted_status_screen_name(self, tweet, filter_param):
     """Return True when `tweet` is a retweet of a status authored by
     `filter_param` (the desired screen name).

     Self-retweets (retweeter == original author) are skipped with an
     info log; a retweeted status with no screen name is logged as an
     error. Non-retweets return False.
     """
     status = False
     desired_screen_name = filter_param
     retweet_user_name = tweet['user']['screen_name']
     if 'retweeted_status' in tweet and 'user' in tweet['retweeted_status']:
         orig_user = tweet['retweeted_status']['user']
         if 'screen_name' in orig_user:
             orig_user_screen_name = orig_user['screen_name']
             if retweet_user_name == orig_user_screen_name:
                 logger.info(
                     "skipping {} tweet as it is self retweet".format(
                         tweet['id']))
             elif orig_user_screen_name == desired_screen_name:
                 status = True
         else:
             # Fix: was `.fromat(tweet.id)` — a guaranteed AttributeError;
             # tweet is a dict (see tweet['id'] above), so use .format
             # with dict indexing.
             logger.error("Couldn't find screen name for {} Tweet".format(
                 tweet['id']))
     return status
Esempio n. 10
0
    def get_nonexists_users_list(self):
        """Return target screen names from self.outfile whose 'exists'
        flag is 0.

        The file is expected to contain one JSON object per line; when it
        cannot be read, the error is reported and an empty list results.
        """
        print("Finding Non existing users from file")
        source_path = self.outfile
        records = []
        try:
            with open(source_path) as handle:
                records = [json.loads(row) for row in handle]
        except IOError as err:
            print("Info: Couldn't read file:({0}): {1}".format(
                err.errno, err.strerror))
            logger.info("File {} I/O error({}): {}".format(
                source_path, err.errno, err.strerror))

        missing = []
        for record in records:
            if record.get('exists') == 0:
                missing.append(record['target_screen_name'])
        logger.debug("Got {} Non existant users".format(len(missing)))
        return missing
Esempio n. 11
0
    def import_tweets_search(self, search_term, categories_list,
                             sync_with_store, tweet_filter):
        """Page through Twitter search results for `search_term` and store
        them through self.tweetStoreIntf.

        categories_list: forwarded to store_tweets_info when storing.
        sync_with_store: when truthy, resume pagination just below the
            lowest tweet id already stored for this search term.
        tweet_filter: filter spec passed to self.filterhandler; falsy
            means every fetched tweet is stored.

        Side effects: adds the total fetched count to self.grandtotal;
        sleeps to honor the API rate limit and retries on errors.
        """
        print(
            "Processing Tweets import for search key [{}]".format(search_term))
        frequency = 100  # tweets requested per search call
        tweets_to_import = True
        max_id = None  # upper bound for the next result page (exclusive after -1 step)
        total_count = 0
        start_time = datetime.now()
        search_term_query = self.tweetStoreIntf.util_get_search_term_query(
            search_term)
        if sync_with_store:
            print("Syncing with store")
            # Continue below the oldest stored tweet so pages aren't refetched.
            min_id = self.tweetStoreIntf.get_tweets_min_id(search_term_query)
            if (min_id):
                max_id = int(min_id) - 1

        while tweets_to_import:
            try:

                # Proactively sleep when remaining quota cannot cover the
                # next batch of `frequency` results.
                curr_limit = get_reponse_header('x-rate-limit-remaining')
                if (curr_limit and int(curr_limit) <= frequency + 1):
                    print("Sleeping as remaining x-rate-limit-remaining is {}".
                          format(curr_limit))
                    # Wait out the rest of the 15-minute window (+2s cushion).
                    time_diff = (datetime.now() - start_time).seconds
                    remaining_time = (15 * 60) - time_diff
                    sleeptime = remaining_time + 2
                    print(
                        "sleeping for {} seconds to avoid threshold. Current time={}"
                        .format(sleeptime, datetime.now()))
                    if (sleeptime > 0):
                        time.sleep(sleeptime)
                    start_time = datetime.now()
                    print("Continuing after threshold reset")

                tweets = self.__process_tweets_search(search_term=search_term,
                                                      max_id=max_id,
                                                      count=frequency)
                if len(tweets) > 0:
                    tweets_to_import = True
                    plural = "s." if len(tweets) > 1 else "."
                    print("Found " + str(len(tweets)) + " tweet" + plural)
                    total_count += len(tweets)
                    print("Found total {} tweets for {} search\n".format(
                        total_count, search_term))

                    if not max_id:
                        max_id = tweets[0]['id']

                    # Track the smallest id in this page for the next request.
                    for tweet in tweets:
                        max_id = min(max_id, tweet['id'])
                    #decrement one less so that same tweet is not sent again in next call.
                    max_id = max_id - 1
                    if tweet_filter:
                        filtered_tweets = self.filterhandler.apply_filters(
                            tweets, tweet_filter)
                    else:
                        filtered_tweets = tweets
                    print("{} Tweets to be stored out of {} tweets".format(
                        len(filtered_tweets), len(tweets)))
                    if (len(filtered_tweets)):
                        self.tweetStoreIntf.store_tweets_info(
                            filtered_tweets, categories_list)
                        print("{} Search tweets added to graph for {}!".format(
                            len(filtered_tweets), search_term))
                    else:
                        print("skipping as none found from {} total tweets".
                              format(len(tweets)))
                else:
                    # Empty page: pagination is complete.
                    print("No search tweets found for %s." % (search_term))
                    if (not total_count):
                        logger.info("No search tweets found for -->> %s" %
                                    (search_term))
                    tweets_to_import = False

            except TwitterRateLimitError as e:
                logger.exception(e)
                print(traceback.format_exc())
                print(e)
                # Sleep for 15 minutes - twitter API rate limit
                print('Sleeping for 15 minutes due to quota. Current time={}'.
                      format(datetime.now()))
                time.sleep(900)
                continue

            except Exception as e:
                # NOTE(review): retries forever on any error — presumably
                # intentional for a long-running importer; confirm.
                logger.exception(e)
                print(traceback.format_exc())
                print(e)
                time.sleep(30)
                continue
        logger.info("[stats] {} tweets for [{}]".format(
            total_count, search_term))
        self.grandtotal += total_count
Esempio n. 12
0
    def __import_tweets_by_tweet_id(self,
                                    tweet_id,
                                    fetch_retweet=False,
                                    forced=False):
        """Fetch the tweet with `tweet_id` (and optionally its retweets)
        and store them through self.tweetStoreIntf.

        fetch_retweet: also fetch and store the tweet's retweets.
        forced: re-import even when the tweet already exists in the store.

        Side effects: adds the stored-tweet count to self.grandtotal;
        sleeps and retries on rate-limit or transient errors.
        Fix: removed unused locals (count, lang, max_id, since_id) and the
        non-idiomatic `== True` comparison.
        """
        print('Importing Tweet for {}'.format(tweet_id))
        tweets_to_import = True
        retweets_to_import = fetch_retweet
        total_count = 0

        # Skip work when the tweet is already stored, unless forced.
        if self.tweetStoreIntf.is_tweet_exists(tweet_id) and not forced:
            print("Skipping as there is already entry for {} tweet ID ".format(
                tweet_id))
            return

        print('Fetching tweet detail for ID:{}'.format(tweet_id))
        while tweets_to_import:
            try:
                print("Processing tweet fetch for {}".format(tweet_id))
                tweets = self.__process_tweets_fetch(tweet_id)
                if tweets:
                    tweets_to_import = False
                    print("{} Tweets to be added in DB".format(len(tweets)))
                    self.tweetStoreIntf.store_tweets_info(tweets)
                    total_count += len(tweets)
                else:
                    print("No tweets found.")
                    tweets_to_import = False

            except TwitterRateLimitError as e:
                logger.exception(e)
                print(traceback.format_exc())
                print(e)
                # Sleep for 15 minutes - twitter API rate limit
                print('Sleeping for 15 minutes due to quota')
                time.sleep(900)
                continue

            except Exception as e:
                logger.exception(e)
                print(traceback.format_exc())
                print(e)
                time.sleep(30)
                continue

        while retweets_to_import:
            try:
                print("Processing retweet fetch for {}".format(tweet_id))
                re_tweets = self.__process_retweets_fetch(tweet_id)

                if re_tweets:
                    retweets_to_import = False
                    print("{} Retweets to be added in DB".format(
                        len(re_tweets)))
                    self.tweetStoreIntf.store_tweets_info(re_tweets)
                    total_count += len(re_tweets)

                else:
                    print("No retweets found.")
                    retweets_to_import = False

            except TwitterRateLimitError as e:
                logger.exception(e)
                print(traceback.format_exc())
                print(e)
                # Sleep for 15 minutes - twitter API rate limit
                print('Sleeping for 15 minutes due to quota')
                time.sleep(900)
                continue

            except Exception as e:
                logger.exception(e)
                print(traceback.format_exc())
                print(e)
                time.sleep(30)
                continue
        logger.info("[stats] {} tweets for [{}]".format(total_count, tweet_id))
        self.grandtotal += total_count