class Get_Retweets(object):
    """Export the retweets stored in MongoDB to a JSON-lines file."""

    def __init__(self, mongo_config=MONGO_CONFIG):
        """Open the MongoDB connection used by ``get_retweets``.

        Args:
            mongo_config: Connection parameters handed to ``MongoConnector``.
        """
        # NOTE(review): mongo_config is logged verbatim -- confirm it carries
        # no credentials before enabling debug logging in shared environments.
        logging.debug("Establishing MongoDB connection with parameters:- {}".format(mongo_config))
        # Despite the attribute name, this handle is queried with ``find()``.
        self.cursor = MongoConnector(mongo_config).__connect__()
        logging.info("Established connection...\n")

    def get_retweets(self):
        """Write every stored retweet to the output file, one JSON document per line.

        A document is treated as a retweet when its ``retweeted_status``
        field is present and truthy. The output path is
        ``OUTPUT_DIRECTORY + F_NAME`` and the file is overwritten on each
        call. Totals for all documents, retweets and non-retweets are logged
        at debug level once the pass is complete.

        Returns:
            None. The result is the file written to disk.
        """
        total_tweets = 0
        retweet_count = 0
        logging.debug("Checking for retweets...")

        with open(OUTPUT_DIRECTORY + F_NAME, 'w') as f_out:
            # Count while iterating instead of calling ``find().count()``:
            # ``Cursor.count()`` was removed in PyMongo 4, and a single pass
            # keeps both tallies consistent with each other.
            for tweet in self.cursor.find():
                total_tweets += 1
                if not tweet.get('retweeted_status'):
                    continue
                # json_util.default serialises BSON-specific values (such as
                # the document's ObjectId) that plain ``json`` rejects.
                json.dump(tweet, f_out, default=json_util.default)
                f_out.write('\n')
                retweet_count += 1

        logging.debug("Total Number of tweets (historical tweets) : {0}".format(total_tweets))
        logging.debug("Total Number of retweets (historical tweets) : {0}".format(retweet_count))
        logging.debug("Total Number of non-retweets (historical tweets) : {0}".format(total_tweets-retweet_count))
        logging.debug("Succesully checked for retweets")
'id_str', 'created_at', 'text', 'tweet_status', 'truncated', 'entities.user_mentions', 'favorited', 'favorite_count', 'retweeted', 'retweet_count', 'in_reply_to_screen_name', 'in_reply_to_status_id_str', 'in_reply_to_user_id', 'reply_count', 'is_quote_status', 'quote_count' ] user_projection = {attribute: 1 for attribute in user_visible_list} tweet_projection = {attribute: 1 for attribute in tweet_visible_list} discarded_users_list = list() counter = 0 for user in unique_users_list: tweet_list = list() query = {'user.id_str': user, 'lang': 'en'} tweet_count = cursor.find(query, user_projection).count() if (tweet_count < 10): discarded_users_list.append(user) print('discarding userid : %s...' % (user)) counter += 1 continue user_docs = cursor.find_one(query, user_projection) tweet_docs = cursor.find(query, tweet_projection).limit(MAX_TWEET_LIMIT) tweet_list = list(tweet_docs) print("Storing raw info of user with userid : %s" % (user)) # Discard users whose tweets < 10 #pprint(list(tweet_docs)) tweet_dict['doc'] = {'user_info': user_docs['user'], 'tweets': tweet_list}