コード例 #1
0
class Get_Retweets(object):
    """Export the retweets found in a MongoDB-backed tweet store to a file.

    The connection is opened once in ``__init__``; ``get_retweets`` then
    scans the stored tweets and writes the retweets out as JSON lines.
    """

    def __init__(self, mongo_config=MONGO_CONFIG):
        """Open the MongoDB connection used by ``get_retweets``.

        Args:
            mongo_config: Connection parameters passed to ``MongoConnector``.
                Defaults to the module-level ``MONGO_CONFIG``.
        """
        logging.debug("Establishing MongoDB connection with parameters:- %s", mongo_config)
        # NOTE(review): despite the name, this handle is only ever used via
        # ``.find()``, so it is presumably a collection -- confirm against
        # MongoConnector.
        self.cursor = MongoConnector(mongo_config).__connect__()
        logging.info("Established connection...\n")

    def get_retweets(self):
        """Write every stored retweet to the output file, one JSON object per line.

        A document counts as a retweet when its ``retweeted_status`` field is
        present and truthy. The output file ``OUTPUT_DIRECTORY + F_NAME`` is
        opened in write mode, so any previous content is replaced. The totals
        (all tweets, retweets, non-retweets) are logged at debug level.

        The overall total is tallied during the scan itself instead of calling
        ``Cursor.count()``: that method was deprecated in PyMongo 3.7 and
        removed in 4.0, and counting in-loop also avoids an extra round trip
        and keeps the totals consistent with the documents actually scanned.

        Returns:
            None. The retweets are written to the file, not returned.
        """
        total_tweets = 0
        retweets_written = 0
        logging.debug("Checking for retweets...")
        with open(OUTPUT_DIRECTORY + F_NAME, 'w') as f_out:
            for tweet in self.cursor.find():
                total_tweets += 1
                if not tweet.get('retweeted_status'):
                    continue
                # json_util.default serialises BSON-specific values
                # (ObjectId, datetime, ...) that plain json cannot handle.
                json.dump(tweet, f_out, default=json_util.default)
                f_out.write('\n')
                retweets_written += 1
        logging.debug("Total Number of tweets (historical tweets) : %d", total_tweets)
        logging.debug("Total Number of retweets (historical tweets) : %d", retweets_written)
        logging.debug("Total Number of non-retweets (historical tweets) : %d",
                      total_tweets - retweets_written)
        logging.debug("Succesully checked for retweets")
コード例 #2
0
    'id_str', 'created_at', 'text', 'tweet_status', 'truncated',
    'entities.user_mentions', 'favorited', 'favorite_count', 'retweeted',
    'retweet_count', 'in_reply_to_screen_name', 'in_reply_to_status_id_str',
    'in_reply_to_user_id', 'reply_count', 'is_quote_status', 'quote_count'
]
# MongoDB inclusion projections: every listed attribute maps to 1 so that
# queries using them return only those fields.
user_projection = dict.fromkeys(user_visible_list, 1)
tweet_projection = dict.fromkeys(tweet_visible_list, 1)

# Bookkeeping for the per-user loop: ids of the users that get discarded and
# a running count of how many were discarded.
discarded_users_list = []
counter = 0

# For each unique user: skip those with too few English tweets, otherwise
# collect the user's profile and (up to MAX_TWEET_LIMIT) tweets.
for user in unique_users_list:
    # Reset per iteration so a discarded user never leaves the previous
    # user's tweets behind in this module-level name.
    tweet_list = []
    query = {'user.id_str': user, 'lang': 'en'}

    # Discard users whose tweets < 10.
    # count_documents() replaces Cursor.count(), which was deprecated in
    # PyMongo 3.7 and removed in 4.0; the projection is irrelevant to a count.
    tweet_count = cursor.count_documents(query)
    if tweet_count < 10:
        discarded_users_list.append(user)
        print('discarding userid : %s...' % (user))
        counter += 1
        continue

    # The user profile is read from any one matching tweet document; the
    # tweets themselves are fetched with the tweet projection and capped.
    user_docs = cursor.find_one(query, user_projection)
    tweet_list = list(cursor.find(query, tweet_projection).limit(MAX_TWEET_LIMIT))
    print("Storing raw info of user with userid : %s" % (user))

    tweet_dict['doc'] = {'user_info': user_docs['user'], 'tweets': tweet_list}