def __get_hashtags_and_metadata(self):
    """
    Read the keyword configuration and return the hashtags it declares.

    The configuration is loaded from ``config.json`` in the parent of the
    directory that contains this file.

    :return: a tuple ``(hashtags, metadata)`` where ``hashtags`` is the list
             of lower-cased keywords that contain no '@' and are not in the
             exclusion list, and ``metadata`` is the second value returned by
             ``parse_metadata``, passed through untouched
    """
    # These hashtags are left out because they are proper names of
    # movements and people
    excluded_names = ('HonorColorado', 'ColoradoAñetete', 'tuma')

    config_path = pathlib.Path(__file__).parents[1] / 'config.json'
    settings = get_config(config_path)
    keywords, metadata = parse_metadata(settings['metadata'])

    hashtags = [
        kw.lower()
        for kw in keywords
        if '@' not in kw and kw not in excluded_names
    ]
    return hashtags, metadata
def fix_tweets_with_empty_flags():
    """
    Rebuild the flags of relevant tweets whose keyword flag list is empty.

    Searches the 'tweets' collection for records with ``relevante == 1`` and
    an empty ``flag.keyword`` array, builds a new flag for each from the
    configured metadata and the tweet's entities, and writes it back using
    the tweet's ``id_str`` as the match criterion.
    """
    db = DBManager('tweets')
    config_path = pathlib.Path(__file__).parents[1] / 'config.json'
    settings = get_config(config_path)
    # only the metadata part of the parsed configuration is needed here
    _, k_metadata = parse_metadata(settings['metadata'])

    selector = {'flag.keyword': {'$size': 0}, 'relevante': 1}
    for record in db.search(selector):
        tweet_obj = record['tweet_obj']
        tweet_id = tweet_obj['id_str']
        logging.info('Updating flags of tweet {0}'.format(tweet_id))
        # create_flag also returns headers, which are not used here
        empty_flag, _ = create_flag(k_metadata)
        tweet_entities = get_entities_tweet(tweet_obj)
        filled_flag = add_values_to_flags(empty_flag, tweet_entities, k_metadata)
        db.update_record({'tweet_obj.id_str': tweet_id}, filled_flag)


# if __name__ == '__main__':
#     fix_tweets_with_empty_flags()
def do_tweet_collection():
    """
    Collect tweets for every configured keyword and then evaluate them.

    The configuration is read from ``config.json`` in the directory that
    contains this file. Keywords containing '@' are searched as 'user',
    every other keyword as 'hashtag'. After all searches have run, a
    ``TweetEvaluator`` is asked to identify the relevant tweets.
    """
    config_path = pathlib.Path(__file__).parents[0] / 'config.json'
    settings = get_config(config_path)
    twitter_settings = settings['twitter']
    credentials = {
        'key': twitter_settings['consumer_key'],
        'secret': twitter_settings['consumer_secret'],
    }
    keywords, k_metadata = parse_metadata(settings['metadata'])
    storage = DBManager('tweets')
    api = TwitterAPIManager(credentials, storage)

    # zip is kept so that iteration stops at the shorter of the two sequences;
    # the per-keyword metadata row itself is not used, the full metadata
    # list is what gets handed to the search
    for current_keyword, _ in zip(keywords, k_metadata):
        logging.info('Searching tweets for %s' % current_keyword)
        search_kind = 'user' if '@' in current_keyword else 'hashtag'
        api.search_tweets(settings['tweets_qry'], current_keyword,
                          search_kind, k_metadata)

    logging.info('Evaluating the relevance of the new tweets...')
    evaluator = TweetEvaluator()
    evaluator.identify_relevant_tweets()
def fix_value_of_candidatura(self):
    """
    Fill in the 'candidatura' field of stored tweets where it is empty.

    Keyword metadata is loaded from ``config.json`` in the parent of the
    directory that contains this file. Only metadata rows with a non-empty
    'candidatura' are considered. For each stored record whose 'candidatura'
    is the empty string, the rows matching the record's 'partido_politico'
    (and 'movimiento', when the record has one) are checked against the
    tweet: against its user mentions and hashtags when the tweet has an
    'entities' key, otherwise against its text. The first matching row
    supplies the candidacy and the record is saved.

    For retweets, the tweet under 'retweeted_status' is the one inspected,
    for entities and for text alike.

    :return: number of records whose 'candidatura' was set and saved
    """
    script_parent_dir = pathlib.Path(__file__).parents[1]
    config_fn = script_parent_dir.joinpath('config.json')
    configuration = get_config(config_fn)
    keywords, k_metadata = parse_metadata(configuration['metadata'])

    # keep metadata that refer to candidacies, tagging each row with
    # the keyword it belongs to
    interested_data = []
    for kword, kmetada in zip(keywords, k_metadata):
        if kmetada['candidatura'] != '':
            kmetada['keyword'] = kword
            interested_data.append(kmetada)

    # select tweets without candidacy
    s_objs = self.__dbm.search({'candidatura': ''})
    num_fixed_tweets = 0

    # iterate over tweets without candidacy and fix those
    # whose text mention a candidate or have hashtags
    # related to a candidacy
    for s_obj in s_objs:
        party = s_obj['partido_politico']
        movement = s_obj['movimiento']
        tweet = s_obj['tweet_obj']

        # keep metadata related to the political party
        # (and movement, when the record has one) of the tweet
        relevant_data = [
            ida for ida in interested_data
            if ida['partido_politico'] == party
            and (movement == '' or ida['movimiento'] == movement)
        ]
        if not relevant_data:
            continue

        # for retweets, work on the embedded original tweet
        if 'retweeted_status' in tweet:
            original_tweet = tweet['retweeted_status']
        else:
            original_tweet = tweet

        # extract relevant information of the tweet: hashtags and mentions
        # if the tweet obj has these entities, otherwise the text of the tweet
        candidacy = ''
        if 'entities' in original_tweet:
            entities = original_tweet['entities']
            t_user_mentions = self.__get_screen_names(entities['user_mentions'])
            t_hashtags = self.__get_hashtags(entities['hashtags'])
            # see if the interested keywords are part of the tweet
            # hashtags or mentions
            for rd in relevant_data:
                if rd['keyword'] in t_user_mentions or rd['keyword'] in t_hashtags:
                    candidacy = rd['candidatura']
                    break
        else:
            # read the text from the same object that was tested for
            # 'full_text'; previously the outer tweet was read, which for
            # retweets is a different object than the one checked
            if 'full_text' in original_tweet:
                t_text = original_tweet['full_text']
            else:
                t_text = original_tweet['text']
            # see if the interested keywords are present in the text
            for rd in relevant_data:
                if rd['keyword'] in t_text:
                    candidacy = rd['candidatura']
                    break

        # fix candidacy key
        if candidacy:
            s_obj['candidatura'] = candidacy
            num_fixed_tweets += 1
            self.__dbm.save_record(s_obj)

    return num_fixed_tweets