def handle_hashtags(str_hashtag, dict_set_hashtags, str_username):
    """
    Record that a user tweeted a hashtag.

    The hashtag is lower-cased and passed through
    ``remove_invalid_characters``. If nothing is left, the call is a no-op.
    Otherwise the result is passed through ``remove_latin_accents`` and used
    as the key under which ``str_username`` is stored.

    Args:
        str_hashtag: Raw hashtag text.
        dict_set_hashtags: Mapping from normalized hashtag to the set of
            usernames that tweeted it. Mutated in place.
        str_username: Username to add to the hashtag's set.

    Returns:
        None.
    """
    str_hashtag = remove_invalid_characters(str_hashtag.lower())

    # Test emptiness by value. The previous `is not ''` compared object
    # identity, which only works by accident of string interning and raises
    # a SyntaxWarning on Python 3.8+. Truthiness also skips a None result.
    if not str_hashtag:
        return

    str_hashtag = remove_latin_accents(str_hashtag)

    # setdefault creates the set on first sight of the hashtag and returns
    # the existing one afterwards, so no KeyError handling is needed.
    dict_set_hashtags.setdefault(str_hashtag, set()).add(str_username)
def get_hashtags_without_accents(str_text):
    """
    Collect the hashtags found in a piece of text, with accents removed.

    A whitespace-separated token counts as a hashtag when it starts with "#",
    does not end with "…" (a truncated word), and, once lower-cased and passed
    through ``remove_punctuation``, is longer than one character.

    Returns:
        A list of strings, in order of appearance, each being "#" followed by
        the ``remove_latin_accents`` form of the cleaned token. Duplicates are
        kept.
    """
    hashtags = []
    for token in str_text.split():
        # Skip ordinary words and hashtags cut off by a trailing ellipsis.
        if not token.startswith("#") or token.endswith("…"):
            continue

        cleaned = remove_punctuation(token.lower())
        # Nothing usable left after cleaning.
        if cleaned is None or len(cleaned) <= 1:
            continue

        hashtags.append("#" + remove_latin_accents(cleaned))
    return hashtags
def get_hashtags_without_accents(str_text):
    """
    Extract accent-free hashtags from ``str_text``.

    The text is split on whitespace. Words that begin with "#" and do not end
    with "…" (which marks a truncated word) are lower-cased and run through
    ``remove_punctuation``. Results that are not None and longer than one
    character are run through ``remove_latin_accents`` and returned with a
    leading "#".

    Returns:
        The matching hashtags as a list, in the order they occur in the text.
    """
    # First pass: keep only complete words that look like hashtags.
    candidates = [
        word
        for word in str_text.split()
        if word.startswith("#") and not word.endswith("…")
    ]

    # Second pass: normalize each candidate and drop the ones too short.
    found = []
    for candidate in candidates:
        stripped = remove_punctuation(candidate.lower())
        if stripped is not None and len(stripped) > 1:
            found.append("#" + remove_latin_accents(stripped))
    return found
def handle_hashtags(str_hashtag, str_username, dict_set_hashtags,
                    dict_set_hashtags_without_accents):
    """
    Record that a user tweeted a hashtag, with and without accents.

    The hashtag is lower-cased and passed through ``remove_punctuation``. If
    nothing is left, the call is a no-op. Otherwise the user is added under
    that key in ``dict_set_hashtags`` and under the ``remove_latin_accents``
    form of that key in ``dict_set_hashtags_without_accents``.

    Args:
        str_hashtag: Raw hashtag text.
        str_username: Username to add to both sets.
        dict_set_hashtags: Mapping from normalized hashtag to the set of
            usernames that tweeted it. Mutated in place.
        dict_set_hashtags_without_accents: Same mapping, keyed by the
            accent-free form of the hashtag. Mutated in place.

    Returns:
        None.
    """
    str_hashtag = remove_punctuation(str_hashtag.lower())

    # Test emptiness by value. `is not ''` compared object identity, which is
    # implementation-dependent and a SyntaxWarning on Python 3.8+. Truthiness
    # also skips a None result.
    if not str_hashtag:
        return

    str_hashtag_without_accents = remove_latin_accents(str_hashtag)

    # Update each dictionary on its own. The previous single try/except
    # treated a miss in either dictionary as a miss in both and overwrote
    # both entries. A new accented spelling therefore wiped the users already
    # stored under the shared accent-free key.
    dict_set_hashtags.setdefault(str_hashtag, set()).add(str_username)
    dict_set_hashtags_without_accents.setdefault(
        str_hashtag_without_accents, set()).add(str_username)
def handle_hashtags(str_hashtag, str_username, dict_set_hashtags,
                    dict_set_hashtags_without_accents):
    """
    Add a user to the sets kept for a hashtag and for its accent-free form.

    The hashtag is lower-cased and cleaned with ``remove_punctuation``. An
    empty result is ignored. Otherwise ``str_username`` is added to the set
    stored under the cleaned hashtag in ``dict_set_hashtags``, and to the set
    stored under its ``remove_latin_accents`` form in
    ``dict_set_hashtags_without_accents``.

    Args:
        str_hashtag: Raw hashtag text.
        str_username: Username to record.
        dict_set_hashtags: Mapping of cleaned hashtag to a set of usernames.
            Mutated in place.
        dict_set_hashtags_without_accents: Mapping of accent-free hashtag to
            a set of usernames. Mutated in place.

    Returns:
        None.
    """
    str_hashtag = remove_punctuation(str_hashtag.lower())

    # Value check instead of `is not ''`: identity comparison against a
    # string literal is unreliable and warns on Python 3.8+.
    if not str_hashtag:
        return

    str_hashtag_without_accents = remove_latin_accents(str_hashtag)

    # Two independent updates. With one shared try/except, a KeyError on the
    # accented key reset the accent-free entry too, dropping users recorded
    # earlier under another spelling of the same hashtag.
    for mapping, key in (
        (dict_set_hashtags, str_hashtag),
        (dict_set_hashtags_without_accents, str_hashtag_without_accents),
    ):
        mapping.setdefault(key, set()).add(str_username)
def filtered(str_in):
    """
    Return ``str_in`` lower-cased and passed through
    ``lib_text.remove_latin_accents``.
    """
    # Imported at call time rather than at module load.
    from lib_text import remove_latin_accents

    lowered = str_in.lower()
    return remove_latin_accents(lowered)