Ejemplo n.º 1
0
def handle_hashtags(str_hashtag, dict_set_hashtags, str_username):
    """
    Record that a user tweeted a hashtag.

    The hashtag is lower-cased and passed through remove_invalid_characters.
    If nothing is left after that, the dictionary is not touched. Otherwise
    the result is passed through remove_latin_accents and used as the key
    under which str_username is added.

    dict_set_hashtags maps each normalized hashtag to the set of users that
    tweeted it. It is modified in place; nothing is returned.
    """
    str_hashtag = remove_invalid_characters(str_hashtag.lower())
    # Truthiness instead of `is not ''`: identity comparison against a string
    # literal depends on interpreter interning and raises a SyntaxWarning on
    # Python 3.8+. This guard also skips a None result, should the helper
    # return one.
    if not str_hashtag:
        return
    str_hashtag = remove_latin_accents(str_hashtag)
    # setdefault creates the user set on first sight of the hashtag.
    dict_set_hashtags.setdefault(str_hashtag, set()).add(str_username)
Ejemplo n.º 2
0
def get_hashtags_without_accents(str_text):
    """
    Collect the hashtags found in a text, lower-cased and without accents.

    The text is split on whitespace. A word is a candidate when it starts
    with "#" and does not end with "…" (which marks a truncated word).
    Each candidate is lower-cased and passed through remove_punctuation; it
    is kept only when that result is not None and is longer than one
    character. Kept results are passed through remove_latin_accents and
    prefixed with "#".

    Returns the list of hashtags in the order they appear in the text.
    """
    hashtags = []
    for token in str_text.split():
        # Skip plain words and hashtags cut off by an ellipsis.
        if not token.startswith("#") or token.endswith("…"):
            continue
        cleaned = remove_punctuation(token.lower())
        if cleaned is None or len(cleaned) <= 1:
            continue
        hashtags.append("#" + remove_latin_accents(cleaned))
    return hashtags
def get_hashtags_without_accents(str_text):
    """
    Return the accent-free, lower-cased hashtags contained in a string.

    Words are obtained by splitting the string on whitespace. Only words
    that begin with "#" and do not end with "…" (a truncation marker) are
    considered. Each one is lower-cased and run through remove_punctuation;
    when the result is not None and has more than one character, it is run
    through remove_latin_accents, prefixed with "#" and added to the output.

    The returned list keeps the order in which the hashtags occur.
    """
    # Lazy filter so candidates are processed one by one, in text order.
    candidates = (
        piece for piece in str_text.split()
        if piece.startswith("#") and not piece.endswith("…")
    )
    found = []
    for piece in candidates:
        stripped = remove_punctuation(piece.lower())
        if stripped is not None and len(stripped) > 1:
            found.append("#" + remove_latin_accents(stripped))
    return found
Ejemplo n.º 4
0
def handle_hashtags(str_hashtag, str_username, dict_set_hashtags, dict_set_hashtags_without_accents):
    """
    Record that a user tweeted a hashtag, with and without accents.

    The hashtag is lower-cased and passed through remove_punctuation. If
    nothing is left after that, neither dictionary is touched. Otherwise
    str_username is added to:

    * the set stored in dict_set_hashtags under the cleaned hashtag, and
    * the set stored in dict_set_hashtags_without_accents under the
      remove_latin_accents form of the cleaned hashtag.

    Both dictionaries are modified in place; nothing is returned.
    """
    str_hashtag = remove_punctuation(str_hashtag.lower())
    # Truthiness instead of `is not ''`: identity comparison against a string
    # literal depends on interpreter interning and raises a SyntaxWarning on
    # Python 3.8+. This guard also skips a None result, should the helper
    # return one.
    if not str_hashtag:
        return
    str_hashtag_without_accents = remove_latin_accents(str_hashtag)
    # Update each dictionary independently. A key can exist in one dictionary
    # and be missing in the other (different spellings may map to the same
    # accent-free key); a single shared try/except KeyError would then
    # overwrite an existing set and drop the users already recorded in it.
    dict_set_hashtags.setdefault(str_hashtag, set()).add(str_username)
    dict_set_hashtags_without_accents.setdefault(
        str_hashtag_without_accents, set()).add(str_username)
Ejemplo n.º 5
0
def handle_hashtags(str_hashtag, str_username, dict_set_hashtags,
                    dict_set_hashtags_without_accents):
    """
    Register a user under a hashtag in both hashtag dictionaries.

    The hashtag is lower-cased and passed through remove_punctuation. When
    the cleaned hashtag is empty, the call is a no-op. Otherwise the user is
    added to the set keyed by the cleaned hashtag in dict_set_hashtags and
    to the set keyed by its remove_latin_accents form in
    dict_set_hashtags_without_accents.

    Both dictionaries are updated in place; the function returns None.
    """
    str_hashtag = remove_punctuation(str_hashtag.lower())
    # `is not ''` tested object identity, which only works by virtue of
    # interpreter string interning (SyntaxWarning on Python 3.8+); test
    # emptiness by value instead. A None result is skipped as well.
    if not str_hashtag:
        return
    str_hashtag_without_accents = remove_latin_accents(str_hashtag)
    # The two dictionaries must not share one try/except KeyError: if the
    # key is present in only one of them, the except branch would replace
    # the existing set there and lose its previously recorded users.
    dict_set_hashtags.setdefault(str_hashtag, set()).add(str_username)
    dict_set_hashtags_without_accents.setdefault(
        str_hashtag_without_accents, set()).add(str_username)
Ejemplo n.º 6
0
def filtered(str_in):
    """
    Return str_in lower-cased and passed through remove_latin_accents.

    remove_latin_accents is imported from lib_text each time the function
    is called.
    """
    from lib_text import remove_latin_accents

    lowered = str_in.lower()
    return remove_latin_accents(lowered)