Esempio n. 1
0
def add_word_to_timeline(str_word, words_per_time, timestamp):
    """Append ``timestamp`` to the timeline of the cleaned form of ``str_word``.

    The word is passed through ``remove_punctuation`` and lower-cased. It is
    recorded only if the cleaned word is longer than one character and
    ``is_stopword`` does not flag it.

    Args:
        str_word: Raw word to record.
        words_per_time: Mutable mapping of cleaned word -> list of timestamps;
            updated in place.
        timestamp: Value appended to the word's list. An empty string means
            "no timestamp" and makes the call a no-op.

    Returns:
        None.
    """
    # Compare by value: ``is not ''`` tests object identity, which only works
    # through string interning and triggers a SyntaxWarning on Python 3.8+.
    if timestamp == '':
        return

    str_word = remove_punctuation(str_word)
    # The cleaning helper may hand back None; such words are skipped.
    if str_word is None:
        return

    str_word = str_word.lower()
    if (not is_stopword(str_word)) and len(str_word) > 1:
        # Create the list on first sight of the word, then append.
        words_per_time.setdefault(str_word, []).append(timestamp)
Esempio n. 2
0
def add_word_to_timeline(str_word, words_per_time, timestamp):
	"""Append ``timestamp`` to the timeline of the cleaned form of ``str_word``.

	The word is passed through ``remove_invalid_characters`` and lower-cased.
	It is recorded only if the cleaned word is longer than one character and
	``is_stopword`` does not flag it.

	Args:
		str_word: Raw word to record.
		words_per_time: Mutable mapping of cleaned word -> list of timestamps;
			updated in place.
		timestamp: Value appended to the word's list. An empty string means
			"no timestamp" and makes the call a no-op.

	Returns:
		None.
	"""
	# Compare by value: ``is not ''`` tests object identity, which only works
	# through string interning and triggers a SyntaxWarning on Python 3.8+.
	if timestamp == '':
		return

	str_word = remove_invalid_characters(str_word)
	# The cleaning helper may hand back None; such words are skipped.
	if str_word is None:
		return

	str_word = str_word.lower()
	if (not is_stopword(str_word)) and len(str_word) > 1:
		# Create the list on first sight of the word, then append.
		words_per_time.setdefault(str_word, []).append(timestamp)
Esempio n. 3
0
def handle_common_words(str_word, dict_int_words):
    """Increment the count of the cleaned form of ``str_word``.

    The word is lower-cased and passed through ``remove_punctuation``. The
    count is bumped only if the cleaned word is longer than one character and
    ``is_stopword`` does not flag it.

    Args:
        str_word: Raw word to count.
        dict_int_words: Mapping of cleaned word -> integer count; updated in
            place.

    Returns:
        None.
    """
    str_word = remove_punctuation(str_word.lower())

    # After cleaning, the word may have no letters left, e.g. if it was ";)".
    # Test by value/truthiness rather than ``is not ''``: identity against a
    # literal depends on string interning and raises SyntaxWarning on 3.8+.
    # ``not str_word`` also skips a None result, should the helper return one.
    if not str_word:
        return

    if (not is_stopword(str_word)) and len(str_word) > 1:
        # NOTE(review): ``+= 1`` needs the key to exist already or the mapping
        # to supply a default (e.g. defaultdict(int) / Counter) - confirm
        # against the caller.
        dict_int_words[str_word] += 1
Esempio n. 4
0
def handle_common_words(str_word, dict_int_words):
	"""Increment the count of the cleaned form of ``str_word``.

	The word is lower-cased and passed through ``remove_invalid_characters``.
	The count is bumped only if the cleaned word is longer than one character
	and ``is_stopword`` does not flag it.

	Args:
		str_word: Raw word to count.
		dict_int_words: Mapping of cleaned word -> integer count; updated in
			place.

	Returns:
		None.
	"""
	str_word = remove_invalid_characters(str_word.lower())

	# After cleaning, the word may have no letters left, e.g. if it was ";)".
	# Test by value/truthiness rather than ``is not ''``: identity against a
	# literal depends on string interning and raises SyntaxWarning on 3.8+.
	# ``not str_word`` also skips a None result, should the helper return one.
	if not str_word:
		return

	if (not is_stopword(str_word)) and len(str_word) > 1:
		# NOTE(review): ``+= 1`` needs the key to exist already or the mapping
		# to supply a default (e.g. defaultdict(int) / Counter) - confirm
		# against the caller.
		dict_int_words[str_word] += 1
Esempio n. 5
0
def clear_text(text):
    """Return the words of ``text`` that survive the module's word filters.

    ``text`` is translated with ``punct_translate_tab`` and split on
    whitespace. Each resulting word is then checked against the filters
    below, in order, and dropped at the first one that matches. Surviving
    words are returned in their original order and casing.

    Args:
        text: Input string to clean.

    Returns:
        List of the words that passed every filter.
    """
    kept = []
    for token in text.translate(punct_translate_tab).split():
        lowered = token.lower()

        # Stopwords are discarded unless the word is listed in dual_mean.
        if is_stopword(lowered) and token not in dual_mean:
            continue

        # Words containing any fragment from word_in are discarded.
        if any(fragment in token for fragment in word_in):
            continue

        # Words starting with (or equal to) an entry of word_start are discarded.
        if any((token.startswith(prefix) or token == prefix)
               for prefix in word_start):
            continue

        # Explicitly black-listed words, compared case-insensitively.
        if lowered in word_remove_list:
            continue

        # Numeric tokens are discarded.
        if isNumber(token):
            continue

        kept.append(token)

    return kept