def add_word_to_timeline(str_word, words_per_time, timestamp):
    """Record ``timestamp`` as one occurrence of ``str_word``.

    The word is cleaned with ``remove_punctuation`` and lower-cased before it
    is used as a key. Nothing is recorded when the timestamp is the empty
    string, when the cleaner returns ``None``, when the cleaned word is a
    stopword, or when it is shorter than two characters.

    Args:
        str_word: Raw word to record.
        words_per_time: Mapping from cleaned word to the list of timestamps
            at which it was seen; updated in place.
        timestamp: Value appended to the word's list. An empty string means
            there is no usable timestamp and the word is skipped.
    """
    # Compare by value. ``timestamp is not ''`` tests object identity, which
    # only works by accident of string interning and is a SyntaxWarning on
    # Python 3.8+.
    if timestamp == '':
        return

    str_word = remove_punctuation(str_word)
    if str_word is None:
        return

    str_word = str_word.lower()
    if not is_stopword(str_word) and len(str_word) > 1:
        # Create the list on first sight of the word, then append.
        words_per_time.setdefault(str_word, []).append(timestamp)
def add_word_to_timeline(str_word, words_per_time, timestamp):
    """Record ``timestamp`` as one occurrence of ``str_word``.

    The word is cleaned with ``remove_invalid_characters`` and lower-cased
    before it is used as a key. Nothing is recorded when the timestamp is the
    empty string, when the cleaner returns ``None``, when the cleaned word is
    a stopword, or when it is shorter than two characters.

    Args:
        str_word: Raw word to record.
        words_per_time: Mapping from cleaned word to the list of timestamps
            at which it was seen; updated in place.
        timestamp: Value appended to the word's list. An empty string means
            there is no usable timestamp and the word is skipped.
    """
    # Compare by value. ``timestamp is not ''`` tests object identity, which
    # only works by accident of string interning and is a SyntaxWarning on
    # Python 3.8+.
    if timestamp == '':
        return

    str_word = remove_invalid_characters(str_word)
    if str_word is None:
        return

    str_word = str_word.lower()
    if not is_stopword(str_word) and len(str_word) > 1:
        # Create the list on first sight of the word, then append.
        words_per_time.setdefault(str_word, []).append(timestamp)
def handle_common_words(str_word, dict_int_words):
    """Count one occurrence of ``str_word`` in ``dict_int_words``.

    The word is lower-cased and cleaned with ``remove_punctuation``. If the
    result is non-empty, is not a stopword and has at least two characters,
    its count is inserted (starting at 1) or incremented.

    Args:
        str_word: Raw word to count.
        dict_int_words: Mapping from cleaned word to its integer count;
            updated in place.
    """
    str_word = remove_punctuation(str_word.lower())

    # After cleaning, the word may have no letters left, e.g. if it was ";)".
    # A truthiness check replaces the identity test ``is not ''`` (unreliable,
    # and a SyntaxWarning on Python 3.8+) and also skips a ``None`` result
    # instead of failing later on ``len(None)``.
    if not str_word:
        return

    if not is_stopword(str_word) and len(str_word) > 1:
        # ``get`` lets a first-seen word be inserted even when the mapping is
        # a plain dict; counter-like mappings behave the same as with ``+= 1``.
        dict_int_words[str_word] = dict_int_words.get(str_word, 0) + 1
def handle_common_words(str_word, dict_int_words):
    """Count one occurrence of ``str_word`` in ``dict_int_words``.

    The word is lower-cased and cleaned with ``remove_invalid_characters``.
    If the result is non-empty, is not a stopword and has at least two
    characters, its count is inserted (starting at 1) or incremented.

    Args:
        str_word: Raw word to count.
        dict_int_words: Mapping from cleaned word to its integer count;
            updated in place.
    """
    str_word = remove_invalid_characters(str_word.lower())

    # After cleaning, the word may have no letters left, e.g. if it was ";)".
    # A truthiness check replaces the identity test ``is not ''`` (unreliable,
    # and a SyntaxWarning on Python 3.8+) and also skips a ``None`` result
    # instead of failing later on ``len(None)``.
    if not str_word:
        return

    if not is_stopword(str_word) and len(str_word) > 1:
        # ``get`` lets a first-seen word be inserted even when the mapping is
        # a plain dict; counter-like mappings behave the same as with ``+= 1``.
        dict_int_words[str_word] = dict_int_words.get(str_word, 0) + 1
def clear_text(text):
    """Return the words of ``text`` that survive the module's word filters.

    ``text`` is passed through ``str.translate`` with ``punct_translate_tab``
    (presumably a punctuation-stripping table -- confirm where it is defined)
    and split on whitespace. A word is dropped when any of these holds:

    * its lower-cased form is a stopword and the word is not in ``dual_mean``;
    * it contains any entry of ``word_in`` as a substring;
    * it starts with, or equals, any entry of ``word_start``;
    * its lower-cased form is in ``word_remove_list``;
    * ``isNumber`` reports it as a number.

    Args:
        text: String to clean.

    Returns:
        List of the kept words, in their original order and original case.
    """
    kept_words = []
    for word in text.translate(punct_translate_tab).split():
        lowered = word.lower()

        # Stopwords are dropped unless explicitly whitelisted in dual_mean.
        if is_stopword(lowered) and word not in dual_mean:
            continue
        # Words containing a blacklisted fragment.
        if any(fragment in word for fragment in word_in):
            continue
        # Words beginning with (or equal to) a blacklisted prefix.
        if any(word.startswith(prefix) or word == prefix
               for prefix in word_start):
            continue
        # Explicitly removed words, matched case-insensitively.
        if lowered in word_remove_list:
            continue
        if isNumber(word):
            continue

        kept_words.append(word)
    return kept_words