for text in texts: splitted_text = "POEMSTART " text = text_to_word_sequence(text, filters=text_filter, lower=True, split=" ") for word in text: if word == "\n": word = "LINEEND" splitted_text += " " + word continue for syllable in hyphenate_word(word): splitted_text += " " + syllable splitted_text += " POEMEND" splitted_texts.append(splitted_text) progressbar.count() print("") # Create an initial tokenizer text_tokenizer = Tokenizer(filters=text_filter, lower=True, split=" ", char_level=False) text_tokenizer.fit_on_texts(splitted_texts) # Generate a list of words that occur more than n times # Generate a list of words that occur less than n times less_occurring_words = [] more_occurring_words = [] progressbar = ProgressBar(len(text_tokenizer.word_counts.items())) for word in text_tokenizer.word_counts.items():