class HaikuFinder(object):
    """Find haiku (5-7-5 syllables) formed by three consecutive clauses.

    Syllable counts come from the module-level ``cmudictionary`` first and
    from a ``CustomDictionary`` instance for words missing there.
    """

    #Syllable counts three consecutive clauses need in order to form a haiku
    _HAIKU_PATTERN = [5, 7, 5]

    def __init__(self):
        self.custom_dictionary = CustomDictionary()

    def save_dict(self):
        """Delegate to the custom dictionary's own save_dict()."""
        self.custom_dictionary.save_dict()

    def find_haiku_in_text(self, raw_text):
        """Return a list with every haiku found in raw_text.

        Each paragraph is searched on its own, so a haiku never spans a
        double line break. Paragraphs with fewer than three clauses are
        skipped because they cannot hold a haiku.
        """
        haiku_found = []
        #Split at double line breaks
        for raw_paragraph in raw_text.split("\n\n"):
            #Everything matched by nonalphanumeric_pattern becomes a space
            #(presumably single line breaks, tabs and other characters that
            #are neither alphanumeric nor punctuation - the pattern is
            #defined outside this class)
            paragraph = nonalphanumeric_pattern.sub(' ', raw_paragraph).strip()
            clauses = split_at_punctuation(paragraph)
            if len(clauses) < 3:
                continue
            counted_clauses = self.count_syllables_in_clauses(clauses)
            haiku_found.extend(self.find_haiku_in_clauses(counted_clauses))
        return haiku_found

    def find_haiku_in_clauses(self, clauses):
        """Return the haiku formed by three consecutive entries of clauses.

        clauses is a sequence of (clause, ending_punctuation, syllable_count)
        entries. Windows may overlap, so one clause can take part in several
        haiku. Each haiku is the three clauses, each followed by its
        punctuation, joined by single spaces.
        """
        haiku_found = []
        #zip stops at the shortest slice, i.e. after the last full window
        for window in zip(clauses, clauses[1:], clauses[2:]):
            if [entry[2] for entry in window] == self._HAIKU_PATTERN:
                haiku_found.append(
                    " ".join(entry[0] + entry[1] for entry in window))
        return haiku_found

    def count_syllables_in_clause(self, clause):
        """Return the syllable count of clause, folded with sum_syllables.

        An empty or whitespace-only clause counts as 0 syllables. The result
        for a clause holding an unrecognised word is whatever sum_syllables
        yields for a None operand (presumably None - sum_syllables is
        defined outside this class).
        """
        from functools import reduce  #not a builtin on Python 3

        counts = [self.number_of_syllables(word) for word in clause.split()]
        if not counts:
            #reduce() without an initial value raises TypeError on an empty list
            return 0
        return reduce(sum_syllables, counts)

    def count_syllables_in_clauses(self, clauses_with_punctuation):
        """Return (clause, ending_punctuation, syllable_count) for each pair.

        clauses_with_punctuation is an iterable of
        (clause, ending_punctuation) pairs.
        """
        return [(clause, ending_punctuation,
                 self.count_syllables_in_clause(clause))
                for (clause, ending_punctuation) in clauses_with_punctuation]

    def number_of_syllables(self, word):
        """Return the number of syllables in word.

        The empty string has 0 syllables. Words missing from cmudictionary
        are delegated to the custom dictionary, whose result is returned
        unchanged (None when it does not recognise the word either).
        """
        if word == "":
            return 0

        #If the word is hyphenated then use the sum of the parts on each
        #side of the dash
        if "-" in word:
            from functools import reduce  #not a builtin on Python 3

            return reduce(sum_syllables,
                          [self.number_of_syllables(part)
                           for part in word.split("-")])

        #Only the lookup itself is expected to raise KeyError
        try:
            pronunciations = cmudictionary[word.lower()]
        except KeyError:
            return self.custom_dictionary.number_of_syllables(word)

        #cmudict actually returns a list of phonetics, so by default choose
        #the first; a phoneme ending in a digit is counted as a vowel sound
        return sum(1 for phoneme in pronunciations[0]
                   if phoneme[-1].isdigit())