def __init__(self):
    """Give the new instance its own default-constructed CustomDictionary."""
    dictionary = CustomDictionary()
    self.custom_dictionary = dictionary
elif o == "--input": input_file = a else: print "Unhandled Option\n" logger.critical("Unhandled Option") usage() sys.exit(2) if not input_file: print "No input file set, use --input option." usage() logger.critical("No input file set") sys.exit(2) try: raw_tex = open(input_file, "r").read() except IOError as e: print "Can't read input file:" + str(e) + "\n" logger.critical("Can't read input file : " + str(e)) sys.exit(2) custom_dictionary = CustomDictionary(no_dictionary_update=no_dictionary_update) haiku_list = find_haiku_in_tex(raw_tex, custom_dictionary) custom_dictionary.save_dict() logger.info("Found the following haiku: " + str(haiku_list)) if len(haiku_list)==0: print "Found no Haiku, sorry :(" else: print "Found the following Haiku:" for haiku in haiku_list: print haiku
class HaikuFinder(object):
    """Find haiku (5-7-5 syllables) hidden in consecutive clauses of text.

    Syllables are counted with the module-level ``cmudictionary``; words that
    are missing from it are looked up in ``self.custom_dictionary`` instead.
    """

    #Syllable counts that three consecutive clauses must have to be a haiku
    _HAIKU_SYLLABLES = (5, 7, 5)

    def __init__(self):
        self.custom_dictionary = CustomDictionary()

    def save_dict(self):
        """Ask the custom dictionary to save itself."""
        self.custom_dictionary.save_dict()

    def find_haiku_in_text(self, raw_text):
        """Return a list with every haiku found in ``raw_text``.

        The text is split into paragraphs at double line breaks and every
        paragraph is searched on its own, so a haiku never spans paragraphs.
        """
        haiku_found = []
        #Split at double line breaks
        for paragraph in raw_text.split("\n\n"):
            #Everything matched by nonalphanumeric_pattern (per the original
            #notes: single line breaks, tabs and other non-alphanumeric,
            #non-punctuation characters) is replaced with a space
            cleaned = nonalphanumeric_pattern.sub(' ', paragraph).strip()
            clauses = split_at_punctuation(cleaned)
            #A haiku needs three clauses
            if len(clauses) < 3:
                continue
            counted = self.count_syllables_in_clauses(clauses)
            haiku_found.extend(self.find_haiku_in_clauses(counted))
        return haiku_found

    def find_haiku_in_clauses(self, clauses):
        """Return the haiku formed by three consecutive entries of ``clauses``.

        ``clauses`` is a sequence of (text, ending_punctuation,
        syllable_count) entries.  Every window of three neighbours whose
        counts are 5, 7 and 5 gives one haiku string: each text followed by
        its punctuation, the three parts separated by single spaces.
        Windows may overlap.  A count of None never matches.
        """
        haiku_found = []
        for start in range(len(clauses) - 2):
            window = [clauses[start], clauses[start + 1], clauses[start + 2]]
            counts = tuple(entry[2] for entry in window)
            if counts == self._HAIKU_SYLLABLES:
                haiku_found.append(
                    " ".join(entry[0] + entry[1] for entry in window))
        return haiku_found

    @staticmethod
    def _total_syllables(counts):
        """Add up ``counts``; None if any entry is None, 0 if there are none."""
        total = 0
        for count in counts:
            if count is None:
                return None
            total += count
        return total

    def count_syllables_in_clause(self, clause):
        """Return the syllable count of ``clause`` (words split on whitespace).

        Returns None if any word isn't recognised, and 0 for a clause that
        holds no words at all.
        """
        #Build the whole list first so every word is looked up, even after
        #an unrecognised one has been seen
        counts = [self.number_of_syllables(word) for word in clause.split()]
        return self._total_syllables(counts)

    def count_syllables_in_clauses(self, clauses_with_punctuation):
        """Add a syllable count to every (clause, ending_punctuation) pair.

        Returns a list of (clause, ending_punctuation, syllable_count)
        tuples in the same order as the input.
        """
        return [(clause, ending_punctuation,
                 self.count_syllables_in_clause(clause))
                for (clause, ending_punctuation) in clauses_with_punctuation]

    def number_of_syllables(self, word):
        """Return the number of syllables in ``word``.

        Returns 0 for the empty string and None if the word isn't
        recognised by cmudictionary or by the custom dictionary.
        """
        if word == "":
            return 0

        #If the word is hyphenated then use the sum of the parts on each
        #side of the dash
        if "-" in word:
            parts = [self.number_of_syllables(part)
                     for part in word.split("-")]
            return self._total_syllables(parts)

        try:
            pronunciations = cmudictionary[word.lower()]
        except KeyError:
            #Not in cmudictionary: fall back on the custom dictionary, which
            #may itself answer None
            return self.custom_dictionary.number_of_syllables(word)

        #cmudict actually returns a list of phonetics, so by default choose
        #the first one; the sounds counted as syllables are the ones whose
        #last character is a digit
        return sum(1 for sound in pronunciations[0] if sound[-1].isdigit())