def normalize(self):
    """Return the normalized text of this chunk.

    When a pronunciation override (text_pron) exists, the already-normalized
    text is returned as-is; otherwise the original spelling is normalized,
    keeping surrounding whitespace and any trailing apostrophe.
    """
    if self.text_pron is not None:
        return self.text
    return normalize(self.original, strip=False, rm_apostrophe_end=False)
def add_subword_to_tokens(self, sub_word):
    """Route one sub-word into the token stream being built.

    Dispatch order matters: a pending just_append flag wins, then
    separators, then consonant-only fragments, then everything else.
    """
    if self.just_append:
        # previous sub-word requested that following pieces be glued on
        self.append_to_last_token(sub_word)
    elif self.is_separator(sub_word):
        self.process_separator(sub_word)
    elif is_consonants(normalize(sub_word)):
        # consonant-only fragment: recorded like the general case, but
        # without raising the just_append flag
        self.append_with_miss(sub_word)
    else:
        self.append_with_miss(sub_word)
        # NOTE(review): flag placement assumed to be inside this branch so
        # that only the general case glues subsequent sub-words — confirm
        # against upstream plint verse handling
        self.just_append = True
def elide_inside_words(self, all_next_chunks):
    """Decide elision weights for a word-internal 'e' around hyphens.

    all_next_chunks: the chunks following this one in the verse; assumed
    non-empty here (all_next_chunks[0] is read unconditionally) — TODO
    confirm callers guarantee this.
    """
    if self.text == "e-":
        self.weights = [0]  # force elision
    next_chunk = all_next_chunks[0]
    if self.text == "e" and next_chunk.text.startswith("-h"):
        # collect what follows until the next hyphen or end
        flw = next_chunk.original.split('-')[1]
        for future_chunk in all_next_chunks[1:]:
            flw += future_chunk.original.split('-')[0]
            if '-' in future_chunk.original:
                break
        # TODO: not sure if this reconstruction of the original word is bulletproof...
        # aspirated h blocks elision; non-aspirated h allows it
        if haspirater.lookup(normalize(flw)):
            self.weights = [0]
        else:
            self.weights = [1]
def elision(word, original_word, was_cap):
    """Return the possible elision statuses of *word*.

    word: normalized form of the word; original_word: the word as written
    (used for the aspirated-h lookup); was_cap: whether the word was
    capitalized (proper nouns behave differently for 'y').

    Returns a list of booleans: [True] means elision, [False] no elision,
    [True, False] means both readings are acceptable.
    """
    # Robustness: a word that normalized to nothing (e.g. pure punctuation)
    # cannot be elided; previously this crashed on word[0] below.
    if not word:
        return [False]
    if word.startswith('y'):
        if word == 'y':
            return [True]
        if was_cap:
            if word == 'york':
                return [False]
            # Grevisse, Le Bon usage, 14th ed., paragraphs 49-50
            # depends on whether it's French or foreign...
            return [True, False]
        else:
            exc = ["york", "yeux", "yeuse", "ypérite"]
            for w in exc:
                if word.startswith(w):
                    return [True]
            # otherwise, no elision
            return [False]
    if word in ["oui", "ouis"]:
        # elision for those words, but beware, no elision for "ouighour"
        # boileau : "Ont l'esprit mieux tourné que n'a l'homme ? Oui sans doute."
        # so elision sometimes
        return [True, False]
    if word.startswith("ouistiti") or word.startswith("ouagadougou"):
        return [False]
    # "un", "une" are non-elided as nouns ("cette une")
    if word in ["un", "une"]:
        return [True, False]
    # "onze" is not elided
    if word == "onze":
        return [False]
    if word.startswith('ulul'):
        return [False]  # ululement, ululer, etc.
    if word.startswith('uhlan'):
        return [False]  # uhlan
    if word[0] == 'h':
        if word == "huis":
            # special case, "huis" is elided but "huis clos" isn't
            return [True, False]
        # look up in haspirater using the original (but normalized) word;
        # aspirated h blocks elision, hence the negation
        return list(
            map((lambda s: not s),
                haspirater.lookup(normalize(original_word))))
    if is_vowels(word[0]):
        return [True]
    return [False]
def elision_wrap(self, chunk_group):
    """Compute elision possibilities and the hiatus flag for a word made of
    several hyphen-joined chunks."""
    # cleaned text of the leading chunk; an all-uppercase form is treated
    # as a capitalized word
    head = common.remove_punctuation(chunk_group[0].original.strip())
    word = ''.join(c.text for c in chunk_group)
    original_word = ''.join(c.original for c in chunk_group)
    self.elision = elision(word, original_word, head == head.upper())
    self.causes_hiatus = False
    if is_vowels(word[0]):
        # "oui, oui" often occurs
        self.causes_hiatus = word not in ["oui", "ouis"]
    elif word[0] == 'h':
        # negate the lookup: aspirated h does not cause hiatus
        non_aspirated = [not s
                         for s in haspirater.lookup(normalize(original_word))]
        if len(non_aspirated) == 1 and True in non_aspirated:
            self.causes_hiatus = True
def __init__(self, word, verse):
    """Create a chunk for *word*, keeping a back-reference to its *verse*."""
    self.original = word
    self.text = normalize(word, rm_apostrophe=True)
    # analysis fields, all unset until later passes fill them in
    self.hemistiche = self.error = self.illegal_str = None
    self.weights = self.had_hyphen = self.text_pron = None
    self.elision = self.no_hiatus = self.causes_hiatus = None
    self.elidable = None
    self.word_end = False
    # self.weight contains the weight attributed to the chunk when fitting
    # all chunks of the verse (function fit in chunks.py) to respect the
    # metric
    self.weight = None
    self.verse = verse
def check(self, line, output_file=None, last=False, n_syllables=None,
          offset=0):
    """Check one input line against the template (wrapper around match()).

    Returns None for skippable blank lines, otherwise an ErrorCollection.
    """
    self.line_no += 1
    stripped = line.rstrip()
    # blank lines are skipped, except when flushing the final line
    if normalize(stripped) == '' and not last:
        return None
    errors, pattern, verse = self.match(
        stripped, output_file, last=last, n_syllables=n_syllables,
        offset=offset)
    if errors and self.reject_errors:
        # undo the template advance so this line can be retried
        self.back()
        self.line_no -= 1
    return error.ErrorCollection(self.line_no, stripped, pattern, verse,
                                 errors)
def match(self, line, output_file=None, last=False, n_syllables=None,
          offset=0):
    """Check a line against current pattern, return errors.

    Returns (errors, pattern, verse). Mutates per-pattern state: the rhyme
    environment (self.env), occurrence counts, and the feminine/masculine
    genre environment.
    """
    # template was already exhausted before this (supposedly) last line
    was_incomplete = last and not self.beyond
    errors = []
    pattern = self.get()
    # keep the case: needed below to extract the last word for occurrences
    line_with_case = normalize(line, downcase=False)
    verse = Verse(line, self, pattern)
    if n_syllables:
        # syllable-counting mode: just print and skip all checking
        verse.print_n_syllables(n_syllables, offset, output_file)
        return errors, pattern, verse
    if last:
        if was_incomplete and not self.options[
                'incomplete_ok'] and not self.overflowed:
            return [error.ErrorIncompleteTemplate()], pattern, verse
        return [], pattern, verse
    if self.overflowed:
        return [error.ErrorOverflowedTemplate()], pattern, verse
    rhyme_failed = False
    # rhymes
    if pattern.my_id not in self.env:
        # initialize the rhyme
        # last_count is passed later
        self.env[pattern.my_id] = rhyme.Rhyme(verse.normalized,
                                              pattern.constraint,
                                              self.mergers, self.options)
    else:
        # update the rhyme
        self.env[pattern.my_id].feed(verse.normalized, pattern.constraint)
        if not self.env[pattern.my_id].satisfied_phon():
            # no more possible rhymes, something went wrong, check phon
            self.env[pattern.my_id].rollback()
            rhyme_failed = True
            errors.append(
                error.ErrorBadRhymeSound(
                    self.env[pattern.my_id],
                    self.env[pattern.my_id].new_rhyme))
    # occurrences
    if self.options['check_occurrences']:
        # count how often each rhyme-position word is reused per pattern id
        if pattern.my_id not in self.occurrence_environment.keys():
            self.occurrence_environment[pattern.my_id] = {}
        last_word = re.split(r'[- ]', line_with_case)[-1]
        if last_word not in self.occurrence_environment[
                pattern.my_id].keys():
            self.occurrence_environment[pattern.my_id][last_word] = 0
        self.occurrence_environment[pattern.my_id][last_word] += 1
        # a word may legitimately recur once per grammatical nature
        if self.occurrence_environment[
                pattern.my_id][last_word] > nature_count(last_word):
            errors.insert(
                0,
                error.ErrorMultipleWordOccurrence(
                    last_word,
                    self.occurrence_environment[pattern.my_id][last_word]))
    verse.phon = self.env[pattern.my_id].phon
    verse.parse()
    # now that we have parsed, adjust rhyme to reflect last word length
    # and check eye
    if not rhyme_failed:
        self.env[pattern.my_id].adjust_last_count(verse.get_last_count())
        if not self.env[pattern.my_id].satisfied_eye():
            old_phon = len(self.env[pattern.my_id].phon)
            self.env[pattern.my_id].rollback()
            errors.append(
                error.ErrorBadRhymeEye(self.env[pattern.my_id],
                                       self.env[pattern.my_id].new_rhyme,
                                       old_phon))
    # verse-internal problems are reported before rhyme problems
    errors = verse.problems() + errors
    verse.print_possible(output_file)
    # rhyme genres
    # inequality constraint
    # TODO this is simplistic and order-dependent
    if pattern.feminine_id.swapcase() in self.feminine_environment.keys():
        # the swapcased id must take the opposite genre(s)
        new = {
            'M', 'F'
        } - self.feminine_environment[pattern.feminine_id.swapcase()]
        if len(new) > 0:
            self.feminine_environment[pattern.feminine_id] = new
    if pattern.feminine_id not in self.feminine_environment.keys():
        # seed the allowed-genre set from the pattern id itself
        if pattern.feminine_id == 'M':
            x = {'M'}
        elif pattern.feminine_id == 'F':
            x = {'F'}
        else:
            x = {'M', 'F'}
        self.feminine_environment[pattern.feminine_id] = x
    old = list(self.feminine_environment[pattern.feminine_id])
    new = verse.genders()
    # narrow the allowed genres by what this verse can actually be
    self.feminine_environment[pattern.feminine_id] &= set(new)
    if len(self.feminine_environment[pattern.feminine_id]) == 0:
        errors.append(error.ErrorBadRhymeGenre(old, new))
    return errors, pattern, verse
def testOnlyHyphens(self):
    """A string containing only hyphens should normalize to nothing."""
    self.assertEqual(common.normalize("-----"), "")