Ejemplo n.º 1
0
 def normalize(self):
     """Return the text form of this object used for further processing.

     When ``self.text_pron`` is set (not ``None``), ``self.text`` is
     returned unchanged. Otherwise ``self.original`` is run through the
     module-level ``normalize`` helper with ``strip`` and
     ``rm_apostrophe_end`` both disabled.
     """
     if self.text_pron is not None:
         return self.text
     # Inside a method body the bare name resolves to the module-level
     # ``normalize`` function, not to this method.
     return normalize(self.original, strip=False, rm_apostrophe_end=False)
Ejemplo n.º 2
0
 def add_subword_to_tokens(self, sub_word):
     """Dispatch ``sub_word`` to the appropriate token-building method.

     * If ``self.just_append`` is set, the sub-word is appended to the last
       token.
     * Otherwise, a separator is handed to ``process_separator``.
     * Anything else goes through ``append_with_miss``; if its normalized
       form is not made only of consonants, ``self.just_append`` is switched
       on afterwards.
     """
     if self.just_append:
         self.append_to_last_token(sub_word)
         return
     if self.is_separator(sub_word):
         self.process_separator(sub_word)
         return
     # Evaluate the consonant test before appending, as the sub-word is
     # appended in both outcomes and only the flag update differs.
     consonants_only = is_consonants(normalize(sub_word))
     self.append_with_miss(sub_word)
     if not consonants_only:
         self.just_append = True
Ejemplo n.º 3
0
 def elide_inside_words(self, all_next_chunks):
     """Set ``self.weights`` for an "e" that sits before a hyphen.

     ``all_next_chunks`` is indexed from 0, so it must contain at least one
     chunk. A chunk whose text is ``"e-"`` always gets weights ``[0]``.
     A chunk whose text is ``"e"`` and whose next chunk's text starts with
     ``"-h"`` gets ``[0]`` or ``[1]`` depending on the ``haspirater`` lookup
     of the word that follows the hyphen.
     """
     if self.text == "e-":
         self.weights = [0]  # force elision
     following = all_next_chunks[0]
     if self.text != "e" or not following.text.startswith("-h"):
         return
     # Rebuild the word after the hyphen: the part of the next chunk that
     # follows its first hyphen, then each later chunk up to its first
     # hyphen, stopping at the first later chunk that contains one.
     pieces = [following.original.split('-')[1]]
     for later_chunk in all_next_chunks[1:]:
         pieces.append(later_chunk.original.split('-')[0])
         if '-' in later_chunk.original:
             break
     rebuilt = ''.join(pieces)
     # TODO: not sure if this reconstruction of the original word is bulletproof...
     self.weights = [0] if haspirater.lookup(normalize(rebuilt)) else [1]
Ejemplo n.º 4
0
def elision(word, original_word, was_cap):
    """Return the possible elision outcomes before ``word``.

    Args:
        word: the word to examine, in the lowercase form the literals below
            are compared against. Must be non-empty unless it is handled by
            one of the special cases, since ``word[0]`` is read.
        original_word: the untouched spelling; it is only used (after
            normalization) for the ``haspirater`` lookup of h-initial words.
        was_cap: truthy when the word was capitalized in the source text.

    Returns:
        A list of booleans: ``[True]`` when elision happens, ``[False]``
        when it does not, ``[True, False]`` when both are possible.
    """
    if word.startswith('y'):
        if word == 'y':
            return [True]
        if not was_cap:
            # only these prefixes are elided; otherwise, no elision
            elided_prefixes = ("york", "yeux", "yeuse", "ypérite")
            return [word.startswith(elided_prefixes)]
        if word == 'york':
            return [False]
        # Grevisse, Le Bon usage, 14th ed., paragraphs 49-50
        # depends on whether it's French or foreign...
        return [True, False]

    # elision for those words, but beware, no elision for "ouighour"
    # boileau : "Ont l'esprit mieux tourné que n'a l'homme ? Oui sans doute."
    # so elision sometimes
    if word in ("oui", "ouis"):
        return [True, False]
    if word.startswith(("ouistiti", "ouagadougou")):
        return [False]

    # "un", "une" are non-elided as nouns ("cette une")
    if word in ("un", "une"):
        return [True, False]

    # "onze" is not elided
    if word == "onze":
        return [False]

    # ululement, ululer, etc. / uhlan
    if word.startswith(('ulul', 'uhlan')):
        return [False]

    initial = word[0]
    if initial == 'h':
        # special case, "huis" is elided but "huis clos" isn't
        if word == "huis":
            return [True, False]
        # look up in haspirater using the original (but normalized) word
        # and negate every answer it gives
        return [
            not answer
            for answer in haspirater.lookup(normalize(original_word))
        ]

    return [True] if is_vowels(initial) else [False]
Ejemplo n.º 5
0
    def elision_wrap(self, chunk_group):
        """Compute ``self.elision`` and ``self.causes_hiatus`` for a word.

        ``chunk_group`` is the sequence of chunks making up one word; their
        ``text`` and ``original`` attributes are concatenated to rebuild the
        word in both forms. The word counts as capitalized when the first
        chunk's original text (stripped, punctuation removed) is equal to
        its own uppercase form.
        """
        head = common.remove_punctuation(chunk_group[0].original.strip())
        was_cap = head == head.upper()
        word = ''.join(chunk.text for chunk in chunk_group)
        original_word = ''.join(chunk.original for chunk in chunk_group)
        self.elision = elision(word, original_word, was_cap)

        self.causes_hiatus = False
        initial = word[0]
        if is_vowels(initial):
            # "oui, oui" often occurs
            self.causes_hiatus = word not in ("oui", "ouis")
        elif initial == 'h':
            negated = [
                not answer
                for answer in haspirater.lookup(normalize(original_word))
            ]
            # Hiatus only when the lookup gives exactly one answer and its
            # negation is True.
            self.causes_hiatus = negated == [True]
Ejemplo n.º 6
0
    def __init__(self, word, verse):
        """Create a chunk from the raw string ``word`` belonging to ``verse``.

        ``original`` keeps the raw string and ``text`` its normalized form
        (with apostrophes removed). Every analysis attribute starts as
        ``None`` (``word_end`` starts as ``False``) until it is assigned
        elsewhere.
        """
        self.original = word
        self.text = normalize(word, rm_apostrophe=True)

        # Analysis results, not known yet at construction time.
        self.hemistiche = self.error = self.illegal_str = None
        self.weights = self.had_hyphen = self.text_pron = None
        self.elision = self.no_hiatus = None
        self.causes_hiatus = self.elidable = None
        self.word_end = False

        # self.weight contains the weight attributed to the chunk when fitting
        # all chunks of the verse (function fit in chunks.py) to respect the
        # metric
        self.weight = None

        self.verse = verse
Ejemplo n.º 7
0
    def check(self,
              line,
              output_file=None,
              last=False,
              n_syllables=None,
              offset=0):
        """Check one line and wrap the outcome in an ``ErrorCollection``.

        The line counter is incremented and trailing whitespace is removed
        from ``line``. A line that normalizes to the empty string is skipped
        (``None`` is returned) unless ``last`` is set. Otherwise the line is
        passed to ``self.match`` with the remaining arguments. When errors
        were found and ``self.reject_errors`` is set, ``self.back()`` is
        called and the line counter is decremented before the result is
        built.
        """
        self.line_no += 1
        line = line.rstrip()
        is_blank = normalize(line) == ''
        if is_blank and not last:
            return None

        match_result = self.match(line,
                                  output_file,
                                  last=last,
                                  n_syllables=n_syllables,
                                  offset=offset)
        errors, pattern, verse = match_result
        if errors and self.reject_errors:
            # Undo the step taken for this line and give its number back.
            self.back()
            self.line_no -= 1
        return error.ErrorCollection(self.line_no, line, pattern, verse,
                                     errors)
Ejemplo n.º 8
0
    def match(self,
              line,
              output_file=None,
              last=False,
              n_syllables=None,
              offset=0):
        """Check a line against current pattern, return errors

        The current pattern is fetched with ``self.get()`` and a ``Verse`` is
        built for ``line``. The checks below are then applied in order:
        rhyme sound, repeated last word (only when the
        ``check_occurrences`` option is set), rhyme "eye" constraint,
        problems reported by the verse itself, and rhyme genre.

        Args:
            line: the text of the line to check.
            output_file: forwarded to ``verse.print_n_syllables`` and
                ``verse.print_possible``.
            last: when truthy, no line check is performed; only the
                incomplete-template condition is evaluated.
            n_syllables: when truthy, the verse only prints its syllable
                information and no check is performed.
            offset: forwarded to ``verse.print_n_syllables``.

        Returns:
            A ``(errors, pattern, verse)`` tuple, where ``errors`` is a list
            of error objects (empty when nothing was detected).
        """

        # Must be computed before self.get() is called below.
        # NOTE(review): presumably because get() can change self.beyond --
        # confirm against get().
        was_incomplete = last and not self.beyond

        errors = []
        pattern = self.get()

        # Case-preserving normalization, used only to extract the last word
        # for the occurrence check.
        line_with_case = normalize(line, downcase=False)

        verse = Verse(line, self, pattern)

        # Syllable-printing mode: print and return with no errors.
        if n_syllables:
            verse.print_n_syllables(n_syllables, offset, output_file)
            return errors, pattern, verse

        # End of input: the only possible error is an incomplete template.
        if last:
            if was_incomplete and not self.options[
                    'incomplete_ok'] and not self.overflowed:
                return [error.ErrorIncompleteTemplate()], pattern, verse
            return [], pattern, verse

        if self.overflowed:
            return [error.ErrorOverflowedTemplate()], pattern, verse

        rhyme_failed = False
        # rhymes
        # self.env maps a pattern's my_id to the Rhyme object tracking it.
        if pattern.my_id not in self.env:
            # initialize the rhyme
            # last_count is passed later
            self.env[pattern.my_id] = rhyme.Rhyme(verse.normalized,
                                                  pattern.constraint,
                                                  self.mergers, self.options)
        else:
            # update the rhyme
            self.env[pattern.my_id].feed(verse.normalized, pattern.constraint)
            if not self.env[pattern.my_id].satisfied_phon():
                # no more possible rhymes, something went wrong, check phon
                self.env[pattern.my_id].rollback()
                rhyme_failed = True
                errors.append(
                    error.ErrorBadRhymeSound(
                        self.env[pattern.my_id],
                        self.env[pattern.my_id].new_rhyme))

        # occurrences
        # Count, per my_id, how many times each last word (the final piece
        # after splitting on hyphens and spaces) has been seen; report an
        # error once the count exceeds nature_count(last_word).
        if self.options['check_occurrences']:
            if pattern.my_id not in self.occurrence_environment.keys():
                self.occurrence_environment[pattern.my_id] = {}
            last_word = re.split(r'[- ]', line_with_case)[-1]
            if last_word not in self.occurrence_environment[
                    pattern.my_id].keys():
                self.occurrence_environment[pattern.my_id][last_word] = 0
            self.occurrence_environment[pattern.my_id][last_word] += 1
            if self.occurrence_environment[
                    pattern.my_id][last_word] > nature_count(last_word):
                # Inserted at the front, ahead of any rhyme error.
                errors.insert(
                    0,
                    error.ErrorMultipleWordOccurrence(
                        last_word,
                        self.occurrence_environment[pattern.my_id][last_word]))

        # The verse is given the rhyme's phon value before it is parsed.
        verse.phon = self.env[pattern.my_id].phon
        verse.parse()

        # now that we have parsed, adjust rhyme to reflect last word length
        # and check eye
        if not rhyme_failed:
            self.env[pattern.my_id].adjust_last_count(verse.get_last_count())
            if not self.env[pattern.my_id].satisfied_eye():
                # Record the length of phon before the rollback, for the
                # error report.
                old_phon = len(self.env[pattern.my_id].phon)
                self.env[pattern.my_id].rollback()
                errors.append(
                    error.ErrorBadRhymeEye(self.env[pattern.my_id],
                                           self.env[pattern.my_id].new_rhyme,
                                           old_phon))

        # Problems found by the verse itself come before the errors
        # collected above.
        errors = verse.problems() + errors

        verse.print_possible(output_file)

        # rhyme genres
        # inequality constraint
        # TODO this is simplistic and order-dependent
        # If the case-swapped id is already known, this id starts from the
        # genders that the swapped id has not kept (when any remain).
        if pattern.feminine_id.swapcase() in self.feminine_environment.keys():
            new = {
                'M', 'F'
            } - self.feminine_environment[pattern.feminine_id.swapcase()]
            if len(new) > 0:
                self.feminine_environment[pattern.feminine_id] = new
        # First time this id is seen: 'M' and 'F' are fixed genders, any
        # other id starts with both allowed.
        if pattern.feminine_id not in self.feminine_environment.keys():
            if pattern.feminine_id == 'M':
                x = {'M'}
            elif pattern.feminine_id == 'F':
                x = {'F'}
            else:
                x = {'M', 'F'}
            self.feminine_environment[pattern.feminine_id] = x
        # Intersect the allowed genders with those of this verse; an empty
        # result is a genre error.
        old = list(self.feminine_environment[pattern.feminine_id])
        new = verse.genders()
        self.feminine_environment[pattern.feminine_id] &= set(new)
        if len(self.feminine_environment[pattern.feminine_id]) == 0:
            errors.append(error.ErrorBadRhymeGenre(old, new))

        return errors, pattern, verse
Ejemplo n.º 9
0
 def testOnlyHyphens(self):
     # A string made only of hyphens must normalize to the empty string.
     self.assertEqual(common.normalize("-----"), "")