Ejemplo n.º 1
0
    def i_hyphenate_derived(self,aWord):
        """
        Hyphenate ``aWord`` piece by piece; meant for classes derived
        from ExplicitHyphenator.

        The word is first run through ``BaseHyphenator.i_hyphenate``. The
        stretches of text between the hyphenation points found there are
        the "subwords". Each subword is passed, via
        ``self.stripper.apply_stripped``, to ``BaseHyphenator.hyph`` (when
        it contains SHY) or to ``ExplicitHyphenator.hyph`` (otherwise);
        if that yields None, ``self.hyph`` is tried; if that yields None
        as well, the subword becomes a HyphenatedWord without any
        hyphenation points.

        Returns the single subword result, or ``HyphenatedWord.join`` of
        all subword results when there is more than one.

        TODO: The implementation does not match the docstring
              test: "hohenlimburg.de", "hohenlimburg.de)"
        """
        assert isinstance(aWord, unicode_type)

        def resolve(piece):
            # Pick the first-choice hyphenator depending on whether the
            # piece already carries a soft hyphen.
            if SHY in piece:
                first_choice = BaseHyphenator.hyph
            else:
                first_choice = ExplicitHyphenator.hyph
            result = self.stripper.apply_stripped(first_choice, self, piece)
            if result is None:
                # Still unknown: fall back to the (derived) class' own hyph.
                result = self.stripper.apply_stripped(self.hyph, self, piece)
            if result is None:
                # Nobody knows this piece: keep it unhyphenated.
                result = HyphenatedWord(piece, hyphenations=[])
            return result

        base_result = BaseHyphenator.i_hyphenate(self, aWord)
        if base_result is None:
            base_result = HyphenatedWord(aWord, hyphenations=[])

        pieces = []
        cut = 0   # indx of the previous hyphenation point
        skip = 0  # nr of the previous hyphenation point
        for point in base_result.hyphenations:
            if isinstance(point, int):
                # Plain integer positions are wrapped into a full point.
                point = HyphenationPoint(point, quality=5, sl=SHY)
            pieces.append(resolve(base_result[cut + skip:point.indx]))
            cut = point.indx
            skip = point.nr

        # Text after the last hyphenation point.
        # NOTE(review): this slice starts at the last point's indx without
        # adding its nr, unlike the slices taken inside the loop -- confirm
        # that this is intended.
        pieces.append(resolve(base_result[cut:]))

        if len(pieces) == 1:
            # The original note here said the result "may also be None";
            # resolve() above always substitutes a HyphenatedWord, though.
            return pieces[0]
        return HyphenatedWord.join(pieces)
Ejemplo n.º 2
0
 def hyph(self, aWord):
     """
     Wrap ``aWord`` in a HyphenatedWord whose hyphenation points are
     whatever ``self.zerlegeWort(aWord)`` returns.

     This method never returns None itself. Per the original author's
     note, "unknown" cannot occur here because the algorithm is pattern
     based and has no notion of words it does or does not know.
     """
     assert isinstance(aWord, unicode_type)
     points = self.zerlegeWort(aWord)
     return HyphenatedWord(aWord, hyphenations=points)
Ejemplo n.º 3
0
    def i_hyphenate_derived(self,aWord):
        """
        Split ``aWord`` at the points found by BaseHyphenator and
        hyphenate every resulting "subword" on its own. Intended for
        classes derived from ExplicitHyphenator.

        For each subword the hyphenators are tried in this order, always
        through ``self.stripper.apply_stripped``:

        1. ``BaseHyphenator.hyph`` if the subword contains SHY, else
           ``ExplicitHyphenator.hyph``;
        2. ``self.hyph``.

        The first result that is not None wins; if both return None the
        subword is kept as a HyphenatedWord without hyphenation points.

        Returns the only subword result if there is just one, otherwise
        ``HyphenatedWord.join`` of all of them.

        TODO: The implementation does not match the docstring
              test: "hohenlimburg.de", "hohenlimburg.de)"
        """
        assert isinstance(aWord, unicode)

        def hyphenate_piece(piece):
            primary = BaseHyphenator.hyph if SHY in piece else ExplicitHyphenator.hyph
            for candidate in (primary, self.hyph):
                found = self.stripper.apply_stripped(candidate, self, piece)
                if found is not None:
                    return found
            # Unknown to every hyphenator: leave the piece unhyphenated.
            return HyphenatedWord(piece, hyphenations=[])

        whole = BaseHyphenator.i_hyphenate(self, aWord)
        if whole is None:
            whole = HyphenatedWord(aWord, hyphenations=[])

        results = []
        prev_indx, prev_nr = 0, 0
        for hyph_point in whole.hyphenations:
            if isinstance(hyph_point, int):
                # Bare integer positions are promoted to full points.
                hyph_point = HyphenationPoint(hyph_point, quality=5, sl=SHY)
            piece = whole[prev_indx + prev_nr:hyph_point.indx]
            results.append(hyphenate_piece(piece))
            prev_indx, prev_nr = hyph_point.indx, hyph_point.nr

        # Remainder after the last hyphenation point.
        # NOTE(review): prev_nr is not added to the start of this slice,
        # although it is for the slices inside the loop -- confirm intent.
        results.append(hyphenate_piece(whole[prev_indx:]))

        if len(results) > 1:
            return HyphenatedWord.join(results)
        # The original note here said this "may also be None"; in the code
        # shown, hyphenate_piece always yields a HyphenatedWord fallback.
        return results[0]
 def hyph(self, word):
     """
     Look ``word`` up in the special-case tables of ``self.sonderfaelle``.

     ``self.sonderfaelle`` is iterated as ``(length, mapping)`` pairs.
     Only the first pair whose length equals ``len(word)`` is consulted,
     using the lower-cased word as key. On a hit, the stored value is
     passed through ``decodeTrennung`` and returned wrapped in a
     HyphenatedWord for the original (not lower-cased) word.

     Returns None when the word is not found.
     """
     wanted_len = len(word)
     for entry_len, table in self.sonderfaelle:
         if entry_len != wanted_len:
             continue
         encoded = table.get(word.lower())
         if encoded is None:
             # Later tables are not searched once a length matched.
             break
         return HyphenatedWord(word, decodeTrennung(encoded))
     # Word not found.
     return None
Ejemplo n.º 5
0
 def hyph(self, word):
     """
     Hyphenate ``word`` from the decompositions returned by
     ``self.zerlegeWort``.

     * No decomposition: the word is unknown, None is returned.
     * Exactly one decomposition: its hyphenation points are used as
       they are (an empty list means the word cannot be hyphenated).
     * Several decompositions (ambiguous, e.g. "WachsTube" versus
       "WachStube"): only those points of the first decomposition that
       occur with identical indx, nl, sl, nr and sr in every other
       decomposition are kept, each with the lowest quality found among
       the matching points. Points whose quality ends up falsy are
       dropped, so the result may have no hyphenation point at all
       (or, e.g., only "Wachstu-be").
     """
     log.debug("DCW hyphenate %r", word)
     assert isinstance(word, unicode)
     solutions = self.zerlegeWort(word)
     n_solutions = len(solutions)

     if n_solutions == 0:
         # The word is unknown to us.
         return None
     if n_solutions == 1:
         # The decomposition is unambiguous.
         return HyphenatedWord(word, solutions[0])

     # Ambiguous: intersect the first solution with all the others.
     reference, alternatives = solutions[0], solutions[1:]
     shared = []
     for point in reference:
         worst = point.quality
         for alternative in alternatives:
             if not worst:
                 # Already ruled out; nothing left to check.
                 break
             twin = next(
                 (cand for cand in alternative
                  if cand.indx == point.indx
                  and cand.nl == point.nl and cand.sl == point.sl
                  and cand.nr == point.nr and cand.sr == point.sr),
                 None)
             if twin is None:
                 # Point is missing from this alternative solution.
                 worst = 0
             else:
                 worst = min(worst, twin.quality)
         if worst:
             shared.append(
                 HyphenationPoint(point.indx, worst, point.nl, point.sl,
                                  point.nr, point.sr))
     return HyphenatedWord(word, shared)