コード例 #1
0
ファイル: omorfi.py プロジェクト: jiemakel/omorfi-old
    def analyse(this, token):
        """Analyse a token, falling back through the loaded analysers.

        The 'default' analyser is consulted first if it is loaded. When it
        is missing or gives no analyses, 'omor' is tried, and only if 'omor'
        is not loaded is 'ftb3' tried. When the 'omor' or 'ftb3' lookup
        finds nothing, a single placeholder analysis marked as an unknown
        guess with infinite weight is returned instead.

        The individual lookups are delegated to _analyse, which is also
        where the case variants (controlled by can_titlecase, can_uppercase
        and can_lowercase) are handled.

        Returns the analyses found; this is whatever the 'default' lookup
        produced (possibly empty) when no fallback analyser is loaded, or
        None when none of the three analysers is loaded.
        """
        def unknown_guess(output):
            # Bare attribute holder carrying the same fields the callers
            # read from real analyses: output and weight.
            class FakeAnal():
                pass
            placeholder = FakeAnal()
            placeholder.output = output
            placeholder.weight = float('inf')
            return [placeholder]

        found = None
        if 'default' in this.analysers:
            found = this._analyse(token, 'default')
        if found:
            return found

        if 'omor' in this.analysers:
            found = this._analyse(token, 'omor')
            if found:
                return found
            return unknown_guess('[WORD_ID=%s][GUESS=UNKNOWN]' % (token))

        if 'ftb3' in this.analysers:
            found = this._analyse(token, 'ftb3')
            if found:
                return found
            word_tag = convert_omor_tag('[WORD_ID=%s]' % (token), 'ftb3')
            guess_tag = convert_omor_tag('[GUESS=UNKNOWN]', 'ftb3')
            return unknown_guess(word_tag + guess_tag)

        # No fallback analyser available: hand back the 'default' result
        # as-is (empty), or None if 'default' was not loaded either.
        return found
コード例 #2
0
ファイル: omorfi.py プロジェクト: jiemakel/omorfi-old
 def _analyse(this, token, automaton):
     """Look up a token in one automaton, including case variants.

     The token is first looked up as given. Depending on the can_titlecase,
     can_uppercase and can_lowercase flags, title-cased, upper-cased and
     lower-cased variants are looked up as well. Analyses that come from a
     variant get a CASECHANGE tag appended to their output, and every
     analysis gets a WEIGHT tag. All tags are passed through
     convert_omor_tag for the given automaton.

     token is the surface string to analyse and automaton is the key into
     this.analysers selecting the analyser to use.

     Returns the combined analyses from all lookups that were performed.
     """
     res = libhfst.detokenize_paths(this.analysers[automaton].lookup_fd(token))
     # NOTE(review): title-casing is only attempted for tokens that start
     # with a lowercase letter and whose remainder is not already all
     # lowercase (e.g. "hELSINKI"); a plain lowercase token is not
     # title-cased here. Condition kept as in the original -- confirm intent.
     if len(token) > 2 and token[0].islower() and not token[1:].islower() and this.can_titlecase:
         # First letter upper, rest lower. The previous code lowercased the
         # first letter too, which merely duplicated the lowercase lookup.
         tcres = libhfst.detokenize_paths(this.analysers[automaton].lookup_fd(token[0].upper() + token[1:].lower()))
         for r in tcres:
             r.output = r.output + convert_omor_tag('[CASECHANGE=TITLECASED]',
                     automaton)
         res = res + tcres
     if not token.isupper() and this.can_uppercase:
         upres = libhfst.detokenize_paths(this.analysers[automaton].lookup_fd(token.upper()))
         for r in upres:
             r.output = r.output + convert_omor_tag('[CASECHANGE=UPPERCASED]',
                     automaton)
         res = res + upres
     if not token.islower() and this.can_lowercase:
         lowres = libhfst.detokenize_paths(this.analysers[automaton].lookup_fd(token.lower()))
         for r in lowres:
             r.output = r.output + convert_omor_tag('[CASECHANGE=LOWERCASED]',
                     automaton)
         res += lowres
     # The weight tag is appended last so it follows any CASECHANGE tag.
     for r in res:
         r.output = r.output + convert_omor_tag('[WEIGHT=%f]' %(r.weight),
                 automaton)
     return res