def analyse(this, token):
    """Look up morphological analyses for a single token.

    The configured analysers are consulted in a fixed order of preference:
    'default', then 'omor', then 'ftb3'.  A later analyser is only tried
    when everything before it produced nothing.  Case-variant lookups
    (title-, upper- and lowercased forms) are not done here; they are
    delegated to ``_analyse``.

    When the 'omor' or 'ftb3' analyser is consulted and finds nothing, a
    single placeholder analysis is returned instead.  Its ``output`` marks
    the token as an unknown guess and its ``weight`` is infinite.

    Args:
        token: surface string to analyse.

    Returns:
        The analyses of the first analyser that yields any, or a
        one-element list holding the placeholder described above.  If none
        of the known analysers is loaded the result is ``None``; if only
        'default' is loaded and finds nothing, its empty result is returned
        unchanged.
    """
    def unknown_reading(tag_string):
        # Minimal stand-in exposing the two attributes (output, weight)
        # that this method sets on a fallback analysis.
        class FakeAnal():
            pass
        placeholder = FakeAnal()
        placeholder.output = tag_string
        placeholder.weight = float('inf')
        return [placeholder]

    readings = None
    if 'default' in this.analysers:
        readings = this._analyse(token, 'default')

    if not readings and 'omor' in this.analysers:
        readings = this._analyse(token, 'omor')
        if not readings:
            readings = unknown_reading(
                '[WORD_ID=%s][GUESS=UNKNOWN]' % (token))

    if not readings and 'ftb3' in this.analysers:
        readings = this._analyse(token, 'ftb3')
        if not readings:
            # Same placeholder as above, but expressed in the ftb3 tag set.
            readings = unknown_reading(
                convert_omor_tag('[WORD_ID=%s]' % (token), 'ftb3')
                + convert_omor_tag('[GUESS=UNKNOWN]', 'ftb3'))

    return readings
def _analyse(this, token, automaton):
    """Run *token* and its enabled case variants through one analyser.

    The token is always looked up as given.  Depending on the instance
    flags ``can_titlecase``, ``can_uppercase`` and ``can_lowercase``, it is
    additionally looked up title-cased, upper-cased and lower-cased.  Each
    analysis found through a case variant gets a ``[CASECHANGE=...]`` tag
    (converted with ``convert_omor_tag`` for *automaton*) appended to its
    ``output``.  Finally every analysis gets a ``[WEIGHT=...]`` tag built
    from its ``weight``.

    Args:
        token: surface string to analyse.
        automaton: key into ``this.analysers`` selecting the analyser; it
            is also passed to ``convert_omor_tag`` to pick the tag format.

    Returns:
        The concatenated analyses in the order: as given, title-cased,
        upper-cased, lower-cased.

    Raises:
        KeyError: if *automaton* is not a key of ``this.analysers``.
    """
    analyser = this.analysers[automaton]

    def lookup(surface, case_tag=None):
        # Look up one surface form and, for case variants, mark how the
        # form was derived from the original token.
        paths = libhfst.detokenize_paths(analyser.lookup_fd(surface))
        if case_tag is not None:
            for path in paths:
                path.output = path.output + convert_omor_tag(case_tag,
                                                             automaton)
        return paths

    res = lookup(token)

    # NOTE(review): this guard only fires when the tail is NOT entirely
    # lowercase (e.g. "hELLO"); a fully lowercase token is never
    # title-cased.  Kept as written -- confirm that this is intended.
    if (len(token) > 2 and token[0].islower()
            and not token[1:].islower() and this.can_titlecase):
        # Fixed: the first letter must be upper-cased; it used to be
        # lower-cased, which made this a duplicate of the lowercase lookup.
        titlecased = token[0].upper() + token[1:].lower()
        res = res + lookup(titlecased, '[CASECHANGE=TITLECASED]')

    if not token.isupper() and this.can_uppercase:
        # Fixed: this branch used to iterate an undefined name, pass the
        # automaton with '.' instead of ',', and append the title-case
        # results instead of its own.
        res = res + lookup(token.upper(), '[CASECHANGE=UPPERCASED]')

    if not token.islower() and this.can_lowercase:
        res = res + lookup(token.lower(), '[CASECHANGE=LOWERCASED]')

    for r in res:
        r.output = r.output + convert_omor_tag('[WEIGHT=%f]' % (r.weight),
                                               automaton)
    return res