def __init__(self, agglutinative):
    """
    Set up the automaton with a dot as the empty-transition marker
    and create the (initially empty) lookup tables.

    agglutinative -- flag stored on the instance as-is; parse()
    reads it to decide whether its results need extra filtering.
    """
    # The dot is handed to the base class as its `empty` marker.
    FlexAutomaton.__init__(self, empty=u'.')

    # Tables below start out empty; this method does not fill them.
    self.paradigms = {}
    self.pMetrics = {}

    # inflection -> set(subsequences).
    # ATTENTION: this attribute is populated from the outside!
    self.prefixes = {}

    self.nullFlex = {}
    self.lens = {}

    self.agglutinative = agglutinative
def add(self, flex):
    """
    Register an inflection in the data structure in every
    dot-position variant.

    Some dots may be missing from the inflection, so it is stored
    bare, with a trailing dot, with a leading dot, and with both.
    """
    for template in (u'%s', u'%s.', u'.%s', u'.%s.'):
        # Fill the inflection into the template, then let the
        # `_replace` pattern substitute its matches with the marker
        # (presumably: dots -> the empty-transition sign; confirm
        # against where `_replace` and `marker` are defined).
        key = self._replace.sub(self.marker, template % flex)

        # Keys carry the marker form; the value stays the inflection
        # exactly as it was passed in.
        self.contents[key] = flex

        FlexAutomaton.add(self, key)
def parse(self, token):
    """
    Return the possible (inflection, stem) options for the token.

    The options come from FlexAutomaton.parse. When the instance is
    agglutinative, every option whose inflection is listed in
    self.prefixes for some inflection among the options is dropped.
    """
    candidates = FlexAutomaton.parse(self, token)

    if self.agglutinative:
        # First pass: gather everything registered in self.prefixes
        # for the inflections that were found.
        shadowed = set()
        for inflection, stem in candidates:
            registered = self.prefixes.get(inflection, None)
            if registered:
                shadowed |= registered

        # Second pass: keep only the options that were not gathered.
        candidates = [(inflection, stem)
                      for inflection, stem in candidates
                      if inflection not in shadowed]

    return candidates
def __init__(self):
    """
    Create the empty `contents` mapping and initialise the base
    automaton with a number sign as its empty-transition marker.
    """
    # `contents` is created before the base initialiser runs,
    # exactly as in the original statement order.
    self.contents = {}
    FlexAutomaton.__init__(self, empty=u'#')