コード例 #1
0
    def __init__(self, agglutinative):
        """
        Create an automaton with empty lookup tables.

        The FlexAutomaton base is initialised first, with a dot (u'.')
        passed as its ``empty`` marker.

        :param agglutinative: flag stored unchanged on the instance.
        """
        FlexAutomaton.__init__(self, empty=u'.')

        self.agglutinative = agglutinative

        # Tables that start out empty and are filled later.
        self.paradigms = {}
        self.pMetrics = {}
        self.nullFlex = {}
        self.lens = {}

        # inflection -> set(subsequences).
        # ATTENTION: this table is populated from outside this object!
        self.prefixes = {}
コード例 #2
0
    def add(self, flex):
        """
        Register an inflection together with its dot-padded variants.

        Some dots may be missing from the inflection as written, so four
        variants are generated: as is, with a trailing dot, with a leading
        dot, and with both. Each variant is passed through
        ``self._replace.sub`` with ``self.marker`` as the replacement
        (presumably this normalises dots to the marker; ``_replace`` is
        defined elsewhere, so confirm there).

        :param flex: the inflection to add.
        """
        templates = (u'%s', u'%s.', u'.%s', u'.%s.')
        for template in templates:
            padded = template % flex
            key = self._replace.sub(self.marker, padded)
            # Keys carry the marker form; the value keeps the inflection
            # exactly as it was passed in.
            self.contents[key] = flex
            FlexAutomaton.add(self, key)
コード例 #3
0
    def parse(self, token):
        """
        Return the possible (inflection, stem) options for a token.

        The options come from ``FlexAutomaton.parse``. When
        ``self.agglutinative`` is truthy, any option whose inflection is
        listed in ``self.prefixes`` for another inflection found in the
        same result is dropped. Otherwise the base result is returned
        untouched.

        :param token: the token to analyse.
        :return: the options from the base parser, or a filtered list of
            (inflection, stem) tuples in the agglutinative case.
        """
        candidates = FlexAutomaton.parse(self, token)
        if self.agglutinative:
            # Collect every inflection shadowed by one that was found.
            shadowed = set()
            for inflection, _stem in candidates:
                covered = self.prefixes.get(inflection)
                if covered:
                    shadowed |= covered
            candidates = [
                (inflection, stem)
                for inflection, stem in candidates
                if inflection not in shadowed
            ]
        return candidates
コード例 #4
0
    def __init__(self):
        """
        Create an automaton that uses a number sign (u'#') as its marker.

        ``self.contents`` is created as an empty dict before the
        FlexAutomaton base is initialised with ``empty=u'#'``.
        """
        # Assigned before the base initialiser runs; keep this order.
        self.contents = {}
        FlexAutomaton.__init__(self, empty=u'#')