コード例 #1
0
ファイル: configFile.py プロジェクト: Glorf/morfeusz
 def _addSectionStart(self, sectionName, lineNum):
     if not sectionName in self.sectionNames:
         raise exceptions.ConfigFileException(
             self.filename, lineNum, 'Invalid section: %s' % sectionName)
     if sectionName in self.section2Lines:
         raise exceptions.ConfigFileException(
             self.filename, lineNum, 'Duplicate section: %s' % sectionName)
     self.section2Lines[sectionName] = []
     self.currSection = sectionName
コード例 #2
0
ファイル: configFile.py プロジェクト: Glorf/morfeusz
 def _addLine(self, line, lineNum):
     line = line.strip()
     if line:
         if self.currSection is None and not line.startswith('#'):
             raise exceptions.ConfigFileException(
                 self.filename, lineNum, 'Text outside of any section')
         self.section2Lines[self.currSection].append((lineNum, line))
コード例 #3
0
 def _createQuantRule1(self, child, quantity, lineNum, line,
                       segtypesHelper):
     """Build a rule that concatenates ``child`` ``quantity`` times.

     :param child: rule to repeat.
     :param quantity: number of repetitions; must be positive.
     :param lineNum: line number passed to the rule and to any error.
     :param line: source text of the rule, used in the error message.
     :param segtypesHelper: provides ``filename`` for error reporting.
     :raises exceptions.ConfigFileException: if ``quantity`` is not
         positive.
     """
     if quantity <= 0:
         message = u'%s - invalid quantity: %d' % (line, quantity)
         raise exceptions.ConfigFileException(segtypesHelper.filename,
                                              lineNum, message)
     repeated = [child] * quantity
     return rules.ConcatRule(repeated, lineNum)
コード例 #4
0
 def _createNewTagRule(self, segtype, shiftOrth, lineNum, line,
                       segtypesHelper):
     """Build a ``rules.TagRule`` for a known segment type.

     :param segtype: segment type name; must be known to
         ``segtypesHelper``.
     :param shiftOrth: passed through to ``rules.TagRule`` unchanged.
     :param lineNum: line number passed to the rule and to any error.
     :param line: source text of the rule, used in the error message.
     :param segtypesHelper: resolves the segment type to its number and
         provides ``filename`` for error reporting.
     :raises exceptions.ConfigFileException: if ``segtype`` is unknown.
     """
     if segtypesHelper.hasSegtype(segtype):
         segnum = segtypesHelper.getSegnum4Segtype(segtype)
         return rules.TagRule(segnum, shiftOrth, segtype, lineNum)
     message = u'%s - invalid segment type: %s' % (line, segtype)
     raise exceptions.ConfigFileException(segtypesHelper.filename, lineNum,
                                          message)
コード例 #5
0
ファイル: configFile.py プロジェクト: Glorf/morfeusz
 def enumerateLinesInSection(self, sectionName, ignoreComments=True):
     """Return the ``(lineNumber, text)`` pairs stored for a section.

     :param sectionName: section to read.
     :param ignoreComments: when true, a new list without lines starting
         with ``#`` is returned; otherwise the stored list itself is
         returned.
     :raises exceptions.ConfigFileException: if the section is absent.
     """
     if sectionName not in self.section2Lines:
         raise exceptions.ConfigFileException(
             self.filename, None, u'Missing section: "%s"' % sectionName)

     stored = self.section2Lines[sectionName]
     if ignoreComments:
         kept = []
         for linenum, line in stored:
             if line.startswith('#'):
                 continue
             kept.append((linenum, line))
         return kept
     return stored
コード例 #6
0
ファイル: separatorChars.py プロジェクト: tgandor/Morfeusz
def parseSeparatorChars(segtypesConfigFile):
    """Read the 'separator chars' section as a list of integers.

    Each non-comment line of the section is converted with ``int()``.

    :param segtypesConfigFile: object providing ``filename`` and
        ``enumerateLinesInSection``.
    :returns: the parsed integers, in file order.
    :raises exceptions.ConfigFileException: if a line is not a valid
        integer; the error carries that line's number.
    """
    codepoints = []
    entries = segtypesConfigFile.enumerateLinesInSection(
        'separator chars', ignoreComments=True)
    for lineNum, line in entries:
        try:
            codepoint = int(line)
        except ValueError as ex:
            raise exceptions.ConfigFileException(segtypesConfigFile.filename,
                                                 lineNum, str(ex))
        codepoints.append(codepoint)
    return codepoints
コード例 #7
0
 def _getKey2Defs(self, segtypesConfigFile):
     """Parse the 'options' section into a ``{key: (value, ...)}`` dict.

     Each line must have the form ``key = value1 value2 ...`` where keys
     and values consist of alphanumeric characters and underscores.

     :param segtypesConfigFile: object providing ``filename`` and
         ``enumerateLinesInSection``.
     :returns: dict mapping each option key to a tuple of its values.
     :raises exceptions.ConfigFileException: if a line cannot be parsed;
         the error carries that line's number.
     """
     # The grammar does not depend on the line, so build it once rather
     # than on every iteration.
     lineToParse = Word(alphanums + '_') + Suppress('=') + Group(
         OneOrMore(Word(alphanums + '_'))) + LineEnd().suppress()

     res = {}
     for lineNum, line in segtypesConfigFile.enumerateLinesInSection(
             'options'):
         try:
             key, defs = lineToParse.parseString(line)
             res[key] = tuple(defs)
         except Exception as ex:
             raise exceptions.ConfigFileException(
                 segtypesConfigFile.filename, lineNum,
                 u'Error in [options] section: %s' % str(ex))
     return res
コード例 #8
0
 def _createQuantRule2(self, child, leftN, rightN, lineNum, line,
                       segtypesHelper):
     """Build a rule matching ``child`` between ``leftN`` and ``rightN`` times.

     The result is a ``rules.OrRule`` over one alternative per allowed
     repetition count.  When ``leftN`` is 0, the first alternative is a
     ``rules.OptionalRule`` and the remaining ones cover the counts from
     2 up to ``rightN``.

     :param child: rule to repeat.
     :param leftN: lower bound of the repetition count.
     :param rightN: upper bound of the repetition count (inclusive).
     :param lineNum: line number passed to the rules and to any error.
     :param line: source text of the rule, used in the error message.
     :param segtypesHelper: provides ``filename`` for error reporting.
     :raises exceptions.ConfigFileException: if ``leftN > rightN`` or
         both bounds are 0.
     """
     if leftN > rightN or (leftN == 0 and rightN == 0):
         message = u'%s - invalid quantities: %d %d' % (line, leftN, rightN)
         raise exceptions.ConfigFileException(segtypesHelper.filename,
                                              lineNum, message)

     if leftN == 0:
         alternatives = [rules.OptionalRule(child, lineNum)]
         firstCount = 2
     else:
         alternatives = []
         firstCount = leftN

     for count in range(firstCount, rightN + 1):
         alternatives.append(
             self._createQuantRule1(child, count, lineNum, line,
                                    segtypesHelper))
     return rules.OrRule(alternatives, lineNum)
コード例 #9
0
 def _validate(self, msg, lineNum, cond):
     if not cond:
         raise exceptions.ConfigFileException(self.filename, lineNum, msg)
コード例 #10
0
    def parse(self, filename):
        """Parse a segmentation-rules file into a rules manager.

        For every combination of option values declared in the 'options'
        section, the 'combinations' section is preprocessed and parsed
        into rules; the rules of each combination are then added to an
        NFA, converted to a DFA and registered in the returned manager.
        The first combination is registered as the default options.

        :param filename: path of the rules file.
        :returns: the populated ``rulesManager.RulesManager``.
        :raises exceptions.ConfigFileException: if a rule allows an empty
            segment sequence, or if NFA-to-DFA conversion reports an
            inconsistent weak/non-weak state.
        """
        segtypesConfigFile = configFile.ConfigFile(filename, [
            'options', 'combinations', 'tags', 'lexemes', 'segment types',
            'separator chars'
        ])
        key2Defs = self._getKey2Defs(segtypesConfigFile)
        segtypesHelper = segtypes.Segtypes(self.tagset, self.namesMap,
                                           self.labelsMap, segtypesConfigFile)
        # Separator characters are only read for analyzer rules.
        if self.rulesType == RulesParser.PARSE4ANALYZER:
            separatorsList = separatorChars.parseSeparatorChars(
                segtypesConfigFile)
        else:
            separatorsList = []

        res = rulesManager.RulesManager(segtypesHelper, separatorsList)

        # Reverse lookup: option value -> option key.
        def2Key = {}
        for key, defs in key2Defs.items():
            for define in defs:
                def2Key[define] = key

        # One entry per choice of a single value for every option.
        # Materialised once so both passes below see the same sequence.
        optionCombinations = list(itertools.product(*key2Defs.values()))

        resultsMap = {}
        for defs in optionCombinations:
            key2Def = {def2Key[define]: define for define in defs}
            currRes = []
            resultsMap[self._key2DefAsKey(key2Def)] = currRes
            combinationEnumeratedLines = segtypesConfigFile.enumerateLinesInSection(
                'combinations', ignoreComments=False)
            combinationEnumeratedLines = list(
                preprocessor.preprocess(combinationEnumeratedLines, defs,
                                        filename))
            for rule in self._doParse(combinationEnumeratedLines,
                                      segtypesHelper, filename):
                if rule.allowsEmptySequence():
                    raise exceptions.ConfigFileException(
                        filename, rule.linenum,
                        'This rule allows empty segments sequence to be accepted'
                    )
                rule.validate(filename)
                if self.rulesType == RulesParser.PARSE4GENERATOR:
                    additionalRules = rule.getAdditionalAtomicRules4Generator()
                    # A distinct loop name keeps `rule` bound to the parsed
                    # rule; reusing `rule` here would make the transform
                    # below run on the last additional rule instead.
                    for additionalRule in additionalRules:
                        additionalRule.autogenerated = True
                    currRes.extend(additionalRules)
                    rule = rule.transformToGeneratorVersion()
                if not rule.isSinkRule():
                    currRes.append(rule)

        self.doShiftOrthMagic(resultsMap, res)

        for idx, defs in enumerate(optionCombinations):
            key2Def = {def2Key[define]: define for define in defs}

            nfa = rulesNFA.RulesNFA()

            for rule in resultsMap[self._key2DefAsKey(key2Def)]:
                rule.addToNFA(nfa)

            try:
                dfa = nfa.convertToDFA()
                res.addDFA(key2Def, dfa)
            except rulesNFA.InconsistentStateWeaknessException as ex:
                raise exceptions.ConfigFileException(
                    filename, ex.weakState.rule.linenum,
                    'conflicts with rule at line %d. Segmentation for some chunks can be both weak and non-weak which is illegal.'
                    % ex.nonWeakState.rule.linenum)
            if idx == 0:
                # The first option combination becomes the default.
                res.setDefaultOptions(key2Def)

        return res