def _addSectionStart(self, sectionName, lineNum):
    """Open a new section and make it the current one.

    Registers an empty line list for ``sectionName`` in
    ``self.section2Lines`` and stores the name in ``self.currSection``.

    Raises:
        exceptions.ConfigFileException: if ``sectionName`` is not listed in
            ``self.sectionNames``, or if the section was already opened.
    """
    problem = None
    if sectionName not in self.sectionNames:
        problem = 'Invalid section: %s' % sectionName
    elif sectionName in self.section2Lines:
        problem = 'Duplicate section: %s' % sectionName

    if problem is not None:
        raise exceptions.ConfigFileException(self.filename, lineNum, problem)

    self.currSection = sectionName
    self.section2Lines[sectionName] = []
def _addLine(self, line, lineNum):
    """Record one content line under the section currently being read.

    The line is stripped first; blank lines are ignored. Inside a section
    every non-blank line (comments included) is appended to that section's
    list as a ``(lineNum, line)`` tuple.

    Comment lines (starting with ``#``) that appear before the first
    section header are ignored: there is no section to attach them to, and
    indexing ``self.section2Lines`` with ``None`` would raise ``KeyError``.

    Raises:
        exceptions.ConfigFileException: if non-comment text appears before
            any section header.
    """
    line = line.strip()
    if not line:
        return

    if self.currSection is None:
        if line.startswith('#'):
            # Leading comment outside of any section: nothing to store.
            return
        raise exceptions.ConfigFileException(
            self.filename, lineNum, 'Text outside of any section')

    self.section2Lines[self.currSection].append((lineNum, line))
def _createQuantRule1(self, child, quantity, lineNum, line, segtypesHelper):
    """Build a rule that repeats ``child`` exactly ``quantity`` times.

    Returns a ``rules.ConcatRule`` whose children list holds ``child``
    ``quantity`` times.

    Raises:
        exceptions.ConfigFileException: if ``quantity`` is zero or negative;
            the message quotes the offending ``line``.
    """
    if quantity <= 0:
        message = u'%s - invalid quantity: %d' % (line, quantity)
        raise exceptions.ConfigFileException(
            segtypesHelper.filename, lineNum, message)

    repeated = [child] * quantity
    return rules.ConcatRule(repeated, lineNum)
def _createNewTagRule(self, segtype, shiftOrth, lineNum, line, segtypesHelper):
    """Build a ``rules.TagRule`` for the segment type named ``segtype``.

    The segment number passed to the rule is looked up through
    ``segtypesHelper.getSegnum4Segtype``.

    Raises:
        exceptions.ConfigFileException: if ``segtypesHelper`` does not know
            ``segtype``; the message quotes the offending ``line``.
    """
    if segtypesHelper.hasSegtype(segtype):
        segnum = segtypesHelper.getSegnum4Segtype(segtype)
        return rules.TagRule(segnum, shiftOrth, segtype, lineNum)

    message = u'%s - invalid segment type: %s' % (line, segtype)
    raise exceptions.ConfigFileException(
        segtypesHelper.filename, lineNum, message)
def enumerateLinesInSection(self, sectionName, ignoreComments=True):
    """Return the ``(lineNum, line)`` pairs stored for ``sectionName``.

    With ``ignoreComments`` set (the default) a new list is returned that
    omits lines starting with ``#``. Otherwise the stored list itself is
    returned, not a copy.

    Raises:
        exceptions.ConfigFileException: if the section has no entry in
            ``self.section2Lines``.
    """
    if sectionName not in self.section2Lines:
        raise exceptions.ConfigFileException(
            self.filename, None, u'Missing section: "%s"' % sectionName)

    entries = self.section2Lines[sectionName]
    if ignoreComments:
        return [(number, text) for (number, text) in entries
                if not text.startswith('#')]
    return entries
def parseSeparatorChars(segtypesConfigFile):
    """Read the 'separator chars' section as a list of integers.

    Every non-comment line of the section is converted with ``int()``; the
    values are returned in file order.

    Raises:
        exceptions.ConfigFileException: if a line is not a valid integer;
            the exception carries that line's number and the ``ValueError``
            text.
    """
    values = []
    entries = segtypesConfigFile.enumerateLinesInSection(
        'separator chars', ignoreComments=True)
    for lineNum, line in entries:
        try:
            value = int(line)
        except ValueError as ex:
            raise exceptions.ConfigFileException(
                segtypesConfigFile.filename, lineNum, str(ex))
        else:
            values.append(value)
    return values
def _getKey2Defs(self, segtypesConfigFile):
    """Parse the 'options' section into a ``{key: (def, ...)}`` mapping.

    Each non-comment line must have the form ``key = def1 def2 ...`` where
    the key and every definition consist of alphanumerics and underscores,
    with at least one definition per key.

    Raises:
        exceptions.ConfigFileException: if a line cannot be parsed; the
            exception carries that line's number.
    """
    # The grammar does not depend on the line being parsed, so build it
    # once instead of once per option line.
    optionLine = (Word(alphanums + '_') + Suppress('=') +
                  Group(OneOrMore(Word(alphanums + '_'))) +
                  LineEnd().suppress())

    res = {}
    for lineNum, line in segtypesConfigFile.enumerateLinesInSection('options'):
        try:
            key, defs = optionLine.parseString(line)
            res[key] = tuple(defs)
        except Exception as ex:
            # Any failure on this line is reported as a config error
            # pointing at the line.
            raise exceptions.ConfigFileException(
                segtypesConfigFile.filename, lineNum,
                u'Error in [options] section: %s' % str(ex))
    return res
def _createQuantRule2(self, child, leftN, rightN, lineNum, line, segtypesHelper):
    """Build a rule matching ``child`` repeated between ``leftN`` and
    ``rightN`` times.

    The result is a ``rules.OrRule`` over one alternative per allowed
    count. When ``leftN`` is 0, the counts 0 and 1 are both covered by a
    single ``rules.OptionalRule`` and the remaining alternatives start at 2.

    Raises:
        exceptions.ConfigFileException: if ``leftN > rightN`` or both
            bounds are 0; the message quotes the offending ``line``.
    """
    if leftN > rightN or (leftN, rightN) == (0, 0):
        message = u'%s - invalid quantities: %d %d' % (line, leftN, rightN)
        raise exceptions.ConfigFileException(
            segtypesHelper.filename, lineNum, message)

    if leftN == 0:
        alternatives = [rules.OptionalRule(child, lineNum)]
        firstCount = 2
    else:
        alternatives = []
        firstCount = leftN

    for count in range(firstCount, rightN + 1):
        alternatives.append(
            self._createQuantRule1(child, count, lineNum, line,
                                   segtypesHelper))
    return rules.OrRule(alternatives, lineNum)
def _validate(self, msg, lineNum, cond):
    """Raise a config-file error with ``msg`` unless ``cond`` is truthy.

    Raises:
        exceptions.ConfigFileException: for ``self.filename`` at
            ``lineNum`` when ``cond`` is falsy.
    """
    if cond:
        return
    raise exceptions.ConfigFileException(self.filename, lineNum, msg)
def parse(self, filename):
    """Parse a segmentation-rules file into a ``rulesManager.RulesManager``.

    Steps, in order:

    1. Load ``filename`` as a config file with the known section names and
       read the ``options`` section.
    2. For every combination of option values (the Cartesian product of
       the definitions of each key), preprocess the ``combinations``
       section for that combination and parse it into rules.
    3. Run ``self.doShiftOrthMagic`` over all collected rules.
    4. For every combination, add its rules to an NFA, convert it to a DFA
       and register the DFA in the result. The first combination is also
       set as the default options.

    Separator characters are read only when ``self.rulesType`` is
    ``RulesParser.PARSE4ANALYZER``; otherwise an empty list is used.

    Raises:
        exceptions.ConfigFileException: if a rule accepts the empty
            segment sequence, or if DFA conversion reports a state that is
            both weak and non-weak. Errors raised by the helpers called
            here propagate unchanged.
    """
    segtypesConfigFile = configFile.ConfigFile(filename, [
        'options', 'combinations', 'tags', 'lexemes', 'segment types',
        'separator chars'
    ])
    key2Defs = self._getKey2Defs(segtypesConfigFile)
    segtypesHelper = segtypes.Segtypes(self.tagset, self.namesMap,
                                       self.labelsMap, segtypesConfigFile)
    if self.rulesType == RulesParser.PARSE4ANALYZER:
        separatorsList = separatorChars.parseSeparatorChars(segtypesConfigFile)
    else:
        separatorsList = []
    res = rulesManager.RulesManager(segtypesHelper, separatorsList)

    # Reverse mapping: option value -> option key it belongs to.
    # items() (rather than iteritems()) works on both Python 2 and 3.
    def2Key = {}
    for key, defs in key2Defs.items():
        for define in defs:
            def2Key[define] = key

    # Both passes below must visit the combinations in the same order, so
    # compute the product once.
    optionCombinations = list(itertools.product(*key2Defs.values()))

    # The raw lines do not depend on the combination; only the
    # preprocessing does. Comments are kept here (ignoreComments=False).
    rawCombinationLines = segtypesConfigFile.enumerateLinesInSection(
        'combinations', ignoreComments=False)

    resultsMap = {}
    for defs in optionCombinations:
        key2Def = dict((def2Key[define], define) for define in defs)
        currRes = []
        resultsMap[self._key2DefAsKey(key2Def)] = currRes
        combinationEnumeratedLines = list(
            preprocessor.preprocess(rawCombinationLines, defs, filename))
        for rule in self._doParse(combinationEnumeratedLines, segtypesHelper,
                                  filename):
            if rule.allowsEmptySequence():
                raise exceptions.ConfigFileException(
                    filename, rule.linenum,
                    'This rule allows empty segments sequence to be accepted'
                )
            rule.validate(filename)
            if self.rulesType == RulesParser.PARSE4GENERATOR:
                additionalRules = rule.getAdditionalAtomicRules4Generator()
                # Use a distinct loop variable: reusing ``rule`` here would
                # leave it bound to the last additional rule, so the
                # transformation below would be applied to the wrong rule.
                for additionalRule in additionalRules:
                    additionalRule.autogenerated = True
                currRes.extend(additionalRules)
                rule = rule.transformToGeneratorVersion()
            if not rule.isSinkRule():
                currRes.append(rule)

    self.doShiftOrthMagic(resultsMap, res)

    for idx, defs in enumerate(optionCombinations):
        key2Def = dict((def2Key[define], define) for define in defs)
        nfa = rulesNFA.RulesNFA()
        for rule in resultsMap[self._key2DefAsKey(key2Def)]:
            rule.addToNFA(nfa)
        try:
            dfa = nfa.convertToDFA()
            res.addDFA(key2Def, dfa)
        except rulesNFA.InconsistentStateWeaknessException as ex:
            raise exceptions.ConfigFileException(
                filename, ex.weakState.rule.linenum,
                'conflicts with rule at line %d. Segmentation for some chunks can be both weak and non-weak which is illegal.'
                % ex.nonWeakState.rule.linenum)
        if idx == 0:
            # The first combination doubles as the default option set.
            res.setDefaultOptions(key2Def)
    return res