return ru # # unit test # if __name__ == '__main__': import ellyConfiguration import dumpEllyGrammar import punctuationRecognizer filn = sys.argv[1] if len(sys.argv) > 1 else 'test' sym = symbolTable.SymbolTable() # print sym base = ellyConfiguration.baseSource + '/' inp = ellyDefinitionReader.EllyDefinitionReader(base + filn + '.g.elly') if inp.error != None: print inp.error sys.exit(1) print 'reading', '[' + filn + ']', len( inp.buffer), 'lines of rule definitions' try: gtb = GrammarTable(sym, inp) pnc = punctuationRecognizer.PunctuationRecognizer(sym) # print gtb dumpEllyGrammar.dumpAll(sym, gtb, 5) except ellyException.TableFailure: print >> sys.stderr, 'exiting'
def __init__(self, system):
    """
    initialization of processing rules

    Builds the grammar and vocabulary tables for a system and then the
    processing modules that depend on them. Both table builds are always
    attempted so that failures from either one are counted before exiting.

    arguments:
        system - root name of PyElly tables to load

    exceptions:
        SystemExit - raised through sys.exit(1) when grammar or vocabulary
                     table generation fails
    """
    nfail = 0          # error count for reporting
    self.rul = None
    self.tks = None    # token list for output
    self.ptr = Tree()

    try:
        self.rul = ellyDefinition.Grammar(system, True, None)
    except ellyException.TableFailure:
        nfail += 1

    d = self.rul       # language rules; still None if the grammar build failed
    has_rules = d is not None

    self.gtb = d.gtb if has_rules else None
    mtb = d.mtb if has_rules else None
    self.sbu = substitutionBuffer.SubstitutionBuffer(mtb)

    # the substitution buffer may or may not expose a stemmer attribute
    inflx = getattr(self.sbu, 'stemmer', None)
    if has_rules:
        d.man.suff.infl = inflx   # define root restoration logic

    # without grammar rules, fall back on a new symbol table so that the
    # vocabulary build can still be attempted
    stb = d.stb if has_rules else symbolTable.SymbolTable()

    voc = None
    try:
        voc = ellyDefinition.Vocabulary(system, True, stb)
    except ellyException.TableFailure:
        nfail += 1

    if nfail > 0:
        print('exiting: table generation FAILures', file=sys.stderr)
        sys.exit(1)

    # from here on, both d and voc are known to be defined
    self.vtb = voc.vtb
    self.pnc = punctuationRecognizer.PunctuationRecognizer(stb)
    self.iex = entityExtractor.EntityExtractor(self.ptr, stb)   # set up extractors
    self.trs = simpleTransform.SimpleTransform()

    ntabl = d.ntb
    if ntabl is not None and ntabl.filled():
        nameRecognition.setUp(ntabl)
        # the extractor list is module-level state shared by every instance,
        # so register the name scanner only if it is not already listed
        extractor = [nameRecognition.scan, 'name']
        if extractor not in ellyConfiguration.extractors:
            ellyConfiguration.extractors.append(extractor)
def __init__(self, system, restore=None):
    """
    initialization

    Sets up grammar rules, session, symbol table, context, parse tree,
    entity extraction, and vocabulary for a system, in that order. After
    each stage the symbols reported by findUnknown() are recorded:
    self.gundef after grammar rules, self.pundef after punctuation,
    self.eundef after entity extraction, self.vundef after vocabulary.
    A warning goes to stderr if the syntactic category count grows after
    the punctuation recognizer has been set up.

    arguments:
        system   - root name for PyElly tables to load
        restore  - name of session to continue

    exceptions:
        SystemExit - raised through sys.exit(1) on a grammar or vocabulary
                     table generation failure
    """
    nfail = 0          # error count for reporting
    self.rul = None

    self.gundef = []   # record orphan symbols by module
    self.vundef = []
    self.pundef = []
    self.eundef = []

    sysf = system + rules
    redefine = not _isSaved(system, rules, _rules)
    try:
        self.rul = ellyDefinition.Grammar(system, redefine, release)
    except ellyException.TableFailure:
        nfail += 1
    if nfail == 0:
        self.gundef = self.rul.stb.findUnknown()
        if redefine:
            ellyPickle.save(self.rul, sysf)

    if restore is not None:
        # NOTE(review): ellyPickle.load presumably unpickles a saved session
        # file - confirm that session files only come from trusted sources
        self.ses = ellyPickle.load(restore + '.' + system + _session)
    else:
        self.ses = ellySession.EllySession()

    s = self.ses       # session info
    d = self.rul       # language rules; still None if the grammar build failed

    mtb = d.mtb if d is not None else None
    self.sbu = substitutionBuffer.SubstitutionBuffer(mtb)

    # the substitution buffer may or may not expose a stemmer attribute
    inflx = getattr(self.sbu, 'stemmer', None)
    if d is not None:
        d.man.suff.infl = inflx   # define root restoration logic

    # even with saved grammar rules, the vocabulary is redefined when its
    # saved form is missing or reported as not up to date
    if not redefine:
        if not _isSaved(system, vocabulary, _vocabulary) or _notVocabularyToDate(system):
            redefine = True

    stb = d.stb if d is not None else symbolTable.SymbolTable()

    if nfail > 0:
        print('exiting: table generation FAILures', file=sys.stderr)
        sys.exit(1)

    # from here on, d is known to be defined
    self.ctx = extendedContext.ExtendedContext(stb, d.gtb.pndx, s.globals, d.hry)

    for z in d.gtb.initzn:        # initialize global symbols for parsing
        self.ctx.glbls[z[0]] = z[1]

    self.pnc = punctuationRecognizer.PunctuationRecognizer(stb)
    self.pundef = stb.findUnknown()

    nto = len(stb.ntname)         # for consistency check

    if noParseTree:
        self.ptr = NoParseTree(stb, d.gtb, d.ptb, self.ctx)
    elif ellyConfiguration.treeDisplay:
        self.ptr = parseTreeWithDisplay.ParseTreeWithDisplay(stb, d.gtb, d.ptb, self.ctx)
    else:
        self.ptr = parseTree.ParseTree(stb, d.gtb, d.ptb, self.ctx)

    ntabl = d.ntb
    if ntabl is not None and ntabl.filled():
        nameRecognition.setUp(ntabl)
        # the extractor list is module-level state shared by every instance,
        # so register the name scanner only if it is not already listed
        extractor = [nameRecognition.scan, 'name']
        if extractor not in ellyConfiguration.extractors:
            ellyConfiguration.extractors.append(extractor)

    self.iex = entityExtractor.EntityExtractor(self.ptr, stb)   # set up extractors

    self.eundef = stb.findUnknown()

    if ellyConfiguration.rewriteNumbers:
        self.trs = simpleTransform.SimpleTransform()
    else:
        self.trs = None           # no automatic conversion of written out numbers

    if redefine:
        print('recompiling vocabulary rules')
    try:
        voc = ellyDefinition.Vocabulary(system, redefine, stb)
    except ellyException.TableFailure:
        voc = None
        nfail += 1

    if ellyConfiguration.treeDisplay:
        print("tree display ON")
    else:
        print("tree display OFF")

    if voc is not None:
        self.vtb = voc.vtb
    self.vundef = stb.findUnknown()

    ntn = len(stb.ntname)         # do consistency check on syntactic category count
    if nto != ntn:
        # categories were added after the punctuation recognizer was set up;
        # list each new one with its index
        print(file=sys.stderr)
        print('WARNING: grammar rules should predefine all syntactic categories',
              file=sys.stderr)
        print(' referenced in language definition files', file=sys.stderr)
        for i in range(nto, ntn):
            print(' ', stb.ntname[i].upper(), '=', i, file=sys.stderr)
        print(file=sys.stderr)

    if nfail > 0:
        print('exiting: table generation FAILures', file=sys.stderr)
        sys.exit(1)

    sys.stderr.flush()