Beispiel #1
0
    def __init__(self, system):
        """
        initialization of processing rules

        Generates the grammar and vocabulary tables for a language definition
        and creates the helper objects kept on this instance. Both tables are
        always attempted, so a grammar failure does not hide a vocabulary
        failure; if either one fails, a message goes to stderr and the process
        exits with status 1.

        arguments:
            system   - root name of PyElly tables to load
        """

        nfail = 0  # error count for reporting

        self.rul = None  # grammar definition, stays None if generation fails
        self.tks = None  # token list for output
        self.ptr = Tree()

        try:
            self.rul = ellyDefinition.Grammar(system, True, None)
        except ellyException.TableFailure:
            nfail += 1

        d = self.rul  # language rules

        self.gtb = d.gtb if d is not None else None

        mtb = d.mtb if d is not None else None
        self.sbu = substitutionBuffer.SubstitutionBuffer(mtb)

        # the buffer is not required to expose a stemmer
        inflx = getattr(self.sbu, 'stemmer', None)

        if d is not None:
            d.man.suff.infl = inflx  # define root restoration logic

        # without a grammar, vocabulary is still compiled against a new symbol table
        stb = d.stb if d is not None else symbolTable.SymbolTable()

        voc = None  # keep the name bound even when generation fails
        try:
            voc = ellyDefinition.Vocabulary(system, True, stb)
        except ellyException.TableFailure:
            nfail += 1

        if nfail > 0:
            print('exiting: table generation FAILures', file=sys.stderr)
            sys.exit(1)

        # past this point both the grammar and the vocabulary were generated

        self.vtb = voc.vtb

        self.pnc = punctuationRecognizer.PunctuationRecognizer(stb)

        self.iex = entityExtractor.EntityExtractor(self.ptr, stb)  # set up extractors

        self.trs = simpleTransform.SimpleTransform()

        ntabl = d.ntb

        # register name recognition only when the grammar supplies a filled name table
        if ntabl is not None and ntabl.filled():
            nameRecognition.setUp(ntabl)
            ellyConfiguration.extractors.append([nameRecognition.scan, 'name'])
Beispiel #2
0
    def __init__ ( self , system , restore=None ):

        """
        initialization

        Generates the grammar and vocabulary tables for a language definition,
        sets up the session, interpretive context, parse tree, recognizers and
        extractors, and records the symbols reported by stb.findUnknown()
        after each stage. Output is written with write() calls so that the
        method parses under both Python 2 and Python 3.

        arguments:
            system   - root name for PyElly tables to load
            restore  - name of session to continue

        exits the process with status 1 if grammar or vocabulary table
        generation fails
        """

        nfail = 0          # error count for reporting
        self.rul = None

        self.gundef = [ ]  # record orphan symbols by module: after grammar
        self.vundef = [ ]  #   after vocabulary
        self.pundef = [ ]  #   after punctuation recognizer
        self.eundef = [ ]  #   after entity extractors

        sysf = system + rules
        redefine = not _isSaved(system, rules, _rules)
        try:
            self.rul = ellyDefinition.Grammar(system, redefine, release)
        except ellyException.TableFailure:
            nfail += 1
        else:
            self.gundef = self.rul.stb.findUnknown()
            if redefine:
                ellyPickle.save(self.rul, sysf)

        if restore is not None:
            # NOTE(review): presumably this unpickles a saved session file;
            # confirm that restore names only come from trusted sources
            self.ses = ellyPickle.load(restore + '.' + system + _session)
        else:
            self.ses = ellySession.EllySession()

        s = self.ses  # session info
        d = self.rul  # language rules, None if grammar generation failed

        mtb = d.mtb if d is not None else None
        self.sbu = substitutionBuffer.SubstitutionBuffer(mtb)

        # the buffer is not required to expose a stemmer
        inflx = getattr(self.sbu, 'stemmer', None)
        if d is not None:
            d.man.suff.infl = inflx   # define root restoration logic

        # grammar was not redefined, but vocabulary may still need it
        if not redefine and (not _isSaved(system, vocabulary, _vocabulary)
                             or _notVocabularyToDate(system)):
            redefine = True

        stb = d.stb if d is not None else symbolTable.SymbolTable()

        if nfail > 0:
            sys.stderr.write('exiting: table generation FAILures\n')
            sys.exit(1)

        # past this point the grammar is known to be available

        self.ctx = interpretiveContext.InterpretiveContext(stb, d.gtb.pndx, s.globals, d.hry)

        for z in d.gtb.initzn:        # initialize global symbols for parsing
            self.ctx.glbls[z[0]] = z[1]

        self.pnc = punctuationRecognizer.PunctuationRecognizer(stb)
        self.pundef = stb.findUnknown()

        nto = len(stb.ntname)         # for consistency check

        if noParseTree:
            self.ptr = NoParseTree(stb, d.gtb, d.ptb, self.ctx)
        elif ellyConfiguration.treeDisplay:
            self.ptr = parseTreeWithDisplay.ParseTreeWithDisplay(stb, d.gtb, d.ptb, self.ctx)
        else:
            self.ptr = parseTree.ParseTree(stb, d.gtb, d.ptb, self.ctx)

        ntabl = d.ntb

        # register name recognition only when the grammar supplies a filled name table
        if ntabl is not None and ntabl.filled():
            nameRecognition.setUp(ntabl)
            ellyConfiguration.extractors.append([nameRecognition.scan, 'name'])

        self.iex = entityExtractor.EntityExtractor(self.ptr, stb)  # set up extractors

        self.eundef = stb.findUnknown()

        if ellyConfiguration.rewriteNumbers:
            self.trs = simpleTransform.SimpleTransform()
        else:
            self.trs = None           # no automatic conversion of written out numbers

        if redefine:
            sys.stdout.write('recompiling vocabulary rules\n')
        try:
            voc = ellyDefinition.Vocabulary(system, redefine, stb)
        except ellyException.TableFailure:
            voc = None
            nfail += 1

        if ellyConfiguration.treeDisplay:
            sys.stdout.write("tree display ON\n")
        else:
            sys.stdout.write("tree display OFF\n")

        if voc is not None:
            self.vtb = voc.vtb
        self.vundef = stb.findUnknown()

        ntn = len(stb.ntname)         # do consistency check on syntactic category count
        if nto != ntn:
            # categories were added after the punctuation recognizer was set up
            err = sys.stderr.write
            err('\n')
            err('WARNING: grammar rules should predefine all syntactic categories\n')
            err('         referenced in language definition files\n')
            for i in range(nto, ntn):
                err('%s %s = %s\n' % ('        ', stb.ntname[i].upper(), i))
            err('\n')

        if nfail > 0:
            sys.stderr.write('exiting: table generation FAILures\n')
            sys.exit(1)

        sys.stderr.flush()
Beispiel #3
0
    def __init__(self, system, restore=None):
        """
        initialization

        Generates the grammar and vocabulary tables for a language definition,
        sets up the session, extended context, parse tree, recognizers and
        extractors, and records the symbols reported by stb.findUnknown()
        after each stage.

        arguments:
            system   - root name for PyElly tables to load
            restore  - name of session to continue

        exits the process with status 1 if grammar or vocabulary table
        generation fails
        """

        nfail = 0  # error count for reporting
        self.rul = None

        self.gundef = []  # record orphan symbols by module: after grammar
        self.vundef = []  #   after vocabulary
        self.pundef = []  #   after punctuation recognizer
        self.eundef = []  #   after entity extractors

        sysf = system + rules
        redefine = not _isSaved(system, rules, _rules)
        try:
            self.rul = ellyDefinition.Grammar(system, redefine, release)
        except ellyException.TableFailure:
            nfail += 1
        else:
            self.gundef = self.rul.stb.findUnknown()
            if redefine:
                ellyPickle.save(self.rul, sysf)

        if restore is not None:
            # NOTE(review): presumably this unpickles a saved session file;
            # confirm that restore names only come from trusted sources
            self.ses = ellyPickle.load(restore + '.' + system + _session)
        else:
            self.ses = ellySession.EllySession()

        s = self.ses  # session info
        d = self.rul  # language rules, None if grammar generation failed

        mtb = d.mtb if d is not None else None
        self.sbu = substitutionBuffer.SubstitutionBuffer(mtb)

        # the buffer is not required to expose a stemmer
        inflx = getattr(self.sbu, 'stemmer', None)
        if d is not None:
            d.man.suff.infl = inflx  # define root restoration logic

        # grammar was not redefined, but vocabulary may still need it
        if not redefine and (not _isSaved(system, vocabulary, _vocabulary)
                             or _notVocabularyToDate(system)):
            redefine = True

        stb = d.stb if d is not None else symbolTable.SymbolTable()

        if nfail > 0:
            print('exiting: table generation FAILures', file=sys.stderr)
            sys.exit(1)

        # past this point the grammar is known to be available

        self.ctx = extendedContext.ExtendedContext(stb, d.gtb.pndx, s.globals,
                                                   d.hry)

        for z in d.gtb.initzn:  # initialize global symbols for parsing
            self.ctx.glbls[z[0]] = z[1]

        self.pnc = punctuationRecognizer.PunctuationRecognizer(stb)
        self.pundef = stb.findUnknown()

        nto = len(stb.ntname)  # for consistency check

        if noParseTree:
            self.ptr = NoParseTree(stb, d.gtb, d.ptb, self.ctx)
        elif ellyConfiguration.treeDisplay:
            self.ptr = parseTreeWithDisplay.ParseTreeWithDisplay(
                stb, d.gtb, d.ptb, self.ctx)
        else:
            self.ptr = parseTree.ParseTree(stb, d.gtb, d.ptb, self.ctx)

        ntabl = d.ntb

        # register name recognition only when the grammar supplies a filled name table
        if ntabl is not None and ntabl.filled():
            nameRecognition.setUp(ntabl)
            ellyConfiguration.extractors.append([nameRecognition.scan, 'name'])

        self.iex = entityExtractor.EntityExtractor(self.ptr,
                                                   stb)  # set up extractors

        self.eundef = stb.findUnknown()

        if ellyConfiguration.rewriteNumbers:
            self.trs = simpleTransform.SimpleTransform()
        else:
            self.trs = None  # no automatic conversion of written out numbers

        if redefine:
            print('recompiling vocabulary rules')
        try:
            voc = ellyDefinition.Vocabulary(system, redefine, stb)
        except ellyException.TableFailure:
            voc = None
            nfail += 1

        if ellyConfiguration.treeDisplay:
            print("tree display ON")
        else:
            print("tree display OFF")

        if voc is not None:
            self.vtb = voc.vtb
        self.vundef = stb.findUnknown()

        ntn = len(stb.ntname)  # do consistency check on syntactic category count
        if nto != ntn:
            # categories were added after the punctuation recognizer was set up
            print(file=sys.stderr)
            print(
                'WARNING: grammar rules should predefine all syntactic categories',
                file=sys.stderr)
            print('         referenced in language definition files',
                  file=sys.stderr)
            for i in range(nto, ntn):
                print('        ',
                      stb.ntname[i].upper(),
                      '=',
                      i,
                      file=sys.stderr)
            print(file=sys.stderr)

        if nfail > 0:
            print('exiting: table generation FAILures', file=sys.stderr)
            sys.exit(1)

        sys.stderr.flush()
Beispiel #4
0
    def __init__ ( self , system ):

        """
        initialization of processing rules

        Generates the grammar and vocabulary tables for a language definition
        and creates the helper objects kept on this instance. Both tables are
        always attempted, so a grammar failure does not hide a vocabulary
        failure; if either one fails, a message goes to stderr and the process
        exits with status 1. The message is written with write() so that the
        method parses under both Python 2 and Python 3.

        arguments:
            system   - root name for PyElly tables to load
        """

        nfail = 0          # error count for reporting

        self.rul = None    # grammar definition, stays None if generation fails
        self.tks = None    # token list for output
        self.ptr = Tree()

        try:
            self.rul = ellyDefinition.Grammar(system, True, None)
        except ellyException.TableFailure:
            nfail += 1

        d = self.rul  # language rules

        self.gtb = d.gtb if d is not None else None

        mtb = d.mtb if d is not None else None
        self.sbu = substitutionBuffer.SubstitutionBuffer(mtb)

        # the buffer is not required to expose a stemmer
        inflx = getattr(self.sbu, 'stemmer', None)

        if d is not None:
            d.man.suff.infl = inflx   # define root restoration logic

        # without a grammar, vocabulary is still compiled against a new symbol table
        stb = d.stb if d is not None else symbolTable.SymbolTable()

        voc = None  # keep the name bound even when generation fails
        try:
            voc = ellyDefinition.Vocabulary(system, True, stb, inflx)
        except ellyException.TableFailure:
            nfail += 1

        if nfail > 0:
            sys.stderr.write('exiting: table generation FAILures\n')
            sys.exit(1)

        # past this point both the grammar and the vocabulary were generated

        self.vtb = voc.vtb

        self.pnc = punctuationRecognizer.PunctuationRecognizer(stb)

        self.iex = entityExtractor.EntityExtractor(self.ptr, stb)  # set up extractors

        self.trs = simpleTransform.SimpleTransform()

        ntabl = d.ntb

        # register name recognition only when the grammar supplies a filled name table
        if ntabl is not None and ntabl.filled():
            nameRecognition.setUp(ntabl)
            ellyConfiguration.extractors.append([nameRecognition.scan, 'name'])