Example #1
0
    print len(dctn) , 'unique tokens in' , no , "dictionary rules"

#
# unit test
#

if __name__ == '__main__':

    # unit test driver: obtain a grammar rule object and dump its tables
    #
    # command line arguments, all optional:
    #   argv[1] - name prefix of the rule files            (default 'test')
    #   argv[2] - release ID; when empty, rules are built
    #             by ellyDefinition.Grammar instead of
    #             being read from the pickled file         (default '')
    #   argv[3] - integer passed as last dumpAll argument  (default '3')

    import ellyException
    import ellyDefinition
    import ellyPickle

    nam = sys.argv[1] if len(sys.argv) > 1 else 'test'
    ver = sys.argv[2] if len(sys.argv) > 2 else ''
    lvl = sys.argv[3] if len(sys.argv) > 3 else '3'

    if ver == '':
        try:
            rul = ellyDefinition.Grammar(nam,True,ver)
        except ellyException.TableFailure:
            print >> sys.stderr , 'grammar rules failed to compile'
            sys.exit(1)
    else:
        rul = ellyPickle.load(nam + '.rules.elly.bin')
        if rul is None:                 # None is treated as a failed load
            print >> sys.stderr , 'grammar rules failed to load'
            sys.exit(1)

    dumpAll(rul.stb,rul.gtb,int(lvl))

Example #2
0
        no += k

    print(len(dctn), 'unique tokens in', no, "dictionary rules")
    return noe


#
# unit test
#

if __name__ == '__main__':

    # unit test driver: obtain a grammar rule object and dump its tables
    #
    # command line arguments, all optional:
    #   argv[1] - name prefix of the rule files            (default 'test')
    #   argv[2] - integer passed as last dumpAll argument  (default '3')
    #   argv[3] - release ID handed to Grammar on rebuild  (default '')

    import ellyException
    import ellyDefinition
    import ellyPickle

    nam = sys.argv[1] if len(sys.argv) > 1 else 'test'
    lvl = sys.argv[2] if len(sys.argv) > 2 else '3'
    ver = sys.argv[3] if len(sys.argv) > 3 else ''

    # try the pickled rules first; build them only when nothing was loaded
    rul = ellyPickle.load(nam + '.rules.elly.bin')
    if rul is None:
        try:
            rul = ellyDefinition.Grammar(nam, True, ver)
        except ellyException.TableFailure:
            print('grammar rules failed to compile', file=sys.stderr)
            sys.exit(1)

    dumpAll(rul.stb, rul.gtb, int(lvl))
Example #3
0
            print '--'

    # an inflectional stemmer object is created only when the configured
    # language is 'EN'; otherwise ustem stays None
    try:
        if ellyConfiguration.language == 'EN':
            ustem = inflectionStemmerEN.InflectionStemmerEN()
        else:
            ustem = None
    except ellyException.TableFailure:
        print >> sys.stderr , 'inflectional stemming failure'
        sys.exit(1)

    # NOTE(review): limt is a str when taken from argv but the int 24 by
    # default -- confirm that later use converts it
    nams = sys.argv[1] if len(sys.argv) > 1 else 'test'
    dfns = nams + source
    limt = sys.argv[2] if len(sys.argv) > 2 else 24

    erul = load(nams + rules)                       # get pickled Elly rules
    if erul == None:
        ustb = symbolTable.SymbolTable()            # if none, make new symbol table
    else:
        ustb = erul.stb                             # else, get existing symbol table

    unkns = ustb.findUnknown()                      # check for new symbols added
    print "new symbols"
    for us in unkns:
        print '[' + us + ']'                        # show every symbol
    print ''

    # open the definition source; a reader problem is reported via inp.error
    print 'source=' , dfns
    inp = ellyDefinitionReader.EllyDefinitionReader(dfns)
    if inp.error != None:
        print >> sys.stderr , inp.error
Example #4
0
    def __init__ ( self , system , create , rid ):

        """
        load all definitions from binary or text files

        arguments:
            self     -
            system   - which PyElly application
            create   - whether to create new binary
            rid      - PyElly release ID

        exceptions:
            TableFailure on error
        """

        super(Grammar,self).__init__()

        self.rls = rid
        sysf = system + grammar

        if create:
            print "recompiling grammar rules"

            self.stb = symbolTable.SymbolTable()  # new empty table to fill in

            el = [ ]

            try:
                self.mtb = macroTable.MacroTable(self.inpT(system,'m'))
            except ellyException.TableFailure:
                el.append('macro')
            try:
                self.gtb = grammarTable.GrammarTable(self.stb,self.inpT(system,'g'))
                self.stb.setBaseSymbols()
            except ellyException.TableFailure:
                el.append('grammar')
            try:
                self.ptb = patternTable.PatternTable(self.stb,self.inpT(system,'p'))
            except ellyException.TableFailure:
                el.append('pattern')

            try:
                self.hry = conceptualHierarchy.ConceptualHierarchy(self.inpT(system,'h'))
            except ellyException.TableFailure:
                el.append('concept')

            try:
                self.ntb = nameTable.NameTable(self.inpT(system,'n'))
            except ellyException.TableFailure:
                el.append('name')

            sa = self.inpT(system,'stl')
            pa = self.inpT(system,'ptl')
            try:
                self.man = morphologyAnalyzer.MorphologyAnalyzer(sa,pa)
            except ellyException.TableFailure:
                el.append('morphology')

            if len(el) > 0:
                print >> sys.stderr , 'rule FAILures on' , el
                raise ellyException.TableFailure

            if self.rls != None:
                ellyPickle.save(self,sysf)

        else:
            print "loading saved grammar rules from" , sysf

            gram = ellyPickle.load(sysf)
            if gram == None:
                raise ellyException.TableFailure
            if gram.rls != rid:
                print >> sys.stderr , 'inconsistent PyElly version for saved rules'
                sys.exit(1)
            self.stb = gram.stb  # copy in saved language definition objects
            self.mtb = gram.mtb  #
            self.gtb = gram.gtb  #
            self.ptb = gram.ptb  #
            self.ntb = gram.ntb  #
            self.hry = gram.hry  #
            self.man = gram.man  #
Example #5
0
    def __init__(self, system, restore=None):
        """
        initialization

        obtains the grammar rules and vocabulary for an application,
        recompiling them when no saved version is found, and sets up the
        objects built on top of them; the lists of unknown symbols found
        after each stage are recorded in gundef, pundef, eundef and vundef

        arguments:
            system   - root name for PyElly tables to load
            restore  - name of session to continue

        note:
            writes status messages to stdout and warnings to stderr, and
            calls sys.exit(1) when grammar or vocabulary tables cannot be
            obtained
        """

        nfail = 0  # error count for reporting
        self.rul = None

        self.gundef = []  # record orphan symbols by module
        self.vundef = []  #
        self.pundef = []  #
        self.eundef = []  #

        sysf = system + rules
        redefine = not _isSaved(system, rules, _rules)
        try:
            self.rul = ellyDefinition.Grammar(system, redefine, release)
        except ellyException.TableFailure:
            nfail += 1
        if nfail == 0:
            self.gundef = self.rul.stb.findUnknown()
            if redefine:
                ellyPickle.save(self.rul, sysf)

        if restore is not None:
            self.ses = ellyPickle.load(restore + '.' + system + _session)
            if self.ses is None:
                # a failed load would otherwise surface later as an
                # AttributeError when the session globals are read
                print('cannot restore session',
                      restore,
                      '- starting new session',
                      file=sys.stderr)
                self.ses = ellySession.EllySession()
        else:
            self.ses = ellySession.EllySession()

        s = self.ses  # session info
        d = self.rul  # language rules

        mtb = d.mtb if d is not None else None
        self.sbu = substitutionBuffer.SubstitutionBuffer(mtb)

        # the substitution buffer may have no stemmer attribute
        inflx = getattr(self.sbu, 'stemmer', None)
        if d is not None:
            d.man.suff.infl = inflx  # define root restoration logic

        # vocabulary must also be rebuilt when it is unsaved or out of date
        if not redefine:
            if not _isSaved(system, vocabulary,
                            _vocabulary) or _notVocabularyToDate(system):
                redefine = True

        stb = d.stb if d is not None else symbolTable.SymbolTable()

        if nfail > 0:
            print('exiting: table generation FAILures', file=sys.stderr)
            sys.exit(1)

        # from here on, d is known to be a loaded set of language rules

        self.ctx = extendedContext.ExtendedContext(stb, d.gtb.pndx, s.globals,
                                                   d.hry)

        for z in d.gtb.initzn:  # initialize global symbols for parsing
            self.ctx.glbls[z[0]] = z[1]

        self.pnc = punctuationRecognizer.PunctuationRecognizer(stb)
        self.pundef = stb.findUnknown()

        nto = len(stb.ntname)  # for consistency check

        if noParseTree:
            self.ptr = NoParseTree(stb, d.gtb, d.ptb, self.ctx)
        elif ellyConfiguration.treeDisplay:
            self.ptr = parseTreeWithDisplay.ParseTreeWithDisplay(
                stb, d.gtb, d.ptb, self.ctx)
        else:
            self.ptr = parseTree.ParseTree(stb, d.gtb, d.ptb, self.ctx)

        ntabl = d.ntb

        if ntabl is not None and ntabl.filled():
            nameRecognition.setUp(ntabl)
            ellyConfiguration.extractors.append([nameRecognition.scan, 'name'])

        self.iex = entityExtractor.EntityExtractor(self.ptr,
                                                   stb)  # set up extractors

        self.eundef = stb.findUnknown()

        if ellyConfiguration.rewriteNumbers:
            self.trs = simpleTransform.SimpleTransform()
        else:
            self.trs = None  # no automatic conversion of written out numbers

        if redefine:
            print('recompiling vocabulary rules')
        try:
            voc = ellyDefinition.Vocabulary(system, redefine, stb)
        except ellyException.TableFailure:
            voc = None
            nfail += 1

        if ellyConfiguration.treeDisplay:
            print("tree display ON")
        else:
            print("tree display OFF")

        if voc is not None:
            self.vtb = voc.vtb
        self.vundef = stb.findUnknown()

        ntn = len(
            stb.ntname)  # do consistency check on syntactic category count
        if nto != ntn:
            print(file=sys.stderr)
            print(
                'WARNING: grammar rules should predefine all syntactic categories',
                file=sys.stderr)
            print('         referenced in language definition files',
                  file=sys.stderr)
            for i in range(nto, ntn):
                print('        ',
                      stb.ntname[i].upper(),
                      '=',
                      i,
                      file=sys.stderr)
            print(file=sys.stderr)

        if nfail > 0:
            print('exiting: table generation FAILures', file=sys.stderr)
            sys.exit(1)

        sys.stderr.flush()
Example #6
0
    def __init__ ( self , system , restore=None ):

        """
        initialization

        arguments:
            system   - root name for PyElly tables to load
            restore  - name of session to continue
        """

        nfail = 0          # error count for reporting
        self.rul = None

        self.gundef = [ ]  # record orphan symbols by module
        self.vundef = [ ]  #
        self.pundef = [ ]  #
        self.eundef = [ ]  #

#       print 'EllyBase.__init__()'
#       aid = './' + system
#       try:
#           print 'a rul time=' , _timeModified(aid,rules)
#           print 'a voc time=' , _timeModified(aid,vocabulary)
#       except:
#           print '\n**** a rul or voc time exception'

        sysf = system + rules
        redefine = not _isSaved(system,rules,_rules)
#       print '0 redefine=' , redefine
        try:
            self.rul = ellyDefinition.Grammar(system,redefine,release)
        except ellyException.TableFailure:
            nfail += 1
        if nfail == 0:
            self.gundef = self.rul.stb.findUnknown()
            if redefine:
                ellyPickle.save(self.rul,sysf)

#       try:
#           print 'b rul time=' , _timeModified(aid,rules)
#           print 'b voc time=' , _timeModified(aid,vocabulary)
#       except:
#           print '\n**** b rul or voc time exception'

#       print '1 redefine=' , redefine
        if restore != None:
            self.ses = ellyPickle.load(restore + '.' + system + _session)
        else:
            self.ses = ellySession.EllySession()

        s = self.ses  # session info
        d = self.rul  # language rules

#       print '0:' , len(d.stb.ntname) , 'syntactic categories'

        mtb = d.mtb if d != None else None
        self.sbu = substitutionBuffer.SubstitutionBuffer(mtb)

        try:
            inflx = self.sbu.stemmer
#           print 'inflx=' , inflx
        except AttributeError:
            inflx = None
#       print 'inflx=' , inflx
        if d != None:
            d.man.suff.infl = inflx   # define root restoration logic

#       print '2 redefine=' , redefine
        if not redefine:
            if not _isSaved(system,vocabulary,_vocabulary) or _notVocabularyToDate(system):
                redefine = True

        stb = d.stb if d != None else symbolTable.SymbolTable()

#       print self.rul.stb
#       print stb

        if nfail > 0:
            print >> sys.stderr , 'exiting: table generation FAILures'
            sys.exit(1)

#       print '1:' , len(stb.ntname) , 'syntactic categories'

        self.ctx = interpretiveContext.InterpretiveContext(stb,d.gtb.pndx,s.globals,d.hry)

        for z in d.gtb.initzn:        # initialize global symbols for parsing
            self.ctx.glbls[z[0]] = z[1]

#       print '2:' , len(stb.ntname) , 'syntactic categories'

        self.pnc = punctuationRecognizer.PunctuationRecognizer(stb)
        self.pundef = stb.findUnknown()

#       print '3:' , len(stb.ntname) , 'syntactic categories'

        nto = len(stb.ntname)         # for consistency check

        if noParseTree:
            self.ptr = NoParseTree(stb,d.gtb,d.ptb,self.ctx)
        elif ellyConfiguration.treeDisplay:
            self.ptr = parseTreeWithDisplay.ParseTreeWithDisplay(stb,d.gtb,d.ptb,self.ctx)
        else:
            self.ptr = parseTree.ParseTree(stb,d.gtb,d.ptb,self.ctx)

        ntabl = d.ntb

        if ntabl != None and ntabl.filled():
            nameRecognition.setUp(ntabl)
            ellyConfiguration.extractors.append( [ nameRecognition.scan , 'name' ] )

        self.iex = entityExtractor.EntityExtractor(self.ptr,stb) # set up extractors

        self.eundef = stb.findUnknown()

        if ellyConfiguration.rewriteNumbers:
            self.trs = simpleTransform.SimpleTransform()
        else:
            self.trs = None           # no automatic conversion of written out numbers

#       print '4:' , len(stb.ntname) , 'syntactic categories'

#       print '3 redefine=' , redefine
        if redefine: print 'recompiling vocabulary rules'
        try:
            voc = ellyDefinition.Vocabulary(system,redefine,stb)
        except ellyException.TableFailure:
            voc = None
            nfail += 1

        if ellyConfiguration.treeDisplay:
            print "tree display ON"
        else:
            print "tree display OFF"

#       try:
#           print 'c rul time=' , _timeModified(aid,rules)
#           print 'c voc time=' , _timeModified(aid,vocabulary)
#       except:
#           print 'rul or voc time exception'

#       print 'vundef=' , self.vundef
        if voc != None: self.vtb = voc.vtb
        self.vundef = stb.findUnknown()
#       print 'vundef=' , self.vundef

        ntn = len(stb.ntname)         # do consistency check on syntactic category count
        if nto != ntn:
            print >> sys.stderr , ''
            print >> sys.stderr , 'WARNING: grammar rules should predefine all syntactic categories'
            print >> sys.stderr , '         referenced in language definition files'
            for i in range(nto,ntn):
                print >> sys.stderr , '        ' , stb.ntname[i].upper() , '=' , i
            print >> sys.stderr , ''

        if nfail > 0:
            print >> sys.stderr , 'exiting: table generation FAILures'
            sys.exit(1)

        sys.stderr.flush()
Example #7
0
    def __init__(self, system, create, rid=None):
        """
        load all definitions from binary or text files

        when create is true, each table is compiled from its text input
        and the names of all failing tables are collected so that every
        failure is reported before giving up; the result is saved only
        when a release ID is given

        when create is false, a previously saved object is loaded and its
        table attributes are copied into this object

        arguments:
            self     -
            system   - which PyElly application
            create   - whether to create new binary
            rid      - PyElly release ID

        exceptions:
            TableFailure on error

        note:
            calls sys.exit(1) when the saved rules carry a release ID
            different from rid
        """

        super(Grammar, self).__init__()

        self.rls = rid
        sysf = system + grammar

        if create:
            print("recompiling grammar rules")

            self.stb = symbolTable.SymbolTable()  # new empty table to fill in

            el = []  # names of tables that failed

            try:
                self.mtb = macroTable.MacroTable(self.inpT(system, 'm'))
            except ellyException.TableFailure:
                el.append('macro')
            try:
                self.gtb = grammarTable.GrammarTable(self.stb,
                                                     self.inpT(system, 'g'))
                self.stb.setBaseSymbols()
            except ellyException.TableFailure:
                el.append('grammar')
            try:
                self.ptb = patternTable.PatternTable(self.stb,
                                                     self.inpT(system, 'p'))
            except ellyException.TableFailure:
                el.append('pattern')

            try:
                self.hry = conceptualHierarchy.ConceptualHierarchy(
                    self.inpT(system, 'h'))
            except ellyException.TableFailure:
                el.append('concept')

            try:
                self.ntb = nameTable.NameTable(self.inpT(system, 'n'))
            except ellyException.TableFailure:
                el.append('name')

            try:
                self.ctb = compoundTable.CompoundTable(self.stb,
                                                       self.inpT(system, 't'))
            except ellyException.TableFailure:
                el.append('compound')

            sa = self.inpT(system, 'stl')
            pa = self.inpT(system, 'ptl')
            try:
                self.man = morphologyAnalyzer.MorphologyAnalyzer(sa, pa)
            except ellyException.TableFailure:
                el.append('morphology')

            if el:  # report all failures together
                print('rule FAILures on', el, file=sys.stderr)
                raise ellyException.TableFailure

            if self.rls is not None:  # save only with a release ID
                ellyPickle.save(self, sysf)

        else:
            print("loading saved grammar rules from", sysf)

            gram = ellyPickle.load(sysf)
            if gram is None:  # nothing could be loaded
                raise ellyException.TableFailure
            if gram.rls != rid:
                print('inconsistent PyElly version for saved rules',
                      file=sys.stderr)
                sys.exit(1)
            self.stb = gram.stb  # copy in saved language definition objects
            self.mtb = gram.mtb  #
            self.gtb = gram.gtb  #
            self.ptb = gram.ptb  #
            self.ctb = gram.ctb  #
            self.ntb = gram.ntb  #
            self.hry = gram.hry  #
            self.man = gram.man  #
Example #8
0
            print()
            for r in rs:  # if found, note each entry
                print('=', str(r.vem))  # show each match
                print(' ', r.nspan, 'chars matched, endings included')
                if r.suffx != '':
                    print('  ending=', '[' + r.suffx + ']')
#               print ( 'generative semantics' )
                showCode(r.vem.gen.logic)
                print()
            print('--')

    # NOTE(review): nams is read from arg[] while limt is read from
    # sys.argv[] -- confirm both refer to the same argument list; limt is
    # also a str when given but the int 24 by default
    nams = arg[0] if len(arg) > 0 else 'test'
    dfns = nams + source
    limt = sys.argv[2] if len(sys.argv) > 2 else 24

    erul = load(nams + grammar)  # get pickled Elly rules
    if erul == None:
        ustb = symbolTable.SymbolTable()  # if none, make new symbol table
    else:
        ustb = erul.stb  # else, get existing symbol table

    unkns = ustb.findUnknown()  # check for new symbols added
    print("new symbols")
    for us in unkns:
        print('[' + us + ']')  # show every symbol
    print()

    # open the definition source; a reader problem is reported via inp.error
    print('source=', dfns)
    inp = ellyDefinitionReader.EllyDefinitionReader(dfns)
    if inp.error != None:
        print(inp.error, file=sys.stderr)
Example #9
0
            print ''
            for r in rs:                     # if found, note each entry
                print '=' , unicode(r.vem)   # show each match
                print ' ' , r.nspan , 'chars matched, endings included'
                if r.suffx != '':
                    print '  ending=' , '[' +  r.suffx + ']'
#               print 'generative semantics'
                showCode(r.vem.gen.logic)
                print ''
            print '--'

    # NOTE(review): nams is read from arg[] while limt is read from
    # sys.argv[] -- confirm both refer to the same argument list; limt is
    # also a str when given but the int 24 by default
    nams = arg[0] if len(arg) > 0 else 'test'
    dfns = nams + source
    limt = sys.argv[2] if len(sys.argv) > 2 else 24

    erul = load(nams + rules)                       # get pickled Elly rules
    if erul == None:
        ustb = symbolTable.SymbolTable()            # if none, make new symbol table
    else:
        ustb = erul.stb                             # else, get existing symbol table

    unkns = ustb.findUnknown()                      # check for new symbols added
    print "new symbols"
    for us in unkns:
        print '[' + us + ']'                        # show every symbol
    print ''

    # open the definition source; a reader problem is reported via inp.error
    print 'source=' , dfns
    inp = ellyDefinitionReader.EllyDefinitionReader(dfns)
    if inp.error != None:
        print >> sys.stderr , inp.error