Ejemplo n.º 1
0
#   print ( 'mtb=' , mtb , 'with' , mtb.count , 'patterns' )
    if mtb.count == 0:  # check for success
        print('empty table', file=sys.stderr)
        sys.exit(1)  # quit on no table
    print()
    mtb.dump()

    #######################################################################
    # this code will test macro substitution indirectly through a PyElly  #
    # substitutionBuffer, which supports basic tokenization from a stream #
    # of input text; it does NONE of the tokenization associated with     #
    # recognizing token types by character patterns, entity extraction or #
    # vocabulary table lookup                                             #
    #######################################################################

    sb = substitutionBuffer.SubstitutionBuffer(mtb)
    while True:
        sys.stdout.write('\n> ')
        st = sys.stdin.readline()  # get test input
        if len(st) <= 1: break
        ss = st.strip()  # convert to Unicode
        print('RAW =', st.strip())
        print('TEXT=', list(ss), '(' + str(len(ss)) + ')')
        sb.clear()
        sb.append(ss)
        print()

        no = 0
        while True:
            print('+', sb.buffer)
            to = sb.getNext()
Ejemplo n.º 2
0
    def __init__(self, system):
        """
        initialization of processing rules

        builds the grammar and vocabulary definitions for a system and then
        the token-level components that depend on them; vocabulary generation
        is still attempted after a grammar failure so that both failures are
        counted before the process exits

        arguments:
            self     -
            system   - root name of PyElly tables to load

        exits:
            calls sys.exit(1) if grammar or vocabulary generation raises
            ellyException.TableFailure
        """

        nfail = 0  # error count for reporting

        self.rul = None  # remains None when grammar generation fails

        self.tks = None  # token list for output

        self.ptr = Tree()

        try:
            self.rul = ellyDefinition.Grammar(system, True, None)
        except ellyException.TableFailure:
            nfail += 1

        d = self.rul  # language rules

        # identity test against None cannot be affected by any __eq__/__ne__
        # that the grammar object might define
        have_rules = d is not None

        self.gtb = d.gtb if have_rules else None

        mtb = d.mtb if have_rules else None
        self.sbu = substitutionBuffer.SubstitutionBuffer(mtb)

        # the buffer is not required to expose a stemmer attribute
        inflx = getattr(self.sbu, 'stemmer', None)

        if have_rules:
            d.man.suff.infl = inflx  # define root restoration logic

        # without grammar rules, use a fresh symbol table so that vocabulary
        # generation can still be attempted and its failure counted
        stb = d.stb if have_rules else symbolTable.SymbolTable()

        voc = None  # keep the name bound even when generation fails
        try:
            voc = ellyDefinition.Vocabulary(system, True, stb)
        except ellyException.TableFailure:
            nfail += 1

        if nfail > 0:
            print('exiting: table generation FAILures', file=sys.stderr)
            sys.exit(1)

        # past this point both d and voc were generated successfully

        self.vtb = voc.vtb

        self.pnc = punctuationRecognizer.PunctuationRecognizer(stb)

        self.iex = entityExtractor.EntityExtractor(self.ptr,
                                                   stb)  # set up extractors

        self.trs = simpleTransform.SimpleTransform()

        ntabl = d.ntb

        # NOTE(review): the name extractor is appended to the shared
        # ellyConfiguration.extractors list after the EntityExtractor above
        # was constructed, and once more on every instantiation -- confirm
        # that both the ordering and the repeated registration are intended
        if ntabl is not None and ntabl.filled():
            nameRecognition.setUp(ntabl)
            ellyConfiguration.extractors.append([nameRecognition.scan, 'name'])
Ejemplo n.º 3
0
    def __init__(self, system, restore=None):
        """
        initialization

        builds the grammar and vocabulary definitions for a system, creates
        or restores a session, and sets up the objects used for analysis:
        substitution buffer, extended context, punctuation recognizer, parse
        tree, entity extractor, and an optional number transform

        the results of stb.findUnknown() are recorded after successive
        stages in gundef, pundef, eundef, and vundef

        arguments:
            self     -
            system   - root name for PyElly tables to load
            restore  - name of session to continue

        exits:
            calls sys.exit(1) if grammar or vocabulary generation raises
            ellyException.TableFailure
        """

        nfail = 0  # error count for reporting
        self.rul = None  # remains None when grammar generation fails

        self.gundef = []  # record orphan symbols by module
        self.vundef = []  #   after vocabulary generation
        self.pundef = []  #   after punctuation recognizer setup
        self.eundef = []  #   after entity extractor setup

        #       print ( 'EllyBase.__init__()' )
        #       aid = './' + system
        #       try:
        #           print ( 'a rul time=' , _timeModified(aid,rules) )
        #           print ( 'a voc time=' , _timeModified(aid,vocabulary) )
        #       except:
        #           print ( '\n**** a rul or voc time exception' )

        # rules, _rules, and release are names defined outside this method
        sysf = system + rules  # name passed to ellyPickle.save() below
        # presumably True when no usable saved rules exist -- TODO confirm
        # against _isSaved()
        redefine = not _isSaved(system, rules, _rules)
        #       print ( '0 redefine=' , redefine )
        try:
            self.rul = ellyDefinition.Grammar(system, redefine, release)
        except ellyException.TableFailure:
            nfail += 1
        if nfail == 0:
            # grammar is available: note its unknown symbols and save the
            # rules only when they were just redefined
            self.gundef = self.rul.stb.findUnknown()
            if redefine:
                ellyPickle.save(self.rul, sysf)

#       try:
#           print ( 'b rul time=' , _timeModified(aid,rules) )
#           print ( 'b voc time=' , _timeModified(aid,vocabulary) )
#       except:
#           print ( '\n**** b rul or voc time exception' )

#       print ( '1 redefine=' , redefine )
        # continue a named session if requested, otherwise start a new one
        # NOTE(review): ellyPickle.load presumably unpickles the named file,
        # so restore should only name sessions from a trusted source -- confirm
        if restore != None:
            self.ses = ellyPickle.load(restore + '.' + system + _session)
        else:
            self.ses = ellySession.EllySession()

        s = self.ses  # session info
        d = self.rul  # language rules

        #       print ( '0:' , len(d.stb.ntname) , 'syntactic categories' )

        #       print ( 'base language=' , ellyConfiguration.language )
        # d is None here if grammar generation failed, hence the guards
        mtb = d.mtb if d != None else None
        self.sbu = substitutionBuffer.SubstitutionBuffer(mtb)

        # the buffer is not required to expose a stemmer attribute
        try:
            inflx = self.sbu.stemmer
#           print ( 'inflx=' , inflx )
        except AttributeError:
            inflx = None
#       print ( 'inflx=' , inflx )
        if d != None:
            d.man.suff.infl = inflx  # define root restoration logic

#       print ( '2 redefine=' , redefine )
        # even when the rules were not redefined, force vocabulary
        # recompilation if the saved-vocabulary check fails or
        # _notVocabularyToDate() reports true
        if not redefine:
            if not _isSaved(system, vocabulary,
                            _vocabulary) or _notVocabularyToDate(system):
                redefine = True

        # without grammar rules, fall back on a fresh symbol table
        stb = d.stb if d != None else symbolTable.SymbolTable()

        #       print ( self.rul.stb )
        #       print ( stb )

        # stop on grammar failure: everything below dereferences d
        if nfail > 0:
            print('exiting: table generation FAILures', file=sys.stderr)
            sys.exit(1)

#       print ( '1:' , len(stb.ntname) , 'syntactic categories' )

        self.ctx = extendedContext.ExtendedContext(stb, d.gtb.pndx, s.globals,
                                                   d.hry)

        # each initzn entry is indexed as [0] = key, [1] = value
        for z in d.gtb.initzn:  # initialize global symbols for parsing
            self.ctx.glbls[z[0]] = z[1]

#       print ( '2:' , len(stb.ntname) , 'syntactic categories' )

        self.pnc = punctuationRecognizer.PunctuationRecognizer(stb)
        self.pundef = stb.findUnknown()

        #       print ( '3:' , len(stb.ntname) , 'syntactic categories' )

        # baseline category count, compared with the count taken after
        # vocabulary generation further below
        nto = len(stb.ntname)  # for consistency check

        # choose the parse tree implementation; noParseTree is a name
        # defined outside this method and takes precedence over treeDisplay
        if noParseTree:
            self.ptr = NoParseTree(stb, d.gtb, d.ptb, self.ctx)
        elif ellyConfiguration.treeDisplay:
            self.ptr = parseTreeWithDisplay.ParseTreeWithDisplay(
                stb, d.gtb, d.ptb, self.ctx)
        else:
            self.ptr = parseTree.ParseTree(stb, d.gtb, d.ptb, self.ctx)

        ntabl = d.ntb

        # register name recognition as an extractor only when a filled name
        # table is available; this happens before the EntityExtractor below
        # is constructed
        if ntabl != None and ntabl.filled():
            nameRecognition.setUp(ntabl)
            ellyConfiguration.extractors.append([nameRecognition.scan, 'name'])

        self.iex = entityExtractor.EntityExtractor(self.ptr,
                                                   stb)  # set up extractors

        self.eundef = stb.findUnknown()

        if ellyConfiguration.rewriteNumbers:
            self.trs = simpleTransform.SimpleTransform()
        else:
            self.trs = None  # no automatic conversion of written out numbers

#       print ( '4:' , len(stb.ntname) , 'syntactic categories' )

#       print ( '3 redefine=' , redefine )
        if redefine: print('recompiling vocabulary rules')
        # a vocabulary failure is only counted here; exit is deferred so
        # that the reporting below still runs
        try:
            voc = ellyDefinition.Vocabulary(system, redefine, stb)
        except ellyException.TableFailure:
            voc = None
            nfail += 1

        if ellyConfiguration.treeDisplay:
            print("tree display ON")
        else:
            print("tree display OFF")

#       try:
#           print ( 'c rul time=' , _timeModified(aid,rules) )
#           print ( 'c voc time=' , _timeModified(aid,vocabulary) )
#       except:
#           print ( 'rul or voc time exception' )

#       print ( 'vundef=' , self.vundef )
        # self.vtb is left unset when vocabulary generation failed; the
        # final nfail check exits in that case
        if voc != None: self.vtb = voc.vtb
        self.vundef = stb.findUnknown()
        #       print ( 'vundef=' , self.vundef )

        ntn = len(
            stb.ntname)  # do consistency check on syntactic category count
        # warn about every category added since the baseline count nto
        if nto != ntn:
            print(file=sys.stderr)
            print(
                'WARNING: grammar rules should predefine all syntactic categories',
                file=sys.stderr)
            print('         referenced in language definition files',
                  file=sys.stderr)
            for i in range(nto, ntn):
                print('        ',
                      stb.ntname[i].upper(),
                      '=',
                      i,
                      file=sys.stderr)
            print(file=sys.stderr)

        # nfail can only be nonzero here because of a vocabulary failure
        if nfail > 0:
            print('exiting: table generation FAILures', file=sys.stderr)
            sys.exit(1)

        sys.stderr.flush()