Beispiel #1
0
    def _store ( self , defs , nowarn ):

        """
        put macro substitutions into table with indexing by first char of pattern

        rules are read from defs one line at a time until an empty read; each
        rule is checked and wrapped in a Rule, which is then filed under every
        element that can start a match: the first char of each leading optional
        sequence plus the first pattern element following those sequences

        arguments:
            self   -
            defs   - source of macro definitions, read with readline()
            nowarn - whether to turn warnings off

        exceptions:
            TableFailure on error
        """

        while True:
            l = defs.readline()               # next macro rule
            if len(l) == 0: break             # EOF check
            dl = definitionLine.DefinitionLine(l,False)
            left = dl.left                    # pattern to be matched
            tail = dl.tail                    # transformation to apply to match
            if left is None or tail is None:
                self._err(l=l)                # report missing part of rule
                continue
            if ' ' in left:                   # pattern side of macro rule
                ms = 'pattern in macro contains spaces'
                self._err(s=ms,l=l,d=1)       # cannot contain any space chars
                continue

            nspm = ellyWildcard.numSpaces(list(left))
            pat = ellyWildcard.convert(left)  # get pattern with encoded wildcards
            if pat is None:
                self._err('bad wildcards',l)
                continue
            if len(pat) == 0:                 # must be caught before pat is indexed below
                self._err('no pattern',l)
                continue

            if pat[-1] not in ( ellyWildcard.cALL , ellyWildcard.cEND , ellyWildcard.cSPC ):
                pat += ellyWildcard.cEND      # pattern must end in $ if it does not end in * or _
            if not _checkBindings(pat,tail):
                self._err('bad bindings in substitution',l)
                continue
            if not nowarn and not _checkExpansion(pat,tail):
                                              # reported with d=0 and no skip: rule is still stored
                self._err('substitution may be longer than original string',l,0)

            r = Rule( pat , nspm , tail )

            c = pat[0]                        # first char of pattern
                                              # check type to see how to index rule
            p = pat                           # part of pattern still to be examined
                                              # NOTE(review): cEOS is used unqualified here while cSOS
                                              #   is qualified - confirm it is defined at module level
            while c == ellyWildcard.cSOS:     # optional sequence?
                if not cEOS in p:
                    break
                k = p.index(cEOS)             # if so, find the end of sequence
                if k == 1: break              # if empty sequence, stop
                k += 1
                if k == len(p): break         # should be something after sequence
                m = ellyChar.toIndex(p[1])    # index by first char of THIS optional sequence
                self.index[m].append(r)       #   (must be non-wildcard)
                p = p[k:]                     # move up in pattern
                c = p[0]                      #   but check for another optional sequence

            if c == ellyWildcard.cSOS:        # loop stopped on a sequence it could not handle
                self._err(l=l)
                continue                      # bad sequence, skip this rule

            if ellyChar.isLetterOrDigit(c):   # check effective first char of pattern
                m = ellyChar.toIndex(c)
                self.index[m].append(r)       # add to index under alphanumeric char
            elif ellyChar.isText(c):
                self.index[0].append(r)       # add to index under punctuation
            elif c not in ellyWildcard.Matching:
                if c == ellyWildcard.cEND:    # only a warning, but rule is not stored or counted
                    print >> sys.stderr , '** macro warning: pattern can have empty match'
                    print >> sys.stderr , '*  at [' , l , ']'
                else:
                    dc = '=' + str(ord(c) - ellyWildcard.X)
                    self._err('bad wildcard code' , dc)
                continue
            elif c == ellyWildcard.cANY or c == ellyWildcard.cALL:
                self.anyWx.append(r)          # under general wildcards
            elif c == ellyWildcard.cCAN:
                self.index[0].append(r)       # under punctuation
            elif c == ellyWildcard.cDIG or c == ellyWildcard.cSDG:
                self.digWx.append(r)          # under digit wildcards
            elif c == ellyWildcard.cSAN:
                self.digWx.append(r)          # under both digit and
                self.letWx.append(r)          #   letter wildcards
            elif c == ellyWildcard.cAPO:      # right single quote or apostrophe
                self.apoWx.append(r)          #
            elif c == ellyWildcard.cSPC or c == ellyWildcard.cEND:
                self._err('bad wildcard in context',l)
                continue                      # wildcards unacceptable here
            else:
                self.letWx.append(r)          # everything else under letter wildcard

            self.count += 1                   # count up macro substitution

        if self._errcount > 0:                # any error reported above fails the whole table
            print >> sys.stderr , '**' , self._errcount , 'macro errors in all'
            print >> sys.stderr , 'macro table definition FAILed'
            raise ellyException.TableFailure
Beispiel #2
0
    def __init__ ( self , syms , dfls ):

        """
        build a link from the elements of its definition

        arguments:
            self  -
            syms  - Elly grammatical symbol table
            dfls  - definition elements in list: pattern, syntax, then
                    optionally semantic features and bias, and finally
                    the next state

        exceptions:
            FormatFailure on error
        """

        nelm = len(dfls)
        if not 3 <= nelm <= 5:                        # must have 3 to 5 elements
            raise ellyException.FormatFailure

        source = dfls[0]                              # pattern as written in definition
        if source == '\\0':
            self.patn = u'\x00'                       # special nul pattern
        else:
            if ellyWildcard.numSpaces(list(source)) > 0:
                print >> sys.stderr , '** link pattern includes space:' , dfls[0]
                raise ellyException.FormatFailure
            self.patn = ellyWildcard.convert(source)  # encode Elly pattern

                                                      # a bare $ is exempt from the minimum match check
        if source != '$' and ( self.patn == None or ellyWildcard.minMatch(self.patn) == 0 ):
            print >> sys.stderr , '** bad link pattern:' , dfls[0]
            raise ellyException.FormatFailure

        target = dfls[-1]                             # next state, still as text
        stopping = ( target == '-1' )                 # is this a stop state for matching?

        self.catg = None                              # defaults
        self.synf = None                              #
        self.semf = None                              #
        self.bias = 0                                 #

        spec = dfls[1].lower()                        # assumed not to be Unicode
        if spec != '-':                               # allow for no category
            syx = syntaxSpecification.SyntaxSpecification(syms,spec)
            if syx != None:
                if not stopping:
                    raise ellyException.FormatFailure # cannot have syntax here
                self.catg = syx.catg                  # syntactic category
                self.synf = syx.synf.positive         # syntactic features

        semantic = None                               # no semantic features unless given
        if nelm > 3:
            if not stopping:
                raise ellyException.FormatFailure     # cannot have semantics here
            if dfls[2] != '-':
                semantic = dfls[2].lower()
        sem = featureSpecification.FeatureSpecification(syms,semantic,True)
        self.semf = sem.positive                      # get semantic features

        if nelm > 4:
            try:
                self.bias = int(dfls[3])
            except ValueError:
                raise ellyException.FormatFailure     # unrecognizable bias

        try:
            nxt = int(target)                         # next state for link
        except ValueError:
            raise ellyException.FormatFailure         # unrecognizable number

        if nxt < 0:                                   # final transition?
            if self.patn == u'\x00':
                raise ellyException.FormatFailure     # final state not allowed here
                                                      # final pattern must end with * or $
            if self.patn[-1] not in ( ellyWildcard.cALL , ellyWildcard.cEND ):
                self.patn += ellyWildcard.cEND        # default is $
                print >> sys.stderr , '** final $ added to pattern' , list(self.patn)

        self.nxts = nxt                               # specify next state
Beispiel #3
0
    def _store(self, defs, nowarn):
        """
        put macro substitutions into table with indexing by first char of pattern

        rules are read from defs one line at a time until an empty read; each
        rule is checked and wrapped in a Rule, which is then filed under every
        element that can start a match: the first char of each leading optional
        sequence plus the first pattern element following those sequences

        arguments:
            self   -
            defs   - source of macro definitions, read with readline()
            nowarn - whether to turn warnings off

        exceptions:
            TableFailure on error
        """

        while True:
            l = defs.readline()  # next macro rule
            if len(l) == 0:  # EOF check
                break
            dl = definitionLine.DefinitionLine(l, False)
            left = dl.left  # pattern to be matched
            tail = dl.tail  # transformation to apply to match
            if left is None or tail is None:
                self._err(l=l)  # report missing part of rule
                continue
            if ' ' in left:  # pattern side of macro rule
                ms = 'pattern in macro contains spaces'
                self._err(s=ms, l=l, d=1)  # cannot contain any space chars
                continue

            nspm = ellyWildcard.numSpaces(list(left))
            pat = ellyWildcard.convert(left)  # get pattern with encoded wildcards
            if pat is None:
                self._err('bad wildcards', l)
                continue
            if len(pat) == 0:  # must be caught before pat is indexed below
                self._err('no pattern', l)
                continue

            if pat[-1] not in (ellyWildcard.cALL, ellyWildcard.cEND,
                               ellyWildcard.cSPC):
                pat += ellyWildcard.cEND  # pattern must end in $ if it does not end in * or _
            if not _checkBindings(pat, tail):
                self._err('bad bindings in substitution', l)
                continue
            if not nowarn and not _checkExpansion(pat, tail):
                # reported with d=0 and no skip: rule is still stored
                self._err('substitution may be longer than original string', l,
                          0)

            r = Rule(pat, nspm, tail)

            c = pat[0]  # first char of pattern
            # check type to see how to index rule
            p = pat  # part of pattern still to be examined
            # NOTE(review): cEOS is used unqualified here while cSOS is
            # qualified - confirm it is defined at module level
            while c == ellyWildcard.cSOS:  # optional sequence?
                if not cEOS in p:
                    break
                k = p.index(cEOS)  # if so, find the end of sequence
                if k == 1:  # if empty sequence, stop
                    break
                k += 1
                if k == len(p):  # should be something after sequence
                    break
                m = ellyChar.toIndex(p[1])  # index by first char of THIS optional sequence
                self.index[m].append(r)  #   (must be non-wildcard)
                p = p[k:]  # move up in pattern
                c = p[0]  #   but check for another optional sequence

            if c == ellyWildcard.cSOS:  # loop stopped on a sequence it could not handle
                self._err(l=l)
                continue  # bad sequence, skip this rule

            if ellyChar.isLetterOrDigit(c):  # check effective first char of pattern
                m = ellyChar.toIndex(c)
                self.index[m].append(r)  # add to index under alphanumeric char
            elif ellyChar.isText(c):
                self.index[0].append(r)  # add to index under punctuation
            elif c not in ellyWildcard.Matching:
                if c == ellyWildcard.cEND:  # only a warning, but rule is not stored or counted
                    print('** macro warning: pattern can have empty match',
                          file=sys.stderr)
                    print('*  at [', l, ']', file=sys.stderr)
                else:
                    dc = '=' + str(ord(c) - ellyWildcard.X)
                    self._err('bad wildcard code', dc)
                continue
            elif c == ellyWildcard.cANY or c == ellyWildcard.cALL:
                self.anyWx.append(r)  # under general wildcards
            elif c == ellyWildcard.cCAN:
                self.index[0].append(r)  # under punctuation
            elif c == ellyWildcard.cDIG or c == ellyWildcard.cSDG:
                self.digWx.append(r)  # under digit wildcards
            elif c == ellyWildcard.cSAN:
                self.digWx.append(r)  # under both digit and
                self.letWx.append(r)  #   letter wildcards
            elif c == ellyWildcard.cAPO:  # right single quote or apostrophe
                self.apoWx.append(r)  #
            elif c == ellyWildcard.cSPC or c == ellyWildcard.cEND:
                self._err('bad wildcard in context', l)
                continue  # wildcards unacceptable here
            else:
                self.letWx.append(r)  # everything else under letter wildcard

            self.count += 1  # count up macro substitution

        if self._errcount > 0:  # any error reported above fails the whole table
            print(self._errcount, 'macro errors in all', file=sys.stderr)
            print('macro table definition FAILed', file=sys.stderr)
            raise ellyException.TableFailure
Beispiel #4
0
    def __init__(self, syms, dfls):
        """
        build a link from the elements of its definition

        arguments:
            self  -
            syms  - Elly grammatical symbol table
            dfls  - definition elements in list: pattern, syntax, then
                    optionally semantic features and bias, and finally
                    the next state

        exceptions:
            FormatFailure on error
        """

        nelm = len(dfls)
        if nelm < 3 or nelm > 5:  # must have 3 to 5 elements
            raise ellyException.FormatFailure

        source = dfls[0]  # pattern as written in definition
        if source == '\\0':
            self.patn = u'\x00'  # special nul pattern
        else:
            if ellyWildcard.numSpaces(list(source)) > 0:
                print >> sys.stderr, '** link pattern includes space:', dfls[0]
                raise ellyException.FormatFailure
            self.patn = ellyWildcard.convert(source)  # encode Elly pattern

        # a bare $ is exempt from the minimum match check
        if source != '$' and (self.patn == None
                              or ellyWildcard.minMatch(self.patn) == 0):
            print >> sys.stderr, '** bad link pattern:', dfls[0]
            raise ellyException.FormatFailure

        target = dfls[-1]  # next state, still as text

        self.catg = None  # defaults
        self.synf = None  #
        self.semf = None  #
        self.bias = 0  #

        spec = dfls[1].lower()  # assumed not to be Unicode
        if spec != '-':  # allow for no category
            syx = syntaxSpecification.SyntaxSpecification(syms, spec)
            if syx != None:
                if target not in ('-1', '-2'):  # not a stop state for matching
                    raise ellyException.FormatFailure  # cannot have syntax here
                self.catg = syx.catg  # syntactic category
                self.synf = syx.synf.positive  # syntactic features

        semantic = None  # no semantic features unless given
        if nelm > 3:
            if target != '-1':  # not a stop state for matching
                raise ellyException.FormatFailure  # cannot have semantics here
            if dfls[2] != '-':
                semantic = dfls[2].lower()
        sem = featureSpecification.FeatureSpecification(syms, semantic, True)
        self.semf = sem.positive  # get semantic features

        if nelm > 4:
            try:
                self.bias = int(dfls[3])
            except ValueError:
                raise ellyException.FormatFailure  # unrecognizable bias

        try:
            nxt = int(target)  # next state for link
        except ValueError:
            raise ellyException.FormatFailure  # unrecognizable number

        if nxt < 0:  # final transition?
            if self.patn == u'\x00':
                raise ellyException.FormatFailure  # final state not allowed here
            # only a -1 transition gets the default ending added
            if nxt == -1 and self.patn[-1] not in (ellyWildcard.cALL,
                                                   ellyWildcard.cEND):
                # final pattern must end with * or $
                self.patn += ellyWildcard.cEND  # default is $
                print >> sys.stderr, '** final $ added to pattern', list(
                    self.patn)

        self.nxts = nxt  # specify next state