def _store(self, defs, nowarn):
    """
    put macro substitutions into table with indexing by first char of pattern

    each rule is read with defs.readline(), split into a pattern and a
    substitution, checked, and then filed under the index list or wildcard
    list selected by the effective first char of its encoded pattern; a rule
    failing a check is reported through self._err() and skipped

    arguments:
        self   -
        defs   - macro definitions, read one rule per readline() call
        nowarn - whether to turn warnings off

    exceptions:
        TableFailure on error (raised after all input is read,
        when self._errcount is nonzero)
    """
    while True:
        line = defs.readline()                      # next macro rule
        if len(line) == 0:
            break                                   # EOF check

        dl = definitionLine.DefinitionLine(line, False)
        left = dl.left                              # pattern to be matched
        tail = dl.tail                              # transformation to apply to match
        if left is None or tail is None:
            self._err(l=line)                       # report missing part of rule
            continue
        if ' ' in left:                             # pattern side cannot contain any space chars
            self._err(s='pattern in macro contains spaces', l=line, d=1)
            continue

        nspm = ellyWildcard.numSpaces(list(left))
        pat = ellyWildcard.convert(left)            # get pattern with encoded wildcards
        if pat is None:
            self._err('bad wildcards', line)
            continue

        # pattern must end in $ if it does not end in * or _
        terminators = (ellyWildcard.cALL, ellyWildcard.cEND, ellyWildcard.cSPC)
        if pat[-1] not in terminators:
            pat += ellyWildcard.cEND

        if not _checkBindings(pat, tail):
            self._err('bad bindings in substitution', line)
            continue
        if not nowarn and not _checkExpansion(pat, tail):
            # reported, but the rule is still stored
            self._err('substitution may be longer than original string', line, 0)

        r = Rule(pat, nspm, tail)

        # check type of effective first char to see how to index rule
        c = pat[0]
        p = pat
        while c == ellyWildcard.cSOS:               # optional sequence?
            # NOTE(review): cEOS is referenced unqualified, unlike the other
            # ellyWildcard constants here - confirm it is defined at module level
            if not cEOS in p:
                break
            k = p.index(cEOS)                       # if so, find the end of sequence
            if k == 1:
                break                               # empty sequence, stop
            k += 1
            if k == len(p):
                break                               # should be something after sequence
            # index by first char of the CURRENT optional sequence (must be
            # non-wildcard); p, not pat, so that a second optional sequence
            # is filed under its own first char
            m = ellyChar.toIndex(p[1])
            self.index[m].append(r)
            p = p[k:]                               # move up in pattern
            c = p[0]                                # but check for another optional sequence

        if c == ellyWildcard.cSOS:
            self._err(l=line)
            continue                                # bad sequence, skip this rule

        if ellyChar.isLetterOrDigit(c):             # check effective first char of pattern
            m = ellyChar.toIndex(c)
            self.index[m].append(r)                 # add to index under alphanumeric char
        elif ellyChar.isText(c):
            self.index[0].append(r)                 # add to index under punctuation
        elif not c in ellyWildcard.Matching:
            if c == ellyWildcard.cEND:
                print('** macro warning: pattern can have empty match', file=sys.stderr)
                print('* at [', line, ']', file=sys.stderr)
            else:
                dc = '=' + str(ord(c) - ellyWildcard.X)
                self._err('bad wildcard code', dc)
            # NOTE(review): neither case files the rule anywhere, so it is not
            # counted; the collapsed source is ambiguous on whether this
            # continue also covered the warning case - confirm
            continue
        elif c == ellyWildcard.cANY or c == ellyWildcard.cALL:
            self.anyWx.append(r)                    # under general wildcards
        elif c == ellyWildcard.cCAN:
            self.index[0].append(r)                 # under punctuation
        elif c == ellyWildcard.cDIG or c == ellyWildcard.cSDG:
            self.digWx.append(r)                    # under digit wildcards
        elif c == ellyWildcard.cSAN:
            self.digWx.append(r)                    # under both digit and
            self.letWx.append(r)                    # letter wildcards
        elif c == ellyWildcard.cAPO:                # right single quote or apostrophe
            self.apoWx.append(r)
        elif c == ellyWildcard.cSPC or c == ellyWildcard.cEND:
            self._err('bad wildcard in context', line)
            continue                                # wildcards unacceptable here
        else:
            self.letWx.append(r)                    # everything else under letter wildcard

        self.count += 1                             # count up macro substitution

    if self._errcount > 0:
        print('**', self._errcount, 'macro errors in all', file=sys.stderr)
        print('macro table definition FAILed', file=sys.stderr)
        raise ellyException.TableFailure
def __init__(self, syms, dfls):
    """
    initialization from a list of definition elements

    dfls[0] is the pattern, dfls[1] the syntax specification ('-' for none)
    and dfls[-1] the next state; with 4 or 5 elements dfls[2] gives semantic
    features ('-' for none), and with 5 elements dfls[3] gives an integer bias

    arguments:
        self  -
        syms  - Elly grammatical symbol table
        dfls  - definition elements in list

    exceptions:
        FormatFailure on error
    """
    ne = len(dfls)
    if ne < 3 or ne > 5:                            # must have 3 to 5 elements
        raise ellyException.FormatFailure

    spec = dfls[0]
    if spec == '\\0':
        self.patn = u'\x00'                         # special nul pattern
    elif ellyWildcard.numSpaces(list(spec)) > 0:
        print('** link pattern includes space:', spec, file=sys.stderr)
        raise ellyException.FormatFailure
    else:
        self.patn = ellyWildcard.convert(spec)      # encode Elly pattern
        # NOTE(review): the collapsed source does not show whether this check
        # also applied to the nul pattern; it is kept with the conversion here
        if spec != '$':
            if self.patn is None or ellyWildcard.minMatch(self.patn) == 0:
                print('** bad link pattern:', spec, file=sys.stderr)
                raise ellyException.FormatFailure

    lastat = dfls[-1]                               # next state, still as a string

    self.catg = None                                # defaults
    self.synf = None                                #
    self.semf = None                                #
    self.bias = 0                                   #

    sss = dfls[1].lower()                           # assumed not to be Unicode
    if sss != '-':                                  # allow for no category
        syx = syntaxSpecification.SyntaxSpecification(syms, sss)
        if syx is not None:
            if lastat != '-1':                      # not a stop state for matching
                raise ellyException.FormatFailure   # cannot have syntax here
            self.catg = syx.catg                    # syntactic category
            self.synf = syx.synf.positive           # syntactic features

    if ne > 3:
        if lastat != '-1':                          # not a stop state for matching
            raise ellyException.FormatFailure       # cannot have semantics here
        sss = None if dfls[2] == '-' else dfls[2].lower()
    else:
        sss = None
    sem = featureSpecification.FeatureSpecification(syms, sss, True)
    self.semf = sem.positive                        # get semantic features

    if ne > 4:
        try:
            self.bias = int(dfls[3])
        except ValueError:
            raise ellyException.FormatFailure       # unrecognizable bias

    try:
        n = int(lastat)                             # next state for link
    except ValueError:
        raise ellyException.FormatFailure           # unrecognizable number

    if n < 0:                                       # final transition?
        if self.patn == u'\x00':
            raise ellyException.FormatFailure       # final state not allowed here
        pe = self.patn[-1]                          # if so, get last pattern element
        # final pattern must end with * or $
        if pe != ellyWildcard.cALL and pe != ellyWildcard.cEND:
            self.patn += ellyWildcard.cEND          # default is $
            print('** final $ added to pattern', list(self.patn), file=sys.stderr)

    self.nxts = n                                   # specify next state
def _store(self, defs, nowarn):
    """
    put macro substitutions into table with indexing by first char of pattern

    rules are read one at a time with defs.readline(); each is split into a
    pattern and a substitution, validated, and appended to the index list or
    wildcard list chosen by the effective first char of the encoded pattern;
    a rule that fails validation is reported via self._err() and skipped

    arguments:
        self   -
        defs   - macro definitions, read one rule per readline() call
        nowarn - whether to turn warnings off

    exceptions:
        TableFailure on error (raised once input is exhausted,
        when self._errcount is nonzero)
    """
    endings = (ellyWildcard.cALL, ellyWildcard.cEND, ellyWildcard.cSPC)

    while True:
        rule = defs.readline()                      # next macro rule
        if len(rule) == 0:
            break                                   # EOF check

        dl = definitionLine.DefinitionLine(rule, False)
        left = dl.left                              # pattern to be matched
        tail = dl.tail                              # transformation to apply to match
        if left is None or tail is None:
            self._err(l=rule)                       # report missing part of rule
            continue
        if ' ' in left:                             # cannot contain any space chars
            ms = 'pattern in macro contains spaces'
            self._err(s=ms, l=rule, d=1)
            continue

        nspm = ellyWildcard.numSpaces(list(left))
        pat = ellyWildcard.convert(left)            # get pattern with encoded wildcards
        if pat is None:
            self._err('bad wildcards', rule)
            continue

        if pat[-1] not in endings:
            pat += ellyWildcard.cEND                # pattern must end in $ if it does not end in * or _

        if not _checkBindings(pat, tail):
            self._err('bad bindings in substitution', rule)
            continue
        if not nowarn and not _checkExpansion(pat, tail):
            # reported, but the rule is still stored
            self._err('substitution may be longer than original string', rule, 0)

        r = Rule(pat, nspm, tail)

        # step over leading optional sequences, indexing the rule under the
        # first char of each one, to reach the effective first char
        p = pat
        c = p[0]
        while c == ellyWildcard.cSOS:               # optional sequence?
            # NOTE(review): cEOS is used without the ellyWildcard prefix that
            # every other constant here has - confirm a module-level definition
            if not cEOS in p:
                break
            k = p.index(cEOS) + 1                   # position just past end of sequence
            if k == 2:
                break                               # empty sequence, stop
            if k == len(p):
                break                               # should be something after sequence
            # use p rather than pat so that each optional sequence is indexed
            # by its own first char (must be non-wildcard)
            self.index[ellyChar.toIndex(p[1])].append(r)
            p = p[k:]                               # move up in pattern
            c = p[0]                                # but check for another optional sequence

        if c == ellyWildcard.cSOS:
            self._err(l=rule)
            continue                                # bad sequence, skip this rule

        if ellyChar.isLetterOrDigit(c):             # check effective first char of pattern
            self.index[ellyChar.toIndex(c)].append(r)   # under alphanumeric char
        elif ellyChar.isText(c):
            self.index[0].append(r)                 # add to index under punctuation
        elif not c in ellyWildcard.Matching:
            if c == ellyWildcard.cEND:
                print('** macro warning: pattern can have empty match',
                      file=sys.stderr)
                print('* at [', rule, ']', file=sys.stderr)
            else:
                dc = '=' + str(ord(c) - ellyWildcard.X)
                self._err('bad wildcard code', dc)
            # NOTE(review): the rule is not filed in either case, so it is not
            # counted; the collapsed source leaves it unclear whether this
            # continue also applied to the warning case - confirm
            continue
        elif c == ellyWildcard.cANY or c == ellyWildcard.cALL:
            self.anyWx.append(r)                    # under general wildcards
        elif c == ellyWildcard.cCAN:
            self.index[0].append(r)                 # under punctuation
        elif c == ellyWildcard.cDIG or c == ellyWildcard.cSDG:
            self.digWx.append(r)                    # under digit wildcards
        elif c == ellyWildcard.cSAN:
            self.digWx.append(r)                    # under both digit and
            self.letWx.append(r)                    # letter wildcards
        elif c == ellyWildcard.cAPO:                # right single quote or apostrophe
            self.apoWx.append(r)
        elif c == ellyWildcard.cSPC or c == ellyWildcard.cEND:
            self._err('bad wildcard in context', rule)
            continue                                # wildcards unacceptable here
        else:
            self.letWx.append(r)                    # everything else under letter wildcard

        self.count += 1                             # count up macro substitution

    if self._errcount > 0:
        print(self._errcount, 'macro errors in all', file=sys.stderr)
        print('macro table definition FAILed', file=sys.stderr)
        raise ellyException.TableFailure
def __init__(self, syms, dfls):
    """
    initialization from a list of definition elements

    dfls[0] is the pattern, dfls[1] the syntax specification ('-' for none)
    and dfls[-1] the next state; with 4 or 5 elements dfls[2] gives semantic
    features ('-' for none), and with 5 elements dfls[3] gives an integer bias

    a negative next state marks a final transition; only for -1 is the
    pattern forced to end with * or $

    arguments:
        self  -
        syms  - Elly grammatical symbol table
        dfls  - definition elements in list

    exceptions:
        FormatFailure on error
    """
    ne = len(dfls)
    if ne < 3 or ne > 5:                            # must have 3 to 5 elements
        raise ellyException.FormatFailure

    source = dfls[0]
    if source == '\\0':
        self.patn = u'\x00'                         # special nul pattern
    elif ellyWildcard.numSpaces(list(source)) > 0:
        print('** link pattern includes space:', source, file=sys.stderr)
        raise ellyException.FormatFailure
    else:
        self.patn = ellyWildcard.convert(source)    # encode Elly pattern
        # NOTE(review): the collapsed source does not show whether this check
        # also applied to the nul pattern; it is kept with the conversion here
        if source != '$':
            if self.patn is None or ellyWildcard.minMatch(self.patn) == 0:
                print('** bad link pattern:', source, file=sys.stderr)
                raise ellyException.FormatFailure

    lastat = dfls[-1]                               # next state, still as a string

    self.catg = None                                # defaults
    self.synf = None                                #
    self.semf = None                                #
    self.bias = 0                                   #

    sss = dfls[1].lower()                           # assumed not to be Unicode
    if sss != '-':                                  # allow for no category
        syx = syntaxSpecification.SyntaxSpecification(syms, sss)
        if syx is not None:
            if lastat not in ('-1', '-2'):          # not a stop state for matching
                raise ellyException.FormatFailure   # cannot have syntax here
            self.catg = syx.catg                    # syntactic category
            self.synf = syx.synf.positive           # syntactic features

    if ne > 3:
        # NOTE(review): semantics are accepted only for '-1' while syntax
        # above also accepts '-2' - confirm the difference is intended
        if lastat != '-1':                          # not a stop state for matching
            raise ellyException.FormatFailure       # cannot have semantics here
        sss = None if dfls[2] == '-' else dfls[2].lower()
    else:
        sss = None
    sem = featureSpecification.FeatureSpecification(syms, sss, True)
    self.semf = sem.positive                        # get semantic features

    if ne > 4:
        try:
            self.bias = int(dfls[3])
        except ValueError:
            raise ellyException.FormatFailure       # unrecognizable bias

    try:
        n = int(lastat)                             # next state for link
    except ValueError:
        raise ellyException.FormatFailure           # unrecognizable number

    if n < 0:                                       # final transition?
        if self.patn == u'\x00':
            raise ellyException.FormatFailure       # final state not allowed here
        if n == -1:
            pe = self.patn[-1]                      # if so, get last pattern element
            # final pattern must end with * or $
            if pe != ellyWildcard.cALL and pe != ellyWildcard.cEND:
                self.patn += ellyWildcard.cEND      # default is $
                print('** final $ added to pattern', list(self.patn),
                      file=sys.stderr)

    self.nxts = n                                   # specify next state