def __init__ ( self , syms , fets=None , semantic=False ):

    """
    initialization

    arguments:
        self     -
        syms     - symbol table
        fets     - string representation of feature set
        semantic - flag for semantic features

    exceptions:
        FormatFailure on error
    """

    if syms is None or fets is None:
        # special case: generate a zero feature set
        self.positive = ellyBits.EllyBits(symbolTable.FMAX)
        self.negative = ellyBits.EllyBits(symbolTable.FMAX)
        self.id = ''
        return

    segm = fets.lower()
#   print "features=",segm,"semantic=",semantic

    # must look like '[Xsomething]' with a non-blank,
    # non-alphanumeric, non-'*' feature-set ID char
    malformed = (segm is None or len(segm) < 3
                 or segm[0] != '[' or segm[-1] != ']')
    if not malformed:
        idc = segm[1]
        malformed = (idc == ' ' or ellyChar.isLetterOrDigit(idc)
                     or idc == '*')
    if malformed:
        raise ellyException.FormatFailure

    self.id = segm[1]
#   print "id=",self.id

    fs = syms.getFeatureSet(segm[1:-1] , semantic)
#   print 'fs=' , str(fs[0]) , ',' , str(fs[1])
    if fs is None:
        raise ellyException.FormatFailure
    self.positive , self.negative = fs
def __init__(self):

    """
    initialize node to defaults

    arguments:
        self
    """

    fmax = symbolTable.FMAX
    self.synf = ellyBits.EllyBits(fmax)   # syntactic feature bits, all zero
    self.semf = ellyBits.EllyBits(fmax)   # semantic feature bits, all zero
    self.seqn = -1                        # no sequence number assigned yet
    self.reset()                          # set remaining attributes to defaults
def __init__(self, syms):

    """
    initialization

    arguments:
        self -
        syms - Elly symbol table

    exceptions:
        FormatFailure on error
    """

    # NOTE(review): 'category', 'sID', 'sBRK', 'smfs', 'defns', and '_FS'
    # are module-level names defined elsewhere in this file — confirm.

    self.catg = syms.getSyntaxTypeIndexNumber(category)
    self.synf = None                # no syntactic features yet
    self.semf = None                # no semantic features yet
    self.hpnc = {}                  # feature pairs keyed by punctuation
    brkg = _FS(syms, '[' + sID + sBRK + ']', True)   # break semantic features
    zero = ellyBits.EllyBits()      # all-zero feature bits
#   print ( 'smfs=' , smfs )
    for sky in smfs.keys():         # predefine semantic features for punctuation
#       print ( 'sky=' , sky , ' : ' , smfs[sky] )
        smfs[sky] = _FS(syms, smfs[sky], True)   # replace string with feature set
    for defn in defns:              # syntactic
        pc = defn[0]                # punctuation char is first element
        if len(defn) > 1:
            sxf = _FS(syms, defn[1])             # syntactic features from defn
            # semantic features: specific entry, else break features, else zero
            smf = smfs[pc] if pc in smfs else brkg if len(defn) > 2 else zero
            self.hpnc[pc] = [sxf, smf]
        else:
            self.hpnc[pc] = [zero, zero]         # no features at all
def addGoalPositions(self, n=10):

    """
    extend goal lists and goal bits

    arguments:
        self -
        n    - how many new positions to add
    """

#   print 'add goals, ntyp=' , self.ntyp
    while n > 0:
        self.goal.append([])                            # empty goal list
        self.gbits.append(ellyBits.EllyBits(self.ntyp)) # zeroed goal bits
        n -= 1
def __init__(self, nmax):

    """
    initialization

    arguments:
        self -
        nmax - how many syntactic types to encode
    """

    rows = []
    for k in range(nmax):
        bits = ellyBits.EllyBits(nmax)   # one bit string per syntax type
        bits.set(k)                      # put a 1 on the diagonal
        rows.append(bits)
    self.dm = rows                       # save completed matrix
def __init__(self):

    """
    create environment for testing semantic procedure

    arguments:
        self
    """

    stb = symbolTable.SymbolTable()                   # empty
    hry = conceptualHierarchy.ConceptualHierarchy()   # empty
    ctx = interpretiveContext.InterpretiveContext(stb, {}, {}, hry)
    self.context = ctx                                # make available
    ptb = parseTreeBase.ParseTreeBase()               # just for generating phrases

    self.toknL = ellyToken.EllyToken('uvwxxyz')       # insert dummy data that might
    self.toknR = ellyToken.EllyToken('abcdefg')       # be replaced from outside
    ctx.addTokenToListing(self.toknL)                 # put a token in first position
    ctx.addTokenToListing(self.toknR)                 # and a token in second

    x = ctx.syms.getSyntaxTypeIndexNumber('x')        # for consistency, define two
    y = ctx.syms.getSyntaxTypeIndexNumber('y')        # syntactic categories for rules
    fbs = ellyBits.EllyBits(symbolTable.FMAX)         # zero feature bits
    exL = grammarRule.ExtendingRule(x, fbs)           # dummy rules as a place for
    exR = grammarRule.ExtendingRule(x, fbs)           # attaching semantic procedures
    spl = grammarRule.SplittingRule(y, fbs)           # for testing

    # dummy semantic procedures
    gX = ["left", "right"]          # generative
    gL = ["obtain"]                 #
    gR = ["obtain"]                 #
    gP = ["append did it!"]         # for standalone generative subprocedure
    cX = []                         # cognitive
    cL = [">> +1"]                  #
    cR = [">> -1"]                  #

    ctx.pushStack()                          # needed for local variables usable in testing
    ctx.setLocalVariable("vl", "LLLL")       # make two variables available to work with
    ctx.setLocalVariable("vr", "RRRR")       #
    ctx.setProcedure('do', self._genp(gP))   # define procedure 'do'

    exL.gens = self._genp(gL)       # assign semantic procedures to rules
    exL.cogs = self._cogp(cL)       #
    exR.gens = self._genp(gR)       #
    exR.cogs = self._cogp(cR)       #
    spl.gens = self._genp(gX)       #
    spl.cogs = self._cogp(cX)       #

    phr = ptb.makePhrase(0, spl)               # make phrase for splitting plus
    phr.krnl.lftd = ptb.makePhrase(0, exL)     # left and right descendants
    phr.krnl.rhtd = ptb.makePhrase(1, exR)     # defined by left and right
                                               # extending rules from above
    phr.ntok = 1

    stb.getFeatureSet('!one,two', True)        # define semantic feature
    # converted from Python 2 print statements to Python 3 print() calls
    print(stb.smindx)
    smx = stb.smindx['!']           #
    ix = smx['one']
#   print('ix=', ix)
    phr.krnl.semf.set(ix)           # turn on feature for phrase
    ix = smx['two']
#   print('ix=', ix)
    phr.krnl.semf.set(ix)           # turn on feature for phrase
    print('semf=', phr.krnl.semf)

    self.phrase = phr               # make phrase available
def dumpAll(self):

    """
    dump all tree fragments (overrides superclass method)

    arguments:
        self -
    """

    # NOTE(review): 'out', 'N', and 'NBSP' are module-level names
    # defined elsewhere in this file — confirm.

#   print ( ' all depth=' , self.dlm + 1 )
    if self.dlm < 0: return
    out.write('dump all\n\n')
    n = self.phlim - 1              # index of last node created
    while n >= 0:                   # process until oldest node at n=0
        ph = self.phrases[n]        # get phrase
        if ph.dump:                 # if not already dumped
            self.dumpTree(ph)       # dump subtree starting at current phrase
        n -= 1

    out.write('rules invoked and associated phrases\n')
    hr = {}                         # association of rules with phrase nodes
    lm = N                          # maximum number phrase nodes to report
    for k in range(self.phlim):
        ph = self.phrases[k]
        phno = ph.krnl.seqn         # phrase sequence number
        rs = ph.krnl.rule.seqn      # sequence number of rule producing it
#       print ( '!!!! ' , phno , ':' , rs )
        if not rs in hr: hr[rs] = []
        hr[rs].append(phno)         # make association
#       print ( '!!!! =' , hr[rs] )
#   print ( len(hr) , 'distinct rules' )
#   print ( 'keys=' , hr.keys() )
    for rs in hr.keys():            # iterate on sequence numbers for rules found
        ls = hr[rs]
        if len(ls) > lm:            # truncate long phrase lists
            ls = ls[:lm]
            ls.append('...')
        rssn = 'rule {:4d}:'.format(rs)
        out.write(rssn)             # report up to lm phrase nodes for rule
                                    # with this sequence number
        out.write(str(ls))
        out.write('\n')
    out.write('\n')

    wno = len(self.ctx.tokns)       # set to last parse position in input
    wn = wno
    while wn > 0 and len(self.goal[wn]) == 0: wn -= 1
    gs = self.goal[wn]              # goals at end of parsed input
    gl = len(gs)                    # number of goals at last position
    out.write(str(gl) + ' final goals at position= ')
    out.write(str(wn) + ' / ' + str(wno) + '\n')
    for g in gs:
        fs = '{:4.4s}'.format(self.stb.getSyntaxTypeName(g.cat))
        grs = str(g)
        kn = grs.find(':') + 1      # insert type name after goal number
        out.write(NBSP + grs[:kn] + ' ' + fs + grs[kn:] + '\n')
    out.write("\n")
    out.write(str(self.phlim) + ' phrases altogether\n')

    out.write('\nambiguities\n')
    nph = self.phlim                    # number of phrases allocated in parse
    phx = ellyBits.EllyBits(nph)        # to keep track of phrases listed as ambiguous
    nam = 0                             # ambiguity count
    for i in range(nph):                # scan all phrases for ambiguities
        ph = self.phrases[i]
        if ph.alnk == None or phx.test(i): continue  # is this a new ambiguity?
        s = self.stb.getSyntaxTypeName(ph.krnl.typx) # if so, show phrase info
        f = ph.krnl.synf.hexadecimal(False)          # convert to packed hexadecimal
        out.write("{0:<5.5s} {1}: ".format(s, f))    # show syntax type of ambiguity
        nam += 1
        while ph != None:               # walk ambiguity chain
            phx.set(ph.krnl.seqn)       # mark phrase as scanned
            out.write("{:2d} ".format(ph.krnl.seqn)) # show its sequence number
            bx = '-' if ph.krnl.rule.bias < 0 else '0'  # show rule bias in effect
            out.write("({0:+2d}/{1}) ".format(ph.krnl.bias, bx))
            ph = ph.alnk                # next phrase in ambiguity list
        out.write('\n')
    if nam == 0:
        out.write('NONE\n')

    self.showTokens(out)
    ng = self.glim
    out.write(str(nph) + ' phrases, ' + str(ng) + ' goals\n\n')
    out.flush()
# NOTE(review): fragment of a test driver; 'base', 'name', 'rdr', 'Ctx',
# and 'ParseTreeWithDisplay' must be defined earlier in this file — confirm.
print('loading', '[' + base + name + '.g.elly]', len(rdr.buffer), 'lines')
stbu = symbolTable.SymbolTable()              # fresh symbol table
gtbu = grammarTable.GrammarTable(stbu, rdr)   # grammar built from definition reader
ctxu = Ctx()                                  # context stub
tksu = ctxu.tokns                             # its token listing
tree = ParseTreeWithDisplay(stbu, gtbu, None, ctxu)
print()
print(tree)
print()
print(dir(tree))
print()
cat = stbu.getSyntaxTypeIndexNumber('num')    # syntactic type for literal phrase
fbs = ellyBits.EllyBits(symbolTable.FMAX)     # zeroed feature bits
tree.addLiteralPhrase(cat, fbs)
tree.digest()
tksu.append(ellyToken.EllyToken('66'))
tree.restartQueue()
ws = ['nn', 'b', 'aj']           # from test.g.elly
wu = ['ww', 'wx', 'wy', 'wz']    # unknown terms (presumably used further on)
for w in ws:                     # parse each known term in turn
    tree.createPhrasesFromDictionary(w, False, False)
#   print ( '**** to' , tree.phlim , tree.lastph , 'rule=' , tree.lastph.krnl.rule.seqn )
    tree.digest()
#   print ( '**** to' , tree.phlim , tree.lastph , 'rule=' , tree.lastph.krnl.rule.seqn )
    tksu.append(ellyToken.EllyToken(w))
def __init__(self, m, n=0):

    """
    initialization

    arguments:
        self -
        m    - value for styp
        n    - value for rtyp
    """

    self.seqn = 10000                # sequence number, set out of normal range
    self.styp = m                    # syntactic type
    self.rtyp = n                    # NOTE(review): presumably right-branch type — confirm
    self.sfet = ellyBits.EllyBits()  # empty syntactic feature bits
def getFeatureSet ( self , fs , ty=False ):

    """
    get feature indices associated with given names in given set

    arguments:
        self -
        fs   - feature set without enclosing brackets
        ty   - False=syntactic, True=semantic

    returns:
        list of EllyBits [ positive , negative ] on success, None on failure
    """

    if len(fs) < 1: return None
#   print ( 'fs=' , fs )
    bp = ellyBits.EllyBits(FMAX)    # all feature bits zeroed
    bn = ellyBits.EllyBits(FMAX)    #
    fsx = self.smindx if ty else self.sxindx   # pick semantic or syntactic index
#   print ( '-------- fs=' , fs )
    fid = fs[0]                     # feature set ID
    fnm = fs[1:].split(',')         # feature names
    if not fid in fsx:              # known ID?
#       print ( 'new feature set' )
        d = { }                     # new dictionary of feature names
        if ty:
            d['*c'] = 0             # always define '*c' as semantic feature
            d['*capital'] = 0       # equivalent to '*c'
        else:
            d['*r'] = 0             # always define '*r' as syntactic feature
            d['*right'] = 0         # equivalent to '*r'
            d['*l'] = 1             # always define '*l'
            d['*left'] = 1          # equivalent to '*l'
            d['*x'] = LAST          # always define '*x'
            d['*u'] = LAST          # always define '*u'
            d['*unique'] = LAST     # equivalent to '*u' and '*x'
        fsx[fid] = d                # make new feature set known
    h = fsx[fid]                    # for hashing of feature names
    if len(fnm) == 0:               # check for empty features
        return [ bp , bn ]
    for nm in fnm:
        nm = nm.strip()
        if len(nm) == 0: continue
        if nm[0] == '-':            # negative feature?
            b = bn                  # if so, look at negative bits
            nm = nm[1:]
        elif nm[0] == '+':          # positive feature?
            b = bp                  # if so, look at positive bits
            nm = nm[1:]
        else:
            b = bp                  # positive bits by default
#       print ( '-------- nm=' , nm )
        nmc = nm if nm[0] != '*' else nm[1:]
        for c in nmc:               # check feature name chars
            if not ellyChar.isLetterOrDigit(c):
                print ( 'bad feature name=' , nm , file=sys.stderr )
                return None
        if not nm in h:             # new name in feature set?
            if nm[0] == '*':        # user cannot define reserved name
                print ( 'unknown reserved feature=' , nm , file=sys.stderr )
                return None
#           print ( 'define new feature' )
            k = len(h)              # yes, this will be next free index
            l = FMAX                # upper limit on feature index
            if ty:                  # semantic feature?
                k -= 1              # if so, adjust for extra name *C
            else:
                k -= 5              # else, adjust for *UNIQUE and extra names *L, *R , *U , *X
                l -= 1              # adjust upper limit for *UNIQUE
            if k == l:              # overflow check
                print ( '** ERROR: too many feature names, fid=',fid,'nm=',nm , file=sys.stderr )
                print ( '**' , end=' ' , file=sys.stderr )
                print ( h.keys() , file=sys.stderr )
                return None
            if k < 0:
                print ( 'bad index=' , k , 'l=' , l , file=sys.stderr )
                return None
            h[nm] = k               # define new feature
#           print ( 'k=' , k )
#       print ( 'set bit' , h[nm] , 'for' , fid + nm )
        b.set(h[nm])                # set bit for feature
    return [ bp , bn ]
to be subclassed further """ import sys import ellyBits import ellyDefinitionReader import ellyConfiguration import grammarRule import symbolTable import cognitiveProcedure import generativeProcedure import parseTreeBase NPOSNS = 128 # nominal minimum number of tree leaf nodes ZEROfs = ellyBits.EllyBits(symbolTable.FMAX) class ParseTreeBottomUp(parseTreeBase.ParseTreeBase): """ parse tree plus supporting structures for table-driven bottom-up parsing algorithm attributes: newph - unique new phrases at each position ambig - accumulate ambiguous phrases for reporting queue - of phrases yet to process gtb - basic syntax and internal dictionary ptb - syntax type patterns
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # ----------------------------------------------------------------------------- """ runs extraction methods and generates phrases """ import ellyBits import ellyChar import ellyConfiguration import syntaxSpecification import featureSpecification noBits = ellyBits.EllyBits() class EntityExtractor(object): """ handler for information extraction (IE) attributes: ptr - parse tree for extracted information sym - saved symbol table exs - extraction procedure list """ def __init__(self, ptr, sym): """ initialization
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # ----------------------------------------------------------------------------- """ classes for syntax rules with associated semantics """ import featureSpecification import ellyBits _dfrs = ellyBits.EllyBits() _dfrs.complement() class BasicRule(object): """ basic rule structure attributes: cogs - cognitive semantics gens - generative semantics styp - syntactic type produced by rule sfet - syntactic features to set sftr - to reset bias - for rule ordering in ambiguity handling nmrg - to indicate degree of merging by rule (1 or 2)
def __init__(self, syms, defn=None):

    """
    initialization

    arguments:
        self -
        syms - symbol table for grammar
        defn - EllyDefinitionReader grammar definition

    exceptions:
        TableFailure on error
    """

    self.initzn = []                # preset global variables
    self.proc = {}                  # named semantic procedures
    self.dctn = {}                  # builtin words and semantics
    self.pndx = {}                  # standalone procedures
    self.extens = []                # 1-branch rule
    self.splits = []                # 2-branch rule
    for _ in range(symbolTable.NMAX):
        self.extens.append([])      # list of 1-branch rules for each syntax type
        self.splits.append([])      # list of 2-branch rules for each syntax type
    self.mat = derivabilityMatrix.DerivabilityMatrix(symbolTable.NMAX)

    # coding of predefined syntax types
    self.START = syms.getSyntaxTypeIndexNumber('sent')
    self.END = syms.getSyntaxTypeIndexNumber('end')
    self.UNKN = syms.getSyntaxTypeIndexNumber('unkn')
    self.SEPR = syms.getSyntaxTypeIndexNumber('sepr')
    self.XXX = syms.getSyntaxTypeIndexNumber('...')

    # special rule for ... type going to null
    fets = ellyBits.EllyBits(symbolTable.FMAX)
    self.arbr = grammarRule.ExtendingRule(self.XXX, fets)
    self.arbr.cogs = None
    self.arbr.gens = compile(syms, 'g', [])

    # special rule for SENT->SENT END
    ru = grammarRule.SplittingRule(self.START, fets)
    ru.rtyp = self.END
    ru.ltfet = ru.rtfet = ellyBits.join(fets, fets)
    ru.cogs = None
    ru.gens = None
    self.splits[self.START].append(ru)

    # special rule for RS (ASCII record separator)
    ru = grammarRule.ExtendingRule(self.SEPR, fets)
    ru.cogs = None
    ru.gens = None
    self.dctn[ellyChar.RS] = [ru]   # should be only rule here ever

    # predefined generative semantic procedures
    self.pndx['defl'] = compile(syms, 'g', ['left'])
    self.pndx['defr'] = compile(syms, 'g', ['right'])
    self.pndx['deflr'] = compile(syms, 'g', ['left', 'right'])
    self.d1bp = self.pndx['defl']   # default 1-branch generative semantics
    self.d2bp = self.pndx['deflr']  # default 2-branch

    if defn is not None:
        if not self.define(syms, defn):
            # fixed: original used Python 2 'print >> sys.stderr' syntax,
            # a SyntaxError under Python 3 (which this file otherwise targets)
            print('grammar table definition FAILed', file=sys.stderr)
            raise ellyException.TableFailure
def __init__(self, n):

    """
    initialization
    """

    self.cogs = None                 # no cognitive semantics attached yet
    self.styp = n                    # syntactic type
    self.sfet = ellyBits.EllyBits()  # empty syntactic feature bits
def __init__(self):

    """
    initialization
    """

    self.synf = ellyBits.EllyBits(symbolTable.FMAX)  # zeroed syntactic features
    self.catg = 0                                    # default syntactic category
def __init__(self, dta):

    """
    initialization of vocabulary object from retrieved record

    arguments:
        self -
        dta  - what DB support returns

    throws:
        FormatFailure on error
    """

    # NOTE(review): 'obtnp' and 'gens' are module-level names defined
    # elsewhere in this file — confirm.

    self._ln = 0
    rec = dta[1]                    # data record found for search key
#   print ( 'voc rec=' , rec , file=sys.stderr )
    r = rec.split('=:')             # split off term in data record
    if len(r) <= 1: return          # the '=:' is mandatory
    d = r[1].strip().split(' ')     # definition is right of '=:'
#   print ( 'VEntry: define as' , d , file=sys.stderr )
    if len(d) < 4: return           # it should have at least 4 parts
    ur = r[0].strip()               # term left of '=:'
    self.chs = list(ur)             # save it
    self.cat = int(d.pop(0))        # syntactic category
#   print ( ' full term=' , ''.join(self.chs) , file=sys.stderr )
    sy = d.pop(0)
    nb = len(sy) * 4                # 4 feature bits per hex digit
    self.syf = ellyBits.EllyBits(nb)    # allocate bits
    self.syf.reinit(sy)                 # set syntactic features
    sm = d.pop(0)
    nb = len(sm) * 4
    self.smf = ellyBits.EllyBits(nb)    # allocate bits
    self.smf.reinit(sm)                 # set semantic features
    self.bia = int(d.pop(0))            # save initial plausibility
    if len(d) > 0:                      # any concept?
        self.con = d.pop(0).upper()     # if so, save it
    else:
        self.con = '-'
#   print ( ' translation=' , d , file=sys.stderr )
    if len(d) == 0:                 # no further definition?
        self.gen = obtnp            # if so, then use default procedure
        self._nt = 0                # i.e. no translation
    elif d[0][0] == '=':            # simple translation?
        dfs = ' '.join(d)           # just in case translation had spaces
#       print ( 'def ext=' , dfs , file=sys.stderr )
        pls = ['append ' + dfs[1:]]
        inpts = ellyDefinitionReader.EllyDefinitionReader(pls)
        self.gen = generativeProcedure.GenerativeProcedure(None, inpts)
        self._nt = 1
    elif d[0][0] == '(':            # get predefined procedure
        inpts = ellyDefinitionReader.EllyDefinitionReader([d[0]])
        self.gen = generativeProcedure.GenerativeProcedure(None, inpts)
        self._nt = 0
    else:                           # otherwise, set for selection of translation
#       print ( 'multi selection, d=' , d , file=sys.stderr )
        cm = 'pick LANG ('          # construct instruction to select
        for p in d:
            if p[-1] == ',': p = p[:-1]
            cm += p + '#'           # build selection clauses
        cm += ')'
        gens[0] = cm                # replace action
#       print ( 'cm=' , cm )
#       print ( 'gens=' , gens )
        inpts = ellyDefinitionReader.EllyDefinitionReader(gens)
        self.gen = generativeProcedure.GenerativeProcedure(None, inpts)
        if self.gen == None:
            print('vocabulary generative semantic failure', file=sys.stderr)
            print('gens=', gens, file=sys.stderr)
            raise ellyException.FormatFailure
#       print ( 'vocabulary gen.logic' )
#       generativeDefiner.showCode(self.gen.logic)
        self._nt = len(d)
    self._ln = len(self.chs)