def traverseInOrder(item):
    """Return the nodes of the tree rooted at ``item`` in in-order.

    The first child's subtree is visited first, then ``item`` itself, then
    the subtrees of the remaining children from left to right.

    The result is a ``TypeCheckList(Tree)``; a node without children yields
    a list that contains only that node.
    """
    result = TypeCheckList(Tree)
    children = item.childlist
    # A leaf has no first child: guard the access instead of raising IndexError.
    if len(children) > 0:
        # Extend (not append): the recursive call returns a list of nodes,
        # and the result must stay a flat list of Tree instances.
        result += traverseInOrder(children[0])
    result.append(item)
    for childindex in range(1, len(children)):
        result += traverseInOrder(children[childindex])
    return result
def traversePreOrder(item):
    """Return the nodes of the tree rooted at ``item`` in pre-order.

    ``item`` comes first, followed by the pre-order traversal of each of its
    children in ``childlist`` order. The result is a ``TypeCheckList(Tree)``.
    """
    from pydsl.Abstract import TypeCheckList
    visited = TypeCheckList(Tree)
    visited.append(item)
    # map() is lazy, so each subtree is traversed right before it is merged.
    for branch in map(traversePreOrder, item.childlist):
        visited += branch
    return visited
def split(self):
    """Split this result into one child result per symbol.

    For every symbol in ``self.symbollist`` the method looks for the child
    whose first symbol is that symbol and collects it.

    Returns:
        A ``TypeCheckList(ParseTree)`` with one entry per symbol, in
        ``self.symbollist`` order.

    Raises:
        AssertionError: if more than one candidate remains for a symbol.
        IndexError: if no candidate (or no symbol on the candidate) remains.
        Exception: if the remaining candidate does not start with the symbol.
    """
    result = TypeCheckList(ParseTree)
    for symbol in self.symbollist:
        currentlist = self.childlist
        # NOTE(review): the trailing ``break`` is unconditional, so this loop
        # descends at most one level. It looks like it was meant to stop only
        # when no child contains the symbol -- confirm before changing.
        while (len(currentlist) > 1
               and len(currentlist[0].symbollist) > 1
               and currentlist[0].symbollist[0] != symbol):
            for dpr in currentlist:
                if symbol in dpr.symbollist:
                    currentlist = dpr.childlist
                    break
            break  # Not found
        # Was ``len(currentlist < 2)``: comparing a list with an int raises
        # TypeError on Python 3, so the check could never succeed.
        assert len(currentlist) < 2
        dpr = currentlist[0]
        currentsymbol = dpr.symbollist[0]
        # TODO: strip the surrounding symbols until only the symbol of
        # interest, together with its word, is left.
        if currentsymbol != symbol:  # Not found
            raise Exception
        result.append(dpr)
    return result
def terminal_symbol_reducer(symbol, word, production):
    """Find every occurrence of a terminal symbol inside ``word``.

    The search strategy depends on ``symbol.boundariesrules.policy``:

    * ``"min"``: for each start position, the shortest slice accepted by
      ``symbol.check`` is reported.
    * ``"max"``: for each start position, the longest accepted slice is
      reported.
    * ``"fixed"``: for each start position, the slice of exactly
      ``symbol.boundariesrules.size`` characters is tested.

    Args:
        symbol: terminal symbol providing ``check`` and ``boundariesrules``.
        word: input to scan; anything that is not a ``str`` is converted
            with ``str()``.
        production: production stored on every ``ParseTree`` created.

    Returns:
        A ``TypeCheckList(ParseTree)`` with one tree per match. It is empty
        when nothing matches or when the policy is unknown (a warning is
        logged in that case).
    """
    from pydsl.Abstract import TypeCheckList
    from pydsl.Grammar.Tree import ParseTree
    if not isinstance(word, str):
        word = str(word)
    validresults = TypeCheckList(ParseTree)
    policy = symbol.boundariesrules.policy
    if policy == "min":
        LOG.debug("terminal_symbol_reducer: policy: min")
        for begin in range(0, len(word)):
            for end in range(begin, len(word)+1):
                if symbol.check(word[begin:end]):
                    LOG.debug("terminal_symbol_reducer: parsed:"+ str(word[begin:end]))
                    validresults.append(ParseTree(begin, end, [symbol], word[begin:end], production))
                    break  # found the smallest valid symbol at begin
    elif policy == "max":
        LOG.debug("terminal_symbol_reducer: policy: max")
        for begin in range(0, len(word)):
            # NOTE(review): 0 doubles as the "no match" sentinel, so an empty
            # match is dropped at begin == 0 but kept at any later position.
            maxword = 0
            for end in range(begin, len(word)+1):
                if symbol.check(word[begin:end]):
                    LOG.debug("terminal_symbol_reducer: parsed:"+ str(word[begin:end]))
                    maxword = end
            if maxword > 0:
                validresults.append(ParseTree(begin, maxword, [symbol], word[begin:maxword], production))
    elif policy == "fixed":
        LOG.debug("terminal_symbol_reducer: policy: fixed")
        size = symbol.boundariesrules.size
        for begin in range(0, len(word)):
            end = begin + size
            if end > len(word):
                # Slicing would silently truncate the window, yielding a
                # shorter content with a rightpos beyond the end of the word.
                break
            if symbol.check(word[begin:end]):
                LOG.debug("__auxReducer: parsed:"+ str(word[begin:end]))
                validresults.append(ParseTree(begin, end, [symbol], word[begin:end], production))
    else:
        LOG.warning("terminal_symbol_reducer: Unknown size policy")
        return TypeCheckList(ParseTree)
    return validresults
def __recursive_parser(self, onlysymbol, data, production, showerrors:bool = False, recurssions:int = 0):
    """Recursive-descent helper: parse ``data`` as ``onlysymbol``.

    Args:
        onlysymbol: symbol to recognise (terminal, non terminal or null).
        data: input to parse; anything that is not a ``str`` is converted
            with ``str()``.
        production: production stored on the trees built for ``onlysymbol``.
        showerrors: when true and nothing valid is found, return a single
            invalid tree spanning the whole input instead of an empty list.
        recurssions: current recursion depth, used for the depth limit.

    Returns:
        A ``TypeCheckList(ParseTree)`` with the results found. It is empty
        when ``data`` is empty or when the depth exceeds ``16 * len(data)``.

    Raises:
        Exception: if ``onlysymbol`` is none of the known symbol kinds.
    """
    LOG.debug("__recursive_parser: Begin ")
    if not isinstance(data, str):
        data = str(data)
    if recurssions > 16*len(data):
        LOG.debug("RECURSSION LIMIT")
        return TypeCheckList(ParseTree)
    if len(data) == 0:
        return TypeCheckList(ParseTree)
    from ..Symbol import TerminalSymbol, NullSymbol, NonTerminalSymbol
    if isinstance(onlysymbol, TerminalSymbol):
        # Locate every occurrence of the word and return a set of results,
        # following the symbol's boundary rules.
        LOG.debug("Iteration: terminalsymbol")
        sproduction = self._productionset.getProductionsBySide([onlysymbol])[0]
        # NOTE(review): terminal_symbol_consume is not defined in the visible
        # part of this file -- confirm it exists elsewhere.
        result = terminal_symbol_consume(onlysymbol, data, sproduction )
        if showerrors and not result:
            LOG.debug("error symbolo: " + str(onlysymbol))
            return TypeCheckList(ParseTree, [ParseTree(0,len(data), [onlysymbol] , data, sproduction, valid = False)])
        return result
    elif isinstance(onlysymbol, NonTerminalSymbol):
        result = TypeCheckList(ParseTree)
        tmpresults = []
        invalidlist = []
        # First pass: for each alternative, try to match its right-side
        # symbols one after another and collect the resulting sequences.
        for alternative in self._productionset.getProductionsBySide([onlysymbol]):
            alternativetree = RecursiveDescentResultTree(None)
            alternativesuccess = True
            alternativeinvalidlist = []
            for symbolindex, symbol in enumerate(alternative.rightside):
                symbolsuccess = False
                for totalpos in alternativetree.last_poss():
                    if totalpos >= len(data):
                        LOG.debug("Alternative length problem:" + str(alternative) + " Recursion: "+ str(recurssions) + " INTENTO SIMBOLO " + str(symbolindex) + ":" + str(symbol) + " Input: "+ str(data) +" SHIFT " + str(totalpos))
                        continue
                    LOG.debug("Alternative:" + str(alternative) + " Recursion: "+ str(recurssions) + " INTENTO SIMBOLO " + str(symbolindex) + ":" + str(symbol) + " Input: "+ str(data) +" SHIFT " + str(totalpos))
                    thisresult = self.__recursive_parser(symbol, data[totalpos:], alternative, showerrors, recurssions+1)
                    allvalids = all([x.valid for x in thisresult])
                    if thisresult and allvalids:
                        LOG.debug("Alternative:" + str(alternative) + " Recursion: "+ str(recurssions) + " Trying Symbol " + str(symbolindex) + ":" + str(symbol) + " Input: "+ str(data) +" SHIFT " + str(totalpos)+ " results: " + str(thisresult) + "leftpos: " + str(thisresult[0].leftpos) + " rightpos: " + str(thisresult[-1].rightpos))
                        symbolsuccess = True
                        for x in thisresult:
                            # Sub-results are relative to data[totalpos:];
                            # move them back to positions in ``data``.
                            x.shift(totalpos)
                            exito = alternativetree.append(x, totalpos)
                            if not exito:
                                # TODO: record this as an error in the tree or elsewhere
                                LOG.debug("Discarded symbol :" + str(symbol) + " position:" + str(totalpos))
                            else:
                                LOG.debug("Added symbol :" + str(symbol) + " position:" + str(totalpos))
                    else:
                        for x in thisresult:
                            if not x.valid:
                                alternativeinvalidlist.append(x)
                if not symbolsuccess:
                    LOG.debug("Symbol doesn't work" + str(symbol))
                    alternativesuccess = False
                    break  # Try another alternative
                else:
                    LOG.debug("Symbol work at recursion" + str(recurssions) + ": " + str(symbol))
            if not alternativesuccess:
                continue  # A symbol doesn't work, so this alternative doesn't either
            else:
                invalidlist += alternativeinvalidlist
            for x in alternativetree.get_lists():
                # The list used to be passed as a %-format argument without a
                # placeholder, which makes the logging module report a
                # formatting error whenever DEBUG is enabled.
                LOG.debug("Recurssion" + str(recurssions) + " Adding..." + str([y.content for y in x]))
                tmpresults.append(x)
        LOG.debug("La iteracion " + str(recurssions) + "result collection finished:" + str(tmpresults))
        # Second pass: wrap every collected sequence that fits an alternative
        # into a new tree for ``onlysymbol``.
        for alternative in self._productionset.getProductionsBySide([onlysymbol]):
            for results in tmpresults:
                LOG.debug("result: " + str([x.content for x in results]) + ", alternative:" + str(alternative))
                nullcounter = alternative.rightside.count(NullSymbol())
                nnullresults = 0
                for y in [x.symbollist for x in results]:
                    nnullresults += y.count(NullSymbol())
                # Null symbols are not counted on either side of the comparison.
                if len(results) - nnullresults != len(alternative.rightside) - nullcounter:
                    LOG.debug("Discarded: Bad result number")
                    continue
                if results[-1].rightpos > len(data):
                    LOG.debug("Discarded: Bad rightpos")
                    continue
                # NOTE(review): this "is it the same rule?" check has no
                # effect -- ``continue`` only advances this inner loop.
                # Kept as is; confirm the intended behaviour.
                for x in range(min(len(alternative.rightside), len(results))):
                    if results[x].content != alternative.rightside[x]:
                        continue
                newresult = ParseTree(0, results[-1].rightpos - results[0].leftpos, [onlysymbol], data[results[0].leftpos:results[-1].rightpos], production)
                for child in results:
                    newresult.append_child(child)
                    if not child.valid:
                        newresult.valid = False
                if newresult.valid:
                    result.append(newresult)
        if showerrors and not result:
            erroresult = TypeCheckList(ParseTree, [ParseTree(0,len(data), [onlysymbol] , data, production, valid = False)])
            for invalid in invalidlist:
                if invalid.production.leftside[0] in production.rightside:
                    erroresult[0].append_child(invalid)
            return erroresult
        return result
    elif isinstance(onlysymbol, NullSymbol):
        return TypeCheckList(ParseTree,[ParseTree(0, 0, [onlysymbol], "", production)])
    else:
        raise Exception
def __init__(self, leftpos, rightpos, content, valid = True):
    """Store the node's span, payload and validity; start with no children."""
    self.leftpos, self.rightpos = leftpos, rightpos
    # Children are kept in a list restricted to Tree instances.
    self.childlist = TypeCheckList(Tree)
    self.content, self.valid = content, valid
class Tree(metaclass = ABCMeta):
    """Generic tree node covering the span ``[leftpos, rightpos)``.

    Each node stores its content, a validity flag and an ordered list of
    child nodes.
    """

    def __init__(self, leftpos, rightpos, content, valid = True):
        """Create a node without children."""
        self.leftpos = leftpos
        self.rightpos = rightpos
        self.childlist = TypeCheckList(Tree)
        self.content = content
        self.valid = valid

    def __bool__(self):
        """Return the validity flag: an invalid node is falsy."""
        return self.valid

    def __getitem__(self, key, order = "preorder"):  # FIXME: getitem and optional argument??
        """Return every node of the tree whose content equals ``key``.

        Nodes are listed in the traversal order given by ``order``.
        """
        return [element for element in self.getAllByOrder(order)
                if element.content == key]

    def getItemByOrder(self, key, order):
        """Return all the nodes of the tree in the given traversal order.

        NOTE(review): ``key`` is ignored, exactly as before, so this is
        equivalent to ``getAllByOrder(order)`` -- confirm whether filtering
        by ``key`` was intended.
        """
        return self.getAllByOrder(order)

    def getAllByOrder(self, order = "preorder"):
        """Return all the nodes of the tree in the given traversal order.

        Args:
            order: ``"preorder"``, ``"inorder"`` or ``"postorder"``.

        Raises:
            KeyError: if ``order`` is not one of the names above.
        """
        if order == "preorder":
            return traversePreOrder(self)
        elif order == "inorder":
            return traverseInOrder(self)
        elif order == "postorder":
            return traversePostOrder(self)
        else:
            raise KeyError(order)

    def shift(self, amount):
        """Move both ends of this node's span by ``amount`` (children are not touched)."""
        self.leftpos += amount
        self.rightpos += amount

    def append_child(self, dpr):
        """Append ``dpr`` to the child list."""
        self.childlist.append(dpr)

    def __len__(self):
        """Return the length of the span, ``rightpos - leftpos``."""
        return self.rightpos - self.leftpos

    def coverage(self):
        """Return ``(covered, total)`` lengths for this subtree.

        An invalid node covers nothing. A valid leaf covers its whole span.
        A valid inner node reports the sums over its children.
        """
        if not self:
            return 0, len(self)
        # Was a bare ``childlist``, which raised NameError for any valid node.
        if not self.childlist:
            return len(self), len(self)
        childtotal = 0
        childcoverage = 0
        for child in self.childlist:
            newcoverage, newtotal = child.coverage()
            childcoverage += newcoverage
            childtotal += newtotal
        # The children are expected to account for the whole span of the parent.
        assert childtotal == len(self)
        return childcoverage, childtotal

    def first_leaf(self):
        """Return the leftmost leaf of this subtree."""
        if self.childlist:
            return self.childlist[0].first_leaf()
        else:
            return self
def traversePostOrder(item):
    """Return the nodes of the tree rooted at ``item`` in post-order.

    The post-order traversal of every child (in ``childlist`` order) comes
    first and ``item`` itself is last. The result is a ``TypeCheckList(Tree)``.
    """
    collected = TypeCheckList(Tree)
    # map() is lazy, so each subtree is traversed right before it is merged.
    for branch in map(traversePostOrder, item.childlist):
        collected += branch
    collected.append(item)
    return collected
def mix_results(resultll:list, productionset):
    """Mix n sets of results into sequences of adjacent results.

    One result is taken from each non-empty set, in order, so that every
    result starts where the previous one ends. Each complete sequence is
    then joined into a single ``ParseTree`` with the originals as children.

    Args:
        resultll: list of ``TypeCheckList(ParseTree)`` instances, one per
            set of results. Empty sets are skipped.
        productionset: used to look up the production whose right side
            matches the joined children.
            NOTE(review): when it is falsy, joined results are not added to
            the output at all -- confirm this is intended.

    Returns:
        A ``TypeCheckList(ParseTree)`` with the mixed results, or a plain
        empty list when every set is empty.

    Raises:
        TypeError: if the first non-empty set holds a non-``ParseTree``.
        Exception: if a result of the first set has no left position, or a
            sequence of several results lacks its outer positions.
    """
    from pydsl.Grammar.Tree import ParseTree
    for resultl in resultll:
        assert(isinstance(resultl, TypeCheckList) and resultl.instancetype == ParseTree)
    midlist = []  # All block combinations are stored here
    firstindex = 0
    while firstindex < len(resultll) and len(resultll[firstindex]) == 0:
        firstindex += 1
    if firstindex == len(resultll):
        return []
    validsets = 1
    # Processing head set: only results starting at position 0 open a sequence.
    firstresultl = resultll[firstindex]
    for result in firstresultl:
        if not isinstance(result, ParseTree):
            raise TypeError
        if result.leftpos == 0:
            midlist.append([result])
        elif result.leftpos is None:
            raise Exception  # FIXME: What's the right thing to do here?
    # Processing tail sets
    for resultl in resultll[firstindex + 1:]:  # For each result list
        if len(resultl) == 0:
            continue
        for result in resultl:  # For each result
            tmp = []
            # Mix this result with every sequence in the intermediate list.
            for middleresult in midlist:
                lastresult = middleresult[-1]
                if result.rightpos is None:
                    # A result without positions is placed as an empty span
                    # right after the previous result.
                    result.rightpos = lastresult.rightpos
                    result.leftpos = lastresult.rightpos
                if (lastresult.rightpos is None or result.leftpos is None
                        or lastresult.rightpos == result.leftpos):
                    # The sequence gets its own copy of the result.
                    tmp.append(middleresult + [ParseTree(result.leftpos, result.rightpos,
                        result.symbollist, result.content, result.production,
                        TypeCheckList(ParseTree, result.childlist), result.valid)])
            midlist += tmp
        validsets += 1
        # Remove the intermediate sequences that do not hold as many elements
        # as sets processed so far, i.e. no valid combination was found for
        # them in the last mix.
        for element in midlist[:]:
            if len(element) != validsets:
                midlist.remove(element)
    # Mix all the sequences into the final result
    finallist = TypeCheckList(ParseTree)
    for middleresult in midlist:
        if len(middleresult) == 1:
            finallist.append(middleresult[0])
        elif middleresult[0].leftpos is not None and middleresult[-1].rightpos is not None:
            # Create a node spanning all the elements; the original nodes
            # become the children of the new node.
            # ``compoundword`` used to be accumulated with ``+=`` without
            # being initialised, which raised UnboundLocalError.
            compoundword = "".join(element.content for element in middleresult)
            symbollist = []
            for element in middleresult:
                symbollist += element.symbollist
            # The joined node is valid only if every child is valid. The flag
            # used to be seeded from a stale loop variable left over from the
            # mixing stage.
            allvalid = all(child.valid for child in middleresult)
            finalresult = ParseTree(middleresult[0].leftpos, middleresult[-1].rightpos,
                                    symbollist, compoundword, middleresult[0].production,
                                    valid = allvalid)
            # FIXME: the already created elements are added as children of
            # the new node.
            rightside = []
            for child in middleresult:
                assert(child != finalresult)
                finalresult.append_child(child)
                if child.production:
                    rightside += child.production.leftside
            if productionset:
                try:
                    finalresult.production = productionset.getProductionsBySide(rightside, "right")
                except IndexError:
                    finallist += middleresult  # rule not found: add the unjoined results
                else:
                    finallist.append(finalresult)  # rule found: add the joined version
        else:
            raise Exception
    return finallist