def nextToken(self, include_gd=False):
    """Yield the tokens of the shortest tokenization covering ``self.string``.

    Every substring of ``self.string`` is tested against a checker built
    (via ``checker_factory``) for each grammar definition in
    ``self.alphabet``. All accepted substrings are stored in a
    ``PositionResultList``; among the sequences it reports as valid, only
    those whose last element ends at ``len(self.string)`` are kept, and the
    one with the fewest elements is emitted.

    Args:
        include_gd: when true, each yielded ``Token`` carries the element's
            ``'gd'`` entry; otherwise ``None`` is passed in its place.

    Yields:
        One ``Token`` per element of the selected sequence.

    Raises:
        Exception: if no substring is accepted by any checker, or if no
            valid sequence reaches the end of the input. As this is a
            generator, the exception surfaces on first iteration.
    """
    text = self.string
    end = len(text)
    tree = PositionResultList()  # This is the extract algorithm

    # Brute force: try every (left, right) window against every definition.
    matches = []
    for gd in self.alphabet:
        checker = checker_factory(gd)
        for left in range(0, end):
            for right in range(left + 1, end + 1):
                if checker.check(text[left:right]):
                    matches.append((left, right, gd))
    if not matches:
        raise Exception("Nothing consumed")

    for left, right, gd in matches:
        tree.append(left, right, text[left:right], gd, check_position=False)

    # Keep only the sequences that consume the input up to its last position.
    complete = [
        seq for seq in tree.valid_sequences()
        if seq[-1]['right'] == len(self.string)
    ]
    if not complete:
        raise Exception("No sequence found for input %s alphabet %s" % (self.string,self.alphabet))

    # Always gets the match with less tokens (first one wins on ties).
    shortest = min(complete, key=len)
    for element in shortest:
        gd = element.get('gd') if include_gd else None
        yield Token(element['content'], gd)
def nextToken(self):
    """Yield the tokens of the shortest tokenization covering ``self.string``.

    For each grammar definition in ``self.alphabet`` a checker is built with
    ``checker_factory`` and applied to candidate substrings of
    ``self.string``. When the definition exposes a truthy ``maxsize``
    attribute, candidates longer than ``maxsize`` are not tried. Accepted
    substrings are appended to a ``PositionResultList``; among the sequences
    it reports as valid, those ending at ``len(self.string)`` are kept and
    the one with the fewest elements is emitted.

    Yields:
        ``Token(content, gd)`` for each element of the selected sequence.

    Raises:
        Exception: if nothing was accepted ("Nothing consumed") or no valid
            sequence reaches the end of the input. As this is a generator,
            the exception surfaces on first iteration.
    """
    tree = PositionResultList()  # This is the extract algorithm
    string_end = len(self.string)
    for gd in self.alphabet:
        checker = checker_factory(gd)
        # maxsize belongs to the definition, so read it once per definition
        # instead of once per starting offset.
        maxsize = getattr(gd, 'maxsize', None)
        for left in range(0, string_end):
            if maxsize:
                # Longest candidate starting at `left` is `maxsize` long,
                # clipped to the end of the input.
                max_right = min(left + maxsize + 1, string_end + 1)
            else:
                max_right = string_end + 1
            for right in range(left + 1, max_right):
                fragment = self.string[left:right]
                if checker.check(fragment):
                    tree.append(left, right, fragment, gd, check_position=False)
    if not tree:
        raise Exception("Nothing consumed")

    # Keep only the sequences that consume the input up to its last position.
    right_length_seq = []
    for candidate in tree.valid_sequences():
        my_list = list(candidate)
        if my_list[-1]['right'] == string_end:
            right_length_seq.append(my_list)
    if not right_length_seq:
        raise Exception("No sequence found for input %s alphabet %s" % (self.string,self.alphabet))

    # Always gets the match with less tokens; min() returns the first
    # shortest sequence, exactly what a stable sort followed by [0] gave.
    for y in min(right_length_seq, key=len):
        yield Token(y['content'], y.get('gd'))
def nextToken(self):
    """Yield the tokens of the shortest tokenization covering ``self.string``.

    A checker is created through ``checker_factory`` for every grammar
    definition in ``self.alphabet`` and run against candidate substrings of
    ``self.string``; a truthy ``maxsize`` attribute on the definition caps
    the candidate length. Each accepted substring is appended to a
    ``PositionResultList``. From the sequences it reports as valid, only
    those whose final element ends at ``len(self.string)`` are considered,
    and the one with the fewest elements is emitted.

    Yields:
        ``Token(content, gd)`` for each element of the selected sequence.

    Raises:
        Exception: if nothing was accepted ("Nothing consumed") or no valid
            sequence reaches the end of the input. As this is a generator,
            the exception surfaces on first iteration.
    """
    tree = PositionResultList()  # This is the extract algorithm
    total = len(self.string)
    for gd in self.alphabet:
        checker = checker_factory(gd)
        # Loop-invariant for a given definition: look it up only once.
        maxsize = getattr(gd, 'maxsize', None)
        for left in range(0, total):
            max_right = total + 1
            if maxsize:
                # Do not try candidates longer than `maxsize` characters.
                max_right = min(left + maxsize + 1, max_right)
            for right in range(left + 1, max_right):
                candidate = self.string[left:right]
                if checker.check(candidate):
                    tree.append(left, right, candidate, gd,
                                check_position=False)
    if not tree:
        raise Exception("Nothing consumed")

    # Materialize each valid sequence and keep the ones covering the input
    # up to its last position.
    right_length_seq = []
    for sequence in tree.valid_sequences():
        my_list = list(sequence)
        if my_list[-1]['right'] == total:
            right_length_seq.append(my_list)
    if not right_length_seq:
        raise Exception("No sequence found for input %s alphabet %s" %
                        (self.string, self.alphabet))

    # Always gets the match with less tokens (first one wins on ties).
    fewest_tokens = min(right_length_seq, key=len)
    for y in fewest_tokens:
        yield Token(y['content'], y.get('gd'))
def testMain(self):
    """Five contiguous one-character results must chain into a 5-element sequence."""
    from pydsl.tree import PositionResultList
    seq = PositionResultList()
    # Spans (0, 1), (1, 2), ... (4, 5), each holding a single ".".
    for left in range(5):
        seq.append(left, left + 1, ".")
    last_sequence = seq.valid_sequences()[-1]
    self.assertEqual(len(last_sequence), 5)
def testMain(self):
    """Check that the last valid sequence spans all five adjacent results."""
    from pydsl.tree import PositionResultList
    seq = PositionResultList()
    # Adjacent (left, right) boundaries: 0-1, 1-2, 2-3, 3-4, 4-5.
    for left, right in zip(range(0, 5), range(1, 6)):
        seq.append(left, right, ".")
    sequences = seq.valid_sequences()
    self.assertEqual(len(sequences[-1]), 5)
def __recursive_parser(self, onlysymbol, data, production, showerrors = False):
    """Recursively parse ``data`` as ``onlysymbol`` and return the parse trees.

    Aux function. helps check_word

    Dispatches on the kind of ``onlysymbol``:

    * ``TerminalSymbol``: delegates to ``self._reduce_terminal`` with the
      first element of ``data``.
    * ``NullSymbol``: returns a single empty ``ParseTree`` spanning (0, 0).
    * ``NonTerminalSymbol``: tries every production returned by
      ``self._productionset.getProductionsBySide(onlysymbol)``, recursing
      into each right-side symbol, and builds one ``ParseTree`` per accepted
      sequence of children.

    Args:
        onlysymbol: the symbol to recognise.
        data: the input to parse; sliced and indexed, so it must be a
            sequence.
        production: the production whose right side contains ``onlysymbol``;
            only read (``production.rightside``) when building the error
            tree.
        showerrors: when true and nothing parsed, return a single invalid
            ``ParseTree`` holding the collected invalid children instead of
            an empty list. Also forwarded to ``_reduce_terminal`` and to the
            recursive calls.

    Returns:
        A list of ``ParseTree``; empty when ``data`` is empty.

    Raises:
        Exception: if ``onlysymbol`` is none of the three symbol kinds.
    """
    LOG.debug("__recursive_parser: Begin ")
    if not data:
        return []
    from pydsl.grammar.symbol import TerminalSymbol, NullSymbol, NonTerminalSymbol
    if isinstance(onlysymbol, TerminalSymbol):
        LOG.debug("Iteration: terminalsymbol")
        return self._reduce_terminal(onlysymbol,data[0], showerrors)
    elif isinstance(onlysymbol, NullSymbol):
        return [ParseTree(0, 0, onlysymbol, "")]
    elif isinstance(onlysymbol, NonTerminalSymbol):
        validstack = []  # candidate child sequences gathered across alternatives
        invalidstack = []  # falsy results from alternatives that did not break
        for alternative in self._productionset.getProductionsBySide(onlysymbol): # Each alternative production
            alternativetree = PositionResultList()
            alternativeinvalidstack = []
            for symbol in alternative.rightside: # Each right-side symbol, in order
                symbol_success = False
                # Presumably the offsets at which previously stored results
                # end, i.e. where this symbol may start -- confirm against
                # PositionResultList.
                for totalpos in alternativetree.right_limit_list(): # Right limit
                    if totalpos >= len(data):
                        continue
                    thisresult = self.__recursive_parser(symbol, data[totalpos:], alternative, showerrors)
                    if not (thisresult and all(thisresult)):
                        # Keep the falsy results for error reporting and try
                        # the next starting offset.
                        alternativeinvalidstack += [x for x in thisresult if not x]
                        continue
                    symbol_success = True
                    for x in thisresult:
                        # The sub-parse saw data[totalpos:], so its result is
                        # shifted by totalpos before being stored.
                        x.shift(totalpos)
                        success = alternativetree.append(x.left, x.right, x)
                        if not success:
                            #TODO: Add as an error to the tree or to another place
                            LOG.debug("Discarded symbol :" + str(symbol) + " position:" + str(totalpos))
                        else:
                            LOG.debug("Added symbol :" + str(symbol) + " position:" + str(totalpos))
                if not symbol_success:
                    LOG.debug("Symbol doesn't work" + str(symbol))
                    break # Try next alternative
            else: # Alternative success (no break happened)
                invalidstack += alternativeinvalidstack
            # NOTE(review): the original indentation was lost; this loop is
            # placed at alternative level (it runs whether or not the
            # alternative succeeded) -- confirm against upstream.
            for x in alternativetree.valid_sequences():
                validstack.append(x)
        result = []
        LOG.debug("iteration result collection finished:" + str(validstack))
        # Second pass: match every collected sequence against every
        # alternative and build the resulting trees.
        for alternative in self._productionset.getProductionsBySide(onlysymbol):
            nullcount = alternative.rightside.count(NullSymbol())
            for results in validstack:
                nnullresults = 0  # NOTE(review): redundant, overwritten below
                left = results[0]['left']
                right = results[-1]['right']
                nnullresults = len([x for x in results if x['content'].symbol == NullSymbol()])
                # Null symbols are ignored when comparing lengths.
                if len(results) - nnullresults != len(alternative.rightside) - nullcount:
                    LOG.debug("Discarded: incorrect number of non null symbols")
                    continue
                if right > len(data):
                    LOG.debug("Discarded: length mismatch")
                    continue
                # NOTE(review): this `continue` only advances the inner
                # `for x` loop, so a mismatch is logged but `results` is NOT
                # discarded. Verify whether skipping to the next `results`
                # was intended.
                for x in range(min(len(alternative.rightside), len(results))):
                    if results[x]['content'] != alternative.rightside[x]:
                        LOG.debug("Discarded: rule doesn't match partial result")
                        continue
                childlist = [x['content'] for x in results]
                allvalid = all([x.valid for x in childlist])
                if allvalid:
                    newresult = ParseTree(0, right - left, onlysymbol, data[left:right], childlist = childlist)
                    newresult.valid = True
                    result.append(newresult)
        if showerrors and not result:
            # Nothing parsed: return one invalid tree spanning all of data,
            # holding the invalid children that belong to the caller's
            # production.
            erroresult = ParseTree(0,len(data), onlysymbol , data, valid = False)
            for invalid in invalidstack:
                if invalid.content in production.rightside:
                    erroresult.append(invalid)
            return [erroresult]
        return result
    raise Exception("Unknown symbol:" + str(onlysymbol))
def __recursive_parser(self, onlysymbol, data, production, showerrors=False):
    """Recursively parse ``data`` as ``onlysymbol`` and return the parse trees.

    Aux function. helps check_word

    Behaviour depends on the kind of ``onlysymbol``:

    * ``TerminalSymbol``: the result of ``self._reduce_terminal`` applied to
      the first element of ``data`` is returned.
    * ``NullSymbol``: a list with one empty ``ParseTree`` spanning (0, 0) is
      returned.
    * ``NonTerminalSymbol``: each production obtained from
      ``self._productionset.getProductionsBySide(onlysymbol)`` is tried by
      recursing into its right-side symbols; one ``ParseTree`` is built per
      accepted sequence of children.

    Args:
        onlysymbol: the symbol to recognise.
        data: the input to parse; sliced and indexed, so it must be a
            sequence.
        production: the production whose right side contains ``onlysymbol``;
            only read (``production.rightside``) when building the error
            tree.
        showerrors: when true and nothing parsed, a single invalid
            ``ParseTree`` carrying the collected invalid children is
            returned instead of an empty list. Also forwarded to
            ``_reduce_terminal`` and to the recursive calls.

    Returns:
        A list of ``ParseTree``; empty when ``data`` is empty.

    Raises:
        Exception: if ``onlysymbol`` is none of the three symbol kinds.
    """
    LOG.debug("__recursive_parser: Begin ")
    if not data:
        return []
    from pydsl.grammar.symbol import TerminalSymbol, NullSymbol, NonTerminalSymbol
    if isinstance(onlysymbol, TerminalSymbol):
        LOG.debug("Iteration: terminalsymbol")
        return self._reduce_terminal(onlysymbol, data[0], showerrors)
    elif isinstance(onlysymbol, NullSymbol):
        return [ParseTree(0, 0, onlysymbol, "")]
    elif isinstance(onlysymbol, NonTerminalSymbol):
        validstack = []  # candidate child sequences gathered across alternatives
        invalidstack = []  # falsy results from alternatives that did not break
        for alternative in self._productionset.getProductionsBySide(
                onlysymbol):  # Each alternative production
            alternativetree = PositionResultList()
            alternativeinvalidstack = []
            for symbol in alternative.rightside:  # Each right-side symbol, in order
                symbol_success = False
                # Presumably the offsets at which previously stored results
                # end, i.e. where this symbol may start -- confirm against
                # PositionResultList.
                for totalpos in alternativetree.right_limit_list(
                ):  # Right limit
                    if totalpos >= len(data):
                        continue
                    thisresult = self.__recursive_parser(
                        symbol, data[totalpos:], alternative, showerrors)
                    if not (thisresult and all(thisresult)):
                        # Remember the falsy results for error reporting and
                        # move on to the next starting offset.
                        alternativeinvalidstack += [
                            x for x in thisresult if not x
                        ]
                        continue
                    symbol_success = True
                    for x in thisresult:
                        # The sub-parse saw data[totalpos:], so its result is
                        # shifted by totalpos before being stored.
                        x.shift(totalpos)
                        success = alternativetree.append(
                            x.left, x.right, x)
                        if not success:
                            #TODO: Add as an error to the tree or to another place
                            LOG.debug("Discarded symbol :" + str(symbol) +
                                      " position:" + str(totalpos))
                        else:
                            LOG.debug("Added symbol :" + str(symbol) +
                                      " position:" + str(totalpos))
                if not symbol_success:
                    LOG.debug("Symbol doesn't work" + str(symbol))
                    break  # Try next alternative
            else:  # Alternative success (no break happened)
                invalidstack += alternativeinvalidstack
            # NOTE(review): the original indentation was lost; this loop is
            # placed at alternative level (it runs whether or not the
            # alternative succeeded) -- confirm against upstream.
            for x in alternativetree.valid_sequences():
                validstack.append(x)
        result = []
        LOG.debug("iteration result collection finished:" + str(validstack))
        # Second pass: match every collected sequence against every
        # alternative and build the resulting trees.
        for alternative in self._productionset.getProductionsBySide(
                onlysymbol):
            nullcount = alternative.rightside.count(NullSymbol())
            for results in validstack:
                nnullresults = 0  # NOTE(review): redundant, overwritten below
                left = results[0]['left']
                right = results[-1]['right']
                nnullresults = len([
                    x for x in results if x['content'].symbol == NullSymbol()
                ])
                # Null symbols are ignored when comparing lengths.
                if len(results) - nnullresults != len(
                        alternative.rightside) - nullcount:
                    LOG.debug(
                        "Discarded: incorrect number of non null symbols")
                    continue
                if right > len(data):
                    LOG.debug("Discarded: length mismatch")
                    continue
                # NOTE(review): this `continue` only advances the inner
                # `for x` loop, so a mismatch is logged but `results` is NOT
                # discarded. Verify whether skipping to the next `results`
                # was intended.
                for x in range(
                        min(len(alternative.rightside), len(results))):
                    if results[x]['content'] != alternative.rightside[x]:
                        LOG.debug(
                            "Discarded: rule doesn't match partial result")
                        continue
                childlist = [x['content'] for x in results]
                allvalid = all([x.valid for x in childlist])
                if allvalid:
                    newresult = ParseTree(0,
                                          right - left,
                                          onlysymbol,
                                          data[left:right],
                                          childlist=childlist)
                    newresult.valid = True
                    result.append(newresult)
        if showerrors and not result:
            # Nothing parsed: return one invalid tree spanning all of data,
            # holding the invalid children that belong to the caller's
            # production.
            erroresult = ParseTree(0,
                                   len(data),
                                   onlysymbol,
                                   data,
                                   valid=False)
            for invalid in invalidstack:
                if invalid.content in production.rightside:
                    erroresult.append(invalid)
            return [erroresult]
        return result
    raise Exception("Unknown symbol:" + str(onlysymbol))