def testZeroOrMore(self):
    """ZeroOrMore(String("a")) is a Grammar that accepts any run of "a".

    Also pins ``first()`` to ``Choice([String("a")])`` and verifies that
    the empty string is accepted while "b" is rejected.
    """
    repeated_a = ZeroOrMore(String("a"))
    self.assertTrue(isinstance(repeated_a, Grammar))
    self.assertEqual(repeated_a.first(), Choice([String("a")]))
    from pydsl.Check import check
    # Same inputs, in the same order, as the individual assertions had.
    for accepted in ("a", "aa", "aaaa", ""):
        self.assertTrue(check(repeated_a, accepted))
    self.assertFalse(check(repeated_a, "b"))
def __call__(self, data, include_gd=False):
    """Lex ``data`` (expressed in ``self.base``) into tokens of ``self.alphabet``.

    Args:
        data: input sequence. When ``self.base`` is an ``Encoding`` it is
            first run through ``EncodingLexer`` and annotated with
            positions.
        include_gd: accepted for interface compatibility; this
            implementation does not read it.

    Returns:
        A list of ``Token(content, gd)`` built from the positioned tokens
        after sorting by ``left`` and applying ``remove_subsets`` and
        ``remove_duplicates``.

    Raises:
        ValueError: if any input element is rejected by
            ``check(self.base, element)``.
        Exception: if a grammar of ``self.alphabet`` has no ``'parsed'``
            entry in the graph after the walk.
    """
    if isinstance(self.base, Encoding):
        data = list(EncodingLexer(self.base)(data))
        from pydsl.Token import append_position_to_token_list
        data = append_position_to_token_list(data)

    # Imported once, rather than on every iteration of the loop below.
    from pydsl.Check import check
    for element in data:
        if not check(self.base, element):
            raise ValueError('Unexpected input grammar')

    graph = graph_from_alphabet(self.alphabet, self.base)
    # Seed the base node with the input. The backwards walk is presumably
    # what fills 'parsed' on the other nodes (see my_call_back) -- the
    # loop below raises if that did not happen for an output grammar.
    graph.node[self.base]['parsed'] = data
    digraph_walker_backwards(graph, self.base, my_call_back)

    def flat_token(token):
        # Unwrap nested tokens so the emitted content matches the
        # signature of the function (base -> alphabet).
        while hasattr(token, 'content'):
            token = token.content
        return token

    result = []
    for output_alphabet in self.alphabet:
        if output_alphabet not in graph.node or 'parsed' not in graph.node[output_alphabet]:
            raise Exception("alphabet not initialized:%s" % output_alphabet)
        for token in graph.node[output_alphabet]['parsed']:
            result.append(PositionToken(flat_token(token), output_alphabet,
                                        token.left, token.right))
    result = sorted(result, key=lambda x: x.left)
    result = remove_subsets(result)
    result = remove_duplicates(result)
    return [Token(x.content, x.gd) for x in result]
def __aux_parser(self, symbol):
    """Parse ``symbol`` at the current position and return its result.

    A ``TerminalSymbol`` is delegated to ``self.match``. For any other
    symbol, the productions returned by ``getProductionsBySide(symbol)``
    are filtered to those whose first right-side element's ``first_lookup``
    grammar accepts ``self.current``. Exactly one must remain; each of its
    right-side elements is then parsed recursively and wrapped in a
    ``ParseTree`` spanning the first child's ``left`` to the last child's
    ``right``.

    Raises:
        Exception: if the number of applicable productions is not one.
    """
    from pydsl.Grammar.Symbol import TerminalSymbol

    if isinstance(symbol, TerminalSymbol):
        LOG.debug("matching symbol %s, data:%s, index:%s" % (symbol, self.data, self.index))
        matched = self.match(symbol)
        LOG.debug("symbol matched %s" % matched)
        return matched

    candidates = [
        production
        for production in self._productionset.getProductionsBySide(symbol)
        if check(self._productionset.first_lookup(production.rightside[0]),
                 self.current)
    ]
    if len(candidates) != 1:
        raise Exception("Expected only one valid production, found %s" % len(candidates))

    children = [self.__aux_parser(part) for part in candidates[0].rightside]
    start = children[0].left
    end = children[-1].right
    payload = [child.content for child in children]
    return ParseTree(start, end, symbol, payload, childlist=children)
def __call__(self, data, include_gd=False):
    """Translate ``data`` from ``self.base`` into tokens of ``self.alphabet``.

    Args:
        data: input sequence. If ``self.base`` is an ``Encoding`` the
            input is tokenised with ``EncodingLexer`` and given positions
            before validation.
        include_gd: kept for interface compatibility; not read by this
            implementation.

    Returns:
        A list of ``Token(content, gd)`` derived from the positioned
        tokens, ordered by ``left`` and filtered through
        ``remove_subsets`` and ``remove_duplicates``.

    Raises:
        ValueError: if ``check(self.base, element)`` fails for any element.
        Exception: if a grammar of ``self.alphabet`` is missing from the
            graph or has no ``'parsed'`` entry after the walk.
    """
    if isinstance(self.base, Encoding):
        data = list(EncodingLexer(self.base)(data))
        from pydsl.Token import append_position_to_token_list
        data = append_position_to_token_list(data)

    # One import before the loop instead of one per element.
    from pydsl.Check import check
    for element in data:
        if not check(self.base, element):
            raise ValueError('Unexpected input grammar')

    graph = graph_from_alphabet(self.alphabet, self.base)
    # Attach the input to the base node; my_call_back is presumably what
    # propagates 'parsed' to the other nodes during the backwards walk.
    graph.node[self.base]['parsed'] = data
    digraph_walker_backwards(graph, self.base, my_call_back)

    def flat_token(token):
        # Flatten nested tokens so the content matches the signature of
        # the function (base -> alphabet).
        while hasattr(token, 'content'):
            token = token.content
        return token

    result = []
    for output_alphabet in self.alphabet:
        if output_alphabet not in graph.node or 'parsed' not in graph.node[
                output_alphabet]:
            raise Exception("alphabet not initialized:%s" % output_alphabet)
        for token in graph.node[output_alphabet]['parsed']:
            result.append(
                PositionToken(flat_token(token), output_alphabet, token.left,
                              token.right))
    result.sort(key=lambda x: x.left)
    result = remove_subsets(result)
    result = remove_duplicates(result)
    return [Token(x.content, x.gd) for x in result]
def _reduce_terminal(self, symbol, data, showerrors = False):
    """Build the parse result for a single terminal symbol.

    Returns a one-element list holding ``ParseTree(0, 1, symbol, data)``
    when ``check(symbol.gd, data)`` succeeds. When it fails and
    ``showerrors`` is truthy, the same tree is returned flagged
    ``valid=False``. Otherwise the list is empty.
    """
    from pydsl.Check import check
    from pydsl.Tree import ParseTree

    if check(symbol.gd, data):
        return [ParseTree(0, 1, symbol, data)]
    # Reaching this point already means the check failed.
    if showerrors:
        return [ParseTree(0, 1, symbol, data, valid=False)]
    return []
def _reduce_terminal(self, symbol, data, showerrors=False):
    """Reduce ``data`` against one terminal ``symbol``.

    The result is always a list: a single ``ParseTree(0, 1, symbol, data)``
    if ``check(symbol.gd, data)`` accepts the input, a single tree marked
    ``valid=False`` if it does not and ``showerrors`` is truthy, and an
    empty list in the remaining case.
    """
    from pydsl.Check import check
    from pydsl.Tree import ParseTree

    accepted = check(symbol.gd, data)
    if accepted:
        trees = [ParseTree(0, 1, symbol, data)]
    elif showerrors:
        trees = [ParseTree(0, 1, symbol, data, valid=False)]
    else:
        trees = []
    return trees
def get_trees(self, data, showerrors = False): # -> list:
    """Return the distinct parse trees that span the whole of ``data``.

    Every element of ``data`` must be accepted by the production set's
    alphabet. The candidates produced by the recursive parser are kept
    only if they start at 0 and end at ``len(data)``; duplicates are
    dropped while preserving first-seen order.

    Raises:
        ValueError: if any element of ``data`` is rejected by the alphabet.
    """
    for item in data:
        if not check(self._productionset.alphabet, item):
            raise ValueError("Unknown element in %s" % str(data))

    candidates = self.__recursive_parser(self._productionset.initialsymbol,
                                         data,
                                         self._productionset.main_production,
                                         showerrors)
    complete = []
    for tree in candidates:
        if tree.left != 0 or tree.right != len(data):
            continue
        if tree not in complete:
            complete.append(tree)
    return complete
def insert(self, state, token):
    """Return the action stored for ``token`` in the row for ``state``.

    An ``EndSymbol`` token is looked up directly. Any other token is
    matched against the ``TerminalSymbol`` keys of the row via
    ``check(key.gd, token)``: no match yields ``{"action": "Fail"}``, a
    single match yields that key's entry.

    Raises:
        Exception: if more than one terminal symbol matches the token.
    """
    if token == EndSymbol():
        return self[state][EndSymbol()]

    from pydsl.Check import check

    matches = []
    for candidate in self[state]:
        if isinstance(candidate, TerminalSymbol) and check(candidate.gd, token):
            matches.append(candidate)

    if not matches:
        return {"action":"Fail"}
    if len(matches) > 1:
        raise Exception("Multiple symbols matches input")
    return self[state][matches[0]]
def get_trees(self, data, showerrors=False):  # -> list:
    """Return the distinct parse trees that cover all of ``data``.

    A ``str`` input is split into a list of its characters first. Each
    element must be accepted by the production set's alphabet. Candidate
    trees from the recursive parser are kept only when they span from 0
    to ``len(data)``, without duplicates and in first-seen order.

    Raises:
        ValueError: naming the first element rejected by the alphabet.
    """
    if isinstance(data, str):
        data = list(data)

    for item in data:
        if check(self._productionset.alphabet, item):
            continue
        raise ValueError("Unknown element %s" % str(item))

    candidates = self.__recursive_parser(self._productionset.initialsymbol,
                                         data,
                                         self._productionset.main_production,
                                         showerrors)
    complete = []
    for tree in candidates:
        spans_input = tree.left == 0 and tree.right == len(data)
        if spans_input and tree not in complete:
            complete.append(tree)
    return complete
def __aux_parser(self, symbol):
    """Parse ``symbol`` at the current position and return its result.

    Terminal symbols are handed to ``self.match``. For other symbols the
    productions for ``symbol`` are narrowed to those whose first
    right-side element's ``first_lookup`` grammar accepts ``self.current``.
    Exactly one must qualify; its right-side elements are parsed
    recursively and combined into a ``ParseTree`` running from the first
    child's ``left`` to the last child's ``right``.

    Raises:
        ParseError: if the number of applicable productions is not one.
    """
    from pydsl.Grammar.Symbol import TerminalSymbol

    if isinstance(symbol, TerminalSymbol):
        LOG.debug("matching symbol %s, data:%s, index:%s" % (symbol,self.data,self.index ))
        matched = self.match(symbol)
        LOG.debug("symbol matched %s" % matched)
        return matched

    applicable = [
        production
        for production in self._productionset.getProductionsBySide(symbol)
        if check(self._productionset.first_lookup(production.rightside[0]),
                 self.current)
    ]
    if len(applicable) != 1:
        raise ParseError("Expected only one valid production, found %s" % len(applicable), 0)

    children = []
    for part in applicable[0].rightside:
        children.append(self.__aux_parser(part))
    start = children[0].left
    end = children[-1].right
    payload = [child.content for child in children]
    return ParseTree(start, end, symbol, payload, childlist=children)
def __call__(self, data):
    """Return the grammars in ``self.grammarlist`` that accept ``data``.

    A grammar is kept when ``check(grammar, data)`` is truthy; the order
    of ``self.grammarlist`` is preserved.
    """
    accepting = []
    for grammar in self.grammarlist:
        if check(grammar, data):
            accepting.append(grammar)
    return accepting
def testChoice(self):
    """A Choice of String("a") and String("b") accepts "a" and "b" but not "c"."""
    either = Choice((String("a"), String("b")))
    from pydsl.Check import check
    for accepted in ("a", "b"):
        self.assertTrue(check(either, accepted))
    self.assertFalse(check(either, "c"))
def check(self, data):# ->bool:
    """Return the result of checking ``data`` against this symbol's ``gd``."""
    grammar_definition = self.gd
    return check(grammar_definition, data)
def check(self, data):  # ->bool:
    """Tell whether ``data`` is recognised by this symbol.

    Delegates to the module-level ``check`` with this symbol's ``gd``.
    """
    verdict = check(self.gd, data)
    return verdict