def testZeroOrMore(self):
    """ZeroOrMore(String("a")) is a Grammar accepting any run of "a", including the empty one."""
    mygrammar = ZeroOrMore(String("a"))
    # assertIsInstance reports the actual type on failure, unlike assertTrue(isinstance(...)).
    self.assertIsInstance(mygrammar, Grammar)
    self.assertEqual(mygrammar.first(), Choice([String("a")]))
    # One, several and zero repetitions must all be accepted.
    self.assertTrue(check(mygrammar, "a"))
    self.assertTrue(check(mygrammar, "aa"))
    self.assertTrue(check(mygrammar, "aaaa"))
    self.assertTrue(check(mygrammar, ""))
    # A symbol other than the repeated one must be rejected.
    self.assertFalse(check(mygrammar, "b"))
def __call__(self, data):
    """Validate ``data`` against ``self.base`` and return it as tokens of ``self.alphabet``.

    ``data`` is either a ``str`` (each character becomes a ``Token`` tagged
    with ``ascii_encoding`` and is given a position) or a sequence of
    ``Token`` instances.

    Returns ``data`` itself when ``self.base == self.alphabet``; otherwise a
    list of ``Token(content, gd)`` built from the ``'parsed'`` entries the
    graph walk leaves on each output alphabet, ordered by left position.

    Raises:
        TypeError: some element of ``data`` is not a ``Token``.
        ValueError: some element is not accepted by ``self.base``.
        Exception: an output alphabet has no ``'parsed'`` entry after the walk.
    """
    # Imported at call time, as in the original; hoisted out of the loop so
    # the import statement is not re-executed for every element.
    from pydsl.check import check
    if isinstance(data, str):
        from pydsl.token import append_position_to_token_list
        data = [Token(x, ascii_encoding) for x in data]
        data = append_position_to_token_list(data)
    # NOTE(review): positions are only attached for str input; Token input is
    # presumably expected to already carry left/right, which are read below -- confirm.
    if not all(isinstance(x, Token) for x in data):
        raise TypeError("data must be a str or a sequence of Token instances")
    for element in data:
        if not check(self.base, [element]):
            raise ValueError('Unexpected input %s for alphabet %s' % (element, self.base))
    if self.base == self.alphabet:
        return data
    graph = graph_from_alphabet(self.alphabet, self.base)
    # Seed the base node with the input; the backwards walk starts from it.
    graph.node[self.base]['parsed'] = data
    digraph_walker_backwards(graph, self.base, my_call_back)
    result = []
    for output_alphabet in self.alphabet:
        # Alphabets contained in the base are served straight from the base node.
        if output_alphabet in self.base:
            output_alphabet = self.base
        if output_alphabet not in graph.node or 'parsed' not in graph.node[output_alphabet]:
            raise Exception("alphabet not initialized:%s" % output_alphabet)
        for token in graph.node[output_alphabet]['parsed']:
            result.append(PositionToken(str(token), output_alphabet, token.left, token.right))
    result = sorted(result, key=lambda x: x.left)
    result = remove_subsets(result)
    result = remove_duplicates(result)
    # Positions were only needed for ordering and filtering; return plain tokens.
    return [Token(x.content, x.gd) for x in result]
def __call__(self, data, include_gd=False):
    """Validate ``data`` against ``self.base`` and return it as tokens of ``self.alphabet``.

    When ``self.base == ascii_encoding`` every element ``x`` of ``data`` is
    wrapped as ``Token(x, x)`` and given a position first. ``include_gd`` is
    accepted but not read by this implementation.

    Returns ``data`` itself when ``self.base == self.alphabet``; otherwise a
    list of ``Token(content, gd)`` built from the ``'parsed'`` entries the
    graph walk leaves on each output alphabet, ordered by left position.

    Raises:
        ValueError: some element is not accepted by ``self.base``.
        Exception: an output alphabet has no ``'parsed'`` entry after the walk.
    """
    # Imported at call time, as in the original; hoisted out of the loop so
    # the import statement is not re-executed for every element.
    from pydsl.check import check

    def flat_token(token):
        # Unwrap nested tokens until reaching a value with no ``content``
        # attribute, so the output matches the (base -> alphabet) signature.
        while hasattr(token, 'content'):
            token = token.content
        return token

    if self.base == ascii_encoding:
        from pydsl.token import append_position_to_token_list
        data = [Token(x, x) for x in data]
        data = append_position_to_token_list(data)
    # NOTE(review): positions are only attached on the ascii path; other input
    # is presumably expected to already carry left/right, which are read below -- confirm.
    for element in data:
        if not check(self.base, [element]):
            raise ValueError('Unexpected input %s for alphabet %s' % (element, self.base))
    if self.base == self.alphabet:
        return data
    graph = graph_from_alphabet(self.alphabet, self.base)
    # Seed the base node with the input; the backwards walk starts from it.
    graph.node[self.base]['parsed'] = data
    digraph_walker_backwards(graph, self.base, my_call_back)
    result = []
    for output_alphabet in self.alphabet:
        # Alphabets contained in the base are served straight from the base node.
        if output_alphabet in self.base:
            output_alphabet = self.base
        if output_alphabet not in graph.node or 'parsed' not in graph.node[output_alphabet]:
            raise Exception("alphabet not initialized:%s" % output_alphabet)
        for token in graph.node[output_alphabet]['parsed']:
            result.append(PositionToken(flat_token(token), output_alphabet, token.left, token.right))
    result = sorted(result, key=lambda x: x.left)
    result = remove_subsets(result)
    result = remove_duplicates(result)
    # Positions were only needed for ordering and filtering; return plain tokens.
    return [Token(x.content, x.gd) for x in result]
def concept_tree(self):
    """Fold the repository sentences into ``self.root`` and return it.

    Sentences rejected by ``general_grammar`` are logged and skipped. For an
    ``'ISA'`` sentence (child, parent):

    * no root yet: the parent becomes the root and the child is appended;
    * only the parent is in the tree: the child is appended to the root;
    * only the child is in the tree: the parent becomes a new root above
      the old one;
    * both are in the tree: the child is appended to the parent's node.

    Raises:
        NotImplementedError: an ``'ISA'`` sentence where neither concept is
            in the existing tree.
        ValueError: the sentence's action is neither ``'ISA'`` nor ``'HAS'``.
    """
    for sentence in self.repository.sentences:
        if not check(general_grammar, sentence):
            # Logger-side formatting: the message is only built if emitted.
            LOG.warning("Unknown sentence %s", sentence)
            continue
        translated_tuple = general_grammar.grammar(sentence).expr()
        action = translated_tuple[0]
        if action == 'ISA':
            _, child_concept, parent_concept = translated_tuple
            if self.root is None:
                self.root = Tree(parent_concept)
                self.root.append(child_concept)
                continue
            # Look each concept up once instead of re-searching the tree in
            # every branch condition (assumes find_content has no side effects).
            parent_node = self.root.find_content(parent_concept)
            child_node = self.root.find_content(child_concept)
            if parent_node and not child_node:
                self.root.append(child_concept)
            elif child_node and not parent_node:
                old_root = self.root
                self.root = Tree(parent_concept)
                self.root.append(old_root)
            elif parent_node and child_node:
                parent_node.append(child_concept)
            else:
                raise NotImplementedError
        elif action == 'HAS':
            _, subject, property_name, dobject = translated_tuple
            # NOTE(review): the looked-up node is not used afterwards, and this
            # line fails with AttributeError while self.root is still None.
            subject_instance = self.root.find_content(subject)
        else:
            raise ValueError(action)
    return self.root
def _reduce_terminal(self, symbol, data, showerrors = False):
    """Wrap a single input element in a one-element list of ParseTree spanning 0..1.

    The tree is valid when ``symbol.gd`` accepts ``[data]``. On a mismatch the
    result is an empty list, unless ``showerrors`` is set, in which case a tree
    flagged ``valid=False`` is returned instead.
    """
    from pydsl.check import check
    from pydsl.tree import ParseTree
    if check(symbol.gd, [data]):
        return [ParseTree(0, 1, symbol, data)]
    # From here on the element did not match the terminal.
    if not showerrors:
        return []
    return [ParseTree(0, 1, symbol, data, valid=False)]
def get_trees(self, data, showerrors = False): # -> list:
    """Return the distinct parse trees that span the whole of ``data``.

    Every element of ``data`` must be accepted by the production set's
    alphabet. The recursive parser is started from the initial symbol and
    main production; only results with ``left == 0`` and
    ``right == len(data)`` are kept, without duplicates, in the order found.

    Raises:
        ValueError: an element of ``data`` is not accepted by the alphabet.
    """
    # Use the same attribute throughout; the original read ``self.productionset``
    # only when building the error message.
    productionset = self._productionset
    if not all(check(productionset.alphabet, [x]) for x in data):
        raise ValueError("Unknown element in {}, alphabet:{}".format(str(data), productionset.alphabet))
    result = self.__recursive_parser(productionset.initialsymbol, data, productionset.main_production, showerrors)
    finalresult = []
    for eresult in result:
        # Membership test rather than a set: only equality is relied upon.
        if eresult.left == 0 and eresult.right == len(data) and eresult not in finalresult:
            finalresult.append(eresult)
    return finalresult
def insert(self, state, token):
    """Look up the action stored for ``token`` in row ``state`` of this table.

    The end symbol is looked up directly. Any other token is matched against
    the ``TerminalSymbol`` keys of the row through their grammar definition;
    no match yields a ``Fail`` action and several matches raise ``Exception``.
    """
    if token == EndSymbol():
        return self[state][EndSymbol()]
    from pydsl.check import check
    matches = []
    for candidate in self[state]:
        if isinstance(candidate, TerminalSymbol) and check(candidate.gd, [token]):
            matches.append(candidate)
    if len(matches) > 1:
        raise Exception("Multiple symbols matches input")
    if matches:
        return self[state][matches[0]]
    return {"action":"Fail"}
def get_trees(self, data, showerrors=False):  # -> list:
    """Return the distinct parse trees that span the whole of ``data``.

    Every element of ``data`` must be accepted by the production set's
    alphabet. The recursive parser is started from the initial symbol and
    main production; only results with ``left == 0`` and
    ``right == len(data)`` are kept, without duplicates, in the order found.

    Raises:
        ValueError: an element of ``data`` is not accepted by the alphabet.
    """
    # Use the same attribute throughout; the original read ``self.productionset``
    # only when building the error message.
    productionset = self._productionset
    if not all(check(productionset.alphabet, [x]) for x in data):
        raise ValueError("Unknown element in {}, alphabet:{}".format(
            str(data), productionset.alphabet))
    result = self.__recursive_parser(productionset.initialsymbol, data,
                                     productionset.main_production,
                                     showerrors)
    finalresult = []
    for eresult in result:
        spans_input = eresult.left == 0 and eresult.right == len(data)
        # Membership test rather than a set: only equality is relied upon.
        if spans_input and eresult not in finalresult:
            finalresult.append(eresult)
    return finalresult
def insert(self, state, token):
    """Return the table action for ``token`` when in ``state``.

    An end-symbol token maps straight to its entry. Otherwise the terminal
    symbols of the row whose grammar definition accepts ``[token]`` are
    collected: none gives ``{"action": "Fail"}``, exactly one gives that
    symbol's entry, and more than one raises ``Exception``.
    """
    if token == EndSymbol():
        return self[state][EndSymbol()]
    from pydsl.check import check
    accepting = [
        terminal for terminal in self[state]
        if isinstance(terminal, TerminalSymbol) and check(terminal.gd, [token])
    ]
    how_many = len(accepting)
    if how_many == 0:
        return {"action": "Fail"}
    if how_many > 1:
        raise Exception("Multiple symbols matches input")
    (only_symbol,) = accepting
    return self[state][only_symbol]
def __aux_parser(self, symbol):
    """Parse ``symbol`` at the current input position and return its tree.

    A terminal symbol is delegated to ``self.match``. For any other symbol,
    the productions whose first right-side symbol has a first-set accepting
    the current token are collected; exactly one must qualify, and its
    right side is parsed recursively, left to right.

    Raises:
        ParseError: zero or several productions are applicable.
    """
    from pydsl.grammar.symbol import TerminalSymbol
    if isinstance(symbol, TerminalSymbol):
        # Logger-side formatting: the messages are only built if emitted.
        LOG.debug("matching symbol %s, data:%s, index:%s", symbol, self.data, self.index)
        result = self.match(symbol)
        LOG.debug("symbol matched %s", result)
        return result
    productions = self._productionset.getProductionsBySide(symbol)
    valid_firsts = []
    for production in productions:
        first_of_production = self._productionset.first_lookup(production.rightside[0])
        # check() is given a one-element sequence, as every other single-token
        # check in this code base does; the bare token was passed before.
        if check(first_of_production, [self.current]):
            valid_firsts.append(production)
    if len(valid_firsts) != 1:
        raise ParseError("Expected only one valid production, found %s" % len(valid_firsts), 0)
    childlist = [self.__aux_parser(x) for x in valid_firsts[0].rightside]
    # The node spans from its first child's start to its last child's end.
    left = childlist[0].left
    right = childlist[-1].right
    content = [x.content for x in childlist]
    return ParseTree(left, right, symbol, content, childlist=childlist)
def __call__(self, data):
    """Validate ``data`` against ``self.base`` and return it as tokens of ``self.alphabet``.

    ``data`` is either a ``str`` (each character becomes a ``Token`` tagged
    with ``ascii_encoding`` and is given a position) or a sequence of
    ``Token`` instances.

    Returns ``data`` itself when ``self.base == self.alphabet``; otherwise a
    list of ``Token(content, gd)`` built from the ``'parsed'`` entries the
    graph walk leaves on each output alphabet, ordered by left position.

    Raises:
        TypeError: some element of ``data`` is not a ``Token``.
        ValueError: some element is not accepted by ``self.base``.
        Exception: an output alphabet has no ``'parsed'`` entry after the walk.
    """
    # Imported at call time, as in the original; hoisted out of the loop so
    # the import statement is not re-executed for every element.
    from pydsl.check import check
    if isinstance(data, str):
        from pydsl.token import append_position_to_token_list
        data = [Token(x, ascii_encoding) for x in data]
        data = append_position_to_token_list(data)
    # NOTE(review): positions are only attached for str input; Token input is
    # presumably expected to already carry left/right, which are read below -- confirm.
    if not all(isinstance(x, Token) for x in data):
        raise TypeError("data must be a str or a sequence of Token instances")
    for element in data:
        if not check(self.base, [element]):
            raise ValueError('Unexpected input %s for alphabet %s' %
                             (element, self.base))
    if self.base == self.alphabet:
        return data
    graph = graph_from_alphabet(self.alphabet, self.base)
    # Seed the base node with the input; the backwards walk starts from it.
    graph.node[self.base]['parsed'] = data
    digraph_walker_backwards(graph, self.base, my_call_back)
    result = []
    for output_alphabet in self.alphabet:
        # Alphabets contained in the base are served straight from the base node.
        if output_alphabet in self.base:
            output_alphabet = self.base
        if (output_alphabet not in graph.node
                or 'parsed' not in graph.node[output_alphabet]):
            raise Exception("alphabet not initialized:%s" % output_alphabet)
        for token in graph.node[output_alphabet]['parsed']:
            result.append(
                PositionToken(str(token), output_alphabet, token.left,
                              token.right))
    result = sorted(result, key=lambda x: x.left)
    result = remove_subsets(result)
    result = remove_duplicates(result)
    # Positions were only needed for ordering and filtering; return plain tokens.
    return [Token(x.content, x.gd) for x in result]
def __aux_parser(self, symbol):
    """Build the parse tree for ``symbol`` starting at the current token.

    Terminal symbols are handed to ``self.match``. Otherwise the single
    production whose leading symbol's first-set accepts ``[self.current]``
    is expanded child by child; ``ParseError`` is raised when the number of
    such productions is not exactly one.
    """
    from pydsl.grammar.symbol import TerminalSymbol
    if not isinstance(symbol, TerminalSymbol):
        # Keep only the productions that can start with the current token.
        applicable = [
            candidate
            for candidate in self._productionset.getProductionsBySide(symbol)
            if check(
                self._productionset.first_lookup(candidate.rightside[0]),
                [self.current])
        ]
        if len(applicable) != 1:
            raise ParseError(
                "Expected only one valid production, found %s" %
                len(applicable), 0)
        chosen = applicable[0]
        children = []
        for part in chosen.rightside:
            children.append(self.__aux_parser(part))
        start = children[0].left
        end = children[-1].right
        payload = [child.content for child in children]
        return ParseTree(start, end, symbol, payload, childlist=children)
    LOG.debug("matching symbol %s, data:%s, index:%s" %
              (symbol, self.data, self.index))
    matched = self.match(symbol)
    LOG.debug("symbol matched %s" % matched)
    return matched
def __call__(self, data):
    """Return, in list order, the entries of ``self.grammarlist`` for which ``check`` accepts ``data``."""
    accepted = []
    for grammar in self.grammarlist:
        if check(grammar, data):
            accepted.append(grammar)
    return accepted
def __call__(self, data):
    """Filter ``self.grammarlist`` down to the grammars whose ``check`` passes on ``data``."""
    matching = []
    for candidate in self.grammarlist:
        if not check(candidate, data):
            continue
        matching.append(candidate)
    return matching
def testChoice(self):
    """A Choice of String("a") and String("b") accepts "a" and "b" and rejects "c"."""
    mygrammar = Choice((String("a"), String("b")))
    for alternative in ("a", "b"):
        self.assertTrue(check(mygrammar, alternative))
    self.assertFalse(check(mygrammar, "c"))
def known_concepts(self):
    """Collect a set of concept strings from the repository sentences.

    Only sentences accepted by the ``'main'`` grammar contribute; each one
    contributes its whitespace-separated words minus the last two, re-joined
    with single spaces.
    """
    return {
        " ".join(sentence.split()[:-2])
        for sentence in self.repository.sentences
        if check(self.grammars()['main'], sentence)
    }
def check(self, data):# ->bool:
    """Tell whether ``data`` is accepted by this symbol's grammar definition (``self.gd``)."""
    grammar_definition = self.gd
    return check(grammar_definition, data)
def check(self, data):  # ->bool:
    """Run the module-level ``check`` on ``self.gd`` and ``data`` and return its result."""
    recognized = check(self.gd, data)
    return recognized