Beispiel #1
0
 def testZeroOrMore(self):
     """ZeroOrMore(String("a")) accepts any run of "a", the empty one included, and rejects "b"."""
     repetition = ZeroOrMore(String("a"))
     self.assertTrue(isinstance(repetition, Grammar))
     self.assertEqual(repetition.first(), Choice([String("a")]))
     # Inputs that must be recognised, in the order they were originally asserted.
     for accepted in ("a", "aa", "aaaa", ""):
         self.assertTrue(check(repetition, accepted))
     self.assertFalse(check(repetition, "b"))
Beispiel #2
0
 def testZeroOrMore(self):
     """Check the type, the first set and the accepted inputs of ZeroOrMore(String("a"))."""
     star_of_a = ZeroOrMore(String("a"))
     is_grammar = isinstance(star_of_a, Grammar)
     self.assertTrue(is_grammar)
     actual_first = star_of_a.first()
     self.assertEqual(actual_first, Choice([String("a")]))
     # One, two and four repetitions, then the empty input.
     for repetitions in (1, 2, 4, 0):
         self.assertTrue(check(star_of_a, "a" * repetitions))
     # A symbol outside the repeated grammar is rejected.
     self.assertFalse(check(star_of_a, "b"))
Beispiel #3
0
    def __call__(self, data):
        """Convert ``data`` from the base alphabet into tokens of ``self.alphabet``.

        A ``str`` is first split into one ``Token`` per character, tagged with
        ``ascii_encoding``; every input is then passed through
        ``append_position_to_token_list``.

        Returns:
            ``data`` (after positions were appended) when
            ``self.base == self.alphabet``; otherwise a list of
            ``Token(content, gd)`` built from the collected position tokens
            after sorting by ``left`` and applying ``remove_subsets`` and
            ``remove_duplicates``.

        Raises:
            TypeError: an element of ``data`` is not a ``Token``.
            ValueError: ``check`` rejects an element against ``self.base``.
            Exception: an output alphabet has no ``'parsed'`` entry in the graph.
        """
        if isinstance(data, str):
            data = [Token(x, ascii_encoding) for x in data]
        from pydsl.token import append_position_to_token_list
        data = append_position_to_token_list(data)

        if not all(isinstance(x, Token) for x in data):
            raise TypeError
        # Imported once ahead of the loop instead of on every iteration.
        from pydsl.check import check
        for element in data:
            if not check(self.base, [element]):
                raise ValueError('Unexpected input %s for alphabet %s' % (element, self.base))
        if self.base == self.alphabet:
            return data
        graph = graph_from_alphabet(self.alphabet, self.base)
        # Attach the input to the base node before walking the graph.
        graph.node[self.base]['parsed'] = data
        digraph_walker_backwards(graph, self.base, my_call_back)
        result = []
        for output_alphabet in self.alphabet:
            # Alphabets contained in the base are read from the base node itself.
            if output_alphabet in self.base:
                output_alphabet = self.base
            if output_alphabet not in graph.node or 'parsed' not in graph.node[output_alphabet]:
                raise Exception("alphabet not initialized:%s" % output_alphabet)
            for token in graph.node[output_alphabet]['parsed']:
                result.append(PositionToken(str(token), output_alphabet, token.left, token.right))

        result = sorted(result, key=lambda x: x.left)
        result = remove_subsets(result)
        result = remove_duplicates(result)
        return [Token(x.content, x.gd) for x in result]
Beispiel #4
0
 def __call__(self, data, include_gd=False):
     """Convert ``data`` from the base alphabet into tokens of ``self.alphabet``.

     When ``self.base == ascii_encoding`` each element ``x`` of ``data`` is
     wrapped as ``Token(x, x)`` and positions are appended with
     ``append_position_to_token_list``.

     Args:
         data: iterable of input elements.
         include_gd: accepted for interface compatibility; not read by this
             method.

     Returns:
         ``data`` when ``self.base == self.alphabet``; otherwise a list of
         ``Token(content, gd)`` built from the flattened position tokens
         after sorting by ``left`` and applying ``remove_subsets`` and
         ``remove_duplicates``.

     Raises:
         ValueError: ``check`` rejects an element against ``self.base``.
         Exception: an output alphabet has no ``'parsed'`` entry in the graph.
     """
     def flat_token(token):
         # Unwrap nested tokens until something without a ``content``
         # attribute remains, so the result matches base -> alphabet.
         while hasattr(token, 'content'):
             token = token.content
         return token

     if self.base == ascii_encoding:
         data = [Token(x, x) for x in data]
         from pydsl.token import append_position_to_token_list
         data = append_position_to_token_list(data)
     # Imported once ahead of the loop instead of on every iteration.
     from pydsl.check import check
     for element in data:
         if not check(self.base, [element]):
             raise ValueError('Unexpected input %s for alphabet %s' % (element, self.base))
     if self.base == self.alphabet:
         return data
     graph = graph_from_alphabet(self.alphabet, self.base)
     # Attach the input to the base node before walking the graph.
     graph.node[self.base]['parsed'] = data
     digraph_walker_backwards(graph, self.base, my_call_back)
     result = []
     for output_alphabet in self.alphabet:
         # Alphabets contained in the base are read from the base node itself.
         if output_alphabet in self.base:
             output_alphabet = self.base
         if output_alphabet not in graph.node or 'parsed' not in graph.node[output_alphabet]:
             raise Exception("alphabet not initialized:%s" % output_alphabet)
         for token in graph.node[output_alphabet]['parsed']:
             result.append(PositionToken(flat_token(token), output_alphabet, token.left, token.right))
     result = sorted(result, key=lambda x: x.left)
     result = remove_subsets(result)
     result = remove_duplicates(result)
     return [Token(x.content, x.gd) for x in result]
Beispiel #5
0
 def concept_tree(self):
     """Fold the repository sentences into ``self.root`` and return it.

     Sentences rejected by ``check(general_grammar, ...)`` are logged and
     skipped. Each remaining sentence is translated with
     ``general_grammar.grammar(sentence).expr()`` into a tuple whose first
     item is the action:

     * ``'ISA'`` (action, child, parent) grows the tree: the first such
       sentence creates the root; later ones append the child to the root,
       re-root the tree under a new parent, or append the child to the
       parent node that was found.
     * ``'HAS'`` (action, subject, property, object) only looks the subject
       up; the tree is not modified.

     Raises:
         NotImplementedError: an 'ISA' sentence names a parent and a child
             that are both missing from a non-empty tree.
         ValueError: the action is neither 'ISA' nor 'HAS'.
     """
     for sentence in self.repository.sentences:
         if not check(general_grammar, sentence):
             LOG.warning("Unknown sentence " + sentence)
             continue
         translated_tuple = general_grammar.grammar(sentence).expr()
         action = translated_tuple[0]
         if action == 'ISA':
             _, child_concept, parent_concept = translated_tuple
             if self.root is None:
                 self.root = Tree(parent_concept)
                 self.root.append(child_concept)
                 continue
             # Look each concept up once; every branch below needs both
             # answers and the tree is not modified in between.
             parent_node = self.root.find_content(parent_concept)
             child_node = self.root.find_content(child_concept)
             if parent_node and not child_node:
                 self.root.append(child_concept)
             elif child_node and not parent_node:
                 # The known tree becomes a subtree of the new parent.
                 old_root = self.root
                 self.root = Tree(parent_concept)
                 self.root.append(old_root)
             elif parent_node and child_node:
                 parent_node.append(child_concept)
             else:
                 raise NotImplementedError
         elif action == 'HAS':
             _, subject, property_name, dobject = translated_tuple
             # NOTE(review): the lookup result is never used, and this line
             # raises AttributeError while self.root is still None -- confirm
             # the intended handling of 'HAS' sentences.
             subject_instance = self.root.find_content(subject)
         else:
             raise ValueError(action)
     return self.root
Beispiel #6
0
 def _reduce_terminal(self, symbol, data, showerrors = False):
     """Wrap ``data`` in a one-element list of ``ParseTree`` spanning 0..1.

     The tree is a regular one when ``check(symbol.gd, [data])`` succeeds.
     On failure the list holds a tree built with ``valid=False`` when
     ``showerrors`` is truthy, and is empty otherwise.
     """
     from pydsl.check import check
     from pydsl.tree import ParseTree
     if check(symbol.gd, [data]):
         return [ParseTree(0, 1, symbol, data)]
     # From here on the terminal did not match.
     if showerrors:
         return [ParseTree(0, 1, symbol, data, valid=False)]
     return []
Beispiel #7
0
 def get_trees(self, data, showerrors = False): # -> list:
     """Return the distinct parse trees that span the whole of ``data``.

     Args:
         data: sequence of input elements; each one must be accepted by
             ``check`` against the production set's alphabet.
         showerrors: forwarded unchanged to the recursive parser.

     Returns:
         The results of the recursive parser whose ``left`` is 0 and whose
         ``right`` equals ``len(data)``, without duplicates, in the order
         the parser produced them.

     Raises:
         ValueError: an element of ``data`` is rejected by the alphabet.
     """
     # Read the alphabet once, and through the same attribute the rest of
     # the method uses, so the error path does not depend on a separate
     # ``productionset`` attribute.
     alphabet = self._productionset.alphabet
     if not all(check(alphabet, [x]) for x in data):
         raise ValueError("Unknown element in {}, alphabet:{}".format(str(data), alphabet))
     result = self.__recursive_parser(self._productionset.initialsymbol, data, self._productionset.main_production, showerrors)
     finalresult = []
     for eresult in result:
         # Keep only full-length parses; the membership test removes duplicates.
         if eresult.left == 0 and eresult.right == len(data) and eresult not in finalresult:
             finalresult.append(eresult)
     return finalresult
Beispiel #8
0
 def insert(self, state, token):
     """Look up the action stored for ``token`` in row ``state``.

     An end-symbol token is looked up directly. Otherwise the row is
     searched for ``TerminalSymbol`` keys whose ``gd`` accepts ``[token]``:
     none gives ``{"action":"Fail"}``, exactly one gives that key's entry,
     and more than one raises ``Exception``.
     """
     if token == EndSymbol():
         return self[state][EndSymbol()]
     from pydsl.check import check
     candidates = []
     for entry in self[state]:
         if isinstance(entry, TerminalSymbol) and check(entry.gd, [token]):
             candidates.append(entry)
     if len(candidates) > 1:
         raise Exception("Multiple symbols matches input")
     if not candidates:
         return {"action":"Fail"}
     return self[state][candidates[0]]
Beispiel #9
0
 def get_trees(self, data, showerrors=False):  # -> list:
     """Return the distinct parse trees that cover ``data`` from start to end.

     Args:
         data: sequence of input elements; each one must be accepted by
             ``check`` against the production set's alphabet.
         showerrors: forwarded unchanged to the recursive parser.

     Returns:
         Parser results with ``left == 0`` and ``right == len(data)``,
         duplicates removed, in the order the parser produced them.

     Raises:
         ValueError: an element of ``data`` is rejected by the alphabet.
     """
     productionset = self._productionset
     # One read of the alphabet serves both the validation and the error
     # message, so the error path uses the same attribute as the rest of
     # the method.
     alphabet = productionset.alphabet
     if not all(check(alphabet, [x]) for x in data):
         raise ValueError("Unknown element in {}, alphabet:{}".format(
             str(data), alphabet))
     result = self.__recursive_parser(productionset.initialsymbol,
                                      data,
                                      productionset.main_production,
                                      showerrors)
     finalresult = []
     for eresult in result:
         # Only parses spanning the full input count; skip repeats.
         spans_input = eresult.left == 0 and eresult.right == len(data)
         if spans_input and eresult not in finalresult:
             finalresult.append(eresult)
     return finalresult
Beispiel #10
0
 def insert(self, state, token):
     """Return the entry of ``self[state]`` that corresponds to ``token``.

     The end symbol maps straight to its own entry. Any other token is
     matched against the ``TerminalSymbol`` keys of the row through
     ``check(key.gd, [token])``; no match yields ``{"action": "Fail"}`` and
     an ambiguous match raises ``Exception``.
     """
     if token == EndSymbol():
         return self[state][EndSymbol()]
     from pydsl.check import check

     def accepts(key):
         # Only terminal symbols can match an input token.
         return isinstance(key, TerminalSymbol) and check(key.gd, [token])

     matches = list(filter(accepts, self[state]))
     if not matches:
         return {"action": "Fail"}
     if len(matches) > 1:
         raise Exception("Multiple symbols matches input")
     (only_match,) = matches
     return self[state][only_match]
Beispiel #11
0
 def __aux_parser(self, symbol):
     """Parse ``symbol`` at the current position and return its tree.

     A ``TerminalSymbol`` is delegated to ``self.match``. For any other
     symbol, the productions returned by ``getProductionsBySide`` are
     filtered to those whose first set accepts the current element;
     exactly one must remain. Its right side is then parsed symbol by
     symbol and the children are combined into a single ``ParseTree``.

     Raises:
         ParseError: zero or several productions accept the current element.
     """
     from pydsl.grammar.symbol import TerminalSymbol
     if isinstance(symbol, TerminalSymbol):
         LOG.debug("matching symbol %s, data:%s, index:%s" % (symbol,self.data,self.index ))
         result= self.match(symbol)
         LOG.debug("symbol matched %s" % result)
         return result
     productions = self._productionset.getProductionsBySide(symbol)
     valid_firsts = []
     for production in productions:
         first_of_production = self._productionset.first_lookup(production.rightside[0])
         # ``check`` is given a sequence of elements everywhere else in this
         # code, so the single current element is wrapped in a list.
         if check(first_of_production, [self.current]):
             valid_firsts.append(production)
     if len(valid_firsts) != 1:
         raise ParseError("Expected only one valid production, found %s" % len(valid_firsts), 0)
     childlist = [self.__aux_parser(x) for x in valid_firsts[0].rightside]
     # The new node spans from its first child's start to its last child's end.
     left = childlist[0].left
     right = childlist[-1].right
     content = [x.content for x in childlist]
     return ParseTree(left, right, symbol, content, childlist=childlist)
Beispiel #12
0
    def __call__(self, data):
        """Translate ``data`` from the base alphabet into ``self.alphabet`` tokens.

        String input is turned into one ``Token`` per character (tagged with
        ``ascii_encoding``); positions are then appended to every input with
        ``append_position_to_token_list``.

        Returns:
            The positioned ``data`` itself when ``self.base == self.alphabet``.
            Otherwise a list of ``Token(content, gd)`` derived from the
            ``'parsed'`` tokens of each output alphabet, sorted by ``left``
            and filtered through ``remove_subsets`` and ``remove_duplicates``.

        Raises:
            TypeError: an element of ``data`` is not a ``Token``.
            ValueError: ``check`` rejects an element against ``self.base``.
            Exception: an output alphabet has no ``'parsed'`` entry in the graph.
        """
        if isinstance(data, str):
            data = [Token(x, ascii_encoding) for x in data]
        from pydsl.token import append_position_to_token_list
        data = append_position_to_token_list(data)

        if not all(isinstance(x, Token) for x in data):
            raise TypeError
        # A single import before the loop replaces one import per element.
        from pydsl.check import check
        for element in data:
            if not check(self.base, [element]):
                raise ValueError('Unexpected input %s for alphabet %s' %
                                 (element, self.base))
        if self.base == self.alphabet:
            return data
        graph = graph_from_alphabet(self.alphabet, self.base)
        # Attach the input to the base node before walking the graph.
        graph.node[self.base]['parsed'] = data
        digraph_walker_backwards(graph, self.base, my_call_back)
        result = []
        for output_alphabet in self.alphabet:
            # Alphabets contained in the base are read from the base node.
            if output_alphabet in self.base:
                output_alphabet = self.base
            if output_alphabet not in graph.node or 'parsed' not in graph.node[
                    output_alphabet]:
                raise Exception("alphabet not initialized:%s" %
                                output_alphabet)
            for token in graph.node[output_alphabet]['parsed']:
                result.append(
                    PositionToken(str(token), output_alphabet, token.left,
                                  token.right))

        result = sorted(result, key=lambda x: x.left)
        result = remove_subsets(result)
        result = remove_duplicates(result)
        return [Token(x.content, x.gd) for x in result]
Beispiel #13
0
 def __aux_parser(self, symbol):
     """Build the parse tree for ``symbol`` starting at the current element.

     Terminal symbols are handed to ``self.match``. For other symbols the
     single production whose first set accepts ``[self.current]`` is
     expanded recursively; ``ParseError`` is raised when the number of
     such productions is not exactly one.
     """
     from pydsl.grammar.symbol import TerminalSymbol
     if isinstance(symbol, TerminalSymbol):
         LOG.debug("matching symbol %s, data:%s, index:%s" %
                   (symbol, self.data, self.index))
         matched = self.match(symbol)
         LOG.debug("symbol matched %s" % matched)
         return matched
     # Keep the productions whose first set accepts the current element.
     candidates = [
         production
         for production in self._productionset.getProductionsBySide(symbol)
         if check(
             self._productionset.first_lookup(production.rightside[0]),
             [self.current])
     ]
     if len(candidates) != 1:
         raise ParseError(
             "Expected only one valid production, found %s" %
             len(candidates), 0)
     children = [self.__aux_parser(part) for part in candidates[0].rightside]
     # The node spans from the first child's start to the last child's end.
     return ParseTree(children[0].left,
                      children[-1].right,
                      symbol,
                      [child.content for child in children],
                      childlist=children)
Beispiel #14
0
 def __call__(self, data):
     """Return the entries of ``self.grammarlist`` for which ``check(entry, data)`` is truthy."""
     matching = []
     for grammar in self.grammarlist:
         if check(grammar, data):
             matching.append(grammar)
     return matching
Beispiel #15
0
 def __call__(self, data):
     """List, in ``self.grammarlist`` order, the grammars that ``check`` accepts for ``data``."""
     def accepts(grammar):
         return check(grammar, data)

     return list(filter(accepts, self.grammarlist))
Beispiel #16
0
 def testChoice(self):
     """A Choice of String("a") and String("b") accepts "a" and "b" but not "c"."""
     alternatives = (String("a"), String("b"))
     either = Choice(alternatives)
     for accepted in ("a", "b"):
         self.assertTrue(check(either, accepted))
     self.assertFalse(check(either, "c"))
Beispiel #17
0
 def testChoice(self):
     """Check which single-character inputs Choice(String("a"), String("b")) recognises."""
     a_or_b = Choice((String("a"), String("b")))
     # Each input is paired with the assertion its check result must satisfy.
     expectations = (
         ("a", self.assertTrue),
         ("b", self.assertTrue),
         ("c", self.assertFalse),
     )
     for text, expect in expectations:
         expect(check(a_or_b, text))
Beispiel #18
0
 def known_concepts(self):
     """Collect the concept names found in the repository sentences.

     A sentence contributes when ``check`` accepts it against the
     ``'main'`` grammar; its name is the sentence without its last two
     whitespace-separated words, re-joined with single spaces.
     """
     return {
         " ".join(sentence.split()[:-2])
         for sentence in self.repository.sentences
         if check(self.grammars()['main'], sentence)
     }
Beispiel #19
0
 def check(self, data):# ->bool:
     """Return the result of the module-level ``check`` for ``data`` against ``self.gd``."""
     recognized = check(self.gd, data)
     return recognized
Beispiel #20
0
 def check(self, data):  # ->bool:
     """Tell whether ``data`` is recognized by this symbol's grammar definition.

     Delegates to the module-level ``check`` with ``self.gd``.
     """
     grammar_definition = self.gd
     return check(grammar_definition, data)