def parse(self, tokens):
    """Parse the given input against this ``Parser``'s grammar.

    Args:
        tokens: either a raw sentence string (tokenized internally via
            ``tokenize``) or an already-tokenized ``list`` of words.

    Returns:
        The result of ``self._parse`` — the possible parse tree(s) rooted
        at the grammar's start symbol.

    Raises:
        ValueError: if ``tokens`` is empty (or ``None``).
    """
    # Truthiness check also rejects None, which len() would not handle.
    if not tokens:
        raise ValueError("Tokens can't be empty")
    # Accept a raw string and tokenize it ourselves.
    if not isinstance(tokens, list):
        tokens = tokenize(tokens)
    # Start from a tree holding only the start symbol; the frontier's
    # single empty tuple is the tree-position of that root node.
    root_tree = Tree(self._grammar.start(), [])
    frontier = [()]
    return self._parse(tokens, root_tree, frontier)
def test_removes_dot_at_end_of_sentence(self):
    """A trailing period must not show up in the token list."""
    self.string += "."
    expected = ["fall", "leaves", "fall"]
    self.assertEqual(
        expected, tokenize(self.string), msg="It should remove the dot"
    )
def test_returns_list_of_words(self):
    """The sentence is split into its individual words."""
    expected = ["fall", "leaves", "fall"]
    self.assertEqual(
        expected, tokenize(self.string), msg="It should return list of words"
    )
def test_returns_a_list(self):
    """Tokenizing a sentence produces a ``list`` instance."""
    tokens = tokenize(self.string)
    self.assertIsInstance(tokens, list, msg="It should return a ``list``")
def test_returns_empty_list(self):
    """Empty input tokenizes to an empty ``list``."""
    tokens = tokenize("")
    self.assertIsInstance(tokens, list)
    self.assertEqual(tokens, [])
VP -> V NP | V | V NP NP N -> 'fall' | 'spring' | 'leaves' | 'dog' | 'cat' V -> 'spring' | 'leaves' | 'fall' | 'left' D -> 'the' C -> 'and' Adj -> 'fall' | 'spring' | 'purple' | 'left' """ sentences = [ "Fall leaves fall.", "Fall leaves fall and spring leaves spring.", "The fall leaves left.", "The purple dog left", "The dog and cat left" ] grammar = Grammar.parse_grammar(grammar_as_string) parser = TopDownParser(grammar) tokens = tokenize(sentences[4]) for sentence in sentences: tokens = tokenize(sentence) parse = parser.parse(tokens) results = [p for p in parse] print("==========================") print(sentence) print("--------------------------") for index, parse in enumerate(results): print("Parse #%d:\n%s" % (index, parse)) print("--------------------------") print("Count: %d" % len(results))