Esempio n. 1
0
    def parse(self, tokens):
        """Parse ``tokens`` according to the grammar of this ``Parser``.

        Args:
            tokens: either a ``list`` of tokens or a raw value (typically a
                string); anything that is not a ``list`` is passed through
                ``tokenize`` before parsing.

        Returns:
            The result of ``self._parse`` started from a root ``Tree`` built
            on the grammar's start symbol with an initial frontier of
            ``[()]``.

        Raises:
            ValueError: if ``tokens`` is empty, or if tokenizing a non-list
                input produces no tokens.
        """
        if len(tokens) == 0:
            raise ValueError("Tokens can't be empty")

        # Tokenize if needed
        if not isinstance(tokens, list):
            tokens = tokenize(tokens)
            # A non-empty raw input can still tokenize to nothing
            # (presumably e.g. punctuation-only text), so re-check here
            # instead of handing an empty token list to ``_parse``.
            if len(tokens) == 0:
                raise ValueError("Tokens can't be empty")

        root_tree = Tree(self._grammar.start(), [])
        # The frontier starts with a single empty tuple, i.e. the root position.
        frontier = [()]
        return self._parse(tokens, root_tree, frontier)
Esempio n. 2
0
 def test_removes_dot_at_end_of_sentence(self):
     """Check that a dot appended to the fixture string is dropped by ``tokenize``."""
     expected_words = ["fall", "leaves", "fall"]
     # Append the trailing dot to the fixture before tokenizing it.
     self.string += "."
     produced = tokenize(self.string)
     self.assertEqual(expected_words, produced, msg="It should remove the dot")
Esempio n. 3
0
 def test_returns_list_of_words(self):
     """Check that ``tokenize(self.string)`` equals the expected word list."""
     expected_words = ["fall", "leaves", "fall"]
     produced = tokenize(self.string)
     self.assertEqual(expected_words,
                      produced,
                      msg="It should return list of words")
Esempio n. 4
0
 def test_returns_a_list(self):
     """Check that ``tokenize`` returns a ``list`` instance for the fixture string."""
     produced = tokenize(self.string)
     self.assertIsInstance(produced, list, msg="It should return a ``list``")
Esempio n. 5
0
 def test_returns_empty_list(self):
     """Check that tokenizing the empty string gives a ``list`` equal to ``[]``."""
     empty_input = ""
     # Two separate calls, one per assertion: first the type, then the value.
     self.assertIsInstance(tokenize(empty_input), list)
     self.assertEqual(tokenize(empty_input), [])
Esempio n. 6
0
        VP -> V NP | V | V NP NP
        N -> 'fall' | 'spring' | 'leaves' | 'dog' | 'cat'
        V -> 'spring' | 'leaves' | 'fall' | 'left'
        D -> 'the'
        C -> 'and'
        Adj -> 'fall' | 'spring' | 'purple' | 'left'
    """

    sentences = [
        "Fall leaves fall.",
        "Fall leaves fall and spring leaves spring.",
        "The fall leaves left.",
        "The purple dog left",
        "The dog and cat left"
    ]

    grammar = Grammar.parse_grammar(grammar_as_string)
    parser = TopDownParser(grammar)
    tokens = tokenize(sentences[4])
    for sentence in sentences:
        tokens = tokenize(sentence)
        parse = parser.parse(tokens)
        results = [p for p in parse]
        print("==========================")
        print(sentence)
        print("--------------------------")
        for index, parse in enumerate(results):
            print("Parse #%d:\n%s" % (index, parse))
        print("--------------------------")
        print("Count: %d" % len(results))