Ejemplo n.º 1
0
def test_evaluate():
    """Check the total and the rolls returned by ``Grammar.evaluate``.

    Two queries are evaluated on the same grammar instance. For each one the
    returned total must match the expected value, and redoing the arithmetic
    by hand from the returned rolls must give that same value.
    """
    dice_grammar = Grammar()

    # First query: a roll plus a constant, then doubled.
    # NOTE(review): "3d1" presumably means three one-sided dice, which would
    # make the outcome deterministic -- confirm against Grammar.
    doubled_total, doubled_rolls = dice_grammar.evaluate("(3d1 + 5) * 2")

    print(doubled_total)
    print(doubled_rolls)

    assert doubled_total == 16
    # Rebuild the expression from the individual rolls as a cross-check.
    doubled_roll_sum = sum(map(int, doubled_rolls))
    assert ((doubled_roll_sum + 5) * 2) == 16

    # Second query: the same roll with a plain additive constant.
    plain_total, plain_rolls = dice_grammar.evaluate("3d1 + 2")

    assert plain_total == 5
    plain_roll_sum = sum(map(int, plain_rolls))
    assert (plain_roll_sum + 2) == 5
Ejemplo n.º 2
0
    def test_evaluation(self):
        """
        Run the arithmetic grammar over every example and check its metrics.

        Two metrics returned by ``evaluate`` are checked:

        semantics oracle accuracy: number of examples for which at least one
        of the produced parses was correct.

        semantics accuracy: number of examples for which the parse at
        position 0 was correct.
        """
        grammar = Grammar(self.arithmetic_rules)

        from executor import Executor

        # The one-parse and two-parse fixtures together form the full set.
        all_examples = self.one_parse_examples + self.two_parse_examples
        self.assertEqual(17, len(all_examples))

        results = grammar.evaluate(
            executor=Executor.execute,
            examples=all_examples,
            print_examples=False,
        )

        # Every example must have produced a correct parse somewhere.
        oracle_accuracy = results['semantics oracle accuracy']
        self.assertEqual(oracle_accuracy, 17)

        # For three of the 17 examples the parse at position 0 was wrong.
        top_parse_accuracy = results['semantics accuracy']
        self.assertEqual(top_parse_accuracy, 14)