def check_grammar_induction_correctness(
    self,
    expected_grammar: ContextFreeGrammar,
    dataset: List[str] = None,
    words_per_slot=1,
    prune_redundant=True,
    minimal_variables=True,
) -> ContextFreeGrammar:
    """Induce a grammar from ``dataset`` and check it against ``expected_grammar``.

    The induced grammar must (1) expand to exactly the strings of ``dataset``,
    (2) be isomorphic with ``expected_grammar`` and (3) have a non-empty
    string representation.

    Args:
        expected_grammar: Grammar the induction is expected to recover.
        dataset: Strings to learn from. When ``None``, every string generated
            by ``expected_grammar`` is used instead.
        words_per_slot: Forwarded unchanged to
            ``grammar_induction.induce_grammar_using_template_trees``.
        prune_redundant: Forwarded unchanged to the induction call.
        minimal_variables: Forwarded unchanged to the induction call.

    Returns:
        The induced grammar, so callers can run further checks on it.
    """
    if dataset is None:
        dataset = expected_grammar.generate_all_string()

    induced_grammar = grammar_induction.induce_grammar_using_template_trees(
        dataset,
        words_per_slot=words_per_slot,
        prune_redundant=prune_redundant,
        minimal_variables=minimal_variables,
    )
    # Printed so the induced grammar is visible in the test output.
    print(induced_grammar)

    # The induced grammar must regenerate exactly the dataset it learned from.
    self.check_grammar_expansion(induced_grammar, dataset)

    # The induced grammar must be isomorphic with the expected one.
    self.assertTrue(
        expected_grammar.is_isomorphic_with(induced_grammar),
        msg="Induced grammar is not isomorphic with the expected grammar",
    )

    # The grammar must be representable as a string without raising;
    # assertGreater reports the actual length on failure.
    self.assertGreater(len(str(induced_grammar)), 0)

    return induced_grammar
def check_grammar_expansion(self, grammar: ContextFreeGrammar, expected_expansion: Collection[str]):
    """
    Assert that ``grammar`` generates exactly the strings in ``expected_expansion``.

    Both sides are converted to sets before comparison, so ordering and
    duplicate strings do not affect the outcome.
    """
    produced_strings = grammar.generate_all_string()
    wanted = set(expected_expansion)
    produced = set(produced_strings)
    self.assertEqual(wanted, produced)