Ejemplo n.º 1
0
    def test_isomorphic_multiple_possibilities_simple(self):
        """Check isomorphism when several non-terminals could be matched to each other.

        All three grammars have two origin templates of the form ``<X> world``
        that reach a greeting rule and a world rule through one intermediate
        rule each. They differ in the names of the non-terminals and, for the
        third grammar, in which intermediate rule leads to which word list.
        """
        named = ContextFreeGrammar.from_string({
            "origin": ["<a> world", "<b> world"],
            "a": ["<hello>"],
            "b": ["<world>"],
            "hello": ["hello", "hi", "hey"],
            "world": ["world", "universe"],
        })
        numbered = ContextFreeGrammar.from_string({
            "origin": ["<1> world", "<2> world"],
            "1": ["<h>"],
            "2": ["<w>"],
            "h": ["hello", "hi", "hey"],
            "w": ["world", "universe"],
        })
        numbered_swapped = ContextFreeGrammar.from_string({
            "origin": ["<1> world", "<2> world"],
            "1": ["<w>"],
            "2": ["<h>"],
            "w": ["world", "universe"],
            "h": ["hello", "hi", "hey"],
        })

        # Hand all three variants to the shared isomorphism helper
        self.check_isomorphism(named, numbered, numbered_swapped)

        # None of the variants may match the plain hello-world fixture
        for candidate in (named, numbered, numbered_swapped):
            self.assertFalse(candidate.is_isomorphic_with(self.hello_world))
Ejemplo n.º 2
0
    def check_grammar_induction_correctness(
        self,
        expected_grammar: ContextFreeGrammar,
        dataset: List[str] = None,
        words_per_slot=1,
        prune_redundant=True,
        minimal_variables=True,
    ) -> ContextFreeGrammar:
        """Induce a grammar from ``dataset`` and compare it with ``expected_grammar``.

        When ``dataset`` is ``None`` it is taken from
        ``expected_grammar.generate_all_string()``. The induced grammar must
        pass ``check_grammar_expansion`` against the dataset, be isomorphic
        with ``expected_grammar`` and have a non-empty string representation.

        ``words_per_slot``, ``prune_redundant`` and ``minimal_variables`` are
        forwarded unchanged to
        ``grammar_induction.induce_grammar_using_template_trees``.

        Returns the induced grammar so callers can make further assertions.
        """
        # No explicit dataset: learn from everything the expected grammar yields
        if dataset is None:
            dataset = expected_grammar.generate_all_string()

        induction_options = {
            "words_per_slot": words_per_slot,
            "prune_redundant": prune_redundant,
            "minimal_variables": minimal_variables,
        }
        learned = grammar_induction.induce_grammar_using_template_trees(
            dataset, **induction_options
        )
        print(learned)

        # The learned grammar has to reproduce the dataset it was learned from
        self.check_grammar_expansion(learned, dataset)

        # ... and be structurally equivalent to the expected grammar
        matches_expected = expected_grammar.is_isomorphic_with(learned)
        self.assertTrue(matches_expected)

        # Rendering the grammar as text must work and give a non-empty result
        rendered = str(learned)
        self.assertTrue(len(rendered) > 0)

        return learned
Ejemplo n.º 3
0
 def setUp(self):
     """Seed the random generator and build the hello-world fixture grammars.

     Creates ``self.hello_world_small`` (one ``<hello> <world>`` template)
     and ``self.hello_world_full`` (one ``<hello>, <location>!`` template).
     """
     random.seed(42)
     self.hello_world_small = ContextFreeGrammar.from_string({
         "origin": ["<hello> <world>"],
         "hello": ["hello", "hi", "hey"],
         "world": ["world", "universe", "earth"],
     })
     self.hello_world_full = ContextFreeGrammar.from_string({
         # Each rule maps to a *list* of alternatives, like every other
         # grammar in these tests. A bare string is itself iterable, so if
         # the parser iterates the value it would be read per character.
         "origin": ["<hello>, <location>!"],
         "hello": ["Hello", "Greetings", "Howdy", "Hey"],
         "location": ["world", "solar system", "galaxy", "universe"],
     })
Ejemplo n.º 4
0
 def setUp(self) -> None:
     """Fix the random seed and create the grammars shared by the tests."""
     random.seed(123)

     small_rules = {
         "origin": ["<hello> <world>"],
         "hello": ["hello", "hi", "hey"],
         "world": ["world", "universe", "earth"],
     }
     self.hello_world_small = ContextFreeGrammar.from_string(small_rules)

     # Two origin templates that both use the <location> slot
     greeting_and_adjective_rules = {
         "origin": ["<hello>, <location>!", "The <location> is <adjective>"],
         "hello": ["Hello", "Greetings", "Howdy", "Hey"],
         "location": ["universe", "earth", "world", "solar system"],
         "adjective": ["pretty", "cool", "amazing"],
     }
     self.hello_world_and_world_adjective = ContextFreeGrammar.from_string(
         greeting_and_adjective_rules
     )
Ejemplo n.º 5
0
 def test_modifier_removal_small(self):
     """``replace_modifier_variables`` should drop the ``.modifier`` part of a ``#slot#``."""
     # (expected result, input containing a modifier)
     cases = (
         ("#a#", "#a.bla#"),
         ("#a#", "#a.title#"),
         ("#b#", "#b.title#"),
         ("#blabla#", "#blabla.title#"),
     )
     for expected, with_modifier in cases:
         stripped = ContextFreeGrammar.replace_modifier_variables(with_modifier)
         self.assertEqual(expected, stripped)
Ejemplo n.º 6
0
 def test_isomorphic_recursive(self):
     """Isomorphism checks must cope with an origin rule that refers to itself."""
     base = ContextFreeGrammar.from_string({
         "origin": ["<a>", "a <origin>"],
         "a": ["world"],
     })
     # Same shape, only the non-terminal is called "b" instead of "a"
     renamed = ContextFreeGrammar.from_string({
         "origin": ["<b>", "a <origin>"],
         "b": ["world"],
     })
     # Differs in the terminal produced by the non-terminal ("earth")
     different_terminal = ContextFreeGrammar.from_string({
         "origin": ["<b>", "a <origin>"],
         "b": ["earth"],
     })
     # Differs in the literal prefix of the recursive template ("b")
     different_prefix = ContextFreeGrammar.from_string({
         "origin": ["<b>", "b <origin>"],
         "b": ["world"],
     })

     self.check_isomorphism(base, renamed)
     for conflicting in (different_terminal, different_prefix):
         self.assertFalse(base.is_isomorphic_with(conflicting))
Ejemplo n.º 7
0
 def test_repeat_2_missing_data(self):
     """Induce a repeated-slot grammar from an incomplete sample of its language.

     The dataset holds only five of the strings the expected grammar can
     produce; the induced grammar must still be isomorphic with it and
     must not be recursive.
     """
     expected = ContextFreeGrammar.from_string({
         "origin": [
             "I like <X> and <X>",
             "<X> are not supposed to be in the zoo",
         ],
         "X": ["cats", "dogs", "geese", "bananas"],
     })
     sample = [
         "I like cats and dogs",
         "I like bananas and geese",
         "I like geese and cats",
         "bananas are not supposed to be in the zoo",
         "geese are not supposed to be in the zoo",
     ]
     induction_settings = dict(
         words_per_slot=1,
         prune_redundant=True,
         minimal_variables=True,
         max_recalculation=None,
         relative_similarity_threshold=0.01,
     )
     induced = grammar_induction.induce_grammar_using_template_trees(
         sample, **induction_settings
     )
     print(induced)

     self.assertTrue(expected.is_isomorphic_with(induced))
     self.assertFalse(induced.is_recursive())
Ejemplo n.º 8
0
 def test_prohibit_empty_string(self):
     """With ``allow_empty_string=False`` optional words become nested rules.

     The expected grammar models the optional adjective through an extra
     alternative (``"<hill_adj> hill"`` / ``"<cat_adj> cats"``) instead of
     a slot that can expand to the empty string.
     """
     sentences = [
         "I saw her on the quiet hill",
         "I saw her on the tall hill",
         "I saw her on the hill",
         "He likes cute cats",
         "He likes nice cats",
         "He likes cats",
     ]
     expected = ContextFreeGrammar.from_string({
         "origin": ["I saw her on the <hill>", "He likes <cats>"],
         "hill": ["hill", "<hill_adj> hill"],
         "hill_adj": ["quiet", "tall"],
         "cats": ["cats", "<cat_adj> cats"],
         "cat_adj": ["nice", "cute"],
     })
     induction_settings = dict(
         words_per_slot=1,
         relative_similarity_threshold=0.1,
         allow_empty_string=False,
         log_tree=log_tree,
     )
     induced = grammar_induction.induce_grammar_using_template_trees(
         sentences, **induction_settings
     )
     print(induced)

     self.assertTrue(expected.is_isomorphic_with(induced))
Ejemplo n.º 9
0
def induce_grammar_using_template_trees(
    lines: Collection[str],
    relative_similarity_threshold: float = 1,
    minimal_variables: bool = True,
    words_per_slot: int = 1,
    prune_redundant: bool = True,
    max_recalculation: Optional[int] = None,
    use_best_merge_candidate: bool = True,
    max_depth: Optional[int] = None,
):
    """Induce a grammar from ``lines`` by learning and refining a template tree.

    Steps, in order:

    1. Learn a tree from ``lines`` with ``TemplateLatticeLearner``.
    2. Optionally prune redundant abstractions (``prune_redundant``).
    3. Name and simplify the tree, then collapse it using the derived slot
       values.
    4. Repeatedly recalculate, rename and simplify the tree until the result
       equals the previous tree or ``max_recalculation`` iterations have run.
    5. Collapse once more, optionally reduce the depth (``max_depth``), and
       build the grammar from the final template and slot values.

    Args:
        lines: The texts to learn from.
        relative_similarity_threshold: Passed to ``_name_and_simplify_tree``.
        minimal_variables: Passed to the learner and to
            ``recalculate_templates``.
        words_per_slot: Passed to the learner as ``words_per_leaf_slot``.
        prune_redundant: Whether to call ``prune_redundant_abstractions`` on
            the learned tree.
        max_recalculation: Upper bound on recalculation iterations; ``None``
            means iterate until the tree no longer changes.
        use_best_merge_candidate: Passed to the learner.
        max_depth: When not ``None``, the collapsed tree is passed through
            ``reduce_depth(max_depth)``.

    Returns:
        The result of ``ContextFreeGrammar.from_slot_values`` for the final
        template and slot values.
    """
    # Learn a tree from the given dataset
    learned_tree = TemplateLatticeLearner(
        minimal_variables=minimal_variables,
        words_per_leaf_slot=words_per_slot,
        use_best_merge_candidate=use_best_merge_candidate,
    ).learn(lines)

    # Prune all redundant children: if all other children of parent cover it, the child is not necessary.
    if prune_redundant:
        learned_tree = learned_tree.prune_redundant_abstractions()

    # First naming/simplification pass; yields the initial slot values
    derived_slot_values, simplified_tree = _name_and_simplify_tree(
        learned_tree, relative_similarity_threshold)

    simplified_tree = simplified_tree.collapse_using_slot_values(
        derived_slot_values)

    # Keep recalculating the tree until convergence
    # (new_tt starts as None, so the first comparison enters the loop unless
    # max_recalculation is 0)
    new_tt = None
    iteration = 0
    while simplified_tree != new_tt and (max_recalculation is None
                                         or iteration < max_recalculation):
        # From the second iteration on, continue from the previous result
        if new_tt is not None:
            simplified_tree = new_tt
        new_tt = simplified_tree.recalculate_templates(
            minimal_variables=minimal_variables)
        derived_slot_values, new_tt = _name_and_simplify_tree(
            new_tt, relative_similarity_threshold)
        iteration += 1

    # Collapse final tree using the last slot values
    # NOTE(review): when the loop stops because max_recalculation was reached
    # (not because the tree converged), simplified_tree is the tree from
    # before the last recalculation while derived_slot_values belong to the
    # last new_tt - confirm this pairing is intended.
    collapsed_tt = simplified_tree.collapse_using_slot_values(
        derived_slot_values)

    # Limit max depth
    if max_depth is not None:
        collapsed_tt = collapsed_tt.reduce_depth(max_depth)

    # Derive final slot values
    final_slot_values = collapsed_tt.get_slot_values()

    # Create grammar
    grammar = ContextFreeGrammar.from_slot_values(
        collapsed_tt.get_template(),
        final_slot_values,
    )

    return grammar
Ejemplo n.º 10
0
 def test_from_string(self):
     """``from_string`` should turn rule strings into slots and template strings.

     The alternative ``"<B>, world"`` is expected to become the slot ``B``
     followed by the template strings ``","`` and ``"world"``.
     """
     raw_rules = {"A": ["<B>, world", "hi"], "B": ["hello"]}

     rule_a_templates = [
         Template(
             [NamedTemplateSlot("B"), TemplateString(","), TemplateString("world")]
         ),
         Template([TemplateString("hi")]),
     ]
     rule_b_templates = [Template([TemplateString("hello")])]
     expected = ContextFreeGrammar({
         NamedTemplateSlot("A"): rule_a_templates,
         NamedTemplateSlot("B"): rule_b_templates,
     })

     parsed = ContextFreeGrammar.from_string(raw_rules)
     self.assertEqual(expected, parsed)
Ejemplo n.º 11
0
    def setUp(self):
        """Seed the random generator and build the grammars used across the tests."""
        random.seed(123)

        # A grammar whose origin has one alternative and no slots
        self.simple = ContextFreeGrammar.from_string({
            "origin": ["expands only to one texts"],
        })

        # Two slots with several alternatives each
        self.hello_world = ContextFreeGrammar.from_string({
            "origin": ["<hello> <world>"],
            "hello": ["hello", "hi", "hey"],
            "world": ["world", "universe"],
        })

        # Same single sentence twice, with different non-terminal names
        single_named = {
            "origin": ["<hello> <world>"],
            "hello": ["hello"],
            "world": ["world"],
        }
        single_lettered = {
            "origin": ["<a> <b>"],
            "a": ["hello"],
            "b": ["world"],
        }
        self.hello_world_single_a = ContextFreeGrammar.from_string(single_named)
        self.hello_world_single_b = ContextFreeGrammar.from_string(single_lettered)
Ejemplo n.º 12
0
 def test_slot_repeat(self):
     """Induction check for a template that uses the same slot twice (``<a> <a>``)."""
     repeated_slot_rules = {
         "origin": ["<a> <a>"],
         "a": ["1", "2", "3"],
     }
     expected = ContextFreeGrammar.from_string(repeated_slot_rules)
     induced = self.check_grammar_induction_correctness(
         expected, words_per_slot=1, minimal_variables=False
     )
     print(induced)
Ejemplo n.º 13
0
 def test_not_isomorphic_same_keys(self):
     """Identical rule names and structure but different terminals are not isomorphic."""
     words = ContextFreeGrammar.from_string({
         "origin": ["<a> <world>"],
         "a": ["<hello>"],
         "hello": ["hello", "hi", "hey"],
         "world": ["world", "universe"],
     })
     # Same keys and shape, but every terminal is a different string
     letters = ContextFreeGrammar.from_string({
         "origin": ["<a> <world>"],
         "a": ["<hello>"],
         "hello": ["a", "b", "c"],
         "world": ["d", "e"],
     })

     # The check must reject the pair in both directions
     for left, right in ((words, letters), (letters, words)):
         self.assertFalse(left.is_isomorphic_with(right))
Ejemplo n.º 14
0
    def test_isomorphic_advanced(self):
        """Isomorphism of two larger grammars that differ only in non-terminal names.

        Both grammars define two extra rules (``f``/``g`` and ``6``/``7``)
        that the origin rule does not reference; an origin that listed them
        was previously left commented out.
        """
        lettered = ContextFreeGrammar.from_string({
            "origin": ["<a>", "<b>", "<c>", "<d>", "<e>"],
            "a": ["<hello>"],
            "b": ["<world>"],
            "c": ["<world>"],
            "d": ["<world>", "<hello>"],
            "e": ["<hello> <world>", "<hello>"],
            "f": ["<a>", "<hello>"],
            "g": ["<a>"],
            "hello": ["hello", "hi", "hey"],
            "world": ["world", "universe"],
        })
        numbered = ContextFreeGrammar.from_string({
            "origin": ["<1>", "<2>", "<3>", "<4>", "<5>"],
            "1": ["<h>"],
            "2": ["<w>"],
            "3": ["<w>"],
            "4": ["<w>", "<h>"],
            "5": ["<h> <w>", "<h>"],
            "6": ["<1>", "<h>"],
            "7": ["<1>"],
            "h": ["hello", "hi", "hey"],
            "w": ["world", "universe"],
        })

        # Each grammar with itself first, then the pair in both directions
        isomorphic_pairs = (
            (lettered, lettered),
            (numbered, numbered),
            (lettered, numbered),
            (numbered, lettered),
        )
        for left, right in isomorphic_pairs:
            self.assertTrue(left.is_isomorphic_with(right))

        # Neither grammar may match the plain hello-world fixture
        for candidate in (lettered, numbered):
            self.assertFalse(candidate.is_isomorphic_with(self.hello_world))
Ejemplo n.º 15
0
 def test_get_depth(self):
     """``get_depth`` should report the expected depth for the fixtures and a 4-rule chain."""
     fixture_depths = (
         (1, self.simple),
         (2, self.hello_world),
         (2, self.hello_world_single_a),
         (2, self.hello_world_single_b),
     )
     for expected_depth, fixture in fixture_depths:
         self.assertEqual(expected_depth, fixture.get_depth())

     # origin -> A -> B -> C, where only C produces a terminal
     chain = ContextFreeGrammar.from_string({
         "origin": ["<A>"],
         "A": ["<B>"],
         "B": ["<C>"],
         "C": ["hi"],
     })
     self.assertEqual(4, chain.get_depth())
Ejemplo n.º 16
0
    def test_isomorphic_multiple_nt_refs(self):
        """Isomorphism when several distinct non-terminals have identical bodies.

        The conflicting grammar references ``<1>`` in both of its first two
        origin templates and has no third intermediate rule, so it must not
        be accepted as isomorphic with the other two.
        """
        lettered = ContextFreeGrammar.from_string({
            "origin": ["<a> world", "<c> world", "<b> <world>"],
            "a": ["<hello>"],
            "c": ["<hello>"],
            "b": ["<world>", "<hello>"],
            "hello": ["hello", "hi", "hey", "<world>"],
            "world": ["world", "universe"],
        })
        numbered = ContextFreeGrammar.from_string({
            "origin": ["<1> world", "<3> world", "<2> <w>"],
            "3": ["<h>"],
            "1": ["<h>"],
            "2": ["<w>", "<h>"],
            "h": ["hello", "hi", "hey", "<w>"],
            "w": ["world", "universe"],
        })
        merged_reference = ContextFreeGrammar.from_string({
            "origin": ["<1> world", "<1> world", "<2> <w>"],
            "1": ["<h>"],
            "2": ["<w>", "<h>"],
            "h": ["hello", "hi", "hey", "<w>"],
            "w": ["world", "universe"],
        })

        # Self comparison, then the renamed pair through the shared helper
        self.assertTrue(lettered.is_isomorphic_with(lettered))
        self.check_isomorphism(lettered, numbered)

        # Rejections: first the merged-reference variant, then the fixture
        for other in (merged_reference, self.hello_world):
            for candidate in (lettered, numbered):
                self.assertFalse(candidate.is_isomorphic_with(other))
Ejemplo n.º 17
0
    def test_isomorphic_nested(self):
        """Isomorphism of grammars where a slot is reached through an intermediate rule."""
        named = ContextFreeGrammar.from_string({
            "origin": ["<a> <world>"],
            "a": ["<hello>"],
            "hello": ["hello", "hi", "hey"],
            "world": ["world", "universe"],
        })
        # Same shape with every non-terminal renamed
        renamed = ContextFreeGrammar.from_string({
            "origin": ["<b> <w>"],
            "b": ["<c>"],
            "c": ["hello", "hi", "hey"],
            "w": ["world", "universe"],
        })

        self.check_isomorphism(named, renamed)

        # The single-sentence fixture must not be considered equivalent
        single_sentence = self.hello_world_single_a
        self.assertFalse(named.is_isomorphic_with(single_sentence))
Ejemplo n.º 18
0
 def test_hello_world_multiple_origin_options(self):
     """Induction check for a grammar whose origin has two alternative templates."""
     rules = {
         "origin": ["<hello>, <location>!", "The <location> is <adjective>"],
         "hello": ["Hello", "Greetings", "Howdy", "Hey"],
         "location": ["world", "universe", "earth"],
         "adjective": ["pretty", "cool", "awesome"],
     }
     expected = ContextFreeGrammar.from_string(rules)

     # The helper verifies dataset reproduction and isomorphism
     induced = self.check_grammar_induction_correctness(
         expected, words_per_slot=1
     )
     print(induced)
Ejemplo n.º 19
0
 def test_hello_world_multiple_deep(self):
     """Induction check for a grammar with a nested rule (``b`` refers to ``c``).

     Besides the checks done by the helper, the induced grammar must not
     be recursive.
     """
     rules = {
         "origin": ["<a>, <b>!"],
         "a": ["1", "2", "3"],
         "b": ["4", "5", "6", "- <c>"],
         "c": ["7", "8", "9"],
     }
     expected = ContextFreeGrammar.from_string(rules)

     induced = self.check_grammar_induction_correctness(
         expected, words_per_slot=2, minimal_variables=True
     )
     print(induced)

     self.assertFalse(induced.is_recursive())
Ejemplo n.º 20
0
 def test_isomorphic_advanced_self(self):
     """A larger grammar must be isomorphic with itself.

     Rules ``f`` and ``g`` are defined but not referenced by the origin
     rule; an origin that listed them was previously left commented out.
     """
     rules = {
         "origin": ["<a>", "<b>", "<c>", "<d>", "<e>"],
         "a": ["<hello>"],
         "b": ["<world>"],
         "c": ["<world>"],
         "d": ["<hello>", "<world>"],
         "e": ["<hello> <world>", "<hello>"],
         "f": ["<a>", "<hello>"],
         "g": ["<a>"],
         "hello": ["hello", "hi", "hey"],
         "world": ["world", "universe"],
     }
     grammar = ContextFreeGrammar.from_string(rules)

     self.assertTrue(grammar.is_isomorphic_with(grammar))
Ejemplo n.º 21
0
 def test_no_recursion(self):
     """Inducing from a grammar's full, sorted expansion must not yield a recursive grammar."""
     source_grammar = ContextFreeGrammar.from_string({
         "origin": ["<hello> <location>!", "<hello> there <hello> kid"],
         "hello": ["hello", "greetings"],
         "location": ["world", "earth"],
     })

     # Every string the grammar can produce, in sorted order
     sentences = sorted(
         generated.to_flat_string() for generated in source_grammar.generate_all()
     )

     induction_settings = dict(
         words_per_slot=1,
         prune_redundant=True,
         minimal_variables=True,
         max_recalculation=None,
     )
     induced = grammar_induction.induce_grammar_using_template_trees(
         sentences, **induction_settings
     )
     print(induced)

     self.assertFalse(induced.is_recursive())
Ejemplo n.º 22
0
 def test_repeat_2(self):
     """Induce a grammar with a twice-used slot from its complete, sorted expansion.

     The induced grammar must be isomorphic with the source grammar and
     must not be recursive.
     """
     source_grammar = ContextFreeGrammar.from_string({
         "origin": [
             "I really like <X> and <X>",
             "<X> are not supposed to be in the zoo",
         ],
         "X": ["cats", "dogs", "geese", "bananas"],
     })

     # Every string the grammar can produce, in sorted order
     sentences = sorted(
         generated.to_flat_string() for generated in source_grammar.generate_all()
     )

     induction_settings = dict(
         words_per_slot=1,
         prune_redundant=True,
         minimal_variables=True,
         max_recalculation=None,
     )
     induced = grammar_induction.induce_grammar_using_template_trees(
         sentences, **induction_settings
     )
     print(induced)

     self.assertTrue(source_grammar.is_isomorphic_with(induced))
     self.assertFalse(induced.is_recursive())
Ejemplo n.º 23
0
 def test_from_text_notebook_example(self):
     """Induce the expected three-slot grammar from the five example sentences."""
     sentences = [
         "I like my cat and my dog",
         "I like my dog and my chicken",
         "Alice the cat is jumping",
         "Bob the dog is walking",
         "Cathy the cat is walking",
     ]
     # The animal slot <C> is shared between both origin templates
     expected = ContextFreeGrammar.from_string({
         "origin": ["<G> the <C> is <D>", "I like my <C> and my <C>"],
         "C": ["cat", "chicken", "dog"],
         "G": ["Alice", "Bob", "Cathy"],
         "D": ["jumping", "walking"],
     })

     induced = grammar_induction.induce_grammar_using_template_trees(
         sentences, words_per_slot=1, relative_similarity_threshold=0.1
     )

     self.assertTrue(expected.is_isomorphic_with(induced))
Ejemplo n.º 24
0
 def test_not_joining_empty_string(self):
     """Optional adjectives are expected as slots that include an empty-string alternative.

     The two adjective slots (``adj`` and ``adj2``) are expected to stay
     separate even though both contain the empty string.
     """
     sentences = [
         "I saw him on the quiet hill",
         "I saw her on the tall hill",
         "I saw her on the hill",
         "He likes cute cats",
         "He likes nice cats",
         "He likes cats",
     ]
     expected = ContextFreeGrammar.from_string({
         "origin": ["I saw <him> on the <adj> hill", "He likes <adj2> cats"],
         "him": ["her", "him"],
         "adj": ["", "quiet", "tall"],
         "adj2": ["", "nice", "cute"],
     })

     induced = grammar_induction.induce_grammar_using_template_trees(
         sentences, words_per_slot=1, relative_similarity_threshold=0.1
     )
     print(induced)

     self.assertTrue(expected.is_isomorphic_with(induced))
Ejemplo n.º 25
0
 def test_generate_same_name(self):
     """A slot used twice in one template may expand to different values.

     Random generation must always land inside the nine possible
     sentences, and ``generate_all`` must produce exactly those nine.
     """
     repeated_slot_grammar = ContextFreeGrammar.from_string(
         {"origin": ["I like <X> and <X>"], "X": ["cats", "dogs", "pandas"],}
     )
     possibilities = {
         "I like cats and cats",
         "I like cats and dogs",
         "I like cats and pandas",
         "I like dogs and cats",
         "I like dogs and dogs",
         "I like dogs and pandas",
         "I like pandas and cats",
         "I like pandas and dogs",
         "I like pandas and pandas",
     }
     # Sample repeatedly; assertIn reports the offending sentence on failure
     for _ in range(100):
         self.assertIn(
             repeated_slot_grammar.generate().to_flat_string(), possibilities
         )
     # Exhaustive generation must match the full set, nothing more or less
     self.assertEqual(
         possibilities,
         {g.to_flat_string() for g in repeated_slot_grammar.generate_all()},
     )
Ejemplo n.º 26
0
 def test_generate_flat(self):
     """A grammar with one alternative per rule must generate its only sentence."""
     single_sentence_rules = {
         "origin": ["<hello> <world>"],
         "hello": ["hello"],
         "world": ["world"],
     }
     grammar = ContextFreeGrammar.from_string(single_sentence_rules)

     generated_text = grammar.generate().to_flat_string()
     self.assertEqual("hello world", generated_text)
Ejemplo n.º 27
0
 def check_grammar_expansion(self, grammar: ContextFreeGrammar,
                             expected_expansion: Collection[str]):
     """Assert that ``grammar`` generates exactly the strings in ``expected_expansion``.

     Both sides are compared as sets, so order and duplicates are ignored.
     """
     produced = set(grammar.generate_all_string())
     wanted = set(expected_expansion)
     self.assertEqual(wanted, produced)
Ejemplo n.º 28
0
 def test_isomorphic_repeat(self):
     """A grammar whose origin lists the same alternative twice is isomorphic with itself."""
     rules = {
         "origin": ["<a>", "<a>", "<b>"],
         "a": ["<b>"],
         "b": ["world"],
     }
     grammar = ContextFreeGrammar.from_string(rules)

     self.assertTrue(grammar.is_isomorphic_with(grammar))