Python ContextFreeGrammar 예제들, gitta.context_free_grammar.ContextFreeGrammar Python 예제들

예제 #1

0

파일 보기

파일: test_context_free_grammar.py 프로젝트: twinters/gitta

    def test_isomorphic_multiple_possibilities_simple(self):
        """Isomorphism must hold when several slot pairings are candidates.

        All three grammars share one shape; they differ in slot names, and
        the third one additionally swaps which origin slot leads to the
        greeting words and which leads to the world words.
        """
        named_rules = {
            "origin": ["<a> world", "<b> world"],
            "a": ["<hello>"],
            "b": ["<world>"],
            "hello": ["hello", "hi", "hey"],
            "world": ["world", "universe"],
        }
        numbered_rules = {
            "origin": ["<1> world", "<2> world"],
            "1": ["<h>"],
            "2": ["<w>"],
            "h": ["hello", "hi", "hey"],
            "w": ["world", "universe"],
        }
        swapped_rules = {
            "origin": ["<1> world", "<2> world"],
            "1": ["<w>"],
            "2": ["<h>"],
            "w": ["world", "universe"],
            "h": ["hello", "hi", "hey"],
        }
        gram1 = ContextFreeGrammar.from_string(named_rules)
        gram2 = ContextFreeGrammar.from_string(numbered_rules)
        gram3 = ContextFreeGrammar.from_string(swapped_rules)

        # check_isomorphism is defined outside this snippet; presumably it
        # asserts that the given grammars are isomorphic with one another.
        self.check_isomorphism(gram1, gram2, gram3)

        # None of the variants may match the plain hello-world fixture.
        for candidate in (gram1, gram2, gram3):
            self.assertFalse(candidate.is_isomorphic_with(self.hello_world))

예제 #2

0

파일 보기

파일: test_grammar_induction.py 프로젝트: twinters/gitta

    def check_grammar_induction_correctness(
        self,
        expected_grammar: ContextFreeGrammar,
        dataset: List[str] = None,
        words_per_slot=1,
        prune_redundant=True,
        minimal_variables=True,
    ) -> ContextFreeGrammar:
        """Induce a grammar and verify it against ``expected_grammar``.

        When ``dataset`` is None, the strings generated by
        ``expected_grammar`` are used as training data. The induced grammar
        is printed, checked for expansion and isomorphism, and returned.
        """
        if dataset is None:
            dataset = expected_grammar.generate_all_string()

        induction_options = dict(
            words_per_slot=words_per_slot,
            prune_redundant=prune_redundant,
            minimal_variables=minimal_variables,
        )
        induced_grammar = grammar_induction.induce_grammar_using_template_trees(
            dataset, **induction_options
        )
        print(induced_grammar)

        # The induced grammar has to regenerate the dataset it was learned from.
        self.check_grammar_expansion(induced_grammar, dataset)

        # It also has to have the same structure as the expected grammar.
        matches_expected = expected_grammar.is_isomorphic_with(induced_grammar)
        self.assertTrue(matches_expected)

        # Rendering to a string must succeed and produce some text.
        rendered = str(induced_grammar)
        self.assertTrue(len(rendered) > 0)

        return induced_grammar

예제 #3

0

파일 보기

파일: test_grammar_induction.py 프로젝트: twinters/gitta

 def setUp(self):
     """Seed the random generator and build the grammar fixtures."""
     random.seed(42)
     self.hello_world_small = ContextFreeGrammar.from_string({
         "origin": ["<hello> <world>"],
         "hello": ["hello", "hi", "hey"],
         "world": ["world", "universe", "earth"],
     })
     self.hello_world_full = ContextFreeGrammar.from_string({
         # Rule options are given as a list, like every other rule in these
         # fixtures; a bare str is itself an iterable of single characters.
         "origin": ["<hello>, <location>!"],
         "hello": ["Hello", "Greetings", "Howdy", "Hey"],
         "location": ["world", "solar system", "galaxy", "universe"],
     })

예제 #4

0

파일 보기

 def setUp(self) -> None:
     """Seed the random generator and prepare two grammar fixtures."""
     random.seed(123)

     small_rules = {
         "origin": ["<hello> <world>"],
         "hello": ["hello", "hi", "hey"],
         "world": ["world", "universe", "earth"],
     }
     # Two origin templates that share the <location> slot.
     greeting_and_adjective_rules = {
         "origin":
         ["<hello>, <location>!", "The <location> is <adjective>"],
         "hello": ["Hello", "Greetings", "Howdy", "Hey"],
         "location": ["universe", "earth", "world", "solar system"],
         "adjective": ["pretty", "cool", "amazing"],
     }

     self.hello_world_small = ContextFreeGrammar.from_string(small_rules)
     self.hello_world_and_world_adjective = ContextFreeGrammar.from_string(
         greeting_and_adjective_rules
     )

예제 #5

0

파일 보기

파일: test_context_free_grammar.py 프로젝트: twinters/gitta

 def test_modifier_removal_small(self):
     """``replace_modifier_variables`` must drop the ``.modifier`` suffix."""
     # (expected result, input with a modifier) pairs, checked in order.
     cases = (
         ("#a#", "#a.bla#"),
         ("#a#", "#a.title#"),
         ("#b#", "#b.title#"),
         ("#blabla#", "#blabla.title#"),
     )
     for expected, with_modifier in cases:
         self.assertEqual(
             expected, ContextFreeGrammar.replace_modifier_variables(with_modifier)
         )

예제 #6

0

파일 보기

파일: test_context_free_grammar.py 프로젝트: twinters/gitta

 def test_isomorphic_recursive(self):
     """Isomorphism checks must handle a rule that refers back to ``origin``."""
     gram1 = ContextFreeGrammar.from_string(
         {"origin": ["<a>", "a <origin>"], "a": ["world"]}
     )
     gram2 = ContextFreeGrammar.from_string(
         {"origin": ["<b>", "a <origin>"], "b": ["world"]}
     )
     # Same shape as gram2 but with a different terminal word.
     different_word = ContextFreeGrammar.from_string(
         {"origin": ["<b>", "a <origin>"], "b": ["earth"]}
     )
     # Same shape as gram2 but with a different literal before the recursion.
     different_prefix = ContextFreeGrammar.from_string(
         {"origin": ["<b>", "b <origin>"], "b": ["world"]}
     )

     self.check_isomorphism(gram1, gram2)
     for mismatch in (different_word, different_prefix):
         self.assertFalse(gram1.is_isomorphic_with(mismatch))

예제 #7

0

파일 보기

파일: test_grammar_induction.py 프로젝트: twinters/gitta

 def test_repeat_2_missing_data(self):
     """A repeated slot must still be induced from an incomplete dataset."""
     expected_rules = {
         "origin": [
             "I like <X> and <X>",
             "<X> are not supposed to be in the zoo",
         ],
         "X": ["cats", "dogs", "geese", "bananas"],
     }
     grammar = ContextFreeGrammar.from_string(expected_rules)

     # Only a subset of the sentences the grammar above can produce.
     dataset = [
         "I like cats and dogs",
         "I like bananas and geese",
         "I like geese and cats",
         "bananas are not supposed to be in the zoo",
         "geese are not supposed to be in the zoo",
     ]
     settings = dict(
         words_per_slot=1,
         prune_redundant=True,
         minimal_variables=True,
         max_recalculation=None,
         relative_similarity_threshold=0.01,
     )
     induced_grammar = grammar_induction.induce_grammar_using_template_trees(
         dataset, **settings
     )
     print(induced_grammar)

     self.assertTrue(grammar.is_isomorphic_with(induced_grammar))
     self.assertFalse(induced_grammar.is_recursive())

예제 #8

0

파일 보기

파일: test_grammar_induction.py 프로젝트: twinters/gitta

 def test_prohibit_empty_string(self):
     """With ``allow_empty_string=False`` the optional adjectives are
     expected as nested rules rather than as empty slot values."""
     expected_rules = {
         "origin": ["I saw her on the <hill>", "He likes <cats>"],
         "hill": ["hill", "<hill_adj> hill"],
         "hill_adj": ["quiet", "tall"],
         "cats": ["cats", "<cat_adj> cats"],
         "cat_adj": ["nice", "cute"],
     }
     dataset = [
         "I saw her on the quiet hill",
         "I saw her on the tall hill",
         "I saw her on the hill",
         "He likes cute cats",
         "He likes nice cats",
         "He likes cats",
     ]
     expected_grammar = ContextFreeGrammar.from_string(expected_rules)

     # NOTE(review): `log_tree` is not defined inside this method; it is
     # presumably a module-level name -- confirm it exists in the test file.
     induced_grammar = grammar_induction.induce_grammar_using_template_trees(
         dataset,
         words_per_slot=1,
         relative_similarity_threshold=0.1,
         allow_empty_string=False,
         log_tree=log_tree,
     )
     print(induced_grammar)

     is_match = expected_grammar.is_isomorphic_with(induced_grammar)
     self.assertTrue(is_match)

예제 #9

0

파일 보기

파일: grammar_induction.py 프로젝트: herrmann/gitta

def induce_grammar_using_template_trees(
    lines: Collection[str],
    relative_similarity_threshold: float = 1,
    minimal_variables: bool = True,
    words_per_slot: int = 1,
    prune_redundant: bool = True,
    max_recalculation: Optional[int] = None,
    use_best_merge_candidate: bool = True,
    max_depth: Optional[int] = None,
):
    """Induce a ContextFreeGrammar from example lines via a template tree.

    Steps, in order: learn a tree with ``TemplateLatticeLearner``, optionally
    prune redundant abstractions, name/simplify and collapse the tree,
    recalculate its templates repeatedly until the tree stops changing (or
    ``max_recalculation`` iterations have run), collapse once more,
    optionally reduce its depth, and build the grammar from the resulting
    template and slot values.

    Args:
        lines: Input texts handed to ``TemplateLatticeLearner.learn``.
        relative_similarity_threshold: Forwarded to
            ``_name_and_simplify_tree``.
        minimal_variables: Forwarded to the learner and to
            ``recalculate_templates``.
        words_per_slot: Forwarded to the learner as ``words_per_leaf_slot``.
        prune_redundant: When true, ``prune_redundant_abstractions`` is
            applied to the learned tree.
        max_recalculation: Upper bound on recalculation iterations; ``None``
            means iterate until the tree no longer changes.
        use_best_merge_candidate: Forwarded to the learner.
        max_depth: When not ``None``, the collapsed tree is passed through
            ``reduce_depth(max_depth)``.

    Returns:
        The grammar built by ``ContextFreeGrammar.from_slot_values``.
    """
    # Learn a tree from the given dataset
    learned_tree = TemplateLatticeLearner(
        minimal_variables=minimal_variables,
        words_per_leaf_slot=words_per_slot,
        use_best_merge_candidate=use_best_merge_candidate,
    ).learn(lines)

    # Prune all redundant children: if all other children of parent cover it, the child is not necessary.
    if prune_redundant:
        learned_tree = learned_tree.prune_redundant_abstractions()

    # Initial naming/simplification pass; also yields the slot values used
    # for the first collapse (and for the final one if the loop never runs).
    derived_slot_values, simplified_tree = _name_and_simplify_tree(
        learned_tree, relative_similarity_threshold)

    simplified_tree = simplified_tree.collapse_using_slot_values(
        derived_slot_values)

    # Keep recalculating the tree until convergence
    new_tt = None
    iteration = 0
    while simplified_tree != new_tt and (max_recalculation is None
                                         or iteration < max_recalculation):
        # On the first pass new_tt is still None, so the current tree is
        # kept; afterwards the previous iteration's result is adopted.
        if new_tt is not None:
            simplified_tree = new_tt
        new_tt = simplified_tree.recalculate_templates(
            minimal_variables=minimal_variables)
        # derived_slot_values is overwritten on every iteration; the value
        # from the last iteration is what the final collapse below uses.
        derived_slot_values, new_tt = _name_and_simplify_tree(
            new_tt, relative_similarity_threshold)
        iteration += 1

    # Collapse final tree using the last slot values
    collapsed_tt = simplified_tree.collapse_using_slot_values(
        derived_slot_values)

    # Limit max depth
    if max_depth is not None:
        collapsed_tt = collapsed_tt.reduce_depth(max_depth)

    # Derive final slot values
    final_slot_values = collapsed_tt.get_slot_values()

    # Create grammar
    grammar = ContextFreeGrammar.from_slot_values(
        collapsed_tt.get_template(),
        final_slot_values,
    )

    return grammar

예제 #10

0

파일 보기

파일: test_context_free_grammar.py 프로젝트: twinters/gitta

 def test_from_string(self):
     """``from_string`` must turn slot markers and literal tokens into
     the equivalent hand-built templates."""
     # "<B>, world" is expected as a slot followed by two literal tokens.
     slot_comma_world = Template(
         [
             NamedTemplateSlot("B"),
             TemplateString(","),
             TemplateString("world"),
         ]
     )
     only_hi = Template([TemplateString("hi")])
     only_hello = Template([TemplateString("hello")])
     expected_output = ContextFreeGrammar(
         {
             NamedTemplateSlot("A"): [slot_comma_world, only_hi],
             NamedTemplateSlot("B"): [only_hello],
         }
     )

     output = ContextFreeGrammar.from_string(
         {"A": ["<B>, world", "hi"], "B": ["hello"]}
     )

     self.assertEqual(expected_output, output)

예제 #11

0

파일 보기

파일: test_context_free_grammar.py 프로젝트: twinters/gitta

    def setUp(self):
        """Seed the random generator and build the shared grammar fixtures."""
        random.seed(123)
        build = ContextFreeGrammar.from_string

        # A grammar with a single, slot-free expansion.
        self.simple = build({"origin": ["expands only to one texts"]})

        # Two slots with several options each.
        self.hello_world = build(
            {
                "origin": ["<hello> <world>"],
                "hello": ["hello", "hi", "hey"],
                "world": ["world", "universe"],
            }
        )

        # Two single-option grammars that differ only in their slot names.
        self.hello_world_single_a = build(
            {"origin": ["<hello> <world>"], "hello": ["hello"], "world": ["world"]}
        )
        self.hello_world_single_b = build(
            {"origin": ["<a> <b>"], "a": ["hello"], "b": ["world"]}
        )

예제 #12

0

파일 보기

파일: test_grammar_induction.py 프로젝트: twinters/gitta

 def test_slot_repeat(self):
     """Induction must recover a template that uses the same slot twice."""
     expected_grammar = ContextFreeGrammar.from_string({
         "origin": ["<a> <a>"],
         "a": ["1", "2", "3"],
     })
     grammar = self.check_grammar_induction_correctness(
         expected_grammar,
         words_per_slot=1,
         minimal_variables=False,
     )
     print(grammar)

예제 #13

0

파일 보기

파일: test_context_free_grammar.py 프로젝트: twinters/gitta

 def test_not_isomorphic_same_keys(self):
     """Identical rule names with different terminal words are not isomorphic."""
     word_rules = {
         "origin": ["<a> <world>"],
         "a": ["<hello>"],
         "hello": ["hello", "hi", "hey"],
         "world": ["world", "universe"],
     }
     # Same keys and shape, but every terminal word is different.
     letter_rules = {
         "origin": ["<a> <world>"],
         "a": ["<hello>"],
         "hello": ["a", "b", "c"],
         "world": ["d", "e"],
     }
     gram1 = ContextFreeGrammar.from_string(word_rules)
     gram2 = ContextFreeGrammar.from_string(letter_rules)

     # The negative result must hold in both directions.
     for left, right in ((gram1, gram2), (gram2, gram1)):
         self.assertFalse(left.is_isomorphic_with(right))

예제 #14

0

파일 보기

파일: test_context_free_grammar.py 프로젝트: twinters/gitta

    def test_isomorphic_advanced(self):
        """Two larger grammars that differ only in slot names are isomorphic."""
        # NOTE: "f"/"g" (and "6"/"7" below) are defined but not referenced
        # from "origin"; the wider origin that included them is disabled:
        # "origin": ["<a>", "<b>", "<c>", "<d>", "<e>", "<f>", "<g>"],
        named_rules = {
            "origin": ["<a>", "<b>", "<c>", "<d>", "<e>"],
            "a": ["<hello>"],
            "b": ["<world>"],
            "c": ["<world>"],
            "d": ["<world>", "<hello>"],
            "e": ["<hello> <world>", "<hello>"],
            "f": ["<a>", "<hello>"],
            "g": ["<a>"],
            "hello": ["hello", "hi", "hey"],
            "world": ["world", "universe"],
        }
        # Disabled wider origin:
        # "origin": ["<1>", "<2>", "<3>", "<4>", "<5>", "<6>", "<7>"],
        numbered_rules = {
            "origin": ["<1>", "<2>", "<3>", "<4>", "<5>"],
            "1": ["<h>"],
            "2": ["<w>"],
            "3": ["<w>"],
            "4": ["<w>", "<h>"],
            "5": ["<h> <w>", "<h>"],
            "6": ["<1>", "<h>"],
            "7": ["<1>"],
            "h": ["hello", "hi", "hey"],
            "w": ["world", "universe"],
        }
        gram1 = ContextFreeGrammar.from_string(named_rules)
        gram2 = ContextFreeGrammar.from_string(numbered_rules)

        # Each grammar with itself first, then with the other in both directions.
        isomorphic_pairs = (
            (gram1, gram1),
            (gram2, gram2),
            (gram1, gram2),
            (gram2, gram1),
        )
        for left, right in isomorphic_pairs:
            self.assertTrue(left.is_isomorphic_with(right))

        # Neither grammar may match the plain hello-world fixture.
        for grammar in (gram1, gram2):
            self.assertFalse(grammar.is_isomorphic_with(self.hello_world))

예제 #15

0

파일 보기

파일: test_context_free_grammar.py 프로젝트: twinters/gitta

 def test_get_depth(self):
     """``get_depth`` must report the expected depth for each fixture."""
     fixture_depths = (
         (1, self.simple),
         (2, self.hello_world),
         (2, self.hello_world_single_a),
         (2, self.hello_world_single_b),
     )
     for expected_depth, grammar in fixture_depths:
         self.assertEqual(expected_depth, grammar.get_depth())

     # A chain origin -> A -> B -> C is expected to have depth 4.
     chain = ContextFreeGrammar.from_string(
         {"origin": ["<A>"], "A": ["<B>"], "B": ["<C>"], "C": ["hi"]}
     )
     self.assertEqual(4, chain.get_depth())

예제 #16

0

파일 보기

파일: test_context_free_grammar.py 프로젝트: twinters/gitta

    def test_isomorphic_multiple_nt_refs(self):
        """Isomorphism with several slots that point at the same rule."""
        named_rules = {
            "origin": ["<a> world", "<c> world", "<b> <world>"],
            "a": ["<hello>"],
            "c": ["<hello>"],
            "b": ["<world>", "<hello>"],
            "hello": ["hello", "hi", "hey", "<world>"],
            "world": ["world", "universe"],
        }
        numbered_rules = {
            "origin": ["<1> world", "<3> world", "<2> <w>"],
            "3": ["<h>"],
            "1": ["<h>"],
            "2": ["<w>", "<h>"],
            "h": ["hello", "hi", "hey", "<w>"],
            "w": ["world", "universe"],
        }
        # Uses slot "1" twice in origin where the others use two distinct slots.
        conflicting_rules = {
            "origin": ["<1> world", "<1> world", "<2> <w>"],
            "1": ["<h>"],
            "2": ["<w>", "<h>"],
            "h": ["hello", "hi", "hey", "<w>"],
            "w": ["world", "universe"],
        }
        gram1 = ContextFreeGrammar.from_string(named_rules)
        gram2 = ContextFreeGrammar.from_string(numbered_rules)
        conflicting_gram = ContextFreeGrammar.from_string(conflicting_rules)

        # Positive cases.
        self.assertTrue(gram1.is_isomorphic_with(gram1))
        self.check_isomorphism(gram1, gram2)

        # Negative cases: first the conflicting grammar, then the fixture.
        for other in (conflicting_gram, self.hello_world):
            for grammar in (gram1, gram2):
                self.assertFalse(grammar.is_isomorphic_with(other))

예제 #17

0

파일 보기

파일: test_context_free_grammar.py 프로젝트: twinters/gitta

    def test_isomorphic_nested(self):
        """A slot that merely forwards to another slot must not break isomorphism."""
        named_rules = {
            "origin": ["<a> <world>"],
            "a": ["<hello>"],
            "hello": ["hello", "hi", "hey"],
            "world": ["world", "universe"],
        }
        renamed_rules = {
            "origin": ["<b> <w>"],
            "b": ["<c>"],
            "c": ["hello", "hi", "hey"],
            "w": ["world", "universe"],
        }
        gram1 = ContextFreeGrammar.from_string(named_rules)
        gram2 = ContextFreeGrammar.from_string(renamed_rules)

        self.check_isomorphism(gram1, gram2)

        # The single-option fixture must not be reported as isomorphic.
        matches_single = gram1.is_isomorphic_with(self.hello_world_single_a)
        self.assertFalse(matches_single)

예제 #18

0

파일 보기

파일: test_grammar_induction.py 프로젝트: twinters/gitta

 def test_hello_world_multiple_origin_options(self):
     """Induction must recover a grammar whose origin has two templates."""
     expected_grammar = ContextFreeGrammar.from_string({
         "origin":
         ["<hello>, <location>!", "The <location> is <adjective>"],
         "hello": ["Hello", "Greetings", "Howdy", "Hey"],
         "location": ["world", "universe", "earth"],
         "adjective": ["pretty", "cool", "awesome"],
     })
     # The helper induces from the expected grammar's own output and verifies it.
     grammar = self.check_grammar_induction_correctness(
         expected_grammar,
         words_per_slot=1,
     )
     print(grammar)

예제 #19

0

파일 보기

파일: test_grammar_induction.py 프로젝트: twinters/gitta

 def test_hello_world_multiple_deep(self):
     """Induction must recover a nested slot without introducing recursion."""
     expected_grammar = ContextFreeGrammar.from_string({
         "origin": ["<a>, <b>!"],
         "a": ["1", "2", "3"],
         "b": ["4", "5", "6", "- <c>"],
         "c": ["7", "8", "9"],
     })
     grammar = self.check_grammar_induction_correctness(
         expected_grammar,
         words_per_slot=2,
         minimal_variables=True,
     )
     print(grammar)

     recursive = grammar.is_recursive()
     self.assertFalse(recursive)

예제 #20

0

파일 보기

파일: test_context_free_grammar.py 프로젝트: twinters/gitta

 def test_isomorphic_advanced_self(self):
     """A larger grammar must be isomorphic with itself."""
     # NOTE: "f" and "g" are defined but not referenced from "origin";
     # the wider origin that included them is disabled:
     # "origin": ["<a>",  "<b>", "<c>", "<d>", "<e>", "<f>", "<g>"],
     rules = {
         "origin": ["<a>", "<b>", "<c>", "<d>", "<e>"],
         "a": ["<hello>"],
         "b": ["<world>"],
         "c": ["<world>"],
         "d": ["<hello>", "<world>"],
         "e": ["<hello> <world>", "<hello>"],
         "f": ["<a>", "<hello>"],
         "g": ["<a>"],
         "hello": ["hello", "hi", "hey"],
         "world": ["world", "universe"],
     }
     gram1 = ContextFreeGrammar.from_string(rules)

     matches_itself = gram1.is_isomorphic_with(gram1)
     self.assertTrue(matches_itself)

예제 #21

0

파일 보기

파일: test_grammar_induction.py 프로젝트: twinters/gitta

 def test_no_recursion(self):
     """Inducing from a non-recursive grammar's output must not yield recursion."""
     source_rules = {
         "origin": ["<hello> <location>!", "<hello> there <hello> kid"],
         "hello": ["hello", "greetings"],
         "location": ["world", "earth"],
     }
     grammar = ContextFreeGrammar.from_string(source_rules)

     # Flatten every generated text to a string and sort the result.
     dataset = sorted(text.to_flat_string() for text in grammar.generate_all())

     induced_grammar = grammar_induction.induce_grammar_using_template_trees(
         dataset,
         words_per_slot=1,
         prune_redundant=True,
         minimal_variables=True,
         max_recalculation=None,
     )
     print(induced_grammar)

     recursive = induced_grammar.is_recursive()
     self.assertFalse(recursive)

예제 #22

0

파일 보기

파일: test_grammar_induction.py 프로젝트: twinters/gitta

 def test_repeat_2(self):
     """A slot repeated inside one template must be induced from full data."""
     source_rules = {
         "origin": [
             "I really like <X> and <X>",
             "<X> are not supposed to be in the zoo",
         ],
         "X": ["cats", "dogs", "geese", "bananas"],
     }
     grammar = ContextFreeGrammar.from_string(source_rules)

     # The complete output of the grammar, as sorted flat strings.
     dataset = sorted(text.to_flat_string() for text in grammar.generate_all())

     induced_grammar = grammar_induction.induce_grammar_using_template_trees(
         dataset,
         words_per_slot=1,
         prune_redundant=True,
         minimal_variables=True,
         max_recalculation=None,
     )
     print(induced_grammar)

     self.assertTrue(grammar.is_isomorphic_with(induced_grammar))
     self.assertFalse(induced_grammar.is_recursive())

예제 #23

0

파일 보기

파일: test_grammar_induction.py 프로젝트: twinters/gitta

 def test_from_text_notebook_example(self):
     """The five-sentence dataset must induce the expected three-slot grammar."""
     expected_rules = {
         "origin": ["<G> the <C> is <D>", "I like my <C> and my <C>"],
         "C": ["cat", "chicken", "dog"],
         "G": ["Alice", "Bob", "Cathy"],
         "D": ["jumping", "walking"],
     }
     dataset = [
         "I like my cat and my dog",
         "I like my dog and my chicken",
         "Alice the cat is jumping",
         "Bob the dog is walking",
         "Cathy the cat is walking",
     ]
     expected_grammar = ContextFreeGrammar.from_string(expected_rules)

     induced_grammar = grammar_induction.induce_grammar_using_template_trees(
         dataset,
         words_per_slot=1,
         relative_similarity_threshold=0.1,
     )

     is_match = expected_grammar.is_isomorphic_with(induced_grammar)
     self.assertTrue(is_match)

예제 #24

0

파일 보기

파일: test_grammar_induction.py 프로젝트: twinters/gitta

 def test_not_joining_empty_string(self):
     """Optional adjectives are expected as two separate slots, each of
     which includes the empty string as one of its values."""
     expected_rules = {
         "origin":
         ["I saw <him> on the <adj> hill", "He likes <adj2> cats"],
         "him": ["her", "him"],
         "adj": ["", "quiet", "tall"],
         "adj2": ["", "nice", "cute"],
     }
     dataset = [
         "I saw him on the quiet hill",
         "I saw her on the tall hill",
         "I saw her on the hill",
         "He likes cute cats",
         "He likes nice cats",
         "He likes cats",
     ]
     expected_grammar = ContextFreeGrammar.from_string(expected_rules)

     induced_grammar = grammar_induction.induce_grammar_using_template_trees(
         dataset,
         words_per_slot=1,
         relative_similarity_threshold=0.1,
     )
     print(induced_grammar)

     is_match = expected_grammar.is_isomorphic_with(induced_grammar)
     self.assertTrue(is_match)

예제 #25

0

파일 보기

파일: test_context_free_grammar.py 프로젝트: twinters/gitta

 def test_generate_same_name(self):
     """A slot used twice in one template is expanded independently.

     Every randomly generated text must be one of the nine combinations,
     and ``generate_all`` must produce exactly those nine.
     """
     grammar = ContextFreeGrammar.from_string(
         {"origin": ["I like <X> and <X>"], "X": ["cats", "dogs", "pandas"]}
     )
     possibilities = {
         "I like cats and cats",
         "I like cats and dogs",
         "I like cats and pandas",
         "I like dogs and cats",
         "I like dogs and dogs",
         "I like dogs and pandas",
         "I like pandas and cats",
         "I like pandas and dogs",
         "I like pandas and pandas",
     }
     # Random samples must stay inside the expected set; assertIn reports
     # the offending string when one does not.
     for _ in range(100):
         self.assertIn(grammar.generate().to_flat_string(), possibilities)
     # Exhaustive generation must cover the expected set exactly.
     self.assertEqual(
         possibilities,
         {g.to_flat_string() for g in grammar.generate_all()},
     )

예제 #26

0

파일 보기

파일: test_context_free_grammar.py 프로젝트: twinters/gitta

 def test_generate_flat(self):
     """A grammar with one option per slot must generate exactly one text."""
     single_option_rules = {
         "origin": ["<hello> <world>"],
         "hello": ["hello"],
         "world": ["world"],
     }
     hello_world_single = ContextFreeGrammar.from_string(single_option_rules)

     generated_text = hello_world_single.generate().to_flat_string()
     self.assertEqual("hello world", generated_text)

예제 #27

0

파일 보기

파일: test_grammar_induction.py 프로젝트: twinters/gitta

 def check_grammar_expansion(self, grammar: ContextFreeGrammar,
                             expected_expansion: Collection[str]):
     """Assert that ``grammar`` generates exactly ``expected_expansion``.

     Both sides are compared as sets, so order and duplicates are ignored.
     """
     generated = set(grammar.generate_all_string())
     expected = set(expected_expansion)
     self.assertEqual(expected, generated)

예제 #28

0

파일 보기

파일: test_context_free_grammar.py 프로젝트: twinters/gitta

 def test_isomorphic_repeat(self):
     """A grammar whose origin lists the same option twice matches itself."""
     rules_with_duplicate = {
         "origin": ["<a>", "<a>", "<b>"],
         "a": ["<b>"],
         "b": ["world"],
     }
     gram1 = ContextFreeGrammar.from_string(rules_with_duplicate)

     matches_itself = gram1.is_isomorphic_with(gram1)
     self.assertTrue(matches_itself)