def test_named_slot_parsing(self):
    """``Template.from_string`` turns ``<NAME>`` markers into named slots."""
    parsed = Template.from_string("a <A> c d <B>")

    # Plain words stay string elements; the angle-bracket markers become slots.
    wanted_elements = [
        TemplateString("a"),
        NamedTemplateSlot("A"),
        TemplateString("c"),
        TemplateString("d"),
        NamedTemplateSlot("B"),
    ]
    self.assertEqual(Template(wanted_elements), parsed)
def test_get_possible_isomorphic_nt_replacements(self):
    """Check the slot replacements reported by ``get_isomorphic_replacements``."""
    # Comparing a fixture with itself: "origin" maps onto "origin".
    wanted_for_self = {
        SlotReplacements(
            {NamedTemplateSlot("origin"): NamedTemplateSlot("origin")}
        )
    }
    found_for_self = set(self.simple.get_isomorphic_replacements(self.simple))
    self.assertEqual(wanted_for_self, found_for_self)

    # Comparing the two hello-world fixtures: hello -> a and world -> b.
    wanted_for_pair = {
        SlotReplacements(
            {
                NamedTemplateSlot("origin"): NamedTemplateSlot("origin"),
                NamedTemplateSlot("hello"): NamedTemplateSlot("a"),
                NamedTemplateSlot("world"): NamedTemplateSlot("b"),
            }
        )
    }
    found_for_pair = set(
        self.hello_world_single_a.get_isomorphic_replacements(
            self.hello_world_single_b
        )
    )
    self.assertEqual(wanted_for_pair, found_for_pair)
def setUp(self) -> None:
    """Seed the RNG and create the strings, slots and templates shared by the tests."""
    random.seed(123)

    # Plain string elements.
    self.a, self.b, self.c = (
        TemplateString(text) for text in ("a", "b", "c")
    )

    # Two anonymous slots, then three named ones.
    self.slot1, self.slot2 = TemplateSlot(), TemplateSlot()
    self.slot_x, self.slot_y, self.slot_z = (
        NamedTemplateSlot(name) for name in ("x", "y", "z")
    )

    # Single-element templates wrapping each of the strings above.
    self.at, self.bt, self.ct = (
        Template([element]) for element in (self.a, self.b, self.c)
    )
def test_from_string(self):
    """A dict of string productions parses into the matching ``ContextFreeGrammar``."""
    raw_rules = {"A": ["<B>, world", "hi"], "B": ["hello"]}

    # "<B>, world" is expected as slot B followed by the tokens "," and "world".
    productions_a = [
        Template(
            [
                NamedTemplateSlot("B"),
                TemplateString(","),
                TemplateString("world"),
            ]
        ),
        Template([TemplateString("hi")]),
    ]
    productions_b = [Template([TemplateString("hello")])]
    wanted_grammar = ContextFreeGrammar(
        {
            NamedTemplateSlot("A"): productions_a,
            NamedTemplateSlot("B"): productions_b,
        }
    )

    parsed_grammar = ContextFreeGrammar.from_string(raw_rules)
    self.assertEqual(wanted_grammar, parsed_grammar)
def setUp(self) -> None:
    """Seed the RNG and create the named slots and one-word templates used by the tests."""
    random.seed(123)

    def one_word(word):
        # A template consisting of a single string element.
        return Template([TemplateString(word)])

    self.a, self.b, self.c, self.d = (
        NamedTemplateSlot(name) for name in ("A", "B", "C", "D")
    )

    # Greeting words and the subsets of them the tests combine.
    self.e1, self.e2, self.e3 = (
        one_word(word) for word in ("hello", "hi", "hey")
    )
    self.e12 = {self.e1, self.e2}
    self.e23 = {self.e2, self.e3}
    self.e123 = {self.e1, self.e2, self.e3}

    # Addressee words and their full set.
    self.e4, self.e5, self.e6 = (
        one_word(word) for word in ("world", "earth", "everyone")
    )
    self.e456 = {self.e4, self.e5, self.e6}
def test_get_slot_content_mappings(self):
    """Verify the slot assignments reported by ``get_slot_content_mappings``."""
    self.assertEqual(set(), self.s1.get_slot_content_mappings())

    x_slot = NamedTemplateSlot("x")
    y_slot = NamedTemplateSlot("y")
    str_a, str_b, str_c = (TemplateString(text) for text in ("a", "b", "c"))

    # One slot, one child: "a <x>" over "a b" must assign "b" to x.
    one_slot_root = Template([str_a, x_slot])
    one_slot_leaf = TemplateTree(Template([str_a, str_b]), [])
    mappings = TemplateTree(
        one_slot_root, [one_slot_leaf]
    ).get_slot_content_mappings()
    self.assertEqual(1, len(mappings))
    only_mapping = next(iter(mappings))
    self.assertTrue(x_slot in only_mapping)
    self.assertTrue(x_slot in only_mapping.keys())
    self.assertEqual(Template([str_b]), only_mapping[x_slot])
    self.assertEqual({SlotAssignment({x_slot: Template([str_b])})}, mappings)

    # Two slots around a fixed "b": "<x> b <y>" over "a b c".
    two_slot_root = Template([x_slot, str_b, y_slot])
    two_slot_leaf = TemplateTree(Template([str_a, str_b, str_c]), [])
    two_slot_mappings = TemplateTree(
        two_slot_root, [two_slot_leaf]
    ).get_slot_content_mappings()
    self.assertEqual(
        {SlotAssignment({x_slot: Template([str_a]), y_slot: Template([str_c])})},
        two_slot_mappings,
    )

    # Fixture tree: its first slot is expected to be filled by "c" and by "e".
    fixture_slot = self.u1.get_template().get_slots()[0]
    wanted_fixture_mappings = {
        SlotAssignment({fixture_slot: Template([TemplateString(text)])})
        for text in ("c", "e")
    }
    self.assertEqual(
        wanted_fixture_mappings, self.u1.get_slot_content_mappings()
    )
def _create_large_slotvalues(
        nb_template_elements: int,
        nb_slots: int,
        max_elements_per_slot: int) -> Tuple[SlotValues, List[Template]]:
    """Build a ``SlotValues`` with ``nb_slots`` alphabetically named slots.

    Each slot receives ``_shuffled_subset(contents, 0, k)`` where ``k`` is a
    random integer between 1 and ``max_elements_per_slot`` (inclusive) and
    ``contents`` comes from ``_create_contents(nb_template_elements)``.

    :return: the filled slot values together with the generated contents.
    """
    contents = _create_contents(nb_template_elements)
    slot_names = alphabetic_slot_name_iterator()
    slot_values = SlotValues()

    for _ in range(nb_slots):
        # Draw the random values before taking the next slot name, so the
        # order of RNG and iterator consumption stays the same.
        random_bound = random.randint(1, max_elements_per_slot)
        chosen = _shuffled_subset(contents, 0, random_bound)
        slot_values[NamedTemplateSlot(next(slot_names))] = chosen

    return slot_values, contents
def test_collapse_using_slot_values(self):
    """Collapse a two-level tree whose slots all merge into slot E."""
    hello = TemplateString("hello")
    hey = TemplateString("hey")
    hi = TemplateString("hi")

    # Leaves: two-word greetings.
    leaf_hello_hello = TemplateTree(Template([hello, hello]))
    leaf_hey_hello = TemplateTree(Template([hey, hello]))
    leaf_hello_hi = TemplateTree(Template([hello, hi]))
    leaf_hi_hello = TemplateTree(Template([hi, hello]))
    leaf_hi_hi = TemplateTree(Template([hi, hi]))
    leaves = [
        leaf_hello_hello,
        leaf_hey_hello,
        leaf_hello_hi,
        leaf_hi_hello,
        leaf_hi_hi,
    ]

    hello_t, hey_t, hi_t = (Template([word]) for word in (hello, hey, hi))

    slot_a, slot_b, slot_c, slot_d, slot_e, slot_f = (
        NamedTemplateSlot(name) for name in ("A", "B", "C", "D", "E", "F")
    )

    # Middle layer: one slot next to a fixed word; the root has two slots.
    first_hello = TemplateTree(
        Template([hello, slot_a]), [leaf_hello_hello, leaf_hello_hi]
    )
    second_hello = TemplateTree(
        Template([slot_b, hello]),
        [leaf_hello_hello, leaf_hey_hello, leaf_hi_hello],
    )
    second_hi = TemplateTree(Template([slot_c, hi]), [leaf_hello_hi, leaf_hi_hi])
    first_hi = TemplateTree(Template([hi, slot_d]), [leaf_hi_hello, leaf_hi_hi])
    root = TemplateTree(
        Template([slot_e, slot_f]),
        [first_hello, second_hello, second_hi, first_hi],
    )

    def refers_to_e():
        # A fresh value set holding only a reference to slot E.
        return {Template([slot_e])}

    slot_values = SlotValues(
        {
            slot_a: refers_to_e(),
            slot_b: refers_to_e(),
            slot_c: refers_to_e(),
            slot_d: refers_to_e(),
            slot_e: {hello_t, hi_t, hey_t},
            slot_f: refers_to_e(),
        }
    )
    merged = root.get_slot_values().merge_slots(relative_similarity_threshold=0.01)
    self.assertEqual(slot_values, merged)

    # Rename every slot except E to E, then collapse with the merged values.
    renaming = {
        slot: slot_e for slot in (slot_a, slot_b, slot_c, slot_d, slot_f)
    }
    renamed_tree = root.name_template_slots(renaming)
    collapsed_tree = renamed_tree.collapse_using_slot_values(slot_values)

    self.assertEqual(Template([slot_e, slot_e]), collapsed_tree.get_template())
    self.assertEqual(
        {leaf.get_template() for leaf in leaves},
        {child.get_template() for child in collapsed_tree.get_children()},
    )
def from_string(
    content: str,
    named_slot_regex=default_named_slot_regex,
    tokenizer: Callable[[str], List[str]] = word_tokenize,
    slot_token: str = "[SLOT]",
) -> "Template":
    """Parse ``content`` into a ``Template``.

    Each occurrence of ``slot_token`` becomes an anonymous ``TemplateSlot``.
    Each match of ``named_slot_regex`` becomes a ``NamedTemplateSlot`` whose
    name is the first result of ``named_slot_regex.findall`` on the matched
    text. All remaining text is split by ``tokenizer`` into ``TemplateString``
    elements.

    :param content: the text to parse.
    :param named_slot_regex: compiled pattern recognising named slots.
    :param tokenizer: callable splitting plain text into string tokens.
    :param slot_token: literal marker standing for an anonymous slot.
    :return: the template made of the parsed elements, in order.
    """
    if slot_token not in content and not named_slot_regex.search(content):
        # No slot of either kind: the whole input is plain text.
        return Template([TemplateString(t) for t in tokenizer(content)])

    tokens = []
    parts = content.split(slot_token)
    last_index = len(parts) - 1

    for index, part in enumerate(parts):
        match = named_slot_regex.search(part)
        while match:
            # Text in front of the named slot becomes plain string tokens.
            leading_text = part[:match.start()]
            tokens.extend([TemplateString(t) for t in tokenizer(leading_text)])

            # Re-apply the pattern to the matched text only, so the slot
            # name is whatever ``findall`` reports for that match.
            slot_name = named_slot_regex.findall(match.group(0))[0]
            tokens.append(NamedTemplateSlot(slot_name))

            # Keep scanning what follows; stop once only whitespace is left.
            part = part[match.end():]
            match = named_slot_regex.search(part) if part.strip() else None

        if part.strip():
            tokens.extend([TemplateString(t) for t in tokenizer(part)])

        # Every split point of ``slot_token`` stands for an anonymous slot.
        if index < last_index:
            tokens.append(TemplateSlot())

    return Template(tokens)
def name_slots_automatically(
    self, slot_name_generator: Iterator[str] = None
):
    """Give every unnamed slot in this tree a fresh, unused name.

    :param slot_name_generator: iterator yielding candidate slot names.
        When omitted, a new ``alphabetic_slot_name_iterator()`` is created
        for this call. The default used to be built once at definition time,
        so successive calls shared one partly consumed generator and the
        names depended on earlier calls.
    :return: the result of ``self.name_template_slots`` applied to the
        mapping from each unnamed slot to its new ``NamedTemplateSlot``.
    """
    if slot_name_generator is None:
        slot_name_generator = alphabetic_slot_name_iterator()

    all_slots = self.get_all_descendent_slots_breadth_first()
    taken_names = {s.get_name() for s in all_slots if s.is_named()}
    unnamed_slots = [s for s in all_slots if not s.is_named()]

    named_slots_map = dict()
    for unnamed_slot in unnamed_slots:
        # Skip candidate names that are already used in the template tree.
        new_slot_name = None
        while new_slot_name is None or new_slot_name in taken_names:
            new_slot_name = next(slot_name_generator)
        named_slots_map[unnamed_slot] = NamedTemplateSlot(new_slot_name)

    return self.name_template_slots(named_slots_map)
def test_collapse_using_slot_values(self):
    """Trees with a redundant middle layer collapse to a single ``<A> <B>`` level."""
    hello = TemplateString("hello")
    hey = TemplateString("hey")
    world = TemplateString("world")
    universe = TemplateString("universe")

    # Leaves: every greeting/place combination.
    hello_world = TemplateTree(Template([hello, world]))
    hey_world = TemplateTree(Template([hey, world]))
    hello_universe = TemplateTree(Template([hello, universe]))
    hey_universe = TemplateTree(Template([hey, universe]))

    slot_a = NamedTemplateSlot("A")
    slot_b = NamedTemplateSlot("B")
    slot_c = NamedTemplateSlot("C")

    expected = TemplateTree(
        Template([slot_a, slot_b]),
        [hello_world, hey_world, hello_universe, hey_universe],
    )
    expected_values = SlotValues(
        {
            slot_a: {Template([hello]), Template([hey])},
            slot_b: {Template([world]), Template([universe])},
        }
    )

    def check_collapses_to_expected(tree):
        # The tree must yield the expected slot values and collapse to the
        # flat expected tree when those values are supplied.
        self.assertEqual(
            expected_values, tree.calculated_merged_independent_slot_values()
        )
        self.assertEqual(
            expected, tree.collapse_using_slot_values(expected_values)
        )

    # Middle layer fixes the first element (the greeting).
    hello_t = Template([hello, slot_b])
    hello_tt = TemplateTree(hello_t, [hello_world, hello_universe])
    hey_t = Template([hey, slot_b])
    hey_tt = TemplateTree(hey_t, [hey_world, hey_universe])
    greeting_t = Template([slot_a, slot_b])
    greeting_tt = TemplateTree(greeting_t, [hello_tt, hey_tt])

    self.assertTrue(greeting_t.encompasses(hey_t, expected_values))
    self.assertTrue(greeting_t.encompasses(hello_t, expected_values))
    self.assertFalse(hello_t.encompasses(greeting_t, expected_values))
    check_collapses_to_expected(greeting_tt)

    # Middle layer fixes the second element (the place).
    world_t = Template([slot_a, world])
    world_tt = TemplateTree(world_t, [hello_world, hey_world])
    universe_t = Template([slot_a, universe])
    universe_tt = TemplateTree(universe_t, [hello_universe, hey_universe])
    place_t = Template([slot_a, slot_b])
    place_tt = TemplateTree(place_t, [world_tt, universe_tt])
    check_collapses_to_expected(place_tt)

    # Mixed children: one subtree of each kind plus a bare leaf.
    mix_tt = TemplateTree(place_t, [world_tt, hey_tt, hello_universe])
    check_collapses_to_expected(mix_tt)

    # Extra root slot C covering both the greeting tree and an unrelated leaf.
    noise = Template([TemplateString("noise")])
    noise_tt = TemplateTree(noise)
    noise_t = Template([slot_c])
    full_noise_tt = TemplateTree(noise_t, [greeting_tt, noise_tt])
    noise_values = SlotValues(
        {
            slot_a: {Template([hello]), Template([hey])},
            slot_b: {Template([world]), Template([universe])},
            slot_c: {Template([slot_a, slot_b]), noise},
        }
    )
    collapsed_full_noise = full_noise_tt.collapse_using_slot_values(noise_values)
    self.assertEqual(
        noise_values,
        full_noise_tt.calculated_merged_independent_slot_values(),
    )
    self.assertEqual(
        TemplateTree(Template([slot_c]), [expected, noise_tt]),
        collapsed_full_noise,
    )
def test_merge_large(self):
    """Exercise ``SlotValues.merge_slots`` on 26 disjoint slots plus overlapping extras.

    Slots "a".."z" each hold two consecutive, non-overlapping content items.
    Extra slots that overlap several of them are then added one by one, and
    the merge result is checked at different similarity thresholds.
    """
    contents = _create_contents(100)
    letters = "abcdefghijklmnopqrstuvwxyz"

    def slot(name):
        return NamedTemplateSlot(name)

    # Value sets of slots that were merged away and now only reference a/f.
    points_to_a = {Template([slot("a")])}
    points_to_f = {Template([slot("f")])}

    # Slot number i (a=0 ... z=25) owns contents[2i:2i+2].
    slot_values = SlotValues({
        slot(letter): set(contents[2 * i:2 * i + 2])
        for i, letter in enumerate(letters)
    })

    # Disjoint slots never merge, whatever the threshold.
    self.assertEqual(slot_values, slot_values.merge_slots())
    self.assertEqual(slot_values, slot_values.merge_slots(0.1))
    self.assertEqual(slot_values, slot_values.merge_slots(0.001))

    # Now add something that overlaps
    extra_slot_1 = slot("zzz-extra")
    slot_values[extra_slot_1] = set(contents[0:9])
    self.assertEqual(slot_values, slot_values.merge_slots())

    merged_011 = slot_values.merge_slots(0.112)
    self.assertEqual(set(contents[0:9]), merged_011[slot("a")])
    for merged_away in (slot("b"), slot("c"), slot("d"), extra_slot_1):
        self.assertEqual(points_to_a, merged_011[merged_away])
    self.assertEqual(set(contents[8:10]), merged_011[slot("e")])

    merged_01 = slot_values.merge_slots(0.1)
    self.assertEqual(set(contents[0:10]), merged_01[slot("a")])
    # The extra slot is checked on merged_01 here; this assertion used to
    # look at merged_011 again by mistake.
    for merged_away in (slot("d"), extra_slot_1, slot("e")):
        self.assertEqual(points_to_a, merged_01[merged_away])

    extra_slot_2 = slot("zzz-extra-2")
    slot_values[extra_slot_2] = set(contents[11:52])
    self.assertEqual(slot_values, slot_values.merge_slots())

    merged2_005 = slot_values.merge_slots(0.05)
    self.assertEqual(set(contents[0:10]), merged2_005[slot("a")])
    for merged_away in (slot("d"), extra_slot_1, slot("e")):
        self.assertEqual(points_to_a, merged2_005[merged_away])
    self.assertEqual(set(contents[11:52]), merged2_005[extra_slot_2])
    self.assertEqual(set(contents[10:12]), merged2_005[slot("f")])
    self.assertEqual(set(contents[14:16]), merged2_005[slot("h")])

    merged2_0023 = slot_values.merge_slots(0.023)
    self.assertEqual(set(contents[0:10]), merged2_0023[slot("a")])
    for merged_away in (slot("d"), extra_slot_1, slot("e")):
        self.assertEqual(points_to_a, merged2_0023[merged_away])
    self.assertEqual(set(contents[10:52]), merged2_0023[slot("f")])
    for merged_away in (slot("g"), slot("h"), extra_slot_2):
        self.assertEqual(points_to_f, merged2_0023[merged_away])

    # A third extra slot bridges the "a" group and the "f" group.
    extra_slot_3 = slot("zzz-extra-3")
    slot_values[extra_slot_3] = set(contents[9:11])
    self.assertEqual(slot_values, slot_values.merge_slots())

    a = {Template([slot("a")])}
    fully_merged = {slot("a"): set(contents[0:52])}
    fully_merged.update({slot(letter): a for letter in letters[1:]})
    fully_merged[extra_slot_1] = a
    fully_merged[extra_slot_2] = a
    fully_merged[extra_slot_3] = a
    self.assertEqual(
        SlotValues(fully_merged),
        slot_values.merge_slots(0.01),
    )