def test_merge_relative_overlap_values(self):
    # Two slots: a holds every generated content item, b holds whatever
    # _shuffled_subset(contents, 0, 2) returns (presumably a shuffled slice
    # of the same items -- confirm against that helper).
    contents = _create_contents(10)
    values_a = set(contents)
    values_b = _shuffled_subset(contents, 0, 2)
    slot_values = SlotValues({self.a: values_a, self.b: values_b})

    # It should not merge with the default threshold, nor with any explicit
    # relative similarity threshold > 0.2.
    self.assertEqual(slot_values, slot_values.merge_slots())
    for threshold in (1, 0.9, 0.5):
        unmerged = slot_values.merge_slots(
            relative_similarity_threshold=threshold)
        self.assertEqual(slot_values, unmerged)

    # B should merge into A if the threshold is <= 0.2: b is then expected
    # to contain only a template that refers to a.
    expected_merged = SlotValues({
        self.a: set(contents),
        self.b: {Template([self.a])},
    })
    for threshold in (0.2, 0.1):
        merged = slot_values.merge_slots(
            relative_similarity_threshold=threshold)
        self.assertEqual(expected_merged, merged)
def test_merge_relative_overlap_values_three_variables_2(self):
    # Three partially overlapping slots: a = contents[1:5],
    # b = contents[0:2], c = contents[2:6]. Lowering the threshold is
    # expected to merge c into a first (at 0.6) and b into a later (at 0.2).
    contents = _create_contents(10)
    slot_values = SlotValues({
        self.a: set(contents[1:5]),
        self.b: set(contents[0:2]),
        self.c: set(contents[2:6]),
    })

    # Nothing merges with the default threshold, nor with 1 or 0.61.
    self.assertEqual(slot_values, slot_values.merge_slots())
    for threshold in (1, 0.61):
        unmerged = slot_values.merge_slots(
            relative_similarity_threshold=threshold)
        self.assertEqual(slot_values, unmerged)

    # From 0.6 down to 0.21 only c is folded into a; b stays untouched.
    expected_first_merged = SlotValues({
        self.a: set(contents[1:6]),
        self.b: set(contents[0:2]),
        self.c: {Template([self.a])},
    })
    for threshold in (0.6, 0.21):
        partially_merged = slot_values.merge_slots(
            relative_similarity_threshold=threshold)
        self.assertEqual(expected_first_merged, partially_merged)

    # At 0.2 and below b is folded into a as well.
    expected_full_merged = SlotValues({
        self.a: set(contents[0:6]),
        self.b: {Template([self.a])},
        self.c: {Template([self.a])},
    })
    for threshold in (0.2, 0.1):
        fully_merged = slot_values.merge_slots(
            relative_similarity_threshold=threshold)
        self.assertEqual(expected_full_merged, fully_merged)
def test_merge_small_overlap(self):
    # Slots a (e12) and b (e23) are merged with a relative similarity
    # threshold of 0.3; the expected outcome is that b is replaced by a and
    # a ends up holding e123.
    initial = SlotValues({self.a: self.e12, self.b: self.e23})
    merged = initial.merge_slots(relative_similarity_threshold=0.3)

    # The replacement map must point b at a.
    expected_replacements = hashabledict({self.b: self.a})
    self.assertEqual(expected_replacements, merged.get_replacements())

    # b is reduced to a single template that refers to a.
    expected_values = SlotValues({
        self.a: self.e123,
        self.b: {Template([self.a])},
    })
    self.assertEqual(expected_values, merged)
def test_merge_containing_slot(self):
    # Slot a lists a template of b next to e1, e2 and e3, while b holds
    # e123 and c holds e456. With the default threshold the expected
    # outcome is that a is replaced by b and c is left alone.
    initial = SlotValues({
        self.a: {Template([self.b]), self.e1, self.e2, self.e3},
        self.b: self.e123,
        self.c: self.e456,
    })
    merged = initial.merge_slots()

    # The replacement map must point a at b.
    expected_replacements = hashabledict({self.a: self.b})
    self.assertEqual(expected_replacements, merged.get_replacements())

    # a keeps only the template that refers to b.
    expected_values = SlotValues({
        self.a: {Template([self.b])},
        self.b: self.e123,
        self.c: self.e456,
    })
    self.assertEqual(expected_values, merged)
def test_merge_basic(self):
    # Slots a and b hold the same values (e123) and c holds e456. With the
    # default threshold the expected outcome is that b is replaced by a and
    # c is left alone.
    initial = SlotValues({
        self.a: self.e123,
        self.b: self.e123,
        self.c: self.e456,
    })
    merged = initial.merge_slots()

    # The replacement map must point b at a.
    expected_replacements = hashabledict({self.b: self.a})
    self.assertEqual(expected_replacements, merged.get_replacements())

    # b is reduced to a single template that refers to a.
    expected_values = SlotValues({
        self.a: self.e123,
        self.b: {Template([self.a])},
        self.c: self.e456,
    })
    self.assertEqual(expected_values, merged)
def test_merge_containing_multiple_slots_complely(self):
    # Slot a lists templates of both b and c next to e1 and e2, while b and
    # c both hold e123. With the default threshold the expected outcome is
    # that a and c are both replaced by b.
    initial = SlotValues({
        self.a: {Template([self.b]), Template([self.c]), self.e1, self.e2},
        self.b: self.e123,
        self.c: self.e123,
    })
    merged = initial.merge_slots()

    # The replacement map must point both a and c at b.
    expected_replacements = hashabledict({
        self.a: self.b,
        self.c: self.b,
    })
    self.assertEqual(expected_replacements, merged.get_replacements())

    # a and c are each reduced to a single template that refers to b.
    expected_values = SlotValues({
        self.a: {Template([self.b])},
        self.b: self.e123,
        self.c: {Template([self.b])},
    })
    self.assertEqual(expected_values, merged)
def test_merge_relative_overlap_values_three_variables_1(self):
    # Slot a holds every generated content item, b holds contents[0:2] and
    # c holds contents[5:8]. Lowering the threshold is expected to merge c
    # into a first (at 0.3) and b into a later (at 0.2).
    contents = _create_contents(10)
    slot_values = SlotValues({
        self.a: set(contents),
        self.b: set(contents[0:2]),
        self.c: set(contents[5:8]),
    })

    # Nothing merges with the default threshold, nor with 1 or 0.5.
    self.assertEqual(slot_values, slot_values.merge_slots())
    for threshold in (1, 0.5):
        unmerged = slot_values.merge_slots(
            relative_similarity_threshold=threshold)
        self.assertEqual(slot_values, unmerged)

    # At 0.3 only c is folded into a; b stays untouched.
    only_c_merged = SlotValues({
        self.a: set(contents),
        self.b: set(contents[0:2]),
        self.c: {Template([self.a])},
    })
    merged_03 = slot_values.merge_slots(relative_similarity_threshold=0.3)
    self.assertEqual(only_c_merged, merged_03)

    # B should merge into A as well if the threshold is <= 0.2.
    full_merge = SlotValues({
        self.a: set(contents),
        self.b: {Template([self.a])},
        self.c: {Template([self.a])},
    })
    for threshold in (0.2, 0.1):
        fully_merged = slot_values.merge_slots(
            relative_similarity_threshold=threshold)
        self.assertEqual(full_merge, fully_merged)
def test_merge_large(self):
    # Exercises merge_slots on 26 disjoint two-element slots ("a".."z"),
    # then adds three overlapping "extra" slots one at a time and checks
    # which slots get folded together at several positional thresholds.
    contents = _create_contents(100)
    letters = "abcdefghijklmnopqrstuvwxyz"

    # Slot "a" gets contents[0:2], "b" gets contents[2:4], ... and "z" gets
    # contents[50:52]; insertion order follows the alphabet.
    slot_values = SlotValues({
        NamedTemplateSlot(letter): set(contents[2 * index:2 * index + 2])
        for index, letter in enumerate(letters)
    })

    # Disjoint slots must never merge, whatever the threshold.
    self.assertEqual(slot_values, slot_values.merge_slots())
    self.assertEqual(slot_values, slot_values.merge_slots(0.1))
    self.assertEqual(slot_values, slot_values.merge_slots(0.001))

    # Now add something that overlaps
    extra_slot_1 = NamedTemplateSlot("zzz-extra")
    slot_values[extra_slot_1] = set(contents[0:9])
    self.assertEqual(slot_values, slot_values.merge_slots())

    # Threshold 0.112: a, b, c, d and the extra slot collapse into a, while
    # e (which shares only contents[8] with the extra slot) stays separate.
    merged_011 = slot_values.merge_slots(0.112)
    self.assertEqual(set(contents[0:9]),
                     merged_011[NamedTemplateSlot("a")])
    self.assertEqual({Template([NamedTemplateSlot("a")])},
                     merged_011[NamedTemplateSlot("b")])
    self.assertEqual({Template([NamedTemplateSlot("a")])},
                     merged_011[NamedTemplateSlot("c")])
    self.assertEqual({Template([NamedTemplateSlot("a")])},
                     merged_011[NamedTemplateSlot("d")])
    self.assertEqual({Template([NamedTemplateSlot("a")])},
                     merged_011[extra_slot_1])
    self.assertEqual(
        set(contents[8:10]),
        merged_011[NamedTemplateSlot("e")],
    )

    # Threshold 0.1: e is folded into a as well.
    merged_01 = slot_values.merge_slots(0.1)
    self.assertEqual(set(contents[0:10]),
                     merged_01[NamedTemplateSlot("a")])
    self.assertEqual({Template([NamedTemplateSlot("a")])},
                     merged_01[NamedTemplateSlot("d")])
    # Fixed: this assertion used to inspect merged_011 (the 0.112 result),
    # so the extra slot was never checked for the 0.1 merge.
    self.assertEqual({Template([NamedTemplateSlot("a")])},
                     merged_01[extra_slot_1])
    self.assertEqual({Template([NamedTemplateSlot("a")])},
                     merged_01[NamedTemplateSlot("e")])

    # A second extra slot that overlaps the slots "f".."z".
    extra_slot_2 = NamedTemplateSlot("zzz-extra-2")
    slot_values[extra_slot_2] = set(contents[11:52])
    self.assertEqual(slot_values, slot_values.merge_slots())

    # Threshold 0.05: the first group is merged into a, the second extra
    # slot and the slots it overlaps are still left as they were.
    merged2_005 = slot_values.merge_slots(0.05)
    self.assertEqual(set(contents[0:10]),
                     merged2_005[NamedTemplateSlot("a")])
    self.assertEqual({Template([NamedTemplateSlot("a")])},
                     merged2_005[NamedTemplateSlot("d")])
    self.assertEqual({Template([NamedTemplateSlot("a")])},
                     merged2_005[extra_slot_1])
    self.assertEqual({Template([NamedTemplateSlot("a")])},
                     merged2_005[NamedTemplateSlot("e")])
    self.assertEqual(set(contents[11:52]), merged2_005[extra_slot_2])
    self.assertEqual(set(contents[10:12]),
                     merged2_005[NamedTemplateSlot("f")])
    self.assertEqual(set(contents[14:16]),
                     merged2_005[NamedTemplateSlot("h")])

    # Threshold 0.023: the second group collapses into f, separately from
    # the group that was merged into a.
    merged2_0023 = slot_values.merge_slots(0.023)
    self.assertEqual(set(contents[0:10]),
                     merged2_0023[NamedTemplateSlot("a")])
    self.assertEqual({Template([NamedTemplateSlot("a")])},
                     merged2_0023[NamedTemplateSlot("d")])
    self.assertEqual({Template([NamedTemplateSlot("a")])},
                     merged2_0023[extra_slot_1])
    self.assertEqual({Template([NamedTemplateSlot("a")])},
                     merged2_0023[NamedTemplateSlot("e")])
    self.assertEqual(set(contents[10:52]),
                     merged2_0023[NamedTemplateSlot("f")])
    self.assertEqual({Template([NamedTemplateSlot("f")])},
                     merged2_0023[NamedTemplateSlot("g")])
    self.assertEqual({Template([NamedTemplateSlot("f")])},
                     merged2_0023[NamedTemplateSlot("h")])
    self.assertEqual({Template([NamedTemplateSlot("f")])},
                     merged2_0023[extra_slot_2])

    # A third extra slot (contents[9:11]) bridges the two groups.
    extra_slot_3 = NamedTemplateSlot("zzz-extra-3")
    slot_values[extra_slot_3] = set(contents[9:11])
    self.assertEqual(slot_values, slot_values.merge_slots())

    # Threshold 0.01: every slot is expected to end up merged into a. The
    # same template set object is shared by all merged slots, as before.
    a = {Template([NamedTemplateSlot("a")])}
    self.assertEqual(
        SlotValues({
            NamedTemplateSlot("a"): set(contents[0:52]),
            **{NamedTemplateSlot(letter): a for letter in letters[1:]},
            extra_slot_1: a,
            extra_slot_2: a,
            extra_slot_3: a,
        }),
        slot_values.merge_slots(0.01),
    )