def test_prefix_non_matching_fuzziness(self, data):
    """Check the common prefix never exceeds the first fuzziness mismatch.

    ``data`` unpacks into ``(template, fuzziness, template, fuzziness)``.
    Inputs whose two fuzziness sequences are equal are filtered out with
    ``assume``.
    """
    first_tmpl, first_fuzz, second_tmpl, second_fuzz = data
    assume(first_fuzz != second_fuzz)

    # Advance to the first index at which the two fuzziness values differ.
    mismatch_at = 0
    while first_fuzz[mismatch_at] == second_fuzz[mismatch_at]:
        mismatch_at += 1

    left = MatchFragment(first_tmpl, fuzziness=first_fuzz)
    right = MatchFragment(second_tmpl, fuzziness=second_fuzz)
    self.assertLessEqual(left.longest_common_prefix(right), mismatch_at)
def insert(self, path: MatchFragment, data_obj: object) -> bool:
    """Insert ``path`` into the graph and attach ``data_obj`` to it.

    Starting at node 0, the candidate edges of the current node are chosen
    from ``fuzzy_starts`` when the first position of ``path`` is fuzzy, and
    otherwise from the ``adjacency`` bin of the first template value. Each
    candidate edge classifies ``path``; depending on the classification the
    method descends along the edge, appends the data to an existing node,
    splits the edge, or (when no edge caused a descent) adds a new leaf.

    Args:
        path: Fragment to insert. It is shortened with ``drop_before`` as
            matching prefixes are consumed.
        data_obj: Object to store for this path.

    Returns:
        ``True`` when the path was added after splitting an edge or as a
        new leaf. ``False`` when an edge classified the path as ``EQUAL``
        (the data is appended to that edge's target node) or when the
        outer loop ends because ``path`` has become empty.
    """
    self.finalized = False  # every insert resets the finalized flag
    match_size: int = len(path)  # full length, taken before path is shortened
    next_node: int = 0  # traversal starts at node 0
    while len(path) > 0:
        # Select the edge list to inspect based on the first path position.
        if path.fuzziness[0]:
            edges = self.fuzzy_starts[next_node]
        else:
            bin_ = self._to_bin(path.template[0])
            edges = self.adjacency[next_node][bin_]
        for edge in edges:
            class_, prefix_len = edge.classify_path(path)
            if (class_ == PathClassification.PART
                ):  # Path prefix is in tree - continue on this path
                path.drop_before(prefix_len)
                next_node = edge.to
                break  # skips the for-else below; the while loop continues
            if (class_ == PathClassification.EQUAL
                ):  # Path is already in tree - add data
                logger.debug(
                    f"{data_obj} is duplicate to {self.data[edge.to]}")
                self.data[edge.to].append(data_obj)
                return False
            if (
                    class_ == PathClassification.BRANCH
            ):  # Path branches off existing edge - split edge and add path
                self._split_edge(edge, prefix_len)
                path.drop_before(prefix_len)
                # NOTE(review): presumably _split_edge creates the
                # intermediate node, so it is self.nodes - 1 here - confirm.
                if len(path) > 0:
                    self._add_edge(self.nodes - 1, self.nodes, path,
                                   match_size)
                    self._new_node(data_obj)
                else:
                    logger.debug(
                        f"{data_obj} was added after a new branch")
                    self.data[self.nodes - 1].append(
                        data_obj)  # add data to split edge
                return True
        else:
            # Runs only when the for loop finished without a break, i.e. no
            # edge led to a descent: attach the rest of the path as a leaf.
            logger.debug(f"{data_obj} was added as a new leaf")
            self._add_edge(next_node, self.nodes, path, match_size)
            self._new_node(data_obj)
            return True
    return False
class TestShortWildCardEdges(unittest.TestCase):
    """Tests using short fragments whose fuzziness is 1 at every position."""

    def setUp(self):
        """Create a fresh graph and fragments of length one, two and three."""
        self.g = Graph()
        self.a = MatchFragment(b"a", b"\x01")
        self.b = MatchFragment(b"ba", b"\x01\x01")
        self.c = MatchFragment(b"ccc", b"\x01\x01\x01")

    def test_longest_common_prefix(self):
        """Check the common prefix lengths between the three fragments."""
        frag_a, frag_b, frag_c = self.a, self.b, self.c

        self.assertEqual(frag_a.longest_common_prefix(frag_b), 1)

        # The result must not depend on which fragment is the receiver.
        b_vs_a = frag_b.longest_common_prefix(frag_a)
        a_vs_b = frag_a.longest_common_prefix(frag_b)
        self.assertEqual(b_vs_a, a_vs_b)

        self.assertEqual(frag_c.longest_common_prefix(frag_a), 1)
        self.assertEqual(frag_b.longest_common_prefix(frag_c), 2)

    def test_minimality(self):
        """After inserting all fragments, node 0 must have exactly one edge."""
        labelled = ((self.a, "a"), (self.b, "b"), (self.c, "c"))
        for fragment, label in labelled:
            self.g.insert(fragment, label)

        root_edges = list(self.g.edges_at(0))
        self.assertEqual(len(root_edges), 1)
def test_identity_matching(self, match):
    """Check that a graph holding one fragment matches its own template.

    Falsy ``match`` values are filtered out with ``assume``. The expected
    entry in the match results is ``([match], len(match), len(match))``.
    """
    assume(match)

    match_frag = MatchFragment(match)
    graph = Graph()
    graph.insert(match_frag, match)

    matches = list(graph.match(match))
    note(
        f"frag: {match_frag}, target: {match}, matches: {matches}, graph: {graph}"
    )

    size = len(match)
    expected = ([match], size, size)
    self.assertIn(expected, matches)
def test_graph_constraints(self, data):
    """Check structural bounds of a graph built from ``data``.

    Node 0 may not have more edges than there are inputs, and for
    non-empty input the maximal match size reported for node 0 must equal
    the length of the longest input.
    """
    trie = Graph()
    for blob in data:
        trie.insert(MatchFragment(blob), blob)

    root_edge_count = len(list(trie.edges_at(0)))
    self.assertLessEqual(root_edge_count, len(data))

    if not data:
        return  # max() is undefined for empty input; nothing more to check

    longest_input = max(map(len, data))
    self.assertEqual(longest_input, trie._get_max_match_size(0))
def test_data_existence(self, data):
    """Check that every inserted value is stored in the graph's data.

    The flattened contents of ``graph.data`` must contain as many items as
    were inserted, and each inserted value must be among them.
    """
    trie = Graph()
    for blob in data:
        trie.insert(MatchFragment(blob), blob)

    # Flatten the per-node data lists into a single list.
    stored = [item for bucket in trie.data.values() for item in bucket]

    self.assertEqual(len(stored), len(data))
    for blob in data:
        self.assertIn(blob, stored)
def test_longest_common_prefix(self, prefix, suffix):
    """Check the common prefix of ``prefix`` and ``prefix + suffix``.

    The result must be the same in both call directions and equal to
    ``len(prefix)``.
    """
    short = MatchFragment(prefix)
    extended = MatchFragment(prefix + suffix)

    forward = extended.longest_common_prefix(short)
    backward = short.longest_common_prefix(extended)
    self.assertEqual(forward, backward)

    self.assertEqual(len(prefix), extended.longest_common_prefix(short))
def comparer(self) -> MatchFragment:
    """Build a ``MatchFragment`` from this object's template and fuzziness.

    The returned fragment is meant to be compared against strings and other
    matchers, and to be split at given positions.
    """
    template, fuzziness = self.template, self.fuzziness
    return MatchFragment(template, fuzziness)
def test_inequality_longest_prefix(self, bin_a, bin_b):
    """Check the common prefix of two different inputs is bounded.

    Equal inputs are filtered out with ``assume``; the common prefix may
    not be longer than the shorter of the two inputs.
    """
    assume(bin_a != bin_b)

    left = MatchFragment(bin_a)
    right = MatchFragment(bin_b)
    shared = left.longest_common_prefix(right)

    upper_bound = min(len(bin_a), len(bin_b))
    self.assertLessEqual(shared, upper_bound)
def test_inequality(self, bin_a, bin_b):
    """Check a fragment built from ``bin_a`` differs from a distinct ``bin_b``."""
    assume(bin_a != bin_b)

    fragment = MatchFragment(bin_a)
    self.assertNotEqual(fragment, bin_b)
def test_fuzz_all(self, testers):
    """Check a fragment with fuzziness 1 everywhere equals the second value.

    ``testers`` unpacks into the template used to build the fragment and
    the value it is compared against.
    """
    template, other = testers

    # One fuzziness byte of value 1 for every position of the template.
    all_fuzzy = bytes([1]) * len(template)
    fragment = MatchFragment(template, fuzziness=all_fuzzy)

    self.assertEqual(fragment, other)
def test_fuzzy_inequality(self, pair):
    """Check a fragment stays unequal when only matching positions are fuzzy.

    The fuzziness is set exactly at positions where both templates agree,
    so differing positions are compared strictly. Equal templates are
    filtered out with ``assume``.
    """
    left, right = pair
    assume(right != left)

    fuzzy_where_same = [x == y for x, y in zip(left, right)]
    fragment = MatchFragment(left, fuzziness=fuzzy_where_same)

    self.assertNotEqual(fragment, right)
def test_fuzzy_equality(self, pair):
    """Check a fragment equals the other template when differences are fuzzy.

    The fuzziness is set exactly at positions where the two templates
    differ.
    """
    left, right = pair

    fuzzy_where_different = [x != y for x, y in zip(left, right)]
    fragment = MatchFragment(left, fuzziness=fuzzy_where_different)

    self.assertEqual(fragment, right)
def test_fuzzy_identity(self, data):
    """Check a fragment equals its own template for any given fuzziness.

    ``data`` unpacks into the template and the fuzziness passed to the
    fragment.
    """
    template, fuzz = data
    fragment = MatchFragment(template, fuzziness=fuzz)
    self.assertEqual(fragment, template)
def test_identity(self, tester):
    """Check a fragment built without fuzziness equals its own template."""
    fragment = MatchFragment(tester)
    self.assertEqual(fragment, tester)
def setUp(self):
    """Create a fresh graph and fragments of length one, two and three.

    Every fragment gets a fuzziness byte of value 1 for each position.
    """
    self.g = Graph()
    self.a = MatchFragment(b"a", b"\x01")
    self.b = MatchFragment(b"ba", b"\x01\x01")
    self.c = MatchFragment(b"ccc", b"\x01\x01\x01")