def test_random_hamming_distance(self, _, algorithm):
    """Check `algorithm` against the reference Hamming distance on random words."""
    trials, length, alphabet = 100, 100, ['a', 'b']
    for _ in range(trials):
        word_1 = rand.random_word(length, alphabet)
        word_2 = rand.random_word(length, alphabet)
        reference = distance.hamming_distance(word_1, word_2, length, length)
        self.check_distance(
            word_1, word_2, length, length, reference,
            distance.HAMMING_DISTANCE, algorithm)
def test_random_string_matching_with_dont_care(self):
    """Compare don't-care matching against an overlapped-regex reference."""
    trials, n, m, alphabet = 100, 500, 10, ['a', 'b']
    for _ in range(trials):
        t = rand.random_word(n, alphabet)
        w = rand.random_word(m, alphabet)
        # '?' is the don't-care symbol; translate it to the regex wildcard.
        pattern = w[1:].replace('?', '.')
        reference = [
            match.start() + 1
            for match in regex.finditer(pattern, t[1:], overlapped=True)
        ]
        self.check_get_all_matches_with_dont_care(t, w, n, m, reference)
def test_random_edit_distance(self, _, algorithm):
    """Check `algorithm` against the reference edit distance on random words."""
    trials, len_1, len_2, alphabet = 100, 15, 15, ['a', 'b']
    for _ in range(trials):
        word_1 = rand.random_word(len_1, alphabet)
        word_2 = rand.random_word(len_2, alphabet)
        reference = distance.distance(
            word_1, word_2, len_1, len_2, distance.EDIT_DISTANCE)
        self.check_distance(
            word_1, word_2, len_1, len_2, reference,
            distance.EDIT_DISTANCE, algorithm)
def test_random_string_matching(self, _, algorithm):
    """Check k-mismatch matching against a brute-force Hamming check, for k = 1..K."""
    trials, n, m, max_k, alphabet = 100, 20, 10, 6, ['a', 'b']
    for _ in range(trials):
        for k in range(1, max_k + 1):
            t = rand.random_word(n, alphabet)
            w = rand.random_word(m, alphabet)
            reference = [
                index for index in range(1, n + 1)
                if self.check_hamming_distance(t, w, m, k, index)
            ]
            self.check_get_all_matches(t, w, n, m, k, reference, algorithm)
def test_random_lcs(self, _, algorithm):
    """Check `algorithm`'s LCS length via the indel distance on random words."""
    trials, len_1, len_2, alphabet = 100, 7, 7, ['a', 'b']
    for _ in range(trials):
        word_1 = rand.random_word(len_1, alphabet)
        word_2 = rand.random_word(len_2, alphabet)
        indel = distance.distance(
            word_1, word_2, len_1, len_2, distance.INDEL_DISTANCE)
        self.check_lcs(word_1, word_2, len_1, len_2, indel, algorithm)
def test_random_big(self, _, build, algorithm):
    """Check multi-pattern matching against per-pattern brute force on a long text."""
    trials, n, pattern_count, alphabet = 100, 1000, 100, ['a', 'b', 'c']
    for _ in range(trials):
        t = random_word(n, alphabet)
        patterns = {
            random_word(random.randint(2, 5), alphabet)
            for _ in range(pattern_count)
        }
        reference = [
            (w[1:], i)
            for w in patterns
            for i in forward.brute_force(t, w, n, len(w) - 1)
        ]
        self.check_all_matches(t, n, patterns, reference, build, algorithm)
def test_big_texts(self):
    """LCS of two long texts sharing a million-character prefix must be that prefix."""
    shared_len, alphabet = 1000000, ['a', 'b', 'c']
    prefix = random_word(shared_len, alphabet)
    # text_b extends the shared prefix with random noise; text_a with a run
    # of 'e' (a letter outside the alphabet), so only the prefix is common.
    text_b = prefix + random_word(100, alphabet)
    text_a = prefix + 'eeeeeeeeee'
    text_lcs = longest_common_substring(
        text_a, text_b, len(text_a) - 1, len(text_b) - 1)
    self.assertTrue(
        validate_lcs(text_a, text_b, text_lcs) and len(text_lcs) == shared_len)
def test_random_string_matching_with_edit_distance(self):
    """Check k-error (edit distance) matching against a subword-based reference."""
    trials, n, m, alphabet = 100, 300, 10, ['a', 'b']
    for _ in range(trials):
        t = rand.random_word(n, alphabet)
        w = rand.random_word(m, alphabet)
        k = randrange(m - 1)
        reference = [
            index - 1
            for index in range(1, n + 2)
            if check_subwords(t, w, m, k, index)
        ]
        self.check_get_all_matches_with_edit_distance(t, w, n, m, k, reference)
def test_random_lcp_lr_matching(self):
    """LCP-LR accelerated suffix-array search must agree with plain binary search."""
    text_trials, n, alphabet = 1000, 500, ['a', 'b']
    m, word_trials = 10, 10
    for _ in range(text_trials):
        text = rand.random_word(n, alphabet)
        sa = suffix_array.skew(text, n)
        lcp = suffix_array.lcp_kasai(sa, text, n)
        lcplr = lcp_lr.lcplr_from_lcp(lcp, n)
        for _ in range(word_trials):
            word = rand.random_word(m, alphabet)
            reference = suffix_array.contains(sa, text, word, n, m)
            result = lcp_lr.contains_with_lcplr(sa, lcplr, text, word, n, m)
            self.assertEqual(list(result), list(reference))
def test_random_small(self):
    """Check Commentz-Walter multi-pattern search against per-pattern brute force.

    Bug fix: the original used ``expected.union(set(indices))``, which returns
    a NEW set and discards it — ``expected`` stayed empty, so the assertion
    could only pass when the search found nothing.  ``update`` mutates in place.
    """
    n, pattern_count, alphabet = 100, 25, ['a', 'b', 'c']
    for _ in range(100):
        t = random_word(n, alphabet)
        patterns = {
            random_word(randint(2, 5), alphabet) for _ in range(pattern_count)
        }
        cw_automat = commentz_walter_build(patterns)
        expected = set()
        for p in patterns:
            starts = brute_force(t, p, n, len(p) - 1)
            # In-place update: set.union would return a new set and be a no-op here.
            expected.update((p, i) for i in starts)
        found = set(commentz_walter_search(t[1::], n, cw_automat))
        self.assertSetEqual(expected, found)
def test_myers_random_extensions(self):
    """LCS of a text and a copy with random insertions must be the original text."""
    base_len, alphabet = 10000, ['a', 'b', 'c', 'd', 'e']
    text_a = random_word(base_len, alphabet)
    text_b = text_a
    # Insert 50-100 short random chunks into text_b; text_a stays a subsequence.
    for _ in range(randint(50, 100)):
        place = randint(0, base_len - 1)
        extension = random_word(randint(1, 5), alphabet)
        text_b = text_b[:place] + extension + text_b[place:]
    text_lcs = longest_common_substring(
        text_a, text_b, len(text_a) - 1, len(text_b) - 1)
    self.assertTrue(
        validate_lcs(text_a, text_b, text_lcs) and len(text_lcs) == base_len)
def test_random(self):
    """Check the automaton's occurrences against per-pattern brute force.

    Bug fix: the original used ``expected.union({...})``, which returns a NEW
    set and discards it — ``expected`` stayed empty, so the assertion could
    only pass when ``find_occurrences`` found nothing.  ``update`` mutates
    the accumulator in place.
    """
    n, pattern_count, alphabet = 500, 30, ['a', 'b', 'c']
    for _ in range(100):
        t = random_word(n, alphabet)
        patterns = {
            random_word(randint(2, 10), alphabet) for _ in range(pattern_count)
        }
        automaton = self._create_from(patterns)
        expected = set()
        for p in patterns:
            starts = brute_force(t, f'#{p}', n, len(p) + 1)
            # In-place update: set.union would return a new set and be a no-op here.
            expected.update((p, i) for i in starts)
        found = set(find_occurrences(t, n, automaton))
        self.assertSetEqual(expected, found)
def test_random_small(self):
    """Check fast practical multi-string matching against per-pattern brute force.

    Bug fix: the original used ``expected.union(set(indices))``, which returns
    a NEW set and discards it — ``expected`` stayed empty, so the assertion
    could only pass when the search found nothing.  ``update`` mutates in place.
    """
    n, pattern_count, alphabet = 100, 25, ['a', 'b', 'c']
    for _ in range(100):
        t = random_word(n, alphabet)
        patterns = {
            random_word(randint(2, 5), alphabet) for _ in range(pattern_count)
        }
        cw_automat = fast_practical_multi_string_matching_build(patterns)
        expected = set()
        for p in patterns:
            starts = brute_force(t, p, n, len(p) - 1)
            # In-place update: set.union would return a new set and be a no-op here.
            expected.update((p, i) for i in starts)
        found = set(
            fast_practical_multi_string_matching(t[1::], n, cw_automat))
        self.assertSetEqual(expected, found)
def test_small_words(self, _, algorithm, bound):
    """Check `algorithm` against the exact shortest common superstring on tiny words."""
    tests, word_count, min_len, max_len, alphabet = 100, 10, 1, 3, ['a', 'b', 'c']
    for _ in range(tests):
        words = [
            rand.random_word(random.randint(min_len, max_len), alphabet)
            for _ in range(word_count)
        ]
        exact = shortest_common_superstring.exact(words)
        self.check_shortest_common_superstring(words, exact, algorithm, bound)
def test_random_suffix_links(self):
    """Check suffix links against those derived from McCreight's construction."""
    trials, n, alphabet = 100, 200, ['a', 'b']
    for _ in range(trials):
        t = rand.random_word(n, alphabet)
        reference = TestSuffixTrees.get_suffix_links(
            *suffix_tree.mccreight(t, n))
        self.check_suffix_links(t, n, reference)
def test_random_suffix_tree(self, _, algorithm, links):
    """Check `algorithm` against the naive suffix tree and McCreight's links."""
    trials, n, alphabet = 100, 200, ['a', 'b']
    for _ in range(trials):
        t = rand.random_word(n, alphabet)
        naive_tree = suffix_tree.naive(t, n)
        mccreight_links = get_suffix_links(*suffix_tree.mccreight(t, n))[1]
        self.check_suffix_trees(
            t, n, (naive_tree, mccreight_links), algorithm, links)
def test_random_lcp_array(self):
    """Check the LCP array against one derived from a prefix-doubling suffix array."""
    trials, n, alphabet = 100, 500, ['a', 'b']
    for _ in range(trials):
        t = rand.random_word(n, alphabet)
        sa = suffix_array.prefix_doubling(t, n)
        reference = suffix_array.lcp_from_suffix_array(sa, t, n)
        self.check_lcp_array(t, n, reference)
def test_random_words(self):
    """Approximate superstring of random words must contain every input word."""
    trials, alphabet = 100, ["a", "b", "c", "d"]
    for _ in range(trials):
        amount = random.randint(5, 50)
        input_words = [
            rand.random_word(random.randint(20, 300), alphabet)
            for _ in range(amount)
        ]
        result = shortest_common_super_approx(input_words)
        self.check_superstring(result, input_words)
def test_many_abc_small_words(self):
    """Approximate superstring of few tiny words: check containment and length."""
    trials, alphabet = 1000, ["a", "b", "c"]
    for _ in range(trials):
        amount = random.randint(2, 4)
        input_words = [
            rand.random_word(random.randint(1, 4), alphabet)
            for _ in range(amount)
        ]
        result = shortest_common_super_approx(input_words)
        self.check_superstring(result, input_words)
        self.check_length(result, input_words, alphabet)
def test_myers_random_lengths(self):
    """Myers' LCS on random texts must match brute force in validity and length."""
    iter_count = 10
    a_len, b_len = randint(400, 1000), randint(400, 1000)
    alphabet = ['a', 'b', 'c']
    solutions = []
    for _ in range(iter_count):
        text_a = random_word(a_len, alphabet)
        text_b = random_word(b_len, alphabet)
        text_lcs = longest_common_substring(
            text_a, text_b, len(text_a) - 1, len(text_b) - 1)
        brute_lcs = brute_force_lcs(
            text_a, text_b, len(text_a) - 1, len(text_b) - 1)
        valid = (validate_lcs(text_a, text_b, text_lcs)
                 and len(text_lcs) == len(brute_lcs))
        solutions.append(valid)
    self.assertListEqual([True] * iter_count, solutions)
def generate_random_words_set(l, m, A):
    """Generate up to `m` random words of length `l` over alphabet `A`, then
    drop any word that occurs as a subword of another generated word.

    Bug fix: the original called ``S.remove(w_2)``, which raises ``KeyError``
    when the same word matches against two different partners —
    ``itertools.combinations`` snapshots ``S`` when created, so an
    already-removed element can reappear as ``w_2``.  ``discard`` is the
    safe idempotent removal.
    """
    S = set()
    for _ in range(m):
        S.add(rand.random_word(l, A))
    # NOTE(review): combinations yields each unordered pair once, so only the
    # later element of each pair is ever tested as the pattern — presumably
    # sufficient for the callers' purposes; verify if symmetry is required.
    for w_1, w_2 in itertools.combinations(S, 2):
        gen = forward.knuth_morris_pratt(
            w_1, w_2, len(w_1) - 1, len(w_2) - 1)
        if next(gen, None) is not None:
            # discard, not remove: w_2 may already have been dropped via
            # an earlier pair, and remove would raise KeyError then.
            S.discard(w_2)
    return S
def test_random_maximum_suffix(self):
    """Check maximum-suffix computation against the naive reference."""
    trials, n, alphabet = 100, 1000, ['a', 'b']
    for _ in range(trials):
        t = rand.random_word(n, alphabet)
        reference = maximum_suffix.naive(t, n)
        self.check_maximum_suffix(t, n, reference)
def test_random_suffix_tree(self):
    """Check suffix-tree construction against the naive reference."""
    trials, n, alphabet = 100, 200, ['a', 'b']
    for _ in range(trials):
        t = rand.random_word(n, alphabet)
        reference = suffix_tree.naive(t, n)
        self.check_suffix_trees(t, n, reference)
def test_random_suffix_array(self):
    """Check suffix-array construction against the naive reference."""
    trials, n, alphabet = 100, 500, ['a', 'b']
    for _ in range(trials):
        t = rand.random_word(n, alphabet)
        reference = suffix_array.naive(t, n)
        self.check_suffix_array(t, n, reference)
def test_random_recode_lempel_ziv_77(self):
    """LZ77 encode/decode round-trip on random words."""
    trials, n, alphabet = 100, 500, ['a', 'b']
    for _ in range(trials):
        t = rand.random_word(n, alphabet)
        self.check_recode_lempel_ziv_77(t, n)
def test_random_boyer_moore_shift(self):
    """Check the Boyer-Moore shift table against the brute-force reference."""
    trials, n, alphabet = 100, 100, ['a', 'b']
    for _ in range(trials):
        t = rand.random_word(n, alphabet)
        reference_result = suffix.boyer_moore_shift_brute_force(t, n)
        self.check_boyer_moore_shift(t, n, reference_result)
def test_random_exact_string_matching(self):
    """Check exact matching against the brute-force reference on random words."""
    trials, n, m, alphabet = 100, 500, 10, ['a', 'b']
    for _ in range(trials):
        t = rand.random_word(n, alphabet)
        w = rand.random_word(m, alphabet)
        reference = list(forward.brute_force(t, w, n, m))
        self.check_get_all_exact_matches(t, w, n, m, reference)
def test_random_critical_factorization(self):
    """Check critical factorization against the naive all-positions reference."""
    trials, n, alphabet = 100, 300, ['a', 'b']
    for _ in range(trials):
        t = rand.random_word(n, alphabet)
        reference = critical_factorization.naive_all(t, n)
        self.check_critical_factorization(t, n, reference)
def test_random_string_lcs(self):
    """Exercise the linear-space LCS on random word pairs."""
    trials, len_1, len_2, alphabet = 100, 100, 50, ['a', 'b']
    for _ in range(trials):
        word_1 = rand.random_word(len_1, alphabet)
        word_2 = rand.random_word(len_2, alphabet)
        self.check_linear_space_lcs(
            word_1, word_2, len(word_1) - 1, len(word_2) - 1)
def test_random_exact_string_matching(self):
    """Run the test driver with the basic-FFT don't-care matcher as reference."""
    trials, n, m, alphabet = 100, 500, 10, ['a', 'b']
    for _ in range(trials):
        t = rand.random_word(n, alphabet)
        # The pattern may contain the don't-care symbol '?'.
        w = rand.random_word(m, alphabet + ['?'])
        reference = list(dont_care.basic_fft(t, w, n, m))
        self.make_test(t, w, reference)