def test_single_character_string(self):
    """A one-character string should produce the table ``[1]``."""
    table = full_shift_table('a')
    self.assertEqual([1], table)
def test_empty_string(self):
    """The empty string should produce an empty table."""
    table = full_shift_table('')
    self.assertEqual([], table)
def test_multiple_suffix_prefix_matches(self):
    """Check the table for a string built from four repeats of 'aab'."""
    text = 'aabaabaabaab'
    want = [
        12,
        9, 9, 9,
        6, 6, 6,
        3, 3, 3,
        0, 0,
    ]
    got = full_shift_table(text)
    self.assertEqual(want, got)
def test_single_suffix_prefix_match(self):
    """Check the table for a string built from two repeats of 'aab'."""
    text = 'aabaab'
    want = [
        6,
        3, 3, 3,
        0, 0,
    ]
    got = full_shift_table(text)
    self.assertEqual(want, got)
def test_no_suffix_prefix_matches(self):
    """Check the table for a string whose characters are all distinct."""
    text = 'abcdefgh'
    want = [8, 0, 0, 0, 0, 0, 0, 0]
    got = full_shift_table(text)
    self.assertEqual(want, got)
def string_search(P, T):
    """Return the start index in T of every occurrence of pattern P.

    P is slid left-to-right over T while characters are compared
    right-to-left.  M records, for each alignment end position already
    tried, how many characters were confirmed there, so that later phases
    can skip over them instead of re-comparing.
    NOTE(review): the five phase cases look like the Apostolico-Giancarlo
    variant of Boyer-Moore -- confirm against the helper documentation.

    Args:
        P: Pattern to look for.
        T: Text to search in.

    Returns:
        A list of 0-based start indices of P in T, in the order they are
        found.  Empty when P is empty or longer than T.
    """
    m = len(P)
    if m == 0 or len(T) < m:
        return []

    matches = []

    # Preprocessing.
    # N is only ever indexed by i, a position in P, so it has to be built
    # from the pattern (the previous code built it from T, which yields
    # values unrelated to P whenever T does not mirror P's suffix structure).
    # presumably fundamental_preprocess returns a Z-array -- TODO confirm
    N = fundamental_preprocess(P[::-1])  # P[::-1] reverses P
    N.reverse()
    R = bad_character_table(P)
    L = good_suffix_table(P)
    F = full_shift_table(P)
    M = [-1] * len(T)  # -1 marks "no alignment has ended here yet"

    k = m - 1         # Represents alignment of end of P relative to T
    i = m - 1         # Character to compare in P
    h = k             # Character to compare in T
    match = False     # Indicates whether an exact match has been found in this phase
    mismatch = False  # Indicates whether a mismatch has occurred

    while k < len(T):
        if M[h] == -1 or M[h] == 0 or N[i] == 0:  # Phase case 1
            # Nothing can be skipped here: compare the characters directly.
            if T[h] == P[i]:
                if i == 0:  # Case 1a: the whole pattern has been confirmed
                    match = True
                    mismatch = False
                else:  # Case 1b: keep walking left
                    i -= 1
                    h -= 1
                    match = False
                    mismatch = False
            else:  # Case 1c
                match = False
                mismatch = True
        elif (M[h] < N[i] and M[h] != -1) or (M[h] == N[i] and 0 < N[i] < i + 1):  # Case 2 & 5
            # Skip the M[h] characters already known to agree.
            skip = M[h]
            i -= skip
            h -= skip
            match = False
            mismatch = False
        elif M[h] >= N[i] and N[i] == i + 1 > 0:  # Phase case 3
            # The remaining prefix of P is covered by known information.
            match = True
            mismatch = False
        elif M[h] > N[i] and N[i] < i + 1:  # Phase case 4
            # Read N[i] once: i and h must move by the SAME amount so they
            # keep pointing at corresponding characters.  Re-reading N[i]
            # after updating i would shift h by an unrelated value.
            skip = N[i]
            i -= skip
            h -= skip
            match = False
            mismatch = True

        if match:
            matches.append(k - m + 1)
            M[k] = k - h
            k += m - F[1] if m > 1 else 1
            i = m - 1
            h = k
            match = False
            mismatch = False

        if mismatch:
            char_shift = i - R[alphabet_index(T[h])][i]
            if i + 1 == m:  # Mismatch happened on first attempt
                suffix_shift = 1
            elif L[i + 1] == -1:  # Matched suffix does not appear anywhere in P
                suffix_shift = m - F[i + 1]
            else:  # Matched suffix appears in P
                suffix_shift = m - L[i + 1]
            M[k] = k - h
            k += max(char_shift, suffix_shift)
            i = m - 1
            h = k
            match = False
            mismatch = False

    return matches