def test_empty_string(self):
     self.assertEqual([], good_suffix_table(''))
 def test_single_character_string(self):
     self.assertEqual([-1], good_suffix_table('a'))
 def test_no_suffix_occurrences(self):
     S = 'abcdefghijklmnopqrstuvwxyz'
     expected = [-1 for x in range(26)]
     self.assertEqual(expected, good_suffix_table(S))
 def test_repeated_char_string(self):
     S = 'aaaaaa'
     expected = [-1,4,3,2,1,0]
     self.assertEqual(expected, good_suffix_table(S))
 def test_last_char_matches_first(self):
     S = 'abcdefga'
     expected = [-1,-1,-1,-1,-1,-1,-1,0]
     self.assertEqual(expected, good_suffix_table(S))
 def test_suffix_matches_substring(self):
     S = 'xaabaab'
     expected = [-1,-1,-1,-1,3,-1,-1]
     self.assertEqual(expected, good_suffix_table(S))
 def test_multiple_suffix_occurrence(self):
     S = 'aacaacaacaac'
     expected = [-1,-1,-1,8,-1,-1,5,-1,-1,2,-1,-1]
     self.assertEqual(expected, good_suffix_table(S))
 def test_single_suffix_occurrence(self):
     S = 'aabaacaab'
     expected = [-1,-1,-1,-1,-1,-1,2,-1,-1]
     self.assertEqual(expected, good_suffix_table(S))
Example #9
0
def string_search(P, T):
    if len(P) == 0 or len(T) < len(P):
        return []

    matches = []

    # Preprocessing
    N = fundamental_preprocess(T[::-1]) # S[::-1] reverses S
    N.reverse()
    R = bad_character_table(P)
    L = good_suffix_table(P)
    F = full_shift_table(P)
    M = [-1 for c in T]

    k = len(P) - 1      # Represents alignment of end of P relative to T
    i = len(P) - 1      # Character to compare in P
    h = k               # Character to compare in T
    match = False       # Indicates whether an exact match has been found in this phase
    mismatch = False    # Indicates whether a mismatch has occurred

    while k < len(T):
        if M[h] == -1 or M[h] == 0 or N[i] == 0:    # Phase case 1
            #print 'Case 1'
            if T[h] == P[i]: 
                if i == 0:  # Case 1a
                    match = True
                    mismatch = False
                else:       # Case 1b
                    i -= 1
                    h -= 1
                    match = False
                    mismatch = False
            else:           # Case 1c
                match = False
                mismatch = True
        elif (M[h] < N[i] and M[h] != -1) or (M[h] == N[i] and 0 < N[i] < i+1): # Case 2 & 5
            #print 'Case 2 & 5'
            i -= M[h]
            h -= M[h]
            match = False
            mismatch = False
        elif M[h] >= N[i] and N[i] == i+1 > 0:  # Phase case 3
            #print 'Case 3'
            match = True 
            mismatch = False
        elif M[h] > N[i] and N[i] < i+1:    # Phase case 4
            #print 'Case 4'
            i -= N[i]
            h -= N[i]
            match = False
            mismatch = True
        if match:
            matches.append(k - len(P) + 1)
            M[k] = k - h
            k += len(P)-F[1] if len(P) > 1 else 1
            i = len(P) - 1
            h = k
            match = False
            mismatch = False
        if mismatch:
            char_shift = i - R[alphabet_index(T[h])][i]
            if i+1 == len(P):   # Mismatch happened on first attempt
                suffix_shift = 1
            elif L[i+1] == -1:   # Matched suffix does not appear anywhere in P
                suffix_shift = len(P) - F[i+1]
            else:               # Matched suffix appears in P
                suffix_shift = len(P) - L[i+1]
            M[k] = k - h
            k += max(char_shift, suffix_shift)
            i = len(P) - 1
            h = k
            match = False
            mismatch = False
    return matches