def test_gattaca(self):
    # Smoke test: runs kmp.match against a long DNA-like sequence and prints
    # the sequence length and the two match results for manual inspection.
    # NOTE(review): nothing is asserted here, so this test can only fail if
    # kmp.match raises. Consider adding assertions once the return contract
    # of kmp.match (called here as match(pattern, text)) is confirmed.
    sequence = (
        'GACTTTTTTTTTTTTTCCTTTGGGAAAGGTAGGGAGGTGTTCGTACGGGAGCAGCCTCGG'
        'GGACCCCTGCACTGGGTCAGGGCTTATGAAGCTAGAAGCGTCCCTCTGTTCCCTTTGTGA'
        'GTTGGTGGGTTGTTGGTACATTTGGTTGGAAGCTGTGTTGCTGGTTAGGGAGACTCGGTT'
        'TTGCTCCTTGGGTTCGAGGAAAGCTGGAGAATAGAAGCCATTGTTTGCCGTCTGTCGGCT'
        'TTGTCGACCACGCTCACCCCCTCCTGTTCGTACTTTTTAAAGCAGTGAGGCGAGGTAGAC'
        'AGGGTGTGTCACAGTACAGTTAAAGGGGTGAAGATCTAAACGCCAAAAGAGAAGTTAATC'
        'ACAATAAGTGAGGTTTGGGATAAAAAGTTGGGCTTGCCCCTTTCAAAGTCCCAGAAAGCT'
        'GGGAGGTAGATGGAGAGGGGGCCATTGGGAAGTTTTTTTGGTGTAGGGAGAGGAGTAGAA'
        'GATAAAGGGTAAGCAGAGTGTTGGGTTCTGGGGGTCTTGTGAAGTTCCTTAAGGAAGGAG'
        'GGAGTGTGGCCCTGCAGCCCTCCCAAACTGCTCCAGCCTATGCTCTCCGGCACCAGGAAG'
        'TTCCAAGGTTCCCTTCCCCTGGTCTCCAAACTTCAGGTATTCCTCTCCCCTCACACCCCT'
        'TCAACCTCAGCTCTTGGCCTCTACTCCTTACTCCACTGTTCCTCCTGTTTCCCCCTTCCC'
        'CTTTTCCTGGTTCTTTATATTTTTGCAAAGTGGGATCCGAACTTGCTAGATTTTCCAATT'
        'CTCCCAAGCCAGACCAGAGCAGCCTCTTTTAAAGGATGGAGACTTCTGTGGCAGATGCCG'
        'CTGAAAATGTGGGTGTAATGCTGGGACTTAGAGTTTGATGACAGTTTGACTGAGCCCTAG'
        'ATGCATGTGTTTTTCCTGAGAGTGAGGCTCAGAGAGCCCATGGACGTATGCTGTTGAACC'
        'ACAGCTTGATATACCTTTTTCTCCTTCTGTTTTGTCTTAGGGGGAAGACTTTAACTAGGG'
        'GCGCGCAGATGTGTGAGGCCTTTTATTGTGAGAGTGGACAGACATCCGAGATTTCAGGCA'
        'AGTTCTGTGGTGGCTGCTTTGGGCT'
    )
    # Single-argument print(...) is valid on both Python 2 and Python 3 and
    # emits the same text as the former Python 2-only print statements.
    print(len(sequence))
    print(kmp.match('CAT', sequence))
    print(kmp.match('GGCAA', sequence))
def test_match_returns_index_of_pattern_matched_in_test(self):
    # kmp.match(text, pattern) is compared against str.find, which serves as
    # the reference result for every (text, pattern) pair below.
    # The last pair has no occurrence ('ABABAC' never appears in its text),
    # so the reference value there is str.find's -1.
    cases = (
        ('AAAAAAAAAAAAAAAAAB', 'AAAB'),
        ('AAAAABAAABA', 'AAAA'),
        ('ABABABCABABABCABABABC', 'ABABAC'),
    )
    for haystack, needle in cases:
        expected = haystack.find(needle)
        self.assertEqual(expected, kmp.match(haystack, needle))
#we make all strings and text upper case to compensate for variation #in capitalization for i in range(len(products)): S1 = products[i][m].upper() S2 = products[i][mo].upper() S3 = charmod.repSpace(products[i][mo]).upper() S4 = charmod.rmSpace(products[i][mo]).upper() F1 = kmp.fa(S1); F2 = kmp.fa(S2) F3 = kmp.fa(S3); F4 = kmp.fa(S4) matchtemp = [] #stores indices of matching listings for j in range(len(listings)): #attempting manufacturer matching TF1 = kmp.match(S1,listings[j][m].upper(),F1) #if manufacturer matches, we attempt to match the model number in the #title of the listings text if TF1: TF2 = kmp.match(S2,listings[j][t].upper(),F2) TF3 = kmp.match(S3,listings[j][t].upper(),F3) TF4 = kmp.match(S4,listings[j][t].upper(),F4) if TF2: matchtemp.append(j) elif TF3: matchtemp.append(j) elif TF4: matchtemp.append(j) #save product name and list matching listings matchar.append( { pn : products[i][pn], 'listings' : [listings[k] for k in matchtemp]
def test_match_returns_negative_1_if_pattern_not_in_text(self):
    # The text consists solely of 'A' characters, so a pattern ending in 'B'
    # cannot occur; kmp.match(text, pattern) is expected to report -1.
    haystack = 'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA'
    needle = 'AAAB'
    found = kmp.match(haystack, needle)
    self.assertEqual(-1, found)
def test_no_match(self):
    # A pattern that does not occur in the text must produce an empty list.
    # Here kmp.match is called as match(pattern, text).
    needle = 'zzyzyvq'
    haystack = 'abc abcdab abcdabcdabde'
    self.assertEqual([], kmp.match(needle, haystack))
def test_pattern_is_text(self):
    # Searching a string for itself must report a single match at offset 0.
    word = 'abcdabd'
    self.assertEqual([0], kmp.match(word, word))
def test_match(self):
    # The pattern occurs exactly once in the text, starting at index 15, so
    # kmp.match(pattern, text) is expected to return that single offset.
    needle = 'abcdabd'
    haystack = 'abc abcdab abcdabcdabde'
    expected_offsets = [15]
    self.assertEqual(expected_offsets, kmp.match(needle, haystack))