def distance_between_pattern_and_strings(pattern, dna):
    """Sum the best per-string distance between ``pattern`` and ``dna``.

    For every string in ``dna`` the function slides a window of
    ``len(pattern)`` characters across it, scores each window with
    ``hamming_distance(pattern, window)`` and keeps the smallest score.
    The per-string minima are added together.

    Args:
        pattern: The string compared against every window; its length
            sets the window size.
        dna: A sequence of strings to scan.

    Returns:
        The sum of the per-string minimum distances. A string's
        contribution is capped at ``len(pattern) * len(dna)``, which is
        also what a string with no full-length window contributes.
    """
    k = len(pattern)
    # Upper bound used by the original implementation as the starting
    # "best so far" value for every string; kept so results are unchanged.
    ceiling = k * len(dna)

    total = 0
    for text in dna:
        # Each window is scored exactly once (the previous version called
        # hamming_distance twice whenever a window improved the minimum).
        window_distances = [
            hamming_distance(pattern, text[start:start + k])
            for start in range(len(text) - k + 1)
        ]
        total += min([ceiling] + window_distances)
    return total
def approximate_pattern_count(text, pattern, d):
    """Count windows of ``text`` that are within distance ``d`` of ``pattern``.

    Every substring of ``text`` with the same length as ``pattern`` is
    scored with ``hamming_distance(pattern, window)``; a window is counted
    when that score is less than or equal to ``d``.

    Args:
        text: The string to scan.
        pattern: The string each window is compared against.
        d: The largest score that still counts as a match.

    Returns:
        The number of matching windows (0 when there are none).
    """
    width = len(pattern)
    last_start = len(text) - width
    return sum(
        1
        for start in range(last_start + 1)
        if hamming_distance(pattern, text[start:start + width]) <= d
    )
def neighbors(pattern, d):
    """Return the set of strings generated as the ``d``-neighborhood of ``pattern``.

    The neighborhood is built recursively from the neighborhood of
    ``pattern[1:]``: a suffix whose ``hamming_distance`` to ``pattern[1:]``
    is still below ``d`` may be prefixed with any nucleotide, otherwise it
    is prefixed with the first character of ``pattern`` only.

    Args:
        pattern: The string to build the neighborhood around.
        d: The mismatch budget passed down the recursion.

    Returns:
        A ``set`` of strings. For ``d == 0`` or an empty ``pattern`` this is
        the one-element set ``{pattern}``.
    """
    # Always hand back a set: the previous ``return pattern`` gave callers a
    # bare string, so iterating the result yielded single characters.
    # The empty-pattern guard stops the recursion that otherwise never ends,
    # because ``''[1:]`` is again ``''``.
    if d == 0 or not pattern:
        return {pattern}

    alphabet = ('A', 'C', 'G', 'T')
    if len(pattern) == 1:
        return set(alphabet)

    suffix = pattern[1:]
    neighborhood = set()
    for candidate in neighbors(suffix, d):
        if hamming_distance(suffix, candidate) < d:
            # Budget left over: the first position may be any nucleotide.
            neighborhood.update(nucleotide + candidate for nucleotide in alphabet)
        else:
            # Budget exhausted by the suffix: the first position must match.
            neighborhood.add(pattern[:1] + candidate)
    return neighborhood
def neighbors(pattern, d):
    """Return the ``d``-neighborhood of ``pattern`` as a list.

    The list is built recursively from the neighborhood of ``pattern[1:]``:
    a suffix whose ``hamming_distance`` to ``pattern[1:]`` is still below
    ``d`` is prefixed with each of A, C, G, T (in that order), otherwise it
    is prefixed with the first character of ``pattern`` only.

    Args:
        pattern: The string to build the neighborhood around.
        d: The mismatch budget passed down the recursion.

    Returns:
        A list of strings. For ``d == 0`` or an empty ``pattern`` this is
        ``[pattern]``.
    """
    nucleotides = ["A", "C", "G", "T"]
    # An empty pattern has only itself as a neighbor; without this guard the
    # recursion on ``pattern[1:]`` never terminates for d > 0.
    if d == 0 or not pattern:
        return [pattern]
    if len(pattern) == 1:
        return nucleotides

    suffix = pattern[1:]
    neighborhood = []
    for text in neighbors(suffix, d):
        if hamming_distance(suffix, text) < d:
            # Budget left over: the first position may be any nucleotide.
            neighborhood.extend(x + text for x in nucleotides)
        else:
            # Budget exhausted by the suffix: the first position must match.
            neighborhood.append(pattern[0] + text)
    return neighborhood
def median_sorting(dna, k):
    """Pick the k-mer with the smallest total distance to the strings in ``dna``.

    Candidates are produced by ``number_to_pattern(i, k)`` for every ``i`` in
    ``range(4 ** k)`` (presumably an enumeration of all k-mers; the helper is
    defined elsewhere). For each candidate, the smallest
    ``hamming_distance`` between it and any length-``k`` window of a string
    is taken, and those minima are summed over all strings.

    Args:
        dna: A sequence of strings to scan.
        k: Length of the candidate k-mers and of the windows.

    Returns:
        The first candidate whose total is strictly below every earlier
        total and below the starting bound ``k * len(dna)``; ``''`` if no
        candidate gets below that bound.
    """
    candidates = [number_to_pattern(index, k) for index in range(4 ** k)]

    best_kmer = ''
    best_total = k * len(dna)
    for candidate in candidates:
        total = 0
        for text in dna:
            window_scores = [
                hamming_distance(text[start:start + k], candidate)
                for start in range(len(text) - k + 1)
            ]
            total += min(window_scores)
        # Strict comparison: on ties the earlier candidate is kept.
        if total < best_total:
            best_total, best_kmer = total, candidate
    return best_kmer
# Ad-hoc driver script: evaluates a few helpers on fixed inputs and prints
# the results.
# NOTE(review): the wildcard imports hide where each name comes from;
# approximate_pattern_count is presumably exported by one of these modules —
# confirm.
from neighbors import *
from hamming_distance import *
from skew_array import *

# Number of distinct entries returned by neighbors('ACGT', 3).
print(len(set(neighbors('ACGT', 3))))
# Result of approximate_pattern_count for pattern 'CCC' with d = 2.
print(approximate_pattern_count('CATGCCATTCGCATTGTCCCAGTGA', 'CCC', 2))
# Result of hamming_distance for two fixed sequences.
print(
    hamming_distance('CAGAAAGGAAGGTCCCCATACACCGACGCACCAGTTTA',
                     'CACGCCGTATGCATAAACGAGCCGCACGAACCAGAGAG'))
# Result of skew_array for a fixed sequence.
print(skew_array('CATTCCAGTACTTCATGATGGCGTGAAGA'))
def test_hamming_distance(self):
    """hamming_distance of two fixed 11-character sequences equals 3."""
    first = 'GGGCCGTTGGT'
    second = 'GGACCGTTGAC'
    expected = 3
    self.assertEqual(hamming_distance(first, second), expected)
def test_hamming_distance_0(self):
    """hamming_distance of the short sample pair equals 3."""
    self.assertEqual(hamming_distance('GGGCCGTTGGT', 'GGACCGTTGAC'), 3)
def test_hamming_distance_1(self):
    """hamming_distance of the two long sample sequences equals 769."""
    first = 'GCTAAACTGTTAAAGGTAGACCCGGGACGATGTGGGGATCATCACACCGTAAAACGTATGGCGCCGCGCGGTGCTAGCCCGGGGTTCAGTAGAGGCTAATGTTTTGTGGGTGACACAATACGATTGAGCTCATCTACCCATTCACCCGTGACTATGGAACTCCATAAGCTTGCAGCTTTCAAACTCAGAAATCGAGACGGAAATACTTTTTTCAGTTACCGAGCGGAACTAAGTGTGTAGGACGGGTAGCCGGCTATACAATATCGTACGGAACTGTGTAGAGTGCCTCTACAAAATAAGGATCGAACTTTGTGGAGTGACACCGATAGTAAATCCTCGGCTAGACCCCCATGTTAAGGTCGCATACACGAGCTAACTTTCACTTCATGTTACTACTCTTCGGGGGCCCCCATAGAGTATTCGATCTACCACCGATACGTAACTGGTCGATGACGCTAAAAACGTGGCACATCTCTAGGAAAGACCACTATTACAAGGCTTATTTCCAGCGTCGAACCATTGATGAAAGGATTTCTGTGGGCCATTGTAACTAGTACGAACTATTTGTCTTTGCGTATTTTAATAATTCACTGCTGTAATGGTTCAAAGCTTAACAGCAAACAAATAGAGATACGTTATCCACTGGGAATGACATAGCCGGAAGCCCCGTGCCTATTAGCTTGCGGTAGGCTTCTAGTGCCAACGAAACGCGTCACAGTAAATCCCAGCGCGGAAAAACGCCTTAGTCGAAAGAGGGCCAAGTCGTATGTATCGGTTGATCGCTTCAGCCCTGTCCCTTTACTTGTCCAAATGCAATGTGATGCGGTGCTGGCATCGGTGATTCAACTGTGTACTACGGTATGAAGAGCCCAAAGGCATCCTGCGCTCTTCACTCAATGACCACCTTGCTCACGGTTTGAACTATGAACAGGGTACGCTCAAAGATAAATCTAAGTAGGACGCCACTCATAGATGCTAATATGATCACGGCAACAATAACTGAATTAGGCGTGGCATTCCCATCTAACTGCGTGACCCTT'
    second = 'TTGAGCTAGGGGGAGGTCCTTATCGAACCGTGGAAGGTTCTTAATCTCGTTGTGCGCCCAGAGTAGTATTGCACCGGAAAACCTATCACTGGGATTGATGCAGGGGAGCTGCAAAATCTGTGCGGAGCGCCAGAACCACATTAATCTGGCACTGTGTTAGAAAGGCCTGACTGGGCCTCGAAGCCCCCCTTAGCCGTGCAGCGATCAAGAACAGAAGATGATCACGGCTGGTCTGTTCGAGTGTGGCCCGGGAACGAGTGTCGTAACTATGGATGAAAGTATACCCTCCTGCATCACCGGAGCTGCGAGAATCCCTGCGTTTACAATTTCAAAACCGGAAAAGATCAGCATTTTGGGAGTCAAGTGACCAGAGGTATTAATGCGATGGGGTAGAGCGAAGTAAGGCGCCACGGGGCACCTCTCACATATATGTCATTCGACGGCATGCTGTTGTAAATACGCTAGGAGCACGGCCTTGAGGCAGCGTGCGGAGCAGGTATAGCGAAGCAGGGAGATCCTACGAACGCCACTTATGCGATTGACTCACAGGCGGACTCCGTACAATCACCATCGCAGGGTCCTTCCGATAACTAGGGTCTACTGCGAGCACCTAGCACATCGTTCTAGTCGTAGACTGCCGCTCTCGCGCTCAAATTCGCCTTTCAGGGAGGTGCACCCCACGGATATAGGGCGGCATTGAATGTGTCCCACCTTCGGCCTTCGATGTGGCCTCTACACAGGCCCCGTTCTCGGCAGCTGTTGGAGCGCACTGGACGTCGTCGAAACTTGTGAAATGAAGAGGCGAGGGGTTATTCAATTTATAATCGTATAAATCGCACTGTATTGAGCGGATACGATTTGACAATGCTCTGAGAGTATGTCGTGTGGCACATATCTACAGCTTTTCCTCTGACGCGCACGCTGCGATCACCCCACCAGTAAACCCTTACTCCCACCGTCCTGGGCTTTAGATTTCAAACCTACGCTCAAGTGGTTGGCCGTAGGACAGCAGGCTAGATCACTACCACCATGTGCTATGA'
    expected = 769
    self.assertEqual(hamming_distance(first, second), expected)