Esempio n. 1
0
def distance_between_pattern_and_strings(pattern, dna):
    """Return the summed minimum distance from ``pattern`` to each string.

    For every string in ``dna`` the smallest ``hamming_distance`` between
    ``pattern`` and any window of ``len(pattern)`` characters of that string
    is taken, and these per-string minima are added together.

    Args:
        pattern: The k-mer that every window is compared against.
        dna: A sized iterable of strings to scan.

    Returns:
        The sum of the per-string minima.  A string that has no window of
        length ``len(pattern)`` (it is shorter than the pattern) contributes
        the starting bound ``len(pattern) * len(dna)``.
    """
    k = len(pattern)
    # Starting bound for every per-string minimum; it is also what a string
    # without any window ends up contributing.
    upper_bound = k * len(dna)
    total = 0
    for string in dna:
        closest = upper_bound
        for start in range(len(string) - k + 1):
            # Score each window once and reuse the value for the comparison
            # and the assignment.
            current = hamming_distance(pattern, string[start:start + k])
            if current < closest:
                closest = current
        total += closest
    return total
def approximate_pattern_count(text, pattern, d):
  """Count the windows of ``text`` that lie within ``d`` of ``pattern``.

  Every substring of ``text`` with the same length as ``pattern`` is
  compared to ``pattern`` via ``hamming_distance``; a window is counted
  when that distance is at most ``d``.

  Args:
    text: The string to scan.
    pattern: The pattern each window is compared against.
    d: The largest distance that still counts as a match.

  Returns:
    The number of matching windows (0 when ``text`` is shorter than
    ``pattern``, since there are then no windows to inspect).
  """
  width = len(pattern)
  return sum(
    1
    for start in range(len(text) - width + 1)
    if hamming_distance(pattern, text[start:start + width]) <= d
  )
Esempio n. 3
0
def neighbors(pattern, d):
    """Return the set of strings whose distance to ``pattern`` is at most ``d``.

    The neighbourhood is built recursively over the alphabet A/C/G/T: the
    neighbours of the suffix ``pattern[1:]`` are computed first, and each
    one is then prefixed either with every nucleotide (when
    ``hamming_distance`` reports that the suffix still has mismatch budget
    left) or only with the original first character.

    Args:
        pattern: The string whose neighbourhood is requested.
        d: The mismatch budget passed down the recursion.

    Returns:
        A ``set`` of strings.  For ``d == 0`` or an empty ``pattern`` the set
        contains just ``pattern`` itself.
    """
    # Always hand back a set.  Returning the bare string here would make
    # callers iterate it character by character, and an empty pattern would
    # otherwise recurse without ever reaching a base case.
    if d == 0 or not pattern:
        return {pattern}
    nucleotides = ('A', 'C', 'G', 'T')
    if len(pattern) == 1:
        return set(nucleotides)
    suffix = pattern[1:]
    neighborhood = set()
    for candidate in neighbors(suffix, d):
        if hamming_distance(suffix, candidate) < d:
            # Budget left over: any first character keeps us within d.
            neighborhood.update(base + candidate for base in nucleotides)
        else:
            # Budget exhausted by the suffix: the first character must match.
            neighborhood.add(pattern[0] + candidate)
    return neighborhood
def neighbors(pattern, d):
  """Return a list of the strings whose distance to ``pattern`` is at most ``d``.

  Works recursively over the alphabet A/C/G/T: each neighbour of the suffix
  ``pattern[1:]`` is prefixed with every nucleotide when
  ``hamming_distance`` reports it is still below ``d``, and with the
  original first character otherwise.

  Args:
    pattern: The string whose neighbourhood is requested.
    d: The mismatch budget passed down the recursion.

  Returns:
    A list of strings, in the order the recursion produces them.
  """
  alphabet = ["A", "C", "G", "T"]

  if d == 0:
    return [pattern]
  if len(pattern) == 1:
    return alphabet

  tail = pattern[1:]
  result = []
  for candidate in neighbors(tail, d):
    if hamming_distance(tail, candidate) >= d:
      # No mismatch budget left: keep the original leading character.
      result.append(pattern[0] + candidate)
    else:
      result.extend(base + candidate for base in alphabet)
  return result
Esempio n. 5
0
def median_sorting(dna, k):
    """Return a k-mer with the smallest total distance to the strings in ``dna``.

    All ``4**k`` candidates produced by ``number_to_pattern`` are scored.  A
    candidate's score is the sum, over every string in ``dna``, of the
    smallest ``hamming_distance`` between the candidate and any length-``k``
    window of that string.

    Args:
        dna: A sized iterable of strings to search.
        k: The length of the k-mer to find.

    Returns:
        The first candidate whose score is strictly below every earlier one,
        or ``''`` when no candidate scores below ``k * len(dna)``.

    Raises:
        ValueError: If a string in ``dna`` has no window of length ``k``
            (the minimum of an empty list is undefined).
    """
    candidates = [number_to_pattern(index, k) for index in range(4**k)]
    best_kmer = ''
    best_score = k * len(dna)
    for candidate in candidates:
        score = 0
        for string in dna:
            window_scores = [
                hamming_distance(string[start:start + k], candidate)
                for start in range(len(string) - k + 1)
            ]
            score += min(window_scores)
        # Strict comparison: ties keep the earlier candidate.
        if score < best_score:
            best_score, best_kmer = score, candidate
    return best_kmer
Esempio n. 6
0
from neighbors import *
from hamming_distance import *
from skew_array import *

# Number of distinct strings in the neighbourhood of 'ACGT' for d = 3.
neighborhood = set(neighbors('ACGT', 3))
print(len(neighborhood))

# Approximate occurrences of 'CCC' allowing up to 2 mismatches.
approximate_hits = approximate_pattern_count(
    'CATGCCATTCGCATTGTCCCAGTGA', 'CCC', 2)
print(approximate_hits)

# Distance between two equally long sequences.
first_sequence = 'CAGAAAGGAAGGTCCCCATACACCGACGCACCAGTTTA'
second_sequence = 'CACGCCGTATGCATAAACGAGCCGCACGAACCAGAGAG'
print(hamming_distance(first_sequence, second_sequence))

# Result of skew_array for a sample sequence.
skew = skew_array('CATTCCAGTACTTCATGATGGCGTGAAGA')
print(skew)
Esempio n. 7
0
 def test_hamming_distance(self):
     # Two 11-character sequences that are expected to differ in 3 positions.
     expected = 3
     actual = hamming_distance('GGGCCGTTGGT', 'GGACCGTTGAC')
     self.assertEqual(actual, expected)
 def test_hamming_distance_0(self):
     # Short sample pair; the expected distance is 3.
     self.assertEqual(hamming_distance('GGGCCGTTGGT', 'GGACCGTTGAC'), 3)
 def test_hamming_distance_1(self):
     # Large fixture: two long nucleotide sequences whose distance, as
     # reported by hamming_distance, is expected to be 769.
     p = 'GCTAAACTGTTAAAGGTAGACCCGGGACGATGTGGGGATCATCACACCGTAAAACGTATGGCGCCGCGCGGTGCTAGCCCGGGGTTCAGTAGAGGCTAATGTTTTGTGGGTGACACAATACGATTGAGCTCATCTACCCATTCACCCGTGACTATGGAACTCCATAAGCTTGCAGCTTTCAAACTCAGAAATCGAGACGGAAATACTTTTTTCAGTTACCGAGCGGAACTAAGTGTGTAGGACGGGTAGCCGGCTATACAATATCGTACGGAACTGTGTAGAGTGCCTCTACAAAATAAGGATCGAACTTTGTGGAGTGACACCGATAGTAAATCCTCGGCTAGACCCCCATGTTAAGGTCGCATACACGAGCTAACTTTCACTTCATGTTACTACTCTTCGGGGGCCCCCATAGAGTATTCGATCTACCACCGATACGTAACTGGTCGATGACGCTAAAAACGTGGCACATCTCTAGGAAAGACCACTATTACAAGGCTTATTTCCAGCGTCGAACCATTGATGAAAGGATTTCTGTGGGCCATTGTAACTAGTACGAACTATTTGTCTTTGCGTATTTTAATAATTCACTGCTGTAATGGTTCAAAGCTTAACAGCAAACAAATAGAGATACGTTATCCACTGGGAATGACATAGCCGGAAGCCCCGTGCCTATTAGCTTGCGGTAGGCTTCTAGTGCCAACGAAACGCGTCACAGTAAATCCCAGCGCGGAAAAACGCCTTAGTCGAAAGAGGGCCAAGTCGTATGTATCGGTTGATCGCTTCAGCCCTGTCCCTTTACTTGTCCAAATGCAATGTGATGCGGTGCTGGCATCGGTGATTCAACTGTGTACTACGGTATGAAGAGCCCAAAGGCATCCTGCGCTCTTCACTCAATGACCACCTTGCTCACGGTTTGAACTATGAACAGGGTACGCTCAAAGATAAATCTAAGTAGGACGCCACTCATAGATGCTAATATGATCACGGCAACAATAACTGAATTAGGCGTGGCATTCCCATCTAACTGCGTGACCCTT'
     q = 'TTGAGCTAGGGGGAGGTCCTTATCGAACCGTGGAAGGTTCTTAATCTCGTTGTGCGCCCAGAGTAGTATTGCACCGGAAAACCTATCACTGGGATTGATGCAGGGGAGCTGCAAAATCTGTGCGGAGCGCCAGAACCACATTAATCTGGCACTGTGTTAGAAAGGCCTGACTGGGCCTCGAAGCCCCCCTTAGCCGTGCAGCGATCAAGAACAGAAGATGATCACGGCTGGTCTGTTCGAGTGTGGCCCGGGAACGAGTGTCGTAACTATGGATGAAAGTATACCCTCCTGCATCACCGGAGCTGCGAGAATCCCTGCGTTTACAATTTCAAAACCGGAAAAGATCAGCATTTTGGGAGTCAAGTGACCAGAGGTATTAATGCGATGGGGTAGAGCGAAGTAAGGCGCCACGGGGCACCTCTCACATATATGTCATTCGACGGCATGCTGTTGTAAATACGCTAGGAGCACGGCCTTGAGGCAGCGTGCGGAGCAGGTATAGCGAAGCAGGGAGATCCTACGAACGCCACTTATGCGATTGACTCACAGGCGGACTCCGTACAATCACCATCGCAGGGTCCTTCCGATAACTAGGGTCTACTGCGAGCACCTAGCACATCGTTCTAGTCGTAGACTGCCGCTCTCGCGCTCAAATTCGCCTTTCAGGGAGGTGCACCCCACGGATATAGGGCGGCATTGAATGTGTCCCACCTTCGGCCTTCGATGTGGCCTCTACACAGGCCCCGTTCTCGGCAGCTGTTGGAGCGCACTGGACGTCGTCGAAACTTGTGAAATGAAGAGGCGAGGGGTTATTCAATTTATAATCGTATAAATCGCACTGTATTGAGCGGATACGATTTGACAATGCTCTGAGAGTATGTCGTGTGGCACATATCTACAGCTTTTCCTCTGACGCGCACGCTGCGATCACCCCACCAGTAAACCCTTACTCCCACCGTCCTGGGCTTTAGATTTCAAACCTACGCTCAAGTGGTTGGCCGTAGGACAGCAGGCTAGATCACTACCACCATGTGCTATGA'
     a = hamming_distance(p, q)
     b = 769
     self.assertEqual(a, b)