def approx_pattern_count(text, pattern, d):
    """Count the windows of ``text`` that approximately match ``pattern``.

    Every window of ``len(pattern)`` consecutive characters (windows start
    at each successive index, so they overlap) is passed to
    ``hamming_distance`` together with ``pattern``; a window is counted
    when the reported distance is at most ``d``.

    NOTE(review): ``hamming_distance`` is defined outside this block; it is
    presumably the number of mismatching positions -- confirm.

    Args:
        text: Sequence to scan.
        pattern: Pattern every window is compared against.
        d: Largest distance that still counts as a match.

    Returns:
        The number of matching windows; 0 when ``text`` is shorter than
        ``pattern``.
    """
    width = len(pattern)
    # Lazily produce each candidate window, left to right.
    windows = (
        text[start:start + width]
        for start in range(len(text) - width + 1)
    )
    return sum(
        1 for window in windows if hamming_distance(pattern, window) <= d
    )
def hamming_in_all(dna_strings, kmer, d):
    """Report whether ``kmer`` approximately occurs in every DNA string.

    For each string in ``dna_strings`` the windows of ``len(kmer)``
    characters are compared with ``kmer`` through ``hamming_distance``; the
    string passes as soon as one window has a distance of at most ``d``.

    NOTE(review): ``hamming_distance`` is defined outside this block; it is
    presumably the number of mismatching positions -- confirm.

    Args:
        dna_strings: Iterable of strings to search.
        kmer: Pattern that must approximately occur in each string.
        d: Largest distance that still counts as a match.

    Returns:
        ``True`` when every string contains a matching window (including
        when ``dna_strings`` is empty), ``False`` as soon as one string has
        none -- for example because it is shorter than ``kmer``.
    """
    k = len(kmer)  # loop-invariant, so computed once
    for dna in dna_strings:
        # ``range`` instead of the Python 2-only ``xrange`` keeps this
        # working on Python 3; ``any`` stops at the first matching window.
        if not any(
            hamming_distance(kmer, dna[j:j + k]) <= d
            for j in range(len(dna) - k + 1)
        ):
            return False
    return True
# Example #3
# 0
def neighbours(pattern, d):
    """Build the set of strings over A/C/G/T close to ``pattern``.

    The set is built recursively from the neighbours of ``pattern[1:]``:
    a suffix neighbour whose ``hamming_distance`` to the suffix is still
    below ``d`` may be prefixed with any nucleotide, otherwise it is only
    prefixed with the original first character of ``pattern``.

    NOTE(review): ``hamming_distance`` is defined outside this block; it is
    presumably the number of mismatching positions, which would make the
    result the d-neighbourhood of ``pattern`` -- confirm.

    Args:
        pattern: String to generate neighbours for.
        d: Distance budget; ``0`` yields only ``pattern`` itself.

    Returns:
        A set of strings, each the same length as ``pattern``.
    """
    nucleotides = ['A', 'C', 'G', 'T']
    # An empty pattern has no position to change. Without this base case
    # the recursion below would keep slicing "" forever whenever d != 0.
    if d == 0 or not pattern:
        return {pattern}
    if len(pattern) == 1:
        return set(nucleotides)

    first_symbol = pattern[0]
    suffix_pattern = pattern[1:]
    neighbourhood = set()
    for t in neighbours(suffix_pattern, d):
        if hamming_distance(suffix_pattern, t) < d:
            # Budget left over: the first position may be any nucleotide.
            neighbourhood.update(n + t for n in nucleotides)
        else:
            # Budget used up by the suffix: keep the first symbol as is.
            neighbourhood.add(first_symbol + t)

    return neighbourhood