def approx_pattern_count(text, pattern, d):
    """Count occurrences of `pattern` in `text` with at most `d` mismatches.

    Every length-len(pattern) window of `text` whose Hamming distance to
    `pattern` is <= d is counted; overlapping windows all count.

    :param text: string to scan
    :param pattern: pattern to match approximately
    :param d: maximum number of allowed mismatches
    :return: number of approximately matching windows
    """
    fragment_length = len(pattern)
    # sum() over a generator replaces the manual counter; the original
    # enumerate() index was never used.
    return sum(
        1
        for x in window(text, fragment_length)
        if hamming_distance("".join(x), pattern) <= d
    )
def approx_pattern_count(text, pattern, d):
    """Return how many windows of `text` lie within Hamming distance `d`
    of `pattern` (windows have length len(pattern); overlaps count)."""
    size = len(pattern)
    matches = 0
    fragments = ("".join(chars) for chars in window(text, size))
    for fragment in fragments:
        if hamming_distance(fragment, pattern) <= d:
            matches += 1
    return matches
def enumerate_motifs(dnas, k, d):
    """Return all (k, d)-motifs of `dnas`.

    A (k, d)-motif is a k-mer that appears in every string of `dnas` with at
    most `d` mismatches. Candidates are every k-mer of every string together
    with all of its d-neighbourhoods.

    :param dnas: iterable of DNA strings
    :param k: motif length
    :param d: maximum number of mismatches
    :return: list of motifs (order unspecified, as with the original set)
    """
    def _kmers(text):
        # All length-k windows of `text`, joined into strings.
        return ["".join(x) for x in window(text, k)]

    resulting_set = set()
    for text in dnas:
        for kmer in _kmers(text):
            for candidate in generate_words_with_mismatches(kmer, d):
                # all()/any() replace the original will_add/was_found flag
                # variables and manual breaks: keep the candidate only if
                # every DNA string has a window within distance d of it.
                if all(
                    any(hamming_distance(frag, candidate) <= d
                        for frag in _kmers(dna))
                    for dna in dnas
                ):
                    resulting_set.add(candidate)
    return list(resulting_set)
def median(dnas, k):
    """Find a median k-mer for `dnas`: the k-mer minimizing d(kmer, dnas, k).

    Candidate k-mers are generated as the k-neighbourhood of every window of
    every DNA string; with k allowed mismatches over a length-k word this is
    effectively the full k-mer space.

    :param dnas: iterable of DNA strings
    :param k: k-mer length
    :return: (best_kmer, best_distance) tuple
    """
    kmers = set()
    for dna in dnas:
        for kmer in {"".join(x) for x in window(dna, k)}:
            kmers.update(generate_words_with_mismatches(kmer, k))
    opt_kmer = ''
    # Upper bound: strictly worse than any achievable total distance.
    opt_distance = len(dnas) * k + 1
    for kmer in kmers:
        # Hoisted: the original evaluated d(kmer, dnas, k) twice per
        # improving candidate.
        dist = d(kmer, dnas, k)
        if dist < opt_distance:
            opt_distance = dist
            opt_kmer = kmer
    return opt_kmer, opt_distance
def median(dnas, k):
    """Return (kmer, distance) minimizing d(kmer, dnas, k) over all candidate
    k-mers (the k-neighbourhood of every window of every string in `dnas`).
    """
    candidates = set()
    for dna in dnas:
        for kmer in {"".join(x) for x in window(dna, k)}:
            candidates.update(generate_words_with_mismatches(kmer, k))
    opt_kmer = ''
    opt_distance = len(dnas) * k + 1  # worse than any real total distance
    for kmer in candidates:
        dist = d(kmer, dnas, k)  # hoisted: was computed twice per candidate
        if dist < opt_distance:
            opt_kmer, opt_distance = kmer, dist
    return opt_kmer, opt_distance
def repeat_subst(text):
    """Return the longest substring that occurs more than once in `text`,
    or None when nothing repeats.

    Lengths are scanned longest-first, so the first hit is the answer.
    """
    # len(text) - 1 is the longest length a repeated substring can have;
    # the original started at len(text) (the whole text can never occur
    # twice) and stopped before k == 1, missing single repeated characters.
    for k in range(len(text) - 1, 0, -1):
        for kmer in ["".join(x) for x in window(text, k)]:
            if len(list(find_all(kmer, text))) > 1:
                return kmer
    return None
from _01_07_hamming_distance import hamming_distance
from _01_02_frequent_words import window

if __name__ == '__main__':
    # Input file: pattern, then text, then the mismatch tolerance d.
    with open('in.txt', 'r') as f:
        pattern = f.readline().strip()
        text = f.readline().strip()
        d = int(f.readline())
    fragment_length = len(pattern)
    # Start index of every window within Hamming distance d of the pattern.
    indexes = [
        i
        for i, chars in enumerate(window(text, fragment_length))
        if hamming_distance("".join(chars), pattern) <= d
    ]
    with open('out.txt', 'w') as f:
        f.write(' '.join(map(str, indexes)))
def common_subst(text1, text2):
    # NOTE(review): despite the name, this returns the SHORTEST substring of
    # text1 that does NOT occur in text2 (the condition is `not in`; compare
    # the sibling common_subst variant that tests `kmer in text2`) — confirm
    # which behavior callers actually want before renaming or fixing.
    # NOTE(review): the range stops BEFORE min(len(text1), len(text2)), so
    # substrings of exactly that length are never tried — looks like an
    # off-by-one; verify against the intended problem statement.
    for k in range(1, min([len(text1), len(text2)])):
        # Shortest-first: the first absent window found is returned.
        for kmer in ["".join(x) for x in window(text1, k)]:
            if kmer not in text2:
                return kmer
    # No qualifying substring found.
    return None
from _01_07_hamming_distance import hamming_distance
from _01_02_frequent_words import window

if __name__ == '__main__':
    # in.txt holds: pattern, text, mismatch tolerance d.
    with open('in.txt', 'r') as f:
        pattern = f.readline().strip()
        text = f.readline().strip()
        d = int(f.readline())
    fragment_length = len(pattern)
    positions = []
    # Record every window start whose fragment approximately matches.
    for idx, chars in enumerate(window(text, fragment_length)):
        fragment = "".join(chars)
        if hamming_distance(fragment, pattern) <= d:
            positions.append(idx)
    with open('out.txt', 'w') as f:
        f.write(' '.join(str(p) for p in positions))
count_dict = dict() max_count = 0 for kmer in kmers: app = approx_pattern_count(text, kmer, d) if app > 0: if kmer not in count_dict: count_dict[kmer] = 0 count_dict[kmer] += app if count_dict[kmer] > max_count: max_count = count_dict[kmer] result = [key for key in count_dict if count_dict[key] == max_count] return result, max_count if __name__ == '__main__': with open('in.txt', 'r') as f: text = f.readline().strip() k, d = map(int, f.readline().split()) substr_kmers = set(["".join(x) for x in window(text, k)]) kmers = set() for kmer in substr_kmers: kmers.update(set(generate_words_with_mismatches(kmer, d))) result, max_count = frequent_words_with_mismatch(text, kmers, d) with open('out.txt', 'w') as f: f.write(str(' '.join(result)))
def get_kmers(s, k):
    """Return the distinct length-k substrings of `s` (order unspecified)."""
    # Set comprehension deduplicates directly; list() matches the original
    # list(set([...])) return type.
    return list({"".join(chars) for chars in window(s, k)})
def distance(kmer, string, k):
    """Return the minimum Hamming distance between `kmer` and any length-k
    window of `string`.

    Returns the sentinel k + 2 when `string` yields no length-k window,
    matching the original's unimproved initial value.
    """
    # min() with a default replaces the manual tracking loop and avoids the
    # original's double evaluation of hamming_distance on every improvement.
    return min(
        (hamming_distance(kmer, "".join(x)) for x in window(string, k)),
        default=k + 2,
    )
import random
from _01_02_frequent_words import window
from _03_03_profile import profile_most_probable_kmer
from _03_04_greedy import profile_most_probable_kmer_swap, score, profile
from _03_05_pseudo import profile_with_pseudocounts

if __name__ == '__main__':
    with open('in.txt', 'r') as f:
        k = int(f.readline())
        dna = f.readline().strip()
    # Distinct k-mers of the DNA string, emitted one per line in sorted order.
    substr_kmers = {"".join(x) for x in window(dna, k)}
    # Sort once; the original computed sorted(substr_kmers) twice and left
    # the first result unused.
    result = sorted(substr_kmers)
    with open('out.txt', 'w') as f:
        f.write('\n'.join(result))
def common_subst(text1, text2):
    """Return the longest substring shared by `text1` and `text2`, or None
    when they share no substring at all (not even a single character).
    """
    # Longest-first scan: the first shared window found is the answer.
    # The range now runs down to k == 1 (the original stopped before 1 and
    # so missed single-character matches), and the no-match case returns
    # an explicit None instead of falling off the end.
    for k in range(min(len(text1), len(text2)), 0, -1):
        for kmer in ["".join(x) for x in window(text1, k)]:
            if kmer in text2:
                return kmer
    return None
max_count = 0 visited_kmers = set() for kmer in kmers: app = approx_pattern_count(text, kmer, d) + approx_pattern_count(text, reverse_dna(kmer), d) if app > 0 and kmer not in visited_kmers: if kmer not in count_dict: count_dict[kmer] = 0 count_dict[kmer] += app if count_dict[kmer] > max_count: max_count = count_dict[kmer] result = [key for key in count_dict if count_dict[key] == max_count] return result, max_count if __name__ == '__main__': with open('in.txt', 'r') as f: text = f.readline().strip() k, d = map(int, f.readline().split()) substr_kmers = set(["".join(x) for x in window(text, k)]) kmers = set() for kmer in substr_kmers: kmers.update(set(generate_words_with_mismatches(kmer, d))) result, max_count = frequent_words_with_mismatch(text, kmers, d) with open('out.txt', 'w') as f: f.write(str(' '.join(result)))
def distance(kmer, string, k):
    """Minimum Hamming distance from `kmer` to any length-k window of
    `string`; returns the sentinel k + 2 when no window exists.
    """
    best = k + 2  # larger than any possible distance (maximum is k)
    for x in window(string, k):
        # Evaluate the distance once per window; the original computed
        # hamming_distance twice whenever it improved the running minimum.
        dist = hamming_distance(kmer, "".join(x))
        if dist < best:
            best = dist
    return best