def frequent_words_with_mismatches_by_sorting(text, k, d):
    """Return the most frequent patterns among the neighbourhoods of text's k-mers.

    Every length-``k`` window of ``text`` is passed to ``neighbors(window, d)``
    and each pattern that call yields is converted to a code with
    ``p_to_n_modify``. The codes that were produced most often are converted
    back with ``n_to_p_modify(code, k)`` and returned.

    NOTE(review): ``neighbors``, ``p_to_n_modify`` and ``n_to_p_modify`` are
    defined outside this block. They are presumably the d-mismatch
    neighbourhood generator and the pattern<->number encoders; confirm
    against their definitions.

    Args:
        text: Sequence to scan; only ``len()`` and slicing are used on it.
        k: Window length.
        d: Forwarded unchanged to ``neighbors``.

    Returns:
        A list of patterns, ordered by ascending code. The list is empty
        when ``text`` contains no window of length ``k`` (the previous
        implementation raised ``ValueError`` from ``max()`` in that case).
    """
    # Tally how often each encoded pattern shows up across all neighbourhoods.
    # A dict replaces the earlier "sort the codes, then count equal runs" pass;
    # both give the number of occurrences of each code.
    tally = {}
    for start in range(len(text) - k + 1):
        for pattern in neighbors(text[start:start + k], d):
            code = p_to_n_modify(pattern)
            tally[code] = tally.get(code, 0) + 1

    # No windows (or no neighbours) means there is nothing to rank.
    if not tally:
        return []

    best = max(tally.values())
    # Iterating the codes in sorted order keeps the output order of the
    # sort-based implementation.
    return [n_to_p_modify(code, k) for code in sorted(tally) if tally[code] == best]
def motif_enumeration(dna, k, d):
    """Collect the patterns that are found in every sequence of ``dna``.

    For each length-``k`` window of each sequence, every pattern yielded by
    ``neighbors(window, d)`` is a candidate. A candidate is kept when
    ``patterncount_with_mismatchs(sequence, candidate, d)`` is greater than
    zero for every sequence in ``dna``.

    NOTE(review): ``neighbors`` and ``patterncount_with_mismatchs`` are
    defined outside this block. They are presumably the d-mismatch
    neighbourhood and an approximate occurrence counter; confirm against
    their definitions. This rewrite assumes both are pure, since each
    distinct candidate is now evaluated only once.

    Args:
        dna: Collection of sequences. It is iterated more than once, so it
            must be re-iterable (for example a list).
        k: Window length.
        d: Forwarded unchanged to both helpers.

    Returns:
        A set of the qualifying patterns; empty when ``dna`` is empty or no
        sequence has a window of length ``k``.
    """
    motifs = set()
    # The same candidate is usually produced by many windows; remember what
    # has already been checked so the scan over ``dna`` is not repeated.
    examined = set()

    for sequence in dna:
        for start in range(len(sequence) - k + 1):
            for pattern in neighbors(sequence[start:start + k], d):
                if pattern in examined:
                    continue
                examined.add(pattern)
                # all() stops at the first sequence that lacks the pattern.
                if all(
                    patterncount_with_mismatchs(other, pattern, d) > 0
                    for other in dna
                ):
                    motifs.add(pattern)

    return motifs
def frequent_words_with_mismatches(text, k, d):
    """Return the candidate patterns with the most approximate matches in ``text``.

    Candidates are the patterns yielded by ``neighbors(window, d)`` for every
    length-``k`` window of ``text``. They are de-duplicated through their
    ``p_to_n_modify`` code. Each candidate is then scored with
    ``len(approximate_pattern_matching(pattern, text, d))`` and the
    top-scoring ones are returned.

    NOTE(review): ``neighbors``, ``p_to_n_modify``, ``n_to_p_modify`` and
    ``approximate_pattern_matching`` are defined outside this block. They are
    presumably the d-mismatch neighbourhood, the pattern<->number encoders
    and an approximate-match position finder; confirm against their
    definitions.

    Only patterns produced by ``neighbors`` are ever returned. The earlier
    dense-array version returned all ``4**k`` patterns whenever the best
    score was 0 (for example when ``text`` is shorter than ``k``), and it
    allocated two lists of ``4**k`` entries regardless of input size.

    Args:
        text: Sequence to scan; only ``len()`` and slicing are used on it.
        k: Window length; also passed to ``n_to_p_modify``.
        d: Forwarded unchanged to ``neighbors`` and
            ``approximate_pattern_matching``.

    Returns:
        A list of patterns ordered by ascending code; empty when there are
        no candidates.
    """
    # Sparse replacement for the dense 4**k "close" flag array.
    candidate_codes = set()
    for start in range(len(text) - k + 1):
        for pattern in neighbors(text[start:start + k], d):
            candidate_codes.add(p_to_n_modify(pattern))

    if not candidate_codes:
        return []

    # Score candidates in ascending code order so the result keeps the order
    # the index-based scan produced. The pattern is rebuilt from its code,
    # as before, rather than reusing the string from ``neighbors``.
    scored = []
    for code in sorted(candidate_codes):
        pattern = n_to_p_modify(code, k)
        hits = len(approximate_pattern_matching(pattern, text, d))
        scored.append((pattern, hits))

    best = max(hits for _, hits in scored)
    return [pattern for pattern, hits in scored if hits == best]