def approximateMatches(Pattern, Text, d):
    """Locate every approximate occurrence of Pattern inside Text.

    Input: Strings Pattern and Text along with an integer d.
    Output: All starting positions where Pattern appears as a substring of
    Text with at most d mismatches, formatted by listToString.
    """
    width = len(Pattern)
    last_start = len(Text) - width
    # Slide a window of len(Pattern) characters over Text and keep the
    # offsets whose window is within distance d of Pattern.
    hits = [
        start
        for start in range(last_start + 1)
        if hammingDistance(Pattern, Text[start:start + width]) <= d
    ]
    return listToString(hits)
def findClumps(Text, k, L, t):
    """Collect the k-mers that form (L, t)-clumps in Text.

    Input: A string Text (the genome), and integers k, L, and t.
    Output: All distinct k-mers whose count, as reported by frequencyTable,
    reaches t inside at least one window of length L of Text. The result is
    formatted by listToString.
    """
    patterns = set()
    # A text of length n has n - L + 1 windows of length L. The "+ 1" makes
    # sure the final window (the one ending on the last character) is
    # inspected too; without it a text of exactly length L is never scanned.
    for start in range(len(Text) - L + 1):
        window = Text[start:start + L]
        # NOTE(review): frequencyTable is assumed to return a dict mapping
        # each k-mer of the window to its occurrence count -- confirm against
        # its definition.
        for kmer, count in frequencyTable(window, k).items():
            if count >= t:
                patterns.add(kmer)
    return listToString(list(patterns))
while True: profile = constructProfile(motifs, k, t) motifs = Motifs(Dna, k, profile) if score(motifs) < score(bestMotif): bestMotif = motifs else: return bestMotif def Motifs(Dna, k, profile): motifs = [] for seq in Dna.split("\n"): motifs.append(profileMostProbable(seq, k, profile)) return motifs def iteration_RMS(Dna, k, t, iter): bestMotifs = RandomizedMotifSearch(Dna, k, t) for i in range(0, iter - 1): motifs = RandomizedMotifSearch(Dna, k, t) if score(motifs) < score(bestMotifs): bestMotifs = motifs return bestMotifs if __name__ == "__main__": bestMotifs = iteration_RMS( "AGTCAACCCGGATCGCGGGTCGGTATGGCCACAGCACTCCGCGAACAGTCGGATAGGCAAGCACGGGGATCCGAACGCTCCAAGCGGCCATAAATTAGTTTCTTCTGGGTGCCATTAGAACTAGCCATCCTGCTAACCGCTCGGCGTAATGTGCACACTGCCTAAATTGCTTTAACGCATCCTATAAGTCAACCCGGATCG\nCGGGTCGGTATGGCCACAGCACTCCGCGAACAGTCGGATAGGCAAGCACGGGGATCCGAACGCTCCAAGCGGCCATATCGCTCCGGACCTCCAATTAGTTTCTTCTGGGTGCCATTAGAACTAGCCATCCTGCTAACCGCTCGGCGTAATGTGCACACTGCCTAAATTGCTTTAACGCATCCTATAAGTCAACCCGGATCG\nTGCGGGACTTGTAGATGGAATGCAGTTCTTGAGAGGTCCGTGCCTGGTTTTATCATTCCCGGGGAGCCCGATTGGGTGCCTGAGGCTTTCGGACTACAGGTTCCCCATAAACCACGAACCAATAGTAGCAGAAGCCAATAGCATCGAACTGGGCCCCCTTGCGAACGGACCTCCGCACGGGTGAGTATACTTCGGATACCC\nCGATATTGACACTGGTGACTCCCGCACCCTCATCGAGCGACCCCTGGGTTGCAGACATTAAGTTCTTGTTACACGTCATATTCATTTGGCTAAGGCTGCGGTAGTAAGCTACCCGGGACGATCTCAAAATCGAAAGTAACCTCCTTAGGGAAAACCTACTCACTCACCCACGGCCGTTCATCACTTTCGGACCATTCAGGT\nACTGTAGGCTTTTCAGGTTAAGACCACTACCCTTGGAAGCGCCAGGTGGGACCACCAATGAGATGATACATATGGGGCAGTTCTCTGGATGATTACACGCCACTCAGTCTAGCTTGTTTGCGCCAGTGAAAATTTGTCAACAAAGACGGCGATTGTAGGTCGAAACGGTAGTCCACTCGTGTTTCCATAAATGCCCTCGAA\nCATAGTTAGATCCACGTGCTGTTCCTAATAGAGACTTTACGCAATGCCGTATGGCTCTCTACGATGGGCTCAGGGGCATAGTTAAGTTACAAGTCAATTGAGATAGACTGGCGCAGACTTTGTGTCGAAACCTTCCTCCACTACGATGTTACCAACAGGGTTAGTGCAAGCATCCATCCATTATTTCACTCTGGCGGACTA\nAATGGCATGGTCCGCAGCCAAATATCGAGACACGAGTTGATACACCGTTCGATTAGCAACTCGAAAGTTACCTCCCTAGAGCGGCGTCGTCGGTTGATGTCATCTTTTCTCTGATCGCATTAATCTCGTGTAAAAAAATTAATGCATACTGAGGCACAGCTTCTTACAATCATGTGAGGTACTTCCGCCTGAATTCT
GATG\nGCCTGTTGATAGGTCGGCTTTGTGTTGCCACTGCACAAGGAGGTAGCGTAGAGTGGCGTCACTTTCCAACTCGCGCTTTCAATGACCCCAACGTGCTGCAGATATGTGTAGAGTGAAATTACATAACGCCTTGCTATCCGTCCGTATAATTCCCAGTGGTTCGAAACGCTGCTCCAGGGATTAGGTGGCTAGCGAGGGCTT\nAAAGCTCTAAGATGTTCGCATGCGCAACGTGTAATTCAGGTACGCACGTCGAAACGGACCAAGTGACGATGGTTCACAACCGTTTGCCTAAGCGCGAGCAGTAACGTAAGCAAGTAGCGTGAACCAAGAGCATACCCTGTCTCTGGTTGAGACTATCGGCTTTGATAGAGCGTCTACAGTGCCACTCGTACATGTTCTCGA\nCAATAGCACTCGTTTTCCGATCGACCAGGACCTCCCCTTACCAGGCGGACTGTAATGAGCTGAACGGTTAAAAGGTCCAGTAGTAGCGTCGTGTCAATACCGGTCTTCTGACATCGGGGGAGGCCTGCGGAACAAAAAAAACAATATTATTTTGAATCGGGATCAGGGCACACACGAAATCATAGACACTACGTCTCGGCT\nGGTAGCATGCAAGACGGGGTCACCGTATGCAAGACAACTCACGTTAATAAAGGTCGAGCCAGCGGTTATAGGACACGAATTGAAGGTTTGTATAGACGGGCAAGCCTCATCCGTTGCCCATTGAGTCCCCTCGGGTCGACGAGGACCTCCTACACTGGATTTTTACCCAAGCCAAGACGAAAAACCTAGGCAAGTAACATT\nAGTTTTTGTATGCCTTTATTTGGGGGCGCAAAGGTTTGGATTTTAACTCAGGCGAGCTACGCCTACCGAAATTAAGCACCTGGTTAACGACGGGAAGAATATCTCACCTACAACTGACAAAGACCAGGCGCCACGTTCAGGTGACACGATTGGTAGGCGGTTCCTACCTTCCGATCGAAACGGACACACGATGTCTTGACA\nCCAGGCTGCTCCCAGTTATCGAGGCATCCTTGTGACTAGCGCCTCGAACGAGACCTCCTACCTATTGCTATATAAATCCAGCGTCGGATCTCACCCGAGACTCATCCCGTGCCTTCCCAGCCCTTGTTGGTCTAGTGCAGCTTAATTCTGTCGATAAAAGGGATGCTAGGTAAGTCATTCACGAATTTTCGTGTGTAGTAT\nACCTTGTAATATCTAATCGGCAGCTTCTCCAATACCATCGTCTCGGACCTCCAGTCGCTCGCTGCGTACGTACTCGTGTTCACACAGGGCGAACGCTGGCGGGCCGGGACGCTTCAGGGAGAGCGTTAGGCCATCAGCGTGACTCGGACGGTGACTATTGGATCTCACCCCCACTCAGGCGTCTCGGCACCCTCATAACCC\nCACTATCCTACCGTATCGGGCTCGAATATGCGCCATCCGAAGGGAGCGATTCAATAGGGACGGCCGGGTCGGCGCGCATATCTTGACTGCACGAAAATGAAACGGACCTCATATTCATGATCCACTGACTAGTAAATAGCTGCGAGGGCAGTTGTGGCAGGTCTTACAGGGTAAACGTTAACGAACTCCACTTTTCGTGGG\nTCACGCCCCTCGACGAAGTCGAAACGGAGACCCGTCCCACGCACATAGGAAGCCAGCGTCATCTGGCTTATGGGGCCGTAGGCTGGATGGTAGGCTCGACGTAGAGTGTCTCAATACAAGCTATTGGGCGCAGATTGATAAACCGCCCGCTTTTCAGACGTTAGTTACCTTCTGTGTGAGGATTGCAACATTACTTGCTAA\nACCCATCGCAGACCGAACTGGGGTTCTAGTACACGACGCCCACGCGATTTGTAGCGCACTTATTGGACTTGAGCCACTCAGCTCCCAGTCCATTCATATTTGTTACAATACGTCCCGTACCAGTGGAACCCCGTTTCCCCGACGGGCGTGCCCGGAGCATCTCGTAG
AGGAAAAACGGACCTCCTGAAGGCGATTCGCCTG\nCTATATCCCTGGGTGCTTCACTCAGTCGAATGTGACCTCCAAGTAAAGAACCGGGATTACTTACCAACCTTAGCCGATAGCGATACTATAACGCTCCACCATCCTGACTGGCAACGCACCATTCCTCTTGCAACATGACGACCTGCCTTATTGGCCGATATTTAAGTCAAGAGACTCTGGTAAGTGCAGACGGTGTAAAAC\nGCTACGCCCGGGTGAGGAACGGGGTCCTCCACAGTTAACTATATTGCCAAATTCTTGGAGAAAGTCATAAACTTAAAATATCCATTGCAATACGAAGACCGTGGGGATGAACGCCGCGGGCTGTCTCTGTGTGGTGCTAAAACTCCCGGTGTAACCGATGCATGTTCTTTATCAGCACGGACCTCCCCATGATGCGCTCGC\nGGACATGATGGACTCAAGAGACTTCAGACGCCCTGGCCCCCGACTGCTTTTCTCGCCCGTGCGTAGCCCGCCGAAACGGACCTAAATAACCCCTTGACATACCAATGTACGATCTCGTCAATGTCCTTTTTACCCAACCCGATCAACAAGTCAGGGTCGAGCTACCGCCTGAAGTTGGCCACACACCGCAACCCAGACCGG", 15, 20, 1000) print(listToString(bestMotifs).replace(" ", "\n")) print(score(bestMotifs))
# Output: All integer(s) i minimizing Skewi (Genome) among all values of i (from 0 to |Genome|) minVal = 999 # Is it best to start from 0? skewness = 0 positions = list() skewValues = { "G": 1, "C": -1, "A": 0, "T": 0, "g": 1, "c": -1, "a": 0, "t": 0 } for i in range(0, len(Genome)): skewness = skewness + skewValues.get(Genome[i]) if (minVal > skewness): positions = list() minVal = skewness if (minVal == skewness): positions.append(i + 1) return (skewness, positions) if __name__ == "__main__": with open('dataset_369238_6.txt', 'r') as file: data = file.read().replace("\n", "") print(listToString(minimumSkew(data)[1]))
def profile_random_probability_kmer(string, k, profile):
    """Randomly pick one k-mer of ``string``, weighted by ``profile``.

    Args:
        string: Sequence over the alphabet A, C, G, T.
        k: Length of the k-mers to consider.
        profile: Matrix indexed as ``profile[row][column]`` where the rows
            are ordered A, C, G, T and column ``j`` holds the weight of each
            nucleotide at position ``j`` of a k-mer.

    Returns:
        A k-mer (substring of ``string``) drawn with probability proportional
        to the product of its per-position profile entries.

    Raises:
        KeyError: If ``string`` contains a character other than A, C, G, T.
        IndexError: If ``string`` is shorter than ``k`` (nothing to choose).
    """
    translation = {"A": 0, "C": 1, "G": 2, "T": 3}
    # One entry per *position*: a k-mer that occurs at several positions must
    # contribute its weight once per occurrence, so parallel lists are used
    # instead of a dict keyed by k-mer (which would merge the duplicates).
    kmers = []
    weights = []
    for i in range(len(string) - k + 1):
        kmer = string[i:i + k]
        probability = 1.0
        for j, nucleotide in enumerate(kmer):
            probability *= profile[translation[nucleotide]][j]
        kmers.append(kmer)
        weights.append(probability)
    # random.choices rejects a weight vector whose total is zero; when the
    # profile rules out every window, fall back to a uniform pick.
    if sum(weights) <= 0:
        return random.choice(kmers)
    return random.choices(kmers, weights=weights)[0]


def iteration_GS(Dna, k, t, N, iter):
    """Run GibbsSampler repeatedly and keep the lowest-scoring motif set.

    GibbsSampler(Dna, k, t, N) is called once up front and then ``iter - 1``
    more times; the result with the smallest ``score`` is returned.
    """
    bestMotifs = GibbsSampler(Dna, k, t, N)
    # Cache the incumbent's score instead of recomputing it on every pass.
    # NOTE(review): assumes score() is deterministic for a given motif set.
    bestScore = score(bestMotifs)
    for _ in range(iter - 1):
        motifs = GibbsSampler(Dna, k, t, N)
        currentScore = score(motifs)
        if currentScore < bestScore:
            bestMotifs, bestScore = motifs, currentScore
    return bestMotifs


if __name__ == "__main__":
    # Adjacent string literals are concatenated by the parser; each literal
    # below is one newline-terminated sequence of the input, except where a
    # sequence is written as two adjacent pieces.
    print(
        listToString(
            iteration_GS(
                "GCGCCCCGCCCGGACAGCCATGCGCTAACCCTGGCTTCGATGGCGCCGGCTCAGTTAGGGCCGGAAGTCCCCAATGTGGCAGACCTTTCGCCCCTGGCGGACGAATGACCCCAGTGGCCGGGACTTCAGGCCCTATCGGAGGGCTCCGGCGCGGTGGTCGGATTTGTCTGTGGAGGTTACACCCCAATCGCAAGGATGCATTATGACCAGCGAGCTGAGCCTGGTCGCCACTGGAAAGGGGAGCAACATC\n"
                "CCGATCGGCATCACTATCGGTCCTGCGGCCGCCCATAGCGCTATATCCGGCTGGTGAAATCAATTGACAACCTTCGACTTTGAGGTGGCCTACGGCGAGGACAAGCCAGGCAAGCCAGCTGCCTCAACGCGCGCCAGTACGGGTCCATCGACCCGCGGCCCACGGGTCAAACGACCCTAGTGTTCGCTACGACGTGGTCGTACCTTCGGCAGCAGATCAGCAATAGCACCCCGACTCGAGGAGGATCCCG\n"
                "ACCGTCGATGTGCCCGGTCGCGCCGCGTCCACCTCGGTCATCGACCCCACGATGAGGACGCCATCGGCCGCGACCAAGCCCCGTGAAACTCTGACGGCGTGCTGGCCGGGCTGCGGCACCTGATCACCTTAGGGCACTTGGGCCACCACAACGGGCCGCCGGTCTCGACAGTGGCCACCACCACACAGGTGACTTCCGGCGGGACGTAAGTCCCTAACGCGTCGTTCCGCACGCGGTTAGCTTTGCTGCC\n"
                "GGGTCAGGTATATTTATCGCACACTTGGGCACATGACACACAAGCGCCAGAATCCCGGACCGAACCGAGCACCGTGGGTGGGCAGCCTCCATACAGCGATGACCTGATCGATCATCGGCCAGGGCGCCGGGCTTCCAACCGTGGCCGTCTCAGTACCCAGCCTCATTGACCCTTCGACGCATCCACTGCGCGTAAGTCGGCTCAACCCTTTCAAACCGCTGGATTACCGACCGCAGAAAGGGGGCAGGAC\n"
                "GTAGGTCAAACCGGGTGTACATACCCGCTCAATCGCCCAGCACTTCGGGCAGATCACCGGGTTTCCCCGGTATCACCAATACTGCCACCAAACACAGCAGGCGGGAAGGGGCGAAAGTCCCTTATCCGACAATAAAACTTCGCTTGTTCGACGCCCGGTTCACCCGATATGCACGGCGCCCAGCCATTCGTGACCGACGTCCCCAGCCCCAAGGCCGAACGACCCTAGGAGCCACGAGCAATTCACAGCG\n"
                "CCGCTGG"
                "CGACGCTGTTCGCCGGCAGCGTGCGTGACGACTTCGAGCTGCCCGACTACACCTGGTGACCACCGCCGACGGGCACCTCTCCGCCAGGTAGGCACGGTTTGTCGCCGGCAATGTGACCTTTGGGCGCGGTCTTGAGGACCTTCGGCCCCACCCACGAGGCCGCCGCCGGCCGATCGTATGACGTGCAATGTACGCCATAGGGTGCGTGTTACGGCGATTACCTGAAGGCGGCGGTGGTCCGGA\n"
                "GGCCAACTGCACCGCGCTCTTGATGACATCGGTGGTCACCATGGTGTCCGGCATGATCAACCTCCGCTGTTCGATATCACCCCGATCTTTCTGAACGGCGGTTGGCAGACAACAGGGTCAATGGTCCCCAAGTGGATCACCGACGGGCGCGGACAAATGGCCCGCGCTTCGGGGACTTCTGTCCCTAGCCCTGGCCACGATGGGCTGGTCGGATCAAAGGCATCCGTTTCCATCGATTAGGAGGCATCAA\n"
                "GTACATGTCCAGAGCGAGCCTCAGCTTCTGCGCAGCGACGGAAACTGCCACACTCAAAGCCTACTGGGCGCACGTGTGGCAACGAGTCGATCCACACGAAATGCCGCCGTTGGGCCGCGGACTAGCCGAATTTTCCGGGTGGTGACACAGCCCACATTTGGCATGGGACTTTCGGCCCTGTCCGCGTCCGTGTCGGCCAGACAAGCTTTGGGCATTGGCCACAATCGGGCCACAATCGAAAGCCGAGCAG\n"
                "GGCAGCTGTCGGCAACTGTAAGCCATTTCTGGGACTTTGCTGTGAAAAGCTGGGCGATGGTTGTGGACCTGGACGAGCCACCCGTGCGATAGGTGAGATTCATTCTCGCCCTGACGGGTTGCGTCTGTCATCGGTCGATAAGGACTAACGGCCCTCAGGTGGGGACCAACGCCCCTGGGAGATAGCGGTCCCCGCCAGTAACGTACCGCTGAACCGACGGGATGTATCCGCCCCAGCGAAGGAGACGGCG\n"
                "TCAGCACCATGACCGCCTGGCCACCAATCGCCCGTAACAAGCGGGACGTCCGCGACGACGCGTGCGCTAGCGCCGTGGCGGTGACAACGACCAGATATGGTCCGAGCACGCGGGCGAACCTCGTGTTCTGGCCTCGGCCAGTTGTGTAGAGCTCATCGCTGTCATCGAGCGATATCCGACCACTGATCCAAGTCGGGGGCTCTGGGGACCGAAGTCCCCGGGCTCGGAGCTATCGGACCTCACGATCACC",
                15, 10, 2000, 20)).replace(" ", "\n"))
# 1.8 Some Hidden Messages are More Elusive than Others
from Ex7 import hammingDistance
from Ex4 import listToString
from Ex2 import MaxMap
from Ex10 import Neighbors


def FrequentWordsWithMismatches(Text, k, d):
    """Find the most frequent k-mers in Text, allowing up to d mismatches.

    Input: A string Text as well as integers k and d.
    (You may assume k <= 12 and d <= 3.)
    Output: A list of all k-mers whose accumulated count equals the maximum
    reported by MaxMap. A k-mer's count grows by one each time it is produced
    by Neighbors(pattern, d) for a k-mer window of Text. The list is empty
    when Text has no window of length k.
    """
    freqMap = {}
    # Text has len(Text) - k + 1 windows of length k; the "+ 1" keeps the
    # final window, which a bound of len(Text) - k would silently drop.
    for i in range(len(Text) - k + 1):
        pattern = Text[i:i + k]
        # Every neighbour must be counted. Iterating the collection directly
        # avoids the index bound that used to skip its last element.
        for neighbor in Neighbors(pattern, d):
            freqMap[neighbor] = freqMap.get(neighbor, 0) + 1
    # Nothing was counted (Text shorter than k): there is no maximum to ask
    # MaxMap for, so report no patterns.
    if not freqMap:
        return []
    # NOTE(review): MaxMap is assumed to return the largest value stored in
    # the mapping -- confirm against its definition in Ex2.
    m = MaxMap(freqMap)
    return [key for key, count in freqMap.items() if count == m]


if __name__ == "__main__":
    print(listToString(FrequentWordsWithMismatches("GGTAATATCTTAAATCTTGGTAGGTAATAAATTAAAGGTAATAATAAATTGGTAAATTAAAATAGGTAAAAATAGGTAATAAAAATAAATTAAAAATTATAATAAAAATATCTTTCTTATAATAAAAAAATCTTGGTATCTTTCTTAAAAATTAAAATAAATTAATTTCTTAAATCTTATAGGTAGGTAGGTAAATTAATTAAAAAAGGTAATATCTTAAAAAAAAAAAATCTTTCTTTCTTGGTATCTTTCTTAATTGGTATCTTATAGGTAATATCTTTCTTGGTAATAAAAAATTTCTTTCTTGGTAATAGGTAAATTAAAAATTAATTATAAATTAAATCTTGGTAGGTAATATCTTTCTTAAATCTTAATT", 6, 3)))