def iteration_RMS(Dna, k, t, iter):
    '''
    Repeat RandomizedMotifSearch and keep the lowest-scoring result.

    :param Dna: collection of strings, passed through to RandomizedMotifSearch
    :param k: k-mer length, passed through to RandomizedMotifSearch
    :param t: passed through to RandomizedMotifSearch
    :param iter: total number of searches to run; one search is always
        performed, even when iter < 1 (the name shadows the builtin but is
        kept because it is part of the public signature)
    :return: the motifs with the lowest score over all runs; on ties the
        earliest run wins
    '''
    bestMotifs = RandomizedMotifSearch(Dna, k, t)
    # Remember the best score instead of re-scoring the unchanged best set on
    # every iteration (assumes score() is deterministic for a given input).
    best_score = score(bestMotifs)
    # The first run already happened above, hence iter - 1 further runs.
    for _ in range(iter - 1):
        motifs = RandomizedMotifSearch(Dna, k, t)
        current_score = score(motifs)
        if current_score < best_score:
            bestMotifs, best_score = motifs, current_score
    return bestMotifs
def iteration_GS(Dna, k, t, N, iter):
    '''
    Repeat GibbsSampler and keep the lowest-scoring result.

    :param Dna: collection of strings, passed through to GibbsSampler
    :param k: k-mer length, passed through to GibbsSampler
    :param t: passed through to GibbsSampler
    :param N: number of sampling steps per GibbsSampler run
    :param iter: total number of GibbsSampler runs; one run is always
        performed, even when iter < 1 (the name shadows the builtin but is
        kept because it is part of the public signature)
    :return: the motifs with the lowest score over all runs; on ties the
        earliest run wins
    '''
    bestMotifs = GibbsSampler(Dna, k, t, N)
    # Remember the best score instead of re-scoring the unchanged best set on
    # every iteration (assumes score() is deterministic for a given input).
    best_score = score(bestMotifs)
    # The first run already happened above, hence iter - 1 further runs.
    for _ in range(iter - 1):
        motifs = GibbsSampler(Dna, k, t, N)
        current_score = score(motifs)
        if current_score < best_score:
            bestMotifs, best_score = motifs, current_score
    return bestMotifs
def RandomizedMotifSearch(Dna, k, t):
    '''
    Run one randomized motif search starting from random k-mers.

    One k-mer is picked at a random position in every line of Dna. The
    motif set is then replaced by Motifs(Dna, k, profile), where profile is
    built from the current set, for as long as each replacement strictly
    lowers score().

    :param Dna: newline-separated collection of strings
    :param k: k-mer length
    :param t: forwarded unchanged to constructProfile (presumably the number
        of strings in Dna -- TODO confirm against constructProfile)
    :return: list of motifs, the lowest-scoring set seen before the score
        stopped improving
    :raises ValueError: if a string in Dna is shorter than k
    '''
    motifs = []
    for seq in Dna.split("\n"):
        # Valid start positions are 0 .. len(seq) - k inclusive; randrange
        # excludes its upper bound, hence the + 1. Without it the last k-mer
        # could never be chosen and len(seq) == k raised ValueError.
        start = rnd.randrange(0, len(seq) - k + 1)
        motifs.append(seq[start:start + k])

    best_motifs = motifs
    # Keep the best score alongside the best set so it is scored only once.
    best_score = score(best_motifs)
    while True:
        profile = constructProfile(motifs, k, t)
        motifs = Motifs(Dna, k, profile)
        current_score = score(motifs)
        if current_score >= best_score:
            # No strict improvement: the search has converged.
            return best_motifs
        best_motifs, best_score = motifs, current_score
def GibbsSampler(Dna, k, t, N):
    '''
    Run one Gibbs-sampling motif search starting from random k-mers.

    One k-mer is picked at a random position in every line of Dna. Then, N
    times, a random index j in [0, t) is chosen, a profile is built from all
    motifs except motifs[j], and motifs[j] is replaced by the k-mer that
    profile_random_probability_kmer returns for line j of Dna. The
    lowest-scoring motif set seen at any step is returned.

    :param Dna: newline-separated collection of strings
    :param k: k-mer length
    :param t: number of strings to sample from; indices are drawn from
        range(t), so it must not exceed the number of lines in Dna
    :param N: number of sampling steps; with N <= 0 the random initial
        motifs are returned unchanged
    :return: list with one k-mer per line of Dna
    :raises ValueError: if a string in Dna is shorter than k
    '''
    # Split once instead of re-splitting the whole input on every step.
    sequences = Dna.split("\n")

    motifs = []
    for seq in sequences:
        # Valid start positions are 0 .. len(seq) - k inclusive; randrange
        # excludes its upper bound, hence the + 1. Without it the last k-mer
        # could never be chosen and len(seq) == k raised ValueError.
        start = random.randrange(0, len(seq) - k + 1)
        motifs.append(seq[start:start + k])

    # Snapshot, not alias: motifs is mutated in place below, and an alias
    # would make "best" silently follow every change.
    best_motifs = list(motifs)
    if N <= 0:
        # Nothing to sample, so nothing needs to be scored.
        return best_motifs

    best_score = score(best_motifs)
    for _ in range(N):
        j = random.randrange(0, t)
        others = [motif for index, motif in enumerate(motifs) if index != j]
        # NOTE(review): the profile is built from the motifs without entry j
        # but still receives t as the count -- confirm against
        # constructProfile whether t - 1 is expected here.
        profile = constructProfile(others, k, t)
        motifs[j] = profile_random_probability_kmer(sequences[j], k, profile)
        current_score = score(motifs)
        if current_score < best_score:
            best_motifs = list(motifs)
            best_score = current_score
    return best_motifs
profile = constructProfile(motifs, k, t) motifs = Motifs(Dna, k, profile) if score(motifs) < score(bestMotif): bestMotif = motifs else: return bestMotif def Motifs(Dna, k, profile): motifs = [] for seq in Dna.split("\n"): motifs.append(profileMostProbable(seq, k, profile)) return motifs def iteration_RMS(Dna, k, t, iter): bestMotifs = RandomizedMotifSearch(Dna, k, t) for i in range(0, iter - 1): motifs = RandomizedMotifSearch(Dna, k, t) if score(motifs) < score(bestMotifs): bestMotifs = motifs return bestMotifs if __name__ == "__main__": bestMotifs = iteration_RMS( "AGTCAACCCGGATCGCGGGTCGGTATGGCCACAGCACTCCGCGAACAGTCGGATAGGCAAGCACGGGGATCCGAACGCTCCAAGCGGCCATAAATTAGTTTCTTCTGGGTGCCATTAGAACTAGCCATCCTGCTAACCGCTCGGCGTAATGTGCACACTGCCTAAATTGCTTTAACGCATCCTATAAGTCAACCCGGATCG\nCGGGTCGGTATGGCCACAGCACTCCGCGAACAGTCGGATAGGCAAGCACGGGGATCCGAACGCTCCAAGCGGCCATATCGCTCCGGACCTCCAATTAGTTTCTTCTGGGTGCCATTAGAACTAGCCATCCTGCTAACCGCTCGGCGTAATGTGCACACTGCCTAAATTGCTTTAACGCATCCTATAAGTCAACCCGGATCG\nTGCGGGACTTGTAGATGGAATGCAGTTCTTGAGAGGTCCGTGCCTGGTTTTATCATTCCCGGGGAGCCCGATTGGGTGCCTGAGGCTTTCGGACTACAGGTTCCCCATAAACCACGAACCAATAGTAGCAGAAGCCAATAGCATCGAACTGGGCCCCCTTGCGAACGGACCTCCGCACGGGTGAGTATACTTCGGATACCC\nCGATATTGACACTGGTGACTCCCGCACCCTCATCGAGCGACCCCTGGGTTGCAGACATTAAGTTCTTGTTACACGTCATATTCATTTGGCTAAGGCTGCGGTAGTAAGCTACCCGGGACGATCTCAAAATCGAAAGTAACCTCCTTAGGGAAAACCTACTCACTCACCCACGGCCGTTCATCACTTTCGGACCATTCAGGT\nACTGTAGGCTTTTCAGGTTAAGACCACTACCCTTGGAAGCGCCAGGTGGGACCACCAATGAGATGATACATATGGGGCAGTTCTCTGGATGATTACACGCCACTCAGTCTAGCTTGTTTGCGCCAGTGAAAATTTGTCAACAAAGACGGCGATTGTAGGTCGAAACGGTAGTCCACTCGTGTTTCCATAAATGCCCTCGAA\nCATAGTTAGATCCACGTGCTGTTCCTAATAGAGACTTTACGCAATGCCGTATGGCTCTCTACGATGGGCTCAGGGGCATAGTTAAGTTACAAGTCAATTGAGATAGACTGGCGCAGACTTTGTGTCGAAACCTTCCTCCACTACGATGTTACCAACAGGGTTAGTGCAAGCATCCATCCATTATTTCACTCTGGCGGACTA\nAATGGCATGGTCCGCAGCCAAATATCGAGACACGAGTTGATACACCGTTCGATTAGCAACTCGAAAGTTACCTCCCTAGAGCGGCGTCGTCGGTTGATGTCATCTTTTCTCTGATCGCATTAATCTCGTGTAAAAAAATTAATGCATACTGAGGCACAGCTTCTTACAATCATGTGAGGTACTTCCGCCTGAATTCTGATG\nGCCTGT
TGATAGGTCGGCTTTGTGTTGCCACTGCACAAGGAGGTAGCGTAGAGTGGCGTCACTTTCCAACTCGCGCTTTCAATGACCCCAACGTGCTGCAGATATGTGTAGAGTGAAATTACATAACGCCTTGCTATCCGTCCGTATAATTCCCAGTGGTTCGAAACGCTGCTCCAGGGATTAGGTGGCTAGCGAGGGCTT\nAAAGCTCTAAGATGTTCGCATGCGCAACGTGTAATTCAGGTACGCACGTCGAAACGGACCAAGTGACGATGGTTCACAACCGTTTGCCTAAGCGCGAGCAGTAACGTAAGCAAGTAGCGTGAACCAAGAGCATACCCTGTCTCTGGTTGAGACTATCGGCTTTGATAGAGCGTCTACAGTGCCACTCGTACATGTTCTCGA\nCAATAGCACTCGTTTTCCGATCGACCAGGACCTCCCCTTACCAGGCGGACTGTAATGAGCTGAACGGTTAAAAGGTCCAGTAGTAGCGTCGTGTCAATACCGGTCTTCTGACATCGGGGGAGGCCTGCGGAACAAAAAAAACAATATTATTTTGAATCGGGATCAGGGCACACACGAAATCATAGACACTACGTCTCGGCT\nGGTAGCATGCAAGACGGGGTCACCGTATGCAAGACAACTCACGTTAATAAAGGTCGAGCCAGCGGTTATAGGACACGAATTGAAGGTTTGTATAGACGGGCAAGCCTCATCCGTTGCCCATTGAGTCCCCTCGGGTCGACGAGGACCTCCTACACTGGATTTTTACCCAAGCCAAGACGAAAAACCTAGGCAAGTAACATT\nAGTTTTTGTATGCCTTTATTTGGGGGCGCAAAGGTTTGGATTTTAACTCAGGCGAGCTACGCCTACCGAAATTAAGCACCTGGTTAACGACGGGAAGAATATCTCACCTACAACTGACAAAGACCAGGCGCCACGTTCAGGTGACACGATTGGTAGGCGGTTCCTACCTTCCGATCGAAACGGACACACGATGTCTTGACA\nCCAGGCTGCTCCCAGTTATCGAGGCATCCTTGTGACTAGCGCCTCGAACGAGACCTCCTACCTATTGCTATATAAATCCAGCGTCGGATCTCACCCGAGACTCATCCCGTGCCTTCCCAGCCCTTGTTGGTCTAGTGCAGCTTAATTCTGTCGATAAAAGGGATGCTAGGTAAGTCATTCACGAATTTTCGTGTGTAGTAT\nACCTTGTAATATCTAATCGGCAGCTTCTCCAATACCATCGTCTCGGACCTCCAGTCGCTCGCTGCGTACGTACTCGTGTTCACACAGGGCGAACGCTGGCGGGCCGGGACGCTTCAGGGAGAGCGTTAGGCCATCAGCGTGACTCGGACGGTGACTATTGGATCTCACCCCCACTCAGGCGTCTCGGCACCCTCATAACCC\nCACTATCCTACCGTATCGGGCTCGAATATGCGCCATCCGAAGGGAGCGATTCAATAGGGACGGCCGGGTCGGCGCGCATATCTTGACTGCACGAAAATGAAACGGACCTCATATTCATGATCCACTGACTAGTAAATAGCTGCGAGGGCAGTTGTGGCAGGTCTTACAGGGTAAACGTTAACGAACTCCACTTTTCGTGGG\nTCACGCCCCTCGACGAAGTCGAAACGGAGACCCGTCCCACGCACATAGGAAGCCAGCGTCATCTGGCTTATGGGGCCGTAGGCTGGATGGTAGGCTCGACGTAGAGTGTCTCAATACAAGCTATTGGGCGCAGATTGATAAACCGCCCGCTTTTCAGACGTTAGTTACCTTCTGTGTGAGGATTGCAACATTACTTGCTAA\nACCCATCGCAGACCGAACTGGGGTTCTAGTACACGACGCCCACGCGATTTGTAGCGCACTTATTGGACTTGAGCCACTCAGCTCCCAGTCCATTCATATTTGTTACAATACGTCCCGTACCAGTGGAACCCCGTTTCCCCGACGGGCGTGCCCGGAGCATCTCGTAGAGGAAAAACGGA
CCTCCTGAAGGCGATTCGCCTG\nCTATATCCCTGGGTGCTTCACTCAGTCGAATGTGACCTCCAAGTAAAGAACCGGGATTACTTACCAACCTTAGCCGATAGCGATACTATAACGCTCCACCATCCTGACTGGCAACGCACCATTCCTCTTGCAACATGACGACCTGCCTTATTGGCCGATATTTAAGTCAAGAGACTCTGGTAAGTGCAGACGGTGTAAAAC\nGCTACGCCCGGGTGAGGAACGGGGTCCTCCACAGTTAACTATATTGCCAAATTCTTGGAGAAAGTCATAAACTTAAAATATCCATTGCAATACGAAGACCGTGGGGATGAACGCCGCGGGCTGTCTCTGTGTGGTGCTAAAACTCCCGGTGTAACCGATGCATGTTCTTTATCAGCACGGACCTCCCCATGATGCGCTCGC\nGGACATGATGGACTCAAGAGACTTCAGACGCCCTGGCCCCCGACTGCTTTTCTCGCCCGTGCGTAGCCCGCCGAAACGGACCTAAATAACCCCTTGACATACCAATGTACGATCTCGTCAATGTCCTTTTTACCCAACCCGATCAACAAGTCAGGGTCGAGCTACCGCCTGAAGTTGGCCACACACCGCAACCCAGACCGG", 15, 20, 1000) print(listToString(bestMotifs).replace(" ", "\n")) print(score(bestMotifs))