예제 #1
0
def iteration_RMS(Dna, k, t, iter):
    '''
    Run RandomizedMotifSearch repeatedly and keep the lowest-scoring result.

    :param Dna: forwarded unchanged to RandomizedMotifSearch
    :param k: forwarded unchanged to RandomizedMotifSearch
    :param t: forwarded unchanged to RandomizedMotifSearch
    :param iter: total number of searches to run; one search is always
        performed, even when iter is 0 or negative
    :return: the motifs with the smallest score() among all runs
    '''
    bestMotifs = RandomizedMotifSearch(Dna, k, t)
    # Score the incumbent once and carry the value along instead of
    # re-scoring the same motifs on every comparison.
    bestScore = score(bestMotifs)
    for _ in range(iter - 1):
        motifs = RandomizedMotifSearch(Dna, k, t)
        currentScore = score(motifs)
        # Strict "<" keeps the earliest result when scores tie.
        if currentScore < bestScore:
            bestMotifs, bestScore = motifs, currentScore
    return bestMotifs
예제 #2
0
def iteration_GS(Dna, k, t, N, iter):
    '''
    Run GibbsSampler repeatedly and keep the lowest-scoring result.

    :param Dna: forwarded unchanged to GibbsSampler
    :param k: forwarded unchanged to GibbsSampler
    :param t: forwarded unchanged to GibbsSampler
    :param N: forwarded unchanged to GibbsSampler
    :param iter: total number of sampler runs; one run is always performed,
        even when iter is 0 or negative
    :return: the motifs with the smallest score() among all runs
    '''
    bestMotifs = GibbsSampler(Dna, k, t, N)
    # Score the incumbent once and carry the value along instead of
    # re-scoring the same motifs on every comparison.
    bestScore = score(bestMotifs)
    for _ in range(iter - 1):
        motifs = GibbsSampler(Dna, k, t, N)
        currentScore = score(motifs)
        # Strict "<" keeps the earliest result when scores tie.
        if currentScore < bestScore:
            bestMotifs, bestScore = motifs, currentScore
    return bestMotifs
예제 #3
0
def RandomizedMotifSearch(Dna, k, t):
    '''
    Start from one random k-mer per string, then repeatedly rebuild the
    motifs from their own profile until the score stops improving.

    :param Dna: collection of strings, given as one newline-separated string
    :param k: k-mer length
    :param t: passed through to constructProfile (presumably the number of
        strings in Dna -- TODO confirm against constructProfile)
    :return: list with one motif per string in Dna (lowest score reached)
    :raises ValueError: from randrange when a string is shorter than k
    '''
    motifs = []
    for seq in Dna.split("\n"):
        # randrange excludes its upper bound, so "+ 1" is needed for the
        # last k-mer (start index len(seq) - k) to be selectable and for a
        # string of exactly k characters to be accepted.
        start = rnd.randrange(0, len(seq) - k + 1)
        motifs.append(seq[start:start + k])
    bestMotif = motifs
    # Cache the incumbent's score rather than recomputing it each pass.
    bestScore = score(bestMotif)
    while True:
        profile = constructProfile(motifs, k, t)
        motifs = Motifs(Dna, k, profile)
        currentScore = score(motifs)
        if currentScore < bestScore:
            bestMotif, bestScore = motifs, currentScore
        else:
            # No strict improvement: the search has converged.
            return bestMotif
예제 #4
0
def GibbsSampler(Dna, k, t, N):
    '''
    Gibbs sampling motif search: start from one random k-mer per string,
    then N times re-sample the motif of one randomly chosen string from a
    profile built out of the remaining motifs, tracking the best score.

    :param Dna: collection of strings, given as one newline-separated string
    :param k: k-mer length
    :param t: number of strings; used as the exclusive upper bound when
        choosing which motif to re-sample
    :param N: number of runs (re-sampling steps)
    :return: list with one motif per string in Dna (lowest score observed)
    :raises ValueError: from randrange when a string is shorter than k
    '''
    sequences = Dna.split("\n")
    motifs = []
    for seq in sequences:
        # randrange excludes its upper bound, so "+ 1" is needed for the
        # last k-mer (start index len(seq) - k) to be selectable and for a
        # string of exactly k characters to be accepted.
        start = random.randrange(0, len(seq) - k + 1)
        motifs.append(seq[start:start + k])
    # Keep a snapshot, not an alias: motifs is mutated in place below, and
    # an alias would silently follow every change, so the "best" result
    # would just be whatever the final state happens to be.
    bestMotif = list(motifs)
    bestScore = score(bestMotif)
    for _ in range(N):
        j = random.randrange(0, t)
        # Leave out the motif that is about to be re-sampled.
        gMotifs = [motif for index, motif in enumerate(motifs) if index != j]
        # NOTE(review): t is passed although gMotifs holds one motif fewer;
        # confirm that this is what constructProfile expects.
        profile = constructProfile(gMotifs, k, t)
        motifs[j] = profile_random_probability_kmer(sequences[j], k, profile)
        currentScore = score(motifs)
        if currentScore < bestScore:
            bestMotif = list(motifs)
            bestScore = currentScore
    return bestMotif
예제 #5
0
        profile = constructProfile(motifs, k, t)
        motifs = Motifs(Dna, k, profile)
        if score(motifs) < score(bestMotif):
            bestMotif = motifs
        else:
            return bestMotif


def Motifs(Dna, k, profile):
    '''
    Pick one k-mer per string using profileMostProbable.

    :param Dna: collection of strings, given as one newline-separated string
    :param k: k-mer length, forwarded to profileMostProbable
    :param profile: profile forwarded to profileMostProbable
    :return: list of profileMostProbable results, one per string, in order
    '''
    return [
        profileMostProbable(sequence, k, profile)
        for sequence in Dna.split("\n")
    ]


def iteration_RMS(Dna, k, t, iter):
    '''
    Run RandomizedMotifSearch repeatedly and keep the lowest-scoring result.

    :param Dna: forwarded unchanged to RandomizedMotifSearch
    :param k: forwarded unchanged to RandomizedMotifSearch
    :param t: forwarded unchanged to RandomizedMotifSearch
    :param iter: total number of searches to run; one search is always
        performed, even when iter is 0 or negative
    :return: the motifs with the smallest score() among all runs
    '''
    bestMotifs = RandomizedMotifSearch(Dna, k, t)
    # Score the incumbent once and carry the value along instead of
    # re-scoring the same motifs on every comparison.
    bestScore = score(bestMotifs)
    for _ in range(iter - 1):
        motifs = RandomizedMotifSearch(Dna, k, t)
        currentScore = score(motifs)
        # Strict "<" keeps the earliest result when scores tie.
        if currentScore < bestScore:
            bestMotifs, bestScore = motifs, currentScore
    return bestMotifs


# Script entry point: runs iteration_RMS on a hard-coded data set with
# k = 15, t = 20 and 1000 iterations, then prints the motifs and their score.
if __name__ == "__main__":
    # The first argument is a single string of DNA sequences separated by
    # "\n" escapes.
    # NOTE(review): in this view the literal continues across several
    # physical lines; confirm it is one line in the real file, since a
    # plain quoted string cannot contain raw line breaks.
    bestMotifs = iteration_RMS(
        "AGTCAACCCGGATCGCGGGTCGGTATGGCCACAGCACTCCGCGAACAGTCGGATAGGCAAGCACGGGGATCCGAACGCTCCAAGCGGCCATAAATTAGTTTCTTCTGGGTGCCATTAGAACTAGCCATCCTGCTAACCGCTCGGCGTAATGTGCACACTGCCTAAATTGCTTTAACGCATCCTATAAGTCAACCCGGATCG\nCGGGTCGGTATGGCCACAGCACTCCGCGAACAGTCGGATAGGCAAGCACGGGGATCCGAACGCTCCAAGCGGCCATATCGCTCCGGACCTCCAATTAGTTTCTTCTGGGTGCCATTAGAACTAGCCATCCTGCTAACCGCTCGGCGTAATGTGCACACTGCCTAAATTGCTTTAACGCATCCTATAAGTCAACCCGGATCG\nTGCGGGACTTGTAGATGGAATGCAGTTCTTGAGAGGTCCGTGCCTGGTTTTATCATTCCCGGGGAGCCCGATTGGGTGCCTGAGGCTTTCGGACTACAGGTTCCCCATAAACCACGAACCAATAGTAGCAGAAGCCAATAGCATCGAACTGGGCCCCCTTGCGAACGGACCTCCGCACGGGTGAGTATACTTCGGATACCC\nCGATATTGACACTGGTGACTCCCGCACCCTCATCGAGCGACCCCTGGGTTGCAGACATTAAGTTCTTGTTACACGTCATATTCATTTGGCTAAGGCTGCGGTAGTAAGCTACCCGGGACGATCTCAAAATCGAAAGTAACCTCCTTAGGGAAAACCTACTCACTCACCCACGGCCGTTCATCACTTTCGGACCATTCAGGT\nACTGTAGGCTTTTCAGGTTAAGACCACTACCCTTGGAAGCGCCAGGTGGGACCACCAATGAGATGATACATATGGGGCAGTTCTCTGGATGATTACACGCCACTCAGTCTAGCTTGTTTGCGCCAGTGAAAATTTGTCAACAAAGACGGCGATTGTAGGTCGAAACGGTAGTCCACTCGTGTTTCCATAAATGCCCTCGAA\nCATAGTTAGATCCACGTGCTGTTCCTAATAGAGACTTTACGCAATGCCGTATGGCTCTCTACGATGGGCTCAGGGGCATAGTTAAGTTACAAGTCAATTGAGATAGACTGGCGCAGACTTTGTGTCGAAACCTTCCTCCACTACGATGTTACCAACAGGGTTAGTGCAAGCATCCATCCATTATTTCACTCTGGCGGACTA\nAATGGCATGGTCCGCAGCCAAATATCGAGACACGAGTTGATACACCGTTCGATTAGCAACTCGAAAGTTACCTCCCTAGAGCGGCGTCGTCGGTTGATGTCATCTTTTCTCTGATCGCATTAATCTCGTGTAAAAAAATTAATGCATACTGAGGCACAGCTTCTTACAATCATGTGAGGTACTTCCGCCTGAATTCTGATG\nGCCTGTTGATAGGTCGGCTTTGTGTTGCCACTGCACAAGGAGGTAGCGTAGAGTGGCGTCACTTTCCAACTCGCGCTTTCAATGACCCCAACGTGCTGCAGATATGTGTAGAGTGAAATTACATAACGCCTTGCTATCCGTCCGTATAATTCCCAGTGGTTCGAAACGCTGCTCCAGGGATTAGGTGGCTAGCGAGGGCTT\nAAAGCTCTAAGATGTTCGCATGCGCAACGTGTAATTCAGGTACGCACGTCGAAACGGACCAAGTGACGATGGTTCACAACCGTTTGCCTAAGCGCGAGCAGTAACGTAAGCAAGTAGCGTGAACCAAGAGCATACCCTGTCTCTGGTTGAGACTATCGGCTTTGATAGAGCGTCTACAGTGCCACTCGTACATGTTCTCGA\nCAATAGCACTCGTTTTCCGATCGACCAGGACCTCCCCTTACCAGGCGGACTGTAATGAGCTGAACGGTTAAAAGGTCCAGTAGTAGCGTCGTGTCAATACCGGTCTTCTGACATCGGGGGAGGCCTGCGGAACAAAAAAAACAATATTATTTTGAATCGGGATC
AGGGCACACACGAAATCATAGACACTACGTCTCGGCT\nGGTAGCATGCAAGACGGGGTCACCGTATGCAAGACAACTCACGTTAATAAAGGTCGAGCCAGCGGTTATAGGACACGAATTGAAGGTTTGTATAGACGGGCAAGCCTCATCCGTTGCCCATTGAGTCCCCTCGGGTCGACGAGGACCTCCTACACTGGATTTTTACCCAAGCCAAGACGAAAAACCTAGGCAAGTAACATT\nAGTTTTTGTATGCCTTTATTTGGGGGCGCAAAGGTTTGGATTTTAACTCAGGCGAGCTACGCCTACCGAAATTAAGCACCTGGTTAACGACGGGAAGAATATCTCACCTACAACTGACAAAGACCAGGCGCCACGTTCAGGTGACACGATTGGTAGGCGGTTCCTACCTTCCGATCGAAACGGACACACGATGTCTTGACA\nCCAGGCTGCTCCCAGTTATCGAGGCATCCTTGTGACTAGCGCCTCGAACGAGACCTCCTACCTATTGCTATATAAATCCAGCGTCGGATCTCACCCGAGACTCATCCCGTGCCTTCCCAGCCCTTGTTGGTCTAGTGCAGCTTAATTCTGTCGATAAAAGGGATGCTAGGTAAGTCATTCACGAATTTTCGTGTGTAGTAT\nACCTTGTAATATCTAATCGGCAGCTTCTCCAATACCATCGTCTCGGACCTCCAGTCGCTCGCTGCGTACGTACTCGTGTTCACACAGGGCGAACGCTGGCGGGCCGGGACGCTTCAGGGAGAGCGTTAGGCCATCAGCGTGACTCGGACGGTGACTATTGGATCTCACCCCCACTCAGGCGTCTCGGCACCCTCATAACCC\nCACTATCCTACCGTATCGGGCTCGAATATGCGCCATCCGAAGGGAGCGATTCAATAGGGACGGCCGGGTCGGCGCGCATATCTTGACTGCACGAAAATGAAACGGACCTCATATTCATGATCCACTGACTAGTAAATAGCTGCGAGGGCAGTTGTGGCAGGTCTTACAGGGTAAACGTTAACGAACTCCACTTTTCGTGGG\nTCACGCCCCTCGACGAAGTCGAAACGGAGACCCGTCCCACGCACATAGGAAGCCAGCGTCATCTGGCTTATGGGGCCGTAGGCTGGATGGTAGGCTCGACGTAGAGTGTCTCAATACAAGCTATTGGGCGCAGATTGATAAACCGCCCGCTTTTCAGACGTTAGTTACCTTCTGTGTGAGGATTGCAACATTACTTGCTAA\nACCCATCGCAGACCGAACTGGGGTTCTAGTACACGACGCCCACGCGATTTGTAGCGCACTTATTGGACTTGAGCCACTCAGCTCCCAGTCCATTCATATTTGTTACAATACGTCCCGTACCAGTGGAACCCCGTTTCCCCGACGGGCGTGCCCGGAGCATCTCGTAGAGGAAAAACGGACCTCCTGAAGGCGATTCGCCTG\nCTATATCCCTGGGTGCTTCACTCAGTCGAATGTGACCTCCAAGTAAAGAACCGGGATTACTTACCAACCTTAGCCGATAGCGATACTATAACGCTCCACCATCCTGACTGGCAACGCACCATTCCTCTTGCAACATGACGACCTGCCTTATTGGCCGATATTTAAGTCAAGAGACTCTGGTAAGTGCAGACGGTGTAAAAC\nGCTACGCCCGGGTGAGGAACGGGGTCCTCCACAGTTAACTATATTGCCAAATTCTTGGAGAAAGTCATAAACTTAAAATATCCATTGCAATACGAAGACCGTGGGGATGAACGCCGCGGGCTGTCTCTGTGTGGTGCTAAAACTCCCGGTGTAACCGATGCATGTTCTTTATCAGCACGGACCTCCCCATGATGCGCTCGC\nGGACATGATGGACTCAAGAGACTTCAGACGCCCTGGCCCCCGACTGCTTTTCTCGCCCGTGCGTAGCCCGCCGAAACGGACCTAAATAACCCCTTGACATACCAATGTACGATCTCGTCAATGTCCTTTTTACC
CAACCCGATCAACAAGTCAGGGTCGAGCTACCGCCTGAAGTTGGCCACACACCGCAACCCAGACCGG",
        15, 20, 1000)
    # Every space in listToString's output becomes a line break; presumably
    # listToString joins the motifs with spaces -- verify against its
    # definition.
    print(listToString(bestMotifs).replace(" ", "\n"))
    # Score of the motifs printed above.
    print(score(bestMotifs))