Esempio n. 1
0
def randomized_motif_search(dna, k, t):
    """Run one randomized motif search from a random starting point.

    A random k-mer is picked from every sequence in ``dna``; the motif set
    is then repeatedly rebuilt from the profile of the previous set until
    the score stops strictly decreasing.

    Args:
        dna: Collection of sequences (iterated more than once, so it must
            be re-iterable); each must be at least ``k`` long.
        k: Length of the motifs to search for.
        t: Accepted for signature parity with the sibling searches; this
            function does not read it.

    Returns:
        A ``(best_motifs, best_score)`` tuple, where a lower score is
        treated as better.
    """

    def random_kmer(strand):
        # randint is inclusive on both ends, so the last valid start
        # offset is len(strand) - k.
        offset = random.randint(0, len(strand) - k)
        return strand[offset:offset + k]

    current = [random_kmer(strand) for strand in dna]
    best, score_best = current, score(current)

    while True:
        profile = profile_matrix(current, k)
        current = [profile_most_probable(strand, k, profile) for strand in dna]

        candidate_score = score(current)
        if candidate_score < score_best:
            best, score_best = current, candidate_score
            continue
        # No strict improvement this round: stop and report the best seen.
        return best, score_best
Esempio n. 2
0
def greedy_motif_search_with_pseudocounts(dna, k, t):
    """Greedily build a motif set seeded from each k-mer of the first sequence.

    For every k-mer of ``dna[0]`` a candidate motif set is grown one
    sequence at a time: the profile of the motifs chosen so far selects
    the k-mer taken from the next sequence. The lowest-scoring candidate
    set wins; the initial baseline is the first k-mer of every sequence.

    Args:
        dna: Indexable collection of sequences.
        k: Motif length.
        t: Number of sequences to use (indices ``0 .. t - 1`` of ``dna``).

    Returns:
        The best motif list found (the score itself is not returned).
    """
    # NOTE(review): pseudocounts are presumably applied inside
    # profile_matrix, which is defined elsewhere -- confirm there.
    best = [strand[:k] for strand in dna]
    score_best = score(best)
    first = dna[0]

    last_start = len(first) - k
    for start in range(last_start + 1):
        motifs = [first[start:start + k]]
        for row in range(1, t):
            strand = dna[row]
            # The profile is rebuilt from everything picked so far.
            profile = profile_matrix(motifs, k)
            motifs.append(profile_most_probable(strand, k, profile))

        candidate_score = score(motifs)
        if candidate_score < score_best:
            best, score_best = motifs, candidate_score

    return best
Esempio n. 3
0
def gibbs_sampler(dna, k, t, N):
    """Refine a random motif set by re-picking one motif per iteration.

    Starts from one random k-mer per sequence, then for ``N`` iterations
    picks a random index ``i``, builds a profile from all motifs except
    motif ``i``, and replaces motif ``i`` with the k-mer that
    ``profile_most_probable`` selects from ``dna[i]``.

    Args:
        dna: Indexable collection of sequences, each at least ``k`` long.
        k: Motif length.
        t: Number of sequences; indices are drawn from ``0 .. t - 1``.
        N: Number of refinement iterations.

    Returns:
        A ``(best_motifs, best_score)`` tuple, where a lower score is
        treated as better. ``best_motifs`` is an independent list, so it
        really is the set that produced ``best_score``.
    """
    motifs = []
    for seq in dna:
        # randint is inclusive on both ends, so the last valid start
        # offset is len(seq) - k.
        rand_pos = random.randint(0, len(seq) - k)
        motifs.append(seq[rand_pos:rand_pos + k])

    # Store a copy: ``motifs`` is mutated in place below, and sharing the
    # list object would make ``best`` silently follow every later change.
    best = motifs[:]
    score_best = score(best)

    for _ in range(N):
        i = random.randint(0, t - 1)

        # Profile is built from every motif except the one being replaced.
        motif_subset = motifs[:i] + motifs[i + 1:]
        profile = profile_matrix(motif_subset, k)
        # NOTE(review): a textbook Gibbs sampler draws this k-mer at random
        # according to the profile; the helper name suggests a deterministic
        # most-probable pick instead -- confirm this is intended.
        motifs[i] = profile_most_probable(dna[i], k, profile)

        score_motifs = score(motifs)
        if score_motifs < score_best:
            # Snapshot again so later in-place edits cannot alter the result.
            best = motifs[:]
            score_best = score_motifs

    return best, score_best
Esempio n. 4
0
        else:
            return best, score_best


def repeated_randomized_motif_search(dna, k, t, *, times=1000):
    """Restart the randomized motif search and keep the best outcome.

    The search always runs once; it is then re-run ``times - 1`` more
    times, and a later run replaces the current winner only when its score
    is strictly lower.

    Args:
        dna: Sequences passed through to ``randomized_motif_search``.
        k: Motif length, passed through unchanged.
        t: Passed through unchanged.
        times: Keyword-only total number of runs intended (default 1000).

    Returns:
        The motif list of the best-scoring run (without its score).
    """
    best, score_best = randomized_motif_search(dna, k, t)

    for _restart in range(times - 1):
        challenger, challenger_score = randomized_motif_search(dna, k, t)
        if not challenger_score < score_best:
            continue
        best, score_best = challenger, challenger_score

    return best


# Import-time self-check: these statements sit outside the ``__main__``
# guard, so they run whenever the module is loaded.
# The whitespace-separated literal is split into five sequences and searched
# with k=8 and t=5, using the default number of restarts.
result = repeated_randomized_motif_search(
    "CGCCCCTCTCGGGGGTGTTCAGTAAACGGCCA GGGCGAGGTATGTGTAAGTGCCAAGGTGCCAG TAGTACCGAGACCGAAAGAAGTATACAGGCGT TAGATCAAGTTTCAGGTGCACGTCGGTGAACC AATCCACCAGCTCCACGTGCAATGTTGGCCTA"
    .split(),
    8,
    5,
)
# The search is randomized, so the check is tolerant: the found motifs may
# score at most 1 above the reference motif set. On failure the motifs that
# were found become the assertion message.
assert (
    score(result) <=
    score("TCTCGGGG CCAAGGTG TACAGGCG TTCAGGTG TCCACGTG".split()) + 1), result

if __name__ == "__main__":
    # Script entry point: the first line of the dataset holds ``k`` and
    # ``t``; every remaining line is one sequence.
    with open("data/rosalind_ba2f.txt", "r") as dataset:
        k, t = map(int, dataset.readline().split())
        # Iterating the file object yields the remaining lines in order.
        text = [line.rstrip() for line in dataset]
    # One motif per output line.
    print(*repeated_randomized_motif_search(text, k, t), sep="\n")