def randomized_motif_search(dna, k, t):
    """Run one randomized motif search from a random starting set of k-mers.

    One k-mer is drawn at a random offset from each of the first ``t``
    strings in ``dna``. The motifs are then repeatedly rebuilt from the
    profile of the current motifs until the score stops strictly improving.

    NOTE(review): this function is defined a second time directly below in
    this file; the later definition is the one Python keeps.

    Args:
        dna: Indexable collection of DNA strings (at least ``t`` entries).
        k: Length of each motif.
        t: Number of strings in ``dna`` to draw motifs from.

    Returns:
        A two-item list ``[best_score, best_motifs]``.
    """
    # Pick one start offset per sequence, bounded by that sequence's own
    # length so the slice always yields a full k-mer.
    # Assumes `randint` is `random.randint` (inclusive upper bound) -- TODO
    # confirm against the file's imports.
    starts = [randint(0, len(dna[i]) - k) for i in range(t)]
    motifs = [dna[i][start:start + k] for i, start in enumerate(starts)]

    # Seed the best result with the score of the random starting motifs.
    best_score = [score(motifs), motifs]

    # Rebuild motifs from the current profile until the score no longer
    # strictly decreases (lower is treated as better here).
    while True:
        current_profile = profile_with_pseudocounts(motifs)
        # Use the `dna` argument rather than a module-level global, so the
        # function works on whatever sequences the caller passes in.
        motifs = motifs_from_profile(current_profile, dna, k)
        current_score = score(motifs)
        if current_score < best_score[0]:
            best_score = [current_score, motifs]
        else:
            return best_score
def randomized_motif_search(dna, k, t):
    """Run one randomized motif search from a random starting set of k-mers.

    One k-mer is drawn at a random offset from each of the first ``t``
    strings in ``dna``. The motifs are then repeatedly rebuilt from the
    profile of the current motifs until the score stops strictly improving.

    NOTE(review): a function with this same name is also defined directly
    above in this file; this later definition is the one Python keeps.

    Args:
        dna: Indexable collection of DNA strings (at least ``t`` entries).
        k: Length of each motif.
        t: Number of strings in ``dna`` to draw motifs from.

    Returns:
        A two-item list ``[best_score, best_motifs]``.
    """
    # Pick one start offset per sequence, bounded by that sequence's own
    # length so the slice always yields a full k-mer.
    # Assumes `randint` is `random.randint` (inclusive upper bound) -- TODO
    # confirm against the file's imports.
    starts = [randint(0, len(dna[i]) - k) for i in range(t)]
    motifs = [dna[i][start:start + k] for i, start in enumerate(starts)]

    # Seed the best result with the score of the random starting motifs.
    best_score = [score(motifs), motifs]

    # Rebuild motifs from the current profile until the score no longer
    # strictly decreases (lower is treated as better here).
    while True:
        current_profile = profile_with_pseudocounts(motifs)
        # Use the `dna` argument rather than a module-level global, so the
        # function works on whatever sequences the caller passes in.
        motifs = motifs_from_profile(current_profile, dna, k)
        current_score = score(motifs)
        if current_score < best_score[0]:
            best_score = [current_score, motifs]
        else:
            return best_score
def gibbs_sampler(dna, k, t, N):
    """Iteratively refine a random motif set, replacing one motif per step.

    Starting from one random k-mer per sequence, each of the ``N`` iterations
    picks a random sequence index, builds a profile from the motifs of all
    the *other* sequences, and replaces the chosen motif with the result of
    ``profile_most_probable_kmer`` for that sequence. The lowest-scoring
    motif set seen is tracked and returned.

    Args:
        dna: Indexable collection of DNA strings (at least ``t`` entries).
        k: Length of each motif.
        t: Number of strings in ``dna`` to draw motifs from.
        N: Number of replacement iterations to perform.

    Returns:
        A two-item list ``[best_score, best_motifs]``.
    """
    # Pick one start offset per sequence, bounded by that sequence's own
    # length so the slice always yields a full k-mer.
    # Assumes `randint` is `random.randint` (inclusive upper bound) -- TODO
    # confirm against the file's imports.
    starts = [randint(0, len(dna[i]) - k) for i in range(t)]
    motifs = [dna[i][start:start + k] for i, start in enumerate(starts)]

    # Seed the best result with the score of the random starting motifs.
    best_score = [score(motifs), motifs]

    for _ in range(N):
        # Index of the sequence whose motif is re-chosen this round.
        r = randint(0, t - 1)

        # Profile built from every motif except the one being replaced.
        current_profile = profile_with_pseudocounts(motifs[:r] + motifs[r + 1:])

        # Work on a copy: `best_score` may still reference the previous list
        # and must not be altered by this replacement.
        motifs = list(motifs)
        motifs[r] = profile_most_probable_kmer(dna[r], k, current_profile)

        current_score = score(motifs)
        if current_score < best_score[0]:
            best_score = [current_score, motifs]

    return best_score
if __name__ == '__main__':
    # Greedy motif search with pseudocounts, run as a script.
    # Input: the first line holds the integers "k t"; each remaining line is
    # read as one DNA string.
    with open('data/stepic_3e.txt') as input_data:
        k, t = map(int, input_data.readline().split())
        dna_list = [line.strip() for line in input_data.readlines()]

    # Start from t*k, intended as a bound no real score should reach
    # (presumably `score` counts mismatches over t motifs of length k --
    # TODO confirm against its definition).
    best_score = [t * k, None]

    # Try every k-mer of the first sequence as the seed motif.
    for i in range(len(dna_list[0]) - k + 1):
        motifs = [dna_list[0][i:i + k]]
        current_profile = profile_with_pseudocounts(motifs)

        # Extend greedily: take the k-mer chosen by the current profile from
        # each following sequence, then rebuild the profile with it included.
        for j in range(1, t):
            motifs.append(
                profile_most_probable_kmer(dna_list[j], k, current_profile))
            current_profile = profile_with_pseudocounts(motifs)

        # Keep this motif set if it beats the best seen so far.
        current_score = score(motifs)
        if current_score < best_score[0]:
            best_score = [current_score, motifs]

    # Without this guard, an input that yields no winning candidate (e.g. a
    # first sequence shorter than k) would fail below with an opaque
    # TypeError from joining None.
    if best_score[1] is None:
        raise ValueError('no motif set scored below the initial bound; '
                         'check k, t and the input sequences')

    # Print and save the answer. A single-argument print(...) call behaves
    # the same on Python 2 and Python 3.
    answer = '\n'.join(best_score[1])
    print(answer)
    with open('output/Assignment_03E.txt', 'w') as output_data:
        output_data.write(answer)