def main():
    '''
    Read the sequences of the LONG dataset, build a contig from them with
    getContig, report its length and save it to the output file.
    '''
    fasta_records = parse_fasta('problem_datasets/rosalind_long.txt')
    reads = list(fasta_records.values())
    superstring = getContig(reads)

    print('Shortest superstring is %i nucleotides long.' % len(superstring))

    with open('output/rosalind_long_out.txt', 'w') as outfile:
        outfile.write(superstring)
def main():
    '''
    Assemble the LONG dataset sequences with getContig, print the length of
    the result and write the result to the output file.
    '''
    records = parse_fasta('problem_datasets/rosalind_long.txt')
    contig = getContig(list(records.values()))

    # The length report goes to the console before the file is written.
    print('Shortest superstring is %i nucleotides long.' % len(contig))

    with open('output/rosalind_long_out.txt', 'w') as out:
        out.write(contig)
def main():
    '''
    Parse the PDST dataset, compute its distance matrix with distance_matrix
    and write one space-separated row per line to the output file.
    '''
    sequences = parse_fasta('problem_datasets/rosalind_pdst.txt')
    rows = distance_matrix(sequences)

    with open('output/rosalind_pdst_out.txt', 'w') as outfile:
        for row in rows:
            cells = [str(value) for value in row]
            outfile.write(' '.join(cells)+'\n')
def main():
    '''
    Multiply the factorials of the 'A' and 'C' counts of the PMCH sequence,
    print the product and save it to the output file.
    '''
    sequence = parse_fasta('problem_datasets/rosalind_pmch.txt')
    a_total = sequence.count('A')
    c_total = sequence.count('C')
    matchings = factorial(a_total) * factorial(c_total)

    print(matchings)

    with open('output/rosalind_pmch_out.txt', 'w') as outfile:
        outfile.write(str(matchings))
def main():
    '''
    Run alignment_score on the two LOCA sequences with PAM250() and -5,
    save the returned items one per line and print the first item.
    '''
    first, second = parse_fasta('problem_datasets/rosalind_loca.txt')
    result = alignment_score(first, second, PAM250(), -5)

    with open('output/rosalind_loca_out.txt', 'w') as outfile:
        outfile.write('\n'.join(result))

    # The first item of the result is reported as the score.
    print('Maximum alignment score =', result[0])
def main():
    '''
    Run edit_dist_with_align on the two EDTA sequences, write the returned
    items one per line to the output file and print the first item.

    Items are passed through str() before joining so that a numeric first
    item (reported below as the edit distance) cannot make str.join raise
    TypeError; items that are already strings are unaffected.
    '''
    s, t = parse_fasta('problem_datasets/rosalind_edta.txt', 'seq')
    aligned = edit_dist_with_align(s, t)

    with open('output/rosalind_edta_out.txt', 'w') as outfile:
        outfile.write('\n'.join(map(str, aligned)))

    print('Edit distance =', aligned[0])
def main():
    '''
    Translate the ORF dataset sequence with raw_translate, extract the open
    reading frames with find_orfs and write them one per line.
    '''
    dna = parse_fasta('problem_datasets/rosalind_orf.txt')
    reading_frames = find_orfs(raw_translate(dna))

    with open('output/rosalind_orf_out.txt', 'w') as outfile:
        outfile.write('\n'.join(reading_frames))
def main():
    '''
    Run semiglobal_align on the two SMGB sequences, save the returned items
    one per line and print the first item.
    '''
    first, second = parse_fasta('problem_datasets/rosalind_smgb.txt', True)
    result = semiglobal_align(first, second)

    with open('output/rosalind_smgb_out.txt', 'w') as outfile:
        outfile.write('\n'.join(result))

    # The first item of the result is reported as the score.
    print('Maximum alignment score =', result[0])
def main():
    '''
    Run longest_sub on the two LCSQ sequences, save the result and print
    its length.
    '''
    first, second = parse_fasta('problem_datasets/rosalind_lcsq.txt')
    common = longest_sub(first, second)

    with open('output/rosalind_lcsq_out.txt', 'w') as outfile:
        outfile.write(common)

    print('The longest common subsequence is', len(common), 'bases long.')
def main():
    """
    Run alignment_score on the two LOCA sequences with PAM250() and -5,
    save the returned items one per line and print the first item.
    """
    first, second = parse_fasta("problem_datasets/rosalind_loca.txt", True)
    result = alignment_score(first, second, PAM250(), -5)

    with open("output/rosalind_loca_out.txt", "w") as outfile:
        outfile.write("\n".join(result))

    # The first item of the result is reported as the score.
    print("Maximum alignment score =", result[0])
def main():
    '''
    Semiglobally align the two SMGB sequences via semiglobal_align, write
    every returned item on its own line and report the first item.
    '''
    seq_a, seq_b = parse_fasta('problem_datasets/rosalind_smgb.txt', True)
    aligned = semiglobal_align(seq_a, seq_b)
    report = '\n'.join(aligned)

    with open('output/rosalind_smgb_out.txt', 'w') as out:
        out.write(report)

    print('Maximum alignment score =', aligned[0])
def main():
    '''
    Run global_align_with_affine on the two GAFF sequences with BLOSUM62(),
    -11 and -1, save the returned items one per line and print the first.
    '''
    first, second = parse_fasta('problem_datasets/rosalind_gaff.txt', True)
    result = global_align_with_affine(first, second, BLOSUM62(), -11, -1)

    with open('output/rosalind_gaff_out.txt', 'w') as outfile:
        outfile.write('\n'.join(result))

    # The first item of the result is reported as the score.
    print('Maximum alignment score =', result[0])
def main():
    '''
    Compute factorial(#A) * factorial(#C) for the PMCH sequence, print the
    value and write it to the output file.
    '''
    strand = parse_fasta('problem_datasets/rosalind_pmch.txt')
    total = factorial(strand.count('A')) * factorial(strand.count('C'))

    print(total)

    with open('output/rosalind_pmch_out.txt', 'w') as out:
        out.write(str(total))
def main():
    '''
    Run edit_dist_with_align on the two EDTA sequences, write the returned
    items (converted with str) one per line and print the first item.
    '''
    first, second = parse_fasta('problem_datasets/rosalind_edta.txt')
    result = edit_dist_with_align(first, second)
    lines = map(str, result)

    with open('output/rosalind_edta_out.txt', 'w') as outfile:
        outfile.write('\n'.join(lines))

    print('Edit distance =', result[0])
def main():
    '''
    Find a common subsequence of the two LCSQ sequences with longest_sub,
    write it to the output file and report how long it is.
    '''
    seq_a, seq_b = parse_fasta('problem_datasets/rosalind_lcsq.txt')
    subsequence = longest_sub(seq_a, seq_b)

    with open('output/rosalind_lcsq_out.txt', 'w') as out:
        out.write(subsequence)

    print('The longest common subsequence is', len(subsequence), 'bases long.')
def main():
    '''
    Run local_align_with_affine on the two LAFF sequences with BLOSUM62(),
    -11 and -1, save the returned items one per line and print the first.
    '''
    first, second = parse_fasta('problem_datasets/rosalind_laff.txt', True)
    result = local_align_with_affine(first, second, BLOSUM62(), -11, -1)

    with open('output/rosalind_laff_out.txt', 'w') as outfile:
        outfile.write('\n'.join(result))

    # The first item of the result is reported as the score.
    print('Maximum alignment score =', result[0])
def main():
    '''
    Print the positions returned by find_subsequence for the two FASTA
    sequences in the SSEQ dataset, separated by single spaces.

    The input file holds two FASTA records; they are told apart by the
    position of their header lines.
    '''
    sequence, motif = parse_fasta('problem_datasets/rosalind_sseq.txt')
    positions = find_subsequence(sequence, motif)
    print(' '.join(positions))
def main():
    '''
    Pass the CORR reads together with their reverse complements to
    error_correct and write each returned item as an 'old->new' line.
    '''
    reads = list(parse_fasta('problem_datasets/rosalind_corr.txt').values())
    # Build the complements as a full list first so the reads are not
    # extended while they are still being iterated.
    complements = [rev_comp(read) for read in reads]
    reads += complements

    corrections = error_correct(reads)

    with open('output/rosalind_corr_out.txt', 'w') as outfile:
        for correction in corrections:
            outfile.write('->'.join(correction) + '\n')
def main(filename):
    '''
    Print the edit distance between two of the sequences in a FASTA file.

    filename -- path handed to parse_fasta, whose result is used as a dict.

    A notice is printed when the file holds more than two sequences, since
    only two of them are compared.
    '''
    # Materialise the values once: dict views cannot be indexed on Python 3,
    # and this avoids rebuilding the list for every lookup on Python 2.
    seqs = list(parse_fasta(filename).values())

    if len(seqs) > 2:
        # TODO: clarify this message -- parse_fasta returns a dict, so the
        # two sequences used are not necessarily the first two in the file.
        print("More than two sequences in input file, "
              "only calculting edit distance between"
              " two sequences")

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(calc_edit_distance(seqs[0], seqs[1]))
def main():
    '''
    Print the result of longest_motif for the LCSM dataset sequences, or a
    fallback message when longest_motif returns None.
    '''
    sequences = parse_fasta('problem_datasets/rosalind_lcsm.txt')
    answer = longest_motif(sequences)

    # Identity comparison is the reliable way to test for None; an empty
    # (falsy) answer is still printed as-is.
    if answer is not None:
        print(answer)
    else:
        print('No common substring found.')
def main():
    '''
    Build a profile of the CONS dataset with profile_matrix, derive the
    consensus with consensus_seq and write the consensus followed by the
    formatted profile lines to the output file.
    '''
    records = parse_fasta('rosalind_cons.txt')
    matrix = profile_matrix(records)
    consensus_line = consensus_seq(matrix)

    with open('rosalind_cons_out.txt', 'w') as out:
        out.write(consensus_line + '\n')
        for row in format_profile(matrix):
            out.write(row + '\n')
def main():
    '''
    Print the positions returned by findSubSeq for the first two sequences
    of the SSEQ dataset, separated by single spaces.

    The input file holds two FASTA records; they are told apart by the
    position of their header lines.
    '''
    records = parse_fasta('problem_datasets/rosalind_sseq.txt')
    sequence, motif = list(records.values())[:2]
    positions = findSubSeq(sequence, motif)
    print(' '.join(positions))
def main():
    '''
    Build a profile of the CONS dataset sequences with profile_matrix,
    derive the consensus with consensus_seq and write the consensus
    followed by the formatted profile lines to the output file.
    '''
    records = parse_fasta('problem_datasets/rosalind_cons.txt')
    matrix = profile_matrix(list(records.values()))
    consensus_line = consensus_seq(matrix)

    with open('output/rosalind_cons_out.txt', 'w') as out:
        out.write(consensus_line + '\n')
        for row in format_profile(matrix):
            out.write(row + '\n')
def main(filename):
    '''
    Print the edit distance between two of the sequences in a FASTA file.

    filename -- path handed to parse_fasta, whose result is used as a dict.

    When the file holds more than two sequences a notice is printed, because
    only two of them take part in the calculation.
    '''
    # Build the list of sequences once; indexing dict.values() directly only
    # works on Python 2, where it also rebuilt the list on every call.
    sequences = list(parse_fasta(filename).values())

    if len(sequences) > 2:
        # TODO: clarify this message -- parse_fasta returns a dict, so the
        # two sequences used are not necessarily the first two in the file.
        print("More than two sequences in input file, "
              "only calculting edit distance between"
              " two sequences")

    # print(...) with one argument is valid on both Python 2 and Python 3.
    print(calc_edit_distance(sequences[0], sequences[1]))
def main():
    '''
    Compute the profile and consensus of the CONS dataset and save them:
    the consensus on the first line, then each formatted profile line.
    '''
    parsed = parse_fasta('problem_datasets/rosalind_cons.txt')
    counts = profile_matrix(parsed)
    consensus_line = consensus_seq(counts)

    with open('output/rosalind_cons_out.txt', 'w') as out:
        out.write(consensus_line + '\n')
        for formatted in format_profile(counts):
            out.write(formatted + '\n')
def main():
    '''
    Extend the CORR reads with their reverse complements, run error_correct
    on the combined collection and write each result as an 'old->new' line.
    '''
    reads = parse_fasta('problem_datasets/rosalind_corr.txt')
    # The comprehension is fully evaluated before the in-place extension.
    reads += [rev_comp(read) for read in reads]

    fixes = error_correct(reads)

    with open('output/rosalind_corr_out.txt', 'w') as out:
        for fix in fixes:
            out.write('->'.join(fix) + '\n')
def main():
    '''
    Build a heuristic multiple alignment of the MULT dataset sequences.

    The pair of sequences with the highest pairwise alignment_score seeds
    the alignment. Each remaining sequence, taken in index order, is then
    aligned against whichever already-placed sequence it scores best with.
    The sum of pairwise scores of the final alignment and the aligned
    sequences are written to the output file and echoed to the console.
    '''
    # Get the collection of sequences.
    seqs = parse_fasta('problem_datasets/rosalind_mult.txt')

    # alignment[k] stays '' until sequence k has been placed; remaining
    # holds the indices of the sequences still waiting to be placed.
    alignment = ['' for _ in seqs]
    remaining = list(range(len(seqs)))

    # Start by aligning the two most similar sequences. alignment_score
    # results are unpacked as (score, matrix), so rank the pairs on the
    # score rather than on the (i, j) key itself.
    scores = {}
    for i in range(len(seqs)):
        for j in range(len(seqs) - 1, i, -1):
            scores[(i, j)] = alignment_score(seqs[i], seqs[j])
    a, b = max(scores, key=lambda pair: scores[pair][0])
    matrix = scores[(a, b)][1]
    alignment[a], alignment[b] = align_sequences(seqs[a], seqs[b], matrix)
    remaining.remove(a)
    remaining.remove(b)

    # Take the next unplaced sequence, find the placed sequence it aligns
    # best to and align the two; repeat until all sequences are placed.
    while remaining:
        i = remaining[0]
        scores = {}
        for j, placed in enumerate(alignment):
            if placed != '':
                scores[j] = alignment_score(seqs[i], placed)
        best = max(scores, key=lambda index: scores[index][0])
        matrix = scores[best][1]
        # Store the re-aligned partner back in its own slot (best), not in
        # whatever index the scoring loop happened to finish on.
        # NOTE(review): if align_sequences inserts gaps into
        # alignment[best], the other placed rows are not updated -- confirm
        # whether that is acceptable for this heuristic.
        alignment[i], alignment[best] = align_sequences(
            seqs[i], alignment[best], matrix)
        remaining.remove(i)

    # Calculate the score of the final alignment as the sum over all pairs.
    max_score = 0
    for i in range(len(alignment)):
        for j in range(len(alignment) - 1, i, -1):
            max_score += alignment_score(alignment[i], alignment[j])[0]

    # Output the answer.
    with open('output/rosalind_mult_out.txt', 'w') as outfile:
        outfile.write(str(max_score) + '\n')
        outfile.write('\n'.join(alignment))

    print('-'*37 + 'ANSWER' + '-'*37)
    with open('output/rosalind_mult_out.txt', 'r') as answer:
        print(answer.read())
def main():
    '''
    Treat the longest SPLC sequence as the transcript and every sequence
    that differs from it as an intron, splice with splice_RNA, translate
    the result and write the peptide to the output file.
    '''
    records = parse_fasta('problem_datasets/rosalind_splc.txt')
    candidates = list(records.values())

    transcript = max(candidates, key=len)
    introns = [seq for seq in candidates if seq != transcript]

    protein = translate(splice_RNA(transcript, introns))

    with open('output/rosalind_splc_out.txt', 'w') as out:
        out.write(protein)
def main():
    '''
    Run semiglobal_align on the two GAP sequences, write the returned items
    one per line, then echo the saved file under an ANSWER banner.
    '''
    first, second = parse_fasta('problem_datasets/rosalind_gap.txt')
    result = semiglobal_align(first, second)

    with open('output/rosalind_gap_out.txt', 'w') as out:
        out.write('\n'.join(result))

    print('-'*37 + 'ANSWER' + '-'*37)

    # Read the answer back from disk so the console shows what was saved.
    with open('output/rosalind_gap_out.txt', 'r') as saved:
        print(saved.read())
def main():
    '''
    Splice and translate the SPLC dataset: the longest sequence is the
    transcript, all sequences unequal to it are passed to splice_RNA as
    introns, and the translated peptide is written to the output file.
    '''
    all_seqs = list(
        parse_fasta('problem_datasets/rosalind_splc.txt').values())

    longest = max(all_seqs, key=len)
    others = [candidate for candidate in all_seqs if candidate != longest]

    exons = splice_RNA(longest, others)
    protein = translate(exons)

    with open('output/rosalind_splc_out.txt', 'w') as out:
        out.write(protein)
def main():
    '''
    Build a contig from the LONG dataset sequences with shortest_contig,
    save it to the output file and print its length.
    '''
    # Extract sequences from a fasta file.
    seqs = parse_fasta('problem_datasets/rosalind_long.txt')

    # Find the shortest superstring.
    answer = shortest_contig(seqs)

    # Write the answer. The with-block closes (and flushes) the file
    # deterministically instead of leaving that to garbage collection.
    with open('output/rosalind_long_out.txt', 'w') as outfile:
        outfile.write(answer)

    # Optional: Print the length of the superstring.
    print('Shortest superstring is %i nucleotides long.' % len(answer))
def main():
    '''
    Run fitting_alignment on the two SIMS sequences, save the returned
    items one per line and print the first item.
    '''
    text, motif = parse_fasta('problem_datasets/rosalind_sims.txt')
    result = fitting_alignment(text, motif)

    with open('output/rosalind_sims_out.txt', 'w') as out:
        out.write('\n'.join(result))

    # The first item of the result is reported as the score.
    print('Optimal fitting alignment score =', result[0])
def main():
    '''
    Run overlap_align on the two OAP strings, save the returned items one
    per line and print the first item.
    '''
    left, right = parse_fasta('problem_datasets/rosalind_oap.txt')
    result = overlap_align(left, right)

    with open('output/rosalind_oap_out.txt', 'w') as out:
        out.write('\n'.join(result))

    # The first item of the result is reported as the score.
    print('Maximum alignment score =', result[0])
def main():
    '''
    Splice and translate the SPLC dataset. The longest sequence is the
    transcript and all sequences unequal to it are the introns. A non-empty
    peptide is written to the output file; an empty one only produces a
    console message.
    '''
    parsed = parse_fasta('problem_datasets/rosalind_splc.txt')
    transcript = max(parsed, key=len)
    introns = [seq for seq in parsed if seq != transcript]

    protein = translate(splice_RNA(transcript, introns))

    if protein != '':
        with open('output/rosalind_splc_out.txt', 'w') as out:
            out.write(protein)
    else:
        print('No exon found.')
def main(filename):
    '''
    Print the consensus of the sequences in a FASTA file, then its profile.

    filename -- path handed to parse_fasta, whose result is used as a dict.
    '''
    dat = parse_fasta(filename)
    # Hand over a real list so dna_profile sees the same kind of object on
    # Python 2 and Python 3 (where dict.values() is only a view).
    profile = dna_profile(list(dat.values()))

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(profile_consensus(profile))
    print_profile(profile)
def main():
    '''
    Compute failure_array for the KMP dataset sequence and write its
    entries, space-separated, to the output file.
    '''
    sequence = parse_fasta('problem_datasets/rosalind_kmp.txt')

    with open('output/rosalind_kmp_out.txt', 'w') as out:
        entries = [str(entry) for entry in failure_array(sequence)]
        out.write(' '.join(entries))
def main():
    '''
    Run longest_sub on the first two LCSQ sequences and write the result
    to the output file.
    '''
    records = parse_fasta('problem_datasets/rosalind_lcsq.txt')
    sequences = list(records.values())
    first, second = sequences[0], sequences[1]

    common = longest_sub(first, second)

    with open('output/rosalind_lcsq_out.txt', 'w') as out:
        out.write(common)
def main():
    '''
    Print every value returned by align_to_symbols for the two OSYM
    sequences, one per line.
    '''
    first, second = parse_fasta('problem_datasets/rosalind_osym.txt')

    # Each returned value is converted with str and put on its own line.
    results = align_to_symbols(first, second)
    lines = [str(value) for value in results]
    print('\n'.join(lines))
def main():
    '''
    Print the result of count_alignments for the two CTEA strings.
    '''
    first, second = parse_fasta('problem_datasets/rosalind_ctea.txt')
    total = count_alignments(first, second)
    print(total)
def main():
    '''
    Print the result of longest_motif for the LCSM dataset sequences.
    '''
    records = parse_fasta('problem_datasets/rosalind_lcsm.txt')
    motif = longest_motif(list(records.values()))
    print(motif)
def main():
    '''
    Print the result of pointMutations for the two TRAN sequences.
    '''
    first, second = parse_fasta('problem_datasets/rosalind_tran.txt')
    result = pointMutations(first, second)
    print(result)
def main():
    '''
    Print the result of edit_dist for the two EDIT sequences.
    '''
    first, second = parse_fasta('problem_datasets/rosalind_edit.txt')
    distance = edit_dist(first, second)
    print(distance)
def main():
    '''
    Run compute_gc on the GC dataset (parsed with no_id=False) and print the
    first returned value, a newline, and the second value formatted to six
    decimal places.
    '''
    records = parse_fasta('problem_datasets/rosalind_gc.txt', no_id=False)
    top_header, top_gc = compute_gc(records)

    # sep='' keeps the explicit '\n' as the only separator in the output.
    print(top_header, '\n', '%.6f' % top_gc, sep='')
def main():
    '''
    Print the result of max_global_align_gaps for the two MGAP sequences.
    '''
    first, second = parse_fasta('problem_datasets/rosalind_mgap.txt')
    gaps = max_global_align_gaps(first, second)
    print(gaps)
def main():
    '''
    Write every line produced by overlap_seqs for the GRPH dataset (parsed
    with no_id=False) to the output file, one per line.
    '''
    records = parse_fasta('problem_datasets/rosalind_grph.txt', no_id=False)

    with open('output/rosalind_grph_out.txt', 'w') as out:
        for edge in overlap_seqs(records):
            out.write(edge + '\n')
def main():
    '''
    Run compute_gc on the GC dataset and print the first returned value, a
    newline, and the second value formatted to six decimal places.
    '''
    records = parse_fasta('problem_datasets/rosalind_gc.txt')
    top_header, top_gc = compute_gc(records)

    # sep='' keeps the explicit '\n' as the only separator in the output.
    print(top_header, '\n', '%.6f' % top_gc, sep='')
def main():
    '''
    Read the two TRAN sequences, pass them to pointMutations and print
    whatever it returns.
    '''
    seq_a, seq_b = parse_fasta('problem_datasets/rosalind_tran.txt')
    outcome = pointMutations(seq_a, seq_b)
    print(outcome)
def main(filename):
    '''
    Print the profile consensus of a FASTA file, followed by the profile.

    filename -- path handed to parse_fasta, whose result is used as a dict.
    '''
    dat = parse_fasta(filename)
    # A concrete list is passed so dna_profile receives the same kind of
    # object on Python 2 and on Python 3, where dict.values() is a view.
    profile = dna_profile(list(dat.values()))

    # print(...) with one argument is valid on both Python 2 and Python 3.
    print(profile_consensus(profile))
    print_profile(profile)
def main(filename):
    '''
    Print each item yielded by find_max_gc for a FASTA file, one per line.

    filename -- path handed to parse_fasta.
    '''
    dat = parse_fasta(filename)
    for item in find_max_gc(dat):
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(item)
def main():
    '''
    Compute longest_sub for the first two sequences of the LCSQ dataset
    and save the result to the output file.
    '''
    sequences = list(
        parse_fasta('problem_datasets/rosalind_lcsq.txt').values())
    subsequence = longest_sub(sequences[0], sequences[1])

    with open('output/rosalind_lcsq_out.txt', 'w') as out:
        out.write(subsequence)
def main():
    '''
    Print the result of global_align for the two GLOB sequences, called
    with BLOSUM62() and -5.
    '''
    first, second = parse_fasta('problem_datasets/rosalind_glob.txt')
    best = global_align(first, second, BLOSUM62(), -5)
    print(best)
def main():
    '''
    Save the lines produced by overlap_seqs for the GRPH dataset (parsed
    with no_id=False), each terminated by a newline.
    '''
    fasta = parse_fasta('problem_datasets/rosalind_grph.txt', no_id=False)

    with open('output/rosalind_grph_out.txt', 'w') as out:
        # overlap_seqs is called only after the output file has been opened.
        out.writelines(pair + '\n' for pair in overlap_seqs(fasta))
def main():
    '''
    Collect the LCSM dataset sequences into a list, pass them to
    longest_motif and print what it returns.
    '''
    parsed = parse_fasta('problem_datasets/rosalind_lcsm.txt')
    candidates = list(parsed.values())
    shared = longest_motif(candidates)
    print(shared)
def main():
    '''
    Print the result of max_matches for the MMCH dataset sequence.
    '''
    sequence = parse_fasta('problem_datasets/rosalind_mmch.txt')
    total = max_matches(sequence)
    print(total)
def main():
    '''
    Print the result of global_align for the two GCON sequences, called
    with BLOSUM62() and -5.
    '''
    seq_a, seq_b = parse_fasta('problem_datasets/rosalind_gcon.txt')
    score = global_align(seq_a, seq_b, BLOSUM62(), -5)
    print(score)