예제 #1
0
def main():
    """Run ``getContig`` on the reads in rosalind_long.txt and save the result.

    The values of the mapping returned by ``parse_fasta`` are collected into a
    list, the length of the resulting string is printed, and the string itself
    is written to output/rosalind_long_out.txt.
    """
    records = parse_fasta('problem_datasets/rosalind_long.txt')
    superstring = getContig(list(records.values()))

    length = len(superstring)
    print('Shortest superstring is %i nucleotides long.' % length)

    with open('output/rosalind_long_out.txt', 'w') as out:
        out.write(superstring)
예제 #2
0
def main():
    """Print the length of the ``getContig`` result and write it to disk.

    Input is read from problem_datasets/rosalind_long.txt via ``parse_fasta``;
    output goes to output/rosalind_long_out.txt.
    """
    reads = [read for read in
             parse_fasta('problem_datasets/rosalind_long.txt').values()]
    contig = getContig(reads)
    summary = 'Shortest superstring is %i nucleotides long.' % len(contig)
    print(summary)

    with open('output/rosalind_long_out.txt', 'w') as handle:
        handle.write(contig)
예제 #3
0
def main():
    """Write the result of ``distance_matrix`` to a file, one row per line.

    Each row's entries are converted with ``str`` and separated by single
    spaces.
    """
    records = parse_fasta('problem_datasets/rosalind_pdst.txt')
    rows = distance_matrix(records)

    with open('output/rosalind_pdst_out.txt', 'w') as out:
        out.writelines(
            ' '.join(str(entry) for entry in row) + '\n' for row in rows
        )
예제 #4
0
def main():
    """Print and save factorial(#A) * factorial(#C) for the parsed sequence.

    The value returned by ``parse_fasta`` is used directly as the sequence
    (its ``count`` method is called on it).
    """
    sequence = parse_fasta('problem_datasets/rosalind_pmch.txt')

    a_factorial = factorial(sequence.count('A'))
    c_factorial = factorial(sequence.count('C'))
    total = a_factorial * c_factorial
    print(total)

    with open('output/rosalind_pmch_out.txt', 'w') as out:
        out.write(str(total))
예제 #5
0
def main():
    """Run ``alignment_score`` with PAM250 and a gap argument of -5.

    The returned items are written newline-separated to
    output/rosalind_loca_out.txt, and the first item is printed as the score.
    """
    first, second = parse_fasta('problem_datasets/rosalind_loca.txt')
    result = alignment_score(first, second, PAM250(), -5)

    with open('output/rosalind_loca_out.txt', 'w') as out:
        report = '\n'.join(result)
        out.write(report)

    score = result[0]
    print('Maximum alignment score =', score)
예제 #6
0
def main():
    """Run ``edit_dist_with_align`` on the two parsed sequences.

    The returned items are written newline-separated to
    output/rosalind_edta_out.txt; the first item is printed as the distance.
    """
    first, second = parse_fasta('problem_datasets/rosalind_edta.txt', 'seq')
    result = edit_dist_with_align(first, second)

    with open('output/rosalind_edta_out.txt', 'w') as out:
        text = '\n'.join(result)
        out.write(text)

    distance = result[0]
    print('Edit distance =', distance)
예제 #7
0
def main():
    """Chain ``raw_translate`` and ``find_orfs`` and save the results.

    The items returned by ``find_orfs`` are written one per line to
    output/rosalind_orf_out.txt.
    """
    dna = parse_fasta('problem_datasets/rosalind_orf.txt')
    found = find_orfs(raw_translate(dna))

    with open('output/rosalind_orf_out.txt', 'w') as out:
        text = '\n'.join(found)
        out.write(text)
예제 #8
0
def main():
    """Run ``semiglobal_align`` on the two sequences in rosalind_smgb.txt.

    The returned items are written newline-separated to the output file and
    the first item is printed as the alignment score.
    """
    first, second = parse_fasta('problem_datasets/rosalind_smgb.txt', True)
    result = semiglobal_align(first, second)

    with open('output/rosalind_smgb_out.txt', 'w') as out:
        text = '\n'.join(result)
        out.write(text)

    score = result[0]
    print('Maximum alignment score =', score)
예제 #9
0
def main():
    """Save the ``longest_sub`` result for the two parsed sequences.

    The result is written to output/rosalind_lcsq_out.txt and its length is
    printed.
    """
    first, second = parse_fasta('problem_datasets/rosalind_lcsq.txt')
    common = longest_sub(first, second)

    with open('output/rosalind_lcsq_out.txt', 'w') as out:
        out.write(common)

    size = len(common)
    print('The longest common subsequence is', size, 'bases long.')
예제 #10
0
def main():
    """Run ``alignment_score`` (PAM250, gap argument -5) and report the result.

    All returned items are written newline-separated to
    output/rosalind_loca_out.txt; the first item is printed as the score.
    """
    pair = parse_fasta("problem_datasets/rosalind_loca.txt", True)
    first, second = pair
    result = alignment_score(first, second, PAM250(), -5)

    with open("output/rosalind_loca_out.txt", "w") as out:
        text = "\n".join(result)
        out.write(text)

    print("Maximum alignment score =", result[0])
예제 #11
0
def main():
    """Write the ``semiglobal_align`` output to disk and print its first item.

    Input comes from problem_datasets/rosalind_smgb.txt; output lines go to
    output/rosalind_smgb_out.txt.
    """
    pair = parse_fasta('problem_datasets/rosalind_smgb.txt', True)
    first, second = pair
    aligned = semiglobal_align(first, second)

    with open('output/rosalind_smgb_out.txt', 'w') as handle:
        handle.write('\n'.join(aligned))

    best = aligned[0]
    print('Maximum alignment score =', best)
예제 #12
0
def main():
    """Run ``global_align_with_affine`` with BLOSUM62 and arguments -11, -1.

    The returned items are written newline-separated to
    output/rosalind_gaff_out.txt; the first item is printed as the score.
    """
    first, second = parse_fasta('problem_datasets/rosalind_gaff.txt', True)
    result = global_align_with_affine(first, second, BLOSUM62(), -11, -1)

    with open('output/rosalind_gaff_out.txt', 'w') as out:
        text = '\n'.join(result)
        out.write(text)

    score = result[0]
    print('Maximum alignment score =', score)
예제 #13
0
def main():
    """Compute factorial(#A) * factorial(#C), print it and write it to disk.

    The counts are taken with ``count`` on the value returned by
    ``parse_fasta`` for problem_datasets/rosalind_pmch.txt.
    """
    strand = parse_fasta('problem_datasets/rosalind_pmch.txt')

    matchings = factorial(strand.count('A'))
    matchings = matchings * factorial(strand.count('C'))
    print(matchings)

    with open('output/rosalind_pmch_out.txt', 'w') as handle:
        handle.write(str(matchings))
예제 #14
0
def main():
    """Save the ``edit_dist_with_align`` output and print its first item.

    Every returned item is converted with ``str`` before being written,
    newline-separated, to output/rosalind_edta_out.txt.
    """
    first, second = parse_fasta('problem_datasets/rosalind_edta.txt')
    result = edit_dist_with_align(first, second)

    with open('output/rosalind_edta_out.txt', 'w') as out:
        out.write('\n'.join(str(item) for item in result))

    distance = result[0]
    print('Edit distance =', distance)
예제 #15
0
def main():
    """Write the ``longest_sub`` result to disk and print its length.

    Input comes from problem_datasets/rosalind_lcsq.txt; the result is saved
    to output/rosalind_lcsq_out.txt.
    """
    pair = parse_fasta('problem_datasets/rosalind_lcsq.txt')
    first, second = pair
    subsequence = longest_sub(first, second)

    with open('output/rosalind_lcsq_out.txt', 'w') as handle:
        handle.write(subsequence)

    print('The longest common subsequence is', len(subsequence), 'bases long.')
예제 #16
0
def main():
    """Run ``local_align_with_affine`` with BLOSUM62 and arguments -11, -1.

    The returned items are written newline-separated to
    output/rosalind_laff_out.txt; the first item is printed as the score.
    """
    first, second = parse_fasta('problem_datasets/rosalind_laff.txt', True)
    result = local_align_with_affine(first, second, BLOSUM62(), -11, -1)

    with open('output/rosalind_laff_out.txt', 'w') as out:
        text = '\n'.join(result)
        out.write(text)

    score = result[0]
    print('Maximum alignment score =', score)
예제 #17
0
def main():
    """Print the items returned by ``find_subsequence``, space-separated.

    The input file for this problem contains two FASTA sequences, which
    ``parse_fasta`` is expected to hand back as a pair.
    """
    first, second = parse_fasta('problem_datasets/rosalind_sseq.txt')

    positions = find_subsequence(first, second)
    line = ' '.join(positions)
    print(line)
예제 #18
0
def main():
    """Run ``error_correct`` on the reads plus their ``rev_comp`` counterparts.

    Each item returned by ``error_correct`` is joined with '->' and written on
    its own line to output/rosalind_corr_out.txt.
    """
    reads = list(parse_fasta('problem_datasets/rosalind_corr.txt').values())
    # Build the complements first so the list is not extended while iterated.
    complements = [rev_comp(read) for read in reads]
    reads.extend(complements)

    corrections = error_correct(reads)

    with open('output/rosalind_corr_out.txt', 'w') as out:
        out.writelines('->'.join(pair) + '\n' for pair in corrections)
예제 #19
0
def main(filename):
    """Print ``calc_edit_distance`` for the first two sequences in a file.

    Args:
        filename: Path handed to ``parse_fasta``; the result is used as a
            mapping whose values are the sequences.

    A notice is printed when the file holds more than two sequences. Fewer
    than two sequences raises ``IndexError``.
    """
    dat = parse_fasta(filename)
    # list() keeps the values indexable on Python 3, where dict.values()
    # returns a view; on Python 2 it is just a copy.
    seqs = list(dat.values())
    if len(seqs) > 2:
        # Single parenthesised argument: prints identically on Python 2 and 3.
        print("More than two sequences in input file, "
              "only calculting edit distance between"
              " two sequences")
        # TODO: clarify the message, since parse_fasta returns a dict and
        # not just the first two seqs.
    print(calc_edit_distance(seqs[0], seqs[1]))
예제 #20
0
def main():
    """Print the ``longest_motif`` result for the sequences in rosalind_lcsm.txt.

    When ``longest_motif`` returns ``None`` a fallback message is printed
    instead.
    """
    sequences = parse_fasta('problem_datasets/rosalind_lcsm.txt')

    answer = longest_motif(sequences)

    # Identity test: the correct way to detect None, and immune to a custom
    # __ne__ on the returned object.
    if answer is not None:
        print(answer)
    else:
        print('No common substring found.')
예제 #21
0
def main():
    """Write the consensus string followed by the formatted profile rows.

    Reads rosalind_cons.txt from the working directory and writes
    rosalind_cons_out.txt next to it, one item per line.
    """
    records = parse_fasta('rosalind_cons.txt')
    counts = profile_matrix(records)
    consensus = consensus_seq(counts)

    with open('rosalind_cons_out.txt', 'w') as out:
        out.write(consensus + '\n')
        out.writelines(row + '\n' for row in format_profile(counts))
예제 #22
0
def main():
    """Print the items returned by ``findSubSeq``, space-separated.

    Only the first two values of the mapping returned by ``parse_fasta`` are
    used; the input file for this problem contains two FASTA sequences.
    """
    records = parse_fasta('problem_datasets/rosalind_sseq.txt')
    first, second = list(records.values())[:2]

    positions = findSubSeq(first, second)
    line = ' '.join(positions)
    print(line)
예제 #23
0
def main():
    """Save the consensus string and formatted profile for rosalind_cons.txt.

    The values of the parsed FASTA mapping feed ``profile_matrix``; the
    consensus comes first in the output file, then one profile row per line.
    """
    records = parse_fasta('problem_datasets/rosalind_cons.txt')
    counts = profile_matrix(list(records.values()))
    consensus = consensus_seq(counts)

    with open('output/rosalind_cons_out.txt', 'w') as out:
        out.write(consensus + '\n')
        out.writelines(row + '\n' for row in format_profile(counts))
def main(filename):
    """Print ``calc_edit_distance`` for the first two sequences in a file.

    Args:
        filename: Path handed to ``parse_fasta``; the result is used as a
            mapping whose values are the sequences.

    A notice is printed when the file holds more than two sequences. Fewer
    than two sequences raises ``IndexError``.
    """
    dat = parse_fasta(filename)
    # list() keeps the values indexable on Python 3, where dict.values()
    # returns a view; on Python 2 it is just a copy.
    seqs = list(dat.values())
    if len(seqs) > 2:
        # Single parenthesised argument: prints identically on Python 2 and 3.
        print("More than two sequences in input file, "
              "only calculting edit distance between"
              " two sequences")
        # TODO: clarify the message, since parse_fasta returns a dict and
        # not just the first two seqs.
    print(calc_edit_distance(seqs[0], seqs[1]))
예제 #25
0
def main():
    """Write the consensus and formatted profile to rosalind_cons_out.txt.

    The value returned by ``parse_fasta`` is passed straight to
    ``profile_matrix``; each output item is followed by a newline.
    """
    records = parse_fasta('problem_datasets/rosalind_cons.txt')
    table = profile_matrix(records)
    header = consensus_seq(table) + '\n'

    with open('output/rosalind_cons_out.txt', 'w') as handle:
        handle.write(header)
        handle.writelines(row + '\n' for row in format_profile(table))
예제 #26
0
def main():
    """Run ``error_correct`` on the reads extended with their ``rev_comp``s.

    Each returned item is joined with '->' and written on its own line to
    output/rosalind_corr_out.txt.
    """
    reads = parse_fasta('problem_datasets/rosalind_corr.txt')
    # The comprehension is fully built before the in-place concatenation.
    reads += [rev_comp(read) for read in reads]

    fixes = error_correct(reads)

    with open('output/rosalind_corr_out.txt', 'w') as handle:
        handle.writelines('->'.join(fix) + '\n' for fix in fixes)
예제 #27
0
def main():
    """Build a greedy multiple alignment for rosalind_mult.txt and report it.

    Steps:
      1. Score every pair of input sequences with ``alignment_score`` and
         align the best-scoring pair with ``align_sequences``.
      2. Take the remaining sequences in input order and align each one
         against the already-aligned sequence it scores best with.
      3. Sum the pairwise ``alignment_score`` values of the final alignment,
         write the total and the aligned sequences to
         output/rosalind_mult_out.txt, and echo the same text to stdout.

    ``alignment_score`` is used as returning a ``(score, matrix)`` pair.
    Raises ``ValueError`` (from ``max``) if fewer than two sequences are read.
    """
    # Get the collection of sequences.
    seqs = parse_fasta('problem_datasets/rosalind_mult.txt')
    count = len(seqs)

    # alignment[k] stays '' until sequence k has been placed in the alignment.
    alignment = ['' for _ in seqs]
    remaining = list(range(count))

    # Start by aligning the two most similar sequences.
    scores = {}
    for i in range(count):
        for j in range(i + 1, count):
            scores[(i, j)] = alignment_score(seqs[i], seqs[j])

    # Rank pairs by their score (item 0 of the result). A bare max(scores)
    # would compare the index tuples themselves and ignore the scores.
    a, b = max(scores, key=lambda pair: scores[pair][0])
    matrix = scores[(a, b)][1]
    alignment[a], alignment[b] = align_sequences(seqs[a], seqs[b], matrix)

    remaining.remove(a)
    remaining.remove(b)

    # Align each remaining sequence to the already-aligned sequence it scores
    # best against; repeat until all sequences are aligned.
    while remaining:
        i = remaining.pop(0)

        scores = {
            j: alignment_score(seqs[i], aligned)
            for j, aligned in enumerate(alignment)
            if aligned != ''
        }

        # Again rank by score rather than by index.
        best = max(scores, key=lambda j: scores[j][0])
        matrix = scores[best][1]

        # Store the re-gapped partner back into its own slot (``best``), not
        # into whichever index a scoring loop happened to finish on.
        # NOTE(review): gaps introduced into alignment[best] here are not
        # propagated to the other aligned rows -- confirm this is acceptable.
        alignment[i], alignment[best] = align_sequences(
            seqs[i], alignment[best], matrix)

    # Calculate the score of the final alignment as the sum over all pairs.
    max_score = sum(
        alignment_score(alignment[i], alignment[j])[0]
        for i in range(count)
        for j in range(i + 1, count)
    )

    # Output the answer.
    report = str(max_score) + '\n' + '\n'.join(alignment)
    with open('output/rosalind_mult_out.txt', 'w') as outfile:
        outfile.write(report)

    print('-'*37 + 'ANSWER' + '-'*37)
    print(report)
예제 #28
0
def main():
    """Splice and translate the longest sequence in rosalind_splc.txt.

    The longest parsed sequence is treated as the transcript and every
    sequence not equal to it as an intron; the ``translate`` result is written
    to output/rosalind_splc_out.txt.
    """
    records = parse_fasta('problem_datasets/rosalind_splc.txt')
    candidates = list(records.values())
    transcript = max(candidates, key=len)
    introns = list(filter(lambda seq: seq != transcript, candidates))

    protein = translate(splice_RNA(transcript, introns))

    with open('output/rosalind_splc_out.txt', 'w') as out:
        out.write(protein)
예제 #29
0
def main():
    """Save the ``semiglobal_align`` output for rosalind_gap.txt and echo it.

    The returned items are written newline-separated to
    output/rosalind_gap_out.txt; the file is then read back and printed under
    an 'ANSWER' banner.
    """
    out_path = 'output/rosalind_gap_out.txt'
    first, second = parse_fasta('problem_datasets/rosalind_gap.txt')

    result = semiglobal_align(first, second)

    with open(out_path, 'w') as out:
        out.write('\n'.join(result))

    banner = '-'*37 + 'ANSWER' + '-'*37
    print(banner)
    with open(out_path, 'r') as saved:
        contents = saved.read()
        print(contents)
예제 #30
0
def main():
    """Write the translated, spliced transcript from rosalind_splc.txt.

    The longest parsed sequence is passed to ``splice_RNA`` together with all
    sequences that differ from it; the ``translate`` result is saved to
    output/rosalind_splc_out.txt.
    """
    pool = list(parse_fasta('problem_datasets/rosalind_splc.txt').values())
    longest = max(pool, key=len)

    introns = []
    for candidate in pool:
        if candidate != longest:
            introns.append(candidate)

    mature = splice_RNA(longest, introns)
    protein = translate(mature)

    with open('output/rosalind_splc_out.txt', 'w') as handle:
        handle.write(protein)
예제 #31
0
def main():
    """Write the ``shortest_contig`` result for rosalind_long.txt and print its length.

    The result is saved to output/rosalind_long_out.txt.
    """
    # Extract sequences from a fasta file.
    seqs = parse_fasta('problem_datasets/rosalind_long.txt')

    # Find the shortest superstring.
    answer = shortest_contig(seqs)

    # Write the answer. The context manager guarantees the handle is flushed
    # and closed instead of relying on garbage collection.
    with open('output/rosalind_long_out.txt', 'w') as outfile:
        outfile.write(answer)

    # Optional: Print the length of the superstring.
    print('Shortest superstring is %i nucleotides long.' % len(answer))
예제 #32
0
def main():
    """Run ``fitting_alignment`` on the two sequences in rosalind_sims.txt.

    The returned items are written newline-separated to
    output/rosalind_sims_out.txt; the first item is printed as the score.
    """
    first, second = parse_fasta('problem_datasets/rosalind_sims.txt')
    result = fitting_alignment(first, second)

    with open('output/rosalind_sims_out.txt', 'w') as out:
        text = '\n'.join(result)
        out.write(text)

    score = result[0]
    print('Optimal fitting alignment score =', score)
예제 #33
0
def main():
    """Run ``overlap_align`` on the two strings in rosalind_oap.txt.

    The returned items are written newline-separated to
    output/rosalind_oap_out.txt; the first item is printed as the score.
    """
    first, second = parse_fasta('problem_datasets/rosalind_oap.txt')
    result = overlap_align(first, second)

    with open('output/rosalind_oap_out.txt', 'w') as out:
        text = '\n'.join(result)
        out.write(text)

    score = result[0]
    print('Maximum alignment score =', score)
예제 #34
0
def main():
    """Splice and translate the longest sequence, saving a non-empty result.

    If ``translate`` returns the empty string, a message is printed and no
    file is written.
    """
    pool = parse_fasta('problem_datasets/rosalind_splc.txt')
    longest = max(pool, key=len)
    introns = [candidate for candidate in pool if candidate != longest]

    protein = translate(splice_RNA(longest, introns))

    # Guard clause: nothing to save when the translation is empty.
    if protein == '':
        print('No exon found.')
        return

    with open('output/rosalind_splc_out.txt', 'w') as out:
        out.write(protein)
def main(filename):
    """Print the consensus and the profile for the sequences in a FASTA file.

    Args:
        filename: Path handed to ``parse_fasta``; the values of the returned
            mapping are passed to ``dna_profile``.
    """
    dat = parse_fasta(filename)
    profile = dna_profile(dat.values())
    # Single parenthesised argument: prints identically on Python 2 and 3.
    print(profile_consensus(profile))
    print_profile(profile)
예제 #36
0
def main():
    """Write the ``failure_array`` values for rosalind_kmp.txt, space-separated.

    Each value is converted with ``str`` before being written to
    output/rosalind_kmp_out.txt.
    """
    text = parse_fasta('problem_datasets/rosalind_kmp.txt')

    with open('output/rosalind_kmp_out.txt', 'w') as out:
        values = failure_array(text)
        out.write(' '.join(str(value) for value in values))
예제 #37
0
def main():
    """Write the ``longest_sub`` result for the first two parsed sequences.

    The values of the parsed FASTA mapping are listed and the first two are
    compared; the result goes to output/rosalind_lcsq_out.txt.
    """
    records = parse_fasta('problem_datasets/rosalind_lcsq.txt')
    candidates = list(records.values())
    first, second = candidates[0], candidates[1]
    common = longest_sub(first, second)

    with open('output/rosalind_lcsq_out.txt', 'w') as out:
        out.write(common)
예제 #38
0
def main():
    """Print each value returned by ``align_to_symbols`` on its own line.

    The two sequences come from problem_datasets/rosalind_osym.txt.
    """
    first, second = parse_fasta('problem_datasets/rosalind_osym.txt')

    # The original comment describes these values as the maximum alignment
    # score and the sum of all alignment scores.
    values = align_to_symbols(first, second)
    print('\n'.join(str(value) for value in values))
예제 #39
0
def main():
    """Print the ``count_alignments`` result for the strings in rosalind_ctea.txt."""
    first, second = parse_fasta('problem_datasets/rosalind_ctea.txt')

    # The original comment describes this value as the number of optimal
    # alignments (modulo 2^27 - 1).
    total = count_alignments(first, second)
    print(total)
예제 #40
0
def main():
    """Print the ``longest_motif`` result for the sequences in rosalind_lcsm.txt.

    The values of the parsed FASTA mapping are passed as a list.
    """
    records = parse_fasta('problem_datasets/rosalind_lcsm.txt')
    motif = longest_motif(list(records.values()))
    print(motif)
예제 #41
0
def main():
    """Print the ``pointMutations`` result for the sequences in rosalind_tran.txt."""
    first, second = parse_fasta('problem_datasets/rosalind_tran.txt')
    result = pointMutations(first, second)
    print(result)
예제 #42
0
def main():
    """Print the ``edit_dist`` result for the two sequences in rosalind_edit.txt."""
    first, second = parse_fasta('problem_datasets/rosalind_edit.txt')
    distance = edit_dist(first, second)
    print(distance)
예제 #43
0
def main():
    """Print the two ``compute_gc`` results on separate lines.

    The second value is formatted with six decimal places. The file is parsed
    with ``no_id=False``.
    """
    records = parse_fasta('problem_datasets/rosalind_gc.txt', no_id=False)
    header, gc_value = compute_gc(records)

    gc_text = '%.6f' % gc_value
    print(header, '\n', gc_text, sep='')
예제 #44
0
def main():
    """Print the ``max_global_align_gaps`` result for rosalind_mgap.txt."""
    first, second = parse_fasta('problem_datasets/rosalind_mgap.txt')
    result = max_global_align_gaps(first, second)
    print(result)
예제 #45
0
def main():
    """Write every line produced by ``overlap_seqs`` to the output file.

    The dataset is parsed with ``no_id=False``; each produced line is followed
    by a newline in output/rosalind_grph_out.txt.
    """
    records = parse_fasta('problem_datasets/rosalind_grph.txt', no_id=False)

    with open('output/rosalind_grph_out.txt', 'w') as out:
        out.writelines(edge + '\n' for edge in overlap_seqs(records))
예제 #46
0
def main():
    """Print the two values returned by ``compute_gc`` on separate lines.

    The second value is rendered with six decimal places.
    """
    records = parse_fasta('problem_datasets/rosalind_gc.txt')
    label, ratio = compute_gc(records)
    ratio_text = '%.6f' % ratio
    print(label, '\n', ratio_text, sep='')
예제 #47
0
def main():
    """Print what ``pointMutations`` returns for the pair in rosalind_tran.txt."""
    pair = parse_fasta('problem_datasets/rosalind_tran.txt')
    left, right = pair
    print(pointMutations(left, right))
def main(filename):
    """Print the consensus and the profile for the sequences in a FASTA file.

    Args:
        filename: Path handed to ``parse_fasta``; the values of the returned
            mapping are passed to ``dna_profile``.
    """
    dat = parse_fasta(filename)
    profile = dna_profile(dat.values())
    # Single parenthesised argument: prints identically on Python 2 and 3.
    print(profile_consensus(profile))
    print_profile(profile)
예제 #49
0
def main(filename):
    """Print every item yielded by ``find_max_gc`` for a FASTA file.

    Args:
        filename: Path handed to ``parse_fasta``.
    """
    dat = parse_fasta(filename)
    for i in find_max_gc(dat):
        # Single parenthesised argument: prints identically on Python 2 and 3.
        print(i)
예제 #50
0
def main():
    """Save the ``longest_sub`` result of the first two parsed sequences.

    Output is written to output/rosalind_lcsq_out.txt.
    """
    values = list(parse_fasta('problem_datasets/rosalind_lcsq.txt').values())
    left = values[0]
    right = values[1]
    subsequence = longest_sub(left, right)

    with open('output/rosalind_lcsq_out.txt', 'w') as handle:
        handle.write(subsequence)
예제 #51
0
def main():
    """Print the ``global_align`` result using BLOSUM62 and a gap argument of -5.

    The two sequences come from problem_datasets/rosalind_glob.txt.
    """
    first, second = parse_fasta('problem_datasets/rosalind_glob.txt')
    score = global_align(first, second, BLOSUM62(), -5)
    print(score)
예제 #52
0
def main():
    """Save the lines produced by ``overlap_seqs`` for rosalind_grph.txt.

    The dataset is parsed with ``no_id=False``; every line is written with a
    trailing newline to output/rosalind_grph_out.txt.
    """
    entries = parse_fasta('problem_datasets/rosalind_grph.txt', no_id=False)

    with open('output/rosalind_grph_out.txt', 'w') as handle:
        handle.writelines(item + '\n' for item in overlap_seqs(entries))
예제 #53
0
def main():
    """Print what ``longest_motif`` returns for the parsed FASTA values.

    Input comes from problem_datasets/rosalind_lcsm.txt.
    """
    parsed = parse_fasta('problem_datasets/rosalind_lcsm.txt')
    candidates = list(parsed.values())
    print(longest_motif(candidates))
예제 #54
0
def main():
    """Print the ``max_matches`` result for the sequence in rosalind_mmch.txt."""
    sequence = parse_fasta('problem_datasets/rosalind_mmch.txt')
    result = max_matches(sequence)
    print(result)
예제 #55
0
def main():
    """Print the ``global_align`` result using BLOSUM62 and a gap argument of -5.

    The two sequences come from problem_datasets/rosalind_gcon.txt.
    """
    pair = parse_fasta('problem_datasets/rosalind_gcon.txt')
    left, right = pair
    best = global_align(left, right, BLOSUM62(), -5)
    print(best)