Python parse_fasta 예제들, rosalind_utils.parse_fasta Python 예제들

예제 #1

0

파일 보기

def main():
    """Build a contig from the reads in rosalind_long.txt and save it.

    The values of the object returned by parse_fasta are passed to
    getContig; the length of the result is printed and the result itself is
    written to output/rosalind_long_out.txt.
    """
    records = parse_fasta('problem_datasets/rosalind_long.txt')
    reads = list(records.values())
    superstring = getContig(reads)
    print('Shortest superstring is %i nucleotides long.' % len(superstring))

    with open('output/rosalind_long_out.txt', 'w') as out_handle:
        out_handle.write(superstring)

예제 #2

0

파일 보기

파일: rosalind_LONG.py 프로젝트: ywang931030/Rosalind

def main():
    """Assemble the reads of rosalind_long.txt with getContig.

    Prints the length of the assembled string and writes the string to
    output/rosalind_long_out.txt.
    """
    fasta = parse_fasta('problem_datasets/rosalind_long.txt')
    contig = getContig(list(fasta.values()))
    contig_length = len(contig)
    print('Shortest superstring is %i nucleotides long.' % contig_length)

    with open('output/rosalind_long_out.txt', 'w') as result_file:
        result_file.write(contig)

예제 #3

0

파일 보기

파일: rosalind_PDST.py 프로젝트: sdwfrost/Rosalind

def main():
    """Write the distance matrix for rosalind_pdst.txt to the output folder.

    Each row produced by distance_matrix becomes one line of
    output/rosalind_pdst_out.txt, with its entries separated by spaces.
    """
    sequences = parse_fasta('problem_datasets/rosalind_pdst.txt')
    rows = distance_matrix(sequences)

    with open('output/rosalind_pdst_out.txt', 'w') as outfile:
        for row in rows:
            cells = [str(value) for value in row]
            outfile.write(' '.join(cells) + '\n')

예제 #4

0

파일 보기

파일: rosalind_PMCH.py 프로젝트: sdwfrost/Rosalind

def main():
    """Print and save factorial(#A) * factorial(#C) for rosalind_pmch.txt.

    The counts are taken with .count() on the value returned by
    parse_fasta; the product is printed and written (as text) to
    output/rosalind_pmch_out.txt.
    """
    rna = parse_fasta('problem_datasets/rosalind_pmch.txt')

    a_matchings = factorial(rna.count('A'))
    c_matchings = factorial(rna.count('C'))
    perfect = a_matchings * c_matchings
    print(perfect)

    with open('output/rosalind_pmch_out.txt', 'w') as outfile:
        outfile.write(str(perfect))

예제 #5

0

파일 보기

파일: rosalind_LOCA.py 프로젝트: Davo36/Rosalind-1

def main():
    """Run alignment_score on the two entries of rosalind_loca.txt.

    The alignment uses PAM250() and a -5 penalty argument. The returned
    items are written newline-separated to output/rosalind_loca_out.txt and
    the first item is printed as the maximum alignment score.
    """
    first, second = parse_fasta('problem_datasets/rosalind_loca.txt')
    result = alignment_score(first, second, PAM250(), -5)

    with open('output/rosalind_loca_out.txt', 'w') as out_handle:
        out_handle.write('\n'.join(result))

    top_score = result[0]
    print('Maximum alignment score =', top_score)

예제 #6

0

파일 보기

파일: rosalind_EDTA.py 프로젝트: ywang931030/Rosalind

def main():
    """Run edit_dist_with_align on the two entries of rosalind_edta.txt.

    The returned items are written newline-separated to
    output/rosalind_edta_out.txt; the first item is printed as the edit
    distance.
    """
    first, second = parse_fasta('problem_datasets/rosalind_edta.txt', 'seq')
    result = edit_dist_with_align(first, second)

    with open('output/rosalind_edta_out.txt', 'w') as out_handle:
        out_handle.write('\n'.join(result))

    distance = result[0]
    print('Edit distance =', distance)

예제 #7

0

파일 보기

파일: rosalind_ORF.py 프로젝트: sdwfrost/Rosalind

def main():
    """Translate rosalind_orf.txt and save the open reading frames found.

    The parsed input goes through raw_translate and then find_orfs; the
    resulting strings are written one per line to
    output/rosalind_orf_out.txt.
    """
    dna = parse_fasta('problem_datasets/rosalind_orf.txt')

    translations = raw_translate(dna)
    reading_frames = find_orfs(translations)

    with open('output/rosalind_orf_out.txt', 'w') as out_handle:
        out_handle.write('\n'.join(reading_frames))

예제 #8

0

파일 보기

파일: rosalind_SMGB.py 프로젝트: ywang931030/Rosalind

def main():
    """Run semiglobal_align on the two entries of rosalind_smgb.txt.

    The returned items are written newline-separated to
    output/rosalind_smgb_out.txt; the first item is printed as the maximum
    alignment score.
    """
    first, second = parse_fasta('problem_datasets/rosalind_smgb.txt', True)
    result = semiglobal_align(first, second)

    with open('output/rosalind_smgb_out.txt', 'w') as out_handle:
        out_handle.write('\n'.join(result))

    top_score = result[0]
    print('Maximum alignment score =', top_score)

예제 #9

0

파일 보기

파일: rosalind_LCSQ.py 프로젝트: sdwfrost/Rosalind

def main():
    """Save the result of longest_sub for the two entries of rosalind_lcsq.txt.

    The result is written to output/rosalind_lcsq_out.txt and its length is
    printed.
    """
    first, second = parse_fasta('problem_datasets/rosalind_lcsq.txt')
    subsequence = longest_sub(first, second)

    with open('output/rosalind_lcsq_out.txt', 'w') as out_handle:
        out_handle.write(subsequence)

    n_bases = len(subsequence)
    print('The longest common subsequence is', n_bases, 'bases long.')

예제 #10

0

파일 보기

파일: rosalind_LOCA.py 프로젝트: ywang931030/Rosalind

def main():
    """Run alignment_score on the two entries of rosalind_loca.txt.

    The alignment uses PAM250() and a -5 penalty argument. The returned
    items are written newline-separated to output/rosalind_loca_out.txt and
    the first item is printed as the maximum alignment score.
    """
    first, second = parse_fasta("problem_datasets/rosalind_loca.txt", True)
    scoring = PAM250()
    result = alignment_score(first, second, scoring, -5)

    with open("output/rosalind_loca_out.txt", "w") as out_handle:
        out_handle.write("\n".join(result))

    print("Maximum alignment score =", result[0])

예제 #11

0

파일 보기

def main():
    """Semiglobally align the two entries of rosalind_smgb.txt and save the result.

    Items returned by semiglobal_align are joined with newlines into
    output/rosalind_smgb_out.txt; item 0 is printed as the maximum score.
    """
    seq_a, seq_b = parse_fasta('problem_datasets/rosalind_smgb.txt', True)
    aligned = semiglobal_align(seq_a, seq_b)

    with open('output/rosalind_smgb_out.txt', 'w') as result_file:
        result_file.write('\n'.join(aligned))

    best = aligned[0]
    print('Maximum alignment score =', best)

예제 #12

0

파일 보기

def main():
    """Run global_align_with_affine on the two entries of rosalind_gaff.txt.

    The call uses BLOSUM62() with the arguments -11 and -1. The returned
    items are written newline-separated to output/rosalind_gaff_out.txt and
    the first item is printed as the maximum alignment score.
    """
    first, second = parse_fasta('problem_datasets/rosalind_gaff.txt', True)
    scoring = BLOSUM62()
    result = global_align_with_affine(first, second, scoring, -11, -1)

    with open('output/rosalind_gaff_out.txt', 'w') as out_handle:
        out_handle.write('\n'.join(result))

    print('Maximum alignment score =', result[0])

예제 #13

0

파일 보기

파일: rosalind_PMCH.py 프로젝트: Davo36/Rosalind-1

def main():
    """Compute factorial(#A) * factorial(#C) for the input in rosalind_pmch.txt.

    The product is printed and also written, as text, to
    output/rosalind_pmch_out.txt.
    """
    strand = parse_fasta('problem_datasets/rosalind_pmch.txt')

    count_a = strand.count('A')
    count_c = strand.count('C')
    total = factorial(count_a) * factorial(count_c)
    print(total)

    with open('output/rosalind_pmch_out.txt', 'w') as result_file:
        result_file.write(str(total))

예제 #14

0

파일 보기

파일: rosalind_EDTA.py 프로젝트: Davo36/Rosalind-1

def main():
    """Run edit_dist_with_align on the two entries of rosalind_edta.txt.

    Every returned item is converted with str() and written on its own line
    to output/rosalind_edta_out.txt; the first item is printed as the edit
    distance.
    """
    first, second = parse_fasta('problem_datasets/rosalind_edta.txt')
    result = edit_dist_with_align(first, second)

    with open('output/rosalind_edta_out.txt', 'w') as out_handle:
        out_handle.write('\n'.join(str(item) for item in result))

    distance = result[0]
    print('Edit distance =', distance)

예제 #15

0

파일 보기

파일: rosalind_LCSQ.py 프로젝트: Davo36/Rosalind-1

def main():
    """Write longest_sub() of the two entries in rosalind_lcsq.txt to disk.

    The result goes to output/rosalind_lcsq_out.txt and its length is
    reported on stdout.
    """
    seq_a, seq_b = parse_fasta('problem_datasets/rosalind_lcsq.txt')
    common = longest_sub(seq_a, seq_b)

    with open('output/rosalind_lcsq_out.txt', 'w') as result_file:
        result_file.write(common)

    common_length = len(common)
    print('The longest common subsequence is', common_length, 'bases long.')

예제 #16

0

파일 보기

파일: rosalind_LAFF.py 프로젝트: ywang931030/Rosalind

def main():
    """Run local_align_with_affine on the two entries of rosalind_laff.txt.

    The call uses BLOSUM62() with the arguments -11 and -1. The returned
    items are written newline-separated to output/rosalind_laff_out.txt and
    the first item is printed as the maximum alignment score.
    """
    first, second = parse_fasta('problem_datasets/rosalind_laff.txt', True)
    scoring = BLOSUM62()
    result = local_align_with_affine(first, second, scoring, -11, -1)

    with open('output/rosalind_laff_out.txt', 'w') as out_handle:
        out_handle.write('\n'.join(result))

    print('Maximum alignment score =', result[0])

예제 #17

0

파일 보기

파일: rosalind_SSEQ.py 프로젝트: sdwfrost/Rosalind

def main():
    """Print the positions returned by find_subsequence for rosalind_sseq.txt.

    The input file for this problem contains two FASTA sequences, which can
    be split into separate sequences based on the position of the header
    lines. The positions are printed space-separated on one line.
    """
    haystack, needle = parse_fasta('problem_datasets/rosalind_sseq.txt')

    positions = find_subsequence(haystack, needle)
    line = ' '.join(positions)
    print(line)

예제 #18

0

파일 보기

파일: rosalind_CORR.py 프로젝트: ywang931030/Rosalind

def main():
    """Run error_correct on the reads of rosalind_corr.txt and save the result.

    The read list is extended with rev_comp() of every read before
    error_correct is called. Each returned item is joined with '->' and
    written on its own line to output/rosalind_corr_out.txt.
    """
    records = parse_fasta('problem_datasets/rosalind_corr.txt')
    reads = list(records.values())
    reverse_complements = [rev_comp(read) for read in reads]
    reads += reverse_complements

    corrections = error_correct(reads)

    with open('output/rosalind_corr_out.txt', 'w') as out_handle:
        for correction in corrections:
            out_handle.write('->'.join(correction) + '\n')

예제 #19

0

파일 보기

def main(filename):
    """Print calc_edit_distance() for the first two sequences of a FASTA file.

    Args:
        filename: path handed to parse_fasta, whose result is used as a
            dict here.

    Only the first two values yielded by the dict are compared; a notice is
    printed when the input holds more than two sequences.
    """
    dat = parse_fasta(filename)
    # Materialise the values once: dict views cannot be indexed on Python 3.
    seqs = list(dat.values())
    if len(seqs) > 2:
        # print() with one argument behaves the same on Python 2 and 3.
        print("More than two sequences in input file, "
              "only calculting edit distance between"
              " two sequences")
        # need to clarify message as parse_fasta returns a dict,
        # not just first two seqs
    print(calc_edit_distance(seqs[0], seqs[1]))

예제 #20

0

파일 보기

파일: rosalind_LCSM.py 프로젝트: Davo36/Rosalind-1

def main():
    """Print the result of longest_motif for the sequences in rosalind_lcsm.txt.

    A fallback message is printed when longest_motif returns None.
    """
    sequences = parse_fasta('problem_datasets/rosalind_lcsm.txt')

    answer = longest_motif(sequences)

    # Identity test: None is a singleton, and `!=` could be overridden by
    # the returned object's __ne__.
    if answer is not None:
        print(answer)
    else:
        print('No common substring found.')

예제 #21

0

파일 보기

파일: cons.py 프로젝트: kanglicheng/rosalind

def main():
    """Write the consensus string and profile for rosalind_cons.txt.

    The first line of rosalind_cons_out.txt is the result of consensus_seq;
    each line yielded by format_profile follows on its own line.
    """
    dna_strings = parse_fasta('rosalind_cons.txt')
    matrix = profile_matrix(dna_strings)
    consensus = consensus_seq(matrix)

    with open('rosalind_cons_out.txt', 'w') as out_handle:
        out_handle.write(consensus + '\n')
        for profile_row in format_profile(matrix):
            out_handle.write(profile_row + '\n')

예제 #22

0

파일 보기

파일: rosalind_SSEQ.py 프로젝트: ywang931030/Rosalind

def main():
    """Print the positions returned by findSubSeq for rosalind_sseq.txt.

    The input file for this problem contains two FASTA sequences, which can
    be split into separate sequences based on the position of the header
    lines. Only the first two parsed values are used; the positions are
    printed space-separated on one line.
    """
    records = parse_fasta('problem_datasets/rosalind_sseq.txt')
    haystack, needle = list(records.values())[:2]

    positions = findSubSeq(haystack, needle)
    print(' '.join(positions))

예제 #23

0

파일 보기

def main():
    """Write the consensus string and profile for rosalind_cons.txt.

    The first line of output/rosalind_cons_out.txt is the result of
    consensus_seq; each line yielded by format_profile follows on its own
    line.
    """
    records = parse_fasta('problem_datasets/rosalind_cons.txt')
    dna_strings = list(records.values())
    matrix = profile_matrix(dna_strings)
    consensus = consensus_seq(matrix)

    with open('output/rosalind_cons_out.txt', 'w') as out_handle:
        out_handle.write(consensus + '\n')
        for profile_row in format_profile(matrix):
            out_handle.write(profile_row + '\n')

예제 #24

0

파일 보기

파일: rosalind_editDist.py 프로젝트: nate-d-olson/CBBG_Rosalind_Bioinf

def main(filename):
    """Print calc_edit_distance() for the first two sequences of a FASTA file.

    Args:
        filename: path handed to parse_fasta, whose result is used as a
            dict here.

    Only the first two values yielded by the dict are compared; a notice is
    printed when the input holds more than two sequences.
    """
    dat = parse_fasta(filename)
    # Materialise the values once: dict views cannot be indexed on Python 3.
    seqs = list(dat.values())
    if len(seqs) > 2:
        # print() with one argument behaves the same on Python 2 and 3.
        print("More than two sequences in input file, "
              "only calculting edit distance between"
              " two sequences")
        # need to clarify message as parse_fasta returns a dict,
        # not just first two seqs
    print(calc_edit_distance(seqs[0], seqs[1]))

예제 #25

0

파일 보기

파일: rosalind_CONS.py 프로젝트: Davo36/Rosalind-1

def main():
    """Save the consensus and formatted profile for rosalind_cons.txt.

    output/rosalind_cons_out.txt receives the consensus_seq result on its
    first line, followed by one line per item from format_profile.
    """
    strands = parse_fasta('problem_datasets/rosalind_cons.txt')
    counts = profile_matrix(strands)
    consensus = consensus_seq(counts)

    with open('output/rosalind_cons_out.txt', 'w') as result_file:
        result_file.write(consensus + '\n')
        for formatted in format_profile(counts):
            result_file.write(formatted + '\n')

예제 #26

0

파일 보기

파일: rosalind_CORR.py 프로젝트: sdwfrost/Rosalind

def main():
    """Run error_correct on the reads of rosalind_corr.txt and save the result.

    The parsed reads are extended in place with rev_comp() of every read
    before error_correct is called. Each returned item is joined with '->'
    and written on its own line to output/rosalind_corr_out.txt.
    """
    reads = parse_fasta('problem_datasets/rosalind_corr.txt')
    reverse_complements = [rev_comp(read) for read in reads]
    reads += reverse_complements

    corrections = error_correct(reads)

    with open('output/rosalind_corr_out.txt', 'w') as out_handle:
        for correction in corrections:
            out_handle.write('->'.join(correction) + '\n')

예제 #27

0

파일 보기

파일: rosalind_MULT.py 프로젝트: Davo36/Rosalind-1

def main():
    """Progressively align the sequences of rosalind_mult.txt and save the result.

    The two sequences with the highest pairwise alignment score are aligned
    first. Every remaining sequence is then aligned against whichever
    already-aligned sequence it scores best with. Finally the pairwise
    scores of all aligned sequences are summed; that total and the aligned
    sequences are written to output/rosalind_mult_out.txt and printed.

    alignment_score is used as returning a (score, matrix) pair, and
    align_sequences as returning the two aligned strings.
    """
    # Get the collection of sequences.
    seqs = parse_fasta('problem_datasets/rosalind_mult.txt')

    # alignment[k] is the aligned form of seqs[k] ('' until it is aligned);
    # remaining holds the indices that still have to be aligned.
    alignment = ['' for _ in seqs]
    remaining = list(range(len(seqs)))

    # Start by aligning the two most similar sequences.
    scores = {}
    for i in range(len(seqs)):
        for j in range(len(seqs)-1, i, -1):
            scores[(i, j)] = alignment_score(seqs[i], seqs[j])

    # Select the pair by score: a bare max(scores) would compare the index
    # tuples themselves and always return the largest (i, j) key.
    a, b = max(scores, key=lambda pair: scores[pair][0])
    _, matrix = scores[(a, b)]
    alignment[a], alignment[b] = align_sequences(seqs[a], seqs[b], matrix)

    remaining.remove(a)
    remaining.remove(b)

    # Take each remaining sequence in turn, find the already aligned
    # sequence it aligns best with, and align it to that one; repeat until
    # all sequences are aligned.
    while remaining:
        scores = {}
        i = remaining[0]

        for j in range(len(alignment)):
            if alignment[j] != '':
                scores[j] = alignment_score(seqs[i], alignment[j])

        # Again select by score rather than by the largest index.
        best = max(scores, key=lambda idx: scores[idx][0])
        _, matrix = scores[best]

        # Store the re-aligned partner back in its own slot (best), not in
        # whatever index the scoring loop happened to finish on.
        alignment[i], alignment[best] = align_sequences(
            seqs[i], alignment[best], matrix)

        remaining.remove(i)

    # Calculate the maximum score as the sum over all aligned pairs.
    max_score = 0
    for i in range(len(alignment)):
        for j in range(len(alignment)-1, i, -1):
            max_score += alignment_score(alignment[i], alignment[j])[0]

    # Output the answer.
    report = str(max_score) + '\n' + '\n'.join(alignment)
    with open('output/rosalind_mult_out.txt', 'w') as outfile:
        outfile.write(report)

    print('-'*37 + 'ANSWER' + '-'*37)
    print(report)

예제 #28

0

파일 보기

파일: rosalind_SPLC.py 프로젝트: ywang931030/Rosalind

def main():
    """Splice and translate the longest sequence of rosalind_splc.txt.

    The longest parsed value is passed to splice_RNA together with every
    value not equal to it; the spliced result is translated and written to
    output/rosalind_splc_out.txt.
    """
    records = parse_fasta('problem_datasets/rosalind_splc.txt')
    entries = list(records.values())
    longest = max(entries, key=len)
    shorter = [entry for entry in entries if entry != longest]

    exons = splice_RNA(longest, shorter)
    protein = translate(exons)

    with open('output/rosalind_splc_out.txt', 'w') as out_handle:
        out_handle.write(protein)

예제 #29

0

파일 보기

파일: rosalind_GAP.py 프로젝트: Davo36/Rosalind-1

def main():
    """Run semiglobal_align on the two entries of rosalind_gap.txt.

    The returned items are written newline-separated to
    output/rosalind_gap_out.txt; the file is then read back and printed
    under an 'ANSWER' banner.
    """
    first, second = parse_fasta('problem_datasets/rosalind_gap.txt')

    result = semiglobal_align(first, second)

    with open('output/rosalind_gap_out.txt', 'w') as out_handle:
        out_handle.write('\n'.join(result))

    banner = '-'*37 + 'ANSWER' + '-'*37
    print(banner)
    with open('output/rosalind_gap_out.txt', 'r') as in_handle:
        saved = in_handle.read()
        print(saved)

예제 #30

0

파일 보기

def main():
    """Translate the spliced form of the longest entry in rosalind_splc.txt.

    Every parsed value that differs from the longest one is passed to
    splice_RNA as an intron; the translated result is saved to
    output/rosalind_splc_out.txt.
    """
    parsed = parse_fasta('problem_datasets/rosalind_splc.txt')
    all_seqs = list(parsed.values())
    transcript = max(all_seqs, key=len)
    to_remove = [seq for seq in all_seqs if seq != transcript]

    mature = splice_RNA(transcript, to_remove)
    peptide = translate(mature)

    with open('output/rosalind_splc_out.txt', 'w') as result_file:
        result_file.write(peptide)

예제 #31

0

파일 보기

def main():
    """Compute shortest_contig for rosalind_long.txt, save it, report its length.

    The result is written to output/rosalind_long_out.txt and its length is
    printed.
    """
    # Extract sequences from a fasta file.
    seqs = parse_fasta('problem_datasets/rosalind_long.txt')

    # Find the shortest superstring.
    answer = shortest_contig(seqs)

    # Write the answer. The context manager closes (and flushes) the file
    # deterministically instead of leaving that to garbage collection.
    with open('output/rosalind_long_out.txt', 'w') as outfile:
        outfile.write(answer)

    # Optional: Print the length of the superstring.
    print('Shortest superstring is %i nucleotides long.' % len(answer))

예제 #32

0

파일 보기

파일: rosalind_SIMS.py 프로젝트: sdwfrost/Rosalind

def main():
    """Run fitting_alignment on the two entries of rosalind_sims.txt.

    The returned items are written newline-separated to
    output/rosalind_sims_out.txt; the first item is printed as the optimal
    fitting alignment score.
    """
    # Load both input sequences.
    first, second = parse_fasta('problem_datasets/rosalind_sims.txt')

    # Compute the alignment.
    result = fitting_alignment(first, second)

    # Persist the result.
    with open('output/rosalind_sims_out.txt', 'w') as out_handle:
        out_handle.write('\n'.join(result))

    # Optional: report the score on stdout.
    top_score = result[0]
    print('Optimal fitting alignment score =', top_score)

예제 #33

0

파일 보기

파일: rosalind_OAP.py 프로젝트: sdwfrost/Rosalind

def main():
    """Run overlap_align on the two entries of rosalind_oap.txt.

    The returned items are written newline-separated to
    output/rosalind_oap_out.txt; the first item is printed as the maximum
    alignment score.
    """
    # Load both input strings.
    first, second = parse_fasta('problem_datasets/rosalind_oap.txt')

    # Compute the alignment.
    result = overlap_align(first, second)

    # Persist the result.
    with open('output/rosalind_oap_out.txt', 'w') as out_handle:
        out_handle.write('\n'.join(result))

    # Optional: report the score on stdout.
    top_score = result[0]
    print('Maximum alignment score =', top_score)

예제 #34

0

파일 보기

파일: rosalind_SPLC.py 프로젝트: sdwfrost/Rosalind

def main():
    """Splice and translate the longest sequence of rosalind_splc.txt.

    The longest parsed entry is passed to splice_RNA together with every
    entry not equal to it. A non-empty translation is written to
    output/rosalind_splc_out.txt; an empty one only prints a message.
    """
    entries = parse_fasta('problem_datasets/rosalind_splc.txt')
    longest = max(entries, key=len)
    shorter = [entry for entry in entries if entry != longest]

    exons = splice_RNA(longest, shorter)
    protein = translate(exons)

    if protein != '':
        with open('output/rosalind_splc_out.txt', 'w') as out_handle:
            out_handle.write(protein)
    else:
        print('No exon found.')

예제 #35

0

파일 보기

파일: rosalind_profileConsensus.py 프로젝트: nate-d-olson/CBBG_Rosalind_Bioinf

def main(filename):
    """Print the consensus and the profile for the sequences in a FASTA file.

    Args:
        filename: path handed to parse_fasta, whose result is used as a
            dict here.
    """
    dat = parse_fasta(filename)
    # Pass a real list: on Python 3 dict.values() is a view that cannot be
    # indexed. On Python 2 this is just a copy of the same list.
    profile = dna_profile(list(dat.values()))
    # print() with one argument behaves the same on Python 2 and 3.
    print(profile_consensus(profile))
    print_profile(profile)

예제 #36

0

파일 보기

파일: rosalind_KMP.py 프로젝트: Davo36/Rosalind-1

def main():
    """Write failure_array() of the input in rosalind_kmp.txt to disk.

    The items of the array are converted with str() and written
    space-separated to output/rosalind_kmp_out.txt.
    """
    text = parse_fasta('problem_datasets/rosalind_kmp.txt')

    with open('output/rosalind_kmp_out.txt', 'w') as out_handle:
        entries = [str(value) for value in failure_array(text)]
        out_handle.write(' '.join(entries))

예제 #37

0

파일 보기

파일: rosalind_LCSQ.py 프로젝트: ywang931030/Rosalind

def main():
    """Save longest_sub() of the first two sequences in rosalind_lcsq.txt.

    The result is written to output/rosalind_lcsq_out.txt.
    """
    records = parse_fasta('problem_datasets/rosalind_lcsq.txt')
    entries = list(records.values())
    first = entries[0]
    second = entries[1]
    subsequence = longest_sub(first, second)

    with open('output/rosalind_lcsq_out.txt', 'w') as out_handle:
        out_handle.write(subsequence)

예제 #38

0

파일 보기

파일: rosalind_OSYM.py 프로젝트: sdwfrost/Rosalind

def main():
    """Print each item returned by align_to_symbols for rosalind_osym.txt.

    The items are converted with str() and printed one per line.
    """
    # Load the two sequences from the .txt file.
    first, second = parse_fasta('problem_datasets/rosalind_osym.txt')

    # Per the original note: the maximum alignment score and the sum of all
    # alignment scores.
    results = align_to_symbols(first, second)
    print('\n'.join(str(item) for item in results))

예제 #39

0

파일 보기

def main():
    """Print count_alignments() for the two entries of rosalind_ctea.txt."""
    # Load both input strings.
    first, second = parse_fasta('problem_datasets/rosalind_ctea.txt')

    # Per the original note: the number of optimal alignments
    # (modulo 2^27 - 1).
    total = count_alignments(first, second)
    print(total)

예제 #40

0

파일 보기

파일: rosalind_LCSM.py 프로젝트: ywang931030/Rosalind

def main():
    """Print longest_motif() for the sequences parsed from rosalind_lcsm.txt."""
    records = parse_fasta('problem_datasets/rosalind_lcsm.txt')
    dna_strings = list(records.values())

    motif = longest_motif(dna_strings)
    print(motif)

예제 #41

0

파일 보기

파일: rosalind_TRAN.py 프로젝트: Davo36/Rosalind-1

def main():
    """Print pointMutations() for the two entries of rosalind_tran.txt."""
    first, second = parse_fasta('problem_datasets/rosalind_tran.txt')

    result = pointMutations(first, second)
    print(result)

예제 #42

0

파일 보기

파일: rosalind_EDIT.py 프로젝트: sdwfrost/Rosalind

def main():
    """Print edit_dist() for the two entries of rosalind_edit.txt."""
    first, second = parse_fasta('problem_datasets/rosalind_edit.txt')

    distance = edit_dist(first, second)
    print(distance)

예제 #43

0

파일 보기

파일: rosalind_GC.py 프로젝트: Davo36/Rosalind-1

def main():
    """Print the two values returned by compute_gc for rosalind_gc.txt.

    The first value is printed as-is and the second on the next line with
    six decimal places. parse_fasta is called with no_id=False.
    """
    records = parse_fasta('problem_datasets/rosalind_gc.txt', no_id=False)
    best_header, best_gc = compute_gc(records)

    gc_text = '%.6f' % best_gc
    print(best_header, '\n', gc_text, sep='')

예제 #44

0

파일 보기

def main():
    """Print max_global_align_gaps() for the two entries of rosalind_mgap.txt."""
    first, second = parse_fasta('problem_datasets/rosalind_mgap.txt')

    result = max_global_align_gaps(first, second)
    print(result)

예제 #45

0

파일 보기

파일: rosalind_GRPH.py 프로젝트: sdwfrost/Rosalind

def main():
    """Write every line yielded by overlap_seqs for rosalind_grph.txt.

    parse_fasta is called with no_id=False; each yielded line is written,
    newline-terminated, to output/rosalind_grph_out.txt.
    """
    records = parse_fasta('problem_datasets/rosalind_grph.txt', no_id=False)

    with open('output/rosalind_grph_out.txt', 'w') as out_handle:
        for edge in overlap_seqs(records):
            out_handle.write(edge + '\n')

예제 #46

0

파일 보기

def main():
    """Print the two values returned by compute_gc for rosalind_gc.txt.

    The first value is printed as-is and the second on the next line with
    six decimal places.
    """
    records = parse_fasta('problem_datasets/rosalind_gc.txt')
    best_header, best_gc = compute_gc(records)
    gc_text = '%.6f' % best_gc
    print(best_header, '\n', gc_text, sep='')

예제 #47

0

파일 보기

파일: rosalind_TRAN.py 프로젝트: sdwfrost/Rosalind

def main():
    """Report pointMutations() for the pair of entries in rosalind_tran.txt."""
    seq_a, seq_b = parse_fasta('problem_datasets/rosalind_tran.txt')

    outcome = pointMutations(seq_a, seq_b)
    print(outcome)

예제 #48

0

파일 보기

파일: rosalind_profileConsensus.py 프로젝트: nate-d-olson/CBBG_Rosalind_Bioinf

def main(filename):
    """Print the consensus and the profile for the sequences in a FASTA file.

    Args:
        filename: path handed to parse_fasta, whose result is used as a
            dict here.
    """
    dat = parse_fasta(filename)
    # Pass a real list: on Python 3 dict.values() is a view that cannot be
    # indexed. On Python 2 this is just a copy of the same list.
    profile = dna_profile(list(dat.values()))
    # print() with one argument behaves the same on Python 2 and 3.
    print(profile_consensus(profile))
    print_profile(profile)

예제 #49

0

파일 보기

def main(filename):
    """Print every item yielded by find_max_gc for a FASTA file.

    Args:
        filename: path handed to parse_fasta.
    """
    dat = parse_fasta(filename)
    for i in find_max_gc(dat):
        # print() with one argument behaves the same on Python 2 and 3; the
        # original print statement is a syntax error on Python 3.
        print(i)

예제 #50

0

파일 보기

def main():
    """Write longest_sub() of the first two entries of rosalind_lcsq.txt.

    The result is saved to output/rosalind_lcsq_out.txt.
    """
    parsed = parse_fasta('problem_datasets/rosalind_lcsq.txt')
    seqs = list(parsed.values())
    common = longest_sub(seqs[0], seqs[1])

    with open('output/rosalind_lcsq_out.txt', 'w') as result_file:
        result_file.write(common)

예제 #51

0

파일 보기

파일: rosalind_GLOB.py 프로젝트: Davo36/Rosalind-1

def main():
    """Print global_align() for the two entries of rosalind_glob.txt.

    The alignment is called with BLOSUM62() and a -5 penalty argument.
    """
    first, second = parse_fasta('problem_datasets/rosalind_glob.txt')
    scoring = BLOSUM62()
    best = global_align(first, second, scoring, -5)

    print(best)

예제 #52

0

파일 보기

파일: rosalind_GRPH.py 프로젝트: Davo36/Rosalind-1

def main():
    """Save the lines produced by overlap_seqs for rosalind_grph.txt.

    parse_fasta is called with no_id=False; output/rosalind_grph_out.txt
    receives one newline-terminated line per yielded item.
    """
    fastas = parse_fasta('problem_datasets/rosalind_grph.txt', no_id=False)

    with open('output/rosalind_grph_out.txt', 'w') as result_file:
        for adjacency in overlap_seqs(fastas):
            result_file.write(adjacency + '\n')

예제 #53

0

파일 보기

def main():
    """Print longest_motif() for the values parsed from rosalind_lcsm.txt."""
    parsed = parse_fasta('problem_datasets/rosalind_lcsm.txt')
    strands = list(parsed.values())

    shared = longest_motif(strands)
    print(shared)

예제 #54

0

파일 보기

파일: rosalind_MMCH.py 프로젝트: Davo36/Rosalind-1

def main():
    """Print max_matches() for the input parsed from rosalind_mmch.txt."""
    rna = parse_fasta('problem_datasets/rosalind_mmch.txt')

    total = max_matches(rna)
    print(total)

예제 #55

0

파일 보기

파일: rosalind_GCON.py 프로젝트: sdwfrost/Rosalind

def main():
    """Print global_align() for the two entries of rosalind_gcon.txt.

    The alignment is called with BLOSUM62() and a -5 penalty argument.
    """
    seq_a, seq_b = parse_fasta('problem_datasets/rosalind_gcon.txt')
    matrix = BLOSUM62()
    score = global_align(seq_a, seq_b, matrix, -5)

    print(score)