Esempi in Python per parse_fasta, esempi in Python per rosalind_utils.parse_fasta

Esempio n. 1

0

Mostra file

def main():
    """Build a superstring from the LONG dataset, report its length, save it.

    The input sequences are the values of the mapping returned by
    ``parse_fasta``; ``getContig`` produces the string that is measured,
    printed and written to the output file.
    """
    fasta_records = parse_fasta('problem_datasets/rosalind_long.txt')
    reads = list(fasta_records.values())
    superstring = getContig(reads)
    print('Shortest superstring is %i nucleotides long.' % len(superstring))

    with open('output/rosalind_long_out.txt', 'w') as out_file:
        out_file.write(superstring)

Esempio n. 2

0

Mostra file

File: rosalind_LONG.py Progetto: ywang931030/Rosalind

def main():
    """Assemble the LONG reads into one string, print its length and save it.

    ``parse_fasta`` is used as a mapping here: only its values are passed on
    to ``getContig``.
    """
    records = parse_fasta('problem_datasets/rosalind_long.txt')
    contig = getContig(list(records.values()))
    contig_length = len(contig)
    print('Shortest superstring is %i nucleotides long.' % contig_length)

    with open('output/rosalind_long_out.txt', 'w') as sink:
        sink.write(contig)

Esempio n. 3

0

Mostra file

File: rosalind_PDST.py Progetto: sdwfrost/Rosalind

def main():
    """Write the distance matrix of the PDST dataset, one row per line."""
    records = parse_fasta('problem_datasets/rosalind_pdst.txt')
    rows = distance_matrix(records)

    with open('output/rosalind_pdst_out.txt', 'w') as out:
        # Each row becomes space-separated values terminated by a newline.
        out.writelines(
            ' '.join(str(cell) for cell in row) + '\n' for row in rows
        )

Esempio n. 4

0

Mostra file

File: rosalind_PMCH.py Progetto: sdwfrost/Rosalind

def main():
    """Print and save factorial(#A) * factorial(#C) for the PMCH input.

    The value returned by ``parse_fasta`` is used directly as the RNA
    string (only its ``count`` method is called).
    """
    rna = parse_fasta('problem_datasets/rosalind_pmch.txt')

    adenine_total = rna.count('A')
    cytosine_total = rna.count('C')
    matchings = factorial(adenine_total) * factorial(cytosine_total)
    print(matchings)

    with open('output/rosalind_pmch_out.txt', 'w') as out:
        out.write(str(matchings))

Esempio n. 5

0

Mostra file

File: rosalind_LOCA.py Progetto: Davo36/Rosalind-1

def main():
    """Score the two LOCA sequences, save the result lines, print the first.

    ``alignment_score`` is called with ``PAM250()`` and ``-5`` as its extra
    arguments; its result is joined with newlines for the output file and
    its first element is reported as the maximum alignment score.
    """
    seq_s, seq_t = parse_fasta('problem_datasets/rosalind_loca.txt')
    result = alignment_score(seq_s, seq_t, PAM250(), -5)

    with open('output/rosalind_loca_out.txt', 'w') as out:
        report = '\n'.join(result)
        out.write(report)

    print('Maximum alignment score =', result[0])

Esempio n. 6

0

Mostra file

File: rosalind_EDTA.py Progetto: ywang931030/Rosalind

def main():
    """Run ``edit_dist_with_align`` on the EDTA pair, save and report it.

    The result is written newline-joined to the output file; its first
    element is printed as the edit distance.
    """
    seq_s, seq_t = parse_fasta('problem_datasets/rosalind_edta.txt', 'seq')
    result = edit_dist_with_align(seq_s, seq_t)

    with open('output/rosalind_edta_out.txt', 'w') as out:
        report = '\n'.join(result)
        out.write(report)

    print('Edit distance =', result[0])

Esempio n. 7

0

Mostra file

File: rosalind_ORF.py Progetto: sdwfrost/Rosalind

def main():
    """Translate the ORF dataset and write the found ORFs, one per line.

    Pipeline: ``parse_fasta`` -> ``raw_translate`` -> ``find_orfs``; the
    final result is joined with newlines and saved.
    """
    dna = parse_fasta('problem_datasets/rosalind_orf.txt')
    candidates = find_orfs(raw_translate(dna))

    with open('output/rosalind_orf_out.txt', 'w') as out:
        report = '\n'.join(candidates)
        out.write(report)

Esempio n. 8

0

Mostra file

File: rosalind_SMGB.py Progetto: ywang931030/Rosalind

def main():
    """Run ``semiglobal_align`` on the SMGB pair, save and report the result.

    The result is written newline-joined; its first element is printed as
    the maximum alignment score.
    """
    seq_s, seq_t = parse_fasta('problem_datasets/rosalind_smgb.txt', True)
    result = semiglobal_align(seq_s, seq_t)

    with open('output/rosalind_smgb_out.txt', 'w') as out:
        report = '\n'.join(result)
        out.write(report)

    print('Maximum alignment score =', result[0])

Esempio n. 9

0

Mostra file

File: rosalind_LCSQ.py Progetto: sdwfrost/Rosalind

def main():
    """Save the ``longest_sub`` result for the LCSQ pair and print its length."""
    seq_s, seq_t = parse_fasta('problem_datasets/rosalind_lcsq.txt')
    common = longest_sub(seq_s, seq_t)

    with open('output/rosalind_lcsq_out.txt', 'w') as out:
        out.write(common)

    common_length = len(common)
    print('The longest common subsequence is', common_length, 'bases long.')

Esempio n. 10

0

Mostra file

File: rosalind_LOCA.py Progetto: ywang931030/Rosalind

def main():
    """Score the two LOCA sequences, save the result lines, print the first.

    ``alignment_score`` receives ``PAM250()`` and ``-5`` as its extra
    arguments; the first element of its result is reported as the maximum
    alignment score.
    """
    first, second = parse_fasta("problem_datasets/rosalind_loca.txt", True)
    scored = alignment_score(first, second, PAM250(), -5)

    with open("output/rosalind_loca_out.txt", "w") as sink:
        lines = "\n".join(scored)
        sink.write(lines)

    print("Maximum alignment score =", scored[0])

Esempio n. 11

0

Mostra file

def main():
    """Semiglobally align the SMGB pair, store the lines, print the score.

    The printed score is the first element of what ``semiglobal_align``
    returns.
    """
    first, second = parse_fasta('problem_datasets/rosalind_smgb.txt', True)
    aligned = semiglobal_align(first, second)

    with open('output/rosalind_smgb_out.txt', 'w') as sink:
        lines = '\n'.join(aligned)
        sink.write(lines)

    print('Maximum alignment score =', aligned[0])

Esempio n. 12

0

Mostra file

def main():
    """Run ``global_align_with_affine`` on the GAFF pair; save and report it.

    ``BLOSUM62()``, ``-11`` and ``-1`` are passed through as the extra
    arguments. The result is written newline-joined and its first element
    is printed as the maximum alignment score.
    """
    seq_s, seq_t = parse_fasta('problem_datasets/rosalind_gaff.txt', True)
    result = global_align_with_affine(seq_s, seq_t, BLOSUM62(), -11, -1)

    with open('output/rosalind_gaff_out.txt', 'w') as out:
        report = '\n'.join(result)
        out.write(report)

    print('Maximum alignment score =', result[0])

Esempio n. 13

0

Mostra file

File: rosalind_PMCH.py Progetto: Davo36/Rosalind-1

def main():
    """Print and store factorial(count of 'A') * factorial(count of 'C').

    Counts are taken on whatever ``parse_fasta`` returns for the PMCH file.
    """
    rna = parse_fasta('problem_datasets/rosalind_pmch.txt')

    a_count, c_count = rna.count('A'), rna.count('C')
    total = factorial(a_count) * factorial(c_count)
    print(total)

    with open('output/rosalind_pmch_out.txt', 'w') as sink:
        sink.write(str(total))

Esempio n. 14

0

Mostra file

File: rosalind_EDTA.py Progetto: Davo36/Rosalind-1

def main():
    """Run ``edit_dist_with_align`` on the EDTA pair, save and report it.

    Every element of the result is converted with ``str`` before being
    joined with newlines; the first element is printed as the edit distance.
    """
    seq_s, seq_t = parse_fasta('problem_datasets/rosalind_edta.txt')
    result = edit_dist_with_align(seq_s, seq_t)

    with open('output/rosalind_edta_out.txt', 'w') as out:
        report = '\n'.join(str(item) for item in result)
        out.write(report)

    print('Edit distance =', result[0])

Esempio n. 15

0

Mostra file

File: rosalind_LCSQ.py Progetto: Davo36/Rosalind-1

def main():
    """Write the ``longest_sub`` result for the LCSQ pair; print its length."""
    first, second = parse_fasta('problem_datasets/rosalind_lcsq.txt')
    subsequence = longest_sub(first, second)

    with open('output/rosalind_lcsq_out.txt', 'w') as sink:
        sink.write(subsequence)

    size = len(subsequence)
    print('The longest common subsequence is', size, 'bases long.')

Esempio n. 16

0

Mostra file

File: rosalind_LAFF.py Progetto: ywang931030/Rosalind

def main():
    """Run ``local_align_with_affine`` on the LAFF pair; save and report it.

    ``BLOSUM62()``, ``-11`` and ``-1`` are forwarded as the extra arguments.
    The first element of the result is printed as the maximum alignment
    score.
    """
    seq_s, seq_t = parse_fasta('problem_datasets/rosalind_laff.txt', True)
    result = local_align_with_affine(seq_s, seq_t, BLOSUM62(), -11, -1)

    with open('output/rosalind_laff_out.txt', 'w') as out:
        report = '\n'.join(result)
        out.write(report)

    print('Maximum alignment score =', result[0])

Esempio n. 17

0

Mostra file

File: rosalind_SSEQ.py Progetto: sdwfrost/Rosalind

def main():
    """Print the positions reported by ``find_subsequence`` for SSEQ.

    The input file holds two FASTA sequences, separated by their header
    lines; they are unpacked from the ``parse_fasta`` result. The positions
    are printed space-separated on a single line.
    """
    seq_s, seq_t = parse_fasta('problem_datasets/rosalind_sseq.txt')

    positions = find_subsequence(seq_s, seq_t)
    line = ' '.join(positions)
    print(line)

Esempio n. 18

0

Mostra file

File: rosalind_CORR.py Progetto: ywang931030/Rosalind

def main():
    """Correct the CORR reads and write each correction as ``a->b`` lines.

    The read list is extended with ``rev_comp`` of every original read
    before ``error_correct`` is called.
    """
    reads = list(parse_fasta('problem_datasets/rosalind_corr.txt').values())
    # Build the extra entries from a snapshot first, then append them, so
    # the list is not grown while it is being iterated.
    reverse_complements = [rev_comp(read) for read in reads]
    reads.extend(reverse_complements)

    corrections = error_correct(reads)

    with open('output/rosalind_corr_out.txt', 'w') as out:
        out.writelines('->'.join(pair) + '\n' for pair in corrections)

Esempio n. 19

0

Mostra file

def main(filename):
    """Print the edit distance between the first two sequences of a file.

    Args:
        filename: Path handed to ``parse_fasta``. The result is used as a
            mapping; only its values are read.

    Written so that it runs unchanged on Python 2 and Python 3.
    """
    # Materialize the values once: on Python 3 ``dict.values()`` is a view
    # that cannot be indexed, and on Python 2 this avoids rebuilding the
    # list for every access.
    seqs = list(parse_fasta(filename).values())
    if len(seqs) > 2:
        # A single parenthesized argument prints identically on 2 and 3.
        print("More than two sequences in input file, "
              "only calculting edit distance between"
              " two sequences")
        # NOTE(review): the message should be clarified -- parse_fasta
        # returns a mapping, so the two sequences compared are the first two
        # in its iteration order, not necessarily the first two in the file.
    print(calc_edit_distance(seqs[0], seqs[1]))

Esempio n. 20

0

Mostra file

File: rosalind_LCSM.py Progetto: Davo36/Rosalind-1

def main():
    """Print the ``longest_motif`` result for the LCSM dataset.

    When ``longest_motif`` returns ``None`` a fallback message is printed
    instead.
    """
    sequences = parse_fasta('problem_datasets/rosalind_lcsm.txt')

    answer = longest_motif(sequences)

    # Identity is the correct test for the None sentinel; ``!= None`` would
    # go through the result object's own comparison method.
    if answer is not None:
        print(answer)
    else:
        print('No common substring found.')

Esempio n. 21

0

Mostra file

File: cons.py Progetto: kanglicheng/rosalind

def main():
    """Write the consensus string followed by the formatted profile lines.

    Pipeline: ``parse_fasta`` -> ``profile_matrix`` -> ``consensus_seq``;
    ``format_profile`` supplies the remaining output lines.
    """
    seqs = parse_fasta('rosalind_cons.txt')
    counts = profile_matrix(seqs)
    consensus_line = consensus_seq(counts)

    with open('rosalind_cons_out.txt', 'w') as out:
        out.write(consensus_line + '\n')
        out.writelines(row + '\n' for row in format_profile(counts))

Esempio n. 22

0

Mostra file

File: rosalind_SSEQ.py Progetto: ywang931030/Rosalind

def main():
    """Print the positions reported by ``findSubSeq`` for the SSEQ dataset.

    The input file holds two FASTA sequences, separated by their header
    lines. Only the first two values of the ``parse_fasta`` mapping are
    used; the positions are printed space-separated.
    """
    records = parse_fasta('problem_datasets/rosalind_sseq.txt')
    leading_pair = list(records.values())[:2]
    seq_s, seq_t = leading_pair

    positions = findSubSeq(seq_s, seq_t)
    line = ' '.join(positions)
    print(line)

Esempio n. 23

0

Mostra file

def main():
    """Save the CONS consensus string and its formatted profile.

    The sequences are the values of the mapping returned by ``parse_fasta``.
    """
    records = parse_fasta('problem_datasets/rosalind_cons.txt')
    seqs = list(records.values())
    counts = profile_matrix(seqs)
    consensus_line = consensus_seq(counts)

    with open('output/rosalind_cons_out.txt', 'w') as out:
        out.write(consensus_line + '\n')
        out.writelines(row + '\n' for row in format_profile(counts))

Esempio n. 24

0

Mostra file

File: rosalind_editDist.py Progetto: nate-d-olson/CBBG_Rosalind_Bioinf

def main(filename):
    """Print the edit distance between the first two sequences of a file.

    Args:
        filename: Path handed to ``parse_fasta``. The result is used as a
            mapping; only its values are read.

    Written so that it runs unchanged on Python 2 and Python 3.
    """
    # Index a real list: ``dict.values()`` is a non-indexable view on
    # Python 3, and building the list once also avoids repeated copies on
    # Python 2.
    seqs = list(parse_fasta(filename).values())
    if len(seqs) > 2:
        # A single parenthesized argument prints identically on 2 and 3.
        print("More than two sequences in input file, "
              "only calculting edit distance between"
              " two sequences")
        # NOTE(review): the message should be clarified -- parse_fasta
        # returns a mapping, so the two sequences compared are the first two
        # in its iteration order, not necessarily the first two in the file.
    print(calc_edit_distance(seqs[0], seqs[1]))

Esempio n. 25

0

Mostra file

File: rosalind_CONS.py Progetto: Davo36/Rosalind-1

def main():
    """Write the CONS consensus line and then every formatted profile line."""
    seqs = parse_fasta('problem_datasets/rosalind_cons.txt')
    consensus_profile = profile_matrix(seqs)
    consensus_line = consensus_seq(consensus_profile)

    with open('output/rosalind_cons_out.txt', 'w') as sink:
        sink.write(consensus_line + '\n')
        sink.writelines(
            text + '\n' for text in format_profile(consensus_profile)
        )

Esempio n. 26

0

Mostra file

File: rosalind_CORR.py Progetto: sdwfrost/Rosalind

def main():
    """Correct the CORR reads and write each correction as ``a->b`` lines.

    The collection returned by ``parse_fasta`` is extended in place with
    ``rev_comp`` of each of its entries before ``error_correct`` runs.
    """
    reads = parse_fasta('problem_datasets/rosalind_corr.txt')
    reverse_complements = [rev_comp(read) for read in reads]
    reads += reverse_complements

    corrections = error_correct(reads)

    with open('output/rosalind_corr_out.txt', 'w') as out:
        out.writelines('->'.join(pair) + '\n' for pair in corrections)

Esempio n. 27

0

Mostra file

File: rosalind_MULT.py Progetto: Davo36/Rosalind-1

def main():
    """Progressively align the MULT sequences and report the pairwise score.

    Steps:
      1. Score every pair of input sequences and align the best-scoring pair.
      2. Take each remaining sequence in turn and align it against the
         already-aligned entry it scores best with.
      3. Sum ``alignment_score(...)[0]`` over every pair of final alignment
         strings, write that total plus the alignment to the output file,
         and echo the file to stdout.

    ``alignment_score`` is used as returning a ``(score, matrix)`` pair and
    ``align_sequences`` as returning the two aligned strings.
    """
    # Get the collection of sequences.
    seqs = parse_fasta('problem_datasets/rosalind_mult.txt')

    # alignment[k] stays '' until sequence k has been aligned.
    alignment = ['' for _ in seqs]
    remaining = list(range(len(seqs)))

    # Start by aligning the two most similar sequences.
    scores = {}
    for i in range(len(seqs)):
        for j in range(len(seqs) - 1, i, -1):
            scores[(i, j)] = alignment_score(seqs[i], seqs[j])

    # Rank pairs by their score. A bare max(scores) compares the (i, j) key
    # tuples and would always pick the last pair regardless of similarity.
    a, b = max(scores, key=lambda pair: scores[pair][0])
    _, matrix = scores[(a, b)]
    alignment[a], alignment[b] = align_sequences(seqs[a], seqs[b], matrix)

    remaining.remove(a)
    remaining.remove(b)

    # Align the next unaligned sequence to whichever already-aligned entry it
    # scores best against; repeat until all sequences are aligned.
    # NOTE(review): only the chosen partner is rewritten; other aligned
    # entries are not re-padded here -- confirm this is intended.
    while remaining:
        scores = {}
        i = remaining[0]

        for j, aligned in enumerate(alignment):
            if aligned != '':
                scores[j] = alignment_score(seqs[i], aligned)

        # Again select by score, not by the largest index.
        best = max(scores, key=lambda idx: scores[idx][0])
        _, matrix = scores[best]

        # Store the re-aligned partner back at ``best``; the loop variable
        # ``j`` merely holds the last index visited above.
        alignment[i], alignment[best] = align_sequences(
            seqs[i], alignment[best], matrix)

        remaining.remove(i)

    # Calculate the score as the sum over all pairs of aligned strings.
    max_score = 0
    for i in range(len(alignment)):
        for j in range(len(alignment) - 1, i, -1):
            max_score += alignment_score(alignment[i], alignment[j])[0]

    # Output the answer.
    with open('output/rosalind_mult_out.txt', 'w') as outfile:
        outfile.write(str(max_score) + '\n')
        outfile.write('\n'.join(alignment))

    print('-'*37 + 'ANSWER' + '-'*37)
    with open('output/rosalind_mult_out.txt', 'r') as answer:
        print(answer.read())

Esempio n. 28

0

Mostra file

File: rosalind_SPLC.py Progetto: ywang931030/Rosalind

def main():
    """Splice the SPLC transcript, translate it and save the peptide.

    The longest parsed sequence is treated as the RNA; every sequence not
    equal to it is passed to ``splice_RNA`` as an intron.
    """
    records = parse_fasta('problem_datasets/rosalind_splc.txt')
    seqs = list(records.values())
    rna = max(seqs, key=len)

    introns = []
    for candidate in seqs:
        if candidate != rna:
            introns.append(candidate)

    protein = translate(splice_RNA(rna, introns))

    with open('output/rosalind_splc_out.txt', 'w') as out:
        out.write(protein)

Esempio n. 29

0

Mostra file

File: rosalind_GAP.py Progetto: Davo36/Rosalind-1

def main():
    """Run ``semiglobal_align`` on the GAP pair, save it and echo the file.

    The result is written newline-joined; the output file is then read back
    and printed beneath an ANSWER banner.
    """
    seq_s, seq_t = parse_fasta('problem_datasets/rosalind_gap.txt')

    result = semiglobal_align(seq_s, seq_t)

    with open('output/rosalind_gap_out.txt', 'w') as out:
        report = '\n'.join(result)
        out.write(report)

    banner = '-'*37 + 'ANSWER' + '-'*37
    print(banner)
    with open('output/rosalind_gap_out.txt', 'r') as saved:
        contents = saved.read()
        print(contents)

Esempio n. 30

0

Mostra file

def main():
    """Remove the introns from the SPLC RNA, translate it, store the result.

    The RNA is the longest value of the ``parse_fasta`` mapping; all values
    that differ from it are handed to ``splice_RNA`` as introns.
    """
    seqs = list(parse_fasta('problem_datasets/rosalind_splc.txt').values())
    rna = max(seqs, key=len)
    introns = list(filter(lambda entry: entry != rna, seqs))

    exons = splice_RNA(rna, introns)
    protein = translate(exons)

    with open('output/rosalind_splc_out.txt', 'w') as sink:
        sink.write(protein)

Esempio n. 31

0

Mostra file

def main():
    """Compute the LONG superstring, save it and print its length.

    ``shortest_contig`` receives whatever ``parse_fasta`` returns for the
    dataset; its result is written to the output file verbatim.
    """
    # Extract sequences from a fasta file.
    seqs = parse_fasta('problem_datasets/rosalind_long.txt')

    # Find the shortest superstring.
    answer = shortest_contig(seqs)

    # Write the answer. The context manager guarantees the handle is flushed
    # and closed, instead of relying on garbage collection of the temporary
    # file object.
    with open('output/rosalind_long_out.txt', 'w') as outfile:
        outfile.write(answer)

    # Optional: Print the length of the superstring.
    print('Shortest superstring is %i nucleotides long.' % len(answer))

Esempio n. 32

0

Mostra file

File: rosalind_SIMS.py Progetto: sdwfrost/Rosalind

def main():
    """Run ``fitting_alignment`` on the SIMS pair, save and report it.

    The result is written newline-joined; its first element is printed as
    the optimal fitting alignment score.
    """
    # Load both input sequences.
    seq_s, seq_t = parse_fasta('problem_datasets/rosalind_sims.txt')

    # Compute the alignment.
    result = fitting_alignment(seq_s, seq_t)

    # Persist every result line.
    with open('output/rosalind_sims_out.txt', 'w') as out:
        report = '\n'.join(result)
        out.write(report)

    # Report the score on the console as well.
    print('Optimal fitting alignment score =', result[0])

Esempio n. 33

0

Mostra file

File: rosalind_OAP.py Progetto: sdwfrost/Rosalind

def main():
    """Run ``overlap_align`` on the OAP pair, save and report the result.

    The result is written newline-joined; its first element is printed as
    the maximum alignment score.
    """
    # Load both input strings.
    seq_s, seq_t = parse_fasta('problem_datasets/rosalind_oap.txt')

    # Compute the alignment.
    result = overlap_align(seq_s, seq_t)

    # Persist every result line.
    with open('output/rosalind_oap_out.txt', 'w') as out:
        report = '\n'.join(result)
        out.write(report)

    # Report the score on the console as well.
    print('Maximum alignment score =', result[0])

Esempio n. 34

0

Mostra file

File: rosalind_SPLC.py Progetto: sdwfrost/Rosalind

def main():
    """Splice and translate the SPLC RNA; save the peptide unless it is ''.

    The longest parsed sequence is treated as the RNA and all entries not
    equal to it as introns. An empty translation prints a message instead
    of writing the output file.
    """
    seqs = parse_fasta('problem_datasets/rosalind_splc.txt')
    rna = max(seqs, key=len)

    introns = []
    for candidate in seqs:
        if candidate != rna:
            introns.append(candidate)

    protein = translate(splice_RNA(rna, introns))

    if protein != '':
        with open('output/rosalind_splc_out.txt', 'w') as out:
            out.write(protein)
    else:
        print('No exon found.')

Esempio n. 35

0

Mostra file

File: rosalind_profileConsensus.py Progetto: nate-d-olson/CBBG_Rosalind_Bioinf

def main(filename):
    """Print the consensus and the profile computed from a FASTA file.

    Args:
        filename: Path handed to ``parse_fasta``; the values of the returned
            mapping are given to ``dna_profile``.

    Written so that it runs unchanged on Python 2 and Python 3.
    """
    dat = parse_fasta(filename)
    # Hand over a real list: on Python 3 ``dict.values()`` is a view, whereas
    # the original Python 2 code passed a list.
    profile = dna_profile(list(dat.values()))
    # A single parenthesized argument prints identically on 2 and 3.
    print(profile_consensus(profile))
    print_profile(profile)

Esempio n. 36

0

Mostra file

File: rosalind_KMP.py Progetto: Davo36/Rosalind-1

def main():
    """Write the ``failure_array`` of the KMP input as space-separated values."""
    text = parse_fasta('problem_datasets/rosalind_kmp.txt')

    with open('output/rosalind_kmp_out.txt', 'w') as out:
        entries = (str(value) for value in failure_array(text))
        out.write(' '.join(entries))

Esempio n. 37

0

Mostra file

File: rosalind_LCSQ.py Progetto: ywang931030/Rosalind

def main():
    """Save the ``longest_sub`` result for the first two LCSQ sequences.

    The sequences are the first two values of the ``parse_fasta`` mapping.
    """
    records = parse_fasta('problem_datasets/rosalind_lcsq.txt')
    seqs = list(records.values())
    first, second = seqs[0], seqs[1]
    common = longest_sub(first, second)

    with open('output/rosalind_lcsq_out.txt', 'w') as out:
        out.write(common)

Esempio n. 38

0

Mostra file

File: rosalind_OSYM.py Progetto: sdwfrost/Rosalind

def main():
    """Print every value returned by ``align_to_symbols``, one per line."""
    # Load the two sequences from the dataset file.
    seq_s, seq_t = parse_fasta('problem_datasets/rosalind_osym.txt')

    # Stringify each result value and emit them newline-separated.
    results = align_to_symbols(seq_s, seq_t)
    print('\n'.join(str(value) for value in results))

Esempio n. 39

0

Mostra file

def main():
    """Print the ``count_alignments`` result for the two CTEA sequences."""
    # Load the two input strings.
    seq_s, seq_t = parse_fasta('problem_datasets/rosalind_ctea.txt')

    # Per the original note this is the number of optimal alignments
    # (modulo 2^27 - 1); the reduction itself is not visible here.
    total = count_alignments(seq_s, seq_t)
    print(total)

Esempio n. 40

0

Mostra file

File: rosalind_LCSM.py Progetto: ywang931030/Rosalind

def main():
    """Print the ``longest_motif`` result for the LCSM sequences.

    The sequences are the values of the mapping returned by ``parse_fasta``.
    """
    records = parse_fasta('problem_datasets/rosalind_lcsm.txt')
    seqs = list(records.values())

    print(longest_motif(seqs))

Esempio n. 41

0

Mostra file

File: rosalind_TRAN.py Progetto: Davo36/Rosalind-1

def main():
    """Print the ``pointMutations`` result for the two TRAN sequences."""
    first, second = parse_fasta('problem_datasets/rosalind_tran.txt')

    result = pointMutations(first, second)
    print(result)

Esempio n. 42

0

Mostra file

File: rosalind_EDIT.py Progetto: sdwfrost/Rosalind

def main():
    """Print the ``edit_dist`` result for the two EDIT sequences."""
    seq_s, seq_t = parse_fasta('problem_datasets/rosalind_edit.txt')

    distance = edit_dist(seq_s, seq_t)
    print(distance)

Esempio n. 43

0

Mostra file

File: rosalind_GC.py Progetto: Davo36/Rosalind-1

def main():
    """Print the two ``compute_gc`` results on separate lines.

    The first value is printed as-is, the second with six decimal places.
    ``parse_fasta`` is called with ``no_id=False``.
    """
    records = parse_fasta('problem_datasets/rosalind_gc.txt', no_id=False)
    top_label, top_gc = compute_gc(records)

    gc_text = '%.6f' % top_gc
    print(top_label, '\n', gc_text, sep='')

Esempio n. 44

0

Mostra file

def main():
    """Print the ``max_global_align_gaps`` result for the two MGAP sequences."""
    seq_s, seq_t = parse_fasta('problem_datasets/rosalind_mgap.txt')

    gap_total = max_global_align_gaps(seq_s, seq_t)
    print(gap_total)

Esempio n. 45

0

Mostra file

File: rosalind_GRPH.py Progetto: sdwfrost/Rosalind

def main():
    """Write every line yielded by ``overlap_seqs`` for the GRPH dataset.

    ``parse_fasta`` is called with ``no_id=False``; each output line is
    terminated with a newline.
    """
    records = parse_fasta('problem_datasets/rosalind_grph.txt', no_id=False)

    with open('output/rosalind_grph_out.txt', 'w') as out:
        out.writelines(edge + '\n' for edge in overlap_seqs(records))

Esempio n. 46

0

Mostra file

def main():
    """Print both ``compute_gc`` results, the second with six decimals."""
    records = parse_fasta('problem_datasets/rosalind_gc.txt')
    top_label, top_gc = compute_gc(records)
    gc_text = '%.6f' % top_gc
    print(top_label, '\n', gc_text, sep='')

Esempio n. 47

0

Mostra file

File: rosalind_TRAN.py Progetto: sdwfrost/Rosalind

def main():
    """Print what ``pointMutations`` returns for the TRAN sequence pair."""
    seq_a, seq_b = parse_fasta('problem_datasets/rosalind_tran.txt')

    outcome = pointMutations(seq_a, seq_b)
    print(outcome)

Esempio n. 48

0

Mostra file

File: rosalind_profileConsensus.py Progetto: nate-d-olson/CBBG_Rosalind_Bioinf

def main(filename):
    """Print the consensus and the profile computed from a FASTA file.

    Args:
        filename: Path handed to ``parse_fasta``; the values of the returned
            mapping are given to ``dna_profile``.

    Written so that it runs unchanged on Python 2 and Python 3.
    """
    dat = parse_fasta(filename)
    # Pass a real list so Python 3 (where ``dict.values()`` is a view)
    # behaves like the original Python 2 call.
    profile = dna_profile(list(dat.values()))
    # A single parenthesized argument prints identically on 2 and 3.
    print(profile_consensus(profile))
    print_profile(profile)

Esempio n. 49

0

Mostra file

def main(filename):
    """Print each item produced by ``find_max_gc`` for a FASTA file.

    Args:
        filename: Path handed to ``parse_fasta``.

    Written so that it runs unchanged on Python 2 and Python 3.
    """
    dat = parse_fasta(filename)
    for i in find_max_gc(dat):
        # A single parenthesized argument prints identically on 2 and 3.
        print(i)

Esempio n. 50

0

Mostra file

def main():
    """Write the ``longest_sub`` result of the first two LCSQ sequences."""
    seqs = list(parse_fasta('problem_datasets/rosalind_lcsq.txt').values())
    seq_s = seqs[0]
    seq_t = seqs[1]
    subsequence = longest_sub(seq_s, seq_t)

    with open('output/rosalind_lcsq_out.txt', 'w') as sink:
        sink.write(subsequence)

Esempio n. 51

0

Mostra file

File: rosalind_GLOB.py Progetto: Davo36/Rosalind-1

def main():
    """Print the ``global_align`` result for the two GLOB sequences.

    ``BLOSUM62()`` and ``-5`` are passed as the extra arguments.
    """
    seq_s, seq_t = parse_fasta('problem_datasets/rosalind_glob.txt')

    print(global_align(seq_s, seq_t, BLOSUM62(), -5))

Esempio n. 52

0

Mostra file

File: rosalind_GRPH.py Progetto: Davo36/Rosalind-1

def main():
    """Save each ``overlap_seqs`` line for the GRPH dataset, one per row."""
    fasta_records = parse_fasta(
        'problem_datasets/rosalind_grph.txt', no_id=False)

    with open('output/rosalind_grph_out.txt', 'w') as sink:
        sink.writelines(row + '\n' for row in overlap_seqs(fasta_records))

Esempio n. 53

0

Mostra file

def main():
    """Print what ``longest_motif`` returns for the LCSM sequence values."""
    fasta_records = parse_fasta('problem_datasets/rosalind_lcsm.txt')
    seqs = list(fasta_records.values())

    motif = longest_motif(seqs)
    print(motif)

Esempio n. 54

0

Mostra file

File: rosalind_MMCH.py Progetto: Davo36/Rosalind-1

def main():
    """Print the ``max_matches`` result for the MMCH input."""
    rna = parse_fasta('problem_datasets/rosalind_mmch.txt')

    match_total = max_matches(rna)
    print(match_total)

Esempio n. 55

0

Mostra file

File: rosalind_GCON.py Progetto: sdwfrost/Rosalind

def main():
    """Print the ``global_align`` result for the two GCON sequences.

    ``BLOSUM62()`` and ``-5`` are passed as the extra arguments.
    """
    first, second = parse_fasta('problem_datasets/rosalind_gcon.txt')

    print(global_align(first, second, BLOSUM62(), -5))