Ejemplo n.º 1
0
def truncate_rev_primers(fasta_f,
                         output_fp,
                         reverse_primers,
                         truncate_option='truncate_only',
                         primer_mismatches=2):
    """ Locally aligns reverse primers, truncates or removes seqs

    The SampleID of each record is the part of its fasta label before the
    first underscore.  Records whose SampleID is not in reverse_primers are
    written unchanged.  For all other records every primer listed for the
    sample is aligned against the sequence and the hit with the fewest
    mismatches is used; if several primers tie, the last one tested wins.

    fasta_f:  open file of fasta file
    output_fp: open, writable file object the resulting fasta is written to
    reverse_primers: dictionary of SampleID:iterable of reverse primer
     sequences (each element is aligned separately, so a bare string would
     be aligned one character at a time)
    truncate_option: either truncate_only, truncate_remove.  This only
     matters when the best hit has more than primer_mismatches mismatches:
     truncate_remove drops the seq, any other value writes it unchanged.
    primer_mismatches: number of allowed primer mismatches

    Returns a dictionary of counts with the keys sample_id_not_found,
    reverse_primer_not_found, total_seqs and seqs_written.

    Raises ValueError if a SampleID maps to an empty collection of primers.
    """

    log_data = {
        'sample_id_not_found': 0,
        'reverse_primer_not_found': 0,
        'total_seqs': 0,
        'seqs_written': 0
    }

    for label, seq in MinimalFastaParser(fasta_f):
        curr_label = label.split('_')[0]

        log_data['total_seqs'] += 1

        # Check fasta label for valid SampleID, if not found, just write seq
        try:
            curr_rev_primer = reverse_primers[curr_label]
        except KeyError:
            log_data['sample_id_not_found'] += 1
            output_fp.write('>%s\n%s\n' % (label, seq))
            log_data['seqs_written'] += 1
            continue

        # Track the best (lowest mismatch) hit over all primers of the sample.
        # local_align_primer_seq is expected to return (mismatches, index).
        rev_primer_mm = None
        rev_primer_index = None
        for rev_primer in curr_rev_primer:
            curr_mm, curr_index = local_align_primer_seq(rev_primer, seq)
            # "<=" rather than "<": on a tie the most recently tested primer
            # replaces the earlier one.
            if rev_primer_mm is None or curr_mm <= rev_primer_mm:
                rev_primer_mm, rev_primer_index = curr_mm, curr_index

        if rev_primer_mm is None:
            # Nothing was aligned; fail with a message that names the sample
            # instead of an anonymous error from an empty min().
            raise ValueError(
                'No reverse primers listed for SampleID %s (fasta label %s)'
                % (curr_label, label))

        if rev_primer_mm > primer_mismatches:
            # Primer not found within the allowed number of mismatches
            log_data['reverse_primer_not_found'] += 1
            if truncate_option != "truncate_remove":
                log_data['seqs_written'] += 1
                output_fp.write('>%s\n%s\n' % (label, seq))
        elif rev_primer_index > 0:
            # Check for zero seq length after truncation, will not write seq
            log_data['seqs_written'] += 1
            output_fp.write('>%s\n%s\n' % (label, seq[0:rev_primer_index]))

    return log_data
Ejemplo n.º 2
0
def truncate_rev_primers(fasta_f,
                         output_fp,
                         reverse_primers,
                         truncate_option='truncate_only',
                         primer_mismatches=2):
    """ Locally aligns reverse primers, truncates or removes seqs

    The SampleID of each record is the part of its fasta label before the
    first underscore.  Records whose SampleID is not in reverse_primers are
    written unchanged.  For all other records every primer listed for the
    sample is aligned against the sequence and the hit with the fewest
    mismatches is used; if several primers tie, the last one tested wins.

    fasta_f:  open file of fasta file
    output_fp: open, writable file object the resulting fasta is written to
    reverse_primers: dictionary of SampleID:iterable of reverse primer
     sequences (each element is aligned separately, so a bare string would
     be aligned one character at a time)
    truncate_option: either truncate_only, truncate_remove.  This only
     matters when the best hit has more than primer_mismatches mismatches:
     truncate_remove drops the seq, any other value writes it unchanged.
    primer_mismatches: number of allowed primer mismatches

    Returns a dictionary of counts with the keys sample_id_not_found,
    reverse_primer_not_found, total_seqs and seqs_written.

    Raises ValueError if a SampleID maps to an empty collection of primers.
    """

    log_data = {
        'sample_id_not_found': 0,
        'reverse_primer_not_found': 0,
        'total_seqs': 0,
        'seqs_written': 0
    }

    for label, seq in parse_fasta(fasta_f):
        curr_label = label.split('_')[0]

        log_data['total_seqs'] += 1

        # Check fasta label for valid SampleID, if not found, just write seq
        try:
            curr_rev_primer = reverse_primers[curr_label]
        except KeyError:
            log_data['sample_id_not_found'] += 1
            output_fp.write('>%s\n%s\n' % (label, seq))
            log_data['seqs_written'] += 1
            continue

        # Track the best (lowest mismatch) hit over all primers of the sample.
        # local_align_primer_seq is expected to return (mismatches, index).
        rev_primer_mm = None
        rev_primer_index = None
        for rev_primer in curr_rev_primer:
            curr_mm, curr_index = local_align_primer_seq(rev_primer, seq)
            # "<=" rather than "<": on a tie the most recently tested primer
            # replaces the earlier one.
            if rev_primer_mm is None or curr_mm <= rev_primer_mm:
                rev_primer_mm, rev_primer_index = curr_mm, curr_index

        if rev_primer_mm is None:
            # Nothing was aligned; fail with a message that names the sample
            # instead of an anonymous error from an empty min().
            raise ValueError(
                'No reverse primers listed for SampleID %s (fasta label %s)'
                % (curr_label, label))

        if rev_primer_mm > primer_mismatches:
            # Primer not found within the allowed number of mismatches
            log_data['reverse_primer_not_found'] += 1
            if truncate_option != "truncate_remove":
                log_data['seqs_written'] += 1
                output_fp.write('>%s\n%s\n' % (label, seq))
        elif rev_primer_index > 0:
            # Check for zero seq length after truncation, will not write seq
            log_data['seqs_written'] += 1
            output_fp.write('>%s\n%s\n' % (label, seq[0:rev_primer_index]))

    return log_data
Ejemplo n.º 3
0
def strip_primer(seqs, primer, maxmismatch=0, keep_primer=False):
    '''Strip a 3 prime primer from sequences and sort them by outcome.

    seqs: iterable of (header, sequence) pairs
    primer: primer sequence; it is upper-cased before alignment
    maxmismatch: highest number of primer mismatches still counted as a hit
    keep_primer: if True the sequence is cut len(primer) characters after
     the start of the hit, otherwise it is cut at the start of the hit

    Returns (stripped, nostripped), two lists of (header, sequence) pairs:
    the sequences that were truncated at the primer and the sequences in
    which the primer was not found within maxmismatch mismatches.

    Every returned sequence is upper case.  A sequence containing U is
    aligned with U replaced by T and is returned with every T written as U
    again, in both lists.
    '''
    nostripped = []
    stripped = []
    pri = primer.upper()
    for head, seq in seqs:
        seq = seq.upper()
        # The alignment is done on the T form of the sequence; remember
        # whether the input used U so the output can be converted back.
        RNA = 'U' in seq
        if RNA:
            seq = seq.replace('U', 'T')
        #code adapted from truncate_reverse_primers.py in qiime
        rev_primer_mm, rev_primer_index =\
            local_align_primer_seq(pri, seq)
        if rev_primer_mm > maxmismatch:
            # Primer not found: keep the full sequence
            seqnew = seq
            target = nostripped
        elif keep_primer:
            seqnew = seq[:rev_primer_index + len(primer)]
            target = stripped
        else:
            seqnew = seq[:rev_primer_index]
            target = stripped
        if RNA:
            # Restore the RNA alphabet for unstripped sequences as well, so
            # both returned lists use the same alphabet as the input.
            seqnew = seqnew.replace('T', 'U')
        target.append((head, seqnew))
    #end for
    return stripped, nostripped