Beispiel #1
0
def check_consensus_length(data_folder, adaID, fragment, VERBOSE=0):
    '''Check consensus length, and if too short or absent complain'''
    from Bio import AlignIO
    ali_fn = get_reference_consensus_ali_filename(data_folder, adaID, fragment)
    if not os.path.isfile(ali_fn):
        if VERBOSE >= 2:
            print 'Consensus alignment to reference not found', adaID, fragment
        return False

    ali = AlignIO.read(ali_fn, 'fasta')
    len_ref = len(ali[0].seq.ungap('-'))
    len_cons = len(ali[1].seq.ungap('-'))
    if len_cons < len_ref - 200:
        if VERBOSE >= 2:
            print 'Consensus alignment to reference too short: ref', len_ref, 'cons:', len_cons
        return False
    elif len_cons > len_ref + 200:
        if VERBOSE >= 2:
            print 'Consensus alignment to reference too long: ref', len_ref, 'cons:', len_cons
        return False

    if VERBOSE >= 2:
        print 'Consensus checked, has approximately the right length: ref', len_ref, 'cons:', len_cons
    return True
def check_consensus_length(data_folder, adaID, fragment, VERBOSE=0):
    '''Check consensus length, and if too short or absent complain'''
    from Bio import AlignIO
    ali_fn = get_reference_consensus_ali_filename(data_folder, adaID, fragment)
    if not os.path.isfile(ali_fn):
        if VERBOSE >= 2:
            print 'Consensus alignment to reference not found', adaID, fragment
        return False

    ali = AlignIO.read(ali_fn, 'fasta')
    len_ref = len(ali[0].seq.ungap('-'))
    len_cons = len(ali[1].seq.ungap('-'))
    if len_cons < len_ref - 200:
        if VERBOSE >= 2:
            print 'Consensus alignment to reference too short: ref', len_ref, 'cons:', len_cons
        return False
    elif len_cons > len_ref + 200:
        if VERBOSE >= 2:
            print 'Consensus alignment to reference too long: ref', len_ref, 'cons:', len_cons
        return False

    if VERBOSE >= 2:
        print 'Consensus checked, has approximately the right length: ref', len_ref, 'cons:', len_cons
    return True
Beispiel #3
0
            if VERBOSE >= 2:
                print ali[:, :30]
                print ali[:, -30:]
                print 'Lenghts: ref', len(refseq), 'consensus', len(
                    consensusseq)
                len_ali = ali.get_alignment_length()
                n_diff = sum(ali[0, i] != ali[1, i] for i in xrange(len_ali))
                print 'Differences from ref:', n_diff, '(' + '{:3.1f}'.format(
                    100.0 * n_diff / len_ali) + '%)'

            # Ungap consensus
            consensusseq = SeqRecord(ali[1].seq, id=name, name=name)
            if '-' in consensusseq:
                consensusseq.seq = consensusseq.seq.ungap('-')

            # Write output
            outfile = get_consensus_filename(data_folder,
                                             adaID,
                                             frag_out,
                                             trim_primers=True)
            SeqIO.write(consensusseq, outfile, 'fasta')

            AlignIO.write(
                ali,
                get_reference_consensus_ali_filename(data_folder, adaID,
                                                     fragment), 'fasta')

            if store_allele_counts:
                allele_counts.dump(
                    get_allele_counts_filename(data_folder, adaID, frag_out))
            ali = align_muscle(refseq, consensusseq, sort=True)

            if ali[0][-1] == '-':
                start_nongap = len(ali[0]) - len(ali[0].seq.lstrip('-'))
                end_nongap = len(ali[0].seq.rstrip('-'))
                ali = ali[:, start_nongap: end_nongap]

            if VERBOSE >= 2:
                print ali[:, :30]
                print ali[:, -30:]
                print 'Lenghts: ref', len(refseq), 'consensus', len(consensusseq)
                len_ali = ali.get_alignment_length()
                n_diff = sum(ali[0, i] != ali[1, i] for i in xrange(len_ali))
                print 'Differences from ref:', n_diff, '('+'{:3.1f}'.format(100.0 * n_diff / len_ali)+'%)'

            # Ungap consensus
            consensusseq = SeqRecord(ali[1].seq, id=name, name=name)
            if '-' in consensusseq:
                consensusseq.seq = consensusseq.seq.ungap('-')

            # Write output
            outfile = get_consensus_filename(data_folder, adaID, frag_out, trim_primers=True)
            SeqIO.write(consensusseq, outfile, 'fasta')

            AlignIO.write(ali, get_reference_consensus_ali_filename(data_folder, adaID, fragment), 'fasta')

            if store_allele_counts:
                allele_counts.dump(get_allele_counts_filename(data_folder, adaID, frag_out))