Example #1
0
def _align_group(seqs, label, depth, max_seqs):
    """Bring one group of (name, sequence) pairs to a common length.

    Groups with more than ``max_seqs`` entries are split with ``split_seqs``
    and handled by a deeper ``recursive_align`` call; groups whose sequences
    differ in length are passed to ``call_muscle``; anything else (including
    an empty group) is returned untouched.  ``label`` ('A' or 'B') only
    tags the progress messages.
    """
    if len(seqs) > max_seqs:
        print('recursing%s %s %s' % (label, len(seqs), depth))
        return recursive_align(*split_seqs(seqs),
                               depth=depth + 1, max_seqs=max_seqs)

    # More than one distinct length means the group is not aligned yet.
    distinct_lengths = set(len(s) for _, s in seqs)
    if len(distinct_lengths) > 1:
        print('aligning%s %s %s' % (label, len(seqs), depth))
        return call_muscle(seqs)

    print('Finished%s %s' % (label, depth))
    return seqs


def recursive_align(seqsA, seqsB, depth=0, max_seqs=1000):
    """Align two groups of (name, sequence) pairs and return them joined.

    Each group is first brought to a common sequence length on its own
    (recursively splitting groups larger than ``max_seqs``).  The two groups
    are then concatenated directly when their first sequences have the same
    length, and passed through ``call_muscle`` together otherwise.

    Parameters:
        seqsA, seqsB: lists of (name, sequence) tuples.
        depth: recursion depth, used only in the progress messages.
        max_seqs: largest group handed to ``call_muscle`` in one piece
            before it is split.

    Returns:
        The combined list, A entries first when no joint alignment is needed;
        otherwise whatever ``call_muscle`` returns for the concatenation.

    NOTE(review): equal length of the first sequences is the only check made
    before an "easy join"; confirm this is a sufficient proxy for the two
    groups sharing one alignment.
    """
    seqsA = _align_group(seqsA, 'A', depth, max_seqs)
    seqsB = _align_group(seqsB, 'B', depth, max_seqs)

    # An empty side has nothing to reconcile; indexing its first element
    # (as the length comparison below does) would raise IndexError.
    if not seqsA or not seqsB or len(seqsA[0][1]) == len(seqsB[0][1]):
        print('Easy join %s' % (depth,))
        return seqsA + seqsB

    print('Hard join %s' % (depth,))
    return call_muscle(seqsA + seqsB)
def align_seq_ser(seq_series):
    """Align every sequence in ``seq_series`` together with ``hxb2_ltr``.

    The series entries are labelled with their index values, the module-level
    ``hxb2_ltr`` sequence is appended under the name 'hxb2', and the whole
    set is passed to ``call_muscle`` (defined elsewhere; its result is read
    as (name, aligned sequence) pairs keyed by ``str(name)``).

    Returns a Series of the aligned sequences, in the original order and on
    the original index, with the trailing 'hxb2' entry dropped.
    """
    labelled = list(zip(seq_series.index, seq_series.values))
    labelled.append(('hxb2', hxb2_ltr))

    aligned_by_name = dict(call_muscle(labelled))

    aligned = []
    for name, _ in labelled:
        aligned.append(aligned_by_name[str(name)])

    # The last entry is the 'hxb2' sequence added above, not part of the input.
    return Series(aligned[:-1], seq_series.index)
Example #3
0
def get_wanted_seq_cols(seq_series):
    """Return the first sequence's letters at the wanted reference positions.

    Only ``seq_series.values[0]`` is used.  It is aligned against the
    module-level ``hxb2_ltr`` via ``call_muscle`` (defined elsewhere; its
    result is read as (name, aligned sequence) pairs), and the alignment is
    walked column by column while counting the non-gap letters of the 'hxb2'
    row, which gives 1-based reference positions.

    Returns:
        A Series keyed by ``str(position)`` for every entry of the
        module-level ``wanted_seq_cols``.  The value is the letter of the
        test sequence aligned to that reference position, or ``None`` when
        the position is never reached in the alignment.
    """
    nseqs = [('test', seq_series.values[0]),
             ('hxb2', hxb2_ltr)]
    daln = dict(call_muscle(nseqs))

    out = dict.fromkeys(str(x) for x in wanted_seq_cols)
    scols = set(wanted_seq_cols)

    hxb2pos = 0
    for hxb2_l, test_l in zip(daln['hxb2'], daln['test']):
        if hxb2_l == '-':
            # Insertion relative to the reference: this column belongs to no
            # reference position.  Recording it would overwrite the letter
            # already stored for the preceding wanted position.
            continue

        hxb2pos += 1  # 1-based!
        if hxb2pos in scols:
            out[str(hxb2pos)] = test_l

    return Series(out)
def get_wanted_seq_cols(seq_series):
    """Extract the wanted reference columns from every sequence in the series.

    All entries of ``seq_series`` are aligned together with the module-level
    ``hxb2_ltr`` (appended under the name 'hxb2') via ``call_muscle``
    (defined elsewhere; its result is read as (name, aligned sequence) pairs
    keyed by ``str(name)``).  The alignment is walked column by column while
    counting non-gap letters of the 'hxb2' row, giving 1-based reference
    positions.

    Returns:
        A Series on the original index whose values are lists of letters,
        one letter per reference position listed in the module-level
        ``wanted_seq_cols`` that occurs in the alignment, in alignment order.
    """
    nseqs = list(zip(seq_series.index, seq_series.values))
    nseqs.append(('hxb2', hxb2_ltr))
    daln = dict(call_muscle(nseqs))
    aln = [daln[str(name)] for name, _ in nseqs]

    # One output list per input sequence; the trailing 'hxb2' row gets none.
    outs = [[] for _ in aln[:-1]]
    # Built once so the per-column membership test does not rescan the
    # wanted positions.
    wanted = set(wanted_seq_cols)

    hxb2pos = 0
    for column in zip(*aln):
        if column[-1] == '-':
            # Gap in the reference row: an insertion that maps to no
            # reference position.  Appending it would add extra letters after
            # the preceding wanted position.
            continue

        hxb2pos += 1  # 1-based!
        if hxb2pos in wanted:
            # zip stops at the shorter ``outs``, so the reference letter at
            # the end of the column is dropped.
            for out, let in zip(outs, column):
                out.append(let)

    return Series(outs, seq_series.index)
#from Bio import SeqIO
from Bio.SeqIO.AbiIO import AbiIterator
# Read the first record of every .ab1 trace file in the run directory and
# collect (record id, sequence string) pairs in ``seqs``.
files = glob.glob('../Wigdahl Trace files/2:11:11/*.ab1')
seqs = []
for f in files:
    # Close each trace file as soon as its record has been read instead of
    # leaving one open handle per file.
    with open(f, mode='rb') as handle:
        rec = next(AbiIterator(handle, trim=True))
    # str() gives the same text as the deprecated Seq.tostring().
    seqs.append((rec.id, str(rec.seq)))

# <codecell>

# IPython shell escape (not Python): runs the convert_trace binary from the
# local Staden install with --help.
!/home/will/staden-2.0.0b9.x86_64/bin/convert_trace --help

# <codecell>

# Pass every collected sequence through call_muscle in a single call
# (presumably a MUSCLE alignment; helper defined elsewhere) and hand the
# result to fasta_writer, writing to align_data.fasta in the working directory.
res = call_muscle(seqs)
with open('align_data.fasta', 'w') as handle:
    fasta_writer(handle, res)

# <codecell>

from HIVTransTool import process_seqs

# Run process_seqs on the first 50 sequences only and materialise its output
# as a list so the rows can be iterated more than once.
# NOTE(review): the meaning of extract_regions / known_names is defined by
# HIVTransTool and is not visible here.
results = list(process_seqs(seqs[:50], extract_regions = True, known_names = 50))

# <codecell>

for row in results:
    if row['RegionName'] == 'LTR5':
        print row['Name'], row['QueryNuc']