def _trace(*parts):
    """Print the parts separated by single spaces.

    Produces the same text as a Python 2 ``print a, b, c`` statement while
    also being valid on Python 3.
    """
    print(' '.join(str(part) for part in parts))


def _align_group(seqs, tag, depth, max_seqs):
    """Bring one group of (name, sequence) pairs to a common length.

    Oversized groups are split with ``split_seqs`` and aligned recursively;
    groups whose sequences differ in length are handed to ``call_muscle``;
    groups that already share one length (or are empty) are returned as-is.
    ``tag`` ('A' or 'B') only labels the progress messages.
    """
    if len(seqs) > max_seqs:
        _trace('recursing' + tag, len(seqs), depth)
        return recursive_align(*split_seqs(seqs),
                               depth=depth + 1, max_seqs=max_seqs)
    # More than one distinct length means the group is not yet aligned.
    if len(set(len(seq) for _, seq in seqs)) > 1:
        _trace('aligning' + tag, len(seqs), depth)
        return call_muscle(seqs)
    _trace('Finished' + tag, depth)
    return seqs


def recursive_align(seqsA, seqsB, depth=0, max_seqs=1000):
    """Align two groups of sequences, dividing the work recursively.

    Each group is first brought to a common length on its own (see
    ``_align_group``).  If both groups then share the same length they are
    simply concatenated; otherwise the combined list is passed to
    ``call_muscle``.

    Args:
        seqsA, seqsB: lists of ``(name, sequence)`` pairs.
        depth: current recursion depth, used only in progress messages.
        max_seqs: a group with more than this many entries is split with
            ``split_seqs`` before alignment.

    Returns:
        A list of ``(name, sequence)`` pairs covering both groups.
        NOTE(review): assumes ``call_muscle`` returns the same pair
        structure it receives -- confirm against its definition.
    """
    seqsA = _align_group(seqsA, 'A', depth, max_seqs)
    seqsB = _align_group(seqsB, 'B', depth, max_seqs)

    # An empty group has no first sequence to compare against; joining it
    # with the other group needs no further alignment.
    if not seqsA or not seqsB or len(seqsA[0][1]) == len(seqsB[0][1]):
        _trace('Easy join', depth)
        return seqsA + seqsB

    _trace('Hard join', depth)
    return call_muscle(seqsA + seqsB)
def align_seq_ser(seq_series):
    """Align every sequence in a Series together with the HXB2 reference.

    The (index label, sequence) pairs of ``seq_series`` plus a trailing
    ``('hxb2', hxb2_ltr)`` entry are passed to ``call_muscle``.  The result
    is looked up by ``str(label)`` and returned as a Series on the original
    index, with the reference row dropped.

    NOTE(review): presumably ``call_muscle`` returns (name, aligned
    sequence) pairs with string names -- confirm against its definition.
    """
    labelled = list(zip(seq_series.index, seq_series.values))
    labelled.append(('hxb2', hxb2_ltr))

    aligned_by_name = dict(call_muscle(labelled))

    # Restore input order; the reference is last and is sliced off below.
    ordered = []
    for label, _ in labelled:
        ordered.append(aligned_by_name[str(label)])

    return Series(ordered[:-1], seq_series.index)
def get_wanted_seq_cols(seq_series):
    """Extract the wanted HXB2-numbered columns from the first sequence.

    Only ``seq_series.values[0]`` is used: it is aligned against
    ``hxb2_ltr`` via ``call_muscle`` and the alignment is walked column by
    column while counting non-gap reference letters (1-based positions).

    Returns:
        A Series keyed by ``str(position)`` for every entry of
        ``wanted_seq_cols``; the value is the test-sequence letter aligned
        to that reference position, or ``None`` if the position was never
        reached.

    NOTE: a later definition with the same name in this file replaces this
    one when the module is executed top to bottom.
    """
    pair = [('test', seq_series.values[0]), ('hxb2', hxb2_ltr)]
    aligned = dict(call_muscle(pair))

    wanted = set(wanted_seq_cols)
    picked = {str(col): None for col in wanted_seq_cols}

    ref_pos = 0
    for ref_letter, test_letter in zip(aligned['hxb2'], aligned['test']):
        if ref_letter == '-':
            # Insertion relative to the reference: it has no reference
            # position, so it must not overwrite the letter already
            # recorded for the preceding position.
            continue
        ref_pos += 1  # 1-based!
        if ref_pos in wanted:
            picked[str(ref_pos)] = test_letter

    return Series(picked)
def get_wanted_seq_cols(seq_series):
    """Extract the wanted HXB2-numbered columns from every sequence.

    All sequences in ``seq_series`` are aligned together with ``hxb2_ltr``
    via ``call_muscle``.  The alignment is walked column by column while
    counting non-gap reference letters (1-based positions); for each
    position listed in ``wanted_seq_cols`` the aligned letter of every input
    sequence is collected.

    Returns:
        A Series on ``seq_series.index`` whose values are lists of letters,
        one per wanted reference position encountered, in alignment order.

    NOTE(review): presumably ``call_muscle`` returns (name, aligned
    sequence) pairs with string names -- confirm against its definition.
    """
    labelled = list(zip(seq_series.index, seq_series.values))
    labelled.append(('hxb2', hxb2_ltr))

    aligned_by_name = dict(call_muscle(labelled))
    rows = [aligned_by_name[str(label)] for label, _ in labelled]

    # Build the lookup set once instead of scanning wanted_seq_cols for
    # every alignment column.
    wanted = set(wanted_seq_cols)
    # One output list per input sequence; the reference (last row) gets none.
    picked = [[] for _ in rows[:-1]]

    ref_pos = 0
    for column in zip(*rows):
        if column[-1] == '-':
            # Gap in the reference: this column has no reference position,
            # so it must not contribute extra letters.
            continue
        ref_pos += 1  # 1-based!
        if ref_pos in wanted:
            # zip stops at the shorter ``picked``, dropping the reference.
            for letters, letter in zip(picked, column):
                letters.append(letter)

    return Series(picked, seq_series.index)
#from Bio import SeqIO from Bio.SeqIO.AbiIO import AbiIterator files = glob.glob('../Wigdahl Trace files/2:11:11/*.ab1') seqs = [] for f in files: rec = AbiIterator(open(f, mode = 'rb'), trim = True).next() seqs.append( (rec.id, rec.seq.tostring()) ) # <codecell> !/home/will/staden-2.0.0b9.x86_64/bin/convert_trace --help # <codecell> res = call_muscle(seqs) with open('align_data.fasta', 'w') as handle: fasta_writer(handle, res) # <codecell> from HIVTransTool import process_seqs results = list(process_seqs(seqs[:50], extract_regions = True, known_names = 50)) # <codecell> for row in results: if row['RegionName'] == 'LTR5': print row['Name'], row['QueryNuc']