def add_sequence(self, label, seq, align_to=None):
    """Store ``seq`` under ``label``, optionally aligned to an existing track.

    If ``align_to`` is ``None`` or is not a key of this assembly, ``seq`` is
    stored unaligned at offset 0 and this assembly itself is returned.

    Otherwise ``seq`` is aligned with ``align.ssearch36`` against the values
    of the track stored under ``align_to``. The two aligned tracks are passed
    through ``conform_gaps`` together with this assembly, the aligned copy of
    ``seq`` is stored under ``label`` in the assembly that ``conform_gaps``
    returns, and that assembly is returned.
    """
    can_align = align_to is not None and align_to in self
    if not can_align:
        # Nothing to align against: place the raw sequence at the origin.
        self[label] = aflist(0, seq, '-')
        return self

    anchor = self[align_to]
    reference = ''.join(anchor.values)
    (ref_start, ref_aligned), (seq_start, seq_aligned) = align.ssearch36(reference, seq)

    # The aligned reference keeps the anchor track's offset; the new sequence
    # is shifted by the difference between the two offsets from the aligner.
    anchor_track = aflist(anchor.offset, ref_aligned, gap='-')
    new_track = aflist(anchor.offset + seq_start - ref_start, seq_aligned, '-')

    # conform_gaps also returns the adjusted anchor track; only the adjusted
    # new track is used here.
    merged, _adjusted_anchor, adjusted_new = conform_gaps(self, align_to, anchor_track, new_track)
    merged[label] = adjusted_new
    return merged
def assemble(seq1, conf1, traces1, seq2, conf2, traces2):
    """Combine two reads into a contig.

    Each read is supplied as its bases (``seqN``), its confidences
    (``confN``, which must have the same length as ``seqN``) and optional
    trace data (``tracesN``, skipped when falsy). The high quality segment
    of each read is extracted, the two segments are aligned with
    ``align.ssearch36``, and every track is laid out along that alignment.
    Regions of a track outside the used segment are marked with
    "unused" / "leftunused" / "rightunused" features.

    Returns an Assembly holding "confidences N" and "bases N" for both
    reads, plus "traces N" where the corresponding trace track is truthy.
    A "contig" entry is present (and the assembly is passed through
    ``toorigin()``) unless both aligned segments have zero width. Only the
    Assembly is returned; no separate fate string is produced.
    """
    assert len(seq1) == len(conf1)
    assert len(seq2) == len(conf2)

    keys1 = ("traces 1", "confidences 1", "bases 1")
    keys2 = ("traces 2", "confidences 2", "bases 2")

    def shade(start, stop, name):
        # Feature with the fixed colour settings shared by every unused region.
        return interval(start, stop, name=name, red=0, green=0, blue=0, alpha=0.5)

    def unused_track(data, gap, trackclass):
        # Raw track at offset 0, marked unused over its entire extent.
        return ProperList(
            0,
            data,
            gap=gap,
            trackclass=trackclass,
            features=[shade(neginf, posinf, "unused")],
        )

    def put_aligned(target, keys, chroma, quals, bases):
        # Store the aligned tracks of one read; traces only when truthy.
        traces_key, conf_key, bases_key = keys
        if chroma:
            target[traces_key] = chroma
        target[conf_key] = quals
        target[bases_key] = bases

    def put_unused(target, keys, gate, raw_traces, raw_conf, raw_seq, place):
        # Store the raw tracks of one read as wholly unused; ``place``
        # positions each freshly built track (shift or identity).
        traces_key, conf_key, bases_key = keys
        if gate:
            target[traces_key] = place(unused_track(raw_traces, None, "svg"))
        target[conf_key] = place(unused_track(raw_conf, None, "integer"))
        target[bases_key] = place(unused_track(raw_seq, "-", "nucleotide"))

    # Pull out high quality segments
    good1 = highqualityinterval(conf1)
    good2 = highqualityinterval(conf2)
    segment1 = ""
    if good1.isproper():
        segment1 = seq1[good1.left() : good1.right()]
    segment2 = ""
    if good2.isproper():
        segment2 = seq2[good2.left() : good2.right()]

    # Align them
    # If you were going to add assembly against a template, the major
    # change would be to write a function that took segment1,
    # segment2, and template, and returned a third argument
    # (templateoffset, rawaltemplate), then added the template
    # in the Assembly lines below. The easiest path would probably be
    # to make the template argument to assemble optional, defaulting
    # to None, and call ssearch36 if it is None.
    # The function to do the templated alignment would align both
    # segments against the template, and then go through the two
    # alignments to combine them (inserting -'s appropriately, etc.).
    # For a very similar algorithm that may help in writing that, see
    # assembly.conform_gaps.
    (start1, raw1), (start2, raw2) = align.ssearch36(segment1, segment2)
    core1 = aflist(start1, raw1, gap="-", trackclass="nucleotide")
    core2 = aflist(start2, raw2, gap="-", trackclass="nucleotide")
    span1 = ProperInterval(start1, start1 + core1.width())
    span2 = ProperInterval(start2, start2 + core2.width())

    # Lay the full reads, confidences and traces out along the alignment.
    bases1 = extend(core1, good1, aflist(0, seq1, "-"))
    bases2 = extend(core2, good2, aflist(0, seq2, "-"))
    quals1 = tracealong(conf1, bases1)
    quals2 = tracealong(conf2, bases2)
    chroma1 = tracealong(traces1, bases1) if traces1 else None
    chroma2 = tracealong(traces2, bases2) if traces2 else None

    # Mark whatever lies outside the used span on every existing track.
    per_read = (
        (span1, (bases1, quals1, chroma1)),
        (span2, (bases2, quals2, chroma2)),
    )
    for span, tracks in per_read:
        for track in tracks:
            if track is None:
                continue
            if span.isempty():
                track.appendfeature(shade(neginf, posinf, "unused"))
                continue
            if span.left() > track.left():
                track.appendfeature(shade(neginf, span.left(), "leftunused"))
            if span.right() < track.right():
                track.appendfeature(shade(span.right(), posinf, "rightunused"))

    # Sanity check: slicing by the used span must give the segment's width.
    for core, span, quals, chroma in (
        (core1, span1, quals1, chroma1),
        (core2, span2, quals2, chroma2),
    ):
        assert core.width() == quals[span].width()
        assert chroma is None or core.width() == chroma[span].width()

    contig = combine((core1, quals1[span1]), (core2, quals2[span2]))

    quals1.setmeta("trackclass", "integer")
    quals2.setmeta("trackclass", "integer")
    if chroma1:
        chroma1.setmeta("trackclass", "svg")
    if chroma2:
        chroma2.setmeta("trackclass", "svg")
    bases1.setmeta("trackclass", "nucleotide")
    bases2.setmeta("trackclass", "nucleotide")
    contig.setmeta("trackclass", "nucleotide")

    has1 = core1.width() != 0
    has2 = core2.width() != 0

    result = Assembly()
    if not (has1 or has2):
        # Neither strand usable: raw tracks only, no contig, no re-origin.
        put_unused(result, keys1, traces1, traces1, conf1, seq1, lambda track: track)
        put_unused(result, keys2, traces2, traces2, conf2, seq2, lambda track: track)
        return result

    if has1 and has2:
        # both strands
        put_aligned(result, keys1, chroma1, quals1, bases1)
        put_aligned(result, keys2, chroma2, quals2, bases2)
    elif has1:
        # strand 1 only: raw strand 2 is shifted to strand 1's left edge
        put_unused(
            result, keys2, chroma2, traces2, conf2, seq2,
            lambda track: track >> quals1.left(),
        )
        put_aligned(result, keys1, chroma1, quals1, bases1)
    else:
        # strand 2 only: raw strand 1 is shifted to strand 2's left edge
        put_unused(
            result, keys1, chroma1, traces1, conf1, seq1,
            lambda track: track >> quals2.left(),
        )
        put_aligned(result, keys2, chroma2, quals2, bases2)

    result["contig"] = contig
    return result.toorigin()
def assemble(seq1, conf1, traces1, seq2, conf2, traces2):
    """Combine two reads into a contig.

    ``seqN`` / ``confN`` / ``tracesN`` are the bases, confidences and
    (optional, may be falsy) traces of read N; ``seqN`` and ``confN`` must
    have equal length. The high quality segments of the two reads are
    aligned with ``align.ssearch36`` and all tracks are arranged along the
    result, with features marking the regions that were not used.

    Returns an Assembly containing 'confidences N' and 'bases N' for both
    reads and 'traces N' where traces exist. When at least one aligned
    segment has non-zero width the Assembly also has a 'contig' key and is
    returned via ``toorigin()``; otherwise it holds only the raw reads,
    marked entirely unused. Only the Assembly is returned; no separate fate
    string is produced.
    """
    assert len(seq1) == len(conf1)
    assert len(seq2) == len(conf2)

    def grey(lo, hi, label):
        # Every unused-region feature shares these colour settings.
        return interval(lo, hi, name=label, red=0, green=0, blue=0, alpha=0.5)

    def blanked(values, gap, trackclass):
        # A raw track at offset 0 flagged unused from end to end.
        return ProperList(0, values, gap=gap, trackclass=trackclass,
                          features=[grey(neginf, posinf, 'unused')])

    def flag_unused(used, track):
        # Annotate the parts of ``track`` lying outside the ``used`` interval.
        if track is None:
            return
        if used.isempty():
            track.appendfeature(grey(neginf, posinf, 'unused'))
            return
        if used.left() > track.left():
            track.appendfeature(grey(neginf, used.left(), 'leftunused'))
        if used.right() < track.right():
            track.appendfeature(grey(used.right(), posinf, 'rightunused'))

    # Pull out high quality segments
    hq1 = highqualityinterval(conf1)
    hq2 = highqualityinterval(conf2)
    segment1 = seq1[hq1.left():hq1.right()] if hq1.isproper() else ''
    segment2 = seq2[hq2.left():hq2.right()] if hq2.isproper() else ''

    # Align them
    # If you were going to add assembly against a template, the major
    # change would be to write a function that took segment1,
    # segment2, and template, and returned a third argument
    # (templateoffset, rawaltemplate), then added the template
    # in the Assembly lines below. The easiest path would probably be
    # to make the template argument to assemble optional, defaulting
    # to None, and call ssearch36 if it is None.
    # The function to do the templated alignment would align both
    # segments against the template, and then go through the two
    # alignments to combine them (inserting -'s appropriately, etc.).
    # For a very similar algorithm that may help in writing that, see
    # assembly.conform_gaps.
    hit1, hit2 = align.ssearch36(segment1, segment2)
    offset1, gapped1 = hit1
    offset2, gapped2 = hit2

    used_bases1 = aflist(offset1, gapped1, gap='-', trackclass='nucleotide')
    used_bases2 = aflist(offset2, gapped2, gap='-', trackclass='nucleotide')
    used1 = ProperInterval(offset1, offset1 + used_bases1.width())
    used2 = ProperInterval(offset2, offset2 + used_bases2.width())

    full_bases1 = extend(used_bases1, hq1, aflist(0, seq1, '-'))
    full_bases2 = extend(used_bases2, hq2, aflist(0, seq2, '-'))
    full_conf1 = tracealong(conf1, full_bases1)
    full_conf2 = tracealong(conf2, full_bases2)
    full_traces1 = None
    if traces1:
        full_traces1 = tracealong(traces1, full_bases1)
    full_traces2 = None
    if traces2:
        full_traces2 = tracealong(traces2, full_bases2)

    for track in (full_bases1, full_conf1, full_traces1):
        flag_unused(used1, track)
    for track in (full_bases2, full_conf2, full_traces2):
        flag_unused(used2, track)

    # Slicing a track by its used interval must reproduce the segment width.
    assert used_bases1.width() == full_conf1[used1].width()
    assert full_traces1 is None or used_bases1.width() == full_traces1[used1].width()
    assert used_bases2.width() == full_conf2[used2].width()
    assert full_traces2 is None or used_bases2.width() == full_traces2[used2].width()

    contig = combine((used_bases1, full_conf1[used1]),
                     (used_bases2, full_conf2[used2]))

    full_conf1.setmeta('trackclass', 'integer')
    full_conf2.setmeta('trackclass', 'integer')
    if full_traces1:
        full_traces1.setmeta('trackclass', 'svg')
    if full_traces2:
        full_traces2.setmeta('trackclass', 'svg')
    full_bases1.setmeta('trackclass', 'nucleotide')
    full_bases2.setmeta('trackclass', 'nucleotide')
    contig.setmeta('trackclass', 'nucleotide')

    first_used = used_bases1.width() != 0
    second_used = used_bases2.width() != 0

    a = Assembly()

    if not first_used and not second_used:
        # No usable strand: return the raw reads, untouched by toorigin().
        if traces1:
            a['traces 1'] = blanked(traces1, None, 'svg')
        a['confidences 1'] = blanked(conf1, None, 'integer')
        a['bases 1'] = blanked(seq1, '-', 'nucleotide')
        if traces2:
            a['traces 2'] = blanked(traces2, None, 'svg')
        a['confidences 2'] = blanked(conf2, None, 'integer')
        a['bases 2'] = blanked(seq2, '-', 'nucleotide')
        return a

    if first_used and second_used:
        # both strands
        if full_traces1:
            a['traces 1'] = full_traces1
        a['confidences 1'] = full_conf1
        a['bases 1'] = full_bases1
        if full_traces2:
            a['traces 2'] = full_traces2
        a['confidences 2'] = full_conf2
        a['bases 2'] = full_bases2
    elif first_used:
        # strand 1 only; the raw strand 2 tracks are shifted to start where
        # the aligned strand 1 confidences start.
        if full_traces2:
            a['traces 2'] = blanked(traces2, None, 'svg') >> full_conf1.left()
        a['confidences 2'] = blanked(conf2, None, 'integer') >> full_conf1.left()
        a['bases 2'] = blanked(seq2, '-', 'nucleotide') >> full_conf1.left()
        if full_traces1:
            a['traces 1'] = full_traces1
        a['confidences 1'] = full_conf1
        a['bases 1'] = full_bases1
    else:
        # strand 2 only; mirror image of the branch above.
        if full_traces1:
            a['traces 1'] = blanked(traces1, None, 'svg') >> full_conf2.left()
        a['confidences 1'] = blanked(conf1, None, 'integer') >> full_conf2.left()
        a['bases 1'] = blanked(seq1, '-', 'nucleotide') >> full_conf2.left()
        if full_traces2:
            a['traces 2'] = full_traces2
        a['confidences 2'] = full_conf2
        a['bases 2'] = full_bases2

    a['contig'] = contig
    return a.toorigin()