def orient_subreads(self):
    """Determine the strand of every subread relative to the consensus.

    Each subread is Smith-Waterman aligned to ``self.consensus`` twice: once
    as given and once reverse-complemented. The strictly higher-scoring
    alignment decides the orientation (a tie is treated as reverse). The
    decision is recorded in ``self._orient`` (``True`` for forward) with one
    entry per subread, including subreads whose alignment is discarded.

    :returns: list of `Alignment` s of subreads to the consensus; subreads
        whose chosen alignment has an empty reference or query span are
        left out.
    """
    # TODO: use a profile here
    # TODO: refactor with align_to_template
    self._orient = []
    alignments = []
    for read in self.subreads:
        revcomp = medaka.common.reverse_complement(read.seq)
        # Positional 8 and 4 are the gap open and gap extend penalties in
        # parasail's argument order.
        fwd_hit = parasail.sw_trace_striped_16(
            read.seq, self.consensus, 8, 4, parasail.dnafull)
        rev_hit = parasail.sw_trace_striped_16(
            revcomp, self.consensus, 8, 4, parasail.dnafull)

        forward = fwd_hit.score > rev_hit.score
        # Recorded before any skip so that _orient stays index-aligned
        # with self.subreads.
        self._orient.append(forward)

        # Flag 16 marks a reverse-strand record (SAM convention).
        if forward:
            hit, oriented_seq, flag = fwd_hit, read.seq, 0
        else:
            hit, oriented_seq, flag = rev_hit, revcomp, 16

        has_span = (
            hit.cigar.beg_ref < hit.end_ref
            and hit.cigar.beg_query < hit.end_query)
        if not has_span:
            # unsure why an empty span can happen; such reads are dropped
            continue

        rstart, cigar = medaka.align.parasail_to_sam(hit, oriented_seq)
        alignments.append(Alignment(
            'consensus_{}'.format(self.name), read.name, flag,
            rstart, oriented_seq, cigar))
    return alignments
def align_to_template(self, template, template_name):
    """Smith-Waterman align every subread to a template sequence.

    ``self.initialize()`` is called first; the orientations stored in
    ``self._orient`` are then paired with ``self.subreads`` and each
    reverse-oriented subread is reverse-complemented before alignment.

    :param template: sequence to which to align subreads.
    :param template_name: name of template sequence, used as the reference
        name of every returned alignment.

    :returns: list of `Alignment` tuples; subreads whose alignment has an
        empty reference or query span are left out.
    """
    self.initialize()
    alignments = []
    for is_fwd, read in zip(self._orient, self.subreads):
        # Flag 16 marks a reverse-strand record (SAM convention).
        if is_fwd:
            query, flag = read.seq, 0
        else:
            query, flag = medaka.common.reverse_complement(read.seq), 16

        # Positional 8 and 4 are the gap open and gap extend penalties in
        # parasail's argument order.
        hit = parasail.sw_trace_striped_16(
            query, template, 8, 4, parasail.dnafull)

        # Alignments with an empty span are dropped (unsure why this can
        # happen).
        if (hit.cigar.beg_ref < hit.end_ref
                and hit.cigar.beg_query < hit.end_query):
            rstart, cigar = medaka.align.parasail_to_sam(hit, query)
            alignments.append(Alignment(
                template_name, read.name, flag, rstart, query, cigar))
    return alignments