def accuracy(ref, seq, balanced=False, min_coverage=0.0):
    """
    Calculate the accuracy between `ref` and `seq`.

    `seq` is locally aligned against `ref` (gap open 8, gap extend 4,
    `parasail.dnafull` matrix) and the accuracy is derived from the summed
    CIGAR operation counts of that alignment.

    :param ref: reference sequence.
    :param seq: query sequence to score against `ref`.
    :param balanced: if true use (= - I) / (= + X + D), otherwise
        = / (= + I + X + D).
    :param min_coverage: minimum ratio between the length of the reference
        row of the traceback and `len(ref)`; alignments below it score 0.0.

    :returns: accuracy as a percentage, or 0.0 when either input is empty,
        the coverage is below `min_coverage`, or the alignment has no
        operations to score.
    """
    # An empty sequence cannot be aligned; returning early also keeps the
    # coverage division below well defined.
    if len(ref) == 0 or len(seq) == 0:
        return 0.0

    alignment = parasail.sw_trace_striped_32(seq, ref, 8, 4, parasail.dnafull)

    r_coverage = len(alignment.traceback.ref) / len(ref)
    if r_coverage < min_coverage:
        return 0.0

    _, cigar = parasail_to_sam(alignment, seq)

    # Total length per CIGAR operation.
    counts = defaultdict(int)
    for count, op in re.findall(split_cigar, cigar):
        counts[op] += int(count)

    if balanced:
        numerator = counts['='] - counts['I']
        denominator = counts['='] + counts['X'] + counts['D']
    else:
        numerator = counts['=']
        denominator = counts['='] + counts['I'] + counts['X'] + counts['D']

    # Nothing to score: report zero accuracy rather than dividing by zero.
    if denominator == 0:
        return 0.0

    return numerator / denominator * 100
def update(self, annotation, prediction):
    """
    Score `prediction` against `annotation` and record the result.

    `prediction.label` is locally aligned against `annotation.label`
    (gap open 8, gap extend 4, `parasail.dnafull` matrix) and the accuracy
    is derived from the summed CIGAR operation counts. The value is
    appended to `self.accuracy` and also returned.

    :param annotation: object whose `label` is the reference sequence.
    :param prediction: object whose `label` is the query sequence.

    :returns: accuracy as a fraction (not a percentage). 0.0 is recorded
        when either label is empty, when the ratio between the length of
        the reference row of the traceback and the annotation length is
        below `self.min_coverage`, or when the alignment has no operations
        to score.
    """
    # An empty label cannot be aligned; returning early also keeps the
    # coverage division below well defined.
    if len(annotation.label) == 0 or len(prediction.label) == 0:
        self.accuracy.append(0.0)
        return 0.0

    alignment = parasail.sw_trace_striped_32(
        prediction.label, annotation.label, 8, 4, parasail.dnafull)

    # Check coverage first so rejected alignments skip the CIGAR conversion.
    r_coverage = len(alignment.traceback.ref) / len(annotation.label)
    if r_coverage < self.min_coverage:
        self.accuracy.append(0.0)
        return 0.0

    _, cigar = self._parasail_to_sam(alignment, prediction.label)

    # Total length per CIGAR operation.
    counts = defaultdict(int)
    for count, op in re.findall(split_cigar, cigar):
        counts[op] += int(count)

    if self.balanced:
        numerator = counts['='] - counts['I']
        denominator = counts['='] + counts['X'] + counts['D']
    else:
        numerator = counts['=']
        denominator = counts['='] + counts['I'] + counts['X'] + counts['D']

    # Nothing to score: record zero accuracy rather than dividing by zero.
    accuracy = numerator / denominator if denominator else 0.0
    self.accuracy.append(accuracy)
    return accuracy
def print_alignment(ref, seq):
    """
    Show the local alignment of `ref` and `seq` on stdout.

    `ref` is handed to parasail as the first sequence and `seq` as the
    second (gap open 8, gap extend 4, `parasail.dnafull` matrix). The
    query, comparison and reference rows of the traceback are printed,
    followed by the alignment score.

    :returns: the alignment score.
    """
    result = parasail.sw_trace_striped_32(ref, seq, 8, 4, parasail.dnafull)
    trace = result.traceback
    for row in (trace.query, trace.comp, trace.ref):
        print(row)
    print(" Score=%s" % result.score)
    return result.score
def parasail_alignment(query, ref):
    """Align `query` to `ref` with Smith-Waterman and describe the result.

    The alignment uses a gap open penalty of 5, a gap extend penalty of 3
    and the `parasail.dnafull` substitution matrix.

    :param query: the query sequence.
    :param ref: the reference sequence.

    :returns: reference start co-ordinate, cigar string
    """
    gap_open, gap_extend = 5, 3
    alignment = parasail.sw_trace_striped_32(
        query, ref, gap_open, gap_extend, parasail.dnafull)
    ref_start, cigar_string = medaka.smolecule.parasail_to_sam(
        alignment, query)
    return ref_start, cigar_string
def align_read_to_haps(read, haps, g_open=5, g_ext=3, matrix=parasail.dnafull):
    """Score a read against every candidate haplotype.

    :param read: str trimmed read sequence
    :param haps: (padded ref, padded alt 1, ... padded alt n)
    :param g_open: int, gap opening penalty
    :param g_ext: int, gap extend penalty
    :param matrix: substitution matrix handed to parasail.

    :returns: list of Smith-Waterman alignment scores, one per entry of
        `haps`, in the same order.
    """
    return [
        parasail.sw_trace_striped_32(read, hap, g_open, g_ext, matrix).score
        for hap in haps
    ]
def accuracy(ref, seq, balanced=False):
    """
    Calculate the accuracy between `ref` and `seq`.

    `seq` is locally aligned against `ref` (gap open 8, gap extend 4,
    `parasail.dnafull` matrix) and the accuracy is derived from the summed
    CIGAR operation counts of that alignment.

    :param ref: reference sequence.
    :param seq: query sequence to score against `ref`.
    :param balanced: if true use (= - I) / (= + X + D), otherwise
        = / (= + I + X + D).

    :returns: accuracy as a percentage, or 0.0 when either input is empty
        or the alignment has no operations to score.
    """
    # An empty sequence cannot be aligned; report zero accuracy up front.
    if len(ref) == 0 or len(seq) == 0:
        return 0.0

    # parasail takes the query first. `seq` is what gets passed to
    # `parasail_to_sam` as the query, so it must also be the query of the
    # alignment; with the arguments reversed the I and D counts swap, which
    # skews the balanced formula.
    alignment = parasail.sw_trace_striped_32(seq, ref, 8, 4, parasail.dnafull)
    _, cigar = parasail_to_sam(alignment, seq)

    # Total length per CIGAR operation.
    counts = defaultdict(int)
    for count, op in re.findall(split_cigar, cigar):
        counts[op] += int(count)

    if balanced:
        numerator = counts['='] - counts['I']
        denominator = counts['='] + counts['X'] + counts['D']
    else:
        numerator = counts['=']
        denominator = counts['='] + counts['I'] + counts['X'] + counts['D']

    # Nothing to score: report zero accuracy rather than dividing by zero.
    if denominator == 0:
        return 0.0

    return numerator / denominator * 100
def pair_align(reference, query, query_name, subs_mat, params):
    """
    Locally align `query` against `reference` with parasail-python.

    Gap penalties are read from `params['gap_open']` and
    `params['gap_extend']`; `subs_mat` is the substitution matrix. The raw
    alignment is passed to `process_alignment` together with `query`,
    `query_name` and `params`, and that function's result is returned.
    """
    gap_open = params['gap_open']
    gap_extend = params['gap_extend']
    alignment = parasail.sw_trace_striped_32(
        query, reference, gap_open, gap_extend, subs_mat)
    return process_alignment(alignment, query, query_name, params)
def align(r, h, go=5, ge=3, matrix=parasail.dnafull):
    """Return the Smith-Waterman alignment score of `r` against `h`.

    :param r: first sequence handed to parasail.
    :param h: second sequence handed to parasail.
    :param go: gap open penalty.
    :param ge: gap extend penalty.
    :param matrix: substitution matrix.

    :returns: the alignment score.
    """
    result = parasail.sw_trace_striped_32(r, h, go, ge, matrix)
    return result.score