def get_alignment_parasail(reference_genome, input_genome):
    """Globally align the input genome against the reference genome.

    Runs parasail's striped Needleman-Wunsch aligner (with traceback) using
    the ``dnafull`` substitution matrix, passing ``input_genome.sequence``
    as the first sequence and ``reference_genome.sequence`` as the second.

    Args:
        reference_genome: object exposing the reference as ``.sequence``.
        input_genome: object exposing the sequence to align as ``.sequence``.

    Returns:
        A 3-tuple ``(ref, comp, query)`` taken from the parasail traceback,
        in that order.
    """
    # The dnafull matrix supports ambiguity codes, although "N"s are not
    # given free mismatches as we might like. The alignments appear good
    # enough for our purpose, however.
    gap_open = 10
    gap_extend = 1
    alignment = parasail.nw_trace_striped_32(
        input_genome.sequence,
        reference_genome.sequence,
        gap_open,
        gap_extend,
        parasail.dnafull,
    )
    trace = alignment.traceback
    return trace.ref, trace.comp, trace.query
def build_envelope(len1, seq1, path1, len2, seq2, path2, padding=15):
    """Build a per-row ``[start, end]`` envelope from an alignment of two sequences.

    ``seq2`` is globally aligned against ``seq1``. Every alignment column is
    turned into a pair of sequence positions, and each position is expanded
    to a half-open range taken from its ``path`` (entry ``k`` up to entry
    ``k + 1``, with the final range closed by ``len1`` / ``len2``). For every
    row inside the range of the ``seq1`` position, the envelope is widened so
    that it contains the range of the paired ``seq2`` position.

    Args:
        len1: number of rows in the envelope; also closes the last range of
            ``path1``.
        seq1: first sequence (used as the alignment reference).
        path1: start offsets, one per position of ``seq1``.
            NOTE(review): must be a Python list, since ``path1[1:] + [len1]``
            relies on list concatenation -- confirm against callers.
        len2: upper clipping bound for envelope values; also closes the last
            range of ``path2``.
        seq2: second sequence (used as the alignment query).
        path2: start offsets, one per position of ``seq2`` (same list
            requirement as ``path1``).
        padding: amount subtracted from every start and added to every end
            before clipping.

    Returns:
        ``numpy.ndarray`` of shape ``(len1, 2)`` and dtype ``uint64`` holding
        ``[start, end]`` per row, clipped to ``[0, len2]``.
    """
    # Needleman-Wunsch alignment with constant gap penalty.
    result = parasail.nw_trace_striped_32(seq2, seq1, 2, 2, parasail.dnafull)
    trace = result.traceback

    # Pair up positions: a running count of non-gap characters gives, for
    # each alignment column, the index of the latest residue consumed from
    # each sequence (-1 while only gaps have been seen, hence the clip to 0).
    ref_positions = np.cumsum([base != '-' for base in trace.ref]) - 1
    query_positions = np.cumsum([base != '-' for base in trace.query]) - 1
    paired_positions = np.column_stack([ref_positions, query_positions]).clip(0)

    # Half-open [start, stop) range owned by every sequence position.
    spans1 = np.column_stack([path1, path1[1:] + [len1]])
    spans2 = np.column_stack([path2, path2[1:] + [len2]])

    # -1 marks "not yet set" for both the start and the end column.
    envelope = np.full((len1, 2), -1, dtype=int)

    for pos1, pos2 in paired_positions:
        start1, stop1 = spans1[pos1]
        start2, stop2 = spans2[pos2]

        for row in range(start1, stop1):
            lowest, highest = envelope[row]
            if lowest < 0 or start2 < lowest:
                envelope[row, 0] = start2
            if highest < 0 or stop2 > highest:
                envelope[row, 1] = stop2

    # Add a little padding to ensure some overlap.
    envelope[:, 0] = envelope[:, 0] - padding
    envelope[:, 1] = envelope[:, 1] + padding
    envelope = envelope.clip(0, len2)

    # Make sure no row starts after its own end, nor after the end of the
    # row before it.
    previous_stop = 0
    for bounds in envelope:  # each ``bounds`` is a writable view of one row
        if bounds[0] > bounds[1]:
            bounds[0] = 0
        if bounds[0] > previous_stop:
            bounds[0] = previous_stop
        previous_stop = bounds[1]

    return envelope.astype(np.uint64)
def get_alignment(reference_genome, input_genome):
    """Map reference positions to input positions via a global alignment.

    Aligns ``input_genome.sequence`` against ``reference_genome.sequence``
    with parasail's striped Needleman-Wunsch aligner (gap arguments 10 and 1,
    ``dnafull`` matrix) and walks the traceback column by column.

    Args:
        reference_genome: object exposing the reference as ``.sequence``.
        input_genome: object exposing the sequence to align as ``.sequence``.

    Returns:
        A list of ``(reference_index, input_index)`` tuples, one for every
        alignment column in which neither sequence has a gap. Indices are
        0-based offsets into the respective ungapped sequences.
    """
    # The dnafull matrix supports ambiguity codes, although "N"s are not
    # given free mismatches as we might like. The alignments appear good
    # enough for our purpose, however.
    result = parasail.nw_trace_striped_32(
        input_genome.sequence,
        reference_genome.sequence,
        10,
        1,
        parasail.dnafull,
    )
    traceback = result.traceback

    position_map = []
    reference_index = 0
    input_index = 0
    for ref, query in zip(traceback.ref, traceback.query):
        ref_has_base = ref != '-'
        query_has_base = query != '-'

        # Only columns where both sequences contribute a base are recorded.
        if ref_has_base and query_has_base:
            position_map.append((reference_index, input_index))

        # Each index advances only when its own sequence consumed a base.
        if ref_has_base:
            reference_index += 1
        if query_has_base:
            input_index += 1

    return position_map