def find_variations_md(cigartuples, chromosome, pos, qual, seq, md, subsegments):
    """
    Loops through read to find variant base. This function is used with BWA-MEM, Minimap2 and NGMLR bam
    files

    Walks the CIGAR operations together with the MD tag and registers every mismatching or deleted
    reference position in the module-level ``tmp_variants`` dict. For each such position the first
    segment of ``subsegments`` that spans it receives the observation through ``Variant.add_segment``:
    ``[base, quality]`` for a mismatch and ``['-']`` for a deleted base.

    :param cigartuples: iterable of (operation, length) pairs. Operations 0 (match), 1 (insertion),
        2 (deletion) and 4 (soft clip) are interpreted; every other operation is ignored
    :param chromosome: chromosome name handed to ``v.Variant`` when a position is seen for the first time
    :param pos: alignment start; positions are stored as ``pos + 1 + reference offset`` (presumably a
        0-based start converted to 1-based coordinates - confirm against the caller)
    :param qual: per-base qualities, indexed like ``seq``
    :param seq: read sequence, indexed by read offset (soft-clipped bases are counted)
    :param md: MD tag string of the alignment
    :param subsegments: list of segment objects exposing ``id``, ``pos`` and ``end``. Modified in place:
        segments that end before the position being recorded are removed from the list
    """

    def record(position, observation):
        # Make sure a Variant exists for the position, even if no segment turns out to span it.
        if position not in tmp_variants:
            tmp_variants[position] = v.Variant(chromosome, position)
        # Index-based scan so that removing a stale segment does not make the loop skip its
        # successor (deleting from a list inside a for-loop over that same list does).
        index = 0
        while index < len(subsegments):
            segment = subsegments[index]
            if segment.pos <= position <= segment.end:
                tmp_variants[position].add_segment(segment.id, observation)
                break
            if segment.end < position:
                # Positions only grow during the walk, so this segment cannot match any more.
                del subsegments[index]
            else:
                index += 1

    # One MD block per stretch between two deletions; the deleted bases themselves are dropped.
    md_blocks = re.split(r"\^\D+", md)
    read_cursor = 0
    ref_cursor = 0
    md_cursor = 0
    # Expanded form of the current MD block: '=' for a matching base, 'X' for a mismatch.
    # Only the part that has not been consumed by earlier match operations is kept.
    md_string = ''

    for operation, length in cigartuples:
        if operation in (1, 4):
            # Insertions and soft clips consume read bases only.
            read_cursor += length
        elif operation == 2:
            # A deletion closes the current MD block.
            md_cursor += 1
            md_string = ''
            for _ in range(length):
                record(pos + 1 + ref_cursor, ['-'])
                ref_cursor += 1
        elif operation == 0:
            md_block = md_blocks[md_cursor]
            if re.search(r"\D+", md_block):
                if md_string == '':
                    # "3A0C2" -> "===XX==". Joining with 'X' keeps mismatches that follow a
                    # zero-length run, e.g. the leading mismatch of "0A9".
                    md_string = 'X'.join('=' * int(run) for run in re.split(r"\D", md_block))
                for mismatch in re.finditer('X', md_string[:length]):
                    offset = mismatch.start()
                    # The offset inside the match block applies to the reference position as
                    # well as to the read position.
                    record(pos + 1 + ref_cursor + offset,
                           [seq[read_cursor + offset], qual[read_cursor + offset]])
                # An insertion can split one MD block over several match operations.
                md_string = md_string[length:]
            ref_cursor += length
            read_cursor += length
def find_SNPs(chromosome, snp_position):
    """
    Looks for SNPs on given position in the genome using pileup.

    Every read of the pileup column that passes ``keep_segment`` is added to a ``Variant`` for the
    position, either as a deletion (``'-'``) or as ``[base, quality]``. Per base, the number of
    observations with a quality of at least ``NanoSV.opts_min_base_qual_ph`` and the number below it
    are counted. The variant is stored in the module-level ``variants`` structure when the deletion
    count stays below ``NanoSV.opts_max_deletions * total`` and the two most observed bases pass the
    ``NanoSV.opts_min_occurences_of_highq_var`` and ``NanoSV.opts_min_occurences_of_var`` thresholds.

    :param chromosome: chromosome name used for the pileup, the Variant and the ``variants`` lookup
    :param snp_position: position to inspect; converted with ``int()`` and piled up over
        ``[position - 1, position)``
    """
    position = int(snp_position)
    # base -> [observations at or above the quality threshold, observations below it]
    base_ratios = {'A': [0, 0], 'C': [0, 0], 'G': [0, 0], 'T': [0, 0], '=': [0, 0]}
    deletions = 0
    total_n = 0
    variant = v.Variant(chromosome, position)

    for pileupcolumn in F.pileup(chromosome, position - 1, position, truncate=True):
        for pileupread in pileupcolumn.pileups:
            alignment = pileupread.alignment
            if not keep_segment(alignment, alignment.query_alignment_length):
                continue
            clip, _ = calculate_clip(alignment)
            segment_id = [alignment.reference_name, alignment.reference_start,
                          str(alignment.query_name) + ";" + str(clip)]

            if pileupread.is_del:
                variant.add_segment(segment_id, '-')
                deletions += 1
                total_n += 1
            elif not pileupread.is_refskip:
                base = alignment.query_sequence[pileupread.query_position]
                quality = alignment.query_qualities[pileupread.query_position]
                # NOTE(review): a base outside A/C/G/T/= (e.g. 'N') raises KeyError here, exactly
                # as before - confirm whether such reads should be skipped instead.
                if quality >= NanoSV.opts_min_base_qual_ph:
                    base_ratios[base][0] += 1
                else:
                    base_ratios[base][1] += 1
                variant.add_segment(segment_id, [base, quality])
                total_n += 1

    if deletions < (NanoSV.opts_max_deletions * total_n):
        # The two bases with the most observations; `major` is the most frequent one.
        minor, major = sorted(base_ratios.items(), key=lambda item: sum(item[1]))[-2:]
        minor_counts = minor[1]
        major_counts = major[1]
        try:
            high_quality_support = (
                minor_counts[0] / sum(minor_counts) > NanoSV.opts_min_occurences_of_highq_var
                and major_counts[0] / sum(major_counts) > NanoSV.opts_min_occurences_of_highq_var
            )
            if high_quality_support:
                minor_fraction = sum(minor_counts) / (sum(major_counts) + sum(minor_counts))
                if minor_fraction > NanoSV.opts_min_occurences_of_var:
                    bin_index = int(position / NanoSV.opts_variant_bin_size)
                    variants[chromosome][bin_index][position] = variant
        except ZeroDivisionError:
            # Raised when the second most observed base has no observations at all (or when the
            # bin size is 0); in that case nothing is stored for this position.
            pass
def find_variations_cigar(cigartuples, chromosome, pos, qual, seq, subsegments):
    """
    Loops through read to find variant base. This function is used with LAST bam files

    Walks the CIGAR operations and registers every mismatching or deleted reference position in the
    module-level ``tmp_variants`` dict. For each such position the first segment of ``subsegments``
    that spans it receives the observation through ``Variant.add_segment``: ``[base, quality]`` for a
    mismatch and ``['-']`` for a deleted base.

    :param cigartuples: iterable of (operation, length) pairs. Operations 1 (insertion), 2 (deletion),
        4 (soft clip), 7 (sequence match) and 8 (sequence mismatch) are interpreted; every other
        operation is ignored
    :param chromosome: chromosome name handed to ``v.Variant`` when a position is seen for the first time
    :param pos: alignment start, converted with ``int()``; the first reference base of the alignment
        is stored as ``pos + 1``
    :param qual: per-base qualities, indexed like ``seq``
    :param seq: read sequence, indexed by read offset (soft-clipped bases are counted)
    :param subsegments: list of segment objects exposing ``id``, ``pos`` and ``end``. Modified in place:
        segments that end before the position being recorded are removed from the list
    """

    def record(position, observation):
        # Make sure a Variant exists for the position, even if no segment turns out to span it.
        if position not in tmp_variants:
            tmp_variants[position] = v.Variant(chromosome, position)
        # Index-based scan so that removing a stale segment does not make the loop skip its
        # successor (deleting from a list inside a for-loop over that same list does).
        index = 0
        while index < len(subsegments):
            segment = subsegments[index]
            if segment.pos <= position <= segment.end:
                tmp_variants[position].add_segment(segment.id, observation)
                break
            if segment.end < position:
                # Positions only grow during the walk, so this segment cannot match any more.
                del subsegments[index]
            else:
                index += 1

    ref_cursor = int(pos)
    read_cursor = 0

    for operation, length in cigartuples:
        if operation == 8:
            for _ in range(length):
                # The cursor is advanced first, so it names the base that is being recorded.
                ref_cursor += 1
                record(ref_cursor, [seq[read_cursor], qual[read_cursor]])
                read_cursor += 1
        elif operation == 2:
            for _ in range(length):
                ref_cursor += 1
                record(ref_cursor, ['-'])
        elif operation == 7:
            ref_cursor += length
            read_cursor += length
        elif operation in (1, 4):
            # Insertions and soft clips consume read bases only.
            read_cursor += length