def EBFilter_worker_vcf(targetMutationFile, targetBamPath, controlBamPathList, outputPath, mapping_qual_thres, base_qual_thres, is_loption, region, debug_mode): controlFileNum = sum(1 for line in open(controlBamPathList, 'r')) ########## # generate pileup files process_vcf.vcf2pileup(targetMutationFile, outputPath + '.target.pileup', targetBamPath, mapping_qual_thres, base_qual_thres, False, is_loption, region) process_vcf.vcf2pileup(targetMutationFile, outputPath + '.control.pileup', controlBamPathList, mapping_qual_thres, base_qual_thres, True, is_loption, region) ########## ########## # load pileup files pos2pileup_target = {} pos2pileup_control = {} hIN = open(outputPath + '.target.pileup') for line in hIN: F = line.rstrip('\n').split('\t') pos2pileup_target[F[0] + '\t' + F[1]] = '\t'.join(F[3:]) hIN.close() hIN = open(outputPath + '.control.pileup') for line in hIN: F = line.rstrip('\n').split('\t') pos2pileup_control[F[0] + '\t' + F[1]] = '\t'.join(F[3:]) hIN.close() ########## ########## # get restricted region if not None if is_loption == True and region != "": region_match = region_exp.match(region) reg_chr = region_match.group(1) reg_start = int(region_match.group(2)) reg_end = int(region_match.group(3)) ########## vcf_reader = vcf.Reader(open(targetMutationFile, 'r')) vcf_reader.infos['EB'] = vcf.parser._Info('EB', 1, 'Float', "EBCall Score", "EBCall", "ver0.1.2") vcf_writer = vcf.Writer(open(outputPath, 'w'), vcf_reader) for vcf_record in vcf_reader: current_pos = str(vcf_record.CHROM) + '\t' + str(vcf_record.POS) if is_loption == True and region != "": if reg_chr != vcf_record.CHROM: continue if int(vcf_record.POS) < reg_start or int( vcf_record.POS) > reg_end: continue F_target = pos2pileup_target[current_pos].split( '\t') if current_pos in pos2pileup_target else [] F_control = pos2pileup_control[current_pos].split( '\t') if current_pos in pos2pileup_control else [] current_ref = str(vcf_record.REF) current_alt = str(vcf_record.ALT[0]) var = "" if len(current_ref) == 1 and len(current_alt) == 1: var = current_alt else: if len(current_ref) == 1: var = "+" + current_alt[1:] elif len(current_alt) == 1: var = "-" + current_ref[1:] EB_score = "." # if the variant is complex, we ignore that if not var == "": EB_score = get_eb_score.get_eb_score(var, F_target, F_control, base_qual_thres, controlFileNum) # add the score and write the vcf record vcf_record.INFO['EB'] = EB_score vcf_writer.write_record(vcf_record) vcf_writer.close() # delete intermediate files if debug_mode == False: subprocess.call(["rm", outputPath + '.target.pileup']) subprocess.call(["rm", outputPath + '.control.pileup'])
def EBFilter_worker_anno( targetMutationFile, targetBamPath, controlBamPathList, outputPath, mapping_qual_thres, base_qual_thres, is_loption, region, debug_mode, ): controlFileNum = sum(1 for line in open(controlBamPathList, "r")) ########## # generate pileup files process_anno.anno2pileup( targetMutationFile, outputPath + ".target.pileup", targetBamPath, mapping_qual_thres, base_qual_thres, False, is_loption, region, ) process_anno.anno2pileup( targetMutationFile, outputPath + ".control.pileup", controlBamPathList, mapping_qual_thres, base_qual_thres, True, is_loption, region, ) ########## ########## # load pileup files pos2pileup_target = {} pos2pileup_control = {} hIN = open(outputPath + ".target.pileup") for line in hIN: F = line.rstrip("\n").split("\t") pos2pileup_target[F[0] + "\t" + F[1]] = "\t".join(F[3:]) hIN.close() hIN = open(outputPath + ".control.pileup") for line in hIN: F = line.rstrip("\n").split("\t") pos2pileup_control[F[0] + "\t" + F[1]] = "\t".join(F[3:]) hIN.close() ########## ########## # get restricted region if not None if is_loption == True and region != "": region_match = region_exp.match(region) reg_chr = region_match.group(1) reg_start = int(region_match.group(2)) reg_end = int(region_match.group(3)) ########## hIN = open(targetMutationFile, "r") hOUT = open(outputPath, "w") for line in hIN: F = line.rstrip("\n").split("\t") chr, pos, pos2, ref, alt = F[0], F[1], F[2], F[3], F[4] if alt == "-": pos = str(int(pos) - 1) if is_loption == True and region != "": if reg_chr != chr: continue if int(pos) < reg_start or int(pos) > reg_end: continue F_target = pos2pileup_target[chr + "\t" + pos].split("\t") if chr + "\t" + pos in pos2pileup_target else [] F_control = pos2pileup_control[chr + "\t" + pos].split("\t") if chr + "\t" + pos in pos2pileup_control else [] var = "" if ref != "-" and alt != "-": var = alt else: if ref == "-": var = "+" + alt elif alt == "-": var = "-" + ref EB_score = "." # if the variant is complex, we ignore that if not var == "": EB_score = get_eb_score.get_eb_score(var, F_target, F_control, base_qual_thres, controlFileNum) # add the score and write the vcf record print >> hOUT, "\t".join(F + [str(EB_score)]) hIN.close() hOUT.close() # delete intermediate files if debug_mode == False: subprocess.call(["rm", outputPath + ".target.pileup"]) subprocess.call(["rm", outputPath + ".control.pileup"])
def EBFilter_worker_anno(targetMutationFile, targetBamPath, controlBamPathList, outputPath, mapping_qual_thres, base_qual_thres, is_loption, region, debug_mode): controlFileNum = sum(1 for line in open(controlBamPathList, 'r')) ########## # generate pileup files process_anno.anno2pileup(targetMutationFile, outputPath + '.target.pileup', targetBamPath, mapping_qual_thres, base_qual_thres, False, is_loption, region) process_anno.anno2pileup(targetMutationFile, outputPath + '.control.pileup', controlBamPathList, mapping_qual_thres, base_qual_thres, True, is_loption, region) ########## ########## # load pileup files pos2pileup_target = {} pos2pileup_control = {} hIN = open(outputPath + '.target.pileup') for line in hIN: F = line.rstrip('\n').split('\t') pos2pileup_target[F[0] + '\t' + F[1]] = '\t'.join(F[3:]) hIN.close() hIN = open(outputPath + '.control.pileup') for line in hIN: F = line.rstrip('\n').split('\t') pos2pileup_control[F[0] + '\t' + F[1]] = '\t'.join(F[3:]) hIN.close() ########## ########## # get restricted region if not None if is_loption == True and region != "": region_match = region_exp.match(region) reg_chr = region_match.group(1) reg_start = int(region_match.group(2)) reg_end = int(region_match.group(3)) ########## hIN = open(targetMutationFile, 'r') hOUT = open(outputPath, 'w') for line in hIN: F = line.rstrip('\n').split('\t') chr, pos, pos2, ref, alt = F[0], F[1], F[2], F[3], F[4] if alt == "-": pos = str(int(pos) - 1) if is_loption == True and region != "": if reg_chr != chr: continue if int(pos) < reg_start or int(pos) > reg_end: continue F_target = pos2pileup_target[chr + '\t' + pos].split( '\t') if chr + '\t' + pos in pos2pileup_target else [] F_control = pos2pileup_control[chr + '\t' + pos].split( '\t') if chr + '\t' + pos in pos2pileup_control else [] var = "" if ref != "-" and alt != "-": var = alt else: if ref == "-": var = "+" + alt elif alt == "-": var = "-" + ref EB_score = "." # if the variant is complex, we ignore that if not var == "": EB_score = get_eb_score.get_eb_score(var, F_target, F_control, base_qual_thres, controlFileNum) # add the score and write the vcf record print >> hOUT, '\t'.join(F + [str(EB_score)]) hIN.close() hOUT.close() # delete intermediate files if debug_mode == False: subprocess.call(["rm", outputPath + '.target.pileup']) subprocess.call(["rm", outputPath + '.control.pileup'])
def EBFilter_worker_vcf( targetMutationFile, targetBamPath, controlBamPathList, outputPath, mapping_qual_thres, base_qual_thres, is_loption, region, debug_mode, ): controlFileNum = sum(1 for line in open(controlBamPathList, "r")) ########## # generate pileup files process_vcf.vcf2pileup( targetMutationFile, outputPath + ".target.pileup", targetBamPath, mapping_qual_thres, base_qual_thres, False, is_loption, region, ) process_vcf.vcf2pileup( targetMutationFile, outputPath + ".control.pileup", controlBamPathList, mapping_qual_thres, base_qual_thres, True, is_loption, region, ) ########## ########## # load pileup files pos2pileup_target = {} pos2pileup_control = {} hIN = open(outputPath + ".target.pileup") for line in hIN: F = line.rstrip("\n").split("\t") pos2pileup_target[F[0] + "\t" + F[1]] = "\t".join(F[3:]) hIN.close() hIN = open(outputPath + ".control.pileup") for line in hIN: F = line.rstrip("\n").split("\t") pos2pileup_control[F[0] + "\t" + F[1]] = "\t".join(F[3:]) hIN.close() ########## ########## # get restricted region if not None if is_loption == True and region != "": region_match = region_exp.match(region) reg_chr = region_match.group(1) reg_start = int(region_match.group(2)) reg_end = int(region_match.group(3)) ########## vcf_reader = vcf.Reader(open(targetMutationFile, "r")) vcf_reader.infos["EB"] = vcf.parser._Info("EB", 1, "Float", "EBCall Score", "EBCall", "ver0.1.2") vcf_writer = vcf.Writer(open(outputPath, "w"), vcf_reader) for vcf_record in vcf_reader: current_pos = str(vcf_record.CHROM) + "\t" + str(vcf_record.POS) if is_loption == True and region != "": if reg_chr != vcf_record.CHROM: continue if int(vcf_record.POS) < reg_start or int(vcf_record.POS) > reg_end: continue F_target = pos2pileup_target[current_pos].split("\t") if current_pos in pos2pileup_target else [] F_control = pos2pileup_control[current_pos].split("\t") if current_pos in pos2pileup_control else [] current_ref = str(vcf_record.REF) current_alt = str(vcf_record.ALT[0]) var = "" if len(current_ref) == 1 and len(current_alt) == 1: var = current_alt else: if len(current_ref) == 1: var = "+" + current_alt[1:] elif len(current_alt) == 1: var = "-" + current_ref[1:] EB_score = "." # if the variant is complex, we ignore that if not var == "": EB_score = get_eb_score.get_eb_score(var, F_target, F_control, base_qual_thres, controlFileNum) # add the score and write the vcf record vcf_record.INFO["EB"] = EB_score vcf_writer.write_record(vcf_record) vcf_writer.close() # delete intermediate files if debug_mode == False: subprocess.call(["rm", outputPath + ".target.pileup"]) subprocess.call(["rm", outputPath + ".control.pileup"])