Example #1
0
def EBFilter_worker_vcf(targetMutationFile, targetBamPath, controlBamPathList,
                        outputPath, mapping_qual_thres, base_qual_thres,
                        is_loption, region, debug_mode):
    """Add an ``EB`` INFO value to every record of a VCF file.

    Two intermediate pileup files (``outputPath + '.target.pileup'`` and
    ``outputPath + '.control.pileup'``) are produced with
    ``process_vcf.vcf2pileup``, loaded into memory keyed by
    ``"<chrom>\\t<pos>"``, and then each VCF record is scored with
    ``get_eb_score.get_eb_score`` and written to ``outputPath``.

    Args:
        targetMutationFile: path of the input VCF file.
        targetBamPath: passed to ``vcf2pileup`` for the target pileup.
        controlBamPathList: path of a text file; its line count is used as
            the number of control samples and the path itself is passed to
            ``vcf2pileup`` for the control pileup.
        outputPath: path of the VCF file to write; also the prefix of the
            intermediate pileup files.
        mapping_qual_thres: forwarded to ``vcf2pileup``.
        base_qual_thres: forwarded to ``vcf2pileup`` and ``get_eb_score``.
        is_loption: when ``True`` and ``region`` is non-empty, only records
            inside ``region`` are written.
        region: region string parsed with the module-level ``region_exp``
            (groups 1-3 are used as chromosome, start, end).
        debug_mode: when ``False`` the intermediate pileup files are
            deleted at the end.

    Raises:
        ValueError: if region filtering is requested but ``region`` does not
            match ``region_exp``.
    """
    import os  # local import: only needed for the intermediate-file cleanup

    target_pileup_path = outputPath + '.target.pileup'
    control_pileup_path = outputPath + '.control.pileup'

    # number of lines in the control list file
    # (presumably one BAM path per line -- confirm against vcf2pileup)
    with open(controlBamPathList, 'r') as list_handle:
        controlFileNum = sum(1 for _ in list_handle)

    ##########
    # generate pileup files
    process_vcf.vcf2pileup(targetMutationFile, target_pileup_path,
                           targetBamPath, mapping_qual_thres, base_qual_thres,
                           False, is_loption, region)
    process_vcf.vcf2pileup(targetMutationFile, control_pileup_path,
                           controlBamPathList, mapping_qual_thres,
                           base_qual_thres, True, is_loption, region)
    ##########

    ##########
    # load pileup files
    def load_pileup(path):
        # map "<col0>\t<col1>" -> remaining columns (from the 4th on),
        # still joined by tabs
        pos2pileup = {}
        with open(path) as handle:
            for line in handle:
                F = line.rstrip('\n').split('\t')
                pos2pileup[F[0] + '\t' + F[1]] = '\t'.join(F[3:])
        return pos2pileup

    pos2pileup_target = load_pileup(target_pileup_path)
    pos2pileup_control = load_pileup(control_pileup_path)
    ##########

    ##########
    # get restricted region if requested
    # (`== True` is kept on purpose so non-bool truthy values behave as before)
    use_region = is_loption == True and region != ""
    if use_region:
        region_match = region_exp.match(region)
        if region_match is None:
            raise ValueError("invalid region: " + str(region))
        reg_chr = region_match.group(1)
        reg_start = int(region_match.group(2))
        reg_end = int(region_match.group(3))
    ##########

    with open(targetMutationFile, 'r') as vcf_in:
        vcf_reader = vcf.Reader(vcf_in)
        vcf_reader.infos['EB'] = vcf.parser._Info('EB', 1, 'Float',
                                                  "EBCall Score", "EBCall",
                                                  "ver0.1.2")
        with open(outputPath, 'w') as vcf_out:
            vcf_writer = vcf.Writer(vcf_out, vcf_reader)
            try:
                for vcf_record in vcf_reader:
                    if use_region:
                        if reg_chr != vcf_record.CHROM:
                            continue
                        record_pos = int(vcf_record.POS)
                        if record_pos < reg_start or record_pos > reg_end:
                            continue

                    current_pos = (str(vcf_record.CHROM) + '\t' +
                                   str(vcf_record.POS))
                    target_entry = pos2pileup_target.get(current_pos)
                    control_entry = pos2pileup_control.get(current_pos)
                    F_target = ([] if target_entry is None
                                else target_entry.split('\t'))
                    F_control = ([] if control_entry is None
                                 else control_entry.split('\t'))

                    # only the first ALT allele is scored
                    current_ref = str(vcf_record.REF)
                    current_alt = str(vcf_record.ALT[0])
                    if len(current_ref) == 1 and len(current_alt) == 1:
                        var = current_alt
                    elif len(current_ref) == 1:
                        # longer ALT: encode as "+<bases after the first>"
                        var = "+" + current_alt[1:]
                    elif len(current_alt) == 1:
                        # longer REF: encode as "-<bases after the first>"
                        var = "-" + current_ref[1:]
                    else:
                        var = ""

                    # if the variant is complex, we ignore that and emit "."
                    EB_score = "."
                    if var != "":
                        EB_score = get_eb_score.get_eb_score(
                            var, F_target, F_control, base_qual_thres,
                            controlFileNum)

                    # add the score and write the vcf record
                    vcf_record.INFO['EB'] = EB_score
                    vcf_writer.write_record(vcf_record)
            finally:
                vcf_writer.close()

    # delete intermediate files
    # (`== False` is kept on purpose: e.g. debug_mode=None must not delete)
    if debug_mode == False:
        for pileup_path in (target_pileup_path, control_pileup_path):
            try:
                os.remove(pileup_path)
            except OSError:
                # best effort, like the former `rm` call whose exit status
                # was ignored
                pass
Example #2
0
def EBFilter_worker_anno(
    targetMutationFile,
    targetBamPath,
    controlBamPathList,
    outputPath,
    mapping_qual_thres,
    base_qual_thres,
    is_loption,
    region,
    debug_mode,
):
    """Append an EB score column to every line of a tab-separated mutation file.

    Two intermediate pileup files (``outputPath + ".target.pileup"`` and
    ``outputPath + ".control.pileup"``) are produced with
    ``process_anno.anno2pileup``, loaded into memory keyed by
    ``"<chrom>\\t<pos>"``, and then each input line is scored with
    ``get_eb_score.get_eb_score`` and written to ``outputPath`` with the
    score appended as an extra column.

    Each input line must have at least five tab-separated columns; columns
    1, 2, 4 and 5 are read as chromosome, position, ref and alt. A ``"-"``
    in ref is encoded as ``"+" + alt``, a ``"-"`` in alt as ``"-" + ref``;
    in the latter case the lookup position is shifted back by one.

    Args:
        targetMutationFile: path of the input mutation file.
        targetBamPath: passed to ``anno2pileup`` for the target pileup.
        controlBamPathList: path of a text file; its line count is used as
            the number of control samples and the path itself is passed to
            ``anno2pileup`` for the control pileup.
        outputPath: path of the file to write; also the prefix of the
            intermediate pileup files.
        mapping_qual_thres: forwarded to ``anno2pileup``.
        base_qual_thres: forwarded to ``anno2pileup`` and ``get_eb_score``.
        is_loption: when ``True`` and ``region`` is non-empty, only lines
            inside ``region`` are written.
        region: region string parsed with the module-level ``region_exp``
            (groups 1-3 are used as chromosome, start, end).
        debug_mode: when ``False`` the intermediate pileup files are
            deleted at the end.

    Raises:
        ValueError: if region filtering is requested but ``region`` does not
            match ``region_exp``.
    """
    import os  # local import: only needed for the intermediate-file cleanup

    target_pileup_path = outputPath + ".target.pileup"
    control_pileup_path = outputPath + ".control.pileup"

    # number of lines in the control list file
    # (presumably one BAM path per line -- confirm against anno2pileup)
    with open(controlBamPathList, "r") as list_handle:
        controlFileNum = sum(1 for _ in list_handle)

    ##########
    # generate pileup files
    process_anno.anno2pileup(
        targetMutationFile,
        target_pileup_path,
        targetBamPath,
        mapping_qual_thres,
        base_qual_thres,
        False,
        is_loption,
        region,
    )
    process_anno.anno2pileup(
        targetMutationFile,
        control_pileup_path,
        controlBamPathList,
        mapping_qual_thres,
        base_qual_thres,
        True,
        is_loption,
        region,
    )
    ##########

    ##########
    # load pileup files
    def load_pileup(path):
        # map "<col0>\t<col1>" -> remaining columns (from the 4th on),
        # still joined by tabs
        pos2pileup = {}
        with open(path) as handle:
            for pileup_line in handle:
                cols = pileup_line.rstrip("\n").split("\t")
                pos2pileup[cols[0] + "\t" + cols[1]] = "\t".join(cols[3:])
        return pos2pileup

    pos2pileup_target = load_pileup(target_pileup_path)
    pos2pileup_control = load_pileup(control_pileup_path)
    ##########

    ##########
    # get restricted region if requested
    # (`== True` is kept on purpose so non-bool truthy values behave as before)
    use_region = is_loption == True and region != ""
    if use_region:
        region_match = region_exp.match(region)
        if region_match is None:
            raise ValueError("invalid region: " + str(region))
        reg_chr = region_match.group(1)
        reg_start = int(region_match.group(2))
        reg_end = int(region_match.group(3))
    ##########

    with open(targetMutationFile, "r") as hIN:
        with open(outputPath, "w") as hOUT:
            for line in hIN:
                F = line.rstrip("\n").split("\t")
                chrom, pos, ref, alt = F[0], F[1], F[3], F[4]
                if alt == "-":
                    # alt "-" : look up the pileup one base earlier
                    pos = str(int(pos) - 1)

                if use_region:
                    if reg_chr != chrom:
                        continue
                    if int(pos) < reg_start or int(pos) > reg_end:
                        continue

                key = chrom + "\t" + pos
                target_entry = pos2pileup_target.get(key)
                control_entry = pos2pileup_control.get(key)
                F_target = [] if target_entry is None else target_entry.split("\t")
                F_control = [] if control_entry is None else control_entry.split("\t")

                if ref != "-" and alt != "-":
                    var = alt
                elif ref == "-":
                    var = "+" + alt
                else:
                    # only alt == "-" can reach this branch
                    var = "-" + ref

                # an empty variant string is not scored and gets "."
                EB_score = "."
                if var != "":
                    EB_score = get_eb_score.get_eb_score(
                        var, F_target, F_control, base_qual_thres, controlFileNum
                    )

                # append the score and write the annotation line
                # (file.write works on both Python 2 and 3, unlike `print >>`)
                hOUT.write("\t".join(F + [str(EB_score)]) + "\n")

    # delete intermediate files
    # (`== False` is kept on purpose: e.g. debug_mode=None must not delete)
    if debug_mode == False:
        for pileup_path in (target_pileup_path, control_pileup_path):
            try:
                os.remove(pileup_path)
            except OSError:
                # best effort, like the former `rm` call whose exit status
                # was ignored
                pass
Example #3
0
def EBFilter_worker_anno(targetMutationFile, targetBamPath, controlBamPathList,
                         outputPath, mapping_qual_thres, base_qual_thres,
                         is_loption, region, debug_mode):
    """Score each line of a tab-separated mutation file and append the score.

    Steps:
      1. Count the lines of ``controlBamPathList`` (used as the number of
         control samples).
      2. Build ``outputPath + '.target.pileup'`` and
         ``outputPath + '.control.pileup'`` via ``process_anno.anno2pileup``.
      3. Load both pileups into dictionaries keyed by ``"<chrom>\\t<pos>"``.
      4. For every input line, derive the variant string, call
         ``get_eb_score.get_eb_score`` and write the line plus the score to
         ``outputPath``.
      5. Remove the pileup files unless ``debug_mode`` is set.

    Input lines need at least five tab-separated columns; columns 1, 2, 4
    and 5 are used as chromosome, position, ref and alt. ``ref == "-"``
    yields ``"+" + alt``; ``alt == "-"`` yields ``"-" + ref`` and moves the
    lookup position back by one.

    Args:
        targetMutationFile: path of the input mutation file.
        targetBamPath: forwarded to ``anno2pileup`` (target pileup).
        controlBamPathList: path of the control list file; also forwarded
            to ``anno2pileup`` (control pileup).
        outputPath: output file path and prefix of the pileup files.
        mapping_qual_thres: forwarded to ``anno2pileup``.
        base_qual_thres: forwarded to ``anno2pileup`` and ``get_eb_score``.
        is_loption: with ``True`` and a non-empty ``region``, lines outside
            the region are skipped.
        region: string matched against the module-level ``region_exp``;
            groups 1-3 give chromosome, start and end.
        debug_mode: ``False`` means the pileup files are deleted.

    Raises:
        ValueError: if region filtering is requested but ``region`` does not
            match ``region_exp``.
    """
    import os  # local import: only needed for the intermediate-file cleanup

    pileup_paths = {
        'target': outputPath + '.target.pileup',
        'control': outputPath + '.control.pileup',
    }

    # count the lines of the control list and close the handle right away
    with open(controlBamPathList, 'r') as list_handle:
        controlFileNum = sum(1 for _ in list_handle)

    ##########
    # generate pileup files
    process_anno.anno2pileup(targetMutationFile, pileup_paths['target'],
                             targetBamPath, mapping_qual_thres,
                             base_qual_thres, False, is_loption, region)
    process_anno.anno2pileup(targetMutationFile, pileup_paths['control'],
                             controlBamPathList, mapping_qual_thres,
                             base_qual_thres, True, is_loption, region)
    ##########

    ##########
    # load pileup files: "<col0>\t<col1>" -> tab-joined columns 4..end
    pos2pileup_target = {}
    with open(pileup_paths['target']) as handle:
        for pileup_line in handle:
            cols = pileup_line.rstrip('\n').split('\t')
            pos2pileup_target[cols[0] + '\t' + cols[1]] = '\t'.join(cols[3:])

    pos2pileup_control = {}
    with open(pileup_paths['control']) as handle:
        for pileup_line in handle:
            cols = pileup_line.rstrip('\n').split('\t')
            pos2pileup_control[cols[0] + '\t' + cols[1]] = '\t'.join(cols[3:])
    ##########

    ##########
    # get restricted region if requested
    # (`== True` is kept on purpose so non-bool truthy values behave as before)
    use_region = is_loption == True and region != ""
    if use_region:
        region_match = region_exp.match(region)
        if region_match is None:
            raise ValueError("invalid region: " + str(region))
        reg_chr = region_match.group(1)
        reg_start = int(region_match.group(2))
        reg_end = int(region_match.group(3))
    ##########

    with open(targetMutationFile, 'r') as hIN:
        with open(outputPath, 'w') as hOUT:
            for line in hIN:
                F = line.rstrip('\n').split('\t')
                chrom, pos, ref, alt = F[0], F[1], F[3], F[4]

                # alt "-" : the pileup is looked up one base earlier
                if alt == "-":
                    pos = str(int(pos) - 1)

                if use_region:
                    if reg_chr != chrom:
                        continue
                    if int(pos) < reg_start or int(pos) > reg_end:
                        continue

                key = chrom + '\t' + pos
                F_target = (pos2pileup_target[key].split('\t')
                            if key in pos2pileup_target else [])
                F_control = (pos2pileup_control[key].split('\t')
                             if key in pos2pileup_control else [])

                if ref == "-":
                    var = "+" + alt
                elif alt == "-":
                    var = "-" + ref
                else:
                    var = alt

                # an empty variant string is not scored and gets "."
                if var == "":
                    EB_score = "."
                else:
                    EB_score = get_eb_score.get_eb_score(var, F_target,
                                                         F_control,
                                                         base_qual_thres,
                                                         controlFileNum)

                # append the score and write the annotation line
                # (file.write works on both Python 2 and 3, unlike `print >>`)
                hOUT.write('\t'.join(F + [str(EB_score)]) + '\n')

    # delete intermediate files
    # (`== False` is kept on purpose: e.g. debug_mode=None must not delete)
    if debug_mode == False:
        for pileup_path in (pileup_paths['target'], pileup_paths['control']):
            try:
                os.remove(pileup_path)
            except OSError:
                # best effort, like the former `rm` call whose exit status
                # was ignored
                pass
Example #4
0
def EBFilter_worker_vcf(
    targetMutationFile,
    targetBamPath,
    controlBamPathList,
    outputPath,
    mapping_qual_thres,
    base_qual_thres,
    is_loption,
    region,
    debug_mode,
):
    """Score each record of a VCF file and store the result in INFO["EB"].

    Steps:
      1. Count the lines of ``controlBamPathList`` (used as the number of
         control samples).
      2. Build ``outputPath + ".target.pileup"`` and
         ``outputPath + ".control.pileup"`` via ``process_vcf.vcf2pileup``.
      3. Load both pileups into dictionaries keyed by ``"<chrom>\\t<pos>"``.
      4. For every VCF record, derive the variant string from REF and the
         first ALT allele, call ``get_eb_score.get_eb_score`` and write the
         record (with the ``EB`` INFO entry) to ``outputPath``.
      5. Remove the pileup files unless ``debug_mode`` is set.

    Records where both REF and the first ALT are longer than one base are
    not scored and get ``"."``.

    Args:
        targetMutationFile: path of the input VCF file.
        targetBamPath: forwarded to ``vcf2pileup`` (target pileup).
        controlBamPathList: path of the control list file; also forwarded
            to ``vcf2pileup`` (control pileup).
        outputPath: output VCF path and prefix of the pileup files.
        mapping_qual_thres: forwarded to ``vcf2pileup``.
        base_qual_thres: forwarded to ``vcf2pileup`` and ``get_eb_score``.
        is_loption: with ``True`` and a non-empty ``region``, records
            outside the region are skipped.
        region: string matched against the module-level ``region_exp``;
            groups 1-3 give chromosome, start and end.
        debug_mode: ``False`` means the pileup files are deleted.

    Raises:
        ValueError: if region filtering is requested but ``region`` does not
            match ``region_exp``.
    """
    import os  # local import: only needed for the intermediate-file cleanup

    target_pileup_path = outputPath + ".target.pileup"
    control_pileup_path = outputPath + ".control.pileup"

    # count the lines of the control list and close the handle right away
    with open(controlBamPathList, "r") as list_handle:
        controlFileNum = sum(1 for _ in list_handle)

    ##########
    # generate pileup files
    process_vcf.vcf2pileup(
        targetMutationFile,
        target_pileup_path,
        targetBamPath,
        mapping_qual_thres,
        base_qual_thres,
        False,
        is_loption,
        region,
    )
    process_vcf.vcf2pileup(
        targetMutationFile,
        control_pileup_path,
        controlBamPathList,
        mapping_qual_thres,
        base_qual_thres,
        True,
        is_loption,
        region,
    )
    ##########

    ##########
    # load pileup files: "<col0>\t<col1>" -> tab-joined columns 4..end
    pos2pileup_target = {}
    with open(target_pileup_path) as handle:
        for pileup_line in handle:
            cols = pileup_line.rstrip("\n").split("\t")
            pos2pileup_target[cols[0] + "\t" + cols[1]] = "\t".join(cols[3:])

    pos2pileup_control = {}
    with open(control_pileup_path) as handle:
        for pileup_line in handle:
            cols = pileup_line.rstrip("\n").split("\t")
            pos2pileup_control[cols[0] + "\t" + cols[1]] = "\t".join(cols[3:])
    ##########

    ##########
    # get restricted region if requested
    # (`== True` is kept on purpose so non-bool truthy values behave as before)
    use_region = is_loption == True and region != ""
    if use_region:
        region_match = region_exp.match(region)
        if region_match is None:
            raise ValueError("invalid region: " + str(region))
        reg_chr = region_match.group(1)
        reg_start = int(region_match.group(2))
        reg_end = int(region_match.group(3))
    ##########

    with open(targetMutationFile, "r") as vcf_in:
        vcf_reader = vcf.Reader(vcf_in)
        vcf_reader.infos["EB"] = vcf.parser._Info(
            "EB", 1, "Float", "EBCall Score", "EBCall", "ver0.1.2"
        )
        with open(outputPath, "w") as vcf_out:
            vcf_writer = vcf.Writer(vcf_out, vcf_reader)
            try:
                for vcf_record in vcf_reader:
                    if use_region:
                        if reg_chr != vcf_record.CHROM:
                            continue
                        if int(vcf_record.POS) < reg_start or int(vcf_record.POS) > reg_end:
                            continue

                    current_pos = str(vcf_record.CHROM) + "\t" + str(vcf_record.POS)
                    F_target = (
                        pos2pileup_target[current_pos].split("\t")
                        if current_pos in pos2pileup_target
                        else []
                    )
                    F_control = (
                        pos2pileup_control[current_pos].split("\t")
                        if current_pos in pos2pileup_control
                        else []
                    )

                    # only the first ALT allele is scored
                    current_ref = str(vcf_record.REF)
                    current_alt = str(vcf_record.ALT[0])
                    ref_is_single = len(current_ref) == 1
                    alt_is_single = len(current_alt) == 1

                    var = ""
                    if ref_is_single and alt_is_single:
                        var = current_alt
                    elif ref_is_single:
                        # longer ALT: encode as "+<bases after the first>"
                        var = "+" + current_alt[1:]
                    elif alt_is_single:
                        # longer REF: encode as "-<bases after the first>"
                        var = "-" + current_ref[1:]

                    # if the variant is complex, we ignore that and emit "."
                    if var == "":
                        EB_score = "."
                    else:
                        EB_score = get_eb_score.get_eb_score(
                            var, F_target, F_control, base_qual_thres, controlFileNum
                        )

                    # add the score and write the vcf record
                    vcf_record.INFO["EB"] = EB_score
                    vcf_writer.write_record(vcf_record)
            finally:
                vcf_writer.close()

    # delete intermediate files
    # (`== False` is kept on purpose: e.g. debug_mode=None must not delete)
    if debug_mode == False:
        for pileup_path in (target_pileup_path, control_pileup_path):
            try:
                os.remove(pileup_path)
            except OSError:
                # best effort, like the former `rm` call whose exit status
                # was ignored
                pass