Beispiel #1
0
def rmdup_main(args):
    """Copy ``args.fusion`` to ``args.output`` without the flagged duplicates.

    The input is converted to BEDPE with ``process.convert_to_bedpe`` (both
    margins set to 10), compared against itself with ``bedtools pairtopair``,
    and every record flagged by that comparison is left out of the output.

    Args:
        args: Object providing ``fusion`` (input path), ``output`` (output
            path, also used as prefix for the intermediate files) and
            ``type`` (format name forwarded to the ``process`` helpers).

    Raises:
        subprocess.CalledProcessError: If ``bedtools`` or the ``rm`` cleanup
            exits with a non-zero status.
    """
    # An empty input produces an empty output without running bedtools.
    if os.path.getsize(args.fusion) == 0:
        with open(args.output, 'w'):
            pass
        return

    bedpe_path = args.output + ".fusion.bedpe"
    comp_path = args.output + ".fusion_comp.bedpe"

    process.convert_to_bedpe(args.fusion, bedpe_path, 10, 10, args.type)

    # Self-comparison: the same BEDPE file is passed as both -a and -b.
    with open(comp_path, 'w') as comp_out:
        subprocess.check_call(
            ["bedtools", "pairtopair", "-a", bedpe_path, "-b", bedpe_path],
            stdout=comp_out)

    # Map every record to drop onto the record it was compared with.
    # Columns 6/16 are used as record names and 7/17 as integers; presumably
    # these are the BEDPE name and score columns of the "a" and "b" records
    # written by convert_to_bedpe -- confirm against that helper.
    fusion_comp = {}
    with open(comp_path, 'r') as comp_in:
        for line in comp_in:
            fields = line.rstrip('\n').split('\t')
            name_a, name_b = fields[6], fields[16]
            if name_a == name_b:
                # Skip a record paired with itself.
                continue
            value_a, value_b = int(fields[7]), int(fields[17])
            # NOTE(review): on a tie, if pairtopair reports both (a, b) and
            # (b, a), each name is still absent from the dict when its row is
            # read, so BOTH records end up flagged -- confirm this is intended.
            if value_a < value_b or (value_a == value_b
                                     and name_a not in fusion_comp):
                fusion_comp[name_a] = name_b

    with open(args.fusion, 'r') as fusion_in:
        with open(args.output, 'w') as out:
            for line in fusion_in:
                fields = line.rstrip('\n').split('\t')

                # header is removed
                if fields[0] == "#fusion_name" and args.type == "star_fusion":
                    continue

                chr1, pos1, dir1, chr2, pos2, dir2 = process.get_position(
                    fields, args.type)
                # Must match the name column of the comparison output
                # (presumably built the same way by convert_to_bedpe).
                record_id = (chr1 + ':' + dir1 + pos1 + '-' +
                             chr2 + ':' + dir2 + pos2)

                if record_id not in fusion_comp:
                    # write() instead of the Python 2-only ``print >>`` form.
                    out.write('\t'.join(fields) + '\n')

    # remove intermediate files
    for path in (bedpe_path, comp_path):
        subprocess.check_call(["rm", "-rf", path])
Beispiel #2
0
def rmdup_main(args):
    """Copy ``args.fusion`` to ``args.output`` without the flagged duplicates.

    The input is converted to BEDPE with ``process.convert_to_bedpe`` (both
    margins set to 10), compared against itself with the ``bedtools``
    executable found under ``args.bedtools_path``, and every record flagged
    by that comparison is left out of the output.

    Args:
        args: Object providing ``fusion`` (input path), ``output`` (output
            path, also used as prefix for the intermediate files), ``type``
            (format name forwarded to the ``process`` helpers) and
            ``bedtools_path`` (directory containing the ``bedtools`` binary).

    Raises:
        subprocess.CalledProcessError: If ``bedtools`` exits with a non-zero
            status; the comparison file would otherwise be silently
            incomplete and duplicates would go unfiltered.
    """
    # An empty input produces an empty output without running bedtools.
    if os.path.getsize(args.fusion) == 0:
        with open(args.output, 'w'):
            pass
        return

    bedpe_path = args.output + ".fusion.bedpe"
    comp_path = args.output + ".fusion_comp.bedpe"

    process.convert_to_bedpe(args.fusion, bedpe_path, 10, 10, args.type)

    # Self-comparison: the same BEDPE file is passed as both -a and -b.
    with open(comp_path, 'w') as comp_out:
        subprocess.check_call(
            [args.bedtools_path + "/bedtools", "pairtopair",
             "-a", bedpe_path, "-b", bedpe_path],
            stdout=comp_out)

    # Map every record to drop onto the record it was compared with.
    # Columns 6/16 are used as record names and 7/17 as integers; presumably
    # these are the BEDPE name and score columns of the "a" and "b" records
    # written by convert_to_bedpe -- confirm against that helper.
    fusion_comp = {}
    with open(comp_path, 'r') as comp_in:
        for line in comp_in:
            fields = line.rstrip('\n').split('\t')
            name_a, name_b = fields[6], fields[16]
            if name_a == name_b:
                # Skip a record paired with itself.
                continue
            value_a, value_b = int(fields[7]), int(fields[17])
            # NOTE(review): on a tie, if pairtopair reports both (a, b) and
            # (b, a), each name is still absent from the dict when its row is
            # read, so BOTH records end up flagged -- confirm this is intended.
            if value_a < value_b or (value_a == value_b
                                     and name_a not in fusion_comp):
                fusion_comp[name_a] = name_b

    with open(args.fusion, 'r') as fusion_in:
        with open(args.output, 'w') as out:
            for line in fusion_in:
                fields = line.rstrip('\n').split('\t')

                # header is removed
                if fields[0] == "#fusion_name" and args.type == "star_fusion":
                    continue

                chr1, pos1, dir1, chr2, pos2, dir2 = process.get_position(
                    fields, args.type)
                # Must match the name column of the comparison output
                # (presumably built the same way by convert_to_bedpe).
                record_id = (chr1 + ':' + dir1 + pos1 + '-' +
                             chr2 + ':' + dir2 + pos2)

                if record_id not in fusion_comp:
                    # write() instead of the Python 2-only ``print >>`` form.
                    out.write('\t'.join(fields) + '\n')

    # remove intermediate files (best effort: the rm exit status is ignored)
    for path in (bedpe_path, comp_path):
        subprocess.call(["rm", "-rf", path])
Beispiel #3
0
def comp_main(args):
    """Append to each record of ``args.fusion1`` its match in ``args.fusion2``.

    Both inputs are converted to BEDPE with ``process.convert_to_bedpe`` and
    compared with ``bedtools pairtopair``.  Every line of ``args.fusion1`` is
    written to ``args.output`` with one extra tab-separated column holding
    the name of the matching ``args.fusion2`` record, or ``---`` when the
    comparison reported none.

    Args:
        args: Object providing ``fusion1``/``fusion2`` (input paths),
            ``type1``/``type2`` (their format names), ``output`` (output
            path, also used as prefix for the intermediate files) and
            ``margin``.  ``sv_margin_major`` and ``sv_margin_minor`` are read
            only when a fusion caller output is compared with ``genomonSV``.

    Raises:
        subprocess.CalledProcessError: If ``bedtools`` or the ``rm`` cleanup
            exits with a non-zero status.
    """
    # An empty first input produces an empty output without running bedtools.
    if os.path.getsize(args.fusion1) == 0:
        with open(args.output, 'w'):
            pass
        return

    fusion_types = ("fusionfusion", "fusionfusion_part", "star_fusion",
                    "genomon_fusion", "mapsplice2", "tophat_fusion")

    # Fusion-vs-SV comparisons give the fusion side its own pair of margins;
    # every other combination uses args.margin on both sides.
    if args.type1 in fusion_types and args.type2 == "genomonSV":
        margins1 = (args.sv_margin_major, args.sv_margin_minor)
        margins2 = (args.margin, args.margin)
    elif args.type2 in fusion_types and args.type1 == "genomonSV":
        margins1 = (args.margin, args.margin)
        margins2 = (args.sv_margin_major, args.sv_margin_minor)
    else:
        margins1 = (args.margin, args.margin)
        margins2 = (args.margin, args.margin)

    bedpe1_path = args.output + ".fusion1.bedpe"
    bedpe2_path = args.output + ".fusion2.bedpe"
    comp_path = args.output + ".fusion_comp.bedpe"

    process.convert_to_bedpe(args.fusion1, bedpe1_path,
                             margins1[0], margins1[1], args.type1)
    process.convert_to_bedpe(args.fusion2, bedpe2_path,
                             margins2[0], margins2[1], args.type2)

    with open(comp_path, 'w') as comp_out:
        subprocess.check_call(
            ["bedtools", "pairtopair", "-a", bedpe1_path, "-b", bedpe2_path],
            stdout=comp_out)

    # Map the name of each matched "a" record (column 6) to the name of its
    # "b" partner (column 16); with several partners the last row wins.
    fusion_comp = {}
    with open(comp_path, 'r') as comp_in:
        for line in comp_in:
            fields = line.rstrip('\n').split('\t')
            fusion_comp[fields[6]] = fields[16]

    # add SV annotation to fusion
    with open(args.fusion1, 'r') as fusion_in:
        with open(args.output, 'w') as out:
            for line in fusion_in:
                fields = line.rstrip('\n').split('\t')
                chr1, pos1, dir1, chr2, pos2, dir2 = process.get_position(
                    fields, args.type1)
                # Must match the name column of the comparison output
                # (presumably built the same way by convert_to_bedpe).
                record_id = (chr1 + ':' + dir1 + pos1 + '-' +
                             chr2 + ':' + dir2 + pos2)

                sv_info = fusion_comp.get(record_id, "---")
                # write() instead of the Python 2-only ``print >>`` form.
                out.write('\t'.join(fields) + '\t' + sv_info + '\n')

    # remove intermediate files
    for path in (bedpe1_path, bedpe2_path, comp_path):
        subprocess.check_call(["rm", "-rf", path])
Beispiel #4
0
def comp_main(args):
    """Append to each record of ``args.fusion1`` its match in ``args.fusion2``.

    Both inputs are converted to BEDPE with ``process.convert_to_bedpe`` and
    compared with the ``bedtools`` executable found under
    ``args.bedtools_path``.  Every line of ``args.fusion1`` is written to
    ``args.output`` with one extra tab-separated column holding the name of
    the matching ``args.fusion2`` record, or ``---`` when the comparison
    reported none.

    Args:
        args: Object providing ``fusion1``/``fusion2`` (input paths),
            ``type1``/``type2`` (their format names), ``output`` (output
            path, also used as prefix for the intermediate files) and
            ``bedtools_path`` (directory containing the ``bedtools`` binary).

    Raises:
        subprocess.CalledProcessError: If ``bedtools`` exits with a non-zero
            status; the comparison file would otherwise be silently
            incomplete and records would be reported as unmatched.
    """
    # An empty first input produces an empty output without running bedtools.
    if os.path.getsize(args.fusion1) == 0:
        with open(args.output, 'w'):
            pass
        return

    fusion_types = ("fusionfusion", "fusionfusion_part", "star_fusion",
                    "genomon_fusion", "mapsplice2", "tophat_fusion")
    default_margins = (10, 10)
    # Margins given to the fusion side of a fusion-vs-genomonSV comparison.
    sv_margins = (500000, 10)

    if args.type1 in fusion_types and args.type2 == "genomonSV":
        margins1, margins2 = sv_margins, default_margins
    elif args.type2 in fusion_types and args.type1 == "genomonSV":
        margins1, margins2 = default_margins, sv_margins
    else:
        margins1, margins2 = default_margins, default_margins

    bedpe1_path = args.output + ".fusion1.bedpe"
    bedpe2_path = args.output + ".fusion2.bedpe"
    comp_path = args.output + ".fusion_comp.bedpe"

    process.convert_to_bedpe(args.fusion1, bedpe1_path,
                             margins1[0], margins1[1], args.type1)
    process.convert_to_bedpe(args.fusion2, bedpe2_path,
                             margins2[0], margins2[1], args.type2)

    with open(comp_path, 'w') as comp_out:
        subprocess.check_call(
            [args.bedtools_path + "/bedtools", "pairtopair",
             "-a", bedpe1_path, "-b", bedpe2_path],
            stdout=comp_out)

    # Map the name of each matched "a" record (column 6) to the name of its
    # "b" partner (column 16); with several partners the last row wins.
    fusion_comp = {}
    with open(comp_path, 'r') as comp_in:
        for line in comp_in:
            fields = line.rstrip('\n').split('\t')
            fusion_comp[fields[6]] = fields[16]

    # add SV annotation to fusion
    with open(args.fusion1, 'r') as fusion_in:
        with open(args.output, 'w') as out:
            for line in fusion_in:
                fields = line.rstrip('\n').split('\t')
                chr1, pos1, dir1, chr2, pos2, dir2 = process.get_position(
                    fields, args.type1)
                # Must match the name column of the comparison output
                # (presumably built the same way by convert_to_bedpe).
                record_id = (chr1 + ':' + dir1 + pos1 + '-' +
                             chr2 + ':' + dir2 + pos2)

                sv_info = fusion_comp.get(record_id, "---")
                # write() instead of the Python 2-only ``print >>`` form.
                out.write('\t'.join(fields) + '\t' + sv_info + '\n')

    # remove intermediate files (best effort: the rm exit status is ignored)
    for path in (bedpe1_path, bedpe2_path, comp_path):
        subprocess.call(["rm", "-rf", path])