def rmdup_main(args):
    """Write ``args.fusion`` to ``args.output`` with duplicated calls removed.

    The input is converted to BEDPE (10 bp margin on both sides), compared
    against itself with ``bedtools pairtopair``, and every record that was
    marked as a duplicate of another record is left out of the output.

    Args:
        args: namespace-like object providing
            ``fusion`` - path of the input fusion file,
            ``output`` - path of the result file; also used as the prefix
            of the two intermediate files,
            ``type``   - format name handed to the ``process`` helpers.

    Raises:
        subprocess.CalledProcessError: if ``bedtools`` exits non-zero.
    """
    if os.path.getsize(args.fusion) == 0:
        # nothing to compare: just create an empty result file
        open(args.output, 'w').close()
        return

    bedpe_path = args.output + ".fusion.bedpe"
    comp_path = args.output + ".fusion_comp.bedpe"

    try:
        process.convert_to_bedpe(args.fusion, bedpe_path, 10, 10, args.type)

        with open(comp_path, 'w') as hOUT:
            subprocess.check_call(["bedtools", "pairtopair",
                                   "-a", bedpe_path, "-b", bedpe_path],
                                  stdout=hOUT)

        # create dictionary: ID of a record to drop -> ID of the record it
        # overlaps.  Indices 6/7 belong to the "-a" record and 16/17 to the
        # "-b" record (assumes convert_to_bedpe writes 10 columns with the
        # ID in column 7 and an integer score in column 8 - TODO confirm).
        fusion_comp = {}
        with open(comp_path, 'r') as hIN:
            for line in hIN:
                F = line.rstrip('\n').split('\t')
                if F[6] == F[16]:
                    # a record always overlaps itself
                    continue
                score_a, score_b = int(F[7]), int(F[17])
                # NOTE(review): on equal scores the check is on F[6], so if
                # the pair is reported in both orders both records appear to
                # get marked - confirm whether F[16] was intended.
                if score_a < score_b or (score_a == score_b and F[6] not in fusion_comp):
                    fusion_comp[F[6]] = F[16]

        with open(args.fusion, 'r') as hIN, open(args.output, 'w') as hOUT:
            for line in hIN:
                F = line.rstrip('\n').split('\t')

                # header is removed
                if F[0] == "#fusion_name" and args.type == "star_fusion":
                    continue

                chr1, pos1, dir1, chr2, pos2, dir2 = process.get_position(F, args.type)
                ID = chr1 + ':' + dir1 + pos1 + '-' + chr2 + ':' + dir2 + pos2
                if ID not in fusion_comp:
                    hOUT.write('\t'.join(F) + '\n')
    finally:
        # remove intermediate files, also when one of the steps above failed
        for path in (bedpe_path, comp_path):
            if os.path.exists(path):
                os.remove(path)
def rmdup_main(args):
    """Write ``args.fusion`` to ``args.output`` with duplicated calls removed.

    The input is converted to BEDPE (10 bp margin on both sides), compared
    against itself with ``bedtools pairtopair``, and every record that was
    marked as a duplicate of another record is left out of the output.

    Args:
        args: namespace-like object providing
            ``fusion``        - path of the input fusion file,
            ``output``        - path of the result file; also used as the
            prefix of the two intermediate files,
            ``type``          - format name handed to the ``process`` helpers,
            ``bedtools_path`` - directory that contains the ``bedtools``
            executable.

    Raises:
        subprocess.CalledProcessError: if ``bedtools`` exits non-zero.  The
            exit status used to be ignored, which let a failed run produce
            an output built from an incomplete comparison file.
    """
    if os.path.getsize(args.fusion) == 0:
        # nothing to compare: just create an empty result file
        open(args.output, 'w').close()
        return

    bedpe_path = args.output + ".fusion.bedpe"
    comp_path = args.output + ".fusion_comp.bedpe"

    try:
        process.convert_to_bedpe(args.fusion, bedpe_path, 10, 10, args.type)

        with open(comp_path, 'w') as hOUT:
            subprocess.check_call([args.bedtools_path + "/bedtools", "pairtopair",
                                   "-a", bedpe_path, "-b", bedpe_path],
                                  stdout=hOUT)

        # create dictionary: ID of a record to drop -> ID of the record it
        # overlaps.  Indices 6/7 belong to the "-a" record and 16/17 to the
        # "-b" record (assumes convert_to_bedpe writes 10 columns with the
        # ID in column 7 and an integer score in column 8 - TODO confirm).
        fusion_comp = {}
        with open(comp_path, 'r') as hIN:
            for line in hIN:
                F = line.rstrip('\n').split('\t')
                if F[6] == F[16]:
                    # a record always overlaps itself
                    continue
                score_a, score_b = int(F[7]), int(F[17])
                # NOTE(review): on equal scores the check is on F[6], so if
                # the pair is reported in both orders both records appear to
                # get marked - confirm whether F[16] was intended.
                if score_a < score_b or (score_a == score_b and F[6] not in fusion_comp):
                    fusion_comp[F[6]] = F[16]

        with open(args.fusion, 'r') as hIN, open(args.output, 'w') as hOUT:
            for line in hIN:
                F = line.rstrip('\n').split('\t')

                # header is removed
                if F[0] == "#fusion_name" and args.type == "star_fusion":
                    continue

                chr1, pos1, dir1, chr2, pos2, dir2 = process.get_position(F, args.type)
                ID = chr1 + ':' + dir1 + pos1 + '-' + chr2 + ':' + dir2 + pos2
                if ID not in fusion_comp:
                    hOUT.write('\t'.join(F) + '\n')
    finally:
        # remove intermediate files, also when one of the steps above failed;
        # removal stays best-effort, as it was with the unchecked "rm -rf"
        for path in (bedpe_path, comp_path):
            try:
                os.remove(path)
            except OSError:
                pass
def comp_main(args):
    """Annotate each record of ``args.fusion1`` with its match in ``args.fusion2``.

    Both inputs are converted to BEDPE and intersected with
    ``bedtools pairtopair``.  Every line of ``args.fusion1`` is then written
    to ``args.output`` with one extra tab-separated column: the ID of the
    overlapping ``args.fusion2`` record, or ``---`` when there is none.

    Args:
        args: namespace-like object providing
            ``fusion1`` / ``type1`` - first input file and its format name,
            ``fusion2`` / ``type2`` - second input file and its format name,
            ``output``  - path of the result file; also used as the prefix
            of the three intermediate files,
            ``margin``  - margin passed to ``convert_to_bedpe`` for the
            default case and for the ``genomonSV`` side,
            ``sv_margin_major`` / ``sv_margin_minor`` - margins used for the
            fusion side when the other side is ``genomonSV``.

    Raises:
        subprocess.CalledProcessError: if ``bedtools`` exits non-zero.
    """
    if os.path.getsize(args.fusion1) == 0:
        # nothing to annotate: just create an empty result file
        open(args.output, 'w').close()
        return

    fusion_types = ("fusionfusion", "fusionfusion_part", "star_fusion",
                    "genomon_fusion", "mapsplice2", "tophat_fusion")

    bedpe1_path = args.output + ".fusion1.bedpe"
    bedpe2_path = args.output + ".fusion2.bedpe"
    comp_path = args.output + ".fusion_comp.bedpe"

    try:
        # a fusion caller compared against genomonSV gets the asymmetric
        # SV margins; every other combination uses args.margin on both sides
        if args.type1 in fusion_types and args.type2 == "genomonSV":
            process.convert_to_bedpe(args.fusion1, bedpe1_path,
                                     args.sv_margin_major, args.sv_margin_minor, args.type1)
            process.convert_to_bedpe(args.fusion2, bedpe2_path,
                                     args.margin, args.margin, "genomonSV")
        elif args.type2 in fusion_types and args.type1 == "genomonSV":
            process.convert_to_bedpe(args.fusion1, bedpe1_path,
                                     args.margin, args.margin, "genomonSV")
            process.convert_to_bedpe(args.fusion2, bedpe2_path,
                                     args.sv_margin_major, args.sv_margin_minor, args.type2)
        else:
            process.convert_to_bedpe(args.fusion1, bedpe1_path,
                                     args.margin, args.margin, args.type1)
            process.convert_to_bedpe(args.fusion2, bedpe2_path,
                                     args.margin, args.margin, args.type2)

        with open(comp_path, 'w') as hOUT:
            subprocess.check_call(["bedtools", "pairtopair",
                                   "-a", bedpe1_path, "-b", bedpe2_path],
                                  stdout=hOUT)

        # create dictionary: ID from the first file -> ID from the second
        # file (field 7 of the "-a" record and of the "-b" record; assumes
        # 10-column BEDPE input - TODO confirm).  When one ID has several
        # matches the last one read is kept.
        fusion_comp = {}
        with open(comp_path, 'r') as hIN:
            for line in hIN:
                F = line.rstrip('\n').split('\t')
                fusion_comp[F[6]] = F[16]

        # add SV annotation to fusion
        with open(args.fusion1, 'r') as hIN, open(args.output, 'w') as hOUT:
            for line in hIN:
                F = line.rstrip('\n').split('\t')
                chr1, pos1, dir1, chr2, pos2, dir2 = process.get_position(F, args.type1)
                ID = chr1 + ':' + dir1 + pos1 + '-' + chr2 + ':' + dir2 + pos2
                SV_info = fusion_comp.get(ID, "---")
                hOUT.write('\t'.join(F) + '\t' + SV_info + '\n')
    finally:
        # remove intermediate files, also when one of the steps above failed
        for path in (bedpe1_path, bedpe2_path, comp_path):
            if os.path.exists(path):
                os.remove(path)
def comp_main(args):
    """Annotate each record of ``args.fusion1`` with its match in ``args.fusion2``.

    Both inputs are converted to BEDPE and intersected with
    ``bedtools pairtopair``.  Every line of ``args.fusion1`` is then written
    to ``args.output`` with one extra tab-separated column: the ID of the
    overlapping ``args.fusion2`` record, or ``---`` when there is none.

    Args:
        args: namespace-like object providing
            ``fusion1`` / ``type1`` - first input file and its format name,
            ``fusion2`` / ``type2`` - second input file and its format name,
            ``output``        - path of the result file; also used as the
            prefix of the three intermediate files,
            ``bedtools_path`` - directory that contains the ``bedtools``
            executable.

    Raises:
        subprocess.CalledProcessError: if ``bedtools`` exits non-zero.  The
            exit status used to be ignored, which let a failed run produce
            an output built from an incomplete comparison file.
    """
    if os.path.getsize(args.fusion1) == 0:
        # nothing to annotate: just create an empty result file
        open(args.output, 'w').close()
        return

    fusion_types = ("fusionfusion", "fusionfusion_part", "star_fusion",
                    "genomon_fusion", "mapsplice2", "tophat_fusion")

    # margins handed to convert_to_bedpe: the fusion side of a
    # fusion-vs-genomonSV comparison gets (sv_major, default), everything
    # else gets (default, default)
    default_margin = 10
    sv_major_margin = 500000

    bedpe1_path = args.output + ".fusion1.bedpe"
    bedpe2_path = args.output + ".fusion2.bedpe"
    comp_path = args.output + ".fusion_comp.bedpe"

    try:
        if args.type1 in fusion_types and args.type2 == "genomonSV":
            process.convert_to_bedpe(args.fusion1, bedpe1_path,
                                     sv_major_margin, default_margin, args.type1)
            process.convert_to_bedpe(args.fusion2, bedpe2_path,
                                     default_margin, default_margin, "genomonSV")
        elif args.type2 in fusion_types and args.type1 == "genomonSV":
            process.convert_to_bedpe(args.fusion1, bedpe1_path,
                                     default_margin, default_margin, "genomonSV")
            process.convert_to_bedpe(args.fusion2, bedpe2_path,
                                     sv_major_margin, default_margin, args.type2)
        else:
            process.convert_to_bedpe(args.fusion1, bedpe1_path,
                                     default_margin, default_margin, args.type1)
            process.convert_to_bedpe(args.fusion2, bedpe2_path,
                                     default_margin, default_margin, args.type2)

        with open(comp_path, 'w') as hOUT:
            subprocess.check_call([args.bedtools_path + "/bedtools", "pairtopair",
                                   "-a", bedpe1_path, "-b", bedpe2_path],
                                  stdout=hOUT)

        # create dictionary: ID from the first file -> ID from the second
        # file (field 7 of the "-a" record and of the "-b" record; assumes
        # 10-column BEDPE input - TODO confirm).  When one ID has several
        # matches the last one read is kept.
        fusion_comp = {}
        with open(comp_path, 'r') as hIN:
            for line in hIN:
                F = line.rstrip('\n').split('\t')
                fusion_comp[F[6]] = F[16]

        # add SV annotation to fusion
        with open(args.fusion1, 'r') as hIN, open(args.output, 'w') as hOUT:
            for line in hIN:
                F = line.rstrip('\n').split('\t')
                chr1, pos1, dir1, chr2, pos2, dir2 = process.get_position(F, args.type1)
                ID = chr1 + ':' + dir1 + pos1 + '-' + chr2 + ':' + dir2 + pos2
                SV_info = fusion_comp.get(ID, "---")
                hOUT.write('\t'.join(F) + '\t' + SV_info + '\n')
    finally:
        # remove intermediate files, also when one of the steps above failed;
        # removal stays best-effort, as it was with the unchecked "rm -rf"
        for path in (bedpe1_path, bedpe2_path, comp_path):
            try:
                os.remove(path)
            except OSError:
                pass