def write_split_tad(TAD1, TAD2, contact_map_file1, contact_map_file2, map1, map2,
                    up1, up2, adjust_quality, output, start1, end, aliases1, aliases2):
    """Locate split and merged TADs between two conditions and save them in ``output``.

    TADs of the two conditions are matched (``TAD_matching``), scored
    (``corner_split_score``) and located (``split_region``).  The merged
    regions are then annotated with three similarity scores and two tables
    are written with ``np.savetxt``:

    * ``<aliases1>-><aliases2>.<start1>.<end>.split.txt`` -- split TAD locations
    * ``<aliases1>-><aliases2>.<start1>.<end>.merge.txt`` -- merged TAD locations
      followed by column 3 of the SCC, Laplacian and hash similarity results

    Args:
        TAD1, TAD2: TAD tables of condition 1 and condition 2.
        contact_map_file1, contact_map_file2: contact maps as handed to
            ``split_region`` and ``similarity_scc`` (presumably file paths --
            confirm against those helpers).
        map1, map2: loaded contact maps used for the ratio, Laplacian and
            hash computations.
        up1, up2: upper cutoffs for condition 1 and condition 2.
        adjust_quality: ``0`` compares the maps as-is (fold = 1); ``1`` scales
            by ``mean(ratio1) / mean(ratio2)``.  Any other value prints the
            usage message and nothing is computed or written.
        output: existing directory that receives the two text files.
        start1, end: values embedded in the output file names.
        aliases1, aliases2: short names of the two conditions, used in the
            output file names.

    Returns:
        None.
    """
    # Reject unsupported modes up front; previously the function carried on
    # and failed later with a NameError on variables that were never assigned.
    if adjust_quality not in (0, 1):
        print(
            "\nusage:\npython3 DACTAD.py TAD_calculator <contact_map_file_paths> [optional arguments]\n\n"
            "for more help, please try: python DACTAD.py TAD_calculator -h\n")
        return

    TAD_s, _, _ = TAD_matching(TAD2, TAD1)
    merged_TADs, _, split_TAD = corner_split_score(TAD1, TAD2, TAD_s)

    if adjust_quality == 1:
        # Normalise for sequencing-depth differences between the two maps.
        ratio1 = get_ratio(map1, TAD1, up1)
        ratio2 = get_ratio(map2, TAD2, up2)
        fold = np.mean(ratio1) / np.mean(ratio2)
    else:
        fold = 1

    split1_1, _, split_TAD_location, merged_TAD_location = split_region(
        contact_map_file2, contact_map_file1, split_TAD, merged_TADs, up2, up1, fold)

    # split_region presumably returns a scalar (0) when nothing was found and
    # arrays otherwise -- TODO confirm.  Test the dimensionality explicitly
    # instead of relying on the exception raised by ``if array == 0``.
    if np.ndim(split1_1) == 0:
        if split1_1 == 0:
            print('No split TAD')
        return

    if not os.path.exists(output):
        print('path does not exist\n')
        return

    merged_regions = merged_TAD_location[:, 0:3]
    scc = similarity_score.similarity_scc(
        contact_map_file1, contact_map_file2, merged_regions, output)
    laplacian = similarity_score.similarity_Laplacian(map1, map2, merged_regions)
    hash_sim = similarity_score.hash_similarity(map1, map2, merged_regions)

    # NOTE(review): the last SCC row is dropped here, unlike in
    # split_TADs_alternate -- confirm that this similarity_scc call returns
    # one extra row.
    merged_with_scores = np.c_[merged_TAD_location, scc[:(-1), 3],
                               laplacian[:, 3], hash_sim[:, 3]]

    # str.format accepts non-string start1/end (the old ``'.' + end`` raised
    # TypeError for an integer ``end``).
    prefix = '{}->{}.{}.{}'.format(aliases1, aliases2, start1, end)
    np.savetxt(os.path.join(output, prefix + '.split.txt'), split_TAD_location)
    np.savetxt(os.path.join(output, prefix + '.merge.txt'), merged_with_scores)
def TAD_similarity(command='TAD_similarity'):
    """Command-line entry point: score TAD similarity between two Hi-C contact maps.

    Reads its options from ``sys.argv``.  The TADs of both conditions are
    concatenated and scored with the SCC, Laplacian and hash similarity
    measures; each result is saved as a tab-delimited file
    ``<map1>_<map2>_{scc,laplacian,hash}.txt`` in the output directory, where
    ``<map1>``/``<map2>`` are the contact-map file names without extension.

    Args:
        command: name of the sub-command; not used inside the function.

    Returns:
        0 when only the usage message was printed (fewer than three argv
        entries and no help flag), otherwise None.
    """
    if (len(sys.argv) < 3) and ('-h' not in sys.argv) and ('--help' not in sys.argv):
        # at least two parameters need to be specified; print the usage
        # message when they are missing
        print("\nusage:\npython3 TADsplimer.py TAD_similarity <contact_map_file_paths> [optional arguments]\n\n"
              "for more help, please try: python3 TADsplimer.py TAD_similarity -h\n")
        return 0

    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        usage="\n\npython3 TADsplimer.py TAD_similarity <contact_map_file_paths> "
              "[optional arguments]\n\n",
        description='')
    parser.add_argument('command', default=None,
                        help="set as 'TAD_similarity' to calculate similarity of two TADs")
    parser.add_argument('-c', '--contact_maps', dest="contact_map", default=None,
                        help="paths to Hi-C contact maps in two conditions. paths must be separated by the comma ','.")
    parser.add_argument('-t', '--TAD', dest="TAD", default=None,
                        help="input files of TADs for two compared Hi-C contact maps. Paths must be separated by the"
                             " comma ','.")
    parser.add_argument('-o', '--output', dest="output", default=None,
                        help="path to output files")
    args = parser.parse_args()

    # These options default to None, which used to crash on ``None.split``
    # or ``os.path.join(None, ...)``; report them as a normal argparse error.
    required = (
        ('-c/--contact_maps', args.contact_map),
        ('-t/--TAD', args.TAD),
        ('-o/--output', args.output),
    )
    missing = [flag for flag, value in required if value is None]
    if missing:
        parser.error('missing required argument(s): ' + ', '.join(missing))

    map_paths = args.contact_map.split(',')
    tad_paths = args.TAD.split(',')
    if len(map_paths) < 2 or len(tad_paths) < 2:
        parser.error("-c/--contact_maps and -t/--TAD each need two paths separated by ','")

    map1 = np.loadtxt(map_paths[0])
    map2 = np.loadtxt(map_paths[1])
    TAD1 = np.loadtxt(tad_paths[0])
    TAD2 = np.loadtxt(tad_paths[1])

    # Output files are named after the contact-map files, extension removed.
    name1 = os.path.splitext(os.path.basename(map_paths[0]))[0]
    name2 = os.path.splitext(os.path.basename(map_paths[1]))[0]
    prefix = name1 + '_' + name2

    all_TADs = np.concatenate((TAD1, TAD2), axis=0)
    scc = similarity_score.similarity_scc(map1, map2, all_TADs)
    laplacian = similarity_score.similarity_Laplacian(map1, map2, all_TADs)
    hash_sim = similarity_score.hash_similarity(map1, map2, all_TADs)

    # Create the output directory on demand so np.savetxt does not fail on a
    # fresh path.
    os.makedirs(args.output, exist_ok=True)
    np.savetxt(os.path.join(args.output, prefix + '_scc.txt'), scc, delimiter='\t')
    np.savetxt(os.path.join(args.output, prefix + '_laplacian.txt'), laplacian, delimiter='\t')
    np.savetxt(os.path.join(args.output, prefix + '_hash.txt'), hash_sim, delimiter='\t')
def split_TADs_alternate(command='split_TADs_alternate'):
    """Command-line entry point: corner split algorithm for identifying split TADs.

    Reads its options from ``sys.argv``, matches the TADs of two Hi-C contact
    maps, locates split and merged TADs with ``TAD_split.split_region`` and
    writes two files into the output directory:

    * ``<alias1>-><alias2>.split.txt`` -- split TAD locations
    * ``<alias1>-><alias2>.merge.txt`` -- merged TAD locations followed by
      column 3 of the SCC, Laplacian and hash similarity results

    The ``-d/--split_direction`` option is parsed but not used by this
    function.

    Args:
        command: name of the sub-command; not used inside the function.

    Returns:
        0 when only the usage message was printed (too few argv entries or an
        unsupported ``--adjust_quality`` value), otherwise None.
    """
    # The sub-command name now matches the 'command' positional below; the
    # old text referred to 'TAD_calculator' and 'corner_split'.
    usage_text = (
        "\nusage:\npython3 TADsplimer.py split_TADs_alternate <contact_map_file_paths> [optional arguments]\n\n"
        "for more help, please try: python3 TADsplimer.py split_TADs_alternate -h\n"
    )
    if (len(sys.argv) < 3) and ('-h' not in sys.argv) and ('--help' not in sys.argv):
        # at least two parameters need to be specified; print the usage
        # message when they are missing
        print(usage_text)
        return 0

    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        usage="\n\npython3 TADsplimer.py split_TADs_alternate <contact_map_file_paths> "
              "[optional arguments]\n\n",
        description='')
    parser.add_argument(
        'command', default=None,
        help="set as 'split_TADs_alternate' to identify split TAD")
    parser.add_argument(
        '-c', '--contact_maps', dest="contact_map", default=None,
        help="paths to Hi-C contact maps in two conditions. paths must be separated by the comma ','.")
    parser.add_argument(
        '--contact_maps_aliases', dest="aliases", default=None,
        help="A set of short aliases for the contact map. Paths must be separated by the comma ','.")
    parser.add_argument(
        '-t', '--TAD', dest="TAD", default=None,
        help="input files of TADs for two compared Hi-C contact maps. Paths must be separated by the"
             " comma ','.")
    parser.add_argument(
        '-u', '--up_cutoff', dest="up", default="0,0",
        help="up cutoff for two compared Hi-C contact maps, paths must be separated by the comma ','.")
    parser.add_argument(
        '-j', '--adjust_quality', dest="adjust_quality", default=0, type=int,
        help="set as 1 to normalize sequence quality for two Hi-C contact maps, set as 0 not to "
             "normalize sequence quality for two Hi-C contact maps")
    parser.add_argument('-o', '--output', dest="output", default=None,
                        help="path to output files")
    parser.add_argument(
        '-d', '--split_direction', dest="direction", default=0, type=int,
        help="set as 0: output TADs split in both two contact maps, set as 1: output TADs split in "
             "contact map 1, set as 2: output TADs split in contact map 2")
    args = parser.parse_args()

    # These options default to None and used to crash on ``None.split`` /
    # ``os.path.exists(None)``; report them as a normal argparse error.
    required = (
        ('-c/--contact_maps', args.contact_map),
        ('--contact_maps_aliases', args.aliases),
        ('-t/--TAD', args.TAD),
        ('-o/--output', args.output),
    )
    missing = [flag for flag, value in required if value is None]
    if missing:
        parser.error('missing required argument(s): ' + ', '.join(missing))

    # Reject unsupported modes before any expensive work; previously the
    # usage text was printed and the function then failed with a NameError.
    if args.adjust_quality not in (0, 1):
        print(usage_text)
        return 0

    map_paths = args.contact_map.split(',')
    tad_paths = args.TAD.split(',')
    cutoffs = args.up.split(',')
    aliases = args.aliases.split(',')
    if min(len(map_paths), len(tad_paths), len(cutoffs), len(aliases)) < 2:
        parser.error("--contact_maps, --contact_maps_aliases, --TAD and --up_cutoff "
                     "each need two values separated by ','")
    user_up1 = float(cutoffs[0])
    user_up2 = float(cutoffs[1])

    TAD1 = np.loadtxt(tad_paths[0])
    TAD2 = np.loadtxt(tad_paths[1])
    TAD_s, _, _ = TAD_split.TAD_matching(TAD2, TAD1)
    merged_TADs, _, split_TADs = TAD_split.corner_split_score(TAD1, TAD2, TAD_s)

    # Load each contact map once (the old code re-read both files when
    # adjust_quality was 1).
    map1 = np.loadtxt(map_paths[0])
    map2 = np.loadtxt(map_paths[1])

    up1, _, _ = TAD_calling.file_split(tad_paths[0], outfile="", print_subcontact=1)
    up2, _, _ = TAD_calling.file_split(tad_paths[1], outfile="", print_subcontact=1)
    # Only override the computed cutoffs when the user supplied a non-zero
    # value.  The old test compared the raw strings with the integer 0, which
    # is always unequal, so the computed cutoffs were always discarded.
    if user_up1 != 0 or user_up2 != 0:
        up1 = user_up1
        up2 = user_up2

    if args.adjust_quality == 1:
        # Normalise for sequencing-depth differences, using the effective
        # cutoffs (same convention as write_split_tad).
        ratio1 = TAD_split.get_ratio(map1, TAD1, up1)
        ratio2 = TAD_split.get_ratio(map2, TAD2, up2)
        fold = np.mean(ratio1) / np.mean(ratio2)
    else:
        fold = 1

    split1_1, _, loc_d, loc_u = TAD_split.split_region(
        map_paths[1], map_paths[0], split_TADs, merged_TADs, up2, up1, fold)

    # split_region presumably returns a scalar (0) when nothing was found and
    # arrays otherwise -- TODO confirm.  Test the dimensionality explicitly
    # instead of relying on the exception raised by ``if array == 0``.
    if np.ndim(split1_1) == 0:
        if split1_1 == 0:
            print('No split TAD')
        return

    # Create the directory if needed and then always write the results; the
    # old code created a missing directory but skipped the output entirely.
    os.makedirs(args.output, exist_ok=True)

    merged_regions = loc_u[:, 0:3]
    scc = similarity_score.similarity_scc(map1, map2, merged_regions)
    laplacian = similarity_score.similarity_Laplacian(map1, map2, merged_regions)
    hash_sim = similarity_score.hash_similarity(map1, map2, merged_regions)
    merged_with_scores = np.c_[loc_u, scc[:, 3], laplacian[:, 3], hash_sim[:, 3]]

    prefix = aliases[0] + '->' + aliases[1]
    np.savetxt(os.path.join(args.output, prefix + '.split.txt'), loc_d)
    np.savetxt(os.path.join(args.output, prefix + '.merge.txt'), merged_with_scores)