Exemple #1
0
def write_split_tad(TAD1, TAD2, contact_map_file1, contact_map_file2, map1,
                    map2, up1, up2, adjust_quality, output, start1, end,
                    aliases1, aliases2):
    """Find split and merged TADs between two conditions and save them.

    The TADs of both conditions are matched with ``TAD_matching`` and scored
    with ``corner_split_score``; the resulting candidates are handed to
    ``split_region``.  When a split is reported, two text files are written
    into ``output``:

    * ``<aliases1>-><aliases2>.<start1>.<end>.split.txt`` -- split locations
    * ``<aliases1>-><aliases2>.<start1>.<end>.merge.txt`` -- merged locations
      with the SCC, Laplacian and hash similarity scores appended as columns

    Args:
        TAD1, TAD2: TAD tables of the two conditions (layout is defined by
            the helper functions -- TODO confirm against ``TAD_matching``).
        contact_map_file1, contact_map_file2: contact maps in the form
            ``split_region`` and ``similarity_scc`` expect (presumably file
            paths -- verify against those helpers).
        map1, map2: contact maps used by ``get_ratio`` and the Laplacian /
            hash similarity scores.
        up1, up2: cutoffs forwarded to ``get_ratio`` and ``split_region``.
        adjust_quality: ``0`` passes a fold of ``1`` to ``split_region``;
            ``1`` passes ``mean(ratio1) / mean(ratio2)``.  Any other value
            prints a usage message and returns without writing anything.
        output: directory for the result files.  It must already exist;
            otherwise a message is printed and nothing is written.
        start1, end: values embedded in the output file names (converted
            with ``str``).
        aliases1, aliases2: short names of the two conditions, used in the
            output file names.

    Returns:
        None.
    """
    TAD_s, _tad1_only, _tad2_only = TAD_matching(TAD2, TAD1)
    merged_TADs, _unused_score, split_TAD = corner_split_score(
        TAD1, TAD2, TAD_s)

    if adjust_quality == 0:
        fold = 1
    elif adjust_quality == 1:
        ratio1 = get_ratio(map1, TAD1, up1)
        ratio2 = get_ratio(map2, TAD2, up2)
        fold = np.mean(ratio1) / np.mean(ratio2)
    else:
        print(
            "\nusage:\npython3 DACTAD.py TAD_calculator <contact_map_file_paths> [optional arguments]\n\n"
            "for more help, please try: python DACTAD.py TAD_calculator -h\n")
        # Nothing was computed, so there is nothing to score or write.
        return

    split1_1, _split2_1, split_TAD_location, merged_TAD_location = split_region(
        contact_map_file2, contact_map_file1, split_TAD, merged_TADs, up2,
        up1, fold)

    # A scalar 0 is treated as the "nothing found" sentinel; any array result
    # means split TADs were detected.  (Previously this was detected through
    # the ValueError raised by ``if array == 0`` inside a bare ``except``.)
    if np.ndim(split1_1) == 0 and split1_1 == 0:
        print('No split TAD')
        return

    if not os.path.exists(output):
        print('path does not exist\n')
        return

    locations = merged_TAD_location[:, 0:3]
    scc = similarity_score.similarity_scc(contact_map_file1,
                                          contact_map_file2, locations,
                                          output)
    laplacian = similarity_score.similarity_Laplacian(map1, map2, locations)
    hash_scores = similarity_score.hash_similarity(map1, map2, locations)

    # NOTE(review): the last row of the SCC result is dropped before it is
    # appended; presumably ``similarity_scc`` returns one extra row here --
    # confirm against its implementation.
    merged_with_scores = np.c_[merged_TAD_location, scc[:(-1), 3],
                               laplacian[:, 3], hash_scores[:, 3]]

    # ``str(end)`` keeps the call working when ``end`` is numeric.
    stem = aliases1 + '->' + aliases2 + '.' + str(start1) + '.' + str(end)
    np.savetxt(os.path.join(output, stem + '.split.txt'), split_TAD_location)
    np.savetxt(os.path.join(output, stem + '.merge.txt'), merged_with_scores)
Exemple #2
0
def TAD_similarity(command='TAD_similarity'):
    """Command-line entry point: score TAD similarity between two contact maps.

    Arguments are read from ``sys.argv``.  Both contact maps and both TAD
    files are loaded with ``np.loadtxt``, the two TAD tables are concatenated
    row-wise, and three scores (SCC, Laplacian, hash) are computed through
    ``similarity_score``.  Each result is saved as a tab-delimited file named
    ``<map1 basename>_<map2 basename>_{scc,laplacian,hash}.txt`` inside the
    output directory, which is created when it does not exist.

    Args:
        command: unused; kept for compatibility with existing callers.

    Returns:
        ``0`` when too few command-line arguments were given (a usage message
        is printed), otherwise ``None``.

    Raises:
        SystemExit: raised by ``argparse`` for ``-h`` or when a required
            option is missing or malformed.
    """
    if (len(sys.argv) < 3) and ('-h' not in sys.argv) and ('--help' not in sys.argv):
        # At least two parameters need to be specified; print the help
        # message when they are not.
        print("\nusage:\npython3 TADsplimer.py TAD_similarity <contact_map_file_paths> [optional arguments]\n\n"
              "for more help, please try: python3 TADsplimer.py TAD_similarity -h\n")
        return 0

    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter,
                                     usage="\n\npython3 TADsplimer.py TAD_similarity <contact_map_file_paths> "
                                           "[optional arguments]\n\n", description='')
    parser.add_argument('command', default=None,
                        help="set as 'TAD_similarity' to calculate similarity of two TADs")

    parser.add_argument('-c', '--contact_maps', dest="contact_map", default=None,
                        help="paths to Hi-C contact maps in two conditions. paths must be separated by the comma ','.")

    parser.add_argument('-t', '--TAD', dest="TAD", default=None,
                        help="input files of TADs for two compared Hi-C contact maps. Paths must be separated by the"
                             " comma ','.")

    parser.add_argument('-o', '--output', dest="output", default=None,
                        help="path to output files")

    args = parser.parse_args()

    # Report missing options through argparse instead of failing later with
    # an AttributeError/TypeError on None.
    required = (('-c/--contact_maps', args.contact_map),
                ('-t/--TAD', args.TAD),
                ('-o/--output', args.output))
    missing = [flag for flag, value in required if value is None]
    if missing:
        parser.error('missing required argument(s): ' + ', '.join(missing))

    map_paths = args.contact_map.split(',')
    tad_paths = args.TAD.split(',')
    if len(map_paths) < 2:
        parser.error("-c/--contact_maps needs two paths separated by ','")
    if len(tad_paths) < 2:
        parser.error("-t/--TAD needs two paths separated by ','")

    map1 = np.loadtxt(map_paths[0])
    map2 = np.loadtxt(map_paths[1])
    TAD1 = np.loadtxt(tad_paths[0])
    TAD2 = np.loadtxt(tad_paths[1])

    # Output names are built from the contact-map file names without
    # directory or extension.
    name1 = os.path.splitext(os.path.basename(map_paths[0]))[0]
    name2 = os.path.splitext(os.path.basename(map_paths[1]))[0]
    stem = name1 + '_' + name2

    TAD = np.concatenate((TAD1, TAD2), axis=0)
    scc = similarity_score.similarity_scc(map1, map2, TAD)
    laplacian = similarity_score.similarity_Laplacian(map1, map2, TAD)
    hash_scores = similarity_score.hash_similarity(map1, map2, TAD)

    # np.savetxt does not create directories.
    os.makedirs(args.output, exist_ok=True)
    np.savetxt(os.path.join(args.output, stem + '_scc.txt'), scc, delimiter='\t')
    np.savetxt(os.path.join(args.output, stem + '_laplacian.txt'), laplacian, delimiter='\t')
    np.savetxt(os.path.join(args.output, stem + '_hash.txt'), hash_scores, delimiter='\t')
Exemple #3
0
def split_TADs_alternate(command='split_TADs_alternate'):
    '''
    Command-line entry point of the corner split algorithm for identifying
    split TADs.

    Arguments are read from ``sys.argv``.  The TAD tables of both conditions
    are matched and scored through ``TAD_split``, candidates are passed to
    ``TAD_split.split_region`` and, when a split is reported, two files are
    written to the output directory (created when missing):

    * ``<alias1>-><alias2>.split.txt`` -- split locations
    * ``<alias1>-><alias2>.merge.txt`` -- merged locations with the SCC,
      Laplacian and hash similarity scores appended as columns

    Cutoffs: the values returned by ``TAD_calling.file_split`` are used
    unless ``-u`` supplies a non-zero value, in which case both user values
    replace them.

    The ``-d/--split_direction`` option is parsed but not used here.

    Args:
        command: unused; kept for compatibility with existing callers.

    Returns:
        ``0`` when too few command-line arguments were given (a usage message
        is printed), otherwise ``None``.

    Raises:
        SystemExit: raised by ``argparse`` for ``-h`` or when a required
            option is missing or malformed.
    '''

    if (len(sys.argv) < 3) and ('-h' not in sys.argv) and ('--help'
                                                           not in sys.argv):
        # At least two parameters need to be specified; print the help
        # message when they are not.
        print(
            "\nusage:\npython3 TADsplimer.py TAD_calculator <contact_map_file_paths> [optional arguments]\n\n"
            "for more help, please try: python3 TADsplimer.py TAD_calculator -h\n"
        )
        return 0

    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        usage="\n\npython3 TADsplimer.py corner_split <contact_map_file_paths> "
        "[optional arguments]\n\n",
        description='')
    parser.add_argument(
        'command',
        default=None,
        help="set as 'split_TADs_alternate' to identify split TAD")

    parser.add_argument(
        '-c',
        '--contact_maps',
        dest="contact_map",
        default=None,
        help=
        "paths to Hi-C contact maps in two conditions. paths must be separated by the comma ','."
    )

    parser.add_argument(
        '--contact_maps_aliases',
        dest="aliases",
        default=None,
        help=
        "A set of short aliases for the contact map. Paths must be separated by the comma ','."
    )

    parser.add_argument(
        '-t',
        '--TAD',
        dest="TAD",
        default=None,
        help=
        "input files of TADs for two compared Hi-C contact maps. Paths must be separated by the"
        " comma ','.")

    parser.add_argument(
        '-u',
        '--up_cutoff',
        dest="up",
        default="0,0",
        help=
        "up cutoff for two compared Hi-C contact maps, paths must be separated by the comma ','."
    )

    parser.add_argument(
        '-j',
        '--adjust_quality',
        dest="adjust_quality",
        default=0,
        type=int,
        help=
        "set as 1 to normalize sequence quality for two Hi-C contact maps, set as 0 not to "
        "normalize sequence quality for two Hi-C contact maps")

    parser.add_argument('-o',
                        '--output',
                        dest="output",
                        default=None,
                        help="path to output files")

    parser.add_argument(
        '-d',
        '--split_direction',
        dest="direction",
        default=0,
        type=int,
        help=
        "set as 0: output TADs split in both two contact maps, set as 1: output TADs split in "
        "contact map 1, set as 2: output TADs split in contact map 2")

    args = parser.parse_args()

    # Report missing options through argparse instead of failing later with
    # an AttributeError/TypeError on None.
    required = (('-c/--contact_maps', args.contact_map),
                ('--contact_maps_aliases', args.aliases),
                ('-t/--TAD', args.TAD),
                ('-o/--output', args.output))
    missing = [flag for flag, value in required if value is None]
    if missing:
        parser.error('missing required argument(s): ' + ', '.join(missing))

    map_paths = args.contact_map.split(',')
    tad_paths = args.TAD.split(',')
    cutoffs = args.up.split(',')
    aliases = args.aliases.split(',')
    for flag, values in (('-c/--contact_maps', map_paths),
                         ('--contact_maps_aliases', aliases),
                         ('-t/--TAD', tad_paths),
                         ('-u/--up_cutoff', cutoffs)):
        if len(values) < 2:
            parser.error(flag + " needs two values separated by ','")
    try:
        user_up1 = float(cutoffs[0])
        user_up2 = float(cutoffs[1])
    except ValueError:
        parser.error('-u/--up_cutoff values must be numbers')

    TAD1 = np.loadtxt(tad_paths[0])
    TAD2 = np.loadtxt(tad_paths[1])

    TAD_s, _tad1_only, _tad2_only = TAD_split.TAD_matching(TAD2, TAD1)

    D1, _D2, D3 = TAD_split.corner_split_score(TAD1, TAD2, TAD_s)

    map1 = np.loadtxt(map_paths[0])
    map2 = np.loadtxt(map_paths[1])

    # NOTE(review): file_split receives the TAD file paths here, not the
    # contact-map paths -- confirm this is what TAD_calling.file_split expects.
    up1, _, _ = TAD_calling.file_split(tad_paths[0],
                                       outfile="",
                                       print_subcontact=1)
    up2, _, _ = TAD_calling.file_split(tad_paths[1],
                                       outfile="",
                                       print_subcontact=1)

    # Compare the parsed numbers: the raw command-line pieces are strings and
    # never equal the integer 0, which used to discard the computed cutoffs
    # unconditionally.
    if user_up1 != 0 or user_up2 != 0:
        up1 = user_up1
        up2 = user_up2

    if args.adjust_quality == 0:
        fold = 1
    elif args.adjust_quality == 1:
        ratio1 = TAD_split.get_ratio(map1, TAD1, up1)
        ratio2 = TAD_split.get_ratio(map2, TAD2, up2)
        fold = np.mean(ratio1) / np.mean(ratio2)
    else:
        print(
            "\nusage:\npython3 TADsplimer.py TAD_calculator <contact_map_file_paths> [optional arguments]\n\n"
            "for more help, please try: python3 TADsplimer.py TAD_calculator -h\n"
        )
        # Nothing was computed, so there is nothing to score or write.
        return

    split1_1, _split2_1, loc_d, loc_u = TAD_split.split_region(
        map_paths[1], map_paths[0], D3, D1, up2, up1, fold)

    # A scalar 0 is treated as the "nothing found" sentinel; any array result
    # means split TADs were detected.  (Previously this was detected through
    # the ValueError raised by ``if array == 0`` inside a bare ``except``.)
    if np.ndim(split1_1) == 0 and split1_1 == 0:
        print('No split TAD')
        return

    locations = loc_u[:, 0:3]
    scc = similarity_score.similarity_scc(map1, map2, locations)
    laplacian = similarity_score.similarity_Laplacian(map1, map2, locations)
    hash_scores = similarity_score.hash_similarity(map1, map2, locations)
    merged_with_scores = np.c_[loc_u, scc[:, 3], laplacian[:, 3],
                               hash_scores[:, 3]]

    # Create the directory and then write; the results used to be skipped
    # whenever the directory had to be created.
    os.makedirs(args.output, exist_ok=True)
    stem = aliases[0] + '->' + aliases[1]
    np.savetxt(os.path.join(args.output, stem + '.split.txt'), loc_d)
    np.savetxt(os.path.join(args.output, stem + '.merge.txt'),
               merged_with_scores)