Exemple #1
0
    def test_04_chromosome_batch(self):
        if ONLY and ONLY != "04":
            return
        if CHKTIME:
            t0 = time()

        test_chr = Chromosome(
            name="Test Chromosome",
            experiment_resolutions=[20000] * 3,
            experiment_hic_data=[
                PATH + "/20Kb/chrT/chrT_A.tsv",
                PATH + "/20Kb/chrT/chrT_D.tsv",
                PATH + "/20Kb/chrT/chrT_C.tsv",
            ],
            experiment_names=["exp1", "exp2", "exp3"],
            silent=True,
        )
        test_chr.find_tad(["exp1", "exp2", "exp3"], batch_mode=True, verbose=False, silent=True)
        tads = test_chr.get_experiment("batch_exp1_exp2_exp3").tads
        found = [tads[t]["end"] for t in tads if tads[t]["score"] > 0]
        # Values obtained with square root normalization.
        # self.assertEqual([3.0, 8.0, 16.0, 21.0, 28.0, 35.0, 43.0,
        #                  49.0, 61.0, 66.0, 75.0, 89.0, 94.0, 99.0], found)
        self.assertEqual([3.0, 14.0, 19.0, 33.0, 43.0, 49.0, 61.0, 66.0, 71.0, 89.0, 94.0, 99.0], found)

        if CHKTIME:
            print "4", time() - t0
Exemple #2
0
    def test_04_chromosome_batch(self):
        if ONLY and ONLY != '04':
            return
        if CHKTIME:
            t0 = time()

        test_chr = Chromosome(name='Test Chromosome',
                              experiment_resolutions=[20000]*3,
                              experiment_hic_data=[
                                  PATH + '/20Kb/chrT/chrT_A.tsv',
                                  PATH + '/20Kb/chrT/chrT_D.tsv',
                                  PATH + '/20Kb/chrT/chrT_C.tsv'],
                              experiment_names=['exp1', 'exp2', 'exp3'],
                              silent=True)
        test_chr.find_tad(['exp1', 'exp2', 'exp3'], batch_mode=True,
                          verbose=False, silent=True)
        tads = test_chr.get_experiment('batch_exp1_exp2_exp3').tads
        found = [tads[t]['end'] for t in tads if tads[t]['score'] > 0]
        # Values obtained with square root normalization.
        #self.assertEqual([3.0, 8.0, 16.0, 21.0, 28.0, 35.0, 43.0,
        #                  49.0, 61.0, 66.0, 75.0, 89.0, 94.0, 99.0], found)
        self.assertEqual([3.0, 14.0, 19.0, 33.0, 43.0, 49.0, 61.0, 66.0,
                           71.0, 89.0, 94.0, 99.0], found)
        
        if CHKTIME:
            print '4', time() - t0
Exemple #3
0
    def test_04_chromosome_batch(self):
        if ONLY and ONLY != '04':
            return
        if CHKTIME:
            t0 = time()

        test_chr = Chromosome(name='Test Chromosome',
                              experiment_resolutions=[20000]*3,
                              experiment_hic_data=[
                                  PATH + '/20Kb/chrT/chrT_A.tsv',
                                  PATH + '/20Kb/chrT/chrT_D.tsv',
                                  PATH + '/20Kb/chrT/chrT_C.tsv'],
                              experiment_names=['exp1', 'exp2', 'exp3'],
                              silent=True)
        test_chr.find_tad(['exp1', 'exp2', 'exp3'], batch_mode=True,
                          verbose=False, silent=True)
        tads = test_chr.get_experiment('batch_exp1_exp2_exp3').tads
        found = [tads[t]['end'] for t in tads if tads[t]['score'] > 0]
        # Values obtained with square root normalization.
        #self.assertEqual([3.0, 8.0, 16.0, 21.0, 28.0, 35.0, 43.0,
        #                  49.0, 61.0, 66.0, 75.0, 89.0, 94.0, 99.0], found)
        self.assertEqual([3.0, 14.0, 19.0, 33.0, 43.0, 49.0, 61.0, 66.0,
                           71.0, 89.0, 94.0, 99.0], found)
        
        if CHKTIME:
            print '4', time() - t0
Exemple #4
0
    def test_04_chromosome_batch(self):
        if ONLY and not "04" in ONLY:
            return
        if CHKTIME:
            t0 = time()

        test_chr = Chromosome(name="Test Chromosome",
                              experiment_resolutions=[20000] * 3,
                              experiment_hic_data=[
                                  PATH + "/20Kb/chrT/chrT_A.tsv",
                                  PATH + "/20Kb/chrT/chrT_D.tsv",
                                  PATH + "/20Kb/chrT/chrT_C.tsv"
                              ],
                              experiment_names=["exp1", "exp2", "exp3"],
                              silent=True)
        test_chr.find_tad(["exp1", "exp2", "exp3"],
                          batch_mode=True,
                          verbose=False,
                          silent=True)
        tads = test_chr.get_experiment("batch_exp1_exp2_exp3").tads
        found = [tads[t]["end"] for t in tads if tads[t]["score"] > 0]
        # Values obtained with square root normalization.
        #self.assertEqual([3.0, 8.0, 16.0, 21.0, 28.0, 35.0, 43.0,
        #                  49.0, 61.0, 66.0, 75.0, 89.0, 94.0, 99.0], found)
        self.assertEqual([
            3.0, 14.0, 19.0, 33.0, 43.0, 49.0, 61.0, 66.0, 71.0, 89.0, 94.0,
            99.0
        ], found)

        if CHKTIME:
            print "4", time() - t0
Exemple #5
0
def main():
    """
    main function
    """
    # retieve HOX genes

    distmatrix, geneids = get_genes()
    # compute TADs for human chromosome 19
    test_chr = Chromosome(name='Test Chromosome')
    test_chr.add_experiment('exp1',
                            100000,
                            xp_handler=PATH +
                            'HIC_gm06690_chr19_chr19_100000_obs.txt')
    test_chr.find_tad(['exp1'])
    exp = test_chr.experiments['exp1']
    clust = linkage(distmatrix['19'])
    cl_idx = list(fcluster(clust, t=1, criterion='inconsistent'))
    print max(cl_idx), 'clusters'
    cluster = [[] for _ in xrange(1, max(cl_idx) + 1)]
    for i, j in enumerate(cl_idx):
        cluster[j - 1].append(geneids['19'][i][1])
    for i, _ in enumerate(cluster):
        cluster[i] = min(cluster[i]), max(cluster[i])
    tad_breaker(exp.tads,
                cluster,
                exp.resolution,
                show_plot=True,
                bins=5,
                title='Proportion of HOX genes according to position in a TAD')
Exemple #6
0
    def tb_generate_tads(self, expt_name, adj_list, chrom, resolution,
                         normalized, tad_file):
        """
        Function to predict the TAD sites of one chromosome, at a given
        resolution, from a Hi-C adjacency list

        Parameters
        ----------
        expt_name : str
            Name under which the experiment is added to the Chromosome object
        adj_list : str
            Location of the adjacency list; also used as the prefix of the
            temporary matrix file written next to it
        chrom : str
            Chromosome whose intra-chromosomal matrix is analysed
        resolution : int
            Resolution to read the Hi-C adjacency list at
        normalized : bool
            When exactly ``False`` the loaded data is normalized before the
            matrix is written
        tad_file : str
            Location of the output TAD file

        Returns
        -------
        bool
            Always True; the TAD borders are written to ``tad_file``

        """
        print("TB TAD GENERATOR:", expt_name, adj_list, chrom, resolution,
              normalized, tad_file)

        bin_size = int(resolution)
        hic_data = load_hic_data_from_reads(adj_list, resolution=bin_size)

        # Identity check on purpose: only a literal False triggers normalization.
        if normalized is False:
            hic_data.normalize_hic(iterations=9, max_dev=0.1)

        # Dump the intra-chromosomal matrix to a text file that TADbit can load.
        region = (chrom, chrom)
        tmp_matrix_path = adj_list + "_" + str(chrom) + "_tmp.txt"
        hic_data.write_matrix(tmp_matrix_path, region, normalized=True)

        chr_hic_data = hic_data.get_matrix(region)
        print("TB - chr_hic_data:", chr_hic_data)

        my_chrom = Chromosome(name=chrom, centromere_search=True)
        my_chrom.add_experiment(expt_name,
                                hic_data=tmp_matrix_path,
                                resolution=bin_size)

        # Run core TADbit function to find TADs on each expt.
        my_chrom.find_tad(expt_name, n_cpus=15)

        # Borders go to a sibling ".tmp" file first and are then copied
        # byte-for-byte into tad_file.
        staging_path = tad_file + ".tmp"
        my_chrom.experiments[expt_name].write_tad_borders(savedata=staging_path)

        with open(tad_file, "wb") as f_out:
            with open(staging_path, "rb") as f_in:
                f_out.write(f_in.read())

        return True
Exemple #7
0
 def test_04_chromosome_batch(self):
     """Batch TAD detection on three experiments must return the known borders."""
     hic_files = ['20Kb/chrT/chrT_A.tsv',
                  '20Kb/chrT/chrT_D.tsv',
                  '20Kb/chrT/chrT_C.tsv']
     test_chr = Chromosome(name='Test Chromosome',
                           experiment_resolutions=[20000]*3,
                           experiment_hic_data=hic_files,
                           experiment_names=['exp1', 'exp2', 'exp3'])
     test_chr.find_tad(['exp1', 'exp2', 'exp3'],
                       batch_mode=True, verbose=False)

     tads = test_chr.get_experiment('batch_exp1_exp2_exp3').tads
     # End bin of every TAD whose score is strictly positive.
     found = []
     for key in tads:
         if tads[key]['score'] > 0:
             found.append(tads[key]['end'])

     expected = [3.0, 8.0, 16.0, 21.0, 28.0, 35.0, 43.0,
                 49.0, 61.0, 66.0, 75.0, 89.0, 99.0]
     self.assertEqual(expected, found)
Exemple #8
0
def main():
    """
    main function
    """
    n_pick = 4
    n_tot  = 10
    test_chr = Chromosome(name='Test Chromosome')
    test_chr.add_experiment('exp1', 100000, xp_handler=PATH +
                            'HIC_gm06690_chr19_chr19_100000_obs.txt')
    test_chr.find_tad(['exp1'])
    real_tads = {}
    for i, t in enumerate(test_chr.iter_tads('exp1', normed=False)):
        real_tads[i] = test_chr.experiments['exp1'].tads[i]
        real_tads[i]['hic'] = t[1]
    global DISTRA
    global DISTRD
    DISTRA, DISTRD = get_hic_distr(real_tads)
    # pick some tads
    picked_tads = []
    picked_keys = []
    for i in xrange(n_pick):
        key, new_tad = get_random_tad(real_tads)
        while key in picked_keys or (new_tad['end'] - new_tad['start'] < 15):
            key, new_tad = get_random_tad(real_tads)
        picked_tads.append(new_tad)
        picked_keys.append(key)
    # mutate this tads
    tads = {}
    tad_matrices = []
    tad_names = []
    for i in xrange(n_pick):
        print i
        tads[uppercase[i] + '_' + str(0)] = picked_tads[i]
        tad_names.append(uppercase[i] + '_' + str(0))
        for j in xrange(1, n_tot):
            hic, indels = generate_random_contacts(
                tad1=picked_tads[i]['hic'], prob=0.05, ext=int(random()*4) + 1,
                indel=int(random() * 4) + 1)[1:]
            # indels = '|'.join([str(n-1) if n>0 else '-' + str((abs(n)-1)) for n in indels])
            tads[uppercase[i] + '_' + str(j)] = {
                'hic'  : hic,
                'start': picked_tads[i]['start'],
                'end'  : picked_tads[i]['end']}
            tad_matrices.append(hic)
            tad_names.append(uppercase[i] + '_' + str(j))
    distances, cci = get_distances(tad_matrices, max_num_v=4,
                                   n_cpus=mu.cpu_count())
    results, clusters = pre_cluster(distances, cci, len(tad_matrices))
    paint_clustering(results, clusters, len(tad_matrices), test_chr,
                     tad_names, tad_matrices)
Exemple #9
0
def main():
    """
    Find the TADs of the k562 chr19 sample matrix and cluster the matrices of
    those with a non-negative score that span at least 10 bins.
    """
    test_chr = Chromosome(name='Test Chromosome')
    hic_file = PATH + 'HIC_k562_chr19_chr19_100000_obs.txt'
    test_chr.add_experiment('exp1', 100000, xp_handler=hic_file)
    test_chr.find_tad(['exp1'])

    tad_names = []
    tad_matrices = []
    for name, matrix in test_chr.iter_tads('exp1'):
        tad = test_chr.experiments['exp1'].tads[name]
        # skip TADs with a negative score or shorter than 10 bins
        if tad['score'] < 0 or (tad['end'] - tad['start']) < 10:
            continue
        tad_names.append(name)
        tad_matrices.append(matrix)

    num = len(tad_names)
    # NOTE(review): the CPU count is passed as max_num_v here -- confirm
    # against the signature of get_distances.
    distances, cci = get_distances(tad_matrices, max_num_v=mu.cpu_count())
    results, clusters = pre_cluster(distances, cci, num)
    paint_clustering(results, clusters, num, test_chr, tad_names)
Exemple #10
0
def main():
    """
    Detect TADs in the k562 chr19 sample matrix, keep those that have a
    non-negative score and cover 10 bins or more, then cluster and plot them.
    """
    test_chr = Chromosome(name='Test Chromosome')
    test_chr.add_experiment(
        'exp1', 100000,
        xp_handler=PATH + 'HIC_k562_chr19_chr19_100000_obs.txt')
    test_chr.find_tad(['exp1'])

    kept = []
    for name, matrix in test_chr.iter_tads('exp1'):
        info = test_chr.experiments['exp1'].tads[name]
        if info['score'] < 0:
            continue
        if (info['end'] - info['start']) < 10:
            continue
        kept.append((name, matrix))

    tad_names = [name for name, _ in kept]
    tad_matrices = [matrix for _, matrix in kept]
    num = len(tad_names)

    # NOTE(review): max_num_v receives the CPU count -- verify this is the
    # intended keyword of get_distances.
    distances, cci = get_distances(tad_matrices, max_num_v=mu.cpu_count())
    results, clusters = pre_cluster(distances, cci, num)
    paint_clustering(results, clusters, num, test_chr, tad_names)
Exemple #11
0
def main():
    """
    main function
    """
    # retieve HOX genes

    distmatrix, geneids = get_genes()
    # compute TADs for human chromosome 19
    test_chr = Chromosome(name='Test Chromosome')
    test_chr.add_experiment('exp1', 100000, xp_handler=PATH +
                            'HIC_gm06690_chr19_chr19_100000_obs.txt')
    test_chr.find_tad(['exp1'])
    exp = test_chr.experiments['exp1']
    clust = linkage(distmatrix['19'])
    cl_idx = list(fcluster(clust, t=1, criterion='inconsistent'))
    print max(cl_idx), 'clusters'
    cluster=[[] for _ in xrange(1, max(cl_idx)+1)]
    for i, j in enumerate(cl_idx):
        cluster[j-1].append(geneids['19'][i][1])
    for i, _ in enumerate(cluster):
        cluster[i] = min(cluster[i]), max(cluster[i])
    tad_breaker(exp.tads, cluster, exp.resolution, show_plot=True, bins=5,
                title='Proportion of HOX genes according to position in a TAD')
Exemple #12
0
def process():
    """
    Call TADs on every matrix file listed in ``args``.

    For each file the TAD borders, a plot with the TADs painted and the
    saved chromosome object are written using a common output prefix.
    """
    # Output prefix: explicit file name if given, else the first input's name.
    if options.outputFilename != "":
        prefix_name = options.outputFilename
    else:
        prefix_name = os.path.basename(args[0])
    outfilefileprefix = options.outputDir + prefix_name

    for matrix_path in args:
        # "<sample>.<chromosome>[.matrix...].<ext>" -> sample and chromosome
        stem = os.path.splitext(os.path.basename(matrix_path))[0]
        stem = stem.split(".matrix")[0]
        pieces = stem.rsplit(".", 1)
        chrom_name = pieces[-1]
        sample = pieces[0]

        chrom = Chromosome(name=chrom_name,
                           centromere_search=True,
                           species=options.species,
                           assembly=options.assembly)
        chrom.set_max_tad_size(5000000)
        chrom.add_experiment(sample,
                             exp_type='Hi-C',
                             identifier=sample,
                             hic_data=matrix_path,
                             resolution=options.resolution)

        exp = chrom.experiments[sample]
        exp.normalize_hic(silent=True)
        chrom.find_tad(sample,
                       n_cpus=options.threads,
                       normalized=True,
                       verbose=False)
        exp.write_tad_borders(outfilefileprefix + "." + chrom_name + ".border")

        # chrom.tad_density_plot(sample,savefig=outfilefileprefix+".density."+chr+".pdf")
        chrom_prefix = outfilefileprefix + "chr." + chrom_name
        chrom.visualize(exp.name, paint_tads=True,
                        savefig=chrom_prefix + ".pdf")
        chrom.save_chromosome(chrom_prefix + ".tdb", force=True)
    def generate_tads(self, chrom):
        """
        Uses TADbit to generate the TAD borders based on the computed hic_data

        The adjacency list of ``chrom`` against itself is read from the parsed
        reads directory and the borders are written to
        ``<library_dir><experiment name>_tads.tsv``.
        """
        from pytadbit import Chromosome

        resolution = int(self.resolution)
        # "<chrom>-<chrom>": the intra-chromosomal pair used in both names
        pair = str(chrom) + "-" + str(chrom)

        exptName = self.library + "_" + str(self.resolution) + "_" + pair
        fname = (self.parsed_reads_dir + '/adjlist_map_' + pair + '_' +
                 str(self.resolution) + '.tsv')
        chr_hic_data = read_matrix(fname, resolution=resolution)

        my_chrom = Chromosome(name=exptName, centromere_search=True)
        my_chrom.add_experiment(exptName,
                                hic_data=chr_hic_data,
                                resolution=resolution)

        # Run core TADbit function to find TADs on each expt.
        # For the current dataset required 61GB of RAM
        my_chrom.find_tad(exptName, n_cpus=15)

        tad_file = self.library_dir + exptName + '_tads.tsv'
        my_chrom.experiments[exptName].write_tad_borders(savedata=tad_file)
Exemple #14
0
def process():
    """
    Run TAD detection on each matrix file in ``args`` and save the results.

    Per input file this writes a ".border" file, a PDF plot with the TADs
    painted and a ".tdb" chromosome dump, all sharing one output prefix.
    """
    # The prefix falls back to the first input's base name.
    outfilefileprefix = options.outputDir + (
        options.outputFilename if options.outputFilename != ""
        else os.path.basename(args[0]))

    for matrix_file in args:
        # strip extension and ".matrix" suffix, then split off the chromosome
        base = os.path.splitext(os.path.basename(matrix_file))[0].split(".matrix")[0]
        split_base = base.rsplit(".", 1)
        chrom_label = split_base[-1]
        sample = split_base[0]

        chrom = Chromosome(name=chrom_label, centromere_search=True,
                           species=options.species, assembly=options.assembly)
        chrom.set_max_tad_size(5000000)
        chrom.add_experiment(sample, exp_type='Hi-C', identifier=sample,
                             hic_data=matrix_file, resolution=options.resolution)

        exp = chrom.experiments[sample]
        exp.normalize_hic(silent=True)
        chrom.find_tad(sample, n_cpus=options.threads, normalized=True,
                       verbose=False)
        exp.write_tad_borders(outfilefileprefix + "." + chrom_label + ".border")

        # chrom.tad_density_plot(sample,savefig=outfilefileprefix+".density."+chr+".pdf")
        chrom.visualize(exp.name, paint_tads=True,
                        savefig=outfilefileprefix + "chr." + chrom_label + ".pdf")
        chrom.save_chromosome(outfilefileprefix + "chr." + chrom_label + ".tdb",
                              force=True)
Exemple #15
0
# Parsing input arguments
# All five arguments are positional and therefore required: argparse never
# applies a `default=` to a positional without `nargs='?'`, so the former
# defaults were dead and have been dropped. Example values they carried:
#   path_input_raw   RESOLUTION/Ecoli_20M_3kb_rep2_raw.tab
#   path_input_norm  RESOLUTION/Ecoli_20M_3kb_rep2.tab
#   path_output      RESOLUTION/Tadbit/Ecoli_20M_3kb_rep1_final
#   restictase       HpaII
import os
import tempfile

parser = argparse.ArgumentParser()
parser.add_argument('path_input_raw', type=str, help='Input maps raw')
parser.add_argument('path_input_norm', type=str, help='Input maps normalized')
parser.add_argument('path_output', type=str, help='Output folder for bed files')
# The argument name keeps its original spelling so usage output is unchanged.
parser.add_argument('restictase', type=str, help='Restrictase')
parser.add_argument('resolution', type=int, help='Resolution of map')
args = parser.parse_args()

path_input_raw = args.path_input_raw
path_input_norm = args.path_input_norm
path_output = args.path_output
restrictase = args.restictase
resolution = args.resolution

# Call TADs on a single experiment named after the enzyme.
exp_name = restrictase + '1_stat'
my_chrom = Chromosome(name='1', centromere_search=False)
my_chrom.add_experiment(exp_name, resolution=resolution,
                        hic_data=path_input_raw, norm_data=path_input_norm,
                        enzyme=restrictase)
exp = my_chrom.experiments[exp_name]
my_chrom.find_tad([exp_name], verbose=True, batch_mode=False)

# The borders are round-tripped through a private temporary file instead of a
# fixed 'tmp.txt' in the working directory, so concurrent runs cannot clobber
# each other and nothing is left behind.
tmp_fd, tmp_path = tempfile.mkstemp(suffix='.txt')
os.close(tmp_fd)
try:
    exp.write_tad_borders(density=True, savedata=tmp_path, normalized=False)
    data = pd.read_csv(tmp_path, sep='\t')
finally:
    os.remove(tmp_path)

# Convert bin indexes to base pairs: shift the start back by one bin, then
# scale every column by the resolution.
data['start'] -= 1
data *= resolution
# Keep TADs at least 4 bins long; .copy() makes the filtered frame independent
# so adding a column below does not write into a slice of the original.
data = data[data['end'] - data['start'] >= resolution * 4].copy()
data['ix'] = 'chr1'
data = data[['ix', 'start', 'end']]
data.to_csv(path_output, sep='\t', header=False, index=False)
Exemple #16
0
def call_tads(matrix_filenames, chrom_name):
    print
    print "Call TADs for chromosome " + chrom_name + '...'
    print "Contact matrices: "
    for matrix_filename in matrix_filenames:
        print matrix_filename
    chrom_number = search(r'\d+|X|Y', chrom_name).group(0)
    if len(chrom_number) == 1 and chrom_number != 'X' and chrom_number != 'Y':
        chrom_number = '0' + chrom_number
        chrom_id = 'chr' + chrom_number
    else:
        chrom_id = chrom_name
        
    output_txt_filename = join(txt_directory, chrom_id + '_TADs.txt')
    print 'Output TXT file:', output_txt_filename
    output_bed_filename = join(bed_directory, chrom_id + '_TADs.bed')
    print 'Output BED file:', output_bed_filename
    filename_list.append(output_bed_filename)

    """tads_2D_filename = join(png_directory, chrom_id + '_TADs_2D.png')
    print 'Output 2D TAD plot file:', tads_2D_filename
    tads_1D_filename = join(png_directory, chrom_id + '_TADs_1D.png')
    print 'Output 1D TAD plot file:', tads_1D_filename"""

    # Call TADs and write their borders in TADbit text format and in BED format
    chrom = Chromosome(name=chrom_name)
    if len(matrix_filenames) > 1: # several matrices for one chromosome
        combined_experiment_name = 'batch'
        experiment_names = []
        for matrix_index, matrix_filename in enumerate(matrix_filenames):
            experiment_name = splitext(basename(matrix_filename))[0] + '_' + str(matrix_index)
            experiment_names.append(experiment_name)
            combined_experiment_name += '_' + experiment_name 
            chrom.add_experiment(experiment_name, hic_data=matrix_filename, \
                                 resolution=matrix_resolution)
        chrom.find_tad(experiment_names, batch_mode = True, n_cpus=thread_number)
        chrom.experiments[combined_experiment_name].write_tad_borders(savedata=output_txt_filename)
        #chrom.visualize(combined_experiment_name, paint_tads=True, savefig=tads_2D_filename)
        #chrom.tad_density_plot(combined_experiment_name, savefig=tads_1D_filename)
    else: # only one matrix for one chromosome
        matrix_filename = matrix_filenames[0]
        experiment_name = splitext(basename(matrix_filename))[0]
        chrom.add_experiment(experiment_name, hic_data=matrix_filename, \
                             resolution=matrix_resolution)
        chrom.find_tad(experiment_name, n_cpus=thread_number)
        chrom.experiments[experiment_name].write_tad_borders(savedata=output_txt_filename)
        #chrom.visualize(experiment_name, paint_tads=True, savefig=tads_2D_filename)
        #chrom.tad_density_plot(experiment_name, savefig=tads_1D_filename)

    with open(output_txt_filename, 'r') as src, open(output_bed_filename, 'w') as dst:
        track_line = 'track name="' + chrom_name + '_TADs" visibility=1 itemRgb="On"'
        dst.write(track_line + '\n')
        for i, line in enumerate(src):
            if line.split()[0] == '#':
                continue
            line_list = line.split()
            tad_name =  chrom_name + '.' + 'TAD' + '.' + str(i)
            # Coordinates in BED format are 0-based, 
            # and a region is presented by [x,y) interval.
            start_pos = (int(line_list[1]) - 1) * matrix_resolution
            end_pos = int(line_list[2]) * matrix_resolution
            score = 0 # Just to fill in the field
            strand = '.' # Just to fill in the field
            if i%2:
                color = '0,0,255' # blue
            else:
                color = '255,0,0' # red
            bed_line = chrom_name + '\t' + str(start_pos) + '\t' + str(end_pos) + '\t' + \
                        tad_name + '\t' + str(score) + '\t' + strand + '\t' + \
                        str(start_pos) + '\t' + str(end_pos) + '\t' + color
            dst.write(bed_line + '\n')
    chrom_filename = join(tdb_directory, chrom_id + '.tdb')
    chrom.save_chromosome(chrom_filename, force=True)
    print 'Finish.'
Exemple #17
0
def call_tads(matrix_filenames, chrom_name):
    print
    print "Call TADs for chromosome " + chrom_name + '...'
    print "Contact matrices: "
    for matrix_filename in matrix_filenames:
        print matrix_filename
    chrom_number = search(r'\d+|X|Y', chrom_name).group(0)
    if len(chrom_number) == 1 and chrom_number != 'X' and chrom_number != 'Y':
        chrom_number = '0' + chrom_number
        chrom_id = 'chr' + chrom_number
    else:
        chrom_id = chrom_name

    output_txt_filename = join(txt_directory, chrom_id + '_TADs.txt')
    print 'Output TXT file:', output_txt_filename
    output_bed_filename = join(bed_directory, chrom_id + '_TADs.bed')
    print 'Output BED file:', output_bed_filename
    filename_list.append(output_bed_filename)
    """tads_2D_filename = join(png_directory, chrom_id + '_TADs_2D.png')
    print 'Output 2D TAD plot file:', tads_2D_filename
    tads_1D_filename = join(png_directory, chrom_id + '_TADs_1D.png')
    print 'Output 1D TAD plot file:', tads_1D_filename"""

    # Call TADs and write their borders in TADbit text format and in BED format
    chrom = Chromosome(name=chrom_name)
    if len(matrix_filenames) > 1:  # several matrices for one chromosome
        combined_experiment_name = 'batch'
        experiment_names = []
        for matrix_index, matrix_filename in enumerate(matrix_filenames):
            experiment_name = splitext(
                basename(matrix_filename))[0] + '_' + str(matrix_index)
            experiment_names.append(experiment_name)
            combined_experiment_name += '_' + experiment_name
            chrom.add_experiment(experiment_name, hic_data=matrix_filename, \
                                 resolution=matrix_resolution)
        chrom.find_tad(experiment_names, batch_mode=True, n_cpus=thread_number)
        chrom.experiments[combined_experiment_name].write_tad_borders(
            savedata=output_txt_filename)
        #chrom.visualize(combined_experiment_name, paint_tads=True, savefig=tads_2D_filename)
        #chrom.tad_density_plot(combined_experiment_name, savefig=tads_1D_filename)
    else:  # only one matrix for one chromosome
        matrix_filename = matrix_filenames[0]
        experiment_name = splitext(basename(matrix_filename))[0]
        chrom.add_experiment(experiment_name, hic_data=matrix_filename, \
                             resolution=matrix_resolution)
        chrom.find_tad(experiment_name, n_cpus=thread_number)
        chrom.experiments[experiment_name].write_tad_borders(
            savedata=output_txt_filename)
        #chrom.visualize(experiment_name, paint_tads=True, savefig=tads_2D_filename)
        #chrom.tad_density_plot(experiment_name, savefig=tads_1D_filename)

    with open(output_txt_filename, 'r') as src, open(output_bed_filename,
                                                     'w') as dst:
        track_line = 'track name="' + chrom_name + '_TADs" visibility=1 itemRgb="On"'
        dst.write(track_line + '\n')
        for i, line in enumerate(src):
            if line.split()[0] == '#':
                continue
            line_list = line.split()
            tad_name = chrom_name + '.' + 'TAD' + '.' + str(i)
            # Coordinates in BED format are 0-based,
            # and a region is presented by [x,y) interval.
            start_pos = (int(line_list[1]) - 1) * matrix_resolution
            end_pos = int(line_list[2]) * matrix_resolution
            score = 0  # Just to fill in the field
            strand = '.'  # Just to fill in the field
            if i % 2:
                color = '0,0,255'  # blue
            else:
                color = '255,0,0'  # red
            bed_line = chrom_name + '\t' + str(start_pos) + '\t' + str(end_pos) + '\t' + \
                        tad_name + '\t' + str(score) + '\t' + strand + '\t' + \
                        str(start_pos) + '\t' + str(end_pos) + '\t' + color
            dst.write(bed_line + '\n')
    chrom_filename = join(tdb_directory, chrom_id + '.tdb')
    chrom.save_chromosome(chrom_filename, force=True)
    print 'Finish.'
Exemple #18
0
02 Jul 2013

script that follows Tadbit tutorial presented in the documentation
"""

from pytadbit import Chromosome

# initiate a chromosome object that will store all Hi-C data and analysis
my_chrom = Chromosome(name='My fisrt chromsome')

# load Hi-C data
my_chrom.add_experiment('First Hi-C experiment', xp_handler="sample_data/HIC_k562_chr19_chr19_100000_obs.txt", resolution=100000)
my_chrom.add_experiment('Second Hi-C experiment', xp_handler="sample_data/HIC_gm06690_chr19_chr19_100000_obs.txt", resolution=100000)

# run core tadbit function to find TADs, on each experiment
my_chrom.find_tad('First Hi-C experiment' , n_cpus=8, verbose=False)
my_chrom.find_tad('Second Hi-C experiment', n_cpus=8, verbose=False)

print my_chrom.experiments


my_chrom.align_experiments(names=["First Hi-C experiment", "Second Hi-C experiment"])

print my_chrom.alignment

ali = my_chrom.alignment[('First Hi-C experiment', 'Second Hi-C experiment')]


print ali.write_alignment(ftype='html')

score, pval = my_chrom.align_experiments(randomize=True, rnd_num=1000)
Exemple #19
0
example run:
python2 script_TADbit.py infile.txt outfile.txt S2 chr2L 20000 8

From examples folder:

~/anaconda3/envs/tadbit/bin/python ../TADselect/script_TADbit.py ../data/test_S2.20000.chr2L.txt tmp/tadbit_output.txt S2 chr2L 20000 8
"""

from sys import argv

# Positional command-line arguments, in the order shown in the example run.
infile = argv[1]            # txt matrix
output = argv[2]            # file the TAD borders are written to
exp = argv[3]               # experiment name (also used as its identifier)
ch = argv[4]                # chromosome name
resolution = int(argv[5])   # in bp
nth = int(argv[6])          # number of CPUs, e.g. 8

from pytadbit import Chromosome

# One chromosome holding a single Hi-C experiment.
my_chrom = Chromosome(name=ch, centromere_search=False)
my_chrom.add_experiment(
    exp, exp_type='Hi-C', identifier=exp, hic_data=infile,
    resolution=resolution)

# Find the TADs, then write the borders (with density) to the output file.
my_chrom.find_tad(exp, n_cpus=nth)

experiment = my_chrom.experiments[exp]
experiment.write_tad_borders(savedata=output, density=True)