Exemple #1
0
def main():
    """
    Model a region from a normalized matrix and save the resulting models.

    Reads three positional command-line arguments from ``sys.argv``:

    1. path to the normalized matrix,
    2. configuration string formatted as ``upfreq:lowfreq:maxdist``,
    3. ``n_models:n_keep`` (models to compute and models to keep).
    """
    argv = sys.argv
    matrix_path = argv[1]
    config_string = argv[2]
    compute_keep = argv[3]

    upfreq, lowfreq, maxdist = config_string.split(':')
    lowfreq = float(lowfreq)
    upfreq = float(upfreq)
    maxdist = int(maxdist)
    config = {
        'reference': '',
        'kforce': 5,
        'maxdist': maxdist,
        'upfreq': upfreq,
        'lowfreq': lowfreq,
        'scale': 0.01,
        'kbending': 0.0,
    }

    n_compute, n_kept = [int(field) for field in compute_keep.split(':')]

    chrom = Chromosome('chr')
    chrom.add_experiment('sample', norm_data=matrix_path, resolution=15000)
    experiment = chrom.experiments[0]

    models = experiment.model_region(n_models=n_compute, n_keep=n_kept,
                                     n_cpus=8, config=config)

    # the raw configuration string is embedded in the output file name
    out_name = 'models_%s.pickle' % (config_string)
    models.save_models(out_name)
Exemple #2
0
    def test_04_chromosome_batch(self):
        if ONLY and ONLY != '04':
            return
        if CHKTIME:
            t0 = time()

        test_chr = Chromosome(name='Test Chromosome',
                              experiment_resolutions=[20000]*3,
                              experiment_hic_data=[
                                  PATH + '/20Kb/chrT/chrT_A.tsv',
                                  PATH + '/20Kb/chrT/chrT_D.tsv',
                                  PATH + '/20Kb/chrT/chrT_C.tsv'],
                              experiment_names=['exp1', 'exp2', 'exp3'],
                              silent=True)
        test_chr.find_tad(['exp1', 'exp2', 'exp3'], batch_mode=True,
                          verbose=False, silent=True)
        tads = test_chr.get_experiment('batch_exp1_exp2_exp3').tads
        found = [tads[t]['end'] for t in tads if tads[t]['score'] > 0]
        # Values obtained with square root normalization.
        #self.assertEqual([3.0, 8.0, 16.0, 21.0, 28.0, 35.0, 43.0,
        #                  49.0, 61.0, 66.0, 75.0, 89.0, 94.0, 99.0], found)
        self.assertEqual([3.0, 14.0, 19.0, 33.0, 43.0, 49.0, 61.0, 66.0,
                           71.0, 89.0, 94.0, 99.0], found)
        
        if CHKTIME:
            print '4', time() - t0
Exemple #3
0
    def test_11_write_interaction_pairs(self):
        if ONLY and not "11" in ONLY:
            return
        """
        writes interaction pair file.
        """
        if CHKTIME:
            t0 = time()

        test_chr = Chromosome(name="Test Chromosome", max_tad_size=260000)
        test_chr.add_experiment("exp1",
                                20000,
                                tad_def=exp4,
                                hic_data=PATH + "/20Kb/chrT/chrT_D.tsv")
        exp = test_chr.experiments[0]
        exp.load_hic_data(PATH + "/20Kb/chrT/chrT_A.tsv", silent=True)
        exp.filter_columns(silent=True)
        exp.normalize_hic(factor=None, silent=True)
        exp.get_hic_zscores(zscored=False)
        exp.write_interaction_pairs("lala")
        lines = open("lala").readlines()
        self.assertEqual(len(lines), 4674)
        self.assertEqual(lines[25], "1\t28\t0.612332461036\n")
        self.assertEqual(lines[2000], "26\t70\t0.0738742984321\n")
        system("rm -f lala")
        if CHKTIME:
            print "11", time() - t0
Exemple #4
0
    def test_06_tad_clustering(self):
        if ONLY and ONLY != '06':
            return
        if CHKTIME:
            t0 = time()

        test_chr = Chromosome(name='Test Chromosome',
                              experiment_tads=[exp4],
                              experiment_names=['exp1'],
                              experiment_hic_data=[
                                  PATH + '/20Kb/chrT/chrT_D.tsv'],
                              experiment_resolutions=[20000,20000],
                              silent=True)
        all_tads = []
        for _, tad in test_chr.iter_tads('exp1', normed=False):
            all_tads.append(tad)
        #align1, align2, _ = optimal_cmo(all_tads[7], all_tads[10], 7,
        #                                method='score')
        align1, align2, _ = optimal_cmo(all_tads[1], all_tads[3], 7,
                                        method='score')
        # Values with square root normalization.
        #self.assertEqual(align1, [0, 1, '-', 2, 3, '-', 4, 5, 6, 7, 8, 9, 10])
        #self.assertEqual(align2,[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])
        self.assertEqual(align1, [0, 1, 2, '-', '-', 3, 4, 5, 6, 7, 8, '-', 9])
        self.assertEqual(align2, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])
        if CHKTIME:
            print '6', time() - t0
Exemple #5
0
    def test_03_tad_multi_aligner(self):

        if CHKTIME:
            t0 = time()

        test_chr = Chromosome(name='Test Chromosome', centromere_search=True,
                              experiment_tads=[exp1, exp2, exp3, exp4],
                              experiment_hic_data=[
                                  PATH + '/40Kb/chrT/chrT_A.tsv',
                                  PATH + '/20Kb/chrT/chrT_B.tsv',
                                  PATH + '/20Kb/chrT/chrT_C.tsv',
                                  PATH + '/20Kb/chrT/chrT_D.tsv'],
                              experiment_names=['exp1', 'exp2', 'exp3', 'exp4'],
                              experiment_resolutions=[40000,20000,20000,20000],
                              silent=True)
        for exp in test_chr.experiments:
            exp.normalize_hic(silent=True, factor=None)

        test_chr.align_experiments(verbose=False, randomize=False,
                                   method='global')
        _, (score1, pval1) = test_chr.align_experiments(verbose=False,
                                                        method='global',
                                                        randomize=True, rnd_num=100)
        _, (_, pval2) = test_chr.align_experiments(verbose=False, randomize=True,
                                                   rnd_method='shuffle', rnd_num=100)
        # Values with alignments obtained with square root normalization.
        #self.assertEqual(round(-26.095, 3), round(score1, 3))
        #self.assertEqual(round(0.001, 1), round(pval1, 1))
        #self.assertTrue(abs(0.175 - pval2) < 0.2)
        self.assertEqual(round(-11.002, 3), round(score1, 3))
        self.assertEqual(round(0.001, 1), round(pval1, 1))
        self.assertTrue(abs(0.04 - pval2) < 0.1)
        if CHKTIME:
            print '3', time() - t0
Exemple #6
0
 def test_08_changing_resolution(self):
     """
     Change the resolution of an experiment several times and check that
     the sum of the Hi-C data is the same at every step.
     """
     test_chr = Chromosome(name='Test Chromosome', max_tad_size=260000)
     test_chr.add_experiment('exp1', 20000, tad_def=exp4,
                             hic_data='20Kb/chrT/chrT_D.tsv')
     exp = test_chr.experiments['exp1']
     # total at the initial resolution, then one total per change
     totals = [sum(exp.hic_data[0])]
     steps = (80000, 160000, 360000, 2400000, 40000, 20000, 40000)
     for resolution in steps:
         exp.set_resolution(resolution)
         totals.append(sum(exp.hic_data[0]))
         check_hic(exp.hic_data[0], exp.size)
     # every consecutive pair equal <=> all totals equal
     self.assertTrue(all(prev == curr
                         for prev, curr in zip(totals, totals[1:])))
Exemple #7
0
    def test_11_write_interaction_pairs(self):
        if ONLY and ONLY != '11':
            return
        """
        writes interaction pair file.
        """
        if CHKTIME:
            t0 = time()

        test_chr = Chromosome(name='Test Chromosome', max_tad_size=260000)
        test_chr.add_experiment('exp1', 20000, tad_def=exp4,
                                hic_data=PATH + '/20Kb/chrT/chrT_D.tsv')
        exp = test_chr.experiments[0]
        exp.load_hic_data(PATH + '/20Kb/chrT/chrT_A.tsv', silent=True)
        exp.filter_columns(silent=True)
        exp.normalize_hic(factor=None, silent=True)
        exp.get_hic_zscores(zscored=False)
        exp.write_interaction_pairs('lala')
        lines = open('lala').readlines()
        self.assertEqual(len(lines), 4674)
        self.assertEqual(lines[25], '1\t28\t0.612332461036\n')
        self.assertEqual(lines[2000], '26\t70\t0.0738742984321\n')
        system('rm -f lala')
        if CHKTIME:
            print '11', time() - t0
Exemple #8
0
    def test_04_chromosome_batch(self):
        if ONLY and not "04" in ONLY:
            return
        if CHKTIME:
            t0 = time()

        test_chr = Chromosome(name="Test Chromosome",
                              experiment_resolutions=[20000] * 3,
                              experiment_hic_data=[
                                  PATH + "/20Kb/chrT/chrT_A.tsv",
                                  PATH + "/20Kb/chrT/chrT_D.tsv",
                                  PATH + "/20Kb/chrT/chrT_C.tsv"
                              ],
                              experiment_names=["exp1", "exp2", "exp3"],
                              silent=True)
        test_chr.find_tad(["exp1", "exp2", "exp3"],
                          batch_mode=True,
                          verbose=False,
                          silent=True)
        tads = test_chr.get_experiment("batch_exp1_exp2_exp3").tads
        found = [tads[t]["end"] for t in tads if tads[t]["score"] > 0]
        # Values obtained with square root normalization.
        #self.assertEqual([3.0, 8.0, 16.0, 21.0, 28.0, 35.0, 43.0,
        #                  49.0, 61.0, 66.0, 75.0, 89.0, 94.0, 99.0], found)
        self.assertEqual([
            3.0, 14.0, 19.0, 33.0, 43.0, 49.0, 61.0, 66.0, 71.0, 89.0, 94.0,
            99.0
        ], found)

        if CHKTIME:
            print "4", time() - t0
def main():
    """
    Load every Hi-C data set given in the options, normalize each one, add
    them all together and write the result.

    Depending on ``opts.abc`` the summed experiment is written either as
    interaction pairs or as a Hi-C matrix. ``opts.output`` may be a path or
    an already opened file-like object (anything with a ``write`` method).
    """

    opts = get_options()
    crm = Chromosome(':P')

    for i, data in enumerate(opts.data):
        crm.add_experiment('exp' + str(i), resolution=int(opts.resolution[i]),
                           hic_data=data)
        crm.experiments['exp' + str(i)].normalize_hic()

    if len(opts.data) > 1:
        exp = crm.experiments[0] + crm.experiments[1]
        for i in range(2, len(opts.data)):
            exp += crm.experiments[i]
    else:
        exp = crm.experiments[0]

    if opts.abc:
        exp.write_interaction_pairs(opts.output, normalized=opts.norm,
                                    zscored=False)
        return

    matrix = exp.print_hic_matrix(print_it=False, normalized=opts.norm)
    if hasattr(opts.output, 'write'):
        # caller-owned stream: write to it but leave it open
        opts.output.write(matrix)
    else:
        # path given: open it ourselves and make sure it gets closed
        with open(opts.output, 'w') as out:
            out.write(matrix)
Exemple #10
0
def load_genome_from_tad_def(genome_path, res, verbose=False):
    """
    Search, at a given path, for chromosome folders containing TAD
    definitions in tsv files.

    :param genome_path: Path where to search for TADbit chromosomes
    :param res: Resolution at were saved chromosomes
    :param False verbose:

    :returns: a dictionary with all TADbit chromosomes found
    """
    ref_genome = {}
    for crm in listdir(genome_path):
        crm_path = os.path.join(genome_path, crm)
        if not isfile(crm_path):
            continue
        if crm in ref_genome:
            raise Exception('More than 1 TAD definition file found\n')
        crm = crm.replace('.tsv', '').replace('chr', '').upper()
        if verbose:
            print '  Chromosome:', crm
        crmO = Chromosome(crm)
        crmO.add_experiment('sample', res)
        crmO.experiments[0].load_tad_def(crm_path)
        ref_genome[crm] = crmO
    return ref_genome
Exemple #11
0
    def test_11_write_interaction_pairs(self):
        """
        Write the interaction pair file, then check its number of lines and
        the third column of two of its lines.
        """
        if ONLY and "11" not in ONLY:
            return
        if CHKTIME:
            t0 = time()

        test_chr = Chromosome(name="Test Chromosome", max_tad_size=260000)
        test_chr.add_experiment("exp1", 20000, tad_def=exp4,
                                hic_data=PATH + "/20Kb/chrT/chrT_D.tsv")
        experiment = test_chr.experiments[0]
        experiment.load_hic_data(PATH + "/20Kb/chrT/chrT_A.tsv", silent=True)
        experiment.filter_columns(silent=True)
        experiment.normalize_hic(factor=1, silent=True)
        experiment.get_hic_zscores(zscored=False)
        experiment.write_interaction_pairs("lala")
        with open("lala") as handle:
            lines = handle.readlines()

        def third_column(line):
            # value stored in the third tab-separated field
            return float(line.split('\t')[2])

        self.assertEqual(len(lines), 4674)
        self.assertAlmostEqual(third_column(lines[25]), 0.5852295196345679)
        self.assertAlmostEqual(third_column(lines[2000]), 0.07060448846960976)
        system("rm -f lala")
        if CHKTIME:
            print("11", time() - t0)
Exemple #12
0
    def test_12_3d_modelling_optimization(self):
        """
        quick test to generate 3D coordinates from 3? simple models???
        """
        if CHKTIME:
            t0 = time()

        try:
            __import__('IMP')
        except ImportError:
            warn('IMP not found, skipping test\n')
            return
        test_chr = Chromosome(name='Test Chromosome', max_tad_size=260000)
        test_chr.add_experiment('exp1', 20000, tad_def=exp4,
                                hic_data=PATH + '/20Kb/chrT/chrT_D.tsv')
        exp = test_chr.experiments[0]
        exp.load_hic_data(PATH + '/20Kb/chrT/chrT_A.tsv')
        exp.filter_columns(silent=True)
        exp.normalize_hic(silent=True, factor=None)
        result = exp.optimal_imp_parameters(50, 70, n_cpus=4,
                                            n_models=8, n_keep=2,
                                            lowfreq_range=[-0.6],
                                            upfreq_range=(0, 1.1, 1.1),
                                            maxdist_range=[500, 600],
                                            verbose=False)

        # get best correlations
        config = result.get_best_parameters_dict()
        wanted = {'maxdist': 600.0, 'upfreq': 0.0, 'kforce': 5,
                  'dcutoff': 2,
                  'reference': '', 'lowfreq': -0.6, 'scale': 0.01}
        self.assertEqual([round(i, 4) for i in config.values()if not type(i) is str],
                         [round(i, 4) for i in wanted.values()if not type(i) is str])
        if CHKTIME:
            print '12', time() - t0
Exemple #13
0
    def test_06_tad_clustering(self):
        if ONLY and not "06" in ONLY:
            return
        if CHKTIME:
            t0 = time()

        test_chr = Chromosome(
            name="Test Chromosome",
            experiment_tads=[exp4],
            experiment_names=["exp1"],
            experiment_hic_data=[PATH + "/20Kb/chrT/chrT_D.tsv"],
            experiment_resolutions=[20000, 20000],
            silent=True)
        all_tads = []
        for _, tad in test_chr.iter_tads("exp1", normed=False):
            all_tads.append(tad)
        #align1, align2, _ = optimal_cmo(all_tads[7], all_tads[10], 7,
        #                                method="score")
        align1, align2, _ = optimal_cmo(all_tads[1],
                                        all_tads[3],
                                        7,
                                        method="score")
        # Values with square root normalization.
        #self.assertEqual(align1, [0, 1, "-", 2, 3, "-", 4, 5, 6, 7, 8, 9, 10])
        #self.assertEqual(align2,[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])
        self.assertEqual(align1, [0, 1, 2, "-", "-", 3, 4, 5, 6, 7, 8, "-", 9])
        self.assertEqual(align2, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])
        if CHKTIME:
            print "6", time() - t0
Exemple #14
0
def main():
    """
    main function
    """
    # retieve HOX genes

    distmatrix, geneids = get_genes()
    # compute TADs for human chromosome 19
    test_chr = Chromosome(name='Test Chromosome')
    test_chr.add_experiment('exp1',
                            100000,
                            xp_handler=PATH +
                            'HIC_gm06690_chr19_chr19_100000_obs.txt')
    test_chr.find_tad(['exp1'])
    exp = test_chr.experiments['exp1']
    clust = linkage(distmatrix['19'])
    cl_idx = list(fcluster(clust, t=1, criterion='inconsistent'))
    print max(cl_idx), 'clusters'
    cluster = [[] for _ in xrange(1, max(cl_idx) + 1)]
    for i, j in enumerate(cl_idx):
        cluster[j - 1].append(geneids['19'][i][1])
    for i, _ in enumerate(cluster):
        cluster[i] = min(cluster[i]), max(cluster[i])
    tad_breaker(exp.tads,
                cluster,
                exp.resolution,
                show_plot=True,
                bins=5,
                title='Proportion of HOX genes according to position in a TAD')
Exemple #15
0
    def test_08_changing_resolution(self):
        if CHKTIME:
            t0 = time()

        test_chr = Chromosome(name='Test Chromosome', max_tad_size=260000)
        test_chr.add_experiment('exp1', 20000, tad_def=exp4,
                                hic_data=PATH + '/20Kb/chrT/chrT_D.tsv',
                                silent=True)
        exp = test_chr.experiments['exp1']
        sum20 = sum(exp.hic_data[0].values())
        exp.set_resolution(80000)
        sum80 = sum(exp.hic_data[0].values())
        check_hic(exp.hic_data[0], exp.size)
        exp.set_resolution(160000)
        sum160 = sum(exp.hic_data[0].values())
        check_hic(exp.hic_data[0], exp.size)
        exp.set_resolution(360000)
        sum360 = sum(exp.hic_data[0].values())
        check_hic(exp.hic_data[0], exp.size)
        exp.set_resolution(2400000)
        sum2400 = sum(exp.hic_data[0].values())
        check_hic(exp.hic_data[0], exp.size)
        exp.set_resolution(40000)
        sum40 = sum(exp.hic_data[0].values())
        check_hic(exp.hic_data[0], exp.size)
        exp.set_resolution(20000)
        sum21 = sum(exp.hic_data[0].values())
        check_hic(exp.hic_data[0], exp.size)
        exp.set_resolution(40000)
        sum41 = sum(exp.hic_data[0].values())
        check_hic(exp.hic_data[0], exp.size)
        self.assertTrue(sum20 == sum80 == sum160 == sum360 == sum40 \
                        == sum21 == sum2400 == sum41)
        if CHKTIME:
            print '8', time() - t0
Exemple #16
0
    def test_09_hic_normalization(self):
        """
        writes interaction pair file.
        """
        if ONLY and not "09" in ONLY:
            return
        if CHKTIME:
            t0 = time()

        test_chr = Chromosome(name="Test Chromosome", max_tad_size=260000)
        test_chr.add_experiment("exp1",
                                20000,
                                tad_def=exp4,
                                hic_data=PATH + "/20Kb/chrT/chrT_D.tsv",
                                silent=True)
        exp = test_chr.experiments[0]
        exp.load_hic_data(PATH + "/20Kb/chrT/chrT_A.tsv", silent=True)
        exp.normalize_hic(silent=True)
        exp.get_hic_zscores()
        exp.get_hic_zscores(zscored=False)
        sumz = sum([
            exp._zscores[k1][k2] for k1 in exp._zscores.keys()
            for k2 in exp._zscores[k1]
        ])
        self.assertEqual(round(sumz, 4), round(4059.2877, 4))
        if CHKTIME:
            print "9", time() - t0
Exemple #17
0
def main():
    """
    Build a chromosome object, load every sample given on the command line
    into it and save the chromosome to ``<output>.tdb``.

    Each sample is given as ``label,path``.
    """
    args = getArgs()
    samples = args.i
    output = args.o
    chrom_name = args.c
    ncpu = args.p
    resolution = args.r
    species = args.s
    gbuild = args.b

    # initiate a chromosome object that will store all Hi-C data and analysis
    my_chrom = Chromosome(
        name=chrom_name,         # chromosome name
        centromere_search=True,  # whether to search for the centromere
        species=species,
        assembly=gbuild,         # genome build
    )
    for sample in samples:
        label, path = sample.split(",")
        print(label)
        print(path)
        getHiCData(my_chrom, output, label, path, resolution, ncpu)

    # (disabled) creation of a 'tdb' directory:
    #    if not os.path.exists('tdb'):
    #       os.makedirs("tdb")

    tdb_path = output + ".tdb"
    my_chrom.save_chromosome(tdb_path, force=True)
Exemple #18
0
    def test_07_forbidden_regions(self):
        if ONLY and ONLY != '07':
            return
        if CHKTIME:
            t0 = time()

        test_chr = Chromosome(name='Test Chromosome', max_tad_size=260000,
                              centromere_search=True,)
        test_chr.add_experiment('exp1', 20000, tad_def=exp4,
                                hic_data=PATH + '/20Kb/chrT/chrT_D.tsv',
                                silent=True)
        # Values with square root normalization.
        #brks = [2.0, 7.0, 12.0, 18.0, 38.0, 43.0, 49.0,
        #        61.0, 66.0, 75.0, 89.0, 94.0, 99.0]
        brks = [3.0, 14.0, 19.0, 33.0, 38.0, 43.0, 49.0, 61.0,
                  66.0, 71.0, 83.0, 89.0, 94.0, 99.0]
        tads = test_chr.experiments['exp1'].tads
        found = [tads[t]['end'] for t in tads if tads[t]['score'] > 0]
        self.assertEqual(brks, found)
        items1 = test_chr.forbidden.keys(), test_chr.forbidden.values()
        test_chr.add_experiment('exp2', 20000, tad_def=exp3,
                                hic_data=PATH + '/20Kb/chrT/chrT_C.tsv',
                                silent=True)
        items2 = test_chr.forbidden.keys(), test_chr.forbidden.values()
        know1 = ([38, 39], ['Centromere', 'Centromere'])
        #know1 = ([32, 33, 34, 38, 39, 19, 20, 21, 22,
        #          23, 24, 25, 26, 27, 28, 29, 30, 31],
        #         [None, None, None, 'Centromere', 'Centromere',
        #          None, None, None, None, None, None, None,
        #          None, None, None, None, None, None])
        know2 = ([38], ['Centromere'])
        self.assertEqual(items1, know1)
        self.assertEqual(items2, know2)
        if CHKTIME:
            print '7', time() - t0
Exemple #19
0
    def tb_generate_tads(self, expt_name, adj_list, chrom, resolution,
                         normalized, tad_file):
        """
        Predict the TAD borders of one chromosome, at a given resolution,
        from a Hi-C adjacency list.

        Parameters
        ----------
        expt_name : str
            Name under which the experiment is added to the chromosome
        adj_list : str
            Location of the adjacency list; also used as the prefix of the
            temporary matrix file
        chrom
            Chromosome whose intra-chromosomal matrix is analysed
        resolution : int
            Resolution to read the Hi-C adjacency list at
        normalized : bool
            When it is exactly False the Hi-C data is normalized before the
            matrix is written
        tad_file : str
            Location of the output TAD file

        Returns
        -------
        bool
            Always True

        """
        print("TB TAD GENERATOR:", expt_name, adj_list, chrom, resolution,
              normalized, tad_file)

        bin_size = int(resolution)
        hic_data = load_hic_data_from_reads(adj_list, resolution=bin_size)

        if normalized is False:
            hic_data.normalize_hic(iterations=9, max_dev=0.1)

        # write the chrom-vs-chrom matrix to a file next to the input
        region = (chrom, chrom)
        matrix_path = adj_list + "_" + str(chrom) + "_tmp.txt"
        hic_data.write_matrix(matrix_path, region, normalized=True)

        chr_hic_data = hic_data.get_matrix(region)
        print("TB - chr_hic_data:", chr_hic_data)

        my_chrom = Chromosome(name=chrom, centromere_search=True)
        my_chrom.add_experiment(expt_name,
                                hic_data=matrix_path,
                                resolution=bin_size)

        # Run core TADbit function to find TADs on each expt.
        my_chrom.find_tad(expt_name, n_cpus=15)

        # borders go to a ".tmp" file first, then are copied to tad_file
        borders_tmp = tad_file + ".tmp"
        my_chrom.experiments[expt_name].write_tad_borders(savedata=borders_tmp)

        with open(tad_file, "wb") as f_out, open(borders_tmp, "rb") as f_in:
            f_out.write(f_in.read())

        return True
Exemple #20
0
 def test_05_save_load(self):
     """
     Save a chromosome to disk, load it back and remove the files.
     """
     saved = Chromosome(name='Test Chromosome',
                        experiment_tads=[exp1, exp2],
                        experiment_names=['exp1', 'exp2'],
                        experiment_resolutions=[20000,20000])
     saved.save_chromosome('lolo', force=True)
     # loading is only expected not to fail; the result is not inspected
     test_chr = load_chromosome('lolo')
     for cleanup in ('rm -f lolo', 'rm -f lolo_hic'):
         system(cleanup)
Exemple #21
0
    def test_13_3d_modelling_centroid(self):  #model with no optimisation
        """
        quick test to generate 3D coordinates from 3? simple models???
        """
        if ONLY and ONLY != '13':
            return
        if CHKTIME:
            t0 = time()

        try:
            __import__('IMP')
        except ImportError:
            warn('IMP not found, skipping test\n')
            return
        test_chr = Chromosome(name='Test Chromosome', max_tad_size=260000)
        test_chr.add_experiment('exp1',
                                20000,
                                tad_def=exp4,
                                hic_data=PATH + '/20Kb/chrT/chrT_D.tsv',
                                silent=True)
        exp = test_chr.experiments[0]
        exp.load_hic_data(PATH + '/20Kb/chrT/chrT_A.tsv', silent=True)
        exp.filter_columns(silent=True)
        exp.normalize_hic(silent=True, factor=None)
        models = exp.model_region(51,
                                  71,
                                  n_models=40,
                                  n_keep=25,
                                  n_cpus=4,
                                  config={
                                      'kforce': 5,
                                      'maxdist': 500,
                                      'scale': 0.01,
                                      'upfreq': 1.0,
                                      'lowfreq': -0.6
                                  })
        models.save_models('models.pick')

        avg = models.average_model()
        nmd = len(models)
        dev = rmsdRMSD_wrapper([models[m]['x']
                                for m in xrange(nmd)] + [avg['x']],
                               [models[m]['y']
                                for m in xrange(nmd)] + [avg['y']],
                               [models[m]['z']
                                for m in xrange(nmd)] + [avg['z']],
                               models._zeros, models.nloci, 200,
                               range(len(models) + 1),
                               len(models) + 1, int(False), 'rmsd', 0)
        centroid = models[models.centroid_model()]
        # find closest
        model = min([(k, dev[(k, nmd)]) for k in range(nmd)],
                    key=lambda x: x[1])[0]
        self.assertEqual(centroid['rand_init'], models[model]['rand_init'])
        if CHKTIME:
            print '13', time() - t0
Exemple #22
0
    def test_12_3d_modelling_optimization(self):
        """
        Quick optimization of the IMP modelling parameters on a small
        region; the best parameter set is compared with known values.
        Skipped when IMP cannot be imported.
        """
        if ONLY and not "12" in ONLY:
            return
        if CHKTIME:
            t0 = time()

        try:
            __import__("IMP")
        except ImportError:
            warn("IMP not found, skipping test\n")
            return
        test_chr = Chromosome(name="Test Chromosome", max_tad_size=260000)
        test_chr.add_experiment("exp1",
                                20000,
                                tad_def=exp4,
                                hic_data=PATH + "/20Kb/chrT/chrT_D.tsv")
        exp = test_chr.experiments[0]
        exp.load_hic_data(PATH + "/20Kb/chrT/chrT_A.tsv")
        exp.filter_columns(silent=True)
        exp.normalize_hic(silent=True, factor=None)
        result = exp.optimal_imp_parameters(
            50,
            70,
            n_cpus=4,
            n_models=8,
            n_keep=2,
            lowfreq_range=[-0.6],
            upfreq_range=(0, 1.1,
                          1.1),  #from 0 till 1.1 in step of 1.1 with ()
            maxdist_range=[500, 600],  # it will use 500 and 600 with []
            verbose=False)

        # get best correlations
        config = result.get_best_parameters_dict()  #dict with parameters
        wanted = {
            "maxdist": 600.0,
            "upfreq": 0.0,
            "kforce": 5,
            "dcutoff": 2,
            "reference": "",
            "lowfreq": -0.6,
            "scale": 0.01
        }

        # Compare key by key, on the numeric expected values only. Filtering
        # on the type of the keys (always str) left both lists empty, so the
        # assertion could never fail.
        numeric_keys = [key for key in sorted(wanted)
                        if not isinstance(wanted[key], str)]
        self.assertEqual([round(config[key], 4) for key in numeric_keys],
                         [round(wanted[key], 4) for key in numeric_keys])
        if CHKTIME:
            print("12", time() - t0)
Exemple #23
0
    def _sub_experiment_zscore(self, start, end):
        """
        Get the z-score of a sub-region of an experiment.

        The experiment is normalized with the 'visibility' method first if
        it is not already. A temporary experiment is then built from the
        sub-matrix, reusing the weights and zero columns computed on the
        full experiment.

        TODO: find a nicer way to do this...

        :param start: first bin to model (bin number)
        :param end: last bin to model (bin number, inclusive)

        :returns: the z-scores of the sub-region, and a square matrix with
           its normalized values (NaN on the diagonal, on filtered
           rows/columns and on cells without Hi-C data)

        :raises Exception: if every bin of the region is a filtered one
        """
        if self._normalization != 'visibility':
            warn('WARNING: normalizing according to visibility method')
            self.normalize_hic(method='visibility')
        # presumably imported here to avoid a circular import -- TODO confirm
        from pytadbit import Chromosome
        matrix = self.get_hic_matrix()
        # from here on `end` is exclusive: the region is bins start..end-1
        end += 1
        new_matrix = [[matrix[i][j] for j in xrange(start, end)]
                      for i in xrange(start, end)]

        tmp = Chromosome('tmp')
        tmp.add_experiment('exp1',
                           hic_data=[new_matrix],
                           resolution=self.resolution,
                           filter_columns=False)
        exp = tmp.experiments[0]
        # We want the weights and zeros calculated in the full chromosome
        siz = self.size
        exp.norm = [[
            self.norm[0][i + siz * j] for i in xrange(start, end)
            for j in xrange(start, end)
        ]]
        # keep only the filtered bins that fall inside the region, shifted
        # to sub-matrix coordinates (bin `end` itself is outside the region)
        exp._zeros = dict([(z - start, None) for z in self._zeros
                           if start <= z < end])
        if len(exp._zeros) == (end - start):
            raise Exception('ERROR: no interaction found in selected regions')
        # ... but the z-scores in this particular region
        exp.get_hic_zscores(remove_zeros=True)
        values = [[float('nan') for _ in xrange(exp.size)]
                  for _ in xrange(exp.size)]
        for i in xrange(exp.size):
            # zeros are rows or columns having a zero in the diagonal
            if i in exp._zeros:
                continue
            for j in xrange(i + 1, exp.size):
                if j in exp._zeros:
                    continue
                cell = i * exp.size + j
                # NOTE(review): this cell used to be tested twice in a row;
                # the mirrored cell (j, i) may have been intended -- confirm
                if not exp.hic_data[0][cell]:
                    continue
                values[i][j] = exp.norm[0][cell]
                values[j][i] = exp.norm[0][cell]
        return exp._zscores, values
Exemple #24
0
 def test_09_hic_normalization(self):
     """
     Load a second data set into an experiment and compute its z-scores
     with and without z-scoring; nothing is asserted.

     TODO: check with Davide's script
     """
     chromosome = Chromosome(name='Test Chromosome', max_tad_size=260000)
     chromosome.add_experiment('exp1', 20000, tad_def=exp4,
                               hic_data='20Kb/chrT/chrT_D.tsv')
     experiment = chromosome.experiments[0]
     experiment.load_experiment('20Kb/chrT/chrT_A.tsv')
     # default call first, then with zscored=False
     experiment.get_hic_zscores()
     experiment.get_hic_zscores(zscored=False)
Exemple #25
0
 def test_10_generate_weights(self):
     """
     Recompute the normalization weights of an experiment and check that
     they match the ones it had right after loading.

     method names are: 'sqrt' or 'over_tot'
     """
     chromosome = Chromosome(name='Test Chromosome', max_tad_size=260000)
     chromosome.add_experiment('exp1', 20000, tad_def=exp4,
                               hic_data='20Kb/chrT/chrT_D.tsv')
     experiment = chromosome.experiments[0]
     # shallow copy of the weights present after loading
     initial_weights = experiment.norm[:]
     experiment.norm = None
     experiment.normalize_hic()
     self.assertEqual(initial_weights[0], experiment.norm[0])
Exemple #26
0
 def test_06_tad_clustering(self):
     """
     Align two TADs of the same experiment with optimal_cmo and compare
     both alignments with known values.
     """
     test_chr = Chromosome(name='Test Chromosome',
                           experiment_tads=[exp4],
                           experiment_names=['exp1'],
                           experiment_hic_data=['20Kb/chrT/chrT_D.tsv'],
                           experiment_resolutions=[20000,20000])
     # iter_tads yields pairs; only the second element is kept
     all_tads = [tad for _, tad in test_chr.iter_tads('exp1')]
     first, second = all_tads[7], all_tads[10]
     align1, align2, _ = optimal_cmo(first, second, 7, method='score')
     expected1 = [0, 1, '-', 2, 3, '-', 4, 5, 6, 7, 8, 9, 10]
     expected2 = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
     self.assertEqual(align1, expected1)
     self.assertEqual(align2, expected2)
Exemple #27
0
 def test_04_chromosome_batch(self):
     """
     Run TAD detection in batch mode over three experiments and compare
     the end positions of the positively scored TADs with known values.
     """
     hic_files = ['20Kb/chrT/chrT_A.tsv',
                  '20Kb/chrT/chrT_D.tsv',
                  '20Kb/chrT/chrT_C.tsv']
     test_chr = Chromosome(name='Test Chromosome',
                           experiment_resolutions=[20000] * 3,
                           experiment_hic_data=hic_files,
                           experiment_names=['exp1', 'exp2', 'exp3'])
     test_chr.find_tad(['exp1', 'exp2', 'exp3'], batch_mode=True,
                       verbose=False)
     tads = test_chr.get_experiment('batch_exp1_exp2_exp3').tads
     # keep the end of every TAD whose score is positive
     found = []
     for key in tads:
         if tads[key]['score'] > 0:
             found.append(tads[key]['end'])
     expected = [3.0, 8.0, 16.0, 21.0, 28.0, 35.0, 43.0,
                 49.0, 61.0, 66.0, 75.0, 89.0, 99.0]
     self.assertEqual(expected, found)
Exemple #28
0
def main():
    """
    Run a TAD-clustering trial on randomly altered copies of real TADs.

    Steps:
      1. load the 100000-resolution chr19 matrix named below and call TADs;
      2. collect every TAD of 'exp1' together with the second element of
         the matching ``iter_tads(..., normed=False)`` item (stored under
         'hic'), and pass them to ``get_hic_distr``; its two results are
         published as the module globals ``DISTRA`` and ``DISTRD``;
      3. draw ``n_pick`` distinct random TADs whose ``end - start`` is not
         below 15;
      4. derive ``n_tot - 1`` variants of each picked TAD with
         ``generate_random_contacts``;
      5. compute distances between the variants, pre-cluster them and
         hand everything to ``paint_clustering``.
    """
    global DISTRA, DISTRD

    n_pick = 4
    n_tot = 10

    chrom = Chromosome(name='Test Chromosome')
    hic_file = PATH + 'HIC_gm06690_chr19_chr19_100000_obs.txt'
    chrom.add_experiment('exp1', 100000, xp_handler=hic_file)
    chrom.find_tad(['exp1'])

    real_tads = {}
    for idx, item in enumerate(chrom.iter_tads('exp1', normed=False)):
        tad = chrom.experiments['exp1'].tads[idx]
        tad['hic'] = item[1]
        real_tads[idx] = tad
    DISTRA, DISTRD = get_hic_distr(real_tads)

    # Pick some TADs: keep drawing until n_pick distinct, large-enough ones
    # have been collected.
    picked_keys = []
    picked_tads = []
    while len(picked_keys) < n_pick:
        key, candidate = get_random_tad(real_tads)
        if key in picked_keys or (candidate['end'] - candidate['start'] < 15):
            continue
        picked_tads.append(candidate)
        picked_keys.append(key)

    # Mutate the picked TADs.
    # NOTE(review): every '<letter>_0' name is appended to tad_names, but
    # the matching matrix is never appended to tad_matrices, so the two
    # lists differ in length -- confirm this is what paint_clustering
    # expects.
    tads = {}
    tad_names = []
    tad_matrices = []
    for idx, source in enumerate(picked_tads):
        print(idx)
        letter = uppercase[idx]
        base_name = letter + '_' + str(0)
        tads[base_name] = source
        tad_names.append(base_name)
        for copy_no in range(1, n_tot):
            # Draw 'ext' before 'indel' to keep the original order of
            # random() calls.
            ext = int(random() * 4) + 1
            indel = int(random() * 4) + 1
            hic, _indels = generate_random_contacts(
                tad1=source['hic'], prob=0.05, ext=ext, indel=indel)[1:]
            variant_name = letter + '_' + str(copy_no)
            tads[variant_name] = {'hic': hic,
                                  'start': source['start'],
                                  'end': source['end']}
            tad_matrices.append(hic)
            tad_names.append(variant_name)

    n_matrices = len(tad_matrices)
    distances, cci = get_distances(tad_matrices, max_num_v=4,
                                   n_cpus=mu.cpu_count())
    results, clusters = pre_cluster(distances, cci, n_matrices)
    paint_clustering(results, clusters, n_matrices, chrom,
                     tad_names, tad_matrices)
Exemple #29
0
    def test_05_save_load(self):
        """
        Save a chromosome to disk, load it back and compare both objects.

        A chromosome with two experiments defined from TAD definitions is
        written to a file named ``lolo`` in the current directory and
        reloaded with ``load_chromosome``. The string forms of both
        objects' ``__dict__`` must be equal.

        The files ``lolo`` and ``lolo_hic`` are removed whether or not
        saving and loading succeed. When ``CHKTIME`` is set, the elapsed
        time is printed.
        """
        if CHKTIME:
            t0 = time()

        test_chr1 = Chromosome(name='Test Chromosome',
                               experiment_tads=[exp1, exp2],
                               experiment_names=['exp1', 'exp2'],
                               experiment_resolutions=[20000, 20000],
                               silent=True)
        try:
            test_chr1.save_chromosome('lolo', force=True)
            test_chr2 = load_chromosome('lolo')
        finally:
            # Clean up even when saving or loading raises, so a failing run
            # does not leave stray files behind for the next one.
            system('rm -f lolo')
            system('rm -f lolo_hic')
        self.assertEqual(str(test_chr1.__dict__), str(test_chr2.__dict__))
        if CHKTIME:
            # Single pre-formatted argument: prints the same text on
            # Python 2 and Python 3.
            print('5 %s' % (time() - t0))
Exemple #30
0
    def test_07_forbidden_regions(self):
        """
        Check TAD borders and the ``forbidden`` map with centromere search.

        A chromosome is created with ``centromere_search=True`` and a
        maximum TAD size of 260000. After adding 'exp1', the 'end' values of
        its positively scored TADs and the keys/values of ``forbidden`` are
        checked. After adding 'exp2', ``forbidden`` is checked again and is
        expected to hold only key 38.

        Skipped when ``ONLY`` is set and does not contain "07".
        """
        if ONLY and "07" not in ONLY:
            return
        if CHKTIME:
            t0 = time()

        chrom = Chromosome(name="Test Chromosome",
                           centromere_search=True,
                           max_tad_size=260000)
        chrom.add_experiment("exp1", 20000,
                             tad_def=exp4,
                             hic_data=PATH + "/20Kb/chrT/chrT_D.tsv",
                             silent=True)

        # Values with square root normalization were:
        #   [2.0, 7.0, 12.0, 18.0, 38.0, 43.0, 49.0,
        #    61.0, 66.0, 75.0, 89.0, 94.0, 99.0]
        expected_ends = [3.0, 14.0, 19.0, 33.0, 38.0, 43.0, 49.0,
                         61.0, 66.0, 71.0, 83.0, 89.0, 94.0, 99.0]
        exp1_tads = chrom.experiments["exp1"].tads
        found = []
        for key in exp1_tads:
            if exp1_tads[key]["score"] > 0:
                found.append(exp1_tads[key]["end"])
        self.assertEqual(expected_ends, found)

        # Snapshot of the forbidden regions before the second experiment
        # is added.
        forbidden_after_exp1 = (list(chrom.forbidden.keys()),
                                list(chrom.forbidden.values()))

        chrom.add_experiment("exp2", 20000,
                             tad_def=exp3,
                             hic_data=PATH + "/20Kb/chrT/chrT_C.tsv",
                             silent=True)
        forbidden_after_exp2 = (list(chrom.forbidden.keys()),
                                list(chrom.forbidden.values()))

        # Alternative expected value left commented out in the original:
        #   ([32, 33, 34, 38, 39, 19, 20, 21, 22,
        #     23, 24, 25, 26, 27, 28, 29, 30, 31],
        #    [None, None, None, "Centromere", "Centromere",
        #     None, None, None, None, None, None, None,
        #     None, None, None, None, None, None])
        expected_after_exp1 = ([38, 39], ["Centromere", "Centromere"])
        expected_after_exp2 = ([38], ["Centromere"])
        self.assertEqual(forbidden_after_exp1, expected_after_exp1)
        self.assertEqual(forbidden_after_exp2, expected_after_exp2)

        if CHKTIME:
            print("7", time() - t0)