Ejemplo n.º 1
0
def main():
    """
    Build 3D models from z-scores or from a chromosome handler, then export.

    When ``opts.inabc`` is set the z-scores are parsed from it and handed to
    generate_3d_models; otherwise a Chromosome with a single experiment is
    created from ``opts.incrm`` and model_region is called on it. Depending on
    the options the models are then saved, written as CMM files and analysed.
    """
    opts, params = get_options()

    # Keyword arguments common to both modelling entry points.
    shared = dict(n_models=opts.nmodels, n_keep=opts.nkeep,
                  n_cpus=opts.ncpus, keep_all=False, verbose=False,
                  config=params)

    if not opts.inabc:
        experiment_name = 'X'
        chromosome = Chromosome('crm')
        chromosome.add_experiment(experiment_name,
                                  resolution=opts.resolution,
                                  xp_handler=opts.incrm)
        experiment = chromosome.experiments[experiment_name]
        models = experiment.model_region(start=opts.start, end=opts.end,
                                         **shared)
    else:
        zscores = parse_zscores(opts.inabc)
        models = generate_3d_models(zscores, opts.resolution, start=1,
                                    outfile=None, **shared)

    if opts.save:
        # File name carries opts.start and opts.start + opts.nmodels.
        last = opts.start + opts.nmodels
        models.save_models('%s/models_%s_%s.pik' % (opts.out, opts.start,
                                                    last))

    # One CMM file per model index below opts.cmm.
    for model_index in xrange(int(opts.cmm)):
        models.write_cmm(model_index, opts.out)

    if not opts.full_report:
        return
    models.cluster_models(dcutoff=200)
    models.cluster_analysis_dendrogram(n_best_clusters=10)
    models.model_consistency()
Ejemplo n.º 2
0
def load_hic_data(opts, xnames):
    """
    Create a Chromosome and register one Hi-C experiment per input file.

    :param opts: options object; the attributes read here are crm, species,
       centromere, assembly, data, norm, enzyme, cell, identifier, project,
       res, beg and end. ``opts.end`` is overwritten when it exceeds the
       size of the last experiment.
    :param xnames: experiment names, paired by position with ``opts.data``
       and ``opts.norm``

    :returns: the Chromosome holding the loaded experiments

    :raises Exception: if ``opts.beg`` is larger than the size of the last
       experiment
    """
    # 'first_second...' becomes 'Firstsecond'; a value without '_' is used
    # verbatim.
    species = opts.species
    if '_' in species:
        pieces = species.split('_')
        species = pieces[0].capitalize() + pieces[1]

    crm = Chromosome(opts.crm, species=species,
                     centromere_search=opts.centromere,
                     assembly=opts.assembly)

    # NOTE(review): the comment previously here mentioned three data sets
    # (TR1, TR2 and BR) from Hou et al. (2012) Molecular Cell,
    # doi:10.1016/j.molcel.2012.08.031; the code loads whatever opts.data
    # lists.
    logging.info("\tReading input data...")
    for xnam, xpath, xnorm in zip(xnames, opts.data, opts.norm):
        crm.add_experiment(
            xnam,
            exp_type='Hi-C',
            enzyme=opts.enzyme,
            cell_type=opts.cell,
            identifier=opts.identifier,
            project=opts.project,
            resolution=opts.res,
            hic_data=xpath,
            norm_data=xnorm)
        if xnorm:
            continue
        # No normalized matrix was supplied for this experiment.
        logging.info("\tNormalizing HiC data of %s..." % xnam)
        crm.experiments[xnam].normalize_hic(iterations=5)

    # Bounds are checked against the last experiment only.
    size = crm.experiments[-1].size
    if opts.beg > size:
        raise Exception('ERROR: beg parameter is larger than chromosome size.')
    if opts.end > size:
        logging.info('WARNING: end parameter is larger than chromosome ' +
                     'size. Setting end to %s.\n' % (size * opts.res))
        opts.end = size
    return crm
Ejemplo n.º 3
0
    def test_11_write_interaction_pairs(self):
        """
        Write the interaction-pair file and check its length and two rows.

        The pairs are written to a scratch file named "lala" in the current
        working directory; the file is removed before the test returns.
        """
        # The guard comes after the docstring so the string above is a real
        # docstring instead of a discarded expression statement.
        if ONLY and "11" not in ONLY:
            return
        if CHKTIME:
            t0 = time()

        test_chr = Chromosome(name="Test Chromosome", max_tad_size=260000)
        test_chr.add_experiment("exp1",
                                20000,
                                tad_def=exp4,
                                hic_data=PATH + "/20Kb/chrT/chrT_D.tsv")
        exp = test_chr.experiments[0]
        exp.load_hic_data(PATH + "/20Kb/chrT/chrT_A.tsv", silent=True)
        exp.filter_columns(silent=True)
        exp.normalize_hic(factor=None, silent=True)
        exp.get_hic_zscores(zscored=False)
        try:
            exp.write_interaction_pairs("lala")
            # Close the handle deterministically instead of leaking it.
            with open("lala") as handle:
                lines = handle.readlines()
            self.assertEqual(len(lines), 4674)
            self.assertEqual(lines[25], "1\t28\t0.612332461036\n")
            self.assertEqual(lines[2000], "26\t70\t0.0738742984321\n")
        finally:
            # Remove the scratch file even when an assertion fails.
            system("rm -f lala")
        if CHKTIME:
            # Same output as the old print statement, valid on Python 2 and 3.
            print("%s %s" % ("11", time() - t0))
Ejemplo n.º 4
0
    def test_11_write_interaction_pairs(self):
        """
        Write the interaction-pair file and check its length and two rows.

        The pairs are written to a scratch file named 'lala' in the current
        working directory; the file is removed before the test returns.
        """
        # The guard comes after the docstring so the string above is a real
        # docstring instead of a discarded expression statement.
        if ONLY and ONLY != '11':
            return
        if CHKTIME:
            t0 = time()

        test_chr = Chromosome(name='Test Chromosome', max_tad_size=260000)
        test_chr.add_experiment('exp1', 20000, tad_def=exp4,
                                hic_data=PATH + '/20Kb/chrT/chrT_D.tsv')
        exp = test_chr.experiments[0]
        exp.load_hic_data(PATH + '/20Kb/chrT/chrT_A.tsv', silent=True)
        exp.filter_columns(silent=True)
        exp.normalize_hic(factor=None, silent=True)
        exp.get_hic_zscores(zscored=False)
        try:
            exp.write_interaction_pairs('lala')
            # Close the handle deterministically instead of leaking it.
            with open('lala') as handle:
                lines = handle.readlines()
            self.assertEqual(len(lines), 4674)
            self.assertEqual(lines[25], '1\t28\t0.612332461036\n')
            self.assertEqual(lines[2000], '26\t70\t0.0738742984321\n')
        finally:
            # Remove the scratch file even when an assertion fails.
            system('rm -f lala')
        if CHKTIME:
            # Same output as the old print statement, valid on Python 2 and 3.
            print('%s %s' % ('11', time() - t0))
Ejemplo n.º 5
0
def main():
    """
    Model a region from a normalized matrix using command-line settings.

    Positional arguments (read from sys.argv):
      1. path to the normalized matrix
      2. 'upfreq:lowfreq:maxdist' configuration string
      3. 'n_models:n_keep' string

    The resulting models are saved to 'models_<configuration string>.pickle'.
    """
    matrix_path = sys.argv[1]
    config_string = sys.argv[2]
    compute_keep = sys.argv[3]

    upfreq_txt, lowfreq_txt, maxdist_txt = config_string.split(':')
    lowfreq = float(lowfreq_txt)
    upfreq = float(upfreq_txt)
    maxdist = int(maxdist_txt)

    config = dict(reference='', kforce=5, maxdist=maxdist, upfreq=upfreq,
                  lowfreq=lowfreq, scale=0.01, kbending=0.0)

    compute, keep = [int(field) for field in compute_keep.split(':')]

    chrom = Chromosome('chr')
    chrom.add_experiment('sample', norm_data=matrix_path, resolution=15000)
    experiment = chrom.experiments[0]

    models = experiment.model_region(n_models=compute, n_keep=keep,
                                     n_cpus=8, config=config)
    models.save_models('models_%s.pickle' % (config_string))
Ejemplo n.º 6
0
    def test_04_chromosome_batch(self):
        """
        Run find_tad in batch mode over three experiments and compare the
        'end' of every TAD with a positive score against reference values.
        """
        if ONLY and ONLY != '04':
            return
        if CHKTIME:
            t0 = time()

        hic_files = [PATH + '/20Kb/chrT/chrT_A.tsv',
                     PATH + '/20Kb/chrT/chrT_D.tsv',
                     PATH + '/20Kb/chrT/chrT_C.tsv']
        test_chr = Chromosome(name='Test Chromosome',
                              experiment_resolutions=[20000, 20000, 20000],
                              experiment_hic_data=hic_files,
                              experiment_names=['exp1', 'exp2', 'exp3'],
                              silent=True)
        test_chr.find_tad(['exp1', 'exp2', 'exp3'], batch_mode=True,
                          verbose=False, silent=True)
        tads = test_chr.get_experiment('batch_exp1_exp2_exp3').tads

        # Keep the end position of each TAD whose score is positive.
        found = []
        for key in tads:
            tad = tads[key]
            if tad['score'] > 0:
                found.append(tad['end'])

        # Reference obtained with square root normalization was:
        #   [3.0, 8.0, 16.0, 21.0, 28.0, 35.0, 43.0,
        #    49.0, 61.0, 66.0, 75.0, 89.0, 94.0, 99.0]
        expected = [3.0, 14.0, 19.0, 33.0, 43.0, 49.0, 61.0, 66.0,
                    71.0, 89.0, 94.0, 99.0]
        self.assertEqual(expected, found)

        if CHKTIME:
            print('%s %s' % ('4', time() - t0))
Ejemplo n.º 7
0
    def test_06_tad_clustering(self):
        """
        Align two TADs of experiment 'exp1' with optimal_cmo and compare both
        returned alignments with stored reference lists.
        """
        if CHKTIME:
            t0 = time()

        hic_files = [PATH + '/20Kb/chrT/chrT_D.tsv']
        test_chr = Chromosome(name='Test Chromosome',
                              experiment_tads=[exp4],
                              experiment_names=['exp1'],
                              experiment_hic_data=hic_files,
                              experiment_resolutions=[20000, 20000],
                              silent=True)
        # iter_tads yields pairs; only the second item of each pair is kept.
        all_tads = [tad for _, tad in test_chr.iter_tads('exp1')]

        # An earlier version aligned all_tads[7] against all_tads[10].
        first, second = all_tads[1], all_tads[3]
        align1, align2, _ = optimal_cmo(first, second, 7, method='score')

        # Reference values with square root normalization were:
        #   align1 = [0, 1, '-', 2, 3, '-', 4, 5, 6, 7, 8, 9, 10]
        #   align2 = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
        expected1 = [0, 1, 2, '-', '-', 3, 4, 5, 6, 7, 8, '-', 9]
        expected2 = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
        self.assertEqual(align1, expected1)
        self.assertEqual(align2, expected2)
        if CHKTIME:
            print('%s %s' % ('6', time() - t0))
Ejemplo n.º 8
0
def load_genome_from_tad_def(genome_path, res, verbose=False):
    """
    Build one Chromosome per TAD definition file found in a directory.

    Every regular file directly inside genome_path is loaded as a TAD
    definition; other entries (e.g. sub-directories) are skipped. The
    chromosome name is the file name with '.tsv' and 'chr' removed, then
    upper-cased.

    :param genome_path: directory to scan for TAD definition files
    :param res: resolution given to the 'sample' experiment of each
       chromosome
    :param False verbose: print each chromosome name as it is loaded

    :returns: a dictionary mapping chromosome name to its Chromosome object

    :raises Exception: if two files resolve to the same chromosome name
    """
    ref_genome = {}
    for fname in listdir(genome_path):
        crm_path = os.path.join(genome_path, fname)
        if not isfile(crm_path):
            continue
        crm = fname.replace('.tsv', '').replace('chr', '').upper()
        # The dictionary is keyed by the normalized name, so the duplicate
        # check must use it too; testing the raw file name missed collisions
        # such as 'chr1.tsv' and 'CHR1.tsv' and silently overwrote an entry.
        if crm in ref_genome:
            raise Exception('More than 1 TAD definition file found\n')
        if verbose:
            # Same output as the old print statement, valid on Python 2 and 3.
            print('%s %s' % ('  Chromosome:', crm))
        crmO = Chromosome(crm)
        crmO.add_experiment('sample', res)
        crmO.experiments[0].load_tad_def(crm_path)
        ref_genome[crm] = crmO
    return ref_genome
Ejemplo n.º 9
0
    def test_11_write_interaction_pairs(self):
        """
        Write the interaction-pair file and check its length and two rows.

        The pairs are written to a scratch file named 'lala' in the current
        working directory; the file is removed before the test returns.
        """
        # The guard comes after the docstring so the string above is a real
        # docstring instead of a discarded expression statement.
        if ONLY and ONLY != '11':
            return
        if CHKTIME:
            t0 = time()

        test_chr = Chromosome(name='Test Chromosome', max_tad_size=260000)
        test_chr.add_experiment('exp1', 20000, tad_def=exp4,
                                hic_data=PATH + '/20Kb/chrT/chrT_D.tsv')
        exp = test_chr.experiments[0]
        exp.load_hic_data(PATH + '/20Kb/chrT/chrT_A.tsv', silent=True)
        exp.filter_columns(silent=True)
        exp.normalize_hic(factor=None, silent=True)
        exp.get_hic_zscores(zscored=False)
        try:
            exp.write_interaction_pairs('lala')
            # Close the handle deterministically instead of leaking it.
            with open('lala') as handle:
                lines = handle.readlines()
            self.assertEqual(len(lines), 4674)
            self.assertEqual(lines[25], '1\t28\t0.612332461036\n')
            self.assertEqual(lines[2000], '26\t70\t0.0738742984321\n')
        finally:
            # Remove the scratch file even when an assertion fails.
            system('rm -f lala')
        if CHKTIME:
            # Same output as the old print statement, valid on Python 2 and 3.
            print('%s %s' % ('11', time() - t0))
Ejemplo n.º 10
0
    def test_06_tad_clustering(self):
        """
        Align two TADs of experiment "exp1" (iterated with normed=False) using
        optimal_cmo and compare both alignments with stored reference lists.
        """
        if ONLY and ONLY != "06":
            return
        if CHKTIME:
            t0 = time()

        chromosome_kwargs = dict(
            name="Test Chromosome",
            experiment_tads=[exp4],
            experiment_names=["exp1"],
            experiment_hic_data=[PATH + "/20Kb/chrT/chrT_D.tsv"],
            experiment_resolutions=[20000, 20000],
            silent=True,
        )
        test_chr = Chromosome(**chromosome_kwargs)

        # iter_tads yields pairs; only the second item of each pair is kept.
        all_tads = [tad for _, tad in test_chr.iter_tads("exp1", normed=False)]

        # An earlier version aligned all_tads[7] against all_tads[10].
        align1, align2, _ = optimal_cmo(
            all_tads[1], all_tads[3], 7, method="score"
        )

        # Reference values with square root normalization were:
        #   align1 = [0, 1, '-', 2, 3, '-', 4, 5, 6, 7, 8, 9, 10]
        #   align2 = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
        expected1 = [0, 1, 2, "-", "-", 3, 4, 5, 6, 7, 8, "-", 9]
        expected2 = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
        self.assertEqual(align1, expected1)
        self.assertEqual(align2, expected2)
        if CHKTIME:
            print("%s %s" % ("6", time() - t0))
Ejemplo n.º 11
0
    def test_12_3d_modelling_optimization(self):
        """
        Run optimal_imp_parameters on a small region and check that the best
        parameter set matches the expected one.

        The test returns early, after a warning, when the IMP module cannot
        be imported.
        """
        if CHKTIME:
            t0 = time()

        try:
            __import__('IMP')
        except ImportError:
            warn('IMP not found, skipping test\n')
            return
        test_chr = Chromosome(name='Test Chromosome', max_tad_size=260000)
        test_chr.add_experiment('exp1', 20000, tad_def=exp4,
                                hic_data=PATH + '/20Kb/chrT/chrT_D.tsv')
        exp = test_chr.experiments[0]
        exp.load_hic_data(PATH + '/20Kb/chrT/chrT_A.tsv')
        exp.filter_columns(silent=True)
        exp.normalize_hic(silent=True, factor=None)
        result = exp.optimal_imp_parameters(50, 70, n_cpus=4,
                                            n_models=8, n_keep=2,
                                            lowfreq_range=[-0.6],
                                            upfreq_range=(0, 1.1, 1.1),
                                            maxdist_range=[500, 600],
                                            verbose=False)

        # get best correlations
        config = result.get_best_parameters_dict()
        wanted = {'maxdist': 600.0, 'upfreq': 0.0, 'kforce': 5,
                  'dcutoff': 2,
                  'reference': '', 'lowfreq': -0.6, 'scale': 0.01}

        def rounded_numbers(params):
            # Non-string entries only, keyed by name and rounded to 4 places.
            return dict((key, round(value, 4))
                        for key, value in params.items()
                        if not isinstance(value, str))

        # Compare by key: the previous comparison of two values() lists
        # depended on both dictionaries iterating in the same order.
        self.assertEqual(rounded_numbers(config), rounded_numbers(wanted))
        if CHKTIME:
            # Same output as the old print statement, valid on Python 2 and 3.
            print('%s %s' % ('12', time() - t0))
Ejemplo n.º 12
0
    def test_07_forbidden_regions(self):
        """
        Check the TAD ends of "exp1" and the content of test_chr.forbidden
        before and after a second experiment is added.
        """
        if ONLY and ONLY != "07":
            return
        if CHKTIME:
            t0 = time()

        test_chr = Chromosome(
            name="Test Chromosome",
            max_tad_size=260000,
            centromere_search=True,
        )
        test_chr.add_experiment(
            "exp1",
            20000,
            tad_def=exp4,
            hic_data=PATH + "/20Kb/chrT/chrT_D.tsv",
            silent=True,
        )
        # Reference values with square root normalization were:
        #   [2.0, 7.0, 12.0, 18.0, 38.0, 43.0, 49.0,
        #    61.0, 66.0, 75.0, 89.0, 94.0, 99.0]
        brks = [3.0, 14.0, 19.0, 33.0, 38.0, 43.0, 49.0,
                61.0, 66.0, 71.0, 83.0, 89.0, 94.0, 99.0]
        tads = test_chr.experiments["exp1"].tads
        found = []
        for key in tads:
            tad = tads[key]
            if tad["score"] > 0:
                found.append(tad["end"])
        self.assertEqual(brks, found)

        # Snapshot taken before the second experiment is added.
        # NOTE(review): the comparisons below use lists, which matches
        # keys()/values() only where they return lists (Python 2) - confirm.
        items1 = (test_chr.forbidden.keys(), test_chr.forbidden.values())
        test_chr.add_experiment(
            "exp2",
            20000,
            tad_def=exp3,
            hic_data=PATH + "/20Kb/chrT/chrT_C.tsv",
            silent=True,
        )
        items2 = (test_chr.forbidden.keys(), test_chr.forbidden.values())

        # An earlier expectation for the first snapshot was:
        #   ([32, 33, 34, 38, 39, 19, 20, 21, 22,
        #     23, 24, 25, 26, 27, 28, 29, 30, 31],
        #    [None, None, None, 'Centromere', 'Centromere',
        #     None, None, None, None, None, None, None,
        #     None, None, None, None, None, None])
        know1 = ([38, 39], ["Centromere", "Centromere"])
        know2 = ([38], ["Centromere"])
        self.assertEqual(items1, know1)
        self.assertEqual(items2, know2)
        if CHKTIME:
            print("%s %s" % ("7", time() - t0))
Ejemplo n.º 13
0
    def test_04_chromosome_batch(self):
        """
        Run find_tad in batch mode over three experiments and compare the
        "end" of every TAD with a positive score against reference values.
        """
        if ONLY and ONLY != "04":
            return
        if CHKTIME:
            t0 = time()

        hic_files = [
            PATH + "/20Kb/chrT/chrT_A.tsv",
            PATH + "/20Kb/chrT/chrT_D.tsv",
            PATH + "/20Kb/chrT/chrT_C.tsv",
        ]
        test_chr = Chromosome(
            name="Test Chromosome",
            experiment_resolutions=[20000, 20000, 20000],
            experiment_hic_data=hic_files,
            experiment_names=["exp1", "exp2", "exp3"],
            silent=True,
        )
        test_chr.find_tad(
            ["exp1", "exp2", "exp3"],
            batch_mode=True,
            verbose=False,
            silent=True,
        )
        tads = test_chr.get_experiment("batch_exp1_exp2_exp3").tads

        # Keep the end position of each TAD whose score is positive.
        found = []
        for key in tads:
            tad = tads[key]
            if tad["score"] > 0:
                found.append(tad["end"])

        # Reference obtained with square root normalization was:
        #   [3.0, 8.0, 16.0, 21.0, 28.0, 35.0, 43.0,
        #    49.0, 61.0, 66.0, 75.0, 89.0, 94.0, 99.0]
        expected = [3.0, 14.0, 19.0, 33.0, 43.0, 49.0,
                    61.0, 66.0, 71.0, 89.0, 94.0, 99.0]
        self.assertEqual(expected, found)

        if CHKTIME:
            print("%s %s" % ("4", time() - t0))
Ejemplo n.º 14
0
    def test_08_changing_resolution(self):
        """
        Change the experiment resolution several times and check that the
        sum of the first Hi-C matrix values is the same at every step.
        """
        if CHKTIME:
            t0 = time()

        test_chr = Chromosome(name='Test Chromosome', max_tad_size=260000)
        test_chr.add_experiment('exp1', 20000, tad_def=exp4,
                                hic_data=PATH + '/20Kb/chrT/chrT_D.tsv',
                                silent=True)
        exp = test_chr.experiments['exp1']

        # Total at the initial resolution; check_hic is not run for it.
        totals = [sum(exp.hic_data[0].values())]
        for resolution in (80000, 160000, 360000, 2400000,
                           40000, 20000, 40000):
            exp.set_resolution(resolution)
            totals.append(sum(exp.hic_data[0].values()))
            check_hic(exp.hic_data[0], exp.size)

        # Every total must equal its neighbour, i.e. all are equal.
        self.assertTrue(all(left == right
                            for left, right in zip(totals, totals[1:])))
        if CHKTIME:
            print('%s %s' % ('8', time() - t0))
Ejemplo n.º 15
0
    def test_03_tad_multi_aligner(self):
        """
        Align four experiments and check the score and p-values returned by
        align_experiments when randomization is requested.
        """
        if CHKTIME:
            t0 = time()

        hic_files = [PATH + '/40Kb/chrT/chrT_A.tsv',
                     PATH + '/20Kb/chrT/chrT_B.tsv',
                     PATH + '/20Kb/chrT/chrT_C.tsv',
                     PATH + '/20Kb/chrT/chrT_D.tsv']
        test_chr = Chromosome(
            name='Test Chromosome', centromere_search=True,
            experiment_tads=[exp1, exp2, exp3, exp4],
            experiment_hic_data=hic_files,
            experiment_names=['exp1', 'exp2', 'exp3', 'exp4'],
            experiment_resolutions=[40000, 20000, 20000, 20000],
            silent=True)
        for experiment in test_chr.experiments:
            experiment.normalize_hic(silent=True, factor=None)

        # First call without randomization; its return value is not used.
        test_chr.align_experiments(verbose=False, randomize=False,
                                   method='global')

        _, global_stats = test_chr.align_experiments(
            verbose=False, method='global', randomize=True, rnd_num=100)
        score1, pval1 = global_stats

        _, shuffle_stats = test_chr.align_experiments(
            verbose=False, randomize=True, rnd_method='shuffle', rnd_num=100)
        _, pval2 = shuffle_stats

        # Expectations for alignments obtained with square root
        # normalization were: score1 ~ -26.095, pval1 ~ 0.001 (1 decimal),
        # abs(0.175 - pval2) < 0.2.
        self.assertEqual(round(-11.002, 3), round(score1, 3))
        self.assertEqual(round(0.001, 1), round(pval1, 1))
        self.assertTrue(abs(0.04 - pval2) < 0.1)
        if CHKTIME:
            print('%s %s' % ('3', time() - t0))
Ejemplo n.º 16
0
def load_genome_from_tad_def(genome_path, res, verbose=False):
    """
    Build one Chromosome per TAD definition file found in a directory.

    Every regular file directly inside genome_path is loaded as a TAD
    definition; other entries (e.g. sub-directories) are skipped. The
    chromosome name is the file name with '.tsv' and 'chr' removed, then
    upper-cased.

    :param genome_path: directory to scan for TAD definition files
    :param res: resolution given to the 'sample' experiment of each
       chromosome
    :param False verbose: print each chromosome name as it is loaded

    :returns: a dictionary mapping chromosome name to its Chromosome object

    :raises Exception: if two files resolve to the same chromosome name
    """
    ref_genome = {}
    for fname in listdir(genome_path):
        crm_path = os.path.join(genome_path, fname)
        if not isfile(crm_path):
            continue
        crm = fname.replace('.tsv', '').replace('chr', '').upper()
        # The dictionary is keyed by the normalized name, so the duplicate
        # check must use it too; testing the raw file name missed collisions
        # such as 'chr1.tsv' and 'CHR1.tsv' and silently overwrote an entry.
        if crm in ref_genome:
            raise Exception('More than 1 TAD definition file found\n')
        if verbose:
            # Same output as the old print statement, valid on Python 2 and 3.
            print('%s %s' % ('  Chromosome:', crm))
        crmO = Chromosome(crm)
        crmO.add_experiment('sample', res)
        crmO.experiments[0].load_tad_def(crm_path)
        ref_genome[crm] = crmO
    return ref_genome
Ejemplo n.º 17
0
 def test_08_changing_resolution(self):
     """
     Change the experiment resolution several times and check that
     sum(exp.hic_data[0]) is the same at every step.
     """
     test_chr = Chromosome(name='Test Chromosome', max_tad_size=260000)
     test_chr.add_experiment('exp1', 20000, tad_def=exp4,
                             hic_data='20Kb/chrT/chrT_D.tsv')
     exp = test_chr.experiments['exp1']

     # Total at the initial resolution; check_hic is not run for it.
     totals = [sum(exp.hic_data[0])]
     for resolution in (80000, 160000, 360000, 2400000,
                        40000, 20000, 40000):
         exp.set_resolution(resolution)
         totals.append(sum(exp.hic_data[0]))
         check_hic(exp.hic_data[0], exp.size)

     # Every total must equal its neighbour, i.e. all are equal.
     self.assertTrue(all(left == right
                         for left, right in zip(totals, totals[1:])))
Ejemplo n.º 18
0
    def test_09_hic_normalization(self):
        """
        Normalize the Hi-C data, compute the z-scores and check the sum of
        all values stored in exp._zscores.
        """
        if ONLY and "09" not in ONLY:
            return
        if CHKTIME:
            t0 = time()

        test_chr = Chromosome(name="Test Chromosome", max_tad_size=260000)
        test_chr.add_experiment("exp1", 20000, tad_def=exp4,
                                hic_data=PATH + "/20Kb/chrT/chrT_D.tsv",
                                silent=True)
        exp = test_chr.experiments[0]
        exp.load_hic_data(PATH + "/20Kb/chrT/chrT_A.tsv", silent=True)
        exp.normalize_hic(silent=True)
        # Called twice on purpose, as before: default first, then
        # zscored=False.
        exp.get_hic_zscores()
        exp.get_hic_zscores(zscored=False)

        # Accumulate every value of the two-level _zscores mapping.
        sumz = 0
        for outer in exp._zscores.keys():
            for inner in exp._zscores[outer]:
                sumz += exp._zscores[outer][inner]
        self.assertEqual(round(sumz, 4), round(4059.2877, 4))
        if CHKTIME:
            print("%s %s" % ("9", time() - t0))
Ejemplo n.º 19
0
def main():
    """
    Load every sample given on the command line into one Chromosome and
    save it to '<output>.tdb'.

    Each sample is a 'label,path' string; label and path are printed and
    then passed to getHiCData together with the output prefix, resolution
    and CPU count.
    """
    args = getArgs()
    output = args.o
    resolution = args.r
    ncpu = args.p

    # Chromosome object that will store all Hi-C data and analysis.
    my_chrom = Chromosome(
        name=args.c,             # chromosome name
        centromere_search=True,  # whether to detect the centromere
        species=args.s,
        assembly=args.b,         # genome build
    )

    for sample in args.i:
        label, path = sample.split(",")
        print(label)
        print(path)
        getHiCData(my_chrom, output, label, path, resolution, ncpu)

    # Creation of a 'tdb' directory was disabled here:
    #    if not os.path.exists('tdb'):
    #       os.makedirs("tdb")

    my_chrom.save_chromosome(output + ".tdb", force=True)
Ejemplo n.º 20
0
    def test_08_changing_resolution(self):
        """
        Change the experiment resolution several times and check that the
        sum of the first Hi-C matrix values is the same at every step.
        """
        if ONLY and ONLY != "08":
            return
        if CHKTIME:
            t0 = time()

        test_chr = Chromosome(name="Test Chromosome", max_tad_size=260000)
        test_chr.add_experiment(
            "exp1",
            20000,
            tad_def=exp4,
            hic_data=PATH + "/20Kb/chrT/chrT_D.tsv",
            silent=True,
        )
        exp = test_chr.experiments["exp1"]

        # Total at the initial resolution; check_hic is not run for it.
        initial_total = sum(exp.hic_data[0].values())
        later_totals = []
        for resolution in (80000, 160000, 360000, 2400000, 40000, 20000, 40000):
            exp.set_resolution(resolution)
            later_totals.append(sum(exp.hic_data[0].values()))
            check_hic(exp.hic_data[0], exp.size)

        # Every total must equal its neighbour, i.e. all are equal.
        sequence = [initial_total] + later_totals
        unchanged = all(a == b for a, b in zip(sequence, sequence[1:]))
        self.assertTrue(unchanged)
        if CHKTIME:
            print("%s %s" % ("8", time() - t0))
Ejemplo n.º 21
0
def main():
    """
    Load, normalize and sum Hi-C experiments, then write the result.

    One experiment is created per entry of ``opts.data`` (resolution taken
    from the matching entry of ``opts.resolution``) and normalized. With
    more than one data set the experiments are added together. The result
    is written to ``opts.output`` either as interaction pairs (``opts.abc``)
    or as a Hi-C matrix.

    ``opts.output`` may be a path or an object with a ``write`` method; a
    file opened here from a path is closed before returning, while a
    caller-supplied object is left open.
    """

    opts = get_options()
    crm = Chromosome(':P')

    for i, data in enumerate(opts.data):
        name = 'exp' + str(i)
        crm.add_experiment(name, resolution=int(opts.resolution[i]),
                           hic_data=data)
        crm.experiments[name].normalize_hic()

    if len(opts.data) > 1:
        exp = crm.experiments[0] + crm.experiments[1]
        for i in range(2, len(opts.data)):
            exp += crm.experiments[i]
    else:
        exp = crm.experiments[0]

    if opts.abc:
        exp.write_interaction_pairs(opts.output, normalized=opts.norm,
                                    zscored=False)
        return

    matrix = exp.print_hic_matrix(print_it=False, normalized=opts.norm)
    # Duck-typed check instead of `type(...) == file`, so any writable
    # object is accepted and the code does not depend on the Python 2 `file`
    # builtin.
    if hasattr(opts.output, 'write'):
        opts.output.write(matrix)
    else:
        # The handle used to be left open; close it once written.
        with open(opts.output, 'w') as out:
            out.write(matrix)
Ejemplo n.º 22
0
    def test_06_tad_clustering(self):
        """
        Align two TADs of experiment 'exp1' (iterated with normed=False) using
        optimal_cmo and compare both alignments with stored reference lists.
        """
        if ONLY and ONLY != '06':
            return
        if CHKTIME:
            t0 = time()

        hic_files = [PATH + '/20Kb/chrT/chrT_D.tsv']
        test_chr = Chromosome(name='Test Chromosome',
                              experiment_tads=[exp4],
                              experiment_names=['exp1'],
                              experiment_hic_data=hic_files,
                              experiment_resolutions=[20000, 20000],
                              silent=True)
        # iter_tads yields pairs; only the second item of each pair is kept.
        all_tads = [tad
                    for _, tad in test_chr.iter_tads('exp1', normed=False)]

        # An earlier version aligned all_tads[7] against all_tads[10].
        first, second = all_tads[1], all_tads[3]
        align1, align2, _ = optimal_cmo(first, second, 7, method='score')

        # Reference values with square root normalization were:
        #   align1 = [0, 1, '-', 2, 3, '-', 4, 5, 6, 7, 8, 9, 10]
        #   align2 = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
        expected1 = [0, 1, 2, '-', '-', 3, 4, 5, 6, 7, 8, '-', 9]
        expected2 = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
        self.assertEqual(align1, expected1)
        self.assertEqual(align2, expected2)
        if CHKTIME:
            print('%s %s' % ('6', time() - t0))
Ejemplo n.º 23
0
    def test_06_tad_clustering(self):
        """
        Align two TADs of experiment "exp1" (iterated with normed=False) using
        optimal_cmo and compare both alignments with stored reference lists.
        """
        if ONLY and "06" not in ONLY:
            return
        if CHKTIME:
            t0 = time()

        hic_files = [PATH + "/20Kb/chrT/chrT_D.tsv"]
        test_chr = Chromosome(name="Test Chromosome",
                              experiment_tads=[exp4],
                              experiment_names=["exp1"],
                              experiment_hic_data=hic_files,
                              experiment_resolutions=[20000, 20000],
                              silent=True)
        # iter_tads yields pairs; only the second item of each pair is kept.
        all_tads = [tad
                    for _, tad in test_chr.iter_tads("exp1", normed=False)]

        # An earlier version aligned all_tads[7] against all_tads[10].
        first, second = all_tads[1], all_tads[3]
        align1, align2, _ = optimal_cmo(first, second, 7, method="score")

        # Reference values with square root normalization were:
        #   align1 = [0, 1, "-", 2, 3, "-", 4, 5, 6, 7, 8, 9, 10]
        #   align2 = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
        expected1 = [0, 1, 2, "-", "-", 3, 4, 5, 6, 7, 8, "-", 9]
        expected2 = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
        self.assertEqual(align1, expected1)
        self.assertEqual(align2, expected2)
        if CHKTIME:
            print("%s %s" % ("6", time() - t0))
Ejemplo n.º 24
0
    def test_04_chromosome_batch(self):
        """
        Run find_tad in batch mode over three experiments and compare the
        'end' of every TAD with a positive score against reference values.
        """
        if ONLY and ONLY != '04':
            return
        if CHKTIME:
            t0 = time()

        chromosome_kwargs = dict(
            name='Test Chromosome',
            experiment_resolutions=[20000, 20000, 20000],
            experiment_hic_data=[PATH + '/20Kb/chrT/chrT_A.tsv',
                                 PATH + '/20Kb/chrT/chrT_D.tsv',
                                 PATH + '/20Kb/chrT/chrT_C.tsv'],
            experiment_names=['exp1', 'exp2', 'exp3'],
            silent=True)
        test_chr = Chromosome(**chromosome_kwargs)
        test_chr.find_tad(['exp1', 'exp2', 'exp3'], batch_mode=True,
                          verbose=False, silent=True)
        tads = test_chr.get_experiment('batch_exp1_exp2_exp3').tads

        # Keep the end position of each TAD whose score is positive.
        found = []
        for key in tads:
            entry = tads[key]
            if entry['score'] > 0:
                found.append(entry['end'])

        # Reference obtained with square root normalization was:
        #   [3.0, 8.0, 16.0, 21.0, 28.0, 35.0, 43.0,
        #    49.0, 61.0, 66.0, 75.0, 89.0, 94.0, 99.0]
        expected = [3.0, 14.0, 19.0, 33.0, 43.0, 49.0, 61.0, 66.0,
                    71.0, 89.0, 94.0, 99.0]
        self.assertEqual(expected, found)

        if CHKTIME:
            print('%s %s' % ('4', time() - t0))
Ejemplo n.º 25
0
    def test_13_3d_modelling_centroid(self):
        """
        Model a region with model_region, save the models to "models.pick"
        and call average_model on them.

        The test returns early, after a warning, when the IMP module cannot
        be imported.
        """
        if ONLY and "13" not in ONLY:
            return
        if CHKTIME:
            t0 = time()

        try:
            __import__("IMP")
        except ImportError:
            warn("IMP not found, skipping test\n")
            return

        test_chr = Chromosome(name="Test Chromosome", max_tad_size=260000)
        test_chr.add_experiment(
            "exp1",
            20000,
            tad_def=exp4,
            hic_data=PATH + "/20Kb/chrT/chrT_D.tsv",
            silent=True,
        )
        exp = test_chr.experiments[0]
        exp.load_hic_data(PATH + "/20Kb/chrT/chrT_A.tsv", silent=True)
        exp.filter_columns(silent=True)
        exp.normalize_hic(silent=True, factor=None)

        modelling_config = {
            "kforce": 5,
            "maxdist": 500,
            "scale": 0.01,
            "upfreq": 0.5,
            "lowfreq": -0.5,
        }
        # A `verbose=3` argument was disabled in the original call.
        models = exp.model_region(51, 71, ncopies=4, n_models=10, n_keep=10,
                                  n_cpus=10, config=modelling_config)
        models.save_models("models.pick")

        # Neither result is used further in the visible part of this test.
        avg = models.average_model()
        nmd = len(models)
        print("I'm here test 13")
Ejemplo n.º 26
0
    def test_11_write_interaction_pairs(self):
        """
        Write the interaction-pair file and check its length and two values.

        The pairs are written to a scratch file named "lala" in the current
        working directory; the file is removed before the test returns.
        """
        # The guard comes after the docstring so the string above is a real
        # docstring instead of a discarded expression statement.
        if ONLY and "11" not in ONLY:
            return
        if CHKTIME:
            t0 = time()

        test_chr = Chromosome(name="Test Chromosome", max_tad_size=260000)
        test_chr.add_experiment("exp1",
                                20000,
                                tad_def=exp4,
                                hic_data=PATH + "/20Kb/chrT/chrT_D.tsv")
        exp = test_chr.experiments[0]
        exp.load_hic_data(PATH + "/20Kb/chrT/chrT_A.tsv", silent=True)
        exp.filter_columns(silent=True)
        exp.normalize_hic(factor=1, silent=True)
        exp.get_hic_zscores(zscored=False)
        try:
            exp.write_interaction_pairs("lala")
            with open("lala") as f_lala:
                lines = f_lala.readlines()
            self.assertEqual(len(lines), 4674)
            # Third tab-separated field of the checked rows.
            self.assertAlmostEqual(float(lines[25].split('\t')[2]),
                                   0.5852295196345679)
            self.assertAlmostEqual(float(lines[2000].split('\t')[2]),
                                   0.07060448846960976)
        finally:
            # Remove the scratch file even when an assertion fails.
            system("rm -f lala")
        if CHKTIME:
            print("11", time() - t0)
Ejemplo n.º 27
0
    def test_04_chromosome_batch(self):
        """
        Detect TADs on three experiments in batch mode and compare the end
        positions of the positively scored TADs with reference values.
        """
        if ONLY and "04" not in ONLY:
            return
        if CHKTIME:
            t0 = time()

        hic_files = [
            PATH + "/20Kb/chrT/chrT_A.tsv",
            PATH + "/20Kb/chrT/chrT_D.tsv",
            PATH + "/20Kb/chrT/chrT_C.tsv",
        ]
        chrom = Chromosome(name="Test Chromosome",
                           experiment_resolutions=[20000] * 3,
                           experiment_hic_data=hic_files,
                           experiment_names=["exp1", "exp2", "exp3"],
                           silent=True)
        chrom.find_tad(["exp1", "exp2", "exp3"],
                       batch_mode=True, verbose=False, silent=True)
        tads = chrom.get_experiment("batch_exp1_exp2_exp3").tads
        found = [tads[key]["end"] for key in tads if tads[key]["score"] > 0]

        # Reference values obtained with square root normalization (kept for
        # the record, no longer asserted):
        #   [3.0, 8.0, 16.0, 21.0, 28.0, 35.0, 43.0,
        #    49.0, 61.0, 66.0, 75.0, 89.0, 94.0, 99.0]
        expected = [3.0, 14.0, 19.0, 33.0, 43.0, 49.0,
                    61.0, 66.0, 71.0, 89.0, 94.0, 99.0]
        self.assertEqual(expected, found)

        if CHKTIME:
            print("%s %s" % ("4", time() - t0))
Ejemplo n.º 28
0
    def test_12_3d_modelling_optimization(self):
        """
        Run `optimal_imp_parameters` on a small region and check the best
        parameter set that it reports.

        The test is skipped (with a warning) when the IMP package cannot be
        imported. Non-string parameters are rounded to 4 decimals before
        being compared with the reference dictionary.
        """
        if ONLY and "12" not in ONLY:
            return
        if CHKTIME:
            t0 = time()

        try:
            __import__("IMP")
        except ImportError:
            warn("IMP not found, skipping test\n")
            return
        test_chr = Chromosome(name="Test Chromosome", max_tad_size=260000)
        test_chr.add_experiment(
            "exp1", 20000, tad_def=exp4, hic_data=PATH + "/20Kb/chrT/chrT_D.tsv"
        )  # norm_data can be used to pass the normalized matrix directly
        exp = test_chr.experiments[0]
        exp.load_hic_data(PATH + "/20Kb/chrT/chrT_A.tsv")
        exp.filter_columns(silent=True)
        exp.normalize_hic(silent=True, factor=None)
        result = exp.optimal_imp_parameters(
            50,
            70,
            ncopies=4,
            n_cpus=1,  # It can be that this function requires also the raw hic_data matrix
            n_models=8,
            n_keep=2,
            lowfreq_range=[-0.6],
            upfreq_range=(0, 1.1, 1.1),
            maxdist_range=[500, 600],
            verbose=True,
        )

        # get best correlations
        config = result.get_best_parameters_dict()

        # Save the models and the contact map
        # result.save_model or result.save_data
        # result.write_cmm to visualize the best models
        # result.write_xyz to visualize the best models

        wanted = {
            "maxdist": 600.0,
            "upfreq": 0.0,
            "kforce": 5,
            "dcutoff": 2,
            "reference": "",
            "lowfreq": -0.6,
            "scale": 0.01,
        }

        def numeric_items(params):
            # Keep only the non-string parameters, rounded, keyed by name.
            return dict((key, round(val, 4)) for key, val in params.items()
                        if not isinstance(val, str))

        # Compare by parameter name: comparing the two `.values()` lists
        # position by position depends on the iteration order of two
        # separately built dictionaries.
        self.assertEqual(numeric_items(config), numeric_items(wanted))
        if CHKTIME:
            print("%s %s" % ("12", time() - t0))
Ejemplo n.º 29
0
 def test_05_save_load(self):
     """
     Save a chromosome to the file 'lolo' and load it back.

     Nothing is asserted: the test only checks that both calls run.
     """
     saved = Chromosome(name='Test Chromosome',
                        experiment_tads=[exp1, exp2],
                        experiment_names=['exp1', 'exp2'],
                        experiment_resolutions=[20000, 20000])
     saved.save_chromosome('lolo', force=True)
     load_chromosome('lolo')
     # clean up the scratch files
     for command in ('rm -f lolo', 'rm -f lolo_hic'):
         system(command)
Ejemplo n.º 30
0
    def test_13_3d_modelling_centroid(self):
        """
        Model a region without parameter optimisation and check the centroid.

        The model returned by `centroid_model` must have the same
        'rand_init' as the model with the smallest value, in the result of
        `rmsdRMSD_wrapper`, against the average model (appended as the last
        entry of the coordinate lists).

        The test is skipped (with a warning) when IMP cannot be imported.
        """
        if ONLY and ONLY != '13':
            return
        if CHKTIME:
            t0 = time()

        try:
            __import__('IMP')
        except ImportError:
            warn('IMP not found, skipping test\n')
            return

        test_chr = Chromosome(name='Test Chromosome', max_tad_size=260000)
        test_chr.add_experiment('exp1', 20000, tad_def=exp4,
                                hic_data=PATH + '/20Kb/chrT/chrT_D.tsv',
                                silent=True)
        exp = test_chr.experiments[0]
        exp.load_hic_data(PATH + '/20Kb/chrT/chrT_A.tsv', silent=True)
        exp.filter_columns(silent=True)
        exp.normalize_hic(silent=True, factor=None)

        modelling_config = {'kforce': 5, 'maxdist': 500, 'scale': 0.01,
                            'upfreq': 1.0, 'lowfreq': -0.6}
        models = exp.model_region(51, 71, n_models=40, n_keep=25, n_cpus=4,
                                  config=modelling_config)
        models.save_models('models.pick')

        avg = models.average_model()
        nmd = len(models)
        # per-axis coordinates of every model, with the average model last
        xs = [models[m]['x'] for m in xrange(nmd)] + [avg['x']]
        ys = [models[m]['y'] for m in xrange(nmd)] + [avg['y']]
        zs = [models[m]['z'] for m in xrange(nmd)] + [avg['z']]
        dev = rmsdRMSD_wrapper(xs, ys, zs, models._zeros, models.nloci, 200,
                               range(nmd + 1), nmd + 1, int(False), 'rmsd', 0)
        centroid = models[models.centroid_model()]
        # index of the model closest to the average one (stored at index nmd)
        closest = min(range(nmd), key=lambda k: dev[(k, nmd)])
        self.assertEqual(centroid['rand_init'], models[closest]['rand_init'])
        if CHKTIME:
            print('%s %s' % ('13', time() - t0))
Ejemplo n.º 31
0
    def test_12_3d_modelling_optimization(self):
        """
        Run `optimal_imp_parameters` on a small region and check the best
        parameter set that it reports.

        The test is skipped (with a warning) when the IMP package cannot be
        imported. Every non-string reference parameter is compared, rounded
        to 4 decimals, with the value found under the same name.
        """
        if ONLY and "12" not in ONLY:
            return
        if CHKTIME:
            t0 = time()

        try:
            __import__("IMP")
        except ImportError:
            warn("IMP not found, skipping test\n")
            return
        test_chr = Chromosome(name="Test Chromosome", max_tad_size=260000)
        test_chr.add_experiment("exp1",
                                20000,
                                tad_def=exp4,
                                hic_data=PATH + "/20Kb/chrT/chrT_D.tsv")
        exp = test_chr.experiments[0]
        exp.load_hic_data(PATH + "/20Kb/chrT/chrT_A.tsv")
        exp.filter_columns(silent=True)
        exp.normalize_hic(silent=True, factor=None)
        result = exp.optimal_imp_parameters(
            50,
            70,
            n_cpus=4,
            n_models=8,
            n_keep=2,
            lowfreq_range=[-0.6],
            upfreq_range=(0, 1.1,
                          1.1),  # from 0 till 1.1 in step of 1.1 with ()
            maxdist_range=[500, 600],  # it will use 500 and 600 with []
            verbose=False)

        # get best correlations
        config = result.get_best_parameters_dict()  # dict with parameters
        wanted = {
            "maxdist": 600.0,
            "upfreq": 0.0,
            "kforce": 5,
            "dcutoff": 2,
            "reference": "",
            "lowfreq": -0.6,
            "scale": 0.01
        }

        # The filter must look at the VALUES: the keys are always strings,
        # so filtering on them empties both lists and the assertion can
        # never fail. The expected side must also be read from `wanted`.
        numeric_keys = [key for key in sorted(wanted)
                        if not isinstance(wanted[key], str)]
        self.assertEqual([round(config[key], 4) for key in numeric_keys],
                         [round(wanted[key], 4) for key in numeric_keys])
        if CHKTIME:
            print("12", time() - t0)
Ejemplo n.º 32
0
    def _sub_experiment_zscore(self, start, end):
        """
        Get the z-score of a sub-region of an  experiment.

        A temporary one-experiment Chromosome is built from the
        [start, end] slice of the Hi-C matrix. Its `norm` and `_zeros` are
        taken from the full experiment, and the z-scores are then computed
        on that slice only.

        TODO: find a nicer way to do this...

        :param start: first bin to model (bin number)
        :param end: last bin to model (bin number, inclusive)

        :returns: a tuple with the z-scores of the sub-region and a square
           matrix (list of lists) holding the `norm` value of each kept pair
           of bins (nan elsewhere, diagonal included)

        :raises Exception: if every bin of the region is listed in
           `self._zeros`
        """
        if self._normalization != 'visibility':
            warn('WARNING: normalizing according to visibility method')
            self.normalize_hic(method='visibility')
        from pytadbit import Chromosome
        matrix = self.get_hic_matrix()
        # from here on `end` is exclusive
        end += 1
        region = range(start, end)
        new_matrix = [[matrix[i][j] for j in region] for i in region]

        tmp = Chromosome('tmp')
        tmp.add_experiment('exp1',
                           hic_data=[new_matrix],
                           resolution=self.resolution,
                           filter_columns=False)
        exp = tmp.experiments[0]
        # We want the weights and zeros calculated in the full chromosome
        siz = self.size
        exp.norm = [[self.norm[0][i + siz * j] for i in region
                     for j in region]]
        # `end` is exclusive here: a zero located at `end` lies outside the
        # region and must not be counted.
        exp._zeros = dict((z - start, None) for z in self._zeros
                          if start <= z < end)
        if len(exp._zeros) == end - start:
            raise Exception('ERROR: no interaction found in selected regions')
        # ... but the z-scores in this particular region
        exp.get_hic_zscores(remove_zeros=True)
        size = exp.size
        values = [[float('nan') for _ in range(size)] for _ in range(size)]
        for i in range(size):
            # zeros are rows or columns having a zero in the diagonal
            if i in exp._zeros:
                continue
            for j in range(i + 1, size):
                if j in exp._zeros:
                    continue
                cell = i * size + j
                # NOTE(review): the original tested this same cell twice;
                # the second test was possibly meant for the mirrored cell
                # (j, i) -- confirm.
                if not exp.hic_data[0][cell]:
                    continue
                values[i][j] = values[j][i] = exp.norm[0][cell]
        return exp._zscores, values
Ejemplo n.º 33
0
 def test_09_hic_normalization(self):
     """
     Load Hi-C data into an experiment and compute its z-scores, first with
     the default options and then with zscored=False. Nothing is asserted.

     TODO: check with Davide's script
     """
     chrom = Chromosome(name='Test Chromosome', max_tad_size=260000)
     chrom.add_experiment('exp1', 20000, tad_def=exp4,
                          hic_data='20Kb/chrT/chrT_D.tsv')
     experiment = chrom.experiments[0]
     experiment.load_experiment('20Kb/chrT/chrT_A.tsv')
     for options in ({}, {'zscored': False}):
         experiment.get_hic_zscores(**options)
Ejemplo n.º 34
0
 def test_10_generate_weights(self):
     """
     Recompute the normalization of an experiment and check that it is
     equal to the one present right after loading the data.

     method names are: 'sqrt' or 'over_tot'
     """
     chrom = Chromosome(name='Test Chromosome', max_tad_size=260000)
     chrom.add_experiment('exp1', 20000, tad_def=exp4,
                          hic_data='20Kb/chrT/chrT_D.tsv')
     experiment = chrom.experiments[0]
     # keep a copy of the current normalization before wiping it
     reference = experiment.norm[:]
     experiment.norm = None
     experiment.normalize_hic()
     self.assertEqual(reference[0], experiment.norm[0])
Ejemplo n.º 35
0
 def test_10_generate_weights(self):
     """
     Recompute the weights of an experiment and check that they are equal
     to the ones present right after loading the data.

     TODO: using Francois' formula
     method names are: 'sqrt' or 'over_tot'
     """
     chrom = Chromosome(name='Test Chromosome', max_tad_size=260000)
     chrom.add_experiment('exp1', 20000, tad_handler=exp4,
                          xp_handler='20Kb/chrT/chrT_D.tsv')
     experiment = chrom.experiments[0]
     # keep a copy of the current weights before wiping them
     reference = experiment.wght[:]
     experiment.wght = None
     experiment.normalize_hic()
     self.assertEqual(reference[0], experiment.wght[0])
Ejemplo n.º 36
0
 def test_04_chromosome_batch(self):
     """
     Detect TADs on three experiments in batch mode and compare the end
     positions of the positively scored TADs with reference values.
     """
     hic_files = ['20Kb/chrT/chrT_A.tsv',
                  '20Kb/chrT/chrT_D.tsv',
                  '20Kb/chrT/chrT_C.tsv']
     chrom = Chromosome(name='Test Chromosome',
                        experiment_resolutions=[20000] * 3,
                        experiment_hic_data=hic_files,
                        experiment_names=['exp1', 'exp2', 'exp3'])
     chrom.find_tad(['exp1', 'exp2', 'exp3'], batch_mode=True,
                    verbose=False)
     tads = chrom.get_experiment('batch_exp1_exp2_exp3').tads
     found = [tads[key]['end'] for key in tads if tads[key]['score'] > 0]
     expected = [3.0, 8.0, 16.0, 21.0, 28.0, 35.0, 43.0,
                 49.0, 61.0, 66.0, 75.0, 89.0, 99.0]
     self.assertEqual(expected, found)
Ejemplo n.º 37
0
 def test_06_tad_clustering(self):
     """
     Align two TADs of an experiment with `optimal_cmo` (method 'score')
     and compare both alignments with reference values.
     """
     chrom = Chromosome(name='Test Chromosome',
                        experiment_tads=[exp4],
                        experiment_names=['exp1'],
                        experiment_hic_data=['20Kb/chrT/chrT_D.tsv'],
                        experiment_resolutions=[20000, 20000])
     # collect every TAD yielded for the experiment, in iteration order
     all_tads = [tad for _, tad in chrom.iter_tads('exp1')]
     align1, align2, _ = optimal_cmo(all_tads[7], all_tads[10], 7,
                                     method='score')
     expected1 = [0, 1, '-', 2, 3, '-', 4, 5, 6, 7, 8, 9, 10]
     expected2 = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
     self.assertEqual(align1, expected1)
     self.assertEqual(align2, expected2)
Ejemplo n.º 38
0
    def _sub_experiment_zscore(self, start, end):
        """
        Get the z-score of a sub-region of an  experiment.

        A temporary one-experiment Chromosome is built from the
        [start, end] slice of the Hi-C matrix. Its `norm` and `_zeros` are
        taken from the full experiment, and the z-scores are then computed
        on that slice only.

        TODO: find a nicer way to do this...

        :param start: first bin to model (bin number)
        :param end: last bin to model (bin number, inclusive)

        :returns: a tuple with the z-scores of the sub-region and a square
           matrix (list of lists) holding the `norm` value of each kept pair
           of bins (nan elsewhere, diagonal included)

        :raises Exception: if every bin of the region is listed in
           `self._zeros`
        """
        if self._normalization != 'visibility':
            warn('WARNING: normalizing according to visibility method')
            self.normalize_hic(method='visibility')
        from pytadbit import Chromosome
        matrix = self.get_hic_matrix()
        # from here on `end` is exclusive
        end += 1
        region = range(start, end)
        new_matrix = [[matrix[i][j] for j in region] for i in region]

        tmp = Chromosome('tmp')
        tmp.add_experiment('exp1', hic_data=[new_matrix],
                           resolution=self.resolution, filter_columns=False)
        exp = tmp.experiments[0]
        # We want the weights and zeros calculated in the full chromosome
        siz = self.size
        exp.norm = [[self.norm[0][i + siz * j] for i in region
                     for j in region]]
        # `end` is exclusive here: a zero located at `end` lies outside the
        # region and must not be counted.
        exp._zeros = dict((z - start, None) for z in self._zeros
                          if start <= z < end)
        if len(exp._zeros) == end - start:
            raise Exception('ERROR: no interaction found in selected regions')
        # ... but the z-scores in this particular region
        exp.get_hic_zscores(remove_zeros=True)
        size = exp.size
        values = [[float('nan') for _ in range(size)] for _ in range(size)]
        for i in range(size):
            # zeros are rows or columns having a zero in the diagonal
            if i in exp._zeros:
                continue
            for j in range(i + 1, size):
                if j in exp._zeros:
                    continue
                cell = i * size + j
                # NOTE(review): the original tested this same cell twice;
                # the second test was possibly meant for the mirrored cell
                # (j, i) -- confirm.
                if not exp.hic_data[0][cell]:
                    continue
                values[i][j] = values[j][i] = exp.norm[0][cell]
        return exp._zscores, values
Ejemplo n.º 39
0
    def test_03_tad_multi_aligner(self):
        """
        Align the TADs of four experiments and check the score and p-values
        returned by the randomized alignments.
        """
        chrom = Chromosome(
            name='Test Chromosome',
            tad_handlers=[exp1, exp2, exp3, exp4],
            experiment_names=['exp1', 'exp2', 'exp3', 'exp4'],
            experiment_resolutions=[40000, 20000, 20000, 20000])

        # plain global alignment first; its result is not used
        chrom.align_experiments(verbose=False, randomize=False,
                                method='global')
        score1, pval1 = chrom.align_experiments(verbose=False,
                                                method='global',
                                                randomize=True)
        _, pval2 = chrom.align_experiments(verbose=False, randomize=True,
                                           rnd_method='shuffle')

        expected_score = round(-26.095, 3)
        expected_pval = round(0.001, 1)
        self.assertEqual(expected_score, round(score1, 3))
        self.assertEqual(expected_pval, round(pval1, 1))
        self.assertTrue(abs(0.175 - pval2) < 0.2)
Ejemplo n.º 40
0
def load_experiments(opts):
    """
    Build a Chromosome holding one experiment per Hi-C data file.

    An experiment is named after the matching entry of `opts.exp_names`
    when that list is given. Otherwise the name is the file name with its
    directory and last extension removed (any remaining dots are dropped,
    as before). A file name that would give an empty name, such as one
    with no extension, is used unchanged, so that several such files do
    not all end up with an empty experiment name.

    :param opts: options object; the attributes read are crm, hic_files,
       exp_names, verbose and resolution

    :returns: the Chromosome object with the experiments added
    """
    crm = Chromosome(opts.crm)
    for i, xpr in enumerate(opts.hic_files):
        if opts.exp_names:
            name = opts.exp_names[i]
        else:
            fname = xpr.split('/')[-1]
            # fall back to the whole file name when stripping the extension
            # leaves nothing
            name = ''.join(fname.split('.')[:-1]) or fname
        if opts.verbose:
            print(' Reading Hi-C datafile #%s (%s)' % (i+1, name))
        crm.add_experiment(name, hic_data=xpr,
                           resolution=int(opts.resolution))
        if opts.verbose:
            print('     loaded as: %s\n' % (crm.experiments[name]))
    return crm
Ejemplo n.º 41
0
 def test_tad_multi_aligner(self):
     """
     Run tadbit on four data files, align the resulting experiments with
     randomization and check the score and p-value.
     """
     tadbit_options = dict(max_tad_size="auto", verbose=False,
                           no_heuristic=False)
     exp1 = tadbit("chrT/chrT_A.tsv", **tadbit_options)
     exp2 = tadbit("chrT/chrT_B.tsv", **tadbit_options)
     exp3 = tadbit("chrT/chrT_C.tsv", **tadbit_options)
     exp4 = tadbit("chrT/chrT_D.tsv", **tadbit_options)
     chrom = Chromosome(name="Test Chromosome",
                        resolution=20000,
                        experiments=[exp1, exp2, exp3, exp4],
                        experiment_names=["exp1", "exp2", "exp3", "exp4"])
     score, pval = chrom.align_experiments(verbose=False, randomize=True)
     expected_score = round(19.555803, 3)
     expected_pval = round(0.4, 1)
     self.assertEqual(expected_score, round(score, 3))
     self.assertEqual(expected_pval, round(pval, 1))
Ejemplo n.º 42
0
    def test_05_save_load(self):
        """
        Save a chromosome to the file 'lolo', load it back and check that
        both objects have the same string representation of `__dict__`.
        """
        if CHKTIME:
            t0 = time()

        saved = Chromosome(name='Test Chromosome',
                           experiment_tads=[exp1, exp2],
                           experiment_names=['exp1', 'exp2'],
                           experiment_resolutions=[20000, 20000],
                           silent=True)
        saved.save_chromosome('lolo', force=True)
        loaded = load_chromosome('lolo')
        # clean up the scratch files before asserting
        for command in ('rm -f lolo', 'rm -f lolo_hic'):
            system(command)
        self.assertEqual(str(saved.__dict__), str(loaded.__dict__))
        if CHKTIME:
            print('%s %s' % ('5', time() - t0))
Ejemplo n.º 43
0
    def test_05_save_load(self):
        """
        Save a chromosome to the file 'lolo', load it back and check that
        both objects have the same string representation of `__dict__`.
        """
        if CHKTIME:
            t0 = time()

        saved = Chromosome(
            name='Test Chromosome',
            experiment_tads=[exp1, exp2],
            experiment_names=['exp1', 'exp2'],
            experiment_resolutions=[20000, 20000],
            silent=True)
        saved.save_chromosome('lolo', force=True)
        loaded = load_chromosome('lolo')
        # clean up the scratch files before asserting
        for command in ('rm -f lolo', 'rm -f lolo_hic'):
            system(command)
        self.assertEqual(str(saved.__dict__), str(loaded.__dict__))
        if CHKTIME:
            print('%s %s' % ('5', time() - t0))
Ejemplo n.º 44
0
    def test_13_3d_modelling_centroid(self):
        """
        Model a region and check the centroid model.

        The model returned by `centroid_model` must have the same
        'rand_init' as the model with the smallest value, in the result of
        `rmsdRMSD_wrapper`, against the average model (appended as the last
        entry of the coordinate lists).

        The test is skipped (with a warning) when IMP cannot be imported.
        """
        if ONLY and ONLY != '13':
            return
        if CHKTIME:
            t0 = time()

        try:
            __import__('IMP')
        except ImportError:
            warn('IMP not found, skipping test\n')
            return

        test_chr = Chromosome(name='Test Chromosome', max_tad_size=260000)
        test_chr.add_experiment('exp1',
                                20000,
                                tad_def=exp4,
                                hic_data=PATH + '/20Kb/chrT/chrT_D.tsv',
                                silent=True)
        exp = test_chr.experiments[0]
        exp.load_hic_data(PATH + '/20Kb/chrT/chrT_A.tsv', silent=True)
        exp.filter_columns(silent=True)
        exp.normalize_hic(silent=True, factor=None)

        modelling_config = {
            'kforce': 5,
            'maxdist': 500,
            'scale': 0.01,
            'upfreq': 1.0,
            'lowfreq': -0.6,
        }
        models = exp.model_region(51, 71, n_models=40, n_keep=25, n_cpus=4,
                                  config=modelling_config)
        models.save_models('models.pick')

        avg = models.average_model()
        nmd = len(models)
        # per-axis coordinates of every model, with the average model last
        xs = [models[m]['x'] for m in xrange(nmd)] + [avg['x']]
        ys = [models[m]['y'] for m in xrange(nmd)] + [avg['y']]
        zs = [models[m]['z'] for m in xrange(nmd)] + [avg['z']]
        dev = rmsdRMSD_wrapper(xs, ys, zs,
                               models._zeros, models.nloci, 200,
                               range(nmd + 1), nmd + 1,
                               int(False), 'rmsd', 0)
        centroid = models[models.centroid_model()]
        # index of the model closest to the average one (stored at index nmd)
        closest = min(range(nmd), key=lambda k: dev[(k, nmd)])
        self.assertEqual(centroid['rand_init'], models[closest]['rand_init'])
        if CHKTIME:
            print('%s %s' % ('13', time() - t0))
Ejemplo n.º 45
0
    def test_05_save_load(self):
        """
        Save a chromosome to the file "lolo", load it back and check that
        both objects have the same string representation of `__dict__`.
        """
        if ONLY and "05" not in ONLY:
            return
        if CHKTIME:
            t0 = time()

        saved = Chromosome(
            name="Test Chromosome",
            experiment_tads=[exp1, exp2],
            experiment_names=["exp1", "exp2"],
            experiment_resolutions=[20000, 20000],
            silent=True)
        saved.save_chromosome("lolo", force=True)
        loaded = load_chromosome("lolo")
        # clean up the scratch files before asserting
        for command in ("rm -f lolo", "rm -f lolo_hic"):
            system(command)
        self.assertEqual(str(saved.__dict__), str(loaded.__dict__))
        if CHKTIME:
            print("%s %s" % ("5", time() - t0))
Ejemplo n.º 46
0
def main():
    """
    Generate 3D models and write the requested outputs.

    Models are built from a z-score file when `opts.inabc` is set.
    Otherwise they are built from the Hi-C data in `opts.incrm`, loaded as
    experiment 'X' of a chromosome named 'crm'. Depending on the options,
    the models are then saved, written as cmm files and clustered.
    """
    opts, params = get_options()

    if not opts.inabc:
        crmbit = Chromosome('crm')
        crmbit.add_experiment('X',
                              resolution=opts.resolution,
                              xp_handler=opts.incrm)
        exp = crmbit.experiments['X']
        models = exp.model_region(start=opts.start, end=opts.end,
                                  n_models=opts.nmodels, n_keep=opts.nkeep,
                                  n_cpus=opts.ncpus, keep_all=False,
                                  verbose=False, config=params)
    else:
        zscores = parse_zscores(opts.inabc)
        models = generate_3d_models(zscores, opts.resolution, start=1,
                                    n_models=opts.nmodels, n_keep=opts.nkeep,
                                    n_cpus=opts.ncpus, keep_all=False,
                                    verbose=False, outfile=None,
                                    config=params)

    if opts.save:
        pickle_path = '%s/models_%s_%s.pik' % (
            opts.out, opts.start, opts.start + opts.nmodels)
        models.save_models(pickle_path)

    # one cmm file for each of the first `opts.cmm` models
    for model_num in xrange(int(opts.cmm)):
        models.write_cmm(model_num, opts.out)

    if not opts.full_report:
        return
    models.cluster_models(dcutoff=200)
    models.cluster_analysis_dendrogram(n_best_clusters=10)
    models.model_consistency()
Ejemplo n.º 47
0
 def test_10_compartments(self):
     """
     Search for compartments in the first Hi-C matrix of an experiment and
     check how many entries are stored under the `None` key.
     """
     if ONLY and "10" not in ONLY:
         return
     if CHKTIME:
         t0 = time()
     chrom = Chromosome(name="Test Chromosome", max_tad_size=260000)
     chrom.add_experiment("exp1", 20000, tad_def=exp4,
                          hic_data=PATH + "/20Kb/chrT/chrT_D.tsv",
                          silent=True)
     experiment = chrom.experiments[0]
     experiment.load_hic_data(PATH + "/20Kb/chrT/chrT_A.tsv", silent=True)
     matrix = experiment.hic_data[0]
     matrix.find_compartments(label_compartments="cluster")
     n_compartments = len(matrix.compartments[None])
     self.assertEqual(n_compartments, 39)
     # Disabled check on the density of one compartment:
     # self.assertEqual(round(hic_data.compartments[None][24]["dens"], 5),
     #                  0.75434)
     if CHKTIME:
         print("%s %s" % ("10", time() - t0))
Ejemplo n.º 48
0
 def test_10_compartments(self):
     """
     Search for compartments in the first Hi-C matrix of an experiment and
     check how many entries are stored under the `None` key.
     """
     if ONLY and ONLY != '10':
         return
     if CHKTIME:
         t0 = time()
     chrom = Chromosome(name='Test Chromosome', max_tad_size=260000)
     chrom.add_experiment('exp1', 20000, tad_def=exp4,
                          hic_data=PATH + '/20Kb/chrT/chrT_D.tsv',
                          silent=True)
     experiment = chrom.experiments[0]
     experiment.load_hic_data(PATH + '/20Kb/chrT/chrT_A.tsv', silent=True)
     matrix = experiment.hic_data[0]
     matrix.find_compartments(label_compartments='cluster')
     n_compartments = len(matrix.compartments[None])
     self.assertEqual(n_compartments, 39)
     # Disabled check on the density of one compartment:
     # self.assertEqual(round(hic_data.compartments[None][24]['dens'], 5),
     #                  0.75434)
     if CHKTIME:
         print('%s %s' % ('10', time() - t0))
Ejemplo n.º 49
0
def main():
    """
    Compare the position of clustered genes with the TADs of chromosome 19.

    The genes returned by `get_genes` are clustered from their distance
    matrix and each cluster is reduced to a (min, max) pair. The pairs are
    passed to `tad_breaker` together with the TADs found in the Hi-C
    experiment.
    """
    # retrieve HOX genes
    distmatrix, geneids = get_genes()

    # compute TADs for human chromosome 19
    test_chr = Chromosome(name='Test Chromosome')
    hic_file = PATH + 'HIC_gm06690_chr19_chr19_100000_obs.txt'
    test_chr.add_experiment('exp1', 100000, xp_handler=hic_file)
    test_chr.find_tad(['exp1'])
    exp = test_chr.experiments['exp1']

    # cluster the genes; cluster identifiers start at 1
    clust = linkage(distmatrix['19'])
    cl_idx = list(fcluster(clust, t=1, criterion='inconsistent'))
    n_clusters = max(cl_idx)
    print('%s %s' % (n_clusters, 'clusters'))

    members = [[] for _ in xrange(n_clusters)]
    for gene_num, cluster_id in enumerate(cl_idx):
        members[cluster_id - 1].append(geneids['19'][gene_num][1])
    # reduce each cluster to its (min, max) pair
    cluster = [(min(group), max(group)) for group in members]

    tad_breaker(exp.tads, cluster, exp.resolution, show_plot=True, bins=5,
                title='Proportion of HOX genes according to position in a TAD')
Ejemplo n.º 50
0
    def test_07_forbidden_regions(self):
        """
        Check the TAD borders and the forbidden regions of a chromosome
        created with centromere_search=True.

        The content of `forbidden` is captured after adding a first
        experiment and again after adding a second one; both snapshots are
        compared with reference values.
        """
        if ONLY and ONLY != '07':
            return
        if CHKTIME:
            t0 = time()

        chrom = Chromosome(name='Test Chromosome', max_tad_size=260000,
                           centromere_search=True)
        chrom.add_experiment('exp1', 20000, tad_def=exp4,
                             hic_data=PATH + '/20Kb/chrT/chrT_D.tsv',
                             silent=True)
        # Reference borders with square root normalization (kept for the
        # record, no longer asserted):
        #   [2.0, 7.0, 12.0, 18.0, 38.0, 43.0, 49.0,
        #    61.0, 66.0, 75.0, 89.0, 94.0, 99.0]
        expected_borders = [3.0, 14.0, 19.0, 33.0, 38.0, 43.0, 49.0, 61.0,
                            66.0, 71.0, 83.0, 89.0, 94.0, 99.0]
        tads = chrom.experiments['exp1'].tads
        found = [tads[key]['end'] for key in tads if tads[key]['score'] > 0]
        self.assertEqual(expected_borders, found)

        # snapshot with one experiment loaded
        items1 = chrom.forbidden.keys(), chrom.forbidden.values()
        chrom.add_experiment('exp2', 20000, tad_def=exp3,
                             hic_data=PATH + '/20Kb/chrT/chrT_C.tsv',
                             silent=True)
        # snapshot with both experiments loaded
        items2 = chrom.forbidden.keys(), chrom.forbidden.values()

        # Former reference for the first snapshot (no longer asserted):
        #   ([32, 33, 34, 38, 39, 19, 20, 21, 22,
        #     23, 24, 25, 26, 27, 28, 29, 30, 31],
        #    [None, None, None, 'Centromere', 'Centromere',
        #     None, None, None, None, None, None, None,
        #     None, None, None, None, None, None])
        know1 = ([38, 39], ['Centromere', 'Centromere'])
        know2 = ([38], ['Centromere'])
        self.assertEqual(items1, know1)
        self.assertEqual(items2, know2)
        if CHKTIME:
            print('%s %s' % ('7', time() - t0))