def main():
    """
    Generate 3D models and write the outputs requested in the options.

    Models are built from a z-score file when ``opts.inabc`` is set,
    otherwise from the experiment file ``opts.incrm`` loaded into a
    temporary chromosome. Afterwards the models are optionally saved,
    the first ``opts.cmm`` of them are written as cmm files and, for a
    full report, they are clustered and checked for consistency.
    """
    opts, params = get_options()
    # keyword arguments common to both modelling entry points
    shared = dict(n_models=opts.nmodels, n_keep=opts.nkeep,
                  n_cpus=opts.ncpus, keep_all=False, verbose=False,
                  config=params)
    if opts.inabc:
        models = generate_3d_models(parse_zscores(opts.inabc),
                                    opts.resolution, start=1, outfile=None,
                                    **shared)
    else:
        chrom_name = 'crm'
        exp_name = 'X'
        chrom = Chromosome(chrom_name)
        chrom.add_experiment(exp_name, resolution=opts.resolution,
                             xp_handler=opts.incrm)
        experiment = chrom.experiments[exp_name]
        models = experiment.model_region(start=opts.start, end=opts.end,
                                         **shared)
    if opts.save:
        pickle_path = '%s/models_%s_%s.pik' % (opts.out, opts.start,
                                               opts.start + opts.nmodels)
        models.save_models(pickle_path)
    for index in xrange(int(opts.cmm)):
        models.write_cmm(index, opts.out)
    if not opts.full_report:
        return
    models.cluster_models(dcutoff=200)
    models.cluster_analysis_dendrogram(n_best_clusters=10)
    models.model_consistency()
def load_hic_data(opts, xnames):
    """
    Load Hi-C data

    A chromosome object is created from the options, one Hi-C experiment
    is added per (name, data, normalized data) triple, and experiments
    given without normalized data are normalized. ``opts.beg`` and
    ``opts.end`` are then checked against the size of the last experiment;
    ``opts.end`` is overwritten when it is too large.

    :param opts: options object (reads crm, species, centromere, assembly,
       data, norm, enzyme, cell, identifier, project, res, beg and end)
    :param xnames: names given to the experiments, paired with ``opts.data``

    :returns: the chromosome holding the loaded experiments
    :raises Exception: if ``opts.beg`` is larger than the size of the last
       experiment
    """
    # A species written with an underscore becomes the capitalized first
    # part immediately followed by the second part.
    if '_' in opts.species:
        species = (opts.species.split('_')[0].capitalize() +
                   opts.species.split('_')[1])
    else:
        species = opts.species
    # Create chromosome object
    crm = Chromosome(opts.crm, species=species,
                     centromere_search=opts.centromere,
                     assembly=opts.assembly)

    logging.info("\tReading input data...")
    for xnam, xpath, xnorm in zip(xnames, opts.data, opts.norm):
        crm.add_experiment(
            xnam,
            # general descriptive fields
            exp_type='Hi-C', enzyme=opts.enzyme, cell_type=opts.cell,
            identifier=opts.identifier,
            # user descriptions
            project=opts.project,
            resolution=opts.res,
            hic_data=xpath,
            norm_data=xnorm)
        if xnorm:
            continue
        # no normalized matrix was supplied for this experiment
        logging.info("\tNormalizing HiC data of %s..." % xnam)
        crm.experiments[xnam].normalize_hic(iterations=5)

    # boundaries are validated against the last experiment
    if opts.beg > crm.experiments[-1].size:
        raise Exception('ERROR: beg parameter is larger than chromosome size.')
    if opts.end > crm.experiments[-1].size:
        message = ('WARNING: end parameter is larger than chromosome ' +
                   'size. Setting end to %s.\n' % (crm.experiments[-1].size *
                                                   opts.res))
        logging.info(message)
        opts.end = crm.experiments[-1].size
    return crm
def test_11_write_interaction_pairs(self):
    """
    writes interaction pair file.

    The pairs are written to a scratch file ("lala") in the current
    directory, read back and compared with known values; the scratch file
    is always removed afterwards.
    """
    if ONLY and "11" not in ONLY:
        return
    if CHKTIME:
        t0 = time()
    test_chr = Chromosome(name="Test Chromosome", max_tad_size=260000)
    test_chr.add_experiment("exp1", 20000, tad_def=exp4,
                            hic_data=PATH + "/20Kb/chrT/chrT_D.tsv")
    exp = test_chr.experiments[0]
    exp.load_hic_data(PATH + "/20Kb/chrT/chrT_A.tsv", silent=True)
    exp.filter_columns(silent=True)
    exp.normalize_hic(factor=None, silent=True)
    exp.get_hic_zscores(zscored=False)
    try:
        exp.write_interaction_pairs("lala")
        # context manager so the handle is closed even if reading fails
        with open("lala") as handle:
            lines = handle.readlines()
    finally:
        # clean up the scratch file whatever happened above, so that a
        # failure does not leave it behind for the next run
        system("rm -f lala")
    self.assertEqual(len(lines), 4674)
    self.assertEqual(lines[25], "1\t28\t0.612332461036\n")
    self.assertEqual(lines[2000], "26\t70\t0.0738742984321\n")
    if CHKTIME:
        # single-argument form: same output under Python 2, valid in Python 3
        print('%s %s' % ("11", time() - t0))
def test_11_write_interaction_pairs(self):
    """
    writes interaction pair file.

    The pairs are written to a scratch file ('lala') in the current
    directory, read back and compared with known values; the scratch file
    is always removed afterwards.
    """
    if ONLY and ONLY != '11':
        return
    if CHKTIME:
        t0 = time()
    test_chr = Chromosome(name='Test Chromosome', max_tad_size=260000)
    test_chr.add_experiment('exp1', 20000, tad_def=exp4,
                            hic_data=PATH + '/20Kb/chrT/chrT_D.tsv')
    exp = test_chr.experiments[0]
    exp.load_hic_data(PATH + '/20Kb/chrT/chrT_A.tsv', silent=True)
    exp.filter_columns(silent=True)
    exp.normalize_hic(factor=None, silent=True)
    exp.get_hic_zscores(zscored=False)
    try:
        exp.write_interaction_pairs('lala')
        # context manager so the handle is closed even if reading fails
        with open('lala') as handle:
            lines = handle.readlines()
    finally:
        # clean up the scratch file whatever happened above, so that a
        # failure does not leave it behind for the next run
        system('rm -f lala')
    self.assertEqual(len(lines), 4674)
    self.assertEqual(lines[25], '1\t28\t0.612332461036\n')
    self.assertEqual(lines[2000], '26\t70\t0.0738742984321\n')
    if CHKTIME:
        # single-argument form: same output under Python 2, valid in Python 3
        print('%s %s' % ('11', time() - t0))
def main():
    """
    Model a region from a normalized matrix using command line settings.

    Positional arguments read from ``sys.argv``:

    1. path to the normalized matrix
    2. ``upfreq:lowfreq:maxdist`` configuration string
    3. ``n_models:n_keep`` (number of models to compute and to keep)

    The models are saved in ``models_<configuration string>.pickle``.
    """
    matrix_path, config_string, compute_keep = (sys.argv[1], sys.argv[2],
                                                sys.argv[3])
    upfreq_txt, lowfreq_txt, maxdist_txt = config_string.split(':')
    lowfreq = float(lowfreq_txt)
    upfreq = float(upfreq_txt)
    maxdist = int(maxdist_txt)
    config = dict(reference='', kforce=5, maxdist=maxdist, upfreq=upfreq,
                  lowfreq=lowfreq, scale=0.01, kbending=0.0)
    n_models, n_keep = [int(number) for number in compute_keep.split(':')]

    chrom = Chromosome('chr')
    chrom.add_experiment('sample', norm_data=matrix_path, resolution=15000)
    experiment = chrom.experiments[0]
    models = experiment.model_region(n_models=n_models, n_keep=n_keep,
                                     n_cpus=8, config=config)
    models.save_models('models_%s.pickle' % (config_string))
def test_04_chromosome_batch(self):
    """
    Find TADs in batch mode over three experiments and compare the end
    positions of the TADs with a positive score against known values.
    """
    if ONLY and ONLY != '04':
        return
    if CHKTIME:
        t0 = time()
    hic_files = [PATH + '/20Kb/chrT/chrT_A.tsv',
                 PATH + '/20Kb/chrT/chrT_D.tsv',
                 PATH + '/20Kb/chrT/chrT_C.tsv']
    test_chr = Chromosome(name='Test Chromosome',
                          experiment_resolutions=[20000]*3,
                          experiment_hic_data=hic_files,
                          experiment_names=['exp1', 'exp2', 'exp3'],
                          silent=True)
    test_chr.find_tad(['exp1', 'exp2', 'exp3'], batch_mode=True,
                      verbose=False, silent=True)
    tads = test_chr.get_experiment('batch_exp1_exp2_exp3').tads
    found = []
    for key in tads:
        if tads[key]['score'] > 0:
            found.append(tads[key]['end'])
    # Values obtained with square root normalization.
    # [3.0, 8.0, 16.0, 21.0, 28.0, 35.0, 43.0,
    #  49.0, 61.0, 66.0, 75.0, 89.0, 94.0, 99.0]
    expected = [3.0, 14.0, 19.0, 33.0, 43.0, 49.0, 61.0, 66.0, 71.0, 89.0,
                94.0, 99.0]
    self.assertEqual(expected, found)
    if CHKTIME:
        print('%s %s' % ('4', time() - t0))
def test_06_tad_clustering(self):
    """
    Align the second and fourth TADs of the experiment with
    ``optimal_cmo`` and compare both alignments with known values.
    """
    if CHKTIME:
        t0 = time()
    test_chr = Chromosome(name='Test Chromosome',
                          experiment_tads=[exp4],
                          experiment_names=['exp1'],
                          experiment_hic_data=[
                              PATH + '/20Kb/chrT/chrT_D.tsv'],
                          experiment_resolutions=[20000,20000],
                          silent=True)
    all_tads = [tad for _, tad in test_chr.iter_tads('exp1')]
    # previously: optimal_cmo(all_tads[7], all_tads[10], 7, method='score')
    align1, align2, _ = optimal_cmo(all_tads[1], all_tads[3], 7,
                                    method='score')
    # Values with square root normalization:
    #   align1: [0, 1, '-', 2, 3, '-', 4, 5, 6, 7, 8, 9, 10]
    #   align2: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
    expected1 = [0, 1, 2, '-', '-', 3, 4, 5, 6, 7, 8, '-', 9]
    expected2 = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
    self.assertEqual(align1, expected1)
    self.assertEqual(align2, expected2)
    if CHKTIME:
        print('%s %s' % ('6', time() - t0))
def load_genome_from_tad_def(genome_path, res, verbose=False):
    """
    Search, at a given path, for files containing TAD definitions (tsv),
    one per chromosome. Entries of the directory that are not regular
    files are skipped.

    The chromosome name is derived from the file name by removing '.tsv'
    and 'chr' and upper-casing the rest.

    :param genome_path: Path where to search for TADbit chromosomes
    :param res: Resolution at which the chromosomes were saved
    :param False verbose: print the name of each chromosome being loaded

    :returns: a dictionary with all TADbit chromosomes found, keyed by
       chromosome name
    :raises Exception: if two files resolve to the same chromosome name
    """
    ref_genome = {}
    for fname in listdir(genome_path):
        crm_path = os.path.join(genome_path, fname)
        if not isfile(crm_path):
            continue
        crm = fname.replace('.tsv', '').replace('chr', '').upper()
        # The duplicate check has to use the normalized name, which is the
        # dictionary key; checking the raw file name would let a second
        # definition silently overwrite the first one.
        if crm in ref_genome:
            raise Exception('More than 1 TAD definition file found\n')
        if verbose:
            # single-argument form: same output under Python 2, valid in 3
            print('%s %s' % (' Chromosome:', crm))
        crmO = Chromosome(crm)
        crmO.add_experiment('sample', res)
        crmO.experiments[0].load_tad_def(crm_path)
        ref_genome[crm] = crmO
    return ref_genome
def test_06_tad_clustering(self):
    """
    Align the second and fourth (non-normalized) TADs of the experiment
    with ``optimal_cmo`` and compare both alignments with known values.
    """
    if ONLY and ONLY != "06":
        return
    if CHKTIME:
        t0 = time()
    test_chr = Chromosome(name="Test Chromosome",
                          experiment_tads=[exp4],
                          experiment_names=["exp1"],
                          experiment_hic_data=[
                              PATH + "/20Kb/chrT/chrT_D.tsv"],
                          experiment_resolutions=[20000, 20000],
                          silent=True)
    all_tads = [tad for _, tad in test_chr.iter_tads("exp1", normed=False)]
    # previously: optimal_cmo(all_tads[7], all_tads[10], 7, method='score')
    align1, align2, _ = optimal_cmo(all_tads[1], all_tads[3], 7,
                                    method="score")
    # Values with square root normalization:
    #   align1: [0, 1, '-', 2, 3, '-', 4, 5, 6, 7, 8, 9, 10]
    #   align2: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
    expected1 = [0, 1, 2, "-", "-", 3, 4, 5, 6, 7, 8, "-", 9]
    expected2 = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
    self.assertEqual(align1, expected1)
    self.assertEqual(align2, expected2)
    if CHKTIME:
        print('%s %s' % ("6", time() - t0))
def test_12_3d_modelling_optimization(self):
    """
    quick test to generate 3D coordinates from 3? simple models???

    Runs the IMP parameter optimization on bins 50 to 70 and checks the
    numeric values of the best parameter set. Skipped (with a warning)
    when IMP cannot be imported.
    """
    if CHKTIME:
        t0 = time()
    try:
        __import__('IMP')
    except ImportError:
        warn('IMP not found, skipping test\n')
        return
    test_chr = Chromosome(name='Test Chromosome', max_tad_size=260000)
    test_chr.add_experiment('exp1', 20000, tad_def=exp4,
                            hic_data=PATH + '/20Kb/chrT/chrT_D.tsv')
    exp = test_chr.experiments[0]
    exp.load_hic_data(PATH + '/20Kb/chrT/chrT_A.tsv')
    exp.filter_columns(silent=True)
    exp.normalize_hic(silent=True, factor=None)
    result = exp.optimal_imp_parameters(50, 70, n_cpus=4,
                                        n_models=8, n_keep=2,
                                        lowfreq_range=[-0.6],
                                        upfreq_range=(0, 1.1, 1.1),
                                        maxdist_range=[500, 600],
                                        verbose=False)

    # get best correlations
    config = result.get_best_parameters_dict()
    wanted = {'maxdist': 600.0, 'upfreq': 0.0, 'kforce': 5,
              'dcutoff': 2, 'reference': '', 'lowfreq': -0.6,
              'scale': 0.01}
    # Compare the rounded numeric values key by key: comparing the two
    # ``values()`` lists would depend on the iteration order of the dicts.
    self.assertEqual(
        dict((key, round(val, 4)) for key, val in config.items()
             if not isinstance(val, str)),
        dict((key, round(val, 4)) for key, val in wanted.items()
             if not isinstance(val, str)))
    if CHKTIME:
        # single-argument form: same output under Python 2, valid in Python 3
        print('%s %s' % ('12', time() - t0))
def test_07_forbidden_regions(self):
    """
    Check the TAD borders of a first experiment and the content of the
    chromosome ``forbidden`` mapping before and after a second experiment
    is added (centromere search enabled).
    """
    if ONLY and ONLY != "07":
        return
    if CHKTIME:
        t0 = time()

    def forbidden_items():
        # snapshot of the current forbidden regions as (keys, values)
        return test_chr.forbidden.keys(), test_chr.forbidden.values()

    test_chr = Chromosome(name="Test Chromosome", max_tad_size=260000,
                          centromere_search=True)
    test_chr.add_experiment("exp1", 20000, tad_def=exp4,
                            hic_data=PATH + "/20Kb/chrT/chrT_D.tsv",
                            silent=True)
    # Values with square root normalization.
    # brks = [2.0, 7.0, 12.0, 18.0, 38.0, 43.0, 49.0,
    #         61.0, 66.0, 75.0, 89.0, 94.0, 99.0]
    brks = [3.0, 14.0, 19.0, 33.0, 38.0, 43.0, 49.0, 61.0, 66.0, 71.0,
            83.0, 89.0, 94.0, 99.0]
    tads = test_chr.experiments["exp1"].tads
    found = []
    for key in tads:
        if tads[key]["score"] > 0:
            found.append(tads[key]["end"])
    self.assertEqual(brks, found)

    items1 = forbidden_items()
    test_chr.add_experiment("exp2", 20000, tad_def=exp3,
                            hic_data=PATH + "/20Kb/chrT/chrT_C.tsv",
                            silent=True)
    items2 = forbidden_items()
    know1 = ([38, 39], ["Centromere", "Centromere"])
    # know1 = ([32, 33, 34, 38, 39, 19, 20, 21, 22,
    #           23, 24, 25, 26, 27, 28, 29, 30, 31],
    #          [None, None, None, 'Centromere', 'Centromere',
    #           None, None, None, None, None, None, None,
    #           None, None, None, None, None, None])
    know2 = ([38], ["Centromere"])
    self.assertEqual(items1, know1)
    self.assertEqual(items2, know2)
    if CHKTIME:
        print('%s %s' % ("7", time() - t0))
def test_04_chromosome_batch(self):
    """
    Find TADs in batch mode over three experiments and compare the end
    positions of the TADs with a positive score against known values.
    """
    if ONLY and ONLY != "04":
        return
    if CHKTIME:
        t0 = time()
    hic_files = [PATH + "/20Kb/chrT/chrT_A.tsv",
                 PATH + "/20Kb/chrT/chrT_D.tsv",
                 PATH + "/20Kb/chrT/chrT_C.tsv"]
    test_chr = Chromosome(name="Test Chromosome",
                          experiment_resolutions=[20000] * 3,
                          experiment_hic_data=hic_files,
                          experiment_names=["exp1", "exp2", "exp3"],
                          silent=True)
    test_chr.find_tad(["exp1", "exp2", "exp3"], batch_mode=True,
                      verbose=False, silent=True)
    tads = test_chr.get_experiment("batch_exp1_exp2_exp3").tads
    found = []
    for key in tads:
        if tads[key]["score"] > 0:
            found.append(tads[key]["end"])
    # Values obtained with square root normalization.
    # [3.0, 8.0, 16.0, 21.0, 28.0, 35.0, 43.0,
    #  49.0, 61.0, 66.0, 75.0, 89.0, 94.0, 99.0]
    expected = [3.0, 14.0, 19.0, 33.0, 43.0, 49.0, 61.0, 66.0, 71.0, 89.0,
                94.0, 99.0]
    self.assertEqual(expected, found)
    if CHKTIME:
        print('%s %s' % ("4", time() - t0))
def test_08_changing_resolution(self):
    """
    Change the resolution of an experiment several times and check that
    the sum of the Hi-C values stays the same at every resolution.
    """
    if CHKTIME:
        t0 = time()
    test_chr = Chromosome(name='Test Chromosome', max_tad_size=260000)
    test_chr.add_experiment('exp1', 20000, tad_def=exp4,
                            hic_data=PATH + '/20Kb/chrT/chrT_D.tsv',
                            silent=True)
    exp = test_chr.experiments['exp1']
    # total at the initial resolution (20000), then one total per change
    totals = [sum(exp.hic_data[0].values())]
    for resolution in (80000, 160000, 360000, 2400000, 40000, 20000, 40000):
        exp.set_resolution(resolution)
        totals.append(sum(exp.hic_data[0].values()))
        check_hic(exp.hic_data[0], exp.size)
    self.assertTrue(all(total == totals[0] for total in totals[1:]))
    if CHKTIME:
        print('%s %s' % ('8', time() - t0))
def test_03_tad_multi_aligner(self):
    """
    Align the TADs of four experiments and check the score and p-values
    returned by the randomized alignments.
    """
    if CHKTIME:
        t0 = time()
    hic_files = [PATH + '/40Kb/chrT/chrT_A.tsv',
                 PATH + '/20Kb/chrT/chrT_B.tsv',
                 PATH + '/20Kb/chrT/chrT_C.tsv',
                 PATH + '/20Kb/chrT/chrT_D.tsv']
    test_chr = Chromosome(name='Test Chromosome',
                          centromere_search=True,
                          experiment_tads=[exp1, exp2, exp3, exp4],
                          experiment_hic_data=hic_files,
                          experiment_names=['exp1', 'exp2', 'exp3', 'exp4'],
                          experiment_resolutions=[40000,20000,20000,20000],
                          silent=True)
    for experiment in test_chr.experiments:
        experiment.normalize_hic(silent=True, factor=None)

    # plain global alignment, result not inspected
    test_chr.align_experiments(verbose=False, randomize=False,
                               method='global')
    # randomized global alignment
    _, (score1, pval1) = test_chr.align_experiments(verbose=False,
                                                    method='global',
                                                    randomize=True,
                                                    rnd_num=100)
    # randomization by shuffling
    _, (_, pval2) = test_chr.align_experiments(verbose=False,
                                               randomize=True,
                                               rnd_method='shuffle',
                                               rnd_num=100)
    # Values with alignments obtained with square root normalization:
    #   score1 ~ -26.095, pval1 ~ 0.001, abs(0.175 - pval2) < 0.2
    self.assertEqual(round(-11.002, 3), round(score1, 3))
    self.assertEqual(round(0.001, 1), round(pval1, 1))
    self.assertTrue(abs(0.04 - pval2) < 0.1)
    if CHKTIME:
        print('%s %s' % ('3', time() - t0))
def test_08_changing_resolution(self):
    """
    Change the resolution of an experiment several times and check that
    the sum of the Hi-C data stays the same at every resolution.
    """
    test_chr = Chromosome(name='Test Chromosome', max_tad_size=260000)
    test_chr.add_experiment('exp1', 20000, tad_def=exp4,
                            hic_data='20Kb/chrT/chrT_D.tsv')
    exp = test_chr.experiments['exp1']
    # total at the initial resolution (20000), then one total per change
    totals = [sum(exp.hic_data[0])]
    for resolution in (80000, 160000, 360000, 2400000, 40000, 20000, 40000):
        exp.set_resolution(resolution)
        totals.append(sum(exp.hic_data[0]))
        check_hic(exp.hic_data[0], exp.size)
    self.assertTrue(all(total == totals[0] for total in totals[1:]))
def test_09_hic_normalization(self):
    """
    Normalize the Hi-C data of an experiment, compute its z-scores and
    compare the sum of the values stored in ``_zscores`` with a known
    value.
    """
    if ONLY and "09" not in ONLY:
        return
    if CHKTIME:
        t0 = time()
    test_chr = Chromosome(name="Test Chromosome", max_tad_size=260000)
    test_chr.add_experiment("exp1", 20000, tad_def=exp4,
                            hic_data=PATH + "/20Kb/chrT/chrT_D.tsv",
                            silent=True)
    exp = test_chr.experiments[0]
    exp.load_hic_data(PATH + "/20Kb/chrT/chrT_A.tsv", silent=True)
    exp.normalize_hic(silent=True)
    exp.get_hic_zscores()
    exp.get_hic_zscores(zscored=False)
    # accumulate in the same order as the stored keys
    sumz = 0
    for first in exp._zscores.keys():
        for second in exp._zscores[first]:
            sumz += exp._zscores[first][second]
    self.assertEqual(round(sumz, 4), round(4059.2877, 4))
    if CHKTIME:
        print('%s %s' % ("9", time() - t0))
def main():
    """
    Load every sample given on the command line into one chromosome
    object and save that object as ``<output>.tdb``.

    Each sample is a ``label,path`` string; loading is delegated to
    ``getHiCData``.
    """
    args = getArgs()
    sample_specs = args.i
    out_prefix = args.o
    chrom_name = args.c
    n_cpus = args.p
    res = args.r
    organism = args.s
    genome_build = args.b

    # initiate a chromosome object that will store all Hi-C data and analysis
    my_chrom = Chromosome(
        name=chrom_name,         # chromosome name
        centromere_search=True,  # whether to search for the centromere
        species=organism,
        assembly=genome_build,   # genome build
    )

    for spec in sample_specs:
        label, path = spec.split(",")
        print(label)
        print(path)
        getHiCData(my_chrom, out_prefix, label, path, res, n_cpus)

    # if not os.path.exists('tdb'):
    #     os.makedirs("tdb")
    my_chrom.save_chromosome(out_prefix + ".tdb", force=True)
def test_08_changing_resolution(self):
    """
    Change the resolution of an experiment several times and check that
    the sum of the Hi-C values stays the same at every resolution.
    """
    if ONLY and ONLY != "08":
        return
    if CHKTIME:
        t0 = time()
    test_chr = Chromosome(name="Test Chromosome", max_tad_size=260000)
    test_chr.add_experiment("exp1", 20000, tad_def=exp4,
                            hic_data=PATH + "/20Kb/chrT/chrT_D.tsv",
                            silent=True)
    exp = test_chr.experiments["exp1"]
    # total at the initial resolution (20000), then one total per change
    totals = [sum(exp.hic_data[0].values())]
    for resolution in (80000, 160000, 360000, 2400000, 40000, 20000, 40000):
        exp.set_resolution(resolution)
        totals.append(sum(exp.hic_data[0].values()))
        check_hic(exp.hic_data[0], exp.size)
    self.assertTrue(all(total == totals[0] for total in totals[1:]))
    if CHKTIME:
        print('%s %s' % ("8", time() - t0))
def main():
    """
    main function

    Loads and normalizes every Hi-C data file given in the options, sums
    the resulting experiments, and writes the result to ``opts.output``:
    as interaction pairs when ``opts.abc`` is set, otherwise as a Hi-C
    matrix.

    ``opts.output`` may be a path or an already open file-like object
    (anything with a ``write`` method) when the matrix is written.
    """
    opts = get_options()

    crm = Chromosome(':P')
    for i, data in enumerate(opts.data):
        name = 'exp' + str(i)
        crm.add_experiment(name, resolution=int(opts.resolution[i]),
                           hic_data=data)
        crm.experiments[name].normalize_hic()

    if len(opts.data) > 1:
        # the first sum builds a new experiment, the others add to it
        exp = crm.experiments[0] + crm.experiments[1]
        for i in range(2, len(opts.data)):
            exp += crm.experiments[i]
    else:
        exp = crm.experiments[0]

    if opts.abc:
        exp.write_interaction_pairs(opts.output, normalized=opts.norm,
                                    zscored=False)
        return

    matrix = exp.print_hic_matrix(print_it=False, normalized=opts.norm)
    if hasattr(opts.output, 'write'):
        # already an open stream: the caller owns it, do not close it
        opts.output.write(matrix)
    else:
        # a path: open it here and make sure it is flushed and closed
        with open(opts.output, 'w') as out:
            out.write(matrix)
def test_06_tad_clustering(self):
    """
    Align the second and fourth (non-normalized) TADs of the experiment
    with ``optimal_cmo`` and compare both alignments with known values.
    """
    if ONLY and ONLY != '06':
        return
    if CHKTIME:
        t0 = time()
    test_chr = Chromosome(name='Test Chromosome',
                          experiment_tads=[exp4],
                          experiment_names=['exp1'],
                          experiment_hic_data=[
                              PATH + '/20Kb/chrT/chrT_D.tsv'],
                          experiment_resolutions=[20000,20000],
                          silent=True)
    all_tads = [tad for _, tad in test_chr.iter_tads('exp1', normed=False)]
    # previously: optimal_cmo(all_tads[7], all_tads[10], 7, method='score')
    align1, align2, _ = optimal_cmo(all_tads[1], all_tads[3], 7,
                                    method='score')
    # Values with square root normalization:
    #   align1: [0, 1, '-', 2, 3, '-', 4, 5, 6, 7, 8, 9, 10]
    #   align2: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
    expected1 = [0, 1, 2, '-', '-', 3, 4, 5, 6, 7, 8, '-', 9]
    expected2 = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
    self.assertEqual(align1, expected1)
    self.assertEqual(align2, expected2)
    if CHKTIME:
        print('%s %s' % ('6', time() - t0))
def test_06_tad_clustering(self):
    """
    Align the second and fourth (non-normalized) TADs of the experiment
    with ``optimal_cmo`` and compare both alignments with known values.
    """
    if ONLY and "06" not in ONLY:
        return
    if CHKTIME:
        t0 = time()
    test_chr = Chromosome(name="Test Chromosome",
                          experiment_tads=[exp4],
                          experiment_names=["exp1"],
                          experiment_hic_data=[
                              PATH + "/20Kb/chrT/chrT_D.tsv"],
                          experiment_resolutions=[20000, 20000],
                          silent=True)
    all_tads = [tad for _, tad in test_chr.iter_tads("exp1", normed=False)]
    # previously: optimal_cmo(all_tads[7], all_tads[10], 7, method="score")
    align1, align2, _ = optimal_cmo(all_tads[1], all_tads[3], 7,
                                    method="score")
    # Values with square root normalization:
    #   align1: [0, 1, "-", 2, 3, "-", 4, 5, 6, 7, 8, 9, 10]
    #   align2: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
    expected1 = [0, 1, 2, "-", "-", 3, 4, 5, 6, 7, 8, "-", 9]
    expected2 = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
    self.assertEqual(align1, expected1)
    self.assertEqual(align2, expected2)
    if CHKTIME:
        print('%s %s' % ("6", time() - t0))
def test_13_3d_modelling_centroid(self):
    """
    quick test to generate 3D coordinates from 3? simple models???

    Models bins 51 to 71 with four copies, saves the models to
    'models.pick' and computes their average model. No assertion is made.
    Skipped (with a warning) when IMP cannot be imported.
    """
    if ONLY and "13" not in ONLY:
        return
    if CHKTIME:
        t0 = time()
    try:
        __import__("IMP")
    except ImportError:
        warn("IMP not found, skipping test\n")
        return
    test_chr = Chromosome(name="Test Chromosome", max_tad_size=260000)
    test_chr.add_experiment("exp1", 20000, tad_def=exp4,
                            hic_data=PATH + "/20Kb/chrT/chrT_D.tsv",
                            silent=True)
    exp = test_chr.experiments[0]
    exp.load_hic_data(PATH + "/20Kb/chrT/chrT_A.tsv", silent=True)
    exp.filter_columns(silent=True)
    exp.normalize_hic(silent=True, factor=None)
    modelling_config = {"kforce": 5, "maxdist": 500, "scale": 0.01,
                        "upfreq": 0.5, "lowfreq": -0.5}
    # verbose=3 can be passed to model_region for debugging
    models = exp.model_region(51, 71, ncopies=4, n_models=10, n_keep=10,
                              n_cpus=10, config=modelling_config)
    models.save_models("models.pick")

    average = models.average_model()
    n_models = len(models)
    print("I'm here test 13")
def test_11_write_interaction_pairs(self):
    """
    Write the interaction pairs of an experiment to a scratch file
    ("lala"), read it back and compare the number of lines and two of
    the values with known results.
    """
    if ONLY and "11" not in ONLY:
        return
    # writes interaction pair file.
    if CHKTIME:
        t0 = time()
    test_chr = Chromosome(name="Test Chromosome", max_tad_size=260000)
    test_chr.add_experiment("exp1", 20000, tad_def=exp4,
                            hic_data=PATH + "/20Kb/chrT/chrT_D.tsv")
    exp = test_chr.experiments[0]
    exp.load_hic_data(PATH + "/20Kb/chrT/chrT_A.tsv", silent=True)
    exp.filter_columns(silent=True)
    exp.normalize_hic(factor=1, silent=True)
    exp.get_hic_zscores(zscored=False)
    exp.write_interaction_pairs("lala")
    with open("lala") as pairs_file:
        lines = pairs_file.readlines()
    self.assertEqual(len(lines), 4674)
    # the value is the third tab-separated field of a line
    value_25 = float(lines[25].split('\t')[2])
    self.assertAlmostEqual(value_25, 0.5852295196345679)
    value_2000 = float(lines[2000].split('\t')[2])
    self.assertAlmostEqual(value_2000, 0.07060448846960976)
    system("rm -f lala")
    if CHKTIME:
        print("11", time() - t0)
def test_04_chromosome_batch(self):
    """
    Find TADs in batch mode over three experiments and compare the end
    positions of the TADs with a positive score against known values.
    """
    if ONLY and "04" not in ONLY:
        return
    if CHKTIME:
        t0 = time()
    hic_files = [PATH + "/20Kb/chrT/chrT_A.tsv",
                 PATH + "/20Kb/chrT/chrT_D.tsv",
                 PATH + "/20Kb/chrT/chrT_C.tsv"]
    test_chr = Chromosome(name="Test Chromosome",
                          experiment_resolutions=[20000] * 3,
                          experiment_hic_data=hic_files,
                          experiment_names=["exp1", "exp2", "exp3"],
                          silent=True)
    test_chr.find_tad(["exp1", "exp2", "exp3"], batch_mode=True,
                      verbose=False, silent=True)
    tads = test_chr.get_experiment("batch_exp1_exp2_exp3").tads
    found = []
    for key in tads:
        if tads[key]["score"] > 0:
            found.append(tads[key]["end"])
    # Values obtained with square root normalization.
    # [3.0, 8.0, 16.0, 21.0, 28.0, 35.0, 43.0,
    #  49.0, 61.0, 66.0, 75.0, 89.0, 94.0, 99.0]
    expected = [3.0, 14.0, 19.0, 33.0, 43.0, 49.0, 61.0, 66.0, 71.0, 89.0,
                94.0, 99.0]
    self.assertEqual(expected, found)
    if CHKTIME:
        print('%s %s' % ("4", time() - t0))
def test_12_3d_modelling_optimization(self):
    """
    quick test to generate 3D coordinates from 3? simple models???

    Runs the IMP parameter optimization on bins 50 to 70 (four copies)
    and checks the numeric values of the best parameter set. Skipped
    (with a warning) when IMP cannot be imported.
    """
    if ONLY and "12" not in ONLY:
        return
    if CHKTIME:
        t0 = time()

    try:
        __import__("IMP")
    except ImportError:
        warn("IMP not found, skipping test\n")
        return
    test_chr = Chromosome(name="Test Chromosome", max_tad_size=260000)
    # norm_data can be used instead to pass the normalized matrix directly
    test_chr.add_experiment("exp1", 20000, tad_def=exp4,
                            hic_data=PATH + "/20Kb/chrT/chrT_D.tsv")
    exp = test_chr.experiments[0]
    exp.load_hic_data(PATH + "/20Kb/chrT/chrT_A.tsv")
    exp.filter_columns(silent=True)
    exp.normalize_hic(silent=True, factor=None)
    result = exp.optimal_imp_parameters(
        50,
        70,
        ncopies=4,
        # It can be that this function requires also the raw hic_data matrix
        n_cpus=1,
        n_models=8,
        n_keep=2,
        lowfreq_range=[-0.6],
        upfreq_range=(0, 1.1, 1.1),
        maxdist_range=[500, 600],
        verbose=True,
    )

    # get best correlations
    config = result.get_best_parameters_dict()

    # Save the models and the contact map
    # result.save_model or result.save_data
    # result.write_cmm to visualize the best models
    # result.write_xyz to visualize the best models

    wanted = {
        "maxdist": 600.0,
        "upfreq": 0.0,
        "kforce": 5,
        "dcutoff": 2,
        "reference": "",
        "lowfreq": -0.6,
        "scale": 0.01,
    }
    # Compare the rounded numeric values key by key: comparing the two
    # ``values()`` lists would depend on the iteration order of the dicts.
    self.assertEqual(
        dict((key, round(val, 4)) for key, val in config.items()
             if not isinstance(val, str)),
        dict((key, round(val, 4)) for key, val in wanted.items()
             if not isinstance(val, str)),
    )
    if CHKTIME:
        # single-argument form: same output under Python 2, valid in Python 3
        print('%s %s' % ("12", time() - t0))
def test_05_save_load(self):
    """
    Save a chromosome to a scratch file ('lolo'), load it back and
    remove the files written. No assertion is made on the loaded object.
    """
    original = Chromosome(name='Test Chromosome',
                          experiment_tads=[exp1, exp2],
                          experiment_names=['exp1', 'exp2'],
                          experiment_resolutions=[20000,20000])
    original.save_chromosome('lolo', force=True)
    test_chr = load_chromosome('lolo')
    # remove the saved chromosome and its Hi-C companion file
    system('rm -f lolo')
    system('rm -f lolo_hic')
def test_13_3d_modelling_centroid(self):
    # model with no optimisation
    """
    quick test to generate 3D coordinates from 3? simple models???

    Models bins 51 to 71 and checks that the centroid model reported by
    the models object is the model closest to the average model according
    to ``rmsdRMSD_wrapper``. Skipped (with a warning) when IMP cannot be
    imported.
    """
    if ONLY and ONLY != '13':
        return
    if CHKTIME:
        t0 = time()

    try:
        __import__('IMP')
    except ImportError:
        warn('IMP not found, skipping test\n')
        return
    test_chr = Chromosome(name='Test Chromosome', max_tad_size=260000)
    test_chr.add_experiment('exp1', 20000, tad_def=exp4,
                            hic_data=PATH + '/20Kb/chrT/chrT_D.tsv',
                            silent=True)
    exp = test_chr.experiments[0]
    exp.load_hic_data(PATH + '/20Kb/chrT/chrT_A.tsv', silent=True)
    exp.filter_columns(silent=True)
    exp.normalize_hic(silent=True, factor=None)
    modelling_config = {'kforce': 5, 'maxdist': 500, 'scale': 0.01,
                        'upfreq': 1.0, 'lowfreq': -0.6}
    models = exp.model_region(51, 71, n_models=40, n_keep=25,
                              n_cpus=4, config=modelling_config)
    models.save_models('models.pick')

    avg = models.average_model()
    nmd = len(models)

    def coordinates(axis):
        # coordinates of every model on this axis, average model last
        return [models[m][axis] for m in xrange(nmd)] + [avg[axis]]

    dev = rmsdRMSD_wrapper(coordinates('x'), coordinates('y'),
                           coordinates('z'),
                           models._zeros,
                           models.nloci, 200, range(nmd + 1),
                           nmd + 1, int(False), 'rmsd', 0)
    centroid = models[models.centroid_model()]
    # find closest: the average model has index nmd in the comparison
    model = min(range(nmd), key=lambda k: dev[(k, nmd)])
    self.assertEqual(centroid['rand_init'], models[model]['rand_init'])
    if CHKTIME:
        print('%s %s' % ('13', time() - t0))
def test_12_3d_modelling_optimization(self):
    """
    quick test to generate 3D coordinates from 3? simple models???

    Runs the IMP parameter optimization on bins 50 to 70 and checks the
    numeric parameters of the best set against the expected values.
    Skipped (with a warning) when IMP cannot be imported.
    """
    if ONLY and "12" not in ONLY:
        return
    if CHKTIME:
        t0 = time()

    try:
        __import__("IMP")
    except ImportError:
        warn("IMP not found, skipping test\n")
        return
    test_chr = Chromosome(name="Test Chromosome", max_tad_size=260000)
    test_chr.add_experiment("exp1", 20000, tad_def=exp4,
                            hic_data=PATH + "/20Kb/chrT/chrT_D.tsv")
    exp = test_chr.experiments[0]
    exp.load_hic_data(PATH + "/20Kb/chrT/chrT_A.tsv")
    exp.filter_columns(silent=True)
    exp.normalize_hic(silent=True, factor=None)
    result = exp.optimal_imp_parameters(
        50, 70, n_cpus=4,
        n_models=8, n_keep=2,
        lowfreq_range=[-0.6],
        upfreq_range=(0, 1.1, 1.1),  # from 0 till 1.1 in step of 1.1 with ()
        maxdist_range=[500, 600],    # it will use 500 and 600 with []
        verbose=False)

    # get best correlations
    config = result.get_best_parameters_dict()  # dict with parameters
    wanted = {
        "maxdist": 600.0,
        "upfreq": 0.0,
        "kforce": 5,
        "dcutoff": 2,
        "reference": "",
        "lowfreq": -0.6,
        "scale": 0.01
    }
    # Select the parameters by the type of their *value*: the keys are all
    # strings, so filtering on the key type leaves nothing to compare. The
    # expected side must also come from ``wanted``, not from ``config``.
    numeric_keys = sorted(key for key in wanted
                          if not isinstance(wanted[key], str))
    self.assertEqual([round(config[key], 4) for key in numeric_keys],
                     [round(wanted[key], 4) for key in numeric_keys])
    if CHKTIME:
        print("12", time() - t0)
def _sub_experiment_zscore(self, start, end):
    """
    Get the z-score of a sub-region of an experiment.

    The experiment is first normalized with the 'visibility' method if it
    was not already. The sub-matrix is loaded into a temporary experiment
    whose weights and zeros are taken from the full experiment.

    TODO: find a nicer way to do this...

    :param start: first bin to model (bin number)
    :param end: last bin to model (bin number), included in the region

    :returns: the z-scores of the temporary experiment and a square matrix
       (list of lists) filled with its ``norm`` values, NaN where a bin is
       a zero or the Hi-C value is empty
    :raises Exception: when the number of zeros found equals
       ``end + 1 - start`` (computed after ``end`` was incremented)
    """
    if self._normalization != 'visibility':
        warn('WARNING: normalizing according to visibility method')
        self.normalize_hic(method='visibility')
    from pytadbit import Chromosome
    full_matrix = self.get_hic_matrix()
    end += 1  # from here on ``end`` is an exclusive bound
    bins = xrange(start, end)
    sub_matrix = [[full_matrix[row][col] for col in bins] for row in bins]

    tmp = Chromosome('tmp')
    tmp.add_experiment('exp1', hic_data=[sub_matrix],
                       resolution=self.resolution, filter_columns=False)
    exp = tmp.experiments[0]
    # We want the weights and zeros calculated in the full chromosome
    full_size = self.size
    exp.norm = [[self.norm[0][i + full_size * j]
                 for i in bins for j in bins]]
    # NOTE(review): ``end`` was already incremented, so this filter also
    # accepts the first bin past the region - confirm this is intended.
    exp._zeros = dict((zero - start, None) for zero in self._zeros
                      if start <= zero <= end)
    if len(exp._zeros) == (end + 1 - start):
        raise Exception('ERROR: no interaction found in selected regions')
    # ... but the z-scores in this particular region
    exp.get_hic_zscores(remove_zeros=True)

    values = [[float('nan') for _ in xrange(exp.size)]
              for _ in xrange(exp.size)]
    for i in xrange(exp.size):
        # zeros are rows or columns having a zero in the diagonal
        if i in exp._zeros:
            continue
        for j in xrange(i + 1, exp.size):
            if j in exp._zeros:
                continue
            cell = i * exp.size + j
            if not exp.hic_data[0][cell]:
                continue
            # same value on both sides of the diagonal
            values[i][j] = exp.norm[0][cell]
            values[j][i] = exp.norm[0][cell]
    return exp._zscores, values
def test_09_hic_normalization(self):
    """
    Load a second data file into an experiment and compute its z-scores
    with and without the ``zscored`` option. No assertion is made.

    TODO: check with Davide's script
    """
    chromosome = Chromosome(name='Test Chromosome', max_tad_size=260000)
    chromosome.add_experiment('exp1', 20000, tad_def=exp4,
                              hic_data='20Kb/chrT/chrT_D.tsv')
    experiment = chromosome.experiments[0]
    experiment.load_experiment('20Kb/chrT/chrT_A.tsv')
    experiment.get_hic_zscores()
    experiment.get_hic_zscores(zscored=False)
def test_10_generate_weights(self):
    """
    Check that normalizing again, after ``norm`` was reset to None,
    reproduces the first set of weights present after loading.

    method names are: 'sqrt' or 'over_tot'
    """
    chromosome = Chromosome(name='Test Chromosome', max_tad_size=260000)
    chromosome.add_experiment('exp1', 20000, tad_def=exp4,
                              hic_data='20Kb/chrT/chrT_D.tsv')
    exp = chromosome.experiments[0]
    # copy of the weights present right after loading
    initial_weights = exp.norm[:]
    exp.norm = None
    exp.normalize_hic()
    self.assertEqual(initial_weights[0], exp.norm[0])
def test_10_generate_weights(self):
    """
    Check that normalizing again, after ``wght`` was reset to None,
    reproduces the first set of weights present after loading.

    TODO: using Francois' formula
    method names are: 'sqrt' or 'over_tot'
    """
    chromosome = Chromosome(name='Test Chromosome', max_tad_size=260000)
    chromosome.add_experiment('exp1', 20000, tad_handler=exp4,
                              xp_handler='20Kb/chrT/chrT_D.tsv')
    exp = chromosome.experiments[0]
    # copy of the weights present right after loading
    initial_weights = exp.wght[:]
    exp.wght = None
    exp.normalize_hic()
    self.assertEqual(initial_weights[0], exp.wght[0])
def test_04_chromosome_batch(self):
    """
    Find TADs in batch mode over three experiments and compare the end
    positions of the TADs with a positive score against known values.
    """
    hic_files = ['20Kb/chrT/chrT_A.tsv',
                 '20Kb/chrT/chrT_D.tsv',
                 '20Kb/chrT/chrT_C.tsv']
    test_chr = Chromosome(name='Test Chromosome',
                          experiment_resolutions=[20000]*3,
                          experiment_hic_data=hic_files,
                          experiment_names=['exp1', 'exp2', 'exp3'])
    test_chr.find_tad(['exp1', 'exp2', 'exp3'], batch_mode=True,
                      verbose=False)
    tads = test_chr.get_experiment('batch_exp1_exp2_exp3').tads
    found = []
    for key in tads:
        if tads[key]['score'] > 0:
            found.append(tads[key]['end'])
    expected = [3.0, 8.0, 16.0, 21.0, 28.0, 35.0, 43.0, 49.0, 61.0, 66.0,
                75.0, 89.0, 99.0]
    self.assertEqual(expected, found)
def test_06_tad_clustering(self):
    """
    Align the eighth and eleventh TADs of the experiment with
    ``optimal_cmo`` and compare both alignments with known values.
    """
    test_chr = Chromosome(name='Test Chromosome',
                          experiment_tads=[exp4],
                          experiment_names=['exp1'],
                          experiment_hic_data=['20Kb/chrT/chrT_D.tsv'],
                          experiment_resolutions=[20000,20000])
    all_tads = [tad for _, tad in test_chr.iter_tads('exp1')]
    align1, align2, _ = optimal_cmo(all_tads[7], all_tads[10], 7,
                                    method='score')
    expected1 = [0, 1, '-', 2, 3, '-', 4, 5, 6, 7, 8, 9, 10]
    expected2 = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
    self.assertEqual(align1, expected1)
    self.assertEqual(align2, expected2)
def _sub_experiment_zscore(self, start, end):
    """
    Get the z-score of a sub-region of an experiment.

    The experiment is first normalized with the 'visibility' method if it
    was not already. The sub-matrix is loaded into a temporary experiment
    whose weights and zeros are taken from the full experiment.

    TODO: find a nicer way to do this...

    :param start: first bin to model (bin number)
    :param end: last bin to model (bin number), included in the region

    :returns: the z-scores of the temporary experiment and a square matrix
       (list of lists) filled with its ``norm`` values, NaN where a bin is
       a zero or the Hi-C value is empty
    :raises Exception: when the number of zeros found equals
       ``end + 1 - start`` (computed after ``end`` was incremented)
    """
    if self._normalization != 'visibility':
        warn('WARNING: normalizing according to visibility method')
        self.normalize_hic(method='visibility')
    from pytadbit import Chromosome
    whole = self.get_hic_matrix()
    end += 1  # from here on ``end`` is an exclusive bound
    region = xrange(start, end)
    cropped = [[whole[row][col] for col in region] for row in region]

    tmp = Chromosome('tmp')
    tmp.add_experiment('exp1', hic_data=[cropped],
                       resolution=self.resolution, filter_columns=False)
    exp = tmp.experiments[0]
    # We want the weights and zeros calculated in the full chromosome
    siz = self.size
    exp.norm = [[self.norm[0][i + siz * j]
                 for i in region for j in region]]
    # NOTE(review): ``end`` was already incremented, so this filter also
    # accepts the first bin past the region - confirm this is intended.
    exp._zeros = dict((z - start, None) for z in self._zeros
                      if start <= z <= end)
    if len(exp._zeros) == (end + 1 - start):
        raise Exception('ERROR: no interaction found in selected regions')
    # ... but the z-scores in this particular region
    exp.get_hic_zscores(remove_zeros=True)

    values = [[float('nan') for _ in xrange(exp.size)]
              for _ in xrange(exp.size)]
    for i in xrange(exp.size):
        # zeros are rows or columns having a zero in the diagonal
        if i in exp._zeros:
            continue
        for j in xrange(i + 1, exp.size):
            if j in exp._zeros:
                continue
            flat = i * exp.size + j
            if not exp.hic_data[0][flat]:
                continue
            # same value on both sides of the diagonal
            values[i][j] = exp.norm[0][flat]
            values[j][i] = exp.norm[0][flat]
    return exp._zscores, values
def test_03_tad_multi_aligner(self):
    """
    Align the TADs of four experiments and check the score and p-values
    returned by the randomized alignments.
    """
    test_chr = Chromosome(name='Test Chromosome',
                          tad_handlers=[exp1, exp2, exp3, exp4],
                          experiment_names=['exp1', 'exp2', 'exp3', 'exp4'],
                          experiment_resolutions=[40000,20000,20000,20000])
    # plain global alignment, result not inspected
    test_chr.align_experiments(verbose=False, randomize=False,
                               method='global')
    # randomized global alignment
    score1, pval1 = test_chr.align_experiments(verbose=False,
                                               method='global',
                                               randomize=True)
    # randomization by shuffling
    _, pval2 = test_chr.align_experiments(verbose=False, randomize=True,
                                          rnd_method='shuffle')
    self.assertEqual(round(-26.095, 3), round(score1, 3))
    self.assertEqual(round(0.001, 1), round(pval1, 1))
    self.assertTrue(abs(0.175 - pval2) < 0.2)
def load_experiments(opts):
    """
    Build a Chromosome and add one experiment per Hi-C file.

    :param opts: options object; this function reads `crm`, `hic_files`,
       `exp_names`, `resolution` and `verbose` from it

    :returns: the Chromosome object with all experiments added

    Experiment names are taken from `opts.exp_names` when given. Otherwise
    the name is derived from the file name: the last extension is removed
    and the remaining dot-separated parts are joined together. If that
    leaves nothing (file name without extension, or a dot-file), the bare
    file name is used, so that an experiment is never added under an empty
    name.
    """
    crm = Chromosome(opts.crm)
    for i, xpr in enumerate(opts.hic_files):
        if opts.exp_names:
            name = opts.exp_names[i]
        else:
            basename = xpr.split('/')[-1]
            # falls back to the base name when stripping the extension
            # leaves an empty string
            name = ''.join(basename.split('.')[:-1]) or basename
        if opts.verbose:
            print(' Reading Hi-C datafile #%s (%s)' % (i + 1, name))
        crm.add_experiment(name, hic_data=xpr,
                           resolution=int(opts.resolution))
        if opts.verbose:
            print(' loaded as: %s\n' % (crm.experiments[name]))
    return crm
def test_tad_multi_aligner(self):
    """
    Run tadbit on four test matrices, align the results in a single
    chromosome and check the alignment score and p-value.
    """
    matrices = ("chrT/chrT_A.tsv", "chrT/chrT_B.tsv",
                "chrT/chrT_C.tsv", "chrT/chrT_D.tsv")
    tad_results = []
    for path in matrices:
        tad_results.append(tadbit(path, max_tad_size="auto",
                                  verbose=False, no_heuristic=False))
    chrom = Chromosome(
        name="Test Chromosome",
        resolution=20000,
        experiments=tad_results,
        experiment_names=["exp1", "exp2", "exp3", "exp4"],
    )
    aligned = chrom.align_experiments(verbose=False, randomize=True)
    score, pval = aligned
    self.assertEqual(round(19.555803, 3), round(score, 3))
    self.assertEqual(round(0.4, 1), round(pval, 1))
def test_05_save_load(self):
    """
    Save a chromosome to disk, load it back and check that both objects
    have the same string representation of their attributes.
    """
    if CHKTIME:
        t0 = time()
    saved = Chromosome(name='Test Chromosome',
                       experiment_tads=[exp1, exp2],
                       experiment_names=['exp1', 'exp2'],
                       experiment_resolutions=[20000, 20000],
                       silent=True)
    saved.save_chromosome('lolo', force=True)
    loaded = load_chromosome('lolo')
    # remove the files written by save_chromosome before asserting
    for cleanup in ('rm -f lolo', 'rm -f lolo_hic'):
        system(cleanup)
    before = str(saved.__dict__)
    after = str(loaded.__dict__)
    self.assertEqual(before, after)
    if CHKTIME:
        print('5 %s' % (time() - t0))
def test_05_save_load(self):
    """
    Round-trip test: a chromosome written with save_chromosome and read
    back with load_chromosome must expose the same attribute dictionary
    (compared through its string form).
    """
    if CHKTIME:
        t0 = time()
    settings = {'name': 'Test Chromosome',
                'experiment_tads': [exp1, exp2],
                'experiment_names': ['exp1', 'exp2'],
                'experiment_resolutions': [20000, 20000],
                'silent': True}
    chrom_out = Chromosome(**settings)
    chrom_out.save_chromosome('lolo', force=True)
    chrom_in = load_chromosome('lolo')
    # clean up the temporary files
    system('rm -f lolo')
    system('rm -f lolo_hic')
    self.assertEqual(str(chrom_out.__dict__), str(chrom_in.__dict__))
    if CHKTIME:
        elapsed = time() - t0
        print('5 %s' % (elapsed,))
def test_13_3d_modelling_centroid(self):
    """
    Quick test of the centroid model: generate 3D models for a region,
    append their average model to the set, and check that the model with
    the smallest value against that average in the result of
    rmsdRMSD_wrapper is the one reported by centroid_model().

    Skipped (returns silently) when the IMP module cannot be imported.
    """
    if ONLY and ONLY != '13':
        return
    if CHKTIME:
        t0 = time()
    try:
        __import__('IMP')
    except ImportError:
        warn('IMP not found, skipping test\n')
        return
    chrom = Chromosome(name='Test Chromosome', max_tad_size=260000)
    chrom.add_experiment('exp1', 20000, tad_def=exp4,
                         hic_data=PATH + '/20Kb/chrT/chrT_D.tsv',
                         silent=True)
    exp = chrom.experiments[0]
    exp.load_hic_data(PATH + '/20Kb/chrT/chrT_A.tsv', silent=True)
    exp.filter_columns(silent=True)
    exp.normalize_hic(silent=True, factor=None)
    modelling_config = {'kforce': 5, 'maxdist': 500, 'scale': 0.01,
                        'upfreq': 1.0, 'lowfreq': -0.6}
    models = exp.model_region(51, 71, n_models=40, n_keep=25, n_cpus=4,
                              config=modelling_config)
    models.save_models('models.pick')
    avg = models.average_model()
    nmd = len(models)

    def with_average(axis):
        # coordinates of every model along `axis`, average model last
        coords = [models[m][axis] for m in xrange(nmd)]
        coords.append(avg[axis])
        return coords

    n_structs = len(models) + 1
    dev = rmsdRMSD_wrapper(
        with_average('x'),
        with_average('y'),
        with_average('z'),
        models._zeros, models.nloci, 200,
        range(n_structs), n_structs, int(False), 'rmsd', 0)
    centroid = models[models.centroid_model()]
    # find closest: index `nmd` is the average model appended above
    model = min(range(nmd), key=lambda k: dev[(k, nmd)])
    self.assertEqual(centroid['rand_init'], models[model]['rand_init'])
    if CHKTIME:
        print('13 %s' % (time() - t0))
def test_05_save_load(self):
    """
    Check that a chromosome saved to disk and loaded again keeps the same
    attributes (compared as the string form of `__dict__`).

    Skipped when ONLY is set and does not contain "05".
    """
    if ONLY and "05" not in ONLY:
        return
    if CHKTIME:
        t0 = time()
    tads = [exp1, exp2]
    names = ["exp1", "exp2"]
    first = Chromosome(name="Test Chromosome",
                       experiment_tads=tads,
                       experiment_names=names,
                       experiment_resolutions=[20000, 20000],
                       silent=True)
    first.save_chromosome("lolo", force=True)
    second = load_chromosome("lolo")
    # delete what was written to disk before comparing
    for command in ("rm -f lolo", "rm -f lolo_hic"):
        system(command)
    self.assertEqual(str(first.__dict__), str(second.__dict__))
    if CHKTIME:
        print("5 %s" % (time() - t0))
def main():
    """
    Generate 3D models either from a z-score file (`opts.inabc`) or from an
    experiment loaded into a Chromosome (`opts.incrm`), then optionally
    save them, write CMM files and run the full clustering report.
    """
    opts, params = get_options()
    # keyword arguments shared by both modelling entry points
    shared = dict(n_models=opts.nmodels, n_keep=opts.nkeep,
                  n_cpus=opts.ncpus, keep_all=False, verbose=False,
                  config=params)
    if not opts.inabc:
        chrom_name, exp_name = 'crm', 'X'
        crmbit = Chromosome(chrom_name)
        crmbit.add_experiment(exp_name, resolution=opts.resolution,
                              xp_handler=opts.incrm)
        experiment = crmbit.experiments[exp_name]
        models = experiment.model_region(start=opts.start, end=opts.end,
                                         **shared)
    else:
        zscores = parse_zscores(opts.inabc)
        models = generate_3d_models(zscores, opts.resolution, start=1,
                                    outfile=None, **shared)
    if opts.save:
        last = opts.start + opts.nmodels
        models.save_models('%s/models_%s_%s.pik' % (opts.out, opts.start,
                                                    last))
    # one CMM file for each of the first `opts.cmm` models
    n_cmm = int(opts.cmm)
    for model_idx in xrange(n_cmm):
        models.write_cmm(model_idx, opts.out)
    if opts.full_report:
        models.cluster_models(dcutoff=200)
        models.cluster_analysis_dendrogram(n_best_clusters=10)
        models.model_consistency()
def test_10_compartments(self):
    """
    Load Hi-C data into an experiment, search for compartments and check
    how many are reported under the `None` key.

    Skipped when ONLY is set and does not contain "10".
    """
    if ONLY and "10" not in ONLY:
        return
    if CHKTIME:
        t0 = time()
    chrom = Chromosome(name="Test Chromosome", max_tad_size=260000)
    chrom.add_experiment("exp1", 20000, tad_def=exp4,
                         hic_data=PATH + "/20Kb/chrT/chrT_D.tsv",
                         silent=True)
    experiment = chrom.experiments[0]
    # replaces the matrix given at creation with the chrT_A one
    experiment.load_hic_data(PATH + "/20Kb/chrT/chrT_A.tsv", silent=True)
    hic_data = experiment.hic_data[0]
    hic_data.find_compartments(label_compartments="cluster")
    n_compartments = len(hic_data.compartments[None])
    self.assertEqual(n_compartments, 39)
    # self.assertEqual(round(hic_data.compartments[None][24]["dens"], 5),
    #                  0.75434)
    if CHKTIME:
        print("10 %s" % (time() - t0))
def test_10_compartments(self):
    """
    Compartment detection test: after find_compartments, 39 compartments
    are expected under the `None` key of `hic_data.compartments`.

    Skipped when ONLY is set to anything other than '10'.
    """
    if ONLY and ONLY != '10':
        return
    if CHKTIME:
        t0 = time()
    creation_matrix = PATH + '/20Kb/chrT/chrT_D.tsv'
    test_chrom = Chromosome(name='Test Chromosome', max_tad_size=260000)
    test_chrom.add_experiment('exp1', 20000, tad_def=exp4,
                              hic_data=creation_matrix, silent=True)
    exp = test_chrom.experiments[0]
    loaded_matrix = PATH + '/20Kb/chrT/chrT_A.tsv'
    exp.load_hic_data(loaded_matrix, silent=True)
    matrix = exp.hic_data[0]
    matrix.find_compartments(label_compartments='cluster')
    found = matrix.compartments[None]
    self.assertEqual(len(found), 39)
    # self.assertEqual(round(hic_data.compartments[None][24]['dens'], 5),
    #                  0.75434)
    if CHKTIME:
        elapsed = time() - t0
        print('10 %s' % (elapsed,))
def main():
    """
    main function

    Clusters the genes of chromosome 19 from their distance matrix,
    reduces each cluster to its (min, max) value and passes these ranges
    to tad_breaker together with the TADs found for the experiment.
    """
    # retrieve HOX genes
    distmatrix, geneids = get_genes()
    # compute TADs for human chromosome 19
    chrom = Chromosome(name='Test Chromosome')
    chrom.add_experiment('exp1', 100000,
                         xp_handler=PATH + 'HIC_gm06690_chr19_chr19_100000_obs.txt')
    chrom.find_tad(['exp1'])
    exp = chrom.experiments['exp1']
    tree = linkage(distmatrix['19'])
    cl_idx = list(fcluster(tree, t=1, criterion='inconsistent'))
    print('%s clusters' % (max(cl_idx),))
    # cluster labels are used as 1-based positions into `members`
    members = [[] for _ in xrange(1, max(cl_idx) + 1)]
    genes = geneids['19']
    for pos, label in enumerate(cl_idx):
        members[label - 1].append(genes[pos][1])
    # keep only the extremes of each cluster
    cluster = [(min(group), max(group)) for group in members]
    tad_breaker(exp.tads, cluster, exp.resolution, show_plot=True, bins=5,
                title='Proportion of HOX genes according to position in a TAD')
def test_07_forbidden_regions(self):
    """
    Check the TAD borders with a positive score for 'exp1' and the content
    of `forbidden` on the chromosome, both after adding the first
    experiment and after adding a second one.

    Skipped when ONLY is set to anything other than '07'.
    """
    if ONLY and ONLY != '07':
        return
    if CHKTIME:
        t0 = time()
    chrom = Chromosome(name='Test Chromosome', max_tad_size=260000,
                       centromere_search=True)
    chrom.add_experiment('exp1', 20000, tad_def=exp4,
                         hic_data=PATH + '/20Kb/chrT/chrT_D.tsv',
                         silent=True)
    # Values with square root normalization.
    #brks = [2.0, 7.0, 12.0, 18.0, 38.0, 43.0, 49.0,
    #        61.0, 66.0, 75.0, 89.0, 94.0, 99.0]
    brks = [3.0, 14.0, 19.0, 33.0, 38.0, 43.0, 49.0,
            61.0, 66.0, 71.0, 83.0, 89.0, 94.0, 99.0]
    tads = chrom.experiments['exp1'].tads
    found = []
    for key in tads:
        tad = tads[key]
        if tad['score'] > 0:
            found.append(tad['end'])
    self.assertEqual(brks, found)
    # snapshot of the forbidden regions with a single experiment
    items1 = (chrom.forbidden.keys(), chrom.forbidden.values())
    chrom.add_experiment('exp2', 20000, tad_def=exp3,
                         hic_data=PATH + '/20Kb/chrT/chrT_C.tsv',
                         silent=True)
    # ... and again once the second experiment is in
    items2 = (chrom.forbidden.keys(), chrom.forbidden.values())
    know1 = ([38, 39], ['Centromere', 'Centromere'])
    #know1 = ([32, 33, 34, 38, 39, 19, 20, 21, 22,
    #          23, 24, 25, 26, 27, 28, 29, 30, 31],
    #         [None, None, None, 'Centromere', 'Centromere',
    #          None, None, None, None, None, None, None,
    #          None, None, None, None, None, None])
    know2 = ([38], ['Centromere'])
    self.assertEqual(items1, know1)
    self.assertEqual(items2, know2)
    if CHKTIME:
        print('7 %s' % (time() - t0))