def main():
    """
    Model a region from a normalized matrix, driven by the command line.

    Arguments read from ``sys.argv``:
      1. path to the normalized matrix
      2. configuration string ``upfreq:lowfreq:maxdist``
      3. ``n_models:n_keep`` (models to compute and models to keep)

    The resulting models are saved in ``models_<configuration string>.pickle``.
    """
    matrix_path = sys.argv[1]
    config_string = sys.argv[2]
    compute_keep = sys.argv[3]

    # the configuration string holds, in this order: upfreq, lowfreq, maxdist
    upfreq_txt, lowfreq_txt, maxdist_txt = config_string.split(':')
    lowfreq = float(lowfreq_txt)
    upfreq = float(upfreq_txt)
    maxdist = int(maxdist_txt)
    config = {
        'reference': '',
        'kforce': 5,
        'maxdist': maxdist,
        'upfreq': upfreq,
        'lowfreq': lowfreq,
        'scale': 0.01,
        'kbending': 0.0,
    }
    n_compute, n_kept = (int(field) for field in compute_keep.split(':'))

    chrom = Chromosome('chr')
    chrom.add_experiment('sample', norm_data=matrix_path, resolution=15000)
    experiment = chrom.experiments[0]
    models = experiment.model_region(n_models=n_compute, n_keep=n_kept,
                                     n_cpus=8, config=config)
    models.save_models('models_%s.pickle' % config_string)
def test_04_chromosome_batch(self):
    """
    Run find_tad in batch mode over three experiments and compare the end
    positions of the TADs with a positive score against known values.
    """
    if ONLY and ONLY != '04':
        return
    if CHKTIME:
        t0 = time()
    hic_files = [PATH + '/20Kb/chrT/chrT_A.tsv',
                 PATH + '/20Kb/chrT/chrT_D.tsv',
                 PATH + '/20Kb/chrT/chrT_C.tsv']
    test_chr = Chromosome(name='Test Chromosome',
                          experiment_resolutions=[20000] * 3,
                          experiment_hic_data=hic_files,
                          experiment_names=['exp1', 'exp2', 'exp3'],
                          silent=True)
    test_chr.find_tad(['exp1', 'exp2', 'exp3'], batch_mode=True,
                      verbose=False, silent=True)
    tads = test_chr.get_experiment('batch_exp1_exp2_exp3').tads
    scored = [key for key in tads if tads[key]['score'] > 0]
    found = [tads[key]['end'] for key in scored]
    # Values formerly obtained with square root normalization:
    #   [3.0, 8.0, 16.0, 21.0, 28.0, 35.0, 43.0,
    #    49.0, 61.0, 66.0, 75.0, 89.0, 94.0, 99.0]
    expected = [3.0, 14.0, 19.0, 33.0, 43.0, 49.0, 61.0,
                66.0, 71.0, 89.0, 94.0, 99.0]
    self.assertEqual(expected, found)
    if CHKTIME:
        print('4 %s' % (time() - t0))
def test_11_write_interaction_pairs(self):
    """
    writes interaction pair file.

    The experiment is loaded, filtered and normalized, its interaction
    pairs are written to a temporary file ("lala"), and the number of
    lines plus two reference lines are checked.
    """
    if ONLY and "11" not in ONLY:
        return
    if CHKTIME:
        t0 = time()
    test_chr = Chromosome(name="Test Chromosome", max_tad_size=260000)
    test_chr.add_experiment("exp1", 20000, tad_def=exp4,
                            hic_data=PATH + "/20Kb/chrT/chrT_D.tsv")
    exp = test_chr.experiments[0]
    exp.load_hic_data(PATH + "/20Kb/chrT/chrT_A.tsv", silent=True)
    exp.filter_columns(silent=True)
    exp.normalize_hic(factor=None, silent=True)
    exp.get_hic_zscores(zscored=False)
    try:
        exp.write_interaction_pairs("lala")
        # context manager: the handle is closed as soon as the file is read
        with open("lala") as handle:
            lines = handle.readlines()
    finally:
        # remove the temporary file before asserting, so that a failing
        # assertion does not leave it behind
        system("rm -f lala")
    self.assertEqual(len(lines), 4674)
    self.assertEqual(lines[25], "1\t28\t0.612332461036\n")
    self.assertEqual(lines[2000], "26\t70\t0.0738742984321\n")
    if CHKTIME:
        print("11 %s" % (time() - t0))
def test_06_tad_clustering(self):
    """
    Align two TADs of the same experiment with optimal_cmo and compare the
    two resulting alignments with known values.
    """
    if ONLY and ONLY != '06':
        return
    if CHKTIME:
        t0 = time()
    test_chr = Chromosome(name='Test Chromosome',
                          experiment_tads=[exp4],
                          experiment_names=['exp1'],
                          experiment_hic_data=[
                              PATH + '/20Kb/chrT/chrT_D.tsv'],
                          experiment_resolutions=[20000, 20000],
                          silent=True)
    all_tads = [tad for _, tad in test_chr.iter_tads('exp1', normed=False)]
    # With square root normalization TADs 7 and 10 were compared instead,
    # and the expected alignments were:
    #   align1: [0, 1, '-', 2, 3, '-', 4, 5, 6, 7, 8, 9, 10]
    #   align2: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
    first, second = all_tads[1], all_tads[3]
    align1, align2, _ = optimal_cmo(first, second, 7, method='score')
    self.assertEqual(align1,
                     [0, 1, 2, '-', '-', 3, 4, 5, 6, 7, 8, '-', 9])
    self.assertEqual(align2,
                     [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])
    if CHKTIME:
        print('6 %s' % (time() - t0))
def test_03_tad_multi_aligner(self):
    """
    Align the TADs of four experiments and check the score and p-values
    returned by align_experiments when randomization is requested.
    """
    if CHKTIME:
        t0 = time()
    hic_files = [PATH + '/40Kb/chrT/chrT_A.tsv',
                 PATH + '/20Kb/chrT/chrT_B.tsv',
                 PATH + '/20Kb/chrT/chrT_C.tsv',
                 PATH + '/20Kb/chrT/chrT_D.tsv']
    test_chr = Chromosome(name='Test Chromosome', centromere_search=True,
                          experiment_tads=[exp1, exp2, exp3, exp4],
                          experiment_hic_data=hic_files,
                          experiment_names=['exp1', 'exp2', 'exp3', 'exp4'],
                          experiment_resolutions=[40000, 20000, 20000, 20000],
                          silent=True)
    for experiment in test_chr.experiments:
        experiment.normalize_hic(silent=True, factor=None)

    # plain alignment first (result not inspected), then two randomized runs
    test_chr.align_experiments(verbose=False, randomize=False,
                               method='global')
    _, (score1, pval1) = test_chr.align_experiments(
        verbose=False, method='global', randomize=True, rnd_num=100)
    _, (_, pval2) = test_chr.align_experiments(
        verbose=False, randomize=True, rnd_method='shuffle', rnd_num=100)

    # Reference values with square root normalization used to be:
    #   score1 -26.095, pval1 0.001, abs(0.175 - pval2) < 0.2
    self.assertEqual(round(-11.002, 3), round(score1, 3))
    self.assertEqual(round(0.001, 1), round(pval1, 1))
    self.assertTrue(abs(0.04 - pval2) < 0.1)
    if CHKTIME:
        print('3 %s' % (time() - t0))
def test_08_changing_resolution(self):
    """
    Re-bin the same experiment at several resolutions and check that the
    sum over the first Hi-C matrix is the same at every one of them.
    """
    test_chr = Chromosome(name='Test Chromosome', max_tad_size=260000)
    test_chr.add_experiment('exp1', 20000, tad_def=exp4,
                            hic_data='20Kb/chrT/chrT_D.tsv')
    exp = test_chr.experiments['exp1']

    def total_after(resolution):
        # change resolution, sum the matrix, then run check_hic on it
        exp.set_resolution(resolution)
        total = sum(exp.hic_data[0])
        check_hic(exp.hic_data[0], exp.size)
        return total

    totals = [sum(exp.hic_data[0])]
    for resolution in (80000, 160000, 360000, 2400000,
                       40000, 20000, 40000):
        totals.append(total_after(resolution))
    base, at80, at160, at360, at2400, at40, back20, back40 = totals
    self.assertTrue(base == at80 == at160 == at360 == at40
                    == back20 == at2400 == back40)
def test_11_write_interaction_pairs(self):
    """
    writes interaction pair file.

    The experiment is loaded, filtered and normalized, its interaction
    pairs are written to a temporary file ('lala'), and the number of
    lines plus two reference lines are checked.
    """
    if ONLY and ONLY != '11':
        return
    if CHKTIME:
        t0 = time()
    test_chr = Chromosome(name='Test Chromosome', max_tad_size=260000)
    test_chr.add_experiment('exp1', 20000, tad_def=exp4,
                            hic_data=PATH + '/20Kb/chrT/chrT_D.tsv')
    exp = test_chr.experiments[0]
    exp.load_hic_data(PATH + '/20Kb/chrT/chrT_A.tsv', silent=True)
    exp.filter_columns(silent=True)
    exp.normalize_hic(factor=None, silent=True)
    exp.get_hic_zscores(zscored=False)
    try:
        exp.write_interaction_pairs('lala')
        # context manager: the handle is closed as soon as the file is read
        with open('lala') as handle:
            lines = handle.readlines()
    finally:
        # remove the temporary file before asserting, so that a failing
        # assertion does not leave it behind
        system('rm -f lala')
    self.assertEqual(len(lines), 4674)
    self.assertEqual(lines[25], '1\t28\t0.612332461036\n')
    self.assertEqual(lines[2000], '26\t70\t0.0738742984321\n')
    if CHKTIME:
        print('11 %s' % (time() - t0))
def test_04_chromosome_batch(self):
    """
    Run find_tad in batch mode over three experiments and compare the end
    positions of the TADs with a positive score against known values.
    """
    if ONLY and "04" not in ONLY:
        return
    if CHKTIME:
        t0 = time()
    hic_files = [
        PATH + "/20Kb/chrT/chrT_A.tsv",
        PATH + "/20Kb/chrT/chrT_D.tsv",
        PATH + "/20Kb/chrT/chrT_C.tsv",
    ]
    test_chr = Chromosome(name="Test Chromosome",
                          experiment_resolutions=[20000] * 3,
                          experiment_hic_data=hic_files,
                          experiment_names=["exp1", "exp2", "exp3"],
                          silent=True)
    test_chr.find_tad(["exp1", "exp2", "exp3"], batch_mode=True,
                      verbose=False, silent=True)
    tads = test_chr.get_experiment("batch_exp1_exp2_exp3").tads
    scored = [key for key in tads if tads[key]["score"] > 0]
    found = [tads[key]["end"] for key in scored]
    # Values formerly obtained with square root normalization:
    #   [3.0, 8.0, 16.0, 21.0, 28.0, 35.0, 43.0,
    #    49.0, 61.0, 66.0, 75.0, 89.0, 94.0, 99.0]
    expected = [3.0, 14.0, 19.0, 33.0, 43.0, 49.0, 61.0,
                66.0, 71.0, 89.0, 94.0, 99.0]
    self.assertEqual(expected, found)
    if CHKTIME:
        print("4 %s" % (time() - t0))
def main():
    """
    main function

    Loads every Hi-C matrix listed in the options as one experiment of a
    single chromosome and normalizes each of them. When more than one
    matrix is given the experiments are added together. The result is
    written to ``opts.output`` either as interaction pairs (``opts.abc``
    set) or as a Hi-C matrix.

    ``opts.output`` may be a path or an already opened, writable file-like
    object; a file opened here is closed before returning, while a handle
    received from the caller is left open.
    """
    opts = get_options()
    crm = Chromosome(':P')
    for i, data in enumerate(opts.data):
        name = 'exp' + str(i)
        crm.add_experiment(name, resolution=int(opts.resolution[i]),
                           hic_data=data)
        crm.experiments[name].normalize_hic()
    if len(opts.data) > 1:
        exp = crm.experiments[0] + crm.experiments[1]
        for i in range(2, len(opts.data)):
            exp += crm.experiments[i]
    else:
        exp = crm.experiments[0]

    if opts.abc:
        exp.write_interaction_pairs(opts.output, normalized=opts.norm,
                                    zscored=False)
        return

    matrix_text = exp.print_hic_matrix(print_it=False, normalized=opts.norm)
    if hasattr(opts.output, 'write'):
        # already a file-like object (e.g. standard output): its owner is
        # responsible for closing it
        opts.output.write(matrix_text)
    else:
        with open(opts.output, 'w') as out:
            out.write(matrix_text)
def load_genome_from_tad_def(genome_path, res, verbose=False):
    """
    Search, at a given path, for tsv files containing TAD definitions, one
    file per chromosome. Entries of the directory that are not regular
    files are skipped.

    The chromosome name is derived from the file name by removing '.tsv'
    and 'chr' and converting it to upper case.

    :param genome_path: Path where to search for TADbit chromosomes
    :param res: Resolution at which the chromosomes were saved
    :param False verbose: print the name of each chromosome loaded

    :returns: a dictionary with all TADbit chromosomes found, keyed by
       chromosome name

    :raises Exception: if two files correspond to the same chromosome name
    """
    ref_genome = {}
    for fname in listdir(genome_path):
        crm_path = os.path.join(genome_path, fname)
        if not isfile(crm_path):
            continue
        crm = fname.replace('.tsv', '').replace('chr', '').upper()
        # the dictionary is keyed by the normalized name, so duplicates can
        # only be detected after the file name has been normalized
        if crm in ref_genome:
            raise Exception('More than 1 TAD definition file found\n')
        if verbose:
            print(' Chromosome: %s' % crm)
        crmO = Chromosome(crm)
        crmO.add_experiment('sample', res)
        crmO.experiments[0].load_tad_def(crm_path)
        ref_genome[crm] = crmO
    return ref_genome
def test_11_write_interaction_pairs(self):
    """
    writes interaction pair file.

    The experiment is loaded, filtered and normalized, its interaction
    pairs are written to a temporary file ("lala"), and the number of
    lines plus the third column of two reference lines are checked.
    """
    if ONLY and "11" not in ONLY:
        return
    if CHKTIME:
        t0 = time()
    test_chr = Chromosome(name="Test Chromosome", max_tad_size=260000)
    test_chr.add_experiment("exp1", 20000, tad_def=exp4,
                            hic_data=PATH + "/20Kb/chrT/chrT_D.tsv")
    exp = test_chr.experiments[0]
    exp.load_hic_data(PATH + "/20Kb/chrT/chrT_A.tsv", silent=True)
    exp.filter_columns(silent=True)
    exp.normalize_hic(factor=1, silent=True)
    exp.get_hic_zscores(zscored=False)
    try:
        exp.write_interaction_pairs("lala")
        with open("lala") as f_lala:
            lines = f_lala.readlines()
    finally:
        # remove the temporary file before asserting, so that a failing
        # assertion does not leave it behind
        system("rm -f lala")
    self.assertEqual(len(lines), 4674)
    self.assertAlmostEqual(float(lines[25].split('\t')[2]),
                           0.5852295196345679)
    self.assertAlmostEqual(float(lines[2000].split('\t')[2]),
                           0.07060448846960976)
    if CHKTIME:
        print("11", time() - t0)
def test_12_3d_modelling_optimization(self):
    """
    quick test to generate 3D coordinates from 3? simple models???

    Runs optimal_imp_parameters over a small grid of parameters and checks
    the best parameters reported against known values. The test is skipped
    when the IMP module cannot be imported.
    """
    if CHKTIME:
        t0 = time()
    try:
        __import__('IMP')
    except ImportError:
        warn('IMP not found, skipping test\n')
        return
    test_chr = Chromosome(name='Test Chromosome', max_tad_size=260000)
    test_chr.add_experiment('exp1', 20000, tad_def=exp4,
                            hic_data=PATH + '/20Kb/chrT/chrT_D.tsv')
    exp = test_chr.experiments[0]
    exp.load_hic_data(PATH + '/20Kb/chrT/chrT_A.tsv')
    exp.filter_columns(silent=True)
    exp.normalize_hic(silent=True, factor=None)
    result = exp.optimal_imp_parameters(50, 70, n_cpus=4,
                                        n_models=8, n_keep=2,
                                        lowfreq_range=[-0.6],
                                        upfreq_range=(0, 1.1, 1.1),
                                        maxdist_range=[500, 600],
                                        verbose=False)

    # get best correlations
    config = result.get_best_parameters_dict()
    wanted = {'maxdist': 600.0, 'upfreq': 0.0, 'kforce': 5,
              'dcutoff': 2,
              'reference': '', 'lowfreq': -0.6, 'scale': 0.01}
    # Compare parameter by parameter: two lists of dict values are only
    # comparable when both dictionaries happen to iterate in the same
    # order. String values (the reference) cannot be rounded and are
    # left out, as before.
    found = dict((key, round(val, 4)) for key, val in config.items()
                 if not isinstance(val, str))
    expected = dict((key, round(val, 4)) for key, val in wanted.items()
                    if not isinstance(val, str))
    self.assertEqual(found, expected)
    if CHKTIME:
        print('12 %s' % (time() - t0))
def test_06_tad_clustering(self):
    """
    Align two TADs of the same experiment with optimal_cmo and compare the
    two resulting alignments with known values.
    """
    if ONLY and "06" not in ONLY:
        return
    if CHKTIME:
        t0 = time()
    test_chr = Chromosome(name="Test Chromosome",
                          experiment_tads=[exp4],
                          experiment_names=["exp1"],
                          experiment_hic_data=[
                              PATH + "/20Kb/chrT/chrT_D.tsv"],
                          experiment_resolutions=[20000, 20000],
                          silent=True)
    all_tads = [tad for _, tad in test_chr.iter_tads("exp1", normed=False)]
    # With square root normalization TADs 7 and 10 were compared instead,
    # and the expected alignments were:
    #   align1: [0, 1, "-", 2, 3, "-", 4, 5, 6, 7, 8, 9, 10]
    #   align2: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
    first, second = all_tads[1], all_tads[3]
    align1, align2, _ = optimal_cmo(first, second, 7, method="score")
    self.assertEqual(align1,
                     [0, 1, 2, "-", "-", 3, 4, 5, 6, 7, 8, "-", 9])
    self.assertEqual(align2,
                     [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])
    if CHKTIME:
        print("6 %s" % (time() - t0))
def main():
    """
    main function

    Clusters the genes of chromosome 19 returned by get_genes, reduces each
    cluster to its (minimum, maximum) value and hands these, together with
    the TADs found for the chromosome, to tad_breaker for plotting.
    """
    # retrieve HOX genes
    distmatrix, geneids = get_genes()

    # compute TADs for human chromosome 19
    test_chr = Chromosome(name='Test Chromosome')
    test_chr.add_experiment(
        'exp1', 100000,
        xp_handler=PATH + 'HIC_gm06690_chr19_chr19_100000_obs.txt')
    test_chr.find_tad(['exp1'])
    exp = test_chr.experiments['exp1']

    # hierarchical clustering of the genes
    clust = linkage(distmatrix['19'])
    cl_idx = list(fcluster(clust, t=1, criterion='inconsistent'))
    print('%s clusters' % max(cl_idx))

    # cluster labels start at 1: label j goes to position j - 1
    members = [[] for _ in xrange(1, max(cl_idx) + 1)]
    for gene_pos, label in enumerate(cl_idx):
        members[label - 1].append(geneids['19'][gene_pos][1])
    cluster = [(min(group), max(group)) for group in members]

    tad_breaker(exp.tads, cluster, exp.resolution, show_plot=True, bins=5,
                title='Proportion of HOX genes according to position in a TAD')
def test_08_changing_resolution(self):
    """
    Re-bin the same experiment at several resolutions and check that the
    sum of the values of the first Hi-C matrix is the same at every one.
    """
    if CHKTIME:
        t0 = time()
    test_chr = Chromosome(name='Test Chromosome', max_tad_size=260000)
    test_chr.add_experiment('exp1', 20000, tad_def=exp4,
                            hic_data=PATH + '/20Kb/chrT/chrT_D.tsv',
                            silent=True)
    exp = test_chr.experiments['exp1']

    def total_after(resolution):
        # change resolution, sum the matrix, then run check_hic on it
        exp.set_resolution(resolution)
        total = sum(exp.hic_data[0].values())
        check_hic(exp.hic_data[0], exp.size)
        return total

    totals = [sum(exp.hic_data[0].values())]
    for resolution in (80000, 160000, 360000, 2400000,
                       40000, 20000, 40000):
        totals.append(total_after(resolution))
    base, at80, at160, at360, at2400, at40, back20, back40 = totals
    self.assertTrue(base == at80 == at160 == at360 == at40
                    == back20 == at2400 == back40)
    if CHKTIME:
        print('8 %s' % (time() - t0))
def test_09_hic_normalization(self):
    """
    Normalize an experiment, compute its z-scores and check the sum of all
    the values stored in ``_zscores`` against a known total.
    """
    if ONLY and "09" not in ONLY:
        return
    if CHKTIME:
        t0 = time()
    test_chr = Chromosome(name="Test Chromosome", max_tad_size=260000)
    test_chr.add_experiment("exp1", 20000, tad_def=exp4,
                            hic_data=PATH + "/20Kb/chrT/chrT_D.tsv",
                            silent=True)
    exp = test_chr.experiments[0]
    exp.load_hic_data(PATH + "/20Kb/chrT/chrT_A.tsv", silent=True)
    exp.normalize_hic(silent=True)
    exp.get_hic_zscores()
    exp.get_hic_zscores(zscored=False)
    # _zscores is a two-level mapping: add up every stored value
    sumz = 0
    for outer in exp._zscores.keys():
        for inner in exp._zscores[outer]:
            sumz += exp._zscores[outer][inner]
    self.assertEqual(round(sumz, 4), round(4059.2877, 4))
    if CHKTIME:
        print("9 %s" % (time() - t0))
def main():
    """
    Load the Hi-C data of every sample into a single chromosome object and
    save that object to ``<output>.tdb``.

    Each sample is given as ``label,path``.
    """
    args = getArgs()
    (samples, output, chrom_name, ncpu,
     resolution, species, gbuild) = (args.i, args.o, args.c, args.p,
                                     args.r, args.s, args.b)

    # initiate a chromosome object that will store all Hi-C data and analysis
    my_chrom = Chromosome(
        name=chrom_name,         # chromosome name
        centromere_search=True,  # whether to search for the centromere
        species=species,
        assembly=gbuild,         # genome build
    )

    for sample in samples:
        label, path = sample.split(",")
        print(label)
        print(path)
        getHiCData(my_chrom, output, label, path, resolution, ncpu)

    # the 'tdb' directory is deliberately not created here
    my_chrom.save_chromosome(output + ".tdb", force=True)
def test_07_forbidden_regions(self):
    """
    Check the TAD borders of an experiment and the content of the
    chromosome ``forbidden`` dictionary before and after a second
    experiment is added.
    """
    if ONLY and ONLY != '07':
        return
    if CHKTIME:
        t0 = time()
    test_chr = Chromosome(name='Test Chromosome', max_tad_size=260000,
                          centromere_search=True)
    test_chr.add_experiment('exp1', 20000, tad_def=exp4,
                            hic_data=PATH + '/20Kb/chrT/chrT_D.tsv',
                            silent=True)
    # Values with square root normalization used to be:
    #   [2.0, 7.0, 12.0, 18.0, 38.0, 43.0, 49.0,
    #    61.0, 66.0, 75.0, 89.0, 94.0, 99.0]
    brks = [3.0, 14.0, 19.0, 33.0, 38.0, 43.0, 49.0, 61.0, 66.0, 71.0,
            83.0, 89.0, 94.0, 99.0]
    tads = test_chr.experiments['exp1'].tads
    scored = [key for key in tads if tads[key]['score'] > 0]
    found = [tads[key]['end'] for key in scored]
    self.assertEqual(brks, found)

    # snapshot of the forbidden regions with one, then two experiments
    forbidden = test_chr.forbidden
    items1 = (forbidden.keys(), forbidden.values())
    test_chr.add_experiment('exp2', 20000, tad_def=exp3,
                            hic_data=PATH + '/20Kb/chrT/chrT_C.tsv',
                            silent=True)
    forbidden = test_chr.forbidden
    items2 = (forbidden.keys(), forbidden.values())

    # An older expectation for the first snapshot also listed bins 19-34,
    # with None as value, next to the two centromere bins.
    know1 = ([38, 39], ['Centromere', 'Centromere'])
    know2 = ([38], ['Centromere'])
    self.assertEqual(items1, know1)
    self.assertEqual(items2, know2)
    if CHKTIME:
        print('7 %s' % (time() - t0))
def tb_generate_tads(self, expt_name, adj_list, chrom, resolution, normalized, tad_file):
    """
    Predict the TAD borders of one chromosome from a Hi-C adjacency list.

    The intra-chromosomal matrix is written to a temporary text file
    (``<adj_list>_<chrom>_tmp.txt``), loaded as an experiment and handed
    to the TADbit TAD caller. The borders are written to
    ``<tad_file>.tmp`` and then copied to ``tad_file``.

    Parameters
    ----------
    expt_name : str
        Name given to the experiment
    adj_list : str
        Location of the adjacency list
    chrom : str
        Chromosome to extract from the Hi-C data (presumably its name as
        it appears in the adjacency list)
    resolution : int
        Resolution to read the Hi-C adjacency list at
    normalized : bool
        When ``False`` the Hi-C data is normalized before the matrix is
        written
    tad_file : str
        Location of the output TAD file

    Returns
    -------
    bool
        Always ``True``
    """
    print("TB TAD GENERATOR:", expt_name, adj_list, chrom, resolution, normalized, tad_file)

    bin_size = int(resolution)
    hic_data = load_hic_data_from_reads(adj_list, resolution=bin_size)

    if normalized is False:
        hic_data.normalize_hic(iterations=9, max_dev=0.1)

    region = (chrom, chrom)
    save_matrix_file = "_".join([adj_list, str(chrom), "tmp.txt"])
    hic_data.write_matrix(save_matrix_file, region, normalized=True)

    chr_hic_data = hic_data.get_matrix(region)
    print("TB - chr_hic_data:", chr_hic_data)

    my_chrom = Chromosome(name=chrom, centromere_search=True)
    my_chrom.add_experiment(expt_name, hic_data=save_matrix_file,
                            resolution=bin_size)

    # Run core TADbit function to find TADs on each expt.
    my_chrom.find_tad(expt_name, n_cpus=15)

    borders_tmp = tad_file + ".tmp"
    my_chrom.experiments[expt_name].write_tad_borders(savedata=borders_tmp)

    # copy the temporary borders file, byte for byte, to its final location
    with open(tad_file, "wb") as f_out, open(borders_tmp, "rb") as f_in:
        f_out.write(f_in.read())

    return True
def test_05_save_load(self):
    """
    Save a chromosome to disk, load it back and remove the files created.
    """
    test_chr = Chromosome(name='Test Chromosome',
                          experiment_tads=[exp1, exp2],
                          experiment_names=['exp1', 'exp2'],
                          experiment_resolutions=[20000, 20000])
    test_chr.save_chromosome('lolo', force=True)
    test_chr = load_chromosome('lolo')
    # clean up the file saved above and its '_hic' companion
    for command in ('rm -f lolo', 'rm -f lolo_hic'):
        system(command)
def test_13_3d_modelling_centroid(self):  # model with no optimisation
    """
    Build a set of 3D models for a region and check that the model reported
    as centroid is the one with the smallest value, among those returned
    by rmsdRMSD_wrapper, with respect to the average model. The test is
    skipped when the IMP module cannot be imported.
    """
    if ONLY and ONLY != '13':
        return
    if CHKTIME:
        t0 = time()
    try:
        __import__('IMP')
    except ImportError:
        warn('IMP not found, skipping test\n')
        return
    test_chr = Chromosome(name='Test Chromosome', max_tad_size=260000)
    test_chr.add_experiment('exp1', 20000, tad_def=exp4,
                            hic_data=PATH + '/20Kb/chrT/chrT_D.tsv',
                            silent=True)
    exp = test_chr.experiments[0]
    exp.load_hic_data(PATH + '/20Kb/chrT/chrT_A.tsv', silent=True)
    exp.filter_columns(silent=True)
    exp.normalize_hic(silent=True, factor=None)
    modelling_config = {'kforce': 5,
                        'maxdist': 500,
                        'scale': 0.01,
                        'upfreq': 1.0,
                        'lowfreq': -0.6}
    models = exp.model_region(51, 71, n_models=40, n_keep=25, n_cpus=4,
                              config=modelling_config)
    models.save_models('models.pick')

    avg = models.average_model()
    nmd = len(models)

    def with_average(axis):
        # coordinates of every model, followed by those of the average model
        return [models[m][axis] for m in xrange(nmd)] + [avg[axis]]

    n_all = len(models) + 1
    dev = rmsdRMSD_wrapper(with_average('x'),
                           with_average('y'),
                           with_average('z'),
                           models._zeros,
                           models.nloci, 200, range(n_all),
                           n_all, int(False), 'rmsd', 0)
    centroid = models[models.centroid_model()]
    # find closest: the average model was appended at position nmd
    model = min(range(nmd), key=lambda k: dev[(k, nmd)])
    self.assertEqual(centroid['rand_init'], models[model]['rand_init'])
    if CHKTIME:
        print('13 %s' % (time() - t0))
def test_12_3d_modelling_optimization(self):
    """
    quick test to generate 3D coordinates from 3? simple models???

    Runs optimal_imp_parameters over a small grid of parameters and checks
    the best parameters reported against known values. The test is skipped
    when the IMP module cannot be imported.
    """
    if ONLY and "12" not in ONLY:
        return
    if CHKTIME:
        t0 = time()

    try:
        __import__("IMP")
    except ImportError:
        warn("IMP not found, skipping test\n")
        return
    test_chr = Chromosome(name="Test Chromosome", max_tad_size=260000)
    test_chr.add_experiment("exp1", 20000, tad_def=exp4,
                            hic_data=PATH + "/20Kb/chrT/chrT_D.tsv")
    exp = test_chr.experiments[0]
    exp.load_hic_data(PATH + "/20Kb/chrT/chrT_A.tsv")
    exp.filter_columns(silent=True)
    exp.normalize_hic(silent=True, factor=None)
    result = exp.optimal_imp_parameters(
        50, 70, n_cpus=4,
        n_models=8, n_keep=2,
        lowfreq_range=[-0.6],
        upfreq_range=(0, 1.1, 1.1),  # from 0 till 1.1 in step of 1.1 with ()
        maxdist_range=[500, 600],    # it will use 500 and 600 with []
        verbose=False)

    # get best correlations
    config = result.get_best_parameters_dict()  # dict with parameters
    wanted = {
        "maxdist": 600.0,
        "upfreq": 0.0,
        "kforce": 5,
        "dcutoff": 2,
        "reference": "",
        "lowfreq": -0.6,
        "scale": 0.01
    }
    # Filter on the type of the VALUE: keys are always strings, so a filter
    # on the key leaves nothing to compare. The expected numbers are read
    # from ``wanted``, the obtained ones from ``config``.
    numeric_keys = [key for key in wanted
                    if not isinstance(wanted[key], str)]
    self.assertEqual([round(config[key], 4) for key in numeric_keys],
                     [round(wanted[key], 4) for key in numeric_keys])
    if CHKTIME:
        print("12", time() - t0)
def _sub_experiment_zscore(self, start, end):
    """
    Get the z-score of a sub-region of an experiment.

    TODO: find a nicer way to do this...

    :param start: first bin to model (bin number)
    :param end: last bin to model (bin number), included in the region

    :returns: the z-scores computed for the sub-region, and a square
       matrix (list of lists) with the normalization values of the
       sub-region; cells that are skipped are left as NaN

    :raises Exception: if every bin of the region is listed in the zeros
       of the experiment
    """
    if self._normalization != 'visibility':
        warn('WARNING: normalizing according to visibility method')
        self.normalize_hic(method='visibility')
    from pytadbit import Chromosome
    matrix = self.get_hic_matrix()
    end += 1  # from here on 'end' is exclusive
    region = range(start, end)
    new_matrix = [[matrix[i][j] for j in region] for i in region]

    tmp = Chromosome('tmp')
    tmp.add_experiment('exp1', hic_data=[new_matrix],
                       resolution=self.resolution, filter_columns=False)
    exp = tmp.experiments[0]
    # We want the weights and zeros calculated in the full chromosome
    siz = self.size
    exp.norm = [[self.norm[0][i + siz * j] for i in region for j in region]]
    # 'end' is exclusive here: a zero at bin 'end' lies outside the region
    # and must neither be kept nor counted
    exp._zeros = dict([(z - start, None) for z in self._zeros
                       if start <= z < end])
    if len(exp._zeros) == (end - start):
        raise Exception('ERROR: no interaction found in selected regions')
    # ... but the z-scores in this particular region
    exp.get_hic_zscores(remove_zeros=True)
    values = [[float('nan') for _ in range(exp.size)]
              for _ in range(exp.size)]
    for i in range(exp.size):
        # zeros are rows or columns having a zero in the diagonal
        if i in exp._zeros:
            continue
        for j in range(i + 1, exp.size):
            if j in exp._zeros:
                continue
            # NOTE(review): this condition was written twice with the same
            # index; the second test may have been meant for the symmetric
            # cell (j * exp.size + i) -- confirm
            if not exp.hic_data[0][i * exp.size + j]:
                continue
            values[i][j] = exp.norm[0][i * exp.size + j]
            values[j][i] = exp.norm[0][i * exp.size + j]
    return exp._zscores, values
def test_09_hic_normalization(self):
    """
    Load an experiment and compute its z-scores, with and without the
    ``zscored`` option. Nothing is asserted.

    TODO: check with Davide's script
    """
    test_chr = Chromosome(name='Test Chromosome', max_tad_size=260000)
    test_chr.add_experiment('exp1', 20000, tad_def=exp4,
                            hic_data='20Kb/chrT/chrT_D.tsv')
    experiment = test_chr.experiments[0]
    experiment.load_experiment('20Kb/chrT/chrT_A.tsv')
    experiment.get_hic_zscores()
    experiment.get_hic_zscores(zscored=False)
def test_10_generate_weights(self):
    """
    Check that normalize_hic regenerates the same weights that the
    experiment held right after being added.

    method names are: 'sqrt' or 'over_tot'
    """
    test_chr = Chromosome(name='Test Chromosome', max_tad_size=260000)
    test_chr.add_experiment('exp1', 20000, tad_def=exp4,
                            hic_data='20Kb/chrT/chrT_D.tsv')
    experiment = test_chr.experiments[0]
    # keep a copy of the current weights, then force their re-computation
    reference_weights = experiment.norm[:]
    experiment.norm = None
    experiment.normalize_hic()
    self.assertEqual(reference_weights[0], experiment.norm[0])
def test_06_tad_clustering(self):
    """
    Align two TADs of the same experiment with optimal_cmo and compare the
    two resulting alignments with known values.
    """
    test_chr = Chromosome(name='Test Chromosome',
                          experiment_tads=[exp4],
                          experiment_names=['exp1'],
                          experiment_hic_data=['20Kb/chrT/chrT_D.tsv'],
                          experiment_resolutions=[20000, 20000])
    all_tads = [tad for _, tad in test_chr.iter_tads('exp1')]
    first, second = all_tads[7], all_tads[10]
    align1, align2, _ = optimal_cmo(first, second, 7, method='score')
    self.assertEqual(align1,
                     [0, 1, '-', 2, 3, '-', 4, 5, 6, 7, 8, 9, 10])
    self.assertEqual(align2,
                     [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])
def test_04_chromosome_batch(self):
    """
    Run find_tad in batch mode over three experiments and compare the end
    positions of the TADs with a positive score against known values.
    """
    hic_files = ['20Kb/chrT/chrT_A.tsv',
                 '20Kb/chrT/chrT_D.tsv',
                 '20Kb/chrT/chrT_C.tsv']
    test_chr = Chromosome(name='Test Chromosome',
                          experiment_resolutions=[20000] * 3,
                          experiment_hic_data=hic_files,
                          experiment_names=['exp1', 'exp2', 'exp3'])
    test_chr.find_tad(['exp1', 'exp2', 'exp3'], batch_mode=True,
                      verbose=False)
    tads = test_chr.get_experiment('batch_exp1_exp2_exp3').tads
    scored = [key for key in tads if tads[key]['score'] > 0]
    found = [tads[key]['end'] for key in scored]
    expected = [3.0, 8.0, 16.0, 21.0, 28.0, 35.0, 43.0,
                49.0, 61.0, 66.0, 75.0, 89.0, 99.0]
    self.assertEqual(expected, found)
def main():
    """
    main function

    Finds the TADs of a test chromosome, picks a few of them at random,
    generates randomly altered copies of each picked TAD, then computes
    the distances between the altered matrices, clusters them and paints
    the clustering.
    """
    global DISTRA, DISTRD

    n_pick = 4
    n_tot = 10
    test_chr = Chromosome(name='Test Chromosome')
    test_chr.add_experiment(
        'exp1', 100000,
        xp_handler=PATH + 'HIC_gm06690_chr19_chr19_100000_obs.txt')
    test_chr.find_tad(['exp1'])

    # attach to each TAD definition its own Hi-C matrix
    real_tads = {}
    for num, item in enumerate(test_chr.iter_tads('exp1', normed=False)):
        real_tads[num] = test_chr.experiments['exp1'].tads[num]
        real_tads[num]['hic'] = item[1]
    DISTRA, DISTRD = get_hic_distr(real_tads)

    # pick some tads: no repetition, and 'end' - 'start' of at least 15
    picked_tads = []
    picked_keys = []
    while len(picked_tads) < n_pick:
        key, candidate = get_random_tad(real_tads)
        if key in picked_keys or candidate['end'] - candidate['start'] < 15:
            continue
        picked_tads.append(candidate)
        picked_keys.append(key)

    # mutate these tads
    tads = {}
    tad_matrices = []
    tad_names = []
    for idx, parent in enumerate(picked_tads):
        print(idx)
        letter = uppercase[idx]
        original_name = letter + '_' + str(0)
        tads[original_name] = parent
        # NOTE(review): the name of the unaltered TAD is recorded but its
        # matrix is not, so tad_names ends up longer than tad_matrices --
        # confirm this is intended
        tad_names.append(original_name)
        for copy_num in xrange(1, n_tot):
            ext = int(random() * 4) + 1
            indel = int(random() * 4) + 1
            hic, _indels = generate_random_contacts(
                tad1=parent['hic'], prob=0.05, ext=ext, indel=indel)[1:]
            copy_name = letter + '_' + str(copy_num)
            tads[copy_name] = {'hic': hic,
                               'start': parent['start'],
                               'end': parent['end']}
            tad_matrices.append(hic)
            tad_names.append(copy_name)

    n_matrices = len(tad_matrices)
    distances, cci = get_distances(tad_matrices, max_num_v=4,
                                   n_cpus=mu.cpu_count())
    results, clusters = pre_cluster(distances, cci, n_matrices)
    paint_clustering(results, clusters, n_matrices, test_chr,
                     tad_names, tad_matrices)
def test_05_save_load(self):
    """
    Save a chromosome to disk, load it back and check that the string
    representation of the attributes of both objects is the same.
    """
    if CHKTIME:
        t0 = time()
    saved = Chromosome(name='Test Chromosome',
                       experiment_tads=[exp1, exp2],
                       experiment_names=['exp1', 'exp2'],
                       experiment_resolutions=[20000, 20000],
                       silent=True)
    saved.save_chromosome('lolo', force=True)
    loaded = load_chromosome('lolo')
    # the files are no longer needed once the chromosome is loaded
    for command in ('rm -f lolo', 'rm -f lolo_hic'):
        system(command)
    self.assertEqual(str(saved.__dict__), str(loaded.__dict__))
    if CHKTIME:
        print('5 %s' % (time() - t0))
def test_07_forbidden_regions(self):
    """
    Check the TAD borders of an experiment and the content of the
    chromosome ``forbidden`` dictionary before and after a second
    experiment is added.
    """
    if ONLY and "07" not in ONLY:
        return
    if CHKTIME:
        t0 = time()
    test_chr = Chromosome(name="Test Chromosome", max_tad_size=260000,
                          centromere_search=True)
    test_chr.add_experiment("exp1", 20000, tad_def=exp4,
                            hic_data=PATH + "/20Kb/chrT/chrT_D.tsv",
                            silent=True)
    # Values with square root normalization used to be:
    #   [2.0, 7.0, 12.0, 18.0, 38.0, 43.0, 49.0,
    #    61.0, 66.0, 75.0, 89.0, 94.0, 99.0]
    brks = [3.0, 14.0, 19.0, 33.0, 38.0, 43.0, 49.0, 61.0, 66.0, 71.0,
            83.0, 89.0, 94.0, 99.0]
    tads = test_chr.experiments["exp1"].tads
    scored = [key for key in tads if tads[key]["score"] > 0]
    found = [tads[key]["end"] for key in scored]
    self.assertEqual(brks, found)

    # snapshot of the forbidden regions with one, then two experiments
    forbidden = test_chr.forbidden
    items1 = (list(forbidden.keys()), list(forbidden.values()))
    test_chr.add_experiment("exp2", 20000, tad_def=exp3,
                            hic_data=PATH + "/20Kb/chrT/chrT_C.tsv",
                            silent=True)
    forbidden = test_chr.forbidden
    items2 = (list(forbidden.keys()), list(forbidden.values()))

    # An older expectation for the first snapshot also listed bins 19-34,
    # with None as value, next to the two centromere bins.
    know1 = ([38, 39], ["Centromere", "Centromere"])
    know2 = ([38], ["Centromere"])
    self.assertEqual(items1, know1)
    self.assertEqual(items2, know2)
    if CHKTIME:
        print("7", time() - t0)