def main():
    """
    Build 3D models, either from a z-score file or from a Hi-C matrix, then
    optionally pickle them, write CMM files and run the clustering report.
    """
    opts, params = get_options()
    # keyword arguments common to both modelling entry points
    shared = dict(n_models=opts.nmodels, n_keep=opts.nkeep,
                  n_cpus=opts.ncpus, keep_all=False, verbose=False,
                  config=params)
    if opts.inabc:
        models = generate_3d_models(parse_zscores(opts.inabc),
                                    opts.resolution, start=1, outfile=None,
                                    **shared)
    else:
        chrom_name = 'crm'
        exp_name = 'X'
        chromosome = Chromosome(chrom_name)
        chromosome.add_experiment(exp_name, resolution=opts.resolution,
                                  xp_handler=opts.incrm)
        experiment = chromosome.experiments[exp_name]
        models = experiment.model_region(start=opts.start, end=opts.end,
                                         **shared)
    if opts.save:
        pickle_path = '%s/models_%s_%s.pik' % (opts.out, opts.start,
                                               opts.start + opts.nmodels)
        models.save_models(pickle_path)
    # one CMM file per model index below opts.cmm
    for rank in xrange(int(opts.cmm)):
        models.write_cmm(rank, opts.out)
    if not opts.full_report:
        return
    models.cluster_models(dcutoff=200)
    models.cluster_analysis_dendrogram(n_best_clusters=10)
    models.model_consistency()
def load_hic_data(opts, xnames):
    """
    Create the chromosome described by the options and attach one Hi-C
    experiment per name in xnames.

    :param opts: parsed options. Reads crm, species, centromere, assembly,
       data, norm, enzyme, cell, identifier, project, res, beg and end;
       opts.end is overwritten when it exceeds the size of the last
       experiment
    :param xnames: experiment names, paired in order with opts.data and
       opts.norm

    :returns: the Chromosome holding the loaded experiments
    """
    # 'genus_species' is turned into 'Genus' + 'species'; any other value
    # is passed through untouched
    species = opts.species
    if '_' in species:
        species = (species.split('_')[0].capitalize() +
                   species.split('_')[1])
    crm = Chromosome(opts.crm, species=species,
                     centromere_search=opts.centromere,
                     assembly=opts.assembly)

    logging.info("\tReading input data...")
    for xnam, xpath, xnorm in zip(xnames, opts.data, opts.norm):
        crm.add_experiment(
            xnam,
            exp_type='Hi-C',
            enzyme=opts.enzyme,
            cell_type=opts.cell,
            identifier=opts.identifier,  # general descriptive fields
            project=opts.project,        # user descriptions
            resolution=opts.res,
            hic_data=xpath,
            norm_data=xnorm)
        if xnorm:
            continue
        # no normalized matrix was supplied: compute it from the raw data
        logging.info("\tNormalizing HiC data of %s..." % xnam)
        crm.experiments[xnam].normalize_hic(iterations=5)

    # bounds are checked against the most recently added experiment
    if opts.beg > crm.experiments[-1].size:
        raise Exception('ERROR: beg parameter is larger than chromosome size.')
    if opts.end > crm.experiments[-1].size:
        logging.info('WARNING: end parameter is larger than chromosome ' +
                     'size. Setting end to %s.\n' % (
                         crm.experiments[-1].size * opts.res))
        opts.end = crm.experiments[-1].size
    return crm
def test_08_changing_resolution(self):
    """
    the sum of the Hi-C data must stay the same whatever the resolution.
    """
    test_chr = Chromosome(name='Test Chromosome', max_tad_size=260000)
    test_chr.add_experiment('exp1', 20000, tad_def=exp4,
                            hic_data='20Kb/chrT/chrT_D.tsv')
    exp = test_chr.experiments['exp1']
    # the sum at the loading resolution (20 kb) is the reference
    totals = [sum(exp.hic_data[0])]
    for resolution in (80000, 160000, 360000, 2400000, 40000, 20000, 40000):
        exp.set_resolution(resolution)
        totals.append(sum(exp.hic_data[0]))
        check_hic(exp.hic_data[0], exp.size)
    self.assertTrue(all(total == totals[0] for total in totals[1:]))
def load_genome_from_tad_def(genome_path, res, verbose=False):
    """
    Search, at a given path, for TAD definition files (one tsv file per
    chromosome) and load each of them into a TADbit chromosome.

    :param genome_path: Path where to search for TAD definition files;
       entries that are not regular files are ignored
    :param res: Resolution at which the chromosomes were saved
    :param False verbose: print the name of each chromosome being loaded

    :returns: a dictionary with all TADbit chromosomes found, keyed by the
       file name stripped of '.tsv' and 'chr' and upper-cased

    :raises Exception: when two files map to the same chromosome name
    """
    ref_genome = {}
    for fname in listdir(genome_path):
        crm_path = os.path.join(genome_path, fname)
        if not isfile(crm_path):
            continue
        crm = fname.replace('.tsv', '').replace('chr', '').upper()
        # The dictionary is keyed by the normalized name, so that is the
        # name to test: checking the raw file name would let a second file
        # for the same chromosome silently overwrite the first one.
        if crm in ref_genome:
            raise Exception('More than 1 TAD definition file found\n')
        if verbose:
            # single pre-formatted argument: same output whether print is
            # a statement or a function
            print('%s %s' % (' Chromosome:', crm))
        crmO = Chromosome(crm)
        crmO.add_experiment('sample', res)
        crmO.experiments[0].load_tad_def(crm_path)
        ref_genome[crm] = crmO
    return ref_genome
def main():
    """
    main function

    Load every Hi-C matrix listed in the options as its own experiment,
    normalize each of them, sum them all and write the result either as
    interaction pairs (opts.abc) or as a matrix.
    """
    opts = get_options()
    crm = Chromosome(':P')
    for i, data in enumerate(opts.data):
        name = 'exp' + str(i)
        crm.add_experiment(name, resolution=int(opts.resolution[i]),
                           hic_data=data)
        crm.experiments[name].normalize_hic()

    # sum of all experiments (a single experiment is used as is)
    if len(opts.data) > 1:
        exp = crm.experiments[0] + crm.experiments[1]
        for i in range(2, len(opts.data)):
            exp += crm.experiments[i]
    else:
        exp = crm.experiments[0]

    if opts.abc:
        exp.write_interaction_pairs(opts.output, normalized=opts.norm,
                                    zscored=False)
        return

    matrix = exp.print_hic_matrix(print_it=False, normalized=opts.norm)
    if hasattr(opts.output, 'write'):
        # already an open stream: whoever opened it is in charge of
        # closing it
        opts.output.write(matrix)
    else:
        # a path: open it here and make sure the handle is flushed and
        # closed once the matrix is written
        with open(opts.output, 'w') as out:
            out.write(matrix)
def test_08_changing_resolution(self):
    """
    the sum of the Hi-C data must stay the same whatever the resolution.
    """
    if CHKTIME:
        t0 = time()
    test_chr = Chromosome(name='Test Chromosome', max_tad_size=260000)
    test_chr.add_experiment('exp1', 20000, tad_def=exp4,
                            hic_data=PATH + '/20Kb/chrT/chrT_D.tsv',
                            silent=True)
    exp = test_chr.experiments['exp1']
    # the sum at the loading resolution (20 kb) is the reference
    totals = [sum(exp.hic_data[0].values())]
    for resolution in (80000, 160000, 360000, 2400000, 40000, 20000, 40000):
        exp.set_resolution(resolution)
        totals.append(sum(exp.hic_data[0].values()))
        check_hic(exp.hic_data[0], exp.size)
    self.assertTrue(all(total == totals[0] for total in totals[1:]))
    if CHKTIME:
        print('%s %s' % ('8', time() - t0))
def test_12_3d_modelling_optimization(self):
    """
    quick test of the IMP parameter optimization on bins 50 to 70; skipped
    when IMP cannot be imported.
    """
    if CHKTIME:
        t0 = time()
    try:
        __import__('IMP')
    except ImportError:
        warn('IMP not found, skipping test\n')
        return
    test_chr = Chromosome(name='Test Chromosome', max_tad_size=260000)
    test_chr.add_experiment('exp1', 20000, tad_def=exp4,
                            hic_data=PATH + '/20Kb/chrT/chrT_D.tsv')
    exp = test_chr.experiments[0]
    exp.load_hic_data(PATH + '/20Kb/chrT/chrT_A.tsv')
    exp.filter_columns(silent=True)
    exp.normalize_hic(silent=True, factor=None)
    search = dict(lowfreq_range=[-0.6],
                  upfreq_range=(0, 1.1, 1.1),
                  maxdist_range=[500, 600])
    result = exp.optimal_imp_parameters(50, 70, n_cpus=4, n_models=8,
                                        n_keep=2, verbose=False, **search)

    # get best correlations
    config = result.get_best_parameters_dict()
    wanted = {'maxdist': 600.0, 'upfreq': 0.0, 'kforce': 5,
              'dcutoff': 2, 'reference': '', 'lowfreq': -0.6,
              'scale': 0.01}
    # NOTE(review): values are compared in dictionary iteration order, so
    # this relies on both dictionaries iterating their keys alike.
    got_numbers = [round(val, 4) for val in config.values()
                   if type(val) is not str]
    wanted_numbers = [round(val, 4) for val in wanted.values()
                      if type(val) is not str]
    self.assertEqual(got_numbers, wanted_numbers)
    if CHKTIME:
        print('%s %s' % ('12', time() - t0))
def test_08_changing_resolution(self):
    """
    the sum of the Hi-C data must stay the same whatever the resolution.
    """
    if ONLY and ONLY != "08":
        return
    if CHKTIME:
        t0 = time()
    test_chr = Chromosome(name="Test Chromosome", max_tad_size=260000)
    test_chr.add_experiment("exp1", 20000, tad_def=exp4,
                            hic_data=PATH + "/20Kb/chrT/chrT_D.tsv",
                            silent=True)
    exp = test_chr.experiments["exp1"]
    # the sum at the loading resolution (20 kb) is the reference
    totals = [sum(exp.hic_data[0].values())]
    for resolution in (80000, 160000, 360000, 2400000, 40000, 20000, 40000):
        exp.set_resolution(resolution)
        totals.append(sum(exp.hic_data[0].values()))
        check_hic(exp.hic_data[0], exp.size)
    self.assertTrue(all(total == totals[0] for total in totals[1:]))
    if CHKTIME:
        print('%s %s' % ("8", time() - t0))
def main():
    """
    main function

    Find the TADs of the chromosome 19 matrix, cluster the genes returned
    by get_genes() for chromosome '19' and hand the extent of each cluster
    to tad_breaker.
    """
    # retrieve HOX genes
    distmatrix, geneids = get_genes()
    # compute TADs for human chromosome 19
    test_chr = Chromosome(name='Test Chromosome')
    test_chr.add_experiment(
        'exp1', 100000,
        xp_handler=PATH + 'HIC_gm06690_chr19_chr19_100000_obs.txt')
    test_chr.find_tad(['exp1'])
    exp = test_chr.experiments['exp1']
    # flat cluster label (1-based) of every gene
    labels = list(fcluster(linkage(distmatrix['19']), t=1,
                           criterion='inconsistent'))
    n_clusters = max(labels)
    print('%s %s' % (n_clusters, 'clusters'))
    members = [[] for _ in xrange(n_clusters)]
    for gene_idx, label in enumerate(labels):
        members[label - 1].append(geneids['19'][gene_idx][1])
    # each cluster is summarised by its smallest and largest value
    cluster = [(min(group), max(group)) for group in members]
    tad_breaker(exp.tads, cluster, exp.resolution, show_plot=True, bins=5,
                title='Proportion of HOX genes according to position in a TAD')
def test_09_hic_normalization(self):
    """
    checks the sum of the values stored in _zscores after normalization.
    """
    if ONLY and "09" not in ONLY:
        return
    if CHKTIME:
        t0 = time()
    test_chr = Chromosome(name="Test Chromosome", max_tad_size=260000)
    test_chr.add_experiment("exp1", 20000, tad_def=exp4,
                            hic_data=PATH + "/20Kb/chrT/chrT_D.tsv",
                            silent=True)
    exp = test_chr.experiments[0]
    exp.load_hic_data(PATH + "/20Kb/chrT/chrT_A.tsv", silent=True)
    exp.normalize_hic(silent=True)
    exp.get_hic_zscores()
    exp.get_hic_zscores(zscored=False)
    # _zscores is indexed by two keys: add up every stored value
    zscores = exp._zscores
    sumz = sum([zscores[row][col] for row in zscores.keys()
                for col in zscores[row]])
    self.assertEqual(round(sumz, 4), round(4059.2877, 4))
    if CHKTIME:
        print('%s %s' % ("9", time() - t0))
def main():
    """
    Model a normalized matrix from the command line.

    Arguments (sys.argv): path to the normalized matrix, a
    'upfreq:lowfreq:maxdist' string and a 'n_models:n_keep' string. The
    models are pickled to 'models_<upfreq:lowfreq:maxdist>.pickle'.
    """
    matrix_path = sys.argv[1]
    config_string = sys.argv[2]
    compute_keep = sys.argv[3]

    upfreq, lowfreq, maxdist = config_string.split(':')
    lowfreq = float(lowfreq)
    upfreq = float(upfreq)
    maxdist = int(maxdist)
    config = {
        'reference': '',
        'kforce': 5,
        'maxdist': maxdist,
        'upfreq': upfreq,
        'lowfreq': lowfreq,
        'scale': 0.01,
        'kbending': 0.0,
    }

    n_compute, n_kept = [int(count) for count in compute_keep.split(':')]

    chrom = Chromosome('chr')
    chrom.add_experiment('sample', norm_data=matrix_path, resolution=15000)
    models = chrom.experiments[0].model_region(n_models=n_compute,
                                               n_keep=n_kept, n_cpus=8,
                                               config=config)
    models.save_models('models_%s.pickle' % (config_string))
def test_11_write_interaction_pairs(self):
    """
    writes interaction pair file.
    """
    if ONLY and "11" not in ONLY:
        return
    if CHKTIME:
        t0 = time()
    test_chr = Chromosome(name="Test Chromosome", max_tad_size=260000)
    test_chr.add_experiment("exp1", 20000, tad_def=exp4,
                            hic_data=PATH + "/20Kb/chrT/chrT_D.tsv")
    exp = test_chr.experiments[0]
    exp.load_hic_data(PATH + "/20Kb/chrT/chrT_A.tsv", silent=True)
    exp.filter_columns(silent=True)
    exp.normalize_hic(factor=None, silent=True)
    exp.get_hic_zscores(zscored=False)
    try:
        exp.write_interaction_pairs("lala")
        # close the handle as soon as the lines are read
        with open("lala") as f_lala:
            lines = f_lala.readlines()
        self.assertEqual(len(lines), 4674)
        self.assertEqual(lines[25], "1\t28\t0.612332461036\n")
        self.assertEqual(lines[2000], "26\t70\t0.0738742984321\n")
    finally:
        # remove the temporary file even when an assertion fails
        system("rm -f lala")
    if CHKTIME:
        print('%s %s' % ("11", time() - t0))
def test_11_write_interaction_pairs(self):
    """
    writes interaction pair file.
    """
    if ONLY and ONLY != '11':
        return
    if CHKTIME:
        t0 = time()
    test_chr = Chromosome(name='Test Chromosome', max_tad_size=260000)
    test_chr.add_experiment('exp1', 20000, tad_def=exp4,
                            hic_data=PATH + '/20Kb/chrT/chrT_D.tsv')
    exp = test_chr.experiments[0]
    exp.load_hic_data(PATH + '/20Kb/chrT/chrT_A.tsv', silent=True)
    exp.filter_columns(silent=True)
    exp.normalize_hic(factor=None, silent=True)
    exp.get_hic_zscores(zscored=False)
    try:
        exp.write_interaction_pairs('lala')
        # close the handle as soon as the lines are read
        with open('lala') as f_lala:
            lines = f_lala.readlines()
        self.assertEqual(len(lines), 4674)
        self.assertEqual(lines[25], '1\t28\t0.612332461036\n')
        self.assertEqual(lines[2000], '26\t70\t0.0738742984321\n')
    finally:
        # remove the temporary file even when an assertion fails
        system('rm -f lala')
    if CHKTIME:
        print('%s %s' % ('11', time() - t0))
def test_11_write_interaction_pairs(self):
    """
    writes interaction pair file.
    """
    if ONLY and "11" not in ONLY:
        return
    if CHKTIME:
        t0 = time()
    test_chr = Chromosome(name="Test Chromosome", max_tad_size=260000)
    test_chr.add_experiment("exp1", 20000, tad_def=exp4,
                            hic_data=PATH + "/20Kb/chrT/chrT_D.tsv")
    exp = test_chr.experiments[0]
    exp.load_hic_data(PATH + "/20Kb/chrT/chrT_A.tsv", silent=True)
    exp.filter_columns(silent=True)
    exp.normalize_hic(factor=1, silent=True)
    exp.get_hic_zscores(zscored=False)
    try:
        exp.write_interaction_pairs("lala")
        with open("lala") as f_lala:
            lines = f_lala.readlines()
        self.assertEqual(len(lines), 4674)
        # third column of a pair line holds the value under test
        self.assertAlmostEqual(float(lines[25].split('\t')[2]),
                               0.5852295196345679)
        self.assertAlmostEqual(float(lines[2000].split('\t')[2]),
                               0.07060448846960976)
    finally:
        # remove the temporary file even when an assertion fails
        system("rm -f lala")
    if CHKTIME:
        print("11", time() - t0)
def test_13_3d_modelling_centroid(self):
    """
    quick run of model_region with several copies on bins 51 to 71;
    skipped when IMP cannot be imported. No assertion is made, the test
    only checks that modelling, saving and averaging run through.
    """
    if ONLY and "13" not in ONLY:
        return
    if CHKTIME:
        t0 = time()
    try:
        __import__("IMP")
    except ImportError:
        warn("IMP not found, skipping test\n")
        return
    test_chr = Chromosome(name="Test Chromosome", max_tad_size=260000)
    test_chr.add_experiment("exp1", 20000, tad_def=exp4,
                            hic_data=PATH + "/20Kb/chrT/chrT_D.tsv",
                            silent=True)
    exp = test_chr.experiments[0]
    exp.load_hic_data(PATH + "/20Kb/chrT/chrT_A.tsv", silent=True)
    exp.filter_columns(silent=True)
    exp.normalize_hic(silent=True, factor=None)
    modelling_config = {
        "kforce": 5,
        "maxdist": 500,
        "scale": 0.01,
        "upfreq": 0.5,
        "lowfreq": -0.5,
    }
    models = exp.model_region(51, 71, ncopies=4, n_models=10, n_keep=10,
                              n_cpus=10,
                              # verbose=3,
                              config=modelling_config)
    models.save_models("models.pick")
    avg = models.average_model()
    nmd = len(models)
    print("I'm here test 13")
def test_07_forbidden_regions(self):
    """
    checks the TAD borders loaded with the first experiment and the
    forbidden regions of the chromosome before and after adding a second
    experiment.
    """
    if ONLY and ONLY != '07':
        return
    if CHKTIME:
        t0 = time()
    test_chr = Chromosome(name='Test Chromosome', max_tad_size=260000,
                          centromere_search=True)
    test_chr.add_experiment('exp1', 20000, tad_def=exp4,
                            hic_data=PATH + '/20Kb/chrT/chrT_D.tsv',
                            silent=True)
    # Values with square root normalization.
    #brks = [2.0, 7.0, 12.0, 18.0, 38.0, 43.0, 49.0,
    #        61.0, 66.0, 75.0, 89.0, 94.0, 99.0]
    brks = [3.0, 14.0, 19.0, 33.0, 38.0, 43.0, 49.0,
            61.0, 66.0, 71.0, 83.0, 89.0, 94.0, 99.0]
    # only TADs with a positive score are compared
    found = [tad['end']
             for tad in test_chr.experiments['exp1'].tads.values()
             if tad['score'] > 0]
    self.assertEqual(brks, found)

    # forbidden regions must be captured before the second experiment
    items1 = test_chr.forbidden.keys(), test_chr.forbidden.values()
    test_chr.add_experiment('exp2', 20000, tad_def=exp3,
                            hic_data=PATH + '/20Kb/chrT/chrT_C.tsv',
                            silent=True)
    items2 = test_chr.forbidden.keys(), test_chr.forbidden.values()
    know1 = ([38, 39], ['Centromere', 'Centromere'])
    #know1 = ([32, 33, 34, 38, 39, 19, 20, 21, 22,
    #          23, 24, 25, 26, 27, 28, 29, 30, 31],
    #         [None, None, None, 'Centromere', 'Centromere',
    #          None, None, None, None, None, None, None,
    #          None, None, None, None, None, None])
    know2 = ([38], ['Centromere'])
    self.assertEqual(items1, know1)
    self.assertEqual(items2, know2)
    if CHKTIME:
        print('%s %s' % ('7', time() - t0))
def test_07_forbidden_regions(self):
    """
    checks the TAD borders loaded with the first experiment and the
    forbidden regions of the chromosome before and after adding a second
    experiment.
    """
    if ONLY and ONLY != "07":
        return
    if CHKTIME:
        t0 = time()
    test_chr = Chromosome(name="Test Chromosome", max_tad_size=260000,
                          centromere_search=True)
    test_chr.add_experiment("exp1", 20000, tad_def=exp4,
                            hic_data=PATH + "/20Kb/chrT/chrT_D.tsv",
                            silent=True)
    # Values with square root normalization.
    # brks = [2.0, 7.0, 12.0, 18.0, 38.0, 43.0, 49.0,
    #         61.0, 66.0, 75.0, 89.0, 94.0, 99.0]
    brks = [3.0, 14.0, 19.0, 33.0, 38.0, 43.0, 49.0,
            61.0, 66.0, 71.0, 83.0, 89.0, 94.0, 99.0]
    # only TADs with a positive score are compared
    found = [tad["end"]
             for tad in test_chr.experiments["exp1"].tads.values()
             if tad["score"] > 0]
    self.assertEqual(brks, found)

    # forbidden regions must be captured before the second experiment
    items1 = test_chr.forbidden.keys(), test_chr.forbidden.values()
    test_chr.add_experiment("exp2", 20000, tad_def=exp3,
                            hic_data=PATH + "/20Kb/chrT/chrT_C.tsv",
                            silent=True)
    items2 = test_chr.forbidden.keys(), test_chr.forbidden.values()
    know1 = ([38, 39], ["Centromere", "Centromere"])
    # know1 = ([32, 33, 34, 38, 39, 19, 20, 21, 22,
    #           23, 24, 25, 26, 27, 28, 29, 30, 31],
    #          [None, None, None, 'Centromere', 'Centromere',
    #           None, None, None, None, None, None, None,
    #           None, None, None, None, None, None])
    know2 = ([38], ["Centromere"])
    self.assertEqual(items1, know1)
    self.assertEqual(items2, know2)
    if CHKTIME:
        print('%s %s' % ("7", time() - t0))
def tad_clustering(self):
    """
    aligns the 5th and the 9th TAD yielded by iter_tads with optimal_cmo
    and checks both alignments.
    """
    test_chr = Chromosome(name="Test Chromosome", resolution=20000)
    test_chr.add_experiment("chrT/chrT_A.tsv", name="exp1")
    test_chr.find_TAD(["exp1"])
    all_tads = list(test_chr.iter_tads("exp1"))
    expected1 = [1, 2, "-", "-", "-", "-", 3, "-", 4, 5, 6, "-", 7, 8]
    expected2 = [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
    align1, align2 = optimal_cmo(all_tads[4], all_tads[8], 9)
    self.assertEqual(align1, expected1)
    self.assertEqual(align2, expected2)
def test_12_3d_modelling_optimization(self):
    """
    quick test of the IMP parameter optimization (several copies) on bins
    50 to 70; skipped when IMP cannot be imported.
    """
    if ONLY and "12" not in ONLY:
        return
    if CHKTIME:
        t0 = time()
    try:
        __import__("IMP")
    except ImportError:
        warn("IMP not found, skipping test\n")
        return
    test_chr = Chromosome(name="Test Chromosome", max_tad_size=260000)
    # norm_data can be used to give the normalized matrix directly
    test_chr.add_experiment("exp1", 20000, tad_def=exp4,
                            hic_data=PATH + "/20Kb/chrT/chrT_D.tsv")
    exp = test_chr.experiments[0]
    exp.load_hic_data(PATH + "/20Kb/chrT/chrT_A.tsv")
    exp.filter_columns(silent=True)
    exp.normalize_hic(silent=True, factor=None)
    search = dict(lowfreq_range=[-0.6],
                  upfreq_range=(0, 1.1, 1.1),
                  maxdist_range=[500, 600])
    # It can be that this function requires also the raw hic_data matrix
    result = exp.optimal_imp_parameters(50, 70, ncopies=4, n_cpus=1,
                                        n_models=8, n_keep=2, verbose=True,
                                        **search)

    # get best correlations
    config = result.get_best_parameters_dict()

    # Save the models and the contact map
    # result.save_model or result.save_data
    # result.write_cmm to visualize the best models
    # result.write_xyz to visualize the best models

    wanted = {
        "maxdist": 600.0,
        "upfreq": 0.0,
        "kforce": 5,
        "dcutoff": 2,
        "reference": "",
        "lowfreq": -0.6,
        "scale": 0.01,
    }
    # NOTE(review): values are compared in dictionary iteration order, so
    # this relies on both dictionaries iterating their keys alike.
    got_numbers = [round(val, 4) for val in config.values()
                   if type(val) is not str]
    wanted_numbers = [round(val, 4) for val in wanted.values()
                      if type(val) is not str]
    self.assertEqual(got_numbers, wanted_numbers)
    if CHKTIME:
        print('%s %s' % ("12", time() - t0))
def tb_generate_tads(self, expt_name, adj_list, chrom, resolution,
                     normalized, tad_file):
    """
    Predict the TAD borders of one chromosome, at a given resolution, from
    a Hi-C adjacency list

    Parameters
    ----------
    expt_name : str
        Name under which the experiment is added to the chromosome
    adj_list : str
        Location of the adjacency list; also the prefix of the temporary
        matrix file written next to it
    chrom : str (presumably)
        Chromosome whose intra-chromosomal matrix is extracted
    resolution : int
        Resolution to read the Hi-C adjacency list at
    normalized : bool
        When it is False the data is normalized here first
    tad_file : str
        Location of the output TAD file

    Returns
    -------
    bool
        Always True
    """
    bin_size = int(resolution)
    print("TB TAD GENERATOR:", expt_name, adj_list, chrom, resolution,
          normalized, tad_file)

    hic_data = load_hic_data_from_reads(adj_list, resolution=bin_size)
    if normalized is False:
        hic_data.normalize_hic(iterations=9, max_dev=0.1)

    # the chromosome matrix goes through a temporary text file from which
    # the experiment is then loaded
    save_matrix_file = adj_list + "_" + str(chrom) + "_tmp.txt"
    hic_data.write_matrix(save_matrix_file, (chrom, chrom), normalized=True)

    chr_hic_data = hic_data.get_matrix((chrom, chrom))
    print("TB - chr_hic_data:", chr_hic_data)

    my_chrom = Chromosome(name=chrom, centromere_search=True)
    my_chrom.add_experiment(expt_name, hic_data=save_matrix_file,
                            resolution=bin_size)

    # Run core TADbit function to find TADs on each expt.
    my_chrom.find_tad(expt_name, n_cpus=15)

    tmp_tad_file = tad_file + ".tmp"
    my_chrom.experiments[expt_name].write_tad_borders(savedata=tmp_tad_file)

    # copy the borders from the temporary file to the requested location
    with open(tad_file, "wb") as target, open(tmp_tad_file, "rb") as source:
        target.write(source.read())

    return True
def test_12_3d_modelling_optimization(self):
    """
    quick test of the IMP parameter optimization on bins 50 to 70; skipped
    when IMP cannot be imported.
    """
    if ONLY and "12" not in ONLY:
        return
    if CHKTIME:
        t0 = time()
    try:
        __import__("IMP")
    except ImportError:
        warn("IMP not found, skipping test\n")
        return
    test_chr = Chromosome(name="Test Chromosome", max_tad_size=260000)
    test_chr.add_experiment("exp1", 20000, tad_def=exp4,
                            hic_data=PATH + "/20Kb/chrT/chrT_D.tsv")
    exp = test_chr.experiments[0]
    exp.load_hic_data(PATH + "/20Kb/chrT/chrT_A.tsv")
    exp.filter_columns(silent=True)
    exp.normalize_hic(silent=True, factor=None)
    result = exp.optimal_imp_parameters(
        50, 70, n_cpus=4, n_models=8, n_keep=2,
        lowfreq_range=[-0.6],
        upfreq_range=(0, 1.1, 1.1),  # from 0 till 1.1 in step of 1.1 with ()
        maxdist_range=[500, 600],    # it will use 500 and 600 with []
        verbose=False)

    # get best correlations
    config = result.get_best_parameters_dict()  # dict with parameters
    wanted = {
        "maxdist": 600.0,
        "upfreq": 0.0,
        "kforce": 5,
        "dcutoff": 2,
        "reference": "",
        "lowfreq": -0.6,
        "scale": 0.01
    }
    # Compare parameter by parameter, keyed by name, so the check does not
    # depend on dictionary ordering. The filter has to look at the VALUE:
    # the keys are all strings, so filtering on them would leave nothing
    # to compare. String values ("reference") cannot be rounded and are
    # skipped.
    numeric = [key for key in wanted if not isinstance(wanted[key], str)]
    self.assertEqual({key: round(config[key], 4) for key in numeric},
                     {key: round(wanted[key], 4) for key in numeric})
    if CHKTIME:
        print("12", time() - t0)
def test_13_3d_modelling_centroid(self):
    # model with no optimisation
    """
    the centroid model must be the model closest to the average model;
    skipped when IMP cannot be imported.
    """
    if ONLY and ONLY != '13':
        return
    if CHKTIME:
        t0 = time()
    try:
        __import__('IMP')
    except ImportError:
        warn('IMP not found, skipping test\n')
        return
    test_chr = Chromosome(name='Test Chromosome', max_tad_size=260000)
    test_chr.add_experiment('exp1', 20000, tad_def=exp4,
                            hic_data=PATH + '/20Kb/chrT/chrT_D.tsv',
                            silent=True)
    exp = test_chr.experiments[0]
    exp.load_hic_data(PATH + '/20Kb/chrT/chrT_A.tsv', silent=True)
    exp.filter_columns(silent=True)
    exp.normalize_hic(silent=True, factor=None)
    modelling_config = {'kforce': 5, 'maxdist': 500, 'scale': 0.01,
                        'upfreq': 1.0, 'lowfreq': -0.6}
    models = exp.model_region(51, 71, n_models=40, n_keep=25, n_cpus=4,
                              config=modelling_config)
    models.save_models('models.pick')
    avg = models.average_model()
    nmd = len(models)
    # coordinates of every model, with the average model appended last
    # (index nmd)
    xs, ys, zs = [[models[m][axis] for m in xrange(nmd)] + [avg[axis]]
                  for axis in ('x', 'y', 'z')]
    dev = rmsdRMSD_wrapper(xs, ys, zs, models._zeros, models.nloci, 200,
                           range(nmd + 1), nmd + 1, int(False), 'rmsd', 0)
    centroid = models[models.centroid_model()]
    # find closest
    model = min(range(nmd), key=lambda k: dev[(k, nmd)])
    self.assertEqual(centroid['rand_init'], models[model]['rand_init'])
    if CHKTIME:
        print('%s %s' % ('13', time() - t0))
def _sub_experiment_zscore(self, start, end):
    """
    Get the z-score of a sub-region of an experiment.

    TODO: find a nicer way to do this...

    :param start: first bin to model (bin number)
    :param end: last bin to model (bin number), included in the region

    :returns: the z-scores computed on the sub-region and a square matrix
       with the values taken from the normalization of the full
       experiment (NaN where the bin is filtered or the data is empty)

    :raises Exception: when every bin of the region is listed in
       self._zeros
    """
    if self._normalization != 'visibility':
        warn('WARNING: normalizing according to visibility method')
        self.normalize_hic(method='visibility')
    from pytadbit import Chromosome
    matrix = self.get_hic_matrix()
    end += 1  # from here on the region is the half-open range [start, end)
    new_matrix = [[matrix[i][j] for j in xrange(start, end)]
                  for i in xrange(start, end)]

    tmp = Chromosome('tmp')
    tmp.add_experiment('exp1', hic_data=[new_matrix],
                       resolution=self.resolution, filter_columns=False)
    exp = tmp.experiments[0]
    # We want the weights and zeros calculated in the full chromosome
    siz = self.size
    exp.norm = [[self.norm[0][i + siz * j] for i in xrange(start, end)
                 for j in xrange(start, end)]]
    # 'end' is already exclusive: only bins strictly below it belong to
    # the region, otherwise a bin outside the sub-matrix would be listed
    exp._zeros = dict([(z - start, None) for z in self._zeros
                       if start <= z < end])
    # the region holds end - start bins; all of them flagged means there
    # is nothing to model
    if len(exp._zeros) == (end - start):
        raise Exception('ERROR: no interaction found in selected regions')
    # ... but the z-scores in this particular region
    exp.get_hic_zscores(remove_zeros=True)
    values = [[float('nan') for _ in xrange(exp.size)]
              for _ in xrange(exp.size)]
    for i in xrange(exp.size):
        # zeros are rows or columns having a zero in the diagonal
        if i in exp._zeros:
            continue
        for j in xrange(i + 1, exp.size):
            if j in exp._zeros:
                continue
            # cells without data stay NaN
            if not exp.hic_data[0][i * exp.size + j]:
                continue
            values[i][j] = exp.norm[0][i * exp.size + j]
            values[j][i] = exp.norm[0][i * exp.size + j]
    return exp._zscores, values
def test_09_hic_normalization(self):
    """
    computes the Hi-C z-scores, with and without z-scoring, after loading
    a second data file; no assertion is made.
    TODO: check with Davide's script
    """
    chromosome = Chromosome(name='Test Chromosome', max_tad_size=260000)
    chromosome.add_experiment('exp1', 20000, tad_def=exp4,
                              hic_data='20Kb/chrT/chrT_D.tsv')
    experiment = chromosome.experiments[0]
    experiment.load_experiment('20Kb/chrT/chrT_A.tsv')
    experiment.get_hic_zscores()
    experiment.get_hic_zscores(zscored=False)
def test_10_generate_weights(self):
    """
    normalize_hic must reproduce the normalization computed when the
    experiment was loaded.
    method names are: 'sqrt' or 'over_tot'
    """
    chromosome = Chromosome(name='Test Chromosome', max_tad_size=260000)
    chromosome.add_experiment('exp1', 20000, tad_def=exp4,
                              hic_data='20Kb/chrT/chrT_D.tsv')
    exp = chromosome.experiments[0]
    # keep a copy of the normalization done at load time, then redo it
    loaded_norm = exp.norm[:]
    exp.norm = None
    exp.normalize_hic()
    self.assertEqual(loaded_norm[0], exp.norm[0])
def test_10_generate_weights(self):
    """
    normalize_hic must reproduce the weights computed when the experiment
    was loaded.
    TODO: using Francois' formula
    method names are: 'sqrt' or 'over_tot'
    """
    chromosome = Chromosome(name='Test Chromosome', max_tad_size=260000)
    chromosome.add_experiment('exp1', 20000, tad_handler=exp4,
                              xp_handler='20Kb/chrT/chrT_D.tsv')
    exp = chromosome.experiments[0]
    # keep a copy of the weights computed at load time, then redo them
    loaded_weights = exp.wght[:]
    exp.wght = None
    exp.normalize_hic()
    self.assertEqual(loaded_weights[0], exp.wght[0])
def _sub_experiment_zscore(self, start, end):
    """
    Get the z-score of a sub-region of an experiment.

    TODO: find a nicer way to do this...

    :param start: first bin to model (bin number)
    :param end: last bin to model (bin number), included in the region

    :returns: the z-scores computed on the sub-region and a square matrix
       with the values taken from the normalization of the full
       experiment (NaN where the bin is filtered or the data is empty)

    :raises Exception: when every bin of the region is listed in
       self._zeros
    """
    if self._normalization != 'visibility':
        warn('WARNING: normalizing according to visibility method')
        self.normalize_hic(method='visibility')
    from pytadbit import Chromosome
    matrix = self.get_hic_matrix()
    end += 1  # from here on the region is the half-open range [start, end)
    new_matrix = [[matrix[i][j] for j in xrange(start, end)]
                  for i in xrange(start, end)]

    tmp = Chromosome('tmp')
    tmp.add_experiment('exp1', hic_data=[new_matrix],
                       resolution=self.resolution, filter_columns=False)
    exp = tmp.experiments[0]
    # We want the weights and zeros calculated in the full chromosome
    siz = self.size
    exp.norm = [[self.norm[0][i + siz * j] for i in xrange(start, end)
                 for j in xrange(start, end)]]
    # 'end' is already exclusive: only bins strictly below it belong to
    # the region, otherwise a bin outside the sub-matrix would be listed
    exp._zeros = dict([(z - start, None) for z in self._zeros
                       if start <= z < end])
    # the region holds end - start bins; all of them flagged means there
    # is nothing to model
    if len(exp._zeros) == (end - start):
        raise Exception('ERROR: no interaction found in selected regions')
    # ... but the z-scores in this particular region
    exp.get_hic_zscores(remove_zeros=True)
    values = [[float('nan') for _ in xrange(exp.size)]
              for _ in xrange(exp.size)]
    for i in xrange(exp.size):
        # zeros are rows or columns having a zero in the diagonal
        if i in exp._zeros:
            continue
        for j in xrange(i + 1, exp.size):
            if j in exp._zeros:
                continue
            # cells without data stay NaN
            if not exp.hic_data[0][i * exp.size + j]:
                continue
            values[i][j] = exp.norm[0][i * exp.size + j]
            values[j][i] = exp.norm[0][i * exp.size + j]
    return exp._zscores, values
def main():
    """
    main function

    Pick a few TADs at random from the chromosome 19 matrix, generate
    randomly altered copies of each of them, then compute the distances
    between the copies, cluster them and paint the clustering.
    """
    global DISTRA
    global DISTRD
    n_pick = 4
    n_tot = 10
    test_chr = Chromosome(name='Test Chromosome')
    test_chr.add_experiment(
        'exp1', 100000,
        xp_handler=PATH + 'HIC_gm06690_chr19_chr19_100000_obs.txt')
    test_chr.find_tad(['exp1'])

    # TAD definitions, each completed with its Hi-C sub-matrix
    real_tads = {}
    for idx, tad_data in enumerate(test_chr.iter_tads('exp1', normed=False)):
        real_tads[idx] = test_chr.experiments['exp1'].tads[idx]
        real_tads[idx]['hic'] = tad_data[1]
    DISTRA, DISTRD = get_hic_distr(real_tads)

    # pick some tads: distinct ones, spanning at least 15 bins
    picked_tads = []
    picked_keys = []
    while len(picked_tads) < n_pick:
        key, new_tad = get_random_tad(real_tads)
        if key in picked_keys or (new_tad['end'] - new_tad['start'] < 15):
            continue
        picked_tads.append(new_tad)
        picked_keys.append(key)

    # mutate this tads
    tads = {}
    tad_matrices = []
    tad_names = []
    for i, original in enumerate(picked_tads):
        print(i)
        first_name = uppercase[i] + '_' + str(0)
        tads[first_name] = original
        tad_names.append(first_name)
        for j in xrange(1, n_tot):
            # drawn in this order to keep the random sequence unchanged
            ext = int(random() * 4) + 1
            indel = int(random() * 4) + 1
            hic, indels = generate_random_contacts(
                tad1=original['hic'], prob=0.05, ext=ext, indel=indel)[1:]
            copy_name = uppercase[i] + '_' + str(j)
            tads[copy_name] = {'hic': hic,
                               'start': original['start'],
                               'end': original['end']}
            tad_matrices.append(hic)
            tad_names.append(copy_name)

    n_matrices = len(tad_matrices)
    distances, cci = get_distances(tad_matrices, max_num_v=4,
                                   n_cpus=mu.cpu_count())
    results, clusters = pre_cluster(distances, cci, n_matrices)
    paint_clustering(results, clusters, n_matrices, test_chr, tad_names,
                     tad_matrices)
def load_experiments(opts):
    """
    Create the chromosome named in the options and load each Hi-C file
    into it as a separate experiment.

    :param opts: parsed options; reads crm, hic_files, exp_names,
       resolution and verbose

    :returns: the Chromosome with all the experiments loaded
    """
    crm = Chromosome(opts.crm)
    for position, xpr in enumerate(opts.hic_files):
        if opts.exp_names:
            name = opts.exp_names[position]
        else:
            # file name without its directory and without its last
            # extension (the remaining dots are dropped as well)
            file_name = xpr.split('/')[-1]
            name = ''.join(file_name.split('.')[:-1])
        if opts.verbose:
            print(' Reading Hi-C datafile #%s (%s)' % (position + 1, name))
        crm.add_experiment(name, hic_data=xpr,
                           resolution=int(opts.resolution))
        if opts.verbose:
            print(' loaded as: %s\n' % (crm.experiments[name]))
    return crm
def test_07_forbidden_regions(self):
    """
    checks the TAD borders loaded with the first experiment and the
    forbidden regions of the chromosome before and after adding a second
    experiment.
    """
    if ONLY and "07" not in ONLY:
        return
    if CHKTIME:
        t0 = time()
    test_chr = Chromosome(name="Test Chromosome", max_tad_size=260000,
                          centromere_search=True)
    test_chr.add_experiment("exp1", 20000, tad_def=exp4,
                            hic_data=PATH + "/20Kb/chrT/chrT_D.tsv",
                            silent=True)
    # Values with square root normalization.
    #brks = [2.0, 7.0, 12.0, 18.0, 38.0, 43.0, 49.0,
    #        61.0, 66.0, 75.0, 89.0, 94.0, 99.0]
    brks = [3.0, 14.0, 19.0, 33.0, 38.0, 43.0, 49.0,
            61.0, 66.0, 71.0, 83.0, 89.0, 94.0, 99.0]
    # only TADs with a positive score are compared
    found = [tad["end"]
             for tad in test_chr.experiments["exp1"].tads.values()
             if tad["score"] > 0]
    self.assertEqual(brks, found)

    # forbidden regions must be captured before the second experiment
    items1 = (list(test_chr.forbidden.keys()),
              list(test_chr.forbidden.values()))
    test_chr.add_experiment("exp2", 20000, tad_def=exp3,
                            hic_data=PATH + "/20Kb/chrT/chrT_C.tsv",
                            silent=True)
    items2 = (list(test_chr.forbidden.keys()),
              list(test_chr.forbidden.values()))
    know1 = ([38, 39], ["Centromere", "Centromere"])
    #know1 = ([32, 33, 34, 38, 39, 19, 20, 21, 22,
    #          23, 24, 25, 26, 27, 28, 29, 30, 31],
    #         [None, None, None, "Centromere", "Centromere",
    #          None, None, None, None, None, None, None,
    #          None, None, None, None, None, None])
    know2 = ([38], ["Centromere"])
    self.assertEqual(items1, know1)
    self.assertEqual(items2, know2)
    if CHKTIME:
        print("7", time() - t0)
def test_13_3d_modelling_centroid(self):
    """
    the centroid model must be the model closest to the average model;
    skipped when IMP cannot be imported.
    """
    if ONLY and ONLY != '13':
        return
    if CHKTIME:
        t0 = time()
    try:
        __import__('IMP')
    except ImportError:
        warn('IMP not found, skipping test\n')
        return
    test_chr = Chromosome(name='Test Chromosome', max_tad_size=260000)
    test_chr.add_experiment('exp1', 20000, tad_def=exp4,
                            hic_data=PATH + '/20Kb/chrT/chrT_D.tsv',
                            silent=True)
    exp = test_chr.experiments[0]
    exp.load_hic_data(PATH + '/20Kb/chrT/chrT_A.tsv', silent=True)
    exp.filter_columns(silent=True)
    exp.normalize_hic(silent=True, factor=None)
    modelling_config = {'kforce': 5, 'maxdist': 500, 'scale': 0.01,
                        'upfreq': 1.0, 'lowfreq': -0.6}
    models = exp.model_region(51, 71, n_models=40, n_keep=25, n_cpus=4,
                              config=modelling_config)
    models.save_models('models.pick')
    avg = models.average_model()
    nmd = len(models)
    # coordinates of every model, with the average model appended last
    # (index nmd)
    xs, ys, zs = [[models[m][axis] for m in xrange(nmd)] + [avg[axis]]
                  for axis in ('x', 'y', 'z')]
    dev = rmsdRMSD_wrapper(xs, ys, zs, models._zeros, models.nloci, 200,
                           range(nmd + 1), nmd + 1, int(False), 'rmsd', 0)
    centroid = models[models.centroid_model()]
    # find closest
    model = min(range(nmd), key=lambda k: dev[(k, nmd)])
    self.assertEqual(centroid['rand_init'], models[model]['rand_init'])
    if CHKTIME:
        print('%s %s' % ('13', time() - t0))
def test_10_compartments(self):
    """
    checks the number of compartments found with the 'cluster' labelling.
    """
    if ONLY and ONLY != "10":
        return
    if CHKTIME:
        t0 = time()
    test_chr = Chromosome(name="Test Chromosome", max_tad_size=260000)
    test_chr.add_experiment("exp1", 20000, tad_def=exp4,
                            hic_data=PATH + "/20Kb/chrT/chrT_D.tsv",
                            silent=True)
    exp = test_chr.experiments[0]
    exp.load_hic_data(PATH + "/20Kb/chrT/chrT_A.tsv", silent=True)
    matrix = exp.hic_data[0]
    matrix.find_compartments(label_compartments="cluster")
    n_compartments = len(matrix.compartments[None])
    self.assertEqual(n_compartments, 39)
    # self.assertEqual(round(hic_data.compartments[None][24]['dens'], 5),
    #                  0.75434)
    if CHKTIME:
        print('%s %s' % ("10", time() - t0))
def test_10_generate_weights(self):
    """
    normalize_hic must reproduce, to 3 decimals over the first 100 values,
    the normalization computed when the experiment was loaded.
    """
    if CHKTIME:
        t0 = time()
    test_chr = Chromosome(name='Test Chromosome', max_tad_size=260000)
    test_chr.add_experiment('exp1', 20000, tad_def=exp4,
                            hic_data=PATH + '/20Kb/chrT/chrT_D.tsv',
                            silent=True)
    exp = test_chr.experiments[0]
    # keep a copy of the normalization done at load time, then redo it
    loaded_norm = exp.norm[:]
    exp.norm = None
    exp.normalize_hic()
    before = [round(weight, 3) for weight in loaded_norm[0][:100]]
    after = [round(weight, 3) for weight in exp.norm[0][:100]]
    self.assertEqual(before, after)
    if CHKTIME:
        print('%s %s' % ('10', time() - t0))
def test_13_3d_modelling_centroid(self):
    """
    Check centroid selection using the pairwise 'score' values of the models.

    Models are built for bins 51-71, the average model is appended, and
    rmsdRMSD_wrapper is called in 'score' mode.  For each of the 26 entries
    (25 kept models plus the average) the values of every pair it takes
    part in are summed.  The entry with the largest sum must be number 21,
    and the entry whose sum is closest to the mean sum must share its
    'rand_init' with the centroid model.  Returns early when IMP cannot be
    imported.
    """
    if CHKTIME:
        t0 = time()
    try:
        __import__('IMP')
    except ImportError:
        warn('IMP not found, skipping test\n')
        return
    chrom = Chromosome(name='Test Chromosome', max_tad_size=260000)
    chrom.add_experiment('exp1', 20000, tad_def=exp4,
                         hic_data=PATH + '/20Kb/chrT/chrT_D.tsv',
                         silent=True)
    exp = chrom.experiments[0]
    exp.load_hic_data(PATH + '/20Kb/chrT/chrT_A.tsv', silent=True)
    exp.normalize_hic(silent=True)
    modelling_config = {'kforce': 5, 'maxdist': 500, 'scale': 0.01,
                        'upfreq': 1.0, 'lowfreq': -0.6}
    models = exp.model_region(51, 71, n_models=110, n_keep=25, n_cpus=4,
                              config=modelling_config)
    models.save_models('models.pick')
    avg = models.average_model()
    nmd = len(models)
    # One coordinate list per axis; the average model goes last.
    coords = []
    for axis in ('x', 'y', 'z'):
        coords.append([models[m][axis] for m in range(nmd)] + [avg[axis]])
    pair_scores = rmsdRMSD_wrapper(coords[0], coords[1], coords[2],
                                   models.nloci, 410,
                                   range(nmd + 1), nmd + 1,
                                   int(False), 'score', 1)

    def summed_score(k):
        # Sum over every pair in which entry k takes part.
        return sum([pair_scores[(i, j)] for i, j in pair_scores
                    if i == k or j == k])

    totals = [(k, summed_score(k)) for k in range(26)]
    self.assertEqual(21, sorted(totals, key=lambda x: x[1])[-1][0])
    centroid = models[models.centroid_model()]
    expsc = sum([total for _, total in totals]) / 26
    # find closest
    model = min(totals, key=lambda x: abs(x[1] - expsc))[0]
    self.assertEqual(centroid['rand_init'], models[model]['rand_init'])
    if CHKTIME:
        print('13 %s' % (time() - t0,))
def main():
    """
    Build 3D models and write the outputs requested on the command line.

    With opts.inabc set, models are generated directly from the parsed
    z-scores; otherwise a Chromosome is built from opts.incrm and the
    region opts.start-opts.end of its experiment is modelled.  Optionally
    saves the models, writes opts.cmm cmm files and runs the full report
    (clustering, dendrogram, consistency).
    """
    opts, params = get_options()
    # Modelling arguments shared by both input modes.
    shared = dict(n_models=opts.nmodels, n_keep=opts.nkeep,
                  n_cpus=opts.ncpus, keep_all=False, verbose=False,
                  config=params)
    if opts.inabc:
        zscores = parse_zscores(opts.inabc)
        models = generate_3d_models(zscores, opts.resolution, start=1,
                                    outfile=None, **shared)
    else:
        crm = 'crm'
        xnam = 'X'
        crmbit = Chromosome(crm)
        crmbit.add_experiment(xnam, resolution=opts.resolution,
                              xp_handler=opts.incrm)
        exp = crmbit.experiments[xnam]
        models = exp.model_region(start=opts.start, end=opts.end, **shared)

    if opts.save:
        pickle_path = '%s/models_%s_%s.pik' % (opts.out, opts.start,
                                               opts.start + opts.nmodels)
        models.save_models(pickle_path)

    for model_index in range(int(opts.cmm)):
        models.write_cmm(model_index, opts.out)

    if opts.full_report:
        models.cluster_models(dcutoff=200)
        models.cluster_analysis_dendrogram(n_best_clusters=10)
        models.model_consistency()
def main():
    """
    Find TADs in the test chromosome, then cluster and plot them.

    TADs with a negative score, or spanning fewer than 10 bins
    (end - start < 10), are left out of the clustering.
    """
    chrom = Chromosome(name='Test Chromosome')
    chrom.add_experiment(
        'exp1', 100000,
        xp_handler=PATH + 'HIC_k562_chr19_chr19_100000_obs.txt')
    chrom.find_tad(['exp1'])

    tad_names, tad_matrices = [], []
    for name, matrix in chrom.iter_tads('exp1'):
        tad = chrom.experiments['exp1'].tads[name]
        if tad['score'] < 0 or (tad['end'] - tad['start']) < 10:
            continue
        tad_names.append(name)
        tad_matrices.append(matrix)

    num = len(tad_names)
    distances, cci = get_distances(tad_matrices, max_num_v=mu.cpu_count())
    results, clusters = pre_cluster(distances, cci, num)
    paint_clustering(results, clusters, num, chrom, tad_names)
def load_hic_data(opts):
    """
    Load Hi-C data into a new Chromosome and return it.

    Adds a single experiment 'test' from opts.matrix, filters its columns
    with opts.perc_zero, and checks opts.beg / opts.end against the
    experiment size.  opts.end is overwritten with the experiment size when
    it exceeds it.

    :raises Exception: if opts.beg is larger than the experiment size.
    """
    # Start reading the data
    chromosome = Chromosome(opts.crm)  # Create chromosome object
    chromosome.add_experiment('test', exp_type='Hi-C',
                              resolution=opts.reso, norm_data=opts.matrix)
    experiment = chromosome.experiments[-1]
    # TODO: if not bad columns:...
    experiment.filter_columns(perc_zero=opts.perc_zero)
    if opts.beg > experiment.size:
        raise Exception('ERROR: beg parameter is larger than chromosome size.')
    if opts.end > experiment.size:
        template = ('WARNING: end parameter is larger than chromosome '
                    'size. Setting end to %s.\n')
        print(template % (experiment.size * opts.reso))
        opts.end = experiment.size
    return chromosome
def test_09_hic_normalization(self):
    """
    Check the sum of the Hi-C z-scores computed without prior normalization.

    get_hic_zscores() is called twice (default, then zscored=False) and the
    sum of every value stored in exp._zscores must round to 3993.7842.
    """
    if CHKTIME:
        t0 = time()
    chrom = Chromosome(name='Test Chromosome', max_tad_size=260000)
    chrom.add_experiment('exp1', 20000, tad_def=exp4,
                         hic_data=PATH + '/20Kb/chrT/chrT_D.tsv',
                         silent=True)
    exp = chrom.experiments[0]
    exp.load_hic_data(PATH + '/20Kb/chrT/chrT_A.tsv', silent=True)
    exp.get_hic_zscores()
    exp.get_hic_zscores(zscored=False)
    # Flatten the two-level mapping in iteration order before summing.
    zscores = exp._zscores
    values = []
    for key1 in zscores.keys():
        row = zscores[key1]
        values.extend([row[key2] for key2 in row])
    sumz = sum(values)
    self.assertEqual(round(sumz, 4), round(3993.7842, 4))
    if CHKTIME:
        print('9 %s' % (time() - t0,))
def test_09_hic_normalization(self):
    """
    Check the sum of the Hi-C z-scores computed after normalization.

    The experiment is normalized, get_hic_zscores() is called twice
    (default, then zscored=False) and the sum of every value stored in
    exp._zscores must round to 4059.2877.
    """
    if ONLY and ONLY != "09":
        return
    if CHKTIME:
        t0 = time()
    chrom = Chromosome(name="Test Chromosome", max_tad_size=260000)
    chrom.add_experiment("exp1", 20000, tad_def=exp4,
                         hic_data=PATH + "/20Kb/chrT/chrT_D.tsv",
                         silent=True)
    exp = chrom.experiments[0]
    exp.load_hic_data(PATH + "/20Kb/chrT/chrT_A.tsv", silent=True)
    exp.normalize_hic(silent=True)
    exp.get_hic_zscores()
    exp.get_hic_zscores(zscored=False)
    # Flatten the two-level mapping in iteration order before summing.
    zscores = exp._zscores
    values = []
    for key1 in zscores.keys():
        row = zscores[key1]
        values.extend([row[key2] for key2 in row])
    sumz = sum(values)
    self.assertEqual(round(sumz, 4), round(4059.2877, 4))
    if CHKTIME:
        print("9 %s" % (time() - t0,))
def test_10_compartments(self):
    """
    Check the number of compartments found in the test Hi-C matrix.

    Skipped when ONLY is set and does not contain "10".  Runs
    find_compartments(label_compartments="cluster") on the first Hi-C
    matrix and expects 39 entries under compartments[None].
    """
    if ONLY and "10" not in ONLY:
        return
    if CHKTIME:
        t0 = time()
    chrom = Chromosome(name="Test Chromosome", max_tad_size=260000)
    chrom.add_experiment("exp1", 20000, tad_def=exp4,
                         hic_data=PATH + "/20Kb/chrT/chrT_D.tsv",
                         silent=True)
    experiment = chrom.experiments[0]
    experiment.load_hic_data(PATH + "/20Kb/chrT/chrT_A.tsv", silent=True)
    matrix = experiment.hic_data[0]
    matrix.find_compartments(label_compartments="cluster")
    n_compartments = len(matrix.compartments[None])
    self.assertEqual(n_compartments, 39)
    # self.assertEqual(round(matrix.compartments[None][24]["dens"], 5),
    #                  0.75434)
    if CHKTIME:
        print("10 %s" % (time() - t0,))
def test_10_compartments(self):
    """
    Check the number of compartments found in the test Hi-C matrix.

    Skipped when ONLY is set to anything other than '10'.  Runs
    find_compartments(label_compartments='cluster') on the first Hi-C
    matrix and expects 39 entries under compartments[None].
    """
    if ONLY and ONLY != '10':
        return
    if CHKTIME:
        t0 = time()
    chrom = Chromosome(name='Test Chromosome', max_tad_size=260000)
    chrom.add_experiment('exp1', 20000, tad_def=exp4,
                         hic_data=PATH + '/20Kb/chrT/chrT_D.tsv',
                         silent=True)
    experiment = chrom.experiments[0]
    experiment.load_hic_data(PATH + '/20Kb/chrT/chrT_A.tsv', silent=True)
    matrix = experiment.hic_data[0]
    matrix.find_compartments(label_compartments='cluster')
    self.assertEqual(len(matrix.compartments[None]), 39)
    # self.assertEqual(round(matrix.compartments[None][24]['dens'], 5),
    #                  0.75434)
    if CHKTIME:
        print('10 %s' % (time() - t0,))
def test_07_forbidden_regions(self):
    """
    Check TAD borders and how forbidden regions change between experiments.

    The 'end' of every positively scored TAD of 'exp1' must match the known
    list, and the chromosome's forbidden keys/values are compared against
    known values before and after adding a second experiment.
    """
    known_before = ([32, 33, 34, 38, 39, 19, 20, 21, 22, 23, 24, 25, 26,
                     27, 28, 29, 30, 31],
                    [None, None, None, 'Centromere', 'Centromere',
                     None, None, None, None, None, None, None,
                     None, None, None, None, None, None])
    known_after = ([38], ['Centromere'])
    expected_ends = [2.0, 7.0, 12.0, 18.0, 49.0, 61.0, 66.0, 75.0, 89.0,
                     94.0, 99.0]

    chrom = Chromosome(name='Test Chromosome', max_tad_size=260000)
    chrom.add_experiment('exp1', 20000, tad_def=exp4,
                         hic_data='20Kb/chrT/chrT_D.tsv')
    tads = chrom.experiments['exp1'].tads
    found_ends = []
    for key in tads:
        if tads[key]['score'] > 0:
            found_ends.append(tads[key]['end'])
    self.assertEqual(expected_ends, found_ends)

    # Snapshot the forbidden regions before the second experiment is added.
    forbidden_before = (chrom.forbidden.keys(), chrom.forbidden.values())
    chrom.add_experiment('exp2', 20000, tad_def=exp3,
                         hic_data='20Kb/chrT/chrT_C.tsv')
    forbidden_after = (chrom.forbidden.keys(), chrom.forbidden.values())

    self.assertEqual(forbidden_before, known_before)
    self.assertEqual(forbidden_after, known_after)
def load_hic_data(opts):
    """
    Build a Chromosome holding the Hi-C experiment described by opts.

    The experiment 'test' is created from opts.matrix at resolution
    opts.reso and its columns are filtered with opts.perc_zero.  When
    opts.end exceeds the experiment size a warning is printed and opts.end
    is replaced by that size.

    :raises Exception: if opts.beg is larger than the experiment size.
    :returns: the Chromosome object.
    """
    # Start reading the data
    crm = Chromosome(opts.crm)  # Create chromosome object
    crm.add_experiment('test', exp_type='Hi-C', resolution=opts.reso,
                       norm_data=opts.matrix)
    # TODO: if not bad columns:...
    loaded = crm.experiments[-1]
    loaded.filter_columns(perc_zero=opts.perc_zero)

    size = loaded.size
    if opts.beg > size:
        raise Exception('ERROR: beg parameter is larger than chromosome size.')
    if not opts.end > size:
        return crm
    warning = ('WARNING: end parameter is larger than chromosome ' +
               'size. Setting end to %s.\n' % (loaded.size * opts.reso))
    print(warning)
    opts.end = loaded.size
    return crm
def load_hic_data(opts):
    """
    Load Hi-C data into a new Chromosome and return it.

    The matrix file opts.matrix is first parsed with optimal_reader; if
    that fails for any reason the error is printed, a warning is issued and
    the path itself is handed to add_experiment as norm_data instead.

    :param opts: options object providing crm, matrix and reso.
    :returns: the Chromosome holding the experiment 'test'.
    """
    # Start reading the data
    crm = Chromosome(opts.crm)  # Create chromosome object
    print(' o Loading Hi-C matrix')
    try:
        # Close the handle once parsing is done instead of leaking it.
        # NOTE(review): assumes optimal_reader consumes the file before
        # returning -- confirm against its implementation.
        with open(opts.matrix) as handle:
            hic = optimal_reader(handle, normalized=True,
                                 resolution=opts.reso)
        crm.add_experiment('test', exp_type='Hi-C', resolution=opts.reso,
                           norm_data=hic)
    except Exception as e:
        # Deliberate best-effort fallback: let add_experiment read the file.
        print(str(e))
        warn('WARNING: failed to load data as TADbit standardized matrix\n')
        crm.add_experiment('test', exp_type='Hi-C', resolution=opts.reso,
                           norm_data=opts.matrix)
    # Hand the chromosome back to the caller, as the other load_hic_data
    # variants do; previously the object was built and then discarded.
    return crm
def test_08_changing_resolution(self):
    """
    Check that changing the resolution preserves the total Hi-C count.

    Starting from the 20 kb matrix, the experiment is moved through a
    series of resolutions; after each change the matrix is validated with
    check_hic and the sum of its values is recorded.  All recorded sums
    must be equal.
    """
    if ONLY and "08" not in ONLY:
        return
    if CHKTIME:
        t0 = time()
    chrom = Chromosome(name="Test Chromosome", max_tad_size=260000)
    chrom.add_experiment("exp1", 20000, tad_def=exp4,
                         hic_data=PATH + "/20Kb/chrT/chrT_D.tsv",
                         silent=True)
    exp = chrom.experiments["exp1"]
    # Sum at the initial 20 kb resolution (no check_hic at this step).
    totals = [sum(exp.hic_data[0].values())]
    for resolution in (80000, 160000, 360000, 2400000,
                       40000, 20000, 40000):
        exp.set_resolution(resolution)
        totals.append(sum(exp.hic_data[0].values()))
        check_hic(exp.hic_data[0], exp.size)
    all_equal = all(first == second
                    for first, second in zip(totals, totals[1:]))
    self.assertTrue(all_equal)
    if CHKTIME:
        print("8 %s" % (time() - t0,))
def test_11_write_interaction_pairs(self):
    """
    Write the interaction pair file and check its size and two of its lines.

    The file is written to 'lala' in the current directory and is removed
    afterwards, whether or not the assertions pass.
    """
    if CHKTIME:
        t0 = time()
    test_chr = Chromosome(name='Test Chromosome', max_tad_size=260000)
    test_chr.add_experiment('exp1', 20000, tad_def=exp4,
                            hic_data=PATH + '/20Kb/chrT/chrT_D.tsv',
                            silent=True)
    exp = test_chr.experiments[0]
    exp.load_hic_data(PATH + '/20Kb/chrT/chrT_A.tsv', silent=True)
    exp.get_hic_zscores(zscored=False)
    try:
        exp.write_interaction_pairs('lala')
        # Read through a context manager so the handle is always closed.
        with open('lala') as handle:
            lines = handle.readlines()
        self.assertEqual(len(lines), 4851)
        self.assertEqual(lines[25], '1\t28\t0.933380667098\n')
        self.assertEqual(lines[2000], '24\t100\t0.233201219512\n')
    finally:
        # Clean up even when an assertion fails, so no stray file is left.
        system('rm -f lala')
    if CHKTIME:
        print('11 %s' % (time() - t0,))
def process():
    """
    Call TADs on every matrix file in args and write the results.

    Sample and chromosome names are derived from each file name: the
    extension and anything from ".matrix" on are dropped, then the part
    after the last "." is the chromosome and the rest is the sample.  For
    each file the TAD borders, a PDF plot and a saved chromosome (.tdb) are
    written using a prefix built from options.outputDir and either
    options.outputFilename or the base name of the first input file.
    """
    if options.outputFilename != "":
        prefix = options.outputDir + options.outputFilename
    else:
        prefix = options.outputDir + os.path.basename(args[0])

    for matrix_path in args:
        base_name = os.path.basename(matrix_path)
        stem = os.path.splitext(base_name)[0].split(".matrix")[0]
        pieces = stem.rsplit(".", 1)
        chrom_name = pieces[-1]
        sample = pieces[0]

        chrom = Chromosome(name=chrom_name, centromere_search=True,
                           species=options.species,
                           assembly=options.assembly)
        chrom.set_max_tad_size(5000000)
        chrom.add_experiment(sample, exp_type='Hi-C', identifier=sample,
                             hic_data=matrix_path,
                             resolution=options.resolution)
        exp = chrom.experiments[sample]
        exp.normalize_hic(silent=True)
        chrom.find_tad(sample, n_cpus=options.threads, normalized=True,
                       verbose=False)

        exp.write_tad_borders(prefix + "." + chrom_name + ".border")
        # chrom.tad_density_plot(sample,savefig=prefix+".density."+chrom_name+".pdf")
        chrom_prefix = prefix + "chr." + chrom_name
        chrom.visualize(exp.name, paint_tads=True,
                        savefig=chrom_prefix + ".pdf")
        chrom.save_chromosome(chrom_prefix + ".tdb", force=True)
def generate_tads(self, chrom):
    """
    Use TADbit to generate the TAD borders for one chromosome.

    Reads the adjacency-list matrix of `chrom` against itself from
    self.parsed_reads_dir, runs TAD detection on it and writes the borders
    to '<library_dir><experiment name>_tads.tsv'.

    :param chrom: chromosome identifier used to build the file and
       experiment names.
    """
    from pytadbit import Chromosome

    chrom_pair = str(chrom) + "-" + str(chrom)
    exptName = (self.library + "_" + str(self.resolution)
                + "_" + chrom_pair)
    matrix_path = (self.parsed_reads_dir + '/adjlist_map_' + chrom_pair
                   + '_' + str(self.resolution) + '.tsv')

    chr_hic_data = read_matrix(matrix_path, resolution=int(self.resolution))

    my_chrom = Chromosome(name=exptName, centromere_search=True)
    my_chrom.add_experiment(exptName, hic_data=chr_hic_data,
                            resolution=int(self.resolution))

    # Run core TADbit function to find TADs on each expt.
    # For the current dataset required 61GB of RAM
    my_chrom.find_tad(exptName, n_cpus=15)

    experiment = my_chrom.experiments[exptName]
    experiment.write_tad_borders(
        savedata=self.library_dir + exptName + '_tads.tsv')