def test_14_3d_clustering(self):
    """
    Cluster the pickled models (MCL when its binary is installed, then
    Ward), check the dendrogram, align two models and fetch models by
    their random seed.
    """
    if ONLY and "14" not in ONLY:
        return
    if CHKTIME:
        t0 = time()

    models = load_structuralmodels("models.pick")

    # the MCL step is only exercised when the 'mcl' executable is found
    if find_executable("mcl"):
        models.cluster_models(method="mcl", fact=0.9, verbose=False,
                              dcutoff=200)
        n_mcl = len(models.clusters.keys())
        self.assertTrue(5 <= n_mcl <= 7)

    models.cluster_models(method="ward", verbose=False, dcutoff=200)
    n_ward = len(models.clusters.keys())
    self.assertTrue(2 <= n_ward <= 3)

    dendro = models.cluster_analysis_dendrogram()
    self.assertEqual(dendro["icoord"], [[5.0, 5.0, 15.0, 15.0]])

    # align models: mean euclidean distance between matching particles
    m1, m2 = models.align_models(models=[1, 2])
    n_part = len(m1[0])
    dist_sum = 0
    for i in xrange(n_part):
        dx = m1[0][i] - m2[0][i]
        dy = m1[1][i] - m2[1][i]
        dz = m1[2][i] - m2[2][i]
        dist_sum += (dx ** 2 + dy ** 2 + dz ** 2) ** 0.5
    nrmsd = dist_sum / n_part
    self.assertTrue(nrmsd < 160)

    # fetching models: same seed looked up with and without all_models
    for n_best, use_all in ((5, True), (25, False)):
        models.define_best_models(n_best)
        m = models.fetch_model_by_rand_init("1", all_models=use_all)
        self.assertEqual(m, 8)

    if CHKTIME:
        # same output as the py2 statement: print "14", time() - t0
        print(" ".join(["14", str(time() - t0)]))
def test_16_models_stats(self):
    """
    Write model 2 as CMM, reload it as a single IMP model and check its
    distance, size and accessibility statistics.
    """
    if ONLY and "16" not in ONLY:
        return
    if CHKTIME:
        t0 = time()

    models = load_structuralmodels("models.pick")

    # write cmm, then read the written file back
    models.write_cmm(".", model_num=2)
    cmm_name = "model.%s.cmm" % models[2]["rand_init"]
    model = load_impmodel_from_cmm(cmm_name)

    # clean
    system("rm -f model.*")

    # stats
    self.assertEqual(200, round(model.distance(2, 3), 0))
    dist_8_20 = round(model.distance(8, 20) / 100, 0)
    self.assertTrue(9 <= dist_8_20 <= 10)
    self.assertEqual(round(30, 0),
                     round(model.radius_of_gyration() / 20, 0))
    self.assertEqual(400, round(model.contour() / 10, 0))
    axes = round((model.shortest_axe() + model.longest_axe()) / 100, 0)
    self.assertTrue(21 <= axes <= 22)
    self.assertEqual([15, 16], model.inaccessible_particles(1000))

    surface = model.accessible_surface(150, superradius=200, nump=150)
    acc, num, acc_area, tot_area, bypt = surface
    self.assertTrue(210 <= acc <= 240)
    self.assertTrue(500 <= num <= 600)
    self.assertEqual(0.4, round(acc_area, 1))
    self.assertEqual(4, round(tot_area, 0))
    self.assertEqual(101, len(bypt))
    last = bypt[100]
    self.assertTrue(19 <= last[0] <= 22 and
                    8 <= last[1] <= 38 and
                    8 <= last[2] <= 23)

    if CHKTIME:
        # same output as the py2 statement: print "16", time() - t0
        print(" ".join(["16", str(time() - t0)]))
def test_16_models_stats(self):
    """
    Round-trip model 2 through a CMM file and verify the statistics of
    the reloaded model (distances, axes, accessible surface).
    """
    if CHKTIME:
        t0 = time()

    models = load_structuralmodels('models.pick')

    # write cmm
    models.write_cmm('.', model_num=2)
    seed = models[2]['rand_init']
    model = load_impmodel_from_cmm('model.%s.cmm' % seed)

    # clean
    system('rm -f model.*')

    # stats
    self.assertEqual(200, round(model.distance(2, 3), 0))
    scaled_dist = round(model.distance(8, 20) / 100, 0)
    self.assertTrue(9 <= scaled_dist <= 10)
    scaled_rog = round(model.radius_of_gyration() / 20, 0)
    self.assertEqual(round(30, 0), scaled_rog)
    self.assertEqual(400, round(model.contour() / 10, 0))
    scaled_axes = round(
        (model.shortest_axe() + model.longest_axe()) / 100, 0)
    self.assertTrue(21 <= scaled_axes <= 22)
    self.assertEqual([15, 16], model.inaccessible_particles(1000))

    acc, num, acc_area, tot_area, bypt = model.accessible_surface(
        150, superradius=200, nump=150)
    self.assertTrue(210 <= acc <= 240)
    self.assertTrue(500 <= num <= 600)
    self.assertEqual(0.4, round(acc_area, 1))
    self.assertEqual(4, round(tot_area, 0))
    self.assertEqual(101, len(bypt))
    final = bypt[100]
    self.assertTrue(19 <= final[0] <= 22
                    and 8 <= final[1] <= 38
                    and 8 <= final[2] <= 23)

    if CHKTIME:
        # same output as the py2 statement: print '16', time() - t0
        print(' '.join(['16', str(time() - t0)]))
def test_14_3d_clustering(self):
    """
    Check MCL (if installed) and Ward clustering, the dendrogram, the
    alignment of two models and the lookup of a model by random seed.
    """
    if CHKTIME:
        t0 = time()

    models = load_structuralmodels('models.pick')

    # MCL clustering requires the external 'mcl' executable
    if find_executable('mcl'):
        models.cluster_models(method='mcl', fact=0.9, verbose=False,
                              dcutoff=200)
        mcl_count = len(models.clusters.keys())
        self.assertTrue(5 <= mcl_count <= 7)

    models.cluster_models(method='ward', verbose=False, dcutoff=200)
    ward_count = len(models.clusters.keys())
    self.assertTrue(2 <= ward_count <= 3)

    d = models.cluster_analysis_dendrogram()
    self.assertEqual(d['icoord'], [[5., 5., 15., 15.]])

    # align models and average the per-particle euclidean distance
    m1, m2 = models.align_models(models=[1, 2])
    size = len(m1[0])
    total = 0
    for i in xrange(size):
        squared = ((m1[0][i] - m2[0][i]) ** 2 +
                   (m1[1][i] - m2[1][i]) ** 2 +
                   (m1[2][i] - m2[2][i]) ** 2)
        total += squared ** .5
    nrmsd = total / size
    self.assertTrue(nrmsd < 150)

    # fetching models
    models.define_best_models(5)
    found = models.fetch_model_by_rand_init('1', all_models=True)
    self.assertEqual(found, 2)
    models.define_best_models(25)
    found = models.fetch_model_by_rand_init('1', all_models=False)
    self.assertEqual(found, 2)

    if CHKTIME:
        # same output as the py2 statement: print '14', time() - t0
        print(' '.join(['14', str(time() - t0)]))
def test_14_3d_clustering(self):
    """
    Exercise model clustering (MCL when available, Ward always), the
    cluster dendrogram, model alignment and retrieval by random seed.
    """
    if CHKTIME:
        t0 = time()

    models = load_structuralmodels('models.pick')

    have_mcl = find_executable('mcl')
    if have_mcl:
        models.cluster_models(method='mcl', fact=0.9, verbose=False,
                              dcutoff=200)
        self.assertTrue(5 <= len(models.clusters.keys()) <= 7)

    models.cluster_models(method='ward', verbose=False, dcutoff=200)
    self.assertTrue(2 <= len(models.clusters.keys()) <= 3)

    tree = models.cluster_analysis_dendrogram()
    self.assertEqual(tree['icoord'], [[5., 5., 15., 15.]])

    # align models
    m1, m2 = models.align_models(models=[1, 2])
    count = len(m1[0])
    acc = 0
    for i in xrange(count):
        # euclidean distance between particle i of both aligned models
        dx = m1[0][i] - m2[0][i]
        dy = m1[1][i] - m2[1][i]
        dz = m1[2][i] - m2[2][i]
        acc += (dx ** 2 + dy ** 2 + dz ** 2) ** .5
    nrmsd = acc / count
    self.assertTrue(nrmsd < 160)

    # fetching models
    for n_best, all_models in ((5, True), (25, False)):
        models.define_best_models(n_best)
        m = models.fetch_model_by_rand_init('1', all_models=all_models)
        self.assertEqual(m, 8)

    if CHKTIME:
        # same output as the py2 statement: print '14', time() - t0
        print(' '.join(['14', str(time() - t0)]))
def test_16_models_stats(self):
    """
    Write model 2 to a CMM file, load it back and compare its statistics
    against fixed expected values.
    """
    if CHKTIME:
        t0 = time()

    models = load_structuralmodels('models.pick')

    # write cmm
    models.write_cmm('.', model_num=2)
    cmm_file = 'model.%s.cmm' % models[2]['rand_init']
    model = load_impmodel_from_cmm(cmm_file)

    # clean
    system('rm -f model.*')

    # stats
    self.assertEqual(200, round(model.distance(2, 3), 0))
    self.assertEqual(11, round(model.distance(8, 20) / 100, 0))
    self.assertEqual(round(593, 0),
                     round(model.radius_of_gyration(), 0))
    self.assertEqual(400, round(model.contour() / 10, 0))
    axes_sum = model.shortest_axe() + model.longest_axe()
    self.assertEqual(21, round(axes_sum / 100, 0))
    self.assertEqual([11, 16], model.inaccessible_particles(1000))

    result = model.accessible_surface(150, superradius=200, nump=150)
    acc, num, acc_area, tot_area, bypt = result
    self.assertEqual(214, acc)
    self.assertEqual(502, num)
    self.assertEqual(0.4, round(acc_area, 1))
    self.assertEqual(4, round(tot_area, 0))
    self.assertEqual(101, len(bypt))
    self.assertEqual(bypt[100], (21, 11, 8))

    if CHKTIME:
        # same output as the py2 statement: print '16', time() - t0
        print(' '.join(['16', str(time() - t0)]))
def test_14_3d_clustering(self):
    """
    Check the number of clusters found by MCL (when its binary is
    installed) and by Ward clustering.
    """
    if CHKTIME:
        t0 = time()

    models = load_structuralmodels('models.pick')

    # clustering methods to run; MCL needs the external 'mcl' executable
    if find_executable('mcl'):
        models.cluster_models(method='mcl', fact=0.9, verbose=False,
                              dcutoff=200)
        n_clusters = len(models.clusters.keys())
        self.assertTrue(2 <= n_clusters <= 3)

    models.cluster_models(method='ward', verbose=False, dcutoff=200)
    n_clusters = len(models.clusters.keys())
    self.assertTrue(2 <= n_clusters <= 3)

    if CHKTIME:
        # same output as the py2 statement: print '14', time() - t0
        print(' '.join(['14', str(time() - t0)]))
def _export_model_group(models, opts, models_dir, label, model_ids,
                        **selection):
    """
    Write the files describing one group of models (a cluster or the
    singletons) under `models_dir`.

    :param models: structural models object providing write_xyz/write_cmm
    :param opts: options; only not_write_xyz and not_write_cmm are read
    :param models_dir: directory receiving the `<label>` sub-directory,
       the `<label>.lst` list and the `<label>_superimpose.cmd` script
    :param label: group name used for the directory and file names
    :param model_ids: identifiers of the models in the group
    :param selection: keyword forwarded to write_xyz/write_cmm to select
       the models (`cluster=...` or `models=...`)
    """
    group_dir = os.path.join(models_dir, label)
    if not os.path.exists(group_dir):
        os.makedirs(group_dir)
    if not opts.not_write_xyz:
        models.write_xyz(directory=group_dir, **selection)
    if not opts.not_write_cmm:
        models.write_cmm(directory=group_dir, **selection)
    # Write list file
    with open(os.path.join(models_dir, label + '.lst'), 'w') as out:
        for model_n in model_ids:
            out.write("model.{0}\n".format(model_n))
    if not opts.not_write_cmm:
        # Write chimera file: open every model, then superimpose each one
        # on the first (#0)
        chimera_file = os.path.join(models_dir, label + '_superimpose.cmd')
        with open(chimera_file, 'w') as out:
            out.write("open " + " ".join(
                ["{0}/model.{1}.cmm".format(label, model_n)
                 for model_n in model_ids]))
            out.write("\nlabel; represent wire; ~bondcolor\n")
            for i in range(len(model_ids)):
                out.write("match #{0} #0\n".format(i))


def main():
    """
    Run the whole pipeline driven by the options from get_options():
    load the Hi-C data, optionally search TADs, filter columns, save the
    chromosome, optimize IMP parameters, build (or, with analyze_only,
    reload) the 3D models, cluster them and run every analysis listed in
    opts.analyze.

    Results are written under os.path.join(opts.outdir, name), where name
    is '<crm>_<beg>_<end>'.

    :raises IOError: with analyze_only, when the saved models cannot be
       loaded
    """
    opts = get_options()
    # these options must be integers; fail here rather than deep inside the
    # pipeline (the converted values themselves are not needed)
    for int_option in (opts.nmodels_opt, opts.nkeep_opt, opts.ncpus,
                       opts.nmodels_mod, opts.nkeep_mod):
        int(int_option)

    if opts.xname:
        xnames = opts.xname
    elif opts.data[0]:
        xnames = [os.path.split(d)[-1] for d in opts.data]
    else:
        xnames = [os.path.split(d)[-1] for d in opts.norm]

    name = '{0}_{1}_{2}'.format(opts.crm, opts.beg, opts.end)
    workdir = os.path.join(opts.outdir, name)
    models_dir = os.path.join(workdir, 'models')

    def out_path(suffix):
        """Return the path of result file '<name><suffix>' in workdir."""
        return os.path.join(workdir, name + suffix)

    ############################################################################
    ############################  LOAD HI-C DATA  ##############################
    ############################################################################
    if not opts.analyze_only:
        crm = load_hic_data(opts, xnames)

    ############################################################################
    ##########################  SEARCH TADs PARAMETERS #########################
    ############################################################################
    if opts.tad and not opts.analyze_only:
        search_tads(opts, crm, name)

    # Save the chromosome
    # Chromosomes can later on be loaded to avoid re-reading the original
    # matrices. See function "load_chromosome".
    if not opts.tad_only and not opts.analyze_only:
        # Sum all experiments into a new one
        if len(xnames) > 1:
            logging.info("\tSumming experiments %s..."
                         % (' + '.join(xnames)))
            exp = crm.experiments[0] + crm.experiments[1]
            for i in range(2, len(xnames)):
                exp += crm.experiments[i]
            crm.add_experiment(exp)
        else:
            exp = crm.experiments[0]

        exp.filter_columns(draw_hist="column filtering" in opts.analyze,
                           perc_zero=opts.filt,
                           savefig=out_path('_column_filtering.pdf'),
                           diagonal=not opts.nodiag)
        if "column filtering" in opts.analyze:
            with open(out_path('_column_filtering.dat'), 'w') as out:
                out.write('# particles not considered in the analysis\n' +
                          '\n'.join(map(str, sorted(exp._zeros.keys()))))

    if not opts.analyze_only:
        logging.info("\tSaving the chromosome...")
        crm.save_chromosome(out_path('.tdb'), force=True)
    if opts.tad_only:
        exit()

    ############################################################################
    #######################  LOAD OPTIMAL IMP PARAMETERS #######################
    ############################################################################
    if not opts.analyze_only:
        results = load_optimal_imp_parameters(opts, name, exp)

    ############################################################################
    #########################  OPTIMIZE IMP PARAMETERS #########################
    ############################################################################
    if not opts.analyze_only:
        optpar = optimize(results, opts, name)

    ############################################################################
    ##############################  MODEL REGION ###############################
    ############################################################################
    if opts.analyze_only:
        # models are already calculated and we just want to load them
        models_file = out_path('.models')
        try:
            models = load_structuralmodels(models_file)
        except IOError:
            # nothing below can run without models: report the real cause
            # instead of failing later on an undefined name
            logging.error("\tCould not load models from %s", models_file)
            raise
    else:
        # Build 3D models based on the HiC data.
        logging.info("\tModeling (this can take long)...")
        models = model_region(exp, optpar, opts, name)
        for line in repr(models).split('\n'):
            logging.info(line)

    # needed by the analyses of both freshly built and reloaded models
    dcutoff = int(models._config['dcutoff'] *
                  models._config['scale'] *
                  models.resolution)

    ############################################################################
    ##############################  ANALYZE MODELS #############################
    ############################################################################
    if "correlation real/models" in opts.analyze:
        # Calculate the correlation coefficient between a set of kept models
        # and the original HiC matrix
        logging.info("\tCorrelation with data...")
        rho, pval = models.correlate_with_real_data(
            cutoff=dcutoff,
            savefig=out_path('_corre_real.pdf'),
            plot=True)
        logging.info("\t Correlation coefficient: %s [p-value: %s]" % (
            rho, pval))

    if "z-score plot" in opts.analyze:
        # zscore plots
        logging.info("\tZ-score plot...")
        models.zscore_plot(savefig=out_path('_zscores.pdf'))

    # Cluster models based on structural similarity: relax the fraction of
    # particles (fact) and the distance cut-off until one clustering succeeds
    logging.info("\tClustering all models into sets of structurally similar" +
                 " models...")
    clustered = False
    for ffact in (0.95, 0.9, 0.85, 0.8, 0.75, 0.7, 0.65, 0.6, 0.55, 0.5):
        logging.info(' fact = ' + str(ffact))
        for clcutoff in (dcutoff / 2, dcutoff, dcutoff * 1.5):
            logging.info(' cutoff = ' + str(clcutoff))
            try:
                models.cluster_models(fact=ffact, dcutoff=clcutoff,
                                      n_cpus=int(opts.ncpus))
            except Exception:
                # best effort: try the next combination of parameters
                continue
            clustered = True
            break
        if clustered:
            break
    if not clustered:
        logging.warning("\t\tWARNING: clustering failed for every tested " +
                        "combination of parameters...")

    logging.info("\tSaving again the models this time with clusters...")
    models.save_models(out_path('.models'))

    # Plot the clustering
    try:
        models.cluster_analysis_dendrogram(
            color=True, savefig=out_path('_clusters.pdf'))
    except Exception:
        logging.info("\t\tWARNING: plot for clusters could not be made...")

    if not (opts.not_write_xyz and opts.not_write_cmm):
        # Save the clustered models into directories for easy visualization
        # with Chimera (http://www.cgl.ucsf.edu/chimera/)
        # Move into the cluster directory and run in the prompt
        # "chimera cl_1_superimpose.cmd"
        logging.info("\t\tWriting models, list and chimera files...")
        for cluster in models.clusters:
            logging.info("\t\tCluster #{0} has {1} models {2}".format(
                cluster, len(models.clusters[cluster]),
                models.clusters[cluster]))
            _export_model_group(models, opts, models_dir,
                                'cl_' + str(cluster),
                                models.clusters[cluster], cluster=cluster)

        # same with singletons
        singletons = [m['rand_init'] for m in models
                      if m['cluster'] == 'Singleton']
        logging.info("\t\tSingletons has {1} models {2}".format(
            'Singletons', len(singletons), singletons))
        _export_model_group(models, opts, models_dir, 'Singletons',
                            singletons, models=singletons)

        # Write best model and centroid model
        centroid_model = models[models.centroid_model()]
        centroid_model.write_cmm(directory=models_dir,
                                 filename='centroid.cmm')
        # the .xyz file must be written in xyz format (as for best.xyz)
        centroid_model.write_xyz(directory=models_dir,
                                 filename='centroid.xyz')
        models[0].write_cmm(directory=models_dir, filename='best.cmm')
        models[0].write_xyz(directory=models_dir, filename='best.xyz')

    if "objective function" in opts.analyze:
        logging.info("\tPlotting objective function decay for vbest model...")
        models.objective_function_model(
            0, log=True, smooth=False,
            savefig=out_path('_obj-func.pdf'))

    if "centroid" in opts.analyze:
        # Get the centroid model of cluster #1
        logging.info("\tGetting centroid...")
        centroid = models.centroid_model(cluster=1)
        logging.info("\t\tThe model centroid (closest to the average) " +
                     "for cluster 1 is: {}".format(centroid))

    if "consistency" in opts.analyze:
        # Calculate a consistency plot for all models in cluster #1
        logging.info("\tGetting consistency data...")
        models.model_consistency(
            cluster=1, cutoffs=range(50, dcutoff + 50, 50),
            savefig=out_path('_consistency.pdf'),
            savedata=out_path('_consistency.dat'))

    if "density" in opts.analyze:
        # Calculate a DNA density plot
        logging.info("\tGetting density data...")
        models.density_plot(
            error=True, steps=(1, 3, 5, 7),
            savefig=out_path('_density.pdf'),
            savedata=out_path('_density.dat'))

    if "contact map" in opts.analyze:
        # Get a contact map at the dcutoff distance for cluster #1
        logging.info("\tGetting a contact map...")
        models.contact_map(
            cluster=1, cutoff=dcutoff,
            savedata=out_path('_contact.dat'))

    if "walking angle" in opts.analyze:
        # Get Dihedral angle plot for cluster #1
        logging.info("\tGetting angle data...")
        models.walking_angle(
            cluster=1, steps=(1, 5),
            savefig=out_path('_wang.pdf'),
            savedata=out_path('_wang.dat'))

    if "persistence length" in opts.analyze:
        # Get persistence length of all models
        logging.info("\tGetting persistence length data...")
        with open(out_path('_pL.dat'), 'w') as pl_out:
            pl_out.write('#Model_Number\tpL\n')
            for model in models:
                try:
                    pl_out.write('%s\t%.2f\n' % (
                        model["rand_init"], model.persistence_length()))
                except Exception:
                    # best effort: skip this model but keep going
                    sys.stderr.write('WARNING: failed to compute persistence ' +
                                     'length for model %s\n'
                                     % model["rand_init"])

    if "accessibility" in opts.analyze:
        # Calculate the accessibility of the models
        radius = 75   # Radius of an object to calculate accessibility
        nump = 30     # number of particles (resolution)
        logging.info("\tGetting accessibility data (this can take long)...")
        models.accessibility(radius, nump=nump,
                             error=True,
                             savefig=out_path('_accessibility.pdf'),
                             savedata=out_path('_accessibility.dat'))

    if "interaction" in opts.analyze:
        # Get interaction data of all models at the dcutoff distance
        logging.info("\tGetting interaction data...")
        models.interactions(
            cutoff=dcutoff, steps=(1, 3, 5),
            savefig=out_path('_interactions.pdf'),
            savedata=out_path('_interactions.dat'),
            error=True)
def model_region(exp, optpar, opts, name):
    """
    Generate structural models for the region opts.beg - opts.end.

    The modelling itself runs in a separate python process: the z-scores
    and the options are pickled into temporary files in the current
    directory, a small driver script calling generate_3d_models is written
    and executed, and the models it saved are loaded back.

    :param exp: experiment to model; its z-scores for the region are taken
       from exp._sub_experiment_zscore
    :param optpar: parameters passed as `config` to generate_3d_models
    :param opts: options (crm, beg, end, outdir, analyze, ... are read here
       or by the driver script)
    :param name: name of the sub-directory of opts.outdir, and prefix of the
       '.models' and '_constraints.txt' files

    :returns: the models loaded from <outdir>/<name>/<name>.models, with
       their `experiment` and `description` attributes set
    """
    zscores, values, zeros = exp._sub_experiment_zscore(opts.beg, opts.end)

    tmp_name = ''.join([letters[int(random() * 52)] for _ in xrange(50)])
    zscore_file = '_tmp_zscore_' + tmp_name
    opts_file = '_tmp_opts_' + tmp_name
    script_file = '_tmp_model_' + tmp_name + '.py'

    try:
        with open(zscore_file, 'w') as tmp:
            dump([zscores, values, zeros, optpar], tmp)
        with open(opts_file, 'w') as tmp:
            dump(opts, tmp)
        with open(script_file, 'w') as tmp:
            tmp.write('''
from cPickle import load, dump
from pytadbit.imp.imp_modelling import generate_3d_models
import os

tmp_name = "%s"

zscore_file = open("_tmp_zscore_" + tmp_name)
zscores, values, zeros, optpar = load(zscore_file)
zscore_file.close()

opts_file = open("_tmp_opts_" + tmp_name)
opts = load(opts_file)
opts_file.close()

nloci = opts.end - opts.beg + 1
coords = {"crm" : opts.crm,
          "start": opts.beg,
          "end" : opts.end}

zeros = tuple([i not in zeros for i in xrange(opts.end - opts.beg + 1)])

models= generate_3d_models(zscores, opts.res, nloci,
                           values=values, n_models=opts.nmodels_mod,
                           n_keep=opts.nkeep_mod,
                           n_cpus=opts.ncpus,
                           keep_all=True,
                           first=0, container=opts.container,
                           config=optpar, verbose=0.5,
                           coords=coords, zeros=zeros)
# Save models
models.save_models(
    os.path.join(opts.outdir, "%s", "%s" + ".models"))

''' % (tmp_name, name, name))

        # whatever the driver prints to stdout is kept as the "constraints"
        constraints = Popen(["python", script_file],
                            stdout=PIPE).communicate()[0]
    finally:
        # always remove the temporary files, even if a step above failed
        for tmp_file in (zscore_file, script_file, opts_file):
            try:
                os.remove(tmp_file)
            except OSError:
                pass

    if "constraints" in opts.analyze:
        with open(os.path.join(opts.outdir, name,
                               name + '_constraints.txt'), 'w') as out:
            out.write(constraints)

    models = load_structuralmodels(
        os.path.join(opts.outdir, name, name + '.models'))
    models.experiment = exp
    coords = {"crm": opts.crm,
              "start": opts.beg,
              "end": opts.end}
    crm = exp.crm
    description = {'identifier': exp.identifier,
                   'chromosome': coords['crm'],
                   'start': exp.resolution * coords['start'],
                   'end': exp.resolution * coords['end'],
                   'species': crm.species,
                   'cell type': exp.cell_type,
                   'experiment type': exp.exp_type,
                   'resolution': exp.resolution,
                   'assembly': crm.assembly}
    for desc in exp.description:
        description[desc] = exp.description[desc]
    # chromosome-level entries come from the chromosome's own description
    for desc in crm.description:
        description[desc] = crm.description[desc]
    # index kept and discarded models alike, all sharing one description
    all_models = [m for m in models] + list(models._bad_models.values())
    for i, m in enumerate(all_models):
        m['index'] = i
        m['description'] = description
    models.description = description
    return models
def test_15_3d_modelling(self):
    """
    Check the analysis functions of the structural models loaded from
    'models.pick': density, contacts, best-model selection, correlation,
    consistency, angles, coordinates, distances, accessibility, contact
    map, interactions, walking angle and the cmm/xyz/json writers.
    """
    if CHKTIME:
        t0 = time()

    def read_lines(path):
        """Return the lines of `path`, closing the file afterwards."""
        with open(path) as handle:
            return handle.readlines()

    models = load_structuralmodels('models.pick')
    models.cluster_models(method='ward', verbose=False)
    # density
    models.density_plot(savedata='lala', plot=False)
    lines = read_lines('lala')
    self.assertEqual(len(lines), 22)
    self.assertEqual(
        [round(float(i), 1) if i != 'nan' else i
         for i in lines[1].split('\t')[:3]],
        [1.0, 'nan', 'nan'])
    self.assertEqual(
        [round(float(i), 1) for i in lines[15].split('\t')[:3]],
        [15, 100.0, 100.0])
    # contacts
    cmap = models.get_contact_matrix(cutoff=300)
    self.assertEqual(
        round(
            round(
                sum([i if i >= 0 else 0
                     for i in reduce(lambda x, y: x + y, cmap)]) / 10,
                0),
            3),
        8)
    # define best models
    models.define_best_models(10)
    self.assertEqual(len(models), 10)
    m1 = models[9]
    models.define_best_models(25)
    self.assertEqual(len(models), 25)
    self.assertEqual(m1, models[9])
    # correlation
    corr, pval = models.correlate_with_real_data(cutoff=300)
    self.assertTrue(0.6 <= round(corr, 1) <= 0.7)
    self.assertEqual(round(pval, 4), round(0, 4))
    # consistency
    models.model_consistency(cutoffs=(50, 100, 150, 200), plot=False,
                             savedata='lala')
    lines = read_lines('lala')
    self.assertEqual(len(lines), 22)
    self.assertEqual(
        [round(float(i) / 15, 0) for i in lines[1].split('\t')],
        [0, 1, 2, 3, 3])
    self.assertEqual(
        [round(float(i) / 15, 0) for i in lines[15].split('\t')],
        [1, 6, 7, 7, 7])
    # measure angle
    self.assertTrue(
        13 <= round(models.angle_between_3_particles(2, 8, 15) / 10, 0)
        <= 14)
    self.assertEqual(
        round(models.angle_between_3_particles(19, 20, 21), 0), 60)
    self.assertEqual(
        round(models.angle_between_3_particles(15, 14, 11) / 5, 0), 13)
    # coordinates
    self.assertEqual(
        [round(x, 2) for x in models.particle_coordinates(15)],
        [1529.39, 4703.51, -1793.39])
    # dihedral_angle
    # NOTE(review): this used to be assertTrue(value, -13.44), which only
    # checked that the angle was non-zero (-13.44 was taken as the failure
    # message); confirm the expected value against the current models.pick
    self.assertEqual(round(models.dihedral_angle(2, 8, 15, 16), 2), -13.44)
    self.assertEqual(round(models.dihedral_angle(15, 19, 20, 21), 2),
                     83.07)
    self.assertEqual(round(models.dihedral_angle(15, 14, 11, 12), 2),
                     7.31)
    # median distance
    self.assertEqual(
        round(models.median_3d_dist(3, 20, plot=False) / 100, 0), 15)
    self.assertEqual(
        round(
            models.median_3d_dist(3, 20, cluster=1, plot=False) / 200, 0),
        8)
    self.assertEqual(
        round(models.median_3d_dist(7, 10, models=range(5), plot=False),
              0),
        250)
    # accessibility
    models.accessibility(radius=75, nump=10, plot=False,
                         savedata='model.acc')
    vals = [l.split() for l in read_lines('model.acc')[1:]]
    self.assertEqual(vals[0][1:3], ['0.520', '0.999'])
    self.assertEqual(vals[20][1:3], ['1.000', '0.000'])
    # contact map
    models.contact_map(savedata='model.contacts')
    vals = [l.split() for l in read_lines('model.contacts')[1:]]
    self.assertEqual(vals[0], ['0', '1', '1.0'])
    self.assertEqual(vals[1], ['0', '2', '0.92'])
    self.assertEqual(vals[192], ['14', '18', '0.12'])
    # interactions
    models.interactions(plot=False, savedata='model.inter')
    vals = [[float(i) for i in l.split()]
            for l in read_lines('model.inter')[1:]]
    self.assertEqual(
        vals[2],
        [3.0, 4.88, 1.03, 3.94, 0.52, 4.72, 0.64, 4.02, 0.51, 4.82, 0.41])
    # walking angle
    models.walking_angle(savedata='model.walkang')
    vals = [[round(float(i), 2) if i != 'None' else i for i in l.split()]
            for l in read_lines('model.walkang')[1:]]
    self.assertEqual(vals[0], [1.0, 137.99, 'None'])
    self.assertEqual(vals[14], [15.0, -50.1, 'None'])
    self.assertEqual(vals[13], [14.0, -95.73, 'None'])
    self.assertEqual(vals[12], [13.0, 155.7, 3.29])
    # write cmm
    models.write_cmm('.', model_num=2)
    models.write_cmm('.', models=range(5))
    models.write_cmm('.', cluster=2)
    # write xyz
    models.write_xyz('.', model_num=2)
    models.write_xyz('.', models=range(5))
    models.write_xyz('.', cluster=2)
    # write json
    models.write_json('model.json', model_num=2)
    models.write_json('model.json', models=range(5))
    models.write_json('model.json', cluster=2)
    # clean
    system('rm -f model.*')
    system('rm -rf lala*')
    if CHKTIME:
        # same output as the py2 statement: print '15', time() - t0
        print(' '.join(['15', str(time() - t0)]))
def main(): """ main function """ opts = get_options() nmodels_opt, nkeep_opt, ncpus = (int(opts.nmodels_opt), int(opts.nkeep_opt), int(opts.ncpus)) nmodels_mod, nkeep_mod = int(opts.nmodels_mod), int(opts.nkeep_mod) if opts.xname: xnames = opts.xname elif opts.data[0]: xnames = [os.path.split(d)[-1] for d in opts.data] else: xnames = [os.path.split(d)[-1] for d in opts.norm] name = '{0}_{1}_{2}'.format(opts.crm, opts.beg, opts.end) opts.outdir ############################################################################ ############################ LOAD HI-C DATA ############################## ############################################################################ if not opts.analyze_only: crm = load_hic_data(opts, xnames) ############################################################################ ########################## SEARCH TADs PARAMETERS ######################### ############################################################################ if opts.tad and not opts.analyze_only: search_tads(opts, crm, name) # Save the chromosome # Chromosomes can later on be loaded to avoid re-reading the original # matrices. See function "load_chromosome". if not opts.tad_only and not opts.analyze_only: # Sum all experiments into a new one if len(xnames) > 1: logging.info("\tSumming experiments %s..." 
% (' + '.join(xnames))) exp = crm.experiments[0] + crm.experiments[1] for i in range(2, len(xnames)): exp += crm.experiments[i] crm.add_experiment(exp) else: exp = crm.experiments[0] if not opts.tad_only and not opts.analyze_only: exp.filter_columns(draw_hist="column filtering" in opts.analyze, perc_zero=opts.filt, savefig=os.path.join( opts.outdir, name , name + '_column_filtering.pdf'), diagonal=not opts.nodiag) if (not opts.tad_only and "column filtering" in opts.analyze and not opts.analyze_only): out = open(os.path.join(opts.outdir, name , name + '_column_filtering.dat'), 'w') out.write('# particles not considered in the analysis\n' + '\n'.join(map(str, sorted(exp._zeros.keys())))) if not opts.analyze_only: logging.info("\tSaving the chromosome...") crm.save_chromosome(os.path.join(opts.outdir, name, '{0}.tdb'.format(name)), force=True) if opts.tad_only: exit() ############################################################################ ####################### LOAD OPTIMAL IMP PARAMETERS ####################### ############################################################################ if not opts.analyze_only: results = load_optimal_imp_parameters(opts, name, exp) ############################################################################ ######################### OPTIMIZE IMP PARAMETERS ######################### ############################################################################ if not opts.analyze_only: optpar = optimize(results, opts, name) ############################################################################ ############################## MODEL REGION ############################### ############################################################################ # if models are already calculated and we just want to load them if opts.analyze_only: ######################################################################## # function for loading models try: models = load_structuralmodels( os.path.join(opts.outdir, name, name + '.models')) except 
IOError: pass ######################################################################## else: # Build 3D models based on the HiC data. logging.info("\tModeling (this can take long)...") models = model_region(exp, optpar, opts, name) for line in repr(models).split('\n'): logging.info(line) dcutoff = int(models._config['dcutoff'] * models._config['scale'] * models.resolution) ############################################################################ ############################## ANALYZE MODELS ############################# ############################################################################ if "correlation real/models" in opts.analyze: # Calculate the correlation coefficient between a set of kept models and # the original HiC matrix logging.info("\tCorrelation with data...") rho, pval = models.correlate_with_real_data( cutoff=dcutoff, savefig=os.path.join(opts.outdir, name, name + '_corre_real.pdf'), plot=True) logging.info("\t Correlation coefficient: %s [p-value: %s]" % ( rho, pval)) if "z-score plot" in opts.analyze: # zscore plots logging.info("\tZ-score plot...") models.zscore_plot( savefig=os.path.join(opts.outdir, name, name + '_zscores.pdf')) # Cluster models based on structural similarity logging.info("\tClustering all models into sets of structurally similar" + " models...") ffact = 0.95 # Fraction of particles that are within the dcutoff value clcutoff = dcutoff - 50 # RMSD cut-off to consider two models equivalent(nm) for ffact in [0.95, 0.9, 0.85, 0.8, 0.75, 0.7, 0.65, 0.6, 0.55, 0.5]: logging.info(' fact = ' + str(ffact)) for clcutoff in [dcutoff / 2 , dcutoff, dcutoff * 1.5]: try: logging.info(' cutoff = ' + str(clcutoff)) models.cluster_models(fact=ffact, dcutoff=clcutoff, n_cpus=int(opts.ncpus)) break except: continue else: continue break logging.info("\tSaving again the models this time with clusters...") models.save_models(os.path.join(opts.outdir, name, name + '.models')) # Plot the clustering try: models.cluster_analysis_dendrogram( 
color=True, savefig=os.path.join( opts.outdir, name, name + '_clusters.pdf')) except: logging.info("\t\tWARNING: plot for clusters could not be made...") if not (opts.not_write_xyz and opts.not_write_cmm): # Save the clustered models into directories for easy visualization with # Chimera (http://www.cgl.ucsf.edu/chimera/) # Move into the cluster directory and run in the prompt # "chimera cl_1_superimpose.cmd" logging.info("\t\tWriting models, list and chimera files...") for cluster in models.clusters: logging.info("\t\tCluster #{0} has {1} models {2}".format( cluster, len(models.clusters[cluster]), models.clusters[cluster])) if not os.path.exists(os.path.join( opts.outdir, name, 'models', 'cl_' + str(cluster))): os.makedirs(os.path.join( opts.outdir, name, 'models', 'cl_' + str(cluster))) if not opts.not_write_xyz: models.write_xyz(directory=os.path.join( opts.outdir, name, 'models', 'cl_' + str(cluster)), cluster=cluster) if not opts.not_write_cmm: models.write_cmm(directory=os.path.join( opts.outdir, name, 'models', 'cl_' + str(cluster)), cluster=cluster) # Write list file clslstfile = os.path.join( opts.outdir, name, 'models', 'cl_{}.lst'.format(str(cluster))) out = open(clslstfile,'w') for model_n in models.clusters[cluster]: out.write("model.{0}\n".format(model_n)) out.close() if not opts.not_write_cmm: # Write chimera file clschmfile = os.path.join( opts.outdir, name, 'models', 'cl_{}_superimpose.cmd'.format(str(cluster))) out = open(clschmfile, 'w') out.write("open " + " ".join(["cl_{0}/model.{1}.cmm".format( cluster, model_n) for model_n in models.clusters[cluster]])) out.write("\nlabel; represent wire; ~bondcolor\n") for i in range(1, len(models.clusters[cluster]) + 1): out.write("match #{0} #0\n".format(i-1)) out.close() # same with singletons singletons = [m['rand_init'] for m in models if m['cluster']=='Singleton'] logging.info("\t\tSingletons has {1} models {2}".format( 'Singletons', len(singletons), singletons)) if not os.path.exists(os.path.join( 
opts.outdir, name, 'models', 'Singletons')): os.makedirs(os.path.join( opts.outdir, name, 'models', 'Singletons')) if not opts.not_write_xyz: models.write_xyz(directory=os.path.join( opts.outdir, name, 'models', 'Singletons'), models=singletons) if not opts.not_write_cmm: models.write_cmm(directory=os.path.join( opts.outdir, name, 'models', 'Singletons'), models=singletons) # Write best model and centroid model models[models.centroid_model()].write_cmm( directory=os.path.join(opts.outdir, name, 'models'), filename='centroid.cmm') models[models.centroid_model()].write_cmm( directory=os.path.join(opts.outdir, name, 'models'), filename='centroid.xyz') models[0].write_cmm( directory=os.path.join(opts.outdir, name, 'models'), filename='best.cmm') models[0].write_xyz( directory=os.path.join(opts.outdir, name, 'models'), filename='best.xyz') # Write list file clslstfile = os.path.join( opts.outdir, name, 'models', 'Singletons.lst') out = open(clslstfile,'w') for model_n in singletons: out.write("model.{0}\n".format(model_n)) out.close() if not opts.not_write_cmm: # Write chimera file clschmfile = os.path.join( opts.outdir, name, 'models', 'Singletons_superimpose.cmd') out = open(clschmfile, 'w') out.write("open " + " ".join(["Singletons/model.{0}.cmm".format( model_n) for model_n in singletons])) out.write("\nlabel; represent wire; ~bondcolor\n") for i in range(1, len(singletons) + 1): out.write("match #{0} #0\n".format(i-1)) out.close() if "objective function" in opts.analyze: logging.info("\tPlotting objective function decay for vbest model...") models.objective_function_model( 0, log=True, smooth=False, savefig=os.path.join(opts.outdir, name, name + '_obj-func.pdf')) if "centroid" in opts.analyze: # Get the centroid model of cluster #1 logging.info("\tGetting centroid...") centroid = models.centroid_model(cluster=1) logging.info("\t\tThe model centroid (closest to the average) " + "for cluster 1 is: {}".format(centroid)) if "consistency" in opts.analyze: # Calculate a 
consistency plot for all models in cluster #1 logging.info("\tGetting consistency data...") models.model_consistency( cluster=1, cutoffs=range(50, dcutoff + 50, 50), savefig =os.path.join(opts.outdir, name, name + '_consistency.pdf'), savedata=os.path.join(opts.outdir, name, name + '_consistency.dat')) if "density" in opts.analyze: # Calculate a DNA density plot logging.info("\tGetting density data...") models.density_plot( error=True, steps=(1,3,5,7), savefig =os.path.join(opts.outdir, name, name + '_density.pdf'), savedata=os.path.join(opts.outdir, name, name + '_density.dat')) if "contact map" in opts.analyze: # Get a contact map at cut-off of 150nm for cluster #1 logging.info("\tGetting a contact map...") models.contact_map( cluster=1, cutoff=dcutoff, savedata=os.path.join(opts.outdir, name, name + '_contact.dat')) if "walking angle" in opts.analyze: # Get Dihedral angle plot for cluster #1 logging.info("\tGetting angle data...") models.walking_angle( cluster=1, steps=(1,5), savefig = os.path.join(opts.outdir, name, name + '_wang.pdf'), savedata= os.path.join(opts.outdir, name, name + '_wang.dat')) if "persistence length" in opts.analyze: # Get persistence length of all models logging.info("\tGetting persistence length data...") pltfile = os.path.join(opts.outdir, name, name + '_pL.dat') f = open(pltfile,'w') f.write('#Model_Number\tpL\n') for model in models: try: f.write('%s\t%.2f\n' % (model["rand_init"], model.persistence_length())) except: sys.stderr.write('WARNING: failed to compute persistence ' + 'length for model %s' % model["rand_init"]) if "accessibility" in opts.analyze: # Calculate a DNA density plot logging.info("\tGetting accessibility data...") radius = 75 # Radius of an object to calculate accessibility nump = 30 # number of particles (resolution) logging.info("\tGetting accessibility data (this can take long)...") models.accessibility(radius, nump=nump, error=True, savefig =os.path.join(opts.outdir, name, name + '_accessibility.pdf'), 
savedata=os.path.join(opts.outdir, name, name + '_accessibility.dat')) # if "accessibility" in opts.analyze: # # Get accessibility of all models # radius = 75 # Radius of an object to calculate accessibility # nump = 30 # number of particles (resolution) # logging.info("\tGetting accessibility data (this can take long)...") # if not os.path.exists( # os.path.join(opts.outdir, name, 'models', 'asa')): # os.makedirs(os.path.join(opts.outdir, name, 'models', 'asa')) # for model in models: # by_part = model.accessible_surface(radius, nump=nump, # include_edges=False)[4] # asafile = os.path.join(opts.outdir, name, 'models', # 'asa', 'model_{}.asa'.format(model['rand_init'])) # out = open(asafile, 'w') # for part, acc, ina in by_part: # try: # out.write('%s\t%.2f\n' % (part, # 100*float(acc) / (acc + ina))) # except ZeroDivisionError: # out.write('%s\t%s\n' % (part, 'nan')) # out.close() if "interaction" in opts.analyze: # Get interaction data of all models at 200 nm cut-off logging.info("\tGetting interaction data...") models.interactions( cutoff=dcutoff, steps=(1,3,5), savefig =os.path.join(opts.outdir, name, name + '_interactions.pdf'), savedata=os.path.join(opts.outdir, name, name + '_interactions.dat'), error=True)
def model_region(exp, optpar, opts, name):
    """
    Generate structural models for one region of an experiment.

    The modelling runs in a separate ``python`` process: the inputs are
    pickled into temporary files in the current working directory, a small
    driver script is written next to them and executed, and the models it
    saves are loaded back from ``<opts.outdir>/<name>/<name>.models``.
    The temporary files are removed even when the child process fails.

    :param exp: experiment; ``_sub_experiment_zscore``, ``size``, ``crm``,
       ``identifier``, ``resolution``, ``cell_type``, ``exp_type`` and
       ``description`` are read from it
    :param optpar: modelling configuration, handed to
       ``generate_3d_models`` as ``config`` by the driver script
    :param opts: options object; it is pickled as a whole for the driver
       script, and ``beg``, ``end``, ``crm``, ``outdir``, ``analyze`` and
       ``description`` are read here
    :param name: name of the output sub-directory and prefix of the files
       written inside ``opts.outdir``

    :returns: the loaded models, with ``experiment`` and ``description``
       set, and with an ``index`` and ``description`` entry stored in every
       model (kept models first, then the ones in ``_bad_models``)

    :raises subprocess.CalledProcessError: if the driver script exits with
       a non-zero status
    """
    beg, end = opts.beg or 1, opts.end or exp.size
    zscores, values, zeros = exp._sub_experiment_zscore(beg, end)

    # random suffix so that concurrent runs in the same directory do not
    # overwrite each other's temporary files
    tmp_name = "".join([letters[int(random() * 52)] for _ in xrange(50)])
    zscore_path = "_tmp_zscore_" + tmp_name
    opts_path = "_tmp_opts_" + tmp_name
    script_path = "_tmp_model_" + tmp_name + ".py"

    try:
        with open(zscore_path, "w") as tmp:
            dump([zscores, values, zeros, optpar, beg, end], tmp)

        with open(opts_path, "w") as tmp:
            dump(opts, tmp)

        with open(script_path, "w") as tmp:
            tmp.write(
                """
from cPickle import load, dump
from pytadbit.imp.imp_modelling import generate_3d_models
import os

tmp_name = "%s"

zscore_file = open("_tmp_zscore_" + tmp_name)
zscores, values, zeros, optpar, beg, end = load(zscore_file)
zscore_file.close()

opts_file = open("_tmp_opts_" + tmp_name)
opts = load(opts_file)
opts_file.close()

nloci = end - beg + 1
coords = {"crm" : opts.crm,
          "start": opts.beg,
          "end" : opts.end}

zeros = tuple([i not in zeros for i in xrange(end - beg + 1)])

models = generate_3d_models(zscores, opts.res, nloci,
                            values=values, n_models=opts.nmodels_mod,
                            n_keep=opts.nkeep_mod,
                            n_cpus=opts.ncpus,
                            keep_all=True,
                            first=0, container=opts.container,
                            config=optpar, coords=coords, zeros=zeros)
# Save models
models.save_models(
    os.path.join(opts.outdir, "%s", "%s" + ".models"))

"""
                % (tmp_name, name, name)
            )

        check_call(["python", script_path])
    finally:
        # always clean up, also when pickling or the child process failed
        for tmp_path in (zscore_path, script_path, opts_path):
            try:
                os.remove(tmp_path)
            except OSError:
                # same tolerance as "rm -f": a file that cannot be removed
                # (e.g. never created) must not hide the real error
                pass

    models = load_structuralmodels(os.path.join(opts.outdir, name, name + ".models"))

    if "constraints" in opts.analyze:
        constraints_path = os.path.join(opts.outdir, name, name + "_constraints.txt")
        with open(constraints_path, "w") as out:
            out.write("# Harmonic\tpart1\tpart2\tdist\tkforce\n")
            out.write(
                "\n".join(
                    [
                        "%s\t%s\t%s\t%.1f\t%.3f" % (harm, p1, p2, dist, kforce)
                        for (p1, p2), (harm, dist, kforce) in models._restraints.iteritems()
                    ]
                )
                + "\n"
            )

    models.experiment = exp
    coords = {"crm": opts.crm, "start": opts.beg, "end": opts.end}
    crm = exp.crm
    description = {
        "identifier": exp.identifier,
        "chromosome": coords["crm"],
        "start": (exp.resolution * coords["start"]) if coords["start"] else None,
        "end": (exp.resolution * coords["end"]) if coords["end"] else None,
        "species": crm.species,
        "cell type": exp.cell_type,
        "experiment type": exp.exp_type,
        "resolution": exp.resolution,
        "assembly": crm.assembly,
    }
    for key in opts.description:
        description[key] = opts.description[key]
    for desc in exp.description:
        description[desc] = exp.description[desc]
    for desc in crm.description:
        # Chromosome-level entries fill in what the experiment does not
        # define. The previous code looked these keys up in exp.description,
        # which raised KeyError for chromosome-only keys; entries defined by
        # the experiment keep their value, exactly as before.
        if desc not in exp.description:
            description[desc] = crm.description[desc]

    # every model (kept and discarded) shares the same description dict
    for i, m in enumerate([m for m in models] + models._bad_models.values()):
        m["index"] = i
        m["description"] = description
    models.description = description

    return models
def test_15_3d_modelling(self): """ """ if CHKTIME: t0 = time() models = load_structuralmodels('models.pick') models.cluster_models(method='ward', verbose=False) # density models.density_plot(savedata='lala', plot=False) lines = open('lala').readlines() self.assertEqual(len(lines), 22) self.assertEqual([round(float(i), 1) for i in lines[1].split('\t')[:3]], [1, 100.0, 100.0]) self.assertEqual([round(float(i), 1) for i in lines[15].split('\t')[:3]], [15, 99.9, 100.0]) # contacts cmap = models.get_contact_matrix(cutoff=300) self.assertEqual(round( round(sum([i if i >=0 else 0 for i in reduce(lambda x, y: x+y, cmap)])/10, 0), 3), 8) # define best models models.define_best_models(10) self.assertEqual(len(models), 10) m1 = models[9] models.define_best_models(25) self.assertEqual(len(models), 25) self.assertEqual(m1, models[9]) # correlation corr, pval = models.correlate_with_real_data(cutoff=300) self.assertEqual(round(corr, 1), 0.7) self.assertEqual(round(pval, 4), round(0, 4)) # consistency models.model_consistency(cutoffs=(50, 100, 150, 200), plot=False, savedata='lala') lines = open('lala').readlines() self.assertEqual(len(lines), 22) self.assertEqual([round(float(i)/10, 0) for i in lines[1].split('\t')], [0, 1, 3, 4, 5]) self.assertEqual([round(float(i)/10, 0) for i in lines[15].split('\t')], [2, 8, 10, 10, 10]) # measure angle self.assertEqual(round(models.angle_between_3_particles(2,8,15)/10, 0), 13) self.assertEqual(round(models.angle_between_3_particles(19,20,21), 0), 60) self.assertEqual(round(models.angle_between_3_particles(15,14,11)/5, 0), 13) # coordinates # self.assertEqual([round(x, 3) for x in models.particle_coordinates(15)], # [2372.253, -1193.602, -1145.397]) # dihedral_angle # self.assertTrue(round(models.dihedral_angle(2,8,15, 16), 3), -13.443) # self.assertEqual(round(models.dihedral_angle(15,19,20,21), 3), 79.439) # self.assertEqual(round(models.dihedral_angle(15,14,11, 12), 3), 8.136) # median distance 
self.assertEqual(round(models.median_3d_dist(3, 20, plot=False)/100, 0), 15) self.assertEqual(round(models.median_3d_dist(3, 20, cluster=1, plot=False)/200, 0), 8) self.assertEqual(round(models.median_3d_dist(7, 10, models=range(5), plot=False), 0), 250) # write cmm models.write_cmm('.', model_num=2) models.write_cmm('.', models=range(5)) models.write_cmm('.', cluster=2) # write xyz models.write_xyz('.', model_num=2) models.write_xyz('.', models=range(5)) models.write_xyz('.', cluster=2) # clean system('rm -f model.*') system('rm -f lala') if CHKTIME: print '15', time() - t0
def test_15_3d_modelling(self): """ """ if CHKTIME: t0 = time() models = load_structuralmodels('models.pick') models.cluster_models(method='ward', verbose=False) # density models.density_plot(savedata='lala', plot=False) lines = open('lala').readlines() self.assertEqual(len(lines), 22) self.assertEqual([round(float(i), 1) if i != 'nan' else i for i in lines[1].split('\t')[:3]], [1.0, 'nan', 'nan']) self.assertEqual([round(float(i), 1) for i in lines[15].split('\t')[:3]], [15, 100.0, 100.0]) # contacts cmap = models.get_contact_matrix(cutoff=300) self.assertEqual(round( round(sum([i if i >=0 else 0 for i in reduce(lambda x, y: x+y, cmap)])/10, 0), 3), 8) # define best models models.define_best_models(10) self.assertEqual(len(models), 10) m1 = models[9] models.define_best_models(25) self.assertEqual(len(models), 25) self.assertEqual(m1, models[9]) # correlation corr, pval = models.correlate_with_real_data(cutoff=300) self.assertTrue(0.6 <= round(corr, 1) <= 0.7) self.assertEqual(round(pval, 4), round(0, 4)) # consistency models.model_consistency(cutoffs=(50, 100, 150, 200), plot=False, savedata='lala') lines = open('lala').readlines() self.assertEqual(len(lines), 22) self.assertEqual([round(float(i)/15, 0) for i in lines[1].split('\t')], [0, 2, 3, 4, 4]) self.assertEqual([round(float(i)/15, 0) for i in lines[15].split('\t')], [1, 6, 7, 7, 7]) # measure angle self.assertTrue(13 <= round(models.angle_between_3_particles(2,8,15)/10, 0) <= 14) self.assertEqual(round(models.angle_between_3_particles(19,20,21), 0), 60) self.assertEqual(round(models.angle_between_3_particles(15,14,11)/5, 0), 13) # coordinates # self.assertEqual([round(x, 3) for x in models.particle_coordinates(15)], # [2372.253, -1193.602, -1145.397]) # dihedral_angle # self.assertTrue(round(models.dihedral_angle(2,8,15, 16), 3), -13.443) # self.assertEqual(round(models.dihedral_angle(15,19,20,21), 3), 79.439) # self.assertEqual(round(models.dihedral_angle(15,14,11, 12), 3), 8.136) # median distance 
self.assertEqual(round(models.median_3d_dist(3, 20, plot=False)/100, 0), 15) self.assertEqual(round(models.median_3d_dist(3, 20, cluster=1, plot=False)/200, 0), 8) self.assertEqual(round(models.median_3d_dist(7, 10, models=range(5), plot=False), 0), 250) # write cmm models.write_cmm('.', model_num=2) models.write_cmm('.', models=range(5)) models.write_cmm('.', cluster=2) # write xyz models.write_xyz('.', model_num=2) models.write_xyz('.', models=range(5)) models.write_xyz('.', cluster=2) # clean system('rm -f model.*') system('rm -f lala') if CHKTIME: print '15', time() - t0
def test_15_3d_modelling(self): """ """ if ONLY and "15" not in ONLY: return if CHKTIME: t0 = time() models = load_structuralmodels("models.pick") models.cluster_models(method="ward", verbose=False) # density models.density_plot(savedata="lala", plot=False) lines = open("lala").readlines() self.assertEqual(len(lines), 22) self.assertEqual( [round(float(i), 1) if i != "nan" else i for i in lines[1].split("\t")[:3]], [1.0, "nan", "nan"] ) self.assertEqual([round(float(i), 1) for i in lines[15].split("\t")[:3]], [15, 99.9, 0.0]) # contacts cmap = models.get_contact_matrix(cutoff=300) self.assertEqual( round(round(sum([i if i >= 0 else 0 for i in reduce(lambda x, y: x + y, cmap)]) / 10, 0), 3), 8 ) # define best models models.define_best_models(10) self.assertEqual(len(models), 10) m1 = models[9] models.define_best_models(25) self.assertEqual(len(models), 25) self.assertEqual(m1, models[9]) # correlation corr, pval = models.correlate_with_real_data(cutoff=300) self.assertTrue(0.6 <= round(corr, 1) <= 0.7) self.assertEqual(round(pval, 4), round(0, 4)) # consistency models.model_consistency(cutoffs=(50, 100, 150, 200), plot=False, savedata="lala") lines = open("lala").readlines() self.assertEqual(len(lines), 22) self.assertEqual([round(float(i) / 15, 0) for i in lines[1].split("\t")], [0, 2, 2, 3, 4]) self.assertEqual([round(float(i) / 15, 0) for i in lines[15].split("\t")], [1, 5, 6, 7, 7]) # measure angle self.assertTrue(13 <= round(models.angle_between_3_particles(2, 8, 15) / 10, 0) <= 14) self.assertEqual(round(models.angle_between_3_particles(19, 20, 21), 0), 60) self.assertEqual(round(models.angle_between_3_particles(15, 14, 11) / 5, 0), 14) # coordinates self.assertEqual([round(x, 2) for x in models.particle_coordinates(15)], [3199.84, 4361.61, -4695.41]) # dihedral_angle self.assertTrue(round(models.dihedral_angle(2, 8, 15, 8, 16, [0])[0], 2), -13.44) self.assertEqual(round(models.dihedral_angle(15, 19, 20, 19, 21, [0])[0], 2), 75.95) 
self.assertEqual(round(models.dihedral_angle(15, 14, 11, 14, 12, [0])[0], 2), 2.07) # median distance self.assertEqual(round(models.median_3d_dist(3, 20, plot=False) / 100, 0), 15) self.assertEqual(round(models.median_3d_dist(3, 20, cluster=1, plot=False) / 200, 0), 8) self.assertEqual(round(models.median_3d_dist(7, 10, models=range(5), plot=False), 0), 250) # accessibility models.accessibility(radius=75, nump=10, plot=False, savedata="model.acc") vals = [l.split() for l in open("model.acc").readlines()[1:]] self.assertEqual(vals[0][1:3], ["0.56", "0.993"]) self.assertEqual(vals[20][1:3], ["1.0", "0.0"]) # contact map models.contact_map(savedata="model.contacts") vals = [l.split() for l in open("model.contacts").readlines()[1:]] self.assertEqual(vals[0], ["0", "1", "1.0"]) self.assertEqual(vals[1], ["0", "2", "0.72"]) self.assertEqual(vals[192], ["14", "18", "0.12"]) # interactions models.interactions(plot=False, savedata="model.inter") vals = [[float(i) for i in l.split()] for l in open("model.inter").readlines()[1:]] self.assertEqual(vals[2], [3.0, 4.68, 1.23, 3.78, 0.7, 4.65, 0.87, 3.92, 0.72, 4.74, 0.57]) # walking angle models.walking_angle(savedata="model.walkang") vals = [ [round(float(i), 2) if i != "None" else i for i in l.split()] for l in open("model.walkang").readlines()[1:] ] self.assertEqual(vals[17], [18.0, -45.42, 100.0, -9.78, 135.0]) self.assertEqual(vals[3], [4.0, 124.97, 274.0, 2.05, 254.0]) self.assertEqual(vals[16], [17.0, -62.84, 201.0, -3.20, 77.0]) self.assertEqual(vals[15], [16.0, -132.38, 286.0, -12.70, 124.0]) # write cmm models.write_cmm(".", model_num=2) models.write_cmm(".", models=range(5)) models.write_cmm(".", cluster=2) # write xyz models.write_xyz(".", model_num=2) models.write_xyz(".", models=range(5)) models.write_xyz(".", cluster=2) # write json models.write_json("model.json", model_num=2) models.write_json("model.json", models=range(5)) models.write_json("model.json", cluster=2) # clean system("rm -f model.*") system("rm -rf 
lala*") if CHKTIME: print "15", time() - t0
def test_15_3d_modelling(self): """ """ if CHKTIME: t0 = time() models = load_structuralmodels('models.pick') models.cluster_models(method='ward', verbose=False) # density models.density_plot(savedata='lala', plot=False) lines = open('lala').readlines() self.assertEqual(len(lines), 22) self.assertEqual([round(float(i), 1) if i != 'nan' else i for i in lines[1].split('\t')[:3]], [1.0, 'nan', 'nan']) self.assertEqual([round(float(i), 1) for i in lines[15].split('\t')[:3]], [15, 100.0, 100.0]) # contacts cmap = models.get_contact_matrix(cutoff=300) self.assertEqual(round( round(sum([i if i >=0 else 0 for i in reduce(lambda x, y: x+y, cmap)])/10, 0), 3), 8) # define best models models.define_best_models(10) self.assertEqual(len(models), 10) m1 = models[9] models.define_best_models(25) self.assertEqual(len(models), 25) self.assertEqual(m1, models[9]) # correlation corr, pval = models.correlate_with_real_data(cutoff=300) self.assertTrue(0.6 <= round(corr, 1) <= 0.7) self.assertEqual(round(pval, 4), round(0, 4)) # consistency models.model_consistency(cutoffs=(50, 100, 150, 200), plot=False, savedata='lala') lines = open('lala').readlines() self.assertEqual(len(lines), 22) self.assertEqual([round(float(i)/15, 0) for i in lines[1].split('\t')], [0, 2, 2, 3, 4]) self.assertEqual([round(float(i)/15, 0) for i in lines[15].split('\t')], [1, 5, 6, 7, 7]) # measure angle self.assertTrue(13 <= round(models.angle_between_3_particles(2,8,15)/10, 0) <= 14) self.assertEqual(round(models.angle_between_3_particles(19,20,21), 0), 60) self.assertEqual(round(models.angle_between_3_particles(15,14,11)/5, 0), 14) # coordinates self.assertEqual([round(x, 2) for x in models.particle_coordinates(15)], [3199.84, 4361.61, -4695.41]) # dihedral_angle self.assertTrue (round(models.dihedral_angle(2,8,15, 16) , 2), -13.44) self.assertEqual(round(models.dihedral_angle(15,19,20,21) , 2), 64.26 ) self.assertEqual(round(models.dihedral_angle(15,14,11, 12), 2), 8.95 ) # median distance 
self.assertEqual(round(models.median_3d_dist(3, 20, plot=False)/100, 0), 15) self.assertEqual(round(models.median_3d_dist(3, 20, cluster=1, plot=False)/200, 0), 8) self.assertEqual(round(models.median_3d_dist(7, 10, models=range(5), plot=False), 0), 250) # accessibility models.accessibility(radius=75, nump=10, plot=False, savedata='model.acc') vals = [l.split() for l in open('model.acc').readlines()[1:]] self.assertEqual(vals[0][1:3], ['0.560', '0.993']) self.assertEqual(vals[20][1:3], ['1.000', '0.000']) # contact map models.contact_map(savedata='model.contacts') vals = [l.split() for l in open('model.contacts').readlines()[1:]] self.assertEqual(vals[0], ['0', '1', '1.0']) self.assertEqual(vals[1], ['0', '2', '0.72']) self.assertEqual(vals[192], ['14', '18', '0.12']) # interactions models.interactions(plot=False, savedata='model.inter') vals = [[float(i) for i in l.split()] for l in open('model.inter').readlines()[1:]] self.assertEqual(vals[2], [3.0, 4.68, 1.23, 3.78, 0.7, 4.65, 0.87, 3.92, 0.72, 4.74, 0.57]) # walking angle models.walking_angle(savedata='model.walkang') vals = [[round(float(i), 2) if i != 'None' else i for i in l.split()] for l in open('model.walkang').readlines()[1:]] self.assertEqual(vals[0], [1.0, 137.06, 'None'],) self.assertEqual(vals[14], [15.0, -49.65, 'None'],) self.assertEqual(vals[13], [14.0, -101.41, 'None']) self.assertEqual(vals[12], [13.0, 150.45, -0.2]) # write cmm models.write_cmm('.', model_num=2) models.write_cmm('.', models=range(5)) models.write_cmm('.', cluster=2) # write xyz models.write_xyz('.', model_num=2) models.write_xyz('.', models=range(5)) models.write_xyz('.', cluster=2) # write json models.write_json('model.json', model_num=2) models.write_json('model.json', models=range(5)) models.write_json('model.json', cluster=2) # clean system('rm -f model.*') system('rm -rf lala*') if CHKTIME: print '15', time() - t0
def model_region(exp, optpar, opts, name):
    """
    Generate structural models for one region of an experiment.

    The modelling runs in a separate ``python`` process: the inputs are
    pickled into temporary files in the current working directory, a small
    driver script is written next to them and executed, and the models it
    saves are loaded back from ``<opts.outdir>/<name>/<name>.models``.
    The temporary files are removed even when something fails on the way.

    :param exp: experiment; ``_sub_experiment_zscore``, ``size``, ``crm``,
       ``identifier``, ``resolution``, ``cell_type``, ``exp_type`` and
       ``description`` are read from it
    :param optpar: modelling configuration, handed to
       ``generate_3d_models`` as ``config`` by the driver script
    :param opts: options object; it is pickled as a whole for the driver
       script, and ``beg``, ``end``, ``crm``, ``outdir`` and ``analyze``
       are read here
    :param name: name of the output sub-directory and prefix of the files
       written inside ``opts.outdir``

    :returns: the loaded models, with ``experiment`` and ``description``
       set, and with an ``index`` and ``description`` entry stored in every
       model (kept models first, then the ones in ``_bad_models``)
    """
    beg, end = opts.beg or 1, opts.end or exp.size
    zscores, values, zeros = exp._sub_experiment_zscore(beg, end)

    # random suffix so that concurrent runs in the same directory do not
    # overwrite each other's temporary files
    tmp_name = ''.join([letters[int(random()*52)]for _ in xrange(50)])
    zscore_path = '_tmp_zscore_' + tmp_name
    opts_path = '_tmp_opts_' + tmp_name
    script_path = '_tmp_model_' + tmp_name + '.py'

    try:
        with open(zscore_path, 'w') as tmp:
            dump([zscores, values, zeros, optpar, beg, end], tmp)

        with open(opts_path, 'w') as tmp:
            dump(opts, tmp)

        with open(script_path, 'w') as tmp:
            tmp.write('''
from cPickle import load, dump
from pytadbit.imp.imp_modelling import generate_3d_models
import os

tmp_name = "%s"

zscore_file = open("_tmp_zscore_" + tmp_name)
zscores, values, zeros, optpar, beg, end = load(zscore_file)
zscore_file.close()

opts_file = open("_tmp_opts_" + tmp_name)
opts = load(opts_file)
opts_file.close()

nloci = end - beg + 1
coords = {"crm" : opts.crm,
          "start": opts.beg,
          "end" : opts.end}

zeros = tuple([i not in zeros for i in xrange(end - beg + 1)])

models= generate_3d_models(zscores, opts.res, nloci,
                           values=values, n_models=opts.nmodels_mod,
                           n_keep=opts.nkeep_mod,
                           n_cpus=opts.ncpus,
                           keep_all=True,
                           first=0, container=opts.container,
                           config=optpar, verbose=0.5,
                           coords=coords, zeros=zeros)
# Save models
models.save_models(
    os.path.join(opts.outdir, "%s", "%s" + ".models"))

''' % (tmp_name, name, name))

        # Whatever the driver prints on stdout is kept as the constraints
        # report.  An argument list is used instead of a shell command line.
        # NOTE(review): the exit status of the child is not checked; a failed
        # run presumably surfaces when the models file is loaded below.
        constraints = Popen(["python", script_path],
                            stdout=PIPE).communicate()[0]
    finally:
        # always clean up, also when pickling or the child process failed
        for tmp_path in (zscore_path, script_path, opts_path):
            try:
                os.remove(tmp_path)
            except OSError:
                # same tolerance as "rm -f": a file that cannot be removed
                # (e.g. never created) must not hide the real error
                pass

    if "constraints" in opts.analyze:
        with open(os.path.join(opts.outdir, name,
                               name + '_constraints.txt'), 'w') as out:
            out.write(constraints)

    models = load_structuralmodels(
        os.path.join(opts.outdir, name, name + '.models'))
    models.experiment = exp
    coords = {"crm"  : opts.crm,
              "start": opts.beg,
              "end"  : opts.end}
    crm = exp.crm
    description = {'identifier'     : exp.identifier,
                   'chromosome'     : coords['crm'],
                   'start'          : (exp.resolution * coords['start']) if coords['start'] else None,
                   'end'            : (exp.resolution * coords['end']) if coords['end'  ] else None,
                   'species'        : crm.species,
                   'cell type'      : exp.cell_type,
                   'experiment type': exp.exp_type,
                   'resolution'     : exp.resolution,
                   'assembly'       : crm.assembly}
    for desc in exp.description:
        description[desc] = exp.description[desc]
    for desc in crm.description:
        # Chromosome-level entries fill in what the experiment does not
        # define. The previous code looked these keys up in exp.description,
        # which raised KeyError for chromosome-only keys; entries defined by
        # the experiment keep their value, exactly as before.
        if desc not in exp.description:
            description[desc] = crm.description[desc]

    # every model (kept and discarded) shares the same description dict
    for i, m in enumerate([m for m in models] + models._bad_models.values()):
        m['index'] = i
        m['description'] = description
    models.description = description

    return models