Beispiel #1
0
    def test_16_models_stats(self):
        if ONLY and not "16" in ONLY:
            return
        if CHKTIME:
            t0 = time()

        models = load_structuralmodels(PATH + "/models.pick")
        # write cmm
        models.write_cmm(".", model_num=2)
        model = load_impmodel_from_cmm("model.%s.cmm" % models[2]["rand_init"])
        # clean
        system("rm -f model.*")
        # stats
        self.assertEqual(200, round(model.distance(2, 3), 0))
        self.assertTrue(9 <= round(model.distance(8, 20) / 100, 0) <= 10)
        self.assertEqual(round(30, 0), round(model.radius_of_gyration() / 20,
                                             0))
        self.assertEqual(400, round(model.contour() / 10, 0))
        self.assertTrue(
            21 <= round((model.shortest_axe() + model.longest_axe()) /
                        100, 0) <= 22)
        self.assertEqual([15, 16], model.inaccessible_particles(1000))

        acc, num, acc_area, tot_area, bypt = model.accessible_surface(
            150, superradius=200, nump=150)
        self.assertTrue(210 <= acc <= 240)
        self.assertTrue(500 <= num <= 600)
        self.assertEqual(0.4, round(acc_area, 1))
        self.assertEqual(4, round(tot_area, 0))
        self.assertEqual(101, len(bypt))
        self.assertTrue(19 <= bypt[100][0] <= 22 and 8 <= bypt[100][1] <= 38
                        and 8 <= bypt[100][2] <= 23)
        if CHKTIME:
            print "16", time() - t0
Beispiel #2
0
    def test_14_3d_clustering(self):
        """
        """
        if ONLY and not "14" in ONLY:
            return
        if CHKTIME:
            t0 = time()

        models = load_structuralmodels(PATH + "/models.pick")
        if find_executable("mcl"):
            models.cluster_models(method="mcl",
                                  fact=0.9,
                                  verbose=False,
                                  dcutoff=200)
            self.assertTrue(5 <= len(models.clusters.keys()) <= 7)
        models.cluster_models(method="ward", verbose=False, dcutoff=200)
        self.assertTrue(2 <= len(models.clusters.keys()) <= 3)
        d = models.cluster_analysis_dendrogram()
        self.assertEqual(d["icoord"], [[5., 5., 15., 15.]])
        # align models
        m1, m2 = models.align_models(models=[1, 2])
        nrmsd = (sum([((m1[0][i] - m2[0][i])**2 + (m1[1][i] - m2[1][i])**2 +
                       (m1[2][i] - m2[2][i])**2)**.5
                      for i in xrange(len(m1[0]))]) / (len(m1[0])))
        self.assertTrue(nrmsd < 160)
        # fetching models
        models.define_best_models(5)
        m = models.fetch_model_by_rand_init("1", all_models=True)
        self.assertEqual(m, 8)
        models.define_best_models(25)
        m = models.fetch_model_by_rand_init("1", all_models=False)
        self.assertEqual(m, 8)
        if CHKTIME:
            print "14", time() - t0
Beispiel #3
0
    def test_14_3d_clustering(self):
        """
        """
        if ONLY and ONLY != '14':
            return
        if CHKTIME:
            t0 = time()

        models = load_structuralmodels('models.pick')
        if find_executable('mcl'):
            models.cluster_models(method='mcl', fact=0.9, verbose=False,
                                  dcutoff=200)
            self.assertTrue(5 <= len(models.clusters.keys()) <= 7)
        models.cluster_models(method='ward', verbose=False, dcutoff=200)
        self.assertTrue(2 <= len(models.clusters.keys()) <= 3)
        d = models.cluster_analysis_dendrogram()
        self.assertEqual(d['icoord'], [[5., 5., 15., 15.]])
        # align models
        m1, m2 = models.align_models(models=[1,2])
        nrmsd = (sum([((m1[0][i] - m2[0][i])**2 + (m1[1][i] - m2[1][i])**2 + (m1[2][i] - m2[2][i])**2)**.5
                      for i in xrange(len(m1[0]))]) / (len(m1[0])))
        self.assertTrue(nrmsd < 160)
        # fetching models
        models.define_best_models(5)
        m = models.fetch_model_by_rand_init('1', all_models=True)
        self.assertEqual(m, 6)
        models.define_best_models(25)
        m = models.fetch_model_by_rand_init('1', all_models=False)
        self.assertEqual(m, 6)
        if CHKTIME:
            print '14', time() - t0
Beispiel #4
0
def main():
    """
    Load the models file given by the "i" argument and pass the models,
    together with the "o" argument, to viewModels.
    """
    cli = getArgs()
    models_path, out_prefix = cli.i, cli.o

    structures = load_structuralmodels(models_path)

    # Steps kept disabled in this script:
    #   structures.align_models(in_place=True)
    #   structures.deconvolve(fact=0.6, dcutoff=1000,
    #                         represent_models='best', n_best_clusters=5)
    #   getHeatMap(structures, out_prefix)
    #   clusterModels(structures, out_prefix)
    viewModels(structures, out_prefix)
Beispiel #5
0
def main():
    """
    Load the models file given by the "i" argument and plot the median 3D
    distance between the "s" and "e" arguments into a PNG file whose name
    starts with the "o" argument.
    """
    cli = getArgs()
    models_path, out_prefix = cli.i, cli.o
    first, last = cli.s, cli.e

    structures = load_structuralmodels(models_path)
    figure_path = "".join(
        [out_prefix, ".distance-", str(first), "-", str(last), ".png"])
    structures.median_3d_dist(first, last, plot=True, savefig=figure_path)
Beispiel #6
0
    def test_15_3d_modelling(self):
        """
        """
        if ONLY and "15" not in ONLY:
            return
        if CHKTIME:
            t0 = time()

        models = load_structuralmodels(PATH + "/models.pick")
        models.cluster_models(method="ward", verbose=False)
        # density
        models.density_plot(savedata="lala", plot=False)
        lines = open("lala").readlines()
        self.assertEqual(len(lines), 22)
        self.assertEqual([
            round(float(i), 1) if i != "nan" else i
            for i in lines[1].split("\t")[:3]
        ], [1.0, "nan", "nan"])
        self.assertEqual(
            [round(float(i), 1) for i in lines[15].split("\t")[:3]],
            [15, 100.0, 0.0])
        # contacts
        cmap = models.get_contact_matrix(cutoff=300)
        self.assertEqual(
            round(
                round(
                    sum([
                        i if i >= 0 else 0
                        for i in reduce(lambda x, y: x + y, cmap)
                    ]) / 10, 0), 3), 8)
        # define best models
        models.define_best_models(10)
        self.assertEqual(len(models), 10)
        m1 = models[9]
        models.define_best_models(25)
        self.assertEqual(len(models), 25)
        self.assertEqual(m1, models[9])
        # correlation
        corr, pval = models.correlate_with_real_data(cutoff=300)
        self.assertTrue(0.5 <= round(corr, 1) <= 0.7)
        self.assertEqual(round(pval, 3), round(0, 3))
        # consistency
        models.model_consistency(cutoffs=(50, 100, 150, 200),
                                 plot=False,
                                 savedata="lala")
        lines = open("lala").readlines()
        self.assertEqual(len(lines), 22)
        self.assertEqual(
            [round(float(i) / 15, 0) for i in lines[1].split("\t")],
            [0, 2, 2, 3, 4])
        self.assertEqual(
            [round(float(i) / 15, 0) for i in lines[15].split("\t")],
            [1, 5, 6, 7, 7])
        # measure angle
        self.assertTrue(
            13 <= round(models.angle_between_3_particles(2, 8, 15) /
                        10, 0) <= 14)
        self.assertEqual(
            round(models.angle_between_3_particles(19, 20, 21), 0), 60)
        self.assertEqual(
            round(models.angle_between_3_particles(15, 14, 11) / 5, 0), 14)
        # dihedral_angle
        self.assertTrue(
            round(models.dihedral_angle(2, 8, 15, 8, 16, [0])[0], 2), -13.44)
        self.assertEqual(
            round(models.dihedral_angle(15, 19, 20, 19, 21, [0])[0], 0), 76)
        self.assertEqual(
            round(models.dihedral_angle(15, 14, 11, 14, 12, [0])[0], 2), 2.07)
        # median distance
        self.assertEqual(
            round(models.median_3d_dist(3, 20, plot=False) / 100, 0), 15)
        self.assertEqual(
            round(
                models.median_3d_dist(3, 20, cluster=1, plot=False) / 200, 0),
            8)
        self.assertEqual(
            round(models.median_3d_dist(7, 10, models=range(5), plot=False),
                  0), 250)
        # accessibility
        models.accessibility(radius=75,
                             nump=10,
                             plot=False,
                             savedata="model.acc")
        vals = [l.split() for l in open("model.acc").readlines()[1:]]
        self.assertEqual(vals[0][1:3], ["0.56", "0.993"])
        self.assertEqual(vals[20][1:3], ["1.0", "0.0"])
        # contact map
        models.contact_map(savedata="model.contacts")
        vals = [l.split() for l in open("model.contacts").readlines()[1:]]
        self.assertEqual(vals[0], ["0", "1", "1.0"])
        self.assertEqual(vals[1], ["0", "2", "0.72"])
        self.assertEqual(vals[192], ["14", "18", "0.12"])
        # interactions
        models.interactions(plot=False, savedata="model.inter")
        vals = [[float(i) for i in l.split()]
                for l in open("model.inter").readlines()[1:]]
        self.assertEqual(
            vals[2],
            [3.0, 4.68, 1.23, 3.78, 0.7, 4.65, 0.87, 3.92, 0.72, 4.74, 0.57])
        # walking angle
        models.walking_angle(savedata="model.walkang")
        vals = [[round(float(i), 2) if i != "None" else i for i in l.split()]
                for l in open("model.walkang").readlines()[1:]]
        self.assertEqual(
            vals[17],
            [18.0, -45.42, 100.0, -9.78, 135.0],
        )
        self.assertEqual(
            vals[3],
            [4.0, 124.97, 274.0, 2.05, 254.0],
        )
        self.assertEqual(vals[16], [17.0, -62.84, 201.0, -3.2, 77.0])
        self.assertEqual(vals[15], [16.0, -132.38, 286.0, -12.7, 124.0])
        # write cmm
        models.write_cmm(".", model_num=2)
        models.write_cmm(".", models=range(5))
        models.write_cmm(".", cluster=2)
        # write xyz
        models.write_xyz(".", model_num=2)
        models.write_xyz(".", models=range(5))
        models.write_xyz(".", cluster=2)
        # write json
        models.write_json("model.json", model_num=2)
        models.write_json("model.json", models=range(5))
        models.write_json("model.json", cluster=2)
        # clean
        system("rm -f model.*")
        system("rm -rf lala*")
        if CHKTIME:
            print "15", time() - t0
Beispiel #7
0
    def test_13_3d_modelling_centroid(self):  #model with no optimisation
        """
        quick test to generate 3D coordinates from 3? simple models???
        """
        if ONLY and not "13" in ONLY:
            return
        if CHKTIME:
            t0 = time()

        try:
            __import__("IMP")
        except ImportError:
            warn("IMP not found, skipping test\n")
            return
        test_chr = Chromosome(name="Test Chromosome", max_tad_size=260000)
        test_chr.add_experiment("exp1",
                                20000,
                                tad_def=exp4,
                                hic_data=PATH + "/20Kb/chrT/chrT_D.tsv",
                                silent=True)
        exp = test_chr.experiments[0]
        exp.load_hic_data(PATH + "/20Kb/chrT/chrT_A.tsv", silent=True)
        exp.filter_columns(silent=True)
        exp.normalize_hic(silent=True, factor=None)
        models = exp.model_region(51,
                                  71,
                                  n_models=40,
                                  n_keep=25,
                                  n_cpus=4,
                                  config={
                                      'kforce': 5,
                                      'maxdist': 500,
                                      'scale': 0.01,
                                      'kbending': 0.0,
                                      'upfreq': 1.0,
                                      'lowfreq': -0.6
                                  })
        #models.save_models('models.pick')

        avg = models.average_model()
        nmd = len(models)
        dev = rmsdRMSD_wrapper([models[m]["x"]
                                for m in xrange(nmd)] + [avg["x"]],
                               [models[m]["y"]
                                for m in xrange(nmd)] + [avg["y"]],
                               [models[m]["z"]
                                for m in xrange(nmd)] + [avg["z"]],
                               models._zeros, models.nloci, 200,
                               range(len(models) + 1),
                               len(models) + 1, int(False), "rmsd", 0)
        centroid = models[models.centroid_model()]
        # find closest
        model = min([(k, dev[(k, nmd)]) for k in range(nmd)],
                    key=lambda x: x[1])[0]
        self.assertEqual(centroid["rand_init"], models[model]["rand_init"])

        refmodels = load_structuralmodels(PATH + "/models.pick")
        refrestraints = refmodels._restraints
        refrestraints = dict(
            (r, (refrestraints[r][0], round(refrestraints[r][1], 2),
                 round(refrestraints[r][2], 2))) for r in refrestraints)
        restraints = models._restraints
        restraints = dict((r, (restraints[r][0], round(restraints[r][1], 2),
                               round(restraints[r][2], 2)))
                          for r in restraints)
        self.assertEqual(refrestraints, restraints)
        if CHKTIME:
            print "13", time() - t0
Beispiel #8
0
    def test_15_3d_modelling(self):
        """
        """
        if ONLY and ONLY != '15':
            return
        if CHKTIME:
            t0 = time()

        models = load_structuralmodels('models.pick') 
        models.cluster_models(method='ward', verbose=False)
        # density
        models.density_plot(savedata='lala', plot=False)
        lines = open('lala').readlines()
        self.assertEqual(len(lines), 22)
        self.assertEqual([round(float(i), 1) if i != 'nan' else i for i in lines[1].split('\t')[:3]],
                         [1.0, 'nan', 'nan'])
        self.assertEqual([round(float(i), 1) for i in lines[15].split('\t')[:3]],
                         [15, 100.0, 0.0])
        # contacts
        cmap = models.get_contact_matrix(cutoff=300)
        self.assertEqual(round(
            round(sum([i if i >=0 else 0 for i in
                       reduce(lambda x, y: x+y, cmap)])/10, 0),
            3), 8)
        # define best models
        models.define_best_models(10)
        self.assertEqual(len(models), 10)
        m1 = models[9]
        models.define_best_models(25)
        self.assertEqual(len(models), 25)
        self.assertEqual(m1, models[9])
        # correlation
        corr, pval = models.correlate_with_real_data(cutoff=300)
        self.assertTrue(0.6 <= round(corr, 1) <= 0.7)
        self.assertEqual(round(pval, 4), round(0, 4))
        # consistency
        models.model_consistency(cutoffs=(50, 100, 150, 200), plot=False,
                                 savedata='lala')
        lines = open('lala').readlines()
        self.assertEqual(len(lines), 22)
        self.assertEqual([round(float(i)/15, 0) for i in lines[1].split('\t')],
                         [0, 2, 3, 3, 3])
        self.assertEqual([round(float(i)/15, 0) for i in lines[15].split('\t')],
                         [1, 5, 6, 7, 7])
        # measure angle
        self.assertTrue(13 <= round(models.angle_between_3_particles(2,8,15)/10,
                                    0) <= 14)
        self.assertEqual(round(models.angle_between_3_particles(19,20,21), 0),
                         60)
        self.assertEqual(round(models.angle_between_3_particles(15,14,11)/5, 0),
                         13)
        # coordinates
        self.assertEqual([round(x, 2) for x in models.particle_coordinates(15)],
                         [2098.32, 1565.63, -4319.62])
        # dihedral_angle
        self.assertTrue (round(models.dihedral_angle(2 ,  8, 15,  8, 16, [0])[0], 2), -13.44)
        self.assertEqual(round(models.dihedral_angle(15, 19, 20, 19, 21, [0])[0], 2),  76.24)
        self.assertEqual(round(models.dihedral_angle(15, 14, 11, 14, 12, [0])[0], 2),   0.07)
        # median distance
        self.assertEqual(round(models.median_3d_dist(3, 20, plot=False)/100, 0),
                         15)
        self.assertEqual(round(models.median_3d_dist(3, 20, cluster=1,
                                                     plot=False)/200, 0), 8)
        self.assertEqual(round(models.median_3d_dist(7, 10, models=range(5),
                                                     plot=False), 0), 250)
        # accessibility
        models.accessibility(radius=75, nump=10, plot=False, savedata='model.acc')
        vals = [l.split() for l in open('model.acc').readlines()[1:]]
        self.assertEqual(vals[0][1:3], ['0.68', '0.933'])
        self.assertEqual(vals[20][1:3], ['1.0', '0.0'])
        # contact map
        models.contact_map(savedata='model.contacts')
        vals = [l.split() for l in open('model.contacts').readlines()[1:]]
        self.assertEqual(vals[0], ['0', '1', '1.0'])
        self.assertEqual(vals[1], ['0', '2', '0.96'])
        self.assertEqual(vals[192], ['14', '18', '0.12'])
        # interactions
        models.interactions(plot=False, savedata='model.inter')
        vals = [[float(i) for i in l.split()] for l in open('model.inter').readlines()[1:]]
        self.assertEqual(vals[2], [3.0, 4.92, 1.12, 3.88, 0.65, 4.69, 0.82, 4.01, 0.62, 4.81, 0.5])
        # walking angle
        models.walking_angle(savedata='model.walkang')
        vals = [[round(float(i), 2) if i != 'None' else i for i in l.split()] for l in open('model.walkang').readlines()[1:]]
        self.assertEqual(vals[17], [18.0, -45.42, 100.0, -14.14, 137.0],)
        self.assertEqual(vals[3],  [4.0, 125.36, 273.0, 1.96, 253.0],)
        self.assertEqual(vals[16], [17.0, -70.14, 200.0, -2.5, 84.0])
        self.assertEqual(vals[15], [16.0, -134.88, 287.0, -20.48, 121.0])
        # write cmm
        models.write_cmm('.', model_num=2)
        models.write_cmm('.', models=range(5))
        models.write_cmm('.', cluster=2)
        # write xyz
        models.write_xyz('.', model_num=2)
        models.write_xyz('.', models=range(5))
        models.write_xyz('.', cluster=2)
        # write json
        models.write_json('model.json', model_num=2)
        models.write_json('model.json', models=range(5))
        models.write_json('model.json', cluster=2)
        # clean
        system('rm -f model.*')
        system('rm -rf lala*')
        if CHKTIME:
            print '15', time() - t0
Beispiel #9
0
def _write_model_list(path, model_ids):
    """
    Write one "model.<id>" line per identifier of *model_ids* into *path*.
    """
    with open(path, 'w') as out:
        for model_n in model_ids:
            out.write("model.{0}\n".format(model_n))


def _write_chimera_cmd(path, subdir, model_ids):
    """
    Write into *path* a Chimera command file that opens the CMM files of
    *model_ids* located in *subdir* and matches each of them onto model #0.
    """
    with open(path, 'w') as out:
        out.write("open " + " ".join(
            ["{0}/model.{1}.cmm".format(subdir, model_n)
             for model_n in model_ids]))
        out.write("\nlabel; represent wire; ~bondcolor\n")
        for i in range(len(model_ids)):
            out.write("match #{0} #0\n".format(i))


def main():
    """
    Run the modelling pipeline and the requested analyses.

    Unless ``opts.analyze_only`` is set, the Hi-C data are loaded, TADs are
    searched when ``opts.tad`` is set, the experiments are summed and
    filtered, and the chromosome is saved (the program exits at that point
    when ``opts.tad_only`` is set); then the IMP parameters are loaded and
    optimized and the region is modelled.  With ``opts.analyze_only`` the
    previously saved models are loaded instead.

    The models are then clustered, saved again, written out as JSON / XYZ /
    CMM files, and each analysis named in ``opts.analyze`` is run.  All
    output goes under ``<opts.outdir>/<crm>_<beg>_<end>``.

    :raises IOError: in analyze-only mode, when the saved models file cannot
       be read (previously this surfaced later as a NameError)
    """
    opts = get_options()
    # only ncpus is used below; the other conversions are kept because they
    # fail early when an option is not numeric
    nmodels_opt, nkeep_opt, ncpus = (int(opts.nmodels_opt),
                                     int(opts.nkeep_opt), int(opts.ncpus))
    nmodels_mod, nkeep_mod = int(opts.nmodels_mod), int(opts.nkeep_mod)
    if opts.xname:
        xnames = opts.xname
    elif opts.data[0]:
        xnames = [os.path.split(d)[-1] for d in opts.data]
    else:
        xnames = [os.path.split(d)[-1] for d in opts.norm]

    name = '{0}_{1}_{2}'.format(opts.crm, opts.beg, opts.end)
    workdir = os.path.join(opts.outdir, name)
    models_dir = os.path.join(workdir, 'models')

    ############################################################################
    ############################  LOAD HI-C DATA  ##############################
    ############################################################################

    if not opts.analyze_only:
        crm = load_hic_data(opts, xnames)

    ############################################################################
    ##########################  SEARCH TADs PARAMETERS #########################
    ############################################################################
    if opts.tad and not opts.analyze_only:
        search_tads(opts, crm, name)

    # Save the chromosome
    # Chromosomes can later on be loaded to avoid re-reading the original
    # matrices. See function "load_chromosome".
    if not opts.tad_only and not opts.analyze_only:
        # Sum all experiments into a new one
        if len(xnames) > 1:
            logging.info("\tSumming experiments %s...", ' + '.join(xnames))
            exp = crm.experiments[0] + crm.experiments[1]
            for i in range(2, len(xnames)):
                exp += crm.experiments[i]
            crm.add_experiment(exp)
        else:
            exp = crm.experiments[0]

        exp.filter_columns(draw_hist="column filtering" in opts.analyze,
                           perc_zero=opts.filt,
                           savefig=os.path.join(
                               workdir, name + '_column_filtering.pdf'),
                           diagonal=not opts.nodiag)
        if "column filtering" in opts.analyze:
            # the file is closed explicitly so that its content is flushed
            filt_file = os.path.join(workdir, name + '_column_filtering.dat')
            with open(filt_file, 'w') as out:
                out.write('# particles not considered in the analysis\n' +
                          '\n'.join(map(str, sorted(exp._zeros.keys()))))

    if not opts.analyze_only:
        logging.info("\tSaving the chromosome...")
        crm.save_chromosome(os.path.join(workdir, '{0}.tdb'.format(name)),
                            force=True)
    if opts.tad_only:
        exit()

    ############################################################################
    #######################  LOAD OPTIMAL IMP PARAMETERS #######################
    ############################################################################
    ############################################################################
    #########################  OPTIMIZE IMP PARAMETERS #########################
    ############################################################################

    if not opts.analyze_only:
        results = load_optimal_imp_parameters(opts, name, exp)
        optpar = optimize(results, opts, name)

    ############################################################################
    ##############################  MODEL REGION ###############################
    ############################################################################

    models_file = os.path.join(workdir, name + '.models')
    if opts.analyze_only:
        # models are already calculated and we just want to load them
        try:
            models = load_structuralmodels(models_file)
        except IOError:
            # nothing below can work without models: report the missing
            # file instead of failing later on an undefined variable
            logging.error("\tCould not load models from %s", models_file)
            raise
    else:
        # Build 3D models based on the HiC data.
        logging.info("\tModeling (this can take long)...")
        models = model_region(exp, optpar, opts, name)
        for line in repr(models).split('\n'):
            logging.info(line)

    dcutoff = int(models._config['dcutoff'] *
                  models._config['scale']   *
                  models.resolution)
    ############################################################################
    ##############################  ANALYZE MODELS #############################
    ############################################################################

    if "correlation real/models" in opts.analyze:
        # Calculate the correlation coefficient between a set of kept models and
        # the original HiC matrix
        logging.info("\tCorrelation with data...")
        rho, pval = models.correlate_with_real_data(
            cutoff=dcutoff,
            savefig=os.path.join(workdir, name + '_corre_real.pdf'),
            plot=True)
        logging.info("\t Correlation coefficient: %s [p-value: %s]", rho, pval)

    if "z-score plot" in opts.analyze:
        # zscore plots
        logging.info("\tZ-score plot...")
        models.zscore_plot(
            savefig=os.path.join(workdir, name + '_zscores.pdf'))

    # Cluster models based on structural similarity
    logging.info("\tClustering all models into sets of structurally similar" +
                 " models...")
    # Try decreasing fractions and, for each, three cut-offs; stop at the
    # first combination for which the clustering does not fail.
    for ffact in [0.95, 0.9, 0.85, 0.8, 0.75, 0.7, 0.65, 0.6, 0.55, 0.5]:
        logging.info('   fact = ' + str(ffact))
        for clcutoff in [dcutoff / 2 , dcutoff, dcutoff * 1.5]:
            try:
                logging.info('      cutoff = ' + str(clcutoff))
                models.cluster_models(fact=ffact, dcutoff=clcutoff,
                                      n_cpus=ncpus)
                break
            except Exception:
                # Exception (not a bare except) so that Ctrl-C and
                # SystemExit are not swallowed by the retry loop
                continue
        else:
            continue
        break
    logging.info("\tSaving again the models this time with clusters...")
    models.save_models(models_file)
    # Plot the clustering
    try:
        models.cluster_analysis_dendrogram(
            color=True, savefig=os.path.join(workdir, name + '_clusters.pdf'))
    except Exception:
        logging.info("\t\tWARNING: plot for clusters could not be made...")

    if not opts.not_write_json:
        models.write_json(os.path.join(workdir, name + '.json'))

    if not (opts.not_write_xyz and opts.not_write_cmm):
        # Save the clustered models into directories for easy visualization with
        # Chimera (http://www.cgl.ucsf.edu/chimera/)
        # Move into the cluster directory and run in the prompt
        # "chimera cl_1_superimpose.cmd"
        logging.info("\t\tWriting models, list and chimera files...")
        for cluster in models.clusters:
            members = models.clusters[cluster]
            logging.info("\t\tCluster #{0} has {1} models {2}".format(
                cluster, len(members), members))
            cl_name = 'cl_' + str(cluster)
            cl_dir = os.path.join(models_dir, cl_name)
            if not os.path.exists(cl_dir):
                os.makedirs(cl_dir)
            if not opts.not_write_xyz:
                models.write_xyz(directory=cl_dir, cluster=cluster)
            if not opts.not_write_cmm:
                models.write_cmm(directory=cl_dir, cluster=cluster)
            # Write list file
            _write_model_list(os.path.join(models_dir, cl_name + '.lst'),
                              members)
            if not opts.not_write_cmm:
                # Write chimera file
                _write_chimera_cmd(
                    os.path.join(models_dir, cl_name + '_superimpose.cmd'),
                    cl_name, members)
        # same with singletons
        singletons = [m['rand_init'] for m in models
                      if m['cluster'] == 'Singleton']
        logging.info("\t\tSingletons has %s models %s", len(singletons),
                     singletons)
        single_dir = os.path.join(models_dir, 'Singletons')
        if not os.path.exists(single_dir):
            os.makedirs(single_dir)
        if not opts.not_write_xyz:
            models.write_xyz(directory=single_dir, models=singletons)
        if not opts.not_write_cmm:
            models.write_cmm(directory=single_dir, models=singletons)
        # Write best model and centroid model
        centroid_model = models[models.centroid_model()]
        centroid_model.write_cmm(directory=models_dir,
                                 filename='centroid.cmm')
        # the XYZ file is written with the XYZ writer, as done for the best
        # model below (the CMM writer used to be called here)
        centroid_model.write_xyz(directory=models_dir,
                                 filename='centroid.xyz')
        models[0].write_cmm(directory=models_dir, filename='best.cmm')
        models[0].write_xyz(directory=models_dir, filename='best.xyz')
        # Write list file
        _write_model_list(os.path.join(models_dir, 'Singletons.lst'),
                          singletons)
        if not opts.not_write_cmm:
            # Write chimera file
            _write_chimera_cmd(
                os.path.join(models_dir, 'Singletons_superimpose.cmd'),
                'Singletons', singletons)

    if "objective function" in opts.analyze:
        logging.info("\tPlotting objective function decay for vbest model...")
        models.objective_function_model(
            0, log=True, smooth=False,
            savefig=os.path.join(workdir, name + '_obj-func.pdf'))

    if "centroid" in opts.analyze:
        # Get the centroid model of cluster #1
        logging.info("\tGetting centroid...")
        centroid = models.centroid_model(cluster=1)
        logging.info("\t\tThe model centroid (closest to the average) " +
                     "for cluster 1 is: {}".format(centroid))

    if "consistency" in opts.analyze:
        # Calculate a consistency plot for all models in cluster #1
        logging.info("\tGetting consistency data...")
        models.model_consistency(
            cluster=1, cutoffs=range(50, dcutoff + 50, 50),
            savefig =os.path.join(workdir, name + '_consistency.pdf'),
            savedata=os.path.join(workdir, name + '_consistency.dat'))

    if "density" in opts.analyze:
        # Calculate a DNA density plot
        logging.info("\tGetting density data...")
        models.density_plot(
            error=True, steps=(1,3,5,7),
            savefig =os.path.join(workdir, name + '_density.pdf'),
            savedata=os.path.join(workdir, name + '_density.dat'))

    if "contact map" in opts.analyze:
        # Get a contact map for cluster #1 at the modelling distance cut-off
        logging.info("\tGetting a contact map...")
        models.contact_map(
            cluster=1, cutoff=dcutoff,
            savedata=os.path.join(workdir, name + '_contact.dat'))

    if "walking angle" in opts.analyze:
        # Get Dihedral angle plot for cluster #1
        logging.info("\tGetting angle data...")
        models.walking_angle(
            cluster=1, steps=(1,5),
            savefig = os.path.join(workdir, name + '_wang.pdf'),
            savedata= os.path.join(workdir, name + '_wang.dat'))

    if "persistence length" in opts.analyze:
        # Get persistence length of all models
        logging.info("\tGetting persistence length data...")
        pltfile = os.path.join(workdir, name + '_pL.dat')
        with open(pltfile, 'w') as plt_out:
            plt_out.write('#Model_Number\tpL\n')
            for model in models:
                try:
                    plt_out.write('%s\t%.2f\n' % (model["rand_init"],
                                                  model.persistence_length()))
                except Exception:
                    # best effort: a failing model is reported and skipped
                    sys.stderr.write('WARNING: failed to compute persistence ' +
                         'length for model %s' % model["rand_init"])

    if "accessibility" in opts.analyze:
        # Calculate the accessibility over the set of models
        # (a per-model variant based on accessible_surface used to be kept
        # here as commented-out code)
        logging.info("\tGetting accessibility data...")
        radius = 75   # Radius of an object to calculate accessibility
        nump   = 30   # number of particles (resolution)
        logging.info("\tGetting accessibility data (this can take long)...")
        models.accessibility(radius, nump=nump,
            error=True,
            savefig =os.path.join(workdir, name + '_accessibility.pdf'),
            savedata=os.path.join(workdir, name + '_accessibility.dat'))

    if "interaction" in opts.analyze:
        # Get interaction data of all models at the modelling distance cut-off
        logging.info("\tGetting interaction data...")
        models.interactions(
            cutoff=dcutoff, steps=(1,3,5),
            savefig =os.path.join(workdir, name + '_interactions.pdf'),
            savedata=os.path.join(workdir, name + '_interactions.dat'),
            error=True)
Beispiel #10
0
def model_region(exp, optpar, opts, name):
    """
    Generate structural models for a region of an experiment.

    The modelling itself is run in a separate ``python`` process: the
    z-scores, the options and a small driver script are written to temporary
    files in the current working directory, the script is executed, and the
    models it saves are loaded back from
    ``<opts.outdir>/<name>/<name>.models``.

    :param exp: experiment object; its ``size``, ``resolution``, ``crm``,
       ``identifier``, ``cell_type``, ``exp_type`` and ``description``
       attributes and its ``_sub_experiment_zscore`` method are used
    :param optpar: modelling parameters, forwarded as ``config`` to
       ``generate_3d_models`` by the driver script
    :param opts: options object (``beg``, ``end``, ``crm``, ``res``,
       ``nmodels_mod``, ``nkeep_mod``, ``ncpus``, ``container``, ``outdir``,
       ``analyze`` and ``description`` are read); it must be picklable as it
       is passed to the child process through a pickle file
    :param name: name of the output sub-directory and prefix of the output
       files

    :returns: the loaded structural models, with ``experiment`` and
       ``description`` set and each model (including the ones stored in
       ``_bad_models``) tagged with an ``index`` and the shared
       ``description`` dictionary

    :raises subprocess.CalledProcessError: if the child process exits with a
       non-zero status (temporary files are removed in that case too)
    """
    beg, end = opts.beg or 1, opts.end or exp.size
    zscores, values, zeros = exp._sub_experiment_zscore(beg, end)

    # random suffix so that concurrent runs in the same directory do not
    # overwrite each other's temporary files
    tmp_name = ''.join([letters[int(random()*52)]for _ in xrange(50)])
    zscore_path = '_tmp_zscore_' + tmp_name
    opts_path = '_tmp_opts_' + tmp_name
    script_path = '_tmp_model_' + tmp_name + '.py'

    try:
        with open(zscore_path, 'w') as tmp:
            dump([zscores, values, zeros, optpar, beg, end], tmp)

        with open(opts_path, 'w') as tmp:
            dump(opts, tmp)

        with open(script_path, 'w') as tmp:
            tmp.write('''
from cPickle import load, dump
from pytadbit.modelling.imp_modelling import generate_3d_models
import os

tmp_name = "%s"

zscore_file = open("_tmp_zscore_" + tmp_name)
zscores, values, zeros, optpar, beg, end = load(zscore_file)
zscore_file.close()

opts_file = open("_tmp_opts_" + tmp_name)
opts = load(opts_file)
opts_file.close()

nloci = end - beg + 1
coords = {"crm"  : opts.crm,
          "start": opts.beg,
          "end"  : opts.end}

zeros = tuple([i not in zeros for i in xrange(end - beg + 1)])

models =  generate_3d_models(zscores, opts.res, nloci,
                             values=values, n_models=opts.nmodels_mod,
                             n_keep=opts.nkeep_mod,
                             n_cpus=opts.ncpus,
                             keep_all=True,
                             first=0, container=opts.container,
                             config=optpar, coords=coords, zeros=zeros)
# Save models
models.save_models(
    os.path.join(opts.outdir, "%s", "%s" + ".models"))

''' % (tmp_name, name, name))

        check_call(["python", script_path])
    finally:
        # always clean up, even when the child process failed; a missing
        # file is not an error here (same semantics as ``rm -f``)
        for path in (zscore_path, script_path, opts_path):
            try:
                os.remove(path)
            except OSError:
                pass

    models = load_structuralmodels(
        os.path.join(opts.outdir, name, name + '.models'))

    if "constraints" in opts.analyze:
        # dump the restraints used for the modelling as a tab-separated table
        constraints_path = os.path.join(opts.outdir, name,
                                        name + '_constraints.txt')
        with open(constraints_path, 'w') as out:
            out.write('# Harmonic\tpart1\tpart2\tdist\tkforce\n')
            out.write('\n'.join(['%s\t%s\t%s\t%.1f\t%.3f' % (
                harm, p1, p2, dist, kforce)
                                 for (p1, p2), (harm, dist, kforce)
                                 in models._restraints.iteritems()]) + '\n')

    models.experiment = exp
    coords = {"crm"  : opts.crm,
              "start": opts.beg,
              "end"  : opts.end}
    crm = exp.crm
    description = {'identifier'     : exp.identifier,
                   'chromosome'     : coords['crm'],
                   'start'          : (exp.resolution * coords['start']) if coords['start'] else None,
                   'end'            : (exp.resolution * coords['end'])   if coords['end'  ] else None,
                   'species'        : crm.species,
                   'cell type'      : exp.cell_type,
                   'experiment type': exp.exp_type,
                   'resolution'     : exp.resolution,
                   'assembly'       : crm.assembly}
    # later sources override earlier ones: options, then experiment, then
    # chromosome
    for key in opts.description:
        description[key] = opts.description[key]
    for desc in exp.description:
        description[desc] = exp.description[desc]
    for desc in crm.description:
        # values must come from the chromosome's own description
        description[desc] = crm.description[desc]
    # every model (kept and discarded) references the same description dict
    for i, m in enumerate([m for m in models] + models._bad_models.values()):
        m['index'] = i
        m['description'] = description
    models.description = description
    return models