Esempio n. 1
0
def checkIntegrityClusteringObject(objectPath):
    """
        Check whether a stored clustering object can be read back

        :param objectPath: Path of the clustering object to check
        :type objectPath: str

        :returns: bool -- True if the object was read, False if reading it
            raised an EOFError
    """
    try:
        utilities.readClusteringObject(objectPath)
    except EOFError:
        # Only an unexpected end of file marks the object as invalid, any
        # other error is propagated to the caller
        isValid = False
    else:
        isValid = True
    return isValid
Esempio n. 2
0
def main(epoch_num, trajectory, snapshot_num, resname, clustering_object,
         topology):
    """
        Print the index of the first cluster whose threshold contains the
        requested snapshot, or a message if no cluster matches

        :param epoch_num: Epoch of the trajectory to read
        :type epoch_num: int
        :param trajectory: Number of the trajectory inside the epoch
        :type trajectory: int
        :param snapshot_num: Index of the snapshot inside the trajectory
        :type snapshot_num: int
        :param resname: Residue name passed to the PDB object
        :type resname: str
        :param clustering_object: Clustering object to read
        :type clustering_object: str
        :param topology: Topology file, or None if it is not needed
        :type topology: str

        :raises ValueError: If no trajectory file matches epoch and trajectory
        :raises IndexError: If the snapshot can not be extracted
    """
    rmsd_calculator = RMSDCalculator.RMSDCalculator()
    clustering_object = utilities.readClusteringObject(clustering_object)
    # the number of rows of the summary of the previous epoch (or epoch 0)
    # limits how many clusters are compared
    previous_epoch = str(max(0, epoch_num - 1))
    summary = utilities.loadtxtfile(
        os.path.join(previous_epoch, "clustering", "summary.txt"))
    n_clusters = summary.shape[0]
    topology_contents = None
    if topology is not None:
        topology_contents = utilities.getTopologyFile(topology)
    pattern = os.path.join(str(epoch_num), "*traj*_%d.*" % trajectory)
    matches = glob.glob(pattern)
    if not matches:
        raise ValueError(
            "No file with the specified epoch and trajectory found")
    try:
        snapshots = utilities.getSnapshots(matches[0],
                                           topology=topology)[snapshot_num]
    except IndexError:
        raise IndexError(
            "Snapshot number %d not found in trajectory %d for epoch %d, please check that the arguments provided are correct"
            % (snapshot_num, trajectory, epoch_num))
    pdb = atomset.PDB()
    pdb.initialise(snapshots, resname=resname, topology=topology_contents)
    for index, cluster in enumerate(clustering_object[:n_clusters]):
        if rmsd_calculator.computeRMSD(pdb, cluster.pdb) < cluster.threshold:
            print("Snapshot belongs to cluster", index)
            break
    else:
        # the loop finished without finding a cluster close enough
        print("Snapshot not assigned to any cluster! :(")
Esempio n. 3
0
def main(plots, lagtimes_ITS, nRuns, nStates, plot_ITS, plot_CK, save_plot, path):
    """
        Run the ITS calculation and the ``cktest`` check on the MSM objects
        stored in path, optionally showing and/or saving the figures

        :param plots: Whether to show the plots on screen
        :type plots: bool
        :param lagtimes_ITS: Lagtimes assigned to the MSM estimator, if None a
            default list is used
        :type lagtimes_ITS: list
        :param nRuns: Number of runs to analyse (files MSM_object_<i>.pkl)
        :type nRuns: int
        :param nStates: First argument passed to ``cktest``
        :type nStates: int
        :param plot_ITS: Whether to run the ITS calculation
        :type plot_ITS: bool
        :param plot_CK: Whether to run and plot the ``cktest`` check
        :type plot_CK: bool
        :param save_plot: Whether to save the figures to disk
        :type save_plot: bool
        :param path: Folder containing the MSM objects, figures are saved here
        :type path: str
    """
    if lagtimes_ITS is None:
        lagtimes_ITS = [1, 50, 100, 200, 400, 600, 800, 1000]
    MSM_object = estimate.MSM()
    MSM_object.lagtimes = lagtimes_ITS
    ck_folder = os.path.join(path, "ck_plots")
    # The folder is needed when the CK figures are saved; previously it was
    # only created when plot_ITS was set, so saving CK plots alone failed.
    # The plot_ITS case is kept so existing runs still get the same folder.
    if save_plot and (plot_CK or plot_ITS) and not os.path.exists(ck_folder):
        os.makedirs(ck_folder)
    print("Analysing folder ", path)
    for i in range(nRuns):
        print("Plotting validity checks for run %d" % i)
        MSM = utilities.readClusteringObject(os.path.join(path, "MSM_object_%d.pkl" % i))
        assert len(MSM.dtrajs_full) == len(MSM.dtrajs_active)
        MSM_object.MSM_object = MSM
        MSM_object.dtrajs = MSM.dtrajs_full
        if plot_ITS and (save_plot or plots):
            MSM_object._calculateITS()
            if save_plot:
                plt.savefig(os.path.join(path, "its_%d.png" % i))
        if plot_CK and (save_plot or plots):
            # setting mlags to None chooses automatically the number of lagtimes
            # according to the longest trajectory available
            CK_test = MSM_object.MSM_object.cktest(nStates, mlags=None)
            mplt.plot_cktest(CK_test)
            if save_plot:
                plt.savefig(os.path.join(ck_folder, "ck_%d.png" % i))
        if plots:
            plt.show()
        # release the figures of this run before processing the next one
        plt.close('all')
Esempio n. 4
0
def writeStructures(clusteringObject,
                    listStructures,
                    checker=lambda x: True,
                    outputPath="cluster.pdb"):
    """
        Write a pdb file for every selected cluster accepted by the checker

        :param clusteringObject: Clustering object to read, it is passed to
            utilities.readClusteringObject
        :param listStructures: Indices of the clusters to write, if None or
            empty all the clusters are considered
        :type listStructures: list
        :param checker: Function called with each cluster, only clusters for
            which it evaluates to True are written (None accepts all)
        :type checker: function
        :param outputPath: Output cluster pdb filename, the cluster index is
            inserted before the extension
        :type outputPath: str
    """
    loaded = utilities.readClusteringObject(clusteringObject)
    root, extension = os.path.splitext(outputPath)
    folder, basename = os.path.split(root + '_%d' + extension)
    if folder:
        utilities.makeFolder(folder)
        template = os.path.join(folder, basename)
    else:
        template = basename

    # With no explicit selection every cluster is a candidate
    if listStructures is None or len(listStructures) == 0:
        listStructures = range(len(loaded.clusters.clusters))

    for index in listStructures:
        cluster = loaded.clusters.clusters[index]
        if checker is not None and not checker(cluster):
            continue
        filename = template % index
        print("Writing", filename)
        cluster.writePDB(filename)
Esempio n. 5
0
def getWorkingClusteringObjectAndReclusterIfNecessary(
        firstRun, outputPathConstants, clusteringBlock, spawningParams,
        simulationRunner, topologies, processManager):
    """
        Load the clustering of the last finished epoch and build the one
        described in the clustering block; if a reclustering is needed the
        previous epochs are clustered again with the new method, otherwise the
        stored clustering is reused

        :param firstRun: New epoch to run
        :type firstRun: int
        :param outputPathConstants: Contains outputPath-related constants
        :type outputPathConstants: :py:class:`.OutputPathConstants`
        :param clusteringBlock: Contains the new clustering block
        :type clusteringBlock: json
        :param spawningParams: Spawning params, to know what reportFile and column to read
        :type spawningParams: :py:class:`.SpawningParams`
        :param simulationRunner: Object queried for the working processors and
            handed over to the reclustering
        :param topologies: Topology object containing the set of topologies needed for the simulation
        :type topologies: :py:class:`.Topology`
        :param processManager: Object to synchronize the possibly multiple processes
        :type processManager: :py:class:`.ProcessesManager`

        :returns: :py:class:`.Clustering` -- The clustering method to use in the
            adaptive sampling simulation (None for non-master processes)
    """
    def loadTopologyMappings():
        # read the topology mapping of every epoch before firstRun
        for epoch in range(firstRun):
            topologies.readMappingFromDisk(
                outputPathConstants.epochOutputPathTempletized % epoch, epoch)

    if not processManager.isMaster():
        # only the mappings are loaded outside the master process
        loadTopologyMappings()
        return

    previousObjectPath = outputPathConstants.clusteringOutputObject % (
        firstRun - 1)
    oldClusteringMethod = utilities.readClusteringObject(previousObjectPath)

    builder = clustering.ClusteringBuilder()
    clusteringMethod = builder.buildClustering(clusteringBlock,
                                               spawningParams.reportFilename,
                                               spawningParams.reportCol)
    clusteringMethod.setProcessors(simulationRunner.getWorkingProcessors())

    if not needToRecluster(oldClusteringMethod, clusteringMethod):
        # reuse the stored clustering, updating only the column to read
        oldClusteringMethod.setCol(spawningParams.reportCol)
        loadTopologyMappings()
        return oldClusteringMethod

    utilities.print_unbuffered("Reclustering!")
    startTime = time.time()
    clusterPreviousEpochs(clusteringMethod, firstRun,
                          outputPathConstants.epochOutputPathTempletized,
                          simulationRunner, topologies,
                          outputPathConstants.allTrajsPath)
    elapsed = time.time() - startTime
    utilities.print_unbuffered("Reclustering took %s sec" % elapsed)
    return clusteringMethod
Esempio n. 6
0
def getTopologyObject(topology_file):
    """
        Create the topology object that corresponds to the file extension

        :param topology_file: Path to a pdb file or to a pickled object
        :type topology_file: str

        :returns: A TopologyCompat built from the pdb, or the object read from
            the pickle file
        :raises ValueError: If the extension is neither .pdb nor .pkl
    """
    extension = utilities.getFileExtension(topology_file)
    if extension == ".pkl":
        return utilities.readClusteringObject(topology_file)
    if extension == ".pdb":
        return TopologyCompat(topology_file)
    raise ValueError(
        "The topology parameter needs to be the path to a pickled Topology object or a pdb!"
    )
Esempio n. 7
0
def main(args):
    """
        Build a pathway of clusters from cluster 0 to the optimal cluster,
        write it as a trajectory and print the degeneracies obtained when
        spawning along the pathway

        :param args: Parsed arguments; the attributes clusteringObj, native,
            pathwayFilename, ntrajs and threshold are read
    """
    # Parameters
    clusteringObj = utilities.readClusteringObject(args.clusteringObj)
    native = args.native
    pathwayFilename = args.pathwayFilename
    ntrajs = args.ntrajs
    threshold = args.threshold
    RMSDCalc = RMSDCalculator.RMSDCalculator(clusteringObj.symmetries)

    # use graph algorithm to establish a path
    initial_cluster = 0
    final_cluster = getOptimalCluster(clusteringObj, native, RMSDCalc)
    distanceMatrix = createNetworkMatrix(clusteringObj, threshold, RMSDCalc)
    predecessors = obtainShortestPath(distanceMatrix)
    pathway = createPathway(initial_cluster, final_cluster, predecessors)
    # print() with a single argument behaves the same in Python 2 and 3,
    # the former print statements were a syntax error in Python 3
    print("Pathway clusters:")
    print(pathway)

    # write pathway into a single trajectory
    writePathwayTrajectory(clusteringObj, pathway, pathwayFilename, native)

    # create clustering object with only the pathway clusters; a real list is
    # built because map() returns a one-shot iterator in Python 3
    ClPath = clustering.Clustering()
    ClPath.clusters.clusters = [clusteringObj.clusters.clusters[x]
                                for x in pathway]

    # spawning along the trajectory
    spawningParams = spawning.SpawningParams()
    densityCalculatorBuilder = densitycalculator.DensityCalculatorBuilder()
    densityCalculator = densityCalculatorBuilder.build({})
    spawningPathway = spawning.InverselyProportionalToPopulationCalculator(
        densityCalculator)
    # Set at least 1 processor at each of the extremes of the path, the
    # remaining ntrajs - 2 are distributed by the spawning calculator
    degeneracies = spawningPathway.calculate(ClPath.clusters.clusters,
                                             ntrajs - 2, spawningParams)
    degeneracies[0] += 1
    degeneracies[-1] += 1
    print("degeneracies over pathway:")
    print(degeneracies)
    print("")
Esempio n. 8
0
import matplotlib.pyplot as plt
# Select the "ggplot" matplotlib style for the figures of this script
plt.style.use("ggplot")


def reward_new(x, rews):
    """
        Return minus the sum of the element-wise product of x and rews

        :param x: Weights, multiplied directly with rews
        :param rews: Rewards

        :returns: Negative of the sum of x * rews
    """
    weighted = x * rews
    return -weighted.sum()


def reward(x, rews):
    """
        Return minus the sum of rews after multiplying it by x, where a new
        axis is inserted after the first axis of x before the product

        :param x: Weights, indexed as x[:, np.newaxis]
        :param rews: Rewards

        :returns: Negative of the sum of x[:, np.newaxis] * rews
    """
    column = x[:, np.newaxis]
    total = (column * rews).sum()
    return -total


folders = utilities.get_epoch_folders(".")
# Walk the epoch folders in reverse order and load the first clustering
# object that exists on disk.
# NOTE(review): cl_object stays undefined if no folder contains one
for folder in folders[::-1]:
    if os.path.exists(folder + "/clustering/object.pkl"):
        cl_object = utilities.readClusteringObject(folder +
                                                   "/clustering/object.pkl")
        break
# first_cluster = 0
trajToDivide = 144 * 2
# Accumulators and state for the loop over epochs below
rewardsEvol = []
weightsEvol = []
weightsEvol_new = []
weights = None
weights_new = None
metricInd = 4
labels = ["TE", "RMSD", "BE", "SASA"]
plots = True
# The first ten entries of folders are skipped
for folder in folders[10:]:
    print("")
    print("Epoch", folder)
    summary = np.loadtxt(folder + "/clustering/summary.txt")
Esempio n. 9
0
    T = 300
    gpmf = -kb * T * np.log(dist / volume)
    gpmf -= gpmf.min()
    if node is not None:
        np.savetxt("GPMF/gmpf_%d.dat" % node, gpmf)
    return gpmf.max()


filename = "differences_400_gpmf.dat"
# If the differences file already exists, plot it and stop here instead of
# running the rest of the script
if os.path.exists(filename):
    differences = np.loadtxt(filename)
    plt.plot(differences)
    plt.show()
    sys.exit()

m1 = utilities.readClusteringObject("MSM_object_0.pkl")
# m2 = utilities.readClusteringObject("/home/jgilaber/3PTB_free_energies/3ptb_PELE_short_steps/run2/400/MSM_0/MSM_object_0.pkl")

vol1 = np.loadtxt("volumeOfClusters_0.dat")
# NOTE(review): hard-coded absolute path, only valid on the original machine
vol2 = np.loadtxt(
    "/home/jgilaber/3PTB_free_energies/3ptb_PELE_short_steps/run2/400/MSM_0/volumeOfClusters_0.dat"
)

# When the active set does not have 100 elements, 1/100 is added to the full
# count matrix; otherwise the count matrix is used unchanged
if len(m1.active_set) != 100:
    c1 = m1.count_matrix_full + 1 / float(100)
    # trans = run.buildRevTransitionMatrix(c1)
    # eiv, eic = run.getSortedEigen(trans)
    # pi = run.getStationaryDistr(eic[:,0])
else:
    # pi = np.loadtxt("stationaryDistribution.dat")
    c1 = m1.count_matrix_full
Esempio n. 10
0
from builtins import range
import networkx as nx
from AdaptivePELE.utilities import utilities
from AdaptivePELE.atomset import RMSDCalculator


def weight(pathway, confs):
    """
        Add up the 'metric' value of every step of a pathway

        :param pathway: Sequence of nodes, each consecutive pair is one step
        :type pathway: list
        :param confs: Mapping such that confs[a][b]['metric'] is the metric
            of the step from a to b

        :returns: Sum of the metrics of all steps, 0 if there are no steps
    """
    # The pathway argument is used throughout; the previous version read the
    # module-level name `path` and ignored the argument
    w = 0
    for source, target in zip(pathway[:-1], pathway[1:]):
        w += confs[source][target]['metric']
    return w


metricCol = 4
RMSDCalc = RMSDCalculator.RMSDCalculator()
cl = utilities.readClusteringObject("ClCont.pkl")
nodeFin = cl.getOptimalMetric(column=metricCol)
# Load the conformation network from disk into a directed graph
conf = nx.DiGraph()
nx.read_edgelist("conformationNetwork_4DAJ.edgelist",
                 create_using=conf,
                 data=True,
                 nodetype=int)
# Store on every edge the RMSD between the two clusters it connects.
# edges() is used instead of edges_iter(), which was removed in networkx 2.0;
# only edge attributes are modified, the set of edges is not changed
for source, target in conf.edges():
    clusterSource = cl.getCluster(source)
    clusterTarget = cl.getCluster(target)
    conf[source][target]['metric'] = RMSDCalc.computeRMSD(
        clusterSource.pdb, clusterTarget.pdb)

# paths = nx.all_simple_paths(conf, 0, nodeFin)
paths = nx.shortest_simple_paths(conf, 0, nodeFin, weight='metric')
for i, path in enumerate(paths, start=1):
Esempio n. 11
0
        cluster_center = clustering.getCluster(cluster)
        RMSD_cluster = 0
        n = 0
        for cl in np.where(model.labels_ == i)[0]:
            if cluster == cl:
                continue
            else:
                RMSD_cluster += RMSDCalc.computeRMSD(clustering.getCluster(cl).pdb, cluster_center.pdb)
                n += 1
        RMSD_array.append(RMSD_cluster/float(n))
    return RMSD_array


n_clusters, clustering_object, output = parseArgs()
print("Reading clustering object")
cluster_object = clu.readClusteringObject(clustering_object)
# Collect, for every cluster, the value returned by pdb.getCOM(), its
# elements attribute and its contacts attribute
clusters = [cl.pdb.getCOM() for cl in cluster_object.clusterIterator()]
clusters_pop = np.array([cl.elements for cl in cluster_object.clusterIterator()])
clusters_contacts = np.array([cl.contacts for cl in cluster_object.clusterIterator()])
COMArray = np.array(clusters)
print("Number of adaptive clusters", len(clusters))
# Fit a KMeans model with n_clusters clusters on the collected getCOM() values
model = KMeans(n_clusters=n_clusters)
print("Reclustering")
model.fit(clusters)

# RMSDCalc = RMSDCalculator.RMSDCalculator()
# clusters_first = first_cluster_as_representative(model)
# clusters_pop = most_populated_cluster_as_representative(model, clusters_pop, n_clusters)
# distances_first = calculate_intercluster_distance(cluster_object, model, clusters_first, RMSDCalc)
# distances_pop = calculate_intercluster_distance(cluster_object, model, clusters_pop, RMSDCalc)
# print("Total intercluster distance first", sum(distances_first))
Esempio n. 12
0
def readClustering(clusteringPath, metricCol):
    """
        Read a clustering object and extract its network and cluster metrics

        :param clusteringPath: Path of the clustering object to read
        :type clusteringPath: str
        :param metricCol: Index of the metric to extract from each cluster
        :type metricCol: int

        :returns: tuple -- The clustering object, the network of its
            conformation network and the list with the selected metric of
            every cluster
    """
    print("Reading clustering object...")
    clusteringObject = utilities.readClusteringObject(clusteringPath)
    network = clusteringObject.conformationNetwork.network
    metrics = []
    for cluster in clusteringObject.clusterIterator():
        metrics.append(cluster.metrics[metricCol])
    return clusteringObject, network, metrics
Esempio n. 13
0
def main(nEigenvectors,
         nRuns,
         m,
         outputFolder,
         plotEigenvectors,
         plotGMRQ,
         plotPMF,
         clusters,
         lagtimes,
         native,
         save_plots,
         showPlots,
         filtered,
         destFolder,
         resname,
         plotTransitions=True):
    """
        Create eigenvector, GMRQ, PMF and SASA histogram plots for the MSM
        runs stored in destFolder

        :param nEigenvectors: Number of rows of the ``L`` matrix returned by
            ``rdl_decomposition`` to write and plot
        :type nEigenvectors: int
        :param nRuns: Number of runs to process
        :type nRuns: int
        :param m: Number of eigenvalues summed to obtain the GMRQ value
        :type m: int
        :param outputFolder: Folder where the plots are saved, if None and
            save_plots is set "plots_MSM" is used
        :type outputFolder: str
        :param plotEigenvectors: Whether to write and plot the eigenvectors
        :type plotEigenvectors: bool
        :param plotGMRQ: Whether to plot the GMRQ values
        :type plotGMRQ: bool
        :param plotPMF: Whether to plot the PMF
        :type plotPMF: bool
        :param clusters: Not read, it is replaced by the cluster centers
            loaded from disk
        :param lagtimes: Not used in this function
        :param native: Passed to get_min_Pos to obtain the reference position
        :param save_plots: Whether to save the plots to disk
        :type save_plots: bool
        :param showPlots: Whether to show the plots on screen
        :type showPlots: bool
        :param filtered: Index used to select a subset of the clusters, or
            None to use all of them
        :param destFolder: Folder with the MSM results
        :type destFolder: str
        :param resname: Passed to get_min_Pos
        :type resname: str
        :param plotTransitions: Whether to plot the histogram of the sasaLig
            column of the report files
        :type plotTransitions: bool
    """
    minPos = get_min_Pos(native, resname)
    if save_plots and outputFolder is None:
        outputFolder = "plots_MSM"
    if outputFolder is not None:
        eigenPlots = os.path.join(outputFolder, "eigenvector_plots")
        GMRQPlots = os.path.join(outputFolder, "GMRQ_plots")
        PMFPlots = os.path.join(outputFolder, "PMF_plots")
        TransitionPlots = os.path.join(outputFolder, "transitions")
    else:
        # outputFolder can only still be None when save_plots is not set, so
        # these folders are never used; os.path.join would fail on None
        eigenPlots = GMRQPlots = PMFPlots = TransitionPlots = None
    if save_plots and not os.path.exists(outputFolder):
        os.makedirs(outputFolder)
    if filtered is not None:
        filter_str = "_filtered"
    else:
        filter_str = ""
    if plotEigenvectors and save_plots and not os.path.exists(eigenPlots):
        os.makedirs(eigenPlots)
    if plotGMRQ and save_plots and not os.path.exists(GMRQPlots):
        os.makedirs(GMRQPlots)
    if plotPMF and save_plots and not os.path.exists(PMFPlots):
        os.makedirs(PMFPlots)
    if plotTransitions and save_plots and not os.path.exists(TransitionPlots):
        os.makedirs(TransitionPlots)
    minPos = np.array(minPos)
    # one GMRQ value per run is collected here when plotGMRQ is set
    GMRQValues = []
    print("Running from " + destFolder)

    if not os.path.exists(os.path.join(destFolder, "eigenvectors")):
        os.makedirs(os.path.join(destFolder, "eigenvectors"))
    for i in range(nRuns):
        titleVar = "%s, run %d" % (destFolder, i)
        if plotGMRQ or plotEigenvectors:
            msm_object = utilities.readClusteringObject(
                os.path.join(destFolder, "MSM_object_%d.pkl" % i))
        if plotGMRQ:
            GMRQValues.append(np.sum(msm_object.eigenvalues()[:m]))
        if plotEigenvectors or plotPMF:
            clusters = np.loadtxt(
                os.path.join(destFolder, "clusterCenters_%d.dat" % i))
            distance = np.linalg.norm(clusters - minPos, axis=1)
            volume = np.loadtxt(
                os.path.join(destFolder, "volumeOfClusters_%d.dat" % i))
            print("Total volume for system %s , run %d" % (destFolder, i),
                  volume.sum())
            if filtered is not None:
                volume = volume[filtered]
                clusters = clusters[filtered]
                distance = distance[filtered]
        if plotEigenvectors:
            # if the number of cluster values and stationary distribution
            # values differ, the transition matrix is reestimated from the
            # full count matrix
            if clusters.size != msm_object.stationary_distribution.size:
                mat = computeDeltaG.reestimate_transition_matrix(
                    msm_object.count_matrix_full)
            else:
                mat = msm_object.transition_matrix
            _, _, L = rdl_decomposition(mat)
            figures = []
            axes = []
            # four eigenvectors are drawn per figure, in a 2x2 grid
            for _ in range((nEigenvectors - 1) // 4 + 1):
                f, axarr = plt.subplots(2, 2, figsize=(12, 12))
                f.suptitle(titleVar)
                figures.append(f)
                axes.append(axarr)

            for j, row in enumerate(L[:nEigenvectors]):
                pdb_filename = os.path.join(destFolder, "eigenvectors",
                                            "eigen_%d_run_%d.pdb" % (j + 1, i))
                if j:
                    atomnames = utilities.getAtomNames(
                        utilities.sign(row, tol=1e-3))
                    utilities.write_PDB_clusters(clusters,
                                                 use_beta=False,
                                                 elements=atomnames,
                                                 title=pdb_filename)
                else:
                    # the first row is written as an extra column of the
                    # cluster data instead of being converted to atom names
                    utilities.write_PDB_clusters(np.vstack(
                        (clusters.T, row)).T,
                                                 use_beta=True,
                                                 elements=None,
                                                 title=pdb_filename)
                if filtered is not None:
                    row = row[filtered]
                np.savetxt(
                    os.path.join(
                        destFolder, "eigenvectors",
                        "eigen_%d_run_%d%s.dat" % (j + 1, i, filter_str)), row)
                ax = axes[j // 4][(j // 2) % 2, j % 2]
                ax.scatter(distance, row)
                ax.set_xlabel("Distance to minimum")
                ax.set_ylabel("Eigenvector %d" % (j + 1))
            if save_plots:
                for j, fg in enumerate(figures):
                    fg.savefig(
                        os.path.join(
                            eigenPlots, "eigenvector_%d_run_%d%s.png" %
                            (j + 1, i, filter_str)))
                plt.figure()
                plt.scatter(distance, L[0])
                plt.xlabel("Distance to minimum")
                plt.ylabel("Eigenvector 1")
                plt.savefig(
                    os.path.join(
                        eigenPlots,
                        "eigenvector_1_alone_run_%d%s.png" % (i, filter_str)))
        if plotPMF:
            data = np.loadtxt(os.path.join(destFolder, "pmf_xyzg_%d.dat" % i))
            g = data[:, -1]
            if filtered is not None:
                g = g[filtered]
            print("Clusters with less than 2 PMF:")
            print(" ".join(map(str, np.where(g < 2)[0])))
            print("")
            plt.figure()
            plt.title("%s" % (destFolder))
            plt.scatter(distance, g)
            plt.xlabel("Distance to minima")
            plt.ylabel("PMF")
            if save_plots:
                plt.savefig(
                    os.path.join(PMFPlots,
                                 "pmf_run_%d%s.png" % (i, filter_str)))
    if plotGMRQ and GMRQValues:
        # A single boxplot of all the collected values; the previous version
        # redrew the same figure once per value and formatted "GMRQ.png"
        # with an argument it had no placeholder for, raising a TypeError
        plt.figure()
        plt.title("%s" % (destFolder))
        plt.xlabel("Number of states")
        plt.ylabel("GMRQ")
        plt.boxplot(GMRQValues)
        if save_plots:
            plt.savefig(os.path.join(GMRQPlots, "GMRQ.png"))
    if plotTransitions:
        sasas = []
        # the report files are searched relative to the working directory
        for report_file in glob.glob("*/repor*"):
            sasas.extend(
                pd.read_csv(report_file, sep='    ',
                            engine='python')["sasaLig"].values)
        sasas = np.array(sasas)
        plt.figure()
        plt.title("%s" % (destFolder))
        plt.xlabel("SASA")
        plt.ylabel("Transition Counts")
        plt.hist(sasas, 50, alpha=0.75)
        if save_plots:
            plt.savefig(os.path.join(TransitionPlots, "transition_hist.png"))
    if showPlots and (plotEigenvectors or plotGMRQ or plotPMF):
        plt.show()
Esempio n. 14
0
    parser.add_argument("-c", "--cond", type=str, default="min", help="Condition on the metric optimality, options are max or min")
    parser.add_argument("-b", "--bindEn", type=int, default=None, help="Column of the binding energy in the report file")
    args = parser.parse_args()
    return args.clusteringObject, args.suffix, args.metricCol, args.o, args.cond, args.bindEn


if __name__ == "__main__":
    (clusteringObject, suffix, metricCol, outputPath, metricOptimization,
     bindingEnergy) = parseArguments()
    if outputPath is None:
        outputPath = ""
    else:
        # joining with an empty string guarantees a trailing separator, so
        # the file names can be appended by plain concatenation
        outputPath = os.path.join(outputPath, "")
        if not os.path.exists(outputPath):
            os.makedirs(outputPath)
    sys.stderr.write("Reading clustering object...\n")
    cl = utilities.readClusteringObject(clusteringObject)
    if cl.conformationNetwork is None:
        sys.exit("Clustering object loaded has no conformation network!!")
    conf = cl.conformationNetwork
    optimalCluster = cl.getOptimalMetric(metricCol, simulationType=metricOptimization)
    pathway = conf.createPathwayToCluster(optimalCluster)

    # each output is only written when its file does not exist yet
    networkFile = outputPath + "conformationNetwork%s.edgelist" % suffix
    if not os.path.exists(networkFile):
        sys.stderr.write("Writing conformation network...\n")
        conf.writeConformationNetwork(networkFile)

    fdtFile = outputPath + "FDT%s.edgelist" % suffix
    if not os.path.exists(fdtFile):
        sys.stderr.write("Writing FDT...\n")
        conf.writeFDT(fdtFile)

    pathwayFile = outputPath + "pathwayFDT%s.pdb" % suffix
    if not os.path.exists(pathwayFile):
        sys.stderr.write("Writing pathway to optimal cluster...\n")
        # cl.writePathwayOptimalCluster(outputPath+"pathwayFDT%s.pdb" % suffix)
        cl.writePathwayTrajectory(pathway, pathwayFile)
Esempio n. 15
0
def main(path, native, resname, nEigen, path_sasa):
    """
        Plot, for the first MSM of a simulation, the normalised values
        returned by calculate_q (summed over the first nEigen eigenvalues
        after the largest one) against the distance to the native position
        and against the SASA

        :param path: Folder with the MSM results (MSM_object_0.pkl,
            clusterCenters_0.dat and representative_structures)
        :type path: str
        :param native: Native structure used to initialise the reference PDB
        :type native: str
        :param resname: Residue name passed to the PDB object
        :type resname: str
        :param nEigen: Number of eigenvalues whose contributions are summed,
            at most 9 are available
        :type nEigen: int
        :param path_sasa: Passed to getSASAvalues
        :type path_sasa: str
    """
    MSM = utilities.readClusteringObject(os.path.join(path, "MSM_object_0.pkl"))
    Z = MSM.count_matrix_full
    np.set_printoptions(precision=4)
    k = Z.shape[0]
    # add a pseudocount of 1/k to the counts
    alpha = 1/float(k)
    U_counts = Z + alpha
    # Only the matrix built by buildRevTransitionMatrix is used; the
    # row-normalised matrix computed before was immediately overwritten by
    # it, and its eigenvalues were computed twice
    P = utils.buildRevTransitionMatrix(U_counts)
    eigvalues, _ = np.linalg.eig(P)
    eigvalues.sort()
    eigvalues = eigvalues[::-1]
    ek = np.zeros(k)
    ek[k-1] = 1.0
    variance = []
    nEigs = 10
    # index 0 (the largest eigenvalue) is skipped
    for index in range(1, nEigs):
        A = P - eigvalues[index]*np.eye(k)
        q = calculate_q(A, P, k, ek)
        variance.append(q/q.sum())
    sasa = getSASAvalues(os.path.join(path, "representative_structures", "representative_structures_0.dat"), 4, path_sasa)
    pdb_native = atomset.PDB()
    pdb_native.initialise(u"%s" % native, resname=resname)
    minim = pdb_native.getCOM()
    clusters = np.loadtxt(os.path.join(path, "clusterCenters_0.dat"))
    distance = np.linalg.norm(clusters-minim, axis=1)

    # sum the contributions of the selected eigenvalues and renormalise
    variance = np.array(variance[:nEigen])
    variance = variance.sum(axis=0)
    variance /= variance.sum()
    print(variance)
    f, axarr = plt.subplots(1, 2)
    axarr[0].scatter(distance, variance)
    axarr[0].set_xlabel("Distance to minimum")
    axarr[0].set_ylabel("Variance")
    axarr[1].scatter(sasa, variance)
    axarr[1].set_xlabel("SASA")
    # label the second panel (the first panel was labelled twice before)
    axarr[1].set_ylabel("Variance")
    f.suptitle("Variance for eigenvalues 2-%d" % (nEigen+1))
    plt.show()
    parser = argparse.ArgumentParser(description=desc)
    parser.add_argument("path", type=str, help="Path to the simulation")
    parser.add_argument("resname", type=str, help="Resname in the pdb")
    parser.add_argument("nEpochs", type=int, help="Number of epochs to cluster")
    parser.add_argument("ntrajs", type=int, help="Number of trajectories per epoch")
    parser.add_argument("--altSel", action="store_true", help="Whether to use alternative selection")
    parser.add_argument("--writeClusters", action="store_true", help="Whether to write pdb structures for the clusters")
    args = parser.parse_args()
    return args.path, args.resname, args.nEpochs, args.ntrajs, args.altSel, args.writeClusters


if __name__ == "__main__":
    simulation_path, resname, nEpochs, ntrajs, altSel, writeClusters = parseArguments()
    contactThresholdDistance = 8
    altSel = False
    top_object = utilities.readClusteringObject("%s/topologies/topologies.pkl" % simulation_path)

    thresholdCalculatorBuilder = thresholdcalculator.ThresholdCalculatorBuilder()
    thresholdCalculator = thresholdCalculatorBuilder.build({
        "thresholdCalculator": {
            "type": "heaviside",
            "params": {
                "values": [2, 3, 4, 5],
                "conditions": [1, 0.75, 0.5]
            }
        }
    })
    thresholdCalculator = thresholdCalculatorBuilder.build({
        "thresholdCalculator": {
            "type": "constant",
            "params": {
Esempio n. 17
0
            snapshots = utilities.getSnapshots(traj, topology=topology)
            for snapshot in snapshots:
                PDBobj = atomset.PDB()
                PDBobj.initialise(snapshot, resname=resname, topology=topology_contents)
                yield PDBobj
    else:
        for cluster in clAcc.clusters.clusters:
            yield cluster.pdb

if __name__ == "__main__":
    traj_name, clustering, nRes, lig_resname, contactThreshold, top = parseArguments()

    # The clustering object is only loaded when one was specified. A single
    # name is used in both branches; before, the None branch assigned
    # clusterAcc while the rest of the code read clusetrAcc, which raised a
    # NameError when no clustering was given
    if clustering is None:
        clusterAcc = None
    else:
        clusterAcc = utilities.readClusteringObject(clustering)
    # keep the old misspelled name defined in case later code still reads it
    clusetrAcc = clusterAcc

    totalAcc = []
    symEval = SymmetryContactMapEvaluator.SymmetryContactMapEvaluator()
    refPDB = None

    for pdb in generateConformations(lig_resname, clusterAcc, traj_name, top):
        # the first conformation generated is kept as reference
        if refPDB is None:
            refPDB = pdb
        contactMap, foo = symEval.createContactMap(pdb, lig_resname, contactThreshold)
        # reduce the contact map along axis 0 with a boolean sum and
        # accumulate the result as integers over all the conformations
        contacts = contactMap.sum(axis=0, dtype=bool).astype(int)
        if len(totalAcc):
            totalAcc += contacts
        else:
            totalAcc = contacts

    proteinList = symEval.proteinList
Esempio n. 18
0
def main(nEigenvectors, nRuns, m, outputFolder, plotEigenvectors, plotGMRQ,
         plotPMF, clusters, lagtimes, minPos, save_plots, showPlots, filtered,
         destFolder, sasa_col, path_to_report):
    """
        Plot diagnostic information (eigenvectors, GMRQ and PMF) for the MSM
        runs stored in destFolder

        :param nEigenvectors: Number of eigenvectors (rows of the last matrix
            returned by rdl_decomposition) to write and plot
        :type nEigenvectors: int
        :param nRuns: Number of runs to process, files are indexed from 0 to
            nRuns-1
        :type nRuns: int
        :param m: Number of leading eigenvalues summed to obtain the GMRQ
            value of each run
        :type m: int
        :param outputFolder: Folder where the plots are saved, if None and
            save_plots is set it defaults to "plots_MSM"
        :type outputFolder: str
        :param plotEigenvectors: Whether to plot the eigenvectors
        :type plotEigenvectors: bool
        :param plotGMRQ: Whether to plot the GMRQ values
        :type plotGMRQ: bool
        :param plotPMF: Whether to plot the PMF
        :type plotPMF: bool
        :param clusters: Not read, it is overwritten with the cluster centers
            loaded from destFolder
        :param lagtimes: Not used in this function
        :param minPos: Coordinates of the minimum used as reference for the
            distances
        :param save_plots: Whether to save the plots to disk
        :type save_plots: bool
        :param showPlots: Whether to show the plots on screen
        :type showPlots: bool
        :param filtered: Numpy index selecting a subset of the clusters, None
            to use all of them
        :param destFolder: Folder with the files of the MSM runs
        :type destFolder: str
        :param sasa_col: Column passed to getSASAvalues, None to skip the
            SASA values
        :param path_to_report: Report path passed to getSASAvalues
    """
    if save_plots and outputFolder is None:
        outputFolder = "plots_MSM"
    if outputFolder is not None:
        eigenPlots = os.path.join(outputFolder, "eigenvector_plots")
        GMRQPlots = os.path.join(outputFolder, "GMRQ_plots")
        PMFPlots = os.path.join(outputFolder, "PMF_plots")
        if save_plots and not os.path.exists(outputFolder):
            os.makedirs(outputFolder)
    filter_str = "_filtered" if filtered is not None else ""
    if plotEigenvectors and save_plots and not os.path.exists(eigenPlots):
        os.makedirs(eigenPlots)
    if plotGMRQ and save_plots and not os.path.exists(GMRQPlots):
        os.makedirs(GMRQPlots)
    if plotPMF and save_plots and not os.path.exists(PMFPlots):
        os.makedirs(PMFPlots)
    minPos = np.array(minPos)
    GMRQValues = []
    print("Running from", destFolder)

    eigenvectorsFolder = os.path.join(destFolder, "eigenvectors")
    if not os.path.exists(eigenvectorsFolder):
        os.makedirs(eigenvectorsFolder)

    # Stays None when no PMF figure is created (e.g. nRuns == 0), so that the
    # interactive annotations are only connected to an existing figure
    fig_pmf = None
    for i in range(nRuns):
        if sasa_col is not None:
            representatives_files = os.path.join(
                destFolder,
                "representative_structures/representative_structures_%d.dat" %
                i)
            sasa = getSASAvalues(representatives_files, sasa_col,
                                 path_to_report)

        titleVar = "%s, run %d" % (destFolder, i)
        if plotGMRQ or plotEigenvectors:
            msm_object = utilities.readClusteringObject(
                os.path.join(destFolder, "MSM_object_%d.pkl" % i))
        if plotGMRQ:
            GMRQValues.append(np.sum(msm_object.eigenvalues()[:m]))
        if plotEigenvectors or plotPMF:
            clusters = np.loadtxt(
                os.path.join(destFolder, "clusterCenters_%d.dat" % i))
            distance = np.linalg.norm(clusters - minPos, axis=1)
            volume = np.loadtxt(
                os.path.join(destFolder, "volumeOfClusters_%d.dat" % i))
            print("Total volume for system %s , run %d" % (destFolder, i),
                  volume.sum())
            if filtered is not None:
                volume = volume[filtered]
                clusters = clusters[filtered]
                distance = distance[filtered]
                if sasa_col is not None:
                    sasa = sasa[filtered]
        if plotEigenvectors:
            if clusters.size != msm_object.stationary_distribution.size:
                mat = computeDeltaG.reestimate_transition_matrix(
                    msm_object.count_matrix_full)
            else:
                mat = msm_object.transition_matrix
            _, _, L = rdl_decomposition(mat)
            figures = []
            axes = []
            # One 2x2 figure for every group of four eigenvectors
            for _ in range((nEigenvectors - 1) // 4 + 1):
                f, axarr = plt.subplots(2, 2, figsize=(12, 12))
                f.suptitle(titleVar)
                figures.append(f)
                axes.append(axarr)

            for j, row in enumerate(L[:nEigenvectors]):
                pdb_filename = os.path.join(eigenvectorsFolder,
                                            "eigen_%d_run_%d.pdb" % (j + 1, i))
                if j:
                    atomnames = utilities.getAtomNames(
                        utilities.sign(row, tol=1e-3))
                    utilities.write_PDB_clusters(clusters,
                                                 use_beta=False,
                                                 elements=atomnames,
                                                 title=pdb_filename)
                else:
                    # The first eigenvector is written as an extra column
                    # next to the cluster coordinates
                    utilities.write_PDB_clusters(np.vstack(
                        (clusters.T, row)).T,
                                                 use_beta=True,
                                                 elements=None,
                                                 title=pdb_filename)
                if filtered is not None:
                    row = row[filtered]
                np.savetxt(
                    os.path.join(
                        eigenvectorsFolder,
                        "eigen_%d_run_%d%s.dat" % (j + 1, i, filter_str)), row)
                # Figure j // 4, panels filled row by row
                ax = axes[j // 4][(j // 2) % 2, j % 2]
                ax.scatter(distance, row)
                ax.set_xlabel("Distance to minimum")
                ax.set_ylabel("Eigenvector %d" % (j + 1))
            # NOTE(review): Q (and L[0] below) are not indexed with
            # "filtered" while distance is -- confirm this is intended
            Q = msm_object.count_matrix_full.diagonal(
            ) / msm_object.count_matrix_full.sum()
            plt.figure()
            plt.scatter(distance, Q)
            plt.xlabel("Distance to minimum")
            plt.ylabel("Metastability")
            if save_plots:
                plt.savefig(
                    os.path.join(eigenPlots,
                                 "Q_run_%d%s.png" % (i, filter_str)))
                for j, fg in enumerate(figures):
                    fg.savefig(
                        os.path.join(
                            eigenPlots, "eigenvector_%d_run_%d%s.png" %
                            (j + 1, i, filter_str)))
                plt.figure()
                plt.scatter(distance, L[0])
                plt.xlabel("Distance to minimum")
                plt.ylabel("Eigenvector 1")
                plt.savefig(
                    os.path.join(
                        eigenPlots,
                        "eigenvector_1_alone_run_%d%s.png" % (i, filter_str)))
        if plotPMF:
            data = np.loadtxt(os.path.join(destFolder, "pmf_xyzg_%d.dat" % i))
            g = data[:, -1]
            # A separate index name keeps the run index i untouched, it is
            # still needed below for the name of the saved figure
            annotations = ["Cluster %d" % k for k in range(g.size)]
            if filtered is not None:
                g = g[filtered]
                annotations = np.array(annotations)[filtered].tolist()
            print("Clusters with less than 2 PMF:")
            print(" ".join(map(str, np.where(g < 2)[0])))
            print("")
            fig_pmf, axarr = plt.subplots(2, 2, figsize=(12, 12))
            fig_pmf.suptitle(titleVar)
            sc1 = axarr[1, 0].scatter(distance, g)
            sc2 = axarr[0, 1].scatter(distance, volume)
            sc3 = axarr[0, 0].scatter(g, volume)
            # pmf_axes, scs and annot_list share the same ordering, the hover
            # callback relies on it
            pmf_axes = [axarr[0, 1], axarr[1, 0], axarr[0, 0]]
            scs = [sc2, sc1, sc3]
            if sasa_col is not None:
                axarr[1, 1].scatter(sasa, g)
            axarr[1, 0].set_xlabel("Distance to minima")
            axarr[1, 0].set_ylabel("PMF")
            axarr[0, 1].set_xlabel("Distance to minima")
            axarr[0, 1].set_ylabel("Volume")
            axarr[0, 0].set_xlabel("PMF")
            axarr[0, 0].set_ylabel("Volume")
            annot1 = axarr[1, 0].annotate("",
                                          xy=(0, 0),
                                          xytext=(20, 20),
                                          textcoords="offset points",
                                          bbox=dict(boxstyle="round", fc="w"),
                                          arrowprops=dict(arrowstyle="->"))
            annot1.set_visible(False)
            annot2 = axarr[0, 1].annotate("",
                                          xy=(0, 0),
                                          xytext=(20, 20),
                                          textcoords="offset points",
                                          bbox=dict(boxstyle="round", fc="w"),
                                          arrowprops=dict(arrowstyle="->"))
            annot2.set_visible(False)
            annot3 = axarr[0, 0].annotate("",
                                          xy=(0, 0),
                                          xytext=(20, 20),
                                          textcoords="offset points",
                                          bbox=dict(boxstyle="round", fc="w"),
                                          arrowprops=dict(arrowstyle="->"))
            annot3.set_visible(False)
            annot_list = [annot2, annot1, annot3]
            if sasa_col is not None:
                axarr[1, 1].set_xlabel("SASA")
                axarr[1, 1].set_ylabel("PMF")
            if save_plots:
                fig_pmf.savefig(
                    os.path.join(PMFPlots,
                                 "pmf_run_%d%s.png" % (i, filter_str)))
    if plotGMRQ and GMRQValues:
        # A single boxplot with the values of all the runs; the original
        # redrew the same plot once per run and formatted a file name that
        # has no placeholder, which raised a TypeError when saving
        plt.figure()
        plt.title("%s" % (destFolder))
        plt.xlabel("Number of states")
        plt.ylabel("GMRQ")
        plt.boxplot(GMRQValues)
        if save_plots:
            plt.savefig(os.path.join(GMRQPlots, "GMRQ.png"))
    if showPlots and (plotEigenvectors or plotGMRQ or plotPMF):
        # The interactive annotations are attached to the PMF figure of the
        # last run processed
        if plotPMF and fig_pmf is not None:

            def update_annot(ind, sc, annot):
                """Update the information box of the selected point"""
                pos = sc.get_offsets()[ind["ind"][0]]
                annot.xy = pos
                annot.set_text(annotations[int(ind["ind"][0])])

            def hover(event):
                """Action to perform when hovering the mouse on a point"""
                for idx, ax_comp in enumerate(pmf_axes):
                    vis = annot_list[idx].get_visible()
                    if event.inaxes == ax_comp:
                        # Hide the boxes of the other panels
                        for other in range(len(pmf_axes)):
                            if other != idx:
                                annot_list[other].set_visible(False)
                        cont, ind = scs[idx].contains(event)
                        if cont:
                            update_annot(ind, scs[idx], annot_list[idx])
                            annot_list[idx].set_visible(True)
                            fig_pmf.canvas.draw_idle()
                        else:
                            if vis:
                                annot_list[idx].set_visible(False)
                                fig_pmf.canvas.draw_idle()

            fig_pmf.canvas.mpl_connect("motion_notify_event", hover)
        plt.show()