Code example #1
0
def main(num_clusters, output_folder, ligand_resname, atom_ids, traj_folder):
    """Cluster the ligand coordinates of a simulation with k-means.

    Extracts the ligand coordinates from *traj_folder*, clusters them,
    writes the cluster centers as a PDB file and the structure closest to
    each center.

    :param num_clusters: Number of k-means clusters to build
    :param output_folder: Subfolder (under traj_folder) for the outputs,
        created if missing; if None, outputs are written to the cwd
    :param ligand_resname: Residue name of the ligand
    :param atom_ids: Atom ids used to track the ligand
    :param traj_folder: Folder containing the simulation trajectories
    :returns: tuple (folder with the projected trajectories, folder with
        the discretized trajectories)
    """
    extractCoords.main(folder_name=traj_folder,
                       lig_resname=ligand_resname,
                       non_Repeat=True,
                       atom_Ids=atom_ids)
    trajectoryFolder = "allTrajs"
    trajectoryBasename = "*trajectory*"
    stride = 1
    clusterCountsThreshold = 0

    clusteringObject = cluster.Cluster(num_clusters,
                                       trajectoryFolder,
                                       trajectoryBasename,
                                       alwaysCluster=False,
                                       stride=stride)
    clusteringObject.clusterTrajectories()
    clusteringObject.eliminateLowPopulatedClusters(clusterCountsThreshold)
    clusterCenters = clusteringObject.clusterCenters

    centersInfo = get_centers_info(trajectoryFolder, trajectoryBasename,
                                   num_clusters, clusterCenters)
    # range, not the Python2-only xrange, keeps this consistent with the
    # rest of the file and Python3-compatible
    COMArray = [centersInfo[i]['center'] for i in range(num_clusters)]
    if output_folder is not None:
        outputFolder = os.path.join(traj_folder, output_folder)
        if not os.path.exists(outputFolder):
            os.makedirs(outputFolder)
    else:
        outputFolder = ""
    writePDB(
        COMArray,
        os.path.join(outputFolder,
                     "clusters_%d_KMeans_allSnapshots.pdb" % num_clusters))
    writeInitialStructures(centersInfo, outputFolder, traj_folder)
    return trajectoryFolder, "discretized"
Code example #2
0
def main(ligand, clusters_file, conf_folder, topology=None):
    """Discretize a simulation onto a fixed set of cluster centers and
    write every snapshot into the folder of its assigned cluster.

    :param ligand: Ligand residue name (unused in the body; kept for
        interface compatibility with the caller)
    :param clusters_file: Text file with the cluster-center coordinates
    :param conf_folder: Simulation folder containing the epoch subfolders
    :param topology: Optional topology file for non-PDB trajectories
    """
    trajFolder = "allTrajs_nonRepeat"
    cluster_centers = np.loadtxt(clusters_file)
    if not os.path.exists("discretized"):
        os.makedirs("discretized")
    if not os.path.exists(trajFolder):
        os.makedirs(trajFolder)
    stride = 1
    clusterCountsThreshold = 0
    trajBasename = "coord*"
    if topology is not None:
        topology_contents = utilities.getTopologyFile(topology)
    else:
        topology_contents = None
    epoch_folders = utilities.get_epoch_folders(conf_folder)
    numClusters = cluster_centers.shape[0]
    coordinates = [[] for cl in range(numClusters)]
    # Gather the extracted coordinates of every epoch into a single folder
    for it in epoch_folders:
        # os.path.join works whether or not conf_folder has a trailing
        # slash (the previous string concatenation required one)
        files = glob.glob(
            os.path.join(conf_folder, it, "extractedCoordinates", "coord*"))
        for f in files:
            traj = os.path.splitext(f)[0].split("_")[-1]
            shutil.copy(f, trajFolder + "/coord_%s_%s.dat" % (it, traj))
    clusteringObject = cluster.Cluster(numClusters,
                                       trajFolder,
                                       trajBasename,
                                       alwaysCluster=False,
                                       stride=stride)
    clusteringObject.clusterTrajectories()
    clusteringObject.eliminateLowPopulatedClusters(clusterCountsThreshold)
    for i in range(numClusters):
        if not os.path.exists("cluster_%d" % i):
            os.makedirs("cluster_%d/allStructures" % i)
    dtrajs_files = glob.glob("discretized/*.disctraj")
    for dtraj in dtrajs_files:
        print(dtraj)
        traj = np.loadtxt(dtraj)
        # Filenames follow the pattern <name>_<epoch>_<traj>.disctraj
        epoch, traj_num = map(int,
                              os.path.splitext(dtraj)[0].split("_", 3)[1:])
        trajPositions = np.loadtxt(trajFolder + "/coord_%d_%d.dat" %
                                   (epoch, traj_num))
        trajFile = glob.glob(
            os.path.join(conf_folder, "%d" % epoch,
                         "trajectory_%d*" % traj_num))[0]
        snapshots = utilities.getSnapshots(trajFile, topology=topology)
        for nSnap, cluster_num in enumerate(traj):
            coordinates[int(cluster_num)].append(trajPositions[nSnap])
            filename = "cluster_%d/allStructures/conf_%d_%d_%d.pdb" % (
                cluster_num, epoch, traj_num, nSnap)
            # str instead of the Python2-only basestring (NameError on py3)
            if isinstance(snapshots[nSnap], str):
                with open(filename, "w") as fw:
                    fw.write(snapshots[nSnap])
            else:
                utilities.write_mdtraj_object_PDB(snapshots[nSnap], filename,
                                                  topology_contents)
    for cl in range(numClusters):
        np.savetxt("cluster_%d/positions.dat" % cl, coordinates[cl])
Code example #3
0
def cluster_TICA_space(numClusters, trajectoryFolder, trajectoryBasename,
                       stride, clusterCountsThreshold):
    """Run k-means over the TICA-projected trajectories.

    :param numClusters: Number of clusters to build
    :param trajectoryFolder: Folder with the projected trajectories
    :param trajectoryBasename: Glob pattern matching the trajectory files
    :param stride: Stride used when reading the trajectories
    :param clusterCountsThreshold: Minimum population below which a
        cluster is discarded
    :returns: the fitted clustering object
    """
    kmeans = cluster.Cluster(numClusters, trajectoryFolder,
                             trajectoryBasename, alwaysCluster=False,
                             stride=stride)
    kmeans.clusterTrajectories()
    kmeans.eliminateLowPopulatedClusters(clusterCountsThreshold)
    return kmeans
Code example #4
0
File: plotQ.py  Project: cescgina/PyTools
def main(lagtimes,
         clusters_file,
         disctraj,
         trajs,
         n_clusters,
         plots_path,
         save_plot,
         show_plot,
         lagtime_resolution=20):
    """Compute a per-cluster metastability measure Q over several lagtimes.

    For each lagtime a count matrix is estimated and Q is the fraction of
    all transition counts that are self-transitions of each cluster.  The
    clusters above a 0.01 threshold at the largest lagtime are reported,
    results are written to cluster_Q.pdb and plotted.

    :param lagtimes: Iterable of lagtimes at which to estimate the MSM
    :param clusters_file: File with precomputed cluster centers (used
        directly when disctraj is given, or to only assign otherwise)
    :param disctraj: Folder with precomputed discretized trajectories, or
        None to (re)discretize here
    :param trajs: Folder/pattern with the projected trajectories
    :param n_clusters: Number of clusters
    :param plots_path: Folder for the plots (created if needed); "" = cwd
    :param save_plot: Whether to save the plots
    :param show_plot: Whether to display the plots
    :param lagtime_resolution: Unused here; kept for interface parity
        with sibling scripts
    """
    if disctraj is not None:
        # Reuse previously discretized trajectories and cluster centers
        dtraj_files = glob.glob(os.path.join(disctraj, "*traj*.disctraj"))
        dtrajs = [np.loadtxt(f, dtype=int) for f in dtraj_files]
        clusterCenters = np.loadtxt(clusters_file)
    else:
        clusteringObject = cluster.Cluster(n_clusters,
                                           trajs,
                                           "traj*",
                                           alwaysCluster=False,
                                           discretizedPath=disctraj)
        if clusters_file is not None:
            # only assign
            clusteringObject.clusterCentersFile = clusters_file
        clusteringObject.clusterTrajectories()
        clusterCenters = clusteringObject.clusterCenters
        dtrajs = clusteringObject.dtrajs
    # Q[l][k]: fraction of all counts at lagtime l that are
    # self-transitions of cluster k (diagonal of the full count matrix)
    Q = []
    for lag in lagtimes:
        msm_obj = msm.estimate_markov_model(dtrajs, lag)
        counts = msm_obj.count_matrix_full
        Q.append(counts.diagonal() / counts.sum())
    Q = np.array(Q)

    print("Clusters over 0.01 metastability")
    correlation_limit = 0.01
    # Clusters whose self-transition fraction at the largest lagtime
    # exceeds the threshold
    states2 = np.where(Q[-1] > correlation_limit)[0]
    size2 = states2.size
    if len(states2):
        print(" ".join(map(str, states2)))
    print("Number of clusters:", size2,
          ", %.2f%% of the total" % (100 * size2 / float(n_clusters)))
    # NOTE(review): Q[:-1].T appends all but the LAST lagtime's Q values
    # as extra columns; if the last lagtime's Q was intended this should
    # be Q[-1:].T -- confirm against write_PDB_clusters' expectations
    utilities.write_PDB_clusters(np.hstack((clusterCenters, Q[:-1].T)),
                                 use_beta=True,
                                 title="cluster_Q.pdb")
    if plots_path is None:
        plots_path = ""
    else:
        utilities.makeFolder(plots_path)
    create_plots(Q,
                 plots_path,
                 save_plot,
                 show_plot,
                 n_clusters,
                 lagtimes,
                 threshold=2.0)
Code example #5
0
File: estimateDG.py  Project: leelasd/AdaptivePELE
def getRepresentativePDBs(filesWildcard, run):
    """Assign the trajectories matching *filesWildcard* to the existing
    cluster centers and record the representative snapshot of each one.

    The table is written to
    representative_structures/representative_structures_<run>.dat with
    one line per cluster (epoch, trajectory and snapshot indices).
    """
    coord_files = glob.glob(filesWildcard)
    # Drop the first column (snapshot index) of every coordinates file
    projected = [utilities.loadtxtfile(f)[:, 1:] for f in coord_files]
    clusterer = cluster.Cluster(0, "", "")
    clusterer.clusterCenters = utilities.loadtxtfile(clusterer.clusterCentersFile)
    assignments = clusterer.assignNewTrajectories(projected)
    n_centers = clusterer.clusterCenters.shape[0]
    centers_info = getCentersInfo(clusterer.clusterCenters, projected,
                                  coord_files, assignments)

    if not os.path.exists("representative_structures"):
        os.makedirs("representative_structures")
    out_name = "representative_structures/representative_structures_%d.dat" % run
    with open(out_name, "w") as fw:
        fw.write("Cluster\tEpoch\tTrajectory\tSnapshot\n")
        for idx in range(n_centers):
            fw.write("%d\t%s\n" % (idx, "\t".join(centers_info[idx]["structure"])))
Code example #6
0
def main(num_clusters,
         output_folder,
         ligand_resname,
         atom_ids,
         folder_name=".",
         topology=None):
    """Extract the ligand coordinates of a simulation, cluster them with
    k-means and write the cluster centers plus one initial structure per
    cluster.

    :param num_clusters: Number of k-means clusters to build
    :param output_folder: Folder for the output files (created if
        missing); None writes to the cwd
    :param ligand_resname: Residue name of the ligand
    :param atom_ids: Atom ids used to track the ligand
    :param folder_name: Simulation folder containing the epochs
    :param topology: Optional topology file for non-PDB trajectories
    """
    extractCoords.main(folder_name,
                       lig_resname=ligand_resname,
                       non_Repeat=True,
                       atom_Ids=atom_ids)
    traj_dir = "allTrajs"
    traj_pattern = "traj*"

    epoch_dirs = utilities.get_epoch_folders(folder_name)
    epoch_dirs.sort(key=int)

    # Remove any previous discretization so the trajectories are
    # clustered again from scratch
    if os.path.exists("discretized"):
        shutil.rmtree("discretized")
    kmeans = cluster.Cluster(num_clusters,
                             traj_dir,
                             traj_pattern,
                             alwaysCluster=False,
                             stride=1)
    kmeans.clusterTrajectories()
    kmeans.eliminateLowPopulatedClusters(0)
    centers = kmeans.clusterCenters

    centersInfo = get_centers_info(traj_dir, traj_pattern,
                                   num_clusters, centers)
    # Keep only the x, y, z columns of each center
    com_coords = [centersInfo[i]['center'][:3] for i in range(num_clusters)]
    if output_folder is None:
        outputFolder = ""
    else:
        # Trailing separator so plain concatenation below forms a path
        outputFolder = os.path.join(output_folder, "")
        if not os.path.exists(outputFolder):
            os.makedirs(outputFolder)
    writePDB(
        com_coords,
        outputFolder + "clusters_%d_KMeans_allSnapshots.pdb" % num_clusters)
    writeInitialStructures(centersInfo,
                           outputFolder + "initial_%d.pdb",
                           topology=topology)
Code example #7
0
def main(control_file):
    """Cluster the trajectories described by a control file and estimate
    a Markov state model from the resulting discrete trajectories.

    :param control_file: Path to the control file read by readParams
    """
    # Unpack only the parameters this script needs; the remaining slots
    # of the control file are ignored
    (trajectoryFolder, trajectoryBasename, numClusters, stride, lagtimes,
     _, _, numberOfITS, _, _, lagtime,
     clusterCountsThreshold) = readParams(control_file)

    clustering = cluster.Cluster(numClusters,
                                 trajectoryFolder,
                                 trajectoryBasename,
                                 alwaysCluster=False,
                                 stride=stride)
    clustering.clusterTrajectories()
    clustering.eliminateLowPopulatedClusters(clusterCountsThreshold)
    msm_estimator = estimate.MSM(error=False, dtrajs=clustering.dtrajs)
    msm_estimator.estimate(lagtime=lagtime,
                           lagtimes=lagtimes,
                           numberOfITS=numberOfITS)
Code example #8
0
def main(lagtime,
         clusters_file,
         disctraj,
         trajs,
         n_clusters,
         plots_path,
         save_plot,
         show_plot,
         lagtime_resolution=20):
    """Compute the autocorrelation of each cluster over a lagtime scan and
    report the clusters that have not decorrelated.

    :param lagtime: Maximum lagtime of the scan
    :param clusters_file: File with precomputed cluster centers (used to
        only assign, or loaded directly when disctraj is given)
    :param disctraj: Folder with precomputed discretized trajectories, or
        None to discretize here
    :param trajs: Folder with the projected trajectories
    :param n_clusters: Expected number of clusters
    :param plots_path: Folder for the plots (created if needed); "" = cwd
    :param save_plot: Whether to save the plots
    :param show_plot: Whether to display the plots
    :param lagtime_resolution: Step of the lagtime scan (larger values
        mean fewer sampled lagtimes)
    :raises ValueError: if the loaded centers do not match n_clusters
    """
    lagtimes = list(range(1, lagtime, lagtime_resolution))
    n_lags = len(lagtimes)
    if disctraj is None:
        clusteringObject = cluster.Cluster(n_clusters,
                                           trajs,
                                           "traj*",
                                           alwaysCluster=False)
        if clusters_file is not None:
            # only assign
            utilities.makeFolder(clusteringObject.discretizedFolder)
            clusteringObject.clusterCentersFile = clusters_file
        clusteringObject.clusterTrajectories()
        disctraj = clusteringObject.discretizedFolder
        clusterCenters = clusteringObject.clusterCenters
    else:
        clusterCenters = utilities.loadtxtfile(clusters_file)
    if len(clusterCenters) != n_clusters:
        raise ValueError(
            "Number of clusters specified in the -n parameter does not match the provided clusters"
        )
    print("Calculating autocorrelation...")
    dtrajs = glob.glob(os.path.join(disctraj, "traj*"))
    dtrajs_loaded = [
        utilities.loadtxtfile(dtraj, dtype=int) for dtraj in dtrajs
    ]

    # autoCorr[k, l]: autocorrelation of cluster k at lagtimes[l]
    autoCorr = utils.calculateAutoCorrelation(lagtimes, dtrajs_loaded,
                                              n_clusters, n_lags)
    np.save("autoCorr.npy", autoCorr)
    # __cleanupFiles(parameters.trajWildcard, False)

    # Last-lagtime autocorrelation written as the beta column of the PDB
    utilities.write_PDB_clusters(np.vstack(
        (clusterCenters.T, np.abs(autoCorr[:, -1]))).T,
                                 use_beta=True,
                                 title="cluster_autoCorr.pdb")
    print("Clusters over correlation time limit")
    # 1/e threshold: clusters above it have not reached their
    # correlation time at the largest lagtime
    correlation_limit = np.exp(-1)
    states2 = np.where(autoCorr[:, -1] > correlation_limit)[0]
    size2 = states2.size
    if len(states2):
        print(" ".join(map(str, states2)))
    print("Number of clusters:", size2,
          ", %.2f%% of the total" % (100 * size2 / float(n_clusters)))
    print("Clusters with more than 0.1 autocorrelation")
    states1 = np.where(autoCorr[:, -1] > 0.1)[0]
    size1 = states1.size
    if len(states1):
        print(" ".join(map(str, states1)))
    print("Number of clusters:", size1,
          ", %.2f%% of the total" % (100 * size1 / float(n_clusters)))
    if size2 > 0:
        print("Correlation time not achieved at lagtime %d" % lagtime)
    else:
        # Walk the lagtimes backwards to find the largest one at which
        # some cluster was still above the threshold
        for i in range(len(lagtimes)):
            states = np.where(autoCorr[:, -i - 1] > correlation_limit)[0]
            if len(states):
                string_states = ", ".join(map(str, states))
                # -(i + 1) matches the autoCorr column inspected above;
                # the previous lagtimes[-i] was off by one (lagtimes[-0]
                # is lagtimes[0], not the last element)
                print("Correlation time %d, for states: %s" %
                      (lagtimes[-(i + 1)], string_states))
                break

    if plots_path is None:
        plots_path = ""
    else:
        utilities.makeFolder(plots_path)
    create_plots(autoCorr,
                 plots_path,
                 save_plot,
                 show_plot,
                 n_clusters,
                 lagtimes,
                 threshold=2.0)
Code example #9
0
def main(num_clusters,
         criteria1,
         criteria2,
         ligand_resname,
         output_folder="ClusterCentroids",
         atom_ids="",
         cpus=2,
         topology=None,
         report="report_",
         traj="trajectory_",
         use_pdb=False,
         png=False,
         CA=0,
         sidechains=0,
         restart="all"):
    """Cluster the ligand COM of a simulation, collect two report metrics
    for the representative snapshot of each cluster and write the cluster
    structures and plots.

    :param num_clusters: Number of k-means clusters to build
    :param criteria1: Report column for the first metric
    :param criteria2: Report column for the second metric
    :param ligand_resname: Residue name of the ligand
    :param output_folder: Folder for the outputs; None writes to the cwd
    :param atom_ids: Atom ids used to track the ligand
    :param cpus: Number of worker processes
    :param topology: Optional topology file for non-PDB trajectories
    :param report: Basename of the report files
    :param traj: Basename of the trajectory files
    :param use_pdb: Whether the trajectories are PDB files
    :param png: Whether to save the plots as png
    :param CA: Whether to extract protein CA coordinates
    :param sidechains: Whether to extract sidechain coordinates
    :param restart: If falsy, run clustering and build Simulation.csv;
        otherwise reuse Simulation.csv and clustercenters.dat from a
        previous run
    """
    # Worker pool for coordinate extraction and report parsing
    pool = mp.Pool(cpus if cpus > 1 else 1)
    # Extract the ligand COM for each snapshot (skip if already done)
    if not glob.glob("allTrajs/traj*"):
        extractCoords.main(lig_resname=ligand_resname,
                           non_Repeat=True,
                           atom_Ids=atom_ids,
                           nProcessors=cpus,
                           parallelize=True,
                           topology=topology,
                           protein_CA=CA,
                           sidechains=sidechains)

    print("Clusterize trajectories by RMSD of COM")
    trajectoryFolder = "allTrajs"
    trajectoryBasename = "*traj*"
    stride = 1
    clusterCountsThreshold = 0
    folders = utilities.get_epoch_folders(".")
    folders.sort(key=int)
    if not restart:
        clusteringObject = cluster.Cluster(num_clusters,
                                           trajectoryFolder,
                                           trajectoryBasename,
                                           alwaysCluster=True,
                                           stride=stride)
        clusteringObject.clusterTrajectories()
        clusteringObject.eliminateLowPopulatedClusters(clusterCountsThreshold)
        clusterCenters = clusteringObject.clusterCenters
        np.savetxt("clustercenters.dat", clusterCenters)
        dtrajs = clusteringObject.dtrajs

        print("Extract metrics for each snapshot")
        # (removed: an unused min_metric_trajs dict and an unused epochs
        # list whose str.isdigit() filter could never match glob paths
        # such as "./0/" and therefore was always empty)
        reports = simulationToCsv.gather_reports()
        fields = simulationToCsv.retrieve_fields(reports[0])
        df = simulationToCsv.init_df(fields)
        df = simulationToCsv.fill_data(reports, df, pool)

        print("Update data with metrics and clusters")
        df.index = range(df.shape[0])
        df["Cluster"] = [None] * df.shape[0]
        input_list = [[
            df, Traj, d
        ] for d, Traj in zip(dtrajs, clusteringObject.trajFilenames)]
        results = pool.map(save_to_df, input_list)
        for data in results:
            for df_tmp in data:
                df.update(df_tmp)
        df.to_csv("Simulation.csv", index=False)
    else:
        # Reuse the results of a previous clustering run
        df = pd.read_csv("Simulation.csv")
        clusterCenters = utilities.loadtxtfile("clustercenters.dat")
        print(clusterCenters)
    centersInfo = get_centers_info(trajectoryFolder, trajectoryBasename,
                                   num_clusters, clusterCenters)
    COMArray = [centersInfo[i]['center'] for i in range(num_clusters)]

    print("Retrieve clusters and metric")
    fields1 = []
    fields2 = []
    print(centersInfo)
    for cluster_num in centersInfo:
        epoch_num, traj_num, snap_num = map(
            int, centersInfo[cluster_num]['structure'])
        field1, crit1_name = get_metric(criteria1, epoch_num, traj_num,
                                        snap_num, report)
        field2, crit2_name = get_metric(criteria2, epoch_num, traj_num,
                                        snap_num, report)
        fields1.append(field1)
        fields2.append(field2)

    if output_folder is not None:
        # Trailing separator so plain concatenation below forms a path
        outputFolder = os.path.join(output_folder, "")
        if not os.path.exists(outputFolder):
            os.makedirs(outputFolder)
    else:
        outputFolder = ""
    print("Output structures")
    writePDB(
        COMArray,
        outputFolder + "clusters_%d_KMeans_allSnapshots.pdb" % num_clusters)
    writeInitialStructures(fields1,
                           fields2,
                           crit1_name,
                           crit2_name,
                           centersInfo,
                           outputFolder + "cluster_{}_{}_{}_{}_{}.pdb",
                           traj,
                           topology=topology,
                           use_pdb=use_pdb)
    plotClusters(fields1,
                 fields2,
                 crit1_name,
                 crit2_name,
                 outputFolder,
                 png=png)
    assesClusterConvergence(df, num_clusters, traj, topology)
    return
Code example #10
0
def main(num_clusters,
         criteria1,
         criteria2,
         output_folder,
         ligand_resname,
         atom_ids,
         cpus=2,
         topology=None,
         report="report_",
         traj="trajectory_",
         use_pdb=False):
    """Cluster the ligand coordinates of a simulation, collect two report
    metrics for the representative snapshot of each cluster and write the
    cluster structures and a metric plot.

    :param num_clusters: Number of k-means clusters to build
    :param criteria1: Report column for the first metric
    :param criteria2: Report column for the second metric
    :param output_folder: Folder for the outputs; None writes to the cwd
    :param ligand_resname: Residue name of the ligand
    :param atom_ids: Atom ids used to track the ligand
    :param cpus: Number of processors for the coordinate extraction
    :param topology: Optional topology file for non-PDB trajectories
    :param report: Basename of the report files
    :param traj: Basename of the trajectory files
    :param use_pdb: Whether the trajectories are PDB files
    """
    # Extract the ligand coordinates for each snapshot (skip if done)
    if not glob.glob("*/extractedCoordinates/coord_*"):
        extractCoords.main(lig_resname=ligand_resname,
                           non_Repeat=True,
                           atom_Ids=atom_ids,
                           nProcessors=cpus,
                           parallelize=False,
                           topology=topology,
                           use_pdb=use_pdb)
    trajectoryFolder = "allTrajs"
    trajectoryBasename = "*traj*"
    stride = 1
    clusterCountsThreshold = 0
    folders = utilities.get_epoch_folders(".")
    folders.sort(key=int)

    clusteringObject = cluster.Cluster(num_clusters,
                                       trajectoryFolder,
                                       trajectoryBasename,
                                       alwaysCluster=True,
                                       stride=stride)
    clusteringObject.clusterTrajectories()
    clusteringObject.eliminateLowPopulatedClusters(clusterCountsThreshold)
    clusterCenters = clusteringObject.clusterCenters
    centersInfo = get_centers_info(trajectoryFolder, trajectoryBasename,
                                   num_clusters, clusterCenters)
    # range, not the Python2-only xrange, keeps this consistent with the
    # rest of the file and Python3-compatible
    COMArray = [centersInfo[i]['center'] for i in range(num_clusters)]

    fields1 = []
    fields2 = []
    for cluster_num in centersInfo:
        epoch_num, traj_num, snap_num = map(
            int, centersInfo[cluster_num]['structure'])
        field1, crit1_name = get_metric(criteria1, epoch_num, traj_num,
                                        snap_num, report)
        field2, crit2_name = get_metric(criteria2, epoch_num, traj_num,
                                        snap_num, report)
        fields1.append(field1)
        fields2.append(field2)

    if output_folder is not None:
        # Trailing separator so plain concatenation below forms a path
        outputFolder = os.path.join(output_folder, "")
        if not os.path.exists(outputFolder):
            os.makedirs(outputFolder)
    else:
        outputFolder = ""
    writePDB(
        COMArray,
        outputFolder + "clusters_%d_KMeans_allSnapshots.pdb" % num_clusters)
    writeInitialStructures(fields1,
                           fields2,
                           crit1_name,
                           crit2_name,
                           centersInfo,
                           outputFolder + "cluster_{}_{}_{}_{}_{}.pdb",
                           traj,
                           topology=topology,
                           use_pdb=use_pdb)
    plotClusters(fields1, fields2, crit1_name, crit2_name, outputFolder)