Example #1
def main(metricCol, lig_resname, nTrajs, filter_val, stride, atomId, saving_frequency, trajectory_name, report_name, topology=None):
    folders = utilities.get_epoch_folders(".")
    data = []
    confData = []
    for epoch in folders:
        print("Processing epoch %s" % epoch)
        for iTraj in range(1, nTrajs):
            report = np.loadtxt("%s/%s_%d" % (epoch, report_name, iTraj))
            if len(report.shape) < 2:
                report = report[np.newaxis, :]
            traj_file = glob.glob("%s/%s_%d.*" % (epoch, trajectory_name, iTraj))[0]
            snapshots = utilities.getSnapshots(traj_file, topology=topology)
            for i, snapshot in enumerate(itertools.islice(snapshots, 0, None, stride)):
                report_line = i * stride * saving_frequency
                data.append(get_coords(snapshot, atomId, lig_resname) + [report[report_line, metricCol]])
                confData.append((epoch, iTraj, report_line))

    data = np.array(data)
    minInd = np.argmin(data[:, -1])
    minMetric = data[minInd, -1]
    data[:, -1] -= minMetric
    if filter_val is not None:
        data_filter = data.copy()
        # cap only the metric column; clamping the whole array would also
        # distort the stored coordinates
        data_filter[data_filter[:, -1] > filter_val, -1] = filter_val
        namesPDB = utilities.write_PDB_clusters(data_filter, title="cluster_metric.pdb", use_beta=True)
    else:
        namesPDB = utilities.write_PDB_clusters(data, title="cluster_metric.pdb", use_beta=True)
    print("Min value for metric", minMetric, namesPDB[minInd])

    with open("conformation_data.dat", "w") as fw:
        fw.write("PDB name      Epoch Trajectory   Snapshot   COM x       y       z     Metric\n")
        for j, name in enumerate(namesPDB):
            info = [name.rjust(8)]+[str(x).rjust(10) for x in confData[j]]+[str(np.round(d, 3)).rjust(7) for d in data[j, :-1]] + [str(np.round(data[j, -1], 2)).rjust(10)]
            fw.write("{:s} {:s} {:s} {:s} {:s} {:s} {:s} {:s}\n".format(*tuple(info)))
Example #2
def cleanPreviousSimulation(output_path, allTrajs):
    """
        Clean the unneeded data from a previous simulation

        :param output_path: Path where the data is stored
        :type output_path: str
        :param allTrajs: Path where the discretized trajectories for MSM are stored
        :type allTrajs: str
    """
    equilibration_folders = glob.glob(
        os.path.join(output_path, "equilibration*"))
    for folder in equilibration_folders:
        try:
            shutil.rmtree(folder)
        except OSError as exc:
            if exc.errno != errno.ENOENT:
                raise
            # If another process deleted the folder between the glob and the
            # actual removing an OSError is raised
    epochs = utilities.get_epoch_folders(output_path)
    for epoch in epochs:
        try:
            shutil.rmtree(os.path.join(output_path, epoch))
        except OSError as exc:
            if exc.errno != errno.ENOENT:
                raise
    try:
        shutil.rmtree(allTrajs)
    except OSError as exc:
        # this folder may not exist, in which case we just carry on
        if exc.errno != errno.ENOENT:
            raise
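The try/except/errno pattern repeats three times in this function (and again
in Example #12 below); a small helper, not part of AdaptivePELE, could factor
it out:

import errno
import shutil

def silent_rmtree(path):
    # remove a directory tree, tolerating the case where another process
    # already deleted it between the listing and the removal
    try:
        shutil.rmtree(path)
    except OSError as exc:
        if exc.errno != errno.ENOENT:
            raise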
def main(col_energy, folder, out_report_name, format_out, nProcessors,
         output_folder, new_report, reportName, trajs_to_select):
    """
        Calculate the relative SASA values of the ligand

        :param col_energy: Column corresponding to the energy in the reports
        :type col_energy: int
        :param folder: Path the simulation
        :type folder: str
        :param out_report_name: Name of the output file
        :type out_report_name: str
        :param format_out: String with the format of the output
        :type format_out: str
        :param nProcessors: Number of processors to use
        :type nProcessors: int
        :param output_folder: Path where to store the new reports
        :type output_folder: str
        :param new_report: Whether to create new reports
        :type new_report: bool
    """
    # Constants
    if output_folder is not None:
        out_report_name = os.path.join(output_folder, out_report_name)
    outputFilename = "_".join([out_report_name, "%d"])
    trajName = "*traj*"
    if reportName is None:
        reportName = "report_%d"
    else:
        reportName += "_%d"
    if nProcessors is None:
        nProcessors = utilities.getCpuCount()
    nProcessors = max(1, nProcessors)
    print("Standarizing energy with %d processors" % nProcessors)
    epochs = utilities.get_epoch_folders(folder)
    files = []
    if not epochs:
        # path does not contain an adaptive simulation, we'll try to retrieve
        # trajectories from the specified path
        files = analysis_utils.process_folder(
            None, folder, trajName, reportName,
            os.path.join(folder, outputFilename), None, trajs_to_select)
    for epoch in epochs:
        print("Epoch", epoch)
        files.extend(
            analysis_utils.process_folder(
                epoch, folder, trajName, reportName,
                os.path.join(folder, epoch, outputFilename), None,
                trajs_to_select))
    pool = mp.Pool(nProcessors)
    results = [
        pool.apply_async(process_file,
                         args=(info[1], info[4], format_out, new_report,
                               info[3], col_energy)) for info in files
    ]
    pool.close()
    pool.join()
    for res in results:
        res.get()
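The apply_async/close/join/get sequence used above is the standard way to
surface exceptions raised inside worker processes; a self-contained sketch of
the same pattern:

import multiprocessing as mp

def square(n):
    return n * n

if __name__ == "__main__":
    pool = mp.Pool(2)
    results = [pool.apply_async(square, args=(i,)) for i in range(4)]
    pool.close()
    pool.join()
    # get() returns the worker's result and re-raises any exception it hit
    print([res.get() for res in results])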
def main(resname, folder, top, out_report_name, format_out, nProcessors,
         output_folder, new_report):
    """
        Calculate the relative SASA values of the ligand

        :param resname: Ligand resname
        :type resname: str
        :param folder: Path to the simulation
        :type folder: str
        :param top: Path to the topology
        :type top: str
        :param out_report_name: Name of the output file
        :type out_report_name: str
        :param format_out: String with the format of the output
        :type format_out: str
        :param nProcessors: Number of processors to use
        :type nProcessors: int
        :param output_folder: Path where to store the new reports
        :type output_folder: str
        :param new_report: Whether to create new reports
        :type new_report: bool
    """
    # Constants
    if output_folder is not None:
        out_report_name = os.path.join(output_folder, out_report_name)
    outputFilename = "_".join([out_report_name, "%d"])
    trajName = "*traj*"
    reportName = "*report*_%d"
    if nProcessors is None:
        nProcessors = utilities.getCpuCount()
    nProcessors = max(1, nProcessors)
    print("Calculating SASA with %d processors" % nProcessors)
    pool = mp.Pool(nProcessors)
    epochs = utilities.get_epoch_folders(folder)
    if top is not None:
        top_obj = getTopologyObject(top)
    else:
        top_obj = None
    files = []
    if not epochs:
        # path does not contain an adaptive simulation, we'll try to retrieve
        # trajectories from the specified path
        files = process_folder(None, folder, trajName, reportName,
                               os.path.join(folder, outputFilename), top_obj)
    for epoch in epochs:
        print("Epoch", epoch)
        files.extend(
            process_folder(epoch, folder, trajName, reportName,
                           os.path.join(folder, epoch, outputFilename),
                           top_obj))
    results = []
    for info in files:
        results.append(
            pool.apply_async(process_file,
                             args=(info[0], info[2], resname, info[1], info[4],
                                   format_out, new_report, info[3])))
    pool.close()
    pool.join()
    for res in results:
        res.get()
Example #5
def main(ligand, clusters_file, conf_folder, topology=None):
    trajFolder = "allTrajs_nonRepeat"
    cluster_centers = np.loadtxt(clusters_file)
    if not os.path.exists("discretized"):
        os.makedirs("discretized")
    if not os.path.exists(trajFolder):
        os.makedirs(trajFolder)
    stride = 1
    clusterCountsThreshold = 0
    trajBasename = "coord*"
    if topology is not None:
        topology_contents = utilities.getTopologyFile(topology)
    else:
        topology_contents = None
    epoch_folders = utilities.get_epoch_folders(conf_folder)
    numClusters = cluster_centers.shape[0]
    coordinates = [[] for cl in range(numClusters)]
    for it in epoch_folders:
        files = glob.glob(os.path.join(conf_folder, it,
                                       "extractedCoordinates", "coord*"))
        for f in files:
            traj = os.path.splitext(f)[0].split("_")[-1]
            shutil.copy(f, os.path.join(trajFolder, "coord_%s_%s.dat" % (it, traj)))
    clusteringObject = cluster.Cluster(numClusters,
                                       trajFolder,
                                       trajBasename,
                                       alwaysCluster=False,
                                       stride=stride)
    clusteringObject.clusterTrajectories()
    clusteringObject.eliminateLowPopulatedClusters(clusterCountsThreshold)
    for i in range(numClusters):
        # check the leaf directory, otherwise an existing cluster folder
        # without the allStructures subfolder would be skipped
        if not os.path.exists("cluster_%d/allStructures" % i):
            os.makedirs("cluster_%d/allStructures" % i)
    dtrajs_files = glob.glob("discretized/*.disctraj")
    for dtraj in dtrajs_files:
        print(dtraj)
        traj = np.loadtxt(dtraj)
        epoch, traj_num = map(int,
                              os.path.splitext(dtraj)[0].split("_", 3)[1:])
        trajPositions = np.loadtxt(trajFolder + "/coord_%d_%d.dat" %
                                   (epoch, traj_num))
        trajFile = glob.glob(
            os.path.join(conf_folder, "%d" % epoch,
                         "trajectory_%d*" % traj_num))[0]
        snapshots = utilities.getSnapshots(trajFile, topology=topology)
        for nSnap, cluster_num in enumerate(traj):
            coordinates[int(cluster_num)].append(trajPositions[nSnap])
            filename = "cluster_%d/allStructures/conf_%d_%d_%d.pdb" % (
                cluster_num, epoch, traj_num, nSnap)
            if isinstance(snapshots[nSnap], str):
                with open(filename, "w") as fw:
                    fw.write(snapshots[nSnap])
            else:
                utilities.write_mdtraj_object_PDB(snapshots[nSnap], filename,
                                                  topology_contents)
    for cl in range(numClusters):
        np.savetxt("cluster_%d/positions.dat" % cl, coordinates[cl])
Example #6
def main(top_path):
    sim_folder = os.path.abspath(os.path.join(top_path, os.path.pardir))
    epochs = utilities.get_epoch_folders(sim_folder)
    top = utilities.Topology(top_path)
    topology_files = glob.glob(os.path.join(top_path, "topology*.pdb"))
    topology_files.sort(key=utilities.getTrajNum)
    top.setTopologies(topology_files)
    for epoch in epochs:
        top.readMappingFromDisk(os.path.join(sim_folder, epoch), int(epoch))
    top.writeTopologyObject()
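A minimal invocation, assuming the usual AdaptivePELE layout where the
topologies folder sits inside the simulation output directory (the path is a
placeholder):

main("simulation_output/topologies")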
Example #7
def main(residues, folder, top, out_report_name, format_out, nProcessors, output_folder, new_report, trajs_to_select):
    """
        Calculate the distances between pairs of atoms

        :param residues: Pairs of atoms to calculate distances
        :type residues: list
        :param folder: Path to the simulation
        :type folder: str
        :param top: Path to the topology
        :type top: str
        :param out_report_name: Name of the output file
        :type out_report_name: str
        :param format_out: String with the format of the output
        :type format_out: str
        :param nProcessors: Number of processors to use
        :type nProcessors: int
        :param output_folder: Path where to store the new reports
        :type output_folder: str
        :param new_report: Whether to create new reports
        :type new_report: bool
        :param trajs_to_select: Indices of the reports to read; all are read if None
        :type trajs_to_select: set
    """
    # Constants
    if output_folder is not None:
        out_report_name = os.path.join(output_folder, out_report_name)
    outputFilename = "_".join([out_report_name, "%d"])
    trajName = "*traj*"
    reportName = "*report*_%d"
    distances_label = "\t".join(residues)
    residues = parse_selection(residues)
    if nProcessors is None:
        nProcessors = utilities.getCpuCount()
    nProcessors = max(1, nProcessors)
    print("Calculating distances with %d processors" % nProcessors)
    epochs = utilities.get_epoch_folders(folder)
    if top is not None:
        top_obj = utilities.getTopologyObject(top)
    else:
        top_obj = None
    files = []
    if not epochs:
        # path does not contain an adaptive simulation, we'll try to retrieve
        # trajectories from the specified path
        files = analysis_utils.process_folder(None, folder, trajName, reportName, os.path.join(folder, outputFilename), top_obj, trajs_to_select)
    for epoch in epochs:
        print("Epoch", epoch)
        files.extend(analysis_utils.process_folder(epoch, folder, trajName, reportName, os.path.join(folder, epoch, outputFilename), top_obj, trajs_to_select))
    print("Starting to process files!")
    pool = mp.Pool(nProcessors)
    results = [pool.apply_async(process_file, args=(info[0], info[2], residues, info[1], info[4], format_out, new_report, info[3], distances_label)) for info in files]
    pool.close()
    pool.join()
    for res in results:
        res.get()
def main(trajectory_name, path, n_processors, imaging):
    epochs = utilities.get_epoch_folders(path)
    to_process = []
    pool = mp.Pool(n_processors)
    trajectory_glob = trajectory_name + "_*"
    for epoch in epochs:
        with open(os.path.join(path, epoch, "topologyMapping.txt")) as f:
            top_map = f.read().rstrip().split(":")
        for traj in glob.glob(os.path.join(path, epoch, trajectory_glob)):
            traj_num = utilities.getTrajNum(traj)
            to_process.append(
                (top_map[traj_num - 1], traj, epoch, traj_num, imaging))

    pool.map(process_traj, to_process)
    pool.close()
    pool.join()
def main(num_clusters,
         output_folder,
         ligand_resname,
         atom_ids,
         folder_name=".",
         topology=None):
    extractCoords.main(folder_name,
                       lig_resname=ligand_resname,
                       non_Repeat=True,
                       atom_Ids=atom_ids)
    trajectoryFolder = "allTrajs"
    trajectoryBasename = "traj*"
    stride = 1
    clusterCountsThreshold = 0

    folders = utilities.get_epoch_folders(folder_name)
    folders.sort(key=int)

    if os.path.exists("discretized"):
        # If there is a previous clustering, remove to cluster again
        shutil.rmtree("discretized")
    clusteringObject = cluster.Cluster(num_clusters,
                                       trajectoryFolder,
                                       trajectoryBasename,
                                       alwaysCluster=False,
                                       stride=stride)
    clusteringObject.clusterTrajectories()
    clusteringObject.eliminateLowPopulatedClusters(clusterCountsThreshold)
    clusterCenters = clusteringObject.clusterCenters

    centersInfo = get_centers_info(trajectoryFolder, trajectoryBasename,
                                   num_clusters, clusterCenters)
    COMArray = [centersInfo[i]['center'][:3] for i in range(num_clusters)]
    if output_folder is not None:
        outputFolder = os.path.join(output_folder, "")
        if not os.path.exists(outputFolder):
            os.makedirs(outputFolder)
    else:
        outputFolder = ""
    writePDB(
        COMArray,
        outputFolder + "clusters_%d_KMeans_allSnapshots.pdb" % num_clusters)
    writeInitialStructures(centersInfo,
                           outputFolder + "initial_%d.pdb",
                           topology=topology)
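writePDB is not defined in any of these snippets; a hypothetical stand-in that
dumps each center of mass as a dummy HETATM record (assuming three coordinates
per center) could look like this:

def writePDB(com_array, filename):
    # hypothetical helper: one HETATM record per cluster center, so the
    # centers can be inspected alongside the structures in a viewer
    with open(filename, "w") as fw:
        for i, (x, y, z) in enumerate(com_array, start=1):
            fw.write("HETATM%5d  CM  CLS Z%4d    %8.3f%8.3f%8.3f  1.00  0.00\n"
                     % (i, i, x, y, z))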
def main(sim_path, n_trajs, trajectory_name, plot_name, residues_selected):
    # since we remove the water molecules, any topology file will be fine
    info1, info2 = parse_selection(residues_selected)
    cache_file = "distances.npy"
    if not os.path.exists(cache_file):
        global_traj = None
        trajectory_name = "_%d".join(os.path.splitext(trajectory_name))

        epochs = utilities.get_epoch_folders(sim_path)
        for epoch in epochs:
            with open(os.path.join(sim_path, epoch,
                                   "topologyMapping.txt")) as f:
                top_map = f.read().rstrip().split(":")
            for i in range(1, n_trajs + 1):
                print("Processing epoch", epoch, "trajectory", i)
                trajectory = md.load(os.path.join(sim_path, epoch,
                                                  trajectory_name % i),
                                     top=os.path.join(
                                         sim_path, "topologies",
                                         "topology_%s.pdb" % top_map[i - 1]))
                if global_traj is None:
                    global_traj = trajectory.remove_solvent()
                    atom1 = global_traj.top.select(
                        "resname '%s' and residue %s and name %s" % info1)
                    atom2 = global_traj.top.select(
                        "resname '%s' and residue %s and name %s" % info2)
                    if atom1.size == 0 or atom2.size == 0:
                        raise ValueError(
                            "Nothing found under current selection")
                else:
                    global_traj += trajectory.remove_solvent()
        distance = 10 * md.compute_distances(global_traj,
                                             [atom1.tolist() + atom2.tolist()])
        np.save(cache_file, distance)
    else:
        distance = np.load(cache_file)
    f1, ax1 = plt.subplots(1, 1)
    ax1.plot(distance, 'x-')
    ax1.set_ylabel(r"Distance %s ($\AA$)" % residues_selected)
    if plot_name is not None:
        f1.savefig(plot_name)
    plt.show()
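parse_selection is another helper that never appears in these examples.
Judging from how its output is consumed ("resname '%s' and residue %s and name
%s" % info), it must return (resname, residue number, atom name) tuples; a
sketch under the assumption that a selection is written as two
resname:resnum:atomname specifiers joined by a dash:

def parse_selection(selection):
    # hypothetical parser: "LIG:1:C1-ALA:45:CA" -> two (resname, resnum,
    # atom name) tuples; the separator convention is an assumption
    part1, part2 = selection.split("-")
    return tuple(part1.split(":")), tuple(part2.split(":"))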
Example #11
def main(metricCol, lig_resname, nTrajs, stride, atomId, saving_frequency):
    folders = utilities.get_epoch_folders(".")
    box_center = None
    templateLine = "HETATM%s    H BOX Z 501    %s%s%s  0.75%s            H  \n"
    for epoch in folders:
        print("Processing epoch %s" % epoch)
        data = []
        confData = []
        maxEpoch = -1
        maxEpochCoords = None
        for iTraj in range(1, nTrajs):
            report = np.loadtxt("%s/report_%d" % (epoch, iTraj))
            if len(report.shape) < 2:
                report = report[np.newaxis, :]
            maxTrajIndex = np.argmax(report[:, metricCol])
            snapshots = utilities.getSnapshots("%s/trajectory_%d.pdb" % (epoch, iTraj))
            for i, snapshot in enumerate(itertools.islice(snapshots, 0, None, stride)):
                report_line = i * stride * saving_frequency
                data.append(get_coords(snapshot, atomId, lig_resname) + [report[report_line, metricCol]])
                confData.append((epoch, iTraj, report_line))
            if report[maxTrajIndex, metricCol] > maxEpoch:
                maxEpoch = report[maxTrajIndex, metricCol]
                maxEpochCoords = get_coords(snapshots[maxTrajIndex], atomId, lig_resname)
            if box_center is None and iTraj == 1:
                box_center = data[0][:3]
        data = np.array(data)
        minInd = np.argmin(data[:, -1])
        minMetric = data[minInd, -1]
        data[:, -1] -= minMetric
        utilities.write_PDB_clusters(data, title="epoch_%s.pdb" % epoch, use_beta=True)
        print("Max value for metric", maxEpoch, maxEpochCoords)
        with open("epoch_%s.pdb" % epoch, "a") as fa:
            fa.write("TER\n")
            serial = ("%d" % data.shape[0]).rjust(5)
            x = ("%.3f" % box_center[0]).rjust(8)
            y = ("%.3f" % box_center[1]).rjust(8)
            z = ("%.3f" % box_center[2]).rjust(8)
            g = ("%.2f" % 0).rjust(6)
            fa.write(templateLine % (serial, x, y, z, g))
        box_center = maxEpochCoords
Example #12
def cleanPreviousSimulation(output_path):
    """
        Clean the unneeded data from a previous simulation

        :param output_path: Path where the data is stored
        :type output_path: str
    """
    equilibration_folders = glob.glob(os.path.join(output_path, "equilibration*"))
    for folder in equilibration_folders:
        try:
            shutil.rmtree(folder)
        except OSError as exc:
            if exc.errno != errno.ENOENT:
                raise
            # If another process deleted the folder between the glob and the
            # actual removing an OSError is raised
    epochs = utilities.get_epoch_folders(output_path)
    for epoch in epochs:
        try:
            shutil.rmtree(os.path.join(output_path, epoch))
        except OSError as exc:
            if exc.errno != errno.ENOENT:
                raise
Example #13
def createPlot(reportName,
               column1,
               column2,
               stepsPerRun,
               printWithLines,
               paletteModifier,
               trajs_range=None,
               label_x=None,
               label_y=None,
               label_colorbar=None,
               fig_size=(6, 6),
               simulation_path=".",
               skip_first_step=False,
               skip_steps=None,
               y_top=None,
               y_bottom=None,
               x_left=None,
               x_right=None):
    """
        Generate a string to be passed to gnuplot

        :param reportName: Name of the files containing the simulation data
        :type reportName: str
        :param column1: Column to plot in the X axis
        :type column1: int
        :param column2: Column to plot in the Y axis
        :type column2: int
        :param stepsPerRun: Number of steps per epoch,
        :type stepsPerRun: int
        :param paletteModifier: Whether to use the epoch as color or a column
        :type paletteModifier: int
        :param trajs_range: Range of trajectories to plot
        :type trajs_range: str
        :param label_x: Label of the x-axis
        :type label_x: str
        :param label_y: Label of the y-axis
        :type label_y: str
        :param label_colorbar: Label of the colorbar
        :type label_colorbar: str
        :param fig_size: Size of the plot figure (default (6in, 6in))
        :type fig_size: tuple
        :param simulation_path: Path to the simulation data
        :type simulation_path: str
        :param skip_first_step: Whether to avoid plotting the first point in each report
        :type skip_first_step: bool
        :param skip_steps: Number of steps to skip in the plot
        :type skip_steps: int
        :param y_bottom: Bottom limit of the y axis
        :type y_bottom: float
        :param y_top: Top limit of the y axis
        :type y_top: float
        :param x_left: Left limit of the x axis
        :type x_bottom: float
        :param x_right: Right limit of the x axis
        :type x_right: float
    """
    epochs = utilities.get_epoch_folders(simulation_path)
    numberOfEpochs = len(epochs)
    if numberOfEpochs == 0:
        raise ValueError("No simulation found in specified path %s"
                         % os.path.abspath(simulation_path))
    cmap_name = "viridis"

    dictionary = {
        'reportName': reportName,
        'col2': column2,
        'numberOfEpochs': numberOfEpochs,
        'col1': column1,
        'withLines': printWithLines,
        'color': paletteModifier
    }
    annotations = []
    artists = []
    trajectory_range = set()
    if trajs_range is not None:
        start, end = map(int, trajs_range.split(":"))
        trajectory_range = set(range(start, end + 1))
    cmin = 1e10
    cmax = -1e10
    data_dict = {}
    max_report = 0
    min_report = 1e10
    for epoch in epochs:
        ep = int(epoch)
        reports = utilities.getReportList(
            os.path.join(simulation_path, epoch, reportName + "*"))
        if not reports:
            raise ValueError(
                "Could not find any reports with the given name!!")
        for report in reports:
            report_num = utilities.getReportNum(report)
            max_report = max(max_report, report_num)
            min_report = min(min_report, report_num)
            if trajs_range is not None and report_num not in trajectory_range:
                continue
            data = utilities.loadtxtfile(report)
            if skip_steps is not None:
                if data.shape[0] <= skip_steps:
                    continue
                data = data[skip_steps:]
            elif skip_first_step:
                data = data[1:]
            if paletteModifier is not None and paletteModifier != -1:
                cmin = min(cmin, data[:, paletteModifier].min())
                cmax = max(cmax, data[:, paletteModifier].max())
            data_dict[(ep, report_num)] = data
    fig, ax = plt.subplots(figsize=fig_size)
    ticks = None
    if paletteModifier == -1:
        cmin = min_report
        cmax = max_report
    if paletteModifier is None:
        cmin = int(epochs[0])
        cmax = int(epochs[-1])
        ticks = range(cmin, cmax + 1)
    sm = plt.cm.ScalarMappable(cmap=plt.get_cmap(cmap_name),
                               norm=plt.Normalize(vmin=cmin, vmax=cmax))
    sm.set_array([])
    dictionary['cmap'] = sm
    if paletteModifier != -1:
        cbar = plt.colorbar(sm, ticks=ticks)
        cbar.ax.zorder = -1
    offset = 0
    if skip_steps is not None:
        offset = skip_steps
    elif skip_first_step:
        # if we skip the first step there is a point that is not shown but we
        # should count it either way
        offset = 1
    for el in data_dict:
        addLine(data_dict[el], el[1], el[0], stepsPerRun, dictionary, artists)
        annotations.append([
            "Epoch: %d\nTrajectory: %d\nModel: %d" %
            (el[0], el[1], i + 1 + offset) for i in range(len(data_dict[el]))
        ])
    if label_x is not None:
        plt.xlabel(label_x)
    if label_y is not None:
        plt.ylabel(label_y)
    # the colorbar only exists when paletteModifier is not -1; labelling it
    # should not depend on whether a y label was given
    if paletteModifier != -1:
        if label_colorbar is not None:
            cbar.set_label(label_colorbar)
        elif paletteModifier is None:
            cbar.set_label("Epoch")
    ax.set_ylim(bottom=y_bottom, top=y_top)
    ax.set_xlim(left=x_left, right=x_right)

    annot = ax.annotate("",
                        xy=(0, 0),
                        xytext=(20, 20),
                        textcoords="offset points",
                        bbox=dict(boxstyle="round", fc="w"),
                        arrowprops=dict(arrowstyle="->"))
    annot.set_visible(False)

    def modify_color(color):
        color_offset = 0.5
        color = list(color)
        for i in range(3):
            color[i] = min(color[i] + color_offset, 1)
        return tuple(color)

    def update_annot(ind, color, pos, index):
        """Update the information box of the selected point"""
        annot.xy = pos
        annot.set_text(annotations[index][int(ind["ind"][0])])
        annot.get_bbox_patch().set_facecolor(modify_color(color))
        annot.get_bbox_patch().set_alpha(0.8)
        annot.zorder = 10

    def locate_event(event):
        for j, el in enumerate(artists):
            found, info = el.contains(event)
            if found:
                return j, found, info, el
        return 0, False, None, None

    def extract_data(obj_plot, ind):
        try:
            x, y = obj_plot.get_data()
            x = x[ind["ind"][0]]
            y = y[ind["ind"][0]]
            return (x, y)
        except AttributeError:
            return obj_plot.get_offsets()[ind["ind"][0]]

    def extract_color(obj_plot, ind):
        try:
            return obj_plot.get_markerfacecolor()
        except AttributeError:
            return obj_plot.get_facecolor()[ind["ind"][0]]

    def hover(event):
        """Action to perform when hovering the mouse on a point"""
        vis = annot.get_visible()
        if event.inaxes == ax:
            index, cont, ind, obj = locate_event(event)
            if cont:
                update_annot(ind, extract_color(obj, ind),
                             extract_data(obj, ind), index)
                annot.set_visible(True)
                fig.canvas.draw_idle()
            else:
                if vis:
                    annot.set_visible(False)
                    fig.canvas.draw_idle()

    # Respond to mouse motion
    fig.canvas.mpl_connect("motion_notify_event", hover)
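A hedged usage example (column numbers and labels are placeholders): plot
column 5 against column 2 of the report_* files, with 4 steps per epoch and
points colored by epoch; plt.show() keeps the hover callback active:

createPlot("report", 2, 5, stepsPerRun=4, printWithLines=False,
           paletteModifier=None, label_x="Steps", label_y="Binding energy")
plt.show()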
Example #14
import os
import numpy as np
import scipy.optimize as optim
from AdaptivePELE.utilities import utilities
import matplotlib.pyplot as plt
plt.style.use("ggplot")


def reward_new(x, rews):
    return -(x * rews).sum()


def reward(x, rews):
    return -(x[:, np.newaxis] * rews).sum()


folders = utilities.get_epoch_folders(".")
for folder in folders[::-1]:
    if os.path.exists(folder + "/clustering/object.pkl"):
        cl_object = utilities.readClusteringObject(folder +
                                                   "/clustering/object.pkl")
        break
# first_cluster = 0
trajToDivide = 144 * 2
rewardsEvol = []
weightsEvol = []
weightsEvol_new = []
weights = None
weights_new = None
metricInd = 4
labels = ["TE", "RMSD", "BE", "SASA"]
plots = True
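The snippet imports scipy.optimize but the minimization call itself falls
outside the excerpt; a sketch of how the reward objective above might be fed
to it (the rews matrix and the simplex constraint on the weights are
assumptions):

import numpy as np
import scipy.optimize as optim

rews = np.random.rand(4, 10)          # dummy rewards: 4 metrics x 10 clusters
x0 = np.full(4, 0.25)                 # start from uniform weights
cons = ({"type": "eq", "fun": lambda x: x.sum() - 1},)
res = optim.minimize(reward, x0, args=(rews,), bounds=[(0, 1)] * 4,
                     constraints=cons)
print(res.x)                          # optimized per-metric weights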
def main(controlFile, trajName, reportName, folder, top, outputFilename, nProcessors, output_folder, format_str, new_report, trajs_to_select):
    """
        Calculate the corrected rmsd values of conformation taking into account
        molecule symmetries

        :param controlFile: Control file
        :type controlFile: str
        :param folder: Path the simulation
        :type folder: str
        :param top: Path to the topology
        :type top: str
        :param outputFilename: Name of the output file
        :type outputFilename: str
        :param nProcessors: Number of processors to use
        :type nProcessors: int
        :param output_folder: Path where to store the new reports
        :type output_folder: str
        :param format_str: String with the format of the report
        :type format_str: str
        :param new_report: Whether to write rmsd to a new report file
        :type new_report: bool

    """
    if trajName is None:
        trajName = "*traj*"
    else:
        trajName += "_*"
    if reportName is None:
        reportName = "report_%d"
    else:
        reportName += "_%d"
    if output_folder is not None:
        outputFilename = os.path.join(output_folder, outputFilename)
    outputFilename += "_%d"
    if nProcessors is None:
        nProcessors = utilities.getCpuCount()
    nProcessors = max(1, nProcessors)
    print("Calculating RMSDs with %d processors" % nProcessors)
    epochs = utilities.get_epoch_folders(folder)
    if top is not None:
        top_obj = utilities.getTopologyObject(top)
    else:
        top_obj = None

    resname, nativeFilename, symmetries, rmsdColInReport = readControlFile(controlFile)

    nativePDB = atomset.PDB()
    nativePDB.initialise(nativeFilename, resname=resname)

    files = []
    if not epochs:
        # path does not contain an adaptive simulation, we'll try to retrieve
        # trajectories from the specified path
        files = analysis_utils.process_folder(None, folder, trajName, reportName, os.path.join(folder, outputFilename), top_obj, trajs_to_select)
    for epoch in epochs:
        print("Epoch", epoch)
        files.extend(analysis_utils.process_folder(epoch, folder, trajName, reportName, os.path.join(folder, epoch, outputFilename), top_obj, trajs_to_select))
    pool = mp.Pool(nProcessors)
    results = [pool.apply_async(calculate_rmsd_traj, args=(nativePDB, resname, symmetries, rmsdColInReport, info[0], info[1], info[2], info[3], info[4], format_str, new_report)) for info in files]
    pool.close()
    pool.join()
    for res in results:
        res.get()
def main(num_clusters,
         criteria1,
         criteria2,
         ligand_resname,
         output_folder="ClusterCentroids",
         atom_ids="",
         cpus=2,
         topology=None,
         report="report_",
         traj="trajectory_",
         use_pdb=False,
         png=False,
         CA=0,
         sidechains=0,
         restart="all"):
    # Create multiprocess pool
    if cpus > 1:
        pool = mp.Pool(cpus)
    else:
        pool = mp.Pool(1)
    # Extract COM ligand for each snapshot
    if not glob.glob("allTrajs/traj*"):
        extractCoords.main(lig_resname=ligand_resname,
                           non_Repeat=True,
                           atom_Ids=atom_ids,
                           nProcessors=cpus,
                           parallelize=True,
                           topology=topology,
                           protein_CA=CA,
                           sidechains=sidechains)

    print("Clusterize trajectories by RMSD of COM")
    trajectoryFolder = "allTrajs"
    trajectoryBasename = "*traj*"
    stride = 1
    clusterCountsThreshold = 0
    folders = utilities.get_epoch_folders(".")
    folders.sort(key=int)
    if not restart:

        clusteringObject = cluster.Cluster(num_clusters,
                                           trajectoryFolder,
                                           trajectoryBasename,
                                           alwaysCluster=True,
                                           stride=stride)
        clusteringObject.clusterTrajectories()
        clusteringObject.eliminateLowPopulatedClusters(clusterCountsThreshold)
        clusterCenters = clusteringObject.clusterCenters
        np.savetxt("clustercenters.dat", clusterCenters)
        dtrajs = clusteringObject.dtrajs

        print("Extract metrics for each snapshot")
        min_metric_trajs = {}
        epochs = utilities.get_epoch_folders(".")
        reports = simulationToCsv.gather_reports()
        fields = simulationToCsv.retrieve_fields(reports[0])
        df = simulationToCsv.init_df(fields)
        df = simulationToCsv.fill_data(reports, df, pool)

        print("Update data with metrics and clusters")
        df.index = range(df.shape[0])
        df["Cluster"] = [None] * df.shape[0]
        input_list = [[
            df, Traj, d
        ] for d, Traj in zip(dtrajs, clusteringObject.trajFilenames)]
        results = pool.map(save_to_df, input_list)
        for data in results:
            for df_tmp in data:
                df.update(df_tmp)
        df.to_csv("Simulation.csv", index=False)
    if restart:
        df = pd.read_csv("Simulation.csv")
        clusterCenters = utilities.loadtxtfile("clustercenters.dat")
        print(clusterCenters)
    centersInfo = get_centers_info(trajectoryFolder, trajectoryBasename,
                                   num_clusters, clusterCenters)
    COMArray = [centersInfo[i]['center'] for i in range(num_clusters)]

    print("Retrieve clusters and metric")
    fields1 = []
    fields2 = []
    print(centersInfo)
    for cluster_num in centersInfo:
        epoch_num, traj_num, snap_num = map(
            int, centersInfo[cluster_num]['structure'])
        field1, crit1_name = get_metric(criteria1, epoch_num, traj_num,
                                        snap_num, report)
        field2, crit2_name = get_metric(criteria2, epoch_num, traj_num,
                                        snap_num, report)
        fields1.append(field1)
        fields2.append(field2)

    if output_folder is not None:
        outputFolder = os.path.join(output_folder, "")
        if not os.path.exists(outputFolder):
            os.makedirs(outputFolder)
    else:
        outputFolder = ""
    print("Output structures")
    writePDB(
        COMArray,
        outputFolder + "clusters_%d_KMeans_allSnapshots.pdb" % num_clusters)
    writeInitialStructures(fields1,
                           fields2,
                           crit1_name,
                           crit2_name,
                           centersInfo,
                           outputFolder + "cluster_{}_{}_{}_{}_{}.pdb",
                           traj,
                           topology=topology,
                           use_pdb=use_pdb)
    plotClusters(fields1,
                 fields2,
                 crit1_name,
                 crit2_name,
                 outputFolder,
                 png=png)
    assesClusterConvergence(df, num_clusters, traj, topology)
    return
Example #17
def main(sim_path, n_trajs, trajectory_name, plot_name, residues_selected):
    # since we remove the water molecules, any topology file will be fine
    ref = md.load(os.path.join(sim_path, "topologies", "topology_0.pdb"))
    ref.remove_solvent(inplace=True)
    labels = []
    selections = []
    for res in ref.top.residues:
        if res.is_protein and (residues_selected is None
                               or res.resSeq in residues_selected):
            if residues_selected is not None:
                residues_selected.remove(res.resSeq)
            labels.append("%s%d" % (res.code, res.resSeq))
            selections.append(
                ref.top.select("protein and symbol != 'H' and residue %d" %
                               res.resSeq))
    if residues_selected is not None and len(residues_selected):
        raise ValueError("Residues %s not found in protein!" %
                         ", ".join(sorted([str(x)
                                           for x in residues_selected])))
    if not os.path.exists("rmsf.npy"):
        avg_xyz = None
        global_traj = None
        trajectory_name = "_%d".join(os.path.splitext(trajectory_name))

        epochs = utilities.get_epoch_folders(sim_path)
        n_epochs = len(epochs)
        for epoch in epochs:
            with open(os.path.join(sim_path, epoch,
                                   "topologyMapping.txt")) as f:
                top_map = f.read().rstrip().split(":")
            for i in range(1, n_trajs + 1):
                print("Processing epoch", epoch, "trajectory", i)
                trajectory = md.load(os.path.join(sim_path, epoch,
                                                  trajectory_name % i),
                                     top=os.path.join(
                                         sim_path, "topologies",
                                         "topology_%s.pdb" % top_map[i - 1]))
                # strip the solvent before averaging, so that avg_xyz stays
                # aligned with the solvent-free selections computed above
                trajectory.remove_solvent(inplace=True)
                if global_traj is None:
                    avg_xyz = np.mean(trajectory.xyz, axis=0)
                    global_traj = trajectory
                else:
                    avg_xyz += np.mean(trajectory.xyz, axis=0)
                    global_traj += trajectory
        avg_xyz /= (n_epochs * n_trajs)
        rmsfs = []
        for i, ind in enumerate(selections):
            temp = 10 * np.sqrt(3 * np.mean(
                (global_traj.xyz[:, ind, :] - avg_xyz[ind, :])**2,
                axis=(1, 2)))
            rmsfs.append(np.mean(temp))
        np.save("rmsf.npy", rmsfs)
    else:
        rmsfs = np.load("rmsf.npy")
    f1, ax1 = plt.subplots(1, 1)
    # get axis size in inches
    width = ax1.get_window_extent().transformed(
        f1.dpi_scale_trans.inverted()).width
    # font size is assumed to be 12pt and 1pt is 1/72in
    font = 12 * 1 / 72.0
    # if there are fewer labels than the max that would fit, show them all
    n_ticks = max(1, len(labels) // int(np.floor(width / font)))
    print(width, font, n_ticks, len(labels), width / font)
    x_vals = np.array(range(len(labels)))
    ax1.plot(rmsfs, 'x-')
    ax1.set_xticks(x_vals[::n_ticks])
    ax1.set_xticklabels(labels[::n_ticks])
    ax1.set_ylabel(r"RMSF ($\AA$)")
    ax1.tick_params(axis='x', rotation=90, labelsize=10)
    if plot_name is not None:
        f1.savefig(plot_name)
    plt.show()
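Two unit details in the RMSF computation above are easy to miss: mdtraj stores
coordinates in nanometers, hence the factor of 10 to convert to Angstrom, and
averaging the squared deviation over the three coordinates and multiplying by
3 equals summing it per atom. A quick standalone check of that identity:

import numpy as np

dev = np.random.rand(5, 4, 3)                # frames x atoms x xyz deviations
a = 3 * np.mean(dev**2, axis=(1, 2))         # as in the snippet, per frame
b = np.mean(np.sum(dev**2, axis=2), axis=1)  # per-atom squared displacement, averaged
assert np.allclose(a, b)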
Example #18
def main(folder_name=".",
         atom_Ids="",
         lig_resname="",
         numtotalSteps=0,
         enforceSequential_run=0,
         writeLigandTrajectory=True,
         setNumber=0,
         protein_CA=0,
         non_Repeat=False,
         nProcessors=None,
         parallelize=True,
         topology=None,
         sidechains=False,
         sidechain_folder=".",
         cm=False,
         use_extra_atoms=False,
         CM_mode="p-lig",
         calc_dihedrals=False,
         dihedrals_projection=False):
    params = ParamsHandler(folder_name, atom_Ids, lig_resname, numtotalSteps,
                           enforceSequential_run, writeLigandTrajectory,
                           setNumber, protein_CA, non_Repeat, nProcessors,
                           parallelize, topology, sidechains, sidechain_folder,
                           cm, use_extra_atoms, CM_mode, calc_dihedrals,
                           dihedrals_projection)
    constants = Constants()

    if params.topology is not None:
        params.topology = utilities.getTopologyObject(params.topology)

    params.lig_resname = parseResname(params.atomIds, params.lig_resname,
                                      params.contact_map, params.cm_mode,
                                      params.dihedrals)

    folderWithTrajs = params.folder_name

    makeGatheredTrajsFolder(constants)

    if params.enforceSequential_run:
        folders = ["."]
    else:
        folders = utilities.get_epoch_folders(folderWithTrajs)
        if len(folders) == 0:
            folders = ["."]

    # if multiprocess is not available, turn off parallelization
    params.parallelize &= PARALELLIZATION

    if params.parallelize:
        if params.nProcessors is None:
            params.nProcessors = utilities.getCpuCount()
        params.nProcessors = max(1, params.nProcessors)

        print("Running extractCoords with %d cores" % (params.nProcessors))
        pool = mp.Pool(params.nProcessors)
    else:
        pool = None

    params.sidechains = extractSidechainIndexes(
        params, pool=pool) if params.sidechains else []

    for folder_it in folders:
        pathFolder = os.path.join(folderWithTrajs, folder_it)
        print("Extracting coords from folder %s" % folder_it)
        ligand_trajs_folder = os.path.join(pathFolder,
                                           constants.ligandTrajectoryFolder)
        if params.writeLigandTrajectory and not os.path.exists(
                ligand_trajs_folder):
            os.makedirs(ligand_trajs_folder)
        writeFilenamesExtractedCoordinates(pathFolder,
                                           params,
                                           constants,
                                           pool=pool)
        if not params.non_Repeat:
            print("Repeating snapshots from folder %s" % folder_it)
            repeatExtractedSnapshotsInFolder(pathFolder,
                                             constants,
                                             params.numtotalSteps,
                                             pool=None)
        print("Gathering trajs in %s" % constants.gatherTrajsFolder)
        gatherTrajs(constants, folder_it, params.setNumber, params.non_Repeat)
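A hedged invocation with placeholder values: extract the coordinates of a
ligand with resname LIG from the current directory, without repeating
snapshots:

main(folder_name=".", lig_resname="LIG", non_Repeat=True, nProcessors=4)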
def main(num_clusters,
         criteria1,
         criteria2,
         output_folder,
         ligand_resname,
         atom_ids,
         cpus=2,
         topology=None,
         report="report_",
         traj="trajectory_",
         use_pdb=False):
    if not glob.glob("*/extractedCoordinates/coord_*"):
        extractCoords.main(lig_resname=ligand_resname,
                           non_Repeat=True,
                           atom_Ids=atom_ids,
                           nProcessors=cpus,
                           parallelize=False,
                           topology=topology,
                           use_pdb=use_pdb)
    trajectoryFolder = "allTrajs"
    trajectoryBasename = "*traj*"
    stride = 1
    clusterCountsThreshold = 0
    folders = utilities.get_epoch_folders(".")
    folders.sort(key=int)

    clusteringObject = cluster.Cluster(num_clusters,
                                       trajectoryFolder,
                                       trajectoryBasename,
                                       alwaysCluster=True,
                                       stride=stride)
    clusteringObject.clusterTrajectories()
    clusteringObject.eliminateLowPopulatedClusters(clusterCountsThreshold)
    clusterCenters = clusteringObject.clusterCenters
    centersInfo = get_centers_info(trajectoryFolder, trajectoryBasename,
                                   num_clusters, clusterCenters)
    COMArray = [centersInfo[i]['center'] for i in range(num_clusters)]

    fields1 = []
    fields2 = []
    for cluster_num in centersInfo:
        epoch_num, traj_num, snap_num = map(
            int, centersInfo[cluster_num]['structure'])
        field1, crit1_name = get_metric(criteria1, epoch_num, traj_num,
                                        snap_num, report)
        field2, crit2_name = get_metric(criteria2, epoch_num, traj_num,
                                        snap_num, report)
        fields1.append(field1)
        fields2.append(field2)

    if output_folder is not None:
        outputFolder = os.path.join(output_folder, "")
        if not os.path.exists(outputFolder):
            os.makedirs(outputFolder)
    else:
        outputFolder = ""
    writePDB(
        COMArray,
        outputFolder + "clusters_%d_KMeans_allSnapshots.pdb" % num_clusters)
    writeInitialStructures(fields1,
                           fields2,
                           crit1_name,
                           crit2_name,
                           centersInfo,
                           outputFolder + "cluster_{}_{}_{}_{}_{}.pdb",
                           traj,
                           topology=topology,
                           use_pdb=use_pdb)
    plotClusters(fields1, fields2, crit1_name, crit2_name, outputFolder)
Example #20
def main(nTICs,
         numClusters,
         ligand_resname,
         lag,
         nTraj,
         n_steps,
         out_path=None,
         stride_conformations=1,
         atomId="",
         repeat=False,
         plotTICA=False,
         topology=None):
    # Constants definition
    trajectoryFolder = "tica_projected_trajs"
    trajectoryBasename = "tica_traj*"
    stride = 1
    clusterCountsThreshold = 0
    clustersCentersFolder = "clustersCenters"
    ticaObject = "tica.pkl"

    if out_path is None:
        folderPath = ""
        curr_folder = "."
    else:
        folderPath = out_path
        curr_folder = out_path

    folders = utilities.get_epoch_folders(curr_folder)
    folders.sort(key=int)

    if not os.path.exists(
            os.path.join(folderPath,
                         "0/repeatedExtractedCoordinates/")) or repeat:
        # Extract ligand and alpha carbons coordinates
        extractCoords.main(folder_name=curr_folder,
                           lig_resname=ligand_resname,
                           numtotalSteps=n_steps,
                           protein_CA=False,
                           non_Repeat=False,
                           sidechains=True,
                           sidechain_folder="../output_clustering/initial*",
                           enforceSequential_run=0,
                           nProcessors=1)

    tica = make_TICA_decomposition(ticaObject, folders, folderPath, lag)

    # Select the desired number of independent components from the full
    # decomposition
    projected = tica.get_output(dimensions=list(range(nTICs)))
    write_TICA_trajs(trajectoryFolder, projected, trajectoryBasename, folders,
                     nTraj)
    clusteringObject = cluster_TICA_space(numClusters, trajectoryFolder,
                                          trajectoryBasename, stride,
                                          clusterCountsThreshold)
    trajsUniq, projectedUniq = projectTICATrajs(folders,
                                                folderPath,
                                                ligand_resname,
                                                atomId,
                                                stride_conformations,
                                                nTICs,
                                                tica,
                                                topology=topology)

    clusterCenters = clusteringObject.clusterCenters
    dtrajs = clusteringObject.assignNewTrajectories(projectedUniq)
    centersInfo = find_representative_strucutures(folders, numClusters, nTraj,
                                                  clusterCenters,
                                                  projectedUniq, dtrajs)
    writeCentersInfo(centersInfo,
                     folderPath,
                     ligand_resname,
                     nTICs,
                     numClusters,
                     trajsUniq,
                     clustersCentersFolder,
                     nTraj,
                     topology=topology)
    if plotTICA:
        make_TICA_plot(nTICs, projected)
Example #21
def main(n_clusters,
         output_folder,
         SASAColumn,
         norm_energy,
         num_bins,
         percentile,
         plots,
         atom_Ids,
         folder_name,
         traj_basename,
         cluster_energy,
         lig_resname="",
         topology=None):
    energyColumn = 3

    if output_folder is not None:
        outputFolder = os.path.join(output_folder, "")
        if not os.path.exists(outputFolder):
            os.makedirs(outputFolder)
    else:
        outputFolder = ""

    extractCoords.main(folder_name,
                       lig_resname=lig_resname,
                       non_Repeat=True,
                       atom_Ids=atom_Ids)

    epochFolders = utilities.get_epoch_folders(folder_name)
    points = []
    for epoch in epochFolders:
        report_files = glob.glob(os.path.join(epoch, "*report*"))
        report_files.sort(key=lambda x: int(x[x.rfind("_") + 1:]))
        for report_name in report_files:
            traj_num = int(report_name[report_name.rfind("_") + 1:])
            coordinates = np.loadtxt(
                os.path.join(
                    folder_name, "%s/extractedCoordinates/coord_%d.dat" %
                    (epoch, traj_num)))
            report = np.loadtxt(report_name)
            if len(report.shape) < 2:
                points.append([
                    report[energyColumn], report[SASAColumn],
                    int(epoch), traj_num, 0
                ] + coordinates[1:].tolist())
            else:
                epoch_line = np.array([int(epoch)] * report.shape[0])
                traj_line = np.array([traj_num] * report.shape[0])
                snapshot_line = np.array(range(report.shape[0]))
                points.extend(
                    np.hstack(
                        (report[:, (energyColumn, SASAColumn)],
                         epoch_line[:, np.newaxis], traj_line[:, np.newaxis],
                         snapshot_line[:, np.newaxis], coordinates[:, 1:])))
    points = np.array(points)
    points = points[points[:, 1].argsort()]
    minSASA = points[0, 1]
    maxSASA = points[-1, 1]
    left_bins = np.linspace(minSASA, maxSASA, num=num_bins, endpoint=False)
    indices = np.searchsorted(points[:, 1], left_bins)
    thresholds = np.array([
        np.percentile(points[i:j, 0], percentile)
        for i, j in zip(indices[:-1], indices[1:])
    ])

    new_points = []
    occupation = []
    for ij, (i, j) in enumerate(zip(indices[:-1], indices[1:])):
        found = np.where(points[i:j, 0] < thresholds[ij])[0]
        occupation.append(len(found))
        if len(found) == 1:
            new_points.append(points[found + i])
        elif len(found) > 1:
            new_points.extend(points[found + i])

    points = np.array(new_points)
    if norm_energy:
        energyMin = points.min(axis=0)[0]
        points[:, 0] -= energyMin
        energyMax = points.max(axis=0)[0]
        points[:, 0] /= energyMax

    if cluster_energy:
        print("Clustering using energy and SASA")
        kmeans = KMeans(n_clusters=n_clusters).fit(points[:, :2])
        title = "clusters_%d_energy_SASA.pdb"
    else:
        print("Clustering using ligand coordinates")
        kmeans = KMeans(n_clusters=n_clusters).fit(points[:, 5:8])
        title = "clusters_%d_energy_SASA_coords.pdb"
    centers_energy = []
    centers_coords = []
    if topology is not None:
        topology_contents = utilities.getTopologyFile(topology)
    else:
        topology_contents = None
    for i, center in enumerate(kmeans.cluster_centers_):
        if cluster_energy:
            dist = np.linalg.norm((points[:, :2] - center), axis=1)
        else:
            dist = np.linalg.norm((points[:, 5:8] - center), axis=1)
        epoch, traj, snapshot = points[dist.argmin(), 2:5]
        centers_energy.append(points[dist.argmin(), :2])
        centers_coords.append(points[dist.argmin(), 5:8])
        traj_file = glob.glob("%d/%s_%d*" % (epoch, traj_basename, traj))[0]
        conf = utilities.getSnapshots(traj_file,
                                      topology=topology)[int(snapshot)]
        if isinstance(conf, str):
            with open(os.path.join(outputFolder, "initial_%d.pdb" % i),
                      "w") as fw:
                fw.write(conf)
        else:
            utilities.write_mdtraj_object_PDB(
                conf, os.path.join(outputFolder, "initial_%d.pdb" % i),
                topology_contents)
    centers_energy = np.array(centers_energy)
    centers_coords = np.array(centers_coords)
    writePDB(centers_coords, os.path.join(outputFolder, title % n_clusters))
    if plots:
        plt.scatter(points[:, 1], points[:, 0], c=kmeans.labels_, alpha=0.5)
        plt.scatter(centers_energy[:, 1],
                    centers_energy[:, 0],
                    c=list(range(n_clusters)),
                    marker='x',
                    s=56,
                    zorder=1)
        plt.xlabel("SASA")
        if norm_energy:
            plt.ylabel("Energy (normalized)")
            plt.savefig(
                os.path.join(outputFolder, "clusters_energy_normalized.png"))
        else:
            plt.ylabel("Energy (kcal/mol)")
            plt.savefig(
                os.path.join(outputFolder, "clusters_no_normalized.png"))
        plt.show()
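The binning step above relies on the points being pre-sorted by SASA:
np.searchsorted then returns, for each bin's left edge, the index of the first
point falling inside that bin. A standalone sketch with dummy data:

import numpy as np

sasa = np.sort(np.random.rand(100))          # points sorted by SASA
left_bins = np.linspace(sasa[0], sasa[-1], num=5, endpoint=False)
indices = np.searchsorted(sasa, left_bins)
for i, j in zip(indices[:-1], indices[1:]):
    print("bin [%.2f, %.2f): %d points" % (sasa[i], sasa[j], j - i))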