Code Example #1
File: testAtomset.py  Project: cescgina/msm_pele
    def test_symmetryContactMapJaccard_XTC(self):
        xtc_obj = mdtraj.load("tests/data/symmetries/cluster_1.xtc",
                              top="tests/data/symmetries/cluster_1.pdb")
        topology = utilities.getTopologyFile(
            "tests/data/symmetries/cluster_1.pdb")
        pdb_1 = atomset.PDB()
        pdb_1.initialise(10 * xtc_obj.xyz[0], resname='AEN', topology=topology)
        topology = utilities.getTopologyFile(
            "tests/data/symmetries/cluster_1_sym.pdb")
        xtc_obj = mdtraj.load("tests/data/symmetries/cluster_1_sym.xtc",
                              top="tests/data/symmetries/cluster_1_sym.pdb")
        pdb_1_sym = atomset.PDB()
        pdb_1_sym.initialise(10 * xtc_obj.xyz[0],
                             resname='AEN',
                             topology=topology)
        symmetries3PTB = [{"3230:N1:AEN": "3231:N2:AEN"}]
        symmetryEvaluator = sym.SymmetryContactMapEvaluator(symmetries3PTB)
        symmetryEvaluatorEmpty = sym.SymmetryContactMapEvaluator()

        contactMap1, contacts1 = symmetryEvaluator.buildContactMap(
            pdb_1, 'AEN', 16)
        cluster = clustering.Cluster(pdb_1, contactMap=contactMap1)
        contactMap1Sym, contactsSym = symmetryEvaluator.createContactMap(
            pdb_1_sym, 'AEN', 16)
        # build the same map without symmetry handling for comparison
        contactMapNoSym, _ = symmetryEvaluatorEmpty.createContactMap(
            pdb_1_sym, 'AEN', 16)

        goldenJaccard = 0.0
        Jaccard = symmetryEvaluator.evaluateJaccard(contactMap1Sym,
                                                    cluster.contactMap)
        JaccardNosym = symmetryEvaluatorEmpty.evaluateJaccard(
            contactMapNoSym, cluster.contactMap)
        self.assertEqual(contacts1, contactsSym)
        self.assertAlmostEqual(goldenJaccard, Jaccard)
        self.assertNotAlmostEqual(Jaccard, JaccardNosym)
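The XTC tests in this file all follow the same setup: load the trajectory with mdtraj, read the matching topology once with utilities.getTopologyFile, and pass a single frame (converted from nm to Angstroms) to atomset.PDB.initialise. A minimal sketch of that pattern, assuming the AdaptivePELE package layout and using placeholder file paths:

import mdtraj
from AdaptivePELE.atomset import atomset
from AdaptivePELE.utilities import utilities

top_file = "tests/data/symmetries/cluster_1.pdb"  # placeholder topology path
traj = mdtraj.load("tests/data/symmetries/cluster_1.xtc", top=top_file)
topology = utilities.getTopologyFile(top_file)

pdb = atomset.PDB()
# mdtraj coordinates are in nm; the tests multiply by 10 to work in Angstroms
pdb.initialise(10 * traj.xyz[0], resname="AEN", topology=topology)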
Code Example #2
File: testAtomset.py  Project: cescgina/msm_pele
    def test_write_XTC_to_pdb(self):
        golden = "tests/data/ain_native_fixed.pdb"
        output = "xtc_to_pdb.pdb"
        topology = utilities.getTopologyFile(golden)
        xtc_obj = mdtraj.load("tests/data/ain_native_fixed.xtc", top=golden)
        xtc = atomset.PDB()
        xtc.initialise(xtc_obj.xyz[0], resname="AIN", topology=topology)
        top = utilities.getTopologyFile(golden)
        xtc.writePDB(output)
        golden_pdb = atomset.PDB()
        golden_pdb.initialise(golden, resname="AIN")
        output_pdb = atomset.PDB()
        output_pdb.initialise(output, resname="AIN")
        os.remove(output)
        self.assertEqual(golden_pdb.atoms, output_pdb.atoms)
Code Example #3
def main(outputDir, files, topology, structs, template=None):
    found = False
    if outputDir:
        utilities.makeFolder(outputDir)
    if topology is not None:
        topology_contents = utilities.getTopologyFile(topology)
    else:
        topology_contents = None
    if structs is not None:
        structs = set(structs)
    for f in files:
        name = os.path.split(f)[-1]
        templateName = os.path.join(outputDir,
                                    template) if template else os.path.join(
                                        outputDir, name[:-4] + "_%d.pdb")
        snapshots = utilities.getSnapshots(f, topology=topology)
        for i, snapshot in enumerate(snapshots):
            if structs is not None and i + 1 not in structs:
                continue
            if not isinstance(snapshot, basestring):
                PDB = atomset.PDB()
                PDB.initialise(snapshot, topology=topology_contents)
                snapshot = PDB.get_pdb_string(model_num=i + 1)
            if template:
                with open(templateName, 'w') as of:
                    of.write(snapshot)
                found = True
            else:
                with open(templateName % i, 'w') as of:
                    of.write(snapshot)
                found = True
    return found
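A hypothetical invocation of the main function above, with placeholder paths: write snapshots 1 and 3 of each XTC trajectory as individual PDB files under pdb_out/.

main("pdb_out",
     ["0/trajectory_1.xtc", "0/trajectory_2.xtc"],  # placeholder trajectories
     topology="topology.pdb",                       # placeholder topology file
     structs=[1, 3])                                # 1-based snapshot indices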
Code Example #4
def trajectory_and_snapshot_to_pdb(trajectory_path, snapshot, output_path):
    """
    Given an absolute path to a trajectory of Adaptive and a snapshot (MODEL) in xtc format, the function transform it
    into a PDB format.
    :param trajectory_path: Absolute path to a trajectory from Adaptive, in xtc format.
    :type trajectory_path:str
    :param snapshot: model of a trajectory that you want to transform.
    :type snapshot: int
    :param output_path: output path of the new pdb file.
    :type output_path: str
    :return: Creates a PDB file.
    """
    topology_path_splited = trajectory_path.split("/")[0:-2]
    topology_path = os.path.join("/".join(topology_path_splited),
                                 "topology.pdb")
    topology_contents = adapt_tools.getTopologyFile(topology_path)
    trajectory = adapt_tools.getSnapshots(trajectory_path,
                                          topology=topology_path)
    try:
        single_model = trajectory[snapshot]
        PDB = atomset.PDB()
        PDB.initialise(single_model, topology=topology_contents)
    except IndexError:
        exit(
            "You are selecting the model {} for a trajectory that has {} models, please, reselect the model index "
            "(starting from 0).".format(snapshot, len(trajectory)))
    with open(output_path, "w") as fw:
        fw.write("MODEL     %4d\n" % (snapshot + 1))
        fw.write(PDB.pdb)
        fw.write("ENDMDL\n")
        fw.write("END\n")
Code Example #5
def main(epoch_num, trajectory, snapshot_num, resname, clustering_object,
         topology):
    calc = RMSDCalculator.RMSDCalculator()
    clustering_object = utilities.readClusteringObject(clustering_object)
    n_clusters = utilities.loadtxtfile(
        os.path.join(str(max(0, epoch_num - 1)), "clustering",
                     "summary.txt")).shape[0]
    if topology is not None:
        topology_contents = utilities.getTopologyFile(topology)
    else:
        topology_contents = None
    filename = glob.glob(
        os.path.join(str(epoch_num), "*traj*_%d.*" % trajectory))
    if not filename:
        raise ValueError(
            "No file with the specified epoch and trajectory found")
    try:
        snapshots = utilities.getSnapshots(filename[0],
                                           topology=topology)[snapshot_num]
    except IndexError:
        raise IndexError(
            "Snapshot number %d not found in trajectory %d for epoch %d, please check that the arguments provided are correct"
            % (snapshot_num, trajectory, epoch_num))
    pdb = atomset.PDB()
    pdb.initialise(snapshots, resname=resname, topology=topology_contents)
    for i, cluster in enumerate(clustering_object[:n_clusters]):
        dist = calc.computeRMSD(pdb, cluster.pdb)
        if dist < cluster.threshold:
            print("Snapshot belongs to cluster", i)
            return
    print("Snapshot not assigned to any cluster! :(")
Code Example #6
def calculate_rmsd_traj(nativePDB, resname, symmetries, rmsdColInReport, traj, reportName, top, epoch, outputFilename, fmt_str, new_report):
    top_proc = None
    if top is not None:
        top_proc = utilities.getTopologyFile(top)
    rmsds = utilities.getRMSD(traj, nativePDB, resname, symmetries, topology=top_proc)

    if new_report:
        fixedReport = np.zeros((rmsds.size, 2))
        fixedReport[:, 0] = range(rmsds.size)
        fixedReport[:, 1] = rmsds
        header = ""
    else:
        with open(reportName) as f:
            header = f.readline().rstrip()
            if not header.startswith("#"):
                header = ""
            reportFile = utilities.loadtxtfile(reportName)
        if rmsdColInReport > 0 and rmsdColInReport < reportFile.shape[1]:
            reportFile[:, rmsdColInReport] = rmsds
            fixedReport = reportFile
        else:
            fixedReport = analysis_utils.extendReportWithRmsd(reportFile, rmsds)

    with open(outputFilename, "w") as fw:
        if header:
            fw.write("%s\tRMSD\n" % header)
        else:
            fw.write("# Step\tRMSD\n")
        np.savetxt(fw, fixedReport, fmt=fmt_str)
Code Example #7
def main(trajectory, snapshot, epoch, outputPath, out_filename, topology):
    if outputPath is not None:
        outputPath = os.path.join(outputPath, "")
        if not os.path.exists(outputPath):
            os.makedirs(outputPath)
    else:
        outputPath = ""
    if topology is not None:
        topology_contents = utilities.getTopologyFile(topology)
    else:
        topology_contents = None
    if os.path.exists(outputPath + out_filename):
        # If the specified name exists, append a number to distinguish the files
        name, ext = os.path.splitext(out_filename)
        out_filename = "".join([name, "_%d", ext])
        i = 1
        while os.path.exists(outputPath + out_filename % i):
            i += 1
        out_filename %= i
    pathway = []
    # Strip out trailing slash if present
    pathPrefix, epoch = os.path.split(epoch.rstrip("/"))
    sys.stderr.write("Creating pathway...\n")
    while True:
        filename = glob.glob(
            os.path.join(pathPrefix, epoch, "*traj*_%d.*" % trajectory))
        if not filename:
            raise ValueError(
                "Trajectory %s not found!" %
                os.path.join(pathPrefix, epoch, "*traj*_%d.*" % trajectory))
        snapshots = utilities.getSnapshots(filename[0])
        if epoch == '0':
            initial = 0
        else:
            # avoid repeating the initial snapshot
            initial = 1
        if not isinstance(snapshots[0], basestring):
            new_snapshots = []
            for i in range(initial, snapshot + 1):
                PDB = atomset.PDB()
                PDB.initialise(snapshots[i], topology=topology_contents)
                new_snapshots.append(PDB.pdb)
            snapshots = new_snapshots
        else:
            snapshots = snapshots[initial:snapshot + 1]
        pathway.insert(0, snapshots)
        if epoch == '0':
            # Once we get to epoch 0, we just need to append the trajectory
            # where the cluster was found and we can break out of the loop
            break
        procMapping = open(
            os.path.join(pathPrefix, epoch,
                         "processorMapping.txt")).read().rstrip().split(':')
        epoch, trajectory, snapshot = map(
            int, procMapping[trajectory - 1][1:-1].split(','))
        epoch = str(epoch)
    sys.stderr.write("Writing pathway...\n")
    with open(outputPath + out_filename, "a") as f:
        f.write("ENDMDL\n".join(itertools.chain.from_iterable(pathway)))
Code Example #8
File: testAtomset.py  Project: cescgina/msm_pele
    def testPDB_sel_resnum_XTC(self):
        golden = "tests/data/ain_native_fixed.pdb"
        topology = utilities.getTopologyFile(golden)
        xtc_obj = mdtraj.load("tests/data/ain_native_fixed.xtc", top=golden)
        xtc = atomset.PDB()
        xtc.initialise(10 * xtc_obj.xyz[0], resnum=2, topology=topology)
        golden_pdb = atomset.PDB()
        golden_pdb.initialise(golden, resnum=2)
        self.assertEqual(xtc, golden_pdb)
Code Example #9
def main(ligand, clusters_file, conf_folder, topology=None):
    trajFolder = "allTrajs_nonRepeat"
    cluster_centers = np.loadtxt(clusters_file)
    if not os.path.exists("discretized"):
        os.makedirs("discretized")
    if not os.path.exists(trajFolder):
        os.makedirs(trajFolder)
    stride = 1
    clusterCountsThreshold = 0
    trajBasename = "coord*"
    if topology is not None:
        topology_contents = utilities.getTopologyFile(topology)
    else:
        topology_contents = None
    epoch_folders = utilities.get_epoch_folders(conf_folder)
    numClusters = cluster_centers.shape[0]
    coordinates = [[] for cl in range(numClusters)]
    for it in epoch_folders:
        files = glob.glob(conf_folder + "%s/extractedCoordinates/coord*" % it)
        for f in files:
            traj = os.path.splitext(f)[0].split("_")[-1]
            shutil.copy(f, trajFolder + "/coord_%s_%s.dat" % (it, traj))
    clusteringObject = cluster.Cluster(numClusters,
                                       trajFolder,
                                       trajBasename,
                                       alwaysCluster=False,
                                       stride=stride)
    clusteringObject.clusterTrajectories()
    clusteringObject.eliminateLowPopulatedClusters(clusterCountsThreshold)
    for i in range(numClusters):
        if not os.path.exists("cluster_%d" % i):
            os.makedirs("cluster_%d/allStructures" % i)
    dtrajs_files = glob.glob("discretized/*.disctraj")
    for dtraj in dtrajs_files:
        print(dtraj)
        traj = np.loadtxt(dtraj)
        epoch, traj_num = map(int,
                              os.path.splitext(dtraj)[0].split("_", 3)[1:])
        trajPositions = np.loadtxt(trajFolder + "/coord_%d_%d.dat" %
                                   (epoch, traj_num))
        trajFile = glob.glob(
            os.path.join(conf_folder + "%d/trajectory_%d*" %
                         (epoch, traj_num)))[0]
        snapshots = utilities.getSnapshots(trajFile, topology=topology)
        for nSnap, cluster_num in enumerate(traj):
            coordinates[int(cluster_num)].append(trajPositions[nSnap])
            filename = "cluster_%d/allStructures/conf_%d_%d_%d.pdb" % (
                cluster_num, epoch, traj_num, nSnap)
            if isinstance(snapshots[nSnap], basestring):
                with open(filename, "w") as fw:
                    fw.write(snapshots[nSnap])
            else:
                utilities.write_mdtraj_object_PDB(snapshots[nSnap], filename,
                                                  topology_contents)
    for cl in range(numClusters):
        np.savetxt("cluster_%d/positions.dat" % cl, coordinates[cl])
Code Example #10
def main(representatives_files, path_structures, output="", clusters=None, trajNames="trajectory", topology=None):
    if clusters is None:
        clusters = ['a']
    # Load the representative structures file
    try:
        clusters_info = np.loadtxt(representatives_files, skiprows=1, dtype=int)
    except IOError:
        raise IOError("Couldn't find a representative file in %s, please check that the path is correct" % representatives_files)
    # Organize to minimise pdb loading
    if clusters != ['a']:
        clusters_info = clusters_info[list(map(int, clusters))]

    extract_info = getExtractInfo(clusters_info)

    # Write appropriate pdbs
    destFolder = output
    if not output:
        destFolder, _ = os.path.split(representatives_files)
        destFolder = os.path.join(destFolder, "representative_structures_pdbs")

    if not os.path.exists(destFolder):
        os.makedirs(destFolder)
    else:
        destFolder += "_%d"
        it = 1
        while os.path.exists(destFolder % it):
            it += 1
        destFolder %= it
        os.makedirs(destFolder)
    structureFolder = os.path.join(path_structures, "%d", trajNames+"_%d.*")

    if topology is not None:
        topology_contents = utilities.getTopologyFile(topology)
    else:
        topology_contents = None

    for trajFile, extraInfo in extract_info.items():
        try:
            pdbFile = glob.glob(structureFolder % trajFile)[0]
        except IndexError:
            raise ValueError("Structure %s not found" % (structureFolder % trajFile))
        try:
            snapshots = utilities.getSnapshots(pdbFile, topology=topology)
        except IOError:
            raise IOError("Unable to open %s, please check that the path to structures provided is correct" % pdbFile)
        for pair in extraInfo:
            if topology_contents is None:
                with open(os.path.join(destFolder, "cluster_%d.pdb" % pair[0]), "w") as fw:
                    fw.write(snapshots[pair[1]])
                    fw.write("\n")
            else:
                utilities.write_mdtraj_object_PDB(snapshots[pair[1]], os.path.join(destFolder, "cluster_%d.pdb" % pair[0]), topology=topology_contents)
Code Example #11
File: testAtomset.py  Project: cescgina/msm_pele
    def testPDB_contacts_XTC(self):
        # preparation
        golden = "tests/data/ain_native_fixed.pdb"
        topology = utilities.getTopologyFile(golden)
        xtc_obj = mdtraj.load("tests/data/ain_native_fixed.xtc", top=golden)
        xtc = atomset.PDB()
        xtc.initialise(10 * xtc_obj.xyz[0], resname="AIN", topology=topology)
        golden_pdb = atomset.PDB()
        golden_pdb.initialise(golden, resname="AIN")

        # function to test
        contacts = golden_pdb.countContacts("AIN", 8)
        contacts_xtc = xtc.countContacts("AIN", 8)
        self.assertEqual(contacts, contacts_xtc)
Code Example #12
File: histCM.py  Project: cescgina/msm_pele
def generateConformations(resname, clAcc, trajectory, topology):
    if topology is None:
        topology_contents = None
    else:
        topology_contents = utilities.getTopologyFile(topology)
    if clAcc is None:
        for traj in trajectory:
            snapshots = utilities.getSnapshots(traj, topology=topology)
            for snapshot in snapshots:
                PDBobj = atomset.PDB()
                PDBobj.initialise(snapshot, resname=resname, topology=topology_contents)
                yield PDBobj
    else:
        for cluster in clAcc.clusters.clusters:
            yield cluster.pdb
Code Example #13
def writeInitialStructures(centers_info, filename_template, topology=None):
    if topology is not None:
        topology_contents = utilities.getTopologyFile(topology)
    else:
        topology_contents = None
    for cluster_num in centers_info:
        epoch_num, traj_num, snap_num = map(
            int, centers_info[cluster_num]['structure'])
        trajectory = glob.glob("%d/trajectory_%d*" % (epoch_num, traj_num))[0]
        snapshots = utilities.getSnapshots(trajectory, topology=topology)
        if isinstance(snapshots[0], basestring):
            with open(filename_template % cluster_num, "w") as fw:
                fw.write(snapshots[snap_num])
        else:
            utilities.write_mdtraj_object_PDB(snapshots[snap_num],
                                              filename_template % cluster_num,
                                              topology_contents)
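A hypothetical input illustrating the layout writeInitialStructures unpacks: each cluster index maps to an (epoch, trajectory, snapshot) triple under the 'structure' key (all values and paths are placeholders).

centers_info = {0: {'structure': (0, 1, 25)},
                1: {'structure': (2, 3, 10)}}
writeInitialStructures(centers_info,
                       "initial_%d.pdb",         # placeholder filename template
                       topology="topology.pdb")  # placeholder topology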
Code Example #14
File: testAtomset.py  Project: cescgina/msm_pele
    def testPDB_COM_XTC(self):
        # preparation
        golden = "tests/data/ain_native_fixed.pdb"
        topology = utilities.getTopologyFile(golden)
        xtc_obj = mdtraj.load("tests/data/ain_native_fixed.xtc", top=golden)
        xtc = atomset.PDB()
        xtc.initialise(10 * xtc_obj.xyz[0], resname="AIN", topology=topology)
        golden_pdb = atomset.PDB()
        golden_pdb.initialise(golden, resname="AIN")

        # assertion
        self.assertAlmostEqual(xtc.totalMass, golden_pdb.totalMass, 3)
        np.testing.assert_array_almost_equal(xtc.getCOM(),
                                             golden_pdb.getCOM(),
                                             decimal=3)
Code Example #15
File: testAtomset.py  Project: cescgina/msm_pele
    def testPDB_RMSD_XTC(self):
        # preparation
        golden = "tests/data/ain_native_fixed.pdb"
        topology = utilities.getTopologyFile(golden)
        xtc_obj = mdtraj.load("tests/data/ain_native_fixed.xtc", top=golden)
        xtc = atomset.PDB()
        xtc.initialise(10 * xtc_obj.xyz[0], resname="AIN", topology=topology)
        golden_pdb = atomset.PDB()
        golden_pdb.initialise(golden, resname="AIN")

        # assertion
        RMSDCalc = RMSDCalculator.RMSDCalculator()

        # function to test
        RMSD = RMSDCalc.computeRMSD(golden_pdb, xtc)
        golden_RMSD = 0.0000
        self.assertAlmostEqual(RMSD, golden_RMSD, 2)
Code Example #16
File: testAtomset.py  Project: cescgina/msm_pele
    def test_PDB_interface_XTC(self):
        golden = "tests/data/ain_native_fixed.pdb"
        topology = utilities.getTopologyFile(golden)
        xtc_obj = mdtraj.load("tests/data/ain_native_fixed.xtc", top=golden)
        xtc = atomset.PDB()
        xtc.initialise(10 * xtc_obj.xyz[0], resname="AIN", topology=topology)
        golden_pdb = atomset.PDB()
        golden_pdb.initialise(golden, resname="AIN")
        self.assertEqual(len(golden_pdb), len(xtc))
        atomList = [atom for atom in golden_pdb]
        atomList_xtc = [atom for atom in xtc]
        self.assertEqual(atomList, atomList_xtc)
        atomId = xtc.atomList[0]
        atom = xtc[atomId]
        self.assertEqual(atom, xtc.getAtom(atomId))
        xtc[atomId] = None
        self.assertEqual(None, xtc.getAtom(atomId))
Code Example #17
File: testAtomset.py  Project: cescgina/msm_pele
    def testPDB_RMSD_symmetries_XTC(self):
        # preparation
        golden = "tests/data/ain_native_fixed.pdb"
        topology = utilities.getTopologyFile(golden)
        xtc_obj = mdtraj.load("tests/data/ain_native_fixed.xtc", top=golden)
        xtc = atomset.PDB()
        xtc.initialise(10 * xtc_obj.xyz[0], resname="AIN", topology=topology)
        golden_pdb = atomset.PDB()
        golden_pdb.initialise(golden, resname="AIN")
        symDict = [{"1733:O1:AIN": "1735:O2:AIN"}]
        RMSDCalc = RMSDCalculator.RMSDCalculator(symDict)
        # function to test
        RMSD = RMSDCalc.computeRMSD(xtc, golden_pdb)
        reverseRMSD = RMSDCalc.computeRMSD(golden_pdb, xtc)
        golden_RMSD = 0.00000
        self.assertAlmostEqual(RMSD, reverseRMSD, 2)
        self.assertAlmostEqual(RMSD, golden_RMSD, 2)
Code Example #18
File: testAtomset.py  Project: cescgina/msm_pele
    def testPDB_contactmap_XTC(self):
        # preparation
        golden = "tests/data/ain_native_fixed.pdb"
        topology = utilities.getTopologyFile(golden)
        xtc_obj = mdtraj.load("tests/data/ain_native_fixed.xtc", top=golden)
        xtc = atomset.PDB()
        xtc.initialise(10 * xtc_obj.xyz[0], resname="AIN", topology=topology)
        golden_pdb = atomset.PDB()
        golden_pdb.initialise(golden, resname="AIN")
        symmetryEvaluator = sym.SymmetryContactMapEvaluator([])

        # function to test
        contact_map, contacts = symmetryEvaluator.createContactMap(
            golden_pdb, "AIN", 8)
        symmetryEvaluator_xtc = sym.SymmetryContactMapEvaluator([])
        contact_map_xtc, contacts_xtc = symmetryEvaluator_xtc.createContactMap(
            xtc, "AIN", 8)
        np.testing.assert_array_equal(contact_map, contact_map_xtc)
        self.assertEqual(contacts_xtc, contacts)
Code Example #19
def writeCentersInfo(centersInfo,
                     folderPath,
                     ligand_resname,
                     nTICs,
                     numClusters,
                     trajsUniq,
                     clustersCentersFolder,
                     nTraj,
                     topology=None):
    if topology is not None:
        topology_contents = utilities.getTopologyFile(topology)
    else:
        topology_contents = None
    if not os.path.exists(clustersCentersFolder):
        os.makedirs(clustersCentersFolder)
    COM_list = []
    for clusterNum in centersInfo:
        epoch, trajNum, snap = centersInfo[clusterNum]['structure']
        COM_list.append(trajsUniq[int(epoch) * nTraj + (trajNum - 1)][snap])
        # Accept non-pdb trajectories
        trajFile = glob.glob(
            os.path.join(folderPath, "%s/trajectory_%d.*" % (epoch, trajNum)))
        trajFile = trajFile[0]
        snapshots = utilities.getSnapshots(trajFile, topology=topology)
        pdb_object = atomset.PDB()
        pdb_object.initialise(snapshots[snap],
                              resname=ligand_resname,
                              topology=topology_contents)
        pdb_object.writePDB(
            str(
                os.path.join(str(clustersCentersFolder),
                             "cluster_%d.pdb" % clusterNum)))

    distances = [[nC, centersInfo[nC]['minDist']] for nC in range(numClusters)]
    np.savetxt(
        os.path.join(clustersCentersFolder,
                     "clusterDistances_%dcl_%dTICs.dat" %
                     (numClusters, nTICs)), distances)
    utilities.write_PDB_clusters(
        COM_list,
        os.path.join(clustersCentersFolder,
                     "clustersCenters_%dcl_%dTICs.pdb" % (numClusters, nTICs)))
Code Example #20
File: clusterEnergySASA.py  Project: cescgina/PyTools
def main(n_clusters,
         output_folder,
         SASAColumn,
         norm_energy,
         num_bins,
         percentile,
         plots,
         atom_Ids,
         folder_name,
         traj_basename,
         cluster_energy,
         topology=None):
    energyColumn = 3

    if output_folder is not None:
        outputFolder = os.path.join(output_folder, "")
        if not os.path.exists(outputFolder):
            os.makedirs(outputFolder)
    else:
        outputFolder = ""

    extractCoords.main(folder_name,
                       lig_resname=ligand_resname,
                       non_Repeat=True,
                       atom_Ids=atom_Ids)

    epochFolders = utilities.get_epoch_folders(folder_name)
    points = []
    for epoch in epochFolders:
        report_files = glob.glob(os.path.join(epoch, "*report*"))
        report_files.sort(key=lambda x: int(x[x.rfind("_") + 1:]))
        for report_name in report_files:
            traj_num = int(report_name[report_name.rfind("_") + 1:])
            coordinates = np.loadtxt(
                os.path.join(
                    folder_name, "%s/extractedCoordinates/coord_%d.dat" %
                    (epoch, traj_num)))
            report = np.loadtxt(report_name)
            if len(report.shape) < 2:
                points.append([
                    report[energyColumn], report[SASAColumn],
                    int(epoch), traj_num, 0
                ] + coordinates[1:].tolist())
            else:
                epoch_line = np.array([int(epoch)] * report.shape[0])
                traj_line = np.array([traj_num] * report.shape[0])
                snapshot_line = np.array(range(report.shape[0]))
                points.extend(
                    np.hstack(
                        (report[:, (energyColumn, SASAColumn)],
                         epoch_line[:, np.newaxis], traj_line[:, np.newaxis],
                         snapshot_line[:, np.newaxis], coordinates[:, 1:])))
    points = np.array(points)
    points = points[points[:, 1].argsort()]
    minSASA = points[0, 1]
    maxSASA = points[-1, 1]
    left_bins = np.linspace(minSASA, maxSASA, num=num_bins, endpoint=False)
    indices = np.searchsorted(points[:, 1], left_bins)
    thresholds = np.array([
        np.percentile(points[i:j, 0], percentile)
        for i, j in zip(indices[:-1], indices[1:])
    ])

    new_points = []
    occupation = []
    for ij, (i, j) in enumerate(zip(indices[:-1], indices[1:])):
        found = np.where(points[i:j, 0] < thresholds[ij])[0]
        occupation.append(len(found))
        if len(found) == 1:
            new_points.append(points[found + i])
        elif len(found) > 1:
            new_points.extend(points[found + i])

    points = np.array(new_points)
    if norm_energy:
        energyMin = points.min(axis=0)[0]
        points[:, 0] -= energyMin
        energyMax = points.max(axis=0)[0]
        points[:, 0] /= energyMax

    if cluster_energy:
        print("Clustering using energy and SASA")
        kmeans = KMeans(n_clusters=n_clusters).fit(points[:, :2])
        title = "clusters_%d_energy_SASA.pdb"
    else:
        print("Clustering using ligand coordinates")
        kmeans = KMeans(n_clusters=n_clusters).fit(points[:, 5:8])
        title = "clusters_%d_energy_SASA_coords.pdb"
    centers_energy = []
    centers_coords = []
    if topology is not None:
        topology_contents = utilities.getTopologyFile(topology)
    else:
        topology_contents = None
    for i, center in enumerate(kmeans.cluster_centers_):
        if cluster_energy:
            dist = np.linalg.norm((points[:, :2] - center), axis=1)
        else:
            dist = np.linalg.norm((points[:, 5:8] - center), axis=1)
        epoch, traj, snapshot = points[dist.argmin(), 2:5]
        centers_energy.append(points[dist.argmin(), :2])
        centers_coords.append(points[dist.argmin(), 5:8])
        traj_file = glob.glob("%d/%s_%d*" % (epoch, traj_basename, traj))[0]
        conf = utilities.getSnapshots(traj_file,
                                      topology=topology)[int(snapshot)]
        if isinstance(conf, basestring):
            with open(os.path.join(outputFolder, "initial_%d.pdb" % i),
                      "w") as fw:
                fw.write(conf)
        else:
            utilities.write_mdtraj_object_PDB(
                conf, os.path.join(outputFolder, "initial_%d.pdb" % i),
                topology_contents)
    centers_energy = np.array(centers_energy)
    centers_coords = np.array(centers_coords)
    writePDB(centers_coords, os.path.join(outputFolder, title % n_clusters))
    if plots:
        plt.scatter(points[:, 1], points[:, 0], c=kmeans.labels_, alpha=0.5)
        plt.scatter(centers_energy[:, 1],
                    centers_energy[:, 0],
                    c=list(range(n_clusters)),
                    marker='x',
                    s=56,
                    zorder=1)
        plt.xlabel("SASA")
        if norm_energy:
            plt.ylabel("Energy (normalized)")
            plt.savefig(
                os.path.join(outputFolder, "clusters_energy_normalized.png"))
        else:
            plt.ylabel("Energy (kcal/mol)")
            plt.savefig(
                os.path.join(outputFolder, "clusters_no_normalized.png"))
        plt.show()
Code Example #21
    return args.clusteringObject, args.resname, args.metrics, args.population, args.contacts, args.i, args.top


if __name__ == "__main__":
    pklObject_filename, lig_resname, metricsFlag, population_flag, contacts_flag, input_file, top = parseArguments()

    metricPlot_filename = ""  # "results/contactClusters.png"
    populationPlot_filename = ""  # "results/contactClusterspop.png"
    contactsPlot_filename = ""  # "results/contactClustersContacts.png"
    title_metric = "Metrics Contacts"
    title_population = "Population Contacts"
    title_contacts = "Number of contacts Contacts"
    topology_contents = None
    if top is not None:
        topology_contents = utilities.getTopologyFile(top)

    plotClusteringData(pklObject_filename,
                       lig_resname,
                       title_metric,
                       title_population,
                       title_contacts,
                       metricPlot_filename,
                       populationPlot_filename,
                       contactsPlot_filename,
                       metricsFlag,
                       population_flag,
                       contacts_flag,
                       input_file,
                       topology=topology_contents)
    plt.show()
Code Example #22
    for traj in trajs:
        snapshots = utilities.getSnapshots(traj, topology=topology)
        for snapshot in snapshots:
            PDB = atomset.PDB()
            PDB.initialise(snapshot, type="PROTEIN", topology=topology_content)
            snapshotsTot.append(PDB)

    return avgStruct, snapshotsTot


if __name__ == "__main__":
    trajs, ref, nResidues, top = parseArguments()
    if top is None:
        top_content = None
    else:
        top_content = utilities.getTopologyFile(top)
    if ref is None:
        avgPDB, totPDBs = extractAvgPDB(trajs, top, top_content)
    else:
        avgPDB, totPDBs = mapReference(ref, trajs, top, top_content)
    RMSF = {atom: 0.0 for atom in avgPDB}
    residueMapping = {}
    # TODO: Handle multiple chains and insertion residues in PDB
    for PDBobj in totPDBs:
        for atomID, atom in PDBobj.atoms.items():
            RMSF[atomID] += np.sum((atom.getAtomCoords() - avgPDB[atomID])**2)
    for atomID, atom in PDBobj.atoms.items():
        if atom.resnum not in residueMapping:
            residueMapping[atom.resnum] = {atomID}
        else:
            residueMapping[atom.resnum].add(atomID)