def computeEachAtomsUnnormalisedAutocorrelation(trajectories, avgR):
    r"""
    Compute the unnormalised autocorrelation of each atom over several trajectories.

    The autocorrelation is computed with the formula:

        C(k) = 1/[(n-k)] \sum_{t=1}^{n-k} (Xt - mu)(Xt+k - mu)

    To normalise it within [-1:1]:

        c(k) = C(k) / var

    When the true mean \mu and variance \sigma^2 are known, this estimate is
    unbiased.

    Every trajectory is parsed with ``prody.parsePDB`` (alpha carbons only), its
    frames from ``INITIAL_FRAME`` onwards are superposed onto its first frame,
    and the superposed coordinates are handed to
    ``computeEachAtomsUnnormalisedAutocorrelationForASingleTrajectory``.

    Parameters:
        trajectories: iterable of items accepted by ``prody.parsePDB``
            (presumably PDB file paths -- confirm with the caller).
        avgR: forwarded unchanged to the single-trajectory helper
            (presumably the per-atom mean position -- confirm).

    Returns:
        ``sumOverTrajectories`` of the per-trajectory results divided by
        ``sumOverTrajectories`` of the per-trajectory measure counts.
    """
    rirj = []
    rirjMeasures = []
    for traj in trajectories:
        trajectory = prody.parsePDB(traj, subset='calpha')
        coordinates = trajectory.getCoordsets()

        # Superpose the kept frames onto the first frame of the trajectory.
        ensembleTrajectory = prody.PDBEnsemble("Complex")
        ensembleTrajectory.setAtoms(trajectory)
        ensembleTrajectory.addCoordset(coordinates[INITIAL_FRAME:])
        ensembleTrajectory.setCoords(coordinates[0])  # reference
        ensembleTrajectory.superpose()

        srirj, sMeasures = computeEachAtomsUnnormalisedAutocorrelationForASingleTrajectory(
            ensembleTrajectory.getCoordsets(), avgR)
        rirj.append(srirj)
        rirjMeasures.append(sMeasures)

    rirj = sumOverTrajectories(rirj)
    rirjMeasures = sumOverTrajectories(rirjMeasures)
    return rirj / rirjMeasures
def calc_average(trajectory, cutoff):
    """
    Build an AtomGroup whose coordinates are a per-atom "network average".

    The trajectory is iteratively superposed (``PDBEnsemble.iterpose``); then,
    for every atom, ``get_network_average`` is called with that atom's
    coordinates across all conformations and an absolute cutoff.

    Parameters:
        trajectory: ProDy atomic object providing ``getCoords``, ``getNames``,
            ``getResnums`` and ``getResnames`` and accepted by
            ``prody.PDBEnsemble``.
        cutoff: multiplier applied to the mean RMSD of the superposed ensemble
            to obtain the absolute cutoff passed to ``get_network_average``.

    Returns:
        A new ``prody.AtomGroup`` titled 'Cartesian average coordinates' with
        names, residue numbers and residue names copied from ``trajectory``.
    """
    output = prody.AtomGroup('Cartesian average coordinates')
    output_coords = trajectory.getCoords()
    output.setCoords(trajectory.getCoords())
    output.setNames(trajectory.getNames())
    output.setResnums(trajectory.getResnums())
    output.setResnames(trajectory.getResnames())

    ensemble = prody.PDBEnsemble(trajectory)
    ensemble.iterpose()

    # Keep the caller's multiplier and the derived absolute cutoff apart.
    absolute_cutoff = numpy.mean(ensemble.getRMSDs()) * cutoff
    # Single-argument print(...) behaves identically on Python 2 and 3.
    print('Using cutoff of {}'.format(absolute_cutoff))

    input_coords = ensemble.getCoordsets()
    n_atoms = output_coords.shape[0]
    for i in range(n_atoms):
        print('Computing residue {} of {}:'.format(i + 1, n_atoms))
        # input_coords[:, i, :] is atom i's position in every conformation.
        output_coords[i, :] = get_network_average(input_coords[:, i, :], absolute_cutoff)

    output.setCoords(output_coords)
    return output
def computeEachAtomAllTrajectoriesMean(trajectories):
    """
    Computes the mean of each atom's position in all the trajectories.

    Each item of ``trajectories`` is parsed with ``prody.parsePDB`` (alpha
    carbons only); its frames from ``INITIAL_FRAME`` onwards are superposed
    onto its first frame and passed, together with the atom count, to
    ``addTrajectoryCoordinates``. The per-trajectory results are finally
    combined by ``average``.
    """
    partial_results = []
    partial_measures = []

    for source in trajectories:
        atoms = prody.parsePDB(source, subset='calpha')
        frames = atoms.getCoordsets()

        aligned = prody.PDBEnsemble("Complex")
        aligned.setAtoms(atoms)
        aligned.addCoordset(frames[INITIAL_FRAME:])
        aligned.setCoords(frames[0])  # the first frame is the reference
        aligned.superpose()

        result, measures = addTrajectoryCoordinates(
            aligned.getCoordsets(), atoms.numAtoms())
        partial_results.append(result)
        partial_measures.append(measures)

    return average(partial_results, partial_measures)
def calc_average(trajectory):
    """
    Return an AtomGroup holding the arithmetic mean of the superposed frames.

    The trajectory is iteratively superposed with ``PDBEnsemble.iterpose`` and
    its coordinate sets are averaged along the first axis. Names, residue
    numbers and residue names are copied from ``trajectory``.
    """
    averaged = prody.AtomGroup('Cartesian average coordinates')
    # Coordinates are set first, then the per-atom attributes.
    averaged.setCoords(trajectory.getCoords())
    averaged.setNames(trajectory.getNames())
    averaged.setResnums(trajectory.getResnums())
    averaged.setResnames(trajectory.getResnames())

    aligned = prody.PDBEnsemble(trajectory)
    aligned.iterpose()

    averaged.setCoords(numpy.mean(aligned.getCoordsets(), axis=0))
    return averaged
def get_closest_frame(trajectory, average_structure):
    """
    Return the frame of ``trajectory`` with the lowest RMSD to ``average_structure``.

    The trajectory is wrapped in a ``PDBEnsemble`` whose reference coordinates
    are set to ``average_structure``; after superposition the conformation with
    the minimal RMSD is copied into a new AtomGroup carrying the names, residue
    numbers and residue names of ``trajectory``.
    """
    closest = prody.AtomGroup('Cartesian average coordinates')
    closest.setCoords(trajectory.getCoords())
    closest.setNames(trajectory.getNames())
    closest.setResnums(trajectory.getResnums())
    closest.setResnames(trajectory.getResnames())

    aligned = prody.PDBEnsemble(trajectory)
    aligned.setCoords(average_structure)
    aligned.superpose()

    best = numpy.argmin(aligned.getRMSDs())
    closest.setCoords(aligned.getCoordsets(best))
    return closest
def setSelections(self, ensemble):
    '''
    Restrict an ensemble to a custom atom selection (currently switched off).

    put selections also into key arguments

    With the hard-wired ``custom_selection = False`` the method only stores
    ``self.ref_chain`` in ``self.selection_ref_structure`` and returns
    ``ensemble`` unchanged. The code after the guard is the custom-selection
    path: it selects "name CA" atoms, builds a new ``PDBEnsemble`` containing
    only those atoms and copies the matching coordinates of every conformation.

    ensemble: prody ensemble to filter
    returns: ``ensemble`` itself, or the reduced ensemble on the custom path
    '''
    custom_selection = False
    if not custom_selection:
        self.selection_ref_structure = self.ref_chain
        return ensemble

    self.selection2 = "name CA"
    structure = ensemble.getAtoms().select(self.selection2)
    selec = structure.getIndices()
    if self.debug:
        print("***DEBUG***", selec)
        print("***DEBUG***", len(selec))

    # create new ensemble containing only the selected atoms
    ensemble2 = prody.PDBEnsemble(ensemble.getTitle())
    ensemble2.setAtoms(structure)
    ensemble2.setCoords(structure.getCoords())
    self.selection_ref_structure = structure

    # Build the lookup once: testing membership against the index array for
    # every atom of every conformation is needlessly quadratic.
    selected = set(selec)
    for coords in ensemble.iterCoordsets():
        new_coords = np.zeros((ensemble2.numSelected(), 3))
        count = 0
        for index, xyz in enumerate(coords):
            if index in selected:
                new_coords[count] = xyz
                count += 1
        ensemble2.addCoordset(new_coords)

    ensemble2.getConformation(0).setLabel(ensemble2.getTitle())
    if self.debug:
        print("***DEBUG*** Ensemble before selections: ", repr(ensemble))
        print("***DEBUG*** Ensemble after selections: ", repr(ensemble2))
    return ensemble2
def get_coords_and_superimpose_with_prody(trajectories, skip, max_frames, iterpose=True):
    """
    Load several trajectories and return their superposed alpha-carbon coordinates.

    Parameters:
        trajectories: iterable of items accepted by ``prody.parsePDB``.
        skip: index of the first frame to keep in each trajectory.
        max_frames: maximum number of frames kept per trajectory (counted
            from ``skip``).
        iterpose: if true, use ``PDBEnsemble.iterpose`` (iterative
            superposition); otherwise use ``PDBEnsemble.superpose``.

    Returns:
        A list with one entry per trajectory: the result of
        ``getCoordsets()`` on the superposed ensemble.
    """
    all_coordsets = []
    for traj_path in trajectories:
        trajectory = prody.parsePDB(traj_path, subset='calpha')
        coordinates = trajectory.getCoordsets()
        last_frame = min(trajectory.numCoordsets(), max_frames + skip)

        ensembleTrajectory = prody.PDBEnsemble("Complex")
        ensembleTrajectory.setAtoms(trajectory)
        ensembleTrajectory.addCoordset(coordinates[skip:last_frame])
        ensembleTrajectory.setCoords(coordinates[0])  # reference

        if iterpose:
            # Single-argument print(...) behaves identically on Python 2 and 3.
            print("\t- Using iterposition on trajectory")
            ensembleTrajectory.iterpose()
        else:
            ensembleTrajectory.superpose()

        all_coordsets.append(ensembleTrajectory.getCoordsets())
    return all_coordsets
import os.path
import sys

import numpy
import prody


def _write_coords(path, coordsets):
    """Dump a 3-D coordinate array: a header with its three dimensions, then every coordinate set."""
    # open() replaces the Python-2-only file() builtin.
    with open(path, 'w') as outfile:
        outfile.write("%d %d %d\n" % coordsets.shape)
        for coordset in coordsets:
            numpy.savetxt(outfile, coordset)


# Usage: <script> <trajectory.pdb>
# Superposes the trajectory on its alpha carbons, then writes the aligned CA
# atoms and the (equally transformed) BEN ligand heavy atoms as PDB and
# plain-text coordinate files in the current directory.
pdb_data = prody.parsePDB(sys.argv[1])
pdb_name = os.path.basename(sys.argv[1])

pdb_trajectory = prody.PDBEnsemble("aligned_CA")
# "not resname CA" excludes residues that are themselves named CA.
prot = pdb_data.select("name CA not resname CA")
pdb_trajectory.setCoords(pdb_data.getCoordsets()[0])
pdb_trajectory.addCoordset(pdb_data.getCoordsets())
pdb_trajectory.setAtoms(prot)
pdb_trajectory.superpose()

prody.writePDB(pdb_name + ".aligned_CA.pdb", pdb_trajectory)
# The original name lacked the "." separator ("<name>aligned_CA.coords"),
# unlike the three sibling outputs; it is made consistent here.
_write_coords(pdb_name + ".aligned_CA.coords", pdb_trajectory.getCoordsets())

# Switch the active selection to the ligand (listed hydrogens excluded).
lig = pdb_data.select("resname BEN not name H1 H2 H3 H4 H5 H6 H7 HN1 HN2")
pdb_trajectory.setAtoms(lig)
rmsds = pdb_trajectory.getRMSDs()  # not used further in this script

prody.writePDB(pdb_name + ".aligned_BEN.pdb", pdb_trajectory)
_write_coords(pdb_name + ".aligned_BEN.coords", pdb_trajectory.getCoordsets())
def createEnsemble(self, pdbs, logger):
    '''
    Create a prody ensemble based on getPDBs return

    Take into account, that system can be prepared or not and
    take gaps and multiple chains into account;
    take single files and trajectory file into account

    pdbs: list-array with pdb-filenames. NOTE: the list is modified in
          place -- the reference entry is removed from it.
    logger: logger used for the progress messages. Several messages go to
          ``env.logger`` instead (NOTE(review): ``env`` is defined outside
          this block -- confirm both point at the same logger).

    Side effects: sets ``self.ref`` (when it was "none") and
    ``self.ref_chain``; rewrites the reference file on disk with every
    "HID"/"HIE" replaced by "HIS"; calls ``exit()`` if the reference file
    cannot be opened.

    Returns the iteratively superposed ``prody.PDBEnsemble``.

    ToDo:
    - set reference depending on longest chain with no gaps
    - check for duplicate chains and only select this one
    '''
    logger.info("Create Ensemble")

    # set the reference structure, if not chosen by user, the first Frame or PDB is taken
    # remove reference Frame/PDB from list
    if self.ref == "none":
        self.ref = pdbs[0]
        pdbs.pop(0)
    elif self.ref in pdbs:
        if self.debug:
            # Logging calls need a %s placeholder for each extra argument.
            logger.info("***DEBUG*** Found Ref: %s", self.ref)
            logger.info("***DEBUG*** Index Ref %s", pdbs.index(self.ref))
        logger.info("Removing reference from pdblist")
        pdbs.remove(self.ref)
    if self.debug:
        logger.info("***DEBUG*** Reference is %s", self.ref)

    # open reference file and check for HID, HIE; if found, replace by HIS
    try:
        pdbinfile = open(self.ref)
    except IOError as e:
        env.logger.info(self.ref)
        env.logger.info("I/O error({0}): {1}".format(e.errno, e.strerror))
        exit()
    with pdbinfile:
        pdbfiledata = pdbinfile.read()

    # Scan the text that was read: iterating the file object after read()
    # yields nothing, so the notice below could never be emitted before.
    if any(line[17:20] in ("HID", "HIE") for line in pdbfiledata.splitlines()):
        env.logger.info("HID or HIE found in pdb-structure. The names will be changed to HIS in the input file")
    pdbfiledata = pdbfiledata.replace("HID", "HIS")
    pdbfiledata = pdbfiledata.replace("HIE", "HIS")
    logger.info("Warning!!! \nRewriting reference file {}".format(self.ref))
    with open(self.ref, 'w') as pdbfile:
        pdbfile.write(pdbfiledata)

    # set reference and create first structure
    if self.selection == "all":
        ref_structure = prody.parsePDB(self.ref)
    else:
        ref_structure = prody.parsePDB(self.ref, subset=self.selection)
    reference_hierview = ref_structure.getHierView()
    ensemble_ref_title = reference_hierview._atoms.getTitle()
    logger.info(f"Found {reference_hierview.numChains()} Chain(s) in {ensemble_ref_title}")
    logger.info(reference_hierview[0])

    # at the moment all chains are taken --> check for duplicate chains and
    # only take one of those
    ref_chids = [chain.getChid() for chain in reference_hierview]
    if self.debug:
        env.logger.info(f"***DEBUG*** {ref_chids}")
    reference_chains = [reference_hierview[chid] for chid in ref_chids]
    if self.debug:
        env.logger.info(f"***DEBUG*** {reference_chains}")

    # Concatenate all reference chains into one selection.
    ref_chain = reference_chains[0]
    for chain in reference_chains[1:]:
        ref_chain = ref_chain + chain
    if self.debug:
        env.logger.info("***DEBUG*** %s", ref_chain)
    # save globally
    self.ref_chain = ref_chain

    # Create Ensemble of structures
    ensemble = prody.PDBEnsemble(ensemble_ref_title)
    ensemble.setAtoms(ref_chain)
    ensemble.setCoords(ref_chain)
    # Set ref_structure as first coordinate set
    ensemble.addCoordset(ref_structure)
    if self.debug:
        env.logger.info("***DEBUG*** %s", ref_chain.getResnames())
        env.logger.info("***DEBUG*** %s", ref_chain.getResnums())

    unmapped = []
    # map remaining structures to reference chain and add to ensemble if mapped
    for pdb in pdbs:
        if self.debug:
            env.logger.info("***DEBUG*** Processing %s", pdb)
        if self.selection == "all":
            structure = prody.parsePDB(pdb)
        else:
            structure = prody.parsePDB(pdb, subset=self.selection)

        atommaps = []
        for reference_chain in reference_chains:
            # Map current PDB file to the reference chain
            mappings = prody.mapOntoChain(structure, reference_chain,
                                          seqid=90, coverage=50,
                                          subset=self.selection)
            if len(mappings) == 0:
                env.logger.info('Failed to map %s', pdb)
                break
            atommaps.append(mappings[0][0])

        # Make sure all chains are mapped
        if len(atommaps) != len(reference_chains):
            unmapped.append(pdb)
            continue

        atommap = atommaps[0]
        for extra_map in atommaps[1:]:
            atommap = atommap + extra_map

        # Add the atommap (mapped coordinates) to the ensemble
        # Note that some structures do not completely map (missing residues)
        # so we pass weights (1 for mapped atoms, 0 for unmapped atoms)
        ensemble.addCoordset(atommap, weights=atommap.getFlags('mapped'))

    if self.debug:
        env.logger.info("***DEBUG*** Number of conformations: %s", ensemble.numConfs())
        env.logger.info("***DEBUG*** %s", ensemble)
        env.logger.info("***DEBUG*** Unmapped structures: %s", unmapped)

    ensemble.iterpose()
    return ensemble
import numpy
import prody


def _write_coords(path, coordsets):
    """Dump a 3-D coordinate array: a header with its three dimensions, then every coordinate set."""
    # open() replaces the Python-2-only file() builtin.
    with open(path, 'w') as outfile:
        outfile.write("%d %d %d\n" % coordsets.shape)
        for coordset in coordsets:
            numpy.savetxt(outfile, coordset)


# Iteratively superposes the alpha carbons of the stretching trajectory and
# writes both the initial and the superposed CA coordinates.
pdb_data = prody.parsePDB(
    "../Models/prot_stretching/stretching_trajectory_offset_ligand.pdb")
pdb_trajectory = prody.PDBEnsemble("iterposed_CA")

# Write the initial coordsets
prot = pdb_data.select("name CA")
prody.writePDB("stretching_trajectory_offset_ligand.iterposed_all.pdb", prot)
_write_coords("stretching_trajectory_offset_ligand.initial_CA.coords",
              prot.getCoordsets())

# We only want to work with CAs. If we use the "all coordinates + atom
# selection" trick, Prody will still use all coordinates for iterative
# superposition, so the ensemble is built from the CA coordinates only.
pdb_trajectory.setCoords(prot.getCoordsets()[0])
pdb_trajectory.addCoordset(prot.getCoordsets())
pdb_trajectory.setAtoms(prot)
pdb_trajectory.iterpose()

prody.writePDB("stretching_trajectory_offset_ligand.iterposed_CA.pdb", pdb_trajectory)
_write_coords("stretching_trajectory_offset_ligand.iterposed_CA.coords",
              pdb_trajectory.getCoordsets())
''' import prody import time import numpy from pyRMSD.RMSDCalculator import RMSDCalculator if __name__ == '__main__': TRAJECTORY_FILE = "data/ubi_amber_100.pdb" # Coordinates loading pdb_data = prody.parsePDB(TRAJECTORY_FILE) num_atoms = pdb_data.numAtoms() total_frames = pdb_data.numCoordsets() trajectory = prody.PDBEnsemble("UBI") trajectory.setAtoms(pdb_data) trajectory.addCoordset(pdb_data.getCoordsets()) coordinates = numpy.copy(pdb_data.getCoordsets()) # RMSD matrix computation start_t = time.time() prody_rmsd_values = [] for i in range(0, total_frames): trajectory.setCoords(pdb_data.getCoordsets()[i]) trajectory.superpose() prody_rmsd_values.extend(trajectory.getRMSDs()[1:]) trajectory.delCoordset(0) stop_t = time.time() print "Prody's computation time:", stop_t - start_t
subset='ca', chain=args.ref_chain_id) if reference_structure is None: raise Exception('Error loading reference structure') else: raise Exception('Reference structure not found') reference_hierview = reference_structure.getHierView() reference_chain = reference_hierview[args.ref_chain_id] # Generate an ensemble of structures from the pdb files ens_FN = 'prody.ens.npz' mappings_FN = 'prody.mappings.pkl' if os.path.isfile(ens_FN) and os.path.isfile(mappings_FN): ensemble = prody.loadEnsemble(ens_FN) mappings = pickle.load(open(mappings_FN,'r')) else: ensemble = prody.PDBEnsemble() # Set ensemble atoms ensemble.setAtoms(reference_chain) # Set reference coordinates ensemble.setCoords(reference_chain.getCoords()) mappings = [] new_pdb_FNs = [] for (pdb_id,chain_id) in chain_hits: pdb_FN = 'chains_original/%s%s.pdb'%(pdb_id,chain_id) if pdb_id in exclude: continue structure = prody.parsePDB(pdb_FN, subset='ca', chain=chain_id) if structure is None: prody.plog('Failed to parse ' + pdb_FN) continue
import prody
import numpy
import sys

# Usage: <script> <input.pdb> <output.pdb>
# Writes a copy of the input trajectory in which each of the first frames is
# translated by a fixed per-frame shift; the untranslated first frame is kept
# as the ensemble's reference coordinates.
pdb_data = prody.parsePDB(sys.argv[1])

atom_count = pdb_data.numAtoms()
frame_shifts = [
    [12, 40, 2],
    [5, -5, 8],
    [-13, -14, 7],
    [13, 14, 23],
    # [-8, -4, -2] is disabled
    [6, 89, 2],
]
# One row per frame, the frame's shift repeated for every atom.
offsets = numpy.array([[shift] * atom_count for shift in frame_shifts])

frames = pdb_data.getCoordsets()
pdb_trajectory = prody.PDBEnsemble(sys.argv[1])
pdb_trajectory.setAtoms(pdb_data)
pdb_trajectory.addCoordset(frames + offsets[0:len(frames)])
pdb_trajectory.setCoords(frames[0])

prody.writePDB(sys.argv[2], pdb_trajectory)