def convert_mdtraj_to_oemol(traj: md.Trajectory) -> oechem.OEMol:
    """
    Convert an mdtraj Trajectory to an OEMol via a temporary PDB file.

    The trajectory is saved as a PDB file with MDTraj and read back in with
    OpenEye. Although this seems hacky, it seems less error-prone than trying
    to manually construct the OEMol.

    Parameters
    ----------
    traj : md.Trajectory
        The trajectory to turn into an OEMol

    Returns
    -------
    mol : oechem.OEMol
        The trajectory represented as an OEMol
    """
    # create a temporary file with a PDB suffix; delete=False because the
    # path is re-opened by name by both MDTraj and OpenEye
    pdb_file = tempfile.NamedTemporaryFile(delete=False, suffix=".pdb")
    try:
        traj.save(pdb_file.name)
        pdb_file.close()

        # Now use the openeye oemolistream to read in this file as an OEMol:
        ifs = oechem.oemolistream()
        try:
            ifs.open(pdb_file.name)
            ifs.SetFormat(oechem.OEFormat_PDB)
            mol = oechem.OEMol()
            oechem.OEReadMolecule(ifs, mol)
        finally:
            # always release the stream, even if reading raised
            ifs.close()
    finally:
        # always remove the temporary file, even if saving or reading raised
        pdb_file.close()
        os.unlink(pdb_file.name)

    return mol
def write_equilibrium_trajectory(equilibrium_result: EquilibriumResult, trajectory: md.Trajectory, trajectory_filename: str) -> float:
    """
    Persist an equilibrium trajectory, appending to the file when it exists.

    Parameters
    ----------
    equilibrium_result : EquilibriumResult namedtuple
        the result of an equilibrium calculation
    trajectory : md.Trajectory
        the trajectory resulting from an equilibrium simulation
    trajectory_filename : str
        the name of the HDF5 trajectory file to create or extend

    Returns
    -------
    reduced_potential_final_frame : float
        ``equilibrium_result.reduced_potential``, returned unchanged
    """
    if os.path.exists(trajectory_filename):
        # "Appending" is done by loading what is on disk, joining the new
        # frames onto it and rewriting the whole file.
        previous_frames = md.load_hdf5(trajectory_filename)
        combined = previous_frames.join(trajectory)
        combined.save_hdf5(trajectory_filename)
    else:
        # First write: the new trajectory becomes the file contents.
        trajectory.save_hdf5(trajectory_filename)

    return equilibrium_result.reduced_potential
def mdtraj_to_oemol(snapshot: md.Trajectory):
    """
    Create an OEMol from an MDTraj file by writing and reading

    NOTE: This uses terrible heuristics

    Parameters
    ----------
    snapshot : mdtraj.Trajectory
        MDTraj Trajectory with a single snapshot

    Returns
    -------
    oemol : openeye.oechem.OEGraphMol or None
        A copy of the first molecule OpenEye reads back from the temporary
        PDB file, or ``None`` if the stream yields no molecule.
    """
    from openeye import oechem

    with tempfile.TemporaryDirectory() as tmpdir:
        filename = os.path.join(tmpdir, "tmp.pdb")
        # Write the PDB file
        snapshot.save(filename)
        # Read it with OpenEye
        with oechem.oemolistream(filename) as ifs:
            for mol in ifs.GetOEGraphMols():
                # The molecule handed out by the stream generator is tied to
                # the stream (it is reused between iterations); return an
                # independent copy so the result does not depend on a stream
                # that is closed as soon as we leave this block.
                return oechem.OEGraphMol(mol)

    # Nothing could be read from the file.
    return None
def write_nonequilibrium_trajectory(
    nonequilibrium_result: NonequilibriumResult,
    nonequilibrium_trajectory: md.Trajectory,
    trajectory_filename: str,
) -> float:
    """
    Save a nonequilibrium switching trajectory and report the final work.

    The trajectory, when one is given, is written to an mdtraj HDF5 file at
    ``trajectory_filename``. Nothing else is written by this function.

    Parameters
    ----------
    nonequilibrium_result : NonequilibriumResult namedtuple
        The result of a nonequilibrium switching calculation
    nonequilibrium_trajectory : md.Trajectory
        The trajectory resulting from a nonequilibrium simulation; ``None``
        skips the write
    trajectory_filename : str
        The full filepath for where to store the trajectory

    Returns
    -------
    final_work : float
        The last entry of ``nonequilibrium_result.cumulative_work``
    """
    have_trajectory = nonequilibrium_trajectory is not None
    if have_trajectory:
        nonequilibrium_trajectory.save_hdf5(trajectory_filename)

    work_values = nonequilibrium_result.cumulative_work
    return work_values[-1]
def convert_mdtraj_to_oemol(traj: md.Trajectory) -> oechem.OEMol:
    """
    Convert an mdtraj Trajectory to an OEMol via a temporary PDB file.

    The trajectory is saved as a PDB file with MDTraj and read back in with
    OpenEye. Although this seems hacky, it seems less error-prone than trying
    to manually construct the OEMol.

    Parameters
    ----------
    traj : md.Trajectory
        The trajectory to turn into an OEMol

    Returns
    -------
    mol : oechem.OEMol
        The trajectory represented as an OEMol
    """
    # create a temporary file with a PDB suffix; delete=False because the
    # path is re-opened by name by both MDTraj and OpenEye
    pdb_file = tempfile.NamedTemporaryFile(delete=False, suffix=".pdb")
    try:
        traj.save(pdb_file.name)
        pdb_file.close()

        # Now use the openeye oemolistream to read in this file as an OEMol:
        ifs = oechem.oemolistream()
        try:
            ifs.open(pdb_file.name)
            ifs.SetFormat(oechem.OEFormat_PDB)
            mol = oechem.OEMol()
            oechem.OEReadMolecule(ifs, mol)
        finally:
            # always release the stream, even if reading raised
            ifs.close()
    finally:
        # always remove the temporary file, even if saving or reading raised
        pdb_file.close()
        os.unlink(pdb_file.name)

    return mol
def write_equilibrium_trajectory(trajectory: md.Trajectory, trajectory_filename: str) -> bool:
    """
    Write the results of an equilibrium simulation to disk.

    If ``trajectory_filename`` does not exist yet it is created from
    ``trajectory``. Otherwise the stored trajectory is loaded, ``trajectory``
    is joined onto its end and the file is rewritten.

    Parameters
    ----------
    trajectory : md.Trajectory
        the trajectory resulting from an equilibrium simulation
    trajectory_filename : str
        the name of the trajectory file to which we should append

    Returns
    -------
    bool
        Always ``True``.
    """
    if not os.path.exists(trajectory_filename):
        trajectory.save_hdf5(trajectory_filename)
        _logger.debug(
            f"{trajectory_filename} does not exist; instantiating and writing to."
        )
    else:
        _logger.debug(f"{trajectory_filename} exists; appending.")
        # Appending means: load what is on disk, join, and rewrite the file.
        written_traj = md.load_hdf5(trajectory_filename)
        concatenated_traj = written_traj.join(trajectory)
        concatenated_traj.save_hdf5(trajectory_filename)

    return True
def test_lprmsd_0():
    # Shuffle every atom of a random structure (no rotation applied) and
    # check that lprmsd reports a distance of zero.
    n_atoms = 10
    ref = random.randn(1, n_atoms, 3).astype(np.float32)
    mapping = random.permutation(n_atoms)
    print('true mapping', mapping)

    permuted = Trajectory(xyz=ref[:, mapping], topology=None)
    reference = Trajectory(xyz=ref, topology=None)
    value = lprmsd(permuted, reference)

    expected = np.array([0.0], dtype=np.float32)
    eq(value, expected, decimal=3)
def test_lprmsd_1():
    # A random rotation with the atom order left untouched must be resolved
    # when no atoms are allowed to permute.
    n_atoms = 50
    ref = random.randn(1, n_atoms, 3).astype(np.float32)
    mapping = np.arange(n_atoms)
    rot = rotation_matrix_from_quaternion(uniform_quaternion())

    rotated = Trajectory(xyz=ref[:, mapping].dot(rot), topology=None)
    reference = Trajectory(xyz=ref, topology=None)
    value = lprmsd(rotated, reference, permute_groups=[[]])

    assert value[0] < 1e-2
def test_lprmsd_2():
    # A random rotation combined with a partial permutation must be resolved.
    n_atoms = 50
    n_permutable = 10
    ref = random.randn(1, n_atoms, 3).astype(np.float32)

    # the first 10 atoms are shuffled, the remaining 40 keep their positions
    shuffled_head = random.permutation(n_permutable)
    fixed_tail = n_permutable + np.arange(n_atoms - n_permutable)
    mapping = np.concatenate((shuffled_head, fixed_tail))

    rot = rotation_matrix_from_quaternion(uniform_quaternion())

    moved = Trajectory(xyz=ref[:, mapping].dot(rot), topology=None)
    reference = Trajectory(xyz=ref, topology=None)
    value = lprmsd(moved, reference, permute_groups=[np.arange(n_permutable)])

    assert value[0] < 1e-2
def test_unitcell():
    # make sure that bogus unitcell vectors are not saved
    top = md.load(get_fn('native.pdb')).restrict_atoms(range(5)).topology
    t = Trajectory(xyz=np.random.randn(100, 5, 3), topology=top)

    # xtc dcd binpos trr h5 pdb nc lh5
    # The original list named temp6 twice and never exercised temp7.
    # NOTE(review): assumes temp7 is the module-level temporary file for the
    # seventh format in the comment above (nc) -- confirm against the module.
    for fn in [temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8]:
        t.save(fn)
        # Bind fn as a default argument so each yielded check keeps pointing
        # at its own file even if it is executed after the loop has advanced.
        f = lambda fn=fn: eq(md.load(fn, top=top).unitcell_vectors, None)
        f.description = 'unitcell preservation in %s' % os.path.splitext(fn)[1]
        yield f
def setup(self):
    # Fixture: a Rings object whose intensities live in an on-disk tables file.
    self.q_values = np.array([1.0, 2.0])
    self.num_phi = 360
    self.traj = Trajectory.load(ref_file('ala2.pdb'))
    self.num_shots = 2

    n_q = len(self.q_values)
    h5_path = 'tmp_tables.h5'

    # small random noise on top of a cosine over two full periods
    noise = np.random.randn(self.num_shots, n_q, self.num_phi) / 100.0
    modulation = np.cos(np.linspace(0.0, 4.0*np.pi, self.num_phi))
    intensities = np.abs(noise + modulation)

    # generate the tables file on disk (replacing any stale copy) ...
    if os.path.exists(h5_path):
        os.remove(h5_path)

    hdf = tables.File(h5_path, 'w')
    a = tables.Atom.from_dtype(np.dtype(np.float64))
    node = hdf.create_earray(where='/', name='data',
                             shape=(0, n_q, self.num_phi),
                             atom=a, filters=io.COMPRESSION)
    node.append(intensities)
    hdf.close()

    # ... then re-open it and hand the on-disk array to Rings
    self.tables_file = tables.File(h5_path, 'r+')
    pi = self.tables_file.root.data
    pm = np.random.binomial(1, 0.9, size=(n_q, self.num_phi))
    k = 1.0

    self.rings = xray.Rings(self.q_values, pi, k, pm)
def test_select_positions(self, universe: md.Trajectory, n_frames: int) -> None:
    """Check get_positions when an atom selection mask is supplied.

    GIVEN topology and trajectory filenames and an atom selection
    WHEN the get_positions function is called
    THEN an array of positions with shape (n_frames, n_atoms, 3) is returned

    Parameters
    ----------
    universe : Trajectory
        Molecular dynamics trajectory
    n_frames : int
        Number of frames
    """
    mask = "protein and name CA"
    selected = universe.topology.select(mask)
    expected_shape = (n_frames, selected.size, 3)

    array = utils.get_positions(TOPWW, [TRJWW], mask=mask)

    assert array.shape == expected_shape
    # The reference coordinates are multiplied by 10 before comparison
    # (presumably a nm -> Angstrom conversion; verify against get_positions).
    first_frame = universe.atom_slice(selected).xyz[0] * 10
    testing.assert_allclose(array[0], first_frame)
    assert isinstance(array, np.ndarray)
def traj_frame_to_sampler_state(traj: md.Trajectory, frame_number: int, box_vectors):
    """
    Build a SamplerState from a single frame of an mdtraj trajectory.

    Parameters
    ----------
    traj : md.Trajectory
        Trajectory to take the frame from
    frame_number : int
        Index of the frame to convert
    box_vectors
        Ignored: the value is replaced by the box of the requested frame
        (``traj.openmm_boxes(frame_number)``), exactly as before. The
        parameter is kept so existing callers keep working.

    Returns
    -------
    sampler_state : states.SamplerState
        Sampler state holding the frame's positions (as nanometers) and the
        frame's box vectors
    """
    xyz = traj.xyz[frame_number, :, :]
    box_vectors = traj.openmm_boxes(frame_number)
    # The box vectors used to be computed and then dropped; hand them to the
    # sampler state so the state carries the frame's box.
    sampler_state = states.SamplerState(
        unit.Quantity(xyz, unit=unit.nanometers),
        box_vectors=box_vectors,
    )
    return sampler_state
def test_select_average(self, universe: md.Trajectory) -> None:
    """Check get_average_structure when an atom selection mask is supplied.

    GIVEN topology and trajectory filenames and an atom selection
    WHEN the get_average_structure function is called
    THEN the average coordinates are computed

    Parameters
    ----------
    universe : Trajectory
        Molecular dynamics trajectory
    """
    mask = "protein and name CA"
    selected = universe.topology.select(mask)
    expected_shape = (1, selected.size, 3)

    average = utils.get_average_structure(TOPWW, [TRJWW], mask=mask)

    assert average.xyz.shape == expected_shape
    # reference: per-atom mean over all frames of the selected atoms
    universe_average = universe.atom_slice(selected).xyz.mean(axis=0)
    testing.assert_allclose(average.xyz[0], universe_average)
def setup(self):
    # Fixture: rings simulated from the ala2 reference structure.
    self.q_values = np.array([1.0, 2.0])
    self.num_phi = 360
    self.traj = Trajectory.load(ref_file('ala2.pdb'))
    self.num_shots = 4

    num_molecules = 1  # 1 molec
    self.rings = xray.Rings.simulate(
        self.traj,
        num_molecules,
        self.q_values,
        self.num_phi,
        self.num_shots,
    )
def read_as_traj(self, iteration=None, segment=None, atom_indices=None):
    """Read frames from the file and return them as a trajectory object.

    Parameters
    ----------
    iteration : int, optional
        Iteration label to select. Must be given together with ``segment``.
    segment : int, optional
        Segment label to select. Must be given together with ``iteration``.
    atom_indices : array_like, optional
        If not None, only these atoms are read and the topology is
        restricted to them.

    Returns
    -------
    WESTTrajectory or Trajectory
        A ``WESTTrajectory`` carrying the selected frames together with
        their iteration/segment labels. If the read returns no data, an
        empty ``Trajectory`` with zero frames is returned instead.

    Raises
    ------
    ValueError
        If only one of ``iteration``/``segment`` is given, if either is not
        an integer, or if no frame carries the requested labels.
    """
    _check_mode(self.mode, ('r', ))

    # column 0 / column 1 of the 'pointer' node are used as the iteration
    # and segment label of every stored frame
    pnode = self._get_node(where='/', name='pointer')
    iter_labels = pnode[:, 0]
    seg_labels = pnode[:, 1]

    if iteration is None and segment is None:
        # no selection: take every frame
        frame_indices = slice(None)
    elif isinstance(iteration, (np.integer, int)) and isinstance(
            segment, (np.integer, int)):
        frame_torf = np.logical_and(iter_labels == iteration,
                                    seg_labels == segment)
        frame_indices = np.arange(len(iter_labels))[frame_torf]
    else:
        raise ValueError(
            "iteration and segment must be integers and provided at the same time"
        )

    # A slice has no len(); calling len() on it raised TypeError and made the
    # select-everything path unusable. Only an explicit index array can be
    # an empty selection.
    if not isinstance(frame_indices, slice) and len(frame_indices) == 0:
        raise ValueError(
            f"no frame was selected: iteration={iteration}, segment={segment}, atom_indices={atom_indices}"
        )

    iter_labels = iter_labels[frame_indices]
    seg_labels = seg_labels[frame_indices]

    topology = self.topology
    if atom_indices is not None:
        topology = topology.subset(atom_indices)

    data = self.read(frame_indices=frame_indices, atom_indices=atom_indices)
    if len(data) == 0:
        return Trajectory(xyz=np.zeros((0, topology.n_atoms, 3)),
                          topology=topology)

    # convert lengths from the file's unit to the Trajectory unit, in place
    in_units_of(data.coordinates, self.distance_unit,
                Trajectory._distance_unit, inplace=True)
    in_units_of(data.cell_lengths, self.distance_unit,
                Trajectory._distance_unit, inplace=True)

    return WESTTrajectory(
        data.coordinates,
        topology=topology,
        time=data.time,
        unitcell_lengths=data.cell_lengths,
        unitcell_angles=data.cell_angles,
        iter_labels=iter_labels,
        seg_labels=seg_labels,
        pcoords=None,
    )
def setup(self):
    # Fixture: a Shotset built from random in-memory intensities.
    self.q_values = np.array([1.0, 2.0])
    self.num_phi = 360
    self.l = 50.0

    detector = xray.Detector.generic(spacing=0.4, l=self.l)
    self.d = detector

    self.num_shots = 2
    # one row of non-negative random intensities per shot, one column per pixel
    raw = np.random.randn(self.num_shots, detector.num_pixels)
    self.i = np.abs(raw)

    self.t = Trajectory.load(ref_file('ala2.pdb'))
    self.shot = xray.Shotset(self.i, self.d)
def _join_traj_data(traj_data, top_file):
    """Concatenate per-chunk trajectory records into a single Trajectory.

    Each record in ``traj_data`` is read both by position (0: coordinates,
    1: unit cell lengths, 2: unit cell angles, last: box) and by attribute
    (``unitcell_lengths``, ``unitcell_angles``, ``box``). A unit cell field
    is only set on the result when every record provides it.
    """
    def _stack(position):
        # concatenate the field stored at ``position`` of every record
        return np.concatenate(tuple(record[position] for record in traj_data))

    top = load_topology_cached(top_file)
    traj = Trajectory(_stack(0), top)

    have_lengths = all(rec.unitcell_lengths is not None for rec in traj_data)
    if have_lengths:
        traj.unitcell_lengths = _stack(1)

    have_boxes = all(rec.box is not None for rec in traj_data)
    if have_boxes:
        traj.unitcell_vectors = _stack(-1)

    have_angles = all(rec.unitcell_angles is not None for rec in traj_data)
    if have_angles:
        traj.unitcell_angles = _stack(2)

    return traj
def load_lh5(filename, top=None, stride=None, atom_indices=None, frame=None):
    """Load a deprecated MSMBuilder2 LH5 trajectory file.

    Parameters
    ----------
    filename : str
        Path of the LH5 trajectory file.
    top : {str, Trajectory, Topology}
        Not used by this loader: the topology is taken from the LH5 file
        itself.
    stride : int, default=None
        Only read every stride-th frame
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file, and restrict the topology to the same atoms.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.

    Returns
    -------
    trajectory : md.Trajectory
        The loaded frames. The time axis is the frame index: offset by
        ``frame`` for a single-frame read, scaled by ``stride`` otherwise.

    See Also
    --------
    mdtraj.LH5TrajectoryFile : Low level interface to LH5 files
    """
    from mdtraj import Trajectory

    atom_indices = cast_indices(atom_indices)
    want_single_frame = frame is not None
    want_subset = atom_indices is not None

    with LH5TrajectoryFile(filename) as lh5:
        if want_single_frame:
            # jump to the requested frame and read just that one
            lh5.seek(frame)
            xyz = lh5.read(n_frames=1, atom_indices=atom_indices)
        else:
            xyz = lh5.read(stride=stride, atom_indices=atom_indices)

        topology = lh5.topology
        # convert from the file's length unit to the Trajectory unit, in place
        in_units_of(xyz, lh5.distance_unit, Trajectory._distance_unit,
                    inplace=True)

        if want_subset:
            topology = lh5.topology.subset(atom_indices)

    time = np.arange(len(xyz))
    if want_single_frame:
        time += frame
    elif stride is not None:
        time *= stride

    return Trajectory(xyz=xyz, topology=topology, time=time)
def _construct_traj(self):
    """
    Build the coarse-grained mdtraj.Trajectory and store it on ``self``.

    Coordinates and topology come from ``self._cg_xyz`` / ``self._cg_top``;
    time and unit cell information are taken over from ``self._aa_traj``.
    The result is stored as ``self._cg_traj``.
    """
    cg_xyz = self._cg_xyz
    cg_top = self._cg_top
    atomistic = self._aa_traj
    self._cg_traj = Trajectory(
        cg_xyz,
        cg_top,
        time=atomistic.time,
        unitcell_lengths=atomistic.unitcell_lengths,
        unitcell_angles=atomistic.unitcell_angles,
    )
def test_sph_harm():
    # Compare the analytical (spherical-harmonic) correlation function with
    # one computed empirically from a simulated set of rings.

    # -----------------------
    traj = Trajectory.load(ref_file('pentagon.pdb'))
    q_magnitudes = [1.6]
    num_coefficients = 44
    num_molecules = 1
    num_shots = 20000
    num_phi = 2048
    # -----------------------

    q = q_magnitudes[0]

    # compute the Kam-theoretic values of the Legendre coefficients C_ell, which
    # we will call "coeffsh"
    # Integer division: a coefficient count must stay an int (a plain "/"
    # produces a float on Python 3).
    coeffsh_even = scatter.sph_harm_coefficients(
        traj, q_magnitudes, num_coefficients=num_coefficients // 2)
    coeffsh_even = np.nan_to_num(coeffsh_even)
    coeffsh_even /= coeffsh_even[1]

    # only the even-order slots are filled; odd orders stay zero
    coeffsh = np.zeros(num_coefficients)
    coeffsh[0::2] = coeffsh_even.flatten()

    # next, perform a simulation of the scattering and empirically compute the
    # correlation function
    rings = xray.Rings.simulate(traj, num_molecules, q_magnitudes, num_phi,
                                num_shots)
    c = rings.correlate_intra(q, q, mean_only=True)

    # it seems best to compare the solutions in the expanded basis
    c_sh = np.polynomial.legendre.legval(rings.cospsi(q, q), coeffsh.flatten())

    c = c - c.mean()
    c_sh = c_sh - c_sh.mean()

    # plt.figure()
    # plt.plot(c_sh / c_sh[0])
    # plt.plot(c / c[0])
    # plt.show()

    # if these are more than 10% different, fail the test
    error = (np.sum(np.abs((c_sh / c_sh[0]) - (c / c[0]))) / float(num_phi))
    assert error < 0.1, 'simulation and analytical computation >10%% different (%f %%)' % error
def write_nonequilibrium_trajectory(
    nonequilibrium_result: NonequilibriumResult,
    nonequilibrium_trajectory: md.Trajectory,
    trajectory_filename: str,
) -> float:
    """
    Save a nonequilibrium switching trajectory and report the final work.

    The trajectory, when one is given, is written to an mdtraj HDF5 file at
    ``trajectory_filename``. Nothing else is written by this function.

    Parameters
    ----------
    nonequilibrium_result : NonequilibriumResult namedtuple
        The result of a nonequilibrium switching calculation
    nonequilibrium_trajectory : md.Trajectory
        The trajectory resulting from a nonequilibrium simulation; ``None``
        skips the write
    trajectory_filename : str
        The full filepath for where to store the trajectory

    Returns
    -------
    final_work : float
        The last entry of ``nonequilibrium_result.cumulative_work``
    """
    have_trajectory = nonequilibrium_trajectory is not None
    if have_trajectory:
        nonequilibrium_trajectory.save_hdf5(trajectory_filename)

    work_values = nonequilibrium_result.cumulative_work
    return work_values[-1]
def test_py_cpu_smoke(self):
    # Smoke test: simulate one finite-photon shot and check the photon count.
    traj = Trajectory.load(ref_file('ala2.pdb'))
    num_molecules = 1

    detector = xray.Detector.generic()
    detector.beam.photons_scattered_per_shot = 1e3

    I = scatter.simulate_shot(traj, num_molecules, detector,
                              finite_photon=True)

    # simple statistical sanity check: the total intensity must lie within
    # six square-roots of the requested photon count
    requested = detector.beam.photons_scattered_per_shot
    deviation = np.abs(I.sum() - requested)
    tolerance = np.sqrt(requested)*6.0
    assert deviation < tolerance
def test_python_call(self):
    """ Test the GPU scattering simulation interface (scatter.simulate) """
    if not GPU:
        raise SkipTest

    # Parenthesised form prints the same text on Python 2 and is valid
    # syntax on Python 3 (the bare print statement is not).
    print("testing python wrapper fxn...")

    traj = Trajectory.load(ref_file('ala2.pdb'))
    num_molecules = 512
    detector = xray.Detector.generic()

    py_I = scatter.simulate_shot(traj, num_molecules, detector)

    # the simulated intensities must be neither all zero nor contain NaN
    assert not np.all(py_I == 0.0)
    assert not np.isnan(np.sum(py_I))
def setup(self):
    # Fixture: a Shotset whose intensities are read back from a tables file.
    self.q_values = np.array([1.0, 2.0])
    self.num_phi = 360
    self.l = 50.0

    detector = xray.Detector.generic(spacing=0.4, l=self.l)
    self.d = detector
    self.t = Trajectory.load(ref_file('ala2.pdb'))
    self.num_shots = 2

    # write random non-negative intensities to disk ...
    h5_path = 'tmp_tables.h5'
    raw = np.random.randn(self.num_shots, detector.num_pixels)
    intensities = np.abs(raw)
    io.saveh(h5_path, data=intensities)

    # ... and use the on-disk array as the shot intensities
    self.tables_file = tables.File(h5_path)
    self.i = self.tables_file.root.data

    self.shot = xray.Shotset(self.i, self.d)
def test_no_hydrogens():
    # Simulate the same shot with and without hydrogens and check that the
    # results differ, but not by too much.
    traj = Trajectory.load(ref_file('ala2.pdb'))
    num_molecules = 1
    detector = xray.Detector.generic()
    detector.beam.photons_scattered_per_shot = 1e3

    I_noH = scatter.simulate_shot(traj, num_molecules, detector,
                                  ignore_hydrogens=True, dont_rotate=True)
    I_wH = scatter.simulate_shot(traj, num_molecules, detector,
                                 ignore_hydrogens=False, dont_rotate=True)

    assert not np.all(I_noH == I_wH)

    # compute the difference -- we're not setting random numbers here so just
    # looking at radially averaged stuff...
    diff = np.sum(np.abs(I_noH - I_wH) / I_wH) / float(len(I_wH))
    # Parenthesised form works on both Python 2 and Python 3.
    print(diff)
    assert diff < 1.0, 'ignoring hydrogens makes too big of a difference...'
def test_lprmsd_null():
    # A structure compared against an exact copy of itself must give zero.
    ref = random.randn(1, 10, 3).astype(np.float32)
    duplicate = np.copy(ref)

    copied = Trajectory(xyz=duplicate, topology=None)
    reference = Trajectory(xyz=ref, topology=None)
    value = lprmsd(copied, reference)

    expected = np.array([0.0], dtype=np.float32)
    eq(value, expected, decimal=3)
def cg_by_index(trj, atom_indices_list, bead_label_list, chain_list=None,
                segment_id_list=None, resSeq_list=None, inplace=False,
                bonds=None, split_shared_atoms=False, mod_weights_list=None,
                mapping_function="com", charge_tol=1e-5,
                center_postwrap=False):
    """Create a coarse grained (CG) trajectory from subsets of atoms by
    computing weighted centers of selected sets of atoms.

    Parameters
    ----------
    trj : md.Trajectory
        Trajectory to coarse grain.
    atom_indices_list : list of array-like, dtype=int, shape=(n_beads,n_atoms)
        List of indices of atoms to combine into CG sites
    bead_label_list : list of maximum 4-letter strings to label CG sites
    chain_list : optional list of chain id's to split resulting beads into
        separate chains
    resSeq_list : optional list of residue sequence id's to assign cg residues
    segment_id_list : optional list of segment id's to assign cg residues
    inplace : bool, default=False
        If ``True``, the operation is done inplace, modifying ``trj``.
        Otherwise, a copy is returned with the sliced atoms, and
        ``trj`` is not modified.
    bonds : array-like,dtype=int, shape=(n_bonds,2), default=None
        If specified, sets these bonds in new topology
    split_shared_atoms : boolean
        If specified, check to see if atoms are shared per molecule in beads.
        If so, equally divide their weight accordingly for each bead. This
        replaces any ``mod_weights_list`` passed in.
    mod_weights_list : list of array-like, optional
        Per-bead factors multiplied element-wise into the mapping weights.
    mapping_function : string, default='com'
        How to map xyz coordinates. Must be a key of ``mapping_options``;
        the names handled here are 'com'/'center_of_mass',
        'coc'/'center_of_charge', 'cof'/'center_of_force' and 'center'.
    charge_tol : float, default=1e-5
        With a center-of-charge mapping, a bead whose absolute total charge
        is below this value is rejected.
    center_postwrap : Boolean
        Whether to wrap the CG system after it is mapped. Assumes that box is
        centered at 0, and only has effect if periodic information is present.

    Note - If repeated resSeq values are used, as for a repeated motif in a
    CG polymer, those sections must be broken into separate chains or an
    incorrect topology will result

    Returns
    -------
    traj : md.Trajectory
        The return value is either ``trj``, or the new trajectory, depending
        on the value of ``inplace``. Mapped forces are attached (as
        ``forces``) to a newly created trajectory only.

    Raises
    ------
    ValueError
        If the list arguments have inconsistent lengths, a bead label is not
        a 1-4 character string, ``mapping_function`` is unknown, or a
        center-of-charge mapping is requested for a bead with (near) zero
        total charge or for atoms without charges.
    """
    # ---- argument validation ------------------------------------------------
    if not len(atom_indices_list) == len(bead_label_list):
        raise ValueError("Must supply a list of bead labels of the "
                         "same length as a list of selected atom indices")

    for bead_label in bead_label_list:
        if not (type(bead_label) is str) or len(bead_label) > 4 or len(bead_label) < 1:
            # Adjacent literals instead of a backslash continuation inside the
            # string, which leaked source indentation into the message.
            raise ValueError("Specified bead label '%s' is not valid, "
                             "must be a string between 1 and 4 characters"
                             % bead_label)

    bead_label_list = [bead_label.upper() for bead_label in bead_label_list]

    if mapping_function not in mapping_options:
        raise ValueError("Must select a mapping function from: %s"
                         % mapping_options.keys())

    if chain_list is None:
        chain_list = np.ones(len(atom_indices_list), dtype=int)
    elif len(chain_list) != len(atom_indices_list):
        raise ValueError("Supplied chain_list must be of the same length "
                         "as a list of selected atom indices")

    if segment_id_list is not None and len(segment_id_list) != len(
            atom_indices_list):
        raise ValueError("Supplied segment_id_list must be of the same "
                         "length as a list of selected atom indices")

    if resSeq_list is not None and len(resSeq_list) != len(atom_indices_list):
        raise ValueError("Supplied resSeq_list must be of the same "
                         "length as a list of selected atom indices")

    # ---- output buffers -----------------------------------------------------
    n_beads = len(atom_indices_list)

    xyz = np.zeros((trj.xyz.shape[0], n_beads, trj.xyz.shape[2]),
                   dtype=trj.xyz.dtype,
                   order='C')

    forces = np.zeros((trj.xyz.shape[0], n_beads, trj.xyz.shape[2]),
                      dtype=np.double,
                      order='C')

    columns = ["serial", "name", "element", "resSeq", "resName", "chainID"]

    # ---- per-bead masses ----------------------------------------------------
    # total mass for each cg bead.
    masses = np.zeros((n_beads), dtype=np.float64)
    # list of masses for elements in cg bead.
    masses_i = []

    for ii in range(n_beads):
        # atoms in current cg bead.
        atom_indices = atom_indices_list[ii]
        # first, construct lists of masses in current cg bead.
        temp_masses = np.array([])
        for jj in atom_indices:
            temp_masses = np.append(temp_masses, trj.top.atom(jj).element.mass)

        masses_i.append(temp_masses)
        masses[ii] = masses_i[ii].sum()

    # ---- per-bead charges (only when atoms carry a ``charge`` attribute) ----
    charges = None
    charges_i = None
    # NOTE(review): atom index 1 is probed, so a single-atom topology fails
    # here; left unchanged because the intended probe atom is not clear.
    if hasattr(trj.top.atom(1), 'charge'):
        # total charge for each cg bead.
        charges = np.zeros((n_beads), dtype=np.float64)
        # lists of charges for atoms in current cg bead
        charges_i = []

        for ii in range(n_beads):
            # atoms in current cg bead.
            atom_indices = atom_indices_list[ii]
            # first, construct lists of charges in current cg bead.
            temp_charges = np.array([])
            for jj in atom_indices:
                temp_charges = np.append(temp_charges,
                                         trj.top.atom(jj).charge)

            charges_i.append(temp_charges)
            charges[ii] = charges_i[ii].sum()

    # ---- per-bead force norms (center-of-force mapping only) ----------------
    forcenorm_i = []
    if mapping_function == 'cof' or mapping_function == 'center_of_force':
        for ii in range(n_beads):
            atom_indices = atom_indices_list[ii]
            forcenorm_i.append(get_forcenorms(trj, atom_indices))

    if mapping_function == 'coc' or mapping_function == 'center_of_charge':
        if charges is None:
            # previously surfaced as an unbound-name error further down
            raise ValueError("Mapping function '%s' requires atoms with a "
                             "charge attribute" % mapping_function)
        # Report the index of the offending bead; the old message reused a
        # stale index left over from an earlier loop.
        for bead_index, charge in enumerate(charges):
            if np.absolute(charge) < charge_tol:
                raise ValueError("Total charge on site %i is near zero" %
                                 bead_index)

    topology_labels = []
    element_label_dict = {}

    if (split_shared_atoms):
        mod_weights_list = gen_unique_overlap_mod_weights(atom_indices_list)

    # ---- detect usable forces on the input trajectory -----------------------
    has_forces = False
    try:
        trj.__dict__['forces']      # KeyError when no forces are attached
        map_forces(trj, (0, ))      # trial mapping; TypeError when invalid
        has_forces = True
    except TypeError:
        print("WARNING: Invalid Forces\nNo Map applied to forces")
    except KeyError:
        pass
    except:
        # anything else is reported and re-raised unchanged
        print("Unknown error, check your forces\nexiting...")
        raise

    # ---- map every bead -----------------------------------------------------
    for i in range(n_beads):
        atom_indices = atom_indices_list[i]
        bead_label = bead_label_list[i]
        # view into the output buffer that compute_center_weighted fills
        xyz_i = xyz[:, i, :]

        if mapping_function == 'coc' or mapping_function == 'center_of_charge':
            weights = charges_i[i]
        elif mapping_function == 'com' or mapping_function == 'center_of_mass':
            weights = masses_i[i]
        elif mapping_function == 'cof' or mapping_function == 'center_of_force':
            weights = forcenorm_i[i]
        elif mapping_function == 'center':
            weights = np.ones(len(atom_indices))

        if (mod_weights_list is not None):
            weights[:] = np.multiply(weights, mod_weights_list[i])

        compute_center_weighted(xyz_i,
                                trj.xyz,
                                atom_indices,
                                weights,
                                unitcell_lengths=trj.unitcell_lengths,
                                center_postwrap=center_postwrap)

        if has_forces:
            forces_i = map_forces(trj, atom_indices)
            forces[:, i, :] = forces_i

        if resSeq_list is not None:
            resSeq = resSeq_list[i]
        else:
            resSeq = i + 1

        # One pseudo-element label ("B0".."B9") per distinct bead label.
        if not bead_label in element_label_dict:
            element_label = '%2s' % ('B%i' % (len(element_label_dict) % 10))
            element_label_dict[bead_label] = element_label
        else:
            element_label = element_label_dict[bead_label]

        # Create the pseudo-element if no element with that symbol exists yet.
        if element_label.strip().upper(
        ) not in element.Element._elements_by_symbol:
            element.Element(1000 + resSeq, element_label, element_label,
                            masses[i], 1.0)

        topology_labels.append([
            i, bead_label, element_label, resSeq,
            '%3s' % bead_label, chain_list[i]
        ])

    # ---- build the CG topology ----------------------------------------------
    df = pd.DataFrame(topology_labels, columns=columns)
    topology = Topology.from_dataframe(df, bonds=bonds)

    if segment_id_list is not None:
        for beadidx, bead in enumerate(topology.atoms):
            bead.residue.segment_id = segment_id_list[beadidx]

    if inplace:
        if trj._topology is not None:
            trj._topology = topology
        trj._xyz = xyz

        return trj

    unitcell_lengths = unitcell_angles = None
    if trj._have_unitcell:
        unitcell_lengths = trj._unitcell_lengths.copy()
        unitcell_angles = trj._unitcell_angles.copy()

    time = trj._time.copy()

    new_trj = Trajectory(xyz=xyz,
                         topology=topology,
                         time=time,
                         unitcell_lengths=unitcell_lengths,
                         unitcell_angles=unitcell_angles)

    new_trj.forces = forces
    return new_trj
def load_pdb(filename, stride=None, atom_indices=None, frame=None,
             no_boxchk=False):
    """Load a RCSB Protein Data Bank file from disk.

    Parameters
    ----------
    filename : str
        Path to the PDB file on disk. The string could be a URL. Valid URL
        schemes include http and ftp.
    stride : int, default=None
        Only read every stride-th model from the file
    atom_indices : array_like, default=None
        If not None, then read only a subset of the atoms coordinates from the
        file. These indices are zero-based (not 1 based, as used by the PDB
        format). So if you want to load only the first atom in the file, you
        would supply ``atom_indices = np.array([0])``.
    frame : int, default=None
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.
    no_boxchk : bool, default=False
        By default, a heuristic check based on the particle density will be
        performed to determine if the unit cell dimensions are absurd. If the
        particle density is >1000 atoms per nm^3, the unit cell will be
        discarded. This is done because all PDB files from RCSB contain a CRYST1
        record, even if there are no periodic boundaries, and dummy values are
        filled in instead. This check will filter out those false unit cells and
        avoid potential errors in geometry calculations. Set this variable to
        ``True`` in order to skip this heuristic check.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object. Its time axis
        holds the index of each model in the file (the model index itself
        for a single ``frame``, multiples of ``stride`` otherwise).

    Raises
    ------
    TypeError
        If ``filename`` is not a string.

    Examples
    --------
    >>> import mdtraj as md
    >>> pdb = md.load_pdb('2EQQ.pdb')
    >>> print(pdb)
    <mdtraj.Trajectory with 20 frames, 423 atoms at 0x110740a90>

    See Also
    --------
    mdtraj.PDBTrajectoryFile : Low level interface to PDB files
    """
    from mdtraj import Trajectory
    if not isinstance(filename, six.string_types):
        raise TypeError('filename must be of type string for load_pdb. '
                        'you supplied %s' % type(filename))

    atom_indices = cast_indices(atom_indices)

    filename = str(filename)
    with PDBTrajectoryFile(filename) as f:
        atom_slice = slice(None) if atom_indices is None else atom_indices
        if frame is not None:
            # Index in two steps: combining the list ``[frame]`` with an
            # ``atom_indices`` array in one subscript broadcasts the two
            # index arrays together and collapses the result to 2-D.
            coords = f.positions[[frame], :, :][:, atom_slice, :]
            # position of the model in the file, also for negative ``frame``
            frame_index = frame % len(f.positions)
        else:
            coords = f.positions[::stride, atom_slice, :]
        assert coords.ndim == 3, 'internal shape error'
        n_frames = len(coords)

        topology = f.topology
        if atom_indices is not None:
            topology = topology.subset(atom_indices)

        # the file holds a single unit cell; repeat it for every frame
        if f.unitcell_angles is not None and f.unitcell_lengths is not None:
            unitcell_lengths = np.array([f.unitcell_lengths] * n_frames)
            unitcell_angles = np.array([f.unitcell_angles] * n_frames)
        else:
            unitcell_lengths = None
            unitcell_angles = None

        in_units_of(coords, f.distance_unit, Trajectory._distance_unit,
                    inplace=True)
        in_units_of(unitcell_lengths, f.distance_unit,
                    Trajectory._distance_unit, inplace=True)

    time = np.arange(len(coords))
    if frame is not None:
        # A single frame starts as time [0]; multiplying (as before) always
        # left it at 0, so offset by the model index instead.
        time += frame_index
    elif stride is not None:
        time *= stride

    traj = Trajectory(xyz=coords, time=time, topology=topology,
                      unitcell_lengths=unitcell_lengths,
                      unitcell_angles=unitcell_angles)

    if not no_boxchk and traj.unitcell_lengths is not None:
        # Only one CRYST1 record is allowed, so only do this check for the first
        # frame. Some RCSB PDB files do not *really* have a unit cell, but still
        # have a CRYST1 record with a dummy definition. These boxes are usually
        # tiny (e.g., 1 A^3), so check that the particle density in the unit
        # cell is not absurdly high. Standard water density is ~55 M, which
        # yields a particle density ~100 atoms per cubic nm. It should be safe
        # to say that no particle density should exceed 10x that.
        particle_density = traj.top.n_atoms / traj.unitcell_volumes[0]
        if particle_density > 1000:
            warnings.warn('Unlikely unit cell vectors detected in PDB file likely '
                          'resulting from a dummy CRYST1 record. Discarding unit '
                          'cell vectors.')
            traj._unitcell_lengths = traj._unitcell_angles = None

    return traj
#!/usr/bin/env python
import mdtraj
from mdtraj import Trajectory as t
import os
import re

# combine trajectories
# Collect the files named trj<N>.h5 in the current directory, keyed by the
# integer N so they are ordered numerically (trj10 after trj9, not after trj1).
pattern = re.compile(r'trj(\d+)\.h5$')
trjs = {}
for name in os.listdir('.'):
    found = pattern.match(name)
    if found:
        trjs[int(found.group(1))] = name

tn = sorted(trjs)
if not tn:
    raise SystemExit('no trj<N>.h5 files found in the current directory')

# load each file under the name it actually has on disk
ts = {num: t.load(os.path.join('.', trjs[num])) for num in tn}

# Start from the lowest-numbered trajectory and append the others in order.
# (Previously trj0 was used as the start AND joined again in the loop, so its
# frames appeared twice in the combined trajectory.)
z = ts[tn[0]]
for num in tn[1:]:
    z = z.join(ts[num])

# trim data to have a frame every 1 ns
# (keeps every 10th frame, starting with the first)
frames = list(range(0, len(z), 10))
zp = z.slice(frames)

# save combined data
zp.save_pdb('ns.pdb')
def load_pdb(filename, stride=None, atom_indices=None, frame=None):
    """Load a RCSB Protein Data Bank file from disk.

    Parameters
    ----------
    filename : str
        Path to the PDB file on disk. The string could be a URL. Valid URL
        schemes include http and ftp.
    stride : int, default=None
        Only read every stride-th model from the file
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file. These indices are zero-based (not 1 based, as used by the PDB
        format). So if you want to load only the first atom in the file, you
        would supply ``atom_indices = np.array([0])``.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object. Its time axis
        holds the index of each model in the file (the model index itself
        for a single ``frame``, multiples of ``stride`` otherwise).

    Raises
    ------
    TypeError
        If ``filename`` is not a string.

    Examples
    --------
    >>> import mdtraj as md
    >>> pdb = md.load_pdb('2EQQ.pdb')
    >>> print(pdb)
    <mdtraj.Trajectory with 20 frames, 423 atoms at 0x110740a90>

    See Also
    --------
    mdtraj.PDBTrajectoryFile : Low level interface to PDB files
    """
    from mdtraj import Trajectory
    if not isinstance(filename, six.string_types):
        raise TypeError('filename must be of type string for load_pdb. '
                        'you supplied %s' % type(filename))

    atom_indices = cast_indices(atom_indices)

    filename = str(filename)
    with PDBTrajectoryFile(filename) as f:
        atom_slice = slice(None) if atom_indices is None else atom_indices
        if frame is not None:
            # Index in two steps: combining the list ``[frame]`` with an
            # ``atom_indices`` array in one subscript broadcasts the two
            # index arrays together and collapses the result to 2-D.
            coords = f.positions[[frame], :, :][:, atom_slice, :]
            # position of the model in the file, also for negative ``frame``
            frame_index = frame % len(f.positions)
        else:
            coords = f.positions[::stride, atom_slice, :]
        assert coords.ndim == 3, 'internal shape error'
        n_frames = len(coords)

        topology = f.topology
        if atom_indices is not None:
            topology = topology.subset(atom_indices)

        # the file holds a single unit cell; repeat it for every frame
        if f.unitcell_angles is not None and f.unitcell_lengths is not None:
            unitcell_lengths = np.array([f.unitcell_lengths] * n_frames)
            unitcell_angles = np.array([f.unitcell_angles] * n_frames)
        else:
            unitcell_lengths = None
            unitcell_angles = None

        in_units_of(coords, f.distance_unit, Trajectory._distance_unit,
                    inplace=True)
        in_units_of(unitcell_lengths, f.distance_unit,
                    Trajectory._distance_unit, inplace=True)

    time = np.arange(len(coords))
    if frame is not None:
        # A single frame starts as time [0]; multiplying (as before) always
        # left it at 0, so offset by the model index instead.
        time += frame_index
    elif stride is not None:
        time *= stride

    return Trajectory(xyz=coords, time=time, topology=topology,
                      unitcell_lengths=unitcell_lengths,
                      unitcell_angles=unitcell_angles)
def build_scaled_pdb(traj: md.Trajectory, ratio, pdb_name):
    """Scale the first frame of ``traj`` by ``ratio`` and save it as a PDB.

    Parameters
    ----------
    traj : md.Trajectory
        Trajectory to rescale.  Frame 0 of ``traj.xyz`` is overwritten with
        the scaled coordinates; other frames are left untouched.
    ratio : scalar
        Factor every coordinate of frame 0 is multiplied by.
    pdb_name : str
        File name appended to ``PDBAnalyzer.prefix_scaled_path`` to form the
        output path.
    """
    # A plain array multiply replaces the former np.vectorize(lambda ...)
    # wrapper, which looped over elements in Python and failed on empty
    # coordinate arrays.
    traj.xyz[0] = traj.xyz[0] * ratio
    scaled_pdb_name = PDBAnalyzer.prefix_scaled_path + pdb_name
    traj.save_pdb(filename=scaled_pdb_name)
def cg_by_index(trj, atom_indices_list, bead_label_list, chain_list=None,
                segment_id_list=None, resSeq_list=None, inplace=False,
                bonds=None, mapping_function="com"):
    """Create a coarse grained (CG) trajectory from subsets of atoms.

    Every entry of ``atom_indices_list`` becomes one CG bead whose
    coordinates are produced by the selected mapping function.

    Parameters
    ----------
    trj : md.Trajectory
        Trajectory to coarse grain.
    atom_indices_list : list of array-like, dtype=int, shape=(n_beads,n_atoms)
        List of indices of atoms to combine into CG sites
    bead_label_list : list of str
        Labels for the CG sites, 1 to 4 characters each.  Labels are stored
        upper-cased.
    chain_list : list, optional
        Chain id's used to split the resulting beads into separate chains.
        Every bead is placed in chain 1 when omitted.
    segment_id_list : list, optional
        Segment id's to assign to the CG residues.
    resSeq_list : list, optional
        Residue sequence id's to assign to the CG residues.  Bead ``i`` gets
        ``i + 1`` when omitted.
    inplace : bool, default=False
        If ``True``, the operation is done inplace, modifying ``trj``.
        Otherwise a new trajectory is returned and ``trj`` is not modified.
    bonds : array-like, dtype=int, shape=(n_bonds,2), default=None
        If specified, sets these bonds in the new topology.
    mapping_function : str, default='com'
        How to map xyz coordinates; must be a key of the module-level
        ``mapping_options`` dictionary.

    Returns
    -------
    traj : md.Trajectory
        The return value is either ``trj``, or the new trajectory, depending
        on the value of ``inplace``.

    Raises
    ------
    ValueError
        If a per-bead list has the wrong length, a bead label is not a string
        of 1 to 4 characters, or ``mapping_function`` is unknown.

    Notes
    -----
    If repeated resSeq values are used, as for a repeated motif in a CG
    polymer, those sections must be broken into separate chains or an
    incorrect topology will result.

    Mapped forces are only attached to the trajectory returned when
    ``inplace`` is ``False``.
    """
    n_beads = len(atom_indices_list)
    if len(bead_label_list) != n_beads:
        raise ValueError(
            "Must supply a list of bead labels of the same length as a list of selected atom indices"
        )
    for bead_label in bead_label_list:
        if not isinstance(bead_label, str) or not 1 <= len(bead_label) <= 4:
            # The one-element tuple keeps the message formatting intact even
            # when the offending label is itself a tuple.
            raise ValueError(
                "Specified bead label '%s' is not valid, must be a string between 1 and 4 characters"
                % (bead_label,))
    bead_label_list = [bead_label.upper() for bead_label in bead_label_list]

    if mapping_function not in mapping_options:
        raise ValueError("Must select a mapping function from: %s" %
                         list(mapping_options))
    map_coords = mapping_options[mapping_function]

    if chain_list is None:
        chain_list = np.ones(n_beads, dtype=int)
    elif len(chain_list) != n_beads:
        raise ValueError(
            "Supplied chain_list must be of the same length as a list of selected atom indices"
        )
    if segment_id_list is not None and len(segment_id_list) != n_beads:
        raise ValueError(
            "Supplied segment_id_list must be of the same length as a list of selected atom indices"
        )
    if resSeq_list is not None and len(resSeq_list) != n_beads:
        raise ValueError(
            "Supplied resSeq_list must be of the same length as a list of selected atom indices"
        )

    n_frames = trj.xyz.shape[0]
    n_dim = trj.xyz.shape[2]
    xyz = np.zeros((n_frames, n_beads, n_dim), dtype=trj.xyz.dtype, order='C')
    forces = np.zeros((n_frames, n_beads, n_dim), dtype=np.double, order='C')
    columns = ["serial", "name", "element", "resSeq", "resName", "chainID"]

    atoms = list(trj.top.atoms)
    has_forces = "forces" in trj.__dict__ and len(trj.forces) > 0

    topology_labels = []
    element_label_dict = {}
    # Scratch buffer that the mapping function fills for one bead at a time.
    xyz_i = np.zeros((n_frames, n_dim), dtype=trj.xyz.dtype, order='C')
    for i, (atom_indices, bead_label) in enumerate(
            zip(atom_indices_list, bead_label_list)):
        # Set membership keeps the mass lookup linear in the atom count.
        members = set(atom_indices)
        # NOTE(review): masses are collected in topology order, whereas
        # ``atom_indices`` is handed over in caller order -- confirm the
        # mapping functions do not rely on the two being aligned when the
        # indices are unsorted.
        masses_i = np.array([a.mass for a in atoms if a.index in members],
                            dtype=np.float64)

        map_coords(xyz_i, trj.xyz, atom_indices, masses_i,
                   unitcell_lengths=trj.unitcell_lengths)
        xyz[:, i, :] = xyz_i

        if has_forces:
            forces[:, i, :] = map_forces(trj, atom_indices)

        resSeq = resSeq_list[i] if resSeq_list is not None else i + 1

        # Each distinct bead label gets a pseudo-element symbol B0..B9; the
        # modulo makes symbols repeat once more than ten labels are in use.
        if bead_label not in element_label_dict:
            element_label = '%2s' % ('B%i' % (len(element_label_dict) % 10))
            element_label_dict[bead_label] = element_label
        else:
            element_label = element_label_dict[bead_label]

        if element_label.strip().upper() not in element.Element._elements_by_symbol:
            # Constructed only for its effect on the element table consulted
            # above; the total bead mass is used as the element mass.
            element.Element(1000 + resSeq, element_label, element_label,
                            masses_i.sum(), 1.0)

        topology_labels.append([
            i, bead_label, element_label, resSeq, '%3s' % bead_label,
            chain_list[i]
        ])

    df = pd.DataFrame(topology_labels, columns=columns)
    topology = Topology.from_dataframe(df, bonds=bonds)

    if segment_id_list is not None:
        for beadidx, bead in enumerate(topology.atoms):
            bead.residue.segment_id = segment_id_list[beadidx]

    if inplace:
        if trj._topology is not None:
            trj._topology = topology
        trj._xyz = xyz
        return trj

    unitcell_lengths = unitcell_angles = None
    if trj._have_unitcell:
        unitcell_lengths = trj._unitcell_lengths.copy()
        unitcell_angles = trj._unitcell_angles.copy()

    time = trj._time.copy()

    new_trj = Trajectory(xyz=xyz, topology=topology, time=time,
                         unitcell_lengths=unitcell_lengths,
                         unitcell_angles=unitcell_angles)
    new_trj.forces = forces
    return new_trj
def cg_by_index(trj, atom_indices_list, bead_label_list, chain_list=None,
                segment_id_list=None, resSeq_list=None, inplace=False,
                bonds=None, mapping_function="com"):
    """Create a coarse grained (CG) trajectory from subsets of atoms.

    Every entry of ``atom_indices_list`` becomes one CG bead whose
    coordinates are produced by the selected mapping function.

    Parameters
    ----------
    trj : md.Trajectory
        Trajectory to coarse grain.
    atom_indices_list : list of array-like, dtype=int, shape=(n_beads,n_atoms)
        List of indices of atoms to combine into CG sites
    bead_label_list : list of str
        Labels for the CG sites, 1 to 4 characters each.  Labels are stored
        upper-cased.
    chain_list : list, optional
        Chain id's used to split the resulting beads into separate chains.
        Every bead is placed in chain 1 when omitted.
    segment_id_list : list, optional
        Segment id's to assign to the CG residues.
    resSeq_list : list, optional
        Residue sequence id's to assign to the CG residues.  Bead ``i`` gets
        ``i + 1`` when omitted.
    inplace : bool, default=False
        If ``True``, the operation is done inplace, modifying ``trj``.
        Otherwise a new trajectory is returned and ``trj`` is not modified.
    bonds : array-like, dtype=int, shape=(n_bonds,2), default=None
        If specified, sets these bonds in the new topology.
    mapping_function : str, default='com'
        How to map xyz coordinates; must be a key of the module-level
        ``mapping_options`` dictionary.

    Returns
    -------
    traj : md.Trajectory
        The return value is either ``trj``, or the new trajectory, depending
        on the value of ``inplace``.

    Raises
    ------
    ValueError
        If a per-bead list has the wrong length, a bead label is not a string
        of 1 to 4 characters, or ``mapping_function`` is unknown.

    Notes
    -----
    If repeated resSeq values are used, as for a repeated motif in a CG
    polymer, those sections must be broken into separate chains or an
    incorrect topology will result.

    Mapped forces are only attached to the trajectory returned when
    ``inplace`` is ``False``.
    """
    n_beads = len(atom_indices_list)
    if len(bead_label_list) != n_beads:
        raise ValueError(
            "Must supply a list of bead labels of the same length as a list of selected atom indices"
        )
    for bead_label in bead_label_list:
        if not isinstance(bead_label, str) or not 1 <= len(bead_label) <= 4:
            # The one-element tuple keeps the message formatting intact even
            # when the offending label is itself a tuple.
            raise ValueError(
                "Specified bead label '%s' is not valid, must be a string between 1 and 4 characters"
                % (bead_label,))
    bead_label_list = [bead_label.upper() for bead_label in bead_label_list]

    if mapping_function not in mapping_options:
        raise ValueError("Must select a mapping function from: %s" %
                         list(mapping_options))
    map_coords = mapping_options[mapping_function]

    if chain_list is None:
        chain_list = np.ones(n_beads, dtype=int)
    elif len(chain_list) != n_beads:
        raise ValueError(
            "Supplied chain_list must be of the same length as a list of selected atom indices"
        )
    if segment_id_list is not None and len(segment_id_list) != n_beads:
        raise ValueError(
            "Supplied segment_id_list must be of the same length as a list of selected atom indices"
        )
    if resSeq_list is not None and len(resSeq_list) != n_beads:
        raise ValueError(
            "Supplied resSeq_list must be of the same length as a list of selected atom indices"
        )

    n_frames = trj.xyz.shape[0]
    n_dim = trj.xyz.shape[2]
    xyz = np.zeros((n_frames, n_beads, n_dim), dtype=trj.xyz.dtype, order='C')
    forces = np.zeros((n_frames, n_beads, n_dim), dtype=np.double, order='C')
    columns = ["serial", "name", "element", "resSeq", "resName", "chainID"]

    atoms = list(trj.top.atoms)
    has_forces = "forces" in trj.__dict__ and len(trj.forces) > 0

    topology_labels = []
    element_label_dict = {}
    # Scratch buffer that the mapping function fills for one bead at a time.
    xyz_i = np.zeros((n_frames, n_dim), dtype=trj.xyz.dtype, order='C')
    for i, (atom_indices, bead_label) in enumerate(
            zip(atom_indices_list, bead_label_list)):
        # Set membership keeps the mass lookup linear in the atom count.
        members = set(atom_indices)
        # NOTE(review): masses are collected in topology order, whereas
        # ``atom_indices`` is handed over in caller order -- confirm the
        # mapping functions do not rely on the two being aligned when the
        # indices are unsorted.
        masses_i = np.array([a.mass for a in atoms if a.index in members],
                            dtype=np.float64)

        map_coords(xyz_i, trj.xyz, atom_indices, masses_i,
                   unitcell_lengths=trj.unitcell_lengths)
        xyz[:, i, :] = xyz_i

        if has_forces:
            forces[:, i, :] = map_forces(trj, atom_indices)

        resSeq = resSeq_list[i] if resSeq_list is not None else i + 1

        # Each distinct bead label gets a pseudo-element symbol B0..B9; the
        # modulo makes symbols repeat once more than ten labels are in use.
        if bead_label not in element_label_dict:
            element_label = '%2s' % ('B%i' % (len(element_label_dict) % 10))
            element_label_dict[bead_label] = element_label
        else:
            element_label = element_label_dict[bead_label]

        if element_label.strip().upper() not in element.Element._elements_by_symbol:
            # Constructed only for its effect on the element table consulted
            # above; the total bead mass is used as the element mass.
            element.Element(1000 + resSeq, element_label, element_label,
                            masses_i.sum(), 1.0)

        topology_labels.append([
            i, bead_label, element_label, resSeq, '%3s' % bead_label,
            chain_list[i]
        ])

    df = pd.DataFrame(topology_labels, columns=columns)
    topology = Topology.from_dataframe(df, bonds=bonds)

    if segment_id_list is not None:
        for beadidx, bead in enumerate(topology.atoms):
            bead.residue.segment_id = segment_id_list[beadidx]

    if inplace:
        if trj._topology is not None:
            trj._topology = topology
        trj._xyz = xyz
        return trj

    unitcell_lengths = unitcell_angles = None
    if trj._have_unitcell:
        unitcell_lengths = trj._unitcell_lengths.copy()
        unitcell_angles = trj._unitcell_angles.copy()

    time = trj._time.copy()

    new_trj = Trajectory(xyz=xyz, topology=topology, time=time,
                         unitcell_lengths=unitcell_lengths,
                         unitcell_angles=unitcell_angles)
    new_trj.forces = forces
    return new_trj
def save_trajectory(trajectory: mdtraj.Trajectory, filename: str, force_overwrite: bool = True):
    """Write ``trajectory`` to ``filename`` via its ``save_pdb`` method.

    Parameters
    ----------
    trajectory : mdtraj.Trajectory
        The trajectory to write.
    filename : str
        Destination path handed to ``save_pdb``.
    force_overwrite : bool, default=True
        Forwarded unchanged to ``save_pdb`` as ``force_overwrite``.
    """
    save_options = {"force_overwrite": force_overwrite}
    trajectory.save_pdb(filename, **save_options)
parser.add_argument('--pro', action='store_true', help='write trj of protein only', default=False) parser.add_argument('--sr', type=str, help='script root', default='/home/kmckiern/scripts/') args = parser.parse_args() sr = args.sr sys.path.insert(0, sr + 'py_general/') from toolz import natural_sort ext_i = '.' + args.trj_ext td = args.trj_dir # combine trajectories trjs = [f for f in os.listdir(td) if ext_i in f] trjs = natural_sort(trjs) ts = t.load(trjs[0], top=args.top, stride=args.stride1) if args.vs: # i'm going to pad these residues by 8 arg = ts.top.select('resid 23 to 39') lysglu = ts.top.select('resid 118 to 135') lys = ts.top.select('resid 308 to 324') vs = np.concatenate([arg, lysglu, lys]) ts = ts.atom_slice(vs) try: ts[0].save_pdb('/home/kmckiern/clc/analysis/vs_dihed/pro/vs_ref.pdb') except: print 'usual protonation error, probably' nt = len(trjs) for ndx, i in enumerate(trjs[1:]): # for newest trj, cut end just in case write is incomplete if ndx + 1 == nt:
def render_traj(topology, positions):
    """Build a trajectory from OpenMM-style inputs and return its viewer.

    Parameters
    ----------
    topology
        Topology object accepted by ``Topology.from_openmm``.
    positions
        Coordinates carrying units; they are divided by ``unit.nanometers``
        before being handed to ``Trajectory``.

    Returns
    -------
    The result of calling ``center_view(zoom=True)`` on the viewer after a
    ball-and-stick representation has been requested for ``'all'``.
    """
    md_topology = Topology.from_openmm(topology)
    coords_nm = positions / unit.nanometers
    traj = Trajectory(coords_nm, md_topology)

    viewer = show_mdtraj(traj)
    styled = viewer.add_ball_and_stick('all')
    return styled.center_view(zoom=True)
def traj_frame_to_sampler_state(traj: md.Trajectory, frame_number: int,box_vectors):
    """Build a ``SamplerState`` from a single frame of a trajectory.

    Parameters
    ----------
    traj : md.Trajectory
        Trajectory providing the coordinates and the periodic box.
    frame_number : int
        Index of the frame to convert.
    box_vectors
        Kept for backward compatibility and ignored: the box is always taken
        from ``traj.openmm_boxes(frame_number)``, as this function has always
        overwritten the argument before using it.

    Returns
    -------
    sampler_state : states.SamplerState
        State holding the frame's positions (tagged as nanometers) and the
        frame's box vectors.
    """
    xyz = traj.xyz[frame_number, :, :]
    # Previously the frame's box was looked up and then dropped, so the
    # returned state had no periodic box; it is now passed through.
    frame_box_vectors = traj.openmm_boxes(frame_number)
    sampler_state = states.SamplerState(
        unit.Quantity(xyz, unit=unit.nanometers),
        box_vectors=frame_box_vectors)
    return sampler_state