def test_lprmsd_0():
    """Check lprmsd is ~0 when the target is only an atom permutation of the reference."""
    # Every one of the 10 atoms is shuffled; no rotation is applied.
    reference_xyz = random.randn(1, 10, 3).astype(np.float32)
    shuffle = random.permutation(10)
    print('true mapping', shuffle)
    shuffled_xyz = reference_xyz[:, shuffle]

    target = Trajectory(xyz=shuffled_xyz, topology=None)
    reference = Trajectory(xyz=reference_xyz, topology=None)
    result = lprmsd(target, reference)

    expected = np.array([0.0], dtype=np.float32)
    eq(result, expected, decimal=3)
def test_lprmsd_1():
    """Check lprmsd resolves a random rotation when the atom order is unchanged."""
    reference_xyz = random.randn(1, 50, 3).astype(np.float32)
    # Identity mapping: atoms keep their original order, only a rotation is applied.
    identity = np.arange(50)
    rotation = rotation_matrix_from_quaternion(uniform_quaternion())
    rotated_xyz = reference_xyz[:, identity].dot(rotation)

    target = Trajectory(xyz=rotated_xyz, topology=None)
    reference = Trajectory(xyz=reference_xyz, topology=None)
    # A single empty permute group means no atoms are allowed to permute.
    result = lprmsd(target, reference, permute_groups=[[]])

    assert result[0] < 1e-2
def test_lprmsd_2():
    """Check lprmsd resolves a random rotation combined with a partial permutation."""
    reference_xyz = random.randn(1, 50, 3).astype(np.float32)
    # The first 10 atoms are shuffled among themselves; atoms 10..49 keep
    # their positions in the ordering.
    shuffled_head = random.permutation(10)
    fixed_tail = np.arange(10, 50)
    mapping = np.concatenate((shuffled_head, fixed_tail))
    rotation = rotation_matrix_from_quaternion(uniform_quaternion())
    transformed_xyz = reference_xyz[:, mapping].dot(rotation)

    target = Trajectory(xyz=transformed_xyz, topology=None)
    reference = Trajectory(xyz=reference_xyz, topology=None)
    # Only the first 10 atoms are declared permutable.
    result = lprmsd(target, reference, permute_groups=[np.arange(10)])

    assert result[0] < 1e-2
def read_as_traj(self, iteration=None, segment=None, atom_indices=None):
    """Read frames from the file and return them as a trajectory object.

    Parameters
    ----------
    iteration : int, optional
        Iteration label to select. Must be given together with ``segment``.
    segment : int, optional
        Segment label to select. Must be given together with ``iteration``.
        When both ``iteration`` and ``segment`` are None, every frame is read.
    atom_indices : array_like, optional
        If not None, forwarded to ``self.read`` and used to take a subset
        of ``self.topology``.

    Returns
    -------
    WESTTrajectory or Trajectory
        A ``WESTTrajectory`` carrying the selected frames together with
        their iteration and segment labels. If ``self.read`` returns no
        data, an empty ``Trajectory`` with zero frames is returned instead.

    Raises
    ------
    ValueError
        If only one of ``iteration``/``segment`` is given, if either is not
        an integer, or if the requested (iteration, segment) pair matches no
        frame.
    """
    _check_mode(self.mode, ('r', ))

    # The 'pointer' node is read column-wise: column 0 is used as the
    # iteration label and column 1 as the segment label of each frame.
    pnode = self._get_node(where='/', name='pointer')
    iter_labels = pnode[:, 0]
    seg_labels = pnode[:, 1]

    if iteration is None and segment is None:
        # Select everything. A slice has no len(), so the emptiness check
        # below must not run for this branch; an empty file is handled by
        # the ``len(data) == 0`` path further down.
        frame_indices = slice(None)
    elif isinstance(iteration, (np.integer, int)) and isinstance(
            segment, (np.integer, int)):
        frame_torf = np.logical_and(iter_labels == iteration,
                                    seg_labels == segment)
        frame_indices = np.arange(len(iter_labels))[frame_torf]
        if len(frame_indices) == 0:
            raise ValueError(
                f"no frame was selected: iteration={iteration}, segment={segment}, atom_indices={atom_indices}"
            )
    else:
        raise ValueError(
            "iteration and segment must be integers and provided at the same time"
        )

    iter_labels = iter_labels[frame_indices]
    seg_labels = seg_labels[frame_indices]

    topology = self.topology
    if atom_indices is not None:
        topology = topology.subset(atom_indices)

    data = self.read(frame_indices=frame_indices, atom_indices=atom_indices)
    if len(data) == 0:
        return Trajectory(xyz=np.zeros((0, topology.n_atoms, 3)),
                          topology=topology)

    # Convert lengths from the file's unit to the Trajectory unit in place.
    in_units_of(data.coordinates, self.distance_unit,
                Trajectory._distance_unit, inplace=True)
    in_units_of(data.cell_lengths, self.distance_unit,
                Trajectory._distance_unit, inplace=True)

    return WESTTrajectory(
        data.coordinates,
        topology=topology,
        time=data.time,
        unitcell_lengths=data.cell_lengths,
        unitcell_angles=data.cell_angles,
        iter_labels=iter_labels,
        seg_labels=seg_labels,
        pcoords=None,
    )
def test_unitcell():
    """Yield one check per file format that bogus unitcell vectors are not saved.

    A trajectory without unit cell information is saved to each temporary
    file, and each yielded callable asserts that loading it back gives
    ``unitcell_vectors`` equal to None.
    """
    top = md.load(get_fn('native.pdb')).restrict_atoms(range(5)).topology
    t = Trajectory(xyz=np.random.randn(100, 5, 3), topology=top)

    # xtc dcd binpos trr h5 pdb nc lh5
    # NOTE(review): the original list repeated temp6 and skipped the seventh
    # format; this assumes a module-level ``temp7`` exists next to
    # temp1..temp8, as the format list above implies - confirm.
    for fn in [temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8]:
        t.save(fn)
        # Bind ``fn`` as a default argument so each yielded check keeps its
        # own filename even if it is executed after the loop has advanced.
        f = lambda fn=fn: eq(md.load(fn, top=top).unitcell_vectors, None)
        f.description = 'unitcell preservation in %s' % os.path.splitext(fn)[1]
        yield f
def load_lh5(filename, top=None, stride=None, atom_indices=None, frame=None):
    """Load a deprecated MSMBuilder2 LH5 trajectory file.

    Parameters
    ----------
    filename : str
        Path of the LH5 trajectory file.
    top : {str, Trajectory, Topology}
        Accepted but not used by this function: the topology is taken
        from the LH5 file itself.
    stride : int, default=None
        Only read every stride-th frame.
    atom_indices : array_like, optional
        If not None, then read only a subset of the atoms coordinates from
        the file, and restrict the topology to the same atoms.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on
        disk. If frame is None, the default, the entire trajectory will be
        loaded. If supplied, ``stride`` will be ignored.

    Returns
    -------
    trajectory : md.Trajectory
        Trajectory holding the coordinates, topology and a ``time`` array
        built from the frame numbers.

    See Also
    --------
    mdtraj.LH5TrajectoryFile : Low level interface to LH5 files
    """
    from mdtraj import Trajectory

    atom_indices = cast_indices(atom_indices)
    single_frame = frame is not None

    with LH5TrajectoryFile(filename) as f:
        if single_frame:
            f.seek(frame)
            xyz = f.read(n_frames=1, atom_indices=atom_indices)
        else:
            xyz = f.read(stride=stride, atom_indices=atom_indices)

        # Convert coordinates in place to the unit used by Trajectory.
        in_units_of(xyz, f.distance_unit, Trajectory._distance_unit,
                    inplace=True)

        topology = f.topology
        if atom_indices is not None:
            topology = topology.subset(atom_indices)

    # Time is the frame number: offset by ``frame`` for a single-frame
    # read, scaled by ``stride`` for a strided read.
    time = np.arange(len(xyz))
    if single_frame:
        time += frame
    elif stride is not None:
        time *= stride

    return Trajectory(xyz=xyz, topology=topology, time=time)
def _construct_traj(self):
    """Build the CG ``Trajectory`` and store it on ``self._cg_traj``.

    The coordinates and topology come from ``self._cg_xyz`` and
    ``self._cg_top``; time and unit cell lengths/angles are copied over
    from the all-atom trajectory ``self._aa_traj``.
    """
    atomistic = self._aa_traj
    self._cg_traj = Trajectory(
        self._cg_xyz,
        self._cg_top,
        time=atomistic.time,
        unitcell_lengths=atomistic.unitcell_lengths,
        unitcell_angles=atomistic.unitcell_angles,
    )
def _join_traj_data(traj_data, top_file):
    """Concatenate per-chunk trajectory records into a single ``Trajectory``.

    Each record in ``traj_data`` is read both positionally (item 0 is the
    coordinates, item 1 the unit cell lengths, item 2 the unit cell angles,
    the last item the box vectors) and by attribute (``unitcell_lengths``,
    ``box``, ``unitcell_angles``).
    # presumably these are namedtuple-like records; verify against caller.

    A unit cell field is only set on the result when every record provides
    it (is not None).
    """
    topology = load_topology_cached(top_file)
    joined_xyz = np.concatenate([record[0] for record in traj_data])
    traj = Trajectory(joined_xyz, topology)

    # The assignment order (lengths, then vectors, then angles) is kept
    # exactly as in the original implementation.
    if all(record.unitcell_lengths is not None for record in traj_data):
        traj.unitcell_lengths = np.concatenate(
            [record[1] for record in traj_data])

    if all(record.box is not None for record in traj_data):
        traj.unitcell_vectors = np.concatenate(
            [record[-1] for record in traj_data])

    if all(record.unitcell_angles is not None for record in traj_data):
        traj.unitcell_angles = np.concatenate(
            [record[2] for record in traj_data])

    return traj
def load_pdb(filename, stride=None, atom_indices=None, frame=None,
             no_boxchk=False):
    """Load a RCSB Protein Data Bank file from disk.

    Parameters
    ----------
    filename : str
        Path to the PDB file on disk. The string could be a URL. Valid URL
        schemes include http and ftp.
    stride : int, default=None
        Only read every stride-th model from the file
    atom_indices : array_like, default=None
        If not None, then read only a subset of the atoms coordinates from
        the file. These indices are zero-based (not 1 based, as used by the
        PDB format). So if you want to load only the first atom in the file,
        you would supply ``atom_indices = np.array([0])``.
    frame : int, default=None
        Use this option to load only a single frame from a trajectory on
        disk. If frame is None, the default, the entire trajectory will be
        loaded. If supplied, ``stride`` will be ignored.
    no_boxchk : bool, default=False
        By default, a heuristic check based on the particle density will be
        performed to determine if the unit cell dimensions are absurd. If the
        particle density is >1000 atoms per nm^3, the unit cell will be
        discarded. This is done because all PDB files from RCSB contain a
        CRYST1 record, even if there are no periodic boundaries, and dummy
        values are filled in instead. This check will filter out those false
        unit cells and avoid potential errors in geometry calculations. Set
        this variable to ``True`` in order to skip this heuristic check.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    Raises
    ------
    TypeError
        If ``filename`` is not a string.

    Examples
    --------
    >>> import mdtraj as md
    >>> pdb = md.load_pdb('2EQQ.pdb')
    >>> print(pdb)
    <mdtraj.Trajectory with 20 frames, 423 atoms at 0x110740a90>

    See Also
    --------
    mdtraj.PDBTrajectoryFile : Low level interface to PDB files
    """
    from mdtraj import Trajectory
    if not isinstance(filename, six.string_types):
        raise TypeError('filename must be of type string for load_pdb. '
                        'you supplied %s' % type(filename))

    atom_indices = cast_indices(atom_indices)

    filename = str(filename)
    with PDBTrajectoryFile(filename) as f:
        atom_slice = slice(None) if atom_indices is None else atom_indices
        # Select frames first, then atoms. Indexing both axes at once with a
        # list and an index array makes NumPy pair the two index arrays
        # (broadcast advanced indexing), which collapses the frame axis and
        # yields a 2-D result when ``frame`` and ``atom_indices`` are both
        # given.
        if frame is not None:
            selected_frames = f.positions[[frame]]
        else:
            selected_frames = f.positions[::stride]
        coords = selected_frames[:, atom_slice, :]
        assert coords.ndim == 3, 'internal shape error'
        n_frames = len(coords)

        topology = f.topology
        if atom_indices is not None:
            topology = topology.subset(atom_indices)

        # The file carries a single unit cell; repeat it for every frame.
        if f.unitcell_angles is not None and f.unitcell_lengths is not None:
            unitcell_lengths = np.array([f.unitcell_lengths] * n_frames)
            unitcell_angles = np.array([f.unitcell_angles] * n_frames)
        else:
            unitcell_lengths = None
            unitcell_angles = None

        in_units_of(coords, f.distance_unit, Trajectory._distance_unit,
                    inplace=True)
        in_units_of(unitcell_lengths, f.distance_unit,
                    Trajectory._distance_unit, inplace=True)

    time = np.arange(len(coords))
    if frame is not None:
        # Offset rather than scale: multiplying the length-1 arange ``[0]``
        # by ``frame`` always produced 0 regardless of the frame requested.
        time += frame
    elif stride is not None:
        time *= stride

    traj = Trajectory(xyz=coords, time=time, topology=topology,
                      unitcell_lengths=unitcell_lengths,
                      unitcell_angles=unitcell_angles)

    if not no_boxchk and traj.unitcell_lengths is not None:
        # Only one CRYST1 record is allowed, so only do this check for the
        # first frame. Some RCSB PDB files do not *really* have a unit cell,
        # but still have a CRYST1 record with a dummy definition. These boxes
        # are usually tiny (e.g., 1 A^3), so check that the particle density
        # in the unit cell is not absurdly high. Standard water density is
        # ~55 M, which yields a particle density ~100 atoms per cubic nm. It
        # should be safe to say that no particle density should exceed 10x
        # that.
        particle_density = traj.top.n_atoms / traj.unitcell_volumes[0]
        if particle_density > 1000:
            warnings.warn('Unlikely unit cell vectors detected in PDB file likely '
                          'resulting from a dummy CRYST1 record. Discarding unit '
                          'cell vectors.')
            traj._unitcell_lengths = traj._unitcell_angles = None

    return traj
def load_pdb(filename, stride=None, atom_indices=None, frame=None):
    """Load a RCSB Protein Data Bank file from disk.

    Parameters
    ----------
    filename : str
        Path to the PDB file on disk. The string could be a URL. Valid URL
        schemes include http and ftp.
    stride : int, default=None
        Only read every stride-th model from the file
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from
        the file. These indices are zero-based (not 1 based, as used by the
        PDB format). So if you want to load only the first atom in the file,
        you would supply ``atom_indices = np.array([0])``.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on
        disk. If frame is None, the default, the entire trajectory will be
        loaded. If supplied, ``stride`` will be ignored.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    Raises
    ------
    TypeError
        If ``filename`` is not a string.

    Examples
    --------
    >>> import mdtraj as md
    >>> pdb = md.load_pdb('2EQQ.pdb')
    >>> print(pdb)
    <mdtraj.Trajectory with 20 frames, 423 atoms at 0x110740a90>

    See Also
    --------
    mdtraj.PDBTrajectoryFile : Low level interface to PDB files
    """
    from mdtraj import Trajectory
    if not isinstance(filename, six.string_types):
        raise TypeError('filename must be of type string for load_pdb. '
                        'you supplied %s' % type(filename))

    atom_indices = cast_indices(atom_indices)

    filename = str(filename)
    with PDBTrajectoryFile(filename) as f:
        atom_slice = slice(None) if atom_indices is None else atom_indices
        # Select frames first, then atoms. Indexing both axes at once with a
        # list and an index array makes NumPy pair the two index arrays
        # (broadcast advanced indexing), which collapses the frame axis and
        # yields a 2-D result when ``frame`` and ``atom_indices`` are both
        # given.
        if frame is not None:
            selected_frames = f.positions[[frame]]
        else:
            selected_frames = f.positions[::stride]
        coords = selected_frames[:, atom_slice, :]
        assert coords.ndim == 3, 'internal shape error'
        n_frames = len(coords)

        topology = f.topology
        if atom_indices is not None:
            topology = topology.subset(atom_indices)

        # The file carries a single unit cell; repeat it for every frame.
        if f.unitcell_angles is not None and f.unitcell_lengths is not None:
            unitcell_lengths = np.array([f.unitcell_lengths] * n_frames)
            unitcell_angles = np.array([f.unitcell_angles] * n_frames)
        else:
            unitcell_lengths = None
            unitcell_angles = None

        in_units_of(coords, f.distance_unit, Trajectory._distance_unit,
                    inplace=True)
        in_units_of(unitcell_lengths, f.distance_unit,
                    Trajectory._distance_unit, inplace=True)

    time = np.arange(len(coords))
    if frame is not None:
        # Offset rather than scale: multiplying the length-1 arange ``[0]``
        # by ``frame`` always produced 0 regardless of the frame requested.
        time += frame
    elif stride is not None:
        time *= stride

    return Trajectory(xyz=coords, time=time, topology=topology,
                      unitcell_lengths=unitcell_lengths,
                      unitcell_angles=unitcell_angles)
def cg_by_index(trj, atom_indices_list, bead_label_list, chain_list=None,
                segment_id_list=None, resSeq_list=None, inplace=False,
                bonds=None, mapping_function="com"):
    """Create a coarse grained (CG) trajectory from subsets of atoms.

    Each entry of ``atom_indices_list`` is mapped onto one CG bead with the
    coordinate mapping function selected by ``mapping_function``.

    Parameters
    ----------
    trj : md.Trajectory
        Atomistic trajectory to be mapped.
    atom_indices_list : list of array-like, dtype=int, shape=(n_beads,n_atoms)
        List of indices of atoms to combine into CG sites
    bead_label_list : list of str
        Labels of the CG sites, each between 1 and 4 characters long. The
        labels are upper-cased before use.
    chain_list : list, optional
        Chain id's used to split resulting beads into separate chains.
        Defaults to chain 1 for every bead.
    segment_id_list : list, optional
        Segment id's to assign to the CG residues.
    resSeq_list : list, optional
        Residue sequence id's to assign to the CG residues. Defaults to
        ``bead index + 1``.
    inplace : bool, default=False
        If ``True``, the operation is done inplace, modifying ``trj``.
        Otherwise a new trajectory is returned and ``trj`` is not modified.
    bonds : array-like, dtype=int, shape=(n_bonds,2), default=None
        If specified, sets these bonds in the new topology
    mapping_function : str, default='com'
        How to map xyz coordinates. Must be one of the keys of the
        module-level ``mapping_options`` dict.

    Returns
    -------
    traj : md.Trajectory
        The return value is either ``trj``, or the new trajectory, depending
        on the value of ``inplace``. Mapped forces are attached only to the
        newly created trajectory.

    Raises
    ------
    ValueError
        If the list arguments have inconsistent lengths, a bead label is
        not a string of 1 to 4 characters, or ``mapping_function`` is not a
        key of ``mapping_options``.

    Notes
    -----
    If repeated resSeq values are used, as for a repeated motif in a CG
    polymer, those sections must be broken into separate chains or an
    incorrect topology will result.
    """
    if len(atom_indices_list) != len(bead_label_list):
        raise ValueError(
            "Must supply a list of bead labels of the same length as a list of selected atom indices"
        )
    for bead_label in bead_label_list:
        if not (type(bead_label) is str) or len(bead_label) > 4 or len(bead_label) < 1:
            # The 1-tuple keeps the formatting correct even when the invalid
            # label is itself a tuple.
            raise ValueError(
                "Specified bead label '%s' is not valid, must be a string between 1 and 4 characters"
                % (bead_label,))
    bead_label_list = [bead_label.upper() for bead_label in bead_label_list]

    if mapping_function not in mapping_options:
        raise ValueError("Must select a mapping function from: %s" %
                         mapping_options.keys())
    map_coords = mapping_options[mapping_function]

    if chain_list is None:
        chain_list = np.ones(len(atom_indices_list), dtype=int)
    elif len(chain_list) != len(atom_indices_list):
        raise ValueError(
            "Supplied chain_list must be of the same length as a list of selected atom indices"
        )
    if segment_id_list is not None and len(segment_id_list) != len(
            atom_indices_list):
        raise ValueError(
            "Supplied segment_id_list must be of the same length as a list of selected atom indices"
        )
    if resSeq_list is not None and len(resSeq_list) != len(atom_indices_list):
        raise ValueError(
            "Supplied resSeq_list must be of the same length as a list of selected atom indices"
        )

    n_beads = len(atom_indices_list)
    n_frames = trj.xyz.shape[0]
    n_dim = trj.xyz.shape[2]

    xyz = np.zeros((n_frames, n_beads, n_dim), dtype=trj.xyz.dtype, order='C')
    forces = np.zeros((n_frames, n_beads, n_dim), dtype=np.double, order='C')
    columns = ["serial", "name", "element", "resSeq", "resName", "chainID"]

    # Per-atom masses of every bead, gathered once and in the same order as
    # the bead's atom indices, so that each weight lines up with the atom
    # handed to the mapping function (scanning the topology instead would
    # return them in topology order).
    bead_atom_masses = [
        np.array([trj.top.atom(j).mass for j in atom_indices],
                 dtype=np.float64)
        for atom_indices in atom_indices_list
    ]
    # Total mass of each bead; used when registering the bead's element.
    masses = np.array([m.sum() for m in bead_atom_masses], dtype=np.float64)

    # Whether ``trj`` carries forces does not change between beads.
    has_forces = "forces" in trj.__dict__ and len(trj.forces) > 0

    topology_labels = []
    element_label_dict = {}

    # Scratch buffer handed to the mapping function for each bead in turn.
    xyz_i = np.zeros((n_frames, n_dim), dtype=trj.xyz.dtype, order='C')

    for i in range(n_beads):
        atom_indices = atom_indices_list[i]
        bead_label = bead_label_list[i]

        map_coords(xyz_i, trj.xyz, atom_indices, bead_atom_masses[i],
                   unitcell_lengths=trj.unitcell_lengths)
        xyz[:, i, :] = xyz_i

        if has_forces:
            forces[:, i, :] = map_forces(trj, atom_indices)

        if resSeq_list is not None:
            resSeq = resSeq_list[i]
        else:
            resSeq = i + 1

        # Every distinct bead label gets a pseudo-element symbol 'B0'..'B9';
        # the digit wraps around after ten distinct labels.
        if bead_label not in element_label_dict:
            element_label = '%2s' % ('B%i' % (len(element_label_dict) % 10))
            element_label_dict[bead_label] = element_label
        else:
            element_label = element_label_dict[bead_label]

        # Constructing the Element is done for its side effect only; the
        # membership test avoids creating a symbol that is already known.
        if element_label.strip().upper() not in element.Element._elements_by_symbol:
            element.Element(1000 + resSeq, element_label, element_label,
                            masses[i], 1.0)

        topology_labels.append([
            i, bead_label, element_label, resSeq, '%3s' % bead_label,
            chain_list[i]
        ])

    df = pd.DataFrame(topology_labels, columns=columns)
    topology = Topology.from_dataframe(df, bonds=bonds)

    if segment_id_list is not None:
        for beadidx, bead in enumerate(topology.atoms):
            bead.residue.segment_id = segment_id_list[beadidx]

    if inplace:
        if trj._topology is not None:
            trj._topology = topology
        trj._xyz = xyz
        return trj

    unitcell_lengths = unitcell_angles = None
    if trj._have_unitcell:
        unitcell_lengths = trj._unitcell_lengths.copy()
        unitcell_angles = trj._unitcell_angles.copy()

    time = trj._time.copy()

    new_trj = Trajectory(xyz=xyz, topology=topology, time=time,
                         unitcell_lengths=unitcell_lengths,
                         unitcell_angles=unitcell_angles)
    new_trj.forces = forces
    return new_trj
def test_lprmsd_null():
    """Check lprmsd is ~0 between a random frame and an exact copy of it."""
    reference_xyz = random.randn(1, 10, 3).astype(np.float32)
    duplicate_xyz = np.copy(reference_xyz)

    target = Trajectory(xyz=duplicate_xyz, topology=None)
    reference = Trajectory(xyz=reference_xyz, topology=None)
    result = lprmsd(target, reference)

    expected = np.array([0.0], dtype=np.float32)
    eq(result, expected, decimal=3)
def cg_by_index(trj, atom_indices_list, bead_label_list, chain_list=None,
                segment_id_list=None, resSeq_list=None, inplace=False,
                bonds=None, split_shared_atoms=False, mod_weights_list=None,
                mapping_function="com", charge_tol=1e-5,
                center_postwrap=False):
    """Create a coarse grained (CG) trajectory from subsets of atoms.

    Each entry of ``atom_indices_list`` is mapped onto one CG bead as a
    weighted center of its atoms; the weights are chosen by
    ``mapping_function``.

    Parameters
    ----------
    trj : md.Trajectory
        Atomistic trajectory to be mapped.
    atom_indices_list : list of array-like, dtype=int, shape=(n_beads,n_atoms)
        List of indices of atoms to combine into CG sites
    bead_label_list : list of str
        Labels of the CG sites, each between 1 and 4 characters long. The
        labels are upper-cased before use.
    chain_list : list, optional
        Chain id's used to split resulting beads into separate chains.
        Defaults to chain 1 for every bead.
    segment_id_list : list, optional
        Segment id's to assign to the CG residues.
    resSeq_list : list, optional
        Residue sequence id's to assign to the CG residues. Defaults to
        ``bead index + 1``.
    inplace : bool, default=False
        If ``True``, the operation is done inplace, modifying ``trj``.
        Otherwise a new trajectory is returned and ``trj`` is not modified.
    bonds : array-like, dtype=int, shape=(n_bonds,2), default=None
        If specified, sets these bonds in the new topology
    split_shared_atoms : bool, default=False
        If True, ``mod_weights_list`` is replaced by the result of
        ``gen_unique_overlap_mod_weights(atom_indices_list)`` so that atoms
        shared between beads have their weight divided between them.
    mod_weights_list : list of array-like, optional
        Per-bead multiplicative factors applied element-wise to the
        weights of each bead's atoms. Ignored (overwritten) when
        ``split_shared_atoms`` is True.
    mapping_function : str, default='com'
        How to map xyz coordinates. Must be a key of the module-level
        ``mapping_options`` dict. Weights are assigned for 'com' /
        'center_of_mass' (masses), 'coc' / 'center_of_charge' (charges),
        'cof' / 'center_of_force' (force norms) and 'center' (uniform).
    charge_tol : float, default=1e-5
        For charge-weighted mapping, a bead whose total charge has an
        absolute value below this tolerance is rejected.
    center_postwrap : bool, default=False
        Whether to wrap the CG system after it is mapped. Assumes that box
        is centered at 0, and only has effect if periodic information is
        present.

    Returns
    -------
    traj : md.Trajectory
        The return value is either ``trj``, or the new trajectory, depending
        on the value of ``inplace``. Mapped forces are attached only to the
        newly created trajectory.

    Raises
    ------
    ValueError
        If the list arguments have inconsistent lengths, a bead label is
        not a string of 1 to 4 characters, ``mapping_function`` is not
        supported, or (charge mapping only) atoms carry no charge or a
        bead's total charge is near zero.

    Notes
    -----
    If repeated resSeq values are used, as for a repeated motif in a CG
    polymer, those sections must be broken into separate chains or an
    incorrect topology will result.
    """
    if len(atom_indices_list) != len(bead_label_list):
        raise ValueError("Must supply a list of bead labels of the "
                         "same length as a list of selected atom indices")
    for bead_label in bead_label_list:
        if not (type(bead_label) is str) or len(bead_label) > 4 or len(bead_label) < 1:
            # Single-line message: the previous backslash continuation
            # inside the literal embedded the source indentation in the
            # text. The 1-tuple keeps formatting correct for tuple labels.
            raise ValueError(
                "Specified bead label '%s' is not valid, must be a string between 1 and 4 characters"
                % (bead_label,))
    bead_label_list = [bead_label.upper() for bead_label in bead_label_list]

    if mapping_function not in mapping_options:
        raise ValueError("Must select a mapping function from: %s" %
                         mapping_options.keys())

    if chain_list is None:
        chain_list = np.ones(len(atom_indices_list), dtype=int)
    elif len(chain_list) != len(atom_indices_list):
        raise ValueError("Supplied chain_list must be of the same length "
                         "as a list of selected atom indices")
    if segment_id_list is not None and len(segment_id_list) != len(
            atom_indices_list):
        raise ValueError("Supplied segment_id_list must be of the same "
                         "length as a list of selected atom indices")
    if resSeq_list is not None and len(resSeq_list) != len(atom_indices_list):
        raise ValueError("Supplied resSeq_list must be of the same "
                         "length as a list of selected atom indices")

    use_charges = mapping_function in ('coc', 'center_of_charge')
    use_masses = mapping_function in ('com', 'center_of_mass')
    use_forces = mapping_function in ('cof', 'center_of_force')

    n_beads = len(atom_indices_list)
    n_frames = trj.xyz.shape[0]
    n_dim = trj.xyz.shape[2]

    xyz = np.zeros((n_frames, n_beads, n_dim), dtype=trj.xyz.dtype, order='C')
    forces = np.zeros((n_frames, n_beads, n_dim), dtype=np.double, order='C')
    columns = ["serial", "name", "element", "resSeq", "resName", "chainID"]

    # Per-atom masses of each bead (in atom-index order) and the bead totals.
    masses_i = [
        np.array([trj.top.atom(jj).element.mass for jj in atom_indices],
                 dtype=np.float64)
        for atom_indices in atom_indices_list
    ]
    masses = np.array([bead_masses.sum() for bead_masses in masses_i],
                      dtype=np.float64)

    # Charges are optional: only collect them when atoms expose the
    # attribute. The first atom is probed so that a single-atom topology
    # works as well.
    charges = None
    charges_i = None
    if hasattr(trj.top.atom(0), 'charge'):
        charges_i = [
            np.array([trj.top.atom(jj).charge for jj in atom_indices],
                     dtype=np.float64)
            for atom_indices in atom_indices_list
        ]
        charges = np.array(
            [bead_charges.sum() for bead_charges in charges_i],
            dtype=np.float64)

    forcenorm_i = []
    if use_forces:
        for atom_indices in atom_indices_list:
            forcenorm_i.append(get_forcenorms(trj, atom_indices))

    if use_charges:
        if charges is None:
            # Previously this surfaced as an unbound-name error.
            raise ValueError("Center of charge mapping requires atoms "
                             "with a 'charge' attribute")
        # Report the index of the offending site itself, not a leftover
        # loop variable from an earlier loop.
        for site, charge in enumerate(charges):
            if np.absolute(charge) < charge_tol:
                raise ValueError("Total charge on site %i is near zero" %
                                 site)

    topology_labels = []
    element_label_dict = {}

    if split_shared_atoms:
        mod_weights_list = gen_unique_overlap_mod_weights(atom_indices_list)

    # Probe once whether forces are present and can be mapped.
    has_forces = False
    try:
        trj.__dict__['forces']
        map_forces(trj, (0, ))
        has_forces = True
    except TypeError:
        print("WARNING: Invalid Forces\nNo Map applied to forces")
    except KeyError:
        # No forces stored on the trajectory; nothing to map.
        pass
    except Exception:
        print("Unknown error, check your forces\nexiting...")
        raise

    for i in range(n_beads):
        atom_indices = atom_indices_list[i]
        bead_label = bead_label_list[i]
        # View into the output array; the mapping routine is handed this
        # slice as its output buffer.
        xyz_i = xyz[:, i, :]

        if use_charges:
            weights = charges_i[i]
        elif use_masses:
            weights = masses_i[i]
        elif use_forces:
            weights = forcenorm_i[i]
        elif mapping_function == 'center':
            weights = np.ones(len(atom_indices))
        else:
            # A key of mapping_options this function assigns no weights
            # for; previously this failed on an unbound ``weights``.
            raise ValueError("Must select a mapping function from: %s" %
                             mapping_options.keys())

        if mod_weights_list is not None:
            weights[:] = np.multiply(weights, mod_weights_list[i])

        compute_center_weighted(xyz_i, trj.xyz, atom_indices, weights,
                                unitcell_lengths=trj.unitcell_lengths,
                                center_postwrap=center_postwrap)

        if has_forces:
            forces[:, i, :] = map_forces(trj, atom_indices)

        if resSeq_list is not None:
            resSeq = resSeq_list[i]
        else:
            resSeq = i + 1

        # Every distinct bead label gets a pseudo-element symbol 'B0'..'B9';
        # the digit wraps around after ten distinct labels.
        if bead_label not in element_label_dict:
            element_label = '%2s' % ('B%i' % (len(element_label_dict) % 10))
            element_label_dict[bead_label] = element_label
        else:
            element_label = element_label_dict[bead_label]

        # Constructing the Element is done for its side effect only; the
        # membership test avoids creating a symbol that is already known.
        if element_label.strip().upper() not in element.Element._elements_by_symbol:
            element.Element(1000 + resSeq, element_label, element_label,
                            masses[i], 1.0)

        topology_labels.append([
            i, bead_label, element_label, resSeq, '%3s' % bead_label,
            chain_list[i]
        ])

    df = pd.DataFrame(topology_labels, columns=columns)
    topology = Topology.from_dataframe(df, bonds=bonds)

    if segment_id_list is not None:
        for beadidx, bead in enumerate(topology.atoms):
            bead.residue.segment_id = segment_id_list[beadidx]

    if inplace:
        if trj._topology is not None:
            trj._topology = topology
        trj._xyz = xyz
        return trj

    unitcell_lengths = unitcell_angles = None
    if trj._have_unitcell:
        unitcell_lengths = trj._unitcell_lengths.copy()
        unitcell_angles = trj._unitcell_angles.copy()

    time = trj._time.copy()

    new_trj = Trajectory(xyz=xyz, topology=topology, time=time,
                         unitcell_lengths=unitcell_lengths,
                         unitcell_angles=unitcell_angles)
    new_trj.forces = forces
    return new_trj
def render_traj(topology, positions):
    """Build a ball-and-stick view of the given OpenMM topology and positions.

    ``positions`` is divided by ``unit.nanometers`` and ``topology`` is
    converted with ``Topology.from_openmm`` to form a ``Trajectory``. That
    trajectory is passed to ``show_mdtraj``; the return value is whatever
    ``center_view(zoom=True)`` returns after a ball-and-stick
    representation of 'all' has been added.
    """
    coords_nm = positions / unit.nanometers
    md_topology = Topology.from_openmm(topology)
    traj = Trajectory(coords_nm, md_topology)

    view = show_mdtraj(traj)
    view = view.add_ball_and_stick('all')
    return view.center_view(zoom=True)