def compute_dihedrals(trajectory, indices, opt=True):
    """Compute the torsion angle defined by each atom quartet in every frame.

    Parameters
    ----------
    trajectory : Trajectory
        An mdtraj trajectory.
    indices : np.ndarray, shape=(n_dihedrals, 4), dtype=int
        Each row holds the indices of the four atoms that define one
        dihedral: the angle between the plane through the first three atoms
        and the plane through the last three, i.e. the torsion about the
        bond joining the two middle atoms.
    opt : bool, default=True
        Use the optimized native library to calculate the angles.

    Returns
    -------
    dihedrals : np.ndarray, shape=(n_frames, n_dihedrals), dtype=float
        The value of each of the `n_dihedrals` torsions in each frame,
        measured in **radians**.

    Raises
    ------
    ValueError
        If any index is negative or not smaller than the number of atoms.
    """
    coords = ensure_type(trajectory.xyz, dtype=np.float32, ndim=3,
                         name='traj.xyz', shape=(None, None, 3),
                         warn_on_cast=False)
    atom_quartets = ensure_type(np.asarray(indices), dtype=np.int32, ndim=2,
                                name='indices', shape=(None, 4),
                                warn_on_cast=False)

    # Reject out-of-range indices before they reach the kernels.
    in_range = (atom_quartets >= 0) & (atom_quartets < trajectory.n_atoms)
    if not in_range.all():
        raise ValueError('indices must be between 0 and %d' % trajectory.n_atoms)

    # Both kernels fill a preallocated (n_frames, n_dihedrals) buffer.
    result = np.zeros((coords.shape[0], atom_quartets.shape[0]),
                      dtype=np.float32)
    kernel = _geometry._dihedral if opt else _dihedral
    kernel(coords, atom_quartets, result)
    return result
def find_closest_contact(traj, group1, group2, frame=0, periodic=True):
    """Locate the closest pair of atoms between two groups in one frame.

    One atom is taken from each group; the search itself is delegated to
    the native `_geometry._find_closest_contact` routine.

    Parameters
    ----------
    traj : Trajectory
        An mdtraj trajectory.
    group1 : np.ndarray, shape=(num_atoms), dtype=int
        The indices of atoms in the first group.
    group2 : np.ndarray, shape=(num_atoms), dtype=int
        The indices of atoms in the second group.
    frame : int, default=0
        The frame of the Trajectory to take positions from.
    periodic : bool, default=True
        If `periodic` is True and the trajectory contains unitcell
        information, distances are computed under the minimum image
        convention.

    Returns
    -------
    result : tuple (int, int, float)
        The indices of the two atoms forming the closest contact, and the
        distance between them.
    """
    positions = ensure_type(traj.xyz, dtype=np.float32, ndim=3,
                            name='traj.xyz', shape=(None, None, 3),
                            warn_on_cast=False)[frame]
    first_group = ensure_type(group1, dtype=np.int32, ndim=1, name='group1',
                              warn_on_cast=False)
    second_group = ensure_type(group2, dtype=np.int32, ndim=1, name='group2',
                               warn_on_cast=False)

    # The box stays None unless periodic handling is requested and available.
    box = None
    if periodic and traj._have_unitcell:
        all_boxes = ensure_type(traj.unitcell_vectors, dtype=np.float32,
                                ndim=3, name='unitcell_vectors',
                                shape=(len(traj.xyz), 3, 3),
                                warn_on_cast=False)
        box = all_boxes[frame]

    return _geometry._find_closest_contact(positions, first_group,
                                           second_group, box)
def write(self, xyz, cell_lengths=None):
    """Write one or more frames of data to a mdcrd file

    Coordinates are written as fixed-width ``%8.3f`` fields, ten per line.
    The first call writes the title line and fixes whether this file
    carries box information; later calls must be consistent with it.

    Parameters
    ----------
    xyz : np.ndarray, shape=(n_frames, n_atoms, 3)
        The cartesian coordinates of the atoms to write. By convention, the
        lengths should be in units of angstroms.
    cell_lengths : np.ndarray, shape=(n_frames, 3), dtype=float32, optional
        The length of the periodic box in each frame, in each direction,
        `a`, `b`, `c`. By convention the lengths should be in units of
        angstroms.

    Raises
    ------
    ValueError
        If the file was not opened in mode "w", if a coordinate does not
        fit into its 8-character field, or if `cell_lengths` is present
        or absent inconsistently with the first call.
    """
    if not self._mode == 'w':
        raise ValueError('write() is only available when file is opened '
                         'in mode="w"')

    xyz = ensure_type(xyz, np.float32, 3, 'xyz', can_be_none=False,
                      shape=(None, None, 3), warn_on_cast=False,
                      add_newaxis_on_deficient_ndim=True)
    cell_lengths = ensure_type(cell_lengths, np.float32, 2, 'cell_lengths',
                               can_be_none=True, shape=(len(xyz), 3),
                               warn_on_cast=False,
                               add_newaxis_on_deficient_ndim=True)

    if self._w_has_box is None:
        # this is the first write()
        self._n_atoms = xyz.shape[1]
        self._fh.write('TITLE : Created by MDTraj with %d atoms\n' % self._n_atoms)
        self._w_has_box = cell_lengths is not None
    elif self._w_has_box is True:
        if cell_lengths is None:
            raise ValueError('This mdcrd file must contain unitcell '
                             'information')
    elif self._w_has_box is False:
        if cell_lengths is not None:
            raise ValueError('This mdcrd file must not contain unitcell '
                             'information')
    else:
        raise RuntimeError()

    for i in range(xyz.shape[0]):
        # True while the current output line holds values but no newline.
        # It is initialised per frame, so a frame without coordinates no
        # longer hits an unbound local; it simply writes no coordinates.
        line_open = False
        for j, coord in enumerate(xyz[i].reshape(-1)):
            out = "%8.3f" % coord
            if len(out) > 8:
                # The value needs more than the 8 columns the format allows.
                raise ValueError('Overflow error')
            self._fh.write(out)
            line_open = True
            if (j + 1) % 10 == 0:
                self._fh.write("\n")
                line_open = False

        # Terminate a trailing, partially filled line.
        if line_open:
            self._fh.write("\n")

        if cell_lengths is not None:
            self._fh.write("%8.3f %8.3f %8.3f\n" % tuple(cell_lengths[i]))
def write(self, coordinates, topology, time=None, unitcell_vectors=None,
          precision=3):
    """Write frames of a molecular dynamics trajectory in GROMACS GRO format.

    Each frame is handed, one at a time, to `self._write_frame`.

    Parameters
    ----------
    coordinates : np.ndarray, dtype=np.float32, shape=(n_frames, n_atoms, 3)
        The cartesian coordinates of each atom, in units of nanometers.
    topology : mdtraj.Topology
        The Topology defining the model to write.
    time : np.ndarray, dtype=float32, shape=(n_frames), optional
        The simulation time corresponding to each frame, in picoseconds.
        When omitted, `None` is passed on for every frame.
    unitcell_vectors : np.ndarray, dtype=float32, shape=(n_frames, 3, 3), optional
        The periodic box vectors of the simulation in each frame, in
        nanometers. When omitted, `None` is passed on for every frame.
    precision : int, optional
        The number of decimal places to print for coordinates. Default is 3

    Raises
    ------
    ValueError
        If the file is closed or was not opened for writing.
    """
    if not self._open:
        raise ValueError('I/O operation on closed file')
    if not self._mode == 'w':
        raise ValueError('file not opened for writing')

    coordinates = ensure_type(coordinates, dtype=np.float32, ndim=3,
                              name='coordinates', can_be_none=False,
                              warn_on_cast=False)
    n_frames = len(coordinates)
    time = ensure_type(time, dtype=float, ndim=1, name='time',
                       can_be_none=True, shape=(n_frames,),
                       warn_on_cast=False)
    unitcell_vectors = ensure_type(unitcell_vectors, dtype=float, ndim=3,
                                   name='unitcell_vectors', can_be_none=True,
                                   shape=(n_frames, 3, 3),
                                   warn_on_cast=False)

    for index, frame_xyz in enumerate(coordinates):
        # Optional per-frame data stays None when it was not supplied.
        stamp = time[index] if time is not None else None
        box = unitcell_vectors[index] if unitcell_vectors is not None else None
        self._write_frame(frame_xyz, topology, stamp, box, precision)
def compute_angles(traj, angle_indices, opt=True):
    """Compute the bond angle defined by each atom triplet in every frame.

    Parameters
    ----------
    traj : Trajectory
        An mdtraj trajectory.
    angle_indices : np.ndarray, shape=(n_angles, 3), dtype=int
        Each row gives the indices of three atoms which together make an
        angle.
    opt : bool, default=True
        Use the optimized native library instead of the numpy
        implementation.

    Returns
    -------
    angles : np.ndarray, shape=[n_frames, n_angles], dtype=float
        The angles are in radians

    Raises
    ------
    ValueError
        If any index is negative or not smaller than the number of atoms.
    """
    coords = ensure_type(traj.xyz, dtype=np.float32, ndim=3, name='traj.xyz',
                         shape=(None, None, 3), warn_on_cast=False)
    atom_triplets = ensure_type(np.asarray(angle_indices), dtype=np.int32,
                                ndim=2, name='angle_indices',
                                shape=(None, 3), warn_on_cast=False)

    # Reject out-of-range indices before they reach the kernels.
    in_range = (atom_triplets >= 0) & (atom_triplets < traj.n_atoms)
    if not in_range.all():
        raise ValueError('angle_indices must be between 0 and %d' % traj.n_atoms)

    # Both kernels fill a preallocated (n_frames, n_angles) buffer.
    result = np.zeros((coords.shape[0], atom_triplets.shape[0]),
                      dtype=np.float32)
    kernel = _geometry._angle if opt else _angle
    kernel(coords, atom_triplets, result)
    return result
def permute_energies(X, s):
    """Re-order an observable X so that u[i, j, k] corresponds to frame i,
    sampled from state j, evaluated in state k.

    Parameters
    ----------
    X : np.ndarray, shape=(n_iter, n_replicas, n_replicas)
        The observable to permute
    s : np.ndarray, shape=(n_iter, n_replicas), dtype='int'
        The thermodynamic state indices of each replica slot.  s[i, k] is
        the thermodynamic state index of frame i, replica k.

    Returns
    -------
    u : np.ndarray, shape=(n_iter, n_replicas, n_replicas)
        The rows of each ``X[i]`` re-ordered by the inverse of ``s[i]``.
    """
    X = ensure_type(X, 'float32', 3, "X")
    # The replica count is taken from the last axis of X.
    n_iter, _, n_replicas = X.shape
    s = ensure_type(s, "int", 2, "s", shape=(n_iter, n_replicas))

    u = np.zeros((n_iter, n_replicas, n_replicas))
    for i, states in enumerate(s):
        # Invert this iteration's slot -> state assignment.
        slot_of_state = {state: slot for slot, state in enumerate(states)}
        row_order = [slot_of_state[state] for state in range(n_replicas)]
        u[i] = X[i, row_order]
    return u
def compute_dihedrals(traj, indices, periodic=True, opt=True):
    """Compute the torsion angle defined by each atom quartet in every frame.

    Parameters
    ----------
    traj : Trajectory
        An mdtraj trajectory.
    indices : np.ndarray, shape=(n_dihedrals, 4), dtype=int
        Each row gives the indices of four atoms which together make a
        dihedral angle. The angle is between the planes spanned by the first
        three atoms and the last three atoms, a torsion around the bond
        between the middle two atoms.
    periodic : bool, default=True
        If `periodic` is True and the trajectory contains unitcell
        information, dihedrals that cross periodic images are treated using
        the minimum image convention.
    opt : bool, default=True
        Use an optimized native library to calculate angles. With periodic
        handling active and a unit cell whose angles are not all 90, a
        warning is issued and the pure-Python implementation is used.

    Returns
    -------
    dihedrals : np.ndarray, shape=(n_frames, n_dihedrals), dtype=float
        The output array gives, in each frame from the trajectory, each of
        the `n_dihedrals` torsion angles. The angles are measured in
        **radians**.

    Raises
    ------
    ValueError
        If any index is negative or not smaller than the number of atoms.
    """
    coords = ensure_type(traj.xyz, dtype=np.float32, ndim=3, name='traj.xyz',
                         shape=(None, None, 3), warn_on_cast=False)
    quartets = ensure_type(indices, dtype=np.int32, ndim=2, name='indices',
                           shape=(None, 4), warn_on_cast=False)

    in_range = (quartets >= 0) & (quartets < traj.n_atoms)
    if not in_range.all():
        raise ValueError('indices must be between 0 and %d' % traj.n_atoms)

    # Nothing to compute: return an (n_frames, 0) array straight away.
    if len(quartets) == 0:
        return np.zeros((len(coords), 0), dtype=np.float32)

    use_pbc = periodic and traj._have_unitcell

    # The native minimum-image kernel is only used for orthorhombic cells.
    if use_pbc and opt and not np.allclose(traj.unitcell_angles, 90):
        warnings.warn('Optimized dihedral calculation does not work for non-orthorhombic '
                      'unit cells and periodic boundary conditions. Falling back to much '
                      'slower pure-Python implementation. Set periodic=False or opt=False '
                      'to disable this message.')
        opt = False

    out = np.zeros((coords.shape[0], quartets.shape[0]), dtype=np.float32)

    if use_pbc:
        box = ensure_type(traj.unitcell_vectors, dtype=np.float32, ndim=3,
                          name='unitcell_vectors',
                          shape=(len(coords), 3, 3))
        if opt:
            _geometry._dihedral_mic(coords, quartets, box, out)
        else:
            _dihedral(traj, quartets, periodic, out)
    elif opt:
        _geometry._dihedral(coords, quartets, out)
    else:
        _dihedral(traj, quartets, periodic, out)
    return out
def compute_angles(traj, angle_indices, periodic=True, opt=True):
    """Compute the bond angles between the supplied triplets of indices in
    each frame of a trajectory.

    Parameters
    ----------
    traj : Trajectory
        An mdtraj trajectory.
    angle_indices : np.ndarray, shape=(num_angles, 3), dtype=int
        Each row gives the indices of three atoms which together make an
        angle.
    periodic : bool, default=True
        If `periodic` is true and the trajectory contains unitcell
        information, angles that cross periodic images are treated using
        the minimum image convention. Any truthy value is accepted; the
        same test is used for the fallback warning and for choosing the
        minimum-image branch.
    opt : bool, default=True
        Use an optimized native library to calculate angles. With periodic
        handling active and a unit cell whose angles are not all 90, a
        warning is issued and the pure-Python implementation is used.

    Returns
    -------
    angles : np.ndarray, shape=[n_frames, n_angles], dtype=float
        The angles are in radians

    Raises
    ------
    ValueError
        If any index is negative or not smaller than the number of atoms.
    """
    xyz = ensure_type(traj.xyz, dtype=np.float32, ndim=3, name='traj.xyz',
                      shape=(None, None, 3), warn_on_cast=False)
    triplets = ensure_type(angle_indices, dtype=np.int32, ndim=2,
                           name='angle_indices', shape=(None, 3),
                           warn_on_cast=False)
    if not np.all(np.logical_and(triplets < traj.n_atoms, triplets >= 0)):
        raise ValueError('angle_indices must be between 0 and %d' % traj.n_atoms)

    # Nothing to compute: return an (n_frames, 0) array straight away.
    if len(triplets) == 0:
        return np.zeros((len(xyz), 0), dtype=np.float32)

    # A single decision drives both the warning and the branch selection.
    # Previously the branch required `periodic is True`, so a truthy
    # non-bool such as 1 or np.True_ could warn and disable `opt` and then
    # skip the minimum-image path.
    use_pbc = bool(periodic) and traj._have_unitcell

    # The native minimum-image kernel is only used for orthorhombic cells.
    if use_pbc and opt and not np.allclose(traj.unitcell_angles, 90):
        warnings.warn('Optimized angle calculation does not work for non-orthorhombic '
                      'unit cells and periodic boundary conditions. Falling back to much '
                      'slower pure-Python implementation. Set periodic=False or opt=False '
                      'to disable this message.')
        opt = False

    out = np.zeros((xyz.shape[0], triplets.shape[0]), dtype=np.float32)

    if use_pbc:
        box = ensure_type(traj.unitcell_vectors, dtype=np.float32, ndim=3,
                          name='unitcell_vectors', shape=(len(xyz), 3, 3))
        if opt:
            _geometry._angle_mic(xyz, triplets, box, out)
        else:
            _angle(traj, triplets, periodic, out)
    elif opt:
        _geometry._angle(xyz, triplets, out)
    else:
        _angle(traj, triplets, periodic, out)
    return out
def compute_angles(traj, angle_indices, periodic=True, opt=True):
    """Compute the bond angle defined by each atom triplet in every frame.

    Parameters
    ----------
    traj : Trajectory
        An mdtraj trajectory.
    angle_indices : np.ndarray, shape=(num_angles, 3), dtype=int
        Each row gives the indices of three atoms which together make an
        angle.
    periodic : bool, default=True
        If `periodic` is True and the trajectory contains unitcell
        information, angles that cross periodic images are treated using
        the minimum image convention.
    opt : bool, default=True
        Use the optimized native library instead of the Python
        implementation.

    Returns
    -------
    angles : np.ndarray, shape=[n_frames, n_angles], dtype=float
        The angles are in radians

    Raises
    ------
    ValueError
        If any index is negative or not smaller than the number of atoms.
    """
    coords = ensure_type(traj.xyz, dtype=np.float32, ndim=3, name="traj.xyz",
                         shape=(None, None, 3), warn_on_cast=False)
    triplets = ensure_type(angle_indices, dtype=np.int32, ndim=2,
                           name="angle_indices", shape=(None, 3),
                           warn_on_cast=False)

    in_range = (triplets >= 0) & (triplets < traj.n_atoms)
    if not in_range.all():
        raise ValueError("angle_indices must be between 0 and %d" % traj.n_atoms)

    # Nothing to compute: return an (n_frames, 0) array straight away.
    if len(triplets) == 0:
        return np.zeros((len(coords), 0), dtype=np.float32)

    out = np.zeros((coords.shape[0], triplets.shape[0]), dtype=np.float32)

    use_mic = periodic is True and traj._have_unitcell
    if use_mic:
        box = ensure_type(traj.unitcell_vectors, dtype=np.float32, ndim=3,
                          name="unitcell_vectors",
                          shape=(len(coords), 3, 3))

    if not opt:
        # The Python implementation receives the trajectory and the flag.
        _angle(traj, triplets, periodic, out)
    elif use_mic:
        orthogonal = np.allclose(traj.unitcell_angles, 90)
        # The native kernel is given a transposed copy of the box vectors.
        box_t = box.transpose(0, 2, 1).copy()
        _geometry._angle_mic(coords, triplets, box_t, out, orthogonal)
    else:
        _geometry._angle(coords, triplets, out)
    return out
def validate_input_arrays(predictions, measurements, uncertainties, prior_pops=None):
    """Check input data for correct shape and dtype

    Every array is passed through `ensure_type`; nothing is returned.

    Parameters
    ----------
    predictions : ndarray, shape = (num_frames, num_measurements)
        predictions[j, i] gives the ith observable predicted at frame j
    measurements : ndarray, shape = (num_measurements)
        measurements[i] gives the ith experimental measurement
    uncertainties : ndarray, shape = (num_measurements)
        uncertainties[i] gives the uncertainty of the ith experiment
    prior_pops : ndarray, shape = (num_frames), optional
        Prior populations of each conformation.  If None, skip.

    Notes
    -----
    All inputs are checked against float64 and against shapes derived from
    `predictions`. How a mismatch is reported is decided by `ensure_type`.
    """
    # Validate `predictions` first, so a non-array or wrongly shaped input
    # is reported by ensure_type instead of failing on the `.shape` unpack.
    predictions = ensure_type(predictions, np.float64, 2, "predictions")
    num_frames, num_measurements = predictions.shape

    ensure_type(measurements, np.float64, 1, "measurements",
                shape=(num_measurements,))
    ensure_type(uncertainties, np.float64, 1, "uncertainties",
                shape=(num_measurements,))
    if prior_pops is not None:
        ensure_type(prior_pops, np.float64, 1, "prior_pops",
                    shape=(num_frames,))
def compute_distances(traj, atom_pairs, periodic=True, opt=True):
    """Compute the distance between each pair of atoms in every frame.

    Parameters
    ----------
    traj : Trajectory
        An mdtraj trajectory.
    atom_pairs : np.ndarray, shape=(num_pairs, 2), dtype=int
        Each row gives the indices of two atoms involved in the interaction.
    periodic : bool, default=True
        If `periodic` is True and the trajectory contains unitcell
        information, distances are computed under the minimum image
        convention.
    opt : bool, default=True
        Use the optimized native library instead of the numpy
        implementation.

    Returns
    -------
    distances : np.ndarray, shape=(n_frames, num_pairs), dtype=float
        The distance, in each frame, between each pair of atoms.

    Raises
    ------
    ValueError
        If any index is negative or not smaller than the number of atoms.
    """
    coords = ensure_type(traj.xyz, dtype=np.float32, ndim=3, name='traj.xyz',
                         shape=(None, None, 3), warn_on_cast=False)
    pairs = ensure_type(atom_pairs, dtype=np.int32, ndim=2, name='atom_pairs',
                        shape=(None, 2), warn_on_cast=False)

    in_range = (pairs >= 0) & (pairs < traj.n_atoms)
    if not in_range.all():
        raise ValueError('atom_pairs must be between 0 and %d' % traj.n_atoms)

    # Nothing to compute: return an (n_frames, 0) array straight away.
    if len(pairs) == 0:
        return np.zeros((len(coords), 0), dtype=np.float32)

    out_shape = (coords.shape[0], pairs.shape[0])

    if periodic and traj._have_unitcell:
        box = ensure_type(traj.unitcell_vectors, dtype=np.float32, ndim=3,
                          name='unitcell_vectors',
                          shape=(len(coords), 3, 3), warn_on_cast=False)
        orthogonal = np.allclose(traj.unitcell_angles, 90)
        box_t = box.transpose(0, 2, 1)
        if not opt:
            return _distance_mic(coords, pairs, box_t, orthogonal)
        out = np.empty(out_shape, dtype=np.float32)
        # The native kernel is given a copy of the transposed box.
        _geometry._dist_mic(coords, pairs, box_t.copy(), out, orthogonal)
        return out

    # either there are no unitcell vectors or they dont want to use them
    if not opt:
        return _distance(coords, pairs)
    out = np.empty(out_shape, dtype=np.float32)
    _geometry._dist(coords, pairs, out)
    return out
def shrake_rupley(traj, probe_radius=0.14, n_sphere_points=960):
    """Compute the solvent accessible surface area of each atom in each
    simulation frame.

    Parameters
    ----------
    traj : Trajectory
        An mdtraj trajectory.
    probe_radius : float, optional
        The radius of the probe, in nm.
    n_sphere_points : int, optional
        The number of points representing the surface of each atom, higher
        values lead to more accuracy.

    Returns
    -------
    areas : np.array, shape=(n_frames, n_atoms)
        The accessible surface area of each atom in every frame

    Raises
    ------
    RuntimeError
        If the CPU does not support SSE4.1.

    Notes
    -----
    This code implements the Shrake and Rupley algorithm, with the Golden
    Section Spiral algorithm to generate the sphere points. The basic idea
    is to create a mesh of points representing the surface of each atom (at
    a distance of the van der Waals radius plus the probe radius from the
    nuclei), and then count the number of such mesh points that are on the
    molecular surface -- i.e. not within the radius of another atom.
    Assuming that the points are evenly distributed, the number of points
    is directly proportional to the accessible surface area (it is just
    4*pi*r^2 times the fraction of the points that are accessible).

    There are a number of different ways to generate the points on the
    sphere -- possibly the best way would be to do a little "molecular
    dynamics": put the points on the sphere, and then run MD where all the
    points repel one another and wait for them to get to an energy minimum.
    But that sounds expensive.

    This code uses the golden section spiral algorithm (picture at
    http://xsisupport.com/2012/02/25/evenly-distributing-points-on-a-sphere-with-the-golden-sectionspiral/)
    where you make this spiral that traces out the unit sphere and then put
    points down equidistant along the spiral. It's cheap, but not perfect.

    The gromacs utility g_sas uses a slightly different algorithm for
    generating points on the sphere, which is based on an icosahedral
    tessellation. Roughly, the icosahedral tessellation works something
    like this
    http://www.ziyan.info/2008/11/sphere-tessellation-using-icosahedron.html

    References
    ----------
    .. [1] Shrake, A; Rupley, JA. (1973) J Mol Biol 79 (2): 351--71.
    """
    if not _geometry._processor_supports_sse41():
        raise RuntimeError('This CPU does not support the required instruction set (SSE4.1)')

    coords = ensure_type(traj.xyz, dtype=np.float32, ndim=3, name='traj.xyz',
                         shape=(None, None, 3), warn_on_cast=False)

    # Per-atom radius from the element table, enlarged by the probe radius.
    element_radii = np.array(
        [_ATOMIC_RADII[atom.element.symbol] for atom in traj.topology.atoms],
        np.float32)
    radii = element_radii + probe_radius

    # One area per (frame, atom), filled in by the native routine.
    areas = np.zeros(coords.shape[:2], dtype=np.float32)
    _geometry._sasa(coords, radii, int(n_sphere_points), areas)
    return areas
def write(self, xyz, types=None):
    """Write one or more frames of data to a xyz file.

    Each frame is written as the atom count, a comment line naming the
    MDTraj version and today's date, and one ``type x y z`` line per atom.

    Parameters
    ----------
    xyz : np.ndarray, shape=(n_frames, n_atoms, 3)
        The cartesian coordinates of the atoms to write.
    types : sequence, shape=(n_atoms, ), optional
        The type of each particle. If None or empty, every particle is
        written with type 'X'.

    Raises
    ------
    ValueError
        If the file was not opened in mode "w".
    """
    if not self._mode == 'w':
        raise ValueError('write() is only available when file is opened '
                         'in mode="w"')

    # Validate first: a single 2-D frame gains its frame axis here, so the
    # atom count is read from the normalised array.
    xyz = ensure_type(xyz, np.float32, 3, 'xyz', can_be_none=False,
                      shape=(None, None, 3), warn_on_cast=False,
                      add_newaxis_on_deficient_ndim=True)
    n_atoms = xyz.shape[1]

    # `not types` is ambiguous for a multi-element ndarray and raises, so
    # test for None / empty explicitly.
    if types is None or len(types) == 0:
        # Make all particles the same type.
        types = ['X'] * n_atoms

    # NOTE(review): inplace=True converts `xyz` itself. If ensure_type
    # returned the caller's array without copying, the caller's
    # coordinates are rescaled too -- confirm.
    in_units_of(xyz, 'nanometers', self.distance_unit, inplace=True)

    for frame in xyz:
        self._fh.write('{0}\n'.format(n_atoms))
        self._fh.write("Created with MDTraj {0}, {1}\n".format(version, str(date.today())))
        for j, coord in enumerate(frame):
            self._fh.write('{0} {1:8.3f} {2:8.3f} {3:8.3f}\n'.format(
                types[j], coord[0], coord[1], coord[2]))
def _init(self, sequences, init_params):
    """Find initial means(hot start)

    Stores the validated sequences on ``self._impl`` and initialises the
    model parameters selected by `init_params`.

    Parameters
    ----------
    sequences : iterable
        Each item is passed through `ensure_type` as a 2-D float32 array.
    init_params : container of str
        Flags choosing what to initialise: ``'m'`` sets ``means_``, ``'v'``
        sets ``vars_``, ``'t'`` sets ``transmat_`` and ``populations_``.
    """
    sequences = [ensure_type(s, dtype=np.float32, ndim=2, name='s', warn_on_cast=False)
                 for s in sequences]
    self._impl._sequences = sequences

    # Data used for the hot start: every sequence, or only the first
    # `n_hotstart` of them.
    if self.n_hotstart == 'all':
        small_dataset = np.vstack(sequences)
    else:
        small_dataset = np.vstack(sequences[0:min(len(sequences), self.n_hotstart)])

    if self.init_algo == "GMM" and ("m" in init_params or "v" in init_params):
        # Take means and/or variances from a fitted mixture model.
        mixture = sklearn.mixture.GMM(self.n_states, n_init=1, random_state=self.random_state)
        mixture.fit(small_dataset)
        if "m" in init_params:
            self.means_ = mixture.means_
        if "v" in init_params:
            self.vars_ = mixture.covars_
    else:
        if 'm' in init_params:
            # Means from KMeans cluster centers; warnings raised while
            # fitting are suppressed.
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                self.means_ = cluster.KMeans(
                    n_clusters=self.n_states, n_init=1, init='random',
                    n_jobs=self.n_jobs, random_state=self.random_state).fit(
                    small_dataset).cluster_centers_
        if 'v' in init_params:
            # Every state starts from the per-feature variance of the data.
            self.vars_ = np.vstack([np.var(small_dataset, axis=0)] * self.n_states)
    # NOTE(review): the nesting of this block was reconstructed from a
    # whitespace-collapsed source -- confirm it belongs at function level
    # and that `populations_` is set only under 't'.
    if 't' in init_params:
        # Uniform transition matrix and uniform state populations.
        transmat_ = np.empty((self.n_states, self.n_states))
        transmat_.fill(1.0 / self.n_states)
        self.transmat_ = transmat_
        self.populations_ = np.ones(self.n_states) / self.n_states
def score(self, data):
    """Log-likelihood of sequences under the model

    Parameters
    ----------
    data : iterable
        Each item is passed through `ensure_type` as a 2-D float32 array.

    Returns
    -------
    logprob
        The first value returned by ``self.inferrer.do_mslds_estep()``.
    """
    sequences = [ensure_type(s, dtype=np.float32, ndim=2, name="s")
                 for s in data]
    # Hand the validated arrays to the inferrer. The raw `data` was passed
    # before, which left the validation above without any effect.
    self.inferrer._sequences = sequences
    logprob, _ = self.inferrer.do_mslds_estep()
    return logprob
def select_pairs(self, selection1=None, selection2=None):
    """Generate unique pairs of atom indices.

    If a selection is a string, it will be resolved using the atom selection
    DSL, otherwise it is expected to be an array of atom indices.

    Parameters
    ----------
    selection1 : str or array-like, shape=(n_indices, ), dtype=int
        A selection for `select()` or an array of atom indices.
    selection2 : str or array-like, shape=(n_indices, ), dtype=int
        A selection for `select()` or an array of atom indices.

    Returns
    -------
    pairs : array-like, shape=(n_pairs, 2), dtype=int
        Each row gives the indices of two atoms. The result has shape
        ``(0, 2)`` when no pair exists. When both selections are identical
        each row lists the smaller index first; otherwise each row lists
        the larger index first.
    """
    # Resolve selections using the atom selection DSL...
    if isinstance(selection1, string_types):
        a_indices = self.select(selection1)
    else:  # ...or use a provided array of indices.
        a_indices = ensure_type(selection1, dtype=np.int32, ndim=1,
                                name='a_indices', warn_on_cast=False)
    if isinstance(selection2, string_types):
        b_indices = self.select(selection2)
    else:
        b_indices = ensure_type(selection2, dtype=np.int32, ndim=1,
                                name='b_indices', warn_on_cast=False)

    # np.sort returns sorted copies. The former in-place `.sort()` could
    # reorder an index array owned by the caller.
    a_indices = np.sort(a_indices)
    b_indices = np.sort(b_indices)

    # Create unique pairs from the indices.
    if np.array_equal(a_indices, b_indices):
        # This is more efficient and memory friendly by removing the
        # intermediate set creation required in the case below.
        n_indices = len(a_indices)
        flat = np.fromiter(
            itertools.chain.from_iterable(itertools.combinations(a_indices, 2)),
            dtype=np.int32, count=n_indices * (n_indices - 1))
        pairs = np.vstack((flat[::2], flat[1::2])).T
    else:
        pairs = np.array(list(set(
            (a, b) if a > b else (b, a)
            for a, b in itertools.product(a_indices, b_indices)
            if a != b)), dtype=np.int32)
        # An empty result would otherwise have shape (0,), not (0, 2).
        pairs = pairs.reshape(-1, 2)
    return pairs
def select_pairs(self, selection1=None, selection2=None):
    """Generate unique pairs of atom indices.

    If a selection is a string, it will be resolved using the atom selection
    DSL, otherwise it is expected to be an array of atom indices.

    Parameters
    ----------
    selection1 : str or array-like, shape=(n_indices, ), dtype=int
        A selection for `select()` or an array of atom indices.
    selection2 : str or array-like, shape=(n_indices, ), dtype=int
        A selection for `select()` or an array of atom indices.

    Returns
    -------
    pairs : array-like, shape=(n_pairs, 2), dtype=int
        Each row gives the indices of two atoms.
    """
    # Resolve selections using the atom selection DSL...
    if isinstance(selection1, string_types):
        a_indices = self.select(selection1)
    else:  # ...or use a provided array of indices.
        a_indices = ensure_type(selection1, dtype=np.int32, ndim=1,
                                name='a_indices', warn_on_cast=False)
    if isinstance(selection2, string_types):
        b_indices = self.select(selection2)
    else:
        b_indices = ensure_type(selection2, dtype=np.int32, ndim=1,
                                name='b_indices', warn_on_cast=False)

    # np.sort returns sorted copies. The former in-place `.sort()` could
    # reorder an index array owned by the caller.
    a_indices = np.sort(a_indices)
    b_indices = np.sort(b_indices)

    # Create unique pairs from the indices.
    # In the cases where a_indices and b_indices are identical or mutually
    # exclusive, we can utilize a more efficient and memory friendly
    # approach by removing the intermediate set creation required in
    # the general case.
    if np.array_equal(a_indices, b_indices):
        pairs = self._unique_pairs_equal(a_indices)
    elif len(np.intersect1d(a_indices, b_indices)) == 0:
        pairs = self._unique_pairs_mutually_exclusive(a_indices, b_indices)
    else:
        pairs = self._unique_pairs(a_indices, b_indices)
    return pairs
def test_ensure_type_25():
    """Casting with warn_on_cast=False converts silently and leaves `a` alone."""
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        val = ensure_type(a, np.float64, 1, '', length=10, warn_on_cast=False)

    assert val.dtype == np.float64
    assert a.dtype == np.float32  # a should not be changed
    assert len(caught) == 0  # no warning since we set warn_on_cast to False
def test_ensure_type_2():
    """Casting emits one TypeCastPerformanceWarning and leaves `a` alone."""
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        val = ensure_type(a, np.float64, 1, '', length=10)

    assert val.dtype == np.float64
    assert a.dtype == np.float32  # a should not be changed
    assert len(caught) == 1
    assert issubclass(caught[-1].category, TypeCastPerformanceWarning)
def compute_displacements(traj, atom_pairs, periodic=True, opt=True):
    """Compute the displacement vector between pairs of atoms in each frame
    of a trajectory.

    Parameters
    ----------
    traj : Trajectory
        Trajectory to compute displacements in
    atom_pairs : np.ndarray, shape[num_pairs, 2], dtype=int
        Each row gives the indices of two atoms.
    periodic : bool, default=True
        If `periodic` is True and the trajectory contains unitcell
        information, displacements are computed under the minimum image
        convention.
    opt : bool, default=True
        Use the optimized native library. It is only used when the
        processor supports SSE4.1; otherwise the numpy implementation runs.

    Returns
    -------
    displacements : np.ndarray, shape=[n_frames, n_pairs, 3], dtype=float32
        The displacement vector, in each frame, between each pair of atoms.

    Raises
    ------
    ValueError
        If any index is negative or not smaller than the number of atoms.
    """
    coords = ensure_type(traj.xyz, dtype=np.float32, ndim=3, name='traj.xyz',
                         shape=(None, None, 3))
    pairs = ensure_type(np.asarray(atom_pairs), dtype=np.int32, ndim=2,
                        name='atom_pairs', shape=(None, 2))

    in_range = (pairs >= 0) & (pairs < traj.n_atoms)
    if not in_range.all():
        raise ValueError('atom_pairs must be between 0 and %d' % traj.n_atoms)

    use_mic = periodic is True and traj._have_unitcell
    box = None
    if use_mic:
        box = ensure_type(traj.unitcell_vectors, dtype=np.float32, ndim=3,
                          name='unitcell_vectors',
                          shape=(len(coords), 3, 3))

    # Numpy fallbacks: native code not requested, or SSE4.1 unavailable.
    if not (opt and _geometry._processor_supports_sse41()):
        if use_mic:
            return _displacement_mic(coords, pairs, box)
        # either there are no unitcell vectors or they dont want to use them
        return _displacement(coords, pairs)

    out = np.empty((coords.shape[0], pairs.shape[0], 3), dtype=np.float32)
    if use_mic:
        _geometry._dist_mic_displacement(coords, pairs, box, out)
    else:
        _geometry._dist_displacement(coords, pairs, out)
    return out
def compute_distances(traj, atom_pairs, periodic=True, opt=True):
    """Compute the distances between pairs of atoms in each frame.

    Parameters
    ----------
    traj : Trajectory
        An mdtraj trajectory.
    atom_pairs : np.ndarray, shape=(num_pairs, 2), dtype=int
        Each row gives the indices of two atoms involved in the interaction.
    periodic : bool, default=True
        If `periodic` is True and the trajectory contains unitcell
        information, we will compute distances under the minimum image
        convention.
    opt : bool, default=True
        Use the optimized native library instead of the numpy
        implementation.

    Returns
    -------
    distances : np.ndarray, shape=(n_frames, num_pairs), dtype=float
        The distance, in each frame, between each pair of atoms.

    Raises
    ------
    ValueError
        If any index is negative or not smaller than the number of atoms.
    """
    # The name previously read 'taj.xyz', which ensure_type receives as the
    # label for this array.
    xyz = ensure_type(traj.xyz, dtype=np.float32, ndim=3, name='traj.xyz',
                      shape=(None, None, 3), warn_on_cast=False)
    pairs = ensure_type(np.asarray(atom_pairs), dtype=np.int32, ndim=2,
                        name='atom_pairs', shape=(None, 2),
                        warn_on_cast=False)

    # Reject out-of-range indices before they are handed to the kernels.
    if not np.all(np.logical_and(pairs < traj.n_atoms, pairs >= 0)):
        raise ValueError('atom_pairs must be between 0 and %d' % traj.n_atoms)

    if periodic is True and traj._have_unitcell:
        box = ensure_type(traj.unitcell_vectors, dtype=np.float32, ndim=3,
                          name='unitcell_vectors', shape=(len(xyz), 3, 3))
        if opt:
            out = np.empty((xyz.shape[0], pairs.shape[0]), dtype=np.float32)
            _geometry._dist_mic(xyz, pairs, box, out)
            return out
        return _distance_mic(xyz, pairs, box)

    # either there are no unitcell vectors or they dont want to use them
    if opt:
        out = np.empty((xyz.shape[0], pairs.shape[0]), dtype=np.float32)
        _geometry._dist(xyz, pairs, out)
        return out
    return _distance(xyz, pairs)
def compute_rdf(traj, pairs=None, r_range=None, bin_width=0.005,
                periodic=True, opt=True):
    """Compute radial distribution functions for pairs in every frame.

    Parameters
    ----------
    traj : Trajectory
        Trajectory to compute radial distribution function in.
    pairs : array-like, shape=(n_pairs, 2), dtype=int
        Each row gives the indices of two atoms. Although the default is
        None, an array is required: it is passed to `compute_distances`
        and its length is used for normalization.
    r_range : array-like, shape=(2,), optional, default=(0.0, 1.0)
        Minimum and maximum radii.
    bin_width : float, optional, default=0.005
        Width of the bins in nanometers.
    periodic : bool, default=True
        If `periodic` is True and the trajectory contains unitcell
        information, we will compute distances under the minimum image
        convention.
    opt : bool, default=True
        Use an optimized native library to compute the pair wise distances.

    Returns
    -------
    r : np.ndarray, dtype=float
        Radii values corresponding to the centers of the bins. There is one
        value per histogram bin, with bin edges given by
        ``np.arange(r_range[0], r_range[1], bin_width)``.
    g_r : np.ndarray, dtype=float
        Radial distribution function values at r.

    See also
    --------
    Topology.select_pairs
    """
    if r_range is None:
        r_range = np.array([0.0, 1.0])
    r_range = ensure_type(r_range, dtype=np.float64, ndim=1, name='r_range',
                          shape=(2,), warn_on_cast=False)
    bins = np.arange(r_range[0], r_range[1], bin_width)

    distances = compute_distances(traj, pairs, periodic=periodic, opt=opt)
    g_r, edges = np.histogram(distances, bins=bins)
    r = 0.5 * (edges[1:] + edges[:-1])

    # Normalize by volume of the spherical shell.
    # See discussion https://github.com/mdtraj/mdtraj/pull/724. There might be
    # a less biased way to accomplish this. The conclusion was that this could
    # be interesting to try, but is likely not hugely consequential. This method
    # of doing the calculations matches the implementation in other packages like
    # AmberTools' cpptraj and gromacs g_rdf.
    # Float literals keep the 4/3 factor exact even where `/` on two ints
    # is integer division.
    V = (4.0 / 3.0) * np.pi * (np.power(edges[1:], 3) - np.power(edges[:-1], 3))
    norm = len(pairs) * np.sum(1.0 / traj.unitcell_volumes) * V
    g_r = g_r.astype(np.float64) / norm  # From int64.
    return r, g_r
def xyz(self, value):
    """Set the cartesian coordinates of each atom in each simulation frame.

    The value is validated (and cast to float32, gaining a leading axis if
    it is one dimension short) by ``ensure_type``. When a topology is
    attached, the atom axis must match the topology's atom count.
    Assigning new coordinates also resets ``self._rmsd_traces`` to None.
    """
    # Only constrain the atom axis when there is a topology to compare with.
    n_atoms = self.topology._numAtoms if self.top is not None else None
    self._xyz = ensure_type(value, np.float32, 3, 'xyz',
                            shape=(None, n_atoms, 3),
                            warn_on_cast=False,
                            add_newaxis_on_deficient_ndim=True)
    self._rmsd_traces = None
def unitcell_lengths(self, value):
    """Set the lengths that define the shape of the unit cell in each frame.

    Parameters
    ----------
    value : np.ndarray, shape=(n_frames, 3), or None
        The distances ``a``, ``b``, and ``c`` that define the shape of the
        unit cell in each frame. ``None`` is accepted and stored as-is.
    """
    # One row of three lengths is expected per frame of this object.
    expected_shape = (len(self), 3)
    checked = ensure_type(value, dtype=np.float32, ndim=2,
                          name='unitcell_lengths', can_be_none=True,
                          shape=expected_shape, warn_on_cast=False,
                          add_newaxis_on_deficient_ndim=True)
    self._unitcell_lengths = checked
def unitcell_angles(self, value):
    """Set the angles that define the shape of the unit cell in each frame.

    Parameters
    ----------
    value : np.ndarray, shape=(n_frames, 3), or None
        The angles ``alpha``, ``beta`` and ``gamma`` that define the shape
        of the unit cell in each frame. The angles should be in degrees.
        ``None`` is accepted and stored as-is.
    """
    # One row of three angles is expected per frame of this object.
    expected_shape = (len(self), 3)
    checked = ensure_type(value, dtype=np.float32, ndim=2,
                          name='unitcell_angles', can_be_none=True,
                          shape=expected_shape, warn_on_cast=False,
                          add_newaxis_on_deficient_ndim=True)
    self._unitcell_angles = checked
def _compute_Q_tensor(all_directors):
    """Compute the Q-tensor for a set of directors.

    For each frame,
    Q_{ab} = 1/(2N) sum_{i_molecules} (3 * e_{ia} * e_{ib} - d_{ab})  [1]

    Parameters
    ----------
    all_directors : np.ndarray, shape=(n_frames, n_compounds, 3), dtype=float64
        An array of directors describing each compound's orientation over
        time. The directors do not need to be normalized; they are scaled
        to unit length here.

    Returns
    -------
    Q_ab : np.ndarray, shape=(n_frames, 3, 3), dtype=float64
        The Q-tensors describing the directors for each frame.

    See also
    --------
    _compute_director

    References
    ----------
    .. [1] Allen, M. P.; Tildesley , D. J. (1987), "Computer Simulation of
           Liquids", p. 305, Eq. 11.19
    """
    all_directors = ensure_type(all_directors, dtype=np.float64, ndim=3,
                                name='directors', shape=(None, None, 3))
    n_compounds = all_directors.shape[1]

    # Scale every director to unit length. sqrt-of-sum-of-squares works on
    # every NumPy version, so no version switch is required.
    lengths = np.sqrt((all_directors ** 2.0).sum(axis=-1))[..., np.newaxis]
    unit = all_directors / lengths

    # sum_i e_ia * e_ib for every frame in a single vectorised contraction.
    outer_sum = np.einsum('nia,nib->nab', unit, unit)

    # Each compound contributes -delta_ab, hence the n_compounds * identity.
    Q_ab = 3.0 * outer_sum - n_compounds * np.eye(3, dtype=np.float64)
    Q_ab /= (2.0 * n_compounds)
    return Q_ab
def compute_translation_and_rotation(mobile, target):
    """Returns the translation and rotation mapping mobile onto target.

    Parameters
    ----------
    mobile : ndarray, shape = (n_atoms, 3)
        xyz coordinates of a `single` frame, to be aligned onto target.
    target : ndarray, shape = (n_atoms, 3)
        xyz coordinates of a `single` frame. Must contain the same number
        of atoms as `mobile`.

    Returns
    -------
    translation : ndarray, shape=(3,)
        The centroid of `target`, i.e. the offset to add after the centered
        `mobile` coordinates have been rotated.
    rotation : ndarray, shape=(3,3)
        Rotation matrix to apply to mobile to carry out the transformation.
    """
    ensure_type(mobile, 'float', 2, 'mobile', warn_on_cast=False,
                shape=(None, 3))
    # Validate target against mobile's atom count; comparing target with its
    # own shape would accept any number of atoms.
    ensure_type(target, 'float', 2, 'target', warn_on_cast=False,
                shape=(mobile.shape[0], 3))

    mu1 = mobile.mean(0)
    mu2 = target.mean(0)
    translation = mu2

    # Work on centered copies so the inputs are left untouched.
    mobile = mobile - mu1
    target = target - mu2

    correlation_matrix = np.dot(np.transpose(mobile), target)
    V, S, W_tr = np.linalg.svd(correlation_matrix)

    # A negative determinant product means the optimal orthogonal transform
    # is a reflection; flip the last singular vector to get a proper rotation.
    is_reflection = (np.linalg.det(V) * np.linalg.det(W_tr)) < 0.0
    if is_reflection:
        V[:, -1] = -V[:, -1]
    rotation = np.dot(V, W_tr)

    return translation, rotation
def score(self, sequences):
    """Log-likelihood of sequences under the model

    Parameters
    ----------
    sequences : list
        List of 2-dimensional array observation sequences, each of which
        has shape (n_samples_i, n_features), where n_samples_i is the
        length of the i_th observation.

    Returns
    -------
    logprob
        The first element of the pair returned by the implementation's
        ``do_estep()``.
    """
    checked = []
    for seq in sequences:
        # Every sequence must be a 2-D float32 array.
        checked.append(ensure_type(seq, dtype=np.float32, ndim=2, name='s'))

    impl = self._impl
    impl._sequences = checked
    logprob, _unused = impl.do_estep()
    return logprob
def read(self, n_frames=None, stride=None, atom_indices=None):
    """Read one or more frames of data from the file

    Parameters
    ----------
    n_frames : {int, None}
        The number of frames to read. If not supplied, all of the
        remaining frames will be read.
    stride : {int, None}
        By default all of the frames will be read, but you can pass this
        flag to read a subset of of the data by grabbing only every
        `stride`-th frame from disk.
    atom_indices : {array-like of int, None}
        By default all of the atom will be read, but you can pass this
        flag to read only a subsets of the atoms for the `coordinates` and
        `velocities` fields. Note that you will have to carefully manage
        the indices and the offsets, since the `i`-th atom in the topology
        will not necessarily correspond to the `i`-th atom in your subset.

    Returns
    -------
    xyz : np.ndarray, shape=(n_frames, n_atoms, 3), dtype=np.float32
        The cartesian coordinates, in nanometers. An empty float32 array
        is returned when there are no frames left to read.
    """
    # A one-element tuple, not the bare string ('r').
    _check_mode(self.mode, ('r',))

    if n_frames is None:
        n_frames = np.inf
    if stride is not None:
        stride = int(stride)

    if atom_indices is None:
        atom_slice = slice(None)
    else:
        # The builtin int replaces the removed np.int alias (same dtype).
        atom_slice = ensure_type(atom_indices, dtype=int, ndim=1,
                                 name='atom_indices', warn_on_cast=False)

    total_n_frames = len(self._handle.root.XYZList)
    frame_slice = slice(self._frame_index,
                        min(self._frame_index + n_frames, total_n_frames),
                        stride)
    if frame_slice.stop - frame_slice.start == 0:
        return np.array([], dtype=np.float32)

    xyz = self._handle.root.XYZList.__getitem__((frame_slice, atom_slice))
    # Integer-typed data goes through the lossy-integer decoding helper.
    if xyz.dtype == np.int16 or xyz.dtype == np.int32:
        xyz = _convert_from_lossy_integers(xyz)

    # Advance by the span consumed on disk, regardless of stride.
    self._frame_index += (frame_slice.stop - frame_slice.start)
    return xyz
def squareform(distances, residue_pairs):
    """Reshape the contact distance to square contact maps

    Parameters
    ----------
    distances : np.ndarray, shape=(n_frames, n_pairs)
        Distances between pairs of residues, as computed by
        `mdtraj.geometry.compute_contacts`.
    residue_pairs : np.ndarray, shape=(n_pairs, 2)
        The indices of the residues involved in each pair, as
        returned by `mdtraj.geometry.compute_contacts`

    Returns
    -------
    contact_maps : np.ndarray, shape=(n_frames, n_residues, n_residues)
        Reshaped version of `distances`, such that the distance, in
        the `k`th frame of the trajectory from residue `i` to residue `j`
        is given by `contact_maps[k, i, j]`. All entries in `contact_maps`
        corresponding to the distance between residues that were not
        part of residue_pairs are 0.0.

    Raises
    ------
    ValueError
        If `distances` is not a 2d ndarray, if `residue_pairs` contains a
        negative index, or if the number of pairs in the two arguments
        disagree.

    See Also
    --------
    mdtraj.compute_contacts : Compute the array of contact distances
    """
    # Reject anything that is not a 2-D ndarray; the negation must cover
    # both the type test and the dimensionality test.
    if not (isinstance(distances, np.ndarray) and distances.ndim == 2):
        raise ValueError('distances must be a 2d array')

    # The builtin int replaces the removed np.int alias (same dtype).
    residue_pairs = ensure_type(
        residue_pairs, dtype=int, ndim=2, name='residue_pairs',
        shape=(None, 2), warn_on_cast=False)

    if not np.all(residue_pairs >= 0):
        raise ValueError('residue_pairs references a residue that is not in '
                         'the permitted range')

    if distances.shape[1] != residue_pairs.shape[0]:
        raise ValueError('The number of pairs in distances, %d, does not '
                         'match the number of pairs in residue_pairs, %d.' %
                         (distances.shape[1], residue_pairs.shape[0]))

    n_residues = np.max(residue_pairs) + 1
    contact_maps = np.zeros((distances.shape[0], n_residues, n_residues),
                            dtype=distances.dtype)

    # Fill both triangles so every map is symmetric.
    contact_maps[:, residue_pairs[:, 0], residue_pairs[:, 1]] = distances
    contact_maps[:, residue_pairs[:, 1], residue_pairs[:, 0]] = distances

    return contact_maps
def write(self, coordinates, time=None, cell_lengths=None, cell_angles=None):
    """Write one frame of a MD trajectory to disk in the AMBER NetCDF
    restart file format.

    Parameters
    ----------
    coordinates : np.ndarray, dtype=np.float32, shape=([1,] n_atoms, 3)
        The cartesian coordinates of each atom, in units of angstroms. Must
        be only a single frame (shape can be (1,N,3) or (N,3) where N is
        the number of atoms)
    time : array-like with 1 element or float, optional
        The time corresponding to this frame. If not specified, a place
        holder of 0 will be written
    cell_lengths : np.ndarray, dtype=np.double, shape=([1,] 3)
        The lengths (a,b,c) of the unit cell for the frame in Angstroms
    cell_angles : np.ndarray, dtype=np.double, shape=([1,] 3)
        The angles between the unit cell vectors for the frame in Degrees

    Raises
    ------
    IOError
        If the file was not opened in mode 'w'.
    RuntimeError
        If a frame has already been written to this file.
    ValueError
        If more than one frame is supplied, or if only one of
        `cell_lengths` / `cell_angles` is given.
    TypeError
        If `time` cannot be reduced to a single float.

    Notes
    -----
    You must only have one frame to write to this file.
    """
    if self._mode != 'w':
        raise IOError(
            'The file was opened in mode=%s. Writing not allowed.'
            % self._mode)
    if not self._needs_initialization:
        # Must have already been written -- can only write once
        raise RuntimeError('NetCDF restart file has already been written '
                           '-- can only write one frame to restart files.')

    # Convert any unitted inputs to the on-disk units.
    coordinates = in_units_of(coordinates, None, 'angstroms')
    time = in_units_of(time, None, 'picoseconds')
    cell_lengths = in_units_of(cell_lengths, None, 'angstroms')
    cell_angles = in_units_of(cell_angles, None, 'degrees')

    # typecheck all of the input arguments rigorously
    coordinates = ensure_type(coordinates, np.float32, 3, 'coordinates',
                              length=None, can_be_none=False,
                              shape=(1, None, 3), warn_on_cast=False,
                              add_newaxis_on_deficient_ndim=True)
    n_frames, n_atoms = coordinates.shape[:2]
    if n_frames != 1:
        raise ValueError('Can only write 1 frame to a restart file!')

    if time is None:
        time = 0.0
    else:
        try:
            time = float(time)
        except TypeError:
            raise TypeError('Can only provide a single time')

    cell_lengths = ensure_type(cell_lengths, np.float64, 2, 'cell_lengths',
                               length=1, can_be_none=True,
                               warn_on_cast=False,
                               add_newaxis_on_deficient_ndim=True)
    cell_angles = ensure_type(cell_angles, np.float64, 2, 'cell_angles',
                              length=1, can_be_none=True,
                              warn_on_cast=False,
                              add_newaxis_on_deficient_ndim=True)

    have_lengths = cell_lengths is not None
    have_angles = cell_angles is not None
    if have_lengths != have_angles:
        # Name the argument that was supplied and the one that is missing.
        if have_lengths:
            prov, negl = 'cell_lengths', 'cell_angles'
        else:
            prov, negl = 'cell_angles', 'cell_lengths'
        raise ValueError('You provided the variable "%s" but did not '
                         'provide "%s". Either provide both or neither -- '
                         'one without the other is meaningless.' %
                         (prov, negl))

    # `time` is always a float by now (0.0 placeholder when omitted), so
    # the time variable is always created and written.
    self._initialize_headers(n_atoms=n_atoms, set_coordinates=True,
                             set_time=True, set_cell=have_lengths)
    self._needs_initialization = False

    # Write the time, coordinates, and box info
    variables = self._handle.variables
    variables['time'][0] = time
    variables['coordinates'][:, :] = coordinates[0, :, :]
    if have_lengths:
        variables['cell_angles'][:] = cell_angles[0, :]
        variables['cell_lengths'][:] = cell_lengths[0, :]
    self.flush()
def shrake_rupley(traj, probe_radius=0.14, n_sphere_points=960, mode='atom',
                  change_radii=None, get_mapping=False):
    """Compute the solvent accessible surface area of each atom or residue
    in each simulation frame.

    Parameters
    ----------
    traj : Trajectory
        An mtraj trajectory.
    probe_radius : float, optional
        The radius of the probe, in nm.
    n_sphere_points : int, optional
        The number of points representing the surface of each atom, higher
        values leads to more accuracy.
    mode : {'atom', 'residue'}
        In mode == 'atom', the extracted areas are resolved per-atom
        In mode == 'residue', this is consolidated down to the
        per-residue SASA by summing over the atoms in each residue.
    change_radii : dict, optional
        A partial or complete dict containing the radii to change from the
        defaults. Should take the form {"Symbol" : radii_in_nm }, e.g.
        {"Cl" : 0.175 } to change the radii of Chlorine to 175 pm.
    get_mapping : bool, optional
        Instead of returning only the areas, also return the indices of the
        atoms or the residue-to-atom mapping. If True, will return a tuple
        that contains the areas and the mapping (np.array, shape=(n_atoms)).

    Returns
    -------
    areas : np.array, shape=(n_frames, n_features)
        The accessible surface area of each atom or residue in every frame.
        If mode == 'atom', the second dimension will index the atoms in
        the trajectory, whereas if mode == 'residue', the second
        dimension will index the residues.

    Raises
    ------
    ValueError
        If `mode` is not 'atom' or 'residue', or if the residue indices are
        not contiguous integers starting from zero.

    Notes
    -----
    This code implements the Shrake and Rupley algorithm, with the Golden
    Section Spiral algorithm to generate the sphere points. The basic idea
    is to create a mesh of points representing the surface of each atom
    (at a distance of the van der waals radius plus the probe
    radius from the nuclei), and then count the number of such mesh points
    that are on the molecular surface -- i.e. not within the radius of
    another atom. Assuming that the points are evenly distributed, the
    number of points is directly proportional to the accessible surface
    area (its just 4*pi*r^2 time the fraction of the points that are
    accessible).

    There are a number of different ways to generate the points on the
    sphere -- possibly the best way would be to do a little "molecular
    dynamics" : put the points on the sphere, and then run MD where all the
    points repel one another and wait for them to get to an energy minimum.
    But that sounds expensive.

    This code uses the golden section spiral algorithm
    (picture at http://xsisupport.com/2012/02/25/evenly-distributing-points-on-a-sphere-with-the-golden-sectionspiral/)
    where you make this spiral that traces out the unit sphere and then put
    points down equidistant along the spiral. It's cheap, but not perfect.

    The gromacs utility g_sas uses a slightly different algorithm for
    generating points on the sphere, which is based on an icosahedral
    tesselation. roughly, the icosahedral tesselation works something like
    this http://www.ziyan.info/2008/11/sphere-tessellation-using-icosahedron.html

    References
    ----------
    .. [1] Shrake, A; Rupley, JA. (1973) J Mol Biol 79 (2): 351--71.
    """
    xyz = ensure_type(traj.xyz, dtype=np.float32, ndim=3, name='traj.xyz',
                      shape=(None, None, 3), warn_on_cast=False)

    if mode == 'atom':
        dim1 = xyz.shape[1]
        atom_mapping = np.arange(dim1, dtype=np.int32)
    elif mode == 'residue':
        dim1 = traj.n_residues
        atom_mapping = np.array(
            [a.residue.index for a in traj.top.atoms], dtype=np.int32)
        # array_equal also copes with a length mismatch, which a plain `==`
        # between differently-shaped arrays does not.
        expected = np.arange(1 + np.max(atom_mapping))
        if not np.array_equal(np.unique(atom_mapping), expected):
            raise ValueError('residues must have contiguous integer indices '
                             'starting from zero')
    else:
        raise ValueError(
            'mode must be one of "residue", "atom". "%s" supplied' % mode)

    if change_radii is None:
        radii_table = _ATOMIC_RADII
    else:
        # Work on a copy so the module-level defaults are never modified.
        radii_table = dict(_ATOMIC_RADII)
        radii_table.update(change_radii)

    out = np.zeros((xyz.shape[0], dim1), dtype=np.float32)
    atom_radii = [radii_table[atom.element.symbol]
                  for atom in traj.topology.atoms]
    # The probe rolls over the surface, so its radius adds to each atom's.
    radii = np.array(atom_radii, np.float32) + probe_radius

    _geometry._sasa(xyz, radii, int(n_sphere_points), atom_mapping, out)

    if get_mapping:
        return out, atom_mapping
    return out
def read(self, frame_indices=None, atom_indices=None):
    """Read the selected frames (and atoms) from the file.

    Parameters
    ----------
    frame_indices : {array-like of int, None}
        Zero-based indices of the frames to read. If None, every frame is
        read.
    atom_indices : {array-like of int, None}
        Zero-based indices of the atoms to read for the `coordinates` and
        `velocities` fields. If None, every atom is read.

    Returns
    -------
    frames : Frames
        A record with the fields "coordinates", "time", "cell_lengths",
        "cell_angles", "velocities", "kineticEnergy", "potentialEnergy",
        "temperature" and "alchemicalLambda". Every field other than
        "coordinates" is None when the file has no such node.

    Raises
    ------
    ValueError
        If any frame or atom index is negative or out of range.
    """
    _check_mode(self.mode, ('r', ))

    if frame_indices is None:
        frame_slice = slice(None)
        # slice(None) has no numeric bounds, so advance the position by
        # the number of frames in the file instead.
        self._frame_index += self._handle.root.coordinates.shape[0]
    else:
        # The builtin int replaces the removed np.int alias (same dtype).
        frame_slice = ensure_type(frame_indices, dtype=int, ndim=1,
                                  name='frame_indices', warn_on_cast=False)
        if not np.all(
                frame_slice < self._handle.root.coordinates.shape[0]):
            raise ValueError(
                'As a zero-based index, the entries in '
                'frame_slice must all be less than the number of frames '
                'in the trajectory, %d' %
                self._handle.root.coordinates.shape[0])
        if not np.all(frame_slice >= 0):
            raise ValueError(
                'The entries in frame_indices must be greater '
                'than or equal to zero')
        self._frame_index += frame_slice[-1] - frame_slice[0]

    if atom_indices is None:
        # get all of the atoms
        atom_slice = slice(None)
    else:
        atom_slice = ensure_type(atom_indices, dtype=int, ndim=1,
                                 name='atom_indices', warn_on_cast=False)
        if not np.all(atom_slice < self._handle.root.coordinates.shape[1]):
            raise ValueError(
                'As a zero-based index, the entries in '
                'atom_indices must all be less than the number of atoms '
                'in the trajectory, %d' %
                self._handle.root.coordinates.shape[1])
        if not np.all(atom_slice >= 0):
            raise ValueError('The entries in atom_indices must be greater '
                             'than or equal to zero')

    def get_item(node, key):
        # Index `node` with `key`, turning evenly spaced index arrays into
        # slices and applying any remaining index arrays one axis at a time.
        if not isinstance(key, tuple):
            return node.__getitem__(key)

        n_list_like = 0
        new_keys = []
        for item in key:
            if not isinstance(item, slice):
                try:
                    d = np.diff(item)
                    if len(d) == 0:
                        # single index: collapse to a scalar
                        item = item[0]
                    elif np.all(d == d[0]):
                        # constant spacing: equivalent to a slice
                        item = slice(item[0], item[-1] + d[0], d[0])
                    else:
                        n_list_like += 1
                except Exception:
                    n_list_like += 1
            new_keys.append(item)
        new_keys = tuple(new_keys)

        if n_list_like <= 1:
            return node.__getitem__(new_keys)

        # More than one index array: apply them sequentially, one axis per
        # pass, with full slices on all the other axes.
        data = node
        for i, item in enumerate(new_keys):
            dkey = [slice(None)] * len(key)
            dkey[i] = item
            dkey = tuple(dkey)
            data = data.__getitem__(dkey)
        return data

    def get_field(name, key, out_units, can_be_none=True):
        # Read one node and convert it to `out_units`; a missing node gives
        # None unless the field is mandatory.
        try:
            node = self._get_node(where='/', name=name)
            data = get_item(node, key)
            in_units = node.attrs.units
            if not isinstance(in_units, string_types):
                in_units = in_units.decode()
            data = in_units_of(data, in_units, out_units)
            return data
        except self.tables.NoSuchNodeError:
            if can_be_none:
                return None
            raise

    frames = Frames(
        coordinates=get_field('coordinates',
                              (frame_slice, atom_slice, slice(None)),
                              out_units='nanometers', can_be_none=False),
        time=get_field('time', frame_slice, out_units='picoseconds'),
        cell_lengths=get_field('cell_lengths', (frame_slice, slice(None)),
                               out_units='nanometers'),
        cell_angles=get_field('cell_angles', (frame_slice, slice(None)),
                              out_units='degrees'),
        velocities=get_field('velocities',
                             (frame_slice, atom_slice, slice(None)),
                             out_units='nanometers/picosecond'),
        kineticEnergy=get_field('kineticEnergy', frame_slice,
                                out_units='kilojoules_per_mole'),
        potentialEnergy=get_field('potentialEnergy', frame_slice,
                                  out_units='kilojoules_per_mole'),
        temperature=get_field('temperature', frame_slice,
                              out_units='kelvin'),
        alchemicalLambda=get_field('lambda', frame_slice,
                                   out_units='dimensionless'),
    )
    return frames
def write(self, xyz, cell_lengths=None):
    """Write one or more frames of data to a mdcrd file

    Parameters
    ----------
    xyz : np.ndarray, shape=(n_frames, n_atoms, 3)
        The cartesian coordinates of the atoms to write. By convention, the
        lengths should be in units of angstroms.
    cell_lengths : np.ndarray, shape=(n_frames, 3), dtype=float32, optional
        The length of the periodic box in each frame, in each direction,
        `a`, `b`, `c`. By convention the lengths should be in units
        of angstroms.

    Raises
    ------
    ValueError
        If the file is not open for writing, if box information is given
        inconsistently across calls, or if a coordinate does not fit in
        the 8-character field.
    """
    if self._mode != 'w':
        raise ValueError('write() is only available when file is opened '
                         'in mode="w"')

    xyz = ensure_type(xyz, np.float32, 3, 'xyz', can_be_none=False,
                      shape=(None, None, 3), warn_on_cast=False,
                      add_newaxis_on_deficient_ndim=True)
    cell_lengths = ensure_type(cell_lengths, np.float32, 2, 'cell_lengths',
                               can_be_none=True, shape=(len(xyz), 3),
                               warn_on_cast=False,
                               add_newaxis_on_deficient_ndim=True)

    if self._w_has_box is None:
        # this is the first write()
        self._n_atoms = xyz.shape[1]
        self._fh.write('TITLE : Created by MDTraj with %d atoms\n'
                       % self._n_atoms)
        self._w_has_box = cell_lengths is not None
    elif self._w_has_box is True:
        if cell_lengths is None:
            raise ValueError('This mdcrd file must contain unitcell '
                             'information')
    elif self._w_has_box is False:
        if cell_lengths is not None:
            raise ValueError('This mdcrd file must not contain unitcell '
                             'information')
    else:
        raise RuntimeError()

    for i in range(xyz.shape[0]):
        values = xyz[i].reshape(-1)
        for j, coord in enumerate(values):
            out = "%8.3f" % coord
            if len(out) > 8:
                raise ValueError('Overflow error')
            self._fh.write(out)
            # ten values per line
            if (j + 1) % 10 == 0:
                self._fh.write("\n")
        # Terminate a partially filled last line. Deciding from the value
        # count (not a flag set inside the loop) is also well defined for a
        # frame with no coordinates.
        if len(values) % 10 != 0:
            self._fh.write("\n")

        if cell_lengths is not None:
            self._fh.write("%8.3f %8.3f %8.3f\n" % tuple(cell_lengths[i]))
def read(self, n_frames=None, stride=None, atom_indices=None):
    """Read one or more frames of data from the file

    Parameters
    ----------
    n_frames : {int, None}
        The number of frames to read. If not supplied, all of the
        remaining frames will be read.
    stride : {int, None}
        By default all of the frames will be read, but you can pass this
        flag to read a subset of of the data by grabbing only every
        `stride`-th frame from disk.
    atom_indices : {array-like of int, None}
        By default all of the atom will be read, but you can pass this
        flag to read only a subsets of the atoms for the `coordinates` and
        `velocities` fields. Note that you will have to carefully manage
        the indices and the offsets, since the `i`-th atom in the topology
        will not necessarily correspond to the `i`-th atom in your subset.

    Notes
    -----
    If you'd like more flexible access to the data, that is available by
    using the pytables group directly, which is accessible via the
    `root` property on this class.

    Returns
    -------
    frames : namedtuple
        The returned namedtuple will have the fields "coordinates", "time",
        "cell_lengths", "cell_angles", "velocities", "kineticEnergy",
        "potentialEnergy", "temperature" and "alchemicalLambda". Each of
        the fields in the returned namedtuple will either be a numpy array
        or None, depending on if that data was saved in the trajectory.
        All of the data shall be in units of "nanometers", "picoseconds",
        "kelvin", "degrees" and "kilojoules_per_mole". An empty list is
        returned when there are no frames left to read.

    Raises
    ------
    ValueError
        If any atom index is negative or out of range.
    """
    _check_mode(self.mode, ('r',))

    if n_frames is None:
        n_frames = np.inf
    if stride is not None:
        stride = int(stride)

    total_n_frames = len(self._handle.root.coordinates)
    frame_slice = slice(self._frame_index,
                        min(self._frame_index + n_frames, total_n_frames),
                        stride)
    if frame_slice.stop - frame_slice.start == 0:
        return []

    if atom_indices is None:
        # get all of the atoms
        atom_slice = slice(None)
    else:
        # The builtin int replaces the removed np.int alias (same dtype).
        atom_slice = ensure_type(atom_indices, dtype=int, ndim=1,
                                 name='atom_indices', warn_on_cast=False)
        if not np.all(atom_slice < self._handle.root.coordinates.shape[1]):
            raise ValueError(
                'As a zero-based index, the entries in '
                'atom_indices must all be less than the number of atoms '
                'in the trajectory, %d' %
                self._handle.root.coordinates.shape[1])
        if not np.all(atom_slice >= 0):
            raise ValueError('The entries in atom_indices must be greater '
                             'than or equal to zero')

    def get_field(name, key, out_units, can_be_none=True):
        # Read one node and convert it to `out_units`; a missing node gives
        # None unless the field is mandatory.
        try:
            node = self._get_node(where='/', name=name)
            data = node.__getitem__(key)
            in_units = node.attrs.units
            if not isinstance(in_units, string_types):
                in_units = in_units.decode()
            data = in_units_of(data, in_units, out_units)
            return data
        except self.tables.NoSuchNodeError:
            if can_be_none:
                return None
            raise

    frames = Frames(
        coordinates=get_field('coordinates',
                              (frame_slice, atom_slice, slice(None)),
                              out_units='nanometers', can_be_none=False),
        time=get_field('time', frame_slice, out_units='picoseconds'),
        cell_lengths=get_field('cell_lengths', (frame_slice, slice(None)),
                               out_units='nanometers'),
        cell_angles=get_field('cell_angles', (frame_slice, slice(None)),
                              out_units='degrees'),
        velocities=get_field('velocities',
                             (frame_slice, atom_slice, slice(None)),
                             out_units='nanometers/picosecond'),
        kineticEnergy=get_field('kineticEnergy', frame_slice,
                                out_units='kilojoules_per_mole'),
        potentialEnergy=get_field('potentialEnergy', frame_slice,
                                  out_units='kilojoules_per_mole'),
        temperature=get_field('temperature', frame_slice,
                              out_units='kelvin'),
        alchemicalLambda=get_field('lambda', frame_slice,
                                   out_units='dimensionless'),
    )

    # Advance by the span consumed on disk, regardless of stride.
    self._frame_index += (frame_slice.stop - frame_slice.start)
    return frames
def write(self, xyz, cell_lengths, cell_angles=None, types=None,
          unit_set='real'):
    """Write one or more frames of data to a lammpstrj file.

    Parameters
    ----------
    xyz : np.ndarray, shape=(n_frames, n_atoms, 3)
        The cartesian coordinates of the atoms to write. By convention,
        the lengths should be in units of angstroms.
    cell_lengths : np.ndarray, dtype=np.double, shape=(n_frames, 3)
        The lengths (a,b,c) of the unit cell for each frame. By convention,
        the lengths should be in units of angstroms.
    cell_angles : np.ndarray, dtype=np.double, shape=(n_frames, 3)
        The angles (alpha, beta, gamma) defining the unit cell for each
        frame. (Units of degrees). Defaults to 90 degrees for every angle.
    types : np.ndarray, shape=(n_atoms, ), dtype=int
        The numeric type of each particle. If omitted (or empty), every
        particle is written with type 1.
    unit_set : str, optional
        The LAMMPS unit set that the simulation was performed in. See
        http://lammps.sandia.gov/doc/units.html for options. Currently
        supported unit sets: 'real'.

    Raises
    ------
    ValueError
        If the file is not open for writing or `unit_set` is unsupported.
    """
    if self._mode != 'w':
        raise ValueError('write() is only available when file is opened '
                         'in mode="w"')

    xyz = ensure_type(xyz, np.float32, 3, 'xyz', can_be_none=False,
                      shape=(None, None, 3), warn_on_cast=False,
                      add_newaxis_on_deficient_ndim=True)
    cell_lengths = ensure_type(cell_lengths, np.float32, 2, 'cell_lengths',
                               can_be_none=False, shape=(len(xyz), 3),
                               warn_on_cast=False,
                               add_newaxis_on_deficient_ndim=True)
    if cell_angles is None:
        cell_angles = np.empty_like(cell_lengths)
        cell_angles.fill(90)
    cell_angles = ensure_type(cell_angles, np.float32, 2, 'cell_angles',
                              can_be_none=False, shape=(len(xyz), 3),
                              warn_on_cast=False,
                              add_newaxis_on_deficient_ndim=True)

    # Test for "not supplied" explicitly: truth-testing a multi-element
    # array raises ValueError.
    if types is None or np.size(types) == 0:
        # Make all particles the same type.
        types = np.ones(shape=(xyz.shape[1],), dtype=int)
    # The builtin int replaces the removed np.int alias (same dtype).
    types = ensure_type(types, int, 1, 'types', can_be_none=True,
                        shape=(xyz.shape[1], ), warn_on_cast=False,
                        add_newaxis_on_deficient_ndim=False)

    # TODO: Support other unit sets.
    if unit_set == 'real':
        # Assignment, not comparison: record the unit in use.
        self.distance_unit = 'angstroms'
    else:
        raise ValueError(
            'Unsupported unit set specified: {0}.'.format(unit_set))

    for i in range(xyz.shape[0]):
        # --- begin header ---
        self._fh.write('ITEM: TIMESTEP\n')
        self._fh.write('{0}\n'.format(i))  # TODO: Write actual time if known.
        self._fh.write('ITEM: NUMBER OF ATOMS\n')
        self._fh.write('{0}\n'.format(xyz.shape[1]))
        self.write_box(cell_lengths[i], cell_angles[i], xyz[i].min(axis=0))
        # --- end header ---

        # --- begin body ---
        self._fh.write('ITEM: ATOMS id type xu yu zu\n')
        for j, coord in enumerate(xyz[i]):
            # atom ids are 1-based in the file
            self._fh.write(
                '{0:d} {1:d} {2:8.3f} {3:8.3f} {4:8.3f}\n'.format(
                    j + 1, types[j], coord[0], coord[1], coord[2]))
def compute_contacts(traj, contacts='all', scheme='closest-heavy',
                     ignore_nonprotein=True, periodic=True, soft_min=False,
                     soft_min_beta=20):
    r"""Compute the distance between pairs of residues in a trajectory.

    Parameters
    ----------
    traj : md.Trajectory
        An mdtraj trajectory. It must contain topology information.
    contacts : array-like, ndim=2 or 'all'
        An array containing pairs of indices (0-indexed) of residues to
        compute the contacts between, or 'all'. The string 'all' will
        select all pairs of residues separated by two or more residues
        (i.e. the i to i+1 and i to i+2 pairs will be excluded).
    scheme : {'ca', 'closest', 'closest-heavy', 'sidechain', 'sidechain-heavy'}
        scheme to determine the distance between two residues:
            'ca' : distance between two residues is given by the distance
                between their alpha carbons
            'closest' : distance is the closest distance between any
                two atoms in the residues
            'closest-heavy' : distance is the closest distance between
                any two non-hydrogen atoms in the residues
            'sidechain' : distance is the closest distance between any
                two atoms in residue sidechains
            'sidechain-heavy' : distance is the closest distance between
                any two non-hydrogen atoms in residue sidechains
    ignore_nonprotein : bool
        When using `contact==all`, don't compute contacts between
        "residues" which are not protein (i.e. do not contain an alpha
        carbon).
    periodic : bool, default=True
        If periodic is True and the trajectory contains unitcell
        information, we will compute distances under the minimum image
        convention.
    soft_min : bool, default=False
        If soft_min is true, we will use a differentiable version of
        the scheme. The exact expression used is
        d = \frac{\beta}{log\sum_i{exp(\frac{\beta}{d_i}})} where
        beta is user parameter which defaults to 20nm. The expression
        we use is copied from the plumed mindist calculator.
        http://plumed.github.io/doc-v2.0/user-doc/html/mindist.html
    soft_min_beta : float, default=20nm
        The value of beta to use for the soft_min distance option.
        Very large values might cause small contact distances to go to 0.

    Returns
    -------
    distances : np.ndarray, shape=(n_frames, n_pairs), dtype=np.float32
        Distances for each residue-residue contact in each frame
        of the trajectory
    residue_pairs : np.ndarray, shape=(n_pairs, 2), dtype=int
        Each row of this return value gives the indices of the residues
        involved in the contact. This argument mirrors the `contacts` input
        parameter. When `all` is specified as input, this return value
        gives the actual residue pairs resolved from `all`. Furthermore,
        when scheme=='ca', any contact pair supplied as input corresponding
        to a residue without an alpha carbon (e.g. HOH) is ignored from the
        input contacts list, meanings that the indexing of the
        output `distances` may not match up with the indexing of the input
        `contacts`. But the indexing of `distance` *will* match up with
        the indexing of `residue_pairs`

    Raises
    ------
    ValueError
        If the trajectory has no topology, `contacts` or `scheme` is not
        a valid specifier, a requested residue index is out of range, no
        residue pairs are found for 'all', or a residue contains more than
        one alpha carbon with scheme='ca'.

    Examples
    --------
    >>> # To compute the contact distance between residue 0 and 10 and
    >>> # residues 0 and 11
    >>> md.compute_contacts(t, [[0, 10], [0, 11]])

    >>> # the itertools library can be useful to generate the arrays of indices
    >>> group_1 = [0, 1, 2]
    >>> group_2 = [10, 11]
    >>> pairs = list(itertools.product(group_1, group_2))
    >>> print(pairs)
    [(0, 10), (0, 11), (1, 10), (1, 11), (2, 10), (2, 11)]
    >>> md.compute_contacts(t, pairs)

    See Also
    --------
    mdtraj.geometry.squareform : turn the result from this function
        into a square "contact map"
    Topology.residue : Get residues from the topology by index
    """
    import warnings

    if traj.topology is None:
        raise ValueError('contact calculation requires a topology')

    # Remember how the pairs were chosen; comparing `contacts` with a string
    # later would be an ambiguous elementwise comparison for ndarray input.
    contacts_is_all = isinstance(contacts, string_types)

    if contacts_is_all:
        if contacts.lower() != 'all':
            raise ValueError('(%s) is not a valid contacts specifier'
                             % contacts.lower())

        n_residues = traj.n_residues
        residues = [traj.topology.residue(i) for i in range(n_residues)]
        # Decide once per residue whether it may take part in a pair.
        if ignore_nonprotein:
            eligible = [any(a.name.lower() == 'ca' for a in residue.atoms)
                        for residue in residues]
        else:
            eligible = [True] * n_residues

        residue_pairs = []
        for i in range(n_residues):
            if not eligible[i]:
                continue
            # skip the i/i+1 and i/i+2 neighbours
            for j in range(i + 3, n_residues):
                if not eligible[j]:
                    continue
                if residues[i].chain == residues[j].chain:
                    residue_pairs.append((i, j))

        residue_pairs = np.array(residue_pairs)
        if len(residue_pairs) == 0:
            raise ValueError('No acceptable residue pairs found')
    else:
        # The builtin int replaces the removed np.int alias (same dtype).
        residue_pairs = ensure_type(np.asarray(contacts), dtype=int, ndim=2,
                                    name='contacts', shape=(None, 2),
                                    warn_on_cast=False)
        if not np.all((residue_pairs >= 0) *
                      (residue_pairs < traj.n_residues)):
            raise ValueError(
                'contacts requests a residue that is not in the permitted range'
            )

    # now the bulk of the function. This will calculate atom distances and
    # then re-work them in the required scheme to get residue distances
    scheme = scheme.lower()
    if scheme not in ['ca', 'closest', 'closest-heavy', 'sidechain',
                      'sidechain-heavy']:
        raise ValueError(
            'scheme must be one of [ca, closest, closest-heavy, sidechain, sidechain-heavy]'
        )

    if scheme == 'ca':
        if soft_min:
            warnings.warn("The soft_min=True option with scheme=ca gives"
                          "the same results as soft_min=False")
        filtered_residue_pairs = []
        atom_pairs = []

        for r0, r1 in residue_pairs:
            ca_atoms_0 = [a.index for a in traj.top.residue(r0).atoms
                          if a.name.lower() == 'ca']
            ca_atoms_1 = [a.index for a in traj.top.residue(r1).atoms
                          if a.name.lower() == 'ca']
            if len(ca_atoms_0) == 1 and len(ca_atoms_1) == 1:
                atom_pairs.append((ca_atoms_0[0], ca_atoms_1[0]))
                filtered_residue_pairs.append((r0, r1))
            elif len(ca_atoms_0) == 0 or len(ca_atoms_1) == 0:
                # residue does not contain a CA atom, skip it
                if not contacts_is_all:
                    # if the user manually asked for this residue, and
                    # didn't use "all"
                    warnings.warn(
                        'Ignoring contacts pair %d-%d. No alpha carbon.'
                        % (r0, r1))
            else:
                raise ValueError(
                    'More than 1 alpha carbon detected in residue %d or %d'
                    % (r0, r1))

        residue_pairs = np.array(filtered_residue_pairs)
        distances = md.compute_distances(traj, atom_pairs, periodic=periodic)
    else:
        # One atom filter per remaining scheme.
        atom_filters = {
            'closest': lambda atom: True,
            'closest-heavy':
                lambda atom: not (atom.element == element.hydrogen),
            'sidechain': lambda atom: atom.is_sidechain,
            'sidechain-heavy':
                lambda atom: (atom.is_sidechain and
                              not (atom.element == element.hydrogen)),
        }
        keep = atom_filters[scheme]
        residue_membership = [
            [atom.index for atom in residue.atoms if keep(atom)]
            for residue in traj.topology.residues
        ]
        residue_lens = [len(ainds) for ainds in residue_membership]

        atom_pairs = []
        n_atom_pairs_per_residue_pair = []
        for pair in residue_pairs:
            atom_pairs.extend(
                itertools.product(residue_membership[pair[0]],
                                  residue_membership[pair[1]]))
            n_atom_pairs_per_residue_pair.append(
                residue_lens[pair[0]] * residue_lens[pair[1]])

        atom_distances = md.compute_distances(traj, atom_pairs,
                                              periodic=periodic)

        # now squash the results based on residue membership
        n_residue_pairs = len(residue_pairs)
        distances = np.zeros((len(traj), n_residue_pairs), dtype=np.float32)

        # Walk the atom-pair columns with a running offset instead of
        # re-summing the counts for every residue pair.
        start = 0
        for i, n in enumerate(n_atom_pairs_per_residue_pair):
            block = atom_distances[:, start:start + n]
            if not soft_min:
                distances[:, i] = block.min(axis=1)
            else:
                distances[:, i] = soft_min_beta / np.log(
                    np.sum(np.exp(soft_min_beta / block), axis=1))
            start += n

    return distances, residue_pairs
def write(self, coordinates, time=None, cell_lengths=None, cell_angles=None,
          velocities=None, kineticEnergy=None, potentialEnergy=None,
          temperature=None, alchemicalLambda=None):
    """Write one or more frames of data to the file

    This method saves data that is associated with one or more simulation
    frames. Note that all of the arguments can either be raw numpy arrays
    or unitted arrays (with simtk.unit.Quantity). If the arrays are unitted,
    a unit conversion will be automatically done from the supplied units
    into the proper units for saving on disk. You won't have to worry about
    it.

    Furthermore, if you wish to save a single frame of simulation data, you
    can do so naturally, for instance by supplying a 2d array for the
    coordinates and a single float for the time. This "shape deficiency"
    will be recognized, and handled appropriately.

    All supplied fields are checked against the fields present in the file
    before anything is appended, so a rejected call does not leave some
    arrays longer than others.

    Parameters
    ----------
    coordinates : np.ndarray, shape=(n_frames, n_atoms, 3)
        The cartesian coordinates of the atoms to write. By convention, the
        lengths should be in units of nanometers.
    time : np.ndarray, shape=(n_frames,), optional
        You may optionally specify the simulation time, in picoseconds
        corresponding to each frame.
    cell_lengths : np.ndarray, shape=(n_frames, 3), dtype=float32, optional
        You may optionally specify the unitcell lengths.
        The length of the periodic box in each frame, in each direction,
        `a`, `b`, `c`. Unitted input is converted to nanometers before it
        is stored.
    cell_angles : np.ndarray, shape=(n_frames, 3), dtype=float32, optional
        You may optionally specify the unitcell angles in each frame.
        Organized analogously to cell_lengths. Gives the alpha, beta and
        gamma angles respectively. By convention, the angles should be
        in units of degrees.
    velocities : np.ndarray, shape=(n_frames, n_atoms, 3), optional
        You may optionally specify the cartesian components of the velocity
        for each atom in each frame. By convention, the velocities
        should be in units of nanometers / picosecond.
    kineticEnergy : np.ndarray, shape=(n_frames,), optional
        You may optionally specify the kinetic energy in each frame. By
        convention the kinetic energies should be in units of kilojoules
        per mole.
    potentialEnergy : np.ndarray, shape=(n_frames,), optional
        You may optionally specify the potential energy in each frame. By
        convention the potential energies should be in units of kilojoules
        per mole.
    temperature : np.ndarray, shape=(n_frames,), optional
        You may optionally specify the temperature in each frame. By
        convention the temperatures should be in units of Kelvin.
    alchemicalLambda : np.ndarray, shape=(n_frames,), optional
        You may optionally specify the alchemical lambda in each frame.
        These have no units, but are generally between zero and one.

    Raises
    ------
    ValueError
        If only one of `cell_lengths` and `cell_angles` is supplied, or if
        the set of supplied fields does not match the fields stored in the
        file.
    """
    _check_mode(self.mode, ('w', 'a'))

    # these must be either both present or both absent. since
    # we're going to throw an error if one is present w/o the other,
    # lets do it now.
    if cell_lengths is None and cell_angles is not None:
        raise ValueError('cell_angles were given, but no cell_lengths')
    if cell_lengths is not None and cell_angles is None:
        raise ValueError('cell_lengths were given, but no cell_angles')

    # if the input arrays are simtk.unit.Quantities, convert them
    # into md units. Note that this acts as a no-op if the user doesn't
    # have simtk.unit installed (e.g. they didn't install OpenMM)
    coordinates = in_units_of(coordinates, None, 'nanometers')
    time = in_units_of(time, None, 'picoseconds')
    cell_lengths = in_units_of(cell_lengths, None, 'nanometers')
    cell_angles = in_units_of(cell_angles, None, 'degrees')
    velocities = in_units_of(velocities, None, 'nanometers/picosecond')
    kineticEnergy = in_units_of(kineticEnergy, None, 'kilojoules_per_mole')
    potentialEnergy = in_units_of(potentialEnergy, None,
                                  'kilojoules_per_mole')
    temperature = in_units_of(temperature, None, 'kelvin')
    alchemicalLambda = in_units_of(alchemicalLambda, None, 'dimensionless')

    # do typechecking and shapechecking on the arrays. ensure_type also
    # up-converts on deficient ndim, which means that if the user sends in
    # a single frame of data (i.e. coordinates is shape=(n_atoms, 3)), it
    # is treated as a one-frame write. The default mode is multiple frames
    # at a time, so the coordinate shape is (n_frames, n_atoms, 3)
    coordinates = ensure_type(coordinates, dtype=np.float32, ndim=3,
                              name='coordinates', shape=(None, None, 3),
                              can_be_none=False, warn_on_cast=False,
                              add_newaxis_on_deficient_ndim=True)
    n_frames, n_atoms, = coordinates.shape[0:2]
    time = ensure_type(time, dtype=np.float32, ndim=1, name='time',
                       shape=(n_frames,), can_be_none=True,
                       warn_on_cast=False,
                       add_newaxis_on_deficient_ndim=True)
    cell_lengths = ensure_type(cell_lengths, dtype=np.float32, ndim=2,
                               name='cell_lengths', shape=(n_frames, 3),
                               can_be_none=True, warn_on_cast=False,
                               add_newaxis_on_deficient_ndim=True)
    cell_angles = ensure_type(cell_angles, dtype=np.float32, ndim=2,
                              name='cell_angles', shape=(n_frames, 3),
                              can_be_none=True, warn_on_cast=False,
                              add_newaxis_on_deficient_ndim=True)
    velocities = ensure_type(velocities, dtype=np.float32, ndim=3,
                             name='velocities',
                             shape=(n_frames, n_atoms, 3),
                             can_be_none=True, warn_on_cast=False,
                             add_newaxis_on_deficient_ndim=True)
    kineticEnergy = ensure_type(kineticEnergy, dtype=np.float32, ndim=1,
                                name='kineticEnergy', shape=(n_frames,),
                                can_be_none=True, warn_on_cast=False,
                                add_newaxis_on_deficient_ndim=True)
    potentialEnergy = ensure_type(potentialEnergy, dtype=np.float32, ndim=1,
                                  name='potentialEnergy', shape=(n_frames,),
                                  can_be_none=True, warn_on_cast=False,
                                  add_newaxis_on_deficient_ndim=True)
    temperature = ensure_type(temperature, dtype=np.float32, ndim=1,
                              name='temperature', shape=(n_frames,),
                              can_be_none=True, warn_on_cast=False,
                              add_newaxis_on_deficient_ndim=True)
    alchemicalLambda = ensure_type(alchemicalLambda, dtype=np.float32,
                                   ndim=1, name='alchemicalLambda',
                                   shape=(n_frames,), can_be_none=True,
                                   warn_on_cast=False,
                                   add_newaxis_on_deficient_ndim=True)

    # if this is our first call to write(), we need to create the headers
    # and the arrays in the underlying HDF5 file
    if self._needs_initialization:
        self._initialize_headers(
            n_atoms=n_atoms,
            set_coordinates=True,
            set_time=(time is not None),
            set_cell=(cell_lengths is not None or cell_angles is not None),
            set_velocities=(velocities is not None),
            set_kineticEnergy=(kineticEnergy is not None),
            set_potentialEnergy=(potentialEnergy is not None),
            set_temperature=(temperature is not None),
            set_alchemicalLambda=(alchemicalLambda is not None))
        self._needs_initialization = False

    # (name of the node in the file, data supplied for it). The alchemical
    # lambda is stored under the node name 'lambda', which differs from the
    # name of the python argument.
    fields = [
        ('coordinates', coordinates),
        ('time', time),
        ('cell_angles', cell_angles),
        ('cell_lengths', cell_lengths),
        ('velocities', velocities),
        ('kineticEnergy', kineticEnergy),
        ('potentialEnergy', potentialEnergy),
        ('temperature', temperature),
        ('lambda', alchemicalLambda),
    ]

    # First pass: check that the fields the user supplied are exactly the
    # fields present in the file. Nothing is appended until every field has
    # been checked, so a mismatch cannot leave the arrays ragged.
    pending = []
    for name, contents in fields:
        try:
            node = self._get_node(where='/', name=name)
        except self.tables.NoSuchNodeError:
            node = None

        if contents is not None and node is None:
            raise ValueError("The file that you're trying to save to doesn't "
                "contain the field %s. You can always save a new trajectory "
                "and have it contain this information, but I don't allow 'ragged' "
                "arrays. If one frame is going to have %s information, then I expect "
                "all of them to. So I can't save it for just these frames. Sorry "
                "about that :)" % (name, name))
        if contents is None and node is not None:
            raise ValueError("The file that you're saving to expects each frame "
                "to contain %s information, but you did not supply it."
                "I don't allow 'ragged' arrays. If one frame is going "
                "to have %s information, then I expect all of them to. "
                % (name, name))
        if contents is not None:
            pending.append((node, contents))

    # Second pass: everything is consistent, so append the data.
    for node, contents in pending:
        node.append(contents)

    self._frame_index += n_frames
    self.flush()
def rmsd_qcp(conformation1, conformation2):
    """Compute the RMSD with Theobald's quaternion-based characteristic
    polynomial

    Rapid calculation of RMSDs using a quaternion-based characteristic
    polynomial. Acta Crystallogr A 61(4):478-480.

    Both conformations are passed through `_center`, the coefficients
    C2, C1 and C0 of a quartic polynomial are assembled from the 3x3
    product of the two results, and a root of that polynomial is located
    with Newton's method starting from E0 = (G_A + G_B) / 2.

    Parameters
    ----------
    conformation1 : np.ndarray, shape=(n_atoms, 3)
        The cartesian coordinates of the first conformation
    conformation2 : np.ndarray, shape=(n_atoms, 3)
        The cartesian coordinates of the second conformation

    Returns
    -------
    rmsd : float
        The root-mean square deviation after alignment between the two
        pointsets

    Raises
    ------
    ValueError
        If the two conformations do not have the same number of atoms.
    """
    # NOTE(review): the return values of ensure_type are discarded here, so
    # these calls can only validate; any array they return is not the one
    # used below. Confirm whether that is intended.
    ensure_type(conformation1, np.float32, 2, 'conformation1',
                warn_on_cast=False, shape=(None, 3))
    ensure_type(conformation2, np.float32, 2, 'conformation2',
                warn_on_cast=False, shape=(conformation1.shape[0], 3))

    A = _center(conformation1)
    B = _center(conformation2)
    if not A.shape[0] == B.shape[0]:
        raise ValueError(
            'conformation1 and conformation2 must have same number of atoms')
    n_atoms = len(A)

    # the inner product of each structure with itself (sum of squared
    # elements of A and of B)
    G_A = np.einsum('ij,ij', A, A)
    G_B = np.einsum('ij,ij', B, B)

    # M is the inner product of the matrices A and B
    M = np.dot(B.T, A)

    # unpack the elements
    Sxx, Sxy, Sxz = M[0, :]
    Syx, Syy, Syz = M[1, :]
    Szx, Szy, Szz = M[2, :]

    # do some intermediate computations to assemble the characteristic
    # polynomial
    Sxx2 = Sxx * Sxx
    Syy2 = Syy * Syy
    Szz2 = Szz * Szz

    Sxy2 = Sxy * Sxy
    Syz2 = Syz * Syz
    Sxz2 = Sxz * Sxz

    Syx2 = Syx * Syx
    Szy2 = Szy * Szy
    Szx2 = Szx * Szx

    SyzSzymSyySzz2 = 2.0 * (Syz * Szy - Syy * Szz)
    Sxx2Syy2Szz2Syz2Szy2 = Syy2 + Szz2 - Sxx2 + Syz2 + Szy2

    # two of the coefficients
    C2 = -2.0 * (Sxx2 + Syy2 + Szz2 + Sxy2 + Syx2 + Sxz2 + Szx2 + Syz2 + Szy2)
    C1 = 8.0 * (Sxx * Syz * Szy + Syy * Szx * Sxz + Szz * Sxy * Syx - Sxx * Syy * Szz - Syz * Szx * Sxy - Szy * Syx * Sxz)

    # sums ("p") and differences ("m") of matrix elements reused in C0
    SxzpSzx = Sxz + Szx
    SyzpSzy = Syz + Szy
    SxypSyx = Sxy + Syx
    SyzmSzy = Syz - Szy
    SxzmSzx = Sxz - Szx
    SxymSyx = Sxy - Syx
    SxxpSyy = Sxx + Syy
    SxxmSyy = Sxx - Syy
    Sxy2Sxz2Syx2Szx2 = Sxy2 + Sxz2 - Syx2 - Szx2

    # the other coefficient
    C0 = Sxy2Sxz2Syx2Szx2 * Sxy2Sxz2Syx2Szx2 \
        + (Sxx2Syy2Szz2Syz2Szy2 + SyzSzymSyySzz2) * (Sxx2Syy2Szz2Syz2Szy2 - SyzSzymSyySzz2) \
        + (-(SxzpSzx) * (SyzmSzy) + (SxymSyx) * (SxxmSyy - Szz)) * (-(SxzmSzx) * (SyzpSzy) + (SxymSyx) * (SxxmSyy + Szz)) \
        + (-(SxzpSzx) * (SyzpSzy) - (SxypSyx) * (SxxpSyy - Szz)) * (-(SxzmSzx) * (SyzmSzy) - (SxypSyx) * (SxxpSyy + Szz)) \
        + (+(SxypSyx) * (SyzpSzy) + (SxzpSzx) * (SxxmSyy + Szz)) * (-(SxymSyx) * (SyzmSzy) + (SxzpSzx) * (SxxpSyy + Szz)) \
        + (+(SxypSyx) * (SyzmSzy) + (SxzmSzx) * (SxxmSyy - Szz)) * (-(SxymSyx) * (SyzpSzy) + (SxzmSzx) * (SxxpSyy - Szz))

    # E0 is the starting guess handed to the Newton iteration
    E0 = (G_A + G_B) / 2.0

    # the quartic polynomial and its derivative
    f = lambda x: x**4.0 + C2 * x**2. + C1 * x + C0
    df = lambda x: 4 * x**3.0 + 2 * C2 * x + C1

    max_eigenvalue = scipy.optimize.newton(f, E0, df)
    # np.abs guards the square root against a slightly negative argument
    rmsd = np.sqrt(np.abs(2.0 * (E0 - max_eigenvalue) / n_atoms))
    return rmsd
def discrete_approx_mvn(X, means, covars, match_variances=True):
    r"""Find a discrete approximation to a multivariate normal distribution.

    The method finds the discrete distribution with support only at the
    supplied points X with minimal K-L divergence to a target multivariate
    normal distribution, under the constraints that the mean and
    (optionally) the variance of the discrete distribution match the normal
    distribution exactly.

    Parameters
    ----------
    X : np.ndarray, shape=(n_points, n_features)
        The allowable points
    means : np.ndarray, shape=(n_features)
        The mean vector of the MVN
    covars : np.ndarray, shape=(n_features, n_features) or shape=(n_features,)
        If covars is 2D, it's interpreted as the covariance matrix for
        the model. If 1D, we assume a diagonal covariance matrix with the
        specified diagonal entries (i.e. the per-feature variances).
    match_variances : bool, optional
        When True, both the means and the variances of the discrete
        distribution are constrained. Under some circumstances, this is
        not satisfiable (e.g. if there aren't enough samples).

    Returns
    -------
    weights : np.ndarray, shape=(n_points,)
        The weight for each of the points in X in the resulting
        discrete probability distribution

    Raises
    ------
    ValueError
        If the shape of `covars` is inconsistent with `means`.
    NotSatisfiableError
        If the optimizer does not report success, or the resulting weights
        are not finite.

    Notes
    -----
    The discrete distribution is one that has mass only at the specified
    points. It can therefore be parameterized by a set of weights on each
    point. If :math:`\{X_i\}` is the set of allowable points, and
    :math:`\{w_i\}` are the weights, then our discrete distribution has
    the form

    .. math::

        p(y; w) = \sum_i w_i \delta(y - X_i).

    We chose the :math:`w_i` by minimizing the K-L divergence from our
    discrete distribution to the desired multivariate normal subject to a
    constraint that the first moments of the discrete distribution match
    the mean of the multivariate normal exactly, and that the variances
    also match. Let :math:`q(x)` be the target distribution. The optimal
    weights are then

    .. math::

        \min_{\{w_i\}} \sum_i p(X_i; w) \log \frac{p(X_i; w)}{q(X_i)}

    subject to

    .. math::

        \sum_i X_i \, p(X_i; w) = \int_\Omega x \, q(x) \, dx = \mu,

        \sum_i (X_i - \mu)^2 \, p(X_i; w)
            = \int_\Omega (x - \mu)^2 \, q(x) \, dx.

    References
    ----------
    .. [1] Tanaka, Ken'ichiro, and Alexis Akira Toda. "Discrete
       approximations of continuous distributions by maximum entropy."
       Economics Letters 118.3 (2013): 445-450.
    """
    # scipy.misc.logsumexp was removed from SciPy; scipy.special is its
    # current home. Fall back to the old location for old SciPy releases.
    try:
        from scipy.special import logsumexp
    except ImportError:
        from scipy.misc import logsumexp

    X = ensure_type(np.asarray(X), dtype=np.float32, ndim=2, name='X',
                    warn_on_cast=False)
    means = ensure_type(np.asarray(means), np.float64, ndim=1, name='means',
                        warn_on_cast=False)
    covars = np.asarray(covars)

    # Get the un-normalized probability of each point X_i in the MVN
    # `prob` are the q(X_i) in the mathematics
    # `moments` are the \bar{T} that we want to match.
    if covars.ndim == 1:
        # diagonal covariance case
        if not len(covars) == len(means):
            raise ValueError(
                'Shape Error: covars and means musth have the same length')
        # The entries of covars are variances, so the squared deviation is
        # divided by covars itself. This agrees with the full-covariance
        # branch below when that matrix is diagonal.
        prob = np.exp(-0.5 * np.sum((X - means)**2 / covars, axis=1))
        moments = np.concatenate((means, covars)) if match_variances else means
    elif covars.ndim == 2:
        if not (covars.shape[0] == len(means)
                and covars.shape[1] == len(means)):
            raise ValueError(
                'Shape Error: covars must be square, with size = len(means)')
        # full 2d covariance matrix: solve L y = (x - mu) with the Cholesky
        # factor L, so that sum(y**2) is the Mahalanobis distance squared
        cv_chol = scipy.linalg.cholesky(covars, lower=True)
        cv_sol = scipy.linalg.solve_triangular(cv_chol, (X - means).T,
                                               lower=True).T
        prob = np.exp(-0.5 * (np.sum(cv_sol**2, axis=1)))
        moments = np.concatenate(
            (means, np.diag(covars))) if match_variances else means
    else:
        raise ValueError('covars must be 1D or 2D')

    # this is T(x_i) for each X_i
    moment_contributions = np.hstack(
        (X, (X - means)**2)) if match_variances else X

    def objective_and_grad(l):
        dot = np.dot(moment_contributions, l)
        lse = logsumexp(dot, b=prob)

        # value of the objective function
        obj_value = lse - np.dot(l, moments)

        # gradient of objective function: the expectation of T under the
        # normalized weights, minus the target moments. This is computed
        # directly rather than through a logarithm, because the weighted
        # sum of T can be negative and its log would then be NaN.
        point_weights = prob * np.exp(dot - lse)
        grad_value = np.dot(point_weights, moment_contributions) - moments
        return obj_value, grad_value

    result = scipy.optimize.minimize(objective_and_grad, jac=True,
                                     x0=np.ones_like(moments), method='BFGS')
    if not result['success']:
        raise NotSatisfiableError()

    dot = np.dot(moment_contributions, result['x'])
    log_denominator = logsumexp(dot, b=prob)
    weights = prob * np.exp(dot - log_denominator)
    if not np.all(np.isfinite(weights)):
        raise NotSatisfiableError()
    weights = weights / np.sum(weights)
    return weights
def write(self, coordinates, time=None, cell_lengths=None, cell_angles=None):
    r"""Write one or more frames of a molecular dynamics trajectory to disk
    in the AMBER NetCDF format.

    Parameters
    ----------
    coordinates : np.ndarray, dtype=np.float32, shape=(n_frames, n_atoms, 3)
        The cartesian coordinates of each atom, in units of angstroms.
    time : np.ndarray, dtype=np.float32, shape=(n_frames), optional
        The time index corresponding to each frame, in units of
        picoseconds.
    cell_lengths : np.ndarray, dtype=np.double, shape=(n_frames, 3)
        The lengths (a,b,c) of the unit cell for each frame.
    cell_angles : np.ndarray, dtype=np.double, shape=(n_frames, 3)
        The angles (\alpha, \beta, \gamma) defining the unit cell for
        each frame.

    Raises
    ------
    IOError
        If the file was not opened in a writable mode.
    ValueError
        If only one of `cell_lengths` and `cell_angles` is supplied, or if
        the supplied fields do not match the variables in the file.

    Notes
    -----
    If the input arrays are of dimension deficient by one, for example
    if the coordinates array is two dimensional, the time is a single
    scalar or cell_lengths and cell_angles are a 1d array of length three,
    that is okay. You'll simply be saving a single frame.
    """
    self._validate_open()
    if self._mode not in ['w', 'ws', 'a', 'as']:
        raise IOError(
            'The file was opened in mode=%s. Writing is not allowed.'
            % self._mode)

    coordinates = in_units_of(coordinates, None, 'angstroms')
    time = in_units_of(time, None, 'picoseconds')
    cell_lengths = in_units_of(cell_lengths, None, 'angstroms')
    cell_angles = in_units_of(cell_angles, None, 'degrees')

    # typecheck all of the input arguments rigorously
    coordinates = ensure_type(coordinates, np.float32, 3, 'coordinates',
                              length=None, can_be_none=False,
                              shape=(None, None, 3), warn_on_cast=False,
                              add_newaxis_on_deficient_ndim=True)
    n_frames, n_atoms = coordinates.shape[0], coordinates.shape[1]

    time = ensure_type(time, np.float32, 1, 'time', length=n_frames,
                       can_be_none=True, warn_on_cast=False,
                       add_newaxis_on_deficient_ndim=True)
    cell_lengths = ensure_type(cell_lengths, np.float64, 2, 'cell_lengths',
                               length=n_frames, can_be_none=True,
                               shape=(n_frames, 3), warn_on_cast=False,
                               add_newaxis_on_deficient_ndim=True)
    cell_angles = ensure_type(cell_angles, np.float64, 2, 'cell_angles',
                              length=n_frames, can_be_none=True,
                              shape=(n_frames, 3), warn_on_cast=False,
                              add_newaxis_on_deficient_ndim=True)

    # are we dealing with a periodic system? lengths and angles must be
    # supplied together
    if (cell_lengths is None) != (cell_angles is None):
        provided, neglected = 'cell_lengths', 'cell_angles'
        if cell_lengths is None:
            provided, neglected = neglected, provided
        raise ValueError(
            'You provided the variable "%s", but neglected to '
            'provide "%s". They either BOTH must be provided, or '
            'neither. Having one without the other is meaningless'
            % (provided, neglected))

    if self._needs_initialization:
        self._initialize_headers(n_atoms=n_atoms,
                                 set_coordinates=True,
                                 set_time=(time is not None),
                                 set_cell=(cell_lengths is not None
                                           and cell_angles is not None))
        self._needs_initialization = False

    # check for attributes that the file expects but the caller did not
    # supply. This is done before any data is deposited, so that a rejected
    # call does not modify the file.
    missing = None
    if (time is None and 'time' in self._handle.variables):
        missing = 'time'
    elif (cell_angles is None and 'cell_angles' in self._handle.variables):
        missing = 'cell_angles'
    elif (cell_lengths is None
          and 'cell_lengths' in self._handle.variables):
        missing = 'cell_lengths'
    if missing is not None:
        raise ValueError(
            "The file that you're saving to expects each frame "
            "to contain %s information, but you did not supply it."
            "I don't allow 'ragged' arrays." % missing)

    # this slice object says where we're going to put the data in the
    # arrays
    frame_slice = slice(self._frame_index, self._frame_index + n_frames)

    # deposit the data
    try:
        self._handle.variables['coordinates'][
            frame_slice, :, :] = coordinates
        if time is not None:
            self._handle.variables['time'][frame_slice] = time
        if cell_lengths is not None:
            self._handle.variables['cell_lengths'][
                frame_slice, :] = cell_lengths
        if cell_angles is not None:
            self._handle.variables['cell_angles'][
                frame_slice, :] = cell_angles
    except KeyError as e:
        raise ValueError("The file that you're trying to save to doesn't "
                         "contain the field %s." % str(e))

    # update the frame index pointers. this should be done at the
    # end so that if anything errors out, we don't actually get here
    self._frame_index += n_frames
def read(self, atom_indices=None):
    r"""Read data from an AMBER NetCDF restart file

    Parameters
    ----------
    atom_indices : np.ndarray, dtype=int, optional
        The specific indices of the atoms you'd like to retrieve. If not
        supplied, all of the atoms will be retrieved.

    Returns
    -------
    coordinates : np.ndarray, shape=(1, n_atoms, 3)
        The cartesian coordinates of the atoms, in units of angstroms. These
        files only ever contain 1 frame
    time : np.ndarray, None
        The time corresponding to the frame, in units of picoseconds, or
        None if no time information is present
    cell_lengths : np.ndarray, None
        The lengths (a, b, c) of the unit cell for the frame in angstroms,
        or None if the information is not present in the file
    cell_angles : np.ndarray, None
        The angles (\alpha, \beta, \gamma) defining the unit cell for each
        frame, or None if the information is not present in the file.

    Notes
    -----
    If the file is not a NetCDF file with the appropriate convention, a
    TypeError is raised. If variables that are needed do not exist or if
    illegal values are passed in for parameters, ValueError is raised. If
    I/O errors occur, IOError is raised.
    """
    if self._mode != 'r':
        raise IOError('The file was opened in mode=%s. Reading is not '
                      'allowed.' % self._mode)

    if 'coordinates' not in self._handle.variables:
        raise ValueError('No coordinates found in the NetCDF file.')

    # Check that conventions are correct. A missing Conventions attribute
    # is reported as the same TypeError as a wrong one, instead of leaking
    # an AttributeError.
    try:
        conventions = self._handle.Conventions.decode('ascii')
    except (UnicodeDecodeError, AttributeError):
        raise TypeError('NetCDF file does not have correct Conventions')
    try:
        convention_version = self._handle.ConventionVersion.decode('ascii')
    except UnicodeDecodeError:
        raise ValueError(
            'NetCDF file does not have correct ConventionVersion')
    except AttributeError:
        raise TypeError('NetCDF file does not have ConventionVersion')
    if conventions != 'AMBERRESTART':
        raise TypeError('NetCDF file does not have correct Conventions')
    if convention_version != '1.0':
        raise ValueError('NetCDF restart has ConventionVersion %s. Only '
                         'Version 1.0 is supported.' % convention_version)

    if atom_indices is not None:
        # The builtin int is what the removed alias np.int referred to
        atom_slice = ensure_type(atom_indices, dtype=int, ndim=1,
                                 name='atom_indices', warn_on_cast=False)
        # compare element-wise first, then reduce
        if not np.all(atom_slice >= 0):
            raise ValueError('Entries in atom_slice must be >= 0')
        coordinates = self._handle.variables['coordinates'][atom_slice, :]
    else:
        coordinates = self._handle.variables['coordinates'][:, :]

    # Get unit cell parameters
    if 'cell_lengths' in self._handle.variables:
        cell_lengths = self._handle.variables['cell_lengths'][:]
    else:
        cell_lengths = None
    if 'cell_angles' in self._handle.variables:
        cell_angles = self._handle.variables['cell_angles'][:]
    else:
        cell_angles = None

    if cell_lengths is None and cell_angles is not None:
        warnings.warn('cell_angles were found, but no cell_lengths')
    if cell_lengths is not None and cell_angles is None:
        warnings.warn('cell_lengths were found, but no cell_angles')

    if 'time' in self._handle.variables:
        time = self._handle.variables['time'].getValue()
    else:
        warnings.warn('No time found in NetCDF file.')
        time = None

    # scipy.io.netcdf variables are mem-mapped, and are only backed by valid
    # memory while the file handle is open. This is _bad_ because we need to
    # support the user opening the file, reading the coordinates, and then
    # closing it, and still having the coordinates be a valid memory
    # segment.
    # https://github.com/simtk/mdtraj/issues/440
    if coordinates is not None and not coordinates.flags['WRITEABLE']:
        coordinates = np.array(coordinates, copy=True)
    if cell_lengths is not None and not cell_lengths.flags['WRITEABLE']:
        cell_lengths = np.array(cell_lengths, copy=True)
    if cell_angles is not None and not cell_angles.flags['WRITEABLE']:
        cell_angles = np.array(cell_angles, copy=True)

    # The leading frame dimension is missing on all of these arrays since
    # restart files have only one frame. Reshape them to add this extra
    # dimension
    coordinates = coordinates[np.newaxis, :]
    if cell_lengths is not None:
        cell_lengths = cell_lengths[np.newaxis, :]
    if cell_angles is not None:
        cell_angles = cell_angles[np.newaxis, :]
    if time is not None:
        time = np.asarray([time, ])

    return coordinates, time, cell_lengths, cell_angles
def compute_rdf(traj, pairs, r_range=None, bin_width=0.005, n_bins=None,
                periodic=True, opt=True):
    """Compute radial distribution functions for pairs in every frame.

    Parameters
    ----------
    traj : Trajectory
        Trajectory to compute radial distribution function in. The
        normalization reads `traj.unitcell_volumes`.
    pairs : array-like, shape=(n_pairs, 2), dtype=int
        Each row gives the indices of two atoms.
    r_range : array-like, shape=(2,), optional, default=(0.0, 1.0)
        Minimum and maximum radii.
    bin_width : float, optional, default=0.005
        Width of the bins in nanometers.
    n_bins : int, optional, default=None
        The number of bins. If specified, this will override the
        `bin_width` parameter.
    periodic : bool, default=True
        If `periodic` is True and the trajectory contains unitcell
        information, we will compute distances under the minimum image
        convention.
    opt : bool, default=True
        Use an optimized native library to compute the pair wise distances.

    Returns
    -------
    r : np.ndarray, shape=(n_bins,), dtype=float
        Radii values corresponding to the centers of the bins.
    g_r : np.ndarray, shape=(n_bins,), dtype=float
        Radial distribution function values at r.

    Raises
    ------
    ValueError
        If `n_bins` is given and is not positive.

    See also
    --------
    Topology.select_pairs
    """
    if r_range is None:
        r_range = np.array([0.0, 1.0])
    r_range = ensure_type(r_range, dtype=np.float64, ndim=1, name='r_range',
                          shape=(2, ), warn_on_cast=False)
    if n_bins is not None:
        n_bins = int(n_bins)
        if n_bins <= 0:
            raise ValueError('`n_bins` must be a positive integer')
    else:
        n_bins = int((r_range[1] - r_range[0]) / bin_width)

    distances = compute_distances(traj, pairs, periodic=periodic, opt=opt)
    g_r, edges = np.histogram(distances, range=r_range, bins=n_bins)
    r = 0.5 * (edges[1:] + edges[:-1])

    # Normalize by volume of the spherical shell.
    # See discussion https://github.com/mdtraj/mdtraj/pull/724. There might be
    # a less biased way to accomplish this. The conclusion was that this could
    # be interesting to try, but is likely not hugely consequential. This
    # method of doing the calculations matches the implementation in other
    # packages like AmberTools' cpptraj and gromacs g_rdf.
    # Float literals keep the 4/3 factor exact even where `/` on two ints
    # is integer division (Python 2 without `from __future__ import division`).
    V = (4.0 / 3.0) * np.pi * (np.power(edges[1:], 3)
                               - np.power(edges[:-1], 3))
    norm = len(pairs) * np.sum(1.0 / traj.unitcell_volumes) * V
    g_r = g_r.astype(np.float64) / norm  # From int64.
    return r, g_r
def write(self, coordinates, time=None, cell_lengths=None, cell_angles=None):
    """Write one frame of a MD trajectory to disk in the AMBER ASCII restart
    file format.

    No unit conversion is performed; the values are written as given.

    Parameters
    ----------
    coordinates : np.ndarray, dtype=np.float32, shape=([1,] n_atoms, 3)
        The cartesian coordinates of each atom, in units of angstroms. Must
        be only a single frame (shape can be (1,N,3) or (N,3) where N is
        the number of atoms)
    time : array-like with 1 element or float, optional
        The time corresponding to this frame. If not specified, a place
        holder of 0 will be written
    cell_lengths : np.ndarray, dtype=np.double, shape=([1,] 3)
        The lengths (a,b,c) of the unit cell for the frame in Angstroms
    cell_angles : np.ndarray, dtype=np.double, shape=([1,] 3)
        The angles between the unit cell vectors for the frame in Degrees

    Raises
    ------
    IOError
        If the file was not opened in mode 'w'.
    RuntimeError
        If a frame has already been written to this file.
    ValueError
        If more than one frame is supplied, or only one of `cell_lengths`
        and `cell_angles` is supplied.
    TypeError
        If `time` cannot be converted to a single float.
    """
    if self._mode != 'w':
        raise IOError(
            'The file was opened in mode=%s. Writing not allowed.'
            % self._mode)
    if not self._needs_initialization:
        # Must have already been written -- can only write once
        raise RuntimeError('restart file has already been written -- can '
                           'only write one frame to restart files.')

    # typecheck all of the input arguments rigorously
    coordinates = ensure_type(coordinates, np.float32, 3, 'coordinates',
                              length=None, can_be_none=False,
                              shape=(1, None, 3), warn_on_cast=False,
                              add_newaxis_on_deficient_ndim=True)
    n_frames, self._n_atoms = coordinates.shape[0], coordinates.shape[1]
    if n_frames != 1:
        raise ValueError('Can only write 1 frame to a restart file!')
    if time is not None:
        try:
            time = float(time)
        except TypeError:
            raise TypeError('Can only provide a single time')
    else:
        time = 0.0
    # shape=(1, 3) is enforced so that the [0, 0..2] indexing below cannot
    # run off the end of a badly-shaped array
    cell_lengths = ensure_type(cell_lengths, np.float64, 2, 'cell_lengths',
                               length=1, can_be_none=True, shape=(1, 3),
                               warn_on_cast=False,
                               add_newaxis_on_deficient_ndim=True)
    cell_angles = ensure_type(cell_angles, np.float64, 2, 'cell_angles',
                              length=1, can_be_none=True, shape=(1, 3),
                              warn_on_cast=False,
                              add_newaxis_on_deficient_ndim=True)
    if (cell_lengths is None) != (cell_angles is None):
        prov, negl = 'cell_lengths', 'cell_angles'
        if cell_lengths is None:
            prov, negl = negl, prov
        raise ValueError('You provided the variable "%s" but did not '
                         'provide "%s". Either provide both or neither -- '
                         'one without the other is meaningless.'
                         % (prov, negl))

    fmt = '%12.7f%12.7f%12.7f'
    # Assemble the whole file in memory and emit it with a single write
    chunks = [
        'Amber restart file (without velocities) written by MDTraj\n',
        '%5d%15.7e\n' % (self._n_atoms, time),
    ]
    for i in range(self._n_atoms):
        acor = coordinates[0, i, :]
        chunks.append(fmt % (acor[0], acor[1], acor[2]))
        # two atoms per line
        if i % 2 == 1:
            chunks.append('\n')
    # terminate the last line when it holds a single atom
    if self._n_atoms % 2 == 1:
        chunks.append('\n')
    if cell_lengths is not None:
        chunks.append(fmt % (cell_lengths[0, 0], cell_lengths[0, 1],
                             cell_lengths[0, 2]))
        chunks.append(fmt % (cell_angles[0, 0], cell_angles[0, 1],
                             cell_angles[0, 2]) + '\n')
    self._handle.write(''.join(chunks))
    self._handle.flush()

    # Record that the single allowed frame has been written, so that the
    # guard at the top rejects any further call.
    self._needs_initialization = False
def compute_rdf_t(traj, pairs, times, period_length=None, r_range=None,
                  bin_width=0.005, n_bins=None, self_correlation=True,
                  periodic=True, opt=True):
    """Compute time-dependent radial distribution functions.

    Parameters
    ----------
    traj : Trajectory
        Trajectory to compute time-dependent radial distribution function
        in. The normalization reads `traj.unitcell_volumes`.
    pairs : array-like, shape=(n_pairs, 2), dtype=int
        Each row gives the indices of two atoms.
    times : array-like, shape=(any, 2), dtype=int
        Each row gives the indices of two frames.
    period_length : int, optional, default=None
        The length of each chunk of frames to consider when time-averaging.
        Defaults to `traj.n_frames`.
    r_range : array-like, shape=(2,), optional, default=(0.0, 1.0)
        Minimum and maximum radii.
    bin_width : float, optional, default=0.005
        Width of the bins in nanometers.
    n_bins : int, optional, default=None
        The number of bins. If specified, this will override the
        `bin_width` parameter.
    self_correlation : bool, default=True
        Whether or not to include the self-correlation, the case of i=j
    periodic : bool, default=True
        If `periodic` is True and the trajectory contains unitcell
        information, we will compute distances under the minimum image
        convention.
    opt : bool, default=True
        Use an optimized native library to compute the pair wise distances.

    Returns
    -------
    r : np.ndarray, shape=(n_bins,), dtype=float
        Radii values corresponding to the centers of the bins.
    g_r_t : np.ndarray, shape=(len(times), n_bins), dtype=float
        Radial distribution function values at r.

    Raises
    ------
    ValueError
        If `n_bins` is given and is not positive.

    See also
    --------
    Topology.select_pairs
    """
    if r_range is None:
        r_range = np.array([0.0, 1.0])
    r_range = ensure_type(r_range, dtype=np.float64, ndim=1, name='r_range',
                          shape=(2, ), warn_on_cast=False)
    if n_bins is not None:
        n_bins = int(n_bins)
        if n_bins <= 0:
            raise ValueError('`n_bins` must be a positive integer')
    else:
        n_bins = int((r_range[1] - r_range[0]) / bin_width)

    if period_length is None:
        period_length = traj.n_frames

    # Add self pairs to `pairs`
    if self_correlation:
        pairs_set = np.unique(pairs)
        pairs = np.vstack([np.vstack([pairs_set, pairs_set]).T, pairs])

    g_r_t = np.zeros(shape=(len(times), n_bins))

    # Returns shape (len(times), len(pairs))
    frame_distances = compute_distances_t(traj, pairs, times,
                                          periodic=periodic, opt=opt)

    # Uniform bin edges over r_range. Defined up front so that `edges`
    # exists even when `times` is empty and the loop body never runs.
    edges = np.linspace(r_range[0], r_range[1], n_bins + 1)
    for n, distances in enumerate(frame_distances):
        tmp, edges = np.histogram(distances, range=r_range, bins=n_bins)
        g_r_t[n, :] += tmp

    r = 0.5 * (edges[1:] + edges[:-1])

    # Normalize by volume of the spherical shell. Float operands keep both
    # divisions exact even where `/` on two ints is integer division
    # (Python 2 without `from __future__ import division`).
    V = (4.0 / 3.0) * np.pi * (np.power(edges[1:], 3)
                               - np.power(edges[:-1], 3))
    norm = len(pairs) / float(period_length) * np.sum(
        1.0 / traj.unitcell_volumes) * V
    g_r_t = g_r_t / norm
    return r, g_r_t
def compute_dihedrals(traj, indices, periodic=True, opt=True):
    """Compute the dihedral angle of each atom quartet in every frame.

    Parameters
    ----------
    traj : Trajectory
        An mtraj trajectory.
    indices : np.ndarray, shape=(n_dihedrals, 4), dtype=int
        Each row names four atoms defining one dihedral: the angle between
        the plane through the first three atoms and the plane through the
        last three, i.e. the torsion about the bond joining the two middle
        atoms.
    periodic : bool, default=True
        When True and the trajectory carries unitcell information,
        dihedrals that cross periodic images are handled with the minimum
        image convention.
    opt : bool, default=True
        Use an optimized native library to calculate angles.

    Returns
    -------
    dihedrals : np.ndarray, shape=(n_frames, n_dihedrals), dtype=float
        For each frame, the `n_dihedrals` torsion angles, measured in
        **radians**.

    Raises
    ------
    ValueError
        If any entry of `indices` is negative or not smaller than
        ``traj.n_atoms``.
    """
    xyz = ensure_type(traj.xyz, dtype=np.float32, ndim=3, name='traj.xyz',
                      shape=(None, None, 3), warn_on_cast=False)
    quartets = ensure_type(indices, dtype=np.int32, ndim=2, name='indices',
                           shape=(None, 4), warn_on_cast=False)

    in_bounds = np.logical_and(quartets < traj.n_atoms, quartets >= 0)
    if not np.all(in_bounds):
        raise ValueError('indices must be between 0 and %d' % traj.n_atoms)

    n_frames = xyz.shape[0]
    n_quartets = quartets.shape[0]
    if n_quartets == 0:
        # Nothing to compute; hand back an empty result with one row per frame.
        return np.zeros((n_frames, 0), dtype=np.float32)

    out = np.zeros((n_frames, n_quartets), dtype=np.float32)

    use_mic = periodic and traj._have_unitcell
    if use_mic:
        # Validated on both the native and the pure-Python path.
        box = ensure_type(traj.unitcell_vectors, dtype=np.float32, ndim=3,
                          name='unitcell_vectors', shape=(n_frames, 3, 3))

    if not opt:
        # The pure-Python kernel receives the trajectory and the flag itself.
        _dihedral(traj, quartets, periodic, out)
    elif use_mic:
        orthogonal = np.allclose(traj.unitcell_angles, 90)
        _geometry._dihedral_mic(xyz, quartets,
                                box.transpose(0, 2, 1).copy(), out,
                                orthogonal)
    else:
        _geometry._dihedral(xyz, quartets, out)

    return out
def read(self, n_frames=None, stride=None, atom_indices=None):
    """Read data from a molecular dynamics trajectory in the AMBER NetCDF format.

    Reading starts at the file's current frame position and advances it.

    Parameters
    ----------
    n_frames : int, optional
        If n_frames is not None, the next n_frames of data from the file
        will be read. Otherwise, all of the frames in the file will be read.
    stride : int, optional
        If stride is not None, read only every stride-th frame from disk.
    atom_indices : np.ndarray, dtype=int, optional
        The specific indices of the atoms you'd like to retrieve. If not
        supplied, all of the atoms will be retrieved.

    Returns
    -------
    coordinates : np.ndarray, shape=(n_frames, n_atoms, 3)
        The cartesian coordinates of the atoms, in units of angstroms.
        An empty array is returned when the current position is already
        at or past the end of the file.
    time : np.ndarray, None
        The time corresponding to each frame, in units of picoseconds, or
        None if no time information is present in the trajectory.
    cell_lengths : np.ndarray, None
        The lengths (a, b, c) of the unit cell for each frame, or None if
        the information is not present in the file.
    cell_angles : np.ndarray, None
        The angles (alpha, beta, gamma) defining the unit cell for each
        frame, or None if the information is not present in the file.

    Raises
    ------
    IOError
        If the file was not opened in mode 'r'.
    ValueError
        If `atom_indices` contains an out-of-range index, or if the file
        has no 'coordinates' variable.
    """
    self._validate_open()
    if self._mode != 'r':
        raise IOError(
            'The file was opened in mode=%s. Reading is not allowed.'
            % self._mode)

    if n_frames is None:
        n_frames = np.inf
    elif stride is not None:
        # 'n_frames' frames should be read in total
        n_frames *= stride

    total_n_frames = self.n_frames
    n_advance = min(n_frames, total_n_frames)
    frame_slice = slice(self._frame_index, self._frame_index + n_advance,
                        stride)

    if self._frame_index >= total_n_frames:
        # just return something that'll look like len(xyz) == 0
        # this is basically just an alternative to throwing an indexerror
        return np.array([]), None, None, None

    if atom_indices is None:
        # get all of the atoms
        atom_slice = slice(None)
    else:
        atom_slice = ensure_type(atom_indices, dtype=int, ndim=1,
                                 name='atom_indices', warn_on_cast=False)
        if not np.all(atom_slice < self.n_atoms):
            raise ValueError(
                'As a zero-based index, the entries in '
                'atom_indices must all be less than the number of atoms '
                'in the trajectory, %d' % self.n_atoms)
        if not np.all(atom_slice >= 0):
            raise ValueError('The entries in atom_indices must be greater '
                             'than or equal to zero')

    variables = self._handle.variables
    if 'coordinates' not in variables:
        raise ValueError(
            'No coordinates found in the NetCDF file. The only '
            'variables in the file were %s' % variables.keys())
    coordinates = variables['coordinates'][frame_slice, atom_slice, :]

    # The remaining variables are optional in the file.
    time = variables['time'][frame_slice] if 'time' in variables else None
    if 'cell_lengths' in variables:
        cell_lengths = variables['cell_lengths'][frame_slice]
    else:
        cell_lengths = None
    if 'cell_angles' in variables:
        cell_angles = variables['cell_angles'][frame_slice]
    else:
        cell_angles = None

    # Warn about a half-specified unit cell, naming the piece that IS present.
    if cell_lengths is None and cell_angles is not None:
        warnings.warn('cell_angles were found, but no cell_lengths')
    if cell_lengths is not None and cell_angles is None:
        warnings.warn('cell_lengths were found, but no cell_angles')

    self._frame_index = self._frame_index + n_advance

    # scipy.io.netcdf variables are mem-mapped, and are only backed
    # by valid memory while the file handle is open. This is _bad_.
    # because we need to support the user opening the file, reading
    # the coordinates, and then closing it, and still having the
    # coordinates be a valid memory segment.
    # https://github.com/rmcgibbo/mdtraj/issues/440
    def _detach(array):
        # Copy read-only (file-backed) arrays; pass everything else through.
        if array is not None and not array.flags['WRITEABLE']:
            return np.array(array, copy=True)
        return array

    return (_detach(coordinates), _detach(time),
            _detach(cell_lengths), _detach(cell_angles))