def test_rmsd_atom_indices():
    """Check that ``atom_indices`` gives the same RMSD as trimming both trajectories by hand."""
    subset = np.arange(10)
    native = md.load(get_fn('native.pdb'))

    # RMSD restricted through the keyword argument.
    full_traj = md.load(get_fn('traj.h5'))
    dist1 = md.rmsd(full_traj, native, atom_indices=subset)

    # Same computation after restricting both objects to the subset.
    trimmed_traj = md.load(get_fn('traj.h5'))
    trimmed_traj.restrict_atoms(subset)
    native.restrict_atoms(subset)
    dist2 = md.rmsd(trimmed_traj, native)

    eq(dist1, dist2)
def main():
    """Compute custom features of a CLC FAH trajectory relative to a homology model.

    Command-line arguments
    ----------------------
    --ref : homology model pdb file (also used as topology for the trajectory)
    --trj : trajectory file
    --mol2 : homology model mol2 file (parsed but not used in this function)

    Features computed
    -----------------
    0. RMSD of protein heavy atoms to the homology model
    1. RMSD of GLU128
    2. RMSD of LYS317 + GLU318
    3. Per-frame minimum distance between ASP32 (OD1/OD2) and LYS127 (NZ)
    """
    parser = argparse.ArgumentParser(description='custom featurization of clc fah trjs')
    parser.add_argument('--ref', type=str, help='homology model pdb file')
    parser.add_argument('--trj', type=str, help='trajectory file')
    parser.add_argument('--mol2', type=str,
                        help='homology model mol2 file (charges needed for dipole calc)')
    args = parser.parse_args()

    # load system data
    trj = mdtraj.load(args.trj, top=args.ref)
    hmodel = mdtraj.load(args.ref)

    ### feature 0: protein RMSD from hmodel ###
    pi_noh = [atom.index for atom in trj.top.atoms
              if ((atom.residue.is_protein) and (atom.element.symbol != 'H'))]
    p_rmsd = mdtraj.rmsd(trj, hmodel, atom_indices=pi_noh)

    ### feature 1: GLU128 RMSD from hmodel ###
    e128 = res_ndxs(hmodel, vs_ri['glu128'])
    e128_rmsd = mdtraj.rmsd(trj, hmodel, atom_indices=e128)

    ### feature 2: LYS317 and GLU318 RMSD from hmodel ###
    tl = np.concatenate((res_ndxs(hmodel, vs_ri['lys317']),
                         res_ndxs(hmodel, vs_ri['glu318'])))
    tl_rmsd = mdtraj.rmsd(trj, hmodel, atom_indices=tl)

    ### feature 3: distance between ASP32 and LYS127 ###
    a32 = ele_ndxs(hmodel, vs_ri['asp32'], ['OD1', 'OD2'])
    l127 = ele_ndxs(hmodel, vs_ri['lys127'], ['NZ'])
    al_pairs = cartesian([a32, l127])
    # i think the asp oxygens are degenerate, so i'll look at the min here.
    # The minimum must be taken over the pair *distances* in each frame; taking
    # it over `al_pairs` itself would only return the smaller atom index.
    al_dist = np.amin(mdtraj.compute_distances(trj, al_pairs), axis=1)
def calc_obs(traj):
    """Compute RMSD and distance observables for one trajectory file.

    Parameters
    ----------
    traj : str
        Path of the trajectory to load.

    Returns
    -------
    dict
        Keys ``fname``, ``inactive_rmsd``, ``active_rmsd``,
        ``aloop_inactive_rmsd``, ``glu_arg`` and ``gly_lys``.
    """
    # Hard-coded atom indices used for the distance observables.
    arg_cz_id = 2442
    glu_cd_id = 862
    lys_nz_id = 634
    tyr_oh_id = 2019  # defined but not referenced below

    inactive = mdt.load("./topologies/inactive.pdb")
    active = mdt.load("./topologies/active.pdb")

    # Every atom of residues 147..167 of the inactive structure.
    aloop_atoms_list = []
    for res_idx in np.arange(147, 168):
        for atom in inactive.topology.residue(res_idx).atoms:
            aloop_atoms_list.append(atom.index)

    all_heavy = []
    for atom in inactive.topology.atoms:
        if atom.residue.is_protein and atom.element.name != "hydrogen":
            all_heavy.append(atom.index)

    print("Processing %s" % traj)
    # load the trajectory, keeping only as many atoms as the inactive structure has
    trj = mdt.load(traj, atom_indices=np.arange(inactive.n_atoms))

    inactive_rms = mdt.rmsd(trj, inactive, atom_indices=all_heavy)
    active_rms = mdt.rmsd(trj, active, atom_indices=all_heavy)
    aloop_rms = mdt.rmsd(trj, inactive, frame=0, atom_indices=aloop_atoms_list)

    pair_table = np.vstack(([arg_cz_id, glu_cd_id], [lys_nz_id, glu_cd_id]))
    distances = mdt.compute_distances(trj, pair_table)

    observables = dict(
        fname=os.path.basename(traj),
        inactive_rmsd=inactive_rms,
        active_rmsd=active_rms,
        aloop_inactive_rmsd=aloop_rms,
        glu_arg=distances[:, 0],
        gly_lys=distances[:, 1],
    )
    return observables
def pairwise_distances(X, Y=None, index=None, metric="euclidean"):
    '''
    Compute distances from the samples in X to a reference set.

    The reference set is ``Y`` when given, otherwise ``X`` itself. When
    ``index`` is given, only the reference element(s) ``reference[index]``
    are used.

    :param X: for ``metric="rmsd"`` an object accepted by ``md.rmsd`` as target;
        otherwise a feature array [n_samples_a, n_features].
    :param Y: optional second set of samples of the same kind as X.
    :param index: index of the reference element. For ``metric="rmsd"`` it is
        passed to ``md.rmsd`` as the reference frame.
    :param metric: metric name. ``"rmsd"`` is computed by MDTraj (inputs are
        treated as pre-centered); anything else is forwarded to
        ``sp.pairwise_distances``.
    :return: The distances

    Earlier revisions printed debug messages, ignored ``Y`` whenever ``index``
    was None, failed when both ``Y`` and ``index`` were None, and returned
    None when both were given. All four combinations now return distances.
    '''
    reference = X if Y is None else Y

    if metric == "rmsd":
        return md.rmsd(X, reference, index, parallel=True, precentered=True)

    if index is None:
        # Full matrix between X and the reference set.
        return sp.pairwise_distances(X, reference, metric=metric)
    return sp.pairwise_distances(X, reference[index], metric=metric)
def test_ComparetoMDtraj(self):
    """Compare ``pt.rmsd`` with ``md.rmsd`` for several reference frames and atom masks."""
    import mdtraj as md

    traj = pt.load(filename="./data/Tc5b.x", top="./data/Tc5b.top")
    m_top = md.load_prmtop("./data/Tc5b.top")
    m_traj = md.load_mdcrd("./data/Tc5b.x", m_top)
    m_traj.xyz = m_traj.xyz * 10  # convert `nm` to `Angstrom` unit

    def check_mask_variants(source, mask_string, selection):
        # The mask may be a string, an index array, a list or a tuple; each
        # form must agree with MDTraj using the same indices.
        pt_values = [
            pt.rmsd(source, ref=-1, mask=mask_string),
            pt.rmsd(source, mask=selection.indices, ref=-1),
            pt.rmsd(source, mask=list(selection.indices), ref=-1),
            pt.rmsd(source, mask=tuple(selection.indices), ref=-1),
        ]
        expected = md.rmsd(m_traj, m_traj, -1, selection.indices)
        for values in pt_values:
            aa_eq(values, expected)

    # Reference frame 0, all atoms.
    first_run = pt.rmsd(traj, ref=0)
    second_run = pt.rmsd(traj, ref=0)
    implicit_ref = pt.rmsd(traj, )
    md_values = md.rmsd(m_traj, m_traj, 0)
    aa_eq(first_run, second_run)
    aa_eq(first_run, implicit_ref)
    aa_eq(first_run, md_values)

    # Reference is the last frame, all atoms.
    first_run = pt.rmsd(traj, ref=-1)
    second_run = pt.rmsd(traj, ref=-1)
    md_values = md.rmsd(m_traj, m_traj, -1)
    aa_eq(first_run, second_run)
    aa_eq(first_run, md_values)

    # Masked RMSD, first on the loaded trajectory, then on a Trajectory copy.
    mask = ":3-18@CA,C"
    atm = traj.top(mask)
    check_mask_variants(traj, mask, atm)

    fa = Trajectory(traj)
    check_mask_variants(fa, mask, atm)

    # Everything except atoms matching "@H=", reference frame 4.
    fa = Trajectory(traj)
    mask = "!@H="
    atm = fa.top(mask)
    pt_values = pt.rmsd(fa, ref=4, mask=mask)
    md_values = md.rmsd(m_traj, m_traj, 4, atm.indices)
    # exclude 0-th frame for ref
    aa_eq(pt_values, md_values)
def test_rmsd_ref_ainds():
    """RMSD with mismatched reference atom indices must exceed the matched RMSD in every frame."""
    native = md.load(get_fn('native.pdb'))

    # Matching target/reference atom selections.
    t1 = md.load(get_fn('traj.h5'))
    atom_indices = np.arange(10)
    dist1 = md.rmsd(t1, native,
                    atom_indices=atom_indices,
                    ref_atom_indices=atom_indices)

    # Reference atoms deliberately shifted to a different set.
    bad_atom_indices = np.arange(10, 20)
    t2 = md.load(get_fn('traj.h5'))
    dist2 = md.rmsd(t2, native,
                    atom_indices=atom_indices,
                    ref_atom_indices=bad_atom_indices)

    assert np.all(dist2 > dist1)
def one_to_many(self, prepared_traj1, prepared_traj2, index1, indices2):
    """Calculate a vector of distances from one frame of the first trajectory
    to many frames of the second trajectory

    The distances calculated are from the `index1`th frame of `prepared_traj1`
    to the frames in `prepared_traj2` with indices `indices2`

    Parameters
    ----------
    prepared_traj1 : rmsd.TheoData
        First prepared trajectory
    prepared_traj2 : rmsd.TheoData
        Second prepared trajectory
    index1 : int
        index in `prepared_traj1`
    indices2 : ndarray
        list of indices in `prepared_traj2` to calculate the distances to

    Returns
    -------
    Vector of distances of length len(indices2)

    Notes
    -----
    If the omp_parallel optional argument is True, we use shared-memory
    parallelization in C to do this faster. Using omp_parallel = False is
    advised if indices2 is a short list and you are parallelizing your
    algorithm (say via mpi) at a different level.
    """
    # md.rmsd(target, reference, frame) measures every frame of `target`
    # against frame `frame` of `reference`. The frame picked by `index1` lives
    # in prepared_traj1, so prepared_traj1 must be the reference (as in
    # `one_to_all`); the previous argument order had the two swapped.
    all_distances = md.rmsd(prepared_traj2, prepared_traj1, index1,
                            parallel=self.omp_parallel, precentered=True)
    return all_distances[indices2]
def one_to_all(self, prepared_traj1, prepared_traj2, index1):
    """Distances from a single frame of one trajectory to every frame of another.

    Frame `index1` of `prepared_traj1` is used as the reference, and the
    distance to each frame of `prepared_traj2` is computed.

    Parameters
    ----------
    prepared_traj1 : rmsd.TheoData
        Trajectory that supplies the reference frame
    prepared_traj2 : rmsd.TheoData
        Trajectory whose frames are measured
    index1 : int
        Index of the reference frame in `prepared_traj1`

    Returns
    -------
    Vector of distances of length len(prepared_traj2)

    Notes
    -----
    `self.omp_parallel` is forwarded as the `parallel` flag of `md.rmsd`.
    """
    distances = md.rmsd(
        prepared_traj2,
        prepared_traj1,
        index1,
        parallel=self.omp_parallel,
        precentered=True,
    )
    return distances
def _connector_atom_indices(structure, residue_numbers, excluded_names):
    """Return indices of the selected atoms of `structure`, ordered by atom label (descending).

    An atom is selected when its residue's ``resSeq`` is in `residue_numbers`,
    ``"hydrogen"`` is not contained in its element, and its string label
    contains none of `excluded_names`.
    """
    labelled = [
        (a.index, str(a))
        for a in structure.topology.atoms
        if a.residue.resSeq in residue_numbers
        and "hydrogen" not in a.element
        and not any(substring in str(a) for substring in excluded_names)
    ]
    # Sorting by label gives target and reference the same atom ordering.
    labelled = sorted(labelled, key=operator.itemgetter(1), reverse=True)
    return [index for index, _ in labelled]


def rmsd_connector(traj, inactive, residues_map = None):
    """RMSD (scaled by 10) of the connector residues of `traj` to `inactive`.

    Parameters
    ----------
    traj : trajectory whose frames are measured
    inactive : reference structure
    residues_map : optional mapping passed to ``map_residues``

    Returns
    -------
    Array of ``md.rmsd`` values multiplied by 10.0, one per frame of `traj`.
    """
    residues = [121, 282]
    if residues_map is not None:
        # NOTE(review): the mapped residue numbers are computed here but the
        # selections below still use the fixed numbers 121 and 282, exactly as
        # before. Confirm which structure the mapping is meant for before
        # wiring it in.
        residues = map_residues(residues_map, residues)

    nonsymmetric = ["CG2", "CG1", "CD1", "CD2", "CE1", "CE2"]

    connector_atoms = _connector_atom_indices(traj, [121, 282], nonsymmetric)
    traj_stripped = traj.atom_slice(connector_atoms)

    connector_atoms_target = _connector_atom_indices(inactive, [121, 282], nonsymmetric)
    inactive_stripped = inactive.atom_slice(connector_atoms_target)

    # Kept from the original implementation; the result was bound to a name
    # that was never used, so only the call itself is retained.
    traj_stripped.superpose(inactive_stripped)
    rmsds = md.rmsd(traj_stripped, inactive_stripped) * 10.0
    return rmsds
def calculate_rmsd(trajectory, topology, reference):
    """Report the frame count and the last frame's RMSD to a reference, as strings.

    Parameters
    ----------
    trajectory : trajectory file to load
    topology : topology passed as ``top`` when loading `trajectory`
    reference : reference structure file

    Returns
    -------
    dict
        ``{"step": <number of frames>, "rmsd": <RMSD of the last frame>}``,
        both values converted with ``str``.
    """
    import mdtraj

    traj = mdtraj.load(trajectory, top=topology)
    ref = mdtraj.load(reference)
    per_frame = mdtraj.rmsd(traj, ref)

    return {
        "step": str(traj.n_frames),
        "rmsd": str(per_frame[-1]),
    }
def test_lprmsd_5(get_fn):
    """With an empty permute group and superposition on, ``lprmsd`` must match ``rmsd``."""
    target = md.load(get_fn('frame0.h5'))
    reference = md.load(get_fn('frame0.h5'))

    plain = md.rmsd(target, reference, 0)
    permuted = md.lprmsd(target, reference, 0,
                         permute_groups=[[]], superpose=True)

    eq(permuted, plain, decimal=3)
def _deprecated_models_regular_spatial_clustering(templateids, traj, atom_indices=None, cutoff=0.06):
    """
    Superseded by models_regular_spatial_clustering

    Walk through the models in order and keep a template id only if its model
    is at least `cutoff` away (by RMSD) from every earlier model.

    Parameters
    ----------
    templateids : sequence
        One identifier per frame of `traj`, in the same order.
    traj : trajectory indexable by frame, passed to ``mdtraj.rmsd``
    atom_indices : sequence or array of int, optional
        Atoms to use for the RMSD. ``None`` or an empty sequence means the
        argument is not forwarded to ``mdtraj.rmsd``.
    cutoff : float
        Minimum RMSD to all earlier models for a model to be kept.

    Returns
    -------
    list
        The retained template ids.
    """
    mdtraj_rmsd_args = {}
    # `if atom_indices:` raises ValueError for NumPy arrays with more than one
    # element, so test for None/emptiness explicitly.
    if atom_indices is not None and len(atom_indices) > 0:
        mdtraj_rmsd_args['atom_indices'] = atom_indices

    unique_templateids = []
    # Iterate through models
    for (t, templateid) in enumerate(templateids):
        # The first model has nothing to be compared against: always keep it.
        if t == 0:
            unique_templateids.append(templateid)
            continue

        # Calculate rmsds of models up to t against the model t.
        rmsds = mdtraj.rmsd(traj[0:t], traj[t], parallel=False, **mdtraj_rmsd_args)

        # If any rmsd is less than cutoff, discard; otherwise add to list of uniques
        if min(rmsds) < cutoff:
            continue
        unique_templateids.append(templateid)

    return unique_templateids
def partial_transform(self, traj):
    """Featurize a trajectory as its RMSD to each of the first
    ``self.n_features`` frames of ``self.trj0``.

    Parameters
    ----------
    traj : mdtraj.Trajectory
        A molecular dynamics trajectory to featurize.

    Returns
    -------
    features : np.ndarray, dtype=float, shape=(n_samples, n_features)
        ``features[i, j]`` is the RMSD (restricted to ``self.atom_indices``)
        between frame ``i`` of `traj` and frame ``j`` of ``self.trj0``.

    See Also
    --------
    transform : simultaneously featurize a collection of MD trajectories
    """
    n_refs = self.n_features
    X = np.zeros((traj.n_frames, n_refs))
    for ref_idx in range(n_refs):
        column = md.rmsd(traj, self.trj0,
                         atom_indices=self.atom_indices, frame=ref_idx)
        X[:, ref_idx] = column
    return X
def plot_rmsd_distribution(cells, topol, atom_indices, bins=50):
    """Plot a histogram of all pairwise RMSDs between the states in `cells`.

    Parameters
    ----------
    cells : object with an iterable attribute ``L`` whose items expose ``x``
        (assigned to the ``xyz`` of a copy of `topol`)
    topol : mdtraj.Trajectory
        Template copied once per state.
    atom_indices : atom selection forwarded to ``mdtraj.rmsd``
    bins : number of histogram bins

    Raises
    ------
    AssertionError
        If `topol` is not an ``mdtraj.Trajectory``.
    """
    # The message used to reference an undefined name (`topotl`), which turned
    # a failed check into a NameError.
    assert isinstance(topol, mdtraj.Trajectory), \
        'Expected Trajectory but got {}'.format(type(topol))

    trajs = []
    for state in cells.L:
        t = copy.deepcopy(topol)
        t.xyz = state.x
        trajs.append(t)

    traj = trajs[0]
    traj = traj.join(trajs[1:])

    # Row `frame` holds the RMSD of every frame to frame `frame`.
    rmsds = []
    for frame in range(len(traj)):
        r = mdtraj.rmsd(traj, traj, frame=frame, atom_indices=atom_indices)
        rmsds.append(r)
    rmsds = np.vstack(rmsds)

    # Mark the upper triangle and diagonal with -1 and keep the remaining
    # non-negative entries, so each unordered pair is counted once.
    triu = np.triu_indices(len(rmsds))
    rmsds[triu] = -1
    np.fill_diagonal(rmsds, -1)
    rmsds = rmsds[np.where(rmsds >= 0)]

    plt.hist(rmsds, bins=bins)
def main(opts):
    """Assign every trajectory frame to its nearest cell by RMSD and write populations.

    Reads from `opts`: ``ndx`` (atom-index text file), ``topol`` (topology),
    ``cells`` (cell coordinates, read with ``load_cells_gps``), ``trajs``
    (trajectory files), ``assignments`` and ``populations`` (output paths).
    """
    print('Loading atom indices file for trajectories %s' % opts.ndx)
    # `np.int` was removed from NumPy; the builtin `int` is the same dtype.
    ndx = np.loadtxt(opts.ndx, dtype=int)

    print('Loading cells from %s' % opts.cells)
    cells = mdtraj.load(opts.topol, atom_indices=ndx)
    cells.xyz = load_cells_gps(opts.cells)

    print('Loading trajectories %s' % ' '.join(opts.trajs))
    traj = mdtraj.load(opts.trajs, top=opts.topol, atom_indices=ndx)

    print('Assigning to {} cells'.format(len(cells)))
    # rmsds[i, f] is the RMSD of trajectory frame f to cell i.
    rmsds = -np.ones((len(cells), len(traj)))
    for i in range(len(cells)):
        rmsds[i] = mdtraj.rmsd(traj, cells, frame=i)

    # Index of the closest cell for every frame.
    A = rmsds.argmin(axis=0)
    np.savetxt(opts.assignments, A, fmt='%d')

    print('Computing populations')
    P = np.bincount(A)
    np.savetxt(opts.populations, P, fmt='%d')
def shukla_coords(trajectories,KER,Aloop,SRC2):
    """Compute two coordinates per frame for a set of trajectories.

    For every trajectory this collects
    * 10 x (contact distance of ``KER[1]`` minus contact distance of ``KER[0]``), and
    * 10 x RMSD of the backbone of residues ``Aloop[0]``..``Aloop[1]`` to the
      backbone of residues 140..160 of `SRC2` (frame 0).

    Returns
    -------
    list
        ``[flattened_rmsd, flattened_difference]`` as NumPy arrays.
    """
    per_traj_difference = []
    per_traj_rmsd = []

    for traj in trajectories:
        # distance difference
        first_contact = md.compute_contacts(traj, [KER[0]])
        second_contact = md.compute_contacts(traj, [KER[1]])
        # 10x because mdtraj is naturally in nm
        per_traj_difference.append(10*(second_contact[0] - first_contact[0]))

        # activation-loop rmsd
        ref_selection = SRC2.top.select("backbone and (resid %s to %s)" %(140,160))
        traj_selection = traj.top.select("backbone and (resid %s to %s)" %(Aloop[0],Aloop[1]))
        ref_loop = SRC2.atom_slice(ref_selection)
        traj_loop = traj.atom_slice(traj_selection)
        # 10x because mdtraj is naturally in nm
        per_traj_rmsd.append(10*(md.rmsd(traj_loop,ref_loop,frame=0)))

    # flatten list of arrays
    flat_difference = []
    for block_values in per_traj_difference:
        for value in block_values:
            flat_difference.append(value)
    flat_rmsd = []
    for block_values in per_traj_rmsd:
        for value in block_values:
            flat_rmsd.append(value)

    return [np.asarray(flat_rmsd), np.asarray(flat_difference)]
def compute_rmsd(fname, topname, sel="name CA", step=1):
    """Compute the RMSD of a trajectory to frame 0 of its topology file, chunk by chunk.

    Parameters
    ----------
    fname : trajectory file, streamed with ``md.iterload``
    topname : structure file used both as topology and as RMSD reference
    sel : selection string choosing the atoms used for the RMSD
    step : stride passed to ``md.iterload``

    Returns
    -------
    np.ndarray
        Concatenated per-frame RMSD values.
    """
    # Load the reference once; it used to be read from disk twice.
    top = md.load(topname)
    atom_indices = top.topology.select(sel)

    per_chunk = [
        md.rmsd(chunk, top, 0, atom_indices=atom_indices)
        for chunk in md.iterload(fname, top=top, stride=step)
    ]
    return np.concatenate(per_chunk)
def test_pdist_rmsd_2():
    """``pdist`` with RMSD and a frame subset must match the brute-force all-to-all matrix."""
    got = pdist(X_rmsd, "rmsd", X_indices)

    # Brute force: one row of RMSDs per reference frame.
    rows = []
    for i in range(len(X_rmsd)):
        rows.append(md.rmsd(X_rmsd, X_rmsd[i], precentered=True))
    all2all = np.array(rows).astype(np.double)

    # Restrict to the selected frames and take the strict upper triangle.
    submatrix = all2all[np.ix_(X_indices, X_indices)]
    upper = np.triu_indices(5, k=1)
    ref = submatrix[upper]

    np.testing.assert_almost_equal(got, ref, decimal=4)
def test_mdtraj_equivalence():
    """With all atoms used for both alignment and calculation, the featurizer must match ``md.rmsd``."""
    traj, ref = _random_trajs()
    every_atom = range(ref.n_atoms)

    feat = LigandRMSDFeaturizer(
        reference_frame=ref,
        align_by='custom',
        calculate_for='custom',
        align_indices=every_atom,
        calculate_indices=range(ref.n_atoms),
    )
    multi_chain = feat.transform([traj])
    md_traj = md.rmsd(traj,ref,frame=0)

    np.testing.assert_almost_equal(multi_chain[0][:, 0], md_traj, decimal=4)
def test_trajectory_rmsd():
    """``md.rmsd`` to frame 0 must agree with ``rmsd_qcp`` frame by frame, parallel or not."""
    t = md.load(get_fn('traj.h5'))
    n_frames = t.n_frames

    for use_parallel in (True, False):
        calculated = md.rmsd(t, t, 0, parallel=use_parallel)

        reference = np.zeros(n_frames)
        for frame_idx in range(n_frames):
            reference[frame_idx] = rmsd_qcp(t.xyz[0], t.xyz[frame_idx])

        eq(calculated, reference, decimal=3)
def rmsd(self):
    """Store the C-alpha RMSD of every model to ``self.ref_model_traj`` in ``self.df['rmsd']``.

    Rows of ``self.df`` whose ``has_model`` is not True keep ``None``.
    """
    rows_with_model = self.df[self.df.has_model == True].index

    ca_atoms = []
    for atom in self.traj.topology.atoms:
        if atom.name == 'CA':
            ca_atoms.append(atom.index)

    rmsds = mdtraj.rmsd(self.traj, self.ref_model_traj,
                        atom_indices=ca_atoms, parallel=False)

    # Frame m of the trajectory corresponds to the m-th row that has a model.
    template_rmsds = [None] * len(self.templateids)
    for frame_pos, row_pos in enumerate(rows_with_model):
        template_rmsds[row_pos] = rmsds[frame_pos]

    self.df['rmsd'] = template_rmsds
def run(dir,top,ext,err,idx):
    """Print every trajectory whose RMSD jumps by more than `err` between consecutive frames.

    Parameters
    ----------
    dir : directory searched for ``*.<ext>`` files
    top : topology file, also used as the RMSD reference
    ext : trajectory file extension (without the dot)
    err : threshold on the relative frame-to-frame RMSD change
    idx : atom-index specification, resolved by ``getIndices``
    """
    ref = md.load(top)
    atoms = getIndices(idx, ref.n_atoms)

    for path in glob.glob(dir+'/*.'+ext):
        traj = md.load(path, top=top)
        r = md.rmsd(target=traj, reference=ref, atom_indices=atoms)
        # Relative change of the RMSD from one frame to the next.
        jumps = np.divide(np.abs(r[1:]-r[:-1]), r[:-1]) > err
        if jumps.any():
            # Parenthesised so it also parses on Python 3.
            print(path)
def test_2():
    """``md.rmsd`` with ``precentered=True`` must run on read-only memory-mapped coordinates."""
    # https://github.com/rmcgibbo/mdtraj/issues/438
    try:
        traj = md.load(get_fn('frame0.h5'))

        # precenter the coordinates and remember the traces computed for them
        traj.center_coordinates()
        saved_traces = traj._rmsd_traces

        # swap the coordinates for a read-only memory map of the same data
        np.save('temp.npy', traj.xyz)
        readonly_xyz = np.load('temp.npy', mmap_mode='r')
        traj.xyz = readonly_xyz
        traj._rmsd_traces = saved_traces

        # this should work, since we don't need to modify the
        # coordinates inplace
        md.rmsd(traj, traj, 0, precentered=True)
    finally:
        del traj
        os.unlink('temp.npy')
def check_imaging(self):
    """Log a potential imaging issue when the RMSD to frame 0 jumps between frames.

    Does nothing for trajectories with two frames or fewer.
    """
    if self.t.n_frames <= 2:
        return

    self.section("Imaging")
    r = md.rmsd(target=self.t, reference=self.t[0])

    # Percent change between consecutive RMSD values, starting from frame 1.
    previous = r[1:-1]
    following = r[2:]
    percent_change = np.divide(np.abs(following - previous), previous) * 100.0

    if np.any(percent_change > self.rmsd_tolerance):
        self.log("Potential imaging issue: %s" % self.fn)
    else:
        self.log("No imaging issue detected")
def test_2():
    """``md.rmsd`` on precentered, read-only memory-mapped coordinates is expected to raise ValueError."""
    # https://github.com/mdtraj/mdtraj/issues/438
    try:
        dir = tempfile.mkdtemp()
        fn = os.path.join(dir, 'temp.npy')
        traj = md.load(get_fn('frame0.h5'))

        # precenter the coordinates and remember the traces computed for them
        traj.center_coordinates()
        saved_traces = traj._rmsd_traces

        # swap the coordinates for a read-only memory map of the same data
        np.save(fn, traj.xyz)
        readonly_xyz = np.load(fn, mmap_mode='r')
        traj.xyz = readonly_xyz
        traj._rmsd_traces = saved_traces

        with assert_raises(ValueError):
            md.rmsd(traj, traj, 0, precentered=True)
    finally:
        del traj
        os.unlink(fn)
        os.rmdir(dir)
def test_superpose_0():
    """After ``superpose`` onto frame 0, the plain displacement RMSD must equal ``md.rmsd``."""
    t1 = md.load(get_fn('traj.h5'))
    reference_rmsd = md.rmsd(t1, t1, 0)

    # Align every frame onto frame 0 in place.
    t1.superpose(t1, 0)

    # RMSD computed directly from coordinate differences, without fitting.
    n_frames = t1.n_frames
    displ_rmsd = np.zeros(n_frames)
    for frame_idx in range(n_frames):
        delta = t1.xyz[frame_idx] - t1.xyz[0]
        squared_per_atom = (delta ** 2.0).sum(1)
        displ_rmsd[frame_idx] = squared_per_atom.mean() ** 0.5

    eq(reference_rmsd, displ_rmsd, decimal=5)
def test_precentered_1():
    """RMSD within one trajectory: the precentered and non-precentered paths must agree."""
    # test rmsd against the numpy version, using the same trajectory
    # as target and reference
    t1 = md.load(get_fn('traj.h5'), stride=10)
    t2 = md.load(get_fn('traj.h5'), stride=10)

    # t1 stays uncentered and is used with precentered=False;
    # t2 is centered explicitly and used with precentered=True
    for use_parallel in (True, False):
        t2.center_coordinates()
        eq(t1.n_frames, t2.n_frames)

        for ref_idx in range(t1.n_frames):
            # reference values from rmsd_qcp, frame by frame
            expected = np.zeros(t1.n_frames)
            for frame_idx in range(t1.n_frames):
                expected[frame_idx] = rmsd_qcp(t1.xyz[frame_idx], t1.xyz[ref_idx])

            uncentered = md.rmsd(t1, t1, ref_idx,
                                 parallel=use_parallel, precentered=False)
            centered = md.rmsd(t2, t2, ref_idx,
                               parallel=use_parallel, precentered=True)

            eq(expected, uncentered, decimal=3)
            eq(uncentered, centered)
def test_lprmsd_4(get_fn):
    """On a random atom subset with an empty permute group, ``lprmsd`` must match ``rmsd``."""
    # reference: the structure with random noise added to its coordinates
    t1 = md.load(get_fn('1bpi.pdb'))
    noise = 0.05 * random.randn(t1.n_frames, t1.n_atoms, 3)
    t1.xyz += noise

    t2 = md.load(get_fn('1bpi.pdb'))

    # some random indices (all atoms but five, in shuffled order)
    shuffled = random.permutation(t1.n_atoms)
    indices = shuffled[:t1.n_atoms - 5]

    got = md.lprmsd(t2, t1, atom_indices=indices, permute_groups=[[]])
    ref = md.rmsd(t2, t1, atom_indices=indices)
    eq(got, ref, decimal=3)
def shukla_coords_byrun(files,KER,Aloop,SRC2):
    """Compute the two `shukla_coords`-style coordinates, grouped by run.

    Run numbers are discovered from the ``*clone0.h5`` files under the part of
    `files` before its first ``*``. For each run, the trajectories matching
    ``run<N>-clone*1.h5`` are loaded and, per frame,
    * 10 x (contact distance of ``KER[1]`` minus that of ``KER[0]``), and
    * 10 x backbone RMSD of residues ``Aloop[0]``..``Aloop[1]`` to the same
      residue range of `SRC2` (frame 0)
    are collected.

    Returns
    -------
    list
        ``[rmsd, difference]``: two lists with one flattened array per run,
        ordered by increasing run number.
    """
    difference = []
    rmsd = []

    path_base = files.split('*')[0]
    clone0_files = "%s/*clone0.h5" % path_base

    # Unique run numbers; a set replaces the repeated list-membership scan.
    runs = set()
    for filename in glob(clone0_files):
        run_string = re.search('run([^-]+)',filename).group(1)
        runs.add(int(run_string))

    # The reference loop depends only on SRC2 and Aloop, so slice it once.
    Activation_Loop_SRC2 = SRC2.top.select("backbone and (resid %s to %s)" %(Aloop[0],Aloop[1]))
    SRC2_cut = SRC2.atom_slice(Activation_Loop_SRC2)

    for run in sorted(runs):
        trajectories = dataset.MDTrajDataset("%s/run%d-clone*1.h5" % (path_base,run))
        # Single formatted argument in parentheses parses on Python 2 and 3.
        print("Run %s has %s trajectories." % (run,len(trajectories)))

        difference_combinetrajs = []
        rmsd_combinetrajs = []
        for traj in trajectories:
            # append difference
            k295e310 = md.compute_contacts(traj, [KER[0]])
            e310r409 = md.compute_contacts(traj, [KER[1]])
            # 10x because mdtraj is naturally in nm
            difference_combinetrajs.append(10*(e310r409[0] - k295e310[0]))

            # append rmsd
            Activation_Loop_kinase = traj.top.select("backbone and (resid %s to %s)" %(Aloop[0],Aloop[1]))
            traj_cut = traj.atom_slice(Activation_Loop_kinase)
            # 10x because mdtraj is naturally in nm
            rmsd_combinetrajs.append(10*(md.rmsd(traj_cut,SRC2_cut,frame=0)))

        # flatten list of arrays
        difference.append(np.asarray([val for sublist in difference_combinetrajs for val in sublist]))
        rmsd.append(np.asarray([val for sublist in rmsd_combinetrajs for val in sublist]))

    return [rmsd, difference]
def test_1():
    """Uncentered RMSD must fail on read-only mmapped coordinates and work on copy-on-write ones."""
    # https://github.com/rmcgibbo/mdtraj/issues/438
    try:
        traj = md.load(get_fn('frame0.h5'))
        np.save('temp.npy', traj.xyz)

        readonly_xyz = np.load('temp.npy', mmap_mode='r')
        traj.xyz = readonly_xyz
        # since traj isn't precentered, this requires centering
        # the coordinates which is done inplace. but that's not possible
        # with mmap_mode = 'r'
        assert_raises(ValueError, md.rmsd, traj, traj, 0)

        # this should work
        writable_xyz = np.load('temp.npy', mmap_mode='c')
        traj.xyz = writable_xyz
        md.rmsd(traj, traj, 0)
    finally:
        del traj
        os.unlink('temp.npy')
def plot_rmsd(target, reference, atom_list=None, fig_name='rmsd'):
    """Compute the RMSD of `target` to `reference` and save a line plot of it.

    Parameters
    ----------
    target : trajectory whose frames are measured
    reference : reference trajectory
    atom_list : sequence of int, optional
        Atoms used for the RMSD. ``None`` or an empty sequence means the
        argument is passed to ``md.rmsd`` as ``None``. The previous default
        was a shared mutable ``[]``, which asked ``md.rmsd`` for an RMSD over
        zero atoms.
    fig_name : str
        Plot title and base name of the output file ``<fig_name>.png``.
    """
    if atom_list is not None and len(atom_list) == 0:
        atom_list = None

    print('Calculating RMSD')
    rmsd = md.rmsd(target, reference, atom_indices=atom_list)

    print('Plotting RMSD to ' + fig_name + '.png')
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.plot(rmsd)
    ax.set_ylabel('nm')
    ax.set_xlabel('frame')
    ax.set_title(fig_name)
    # plt.show()
    fig.savefig(fig_name + '.png')
def test_precentered_2(get_fn):
    """RMSD between two trajectory objects: precentered and non-precentered paths must agree."""
    # test rmsd against the numpy version, using the difference
    # trajectories as target and reference
    t1_a = md.load(get_fn('traj.h5'), stride=10)
    t2_a = md.load(get_fn('traj.h5'), stride=10)
    t1_b = md.load(get_fn('traj.h5'), stride=10)
    t2_b = md.load(get_fn('traj.h5'), stride=10)

    # the t1 pair stays uncentered and is used with precentered=False;
    # the t2 pair is centered explicitly and used with precentered=True
    t2_a.center_coordinates()
    t2_b.center_coordinates()

    for use_parallel in (True, False):
        for ref_idx in range(t1_b.n_frames):
            # reference values from rmsd_qcp, frame by frame
            expected = np.zeros(t1_a.n_frames)
            for frame_idx in range(t1_a.n_frames):
                expected[frame_idx] = rmsd_qcp(t1_a.xyz[frame_idx], t1_b.xyz[ref_idx])

            uncentered = md.rmsd(t1_a, t1_b, ref_idx,
                                 parallel=use_parallel, precentered=False)
            centered = md.rmsd(t2_a, t2_b, ref_idx,
                               parallel=use_parallel, precentered=True)

            eq(expected, uncentered, decimal=3)
            eq(uncentered, centered, decimal=4)
def calc_rms_vectors(trajectory, args):
    '''
    DESCRIPTION
    Calculates the RMSD of all frames vs. a reference frame of the same
    trajectory. The trajectory's coordinates are centered in place first.

    Args:
        trajectory (mdtraj.Trajectory): trajectory object to analyze.
        args (argparse.Namespace): user input parameters parsed by argparse;
            ``args.reference`` is used as the reference frame argument.
    Return:
        rmsd_: the value returned by ``md.rmsd`` (RMSD of all frames vs.
        the reference).
    '''
    trajectory.center_coordinates()
    return md.rmsd(trajectory, trajectory, args.reference, precentered=True)
def test_1():
    """Uncentered RMSD must fail on read-only mmapped coordinates and work on copy-on-write ones."""
    # https://github.com/rmcgibbo/mdtraj/issues/438
    try:
        dir = tempfile.mkdtemp()
        fn = os.path.join(dir, 'temp.npy')
        traj = md.load(get_fn('frame0.h5'))
        np.save(fn, traj.xyz)

        readonly_xyz = np.load(fn, mmap_mode='r')
        traj.xyz = readonly_xyz
        # since traj isn't precentered, this requires centering
        # the coordinates which is done inplace. but that's not possible
        # with mmap_mode = 'r'
        assert_raises(ValueError, md.rmsd, traj, traj, 0)

        # this should work
        writable_xyz = np.load(fn, mmap_mode='c')
        traj.xyz = writable_xyz
        md.rmsd(traj, traj, 0)
    finally:
        del traj
        os.unlink(fn)
        os.rmdir(dir)
def test_alanine_dipeptide_basic():
    """Featurizer output for one reference frame must equal ``md.rmsd`` to that frame."""
    # The first alanine dipeptide trajectory is featurized against its own
    # 0th frame, and the result is compared with the rmsd's calculated
    # straight from mdtraj.
    dataset = fetch_alanine_dipeptide()
    trajectories = dataset["trajectories"]

    reference_frame = trajectories[0][0]
    featurizer = RMSDFeaturizer(reference_frame)
    data = featurizer.transform(trajectories[0:1])

    true_rmsd = md.rmsd(trajectories[0], trajectories[0][0])

    np.testing.assert_array_almost_equal(data[0][:, 0], true_rmsd, decimal=4)
def func(arg):
    """RMSD of one xtc trajectory to several reference pdbs, plus a constant label column.

    Parameters
    ----------
    arg : tuple
        ``(xtcf, topf, reffs, p, sel)``: trajectory file, topology pdb,
        iterable of reference pdb files, a label repeated for every frame,
        and the atom selection string.

    Returns
    -------
    np.ndarray
        One row per frame; one column per reference followed by a column of `p`.
    """
    xtcf, topf, reffs, p, sel = arg

    # Restrict the trajectory to the selected atoms of its topology.
    top_selection = md.load_pdb(topf).top.select(sel)
    xtc = md.load_xtc(xtcf, topf).atom_slice(top_selection)

    columns = []
    for reff in reffs:
        refpdb = md.load_pdb(reff)
        ref = refpdb.atom_slice(refpdb.top.select(sel))
        columns.append(md.rmsd(xtc, ref))

    columns.append([p] * len(xtc))
    return np.array(columns).T
def rmsd_to_structure(clusters_dir, ref_dir, text):
    """Write a CSV with the RMSD of each cluster structure to a reference.

    Parameters
    ----------
    clusters_dir : directory passed to ``get_trajectory_files``; the CSV is
        written there as ``<text>_rmsds.csv``
    ref_dir : reference structure file (frame 0 is used)
    text : prefix of the output file name
    """
    pdbs = get_trajectory_files(clusters_dir)
    ref = md.load_frame(ref_dir, index=0)

    # Column 0: position of the file in `pdbs`; column 1: its RMSD to `ref`.
    rmsds = np.zeros(shape=(len(pdbs), 2))
    for i, pdb_file in enumerate(pdbs):
        print(i)
        pdb = md.load_frame(pdb_file, index=0)
        rmsd = md.rmsd(pdb, ref, 0)
        rmsds[i, 0] = i
        rmsds[i, 1] = rmsd[0]

    rmsd_file = "%s/%s_rmsds.csv" % (clusters_dir, text)
    np.savetxt(rmsd_file, rmsds, delimiter=",")
def test_MinRmsd_with_atom_indices(self):
    """Both ways of adding a min-RMSD feature with atom indices must reproduce ``mdtraj.rmsd``."""
    reference_frame = self.traj[self.ref_frame]

    # Test the Trajectory-input variant
    self.feat.add_minrmsd_to_ref(reference_frame, atom_indices=self.atom_indices)
    # and the file-input variant
    self.feat.add_minrmsd_to_ref(xtcfile,
                                 ref_frame=self.ref_frame,
                                 atom_indices=self.atom_indices)
    test_Y = self.feat.transform(self.traj).squeeze()

    # now the reference
    ref_Y = mdtraj.rmsd(self.traj, self.traj[self.ref_frame],
                        atom_indices=self.atom_indices)

    verbose_assertion_minrmsd(ref_Y, test_Y, self)
    assert self.feat.dimension() == 2
    assert len(self.feat.describe()) == 2
def func(args):
    """RMSD of a trajectory to several reference pdbs on a common atom selection.

    Parameters
    ----------
    args : tuple
        ``(traj, top, reffs, sel)``: a trajectory object or a file to load
        with topology `top`, an iterable of reference pdb files, and the
        selection string applied to trajectory and references.

    Returns
    -------
    np.ndarray
        One row of per-frame RMSDs per reference.
    """
    traj, top, reffs, sel = args
    if not isinstance(traj, md.core.trajectory.Trajectory):
        traj = md.load(traj, top=top)
    traj = traj.atom_slice(traj.top.select(sel))

    per_reference = []
    for reff in reffs:
        ref = md.load_pdb(reff)
        ref = ref.atom_slice(ref.top.select(sel))
        traj = traj.superpose(ref)
        per_reference.append(md.rmsd(traj, ref))

    return np.array(per_reference)
def test_against_mdtraj_diff_xy(sess, traj):
    """``pairwise_msd`` against selected frames must equal the squared ``md.rmsd`` values."""
    inds = [5, 19, 234]

    frames = tf.constant(traj.xyz)
    target = tf.constant(np.array(traj.xyz[inds]))
    prmsd = tftraj.rmsd.pairwise_msd(frames, target)
    result = sess.run(prmsd)
    print(result.shape)

    # MDTraj gives one row per reference frame; square for MSD and transpose.
    md_rows = []
    for i in inds:
        md_rows.append(md.rmsd(traj, traj, i) ** 2)
    md_result = np.array(md_rows).T

    np.testing.assert_almost_equal(result, md_result, decimal=5)
def slice_list_of_geoms_to_closest_to_ref(geom_list, ref):
    r"""
    From every md.Trajectory in a list, keep only the frame with the smallest
    RMSD to a reference, and join those frames into one trajectory.

    :param geom_list: list of md.Trajectories
    :param ref: md.Trajectory
    :return: md.Trajectory of n_frames = len(geom_list), superposed onto ref
    """
    out_geoms = None
    for cand_geoms in geom_list:
        closest_frame = _np.argmin(_md.rmsd(cand_geoms, ref))
        igeom = cand_geoms[closest_frame]
        out_geoms = igeom if out_geoms is None else out_geoms.join(igeom)

    return out_geoms.superpose(ref)
def test_assign_nearest_rmsd_1():
    """``assign_nearest`` with RMSD and no ``X_indices`` must match a brute-force distance matrix."""
    # rmsd assign nearest without X_indices
    assignments, inertia = assign_nearest(X_rmsd, Y_rmsd, "rmsd")
    assert isinstance(assignments, np.ndarray)
    assert isinstance(inertia, float)

    # One row per center, transposed so that rows are samples.
    rows = []
    for i in range(len(Y_rmsd)):
        rows.append(md.rmsd(X_rmsd, Y_rmsd[i], precentered=True))
    cdist = np.array(rows).T
    assert cdist.shape == (10, 3)

    np.testing.assert_array_equal(assignments, cdist.argmin(axis=1))

    expected_inertia = cdist[np.arange(10), assignments].sum()
    np.testing.assert_almost_equal(inertia, expected_inertia, decimal=6)
def get_properties(self, deffnm, cwd='.', stride=1, mdp=None, mindist=True):
    """Collect per-frame properties of a run into a DataFrame.

    Parameters
    ----------
    deffnm : base name of the run files (``.xtc``, ``.tpr``, ``.edr``, ``.vis.xtc``)
    cwd : directory containing the run files
    stride : stride applied to the energy table and the trajectory
    mdp : run parameters providing ``nstenergy`` and ``dt``; defaults to ``self.mdp``
    mindist : when True, run ``gmx mindist`` and add its two data columns

    Returns
    -------
    tuple
        ``(traj, df)`` truncated to a common length; ``df`` gains an ``RMSD``
        column (C-alpha RMSD to ``self.traj``) and, with `mindist`, the
        columns ``Min. PI dist`` and ``Max. int dist``.

    Raises
    ------
    ValueError
        If the time columns of the different inputs do not match.
    """
    mdp = self.mdp if mdp is None else mdp

    if mindist:
        self.call_gmx(cmd='mindist', stdin='Protein', cwd=cwd,
                      f=f'{deffnm}.xtc', s=f'{deffnm}.tpr',
                      od=f'{deffnm}.mindist.xvg', pi=True,
                      dt=float(mdp.nstenergy) * float(mdp.dt) * stride)
        # Parse the xvg output, skipping lines that start with '#' or '@'.
        with open(f'{cwd}/{deffnm}.mindist.xvg') as xvg:
            rows = [[float(token) for token in line.split()]
                    for line in xvg if line[0] not in '#@']
        data = np.array(rows).T

    df = edr_to_df(f'{cwd}/{deffnm}.edr')[::stride]
    traj = self.load_xtc(f'{cwd}/{deffnm}.vis.xtc', stride=stride)

    # Truncate all inputs to the shortest one.
    lengths = [len(df), len(traj)]
    if mindist:
        lengths.insert(0, data.shape[1])
    minlen = min(lengths)

    df = df.head(minlen)
    if mindist:
        data = data[..., :minlen]
    traj = traj[:minlen]

    # The time axes of all inputs have to agree.
    if (mindist and not np.array_equal(df['Time'], data[0])) \
            or not np.array_equal(df['Time'], traj.time):
        raise ValueError("Could not match times across different inputs")

    calpha_atom_indices = traj.top.select_atom_indices('alpha')
    rmsd = md.rmsd(traj, self.traj, atom_indices=calpha_atom_indices)

    if mindist:
        df['Min. PI dist'] = data[1]
        df['Max. int dist'] = data[2]
    df['RMSD'] = rmsd

    return (traj, df)
def test_assign_to_nearest_center_few_centers():
    """``assign_to_nearest_center`` with three centers must match brute-force RMSD assignment."""
    # assign_to_nearest_center takes two code paths, one for
    # n_centers > n_frames and one for n_frames > n_centers. This tests
    # the latter.
    trj = md.load(get_fn('frame0.xtc'), top=get_fn('native.pdb'))
    n_frames = len(trj)
    center_frames = [0, int(n_frames / 3), int(n_frames / 2)]

    assigns, distances = util.assign_to_nearest_center(
        trj, trj[center_frames], md.rmsd)

    # Brute force: one row of RMSDs per center.
    alldists = np.zeros((len(center_frames), len(trj)))
    for row, center_frame in enumerate(trj[center_frames]):
        alldists[row] = md.rmsd(trj, center_frame)

    assert_allclose(np.min(alldists, axis=0), distances, atol=1e-3)
    assert_array_equal(np.argmin(alldists, axis=0), assigns)
def calc_pcoord(refpath, toppath, mobpath, FORM): """ Calculate pcoord (RMSD) using MDTraj and save results to file specified in get_pcoord.sh/runseg.sh. Here the filename is rmsd.dat, but if you were calculating somebody else like a simple distance you could change the filename to distance.dat instead. Just make sure to change the filename both in this script and in get_pcoord.sh/runseg.sh. Parameters: refpath (str): path to initial state coordinate file. toppath (str): path to topology file. mobpath (str): path to trajectory file. FORM (str): indicates whether we're evaluating a basis/initial state or not. If we are evaluating an initial/basis state (ie. if the script is called from get_pcoord.sh) then FORM = 'RESTRT', and we check to make sure our pcoord is a numpy array with shape (1,). Otherwise, the pcoord is a numpy array with shape = (pcoord_len, pcoord_ndim) as specified in west.cfg. """ # Load the reference crystal and the trajectory # Use the load_netcdf() function so MDtraj knows it is a netcdf file. crystal = md.load_netcdf(refpath, top=toppath) traj = md.load_netcdf(mobpath, top=toppath) # Get a list of CA indices from the topology file. CA_indices = crystal.topology.select("name == CA") # Calculate the rmsd of the trajectory relative to the crystal, using only # the C-Alpha atoms for the calculation (we must specify this as there is # explicit solvent present in the simulation.) # The rmsd() function takes an optional third int argument which refers to # the frame in the reference to measure distances to. By default, the frame # is set to 0. A general form of the function is: # MDTraj.rmsd(target, reference, frame=0) which returns a numpy array rmsd = md.rmsd(traj, crystal, atom_indices=CA_indices) # Write RMSD to output file. if FORM == "RESTRT": # We only need the last value in the array. rmsd = numpy.array(rmsd[-1]) # WESTPA expects a 1x1 array, so we must correct the shape if needed. 
if rmsd.ndim == 0: rmsd.shape = (1,) numpy.savetxt("rmsd.dat", rmsd) else:
def find_best(target, reference, atom_indices=None):
    """Find the frame of `target` with the smallest RMSD to `reference`.

    target and reference are both of type mdtraj.Trajectory. The docstring
    this replaces stated that reference has length 1 and target arbitrary
    length.

    Returns
    -------
    tuple
        ``(index, frame)``: the index of the best frame within `target` and
        ``target[index]``.

    Examples
    --------
    >>> import chisurf.settings as mfm
    >>> times = mfm.TrajectoryFile('./test/data/structure/2807_8_9_b.h5', reading_routine='r', stride=1)
    >>> find_best(times.mdtraj, times.mdtraj[2])
    (2, <mdtraj.Trajectory with 1 frames, 2495 atoms, 164 residues, without unitcells at 0x13570b30>)
    """
    per_frame = md.rmsd(target, reference, atom_indices=atom_indices)
    best_index = np.argmin(per_frame)
    return best_index, target[best_index]
def sort_trajectory(traj):
    '''
    Find the frame with the minimum mean RMSD to all frames of the
    trajectory, then return the frames ordered by their RMSD to that frame.

    Parameters
    ----------
    traj : mdtraj.Trajectory

    Returns
    -------
    sorted_traj : mdtraj.Trajectory
    '''
    # rmsds[i] holds the RMSD of every frame to frame i.
    rmsds = []
    for ref_idx in range(len(traj)):
        rmsds.append(md.rmsd(traj, traj[ref_idx]))

    mean_per_reference = [np.mean(row) for row in rmsds]
    central_frame = np.argmin(mean_per_reference)

    order = np.argsort(rmsds[central_frame])
    return traj[order]
def ca_interfacial_rmsd_angstroms(traj, native, group1, group2, ca_cutoff_angstroms=10., verbose=True):
    """RMSD (times 10) of the interface residues between two atom groups.

    Interface residues are those residues of `group1` and `group2` that form
    at least one cross-group pair whose 'ca'-scheme contact distance in the
    first frame of `native`, multiplied by 10, is below `ca_cutoff_angstroms`.

    Parameters
    ----------
    traj : trajectory whose frames are measured
    native : reference; only its first frame is used
    group1, group2 : iterables of atom indices into `native`'s topology
    ca_cutoff_angstroms : contact cutoff
    verbose : when True, print the number of interface residues

    Returns
    -------
    10 x ``md.rmsd`` of `traj` to `native`, both sliced to all atoms of the
    interface residues.
    """
    native = native[0]  # ensure only a single frame is passed

    res_group1, res_group2 = [
        np.array(sorted(set(native.topology.atom(i).residue.index for i in g)))
        for g in (group1, group2)
    ]

    # Every cross-group residue pair, then keep those within the cutoff.
    contact_pairs = np.array([(i, j) for i in res_group1 for j in res_group2])
    is_contact = (10.*md.compute_contacts(native, scheme='ca', contacts=contact_pairs)[0]
                  < ca_cutoff_angstroms)[0]
    contacts = contact_pairs[is_contact]

    side1 = set(contacts[:, 0])
    side2 = set(contacts[:, 1])
    # Kept as a set: it is only used for membership tests below.
    interface_residues = side1 | side2

    if verbose:
        # Single formatted argument in parentheses parses on Python 2 and 3.
        print('%i interface residues (%i,%i)' % (
            len(interface_residues), len(side1), len(side2)))

    interface_atom_indices = np.array([
        a.index for a in native.topology.atoms
        if a.residue.index in interface_residues
    ])

    return 10.*md.rmsd(traj.atom_slice(interface_atom_indices),
                       native.atom_slice(interface_atom_indices))
def test_two_refs_basic():
    """Featurizing against two reference frames must match ``md.rmsd`` to each of them."""
    # Frames 0 and 1 of the first adp trajectory serve as the two references.
    # The rmsd of frame 0 to reference 0 must equal that of frame 1 to
    # reference 1, and the two cross terms must equal each other.
    trajectories = AlanineDipeptide().get_cached().trajectories
    featurizer = RMSDFeaturizer(trajectories[0][0:2])
    data = featurizer.transform(trajectories[0:1])

    n_references = 2
    true_rmsd = np.zeros((trajectories[0].n_frames, n_references))
    for ref_idx in range(n_references):
        true_rmsd[:, ref_idx] = md.rmsd(trajectories[0], trajectories[0][ref_idx])

    features = data[0]
    np.testing.assert_almost_equal(features[0, 0], features[1, 1], decimal=3)
    np.testing.assert_almost_equal(features[1, 0], features[0, 1], decimal=3)
    np.testing.assert_array_almost_equal(features, true_rmsd, decimal=4)
def test_rmsd_kcenters_mpi_subsample():
    """Run the k-centers RMSD clustering helper with subsampling and check its output.

    ``frame0.xtc`` is copied five times into a temporary directory whose
    name is broadcast from MPI rank 0. ``runhelper`` is then invoked with
    a subsample factor of 3 and four clusters. The returned centre
    indices and structures are compared with a direct, non-MPI
    ``kcenters.kcenters`` run on the subsampled single trajectory.
    """
    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    TRJFILE = os.path.join(data_dir, 'frame0.xtc')
    TOPFILE = os.path.join(data_dir, 'native.pdb')
    SELECTION = '(name N or name C or name CA or name H or name O)'
    SUBSAMPLE_FACTOR = 3

    n_copies = 5
    expected_size = (n_copies, np.ceil(501 / SUBSAMPLE_FACTOR))

    with tempfile.TemporaryDirectory() as tdname:
        # Rebind to the directory name broadcast from rank 0.
        tdname = MPI.COMM_WORLD.bcast(tdname, root=0)

        for copy_index in range(n_copies):
            shutil.copy(TRJFILE,
                        os.path.join(tdname, 'frame%s.xtc' % copy_index))

        cli_args = [
            '--trajectories', os.path.join(tdname, 'frame?.xtc'),
            '--topology', TOPFILE,
            '--cluster-number', '4',
            '--subsample', str(SUBSAMPLE_FACTOR),
            '--atoms', SELECTION,
            '--algorithm', 'kcenters',
        ]

        with warnings.catch_warnings():
            warnings.filterwarnings('ignore')
            a, d, inds, s = runhelper(
                cli_args,
                expected_size=expected_size,
                expect_reassignment=False)

    # Reference result: cluster the subsampled selection directly.
    trj = md.load(TRJFILE, top=TOPFILE)
    trj_sele = trj.atom_slice(trj.top.select(SELECTION))
    result = kcenters.kcenters(
        trj_sele[::SUBSAMPLE_FACTOR], 'rmsd', n_clusters=4, mpi_mode=False)

    # Map each centre back to the closest frame of the full trajectory.
    expected_indices = []
    for center in result.centers:
        expected_indices.append(np.argmin(md.rmsd(trj_sele, center)))
    assert_array_equal(expected_indices, inds[:, 1])

    expected_s = md.join([trj[row[1]] for row in inds])
    assert_array_equal(expected_s.xyz, md.join(s).xyz)
def ligands_rmsd_calculator(pdb_target, pdb_reference, resname="GRW"):
    """
    Compute the RMSD of a ligand between a target and a reference PDB file.

    The target file is first passed to ``repair_pdbs``. Both files are then
    loaded with mdtraj and ``mdtraj.rmsd`` is evaluated using only the atoms
    selected by ``resname "<resname>"`` in the reference topology.

    NOTE(review): only the ligand atom indices are handed to ``mdtraj.rmsd``;
    no separate backbone superposition is performed in this function.
    NOTE(review): the indices come from the reference topology and are applied
    to the target as well, so both files presumably share the same atom
    ordering - confirm against callers.

    :param pdb_target: problem pdb file
    :param pdb_reference: reference pdb file
    :param resname: residue name of the ligand used in the atom selection
    :return: the value returned by ``mdtraj.rmsd`` for the ligand atoms
    """
    # Repair the target PDB file if it is needed
    repair_pdbs(pdb_target)

    # Load both structures into mdtraj
    target_structure = mdtraj.load(pdb_target)
    reference_structure = mdtraj.load(pdb_reference)

    # Indices of every atom of the ligand residue in the reference
    selection = 'resname "{}"'.format(resname)
    ligand_atoms = reference_structure.topology.select(selection)

    return mdtraj.rmsd(target_structure, reference_structure,
                       atom_indices=ligand_atoms)
def _connector_atom_indices(topology, residues, excluded_names):
    """Return indices of the selected atoms of `residues`, ordered by descending atom label."""
    labelled = []
    for residue in residues:
        labelled += [
            (a.index, str(a))
            for a in topology.atoms
            if residue.is_mdtraj_res_equivalent(a.residue)
            and "hydrogen" not in a.element
            and not any(substring in str(a) for substring in excluded_names)
        ]
    # Ordering by label (rather than index) pairs up the same atoms in two
    # topologies that may number them differently.
    labelled = sorted(labelled, key=operator.itemgetter(1), reverse=True)
    return [index for index, _ in labelled]


def rmsd_connector(traj, inactive, residues=None, residues_map=None):
    """Compute the per-frame RMSD of a set of residues against a reference structure.

    For every residue, the non-hydrogen atoms whose label does not contain
    one of CG2, CG1, CD1, CD2, CE1, CE2 are selected in both `traj` and
    `inactive`. Both selections are ordered by atom label and sliced out,
    and the sliced trajectory is superposed on the sliced reference
    before ``md.rmsd`` is evaluated.

    Parameters
    ----------
    traj : mdtraj.Trajectory
        Frames to score.
    inactive : mdtraj.Trajectory
        Reference structure.
    residues : list, optional
        Residue objects exposing ``is_mdtraj_res_equivalent``. Defaults
        to an empty list.
    residues_map : optional
        When given, `residues` is first translated with ``map_residues``.

    Returns
    -------
    numpy.ndarray
        ``md.rmsd`` result multiplied by 10. If superposition or the
        RMSD calculation fails, an array of NaN with one entry per frame
        is returned instead.
    """
    # None replaces the former mutable default argument.
    if residues is None:
        residues = []
    if residues_map is not None:
        residues = map_residues(residues_map, residues)

    nonsymmetric = ["CG2", "CG1", "CD1", "CD2", "CE1", "CE2"]

    connector_atoms = _connector_atom_indices(traj.topology, residues, nonsymmetric)
    traj_stripped = traj.atom_slice(connector_atoms)

    connector_atoms_target = _connector_atom_indices(inactive.topology, residues, nonsymmetric)
    inactive_stripped = inactive.atom_slice(connector_atoms_target)

    try:
        traj_stripped.superpose(inactive_stripped)
        rmsds = md.rmsd(traj_stripped, inactive_stripped) * 10.0
    except Exception:
        # Best-effort: signal failure with NaN rather than handing back
        # uninitialised memory that would look like real RMSD values.
        rmsds = np.empty(traj_stripped.n_frames)
        rmsds[:] = np.nan
    return rmsds
def calculate_dist_matrix(t):
    """Build the symmetric matrix of pairwise frame-to-frame RMSDs of a trajectory.

    `t` is centered and superposed on itself first; both calls modify `t`
    in place. Entry ``[i][j]`` is then ``md.rmsd(t[i], t[j])``. Only the
    lower triangle is computed and mirrored. Progress is written to
    stdout.

    NOTE(review): older comments described the entries as "half dist
    squared" scaled by epsilon; the code stores the plain ``md.rmsd``
    value.

    Parameters
    ----------
    t : mdtraj.Trajectory

    Returns
    -------
    numpy.ndarray
        Array of shape ``(t.n_frames, t.n_frames)``.
    """
    t.center_coordinates()
    t.superpose(t)
    n_frames = t.n_frames
    a = np.zeros((n_frames, n_frames))
    print('calculating dist matrix s')
    sys.stdout.write("calculating dist matrix : ")
    sys.stdout.flush()
    # Defined up front so the final status line also works for zero frames.
    msg = ""
    for i in range(n_frames):
        msg = "item %i of %i" % (i, n_frames)
        # Backspaces move the cursor back so the next message overwrites this one.
        sys.stdout.write(msg + chr(8) * len(msg))
        sys.stdout.flush()
        frame_i = t[i]
        for j in range(i + 1):
            # md.rmsd returns a length-1 array here; take the scalar
            # explicitly instead of relying on implicit conversion.
            a[i][j] = md.rmsd(frame_i, t[j])[0]
            a[j][i] = a[i][j]
    sys.stdout.write(str(n_frames) + " DONE" + " " * len(msg) + "\n")
    sys.stdout.flush()
    return a
def plot_rsmd(traj_list, fout=None):
    """Plot the RMSD trace of each trajectory relative to its own first frame.

    Each trajectory's RMSD to its frame 0 is computed with
    ``mdtraj.rmsd`` and multiplied by 10. All traces are drawn on one
    pair of axes via ``msme.plot_trace``. Labels come from the
    module-level ``args.Trajectories`` with the last three characters
    removed. The legend is removed when more than five traces are drawn.

    Parameters
    ----------
    traj_list : sequence of mdtraj.Trajectory
        Must contain at least one trajectory.
    fout : str, optional
        Path the figure is saved to. When None the figure is built but
        not written to disk.
    """
    rmsd_list = [mdtraj.rmsd(traj, traj, 0) * 10 for traj in traj_list]

    # The first trace creates the axes; later traces reuse them.
    ax, side_ax = msme.plot_trace(rmsd_list[0], ylabel='RMSD (Å)',
                                  xlabel='Time (ns)',
                                  label=args.Trajectories[0][:-3],
                                  **next(palette_cycled))
    ax.xaxis.set_major_formatter(FuncFormatter(to_ns))

    for i, rmsd in enumerate(rmsd_list[1:], start=1):
        msme.plot_trace(rmsd, ylabel='RMSD (Å)', xlabel='Time (ns)',
                        ax=ax, side_ax=side_ax,
                        label=args.Trajectories[i][:-3],
                        **next(palette_cycled))

    if len(rmsd_list) > 5:
        ax.legend_.remove()
    sns.despine()
    f = plt.gcf()
    # savefig cannot take None, so only save when a path was given.
    if fout is not None:
        f.savefig(fout)
def _select_atoms(topology, selection):
    """Resolve `selection` as a named mdtraj atom group or as a selection expression."""
    if selection in ("all", "alpha", "minimal", "heavy", "water"):
        return topology.select_atom_indices(selection)
    return topology.select(selection)


def plot_rmsd(args):
    """Compute the RMSD of a trajectory to its first frame and plot and/or tabulate it.

    Reads from `args`:

    * ``input_traj``, ``pdb_file`` - trajectory and topology to load
    * ``align_atom_select`` - atoms used for ``traj.superpose``
    * ``rmsd_atom_select`` - atoms passed to ``md.rmsd``
    * ``timestep`` - multiplied by the 1-based frame number for the time axis
    * ``figure_fl`` - optional path of the figure to write
    * ``output_tsv`` - optional path of a tab-separated table to write

    Either selection may be one of the names "all", "alpha", "minimal",
    "heavy" or "water", or any mdtraj selection expression.

    NOTE(review): ``md.rmsd`` does its own superposition on the atoms it
    is given, so the earlier ``superpose`` on the alignment atoms may not
    affect the reported values - confirm this is intended.

    Raises
    ------
    ValueError
        If neither ``figure_fl`` nor ``output_tsv`` is set.
    """
    if not args.figure_fl and not args.output_tsv:
        raise ValueError("No output specified.")

    print("reading trajectory")
    traj = md.load(args.input_traj, top=args.pdb_file)

    print("aligning frames")
    align_atoms = _select_atoms(traj.topology, args.align_atom_select)
    traj.superpose(traj, atom_indices=align_atoms)

    print("computing RMSD")
    rmsd_atoms = _select_atoms(traj.topology, args.rmsd_atom_select)
    rmsds = md.rmsd(traj, traj, atom_indices=rmsd_atoms,
                    ref_atom_indices=rmsd_atoms)

    frame_time = np.arange(1, traj.n_frames + 1) * args.timestep

    if args.figure_fl:
        plt.clf()
        plt.plot(frame_time, rmsds)
        plt.xlabel("Time (ns)", fontsize=16)
        plt.ylabel("RMSD (nm)", fontsize=16)
        plt.xlim([frame_time[0], frame_time[-1]])
        plt.grid()
        # The keyword is lowercase "dpi"; "DPI" is not a savefig option.
        plt.savefig(args.figure_fl, dpi=300)

    if args.output_tsv:
        with open(args.output_tsv, "w") as fl:
            fl.write("Time (ns)\tRMSD (nm)\n")
            for t, r in zip(frame_time, rmsds):
                fl.write("%s\t%s\n" % (t, r))
def rmsd_traj(trajfiles, topfile=None, **kwargs):
    '''
    RMSD of the frames in `trajfiles` to the reference structure in `topfile`.

    `topfile` serves both as the reference structure and as the topology
    source, so the trajectory (currently) needs an identical topology.
    The logic here requires that `topfile` is a PDB filepath, not a
    trajectory.

    Extra keyword arguments are forwarded to `prep_rmsd_calc`, whose
    result is passed on to `mdtraj.rmsd` as keyword arguments.

    Notes:
      - `precenter` to this function is an operation to be done, which
        will result in `precentered` to the MDTraj function as True.
      - centering and RMSD calculation, not loading, timed if
        `timeit=True` (presumably handled by `prep_rmsd_calc` - TODO
        confirm)
      - see MDTraj notes on centering
        http://mdtraj.org/development/api/generated/mdtraj.rmsd.html?highlight=precentered#
    '''
    # TODO look at timing of with/without precentering and eliminate
    #      option if it doesn't decrease the calculation time
    reference = mdtraj.load(topfile)
    trajectory = md_traj(trajfiles, topfile)
    rmsd_options = prep_rmsd_calc(trajectory, **kwargs)
    return mdtraj.rmsd(trajectory, reference, **rmsd_options)
def clustering(traj, maxClusters, thresh_R):
    """Cluster trajectory frames hierarchically on a pairwise distance matrix.

    For every ordered pair of frames the distance is 0.0 when their
    ``md.rmsd`` is at most 0.001, and ``km.kuhn_munkres(frame_i, frame_j)``
    otherwise. Minimum, maximum and mean of the non-zero distances are
    printed. The matrix is then condensed, linked with the 'average'
    method and cut at `thresh_R` using the 'distance' criterion.

    Parameters
    ----------
    traj : mdtraj.Trajectory
    maxClusters :
        Not used by this function.
    thresh_R : float
        Distance threshold handed to ``fcluster``.

    Returns
    -------
    Flat cluster assignment returned by ``scipy.cluster.hierarchy.fcluster``.
    """
    n_frames = traj.n_frames
    distances = np.empty((n_frames, n_frames))
    for row in range(n_frames):
        for col in range(n_frames):
            nearly_identical = md.rmsd(traj[row], traj[col], 0) <= 0.001
            if nearly_identical:
                distances[row, col] = 0.0
            else:
                distances[row, col] = km.kuhn_munkres(traj[row], traj[col])

    # Statistics deliberately ignore the zero entries (diagonal and
    # near-identical pairs).
    nonzero_distances = distances[np.nonzero(distances)]
    mean_rmsd = np.mean(nonzero_distances)
    min_rmsd = np.min(nonzero_distances)
    max_rmsd = np.max(nonzero_distances)
    print('Min pairwise rmsd: %f nm' % min_rmsd)
    print('Max pairwise rmsd: %f nm' % max_rmsd)
    print('Mean pairwise rmsd: %f nm' % mean_rmsd)

    condensed = squareform(distances, checks=False)
    tree = scipy.cluster.hierarchy.linkage(condensed, method='average')
    return scipy.cluster.hierarchy.fcluster(tree, thresh_R,
                                            criterion='distance')
def make_clusters(native, traj):
    """Cluster trajectory frames with HDBSCAN and report each cluster's RMSD to native.

    Steps:

    1. ``compute_rmsd_matrix(traj)`` builds the input for clustering and
       ``plot_k_dist`` is called on it.
    2. HDBSCAN (``min_cluster_size=MIN_SAMPLES``) labels every frame.
    3. Frames are grouped per label and the groups sorted by size,
       largest first. Frames labelled -1 (unclustered) are appended as
       the last group.
    4. For every non-empty group the RMSD to `native` (multiplied by 10)
       is written to ``clusters_0<i>.dat``, summary statistics are
       printed, and the frames are saved to ``cluster_<ii>.pdb``.

    NOTE(review): HDBSCAN is constructed without ``metric='precomputed'``;
    if ``compute_rmsd_matrix`` returns a pairwise distance matrix, confirm
    that treating its rows as feature vectors is intended.

    Parameters
    ----------
    native : mdtraj.Trajectory
        Reference structure for the reported RMSDs.
    traj : mdtraj.Trajectory
        Frames to cluster.
    """
    distances = compute_rmsd_matrix(traj)
    plot_k_dist(distances)

    # clustering set up
    clusterer = hdbscan.HDBSCAN(min_cluster_size=MIN_SAMPLES)
    cluster_indices = clusterer.fit_predict(distances)
    n_labels = np.max(cluster_indices) + 1

    # clustering
    clusters = [
        traj[np.where(cluster_indices == index)]
        for index in range(n_labels)
    ]
    clusters = sorted(clusters, key=lambda x: x.n_frames, reverse=True)

    # now add the unclustered frames to last cluster
    clusters.append(traj[np.where(cluster_indices == -1)])

    cluster_sizes = [c.n_frames for c in clusters]
    total_frames = traj.n_frames

    print('Found {} total clusters.'.format(len(clusters)))

    # calculates important values and outputs to files
    for i, c in enumerate(clusters):
        # The unclustered group is empty when every frame was assigned a
        # label; min/max of an empty array would raise, so skip it.
        if c.n_frames == 0:
            continue

        rmsds_to_native = md.rmsd(c, native) * 10
        mean = np.mean(rmsds_to_native)
        median = np.median(rmsds_to_native)
        min_ = np.min(rmsds_to_native)
        max_ = np.max(rmsds_to_native)
        std_ = np.std(rmsds_to_native)
        np.savetxt("clusters_0" + str(i) + ".dat", rmsds_to_native, fmt="%f")
        print(
            'Cluster {:02d} has population {:.1f}; RMSD: {:.2f} {:.2f} {:.2f} {:.2f} {:.2f}'
            .format(i, 100 * cluster_sizes[i] / float(total_frames), mean,
                    median, min_, max_, std_))
        c.save('cluster_{:02d}.pdb'.format(i))
def calculate_RMSD(self, ref_struct=None, atom_desc=None, save=True):
    """
    Calculates the RMSD of ``self.traj`` from a reference structure.

    Parameters:
        ref_struct: The reference structure file
            (default None: uses ``self.gro_file``)
        atom_desc: Description of atom indices (default None). It is
            passed to ``self.get_indices``, but the result is not used
            in the RMSD calculation, which runs over all atoms.
        save: Stores the result via ``load_and_save`` if True
            (default True)

    Returns:
        RMSD: The value returned by ``md.rmsd`` for ``self.traj``
    """
    # Honour an explicitly supplied reference; fall back to the run's own
    # structure file otherwise.
    ref_path = self.gro_file if ref_struct is None else ref_struct
    gro_ref = md.load(ref_path)

    # NOTE(review): the indices are not used below. The call is kept in
    # case get_indices validates atom_desc or has side effects - confirm.
    self.get_indices(atom_desc)

    print('Calculating RMSD...')
    RMSD = md.rmsd(self.traj, gro_ref)
    if save:
        load_and_save(self, RMSD)
        print('Saved RMSD to index [%d][%d][%d]' %
              (self.run_num, self.clone_num, self.gen_num))
    return RMSD
def RMSD1to1(traj1, traj2):
    """
    Calculate the RMSD between corresponding frames of two trajectories.

    traj1 and traj2 must have the same number of frames. For every frame
    index i, ``md.rmsd(traj2[i], traj1[i])`` is evaluated, i.e. frame i of
    traj2 is passed as the target and frame i of traj1 as the reference.
    The frame counts of both trajectories are printed.

    Raises IOError if the trajectories contain different numbers of
    frames.

    The function returns a 1D numpy array, which contains one rmsd value
    for each frame.
    """
    traj1_nframes = traj1.n_frames
    traj2_nframes = traj2.n_frames
    print("trajectory 1 contains %i frames." % traj1_nframes)
    print("trajectory 2 contains %i frames." % traj2_nframes)
    if traj1_nframes != traj2_nframes:
        raise IOError("Input trajectories contain different number of frames")

    rmsd = np.empty(traj1_nframes)
    for i in range(traj1_nframes):
        # md.rmsd returns a length-1 array for a single-frame target; take
        # the scalar explicitly instead of relying on implicit conversion.
        rmsd[i] = md.rmsd(traj2[i], traj1[i])[0]
    return rmsd