def test_list_of_masks(self):
    # pt.rmsd given a list of masks must agree, entry by entry, with
    # pt.rmsd called separately for each mask.
    trajectory = self.traj.copy()

    # list of mask strings
    string_masks = ['@CA', '@CB', ':3-18@CA,C']
    rmsd_per_mask = pt.rmsd(trajectory, mask=string_masks)
    for position, mask_text in enumerate(string_masks):
        aa_eq(rmsd_per_mask[position], pt.rmsd(trajectory, mask=mask_text))
        selected = trajectory.top.select(mask_text)
        aa_eq(rmsd_per_mask[position], pt.rmsd(trajectory, mask=selected))

    # mixing mask strings with an index list must raise ValueError
    mixed_masks = ['@CA', '@CB', ':3-18@CA,C', [0, 3, 5]]
    with self.assertRaises(ValueError):
        pt.rmsd(trajectory, mask=mixed_masks)

    # list of index collections
    index_masks = [[0, 3, 6], range(50)]
    for position in (0, 1):
        aa_eq(
            pt.rmsd(trajectory, mask=index_masks)[position],
            pt.rmsd(trajectory, mask=index_masks[position]))

    # list of index arrays returned by pt.select
    ca_indices = pt.select('@CA', trajectory.top)
    cb_indices = pt.select('@CB', trajectory.top)
    for position, single in enumerate((ca_indices, cb_indices)):
        aa_eq(
            pt.rmsd(trajectory, mask=single),
            pt.rmsd(trajectory, mask=[ca_indices, cb_indices])[position])
def test_nativecontacts(self):
    # Compare pt.native_contacts against the reference values stored in
    # tc5b.native_contacts.dat, then check that mask2 given as a mask string
    # and as an index array produce the same values.
    trajectory = pt.iterload(fn('Tc5b.x'), fn('Tc5b.top'))
    contacts = pt.native_contacts(trajectory, top=trajectory.top)

    reference_path = fn('tc5b.native_contacts.dat')
    reference = np.loadtxt(reference_path, skiprows=1, usecols=(1, 2))
    aa_eq(contacts.values, reference.T)

    # mask2 as string vs. mask2 as atom indices
    cb_atoms = pt.select('@CB', trajectory.top)
    from_string = pt.native_contacts(trajectory, mask='@CA', mask2='@CB')
    from_indices = pt.native_contacts(trajectory, mask='@CA', mask2=cb_atoms)
    aa_eq(from_string.values, from_indices.values)
def test_comprehensive(self):
    # Exercise mean_structure with several kinds of input (trajectory, list,
    # frame iterator, masks, atom indices, frame_indices, dtype) and compare
    # against saved reference structures where one is available.
    traj = pt.iterload(fn('Tc5b.x'), fn('Tc5b.top'))

    # the result must reproduce the saved average structure
    saved_average = pt.iterload(fn("avg.Tc5b.pdb"), traj.top)[0]

    # trajectory passed positionally
    average_positional = mean_structure(traj)
    aa_eq(average_positional.xyz, saved_average.xyz, decimal=3)

    # trajectory passed by keyword
    average_keyword = mean_structure(traj=traj)
    aa_eq(average_keyword.xyz, saved_average.xyz, decimal=3)

    # a list of trajectories: only checked to run, the result is not compared
    mean_structure(traj=[traj, traj[:3]], top=traj.top)

    # a frame iterator
    average_iter = mean_structure(traj=traj(1, 8, 2), top=traj.top)
    saved_iter_average = pt.iterload(
        fn("avg.Tc5b.frame_2_to_8_skip_2.pdb"), traj.top)[0]
    aa_eq(average_iter.xyz, saved_iter_average.xyz, decimal=3)

    # CA atoms of selected frames, atoms given as a mask string
    average_ca_mask = mean_structure(traj[[0, 3, 7]], '@CA', top=traj.top)

    # same selection, atoms given as an index array
    ca_atoms = pt.select('@CA', traj.top)
    average_ca_indices = mean_structure(traj[[0, 3, 7]], ca_atoms, top=traj.top)

    # same selection, frames given through frame_indices
    average_frame_indices = mean_structure(
        traj, mask='@CA', frame_indices=[0, 3, 7])
    aa_eq(average_ca_mask.xyz, average_frame_indices.xyz, decimal=3)
    aa_eq(average_ca_indices.xyz, average_frame_indices.xyz, decimal=3)

    # coordinates from the frame iterator vs. iterframe with frame_indices
    coords_from_iter = pt.get_coordinates(traj(1, 8, 2))
    collected = []
    for frame in traj.iterframe(frame_indices=range(1, 8, 2)):
        collected.append(frame.xyz.copy())
    coords_from_frames = np.array(collected)
    aa_eq(coords_from_iter, coords_from_frames, decimal=3)

    # dtype='traj' must return a Trajectory
    as_traj = mean_structure(
        traj, mask='@CA', frame_indices=[0, 3, 7], dtype='traj')
    assert isinstance(as_traj, Trajectory), 'must be Trajectory'
    aa_eq(as_traj.xyz, average_frame_indices.xyz, decimal=3)

    # an unsupported dtype must raise ValueError
    with self.assertRaises(ValueError):
        pt.mean_structure(traj, dtype='trajxyz')
def test_nativecontacts(self):
    # Compare pt.native_contacts against the reference values stored in
    # data/tc5b.native_contacts.dat, then check that mask2 given as a mask
    # string and as an index array produce the same values.
    trajectory = pt.iterload("./data/Tc5b.x", "./data/Tc5b.top")
    contacts = pt.native_contacts(trajectory, top=trajectory.top)

    reference = np.loadtxt(
        'data/tc5b.native_contacts.dat', skiprows=1, usecols=(1, 2))
    aa_eq(contacts.values, reference.T)

    # mask2 as string vs. mask2 as atom indices
    cb_atoms = pt.select('@CB', trajectory.top)
    from_string = pt.native_contacts(trajectory, mask='@CA', mask2='@CB')
    from_indices = pt.native_contacts(trajectory, mask='@CA', mask2=cb_atoms)
    aa_eq(from_string.values, from_indices.values)
def test_comprehensive(self):
    # Exercise mean_structure with several kinds of input (trajectory, list,
    # frame iterator, masks, atom indices, frame_indices, dtype) and compare
    # against saved reference structures where one is available.
    traj = pt.iterload("./data/Tc5b.x", "./data/Tc5b.top")

    # the result must reproduce the saved average structure
    saved_average = pt.iterload("./data/avg.Tc5b.pdb", traj.top)[0]

    # trajectory passed positionally
    average_positional = mean_structure(traj)
    aa_eq(average_positional.xyz, saved_average.xyz, decimal=3)

    # trajectory passed by keyword
    average_keyword = mean_structure(traj=traj)
    aa_eq(average_keyword.xyz, saved_average.xyz, decimal=3)

    # a list of trajectories: only checked to run, the result is not compared
    mean_structure(traj=[traj, traj[:3]], top=traj.top)

    # a frame iterator
    average_iter = mean_structure(traj=traj(1, 8, 2), top=traj.top)
    saved_iter_average = pt.iterload(
        "./data/avg.Tc5b.frame_2_to_8_skip_2.pdb", traj.top)[0]
    aa_eq(average_iter.xyz, saved_iter_average.xyz, decimal=3)

    # CA atoms of selected frames, atoms given as a mask string
    average_ca_mask = mean_structure(traj[[0, 3, 7]], '@CA', top=traj.top)

    # same selection, atoms given as an index array
    ca_atoms = pt.select('@CA', traj.top)
    average_ca_indices = mean_structure(traj[[0, 3, 7]], ca_atoms, top=traj.top)

    # same selection, frames given through frame_indices
    average_frame_indices = mean_structure(
        traj, mask='@CA', frame_indices=[0, 3, 7])
    aa_eq(average_ca_mask.xyz, average_frame_indices.xyz, decimal=3)
    aa_eq(average_ca_indices.xyz, average_frame_indices.xyz, decimal=3)

    # coordinates from the frame iterator vs. iterframe with frame_indices
    coords_from_iter = pt.get_coordinates(traj(1, 8, 2))
    collected = []
    for frame in traj.iterframe(frame_indices=range(1, 8, 2)):
        collected.append(frame.xyz.copy())
    coords_from_frames = np.array(collected)
    aa_eq(coords_from_iter, coords_from_frames, decimal=3)

    # dtype='traj' must return a Trajectory
    as_traj = mean_structure(
        traj, mask='@CA', frame_indices=[0, 3, 7], dtype='traj')
    assert isinstance(as_traj, Trajectory), 'must be Trajectory'
    aa_eq(as_traj.xyz, average_frame_indices.xyz, decimal=3)

    # an unsupported dtype must raise ValueError
    with self.assertRaises(ValueError):
        pt.mean_structure(traj, dtype='trajxyz')
def test_rdf(self):
    # Run a batch of cpptraj `radial` commands and check that pt.rdf, called
    # with the matching keyword arguments, gives the same values and bins.
    traj = pt.iterload(
        "./data/tz2.truncoct.nc",
        "./data/tz2.truncoct.parm7",
        frame_slice=(0, 10))

    command = '''
    radial output/Radial.agr 0.5 10.0 :5@CD :WAT@O
    radial output/cRadial.agr 0.5 10.0 :5 :WAT@O center1
    radial output/cRadial.agr 0.5 10.0 :5 :WAT@O center2
    radial output/cRadial.agr 0.5 20.0 :3 :WAT@O
    radial output/cRadial.agr 0.5 20.0 :3 :WAT@O noimage
    radial output/radial.dat 0.5 10.0 :5@CD :WAT@O
    radial output/radial2.dat 0.25 10.0 :5@CD :WAT@O
    '''

    # reference data straight from the cpptraj batch run
    state = pt.load_batch(traj, command)
    state.run()

    # keyword arguments shared by all bin_spacing=0.5 calls
    coarse = dict(solvent_mask=':WAT@O', bin_spacing=0.5)

    # pytraj results, computed in the same order as the commands above
    rdf_cd = pt.rdf(traj, maximum=10.0, solute_mask=':5@CD', **coarse)
    rdf_cd_again = pt.rdf(traj, maximum=10.0, solute_mask=':5@CD', **coarse)
    rdf_center_solvent = pt.rdf(
        traj, maximum=10.0, center_solvent=True, solute_mask=':5', **coarse)
    rdf_center_solute = pt.rdf(
        traj, maximum=10.0, center_solute=True, solute_mask=':5', **coarse)
    rdf_wide = pt.rdf(
        traj, maximum=20.0, center_solute=False, solute_mask=':3', **coarse)
    rdf_wide_noimage = pt.rdf(
        traj,
        maximum=20.0,
        center_solute=False,
        image=False,
        solute_mask=':3',
        **coarse)
    rdf_fine = pt.rdf(
        traj,
        solvent_mask=':WAT@O',
        bin_spacing=0.25,
        maximum=10.0,
        solute_mask=':5@CD')

    # solvent_mask given as an index array instead of a mask string
    solvent_atoms = pt.select(':WAT@O', traj.top)
    rdf_fine_indices = pt.rdf(
        traj,
        solvent_mask=solvent_atoms,
        bin_spacing=0.25,
        maximum=10.0,
        solute_mask=':5@CD')

    # values: each pytraj result against the corresponding cpptraj dataset;
    # the repeated :5@CD call is checked last, against dataset 1 again
    comparisons = (
        (rdf_cd, 1),
        (rdf_center_solvent, 2),
        (rdf_center_solute, 3),
        (rdf_wide, 4),
        (rdf_wide_noimage, 5),
        (rdf_cd_again, 1),
    )
    for result, dataset_index in comparisons:
        aa_eq(result[1], state.data[dataset_index], decimal=7)

    # bins: first column of the files written by the cpptraj run
    coarse_bins = np.loadtxt('output/radial.dat').T[0]
    aa_eq(rdf_cd[0], coarse_bins)

    fine_bins = np.loadtxt('output/radial2.dat').T[0]
    aa_eq(rdf_fine[0], fine_bins)
    aa_eq(rdf_fine_indices[0], fine_bins)
def _subspace_overlap(vecs_a, vecs_b, n_vecs):
    """Return the mean squared cosine overlap of two eigenvector sets.

    Sums the squared normalized dot product over every pair formed from the
    first ``n_vecs`` vectors of ``vecs_a`` and of ``vecs_b``, then divides the
    sum by ``n_vecs``.
    """
    total = 0
    for i in range(n_vecs):
        for j in range(n_vecs):
            total += (np.dot(vecs_a[i], vecs_b[j]) /
                      (np.linalg.norm(vecs_a[i]) *
                       np.linalg.norm(vecs_b[j])))**2
    total /= n_vecs
    return total


def main():
    """Run a PCA on a trajectory and write data files and plots.

    All settings are read from the module-level ``args`` object: ``traj``,
    ``parm``, ``mask``, ``mask_proj``, ``evec``, ``riter``, ``start``,
    ``prefix``, ``traj_proj``, ``parm_proj``, ``start_proj`` and ``zscore``.
    If ``args.mask_proj`` is None it is set to ``args.mask``.

    Files written (all names contain ``args.prefix``):

    * ``<prefix>_sele.dat``        -- index and atom of every selected atom
    * ``<prefix>_eigen_vec.dat``   -- the first three eigenvectors as columns
    * ``<prefix>_pcadata.dat``     -- transposed PCA projection data
    * one scatter plot per pair of principal components, an eigenvector
      contribution plot and a variance plot (PNG)

    When both ``args.traj_proj`` and ``args.parm_proj`` are given, a second
    trajectory is projected onto the eigenvectors, additional data/plots are
    written, and the overlap between the eigenvector sets of both
    trajectories is printed. When ``args.zscore`` is also given, a z-score of
    that overlap against ``args.riter`` shuffled trajectories is printed.
    """
    traj = pt.load(args.traj, args.parm)
    rnd_iter = args.riter
    rnd_vecs = args.evec

    if args.mask_proj is None:
        args.mask_proj = args.mask

    # Single-argument print() produces the same text on Python 2 and 3.
    print("Mask :  %s" % (args.mask,))
    print("Mask proj:  %s" % (args.mask_proj,))

    if rnd_vecs < 1:
        # no vector count requested: use 3 * (selected atoms) - 6
        rnd_vecs = 3 * traj[args.mask].xyz.shape[1] - 6

    # all index pairs (i, j) with i < j, in ascending order
    pairs = [(n_i, n_j)
             for n_i in range(rnd_vecs)
             for n_j in range(n_i + 1, rnd_vecs)]

    sele = pt.select(traj.top, args.mask)
    sele_txt = "".join("%d %s\n" % (s_i, traj.top.atomlist[s])
                       for s_i, s in enumerate(sele))
    # context manager closes the file even if the write fails
    with open("%s_sele.dat" % args.prefix, "w") as sele_file:
        sele_file.write(sele_txt)

    n_vecs = rnd_vecs
    pca_data, eigen = pt.pca(traj[args.start:], mask=args.mask, n_vecs=n_vecs)
    eigen_val = eigen[0]
    eigen_vec = eigen[1]

    np.savetxt("%s_eigen_vec.dat" % args.prefix,
               np.c_[eigen_vec[0], eigen_vec[1], eigen_vec[2]])
    np.savetxt("%s_pcadata.dat" % args.prefix, pca_data.T)

    # Plot PCA: one scatter plot per pair of principal components.
    # Raw strings keep the literal backslash of "\AA" without relying on an
    # invalid escape sequence.
    for pc_i, pc_j in pairs:
        plt.scatter(pca_data[pc_i], pca_data[pc_j], marker='o', c="r",
                    alpha=0.5)
        plt.xlabel(r"PC%d [$\AA$]" % pc_i)
        plt.ylabel(r"PC%d [$\AA$]" % pc_j)
        plt.title("PCA PC%d vs. PC%d" % (pc_i, pc_j))
        plt.savefig("PC%d-vs-PC%s_%s.png" % (pc_i, pc_j, args.prefix))
        plt.close('all')

    # Plot atom contribution: per-atom norm of the first three eigenvectors.
    for pc_i in range(3):
        n_components = eigen_vec[pc_i].shape[0]
        # floor division: the atom count must stay an integer on Python 3
        n_atoms = n_components // 3
        contribution = np.linalg.norm(
            eigen_vec[pc_i].reshape((n_atoms, 3)), axis=1)
        atom_ids = np.arange(n_atoms) + 1
        plt.plot(atom_ids, contribution, label="PC%s" % pc_i, alpha=0.5)
    plt.legend()
    plt.xlim(0, n_components // 3 + 1)
    plt.xlabel("Atom ID")
    plt.ylabel("Eigenvector components")
    plt.title("Eigenvectors")
    plt.savefig("Eigenvectors_%s.png" % args.prefix)
    plt.close('all')

    # Plot the individual and cumulative share of the summed eigenvalues.
    total_var = np.sum(eigen_val)
    plt.scatter(range(1, n_vecs + 1),
                (np.cumsum(eigen_val) / total_var) * 100,
                label="Cumulative Variance")
    plt.plot(range(1, n_vecs + 1), (eigen_val / total_var) * 100, "g--",
             label="Variance")
    plt.legend()
    plt.xlabel("Eigenvector #")
    plt.ylabel("Variance explained [%]")
    plt.title("Variance explained by PC Eigenvectors")
    plt.savefig("Variance_%s.png" % args.prefix, dpi=1000)
    plt.close('all')

    if args.traj_proj is not None and args.parm_proj is not None:
        traj_proj = pt.load(args.traj_proj, args.parm_proj)
        # Return value is discarded; presumably called to superpose
        # traj_proj in place before projecting -- TODO confirm.
        pt.rmsd(traj_proj, mask=args.mask_proj)

        projection_data = pt.projection(traj_proj[args.start_proj:],
                                        args.mask_proj,
                                        eigenvalues=eigen_val,
                                        eigenvectors=eigen_vec,
                                        scalar_type='covar')
        np.savetxt("%s_pcadata_proj.dat" % args.prefix, projection_data.T)

        for pc_i, pc_j in pairs:
            # original PCA data (red) together with the projection (green)
            plt.scatter(pca_data[pc_i], pca_data[pc_j], marker='o', c="r",
                        alpha=0.5)
            plt.scatter(projection_data[pc_i], projection_data[pc_j],
                        marker='o', c="g", alpha=0.5)
            plt.xlabel(r"PC%d [$\AA$]" % pc_i)
            plt.ylabel(r"PC%d [$\AA$]" % pc_j)
            plt.title("PCA PC%d vs. PC%d with projection" % (pc_i, pc_j))
            plt.savefig("PC%d-vs-PC%s_%s_projection.png" %
                        (pc_i, pc_j, args.prefix))
            plt.close('all')

            # projection only
            plt.scatter(projection_data[pc_i], projection_data[pc_j],
                        marker='o', c="g", alpha=0.5)
            plt.xlabel(r"PC%d [$\AA$]" % pc_i)
            plt.ylabel(r"PC%d [$\AA$]" % pc_j)
            plt.title("PCA PC%d vs. PC%d only projection" % (pc_i, pc_j))
            plt.savefig("PC%d-vs-PC%d_%s_only_projection.png" %
                        (pc_i, pc_j, args.prefix))
            plt.close('all')

        # PCA of the second trajectory; only its eigenvectors are used.
        pca_data_2, eigen_2 = pt.pca(traj_proj[args.start_proj:],
                                     mask=args.mask_proj,
                                     n_vecs=n_vecs)
        eigen_vec_2 = eigen_2[1]

        overlap = _subspace_overlap(eigen_vec, eigen_vec_2, rnd_vecs)
        print("Vector space spanned by traj-1 overlap with traj-2 subspace "
              "(%d vecs): %6.3f" % (rnd_vecs, overlap))

        if args.zscore is not None:
            overlap_rnd = np.zeros(rnd_iter)
            for r in range(rnd_iter):
                # make random traj
                # NOTE(review): t1_rnd is the same object as traj (no copy),
                # and frame f is overwritten with the shuffled coordinates of
                # frame args.start + f. Behaviour kept as in the original --
                # confirm this is intended when args.start > 0.
                t1_rnd = traj
                for f in range(t1_rnd.xyz[args.start:].shape[0]):
                    idxs = np.arange(t1_rnd.xyz[args.start + f, ].shape[0])
                    shuffled = np.random.permutation(idxs)
                    t1_rnd[f] = t1_rnd.xyz[args.start + f, ][shuffled]

                pca_t1_rnd, eigen_t1_rnd = pt.pca(t1_rnd[args.start:],
                                                  mask=args.mask,
                                                  n_vecs=n_vecs)
                eigen_vec_1_rnd = eigen_t1_rnd[1]
                overlap_rnd[r] = _subspace_overlap(eigen_vec, eigen_vec_1_rnd,
                                                   n_vecs)

            z_score = (overlap - np.mean(overlap_rnd)) / np.std(overlap_rnd)
            print("Z-score : %6.3f" % z_score)
import pytraj as pt

# Example: select atoms of the Tc5b test system by mask.
traj = pt.iterload("../tests/data/Tc5b.x", "../tests/data/Tc5b.top")
top = traj.top

# indices of the atoms matching the CA mask
ca_indices = pt.select("@CA", top)
print(ca_indices)

# indices of the atoms matching the "@H=" mask (the indices are printed,
# not a count)
h_indices = pt.select("@H=", top)
print(h_indices)