예제 #1
0
    def test_list_of_masks(self):
        """pt.rmsd with a list of masks must agree with one call per mask.

        Covers lists of mask strings, lists of atom-index sequences and lists
        of index arrays from pt.select; a list that mixes mask strings with an
        index list must raise ValueError.
        """
        traj = self.traj.copy()

        # list of mask strings: each row equals the single-mask result,
        # with the mask given either as text or as selected indices
        text_masks = ['@CA', '@CB', ':3-18@CA,C']
        stacked = pt.rmsd(traj, mask=text_masks)
        for row in range(len(text_masks)):
            text_mask = text_masks[row]
            aa_eq(stacked[row], pt.rmsd(traj, mask=text_mask))
            aa_eq(stacked[row], pt.rmsd(traj, mask=traj.top.select(text_mask)))

        # mask strings mixed with an index list are rejected
        mixed_masks = ['@CA', '@CB', ':3-18@CA,C', [0, 3, 5]]
        with self.assertRaises(ValueError):
            pt.rmsd(traj, mask=mixed_masks)

        # list of index sequences
        index_masks = [[0, 3, 6], range(50)]
        for pos in (0, 1):
            aa_eq(
                pt.rmsd(traj, mask=index_masks)[pos],
                pt.rmsd(traj, mask=index_masks[pos]))

        # list of index arrays returned by pt.select
        ca = pt.select('@CA', traj.top)
        cb = pt.select('@CB', traj.top)
        for pos, indices in enumerate((ca, cb)):
            aa_eq(pt.rmsd(traj, mask=indices), pt.rmsd(traj, mask=[ca, cb])[pos])
예제 #2
0
    def test_list_of_masks(self):
        """pt.rmsd with a list of masks must agree with one call per mask.

        Covers lists of mask strings, lists of atom-index sequences and lists
        of index arrays from pt.select; a list that mixes mask strings with an
        index list must raise ValueError.
        """
        traj = self.traj.copy()

        # list of mask strings: each row equals the single-mask result,
        # with the mask given either as text or as selected indices
        text_masks = ['@CA', '@CB', ':3-18@CA,C']
        stacked = pt.rmsd(traj, mask=text_masks)
        for row in range(len(text_masks)):
            text_mask = text_masks[row]
            aa_eq(stacked[row], pt.rmsd(traj, mask=text_mask))
            aa_eq(stacked[row], pt.rmsd(traj, mask=traj.top.select(text_mask)))

        # mask strings mixed with an index list are rejected
        mixed_masks = ['@CA', '@CB', ':3-18@CA,C', [0, 3, 5]]
        with self.assertRaises(ValueError):
            pt.rmsd(traj, mask=mixed_masks)

        # list of index sequences
        index_masks = [[0, 3, 6], range(50)]
        for pos in (0, 1):
            aa_eq(
                pt.rmsd(traj, mask=index_masks)[pos],
                pt.rmsd(traj, mask=index_masks[pos]))

        # list of index arrays returned by pt.select
        ca = pt.select('@CA', traj.top)
        cb = pt.select('@CB', traj.top)
        for pos, indices in enumerate((ca, cb)):
            aa_eq(pt.rmsd(traj, mask=indices), pt.rmsd(traj, mask=[ca, cb])[pos])
    def test_nativecontacts(self):
        """Check pt.native_contacts against a saved data file and across mask2 forms."""
        traj = pt.iterload(fn('Tc5b.x'), fn('Tc5b.top'))

        # default call compared with columns 1 and 2 of the reference file
        contacts = pt.native_contacts(traj, top=traj.top)
        reference = np.loadtxt(
            fn('tc5b.native_contacts.dat'),
            skiprows=1,
            usecols=(1, 2),
        ).T
        aa_eq(contacts.values, reference)

        # mask2 as a mask string and as the matching index array must agree
        cb_indices = pt.select('@CB', traj.top)
        from_mask_string = pt.native_contacts(traj, mask='@CA', mask2='@CB')
        from_index_array = pt.native_contacts(
            traj, mask='@CA', mask2=cb_indices)
        aa_eq(from_mask_string.values, from_index_array.values)
예제 #4
0
    def test_comprehensive(self):
        """Check mean_structure against saved averages and across call forms.

        Covers a positional and a keyword trajectory, a list of trajectories,
        a frame iterator, a mask string versus atom indices, ``frame_indices``,
        ``dtype='traj'`` and rejection of an unknown ``dtype``.
        """
        traj = pt.iterload(fn('Tc5b.x'), fn('Tc5b.top'))
        # saved average structure the results are expected to reproduce
        saved_avg = pt.iterload(fn("avg.Tc5b.pdb"), traj.top)[0]

        # trajectory given positionally
        avg_positional = mean_structure(traj)
        aa_eq(avg_positional.xyz, saved_avg.xyz, decimal=3)

        # trajectory given by keyword
        avg_keyword = mean_structure(traj=traj)
        aa_eq(avg_keyword.xyz, saved_avg.xyz, decimal=3)

        # list of trajectories: only required to run, the result is not compared
        mean_structure(traj=[traj, traj[:3]], top=traj.top)

        # frame iterator compared with its own saved average
        avg_iter = mean_structure(traj=traj(1, 8, 2), top=traj.top)
        saved_iter_avg = pt.iterload(
            fn("avg.Tc5b.frame_2_to_8_skip_2.pdb"), traj.top)[0]
        aa_eq(avg_iter.xyz, saved_iter_avg.xyz, decimal=3)

        # three picked frames, CA atoms chosen by mask string
        avg_ca_mask = mean_structure(traj[[0, 3, 7]], '@CA', top=traj.top)

        # same frames, CA atoms chosen by index array
        ca_indices = pt.select('@CA', traj.top)
        avg_ca_indices = mean_structure(
            traj[[0, 3, 7]], ca_indices, top=traj.top)

        # same frames chosen through frame_indices: all three must agree
        avg_ca_frames = mean_structure(
            traj, mask='@CA', frame_indices=[0, 3, 7])
        aa_eq(avg_ca_mask.xyz, avg_ca_frames.xyz, decimal=3)
        aa_eq(avg_ca_indices.xyz, avg_ca_frames.xyz, decimal=3)

        # coordinates from the frame iterator vs. frame-by-frame copies
        coords_from_call = pt.get_coordinates(traj(1, 8, 2))
        frame_copies = []
        for frame in traj.iterframe(frame_indices=range(1, 8, 2)):
            frame_copies.append(frame.xyz.copy())
        coords_from_frames = np.array(frame_copies)
        aa_eq(coords_from_call, coords_from_frames, decimal=3)

        # dtype='traj' must hand back a Trajectory with the same coordinates
        as_traj = mean_structure(
            traj, mask='@CA', frame_indices=[0, 3, 7], dtype='traj')
        assert isinstance(as_traj, Trajectory), 'must be Trajectory'
        aa_eq(as_traj.xyz, avg_ca_frames.xyz, decimal=3)

        # an unsupported dtype is rejected
        with self.assertRaises(ValueError):
            pt.mean_structure(traj, dtype='trajxyz')
예제 #5
0
    def test_nativecontacts(self):
        """Check pt.native_contacts against a saved data file and across mask2 forms."""
        traj = pt.iterload("./data/Tc5b.x", "./data/Tc5b.top")

        # default call compared with columns 1 and 2 of the reference file
        contacts = pt.native_contacts(traj, top=traj.top)
        reference = np.loadtxt(
            'data/tc5b.native_contacts.dat', skiprows=1, usecols=(1, 2)).T
        aa_eq(contacts.values, reference)

        # mask2 as a mask string and as the matching index array must agree
        cb_indices = pt.select('@CB', traj.top)
        from_mask_string = pt.native_contacts(traj, mask='@CA', mask2='@CB')
        from_index_array = pt.native_contacts(
            traj, mask='@CA', mask2=cb_indices)
        aa_eq(from_mask_string.values, from_index_array.values)
예제 #6
0
    def test_comprehensive(self):
        """Check mean_structure against saved averages and across call forms.

        Covers a positional and a keyword trajectory, a list of trajectories,
        a frame iterator, a mask string versus atom indices, ``frame_indices``,
        ``dtype='traj'`` and rejection of an unknown ``dtype``.
        """
        traj = pt.iterload("./data/Tc5b.x", "./data/Tc5b.top")
        # saved average structure the results are expected to reproduce
        saved_avg = pt.iterload("./data/avg.Tc5b.pdb", traj.top)[0]

        # trajectory given positionally
        avg_positional = mean_structure(traj)
        aa_eq(avg_positional.xyz, saved_avg.xyz, decimal=3)

        # trajectory given by keyword
        avg_keyword = mean_structure(traj=traj)
        aa_eq(avg_keyword.xyz, saved_avg.xyz, decimal=3)

        # list of trajectories: only required to run, the result is not compared
        mean_structure(traj=[traj, traj[:3]], top=traj.top)

        # frame iterator compared with its own saved average
        avg_iter = mean_structure(traj=traj(1, 8, 2), top=traj.top)
        saved_iter_avg = pt.iterload(
            "./data/avg.Tc5b.frame_2_to_8_skip_2.pdb", traj.top)[0]
        aa_eq(avg_iter.xyz, saved_iter_avg.xyz, decimal=3)

        # three picked frames, CA atoms chosen by mask string
        avg_ca_mask = mean_structure(traj[[0, 3, 7]], '@CA', top=traj.top)

        # same frames, CA atoms chosen by index array
        ca_indices = pt.select('@CA', traj.top)
        avg_ca_indices = mean_structure(
            traj[[0, 3, 7]], ca_indices, top=traj.top)

        # same frames chosen through frame_indices: all three must agree
        avg_ca_frames = mean_structure(
            traj, mask='@CA', frame_indices=[0, 3, 7])
        aa_eq(avg_ca_mask.xyz, avg_ca_frames.xyz, decimal=3)
        aa_eq(avg_ca_indices.xyz, avg_ca_frames.xyz, decimal=3)

        # coordinates from the frame iterator vs. frame-by-frame copies
        coords_from_call = pt.get_coordinates(traj(1, 8, 2))
        frame_copies = []
        for frame in traj.iterframe(frame_indices=range(1, 8, 2)):
            frame_copies.append(frame.xyz.copy())
        coords_from_frames = np.array(frame_copies)
        aa_eq(coords_from_call, coords_from_frames, decimal=3)

        # dtype='traj' must hand back a Trajectory with the same coordinates
        as_traj = mean_structure(
            traj, mask='@CA', frame_indices=[0, 3, 7], dtype='traj')
        assert isinstance(as_traj, Trajectory), 'must be Trajectory'
        aa_eq(as_traj.xyz, avg_ca_frames.xyz, decimal=3)

        # an unsupported dtype is rejected
        with self.assertRaises(ValueError):
            pt.mean_structure(traj, dtype='trajxyz')
예제 #7
0
    def test_rdf(self):
        """Compare pt.rdf with datasets produced by cpptraj ``radial`` commands.

        The commands are run through pt.load_batch; the matching pt.rdf calls
        must reproduce the datasets, and the bin positions must equal the
        first column of the .dat files the commands write under ``output/``.
        """
        traj = pt.iterload("./data/tz2.truncoct.nc",
                           "./data/tz2.truncoct.parm7",
                           frame_slice=(0, 10))

        radial_commands = '''
        radial output/Radial.agr 0.5 10.0 :5@CD :WAT@O
        radial output/cRadial.agr 0.5 10.0 :5 :WAT@O center1
        radial output/cRadial.agr 0.5 10.0 :5 :WAT@O center2
        radial output/cRadial.agr 0.5 20.0 :3 :WAT@O
        radial output/cRadial.agr 0.5 20.0 :3 :WAT@O noimage
        radial output/radial.dat 0.5 10.0 :5@CD :WAT@O
        radial output/radial2.dat 0.25 10.0 :5@CD :WAT@O
        '''

        # reference datasets straight from cpptraj
        state = pt.load_batch(traj, radial_commands)
        state.run()

        def radial(solute, spacing, cutoff, solvent=':WAT@O', **extra):
            # forwards to pt.rdf so each case lists only what distinguishes it
            return pt.rdf(traj,
                          solvent_mask=solvent,
                          bin_spacing=spacing,
                          maximum=cutoff,
                          solute_mask=solute,
                          **extra)

        # the same calculations through pytraj
        data0 = radial(':5@CD', 0.5, 10.0)
        data01 = radial(':5@CD', 0.5, 10.0)
        data1 = radial(':5', 0.5, 10.0, center_solvent=True)
        data2 = radial(':5', 0.5, 10.0, center_solute=True)
        data3 = radial(':3', 0.5, 20.0, center_solute=False)
        data4 = radial(':3', 0.5, 20.0, center_solute=False, image=False)
        data5 = radial(':5@CD', 0.25, 10.0)

        # solvent given as an index array instead of a mask string
        solvent_indices = pt.select(':WAT@O', traj.top)
        data6 = radial(':5@CD', 0.25, 10.0, solvent=solvent_indices)

        # values: data0..data4 against state.data[1]..state.data[5]
        computed = (data0, data1, data2, data3, data4)
        for dataset_index, result in enumerate(computed, 1):
            aa_eq(result[1], state.data[dataset_index], decimal=7)

        # data01 repeats the data0 call and must match the same dataset
        aa_eq(data01[1], state.data[1], decimal=7)

        # bin positions against the first column of the written files
        steps = np.loadtxt('output/radial.dat').T[0]
        aa_eq(data0[0], steps)

        steps2 = np.loadtxt('output/radial2.dat').T[0]
        aa_eq(data5[0], steps2)
        aa_eq(data6[0], steps2)
예제 #8
0
def _save_pc_scatter(series, pc_i, pc_j, title, filename):
    """Scatter components pc_i vs. pc_j of each (data, colour) in *series* and save the figure."""
    for data, colour in series:
        plt.scatter(data[pc_i], data[pc_j], marker='o', c=colour, alpha=0.5)
    # raw strings: "\A" is not a valid escape sequence in a normal literal
    plt.xlabel(r"PC%d [$\AA$]" % pc_i)
    plt.ylabel(r"PC%d [$\AA$]" % pc_j)
    plt.title(title)
    plt.savefig(filename)
    plt.close('all')


def _subspace_overlap(vecs_a, vecs_b, n_vecs):
    """Return the sum of squared cosines between the first n_vecs rows of both sets, divided by n_vecs."""
    total = 0.0
    for i in range(n_vecs):
        norm_a = np.linalg.norm(vecs_a[i])
        for j in range(n_vecs):
            cosine = np.dot(vecs_a[i], vecs_b[j]) / (
                norm_a * np.linalg.norm(vecs_b[j]))
            total += cosine**2
    return total / n_vecs


def main():
    """Run a PCA on the trajectory named by the module-level ``args`` and save the results.

    Written to ``args.prefix``-based file names: the selected atoms
    (``*_sele.dat``), the first three eigenvectors (``*_eigen_vec.dat``), the
    projected data (``*_pcadata.dat``), one scatter plot per PC pair, an
    eigenvector-component plot and a variance plot.

    When ``args.traj_proj`` and ``args.parm_proj`` are both set, the second
    trajectory is projected onto the eigenvectors, the projection is saved and
    plotted, and the overlap between the eigenvector sets of both trajectories
    is printed.  When ``args.zscore`` is also set, that overlap is compared
    with ``args.riter`` overlaps obtained from atom-shuffled copies of the
    first trajectory and printed as a z-score.
    """
    traj = pt.load(args.traj, args.parm)

    rnd_iter = args.riter
    rnd_vecs = args.evec

    if args.mask_proj is None:
        args.mask_proj = args.mask

    # single-argument print() gives the same output on Python 2 and 3
    print("Mask     :  %s" % (args.mask, ))
    print("Mask proj:  %s" % (args.mask_proj, ))

    if rnd_vecs < 1:
        # no usable vector count given: derive it from the second dimension
        # of the masked coordinates (presumably the atom count -- confirm)
        rnd_vecs = 3 * traj[args.mask].xyz.shape[1] - 6

    # every index pair (i, j) with i < j
    pairs = [(n_i, n_j)
             for n_i in range(rnd_vecs)
             for n_j in range(n_i + 1, rnd_vecs)]

    sele = pt.select(traj.top, args.mask)
    sele_txt = "".join("%d %s\n" % (s_i, traj.top.atomlist[s])
                       for s_i, s in enumerate(sele))
    with open("%s_sele.dat" % args.prefix, "w") as sele_file:
        sele_file.write(sele_txt)

    n_vecs = rnd_vecs
    pca_data, eigen = pt.pca(traj[args.start:], mask=args.mask, n_vecs=n_vecs)
    eigen_val = eigen[0]
    eigen_vec = eigen[1]
    # NOTE: this and the component plot below index the first three
    # eigenvectors, so n_vecs must be at least 3.
    np.savetxt("%s_eigen_vec.dat" % args.prefix,
               np.c_[eigen_vec[0], eigen_vec[1], eigen_vec[2]])
    np.savetxt("%s_pcadata.dat" % args.prefix, pca_data.T)

    # Plot PCA
    for pc_i, pc_j in pairs:
        _save_pc_scatter([(pca_data, "r")], pc_i, pc_j,
                         "PCA PC%d vs. PC%d" % (pc_i, pc_j),
                         "PC%d-vs-PC%s_%s.png" % (pc_i, pc_j, args.prefix))

    # Plot atom contribution: norm of each consecutive coordinate triple
    for pc_i in range(3):
        # floor division: reshape and arange need an integer on Python 3
        n_atoms = eigen_vec[pc_i].shape[0] // 3
        contribution = np.linalg.norm(
            eigen_vec[pc_i].reshape((n_atoms, 3)), axis=1)
        plt.plot(np.arange(n_atoms) + 1,
                 contribution,
                 label="PC%s" % pc_i,
                 alpha=0.5)
    plt.legend()
    plt.xlim(0, n_atoms + 1)
    plt.xlabel("Atom ID")
    plt.ylabel("Eigenvector components")
    plt.title("Eigenvectors")
    plt.savefig("Eigenvectors_%s.png" % args.prefix)
    plt.close('all')

    total_var = np.sum(eigen_val)
    vec_numbers = range(1, n_vecs + 1)

    plt.scatter(vec_numbers, (np.cumsum(eigen_val) / total_var) * 100,
                label="Cumulative Variance")
    plt.plot(vec_numbers, (eigen_val / total_var) * 100,
             "g--",
             label="Variance")
    plt.legend()
    plt.xlabel("Eigenvector #")
    plt.ylabel("Variance explained [%]")
    plt.title("Variance explained by PC Eigenvectors")
    plt.savefig("Variance_%s.png" % args.prefix, dpi=1000)
    plt.close('all')

    if args.traj_proj is None or args.parm_proj is None:
        return

    traj_proj = pt.load(args.traj_proj, args.parm_proj)
    # return value unused; presumably called to fit traj_proj in place
    # before projecting -- TODO confirm
    pt.rmsd(traj_proj, mask=args.mask_proj)
    projection_data = pt.projection(traj_proj[args.start_proj:],
                                    args.mask_proj,
                                    eigenvalues=eigen_val,
                                    eigenvectors=eigen_vec,
                                    scalar_type='covar')
    np.savetxt("%s_pcadata_proj.dat" % args.prefix, projection_data.T)

    for pc_i, pc_j in pairs:
        # original data (red) with the projection (green) on top
        _save_pc_scatter(
            [(pca_data, "r"), (projection_data, "g")], pc_i, pc_j,
            "PCA PC%d vs. PC%d with projection" % (pc_i, pc_j),
            "PC%d-vs-PC%s_%s_projection.png" % (pc_i, pc_j, args.prefix))
        # projection alone
        _save_pc_scatter(
            [(projection_data, "g")], pc_i, pc_j,
            "PCA PC%d vs. PC%d only projection" % (pc_i, pc_j),
            "PC%d-vs-PC%d_%s_only_projection.png" % (pc_i, pc_j, args.prefix))

    _, eigen_2 = pt.pca(traj_proj[args.start_proj:],
                        mask=args.mask_proj,
                        n_vecs=n_vecs)
    eigen_vec_2 = eigen_2[1]

    overlap = _subspace_overlap(eigen_vec, eigen_vec_2, rnd_vecs)
    print(
        "Vector space spanned by traj-1 overlap with traj-2 subspace (%d vecs): %6.3f"
        % (rnd_vecs, overlap))

    if args.zscore is None:
        return

    overlap_rnd = np.zeros(rnd_iter)
    n_rnd_frames = traj.xyz[args.start:].shape[0]
    # Shuffle into a copy so the loaded trajectory is left untouched.  The
    # original aliased `traj` and wrote the shuffled frame `start + f` to
    # index `f`, so with start > 0 the frames handed to pt.pca were not the
    # ones just shuffled.
    t1_rnd = traj.copy()

    for r in range(rnd_iter):

        # make random traj: permute the atom order of every analysed frame
        for f in range(n_rnd_frames):
            frame_xyz = traj.xyz[args.start + f]
            shuffled = np.random.permutation(frame_xyz.shape[0])
            t1_rnd[args.start + f] = frame_xyz[shuffled]

        _, eigen_t1_rnd = pt.pca(t1_rnd[args.start:],
                                 mask=args.mask,
                                 n_vecs=n_vecs)

        overlap_rnd[r] = _subspace_overlap(eigen_vec, eigen_t1_rnd[1], n_vecs)

    z_score = (overlap - np.mean(overlap_rnd)) / np.std(overlap_rnd)

    print(
        "Z-score                                                              : %6.3f"
        % z_score)
예제 #9
0
    def test_rdf(self):
        """Compare pt.rdf with datasets produced by cpptraj ``radial`` commands.

        The commands are run through pt.load_batch; the matching pt.rdf calls
        must reproduce the datasets, and the bin positions must equal the
        first column of the .dat files the commands write under ``output/``.
        """
        traj = pt.iterload("./data/tz2.truncoct.nc",
                           "./data/tz2.truncoct.parm7",
                           frame_slice=(0, 10))

        radial_commands = '''
        radial output/Radial.agr 0.5 10.0 :5@CD :WAT@O
        radial output/cRadial.agr 0.5 10.0 :5 :WAT@O center1
        radial output/cRadial.agr 0.5 10.0 :5 :WAT@O center2
        radial output/cRadial.agr 0.5 20.0 :3 :WAT@O
        radial output/cRadial.agr 0.5 20.0 :3 :WAT@O noimage
        radial output/radial.dat 0.5 10.0 :5@CD :WAT@O
        radial output/radial2.dat 0.25 10.0 :5@CD :WAT@O
        '''

        # reference datasets straight from cpptraj
        state = pt.load_batch(traj, radial_commands)
        state.run()

        def radial(solute, spacing, cutoff, solvent=':WAT@O', **extra):
            # forwards to pt.rdf so each case lists only what distinguishes it
            return pt.rdf(traj,
                          solvent_mask=solvent,
                          bin_spacing=spacing,
                          maximum=cutoff,
                          solute_mask=solute,
                          **extra)

        # the same calculations through pytraj
        data0 = radial(':5@CD', 0.5, 10.0)
        data01 = radial(':5@CD', 0.5, 10.0)
        data1 = radial(':5', 0.5, 10.0, center_solvent=True)
        data2 = radial(':5', 0.5, 10.0, center_solute=True)
        data3 = radial(':3', 0.5, 20.0, center_solute=False)
        data4 = radial(':3', 0.5, 20.0, center_solute=False, image=False)
        data5 = radial(':5@CD', 0.25, 10.0)

        # solvent given as an index array instead of a mask string
        solvent_indices = pt.select(':WAT@O', traj.top)
        data6 = radial(':5@CD', 0.25, 10.0, solvent=solvent_indices)

        # values: data0..data4 against state.data[1]..state.data[5]
        computed = (data0, data1, data2, data3, data4)
        for dataset_index, result in enumerate(computed, 1):
            aa_eq(result[1], state.data[dataset_index], decimal=7)

        # data01 repeats the data0 call and must match the same dataset
        aa_eq(data01[1], state.data[1], decimal=7)

        # bin positions against the first column of the written files
        steps = np.loadtxt('output/radial.dat').T[0]
        aa_eq(data0[0], steps)

        steps2 = np.loadtxt('output/radial2.dat').T[0]
        aa_eq(data5[0], steps2)
        aa_eq(data6[0], steps2)
import pytraj as pt

# Load the Tc5b example trajectory with its topology from the tests data folder.
traj = pt.iterload("../tests/data/Tc5b.x", "../tests/data/Tc5b.top")
top = traj.top

# Print the selection result for the "@CA" mask.
ca_indices = pt.select("@CA", top)
print(ca_indices)

# Print the selection result for the "@H=" mask (the selected atoms
# themselves, not a count).
hydrogen_indices = pt.select("@H=", top)
print(hydrogen_indices)