Example #1
0
def gen_cmap(args):
    """Load a trajectory file and calculate the atom contact map.

    Parameters
    ----------
    args : argparse object
        The argument options. The attributes read here are ``select``
        (whitespace-separated atom names), ``s`` (reference/topology file),
        ``f`` (xtc trajectory file), ``dt`` and ``ps`` (``dt / ps`` is used
        as the loading stride), ``cutoff`` and ``switch``.

    Returns
    -------
    cmap_dat : pd.DataFrame, shape = [N, M]
        The contact map along the time. N is the number of frames,
        M is N_res * N_res (N_res is the number of residues used for the
        cmap calculation). An empty DataFrame is returned when no
        trajectory chunk was loaded.

    Raises
    ------
    ValueError
        If ``args.select`` does not contain any atom name.
    """

    # TODO: add a residue based selection module here
    # select atoms indices for distance calculations
    atom_names = args.select.split()
    if not atom_names:
        raise ValueError("args.select must contain at least one atom name")
    if len(atom_names) != 2:
        # Anything other than exactly two names falls back to using the
        # first name for both groups.
        atom_names = [atom_names[0]] * 2

    top = mt.load(args.s).topology

    atom_grp_a = top.select("name %s" % atom_names[0])
    atom_grp_b = top.select("name %s" % atom_names[1])

    # load the trajectory file by chunks iteratively
    print("Iterloading xtc trajectory file ...... ")
    trajs = gmxcli.read_xtc(xtc=args.f, top=args.s, chunk=1000, stride=int(args.dt/args.ps))

    print("Computing contactmap ...... ")
    # Collect the per-chunk maps and concatenate once at the end; growing an
    # array with np.concatenate inside the loop re-copies every earlier frame
    # on each iteration.
    chunk_maps = []
    for traj in trajs:
        contmap = cmap.ContactMap(traj=traj, group_a=atom_grp_a, group_b=atom_grp_b, cutoff=args.cutoff)
        contmap.generate_cmap(shape="array", switch=args.switch)
        chunk_maps.append(contmap.cmap_)

    cmap_dat = np.concatenate(chunk_maps, axis=0) if chunk_maps else np.array([])

    return pd.DataFrame(cmap_dat)
Example #2
0
def iterload_xyz_coordinates(xtcfile, top, chunk, stride, atom_selection="name CA"):
    """Load a gromacs xtc trajectory chunk by chunk and extract xyz coordinates.

    The trajectory is read through ``gmxcli.read_xtc`` and the coordinates
    of every chunk are extracted with ``cmap.CoordinatesXYZ``.

    Parameters
    ----------
    xtcfile : str, format gromacs xtc
        The gromacs xtc trajectory file.
    top : str, format pdb
        The reference pdb file.
    chunk : int
        Number of frames to load per time.
    stride : int
        Skip n frames when loading the trajectory file.
    atom_selection : str
        The atom selection language.

    Returns
    -------
    xyz : pandas dataframe, shape = [N, M]
        The xyz coordinates dataset.
        N is the number of samples, or frames;
        M is n_atoms * 3.
        An empty DataFrame is returned when no chunk was loaded.
    """

    trajs = gmxcli.read_xtc(xtc=xtcfile, top=top, chunk=chunk, stride=stride)

    # Gather every chunk first and concatenate once: concatenating inside the
    # loop would copy all previously loaded frames again for each new chunk.
    chunk_coords = [
        cmap.CoordinatesXYZ(traj, top, atom_selection).xyz_coordinates()
        for traj in trajs
    ]

    xyz = np.concatenate(chunk_coords, axis=0) if chunk_coords else np.array([])

    return pd.DataFrame(xyz)
Example #3
0
def gen_dihedrals(args):
    """Generate dihedral angles from a gromacs xtc file.

    Parameters
    ----------
    args : argparse object
        The argument options. The attributes read here are ``n`` (index file
        describing the dihedrals), ``f`` (xtc trajectory file), ``s``
        (reference/topology file), ``dt`` and ``ps`` (``dt / ps`` is used as
        the loading stride; ``dt`` also scales the output index).

    Returns
    -------
    dih_angles : pd.DataFrame, shape = [N, M]
        Time-series dihedral angles. N is the number of frames, M is the
        number of dihedral angles per frame. The index is the frame number
        multiplied by ``args.dt``.
    """

    elements = angles.read_index(args.n, angle_type="dihedral")

    # load the trajectory file by chunks iteratively
    print("Loading xtc trajectory file now ......")
    trajs = gmxcli.read_xtc(args.f, args.s, chunk=200, stride=int(args.dt / args.ps))

    # calculate the dihedral angles chunk by chunk, then join them in one
    # concatenate call instead of re-copying the growing array every chunk
    print("Calculating dihedral angles ...... ")
    chunk_angles = [
        angles.ComputeAngles(traj).get_dihedral_angles(elements)
        for traj in trajs
    ]
    stacked = np.concatenate(chunk_angles, axis=0) if chunk_angles else np.array([])

    dih_angles = pd.DataFrame(stacked)
    dih_angles.index = np.arange(dih_angles.shape[0]) * args.dt

    return dih_angles
Example #4
0
def _resid_range(reference, selection):
    """Return the inclusive residue index range for ``selection``.

    ``selection`` is ``[chain, first_resid, last_resid]``; the bounds are
    translated through ``index.PdbIndex.resid_mt_style``.
    """
    ndx = index.PdbIndex(reference, [selection[0]], selection[1:])
    ndx.resid_mapper()
    start, end = ndx.resid_mt_style(selection[0], selection[1], selection[2])
    return np.arange(start, end + 1)


def run_coord_number():
    """Compute coordination numbers along a trajectory and save them as CSV.

    The command line options are obtained from ``arguments()``. With
    ``-byres`` the contacts are computed per residue pair through
    ``cmap.CmapNbyN``; otherwise atom groups are built with
    ``index.gen_atom_index`` and ``cmap.ContactMap.coord_num`` is used.
    The trajectory is processed chunk by chunk, the per-chunk results are
    stacked along the frame axis and written to ``args.o`` with the frame
    number multiplied by ``args.dt`` as index. The elapsed wall time is
    printed at the end.

    Examples:
    gmx_coordnum.py -f test_traj_dt1ns.xtc -s reference.pdb -rc " " 1 40
    -lc " " 50 70 -o coord_result.csv -atomtype all all -byres True
    -v True -cutoff 0.5
    """
    start_time = datetime.now()

    args = arguments()

    resides_a, resides_b = [], []
    group_a, group_b = [], []

    if args.byres:
        resides_a = _resid_range(args.s, args.rc)
        resides_b = _resid_range(args.s, args.lc)

        verbose(args.v,
                "X-axis resids : " + " ".join([str(x) for x in resides_a]))
        verbose(args.v,
                "Y-axis resids : " + " ".join([str(x) for x in resides_b]))

    else:
        # TODO: define a way to select atom slices
        # define the atom indices for receptor
        group_a = index.gen_atom_index(pdbin=args.s,
                                       chain=[args.rc[0]],
                                       atomtype=args.atomtype[0],
                                       resSeq=args.rc[1:],
                                       style="mdtraj")
        # define the atom indices for ligand
        group_b = index.gen_atom_index(pdbin=args.s,
                                       chain=[args.lc[0]],
                                       atomtype=args.atomtype[-1],
                                       resSeq=args.lc[1:],
                                       style="mdtraj")
        verbose(args.v, "Atom indexing processing completed ......")

    verbose(args.v, "Loading trajectory xtc file ...... ")
    trajs = gmxcli.read_xtc(args.f,
                            args.s,
                            chunk=1000,
                            stride=int(args.dt / args.ps))

    verbose(args.v, "Performing calculations ...... ")
    # Per-chunk results are collected and concatenated once after the loop;
    # concatenating inside the loop re-copies all earlier frames every time.
    chunk_results = []
    for i, traj in enumerate(trajs):
        verbose(args.v,
                "Generate coordinate number for chunk #%d trajectory " % i)
        if args.byres:
            coord = cmap.CmapNbyN(traj,
                                  resids_a=resides_a,
                                  resids_b=resides_b,
                                  cutoff=args.cutoff)
            coord.contact_nbyn()
            chunk_results.append(coord.contacts_by_res_)
        else:
            coord = cmap.ContactMap(traj, group_a, group_b, args.cutoff)
            coord.coord_num()
            chunk_results.append(coord.coord_number_)

    if chunk_results:
        results = np.concatenate(chunk_results, axis=0)
    else:
        results = np.array([])

    results = pd.DataFrame(results)
    # label every row with its time: frame number * time step
    results.index = np.arange(results.shape[0]) * args.dt

    verbose(args.v, "Saving results to output file ...... ")
    results.to_csv(args.o,
                   sep=",",
                   header=False,
                   index=True,
                   float_format="%.1f")

    print("Total Time Usage: ")
    print(datetime.now() - start_time)
Example #5
0
def gmxangle():
    """A gromacs g_angle simulator which works the same way as the gromacs tool.

    The command line options are obtained from ``arguments()``. The angles
    are computed for every trajectory chunk, stacked along the frame axis
    (N frames by M angles per frame) and handed to ``write_angles`` together
    with the output options.

    Returns
    -------
    None
        The function never returns: it always terminates the process through
        ``sys.exit``, with status 0 after the angles were written and with
        status 1 when any of the input files is missing.
    """

    args = arguments()

    # Guard clause: report every missing input file and stop with an error
    # status before doing any work.
    required = (
        (args.n, "Index file is not provided. Run again."),
        (args.f, "Xtc trajectory file is not provided. Run again."),
        (args.s, "Reference file is not provided. Run again."),
    )
    missing = [msg for path, msg in required if not os.path.exists(path)]
    if missing:
        print("Some of the input files are not existed. Input again.")
        for msg in missing:
            print(msg)
        sys.exit(1)

    # prepare index atom slices
    ndx = read_index(args.n, args.type)

    if args.v:
        print("Compute cosine of the angles: ", bool(args.cos))
        print("Atom indices: ")
        print(ndx)

    # load trajectories
    # NOTE(review): trajs is iterated twice below when args.v is set, which
    # presumes read_xtc returns a re-iterable sequence (not a one-shot
    # generator) -- confirm against gmxcli.read_xtc.
    trajs = gmxcli.read_xtc(xtc=args.f,
                            top=args.s,
                            chunk=1000,
                            stride=int(args.dt / args.ps))
    if args.v:
        print("Frame information: ")
        for i, traj in enumerate(trajs):
            print("Trajectory %3d: %12d frames" % (i, traj.n_frames))

    # For each traj chunk, calculate the angles; the chunks are concatenated
    # once after the loop rather than re-copying the growing array each time.
    # NOTE(review): get_dihedral_angles is used whatever args.type is --
    # confirm this is intended for non-dihedral index types.
    chunk_angles = []
    acc_shape = (0,)  # shape of the data gathered so far, for verbose output
    for i, traj in enumerate(trajs):
        cangle = ComputeAngles(traj)
        if args.v:
            print("Progress: %12d " % (i * traj.n_frames))
            print(acc_shape)

        current = cangle.get_dihedral_angles(ndx)
        chunk_angles.append(current)
        acc_shape = (acc_shape[0] + current.shape[0],) + tuple(current.shape[1:])

    if chunk_angles:
        angles = np.concatenate(chunk_angles, axis=0)
    else:
        angles = np.array([])

    if args.v:
        print("Write angles to output file: ", args.o)

    # write angles to an output file
    write_angles(angles,
                 args.o,
                 cosine=args.cos,
                 sine=args.sin,
                 dt=args.dt,
                 begin=args.b,
                 end=args.e)

    # Successful completion must not report a failure status.
    sys.exit(0)
Example #6
0
def iterload_cmap():
    """Load a large trajectory chunk by chunk and generate its contact map.

    The command line options are obtained from ``arguments()``. The contact
    map is computed by ``cmap_nbyn`` (when ``args.NbyN`` is set) or
    ``cmap_general``, restricted to the ``args.b`` .. ``args.e`` range with
    ``pca.datset_subset`` and written to ``args.o`` as CSV. With
    ``args.opt`` being one of ``A``/``a``/``average``/``Average`` the
    frame-averaged map is written as a receptor-by-ligand residue matrix;
    otherwise the per-frame time series is written.

    Returns
    -------
    None
        Results are written to ``args.o``. The process exits with status 1
        when no existing ``.pdb`` reference is found in ``args.s`` or
        ``args.f``.
    """

    # for calculation time counting
    start_time = datetime.now()

    # argument options
    args = arguments()

    verbose(args.v, "Atom selecting ......")
    # TODO: atom selection method required
    if os.path.exists(args.s) and args.s.endswith(".pdb"):
        inp = args.s
    elif os.path.exists(args.f) and args.f.endswith(".pdb"):
        inp = args.f
    else:
        print("Reference pdb file is not existed. Exit now!")
        # A missing reference is an error: exit with a non-zero status.
        sys.exit(1)

    # number of residues in the receptor / ligand ranges (bounds inclusive)
    n_rec = int(args.rc[2]) - int(args.rc[1]) + 1
    n_lig = int(args.lc[2]) - int(args.lc[1]) + 1

    verbose(args.v, "Loading trajectory ......")
    # read gromacs trajectory
    # NOTE(review): trajs is iterated here for the frame count and again by
    # the cmap helpers, which presumes read_xtc returns a re-iterable
    # sequence -- confirm against gmxcli.read_xtc.
    trajs = gmxcli.read_xtc(args.f,
                            args.s,
                            chunk=1000,
                            stride=int(args.dt / args.ps))

    n_frames = sum(x.n_frames for x in trajs)
    verbose(args.v, "Total number of frames: %d " % n_frames)

    verbose(args.v, "Start calculating contact map ......")
    if args.NbyN:
        contact_map = cmap_nbyn(trajs, inp, args.rc, args.lc, args.v,
                                args.cutoff, " ABCDEFGHIJK", args.atomtype)
    else:
        contact_map = cmap_general(trajs,
                                   inp,
                                   args.rc,
                                   args.lc,
                                   args.atomtype,
                                   args.cutoff,
                                   v=args.v,
                                   switch=args.switch)

    # subset the results
    verbose(args.v, "Preparing output file ......")
    contact_map = pd.DataFrame(contact_map)
    contact_map.index = np.arange(contact_map.shape[0]) * args.dt
    contact_map = pca.datset_subset(contact_map, args.b, args.e)

    # get mean cmap data
    if args.opt in ['A', 'a', 'average', 'Average']:
        # np.mean over a DataFrame yields a pandas Series, which has no
        # reshape method in current pandas; convert to ndarray first.
        mean_map = np.asarray(np.mean(contact_map, axis=0))
        results = pd.DataFrame(mean_map.reshape((n_rec, n_lig)))
        results.index = range(int(args.rc[1]), int(args.rc[2]) + 1)
        results.columns = range(int(args.lc[1]), int(args.lc[2]) + 1)
    else:
        results = pd.DataFrame(contact_map)
        # NOTE(review): the index restarts at 0 after the b/e subset, so the
        # written times no longer include the begin offset -- confirm intent.
        results.index = np.arange(results.shape[0]) * args.dt
        results.columns = [str(x) for x in np.arange(results.shape[1])]

    # save results to an output file
    verbose(args.v, "Writing output now ...... ")
    results.to_csv(args.o,
                   sep=",",
                   header=True,
                   index=True,
                   float_format="%.3f")

    print("Total Time Usage: ")
    print(datetime.now() - start_time)
Example #7
0
from mdanaly import gmxcli, pca, cmap
from dockml import index
import sys
import numpy as np

if __name__ == "__main__":

    ref = sys.argv[1]
    xtc = sys.argv[2]
    ind = sys.argv[3]

    # load xtc first
    trajs = gmxcli.read_xtc(xtc, ref, 1000, 50)

    # process index
    ndx = index.GmxIndex(ind)
    sets = ["receptor", "ligand"]
    used_groups = []

    atom_indices = []
    for i in [0, 1]:
        print("Please select a group for %s: " % sets[i])
        for j, gn in enumerate(ndx.groups):
            print("%d : %s" % (j, gn))

        used_groups.append(ndx.groups[int(input("Your choice: "))])

    rec_ndx = [int(x) - 1 for x in ndx.groupContent(used_groups[0])]
    lig_ndx = [int(x) - 1 for x in ndx.groupContent(used_groups[1])]

    cmaps = np.array([])