def gen_cmap(args):
    """Load a trajectory file and calculate the atom contact map.

    Parameters
    ----------
    args : argparse object
        The argument options. The attributes read here are ``select``
        (one or two whitespace-separated atom names), ``s`` (topology
        file), ``f`` (xtc trajectory file), ``dt`` and ``ps`` (their
        ratio, truncated to int, is the loading stride), ``cutoff`` and
        ``switch``.

    Returns
    -------
    cmap_dat : pd.DataFrame, shape = [N, M]
        The contact map along the time. N is the number of frames, M is
        N_res * N_res (N_res is the number of residues used for the cmap
        calculations).
    """
    # TODO: add a residue based selection module here

    # Select atom indices for the distance calculations. Anything other
    # than exactly two names falls back to the first name for both groups.
    atoms_selections = args.select.split()
    if len(atoms_selections) != 2:
        atoms_selections = [atoms_selections[0]] * 2

    top = mt.load(args.s).topology
    atom_grp_a = top.select("name %s" % atoms_selections[0])
    atom_grp_b = top.select("name %s" % atoms_selections[1])

    # load the trajectory file by chunks
    print("Iterloading xtc trajectory file ...... ")
    trajs = gmxcli.read_xtc(xtc=args.f, top=args.s, chunk=1000,
                            stride=int(args.dt/args.ps))

    print("Computing contactmap ...... ")
    # Collect the per-chunk maps and join them once at the end; calling
    # np.concatenate inside the loop re-copies everything accumulated so
    # far on every chunk.
    chunk_maps = []
    for traj in trajs:
        contmap = cmap.ContactMap(traj=traj, group_a=atom_grp_a,
                                  group_b=atom_grp_b, cutoff=args.cutoff)
        contmap.generate_cmap(shape="array", switch=args.switch)
        chunk_maps.append(contmap.cmap_)

    if not chunk_maps:
        # no chunk was loaded: keep the original empty result
        cmap_dat = np.array([])
    elif len(chunk_maps) == 1:
        cmap_dat = chunk_maps[0]
    else:
        cmap_dat = np.concatenate(chunk_maps, axis=0)

    return pd.DataFrame(cmap_dat)
def iterload_xyz_coordinates(xtcfile, top, chunk, stride, atom_selection="name CA"):
    """Load a large gromacs xtc trajectory file chunk by chunk and extract
    the atom xyz coordinates.

    Parameters
    ----------
    xtcfile : str, format gromacs xtc
        The gromacs xtc trajectory file.
    top : str, format pdb
        The reference pdb file.
    chunk : int
        Number of frames to load per chunk.
    stride : int
        Skip n frames when loading the trajectory file.
    atom_selection : str
        The atom selection language expression.

    Returns
    -------
    xyz : pandas dataframe, shape = [N, M]
        The xyz coordinates dataset. N is the number of samples (frames),
        M is n_atoms * 3.
    """
    trajs = gmxcli.read_xtc(xtc=xtcfile, top=top, chunk=chunk, stride=stride)

    # Gather the coordinates of every chunk, then concatenate once; joining
    # inside the loop re-copies the accumulated array for each chunk.
    chunk_coords = []
    for traj in trajs:
        copca = cmap.CoordinatesXYZ(traj, top, atom_selection)
        chunk_coords.append(copca.xyz_coordinates())

    if not chunk_coords:
        # no chunk was loaded: keep the original empty result
        xyz = np.array([])
    elif len(chunk_coords) == 1:
        xyz = chunk_coords[0]
    else:
        xyz = np.concatenate(chunk_coords, axis=0)

    return pd.DataFrame(xyz)
def gen_dihedrals(args):
    """Generate dihedral angles from a gromacs xtc file.

    Parameters
    ----------
    args : argparse object
        The argument options. The attributes read here are ``n`` (index
        file), ``f`` (xtc trajectory file), ``s`` (topology file), ``dt``
        and ``ps`` (their ratio, truncated to int, is the loading stride).

    Returns
    -------
    dih_angles : pd.DataFrame, shape = [N, M]
        Time-series dihedral angles. N is the number of frames, M is the
        number of dihedral angles per frame. The index is the frame
        number multiplied by ``args.dt``.
    """
    elements = angles.read_index(args.n, angle_type="dihedral")

    # load the trajectory file by chunks
    print("Loading xtc trajectory file now ......")
    trajs = gmxcli.read_xtc(args.f, args.s, chunk=200,
                            stride=int(args.dt / args.ps))

    # calculate the dihedral angles chunk by chunk
    print("Calculating dihedral angles ...... ")
    # Per-chunk results are collected and concatenated once at the end
    # instead of re-copying the growing array on every chunk.
    collected = []
    n_rows = 0
    for traj in trajs:
        dang = angles.ComputeAngles(traj)
        block = dang.get_dihedral_angles(elements)
        if n_rows == 0:
            # As before: while nothing with rows has been accumulated,
            # the new chunk replaces whatever was held so far.
            collected = [block]
        else:
            collected.append(block)
        n_rows += block.shape[0]

    if not collected:
        dih_angles = np.array([])
    elif len(collected) == 1:
        dih_angles = collected[0]
    else:
        dih_angles = np.concatenate(collected, axis=0)

    dih_angles = pd.DataFrame(dih_angles)
    dih_angles.index = np.arange(dih_angles.shape[0]) * args.dt

    return dih_angles
def run_coord_number():
    """Command-line entry point: compute per-frame coordination numbers
    (or residue-by-residue contacts with ``-byres``) between two groups
    and write them to a csv file.

    The result has one row per loaded frame, indexed by the frame number
    multiplied by ``args.dt``, and is written to ``args.o`` without a
    header row.

    Examples:
    gmx_coordnum.py -f test_traj_dt1ns.xtc -s reference.pdb
    -rc " " 1 40 -lc " " 50 70 -o coord_result.csv -atomtype all all
    -byres True -v True -cutoff 0.5
    """
    startTime = datetime.now()

    args = arguments()

    resides_a, resides_b = [], []
    group_a, group_b = [], []

    if args.byres:
        # residue ranges of the first (rc) group
        ndx = index.PdbIndex(args.s, [args.rc[0]], args.rc[1:])
        ndx.resid_mapper()
        s, e = ndx.resid_mt_style(args.rc[0], args.rc[1], args.rc[2])
        resides_a = np.arange(s, e + 1)

        # residue ranges of the second (lc) group
        ndx = index.PdbIndex(args.s, [args.lc[0]], args.lc[1:])
        ndx.resid_mapper()
        s, e = ndx.resid_mt_style(args.lc[0], args.lc[1], args.lc[2])
        resides_b = np.arange(s, e + 1)

        verbose(args.v, "X-axis resids : " + " ".join([str(x) for x in resides_a]))
        verbose(args.v, "Y-axis resids : " + " ".join([str(x) for x in resides_b]))
    else:
        # TODO: define a way to select atom slices
        # define the atom indices for receptor
        group_a = index.gen_atom_index(pdbin=args.s, chain=[args.rc[0]],
                                       atomtype=args.atomtype[0],
                                       resSeq=args.rc[1:], style="mdtraj")
        # define the atom indices for ligand
        group_b = index.gen_atom_index(pdbin=args.s, chain=[args.lc[0]],
                                       atomtype=args.atomtype[-1],
                                       resSeq=args.lc[1:], style="mdtraj")

    verbose(args.v, "Atom indexing processing completed ......")

    verbose(args.v, "Loading trajectory xtc file ...... ")
    trajs = gmxcli.read_xtc(args.f, args.s, chunk=1000,
                            stride=int(args.dt / args.ps))

    verbose(args.v, "Performing calculations ...... \n")
    # Per-chunk results are gathered here and concatenated once after the
    # loop; concatenating inside the loop re-copies the accumulated array
    # for every chunk.
    chunk_results = []
    for i, traj in enumerate(trajs):
        verbose(args.v, "Generate coordinate number for chunk #%d trajectory " % i)
        if args.byres:
            coord = cmap.CmapNbyN(traj, resids_a=resides_a,
                                  resids_b=resides_b, cutoff=args.cutoff)
            coord.contact_nbyn()
            chunk_results.append(coord.contacts_by_res_)
        else:
            coord = cmap.ContactMap(traj, group_a, group_b, args.cutoff)
            coord.coord_num()
            chunk_results.append(coord.coord_number_)

    if not chunk_results:
        # no chunk was loaded: keep the original empty result
        results = np.array([])
    elif len(chunk_results) == 1:
        results = chunk_results[0]
    else:
        results = np.concatenate(chunk_results, axis=0)

    results = pd.DataFrame(results)
    results.index = np.arange(results.shape[0]) * args.dt

    verbose(args.v, "Saving results to output file ...... ")
    results.to_csv(args.o, sep=",", header=False, index=True, float_format="%.1f")

    print("Total Time Usage: ")
    print(datetime.now() - startTime)
def gmxangle():
    """A gromacs g_angle simulator which works the same way as the gromacs
    tool.

    Reads the command-line options, loads the trajectory in chunks,
    computes the angles for the atom groups listed in the index file and
    writes them to ``args.o`` through ``write_angles``.

    Returns
    -------
    None
        The angles (one row per frame, one column per angle) are written
        to the output file rather than returned. When any of the
        trajectory, index or reference files is missing, the function
        reports which one and exits the process with status 1.
    """
    args = arguments()

    # Guard clause: all three input files must exist before any work.
    if not (os.path.exists(args.f) and os.path.exists(args.n)
            and os.path.exists(args.s)):
        print("Some of the input files are not existed. Input again.")
        if not os.path.exists(args.n):
            print("Index file is not provided. Run again.")
        if not os.path.exists(args.f):
            print("Xtc trajectory file is not provided. Run again.")
        if not os.path.exists(args.s):
            print("Reference file is not provided. Run again.")
        sys.exit(1)

    # prepare index atom slices
    ndx = read_index(args.n, args.type)
    if args.v:
        print("Compute cosine of the angles: ", bool(args.cos))
        print("Atom indices: ")
        print(ndx)

    # load trajectories
    trajs = gmxcli.read_xtc(xtc=args.f, top=args.s, chunk=1000,
                            stride=int(args.dt / args.ps))

    if args.v:
        # The chunks are walked twice in verbose mode (here and in the
        # computation loop below). Materialise them first so that a
        # one-shot iterator is not exhausted by this report.
        # NOTE(review): read_xtc's return type is not visible from here;
        # if it already returns a list this is only a shallow copy.
        trajs = list(trajs)
        print("Frame information: ")
        for i, traj in enumerate(trajs):
            print("Trajectory %3d: %12d frames" % (i, traj.n_frames))

    # For each traj chunk, calculate angles; the chunks are concatenated
    # once after the loop instead of re-copying the growing array on
    # every iteration.
    collected = []
    n_rows = 0
    for i, traj in enumerate(trajs):
        cangle = ComputeAngles(traj)
        if args.v:
            # progress is estimated from the current chunk's frame count
            print("Progress: %12d " % (i * traj.n_frames))
            # shape of what has been accumulated so far
            if collected:
                print((n_rows,) + tuple(collected[0].shape[1:]))
            else:
                print((0,))

        block = cangle.get_dihedral_angles(ndx)
        if n_rows == 0:
            # As before: while nothing with rows has been accumulated,
            # the new chunk replaces whatever was held so far.
            collected = [block]
        else:
            collected.append(block)
        n_rows += block.shape[0]

    if not collected:
        angles = np.array([])
    elif len(collected) == 1:
        angles = collected[0]
    else:
        angles = np.concatenate(collected, axis=0)

    if args.v:
        print("Write angles to output file: ", args.o)

    # write angles to an output file
    write_angles(angles, args.o, cosine=args.cos, sine=args.sin,
                 dt=args.dt, begin=args.b, end=args.e)
def iterload_cmap():
    """Command-line entry point: load a large trajectory in chunks, build
    the contact map between two residue ranges and write it to a csv file.

    With ``args.opt`` set to one of 'A', 'a', 'average' or 'Average' the
    time-averaged map is written as a matrix whose rows are the residue
    numbers ``args.rc[1]..args.rc[2]`` and whose columns are
    ``args.lc[1]..args.lc[2]``. Otherwise the per-frame contact map is
    written, one row per frame.

    Returns
    -------
    None
        Results are written to ``args.o``. If no existing ``.pdb`` file is
        found in ``args.s`` or ``args.f`` the process exits with status 1.
    """
    # for calculation time counting
    startTime = datetime.now()

    # argument options
    args = arguments()

    verbose(args.v, "Atom selecting ......")
    # TODO: atom selection method required
    # Pick the reference pdb: prefer -s, fall back to -f.
    if os.path.exists(args.s) and args.s.endswith(".pdb"):
        inp = args.s
    elif os.path.exists(args.f) and args.f.endswith(".pdb"):
        inp = args.f
    else:
        print("Reference pdb file is not existed. Exit now!")
        # A missing reference is a failure, so report it with a non-zero
        # status (it used to exit with 0, which reads as success).
        sys.exit(1)

    # number of residues in each range (both bounds inclusive)
    n_rec = int(args.rc[2]) - int(args.rc[1]) + 1
    n_lig = int(args.lc[2]) - int(args.lc[1]) + 1

    verbose(args.v, "Loading trajectory ......")
    # read gromacs trajectory
    trajs = gmxcli.read_xtc(args.f, args.s, chunk=1000,
                            stride=int(args.dt / args.ps))
    # NOTE(review): trajs is iterated here and again by the cmap helpers
    # below, so read_xtc presumably returns a re-iterable sequence - confirm.
    n_frames = sum(x.n_frames for x in trajs)
    verbose(args.v, "Total number of frames: %d " % n_frames)

    verbose(args.v, "Start calculating contact map ......")
    if args.NbyN:
        contact_map = cmap_nbyn(trajs, inp, args.rc, args.lc, args.v,
                                args.cutoff, " ABCDEFGHIJK", args.atomtype)
    else:
        contact_map = cmap_general(trajs, inp, args.rc, args.lc,
                                   args.atomtype, args.cutoff,
                                   v=args.v, switch=args.switch)

    # subset the results
    verbose(args.v, "Preparing output file ......")
    contact_map = pd.DataFrame(contact_map)
    contact_map.index = np.arange(contact_map.shape[0]) * args.dt
    contact_map = pca.datset_subset(contact_map, args.b, args.e)

    if args.opt in ['A', 'a', 'average', 'Average']:
        # get mean cmap data
        # np.mean over a DataFrame gives a pandas Series, which has no
        # reshape method in current pandas; convert to an ndarray first.
        mean_map = np.asarray(np.mean(contact_map, axis=0))
        results = pd.DataFrame(mean_map.reshape((n_rec, n_lig)))
        results.index = range(int(args.rc[1]), int(args.rc[2]) + 1)
        results.columns = range(int(args.lc[1]), int(args.lc[2]) + 1)
    else:
        results = pd.DataFrame(contact_map)
        # NOTE(review): the index is rebuilt starting from 0 here, so the
        # labels no longer carry any offset from the args.b/args.e subset
        # above - confirm this is intended.
        results.index = np.arange(results.shape[0]) * args.dt
        results.columns = [str(x) for x in np.arange(results.shape[1])]

    # save results to an output file
    verbose(args.v, "Writing output now ...... \n")
    results.to_csv(args.o, sep=",", header=True, index=True, float_format="%.3f")

    print("Total Time Usage: ")
    print(datetime.now() - startTime)
from mdanaly import gmxcli, pca, cmap from dockml import index import sys import numpy as np if __name__ == "__main__": ref = sys.argv[1] xtc = sys.argv[2] ind = sys.argv[3] # load xtc first trajs = gmxcli.read_xtc(xtc, ref, 1000, 50) # process index ndx = index.GmxIndex(ind) sets = ["receptor", "ligand"] used_groups = [] atom_indices = [] for i in [0, 1]: print("Please select a group for %s: " % sets[i]) for j, gn in enumerate(ndx.groups): print("%d : %s" % (j, gn)) used_groups.append(ndx.groups[int(input("Your choice: "))]) rec_ndx = [int(x) - 1 for x in ndx.groupContent(used_groups[0])] lig_ndx = [int(x) - 1 for x in ndx.groupContent(used_groups[1])] cmaps = np.array([])