def cluster_dataset(array_like, options=''):
    '''Cluster a collection of values with cpptraj's clustering analysis.

    Parameters
    ----------
    array_like : array_like
        data points to cluster
    options : str, cpptraj options
        appended verbatim after ``data __array_like`` in the command

    Returns
    -------
    cluster index for each data point (numpy array built from the
    second-to-last dataset left in the dataset list by the analysis)

    Examples
    --------
    >>> import pytraj as pt
    >>> import numpy as np
    >>> array_like = np.random.randint(0, 10, 1000)
    >>> data = pt.cluster.cluster_dataset(array_like, 'clusters 10 epsilon 3.0')
    '''
    import numpy as np

    datasets = CpptrajDatasetList()

    # Stage the input under a private name that the command below refers to.
    datasets.add('double', '__array_like')
    datasets[0].resize(len(array_like))
    datasets[0].values[:] = array_like

    clustering = c_analysis.Analysis_Clustering()
    clustering('data __array_like ' + options, dslist=datasets)

    return np.array(datasets[-2])
def test_1(self):
    """Run a distance action frame by frame, then once on a frame iterator.

    The second action receives the whole iterator in a single ``compute``
    call and must report that it processed five frames.
    """
    mask = ":2@CA :10@CA"
    traj = pt.iterload(fn('Tc5b.x'), fn('Tc5b.top'))

    # First pass: one compute() call per frame.
    per_frame_action = adict['distance']
    dslist = CpptrajDatasetList()
    per_frame_action.read_input(mask, traj.top, dslist=dslist)
    per_frame_action.setup(traj.top)
    for frame in traj.iterframe(stop=5):
        per_frame_action.compute(frame)

    # Second pass: a fresh dataset list, and the iterator handed over whole.
    dslist = CpptrajDatasetList()
    iterator_action = adict['distance']
    iterator_action.read_input(mask, traj.top, dslist=dslist)
    iterator_action.setup(traj.top)
    iterator_action.compute(traj.iterframe(stop=5))

    assert iterator_action.n_frames == 5
def vector(traj=None,
           command="",
           frame_indices=None,
           dtype='ndarray',
           top=None):
    """perform vector calculation. See example below. Same as 'vector'
    command in cpptraj.

    Parameters
    ----------
    traj : Trajectory-like or iterable that produces :class:`pytraj.Frame`
    command : str or a list of strings, cpptraj command
    frame_indices : array-like, optional, default None
        only perform calculation for given frame indices
    dtype : output's dtype, default 'ndarray'
    top : Topology, optional, default None

    Returns
    -------
    out : numpy ndarray, shape (n_frames, 3) if command is a string
          numpy ndarray, shape (n_vectors, n_frames, 3) if command is a
          list of strings

    Examples
    --------
    >>> import pytraj as pt
    >>> traj = pt.datafiles.load_tz2_ortho()
    >>> data = pt.vector.vector(traj, "@CA @CB")
    >>> data = pt.vector.vector(traj, [("@CA @CB"),])
    >>> data = pt.vector.vector(traj, "principal z")
    >>> data = pt.vector.vector(traj, "principal x")
    >>> data = pt.vector.vector(traj, "ucellx")
    >>> data = pt.vector.vector(traj, "boxcenter")
    >>> data = pt.vector.vector(traj, "box")

    Notes
    -----
    It's faster to calculate with a list of commands.
    For example, if you need to perform 3 calculations for 'ucellx',
    'boxcenter', 'box' like below:

    >>> data = pt.vector.vector(traj, "ucellx")
    >>> data = pt.vector.vector(traj, "boxcenter")
    >>> data = pt.vector.vector(traj, "box")

    You should use a list of commands for faster calculation.

    >>> comlist = ['ucellx', 'boxcenter', 'box']
    >>> data = pt.vector.vector(traj, comlist)
    """
    output = CpptrajDatasetList()
    topology = get_topology(traj, top)
    commands = get_list_of_commands(command)
    frames = get_fiterator(traj, frame_indices)

    # Register one vector action per command; all of them write into the
    # same dataset list and are driven by a single pass over the frames.
    actions = ActionList()
    for single_command in commands:
        actions.add(c_action.Action_Vector(),
                    single_command,
                    topology,
                    dslist=output)
    actions.compute(frames)

    return get_data_from_dtype(output, dtype=dtype)
def read_data(cls, filename, arg=""):
    '''Read `filename` with a DataFile and return its content as a DatasetList.

    `arg` is wrapped in an ArgList and passed along to the reader.

    >>> from pytraj.datasets.datasetlist import DatasetList
    >>> d = DatasetList.read_data('data/tc5b.native_contacts.dat')
    '''
    reader = DataFile()
    loaded = CpptrajDatasetList()
    reader_args = ArgList(arg)
    reader.read_data(filename, reader_args, loaded)
    return DatasetList(loaded)
def read_data(cls, filename, arg=""):
    '''Read `filename` with a DataFile and return its content as a DatasetList.

    `arg` is wrapped in an ArgList and passed along to the reader.

    >>> from pytraj.datasets.datasetlist import DatasetList
    >>> DatasetList.read_data('data/tc5b.native_contacts.dat')
    <pytraj.DatasetList with 2 datasets>
    Contacts_00001[native]
    [ 7095. 5904. 5638. 5600. 5695. 5745. 5611. 5556. 5739. 5748.]
    <BLANKLINE>
    Contacts_00001[nonnative]
    [ 0. 2696. 3065. 3552. 3700. 2624. 4000. 3797. 3482. 4265.]
    '''
    reader = DataFile()
    loaded = CpptrajDatasetList()
    reader_args = ArgList(arg)
    reader.read_data(filename, reader_args, loaded)
    return DatasetList(loaded)
def nastruct(traj=None,
             ref=0,
             resrange=None,
             resmap=None,
             hbcut=3.5,
             frame_indices=None,
             pucker_method='altona',
             dtype='nupars',
             groove_3dna=True,
             top=None):
    """compute nucleic acid parameters. (adapted from cpptraj doc)

    Parameters
    ----------
    traj : Trajectory-like
    ref : {Frame, int}, default 0 (first frame)
    resrange : None, str or array-like of integers
    resmap : residue map, example: 'AF2:A'
    hbcut : float, default=3.5 Angstrom
        Distance cutoff for determining basepair hbond
    pucker_method : str, {'altona', 'cremer'}, default 'altona'
        'altona' : Use method of Altona & Sundaralingam to calculate sugar pucker
        'cremer' : Use method of Cremer and Pople to calculate sugar pucker
    frame_indices : array-like, default None (all frames)
    groove_3dna : bool, default True
        if True, major and minor groove will match 3DNA's output.
    dtype : str, {'nupars', 'cpptraj_dataset'}, default 'nupars'

    Returns
    -------
    out : nupars object. One can access different values (major groove width,
        xdisp values ...) through its attributes. See example below.

    Raises
    ------
    ValueError
        if `dtype` is neither 'nupars' nor 'cpptraj_dataset' (raised after the
        calculation has run)

    Examples
    --------
    >>> import pytraj as pt
    >>> import numpy as np
    >>> traj = pt.datafiles.load_rna()
    >>> data = pt.nastruct(traj, groove_3dna=False)
    >>> data.keys()[:5] # doctest: +SKIP
    ['buckle', 'minor', 'major', 'xdisp', 'stagger']
    >>> # get minor groove width values for each pairs for each snapshot
    >>> # data.minor is a tuple, first value is a list of basepairs, second value is
    >>> # numpy array, shape=(n_frames, n_pairs)

    >>> data.minor # doctest: +SKIP
    (['1G16C', '2G15C', '3G14C', '4C13G', '5G12C', '6C11G', '7C10G', '8C9G'],
     array([[ 13.32927036,  13.403409  ,  13.57159901, ...,  13.26655865,
              13.43054485,  13.4557209 ],
            [ 13.32002068,  13.45918751,  13.63253593, ...,  13.27066231,
              13.42743683,  13.53450871],
            [ 13.34087658,  13.53778553,  13.57062435, ...,  13.29017353,
              13.38542843,  13.46101475]]))

    >>> data.twist # doctest: +SKIP
    (['1G16C-2G15C', '2G15C-3G14C', '3G14C-4C13G', '4C13G-5G12C', '5G12C-6C11G', '6C11G-7C10G', '7C10G-8C9G'],
     array([[ 34.77773666,  33.98158646,  30.18647003, ...,  35.14608765,
              33.9628334 ,  33.13056946],
            [ 33.39176178,  32.68476105,  28.36385536, ...,  36.59774399,
              30.20827484,  26.48732948],
            [ 36.20665359,  32.58955002,  27.47707367, ...,  33.42843246,
              30.90047073,  33.73724365]]))

    >>> # change dtype
    >>> data = pt.nastruct(traj, dtype='cpptraj_dataset')
    """
    from pytraj.datasets.c_datasetlist import DatasetList as CpptrajDatasetList
    from .c_action import c_action
    from pytraj.array import DataArray

    resrange_arg = get_resrange(resrange)
    frames = get_fiterator(traj, frame_indices)
    ref = get_reference(traj, ref)
    topology = get_topology(traj, top)

    resmap_arg = ""
    if resmap is not None:
        resmap_arg = "resmap " + resmap

    hbcut_arg = ""
    if hbcut is not None:
        hbcut_arg = "hbcut " + str(hbcut)

    groove_arg = ""
    if groove_3dna:
        groove_arg = 'groovecalc 3dna'

    # Empty pieces are joined too, so the spacing of the command is unchanged.
    command = " ".join(
        [resrange_arg, resmap_arg, hbcut_arg, pucker_method, groove_arg])

    c_dslist = CpptrajDatasetList()

    # Construct the action, then feed the reference, then feed the frames.
    # NOTE(review): the original comment says these separate steps are needed
    # so the function can be pickled for parallel runs, without knowing why.
    action = c_action.Action_NAstruct(command, top=topology, dslist=c_dslist)
    action.compute(ref)
    action.compute(frames)

    if dtype == 'cpptraj_dataset':
        return c_dslist
    if dtype != 'nupars':
        raise ValueError("only support dtype = {'nupars', 'cpptraj_dataset'}")

    arrays = []
    for dataset in c_dslist:
        array = DataArray(dataset)
        # The first entry comes from the reference computed above; drop it.
        array.values = array.values[1:]
        arrays.append(array)
    return nupars(_group(arrays, lambda x: x.aspect))
def calc_dssp(traj=None,
              mask="",
              frame_indices=None,
              dtype='ndarray',
              simplified=False,
              top=None):
    """return dssp profile for frame/traj

    Parameters
    ----------
    traj : Trajectory-like
    mask: str
        atom mask
    frame_indices : {None, array-like}, default None, optional
        specify frame numbers for calculation. if None, do all frames
    dtype : str, default 'ndarray'
        return data type, for regular user, just use default one (ndarray).
        use dtype='dataset' if wanting to get secondary structure in integer format
    simplified : bool, default False
        if True, use simplified codes, only has 'H', 'E' and 'C'
        if False, use all DSSP codes

    Returns
    -------
    out_0: ndarray, shape=(n_residues,)
        residue names
    out_1: ndarray, shape=(n_frames, n_residues)
        DSSP for each residue
    out_2 : pytraj.DatasetList
        average value for each secondary structure type

    Examples
    --------
    >>> import pytraj as pt
    >>> traj = pt.load_pdb_rcsb('1l2y')
    >>> residues, ss, _ = pt.dssp(traj, ":2-10")
    >>> residues # doctest: +SKIP
    array(['LEU:2', 'TYR:3', 'ILE:4', 'GLN:5', 'TRP:6', 'LEU:7', 'LYS:8',
           'ASP:9', 'GLY:10'],
          dtype='<U6')
    >>> ss # doctest: +SKIP
    array([['0', 'H', 'H', ..., 'H', 'T', '0'],
           ['0', 'H', 'H', ..., 'H', 'T', '0'],
           ['0', 'H', 'H', ..., 'H', 'T', '0'],
           ...,
           ['0', 'H', 'H', ..., 'H', 'T', '0'],
           ['0', 'H', 'H', ..., 'H', 'H', '0'],
           ['0', 'H', 'H', ..., 'H', 'T', '0']],
          dtype='<U1')

    >>> residues, ss, _ = pt.dssp(traj, mask=range(100))

    >>> traj = pt.fetch_pdb('1l2y')
    >>> residues, ss, _ = pt.dssp(traj, simplified=True)
    >>> ss[0].tolist() # first frame
    ['C', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'C', 'C', 'H', 'H', 'H', 'H', 'C', 'C', 'C', 'C', 'C', 'C']

    Notes
    -----
    ========= ======= ========= ========================
    Character Integer DSSP_Char Secondary structure type
    ========= ======= ========= ========================
    0         0       '0'       None
    b         1       'E'       Parallel Beta-sheet
    B         2       'B'       Anti-parallel Beta-sheet
    G         3       'G'       3-10 helix
    H         4       'H'       Alpha helix
    I         5       'I'       Pi (3-14) helix
    T         6       'T'       Turn
    S         7       'S'       Bend
    ========= ======= ========= ========================

    Simplified codes::

        - 'H': include 'H', 'G', 'I' (helix)
        - 'E': include 'E', 'B' (strand)
        - 'C': include 'T', 'S' or '0' (coil)

    Simplified codes will be mostly used for visualization in other packages.
    """
    from pytraj.datasets.c_datasetlist import DatasetList as CpptrajDatasetList
    from pytraj.c_action.c_action import Action_DSSP

    c_dslist = CpptrajDatasetList()
    Action_DSSP()(mask, traj, top=top, dslist=c_dslist)

    # Give the "DSSP_00000[...]" datasets nicer, lower-case "..._avg" keys;
    # every other dataset keeps its key untouched.
    for old_key, dataset in c_dslist.iteritems():
        if 'DSSP' not in old_key:
            continue
        renamed = old_key.replace("DSSP_00000[", "").replace("]", "_avg")
        dataset.key = renamed.lower()

    dtype = dtype.lower()
    if dtype != 'ndarray':
        return get_data_from_dtype(c_dslist, dtype=dtype)

    # The integer-typed datasets carry the per-residue assignments.
    integer_datasets = c_dslist.grep("integer", mode='dtype')
    per_residue_codes = integer_datasets.values
    residue_keys = integer_datasets.keys()
    avg_dict = DatasetList(c_dslist.grep('_avg'))

    converted = []
    for codes in per_residue_codes:
        converted.append(
            _to_string_secondary_structure(codes, simplified=simplified))
    # Transposed so that, per the docstring, rows are frames.
    ss_array = np.asarray(converted).T

    return np.asarray(residue_keys), ss_array, avg_dict
def vector_mask(traj=None,
                mask="",
                frame_indices=None,
                dtype='ndarray',
                top=None):
    """compute vector between two masks

    Parameters
    ----------
    traj : Trajectory-like or iterable that produces Frame
    mask: str or array of string or array of integers, shape (n_vectors, 2)
        vector masks
    frame_indices : array-like or iterable that produces integer number
        frame indices
    dtype : str, default 'ndarray'
        output dtype
    top : Topology, optional, default None

    Returns
    -------
    if mask is a string, return 2D ndarray, shape (n_frames, 3)
    if mask is a list of strings or a 2D ndarray, return 3D ndarray,
    shape (n_vectors, n_frames, 3)

    Raises
    ------
    ValueError
        if mask is an integer array whose ndim is not 2

    Examples
    --------
    >>> # calculate N-H vector
    >>> import pytraj as pt
    >>> import numpy as np
    >>> traj = pt.load_sample_data('tz2')
    >>> from pytraj import vector as va
    >>> n_indices = pt.select_atoms('@N', traj.top)
    >>> h_indices = n_indices + 1

    >>> # create n-h pair for vector calculation
    >>> n_h_pairs = np.array(list(zip(n_indices, h_indices)))
    >>> data_vec = va.vector_mask(traj, n_h_pairs, dtype='ndarray')

    >>> # compute vectors for specific frame indices (0, 4)
    >>> data_vec = va.vector_mask(traj, n_h_pairs, frame_indices=[0, 4], dtype='ndarray')
    """
    from pytraj.utils.get_common_objects import (get_topology,
                                                 get_data_from_dtype,
                                                 get_fiterator,
                                                 get_list_of_commands)
    from pytraj.datasets.c_datasetlist import DatasetList as CpptrajDatasetList
    from pytraj.analysis.c_action.c_action import Action_Vector
    from pytraj.analysis.c_action.actionlist import ActionList

    frames = get_fiterator(traj, frame_indices)
    topology = get_topology(frames, top)
    output = CpptrajDatasetList()

    mask_array = np.asarray(mask)
    if mask_array.dtype.kind == 'i':
        # Integer input: each row is turned into an atom-mask command.
        if mask_array.ndim != 2:
            raise ValueError(
                'if mask is a numpy.ndarray, it must have ndim = 2')
        commands = _2darray_to_atommask_groups(mask_array)
    else:
        commands = get_list_of_commands(mask)

    actions = ActionList()
    for single_command in commands:
        # Every command gets the ' mask ' keyword appended.
        actions.add(Action_Vector(),
                    single_command + ' mask ',
                    topology,
                    dslist=output)
    actions.compute(frames)

    return get_data_from_dtype(output, dtype=dtype)