Example #1
0
def cluster_dataset(array_like, options=''):
    '''Cluster a flat collection of values with cpptraj's clustering analysis.

    The input is copied into a temporary 'double' dataset named
    ``__array_like`` and the clustering analysis is run on that dataset.

    Parameters
    ----------
    array_like : array_like
        data points to cluster
    options : str, cpptraj options
        appended verbatim after ``data __array_like`` in the analysis command

    Returns
    -------
    cluster index for each data point (numpy array built from the
    second-to-last dataset in the list after the analysis has run)

    Examples
    --------
    >>> import pytraj as pt
    >>> import numpy as np
    >>> array_like = np.random.randint(0, 10, 1000)
    >>> data = pt.cluster.cluster_dataset(array_like, 'clusters 10 epsilon 3.0')
    '''
    import numpy as np

    # stage the user data in a cpptraj dataset so the analysis can address it by name
    datasets = CpptrajDatasetList()
    datasets.add('double', '__array_like')
    staged = datasets[0]
    staged.resize(len(array_like))
    staged.values[:] = array_like

    clustering = c_analysis.Analysis_Clustering()
    clustering('data __array_like ' + options, dslist=datasets)

    # NOTE(review): the result is taken from the second-to-last dataset;
    # presumably the analysis appends one more dataset after it -- confirm
    return np.array(datasets[-2])
    def test_1(self):
        # Exercise the 'distance' action twice on the same trajectory:
        # first by feeding frames one at a time, then by handing the whole
        # frame iterator to compute() and checking the frame counter.
        trajectory = pt.iterload(fn('Tc5b.x'), fn('Tc5b.top'))
        topology = trajectory.top

        # pass 1: frame-by-frame
        per_frame_action = adict['distance']
        dslist = CpptrajDatasetList()
        per_frame_action.read_input(":2@CA :10@CA", topology, dslist=dslist)
        per_frame_action.setup(topology)
        for single_frame in trajectory.iterframe(stop=5):
            per_frame_action.compute(single_frame)

        # pass 2: a fresh dataset list and a single compute() over the iterator
        dslist = CpptrajDatasetList()
        iterator_action = adict['distance']
        iterator_action.read_input(":2@CA :10@CA", topology, dslist=dslist)
        iterator_action.setup(topology)
        iterator_action.compute(trajectory.iterframe(stop=5))
        assert iterator_action.n_frames == 5
Example #3
0
def vector(traj=None,
           command="",
           frame_indices=None,
           dtype='ndarray',
           top=None):
    """Run one or several cpptraj 'vector' commands over a trajectory.

    One ``Action_Vector`` is registered per command and all of them are
    evaluated in a single pass over the frames.

    Parameters
    ----------
    traj : Trajectory-like or iterable that produces :class:`pytraj.Frame`
    command : str or a list of strings, cpptraj command
    frame_indices : array-like, optional, default None
        only perform calculation for given frame indices
    dtype : output's dtype, default 'ndarray'
    top : Topology, optional, default None

    Returns
    -------
    out : numpy ndarray, shape (n_frames, 3) if command is a string
          numpy ndarray, shape (n_vectors, n_frames, 3) if command is a list of strings

    Examples
    --------
    >>> import pytraj as pt
    >>> traj = pt.datafiles.load_tz2_ortho()
    >>> data = pt.vector.vector(traj, "@CA @CB")
    >>> data = pt.vector.vector(traj, [("@CA @CB"),])
    >>> data = pt.vector.vector(traj, "principal z")
    >>> data = pt.vector.vector(traj, "principal x")
    >>> data = pt.vector.vector(traj, "ucellx")
    >>> data = pt.vector.vector(traj, "boxcenter")
    >>> data = pt.vector.vector(traj, "box")

    Notes
    -----
    Passing a list of commands is faster than calling this function once per
    command. For example, instead of three separate calls for 'ucellx',
    'boxcenter' and 'box':

    >>> data = pt.vector.vector(traj, "ucellx")
    >>> data = pt.vector.vector(traj, "boxcenter")
    >>> data = pt.vector.vector(traj, "box")

    use a single call with a list of commands.
    >>> comlist = ['ucellx', 'boxcenter', 'box']
    >>> data = pt.vector.vector(traj, comlist)
    """
    datasets = CpptrajDatasetList()
    topology = get_topology(traj, top)
    commands = get_list_of_commands(command)
    frames = get_fiterator(traj, frame_indices)

    # every command gets its own action; all share one output dataset list
    actions = ActionList()
    for single_command in commands:
        actions.add(c_action.Action_Vector(), single_command, topology,
                    dslist=datasets)
    actions.compute(frames)

    return get_data_from_dtype(datasets, dtype=dtype)
Example #4
0
 def read_data(cls, filename, arg=""):
     '''Load datasets from ``filename`` and return them as a DatasetList.

     ``arg`` is wrapped in an ``ArgList`` and handed to the DataFile reader
     together with a fresh cpptraj dataset list that receives the data.

     >>> from pytraj.datasets.datasetlist import DatasetList
     >>> d = DatasetList.read_data('data/tc5b.native_contacts.dat')
     '''
     reader = DataFile()
     loaded = CpptrajDatasetList()
     reader.read_data(filename, ArgList(arg), loaded)
     return DatasetList(loaded)
Example #5
0
 def read_data(cls, filename, arg=""):
     '''Load datasets from ``filename`` and return them as a DatasetList.

     ``arg`` is wrapped in an ``ArgList`` and handed to the DataFile reader
     together with a fresh cpptraj dataset list that receives the data.

     >>> from pytraj.datasets.datasetlist import DatasetList
     >>> DatasetList.read_data('data/tc5b.native_contacts.dat')
     <pytraj.DatasetList with 2 datasets>
     Contacts_00001[native]
     [ 7095.  5904.  5638.  5600.  5695.  5745.  5611.  5556.  5739.  5748.]
     <BLANKLINE>
     Contacts_00001[nonnative]
     [    0.  2696.  3065.  3552.  3700.  2624.  4000.  3797.  3482.  4265.]
     '''
     reader = DataFile()
     loaded = CpptrajDatasetList()
     reader.read_data(filename, ArgList(arg), loaded)
     return DatasetList(loaded)
Example #6
0
def nastruct(traj=None,
             ref=0,
             resrange=None,
             resmap=None,
             hbcut=3.5,
             frame_indices=None,
             pucker_method='altona',
             dtype='nupars',
             groove_3dna=True,
             top=None):
    """compute nucleic acid parameters. (adapted from cpptraj doc)

    Parameters
    ----------
    traj : Trajectory-like
    ref : {Frame, int}, default 0 (first frame)
    resrange : None, str or array-like of integers
    resmap : residue map, example: 'AF2:A'
    hbcut : float, default=3.5 Angstrom
        Distance cutoff for determining basepair hbond
    pucker_method : str, {'altona', 'cremer'}, default 'altona'
        'altona' : Use method of Altona & Sundaralingam to calculate sugar pucker
        'cremer' : Use method of Cremer and Pople to calculate sugar pucker
    frame_indices : array-like, default None (all frames)
    groove_3dna : bool, default True
        if True, major and minor groove will match 3DNA's output.
    dtype : str, {'nupars', 'cpptraj_dataset'}, default 'nupars'

    Returns
    -------
    out : nupars object if dtype is 'nupars'. One can access different values
    (major groove width, xdisp values ...) through its attributes. See example below.
    If dtype is 'cpptraj_dataset', the raw cpptraj dataset list is returned
    as is (the leading reference-frame entry is NOT stripped in that case).

    Raises
    ------
    ValueError
        if dtype is not 'nupars' or 'cpptraj_dataset'. The check is done
        before any calculation starts.

    Examples
    --------
    >>> import pytraj as pt
    >>> import numpy as np
    >>> traj = pt.datafiles.load_rna()
    >>> data = pt.nastruct(traj, groove_3dna=False)
    >>> data.keys()[:5] # doctest: +SKIP
    ['buckle', 'minor', 'major', 'xdisp', 'stagger']
    >>> # get minor groove width values for each pairs for each snapshot
    >>> # data.minor is a tuple, first value is a list of basepairs, second value is
    >>> # numpy array, shape=(n_frames, n_pairs)

    >>> data.minor # doctest: +SKIP
    (['1G16C', '2G15C', '3G14C', '4C13G', '5G12C', '6C11G', '7C10G', '8C9G'],
     array([[ 13.32927036,  13.403409  ,  13.57159901, ...,  13.26655865,
             13.43054485,  13.4557209 ],
           [ 13.32002068,  13.45918751,  13.63253593, ...,  13.27066231,
             13.42743683,  13.53450871],
           [ 13.34087658,  13.53778553,  13.57062435, ...,  13.29017353,
             13.38542843,  13.46101475]]))

    >>> data.twist # doctest: +SKIP
    (['1G16C-2G15C', '2G15C-3G14C', '3G14C-4C13G', '4C13G-5G12C', '5G12C-6C11G', '6C11G-7C10G', '7C10G-8C9G'],
    array([[ 34.77773666,  33.98158646,  30.18647003, ...,  35.14608765,
             33.9628334 ,  33.13056946],
           [ 33.39176178,  32.68476105,  28.36385536, ...,  36.59774399,
             30.20827484,  26.48732948],
           [ 36.20665359,  32.58955002,  27.47707367, ...,  33.42843246,
             30.90047073,  33.73724365]]))

    >>> # change dtype
    >>> data = pt.nastruct(traj, dtype='cpptraj_dataset')
    """
    # Fail fast: reject an unsupported dtype before running the (potentially
    # long) calculation instead of after it.
    if dtype not in ('nupars', 'cpptraj_dataset'):
        raise ValueError("only support dtype = {'nupars', 'cpptraj_dataset'}")

    from pytraj.datasets.c_datasetlist import DatasetList as CpptrajDatasetList
    from .c_action import c_action
    from pytraj.array import DataArray

    _resrange = get_resrange(resrange)

    fi = get_fiterator(traj, frame_indices)
    ref = get_reference(traj, ref)
    _top = get_topology(traj, top)
    _resmap = "resmap " + resmap if resmap is not None else ""
    _hbcut = "hbcut " + str(hbcut) if hbcut is not None else ""
    _pucker_method = pucker_method
    _groove_3dna = 'groovecalc 3dna' if groove_3dna else ''

    command = " ".join(
        (_resrange, _resmap, _hbcut, _pucker_method, _groove_3dna))

    dslist = CpptrajDatasetList()

    # need to construct 3 steps so we can pickle this method for parallel
    # not sure why?
    act = c_action.Action_NAstruct(command, top=_top, dslist=dslist)
    # the reference frame is processed first, then the requested frames
    act.compute(ref)
    act.compute(fi)

    if dtype == 'cpptraj_dataset':
        return dslist

    # dtype == 'nupars': copy every dataset into a python-side DataArray and
    # drop its first entry, which comes from the reference frame computed above
    dslist_py = []
    for d in dslist:
        arr = DataArray(d)
        arr.values = arr.values[1:]
        dslist_py.append(arr)
    return nupars(_group(dslist_py, lambda x: x.aspect))
Example #7
0
def calc_dssp(traj=None,
              mask="",
              frame_indices=None,
              dtype='ndarray',
              simplified=False,
              top=None):
    """Compute the DSSP secondary-structure profile for a frame or trajectory.

    Parameters
    ----------
    traj : Trajectory-like
    mask: str
        atom mask
    frame_indices : {None, array-like}, default None, optional
        specify frame numbers for calculation.
        if None, do all frames
    dtype : str, default 'ndarray'
        return data type, for regular user, just use default one (ndarray).
        use dtype='dataset' if wanting to get secondary structure in integer format
    simplified : bool, default False
        if True, use simplified codes, only has 'H', 'E' and 'C'
        if False, use all DSSP codes

    Returns
    -------
    out_0: ndarray, shape=(n_residues,)
        residue names
    out_1: ndarray, shape=(n_frames, n_residues)
        DSSP for each residue
    out_2 : pytraj.DatasetList
        average value for each secondary structure type

    Examples
    --------
    >>> import pytraj as pt
    >>> traj = pt.load_pdb_rcsb('1l2y')
    >>> residues, ss, _ = pt.dssp(traj, ":2-10")
    >>> residues # doctest: +SKIP
    array(['LEU:2', 'TYR:3', 'ILE:4', 'GLN:5', 'TRP:6', 'LEU:7', 'LYS:8',
           'ASP:9', 'GLY:10'],
          dtype='<U6')
    >>> ss # doctest: +SKIP
    array([['0', 'H', 'H', ..., 'H', 'T', '0'],
           ['0', 'H', 'H', ..., 'H', 'T', '0'],
           ['0', 'H', 'H', ..., 'H', 'T', '0'],
           ...,
           ['0', 'H', 'H', ..., 'H', 'T', '0'],
           ['0', 'H', 'H', ..., 'H', 'H', '0'],
           ['0', 'H', 'H', ..., 'H', 'T', '0']],
          dtype='<U1')

    >>> residues, ss, _ = pt.dssp(traj, mask=range(100))

    >>> traj = pt.fetch_pdb('1l2y')
    >>> residues, ss, _ = pt.dssp(traj, simplified=True)
    >>> ss[0].tolist() # first frame
    ['C', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'C', 'C', 'H', 'H', 'H', 'H', 'C', 'C', 'C', 'C', 'C', 'C']


    Notes
    -----
    ========= ======= ========= ========================
    Character Integer DSSP_Char Secondary structure type
    ========= ======= ========= ========================
    0         0       '0'       None
    b         1       'E'       Parallel Beta-sheet
    B         2       'B'       Anti-parallel Beta-sheet
    G         3       'G'       3-10 helix
    H         4       'H'       Alpha helix
    I         5       'I'       Pi (3-14) helix
    T         6       'T'       Turn
    S         7       'S'       Bend
    ========= ======= ========= ========================

    Simplified codes::

        - 'H': include 'H', 'G', 'I' (helix)
        - 'E': include 'E', 'B' (strand)
        - 'C': include 'T', 'S' or '0' (coil)

    Simplified codes will be mostly used for visualization in other packages.
    """
    from pytraj.datasets.c_datasetlist import DatasetList as CpptrajDatasetList
    from pytraj.c_action.c_action import Action_DSSP

    # NOTE(review): frame_indices is not referenced anywhere in this body;
    # presumably a decorator outside this view applies it -- confirm.
    dslist = CpptrajDatasetList()
    Action_DSSP()(mask, traj, top=top, dslist=dslist)

    # give the averaged datasets friendlier keys:
    # strip the "DSSP_00000[" prefix, turn the closing "]" into "_avg", lowercase
    for original_key, dset in dslist.iteritems():
        if 'DSSP' not in original_key:
            continue
        dset.key = (original_key.replace("DSSP_00000[", "")
                    .replace("]", "_avg")
                    .lower())

    dtype = dtype.lower()
    if dtype != 'ndarray':
        return get_data_from_dtype(dslist, dtype=dtype)

    # the integer-typed datasets carry the per-residue secondary structure codes
    integer_dsets = dslist.grep("integer", mode='dtype')
    code_arrays = integer_dsets.values
    residue_keys = integer_dsets.keys()
    averages = DatasetList(dslist.grep('_avg'))

    converted = []
    for codes in code_arrays:
        converted.append(
            _to_string_secondary_structure(codes, simplified=simplified))
    # transpose so the result is laid out as documented in Returns (out_1)
    ss_array = np.asarray(converted).T

    return np.asarray(residue_keys), ss_array, averages
Example #8
0
def vector_mask(traj=None,
                mask="",
                frame_indices=None,
                dtype='ndarray',
                top=None):
    """Compute the vector between two masks for every requested frame.

    Parameters
    ----------
    traj : Trajectory-like or iterable that produces Frame
    mask: str or array of string or array of integers, shape (n_vectors, 2)
        vector masks
    frame_indices : array-like or iterable that produces integer number
        frame indices
    dtype : str, default 'ndarray'
        output dtype
    top : Topology, optional, default None

    Returns
    -------
    if mask is a string, return 2D ndarray, shape (n_frames, 3)
    if mask is a list of strings or a 2D ndarray, return 3D ndarray, shape (n_vectors, n_frames, 3)

    Raises
    ------
    ValueError
        if mask is an integer array whose ndim is not 2

    Examples
    --------
    >>> # calculate N-H vector
    >>> import pytraj as pt
    >>> import numpy as np
    >>> traj = pt.load_sample_data('tz2')
    >>> from pytraj import vector as va
    >>> n_indices = pt.select_atoms('@N', traj.top)
    >>> h_indices = n_indices + 1

    >>> # create n-h pair for vector calculation
    >>> n_h_pairs = np.array(list(zip(n_indices, h_indices)))
    >>> data_vec = va.vector_mask(traj, n_h_pairs, dtype='ndarray')

    >>> # compute vectors for specific frame indices (0, 4)
    >>> data_vec = va.vector_mask(traj, n_h_pairs, frame_indices=[0, 4], dtype='ndarray')
    """
    from pytraj.utils.get_common_objects import (get_topology,
                                                 get_data_from_dtype,
                                                 get_fiterator,
                                                 get_list_of_commands)
    from pytraj.datasets.c_datasetlist import DatasetList as CpptrajDatasetList
    from pytraj.analysis.c_action.c_action import Action_Vector
    from pytraj.analysis.c_action.actionlist import ActionList

    frames = get_fiterator(traj, frame_indices)
    topology = get_topology(frames, top)
    datasets = CpptrajDatasetList()

    # integer input is treated as atom-index pairs, anything else as mask strings
    mask_array = np.asarray(mask)
    if mask_array.dtype.kind == 'i':
        if mask_array.ndim != 2:
            raise ValueError(
                'if mask is a numpy.ndarray, it must have ndim = 2')
        commands = _2darray_to_atommask_groups(mask_array)
    else:
        commands = get_list_of_commands(mask)

    # one vector action per mask pair; ' mask ' is appended to each command
    actions = ActionList()
    for pair_command in commands:
        actions.add(Action_Vector(), pair_command + ' mask ', topology,
                    dslist=datasets)
    actions.compute(frames)

    return get_data_from_dtype(datasets, dtype=dtype)