Beispiel #1
0
def cluster_dataset(array_like, options=''):
    '''Run cpptraj's clustering analysis on a plain sequence of numbers.

    The input is copied into a temporary 'double' dataset named
    ``__array_like`` and the command ``data __array_like <options>`` is
    handed to ``Analysis_Clustering``.

    Parameters
    ----------
    array_like : array_like
        values to cluster; must support ``len``
    options : str, cpptraj options
        appended verbatim to the clustering command

    Returns
    -------
    numpy.ndarray
        cluster index for each data point (built from the second-to-last
        dataset present in the dataset list after the analysis has run)

    Examples
    --------
    >>> import pytraj as pt
    >>> import numpy as np
    >>> array_like = np.random.randint(0, 10, 1000)
    >>> data = pt.cluster.cluster_dataset(array_like, 'clusters 10 epsilon 3.0')
    '''
    import numpy as np

    # stage the input inside a cpptraj dataset so the analysis can find it
    # by name
    datasets = CpptrajDatasetList()
    datasets.add('double', '__array_like')
    datasets[0].resize(len(array_like))
    datasets[0].values[:] = array_like

    clustering = c_analysis.Analysis_Clustering()
    clustering('data __array_like ' + options, dslist=datasets)

    return np.array(datasets[-2])
Beispiel #2
0
    def test_1(self):
        # The only prerequisite is a libcpptraj built with OpenMP.
        #
        # To try different thread counts:
        #   export OMP_NUM_THREADS=1
        #   python ./test_openmp_0.py
        #   export OMP_NUM_THREADS=8
        #   python ./test_openmp_0.py
        import numpy as np

        datasets = DatasetList()
        datafiles = DataFileList()

        traj = pt.iterload(fn('Tc5b.x'), fn('Tc5b.top'))

        # build a COORDS dataset by loading the same trajectory file 45 times
        datasets.add("coords", "test_traj")
        datasets[0].top = traj.top
        for _ in range(45):
            datasets[0].load(traj.filename)
        rms2d = Analysis_Rms2d()

        with Timer() as timer:
            rms2d(
                "crdset test_traj rmsout ./output/_test_2drms_CRDtest.openmp.dat",
                dslist=datasets,
                dflist=datafiles)

        # the result must reproduce cpptraj's own output, so that any speed-up
        # is not a false impression
        computed = datasets[-1].get_full_matrix()

        # first row and first column of the saved file are skipped
        saved = np.loadtxt("./data/test_openmp.Tc5b.n_threads_1.dat",
                           skiprows=1,
                           usecols=range(1, datasets[0].size + 1))
        # decimal=3 matches the precision of cpptraj's text format
        assert_almost_equal(saved.flatten(), computed, decimal=3)
Beispiel #3
0
 def test_reference_2(self):
     # Action_Rmsd, given its reference through a 'ref_frame' dataset
     # (``refindex 0``), must agree with pt.rmsd for the same frame and mask.
     from pytraj.analysis.c_action.c_action import Action_Rmsd
     from pytraj.datasets.c_datasetlist import DatasetList

     traj = pt.iterload(fn('tz2.nc'), fn('tz2.parm7'))
     rmsd_action = Action_Rmsd()
     reference = traj[2]

     # register the reference frame as the first dataset in the list
     datasets = DatasetList()
     datasets.add('ref_frame', 'myref')
     datasets[-1].top = traj.top
     datasets[-1].add_frame(reference)

     rmsd_action('myrmsd refindex 0 @CA', traj, top=traj.top, dslist=datasets)

     expected = pt.rmsd(traj, ref=traj[2], mask='@CA')
     aa_eq(expected, datasets[-1].values)
Beispiel #4
0
 def test_reference_2(self):
     # Action_Rmsd, given its reference through a 'ref_frame' dataset
     # (``refindex 0``), must agree with pt.rmsd for the same frame and mask.
     from pytraj.c_action.c_action import Action_Rmsd
     from pytraj.datasets.c_datasetlist import DatasetList

     traj = pt.iterload("./data/tz2.nc", "./data/tz2.parm7")
     rmsd_action = Action_Rmsd()
     reference = traj[2]

     # register the reference frame as the first dataset in the list
     datasets = DatasetList()
     datasets.add('ref_frame', 'myref')
     datasets[-1].top = traj.top
     datasets[-1].add_frame(reference)

     rmsd_action('myrmsd refindex 0 @CA', traj, top=traj.top, dslist=datasets)

     expected = pt.rmsd(traj, ref=traj[2], mask='@CA')
     aa_eq(expected, datasets[-1].values)
    def test_actionlist(self):
        '''Vectors computed through an ActionList must match pt.calc_vector
        and va.vector_mask for the same masks.
        '''
        action_output = DatasetList()
        actions = ActionList()
        traj = pt.iterload("./data/Tc5b.x", "./data/Tc5b.top")
        mask_list = ['@CB @CA', '@CA @H']

        # one Action_Vector per mask, all writing into the same dataset list
        for mask in mask_list:
            actions.add(CA.Action_Vector(), mask, traj.top, dslist=action_output)
        actions.compute(traj)

        all_masks_at_once = pt.calc_vector(traj, mask_list)
        via_vector_mask = va.vector_mask(traj, mask_list)

        # reference values: one pt.calc_vector call per individual mask
        per_mask = [pt.calc_vector(traj, mask) for mask in mask_list]

        for result in (all_masks_at_once, via_vector_mask):
            for idx, expected in enumerate(per_mask):
                aa_eq(expected, result[idx])

        for idx, expected in enumerate(per_mask):
            aa_eq(expected, action_output[idx].values)
Beispiel #6
0
def calc_dssp(traj=None,
              mask="",
              frame_indices=None,
              dtype='ndarray',
              simplified=False,
              top=None):
    """Compute the DSSP secondary-structure profile of a frame/trajectory.

    Parameters
    ----------
    traj : Trajectory-like
    mask: str
        atom mask, passed unchanged as the command of the DSSP action
    frame_indices : {None, array-like}, default None, optional
        specify frame numbers for calculation.
        if None, do all frames
        NOTE(review): this argument is not referenced inside the function
        body; presumably it is consumed by a decorator outside this view --
        confirm.
    dtype : str, default 'ndarray'
        return data type (case-insensitive). Regular users should keep the
        default (ndarray). Use dtype='dataset' to get the secondary structure
        in integer format.
    simplified : bool, default False
        if True, use simplified codes, only has 'H', 'E' and 'C'
        if False, use all DSSP codes
    top : optional
        forwarded to the DSSP action as ``top``

    Returns
    -------
    out_0: ndarray, shape=(n_residues,)
        residue names
    out_1: ndarray, shape=(n_frames, n_residues)
        DSSP for each residue
    out_2 : pytraj.DatasetList
        average value for each secondary structure type

    For any ``dtype`` other than 'ndarray', the result of
    ``get_data_from_dtype`` for that dtype is returned instead.

    Examples
    --------
    >>> import pytraj as pt
    >>> traj = pt.load_pdb_rcsb('1l2y')
    >>> residues, ss, _ = pt.dssp(traj, ":2-10")
    >>> residues # doctest: +SKIP
    array(['LEU:2', 'TYR:3', 'ILE:4', 'GLN:5', 'TRP:6', 'LEU:7', 'LYS:8',
           'ASP:9', 'GLY:10'],
          dtype='<U6')
    >>> ss # doctest: +SKIP
    array([['0', 'H', 'H', ..., 'H', 'T', '0'],
           ['0', 'H', 'H', ..., 'H', 'T', '0'],
           ['0', 'H', 'H', ..., 'H', 'T', '0'],
           ...,
           ['0', 'H', 'H', ..., 'H', 'T', '0'],
           ['0', 'H', 'H', ..., 'H', 'H', '0'],
           ['0', 'H', 'H', ..., 'H', 'T', '0']],
          dtype='<U1')

    >>> residues, ss, _ = pt.dssp(traj, mask=range(100))

    >>> traj = pt.fetch_pdb('1l2y')
    >>> residues, ss, _ = pt.dssp(traj, simplified=True)
    >>> ss[0].tolist() # first frame
    ['C', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'C', 'C', 'H', 'H', 'H', 'H', 'C', 'C', 'C', 'C', 'C', 'C']


    Notes
    -----
    ========= ======= ========= ========================
    Character Integer DSSP_Char Secondary structure type
    ========= ======= ========= ========================
    0         0       '0'       None
    b         1       'E'       Parallel Beta-sheet
    B         2       'B'       Anti-parallel Beta-sheet
    G         3       'G'       3-10 helix
    H         4       'H'       Alpha helix
    I         5       'I'       Pi (3-14) helix
    T         6       'T'       Turn
    S         7       'S'       Bend
    ========= ======= ========= ========================

    Simplified codes::

        - 'H': include 'H', 'G', 'I' (helix)
        - 'E': include 'E', 'B' (strand)
        - 'C': include 'T', 'S' or '0' (coil)

    Simplified codes are mostly used for visualization in other packages.
    """
    from pytraj.datasets.c_datasetlist import DatasetList as CpptrajDatasetList
    from pytraj.c_action.c_action import Action_DSSP

    c_dslist = CpptrajDatasetList()
    Action_DSSP()(mask, traj, top=top, dslist=c_dslist)

    # give the DSSP datasets friendlier keys:
    # "DSSP_00000[<name>]" becomes "<name>_avg", lower-cased
    for old_key, dataset in c_dslist.iteritems():
        if 'DSSP' not in old_key:
            continue
        renamed = old_key.replace("DSSP_00000[", "").replace("]", "_avg")
        dataset.key = renamed.lower()

    dtype = dtype.lower()
    if dtype != 'ndarray':
        return get_data_from_dtype(c_dslist, dtype=dtype)

    # the datasets of integer dtype carry the per-residue codes
    integer_values = c_dslist.grep("integer", mode='dtype').values
    residue_keys = c_dslist.grep("integer", mode='dtype').keys()
    averages = DatasetList(c_dslist.grep('_avg'))

    per_residue = [
        _to_string_secondary_structure(codes, simplified=simplified)
        for codes in integer_values
    ]
    # transpose so that rows are frames and columns are residues
    ss_by_frame = np.asarray(per_residue).T
    return np.asarray(residue_keys), ss_by_frame, averages
Beispiel #7
0
def calc_dssp(traj=None,
              mask="",
              frame_indices=None,
              dtype='ndarray',
              simplified=False,
              top=None):
    """Compute the DSSP secondary-structure profile of a frame/trajectory.

    Parameters
    ----------
    traj : Trajectory-like
    mask: str
        atom mask, passed unchanged as the command of the DSSP action
    frame_indices : {None, array-like}, default None, optional
        specify frame numbers for calculation.
        if None, do all frames
        NOTE(review): this argument is not referenced inside the function
        body; presumably it is consumed by a decorator outside this view --
        confirm.
    dtype : str, default 'ndarray'
        return data type (case-insensitive). Regular users should keep the
        default (ndarray). Use dtype='dataset' to get the secondary structure
        in integer format.
    simplified : bool, default False
        if True, use simplified codes, only has 'H', 'E' and 'C'
        if False, use all DSSP codes
    top : optional
        forwarded to the DSSP action as ``top``

    Returns
    -------
    out_0: ndarray, shape=(n_residues,)
        residue names
    out_1: ndarray, shape=(n_frames, n_residues)
        DSSP for each residue
    out_2 : pytraj.DatasetList
        average value for each secondary structure type

    For any ``dtype`` other than 'ndarray', the result of
    ``get_data_from_dtype`` for that dtype is returned instead.

    Examples
    --------
    >>> import pytraj as pt
    >>> traj = pt.load_pdb_rcsb('1l2y')
    >>> residues, ss, _ = pt.dssp(traj, ":2-10")
    >>> residues # doctest: +SKIP
    array(['LEU:2', 'TYR:3', 'ILE:4', 'GLN:5', 'TRP:6', 'LEU:7', 'LYS:8',
           'ASP:9', 'GLY:10'],
          dtype='<U6')
    >>> ss # doctest: +SKIP
    array([['0', 'H', 'H', ..., 'H', 'T', '0'],
           ['0', 'H', 'H', ..., 'H', 'T', '0'],
           ['0', 'H', 'H', ..., 'H', 'T', '0'],
           ...,
           ['0', 'H', 'H', ..., 'H', 'T', '0'],
           ['0', 'H', 'H', ..., 'H', 'H', '0'],
           ['0', 'H', 'H', ..., 'H', 'T', '0']],
          dtype='<U1')

    >>> residues, ss, _ = pt.dssp(traj, mask=range(100))

    >>> traj = pt.fetch_pdb('1l2y')
    >>> residues, ss, _ = pt.dssp(traj, simplified=True)
    >>> ss[0].tolist() # first frame
    ['C', 'H', 'H', 'H', 'H', 'H', 'H', 'H', 'C', 'C', 'H', 'H', 'H', 'H', 'C', 'C', 'C', 'C', 'C', 'C']


    Notes
    -----
    ========= ======= ========= ========================
    Character Integer DSSP_Char Secondary structure type
    ========= ======= ========= ========================
    0         0       '0'       None
    b         1       'E'       Parallel Beta-sheet
    B         2       'B'       Anti-parallel Beta-sheet
    G         3       'G'       3-10 helix
    H         4       'H'       Alpha helix
    I         5       'I'       Pi (3-14) helix
    T         6       'T'       Turn
    S         7       'S'       Bend
    ========= ======= ========= ========================

    Simplified codes::

        - 'H': include 'H', 'G', 'I' (helix)
        - 'E': include 'E', 'B' (strand)
        - 'C': include 'T', 'S' or '0' (coil)

    Simplified codes are mostly used for visualization in other packages.
    """
    from pytraj.datasets.c_datasetlist import DatasetList as CpptrajDatasetList
    from pytraj.c_action.c_action import Action_DSSP

    raw_datasets = CpptrajDatasetList()
    Action_DSSP()(mask, traj, top=top, dslist=raw_datasets)

    # rename "DSSP_00000[<name>]" keys to the shorter, lower-cased
    # "<name>_avg" form
    for original_key, dset in raw_datasets.iteritems():
        if 'DSSP' in original_key:
            stripped = original_key.replace("DSSP_00000[", "")
            dset.key = stripped.replace("]", "_avg").lower()

    dtype = dtype.lower()
    if dtype != 'ndarray':
        return get_data_from_dtype(raw_datasets, dtype=dtype)

    # only the integer-dtype datasets carry the per-residue codes
    code_arrays = raw_datasets.grep("integer", mode='dtype').values
    residue_names = raw_datasets.grep("integer", mode='dtype').keys()
    avg_datasets = DatasetList(raw_datasets.grep('_avg'))

    as_strings = []
    for codes in code_arrays:
        as_strings.append(
            _to_string_secondary_structure(codes, simplified=simplified))

    # transpose so that rows are frames and columns are residues
    return np.asarray(residue_names), np.asarray(as_strings).T, avg_datasets
Beispiel #8
0
def energy_decomposition(traj=None,
                         prmtop=None,
                         igb=8,
                         mm_options=None,
                         qm_options=None,
                         mode=None,
                         dtype='dict',
                         frame_indices=None,
                         top=None):
    """energy decomposition by calling `libsander`

    Parameters
    ----------
    traj : Trajectory-like or iterables that produce Frame
        if `traj` does not hold Topology information, `top` must be provided
    prmtop : str or Structure from ParmEd, default=None, optional
        To avoid any unexpected error, you should always provide the original
        topology filename. If prmtop is None, ``top.filename`` is used.

        - why is an additional topology filename needed? Because cpptraj and
          sander use different Topology objects that cannot be converted from
          one to another.
    igb : GB model, default=8 (GB-Neck2)
        only used to build the default ``sander.gas_input(igb)`` when
        `mm_options` is None. If `mm_options` is given, `igb` is ignored
        (whatever its value, including None).
    mm_options : InputOptions from `sander`, or str, default=None, optional
        if `mm_options` is None, use `gas_input` with given igb.
        If `mm_options` is not None, use this.
        A string is executed as Python code (with ``sander`` in scope) and must
        define a variable named ``mm_options``; never pass untrusted text.
    qm_options : InputOptions from `sander` for QMMM, or str, optional
        A string is executed as Python code (with ``sander`` in scope) and must
        define a variable named ``qm_options``; never pass untrusted text.
    mode : str, default=None, optional
        if mode='minimal', get only 'bond', 'angle', 'dihedral' and 'tot'
        energies
    top : pytraj.Topology or str, default=None, optional
        only need to specify this ``top`` if ``traj`` does not hold Topology;
        when omitted, ``traj.top`` is used if available
    dtype : str, {'dict', 'dataset', 'ndarray', 'dataframe'}, default='dict'
        return data type
    frame_indices : None or 1D array-like, default None
        if not None, only perform calculation for given frames
        NOTE(review): this argument is not referenced inside the function
        body; presumably it is consumed by a decorator outside this view --
        confirm.

    Returns
    -------
    Dict of energies (to be used with DataFrame) or DatasetList

    Raises
    ------
    ImportError
        if the `sander` Python module cannot be imported
    ValueError
        if `prmtop` is None and no topology filename can be taken from `top`

    Examples
    --------
    Examples are adapted from $AMBERHOME/test/sanderapi

    >>> import pytraj as pt
    >>> # GB energy
    >>> traj = pt.datafiles.load_ala3()
    >>> traj.n_frames
    1
    >>> data = pt.energy_decomposition(traj, igb=8)
    >>> data['gb']
    array([-92.88577683])
    >>> data['bond']
    array([ 5.59350521])

    >>> # PME
    >>> import os
    >>> from pytraj.testing import amberhome
    >>> import sander
    >>> topfile = os.path.join(amberhome, "test/4096wat/prmtop")
    >>> rstfile = os.path.join(amberhome, "test/4096wat/eq1.x")
    >>> traj = pt.iterload(rstfile, topfile)
    >>> options = sander.pme_input()
    >>> options.cut = 8.0
    >>> edict = pt.energy_decomposition(traj=traj, mm_options=options)
    >>> edict['vdw']
    array([ 6028.95167558])

    >>> # GB + QMMM
    >>> topfile = os.path.join(amberhome, "test/qmmm2/lysine_PM3_qmgb2/prmtop")
    >>> rstfile = os.path.join(amberhome, "test/qmmm2/lysine_PM3_qmgb2/lysine.crd")
    >>> traj = pt.iterload(rstfile, topfile)

    >>> options = sander.gas_input(8)
    >>> options.cut = 99.0
    >>> options.ifqnt = 1
    >>> qm_options = sander.qm_input()
    >>> qm_options.iqmatoms[:3] = [8, 9, 10]
    >>> qm_options.qm_theory = "PM3"
    >>> qm_options.qmcharge = 0
    >>> qm_options.qmgb = 2
    >>> qm_options.adjust_q = 0

    >>> edict = pt.energy_decomposition(traj=traj, mm_options=options, qm_options=qm_options)
    >>> edict['bond']
    array([ 0.00160733])
    >>> edict['scf']
    array([-11.92177575])

    Notes
    -----
    This method does not work with `pytraj.pmap` when you specify mm_options and
    qm_options. Use `pytraj.pmap_mpi` with MPI instead.

    Works with ``pytraj.pmap``::

        pt.pmap(pt.energy_decomposition, traj, igb=8, dtype='dict')

    Will NOT work with ``pytraj.pmap``::

        import sander
        inp = sander.gas_input(8)
        pt.pmap(pt.energy_decomposition, traj, mm_options=inp, dtype='dict')

    Why? Because Python needs to pickle each object to send it to different
    cores, and Python does not know how to pickle mm_options from
    sander.gas_input(8).

    This works with ``pytraj.pmap_mpi`` because pytraj explicitly creates
    ``mm_options`` in each core without pickling.
    """
    from collections import defaultdict, OrderedDict
    from pytraj.misc import get_atts
    import numpy as np

    try:
        import sander
    except ImportError:
        raise ImportError("need both `pysander` installed. Check Ambertools15")

    # documented fallback: take the Topology from `traj` when `top` is omitted
    if top is None:
        top = getattr(traj, 'top', None)

    ddict = defaultdict(_default_func)

    # `igb` only selects the default gas-phase input; an explicit `mm_options`
    # always wins. (A former `elif igb is not None` left `inp` unbound when
    # mm_options was given together with igb=None.)
    if mm_options is None:
        inp = sander.gas_input(igb)
    else:
        inp = mm_options

    if isinstance(inp, string_types):
        # SECURITY: executes caller-supplied code; the snippet must define
        # `mm_options`. Never feed untrusted text here.
        local_dict = {'sander': sander}
        exec(inp.lstrip(), local_dict)
        inp = local_dict['mm_options']

    if isinstance(qm_options, string_types):
        # SECURITY: executes caller-supplied code; the snippet must define
        # `qm_options`. Never feed untrusted text here.
        local_dict = {'sander': sander}
        exec(qm_options.lstrip(), local_dict)
        qm_options = local_dict['qm_options']

    if prmtop is None:
        try:
            # try to load from file by taking top.filename
            prmtop_ = top.filename
        except AttributeError:
            raise ValueError("prmtop must be AmberParm object in ParmEd")
    else:
        # Structure, string
        prmtop_ = prmtop

    if not hasattr(prmtop_, 'coordinates') or prmtop_.coordinates is None:
        try:
            # if `traj` is Trajectory-like (not frame_iter), try to take 1st
            # coords
            coords = traj[0].xyz
        except (TypeError, AttributeError):
            # placeholder coordinates; real positions are set for every frame
            # inside the loop below
            coords = [0.] * (top.n_atoms * 3)
    else:
        # use default coords in `AmberParm`
        coords = prmtop_.coordinates

    has_box = bool(top.has_box())
    box = top.box.tolist() if has_box else None

    with sander.setup(prmtop_, coords, box, inp, qm_options):
        for frame in iterframe_master(traj):
            if has_box:
                sander.set_box(*frame.box.tolist())
            sander.set_positions(frame.xyz)
            # forces are computed by sander but not used here
            ene, _ = sander.energy_forces()

            # potentially slow
            for att in get_atts(ene):
                ddict[att].append(getattr(ene, att))

    if mode == 'minimal':
        new_dict = {
            key: ddict[key]
            for key in ['bond', 'angle', 'dihedral', 'tot']
        }
    else:
        new_dict = ddict

    # replace each per-frame list by an array, in place
    for key in list(new_dict):
        new_dict[key] = np.asarray(new_dict[key])

    if dtype == 'dict':
        return OrderedDict(new_dict)

    from pytraj.datasets.c_datasetlist import DatasetList

    # copy every energy term into its own 'double' dataset
    dslist = DatasetList()
    size = len(new_dict['tot'])
    for key in new_dict.keys():
        dslist.add('double')
        dslist[-1].key = key
        dslist[-1].resize(size)
        dslist[-1].data[:] = new_dict[key]
    return get_data_from_dtype(dslist, dtype)