Example #1
0
    def testget_common_objects(self):
        """Check that invalid arguments are rejected with ``TypeError``."""
        # get_reference must raise for each of these reference arguments
        # when handed the object returned by calling ``self.traj()``.
        with self.assertRaises(TypeError):
            get_reference(self.traj(), 3)
        with self.assertRaises(TypeError):
            get_reference(self.traj(), None)

        # An integer is not an acceptable mask for superpose.
        with self.assertRaises(TypeError):
            pt.superpose(self.traj[:], 3)
    def testget_common_objects(self):
        """Check that invalid arguments are rejected with ``TypeError``."""
        # get_reference must raise for each of these reference arguments
        # when handed the object returned by calling ``self.traj()``.
        for invalid_ref in (3, None):
            with pytest.raises(TypeError):
                get_reference(self.traj(), invalid_ref)

        # An integer is not an acceptable mask for superpose.
        with pytest.raises(TypeError):
            pt.superpose(self.traj[:], 3)
Example #3
0
    def inner(*args, **kwd):
        """Normalize the common arguments, then call the wrapped function ``f``.

        The trajectory is taken from the ``traj`` keyword or, failing that,
        from the first positional argument. The mask is taken from the
        ``mask`` keyword or, failing that, from the second positional
        argument. Before delegating to ``f``:

        * ``top`` is replaced by ``get_topology(traj, top)``;
        * ``ref`` (only when given and not None) is replaced by
          ``get_reference(traj, ref)``;
        * the trajectory is replaced by ``get_fiterator(traj, frame_indices)``;
        * a mask that is not a string is converted with
          ``array_to_cpptraj_atommask``.

        When no mask is supplied at all, it is left untouched so that the
        default of ``f`` applies.

        Raises
        ------
        IndexError
            If the trajectory is given neither by keyword nor by position.
        """
        args = list(args)

        # Look at args[0] only when 'traj' is not a keyword. Using
        # ``kwd.get('traj', args[0])`` would evaluate args[0] eagerly and fail
        # with IndexError for calls such as ``f(traj=traj, mask='@CA')``.
        traj_in_kwd = 'traj' in kwd
        traj = kwd['traj'] if traj_in_kwd else args[0]
        frame_indices = kwd.get('frame_indices')
        ref = kwd.get('ref')
        top = kwd.get('top')

        # The mask is optional: keyword first, then second positional slot.
        mask_in_kwd = 'mask' in kwd
        mask_in_args = (not mask_in_kwd) and len(args) > 1
        if mask_in_kwd:
            mask = kwd['mask']
        elif mask_in_args:
            mask = args[1]

        # overwrite
        kwd['top'] = get_topology(traj, top)
        if ref is not None:
            kwd['ref'] = get_reference(traj, ref)
        if traj_in_kwd:
            kwd['traj'] = get_fiterator(traj, frame_indices)
        else:
            args[0] = get_fiterator(traj, frame_indices)

        if mask_in_kwd or mask_in_args:
            if not isinstance(mask, string_types):
                mask = array_to_cpptraj_atommask(mask)
            if mask_in_kwd:
                kwd['mask'] = mask
            else:
                args[1] = mask
        return f(*args, **kwd)
Example #4
0
    def superpose(self, mask='*', ref=None, ref_mask='', mass=False):
        """Register an on-the-fly superposition onto a reference frame.

        The reference is first resolved with ``get_reference(self, ref)`` and
        the request is then forwarded to the parent class' ``superpose``.
        To turn off superposing, set ``traj._being_transformed = False``.

        Returns
        -------
        self

        Notes
        -----
        This method differs from ``superpose`` in pytraj.Trajectory: the
        coordinates of the TrajectoryCpptraj/TrajectoryIterator itself are not
        changed, only the coordinates of the copied Frame.

        It is mainly meant for NGLView in a Jupyter notebook, to view
        out-of-core data, where doing translation and rotation on the fly is
        convenient.


        Examples
        --------
        >>> import pytraj as pt
        >>> traj = pt.datafiles.load_tz2()
        >>> isinstance(traj, pt.TrajectoryIterator)
        True
        >>> traj[0].xyz[0]
        array([-1.88900006,  9.1590004 ,  7.56899977])

        >>> # turn on superpose
        >>> _ = traj.superpose(ref=-1, mask='@CA')
        >>> traj[0].xyz[0]
        array([ 6.97324167,  8.82901548,  1.31844696])

        >>> # turn off superpose
        >>> traj._being_transformed = False
        >>> traj[0].xyz[0]
        array([-1.88900006,  9.1590004 ,  7.56899977])

        Examples for NGLView::

            import pytraj as pt, nglview as nv
            traj = pt.datafiles.load_tz2()
            traj.superpose(ref=0, mask='@CA')
            view = nv.show_pytraj(traj)
            view
        """
        # Resolve whatever the caller passed as ``ref`` before delegating.
        resolved_ref = get_reference(self, ref)
        parent = super(TrajectoryIterator, self)
        parent.superpose(
            mask=mask, ref=resolved_ref, ref_mask=ref_mask, mass=mass)
        # Returning self allows call chaining.
        return self
Example #5
0
def _pmap(func, traj, *args, **kwargs):
    '''use python's multiprocessing to accelerate calculation. Limited calculations.

    Parameters
    ----------
    func : a pytraj method, a list/tuple of cpptraj command strings, or a single
        cpptraj command string
    traj : pytraj.TrajectoryIterator
    n_cores : int, number of cores to be used, default 2. Specify n_cores=-1 (any
        value <= 0) to use all available cores
    iter_options : dict, default {}
        Specify trajectory iterating option. This will be done before calling ``func``.
    frame_indices : {None, array-like}, default None, optional
        if provided, pytraj will split this frame_indices into different chunks and let
        cpptraj perform calculation for specific indices.
        frame_indices must be picklable so it can be sent to different cores.
    apply, progress, progress_params : optional
        Removed from ``kwargs`` and forwarded unchanged to the per-core worker
        (defaults: None, None and an empty dict).

    *args, **kwargs: additional keywords

    Returns
    -------
    out : OrderedDict

    Raises
    ------
    ValueError
        If ``func`` is neither a cpptraj command nor a callable, if it is not marked
        as parallelizable, or if ``traj`` is not a TrajectoryIterator.
    RuntimeError
        If ``func`` is OpenMP-capable and cpptraj was compiled with OpenMP.
    AssertionError
        If ``func`` is ``volmap`` and no ``size`` keyword is given.

    Notes
    -----
    - If you are not sure about the parallel results, you should compare the output to
      a serial run.

    - This is absolutely experimental. The syntax might be changed in future.

    Rule of thumb: start with a small number of frames (say 10 frames), varying
    n_cores=1, 2, 3, 4 to see if the data makes sense or not.

    There are two modes in this method, use pytraj's methods (pytraj.rmsd, pytraj.radgyr,
    ...) or use cpptraj's command text syntax ('autoimage', 'rms', ...)

    If using cpptraj syntax::

        pytraj only supports limited cpptraj's Actions (not Analysis, check the Amber15
        manual about Action and Analysis), say no to 'matrix', 'atomicfluct', ... or any
        action whose output depends on the number of frames.


    This method only benefits you if your calculation is quite long (say a few minutes
    to a few hours). For a calculation that takes less than 1 minute, you won't see a
    significant speed up (it may even be slower) since pytraj needs to warm up and needs
    to gather data when the calculation is done.

    The parallel calculation is very simple, trajectory will be split (almost equal) to
    different chunk (n_chunks = n_cores), pytraj/cpptraj perform calculation for each
    chunk in each core and then send data back to master. Note that we are using Python's
    built-in multiprocessing module, so you can use this method interactively in IPython
    and ipython/jupyter notebook. This behavior is different from using MPI, in which you
    need to write a script, leave the IPython session and type something like::

        mpirun -n 4 python my_script.py

    vs::

        In [1]: pt.pmap(pt.radgyr, traj, n_cores=4)
        Out[1]:
        OrderedDict([('RoG_00000',
                      array([ 18.91114428,  18.93654996,  18.84969884,  18.90449256,
                              18.8568644 ,  18.88917208,  18.9430491 ,  18.88878079,
                              18.91669565,  18.87069722]))])

    This is an experimental method, you should expect its syntax and default output to
    change.

    When sending Topology to different cores, pytraj will reload Topology from
    traj.top.filename, so if you need to update Topology (on the fly), save it to disk and
    reload before using ``pytraj.pmap``

    Examples
    --------
    >>> import numpy as np
    >>> import pytraj as pt
    >>> traj = pt.load_sample_data('tz2')

    >>> # use iter_options
    >>> iter_options = {'autoimage': True, 'rmsfit': (0, '@CA')}
    >>> data = pt.pmap(pt.mean_structure, traj, iter_options=iter_options)

    >>> # cpptraj command style
    >>> data = pt.pmap(['distance :3 :7', 'vector mask :3 :12'], traj, n_cores=4)

    >>> # use reference. Need to explicitly use 'refindex', which is index of reflist
    >>> data = pt.pmap(['rms @CA refindex 0'], traj, ref=[traj[3],], n_cores=3)
    >>> data
    OrderedDict([('RMSD_00001', array([  2.68820312e-01,   3.11804885e-01,   2.58835452e-01,
             9.10475988e-08,   2.93310737e-01,   4.10197322e-01,
             3.96226694e-01,   3.66059215e-01,   3.90890362e-01,
             4.89180497e-01]))])

    >>> # use reference: if not want to use 'refindex', can use 'reference'
    >>> # the advantage is you can not specify a list of reference
    >>> data = pt.pmap(['rms @CA reference'], traj, ref=[traj[3],], n_cores=3)
    >>> data
    OrderedDict([('RMSD_00001', array([  2.68820312e-01,   3.11804885e-01,   2.58835452e-01,
             9.10475988e-08,   2.93310737e-01,   4.10197322e-01,
             3.96226694e-01,   3.66059215e-01,   3.90890362e-01,
             4.89180497e-01]))])

    >>> # use different references. Need to explicitly use 'refindex', which is index of reflist
    >>> # create a list of references
    >>> reflist = traj[3], traj[4]
    >>> # make sure to specify `refindex`
    >>> # `refindex 0` is equal to `reflist[0]`
    >>> # `refindex 1` is equal to `reflist[1]`
    >>> data = pt.pmap(['rms @CA refindex 0', 'rms !@H= refindex 1'], traj, ref=reflist, n_cores=2)
    >>> # convert to ndarray
    >>> data_arr = pt.tools.dict_to_ndarray(data)

    >>> # perform parallel calculation with given frame_indices
    >>> traj = pt.datafiles.load_tz2()
    >>> data = pt.pmap(pt.radgyr, traj, '@CA', frame_indices=range(10, 50), n_cores=4)
    >>> # serial version
    >>> data = pt.radgyr(traj, '@CA', frame_indices=range(10, 50))


    See also
    --------
    pytraj.pmap_mpi
    '''
    from multiprocessing import Pool
    from pytraj import TrajectoryIterator

    # Options consumed by pmap itself; whatever remains in ``kwargs`` is
    # forwarded to ``func`` / the cpptraj batch loader.
    n_cores = kwargs.pop('n_cores', 2)
    iter_options = kwargs.pop('iter_options', {})
    apply = kwargs.pop('apply', None)
    progress = kwargs.pop('progress', None)
    progress_params = kwargs.pop('progress_params', {})

    if n_cores <= 0:
        # use all available cores
        n_cores = cpu_count()

    # update reference
    if 'ref' in kwargs:
        kwargs['ref'] = get_reference(traj, kwargs['ref'])

    if isinstance(func, (list, tuple, string_types)):
        # cpptraj command style: assume using _load_batch_pmap
        from pytraj.parallel.base import _load_batch_pmap
        data = _load_batch_pmap(n_cores=n_cores,
                                traj=traj,
                                lines=func,
                                dtype='dict',
                                root=0,
                                mode='multiprocessing',
                                **kwargs)
        return concat_dict((x[0] for x in data))

    # pytraj's method
    if not callable(func):
        raise ValueError('must callable argument')
    if not getattr(func, '_is_parallelizable', False):
        raise ValueError("this method does not support parallel")
    if (getattr(func, '_openmp_capability', False)
            and 'OPENMP' in compiled_info()):
        raise RuntimeError(
            "this method supports both openmp and pmap, but your cpptraj "
            "version was installed with openmp. Should not use both openmp and pmap at the "
            "same time. In this case, do not use pmap since openmp is more efficient"
        )

    if not isinstance(traj, TrajectoryIterator):
        raise ValueError('only support TrajectoryIterator')

    # These functions keep their own default dtype; every other function is
    # asked for a dict unless the caller chose a dtype explicitly.
    if 'dtype' not in kwargs and func not in [
            mean_structure,
            matrix.dist,
            matrix.idea,
            ired_vector_and_matrix,
            rotation_matrix,
            volmap,
    ]:
        kwargs['dtype'] = 'dict'

    # keyword
    if func is volmap:
        # kept as an assert so callers still see AssertionError
        assert kwargs.get('size') is not None, 'must provide "size" value'

    pfuncs = partial(worker_by_func,
                     n_cores=n_cores,
                     func=func,
                     traj=traj,
                     args=args,
                     kwargs=kwargs,
                     iter_options=iter_options,
                     apply=apply,
                     progress=progress,
                     progress_params=progress_params)

    pool = Pool(n_cores)
    try:
        # one task per rank
        data = pool.map(pfuncs, range(n_cores))
    finally:
        # Release the worker processes even when a worker raised.
        pool.close()
        pool.join()

    dataset_processor = PmapDataset(data,
                                    func=func,
                                    kwargs=kwargs,
                                    traj=traj)
    return dataset_processor.process()
Example #6
0
def pmap_mpi(func, traj, *args, **kwargs):
    """Run a calculation in parallel with MPI (mpi4py).

    Parameters
    ----------
    func : a function, or a list/tuple of cpptraj command strings
    traj : pytraj.TrajectoryIterator
    *args, **kwargs: additional arguments
        ``ref`` (if present) is resolved with ``get_reference`` first.
        ``frame_indices`` (if present, callable mode only) is split into
        ``comm.size`` chunks and each rank handles its own chunk.

    Returns
    -------
    On rank 0, the processed (callable mode) or concatenated (cpptraj command
    mode) result. Other ranks return whatever the gather step gave them
    (presumably None -- confirm against mpi4py / ``_load_batch_pmap``).

    Examples
    --------
    .. code-block:: bash

        $ # create test_radgyr.py file
        $ cat > test_radgyr.py <<EOF
        import pytraj as pt
        from mpi4py import MPI
        comm = MPI.COMM_WORLD

        traj = pt.iterload('tz2.nc', 'tz2.parm7')

        result_arr = pt.pmap_mpi(pt.radgyr, traj, "@CA")

        if comm.rank == 0:
            # save data to disk to read later by pytraj.read_pickle
            # pt.to_pickle(result_arr, 'output.pk')
            print(result_arr)
        EOF

        $ # run in parallel
        $ mpirun -n 4 python ./test_radgyr.py
        [array([ 8.10916061,  7.7643485 ,  8.09693108, ...,  9.70825678,
                9.3161563 ,  8.86720964]), array([ 8.82037273,  8.89008289,  9.48540176, ...,  9.29585981,
                9.53138062,  9.19155977]), array([ 9.13735723,  8.94651001,  8.97810478, ...,  7.68751186,
                8.31361647,  7.83763754]), array([ 7.37423766,  7.05637263,  6.52135566, ...,  6.38061648,
                6.24139008,  6.48994552])]
    """
    from mpi4py import MPI
    world = MPI.COMM_WORLD
    n_cores, rank = world.size, world.rank

    # Resolve the reference once, before any work is distributed.
    if 'ref' in kwargs:
        kwargs['ref'] = get_reference(traj, kwargs['ref'])

    if isinstance(func, (list, tuple)):
        # cpptraj command style
        from pytraj.parallel.base import _load_batch_pmap
        total = _load_batch_pmap(n_cores=n_cores,
                                 traj=traj,
                                 lines=func,
                                 dtype='dict',
                                 root=0,
                                 mode='mpi',
                                 **kwargs)
        if rank == 0:
            # only the root holds data to concatenate
            total = concat_dict((x[0] for x in total))
        return total

    # Callable style: split traj into ``n_cores`` chunks and compute the
    # rank-th chunk on this process.
    kwargs.setdefault('dtype', 'dict')

    frame_indices = kwargs.pop('frame_indices', None)
    if frame_indices is not None:
        my_indices = np.array_split(frame_indices, n_cores)[rank]
        my_iter = traj.iterframe(frame_indices=my_indices)
    else:
        start, stop = split_range(n_cores, 0, traj.n_frames)[rank]
        my_iter = traj.iterframe(start=start, stop=stop)

    n_frames = my_iter.n_frames
    data = func(my_iter, *args, **kwargs)

    # Both gathers are collective; keep their order the same on every rank.
    # total : List[OrderedDict or Any]
    total = world.gather(data, root=0)
    n_frames_collection = world.gather(n_frames, root=0)

    if rank != 0:
        return total

    # Pair each chunk's result with the number of frames it covered.
    data_collection = list(zip(total, n_frames_collection))
    dataset_processor = PmapDataset(data_collection,
                                    func=func,
                                    traj=traj,
                                    kwargs=kwargs)
    return dataset_processor.process()