def testget_common_objects(self):
    """Invalid ``ref`` and ``mask`` arguments must raise ``TypeError``."""
    # get_reference is handed the object returned by calling ``self.traj()``;
    # neither an integer nor None is an acceptable reference for it
    for invalid_ref in (3, None):
        with self.assertRaises(TypeError):
            get_reference(self.traj(), invalid_ref)

    # an integer is the wrong type for the superpose mask
    with self.assertRaises(TypeError):
        pt.superpose(self.traj[:], 3)
def testget_common_objects(self):
    """Invalid ``ref`` and ``mask`` arguments must raise ``TypeError``."""
    # get_reference is handed the object returned by calling ``self.traj()``;
    # neither an integer nor None is an acceptable reference for it
    for invalid_ref in (3, None):
        with pytest.raises(TypeError):
            get_reference(self.traj(), invalid_ref)

    # an integer is the wrong type for the superpose mask
    with pytest.raises(TypeError):
        pt.superpose(self.traj[:], 3)
def inner(*args, **kwd):
    """Normalize the common arguments, then forward the call to ``f``.

    ``f`` is taken from the enclosing scope. The following arguments are
    rewritten before ``f`` is called:

    - ``top``  : always set in ``kwd`` to ``get_topology(traj, top)``
    - ``ref``  : replaced by ``get_reference(traj, ref)`` when it was given
      as a keyword and is not None
    - ``traj`` : replaced by ``get_fiterator(traj, frame_indices)``, in the
      same place (keyword or first positional) the caller used
    - ``mask`` : converted with ``array_to_cpptraj_atommask`` when it is not
      a string, in the same place (keyword or second positional) the caller
      used

    Parameters
    ----------
    *args : positional arguments; ``traj`` is expected first and ``mask``
        second when they are not given as keywords
    **kwd : keyword arguments; 'traj', 'mask', 'ref', 'top' and
        'frame_indices' are inspected

    Returns
    -------
    whatever ``f(*args, **kwd)`` returns

    Raises
    ------
    IndexError
        if ``traj`` is given neither as keyword nor as positional argument
    """
    args = list(args)

    # Look ``traj`` up lazily. ``kwd.get('traj', args[0])`` evaluates
    # ``args[0]`` even when 'traj' is a keyword, which raised IndexError for
    # keyword-only calls such as ``inner(traj=..., mask=...)``.
    traj_in_kwd = 'traj' in kwd
    traj = kwd['traj'] if traj_in_kwd else args[0]
    frame_indices = kwd.get('frame_indices')
    ref = kwd.get('ref')
    top = kwd.get('top')

    # ``mask`` may legitimately be omitted; in that case leave it alone so
    # that ``f`` falls back to its own default instead of failing here with
    # an IndexError on ``args[1]``.
    mask_in_kwd = 'mask' in kwd
    mask_given = mask_in_kwd or len(args) > 1
    if mask_in_kwd:
        mask = kwd['mask']
    elif mask_given:
        mask = args[1]

    # overwrite
    kwd['top'] = get_topology(traj, top)
    if ref is not None:
        kwd['ref'] = get_reference(traj, ref)

    fiterator = get_fiterator(traj, frame_indices)
    if traj_in_kwd:
        kwd['traj'] = fiterator
    else:
        args[0] = fiterator

    if mask_given:
        if not isinstance(mask, string_types):
            mask = array_to_cpptraj_atommask(mask)
        # write the (possibly converted) mask back where it came from
        if mask_in_kwd:
            kwd['mask'] = mask
        else:
            args[1] = mask

    return f(*args, **kwd)
def superpose(self, mask='*', ref=None, ref_mask='', mass=False):
    """Register a superposition onto a reference frame, applied on iteration.

    To turn off superposing, set traj._being_transformed = False

    The reference is first resolved with ``get_reference(self, ref)`` and the
    registration itself is delegated to the parent class.

    Returns
    -------
    self

    Notes
    -----
    This method is different from ``superpose`` in pytraj.Trajectory.
    It does not change the coordinates of TrajectoryCpptraj/TrajectoryIterator
    itself; it changes the coordinates of the copied Frame.

    This method is mainly for NGLView in Jupyter notebook, to view out-of-core
    data. It's good to do translation and rotation on the fly.

    Examples
    --------
    >>> import pytraj as pt
    >>> traj = pt.datafiles.load_tz2()
    >>> isinstance(traj, pt.TrajectoryIterator)
    True
    >>> traj[0].xyz[0]
    array([-1.88900006, 9.1590004 , 7.56899977])
    >>> # turn on superpose
    >>> _ = traj.superpose(ref=-1, mask='@CA')
    >>> traj[0].xyz[0]
    array([ 6.97324167, 8.82901548, 1.31844696])

    >>> # turn off superpose
    >>> traj._being_transformed = False
    >>> traj[0].xyz[0]
    array([-1.88900006, 9.1590004 , 7.56899977])

    Examples for NGLView::

        import pytraj as pt, nglview as nv
        traj = pt.datafiles.load_tz2()
        traj.superpose(ref=0, mask='@CA')
        view = nv.show_pytraj(traj)
        view
    """
    reference = get_reference(self, ref)
    parent = super(TrajectoryIterator, self)
    parent.superpose(mask=mask,
                     ref=reference,
                     ref_mask=ref_mask,
                     mass=mass)
    return self
def _pmap(func, traj, *args, **kwargs):
    '''use python's multiprocessing to accelerate calculation. Limited calculations.

    Parameters
    ----------
    func : a pytraj's methods or a list of string or simply as a cpptraj' text
    traj : pytraj.TrajectoryIterator
    n_cores : int, number of cores to be used, default 2. Specify n_cores=-1 to use all available cores
    iter_options : dict, default {}
        Specify trajectory iterating option. This will be done before calling ``func``.
    frame_indices : {None, array-like}, default None, optional
        if provided, pytraj will split this frame_indices into different chunks and let
        cpptraj perform calculation for specific indices.
        frame_indices must be picklable so it can be sent to different cores.
    apply, progress, progress_params : optional keywords
        removed from ``kwargs`` and passed unchanged to ``worker_by_func``
        (only used when ``func`` is a pytraj method)
    *args, **kwargs: additional keywords

    Returns
    -------
    out : OrderedDict

    Raises
    ------
    ValueError
        if ``func`` is not callable (and is not a cpptraj command), if it is
        not marked as parallelizable, or if ``traj`` is not a
        TrajectoryIterator
    RuntimeError
        if ``func`` supports openmp and cpptraj was compiled with openmp

    Notes
    -----
    - If you are not sure about parallel's results, you should compare the output to serial run.

    - This is absolutely experimental. The syntax might be changed in future.

    Rule of thumbs: start with small number of frames (saying 10 frames), varying
    n_cores=1, 2, 3, 4 to see if the data makes sense or not.

    There are two modes in this method, use pytraj's methods (pytraj.rmsd, pytraj.radgyr,
    ...) or use cpptraj's command text syntax ('autoimage', 'rms', ...)

    If using cpptraj syntax::

        pytraj only supports limited cpptraj's Actions (not Analysis, check Amber15 manual
        about Action and Analysis), say no to 'matrix', 'atomicfluct', ... or any action
        that results output depending on the number of frames.

    This method only benefits you if your calculation is quite long (saying few minutes to
    few hours). For calculation that takes less than 1 minutes, you won't see the
    significant speed up (or even slower) since pytraj need to warm up and need to gather
    data when the calculation done.

    The parallel calculation is very simple, trajectory will be split (almost equal) to
    different chunk (n_chunks = n_cores), pytraj/cpptraj perform calculation for each
    chunk in each core and then send data back to master. Note that we are using Python's
    built-in multiprocessing module, so you can use this method interactively in Ipython
    and ipython/jupyter notebook. This behavior is different from using MPI, in which you
    need to write a script, escaping ipython session and type something like::

        mpirun -n 4 python my_script.py

    vs::

        In [1]: pt.pmap(pt.radgyr, traj, n_cores=4)
        Out[1]:
        OrderedDict([('RoG_00000',
                      array([ 18.91114428, 18.93654996, 18.84969884, 18.90449256,
                              18.8568644 , 18.88917208, 18.9430491 , 18.88878079,
                              18.91669565, 18.87069722]))])

    This is experimental method, you should expect its syntax, default output will be changed.

    When sending Topology to different cores, pytraj will reload Topology from
    traj.top.filename, so if you need to update Topology (in the fly), save it to disk and
    reload before using ``pytraj.pmap``

    Examples
    --------
    >>> import numpy as np
    >>> import pytraj as pt
    >>> traj = pt.load_sample_data('tz2')

    >>> # use iter_options
    >>> iter_options = {'autoimage': True, 'rmsfit': (0, '@CA')}
    >>> data = pt.pmap(pt.mean_structure, traj, iter_options=iter_options)

    >>> # cpptraj command style
    >>> data = pt.pmap(['distance :3 :7', 'vector mask :3 :12'], traj, n_cores=4)

    >>> # use reference. Need to explicitly use 'refindex', which is index of reflist
    >>> data = pt.pmap(['rms @CA refindex 0'], traj, ref=[traj[3],], n_cores=3)
    >>> data
    OrderedDict([('RMSD_00001', array([ 2.68820312e-01, 3.11804885e-01, 2.58835452e-01,
             9.10475988e-08, 2.93310737e-01, 4.10197322e-01,
             3.96226694e-01, 3.66059215e-01, 3.90890362e-01,
             4.89180497e-01]))])

    >>> # use reference: if not want to use 'refindex', can use 'reference'
    >>> # the advantage is you can not specify a list of reference
    >>> data = pt.pmap(['rms @CA reference'], traj, ref=[traj[3],], n_cores=3)
    >>> data
    OrderedDict([('RMSD_00001', array([ 2.68820312e-01, 3.11804885e-01, 2.58835452e-01,
             9.10475988e-08, 2.93310737e-01, 4.10197322e-01,
             3.96226694e-01, 3.66059215e-01, 3.90890362e-01,
             4.89180497e-01]))])

    >>> # use different references. Need to explicitly use 'refindex', which is index of reflist
    >>> # create a list of references
    >>> reflist = traj[3], traj[4]
    >>> # make sure to specify `refindex`
    >>> # `refindex 0` is equal to `reflist[0]`
    >>> # `refindex 1` is equal to `reflist[1]`
    >>> data = pt.pmap(['rms @CA refindex 0', 'rms !@H= refindex 1'], traj, ref=reflist, n_cores=2)
    >>> # convert to ndarray
    >>> data_arr = pt.tools.dict_to_ndarray(data)

    >>> # perform parallel calculation with given frame_indices
    >>> traj = pt.datafiles.load_tz2()
    >>> data = pt.pmap(pt.radgyr, traj, '@CA', frame_indices=range(10, 50), n_cores=4)
    >>> # serial version
    >>> data = pt.radgyr(traj, '@CA', frame_indices=range(10, 50))

    See also
    --------
    pytraj.pmap_mpi
    '''
    from multiprocessing import Pool
    from pytraj import TrajectoryIterator

    # options consumed here must not leak into the call of ``func``
    n_cores = kwargs.pop('n_cores', 2)
    iter_options = kwargs.pop('iter_options', {})
    apply = kwargs.pop('apply', None)
    progress = kwargs.pop('progress', None)
    progress_params = kwargs.pop('progress_params', dict())

    if n_cores <= 0:
        # use all available cores
        n_cores = cpu_count()

    # update reference
    if 'ref' in kwargs:
        kwargs['ref'] = get_reference(traj, kwargs['ref'])

    if isinstance(func, (list, tuple, string_types)):
        # cpptraj command style: assume using _load_batch_pmap
        from pytraj.parallel.base import _load_batch_pmap
        data = _load_batch_pmap(n_cores=n_cores,
                                traj=traj,
                                lines=func,
                                dtype='dict',
                                root=0,
                                mode='multiprocessing',
                                **kwargs)
        return concat_dict((x[0] for x in data))

    # pytraj's method
    if not callable(func):
        raise ValueError('must callable argument')
    if not getattr(func, '_is_parallelizable', False):
        raise ValueError("this method does not support parallel")
    if (getattr(func, '_openmp_capability', False)
            and 'OPENMP' in compiled_info()):
        raise RuntimeError(
            "this method supports both openmp and pmap, but your cpptraj "
            "version was installed with openmp. Should not use both openmp and pmap at the "
            "same time. In this case, do not use pmap since openmp is more efficient"
        )

    if not isinstance(traj, TrajectoryIterator):
        raise ValueError('only support TrajectoryIterator')

    # the functions listed here keep their own default dtype
    if 'dtype' not in kwargs and func not in [
            mean_structure,
            matrix.dist,
            matrix.idea,
            ired_vector_and_matrix,
            rotation_matrix,
            volmap,
    ]:
        kwargs['dtype'] = 'dict'

    # keyword
    if func is volmap:
        # NOTE(review): this check disappears under ``python -O``; kept as an
        # assert so the exception type seen by callers does not change
        assert kwargs.get('size') is not None, 'must provide "size" value'

    pfuncs = partial(worker_by_func,
                     n_cores=n_cores,
                     func=func,
                     traj=traj,
                     args=args,
                     kwargs=kwargs,
                     iter_options=iter_options,
                     apply=apply,
                     progress=progress,
                     progress_params=progress_params)

    pool = Pool(n_cores)
    try:
        # one task per rank
        data = pool.map(pfuncs, range(n_cores))
    except BaseException:
        # do not leave worker processes behind when a chunk fails
        pool.terminate()
        raise
    else:
        pool.close()
    finally:
        # valid after either close() or terminate(); reaps the workers
        pool.join()

    dataset_processor = PmapDataset(data,
                                    func=func,
                                    kwargs=kwargs,
                                    traj=traj)
    return dataset_processor.process()
def pmap_mpi(func, traj, *args, **kwargs):
    """parallel with MPI (mpi4py)

    Parameters
    ----------
    func : a function, or a list/tuple of cpptraj command strings
    traj : pytraj.TrajectoryIterator
    *args, **kwargs: additional arguments

    Returns
    -------
    On rank 0, the gathered result. When ``func`` is a function, the other
    ranks return None.

    Examples
    --------
    .. code-block:: bash

        $ # create test_radgyr.py file
        $ cat > test_radgyr.py <<EOF
        import pytraj as pt
        from mpi4py import MPI
        comm = MPI.COMM_WORLD

        traj = pt.iterload('tz2.nc', 'tz2.parm7')

        result_arr = pt.pmap_mpi(pt.radgyr, traj, "@CA")

        if comm.rank == 0:
            # save data to disk to read later by pytraj.read_pickle
            # pt.to_pickle(result_arr, 'output.pk')
            print(result_arr)
        EOF

        $ # run in parallel
        $ mpirun -n 4 python ./test_radgyr.py
        [array([ 8.10916061, 7.7643485 , 8.09693108, ..., 9.70825678,
                9.3161563 , 8.86720964]), array([ 8.82037273, 8.89008289, 9.48540176, ..., 9.29585981,
                9.53138062, 9.19155977]), array([ 9.13735723, 8.94651001, 8.97810478, ..., 7.68751186,
                8.31361647, 7.83763754]), array([ 7.37423766, 7.05637263, 6.52135566, ..., 6.38061648,
                6.24139008, 6.48994552])]
    """
    from mpi4py import MPI
    world = MPI.COMM_WORLD
    n_cores = world.size
    rank = world.rank

    # update reference
    if 'ref' in kwargs:
        kwargs['ref'] = get_reference(traj, kwargs['ref'])

    if isinstance(func, (list, tuple)):
        # cpptraj command style
        from pytraj.parallel.base import _load_batch_pmap
        total = _load_batch_pmap(n_cores=n_cores,
                                 traj=traj,
                                 lines=func,
                                 dtype='dict',
                                 root=0,
                                 mode='mpi',
                                 **kwargs)
        if rank == 0:
            # otherwise, total=None
            total = concat_dict((x[0] for x in total))
        return total

    # function style: split traj to ``n_cores`` chunks, perform calculation
    # for rank-th chunk
    kwargs.setdefault('dtype', 'dict')
    frame_indices = kwargs.pop('frame_indices', None)

    if frame_indices is not None:
        my_indices = np.array_split(frame_indices, n_cores)[rank]
        my_iter = traj.iterframe(frame_indices=my_indices)
    else:
        start, stop = split_range(n_cores, 0, traj.n_frames)[rank]
        my_iter = traj.iterframe(start=start, stop=stop)

    n_frames = my_iter.n_frames
    data = func(my_iter, *args, **kwargs)

    # total : List[OrderedDict or Any]
    total = world.gather(data, root=0)
    n_frames_collection = world.gather(n_frames, root=0)

    if rank != 0:
        return None

    # pair every chunk's result with the number of frames it covered
    data_collection = list(zip(total, n_frames_collection))
    dataset_processor = PmapDataset(data_collection,
                                    func=func,
                                    traj=traj,
                                    kwargs=kwargs)
    return dataset_processor.process()