def test_xyz(self):
    """Coordinates appended to an empty Frame must equal the source frame's."""
    traj = pt.iterload("./data/Tc5b.x", "./data/Tc5b.top")

    # start from an empty frame and fill it from the first trajectory frame
    copied = Frame()
    copied.append_xyz(traj[0].xyz)

    # same values as an (n_atoms, 3) array ...
    aa_eq(copied.xyz, traj[0].xyz)
    # ... as a flattened array ...
    aa_eq(copied.xyz.flatten(), traj[0].xyz.flatten())
    # ... and through the frame's 1D buffer
    aa_eq(np.array(copied._buffer1d), traj[0].xyz.flatten())
def make_random_frame(n_atoms=10000):
    '''Create a Frame whose coordinates are drawn from ``np.random.randn``.

    Parameters
    ----------
    n_atoms : int, default 10000
        number of atoms of the new Frame

    Returns
    -------
    out : pytraj.Frame

    Examples
    --------
    >>> make_random_frame(n_atoms=300)
    <Frame with 300 atoms>
    '''
    import numpy as np
    from pytraj import Frame

    random_frame = Frame(n_atoms)
    # write in place into the frame's coordinate array
    random_coords = np.random.randn(n_atoms, 3)
    random_frame.xyz[:] = random_coords
    return random_frame
def test_2(self):
    """Box data can be assigned element-wise and attached to a Frame."""
    import numpy as np

    expected = np.arange(6).astype(np.float64)

    box = Box()
    box.data[:] = expected

    # every stored value must match the value it was assigned from
    for position, value in enumerate(expected):
        assert box.data[position] == value

    # attach the box to a fresh Frame and read it back
    framed = Frame()
    framed.box = box
    aa_eq(framed.box.values, box.values, decimal=7)
def merge_frames(iterables):
    """Stack the coordinates of several frames into one Frame.

    The atoms of the result follow the order in which the frames are
    yielded by ``iterables``.

    Examples
    --------
    >>> import pytraj as pt
    >>> traj = pt.load_sample_data('tz2')
    >>> traj[0]
    <Frame with 5293 atoms>
    >>> merge_frames(traj(0, 3))
    <Frame with 15879 atoms>
    """
    from pytraj import Frame

    # copy each coordinate array while its frame is still the current one
    coordinate_blocks = []
    for single_frame in iterables:
        coordinate_blocks.append(single_frame.xyz.copy())
    stacked = np.vstack(coordinate_blocks)

    merged = Frame()
    merged.append_xyz(stacked)
    return merged
def merge_frames(iterables):
    """Merge several frames into a single Frame. Order matters.

    Parameters
    ----------
    iterables : iterable of frames
        every item must expose an ``xyz`` coordinate array

    Returns
    -------
    out : pytraj.Frame
        a new Frame holding the vertically stacked coordinates

    Examples
    --------
    >>> import pytraj as pt
    >>> traj = pt.load_sample_data('tz2')
    >>> traj[0]
    <Frame with 5293 atoms>
    >>> merge_frames(traj(0, 3))
    <Frame with 15879 atoms>
    """
    from pytraj import Frame

    # np.vstack needs a real sequence: passing a generator is deprecated and
    # rejected by recent NumPy releases, so materialize the copies in a list.
    xyz = np.vstack([f.xyz.copy() for f in iterables])
    frame = Frame()
    frame.append_xyz(xyz)
    return frame
def test_run_0(self):
    """Run Center + Image actions on two frames and check the frame counts."""
    # only the first two frames of the trajectory are used
    farray = pt.load(filename="./data/tz2.truncoct.nc",
                     top="./data/tz2.truncoct.parm7")[:2]
    fold = farray.copy()

    act = allactions.Action_Image()
    ptrajin = """ center :2-11 image center familiar com :6 """

    # these objects are constructed here but not used further in this test
    stripact = allactions.Action_Strip()
    dsetlist = CpptrajDatasetList()
    dflist = DataFileList()

    # queue the two actions that are actually applied: Center, then Image
    alist = ActionList()
    top = farray.top
    alist.add(allactions.Action_Center(), ArgList(":2-11"), top=top)
    alist.add(allactions.Action_Image(),
              ArgList("center familiar com :6"),
              top=top)

    alist.check_topology(top)

    farray2 = Trajectory()
    frame0 = Frame()

    # iterate over the frames directly instead of indexing by frame number
    for frame in farray:
        # the actions run on a copy so the loaded frames stay untouched
        frame0 = frame.copy()
        alist.compute(frame0)
        # collect the processed frame
        farray2.append(frame0)

    # every input frame must have produced one output frame
    assert farray2.n_frames == farray.n_frames

    fsaved = pt.iterload(cpptraj_test_dir + "/Test_Image/image4.crd.save",
                         "data/tz2.truncoct.parm7")
    assert fsaved.n_frames == 2
def process(self):
    """Combine the per-chunk results in ``self.data`` into a single result.

    Every item of ``self.data`` is indexed as ``(result, n_frames)``. How
    the items are combined depends on ``self.func``:

    - ``matrix.dist``, ``matrix.idea``, ``volmap``: sum of
      ``result * n_frames`` divided by ``self.traj.n_frames``.
    - ``ired_vector_and_matrix``: ``result`` is ``(vecs, mat)``; matrices
      are averaged as above and vectors are column-stacked. Returns
      ``(vecs, mat)``.
    - ``rotation_matrix``: results are row-stacked. With a truthy
      ``with_rmsd`` keyword each result is ``(mat, rmsd)`` and the return
      value is ``OrderedDict(out=(mat, rmsd))``, otherwise
      ``OrderedDict(mat=mat)``.
    - ``mean_structure``: coordinates are averaged as above and written
      into a new ``Frame``.
    - any function whose ``__name__`` contains ``'hbond'``:
      ``concat_hbond(self.data)``.
    - everything else: ``concat_dict`` over the results.
    """
    func = self.func

    if func in [matrix.dist, matrix.idea, volmap]:
        # The builtin ``sum`` adds the arrays element-wise. ``np.sum`` on a
        # generator is deprecated (it only forwarded to the builtin), and
        # ``np.sum`` on a list would collapse everything to one scalar.
        weighted = sum(val[0] * val[1] for val in self.data)
        return weighted / self.traj.n_frames

    if func in [ired_vector_and_matrix]:
        # val : Tuple[(vecs, mat), n_frames]
        weighted = sum(val[0][1] * val[1] for val in self.data)
        mat = weighted / self.traj.n_frames
        # NumPy stacking helpers need a sequence, not a generator
        vecs = np.column_stack([val[0][0] for val in self.data])
        return (vecs, mat)

    if func in [rotation_matrix]:
        if self.kwargs.get('with_rmsd'):
            # val : Tuple[(mat, rmsd), n_frames]
            # np.vstack replaces its deprecated alias np.row_stack
            mat = np.vstack([val[0][0] for val in self.data])
            rmsd_ = np.hstack([val[0][1] for val in self.data])
            return OrderedDict(out=(mat, rmsd_))
        # val : Tuple[mat, n_frames]
        mat = np.vstack([val[0] for val in self.data])
        return OrderedDict(mat=mat)

    if func == mean_structure:
        # val : Tuple[Frame, n_frames]
        weighted = sum(x[1] * x[0].xyz for x in self.data)
        xyz = weighted / self.traj.n_frames
        frame = Frame(xyz.shape[0])
        frame.xyz[:] = xyz
        return frame

    if 'hbond' in func.__name__:
        return concat_hbond(self.data)

    return concat_dict((x[0] for x in self.data))
import unittest import pytraj as pt from utils import fn from array import array import numpy as np from pytraj import Frame from pytraj.testing import aa_eq from pytraj import * N_ATOMS = 10 FRAME = Frame(N_ATOMS) arr = np.arange(3 * N_ATOMS) FRAME.xyz[:] = arr.reshape(N_ATOMS, 3) FRAME_orig = FRAME.copy() class TestFrame(unittest.TestCase): def test_fit(self): traj = pt.iterload(fn('Tc5b.x'), fn('Tc5b.top')) trajnew = pt.iterload(fn('md1_prod.fit_to_first.Tc5b.x'), fn('Tc5b.top')) # make sure 0-th frame does not change frame0 = traj[0] trajnew[0] frame1 = traj[1] frame1new = trajnew[1] # try do-fitting from Python
import unittest import pytraj as pt from array import array import numpy as np from pytraj import Frame from pytraj.base import * from pytraj.math import Vec3 from pytraj.testing import aa_eq N_ATOMS = 10 FRAME = Frame(N_ATOMS) arr = np.arange(3 * N_ATOMS) FRAME.xyz[:] = arr.reshape(N_ATOMS, 3) FRAME_orig = FRAME.copy() class TestFrame(unittest.TestCase): def test_fit(self): traj = pt.iterload("./data/Tc5b.x", "./data/Tc5b.top") trajnew = pt.iterload("./data/md1_prod.fit_to_first.Tc5b.x", "./data/Tc5b.top") # make sure 0-th frame does not change frame0 = traj[0] frame0new = trajnew[0] frame1 = traj[1] frame1new = trajnew[1] # try do-fitting from Python
def _pmap(func, traj, *args, **kwd):
    '''use python's multiprocessing to accelerate calculation. Limited calculations.

    Parameters
    ----------
    func : a pytraj's methods or a list of string or simply as a cpptraj' text
    traj : pytraj.TrajectoryIterator
    n_cores : int, number of cores to be used, default 2. Specify n_cores=-1 to use all available cores
    iter_options : dict, default {}
        Specify trajectory iterating option. This will be done before calling ``func``.
    frame_indices : {None, array-like}, default None, optional
        if provided, pytraj will split this frame_indices into different chunks and let
        cpptraj perform calculation for specific indices.
        frame_indices must be picklable so it can be sent to different cores.

    *args, **kwd: additional keywords

    Returns
    -------
    out : OrderedDict in most cases. Exceptions: a matrix for ``matrix.dist``,
        ``matrix.idea`` and ``volmap``; a tuple ``(vecs, mat)`` for
        ``ired_vector_and_matrix``; a Frame for ``mean_structure``.

    Raises
    ------
    ValueError
        if ``func`` is not callable (and not a cpptraj command), is not marked
        as parallelizable, or ``traj`` is not a TrajectoryIterator
    RuntimeError
        if ``func`` supports openmp and cpptraj was compiled with openmp

    Notes
    -----
    - If you not sure about parallel's results, you should compare the output to serial run.

    - This is absolutely experimental. The syntax might be changed in future.

    Rule of thumbs: start with small number of frames (saying 10 frames), varying
    n_cores=1, 2, 3, 4 to see if the data makes sense or not.

    There are two modes in this method, use pytraj's methods (pytraj.rmsd, pytraj.radgyr,
    ...) or use cpptraj's command text syntax ('autoimage', 'rms', ...)

    If using cpptraj syntax::

        pytraj only supports limited cpptraj's Actions (not Analysis, check Amber15 manual
        about Action and Analysis), say no to 'matrix', 'atomicfluct', ... or any action
        that results output depending on the number of frames.

    This method only benefits you if your calculation is quite long (saying few minutes to
    few hours). For calculation that takes less than 1 minutes, you won't see the
    significant speed up (or even slower) since pytraj need to warm up and need to gather
    data when the calculation done.

    The parallel calculation is very simple, trajectory will be split (almost equal) to
    different chunk (n_chunks = n_cores), pytraj/cpptraj perform calculation for each
    chunk in each core and then send data back to master. Note that we are using Python's
    built-in multiprocessing module, so you can use this method interactively in Ipython
    and ipython/jupyter notebook. This behavior is different from using MPI, in which you
    need to write a script, escaping ipython session and type something like::

        mpirun -n 4 python my_script.py

    vs::

        In [1]: pt.pmap(pt.radgyr, traj, n_cores=4)
        Out[1]:
        OrderedDict([('RoG_00000',
                      array([ 18.91114428,  18.93654996,  18.84969884,  18.90449256,
                              18.8568644 ,  18.88917208,  18.9430491 ,  18.88878079,
                              18.91669565,  18.87069722]))])

    This is experimental method, you should expect its syntax, default output will be changed.

    When sending Topology to different cores, pytraj will reload Topology from
    traj.top.filename, so if you need to update Topology (in the fly), save it to disk and
    reload before using ``pytraj.pmap``

    Examples
    --------
    >>> import numpy as np
    >>> import pytraj as pt
    >>> traj = pt.load_sample_data('tz2')

    >>> # use iter_options
    >>> iter_options = {'autoimage': True, 'rmsfit': (0, '@CA')}
    >>> data = pt.pmap(pt.mean_structure, traj, iter_options=iter_options)

    >>> # cpptraj command style
    >>> data = pt.pmap(['distance :3 :7', 'vector mask :3 :12'], traj, n_cores=4)

    >>> # use reference. Need to explicitly use 'refindex', which is index of reflist
    >>> data = pt.pmap(['rms @CA refindex 0'], traj, ref=[traj[3],], n_cores=3)
    >>> data
    OrderedDict([('RMSD_00001', array([  2.68820312e-01,   3.11804885e-01,   2.58835452e-01,
             9.10475988e-08,   2.93310737e-01,   4.10197322e-01,
             3.96226694e-01,   3.66059215e-01,   3.90890362e-01,
             4.89180497e-01]))])

    >>> # use reference: if not want to use 'refindex', can use 'reference'
    >>> # the advantage is you can not specify a list of reference
    >>> data = pt.pmap(['rms @CA reference'], traj, ref=[traj[3],], n_cores=3)
    >>> data
    OrderedDict([('RMSD_00001', array([  2.68820312e-01,   3.11804885e-01,   2.58835452e-01,
             9.10475988e-08,   2.93310737e-01,   4.10197322e-01,
             3.96226694e-01,   3.66059215e-01,   3.90890362e-01,
             4.89180497e-01]))])

    >>> # use different references. Need to explicitly use 'refindex', which is index of reflist
    >>> # create a list of references
    >>> reflist = traj[3], traj[4]
    >>> # make sure to specify `refindex`
    >>> # `refindex 0` is equal to `reflist[0]`
    >>> # `refindex 1` is equal to `reflist[1]`
    >>> data = pt.pmap(['rms @CA refindex 0', 'rms !@H= refindex 1'], traj, ref=reflist, n_cores=2)
    >>> # convert to ndarray
    >>> data_arr = pt.tools.dict_to_ndarray(data)

    >>> # perform parallel calculation with given frame_indices
    >>> traj = pt.datafiles.load_tz2()
    >>> data = pt.pmap(pt.radgyr, traj, '@CA', frame_indices=range(10, 50), n_cores=4)
    >>> # serial version
    >>> data = pt.radgyr(traj, '@CA', frame_indices=range(10, 50))

    See also
    --------
    pytraj.pmap_mpi
    '''
    from multiprocessing import Pool
    from pytraj import TrajectoryIterator

    # options consumed here must not be forwarded to ``func``
    n_cores = kwd.pop('n_cores', 2)
    iter_options = kwd.pop('iter_options', {})
    apply = kwd.pop('apply', None)

    if n_cores <= 0:
        # use all available cores
        n_cores = cpu_count()

    # update reference
    if 'ref' in kwd:
        kwd['ref'] = get_reference(traj, kwd['ref'])

    if isinstance(func, (list, tuple, string_types)):
        # cpptraj command text: assume using _load_batch_pmap
        from pytraj.parallel.base import _load_batch_pmap
        data = _load_batch_pmap(n_cores=n_cores,
                                traj=traj,
                                lines=func,
                                dtype='dict',
                                root=0,
                                mode='multiprocessing',
                                **kwd)
        return concat_dict((x[1] for x in data))

    if not callable(func):
        raise ValueError('must callable argument')

    # pytraj's method
    if not getattr(func, '_is_parallelizable', False):
        raise ValueError("this method does not support parallel")

    if getattr(func, '_openmp_capability',
               False) and 'OPENMP' in compiled_info():
        raise RuntimeError(
            "this method supports both openmp and pmap, but your cpptraj "
            "version was installed with openmp. Should not use both openmp and pmap at the "
            "same time. In this case, do not use pmap since openmp is more efficient"
        )

    if not isinstance(traj, TrajectoryIterator):
        raise ValueError('only support TrajectoryIterator')

    # functions listed here get dedicated reductions below and keep their
    # own default dtype
    if 'dtype' not in kwd and func not in [
            mean_structure,
            matrix.dist,
            matrix.idea,
            ired_vector_and_matrix,
            rotation_matrix,
            volmap,
    ]:
        kwd['dtype'] = 'dict'

    # keyword
    if func is volmap:
        assert kwd.get('size') is not None, 'must provide "size" value'

    pfuncs = partial(worker_byfunc,
                     n_cores=n_cores,
                     func=func,
                     traj=traj,
                     args=args,
                     kwd=kwd,
                     iter_options=iter_options,
                     apply=apply)

    p = Pool(n_cores)
    try:
        data = p.map(pfuncs, list(range(n_cores)))
    finally:
        # close the pool even when a worker raised
        p.close()

    # each item of ``data`` is indexed as (rank, result, n_frames)
    if func in [matrix.dist, matrix.idea, volmap]:
        # The builtin ``sum`` adds the arrays element-wise. ``np.sum`` on a
        # generator is deprecated (it only forwarded to the builtin), and
        # ``np.sum`` on a list would collapse everything to one scalar.
        mat = sum(val[1] * val[2] for val in data) / traj.n_frames
        return mat
    elif func in [ired_vector_and_matrix]:
        # data is a list of (rank, (vectors, matrix), n_frames)
        mat = sum(val[1][1] * val[2] for val in data) / traj.n_frames
        # NumPy stacking helpers need a sequence, not a generator
        vecs = np.column_stack([val[1][0] for val in data])
        return (vecs, mat)
    elif func in [rotation_matrix]:
        if kwd.get('with_rmsd'):
            # data is a list of (rank, (mat, rmsd), n_frames)
            # np.vstack replaces its deprecated alias np.row_stack
            mat = np.vstack([val[1][0] for val in data])
            rmsd_ = np.hstack([val[1][1] for val in data])
            return OrderedDict(out=(mat, rmsd_))
        mat = np.vstack([val[1] for val in data])
        return OrderedDict(mat=mat)
    elif func == mean_structure:
        xyz = sum(x[2] * x[1].xyz for x in data) / traj.n_frames
        frame = Frame(xyz.shape[0])
        frame.xyz[:] = xyz
        return frame
    else:
        return concat_dict((x[1] for x in data))
n_atoms = traj.top.n_atoms n_frames = traj.n_frames if rank == 0: ref = traj[0] ref_xyz = np.asarray(ref.xyz, dtype=np.float64) else: ref = None ref_xyz = np.empty((n_atoms, 3), dtype=np.float64) # broadcast ref_xyz to other cores from master comm.Bcast([ref_xyz, MPI.DOUBLE]) if rank != 0: # need to reconstruct ref ref = Frame() ref.append_xyz(ref_xyz) _ref = ref.copy() def rmsd_mpi(traj, _ref): arr0 = pyca.calc_rmsd("@CA", traj, traj.top, _ref) return arr0 arr0 = rmsd_mpi(traj, _ref) if rank == 0: data = np.empty(size * traj.n_frames, dtype=np.float64) else:
def _pmap(func, traj, *args, **kwd):
    '''use python's multiprocessing to accelerate calculation. Limited calculations.

    Parameters
    ----------
    func : a pytraj's methods or a list of string or simply as a cpptraj' text
    traj : pytraj.TrajectoryIterator
    n_cores : int, number of cores to be used, default 2. Specify n_cores=-1 to use all available cores
    iter_options : dict, default {}
        Specify trajectory iterating option. This will be done before calling ``func``.
    frame_indices : {None, array-like}, default None, optional
        if provided, pytraj will split this frame_indices into different chunks and let
        cpptraj perform calculation for specific indices.
        frame_indices must be picklable so it can be sent to different cores.

    *args, **kwd: additional keywords

    Returns
    -------
    out : OrderedDict in most cases. Exceptions: a matrix for ``matrix.dist``,
        ``matrix.idea`` and ``volmap``; a tuple ``(vecs, mat)`` for
        ``ired_vector_and_matrix``; a Frame for ``mean_structure``.

    Raises
    ------
    ValueError
        if ``func`` is not callable (and not a cpptraj command), is not marked
        as parallelizable, or ``traj`` is not a TrajectoryIterator
    RuntimeError
        if ``func`` supports openmp and cpptraj was compiled with openmp

    Notes
    -----
    - If you not sure about parallel's results, you should compare the output to serial run.

    - This is absolutely experimental. The syntax might be changed in future.

    Rule of thumbs: start with small number of frames (saying 10 frames), varying
    n_cores=1, 2, 3, 4 to see if the data makes sense or not.

    There are two modes in this method, use pytraj's methods (pytraj.rmsd, pytraj.radgyr,
    ...) or use cpptraj's command text syntax ('autoimage', 'rms', ...)

    If using cpptraj syntax::

        pytraj only supports limited cpptraj's Actions (not Analysis, check Amber15 manual
        about Action and Analysis), say no to 'matrix', 'atomicfluct', ... or any action
        that results output depending on the number of frames.

    This method only benefits you if your calculation is quite long (saying few minutes to
    few hours). For calculation that takes less than 1 minutes, you won't see the
    significant speed up (or even slower) since pytraj need to warm up and need to gather
    data when the calculation done.

    The parallel calculation is very simple, trajectory will be split (almost equal) to
    different chunk (n_chunks = n_cores), pytraj/cpptraj perform calculation for each
    chunk in each core and then send data back to master. Note that we are using Python's
    built-in multiprocessing module, so you can use this method interactively in Ipython
    and ipython/jupyter notebook. This behavior is different from using MPI, in which you
    need to write a script, escaping ipython session and type something like::

        mpirun -n 4 python my_script.py

    vs::

        In [1]: pt.pmap(pt.radgyr, traj, n_cores=4)
        Out[1]:
        OrderedDict([('RoG_00000',
                      array([ 18.91114428,  18.93654996,  18.84969884,  18.90449256,
                              18.8568644 ,  18.88917208,  18.9430491 ,  18.88878079,
                              18.91669565,  18.87069722]))])

    This is experimental method, you should expect its syntax, default output will be changed.

    When sending Topology to different cores, pytraj will reload Topology from
    traj.top.filename, so if you need to update Topology (in the fly), save it to disk and
    reload before using ``pytraj.pmap``

    Examples
    --------
    >>> import numpy as np
    >>> import pytraj as pt
    >>> traj = pt.load_sample_data('tz2')

    >>> # use iter_options
    >>> iter_options = {'autoimage': True, 'rmsfit': (0, '@CA')}
    >>> data = pt.pmap(pt.mean_structure, traj, iter_options=iter_options)

    >>> # cpptraj command style
    >>> data = pt.pmap(['distance :3 :7', 'vector mask :3 :12'], traj, n_cores=4)

    >>> # use reference. Need to explicitly use 'refindex', which is index of reflist
    >>> data = pt.pmap(['rms @CA refindex 0'], traj, ref=[traj[3],], n_cores=3)
    >>> data
    OrderedDict([('RMSD_00001', array([  2.68820312e-01,   3.11804885e-01,   2.58835452e-01,
             9.10475988e-08,   2.93310737e-01,   4.10197322e-01,
             3.96226694e-01,   3.66059215e-01,   3.90890362e-01,
             4.89180497e-01]))])

    >>> # use reference: if not want to use 'refindex', can use 'reference'
    >>> # the advantage is you can not specify a list of reference
    >>> data = pt.pmap(['rms @CA reference'], traj, ref=[traj[3],], n_cores=3)
    >>> data
    OrderedDict([('RMSD_00001', array([  2.68820312e-01,   3.11804885e-01,   2.58835452e-01,
             9.10475988e-08,   2.93310737e-01,   4.10197322e-01,
             3.96226694e-01,   3.66059215e-01,   3.90890362e-01,
             4.89180497e-01]))])

    >>> # use different references. Need to explicitly use 'refindex', which is index of reflist
    >>> # create a list of references
    >>> reflist = traj[3], traj[4]
    >>> # make sure to specify `refindex`
    >>> # `refindex 0` is equal to `reflist[0]`
    >>> # `refindex 1` is equal to `reflist[1]`
    >>> data = pt.pmap(['rms @CA refindex 0', 'rms !@H= refindex 1'], traj, ref=reflist, n_cores=2)
    >>> # convert to ndarray
    >>> data_arr = pt.tools.dict_to_ndarray(data)

    >>> # perform parallel calculation with given frame_indices
    >>> traj = pt.datafiles.load_tz2()
    >>> data = pt.pmap(pt.radgyr, traj, '@CA', frame_indices=range(10, 50), n_cores=4)
    >>> # serial version
    >>> data = pt.radgyr(traj, '@CA', frame_indices=range(10, 50))

    See also
    --------
    pytraj.pmap_mpi
    '''
    from multiprocessing import Pool
    from pytraj import TrajectoryIterator

    # options consumed here must not be forwarded to ``func``
    n_cores = kwd.pop('n_cores', 2)
    iter_options = kwd.pop('iter_options', {})
    apply = kwd.pop('apply', None)

    if n_cores <= 0:
        # use all available cores
        n_cores = cpu_count()

    # update reference
    if 'ref' in kwd:
        kwd['ref'] = get_reference(traj, kwd['ref'])

    if isinstance(func, (list, tuple, string_types)):
        # cpptraj command text: assume using _load_batch_pmap
        from pytraj.parallel.base import _load_batch_pmap
        data = _load_batch_pmap(n_cores=n_cores,
                                traj=traj,
                                lines=func,
                                dtype='dict',
                                root=0,
                                mode='multiprocessing',
                                **kwd)
        return concat_dict((x[1] for x in data))

    if not callable(func):
        raise ValueError('must callable argument')

    # pytraj's method
    if not getattr(func, '_is_parallelizable', False):
        raise ValueError("this method does not support parallel")

    if getattr(func, '_openmp_capability',
               False) and 'OPENMP' in compiled_info():
        raise RuntimeError(
            "this method supports both openmp and pmap, but your cpptraj "
            "version was installed with openmp. Should not use both openmp and pmap at the "
            "same time. In this case, do not use pmap since openmp is more efficient"
        )

    if not isinstance(traj, TrajectoryIterator):
        raise ValueError('only support TrajectoryIterator')

    # functions listed here get dedicated reductions below and keep their
    # own default dtype
    if 'dtype' not in kwd and func not in [
            mean_structure,
            matrix.dist,
            matrix.idea,
            ired_vector_and_matrix,
            rotation_matrix,
            volmap,
    ]:
        kwd['dtype'] = 'dict'

    # keyword
    if func is volmap:
        assert kwd.get('size') is not None, 'must provide "size" value'

    pfuncs = partial(worker_byfunc,
                     n_cores=n_cores,
                     func=func,
                     traj=traj,
                     args=args,
                     kwd=kwd,
                     iter_options=iter_options,
                     apply=apply)

    p = Pool(n_cores)
    try:
        data = p.map(pfuncs, list(range(n_cores)))
    finally:
        # close the pool even when a worker raised
        p.close()

    # each item of ``data`` is indexed as (rank, result, n_frames)
    if func in [matrix.dist, matrix.idea, volmap]:
        # The builtin ``sum`` adds the arrays element-wise. ``np.sum`` on a
        # generator is deprecated (it only forwarded to the builtin), and
        # ``np.sum`` on a list would collapse everything to one scalar.
        mat = sum(val[1] * val[2] for val in data) / traj.n_frames
        return mat
    elif func in [ired_vector_and_matrix]:
        # data is a list of (rank, (vectors, matrix), n_frames)
        mat = sum(val[1][1] * val[2] for val in data) / traj.n_frames
        # NumPy stacking helpers need a sequence, not a generator
        vecs = np.column_stack([val[1][0] for val in data])
        return (vecs, mat)
    elif func in [rotation_matrix]:
        if kwd.get('with_rmsd'):
            # data is a list of (rank, (mat, rmsd), n_frames)
            # np.vstack replaces its deprecated alias np.row_stack
            mat = np.vstack([val[1][0] for val in data])
            rmsd_ = np.hstack([val[1][1] for val in data])
            return OrderedDict(out=(mat, rmsd_))
        mat = np.vstack([val[1] for val in data])
        return OrderedDict(mat=mat)
    elif func == mean_structure:
        xyz = sum(x[2] * x[1].xyz for x in data) / traj.n_frames
        frame = Frame(xyz.shape[0])
        frame.xyz[:] = xyz
        return frame
    else:
        return concat_dict((x[1] for x in data))