def init_first(self):
    """Allocate the work arrays and create the MSD output datasets.

    When ``self.select`` is set, the leading axis of ``self.shape`` is first
    replaced by the number of selected items. ``self.ndim`` becomes the
    product of all remaining axis lengths (e.g. 3 for atomic positions).
    """
    # A selection overrides the length of the first axis.
    selection = self.select
    if selection is not None:
        self.shape = (len(selection),) + self.shape[1:]

    # Product of the trailing axis lengths.
    self.ndim = 1
    for extent in self.shape[1:]:
        self.ndim *= extent

    # ``mult`` independent history buffers plus one buffer named ``pos``.
    self.last_poss = [np.zeros(self.shape, float) for _ in range(self.mult)]
    self.pos = np.zeros(self.shape, float)

    # Base-class setup runs before any dataset is created.
    AnalysisHook.init_first(self)
    out = self.outg
    if out is None:
        return

    # One initially empty, unbounded 1-D dataset per multiple: msd001, ...
    for number in range(1, self.mult + 1):
        out.create_dataset('msd%03i' % number, shape=(0,), maxshape=(None,),
                           dtype=float)
    out.create_dataset('msdsums', data=self.msdsums)
    out.create_dataset('msdcounters', data=self.msdcounters)
    out.create_dataset('pars', shape=(2,), dtype=float)
    out.create_dataset('pars_error', shape=(2,), dtype=float)
def init_first(self):
    """Run the base-class setup and create the spectrum output datasets.

    Four float datasets of length ``self.ssize`` are created in
    ``self.outg`` when an output group is present: ``amps``, ``freqs``,
    ``ac`` and ``time``.
    """
    AnalysisHook.init_first(self)
    out = self.outg
    if out is None:
        # Nothing to prepare without an output group.
        return
    for name in ('amps', 'freqs', 'ac', 'time'):
        out.create_dataset(name, (self.ssize,), float)
def init_first(self):
    """Delegate to the base class, then reserve the spectrum datasets.

    With an output group available, ``amps``, ``freqs``, ``ac`` and
    ``time`` are created as float datasets holding ``self.ssize`` values
    each.
    """
    AnalysisHook.init_first(self)
    if self.outg is None:
        return
    # All four datasets share the same one-dimensional shape.
    shape = (self.ssize,)
    group = self.outg
    group.create_dataset("amps", shape, float)
    group.create_dataset("freqs", shape, float)
    group.create_dataset("ac", shape, float)
    group.create_dataset("time", shape, float)
def init_first(self):
    '''Allocate position buffers, count the pairs and prepare the output.

    ``natom0`` is the size of ``select0`` (or ``self.natom`` without a
    selection). Without ``select1`` the pair count is that of all unordered
    pairs within the first set; otherwise it is ``natom0 * natom1``. The
    count is then multiplied by ``(1 + 2*nimage)**3``.
    '''
    # First set of atoms.
    self.natom0 = self.natom if self.select0 is None else len(self.select0)
    self.pos0 = np.zeros((self.natom0, 3), float)

    # Second set of atoms (optional) and the matching pair count.
    if self.select1 is not None:
        self.natom1 = len(self.select1)
        self.pos1 = np.zeros((self.natom1, 3), float)
        pair_count = self.natom0 * self.natom1
    else:
        self.pos1 = None
        pair_count = (self.natom0 * (self.natom0 - 1)) // 2

    # Every pair is repeated for each of the considered cell images.
    image_factor = (1 + 2 * self.nimage) ** 3
    self.npair = pair_count * image_factor

    # Scratch array with one float per pair.
    self.work = np.zeros(self.npair, float)

    AnalysisHook.init_first(self)
    out = self.outg
    if out is None:
        return
    bin_shape = (self.nbin,)
    out.create_dataset('rdf', bin_shape, float)
    out.create_dataset('CN', bin_shape, float)
    out['d'] = self.d
    if self.pairs_sr is not None:
        out.create_dataset('rdf_sr', bin_shape, float)
def init_first(self):
    """Perform the base-class setup and create the output datasets.

    If ``self.outg`` is not ``None``, the float datasets ``amps``,
    ``freqs``, ``ac`` and ``time`` are created, each with shape
    ``(self.ssize,)``.
    """
    AnalysisHook.init_first(self)
    has_output = self.outg is not None
    if has_output:
        dataset_names = ['amps', 'freqs', 'ac', 'time']
        for dataset_name in dataset_names:
            self.outg.create_dataset(dataset_name, (self.ssize, ), float)
def init_first(self):
    '''Setup some work arrays

    Determines the number of atoms in the first (and optionally second)
    selection, allocates the position buffers ``pos0``/``pos1``, computes
    the total number of pairs ``npair`` (including the factor
    ``(1 + 2*nimage)**3`` for the cell images) and allocates the ``work``
    array with one float per pair. After the base-class setup, the output
    datasets ``rdf``, ``d`` and (when ``pairs_sr`` is set) ``rdf_sr`` are
    created in ``self.outg``, if present.
    '''
    # determine the number of atoms
    if self.select0 is None:
        self.natom0 = self.natom
    else:
        self.natom0 = len(self.select0)
    self.pos0 = np.zeros((self.natom0, 3), float)
    # the number of pairs
    if self.select1 is None:
        # Floor division keeps npair an integer: true division would yield
        # a float, which np.zeros rejects as an array shape.
        self.npair = (self.natom0*(self.natom0-1))//2
        self.pos1 = None
    else:
        self.natom1 = len(self.select1)
        self.pos1 = np.zeros((self.natom1, 3), float)
        self.npair = self.natom0*self.natom1
    # multiply the number of pairs by all images
    self.npair *= (1 + 2*self.nimage)**3
    # Prepare the output
    self.work = np.zeros(self.npair, float)
    AnalysisHook.init_first(self)
    if self.outg is not None:
        self.outg.create_dataset('rdf', (self.nbin,), float)
        self.outg['d'] = self.d
        if self.pairs_sr is not None:
            self.outg.create_dataset('rdf_sr', (self.nbin,), float)
def __init__(self, f=None, start=0, end=-1, step=1, mult=20, select=None,
             bsize=None, pospath='trajectory/pos', poskey='pos',
             outpath=None):
    """Computes mean-squared displacements and diffusion constants

       **Optional arguments:**

       f
            An h5.File instance containing the trajectory data. When ``f``
            is omitted, or when it lacks the dataset named by ``pospath``,
            the analysis is carried out on-line.

       start, end, step
            Optional arguments for the ``get_slice`` function. max_sample
            is not supported because the choice of the step argument is
            critical for a useful result.

       mult
            The mean square displacement (MSD) is computed between
            subsequent steps, and also between every two, three, ..., up
            to ``mult`` steps.

       select
            A list of atom indexes that are considered for the computation
            of the MSDs. If not given, all atoms are used.

       bsize
            If given, time intervals that coincide with the boundaries of
            the block size are left out of the analysis. This is useful
            when there is a significant Monte Carlo move between
            subsequent blocks. If step > 1, intervals are left out when
            they overlap with boundaries of blocks of size bsize*step.

       pospath
            The path of the dataset that contains the time-dependent data
            in the HDF5 file. The first axis of the array must be the time
            axis.

       poskey
            In case of an on-line analysis, this is the key of the state
            item that contains the data from which the MSDs are derived.

       outpath
            The output path for the MSD results. If not given, it defaults
            to '%s_diff' % pospath. If this path already exists, it will
            be removed first.

       A ``ValueError`` is raised when ``bsize`` is given and is smaller
       than ``mult``.
    """
    # Reject block sizes that are smaller than the number of multiples.
    if bsize is not None:
        if bsize < mult:
            raise ValueError('The bsize parameter must be larger than mult.')

    self.mult = mult
    self.select = select
    self.bsize = bsize

    # Running accumulators, one entry per multiple.
    self.msdsums = np.zeros(mult, float)
    self.msdcounters = np.zeros(mult, int)
    self.counter = 0

    if outpath is None:
        outpath = pospath + '_diff'

    inputs = {'pos': AnalysisInput(pospath, poskey)}
    # The fourth positional argument is None: max_sample is not supported
    # here (see the docstring). NOTE(review): meaning of the trailing True
    # should be confirmed against the AnalysisHook signature.
    AnalysisHook.__init__(self, f, start, end, None, step, inputs, outpath,
                          True)
def init_first(self):
    """Prepare the working buffers and the MSD datasets in the output.

    ``self.shape`` gets the number of selected items as its first axis when
    a selection is present, and ``self.ndim`` is set to the product of the
    other axis lengths (i.e. 3 for atoms).
    """
    if self.select is not None:
        # Only the selected items are kept along the first axis.
        trailing = self.shape[1:]
        self.shape = (len(self.select),) + trailing

    # Accumulate the product of the trailing axis lengths.
    self.ndim = 1
    for axis_length in self.shape[1:]:
        self.ndim *= axis_length

    # Working arrays: ``mult`` separate buffers and the ``pos`` buffer.
    buffers = []
    for _ in range(self.mult):
        buffers.append(np.zeros(self.shape, float))
    self.last_poss = buffers
    self.pos = np.zeros(self.shape, float)

    # Prepare the HDF5 output, if present.
    AnalysisHook.init_first(self)
    if self.outg is None:
        return
    group = self.outg
    for m in range(self.mult):
        label = 'msd%03i' % (m + 1)
        group.create_dataset(label, shape=(0,), maxshape=(None,), dtype=float)
    group.create_dataset('msdsums', data=self.msdsums)
    group.create_dataset('msdcounters', data=self.msdcounters)
    for label in ('pars', 'pars_error'):
        group.create_dataset(label, shape=(2,), dtype=float)
def __init__(self, f=None, start=0, end=-1, step=1, bsize=4096, select=None,
             path='trajectory/vel', key='vel', outpath=None, weights=None):
    """Set up a block-wise FFT spectrum analysis of time-dependent data

       **Optional arguments:**

       f
            An h5.File instance containing the trajectory data. When ``f``
            is omitted, or when it lacks the dataset named by ``path``,
            the analysis is carried out on-line.

       start
            The first sample to be considered for analysis. A negative
            value means that the analysis starts from the -start last
            samples.

       end
            The last sample to be considered for analysis. A negative
            value means that the last -end samples are not considered.

       step
            The spacing between the samples used for the analysis.

       bsize
            The size of the blocks used for individual FFT calls.

       select
            A list of atom indexes that are considered for the computation
            of the spectrum. If not given, all atoms are used.

       path
            The path of the dataset that contains the time-dependent data
            in the HDF5 file. The first axis of the array must be the time
            axis. The spectra are summed over the other axes.

       key
            In case of an on-line analysis, this is the key of the state
            item that contains the data from which the spectrum is
            derived.

       outpath
            The output path for the frequency computation in the HDF5
            file. If not given, it defaults to '%s_spectrum' % path. If
            this path already exists, it will be removed first.

       weights
            If not given, the spectrum is a simple sum of contributions
            from the different time-dependent functions. If given, a
            linear combination is made based on these weights.

       The max_sample argument from get_slice is not used because the
       step value is an important parameter: it is best to choose
       step*bsize such that it coincides with a part of the trajectory in
       which the velocities (or other data) are continuous.

       The block size should correspond to a decent resolution on the
       frequency axis, e.g. 33356 fs of MD data corresponds to a
       resolution of about 1 cm^-1. The step size should put the highest
       frequency above the highest relevant frequency in the spectrum,
       e.g. a step of 10 fs corresponds to a frequency maximum of
       3336 cm^-1. The total number of FFTs, i.e. the length of the
       simulation divided by the block size, multiplied by the number of
       time-dependent functions in the data, determines the noise
       reduction on the amplitude of the spectrum. With enough data for
       10K FFTs, one should get a reasonably smooth spectrum.

       Depending on the FFT implementation in numpy, it may be
       interesting to tune the bsize argument. A power of 2 is typically
       a good choice.

       When f is None, or when the path does not exist in the HDF5 file,
       the class can be used as an on-line analysis hook for the
       iterative algorithms in the yaff.sampling package. The spectrum is
       then built up as the iterative algorithm progresses. The end
       option is ignored for an on-line analysis.
    """
    self.bsize = bsize
    self.select = select
    self.weights = weights
    # The length of the spectrum array.
    self.ssize = bsize // 2 + 1
    self.amps = np.zeros(self.ssize, float)
    # The number of FFT calls, kept for statistics.
    self.nfft = 0

    target = path + '_spectrum' if outpath is None else outpath
    inputs = {'signal': AnalysisInput(path, key)}
    # max_sample is deliberately not used (fourth argument is None).
    # NOTE(review): meaning of the trailing True should be confirmed
    # against the AnalysisHook signature.
    AnalysisHook.__init__(self, f, start, end, None, step, inputs, target,
                          True)
def init_online(self):
    """Run the base-class on-line setup and reset the collection counter."""
    AnalysisHook.init_online(self)
    # Start counting from zero; set after the base-class call, as before.
    self.ncollect = 0
def __init__(self, rcut, rspacing, f=None, start=0, end=-1, max_sample=None,
             step=None, select0=None, select1=None, pairs_sr=None, nimage=0,
             pospath='trajectory/pos', poskey='pos', cellpath=None,
             cellkey=None, outpath=None):
    """Computes a radial distribution function (RDF)

       **Argument:**

       rcut
            The cutoff for the RDF analysis. This should be lower than the
            spacing between the primitive cell planes, multiplied by
            (1+2*nimage).

       rspacing
            The width of the bins to build up the RDF.

       **Optional arguments:**

       f
            An h5.File instance containing the trajectory data. When ``f``
            is omitted, or when it lacks the dataset named by ``pospath``,
            the analysis is carried out on-line.

       start, end, max_sample, step
            Arguments to set up the selection of time slices. See
            ``get_slice`` for more information.

       select0
            A list of atom indexes that are considered for the computation
            of the RDF. If not given, all atoms are used.

       select1
            A list of atom indexes that are needed to compute an RDF
            between two disjoint sets of atoms. Any overlap between
            select0 and select1 raises an error. If this is None, an
            'internal' RDF is computed for the atoms in select0.

       pairs_sr
            An array with short-range pairs of atoms (shape K x 2). When
            given, an additional RDF is generated for the short-range
            pairs (rdf_sr).

       nimage
            The number of cell images to consider in the computation of
            the pair distances. The default is zero, meaning that only the
            minimum image convention is used.

       pospath
            The path of the dataset that contains the time-dependent data
            in the HDF5 file. The first axis of the array must be the time
            axis. This is only needed for an off-line analysis.

       poskey
            In case of an on-line analysis, this is the key of the state
            item that contains the data from which the RDF is derived.

       cellpath
            The path of the time-dependent cell vector data. This is only
            needed when the cell parameters are variable and the analysis
            is off-line.

       cellkey
            The key of the state item that contains the cell vectors. This
            is only needed when the cell parameters are variable and the
            analysis is done on-line.

       outpath
            The output path for the RDF results in the HDF5 file. If not
            given, it defaults to '%s_rdf' % pospath. If this path already
            exists, it will be removed first.

       When f is None, or when the path does not exist in the HDF5 file,
       the class can be used as an on-line analysis hook for the
       iterative algorithms in the yaff.sampling package. The RDF is then
       built up as the iterative algorithm progresses. The end option is
       ignored and max_sample is not applicable to an on-line analysis.

       A ``ValueError`` is raised for empty selections, for duplicates
       within a selection, for overlapping selections, and when select1
       is given without select0.
    """
    # --- validate the selections (order of the checks matters) ---
    if select0 is not None:
        if len(set(select0)) != len(select0):
            raise ValueError('No duplicates are allowed in select0')
        if len(select0) == 0:
            raise ValueError('select0 can not be an empty list')
    if select1 is not None:
        if len(set(select1)) != len(select1):
            raise ValueError('No duplicates are allowed in select1')
        if len(select1) == 0:
            raise ValueError('select1 can not be an empty list')
    both_given = select0 is not None and select1 is not None
    # Duplicates were rejected above, so the two sets overlap exactly when
    # their intersection is non-empty.
    if both_given and set(select0) & set(select1):
        raise ValueError(
            'No overlap is allowed between select0 and select1. '
            'If you want to compute and RDF within a set of atoms, '
            'omit the select1 argument.')
    if select1 is not None and select0 is None:
        raise ValueError('select1 can not be given without select0.')

    # --- store the settings ---
    self.rcut = rcut
    self.rspacing = rspacing
    self.select0 = select0
    self.select1 = select1
    self.pairs_sr = self._process_pairs_sr(pairs_sr)
    self.nimage = nimage

    # --- histogram grid: bin edges and bin centers ---
    self.nbin = int(self.rcut / self.rspacing)
    edges = np.arange(self.nbin + 1) * self.rspacing
    self.bins = edges
    self.d = edges[:-1] + 0.5 * self.rspacing

    # --- accumulators ---
    self.rdf_sum = np.zeros(self.nbin, float)
    self.CN_sum = np.zeros(self.nbin, float)
    if self.pairs_sr is not None:
        self.rdf_sum_sr = np.zeros(self.nbin, float)
    self.nsample = 0

    if outpath is None:
        outpath = pospath + '_rdf'
    inputs = {
        'pos': AnalysisInput(pospath, poskey),
        'cell': AnalysisInput(cellpath, cellkey, False),
    }
    # NOTE(review): meaning of the trailing False should be confirmed
    # against the AnalysisHook signature.
    AnalysisHook.__init__(self, f, start, end, max_sample, step, inputs,
                          outpath, False)
def __init__(self, f=None, start=0, end=-1, step=1, bsize=4096, select=None,
             path='trajectory/vel', key='vel', outpath=None, weights=None):
    """Configure the spectrum analysis based on FFTs of fixed-size blocks

       **Optional arguments:**

       f
            An h5.File instance containing the trajectory data. If ``f``
            is not given, or if it does not contain the dataset referred
            to by ``path``, an on-line analysis is carried out.

       start
            The first sample to be considered for analysis. This may be
            negative to indicate that the analysis should start from the
            -start last samples.

       end
            The last sample to be considered for analysis. This may be
            negative to indicate that the last -end samples should not be
            considered.

       step
            The spacing between the samples used for the analysis.

       bsize
            The size of the blocks used for individual FFT calls.

       select
            A list of atom indexes that are considered for the computation
            of the spectrum. If not given, all atoms are used.

       path
            The path of the dataset that contains the time-dependent data
            in the HDF5 file. The first axis of the array must be the time
            axis. The spectra are summed over the other axes.

       key
            In case of an on-line analysis, this is the key of the state
            item that contains the data from which the spectrum is
            derived.

       outpath
            The output path for the frequency computation in the HDF5
            file. If not given, it defaults to '%s_spectrum' % path. If
            this path already exists, it will be removed first.

       weights
            If not given, the spectrum is just a simple sum of
            contributions from different time-dependent functions. If
            given, a linear combination is made based on these weights.

       The max_sample argument from get_slice is not used because the
       choice of the step value is an important parameter: it is best to
       choose step*bsize such that it coincides with a part of the
       trajectory in which the velocities (or other data) are continuous.

       The block size should be set such that it corresponds to a decent
       resolution on the frequency axis, e.g. 33356 fs of MD data
       corresponds to a resolution of about 1 cm^-1. The step size should
       be set such that the highest frequency is above the highest
       relevant frequency in the spectrum, e.g. a step of 10 fs
       corresponds to a frequency maximum of 3336 cm^-1. The total number
       of FFTs, i.e. the length of the simulation divided by the block
       size, multiplied by the number of time-dependent functions in the
       data, determines the noise reduction on the amplitude of the
       spectrum. If there is sufficient data to perform 10K FFTs, one
       should get a reasonably smooth spectrum.

       Depending on the FFT implementation in numpy, it may be
       interesting to tune the bsize argument. A power of 2 is typically
       a good choice.

       When f is None, or when the path does not exist in the HDF5 file,
       the class can be used as an on-line analysis hook for the
       iterative algorithms in the yaff.sampling package. This means that
       the spectrum is built up as the iterative algorithm progresses.
       The end option is ignored for an on-line analysis.
    """
    self.bsize = bsize
    self.select = select
    self.weights = weights

    # The spectrum array holds bsize // 2 + 1 values.
    half_block = self.bsize // 2
    self.ssize = half_block + 1
    self.amps = np.zeros(self.ssize, float)

    # Counter for the number of FFT calls, used for statistics.
    self.nfft = 0

    if outpath is None:
        outpath = '%s_spectrum' % path if False else path + '_spectrum'
    signal_input = AnalysisInput(path, key)
    analysis_inputs = {'signal': signal_input}
    # The fourth argument (None) reflects that max_sample is not used.
    # NOTE(review): meaning of the trailing True should be confirmed
    # against the AnalysisHook signature.
    AnalysisHook.__init__(self, f, start, end, None, step, analysis_inputs,
                          outpath, True)
def __init__(self, rcut, rspacing, f=None, start=0, end=-1, max_sample=None,
             step=None, select0=None, select1=None, pairs_sr=None, nimage=0,
             pospath='trajectory/pos', poskey='pos', cellpath=None,
             cellkey=None, outpath=None):
    """Computes a radial distribution function (RDF)

       **Argument:**

       rcut
            The cutoff for the RDF analysis. This should be lower than the
            spacing between the primitive cell planes, multiplied by
            (1+2*nimage).

       rspacing
            The width of the bins to build up the RDF.

       **Optional arguments:**

       f
            An h5.File instance containing the trajectory data. If ``f``
            is not given, or if it does not contain the dataset referred
            to by ``pospath``, an on-line analysis is carried out.

       start, end, max_sample, step
            Arguments to set up the selection of time slices. See
            ``get_slice`` for more information.

       select0
            A list of atom indexes that are considered for the computation
            of the RDF. If not given, all atoms are used.

       select1
            A list of atom indexes that are needed to compute an RDF
            between two disjoint sets of atoms. (If there is some overlap
            between select0 and select1, an error will be raised.) If this
            is None, an 'internal' RDF will be computed for the atoms
            specified in select0.

       pairs_sr
            An array with short-range pairs of atoms (shape K x 2). When
            given, an additional RDF is generated for the short-range
            pairs (rdf_sr).

       nimage
            The number of cell images to consider in the computation of
            the pair distances. By default, this is zero, meaning that
            only the minimum image convention is used.

       pospath
            The path of the dataset that contains the time-dependent data
            in the HDF5 file. The first axis of the array must be the time
            axis. This is only needed for an off-line analysis.

       poskey
            In case of an on-line analysis, this is the key of the state
            item that contains the data from which the RDF is derived.

       cellpath
            The path of the time-dependent cell vector data. This is only
            needed when the cell parameters are variable and the analysis
            is off-line.

       cellkey
            The key of the state item that contains the cell vectors. This
            is only needed when the cell parameters are variable and the
            analysis is done on-line.

       outpath
            The output path for the RDF results in the HDF5 file. If not
            given, it defaults to '%s_rdf' % pospath. If this path already
            exists, it will be removed first.

       When f is None, or when the path does not exist in the HDF5 file,
       the class can be used as an on-line analysis hook for the
       iterative algorithms in the yaff.sampling package. This means that
       the RDF is built up as the iterative algorithm progresses. The end
       option is ignored and max_sample is not applicable to an on-line
       analysis.

       ``ValueError`` is raised when a selection is empty or contains
       duplicates, when the two selections overlap, or when select1 is
       given without select0.
    """
    have0 = select0 is not None
    have1 = select1 is not None

    # Per-selection sanity checks; select0 is examined before select1.
    if have0:
        if len(select0) != len(set(select0)):
            raise ValueError('No duplicates are allowed in select0')
        if len(select0) == 0:
            raise ValueError('select0 can not be an empty list')
    if have1:
        if len(select1) != len(set(select1)):
            raise ValueError('No duplicates are allowed in select1')
        if len(select1) == 0:
            raise ValueError('select1 can not be an empty list')

    # The two selections must be disjoint: the union must be as large as
    # both lists together.
    if have0 and have1:
        union_size = len(set(select0) | set(select1))
        if len(select0) + len(select1) != union_size:
            raise ValueError('No overlap is allowed between select0 and '
                             'select1. If you want to compute and RDF '
                             'within a set of atoms, omit the select1 '
                             'argument.')
    if have1 and not have0:
        raise ValueError('select1 can not be given without select0.')

    self.rcut = rcut
    self.rspacing = rspacing
    self.select0 = select0
    self.select1 = select1
    self.pairs_sr = self._process_pairs_sr(pairs_sr)
    self.nimage = nimage

    # Bin edges run from 0 to nbin*rspacing; ``d`` holds the bin centers.
    self.nbin = int(self.rcut/self.rspacing)
    self.bins = np.arange(self.nbin+1)*self.rspacing
    lower_edges = self.bins[:-1]
    self.d = lower_edges + 0.5*self.rspacing

    # Accumulated histograms and the number of samples seen so far.
    self.rdf_sum = np.zeros(self.nbin, float)
    if self.pairs_sr is not None:
        self.rdf_sum_sr = np.zeros(self.nbin, float)
    self.nsample = 0

    if outpath is None:
        outpath = pospath + '_rdf'
    pos_input = AnalysisInput(pospath, poskey)
    cell_input = AnalysisInput(cellpath, cellkey, False)
    analysis_inputs = {'pos': pos_input, 'cell': cell_input}
    # NOTE(review): meaning of the trailing False should be confirmed
    # against the AnalysisHook signature.
    AnalysisHook.__init__(self, f, start, end, max_sample, step,
                          analysis_inputs, outpath, False)