def __init__(self, system, parts, nlist=None):
    """
       **Arguments:**

       system
            An instance of the ``System`` class.

       parts
            A list of instances of subclasses of ``ForcePart``. These are
            the different types of contributions to the force field, e.g.
            valence interactions, real-space electrostatics, and so on.

       **Optional arguments:**

       nlist
            A ``NeighborList`` instance. This is required if some items in
            the parts list use this nlist object.
    """
    ForcePart.__init__(self, 'all', system)
    self.system = system
    self.parts = []
    self.nlist = nlist
    self.needs_nlist_update = nlist is not None
    for part in parts:
        self.add_part(part)
    if log.do_medium:
        with log.section('FFINIT'):
            log('Force field with %i parts:&%s.' % (
                len(self.parts), ', '.join(part.name for part in self.parts)
            ))
            log('Neighborlist present: %s' % (self.nlist is not None))
def __init__(self, system, grids):
    '''
       **Arguments:**

       system
            An instance of the ``System`` class.

       grids
            A dictionary with (ffatype, grid) items. Each grid must be a
            three-dimensional array with energies.

       This force part is only applicable to systems that are 3D periodic.
    '''
    if system.cell.nvec != 3:
        raise ValueError('The system must be 3D periodic for the grid term.')
    for grid in grids.values():
        if grid.ndim != 3:
            raise ValueError('The energy grids must be 3D numpy arrays.')
    ForcePart.__init__(self, 'grid', system)
    self.system = system
    self.grids = grids
    if log.do_medium:
        with log.section('FPINIT'):
            log('Force part: %s' % self.name)
            log.hline()
def __init__(self, system, alpha, gcut=0.35):
    '''
       **Arguments:**

       system
            The system to which this interaction applies.

       alpha
            The alpha parameter in the Ewald summation method.

       **Optional arguments:**

       gcut
            The cutoff in reciprocal space.
    '''
    ForcePart.__init__(self, 'ewald_reci', system)
    if not system.cell.nvec == 3:
        raise TypeError('The system must have a 3D periodic cell.')
    if system.charges is None:
        raise ValueError('The system does not have charges.')
    if system.dipoles is None:
        raise ValueError('The system does not have dipoles.')
    self.system = system
    self.alpha = alpha
    self.gcut = gcut
    self.update_gmax()
    self.work = np.empty(system.natom*2)
    if log.do_medium:
        with log.section('FPINIT'):
            log('Force part: %s' % self.name)
            log.hline()
            log(' alpha: %s' % log.invlength(self.alpha))
            log(' gcut: %s' % log.invlength(self.gcut))
            log.hline()
def __init__(self, system, alpha, scalings):
    '''
       **Arguments:**

       system
            The system to which this interaction applies.

       alpha
            The alpha parameter in the Ewald summation method.

       scalings
            A ``Scalings`` object. This object contains all the information
            about the energy scaling of pairwise contributions that are
            involved in covalent interactions. See
            :class:`yaff.pes.scalings.Scalings` for more details.
    '''
    ForcePart.__init__(self, 'ewald_cor', system)
    if not system.cell.nvec == 3:
        raise TypeError('The system must have a 3D periodic cell')
    if system.charges is None:
        raise ValueError('The system does not have charges.')
    self.system = system
    self.alpha = alpha
    self.scalings = scalings
    if log.do_medium:
        with log.section('FPINIT'):
            log('Force part: %s' % self.name)
            log.hline()
            log(' alpha: %s' % log.invlength(self.alpha))
            log(' scalings: %5.3f %5.3f %5.3f' % (scalings.scale1, scalings.scale2, scalings.scale3))
            log.hline()
@classmethod
def generate(cls, system, parameters, **kwargs):
    """Create a force field for the given system with the given parameters.

       **Arguments:**

       system
            An instance of the System class

       parameters
            Three types are accepted: (i) the filename of the parameter
            file, which is a text file that adheres to YAFF parameter
            format, (ii) a list of such filenames, or (iii) an instance of
            the Parameters class.

       See the constructor of the :class:`yaff.pes.generator.FFArgs` class
       for the available optional arguments.

       This method takes care of setting up the FF object, and configuring
       all the necessary FF parts. This is a lot easier than creating an FF
       with the default constructor. Parameters for atom types that are not
       present in the system are simply ignored.
    """
    if system.ffatype_ids is None:
        raise ValueError('The generator needs ffatype_ids in the system object.')
    with log.section('GEN'), timer.section('Generator'):
        from yaff.pes.generator import apply_generators, FFArgs
        from yaff.pes.parameters import Parameters
        if log.do_medium:
            log('Generating force field from %s' % str(parameters))
        if not isinstance(parameters, Parameters):
            parameters = Parameters.from_file(parameters)
        ff_args = FFArgs(**kwargs)
        apply_generators(system, parameters, ff_args)
        return ForceField(system, ff_args.parts, ff_args.nlist)
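# Illustrative usage sketch (not from the original source): build a force
# field from a system checkpoint and a parameter file, then evaluate the
# energy. The file names and the cutoff value are hypothetical placeholders.
#
#     from molmod.units import angstrom
#     from yaff import System, ForceField
#
#     system = System.from_file('system.chk')       # hypothetical file
#     ff = ForceField.generate(system, 'pars.txt',  # hypothetical file
#                              rcut=15*angstrom)
#     energy = ff.compute()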
def __init__(self, system, alpha, dielectric=1.0):
    '''
       **Arguments:**

       system
            The system to which this interaction applies.

       alpha
            The alpha parameter in the Ewald summation method.

       **Optional arguments:**

       dielectric
            The scalar relative permittivity of the system.
    '''
    ForcePart.__init__(self, 'ewald_neut', system)
    if not system.cell.nvec == 3:
        raise TypeError('The system must have a 3D periodic cell')
    if system.charges is None:
        raise ValueError('The system does not have charges.')
    self.system = system
    self.alpha = alpha
    self.dielectric = dielectric
    if log.do_medium:
        with log.section('FPINIT'):
            log('Force part: %s' % self.name)
            log.hline()
            log(' alpha: %s' % log.invlength(self.alpha))
            log(' relative permittivity: %5.3f' % self.dielectric)
            log.hline()
def set_standard_masses(self):
    """Initialize the ``masses`` attribute based on the atomic numbers."""
    with log.section('SYS'):
        from molmod.periodic import periodic
        if self.masses is not None:
            if log.do_warning:
                log.warn('Overwriting existing masses with default masses.')
        self.masses = np.array([periodic[n].mass for n in self.numbers])
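# Minimal usage sketch (assumes an existing ``System`` instance ``system``):
#
#     system.set_standard_masses()
#     assert system.masses.shape == (system.natom,)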
def g09log_to_hdf5(f, fn_log):
    """Convert Gaussian09 BOMD log file to Yaff HDF5 format.

       **Arguments:**

       f
            An open and writable HDF5 file.

       fn_log
            The name of the Gaussian log file.
    """
    with log.section('G09H5'):
        if log.do_medium:
            log('Loading Gaussian 09 file \'%s\' into \'trajectory\' of HDF5 file \'%s\'' % (
                fn_log, f.filename
            ))

        # First make sure the HDF5 file has a system description that is consistent
        # with the XYZ file.
        if 'system' not in f:
            raise ValueError('The HDF5 file must contain a system group.')
        if 'numbers' not in f['system']:
            raise ValueError('The HDF5 file must have a system group with atomic numbers.')
        natom = f['system/numbers'].shape[0]

        # Take care of the trajectory group
        tgrp = get_trajectory_group(f)

        # Take care of the pos and vel datasets
        dss = get_trajectory_datasets(
            tgrp,
            ('pos', (natom, 3)),
            ('vel', (natom, 3)),
            ('frc', (natom, 3)),
            ('time', (1,)),
            ('step', (1,)),
            ('epot', (1,)),
            ('ekin', (1,)),
            ('etot', (1,)),
        )
        ds_pos, ds_vel, ds_frc, ds_time, ds_step, ds_epot, ds_ekin, ds_etot = dss

        # Load frame by frame
        row = get_last_trajectory_row(dss)
        for numbers, pos, vel, frc, time, step, epot, ekin, etot in _iter_frames_g09(fn_log):
            if (numbers != f['system/numbers']).any():
                log.warn('The element numbers of the HDF5 and LOG file do not match.')
            write_to_dataset(ds_pos, pos, row)
            write_to_dataset(ds_vel, vel, row)
            write_to_dataset(ds_frc, frc, row)
            write_to_dataset(ds_time, time, row)
            write_to_dataset(ds_step, step, row)
            write_to_dataset(ds_epot, epot, row)
            write_to_dataset(ds_ekin, ekin, row)
            write_to_dataset(ds_etot, etot, row)
            row += 1

        # Check number of rows
        check_trajectory_rows(tgrp, dss, row)
def to_file(self, fn):
    """Write the system to a file

       **Arguments:**

       fn
            The file to write to.

       Supported formats are:

       chk
            Internal human-readable checkpoint format. This format includes
            all the information of a system object. All data are stored in
            atomic units.

       h5
            Internal binary checkpoint format. This format includes all the
            information of a system object. All data are stored in atomic
            units.

       xyz
            A simple file with atomic positions and elements. Coordinates
            are written in Angstroms.
    """
    if fn.endswith('.chk'):
        from molmod.io import dump_chk
        dump_chk(fn, {
            'numbers': self.numbers,
            'pos': self.pos,
            'ffatypes': self.ffatypes,
            'ffatype_ids': self.ffatype_ids,
            'scopes': self.scopes,
            'scope_ids': self.scope_ids,
            'bonds': self.bonds,
            'rvecs': self.cell.rvecs,
            'charges': self.charges,
            'radii': self.radii,
            'valence_charges': self.valence_charges,
            'dipoles': self.dipoles,
            'radii2': self.radii2,
            'masses': self.masses,
        })
    elif fn.endswith('.h5'):
        with h5.File(fn, 'w') as f:
            self.to_hdf5(f)
    elif fn.endswith('.xyz'):
        from molmod.io import XYZWriter
        from molmod.periodic import periodic
        xyz_writer = XYZWriter(fn, [periodic[n].symbol for n in self.numbers])
        xyz_writer.dump(str(self), self.pos)
    else:
        raise NotImplementedError('The extension of %s does not correspond to any known format.' % fn)
    if log.do_high:
        with log.section('SYS'):
            log('Wrote system to %s.' % fn)
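# Usage sketch (illustrative): round-trip a system through the internal
# checkpoint format. The file name is a hypothetical placeholder.
#
#     system.to_file('copy.chk')
#     system2 = System.from_file('copy.chk')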
def run(self, nstep=None):
    with log.section(self.log_name), timer.section(self.log_name):
        if nstep is None:
            while True:
                if self.propagate():
                    break
        else:
            for i in range(nstep):
                if self.propagate():
                    break
        self.finalize()
def __init__(self, comsystem, scaling=None):
    ForcePart.__init__(self, 'valence_com', comsystem)
    #ForcePartValence.__init__(self, system)
    self.comlist = comsystem.comlist
    self.gpos = np.zeros((comsystem.gpos_dim, 3), float)
    self.dlist = DeltaList(self.comlist)
    self.iclist = InternalCoordinateList(self.dlist)
    self.vlist = ValenceList(self.iclist)
    self.scaling = scaling
    if log.do_medium:
        with log.section('FPINIT'):
            log('Force part: %s' % self.name)
            log.hline()
    self.term = None  # volume term
def update(self):
    '''Rebuild or recompute the neighbor lists

       Based on the changes of the atomic positions or due to calls to
       ``update_rcut`` and ``update_rmax``, the neighbor lists will be
       rebuilt from scratch.

       The heavy computational work is done in low-level C routines. The
       neighbor lists array is reallocated if needed. The memory allocation
       is done in Python for convenience.
    '''
    with log.section('NLIST'), timer.section('Nlists'):
        assert self.rcut > 0

        if self._need_rebuild():
            # *rebuild* the entire neighborlist
            if self.system.cell.volume != 0:
                if self.system.natom / self.system.cell.volume > 10:
                    raise ValueError('Atom density too high')
            # 1) make an initial status object for the neighbor list algorithm
            status = nlist_status_init(self.rmax)
            # 2) a loop of consecutive update/allocate calls
            last_start = 0
            while True:
                done = nlist_build(
                    self.system.pos, self.rcut + self.skin, self.rmax,
                    self.system.cell, status, self.neighs[last_start:]
                )
                if done:
                    break
                last_start = len(self.neighs)
                # Grow the neighbor array by a factor of 3/2. Integer
                # division is required because np.empty expects an int size.
                new_neighs = np.empty((len(self.neighs)*3)//2, dtype=neigh_dtype)
                new_neighs[:last_start] = self.neighs
                self.neighs = new_neighs
                del new_neighs
            # 3) get the number of neighbors in the list.
            self.nneigh = nlist_status_finish(status)
            if log.do_debug:
                log('Rebuilt, size = %i' % self.nneigh)
            # 4) store the current state to check in future calls if we
            #    need to do a rebuild or a recompute.
            self._checkpoint()
            self.rebuild_next = False
        else:
            # just *recompute* the deltas and the distance in the
            # neighborlist
            nlist_recompute(self.system.pos, self._pos_old, self.system.cell,
                            self.neighs[:self.nneigh])
            if log.do_debug:
                log('Recomputed')
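# The reallocation above grows the neighbor array by a factor of 3/2 whenever
# nlist_build runs out of space, so building a list of n pairs costs amortized
# O(n). A minimal standalone sketch of the same growth pattern (the predicate
# ``needs_more_capacity`` is a hypothetical stand-in for nlist_build):
#
#     import numpy as np
#     neighs = np.empty(10, dtype=int)   # initial capacity
#     while needs_more_capacity(neighs):
#         new_neighs = np.empty(len(neighs)*3//2, dtype=neighs.dtype)
#         new_neighs[:len(neighs)] = neighs
#         neighs = new_neighs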
def _verify_hooks(self):
    with log.section('ENSEM'):
        thermo = None
        index_thermo = 0
        baro = None
        index_baro = 0

        # Look for the presence of a thermostat and/or barostat
        if hasattr(self.hooks, '__len__'):
            for index, hook in enumerate(self.hooks):
                if hook.method == 'thermostat':
                    thermo = hook
                    index_thermo = index
                elif hook.method == 'barostat':
                    baro = hook
                    index_baro = index
        elif self.hooks is not None:
            if self.hooks.method == 'thermostat':
                thermo = self.hooks
            elif self.hooks.method == 'barostat':
                baro = self.hooks

        # If both are present, delete them and generate a TBCombination element
        if thermo is not None and baro is not None:
            from yaff.sampling.npt import TBCombination
            if log.do_warning:
                log.warn('Both thermostat and barostat are present separately and will be merged')
            # Delete the hook with the highest index first, so that the
            # second index remains valid after the first deletion.
            del self.hooks[max(index_thermo, index_baro)]
            del self.hooks[min(index_thermo, index_baro)]
            self.hooks.append(TBCombination(thermo, baro))

        if hasattr(self.hooks, '__len__'):
            for hook in self.hooks:
                if hook.name == 'TBCombination':
                    thermo = hook.thermostat
                    baro = hook.barostat
        elif self.hooks is not None:
            if self.hooks.name == 'TBCombination':
                thermo = self.hooks.thermostat
                baro = self.hooks.barostat

        if log.do_warning:
            if thermo is not None:
                log('Temperature coupling achieved through ' + str(thermo.name) + ' thermostat')
            if baro is not None:
                log('Pressure coupling achieved through ' + str(baro.name) + ' barostat')
def __init__(self, system, alpha, gcut=0.35, dielectric=1.0, exclude_frame=False, n_frame=0):
    '''
       **Arguments:**

       system
            The system to which this interaction applies.

       alpha
            The alpha parameter in the Ewald summation method.

       **Optional arguments:**

       gcut
            The cutoff in reciprocal space.

       dielectric
            The scalar relative permittivity of the system.

       exclude_frame
            A boolean to exclude framework-framework interactions
            (exclude_frame=True) for efficiency's sake in MC simulations.

       n_frame
            Number of framework atoms. This parameter is used to exclude
            framework-framework neighbors when exclude_frame=True.
    '''
    ForcePart.__init__(self, 'ewald_reci', system)
    if not system.cell.nvec == 3:
        raise TypeError('The system must have a 3D periodic cell.')
    if system.charges is None:
        raise ValueError('The system does not have charges.')
    self.system = system
    self.alpha = alpha
    self.gcut = gcut
    self.dielectric = dielectric
    self.update_gmax()
    self.work = np.empty(system.natom*2)
    if exclude_frame and n_frame < 0:
        raise ValueError('The number of framework atoms to exclude must be non-negative.')
    elif not exclude_frame:
        n_frame = 0
    self.n_frame = n_frame
    if log.do_medium:
        with log.section('FPINIT'):
            log('Force part: %s' % self.name)
            log.hline()
            log(' alpha: %s' % log.invlength(self.alpha))
            log(' gcut: %s' % log.invlength(self.gcut))
            log(' relative permittivity: %5.3f' % self.dielectric)
            log.hline()
def __init__(self, ff, state=None, hooks=None, counter0=0):
    """
       **Arguments:**

       ff
            The ForceField instance used in the iterative algorithm

       **Optional arguments:**

       state
            A list with state items. State items are simple objects that
            take or derive a property from the current state of the
            iterative algorithm.

       hooks
            A function (or a list of functions) that is called after every
            iteration.

       counter0
            The counter value associated with the initial state.
    """
    self.ff = ff
    self.state_list = [state_item.copy() for state_item in self.default_state]
    if state is not None:
        self.state_list += state
    self.state = dict((item.key, item) for item in self.state_list)
    if hooks is None:
        self.hooks = []
    elif hasattr(hooks, '__len__'):
        self.hooks = hooks
    else:
        self.hooks = [hooks]
    self._add_default_hooks()
    self.counter0 = counter0
    self.counter = counter0
    with log.section(self.log_name), timer.section(self.log_name):
        self.initialize()
    # Initialize the restart hook if present
    from yaff.sampling.io import RestartWriter
    for hook in self.hooks:
        if isinstance(hook, RestartWriter):
            hook.init_state(self)
def __init__(self, system):
    '''
       **Arguments:**

       system
            An instance of the ``System`` class.
    '''
    ForcePart.__init__(self, 'valence', system)
    self.dlist = DeltaList(system)
    self.iclist = InternalCoordinateList(self.dlist)
    self.vlist = ValenceList(self.iclist)
    if log.do_medium:
        with log.section('FPINIT'):
            log('Force part: %s' % self.name)
            log.hline()
@classmethod
def generate(cls, system, parameters, **kwargs):
    """Create a force field for the given system with the given parameters.

       **Arguments:**

       system
            An instance of the System class

       parameters
            Four types are accepted: (i) the filename of the parameter
            file, which is a text file that adheres to YAFF parameter
            format, (ii) a list of such filenames, (iii) an instance of the
            Parameters class, or (iv) the filename of the parameter file in
            the YAML format.

       See the constructor of the :class:`yaff.pes.generator.FFArgs` class
       for the available optional arguments.

       This method takes care of setting up the FF object, and configuring
       all the necessary FF parts. This is a lot easier than creating an FF
       with the default constructor. Parameters for atom types that are not
       present in the system are simply ignored.
    """
    if system.ffatype_ids is None:
        raise ValueError('The generator needs ffatype_ids in the system object.')
    with log.section('GEN'), timer.section('Generator'):
        from yaff.pes.generator import apply_generators, FFArgs
        from yaff.pes.parameters import Parameters
        if log.do_medium:
            log('Generating force field from %s' % str(parameters))
        if isinstance(parameters, str):
            if parameters.endswith('txt'):
                parameters = Parameters.from_file(parameters)
                ff_args = FFArgs(**kwargs)
                apply_generators(system, parameters, ff_args)
            else:
                with open(parameters) as fyaml:
                    yaml_dict = yaml.safe_load(fyaml)
                ff_args = FFArgs(**kwargs)
                apply_generators(system, yaml_dict, ff_args)
        else:
            if isinstance(parameters, Parameters):
                raise NotImplementedError
            else:
                yaml_dict = parameters
                ff_args = FFArgs(**kwargs)
                apply_generators(system, yaml_dict, ff_args)
        return ForceField(system, ff_args.parts, ff_args.nlist)
def add_term(self, term):
    '''Add a new term to the covalent force field.

       **Arguments:**

       term
            An instance of the class
            :class:`yaff.pes.vlist.ValenceTerm`.

       In principle, one should add all energy terms before calling the
       ``compute`` method, but with the current implementation of Yaff,
       energy terms can be added at any time. (This may change in future.)
    '''
    if log.do_high:
        with log.section('VTERM'):
            log('%7i&%s %s' % (self.vlist.nv, term.get_log(),
                ' '.join(ic.get_log() for ic in term.ics)))
    self.vlist.add_term(term)
def set_hills(self, q0s, Ks, sigmas, tempering=0.0, T=None, periodicities=None):
    # Safety checks
    assert q0s.shape[1] == self.ncv
    assert sigmas.shape[0] == self.ncv
    assert q0s.shape[0] == Ks.shape[0]
    if tempering != 0.0 and T is None:
        raise ValueError("For a well-tempered MTD run, the temperature "
                         "has to be specified")
    self.q0s = q0s
    self.sigmas = sigmas
    self.Ks = Ks
    self.tempering = tempering
    self.T = T
    self.periodicities = periodicities
    if log.do_medium:
        with log.section("SUMHILL"):
            log("Found %d collective variables and %d Gaussian hills" % (self.ncv, self.q0s.shape[0]))
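# Shape sketch (illustrative): for two Gaussian hills acting on a single
# collective variable, the arrays below satisfy the safety checks in
# ``set_hills``. The numerical values are arbitrary placeholders and
# ``hills`` stands for an instance of the surrounding class.
#
#     import numpy as np
#     q0s = np.array([[0.1], [0.2]])   # shape (nhills, ncv)
#     Ks = np.array([0.01, 0.01])      # shape (nhills,)
#     sigmas = np.array([0.05])        # shape (ncv,)
#     hills.set_hills(q0s, Ks, sigmas)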
def detect_ffatypes(self, rules):
    """Initialize the ``ffatypes`` attribute based on ATSELECT rules.

       **Argument:**

       rules
            A list of (ffatype, rule) pairs that will be used to initialize
            the attributes ``self.ffatypes`` and ``self.ffatype_ids``.

       If the system already has FF atom types, they will be overwritten.
    """
    with log.section('SYS'):
        # Give warning if needed
        if self.ffatypes is not None:
            if log.do_warning:
                log.warn('Overwriting existing FF atom types.')
        # Compile all the rules
        my_rules = []
        for ffatype, rule in rules:
            check_name(ffatype)
            if isinstance(rule, str):
                rule = atsel_compile(rule)
            my_rules.append((ffatype, rule))
        # Use the rules to detect the atom types
        lookup = {}
        self.ffatypes = []
        self.ffatype_ids = np.zeros(self.natom, int)
        for i in range(self.natom):
            my_ffatype = None
            for ffatype, rule in my_rules:
                if rule(self, i):
                    my_ffatype = ffatype
                    break
            if my_ffatype is None:
                raise ValueError('Could not detect FF atom type of atom %i.' % i)
            ffatype_id = lookup.get(my_ffatype)
            if ffatype_id is None:
                ffatype_id = len(lookup)
                self.ffatypes.append(my_ffatype)
                lookup[my_ffatype] = ffatype_id
            self.ffatype_ids[i] = ffatype_id
        # Make sure all is done well ...
        self._init_derived_ffatypes()
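# Usage sketch with simple ATSELECT rules (illustrative): a bare atomic
# number is a valid rule, so this assigns one atom type per element.
#
#     system.detect_ffatypes([('H', '1'), ('C', '6'), ('O', '8')])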
def __init__(self, system, comlist=None):
    '''
       Parameters
       ----------
       system
            An instance of the ``System`` class.
       comlist
            An optional layer to derive centers of mass from the atomic
            positions. These centers of mass are used as input for the
            first layer, the relative vectors.
    '''
    ForcePart.__init__(self, 'valence', system)
    self.comlist = comlist
    self.dlist = DeltaList(system if comlist is None else comlist)
    self.iclist = InternalCoordinateList(self.dlist)
    self.vlist = ValenceList(self.iclist)
    if log.do_medium:
        with log.section('FPINIT'):
            log('Force part: %s' % self.name)
            log.hline()
def __init__(self, system):
    '''
       Parameters
       ----------
       system
            An instance of the ``System`` class.
    '''
    ForcePart.__init__(self, 'valence', system)
    # Override self.gpos with the correct size! natom of a COMSystem object
    # returns the number of beads, but gpos has to have the size
    # (n_atoms, 3) to be consistent with the other parts of the force field.
    self.dlist = DeltaList(system)
    self.iclist = InternalCoordinateList(self.dlist)
    self.vlist = ValenceList(self.iclist)
    if log.do_medium:
        with log.section('FPINIT'):
            log('Force part: %s' % self.name)
            log.hline()
def __init__(self, system, nlist, scalings, pair_pot):
    '''
       **Arguments:**

       system
            The system to which this pairwise interaction applies.

       nlist
            A ``NeighborList`` object. This has to be the same as the one
            passed to the ForceField object that contains this part.

       scalings
            A ``Scalings`` object. This object contains all the information
            about the energy scaling of pairwise contributions that are
            involved in covalent interactions. See
            :class:`yaff.pes.scalings.Scalings` for more details.

       pair_pot
            An instance of the ``PairPot`` built-in class from
            :mod:`yaff.pes.ext`.
    '''
    ForcePart.__init__(self, 'pair_%s' % pair_pot.name, system)
    self.nlist = nlist
    self.scalings = scalings
    self.pair_pot = pair_pot
    self.nlist.request_rcut(pair_pot.rcut)
    if log.do_medium:
        with log.section('FPINIT'):
            log('Force part: %s' % self.name)
            log.hline()
            log(' scalings: %5.3f %5.3f %5.3f' % (scalings.scale1, scalings.scale2, scalings.scale3))
            log(' real space cutoff: %s' % log.length(pair_pot.rcut))
            tr = pair_pot.get_truncation()
            if tr is None:
                log(' truncation: none')
            else:
                log(' truncation: %s' % tr.get_log())
            self.pair_pot.log()
            log.hline()
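# Construction sketch (illustrative): a Lennard-Jones pair part wired to a
# neighbor list and default scalings. The sigma/epsilon values and the
# cutoff are arbitrary placeholders.
#
#     import numpy as np
#     from molmod.units import angstrom, kjmol
#     from yaff import NeighborList, Scalings, PairPotLJ, ForcePartPair
#
#     nlist = NeighborList(system)
#     scalings = Scalings(system)
#     sigmas = 3.0*angstrom*np.ones(system.natom)
#     epsilons = 1.0*kjmol*np.ones(system.natom)
#     pair_pot = PairPotLJ(sigmas, epsilons, 12*angstrom)
#     part = ForcePartPair(system, nlist, scalings, pair_pot)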
def iter_matches(self, other):
    """Yield all renumberings of atoms that map the given system onto the
       current one.

       Parameters
       ----------
       other : yaff.System
           Another system with the same number of atoms (and chemical
           formula), or fewer atoms.

       The graph distance is used to perform the mapping, so bonds must be
       defined in the current and the given system.
    """
    from molmod.graphs import Graph
    with log.section('SYS'):
        log('Generating allowed indexes for renumbering.')
        # The allowed permutations are just based on the chemical elements,
        # not the atom types, which could also be useful.
        allowed = []
        if self.ffatypes is None or other.ffatypes is None:
            for number1 in other.numbers:
                allowed.append((self.numbers == number1).nonzero()[0])
        else:
            # Only continue if other.ffatypes is a subset of self.ffatypes
            if not (set(self.ffatypes) >= set(other.ffatypes)):
                return
            ffatype_ids0 = self.ffatype_ids
            ffatypes0 = list(self.ffatypes)
            order = np.array([ffatypes0.index(ffatype) for ffatype in other.ffatypes])
            ffatype_ids1 = order[other.ffatype_ids]
            for ffatype_id1 in ffatype_ids1:
                allowed.append((ffatype_ids0 == ffatype_id1).nonzero()[0])
        # Use Molmod to construct graph distance matrices.
        log('Building graph distance matrix for self.')
        dm0 = Graph(self.bonds).distances
        log('Building graph distance matrix for other.')
        dm1 = Graph(other.bonds).distances
        # Yield the solutions
        log('Generating renumberings.')
        for match in iter_matches(dm0, dm1, allowed):
            yield match
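# Usage sketch (illustrative): enumerate how the atoms of a smaller system
# map onto a host system; both systems must have bonds defined.
#
#     for match in host.iter_matches(fragment):
#         print(match)   # one host atom index per atom of ``fragment``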
def __init__(self, system, alpha, scalings, dielectric=1.0):
    '''
       **Arguments:**

       system
            The system to which this interaction applies.

       alpha
            The alpha parameter in the Ewald summation method.

       scalings
            A ``Scalings`` object. This object contains all the information
            about the energy scaling of pairwise contributions that are
            involved in covalent interactions. See
            :class:`yaff.pes.scalings.Scalings` for more details.

       **Optional arguments:**

       dielectric
            The scalar relative permittivity of the system.
    '''
    ForcePart.__init__(self, 'ewald_cor', system)
    if not system.cell.nvec == 3:
        raise TypeError('The system must have a 3D periodic cell')
    if system.charges is None:
        raise ValueError('The system does not have charges.')
    self.system = system
    self.alpha = alpha
    self.dielectric = dielectric
    self.scalings = scalings
    if log.do_medium:
        with log.section('FPINIT'):
            log('Force part: %s' % self.name)
            log.hline()
            log(' alpha: %s' % log.invlength(self.alpha))
            log(' relative permittivity: %5.3f' % self.dielectric)
            log(' scalings: %5.3f %5.3f %5.3f' % (scalings.scale1, scalings.scale2, scalings.scale3))
            log.hline()
def detect_bonds(self, exceptions=None):
    """Initialize the ``bonds`` attribute based on inter-atomic distances

       **Optional argument:**

       exceptions
            Specify custom threshold for certain pairs of elements. This
            must be a dictionary with ((num0, num1), threshold) as items.

       For each pair of elements, a distance threshold is used to detect
       bonded atoms. The distance threshold is based on a database of known
       bond lengths. If the database does not contain a record for the
       given element pair, the threshold is based on the sum of covalent
       radii.
    """
    with log.section('SYS'):
        from molmod.bonds import bonds
        if self.bonds is not None:
            if log.do_warning:
                log.warn('Overwriting existing bonds.')
        work = np.zeros((self.natom * (self.natom - 1)) // 2, float)
        self.cell.compute_distances(work, self.pos)
        ishort = (work < bonds.max_length * 1.01).nonzero()[0]
        new_bonds = []
        for i in ishort:
            i0, i1 = _unravel_triangular(i)
            n0 = self.numbers[i0]
            n1 = self.numbers[i1]
            if exceptions is not None:
                threshold = exceptions.get((n0, n1))
                if threshold is None and n0 != n1:
                    threshold = exceptions.get((n1, n0))
                if threshold is not None:
                    if work[i] < threshold:
                        new_bonds.append([i0, i1])
                    continue
            if bonds.bonded(n0, n1, work[i]):
                new_bonds.append([i0, i1])
        self.bonds = np.array(new_bonds)
        self._init_derived_bonds()
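# Usage sketch (illustrative): detect bonds with a custom threshold for
# iron-oxygen pairs; the threshold value is an arbitrary placeholder.
#
#     from molmod.units import angstrom
#     system.detect_bonds(exceptions={(26, 8): 2.2*angstrom})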
def estimate_hessian(dof, eps=1e-4):
    """Estimate the Hessian using the symmetric finite difference approximation.

       **Arguments:**

       dof
            A DOF object

       **Optional arguments:**

       eps
            The magnitude of the displacements
    """
    with log.section('HESS'), timer.section('Hessian'):
        # Loop over all displacements
        if log.do_medium:
            log('The following displacements are computed:')
            log('DOF Dir Energy')
            log.hline()
        x1 = dof.x0.copy()
        rows = np.zeros((len(x1), len(x1)), float)
        for i in range(len(x1)):
            x1[i] = dof.x0[i] + eps
            epot, gradient_p = dof.fun(x1, do_gradient=True)
            if log.do_medium:
                log('% 7i pos %s' % (i, log.energy(epot)))
            x1[i] = dof.x0[i] - eps
            epot, gradient_m = dof.fun(x1, do_gradient=True)
            if log.do_medium:
                log('% 7i neg %s' % (i, log.energy(epot)))
            rows[i] = (gradient_p - gradient_m) / (2 * eps)
            x1[i] = dof.x0[i]
        dof.reset()
        if log.do_medium:
            log.hline()
        # Enforce symmetry and return
        return 0.5 * (rows + rows.T)
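# Usage sketch (illustrative): estimate the Cartesian Hessian of a force
# field, assuming ``CartesianDOF`` as the DOF implementation.
#
#     from yaff import CartesianDOF
#
#     dof = CartesianDOF(ff)
#     hessian = estimate_hessian(dof, eps=1e-4)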
def __init__(self, system, pext):
    '''
       **Arguments:**

       system
            An instance of the ``System`` class.

       pext
            The external pressure. (Positive will shrink the system.) In
            case of 2D-PBC, this is the surface tension. In case of 1D,
            this is the linear strain.

       This force part is only applicable to systems that are periodic.
    '''
    if system.cell.nvec == 0:
        raise ValueError('The system must be periodic in order to apply a pressure')
    ForcePart.__init__(self, 'press', system)
    self.system = system
    self.pext = pext
    if log.do_medium:
        with log.section('FPINIT'):
            log('Force part: %s' % self.name)
            log.hline()
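# Usage sketch (illustrative): add an external pressure of 1 GPa to an
# existing force field, e.g. before a cell optimization.
#
#     from molmod.units import pascal
#
#     ff.add_part(ForcePartPressure(ff.system, 1e9*pascal))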
def __init__(self, system, part_pair):
    '''
       **Arguments:**

       system
            An instance of the ``System`` class.

       part_pair
            An instance of the ``PairPot`` class.

       This force part is only applicable to systems that are 3D periodic.
    '''
    if system.cell.nvec != 3:
        raise ValueError('Tail corrections can only be applied to 3D periodic systems')
    if part_pair.name in ['pair_ei', 'pair_eidip']:
        raise ValueError('Tail corrections are divergent for %s' % part_pair.name)
    super(ForcePartTailCorrection, self).__init__('tailcorr_%s' % part_pair.name, system)
    self.ecorr, self.wcorr = part_pair.pair_pot.prepare_tailcorrections(system.natom)
    self.system = system
    if log.do_medium:
        with log.section('FPINIT'):
            log('Force part: %s' % self.name)
            log.hline()
def __init__(self, system, alpha, gcut=0.35, dielectric=1.0):
    '''
       **Arguments:**

       system
            The system to which this interaction applies.

       alpha
            The alpha parameter in the Ewald summation method.

       **Optional arguments:**

       gcut
            The cutoff in reciprocal space.

       dielectric
            The scalar relative permittivity of the system.
    '''
    ForcePart.__init__(self, 'ewald_reci', system)
    if not system.cell.nvec == 3:
        raise TypeError('The system must have a 3D periodic cell.')
    if system.charges is None:
        raise ValueError('The system does not have charges.')
    self.system = system
    self.alpha = alpha
    self.gcut = gcut
    self.dielectric = dielectric
    self.update_gmax()
    self.work = np.empty(system.natom * 2)
    if log.do_medium:
        with log.section('FPINIT'):
            log('Force part: %s' % self.name)
            log.hline()
            log(' alpha: %s' % log.invlength(self.alpha))
            log(' gcut: %s' % log.invlength(self.gcut))
            log(' relative permittivity: %5.3f' % self.dielectric)
            log.hline()
def __init__(self, system, comlist=None):
    '''
       **Arguments:**

       system
            An instance of the ``System`` class.

       **Optional arguments:**

       comlist
            An optional layer to derive centers of mass from the atomic
            positions. These centers of mass are used as input for the
            first layer, the relative vectors.
    '''
    ForcePart.__init__(self, 'bias', system)
    self.system = system
    self.valence = ForcePartValence(system)
    if comlist is not None:
        # Bias potentials with a COMList are not supported yet.
        raise NotImplementedError
    self.valence_com = None
    self.terms = []
    # The terms contributing to the bias potential are divided into three
    # categories:
    #   0) instances of BiasPotential
    #   1) instances of ValenceTerm with a regular DeltaList
    #   2) instances of ValenceTerm with a COMList
    # The following list facilitates looking up the terms after they have
    # been added
    self.term_lookup = []
    if log.do_medium:
        with log.section('FPINIT'):
            log('Force part: %s' % self.name)
            log.hline()
def pca_projection(f_target, f, pm, start=0, end=None, step=1, select=None,
                   path='trajectory/pos', mw=True):
    """Determines the principal components of an MD simulation

       **Arguments:**

       f_target
            Path to an h5.File instance to which the results are written.

       f
            An h5.File instance containing the trajectory data.

       pm
            An array containing the principal modes in its columns

       **Optional arguments:**

       start
            The first sample to be considered for analysis. This may be
            negative to indicate that the analysis should start from the
            -start last samples.

       end
            The last sample to be considered for analysis. This may be
            negative to indicate that the last -end sample should not be
            considered.

       step
            The spacing between the samples used for the analysis

       select
            A list of atom indexes that are considered for the computation
            of the spectrum. If not given, all atoms are used.

       path
            The path of the dataset that contains the time dependent data
            in the HDF5 file. The first axis of the array must be the time
            axis.

       mw
            If mw is True, the covariance matrix is mass-weighted.
    """
    # Load in the relevant data
    q = f[path][start:end:step, :, :]
    # Select the given atoms
    if select is not None:
        q = q[:, select, :]
    # Reshape such that all Cartesian coordinates are treated equally
    q = q.reshape(q.shape[0], -1)
    # If necessary, weight with the mass
    if mw:
        # Select the necessary masses
        masses = f['system/masses']
        if select is not None:
            masses = masses[select]
        # Repeat d times, with d the dimension
        masses = np.repeat(masses, 3)
        # Reweight with the masses
        q *= np.sqrt(masses)
    # Calculation of the principal components: projection of each q_j on the
    # principal modes
    with log.section('PCA'):
        log('Determining principal components')
        prin_comp = np.dot(q, pm)
    # Create the output HDF5 file
    with h5.File(f_target, 'a') as g:
        if 'pca' not in g:
            pca = g.create_group('pca')
        else:
            pca = g['pca']
        pca.create_dataset('pc', data=prin_comp)
def calc_pca(f_target, cov_mat=None, f=None, q_ref=None, start=0, end=None,
             step=1, select=None, path='trajectory/pos', mw=True, temp=None):
    """Performs a principal component analysis of the given trajectory.

       **Arguments:**

       f_target
            Path to an h5.File instance to which the results are written.

       **Optional arguments:**

       cov_mat
            The covariance matrix, if already calculated. If not provided,
            the covariance matrix will be calculated based on the file f.

       f
            An h5.File instance containing the trajectory data.

       q_ref
            Reference vector of the positions. If not provided, the
            ensemble average is taken.

       start
            The first sample to be considered for analysis. This may be
            negative to indicate that the analysis should start from the
            -start last samples.

       end
            The last sample to be considered for analysis. This may be
            negative to indicate that the last -end sample should not be
            considered.

       step
            The spacing between the samples used for the analysis

       select
            A list of atom indexes that are considered for the computation
            of the spectrum. If not given, all atoms are used.

       path
            The path of the dataset that contains the time dependent data
            in the HDF5 file. The first axis of the array must be the time
            axis.

       mw
            If mw is True, the covariance matrix is mass-weighted.

       temp
            Temperature at which the simulation is carried out, necessary
            to determine the frequencies
    """
    if cov_mat is None:
        if f is None:
            raise ValueError('No covariance matrix nor h5.File instance provided.')
        with log.section('PCA'):
            log('Calculating covariance matrix')
            cov_mat, q_ref = calc_cov_mat(f, q_ref, start, end, step, select, path, mw)
    with log.section('PCA'):
        log('Diagonalizing the covariance matrix')
        # Eigenvalue decomposition
        eigval, eigvec = np.linalg.eigh(cov_mat)
        # Order the eigenvalues in decreasing order
        idx = eigval.argsort()[::-1]
        eigval = eigval[idx]
        eigvec = eigvec[:, idx]
        # Create the output HDF5 file
        with h5.File(f_target, 'w') as g:
            pca = g.create_group('pca')
            # Output the reference structure q_ref
            pca.create_dataset('q_ref', data=q_ref)
            # Output the covariance matrix
            pca.create_dataset('cov_matrix', data=cov_mat)
            # Output the eigenvectors in columns
            pca.create_dataset('pm', data=eigvec)
            # Output the eigenvalues
            pca.create_dataset('eigvals', data=eigval)
            log('Determining inverse of the covariance matrix')
            # Process the matrix to determine the inverse.
            # First, project out the three zero eigenvalues (translations)
            eigvec_reduced = eigvec[:, :-3]
            eigval_reduced = eigval[:-3]
            # Second, calculate the reduced covariance matrix and its inverse
            cov_mat_reduced = np.dot(np.dot(eigvec_reduced, np.diag(eigval_reduced)), eigvec_reduced.T)
            cov_mat_inverse = np.dot(np.dot(eigvec_reduced, np.diag(1/eigval_reduced)), eigvec_reduced.T)
            pca.create_dataset('cov_mat_red', data=cov_mat_reduced)
            pca.create_dataset('cov_mat_inv', data=cov_mat_inverse)
            # Third, if the temperature is specified, calculate the
            # frequencies (the zero frequencies are mentioned last so that
            # their index corresponds to the principal modes)
            if temp is not None:
                log('Determining frequencies')
                frequencies = np.append(np.sqrt(boltzmann*temp/eigval_reduced)/(2*np.pi), np.repeat(0, 3))
                pca.create_dataset('freqs', data=frequencies)
    return eigval, eigvec
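# Usage sketch (illustrative): run the PCA on a trajectory file and store the
# results. The file names and the temperature are hypothetical placeholders.
#
#     import h5py as h5
#
#     with h5.File('traj.h5', 'r') as fh5:
#         eigval, eigvec = calc_pca('pca.h5', f=fh5, temp=300)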
def dlpoly_history_to_hdf5(f, fn_history, sub=slice(None), pos_unit=angstrom,
                           vel_unit=angstrom/picosecond,
                           frc_unit=amu*angstrom/picosecond**2,
                           time_unit=picosecond, mass_unit=amu):
    """Convert a DLPoly History trajectory file to Yaff HDF5 format.

       **Arguments:**

       f
            An open and writable HDF5 file.

       fn_history
            The filename of the DLPOLY history file.

       **Optional arguments:**

       sub
            The sub argument for the DLPolyHistoryReader. This must be a
            slice object that defines the subsampling of the samples from
            the history file. By default all frames are read.

       pos_unit, vel_unit, frc_unit, time_unit and mass_unit
            The units used in the dlpoly history file. The default values
            correspond to the defaults used in DLPOLY.

       This routine will also test the consistency of the row attribute of
       the trajectory group. If some trajectory data is already present, it
       will be replaced by the new data. It is highly recommended to first
       initialize the HDF5 file with the ``to_hdf5`` method of the System
       class.
    """
    with log.section('DPH5'):
        if log.do_medium:
            log('Loading DLPOLY history file \'%s\' into \'trajectory\' of HDF5 file \'%s\'' % (
                fn_history, f.filename
            ))
        # Take care of the data group
        tgrp = get_trajectory_group(f)
        # Open the history file for reading
        hist_reader = DLPolyHistoryReader(fn_history, sub, pos_unit, vel_unit,
                                          frc_unit, time_unit, mass_unit)
        # Take care of the datasets that should always be present
        natom = hist_reader.num_atoms
        dss = get_trajectory_datasets(
            tgrp,
            ('step', (1,)),
            ('time', (1,)),
            ('cell', (3, 3)),
            ('pos', (natom, 3)),
        )
        ds_step, ds_time, ds_cell, ds_pos = dss
        # Take care of the optional datasets
        if hist_reader.keytrj > 0:
            ds_vel = get_trajectory_datasets(tgrp, ('vel', (natom, 3)))[0]
            dss.append(ds_vel)
        if hist_reader.keytrj > 1:
            ds_frc = get_trajectory_datasets(tgrp, ('frc', (natom, 3)))[0]
            dss.append(ds_frc)
        # Decide on the first row to start writing data
        row = get_last_trajectory_row(dss)
        # Load the data frame by frame
        for frame in hist_reader:
            write_to_dataset(ds_step, frame["step"], row)
            write_to_dataset(ds_time, frame["time"], row)
            write_to_dataset(ds_cell, frame["cell"].T, row)
            write_to_dataset(ds_pos, frame["pos"], row)
            if hist_reader.keytrj > 0:
                write_to_dataset(ds_vel, frame["vel"], row)
            if hist_reader.keytrj > 1:
                write_to_dataset(ds_frc, frame["frc"], row)
            row += 1
        # Check the number of rows
        check_trajectory_rows(tgrp, dss, row)
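# Usage sketch (illustrative): append a DLPOLY trajectory to an HDF5 file
# that was initialized with ``System.to_hdf5``. File names are placeholders.
#
#     import h5py as h5
#
#     with h5.File('output.h5', 'a') as fh5:
#         dlpoly_history_to_hdf5(fh5, 'HISTORY')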
def cp2k_ener_to_hdf5(f, fn_ener, sub=slice(None)):
    """Convert a CP2K energy trajectory file to Yaff HDF5 format.

       **Arguments:**

       f
            An open and writable HDF5 file.

       fn_ener
            The filename of the CP2K energy trajectory file.

       **Optional arguments:**

       sub
            This must be a slice object that defines the sub-sampling of
            the CP2K energy file. By default all time steps are read.

       This routine will also test the consistency of the row attribute of
       the trajectory group. If some trajectory data is already present, it
       will be replaced by the new data. Furthermore, this routine also
       checks the header of the CP2K energy file to make sure the values
       are interpreted correctly.

       It is highly recommended to first initialize the HDF5 file with the
       ``to_hdf5`` method of the System class.
    """
    with log.section('CP2KEH5'):
        if log.do_medium:
            log('Loading CP2K energy file \'%s\' into \'trajectory\' of HDF5 file \'%s\'' % (
                fn_ener, f.filename
            ))
        # Take care of the data group
        tgrp = get_trajectory_group(f)
        # Take care of the datasets
        dss = get_trajectory_datasets(
            tgrp,
            ('step', (1,)),
            ('time', (1,)),
            ('ekin', (1,)),
            ('temp', (1,)),
            ('epot', (1,)),
            ('econs', (1,)),
        )
        ds_step, ds_time, ds_ke, ds_temp, ds_pe, ds_cq = dss
        # Fill the datasets with data.
        row = get_last_trajectory_row(dss)
        counter = 0
        with open(fn_ener) as fin:
            # Check the header line
            line = next(fin)
            words = line.split()
            if words[0] != '#':
                raise ValueError('The first line in the energies file should be a header line starting with #.')
            if words[3] != 'Time[fs]' or words[4] != 'Kin.[a.u.]' or \
               words[5] != 'Temp[K]' or words[6] != 'Pot.[a.u.]' or \
               words[7] + ' ' + words[8] != 'Cons Qty[a.u.]':
                raise ValueError('The fields in the header line indicate that this file contains unsupported data.')
            # Load the lines
            for line in fin:
                if slice_match(sub, counter):
                    words = line.split()
                    write_to_dataset(ds_step, float(words[0]), row)
                    write_to_dataset(ds_time, float(words[1])*femtosecond, row)
                    write_to_dataset(ds_ke, float(words[2]), row)
                    write_to_dataset(ds_temp, float(words[3]), row)
                    write_to_dataset(ds_pe, float(words[4]), row)
                    write_to_dataset(ds_cq, float(words[5]), row)
                    row += 1
                counter += 1
        # Check the number of rows
        check_trajectory_rows(tgrp, dss, row)
def write_lammps_table(ff, fn='lammps.table', rmin=0.50*angstrom, nrows=2500,
                       unit_style='electron'):
    '''Write tables containing noncovalent interactions for LAMMPS.

       For every pair of ffatypes, a separate table is generated. Because
       electrostatic interactions require a specific treatment, point-charge
       electrostatics are NOT included in the tables.

       When distributed charge distributions (e.g. Gaussian) are used, this
       complicates matters. LAMMPS will still only treat point-charge
       electrostatics using a dedicated method (e.g. Ewald or PPPM), so the
       table has to contain the difference between the distributed charges
       and the point-charge electrostatic interactions. This also means
       that every ffatype needs a unique charge distribution, i.e. all
       atoms of the same atom type need to have the same charge and
       Gaussian radius.

       All pair potentials contributing to the table need to have the same
       scalings for near-neighbor interactions; this is however independent
       of the generation of the table and is dealt with elsewhere.

       **Arguments:**

       ff
            Yaff ForceField instance

       **Optional arguments:**

       fn
            Filename where tables will be stored

       rmin
            The smallest distance that is tabulated

       nrows
            The number of tabulated distances per table

       unit_style
            The LAMMPS unit style used for the tabulated values
    '''
    # Find out if we are dealing with electrostatics from distributed charges
    corrections = []
    for part in ff.parts:
        if part.name == 'pair_ei':
            if np.any(part.pair_pot.radii != 0.0):
                # Create a ForcePart with electrostatics from distributed
                # charges, completely in real space.
                pair_pot_dist = PairPotEI(part.pair_pot.charges, 0.0,
                    part.pair_pot.rcut, tr=part.pair_pot.get_truncation(),
                    dielectric=part.pair_pot.dielectric,
                    radii=part.pair_pot.radii)
                fp_dist = ForcePartPair(ff.system, ff.nlist, part.scalings, pair_pot_dist)
                corrections.append((fp_dist, 1.0))
                # Create a ForcePart with electrostatics from point
                # charges, completely in real space.
                pair_pot_point = PairPotEI(part.pair_pot.charges, 0.0,
                    part.pair_pot.rcut, tr=part.pair_pot.get_truncation(),
                    dielectric=part.pair_pot.dielectric)
                fp_point = ForcePartPair(ff.system, ff.nlist, part.scalings, pair_pot_point)
                corrections.append((fp_point, -1.0))
    # Find the largest cut-off
    rmax = 0.0
    for part in ff.parts:
        if part.name.startswith('pair_'):
            if part.name == 'pair_ei' and len(corrections) == 0:
                continue
            rmax = np.amax([rmax, part.pair_pot.rcut])
    # Get the LAMMPS ffatypes
    ffatypes, ffatype_ids = get_lammps_ffatypes(ff)
    # Select an atom pair for each pair of atom types
    ffa_pairs = []
    for i in range(len(ffatypes)):
        index0 = np.where(ffatype_ids == i)[0][0]
        for j in range(i, len(ffatypes)):
            index1 = -1
            candidates = np.where(ffatype_ids == j)[0]
            for cand in candidates:
                if cand == index0 or cand in ff.system.neighs1[index0] or \
                   cand in ff.system.neighs2[index0] or \
                   cand in ff.system.neighs3[index0] or \
                   cand in ff.system.neighs4[index0]:
                    continue
                else:
                    index1 = cand
                    break
            if index1 == -1:
                log("ERROR constructing LAMMPS tables: there is no pair of atom types %s-%s which are not near neighbors" % (ffatypes[i], ffatypes[j]))
                log("Consider using a supercell to fix this problem")
                raise ValueError
            ffa_pairs.append([index0, index1])
    if log.do_medium:
        with log.section('LAMMPS'):
            log("Generating LAMMPS table with noncovalent interactions")
            log.hline()
            log("rmin = %s | rmax = %s" % (log.length(rmin), log.length(rmax)))
    # We only consider one neighbor interaction
    ff.compute()
    ff.nlist.nneigh = 1
    # Construct an array of evenly spaced distances
    distances = np.linspace(rmin, rmax, nrows)
    with open(fn, 'w') as ftab:
        ftab.write("# LAMMPS tabulated potential generated by Yaff\n")
        ftab.write("# All quantities in atomic units\n")
        ftab.write("# The names of the tables refer to the ffatype_ids that have to be used in the Yaff system\n")
        ftab.write("#%4s %13s %21s %21s\n" % ("i", "d", "V", "F"))
        # Loop over all atom pairs
        for index0, index1 in ffa_pairs:
            energies = []
            for d in distances:
                gposnn = np.zeros(ff.system.pos.shape, float)
                ff.nlist.neighs[0] = (index0, index1, d, 0.0, 0.0, d, 0, 0, 0)
                energy = 0.0
                for part in ff.parts:
                    if not part.name.startswith('pair'):
                        continue
                    if part.name == 'pair_ei':
                        continue
                    energy += part.compute(gpos=gposnn)
                for part, sign in corrections:
                    gposcorr = np.zeros(ff.system.pos.shape, float)
                    energy += sign*part.compute(gpos=gposcorr)
                    gposnn[:] += sign*gposcorr
                row = [d, energy, gposnn[index0, 2]]
                energies.append(row)
            energies = np.asarray(energies)
            ffai = ffatypes[ffatype_ids[index0]]
            ffaj = ffatypes[ffatype_ids[index1]]
            if np.all(energies[:, 1] == 0.0):
                log.warn("Noncovalent energies between atoms %d (%s) and %d (%s) are zero" % (index0, ffai, index1, ffaj))
            if np.all(energies[:, 2] == 0.0):
                log.warn("Noncovalent forces between atoms %d (%s) and %d (%s) are zero" % (index0, ffai, index1, ffaj))
            if ffai > ffaj:
                name = '%s---%s' % (str(ffai), str(ffaj))
            else:
                name = '%s---%s' % (str(ffaj), str(ffai))
            ftab.write("%s\nN %d R %13.8f %13.8f\n\n" % (
                name, nrows,
                rmin/lammps_units[unit_style]['distance'],
                rmax/lammps_units[unit_style]['distance']))
            for irow, row in enumerate(energies):
                ftab.write("%05d %+13.8f %+21.12f %+21.12f\n" % (
                    irow + 1,
                    row[0]/lammps_units[unit_style]['distance'],
                    row[1]/lammps_units[unit_style]['energy'],
                    row[2]/lammps_units[unit_style]['energy']*lammps_units[unit_style]['distance']))
            if log.do_medium:
                log("%s done" % name)
def check(self):
    """Perform a slow internal consistency test. Use this for debugging only.

       It is assumed that self.rmax is set correctly.
    """
    # 0) Some initial tests
    assert (
        (self.neighs['a'][:self.nneigh] > self.neighs['b'][:self.nneigh]) |
        (self.neighs['r0'][:self.nneigh] != 0) |
        (self.neighs['r1'][:self.nneigh] != 0) |
        (self.neighs['r2'][:self.nneigh] != 0)).all()

    # A) transform the current nlist into a dictionary
    actual = self.to_dictionary()

    # B) Define loops of cell vectors
    if self.system.cell.nvec == 3:
        def rloops():
            for r2 in range(0, self.rmax[2] + 1):
                if r2 == 0:
                    r1_start = 0
                else:
                    r1_start = -self.rmax[1]
                for r1 in range(r1_start, self.rmax[1] + 1):
                    if r2 == 0 and r1 == 0:
                        r0_start = 0
                    else:
                        r0_start = -self.rmax[0]
                    for r0 in range(r0_start, self.rmax[0] + 1):
                        yield r0, r1, r2
    elif self.system.cell.nvec == 2:
        def rloops():
            for r1 in range(0, self.rmax[1] + 1):
                if r1 == 0:
                    r0_start = 0
                else:
                    r0_start = -self.rmax[0]
                for r0 in range(r0_start, self.rmax[0] + 1):
                    yield r0, r1, 0
    elif self.system.cell.nvec == 1:
        def rloops():
            for r0 in range(0, self.rmax[0] + 1):
                yield r0, 0, 0
    else:
        def rloops():
            yield 0, 0, 0

    # C) Compute the nlists the slow way
    validation = {}
    nvec = self.system.cell.nvec
    for r0, r1, r2 in rloops():
        for a in range(self.system.natom):
            for b in range(a + 1):
                if r0 != 0 or r1 != 0 or r2 != 0:
                    signs = [1, -1]
                elif a > b:
                    signs = [1]
                else:
                    continue
                for sign in signs:
                    delta = self.system.pos[b] - self.system.pos[a]
                    self.system.cell.mic(delta)
                    delta *= sign
                    if nvec > 0:
                        self.system.cell.add_vec(delta, np.array([r0, r1, r2])[:nvec])
                    d = np.linalg.norm(delta)
                    if d < self.rcut + self.skin:
                        if sign == 1:
                            key = a, b, r0, r1, r2
                        else:
                            key = b, a, r0, r1, r2
                        value = np.array([d, delta[0], delta[1], delta[2]])
                        validation[key] = value

    # D) Compare
    wrong = False
    with log.section('NLIST'):
        for key0, value0 in validation.items():
            value1 = actual.pop(key0, None)
            if value1 is None:
                log('Missing: ', key0)
                log(' Validation %s %s %s %s' % (
                    log.length(value0[0]), log.length(value0[1]),
                    log.length(value0[2]), log.length(value0[3])))
                wrong = True
            elif abs(value0 - value1).max() > 1e-10*log.length.conversion:
                log('Different:', key0)
                log(' Actual %s %s %s %s' % (
                    log.length(value1[0]), log.length(value1[1]),
                    log.length(value1[2]), log.length(value1[3])))
                log(' Validation %s %s %s %s' % (
                    log.length(value0[0]), log.length(value0[1]),
                    log.length(value0[2]), log.length(value0[3])))
                log(' Difference %10.3e %10.3e %10.3e %10.3e' %
                    tuple((value0 - value1)/log.length.conversion))
                log(' AbsMaxDiff %10.3e' %
                    (abs(value0 - value1).max()/log.length.conversion))
                wrong = True
        for key1, value1 in actual.items():
            log('Redundant:', key1)
            log(' Actual %s %s %s %s' % (
                log.length(value1[0]), log.length(value1[1]),
                log.length(value1[2]), log.length(value1[3])))
            wrong = True
    assert not wrong
def pca_projection(f_target, f, pm, start=0, end=None, step=1, select=None,
                   path='trajectory/pos', mw=True):
    """Determine the principal components of an MD simulation

        **Arguments:**

        f_target
            Path to an h5.File instance to which the results are written.

        f
            An h5.File instance containing the trajectory data.

        pm
            An array containing the principal modes in its columns

        **Optional arguments:**

        start
            The first sample to be considered for analysis. This may be
            negative to indicate that the analysis should start from the
            -start last samples.

        end
            The last sample to be considered for analysis. This may be
            negative to indicate that the last -end samples should not be
            considered.

        step
            The spacing between the samples used for the analysis.

        select
            A list of atom indexes that are considered for the computation
            of the spectrum. If not given, all atoms are used.

        path
            The path of the dataset that contains the time dependent data in
            the HDF5 file. The first axis of the array must be the time axis.

        mw
            If mw is True, the covariance matrix is mass-weighted.
    """
    # Load in the relevant data
    q = f[path][start:end:step, :, :]
    # Select the given atoms
    if select is not None:
        q = q[:, select, :]
    # Reshape such that all Cartesian coordinates are treated equally
    q = q.reshape(q.shape[0], -1)
    # If necessary, weight with the mass
    if mw:
        # Select the necessary masses
        masses = f['system/masses']
        if select is not None:
            masses = masses[select]
        # Repeat d times, with d the dimension
        masses = np.repeat(masses, 3)
        # Reweight with the masses
        q *= np.sqrt(masses)
    # Calculation of the principal components: projection of each q_j on the
    # principal modes
    with log.section('PCA'):
        log('Determining principal components')
        prin_comp = np.dot(q, pm)
    # Create output HDF5 file
    g = h5.File(f_target, 'a')
    if 'pca' not in g:
        pca = g.create_group('pca')
    else:
        pca = g['pca']
    pca.create_dataset('pc', data=prin_comp)
    return pca
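# Hedged usage sketch for pca_projection: the principal modes are read from a
# PCA file written earlier (see calc_pca below, which stores them under
# 'pca/pm'); the projections are appended to the same file as 'pca/pc'. File
# names and sampling settings are illustrative.
import h5py as h5

with h5.File('pca.h5', 'r') as g:
    pm = g['pca/pm'][:]
with h5.File('production.h5', 'r') as f:
    pca_projection('pca.h5', f, pm, start=1000, step=5, mw=True)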
def write_principal_mode(f, f_pca, index, n_frames=100, select=None, mw=True, scaling=1.):
    """Write out one XYZ file per principal mode given in index

        **Arguments:**

        f
            An h5.File instance containing the original data.

        f_pca
            An h5.File instance containing the PCA, with reference structure,
            eigenvalues and principal modes.

        index
            An array containing the principal modes which need to be written
            out.

        **Optional arguments:**

        n_frames
            The number of frames in each XYZ file.

        select
            A list of atom indexes that are considered for the computation
            of the spectrum. If not given, all atoms are used.

        mw
            If mw is True, the covariance matrix is assumed to be
            mass-weighted.

        scaling
            Scaling factor applied to the maximum deviation of the principal
            mode (i.e. the maximum principal component for that mode)
    """
    # Load in the relevant data
    # Atom numbers, masses and initial frame
    numbers = f['system/numbers']
    masses = f['system/masses']
    pos = f['trajectory/pos']
    if select is not None:
        numbers = numbers[select]
        masses = masses[select]
        pos = pos[:, select, :]
    masses = np.repeat(masses, 3)
    # Data from the PC analysis
    grp = f_pca['pca']
    # The selected principal modes
    pm = grp['pm'][:, index]
    # The corresponding eigenvalues
    eigval = grp['eigvals'][index]
    # And the principal components
    pc = grp['pc'][:, index]

    with log.section('PCA'):
        for i in xrange(len(index)):
            log('Writing out principal mode %s' % index[i])
            if eigval[i] < 0:
                log.warn('Negative eigenvalue encountered, skipping this entry')
                continue
            # Determine maximum fluctuation (in units of meter*sqrt(kilogram))
            max_fluct = np.max(np.abs(pc[:, i]))
            # Initialize XYZWriter from molmod package
            xw = XYZWriter('pm_%s.xyz' % index[i],
                           [pd[number].symbol for number in numbers])
            # Determine the index in the trajectory closest to the rest state,
            # and the corresponding positions
            ind_min = np.argmin(np.abs(pc[:, i]))
            r_ref = pos[ind_min, :, :]
            for j in xrange(n_frames):
                q_var = scaling*pm[:, i]*max_fluct*(2.*j - n_frames)/n_frames
                if mw:
                    q_var /= np.sqrt(masses)
                r = r_ref + q_var.reshape(-1, 3)
                xw.dump('Frame%s' % j, r)
            del xw
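# Hedged usage sketch for write_principal_mode: f holds the trajectory and
# system data, f_pca the results of calc_pca and pca_projection (datasets
# 'pca/pm', 'pca/eigvals' and 'pca/pc'). This writes pm_0.xyz and pm_1.xyz
# for the two dominant modes; file names are illustrative.
import h5py as h5
import numpy as np

with h5.File('production.h5', 'r') as f, h5.File('pca.h5', 'r') as f_pca:
    write_principal_mode(f, f_pca, np.array([0, 1]), n_frames=50)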
def calc_pca(f_target, cov_mat=None, f=None, q_ref=None, start=0, end=None,
             step=1, select=None, path='trajectory/pos', mw=True, temp=None):
    """Perform a principal component analysis of the given trajectory.

        **Arguments:**

        f_target
            Path to an h5.File instance to which the results are written.

        **Optional arguments:**

        cov_mat
            The covariance matrix, if already calculated. If not provided,
            the covariance matrix will be calculated based on the file f.

        f
            An h5.File instance containing the trajectory data.

        q_ref
            Reference vector of the positions. If not provided, the ensemble
            average is taken.

        start
            The first sample to be considered for analysis. This may be
            negative to indicate that the analysis should start from the
            -start last samples.

        end
            The last sample to be considered for analysis. This may be
            negative to indicate that the last -end samples should not be
            considered.

        step
            The spacing between the samples used for the analysis.

        select
            A list of atom indexes that are considered for the computation
            of the spectrum. If not given, all atoms are used.

        path
            The path of the dataset that contains the time dependent data in
            the HDF5 file. The first axis of the array must be the time axis.

        mw
            If mw is True, the covariance matrix is mass-weighted.

        temp
            Temperature at which the simulation is carried out, necessary to
            determine the frequencies.
    """
    if cov_mat is None:
        if f is None:
            raise AssertionError('No covariance matrix nor h5.File instance provided.')
        with log.section('PCA'):
            log('Calculating covariance matrix')
            cov_mat, q_ref = calc_cov_mat(f, q_ref, start, end, step, select, path, mw)
    with log.section('PCA'):
        log('Diagonalizing the covariance matrix')
        # Eigenvalue decomposition
        eigval, eigvec = np.linalg.eigh(cov_mat)
        # Order the eigenvalues in decreasing order
        idx = eigval.argsort()[::-1]
        eigval = eigval[idx]
        eigvec = eigvec[:, idx]
        # Create output HDF5 file
        g = h5.File(f_target, 'w')
        pca = g.create_group('pca')
        # Output reference structure q_ref
        pca.create_dataset('q_ref', data=q_ref)
        # Output covariance matrix
        pca.create_dataset('cov_matrix', data=cov_mat)
        # Output eigenvectors in columns
        pca.create_dataset('pm', data=eigvec)
        # Output eigenvalues
        pca.create_dataset('eigvals', data=eigval)
        log('Determining inverse of the covariance matrix')
        # Process matrix to determine inverse
        # First, project out the three zero eigenvalues (translations)
        eigvec_reduced = eigvec[:, :-3]
        eigval_reduced = eigval[:-3]
        # Second, calculate the reduced covariance matrix and its inverse
        cov_mat_reduced = np.dot(np.dot(eigvec_reduced, np.diag(eigval_reduced)), eigvec_reduced.T)
        cov_mat_inverse = np.dot(np.dot(eigvec_reduced, np.diag(1/eigval_reduced)), eigvec_reduced.T)
        pca.create_dataset('cov_mat_red', data=cov_mat_reduced)
        pca.create_dataset('cov_mat_inv', data=cov_mat_inverse)
        # Third, if the temperature is specified, calculate the frequencies
        # (the zero frequencies are listed last so that their index
        # corresponds to the principal modes)
        if temp is not None:
            log('Determining frequencies')
            frequencies = np.append(np.sqrt(boltzmann*temp/eigval_reduced)/(2*np.pi), np.repeat(0, 3))
            pca.create_dataset('freqs', data=frequencies)
    return eigval, eigvec
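# Hedged usage sketch for calc_pca. The frequency stored under 'pca/freqs'
# follows from equipartition for a harmonic mode: a mass-weighted variance
# eigval corresponds to omega = sqrt(boltzmann*temp/eigval), so that
# nu = omega/(2*pi), which is exactly what the code above evaluates. File
# name, sampling settings and temperature are illustrative.
import h5py as h5

with h5.File('production.h5', 'r') as f:
    eigval, eigvec = calc_pca('pca.h5', f=f, start=1000, mw=True, temp=300)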
def check_mic(self, system):
    '''Check whether each scale2 and scale3 pair is uniquely defined.

       **Arguments:**

       system
            An instance of the system class, i.e. the one that is used to
            create this scaling object.

       This check is done by constructing, for each scaled pair, all possible
       bond paths between the two atoms. For each path, the bond vectors
       (after applying the minimum image convention) are added. If these sums
       of bond vectors differ between the possible paths of a given pair, the
       differences are expanded in cell vectors, which can be used to
       construct a proper supercell in which all scale2 and scale3 pairs are
       uniquely defined.
    '''
    if system.cell.nvec == 0:
        return
    troubles = False
    with log.section('SCALING'):
        for i0, i1, scale, nbond in self.stab:
            if nbond == 1:
                continue
            all_deltas = []
            paths = []
            for path in iter_paths(system, i0, i1, nbond):
                delta_total = 0
                for j0 in range(nbond):
                    j1 = j0 + 1
                    delta = system.pos[path[j0]] - system.pos[path[j1]]
                    system.cell.mic(delta)
                    delta_total += delta
                all_deltas.append(delta_total)
                paths.append(path)
            all_deltas = np.array(all_deltas)
            if abs(all_deltas.mean(axis=0) - all_deltas).max() > 1e-10:
                troubles = True
                if log.do_warning:
                    log.warn('Troublesome pair scaling detected.')
                log('The following bond paths connect the same pair of '
                    'atoms, yet the relative vectors are different.')
                for ipath in range(len(paths)):
                    log('%2i %27s %10s %10s %10s' % (
                        ipath,
                        ','.join(str(index) for index in paths[ipath]),
                        log.length(all_deltas[ipath, 0]),
                        log.length(all_deltas[ipath, 1]),
                        log.length(all_deltas[ipath, 2]),
                    ))
                log('Differences between relative vectors in fractional '
                    'coordinates:')
                for ipath0 in range(1, len(paths)):
                    for ipath1 in range(ipath0):
                        diff = all_deltas[ipath0] - all_deltas[ipath1]
                        diff_frac = np.dot(system.cell.gvecs, diff)
                        log('%2i %2i %10.4f %10.4f %10.4f' % (
                            ipath0, ipath1,
                            diff_frac[0], diff_frac[1], diff_frac[2]))
                log.blank()
    if troubles:
        raise AssertionError('Due to the small spacing between some crystal '
            'planes, the scaling of non-bonding interactions will not work '
            'properly. Use a supercell to avoid this problem.')
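# A minimal sketch, assuming the Scalings constructor takes the system and
# the scale1, scale2 and scale3 factors (this signature is an assumption, not
# confirmed by the fragment above): build a scaling object for a periodic
# system and verify that all scaled pairs are uniquely defined under the
# minimum image convention.
scalings = Scalings(system, scale1=0.0, scale2=0.0, scale3=1.0)
scalings.check_mic(system)  # raises AssertionError if a supercell is needed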
def xyz_to_hdf5(f, fn_xyz, sub=slice(None), file_unit=angstrom, name='pos'):
    """Convert XYZ trajectory file to Yaff HDF5 format.

       **Arguments:**

       f
            An open and writable HDF5 file.

       fn_xyz
            The filename of the XYZ trajectory file.

       **Optional arguments:**

       sub
            The sub argument for the XYZReader. This must be a slice object
            that defines the subsampling of the XYZ file reader. By default
            all frames are read.

       file_unit
            The unit of the data in the XYZ file. [default=angstrom]

       name
            The name of the HDF5 dataset where the trajectory is stored. This
            array is stored in the 'trajectory' group.

       This routine will also test the consistency of the row attribute of the
       trajectory group. If some trajectory data is already present, it will
       be replaced by the new data. It is highly recommended to first
       initialize the HDF5 file with the ``to_hdf5`` method of the System
       class.
    """
    with log.section('XYZH5'):
        if log.do_medium:
            log('Loading XYZ file \'%s\' into \'trajectory/%s\' of HDF5 file \'%s\'' % (
                fn_xyz, name, f.filename
            ))
        # First make sure the HDF5 file has a system description that is
        # consistent with the XYZ file.
        if 'system' not in f:
            raise ValueError('The HDF5 file must contain a system group.')
        if 'numbers' not in f['system']:
            raise ValueError('The HDF5 file must have a system group with atomic numbers.')
        xyz_reader = XYZReader(fn_xyz, sub=sub, file_unit=file_unit)
        if len(xyz_reader.numbers) != len(f['system/numbers']):
            raise ValueError('The number of atoms in the HDF5 and the XYZ files does not match.')
        if (xyz_reader.numbers != f['system/numbers']).any():
            log.warn('The atomic numbers of the HDF5 and XYZ file do not match.')
        # Take care of the trajectory group
        tgrp = get_trajectory_group(f)
        # Take care of the dataset
        ds, = get_trajectory_datasets(tgrp, (name, (len(xyz_reader.numbers), 3)))
        # Fill the dataset with data.
        row = get_last_trajectory_row([ds])
        for title, coordinates in xyz_reader:
            write_to_dataset(ds, coordinates, row)
            row += 1
        # Check number of rows
        check_trajectory_rows(tgrp, [ds], row)
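# Hedged usage sketch for xyz_to_hdf5: the HDF5 file is assumed to already
# contain a consistent system group, e.g. written with System.to_hdf5 as the
# docstring recommends. File names and the subsampling are illustrative.
import h5py as h5

with h5.File('output.h5', 'a') as f:
    xyz_to_hdf5(f, 'traj.xyz', sub=slice(0, None, 10))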
def dlpoly_history_to_hdf5(f, fn_history, sub=slice(None), pos_unit=angstrom,
        vel_unit=angstrom/picosecond, frc_unit=amu*angstrom/picosecond**2,
        time_unit=picosecond, mass_unit=amu):
    """Convert a DLPOLY history trajectory file to Yaff HDF5 format.

       **Arguments:**

       f
            An open and writable HDF5 file.

       fn_history
            The filename of the DLPOLY history file.

       **Optional arguments:**

       sub
            The sub argument for the DLPolyHistoryReader. This must be a slice
            object that defines the subsampling of the samples from the
            history file. By default all frames are read.

       pos_unit, vel_unit, frc_unit, time_unit and mass_unit
            The units used in the DLPOLY history file. The default values
            correspond to the defaults used in DLPOLY.

       This routine will also test the consistency of the row attribute of the
       trajectory group. If some trajectory data is already present, it will
       be replaced by the new data. It is highly recommended to first
       initialize the HDF5 file with the ``to_hdf5`` method of the System
       class.
    """
    with log.section('DPH5'):
        if log.do_medium:
            log('Loading DLPOLY history file \'%s\' into \'trajectory\' of HDF5 file \'%s\'' % (
                fn_history, f.filename
            ))
        # Take care of the trajectory group
        tgrp = get_trajectory_group(f)
        # Open the history file for reading
        hist_reader = DLPolyHistoryReader(fn_history, sub, pos_unit, vel_unit,
                                          frc_unit, time_unit, mass_unit)
        # Take care of the datasets that should always be present
        natom = hist_reader.num_atoms
        dss = get_trajectory_datasets(
            tgrp,
            ('step', (1,)),
            ('time', (1,)),
            ('cell', (3, 3)),
            ('pos', (natom, 3)),
        )
        ds_step, ds_time, ds_cell, ds_pos = dss
        # Take care of optional data sets
        if hist_reader.keytrj > 0:
            ds_vel = get_trajectory_datasets(tgrp, ('vel', (natom, 3)))[0]
            dss.append(ds_vel)
        if hist_reader.keytrj > 1:
            ds_frc = get_trajectory_datasets(tgrp, ('frc', (natom, 3)))[0]
            dss.append(ds_frc)
        # Decide on the first row to start writing data
        row = get_last_trajectory_row(dss)
        # Load data
        for frame in hist_reader:
            write_to_dataset(ds_step, frame["step"], row)
            write_to_dataset(ds_time, frame["time"], row)
            write_to_dataset(ds_cell, frame["cell"].T, row)
            write_to_dataset(ds_pos, frame["pos"], row)
            if hist_reader.keytrj > 0:
                write_to_dataset(ds_vel, frame["vel"], row)
            if hist_reader.keytrj > 1:
                write_to_dataset(ds_frc, frame["frc"], row)
            row += 1
        # Check number of rows
        check_trajectory_rows(tgrp, dss, row)
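# Hedged usage sketch for dlpoly_history_to_hdf5, converting a history file
# written with DLPOLY's default units; the file names are illustrative.
import h5py as h5

with h5.File('output.h5', 'a') as f:
    dlpoly_history_to_hdf5(f, 'HISTORY')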
def pca_convergence(f, eq_time=0*picosecond, n_parts=None, step=1,
                    fn='PCA_convergence', n_bootstrap=50, mw=True):
    """Assess the convergence of the simulation by calculating the PCA
       similarity for different subsets of the simulation.

        **Arguments:**

        f
            An h5.File instance containing the trajectory data.

        **Optional arguments:**

        eq_time
            Equilibration time, discarded from the simulation.

        n_parts
            Array containing the number of parts in which the total
            simulation is divided.

        step
            Stepsize used in the trajectory.

        fn
            Base filename of the convergence plot.

        n_bootstrap
            The number of bootstrapped trajectories.

        mw
            If mw is True, the covariance matrix is mass-weighted.
    """
    # Configure n_parts, the array containing the number of parts in which
    # the total simulation is divided
    if n_parts is None:
        n_parts = np.array([1, 3, 10, 30, 100, 300])
    # Read in the timestep and the number of samples
    time = f['trajectory/time']
    timestep = time[1] - time[0]
    time_length = len(time)
    # Determine the equilibration size
    eq_size = int(eq_time/timestep)

    ### ---PART A: SIMILARITY OF THE TRUE TRAJECTORY--- ###

    # Calculate the covariance matrix of the whole production run as the
    # gold standard
    covar_total, q_ref = calc_cov_mat(f, start=eq_size, step=step, mw=mw)
    # Initialize the average similarity vector of the divided trajectories
    sim_block = np.zeros(len(n_parts))
    # Calculate this average similarity vector
    for j in range(len(n_parts)):
        # Determine in how many parts the trajectory should be divided and
        # the corresponding block size
        n_part = n_parts[j]
        block_size = (time_length - eq_size)//n_part
        # Calculate the n_part covariance matrices and compare with the
        # total covariance matrix
        tot_sim_block = 0
        for i in range(n_part):
            start = eq_size + i*block_size
            covars, tmp = calc_cov_mat(f, start=start, end=start+block_size+1, step=step, mw=mw)
            tot_sim_block += pca_similarity(covars, covar_total)
        # Determine the average similarity
        sim_block[j] = tot_sim_block/n_part

    ### ---PART B: SIMILARITY OF BOOTSTRAPPED TRAJECTORIES--- ###

    # Read in the positions, which will be used to generate bootstrapped
    # trajectories
    pos = f['trajectory/pos'][eq_size:, :, :]
    pos = pos.reshape(pos.shape[0], -1)
    if mw:
        # Read in the masses of the atoms, and replicate them d times
        # (d=dimension)
        masses = f['system/masses']
        masses = np.repeat(masses, 3)
        # Create the mass-weighted positions matrix, on which the
        # bootstrapping will be based
        pos *= np.sqrt(masses)
    # Initialize the vector containing the average similarity over all the
    # bootstrapped, divided trajectories
    sim_bt_all = np.zeros(len(n_parts))
    n_samples = pos.shape[0]
    for k in range(n_bootstrap):
        with log.section('PCA'):
            log('Processing %s of %s bootstrapped trajectories' % (k+1, n_bootstrap))
            # Create a bootstrapped trajectory by sampling production frames
            # with replacement (integer indices, since pos only contains the
            # frames after equilibration)
            random_time = (random.random(n_samples)*n_samples).astype(int)
            pos_bt = pos[random_time]
        # Covariance matrix of the total bootstrapped trajectory
        covar_bt_total, tmp = calc_cov_mat_internal(pos_bt)
        # Initialize the vector containing the average similarity over the
        # different blocks, for the given bootstrapped trajectory
        sim_bt = np.zeros(len(n_parts))
        for j in range(len(n_parts)):
            # Calculate the number of blocks, as well as the block size
            n_part = n_parts[j]
            block_size = n_samples//n_part
            tot_sim_bt = 0
            # Calculate the total similarity of this number of blocks, for
            # this bootstrapped trajectory
            for i in range(n_part):
                # pos_bt already excludes the equilibration frames
                start = i*block_size
                pos_bt_block = pos_bt[start:start+block_size:step]
                covars_bt, tmp = calc_cov_mat_internal(pos_bt_block)
                tot_sim_bt += pca_similarity(covars_bt, covar_bt_total)
            # Calculate the average similarity for this number of blocks,
            # for this bootstrapped trajectory
            sim_bt[j] = tot_sim_bt/n_part
        sim_bt_all += sim_bt
    # Calculate the average similarity over all bootstrapped trajectories
    sim_bt_all /= n_bootstrap

    ### ---PART C: PROCESSING THE RESULTS--- ###

    pt.clf()
    pt.semilogx((time[-1] - time[0])/n_parts/picosecond, sim_block/sim_bt_all, 'r-')
    pt.semilogx((time[-1] - time[0])/n_parts/picosecond, sim_block/sim_bt_all, 'rs')
    pt.xlabel('Block size [ps]')
    pt.ylabel('PCA similarity (1=perfectly similar)')
    pt.title('Convergence assessment via PCA: ' + fn)
    pt.ylim([0, 1])
    pt.savefig(fn + '.png')
    pt.savefig(fn + '.pdf', format='pdf')
    return sim_block/sim_bt_all
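# Hedged usage sketch for pca_convergence: discard the first 20 ps as
# equilibration and compare block similarities against bootstrapped
# references. File name and settings are illustrative; a ratio close to 1
# for large blocks suggests a converged sampling of the PCA modes.
import h5py as h5
from molmod.units import picosecond

with h5.File('production.h5', 'r') as f:
    ratio = pca_convergence(f, eq_time=20*picosecond, n_bootstrap=50)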
def __init__(self, numbers, pos, scopes=None, scope_ids=None, ffatypes=None,
             ffatype_ids=None, bonds=None, rvecs=None, charges=None,
             radii=None, valence_charges=None, dipoles=None, radii2=None,
             masses=None):
    r'''Initialize a System object.

       **Arguments:**

       numbers
            A numpy array with atomic numbers

       pos
            A numpy array (N,3) with atomic coordinates in Bohr.

       **Optional arguments:**

       scopes
            A list with scope names

       scope_ids
            A list of scope indexes that links each atom with an element of
            the scopes list. If this argument is not present, while scopes
            is given, it is assumed that scopes contains a scope name for
            every atom, i.e. that it is a list with length natom. In that
            case, it will be converted automatically to a scopes list with
            only unique names, together with a corresponding scope_ids array.

       ffatypes
            A list of labels of the force field atom types.

       ffatype_ids
            A list of atom type indexes that links each atom with an element
            of the list ffatypes. If this argument is not present, while
            ffatypes is given, it is assumed that ffatypes contains an atom
            type for every atom, i.e. that it is a list with length natom.
            In that case, it will be converted automatically to a short
            ffatypes list with only unique elements (within each scope),
            together with a corresponding ffatype_ids array.

       bonds
            A numpy array (B,2) with atom indexes (counting starts from
            zero) to define the chemical bonds.

       rvecs
            An array whose rows are the unit cell vectors. At most three
            rows are allowed, each containing three Cartesian coordinates.

       charges
            An array of atomic charges

       radii
            An array of atomic radii, :math:`R_{A,c}`, that determine the
            shape of the atomic charge distribution:

            .. math::

                \rho_{A,c}(\mathbf{r}) = \frac{q_A}{\pi^{3/2} R_{A,c}^3}
                \exp\left(-\frac{|\mathbf{r} - \mathbf{R}_A|^2}{R_{A,c}^2}\right)

       valence_charges
            In case a point core + distributed valence charge model is used,
            this vector contains the valence charges. The core charges can
            be computed by subtracting the valence charges from the net
            charges.

       dipoles
            An array of atomic dipoles

       radii2
            An array of atomic radii, :math:`R_{A,d}`, that determine the
            shape of the atomic dipole distribution:

            .. math::

                \rho_{A,d}(\mathbf{r}) = -2\frac{\mathbf{d}_A \cdot
                (\mathbf{r} - \mathbf{R}_A)}{\sqrt{\pi} R_{A,d}^5}
                \exp\left(-\frac{|\mathbf{r} - \mathbf{R}_A|^2}{R_{A,d}^2}\right)

       masses
            The atomic masses (in atomic units, i.e. m_e)

       Several attributes are derived from the (optional) arguments:

       * ``cell`` contains the rvecs attribute and is an instance of the
         ``Cell`` class.

       * ``neighs1``, ``neighs2`` and ``neighs3`` are dictionaries derived
         from ``bonds`` that contain atoms that are separated 1, 2 and 3
         bonds from a given atom, respectively. This means that
         ``i in system.neighs3[j]`` is ``True`` if there are three bonds
         between atoms i and j.
    '''
    if len(numbers.shape) != 1:
        raise ValueError('Argument numbers must be a one-dimensional array.')
    if pos.shape != (len(numbers), 3):
        raise ValueError('The pos array must have shape (N, 3), where N '
                         'matches the length of the numbers argument.')
    self.numbers = numbers
    self.pos = pos
    self.ffatypes = ffatypes
    self.ffatype_ids = ffatype_ids
    self.scopes = scopes
    self.scope_ids = scope_ids
    self.bonds = bonds
    self.cell = Cell(rvecs)
    self.charges = charges
    self.radii = radii
    self.valence_charges = valence_charges
    self.dipoles = dipoles
    self.radii2 = radii2
    self.masses = masses
    with log.section('SYS'):
        # report some stuff
        self._init_log()
        # compute some derived attributes
        self._init_derived()
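# A minimal sketch constructing a gas-phase water System by hand; the
# geometry is illustrative. Note that ffatypes is given as one label per
# atom, which the constructor converts to a unique list plus ffatype_ids.
import numpy as np
from molmod.units import angstrom

system = System(
    numbers=np.array([8, 1, 1]),
    pos=np.array([
        [0.000,  0.000, 0.000],
        [0.000,  0.763, 0.596],
        [0.000, -0.763, 0.596],
    ])*angstrom,
    ffatypes=['O', 'H', 'H'],
    bonds=np.array([[0, 1], [0, 2]]),
)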