def select(self, *args):
    """Return a boolean mask over atoms whose atomic number matches args."""
    if len(args) == 0:
        return np.full(self.numbers.shape[0], False)
    elif 'all' in args:
        return np.full(self.numbers.shape[0], True)
    else:
        return np.stack([
            self.numbers == a for b in iterable(args) for a in iterable(b)
        ]).any(axis=0)
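# A minimal, self-contained check of the masking logic in select(), using a
# bare numpy array in place of the host object's `numbers` attribute (the
# owning class is not part of this listing, so this is only a sketch):
import numpy as np
numbers = np.array([1, 8, 8, 6, 1])
mask = np.stack([numbers == a for a in (1, 8)]).any(axis=0)
print(mask)  # [ True  True  True False  True]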
def add_descriptors(self, descriptors, stage=True, dont_save_grads=True):
    self.descriptors = [d for d in self.descriptors] + \
        [d for d in iterable(descriptors)]
    names = [d.name for d in self.descriptors]
    if len(set(names)) != len(self.descriptors):
        raise RuntimeError(
            f'two or more descriptors have the same name: {names}')
    if stage:
        self.stage(iterable(descriptors), dont_save_grads=dont_save_grads)
def __init__(self, lmax, nmax, radial, numbers, radii=1., flatten=True,
             normalize=True):
    super().__init__()
    self.ylm = Ylm(lmax)
    self.nmax = nmax
    self.radial = radial
    if isinstance(radii, float):
        self.radii = UniformRadii(radii)
    elif isinstance(radii, dict):
        self.radii = RadiiFromDict(radii)
    else:
        self.radii = radii
    self.numbers = sorted(iterable(numbers))
    self.species = len(self.numbers)
    # masks for combining the real and imaginary parts of Ylm
    one = torch.ones(lmax + 1, lmax + 1)
    self.Yr = 2 * torch.tril(one) - torch.eye(lmax + 1)
    self.Yi = 2 * torch.triu(one, diagonal=1)
    # normalization coefficients: a[n, l] = 1/((2l+1) 2**(2n+l) n! (n+l)!)
    a = torch.tensor([[
        1. / ((2 * l + 1) * 2**(2 * n + l) * fac(n) * fac(n + l))
        for l in range(lmax + 1)
    ] for n in range(nmax + 1)])
    self.nnl = (a[None] * a[:, None]).sqrt()
    self.dim = self.species**2 * (nmax + 1)**2 * (lmax + 1)
    self.shape = (self.species, self.species, nmax + 1, nmax + 1, lmax + 1)
    if flatten:
        self.shape = (self.dim, )
    self.flatten = flatten
    self.normalize = normalize
    self.params = []
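# Standalone check of the normalization table built in __init__ above,
# a[n, l] = 1 / ((2l+1) * 2**(2n+l) * n! * (n+l)!); plain Python is used
# here so the snippet runs without torch (in the class, fac is assumed to
# be math.factorial):
from math import factorial as fac
lmax, nmax = 2, 1
a = [[1. / ((2 * l + 1) * 2**(2 * n + l) * fac(n) * fac(n + l))
      for l in range(lmax + 1)] for n in range(nmax + 1)]
print(a[0][0])  # 1.0 for n=0, l=0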
def displacements(self, select='all', deltas=None, srange=None,
                  sample_size=100, corr=None, stats=None):
    I = self.select(select)
    s = Sampler(*srange) if srange else Sampler(self.start, self.stop)
    if deltas is None:
        deltas = get_exponential_deltas(s.start, s.stop)
    if corr is None:
        corr = correlator
    if stats is None:
        stats = mean_var
    data = [[
        stats(samples) for samples in zip(*[
            iterable(corr(*self.get_rand_pair(s, delta), I))
            for _ in range(sample_size)
        ])
    ] for delta in deltas]
    results = [
        list(zip(*[dat[j] for dat in data])) for j in range(len(data[0]))
    ]
    return deltas, results
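# Hypothetical call sketch for displacements(); `traj` stands in for an
# instance of the trajectory class this method belongs to (not shown in
# this listing). Each time lag in `deltas` is paired with per-statistic
# results gathered from sample_size random pairs:
#
#   deltas, results = traj.displacements(select='all', sample_size=100)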
def __init__(self, terms=None, **kwargs):
    Calculator.__init__(self, **kwargs)
    if self.parameters.rc is None:
        raise NotImplementedError('pass a cutoff!')
    self.terms = iterable(terms)
    self.params = [par for term in self.terms for par in term.parameters()]
    self.as_tensors_with_grads = False
def append(self, others):
    if others is self:
        _others = others.X[:]
    else:
        _others = iterable(others, ignore=TorchAtoms)
    for atoms in _others:
        assert atoms.__class__ == TorchAtoms
        self.X += [atoms]
def append(self, others, detach=False):
    if others is self:
        _others = others.X[:]
    else:
        _others = iterable(others)
    for loc in _others:
        assert loc.__class__ == Local
        self.X += [loc.detach() if detach else loc]
def forward(self, atoms_or_loc, forces=False, enable_grad=True):
    with torch.set_grad_enabled(enable_grad):
        if forces:
            f = 0
        e = 0
        for loc in iterable(atoms_or_loc):
            _e = self.calculate(loc, forces=forces)
            if forces:
                # calculate returns an (energy, forces) pair in this case
                _e, _f = _e
                f = f + _f
            e = e + _e
        if forces:
            return e, f
        else:
            return e
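# Hypothetical usage of forward() above; `pot` stands in for an instance of
# the energy model class, and `atoms` for anything iterable() can turn into
# a sequence of local environments:
#
#   e = pot(atoms)                  # total energy only
#   e, f = pot(atoms, forces=True)  # energy plus accumulated forces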
def diatomic(numbers, distances, pbc=False, cell=None):
    from theforce.util.util import iterable
    from itertools import combinations
    if not hasattr(numbers[0], '__iter__'):
        # bare numbers: enumerate all unique hetero- and homonuclear pairs
        nums = ([(a, b) for a, b in combinations(set(numbers), 2)] +
                [(a, a) for a in set(numbers)])
    else:
        nums = numbers
    X = [
        TorchAtoms(positions=[[0., 0., 0.], [d, 0., 0.]],
                   numbers=n,
                   cell=cell,
                   pbc=pbc) for n in nums for d in iterable(distances)
    ]
    if len(X) > 1:
        return AtomsData(X=X)
    else:
        return X[0]
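# Example use of diatomic(): with bare atomic numbers it enumerates all
# unique pairs plus homonuclear dimers (here H-O, H-H, O-O) at each of the
# given distances, so the call below yields an AtomsData of 9 dimers:
#
#   dimers = diatomic([1, 8], [0.7, 0.9, 1.1])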
def __init__(self, lmax, nmax, radial, numbers, atomic_unit=None,
             flatten=True):
    super().__init__()
    self.ylm = Ylm(lmax)
    self.nmax = nmax
    self._radial = radial
    if atomic_unit:
        self.unit = atomic_unit
    else:
        self.unit = radial.rc / 3
    self.radial = Exp(-0.5 * I()**2 / self.unit**2) * radial
    self.numbers = sorted(iterable(numbers))
    self.species = len(self.numbers)
    # masks for combining the real and imaginary parts of Ylm
    one = torch.ones(lmax + 1, lmax + 1)
    self.Yr = 2 * torch.tril(one) - torch.eye(lmax + 1)
    self.Yi = 2 * torch.triu(one, diagonal=1)
    # normalization coefficients: a[n, l] = 1/((2l+1) 2**(2n+l) n! (n+l)!)
    a = torch.tensor([[
        1. / ((2 * l + 1) * 2**(2 * n + l) * fac(n) * fac(n + l))
        for l in range(lmax + 1)
    ] for n in range(nmax + 1)])
    self.nnl = (a[None] * a[:, None]).sqrt()
    self.dim = self.species**2 * (nmax + 1)**2 * (lmax + 1)
    self.shape = (self.species, self.species, nmax + 1, nmax + 1, lmax + 1)
    if flatten:
        self.shape = (self.dim, )
    self.params = []
    self._state = 'atomic_unit={}, flatten={}'.format(self.unit, flatten)
def includes_species(self, species):
    targets = set(iterable(species))
    return any(a in targets for a in self.numbers_set)
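# Usage sketch; `model` is a hypothetical instance of the class that owns
# includes_species() and exposes numbers_set:
#
#   model.includes_species(8)        # True if oxygen occurs in numbers_set
#   model.includes_species([1, 8])   # True if hydrogen or oxygen occurs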
def lce_filter(self, locs):
    if self.restrict is None:
        return locs
    else:
        return [loc for loc in locs if loc.number in iterable(self.restrict)]
def __init__(self, soaps):
    super().__init__()
    self.soaps = iterable(soaps)
    self.params = [par for soap in self.soaps for par in soap.params]
def potential_energy_surface(data=None,
                             inducing=None,
                             train=0,
                             caching=False,
                             append_log=True,
                             **kwargs):
    from theforce.descriptor.atoms import AtomsData, LocalsData, sample_atoms
    from theforce.regression.gppotential import PosteriorPotential
    from theforce.regression.gppotential import train_gpp
    from theforce.util.util import iterable

    # get params
    params = get_params(**kwargs)
    log = open(params['path_log'], 'a' if append_log else 'w')
    log.write('{} threads: {}\n'.format(37 * '*', torch.get_num_threads()))

    # data
    if data is None:
        data = sample_atoms(params['path_data'],
                            size=params['ndata'],
                            chp=params['path_data_chp'])
        data.update(cutoff=params['cutoff'])
        natoms = sum([len(atoms) for atoms in data])
        log.write(
            'cutoff: {}\npath_data: {}\nndata: {} (={} locals)\npath_data_chp: {}\n'
            .format(params['cutoff'], params['path_data'], params['ndata'],
                    natoms, params['path_data_chp']))
    else:
        # if data is given as a kwarg, the cutoff should already be applied
        assert len(data[-1]) == data[-1].natoms
        natoms = sum([len(atoms) for atoms in data])
        log.write('ndata: {} (={} locals) (kwarg)\n'.format(len(data), natoms))

    # inducing
    if inducing is None:
        if params['nlocals'] == -1:
            inducing = data.to_locals()
            log.write('nlocals: {} (=-1)\n'.format(len(inducing)))
        else:
            inducing = data.sample_locals(params['nlocals'])
            log.write('nlocals: {}\n'.format(params['nlocals']))
    else:
        log.write('nlocals: {} (kwarg)\n'.format(len(inducing)))
    if params['path_inducing_chp'] is not None:
        inducing.to_traj(params['path_inducing_chp'])
        log.write('path_inducing_chp: {}\n'.format(
            params['path_inducing_chp']))

    # numbers
    if params['numbers'] is None:
        params['numbers'] = data.numbers_set()
        log.write('numbers: {}\n'.format(params['numbers']))
    log.close()

    # kernel
    gp = get_kernel(params)

    # train
    data.update(descriptors=gp.kern.kernels)
    inducing.stage(gp.kern.kernels)
    inducing.trainable = False  # TODO: this should be set inside Locals
    state = 0
    for steps in iterable(train):
        train_gpp(gp,
                  data,
                  inducing=inducing,
                  steps=steps,
                  logprob_loss=True,
                  cov_loss=False)
        # save gp
        state += steps
        if state > 0 and params['path_gp_chp']:
            gp.to_file(params['path_gp_chp'], flag='state: {}'.format(state))
            with open('log.txt', 'a') as log:
                log.write('path_gp_chp: {} (write, state={})\n'.format(
                    params['path_gp_chp'], state))
        # save inducing
        if inducing.trainable:
            raise NotImplementedError(
                'trainable inducing is not implemented yet!')
    if state > 0:
        with open('log.txt', 'a') as log:
            log.write('\ntrained for {} steps\n'.format(state))

    # create Posterior Potential
    V = PosteriorPotential(gp, data, inducing=inducing, use_caching=caching)

    # test
    if params['test']:
        data.set_per_atoms('predicted_energy', V(data, 'energy'))
        data.set_per_atom('predicted_forces', V(data, 'forces'))
        var_e = data.target_energy.var()
        var_ee = (data.cat('predicted_energy') - data.target_energy).var()
        R2_e = 1 - var_ee / var_e
        var_f = data.target_forces.var()
        var_ff = (data.cat('predicted_forces') - data.target_forces).var()
        R2_f = 1 - var_ff / var_f
        with open('log.txt', 'a') as log:
            log.write('\ntesting the model on the same data that created it:')
            log.write('\nenergy R2 score={}'.format(R2_e))
            log.write('\nforces R2 score={}\n'.format(R2_f))
        print('testing the model on the same data that created it:')
        print('energy R2 score={}'.format(R2_e))
        print('forces R2 score={}'.format(R2_f))

    return V
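# Hypothetical invocation of potential_energy_surface(); the keyword names
# below are forwarded to get_params(**kwargs) and are assumed to be among
# its accepted parameters (cutoff, ndata, and nlocals all appear in params
# above):
#
#   V = potential_energy_surface(train=[30, 30], test=True,
#                                cutoff=6.5, ndata=100, nlocals=-1)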
def mask_values(arr, vals):
    return np.stack([arr == v for v in iterable(vals)]).any(axis=0)
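# Self-contained check of mask_values(); assumes iterable() (from
# theforce.util.util) is in scope, wrapping a scalar into a one-element
# sequence:
import numpy as np
arr = np.array([1, 8, 8, 6, 1])
print(mask_values(arr, 8))       # [False  True  True False False]
print(mask_values(arr, (1, 6)))  # [ True False False  True  True]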
def stage(self, descriptors, dont_save_grads=True):
    for loc in self:
        loc.stage(iterable(descriptors), dont_save_grads=dont_save_grads)
def mlmd(ini_atoms,
         cutoff,
         au,
         dt,
         tolerance=0.1,
         pair=True,
         soap=True,
         ndata=10,
         max_steps=100,
         itrain=10 * [5],
         retrain=5 * [5],
         retrain_every=100,
         pes=potential_energy_surface):
    """
    ML-assisted MD: a calculator must be attached to ini_atoms.

    Rules of thumb:
    * Initial training (itrain) is crucial for a correct approximation
      of the variances.
    * Hyper-parameters are sensitive to nlocals=len(inducing); thus, if
      you don't want to retrain the gp every time the data is updated,
      at least keep nlocals fixed.
    """
    dftcalc = ini_atoms.get_calculator()

    # run a short MD to gather some (dft) data
    atoms = TorchAtoms(ase_atoms=ini_atoms.copy())
    atoms.set_velocities(ini_atoms.get_velocities())
    atoms.set_calculator(dftcalc)
    dyn = VelocityVerlet(atoms, dt=dt, trajectory='md.traj', logfile='md.log')
    md_step = ndata
    dyn.run(md_step)
    ndft = md_step

    # train a potential
    data = AtomsData(traj='md.traj', cutoff=cutoff)
    inducing = data.to_locals()
    V = pes(data=data,
            inducing=inducing,
            cutoff=cutoff,
            atomic_unit=au,
            pairkernel=pair,
            soapkernel=soap,
            train=itrain,
            test=True,
            caching=True)
    atoms.update(cutoff=cutoff, descriptors=V.gp.kern.kernels)
    mlcalc = PosteriorVarianceCalculator(V)
    atoms.set_calculator(mlcalc)

    # long MD
    while md_step < max_steps:
        md_step += 1
        forces = atoms.get_forces()
        var = atoms.calc.results['forces_var']
        tol = np.sqrt(var.max(axis=1))
        if (tol > tolerance).any():
            _forces = forces
            _var = var

            # new dft calculation
            ndft += 1
            print(
                '|............... new dft calculation (total={})'.format(ndft))
            tmp = atoms.copy()
            tmp.set_calculator(dftcalc)
            true_forces = tmp.get_forces()

            # add new information to data
            new_data = AtomsData(X=[TorchAtoms(ase_atoms=tmp)])
            new_data.update(cutoff=cutoff,
                            descriptors=atoms.calc.potential.gp.kern.kernels)
            new_locals = new_data.to_locals()
            new_locals.stage(descriptors=atoms.calc.potential.gp.kern.kernels)
            data += new_data
            inducing += new_locals  # TODO: importance sampling

            # remove old(est) information
            del data.X[0]
            del inducing.X[:len(new_locals)]  # TODO: importance sampling

            # retrain
            if ndft % retrain_every == 0:
                print('|............... : retraining for {} steps'.format(
                    retrain))
                for steps in iterable(retrain):
                    atoms.calc.potential.train(data,
                                               inducing=inducing,
                                               steps=steps,
                                               cov_loss=False)
                atoms.calc.potential.gp.to_file('gp.chp',
                                                flag='ndft={}'.format(ndft))

            # update model
            print('|............... new regression')
            atoms.calc.potential.set_data(data, inducing, use_caching=True)

            # new forces
            atoms.calc.results.clear()
            forces = atoms.get_forces()
            var = atoms.calc.results['forces_var']

            # report
            _err_pred = np.sqrt(_var).max()
            _err = np.abs(_forces - true_forces).max()
            err_pred = np.sqrt(var).max()
            err = np.abs(forces - true_forces).max()
            print('|............... : old max-error: predicted={}, true={}'.
                  format(_err_pred, _err))
            print('|............... : new max-error: predicted={}, true={}'.
                  format(err_pred, err))
            arrays = np.concatenate([true_forces, _forces, forces, _var, var],
                                    axis=1)
            with open('forces_var.txt', 'ab') as report:
                np.savetxt(report, arrays)

        print(md_step, '_')
        dyn.run(1)

    print('finished {} steps, used dftcalc only {} times'.format(
        md_step, ndft))
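# Hypothetical driver for mlmd(); ini_atoms must be an ASE Atoms object with
# a (DFT) calculator already attached, as the docstring requires. The values
# below are placeholders, not recommendations:
#
#   mlmd(ini_atoms, cutoff=6.5, au=2.0, dt=2.0, tolerance=0.1,
#        ndata=10, max_steps=1000)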
def stage(self, descriptors=None, dont_save_grads=True):
    descs = iterable(descriptors) if descriptors else self.descriptors
    for loc in self.loc:
        loc.stage(descs, dont_save_grads=dont_save_grads)
def set_descriptors(self, descriptors, stage=True, dont_save_grads=True):
    self.descriptors = [d for d in iterable(descriptors)]
    if stage:
        self.stage(dont_save_grads=dont_save_grads)
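# set_descriptors() replaces the descriptor list wholesale, whereas
# add_descriptors() above extends it; a hypothetical call, with `data` an
# instance of the owning collection class:
#
#   data.set_descriptors(gp.kern.kernels)  # replace, then stage all locals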