Example #1
 def select(self, *args):
     if len(args) == 0:
         return np.full(self.numbers.shape[0], False)
     elif 'all' in args:
         return np.full(self.numbers.shape[0], True)
     else:
         return np.stack([
             self.numbers == a for b in iterable(args) for a in iterable(b)
         ]).any(axis=0)
Example #2
 def add_descriptors(self, descriptors, stage=True, dont_save_grads=True):
     self.descriptors = [d for d in self.descriptors] + \
         [d for d in iterable(descriptors)]
     names = [d.name for d in self.descriptors]
     if len(set(names)) != len(self.descriptors):
         raise RuntimeError(
             f'two or more descriptors have the same names: {names}')
     if stage:
         self.stage(iterable(descriptors), dont_save_grads=dont_save_grads)
Example #3
 def __init__(self,
              lmax,
              nmax,
              radial,
              numbers,
              radii=1.,
              flatten=True,
              normalize=True):
     super().__init__()
     self.ylm = Ylm(lmax)
     self.nmax = nmax
     self.radial = radial
     if type(radii) == float:
         self.radii = UniformRadii(radii)
     elif type(radii) == dict:
         self.radii = RadiiFromDict(radii)
     else:
         self.radii = radii
     self.numbers = sorted(iterable(numbers))
     self.species = len(self.numbers)
     one = torch.ones(lmax + 1, lmax + 1)
     self.Yr = 2 * torch.tril(one) - torch.eye(lmax + 1)
     self.Yi = 2 * torch.triu(one, diagonal=1)
     a = torch.tensor([[
         1. / ((2 * l + 1) * 2**(2 * n + l) * fac(n) * fac(n + l))
         for l in range(lmax + 1)
     ] for n in range(nmax + 1)])
     self.nnl = (a[None] * a[:, None]).sqrt()
     self.dim = self.species**2 * (nmax + 1)**2 * (lmax + 1)
     self.shape = (self.species, self.species, nmax + 1, nmax + 1, lmax + 1)
     if flatten:
         self.shape = (self.dim, )
     self.flatten = flatten
     self.normalize = normalize
     self.params = []
Example #4
 def displacements(self,
                   select='all',
                   deltas=None,
                   srange=None,
                   sample_size=100,
                   corr=None,
                   stats=None):
     I = self.select(select)
     s = Sampler(*srange) if srange else Sampler(self.start, self.stop)
     if deltas is None:
         deltas = get_exponential_deltas(s.start, s.stop)
     if corr is None:
         corr = correlator
     if stats is None:
         stats = mean_var
     data = [[
         stats(data) for data in zip(*[
             iterable(corr(*self.get_rand_pair(s, delta), I))
             for _ in range(sample_size)
         ])
     ] for delta in deltas]
     results = [
         list(zip(*[dat[j] for dat in data])) for j in range(len(data[0]))
     ]
     return deltas, results
Example #5
 def __init__(self, terms=None, **kwargs):
     Calculator.__init__(self, **kwargs)
     if self.parameters.rc is None:
         raise NotImplementedError('pass a cutoff!')
     self.terms = iterable(terms)
     self.params = [par for term in self.terms for par in term.parameters()]
     self.as_tensors_with_grads = False
Example #6
 def append(self, others):
     if id(self) == id(others):
         _others = others.X[:]
     else:
         _others = iterable(others, ignore=TorchAtoms)
     for atoms in _others:
         assert atoms.__class__ == TorchAtoms
         self.X += [atoms]
Example #7
 def append(self, others, detach=False):
     if id(self) == id(others):
         _others = others.X[:]
     else:
         _others = iterable(others)
     for loc in _others:
         assert loc.__class__ == Local
         self.X += [loc.detach() if detach else loc]
Example #8
 def forward(self, atoms_or_loc, forces=False, enable_grad=True):
     with torch.set_grad_enabled(enable_grad):
         if forces:
             f = 0
         e = 0
         for loc in iterable(atoms_or_loc):
             _e = self.calculate(loc, forces=forces)
             if forces:
                 _e, _f = _e
                 f = f + _f
             e = e + _e
         if forces:
             return e, f
         else:
             return e
Example #9
def diatomic(numbers, distances, pbc=False, cell=None):
    from theforce.util.util import iterable
    from itertools import combinations
    if not hasattr(numbers[0], '__iter__'):
        nums = ([(a, b) for a, b in combinations(set(numbers), 2)] +
                [(a, a) for a in set(numbers)])
    else:
        nums = numbers
    X = [
        TorchAtoms(positions=[[0., 0., 0.], [d, 0., 0.]],
                   numbers=n,
                   cell=cell,
                   pbc=pbc) for n in nums for d in iterable(distances)
    ]
    if len(X) > 1:
        return AtomsData(X=X)
    else:
        return X[0]
Example #10
    def __init__(self,
                 lmax,
                 nmax,
                 radial,
                 numbers,
                 atomic_unit=None,
                 flatten=True):
        super().__init__()
        self.ylm = Ylm(lmax)
        self.nmax = nmax

        self._radial = radial
        if atomic_unit:
            self.unit = atomic_unit
        else:
            self.unit = radial.rc / 3
        self.radial = Exp(-0.5 * I()**2 / self.unit**2) * radial
        self.numbers = sorted(iterable(numbers))
        self.species = len(self.numbers)

        one = torch.ones(lmax + 1, lmax + 1)
        self.Yr = 2 * torch.tril(one) - torch.eye(lmax + 1)
        self.Yi = 2 * torch.triu(one, diagonal=1)

        a = torch.tensor([[
            1. / ((2 * l + 1) * 2**(2 * n + l) * fac(n) * fac(n + l))
            for l in range(lmax + 1)
        ] for n in range(nmax + 1)])
        self.nnl = (a[None] * a[:, None]).sqrt()

        self.dim = self.species**2 * (nmax + 1)**2 * (lmax + 1)
        self.shape = (self.species, self.species, nmax + 1, nmax + 1, lmax + 1)
        if flatten:
            self.shape = (self.dim, )

        self.params = []
        self._state = 'atomic_unit={}, flatten={}'.format(self.unit, flatten)
Example #11
 def includes_species(self, species):
     return any([a in iterable(species) for a in self.numbers_set])
Example #12
 def lce_filter(self, locs):
     if self.restrict is None:
         return locs
     else:
         return [loc for loc in locs if loc.number in iterable(self.restrict)]
Example #13
 def __init__(self, soaps):
     super().__init__()
     self.soaps = iterable(soaps)
     self.params = [par for soap in self.soaps for par in soap.params]
Example #14
def potential_energy_surface(data=None,
                             inducing=None,
                             train=0,
                             caching=False,
                             append_log=True,
                             **kwargs):
    from theforce.descriptor.atoms import AtomsData, LocalsData, sample_atoms
    from theforce.regression.gppotential import PosteriorPotential
    from theforce.regression.gppotential import train_gpp
    from theforce.util.util import iterable

    # get params
    params = get_params(**kwargs)
    log = open(params['path_log'], 'a' if append_log else 'w')
    log.write('{} threads: {}\n'.format(37 * '*', torch.get_num_threads()))

    # data
    if data is None:
        data = sample_atoms(params['path_data'],
                            size=params['ndata'],
                            chp=params['path_data_chp'])
        data.update(cutoff=params['cutoff'])
        natoms = sum([len(atoms) for atoms in data])
        log.write(
            'cutoff: {}\npath_data: {}\nndata: {} (={} locals)\npath_data_chp: {}\n'
            .format(params['cutoff'], params['path_data'], params['ndata'],
                    natoms, params['path_data_chp']))
    else:
        # if data is given as a kwarg, the cutoff should already have been applied.
        assert len(data[-1]) == data[-1].natoms
        natoms = sum([len(atoms) for atoms in data])
        log.write('ndata: {} (={} locals) (kwarg)\n'.format(len(data), natoms))

    # inducing
    if inducing is None:
        if params['nlocals'] == -1:
            inducing = data.to_locals()
            log.write('nlocals: {} (=-1)\n'.format(len(inducing)))
        else:
            inducing = data.sample_locals(params['nlocals'])
            log.write('nlocals: {}\n'.format(params['nlocals']))
    else:
        log.write('nlocals: {} (kwarg)\n'.format(len(inducing)))
    if params['path_inducing_chp'] is not None:
        inducing.to_traj(params['path_inducing_chp'])
        log.write('path_inducing_chp: {}\n'.format(
            params['path_inducing_chp']))

    # numbers
    if params['numbers'] is None:
        params['numbers'] = data.numbers_set()
        log.write('numbers: {}\n'.format(params['numbers']))
    log.close()

    # kernel
    gp = get_kernel(params)

    # train
    data.update(descriptors=gp.kern.kernels)
    inducing.stage(gp.kern.kernels)
    inducing.trainable = False  # TODO: this should be set inside Locals
    state = 0
    for steps in iterable(train):
        train_gpp(gp,
                  data,
                  inducing=inducing,
                  steps=steps,
                  logprob_loss=True,
                  cov_loss=False)
        # save gp
        state += steps
        if state > 0 and params['path_gp_chp']:
            gp.to_file(params['path_gp_chp'], flag='state: {}'.format(state))
            with open('log.txt', 'a') as log:
                log.write('path_gp_chp: {} (write, state={})\n'.format(
                    params['path_gp_chp'], state))

        # save inducing
        if inducing.trainable:
            raise NotImplementedError(
                'trainable inducing is not implemented yet!')
    if state > 0:
        with open('log.txt', 'a') as log:
            log.write('\ntrained for {} steps\n'.format(state))

    # create Posterior Potential
    V = PosteriorPotential(gp, data, inducing=inducing, use_caching=caching)

    # test
    if params['test']:
        data.set_per_atoms('predicted_energy', V(data, 'energy'))
        data.set_per_atom('predicted_forces', V(data, 'forces'))
        var_e = data.target_energy.var()
        var_ee = (data.cat('predicted_energy') - data.target_energy).var()
        R2_e = 1 - var_ee / var_e
        var_f = data.target_forces.var()
        var_ff = (data.cat('predicted_forces') - data.target_forces).var()
        R2_f = 1 - var_ff / var_f
        with open('log.txt', 'a') as log:
            log.write('\ntesting the model on the same data that created it:')
            log.write('\nenergy R2 score={}'.format(R2_e))
            log.write('\nforces R2 score={}\n'.format(R2_f))
        print('testing the model on the same data that created it:')
        print('energy R2 score={}'.format(R2_e))
        print('forces R2 score={}'.format(R2_f))

    return V
Example #15
def mask_values(arr, vals):
    return np.stack([arr == v for v in iterable(vals)]).any(axis=0)
Example #16
 def stage(self, descriptors, dont_save_grads=True):
     for loc in self:
         loc.stage(iterable(descriptors), dont_save_grads=dont_save_grads)
Example #17
def mlmd(ini_atoms,
         cutoff,
         au,
         dt,
         tolerance=0.1,
         pair=True,
         soap=True,
         ndata=10,
         max_steps=100,
         itrain=10 * [5],
         retrain=5 * [5],
         retrain_every=100,
         pes=potential_energy_surface):
    """ 
    ML-assisted-MD: a calculator must be attached to ini_atoms.
    Rules of thumb:
    Initial training (itrain) is crucial for correct approximation 
    of variances.
    Hyper-parameters are sensitive to nlocals=len(inducing) thus 
    if you don't want to retrain gp every time the data is updated, 
    at least keep nlocals fixed.
    """

    dftcalc = ini_atoms.get_calculator()

    # run a short MD to gather some (dft) data
    atoms = TorchAtoms(ase_atoms=ini_atoms.copy())
    atoms.set_velocities(ini_atoms.get_velocities())
    atoms.set_calculator(dftcalc)
    dyn = VelocityVerlet(atoms, dt=dt, trajectory='md.traj', logfile='md.log')
    md_step = ndata
    dyn.run(md_step)
    ndft = md_step

    # train a potential
    data = AtomsData(traj='md.traj', cutoff=cutoff)
    inducing = data.to_locals()
    V = pes(data=data,
            inducing=inducing,
            cutoff=cutoff,
            atomic_unit=au,
            pairkernel=pair,
            soapkernel=soap,
            train=itrain,
            test=True,
            caching=True)
    atoms.update(cutoff=cutoff, descriptors=V.gp.kern.kernels)
    mlcalc = PosteriorVarianceCalculator(V)
    atoms.set_calculator(mlcalc)

    # long MD
    while md_step < max_steps:

        md_step += 1

        forces = atoms.get_forces()
        var = atoms.calc.results['forces_var']
        tol = np.sqrt(var.max(axis=1))

        if (tol > tolerance).any():

            _forces = forces
            _var = var

            # new dft calculation
            ndft += 1
            print(
                '|............... new dft calculation (total={})'.format(ndft))
            tmp = atoms.copy()
            tmp.set_calculator(dftcalc)
            true_forces = tmp.get_forces()

            # add new information to data
            new_data = AtomsData(X=[TorchAtoms(ase_atoms=tmp)])
            new_data.update(cutoff=cutoff,
                            descriptors=atoms.calc.potential.gp.kern.kernels)
            new_locals = new_data.to_locals()
            new_locals.stage(descriptors=atoms.calc.potential.gp.kern.kernels)
            data += new_data
            inducing += new_locals  # TODO: importance sampling

            # remove old(est) information
            del data.X[0]
            del inducing.X[:len(new_locals)]  # TODO: importance sampling

            # retrain
            if ndft % retrain_every == 0:
                print('|............... : retraining for {} steps'.format(
                    retrain))
                for steps in iterable(retrain):
                    atoms.calc.potential.train(data,
                                               inducing=inducing,
                                               steps=steps,
                                               cov_loss=False)
                    atoms.calc.potential.gp.to_file(
                        'gp.chp', flag='ndft={}'.format(ndft))

            # update model
            print('|............... new regression')
            atoms.calc.potential.set_data(data, inducing, use_caching=True)

            # new forces
            atoms.calc.results.clear()
            forces = atoms.get_forces()
            var = atoms.calc.results['forces_var']

            # report
            _err_pred = np.sqrt(_var).max()
            _err = np.abs(_forces - true_forces).max()
            err_pred = np.sqrt(var).max()
            err = np.abs(forces - true_forces).max()
            print('|............... : old max-error: predicted={}, true={}'.
                  format(_err_pred, _err))
            print('|............... : new max-error: predicted={}, true={}'.
                  format(err_pred, err))
            arrays = np.concatenate([true_forces, _forces, forces, _var, var],
                                    axis=1)
            with open('forces_var.txt', 'ab') as report:
                np.savetxt(report, arrays)

        print(md_step, '_')
        dyn.run(1)
    print('finished {} steps, used dftcalc only {} times'.format(
        md_step, ndft))
Example #18
 def stage(self, descriptors=None, dont_save_grads=True):
     descs = iterable(descriptors) if descriptors else self.descriptors
     for loc in self.loc:
         loc.stage(descs, dont_save_grads=dont_save_grads)
Example #19
 def set_descriptors(self, descriptors, stage=True, dont_save_grads=True):
     self.descriptors = [d for d in iterable(descriptors)]
     if stage:
         self.stage(dont_save_grads=dont_save_grads)
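All of the examples above use the helper theforce.util.util.iterable to normalize an argument that may be either a single object or a collection. Its actual definition is not reproduced on this page; the following is only a minimal sketch of a helper with that behavior, assuming it returns non-string iterables unchanged, wraps single objects (including instances of the optional ignore= type seen in Example #6) in a list, and turns None into an empty list.

def iterable(obj, ignore=None):
    # Sketch only (assumed behavior, not necessarily the actual theforce implementation):
    # - None becomes an empty list, so "for x in iterable(None)" is a no-op
    # - an instance of the optional ignore type is treated as a single item
    # - any other non-string iterable is returned unchanged
    # - everything else is wrapped in a one-element list
    if obj is None:
        return []
    if ignore is not None and isinstance(obj, ignore):
        return [obj]
    if hasattr(obj, '__iter__') and not isinstance(obj, str):
        return obj
    return [obj]

With such a helper, iterable(5) gives [5], iterable([1, 2]) gives [1, 2], and iterable(None) gives [], which matches how the call sites above loop over arguments that may be scalars, lists, or omitted.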