Ejemplo n.º 1
0
def test_box_expansion():
    """
    Check that IICalculator.expanded_atoms() yields atoms where atoms 0 and 1
    are closer than 2 Å while atoms 0 and 3 are further than 5 Å apart, and
    print how long a single expansion takes.

    The expanded atoms are round-tripped through a temporary 'tmp.xyz' file,
    which is always removed, even if loading it or an assertion fails.
    """
    from timeit import repeat

    h2o = gt.System(box_size=[7, 7, 7])
    h2o.add_solvent('h2o', n=5)
    ase_atoms = h2o.random().ase_atoms()

    intra = gt.GAP('intra')
    # Five molecules, each owning three consecutive atom indexes
    intra.mol_idxs = [[3*j + i for i in range(3)] for j in range(5)]
    # NOTE(review): the inter GAP is named 'iter', possibly a typo of 'inter'.
    # Kept as-is because the name may be used to locate files on disk.
    calc = IICalculator(intra=intra,
                        inter=gt.GAP('iter'))

    new_atoms = calc.expanded_atoms(atoms=ase_atoms)

    # Best of the default number of repeats, averaged over 100 calls each
    print('Expansion runs in ',
          min(repeat(lambda: calc.expanded_atoms(atoms=ase_atoms),
                     number=100))/100,
          's')

    tmp_filename = 'tmp.xyz'
    try:
        write(tmp_filename, new_atoms)

        ade_mol = ade.Molecule(tmp_filename)
        assert ade_mol.distance(0, 1) < 2           # Å
        assert ade_mol.distance(0, 3) > 5           # Å
    finally:
        # Do not leave the scratch file behind when the test fails
        if os.path.exists(tmp_filename):
            os.remove(tmp_filename)
Ejemplo n.º 2
0
def train_h2o():
    """
    Actively train a GAP named 'intra_h2o' on the module-level h2o system,
    using 'dftb' as the reference method at temp=1000 without validation.

    :return: None, the training data and trained GAP are discarded
    """
    train_kwargs = dict(gap=gt.GAP(name='intra_h2o', system=h2o),
                        method_name='dftb',
                        validate=False,
                        temp=1000)

    # Unpack to keep the check that train() returns exactly two values
    _data, _gap = gt.active.train(h2o, **train_kwargs)
Ejemplo n.º 3
0
def train_methane():
    """
    Actively train a GAP named 'intra_methane' on the module-level methane
    system, using 'dftb' as the reference method at temp=1000 without
    validation.

    :return: None, the training data and trained GAP are discarded
    """
    train_kwargs = dict(gap=gt.GAP(name='intra_methane', system=methane),
                        method_name='dftb',
                        validate=False,
                        temp=1000)

    # Unpack to keep the check that train() returns exactly two values
    _data, _gap = gt.active.train(methane, **train_kwargs)
Ejemplo n.º 4
0
def train_ii(system, method_name, intra_temp=1000, inter_temp=300, **kwargs):
    """
    Train an intra+inter (II) GAP for a system made of one molecular species.
    The intramolecular part is trained first on a system that contains just
    the first molecule, then the intermolecular part on the full system.

    ---------------------------------------------------------------------------
    :param system: (gt.System) Must contain exactly one unique molecule

    :param method_name: (str) e.g dftb

    :param intra_temp: (float) Temperature to run the intramolecular training

    :param inter_temp: (float) Temperature to run the intermolecular training

    :param kwargs: Forwarded to both training calls. Must not contain 'temp'

    :return: ((intra_data, inter_data), gap) where gap is whatever the
             intermolecular gt.active.train() call returned

    :raises ValueError: For a system without exactly one unique molecule, or
                        if 'temp' is given in kwargs
    :raises RuntimeError: If the intramolecular training produced no data
    """
    n_species = system.n_unique_molecules

    if n_species > 1:
        raise ValueError('Can only train an inter+intra for a single bulk '
                         'molecular species')

    if n_species < 1:
        raise ValueError('Must have at least one molecule to train GAP for')

    if 'temp' in kwargs:
        raise ValueError('Ambiguous specification, please specify: intra_temp '
                         'and inter_temp')

    # A system of only the monomer, in the same box as the full system, is
    # used to train the intramolecular component
    monomer = system.molecules[0]
    monomer_system = gt.System(box_size=system.box.size)
    monomer_system.add_molecules(monomer)

    intra_name = f'intra_{monomer.name}'

    # Train the intra component with a bespoke GAP
    monomer_gap = gt.GAP(name=intra_name, system=monomer_system)
    intra_data, _ = train(monomer_system,
                          method_name=method_name,
                          gap=monomer_gap,
                          temp=intra_temp,
                          **kwargs)

    if len(intra_data) == 0:
        raise RuntimeError('Failed to train the intra-system')

    # The intra GAP is recreated under the same name, but built from the full
    # system and the molecule
    intra_gap = gt.gap.IntraGAP(name=intra_name,
                                system=system,
                                molecule=monomer)
    inter_gap = gt.InterGAP(name=f'inter_{monomer.name}',
                            system=system)
    combined_gap = gt.IIGAP(intra_gap, inter_gap)

    # Finally train the inter component of the energy
    inter_data, trained_gap = gt.active.train(system,
                                              method_name=method_name,
                                              temp=inter_temp,
                                              gap=combined_gap,
                                              **kwargs)

    return (intra_data, inter_data), trained_gap
Ejemplo n.º 5
0
def train_intra_zn():
    """
    Actively train a GAP named 'intra_znh2o6' on the module-level znh2o6
    system with 'gpaw' as the reference method and validation enabled.

    The GAP is created with default_params=False and gets a single SOAP
    descriptor on O with a cutoff of 3.0 and Zn, H and O as the 'other'
    species.

    :return: None, the training data and trained GAP are discarded
    """
    from copy import deepcopy

    gap = gt.GAP(name='intra_znh2o6', system=znh2o6, default_params=False)

    # Copy the defaults: assigning the GTConfig object directly would alias
    # it, so the edits below would change the defaults of every later GAP
    gap.params.soap['O'] = deepcopy(gt.GTConfig.gap_default_soap_params)
    gap.params.soap['O']['cutoff'] = 3.0
    gap.params.soap['O']['other'] = ['Zn', 'H', 'O']

    _, _ = gt.active.train(znh2o6,
                           gap=gap,
                           method_name='gpaw',
                           validate=True,
                           temp=1000,
                           tau_max=1000,
                           active_e_thresh=0.1,
                           n_configs_iter=20)
    return None
Ejemplo n.º 6
0
def test_gap_train():
    """
    Load the 'rnd_training.xyz' data set and check its size, then train a
    GAP on it, but only when the GT_GAP environment variable is exactly
    'True'.
    """
    system = gt.System(box_size=[10, 10, 10])

    training_data = gt.Data(name='test')
    xyz_path = os.path.join(here, 'data', 'rnd_training.xyz')
    training_data.load(system=system, filename=xyz_path)

    assert len(training_data) == 10
    assert len(training_data[0].atoms) == 31

    # Training is opt-in, so it is skipped unless explicitly requested
    if os.environ.get('GT_GAP') == 'True':
        # Run GAP train with the training data
        gap = gt.GAP(name='test', system=system)
        gap.train(training_data)
Ejemplo n.º 7
0
def test_gap():
    """
    Check the attributes of a freshly constructed GAP for a water dimer: it
    has a name and parameters, no training data, and a single SOAP on O.
    """
    water_dimer = gt.System(box_size=[3.0, 3.0, 3.0])
    water_dimer.add_molecules(h2o, n=2)

    gap = gt.GAP(name='test', system=water_dimer)

    for attr_name in ('name', 'params'):
        assert hasattr(gap, attr_name)

    assert gap.training_data is None

    for section in ('general', 'pairwise', 'soap'):
        assert hasattr(gap.params, section)

    # By default should only add a SOAP to non-hydrogen elements
    soap_params = gap.params.soap
    assert 'O' in soap_params.keys()
    assert len(list(soap_params)) == 1
Ejemplo n.º 8
0
    for r1 in np.linspace(0.8, 1.5, n_to_cube):
        for r2 in np.linspace(0.8, 1.5, n_to_cube):
            for r3 in np.linspace(1.0, 2.5, n_to_cube):
                h2o = get_h2o(r1, r2, r3)
                configs += h2o

    configs.parallel_cp2k()
    return configs


if __name__ == '__main__':

    # A single water molecule in a box of size [8, 8, 8]
    water_monomer = gt.System(box_size=[8, 8, 8])
    water_monomer.add_solvent('h2o', n=1)

    gap = gt.GAP(name='monomer_2b_3b',
                 system=water_monomer,
                 default_params=None)

    # One O-H pairwise term and one H-O-H angle term. Both start from a copy
    # of the default two-body parameters with the cutoff set to 3.0
    descriptors = ((gap.params.pairwise, ('O', 'H')),
                   (gap.params.angle, ('H', 'O', 'H')))

    for descriptor_params, species in descriptors:
        descriptor_params[species] = gt.GTConfig.gap_default_2b_params.copy()
        descriptor_params[species]['cutoff'] = 3.0

    # Train on the 7 x 7 x 7 grid of configurations
    gap.train(grid_configs(n_to_cube=7))
Ejemplo n.º 9
0
from copy import deepcopy

import gaptrain as gt
from autode.wrappers.keywords import GradientKeywords

gt.GTConfig.n_cores = 8

if __name__ == '__main__':

    # Reference method used during the active learning
    gt.GTConfig.orca_keywords = GradientKeywords(
        ['PBE', 'ma-def2-SVP', 'EnGrad'])

    # large box to ensure no self-interaction
    sn2_ts = gt.System(box_size=[20, 20, 20])
    sn2_ts.add_molecules(gt.Molecule('ts.xyz', charge=-1))

    gap = gt.GAP(name='sn2_gap', system=sn2_ts, default_params=False)

    # Copy the defaults: assigning the GTConfig object directly would alias
    # it, so the edits below would change the defaults of every later GAP
    gap.params.soap['C'] = deepcopy(gt.GTConfig.gap_default_soap_params)
    gap.params.soap['C']['other'] = ['H', 'Cl']
    gap.params.soap['C']['cutoff'] = 6.0

    # The configured GAP must be passed in explicitly. Without gap=...,
    # train() builds a GAP of its own and the SOAP set up above is never used
    data, gap = gt.active.train(sn2_ts,
                                method_name='orca',
                                gap=gap,
                                temp=500,
                                active_e_thresh=0.1,
                                max_time_active_fs=500,
                                fix_init_config=True)

    # 'uplift' the configurations obtained at PBE/DZ to DLPNO-CCSD(T)/TZ
    gt.GTConfig.orca_keywords = GradientKeywords(
        ['DLPNO-CCSD(T)', 'ma-def2-TZVPP', 'NumGrad', 'AutoAux', 'EnGrad'])
    data.parallel_orca()
    gap.train(data)
Ejemplo n.º 10
0
from copy import deepcopy

import gaptrain as gt
from autode.wrappers.keywords import GradientKeywords

gt.GTConfig.n_cores = 10
gt.GTConfig.orca_keywords = GradientKeywords(['B3LYP', 'def2-SVP', 'EnGrad'])

# For non-periodic systems there's no need to define a box, but a System
# requires one
ts = gt.System(box_size=[10, 10, 10])
ts.add_molecules(gt.Molecule('ts1_prime.xyz'))

gap = gt.GAP(name='da_gap', system=ts, default_params={})

# Copy the defaults: assigning the GTConfig object directly would alias it,
# so the edits below would change the defaults of every later GAP
gap.params.soap['C'] = deepcopy(gt.GTConfig.gap_default_soap_params)
gap.params.soap['C']['cutoff'] = 3.0
gap.params.soap['C']['other'] = ['H', 'C']

# Always start the active learning dynamics from the first configuration
# (fix_init_config=True) rather than from the lowest energy one found
data, gap = gt.active.train(
    system=ts,
    method_name='orca',
    gap=gap,
    max_time_active_fs=200,
    temp=500,
    active_e_thresh=3 * 0.043,  # 3 kcal mol-1
    max_energy_threshold=5,
    max_active_iters=50,
    n_init_configs=10,
    fix_init_config=True)
Ejemplo n.º 11
0
                h2o = get_h2o(r1, r2, r3)
                configs += h2o

    return configs


if __name__ == '__main__':
    water_monomer = gt.System(box_size=[10, 10, 10])
    water_monomer.add_solvent('h2o', n=1)

    # Load the grid configurations and evaluate at PBE/400eV. The result is
    # bound to its own name so the grid_configs() function is not shadowed
    train_configs = grid_configs(n_to_cube=8)
    train_configs.parallel_gpaw()

    gap = gt.GAP(name='water_intra_gap',
                 system=water_monomer,
                 default_params=False)

    # O-H and H-H pairwise terms, each from its own copy of the default
    # two-body parameters with the cutoff set to 3.0
    for pair in (('O', 'H'), ('H', 'H')):
        gap.params.pairwise[pair] = deepcopy(gt.GTConfig.gap_default_2b_params)
        gap.params.pairwise[pair]['cutoff'] = 3.0

    # The H-O-H angle term also starts from the two-body defaults
    gap.params.angle[('H', 'O',
                      'H')] = deepcopy(gt.GTConfig.gap_default_2b_params)
    gap.params.angle[('H', 'O', 'H')]['cutoff'] = 3.0

    gap.train(train_configs)
Ejemplo n.º 12
0
def train_ss(system, method_name, intra_temp=1000, inter_temp=300, **kwargs):
    """
    Train a solute-solvent (SS) GAP for a system with two molecular species.
    The intramolecular parts of the solute and the solvent are each trained
    on a system with only one molecule, then the intermolecular part is
    trained on the full system.

    The solute is the least abundant species by molecule name and the solvent
    the most abundant. If both are equally abundant, the species that appears
    first in system.molecules is taken as the solvent.

    ---------------------------------------------------------------------------
    :param system: (gt.System) Must contain exactly two unique molecules

    :param method_name: (str) e.g dftb

    :param intra_temp: (float) Temperature to run the intramolecular training

    :param inter_temp: (float) Temperature to run the intermolecular training

    :param kwargs: Forwarded to every gt.active.train() call

    :return: ((solute_data, solvent_data, inter_data), gap) where gap is
             whatever the intermolecular gt.active.train() call returned

    :raises ValueError: If the system does not have exactly two unique
                        molecules
    """
    from collections import Counter

    if system.n_unique_molecules != 2:
        raise ValueError('Can only train an solute-solvent GAP for a system '
                         'with two molecules, the solute and the solvent')

    # most_common() orders by decreasing count and keeps first-seen order for
    # ties, so the choice does not depend on set/hash ordering. Unpacking
    # raises a ValueError unless there are exactly two distinct names
    name_counts = Counter(mol.name for mol in system.molecules)
    (solv_name, _), (solute_name, _) = name_counts.most_common()

    solute = next(mol for mol in system.molecules if mol.name == solute_name)
    solv = next(mol for mol in system.molecules if mol.name == solv_name)

    data = []   # List of training data for all the components in the system

    # Train the intramolecular components of the potential for the solute and
    # the solvent
    for molecule in (solute, solv):
        # Create a system with only one molecule
        intra_system = gt.System(box_size=system.box.size)
        intra_system.add_molecules(molecule)

        # and train..
        logger.info(f'Training intramolecular component of {molecule.name}')
        mol_data, _ = gt.active.train(intra_system,
                                      gap=gt.GAP(name=f'intra_{molecule.name}',
                                                 system=intra_system),
                                      method_name=method_name,
                                      temp=intra_temp,
                                      **kwargs)
        data.append(mol_data)

    # Recreate the intra GAPs under the same names, now built from the full
    # system rather than the single-molecule ones
    solv_gap = gt.gap.SolventIntraGAP(name=f'intra_{solv.name}', system=system)
    solute_gap = gt.gap.SoluteIntraGAP(name=f'intra_{solute.name}',
                                       system=system, molecule=solute)
    inter_gap = gt.InterGAP(name='inter', system=system)

    # and finally train the intermolecular part of the potential
    inter_data, gap = gt.active.train(system,
                                      method_name=method_name,
                                      gap=gt.gap.SSGAP(solute_intra=solute_gap,
                                                       solvent_intra=solv_gap,
                                                       inter=inter_gap),
                                      temp=inter_temp,
                                      **kwargs)
    data.append(inter_data)

    return tuple(data), gap
Ejemplo n.º 13
0
def train(system: gt.System,
          method_name: str,
          gap=None,
          max_time_active_fs=1000,
          min_time_active_fs=0,
          n_configs_iter=10,
          temp=300,
          active_e_thresh=None,
          active_method='diff',
          max_energy_threshold=None,
          validate=False,
          tau=None,
          tau_max=None,
          val_interval=None,
          max_active_iters=50,
          n_init_configs=10,
          init_configs=None,
          remove_intra_init_configs=True,
          fix_init_config=False,
          bbond_energy=None,
          fbond_energy=None,
          init_active_temp=None):
    """
    Train a system using active learning, by propagating dynamics using ML
    driven molecular dynamics (MD) and adding configurations where the error
    is above a threshold. Loop looks something like

    Generate configurations -> train a GAP -> run GAP-MD -> frames with error
                                   ^                               |
                                   |________ calc true  ___________


    Active learning will loop until either (1) the iteration > max_active_iters
    or (2) no configurations are found to add or (3) if validating, the
    calculated τ is within 1 of the maximum τ, where the loop will break out

    --------------------------------------------------------------------------
    :param system: (gt.system.System)

    :param method_name: (str) Name of a method to use as the ground truth e.g.
                        dftb, orca, gpaw

    :param gap: (gt.gap.GAP) GAP to train with the active learnt data, if
                None then a gt.GAP with a unique 'active_gap' name is created
                for the system using that class's default parameters

    :param max_time_active_fs: (float) Maximum propagation time in the active
                               learning loop. Default = 1 ps

    :param min_time_active_fs: (float) Minimum propagation time for an
                               active learnt configuration. Is updated after
                               every iteration to the smallest t0 of the
                               configurations found, so the error is only
                               calculated where the GAP is unlikely to be
                               accurate

    :param n_configs_iter: (int) Number of configurations to generate per
                           active learning cycle

    :param temp: (float) Temperature in K to propagate active learning at -
                 higher is better for stability but requires more training

    :param active_method: (str) Method used to generate active learnt
                          configurations. One of ['diff', 'qbc', 'gp_var']

    :param active_e_thresh: (float) Threshold in eV (E_t) above which a
                            configuration is added to the potential. If None
                            then a default depending on active_method is used

                            1. active_method='diff': |E_0 - E_GAP| > E_t
                               default: 0.043363 (1 kcal mol-1) per molecule

                            2. active_method='qbc': σ(E_GAP1, E_GAP2...) > E_t
                               default: 1E-6 per molecule

                            3. active_method='gp_var': σ^2_GAP(predicted) > E_t
                               default: 5E-5

    :param max_energy_threshold: (float) Maximum relative energy threshold for
                                 configurations to be added to the training
                                 data

    :param validate: (bool) Whether or not to validate the potential during
                     the training. If True τ is calculated every val_interval
                     iterations and written to '{gap.name}_tau.txt'

    :param tau: (gt.loss.Tau) An instance of the τ error metric, unused if no
                validation is performed. If None and validate is True then a
                default one is created from 5 generated configurations, with
                a lower energy bound of 0.043363 per molecule and a maximum
                time of tau_max (1000 if tau_max is None)

    :param tau_max: (float | None) Maximum τ_acc in fs used for the default
                    τ metric. The active learning loop breaks once the
                    calculated τ is within 1 of the metric's maximum time

    :param val_interval: (int) Interval in the active training loop at which to
                         run the validation. Defaults to max_active_iters // 10
                         (at least 1) if validation is requested

    :param max_active_iters: (int) Maximum number of active learning
                             iterations to perform. Will break if we hit the
                             early stopping criteria

    :param n_init_configs: (int) Number of initial configurations to generate,
                           will be ignored if init_configs is not None

    :param init_configs: (gt.ConfigurationSet) A set of configurations from
                         which to start the active learning from

    :param remove_intra_init_configs: (bool) Whether the intramolecular
                                      component of the energy/force needs to
                                      be removed prior to training with
                                      init_configs. only applies for IIGAP
                                      and init_configs != None

    :param fix_init_config: (bool) Always start from the same initial
                            configuration for the active learning loop, if
                            False then the minimum energy structure is used.
                            Useful for TS learning, where dynamics should be
                            propagated from a saddle point not the minimum

    :param bbond_energy: (dict | None) Additional energy to add to a breaking
                         bond. e.g. bbond_energy={(0, 1): 0.1} Adds 0.1 eV
                         to the 'bond' between atoms 0 and 1 as velocities
                         shared between the atoms in the breaking bond
                         direction

    :param fbond_energy: (dict | None) As bbond_energy but in the direction to
                         form a bond

    :param init_active_temp: (float | None) Initial temperature for velocities
                             in the 'active' MD search for configurations

    :return: (gt.Data, gt.GAP)
    """
    init_configs = get_init_configs(init_configs=init_configs,
                                    n=n_init_configs,
                                    method_name=method_name,
                                    system=system)

    # Remove the intra-molecular energy if an intra+inter (II) GAP is being
    # trained
    do_remove_intra = isinstance(gap, gt.IIGAP)
    if do_remove_intra and remove_intra_init_configs:
        remove_intra(init_configs, gap=gap)

    # Initial configuration must have energies
    assert all(cfg.energy is not None for cfg in init_configs)

    if gap is None:
        gap = gt.GAP(name=unique_name('active_gap'), system=system)

    # Initialise a τ metric with default parameters
    if validate and tau is None:
        # 1 ps default maximum tau
        tau = gt.loss.Tau(configs=get_init_configs(system, n=5),
                          e_lower=0.043363 * len(system.molecules),
                          max_fs=tau_max if tau_max is not None else 1000)

    # Default to validating 10 times through the training
    if validate and val_interval is None:
        val_interval = max(max_active_iters // 10, 1)

    # Initialise training data
    train_data = gt.Data(name=gap.name)
    train_data += init_configs

    # and train an initial GAP
    gap.train(init_configs)

    if active_e_thresh is None:
        if active_method.lower() == 'diff':
            #                 1 kcal mol-1 molecule-1
            active_e_thresh = 0.043363 * len(system.molecules)

        if active_method.lower() == 'qbc':
            # optimised on a small box of water. std dev. for total energy
            active_e_thresh = 1E-6 * len(system.molecules)

        if active_method.lower() == 'gp_var':
            # Threshold for maximum per-atom GP variance (eV atom^-1)
            active_e_thresh = 5E-5

    # Only opened when validating. It is closed in the finally block so the
    # handle is released and flushed on every exit path, exceptions included
    tau_file = None

    try:
        # Initialise the validation output file
        if validate:
            # Explicit UTF-8 as the header contains a non-ASCII 'τ'
            tau_file = open(f'{gap.name}_tau.txt', 'w', encoding='utf-8')
            print('Iteration    n_evals      τ_acc / fs', file=tau_file)

        # Run the active learning loop, running iterative GAP-MD
        for iteration in range(max_active_iters):

            # Set the configuration from which GAP-MD will be run
            min_idx = int(np.argmin(train_data.energies()))
            if fix_init_config:
                init_config = train_data[0]
            else:
                init_config = train_data[min_idx]

            configs = get_active_configs(init_config,
                                         gap=gap,
                                         ref_method_name=method_name,
                                         method=str(active_method),
                                         n_configs=n_configs_iter,
                                         temp=temp,
                                         e_thresh=active_e_thresh,
                                         max_time_fs=max_time_active_fs,
                                         min_time_fs=min_time_active_fs,
                                         bbond_energy=bbond_energy,
                                         fbond_energy=fbond_energy,
                                         init_temp=init_active_temp)

            # Active learning finds no configurations
            if len(configs) == 0:
                # Calculate the final tau if we're running with validation
                if validate:
                    tau.calculate(gap=gap, method_name=method_name)
                    print(iteration, tau.value, sep='\t\t\t', file=tau_file)

                logger.info('No configs to add. Active learning = DONE')
                break

            # Later iterations only look for errors beyond this time
            min_time_active_fs = min(config.t0 for config in configs)
            logger.info(f'All active configurations reached t = '
                        f'{min_time_active_fs} fs before an error exceeded the '
                        f'threshold of {active_e_thresh:.3f} eV')

            if do_remove_intra:
                remove_intra(configs, gap=gap)

            train_data += configs

            # If required remove high-lying energy configuration from the data
            if max_energy_threshold is not None:
                train_data.remove_above_e(max_energy_threshold)

            # Retrain on these new data
            gap.train(train_data)

            # Print the accuracy
            if validate and iteration % val_interval == 0:

                tau.calculate(gap=gap, method_name=method_name)
                print(f'{iteration:<13g}'
                      f'{sum(config.n_evals for config in train_data):<13g}'
                      f'{tau.value}', sep='\t', file=tau_file)

                if np.abs(tau.value - tau.max_time) < 1:
                    logger.info('Reached the maximum tau. '
                                'Active learning = DONE')
                    break
    finally:
        if tau_file is not None:
            tau_file.close()

    return train_data, gap