def test_box_expansion():
    """
    Check the expanded-box atoms generated by an IICalculator for five waters

    The expanded atoms are written to a temporary .xyz file and read back.
    Atoms 0 and 1 (same entry in mol_idxs) must stay closer than 2 Å, while
    atoms 0 and 3 (different entries in mol_idxs) must be more than 5 Å
    apart. The temporary file is removed even if an assertion fails.
    """
    from timeit import repeat

    n_waters, n_atoms_per_water = 5, 3

    h2o = gt.System(box_size=[7, 7, 7])
    h2o.add_solvent('h2o', n=n_waters)
    ase_atoms = h2o.random().ase_atoms()

    # Atom indexes of each molecule: [[0, 1, 2], [3, 4, 5], ...]
    intra = gt.GAP('intra')
    intra.mol_idxs = [[n_atoms_per_water * j + i
                       for i in range(n_atoms_per_water)]
                      for j in range(n_waters)]

    calc = IICalculator(intra=intra, inter=gt.GAP('iter'))
    new_atoms = calc.expanded_atoms(atoms=ase_atoms)

    # Report the best per-call time over the timeit repeats
    best_total = min(repeat(lambda: calc.expanded_atoms(atoms=ase_atoms),
                            number=100))
    print('Expansion runs in ', best_total/100, 's')

    xyz_path = 'tmp.xyz'
    try:
        write(xyz_path, new_atoms)
        ade_mol = ade.Molecule(xyz_path)

        assert ade_mol.distance(0, 1) < 2    # Å
        assert ade_mol.distance(0, 3) > 5    # Å
    finally:
        # Previously the file was leaked whenever an assertion failed
        if os.path.exists(xyz_path):
            os.remove(xyz_path)
def train_h2o():
    """
    Run active learning for a GAP named 'intra_h2o' on the module-level
    ``h2o`` system, using DFTB as the reference method at 1000 K without
    validation. The training data and trained GAP are discarded.

    :return: None
    """
    water_gap = gt.GAP(name='intra_h2o', system=h2o)

    # Unpack to two values as before, even though neither is used
    _data, _trained_gap = gt.active.train(h2o,
                                          gap=water_gap,
                                          method_name='dftb',
                                          validate=False,
                                          temp=1000)
def train_methane():
    """
    Run active learning for a GAP named 'intra_methane' on the module-level
    ``methane`` system, using DFTB as the reference method at 1000 K without
    validation. The training data and trained GAP are discarded.

    :return: None
    """
    ch4_gap = gt.GAP(name='intra_methane', system=methane)

    # Unpack to two values as before, even though neither is used
    _data, _trained_gap = gt.active.train(methane,
                                          gap=ch4_gap,
                                          method_name='dftb',
                                          validate=False,
                                          temp=1000)
def train_ii(system, method_name, intra_temp=1000, inter_temp=300, **kwargs):
    """
    Train an intra+intermolecular (II) GAP from just a system containing a
    single molecular species. The intramolecular part is trained first on a
    one-molecule system in the same box, then the intermolecular part is
    trained on the full system.

    ---------------------------------------------------------------------------
    :param system: (gt.System)

    :param method_name: (str) e.g dftb

    :param intra_temp: (float) Temperature to run the intramolecular training

    :param inter_temp: (float) Temperature to run the intermolecular training

    :param kwargs: Passed on to both training calls. Must not contain 'temp'

    :return: ((intra_data, inter_data), gap) where gap is the second value
             returned from the intermolecular gt.active.train call

    :raises ValueError: If the system does not have exactly one unique
                        molecule, or 'temp' is given in kwargs
    :raises RuntimeError: If the intramolecular training returned no data
    """
    n_species = system.n_unique_molecules

    if n_species > 1:
        raise ValueError('Can only train an inter+intra for a single bulk '
                         'molecular species')

    if n_species < 1:
        raise ValueError('Must have at least one molecule to train GAP for')

    if 'temp' in kwargs:
        raise ValueError('Ambiguous specification, please specify: intra_temp '
                         'and inter_temp')

    # A system holding only the monomer is used for the intra-molecular part
    monomer = system.molecules[0]
    monomer_system = gt.System(box_size=system.box.size)
    monomer_system.add_molecules(monomer)

    # Intra component: trained with its own GAP on the monomer system
    monomer_gap = gt.GAP(name=f'intra_{monomer.name}', system=monomer_system)
    intra_data, _ = train(monomer_system,
                          method_name=method_name,
                          gap=monomer_gap,
                          temp=intra_temp,
                          **kwargs)

    if len(intra_data) == 0:
        raise RuntimeError('Failed to train the intra-system')

    # Intra GAP rebuilt on the full system so it has the molecule indexes
    intra_gap = gt.gap.IntraGAP(name=f'intra_{monomer.name}',
                                system=system,
                                molecule=monomer)
    inter_gap = gt.InterGAP(name=f'inter_{monomer.name}', system=system)

    # Inter component: trained on the full system with the combined GAP
    inter_data, gap = gt.active.train(system,
                                      method_name=method_name,
                                      temp=inter_temp,
                                      gap=gt.IIGAP(intra_gap, inter_gap),
                                      **kwargs)

    return (intra_data, inter_data), gap
def train_intra_zn():
    """
    Run active learning for a GAP named 'intra_znh2o6' on the module-level
    ``znh2o6`` system, using GPAW as the reference method at 1000 K with
    validation enabled. Only a SOAP centred on O is defined, with a 3.0
    cutoff and Zn, H and O as the neighbour species.

    :return: None
    """
    from copy import deepcopy

    gap = gt.GAP(name='intra_znh2o6', system=znh2o6, default_params=False)

    # Work on a copy: assigning the GTConfig dictionary itself and then
    # editing it would silently change the defaults for every later GAP
    gap.params.soap['O'] = deepcopy(gt.GTConfig.gap_default_soap_params)
    gap.params.soap['O']['cutoff'] = 3.0
    gap.params.soap['O']['other'] = ['Zn', 'H', 'O']

    _, _ = gt.active.train(znh2o6,
                           gap=gap,
                           method_name='gpaw',
                           validate=True,
                           temp=1000,
                           tau_max=1000,
                           active_e_thresh=0.1,
                           n_configs_iter=20)
    return None
def test_gap_train():
    """
    Load the 'rnd_training.xyz' data set and check its size. The GAP itself
    is only trained when the GT_GAP environment variable is exactly 'True'.
    """
    system = gt.System(box_size=[10, 10, 10])
    xyz_path = os.path.join(here, 'data', 'rnd_training.xyz')

    training_data = gt.Data(name='test')
    training_data.load(system=system, filename=xyz_path)

    assert len(training_data) == 10
    assert len(training_data[0].atoms) == 31

    # Training is opt-in: skipped unless GT_GAP is set to 'True'
    if os.environ.get('GT_GAP') == 'True':
        gap = gt.GAP(name='test', system=system)
        gap.train(training_data)
def test_gap():
    """
    Check the attributes and default parameters of a freshly built GAP for
    a system of two module-level ``h2o`` molecules.
    """
    dimer = gt.System(box_size=[3.0, 3.0, 3.0])
    dimer.add_molecules(h2o, n=2)

    gap = gt.GAP(name='test', system=dimer)

    for attr_name in ('name', 'params'):
        assert hasattr(gap, attr_name)

    # Nothing has been trained yet
    assert gap.training_data is None

    for section in ('general', 'pairwise', 'soap'):
        assert hasattr(gap.params, section)

    # By default should only add a SOAP to non-hydrogen elements
    soap_params = gap.params.soap
    assert 'O' in soap_params.keys()
    assert len(list(soap_params)) == 1
for r1 in np.linspace(0.8, 1.5, n_to_cube): for r2 in np.linspace(0.8, 1.5, n_to_cube): for r3 in np.linspace(1.0, 2.5, n_to_cube): h2o = get_h2o(r1, r2, r3) configs += h2o configs.parallel_cp2k() return configs if __name__ == '__main__': water_monomer = gt.System(box_size=[8, 8, 8]) water_monomer.add_solvent('h2o', n=1) gap = gt.GAP(name=f'monomer_2b_3b', system=water_monomer, default_params=None) # Should only have O-H gap.params.pairwise[('O', 'H')] = gt.GTConfig.gap_default_2b_params.copy() gap.params.pairwise[('O', 'H')]['cutoff'] = 3.0 gap.params.angle[('H', 'O', 'H')] = gt.GTConfig.gap_default_2b_params.copy() gap.params.angle[('H', 'O', 'H')]['cutoff'] = 3.0 train_data = grid_configs(n_to_cube=7) gap.train(train_data)
from copy import deepcopy

import gaptrain as gt
from autode.wrappers.keywords import GradientKeywords

gt.GTConfig.n_cores = 8


if __name__ == '__main__':

    # Reference method used during the active learning
    gt.GTConfig.orca_keywords = GradientKeywords(
        ['PBE', 'ma-def2-SVP', 'EnGrad'])

    # large box to ensure no self-interaction
    sn2_ts = gt.System(box_size=[20, 20, 20])
    sn2_ts.add_molecules(gt.Molecule('ts.xyz', charge=-1))

    gap = gt.GAP(name='sn2_gap', system=sn2_ts, default_params=False)

    # Copy the defaults so the edits below do not alter GTConfig itself
    gap.params.soap['C'] = deepcopy(gt.GTConfig.gap_default_soap_params)
    gap.params.soap['C']['other'] = ['H', 'Cl']
    gap.params.soap['C']['cutoff'] = 6.0

    # Pass the GAP configured above; without gap=gap it was never used and
    # the training ran with a default GAP instead
    data, gap = gt.active.train(sn2_ts,
                                method_name='orca',
                                gap=gap,
                                temp=500,
                                active_e_thresh=0.1,
                                max_time_active_fs=500,
                                fix_init_config=True)

    # 'uplift' the configurations obtained at PBE/DZ to DLPNO-CCSD(T)/TZ
    gt.GTConfig.orca_keywords = GradientKeywords(
        ['DLPNO-CCSD(T)', 'ma-def2-TZVPP', 'NumGrad', 'AutoAux', 'EnGrad'])
    data.parallel_orca()

    # and retrain on the re-evaluated data
    gap.train(data)
from copy import deepcopy

import gaptrain as gt
from autode.wrappers.keywords import GradientKeywords

gt.GTConfig.n_cores = 10
gt.GTConfig.orca_keywords = GradientKeywords(['B3LYP', 'def2-SVP', 'EnGrad'])

# For non-periodic systems there's no need to define a box, but a System
# requires one
ts = gt.System(box_size=[10, 10, 10])
ts.add_molecules(gt.Molecule('ts1_prime.xyz'))

gap = gt.GAP(name='da_gap', system=ts, default_params={})

# Copy the defaults so the edits below do not alter GTConfig itself
gap.params.soap['C'] = deepcopy(gt.GTConfig.gap_default_soap_params)
gap.params.soap['C']['cutoff'] = 3.0
gap.params.soap['C']['other'] = ['H', 'C']

data, gap = gt.active.train(
    system=ts,
    method_name='orca',
    gap=gap,
    max_time_active_fs=200,
    temp=500,
    active_e_thresh=3 * 0.043,       # 3 kcal mol-1
    max_energy_threshold=5,
    max_active_iters=50,
    n_init_configs=10,
    fix_init_config=True)
h2o = get_h2o(r1, r2, r3) configs += h2o return configs if __name__ == '__main__': water_monomer = gt.System(box_size=[10, 10, 10]) water_monomer.add_solvent('h2o', n=1) # Load the grid configurations and evaluate at PBE/400eV grid_configs = grid_configs(n_to_cube=8) grid_configs.parallel_gpaw() gap = gt.GAP(name=f'water_intra_gap', system=water_monomer, default_params=False) gap.params.pairwise[('O', 'H')] = deepcopy(gt.GTConfig.gap_default_2b_params) gap.params.pairwise[('O', 'H')]['cutoff'] = 3.0 gap.params.pairwise[('H', 'H')] = deepcopy(gt.GTConfig.gap_default_2b_params) gap.params.pairwise[('H', 'H')]['cutoff'] = 3.0 gap.params.angle[('H', 'O', 'H')] = deepcopy(gt.GTConfig.gap_default_2b_params) gap.params.angle[('H', 'O', 'H')]['cutoff'] = 3.0 gap.train(grid_configs)
def train_ss(system, method_name, intra_temp=1000, inter_temp=300, **kwargs):
    """
    Train a solute-solvent (SS) GAP from just a system containing two
    molecular species. The intramolecular component of the solute and of the
    solvent are each trained on a one-molecule system in the same box, then
    the intermolecular component is trained on the full system.

    ---------------------------------------------------------------------------
    :param system: (gt.System)

    :param method_name: (str) e.g dftb

    :param intra_temp: (float) Temperature to run the intramolecular training

    :param inter_temp: (float) Temperature to run the intermolecular training

    :param kwargs: Passed on to every training call. Must not contain 'temp'

    :return: ((solute_data, solvent_data, inter_data), gap) where gap is the
             second value returned from the intermolecular training call

    :raises ValueError: If the system does not have exactly two unique
                        molecules, or 'temp' is given in kwargs
    """
    if system.n_unique_molecules != 2:
        raise ValueError('Can only train an solute-solvent GAP for a system '
                         'with two molecules, the solute and the solvent')

    # Same guard as train_ii: a bare temp would clash with the explicit
    # temp=... passed to each training call below
    if 'temp' in kwargs:
        raise ValueError('Ambiguous specification, please specify: intra_temp '
                         'and inter_temp')

    # Find the least, and most abundant molecules in the system, as the solute
    # and solvent respectively. Ties are broken by name so that the choice
    # does not depend on set ordering
    names = [mol.name for mol in system.molecules]
    solute_name, solv_name = sorted(set(names),
                                    key=lambda nm: (names.count(nm), nm))

    solute = next(mol for mol in system.molecules if mol.name == solute_name)
    solv = next(mol for mol in system.molecules if mol.name == solv_name)

    data = []   # List of training data for all the components in the system

    # Train the intramolecular components of the potential for the solute and
    # the solvent
    for molecule in (solute, solv):
        # Create a system with only one molecule
        intra_system = gt.System(box_size=system.box.size)
        intra_system.add_molecules(molecule)

        # and train..
        logger.info(f'Training intramolecular component of {molecule.name}')
        mol_data, _ = gt.active.train(intra_system,
                                      gap=gt.GAP(name=f'intra_{molecule.name}',
                                                 system=intra_system),
                                      method_name=method_name,
                                      temp=intra_temp,
                                      **kwargs)
        data.append(mol_data)

    # Recreate the GAPs against the full system, rather than the
    # single-molecule systems used for the training above
    solv_gap = gt.gap.SolventIntraGAP(name=f'intra_{solv.name}',
                                      system=system)
    solute_gap = gt.gap.SoluteIntraGAP(name=f'intra_{solute.name}',
                                       system=system,
                                       molecule=solute)
    inter_gap = gt.InterGAP(name='inter', system=system)

    # and finally train the intermolecular part of the potential
    inter_data, gap = gt.active.train(system,
                                      method_name=method_name,
                                      gap=gt.gap.SSGAP(solute_intra=solute_gap,
                                                       solvent_intra=solv_gap,
                                                       inter=inter_gap),
                                      temp=inter_temp,
                                      **kwargs)
    data.append(inter_data)

    return tuple(data), gap
def train(system: gt.System,
          method_name: str,
          gap=None,
          max_time_active_fs=1000,
          min_time_active_fs=0,
          n_configs_iter=10,
          temp=300,
          active_e_thresh=None,
          active_method='diff',
          max_energy_threshold=None,
          validate=False,
          tau=None,
          tau_max=None,
          val_interval=None,
          max_active_iters=50,
          n_init_configs=10,
          init_configs=None,
          remove_intra_init_configs=True,
          fix_init_config=False,
          bbond_energy=None,
          fbond_energy=None,
          init_active_temp=None):
    """
    Train a system using active learning, by propagating dynamics using ML
    driven molecular dynamics (MD) and adding configurations where the error
    is above a threshold. Loop looks something like

    Generate configurations -> train a GAP -> run GAP-MD -> frames with error
                                    ^                               |
                                    |________ calc true ____________|

    Active learning will loop until either (1) the iteration >
    max_active_iters or (2) no configurations are found to add or (3) if
    calculated τ = max(τ) where the loop will break out

    --------------------------------------------------------------------------
    :param system: (gt.system.System)

    :param method_name: (str) Name of a method to use as the ground truth e.g.
                        dftb, orca, gpaw

    :param gap: (gt.gap.GAP) GAP to train with the active learnt data, if
                None then one will be initialised by placing SOAPs on each
                heavy atom and defining the 'other' atom types included in the
                neighbour density by their proximity. Distance cutoffs default
                to 3.5 Å for all atoms

    :param max_time_active_fs: (float) Maximum propagation time in the active
                               learning loop. Default = 1 ps

    :param min_time_active_fs: (float) Minimum propagation time for an
                               active learnt configuration. Will be updated
                               so the error is only calculated where the GAP
                               is unlikely to be accurate

    :param n_configs_iter: (int) Number of configurations to generate per
                           active learning cycle

    :param temp: (float) Temperature in K to propagate active learning at -
                 higher is better for stability but requires more training

    :param active_method: (str) Method used to generate active learnt
                          configurations. One of ['diff', 'qbc', 'gp_var']

    :param active_e_thresh: (float) Threshold in eV (E_t) above which a
                            configuration is added to the potential. If None
                            then a default is chosen from active_method:
                            1 kcal mol-1 molecule-1 for 'diff',
                            1E-6 eV molecule-1 for 'qbc' and 5E-5 for 'gp_var'

                            1. active_method='diff': |E_0 - E_GAP| > E_t

                            2. active_method='qbc': σ(E_GAP1, E_GAP2...) > E_t

                            3. active_method='gp_var': σ^2_GAP(predicted) > E_t

    :param max_energy_threshold: (float) Maximum relative energy threshold for
                                 configurations to be added to the training
                                 data

    :param validate: (bool) Whether or not to validate the potential during
                     the training. If True, τ values are written to
                     '<gap name>_tau.txt'

    :param tau: (gt.loss.Tau) An instance of the τ error metric, unused if no
                validation is performed. If None and validate is True then
                one is created from 5 initial configurations, with a lower
                energy bound of 1 kcal mol-1 molecule-1 and max_fs = tau_max
                (1000 fs if tau_max is None)

    :param tau_max: (float | None) Maximum τ_acc in fs, used as max_fs when a
                    default τ metric is created. The loop breaks once the
                    calculated τ is within 1 of tau.max_time

    :param val_interval: (int) Interval in the active training loop at which to
                         run the validation. Defaults to max_active_iters // 10
                         (at least 1) if validation is requested

    :param max_active_iters: (int) Maximum number of active learning
                             iterations to perform. Will break if we hit the
                             early stopping criteria

    :param n_init_configs: (int) Number of initial configurations to generate,
                           will be ignored if init_configs is not None

    :param init_configs: (gt.ConfigurationSet) A set of configurations from
                         which to start the active learning from

    :param remove_intra_init_configs: (bool) Whether the intramolecular
                                      component of the energy/force needs to
                                      be removed prior to training with
                                      init_configs. only applies for IIGAP
                                      and init_configs != None

    :param fix_init_config: (bool) Always start from the same initial
                            configuration for the active learning loop, if
                            False then the minimum energy structure is used.
                            Useful for TS learning, where dynamics should be
                            propagated from a saddle point not the minimum

    :param bbond_energy: (dict | None) Additional energy to add to a breaking
                         bond. e.g. bbond_energy={(0, 1): 0.1} Adds 0.1 eV
                         to the 'bond' between atoms 0 and 1 as velocities
                         shared between the atoms in the breaking bond
                         direction

    :param fbond_energy: (dict | None) As bbond_energy but in the direction to
                         form a bond

    :param init_active_temp: (float | None) Initial temperature for velocities
                             in the 'active' MD search for configurations

    :return: (gt.Data, gt.GAP)

    :raises AssertionError: If any initial configuration has no energy
    """
    init_configs = get_init_configs(init_configs=init_configs,
                                    n=n_init_configs,
                                    method_name=method_name,
                                    system=system)

    # Remove the intra-molecular energy if an intra+inter (II) GAP is being
    # trained
    do_remove_intra = isinstance(gap, gt.IIGAP)
    if do_remove_intra and remove_intra_init_configs:
        remove_intra(init_configs, gap=gap)

    # Initial configuration must have energies
    assert all(cfg.energy is not None for cfg in init_configs)

    if gap is None:
        gap = gt.GAP(name=unique_name('active_gap'), system=system)

    # Initialise a τ metric with default parameters
    if validate and tau is None:
        # 1 ps default maximum tau
        tau = gt.loss.Tau(configs=get_init_configs(system, n=5),
                          e_lower=0.043363 * len(system.molecules),
                          max_fs=tau_max if tau_max is not None else 1000)

    # Default to validating 10 times through the training
    if validate and val_interval is None:
        val_interval = max(max_active_iters // 10, 1)

    # Initialise training data
    train_data = gt.Data(name=gap.name)
    train_data += init_configs

    # and train an initial GAP
    gap.train(init_configs)

    if active_e_thresh is None:
        if active_method.lower() == 'diff':
            # 1 kcal mol-1 molecule-1
            active_e_thresh = 0.043363 * len(system.molecules)

        if active_method.lower() == 'qbc':
            # optimised on a small box of water. std dev. for total energy
            active_e_thresh = 1E-6 * len(system.molecules)

        if active_method.lower() == 'gp_var':
            # Threshold for maximum per-atom GP variance (eV atom^-1)
            active_e_thresh = 5E-5

    # Initialise the validation output file. utf-8 is given explicitly as the
    # header contains a non-ASCII character (τ)
    tau_file = None
    if validate:
        tau_file = open(f'{gap.name}_tau.txt', 'w', encoding='utf-8')

    # try/finally so the validation file is closed however the loop exits
    try:
        if tau_file is not None:
            print('Iteration n_evals τ_acc / fs', file=tau_file)

        # Run the active learning loop, running iterative GAP-MD
        for iteration in range(max_active_iters):

            # Set the configuration from which GAP-MD will be run
            min_idx = int(np.argmin(train_data.energies()))
            init_config = (train_data[0] if fix_init_config
                           else train_data[min_idx])

            configs = get_active_configs(init_config,
                                         gap=gap,
                                         ref_method_name=method_name,
                                         method=str(active_method),
                                         n_configs=n_configs_iter,
                                         temp=temp,
                                         e_thresh=active_e_thresh,
                                         max_time_fs=max_time_active_fs,
                                         min_time_fs=min_time_active_fs,
                                         bbond_energy=bbond_energy,
                                         fbond_energy=fbond_energy,
                                         init_temp=init_active_temp)

            # Active learning finds no configurations
            if len(configs) == 0:
                # Calculate the final tau if we're running with validation
                if validate:
                    tau.calculate(gap=gap, method_name=method_name)
                    print(iteration, tau.value, sep='\t\t\t', file=tau_file)

                logger.info('No configs to add. Active learning = DONE')
                break

            # Later iterations only start checking the error after the
            # shortest time any of these configurations reached
            min_time_active_fs = min(config.t0 for config in configs)
            logger.info(f'All active configurations reached t = '
                        f'{min_time_active_fs} fs before an error exceeded the '
                        f'threshold of {active_e_thresh:.3f} eV')

            if do_remove_intra:
                remove_intra(configs, gap=gap)

            train_data += configs

            # If required remove high-lying energy configuration from the data
            if max_energy_threshold is not None:
                train_data.remove_above_e(max_energy_threshold)

            # Retrain on these new data
            gap.train(train_data)

            # Print the accuracy
            if validate and iteration % val_interval == 0:

                tau.calculate(gap=gap, method_name=method_name)
                print(f'{iteration:<13g}'
                      f'{sum(config.n_evals for config in train_data):<13g}'
                      f'{tau.value}', sep='\t', file=tau_file)

                if np.abs(tau.value - tau.max_time) < 1:
                    logger.info('Reached the maximum tau. '
                                'Active learning = DONE')
                    break

    finally:
        if tau_file is not None:
            tau_file.close()

    return train_data, gap