def test_delta_add_to_dataset(self):
    """
    Check add_to_dataset() against a hand-recombined parent prediction.

    The image returned by add_to_dataset() is combined by hand with the
    base calculator prediction and the two reference energies held in
    ``self.OAL_learner.refs``. The result must match a direct EMT
    calculation on the same geometry within ENERGY_THRESHOLD and
    FORCE_THRESHOLD.
    """
    atoms_copy = self.OAL_learner.parent_dataset[-1].copy()
    atoms_copy.set_calculator(EMT())
    [atoms_delta_sub] = self.OAL_learner.add_to_dataset(atoms_copy)
    delta_sub_energy = atoms_delta_sub.get_potential_energy()
    delta_sub_forces = atoms_delta_sub.get_forces()

    # Direct EMT singlepoint on the same geometry (the comparison target).
    atoms_copy = atoms_copy.copy()
    atoms_copy.set_calculator(EMT())
    (atoms_parent,) = convert_to_singlepoint([atoms_copy])
    parent_energy = atoms_parent.get_potential_energy()
    parent_forces = atoms_parent.get_forces()

    # Base calculator singlepoint on the same geometry.
    atoms_copy = atoms_copy.copy()
    atoms_copy.set_calculator(self.OAL_learner.base_calc)
    (atoms_base,) = convert_to_singlepoint([atoms_copy])
    base_energy = atoms_base.get_potential_energy()
    base_forces = atoms_base.get_forces()

    parent_ref_energy = self.OAL_learner.refs[0].get_potential_energy()
    base_ref_energy = self.OAL_learner.refs[1].get_potential_energy()

    # parent = (delta_sub + parent ref) + (base - base ref)
    delta_hand_energy = (delta_sub_energy + parent_ref_energy) + (
        base_energy - base_ref_energy
    )
    delta_hand_forces = delta_sub_forces + base_forces

    assert self.OAL_learner.parent_dataset[-1] == atoms_parent

    # The newline before "with calculated parent prediction" keeps the
    # printed value from running straight into the following label.
    assert np.allclose(
        delta_hand_energy,
        parent_energy,
        atol=ENERGY_THRESHOLD,
    ), (
        "DeltaLearner add_to_dataset() + base calc energy inconsistent:\n"
        f"{delta_hand_energy!s}"
        "\nwith calculated parent prediction:\n"
        f"{parent_energy!s}"
        "\ncomposed of:\n the add_to_dataset() parent prediction:\n"
        f"{delta_sub_energy!s}"
        "\n the base calc prediction:\n"
        f"{base_energy!s}"
        "\n the parent ref:\n"
        f"{parent_ref_energy!s}"
        "\n the base ref:\n"
        f"{base_ref_energy!s}"
        "\nfor Energy Threshold: "
        f"{ENERGY_THRESHOLD!s}"
    )

    assert np.allclose(
        delta_hand_forces,
        parent_forces,
        atol=FORCE_THRESHOLD,
    ), (
        "DeltaLearner add_to_dataset() + base calc forces inconsistent:\n"
        f"{delta_hand_forces!s}"
        "\nwith calculated parent prediction:\n"
        f"{parent_forces!s}"
        "\ncomposed of:\n the add_to_dataset() parent prediction:\n"
        f"{delta_sub_forces!s}"
        "\n the base calc prediction:\n"
        f"{base_forces!s}"
        "\nfor Force Threshold: "
        f"{FORCE_THRESHOLD!s}"
    )
def test_delta_get_ml_prediction(self):
    """
    Check get_ml_prediction() against a hand-assembled delta prediction.

    The learner's prediction must equal the raw ``ml_potential`` output
    recombined with the base calculator prediction and the reference
    energies in ``self.OAL_learner.refs``, within ENERGY_THRESHOLD and
    FORCE_THRESHOLD.
    """
    learner = self.OAL_learner
    geometry = learner.parent_dataset[-1].copy()

    # Prediction as produced by the learner itself.
    learner_image = learner.get_ml_prediction(geometry.copy())
    delta_sub_energy = learner_image.get_potential_energy()
    delta_sub_forces = learner_image.get_forces()

    # Raw output of the ML potential on the same geometry.
    geometry = geometry.copy()
    geometry.set_calculator(learner.ml_potential)
    [raw_ml_image] = convert_to_singlepoint([geometry])
    ml_trained_on_diff_energy = raw_ml_image.get_potential_energy()
    ml_trained_on_diff_forces = raw_ml_image.get_forces()

    # Base calculator output on the same geometry.
    geometry = geometry.copy()
    geometry.set_calculator(learner.base_calc)
    [base_image] = convert_to_singlepoint([geometry])
    base_energy = base_image.get_potential_energy()
    base_forces = base_image.get_forces()

    parent_ref_energy = learner.refs[0].get_potential_energy()
    base_ref_energy = learner.refs[1].get_potential_energy()

    # ml = (parent - parent ref) - (base - base ref)
    # parent = (base - base ref) + (ml + parent ref)
    shifted_ml_energy = ml_trained_on_diff_energy + parent_ref_energy
    shifted_base_energy = base_energy - base_ref_energy
    delta_hand_energy = shifted_ml_energy + shifted_base_energy
    delta_hand_forces = ml_trained_on_diff_forces + base_forces

    energy_matches = np.allclose(
        delta_sub_energy,
        delta_hand_energy,
        atol=ENERGY_THRESHOLD,
    )
    assert energy_matches, (
        "DeltaLearner get_ml_prediction() energy inconsistent:\n"
        f"{delta_sub_energy!s}"
        "\nwith hand calculated ML prediction delta:\n"
        f"{delta_hand_energy!s}"
        "\ncomposed of:\n the ML trained on difference prediction:\n"
        f"{ml_trained_on_diff_energy!s}"
        "\n the base calc prediction:\n"
        f"{base_energy!s}"
        "\n the parent ref:\n"
        f"{parent_ref_energy!s}"
        "\n the base ref:\n"
        f"{base_ref_energy!s}"
        "\nfor Energy Threshold: "
        f"{ENERGY_THRESHOLD!s}"
    )

    forces_match = np.allclose(
        delta_sub_forces,
        delta_hand_forces,
        atol=FORCE_THRESHOLD,
    )
    assert forces_match, (
        "DeltaLearner get_ml_prediction() forces inconsistent:\n"
        f"{delta_sub_forces!s}"
        "\nwith hand calculated ML prediction delta:\n"
        f"{delta_hand_forces!s}"
        "\ncomposed of:\n the ML trained on difference prediction:\n"
        f"{ml_trained_on_diff_forces!s}"
        "\n the base calc prediction:\n"
        f"{base_forces!s}"
        "\nfor Force Threshold: "
        f"{FORCE_THRESHOLD!s}"
    )
def calculate(self, atoms, properties, system_changes):
    """
    Fill ``self.results`` with an energy and forces for ``atoms``.

    With fewer than two stored parent points the parent calculation is
    always requested through add_data_and_retrain(). Otherwise the
    ensemble prediction is used unless unsafe_prediction() or
    parent_verify() flags it, in which case the parent is queried too.
    """
    Calculator.calculate(self, atoms, properties, system_changes)

    # With fewer than two data points the uncertainty is not well
    # calibrated, so go straight to the parent calculation.
    query_parent = len(self.parent_dataset) < 2

    if not query_parent:
        # Evaluate a copy with the ensemble and freeze it as a singlepoint.
        ensemble_atoms = atoms.copy()
        ensemble_atoms.set_calculator(self.ensemble_calc)
        [ensemble_image] = convert_to_singlepoint([ensemble_atoms])

        # Extrapolating too far (or verification requested): add/retrain.
        query_parent = self.unsafe_prediction(
            ensemble_image
        ) or self.parent_verify(ensemble_image)

    if query_parent:
        # The parent was run, so use its energy and forces directly.
        energy, forces = self.add_data_and_retrain(atoms)
    else:
        energy = ensemble_image.get_potential_energy(apply_constraint=False)
        forces = ensemble_image.get_forces(apply_constraint=False)

    self.results["energy"] = energy
    self.results["forces"] = forces
def get_ml_prediction(self, atoms):
    """
    Return a singlepoint image of ``atoms`` evaluated with the delta-add
    calculator, carrying over the ``info`` produced by the ML potential.

    A copy of ``atoms`` is first evaluated with ``self.ml_potential`` and
    then with ``self.add_delta_calc``. Every entry of the first result's
    ``info`` dict is written into the second result's ``info`` before it
    is returned. The input ``atoms`` is expected to have no calculator
    attached.
    """
    scratch = atoms.copy()

    # First pass: plain ML potential, kept only for its info entries.
    scratch.set_calculator(self.ml_potential)
    [ml_info_image] = convert_to_singlepoint([scratch])

    # Second pass: the delta-add calculator gives the returned result.
    scratch.set_calculator(self.add_delta_calc)
    [atoms_ML] = convert_to_singlepoint([scratch])

    atoms_ML.info.update(ml_info_image.info)
    return atoms_ML
def __init__(
    self,
    learner_params,
    trainer,
    parent_dataset,
    parent_calc,
):
    """
    Set up the learner from an initial parent dataset.

    Parameters
    ----------
    learner_params : dict
        Must contain "uncertain_tol". "n_ensembles" is required when
        more than one initial data point is given.
        "fmax_verify_threshold" is optional.
    trainer
        Passed to EnsembleCalc.make_ensemble() together with the
        ensemble sets.
    parent_dataset
        Initial images; converted with convert_to_singlepoint().
    parent_calc
        Calculator stored as ``self.parent_calc``.
    """
    Calculator.__init__(self)

    self.parent_calc = parent_calc
    self.trainer = trainer
    self.learner_params = learner_params
    self.parent_dataset = convert_to_singlepoint(parent_dataset)

    # Don't bother making an ensemble with only one data point,
    # as the uncertainty is meaningless
    if len(self.parent_dataset) > 1:
        # Build the ensemble from the converted singlepoint images. The
        # raw input would discard the conversion done above, because the
        # returned dataset replaces self.parent_dataset.
        self.ensemble_sets, self.parent_dataset = non_bootstrap_ensemble(
            self.parent_dataset,
            n_ensembles=self.learner_params["n_ensembles"],
        )
        self.ensemble_calc = EnsembleCalc.make_ensemble(
            self.ensemble_sets, self.trainer
        )

    # Comparisons against NaN are always False, so a missing threshold
    # presumably disables the fmax verification check.
    self.fmax_verify_threshold = self.learner_params.get(
        "fmax_verify_threshold", np.nan
    )

    self.uncertain_tol = learner_params["uncertain_tol"]
    self.parent_calls = 0
def get_ml_prediction(self, atoms):
    """
    Evaluate a copy of ``atoms`` with ``self.ml_potential`` and return it
    as a singlepoint image.

    ``atoms`` is expected to have no calculator attached. Intended as an
    override point for subclasses (DeltaLearner) that need to modify ML
    predictions.
    """
    candidate = atoms.copy()
    candidate.set_calculator(self.ml_potential)
    [atoms_ML] = convert_to_singlepoint([candidate])
    return atoms_ML
def init_training_data(self):
    """
    Prepare the training data by attaching delta values for training.

    The first singlepoint image of ``self.parent_dataset`` is the parent
    reference, and the same geometry evaluated with ``self.base_calc`` is
    the base reference. Both are stored in ``self.refs`` and handed to a
    "sub" DeltaCalc. Every image is re-evaluated with that calculator
    before bootstrap_ensemble() builds ``self.ensemble_sets`` and the new
    ``self.parent_dataset``.
    """
    raw_data = self.parent_dataset
    sp_raw_data = convert_to_singlepoint(raw_data)

    # Use the singlepoint image itself as the reference: Atoms.copy()
    # does not carry the calculator over, so a copy could not report the
    # parent reference energy.
    parent_ref_image = sp_raw_data[0]
    base_ref_image = compute_with_calc([parent_ref_image], self.base_calc)[0]
    self.refs = [parent_ref_image, base_ref_image]

    self.delta_sub_calc = DeltaCalc(self.calcs, "sub", self.refs)
    self.ensemble_sets, self.parent_dataset = bootstrap_ensemble(
        compute_with_calc(sp_raw_data, self.delta_sub_calc)
    )
def init_training_data(self):
    """
    Replace ``self.training_data`` with delta-subtracted images.

    The first singlepoint image is the parent reference, and the same
    image evaluated with ``self.base_calc`` is the base reference. Both
    are stored in ``self.refs``. A "sub" DeltaCalc built from
    ``self.calcs`` and those references is kept as
    ``self.delta_sub_calc`` and used to recompute every training image.
    """
    singlepoints = convert_to_singlepoint(self.training_data)

    # References: first parent image and its base-calculator counterpart.
    parent_reference = singlepoints[0]
    [base_reference] = compute_with_calc([parent_reference], self.base_calc)[:1]
    self.refs = [parent_reference, base_reference]

    self.delta_sub_calc = DeltaCalc(self.calcs, "sub", self.refs)
    self.training_data = compute_with_calc(singlepoints, self.delta_sub_calc)
def init_training_data(self):
    """
    Rebuild ``self.training_data`` image by image as delta-subtracted data.

    References are set up from the first singlepoint image and its
    ``self.base_calc`` evaluation, and stored in ``self.refs``. Each
    image is then recomputed with its own "sub" DeltaCalc, built from the
    calculator already attached to that image and ``self.base_calc``.
    ``self.delta_sub_calc`` (built from ``self.calcs``) is also created.
    """
    singlepoints = convert_to_singlepoint(self.training_data)

    parent_reference = singlepoints[0]
    [base_reference] = compute_with_calc([parent_reference], self.base_calc)[:1]
    self.refs = [parent_reference, base_reference]
    self.delta_sub_calc = DeltaCalc(self.calcs, "sub", self.refs)

    self.training_data = []
    for sp_image in singlepoints:
        # The calculator attached to the image stands in for the parent.
        attached_calc = sp_image.get_calculator()
        attached_calc.implemented_properties = ["energy", "forces"]
        per_image_delta = DeltaCalc(
            [attached_calc, self.base_calc], "sub", self.refs
        )
        self.training_data.extend(compute_with_calc([sp_image], per_image_delta))
def add_data_and_retrain(self, atoms):
    """
    Run the parent calculator on ``atoms``, add the result to the dataset
    and rebuild the ensemble.

    A copy of ``atoms`` is evaluated with a shallow copy of
    ``self.parent_calc``. The new singlepoint is passed to
    non_bootstrap_ensemble() together with the current dataset. The
    ensemble calculator is rebuilt only once at least two data points
    exist.

    Returns the parent energy and forces, both taken with
    ``apply_constraint=False``.
    """
    print("OnlineLearner: Parent calculation required")

    parent_atoms = atoms.copy()
    parent_atoms.set_calculator(copy.copy(self.parent_calc))
    [parent_image] = convert_to_singlepoint([parent_atoms])

    energy_actual = parent_image.get_potential_energy(apply_constraint=False)
    force_actual = parent_image.get_forces(apply_constraint=False)

    ensemble_sets, dataset = non_bootstrap_ensemble(
        self.parent_dataset,
        parent_image,
        n_ensembles=self.learner_params["n_ensembles"],
    )
    self.ensemble_sets, self.parent_dataset = ensemble_sets, dataset

    # Don't bother training if we have less than two datapoints
    enough_data = len(self.parent_dataset) >= 2
    if enough_data:
        self.ensemble_calc = EnsembleCalc.make_ensemble(
            self.ensemble_sets, self.trainer
        )

    self.parent_calls += 1
    return energy_actual, force_actual
def add_data_and_retrain(self, atoms):
    """
    Obtain parent data for ``atoms``, store it and retrain the ML potential.

    Steps, in order:
      1. Reset ``self.steps_since_last_query``.
      2. Either trust the calculator already attached to ``atoms`` or run
         ``self.parent_calc`` on it, recording ``info["parent_time"]``.
      3. Record the image in ``self.complete_dataset``.
      4. Optionally reduce it with convert_to_top_k_forces(), then pass
         it to add_to_dataset().
      5. Retrain ``self.ml_potential`` according to the dataset size and
         the recent-points / partial-fit settings, recording
         ``info["training_time"]``.

    Returns a tuple ``(energy, forces, forces_default)``: energy and
    forces taken with ``apply_constraint=self.constraint``, plus the
    forces from a plain ``get_forces()`` call.
    """
    self.steps_since_last_query = 0

    # Don't redo singlepoints if not instructed to reverify and the atoms
    # already carry a suitable singlepoint. The attached calculator is
    # only inspected when reverification is switched off.
    use_attached_result = False
    if self.reverify_with_parent is False:
        attached_calc = getattr(atoms, "calc", None)
        if attached_calc is not None:
            use_attached_result = (
                type(attached_calc) is SinglePointCalculator
                or attached_calc.name == self.parent_calc.name
            )

    if use_attached_result:
        if not self.suppress_warnings:
            warn(
                "Assuming Atoms object Singlepoint is precalculated "
                "(to turn this behavior off: set 'reverify_with_parent' to True)"
            )
        new_data = atoms
    else:
        # Verifying (or reverifying): run the singlepoint and time it.
        print("OnlineLearner: Parent calculation required")
        parent_started = time.time()
        atoms.set_calculator(self.parent_calc)
        [new_data] = convert_to_singlepoint([atoms])
        parent_elapsed = time.time() - parent_started
        print(
            f"Time to call parent (call #{self.parent_calls!s}): "
            f"{parent_elapsed!s}"
        )
        self.info["parent_time"] = parent_elapsed

    # Complete dataset (for atomistic methods optimizer replay): either
    # grow it or keep only the latest image.
    if self.store_complete_dataset:
        self.complete_dataset.append(new_data)
    else:
        self.complete_dataset = [new_data]

    # Before adding to the parent (training) dataset, convert to top k
    # forces if applicable.
    training_data = new_data
    if self.train_on_top_k_forces is not None:
        [training_data] = convert_to_top_k_forces(
            [new_data], self.train_on_top_k_forces
        )

    # Add to the parent dataset; the returned partial dataset is what a
    # partial fit trains on.
    partial_dataset = self.add_to_dataset(training_data)

    self.parent_calls += 1

    training_started = time.time()
    dataset_size = len(self.parent_dataset)

    if dataset_size > self.num_initial_points:
        # More than enough data for the ML potential to be usable.
        recent = self.train_on_recent_points
        if (
            self.trained_at_least_once
            and recent is not None
            and dataset_size > recent
        ):
            # Trained before and the dataset is long enough: train on the
            # most recent subset only.
            self.ml_potential.train(self.parent_dataset[-recent:])
        elif self.trained_at_least_once and recent is None and self.partial_fit:
            # Partial fit, but never on the very first training.
            self.ml_potential.train(self.parent_dataset, partial_dataset)
        else:
            # Otherwise just train as normal. The two branches above
            # already require the flag to be set, so it is raised here.
            self.ml_potential.train(self.parent_dataset)
            self.trained_at_least_once = True
    elif dataset_size == self.num_initial_points:
        # The data requirement has just been met: train for the first
        # time on only the initial points to keep.
        self.parent_dataset = [
            self.parent_dataset[index] for index in self.initial_points_to_keep
        ]
        self.num_initial_points = len(self.parent_dataset)
        self.ml_potential.train(self.parent_dataset)
        self.trained_at_least_once = True

    self.info["training_time"] = time.time() - training_started

    # Energy and forces of the parent result, with and without the
    # configured constraint setting.
    energy_actual = new_data.get_potential_energy(
        apply_constraint=self.constraint
    )
    force_actual = new_data.get_forces(apply_constraint=self.constraint)
    force_cons = new_data.get_forces()
    return energy_actual, force_actual, force_cons