def init_refs(self, initial_structure):
    """
    Build the parent/base reference images and the "add" delta calculator.

    Parameters
    ----------
    initial_structure: ase Atoms object
        Structure whose attached calculator is deep-copied onto the parent
        reference. A separate copy of it is evaluated with ``self.base_calc``
        to obtain the base reference.
    """
    # Atoms.copy() is followed by an explicit calculator assignment so the
    # parent reference gets its own deep copy of the original calculator.
    structure_copy = initial_structure.copy()
    structure_copy.calc = deepcopy(initial_structure.calc)
    self.parent_ref = structure_copy

    base_results = compute_with_calc([initial_structure.copy()], self.base_calc)
    self.base_ref = base_results[0]

    self.refs = [self.parent_ref, self.base_ref]
    self.add_delta_calc = DeltaCalc(
        [self.ml_potential, self.base_calc], "add", self.refs
    )
def do_after_train(self):
    """
    Executes after training the trainer in every active learning loop.

    Runs the atomistic method with the delta-corrected trained calculator,
    evaluates the last image of the resulting trajectory with the parent
    calculator, adds the delta-subtracted result to the training data and
    appends that last image to "final_images.traj". Finally updates the
    termination flag and the iteration counter.
    """
    trainer_calc = self.make_trainer_calc()
    self.trained_calc = DeltaCalc([trainer_calc, self.base_calc], "add", self.refs)

    self.atomistic_method.run(calc=self.trained_calc, filename=self.fn_label)
    self.sample_candidates = list(
        self.atomistic_method.get_trajectory(filename=self.fn_label)
    )

    # Evaluate only the final point of the trajectory with the parent calc.
    final_point_image = [self.sample_candidates[-1]]
    final_point_evA = compute_with_calc(final_point_image, self.parent_calc)
    self.final_point_force = np.max(np.abs(final_point_evA[0].get_forces()))
    self.training_data += subtract_deltas(final_point_evA, self.base_calc, self.refs)
    self.parent_calls += 1

    # NOTE(review): at iteration 0 the index ``self.iterations - 1`` is -1,
    # i.e. the last entry of ``query_seeds`` -- confirm this is intended.
    random.seed(self.query_seeds[self.iterations - 1] + 1)

    # Start a fresh trajectory file on the first iteration, append afterwards.
    # The context manager closes the writer so the file handle is not leaked.
    mode = "w" if self.iterations == 0 else "a"
    with TrajectoryWriter("final_images.traj", mode=mode) as writer:
        writer.write(final_point_image[0])

    self.terminate = self.check_terminate()
    self.iterations += 1
def ensemble_train_trainer(self, dataset):
    """
    Train a deep copy of ``self.trainer`` on ``dataset`` and return the
    resulting "add" delta calculator.

    Parameters
    ----------
    dataset: object
        Training data handed unchanged to the copied trainer's ``train``.

    Returns
    -------
    DeltaCalc combining the freshly trained calculator with ``self.base_calc``.
    """
    # Work on a copy so ``self.trainer`` itself is left untouched.
    member_trainer = copy.deepcopy(self.trainer)
    member_trainer.train(dataset)
    return DeltaCalc(
        [self.make_trainer_calc(member_trainer), self.base_calc],
        "add",
        self.refs,
    )
def subtract_deltas(images, base_calc, refs):
    """
    Produces the delta values of the image with precalculated values.

    Intended for images that already carry parent-calc forces and energies
    through an attached singlepoint calculator, so the costly parent calc
    does not have to be run again.

    Parameters
    ----------
    images: list
        List of ase atoms images to be calculated.
        Images should have singlepoint calculators with results.

    base_calc: ase Calculator object
        Calculator used as the baseline for taking delta subtraction.

    refs: list
        List of two images, they have results from parent and base calc
        respectively

    Returns
    -------
    The result of ``convert_to_singlepoint`` applied to copies of ``images``
    that have a "sub" DeltaCalc attached.
    """
    delta_images = copy_images(images)
    for atoms in delta_images:
        # The calculator already on the copy plays the role of the parent calc.
        atoms.set_calculator(DeltaCalc([atoms.calc, base_calc], "sub", refs))
    return convert_to_singlepoint(delta_images)
def make_ensemble(self):
    """
    Train ``self.trainer`` on every set in ``self.ensemble_sets`` and store
    an EnsembleCalc of the resulting "add" delta calculators in
    ``self.trained_calc``.
    """
    member_calcs = []
    for subset in self.ensemble_sets:
        # The same trainer object is retrained for each subset.
        self.trainer.train(subset)
        member_calcs.append(
            DeltaCalc([self.make_trainer_calc(), self.base_calc], "add", self.refs)
        )
    self.trained_calc = EnsembleCalc(member_calcs, self.trainer)
def init_training_data(self):
    """
    Prepare the training data by attaching delta values for training.

    The first singlepoint image becomes the parent reference and its
    ``self.base_calc`` evaluation the base reference. Every image is then
    re-evaluated through a per-image "sub" DeltaCalc built from its own
    singlepoint calculator.
    """
    singlepoint_images = convert_to_singlepoint(self.training_data)

    reference_image = singlepoint_images[0]
    base_reference = compute_with_calc([reference_image], self.base_calc)[0]
    self.refs = [reference_image, base_reference]
    self.delta_sub_calc = DeltaCalc(self.calcs, "sub", self.refs)

    self.training_data = []
    for atoms in singlepoint_images:
        parent_sp = atoms.get_calculator()
        # Restrict the singlepoint calculator to energy and forces.
        parent_sp.implemented_properties = ["energy", "forces"]
        per_image_delta = DeltaCalc([parent_sp, self.base_calc], "sub", self.refs)
        self.training_data.extend(compute_with_calc([atoms], per_image_delta))
def init_refs(self, initial_structure):
    """
    Build the parent/base reference images and the "add" delta calculator.

    The base reference is computed with ``self.base_calc`` on only those
    atoms of ``initial_structure`` whose symbol is C, H, O or N.

    Parameters
    ----------
    initial_structure: ase Atoms object
        Structure whose attached calculator is deep-copied onto the parent
        reference.

    Raises
    ------
    ValueError
        If ``initial_structure`` contains no C, H, O or N atom, since no
        base reference can be built from an empty selection.
    """
    organic_symbols = {"C", "H", "O", "N"}
    adsorbate_mask = np.array(
        [atom.symbol in organic_symbols for atom in initial_structure],
        dtype=bool,
    )
    if not adsorbate_mask.any():
        raise ValueError(
            "initial_structure contains no C, H, O or N atoms; "
            "cannot build the adsorbate base reference"
        )

    # Atoms.copy() is followed by an explicit calculator assignment so the
    # parent reference gets its own deep copy of the original calculator.
    self.parent_ref = initial_structure.copy()
    self.parent_ref.calc = deepcopy(initial_structure.calc)
    self.adsorbate_idx = adsorbate_mask

    self.base_ref = compute_with_calc(
        [initial_structure.copy()[self.adsorbate_idx]], self.base_calc
    )[0]
    self.refs = [self.parent_ref, self.base_ref]
    self.add_delta_calc = DeltaCalc(
        [self.ml_potential, self.base_calc],
        "add",
        self.refs,
    )
def init_training_data(self):
    """
    Prepare the training data by attaching delta values for training.

    The first singlepoint image of ``self.parent_dataset`` becomes the parent
    reference and its ``self.base_calc`` evaluation the base reference. The
    whole dataset is then evaluated with the "sub" delta calculator and
    passed to ``bootstrap_ensemble``, whose two return values are stored in
    ``self.ensemble_sets`` and ``self.parent_dataset``.
    """
    sp_raw_data = convert_to_singlepoint(self.parent_dataset)

    # Use the singlepoint image itself as the parent reference: ase
    # Atoms.copy() does not carry the attached calculator over, so a bare
    # copy would hold no parent results.
    # NOTE(review): assumes DeltaCalc reads results from refs[0] -- confirm.
    parent_ref_image = sp_raw_data[0]
    base_ref_image = compute_with_calc(sp_raw_data[:1], self.base_calc)[0]
    self.refs = [parent_ref_image, base_ref_image]

    self.delta_sub_calc = DeltaCalc(self.calcs, "sub", self.refs)
    self.ensemble_sets, self.parent_dataset = bootstrap_ensemble(
        compute_with_calc(sp_raw_data, self.delta_sub_calc)
    )
def init_training_data(self):
    """
    Prepare the training data by attaching delta values for training.

    The first singlepoint image becomes the parent reference and its
    ``self.base_calc`` evaluation the base reference. The full set is then
    re-evaluated with the shared "sub" delta calculator.
    """
    singlepoints = convert_to_singlepoint(self.training_data)

    parent_reference = singlepoints[0]
    base_reference = compute_with_calc([parent_reference], self.base_calc)[0]
    self.refs = [parent_reference, base_reference]

    self.delta_sub_calc = DeltaCalc(self.calcs, "sub", self.refs)
    self.training_data = compute_with_calc(singlepoints, self.delta_sub_calc)
def do_after_train(self):
    """
    Executes after training the trainer in every active learning loop.

    Builds the "add" delta calculator from the trained calculator, runs the
    atomistic method with it, collects the trajectory as sample candidates,
    then updates the termination flag and the iteration counter.
    """
    self.trained_calc = DeltaCalc(
        [self.make_trainer_calc(), self.base_calc], "add", self.refs
    )

    method = self.atomistic_method
    method.run(calc=self.trained_calc, filename=self.fn_label)
    trajectory = method.get_trajectory(filename=self.fn_label)
    self.sample_candidates = list(trajectory)

    self.terminate = self.check_terminate()
    self.iterations += 1
def add_data(self, queried_images, query_idx):
    """
    Evaluate queried images with the "sub" delta calculator, add them to the
    training data, and log their delta-restored values.

    Parameters
    ----------
    queried_images: list
        Images passed to ``compute_with_calc`` with ``self.delta_sub_calc``.

    query_idx: sequence or None
        Per-image index recorded in the log as "query_idx" and "substep".
        When None, both log entries are None.

    Returns
    -------
    list
        The new images with the delta added back ("add" DeltaCalc applied).
    """
    self.new_dataset = compute_with_calc(queried_images, self.delta_sub_calc)
    self.training_data += self.new_dataset
    self.parent_calls += len(self.new_dataset)

    # First pass: undo the delta subtraction for every new image.
    restored_images = []
    for delta_image in self.new_dataset:
        restore_calc = DeltaCalc(
            [delta_image.calc, self.base_calc], "add", self.refs
        )
        (restored,) = compute_with_calc([delta_image], restore_calc)
        restored_images.append(restored)

    # Second pass: log each restored image.
    for position, restored in enumerate(restored_images):
        idx = query_idx[position] if query_idx is not None else None

        energy = restored.get_potential_energy(apply_constraint=False)
        forces = restored.get_forces(apply_constraint=False)
        constrained_forces = restored.get_forces()
        # Largest per-atom force norm, taken from the constrained forces.
        fmax = np.sqrt((constrained_forces**2).sum(axis=1).max())

        info = {
            "check": True,
            "energy": energy,
            "forces": forces,
            "fmax": fmax,
            "ml_energy": None,
            "ml_forces": None,
            "ml_fmax": None,
            "parent_energy": energy,
            "parent_forces": forces,
            "parent_fmax": fmax,
            "force_uncertainty": restored.info.get("max_force_stds", None),
            "energy_uncertainty": restored.info.get("energy_stds", None),
            "dyn_uncertainty_tol": None,
            "stat_uncertain_tol": None,
            "tolerance": None,
            "parent_calls": self.parent_calls,
            "trained_on": True,
            "query_idx": idx,
            "substep": idx,
        }
        self.logger.write(restored, info)

    return restored_images
def do_after_train(self):
    """
    Executes after training the ml_potential in every active learning loop.

    Runs the atomistic method with the "add" delta calculator, logs every
    image of the resulting trajectory as an ML prediction (no parent values),
    then updates the termination flag and the iteration counter.
    """
    self.trained_calc = DeltaCalc(
        [self.make_trainer_calc(), self.base_calc], "add", self.refs
    )

    method = self.atomistic_method
    method.run(calc=self.trained_calc, filename=self.fn_label)
    self.sample_candidates = list(method.get_trajectory(filename=self.fn_label))

    for substep, candidate in enumerate(self.sample_candidates):
        energy = candidate.get_potential_energy(apply_constraint=False)
        forces = candidate.get_forces(apply_constraint=False)
        constrained_forces = candidate.get_forces()
        # Largest per-atom force norm, taken from the constrained forces.
        fmax = np.sqrt((constrained_forces**2).sum(axis=1).max())

        info = {
            "check": False,
            "energy": energy,
            "forces": forces,
            "fmax": fmax,
            "ml_energy": energy,
            "ml_forces": forces,
            "ml_fmax": fmax,
            "parent_energy": None,
            "parent_forces": None,
            "parent_fmax": None,
            "force_uncertainty": candidate.info.get("max_force_stds", None),
            "energy_uncertainty": candidate.info.get("energy_stds", None),
            "dyn_uncertainty_tol": None,
            "stat_uncertain_tol": None,
            "tolerance": None,
            "parent_calls": self.parent_calls,
            "trained_on": False,
            "query_idx": None,
            "substep": substep,
        }
        self.logger.write(candidate, info)

    self.terminate = self.check_terminate()
    self.iterations += 1
def init_training_data(self):
    """
    Prepare the training data by attaching delta values for training.

    Sets up the references and the "sub" delta calculator from the atomistic
    method's initial geometry, runs ``do_after_train`` once with an empty
    training set, then feeds the original training images through
    ``add_data``.
    """
    # The delta sub calc acts as the de facto parent calc for all queries.
    initial_geometry = self.atomistic_method.initial_geometry
    base_reference = compute_with_calc([initial_geometry], self.base_calc)[0]
    self.refs = [initial_geometry, base_reference]
    self.delta_sub_calc = DeltaCalc(self.calcs, "sub", self.refs)

    # Set the current training images aside; they are added back below.
    pending_images = list(self.training_data)

    # Run a trajectory with no training data, so sampling uses only the
    # base model.
    self.training_data = []
    self.fn_label = f"{self.file_dir}{self.filename}_iter_{self.iterations}"
    self.do_after_train()

    # Add the initial data to the training dataset.
    self.add_data(pending_images, None)
    self.initial_image_energy = self.refs[0].get_potential_energy()
class InterfaceLearner(OnlineLearner):
    """
    Online learner that trains the ML potential on delta-subtracted data.

    The base calculator is evaluated only on the C/H/O/N atoms of the first
    structure to build the base reference; ML predictions have the delta
    added back before they are returned.
    """

    def __init__(
        self,
        learner_params,
        parent_dataset,
        ml_potential,
        parent_calc,
        base_calc,
        mongo_db=None,
        optional_config=None,
    ):
        # Set before the parent constructor runs, because init_logger reads
        # self.base_calc.
        # NOTE(review): presumably OnlineLearner.__init__ calls init_logger
        # -- confirm.
        self.base_calc = base_calc
        self.refs = None
        OnlineLearner.__init__(
            self,
            learner_params,
            parent_dataset,
            ml_potential,
            parent_calc,
            mongo_db=mongo_db,
            optional_config=optional_config,
        )

    def init_logger(self, mongo_db, optional_config):
        """
        Create the Logger, including the base calculator.

        ``mongo_db`` may be None (the constructor default); in that case no
        collection is looked up and None is handed to the Logger.
        NOTE(review): assumes Logger accepts mongo_db_collection=None -- confirm.
        """
        mongo_db_collection = (
            mongo_db["online_learner"] if mongo_db is not None else None
        )
        self.logger = Logger(
            learner_params=self.learner_params,
            ml_potential=self.ml_potential,
            parent_calc=self.parent_calc,
            base_calc=self.base_calc,
            mongo_db_collection=mongo_db_collection,
            optional_config=optional_config,
        )

    def init_refs(self, initial_structure):
        """
        Build the parent/base reference images and the "add" delta calculator.

        The base reference is computed with ``self.base_calc`` on only those
        atoms of ``initial_structure`` whose symbol is C, H, O or N.

        Raises
        ------
        ValueError
            If ``initial_structure`` contains no C, H, O or N atom.
        """
        organic_symbols = {"C", "H", "O", "N"}
        adsorbate_mask = np.array(
            [atom.symbol in organic_symbols for atom in initial_structure],
            dtype=bool,
        )
        if not adsorbate_mask.any():
            raise ValueError(
                "initial_structure contains no C, H, O or N atoms; "
                "cannot build the adsorbate base reference"
            )

        # Atoms.copy() is followed by an explicit calculator assignment so
        # the parent reference gets its own deep copy of the calculator.
        self.parent_ref = initial_structure.copy()
        self.parent_ref.calc = deepcopy(initial_structure.calc)
        self.adsorbate_idx = adsorbate_mask

        self.base_ref = compute_with_calc(
            [initial_structure.copy()[self.adsorbate_idx]], self.base_calc
        )[0]
        self.refs = [self.parent_ref, self.base_ref]
        self.add_delta_calc = DeltaCalc(
            [self.ml_potential, self.base_calc],
            "add",
            self.refs,
        )

    def get_ml_calc(self):
        """Reset the ML potential and the add-delta calculator; return the latter."""
        self.ml_potential.reset()
        self.add_delta_calc.reset()
        return self.add_delta_calc

    def get_ml_prediction(self, atoms):
        """
        Helper function which takes an atoms object with no calc attached.
        Makes an Ml prediction.
        Performs a delta add operation since the ML model was trained on delta sub data.
        Returns it with a delta ML potential predicted singlepoint.
        Designed to be overwritten by DeltaLearner which needs to modify ML predictions.
        """
        atoms_copy = atoms.copy()

        # First pass with the bare ML potential; its info entries are copied
        # onto the delta-added result below.
        atoms_copy.calc = self.ml_potential
        (atoms_with_info,) = convert_to_singlepoint([atoms_copy])

        # Second pass with the delta added back.
        atoms_copy.calc = self.add_delta_calc
        (atoms_ML,) = convert_to_singlepoint([atoms_copy])

        for key, value in atoms_with_info.info.items():
            atoms_ML.info[key] = value
        return atoms_ML

    def add_to_dataset(self, new_data):
        """
        Helper function which takes an atoms object with parent singlepoint attached.
        Performs a delta sub operation on the parent data so that the ML model will train on delta sub data.
        And adds new parent data to the training set.
        Returns the partial dataset just added.
        Designed to overwritten by DeltaLearner which needs to modify data added to training set.
        """
        # The first structure ever added defines the references.
        if self.refs is None:
            self.init_refs(new_data)

        (delta_sub_data,) = subtract_deltas([new_data], self.base_calc, self.refs)
        partial_dataset = [delta_sub_data]
        self.parent_dataset += partial_dataset
        return partial_dataset
def do_train(self):
    """
    Build the ensemble calculator from ``self.ensemble_sets`` and wrap it in
    an "add" DeltaCalc stored as ``self.trained_calc``.
    """
    ensemble = EnsembleCalc.make_ensemble(self.ensemble_sets, self.trainer)
    self.ensemble_calc = ensemble
    self.trained_calc = DeltaCalc(
        [ensemble, self.base_calc],
        "add",
        self.refs,
    )
trainer = AtomsTrainer(config) checkpoint_path = "/home/jovyan/working/ocp/data/pretrained/s2ef/dimenetpp_2M.pt" model_path = ( "/home/jovyan/working/ocp-dev/configs/s2ef/2M/dimenet_plus_plus/dpp.yml" ) base_calc = OCPModel(model_path=model_path, checkpoint_path=checkpoint_path) # base_initial_structure = initial_structure.copy() base_initial_structure = compute_with_calc([initial_structure.copy()], base_calc)[0] # base_initial_structure.set_calculator(base_calc) delta_calc = DeltaCalc( [parent_calc, base_calc], "sub", [OAL_initial_structure, base_initial_structure], ) ml_potential = AmptorchEnsembleCalc(trainer, learner_params["n_ensembles"]) online_calc = OnlineLearner( learner_params, images, ml_potential, delta_calc, ) real_calc = DeltaCalc( [online_calc, base_calc], "add",
"rs_s": [0], }, "G4": { "etas": [0.005], "zetas": [1.0, 4.0], "gammas": [1.0, -1.0] }, "cutoff": 6, }, } # create image with base calculator attached cutoff = Gs["default"]["cutoff"] base_calc = MultiMorse(images, cutoff, combo="mean") slab.set_calculator(base_calc) #add delta_calc = DeltaCalc([parent_calculator, base_calc], "add", [slab, slab]) #Set slab calculator to delta calc and evaluate energy slab.set_calculator(delta_calc) add_energy = slab.get_potential_energy() #Sub delta_calc = DeltaCalc([parent_calculator, base_calc], "sub", [slab, slab]) #Set slab calculator to delta calc and evaluate energy slab.set_calculator(delta_calc) sub_energy = slab.get_potential_energy() def test_deltaCalc(): assert add_energy == sub_energy