def init_training_data(self):
    """
    Attach delta values to the parent dataset and bootstrap the ensemble sets.

    The parent dataset is converted to single-point images. A copy of the
    first image becomes the parent reference, and the first image evaluated
    with ``self.base_calc`` becomes the base reference. Both are stored in
    ``self.refs`` and used to build ``self.delta_sub_calc``. The full dataset
    is then computed with that calculator and handed to
    ``bootstrap_ensemble``; its two results are stored in
    ``self.ensemble_sets`` and ``self.parent_dataset``.
    """
    singlepoint_images = convert_to_singlepoint(self.parent_dataset)

    # NOTE(review): if these are ASE Atoms, copy() does not carry the
    # calculator over -- confirm DeltaCalc does not need one on refs[0].
    parent_reference = singlepoint_images[0].copy()
    base_reference = compute_with_calc(singlepoint_images[:1], self.base_calc)[0]

    self.refs = [parent_reference, base_reference]
    self.delta_sub_calc = DeltaCalc(self.calcs, "sub", self.refs)

    delta_images = compute_with_calc(singlepoint_images, self.delta_sub_calc)
    self.ensemble_sets, self.parent_dataset = bootstrap_ensemble(delta_images)
def init_training_data(self):
    """
    Replace the raw training data with images computed by the delta calculator.

    The current training data is converted to single-point images. The first
    image serves as the parent reference, and the same image evaluated with
    ``self.base_calc`` serves as the base reference. The pair is stored in
    ``self.refs`` and used to build ``self.delta_sub_calc``, with which every
    single-point image is recomputed to form the new ``self.training_data``.
    """
    singlepoint_images = convert_to_singlepoint(self.training_data)

    parent_reference = singlepoint_images[0]
    base_reference = compute_with_calc([parent_reference], self.base_calc)[0]

    self.refs = [parent_reference, base_reference]
    self.delta_sub_calc = DeltaCalc(self.calcs, "sub", self.refs)
    self.training_data = compute_with_calc(singlepoint_images, self.delta_sub_calc)
def do_after_train(self):
    """
    Executes after training the trainer in every active learning loop.

    Steps, in order:

    1. Build ``self.trained_calc`` as an "add" ``DeltaCalc`` of the freshly
       made trainer calculator and ``self.base_calc``.
    2. Run the atomistic method with it and store the resulting trajectory
       in ``self.sample_candidates``.
    3. Evaluate the last trajectory image with ``self.parent_calc``, record
       its largest absolute force component in ``self.final_point_force``,
       append the ``subtract_deltas`` result to ``self.training_data`` and
       count one parent call.
    4. Re-seed ``random`` and write the last trajectory image to
       ``final_images.traj`` (created on iteration 0, appended afterwards).
    5. Update ``self.terminate`` and increment ``self.iterations``.
    """
    trainer_calc = self.make_trainer_calc()
    self.trained_calc = DeltaCalc([trainer_calc, self.base_calc], "add", self.refs)

    self.atomistic_method.run(calc=self.trained_calc, filename=self.fn_label)
    self.sample_candidates = list(
        self.atomistic_method.get_trajectory(filename=self.fn_label)
    )

    final_point_image = [self.sample_candidates[-1]]
    final_point_evA = compute_with_calc(final_point_image, self.parent_calc)
    self.final_point_force = np.max(np.abs(final_point_evA[0].get_forces()))
    self.training_data += subtract_deltas(
        final_point_evA, self.base_calc, self.refs
    )
    self.parent_calls += 1

    # NOTE(review): when iterations == 0 this reads query_seeds[-1] (the
    # last seed) -- confirm that is intended.
    random.seed(self.query_seeds[self.iterations - 1] + 1)

    # Start a fresh file on the first iteration, append on later ones. The
    # context manager closes the writer so the file handle is not leaked.
    mode = "w" if self.iterations == 0 else "a"
    with TrajectoryWriter("final_images.traj", mode=mode) as writer:
        writer.write(final_point_image[0])

    self.terminate = self.check_terminate()
    self.iterations += 1
def setUpClass(cls) -> None:
    """
    Build the shared fixtures for the "CuNP" offline active-learning case.

    A rattled Cu icosahedron in a 20 x 20 x 20 periodic cell is relaxed twice:
    once directly with EMT (wrapped in a ``CounterCalc``) and once through
    ``run_offline_al``. The final image of each run is stored on the class,
    the offline one twice: once with the trained calculator attached and
    once with the EMT parent calculator attached.
    """
    initial_structure = Icosahedron("Cu", 2)
    initial_structure.rattle(0.1)
    initial_structure.set_pbc(True)
    initial_structure.set_cell([20, 20, 20])

    # Reference relaxation driven directly by the (counted) parent calculator.
    EMT_initial_structure = initial_structure.copy()
    parent_calc = EMT()
    cls.emt_counter = CounterCalc(parent_calc)
    EMT_initial_structure.set_calculator(cls.emt_counter)
    cls.EMT_structure_optim = Relaxation(
        EMT_initial_structure, BFGS, fmax=0.01, steps=30
    )
    cls.EMT_structure_optim.run(cls.emt_counter, "CuNP_emt")

    # Offline active-learning relaxation starting from the same geometry.
    offline_initial_structure = compute_with_calc(
        [initial_structure.copy()], parent_calc
    )[0]
    Offline_relaxation = Relaxation(
        offline_initial_structure, BFGS, fmax=0.01, steps=30, maxstep=0.05
    )
    cls.offline_learner, cls.trained_calc, cls.Offline_traj = run_offline_al(
        Offline_relaxation,
        [offline_initial_structure],
        "CuNP_offline_al",
        parent_calc,
    )

    cls.EMT_image = cls.EMT_structure_optim.get_trajectory("CuNP_emt")[-1]
    cls.EMT_image.set_calculator(parent_calc)

    # Give each view of the final offline image its own copy. Binding both
    # attributes to the same object would let the second set_calculator
    # call replace the first, so both would be evaluated with EMT.
    offline_final_image = cls.Offline_traj[-1]
    cls.offline_final_structure_AL = offline_final_image.copy()
    cls.offline_final_structure_AL.set_calculator(cls.trained_calc)
    cls.offline_final_structure_EMT = offline_final_image.copy()
    cls.offline_final_structure_EMT.set_calculator(parent_calc)

    cls.description = "CuNP"
    return super().setUpClass()
def query_data(self):
    """
    Query new images and add their computed properties to the training data.

    ``random`` is seeded from ``self.query_seeds`` before ``self.query_func``
    is called. The first value it returns (a list of images) is computed
    with ``self.delta_sub_calc`` and appended to ``self.training_data``. The
    second value (a single image) is evaluated with ``self.parent_calc``; its
    largest absolute force component is stored in ``self.final_point_force``
    and its ``subtract_deltas`` result is appended to the training data too.
    """
    seed = self.query_seeds[self.iterations - 1]
    random.seed(seed)

    sampled_images, lowest_force_image = self.query_func()
    self.training_data += compute_with_calc(sampled_images, self.delta_sub_calc)

    parent_evaluated = compute_with_calc([lowest_force_image], self.parent_calc)[0]
    parent_forces = parent_evaluated.get_forces()
    self.final_point_force = np.max(np.abs(parent_forces))

    self.training_data += subtract_deltas(
        [parent_evaluated], self.base_calc, self.refs
    )
def add_data(self, queried_images, query_idx):
    """
    Compute queried images, add them to the training data and log each one.

    The images are computed with ``self.delta_sub_calc``, stored in
    ``self.new_dataset``, appended to ``self.training_data`` and counted in
    ``self.parent_calls``. Each result is then recomputed with an "add"
    ``DeltaCalc`` built from its own calculator and ``self.base_calc``
    (presumably undoing the delta subtraction -- confirm against DeltaCalc),
    and one record per image is written to ``self.logger``.

    Parameters
    ----------
    queried_images: list
        Images to compute with ``self.delta_sub_calc``.
    query_idx: sequence or None
        Per-image index logged under ``"query_idx"`` and ``"substep"``.
        When ``None``, ``None`` is logged for every image.

    Returns
    -------
    list
        The images recomputed with the "add" ``DeltaCalc``, in input order.
    """
    self.new_dataset = compute_with_calc(queried_images, self.delta_sub_calc)
    self.training_data += self.new_dataset
    self.parent_calls += len(self.new_dataset)

    restored_images = []
    for delta_image in self.new_dataset:
        restoring_calc = DeltaCalc(
            [delta_image.calc, self.base_calc], "add", self.refs
        )
        # Exactly one image goes in, so exactly one is expected back.
        (restored,) = compute_with_calc([delta_image], restoring_calc)
        restored_images.append(restored)

    for position, restored in enumerate(restored_images):
        idx = query_idx[position] if query_idx is not None else None

        energy = restored.get_potential_energy(apply_constraint=False)
        forces = restored.get_forces(apply_constraint=False)
        # fmax is the largest per-atom force norm, taken from the forces
        # returned by the default get_forces() call.
        squared_norms = (restored.get_forces() ** 2).sum(axis=1)
        fmax = np.sqrt(squared_norms.max())

        info = {
            "check": True,
            "energy": energy,
            "forces": forces,
            "fmax": fmax,
            "ml_energy": None,
            "ml_forces": None,
            "ml_fmax": None,
            "parent_energy": energy,
            "parent_forces": forces,
            "parent_fmax": fmax,
            "force_uncertainty": restored.info.get("max_force_stds", None),
            "energy_uncertainty": restored.info.get("energy_stds", None),
            "dyn_uncertainty_tol": None,
            "stat_uncertain_tol": None,
            "tolerance": None,
            "parent_calls": self.parent_calls,
            "trained_on": True,
            "query_idx": idx,
            "substep": idx,
        }
        self.logger.write(restored, info)

    return restored_images
def query_data(self):
    """
    Query new images and add their computed properties to the training data.

    ``random`` is seeded from ``self.query_seeds`` (indexed by
    ``self.iterations - 1``) before ``self.query_func`` is called. The images
    it returns are computed with ``self.delta_sub_calc`` and appended to
    ``self.training_data``.
    """
    seed = self.query_seeds[self.iterations - 1]
    random.seed(seed)

    selected_images = self.query_func()
    self.training_data += compute_with_calc(selected_images, self.delta_sub_calc)
def init_training_data(self):
    """
    Rebuild the training data from delta values of the stored calculations.

    The current training data is converted to single-point images. The first
    image is the parent reference, and the same image evaluated with
    ``self.base_calc`` is the base reference. Both go into ``self.refs`` and
    ``self.delta_sub_calc`` is built from them. Each single-point image is
    then recomputed with a "sub" ``DeltaCalc`` made from its own calculator
    and ``self.base_calc``, and the results become ``self.training_data``.
    """
    singlepoint_images = convert_to_singlepoint(self.training_data)

    parent_reference = singlepoint_images[0]
    base_reference = compute_with_calc([parent_reference], self.base_calc)[0]
    self.refs = [parent_reference, base_reference]
    self.delta_sub_calc = DeltaCalc(self.calcs, "sub", self.refs)

    self.training_data = []
    for singlepoint_image in singlepoint_images:
        stored_calc = singlepoint_image.get_calculator()
        # Restrict what the image's calculator advertises to energy and
        # forces before wrapping it in the delta calculator.
        stored_calc.implemented_properties = ["energy", "forces"]
        per_image_delta = DeltaCalc(
            [stored_calc, self.base_calc], "sub", self.refs
        )
        self.training_data += compute_with_calc([singlepoint_image], per_image_delta)
def query_data(self):
    """
    Query new images and add their computed properties to the training data.

    Takes no arguments: the images come from ``self.query_func()``, are
    computed with ``self.delta_sub_calc`` and are appended to
    ``self.training_data``.
    """
    selected_images = self.query_func()
    computed_images = compute_with_calc(selected_images, self.delta_sub_calc)
    self.training_data += computed_images
def init_refs(self, initial_structure):
    """
    Set up the reference images and the "add" delta calculator.

    Parameters
    ----------
    initial_structure:
        Structure used as the reference geometry. It is copied, never
        modified; its ``calc`` attribute is deep-copied onto the parent
        reference.

    Sets ``self.parent_ref``, ``self.base_ref`` (a copy of the structure
    computed with ``self.base_calc``), ``self.refs`` and
    ``self.add_delta_calc``.
    """
    parent_reference = initial_structure.copy()
    parent_reference.calc = deepcopy(initial_structure.calc)
    self.parent_ref = parent_reference

    base_inputs = [initial_structure.copy()]
    self.base_ref = compute_with_calc(base_inputs, self.base_calc)[0]

    self.refs = [self.parent_ref, self.base_ref]
    self.add_delta_calc = DeltaCalc(
        [self.ml_potential, self.base_calc],
        "add",
        self.refs,
    )
def check_final_force(self):
    """
    Evaluate the last sampled image and record it as the final point.

    The last entry of ``self.sample_candidates`` is computed with
    ``self.delta_sub_calc``. Its ``"parent fmax"`` info value is stored in
    ``self.final_point_force`` and printed. The image is appended to
    ``self.training_data`` only when its ``"parent energy"`` is below
    ``self.initial_image_energy``. ``random`` is then re-seeded, the image is
    written to ``queried_images.db`` and ``self.parent_calls`` is incremented.
    """
    last_candidate = [self.sample_candidates[-1]]
    final_point_evA = compute_with_calc(last_candidate, self.delta_sub_calc)
    evaluated_info = final_point_evA[0].info

    self.final_point_force = evaluated_info["parent fmax"]
    print("final point fmax: ", self.final_point_force)

    # Train on the last image only if it is safe to query, i.e. its parent
    # energy is lower than that of the initial image.
    is_safe_to_query = evaluated_info["parent energy"] < self.initial_image_energy
    if is_safe_to_query:
        self.training_data += final_point_evA

    random.seed(self.query_seeds[self.iterations - 1] + 1)

    queries_db = ase.db.connect("queried_images.db")
    parent_E = final_point_evA[0].info["parent energy"]
    base_E = final_point_evA[0].info["base energy"]
    write_to_db(queries_db, final_point_evA, "final image", parent_E, base_E)

    self.parent_calls += 1
def init_refs(self, initial_structure):
    """
    Set up the reference images and the "add" delta calculator.

    The parent reference is a copy of ``initial_structure`` carrying a deep
    copy of its calculator. The base reference is computed with
    ``self.base_calc`` on only those atoms whose symbol is C, H, O or N.

    Parameters
    ----------
    initial_structure:
        Structure used as the reference geometry. It is not modified.

    Raises
    ------
    ValueError
        If ``initial_structure`` contains no C, H, O or N atom, because the
        base reference would then be computed on an empty selection.

    Sets ``self.adsorbate_idx`` (boolean mask over the atoms),
    ``self.parent_ref``, ``self.base_ref``, ``self.refs`` and
    ``self.add_delta_calc``.
    """
    organic_symbols = frozenset(("C", "H", "O", "N"))
    adsorbate_mask = np.array(
        [atom.symbol in organic_symbols for atom in initial_structure],
        dtype=bool,
    )
    # Fail early with a clear message rather than handing an empty
    # structure to the base calculator.
    if not adsorbate_mask.any():
        raise ValueError(
            "no organic element (C, H, O, N) found in structure"
        )

    self.parent_ref = initial_structure.copy()
    self.parent_ref.calc = deepcopy(initial_structure.calc)
    self.adsorbate_idx = adsorbate_mask

    self.base_ref = compute_with_calc(
        [initial_structure.copy()[self.adsorbate_idx]], self.base_calc
    )[0]
    self.refs = [self.parent_ref, self.base_ref]
    self.add_delta_calc = DeltaCalc(
        [self.ml_potential, self.base_calc],
        "add",
        self.refs,
    )
def quantify_uncertainty(traj, model_calc):
    """
    Score a model's force and energy uncertainties along a trajectory.

    Parameters
    ----------
    traj:
        Sequence of images. A copy (via ``copy_images``) supplies the "true"
        values, and the images recomputed with ``model_calc`` supply the
        predictions and uncertainties.
    model_calc:
        Calculator whose results store ``"max_force_stds"`` and
        ``"energy_stds"`` in each image's ``info``.

    Returns
    -------
    tuple
        ``(force_scores, energy_scores)`` as returned by ``get_all_metrics``.

    Raises
    ------
    ValueError
        If any image's force uncertainty is NaN.
    """

    def largest_force_norm(image):
        # Largest per-atom force norm of the image.
        return np.sqrt((image.get_forces() ** 2).sum(axis=1).max())

    parent_images = copy_images(traj)
    model_images = compute_with_calc(traj, model_calc)

    # Energy offset between model and parent on the first image. It is
    # removed from every predicted energy below.
    model_first_energy = model_images[0].get_potential_energy()
    parent_first_energy = parent_images[0].get_potential_energy()
    initial_energy_diff = model_first_energy - parent_first_energy

    force_truth, force_pred, force_sigma = [], [], []
    energy_truth, energy_pred, energy_sigma = [], [], []

    for parent_image, model_image in zip(parent_images, model_images):
        force_truth.append(largest_force_norm(parent_image))
        force_pred.append(largest_force_norm(model_image))

        sigma_f = model_image.info["max_force_stds"]
        force_sigma.append(sigma_f)
        if math.isnan(sigma_f):
            raise ValueError("NaN uncertainty")

        energy_truth.append(parent_image.get_potential_energy())
        energy_pred.append(model_image.get_potential_energy() - initial_energy_diff)
        energy_sigma.append(model_image.info["energy_stds"])

    force_scores = get_all_metrics(
        np.array(force_pred),
        np.array(force_sigma),
        np.array(force_truth),
        verbose=False,
    )
    energy_scores = get_all_metrics(
        np.array(energy_pred),
        np.array(energy_sigma),
        np.array(energy_truth),
        verbose=False,
    )
    return force_scores, energy_scores
def init_training_data(self):
    """
    Set up the delta references, sample once, then add the initial data.

    The atomistic method's initial geometry is the parent reference, and the
    same geometry computed with ``self.base_calc`` is the base reference.
    After ``self.delta_sub_calc`` is built, the existing training data is set
    aside, ``self.do_after_train()`` is run with an empty training set, and
    the set-aside images are added back through ``self.add_data``. Finally
    the parent reference's potential energy is stored in
    ``self.initial_image_energy``.
    """
    # The delta "sub" calculator acts as the de facto parent calculator
    # for all queries.
    parent_reference = self.atomistic_method.initial_geometry
    base_reference = compute_with_calc([parent_reference], self.base_calc)[0]
    self.refs = [parent_reference, base_reference]
    self.delta_sub_calc = DeltaCalc(self.calcs, "sub", self.refs)

    # Keep the incoming images aside so they can be computed with the
    # delta calculator later.
    pending_images = list(self.training_data)

    # Run one trajectory with no training data, so there is something to
    # sample from.
    self.training_data = []
    self.fn_label = f"{self.file_dir}{self.filename}_iter_{self.iterations}"
    self.do_after_train()

    # Now add the initial data to the training dataset.
    self.add_data(pending_images, None)
    self.initial_image_energy = self.refs[0].get_potential_energy()
# "logger": True, "single-threaded": True, }, } dbname = "CuNP_oal" trainer = AtomsTrainer(config) checkpoint_path = "/home/jovyan/working/ocp/data/pretrained/s2ef/dimenetpp_2M.pt" model_path = ( "/home/jovyan/working/ocp-dev/configs/s2ef/2M/dimenet_plus_plus/dpp.yml" ) base_calc = OCPModel(model_path=model_path, checkpoint_path=checkpoint_path) # base_initial_structure = initial_structure.copy() base_initial_structure = compute_with_calc([initial_structure.copy()], base_calc)[0] # base_initial_structure.set_calculator(base_calc) delta_calc = DeltaCalc( [parent_calc, base_calc], "sub", [OAL_initial_structure, base_initial_structure], ) ml_potential = AmptorchEnsembleCalc(trainer, learner_params["n_ensembles"]) online_calc = OnlineLearner( learner_params, images, ml_potential, delta_calc,