def setUpClass(cls) -> None:
    """Relax the Pt nanoparticle twice: with plain EMT and with online AL.

    Stores on ``cls`` the CounterCalc-wrapped EMT calculator, both
    relaxation objects, the learner returned by ``run_online_al``, the last
    image of each trajectory (with a fresh EMT calculator attached) and the
    ``description`` label "PtNP".
    """
    # Starting geometry shared by both relaxations.
    start_atoms = ase.io.read("./relaxation_test_structures/Pt-NP.traj")
    start_atoms.set_calculator(EMT())

    # Reference run: EMT wrapped in CounterCalc drives the optimizer directly.
    reference_atoms = start_atoms.copy()
    cls.emt_counter = CounterCalc(EMT())
    reference_atoms.set_calculator(cls.emt_counter)
    cls.EMT_structure_optim = Relaxation(
        reference_atoms,
        BFGS,
        fmax=0.05,
        steps=100,
    )
    cls.EMT_structure_optim.run(cls.emt_counter, "PtNP_emt")

    # Active-learning run, started from an independent copy of the geometry.
    learner_atoms = start_atoms.copy()
    learner_atoms.set_calculator(EMT())
    learner_relaxation = Relaxation(
        learner_atoms,
        BFGS,
        fmax=0.05,
        steps=30,
        maxstep=0.04,
    )
    online_result = run_online_al(
        learner_relaxation,
        [learner_atoms],
        "PtNP_oal",
        EMT(),
    )
    cls.OAL_learner, cls.OAL_structure_optim = online_result

    # Keep the last frame of each trajectory, each with its own EMT instance.
    emt_final = cls.EMT_structure_optim.get_trajectory("PtNP_emt")[-1]
    cls.EMT_image = emt_final
    emt_final.set_calculator(EMT())

    oal_final = cls.OAL_structure_optim.get_trajectory("PtNP_oal")[-1]
    cls.OAL_image = oal_final
    oal_final.set_calculator(EMT())

    cls.description = "PtNP"
    return super().setUpClass()
def setUpClass(cls) -> None:
    """Relax a rattled Cu icosahedron with EMT and with offline AL.

    Stores on ``cls`` the CounterCalc-wrapped parent calculator, the EMT
    relaxation, the learner / trained calculator / trajectory returned by
    ``run_offline_al``, the final EMT image, and two views of the final
    offline image: one evaluated with the trained calculator and one with
    the parent calculator.
    """
    initial_structure = Icosahedron("Cu", 2)
    initial_structure.rattle(0.1)
    initial_structure.set_pbc(True)
    initial_structure.set_cell([20, 20, 20])

    # Reference relaxation driven directly by the (counted) parent calculator.
    EMT_initial_structure = initial_structure.copy()
    parent_calc = EMT()
    cls.emt_counter = CounterCalc(parent_calc)
    EMT_initial_structure.set_calculator(cls.emt_counter)
    cls.EMT_structure_optim = Relaxation(
        EMT_initial_structure, BFGS, fmax=0.01, steps=30
    )
    cls.EMT_structure_optim.run(cls.emt_counter, "CuNP_emt")

    # Offline active-learning relaxation from a parent-evaluated copy.
    offline_initial_structure = compute_with_calc(
        [initial_structure.copy()], parent_calc
    )[0]
    Offline_relaxation = Relaxation(
        offline_initial_structure, BFGS, fmax=0.01, steps=30, maxstep=0.05
    )
    cls.offline_learner, cls.trained_calc, cls.Offline_traj = run_offline_al(
        Offline_relaxation,
        [offline_initial_structure],
        "CuNP_offline_al",
        parent_calc,
    )

    cls.EMT_image = cls.EMT_structure_optim.get_trajectory("CuNP_emt")[-1]
    cls.EMT_image.set_calculator(parent_calc)

    # Each attribute gets its own copy of the last offline frame. Binding
    # both names to the same Atoms object would let the second
    # set_calculator() call replace the trained calculator on the first,
    # making any AL-vs-EMT comparison compare an image with itself.
    cls.offline_final_structure_AL = cls.Offline_traj[-1].copy()
    cls.offline_final_structure_AL.set_calculator(cls.trained_calc)
    cls.offline_final_structure_EMT = cls.Offline_traj[-1].copy()
    cls.offline_final_structure_EMT.set_calculator(parent_calc)

    cls.description = "CuNP"
    return super().setUpClass()
def setUpClass(cls) -> None:
    """Relax a rattled Cu icosahedron with EMT and with config-driven AL.

    Stores on ``cls`` the CounterCalc-wrapped EMT calculator, the EMT
    relaxation, the dictionary returned by ``active_learning``, the last
    image of each resulting trajectory (with a fresh EMT calculator
    attached) and the ``description`` label "CuNP".
    """
    # Build the starting cluster inside a periodic 20 x 20 x 20 cell.
    seed_cluster = Icosahedron("Cu", 2)
    seed_cluster.rattle(0.1)
    seed_cluster.set_pbc(True)
    seed_cluster.set_cell([20, 20, 20])

    # Reference run with the parent calculator.
    reference_atoms = seed_cluster.copy()
    cls.emt_counter = CounterCalc(EMT())
    reference_atoms.set_calculator(cls.emt_counter)
    cls.EMT_structure_optim = Relaxation(
        reference_atoms,
        BFGS,
        fmax=FORCE_THRESHOLD,
        steps=30,
    )
    cls.EMT_structure_optim.run(cls.emt_counter, "CuNP_emt")

    # Active-learning run, configured through the class-provided AL config.
    # NOTE(review): "CuNP_emt.traj" is presumably the file written by the
    # reference run above -- confirm against Relaxation.run.
    formula = seed_cluster.get_chemical_formula()
    al_config = cls.get_al_config()
    al_config["links"]["traj"] = "CuNP_emt.traj"
    cls.oal_results_dict = active_learning(al_config)

    # Output name is "<ml_potential>_<formula>_oal".
    name_parts = [str(al_config["links"]["ml_potential"]), str(formula), "oal"]
    dbname = "_".join(name_parts)
    oal_final = Trajectory(dbname + ".traj")[-1]
    cls.OAL_image = oal_final
    oal_final.set_calculator(EMT())

    # Last frame of the reference relaxation.
    emt_final = cls.EMT_structure_optim.get_trajectory("CuNP_emt")[-1]
    cls.EMT_image = emt_final
    emt_final.set_calculator(EMT())

    cls.description = "CuNP"
    return super().setUpClass()
def run_offlineal(cluster, parent_calc, elements, al_learner_params, config, optimizer):
    """Run an FmaxLearner offline active-learning relaxation of ``cluster``.

    ``al_learner_params`` and ``config`` are modified in place: the former
    gains an "atomistic_method" entry, the latter gains "dataset" and "cmd"
    entries. ``al_learner_params`` must already provide "file_dir" and
    "filename". An existing "queried_images.db" in the working directory is
    deleted before learning starts.

    Returns
    -------
    tuple
        ``(relaxed_clus, parent_calls)``: the last image of the trajectory
        file written for the final iteration, and the learner's
        ``parent_calls`` attribute.
    """
    # Fingerprint settings handed to the trainer through config["dataset"].
    g2_etas = np.logspace(np.log10(0.05), np.log10(5.0), num=4)
    g2_params = {"etas": g2_etas, "rs_s": [0]}
    g4_params = {"etas": [0.005], "zetas": [1.0, 4.0], "gammas": [1.0, -1.0]}
    Gs = {"default": {"G2": g2_params, "G4": g4_params, "cutoff": 6}}

    images = [cluster]

    al_learner_params["atomistic_method"] = Relaxation(
        cluster,
        optimizer,
        fmax=0.01,
        steps=100,
    )

    dataset_settings = {
        "raw_data": images,
        "val_split": 0,
        "elements": elements,
        "fp_params": Gs,
        "save_fps": False,
        "scaling": {"type": "normalize", "range": (-1, 1)},
    }
    config["dataset"] = dataset_settings

    cmd_settings = {
        "debug": False,
        "run_dir": "./",
        "seed": 2,
        "identifier": "cluster",
        "verbose": True,
        "single-threaded": True,
    }
    config["cmd"] = cmd_settings

    trainer = AtomsTrainer(config)
    base_calc = EMT()

    offlinecalc = FmaxLearner(
        al_learner_params,
        trainer,
        images,
        parent_calc,
        base_calc,
    )

    # Start from a clean query database.
    stale_db = "queried_images.db"
    if os.path.exists(stale_db):
        os.remove(stale_db)

    offlinecalc.learn()

    # The final trajectory is the one written for the last iteration.
    al_iterations = offlinecalc.iterations - 1
    file_path = al_learner_params["file_dir"] + al_learner_params["filename"]
    final_ml_traj = read(f"{file_path}_iter_{al_iterations}.traj", ":")

    return final_ml_traj[-1], offlinecalc.parent_calls
def setUpClass(cls) -> None:
    """Relax a rattled Cu icosahedron with EMT and with delta active learning.

    Stores on ``cls`` the starting structure, the CounterCalc-wrapped EMT
    calculator, both relaxation objects, the learner returned by
    ``run_delta_al``, the last image of each trajectory (with a fresh EMT
    calculator attached) and the ``description`` label "CuNP".
    """
    # Starting cluster inside a periodic 20 x 20 x 20 cell, kept on the class.
    seed_cluster = Icosahedron("Cu", 2)
    cls.initial_structure = seed_cluster
    seed_cluster.rattle(0.1)
    seed_cluster.set_pbc(True)
    seed_cluster.set_cell([20, 20, 20])

    # Reference run with the parent calculator.
    reference_atoms = cls.initial_structure.copy()
    cls.emt_counter = CounterCalc(EMT())
    reference_atoms.set_calculator(cls.emt_counter)
    cls.EMT_structure_optim = Relaxation(
        reference_atoms,
        BFGS,
        fmax=FORCE_THRESHOLD,
        steps=30,
    )
    cls.EMT_structure_optim.run(cls.emt_counter, "CuNP_emt")

    # Active-learning run from an independent copy of the cluster.
    learner_atoms = cls.initial_structure.copy()
    learner_atoms.set_calculator(EMT())
    learner_relaxation = Relaxation(
        learner_atoms,
        BFGS,
        fmax=0.05,
        steps=60,
        maxstep=0.04,
    )
    delta_result = run_delta_al(
        learner_relaxation,
        [learner_atoms],
        ["Cu"],
        "CuNP_oal",
        EMT(),
    )
    cls.OAL_learner, cls.OAL_structure_optim = delta_result

    # Keep the last frame of each trajectory, each with its own EMT instance.
    emt_final = cls.EMT_structure_optim.get_trajectory("CuNP_emt")[-1]
    cls.EMT_image = emt_final
    emt_final.set_calculator(EMT())

    oal_final = cls.OAL_structure_optim.get_trajectory("CuNP_oal")[-1]
    cls.OAL_image = oal_final
    oal_final.set_calculator(EMT())

    cls.description = "CuNP"
    return super().setUpClass()
def run_relaxation(
    oal_initial_structure,
    config,
    learner,
    dbname,
    mongo_db,
):
    """Build a Relaxation from ``config`` and run it with ``learner``.

    Parameters
    ----------
    oal_initial_structure:
        Starting geometry handed to ``Relaxation``.
    config: dict
        Reads ``config["relaxation"]`` ("optimizer" (default "BFGS"),
        "fmax", "steps", "max_parent_calls", optional "check_final", and,
        for BFGS only, "replay_method" and "maxstep") and
        ``config["learner"]["fmax_verify_threshold"]``.
    learner:
        Calculator passed to ``Relaxation.run``.
    dbname: str
        File name passed to ``Relaxation.run``.
    mongo_db:
        Forwarded to ``do_between_learner_and_run``.

    Returns
    -------
    The ``Relaxation`` object after ``run`` has been called.

    Raises
    ------
    ValueError
        If the configured optimizer is neither "BFGS" nor "CG".
    """
    do_between_learner_and_run(learner, mongo_db)

    relaxation_config = config["relaxation"]
    optimizer_str = relaxation_config.get("optimizer", "BFGS")

    if optimizer_str == "BFGS":
        optimizer_alg = BFGS
        replay_method = relaxation_config["replay_method"]
        maxstep = relaxation_config["maxstep"]
    elif optimizer_str == "CG":
        # The CG branch uses no trajectory replay and no step-size cap.
        optimizer_alg = SciPyFminCG
        replay_method = False
        maxstep = None
    else:
        # The exception must be raised, not merely constructed; otherwise
        # execution continues and fails later with an UnboundLocalError.
        raise ValueError(f"Invalid optimizer name ({optimizer_str}) provided")

    oal_relaxation = Relaxation(
        oal_initial_structure,
        optimizer_alg,
        fmax=relaxation_config["fmax"],
        steps=relaxation_config["steps"],
        maxstep=maxstep,
    )

    oal_relaxation.run(
        learner,
        filename=dbname,
        replay_traj=replay_method,
        max_parent_calls=relaxation_config["max_parent_calls"],
        online_ml_fmax=config["learner"]["fmax_verify_threshold"],
        check_final=relaxation_config.get("check_final", False),
    )

    return oal_relaxation
def init_learner(self):
    """
    Initializes learner, before training loop.

    Resets the iteration / parent-call counters and reads the settings from
    ``self.learner_params``:

    - "atomistic_method": a ``Relaxation`` instance (used as is) or a dict
      with "initial_traj" and optional "fmax" (0.03), "steps" (2000) and
      "maxstep" (0.04), from which a BFGS ``Relaxation`` is built starting
      at the first image of the trajectory.
    - "max_iterations" (20), "samples_to_retrain" (1),
      "filename" ("relax_example"), "file_dir" ("./"),
      "seed" (random integer when absent).

    Raises
    ------
    TypeError
        If "atomistic_method" is neither a ``Relaxation`` nor a dict.
    """
    self.iterations = 0
    self.parent_calls = 0
    self.terminate = False

    atomistic_method = self.learner_params.get("atomistic_method")
    # isinstance (rather than an exact type comparison) also accepts
    # subclasses of Relaxation and dict.
    if isinstance(atomistic_method, Relaxation):
        self.atomistic_method = atomistic_method
    elif isinstance(atomistic_method, dict):
        self.atomistic_method = Relaxation(
            initial_geometry=Trajectory(atomistic_method.get("initial_traj"))[0],
            optimizer=BFGS,
            fmax=atomistic_method.get("fmax", 0.03),
            steps=atomistic_method.get("steps", 2000),
            maxstep=atomistic_method.get("maxstep", 0.04),
        )
    else:
        raise TypeError(
            "Passed in config without an atomistic method Relaxation object or dictionary"
        )

    self.max_iterations = self.learner_params.get("max_iterations", 20)
    self.samples_to_retrain = self.learner_params.get("samples_to_retrain", 1)
    self.filename = self.learner_params.get("filename", "relax_example")
    self.file_dir = self.learner_params.get("file_dir", "./")

    # Seed the global RNG, then derive one query seed per iteration so each
    # query step can be re-seeded independently.
    self.seed = self.learner_params.get("seed", random.randint(0, 100000))
    random.seed(self.seed)
    self.query_seeds = random.sample(range(100000), self.max_iterations)
}, } trainer = AtomsTrainer(config) # building base morse calculator as base calculator cutoff = Gs["default"]["cutoff"] base_calc = MultiMorse(images, cutoff, combo="mean") # define learner_params OfflineActiveLearner learner_params = { "atomistic_method": Relaxation(initial_geometry=slab.copy(), optimizer=BFGS, fmax=0.01, steps=100), "max_iterations": 10, "samples_to_retrain": 2, "filename": "example", "file_dir": "./", "use_dask": False, } learner = OfflineActiveLearner(learner_params, trainer, images, parent_calc, base_calc)
# Set up parent calculator and image environment initial_structure = Icosahedron("Cu", 2) initial_structure.rattle(0.1) initial_structure.set_pbc(True) initial_structure.set_cell([20, 20, 20]) images = [] elements = ["Cu"] parent_calc = EMT() # Run relaxation with active learning OAL_initial_structure = initial_structure.copy() OAL_initial_structure.set_calculator(copy.deepcopy(parent_calc)) OAL_relaxation = Relaxation(OAL_initial_structure, BFGS, fmax=0.05, steps=200, maxstep=0.04) Gs = { "default": { "G2": { "etas": np.logspace(np.log10(0.05), np.log10(5.0), num=4), "rs_s": [0], }, "G4": { "etas": [0.005], "zetas": [1.0, 4.0], "gammas": [1.0, -1.0] }, "cutoff": 6,
"verbose": True, "logger": False, }, } trainer = AtomsTrainer(config) # building base morse calculator as base calculator cutoff = Gs["default"]["cutoff"] base_calc = MultiMorse(images, cutoff, combo="mean") learner_params = { "atomistic_method": Relaxation(initial_geometry=slab.copy(), optimizer=BFGS, fmax=0.01, steps=100), "max_iterations": 10, "samples_to_retrain": 2, "filename": "relax_example", "file_dir": "./", "query_method": "random", "use_dask": True, "seed": 1,
import ase
# Import the submodule explicitly: `import ase` alone does not guarantee
# that `ase.io` is loaded, so `ase.io.read` would otherwise only work as a
# side effect of the other imports below.
import ase.io
from al_mlp.atomistic_methods import Relaxation
from ase.optimize import BFGS
from ase.calculators.emt import EMT

# Read every image from the database and relax the second one with EMT,
# writing the run under the name "true_relax".
initial_db = ase.io.read("Pt-init-images.db", ":")
slab = initial_db[1]
true_relax = Relaxation(slab, BFGS)
true_relax.run(EMT(), "true_relax")
parent_calc = Vasp(**all_params["vasp"]) # declare ml calc ml_potential = FlarePPCalc(all_params["flare"], [initial_structure] + images) # declare base calc base_calc = OCPModel( model_path=all_params["ocp"]["model_path"], checkpoint_path=all_params["ocp"]["checkpoint_path"], ) # declare relaxation object oal_relaxation = Relaxation( oal_initial_structure, all_params["relaxation"]["optimizer"], fmax=all_params["relaxation"]["fmax"], steps=all_params["relaxation"]["steps"], maxstep=all_params["relaxation"]["maxstep"], ) # declare offline learner learner = OfflineActiveLearner( all_params["learner"], [oal_initial_structure] + images, ml_potential, parent_calc, base_calc, mongo_db=mongo_db, ) if os.path.exists("dft_calls.db"):
images, ml_potential, parent_calc, mongo_db=mongo_db, ) if os.path.exists("dft_calls.db"): os.remove("dft_calls.db") if mongo_db is not None: with open("runid.txt", "a") as f: f.write(str(learner.mongo_wrapper.run_id) + "\n") oal_relaxation = Relaxation( oal_initial_structure, all_params["relaxation"]["optimizer"], fmax=all_params["relaxation"]["fmax"], steps=all_params["relaxation"]["steps"], maxstep=all_params["relaxation"]["maxstep"], ) oal_relaxation.run( learner, filename=dbname, replay_traj=True, max_parent_calls=all_params["relaxation"]["max_parent_calls"], ) if hasattr(parent_calc, "close"): parent_calc.close()
import ase.io
from al_mlp.tests.test_setup.online_relaxation_test import run_oal
from al_mlp.atomistic_methods import Relaxation
from ase.calculators.emt import EMT
from ase.optimize import BFGS
import numpy as np

# Load the Pt nanoparticle and attach the parent (EMT) calculator.
initial_structure = ase.io.read("./relaxation_test_structures/Pt-NP.traj")
initial_structure.set_calculator(EMT())

# Reference relaxation with EMT, run under the name "PtNP_emt".
EMT_structure_optim = Relaxation(
    initial_structure,
    BFGS,
    fmax=0.05,
    steps=100,
)
EMT_structure_optim.run(EMT(), "PtNP_emt")

# NOTE(review): this rebinds the imported function name `run_oal` to its
# result, and reuses the same Atoms object the reference run was given.
run_oal = run_oal(initial_structure)


def oal_PtNP_energy():
    """Assert that the EMT and online-AL results report close energies."""
    reference_energy = EMT_structure_optim.get_potential_energy()
    learned_energy = run_oal.get_potential_energy()
    energies_match = np.allclose(reference_energy, learned_energy)
    assert energies_match
"identifier": "test", "verbose": True, "logger": False, }, } trainer = AtomsTrainer(config) trainer_calc = AMPtorch # building base morse calculator as base calculator cutoff = Gs["default"]["cutoff"] base_calc = MultiMorse(images, cutoff, combo="mean") learner_params = { "atomistic_method": Relaxation( initial_geometry=slab.copy(), optimizer=BFGS, fmax=0.01, steps=50 ), "max_iterations": 10, "samples_to_retrain": 5, "filename":"relax_example", "file_dir":"./", "query_method":"max_uncertainty", "use_dask":False, "atomistic_method":Relaxation } learner = EnsembleLearner(learner_params, trainer, images, parent_calc, base_calc,ensemble=3) learner.learn()
class OfflineActiveLearner:
    """Offline Active Learner.

    This class serves as a parent class to inherit more sophisticated
    learners with different query and termination strategies.

    Parameters
    ----------
    learner_params: dict
        Dictionary of learner parameters and settings. Keys read here:
        "atomistic_method" (a Relaxation or a dict describing one),
        "max_iterations", "samples_to_retrain", "filename", "file_dir"
        and "seed".

    training_data: list
        A list of ase.Atoms objects that have attached calculators.
        Used as the first set of training data.

    ml_potential: ase Calculator object
        An instance of an ml_potential calculator that has a train and
        predict method.

    parent_calc: ase Calculator object
        Calculator used for querying training data.

    base_calc: ase Calculator object
        Calculator used to calculate delta data for training.

    mongo_db: mapping, optional
        Its "offline_learner" entry is passed to the Logger as the
        collection. When None, a mapping with a None entry is used.

    optional_config: optional
        Forwarded unchanged to the Logger.
    """

    def __init__(
        self,
        learner_params,
        training_data,
        ml_potential,
        parent_calc,
        base_calc,
        mongo_db=None,
        optional_config=None,
    ):
        self.learner_params = learner_params
        self.ml_potential = ml_potential
        self.training_data = training_data
        self.parent_calc = parent_calc
        self.base_calc = base_calc
        self.calcs = [parent_calc, base_calc]

        if mongo_db is None:
            mongo_db = {"offline_learner": None}

        self.logger = Logger(
            learner_params=learner_params,
            ml_potential=ml_potential,
            parent_calc=parent_calc,
            base_calc=base_calc,
            mongo_db_collection=mongo_db["offline_learner"],
            optional_config=optional_config,
        )

        self.init_learner()
        self.init_training_data()

    def init_learner(self):
        """
        Initializes learner, before training loop.

        Raises
        ------
        TypeError
            If learner_params["atomistic_method"] is neither a Relaxation
            nor a dict.
        """
        self.iterations = 0
        self.parent_calls = 0
        self.terminate = False

        atomistic_method = self.learner_params.get("atomistic_method")
        # isinstance (rather than an exact type comparison) also accepts
        # subclasses of Relaxation and dict.
        if isinstance(atomistic_method, Relaxation):
            self.atomistic_method = atomistic_method
        elif isinstance(atomistic_method, dict):
            self.atomistic_method = Relaxation(
                initial_geometry=Trajectory(
                    atomistic_method.get("initial_traj"))[0],
                optimizer=BFGS,
                fmax=atomistic_method.get("fmax", 0.03),
                steps=atomistic_method.get("steps", 2000),
                maxstep=atomistic_method.get("maxstep", 0.04),
            )
        else:
            raise TypeError(
                "Passed in config without an atomistic method Relaxation object or dictionary"
            )

        self.max_iterations = self.learner_params.get("max_iterations", 20)
        self.samples_to_retrain = self.learner_params.get(
            "samples_to_retrain", 1)
        self.filename = self.learner_params.get("filename", "relax_example")
        self.file_dir = self.learner_params.get("file_dir", "./")

        # Seed the global RNG, then derive one query seed per iteration.
        self.seed = self.learner_params.get("seed", random.randint(0, 100000))
        random.seed(self.seed)
        self.query_seeds = random.sample(range(100000), self.max_iterations)

    def init_training_data(self):
        """
        Prepare the training data by attaching delta values for training.
        """
        # setup delta sub calc as defacto parent calc for all queries
        parent_ref_image = self.atomistic_method.initial_geometry
        base_ref_image = compute_with_calc([parent_ref_image],
                                           self.base_calc)[0]
        self.refs = [parent_ref_image, base_ref_image]
        self.delta_sub_calc = DeltaCalc(self.calcs, "sub", self.refs)

        # move training data into raw data for computing with delta calc
        raw_data = list(self.training_data)

        # run a trajectory with no training data: just the base model to
        # sample from
        self.training_data = []
        self.fn_label = f"{self.file_dir}{self.filename}_iter_{self.iterations}"
        self.do_after_train()

        # add initial data to training dataset
        self.add_data(raw_data, None)
        self.initial_image_energy = self.refs[0].get_potential_energy()

    def learn(self):
        """
        Conduct offline active learning.

        Repeats query -> train -> run/log until ``self.terminate`` is set,
        then calls ``do_after_learn``.
        """
        while not self.terminate:
            self.do_before_train()
            self.do_train()
            self.do_after_train()
        self.do_after_learn()

    def do_before_train(self):
        """
        Executes before training the ml_potential in every active learning loop.
        """
        self.query_data()
        self.fn_label = f"{self.file_dir}{self.filename}_iter_{self.iterations}"

    def do_train(self):
        """
        Executes the training of ml_potential
        """
        self.ml_potential.train(self.training_data)

    def do_after_train(self):
        """
        Executes after training the ml_potential in every active learning loop.

        Runs the atomistic method with the current trained calculator, logs
        every image of the resulting trajectory, evaluates the termination
        check and increments the iteration counter.
        """
        ml_potential = self.make_trainer_calc()
        self.trained_calc = DeltaCalc([ml_potential, self.base_calc], "add",
                                      self.refs)

        self.atomistic_method.run(calc=self.trained_calc,
                                  filename=self.fn_label)
        self.sample_candidates = list(
            self.atomistic_method.get_trajectory(filename=self.fn_label))

        for substep, image in enumerate(self.sample_candidates):
            energy = image.get_potential_energy(apply_constraint=False)
            forces = image.get_forces(apply_constraint=False)
            constrained_forces = image.get_forces()
            fmax = np.sqrt((constrained_forces**2).sum(axis=1).max())
            info = {
                "check": False,
                "energy": energy,
                "forces": forces,
                "fmax": fmax,
                "ml_energy": energy,
                "ml_forces": forces,
                "ml_fmax": fmax,
                "parent_energy": None,
                "parent_forces": None,
                "parent_fmax": None,
                "force_uncertainty": image.info.get("max_force_stds", None),
                "energy_uncertainty": image.info.get("energy_stds", None),
                "dyn_uncertainty_tol": None,
                "stat_uncertain_tol": None,
                "tolerance": None,
                "parent_calls": self.parent_calls,
                "trained_on": False,
                "query_idx": None,
                "substep": substep,
            }
            self.logger.write(image, info)

        self.terminate = self.check_terminate()
        self.iterations += 1

    def do_after_learn(self):
        """
        Executes after active learning loop terminates.
        """
        pass

    def query_data(self):
        """
        Queries data from a list of images. Calculates the properties
        and adds them to the training data.
        """
        # Re-seed per iteration so the selection for a given iteration is
        # reproducible.
        random.seed(self.query_seeds[self.iterations - 1])
        queried_images, query_idx = self.query_func()
        self.add_data(queried_images, query_idx)

    def add_data(self, queried_images, query_idx):
        """
        Compute ``queried_images`` with the delta-sub calculator, append
        them to the training data, and log each one.

        Parameters
        ----------
        queried_images: list
            Images to evaluate and add.
        query_idx: list or None
            Index of each queried image, logged as "query_idx" and
            "substep"; None logs None for both.

        Returns
        -------
        list
            The new images re-evaluated with the base contribution added
            back (the values that are logged).
        """
        self.new_dataset = compute_with_calc(queried_images,
                                             self.delta_sub_calc)
        self.training_data += self.new_dataset
        self.parent_calls += len(self.new_dataset)

        un_delta_new_dataset = []
        for image in self.new_dataset:
            add_delta_calc = DeltaCalc([image.calc, self.base_calc], "add",
                                       self.refs)
            [un_delta_image] = compute_with_calc([image], add_delta_calc)
            un_delta_new_dataset.append(un_delta_image)

        for i, image in enumerate(un_delta_new_dataset):
            idx = query_idx[i] if query_idx is not None else None
            energy = image.get_potential_energy(apply_constraint=False)
            forces = image.get_forces(apply_constraint=False)
            constrained_forces = image.get_forces()
            fmax = np.sqrt((constrained_forces**2).sum(axis=1).max())
            info = {
                "check": True,
                "energy": energy,
                "forces": forces,
                "fmax": fmax,
                "ml_energy": None,
                "ml_forces": None,
                "ml_fmax": None,
                "parent_energy": energy,
                "parent_forces": forces,
                "parent_fmax": fmax,
                "force_uncertainty": image.info.get("max_force_stds", None),
                "energy_uncertainty": image.info.get("energy_stds", None),
                "dyn_uncertainty_tol": None,
                "stat_uncertain_tol": None,
                "tolerance": None,
                "parent_calls": self.parent_calls,
                "trained_on": True,
                "query_idx": idx,
                "substep": idx,
            }
            self.logger.write(image, info)

        return un_delta_new_dataset

    def check_terminate(self):
        """
        Default termination function.

        Adds the last sampled image to the training data (via ``add_data``)
        and terminates when its maximum force is at or below the atomistic
        method's ``fmax``, or when ``max_iterations`` has been reached.
        """
        final_image = self.sample_candidates[-1]
        query_idx = len(self.sample_candidates) - 1
        final_image = self.add_data([final_image], [query_idx])[0]
        max_force = np.sqrt((final_image.get_forces()**2).sum(axis=1).max())
        terminate = max_force <= self.atomistic_method.fmax
        print("Final image check with parent calc: " + str(terminate) +
              ", energy: " + str(final_image.get_potential_energy()) +
              ", max force: " + str(max_force))
        if self.iterations >= self.max_iterations:
            return True
        return terminate

    def query_func(self):
        """
        Default random query strategy.

        Samples image indices from the candidates, excluding index 0. At
        least two images are drawn when nothing has been trained on yet.

        Returns
        -------
        tuple
            ``(queried_images, query_idx)``.
        """
        # training_data is a list, so emptiness must be tested through its
        # length; comparing the list itself with 0 is always False.
        if self.samples_to_retrain < 2 and len(self.training_data) == 0:
            n_samples = 2
        else:
            n_samples = self.samples_to_retrain

        query_idx = random.sample(
            range(1, len(self.sample_candidates)),
            n_samples,
        )
        queried_images = [self.sample_candidates[idx] for idx in query_idx]
        return queried_images, query_idx

    def make_trainer_calc(self, ml_potential=None):
        """
        Default ml_potential calc after train. Assumes ml_potential has a
        'get_calc' method. If ml_potential is passed in, it will get its
        calculator instead.

        Returns a ``Dummy`` calculator while there is no training data.
        """
        if len(self.training_data) == 0:
            return Dummy()
        if ml_potential is None:
            ml_potential = self.ml_potential
        if isinstance(ml_potential, Calculator):
            return ml_potential
        return ml_potential.get_calc()
"dyn_avg_steps": 15, "query_every_n_steps": 100, "num_initial_points": 0, "initial_points_to_keep": [], "fmax_verify_threshold": 0.03, "tolerance_selection": "min", "partial_fit": True, }, parent_dataset=[], ml_potential=ml_potential, parent_calc=parent_calc, mongo_db=None, optional_config=None, ) relaxer = Relaxation(initial_geometry=traj[0], optimizer=BFGS, fmax=0.03, steps=None, maxstep=0.2) relaxer.run( calc=learner, filename="online_learner_trajectory", replay_traj="parent_only", max_parent_calls=None, check_final=False, online_ml_fmax=learner.fmax_verify_threshold, ) print("done!")
"seed": 1, "identifier": "test", "verbose": True, # "logger": True, "single-threaded": False, }, } cutoff = Gs["default"]["cutoff"] parent_calc = EMT() trainer = AtomsTrainer(config) trainer_calc = AMPtorch # base_calc = MultiMorse(images, cutoff, combo="mean") base_calc = Dummy(images) onlinecalc = OnlineActiveLearner( learner_params, trainer, images, parent_calc, base_calc, # trainer_calc, n_ensembles=num_workers, n_cores="max", ) structure_optim = Relaxation(images[0], BFGS, fmax=0.05, steps=100) if os.path.exists("dft_calls.db"): os.remove("dft_calls.db") structure_optim.run(onlinecalc, filename="relax_oal")