def get_energy_metrics(config):
    """Train a model from ``config`` and assert its energy MAE on the
    module-level ``images`` is below 0.02 eV (against ``true_energies``)."""
    model_trainer = AtomsTrainer(config)
    model_trainer.train()
    preds = model_trainer.predict(images)
    predicted_energies = np.array(preds["energy"])
    mae = np.mean(np.abs(true_energies - predicted_energies))
    assert mae < 0.02
def train_model(train_list, test_list, descriptor_set, trial_num, log_filename): Gs = construct_parameter_set(descriptor_set, log_filename = log_filename) # elements = ["Cu", "C", "O"] elements = ["H","O","C"] config = { "model": {"name":"bpnn", "get_forces": False, "num_layers": 3, "num_nodes": 50, #"elementwise":False, "batchnorm": True}, "optim": { "gpus":0, #"force_coefficient": 0.04, "force_coefficient": 0.0, "lr": 1e-3, "batch_size": 256, "epochs": 5000, "loss": "mae", #"scheduler": {"policy": "StepLR", "params": {"step_size": 1000, "gamma": 0.5}}, }, "dataset": { "raw_data": train_list, "val_split": 0.2, "elements": elements, "fp_scheme": "gaussian", "fp_params": Gs, "save_fps": False, "scaling": {"type": "normalize", "range": (0, 1)} }, "cmd": { "debug": False, "run_dir": "./", "seed": trial_num, "identifier": "test", "verbose": True, "logger": False, }, } trainer = AtomsTrainer(config) trainer.train() test_model(trainer, train_list, data_type = "train", log_filename = log_filename) test_model(trainer, test_list, data_type = "test", log_filename = log_filename) return
def get_force_metrics(config):
    """Train a model from ``config`` and assert both energy and force MAEs on
    the module-level ``images`` are below 0.06."""
    fitted = AtomsTrainer(config)
    fitted.train()
    results = fitted.predict(images)
    energy_pred = np.array(results["energy"])
    force_pred = np.concatenate(np.array(results["forces"]))
    energy_mae = np.mean(np.abs(true_energies - energy_pred))
    force_mae = np.mean(np.abs(force_pred - true_forces))
    assert energy_mae < 0.06
    assert force_mae < 0.06
def get_performance_metrics(config):
    """Train a model from ``config``, assert tight energy/force MAE bounds on
    the module-level ``images``, and return ``(e_mae, f_mae)``."""
    fitted = AtomsTrainer(config)
    fitted.train()
    out = fitted.predict(images)
    energy_pred = np.array(out["energy"])
    force_pred = np.concatenate(np.array(out["forces"]))
    e_mae = np.mean(np.abs(true_energies - energy_pred))
    f_mae = np.mean(np.abs(force_pred - true_forces))
    assert e_mae < 0.01, "%f !< .01" % e_mae
    assert f_mae < 0.03, "%f !< .03" % f_mae
    return e_mae, f_mae
def train_and_combine(args_list):
    """Train one ensemble member and return its neural-net calculator.

    ``args_list`` is a ``(training_dataset, trainer)`` pair.  After training,
    the model is reloaded from its checkpoint into a fresh trainer so the
    returned calculator is detached from the training instance.
    """
    dataset, member_trainer = args_list[0], args_list[1]
    member_trainer.train(raw_data=dataset)
    ckpt_dir = member_trainer.cp_dir
    reloaded = AtomsTrainer()
    reloaded.load_pretrained(checkpoint_path=ckpt_dir)
    return reloaded.get_calc()
def train(self, parent_dataset, new_dataset=None):
    """
    Train an ensemble of calculators on subsets of ``parent_dataset`` and
    store them in ``self.trained_calcs``.

    Uses Dask to parallelize; a cluster, the image to use, and a pool of
    workers must have been set up beforehand.  Falls back to serial
    execution when ``self.executor`` is ``None``.
    ``new_dataset`` is accepted but not used in this implementation.
    """
    ensemble_sets, parent_dataset = non_bootstrap_ensemble(
        parent_dataset, n_ensembles=self.n_ensembles)

    def train_and_combine(args_list):
        """
        method for training trainer on ensemble sets, then create neural
        net calc, returns trained calc
        """
        training_dataset = args_list[0]
        trainer = args_list[1]
        trainer.train(raw_data=training_dataset)
        check_path = trainer.cp_dir
        # Reload from the checkpoint so the returned calculator is detached
        # from the trainer instance that did the training.
        trainer = AtomsTrainer()
        trainer.load_pretrained(checkpoint_path=check_path)
        trainer_calc = trainer.get_calc()
        return trainer_calc

    # split ensemble sets into separate args_lists, clone: trainer,
    # base calc and add to args_lists, add: refs to args_lists
    args_lists = []
    # Derive one reproducible, independent seed per ensemble member from the
    # base seed in the trainer config.
    random.seed(self.amptorch_trainer.config["cmd"]["seed"])
    randomlist = [random.randint(0, 4294967295) for set in ensemble_sets]
    for i in range(len(ensemble_sets)):
        ensemble_set = ensemble_sets[i]
        copy_config = copy.deepcopy(self.amptorch_trainer.config)
        copy_config["cmd"]["seed"] = randomlist[i]
        # Unique identifier so member checkpoints do not collide on disk.
        copy_config["cmd"]["identifier"] = copy_config["cmd"][
            "identifier"] + str(uuid.uuid4())
        trainer_copy = AtomsTrainer(copy_config)
        args_lists.append((ensemble_set, trainer_copy))

    # map training method, returns array of delta calcs
    trained_calcs = []
    if self.executor is not None:
        # Dask path: scatter each (dataset, trainer) pair to the workers,
        # then submit the training function against the scattered future.
        futures = []
        for args_list in args_lists:
            big_future = self.executor.scatter(args_list)
            futures.append(
                self.executor.submit(train_and_combine, big_future))
        trained_calcs = [future.result() for future in futures]
    else:
        # Serial fallback when no executor was configured.
        for args_list in args_lists:
            trained_calcs.append(train_and_combine(args_list))

    # call init to construct ensemble calc from array of delta calcs
    self.trained_calcs = trained_calcs
def run_offlineal(cluster, parent_calc, elements, al_learner_params, config, optimizer):
    """Run offline active learning to relax ``cluster``.

    Mutates ``al_learner_params`` (sets ``atomistic_method``) and ``config``
    (sets ``dataset``/``cmd`` sections) in place, then trains an FmaxLearner
    against ``parent_calc`` with an EMT base calculator.

    Returns
    -------
    (relaxed_clus, parent_calls) : the final frame of the last-iteration ML
    trajectory and the number of parent-calculator calls made.
    """
    # Gaussian symmetry-function (fingerprint) parameters.
    Gs = {
        "default": {
            "G2": {
                "etas": np.logspace(np.log10(0.05), np.log10(5.0), num=4),
                "rs_s": [0],
            },
            "G4": {"etas": [0.005], "zetas": [1.0, 4.0], "gammas": [1.0, -1.0]},
            "cutoff": 6,
        },
    }

    images = [cluster]
    # The learner re-runs this relaxation with the ML calculator each iteration.
    al_learner_params["atomistic_method"] = Relaxation(
        cluster, optimizer, fmax=0.01, steps=100
    )

    config["dataset"] = {
        "raw_data": images,
        "val_split": 0,
        "elements": elements,
        "fp_params": Gs,
        "save_fps": False,
        "scaling": {"type": "normalize", "range": (-1, 1)},
    }
    config["cmd"] = {
        "debug": False,
        "run_dir": "./",
        "seed": 2,
        "identifier": "cluster",
        "verbose": True,
        # "logger": True,
        "single-threaded": True,
    }

    trainer = AtomsTrainer(config)

    # base_calc = MultiMorse(images, Gs["default"]["cutoff"], combo="mean")
    base_calc = EMT()

    offlinecalc = FmaxLearner(
        al_learner_params, trainer, images, parent_calc, base_calc
    )
    # Start from a clean query database.
    if os.path.exists("queried_images.db"):
        os.remove("queried_images.db")

    offlinecalc.learn()
    al_iterations = offlinecalc.iterations - 1
    file_path = al_learner_params["file_dir"] + al_learner_params["filename"]
    # Trajectory written by the last AL iteration; its final frame is the
    # relaxed structure.
    final_ml_traj = read("{}_iter_{}.traj".format(file_path, al_iterations), ":")
    relaxed_clus = final_ml_traj[-1]
    return relaxed_clus, offlinecalc.parent_calls
def test_pretrained():
    """Loading a pretrained checkpoint must reproduce the original metrics,
    and continued training must improve them."""
    torch.set_num_threads(1)

    first = AtomsTrainer(config)
    first.train()
    checkpoint_dir = first.cp_dir
    e_mae_1, f_mae_1 = get_metrics(first)

    # Reload the checkpoint into a fresh trainer and compare metrics.
    config["optim"]["epochs"] = 100
    reloaded = AtomsTrainer(config)
    reloaded.load_pretrained(checkpoint_dir)
    e_mae_2, f_mae_2 = get_metrics(reloaded)
    assert e_mae_1 == e_mae_2, "Pretrained energy metrics inconsistent!"
    assert f_mae_1 == f_mae_2, "Pretrained force metrics inconsistent!"

    # Further training from the checkpoint should reduce both errors.
    reloaded.train()
    e_mae_3, f_mae_3 = get_metrics(reloaded)
    assert e_mae_3 < e_mae_2, "Retrained metrics are larger!"
    assert f_mae_3 < f_mae_2, "Retrained metrics are larger!"
def test_pretrained_no_config():
    """Loading a checkpoint into a configless trainer must reproduce the
    metrics of the trainer that wrote it."""
    base_config = copy.deepcopy(config)
    original = AtomsTrainer(base_config)
    original.train()
    checkpoint_dir = original.cp_dir
    e_mae_1, f_mae_1 = get_metrics(original)

    # No config passed: everything is restored from the checkpoint.
    restored = AtomsTrainer()
    restored.load_pretrained(checkpoint_dir)
    e_mae_2, f_mae_2 = get_metrics(restored)
    assert e_mae_1 == e_mae_2, "configless - pretrained energy metrics inconsistent!"
    assert f_mae_1 == f_mae_2, "configless - pretrained force metrics inconsistent!"
def module_evaluate(learning_rate, num_nodes, num_layers):
    """Train a GMP model with the given hyperparameters and return its
    test-set energy MAE.

    Parameters
    ----------
    learning_rate : optimizer learning rate (coerced to float).
    num_nodes, num_layers : network width/depth (coerced to int).

    Returns
    -------
    float : mean absolute error of predicted vs. true energies on the test split.
    """
    learning_rate = float(learning_rate)
    num_nodes = int(num_nodes)
    num_layers = int(num_layers)

    input_filename = "../data/water_dft.traj"
    # Create the train/test split on disk if either half is missing.
    # (Fixed: original compared `os.path.exists(...) is False`, an identity
    # check against a literal; use boolean `not`/`and` instead.)
    if not (os.path.exists("../data/train.traj")
            and os.path.exists("../data/test.traj")):
        print("Creating train_test split. ")
        train_ratio = 0.9
        training_list, test_list = split_train_test(
            input_filename, train_ratio, save=True,
            filenames=["../data/train.traj", "../data/test.traj"])
    else:
        print("Reading train_test split. ")
        training_list, test_list = load_training_data("../data/train.traj",
                                                      "../data/test.traj")

    sigmas = np.logspace(np.log10(0.02), np.log10(1.0), num=5)
    MCSHs = {
        "MCSHs": {
            "0": {
                "groups": [1],
                "sigmas": sigmas
            },
            "1": {
                "groups": [1],
                "sigmas": sigmas
            },
            "2": {
                "groups": [1, 2],
                "sigmas": sigmas
            },
            "3": {
                "groups": [1, 2, 3],
                "sigmas": sigmas
            },
            "4": {
                "groups": [1, 2, 3, 4],
                "sigmas": sigmas
            },
            "5": {
                "groups": [1, 2, 3, 4, 5],
                "sigmas": sigmas
            },
            # "6": {"groups": [1, 2, 3, 4, 5, 6, 7], "sigmas": sigmas},
        },
        "atom_gaussians": {
            "H": "../MCSH_potentials/H_pseudodensity_2.g",
            "O": "../MCSH_potentials/O_pseudodensity_4.g",
        },
        "cutoff": 8,
    }

    elements = ["H", "O"]
    config = {
        "model": {
            "get_forces": True,
            "num_layers": num_layers,
            "num_nodes": num_nodes
        },
        "optim": {
            "device": "cpu",
            "force_coefficient": 0.2,
            "lr": learning_rate,
            "batch_size": 8,
            "epochs": 200,
        },
        "dataset": {
            "raw_data": training_list,
            # "val_split": 0.1,
            "elements": elements,
            "fp_scheme": "gmp",
            "fp_params": MCSHs,
            "save_fps": True,
        },
        "cmd": {
            "debug": False,
            "run_dir": "./",
            "seed": 1,
            "identifier": "test",
            "verbose": False,
            "logger": False,
        },
    }

    trainer = AtomsTrainer(config)
    trainer.train()

    predictions = trainer.predict(test_list)
    true_energies = np.array(
        [image.get_potential_energy() for image in test_list])
    pred_energies = np.array(predictions["energy"])
    mae_result = np.mean(np.abs(true_energies - pred_energies))
    return mae_result
def run_oal(atomistic_method, images, elements, dbname, parent_calc):
    """Run online active learning for the given atomistic method.

    Builds an ``OnlineLearner`` around an ensemble-capable AtomsTrainer and
    runs ``atomistic_method`` with it, logging to ``dbname``.

    Returns
    -------
    (onlinecalc, atomistic_method) : the learner calculator and the (run)
    atomistic method.
    """
    # Gaussian symmetry-function fingerprint parameters.
    Gs = {
        "default": {
            "G2": {
                "etas": np.logspace(np.log10(0.05), np.log10(5.0), num=4),
                "rs_s": [0],
            },
            "G4": {
                "etas": [0.005],
                "zetas": [1.0, 4.0],
                "gammas": [1.0, -1.0]
            },
            "cutoff": 6,
        },
    }

    learner_params = {
        "max_iterations": 10,
        "samples_to_retrain": 1,
        "filename": "relax_example",
        "file_dir": "./",
        "uncertain_tol": 5.0,
        "fmax_verify_threshold": 0.05,  # eV/AA
        "relative_variance": True,
        "n_ensembles": 10,
        "use_dask": True,
    }

    config = {
        "model": {
            "get_forces": True,
            "num_layers": 3,
            "num_nodes": 5
        },
        "optim": {
            "device": "cpu",
            "force_coefficient": 4.0,
            "lr": 1,
            "batch_size": 10,
            "epochs": 100,
            "optimizer": torch.optim.LBFGS,
            "optimizer_args": {
                "optimizer__line_search_fn": "strong_wolfe"
            },
        },
        "dataset": {
            "raw_data": images,
            "val_split": 0,
            "elements": elements,
            "fp_params": Gs,
            "save_fps": False,
            "scaling": {
                "type": "standardize"
            },
        },
        "cmd": {
            "debug": False,
            "run_dir": "./",
            "seed": 1,
            "identifier": "test",
            "verbose": False,
            # "logger": True,
            "single-threaded": True,
        },
    }

    trainer = AtomsTrainer(config)
    onlinecalc = OnlineLearner(
        learner_params,
        trainer,
        images,
        parent_calc,
    )
    # Start from a clean DFT-call database.
    if os.path.exists("dft_calls.db"):
        os.remove("dft_calls.db")

    atomistic_method.run(onlinecalc, filename=dbname)

    return onlinecalc, atomistic_method
# Build the MCSH fingerprint configuration for the chosen parameter index.
# (MCSHs_dict, MCSHs_index, potential_files, cutoff_distance, checkpoint_name,
# test_images, set_index, predict_data, etc. are defined elsewhere in this file.)
MCSHs = MCSHs_dict[MCSHs_index]
MCSHs = {
    "MCSHs": MCSHs,
    "atom_gaussians": potential_files,
    "cutoff": cutoff_distance
}

# Elements covered by the pretrained model's checkpoint.
elements = [
    'Pt', 'Al', 'V', 'Pd', 'Fe', 'Sn', 'Ge', 'Bi', 'Ir', 'Re', 'Cd', 'Cr',
    'Ag', 'Hf', 'Ru', 'Ti', 'Cs', 'Os', 'N', 'As', 'O', 'S', 'Mo', 'Ta',
    'Zn', 'Y', 'Mn', 'Na', 'Rh', 'Hg', 'C', 'Co', 'Nb', 'Sc', 'Sr', 'H',
    'Au', 'Ga', 'Tl', 'K', 'Se', 'B', 'Pb', 'Ca', 'Cl', 'Cu', 'Zr', 'Rb',
    'P', 'W', 'Tc', 'Te', 'Ni', 'Sb', 'Si', 'In'
]

# Restore the pretrained model; gpu2cpu=True presumably remaps GPU-saved
# tensors for CPU inference — confirm against AtomsTrainer.load_pretrained.
trainer = AtomsTrainer()
trainer.load_pretrained(checkpoint_name, gpu2cpu=True)
# Don't re-save fingerprints during evaluation.
trainer.config["dataset"]["save_fps"] = False

# Results directory encodes the full hyperparameter combination.
result_dirname = "./test_result_val_id/sigma{}_MCSH{}_nodes{}_layers{}_cutoff{}_numtraining{}_results".format(
    sigmas_index, MCSHs_index, num_nodes, num_layers, cutoff_distance, num_training)
test_mae = predict_data(trainer, test_images, set_index,
                        result_dirname=result_dirname, image_type="test")
"elements": elements, "fp_scheme": "mcsh", "fp_params": MCSHs, "save_fps": True, }, "cmd": { "debug": False, "run_dir": "./", "seed": 1, "identifier": "test", "verbose": True, "logger": False, }, } trainer = AtomsTrainer(config) trainer.train() dataset = trainer.train_dataset position_array = [] fp_array = [] energy_array = [] for i, data in enumerate(dataset): position = images[i].get_positions() position_array.append(np.asarray(position)) fingerprint = data.fingerprint.numpy() fp_array.append(fingerprint) energy = images[i].get_potential_energy() energy_array.append(energy)
"val_split": 0, "elements": elements, "fp_params": Gs, "save_fps": True, }, "cmd": { "debug": False, "run_dir": "./", "seed": 1, "identifier": "test", "verbose": True, "logger": False, }, } trainer = AtomsTrainer(config) # building base morse calculator as base calculator cutoff = Gs["default"]["cutoff"] base_calc = MultiMorse(images, cutoff, combo="mean") # define learner_params OfflineActiveLearner learner_params = { "atomistic_method": Relaxation(initial_geometry=slab.copy(), optimizer=BFGS, fmax=0.01, steps=100), "max_iterations":
def run_offline_al(atomistic_method, images, dbname, parent_calc):
    """Run offline active learning (FmaxLearner) for the given relaxation.

    Returns
    -------
    (learner, trained_calc, final_ml_traj) : the learner, its trained ML
    calculator, and the full ML trajectory of the last AL iteration.
    ``dbname`` is accepted but unused (the atomistic-method run is
    commented out below).
    """
    # Gaussian symmetry-function fingerprint parameters.
    Gs = {
        "default": {
            "G2": {
                "etas": np.logspace(np.log10(0.05), np.log10(5.0), num=4),
                "rs_s": [0] * 4,
            },
            "G4": {"etas": [0.005], "zetas": [1.0, 4.0], "gammas": [1.0, -1.0]},
            "cutoff": 6,
        },
    }

    elements = np.unique(images[0].get_chemical_symbols())
    learner_params = {
        "atomistic_method": atomistic_method,
        "max_iterations": 10,
        "force_tolerance": 0.01,
        "samples_to_retrain": 2,
        "filename": "relax_example",
        "file_dir": "./",
        "query_method": "random",
        "use_dask": False,
        "max_evA": 0.05,
    }

    config = {
        "model": {
            "get_forces": True,
            "num_layers": 3,
            "num_nodes": 20,
        },
        "optim": {
            "device": "cpu",
            "force_coefficient": 30,
            "lr": 1,
            "batch_size": 10,
            "epochs": 100,  # was 100
            "loss": "mse",
            "metric": "mae",
            "optimizer": torch.optim.LBFGS,
            "optimizer_args": {"optimizer__line_search_fn": "strong_wolfe"},
        },
        "dataset": {
            "raw_data": images,
            "val_split": 0,
            "elements": elements,
            "fp_params": Gs,
            "save_fps": False,
            "scaling": {"type": "normalize", "range": (-1, 1)},
        },
        "cmd": {
            "debug": False,
            "run_dir": "./",
            "seed": 1,
            "identifier": "test",
            "verbose": True,
            # "logger": True,
            "single-threaded": True,
        },
    }

    trainer = AtomsTrainer(config)

    # Morse-potential baseline used as the delta-learning base calculator.
    cutoff = Gs["default"]["cutoff"]
    base_calc = MultiMorse(images, cutoff, combo="mean")

    learner = FmaxLearner(
        learner_params,
        images,
        trainer,
        parent_calc,
        base_calc,
    )

    learner.learn()
    trained_calc = learner.trained_calc
    al_iterations = learner.iterations - 1
    file_path = learner_params["file_dir"] + learner_params["filename"]
    # Trajectory written by the last AL iteration.
    final_ml_traj = ase.io.read("{}_iter_{}.traj".format(file_path, al_iterations), ":")

    # Clean up the DFT-call database afterwards.
    if os.path.exists("dft_calls.db"):
        os.remove("dft_calls.db")
    # atomistic_method.run(learner, filename=dbname)

    return learner, trained_calc, final_ml_traj
        "fp_params": Gs,
        "save_fps": True,
    },
    "cmd": {
        "debug": False,
        "run_dir": "./",
        "seed": 1,
        "identifier": "test",
        "verbose": True,
        "logger": False,
    },
}

# Train once with a cosine cutoff function and predict on the images.
config["dataset"]["cutoff_params"] = cosine_cutoff_params
torch.set_num_threads(1)
cosine_trainer = AtomsTrainer(config)
cosine_trainer.train()
predictions = cosine_trainer.predict(images)

true_energies = np.array([image.get_potential_energy() for image in images])
cosine_pred_energies = np.array(predictions["energy"])

# Sanity-check the ASE-calculator wrapper on the last `image` in scope.
image.set_calculator(AMPtorch(cosine_trainer))
image.get_potential_energy()

# Repeat the training with a polynomial cutoff function for comparison.
config["dataset"]["cutoff_params"] = polynomial_cutoff_params
torch.set_num_threads(1)
polynomial_trainer = AtomsTrainer(config)
polynomial_trainer.train()
def offline_neb(images, parent_calc, iter=4, intermediate_images=3):
    """Run offline active learning (NEBLearner) for a NEB calculation.

    Parameters
    ----------
    images : images used for training and as NEB starting images.
    parent_calc : reference calculator queried by the learner.
    iter : maximum number of AL iterations (shadows the builtin ``iter``).
    intermediate_images : NEB intermediate samples, also the number of
        samples retrained per iteration.

    Returns
    -------
    The trained NEBLearner.
    """
    torch.set_num_threads(1)
    parent_calc = parent_calc  # NOTE(review): self-assignment is a no-op.

    # Gaussian symmetry-function fingerprint parameters.
    Gs = {
        "default": {
            "G2": {
                "etas": np.logspace(np.log10(0.05), np.log10(5.0), num=4),
                "rs_s": [0],
            },
            "G4": {
                "etas": [0.005],
                "zetas": [1.0, 4.0],
                "gammas": [1.0, -1.0]
            },
            "cutoff": 5.0,
        },
    }

    elements = ["Cu", "C"]
    config = {
        "model": {
            "get_forces": True,
            "num_layers": 3,
            "num_nodes": 20,
            "activation": Tanh,
        },
        "optim": {
            "device": "cpu",
            "force_coefficient": 27,
            "lr": 1e-2,
            "batch_size": 1000,
            "epochs": 300,
            "loss": "mse",
            "metric": "mse",
            "optimizer": torch.optim.LBFGS,
            "optimizer_args": {
                "optimizer__line_search_fn": "strong_wolfe"
            },
            "scheduler": {
                "policy": torch.optim.lr_scheduler.CosineAnnealingWarmRestarts,
                "params": {
                    "T_0": 10,
                    "T_mult": 2
                },
            },
        },
        "dataset": {
            "raw_data": images,
            "val_split": 0,
            "elements": elements,
            "fp_params": Gs,
            "save_fps": True,
            "scaling": {
                "type": "normalize",
                "range": (-1, 1)
            },
        },
        "cmd": {
            "debug": False,
            "run_dir": "./",
            "seed": 1,
            "identifier": "test",
            "verbose": True,
            "logger": False,
            "dtype": torch.DoubleTensor,  # double precision for NEB forces
        },
    }
    trainer = AtomsTrainer(config)

    # building base morse calculator as base calculator
    cutoff = Gs["default"]["cutoff"]
    neb_images = images.copy()
    base_calc = MultiMorse(neb_images, cutoff, combo="mean")
    # base_calc = Dummy(images)

    # define learner_params OfflineActiveLearner
    learner_params = {
        "atomistic_method": NEBcalc(
            starting_images=neb_images,
            intermediate_samples=intermediate_images,
        ),
        "max_iterations": iter,
        "samples_to_retrain": intermediate_images,
        "filename": "example",
        "file_dir": "./",
        "use_dask": False,
        # "max_evA": 0.01,
    }

    learner = NEBLearner(learner_params, images, trainer, parent_calc, base_calc)
    learner.learn()
    return learner
def load_trainer(checkpoint_path):
    """Return an ``AtomsTrainer`` restored from ``checkpoint_path``."""
    restored = AtomsTrainer()
    restored.load_pretrained(checkpoint_path)
    return restored
"batch_size": batch_size, "epochs": epochs, "loss": "mae", }, "dataset": { "lmdb_path": lmdb_paths, "val_split": val_split, "val_split_mode": "inorder", "cache": "partial" }, "cmd": { "debug": False, "run_dir": "./", "seed": 1, "identifier": "test", "verbose": True, # Weights and Biases used for logging - an account(free) is required "logger": False, }, } trainer = AtomsTrainer(config) if os.path.isdir(checkpoint_name): trainer.load_pretrained(checkpoint_name) else: print( "**** WARNING: checkpoint not found: {} ****".format(checkpoint_name)) print("training") trainer.train() print("end training")
def calculate_energy(x0):
    """Return the ML-predicted potential energy of a water molecule.

    ``x0`` is a geometry vector: ``x0[0]`` is the O-H bond length and
    ``x0[1]`` the bond angle.  A pretrained MCSH model is loaded from its
    checkpoint and used as an ASE calculator on a freshly built H2O molecule.
    """
    bond_length = x0[0]
    angle = x0[1]

    # A single reference frame is enough to construct the trainer config.
    reference_images = read("../data/water.traj", index=":")[:1]

    sigmas = np.logspace(np.log10(0.02), np.log10(1.0), num=5)
    MCSHs = {
        "MCSHs": {
            "0": {"groups": [1], "sigmas": sigmas},
            "1": {"groups": [1], "sigmas": sigmas},
            "2": {"groups": [1, 2], "sigmas": sigmas},
            "3": {"groups": [1, 2, 3], "sigmas": sigmas},
            "4": {"groups": [1, 2, 3, 4], "sigmas": sigmas},
            "5": {"groups": [1, 2, 3, 4, 5], "sigmas": sigmas},
            # "6": {"groups": [1, 2, 3, 4, 5, 6, 7], "sigmas": sigmas},
        },
        "atom_gaussians": {
            "H": "../MCSH_potentials/H_pseudodensity_2.g",
            "O": "../MCSH_potentials/O_pseudodensity_4.g",
        },
        "cutoff": 8,
    }

    config = {
        "model": {"get_forces": True, "num_layers": 3, "num_nodes": 20},
        "optim": {
            "device": "cpu",
            "force_coefficient": 0.2,
            "lr": 1e-3,
            "batch_size": 8,
            "epochs": 500,
        },
        "dataset": {
            "raw_data": reference_images,
            # "val_split": 0.1,
            "elements": ["H", "O"],
            "fp_scheme": "mcsh",
            "fp_params": MCSHs,
            "save_fps": True,
        },
        "cmd": {
            "debug": False,
            "run_dir": "./",
            "seed": 1,
            "identifier": "test",
            "verbose": True,
            "logger": False,
        },
    }

    trainer = AtomsTrainer(config)
    # loading the pretrained model
    trainer.load_pretrained("../checkpoints/2021-03-22-14-02-20-test")
    calculator = AMPtorch(trainer)

    # Build the H2O geometry requested by x0.
    image = molecule('H2O')
    image.set_distance(0, 2, bond_length, fix=0)
    image.set_angle(1, 0, 2, angle)
    image.set_cell([10, 10, 10])
    image.center()
    image.set_calculator(calculator)
    return image.get_potential_energy()
"batch_size": 10, "epochs": 100, }, "dataset": { "raw_data": images, "val_split": 0, "elements": elements, "fp_scheme": "mcsh", "fp_params": MCSHs, "save_fps": True, }, "cmd": { "debug": False, "run_dir": "./", "seed": 1, "identifier": "test", "verbose": True, "logger": False, }, } trainer = AtomsTrainer(config) trainer.train() predictions = trainer.predict(images[:10]) true_energies = np.array([image.get_potential_energy() for image in images]) pred_energies = np.array(predictions["energy"]) print("Energy MSE:", np.mean((true_energies - pred_energies)**2))
def objective_function(rank, scratch_dir, params):
    """Hyperparameter-search objective: train an AMPtorch model with the
    architecture in ``params`` and return the test-set energy MSE.

    ``rank`` only tags the run identifier; ``scratch_dir`` is the run
    directory for checkpoints and fingerprints.
    """
    train_images = Trajectory('train.traj')
    test_images = Trajectory('test.traj')
    elements = np.unique([atom.symbol for atom in train_images[0]])

    # Gaussian descriptor set: four G2 and four G4 descriptors, cosine cutoff.
    gds = GaussianDescriptorSet(elements, 6.0, {'cutoff_func': 'cosine'})
    gds.batch_add_descriptors(2,
                              [0.25, 2.5, 0.25, 2.5],   # G2 etas
                              [0.0, 0.0, 3.0, 3.0],     # G2 rs_s
                              [])
    gds.batch_add_descriptors(4,
                              [0.005, 0.005, 0.01, 0.01],   # G4 etas
                              [1.0, 4.0, 4.0, 16.0],        # G4 zetas
                              [1.0, 1.0, -1.0, -1.0])       # G4 gammas

    amptorch_config = {
        'model': {
            'get_forces': True,
            'num_layers': params['num_layers'],
            'num_nodes': params['num_nodes'],
            'batchnorm': False,
        },
        'optim': {
            'force_coefficient': 0.04,
            'lr': 1e-2,
            'batch_size': 32,
            'epochs': 100,
            'loss': 'mse',
            'metric': 'mae',
            'gpus': 0,
        },
        'dataset': {
            'raw_data': train_images,
            'val_split': 0.1,
            'fp_params': gds,
            'save_fps': True,
            'scaling': {
                'type': 'normalize',
                'range': (0, 1)
            },
        },
        'cmd': {
            'debug': False,
            'run_dir': scratch_dir,
            'seed': 1,
            'identifier': 'rank{}'.format(rank),
            'verbose': False,
            'logger': False,
        },
    }

    with NoLogging():
        # train on train_images.traj
        torch.set_num_threads(1)
        trainer = AtomsTrainer(amptorch_config)
        trainer.train()

        # evaluate on test_images.traj
        predictions = trainer.predict(test_images)
        true_energies = np.array(
            [image.get_potential_energy() for image in test_images])
        pred_energies = np.array(predictions['energy'])
        mse = np.mean((true_energies - pred_energies)**2)

    return mse