def get_energy_metrics(config):
    """Train a model from *config* and assert its energy MAE is below 0.02.

    NOTE(review): relies on the module-level ``images`` and ``true_energies``.
    """
    trainer = AtomsTrainer(config)
    trainer.train()

    preds = trainer.predict(images)
    predicted_energies = np.array(preds["energy"])

    energy_mae = np.abs(true_energies - predicted_energies).mean()
    assert energy_mae < 0.02
def train_model(train_list, test_list, descriptor_set, trial_num, log_filename):
    """Build a BPNN config, train on *train_list*, and evaluate both splits.

    Metrics for the train and test sets are written via ``test_model`` using
    *log_filename*; *trial_num* seeds the run for reproducibility.
    """
    fingerprint_params = construct_parameter_set(descriptor_set,
                                                 log_filename=log_filename)
    element_symbols = ["H", "O", "C"]

    # Assemble the trainer configuration section by section.
    model_section = {
        "name": "bpnn",
        "get_forces": False,
        "num_layers": 3,
        "num_nodes": 50,
        "batchnorm": True,
    }
    optim_section = {
        "gpus": 0,
        # Energy-only training: force loss disabled.
        "force_coefficient": 0.0,
        "lr": 1e-3,
        "batch_size": 256,
        "epochs": 5000,
        "loss": "mae",
    }
    dataset_section = {
        "raw_data": train_list,
        "val_split": 0.2,
        "elements": element_symbols,
        "fp_scheme": "gaussian",
        "fp_params": fingerprint_params,
        "save_fps": False,
        "scaling": {"type": "normalize", "range": (0, 1)},
    }
    cmd_section = {
        "debug": False,
        "run_dir": "./",
        "seed": trial_num,
        "identifier": "test",
        "verbose": True,
        "logger": False,
    }
    config = {
        "model": model_section,
        "optim": optim_section,
        "dataset": dataset_section,
        "cmd": cmd_section,
    }

    trainer = AtomsTrainer(config)
    trainer.train()

    # Log metrics on both splits through the shared helper.
    test_model(trainer, train_list, data_type="train", log_filename=log_filename)
    test_model(trainer, test_list, data_type="test", log_filename=log_filename)
def get_force_metrics(config):
    """Train from *config* and assert energy and force MAEs are below 0.06.

    NOTE(review): relies on module-level ``images``, ``true_energies`` and
    ``true_forces``.
    """
    trainer = AtomsTrainer(config)
    trainer.train()

    preds = trainer.predict(images)
    predicted_energies = np.array(preds["energy"])
    # Per-image force arrays are flattened into one array for a global MAE.
    predicted_forces = np.concatenate(np.array(preds["forces"]))

    energy_mae = np.abs(true_energies - predicted_energies).mean()
    force_mae = np.abs(predicted_forces - true_forces).mean()
    assert energy_mae < 0.06
    assert force_mae < 0.06
def test_pretrained_no_config():
    """A trainer reloaded from a checkpoint with no config must reproduce metrics."""
    # Deep-copy so training does not mutate the shared module-level config.
    base_config = copy.deepcopy(config)
    original = AtomsTrainer(base_config)
    original.train()
    checkpoint_dir = original.cp_dir
    e_before, f_before = get_metrics(original)

    # Reload purely from the checkpoint directory, supplying no config.
    reloaded = AtomsTrainer()
    reloaded.load_pretrained(checkpoint_dir)
    e_after, f_after = get_metrics(reloaded)

    assert e_before == e_after, "configless - pretrained energy metrics inconsistent!"
    assert f_before == f_after, "configless - pretrained force metrics inconsistent!"
def get_performance_metrics(config):
    """Train from *config*, assert MAE thresholds, return (energy_mae, force_mae).

    NOTE(review): relies on module-level ``images``, ``true_energies`` and
    ``true_forces``.
    """
    trainer = AtomsTrainer(config)
    trainer.train()

    preds = trainer.predict(images)
    predicted_energies = np.array(preds["energy"])
    predicted_forces = np.concatenate(np.array(preds["forces"]))

    e_mae = np.abs(true_energies - predicted_energies).mean()
    f_mae = np.abs(predicted_forces - true_forces).mean()
    assert e_mae < 0.01, "%f !< .01" % e_mae
    assert f_mae < 0.03, "%f !< .03" % f_mae
    return e_mae, f_mae
def test_pretrained():
    """Checkpoint reload must reproduce metrics; further training must improve them."""
    torch.set_num_threads(1)

    base_trainer = AtomsTrainer(config)
    base_trainer.train()
    checkpoint_dir = base_trainer.cp_dir
    e_base, f_base = get_metrics(base_trainer)

    # NOTE(review): intentionally mutates the module-level config in place so
    # the retraining phase below runs a shorter schedule.
    config["optim"]["epochs"] = 100
    restored = AtomsTrainer(config)
    restored.load_pretrained(checkpoint_dir)
    e_restored, f_restored = get_metrics(restored)
    assert e_base == e_restored, "Pretrained energy metrics inconsistent!"
    assert f_base == f_restored, "Pretrained force metrics inconsistent!"

    # Continuing training from the checkpoint should lower both errors.
    restored.train()
    e_retrained, f_retrained = get_metrics(restored)
    assert e_retrained < e_restored, "Retrained metrics are larger!"
    assert f_retrained < f_restored, "Retrained metrics are larger!"
"batch_size": 10, "epochs": 100, }, "dataset": { "raw_data": images, "val_split": 0, "elements": elements, "fp_scheme": "mcsh", "fp_params": MCSHs, "save_fps": True, }, "cmd": { "debug": False, "run_dir": "./", "seed": 1, "identifier": "test", "verbose": True, "logger": False, }, } trainer = AtomsTrainer(config) trainer.train() predictions = trainer.predict(images[:10]) true_energies = np.array([image.get_potential_energy() for image in images]) pred_energies = np.array(predictions["energy"]) print("Energy MSE:", np.mean((true_energies - pred_energies)**2))
def module_evaluate(learning_rate, num_nodes, num_layers):
    """Train a GMP model with the given hyperparameters; return test-set MAE.

    Arguments may arrive as strings/floats from a hyperparameter driver, so
    they are coerced to their numeric types up front.
    """
    learning_rate = float(learning_rate)
    num_nodes = int(num_nodes)
    num_layers = int(num_layers)

    input_filename = "../data/water_dft.traj"
    # Reuse an existing train/test split when both trajectory files exist.
    have_split = (os.path.exists("../data/train.traj")
                  and os.path.exists("../data/test.traj"))
    if have_split:
        print("Reading train_test split. ")
        training_list, test_list = load_training_data("../data/train.traj",
                                                      "../data/test.traj")
    else:
        print("Creating train_test split. ")
        training_list, test_list = split_train_test(
            input_filename,
            0.9,
            save=True,
            filenames=["../data/train.traj", "../data/test.traj"])

    # MCSH descriptor set: five log-spaced Gaussian widths per order.
    sigmas = np.logspace(np.log10(0.02), np.log10(1.0), num=5)
    group_sets = {
        "0": [1],
        "1": [1],
        "2": [1, 2],
        "3": [1, 2, 3],
        "4": [1, 2, 3, 4],
        "5": [1, 2, 3, 4, 5],
    }
    MCSHs = {
        "MCSHs": {order: {"groups": groups, "sigmas": sigmas}
                  for order, groups in group_sets.items()},
        "atom_gaussians": {
            "H": "../MCSH_potentials/H_pseudodensity_2.g",
            "O": "../MCSH_potentials/O_pseudodensity_4.g",
        },
        "cutoff": 8,
    }

    config = {
        "model": {
            "get_forces": True,
            "num_layers": num_layers,
            "num_nodes": num_nodes,
        },
        "optim": {
            "device": "cpu",
            "force_coefficient": 0.2,
            "lr": learning_rate,
            "batch_size": 8,
            "epochs": 200,
        },
        "dataset": {
            "raw_data": training_list,
            "elements": ["H", "O"],
            "fp_scheme": "gmp",
            "fp_params": MCSHs,
            "save_fps": True,
        },
        "cmd": {
            "debug": False,
            "run_dir": "./",
            "seed": 1,
            "identifier": "test",
            "verbose": False,
            "logger": False,
        },
    }

    trainer = AtomsTrainer(config)
    trainer.train()

    predictions = trainer.predict(test_list)
    reference = np.array(
        [image.get_potential_energy() for image in test_list])
    predicted = np.array(predictions["energy"])
    return np.mean(np.abs(reference - predicted))
def objective_function(rank, scratch_dir, params):
    """Train on train.traj with hyperparameters from *params*; return test MSE.

    *rank* tags the run identifier so parallel workers do not collide;
    *scratch_dir* is used as the run directory.
    """
    train_images = Trajectory('train.traj')
    test_images = Trajectory('test.traj')

    # Descriptor set: G2 and G4 symmetry functions under a cosine cutoff.
    elements = np.unique([atom.symbol for atom in train_images[0]])
    cosine_cutoff_params = {'cutoff_func': 'cosine'}
    gds = GaussianDescriptorSet(elements, 6.0, cosine_cutoff_params)
    gds.batch_add_descriptors(2,
                              [0.25, 2.5, 0.25, 2.5],
                              [0.0, 0.0, 3.0, 3.0],
                              [])
    gds.batch_add_descriptors(4,
                              [0.005, 0.005, 0.01, 0.01],
                              [1.0, 4.0, 4.0, 16.0],
                              [1.0, 1.0, -1.0, -1.0])

    amptorch_config = {
        'model': {
            'get_forces': True,
            'num_layers': params['num_layers'],
            'num_nodes': params['num_nodes'],
            'batchnorm': False,
        },
        'optim': {
            'force_coefficient': 0.04,
            'lr': 1e-2,
            'batch_size': 32,
            'epochs': 100,
            'loss': 'mse',
            'metric': 'mae',
            'gpus': 0,
        },
        'dataset': {
            'raw_data': train_images,
            'val_split': 0.1,
            'fp_params': gds,
            'save_fps': True,
            'scaling': {'type': 'normalize', 'range': (0, 1)},
        },
        'cmd': {
            'debug': False,
            'run_dir': scratch_dir,
            'seed': 1,
            'identifier': 'rank{}'.format(rank),
            'verbose': False,
            'logger': False,
        },
    }

    # Suppress trainer logging for the whole train/evaluate cycle.
    with NoLogging():
        torch.set_num_threads(1)
        trainer = AtomsTrainer(amptorch_config)
        trainer.train()

        predictions = trainer.predict(test_images)
        true_energies = np.array(
            [image.get_potential_energy() for image in test_images])
        pred_energies = np.array(predictions['energy'])
        mse = np.mean((true_energies - pred_energies)**2)
    return mse
"save_fps": True, }, "cmd": { "debug": False, "run_dir": "./", "seed": 1, "identifier": "test", "verbose": True, "logger": False, }, } config["dataset"]["cutoff_params"] = cosine_cutoff_params torch.set_num_threads(1) cosine_trainer = AtomsTrainer(config) cosine_trainer.train() predictions = cosine_trainer.predict(images) true_energies = np.array([image.get_potential_energy() for image in images]) cosine_pred_energies = np.array(predictions["energy"]) image.set_calculator(AMPtorch(cosine_trainer)) image.get_potential_energy() config["dataset"]["cutoff_params"] = polynomial_cutoff_params torch.set_num_threads(1) polynomial_trainer = AtomsTrainer(config) polynomial_trainer.train() predictions = polynomial_trainer.predict(images)