def prepare_model_megnet(individuals, epochs, outfile, excl=None):
    """Train a MEGNet model on total energy per atom and save it to a file.

    Parameters:
        individuals: iterable of objects exposing ``get_init_structure()``
            (an ASE Atoms object) and an ``e_tot`` attribute.
        epochs: number of training epochs passed to ``MEGNetModel.train``.
        outfile: path the fitted model file is written to.
        excl: optional list of chemical symbols; structures whose symbol
            list equals ``excl`` are excluded from training (important for
            network learning). ``None`` (default) or an empty list disables
            the exclusion.
    """
    # Bug fix: the default used to be a shared mutable list (excl=[]);
    # a None sentinel gives identical behavior without the pitfall.
    if excl is None:
        excl = []
    structures = []
    energies = []
    adapt = AseAtomsAdaptor()
    for ind in individuals:
        struct_ase = ind.get_init_structure()
        # Skip the excluded stoichiometry, if one was requested.
        if excl and struct_ase.get_chemical_symbols() == excl:
            continue
        structures.append(adapt.get_structure(struct_ase))
        energies.append(ind.e_tot)
    # len(structures) replaces the hand-maintained counter; same output text.
    print(f"read data of {len(structures)} structures total")

    # Standard values as taken from the MEGNet manual.
    nfeat_bond = 100
    nfeat_global = 2
    r_cutoff = 5
    gaussian_centers = np.linspace(0, r_cutoff + 1, nfeat_bond)
    gaussian_width = 0.5
    distance_converter = GaussianDistance(gaussian_centers, gaussian_width)
    graph_converter = CrystalGraph(bond_converter=distance_converter, cutoff=r_cutoff)
    model = MEGNetModel(nfeat_bond, nfeat_global, graph_converter=graph_converter)

    # Model training and persistence.
    model.train(structures, energies, epochs=epochs)
    model.save_model(outfile)
def test_crystal_model_v2(self):
    """Smoke-test: a tiny v2 crystal model trains and predicts one target."""
    converter = CrystalGraph()
    silicon = Structure(Lattice.cubic(3), ['Si'], [[0, 0, 0]])
    with ScratchDir('.'):
        # Deliberately minimal architecture so the test stays fast.
        model = MEGNetModel(
            nfeat_edge=None,
            nfeat_global=2,
            nblocks=1,
            lr=1e-2,
            n1=4,
            n2=4,
            n3=4,
            npass=1,
            ntarget=1,
            graph_converter=converter,
            centers=np.linspace(0, 4, 10),
            width=0.5,
        )
        model = model.train([silicon, silicon], [0.1, 0.1], epochs=2)
        prediction = model.predict_structure(silicon)
        self.assertEqual(prediction.shape, (1,))
# Train/test split of the band-gap dataset at row `boundary`.
Xtrain = inputs.iloc[0:boundary]['structure']
ytrain = inputs.iloc[0:boundary]['band_gap']
Xtest = inputs.iloc[boundary:]['structure']
ytest = inputs.iloc[boundary:]['band_gap']

# Featurization settings (values as in the MEGNet examples).
nfeat_bond = 10
nfeat_global = 2
r_cutoff = 5
gaussian_centers = np.linspace(0, 5, 10)
gaussian_width = 0.5
distance_convertor = GaussianDistance(gaussian_centers, gaussian_width)
# Bug fix: a second, unused CrystalGraph ("bond_convertor") was built here and
# the graph converter actually passed to the model was constructed inline with
# a fresh GaussianDistance and NO cutoff, so r_cutoff was silently dropped.
# Build a single converter that really uses distance_convertor and r_cutoff.
# NOTE(review): this file uses the old "convertor" keyword spelling; newer
# MEGNet releases renamed it to `graph_converter`/`bond_converter` — confirm
# against the installed version.
graph_convertor = CrystalGraph(bond_convertor=distance_convertor, cutoff=r_cutoff)

# Bug fix: `model.from_file(...)` is a classmethod returning a NEW model; its
# result used to be discarded, so training always started from scratch.
# Load the saved model for fine-tuning, falling back to a fresh one.
try:
    model = MEGNetModel.from_file('fitted_gap_model.hdf5')
except (OSError, FileNotFoundError):
    model = MEGNetModel(nfeat_bond, nfeat_global, graph_convertor=graph_convertor)

model.train(
    Xtrain,
    ytrain,
    epochs=epochs,
    batch_size=batch_size,
    validation_structures=Xtest,
    validation_targets=ytest,
    scrub_failed_structures=True,  # drop structures that fail graph conversion
)
model.save_model('fitted_gap_model.hdf5')
# Small MEGNet model: 10 bond features, 2 global features, one message-passing
# block with narrow (4-unit) dense layers.
model = MEGNetModel(10, 2, nblocks=1, lr=1e-4, n1=4, n2=4, n3=4, npass=1, ntarget=1,
                    graph_converter=CrystalGraph(bond_converter=GaussianDistance(np.linspace(0, 5, 10), 0.5)))
ep = 5000  # epochs per training stage
# Stop when validation loss stalls for 50 epochs; keep the best weights seen.
callback = tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=50, restore_best_weights=True)
# Pre-convert the test structures to model-ready graph inputs.
# NOTE(review): `test_input` is presumably a list created earlier in the file —
# confirm; it is not defined in this chunk.
for s in test_structures:
    test_input.append(model.graph_converter.graph_to_input(model.graph_converter.convert(s)))
# NOTE(review): the nesting below was reconstructed from a collapsed one-line
# source; prediction(model) is taken to run once per mode, after its loop.
if training_mode == 0:  # PBE -> HSE ... -> part EXP, one by one
    # Train sequentially on each fidelity chunk of the dataset.
    idx = 0
    for i in range(len(data_size)):
        model.train(structures[idx:idx + data_size[i]], targets[idx:idx + data_size[i]], epochs=ep)
        idx += data_size[i]
    prediction(model)
elif training_mode == 1:  # all training set together
    model.train(structures, targets, epochs=ep * len(data_size))
    prediction(model)
elif training_mode == 2:  # only part EXP
    # Train only on the final chunk (everything after the earlier fidelities).
    model.train(structures[sum(data_size[0:len(data_size) - 1]):],
                targets[sum(data_size[0:len(data_size) - 1]):],
                epochs=ep * len(data_size))
    prediction(model)
elif training_mode == 3:  # all -> all-PBE -> all-PBE-HSE -> ... -> part EXP
    # Shrinking-window schedule: drop the lowest remaining fidelity each stage.
    idx = 0
    for i in range(len(data_size)):
        model.train(structures[idx:], targets[idx:], epochs=ep)
        idx += data_size[i]
    prediction(model)
elif training_mode == 4:  # use E1 as validation dataset, P -> H -> G -> S one by one
    # (branch body continues beyond this chunk of the file)
from megnet.callbacks import ReduceLRUponNan, ManualStop, XiaotongCB
import numpy as np

# Graph featurizer: 100 Gaussian bond-distance bins on [0, 5], cutoff 4.
gc = CrystalGraph(bond_converter=GaussianDistance(np.linspace(0, 5, 100), 0.5),
                  cutoff=4)
model = MEGNetModel(100, 2, graph_converter=gc, lr=1e-4,
                    loss=examine_loss)  # , metrics=[examine_loss])

INTENSIVE = False  # U0 is an extensive quantity
scaler = StandardScaler.from_training_data(structures, targets,
                                           is_intensive=INTENSIVE)
model.target_scaler = scaler
# callbacks = [ReduceLRUponNan(patience=500), ManualStop(), XiaotongCB()]

# Change structures into MEGNet-predictable inputs; per-graph targets are
# rescaled with the number of atoms in each graph.
train_graphs, train_targets = model.get_all_graphs_targets(structures, targets)
train_nb_atoms = [len(graph['atom']) for graph in train_graphs]
train_targets = [
    model.target_scaler.transform(target, n_atoms)
    for target, n_atoms in zip(train_targets, train_nb_atoms)
]
converter = model.graph_converter
mp_strs = [converter.graph_to_input(converter.convert(s)) for s in structures]

callbacks = [ManualStop(), XiaotongCB((mp_strs, train_targets), commit_id)]
model.train(structures, targets, epochs=50, verbose=2, callbacks=callbacks)
print('finish..')
def main() -> None:
    """Execute main script: optionally train and/or evaluate the models.

    Decomposed from one monolithic body into argument parsing, data loading,
    and one handler per model family; the CLI and behavior are unchanged.
    """
    args = _parse_args()
    do_train: bool = args.do_train
    do_eval: bool = args.do_eval
    which_model: str = args.which
    epochs: int = args.epochs  # may be None when --train is not given
    num_inducing: int = args.num_inducing

    # Load the MEGNetModel into memory, building a fresh one on a cache miss.
    try:
        meg_model: MEGNetModel = MEGNetModel.from_file(str(MEGNET_MODEL_DIR))
    except FileNotFoundError:
        meg_model = MEGNetModel(**default_megnet_config())

    train_structs, train_targets, test_structs, test_targets = _load_split_data()

    if which_model == "MEGNet":
        _handle_megnet(meg_model, do_train, do_eval, epochs,
                       train_structs, train_targets, test_structs, test_targets)
    else:
        _handle_prob_model(meg_model, which_model, do_train, do_eval, epochs,
                           num_inducing, train_structs, train_targets,
                           test_structs, test_targets)


def _parse_args():
    """Build the command-line interface and parse ``sys.argv``."""
    parser = ArgumentParser()
    parser.add_argument(
        "--train",
        action="store_true",
        help="Whether to train the model.",
        dest="do_train",
    )
    parser.add_argument(
        "--eval",
        action="store_true",
        help="Whether to evaluate the model.",
        dest="do_eval",
    )
    parser.add_argument(
        "--which",
        choices=["MEGNet", "VGP", "ProbNN"],
        required=("--train" in sys.argv),
        help=(
            "Which components to train: "
            "MEGNet -- Just the MEGNetModel; "
            "VGP -- Just the VGP part of the ProbNN; "
            "ProbNN -- The whole ProbNN."
        ),
        dest="which",
    )
    parser.add_argument(
        "--epochs",
        "-n",
        type=int,
        required=("--train" in sys.argv),
        help="Number of training epochs.",
        dest="epochs",
    )
    parser.add_argument(
        "--inducing",
        "-i",
        type=int,
        help="Number of inducing index points.",
        default=500,
        dest="num_inducing",
    )
    return parser.parse_args()


def _load_split_data():
    """Download the phonon dataset and return (train_structs, train_targets,
    test_structs, test_targets) split by TRAINING_RATIO."""
    df = download_data(PHONONS_URL, PHONONS_SAVE_DIR)
    structures = df["structure"]
    targets = df["last phdos peak"]
    num_data = len(structures)
    print(f"{num_data} datapoints loaded.")
    num_training = floor(num_data * TRAINING_RATIO)
    print(f"{num_training} training data, {num_data-num_training} test data.")
    return (structures[:num_training], targets[:num_training],
            structures[num_training:], targets[num_training:])


def _handle_megnet(meg_model, do_train, do_eval, epochs,
                   train_structs, train_targets, test_structs, test_targets):
    """Train and/or evaluate the plain MEGNetModel."""
    if do_train:
        tf_callback = TensorBoard(MEGNET_LOGS / NOW, write_graph=False)
        meg_model.train(
            train_structs,
            train_targets,
            test_structs,
            test_targets,
            automatic_correction=False,
            dirname="meg_checkpoints",
            epochs=epochs,
            callbacks=[tf_callback],
            verbose=VERBOSITY,
        )
        meg_model.save_model(str(MEGNET_MODEL_DIR))
    if do_eval:
        train_predicted = meg_model.predict_structures(train_structs).flatten()
        train_mae = MAE(train_predicted, None, train_targets)
        metric_logger.info("MEGNet train MAE = %f", train_mae)
        test_predicted = meg_model.predict_structures(test_structs).flatten()
        test_mae = MAE(test_predicted, None, test_targets)
        metric_logger.info("MEGNet test MAE = %f", test_mae)


def _handle_prob_model(meg_model, which_model, do_train, do_eval, epochs,
                       num_inducing, train_structs, train_targets,
                       test_structs, test_targets):
    """Train and/or evaluate the uncertainty-quantifying ProbNN."""
    # Load the ProbNN into memory, building it from the MEGNetModel on a miss.
    try:
        prob_model: MEGNetProbModel = MEGNetProbModel.load(PROB_MODEL_DIR)
    except FileNotFoundError:
        prob_model = MEGNetProbModel(meg_model, num_inducing, metrics=["MAE"])

    if do_train:
        if which_model == "VGP":
            # Freeze the NN so only the VGP (and norm) layers learn.
            prob_model.set_frozen("NN", recompile=False)
            prob_model.set_frozen(["VGP", "Norm"], freeze=False)
            tf_callback = TensorBoard(VGP_LOGS / NOW, write_graph=False)
        else:
            # Unfreeze everything and train the whole ProbNN.
            prob_model.set_frozen(["VGP", "NN", "Norm"], freeze=False)
            tf_callback = TensorBoard(FULL_MODEL_LOGS / NOW, write_graph=False)

        prob_model.train(
            train_structs,
            train_targets,
            epochs,
            test_structs,
            test_targets,
            callbacks=[tf_callback],
            verbose=VERBOSITY,
        )
        prob_model.save(PROB_MODEL_DIR)

    if do_eval:
        train_metrics = evaluate_uq_metrics(prob_model, train_structs, train_targets)
        log_metrics(train_metrics, "training")
        test_metrics = evaluate_uq_metrics(prob_model, test_structs, test_targets)
        log_metrics(test_metrics, "test")
test_structures = structures[80:]
train_targets = targets[:80]
test_targets = targets[80:]

from megnet.models import MEGNetModel
from megnet.data.graph import GaussianDistance
from megnet.data.crystal import CrystalGraph
from megnet.utils.preprocessing import StandardScaler
import numpy as np

# 100 Gaussian bond-expansion centres on [0, 5], neighbour cutoff 4.
bond_expansion = GaussianDistance(np.linspace(0, 5, 100), 0.5)
gc = CrystalGraph(bond_converter=bond_expansion, cutoff=4)
model = MEGNetModel(100, 2, graph_converter=gc, lr=1e-3)

INTENSIVE = False  # U0 is an extensive quantity
scaler = StandardScaler.from_training_data(train_structures, train_targets,
                                           is_intensive=INTENSIVE)
model.target_scaler = scaler

model.train(train_structures, train_targets, epochs=500, verbose=2)

# One scalar prediction per held-out structure.
predicted_tests = [model.predict_structure(s).ravel()[0] for s in test_structures]

print(type(test_targets), type(predicted_tests))
for idx in range(10):
    print(test_targets[idx], predicted_tests[idx])
MAE /= test_size
print('MAE is:', MAE)

# Merge the two quarters into one training pool.
train_s = Q1_s + Q2_s
train_t = Q1_t + Q2_t

gc = CrystalGraph(bond_converter=GaussianDistance(np.linspace(0, 5, 100), 0.5),
                  cutoff=4)
model = MEGNetModel(100, 2, graph_converter=gc, lr=1e-3)

INTENSIVE = False  # U0 is an extensive quantity
scaler = StandardScaler.from_training_data(train_s, train_t,
                                           is_intensive=INTENSIVE)
model.target_scaler = scaler

# Early stopping on validation loss, restoring the best weights seen.
callback = tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=50,
                                            restore_best_weights=True)

# Hold out the final 20% of the pool for validation.
idx = int(0.8 * len(train_s))
fit_structures, fit_targets = train_s[:idx], train_t[:idx]
val_structures, val_targets = train_s[idx:], train_t[idx:]
model.train(fit_structures, fit_targets,
            validation_structures=val_structures,
            validation_targets=val_targets,
            callbacks=[callback],
            epochs=1000,
            save_checkpoint=False,
            automatic_correction=False)
print('Training finish..')
predict(model)