Example #1
0
def prepare_model_megnet(individuals, epochs, outfile, excl=None):
    """Train and save a MEGNet model from a list of GA individuals.

    Uses each individual's total energy per atom as the regression target.

    Parameters
    ----------
    individuals : iterable
        Objects exposing ``get_init_structure()`` (an ASE Atoms object) and
        an ``e_tot`` attribute (total energy per atom).
    epochs : int
        Number of training epochs passed to ``MEGNetModel.train``.
    outfile : str
        Path the trained model is saved to via ``model.save_model``.
    excl : list, optional
        Chemical-symbol list of one stoichiometry to exclude from training —
        important for network learning.  ``None`` (default) or an empty list
        excludes nothing.
    """
    # BUG FIX: the original used a mutable default (excl=[]), which is shared
    # across calls; use None as the sentinel instead (same behavior for callers).
    excl = [] if excl is None else excl

    adapt = AseAtomsAdaptor()
    structures = []
    energies = []
    for ind in individuals:
        struct_ase = ind.get_init_structure()
        # Skip structures whose composition matches the excluded stoichiometry
        # (replaces the original empty/flag bookkeeping with one guard).
        if excl and struct_ase.get_chemical_symbols() == excl:
            continue
        structures.append(adapt.get_structure(struct_ase))
        energies.append(ind.e_tot)

    print("read data of " + str(len(structures)) + " structures total")

    # standard values as taken from the MEGNet manual
    nfeat_bond = 100
    nfeat_global = 2
    r_cutoff = 5
    gaussian_centers = np.linspace(0, r_cutoff + 1, nfeat_bond)
    gaussian_width = 0.5
    distance_converter = GaussianDistance(gaussian_centers, gaussian_width)
    graph_converter = CrystalGraph(bond_converter=distance_converter, cutoff=r_cutoff)
    model = MEGNetModel(nfeat_bond, nfeat_global, graph_converter=graph_converter)

    # model training
    model.train(structures, energies, epochs=epochs)

    model.save_model(outfile)
Example #2
0
 def test_crystal_model_v2(self):
     """Smoke-test a tiny MEGNetModel: fit two epochs on one structure, predict."""
     graph_conv = CrystalGraph()
     silicon = Structure(Lattice.cubic(3), ['Si'], [[0, 0, 0]])
     # Keep the network deliberately tiny so the test stays fast.
     tiny_config = dict(
         nfeat_edge=None,
         nfeat_global=2,
         nblocks=1,
         lr=1e-2,
         n1=4,
         n2=4,
         n3=4,
         npass=1,
         ntarget=1,
         graph_converter=graph_conv,
         centers=np.linspace(0, 4, 10),
         width=0.5,
     )
     # Run inside a throwaway directory so checkpoints don't pollute the repo.
     with ScratchDir('.'):
         net = MEGNetModel(**tiny_config)
         net = net.train([silicon, silicon], [0.1, 0.1], epochs=2)
         prediction = net.predict_structure(silicon)
         self.assertTrue(prediction.shape == (1, ))
Example #3
0
# Contiguous train/test split of the precomputed DataFrame `inputs`
# (defined above this fragment) at row index `boundary`.
Xtrain = inputs.iloc[0:boundary]['structure']
ytrain = inputs.iloc[0:boundary]['band_gap']

Xtest = inputs.iloc[boundary:]['structure']
ytest = inputs.iloc[boundary:]['band_gap']

# Graph-featurizer hyper-parameters: 10 Gaussian bond features on [0, 5] A.
nfeat_bond = 10
nfeat_global = 2
r_cutoff = 5
gaussian_centers = np.linspace(0, 5, 10)
gaussian_width = 0.5
distance_convertor = GaussianDistance(gaussian_centers, gaussian_width)
# Build the converter once and reuse it.  (The original also created a second,
# never-used CrystalGraph under the name `bond_convertor` — removed as dead code;
# this one uses the same GaussianDistance(np.linspace(0, 5, 10), 0.5) values.)
graph_convertor = CrystalGraph(bond_convertor=distance_convertor)
model = MEGNetModel(nfeat_bond, nfeat_global, graph_convertor=graph_convertor)

# BUG FIX: `from_file` is a classmethod that RETURNS the loaded model; the
# original `model.from_file(...)` discarded that return value, so fine-tuning
# silently started from random weights.  Rebind so the pretrained model is
# actually used.
model = MEGNetModel.from_file('fitted_gap_model.hdf5')

model.train(Xtrain,
            ytrain,
            epochs=epochs,
            batch_size=batch_size,
            validation_structures=Xtest,
            validation_targets=ytest,
            scrub_failed_structures=True)

model.save_model('fitted_gap_model.hdf5')
Example #4
0
# Small MEGNet: 10 bond features, 2 global features, one block, tiny dense layers.
model = MEGNetModel(10, 2, nblocks=1, lr=1e-4,
        n1=4, n2=4, n3=4, npass=1, ntarget=1,
        graph_converter=CrystalGraph(bond_converter=GaussianDistance(np.linspace(0, 5, 10), 0.5)))


ep = 5000
# NOTE(review): this callback is built but never passed to model.train anywhere
# in this fragment — confirm against the full script.
callback = tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=50, restore_best_weights=True)

# Pre-convert each test structure to a model input (graph -> network input).
for s in test_structures:
    test_input.append(model.graph_converter.graph_to_input(model.graph_converter.convert(s)))

# Curriculum-style training schedules selected by `training_mode`; `data_size`
# holds the length of each fidelity window within `structures`/`targets`.
if training_mode == 0: # PBE -> HSE ... -> part EXP, one by one
    idx = 0
    for i in range(len(data_size)):
        # Train on each window [idx, idx + data_size[i]) in sequence.
        model.train(structures[idx:idx+data_size[i]], targets[idx:idx+data_size[i]], epochs=ep)
        idx += data_size[i]
        prediction(model)
elif training_mode == 1: # all training set together
    model.train(structures, targets, epochs=ep*len(data_size))
    prediction(model)
elif training_mode == 2: # only part EXP
    # Train only on the final window (everything after the sum of the others).
    model.train(structures[sum(data_size[0:len(data_size)-1]):], targets[sum(data_size[0:len(data_size)-1]):], epochs=ep*len(data_size))
    prediction(model)
elif training_mode == 3: # all -> all-PBE -> all-PBE-HSE -> ... -> part EXP
    idx = 0
    for i in range(len(data_size)):
        # Shrinking-window schedule: drop the earliest window each round.
        model.train(structures[idx:], targets[idx:], epochs=ep)
        idx += data_size[i]
        prediction(model)
elif training_mode == 4: # use E1 as validation dataset, P -> H -> G -> S one by one
    # NOTE(review): branch body is missing — the snippet appears truncated by the
    # scrape (the following lines belong to a different example).
# NOTE(review): this fragment depends on names defined elsewhere in the full
# script (CrystalGraph, GaussianDistance, MEGNetModel, StandardScaler,
# examine_loss, structures, targets, commit_id) — confirm before running alone.
from megnet.callbacks import ReduceLRUponNan, ManualStop, XiaotongCB

import numpy as np

# Bond featurizer: 100 Gaussian centers on [0, 5], width 0.5, neighbor cutoff 4.
gc = CrystalGraph(bond_converter=GaussianDistance(
        np.linspace(0, 5, 100), 0.5), cutoff=4)
model = MEGNetModel(100, 2, graph_converter=gc, lr=1e-4, loss=examine_loss) # , metrics=[examine_loss])
INTENSIVE = False # U0 is an extensive quantity
# Fit the target scaler on the training data and attach it to the model.
scaler = StandardScaler.from_training_data(structures, targets, is_intensive=INTENSIVE)
model.target_scaler = scaler

# callbacks = [ReduceLRUponNan(patience=500), ManualStop(), XiaotongCB()]

# change structures to megnet predictable structures
mp_strs = []

# Pre-convert structures to graphs; per-structure atom counts are fed to the
# scaler's transform so the targets handed to XiaotongCB match training scale.
train_graphs, train_targets = model.get_all_graphs_targets(structures, targets)
train_nb_atoms = [len(i['atom']) for i in train_graphs]
train_targets = [model.target_scaler.transform(i, j) for i, j in zip(train_targets, train_nb_atoms)]


for s in structures:
    mp_strs.append(model.graph_converter.graph_to_input(model.graph_converter.convert(s)))

# XiaotongCB receives the pre-converted inputs/targets plus the commit id.
callbacks = [ManualStop(), XiaotongCB((mp_strs, train_targets), commit_id)]

model.train(structures, targets, epochs=50, verbose=2, callbacks=callbacks)

print('finish..')
Example #6
0
def main() -> None:
    """Execute main script.

    Parses CLI flags, loads (or creates) a MEGNetModel and optionally a
    MEGNetProbModel, then trains and/or evaluates the selected component
    on the phonon DOS dataset.
    """
    # --- CLI definition -------------------------------------------------
    parser = ArgumentParser()
    parser.add_argument(
        "--train",
        action="store_true",
        help="Whether to train the model.",
        dest="do_train",
    )
    parser.add_argument(
        "--eval",
        action="store_true",
        help="Whether to evaluate the model.",
        dest="do_eval",
    )
    parser.add_argument(
        "--which",
        choices=["MEGNet", "VGP", "ProbNN"],
        # Only mandatory when --train appears on the command line.
        required=("--train" in sys.argv),
        help=(
            "Which components to train: "
            "MEGNet -- Just the MEGNetModel; "
            "VGP -- Just the VGP part of the ProbNN; "
            "ProbNN -- The whole ProbNN."
        ),
        dest="which",
    )
    parser.add_argument(
        "--epochs",
        "-n",
        type=int,
        required=("--train" in sys.argv),
        help="Number of training epochs.",
        dest="epochs",
    )
    parser.add_argument(
        "--inducing",
        "-i",
        type=int,
        help="Number of inducing index points.",
        default=500,
        dest="num_inducing",
    )
    args = parser.parse_args()

    do_train: bool = args.do_train
    do_eval: bool = args.do_eval
    which_model: str = args.which
    epochs: int = args.epochs
    num_inducing: int = args.num_inducing

    # Load the MEGNetModel into memory
    try:
        meg_model: MEGNetModel = MEGNetModel.from_file(str(MEGNET_MODEL_DIR))
    except FileNotFoundError:
        # No saved model on disk yet — start from a fresh default configuration.
        meg_model = MEGNetModel(**default_megnet_config())

    # Load the data into memory
    df = download_data(PHONONS_URL, PHONONS_SAVE_DIR)
    structures = df["structure"]
    targets = df["last phdos peak"]
    num_data = len(structures)
    print(f"{num_data} datapoints loaded.")

    # Contiguous train/test split at TRAINING_RATIO (no shuffling).
    num_training = floor(num_data * TRAINING_RATIO)
    print(f"{num_training} training data, {num_data-num_training} test data.")
    train_structs = structures[:num_training]
    train_targets = targets[:num_training]
    test_structs = structures[num_training:]
    test_targets = targets[num_training:]

    if which_model == "MEGNet":
        if do_train:
            tf_callback = TensorBoard(MEGNET_LOGS / NOW, write_graph=False)
            # Test split doubles as validation data during training.
            meg_model.train(
                train_structs,
                train_targets,
                test_structs,
                test_targets,
                automatic_correction=False,
                dirname="meg_checkpoints",
                epochs=epochs,
                callbacks=[tf_callback],
                verbose=VERBOSITY,
            )
            meg_model.save_model(str(MEGNET_MODEL_DIR))
        if do_eval:
            # Log MAE on both splits.
            train_predicted = meg_model.predict_structures(train_structs).flatten()
            train_mae = MAE(train_predicted, None, train_targets)
            metric_logger.info("MEGNet train MAE = %f", train_mae)

            test_predicted = meg_model.predict_structures(test_structs).flatten()
            test_mae = MAE(test_predicted, None, test_targets)
            metric_logger.info("MEGNet test MAE = %f", test_mae)
    else:
        # Load the ProbNN into memory
        try:
            prob_model: MEGNetProbModel = MEGNetProbModel.load(PROB_MODEL_DIR)
        except FileNotFoundError:
            prob_model = MEGNetProbModel(meg_model, num_inducing, metrics=["MAE"])

        if do_train:
            if which_model == "VGP":
                # Train only the VGP head: freeze the NN, unfreeze VGP + Norm.
                prob_model.set_frozen("NN", recompile=False)
                prob_model.set_frozen(["VGP", "Norm"], freeze=False)
                tf_callback = TensorBoard(VGP_LOGS / NOW, write_graph=False)
            else:
                # Train the whole ProbNN: unfreeze every component.
                prob_model.set_frozen(["VGP", "NN", "Norm"], freeze=False)
                tf_callback = TensorBoard(FULL_MODEL_LOGS / NOW, write_graph=False)
            prob_model.train(
                train_structs,
                train_targets,
                epochs,
                test_structs,
                test_targets,
                callbacks=[tf_callback],
                verbose=VERBOSITY,
            )
            prob_model.save(PROB_MODEL_DIR)
        if do_eval:
            # Uncertainty-quantification metrics on both splits.
            train_metrics = evaluate_uq_metrics(
                prob_model, train_structs, train_targets
            )
            log_metrics(train_metrics, "training")
            test_metrics = evaluate_uq_metrics(prob_model, test_structs, test_targets)
            log_metrics(test_metrics, "test")
# NOTE(review): this fragment starts mid-script — `structures`, `targets`, and
# `train_structures` (used below) must be defined above this chunk; confirm.
test_structures = structures[80:]
train_targets = targets[:80]
test_targets = targets[80:]

from megnet.models import MEGNetModel
from megnet.data.graph import GaussianDistance
from megnet.data.crystal import CrystalGraph
from megnet.utils.preprocessing import StandardScaler
import numpy as np

# 100 Gaussian bond features on [0, 5], width 0.5, neighbor cutoff 4.
gc = CrystalGraph(bond_converter=GaussianDistance(np.linspace(0, 5, 100), 0.5),
                  cutoff=4)
model = MEGNetModel(100, 2, graph_converter=gc, lr=1e-3)

INTENSIVE = False  # U0 is an extensive quantity
# Fit the target scaler on the training data and attach it to the model.
scaler = StandardScaler.from_training_data(train_structures,
                                           train_targets,
                                           is_intensive=INTENSIVE)
model.target_scaler = scaler

model.train(train_structures, train_targets, epochs=500, verbose=2)

predicted_tests = []
for i in test_structures:
    # predict_structure returns an array; keep the scalar prediction.
    predicted_tests.append(model.predict_structure(i).ravel()[0])

print(type(test_targets), type(predicted_tests))

# Print the first 10 target/prediction pairs.
for i in range(10):
    print(test_targets[i], predicted_tests[i])
    # NOTE(review): the two lines below look mis-indented/truncated by the
    # scrape — `MAE` and `test_size` are never defined in this fragment.
    MAE /= test_size
    print('MAE is:', MAE)

# Concatenate the two dataset splits (Q1_s/Q1_t, Q2_s/Q2_t defined above this fragment).
train_s = Q1_s + Q2_s
train_t = Q1_t + Q2_t

# 100 Gaussian bond features on [0, 5], width 0.5, neighbor cutoff 4.
gc = CrystalGraph(bond_converter=GaussianDistance(
        np.linspace(0, 5, 100), 0.5), cutoff=4)
model = MEGNetModel(100, 2, graph_converter=gc, lr=1e-3)
INTENSIVE = False # U0 is an extensive quantity
# Fit the target scaler on the training data and attach it to the model.
scaler = StandardScaler.from_training_data(train_s, train_t, is_intensive=INTENSIVE)
model.target_scaler = scaler

# Stop when validation loss plateaus for 50 epochs; restore the best weights.
callback = tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=50, restore_best_weights=True)

# 80/20 train/validation split by index.
idx = int(0.8 * len(train_s))


model.train(train_s[:idx], train_t[:idx],
        validation_structures=train_s[idx:],
        validation_targets=train_t[idx:],
        callbacks=[callback],
        epochs=1000,
        save_checkpoint=False,
        automatic_correction=False)


print('Training finish..')

# `predict` is defined elsewhere in the full script.
predict(model)