def setUpClass(cls):
    cls.n_feature = 3
    cls.n_bond_features = 10
    cls.n_global_features = 2

    class Generator(Sequence):
        def __init__(self, x, y):
            self.x = x
            self.y = y

        def __len__(self):
            return 10

        def __getitem__(self, index):
            return self.x, self.y

    x_crystal = [
        np.array([1, 2, 3, 4]).reshape((1, -1)),
        np.random.normal(size=(1, 6, cls.n_bond_features)),
        np.random.normal(size=(1, 2, cls.n_global_features)),
        np.array([[0, 0, 1, 1, 2, 3]]),
        np.array([[1, 1, 0, 0, 3, 2]]),
        np.array([[0, 0, 1, 1]]),
        np.array([[0, 0, 0, 0, 1, 1]]),
    ]
    y = np.random.normal(size=(1, 2, 1))
    cls.train_gen_crystal = Generator(x_crystal, y)

    x_mol = [
        np.random.normal(size=(1, 4, cls.n_feature)),
        np.random.normal(size=(1, 6, cls.n_bond_features)),
        np.random.normal(size=(1, 2, cls.n_global_features)),
        np.array([[0, 0, 1, 1, 2, 3]]),
        np.array([[1, 1, 0, 0, 3, 2]]),
        np.array([[0, 0, 1, 1]]),
        np.array([[0, 0, 0, 0, 1, 1]]),
    ]
    y = np.random.normal(size=(1, 2, 1))
    cls.train_gen_mol = Generator(x_mol, y)

    cls.model = MEGNetModel(
        10, 2, nblocks=1, lr=1e-2,
        n1=4, n2=4, n3=4, npass=1, ntarget=1,
        graph_converter=CrystalGraph(
            bond_converter=GaussianDistance(np.linspace(0, 5, 10), 0.5)),
    )
    cls.model2 = MEGNetModel(
        10, 2, nblocks=1, lr=1e-2,
        n1=4, n2=4, n3=4, npass=1, ntarget=2,
        graph_converter=CrystalGraph(
            bond_converter=GaussianDistance(np.linspace(0, 5, 10), 0.5)),
    )
def setUpClass(cls):
    cls.s = Structure.from_spacegroup(
        'Fm-3m', Lattice.cubic(5.69169), ['Na', 'Cl'],
        [[0, 0, 0], [0, 0, 0.5]])
    cls.dummy_model = MEGNetModel(100, 2, nblocks=1, n1=4, n2=2, n3=2, npass=1)
def test_check_dimension(self):
    gc = CrystalGraph(bond_converter=GaussianDistance(np.linspace(0, 5, 20), 0.5))
    s = Structure(Lattice.cubic(3), ['Si'], [[0, 0, 0]])
    graph = gc.convert(s)
    # The model is built for 10 bond features, while `graph` above was
    # generated with 20, so check_dimension should raise.
    model = MEGNetModel(
        10, 2, nblocks=1, lr=1e-2,
        n1=4, n2=4, n3=4, npass=1, ntarget=1,
        graph_converter=CrystalGraph(
            bond_converter=GaussianDistance(np.linspace(0, 5, 10), 0.5)),
    )
    with self.assertRaises(Exception) as context:
        model.check_dimension(graph)
    self.assertTrue('The data dimension for bond' in str(context.exception))
def setUpClass(cls):
    cls.n_feature = 3
    cls.n_bond_features = 10
    cls.n_global_features = 2

    def generator(x, y):
        while True:
            yield x, y

    x_crystal = [
        np.array([1, 2, 3, 4]).reshape((1, -1)),
        np.random.normal(size=(1, 6, cls.n_bond_features)),
        np.random.normal(size=(1, 2, cls.n_global_features)),
        np.array([[0, 0, 1, 1, 2, 3]]),
        np.array([[1, 1, 0, 0, 3, 2]]),
        np.array([[0, 0, 1, 1]]),
        np.array([[0, 0, 0, 0, 1, 1]]),
    ]
    y = np.random.normal(size=(1, 2, 1))
    cls.train_gen_crystal = generator(x_crystal, y)

    x_mol = [
        np.random.normal(size=(1, 4, cls.n_feature)),
        np.random.normal(size=(1, 6, cls.n_bond_features)),
        np.random.normal(size=(1, 2, cls.n_global_features)),
        np.array([[0, 0, 1, 1, 2, 3]]),
        np.array([[1, 1, 0, 0, 3, 2]]),
        np.array([[0, 0, 1, 1]]),
        np.array([[0, 0, 0, 0, 1, 1]]),
    ]
    y = np.random.normal(size=(1, 2, 1))
    cls.train_gen_mol = generator(x_mol, y)

    cls.model = MEGNetModel(
        10, 2, nblocks=1, lr=1e-2,
        n1=4, n2=4, n3=4, npass=1, ntarget=1,
        graph_convertor=CrystalGraph(
            bond_convertor=GaussianDistance(np.linspace(0, 5, 10), 0.5)),
    )
def test_crystal_model_v2(self):
    cg = CrystalGraph()
    s = Structure(Lattice.cubic(3), ['Si'], [[0, 0, 0]])
    with ScratchDir('.'):
        model = MEGNetModel(nfeat_edge=None, nfeat_global=2, nblocks=1, lr=1e-2,
                            n1=4, n2=4, n3=4, npass=1, ntarget=1,
                            graph_converter=cg,
                            centers=np.linspace(0, 4, 10), width=0.5)
        model = model.train([s, s], [0.1, 0.1], epochs=2)
        t = model.predict_structure(s)
        self.assertTrue(t.shape == (1,))
def prepare_model_megnet(individuals, epochs, outfile, excl=[]):
    # Prepares a MEGNet model file from a list of individuals.
    # Uses total energy per atom as the target.
    # excl - excludes a particular stoichiometry, which is important for network learning.
    structures = []
    energies = []
    adapt = AseAtomsAdaptor()
    empty = 0
    if not excl:
        empty = 1
    i = 0
    for ind in individuals:
        struct_ase = ind.get_init_structure()
        chem_sym = struct_ase.get_chemical_symbols()
        e_tot = ind.e_tot
        struct_pymatgen = adapt.get_structure(struct_ase)
        flag = 1
        if empty == 0 and chem_sym == excl:
            flag = 0
        if flag == 1:
            structures.append(struct_pymatgen)
            energies.append(e_tot)
            i = i + 1
    print("read data of " + str(i) + " structures total")

    # standard values as taken from the MEGNet manual
    nfeat_bond = 100
    nfeat_global = 2
    r_cutoff = 5
    gaussian_centers = np.linspace(0, r_cutoff + 1, nfeat_bond)
    gaussian_width = 0.5
    distance_converter = GaussianDistance(gaussian_centers, gaussian_width)
    graph_converter = CrystalGraph(bond_converter=distance_converter, cutoff=r_cutoff)
    model = MEGNetModel(nfeat_bond, nfeat_global, graph_converter=graph_converter)

    # model training
    model.train(structures, energies, epochs=epochs)
    model.save_model(outfile)
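# Hedged usage sketch (not part of the original source): once
# prepare_model_megnet(...) has written `outfile`, the saved model can be
# reloaded and used for prediction. The "megnet_ga.hdf5" filename and the
# NaCl test structure below are illustrative assumptions only.
from pymatgen.core import Lattice, Structure
from megnet.models import MEGNetModel

reloaded = MEGNetModel.from_file("megnet_ga.hdf5")
nacl = Structure.from_spacegroup("Fm-3m", Lattice.cubic(5.69169),
                                 ["Na", "Cl"], [[0, 0, 0], [0, 0, 0.5]])
energy_per_atom = reloaded.predict_structure(nacl).ravel()[0]
print("Predicted energy per atom:", energy_per_atom)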
## Set GPU
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# 2. Model construction
## Graph converter
crystal_graph = CrystalGraph(
    bond_converter=GaussianDistance(centers=np.linspace(0, 6, 100), width=0.5),
    cutoff=5.0)

## Model setup
model = MEGNetModel(
    nfeat_edge=100,
    nfeat_global=None,
    ngvocal=len(TRAIN_FIDELITIES),
    global_embedding_dim=16,
    nblocks=3,
    nvocal=95,
    npass=2,
    graph_converter=crystal_graph,
    lr=1e-3,
)

# 3. Data loading and processing
## Load data
## Structure data for all Materials Project materials
if not os.path.isfile("mp.2019.04.01.json"):
    raise RuntimeError(
        "Please download the data first! Use runall.sh in this directory if needed."
    )
###### megnet example hyper-parameters
from megnet.models import MEGNetModel
from megnet.data.graph import GaussianDistance
from megnet.data.crystal import CrystalGraph
import numpy as np

nfeat_bond = 100
nfeat_global = 2
r_cutoff = 5
gaussian_centers = np.linspace(0, r_cutoff + 1, nfeat_bond)
gaussian_width = 0.5
distance_converter = GaussianDistance(gaussian_centers, gaussian_width)
graph_converter = CrystalGraph(bond_converter=distance_converter, cutoff=r_cutoff)
model = MEGNetModel(nfeat_bond, nfeat_global, graph_converter=graph_converter)
#########################################


def cvt_fmt_graph(rows):
    structures = []
    props = []
    for row in rows:
        structures.append(
            pymatgen_io_ase.AseAtomsAdaptor.get_structure(row.toatoms()))
        props.append(row.data[predict_item] / 100)
        # props.append(abs(row.data[predict_item] / 10))
    graphs_valid = []
    targets_valid = []
    structures_invalid = []
        if abs(e) > cut_value:
            targets[it][i] = prdc
            # targets[i] = (model.predict_structure(structures[i]).ravel() + targets[i]) / 2

    logging.info('Data count: {dc}, std orig dft value: {std_orig}, std of model output: {std_model}'.format(
        dc=len(targets_lst), std_orig=np.std(targets_lst), std_model=np.std(prediction_lst)))
    logging.info('Data count: {dc}, Mean orig: {mean_orig}, Mean_model: {mean_model}'.format(
        dc=len(targets_lst), mean_orig=np.mean(targets_lst), mean_model=np.mean(prediction_lst)))
    f = open(dump_model_name + '_' + it + '.txt', 'wb')  # to store and analyze the error
    pickle.dump(error_lst, f)
    f.close()

# model = MEGNetModel(10, 2, nblocks=3, lr=1e-3,
#                     n1=4, n2=4, n3=4, npass=1, ntarget=1,
#                     graph_converter=CrystalGraph(bond_converter=GaussianDistance(np.linspace(0, 5, 10), 0.5)))
model = MEGNetModel(nfeat_edge=10, nfeat_global=2,
                    graph_converter=CrystalGraph(
                        bond_converter=GaussianDistance(np.linspace(0, 5, 10), 0.5)))
model.save_model(dump_model_name + '_1by1_init_randomly' + '.hdf5')

init_model_tag = 'EGPHS'
ep = 5000
callback = tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=10,
                                             restore_best_weights=True)

for s in test_structures:
    test_input.append(model.graph_converter.graph_to_input(model.graph_converter.convert(s)))

db_short_full_dict = {'G': 'gllb-sc', 'H': 'hse', 'S': 'scan', 'P': 'pbe', 'E': 'E1'}


def construct_dataset_from_str(db_short_str):
    s = []
    t = []
    for i in range(len(db_short_str)):
        e = (model.predict_structure(structures[i]).ravel() - targets[i])
        ME += e
        error_lst.append(e)
        if abs(e) > 0.5:
            targets[i] = model.predict_structure(structures[i]).ravel()
            # targets[i] = (model.predict_structure(structures[i]).ravel() + targets[i]) / 2

    ME /= sz
    f = open(str(sz) + '.txt', 'wb')
    pickle.dump(error_lst, f)
    f.close()
    # for i in range(idx, idx + sz):
    #     targets[i] += ME
    idx += sz

model = MEGNetModel(10, 2, nblocks=1, lr=1e-4,
                    n1=4, n2=4, n3=4, npass=1, ntarget=1,
                    graph_converter=CrystalGraph(
                        bond_converter=GaussianDistance(np.linspace(0, 5, 10), 0.5)))
ep = 5000
callback = tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=50,
                                             restore_best_weights=True)

for s in test_structures:
    test_input.append(model.graph_converter.graph_to_input(model.graph_converter.convert(s)))

if training_mode == 0:
    # PBE -> HSE ... -> part EXP, one by one
    idx = 0
    for i in range(len(data_size)):
        model.train(structures[idx:idx + data_size[i]],
                    targets[idx:idx + data_size[i]], epochs=ep)
        idx += data_size[i]
        prediction(model)
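# Hedged sketch (assumption, not in the original fragment): the EarlyStopping
# callback defined above is never passed to `model.train` in this snippet.
# MEGNet's `train` accepts Keras callbacks as well as validation structures and
# targets, so it could be wired in as below; `val_structures`/`val_targets` are
# a hypothetical held-out split that the original code does not define.
model.train(structures[:data_size[0]], targets[:data_size[0]],
            validation_structures=val_structures,   # hypothetical split
            validation_targets=val_targets,         # hypothetical split
            callbacks=[callback],
            epochs=ep)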
    return result

# === megnet start === #
from megnet.models import MEGNetModel
from megnet.data.graph import GaussianDistance
from megnet.data.crystal import CrystalGraph
from megnet.utils.preprocessing import StandardScaler
from megnet.callbacks import ReduceLRUponNan, ManualStop, XiaotongCB
import numpy as np

gc = CrystalGraph(bond_converter=GaussianDistance(np.linspace(0, 5, 100), 0.5), cutoff=4)
model = MEGNetModel(100, 2, graph_converter=gc, lr=1e-4, loss=examine_loss)  # , metrics=[examine_loss]
INTENSIVE = False  # U0 is an extensive quantity
scaler = StandardScaler.from_training_data(structures, targets, is_intensive=INTENSIVE)
model.target_scaler = scaler
# callbacks = [ReduceLRUponNan(patience=500), ManualStop(), XiaotongCB()]

# change structures to megnet predictable structures
mp_strs = []

train_graphs, train_targets = model.get_all_graphs_targets(structures, targets)
train_nb_atoms = [len(i['atom']) for i in train_graphs]
train_targets = [model.target_scaler.transform(i, j)
                 for i, j in zip(train_targets, train_nb_atoms)]

for s in structures:
    test_targets.append(t_exp[i])

# r = list(range(len(list(d['disordered_exp'].keys()))))
# for i in r:
#     s_exp_disordered[i].remove_oxidation_states()
#     test_structures.append(s_exp_disordered[i])
#     test_targets.append(t_exp_disordered[i])

model = MEGNetModel.from_file(old_model_name)
model.summary()
embed = model.get_weights()[0]
print(model.get_weights()[0].shape)

model_new = MEGNetModel(
    nfeat_edge=10, nfeat_global=2,
    graph_converter=CrystalGraph(
        bond_converter=GaussianDistance(np.linspace(0, 5, 10), 0.5)))
model_new.summary()
# model_new.set_weights(model.get_weights()[0:])
# prediction(model_new)

model = MEGNetModel(
    nfeat_edge=100, nfeat_node=16, ngvocal=4, global_embedding_dim=16,
    graph_converter=CrystalGraphDisordered(
        bond_converter=GaussianDistance(np.linspace(0, 5, 100), 0.5)))
model.summary()
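# Hedged sketch (assumption, not in the original code): one way to reuse the
# pretrained element embedding `embed` printed above inside `model_new`,
# mirroring the commented-out `set_weights` line. This assumes the embedding is
# the first weight tensor of both models and that the shapes match (e.g. the
# default 95 x 16 element embedding); that may not hold for every MEGNet
# configuration.
new_weights = model_new.get_weights()
new_weights[0] = embed  # pretrained element embedding from the old model
model_new.set_weights(new_weights)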
def train():
    # Parse args
    args = parse_args()
    radius = args.radius
    n_works = args.n_works
    warm_start = args.warm_start
    output_path = args.output_path
    graph_file = args.graph_file
    prop_col = args.property
    learning_rate = args.learning_rate
    embedding_file = args.embedding_file
    k_folds = list(map(int, args.k_folds.split(",")))
    print("args is : {}".format(args))
    print("Local devices are : {}, \n\n Available gpus are : {}".format(
        device_lib.list_local_devices(),
        K.tensorflow_backend._get_available_gpus()))

    # Prepare output path
    if not os.path.exists(output_path):
        os.makedirs(output_path, exist_ok=True)

    # Get a crystal graph with cutoff radius A
    cg = CrystalGraph(
        bond_convertor=GaussianDistance(np.linspace(0, radius + 1, 100), 0.5),
        cutoff=radius,
    )

    if graph_file is not None:
        # Load graph data
        with gzip.open(graph_file, "rb") as f:
            valid_graph_dict = pickle.load(f)
        idx_list = list(range(len(valid_graph_dict)))
        valid_idx_list = [
            idx for idx, graph in valid_graph_dict.items() if graph is not None
        ]
    else:
        # Load structure data
        with gzip.open(args.input_file, "rb") as f:
            df = pd.DataFrame(pickle.load(f))[["structure", prop_col]]
        idx_list = list(range(len(df)))

        # Load embedding data for transfer learning
        if embedding_file is not None:
            with open(embedding_file) as json_file:
                embedding_data = json.load(json_file)

        # Calculate and save valid graphs
        valid_idx_list = list()
        valid_graph_dict = dict()
        for idx in idx_list:
            try:
                graph = cg.convert(df["structure"].iloc[idx])
                if embedding_file is not None:
                    graph["atom"] = [embedding_data[i] for i in graph["atom"]]
                valid_graph_dict[idx] = {
                    "graph": graph,
                    "target": df[prop_col].iloc[idx],
                }
                valid_idx_list.append(idx)
            except RuntimeError:
                valid_graph_dict[idx] = None

        # Save graphs
        with gzip.open(os.path.join(output_path, "graphs.pkl.gzip"), "wb") as f:
            pickle.dump(valid_graph_dict, f)

    # Split data
    kf = KFold(n_splits=args.cv, random_state=18012019, shuffle=True)
    for fold, (train_val_idx, test_idx) in enumerate(kf.split(idx_list)):
        print(fold)
        if fold not in k_folds:
            continue
        fold_output_path = os.path.join(output_path, "kfold_{}".format(fold))
        fold_model_path = os.path.join(fold_output_path, "model")
        if not os.path.exists(fold_model_path):
            os.makedirs(fold_model_path, exist_ok=True)
        train_idx, val_idx = train_test_split(train_val_idx,
                                              test_size=0.25,
                                              random_state=18012019,
                                              shuffle=True)

        # Calculate valid train/validation/test ids and save them
        valid_train_idx = sorted(list(set(train_idx) & set(valid_idx_list)))
        valid_val_idx = sorted(list(set(val_idx) & set(valid_idx_list)))
        valid_test_idx = sorted(list(set(test_idx) & set(valid_idx_list)))
        np.save(os.path.join(fold_output_path, "train_idx.npy"), valid_train_idx)
        np.save(os.path.join(fold_output_path, "val_idx.npy"), valid_val_idx)
        np.save(os.path.join(fold_output_path, "test_idx.npy"), valid_test_idx)

        # Prepare training graphs
        train_graphs = [valid_graph_dict[i]["graph"] for i in valid_train_idx]
        train_targets = [valid_graph_dict[i]["target"] for i in valid_train_idx]

        # Prepare validation graphs
        val_graphs = [valid_graph_dict[i]["graph"] for i in valid_val_idx]
        val_targets = [valid_graph_dict[i]["target"] for i in valid_val_idx]

        # Normalize targets or not
        if args.normalize:
            y_scaler = StandardScaler()
            train_targets = y_scaler.fit_transform(
                np.array(train_targets).reshape(-1, 1)).ravel()
            val_targets = y_scaler.transform(
                np.array(val_targets).reshape((-1, 1))).ravel()
        else:
            y_scaler = None

        # Initialize model
        if warm_start is None:
            # Set up model
            if learning_rate is None:
                learning_rate = 1e-3
            model = MEGNetModel(
                100,
                2,
                nblocks=args.n_blocks,
                nvocal=95,
                npass=args.n_pass,
                lr=learning_rate,
                loss=args.loss,
                graph_convertor=cg,
                is_classification=True if args.type == "classification" else False,
                nfeat_node=None if embedding_file is None else 16,
            )
            initial_epoch = 0
        else:
            # Model file
            model_list = [
                m_file for m_file in os.listdir(
                    os.path.join(warm_start, "kfold_{}".format(fold), "model"))
                if m_file.endswith(".hdf5")
            ]
            if args.type == "classification":
                model_list.sort(
                    key=lambda m_file: float(
                        m_file.split("_")[3].replace(".hdf5", "")),
                    reverse=False,
                )
            else:
                model_list.sort(
                    key=lambda m_file: float(
                        m_file.split("_")[3].replace(".hdf5", "")),
                    reverse=True,
                )
            model_file = os.path.join(warm_start, "kfold_{}".format(fold),
                                      "model", model_list[-1])

            # Load model from file
            if learning_rate is None:
                full_model = load_model(
                    model_file,
                    custom_objects={
                        "softplus2": softplus2,
                        "Set2Set": Set2Set,
                        "mean_squared_error_with_scale": mean_squared_error_with_scale,
                        "MEGNetLayer": MEGNetLayer,
                    },
                )
                learning_rate = K.get_value(full_model.optimizer.lr)

            # Set up model
            model = MEGNetModel(
                100,
                2,
                nblocks=args.n_blocks,
                nvocal=95,
                npass=args.n_pass,
                lr=learning_rate,
                loss=args.loss,
                graph_convertor=cg,
                is_classification=True if args.type == "classification" else False,
                nfeat_node=None if embedding_file is None else 16,
            )
            model.load_weights(model_file)
            initial_epoch = int(model_list[-1].split("_")[2])
            print("warm start from : {}, \nlearning_rate is {}.".format(
                model_file, learning_rate))

        # Train
        model.train_from_graphs(
            train_graphs,
            train_targets,
            val_graphs,
            val_targets,
            batch_size=args.batch_size,
            epochs=args.max_epochs,
            verbose=2,
            initial_epoch=initial_epoch,
            use_multiprocessing=False if n_works <= 1 else True,
            workers=n_works,
            dirname=fold_model_path,
            y_scaler=y_scaler,
            save_best_only=args.save_best_only,
        )
def main() -> None:
    """Execute main script."""
    parser = ArgumentParser()
    parser.add_argument(
        "--train",
        action="store_true",
        help="Whether to train the model.",
        dest="do_train",
    )
    parser.add_argument(
        "--eval",
        action="store_true",
        help="Whether to evaluate the model.",
        dest="do_eval",
    )
    parser.add_argument(
        "--which",
        choices=["MEGNet", "VGP", "ProbNN"],
        required=("--train" in sys.argv),
        help=(
            "Which components to train: "
            "MEGNet -- Just the MEGNetModel; "
            "VGP -- Just the VGP part of the ProbNN; "
            "ProbNN -- The whole ProbNN."
        ),
        dest="which",
    )
    parser.add_argument(
        "--epochs",
        "-n",
        type=int,
        required=("--train" in sys.argv),
        help="Number of training epochs.",
        dest="epochs",
    )
    parser.add_argument(
        "--inducing",
        "-i",
        type=int,
        help="Number of inducing index points.",
        default=500,
        dest="num_inducing",
    )
    args = parser.parse_args()

    do_train: bool = args.do_train
    do_eval: bool = args.do_eval
    which_model: str = args.which
    epochs: int = args.epochs
    num_inducing: int = args.num_inducing

    # Load the MEGNetModel into memory
    try:
        meg_model: MEGNetModel = MEGNetModel.from_file(str(MEGNET_MODEL_DIR))
    except FileNotFoundError:
        meg_model = MEGNetModel(**default_megnet_config())

    # Load the data into memory
    df = download_data(PHONONS_URL, PHONONS_SAVE_DIR)
    structures = df["structure"]
    targets = df["last phdos peak"]
    num_data = len(structures)
    print(f"{num_data} datapoints loaded.")

    num_training = floor(num_data * TRAINING_RATIO)
    print(f"{num_training} training data, {num_data-num_training} test data.")
    train_structs = structures[:num_training]
    train_targets = targets[:num_training]
    test_structs = structures[num_training:]
    test_targets = targets[num_training:]

    if which_model == "MEGNet":
        if do_train:
            tf_callback = TensorBoard(MEGNET_LOGS / NOW, write_graph=False)
            meg_model.train(
                train_structs,
                train_targets,
                test_structs,
                test_targets,
                automatic_correction=False,
                dirname="meg_checkpoints",
                epochs=epochs,
                callbacks=[tf_callback],
                verbose=VERBOSITY,
            )
            meg_model.save_model(str(MEGNET_MODEL_DIR))
        if do_eval:
            train_predicted = meg_model.predict_structures(train_structs).flatten()
            train_mae = MAE(train_predicted, None, train_targets)
            metric_logger.info("MEGNet train MAE = %f", train_mae)
            test_predicted = meg_model.predict_structures(test_structs).flatten()
            test_mae = MAE(test_predicted, None, test_targets)
            metric_logger.info("MEGNet test MAE = %f", test_mae)
    else:
        # Load the ProbNN into memory
        try:
            prob_model: MEGNetProbModel = MEGNetProbModel.load(PROB_MODEL_DIR)
        except FileNotFoundError:
            prob_model = MEGNetProbModel(meg_model, num_inducing, metrics=["MAE"])

        if do_train:
            if which_model == "VGP":
                prob_model.set_frozen("NN", recompile=False)
                prob_model.set_frozen(["VGP", "Norm"], freeze=False)
                tf_callback = TensorBoard(VGP_LOGS / NOW, write_graph=False)
            else:
                prob_model.set_frozen(["VGP", "NN", "Norm"], freeze=False)
                tf_callback = TensorBoard(FULL_MODEL_LOGS / NOW, write_graph=False)

            prob_model.train(
                train_structs,
                train_targets,
                epochs,
                test_structs,
                test_targets,
                callbacks=[tf_callback],
                verbose=VERBOSITY,
            )
            prob_model.save(PROB_MODEL_DIR)

        if do_eval:
            train_metrics = evaluate_uq_metrics(
                prob_model, train_structs, train_targets
            )
            log_metrics(train_metrics, "training")

            test_metrics = evaluate_uq_metrics(prob_model, test_structs, test_targets)
            log_metrics(test_metrics, "test")
def megnet_input(prop, ZeroVals, bond, nfeat_global, cutoff, width, *fraction):
    """
    megnet_input(prop, ZeroVals, bond, nfeat_global, cutoff, width, *fraction)

    Extracts valid structures and targets and splits them into user-specified datasets.

    Inputs:
        prop-          Optical property of interest.
        ZeroVals-      Exclude/include zero optical property values.
        bond-          Number of MEGNet bond features.
        nfeat_global-  Number of MEGNet global features.
        cutoff-        MEGNet radial cutoff.
        width-         MEGNet Gaussian width.
        *fraction-     Fraction of data to split into training and validation sets.
                       Passing an extra argument to split data based on quantity is permissible.

    Outputs:
        1- Featurised structures for training with MEGNet.
        2- Valid structures and targets.
        3- Inputs for extraction of activations.
        4- Pool, test, training and validation sets.
    """
    logging.info("Get graph inputs to MEGNet ...")
    print("Bond features = ", bond)
    print("Global features = ", nfeat_global)
    print("Radial cutoff = ", cutoff)
    print("Gaussian width = ", width)

    gaussian_centers = np.linspace(0, cutoff, bond)
    distance_converter = GaussianDistance(gaussian_centers, width)
    graph_converter = CrystalGraph(bond_converter=distance_converter)
    model = MEGNetModel(bond, nfeat_global, graph_converter=graph_converter)

    datafile = "%s_data.pkl" % prop
    inputs = pd.read_pickle(datafile)
    print("\nNumber of input entries found for %s data = %s" % (prop, len(inputs)))

    if ZeroVals == False:
        logging.info("Excluding zero optical property values from the dataset ...")
        mask = np.array([i for i, val in enumerate(inputs[prop]) if abs(val) == 0.])
        structures = np.delete(inputs["structure"].to_numpy(), mask)
        targets = np.delete(inputs[prop].to_numpy(), mask)
        print("Remaining number of entries = %s" % len(targets))
    else:
        logging.info("Zero optical property values will be included ...")
        structures = inputs["structure"].to_numpy()
        targets = inputs[prop].to_numpy()

    # Get the valid structures and targets, i.e. exclude isolated atoms
    logging.info("Obtaining valid structures and targets ...")
    valid_structures = []
    valid_targets = []
    activations_input_full = []
    for s, t in zip(structures, targets):
        try:
            activations_input_full.append(
                StructureGraph.get_input(graph_converter, s))
        except:
            print("Skipping structure with isolated atom ...")
            continue
        valid_structures.append(s)
        valid_targets.append(t)
    print("Number of invalid structures = %s"
          % (len(targets) - len(valid_targets)))
    print("\nTotal number of entries available for analysis = %s"
          % len(valid_targets))

    pool_frac = fraction[0][0]
    if len(fraction) == 1:
        if (fraction[0][0] + fraction[0][1]) == 1.:
            # For train-test split and k-fold cross-validation
            test_frac = fraction[0][1]
            logging.info("The pool is the same as the training set ...")
            print("Requested pool: %s%%" % (pool_frac * 100))
            print("Requested test set: %s%%" % (test_frac * 100))

            # Data split is based on percentages
            pool_boundary = int(len(valid_targets) * pool_frac)
            Xpool = np.array(valid_structures[0:pool_boundary])
            ypool = np.array(valid_targets[0:pool_boundary])
            Xtest = np.array(valid_structures[pool_boundary:])
            ytest = np.array(valid_targets[pool_boundary:])
            print("Pool:", ypool.shape)
            print("Test set:", ytest.shape)
            return (model, activations_input_full, valid_structures,
                    valid_targets, Xpool, ypool, Xtest, ytest)
        elif (fraction[0][0] + fraction[0][1]) < 1.:
            # For repeat active learning
            val_frac = fraction[0][1]
            test_frac = np.round(1 - pool_frac, decimals=2)
            pool_boundary = int(len(valid_targets) * pool_frac)
            Xpool = np.array(valid_structures[0:pool_boundary])
            ypool = np.array(valid_targets[0:pool_boundary])
            Xtest = np.array(valid_structures[pool_boundary:])
            ytest = np.array(valid_targets[pool_boundary:])
            val_boundary = int(pool_boundary * val_frac)
            Xtrain = Xpool[:-val_boundary]
            ytrain = ypool[:-val_boundary]
            Xval = Xpool[-val_boundary:]
            yval = ypool[-val_boundary:]
            print("Requested validation set: %s%% of pool" % (val_frac * 100))
            print("Training set:", ytrain.shape)
            print("Validation set:", yval.shape)
            print("Test set:", ytest.shape)
            return (model, activations_input_full, valid_structures,
                    valid_targets, Xpool, ypool, Xtest, ytest,
                    Xtrain, ytrain, Xval, yval)
    else:
        return (model, activations_input_full, np.array(valid_structures),
                np.array(valid_targets))
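# Hedged usage sketch (assumption, not part of the original source): calling
# megnet_input for a plain train/test split. The property name "band_gap" and
# the (0.8, 0.2) split are illustrative; the function expects a
# "<prop>_data.pkl" pickle in the working directory.
(model, activations_input_full, valid_structures, valid_targets,
 Xpool, ypool, Xtest, ytest) = megnet_input(
    "band_gap",   # prop: property column in the pickle (hypothetical name)
    False,        # ZeroVals: exclude zero-valued targets
    100,          # bond: number of Gaussian bond features
    2,            # nfeat_global: global feature dimension
    5.0,          # cutoff: radial cutoff
    0.5,          # width: Gaussian width
    (0.8, 0.2),   # *fraction: pool and test fractions summing to 1
)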
              for i in qm9_ids]

# We are training U0 here
train_structures = structures[:80]
test_structures = structures[80:]
train_targets = targets[:80]
test_targets = targets[80:]

from megnet.models import MEGNetModel
from megnet.data.graph import GaussianDistance
from megnet.data.crystal import CrystalGraph
from megnet.utils.preprocessing import StandardScaler
import numpy as np

gc = CrystalGraph(bond_converter=GaussianDistance(np.linspace(0, 5, 100), 0.5), cutoff=4)
model = MEGNetModel(100, 2, graph_converter=gc, lr=1e-3)

INTENSIVE = False  # U0 is an extensive quantity
scaler = StandardScaler.from_training_data(train_structures, train_targets,
                                           is_intensive=INTENSIVE)
model.target_scaler = scaler

model.train(train_structures, train_targets, epochs=500, verbose=2)

predicted_tests = []
for i in test_structures:
    predicted_tests.append(model.predict_structure(i).ravel()[0])
print(type(test_targets), type(predicted_tests))
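# Hedged follow-up sketch (assumption, not in the original snippet): a quick
# error summary for the held-out structures once the predictions above have
# been collected.
import numpy as np

mae = np.mean(np.abs(np.array(predicted_tests) - np.array(test_targets)))
print("Test MAE on U0: {:.4f}".format(mae))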