def load_model(model_path):
    """Load an sGDML model once during initialisation.

    The resulting predictor is stored in the module-level global ``gdml``.

    Parameters:
        -model_path: string of the path to the model file (.npz)

    Exits the process with status 2 (after printing a message) when the
    file cannot be loaded or the predictor cannot be built from it.
    """
    global gdml

    # ``except Exception`` rather than a bare ``except`` so that
    # KeyboardInterrupt / SystemExit are not mistaken for a bad model file.
    try:
        model = np.load(model_path)
    except Exception:
        print(
            "Could not load model file. Please ensure it is of the right format (.npz)"
        )
        sys.exit(2)

    try:
        gdml = GDMLPredict(model)
    except Exception:
        print("Unable to read GDML model file.")
        sys.exit(2)
def load_mbGDML_model(self, path):
    """Load the per-order sGDML models of an mbGDML model directory.

    The ``.npz`` files inside ``path`` are assigned to their many-body
    order from the file naming scheme ``*1mer*npz``, ``*2body*npz``,
    ``*3body*npz`` ... ``*nbody*npz``. Orders above the configured
    ``n_mers`` are ignored; orders with no matching file stay ``None``.

    Parameters:
        -path: directory containing the model files

    Returns:
        A tuple ``(model, training_indices)`` where ``model`` is the
        ``mbGDMLPredict`` built from the per-order predictors and
        ``training_indices`` is, for now, an empty list.
    """
    n_mers = self.call_para('train_models', 'n_mers')

    if not os.path.isdir(path):
        # NOTE(review): print_error presumably aborts the run -- confirm.
        print_error(
            'Model path (-i) needs to be a directory for mbGDML models'
            f'. Path given: {path}'
        )

    # Raw string: "\d" in a normal string literal is an invalid escape.
    # Group 1 captures the digit of "<n>body", group 2 the "1" of "1mer".
    name_pattern = re.compile(r".*(\d)body.*npz|.*(1)mer.*npz")

    mb_gdmls = [None] * n_mers
    for file_path in glob.glob(f'{path}/*.npz'):
        match = name_pattern.match(file_path)
        if match is None:
            continue

        n_body, one_mer = match.groups()
        index = int(one_mer if n_body is None else n_body) - 1

        # The lower bound keeps a "0body" file from yielding index -1 and
        # silently overwriting the highest-order model.
        if 0 <= index < n_mers:
            mb_gdmls[index] = GDMLPredict(np.load(file_path))

    model = mbGDMLPredict(mb_gdmls)

    return model, []  # preliminarily empty training indices array
def sgdml_path_predict_F(self, model_path, input_var, batch_size):
    """Predict forces for ``input_var`` in batches with an sGDML model.

    Parameters:
        -model_path: path of the sGDML model file (.npz)
        -input_var: sliceable sequence of inputs handed to
                    ``GDMLPredict.predict`` in chunks
        -batch_size: number of entries per prediction batch

    Returns:
        The per-batch force predictions joined with ``np.concatenate``.
        With an empty ``input_var`` no batch is predicted and
        ``np.concatenate`` raises ``ValueError``.
    """
    from sgdml.predict import GDMLPredict

    N = len(input_var)

    # Ceiling division, so an exact multiple of batch_size does not add a
    # trailing empty batch; at least one batch is kept for the progress
    # output.
    n_batches = max(1, -(-N // batch_size))
    width = 20 if n_batches > 999 else None

    npz = np.load(model_path)
    model = GDMLPredict(npz)

    message = f"Predicting {os.path.basename(model_path)} batches"
    predicts = []

    start_time, eta = time.time(), 0
    for i in range(n_batches):
        print_x_out_of_y_eta(message, i, n_batches, eta, width=width)

        R = input_var[i * batch_size:(i + 1) * batch_size]
        if len(R) == 0:
            break

        _, F = model.predict(R)
        predicts.append(F)

        # i + 1 batches are done, so n_batches - (i + 1) remain.
        batches_done = i + 1
        avg_time = (time.time() - start_time) / batches_done
        eta = (n_batches - batches_done) * avg_time

    # NOTE(review): the positional True is presumably a "finished" flag of
    # print_x_out_of_y_eta -- confirm against its definition.
    print_x_out_of_y_eta(
        message,
        n_batches,
        n_batches,
        time.time() - start_time,
        True,
        width=width,
    )

    return np.concatenate(predicts)
def load_model_sgdml(self, model_path):
    """Load an sGDML model once during initialisation.

    The resulting predictor is stored on the instance as ``self.gdml``.

    Parameters:
        -model_path: string of the path to the model file (.npz)

    Failures are reported through ``print_error``.
    """
    from sgdml.predict import GDMLPredict

    # ``except Exception`` rather than a bare ``except`` so that
    # KeyboardInterrupt / SystemExit are not reported as a bad model file.
    try:
        model = np.load(model_path)
    except Exception:
        # NOTE(review): print_error presumably aborts the run -- confirm.
        print_error(
            "Could not load model file. Please ensure it is of the right format (.npz). Aborted."
        )

    try:
        self.gdml = GDMLPredict(model)
    except Exception:
        print_error("Unable to read GDML model file. Aborted.")
# NOTE(review): only 'E' is sliced here while 'R' is used in full below, so
# the two may end up with different lengths -- confirm this is intended.
dataset['E'] = dataset['E'][1:25000]

R = dataset['R']
n_dataset = R.shape[0]
print(R.shape)

# Accumulator for the forces predicted by every model, same shape as R.
F_mean = np.zeros(R.shape)

model_dir, model_file_names = args.model_dir
n_models = len(model_file_names)

for model_file_name in model_file_names:
    model_path = os.path.join(model_dir, model_file_name)
    print(model_path)

    model = np.load(model_path)
    gdml = GDMLPredict(model)

    # The predictor is fed one flattened geometry per row.
    _, F = gdml.predict(R.reshape(n_dataset, -1))
    F_mean += F.reshape(n_dataset, -1, 3)

# Average over the models that were summed (not over the number of
# geometries, which is unrelated to how many terms were accumulated).
F_mean /= n_models

dataset = dict(dataset)
dataset['F'] = F_mean
dataset['theory'] = '{} {}'.format('mean_field_F ', dataset['theory'])
dataset['md5'] = io.dataset_md5(dataset)

np.savez_compressed('MEAN_' + dataset_path, **dataset)
def load_sgdml_model(self, path):
    """Load an sGDML model file and build a predictor from it.

    Parameters:
        -path: location of the model file, passed to ``np.load``

    Returns:
        A tuple ``(predictor, training_indices)``: the ``GDMLPredict``
        built from the loaded data and the array stored under
        ``"idxs_train"`` in the model file.
    """
    archive = np.load(path)

    # Read the indices first so that a file without them fails before a
    # predictor is constructed.
    train_idxs = archive["idxs_train"]

    return GDMLPredict(archive), train_idxs
def __init__(
    self,
    latency=1.0,
    name="",
    threaded=False,
    sGDML_model=None,
    pars=None,
    dopbc=False,
):
    """Initialises FFsGDML

    Args:
        latency, name, pars, dopbc, threaded: Forwarded unchanged to the
            parent class initialiser.
        sGDML_model: Filename containing the sGDML model. Required.

    Raises:
        ValueError: If the sGDML package cannot be imported, if no model
            file is given, if ``dopbc`` is True, or if the model file
            cannot be read.
    """

    # a socket to the communication library is created or linked
    super(FFsGDML, self).__init__(latency, name, pars, dopbc, threaded=threaded)

    # --- Load sGDML package ---
    try:
        from sgdml.predict import GDMLPredict
        from sgdml import __version__
    except ImportError:
        raise ValueError(
            "ERROR: sGDML package not located. Install it via: pip install sgdml"
        )
    info(" @ForceField: Using sGDML version " + __version__, verbosity.low)

    # A bit weird to use keyword argument for a required argument, but this
    # is also done in the code above.
    if sGDML_model is None:
        raise ValueError("Must provide a sGDML model file.")

    if dopbc is True:
        raise ValueError("Must set PBCs to False.")

    self.sGDML_model = sGDML_model

    # --- Load sGDML model file. ---
    # A missing or unreadable file raises IOError/OSError, a malformed one
    # ValueError. The message names the file: self.model is not set when
    # loading fails.
    try:
        self.model = np.load(self.sGDML_model)
    except (IOError, OSError, ValueError):
        raise ValueError(
            "ERROR: Reading sGDML model " + str(self.sGDML_model) + " file failed."
        )
    info(
        " @ForceField: sGDML model " + self.sGDML_model + " loaded",
        verbosity.medium,
    )

    # The units are stored in the loaded model data, not in the filename.
    if "r_unit" in self.model and "e_unit" in self.model:
        info(
            " @ForceField: The units used in your sGDML model are "
            + str(self.model["r_unit"])
            + " and "
            + str(self.model["e_unit"]),
            verbosity.low,
        )

    info(
        " @ForceField: IMPORTANT: It is always assumed that the units in"
        + " the provided model file are in Angstroms and kcal/mol.",
        verbosity.low,
    )

    # --- Constants ---
    self.bohr_to_ang = 1.0 / UnitMap["length"]["angstrom"]
    self.kcalmol_to_hartree = UnitMap["energy"]["cal/mol"] * 1000.0
    self.kcalmolang_to_hartreebohr = self.bohr_to_ang * self.kcalmol_to_hartree

    # --- Creates predictor ---
    self.predictor = GDMLPredict(self.model)

    info(
        " @ForceField: Optimizing parallelization settings for sGDML FF.",
        verbosity.medium,
    )
    self.predictor.prepare_parallel(n_bulk=1)
class FFsGDML(ForceField):
    """A symmetric Gradient Domain Machine Learning (sGDML) force field.

    Chmiela et al. Sci. Adv., 3(5), e1603015, 2017; Nat. Commun., 9(1), 3887, 2018.
    http://sgdml.org/doc/
    https://github.com/stefanch/sGDML
    """

    def __init__(
        self,
        latency=1.0,
        name="",
        threaded=False,
        sGDML_model=None,
        pars=None,
        dopbc=False,
    ):
        """Initialises FFsGDML

        Args:
            latency, name, pars, dopbc, threaded: Forwarded unchanged to
                the parent class initialiser.
            sGDML_model: Filename containing the sGDML model. Required.

        Raises:
            ValueError: If the sGDML package cannot be imported, if no
                model file is given, if ``dopbc`` is True, or if the model
                file cannot be read.
        """

        # a socket to the communication library is created or linked
        super(FFsGDML, self).__init__(latency, name, pars, dopbc, threaded=threaded)

        # --- Load sGDML package ---
        try:
            from sgdml.predict import GDMLPredict
            from sgdml import __version__
        except ImportError:
            raise ValueError(
                "ERROR: sGDML package not located. Install it via: pip install sgdml"
            )
        info(" @ForceField: Using sGDML version " + __version__, verbosity.low)

        # A bit weird to use keyword argument for a required argument, but this
        # is also done in the code above.
        if sGDML_model is None:
            raise ValueError("Must provide a sGDML model file.")

        if dopbc is True:
            raise ValueError("Must set PBCs to False.")

        self.sGDML_model = sGDML_model

        # --- Load sGDML model file. ---
        # A missing or unreadable file raises IOError/OSError, a malformed
        # one ValueError. The message names the file: self.model is not
        # set when loading fails.
        try:
            self.model = np.load(self.sGDML_model)
        except (IOError, OSError, ValueError):
            raise ValueError(
                "ERROR: Reading sGDML model "
                + str(self.sGDML_model)
                + " file failed."
            )
        info(
            " @ForceField: sGDML model " + self.sGDML_model + " loaded",
            verbosity.medium,
        )

        # The units are stored in the loaded model data, not in the filename.
        if "r_unit" in self.model and "e_unit" in self.model:
            info(
                " @ForceField: The units used in your sGDML model are "
                + str(self.model["r_unit"])
                + " and "
                + str(self.model["e_unit"]),
                verbosity.low,
            )

        info(
            " @ForceField: IMPORTANT: It is always assumed that the units in"
            + " the provided model file are in Angstroms and kcal/mol.",
            verbosity.low,
        )

        # --- Constants ---
        self.bohr_to_ang = 1.0 / UnitMap["length"]["angstrom"]
        self.kcalmol_to_hartree = UnitMap["energy"]["cal/mol"] * 1000.0
        self.kcalmolang_to_hartreebohr = self.bohr_to_ang * self.kcalmol_to_hartree

        # --- Creates predictor ---
        self.predictor = GDMLPredict(self.model)

        info(
            " @ForceField: Optimizing parallelization settings for sGDML FF.",
            verbosity.medium,
        )
        self.predictor.prepare_parallel(n_bulk=1)

    def poll(self):
        """Polls the forcefield checking if there are requests that should
        be answered, and if necessary evaluates the associated forces and energy."""

        # we have to be thread-safe, as in multi-system mode this might get
        # called by many threads at once
        with self._threadlock:
            for r in self.requests:
                if r["status"] == "Queued":
                    r["status"] = "Running"
                    self.evaluate(r)

    def evaluate(self, r):
        """Evaluate the energy and forces.

        The positions in ``r["pos"]`` are scaled by ``bohr_to_ang`` before
        prediction, and the predicted energy and forces are scaled by
        ``kcalmol_to_hartree`` and ``kcalmolang_to_hartreebohr``. The
        request is updated in place: ``r["result"]`` receives
        ``[energy, flattened forces, 3x3 zero matrix, {"raw": ""}]`` and
        the request is marked "Done" with a finish timestamp.
        """

        E, F = self.predictor.predict(r["pos"] * self.bohr_to_ang)

        r["result"] = [
            E[0] * self.kcalmol_to_hartree,
            F.flatten() * self.kcalmolang_to_hartreebohr,
            np.zeros((3, 3), float),
            {"raw": ""},
        ]
        r["status"] = "Done"
        r["t_finished"] = time.time()
import numpy as np
import matplotlib.pyplot as plt
from sgdml.predict import GDMLPredict
from sgdml.utils import io

# from mpl_toolkits.mplot3d import Axes3D

# Geometry to predict on; the second return value of read_xyz is unused.
r, _ = io.read_xyz('examples/geometries/aspirin.xyz')  # 9 atoms
print(r.shape)  # (1,27)

model = np.load('models/aspirin.npz')
gdml = GDMLPredict(model)
e, f = gdml.predict(r)
print(e.shape)  # (1,)
print(f.shape)  # (1,27)

# Print arrays in full instead of the abbreviated form.
np.set_printoptions(threshold=np.inf)

ASPRN = np.load('/Users/sGDML/data/aspirin/aspirin_test.npz')
print(ASPRN['E'])

# Reference energies as a single row, so the difference below has shape
# (1, number of reference energies).
e0 = ASPRN['E']
e0 = np.reshape(e0, (1, -1))
aspirin_delta_e = e - e0

# .shape is a tuple; the number of points is its second entry.
NX = aspirin_delta_e.shape[1] + 1  # 500+1
x = np.arange(1, NX)

plt.scatter(x, aspirin_delta_e, c='r', marker='o')
plt.xlabel("Number of Aspirin Molecule Geometries")
plt.ylabel("Error of Energy Prediction")
plt.title("Error of Energy Prediction with Aspirin in sGDML")
plt.show()
def __init__(self, latency=1.0, name="", threaded=False, sGDML_model=None, pars=None, dopbc=False):
    """Initialises FFsGDML

    Args:
        latency, name, pars, dopbc, threaded: Forwarded unchanged to the
            parent class initialiser.
        sGDML_model: Filename containing the sGDML model. Required.

    Raises:
        ValueError: If the sGDML package cannot be imported or is older
            than 0.4.4, if no model file is given, if ``dopbc`` is True,
            or if the model file cannot be read.
    """

    # a socket to the communication library is created or linked
    super(FFsGDML, self).__init__(latency, name, pars, dopbc, threaded=threaded)

    # --- Load sGDML package ---
    try:
        from sgdml.predict import GDMLPredict
        from sgdml import __version__
    except ImportError:
        raise ValueError(
            "ERROR: sGDML package not located. Install it using via:\n"
            "$ git clone https://github.com/stefanch/sGDML.git\n$ cd sGDML\n$ pip2 install -e ."
        )
    info(" @ForceField: Using sGDML version " + __version__, verbosity.low)

    # Compare the first three version components as tuples of ints; bare
    # map objects cannot be ordered on Python 3.
    required_version = tuple(map(int, '0.4.4.dev1'.split('.')[:3]))
    installed_version = tuple(map(int, __version__.split('.')[:3]))
    if required_version > installed_version:
        raise ValueError(
            "ERROR: Version of sGDML not supported. Install a newer version (>= 0.4.4) or the "
            "latest developer version via:\n"
            "$ git clone https://github.com/stefanch/sGDML.git\n$ cd sGDML\n$ pip2 install -e ."
        )

    # A bit weird to use keyword argument for a required argument, but this
    # is also done in the code above.
    if sGDML_model is None:
        raise ValueError("Must provide a sGDML model file.")

    if dopbc is True:
        raise ValueError("Must set PBCs to False.")

    self.sGDML_model = sGDML_model

    # --- Load sGDML model file. ---
    # ``except Exception`` rather than a bare ``except`` so that
    # KeyboardInterrupt / SystemExit are not reported as a bad model file.
    try:
        self.model = np.load(self.sGDML_model)
    except Exception:
        raise ValueError("ERROR: Reading sGDML model " + str(self.sGDML_model) + " file failed.")
    info(" @ForceField: sGDML model " + self.sGDML_model + " loaded", verbosity.medium)

    # The units are stored in the loaded model data, not in the filename.
    if "r_unit" in self.model and "e_unit" in self.model:
        info(
            " @ForceField: The units used in your sGDML model are "
            + str(self.model["r_unit"]) + " and " + str(self.model["e_unit"]),
            verbosity.low,
        )

    info(
        " @ForceField: IMPORTANT: It is always assumed that the units in"
        + " the provided model file are in Angstroms and kcal/mol.",
        verbosity.low,
    )

    # --- Constants ---
    self.bohr_to_ang = 1. / UnitMap["length"]['angstrom']
    self.kcalmol_to_hartree = UnitMap["energy"]['cal/mol'] * 1000.
    self.kcalmolang_to_hartreebohr = self.bohr_to_ang * self.kcalmol_to_hartree

    # --- Creates predictor ---
    self.predictor = GDMLPredict(self.model)

    info(" @ForceField: Optimizing parallelization settings for sGDML FF.", verbosity.medium)
    self.predictor.prepare_parallel(n_bulk=1)