def test_load_and_reload(self, all_gps, validation_env, multihyps):
    """
    Round-trip a GP through a pickle file and a JSON file.

    After each round trip the reloaded model must give exactly the same
    prediction as the original for every component ``d`` in 0, 1, 2.
    Finally, writing with an unknown format name must raise ValueError.
    """
    test_gp = all_gps[multihyps]

    def check_same_predictions(reloaded):
        # Exact (not approximate) equality is required for each component.
        for component in (0, 1, 2):
            reference = test_gp.predict(x_t=validation_env, d=component)
            candidate = reloaded.predict(x_t=validation_env, d=component)
            assert np.all(reference == candidate)

    # Pickle round trip.
    test_gp.write_model('test_gp_write', 'pickle')
    from_pickle = GaussianProcess.from_file('test_gp_write.pickle')
    check_same_predictions(from_pickle)
    os.remove('test_gp_write.pickle')

    # JSON round trip: only the first line of the file is parsed.
    test_gp.write_model('test_gp_write', 'json')
    with open('test_gp_write.json', 'r') as handle:
        first_line = handle.readline()
        from_json = GaussianProcess.from_dict(json.loads(first_line))
    check_same_predictions(from_json)
    os.remove('test_gp_write.json')

    # Unsupported format names are rejected.
    with raises(ValueError):
        test_gp.write_model('test_gp_write', 'cucumber')
def test_serialization_method(two_body_gp, test_point):
    """
    Serialize and then un-serialize a GP and ensure that no info was lost.
    Compare one calculation to ensure predictions work correctly.

    :param two_body_gp: GP under test; must provide ``as_dict`` and
        ``predict``.
    :param test_point: environment passed to ``predict`` as ``x_t``.
    :return: None
    """
    old_gp_dict = two_body_gp.as_dict()
    new_gp = GaussianProcess.from_dict(old_gp_dict)
    new_gp_dict = new_gp.as_dict()

    assert len(new_gp_dict) == len(old_gp_dict)

    for k1, k2 in zip(sorted(new_gp_dict.keys()),
                      sorted(old_gp_dict.keys())):
        # Equal lengths do not imply equal key sets.
        assert k1 == k2

        x = new_gp_dict[k1]
        # Compare against the ORIGINAL dictionary; reading both values from
        # new_gp_dict would make every check below trivially true.
        y = old_gp_dict[k2]

        if isinstance(x, np.ndarray):
            assert np.equal(x, y).all()
        elif hasattr(x, '__len__'):
            # zip() below would silently ignore a length mismatch.
            assert len(x) == len(y)
            # Guard the x[0] probe so empty sequences do not raise.
            if len(x) > 0 and isinstance(x[0], np.ndarray):
                assert np.equal(x, y).all()
            else:
                for xx, yy in zip(x, y):
                    assert xx == yy
        else:
            assert x == y

    for d in [0, 1, 2]:
        assert np.all(
            two_body_gp.predict(x_t=test_point, d=d) == new_gp.predict(
                x_t=test_point, d=d))
def test_load_and_reload(two_body_gp, test_point):
    """
    Round-trip the two-body GP through a pickle file and a JSON file.

    The pickle file is read back with ``pickle.load`` and the JSON file
    through ``GaussianProcess.from_dict``. Each reloaded model must predict
    exactly what the original predicts for ``d`` in 0, 1, 2. Writing with an
    unknown format name must raise ValueError.
    """

    def check_same_predictions(reloaded):
        for component in (0, 1, 2):
            reference = two_body_gp.predict(x_t=test_point, d=component)
            candidate = reloaded.predict(x_t=test_point, d=component)
            assert np.all(reference == candidate)

    # Pickle round trip.
    two_body_gp.write_model('two_body', 'pickle')
    with open('two_body.pickle', 'rb') as handle:
        from_pickle = pickle.load(handle)
    check_same_predictions(from_pickle)
    os.remove('two_body.pickle')

    # JSON round trip: only the first line of the file is parsed.
    two_body_gp.write_model('two_body', 'json')
    with open('two_body.json', 'r') as handle:
        first_line = handle.readline()
        from_json = GaussianProcess.from_dict(json.loads(first_line))
    check_same_predictions(from_json)
    os.remove('two_body.json')

    # Unsupported format names are rejected.
    with raises(ValueError):
        two_body_gp.write_model('two_body', 'cucumber')
def make_gp(
    self,
    cell=None,
    call_no=None,
    hyps=None,
    init_gp=None,
    hyp_no=None,
    **kwargs,
):
    """
    Build a GP model from the frames stored on this parsed trajectory.

    Args:
        cell: Cell used for every training structure. Defaults to
            ``self.header["cell"]``.
        call_no (int): Number of leading frames to add, i.e. the lists are
            sliced as ``[:call_no]``. Defaults to all frames in
            ``self.gp_position_list``.
        hyps: Hyperparameters assigned to the model. Defaults to
            ``self.gp_hyp_list[hyp_no - 1]``.
        init_gp: Existing GP to extend. It is modified in place (its
            ``hyps`` are overwritten and the frames are appended) and
            returned. Required when ``self.header["restart"] > 0``.
        hyp_no (int): 1-based index into ``self.gp_hyp_list``; only used
            when ``hyps`` is None. Defaults to the last entry.
        kwargs: Non-None values override entries of the header dictionary
            passed to ``GaussianProcess.from_dict``. Ignored when
            ``init_gp`` is given.

    Returns:
        The GP model after ``update_db`` for each frame and a final
        ``set_L_alpha``.

    Raises:
        AssertionError: if the header marks a restarted run and no
            ``init_gp`` is supplied.
    """
    if "restart" in self.header and self.header["restart"] > 0:
        assert init_gp is not None, (
            "Please input the init_gp as the gp model dumped "
            "before restarting otf."
        )

    if call_no is None:
        call_no = len(self.gp_position_list)
    if hyp_no is None:
        hyp_no = len(self.gp_hyp_list)  # use the last hyps by default
    if hyps is None:
        # hyp_no is 1-based: hyp_no == len(list) selects the final entry.
        hyps = self.gp_hyp_list[hyp_no - 1]
    if cell is None:
        cell = self.header["cell"]

    if init_gp is None:
        # Use run's values as extracted from header
        # TODO Allow for kernel gradient in header
        dictionary = deepcopy(self.header)
        dictionary["hyps"] = hyps
        for key, value in kwargs.items():
            if value is not None:
                dictionary[key] = value
        gp_model = GaussianProcess.from_dict(dictionary)
    else:
        gp_model = init_gp
        gp_model.hyps = hyps

    for positions, forces, atoms, species in zip(
        self.gp_position_list[:call_no],
        self.gp_force_list[:call_no],
        self.gp_atom_list[:call_no],
        self.gp_species_list[:call_no],
    ):
        struc_curr = struc.Structure(cell, species, positions)
        gp_model.update_db(struc_curr, forces, custom_range=atoms)

    gp_model.set_L_alpha()

    return gp_model
def make_gp(
    self,
    cell=None,
    call_no=None,
    hyps=None,
    init_gp=None,
    hyp_no=None,
    **kwargs,
):
    """
    Assemble a GP model from the frames stored on this parsed trajectory.

    Args:
        cell: Cell for every training structure; defaults to
            ``self.header['cell']``.
        call_no: Number of leading frames to add; defaults to all frames
            in ``self.gp_position_list``.
        hyps: Hyperparameters for the model. When None, the last item of
            the latest non-empty entry among the first ``hyp_no`` entries
            of ``self.gp_hyp_list`` is used (None if there is none).
        init_gp: Existing GP to extend in place instead of building one
            from the header.
        hyp_no: How many entries of ``self.gp_hyp_list`` to search;
            defaults to ``call_no``.
        kwargs: Non-None values override header entries before the model
            is built with ``GaussianProcess.from_dict``.

    Returns:
        The GP model after all frames were added and ``set_L_alpha`` ran.
    """
    if call_no is None:
        call_no = len(self.gp_position_list)
    if hyp_no is None:
        hyp_no = call_no

    if hyps is None:
        # Walk backwards and take the newest non-empty hyperparameter record.
        hyps = next(
            (
                self.gp_hyp_list[idx][-1]
                for idx in range(hyp_no - 1, -1, -1)
                if len(self.gp_hyp_list[idx]) > 0
            ),
            None,
        )

    if cell is None:
        cell = self.header['cell']

    if init_gp is not None:
        gp_model = init_gp
        gp_model.hyps = hyps
    else:
        # Use run's values as extracted from header
        # TODO Allow for kernel gradient in header
        settings = deepcopy(self.header)
        settings['hyps'] = hyps
        settings.update(
            {key: value for key, value in kwargs.items() if value is not None}
        )
        gp_model = GaussianProcess.from_dict(settings)

    # The hyperparameter list takes part in the zip (its values are unused),
    # so a shorter hyperparameter list still limits the number of frames.
    frames = zip(
        self.gp_position_list[:call_no],
        self.gp_force_list[:call_no],
        self.gp_atom_list[:call_no],
        self.gp_hyp_list[:call_no],
        self.gp_species_list[:call_no],
    )
    for frame in frames:
        positions, forces, atoms, _, species = frame
        struc_curr = struc.Structure(cell, species, positions)
        gp_model.update_db(struc_curr, forces, custom_range=atoms)

    gp_model.set_L_alpha()
    return gp_model
def test_serialization_method(two_body_gp, test_point):
    """
    Check that a GP survives an ``as_dict`` / ``from_dict`` round trip.

    The dictionary of the rebuilt model is compared with the original
    dictionary via ``dumpcompare``, and the two models must give exactly
    the same prediction at ``test_point`` for ``d`` in 0, 1, 2.

    :param two_body_gp: GP under test.
    :param test_point: environment passed to ``predict`` as ``x_t``.
    :return: None
    """
    original_dict = two_body_gp.as_dict()
    rebuilt_gp = GaussianProcess.from_dict(original_dict)
    rebuilt_dict = rebuilt_gp.as_dict()

    dumpcompare(rebuilt_dict, original_dict)

    for component in range(3):
        reference = two_body_gp.predict(x_t=test_point, d=component)
        candidate = rebuilt_gp.predict(x_t=test_point, d=component)
        assert np.all(reference == candidate)
def test_serialization_method(self, all_gps, validation_env, multihyps):
    """
    Check that a GP survives an ``as_dict`` / ``from_dict`` round trip.

    The rebuilt model's dictionary must have as many entries as the
    original and pass ``dumpcompare`` against it, and both models must give
    exactly the same prediction for ``d`` in 0, 1, 2.

    :param all_gps: collection of GPs indexed by ``multihyps``.
    :param validation_env: environment passed to ``predict`` as ``x_t``.
    :param multihyps: key selecting which GP from ``all_gps`` is tested.
    :return: None
    """
    test_gp = all_gps[multihyps]

    original_dict = test_gp.as_dict()
    rebuilt_gp = GaussianProcess.from_dict(original_dict)
    rebuilt_dict = rebuilt_gp.as_dict()

    assert len(rebuilt_dict) == len(original_dict)
    dumpcompare(rebuilt_dict, original_dict)

    for component in range(3):
        reference = test_gp.predict(x_t=validation_env, d=component)
        candidate = rebuilt_gp.predict(x_t=validation_env, d=component)
        assert np.all(reference == candidate)
def from_dict(dct):
    """
    Rebuild a ``FLARE_Calculator`` from its dictionary representation.

    Args:
        dct (dict): Must contain ``"gp_model"``, ``"use_mapping"`` and
            ``"results"``; ``"mgp_model"`` is read when ``"use_mapping"``
            is truthy. All entries are forwarded to ``FLARE_Calculator``
            as keyword arguments.

    Returns:
        The calculator, with float and list entries of ``"results"``
        restored (lists become numpy arrays).
    """
    # Work on a shallow copy: the serialized sub-dictionaries are replaced
    # by live objects below, and doing that on the caller's dictionary
    # would make it unusable for a second from_dict call.
    dct = dict(dct)

    dct["gp_model"] = GaussianProcess.from_dict(dct["gp_model"])
    if dct["use_mapping"]:
        dct["mgp_model"] = MappedGaussianProcess.from_dict(dct["mgp_model"])

    calc = FLARE_Calculator(**dct)

    res = dct["results"]
    for key in res:
        value = res[key]
        if isinstance(value, float):
            calc.results[key] = value
        elif isinstance(value, list):
            # Lists are converted back into numpy arrays.
            calc.results[key] = np.array(value)

    if dct["use_mapping"]:
        # Every map gets the hyps_mask of the GP model.
        for xb in calc.mgp_model.maps:
            xb_map = calc.mgp_model.maps[xb]
            xb_map.hyps_mask = calc.gp_model.hyps_mask

    return calc
def from_dict(dictionary: dict):
    """
    Create MGP object from dictionary representation.

    Args:
        dictionary (dict): Must provide ``grid_params``, ``struc_params``,
            ``mean_only``, ``lmp_file_name``, ``n_cpus``, ``n_sample``,
            ``bodies``, one ``kernel{i}b_info`` entry per body, and the
            saved coefficients under ``maps_2`` / ``maps_3``. An optional
            ``GP`` entry holds a serialized GP.

    Returns:
        The rebuilt ``MappedGaussianProcess``. The input dictionary is
        left unmodified.
    """
    new_mgp = MappedGaussianProcess(
        grid_params=dictionary['grid_params'],
        struc_params=dictionary['struc_params'],
        GP=None,
        mean_only=dictionary['mean_only'],
        container_only=True,
        lmp_file_name=dictionary['lmp_file_name'],
        n_cpus=dictionary['n_cpus'],
        n_sample=dictionary['n_sample'],
        autorun=False)

    # Restore kernel_info
    for i in dictionary['bodies']:
        kern_info = f'kernel{i}b_info'

        # Copy the list: entries 0 and 1 are overwritten below, and doing
        # that on the caller's list would replace the stored kernel name and
        # break a second from_dict call on the same dictionary.
        kernel_info = list(dictionary[kern_info])

        # The last entry is the hyps_mask; None means no multihyps.
        multihyps = kernel_info[-1] is not None

        kernel_name = kernel_info[0]
        kernel, _, _, efk = str_to_kernel_set(kernel_name, multihyps)
        kernel_info[0] = kernel
        kernel_info[1] = efk
        setattr(new_mgp, kern_info, kernel_info)

    # Fill up the model with the saved coeffs
    for m, map_2 in enumerate(new_mgp.maps_2):
        map_2.mean.__coeffs__ = np.array(dictionary['maps_2'][m])
    for m, map_3 in enumerate(new_mgp.maps_3):
        map_3.mean.__coeffs__ = np.array(dictionary['maps_3'][m])

    # Set GP
    gp_dict = dictionary.get('GP')
    if gp_dict:
        new_mgp.GP = GaussianProcess.from_dict(gp_dict)

    return new_mgp
def test_load_and_reload(self, all_gps, validation_env, multihyps):
    """
    Round-trip a GP through a pickle file and a JSON file.

    After each round trip the reloaded model must give exactly the same
    prediction as the original for ``d`` in 1, 2, 3. Also checks that an
    unknown format name raises ValueError and that ``write_model`` called
    with only a file name ending in ``.json`` / ``.pickle`` creates that
    file.
    """
    test_gp = all_gps[multihyps]

    test_gp.write_model("test_gp_write", "pickle")

    new_gp = GaussianProcess.from_file("test_gp_write.pickle")

    for d in [1, 2, 3]:
        assert np.all(
            test_gp.predict(x_t=validation_env, d=d) == new_gp.predict(
                x_t=validation_env, d=d))

    # Best-effort cleanup: ignore file-system errors only, so that
    # KeyboardInterrupt/SystemExit and real bugs are not swallowed.
    try:
        os.remove("test_gp_write.pickle")
    except OSError:
        pass

    test_gp.write_model("test_gp_write", "json")

    with open("test_gp_write.json", "r") as f:
        new_gp = GaussianProcess.from_dict(json.loads(f.readline()))

    for d in [1, 2, 3]:
        assert np.all(
            test_gp.predict(x_t=validation_env, d=d) == new_gp.predict(
                x_t=validation_env, d=d))
    os.remove("test_gp_write.json")

    with raises(ValueError):
        test_gp.write_model("test_gp_write", "cucumber")

    # Test logic for auto-detecting format in write command
    for fmt in ["json", "pickle"]:
        write_string = "format_write_test." + fmt
        # Start from a clean slate so the existence check below is meaningful.
        if os.path.exists(write_string):
            os.remove(write_string)

        test_gp.write_model(write_string)
        assert os.path.exists(write_string)
        os.remove(write_string)
def from_dict(dictionary: dict) -> "MappedGaussianProcess":
    """
    Create MGP object from dictionary representation.

    Args:
        dictionary (dict): Must provide ``species_labels``, ``grid_params``,
            ``var_map``, ``lmp_file_name``, ``n_cpus``, ``n_sample`` and
            ``maps``. ``GP`` (a serialized GP) and ``container_only`` are
            optional; ``container_only`` defaults to True.

    Returns:
        The rebuilt ``MappedGaussianProcess``. The input dictionary is
        left unmodified.
    """
    # Shallow copy so the derived entries added below do not leak into the
    # caller's dictionary.
    params = dict(dictionary)

    # Set GP: pass the deserialized model (not its raw dictionary) on to
    # the constructor.
    gp_dict = params.get("GP")
    params["GP"] = GaussianProcess.from_dict(gp_dict) if gp_dict else None

    params["unique_species"] = list(set(params["species_labels"]))

    params.setdefault("container_only", True)

    init_arg_name = [
        "grid_params",
        "unique_species",
        "GP",
        "var_map",
        "container_only",
        "lmp_file_name",
        "n_cpus",
        "n_sample",
    ]
    kwargs = {key: params[key] for key in init_arg_name}
    new_mgp = MappedGaussianProcess(**kwargs)

    # Fill up the model with the saved coeffs
    if "twobody" in new_mgp.maps:
        new_mgp.maps["twobody"] = Map2body.from_dict(
            dictionary["maps"]["twobody"], Map2body
        )
    if "threebody" in new_mgp.maps:
        new_mgp.maps["threebody"] = Map3body.from_dict(
            dictionary["maps"]["threebody"], Map3body
        )

    return new_mgp
def make_gp(
    self,
    cell=None,
    call_no=None,
    hyps=None,
    init_gp=None,
    hyp_no=None,
    **kwargs,
):
    """
    Build GP model from the training frames parsed from the log file.
    The cell, hyps and gp can be reset with customized values.

    Args:
        cell (np.ndarray): Default None to use the cell from the log file.
            A customized cell can be input as a 3x3 numpy array.
        call_no (int): Default None to use all the DFT frames as training
            data for building GP. If not None, then the first `call_no`
            frames (the lists are sliced as ``[:call_no]``) will be added
            to GP.
        hyps (np.ndarray): Default None to use the hyperparameters from the
            log file. Customized hyps can be input as an array.
        init_gp (GaussianProcess): Default to None to use no initial
            settings or training data. An initial GP can be used, and then
            the frames parsed in the log file will be added to the initial
            GP. `init_gp` is modified in place and returned: its hyps are
            overwritten with `hyps`, and it then holds its own training
            data plus the data from the log file.
            **NOTE**: if a log file from restarted OTF is parsed, then an
            initial GP needs to be parsed from the prior log file as the
            `init_gp` of the restarted log file.
        hyp_no (int): Default None to use the final optimized
            hyperparameters to build GP. If not None, then use the hyps
            from the `hyp_no`th optimization step (1-based, i.e.
            ``gp_hyp_list[hyp_no - 1]``). Only used when `hyps` is None.
        kwargs: if a new GP setting is needed without inputting `init_gp`,
            the GP initial args can be input as kwargs. Ignored when
            `init_gp` is given.

    Returns:
        The GP model after all selected frames were added and
        ``set_L_alpha`` was called.

    Raises:
        AssertionError: if the log file comes from a restarted run and no
            `init_gp` is supplied.
    """
    if "restart" in self.header and self.header["restart"] > 0:
        assert init_gp is not None, (
            "Please input the init_gp as the gp model dumped "
            "before restarting otf."
        )

    if call_no is None:
        call_no = len(self.gp_position_list)
    if hyp_no is None:
        hyp_no = len(self.gp_hyp_list)  # use the last hyps by default
    if hyps is None:
        # hyp_no is 1-based: hyp_no == len(list) selects the final entry.
        hyps = self.gp_hyp_list[hyp_no - 1]
    if cell is None:
        cell = self.header["cell"]

    if init_gp is None:
        # Use run's values as extracted from header
        # TODO Allow for kernel gradient in header
        dictionary = deepcopy(self.header)
        dictionary["hyps"] = hyps
        for key, value in kwargs.items():
            if value is not None:
                dictionary[key] = value
        gp_model = GaussianProcess.from_dict(dictionary)
    else:
        gp_model = init_gp
        gp_model.hyps = hyps

    for positions, forces, atoms, species in zip(
        self.gp_position_list[:call_no],
        self.gp_force_list[:call_no],
        self.gp_atom_list[:call_no],
        self.gp_species_list[:call_no],
    ):
        struc_curr = struc.Structure(cell, species, positions)
        gp_model.update_db(struc_curr, forces, custom_range=atoms)

    gp_model.set_L_alpha()

    return gp_model