def update_bounds(self, GP):
    """Re-derive the lower/upper bounds from ``GP`` and rebuild if they moved.

    A rebuild of the map container is triggered when the GP's ``hyps_mask``
    differs from the stored one, when the training data contains a distance
    below the current lower bound, or when the GP cutoffs exceed the current
    upper bound (only checked if ``auto_upper`` is set or no upper bound
    exists yet).
    """
    # A mask mismatch between the GP and this object already forces a rebuild.
    needs_rebuild = not Parameters.compare_dict(GP.hyps_mask, self.hyps_mask)

    lower = self.bounds[0]
    closest = self.search_lower_bound(GP)
    # Only ever move the lower bound downwards.
    if lower is None or closest < np.max(lower):
        lower = np.max((closest - self.lower_bound_relax, 0.0))
        needs_rebuild = True
        warnings.warn(
            "The minimal distance in training data is lower than "
            f"the current lower bound, will reset lower bound to {lower}"
        )

    upper = self.bounds[1]
    if self.auto_upper or upper is None:
        cutoffs = Parameters.get_cutoff(
            self.kernel_name, self.species, GP.hyps_mask
        )
        if upper is None or np.any(cutoffs > upper):
            upper = cutoffs
            needs_rebuild = True

    if not needs_rebuild:
        return

    self.set_bounds(lower, upper)
    self.build_map_container()
def test_constraints1():
    """Check the hyps vector produced with per-group and noise constraints."""
    kernel_groups = {
        "twobody": [["*", "*"], ["O", "O"]],
        "threebody": [["*", "*", "*"], ["O", "O", "O"]],
    }
    group_parameters = {
        "twobody0": [1, 0.5],
        "twobody1": [2, 0.2],
        "threebody0": [1, 0.5],
        "threebody1": [2, 0.2],
        "cutoff_twobody": 2,
        "cutoff_threebody": 1,
    }
    group_constraints = {
        "twobody0": [True, False],
        "threebody0": [False, True],
        "noise": False,
    }

    helper = ParameterHelper(
        species=["O", "C", "H"],
        kernels=kernel_groups,
        parameters=group_parameters,
        constraints=group_constraints,
        verbose="DEBUG",
    )
    mask = helper.as_dict()
    Parameters.check_instantiation(
        mask["hyps"], mask["cutoffs"], mask["kernels"], mask
    )

    assert mask["train_noise"] is False

    hyps = mask["hyps"]
    assert len(hyps) == 6
    # Expected entries of the hyps vector, in order.
    expected_values = (1, 2, 0.2, 2, 0.5, 0.2)
    for position, expected in enumerate(expected_values):
        assert hyps[position] == expected
def test_generate_by_line():
    """Build a helper one group/parameter call at a time and validate it."""
    helper = ParameterHelper(verbose="DEBUG")

    # One species group per element, named after the element itself.
    for element in ("O", "C", "H"):
        helper.define_group("specie", element, [element])

    helper.define_group("twobody", "**", ["C", "H"])
    helper.define_group("twobody", "OO", ["O", "O"], atomic_str=True)
    helper.define_group("threebody", "***", ["O", "O", "C"])
    helper.define_group("threebody", "OOO", ["O", "O", "O"])

    # Several pairs may be registered under the same many-body group name.
    manybody_members = (
        ("1.5", ["C", "H"]),
        ("1.5", ["C", "O"]),
        ("1.5", ["O", "H"]),
        ("2", ["O", "O"]),
        ("2", ["H", "O"]),
        ("2.8", ["O", "O"]),
    )
    for group_name, pair in manybody_members:
        helper.define_group("manybody", group_name, pair)

    for group_name in ("**", "OO", "***", "OOO"):
        helper.set_parameters(group_name, [1, 0.5])

    manybody_parameters = (
        ("1.5", [1, 0.5, 1.5]),
        ("2", [1, 0.5, 2]),
        ("2.8", [1, 0.5, 2.8]),
    )
    for group_name, values in manybody_parameters:
        helper.set_parameters(group_name, values)

    helper.set_constraints("2", [True, False])
    helper.set_constraints("2.8", False)

    cutoffs = (
        ("cutoff_twobody", 5),
        ("cutoff_threebody", 4),
        ("cutoff_manybody", 3),
    )
    for cutoff_name, radius in cutoffs:
        helper.set_parameters(cutoff_name, radius)

    mask = helper.as_dict()
    Parameters.check_instantiation(
        mask["hyps"], mask["cutoffs"], mask["kernels"], mask
    )
def test_initialization5():
    """Check that universal ``sigma``/``lengthscale`` parameters are accepted.

    Exercised twice: once with explicit species and kernel groups, once with
    only a list of kernel names plus a noise value.
    """
    grouped_kernels = {
        "twobody": [["*", "*"], ["O", "O"]],
        "threebody": [["*", "*", "*"], ["O", "O", "O"]],
    }
    grouped_parameters = {
        "sigma": 1,
        "lengthscale": 0.5,
        "cutoff_threebody": 3,
        "cutoff_twobody": 2,
    }
    helper = ParameterHelper(
        species=["O", "C", "H"],
        kernels=grouped_kernels,
        parameters=grouped_parameters,
        verbose="DEBUG",
    )
    mask = helper.as_dict()
    Parameters.check_instantiation(
        mask["hyps"], mask["cutoffs"], mask["kernels"], mask
    )

    plain_parameters = {
        "sigma": 1.0,
        "lengthscale": 0.5,
        "cutoff_twobody": 2,
        "cutoff_threebody": 1,
        "noise": 0.05,
    }
    helper = ParameterHelper(
        kernels=["twobody", "threebody"],
        parameters=plain_parameters,
        verbose="DEBUG",
    )
    mask = helper.as_dict()
    Parameters.check_instantiation(
        mask["hyps"], mask["cutoffs"], mask["kernels"], mask
    )
def test_generate_by_line():
    """Build a helper one group/parameter call at a time and validate it."""
    helper = ParameterHelper(verbose="DEBUG")

    # One species group per element, named after the element itself.
    for element in ("O", "C", "H"):
        helper.define_group("specie", element, [element])

    helper.define_group("twobody", "**", ["C", "H"])
    helper.define_group("twobody", "OO", ["O", "O"], atomic_str=True)
    helper.define_group("threebody", "***", ["O", "O", "C"])
    helper.define_group("threebody", "OOO", ["O", "O", "O"])

    # Several pairs may be registered under the same many-body group name.
    manybody_members = (
        ("1.5", ["C", "H"]),
        ("1.5", ["C", "O"]),
        ("1.5", ["O", "H"]),
        ("2", ["O", "O"]),
        ("2", ["H", "O"]),
        ("2.8", ["O", "O"]),
    )
    for group_name, pair in manybody_members:
        helper.define_group("manybody", group_name, pair)

    for group_name in ("**", "OO", "***", "OOO"):
        helper.set_parameters(group_name, [1, 0.5])

    manybody_parameters = (
        ("1.5", [1, 0.5, 1.5]),
        ("2", [1, 0.5, 2]),
        ("2.8", [1, 0.5, 2.8]),
    )
    for group_name, values in manybody_parameters:
        helper.set_parameters(group_name, values)

    helper.set_constraints("2", [True, False])
    helper.set_constraints("2.8", False)

    cutoffs = (
        ("cutoff_twobody", 5),
        ("cutoff_threebody", 4),
        ("cutoff_manybody", 3),
    )
    for cutoff_name, radius in cutoffs:
        helper.set_parameters(cutoff_name, radius)

    mask = helper.as_dict()
    Parameters.check_instantiation(
        mask["hyps"], mask["cutoffs"], mask["kernels"], mask
    )
def test_opt():
    """Validate a helper whose two-body groups carry three parameter values."""
    kernel_groups = {
        "twobody": [["*", "*"], ["O", "O"]],
        "threebody": [["*", "*", "*"], ["O", "O", "O"]],
    }
    group_parameters = {
        "twobody0": [1, 0.5, 1],
        "twobody1": [2, 0.2, 2],
        "threebody0": [1, 0.5],
        "threebody1": [2, 0.2],
        "cutoff_twobody": 2,
        "cutoff_threebody": 1,
    }

    helper = ParameterHelper(
        species=["O", "C", "H"],
        kernels=kernel_groups,
        parameters=group_parameters,
        constraints={"twobody0": [False, True]},
        verbose="DEBUG",
    )
    mask = helper.as_dict()
    Parameters.check_instantiation(
        mask["hyps"], mask["cutoffs"], mask["kernels"], mask
    )
def test_initialization3():
    """Check construction from explicit kernel group definitions."""
    kernel_groups = {
        "twobody": [["*", "*"], ["O", "O"]],
        "threebody": [["*", "*", "*"], ["O", "O", "O"]],
    }
    group_parameters = {
        "twobody0": [1, 0.5],
        "twobody1": [2, 0.2],
        "threebody0": [1, 0.5],
        "threebody1": [2, 0.2],
        "cutoff_twobody": 2,
        "cutoff_threebody": 1,
    }

    helper = ParameterHelper(
        species=["O", "C", "H"],
        kernels=kernel_groups,
        parameters=group_parameters,
        verbose="DEBUG",
    )
    mask = helper.as_dict()
    Parameters.check_instantiation(
        mask["hyps"], mask["cutoffs"], mask["kernels"], mask
    )
def test_generate_by_list():
    """Define groups and parameters through the list-based API and validate."""
    helper = ParameterHelper(verbose="DEBUG")

    # Groups are registered kernel by kernel, species first.
    group_definitions = (
        ("specie", ["O", ["C", "N"], "H"]),
        ("twobody", [["*", "*"], ["O", "O"]]),
        ("threebody", [["*", "*", "*"], ["O", "O", "O"]]),
    )
    for kernel_kind, members in group_definitions:
        helper.list_groups(kernel_kind, members)

    group_parameters = {
        "twobody0": [1, 0.5],
        "twobody1": [2, 0.2],
        "threebody0": [1, 0.5],
        "threebody1": [2, 0.2],
        "cutoff_twobody": 2,
        "cutoff_threebody": 1,
    }
    helper.list_parameters(group_parameters)

    mask = helper.as_dict()
    Parameters.check_instantiation(
        mask["hyps"], mask["cutoffs"], mask["kernels"], mask
    )
def test_initialization2(ones):
    """Check initialization with ``ones`` or, when it is falsy, ``random``."""
    base_parameters = {
        "cutoff_twobody": 2,
        "cutoff_threebody": 1,
        "noise": 0.05,
    }
    # Exactly one of the two initialization modes is active per run.
    use_random = not ones

    helper = ParameterHelper(
        kernels=["twobody", "threebody"],
        parameters=base_parameters,
        ones=ones,
        random=use_random,
        verbose="DEBUG",
    )
    mask = helper.as_dict()
    Parameters.check_instantiation(
        mask["hyps"], mask["cutoffs"], mask["kernels"], mask
    )
def test_initialization():
    """Simplest scenario: two kernels with explicit parameters and noise."""
    kernel_parameters = {
        "twobody": [1, 0.5],
        "threebody": [1, 0.5],
        "cutoff_twobody": 2,
        "cutoff_threebody": 1,
        "noise": 0.05,
    }

    helper = ParameterHelper(
        kernels=["twobody", "threebody"],
        parameters=kernel_parameters,
        verbose="DEBUG",
    )
    mask = helper.as_dict()
    Parameters.check_instantiation(
        mask["hyps"], mask["cutoffs"], mask["kernels"], mask
    )
def test_initialization2(ones):
    """Check initialization with ``ones`` or, when it is falsy, ``random``."""
    base_parameters = {
        "cutoff_twobody": 2,
        "cutoff_threebody": 1,
        "noise": 0.05,
    }
    # Exactly one of the two initialization modes is active per run.
    use_random = not ones

    helper = ParameterHelper(
        kernels=["twobody", "threebody"],
        parameters=base_parameters,
        ones=ones,
        random=use_random,
        verbose="DEBUG",
    )
    mask = helper.as_dict()
    Parameters.check_instantiation(
        mask["hyps"], mask["cutoffs"], mask["kernels"], mask
    )
def test_from_dict():
    """Round-trip a helper through ``as_dict`` / ``from_dict``.

    A helper is built with separate groups and random hyperparameters,
    exported to a dictionary, rebuilt from that dictionary and exported
    again. The two dictionaries must compare equal.
    """
    pm = ParameterHelper(
        species=["O", "C", "H"],
        kernels=["twobody", "threebody"],
        allseparate=True,
        random=True,
        parameters={"cutoff_twobody": 7, "cutoff_threebody": 4.5, "cutoff_manybody": 3},
        verbose="debug",
    )
    hm = pm.as_dict()
    Parameters.check_instantiation(hm["hyps"], hm["cutoffs"], hm["kernels"], hm)

    pm1 = ParameterHelper.from_dict(hm, verbose="debug", init_spec=["O", "C", "H"])
    hm1 = pm1.as_dict()

    # The comparison result was previously discarded, so the test could never
    # fail on a broken round trip; assert on it instead.
    assert Parameters.compare_dict(hm, hm1)
def get_kernel_term(kernel_name, hyps_mask, hyps):
    """Return the energy kernel and its settings for a single kernel term.

    The hyperparameters, cutoffs and mask are first reduced to the component
    named ``kernel_name``; the kernel set is then looked up for that single
    component with the ``"mc"`` setting.

    :return: ``(energy_kernel, cutoffs, hyps, hyps_mask)`` where the last
        three are the reduced values for this component.
    """
    component_hyps, component_cutoffs, component_mask = (
        Parameters.get_component_mask(hyps_mask, kernel_name, hyps=hyps)
    )

    # The kernel set has seven entries; only the third one is needed here.
    _, _, energy_kernel, _, _, _, _ = str_to_kernel_set(
        [kernel_name], "mc", component_mask
    )

    return (energy_kernel, component_cutoffs, component_hyps, component_mask)
def search_lower_bound(self, GP):
    """
    If the lower bound is set to be 'auto', search the minimal interatomic
    distances in the training set of GP.

    The search starts from the smallest GP cutoff and is lowered by the first
    entry of ``bond_array_2`` of every training environment, both the
    standalone ones and those grouped by training structure.
    """
    cutoffs = Parameters.get_cutoff(self.kernel_name, self.species, GP.hyps_mask)
    smallest = np.min(cutoffs)

    def _lowered_by(current, env):
        # Environments without any bond cannot lower the bound.
        bonds = env.bond_array_2
        if len(bonds) == 0:
            return current
        nearest = bonds[0][0]
        return nearest if nearest < current else current

    for env in _global_training_data[GP.name]:
        smallest = _lowered_by(smallest, env)

    for struc in _global_training_structures[GP.name]:
        for env in struc:
            smallest = _lowered_by(smallest, env)

    return smallest
def test_from_dict():
    """Round-trip a helper through ``as_dict`` / ``from_dict``.

    A helper is built with separate groups and random hyperparameters,
    exported to a dictionary, rebuilt from that dictionary and exported
    again. The two dictionaries must compare equal.
    """
    pm = ParameterHelper(species=['O', 'C', 'H'],
                         kernels=['twobody', 'threebody'],
                         allseparate=True,
                         random=True,
                         parameters={'cutoff_twobody': 7,
                                     'cutoff_threebody': 4.5,
                                     'cutoff_manybody': 3},
                         verbose="debug")
    hm = pm.as_dict()
    Parameters.check_instantiation(
        hm['hyps'], hm['cutoffs'], hm['kernels'], hm)

    pm1 = ParameterHelper.from_dict(
        hm, verbose="debug", init_spec=['O', 'C', 'H'])
    hm1 = pm1.as_dict()

    # The comparison result was previously discarded, so the test could never
    # fail on a broken round trip; assert on it instead.
    assert Parameters.compare_dict(hm, hm1)
def test_generate_by_line2():
    """Define wildcard and named groups call by call, then validate."""
    helper = ParameterHelper(verbose="DEBUG")

    # Group definitions in registration order: (kernel kind, name, members).
    group_definitions = (
        ("specie", "O", ["O"]),
        ("specie", "rest", ["C", "H"]),
        ("twobody", "**", ["*", "*"]),
        ("twobody", "OO", ["O", "O"]),
        ("threebody", "***", ["*", "*", "*"]),
        ("threebody", "Oall", ["O", "O", "O"]),
    )
    for kernel_kind, group_name, members in group_definitions:
        helper.define_group(kernel_kind, group_name, members)

    for group_name in ("**", "OO", "Oall", "***"):
        helper.set_parameters(group_name, [1, 0.5])

    helper.set_parameters("cutoff_twobody", 5)
    helper.set_parameters("cutoff_threebody", 4)

    mask = helper.as_dict()
    Parameters.check_instantiation(
        mask["hyps"], mask["cutoffs"], mask["kernels"], mask
    )
def test_initialization():
    """Simplest scenario: two kernels with explicit parameters and noise."""
    kernel_parameters = {
        "twobody": [1, 0.5],
        "threebody": [1, 0.5],
        "cutoff_twobody": 2,
        "cutoff_threebody": 1,
        "noise": 0.05,
    }

    helper = ParameterHelper(
        kernels=["twobody", "threebody"],
        parameters=kernel_parameters,
        verbose="DEBUG",
    )
    mask = helper.as_dict()
    Parameters.check_instantiation(
        mask["hyps"], mask["cutoffs"], mask["kernels"], mask
    )
def test_generate_by_list2():
    """Define groups through the list API using named (dict) groups."""
    helper = ParameterHelper(verbose="DEBUG")

    species_groups = {"s1": "O", "s2": ["C", "N"], "s3": "H"}
    twobody_groups = {
        "t0": ["*", "*"],
        "t1": [["s1", "s1"], ["s1", "s3"]],
    }
    threebody_groups = [["*", "*", "*"], ["s1", "s1", "s1"]]

    helper.list_groups("specie", species_groups)
    helper.list_groups("twobody", twobody_groups)
    helper.list_groups("threebody", threebody_groups)

    group_parameters = {
        "t0": [1, 0.5],
        "t1": [2, 0.2],
        "threebody0": [1, 0.5],
        "threebody1": [2, 0.2],
        "cutoff_twobody": 2,
        "cutoff_threebody": 1,
    }
    helper.list_parameters(group_parameters)

    mask = helper.as_dict()
    Parameters.check_instantiation(
        mask["hyps"], mask["cutoffs"], mask["kernels"], mask
    )
def test_constraints2():
    """Check the leading hyps when a two-body constraint is supplied."""
    kernel_parameters = {
        "twobody": [1, 0.5],
        "threebody": [1, 0.5],
        "cutoff_twobody": 2,
        "cutoff_threebody": 1,
        "noise": 0.05,
    }

    helper = ParameterHelper(
        kernels=["twobody", "threebody"],
        parameters=kernel_parameters,
        constraints={"twobody": [True, False]},
        verbose="DEBUG",
    )
    mask = helper.as_dict()
    Parameters.check_instantiation(
        mask["hyps"], mask["cutoffs"], mask["kernels"], mask
    )

    hyps = mask["hyps"]
    # Both of the first two entries are expected to equal 1.
    for position in (0, 1):
        assert hyps[position] == 1
def test_generate_by_line2():
    """Define wildcard and named groups call by call, then validate."""
    helper = ParameterHelper(verbose="DEBUG")

    # Group definitions in registration order: (kernel kind, name, members).
    group_definitions = (
        ("specie", "O", ["O"]),
        ("specie", "rest", ["C", "H"]),
        ("twobody", "**", ["*", "*"]),
        ("twobody", "OO", ["O", "O"]),
        ("threebody", "***", ["*", "*", "*"]),
        ("threebody", "Oall", ["O", "O", "O"]),
    )
    for kernel_kind, group_name, members in group_definitions:
        helper.define_group(kernel_kind, group_name, members)

    for group_name in ("**", "OO", "Oall", "***"):
        helper.set_parameters(group_name, [1, 0.5])

    helper.set_parameters("cutoff_twobody", 5)
    helper.set_parameters("cutoff_threebody", 4)

    mask = helper.as_dict()
    Parameters.check_instantiation(
        mask["hyps"], mask["cutoffs"], mask["kernels"], mask
    )
def check_instantiation(self):
    """
    Runs a series of checks to ensure that the user has not supplied
    contradictory arguments which will result in undefined behavior
    with multiple hyperparameters.

    Steps, in order:

    1. Pick a logger name if none is set (and create the logger when no
       output object is attached).
    2. Detect whether another instance already registered label arrays
       under this GP's name in the module-level label stores, and if so
       rename this instance until the name is free.
    3. Call ``sync_data`` and let ``Parameters.check_instantiation``
       validate the hyperparameters; its return value replaces
       ``self.hyps_mask``.

    :return: None; ``self.logger_name``, ``self.name`` and
        ``self.hyps_mask`` may be modified in place.
    """

    if self.logger_name is None:
        if self.output is None:
            # No output object: derive the name from the GP name and
            # create a stream-only logger for it.
            self.logger_name = self.name + "GaussianProcess"
            set_logger(
                self.logger_name,
                stream=True,
                fileout_name=None,
                verbose=self.verbose,
            )
        else:
            # NOTE(review): no set_logger call on this path -- presumably
            # the output object has already configured this logger; confirm.
            self.logger_name = self.output.basename + "log"
    logger = logging.getLogger(self.logger_name)

    # check whether it's be loaded before
    # "loaded" means the global store holds an entry for "<name>_0" that is
    # not the very same object as this instance's own label array.
    loaded = False
    if self.name + "_0" in _global_training_labels:
        if (_global_training_labels.get(self.name + "_0", None)
                is not self.training_labels_np):
            loaded = True
    if self.name + "_0" in _global_energy_labels:
        if (_global_energy_labels.get(self.name + "_0", None)
                is not self.energy_labels_np):
            loaded = True

    if loaded:

        base = f"{self.name}"
        count = 2
        # Try "<base>_2" ... "<base>_99", sleeping for a random() interval
        # before each attempt.
        # NOTE(review): the sleep presumably de-synchronises instances
        # started at the same time -- confirm.
        while self.name + "_0" in _global_training_labels and count < 100:
            time.sleep(random())
            self.name = f"{base}_{count}"
            logger.debug("Specified GP name is present in global memory; "
                         "Attempting to rename the "
                         f"GP instance to {self.name}")
            count += 1
        if self.name + "_0" in _global_training_labels:
            # Every numbered suffix was taken: fall back to a suffix built
            # from the current time in milliseconds (modulo 10**7).
            milliseconds = int(round(time.time() * 1000) % 10000000)
            self.name = f"{base}_{milliseconds}"
            logger.debug(
                "Specified GP name still present in global memory: "
                f"renaming the gp instance to {self.name}")
        logger.debug(f"Final name of the gp instance is {self.name}")

    self.sync_data()

    self.hyps_mask = Parameters.check_instantiation(
        self.hyps, self.cutoffs, self.kernels, self.hyps_mask)
def test_opt():
    """Validate a helper whose two-body groups carry three parameter values."""
    kernel_groups = {
        "twobody": [["*", "*"], ["O", "O"]],
        "threebody": [["*", "*", "*"], ["O", "O", "O"]],
    }
    group_parameters = {
        "twobody0": [1, 0.5, 1],
        "twobody1": [2, 0.2, 2],
        "threebody0": [1, 0.5],
        "threebody1": [2, 0.2],
        "cutoff_twobody": 2,
        "cutoff_threebody": 1,
    }

    helper = ParameterHelper(
        species=["O", "C", "H"],
        kernels=kernel_groups,
        parameters=group_parameters,
        constraints={"twobody0": [False, True]},
        verbose="DEBUG",
    )
    mask = helper.as_dict()
    Parameters.check_instantiation(
        mask["hyps"], mask["cutoffs"], mask["kernels"], mask
    )
def test_initialization3():
    """Check construction from explicit kernel group definitions."""
    kernel_groups = {
        "twobody": [["*", "*"], ["O", "O"]],
        "threebody": [["*", "*", "*"], ["O", "O", "O"]],
    }
    group_parameters = {
        "twobody0": [1, 0.5],
        "twobody1": [2, 0.2],
        "threebody0": [1, 0.5],
        "threebody1": [2, 0.2],
        "cutoff_twobody": 2,
        "cutoff_threebody": 1,
    }

    helper = ParameterHelper(
        species=["O", "C", "H"],
        kernels=kernel_groups,
        parameters=group_parameters,
        verbose="DEBUG",
    )
    mask = helper.as_dict()
    Parameters.check_instantiation(
        mask["hyps"], mask["cutoffs"], mask["kernels"], mask
    )
def backward_attributes(dictionary):
    """
    add new attributes to old instance
    or update attribute types

    The dictionary is modified in place: missing keys receive default
    values, a non-dict ``cutoffs`` entry is converted with
    ``Parameters.cutoff_array_to_dict``, and ``hyps_mask`` is replaced by
    the result of ``Parameters.backward``.

    :param dictionary: dictionary form of a GP instance; must already
        contain the ``cutoffs`` and ``kernels`` keys.
    :return: None
    """

    if 'name' not in dictionary:
        dictionary['name'] = 'default_gp'
    if 'per_atom_par' not in dictionary:
        dictionary['per_atom_par'] = True
    # The guard used to test 'optimization_algorithm' while writing
    # 'opt_algorithm', so a stored 'opt_algorithm' was overwritten with the
    # default whenever the other key was absent. Test the key that is set.
    if 'opt_algorithm' not in dictionary:
        dictionary['opt_algorithm'] = 'L-BFGS-B'
    if 'hyps_mask' not in dictionary:
        dictionary['hyps_mask'] = None
    if 'parallel' not in dictionary:
        dictionary['parallel'] = False
    if 'component' not in dictionary:
        dictionary['component'] = 'mc'

    if 'training_structures' not in dictionary:
        # Environments of each structure
        dictionary['training_structures'] = []
        dictionary['energy_labels'] = []  # Energies of training structures
        dictionary['energy_labels_np'] = np.empty(0, )

    if 'training_labels' not in dictionary:
        dictionary['training_labels'] = []
        dictionary['training_labels_np'] = np.empty(0, )

    if 'energy_noise' not in dictionary:
        dictionary['energy_noise'] = 0.01

    # Cutoffs stored in a non-dict form are converted to a dict.
    if not isinstance(dictionary['cutoffs'], dict):
        dictionary['cutoffs'] = Parameters.cutoff_array_to_dict(
            dictionary['cutoffs'])

    # Work on a copy so the mask object passed in is not mutated.
    dictionary['hyps_mask'] = Parameters.backward(
        dictionary['kernels'], deepcopy(dictionary['hyps_mask']))

    if 'logger_name' not in dictionary:
        dictionary['logger_name'] = None
def backward_attributes(dictionary):
    """
    add new attributes to old instance
    or update attribute types

    The dictionary is modified in place: missing keys receive default
    values, a non-dict ``cutoffs`` entry is converted with
    ``Parameters.cutoff_array_to_dict``, and ``hyps_mask`` is replaced by
    the result of ``Parameters.backward``.

    :param dictionary: dictionary form of a GP instance; must already
        contain the ``cutoffs`` and ``kernels`` keys.
    :return: None
    """

    if "name" not in dictionary:
        dictionary["name"] = "default_gp"
    if "per_atom_par" not in dictionary:
        dictionary["per_atom_par"] = True
    # The guard used to test "optimization_algorithm" while writing
    # "opt_algorithm", so a stored "opt_algorithm" was overwritten with the
    # default whenever the other key was absent. Test the key that is set.
    if "opt_algorithm" not in dictionary:
        dictionary["opt_algorithm"] = "L-BFGS-B"
    if "hyps_mask" not in dictionary:
        dictionary["hyps_mask"] = None
    if "parallel" not in dictionary:
        dictionary["parallel"] = False
    if "component" not in dictionary:
        dictionary["component"] = "mc"

    if "training_structures" not in dictionary:
        # Environments of each structure
        dictionary["training_structures"] = []
        dictionary["energy_labels"] = []  # Energies of training structures
        dictionary["energy_labels_np"] = np.empty(0, )

    if "training_labels" not in dictionary:
        dictionary["training_labels"] = []
        dictionary["training_labels_np"] = np.empty(0, )

    if "energy_noise" not in dictionary:
        dictionary["energy_noise"] = 0.01

    # Cutoffs stored in a non-dict form are converted to a dict.
    if not isinstance(dictionary["cutoffs"], dict):
        dictionary["cutoffs"] = Parameters.cutoff_array_to_dict(
            dictionary["cutoffs"])

    # Work on a copy so the mask object passed in is not mutated.
    dictionary["hyps_mask"] = Parameters.backward(
        dictionary["kernels"], deepcopy(dictionary["hyps_mask"]))

    if "logger_name" not in dictionary:
        dictionary["logger_name"] = None
def test_constraints2():
    """Check the leading hyps when a two-body constraint is supplied."""
    kernel_parameters = {
        "twobody": [1, 0.5],
        "threebody": [1, 0.5],
        "cutoff_twobody": 2,
        "cutoff_threebody": 1,
        "noise": 0.05,
    }

    helper = ParameterHelper(
        kernels=["twobody", "threebody"],
        parameters=kernel_parameters,
        constraints={"twobody": [True, False]},
        verbose="DEBUG",
    )
    mask = helper.as_dict()
    Parameters.check_instantiation(
        mask["hyps"], mask["cutoffs"], mask["kernels"], mask
    )

    hyps = mask["hyps"]
    # Both of the first two entries are expected to equal 1.
    for position in (0, 1):
        assert hyps[position] == 1
def test_initialization5():
    """Check that universal ``sigma``/``lengthscale`` parameters are accepted.

    Exercised twice: once with explicit species and kernel groups, once with
    only a list of kernel names plus a noise value.
    """
    grouped_kernels = {
        "twobody": [["*", "*"], ["O", "O"]],
        "threebody": [["*", "*", "*"], ["O", "O", "O"]],
    }
    grouped_parameters = {
        "sigma": 1,
        "lengthscale": 0.5,
        "cutoff_threebody": 3,
        "cutoff_twobody": 2,
    }
    helper = ParameterHelper(
        species=["O", "C", "H"],
        kernels=grouped_kernels,
        parameters=grouped_parameters,
        verbose="DEBUG",
    )
    mask = helper.as_dict()
    Parameters.check_instantiation(
        mask["hyps"], mask["cutoffs"], mask["kernels"], mask
    )

    plain_parameters = {
        "sigma": 1.0,
        "lengthscale": 0.5,
        "cutoff_twobody": 2,
        "cutoff_threebody": 1,
        "noise": 0.05,
    }
    helper = ParameterHelper(
        kernels=["twobody", "threebody"],
        parameters=plain_parameters,
        verbose="DEBUG",
    )
    mask = helper.as_dict()
    Parameters.check_instantiation(
        mask["hyps"], mask["cutoffs"], mask["kernels"], mask
    )
def train_gp(self):
    """Train the GP hyperparameters and write the result to the output.

    Training is logged under the output basename suffixed with ``hyps``.
    The hyperparameters and their labels are then fetched without applying
    constraints; if no labels come back, the GP's own ``hyp_labels`` are
    used instead.
    """
    training_logger = self.output.basename + 'hyps'
    self.gp.train(logger_name=training_logger)

    hyps, labels = Parameters.get_hyps(
        self.gp.hyps_mask,
        self.gp.hyps,
        constraint=False,
        label=True,
    )
    # Fall back to the labels stored on the GP.
    labels = self.gp.hyp_labels if labels is None else labels

    self.output.write_hyps(
        labels,
        hyps,
        self.start_time,
        self.gp.likelihood,
        self.gp.likelihood_gradient,
        hyps_mask=self.gp.hyps_mask,
    )
def test_initialization_allsep(ones):
    """Check ``allseparate`` with ``ones`` or ``random`` initialization.

    Every species and every unordered species pair must map to a distinct
    group name, and an empty species list must raise ``RuntimeError``.
    """
    specie_list = ['C', 'H', 'O']
    base_parameters = {
        'cutoff_twobody': 2,
        'cutoff_threebody': 1,
        'noise': 0.05,
    }

    pm = ParameterHelper(
        species=specie_list,
        kernels=['twobody', 'threebody'],
        parameters=base_parameters,
        allseparate=True,
        ones=ones,
        random=not ones,
        verbose="DEBUG",
    )
    mask = pm.as_dict()
    Parameters.check_instantiation(
        mask['hyps'], mask['cutoffs'], mask['kernels'], mask
    )

    # Each species gets its own group.
    seen_names = []
    for element in specie_list:
        group_name = pm.find_group('specie', element)
        assert group_name not in seen_names
        seen_names.append(group_name)

    # Each unordered pair (including same-species pairs) gets its own group.
    seen_names = []
    for first_index, first in enumerate(specie_list):
        for second in specie_list[first_index:]:
            group_name = pm.find_group('twobody', [first, second])
            assert group_name not in seen_names
            seen_names.append(group_name)

    with raises(RuntimeError):
        pm = ParameterHelper(
            species=[],
            kernels=['twobody', 'threebody'],
            parameters={
                'cutoff_twobody': 2,
                'cutoff_threebody': 1,
                'noise': 0.05,
            },
            allseparate=True,
            ones=ones,
            random=not ones,
        )
def predict(self, atom_env):
    """Sum the per-species map predictions for one atomic environment.

    :param atom_env: environment to predict on; its ``cutoffs_mask`` must
        match ``self.hyps_mask``.
    :return: ``(forces, virial, kern, variance, energy)`` where ``forces``
        has 3 entries and ``virial`` has 6. ``kern`` is only evaluated when
        ``self.var_map == "pca"`` and is 0 otherwise.
    :raises ValueError: when a species map's ``predict`` raises
        ``ValueError``; the first two arguments of that error are
        forwarded inside lists.
    """
    assert Parameters.compare_dict(
        self.hyps_mask, atom_env.cutoffs_mask
    ), "GP.hyps_mask is not the same as atom_env.cutoffs_mask"

    force_total = np.zeros(3)
    virial_total = np.zeros(6)
    variance_total = 0
    energy_total = 0
    kern = 0

    # An environment without any bond contributes nothing.
    if len(atom_env.bond_array_2) == 0:
        return force_total, virial_total, kern, variance_total, energy_total

    en_kernel, cutoffs, hyps, hyps_mask = self.kernel_info
    kernel_args = from_mask_to_args(hyps, cutoffs, hyps_mask)

    if self.var_map == "pca":
        kern = en_kernel(atom_env, atom_env, *kernel_args)

    spcs, comp_r, comp_xyz = self.get_arrays(atom_env)

    # predict for each species
    failed_species = []
    requested_bounds = []
    for index, spc in enumerate(spcs):
        lengths = np.array(comp_r[index])
        xyzs = np.array(comp_xyz[index])
        species_map = self.maps[self.find_map_index(spc)]
        try:
            force, virial, variance, energy = species_map.predict(lengths, xyzs)
        except ValueError as map_error:
            failed_species.append(map_error.args[0])
            requested_bounds.append(map_error.args[1])

        # Checked inside the loop, so the first failing map aborts at once.
        if failed_species:
            raise ValueError(
                failed_species,
                requested_bounds,
                f"The {self.kernel_name} map needs re-constructing.",
            )

        force_total += force
        virial_total += virial
        variance_total += variance
        energy_total += energy

    return force_total, virial_total, kern, variance_total, energy_total