def from_dict(dictionary):
    """Create GP object from dictionary representation.

    Args:
        dictionary (dict): Serialized model. The keys ``kernel_name``,
            ``cutoffs``, ``hyps``, ``hyp_labels``, ``maxiter``,
            ``training_data``, ``training_labels``, ``likelihood`` and
            ``likelihood_gradient`` are indexed directly and must be present.
            All other keys read below are optional and fall back to defaults.
            ``par`` and ``no_cpus`` are accepted as alternative spellings of
            ``parallel`` and ``n_cpus``.

    Returns:
        GaussianProcess: The rebuilt model. Its training data and stacked
        labels are also registered in the module-level
        ``_global_training_data`` / ``_global_training_labels`` dictionaries
        under ``new_gp.name``.

    Warns:
        UserWarning: If more than 5000 training environments are present and
            the covariance matrix cannot be restored from ``ky_mat_file``.
            In that case the matrix attributes are left as ``None``.
    """
    # Local import: the file's import header is not visible from this block.
    import warnings

    multihyps = dictionary.get('multihyps', False)

    new_gp = GaussianProcess(
        kernel_name=dictionary['kernel_name'],
        cutoffs=np.array(dictionary['cutoffs']),
        hyps=np.array(dictionary['hyps']),
        hyp_labels=dictionary['hyp_labels'],
        parallel=(dictionary.get('parallel', False)
                  or dictionary.get('par', False)),
        per_atom_par=dictionary.get('per_atom_par', True),
        n_cpus=dictionary.get('n_cpus') or dictionary.get('no_cpus'),
        maxiter=dictionary['maxiter'],
        opt_algorithm=dictionary.get('opt_algorithm', 'L-BFGS-B'),
        multihyps=multihyps,
        hyps_mask=dictionary.get('hyps_mask', None),
        name=dictionary.get('name', 'default_gp'),
    )

    # Restore the training set and the stored likelihood values.
    new_gp.training_data = [AtomicEnvironment.from_dict(env)
                            for env in dictionary['training_data']]
    new_gp.training_labels = deepcopy(dictionary['training_labels'])
    new_gp.likelihood = dictionary['likelihood']
    new_gp.likelihood_gradient = dictionary['likelihood_gradient']

    # The flat label array is always rebuilt from training_labels, so the
    # serialized 'training_labels_np' entry is not needed here.
    new_gp.training_labels_np = np.hstack(new_gp.training_labels)

    _global_training_data[new_gp.name] = new_gp.training_data
    _global_training_labels[new_gp.name] = new_gp.training_labels_np

    # Save time by attempting to load in computed attributes
    if len(new_gp.training_data) > 5000:
        # Large models keep the covariance matrix in a separate file.
        try:
            new_gp.ky_mat = np.load(dictionary['ky_mat_file'])
            new_gp.compute_matrices()
        except Exception as err:
            # Deliberately best-effort: any failure leaves the matrices
            # unset rather than aborting. Unlike a bare ``except`` this
            # lets KeyboardInterrupt / SystemExit propagate.
            new_gp.ky_mat = None
            new_gp.l_mat = None
            new_gp.alpha = None
            new_gp.ky_mat_inv = None
            # .get(): the key itself may be what was missing.
            filename = dictionary.get('ky_mat_file')
            warnings.warn("the covariance matrices are not loaded "
                          f"because {filename} cannot be loaded ({err!r})")
    else:
        # Small models carry the matrices inline; absent entries stay None.
        for attr in ('ky_mat_inv', 'ky_mat', 'l_mat', 'alpha'):
            stored = dictionary.get(attr)
            setattr(new_gp, attr,
                    np.array(stored) if stored is not None else None)

    return new_gp
def test_backwards_compatibility(structure, mask, cutoff, result):
    """
    Check that an environment whose ``cutoffs_mask`` attribute has been
    deleted still serializes via as_dict() and can be rebuilt with
    from_dict().

    Per the original author, this targets the as_dict() branch for older
    pickled environments without a cutoffs mask; the test can be deleted if
    backwards compatibility is dropped (reported runtime: about 5 ms).
    """
    cutoffs_mask = generate_mask(cutoff) if mask is True else None

    fresh_env = AtomicEnvironment(structure, atom=0, cutoffs=cutoff,
                                  cutoffs_mask=cutoffs_mask)
    env_test = deepcopy(fresh_env)

    # Serialize once with the attribute present, once after removing it.
    dict_with_attr = env_test.as_dict()
    del env_test.cutoffs_mask
    dict_without_attr = env_test.as_dict()

    assert dict_with_attr["cutoffs_mask"] == dict_without_attr["cutoffs_mask"]

    new_env = AtomicEnvironment.from_dict(dict_without_attr)
    assert isinstance(new_env, AtomicEnvironment)
    assert str(new_env) == str(env_test)
def test_env_methods(structure, mask, cutoff, result):
    """Check str(), as_dict() and from_dict() of an AtomicEnvironment.

    The bond arrays of the rebuilt environment are compared with the
    original; the 3-body array and ``q_array`` are compared only when
    ``cutoff`` has more than one / more than two entries respectively.
    """
    cutoffs_mask = generate_mask(cutoff) if mask is True else None
    env_test = AtomicEnvironment(structure, atom=0, cutoffs=cutoff,
                                 cutoffs_mask=cutoffs_mask)

    expected_repr = (
        f'Atomic Env. of Type 1 surrounded by {result[0]} atoms'
        ' of Types [1, 2, 3]'
    )
    assert str(env_test) == expected_repr

    the_dict = env_test.as_dict()
    assert isinstance(the_dict, dict)
    required_keys = ('positions', 'cell', 'atom', 'cutoffs', 'species')
    for required in required_keys:
        assert required in the_dict.keys()

    remade_env = AtomicEnvironment.from_dict(the_dict)
    assert isinstance(remade_env, AtomicEnvironment)

    n_cutoffs = len(cutoff)
    assert np.array_equal(remade_env.bond_array_2, env_test.bond_array_2)
    if n_cutoffs > 1:
        assert np.array_equal(remade_env.bond_array_3,
                              env_test.bond_array_3)
    if n_cutoffs > 2:
        assert np.array_equal(remade_env.q_array, env_test.q_array)
def methanol_gp():
    """Return a "2+3_mc" GaussianProcess trained on methanol environments.

    Every line of ``methanol_envs.json`` under ``TEST_FILE_DIR`` is parsed
    as JSON; each record supplies an environment and its ``forces`` entry,
    which are added to the GP before ``set_L_alpha()`` is called.
    """
    hyps = np.array([
        3.75996759e-06,
        1.53990678e-02,
        2.50624782e-05,
        5.07884426e-01,
        1.70172923e-03,
    ])
    the_gp = GaussianProcess(
        kernel_name="2+3_mc",
        hyps=hyps,
        cutoffs=np.array([5, 3]),
        hyp_labels=["l2", "s2", "l3", "s3", "n0"],
        maxiter=1,
        opt_algorithm="L-BFGS-B",
    )

    envs_file = path.join(TEST_FILE_DIR, "methanol_envs.json")
    with open(envs_file, "r") as f:
        records = [loads(line) for line in f.readlines()]

    for record in records:
        force = record["forces"]
        env = AtomicEnvironment.from_dict(record)
        the_gp.add_one_env(env, force)

    the_gp.set_L_alpha()
    return the_gp
def test_pred_on_elements():
    """Train from AIMD frames while predicting only on carbon atoms.

    The trainer is seeded with the first six methanol environments and the
    last frame, pre-trains on one H atom, and then predicts only on C.
    The test asserts that the input frames' forces are left untouched and
    that more than two C environments ended up in the GP.

    Output files matching ``meth_test*`` and ``gp_from_aimd*`` are removed
    even when an assertion fails.
    """
    the_gp = GaussianProcess(
        kernel_name="2+3_mc",
        hyps=np.array([
            3.75996759e-06, 1.53990678e-02, 2.50624782e-05,
            5.07884426e-01, 1.70172923e-03
        ]),
        cutoffs=np.array([7, 3]),
        hyp_labels=['l2', 's2', 'l3', 's3', 'n0'],
        maxiter=1,
        opt_algorithm='L-BFGS-B')

    with open('./test_files/methanol_frames.json', 'r') as f:
        frames = [Structure.from_dict(loads(s)) for s in f.readlines()]

    with open('./test_files/methanol_envs.json', 'r') as f:
        data_dicts = [loads(s) for s in f.readlines()[:6]]
        envs = [AtomicEnvironment.from_dict(d) for d in data_dicts]
        forces = [np.array(d['forces']) for d in data_dicts]
        seeds = list(zip(envs, forces))

    # Pristine copy used to detect in-place modification of the frames.
    all_frames = deepcopy(frames)

    try:
        tt = TrajectoryTrainer(frames,
                               gp=the_gp,
                               shuffle_frames=False,
                               rel_std_tolerance=0,
                               abs_std_tolerance=0,
                               abs_force_tolerance=.001,
                               skip=5,
                               min_atoms_per_train=100,
                               pre_train_seed_envs=seeds,
                               pre_train_seed_frames=[frames[-1]],
                               max_atoms_from_frame=4,
                               output_name='meth_test',
                               model_format='json',
                               atom_checkpoint_interval=50,
                               pre_train_atoms_per_element={'H': 1},
                               predict_atoms_per_element={
                                   'H': 0,
                                   'C': 1,
                                   'O': 0
                               })

        # Set to predict only on Carbon after training on H to ensure
        # errors are high and that they get added to the gp
        tt.run()

        # Ensure forces weren't written directly to structure
        for pristine, used in zip(all_frames, frames):
            assert np.array_equal(pristine.forces, used.forces)

        # Assert that Carbon atoms were correctly added
        assert the_gp.training_statistics['envs_by_species']['C'] > 2
    finally:
        # Clean up generated files whether or not the assertions passed.
        for pattern in ("meth_test*", "gp_from_aimd*"):
            for leftover in glob(pattern):
                remove(leftover)
def test_seed_and_run():
    """Run a seeded TrajectoryTrainer and compare it with its pickled model.

    After ``tt.run()`` the model written to ``meth_test_model.pickle`` is
    loaded, and its predictions on the first seed environment are required
    to equal those of the in-memory GP for d = 1, 2, 3.

    Files matching ``meth_test*`` are removed even when an assertion fails.
    """
    the_gp = GaussianProcess(
        kernel_name="2+3_mc",
        hyps=np.array([
            3.75996759e-06,
            1.53990678e-02,
            2.50624782e-05,
            5.07884426e-01,
            1.70172923e-03,
        ]),
        cutoffs=np.array([5, 3]),
        hyp_labels=["l2", "s2", "l3", "s3", "n0"],
        maxiter=1,
        opt_algorithm="L-BFGS-B",
    )

    with open(path.join(TEST_FILE_DIR, "methanol_frames.json"), "r") as f:
        frames = [Structure.from_dict(loads(s)) for s in f.readlines()]

    with open(path.join(TEST_FILE_DIR, "methanol_envs.json"), "r") as f:
        data_dicts = [loads(s) for s in f.readlines()[:6]]
        envs = [AtomicEnvironment.from_dict(d) for d in data_dicts]
        forces = [np.array(d["forces"]) for d in data_dicts]
        seeds = list(zip(envs, forces))

    try:
        tt = TrajectoryTrainer(
            frames,
            gp=the_gp,
            shuffle_frames=True,
            rel_std_tolerance=0,
            abs_std_tolerance=0,
            skip=10,
            pre_train_seed_envs=seeds,
            pre_train_seed_frames=[frames[-1]],
            max_atoms_from_frame=4,
            output_name="meth_test",
            model_format="pickle",
            train_checkpoint_interval=1,
            pre_train_atoms_per_element={"H": 1},
        )
        tt.run()

        # The pickle was produced by the run above, so loading it is safe.
        with open("meth_test_model.pickle", "rb") as f:
            new_gp = pickle.load(f)

        test_env = envs[0]
        for d in [1, 2, 3]:
            assert np.all(
                the_gp.predict(x_t=test_env, d=d) ==
                new_gp.predict(x_t=test_env, d=d))
    finally:
        # Clean up generated files whether or not the assertions passed.
        for leftover in glob("meth_test*"):
            remove(leftover)
def test_seed_and_run():
    """Run a seeded TrajectoryTrainer and compare it with its pickled model.

    After ``tt.run()`` the model written to ``meth_test.pickle`` is loaded,
    and its predictions on the first seed environment are required to equal
    those of the in-memory GP for d = 0, 1, 2.

    The trainer's output files are removed afterwards on a best-effort
    basis, even when an assertion fails.
    """
    the_gp = GaussianProcess(
        kernel=two_plus_three_body_mc,
        kernel_grad=two_plus_three_body_mc_grad,
        hyps=np.array([
            3.75996759e-06, 1.53990678e-02, 2.50624782e-05,
            5.07884426e-01, 1.70172923e-03
        ]),
        cutoffs=np.array([7, 7]),
        hyp_labels=['l2', 's2', 'l3', 's3', 'n0'],
        maxiter=1,
        opt_algorithm='L-BFGS-B')

    with open('./test_files/methanol_frames.json', 'r') as f:
        frames = [Structure.from_dict(loads(s)) for s in f.readlines()]

    with open('./test_files/methanol_envs.json', 'r') as f:
        data_dicts = [loads(s) for s in f.readlines()[:6]]
        envs = [AtomicEnvironment.from_dict(d) for d in data_dicts]
        forces = [np.array(d['forces']) for d in data_dicts]
        seeds = list(zip(envs, forces))

    try:
        tt = TrajectoryTrainer(frames,
                               gp=the_gp,
                               shuffle_frames=True,
                               rel_std_tolerance=0,
                               abs_std_tolerance=0,
                               skip=15,
                               pre_train_seed_envs=seeds,
                               pre_train_seed_frames=[frames[-1]],
                               max_atoms_from_frame=4,
                               model_write='meth_test.pickle',
                               model_format='pickle',
                               checkpoint_interval=1,
                               pre_train_atoms_per_element={'H': 1})
        tt.run()

        # The pickle was produced by the run above, so loading it is safe.
        with open('meth_test.pickle', 'rb') as f:
            new_gp = pickle.load(f)

        test_env = envs[0]
        for d in [0, 1, 2]:
            assert np.all(
                the_gp.predict(x_t=test_env, d=d) ==
                new_gp.predict(x_t=test_env, d=d))
    finally:
        # Portable replacement for shelling out to `rm`. Like `rm` via
        # os.system, a file that cannot be removed is not an error.
        for leftover in ('./gp_from_aimd.out',
                         './gp_from_aimd.xyz',
                         './gp_from_aimd-f.xyz',
                         './meth_test.pickle'):
            try:
                os.remove(leftover)
            except OSError:
                pass
def from_dict(dictionary):
    """Create GP object from dictionary representation.

    Args:
        dictionary (dict): Serialized model. ``kernel_name``,
            ``energy_kernel``, ``energy_force_kernel``, ``cutoffs``,
            ``hyps``, ``hyp_labels``, ``par``, ``no_cpus``, ``maxiter``,
            ``algo``, ``training_data``, ``training_labels``,
            ``likelihood`` and ``likelihood_gradient`` are indexed directly
            and must be present. The matrix entries (``l_mat``,
            ``l_mat_inv``, ``alpha``, ``ky_mat``, ``ky_mat_inv``) are
            optional.

    Returns:
        GaussianProcess: The rebuilt model. Matrix attributes that are
        missing from (or ``None`` in) the dictionary are set to ``None``.
    """
    kernel_name = dictionary['kernel_name']
    # Names containing 'mc' are resolved by the multi-component lookup.
    if 'mc' in kernel_name:
        force_kernel, grad = str_to_mc_kernel(kernel_name, include_grad=True)
    else:
        force_kernel, grad = str_to_kernel(kernel_name, include_grad=True)

    if dictionary['energy_kernel'] is not None:
        energy_kernel = str_to_kernel(dictionary['energy_kernel'])
    else:
        energy_kernel = None

    if dictionary['energy_force_kernel'] is not None:
        energy_force_kernel = str_to_kernel(dictionary['energy_force_kernel'])
    else:
        energy_force_kernel = None

    new_gp = GaussianProcess(kernel=force_kernel,
                             kernel_grad=grad,
                             energy_kernel=energy_kernel,
                             energy_force_kernel=energy_force_kernel,
                             cutoffs=np.array(dictionary['cutoffs']),
                             hyps=np.array(dictionary['hyps']),
                             hyp_labels=dictionary['hyp_labels'],
                             par=dictionary['par'],
                             no_cpus=dictionary['no_cpus'],
                             maxiter=dictionary['maxiter'],
                             opt_algorithm=dictionary['algo'])

    # Save time by attempting to load in computed attributes.
    # np.array(None) would produce a 0-d object array rather than None, so
    # absent matrices are kept as a plain None that `is None` checks detect.
    for attr in ('l_mat', 'l_mat_inv', 'alpha', 'ky_mat', 'ky_mat_inv'):
        stored = dictionary.get(attr)
        setattr(new_gp, attr,
                np.array(stored) if stored is not None else None)

    new_gp.training_data = [
        AtomicEnvironment.from_dict(env)
        for env in dictionary['training_data']
    ]
    new_gp.training_labels = dictionary['training_labels']

    new_gp.likelihood = dictionary['likelihood']
    new_gp.likelihood_gradient = dictionary['likelihood_gradient']
    new_gp.training_labels_np = np.hstack(new_gp.training_labels)

    return new_gp
def test_env_methods(structure, mask, cutoff, result):
    """Check force lookup, str(), and serialization of an AtomicEnvironment.

    Random forces are assigned to a copy of ``structure`` and the
    environment of atom 0 must expose the matching force. The environment
    is then serialized and rebuilt: through a JSON file when no mask is
    used, otherwise directly from the dictionary. The bond arrays of the
    rebuilt environment are compared with the original.

    The temporary ``test_environment.json`` is removed even when an
    assertion fails.
    """
    if mask is True:
        mask = generate_mask(cutoff)
    else:
        mask = None

    structure = deepcopy(structure)
    structure.forces = np.random.random(size=(len(structure), 3))

    env_test = AtomicEnvironment(structure, atom=0, cutoffs=cutoff,
                                 cutoffs_mask=mask)

    assert np.array_equal(structure.forces[0], env_test.force)

    assert (
        str(env_test) ==
        f"Atomic Env. of Type 1 surrounded by {result[0]} atoms of Types [1, 2, 3]"
    )

    the_dict = env_test.as_dict()
    assert isinstance(the_dict, dict)
    the_str = env_test.as_str()
    assert dumps(the_dict, cls=NumpyEncoder) == the_str
    for key in ["positions", "cell", "atom", "cutoffs", "species"]:
        assert key in the_dict.keys()

    env_file = "test_environment.json"
    file_created = False
    try:
        # This saves a few seconds, the masked envs take longer to read/write
        if not mask:
            with open(env_file, "w") as f:
                file_created = True
                f.write(env_test.as_str())
            remade_env = AtomicEnvironment.from_file(env_file)
        else:
            remade_env = AtomicEnvironment.from_dict(the_dict)

        assert isinstance(remade_env, AtomicEnvironment)

        assert np.array_equal(remade_env.bond_array_2, env_test.bond_array_2)
        if len(cutoff) > 1:
            assert np.array_equal(remade_env.bond_array_3,
                                  env_test.bond_array_3)
        if len(cutoff) > 2:
            assert np.array_equal(remade_env.q_array, env_test.q_array)
    finally:
        # Only remove the file if this test actually created it.
        if file_created:
            remove(env_file)
def test_env_methods(cutoff):
    """Check str(), as_dict() and from_dict() on a small three-atom cell.

    A unit-cube structure with species 1, 2, 3 is built, the environment of
    atom 0 is created with cutoffs [1, 1], and the environment rebuilt from
    its dictionary must reproduce the 2-body, 3-body and many-body bond
    arrays. The ``cutoff`` argument is not used by the body.
    """
    struc_test = Structure(
        np.eye(3),
        [1, 2, 3],
        np.array([[0, 0, 0], [0.5, 0.5, 0.5], [0.1, 0.1, 0.1]]),
    )
    env_test = AtomicEnvironment(struc_test, 0, np.array([1, 1]))

    expected_repr = ('Atomic Env. of Type 1 surrounded by 12 atoms'
                     ' of Types [2, 3]')
    assert str(env_test) == expected_repr

    the_dict = env_test.as_dict()
    assert isinstance(the_dict, dict)
    required_keys = ('positions', 'cell', 'atom', 'cutoffs', 'species')
    for required in required_keys:
        assert required in the_dict.keys()

    remade_env = AtomicEnvironment.from_dict(the_dict)
    assert isinstance(remade_env, AtomicEnvironment)

    assert np.array_equal(remade_env.bond_array_2, env_test.bond_array_2)
    assert np.array_equal(remade_env.bond_array_3, env_test.bond_array_3)
    assert np.array_equal(remade_env.bond_array_mb, env_test.bond_array_mb)
def methanol_gp():
    """Return a "2+3_mc" GaussianProcess trained on methanol environments.

    Every line of ``./test_files/methanol_envs.json`` is parsed as JSON;
    each record supplies an environment and its ``forces`` entry, which are
    added to the GP before ``set_L_alpha()`` is called.
    """
    hyps = np.array([
        3.75996759e-06, 1.53990678e-02, 2.50624782e-05,
        5.07884426e-01, 1.70172923e-03
    ])
    the_gp = GaussianProcess(
        kernel_name="2+3_mc",
        hyps=hyps,
        cutoffs=np.array([7, 7]),
        hyp_labels=['l2', 's2', 'l3', 's3', 'n0'],
        maxiter=1,
        opt_algorithm='L-BFGS-B',
    )

    with open('./test_files/methanol_envs.json') as f:
        records = [loads(line) for line in f.readlines()]

    for record in records:
        force = record['forces']
        env = AtomicEnvironment.from_dict(record)
        the_gp.add_one_env(env, force)

    the_gp.set_L_alpha()
    return the_gp
def from_dict(dictionary):
    """Create GP object from dictionary representation.

    Args:
        dictionary (dict): Serialized model. It is first passed through
            ``GaussianProcess.backward_arguments`` and
            ``GaussianProcess.backward_attributes`` (presumably to adapt
            legacy keys in place; verify against those helpers) and then
            expanded as keyword arguments to the constructor.

    Returns:
        GaussianProcess: The rebuilt model, with training data, energy
        labels and covariance matrices restored where the dictionary
        provides them.
    """
    GaussianProcess.backward_arguments(dictionary, dictionary)
    GaussianProcess.backward_attributes(dictionary)

    new_gp = GaussianProcess(**dictionary)

    # Restore force training data, if present.
    if "training_data" in dictionary:
        new_gp.training_data = [
            AtomicEnvironment.from_dict(env)
            for env in dictionary["training_data"]
        ]
        new_gp.training_labels = deepcopy(dictionary["training_labels"])
        new_gp.training_labels_np = deepcopy(
            dictionary["training_labels_np"])
        new_gp.sync_data()

    # Reconstruct training structures.
    if "training_structures" in dictionary:
        new_gp.training_structures = [
            [AtomicEnvironment.from_dict(env_curr) for env_curr in env_list]
            for env_list in dictionary["training_structures"]
        ]
        new_gp.energy_labels = deepcopy(dictionary["energy_labels"])
        new_gp.energy_labels_np = deepcopy(dictionary["energy_labels_np"])
        new_gp.sync_data()

    new_gp.all_labels = np.concatenate(
        (new_gp.training_labels_np, new_gp.energy_labels_np))

    new_gp.likelihood = dictionary.get("likelihood", None)
    new_gp.likelihood_gradient = dictionary.get("likelihood_gradient", None)

    new_gp.n_envs_prev = len(new_gp.training_data)

    # Save time by attempting to load in computed attributes
    if dictionary.get("ky_mat_file"):
        # The covariance matrix was stored in a separate file.
        try:
            new_gp.ky_mat = np.load(dictionary["ky_mat_file"])
            new_gp.compute_matrices()
            new_gp.ky_mat_file = None
        except FileNotFoundError:
            new_gp.ky_mat = None
            new_gp.l_mat = None
            new_gp.alpha = None
            new_gp.ky_mat_inv = None
            filename = dictionary.get("ky_mat_file")
            logger = logging.getLogger(new_gp.logger_name)
            logger.warning(
                "the covariance matrices are not loaded "
                "because %s cannot be found", filename)
    else:
        # Matrices are stored inline; absent entries stay None.
        for attr in ("ky_mat", "ky_mat_inv", "l_mat", "alpha"):
            stored = dictionary.get(attr)
            setattr(new_gp, attr,
                    np.array(stored) if stored is not None else None)

    return new_gp
def test_pred_on_elements():
    """Train from AIMD frames while predicting only on carbon atoms.

    The trainer is seeded with the first six methanol environments and the
    last frame, pre-trains on one H atom, and then predicts only on C.
    The test asserts that the input frames' forces are left untouched and
    that more than two C environments ended up in the GP.

    Output files matching ``meth_test*`` and ``gp_from_aimd*`` are removed
    even when an assertion fails.
    """
    the_gp = GaussianProcess(
        kernel_name="2+3_mc",
        hyps=np.array([
            3.75996759e-06,
            1.53990678e-02,
            2.50624782e-05,
            5.07884426e-01,
            1.70172923e-03,
        ]),
        cutoffs=np.array([5, 3]),
        hyp_labels=["l2", "s2", "l3", "s3", "n0"],
        maxiter=1,
        opt_algorithm="L-BFGS-B",
    )

    with open(path.join(TEST_FILE_DIR, "methanol_frames.json"), "r") as f:
        frames = [Structure.from_dict(loads(s)) for s in f.readlines()]

    with open(path.join(TEST_FILE_DIR, "methanol_envs.json"), "r") as f:
        data_dicts = [loads(s) for s in f.readlines()[:6]]
        envs = [AtomicEnvironment.from_dict(d) for d in data_dicts]
        forces = [np.array(d["forces"]) for d in data_dicts]
        seeds = list(zip(envs, forces))

    # Pristine copy used to detect in-place modification of the frames.
    all_frames = deepcopy(frames)

    try:
        tt = TrajectoryTrainer(
            frames,
            gp=the_gp,
            shuffle_frames=False,
            rel_std_tolerance=0,
            abs_std_tolerance=0,
            abs_force_tolerance=0.001,
            skip=5,
            min_atoms_per_train=100,
            pre_train_seed_envs=seeds,
            pre_train_seed_frames=[frames[-1]],
            max_atoms_from_frame=4,
            output_name="meth_test",
            print_as_xyz=True,
            model_format="json",
            atom_checkpoint_interval=50,
            pre_train_atoms_per_element={"H": 1},
            predict_atoms_per_element={
                "H": 0,
                "C": 1,
                "O": 0
            },
        )

        # Set to predict only on Carbon after training on H to ensure
        # errors are high and that they get added to the gp
        tt.run()

        # Ensure forces weren't written directly to structure
        for pristine, used in zip(all_frames, frames):
            assert np.array_equal(pristine.forces, used.forces)

        # Assert that Carbon atoms were correctly added
        assert the_gp.training_statistics["envs_by_species"]["C"] > 2
    finally:
        # Clean up generated files whether or not the assertions passed.
        for pattern in ("meth_test*", "gp_from_aimd*"):
            for leftover in glob(pattern):
                remove(leftover)
def from_dict(dictionary):
    """Create GP object from dictionary representation.

    Kernels are looked up from the names stored in ``dictionary``, the
    model is constructed, and training data, likelihood values and (where
    available) the covariance matrices are attached to it. With more than
    5000 training environments the covariance matrix is read from the file
    named by ``ky_mat_file``; otherwise the inline matrix entries are used
    and missing ones become ``None``.
    """
    multihyps = dictionary.get('multihyps', False)

    force_kernel, grad = str_to_kernels(dictionary['kernel_name'],
                                        multihyps,
                                        include_grad=True)

    energy_name = dictionary['energy_kernel']
    energy_kernel = (str_to_kernel(energy_name, multihyps)
                     if energy_name is not None else None)

    energy_force_name = dictionary['energy_force_kernel']
    energy_force_kernel = (str_to_kernel(energy_force_name, multihyps)
                           if energy_force_name is not None else None)

    constructor_kwargs = dict(
        kernel=force_kernel,
        kernel_grad=grad,
        energy_kernel=energy_kernel,
        energy_force_kernel=energy_force_kernel,
        cutoffs=np.array(dictionary['cutoffs']),
        hyps=np.array(dictionary['hyps']),
        hyp_labels=dictionary['hyp_labels'],
        par=dictionary['par'],
        per_atom_par=dictionary.get('per_atom_par', True),
        n_cpus=dictionary.get('n_cpus') or dictionary.get('no_cpus'),
        maxiter=dictionary['maxiter'],
        opt_algorithm=dictionary['algo'],
        multihyps=multihyps,
        hyps_mask=dictionary.get('hyps_mask', None),
    )
    new_gp = GaussianProcess(**constructor_kwargs)

    new_gp.training_data = [AtomicEnvironment.from_dict(serialized_env)
                            for serialized_env in dictionary['training_data']]
    new_gp.training_labels = deepcopy(dictionary['training_labels'])
    # Replaced by the np.hstack result a few lines below.
    new_gp.training_labels_np = deepcopy(dictionary['training_labels_np'])

    new_gp.likelihood = dictionary['likelihood']
    new_gp.likelihood_gradient = dictionary['likelihood_gradient']
    new_gp.training_labels_np = np.hstack(new_gp.training_labels)

    # Save time by attempting to load in computed attributes
    if len(new_gp.training_data) > 5000:
        new_gp.ky_mat = np.load(dictionary['ky_mat_file'])
        new_gp.compute_matrices()
        return new_gp

    for attr in ('ky_mat_inv', 'ky_mat', 'l_mat', 'alpha'):
        if dictionary.get(attr) is not None:
            setattr(new_gp, attr, np.array(dictionary[attr]))
        else:
            setattr(new_gp, attr, None)

    return new_gp