def test_structureset_from_npz():
    """Build a structure set from the 6h2o npz file and check it.

    Fills a fresh ``structureSet`` through ``from_npz``, verifies the name
    it ends up with, and hands the object to ``example_6h2o_md`` for the
    remaining checks.
    """
    npz_path = './tests/data/md/6h2o.temelso.etal.pr.md.gfn2.300k.step10000-ase.md-orca.mp2.def2tzvp.300k.npz'

    # One entity ID per atom: molecule index repeated once for each of its
    # atoms (0, 0, 0, 1, 1, 1, ...).
    atoms_per_h2o = 3
    n_molecules = 6
    entity_ids = [
        i_mol for i_mol in range(n_molecules) for _ in range(atoms_per_h2o)
    ]

    # One component ID per molecule.
    comp_ids = ['h2o'] * n_molecules

    # Populate the structure set from the npz file.
    test_structureset = structureSet()
    test_structureset.from_npz(
        npz_path, 'z', 'R', 'Angstrom', entity_ids, comp_ids
    )

    # The name is checked and then explicitly assigned the same value
    # before running the shared 6h2o checks.
    expected_name = '6h2o.temelso.etal.pr.md.gfn2.300k.step10000-ase.md-orca.mp2.def2tzvp.300k'
    assert test_structureset.name == expected_name
    test_structureset.name = expected_name

    example_6h2o_md(test_structureset)
def test_structureset_from_traj():
    """Build a structure set from the 10h2o trajectory file and check it.

    Fills a fresh ``structureSet`` through ``from_xyz``, verifies the name
    it ends up with, renames it, and hands the object to ``example_10h2o``
    for the remaining checks.
    """
    traj_path = './tests/data/md/10h2o.abc0.iter1.gfn2-xtb.md-gfn2.300k-1.traj'

    # One entity ID per atom: molecule index repeated once for each of its
    # atoms (0, 0, 0, 1, 1, 1, ...).
    atoms_per_h2o = 3
    n_molecules = 10
    entity_ids = [
        i_mol for i_mol in range(n_molecules) for _ in range(atoms_per_h2o)
    ]

    # One component ID per molecule.
    comp_ids = ['h2o'] * n_molecules

    # Populate the structure set from the trajectory file.
    test_structureset = structureSet()
    test_structureset.from_xyz(traj_path, 'Angstrom', entity_ids, comp_ids)

    # Check the name assigned on load, then rename the structure set.
    assert test_structureset.name == '10h2o.abc0.iter1.gfn2-xtb.md-gfn2.300k-1'
    test_structureset.name = '10h2o.abc0.iter1.gfn2.md.gfn2.300k.iter1-mbgdml.structset'

    # Saving is intentionally disabled:
    # test_structureset.save(test_structureset.name, test_structureset.structureset, './tests/data/structuresets/')

    example_10h2o(test_structureset)
def trim_140h2o_rset():
    """Trim the 140h2o structure set down to a small, test-friendly size.

    Loads the structure set at ``rset_path_140h2o``, keeps the first three
    structures and the atoms of the first five molecules, and asserts the
    trimmed arrays (and the MD5 before and after) match known values.

    Returns
    -------
    The trimmed structure set.
    """
    n_R = 3  # Number of structures to keep.
    n_entities = 5  # Number of molecules to keep in each structure.
    molecule_size = molecule_sizes['h2o']  # Number of atoms in a water molecule.
    n_atoms = n_entities * molecule_size  # Number of atoms to keep.

    rset = data.structureSet(rset_path_140h2o)
    assert rset.type == 's'
    assert rset.md5 == '3e5cf945f6b4e979373e7772410e3746'

    # Atomic numbers: trim, then compare against O, H, H per molecule.
    rset.z = rset.z[:n_atoms]
    expected_z = np.array([8, 1, 1] * n_entities)
    assert np.all(rset.z == expected_z)

    # Coordinates: trim structures and atoms, then check the last structure.
    rset.R = rset.R[:n_R, :n_atoms]
    expected_r_2 = np.array([
        [6.07124359, 0.7619846, 0.58984577],
        [6.47807882, -0.18138608, 0.67938893],
        [5.14951519, 0.76914325, 0.66198299],
        [-4.28204826, -3.57395445, 0.81850038],
        [-4.33819343, -4.29134079, 0.12722189],
        [-4.33829705, -2.80167393, 0.40818626],
        [-2.82371582, -3.52131402, -4.12086561],
        [-2.96180787, -4.46433929, -3.79287547],
        [-1.85909245, -3.46817877, -4.3649756],
        [6.24586283, -1.76605224, 0.72883595],
        [5.51074538, -2.26847206, 1.21432844],
        [6.92768826, -2.3359825, 0.25592583],
        [-2.44826194, -6.14429515, -3.37660252],
        [-2.19536627, -6.12210888, -2.51171765],
        [-2.65953004, -7.04099688, -3.59504014],
    ])
    assert np.allclose(rset.R[2], expected_r_2)
    assert rset.z.shape[0] == rset.R.shape[1]

    # Entity IDs: trim, then compare against one ID per atom.
    rset.entity_ids = rset.entity_ids[:n_atoms]
    expected_entity_ids = np.array(
        [0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4]
    )
    assert np.all(rset.entity_ids == expected_entity_ids)

    # Component IDs: trim, then compare against one label per molecule.
    rset.comp_ids = rset.comp_ids[:n_entities]
    expected_comp_ids = np.array(['h2o'] * n_entities)
    assert np.all(rset.comp_ids == expected_comp_ids)

    # The MD5 must reflect the trimmed contents.
    assert rset.md5 == 'e6a7a058b5fefb622fb3296e29a84150'
    return rset
def test_sample_dset_same_size():
    """Sample 2h2o structures from a 2h2o data set with a cutoff criterion.

    Loads the 2-body water data set, truncates it to 50 structures, samples
    all of them into a new data set using ``criteria.cm_distance_sum`` with
    a cutoff of 6.0, and then checks the metadata, coordinates, energies,
    and forces of the sampled data set.
    """
    source_path = f'{dset_dir}/2h2o/140h2o.sphere.gfn2.md.500k.prod1.3h2o.dset.2h2o-dset.mb.npz'
    dset_h2o_2body = data.dataSet(source_path)

    # Keep only the first 50 structures of the source data set.
    remaining = 50
    for attr in ('r_prov_specs', 'E', 'R', 'F'):
        trimmed = getattr(dset_h2o_2body, attr)[:remaining]
        setattr(dset_h2o_2body, attr, trimmed)

    # Sample the source data set into a new, named data set.
    dset_h2o_2body_cm_6 = data.dataSet()
    dset_h2o_2body_cm_6.name = '140h2o.sphere.gfn2.md.500k.prod1.3h2o.dset.2h2o-dset.mb-cm.6'
    dset_h2o_2body_cm_6 = dset_sample_structures(
        dset_h2o_2body_cm_6,
        dset_h2o_2body,
        'all',
        2,
        criteria.cm_distance_sum,
        np.array([]),
        np.array([6.0]),
        True,
        False,
    )
    sampled = dset_h2o_2body_cm_6

    # Metadata checks.
    assert sampled.theory == 'mp2.def2tzvp.frozencore'
    assert sampled.criteria == 'cm_distance_sum'
    assert np.array_equal(sampled.z_slice, np.array([]))
    assert np.array_equal(sampled.cutoff, np.array([6.0]))
    assert np.array_equal(
        sampled.entity_ids, np.array([0, 0, 0, 1, 1, 1])
    )
    assert np.array_equal(sampled.comp_ids, np.array(['h2o', 'h2o']))
    assert sampled.centered == True
    assert sampled.r_unit == 'Angstrom'
    # 8726c482c19cdf7889cd1e62b9e9c8e1 is the MD5 hash for the full 140h2o rset.
    assert sampled.r_prov_ids == {0: '8726c482c19cdf7889cd1e62b9e9c8e1'}
    assert np.array_equal(sampled.z, np.array([8, 1, 1, 8, 1, 1]))

    # Coordinate check against the originating structure set.
    rset = data.structureSet(rset_path_140h2o)
    check_R_with_rset(sampled, rset, True)

    # Checking energies and forces: locate each sampled structure in the
    # source data set by its provenance specification and compare values.
    dset_r_prov_specs = sampled.r_prov_specs
    dset_E = sampled.E
    dset_F = sampled.F
    dset_sample_r_prov_specs = dset_h2o_2body.r_prov_specs
    dset_sample_E = dset_h2o_2body.E
    dset_sample_F = dset_h2o_2body.F
    for i_r in range(len(sampled.R)):
        spec_matches = np.all(
            dset_sample_r_prov_specs == dset_r_prov_specs[i_r], axis=1
        )
        matching_rows = np.where(spec_matches)[0]
        i_r_dset_sample = matching_rows[0]
        assert np.allclose(dset_E[i_r], dset_sample_E[i_r_dset_sample])
        assert np.allclose(dset_F[i_r], dset_sample_F[i_r_dset_sample])
def load_140h2o_rset():
    """Return a ``structureSet`` constructed from ``Rset_140h2o_path``."""
    rset_140h2o = structureSet(Rset_140h2o_path)
    return rset_140h2o
def test_structureset_load():
    """Load the saved 10h2o structure set and run the shared 10h2o checks."""
    saved_path = './tests/data/structuresets/10h2o.abc0.iter1.gfn2.md.gfn2.300k.iter1-mbgdml.structset.npz'
    loaded_structureset = structureSet(saved_path)
    example_10h2o(loaded_structureset)