Python examples of mbgdml.data.structureSet

Example #1

0

Show file

def test_structureset_from_npz():
    """Populate a structureSet from an NPZ file and run the 6h2o MD checks.

    The set is filled through ``from_npz`` using the ``'z'`` and ``'R'`` keys
    with ``'Angstrom'`` units. Its name is asserted, then explicitly
    reassigned to the same value, before the object is handed to
    ``example_6h2o_md`` for the remaining assertions.
    """
    npz_path = './tests/data/md/6h2o.temelso.etal.pr.md.gfn2.300k.step10000-ase.md-orca.mp2.def2tzvp.300k.npz'

    atoms_per_h2o = 3
    n_h2o = 6

    # Each entity index is repeated once per atom: 0, 0, 0, 1, 1, 1, ...
    entity_ids = [
        i_entity for i_entity in range(n_h2o) for _ in range(atoms_per_h2o)
    ]

    # One component label per entity.
    comp_ids = ['h2o'] * n_h2o

    test_structureset = structureSet()
    test_structureset.from_npz(
        npz_path, 'z', 'R', 'Angstrom', entity_ids, comp_ids
    )

    # The name is checked first and then set again to the same string.
    expected_name = '6h2o.temelso.etal.pr.md.gfn2.300k.step10000-ase.md-orca.mp2.def2tzvp.300k'
    assert test_structureset.name == expected_name
    test_structureset.name = expected_name

    example_6h2o_md(test_structureset)

Example #2

0

Show file

def test_structureset_from_traj():
    """Populate a structureSet from a trajectory file and run the 10h2o checks.

    The name is asserted right after loading and then overwritten with the
    structure-set name before the object is passed to ``example_10h2o``.
    """
    traj_path = './tests/data/md/10h2o.abc0.iter1.gfn2-xtb.md-gfn2.300k-1.traj'

    atoms_per_h2o = 3
    n_h2o = 10

    # Each entity index is repeated once per atom: 0, 0, 0, 1, 1, 1, ...
    entity_ids = [
        i_entity for i_entity in range(n_h2o) for _ in range(atoms_per_h2o)
    ]

    # One component label per entity.
    comp_ids = ['h2o'] * n_h2o

    test_structureset = structureSet()
    # NOTE(review): a ``.traj`` path is passed to ``from_xyz`` -- confirm this
    # is the intended loader for this file.
    test_structureset.from_xyz(traj_path, 'Angstrom', entity_ids, comp_ids)

    # Name right after loading, then the name used from here on.
    assert test_structureset.name == '10h2o.abc0.iter1.gfn2-xtb.md-gfn2.300k-1'
    test_structureset.name = '10h2o.abc0.iter1.gfn2.md.gfn2.300k.iter1-mbgdml.structset'

    # Disabled save call, kept for reference:
    # test_structureset.save(test_structureset.name, test_structureset.structureset, './tests/data/structuresets/')

    example_10h2o(test_structureset)

Example #3

0

Show file

def trim_140h2o_rset():
    """Load the 140h2o structure set and trim it to a small test fixture.

    Keeps the first ``n_R`` structures and the atoms of the first
    ``n_entities`` molecules, asserting the expected contents after each
    trimming step and the MD5 before and after.

    Returns
    -------
    The trimmed structure set.
    """
    n_R = 3  # Structures retained.
    n_entities = 5  # Molecules retained per structure.
    molecule_size = molecule_sizes['h2o']  # Atoms per water molecule.
    rset = data.structureSet(rset_path_140h2o)

    # Sanity checks on the untouched structure set.
    assert rset.type == 's'
    assert rset.md5 == '3e5cf945f6b4e979373e7772410e3746'

    n_atoms = n_entities * molecule_size

    # Atomic numbers: O, H, H for every retained molecule.
    rset.z = rset.z[:n_atoms]
    expected_z = np.array([8, 1, 1] * n_entities)
    assert np.all(rset.z == expected_z)

    # Coordinates: first n_R structures, first n_atoms atoms.
    rset.R = rset.R[:n_R, :n_atoms]
    expected_R_2 = np.array([
        [6.07124359, 0.7619846, 0.58984577],
        [6.47807882, -0.18138608, 0.67938893],
        [5.14951519, 0.76914325, 0.66198299],
        [-4.28204826, -3.57395445, 0.81850038],
        [-4.33819343, -4.29134079, 0.12722189],
        [-4.33829705, -2.80167393, 0.40818626],
        [-2.82371582, -3.52131402, -4.12086561],
        [-2.96180787, -4.46433929, -3.79287547],
        [-1.85909245, -3.46817877, -4.3649756],
        [6.24586283, -1.76605224, 0.72883595],
        [5.51074538, -2.26847206, 1.21432844],
        [6.92768826, -2.3359825, 0.25592583],
        [-2.44826194, -6.14429515, -3.37660252],
        [-2.19536627, -6.12210888, -2.51171765],
        [-2.65953004, -7.04099688, -3.59504014],
    ])
    assert np.allclose(rset.R[2], expected_R_2)
    assert rset.z.shape[0] == rset.R.shape[1]

    # Entity IDs: one per retained atom.
    rset.entity_ids = rset.entity_ids[:n_atoms]
    expected_entity_ids = np.array(
        [0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4]
    )
    assert np.all(rset.entity_ids == expected_entity_ids)

    # Component IDs: one per retained molecule.
    rset.comp_ids = rset.comp_ids[:n_entities]
    expected_comp_ids = np.array(['h2o'] * n_entities)
    assert np.all(rset.comp_ids == expected_comp_ids)

    # The MD5 must now match the trimmed reference value.
    assert rset.md5 == 'e6a7a058b5fefb622fb3296e29a84150'

    return rset

Example #4

0

Show file

def test_sample_dset_same_size():
    """Sample a trimmed 2h2o data set with ``cm_distance_sum`` and verify it.

    The source data set is cut to its first 50 entries, sampled into a fresh
    data set via ``dset_sample_structures`` with a cutoff of 6.0, and the
    result is checked for metadata, atomic numbers, coordinates (through
    ``check_R_with_rset``), and energies/forces that match the source entry
    with the same ``r_prov_specs`` row.
    """
    source_path = f'{dset_dir}/2h2o/140h2o.sphere.gfn2.md.500k.prod1.3h2o.dset.2h2o-dset.mb.npz'
    source = data.dataSet(source_path)

    # Keep only the first 50 entries of every per-structure array.
    n_keep = 50
    for attr in ('r_prov_specs', 'E', 'R', 'F'):
        trimmed = getattr(source, attr)[:n_keep]
        setattr(source, attr, trimmed)

    sampled = data.dataSet()
    sampled.name = '140h2o.sphere.gfn2.md.500k.prod1.3h2o.dset.2h2o-dset.mb-cm.6'
    sampled = dset_sample_structures(
        sampled, source, 'all', 2, criteria.cm_distance_sum,
        np.array([]), np.array([6.0]), True, False
    )

    # Metadata carried by the sampled data set.
    assert sampled.theory == 'mp2.def2tzvp.frozencore'
    assert sampled.criteria == 'cm_distance_sum'
    assert np.array_equal(sampled.z_slice, np.array([]))
    assert np.array_equal(sampled.cutoff, np.array([6.0]))
    assert np.array_equal(sampled.entity_ids, np.array([0, 0, 0, 1, 1, 1]))
    assert np.array_equal(sampled.comp_ids, np.array(['h2o', 'h2o']))
    assert sampled.centered == True
    assert sampled.r_unit == 'Angstrom'
    # 8726c482c19cdf7889cd1e62b9e9c8e1 is the MD5 hash for the full 140h2o rset.
    assert sampled.r_prov_ids == {0: '8726c482c19cdf7889cd1e62b9e9c8e1'}

    assert np.array_equal(sampled.z, np.array([8, 1, 1, 8, 1, 1]))
    rset = data.structureSet(rset_path_140h2o)
    check_R_with_rset(sampled, rset, True)

    # Energies and forces must equal those of the source entry that has the
    # same r_prov_specs row.
    sampled_specs = sampled.r_prov_specs
    sampled_E = sampled.E
    sampled_F = sampled.F
    source_specs = source.r_prov_specs
    source_E = source.E
    source_F = source.F
    for i_sampled in range(len(sampled.R)):
        row_matches = np.all(source_specs == sampled_specs[i_sampled], axis=1)
        i_source = np.where(row_matches)[0][0]
        assert np.allclose(sampled_E[i_sampled], source_E[i_source])
        assert np.allclose(sampled_F[i_sampled], source_F[i_source])

Example #5

0

Show file

def load_140h2o_rset():
    """Return a structureSet constructed from ``Rset_140h2o_path``."""
    rset = structureSet(Rset_140h2o_path)
    return rset

Example #6

0

Show file

def test_structureset_load():
    """Construct a structureSet from a saved file and run the 10h2o checks."""
    structureset_path = './tests/data/structuresets/10h2o.abc0.iter1.gfn2.md.gfn2.300k.iter1-mbgdml.structset.npz'

    # All assertions live in example_10h2o; this test only supplies the
    # object built directly from the path.
    example_10h2o(structureSet(structureset_path))