def test_adding_pes_data_with_qcjson():
    """Add PES data through ``add_pes_json`` and compare with a reference.

    The data set under test is loaded from the ``-dset-no.data.npz`` file and
    the reference from the matching ``-dset.npz`` file. After
    ``add_pes_json`` runs, the energies (``E``) and forces (``F``) of both data
    sets must be exactly equal.
    """
    dset = data.dataSet(f'{dset_dir}/6h2o/6h2o.temelso.etal-dset-no.data.npz')
    dset_ref = data.dataSet(f'{dset_dir}/6h2o/6h2o.temelso.etal-dset.npz')

    json_source = './tests/data/engrads/h2o/6h2o/6h2o.temelso.etal'
    dset.add_pes_json(
        json_source,
        'MP2/def2-TZVP',
        'kcal/mol',
        'hartree',
        allow_remaining_nan=False,
    )

    # Exact (not approximate) equality is required for both properties.
    for prop in ('E', 'F'):
        assert np.array_equal(getattr(dset_ref, prop), getattr(dset, prop))
def test_sample_dset_same_size():
    """Sample 2mers out of a 2mer data set with a ``cm_distance_sum`` cutoff.

    The source data set is trimmed to its first 50 structures, every 2mer is
    sampled from it with a cutoff of 6.0, and the test then checks the copied
    metadata, the coordinates against the 140h2o structure set, and that the
    energies and forces of each sampled structure equal those of the source
    structure with the same ``r_prov_specs`` row.
    """
    source_path = f'{dset_dir}/2h2o/140h2o.sphere.gfn2.md.500k.prod1.3h2o.dset.2h2o-dset.mb.npz'
    source = data.dataSet(source_path)

    # Keep only the first 50 structures of the source data set.
    n_keep = 50
    for attr in ('r_prov_specs', 'E', 'R', 'F'):
        trimmed = getattr(source, attr)[:n_keep]
        setattr(source, attr, trimmed)

    sampled = data.dataSet()
    sampled.name = '140h2o.sphere.gfn2.md.500k.prod1.3h2o.dset.2h2o-dset.mb-cm.6'
    sampled = dset_sample_structures(
        sampled,
        source,
        'all',
        2,
        criteria.cm_distance_sum,
        np.array([]),
        np.array([6.0]),
        True,
        False,
    )

    # Metadata checks.
    assert sampled.theory == 'mp2.def2tzvp.frozencore'
    assert sampled.criteria == 'cm_distance_sum'
    assert np.array_equal(sampled.z_slice, np.array([]))
    assert np.array_equal(sampled.cutoff, np.array([6.0]))
    assert np.array_equal(sampled.entity_ids, np.array([0, 0, 0, 1, 1, 1]))
    assert np.array_equal(sampled.comp_ids, np.array(['h2o', 'h2o']))
    assert sampled.centered == True
    assert sampled.r_unit == 'Angstrom'
    # 8726c482c19cdf7889cd1e62b9e9c8e1 is the MD5 hash for the full 140h2o rset.
    assert sampled.r_prov_ids == {0: '8726c482c19cdf7889cd1e62b9e9c8e1'}
    assert np.array_equal(sampled.z, np.array([8, 1, 1, 8, 1, 1]))

    # Coordinates are verified against the originating structure set.
    rset = data.structureSet(rset_path_140h2o)
    check_R_with_rset(sampled, rset, True)

    # Energies and forces: locate each sampled structure in the source data
    # set by its r_prov_specs row and compare the values.
    sampled_specs = sampled.r_prov_specs
    sampled_E = sampled.E
    sampled_F = sampled.F
    source_specs = source.r_prov_specs
    source_E = source.E
    source_F = source.F
    for i_sampled in range(len(sampled.R)):
        row_matches = np.all(source_specs == sampled_specs[i_sampled], axis=1)
        i_source = np.where(row_matches)[0][0]
        assert np.allclose(sampled_E[i_sampled], source_E[i_source])
        assert np.allclose(sampled_F[i_sampled], source_F[i_source])
def test_rset_sampling_num_2mers_criteria():
    """Sample five 2mers from the trimmed 140h2o rset with a 6.0 cutoff.

    Checks the stored criteria and cutoff, the array shapes for five sampled
    dimers, the provenance ID, the entity/component IDs, and finally the
    coordinates against the structure set.
    """
    rset = trim_140h2o_rset()

    sampled = data.dataSet()
    sampled.name = '140h2o.sphere.gfn2.md.500k.prod1'
    sampled = dset_sample_structures(
        sampled,
        rset,
        5,
        2,
        criteria.cm_distance_sum,
        np.array([]),
        np.array([6.0]),
        True,
        False,
    )

    # Criteria bookkeeping.
    assert isinstance(sampled.criteria, str)
    assert sampled.criteria in criteria.__dict__
    assert sampled.z_slice.shape == (0,)
    assert sampled.cutoff.shape == (1,)
    assert np.array_equal(sampled.cutoff, np.array([6.]))

    # Structure data for five water dimers (six atoms each).
    assert sampled.r_unit == 'Angstrom'
    assert np.array_equal(sampled.z, np.array([8, 1, 1, 8, 1, 1]))
    assert sampled.R.shape == (5, 6, 3)
    assert sampled.E.shape == (5,)
    assert sampled.F.shape == (5, 6, 3)

    # Provenance and entity bookkeeping.
    assert sampled.r_prov_ids == {0: 'e6a7a058b5fefb622fb3296e29a84150'}
    assert np.array_equal(sampled.entity_ids, np.array([0, 0, 0, 1, 1, 1]))
    assert np.array_equal(sampled.comp_ids, np.array(['h2o', 'h2o']))

    check_R_with_rset(sampled, rset, True)
def test_predictset_correct_contribution_predictions():
    """Compare ``predictSet`` n-body predictions with ``mbePredict``.

    The same three models are used to (1) build a predict set for the 6h2o
    data set and request its ``[1, 2, 3]`` n-body predictions and (2) predict
    directly with ``mbePredict``. Energies and forces from both routes must
    agree within ``np.allclose`` default tolerances.
    """
    dset_6h2o_path = f'{dset_dir}/6h2o/6h2o.temelso.etal-dset.npz'
    model_h2o_paths = [
        f'{model_dir}/140h2o.sphere.gfn2.md.500k.prod1.3h2o.dset.1h2o-model-train500.npz',
        f'{model_dir}/140h2o.sphere.gfn2.md.500k.prod1.3h2o.dset.2h2o.cm.6-model.mb-train500.npz',
        f'{model_dir}/140h2o.sphere.gfn2.md.500k.prod1.3h2o-model.mb-train500.npz',
    ]

    # Load each model file and wrap it; the cutoff stored in the model file is
    # passed on as the criteria cutoff.
    models = []
    for path in model_h2o_paths:
        model_dict = dict(np.load(path, allow_pickle=True))
        models.append(
            gdmlModel(
                model_dict,
                criteria_desc_func=cm_distance_sum,
                criteria_cutoff=model_dict['cutoff'],
            )
        )

    # Route 1: predict set.
    pset = data.predictSet()
    pset.load_dataset(dset_6h2o_path)
    pset.load_models(models, predict_gdml_decomp, use_ray=False)
    pset.prepare()
    E_pset, F_pset = pset.nbody_predictions([1, 2, 3])

    # Route 2: direct many-body expansion prediction.
    dset_6h2o = data.dataSet(dset_6h2o_path)
    predictor = mbePredict(models, predict_gdml, use_ray=False)
    E_predict, F_predict = predictor.predict(
        dset_6h2o.z,
        dset_6h2o.R,
        dset_6h2o.entity_ids,
        dset_6h2o.comp_ids,
        ignore_criteria=False,
    )

    assert np.allclose(E_pset, E_predict)
    assert np.allclose(F_pset, F_predict)
def test_rset_sampling_all_2mers_centering():
    """Check sampled coordinates against ``utils.center_structures``.

    All 2mers are sampled twice from the same structure set; the two calls
    differ only in the second-to-last positional flag (``False`` then
    ``True``). Passing the first result through ``utils.center_structures``
    must reproduce the coordinates of the second result.
    """
    rset = trim_140h2o_rset()
    no_z_slice = np.array([])
    no_cutoff = np.array([])

    # Sampling with the flag set to False.
    dset_plain = data.dataSet()
    dset_plain.name = '140h2o.sphere.gfn2.md.500k.prod1'
    dset_plain = dset_sample_structures(
        dset_plain, rset, 'all', 2, None, no_z_slice, no_cutoff, False, False
    )
    expected_R = utils.center_structures(dset_plain.z, dset_plain.R)

    # Sampling with the flag set to True.
    dset_centered = data.dataSet()
    dset_centered.name = '140h2o.sphere.gfn2.md.500k.prod1-centered'
    dset_centered = dset_sample_structures(
        dset_centered, rset, 'all', 2, None, np.array([]), np.array([]), True, False
    )

    assert np.allclose(expected_R, dset_centered.R)
def test_sample_dset_1mers_multiple_rsets():
    """Sample every 1mer from the 4h2o data set, which has three provenance IDs.

    Expects 12 monomers (three ``r_prov_ids`` entries times four entities
    each), checks one known geometry, and checks that all energies and forces
    are NaN.
    """
    source_path = f'{dset_dir}/4h2o/4h2o.temelso.etal-dset.npz'
    source = data.dataSet(source_path)

    # Sample all 1mers
    monomers = data.dataSet()
    monomers = dset_sample_structures(
        monomers, source, 'all', 1, None, np.array([]), np.array([]), True, False
    )

    # Provenance: rows [i, 0, j] for i in 0..2 and j in 0..3, in that order.
    expected_specs = np.array(
        [[i_prov, 0, i_entity] for i_prov in range(3) for i_entity in range(4)]
    )
    assert np.array_equal(monomers.r_prov_specs, expected_specs)
    expected_prov_ids = {
        0: '92dd31a90a3d2a443023d9d708010a4f',
        1: '5593ef822ede64f6011ece82d6702ff9',
        2: '33098027b401c38efcb5f05fa33c93ad',
    }
    assert monomers.r_prov_ids == expected_prov_ids

    # Single-water bookkeeping.
    assert np.array_equal(monomers.entity_ids, np.array([0, 0, 0]))
    assert np.array_equal(monomers.comp_ids, np.array(['h2o']))
    assert monomers.centered == True
    assert monomers.r_unit == 'Angstrom'
    assert np.array_equal(monomers.z, np.array([8, 1, 1]))

    # Coordinates: shape plus a spot check of the structure at index 3.
    assert monomers.R.shape == (12, 3, 3)
    expected_r_3 = np.array([
        [-0.02947763, -0.0325826, -0.05004315],
        [ 0.93292237, 0.1104174, 0.10365685],
        [-0.46497763, 0.4068174, 0.69075685],
    ])
    assert np.allclose(monomers.R[3], expected_r_3)

    # Energies and forces must all be NaN.
    assert monomers.E.shape == (12,)
    for energy in monomers.E:
        assert np.isnan(energy)
    assert monomers.F.shape == (12, 3, 3)
    for force_component in monomers.F.flatten():
        assert np.isnan(force_component)
def test_rset_sampling_all_2mers_normal():
    """Sample all dimers (2mers) from the trimmed 140h2o structure set.

    Checks the provenance bookkeeping, the entity/component IDs, one known
    dimer geometry, and that energies and forces are all NaN.
    """
    rset = trim_140h2o_rset()

    ### NORMAL SAMPLING ###
    dset = data.dataSet()
    dset.name = '140h2o.sphere.gfn2.md.500k.prod1'
    dset = dset_sample_structures(
        dset, rset, 'all', 2, None, np.array([]), np.array([]), False, False
    )

    # Checking properties.
    assert dset.r_prov_ids == {0: 'e6a7a058b5fefb622fb3296e29a84150'}
    assert dset.r_prov_specs.shape == (30, 4)
    # Column 0 must be all zeros. Index the column with ``0`` (not ``:1``) so
    # a 1-D column is compared; the ``:1`` slice has shape (30, 1) and would
    # silently broadcast against a (30,) vector into a (30, 30) comparison.
    assert np.all(dset.r_prov_specs[:, 0] == 0)
    # Column 1 holds ten rows each of 0, 1, and 2, in that order.
    assert np.all(
        dset.r_prov_specs[:, 1] == np.array(
            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
             1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
             2, 2, 2, 2, 2, 2, 2, 2, 2, 2]
        )
    )
    # Every provenance row is unique.
    assert dset.r_prov_specs.shape == np.unique(dset.r_prov_specs, axis=0).shape
    assert np.all(dset.entity_ids == np.array([0, 0, 0, 1, 1, 1]))
    assert np.all(dset.comp_ids == np.array(['h2o', 'h2o']))
    assert np.all(dset.z == np.array([8, 1, 1, 8, 1, 1]))

    # Checking R: find the structure with a known provenance row and compare
    # its coordinates with the reference geometry.
    assert dset.R.shape == (30, 6, 3)
    r_prov_specs_r_check = np.array([0, 1, 1, 4])
    r_index = np.where(
        np.all(dset.r_prov_specs == r_prov_specs_r_check, axis=1)
    )[0][0]
    r_check = np.array([
        [-4.27804369, -3.56574992, 0.81519167],
        [-4.3569076, -4.2647005, 0.1558876],
        [-4.35184085, -2.82879184, 0.39925437],
        [-2.44708832, -6.14572336, -3.36929742],
        [-2.18964657, -6.13868747, -2.48473228],
        [-2.64909444, -7.04677952, -3.60878085],
    ])
    assert np.allclose(dset.R[r_index], r_check)

    # Checking E.
    assert dset.E.shape == (30,)
    assert np.all(np.isnan(dset.E))

    # Checking F
    assert dset.F.shape == (30, 6, 3)
    assert np.all(np.isnan(dset.F))
def test_1h2o_train_bayes_opt():
    """Train a 1h2o model with ``mbGDMLTrain.bayes_opt`` and check the result.

    The test is skipped when the optional ``bayes_opt`` package cannot be
    imported. It passes when the selected ``sig`` lies in [40, 50] and the
    model stores two permutations.
    """
    try:
        import bayes_opt  # noqa: F401 -- imported only to detect availability
    except ImportError:
        pytest.skip("bayesian-optimization package not installed")

    # NOTE(review): removes a module-level ``glob`` name if one exists; the
    # reason is not visible in this block -- confirm before changing.
    global glob
    if 'glob' in globals():
        del glob

    dset_path = os.path.join(
        dset_dir, '1h2o/140h2o.sphere.gfn2.md.500k.prod1.3h2o.dset.1h2o-dset.npz'
    )
    dset = dataSet(dset_path)

    # Fixed training/validation indices stored next to the reference data.
    train_dir_1h2o = os.path.join(train_dir, '1h2o/')
    train_idxs_path = os.path.join(train_dir_1h2o, 'train_idxs.npy')
    valid_idxs_path = os.path.join(train_dir_1h2o, 'valid_idxs.npy')
    train_idxs = np.load(train_idxs_path, allow_pickle=True)
    valid_idxs = np.load(valid_idxs_path, allow_pickle=True)

    n_train = 50
    n_valid = 100

    train = mbGDMLTrain(
        use_sym=True, use_E=True, use_E_cstr=False, use_cprsn=False,
        solver='analytic', lam=1e-15, solver_tol=1e-4, interact_cut_off=None
    )
    gp_params = {'init_points': 5, 'n_iter': 5, 'alpha': 0.001}
    # The second return value is not needed by this test.
    model, _optimizer = train.bayes_opt(
        dset,
        '1h2o',
        n_train,
        n_valid,
        sigma_bounds=(2, 100),
        save_dir='./tests/tmp/1h2o-bo',
        gp_params=gp_params,
        train_idxs=train_idxs,
        valid_idxs=valid_idxs,
        overwrite=True,
        write_json=True,
        write_idxs=True,
    )

    best_sig = model['sig'].item()
    assert 40 <= best_sig <= 50
    assert model['perms'].shape[0] == 2

    del train
def test_rset_sampling_all_2mers_criteria():
    """Sample all 2mers with a ``cm_distance_sum`` cutoff of 6.0.

    A criteria-free sampling is performed first (its result is not asserted
    on), then a sampling with the criteria. The provenance rows of the
    criteria-filtered data set must equal the expected list exactly, order
    included.
    """
    rset = trim_140h2o_rset()

    unfiltered = data.dataSet()
    unfiltered.name = '140h2o.sphere.gfn2.md.500k.prod1-centered'
    unfiltered = dset_sample_structures(
        unfiltered, rset, 'all', 2, None, np.array([]), np.array([]), True, False
    )

    filtered = data.dataSet()
    filtered.name = '140h2o.sphere.gfn2.md.500k.prod1-criteria'
    filtered = dset_sample_structures(
        filtered,
        rset,
        'all',
        2,
        criteria.cm_distance_sum,
        np.array([]),
        np.array([6.0]),
        True,
        False,
    )

    # Same four entity pairs are expected for each of the three structures.
    expected_specs = np.array([
        [0, 0, 0, 3], [0, 0, 1, 2], [0, 0, 1, 4], [0, 0, 2, 4],
        [0, 1, 0, 3], [0, 1, 1, 2], [0, 1, 1, 4], [0, 1, 2, 4],
        [0, 2, 0, 3], [0, 2, 1, 2], [0, 2, 1, 4], [0, 2, 2, 4],
    ])
    assert np.array_equal(filtered.r_prov_specs, expected_specs)
def test_dset_sampling_all_2mers_after_3mers():
    """Sample 2mers (6.0 cutoff) from a data set of previously sampled 3mers.

    All 3mers are first sampled from the trimmed structure set; all 2mers are
    then sampled from that 3mer data set with the ``cm_distance_sum``
    criteria. The resulting provenance rows, array shapes, and criteria
    metadata are checked.
    """
    rset = trim_140h2o_rset()

    trimers = data.dataSet()
    trimers.name = '140h2o.sphere.gfn2.md.500k.prod1'
    trimers = dset_sample_structures(
        trimers, rset, 'all', 3, None, np.array([]), np.array([]), True, False
    )

    dimers = data.dataSet()
    dimers = dset_sample_structures(
        dimers,
        trimers,
        'all',
        2,
        criteria.cm_distance_sum,
        np.array([]),
        np.array([6.0]),
        True,
        False,
    )

    assert np.array_equal(dimers.entity_ids, np.array([0, 0, 0, 1, 1, 1]))
    assert np.array_equal(dimers.comp_ids, np.array(['h2o', 'h2o']))
    assert dimers.r_prov_ids == {0: 'e6a7a058b5fefb622fb3296e29a84150'}
    assert dimers.r_prov_specs.shape == (12, 4)

    # Same as test_rset_sampling_all_2mers_criteria, but organized to match
    # the 3mer then 2mer sampling.
    expected_specs = np.array([
        [0, 0, 1, 2], [0, 0, 0, 3], [0, 0, 1, 4], [0, 0, 2, 4],
        [0, 1, 1, 2], [0, 1, 0, 3], [0, 1, 1, 4], [0, 1, 2, 4],
        [0, 2, 1, 2], [0, 2, 0, 3], [0, 2, 1, 4], [0, 2, 2, 4],
    ])
    assert np.array_equal(dimers.r_prov_specs, expected_specs)

    assert dimers.R.shape == (12, 6, 3)
    assert dimers.E.shape == (12,)
    assert dimers.F.shape == (12, 6, 3)

    assert dimers.criteria == 'cm_distance_sum'
    assert np.array_equal(dimers.cutoff, np.array([6.0]))
def test_1h2o_train_grid_search():
    """Grid-search sigma over [32, 42, 52] for a 1h2o model.

    The selected model must have ``sig == 42``, a force RMSE close to the
    stored reference value, and two permutations.
    """
    # NOTE(review): removes a module-level ``glob`` name if one exists; the
    # reason is not visible in this block -- confirm before changing.
    global glob
    if 'glob' in globals():
        del glob

    water_dset = dataSet(
        os.path.join(
            dset_dir, '1h2o/140h2o.sphere.gfn2.md.500k.prod1.3h2o.dset.1h2o-dset.npz'
        )
    )

    # Fixed training/validation indices stored with the reference data.
    idxs_dir = os.path.join(train_dir, '1h2o/')
    train_idxs = np.load(os.path.join(idxs_dir, 'train_idxs.npy'), allow_pickle=True)
    valid_idxs = np.load(os.path.join(idxs_dir, 'valid_idxs.npy'), allow_pickle=True)

    num_train, num_valid = 50, 100
    sigma_grid = [32, 42, 52]

    trainer = mbGDMLTrain(
        use_sym=True,
        use_E=True,
        use_E_cstr=False,
        use_cprsn=False,
        solver='analytic',
        lam=1e-15,
        solver_tol=1e-4,
        interact_cut_off=None,
    )
    model = trainer.grid_search(
        water_dset,
        '1h2o',
        num_train,
        num_valid,
        sigma_grid,
        train_idxs=train_idxs,
        valid_idxs=valid_idxs,
        write_json=True,
        write_idxs=True,
        overwrite=True,
        save_dir='./tests/tmp/1h2o-grid',
    )
    del trainer

    assert model['sig'].item() == 42
    force_rmse = np.array(model['f_err'].item()['rmse'])
    assert np.allclose(force_rmse, 0.4673520776718695, rtol=1e-05, atol=1e-08)
    assert model['perms'].shape[0] == 2
def test_getting_test_idxs():
    """``get_test_idxs`` with ``n_test=None`` returns all remaining indices.

    The expected count is the number of structures in the data set minus the
    lengths of the model's ``idxs_train`` and ``idxs_valid`` entries.
    """
    dset = dataSet(
        os.path.join(
            dset_dir, '1h2o/140h2o.sphere.gfn2.md.500k.prod1.3h2o.dset.1h2o-dset.npz'
        )
    )
    model = dict(
        np.load(
            os.path.join('./tests/data/models', '1h2o-model.npz'),
            allow_pickle=True,
        )
    )

    # Structures not used for training or validation.
    n_used = len(model['idxs_train']) + len(model['idxs_valid'])
    n_expected = dset.n_R - n_used

    test_idxs = get_test_idxs(model, dset.asdict(), n_test=None)
    assert len(test_idxs) == n_expected
def test_rset_sampling_all_2mers_ignore_duplicate():
    """Re-sampling the same 2mers into a populated data set adds nothing.

    All 2mers are sampled from the trimmed structure set, then the identical
    sampling is run again on the resulting data set. The second result must
    still hold 30 structures with unchanged provenance and entity
    bookkeeping.
    """
    rset = trim_140h2o_rset()

    dset = data.dataSet()
    dset.name = '140h2o.sphere.gfn2.md.500k.prod1'
    dset = dset_sample_structures(
        dset, rset, 'all', 2, None, np.array([]), np.array([]), False, False
    )

    # Second, identical sampling into the already-populated data set.
    dset_duplicate = dset_sample_structures(
        dset, rset, 'all', 2, None, np.array([]), np.array([]), False, False
    )

    # All assertions target the re-sampled result so that the entity and
    # component IDs are verified on the same object as the shapes.
    assert dset_duplicate.r_prov_ids == {0: 'e6a7a058b5fefb622fb3296e29a84150'}
    assert dset_duplicate.r_prov_specs.shape == (30, 4)
    assert np.all(dset_duplicate.entity_ids == np.array([0, 0, 0, 1, 1, 1]))
    assert np.all(dset_duplicate.comp_ids == np.array(['h2o', 'h2o']))
    assert dset_duplicate.R.shape == (30, 6, 3)
def test_1h2o_prob_indices():
    """Find 100 problematic 1h2o structures and compare with stored indices.

    The comparison is deliberately loose: fewer than 20 of the found indices
    may be absent from the reference list.
    """
    # NOTE(review): removes a module-level ``glob`` name if one exists; the
    # reason is not visible in this block -- confirm before changing.
    global glob
    if 'glob' in globals():
        del glob

    dset_path = os.path.join(
        dset_dir, '1h2o/140h2o.sphere.gfn2.md.500k.prod1.3h2o.dset.1h2o-dset.npz'
    )
    model_path = os.path.join('./tests/data/models', '1h2o-model.npz')

    raw_model = dict(np.load(model_path, allow_pickle=True))
    wrapped_model = gdmlModel(
        raw_model, criteria_desc_func=None, criteria_cutoff=None
    )
    dset = dataSet(dset_path)

    finder = prob_structures([wrapped_model], predict_gdml)
    n_find = 100
    found_idxs = np.sort(finder.find(dset, n_find, save_dir='./tests/tmp'))

    reference_idxs = np.array([
        465, 541, 653, 798, 807, 921, 953, 1058, 1240, 1421,
        1430, 1510, 1618, 1663, 1665, 1676, 1890, 2090, 2123, 2218,
        2246, 2665, 2944, 3171, 3225, 3485, 3510, 3738, 3795, 3970,
        3994, 4272, 4660, 5102, 5150, 5195, 5230, 6394, 6471, 6787,
        6900, 6961, 6986, 7257, 7725, 7735, 7812, 7815, 8006, 8074,
        8253, 8489, 8532, 8810, 9169, 9221, 9226, 9667, 9668, 9728,
        9747, 9919, 9952, 9995, 10025, 10057, 10062, 10144, 10252, 10525,
        10763, 10982, 11005, 11012, 11024, 11404, 11730, 11745, 11747, 11864,
        11970, 12049, 12167, 12329, 12465, 12478, 12638, 12645, 12655, 12664,
        12775, 12878, 13062, 13151, 13192, 13320, 13343, 13546, 13676, 13963,
    ])

    assert len(found_idxs) == 100
    # This is a very bad test, but will work for now?
    not_in_reference = np.setdiff1d(found_idxs, reference_idxs)
    assert len(not_in_reference) < 20
def test_rset_sampling_num_2mers_additional():
    """Sampling five more 2mers must keep existing energies and forces.

    Five dimers are sampled, the energy and forces of the structure at index
    1 are overwritten with sentinel values, and five more dimers are sampled
    into the same data set. The data set must then hold ten structures and
    the sentinel values must be untouched.
    """
    rset = trim_140h2o_rset()

    dset = data.dataSet()
    dset.name = '140h2o.sphere.gfn2.md.500k.prod1'
    dset = dset_sample_structures(
        dset,
        rset,
        5,
        2,
        criteria.cm_distance_sum,
        np.array([]),
        np.array([6.0]),
        True,
        False,
    )

    # Ensure energies and forces are not overwritten
    sentinel_idx = 1
    sentinel_E = -47583.29857
    dset.E[sentinel_idx] = sentinel_E
    # Six atoms: the same pair of force rows repeated three times.
    sentinel_F = np.array([[4.4, 2.8, 6.0], [-3.65, 34.0, 2.3]] * 3)
    dset.F[sentinel_idx] = sentinel_F

    # Second round of sampling into the same data set.
    dset = dset_sample_structures(
        dset,
        rset,
        5,
        2,
        criteria.cm_distance_sum,
        np.array([]),
        np.array([6.0]),
        True,
        False,
    )

    assert dset.r_prov_ids == {0: 'e6a7a058b5fefb622fb3296e29a84150'}
    assert np.array_equal(dset.entity_ids, np.array([0, 0, 0, 1, 1, 1]))
    assert np.array_equal(dset.comp_ids, np.array(['h2o', 'h2o']))
    assert np.array_equal(dset.z, np.array([8, 1, 1, 8, 1, 1]))

    assert dset.R.shape == (10, 6, 3)
    assert dset.E.shape == (10,)
    assert np.allclose(dset.E[sentinel_idx], sentinel_E)
    assert dset.F.shape == (10, 6, 3)
    assert np.allclose(dset.F[sentinel_idx], sentinel_F)

    check_R_with_rset(dset, rset, True)
def test_dset_default_attributes():
    """Check the default attribute values of an empty ``dataSet``."""
    empty = data.dataSet()

    assert isinstance(empty.r_prov_ids, dict)
    assert len(empty.r_prov_ids) == 0
    assert empty.r_prov_specs.shape == (1, 0)
    assert empty.criteria == ''

    # Expected shapes of the remaining array attributes, in check order.
    expected_shapes = (
        ('z_slice', (0,)),
        ('cutoff', (0,)),
        ('z', (0,)),
        ('R', (1, 1, 0)),
        ('E', (0,)),
        ('F', (1, 1, 0)),
        ('entity_ids', (0,)),
        ('comp_ids', (0,)),
    )
    for attr_name, shape in expected_shapes:
        assert getattr(empty, attr_name).shape == shape

    # NOTE(review): this only tolerates an AttributeError from ``md5``; it
    # passes whether or not the error is raised. Confirm whether
    # ``pytest.raises(AttributeError)`` was intended.
    try:
        empty.md5
    except AttributeError:
        pass
def test_predict_single_16mer():
    """Predict energy and forces of the 16h2o data set with ``mbePredict``.

    The three models listed in ``model_h2o_paths`` are loaded, wrapped in
    ``gdmlModel`` with the ``cm_distance_sum`` criteria and each model's
    stored cutoff, and used for a prediction without ray. The predicted
    energy and forces are compared with hard-coded reference values.
    """
    dset_16h2o_path = f'{dset_dir}/16h2o/16h2o.yoo.etal.boat.b-dset-mp2.def2tzvp.npz'
    model_h2o_paths = [
        f'{model_dir}/140h2o.sphere.gfn2.md.500k.prod1.3h2o.dset.1h2o-model-train500.npz',
        f'{model_dir}/140h2o.sphere.gfn2.md.500k.prod1.3h2o.dset.2h2o.cm.6-model.mb-train500.npz',
        f'{model_dir}/140h2o.sphere.gfn2.md.500k.prod1.3h2o-model.mb-train500.npz',
    ]
    # Lazily load each model file as a plain dict; the generator is consumed
    # by the list comprehension below.
    models = (dict(np.load(model_path, allow_pickle=True)) for model_path in model_h2o_paths)
    models = [
        gdmlModel(model, criteria_desc_func=cm_distance_sum, criteria_cutoff=model['cutoff'])
        for model in models
    ]

    dset_16h2o = data.dataSet(dset_16h2o_path)

    mbe_pred = mbePredict(models, predict_gdml, use_ray=False)
    E_predict, F_predict = mbe_pred.predict(
        dset_16h2o.z, dset_16h2o.R, dset_16h2o.entity_ids,
        dset_16h2o.comp_ids, ignore_criteria=False
    )

    # Reference values: one energy and one (48, 3) block of forces.
    E = np.array([-766368.03399751])
    F = np.array([[
        [0.29906572, 0.14785963, 0.24781407],
        [-0.30412644, -0.72411633, -0.11358761],
        [-0.49192677, 0.86896897, -0.67525678],
        [0.36627638, 1.02869105, -2.56223656],
        [ -0.10503164, -0.89234795, 0.9294424, ],
        [ -0.1841222, -0.14389019, 1.2193703, ],
        [-1.38995634, 1.74512784, 0.20352509],
        [0.50352734, -1.84912139, -1.11214437],
        [ -0.45073645, -0.58830104, -0.0708215, ],
        [-0.05824096, -0.07168296, 3.05363522],
        [-0.21573588, 0.55601679, -0.93232724],
        [0.33556773, 0.3464968, -1.20999654],
        [1.13396357, 0.64719014, -0.37314183],
        [-0.14864126, -0.74782087, 0.92789942],
        [0.25446292, 0.18875155, 0.35677525],
        [1.18808078, 0.9989521, -1.70936528],
        [-0.42772192, -0.23482216, 2.22942188],
        [0.5023115, -0.2546999, 0.59431561],
        [1.03039212, -0.27777061, 0.43893643],
        [-1.6481248, -0.11736926, 0.39427926],
        [-0.8270073, -1.08703941, -0.46220551],
        [-1.65290086, -0.85447434, -0.25093955],
        [2.38457849, -0.51709509, -0.97800052],
        [ 0.70822521, 0.11395345, 1.4606325, ],
        [-0.49915379, 2.60146319, 1.20100891],
        [ -0.01957611, -1.61507913, -0.3507438, ],
        [-0.04340775, -0.95576235, -0.88557194],
        [-0.1068999, -1.47361438, -0.57488098],
        [0.10196448, 1.2622373, -0.57288566],
        [0.46155007, 0.86992573, -0.07612512],
        [-0.06659418, -1.53956909, -2.77945064],
        [-0.30081568, 0.14797997, 0.90844867],
        [0.38111199, 1.29149786, 0.63063523],
        [0.27202453, 0.04869613, -1.44668878],
        [0.03618388, -0.62330206, -1.39043361],
        [-0.5954522, 0.61790128, 1.67910304],
        [0.10622445, 0.31818432, 0.72714358],
        [-0.48496294, 0.85814888, -0.29055761],
        [-0.85844605, 0.18657187, -0.07795668],
        [ 2.58353778, -0.54173036, 0.4635027, ],
        [-1.56162087, 0.12760808, 0.02244887],
        [-0.65542649, 0.34366634, 0.19180049],
        [-2.35675996, -1.09049215, 0.22829278],
        [0.71868199, 0.072091, -0.36158273],
        [1.55157057, 0.37661812, -0.25918432],
        [ -1.39910186, -0.24662851, 2.7263307, ],
        [1.55454091, 0.60506067, -1.08736517],
        [0.3786482, 0.07707048, -0.23131207]]])

    # Energy uses the default np.allclose tolerances; forces are compared
    # with looser, explicit tolerances.
    assert np.allclose(E_predict, E)
    assert np.allclose(F_predict, F, rtol=1e-04, atol=1e-02)
def test_train_results_1h2o():
    """Checks the results of a training task.

    A 1h2o task with fixed training/validation indices is created and
    trained through ``GDMLTrain``. The resulting descriptors, force
    coefficients, integration constant ``c``, and ``norm_y_train`` are
    compared with stored reference values.
    """
    # NOTE(review): removes a module-level ``glob`` name if one exists; the
    # reason is not visible in this block -- confirm before changing.
    global glob
    if 'glob' in globals():
        del glob

    dset = dataSet(
        os.path.join(
            dset_dir, '1h2o/140h2o.sphere.gfn2.md.500k.prod1.3h2o.dset.1h2o-dset.npz'
        )
    )
    dset_dict = dset.asdict()

    # Fixed training/validation indices stored with the reference data.
    ref_dir = os.path.join(train_dir, '1h2o/')
    train_idxs = np.load(os.path.join(ref_dir, 'train_idxs.npy'), allow_pickle=True)
    valid_idxs = np.load(os.path.join(ref_dir, 'valid_idxs.npy'), allow_pickle=True)

    num_train, num_valid, sigma = 50, 100, 42

    trainer = GDMLTrain()
    # The same data set dictionary serves as training and validation source.
    task = trainer.create_task(
        dset_dict,
        num_train,
        dset_dict,
        num_valid,
        sigma,
        lam=1e-15,
        use_sym=True,
        use_E=True,
        use_E_cstr=False,
        use_cprsn=False,
        solver='analytic',
        solver_tol=1e-4,
        interact_cut_off=None,
        idxs_train=train_idxs,
        idxs_valid=valid_idxs,
    )
    model = trainer.train(task)

    alphas_F = model['alphas_F']
    R_desc = model['R_desc']
    # NOTE(review): tril_perms_lin and its reference are loaded but never
    # compared below -- confirm whether an assertion is missing.
    tril_perms_lin = model['tril_perms_lin']

    # Reference data
    def _load_reference(file_name):
        return np.load(os.path.join(ref_dir, file_name), allow_pickle=True)

    alphas_F_ref = _load_reference('alphas_F.npy')
    R_desc_ref = _load_reference('R_desc.npy')
    tril_perms_lin_ref = _load_reference('tril_perms_lin.npy')

    del trainer

    # Coefficients will not be exactly the same.
    assert np.allclose(R_desc, R_desc_ref, rtol=1e-05, atol=1e-08)
    # NOTE(review): rtol=1e2 is extremely permissive.
    assert np.allclose(alphas_F, alphas_F_ref, rtol=1e2, atol=0)
    assert np.allclose(np.array(model['c']), np.array(331288.48632617114))
    assert np.allclose(
        np.array(model['norm_y_train']),
        np.array(321987215081.7051),
        rtol=1e-3,
        atol=0,
    )