def test_append_fps(self):
    """Append three molecules to the 10-molecule file and check the new row count.

    The file is first opened without loading fingerprints into memory so the
    storage backend can be written to, then reopened in memory to count rows.
    """
    db_path = os.path.join(TESTS_DIR, "data/10mols.h5")
    new_mols = [["CC", 11], ["CCC", 12], ["CCCC", 13]]

    engine = FPSim2Engine(db_path, in_memory_fps=False, storage_backend="pytables")
    engine.storage.append_fps(new_mols)

    # Reopen with fingerprints in memory to see the appended rows.
    engine = FPSim2Engine(db_path, in_memory_fps=True)
    n_rows = engine.fps.shape[0]
    assert n_rows == 13
def test_n_delete_fps(self):
    """Delete molecules 11-13, re-sort the file, and check the remaining rows.

    After deletion and sorting the file must hold 10 fingerprints, with the
    last column of the first and last rows equal to 35 and 48 respectively.
    """
    db_path = os.path.join(TESTS_DIR, "data/10mols.h5")
    ids_to_remove = [11, 12, 13]

    engine = FPSim2Engine(db_path, in_memory_fps=False, storage_backend="pytables")
    engine.storage.delete_fps(ids_to_remove)
    sort_db_file(db_path)

    # Reload with default settings and inspect the stored fingerprints.
    engine = FPSim2Engine(db_path)
    assert engine.fps.shape[0] == 10
    first_row_last_col = engine.fps[0][-1]
    assert first_row_last_col == 35
    last_row_last_col = engine.fps[-1][-1]
    assert last_row_last_col == 48
def get_fpsim_engine():
    """Return the module-wide FPSim2 engine, creating it on first use.

    The engine is built from ``FPSIM2_FILE_PATH`` the first time this function
    is called while ``FPSIM_ENGINE`` is ``None``; the load time is printed.
    Later calls return the cached instance without reloading.

    Returns:
        The object stored in the module-level ``FPSIM_ENGINE``.
    """
    # Only FPSIM_ENGINE is rebound here; FPSIM2_FILE_PATH is read-only and
    # therefore needs no ``global`` declaration.
    global FPSIM_ENGINE
    if FPSIM_ENGINE is None:
        # perf_counter is monotonic, so the reported duration cannot be
        # skewed by system clock adjustments the way time.time() can.
        t_ini = time.perf_counter()
        FPSIM_ENGINE = FPSim2Engine(FPSIM2_FILE_PATH)
        print('FPSIM2 FILE LOADED IN {0} SECS'.format(time.perf_counter() - t_ini))
    return FPSIM_ENGINE
def test_calc_popcnt_bins():
    """Check the popcount bins of a sorted, in-memory load of the test file.

    The engine's ``popcnt_bins`` must equal the module-level ``popcnt_bins``
    reference value.
    """
    db_path = os.path.join(TESTS_DIR, "data/test.h5")
    engine = FPSim2Engine(
        db_path,
        in_memory_fps=True,
        fps_sort=True,
        storage_backend="pytables",
    )
    computed_bins = engine.popcnt_bins
    assert computed_bins == popcnt_bins
def test_on_disk_substructure(n_workers):
    """Run an on-disk substructure screen and check the single expected hit.

    Args:
        n_workers: forwarded to ``on_disk_substructure`` (presumably supplied
            by a pytest fixture or parametrization defined elsewhere).

    The engine is opened with ``in_memory_fps=False``; the test also checks
    that accessing ``fpe.fps`` raises in that mode.
    """
    in_file = os.path.join(TESTS_DIR, "data/test.h5")
    fpe = FPSim2Engine(in_file, in_memory_fps=False, storage_backend="pytables")
    results = fpe.on_disk_substructure(query_smi, n_workers=n_workers)

    # A single np.array call is enough; wrapping it in a second np.array only
    # made a needless copy of the same one-element uint32 array.
    expected = np.array([1], dtype="<u4")
    np.testing.assert_array_equal(results, expected)

    # Fingerprints were not loaded into memory, so reading them must fail.
    with pytest.raises(Exception):
        fpe.fps
def get_similar_mols(kekule_smiles: list, model: str):
    """Return, for each SMILES, the coefficient of the first on-disk similarity hit.

    The fingerprint database is looked up at ``./train_data/<model>.h5``
    relative to the current working directory.

    Args:
        kekule_smiles: SMILES strings to query, one search per entry.
        model: base name (without extension) of the ``.h5`` file to search.

    Returns:
        A list with one value per input SMILES: element ``[0][1]`` of the
        search result, i.e. the second field of the first returned record
        (presumably the similarity coefficient of the best match - confirm
        against the FPSim2 result layout).

    Raises:
        IndexError: if a search returns no records (nothing reaches the
            0.01 threshold), because the first record is indexed directly.
    """
    fp_dict_path = ''.join(['./train_data/', model, '.h5'])
    fp_dict_path = path.abspath(path.join(os.getcwd(), fp_dict_path))

    # Only on-disk searches are run below, so skip loading every fingerprint
    # into memory when the engine is constructed.
    fp_engine = FPSim2Engine(fp_dict_path, in_memory_fps=False)

    return [
        fp_engine.on_disk_similarity(smi, 0.01)[0][1]
        for smi in kekule_smiles
    ]
def test_create_db_file_sdf():
    """Build a fingerprint file from the 10-molecule SDF and verify its contents.

    Checks that the stored fingerprint type and the ``radius``/``nBits``
    parameters match the module-level ``FP_TYPE``/``FP_PARAMS``, and that
    10 fingerprints were written.
    """
    sdf_path = os.path.join(TESTS_DIR, "data/10mols.sdf")
    db_path = os.path.join(TESTS_DIR, "data/10mols_sdf.h5")
    create_db_file(sdf_path, db_path, FP_TYPE, FP_PARAMS, mol_id_prop="mol_id")

    engine = FPSim2Engine(db_path, storage_backend="pytables")
    stored_type, stored_params, _unused = engine.storage.read_parameters()

    assert stored_type == FP_TYPE
    for key in ("radius", "nBits"):
        assert stored_params[key] == FP_PARAMS[key]
    assert engine.fps.shape[0] == 10
def test_create_db_file_list():
    """Build a fingerprint file from an in-memory list and verify its contents.

    Three ``[smiles, id]`` pairs are written; the stored fingerprint type and
    the ``radius``/``nBits`` parameters must match ``FP_TYPE``/``FP_PARAMS``
    and the file must hold 3 fingerprints.
    """
    db_path = os.path.join(TESTS_DIR, "data/10mols_list.h5")
    molecules = [["CC", 1], ["CCC", 2], ["CCCC", 3]]
    create_db_file(molecules, db_path, FP_TYPE, FP_PARAMS)

    engine = FPSim2Engine(db_path, storage_backend="pytables")
    stored_type, stored_params, _unused = engine.storage.read_parameters()

    assert stored_type == FP_TYPE
    for key in ("radius", "nBits"):
        assert stored_params[key] == FP_PARAMS[key]
    assert engine.fps.shape[0] == 3
def test_tversky(n_workers):
    """Run a Tversky search on the test file and compare to the expected records.

    Args:
        n_workers: forwarded to ``tversky`` (presumably supplied by a pytest
            fixture or parametrization defined elsewhere).

    The search is called with positional arguments 0.85, 0.5, 0.5 after the
    query and must return exactly the two ``(mol_id, coeff)`` records below.
    """
    db_path = os.path.join(TESTS_DIR, "data/test.h5")
    engine = FPSim2Engine(db_path, storage_backend="pytables")
    hits = engine.tversky(query_smi, 0.85, 0.5, 0.5, n_workers=n_workers)

    record_dtype = {"names": ["mol_id", "coeff"], "formats": ["<u4", "<f4"]}
    expected_records = [(1, 1.0), (6, 0.85057473)]
    expected = np.array(expected_records, dtype=record_dtype)

    np.testing.assert_array_equal(hits, expected)
def test_validate_against_rdkit():
    """Compare FPSim2 similarity coefficients with RDKit Tanimoto values.

    Morgan bit-vector fingerprints (radius 2, 2048 bits) are computed with
    RDKit for every line of ``test/10mols.smi`` and for ``query_smi``; the
    Tanimoto similarities, sorted in descending order, are compared pairwise
    with the ``coeff`` field returned by an FPSim2 search at threshold 0.0.
    """
    with open('test/10mols.smi') as f:
        smiles = f.readlines()

    fps = [
        Chem.rdMolDescriptors.GetMorganFingerprintAsBitVect(
            Chem.MolFromSmiles(smi), radius=2, nBits=2048)
        for smi in smiles
    ]
    query = Chem.rdMolDescriptors.GetMorganFingerprintAsBitVect(
        Chem.MolFromSmiles(query_smi), radius=2, nBits=2048)
    rdresults = sorted(
        [DataStructs.TanimotoSimilarity(query, fp) for fp in fps],
        reverse=True)

    fpe = FPSim2Engine('test/10mols.h5')
    results = fpe.similarity(query_smi, 0.0, n_workers=1)['coeff']

    # NOTE: zip stops at the shorter sequence, so only the common prefix of
    # the two result lists is compared.
    for rds, fpss in zip(rdresults, results):
        # Assert the boolean directly instead of comparing it to True.
        assert math.isclose(rds, fpss, rel_tol=1e-7)
def test_on_disk_similarity(n_workers):
    """Run an on-disk similarity search and compare the returned coefficients.

    Args:
        n_workers: forwarded to ``on_disk_similarity`` (presumably supplied by
            a pytest fixture or parametrization defined elsewhere).

    The engine is opened with ``in_memory_fps=False``; reading ``fps`` must
    raise in that mode. Only the ``coeff`` field of the results is compared,
    using an almost-equal check.
    """
    db_path = os.path.join(TESTS_DIR, "data/test.h5")
    engine = FPSim2Engine(db_path, in_memory_fps=False, storage_backend="pytables")

    # Fingerprints were not loaded into memory, so reading them must fail.
    with pytest.raises(Exception):
        engine.fps

    hits = engine.on_disk_similarity(query_smi, 0.7, n_workers=n_workers)

    record_dtype = {
        "names": ["mol_id", "coeff"],
        "formats": ["<u4", "<f4"],
        "offsets": [4, 8],
        "itemsize": 12,
    }
    expected_records = [(1, 1.0), (6, 0.74), (7, 0.735849), (5, 0.72549)]
    expected = np.array(expected_records, dtype=record_dtype)

    np.testing.assert_array_almost_equal(hits["coeff"], expected["coeff"])
def test_validate_against_rdkit():
    """Compare FPSim2 similarity coefficients with RDKit Tanimoto values.

    Morgan bit-vector fingerprints are computed with RDKit (using
    ``FP_PARAMS``) for every line of the ``.smi`` file and for ``query_smi``.
    The descending-sorted Tanimoto similarities are then compared pairwise
    with the ``coeff`` field of an FPSim2 search at threshold 0.0.
    """
    smi_path = os.path.join(TESTS_DIR, "data/10mols.smi")
    db_path = os.path.join(TESTS_DIR, "data/test.h5")

    def _morgan_fp(smiles_string):
        # RDKit reference fingerprint for one SMILES string.
        mol = Chem.MolFromSmiles(smiles_string)
        return Chem.rdMolDescriptors.GetMorganFingerprintAsBitVect(mol, **FP_PARAMS)

    with open(smi_path) as handle:
        smiles_lines = handle.readlines()

    reference_fps = [_morgan_fp(line) for line in smiles_lines]
    query_fp = _morgan_fp(query_smi)
    rdkit_coeffs = [
        DataStructs.TanimotoSimilarity(query_fp, ref_fp) for ref_fp in reference_fps
    ]
    rdkit_coeffs = sorted(rdkit_coeffs, reverse=True)

    engine = FPSim2Engine(db_path, storage_backend="pytables")
    fpsim_coeffs = engine.similarity(query_smi, 0.0, n_workers=1)["coeff"]

    for rdkit_value, fpsim_value in zip(rdkit_coeffs, fpsim_coeffs):
        assert math.isclose(rdkit_value, fpsim_value, rel_tol=1e-7)
def test_symmetric_matrix(n_workers):
    """Compute the symmetric distance matrix and compare it to ``MATRIX``.

    Args:
        n_workers: forwarded to ``symmetric_distance_matrix`` (presumably
            supplied by a pytest fixture or parametrization defined elsewhere).

    The matrix is computed with a first positional argument of 0.0 and
    densified before the element-wise comparison.
    """
    db_path = os.path.join(TESTS_DIR, "data/test.h5")
    engine = FPSim2Engine(db_path, storage_backend="pytables")

    sparse_result = engine.symmetric_distance_matrix(0.0, n_workers=n_workers)
    dense_result = sparse_result.todense()

    np.testing.assert_array_equal(MATRIX, dense_result)
def test_substructure(n_workers):
    """Run an in-memory substructure screen and check the single expected hit.

    Args:
        n_workers: forwarded to ``substructure`` (presumably supplied by a
            pytest fixture or parametrization defined elsewhere).
    """
    in_file = os.path.join(TESTS_DIR, "data/test.h5")
    fpe = FPSim2Engine(in_file, storage_backend="pytables")
    results = fpe.substructure(query_smi, n_workers=n_workers)

    # A single np.array call is enough; wrapping it in a second np.array only
    # made a needless copy of the same one-element uint32 array.
    expected = np.array([1], dtype="<u4")
    np.testing.assert_array_equal(results, expected)
def test_similarity(n_workers):
    """Run a similarity search at threshold 0.7 and check count and first hit.

    Args:
        n_workers: forwarded to ``similarity`` (presumably supplied by a
            pytest fixture or parametrization defined elsewhere).

    Four records are expected, the first being ``[1, 1.0]``.
    """
    db_path = os.path.join(TESTS_DIR, "data/test.h5")
    engine = FPSim2Engine(db_path, storage_backend="pytables")

    hits = engine.similarity(query_smi, 0.7, n_workers=n_workers)

    n_hits = hits.shape[0]
    assert n_hits == 4
    first_hit = list(hits[0])
    assert first_hit == [1, 1.0]
def test_sort_db_file(self):
    """Sort the 10-molecule file in place and check its first and last rows.

    After sorting, the last column of the first row must be 2 and the last
    column of the last row must be 48.
    """
    db_path = os.path.join(TESTS_DIR, "data/10mols.h5")
    sort_db_file(db_path)

    engine = FPSim2Engine(db_path, storage_backend="pytables")

    first_row_last_col = engine.fps[0][-1]
    assert first_row_last_col == 2
    last_row_last_col = engine.fps[-1][-1]
    assert last_row_last_col == 48
def test_on_disk_search():
    """Run an on-disk similarity search with two workers and check the hits.

    The engine is opened without loading fingerprints into memory. Four
    records are expected at threshold 0.7, the first being ``[1, 1.0]``.
    """
    engine = FPSim2Engine('test/10mols.h5', in_memory_fps=False)

    hits = engine.on_disk_similarity(
        query_smi,
        0.7,
        chunk_size=100000,
        n_workers=2,
    )

    n_hits = hits.shape[0]
    assert n_hits == 4
    first_hit = list(hits[0])
    assert first_hit == [1, 1.0]
def test_search():
    """Run an in-memory similarity search with one worker and check the hits.

    Four records are expected at threshold 0.7, the first being ``[1, 1.0]``.
    """
    engine = FPSim2Engine('test/10mols.h5')

    hits = engine.similarity(query_smi, 0.7, n_workers=1)

    n_hits = hits.shape[0]
    assert n_hits == 4
    first_hit = list(hits[0])
    assert first_hit == [1, 1.0]
def test_load_fps():
    """Load the test file and check the fingerprint array shape and bins.

    The loaded array must have 10 rows and 34 columns, and ``popcnt_bins``
    must not compare equal to an empty list.
    """
    db_path = os.path.join(TESTS_DIR, "data/test.h5")
    engine = FPSim2Engine(db_path, storage_backend="pytables")

    n_rows = engine.fps.shape[0]
    assert n_rows == 10
    n_cols = engine.fps.shape[1]
    assert n_cols == 34
    assert engine.popcnt_bins != []
import numpy as np from FPSim2.FPSim2lib import ( PyPopcount, SortResults, SimilaritySearch, SubstructureScreenout, ) from FPSim2 import FPSim2Engine import os TESTS_DIR = os.path.dirname(os.path.abspath(__file__)) fpe = FPSim2Engine(os.path.join(TESTS_DIR, "data/test.h5")) def test_PyPopcount(): assert PyPopcount(fpe.fps[0]) == 39 assert PyPopcount(fpe.fps[1]) == 45 assert PyPopcount(fpe.fps[2]) == 43 def test_tanimoto_SimilaritySearch(): res = SimilaritySearch(fpe.fps[0], fpe.fps, 0.0, 0.0, 0.0, 0, 0, fpe.fps.shape[0]) np.testing.assert_array_almost_equal( res[0:5]["coeff"], np.array( [ (0, 4, 1.0), (3, 6, 0.63829786), (5, 5, 0.625), (7, 2, 0.5882353),
def test_load_fps_sort():
    """Check that loading with ``fps_sort=True`` yields the same popcount bins.

    The same file is opened twice, once with default sorting behaviour and
    once with ``fps_sort=True``; both engines must report equal
    ``popcnt_bins``.
    """
    db_path = os.path.join(TESTS_DIR, "data/test.h5")

    plain_engine = FPSim2Engine(db_path, storage_backend="pytables")
    sorted_engine = FPSim2Engine(db_path, fps_sort=True)

    plain_bins = plain_engine.popcnt_bins
    sorted_bins = sorted_engine.popcnt_bins
    assert plain_bins == sorted_bins