Esempio n. 1
0
 def test_append_fps(self):
     """Append three molecules to ``10mols.h5`` and check the row count.

     The file is first opened with ``in_memory_fps=False`` on the pytables
     backend to append through ``storage.append_fps``; it is then reopened
     with ``in_memory_fps=True`` so the stored fingerprints can be counted.
     """
     db_path = os.path.join(TESTS_DIR, "data/10mols.h5")
     new_mols = [["CC", 11], ["CCC", 12], ["CCCC", 13]]

     engine = FPSim2Engine(
         db_path, in_memory_fps=False, storage_backend="pytables"
     )
     engine.storage.append_fps(new_mols)

     # Reopen the file so the appended rows are read back from disk.
     engine = FPSim2Engine(db_path, in_memory_fps=True)
     n_rows = engine.fps.shape[0]
     assert n_rows == 13
Esempio n. 2
0
 def test_n_delete_fps(self):
     """Delete ids 11, 12 and 13 from ``10mols.h5``, re-sort and verify.

     After deletion the file is sorted again with ``sort_db_file`` and
     reloaded; the test then checks the number of rows and the last column
     of the first and last rows.
     """
     db_path = os.path.join(TESTS_DIR, "data/10mols.h5")
     ids_to_drop = [11, 12, 13]

     engine = FPSim2Engine(
         db_path, in_memory_fps=False, storage_backend="pytables"
     )
     engine.storage.delete_fps(ids_to_drop)
     sort_db_file(db_path)

     engine = FPSim2Engine(db_path)
     fps = engine.fps
     assert fps.shape[0] == 10
     # NOTE(review): the last column is presumably the popcount - confirm.
     assert fps[0][-1] == 35
     assert fps[-1][-1] == 48
Esempio n. 3
0
def get_fpsim_engine():
    """Return the shared FPSim2 engine, building it on first use.

    The engine is created from ``FPSIM2_FILE_PATH`` and stored in the
    module-level ``FPSIM_ENGINE``; later calls return that same object.
    The time taken by the first load is printed to stdout.
    """
    global FPSIM_ENGINE
    if FPSIM_ENGINE is not None:
        return FPSIM_ENGINE

    started = time.time()
    FPSIM_ENGINE = FPSim2Engine(FPSIM2_FILE_PATH)
    elapsed = time.time() - started
    print('FPSIM2 FILE LOADED IN {0} SECS'.format(elapsed))
    return FPSIM_ENGINE
Esempio n. 4
0
def test_calc_popcnt_bins():
    """Check ``popcnt_bins`` of a sorted in-memory load against the reference.

    The reference value is the module-level ``popcnt_bins``.
    """
    db_path = os.path.join(TESTS_DIR, "data/test.h5")
    engine = FPSim2Engine(
        db_path,
        in_memory_fps=True,
        fps_sort=True,
        storage_backend="pytables",
    )
    computed_bins = engine.popcnt_bins
    assert computed_bins == popcnt_bins
Esempio n. 5
0
def test_on_disk_substructure(n_workers):
    """Run an on-disk substructure screen and compare with the expected hit.

    Args:
        n_workers: forwarded to ``on_disk_substructure`` as ``n_workers``.
    """
    db_path = os.path.join(TESTS_DIR, "data/test.h5")
    engine = FPSim2Engine(
        db_path, in_memory_fps=False, storage_backend="pytables"
    )

    hits = engine.on_disk_substructure(query_smi, n_workers=n_workers)
    expected = np.array([1], dtype="<u4")
    np.testing.assert_array_equal(hits, expected)

    # With in_memory_fps=False, reading ``fps`` is expected to raise.
    with pytest.raises(Exception):
        engine.fps
Esempio n. 6
0
def get_similar_mols(kekule_smiles: list, model: str):
    """Return, for each SMILES, field 1 of the first on-disk similarity hit.

    The fingerprint file is ``./train_data/<model>.h5`` resolved against the
    current working directory. Each query uses a threshold of 0.01.

    Args:
        kekule_smiles: SMILES strings to search with.
        model: base name (without extension) of the ``.h5`` file to load.

    Returns:
        A list with one value per query, taken from ``res[0][1]``.
        NOTE(review): presumably the coefficient of the best hit - confirm
        against the FPSim2 result layout.

    Raises:
        IndexError: if a search returns no hits (``res[0]`` does not exist).
    """
    relative_h5 = './train_data/' + model + '.h5'
    fp_dict_path = path.abspath(path.join(os.getcwd(), relative_h5))
    fp_engine = FPSim2Engine(fp_dict_path)

    return [
        fp_engine.on_disk_similarity(smi, 0.01)[0][1]
        for smi in kekule_smiles
    ]
Esempio n. 7
0
def test_create_db_file_sdf():
    """Build a fingerprint file from ``10mols.sdf`` and verify its contents.

    Molecule ids are taken from the ``mol_id`` SDF property. The stored
    fingerprint type and parameters must match ``FP_TYPE`` / ``FP_PARAMS``
    and ten fingerprints must be loaded.
    """
    sdf_path = os.path.join(TESTS_DIR, "data/10mols.sdf")
    h5_path = os.path.join(TESTS_DIR, "data/10mols_sdf.h5")
    create_db_file(
        sdf_path, h5_path, FP_TYPE, FP_PARAMS, mol_id_prop="mol_id"
    )

    engine = FPSim2Engine(h5_path, storage_backend="pytables")
    stored_type, stored_params, _ = engine.storage.read_parameters()

    assert stored_type == FP_TYPE
    for key in ("radius", "nBits"):
        assert stored_params[key] == FP_PARAMS[key]
    assert engine.fps.shape[0] == 10
Esempio n. 8
0
def test_create_db_file_list():
    """Build a fingerprint file from an in-memory list and verify it.

    Three ``[smiles, id]`` pairs are written to ``10mols_list.h5``; the
    stored fingerprint type and parameters must match ``FP_TYPE`` /
    ``FP_PARAMS`` and three fingerprints must be loaded.
    """
    h5_path = os.path.join(TESTS_DIR, "data/10mols_list.h5")
    mols = [["CC", 1], ["CCC", 2], ["CCCC", 3]]
    create_db_file(mols, h5_path, FP_TYPE, FP_PARAMS)

    engine = FPSim2Engine(h5_path, storage_backend="pytables")
    stored_type, stored_params, _ = engine.storage.read_parameters()

    assert stored_type == FP_TYPE
    for key in ("radius", "nBits"):
        assert stored_params[key] == FP_PARAMS[key]
    assert engine.fps.shape[0] == 3
Esempio n. 9
0
def test_tversky(n_workers):
    """Run a Tversky search and compare with the expected records.

    The search is called with ``query_smi`` and the positional values
    0.85, 0.5 and 0.5.

    Args:
        n_workers: forwarded to ``tversky`` as ``n_workers``.
    """
    db_path = os.path.join(TESTS_DIR, "data/test.h5")
    engine = FPSim2Engine(db_path, storage_backend="pytables")
    found = engine.tversky(query_smi, 0.85, 0.5, 0.5, n_workers=n_workers)

    record_dtype = {
        "names": ["mol_id", "coeff"],
        "formats": ["<u4", "<f4"],
    }
    expected = np.array([(1, 1.0), (6, 0.85057473)], dtype=record_dtype)
    np.testing.assert_array_equal(found, expected)
Esempio n. 10
0
def test_validate_against_rdkit():
    """Compare FPSim2 Tanimoto coefficients with RDKit's for ``10mols``.

    RDKit Morgan bit vectors (radius 2, 2048 bits) are computed for every
    line of ``test/10mols.smi`` and for ``query_smi``; the resulting
    Tanimoto similarities, sorted in descending order, are compared pairwise
    with the ``coeff`` column returned by ``FPSim2Engine.similarity`` at a
    0.0 threshold.
    """
    with open('test/10mols.smi') as f:
        smiles = f.readlines()
    fps = [
        Chem.rdMolDescriptors.GetMorganFingerprintAsBitVect(
            Chem.MolFromSmiles(smi), radius=2, nBits=2048)
        for smi in smiles
    ]
    query = Chem.rdMolDescriptors.GetMorganFingerprintAsBitVect(
        Chem.MolFromSmiles(query_smi), radius=2, nBits=2048)
    rdresults = sorted(
        [DataStructs.TanimotoSimilarity(query, fp) for fp in fps],
        reverse=True)

    fpe = FPSim2Engine('test/10mols.h5')
    results = fpe.similarity(query_smi, 0.0, n_workers=1)['coeff']

    # NOTE(review): zip stops at the shorter sequence, so a length mismatch
    # between the two result sets would go unnoticed here.
    for rds, fpss in zip(rdresults, results):
        assert math.isclose(rds, fpss, rel_tol=1e-7)
Esempio n. 11
0
def test_on_disk_similarity(n_workers):
    """Run an on-disk similarity search at 0.7 and check the coefficients.

    Only the ``coeff`` field is compared, to the default precision of
    ``assert_array_almost_equal``.

    Args:
        n_workers: forwarded to ``on_disk_similarity`` as ``n_workers``.
    """
    db_path = os.path.join(TESTS_DIR, "data/test.h5")
    engine = FPSim2Engine(
        db_path, in_memory_fps=False, storage_backend="pytables"
    )

    # With in_memory_fps=False, reading ``fps`` is expected to raise.
    with pytest.raises(Exception):
        engine.fps

    found = engine.on_disk_similarity(query_smi, 0.7, n_workers=n_workers)

    record_dtype = {
        "names": ["mol_id", "coeff"],
        "formats": ["<u4", "<f4"],
        "offsets": [4, 8],
        "itemsize": 12,
    }
    expected = np.array(
        [(1, 1.0), (6, 0.74), (7, 0.735849), (5, 0.72549)],
        dtype=record_dtype,
    )
    np.testing.assert_array_almost_equal(found["coeff"], expected["coeff"])
Esempio n. 12
0
def test_validate_against_rdkit():
    """Compare FPSim2 Tanimoto coefficients with RDKit's reference values.

    RDKit Morgan bit vectors are built with ``FP_PARAMS`` for every line of
    ``data/10mols.smi`` and for ``query_smi``. Their Tanimoto similarities,
    sorted in descending order, are compared pairwise with the ``coeff``
    column of an FPSim2 similarity search at a 0.0 threshold.
    """
    smi_path = os.path.join(TESTS_DIR, "data/10mols.smi")
    h5_path = os.path.join(TESTS_DIR, "data/test.h5")

    with open(smi_path) as handle:
        smiles = handle.readlines()

    morgan = Chem.rdMolDescriptors.GetMorganFingerprintAsBitVect
    fps = [morgan(Chem.MolFromSmiles(smi), **FP_PARAMS) for smi in smiles]
    query = morgan(Chem.MolFromSmiles(query_smi), **FP_PARAMS)

    rdresults = [DataStructs.TanimotoSimilarity(query, fp) for fp in fps]
    rdresults.sort(reverse=True)

    engine = FPSim2Engine(h5_path, storage_backend="pytables")
    results = engine.similarity(query_smi, 0.0, n_workers=1)["coeff"]

    for reference, computed in zip(rdresults, results):
        assert math.isclose(reference, computed, rel_tol=1e-7)
Esempio n. 13
0
def test_symmetric_matrix(n_workers):
    """Compare the dense form of the symmetric distance matrix to ``MATRIX``.

    Args:
        n_workers: forwarded to ``symmetric_distance_matrix``.
    """
    db_path = os.path.join(TESTS_DIR, "data/test.h5")
    engine = FPSim2Engine(db_path, storage_backend="pytables")
    sparse = engine.symmetric_distance_matrix(0.0, n_workers=n_workers)
    dense = sparse.todense()
    np.testing.assert_array_equal(MATRIX, dense)
Esempio n. 14
0
def test_substructure(n_workers):
    """Run a substructure screen and compare with the expected hit.

    Args:
        n_workers: forwarded to ``substructure`` as ``n_workers``.
    """
    db_path = os.path.join(TESTS_DIR, "data/test.h5")
    engine = FPSim2Engine(db_path, storage_backend="pytables")
    hits = engine.substructure(query_smi, n_workers=n_workers)
    expected = np.array([1], dtype="<u4")
    np.testing.assert_array_equal(hits, expected)
Esempio n. 15
0
def test_similarity(n_workers):
    """Run a similarity search at 0.7 and check hit count and first record.

    Args:
        n_workers: forwarded to ``similarity`` as ``n_workers``.
    """
    db_path = os.path.join(TESTS_DIR, "data/test.h5")
    engine = FPSim2Engine(db_path, storage_backend="pytables")
    hits = engine.similarity(query_smi, 0.7, n_workers=n_workers)

    n_hits = hits.shape[0]
    first_hit = list(hits[0])
    assert n_hits == 4
    assert first_hit == [1, 1.0]
Esempio n. 16
0
 def test_sort_db_file(self):
     """Sort ``10mols.h5`` in place and check the first and last rows.

     After ``sort_db_file`` the file is loaded and the last column of the
     first and last fingerprint rows is compared with the expected values.
     """
     db_path = os.path.join(TESTS_DIR, "data/10mols.h5")
     sort_db_file(db_path)

     engine = FPSim2Engine(db_path, storage_backend="pytables")
     fps = engine.fps
     # NOTE(review): the last column is presumably the popcount - confirm.
     assert fps[0][-1] == 2
     assert fps[-1][-1] == 48
Esempio n. 17
0
def test_on_disk_search():
    """Run an on-disk similarity search at 0.7 with two workers.

    Checks the number of hits and the fields of the first record.
    """
    engine = FPSim2Engine('test/10mols.h5', in_memory_fps=False)
    hits = engine.on_disk_similarity(
        query_smi, 0.7, chunk_size=100000, n_workers=2
    )

    n_hits = hits.shape[0]
    first_hit = list(hits[0])
    assert n_hits == 4
    assert first_hit == [1, 1.0]
Esempio n. 18
0
def test_search():
    """Run a single-worker similarity search at 0.7.

    Checks the number of hits and the fields of the first record.
    """
    engine = FPSim2Engine('test/10mols.h5')
    hits = engine.similarity(query_smi, 0.7, n_workers=1)

    n_hits = hits.shape[0]
    first_hit = list(hits[0])
    assert n_hits == 4
    assert first_hit == [1, 1.0]
Esempio n. 19
0
def test_load_fps():
    """Load ``test.h5`` and check the fingerprint array and popcount bins.

    The array must have shape (10, 34) and ``popcnt_bins`` must not equal
    an empty list.
    """
    db_path = os.path.join(TESTS_DIR, "data/test.h5")
    engine = FPSim2Engine(db_path, storage_backend="pytables")

    fps_shape = engine.fps.shape
    assert fps_shape[0] == 10
    assert fps_shape[1] == 34
    assert engine.popcnt_bins != []
Esempio n. 20
0
import numpy as np
from FPSim2.FPSim2lib import (
    PyPopcount,
    SortResults,
    SimilaritySearch,
    SubstructureScreenout,
)
from FPSim2 import FPSim2Engine
import os

# Directory containing this test module; the test data path is built from it.
TESTS_DIR = os.path.dirname(os.path.abspath(__file__))
# Module-level engine built from data/test.h5 and used by the tests below.
fpe = FPSim2Engine(os.path.join(TESTS_DIR, "data/test.h5"))


def test_PyPopcount():
    """Check ``PyPopcount`` on the first three rows of the shared engine."""
    expected_counts = (39, 45, 43)
    for row, expected in enumerate(expected_counts):
        assert PyPopcount(fpe.fps[row]) == expected


def test_tanimoto_SimilaritySearch():
    res = SimilaritySearch(fpe.fps[0], fpe.fps, 0.0, 0.0, 0.0, 0, 0,
                           fpe.fps.shape[0])
    np.testing.assert_array_almost_equal(
        res[0:5]["coeff"],
        np.array(
            [
                (0, 4, 1.0),
                (3, 6, 0.63829786),
                (5, 5, 0.625),
                (7, 2, 0.5882353),
Esempio n. 21
0
def test_load_fps_sort():
    """Check that loading with ``fps_sort=True`` gives the same popcount bins.

    One engine is opened with the pytables backend and default sorting, the
    other with ``fps_sort=True``; their ``popcnt_bins`` must be equal.
    """
    db_path = os.path.join(TESTS_DIR, "data/test.h5")
    plain_engine = FPSim2Engine(db_path, storage_backend="pytables")
    sorted_engine = FPSim2Engine(db_path, fps_sort=True)
    assert plain_engine.popcnt_bins == sorted_engine.popcnt_bins