def test_uniqueInsertion(self):
    """
    Writes the same SDF file into the molecules collection twice with one
    MolDocScheme and checks the reported counts: 200 for the first write and
    0 for the repeated write.
    """
    sdf_path = 'data/test_data/first_200.props.sdf'
    database = setupDB()
    scheme = registration.MolDocScheme()

    first_pass = write.WriteFromSDF(database.molecules, sdf_path, scheme)
    assert first_pass == 200

    # Same file, same scheme, same collection: nothing new is expected.
    second_pass = write.WriteFromSDF(database.molecules, sdf_path, scheme)
    assert second_pass == 0
def test_hashes(self):
    """
    Writes the same SDF file twice into one collection, switching the
    scheme's index between writes ("CanonicalSmiles", then
    "inchikey_standard"), and expects each write to report 200.
    """
    sdf_path = 'data/test_data/first_200.props.sdf'
    database = setupDB()
    scheme = registration.MolDocScheme()

    for index_name in ("CanonicalSmiles", "inchikey_standard"):
        scheme.set_index(index_name)
        written = write.WriteFromSDF(database.molecules, sdf_path, scheme)
        assert written == 200
def test_writeLimit(self):
    """
    Checks that WriteFromSDF called with ``limit=10`` reports exactly 10.
    """
    database = setupDB()
    scheme = registration.MolDocScheme()
    written = write.WriteFromSDF(
        database.molecules,
        'data/test_data/first_200.props.sdf',
        scheme,
        limit=10,
    )
    assert written == 10
def test_SubSearchAccuracy():
    """
    Compares substructure.SubSearch on a mock database against
    utils.SubSearchPython on the Python-side database, using each of the
    first 200 entries as the query pattern. The sorted result lists must
    be equal for every query.
    """
    sdf_path = 'data/test_data/first_200.props.sdf'

    mock_db = utils.setupMockDB()
    write.WriteFromSDF(mock_db.molecules, sdf_path)
    substructure.AddPatternFingerprints(mock_db.molecules)

    python_db = utils.setupPythonDB(sdf_path)

    for idx in range(200):
        query = Chem.Mol(python_db[idx]['rdmol'])
        expected = utils.SubSearchPython(query, python_db)
        found = substructure.SubSearch(query, mock_db.molecules)
        assert sorted(expected) == sorted(found)
def test_zeroThreshold():
    """
    A search with a Tanimoto threshold of zero should return the entire
    contents of the database. This is checked for the Python reference
    search, SimSearchAggregate and SimSearch, each expected to return
    200 results.
    """
    sdf_path = 'data/test_data/first_200.props.sdf'

    python_db = utils.setupPythonDB(sdf_path)
    mongo_db = utils.setupMockDB()
    write.WriteFromSDF(mongo_db.molecules, sdf_path)
    similarity.AddMorganFingerprints(mongo_db.molecules, mongo_db.mfp_counts)

    query = Chem.Mol(python_db[0]['rdmol'])

    reference_hits = utils.similaritySearchPython(query, python_db, 0)
    assert len(reference_hits) == 200

    aggregate_hits = similarity.SimSearchAggregate(
        query, mongo_db.molecules, mongo_db.mfp_counts, 0)
    assert len(aggregate_hits) == 200

    plain_hits = similarity.SimSearch(
        query, mongo_db.molecules, mongo_db.mfp_counts, 0)
    assert len(plain_hits) == 200
def test_similarityAccuracy():
    """
    Checks SimSearch against a brute-force search over a Python 'database'
    at thresholds 0.2, 0.4, 0.6, 0.8 and 1, for each of the first 200
    entries as the query. The sorted results must be equal.
    This test is implemented using MongoMock.
    """
    sdf_path = 'data/test_data/first_200.props.sdf'

    python_db = utils.setupPythonDB(sdf_path)
    mongo_db = utils.setupMockDB()
    write.WriteFromSDF(mongo_db.molecules, sdf_path)
    similarity.AddMorganFingerprints(mongo_db.molecules, mongo_db.mfp_counts)

    for threshold in [0.2, 0.4, 0.6, 0.8, 1]:
        for idx in range(200):
            query = Chem.Mol(python_db[idx]['rdmol'])
            expected = utils.similaritySearchPython(
                query, python_db, threshold)
            found = similarity.SimSearch(
                query, mongo_db.molecules, mongo_db.mfp_counts, threshold)
            assert sorted(expected) == sorted(found)
def test_similarityProgression():
    """
    Checks that lowering the similarity threshold never shrinks the
    SimSearch result list: each result list must be at least as long as,
    and contain every element of, the list from the previous (higher)
    threshold. This test is implemented using MongoMock.
    """
    sdf_path = 'data/test_data/first_200.props.sdf'

    python_db = utils.setupPythonDB(sdf_path)
    mongo_db = utils.setupMockDB()
    write.WriteFromSDF(mongo_db.molecules, sdf_path)
    similarity.AddMorganFingerprints(mongo_db.molecules, mongo_db.mfp_counts)

    descending_thresholds = [1, 0.8, 0.6, 0.4, 0.2]
    for idx in range(200):
        query = Chem.Mol(python_db[idx]['rdmol'])
        # Results from the previous threshold; reset for every query.
        previous = []
        for threshold in descending_thresholds:
            current = similarity.SimSearch(
                query, mongo_db.molecules, mongo_db.mfp_counts, threshold)
            assert len(current) >= len(previous)
            assert all(hit in current for hit in previous)
            previous = current
def test_similarityAggregateProgression(mongoURI):
    """
    Checks that lowering the similarity threshold never shrinks the
    SimSearchAggregate result list: each result list must be at least as
    long as, and contain every element of, the list from the previous
    (higher) threshold. This test will modify your local MongoDB instance.

    mongoURI: 'local' calls utils.setupMongoDB() with no argument; any
    other value is passed to utils.setupMongoDB.
    """
    sdf_path = 'data/test_data/first_200.props.sdf'

    python_db = utils.setupPythonDB(sdf_path)
    mongo_db = (
        utils.setupMongoDB()
        if mongoURI == 'local'
        else utils.setupMongoDB(mongoURI)
    )
    write.WriteFromSDF(mongo_db.molecules, sdf_path)
    similarity.AddMorganFingerprints(mongo_db.molecules, mongo_db.mfp_counts)

    descending_thresholds = [1, 0.8, 0.6, 0.4, 0.2]
    for idx in range(200):
        query = Chem.Mol(python_db[idx]['rdmol'])
        # Results from the previous threshold; reset for every query.
        previous = []
        for threshold in descending_thresholds:
            current = similarity.SimSearchAggregate(
                query, mongo_db.molecules, mongo_db.mfp_counts, threshold)
            assert len(current) >= len(previous)
            assert all(hit in current for hit in previous)
            previous = current
def test_similarityAccuracyAggregate(mongoURI):
    """
    Checks SimSearchAggregate against a brute-force search over a Python
    'database' at thresholds 0.2, 0.4, 0.6, 0.8 and 1, for each of the
    first 200 entries as the query. The sorted results must be equal.
    This test is relatively long and will modify your local MongoDB
    instance.

    mongoURI: 'local' calls utils.setupMongoDB() with no argument; any
    other value is passed to utils.setupMongoDB.
    """
    sdf_path = 'data/test_data/first_200.props.sdf'

    python_db = utils.setupPythonDB(sdf_path)
    mongo_db = (
        utils.setupMongoDB()
        if mongoURI == 'local'
        else utils.setupMongoDB(mongoURI)
    )
    write.WriteFromSDF(mongo_db.molecules, sdf_path)
    similarity.AddMorganFingerprints(mongo_db.molecules, mongo_db.mfp_counts)

    completed = 0
    for threshold in [0.2, 0.4, 0.6, 0.8, 1]:
        for idx in range(200):
            query = Chem.Mol(python_db[idx]['rdmol'])
            expected = utils.similaritySearchPython(
                query, python_db, threshold)
            found = similarity.SimSearchAggregate(
                query, mongo_db.molecules, mongo_db.mfp_counts, threshold)
            assert sorted(expected) == sorted(found)
            # Progress output: number of comparisons finished before this one.
            print(completed)
            completed += 1
def test_similarity_accuracy_LSH(mongoURI):
    """
    Checks that SimSearchLSH never returns a hit the brute-force Python
    search does not: for thresholds 1, 0.8, 0.6, 0.4 and 0.2 and each of
    the first 200 entries as the query, element 1 of every LSH result must
    appear among element 1 of the brute-force results (subset, not
    equality).

    mongoURI: 'local' calls utils.setupMongoDB() with no argument; any
    other value is passed to utils.setupMongoDB.

    NOTE(review): setupMongoDB presumably connects to a real MongoDB
    instance that this test writes to — confirm before running.
    """
    sdf_path = 'data/test_data/first_200.props.sdf'

    db_python = utils.setupPythonDB(sdf_path)
    if mongoURI == 'local':
        db_mongo = utils.setupMongoDB()
    else:
        db_mongo = utils.setupMongoDB(mongoURI)

    # Populate the collection, then build everything the LSH search reads.
    write.WriteFromSDF(db_mongo.molecules, sdf_path)
    similarity.AddMorganFingerprints(db_mongo.molecules, db_mongo.mfp_counts)
    similarity.AddRandPermutations(db_mongo.permutations)
    similarity.AddLocalityHashes(db_mongo.molecules, db_mongo.permutations, 25)
    similarity.AddHashCollections(db_mongo, db_mongo.molecules)

    thresholds = [1, 0.8, 0.6, 0.4, 0.2]
    counter = 0
    for t in thresholds:
        for i in range(200):
            mol = Chem.Mol(db_python[i]['rdmol'])
            search_python = [
                result[1]
                for result in utils.similaritySearchPython(mol, db_python, t)
            ]
            search_mongo_LSH = [
                result[1]
                for result in similarity.SimSearchLSH(
                    mol, db_mongo, db_mongo.molecules,
                    db_mongo.permutations, db_mongo.mfp_counts, t)
            ]
            assert set(search_mongo_LSH).issubset(search_python)
            # Progress output: number of comparisons finished before this one.
            print(counter)
            counter += 1
def test_addPatternFingerprints():
    """
    Checks that after AddPatternFingerprints runs on a mock collection
    loaded from the 200-molecule SDF file, exactly 200 documents have a
    ``fingerprints.pattern_fp`` field.
    """
    db = utils.setupMockDB()
    write.WriteFromSDF(db.molecules, 'data/test_data/first_200.props.sdf')
    substructure.AddPatternFingerprints(db.molecules)

    with_pattern_fp = db.molecules.count_documents(
        {"fingerprints.pattern_fp": {"$exists": True}}
    )
    assert with_pattern_fp == 200