Ejemplo n.º 1
0
 def test_uniqueInsertion(self):
     """Write the same SDF file twice and check the second write adds nothing.

     The first ``WriteFromSDF`` call is expected to return 200; repeating
     it with the same collection and scheme is expected to return 0.
     """
     sdf_path = 'data/test_data/first_200.props.sdf'
     db = setupDB()
     scheme = registration.MolDocScheme()

     first_count = write.WriteFromSDF(db.molecules, sdf_path, scheme)
     assert first_count == 200

     repeat_count = write.WriteFromSDF(db.molecules, sdf_path, scheme)
     assert repeat_count == 0
Ejemplo n.º 2
0
 def test_hashes(self):
     """Write the same SDF file under two different index settings.

     The scheme is indexed first on "CanonicalSmiles" and then on
     "inchikey_standard"; each write is expected to return 200.
     """
     sdf_path = 'data/test_data/first_200.props.sdf'
     db = setupDB()
     scheme = registration.MolDocScheme()
     for index_name in ("CanonicalSmiles", "inchikey_standard"):
         scheme.set_index(index_name)
         written = write.WriteFromSDF(db.molecules, sdf_path, scheme)
         assert written == 200
Ejemplo n.º 3
0
 def test_writeLimit(self):
     """Check that passing ``limit=10`` makes ``WriteFromSDF`` return 10."""
     db = setupDB()
     scheme = registration.MolDocScheme()
     written = write.WriteFromSDF(
         db.molecules,
         'data/test_data/first_200.props.sdf',
         scheme,
         limit=10,
     )
     assert written == 10
Ejemplo n.º 4
0
def test_SubSearchAccuracy():
    """
    Compare ``substructure.SubSearch`` against ``utils.SubSearchPython``.

    Entries 0..199 of the Python-side database are each used as the query
    pattern; the two result sets must be equal once sorted.
    """
    sdf_path = 'data/test_data/first_200.props.sdf'
    db_mock = utils.setupMockDB()
    write.WriteFromSDF(db_mock.molecules, sdf_path)
    substructure.AddPatternFingerprints(db_mock.molecules)
    db_python = utils.setupPythonDB(sdf_path)
    for index in range(200):
        query = Chem.Mol(db_python[index]['rdmol'])
        from_python = utils.SubSearchPython(query, db_python)
        from_mock = substructure.SubSearch(query, db_mock.molecules)
        assert sorted(from_python) == sorted(from_mock)
Ejemplo n.º 5
0
def test_zeroThreshold():
    """
    A search with a Tanimoto threshold of zero should return the whole
    database: each of the three search routines is expected to give 200
    results when queried with the first molecule.
    """
    sdf_path = 'data/test_data/first_200.props.sdf'
    db_python = utils.setupPythonDB(sdf_path)
    db_mongo = utils.setupMockDB()
    write.WriteFromSDF(db_mongo.molecules, sdf_path)
    similarity.AddMorganFingerprints(db_mongo.molecules, db_mongo.mfp_counts)

    query = Chem.Mol(db_python[0]['rdmol'])

    python_hits = utils.similaritySearchPython(query, db_python, 0)
    assert len(python_hits) == 200

    aggregate_hits = similarity.SimSearchAggregate(query, db_mongo.molecules, db_mongo.mfp_counts, 0)
    assert len(aggregate_hits) == 200

    plain_hits = similarity.SimSearch(query, db_mongo.molecules, db_mongo.mfp_counts, 0)
    assert len(plain_hits) == 200
Ejemplo n.º 6
0
def test_similarityAccuracy():
    """
    Check ``similarity.SimSearch`` against the brute-force
    ``utils.similaritySearchPython`` at thresholds 0.2, 0.4, 0.6, 0.8 and 1.

    Entries 0..199 of the Python-side database are each used as the query;
    the sorted results of both searches must match. The Mongo side uses the
    database returned by ``utils.setupMockDB()``.
    """
    sdf_path = 'data/test_data/first_200.props.sdf'
    db_python = utils.setupPythonDB(sdf_path)
    db_mongo = utils.setupMockDB()
    write.WriteFromSDF(db_mongo.molecules, sdf_path)
    similarity.AddMorganFingerprints(db_mongo.molecules, db_mongo.mfp_counts)
    for threshold in (0.2, 0.4, 0.6, 0.8, 1):
        for index in range(200):
            query = Chem.Mol(db_python[index]['rdmol'])
            expected = utils.similaritySearchPython(query, db_python, threshold)
            actual = similarity.SimSearch(query, db_mongo.molecules, db_mongo.mfp_counts, threshold)
            assert sorted(expected) == sorted(actual)
Ejemplo n.º 7
0
def test_similarityProgression():
    """
    Check that lowering the similarity threshold never shrinks the results.

    For each of the entries 0..199, ``similarity.SimSearch`` is run at
    thresholds 1, 0.8, 0.6, 0.4 and 0.2; every result list must be at least
    as long as the previous one and must contain all of its items. The Mongo
    side uses the database returned by ``utils.setupMockDB()``.
    """
    sdf_path = 'data/test_data/first_200.props.sdf'
    db_python = utils.setupPythonDB(sdf_path)
    db_mongo = utils.setupMockDB()
    write.WriteFromSDF(db_mongo.molecules, sdf_path)
    similarity.AddMorganFingerprints(db_mongo.molecules, db_mongo.mfp_counts)
    descending_thresholds = (1, 0.8, 0.6, 0.4, 0.2)
    for index in range(200):
        query = Chem.Mol(db_python[index]['rdmol'])
        previous = []
        for threshold in descending_thresholds:
            current = similarity.SimSearch(query, db_mongo.molecules, db_mongo.mfp_counts, threshold)
            assert len(current) >= len(previous)
            # Every hit found at the stricter threshold must still be present.
            assert all(hit in current for hit in previous)
            previous = current
Ejemplo n.º 8
0
def test_similarityAggregateProgression(mongoURI):
    """
    Check that lowering the similarity threshold never shrinks the results
    of ``similarity.SimSearchAggregate``.

    ``mongoURI`` is either the string 'local', in which case
    ``utils.setupMongoDB()`` is called without arguments, or a value that is
    passed straight to ``utils.setupMongoDB``. This test will modify your
    local MongoDB instance.
    """
    sdf_path = 'data/test_data/first_200.props.sdf'
    db_python = utils.setupPythonDB(sdf_path)
    db_mongo = utils.setupMongoDB() if mongoURI == 'local' else utils.setupMongoDB(mongoURI)
    write.WriteFromSDF(db_mongo.molecules, sdf_path)
    similarity.AddMorganFingerprints(db_mongo.molecules, db_mongo.mfp_counts)
    descending_thresholds = (1, 0.8, 0.6, 0.4, 0.2)
    for index in range(200):
        query = Chem.Mol(db_python[index]['rdmol'])
        previous = []
        for threshold in descending_thresholds:
            current = similarity.SimSearchAggregate(query, db_mongo.molecules, db_mongo.mfp_counts, threshold)
            assert len(current) >= len(previous)
            # Every hit found at the stricter threshold must still be present.
            assert all(hit in current for hit in previous)
            previous = current
Ejemplo n.º 9
0
def test_similarityAccuracyAggregate(mongoURI):
    """
    Check ``similarity.SimSearchAggregate`` against the brute-force
    ``utils.similaritySearchPython`` at thresholds 0.2, 0.4, 0.6, 0.8 and 1.

    ``mongoURI`` is either the string 'local', in which case
    ``utils.setupMongoDB()`` is called without arguments, or a value that is
    passed straight to ``utils.setupMongoDB``. This test is relatively long
    and will modify your local MongoDB instance.
    """
    sdf_path = 'data/test_data/first_200.props.sdf'
    n_queries = 200
    db_python = utils.setupPythonDB(sdf_path)
    db_mongo = utils.setupMongoDB() if mongoURI == 'local' else utils.setupMongoDB(mongoURI)
    write.WriteFromSDF(db_mongo.molecules, sdf_path)
    similarity.AddMorganFingerprints(db_mongo.molecules, db_mongo.mfp_counts)
    for t_index, threshold in enumerate((0.2, 0.4, 0.6, 0.8, 1)):
        for index in range(n_queries):
            query = Chem.Mol(db_python[index]['rdmol'])
            expected = utils.similaritySearchPython(query, db_python, threshold)
            actual = similarity.SimSearchAggregate(query, db_mongo.molecules, db_mongo.mfp_counts, threshold)
            assert sorted(expected) == sorted(actual)
            # Progress output: running number of comparisons completed so far.
            print(t_index * n_queries + index)
Ejemplo n.º 10
0
def test_similarity_accuracy_LSH(mongoURI):
    """
    Check that every hit from ``similarity.SimSearchLSH`` is also a hit of the
    brute-force ``utils.similaritySearchPython``.

    For thresholds 1, 0.8, 0.6, 0.4 and 0.2 and each of the entries 0..199,
    the second element of every LSH result must appear among the second
    elements of the brute-force results. Only the subset relation is
    asserted, so the LSH search is allowed to miss hits.

    ``mongoURI`` is either the string 'local', in which case
    ``utils.setupMongoDB()`` is called without arguments, or a value that is
    passed straight to ``utils.setupMongoDB``.
    """
    db_python = utils.setupPythonDB('data/test_data/first_200.props.sdf')
    if mongoURI == 'local':
        db_mongo = utils.setupMongoDB()
    else:
        db_mongo = utils.setupMongoDB(mongoURI)
    write.WriteFromSDF(db_mongo.molecules, 'data/test_data/first_200.props.sdf')
    similarity.AddMorganFingerprints(db_mongo.molecules, db_mongo.mfp_counts)
    similarity.AddRandPermutations(db_mongo.permutations)
    similarity.AddLocalityHashes(db_mongo.molecules, db_mongo.permutations, 25)
    similarity.AddHashCollections(db_mongo, db_mongo.molecules)
    thresholds = [1, 0.8, 0.6, 0.4, 0.2]
    counter = 0
    for t in thresholds:
        for i in range(200):
            mol = Chem.Mol(db_python[i]['rdmol'])
            search_python = [result[1] for result in utils.similaritySearchPython(mol, db_python, t)]
            search_mongo_LSH = [result[1] for result in
                                similarity.SimSearchLSH(mol, db_mongo, db_mongo.molecules, db_mongo.permutations, db_mongo.mfp_counts, t)]
            assert set(search_mongo_LSH).issubset(search_python)
            # Progress output: number of comparisons completed before this one.
            print(counter)
            counter += 1
Ejemplo n.º 11
0
def test_addPatternFingerprints():
    """
    Check that ``substructure.AddPatternFingerprints`` leaves a
    "fingerprints.pattern_fp" field on all 200 documents written from the
    test SDF file.
    """
    db = utils.setupMockDB()
    write.WriteFromSDF(db.molecules, 'data/test_data/first_200.props.sdf')
    substructure.AddPatternFingerprints(db.molecules)
    assert db.molecules.count_documents({"fingerprints.pattern_fp": {"$exists": True}}) == 200