def molecule_mongo_db(
    mongo_client: pymongo.MongoClient,
    molecules: tuple[stk.BuildingBlock, ...],
) -> CaseData:
    """
    Build a test case for retrieving every molecule in a database.

    The database ``'_test_get_all_molecules'`` is dropped and then
    refilled through three databases which use different key makers:
    the first two molecules are stored with an InChI key, the next
    two with a SMILES key and the remainder with both keys.

    Parameters
    ----------
    mongo_client:
        The client used to reach the MongoDB server.

    molecules:
        The molecules to place into the database.

    Returns
    -------
    CaseData
        Holds a database which uses an InChIKey key maker, and the
        expected molecules, keyed by their SMILES.

    """

    database_name = '_test_get_all_molecules'
    inchi = stk.Inchi()
    smiles = stk.Smiles()
    mongo_client.drop_database(database_name)

    def open_database(key_makers, indices):
        # Every database in this test case shares the same client
        # and database name; only the keys and indices differ.
        return get_database(
            database_name=database_name,
            mongo_client=mongo_client,
            key_makers=key_makers,
            indices=indices,
        )

    inchi_batch = molecules[:2]
    smiles_batch = molecules[2:4]
    both_keys_batch = molecules[4:]

    # All writing databases are created before anything is stored.
    writers = (
        (
            open_database((inchi, ), (inchi.get_key_name(), )),
            inchi_batch,
        ),
        (
            open_database((smiles, ), (smiles.get_key_name(), )),
            smiles_batch,
        ),
        (
            open_database((inchi, smiles), ()),
            both_keys_batch,
        ),
    )
    for database, batch in writers:
        for molecule in batch:
            database.put(molecule)

    # The returned database uses a key maker that none of the
    # writing databases used.
    reader = open_database((stk.InchiKey(), ), ())
    return CaseData(
        database=reader,
        expected_molecules={
            smiles.get_key(molecule): molecule
            for molecule in molecules
        },
    )
def test_update_1():
    """
    Test that existing entries are updated.

    The same molecule is put into the database twice, with a
    different value each time. After each put, the database is
    expected to hold a single entry carrying the latest value.

    """

    collection = '_test_update_1'
    database_name = '_test_update_1'
    client = pymongo.MongoClient()
    client.drop_database(database_name)

    # Both LRU cache sizes are set to 0, presumably so that every
    # call goes to MongoDB itself.
    database = stk.ValueMongoDb(
        mongo_client=client,
        collection=collection,
        database=database_name,
        put_lru_cache_size=0,
        get_lru_cache_size=0,
    )

    molecule = stk.BuildingBlock('CCC')
    inchi_key = stk.InchiKey().get_key(molecule)

    for value in (12, 43):
        database.put(molecule, value)
        assert_database_state(
            state1=get_database_state(database),
            state2=DatabaseState({
                DatabaseEntry(InChIKey=inchi_key, v=value): 1,
            }),
        )
import numpy as np import pytest import stk from .case_data import CaseData @pytest.fixture( scope='session', params=( lambda: CaseData( jsonizer=stk.MoleculeJsonizer(key_makers=( stk.Inchi(), stk.InchiKey(), )), molecule=stk.BuildingBlock(smiles='Br[C+2][C+2]Br', ). with_position_matrix( np.array([ [0, 0, 0], [1, 1, 1], [2, 2, 2], [3, 3, 3], ], dtype=np.float64)), json={ 'molecule': { 'a': ( (35, 0), (6, 2), (6, 2),
import pytest import stk from .case_data import CaseData @pytest.fixture( params=( CaseData( key_maker=stk.Inchi(), molecule=stk.BuildingBlock('NCCN'), key_name='InChI', key='InChI=1S/C2H8N2/c3-1-2-4/h1-4H2', ), CaseData( key_maker=stk.InchiKey(), molecule=stk.BuildingBlock('NCCN'), key_name='InChIKey', key='PIICEJLVQHRZGT-UHFFFAOYSA-N', ), CaseData( key_maker=stk.Smiles(), molecule=stk.BuildingBlock('NCCN'), key_name='SMILES', key='NCCN', ), CaseData( key_maker=stk.Smiles(), molecule=stk.BuildingBlock('C(N)CN'), key_name='SMILES', key='NCCN',
def test_update_2(mongo_client):
    """
    Test that existing entries are updated.

    Two separate entries are created first, each through a database
    which uses a single, different molecule key. A third database,
    which uses both molecule keys, then puts a new value, and both
    entries are expected to be updated by it.

    """

    collection = '_test_update_2'
    database_name = '_test_update_2'
    mongo_client.drop_database(database_name)

    def make_database(*key_makers):
        # The databases differ only in the key makers they use.
        return stk.ValueMongoDb(
            mongo_client=mongo_client,
            collection=collection,
            database=database_name,
            put_lru_cache_size=0,
            get_lru_cache_size=0,
            key_makers=key_makers,
        )

    inchi_key_db = make_database(stk.InchiKey())
    smiles_db = make_database(stk.Smiles())
    both_keys_db = make_database(stk.InchiKey(), stk.Smiles())

    molecule = stk.BuildingBlock('CCC')
    inchi_key = stk.InchiKey().get_key(molecule)
    smiles = stk.Smiles().get_key(molecule)

    def check(expected):
        # The state is always read through the first database.
        assert_database_state(
            state1=get_database_state(inchi_key_db),
            state2=DatabaseState(expected),
        )

    inchi_key_db.put(molecule, 12)
    check({
        DatabaseEntry(InChIKey=inchi_key, v=12): 1,
    })

    # A different key maker is used, so a second entry is expected.
    smiles_db.put(molecule, 32)
    check({
        DatabaseEntry(InChIKey=inchi_key, v=12): 1,
        DatabaseEntry(SMILES=smiles, v=32): 1,
    })

    # Both key makers are used, so both entries are expected to be
    # updated, leaving two identical entries.
    both_keys_db.put(molecule, 56)
    check({
        DatabaseEntry(InChIKey=inchi_key, SMILES=smiles, v=56): 2,
    })
def test_update_3(mongo_client):
    """
    Test that existing entries are updated.

    One entry holding two keys is created first. It is then updated
    through two further databases, each of which uses just one of
    those keys. Neither update should create a duplicate entry.

    """

    collection = '_test_update_3'
    database_name = '_test_update_3'
    mongo_client.drop_database(database_name)

    def make_database(*key_makers):
        # The databases differ only in the key makers they use.
        return stk.ValueMongoDb(
            mongo_client=mongo_client,
            collection=collection,
            database=database_name,
            put_lru_cache_size=0,
            get_lru_cache_size=0,
            key_makers=key_makers,
        )

    both_keys_db = make_database(stk.InchiKey(), stk.Smiles())
    inchi_key_db = make_database(stk.InchiKey())
    smiles_db = make_database(stk.Smiles())

    molecule = stk.BuildingBlock('CCC')
    inchi_key = stk.InchiKey().get_key(molecule)
    smiles = stk.Smiles().get_key(molecule)

    # The first put creates the entry, the next two should update it.
    writes = (
        (both_keys_db, 12),
        (inchi_key_db, 32),
        (smiles_db, 62),
    )
    for database, value in writes:
        database.put(molecule, value)
        # The state is always read through the first database, and a
        # single entry holding both keys is expected every time.
        assert_database_state(
            state1=get_database_state(both_keys_db),
            state2=DatabaseState({
                DatabaseEntry(
                    InChIKey=inchi_key,
                    SMILES=smiles,
                    v=value,
                ): 1,
            }),
        )
def test_update_3():
    """
    Test that existing entries are updated.

    One entry holding two keys is created first. It is then updated
    through two further databases, each of which uses just one of
    those keys. Neither update should create a duplicate entry.

    """

    database_name = '_test_update_3'
    client = pymongo.MongoClient()
    client.drop_database(database_name)

    def make_database(jsonizer):
        # The databases differ only in the jsonizer they use.
        return stk.ConstructedMoleculeMongoDb(
            mongo_client=client,
            database=database_name,
            put_lru_cache_size=0,
            get_lru_cache_size=0,
            jsonizer=jsonizer,
        )

    both_keys_jsonizer = stk.ConstructedMoleculeJsonizer(
        key_makers=(stk.InchiKey(), stk.Smiles()),
    )
    both_keys_db = make_database(both_keys_jsonizer)

    inchi_key_jsonizer = stk.ConstructedMoleculeJsonizer(
        key_makers=(stk.InchiKey(), ),
    )
    inchi_key_db = make_database(inchi_key_jsonizer)

    smiles_jsonizer = stk.ConstructedMoleculeJsonizer(
        key_makers=(stk.Smiles(), ),
    )
    smiles_db = make_database(smiles_jsonizer)

    building_block = stk.BuildingBlock(
        smiles='BrCCCBr',
        functional_groups=[stk.BromoFactory()],
    ).with_canonical_atom_ordering()

    # The same building block is used twice, to make sure it is not
    # repeatedly added to the molecules database.
    topology_graph = stk.polymer.Linear(
        building_blocks=(building_block, building_block),
        repeating_unit='AB',
        num_repeating_units=2,
    )
    polymer = stk.ConstructedMolecule(
        topology_graph=topology_graph,
    ).with_canonical_atom_ordering()
    polymer_json = both_keys_jsonizer.to_json(polymer)

    def check(matrix_json):
        # Only the position matrix entry of the polymer is expected
        # to change between puts; every other entry comes from the
        # JSON of the first put.
        building_block_json = polymer_json['buildingBlocks'][0]
        assert_database_state(
            state1=get_database_state(both_keys_db),
            state2=DatabaseState({
                DatabaseEntry(**polymer_json['molecule']): 1,
                DatabaseEntry(**to_hashable_matrix(matrix_json)): 1,
                DatabaseEntry(**building_block_json['molecule']): 1,
                DatabaseEntry(**to_hashable_matrix(
                    json=building_block_json['matrix'],
                )): 1,
                DatabaseEntry(**to_hashable_constructed_molecule(
                    json=polymer_json['constructedMolecule'],
                )): 1,
            }),
        )

    both_keys_db.put(polymer)
    check(polymer_json['matrix'])

    # Each single-key database should update the existing entry.
    single_key_writers = (
        (inchi_key_jsonizer, inchi_key_db),
        (smiles_jsonizer, smiles_db),
    )
    for jsonizer, database in single_key_writers:
        moved = polymer.with_position_matrix(
            position_matrix=np.zeros((polymer.get_num_atoms(), 3)),
        )
        # The stored matrix entry should keep the keys it was
        # created with, and only take on the new matrix.
        expected_matrix = dict(polymer_json['matrix'])
        expected_matrix['m'] = jsonizer.to_json(moved)['matrix']['m']
        database.put(moved)
        check(expected_matrix)
def test_update_2(mongo_client):
    """
    Test that existing entries are updated.

    Two separate entries are created first, each through a database
    whose jsonizer is configured differently. A third database,
    whose jsonizer uses both an InChIKey and a SMILES key maker,
    then puts the molecule, and both entries are expected to be
    updated by it.

    """

    database_name = '_test_update_2'
    mongo_client.drop_database(database_name)

    def make_database(jsonizer):
        # The databases differ only in the jsonizer they use.
        return stk.MoleculeMongoDb(
            mongo_client=mongo_client,
            database=database_name,
            put_lru_cache_size=0,
            get_lru_cache_size=0,
            jsonizer=jsonizer,
        )

    def entries(document):
        # The molecule entry and the position matrix entry expected
        # for one JSON representation.
        return (
            DatabaseEntry(**document['molecule']),
            DatabaseEntry(**to_hashable(document['matrix'])),
        )

    default_jsonizer = stk.MoleculeJsonizer()
    default_db = make_database(default_jsonizer)

    smiles_jsonizer = stk.MoleculeJsonizer(
        key_makers=(stk.Smiles(), ),
    )
    smiles_db = make_database(smiles_jsonizer)

    both_keys_jsonizer = stk.MoleculeJsonizer(
        key_makers=(stk.InchiKey(), stk.Smiles()),
    )
    both_keys_db = make_database(both_keys_jsonizer)

    first = stk.BuildingBlock('CCC').with_canonical_atom_ordering()
    first_json = default_jsonizer.to_json(first)
    default_db.put(first)
    assert_database_state(
        state1=get_database_state(default_db),
        state2=DatabaseState(dict.fromkeys(entries(first_json), 1)),
    )

    # A different key maker is used, so new entries are expected.
    second = first.with_position_matrix(
        position_matrix=np.zeros((first.get_num_atoms(), 3)),
    )
    second_json = smiles_jsonizer.to_json(second)
    smiles_db.put(second)
    assert_database_state(
        state1=get_database_state(default_db),
        state2=DatabaseState(dict.fromkeys(
            entries(first_json) + entries(second_json),
            1,
        )),
    )

    # Both key makers are used, so both sets of entries are expected
    # to be updated, leaving two identical copies of each.
    third = first.with_position_matrix(
        position_matrix=np.ones((first.get_num_atoms(), 3)),
    )
    third_json = both_keys_jsonizer.to_json(third)
    both_keys_db.put(third)
    assert_database_state(
        state1=get_database_state(default_db),
        state2=DatabaseState(dict.fromkeys(entries(third_json), 2)),
    )
def test_update_3(mongo_client):
    """
    Test that existing entries are updated.

    One entry holding two keys is created first. It is then updated
    through two further databases, each of which uses a jsonizer
    with a different configuration. Neither update should create a
    duplicate entry.

    """

    database_name = '_test_update_3'
    mongo_client.drop_database(database_name)

    def make_database(jsonizer):
        # The databases differ only in the jsonizer they use.
        return stk.MoleculeMongoDb(
            mongo_client=mongo_client,
            database=database_name,
            put_lru_cache_size=0,
            get_lru_cache_size=0,
            jsonizer=jsonizer,
        )

    both_keys_jsonizer = stk.MoleculeJsonizer(
        key_makers=(stk.InchiKey(), stk.Smiles()),
    )
    both_keys_db = make_database(both_keys_jsonizer)

    default_jsonizer = stk.MoleculeJsonizer()
    default_db = make_database(default_jsonizer)

    smiles_jsonizer = stk.MoleculeJsonizer(
        key_makers=(stk.Smiles(), ),
    )
    smiles_db = make_database(smiles_jsonizer)

    original = stk.BuildingBlock('CCC').with_canonical_atom_ordering()
    original_json = both_keys_jsonizer.to_json(original)

    def check(matrix_json):
        # The molecule entry is expected to stay as first written;
        # only the position matrix entry changes between puts.
        assert_database_state(
            state1=get_database_state(both_keys_db),
            state2=DatabaseState({
                DatabaseEntry(**original_json['molecule']): 1,
                DatabaseEntry(**to_hashable(matrix_json)): 1,
            }),
        )

    both_keys_db.put(original)
    check(original_json['matrix'])

    # Each of the remaining databases should update the entry.
    updaters = (
        (default_jsonizer, default_db),
        (smiles_jsonizer, smiles_db),
    )
    for jsonizer, database in updaters:
        moved = original.with_position_matrix(
            position_matrix=np.zeros((original.get_num_atoms(), 3)),
        )
        # The stored matrix entry should keep the keys it was
        # created with, and only take on the new matrix.
        expected_matrix = dict(original_json['matrix'])
        expected_matrix['m'] = jsonizer.to_json(moved)['matrix']['m']
        database.put(moved)
        check(expected_matrix)
def test_update_2(mongo_client):
    """
    Test that existing entries are updated.

    In this test, you first create two separate entries, using
    different molecule keys. You then update both at the same time,
    with a database which uses both molecule keys.

    """

    database_name = '_test_update_2'
    mongo_client.drop_database(database_name)

    # The three databases share a client and database name, and
    # differ only in the key makers of their jsonizers.
    jsonizer1 = stk.ConstructedMoleculeJsonizer(
        key_makers=(stk.InchiKey(), ),
    )
    database1 = stk.ConstructedMoleculeMongoDb(
        mongo_client=mongo_client,
        database=database_name,
        put_lru_cache_size=0,
        get_lru_cache_size=0,
        jsonizer=jsonizer1,
    )

    jsonizer2 = stk.ConstructedMoleculeJsonizer(
        key_makers=(stk.Smiles(), ),
    )
    database2 = stk.ConstructedMoleculeMongoDb(
        mongo_client=mongo_client,
        database=database_name,
        put_lru_cache_size=0,
        get_lru_cache_size=0,
        jsonizer=jsonizer2,
    )

    jsonizer3 = stk.ConstructedMoleculeJsonizer(
        key_makers=(stk.InchiKey(), stk.Smiles()),
    )
    database3 = stk.ConstructedMoleculeMongoDb(
        mongo_client=mongo_client,
        database=database_name,
        put_lru_cache_size=0,
        get_lru_cache_size=0,
        jsonizer=jsonizer3,
    )

    molecule = stk.BuildingBlock(
        smiles='BrCCCBr',
        functional_groups=[stk.BromoFactory()],
    ).with_canonical_atom_ordering()

    polymer1 = stk.ConstructedMolecule(
        topology_graph=stk.polymer.Linear(
            # Use it as a building block twice, to make sure it is
            # not repeatedly added to the molecules database.
            building_blocks=(molecule, molecule),
            repeating_unit='AB',
            num_repeating_units=2,
        ),
    ).with_canonical_atom_ordering()
    json1 = jsonizer1.to_json(polymer1)

    database1.put(polymer1)
    assert_database_state(
        state1=get_database_state(database1),
        state2=DatabaseState({
            DatabaseEntry(**json1['molecule']): 1,
            DatabaseEntry(**to_hashable_matrix(json1['matrix'])): 1,
            DatabaseEntry(**json1['buildingBlocks'][0]['molecule']): 1,
            DatabaseEntry(**to_hashable_matrix(
                json=json1['buildingBlocks'][0]['matrix'],
            )): 1,
            DatabaseEntry(**to_hashable_constructed_molecule(
                json=json1['constructedMolecule'],
            )): 1,
        }),
    )

    # Should add another entry, as a different key maker is used.
    polymer2 = polymer1.with_position_matrix(
        position_matrix=np.zeros((polymer1.get_num_atoms(), 3)),
    )
    json2 = jsonizer2.to_json(polymer2)

    database2.put(polymer2)
    assert_database_state(
        state1=get_database_state(database1),
        state2=DatabaseState({
            DatabaseEntry(**json1['molecule']): 1,
            DatabaseEntry(**to_hashable_matrix(json1['matrix'])): 1,
            DatabaseEntry(**json1['buildingBlocks'][0]['molecule']): 1,
            DatabaseEntry(**to_hashable_matrix(
                json=json1['buildingBlocks'][0]['matrix'],
            )): 1,
            DatabaseEntry(**to_hashable_constructed_molecule(
                json=json1['constructedMolecule'],
            )): 1,
            DatabaseEntry(**json2['molecule']): 1,
            DatabaseEntry(**to_hashable_matrix(json2['matrix'])): 1,
            DatabaseEntry(**json2['buildingBlocks'][0]['molecule']): 1,
            DatabaseEntry(**to_hashable_matrix(
                json=json2['buildingBlocks'][0]['matrix'],
            )): 1,
            DatabaseEntry(**to_hashable_constructed_molecule(
                json=json2['constructedMolecule'],
            )): 1,
        }),
    )

    # Should update both entries.
    polymer3 = polymer1.with_position_matrix(
        position_matrix=np.zeros((polymer1.get_num_atoms(), 3)),
    )
    json3 = jsonizer3.to_json(polymer3)

    database3.put(polymer3)
    # After the update, the two sets of entries are expected to be
    # identical, so each distinct entry is listed once, with a count
    # of 2. Repeating a key in the dict literal would not add to the
    # count, as a repeated key just replaces the earlier one.
    assert_database_state(
        state1=get_database_state(database1),
        state2=DatabaseState({
            DatabaseEntry(**json3['molecule']): 2,
            DatabaseEntry(**to_hashable_matrix(json3['matrix'])): 2,
            DatabaseEntry(**json3['buildingBlocks'][0]['molecule']): 2,
            DatabaseEntry(**to_hashable_matrix(
                json=json3['buildingBlocks'][0]['matrix'],
            )): 2,
            DatabaseEntry(**to_hashable_constructed_molecule(
                json=json3['constructedMolecule'],
            )): 2,
        }),
    )