def add_constructed_molecules(
    client,
    database,
    key_makers,
):
    """
    Fill a database with linear dimers and their atom counts.

    One ``AB`` dimer is built from every pair of building blocks
    obtained by zipping two ``get_molecules(200, 5)`` calls. Each
    dimer is put into the constructed molecule database, and the
    atom counts of the dimer and of its first building block are put
    into the ``numAtoms`` collection.

    Parameters
    ----------
    client
        The MongoDB client handed to both databases.

    database
        Passed as the ``database`` argument of both databases.

    key_makers
        The key makers used by the jsonizer and by the value
        database.

    """

    jsonizer = stk.ConstructedMoleculeJsonizer(key_makers=key_makers)
    molecule_store = stk.ConstructedMoleculeMongoDb(
        mongo_client=client,
        database=database,
        molecule_collection='molecules',
        position_matrix_collection='position_matrices',
        jsonizer=jsonizer,
    )
    atom_counts = stk.ValueMongoDb(
        mongo_client=client,
        collection='numAtoms',
        database=database,
        key_makers=key_makers,
    )

    first_blocks = get_molecules(200, 5)
    second_blocks = get_molecules(200, 5)
    for first, second in zip(first_blocks, second_blocks):
        dimer_graph = stk.polymer.Linear(
            building_blocks=(first, second),
            repeating_unit='AB',
            num_repeating_units=1,
        )
        dimer = stk.ConstructedMolecule(topology_graph=dimer_graph)

        molecule_store.put(dimer)
        atom_counts.put(dimer, dimer.get_num_atoms())
        # Only the first building block of each pair gets a value.
        atom_counts.put(first, first.get_num_atoms())
def test_get_all():
    """
    Test iteration over all molecules.

    Two constructed molecules are put into a freshly dropped database.
    Every molecule returned by ``get_all()`` is then matched to its
    original by InChI and compared after canonical atom ordering.
    Finally, the number of returned molecules is checked.

    """

    database_name = '_test_get_entries_constructed_molecule'
    client = pymongo.MongoClient()
    # Start from a clean slate so earlier runs cannot affect the count.
    client.drop_database(database_name)

    key_maker = stk.Inchi()
    jsonizer = stk.ConstructedMoleculeJsonizer(key_makers=(key_maker, ))

    database = stk.ConstructedMoleculeMongoDb(
        mongo_client=client,
        database=database_name,
        jsonizer=jsonizer,
        put_lru_cache_size=0,
        get_lru_cache_size=0,
    )

    molecules = [
        stk.ConstructedMolecule(
            topology_graph=stk.polymer.Linear(
                building_blocks=(
                    stk.BuildingBlock(
                        smiles='BrCCCBr',
                        functional_groups=[stk.BromoFactory()],
                    ),
                ),
                repeating_unit='A',
                num_repeating_units=3,
            ),
        ),
        stk.ConstructedMolecule(
            topology_graph=stk.polymer.Linear(
                building_blocks=(
                    stk.BuildingBlock(
                        smiles='BrCCBr',
                        functional_groups=[stk.BromoFactory()],
                    ),
                    stk.BuildingBlock(
                        smiles='BrCNCBr',
                        functional_groups=[stk.BromoFactory()],
                    ),
                ),
                repeating_unit='AB',
                num_repeating_units=2,
            ),
        ),
    ]
    molecules_by_key = {
        key_maker.get_key(molecule): molecule
        for molecule in molecules
    }

    for molecule in molecules:
        database.put(molecule)

    # Initialise the counter before the loop: if get_all() yields
    # nothing, the final check must fail as an assertion rather than
    # raise NameError on an unbound loop variable.
    num_retrieved = 0
    for num_retrieved, retrieved in enumerate(database.get_all(), 1):
        key = key_maker.get_key(retrieved)
        molecule = molecules_by_key[key]
        # NOTE(review): the return value is unused, so this helper
        # presumably asserts internally - confirm.
        is_equivalent_constructed_molecule(
            molecule.with_canonical_atom_ordering(),
            retrieved.with_canonical_atom_ordering(),
        )

    # Check number of molecules.
    assert num_retrieved == len(molecules)
def get_database(
    database_name: str,
    mongo_client: pymongo.MongoClient,
    key_makers: tuple[stk.MoleculeKeyMaker, ...],
    indices: tuple[str, ...],
) -> stk.ConstructedMoleculeMongoDb:
    """
    Create a constructed molecule database with cache sizes of 0.

    Parameters
    ----------
    database_name
        Passed as the ``database`` argument of the database.

    mongo_client
        The MongoDB client the database uses.

    key_makers
        The key makers given to the database's jsonizer.

    indices
        Passed as the ``indices`` argument of the database.

    Returns
    -------
    stk.ConstructedMoleculeMongoDb
        The database, with both LRU cache sizes set to ``0``.

    """

    jsonizer = stk.ConstructedMoleculeJsonizer(key_makers)
    return stk.ConstructedMoleculeMongoDb(
        database=database_name,
        mongo_client=mongo_client,
        indices=indices,
        jsonizer=jsonizer,
        get_lru_cache_size=0,
        put_lru_cache_size=0,
    )
), ), repeating_unit='A', num_repeating_units=2, ), ), key={ 'InChIKey': rdkit.MolToInchiKey(rdkit.MolFromSmiles(SMILES='BrCCCCBr')), }, ), lambda: CaseDataData( get_database=lambda mongo_client: (stk.ConstructedMoleculeMongoDb( mongo_client=mongo_client, database='_stk_test_database_for_testing', jsonizer=stk.ConstructedMoleculeJsonizer(key_makers=( stk.MoleculeKeyMaker( key_name='SMILES', get_key=lambda molecule: (rdkit.MolToSmiles(mol=molecule.to_rdkit_mol(), )), ), ), ), put_lru_cache_size=0, get_lru_cache_size=0, )), molecule=stk.ConstructedMolecule(topology_graph=stk.polymer.Linear( building_blocks=(stk.BuildingBlock( smiles='Br[C+2][C+2]Br', functional_groups=[stk.BromoFactory()], ), ), repeating_unit='A', num_repeating_units=2, ), ), key={'SMILES': 'Br[C+2][C+2][C+2][C+2]Br'}, ),
}, 'matrix': { 'm': [ [0., 0., 0.], [1., 1., 1.], [2., 2., 2.], [3., 3., 3.], ], 'InChI': 'InChI=1S/C2Br2/c3-1-2-4/q+4', 'InChIKey': 'UWAHASCVLDBPQQ-UHFFFAOYSA-N', } }, ), lambda: CaseData( jsonizer=stk.ConstructedMoleculeJsonizer(key_makers=( stk.Inchi(), stk.InchiKey(), ), ), molecule=stk.ConstructedMolecule(topology_graph=stk.polymer.Linear( building_blocks=(stk.BuildingBlock( smiles='Br[C+2][C+2]Br', functional_groups=[stk.BromoFactory()], ).with_position_matrix( np.array([ [0, 0, 0], [1, 1, 1], [2, 2, 2], [3, 3, 3], ], dtype=np.float64)), ), repeating_unit='A', num_repeating_units=2,
def test_update_1():
    """
    Test that existing entries are updated.

    A polymer is put into an empty database and the database state
    is checked. The same polymer is then put again with an all-zero
    position matrix, and the state is expected to differ only in the
    polymer's position matrix entry.

    """

    database_name = '_test_update_1'
    client = pymongo.MongoClient()
    client.drop_database(database_name)

    database = stk.ConstructedMoleculeMongoDb(
        mongo_client=client,
        database=database_name,
        put_lru_cache_size=0,
        get_lru_cache_size=0,
    )
    jsonizer = stk.ConstructedMoleculeJsonizer()

    building_block = stk.BuildingBlock(
        smiles='BrCCBr',
        functional_groups=[stk.BromoFactory()],
    ).with_canonical_atom_ordering()
    # The same building block fills both slots, to make sure it is
    # not repeatedly added to the molecules database.
    topology_graph = stk.polymer.Linear(
        building_blocks=(building_block, building_block),
        repeating_unit='AB',
        num_repeating_units=2,
    )
    polymer = stk.ConstructedMolecule(
        topology_graph=topology_graph,
    ).with_canonical_atom_ordering()
    polymer_json = jsonizer.to_json(polymer)

    def expected_state(polymer_matrix):
        # Only the polymer's position matrix differs between the two
        # checks; every other entry comes from the first JSON.
        block_json = polymer_json['buildingBlocks'][0]
        return DatabaseState({
            DatabaseEntry(**polymer_json['molecule']): 1,
            DatabaseEntry(**to_hashable_matrix(polymer_matrix)): 1,
            DatabaseEntry(**block_json['molecule']): 1,
            DatabaseEntry(**to_hashable_matrix(
                json=block_json['matrix'],
            )): 1,
            DatabaseEntry(**to_hashable_constructed_molecule(
                json=polymer_json['constructedMolecule'],
            )): 1,
        })

    database.put(polymer)
    assert_database_state(
        state1=get_database_state(database),
        state2=expected_state(polymer_json['matrix']),
    )

    moved_polymer = polymer.with_position_matrix(
        position_matrix=np.zeros((polymer.get_num_atoms(), 3)),
    )
    moved_json = jsonizer.to_json(moved_polymer)
    database.put(moved_polymer)
    assert_database_state(
        state1=get_database_state(database),
        state2=expected_state(moved_json['matrix']),
    )
def test_update_3():
    """
    Test that existing entries are updated.

    In this test, you first create one entry with two keys. Then you
    update the entry with two further databases, each using one of
    those keys. No duplicate entries should be made in the database
    this way.

    """

    database_name = '_test_update_3'
    client = pymongo.MongoClient()
    client.drop_database(database_name)

    def make_database(jsonizer):
        # All three databases share the client and the database name
        # and differ only in their jsonizer.
        return stk.ConstructedMoleculeMongoDb(
            mongo_client=client,
            database=database_name,
            put_lru_cache_size=0,
            get_lru_cache_size=0,
            jsonizer=jsonizer,
        )

    both_keys = stk.ConstructedMoleculeJsonizer(
        key_makers=(stk.InchiKey(), stk.Smiles()),
    )
    both_keys_db = make_database(both_keys)

    inchi_key_only = stk.ConstructedMoleculeJsonizer(
        key_makers=(stk.InchiKey(), ),
    )
    inchi_key_db = make_database(inchi_key_only)

    smiles_only = stk.ConstructedMoleculeJsonizer(
        key_makers=(stk.Smiles(), ),
    )
    smiles_db = make_database(smiles_only)

    building_block = stk.BuildingBlock(
        smiles='BrCCCBr',
        functional_groups=[stk.BromoFactory()],
    ).with_canonical_atom_ordering()
    # The same building block fills both slots, to make sure it is
    # not repeatedly added to the molecules database.
    topology_graph = stk.polymer.Linear(
        building_blocks=(building_block, building_block),
        repeating_unit='AB',
        num_repeating_units=2,
    )
    original = stk.ConstructedMolecule(
        topology_graph=topology_graph,
    ).with_canonical_atom_ordering()
    original_json = both_keys.to_json(original)

    def expected_state(polymer_matrix):
        # Every entry except the polymer's position matrix is taken
        # from the JSON made with both key makers.
        block_json = original_json['buildingBlocks'][0]
        return DatabaseState({
            DatabaseEntry(**original_json['molecule']): 1,
            DatabaseEntry(**to_hashable_matrix(polymer_matrix)): 1,
            DatabaseEntry(**block_json['molecule']): 1,
            DatabaseEntry(**to_hashable_matrix(
                json=block_json['matrix'],
            )): 1,
            DatabaseEntry(**to_hashable_constructed_molecule(
                json=original_json['constructedMolecule'],
            )): 1,
        })

    both_keys_db.put(original)
    assert_database_state(
        state1=get_database_state(both_keys_db),
        state2=expected_state(original_json['matrix']),
    )

    # Should update the entry.
    zeroed_1 = original.with_position_matrix(
        position_matrix=np.zeros((original.get_num_atoms(), 3)),
    )
    inchi_key_json = inchi_key_only.to_json(zeroed_1)
    # The expected matrix entry keeps the keys of the original entry
    # and only takes the new coordinates.
    inchi_key_json['matrix'] = {
        **original_json['matrix'],
        'm': inchi_key_only.to_json(zeroed_1)['matrix']['m'],
    }
    inchi_key_db.put(zeroed_1)
    assert_database_state(
        state1=get_database_state(both_keys_db),
        state2=expected_state(inchi_key_json['matrix']),
    )

    # Should also update the entry.
    zeroed_2 = original.with_position_matrix(
        position_matrix=np.zeros((original.get_num_atoms(), 3)),
    )
    smiles_json = smiles_only.to_json(zeroed_2)
    smiles_json['matrix'] = {
        **original_json['matrix'],
        'm': smiles_only.to_json(zeroed_2)['matrix']['m'],
    }
    smiles_db.put(zeroed_2)
    assert_database_state(
        state1=get_database_state(both_keys_db),
        state2=expected_state(smiles_json['matrix']),
    )
def test_update_2(mongo_client):
    """
    Test that existing entries are updated.

    In this test, you first create two separate entries, using
    different molecule keys. You then update both at the same time,
    with a database which uses both molecule keys.

    Parameters
    ----------
    mongo_client
        The MongoDB client used by all three databases.

    """

    database_name = '_test_update_2'
    mongo_client.drop_database(database_name)

    jsonizer1 = stk.ConstructedMoleculeJsonizer(
        key_makers=(stk.InchiKey(), ),
    )
    database1 = stk.ConstructedMoleculeMongoDb(
        mongo_client=mongo_client,
        database=database_name,
        put_lru_cache_size=0,
        get_lru_cache_size=0,
        jsonizer=jsonizer1,
    )

    jsonizer2 = stk.ConstructedMoleculeJsonizer(
        key_makers=(stk.Smiles(), ),
    )
    database2 = stk.ConstructedMoleculeMongoDb(
        mongo_client=mongo_client,
        database=database_name,
        put_lru_cache_size=0,
        get_lru_cache_size=0,
        jsonizer=jsonizer2,
    )

    jsonizer3 = stk.ConstructedMoleculeJsonizer(
        key_makers=(
            stk.InchiKey(),
            stk.Smiles(),
        ),
    )
    database3 = stk.ConstructedMoleculeMongoDb(
        mongo_client=mongo_client,
        database=database_name,
        put_lru_cache_size=0,
        get_lru_cache_size=0,
        jsonizer=jsonizer3,
    )

    molecule = stk.BuildingBlock(
        smiles='BrCCCBr',
        functional_groups=[stk.BromoFactory()],
    ).with_canonical_atom_ordering()

    polymer1 = stk.ConstructedMolecule(
        topology_graph=stk.polymer.Linear(
            # Use it as a building block twice, to make sure it is
            # not repeatedly added to the molecules database.
            building_blocks=(molecule, molecule),
            repeating_unit='AB',
            num_repeating_units=2,
        ),
    ).with_canonical_atom_ordering()
    json1 = jsonizer1.to_json(polymer1)

    database1.put(polymer1)
    assert_database_state(
        state1=get_database_state(database1),
        state2=DatabaseState({
            DatabaseEntry(**json1['molecule']): 1,
            DatabaseEntry(**to_hashable_matrix(json1['matrix'])): 1,
            DatabaseEntry(**json1['buildingBlocks'][0]['molecule']): 1,
            DatabaseEntry(**to_hashable_matrix(
                json=json1['buildingBlocks'][0]['matrix'],
            )): 1,
            DatabaseEntry(**to_hashable_constructed_molecule(
                json=json1['constructedMolecule'],
            )): 1,
        }),
    )

    # Should add another entry, as a different key maker is used.
    polymer2 = polymer1.with_position_matrix(
        position_matrix=np.zeros((polymer1.get_num_atoms(), 3)),
    )
    json2 = jsonizer2.to_json(polymer2)

    database2.put(polymer2)
    assert_database_state(
        state1=get_database_state(database1),
        state2=DatabaseState({
            DatabaseEntry(**json1['molecule']): 1,
            DatabaseEntry(**to_hashable_matrix(json1['matrix'])): 1,
            DatabaseEntry(**json1['buildingBlocks'][0]['molecule']): 1,
            DatabaseEntry(**to_hashable_matrix(
                json=json1['buildingBlocks'][0]['matrix'],
            )): 1,
            DatabaseEntry(**to_hashable_constructed_molecule(
                json=json1['constructedMolecule'],
            )): 1,
            DatabaseEntry(**json2['molecule']): 1,
            DatabaseEntry(**to_hashable_matrix(json2['matrix'])): 1,
            DatabaseEntry(**json2['buildingBlocks'][0]['molecule']): 1,
            DatabaseEntry(**to_hashable_matrix(
                json=json2['buildingBlocks'][0]['matrix'],
            )): 1,
            DatabaseEntry(**to_hashable_constructed_molecule(
                json=json2['constructedMolecule'],
            )): 1,
        }),
    )

    # Should update both entries.
    polymer3 = polymer1.with_position_matrix(
        position_matrix=np.zeros((polymer1.get_num_atoms(), 3)),
    )
    json3 = jsonizer3.to_json(polymer3)

    database3.put(polymer3)
    # Each key is listed once with a count of 2. The two existing
    # entries are expected to end up with the same content, and
    # repeating equal keys in a dict literal would only overwrite
    # the earlier ones.
    assert_database_state(
        state1=get_database_state(database1),
        state2=DatabaseState({
            DatabaseEntry(**json3['molecule']): 2,
            DatabaseEntry(**to_hashable_matrix(json3['matrix'])): 2,
            DatabaseEntry(**json3['buildingBlocks'][0]['molecule']): 2,
            DatabaseEntry(**to_hashable_matrix(
                json=json3['buildingBlocks'][0]['matrix'],
            )): 2,
            DatabaseEntry(**to_hashable_constructed_molecule(
                json=json3['constructedMolecule'],
            )): 2,
        }),
    )
def add_mixed_entries(
    client,
    database,
    key_makers,
):
    """
    Fill a database with molecules that have differing stored values.

    Two cages, a macrocycle, a polymer and a rotaxane are put into
    the constructed molecule database. Atom counts are stored only
    for the two cages and the macrocycle, and a bond count is stored
    only for the rotaxane.

    Parameters
    ----------
    client
        The MongoDB client handed to all three databases.

    database
        Passed as the ``database`` argument of all three databases.

    key_makers
        The key makers used by the jsonizer and by both value
        databases.

    """

    def bromo_block(smiles):
        # Every building block here uses bromo functional groups.
        return stk.BuildingBlock(
            smiles=smiles,
            functional_groups=[stk.BromoFactory()],
        )

    def value_db(collection):
        return stk.ValueMongoDb(
            mongo_client=client,
            collection=collection,
            database=database,
            key_makers=key_makers,
        )

    molecule_store = stk.ConstructedMoleculeMongoDb(
        mongo_client=client,
        database=database,
        molecule_collection='molecules',
        position_matrix_collection='position_matrices',
        jsonizer=stk.ConstructedMoleculeJsonizer(key_makers=key_makers),
    )
    atom_counts = value_db('numAtoms')
    bond_counts = value_db('numBonds')

    small_cage_graph = stk.cage.FourPlusSix(
        building_blocks=(
            bromo_block('BrC1C(Br)CCCC1'),
            bromo_block('Brc1cc(Br)cc(Br)c1'),
        ),
    )
    small_cage = stk.ConstructedMolecule(topology_graph=small_cage_graph)
    molecule_store.put(small_cage)
    atom_counts.put(small_cage, small_cage.get_num_atoms())

    large_cage_graph = stk.cage.TwentyPlusThirty(
        building_blocks=(
            bromo_block('BrC1C(Br)CCCC1'),
            bromo_block('Brc1cc(Br)cc(Br)c1'),
        ),
    )
    large_cage = stk.ConstructedMolecule(topology_graph=large_cage_graph)
    molecule_store.put(large_cage)
    atom_counts.put(large_cage, large_cage.get_num_atoms())

    macrocycle_graph = stk.macrocycle.Macrocycle(
        building_blocks=(
            bromo_block('BrCCBr'),
            bromo_block('BrNNBr'),
            bromo_block('BrOOBr'),
        ),
        repeating_unit='ABC',
        num_repeating_units=2,
    )
    macrocycle = stk.ConstructedMolecule(topology_graph=macrocycle_graph)
    # The macrocycle's value is stored before the molecule itself.
    atom_counts.put(macrocycle, macrocycle.get_num_atoms())

    polymer_graph = stk.polymer.Linear(
        building_blocks=(
            bromo_block('BrCCBr'),
            bromo_block('BrNNBr'),
        ),
        repeating_unit='AB',
        num_repeating_units=4,
    )
    polymer = stk.ConstructedMolecule(topology_graph=polymer_graph)

    rotaxane_graph = stk.rotaxane.NRotaxane(
        axle=stk.BuildingBlock.init_from_molecule(polymer),
        cycles=(stk.BuildingBlock.init_from_molecule(macrocycle), ),
        repeating_unit='A',
        num_repeating_units=1,
    )
    rotaxane = stk.ConstructedMolecule(topology_graph=rotaxane_graph)

    for stored in (polymer, macrocycle, rotaxane):
        molecule_store.put(stored)
    bond_counts.put(rotaxane, rotaxane.get_num_bonds())