def molecule_db(mongo_client):
    """
    A :class:`.MoleculeDatabase` instance.

    The database lives in ``'_stk_pytest_database'`` and uses the
    SMILES key maker both for its jsonizer and for its index.

    """

    smiles = stk.Smiles()
    jsonizer = stk.MoleculeJsonizer(key_makers=(smiles, ))
    return stk.MoleculeMongoDb(
        mongo_client=mongo_client,
        database='_stk_pytest_database',
        jsonizer=jsonizer,
        indices=(smiles.get_key_name(), ),
    )
def name_db(mongo_client):
    """
    A :class:`.ValueDatabase` for holding the names of molecules.

    Values are stored in the ``'name'`` collection of
    ``'_stk_pytest_database'``, keyed and indexed by SMILES.

    """

    smiles = stk.Smiles()
    settings = {
        'mongo_client': mongo_client,
        'database': '_stk_pytest_database',
        'collection': 'name',
        'key_makers': (smiles, ),
        'indices': (smiles.get_key_name(), ),
    }
    return stk.ValueMongoDb(**settings)
def main():
    """
    Rebuild the ``stkVis`` and ``stkVis2`` MongoDB databases.

    Each database is dropped first and then refilled through the
    ``add_entries``, ``add_mixed_entries`` and
    ``add_constructed_molecules`` helpers.

    """

    client = pymongo.MongoClient()

    # First database: one batch of entries per key maker, each batch
    # with its own random seed.
    database = 'stkVis'
    client.drop_database(database)
    seeded_key_makers = (
        (stk.InchiKey, 5),
        (stk.Smiles, 6),
    )
    for key_maker_type, seed in seeded_key_makers:
        add_entries(
            client=client,
            database=database,
            key_makers=(key_maker_type(), ),
            random_seed=seed,
        )

    # Second database: mixed entries followed by constructed
    # molecules, both keyed by InChIKey.
    database2 = 'stkVis2'
    client.drop_database(database2)
    for populate in (add_mixed_entries, add_constructed_molecules):
        populate(
            client=client,
            database=database2,
            key_makers=(stk.InchiKey(), ),
        )
def _test_get_all(
    database: stk.MoleculeDatabase,
    expected_molecules: dict[str, stk.BuildingBlock],
) -> None:
    """
    Test iteration over all entries.

    Every molecule yielded by ``database.get_all()`` is looked up in
    `expected_molecules` by its SMILES and compared against the
    expected molecule, and the number of yielded molecules must match
    the number of expected molecules.

    Parameters:

        database:
            A database to test.

        expected_molecules:
            The expected molecules to get from the databases using
            their smiles as the key.

    """

    smiles = stk.Smiles()

    # Start at 0 so that a database which yields nothing produces a
    # count of 0 (and a normal assertion failure or success) instead
    # of leaving the loop variable unbound.
    num_retrieved = 0
    for num_retrieved, retrieved in enumerate(database.get_all(), 1):
        expected = expected_molecules[smiles.get_key(retrieved)]
        is_equivalent_molecule(
            molecule1=expected.with_canonical_atom_ordering(),
            molecule2=retrieved.with_canonical_atom_ordering(),
        )

    assert num_retrieved == len(expected_molecules)
def constructed_molecule_mongo_db(
    mongo_client: pymongo.MongoClient,
    molecules: tuple[stk.ConstructedMolecule, ...],
) -> CaseData:
    """
    Build a test case for retrieving all constructed molecules.

    The molecules are written into one MongoDB database through three
    differently keyed database objects: the first two by InChI, the
    next two by SMILES and the rest by both. The returned case reads
    them back through a fourth database object keyed by InChIKey.

    Parameters:

        mongo_client:
            The client used to reach MongoDB.

        molecules:
            The molecules to place into the database.

    Returns:

        The database to test, and the molecules expected from it,
        keyed by their SMILES.

    """

    inchi = stk.Inchi()
    smiles = stk.Smiles()
    database_name = '_test_get_all_constructed_molecules'
    mongo_client.drop_database(database_name)

    def connect(key_makers, indices):
        return get_database(
            database_name=database_name,
            mongo_client=mongo_client,
            key_makers=key_makers,
            indices=indices,
        )

    # All three writer databases are created before any molecule is
    # stored.
    writers = (
        (connect((inchi, ), (inchi.get_key_name(), )), molecules[:2]),
        (connect((smiles, ), (smiles.get_key_name(), )), molecules[2:4]),
        (connect((inchi, smiles), ()), molecules[4:]),
    )
    for writer, batch in writers:
        for molecule in batch:
            writer.put(molecule)

    reader = connect((stk.InchiKey(), ), ())
    return CaseData(
        database=reader,
        expected_molecules={
            smiles.get_key(molecule): molecule
            for molecule in molecules
        },
    )
def _check_valid_comparison(self, mol):
    """
    Check that `mol` can be compared to the initial molecule.

    Parameters
    ----------
    mol : molecule
        The molecule to compare with ``self._initial_molecule``.

    Raises
    ------
    :class:`DifferentMoleculeException`
        If the two molecules have different numbers of atoms or
        different SMILES strings.

    :class:`DifferentAtomException`
        If a pair of atoms, taken in order from both molecules, is
        reported as inequivalent by ``is_inequivalent_atom``.

    """

    reference = self._initial_molecule

    if mol.get_num_atoms() != reference.get_num_atoms():
        raise DifferentMoleculeException(
            f'{reference} and {mol} are not '
            'equivalent with different numbers of atoms.'
        )

    key_maker = stk.Smiles()
    if key_maker.get_key(reference) != key_maker.get_key(mol):
        raise DifferentMoleculeException(
            f'{reference} and {mol} are not '
            'equivalent with different smiles strings.'
        )

    # Find the first pair of atoms which does not match, if any.
    mismatch = next(
        (
            pair
            for pair in zip(reference.get_atoms(), mol.get_atoms())
            if is_inequivalent_atom(*pair)
        ),
        None,
    )
    if mismatch is not None:
        atom1, atom2 = mismatch
        raise DifferentAtomException(
            f'{atom1} and {atom2} are not equivalent.'
        )
def test_update_2(mongo_client):
    """
    Test that existing entries are updated.

    In this test, you first create two separate entries, using
    different molecule keys. You then update both at the same time,
    with a database which uses both molecule keys.

    """

    database_name = '_test_update_2'
    mongo_client.drop_database(database_name)

    def make_database(jsonizer):
        # Every database object points at the same MongoDB database
        # and has both LRU cache sizes set to 0.
        return stk.ConstructedMoleculeMongoDb(
            mongo_client=mongo_client,
            database=database_name,
            put_lru_cache_size=0,
            get_lru_cache_size=0,
            jsonizer=jsonizer,
        )

    def expected_entries(entry_json, count):
        # The five entries expected for one JSON representation, each
        # expected `count` times.
        building_block = entry_json['buildingBlocks'][0]
        return {
            DatabaseEntry(**entry_json['molecule']): count,
            DatabaseEntry(
                **to_hashable_matrix(entry_json['matrix'])
            ): count,
            DatabaseEntry(**building_block['molecule']): count,
            DatabaseEntry(
                **to_hashable_matrix(json=building_block['matrix'])
            ): count,
            DatabaseEntry(
                **to_hashable_constructed_molecule(
                    json=entry_json['constructedMolecule'],
                )
            ): count,
        }

    jsonizer1 = stk.ConstructedMoleculeJsonizer(
        key_makers=(stk.InchiKey(), ),
    )
    database1 = make_database(jsonizer1)

    jsonizer2 = stk.ConstructedMoleculeJsonizer(
        key_makers=(stk.Smiles(), ),
    )
    database2 = make_database(jsonizer2)

    jsonizer3 = stk.ConstructedMoleculeJsonizer(
        key_makers=(stk.InchiKey(), stk.Smiles()),
    )
    database3 = make_database(jsonizer3)

    molecule = stk.BuildingBlock(
        smiles='BrCCCBr',
        functional_groups=[stk.BromoFactory()],
    ).with_canonical_atom_ordering()

    polymer1 = stk.ConstructedMolecule(
        topology_graph=stk.polymer.Linear(
            # Use it as a building block twice, to make sure it is
            # not repeatedly added to the molecules database.
            building_blocks=(molecule, molecule),
            repeating_unit='AB',
            num_repeating_units=2,
        ),
    ).with_canonical_atom_ordering()
    json1 = jsonizer1.to_json(polymer1)

    database1.put(polymer1)
    assert_database_state(
        state1=get_database_state(database1),
        state2=DatabaseState(expected_entries(json1, 1)),
    )

    # Should add another entry, as a different key maker is used.
    polymer2 = polymer1.with_position_matrix(
        position_matrix=np.zeros((polymer1.get_num_atoms(), 3)),
    )
    json2 = jsonizer2.to_json(polymer2)

    database2.put(polymer2)
    assert_database_state(
        state1=get_database_state(database1),
        state2=DatabaseState({
            **expected_entries(json1, 1),
            **expected_entries(json2, 1),
        }),
    )

    # Should update both entries.
    polymer3 = polymer1.with_position_matrix(
        position_matrix=np.zeros((polymer1.get_num_atoms(), 3)),
    )
    json3 = jsonizer3.to_json(polymer3)

    database3.put(polymer3)
    # Both previously separate entries now carry both keys, so each
    # expected entry is listed once with a count of 2.
    assert_database_state(
        state1=get_database_state(database1),
        state2=DatabaseState(expected_entries(json3, 2)),
    )
def test_update_2(mongo_client):
    """
    Test that existing entries are updated.

    Two separate entries are created first, each through a database
    using a different molecule key. Both are then updated at the same
    time through a database which uses both molecule keys.

    """

    database_name = '_test_update_2'
    mongo_client.drop_database(database_name)

    def connect(jsonizer):
        return stk.MoleculeMongoDb(
            mongo_client=mongo_client,
            database=database_name,
            put_lru_cache_size=0,
            get_lru_cache_size=0,
            jsonizer=jsonizer,
        )

    def entries(entry_json, count):
        return {
            DatabaseEntry(**entry_json['molecule']): count,
            DatabaseEntry(**to_hashable(entry_json['matrix'])): count,
        }

    def check(expected):
        assert_database_state(
            state1=get_database_state(database1),
            state2=DatabaseState(expected),
        )

    jsonizer1 = stk.MoleculeJsonizer()
    database1 = connect(jsonizer1)

    jsonizer2 = stk.MoleculeJsonizer(key_makers=(stk.Smiles(), ))
    database2 = connect(jsonizer2)

    jsonizer3 = stk.MoleculeJsonizer(
        key_makers=(stk.InchiKey(), stk.Smiles()),
    )
    database3 = connect(jsonizer3)

    molecule1 = stk.BuildingBlock('CCC').with_canonical_atom_ordering()
    num_atoms = molecule1.get_num_atoms()
    json1 = jsonizer1.to_json(molecule1)

    database1.put(molecule1)
    check(entries(json1, 1))

    # Should add another entry, as a different key maker is used.
    molecule2 = molecule1.with_position_matrix(
        position_matrix=np.zeros((num_atoms, 3)),
    )
    json2 = jsonizer2.to_json(molecule2)

    database2.put(molecule2)
    check({**entries(json1, 1), **entries(json2, 1)})

    # Should update both entries as both key makers are used.
    molecule3 = molecule1.with_position_matrix(
        position_matrix=np.ones((num_atoms, 3)),
    )
    json3 = jsonizer3.to_json(molecule3)

    database3.put(molecule3)
    check(entries(json3, 2))
def get_lowest_energy_conformers(
    org_ligs,
    smiles_keys,
    file_prefix=None,
    gfn_exec=None,
    conformer_function=None,
    conformer_settings=None,
):
    """
    Determine the lowest energy conformer of cage organic linkers.

    Will do multiple if there are multiple types. A ligand whose
    output file already exists is skipped.

    Parameters
    ----------
    org_ligs : :class:`dict` of :class:`stk.BuildingBlock`
        Dictionary of building blocks where the key is the file name,
        and the value is the stk building block.

    smiles_keys : :class:`dict` of :class:`int`
        Key is the linker smiles, value is the idx of that smiles.

    file_prefix : :class:`str`, optional
        Prefix to file name of each output ligand structure.
        Output file name is: "file_prefix"{number of atoms}_{idx}_opt.mol
        Where `idx` determines if a molecule is unique by smiles.
        Defaults to ``'organic_linker_s'``.

    gfn_exec : :class:`str`, optional
        Location of GFN-xTB executable to use.

    conformer_function : :class:`function`, optional
        Define the function used to rank and find the lowest energy
        conformer. It is called with the keyword arguments `name`,
        `mol`, `gfn_exec` and `settings`, and its result must have a
        ``write`` method. Defaults to ``get_lowest_energy_conformer``.

    conformer_settings : optional
        Passed unchanged to `conformer_function` as `settings`.

    Returns
    -------
    None : :class:`NoneType`
        Results are written to files.

    """

    if conformer_function is None:
        conformer_function = get_lowest_energy_conformer

    prefix = 'organic_linker_s' if file_prefix is None else file_prefix

    for stk_lig in org_ligs.values():
        smiles_key = stk.Smiles().get_key(stk_lig)
        idx = smiles_keys[smiles_key]
        sgt = str(stk_lig.get_num_atoms())

        # Get optimized ligand name that excludes any cage
        # information.
        ligand_name_ = f'{prefix}{sgt}_{idx}_opt'
        filename_ = f'{ligand_name_}.mol'

        # Nothing to do if this ligand type was already handled.
        if exists(filename_):
            continue

        conformer_directory = f'{ligand_name_}_confs/'
        if not exists(conformer_directory):
            mkdir(conformer_directory)

        low_e_conf = conformer_function(
            name=ligand_name_,
            mol=stk_lig,
            gfn_exec=gfn_exec,
            settings=conformer_settings,
        )
        low_e_conf.write(filename_)
@pytest.fixture( params=( CaseData( key_maker=stk.Inchi(), molecule=stk.BuildingBlock('NCCN'), key_name='InChI', key='InChI=1S/C2H8N2/c3-1-2-4/h1-4H2', ), CaseData( key_maker=stk.InchiKey(), molecule=stk.BuildingBlock('NCCN'), key_name='InChIKey', key='PIICEJLVQHRZGT-UHFFFAOYSA-N', ), CaseData( key_maker=stk.Smiles(), molecule=stk.BuildingBlock('NCCN'), key_name='SMILES', key='NCCN', ), CaseData( key_maker=stk.Smiles(), molecule=stk.BuildingBlock('C(N)CN'), key_name='SMILES', key='NCCN', ), CaseData( key_maker=stk.Smiles(), molecule=stk.BuildingBlock('C(#Cc1cccc2cnccc12)c1ccc2[nH]c3ccc' '(C#Cc4cccc5ccncc45)cc3c2c1'), key_name='SMILES',
def test_update_2(mongo_client):
    """
    Test that existing entries are updated.

    Two separate entries are created first, each through a database
    using a different molecule key. Both are then updated at the same
    time through a database which uses both molecule keys.

    """

    collection = '_test_update_2'
    database_name = '_test_update_2'
    mongo_client.drop_database(database_name)

    def connect(*key_makers):
        return stk.ValueMongoDb(
            mongo_client=mongo_client,
            collection=collection,
            database=database_name,
            put_lru_cache_size=0,
            get_lru_cache_size=0,
            key_makers=key_makers,
        )

    def check(expected):
        assert_database_state(
            state1=get_database_state(database1),
            state2=DatabaseState(expected),
        )

    database1 = connect(stk.InchiKey())
    database2 = connect(stk.Smiles())
    database3 = connect(stk.InchiKey(), stk.Smiles())

    molecule = stk.BuildingBlock('CCC')
    inchi_key = stk.InchiKey().get_key(molecule)
    smiles = stk.Smiles().get_key(molecule)

    database1.put(molecule, 12)
    check({
        DatabaseEntry(InChIKey=inchi_key, v=12): 1,
    })

    # Should add another entry, as a different key maker is used.
    database2.put(molecule, 32)
    check({
        DatabaseEntry(InChIKey=inchi_key, v=12): 1,
        DatabaseEntry(SMILES=smiles, v=32): 1,
    })

    # Should update both entries as both key makers are used.
    database3.put(molecule, 56)
    check({
        DatabaseEntry(InChIKey=inchi_key, SMILES=smiles, v=56): 2,
    })
def get_organic_linkers(cage, metal_atom_nos, file_prefix=None):
    """
    Extract the organic linkers of a cage as building blocks.

    The metal atoms are removed from the cage and every remaining
    connected fragment is written to its own ``.mol`` file in the
    current working directory.

    Parameters
    ----------
    cage : :class:`stk.Molecule`
        Molecule to get the organic linkers from.

    metal_atom_nos : :class:`iterable` of :class:`int`
        The atomic number of metal atoms to remove from structure.

    file_prefix : :class:`str`, optional
        Prefix to file name of each output ligand structure.
        Eventual file name is:
        "file_prefix"{number of atoms}_{idx}_{i}.mol
        Where `idx` determines if a molecule is unique by smiles.
        Defaults to ``'organic_linker_s'``.

    Returns
    -------
    org_lig : :class:`dict` of :class:`stk.BuildingBlock`
        Dictionary of building blocks where the key is the file name,
        and the value is the stk building block.

    smiles_keys : :class:`dict` of :class:`int`
        Key is the linker smiles, value is the idx of that smiles.

    """

    import os

    org_lig = {}

    # Produce a graph from the cage that does not include metals.
    cage_g = nx.Graph()
    atom_ids_in_G = set()
    for atom in cage.get_atoms():
        if atom.get_atomic_number() in metal_atom_nos:
            continue
        cage_g.add_node(atom)
        atom_ids_in_G.add(atom.get_id())

    # Add edges between atoms which are both kept.
    for bond in cage.get_bonds():
        atom1 = bond.get_atom1()
        atom2 = bond.get_atom2()
        if (
            atom1.get_id() in atom_ids_in_G
            and atom2.get_id() in atom_ids_in_G
        ):
            cage_g.add_edge(atom1, atom2)

    # Get disconnected subgraphs as molecules.
    # Sort the atoms of each subgraph by id to ensure molecules are
    # read by RDKit correctly, then order the subgraphs by their
    # smallest atom id. Sets only compare by subset, so sorting the
    # raw components would not impose any order.
    connected_graphs = sorted(
        (
            sorted(subgraph, key=lambda a: a.get_id())
            for subgraph in nx.connected_components(cage_g)
        ),
        key=lambda atoms: atoms[0].get_id(),
    )

    prefix = 'organic_linker_s' if file_prefix is None else file_prefix
    temporary_file = 'temporary_linker.mol'

    smiles_keys = {}
    for i, atoms in enumerate(connected_graphs):
        atom_ids = [atom.get_id() for atom in atoms]

        # Round trip through a file to get a standalone molecule.
        # The temporary file is removed even if reading fails.
        try:
            cage.write(temporary_file, atom_ids=atom_ids)
            temporary_linker = stk.BuildingBlock.init_from_file(
                temporary_file
            ).with_canonical_atom_ordering()
        finally:
            if os.path.exists(temporary_file):
                os.remove(temporary_file)

        # Linkers with the same smiles share the same idx.
        smiles_key = stk.Smiles().get_key(temporary_linker)
        if smiles_key not in smiles_keys:
            smiles_keys[smiles_key] = len(smiles_keys) + 1
        idx = smiles_keys[smiles_key]
        sgt = str(len(atoms))

        # Write to mol file.
        filename_ = f'{prefix}{sgt}_{idx}_{i}.mol'

        # Rewrite to fix atom ids.
        temporary_linker.write(filename_)
        org_lig[filename_] = stk.BuildingBlock.init_from_file(
            filename_
        )

    return org_lig, smiles_keys
def test_update_3(mongo_client):
    """
    Test that existing entries are updated.

    One entry with two keys is created first. It is then updated
    through two further databases, each using one of the two keys.
    No duplicate entries should be made in the database this way.

    """

    collection = '_test_update_3'
    database_name = '_test_update_3'
    mongo_client.drop_database(database_name)

    def connect(*key_makers):
        return stk.ValueMongoDb(
            mongo_client=mongo_client,
            collection=collection,
            database=database_name,
            put_lru_cache_size=0,
            get_lru_cache_size=0,
            key_makers=key_makers,
        )

    database1 = connect(stk.InchiKey(), stk.Smiles())
    database2 = connect(stk.InchiKey())
    database3 = connect(stk.Smiles())

    molecule = stk.BuildingBlock('CCC')
    inchi_key = stk.InchiKey().get_key(molecule)
    smiles = stk.Smiles().get_key(molecule)

    # The first put creates the entry, the second and third should
    # each update it, so a single entry holding the latest value is
    # expected after every step.
    steps = (
        (database1, 12),
        (database2, 32),
        (database3, 62),
    )
    for database, value in steps:
        database.put(molecule, value)
        assert_database_state(
            state1=get_database_state(database1),
            state2=DatabaseState({
                DatabaseEntry(
                    InChIKey=inchi_key,
                    SMILES=smiles,
                    v=value,
                ): 1,
            }),
        )
def test_update_3():
    """
    Test that existing entries are updated.

    In this test, you first create one entry with two keys. Then
    update the entry with databases, each using 1 different key.
    No duplicate entries should be made in the database this way.

    """

    database_name = '_test_update_3'
    client = pymongo.MongoClient()
    client.drop_database(database_name)

    def make_database(jsonizer):
        # Every database object points at the same MongoDB database
        # and has both LRU cache sizes set to 0.
        return stk.ConstructedMoleculeMongoDb(
            mongo_client=client,
            database=database_name,
            put_lru_cache_size=0,
            get_lru_cache_size=0,
            jsonizer=jsonizer,
        )

    jsonizer1 = stk.ConstructedMoleculeJsonizer(
        key_makers=(stk.InchiKey(), stk.Smiles()),
    )
    database1 = make_database(jsonizer1)

    jsonizer2 = stk.ConstructedMoleculeJsonizer(
        key_makers=(stk.InchiKey(), ),
    )
    database2 = make_database(jsonizer2)

    jsonizer3 = stk.ConstructedMoleculeJsonizer(
        key_makers=(stk.Smiles(), ),
    )
    database3 = make_database(jsonizer3)

    molecule = stk.BuildingBlock(
        smiles='BrCCCBr',
        functional_groups=[stk.BromoFactory()],
    ).with_canonical_atom_ordering()

    polymer1 = stk.ConstructedMolecule(
        topology_graph=stk.polymer.Linear(
            # Use it as a building block twice, to make sure it is
            # not repeatedly added to the molecules database.
            building_blocks=(molecule, molecule),
            repeating_unit='AB',
            num_repeating_units=2,
        ),
    ).with_canonical_atom_ordering()
    json1 = jsonizer1.to_json(polymer1)
    building_block_json = json1['buildingBlocks'][0]

    def expected_state(matrix_json):
        # Only the position matrix entry of the polymer is expected
        # to change between steps. Everything else comes from json1.
        return DatabaseState({
            DatabaseEntry(**json1['molecule']): 1,
            DatabaseEntry(**to_hashable_matrix(matrix_json)): 1,
            DatabaseEntry(**building_block_json['molecule']): 1,
            DatabaseEntry(
                **to_hashable_matrix(json=building_block_json['matrix'])
            ): 1,
            DatabaseEntry(
                **to_hashable_constructed_molecule(
                    json=json1['constructedMolecule'],
                )
            ): 1,
        })

    def updated_matrix(jsonizer, polymer):
        # The stored entry keeps both keys from json1, only the
        # matrix itself is replaced by the update.
        matrix_json = dict(json1['matrix'])
        matrix_json['m'] = jsonizer.to_json(polymer)['matrix']['m']
        return matrix_json

    database1.put(polymer1)
    assert_database_state(
        state1=get_database_state(database1),
        state2=expected_state(json1['matrix']),
    )

    # Should update the entry.
    polymer2 = polymer1.with_position_matrix(
        position_matrix=np.zeros((polymer1.get_num_atoms(), 3)),
    )
    matrix2 = updated_matrix(jsonizer2, polymer2)

    database2.put(polymer2)
    assert_database_state(
        state1=get_database_state(database1),
        state2=expected_state(matrix2),
    )

    # Should also update the entry. A matrix different from the one
    # stored in the previous step is used, otherwise this check would
    # pass even if the put did nothing.
    polymer3 = polymer1.with_position_matrix(
        position_matrix=np.ones((polymer1.get_num_atoms(), 3)),
    )
    matrix3 = updated_matrix(jsonizer3, polymer3)

    database3.put(polymer3)
    assert_database_state(
        state1=get_database_state(database1),
        state2=expected_state(matrix3),
    )
def calculate_deltann_distance(org_ligs,
                               smiles_keys,
                               fg_factory,
                               file_prefix=None):
    """
    Calculate the change of NN distance of each ligand in the cage.

    This function will not work for cages built from FGs other than
    metals + AromaticCNC and metals + AromaticCNN.

    Parameters
    ----------
    org_ligs : :class:`dict` of :class:`stk.BuildingBlock`
        Dictionary of building blocks where the key is the file name,
        and the value is the stk building block.

    smiles_keys : :class:`dict` of :class:`int`
        Key is the linker smiles, value is the idx of that smiles.

    fg_factory :
    :class:`iterable` of :class:`stk.FunctionalGroupFactory`
        Functional groups to assign to molecules.
        NN_distance calculator will not work for cages built from FGs
        other than metals + AromaticCNC and metals + AromaticCNN.

    file_prefix : :class:`str`, optional
        Prefix to file name of each optimised ligand structure, which
        is read from: "file_prefix"{number of atoms}_{idx}_opt.mol
        Where `idx` determines if a molecule is unique by smiles.

    Returns
    -------
    delta_nns : :class:`dict`
        NN distance in cage - free optimised ligand for each ligand.
        Output is absolute values.

    """

    prefix = 'organic_linker_s' if file_prefix is None else file_prefix

    def keep_furthest_pair(building_block):
        # Restrict the functional groups to the pair selected by
        # get_furthest_pair_FGs.
        return building_block.with_functional_groups(
            functional_groups=get_furthest_pair_FGs(building_block),
        )

    delta_nns = {}
    for lig, stk_lig in org_ligs.items():
        smiles_key = stk.Smiles().get_key(stk_lig)
        idx = smiles_keys[smiles_key]
        sgt = str(stk_lig.get_num_atoms())

        # Optimized ligand file name excludes any cage information.
        filename_ = f'{prefix}{sgt}_{idx}_opt.mol'

        _in_cage = keep_furthest_pair(
            stk.BuildingBlock.init_from_molecule(
                stk_lig,
                functional_groups=fg_factory,
            )
        )
        _free = keep_furthest_pair(
            stk.BuildingBlock.init_from_file(
                filename_,
                functional_groups=fg_factory,
            )
        )

        nn_in_cage = calculate_NN_distance(bb=_in_cage)
        nn_free = calculate_NN_distance(bb=_free)
        delta_nns[lig] = abs(nn_in_cage - nn_free)

    return delta_nns
def test_update_3(mongo_client):
    """
    Test that existing entries are updated.

    In this test, you first create one entry with two keys. Then
    update the entry with databases, each using 1 different key.
    No duplicate entries should be made in the database this way.

    """

    database_name = '_test_update_3'
    mongo_client.drop_database(database_name)

    def make_database(jsonizer):
        # Every database object points at the same MongoDB database
        # and has both LRU cache sizes set to 0.
        return stk.MoleculeMongoDb(
            mongo_client=mongo_client,
            database=database_name,
            put_lru_cache_size=0,
            get_lru_cache_size=0,
            jsonizer=jsonizer,
        )

    jsonizer1 = stk.MoleculeJsonizer(
        key_makers=(stk.InchiKey(), stk.Smiles()),
    )
    database1 = make_database(jsonizer1)

    jsonizer2 = stk.MoleculeJsonizer()
    database2 = make_database(jsonizer2)

    jsonizer3 = stk.MoleculeJsonizer(
        key_makers=(stk.Smiles(), ),
    )
    database3 = make_database(jsonizer3)

    molecule1 = stk.BuildingBlock('CCC').with_canonical_atom_ordering()
    json1 = jsonizer1.to_json(molecule1)

    def expected_state(matrix_json):
        # Only the position matrix entry is expected to change
        # between steps. The molecule entry comes from json1.
        return DatabaseState({
            DatabaseEntry(**json1['molecule']): 1,
            DatabaseEntry(**to_hashable(matrix_json)): 1,
        })

    def updated_matrix(jsonizer, molecule):
        # The stored entry keeps both keys from json1, only the
        # matrix itself is replaced by the update.
        matrix_json = dict(json1['matrix'])
        matrix_json['m'] = jsonizer.to_json(molecule)['matrix']['m']
        return matrix_json

    database1.put(molecule1)
    assert_database_state(
        state1=get_database_state(database1),
        state2=expected_state(json1['matrix']),
    )

    # Should update the entry.
    molecule2 = molecule1.with_position_matrix(
        position_matrix=np.zeros((molecule1.get_num_atoms(), 3)),
    )
    matrix2 = updated_matrix(jsonizer2, molecule2)

    database2.put(molecule2)
    assert_database_state(
        state1=get_database_state(database1),
        state2=expected_state(matrix2),
    )

    # Should also update the entry. A matrix different from the one
    # stored in the previous step is used, otherwise this check would
    # pass even if the put did nothing.
    molecule3 = molecule1.with_position_matrix(
        position_matrix=np.ones((molecule1.get_num_atoms(), 3)),
    )
    matrix3 = updated_matrix(jsonizer3, molecule3)

    database3.put(molecule3)
    assert_database_state(
        state1=get_database_state(database1),
        state2=expected_state(matrix3),
    )
def calculate_ligand_SE(org_ligs,
                        smiles_keys,
                        output_json,
                        file_prefix=None):
    """
    Calculate the strain energy of each ligand in the cage.

    If `output_json` already exists nothing is calculated and its
    content is returned. Otherwise the strain energies are calculated,
    written to `output_json` and then read back from it.

    Parameters
    ----------
    org_ligs : :class:`dict` of :class:`stk.BuildingBlock`
        Dictionary of building blocks where the key is the file name,
        and the value is the stk building block.

    smiles_keys : :class:`dict` of :class:`int`
        Key is the linker smiles, value is the idx of that smiles.

    output_json : :class:`str`
        File name to save output to, to avoid reruns.

    file_prefix : :class:`str`, optional
        Prefix to file name of each optimised ligand structure, which
        is read from: "file_prefix"{number of atoms}_{idx}_opt.mol
        Where `idx` determines if a molecule is unique by smiles.

    Returns
    -------
    strain_energies : :class:`dict`
        Strain energies for each ligand, as loaded from
        `output_json`.

    """

    # Check if output file exists.
    if not exists(output_json):
        prefix = (
            'organic_linker_s' if file_prefix is None else file_prefix
        )
        strain_energies = {}

        # Iterate over ligands.
        for lig, stk_lig in org_ligs.items():
            # Strip only the trailing extension. Replacing every
            # 'mol' in the name would mangle names and prefixes which
            # contain 'mol' elsewhere.
            extension = '.mol'
            if lig.endswith(extension):
                lig_name = lig[:-len(extension)]
            else:
                lig_name = lig
            ey_file = f'{lig_name}.ey'

            smiles_key = stk.Smiles().get_key(stk_lig)
            idx = smiles_keys[smiles_key]
            sgt = str(stk_lig.get_num_atoms())

            # Get optimized ligand name that excludes any cage
            # information.
            opt_lig_n = f'{prefix}{sgt}_{idx}_opt'
            filename_ = f'{opt_lig_n}.mol'
            opt_lig_ey = f'{opt_lig_n}.ey'

            # Calculate energy of extracted ligand.
            if not exists(ey_file):
                calculate_energy(
                    name=lig_name,
                    mol=stk_lig,
                    ey_file=ey_file,
                )
            # Read energy.
            # kJ/mol.
            E_extracted = read_gfnx2xtb_eyfile(ey_file)

            # Calculate energy of optimised ligand.
            # Load in lowest energy conformer.
            opt_mol = stk.BuildingBlock.init_from_file(filename_)
            if not exists(opt_lig_ey):
                calculate_energy(
                    name=opt_lig_n,
                    mol=opt_mol,
                    ey_file=opt_lig_ey,
                )
            # Read energy.
            # kJ/mol.
            E_free = read_gfnx2xtb_eyfile(opt_lig_ey)

            # Strain energy:
            # (E(extracted) - E(optimised/free))
            # kJ/mol.
            strain_energies[lig] = E_extracted - E_free

        # Write data.
        with open(output_json, 'w') as f:
            json.dump(strain_energies, f)

    # Get data.
    with open(output_json, 'r') as f:
        strain_energies = json.load(f)

    return strain_energies