def test_int_bond_order():
    """Check that only whole-number bond orders are accepted.

    Connectivity tables whose orders are ints or integral floats (``1.0``)
    must load into an ``oechem.OEMol``; a table containing a fractional
    order (``1.5``) must make ``utils.load_molecule`` raise ``ValueError``.
    """
    from openeye import oechem

    hooh = {
        'symbols': ['H', 'O', 'O', 'H'],
        'geometry': [
            1.84719633, 1.47046223, 0.80987166,
            1.3126021, -0.13023157, -0.0513322,
            -1.31320906, 0.13130216, -0.05020593,
            -1.83756335, -1.48745318, 0.80161212,
        ],
        'name': 'HOOH',
        'connectivity': [[0, 1, 1], [1, 2, 1], [2, 3, 1]],
        'molecular_multiplicity': 1,
    }

    # Tables with integral bond orders, written as ints and as floats.
    accepted_tables = (
        [[0, 1, 1], [1, 2, 1], [2, 3, 1]],
        [[0, 1, 1.0], [1, 2, 1.0], [2, 3, 1]],
    )
    for table in accepted_tables:
        hooh['connectivity'] = table
        loaded = utils.load_molecule(hooh)
        assert isinstance(loaded, oechem.OEMol)

    # A fractional bond order must be rejected.
    hooh['connectivity'] = [[0, 1, 1.0], [1, 2, 1.5], [2, 3, 1.0]]
    with pytest.raises(ValueError):
        utils.load_molecule(hooh)
def test_get_atom_map_mapped_smiles(toolkit):
    """Load an unmapped and a mapped SMILES, ensuring explicit hydrogens.

    NOTE(review): the visible body only prepares the two molecules and makes
    no assertions -- confirm whether the comparison step is missing.
    """
    patterns = (
        '[H]C([H])(C([H])([H])O[H])O[H]',
        '[H:5][C:1]([H:6])([C:2]([H:7])([H:8])[O:4][H:10])[O:3][H:9]',
    )

    prepared = []
    for pattern in patterns:
        molecule = utils.load_molecule(pattern, toolkit=toolkit)
        # Only add hydrogens when the toolkit did not keep them explicit.
        if not utils.has_explicit_hydrogen(molecule):
            molecule = utils.add_explicit_hydrogen(molecule)
        prepared.append(molecule)
def _standardize_smiles(smiles):
    """Return a standardized form of a SMILES pattern using the `cmiles`
    library.

    The pattern is loaded with the RDKit toolkit. An isomeric output is
    attempted first; if `cmiles` raises a ``ValueError`` for it, the
    non-isomeric output is returned instead.

    Parameters
    ----------
    smiles: str
        The SMILES pattern to standardize.

    Returns
    -------
    The standardized SMILES pattern.
    """
    from cmiles.utils import load_molecule, mol_to_smiles

    molecule = load_molecule(smiles, toolkit="rdkit")

    # Options shared by the isomeric attempt and the fall-back.
    shared_options = {"explicit_hydrogen": False, "mapped": False}

    try:
        # Try to make the smiles isomeric.
        return mol_to_smiles(molecule, isomeric=True, **shared_options)
    except ValueError:
        # Fall-back to non-isomeric.
        return mol_to_smiles(molecule, isomeric=False, **shared_options)
def test_remove_restore_atom_map(toolkit):
    """Remove and then restore the atom map of a mapped molecule.

    After removal the molecule must report no map and a missing map; after
    restoring, both flags must flip back. Calling restore on a molecule
    loaded from an unmapped SMILES is expected to emit a ``UserWarning``.
    """
    tagged = utils.load_molecule(
        '[H:5][C:1]([H:6])([C:2]([H:7])([H:8])[O:4][H:10])[O:3][H:9]',
        toolkit=toolkit,
    )

    utils.remove_atom_map(tagged)
    assert utils.has_atom_map(tagged) == False
    assert utils.is_missing_atom_map(tagged) == True

    utils.restore_atom_map(tagged)
    assert utils.has_atom_map(tagged) == True
    assert utils.is_missing_atom_map(tagged) == False

    untagged = utils.load_molecule('OCCO', toolkit=toolkit)
    with pytest.warns(UserWarning):
        utils.restore_atom_map(untagged)
def test_n_valence():
    """Load a JSON molecule with OpenEye and check the resulting SMILES.

    The loaded molecule must have explicit hydrogens and must be written by
    ``oechem.OEMolToSmiles`` as ``'c1ccnc(c1)c2[n-]nnn2'``.
    """
    from openeye import oechem

    # One row of three coordinates per atom, in the order of ``symbols``.
    flat_geometry = np.array([
        11.02088236, 0.30802536, 2.96687012,
        10.37270642, 2.8383686, 2.75522059,
        9.32012957, -1.48532476, 2.09948562,
        8.06346176, 3.48843435, 1.68941515,
        6.98820713, -0.772898, 1.02801107,
        5.21186447, -2.73065435, 0.12850138,
        5.70508328, -5.1797392, 0.28345893,
        6.45152507, 1.7536658, 0.86549457,
        2.97820833, -2.31491455, -0.90706852,
        3.71709131, -6.31357514, -0.68408084,
        2.05980154, -4.57124733, -1.40784597,
        12.76887939, -0.24566439, 3.77189345,
        11.61992628, 4.26322222, 3.39583795,
        9.76610505, -3.43174262, 2.23743576,
        7.53811768, 5.41217579, 1.50989122,
    ])

    # Each entry is [atom index, atom index, bond order].
    bonds = [
        [0, 1, 1], [0, 2, 2], [0, 11, 1],
        [1, 3, 2], [1, 12, 1],
        [2, 4, 1], [2, 13, 1],
        [3, 7, 1], [3, 14, 1],
        [4, 5, 1], [4, 7, 2],
        [5, 6, 1], [5, 8, 2],
        [6, 9, 1],
        [8, 10, 1],
        [9, 10, 2],
    ]

    json_molecule = {
        'symbols': ['C'] * 6 + ['N'] * 5 + ['H'] * 4,
        'geometry': flat_geometry,
        'connectivity': bonds,
    }

    mol = utils.load_molecule(json_molecule)
    assert utils.has_explicit_hydrogen(mol)
    assert oechem.OEMolToSmiles(mol) == 'c1ccnc(c1)c2[n-]nnn2'
def test_mol_from_json(toolkit_str):
    """Load HOOH from a JSON-style dict and check counts and coordinates.

    The loaded molecule must have four atoms and three bonds, and its
    coordinates must equal the input geometry scaled by
    ``utils.BOHR_2_ANGSTROM`` to a relative tolerance of 1e-7.
    """
    import numpy as np

    hooh = {
        'symbols': ['H', 'O', 'O', 'H'],
        'geometry': [
            1.84719633, 1.47046223, 0.80987166,
            1.3126021, -0.13023157, -0.0513322,
            -1.31320906, 0.13130216, -0.05020593,
            -1.83756335, -1.48745318, 0.80161212,
        ],
        'name': 'HOOH',
        'connectivity': [[0, 1, 1], [1, 2, 1], [2, 3, 1]],
    }
    mol = utils.load_molecule(hooh, toolkit=toolkit_str)

    # Each toolkit exposes atom/bond counts and coordinates differently.
    if toolkit_str == 'openeye':
        assert mol.GetMaxAtomIdx() == 4
        assert mol.GetMaxBondIdx() == 3
        coordinates = mol.GetCoords()
    elif toolkit_str == 'rdkit':
        assert mol.GetNumAtoms() == 4
        assert mol.GetNumBonds() == 3
        coordinates = mol.GetConformer().GetPositions()

    # Input geometry as one (x, y, z) row per atom, scaled for comparison.
    expected = (
        np.array(hooh['geometry'], dtype=float).reshape(-1, 3)
        * utils.BOHR_2_ANGSTROM
    )

    for atom_idx in range(len(coordinates)):
        loaded_xyz = coordinates[atom_idx]
        expected_xyz = expected[atom_idx]
        for axis in range(3):
            assert loaded_xyz[axis] == pytest.approx(expected_xyz[axis], rel=1e-7)
def test_map_order_geometry(permute, toolkit, toolkit_name):
    """Check that map-ordered geometry matches the input geometry.

    For every entry of the atom map, the coordinates returned by
    ``toolkit.get_map_ordered_geometry`` at position ``map index - 1`` must
    match the input coordinates of the mapped atom. When ``permute`` is
    falsy the flat geometries must also match as a whole.
    """
    tolerance = 0.0000001

    hooh = {
        'symbols': ['H', 'O', 'O', 'H'],
        'geometry': [
            1.84719633, 1.47046223, 0.80987166,
            1.3126021, -0.13023157, -0.0513322,
            -1.31320906, 0.13130216, -0.05020593,
            -1.83756335, -1.48745318, 0.80161212,
        ],
        'name': 'HOOH',
        'connectivity': [[0, 1, 1], [1, 2, 1], [2, 3, 1]],
    }

    mol = utils.load_molecule(hooh, toolkit=toolkit_name, permute_xyz=permute)
    mapped_smiles = utils.mol_to_smiles(
        mol, isomeric=True, explicit_hydrogen=True, mapped=True)
    atom_map = utils.get_atom_map(mol, mapped_smiles)
    symbols, geometry = toolkit.get_map_ordered_geometry(mol, atom_map)

    # Both flat geometries are viewed as one (x, y, z) row per atom.
    n_atoms = int(len(geometry) / 3)
    input_rows = np.asarray(hooh['geometry']).reshape(n_atoms, 3)
    ordered_rows = np.asarray(geometry).reshape(n_atoms, 3)

    for map_idx in atom_map:
        for axis in range(3):
            assert input_rows[atom_map[map_idx]][axis] == pytest.approx(
                ordered_rows[map_idx - 1][axis], tolerance)

    if not permute:
        assert hooh['geometry'] == pytest.approx(geometry, tolerance)
def test_add_atom_map(toolkit):
    """Check ``add_atom_map(..., in_place=False)`` leaves its input unmapped.

    The returned molecule must have a complete atom map, while the molecule
    passed in must still report no map and a missing map.
    """
    unmapped = utils.load_molecule('CCCC', toolkit=toolkit, strict=False)
    mapped_copy = utils.add_atom_map(unmapped, in_place=False)

    assert utils.has_atom_map(mapped_copy)
    assert not utils.has_atom_map(unmapped)

    assert not utils.is_missing_atom_map(mapped_copy)
    assert utils.is_missing_atom_map(unmapped)
def test_is_map_canonical(toolkit, smiles, canonicalization):
    """Check ``utils.is_map_canonical`` against the loading toolkit.

    The map is expected to be canonical only when ``toolkit`` equals
    ``canonicalization``.
    """
    molecule = utils.load_molecule(smiles, toolkit)
    canonical = utils.is_map_canonical(molecule)

    assert (canonical if toolkit == canonicalization else not canonical)
def test_has_stereochemistry(input1, input2, toolkit_name):
    """Check ``utils.has_stereo_defined`` on two inputs.

    ``input1`` must be reported as having stereo defined. Checking
    ``input2`` is expected to emit a ``UserWarning``. Both molecules get
    explicit hydrogens through the selected toolkit before the check.
    """

    def _load_with_hydrogens(pattern):
        # Load the pattern and add explicit hydrogens via the toolkit's own API.
        loaded = utils.load_molecule(pattern, toolkit_name)
        if toolkit_name == 'openeye':
            from openeye import oechem
            oechem.OEAddExplicitHydrogens(loaded)
        if toolkit_name == 'rdkit':
            from rdkit import Chem
            loaded = Chem.AddHs(loaded)
        return loaded

    defined = _load_with_hydrogens(input1)
    assert utils.has_stereo_defined(defined) == True

    undefined = _load_with_hydrogens(input2)
    with pytest.warns(UserWarning):
        utils.has_stereo_defined(undefined)
def test_load_molecule(toolkit):
    """Load an explicit-hydrogen SMILES and check it is written as 'CCCC'."""
    explicit_h_smiles = '[H]C([H])([H])C([H])([H])C([H])([H])C([H])([H])[H]'
    mol = utils.load_molecule(explicit_h_smiles, toolkit=toolkit)

    # Each toolkit has its own SMILES writer.
    if toolkit == 'openeye':
        from openeye import oechem
        written = oechem.OEMolToSmiles(mol)
        assert written == 'CCCC'
    if toolkit == 'rdkit':
        from rdkit import Chem
        written = Chem.MolToSmiles(mol)
        assert written == 'CCCC'
def test_chiral_bond_exception(smiles, toolkit):
    """Test bonds to ignore.

    After explicit hydrogens are added, ``utils.has_stereo_defined`` is
    expected to emit a ``UserWarning`` for the given SMILES.
    """
    molecule = utils.load_molecule(smiles, toolkit)

    if toolkit == 'openeye':
        from openeye import oechem
        # OpenEye adds the hydrogens to the molecule in place.
        oechem.OEAddExplicitHydrogens(molecule)
    if toolkit == 'rdkit':
        from rdkit import Chem
        # RDKit returns a new molecule with the hydrogens added.
        molecule = Chem.AddHs(molecule)

    with pytest.warns(UserWarning):
        utils.has_stereo_defined(molecule)
def request_callback(request, context):
    """Answer a request after checking the SMILES it carries.

    Sets the response status to 200, extracts the value following the
    ``"smiData"`` field in the request text, and asserts that it
    standardizes to ``"C"`` before returning a fixed response body.

    NOTE(review): presumably registered as the handler of a mocked HTTP
    endpoint -- confirm against the caller.
    """
    context.status_code = 200

    # The payload sits between the blank line after "smiData" and the next CRLF.
    field = re.search(r'"smiData"\r\n\r\n(.*?)\r\n', request.text)
    submitted = field.group(1)

    cmiles_molecule = load_molecule(submitted, toolkit="rdkit")
    standardized = mol_to_smiles(
        cmiles_molecule,
        isomeric=False,
        explicit_hydrogen=False,
        mapped=False,
    )
    assert standardized == "C"

    return 'value="/tmp/0000.xml"'
def test_all_equivelant_torsions():
    """Check ``torsions.find_equivelant_torsions`` on a mapped SMILES.

    The result must equal a dict keyed by a central atom-index pair, each
    value listing the expected four-index torsions for that pair.
    """
    oemol = load_molecule(
        '[H:5][C:1]([H:6])([C:2]([H:7])([H:8])[O:4][H:10])[O:3][H:9]')

    # Each list is spelled as (first, central pair..., last); the iteration
    # order of the comprehensions fixes the order of the expected lists.
    expected = {
        (0, 1): [(first, 0, 1, last)
                 for first in (4, 5, 2)
                 for last in (6, 7, 3)],
        (1, 3): [(first, 1, 3, 9) for first in (0, 6, 7)],
        (0, 2): [(first, 0, 2, 8) for first in (4, 5, 1)],
    }

    eq_torsions = torsions.find_equivelant_torsions(oemol)
    assert eq_torsions == expected
def test_connectivity(mapped_smiles, expected_table, toolkit):
    """Check the connectivity table built from a mapped SMILES.

    Every bond of the generated table must match exactly one row of
    ``expected_table`` on its first two entries, and the last entry of that
    row must equal the bond's last entry.
    """
    molecule = utils.load_molecule(mapped_smiles, toolkit)
    atom_map = utils.get_atom_map(molecule, mapped_smiles)
    generated = utils.get_connectivity_table(molecule, atom_map)

    for bond in generated:
        # Flag every entry of the expected table equal to one of the bond's
        # first two entries.
        membership = np.isin(expected_table, bond[:2])
        # A row matches when both of its first two entries are flagged.
        pair_hits = np.array([row[:2].sum() for row in membership])
        match = np.where(pair_hits == 2)[0]

        # assert that a match was found and only one was found
        assert len(match) == 1
        # assert that bond order is the same
        assert expected_table[match][0][-1] == bond[-1]
def test_atom_order_in_mol_copy(toolkit, smiles):
    """Test that atom orders do not change when copying molecule.

    Atoms of the original and of its ``copy.deepcopy`` are walked in
    lockstep and compared through toolkit-specific getters.
    """
    import copy

    mol = utils.load_molecule(smiles, toolkit=toolkit)
    if not utils.has_explicit_hydrogen(mol):
        mol = utils.add_explicit_hydrogen(mol)
    duplicate = copy.deepcopy(mol)

    # Getter names compared per atom, in assertion order, for each toolkit.
    getter_names = ()
    if toolkit == 'openeye':
        getter_names += ('GetIdx', 'GetName', 'GetMapIdx')
    if toolkit == 'rdkit':
        getter_names += ('GetIdx', 'GetAtomMapNum', 'GetSmarts')

    for source_atom, copied_atom in zip(mol.GetAtoms(), duplicate.GetAtoms()):
        for getter in getter_names:
            assert getattr(source_atom, getter)() == getattr(copied_atom, getter)()
def test_atom_map(smiles):
    """Test that atom map orders geometry the same way every time no matter
    the SMILES used to create the molecule.

    Reference coordinates given in map order are placed on a molecule built
    from ``smiles`` and read back through the atom map; the result must
    equal the reference geometry and symbols.
    """
    import cmiles

    mapped_smiles = '[H:5][C:1]([H:6])([C:2]([H:7])([H:8])[O:4][H:10])[O:3][H:9]'

    # Neither result is used below; the calls are kept as they were.
    mol_id_oe = cmiles.get_molecule_ids(mapped_smiles, toolkit='openeye')
    oemol = utils.load_molecule(mapped_smiles, toolkit='openeye')

    mapped_symbols = ['C'] * 2 + ['O'] * 2 + ['H'] * 6
    mapped_geometry = [
        -1.6887193912042044, 0.8515190939276903, 0.8344587822904272,
        -4.05544806361675, -0.3658269566455062, -0.22848169646448416,
        -1.6111611950422127, 0.4463128276938808, 3.490617694146934,
        -3.97756355964586, -3.0080934853087373, 0.25948499322223956,
        -1.6821252026076652, 2.891135395246369, 0.4936556190978574,
        0.0, 0.0, 0.0,
        -4.180315034973438, -0.09210893239246959, -2.2748227320305525,
        -5.740516456782416, 0.4115539217904015, 0.6823267491485907,
        -0.07872657410528058, 1.2476492272884379, 4.101615944163073,
        -5.514569080545831, -3.7195945404657222, -0.4441653010509862,
    ]

    mol = cmiles.utils.load_molecule(smiles, toolkit='openeye')
    if not utils.has_explicit_hydrogen(mol):
        mol = utils.add_explicit_hydrogen(mol)
    atom_map = utils.get_atom_map(mol, mapped_smiles=mapped_smiles)

    # use the atom map to add coordinates to molecule. First reorder mapped
    # geometry to order in molecule
    reference_rows = np.array(mapped_geometry, dtype=float).reshape(-1, 3)
    molecule_rows = np.zeros(reference_rows.shape)
    for map_idx in atom_map:
        molecule_rows[atom_map[map_idx]] = reference_rows[map_idx - 1]

    # flatten and convert to Angstroms
    flat_coords = molecule_rows.flatten() * utils.BOHR_2_ANGSTROM

    # set coordinates in oemol
    mol.SetCoords(flat_coords)
    mol.SetDimension(3)

    # Get new atom map
    atom_map = utils.get_atom_map(mol, mapped_smiles)
    symbols, geometry = _cmiles_oe.get_map_ordered_geometry(mol, atom_map)

    assert geometry == mapped_geometry
    assert symbols == mapped_symbols
def test_get_atom_map(toolkit, toolkit_name):
    """Check ``utils.get_atom_map`` against a map built from atom indices.

    Every atom is given the map index ``atom index + 1`` before the mapped
    SMILES is written, so each key of the recovered map must equal its
    value plus one.
    """
    mol = utils.load_molecule('C[C@@H](c1c(ccc(c1Cl)F)Cl)OC', toolkit_name)

    if toolkit_name == 'openeye':
        from openeye import oechem
        oechem.OEAddExplicitHydrogens(mol)
        for atom in mol.GetAtoms():
            atom.SetMapIdx(atom.GetIdx() + 1)
        mapped_smiles = oechem.OEMolToSmiles(mol)
    if toolkit_name == 'rdkit':
        from rdkit import Chem
        mol = Chem.AddHs(mol)
        for atom in mol.GetAtoms():
            atom.SetAtomMapNum(atom.GetIdx() + 1)
        mapped_smiles = Chem.MolToSmiles(mol)

    recovered = utils.get_atom_map(mol, mapped_smiles)
    for map_idx in recovered:
        assert map_idx == (recovered[map_idx] + 1)
def test_explicit_h_oe(input, output):
    """Check explicit-hydrogen detection on SMILES loaded with OpenEye.

    ``utils.has_explicit_hydrogen`` must return ``output`` for the molecule
    loaded from ``input``.
    """
    loaded = utils.load_molecule(input, toolkit='openeye')
    detected = utils.has_explicit_hydrogen(loaded)
    assert detected == output
def test_is_missing_map(toolkit, input, output):
    """Check ``utils.is_missing_atom_map`` returns ``output`` for ``input``."""
    # ToDo - Known problem: RDKit does not add explicit H to molecules even
    # with explicit H SMILES, so a missing map on an H will not be picked up.
    loaded = utils.load_molecule(input, toolkit=toolkit)
    missing = utils.is_missing_atom_map(loaded)
    assert missing == output
def test_is_mapped(toolkit, input, output):
    """Test is mapped.

    ``utils.has_atom_map`` must return ``output`` for the loaded molecule
    and ``False`` once the atom map has been removed.
    """
    molecule = utils.load_molecule(input, toolkit=toolkit)

    mapped_before = utils.has_atom_map(molecule)
    assert mapped_before == output

    utils.remove_atom_map(molecule)
    mapped_after = utils.has_atom_map(molecule)
    assert mapped_after == False
def test_explicit_h(input, output, toolkit_name):
    """Check explicit-hydrogen detection for the given toolkit.

    ``utils.has_explicit_hydrogen`` must return ``output`` for the molecule
    loaded from ``input`` with ``toolkit_name``.
    """
    loaded = utils.load_molecule(input, toolkit=toolkit_name)
    detected = utils.has_explicit_hydrogen(loaded)
    assert detected == output