def test_sanitizeSMILES():
    """
    Exercise `sanitizeSMILES` in 'drop' and 'expand' modes, then round-trip the result.

    The three-entry input list is expected to yield 2 SMILES strings in 'drop'
    mode and 4 in 'expand' mode. Every SMILES string returned by the 'expand'
    call must be reproduced exactly when parsed and re-written as an isomeric
    SMILES string by OpenEye.

    Raises
    ------
    Exception
        If either mode returns an unexpected number of SMILES strings, or if
        a sanitized SMILES string does not survive the round trip unchanged.

    """
    smiles_list = ['CC', 'CCC', '[H][C@]1(NC[C@@H](CC1CO[C@H]2CC[C@@H](CC2)O)N)[H]']

    # (mode, expected number of sanitized SMILES, failure message template)
    expectations = (
        ('drop', 2,
         "Molecules with undefined stereochemistry are not being properly dropped (size=%d)."),
        ('expand', 4,
         "Molecules with undefined stereochemistry are not being properly expanded (size=%d)."),
    )
    for mode, expected_count, failure_template in expectations:
        sanitized = sanitizeSMILES(smiles_list, mode=mode)
        observed_count = len(sanitized)
        if observed_count != expected_count:
            raise Exception(failure_template % observed_count)

    # Check that all molecules can be round-tripped.
    # `sanitized` still holds the output of the last ('expand') call.
    from openeye.oechem import OEGraphMol, OESmilesToMol, OECreateIsoSmiString
    for original in sanitized:
        mol = OEGraphMol()
        OESmilesToMol(mol, original)
        regenerated = OECreateIsoSmiString(mol)
        if original == regenerated:
            continue
        raise Exception(
            "Molecule '%s' was not properly round-tripped (result was '%s')" % (original, regenerated))
def __makeChemCompDescriptorCategory(self, ccId, oeMol):
    ''' Build the row dictionaries for the _pdbx_chem_comp_descriptor category.

        Four rows are produced, in this order: SMILES_CANONICAL (from
        OECreateIsoSmiString), SMILES (from OECreateCanSmiString), InChI
        (from OECreateInChI) and InChIKey (from OECreateInChIKey).  Every row
        carries the same comp_id, program and program_version values.

        Example of the target category:

        loop_
        _pdbx_chem_comp_descriptor.comp_id
        _pdbx_chem_comp_descriptor.type
        _pdbx_chem_comp_descriptor.program
        _pdbx_chem_comp_descriptor.program_version
        _pdbx_chem_comp_descriptor.descriptor
        ARG SMILES           ACDLabs              10.04 "O=C(O)C(N)CCCN\\C(=[NH2+])N"
        ARG SMILES_CANONICAL CACTVS               3.341 "N[C@@H](CCCNC(N)=[NH2+])C(O)=O"
        ARG SMILES           CACTVS               3.341 "N[CH](CCCNC(N)=[NH2+])C(O)=O"
        ARG SMILES_CANONICAL "OpenEye OEToolkits" 1.5.0 "C(C[C@@H](C(=O)O)N)CNC(=[NH2+])N"
        ARG SMILES           "OpenEye OEToolkits" 1.5.0 "C(CC(C(=O)O)N)CNC(=[NH2+])N"
        ARG InChI            InChI                1.03  "InChI=1S/C6H14N4O2/c7-4(5(11)12)2-1-3-1..... "
        ARG InChIKey         InChI                1.03  ODKSFYDXXFIFQN-BYPYZUCNSA-O
        #

        :param ccId: value stored under 'comp_id' in every row
        :param oeMol: molecule object handed to each OpenEye descriptor generator
        :return: list of four dictionaries with keys 'comp_id', 'type',
                 'program', 'program_version' and 'descriptor'
    '''
    programName = "OpenEye OEToolkits"
    programVersion = '2016.2'
    # (descriptor type label, generator function) in output order.
    descriptorSources = (
        ('SMILES_CANONICAL', OECreateIsoSmiString),
        ('SMILES', OECreateCanSmiString),
        ('InChI', OECreateInChI),
        ('InChIKey', OECreateInChIKey),
    )
    return [
        {
            'comp_id': ccId,
            'type': descriptorType,
            'program': programName,
            'program_version': programVersion,
            'descriptor': makeDescriptor(oeMol),
        }
        for descriptorType, makeDescriptor in descriptorSources
    ]
def sanitizeSMILES(smiles_list, mode='drop', verbose=False):
    """
    Sanitize set of SMILES strings by ensuring all are canonical isomeric SMILES.
    Duplicates are also removed.

    Parameters
    ----------
    smiles_list : iterable of str
        The set of SMILES strings to sanitize.
    mode : str, optional, default='drop'
        When a SMILES string that does not correspond to canonical isomeric SMILES is found,
        select the action to be performed.
        'exception' : raise an `Exception`
        'drop' : drop the SMILES string
        'expand' : expand all stereocenters into multiple molecules
    verbose : bool, optional, default=False
        If True, print verbose output.

    Returns
    -------
    sanitized_smiles_list : list of str
        Sanitized list of canonical isomeric SMILES strings.
        The list is built from a set, so its order is not guaranteed.

    Raises
    ------
    ValueError
        If `mode` is not one of 'exception', 'drop', or 'expand'.
    Exception
        If `mode` is 'exception' and a molecule has undefined stereocenters.

    Examples
    --------

    Sanitize a simple list.

    >>> smiles_list = ['CC', 'CCC', '[H][C@]1(NC[C@@H](CC1CO[C@H]2CC[C@@H](CC2)O)N)[H]']

    Throw an exception if undefined stereochemistry is present.

    >>> sanitized_smiles_list = sanitizeSMILES(smiles_list, mode='exception')
    Traceback (most recent call last):
      ...
    Exception: Molecule '[H][C@]1(NC[C@@H](CC1CO[C@H]2CC[C@@H](CC2)O)N)[H]' has undefined stereocenters

    Drop molecules with undefined stereochemistry.

    >>> sanitized_smiles_list = sanitizeSMILES(smiles_list, mode='drop')
    >>> len(sanitized_smiles_list)
    2

    Expand molecules with undefined stereochemistry.

    >>> sanitized_smiles_list = sanitizeSMILES(smiles_list, mode='expand')
    >>> len(sanitized_smiles_list)
    4

    """
    # OETriposAtomNames is imported by name here so the function does not rely
    # on a module-level `oechem` binding.
    from openeye.oechem import (
        OEGraphMol, OESmilesToMol, OECreateIsoSmiString, OETriposAtomNames)

    # Reject unknown modes up front: otherwise a misspelled mode would silently
    # discard every molecule that has undefined stereocenters.
    valid_modes = ('exception', 'drop', 'expand')
    if mode not in valid_modes:
        raise ValueError(
            "Unknown mode '%s'; expected one of %s" % (mode, ', '.join(valid_modes)))

    sanitized_smiles_set = set()
    for smiles in smiles_list:
        molecule = OEGraphMol()
        # NOTE(review): the return value of OESmilesToMol is not checked, so an
        # unparseable SMILES string is not reported here -- confirm whether
        # callers want that surfaced.
        OESmilesToMol(molecule, smiles)

        if verbose:
            # Presumably labels the molecule so verbose output from the
            # stereocenter helpers is readable -- TODO confirm.
            molecule.SetTitle(smiles)
            OETriposAtomNames(molecule)

        if not has_undefined_stereocenters(molecule, verbose=verbose):
            # Convert to OpenEye's canonical isomeric SMILES.
            sanitized_smiles_set.add(OECreateIsoSmiString(molecule))
            continue

        if mode == 'drop':
            if verbose:
                print("Dropping '%s' due to undefined stereocenters." % smiles)
        elif mode == 'exception':
            raise Exception("Molecule '%s' has undefined stereocenters" % smiles)
        else:  # mode == 'expand' (validated above)
            if verbose:
                print('Expanding stereochemistry:')
                print('original: %s' % smiles)
            expanded_molecules = enumerate_undefined_stereocenters(molecule, verbose=verbose)
            for expanded_molecule in expanded_molecules:
                isosmiles = OECreateIsoSmiString(expanded_molecule)
                if verbose:
                    print('expanded: %s' % isosmiles)
                sanitized_smiles_set.add(isosmiles)

    sanitized_smiles_list = list(sanitized_smiles_set)
    return sanitized_smiles_list
def getIsoSMILES(self):
    """ Return the canonical stereo SMILES string derived from the current OE molecule.

        The string is produced by passing the stored molecule to OECreateIsoSmiString.
    """
    isoSmiles = OECreateIsoSmiString(self.__oeMol)
    return isoSmiles