예제 #1
0
def test_sanitizeSMILES():
    """
    Exercise `sanitizeSMILES` in 'drop' and 'expand' modes.

    The input holds three SMILES strings. The test expects two results in
    'drop' mode and four in 'expand' mode, and then checks that every
    expanded SMILES string is reproduced unchanged when parsed and
    re-written as isomeric SMILES by OpenEye.

    Raises
    ------
    Exception
        If a result count is not the expected one, or if a SMILES string
        does not survive the round trip.
    """
    inputs = [
        'CC',
        'CCC',
        '[H][C@]1(NC[C@@H](CC1CO[C@H]2CC[C@@H](CC2)O)N)[H]',
    ]

    # 'drop' mode: two of the three inputs are expected to survive.
    n_kept = len(sanitizeSMILES(inputs, mode='drop'))
    if n_kept != 2:
        raise Exception(
            "Molecules with undefined stereochemistry are not being properly dropped (size=%d)."
            % n_kept)

    # 'expand' mode: four SMILES strings are expected in total.
    expanded = sanitizeSMILES(inputs, mode='expand')
    n_expanded = len(expanded)
    if n_expanded != 4:
        raise Exception(
            "Molecules with undefined stereochemistry are not being properly expanded (size=%d)."
            % n_expanded)

    # Round-trip check: parsing a sanitized SMILES string and writing it
    # back out must give the same string.
    from openeye.oechem import OEGraphMol, OESmilesToMol, OECreateIsoSmiString
    for original in expanded:
        mol = OEGraphMol()
        OESmilesToMol(mol, original)
        regenerated = OECreateIsoSmiString(mol)
        if regenerated == original:
            continue
        raise Exception(
            "Molecule '%s' was not properly round-tripped (result was '%s')"
            % (original, regenerated))
예제 #2
0
 def __makeChemCompDescriptorCategory(self, ccId, oeMol):
     """Build the rows of the pdbx_chem_comp_descriptor category for one component.

     Four rows are produced, in this order: SMILES_CANONICAL (from
     OECreateIsoSmiString), SMILES (from OECreateCanSmiString), InChI
     (from OECreateInChI) and InChIKey (from OECreateInChIKey).  Every row
     records the program as "OpenEye OEToolkits" with version '2016.2'.

     Example of the target category:

         loop_
         _pdbx_chem_comp_descriptor.comp_id
         _pdbx_chem_comp_descriptor.type
         _pdbx_chem_comp_descriptor.program
         _pdbx_chem_comp_descriptor.program_version
         _pdbx_chem_comp_descriptor.descriptor
         ARG SMILES           ACDLabs              10.04 "O=C(O)C(N)CCCN\\C(=[NH2+])N"
         ARG SMILES_CANONICAL CACTVS               3.341 "N[C@@H](CCCNC(N)=[NH2+])C(O)=O"
         ARG SMILES           CACTVS               3.341 "N[CH](CCCNC(N)=[NH2+])C(O)=O"
         ARG SMILES_CANONICAL "OpenEye OEToolkits" 1.5.0 "C(C[C@@H](C(=O)O)N)CNC(=[NH2+])N"
         ARG SMILES           "OpenEye OEToolkits" 1.5.0 "C(CC(C(=O)O)N)CNC(=[NH2+])N"
         ARG InChI            InChI                1.03  "InChI=1S/C6H14N4O2/c7-4(5(11)12)2-1-3-1..... "
         ARG InChIKey         InChI                1.03  ODKSFYDXXFIFQN-BYPYZUCNSA-O
         #

     Args:
         ccId: value stored under 'comp_id' in every row.
         oeMol: molecule object handed to the OpenEye descriptor functions.

     Returns:
         list of dict: one dictionary per descriptor, with the keys
         'comp_id', 'type', 'program', 'program_version' and 'descriptor'.
     """
     def _descriptorRow(descriptorType, descriptorText):
         # All rows share the component id and the program identification.
         return {
             'comp_id': ccId,
             'type': descriptorType,
             'program': "OpenEye OEToolkits",
             'program_version': '2016.2',
             'descriptor': descriptorText,
         }

     return [
         _descriptorRow('SMILES_CANONICAL', OECreateIsoSmiString(oeMol)),
         _descriptorRow('SMILES', OECreateCanSmiString(oeMol)),
         _descriptorRow('InChI', OECreateInChI(oeMol)),
         _descriptorRow('InChIKey', OECreateInChIKey(oeMol)),
     ]
예제 #3
0
파일: utils.py 프로젝트: CHEMPHY/perses
def sanitizeSMILES(smiles_list, mode='drop', verbose=False):
    """
    Sanitize a set of SMILES strings by converting them to OpenEye isomeric SMILES.

    Every input string is parsed into an OpenEye molecule and written back
    out with `OECreateIsoSmiString`. Duplicates are removed.

    Parameters
    ----------
    smiles_list : iterable of str
        The set of SMILES strings to sanitize.
    mode : str, optional, default='drop'
        Action to perform when a molecule with undefined stereocenters is found.
        'exception' : raise an `Exception`
        'drop' : drop the SMILES string
        'expand' : expand all undefined stereocenters into multiple molecules
        Any other value leaves such molecules out of the result without
        printing a message.
    verbose : bool, optional, default=False
        If True, print verbose output.

    Returns
    -------
    sanitized_smiles_list : list of str
         Sanitized list of canonical isomeric SMILES strings. The list is
         built from a set, so its order is not defined.

    Raises
    ------
    Exception
        If `mode` is 'exception' and a molecule has undefined stereocenters.

    Examples
    --------

    Sanitize a simple list.

    >>> smiles_list = ['CC', 'CCC', '[H][C@]1(NC[C@@H](CC1CO[C@H]2CC[C@@H](CC2)O)N)[H]']

    Throw an exception if undefined stereochemistry is present.

    >>> sanitized_smiles_list = sanitizeSMILES(smiles_list, mode='exception')
    Traceback (most recent call last):
      ...
    Exception: Molecule '[H][C@]1(NC[C@@H](CC1CO[C@H]2CC[C@@H](CC2)O)N)[H]' has undefined stereocenters

    Drop molecules with undefined stereochemistry.

    >>> sanitized_smiles_list = sanitizeSMILES(smiles_list, mode='drop')
    >>> len(sanitized_smiles_list)
    2

    Expand molecules with undefined stereochemistry.

    >>> sanitized_smiles_list = sanitizeSMILES(smiles_list, mode='expand')
    >>> len(sanitized_smiles_list)
    4

    """
    # OETriposAtomNames is imported by name here so the verbose path does not
    # depend on a module-level `oechem` binding being present.
    from openeye.oechem import (OEGraphMol, OESmilesToMol,
                                OECreateIsoSmiString, OETriposAtomNames)
    sanitized_smiles_set = set()
    for smiles in smiles_list:
        molecule = OEGraphMol()
        # NOTE(review): the result of OESmilesToMol is not inspected, so a
        # string that fails to parse is not reported here - confirm whether
        # callers rely on that.
        OESmilesToMol(molecule, smiles)

        if verbose:
            molecule.SetTitle(smiles)
            OETriposAtomNames(molecule)

        if not has_undefined_stereocenters(molecule, verbose=verbose):
            # Convert to OpenEye's canonical isomeric SMILES.
            sanitized_smiles_set.add(OECreateIsoSmiString(molecule))
            continue

        if mode == 'drop':
            if verbose:
                print("Dropping '%s' due to undefined stereocenters." % smiles)
        elif mode == 'exception':
            raise Exception("Molecule '%s' has undefined stereocenters" % smiles)
        elif mode == 'expand':
            if verbose:
                print('Expanding stereochemistry:')
                print('original: %s' % smiles)
            # A separate loop variable keeps the parsed `molecule` intact.
            for expanded in enumerate_undefined_stereocenters(molecule, verbose=verbose):
                isosmiles = OECreateIsoSmiString(expanded)
                if verbose:
                    print('expanded: %s' % isosmiles)
                sanitized_smiles_set.add(isosmiles)

    return list(sanitized_smiles_set)
예제 #4
0
 def getIsoSMILES(self):
     """Return the isomeric SMILES string for the OE molecule held by this object.

     The string is produced by passing the stored molecule to
     OECreateIsoSmiString.
     """
     isoSmiles = OECreateIsoSmiString(self.__oeMol)
     return isoSmiles