Python OECreateIsoSmiString 예제들, openeye.oechem.OECreateIsoSmiString Python 예제들

예제 #1

0

파일 보기

    def test_repeating_molecules(self):
        """
        Check that the MiniDrugBank molecule lists contain no duplicates.

        For every index, the isomeric SMILES generated from the parm@frosst
        molecule (``TestMiniDrugBank.ff_mols``) must equal the one generated
        from the tripos molecule at the same index
        (``TestMiniDrugBank.tripos_mols``), and no isomeric SMILES may occur
        more than once across the parm@frosst molecules.
        """
        seen_smiles = set()
        for idx, ff_mol in enumerate(TestMiniDrugBank.ff_mols):
            # The two typed versions of a molecule are paired by index.
            tri_mol = TestMiniDrugBank.tripos_mols[idx]
            ff_smile = oechem.OECreateIsoSmiString(ff_mol)
            tri_smile = oechem.OECreateIsoSmiString(tri_mol)

            # SMILES should be the same for the two force fields.
            # The second placeholder used to be written "% should", which
            # Python reads as a space-flagged "%s" and which swallowed the
            # "s" of "should" in the failure message.
            self.assertEqual(
                ff_smile,
                tri_smile,
                msg="SMILES for tripos molecule %s and parm@frosst molecule %s "
                "should agree and don't"
                % (tri_mol.GetTitle(), ff_mol.GetTitle()))

            # there should also be no repeating smiles
            self.assertFalse(
                (ff_smile in seen_smiles),
                msg="Found repeating SMILES string for %s" % ff_mol.GetTitle())

            # remember this SMILES for the duplicate check above
            seen_smiles.add(ff_smile)

예제 #2

0

파일 보기

    def descriptorToMol(self,
                        descr,
                        descrType,
                        limitPerceptions=False,
                        messageTag=None):
        """Build a molecule object from a chemical descriptor string.

        SMILES and InChI descriptors are parsed first and then parsed again
        from the SMILES that OEChem writes for the parsed molecule: isomeric
        SMILES for SMILES types containing "ISO" and for InChI, canonical
        SMILES for any other SMILES type. SMARTS descriptors are passed
        straight to ``smartsToQmol``. The type match is case-insensitive.

        Args:
            descr (str): descriptor
            descrType (str): descriptor type
            limitPerceptions (bool): flag to limit the perceptions/transformations of input descriptor
            messageTag (str, optional): prefix string for error messages. Defaults to None.

        Returns:
            object: the object returned by ``smilesToMol``/``smartsToQmol``, or
                None when the type is not recognized, parsing yields a falsy
                result, or an exception is raised (the exception is logged).
        """
        try:
            kind = descrType.upper()
            parseOpts = {"limitPerceptions": limitPerceptions, "messageTag": messageTag}

            if "SMILES" in kind:
                parsed = self.smilesToMol(descr, **parseOpts)
                if not parsed:
                    return None
                if "ISO" in kind:
                    normalized = oechem.OECreateIsoSmiString(parsed)
                else:
                    normalized = oechem.OECreateCanSmiString(parsed)
                return self.smilesToMol(normalized, **parseOpts)

            if "INCHI" in kind:
                parsed = self.inchiToMol(descr, **parseOpts)
                if parsed:
                    return self.smilesToMol(oechem.OECreateIsoSmiString(parsed), **parseOpts)
                return None

            if "SMARTS" in kind:
                return self.smartsToQmol(descr, messageTag=messageTag)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
        return None

예제 #3

0

파일 보기

def canonicalize_SMILES(smiles_list):
    """Regenerate each SMILES string after a round trip through a Topology.

    Every input SMILES is converted to an OpenEye molecule, turned into a
    Topology, rebuilt as a molecule from the first residue of that Topology
    and finally written back out as an isomeric SMILES string.
    Stereochemistry must already have been expanded.

    Parameters
    ----------
    smiles_list : list of str
        List of SMILES strings

    Returns
    -------
    canonical_smiles_list : list of str
        One regenerated SMILES string per input, in input order.
    """
    from openmoltools.forcefield_generators import generateOEMolFromTopologyResidue, generateTopologyFromOEMol
    from perses.utils.openeye import smiles_to_oemol
    from openeye import oechem

    def _round_trip(smiles):
        # molecule -> Topology -> molecule rebuilt from the first residue
        topology = generateTopologyFromOEMol(smiles_to_oemol(smiles))
        first_residue = list(topology.residues())[0]
        rebuilt = generateOEMolFromTopologyResidue(first_residue)
        return oechem.OECreateIsoSmiString(rebuilt)

    return [_round_trip(smiles) for smiles in smiles_list]

예제 #4

0

파일 보기

파일: check_param_coverage.py 프로젝트: simonmb/open-forcefield-data

def find_parameter_ids(filename: str, indices: set) -> \
        ({int : {"smiles": str, "ids": {"id": ["atom_indices"]}}}, set):
    """Collect the parameter ids assigned to each selected molecule.

    Each molecule yielded by ``read_index_mols_from_file`` is given explicit
    hydrogens, its isomeric SMILES is generated, and its parameters are
    looked up with ``get_smirnoff_params``.

    Returns:
        - params_by_molecule: mapping from molecule index to a dict holding
            the molecule's SMILES string ("smiles") and its parameters ("ids")
        - param_ids: set of all parameter ids found
    """
    logging.info("Finding parameters for molecules")

    all_ids = set()
    per_molecule = {}

    for mol, index in read_index_mols_from_file(filename, indices):
        # Hydrogens are made explicit before the SMILES is written and
        # before parameters are assigned.
        oechem.OEAddExplicitHydrogens(mol)
        iso_smiles = oechem.OECreateIsoSmiString(mol)
        logging.info("Looking at molecule %d => %s", index, iso_smiles)

        mol_params = get_smirnoff_params(mol)
        logging.info("Parameter IDs: %s", list(mol_params.keys()))

        all_ids |= mol_params.keys()
        per_molecule[index] = dict(smiles=iso_smiles, ids=mol_params)

    return per_molecule, all_ids

예제 #5

0

파일 보기

def frag_to_smiles(frags, mol):
    """
    Group fragments by the isomeric SMILES of the sub-molecule they select
    Parameters
    ----------
    frags
        iterable of fragments (AtomBondSet) providing GetAtoms() and GetBonds()
    mol
        molecule the fragments are subsets of

    Returns
    -------
    smiles: dict mapping each SMILES string to the list of fragments that
        produced it

    """
    grouped = {}
    for frag in frags:
        atom_pred = oechem.OEIsAtomMember(frag.GetAtoms())
        bond_pred = oechem.OEIsBondMember(frag.GetBonds())

        submol = oechem.OEGraphMol()
        # The trailing True is the adjustHCount argument of OESubsetMol.
        oechem.OESubsetMol(submol, mol, atom_pred, bond_pred, True)

        key = oechem.OECreateIsoSmiString(submol)
        grouped.setdefault(key, []).append(frag)
    return grouped

예제 #6

0

파일 보기

    def descriptorToSmiles(self,
                           descr,
                           descrType,
                           limitPerceptions=False,
                           messageTag=None):
        """Convert a chemical descriptor string into an OE SMILES string.

        SMILES types containing "ISO" and InChI descriptors produce an
        isomeric SMILES; any other SMILES type produces a canonical SMILES.
        The type match is case-insensitive.

        Args:
            descr (str): descriptor
            descrType (str): descriptor type
            limitPerceptions (bool): flag to limit the perceptions/transformations of input descriptor
            messageTag (str, optional): prefix string for error messages. Defaults to None.

        Returns:
            str: SMILES string, or None when the type is not recognized,
                parsing yields a falsy result, or an exception is raised
                (the exception is logged).
        """
        try:
            kind = descrType.upper()

            if "SMILES" in kind:
                parsed = self.smilesToMol(descr,
                                          limitPerceptions=limitPerceptions,
                                          messageTag=messageTag)
                if not parsed:
                    return None
                if "ISO" in kind:
                    return oechem.OECreateIsoSmiString(parsed)
                return oechem.OECreateCanSmiString(parsed)

            if "INCHI" in kind:
                parsed = self.inchiToMol(descr,
                                         limitPerceptions=limitPerceptions,
                                         messageTag=messageTag)
                if parsed:
                    return oechem.OECreateIsoSmiString(parsed)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
        return None

예제 #7

0

파일 보기

파일: test_topology_proposal.py 프로젝트: mrauha/perses

def test_molecular_atom_mapping():
    """
    Render atom maps between the first ligand of a JACS benchmark set and
    every other ligand in that set.

    For each pair the atom map is computed, drawn to a PNG file and printed
    together with the isomeric SMILES of both molecules for manual inspection.
    Nothing is asserted.

    """
    from openeye import oechem
    from perses.rjmc.topology_proposal import SmallMoleculeSetProposalEngine
    from perses.tests.utils import createOEMolFromSMILES
    from perses.tests.utils import render_atom_mapping
    from itertools import combinations

    # Only CDK2 is enabled; the other JACS sets ('p38', 'Tyk2', 'Thrombin',
    # 'PTP1B', 'MCL1', 'Jnk1', 'Bace') are currently left out.
    dataset_names = ['CDK2']
    for dataset_name in dataset_names:
        # Read molecules
        dataset_path = 'data/schrodinger-jacs-datasets/%s_ligands.sdf' % dataset_name
        ligand_filename = resource_filename('perses', dataset_path)
        ifs = oechem.oemolistream(ligand_filename)
        molecules = [oechem.OEGraphMol(mol) for mol in ifs.GetOEGraphMols()]

        # Map everything against the first molecule only; all pairwise
        # combinations were noted as too slow.
        reference = molecules[0]
        for i, candidate in enumerate(molecules[1:]):
            new_to_old_atom_map = SmallMoleculeSetProposalEngine._get_mol_atom_map(
                reference, candidate)
            # Atom lists for the (currently disabled) check that hydrogens
            # are not mapped onto other elements.
            atoms1 = list(reference.GetAtoms())
            atoms2 = list(candidate.GetAtoms())
            filename = 'mapping-error-%d.png' % i
            render_atom_mapping(filename, reference, candidate,
                                new_to_old_atom_map)
            report = [
                'molecule 1 : %s\n' % oechem.OECreateIsoSmiString(reference),
                'molecule 2 : %s\n' % oechem.OECreateIsoSmiString(candidate),
                'Wrote atom mapping to %s for inspection; please check this.' % filename,
                str(new_to_old_atom_map),
            ]
            print(''.join(report))

예제 #8

0

파일 보기

def filter_molecules(input_molstream,
                     output_molstream,
                     allow_repeats=False,
                     allow_warnings=False,
                     max_heavy_atoms=100,
                     remove_smirks=None,
                     max_metals=0,
                     explicitHs=True,
                     elements=None,
                     check_type=None):
    """
    Read molecules from an input stream, drop those that fail the given
    criteria and write the remaining ones to an output stream.

    Parameters
    ----------
    input_molstream : stream molecules are read from with OEReadMolecule
    output_molstream : stream kept molecules are written to
    allow_repeats : if False, a molecule whose isomeric SMILES was already
        saved is skipped
    allow_warnings : if False, a molecule is skipped when the captured
        OEThrow output contains the word "warning" after reading it
    max_heavy_atoms, remove_smirks, max_metals, elements, check_type :
        passed unchanged to ``keep_molecule``; ``remove_smirks=None`` is
        treated as an empty list
    explicitHs : if True, explicit hydrogens are added to the copy that is
        checked and written

    Side effects
    ------------
    Redirects ``oechem.OEThrow`` output to an internal string stream (it is
    not restored afterwards) and prints a summary of the counts.
    """
    # A fresh list per call instead of one list shared by every call.
    if remove_smirks is None:
        remove_smirks = []

    errs = oechem.oeosstream()
    oechem.OEThrow.SetOutputStream(errs)

    molecule = oechem.OECreateOEGraphMol()
    # Set membership keeps the duplicate check constant-time per molecule.
    seen_smiles = set()

    count = 0
    warnings = 0
    smile_count = 0
    saved = 0

    while oechem.OEReadMolecule(input_molstream, molecule):
        count += 1
        if ("warning" in errs.str().lower()) and not allow_warnings:
            warnings += 1
            errs.clear()
            continue

        # The SMILES is taken from the molecule as read, before any
        # hydrogens are added to the working copy.
        smi = oechem.OECreateIsoSmiString(molecule)
        new_smile = smi not in seen_smiles
        if not new_smile:
            smile_count += 1

        if new_smile or allow_repeats:
            # Copy only when the molecule can actually be kept.
            mol_copy = oechem.OEMol(molecule)
            if explicitHs:
                oechem.OEAddExplicitHydrogens(mol_copy)
            if keep_molecule(mol_copy, max_heavy_atoms, remove_smirks,
                             max_metals, elements, check_type):
                seen_smiles.add(smi)
                oechem.OEWriteMolecule(output_molstream, mol_copy)
                saved += 1
        errs.clear()

    print(f"{count} molecules in input stream")
    print(f"{warnings} molecules resulted in warnings when parsing")
    print(f"{smile_count} molecules were had repeated isomeric SMILES")
    print(f"{saved} molecules saved")

예제 #9

0

파일 보기

 def __makeChemCompDescriptorCategory(self, ccId, oeMol):
     """Build the rows of the pdbx_chem_comp_descriptor category for a molecule.

     Four rows are produced, in this order, each with the items comp_id,
     type, program, program_version and descriptor:

         SMILES_CANONICAL  -> oechem.OECreateIsoSmiString(oeMol)
         SMILES            -> oechem.OECreateCanSmiString(oeMol)
         InChI             -> oechem.OECreateInChI(oeMol)
         InChIKey          -> oechem.OECreateInChIKey(oeMol)

     Example of the target category:

         loop_
         _pdbx_chem_comp_descriptor.comp_id
         _pdbx_chem_comp_descriptor.type
         _pdbx_chem_comp_descriptor.program
         _pdbx_chem_comp_descriptor.program_version
         _pdbx_chem_comp_descriptor.descriptor
         ARG SMILES_CANONICAL "OpenEye OEToolkits" 1.5.0 "C(C[C@@H](C(=O)O)N)CNC(=[NH2+])N"
         ARG SMILES           "OpenEye OEToolkits" 1.5.0 "C(CC(C(=O)O)N)CNC(=[NH2+])N"
         ARG InChIKey         InChI                1.03  ODKSFYDXXFIFQN-BYPYZUCNSA-O

     Returns a list of row dictionaries.
     """
     # (descriptor type label, OEChem function that generates it)
     descriptorBuilders = (
         ("SMILES_CANONICAL", oechem.OECreateIsoSmiString),
         ("SMILES", oechem.OECreateCanSmiString),
         ("InChI", oechem.OECreateInChI),
         ("InChIKey", oechem.OECreateInChIKey),
     )
     rowL = []
     for descrLabel, buildDescriptor in descriptorBuilders:
         rowL.append({
             "comp_id": ccId,
             "type": descrLabel,
             "program": "OpenEye OEToolkits",
             "program_version": self.__oeVersion,
             "descriptor": buildDescriptor(oeMol),
         })
     return rowL

예제 #10

0

파일 보기

    def standardizeSmiles(self, smiles, type="ISOMERIC"):  # pylint: disable=redefined-builtin
        """Return a standardized SMILES of the requested type, or None.

        Args:
            smiles (str): input SMILES string
            type (str, optional): "ISOMERIC" (default) or "CANONICAL". The
                historical misspelling "CANNONICAL" is still accepted so that
                existing callers keep working.

        Returns:
            str: the regenerated SMILES, or None when the input cannot be
                parsed, the type is not supported, or an exception is raised
                (each of these cases is logged).
        """
        smilesOut = None
        try:
            mol = oechem.OEGraphMol()
            if oechem.OEParseSmiles(mol, smiles) == 1:
                oechem.OEAssignAromaticFlags(mol)
                # Previously only the misspelled keyword matched, so the
                # correctly spelled "CANONICAL" silently produced None.
                if type in ("CANONICAL", "CANNONICAL"):
                    smilesOut = oechem.OECreateCanSmiString(mol)
                elif type == "ISOMERIC":
                    smilesOut = oechem.OECreateIsoSmiString(mol)
                else:
                    logger.error("Unsupported SMILES type '%s'", type)
            else:
                logger.error("Unable to parse input SMILES '%s'", smiles)

        except Exception as e:
            logger.exception("Error '%s' occured. Arguments %s.", str(e), e.args)

        return smilesOut

예제 #11

0

파일 보기

파일: coordinates_for_DrugBank.py 프로젝트: wade1990/openforcefield

# Generate conformers for each input molecule in a separate process so that a
# job which takes too long can be abandoned.
c_mol = oechem.OECreateOEGraphMol()
while oechem.OEReadMolecule(ifs, c_mol):
    index += 1
    # process molecules individually, storing less
    p = multiprocessing.Process(target=genConfs,
                                args=(c_mol, ofsff, ofsTri, index))
    p.start()
    # Wait at most 24 seconds for the worker to finish.
    p.join(24)

    if p.is_alive():
        # Still running after the timeout: record the molecule as failed.
        print("TIMED OUT %s" % oechem.OECreateIsoSmiString(c_mol))
        oechem.OEWriteConstMolecule(ofsFail, oechem.OEMol(c_mol))
        time_out += 1
    elif p.exitcode:
        # NOTE(review): a truthy (non-zero) exit code is counted as success
        # here - presumably genConfs exits with a non-zero code on success;
        # confirm against genConfs.
        success += 1
    else:
        print("CONF FAIL %s" % oechem.OECreateIsoSmiString(c_mol))
        oechem.OEWriteConstMolecule(ofsFail, oechem.OEMol(c_mol))
        conf_fail += 1

    # Every outcome ends by terminating and reaping the worker.
    p.terminate()
    p.join()

예제 #12

0

파일 보기

파일: rms_calculator.py 프로젝트: driesvr/fragment_expansion

 def __init__(self, refmol):
     """Keep a copy of the reference molecule and prepare to match against it.

     Stores an OEGraphMol copy of ``refmol``, an OESubSearch built from the
     isomeric SMILES of ``refmol``, and the result of ``get_match`` on the
     stored copy.
     """
     self.refmol = oechem.OEGraphMol(refmol)
     # The isomeric SMILES of the reference serves as the search pattern.
     ref_pattern = oechem.OECreateIsoSmiString(refmol)
     self.ss = oechem.OESubSearch(ref_pattern)
     self.ref_match = self.get_match(self.refmol)

예제 #13

0

파일 보기

파일: filter_molecules.py 프로젝트: techeye220/off-ffcompare

def eMolecules_filtering(input_f, current_smiles=None):
    """
    This function was used to filter eMolecules database
    and the eMolecules_incremental database.

    Molecules whose isomeric SMILES is in ``current_smiles`` are skipped.
    Every other molecule is copied, given explicit hydrogens and, if
    ``keep_molecule`` accepts it, renamed to "<set>_<file number>_<count>",
    written to the current sdf file and recorded as a
    "title<TAB><TAB>smiles" line in the current text file.

    A new sdf file is started after every 1000 saved molecules (file numbers
    start at 1000). A new text file is started whenever the sdf file number
    reaches a multiple of 100.

    Parameter
    ---------
    input_f : string "path/to/inputfile.sdf"
    current_smiles : iterable of strings; smiles already in your molecule
        sets. None (the default) means no smiles are excluded.

    Raises
    ------
    Exception if the input file or an output file cannot be opened.
    """
    # Looked up once per input molecule, so use a set; this also avoids a
    # mutable default argument shared between calls.
    known_smiles = set(current_smiles) if current_smiles is not None else set()

    # NOTE(review): everything after the first '.' is dropped, so a path such
    # as "./data/x.sdf" yields an empty set name - confirm callers pass plain
    # file names.
    set_name = input_f.split('.')[0]
    output_f = set_name+"_%i.sdf"
    smiles_base = set_name+"_%i.txt"
    molecule_name = set_name+"_%i_%i"

    # Load and check input file
    ifs = oechem.oemolistream(input_f)
    if not ifs.IsValid():
        raise Exception("Error: input_file (%s) was not valid" % input_f)

    errs = oechem.oeosstream()
    oechem.OEThrow.SetOutputStream(errs)

    molecule = oechem.OECreateOEGraphMol()
    count = 0
    smile_count = 0
    saved = 0
    switch = False

    # first output file
    current_letter = 1000
    ofs_file = output_f % current_letter
    ofs = oechem.oemolostream(ofs_file)
    if not ofs.IsValid():
        raise Exception("output file %s is not valid" % ofs_file)
    add_smiles = open(smiles_base % current_letter, 'a')

    try:
        while oechem.OEReadMolecule(ifs, molecule):
            # count input file molecules
            count += 1

            if switch:  # If True create new output file
                switch = False
                ofs.close()
                current_letter += 1
                ofs_file = output_f % current_letter
                # Load and check output file
                ofs = oechem.oemolostream(ofs_file)
                if not ofs.IsValid():
                    raise Exception("output file %s is not valid" % ofs_file)
                print("Switching to file %s, currently saved %i molecules" % (ofs_file, saved))
                if current_letter % 100 == 0:
                    add_smiles.close()
                    add_smiles = open(smiles_base % current_letter, 'a')

            # IF smiles in current list skip the molecule
            smi = oechem.OECreateIsoSmiString(molecule)
            if smi in known_smiles:
                smile_count += 1
                continue

            # Make copy of molecule before making changes
            mol_copy = oechem.OEMol(molecule)
            oechem.OEAddExplicitHydrogens(mol_copy)
            # if the molecule meets our requirements save to current output
            if keep_molecule(mol_copy):
                mol_title = molecule_name % (current_letter, count)
                mol_copy.SetTitle(mol_title)
                add_smiles.write("%s\t\t%s\n" % (mol_title, smi))
                oechem.OEWriteMolecule(ofs, mol_copy)
                saved += 1
                if saved % 1000 == 0:
                    switch = True
    finally:
        # Close every stream, including the smiles text file that was
        # previously left open, even if an output file could not be opened.
        ifs.close()
        ofs.close()
        add_smiles.close()

    print("%i molecules in input file" % (count))
    print("%i molecules were had repeated isomeric SMILES" % smile_count)
    print("%i molecules saved to output files" % (saved))

예제 #14

0

파일 보기

파일: filter_molecules.py 프로젝트: techeye220/off-ffcompare

def check_valence(mol):
    """
    Checks for hypervalency
    Parameter
    ---------
    mol - OEMol()

    Return
    ------
    boolean - True (no inappropriate valency)
              False (an atom with atomic number <= 10 has valence > 4);
              the offending atom is reported with print
    """
    for atom in mol.GetAtoms():
        atomic_number = atom.GetAtomicNum()
        atom_valence = atom.GetValence()
        # Only atoms up to atomic number 10 are limited to a valence of 4.
        if atomic_number > 10 or atom_valence <= 4:
            continue
        print("Found a #%i atom with valence %i in molecule %s" % (atomic_number, atom_valence, oechem.OECreateIsoSmiString(mol)))
        return False
    return True

예제 #15

0

파일 보기

파일: filter_molecules.py 프로젝트: techeye220/off-ffcompare

        # count input file
        count +=1

        if switch: # If True, open new output file
            switch = False
            ofs.close()
            current_letter = letters.pop(0)
            ofs_file = output_f % current_letter
            # Load and check output file
            ofs = oechem.oemolostream(ofs_file)
            if not ofs.IsValid():
                raise Exception("output file %s is not valid" % ofs_file)
            print("Switching to file %s, currently saved %i molecules" % (ofs_file, saved))

        # get isomeric smiles string
        smi = oechem.OECreateIsoSmiString(molecule)
        # if it isn't a new molecule skip it
        if smi in current_smiles:
            smile_count += 1
            continue

        # create and save molecule name in form DrugBank_[letter][number]
        mol_title = molecule_name % (current_letter,count)

        # Make copy before making changes to molecule
        mol_copy = oechem.OEMol(molecule)
        mol_copy.SetTitle(mol_title)
        oechem.OEAddExplicitHydrogens(mol_copy)
        # Determine if molecule meets requirements
        keep = keep_molecule(mol_copy)
        if keep:

예제 #16

0

파일 보기

def get_molecule_parameterIDs(oemols, ffxml):
    """Label molecules with a SMIRNOFF force field and collate which parameter IDs each molecule uses.


    Parameters
    ----------
    oemols : list
        List of OpenEye OEChem molecules to parse; must have explicit hydrogens.
    ffxml
        Force field definition handed to ``ForceField``.

    Returns
    -------
    parameters_by_molecule : dict
        Parameter IDs used in each molecule, keyed by isomeric SMILES
        generated from provided OEMols. Each entry in the dict is a list
        which does not necessarily have unique entries; i.e. parameter IDs
        which are used more than once will occur multiple times.

    parameters_by_ID : dict
        Molecules in which each parameter ID occur, keyed by parameter ID.
        Each entry in the dict is a set of isomeric SMILES for molecules
        in which that parameter occurs. No frequency information is stored.

    Raises
    ------
    ValueError
        If two of the provided molecules have the same isomeric SMILES.

    """

    # Generate isomeric SMILES, refusing duplicates
    isosmiles = []
    for mol in oemols:
        smi = oechem.OECreateIsoSmiString(mol)
        if smi in isosmiles:
            raise ValueError(
                "Error: get_molecule_parameterIDs has been provided a list of oemols which contains the same molecule, having isomeric smiles %s, more than once."
                % smi)
        isosmiles.append(smi)

    # Label molecules
    labels = ForceField(ffxml).labelMolecules(oemols)

    # Collate the labels; labels are matched to SMILES by position
    parameters_by_molecule = {}
    parameters_by_ID = {}
    for position, smi in enumerate(isosmiles):
        used_pids = parameters_by_molecule[smi] = []
        mol_labels = labels[position]
        for force_type in mol_labels.keys():
            for (_atom_indices, pid, _smirks) in mol_labels[force_type]:
                # One entry per use of the parameter in this molecule
                used_pids.append(pid)
                # Record that this molecule uses the parameter
                parameters_by_ID.setdefault(pid, set()).add(smi)

    return parameters_by_molecule, parameters_by_ID

예제 #17

0

파일 보기

파일: filter_molecule_sets.py 프로젝트: wade1990/openforcefield

def check_atomtype(mol, types):
    """
    Report whether a molecule is free of the given atom types.

    Returns False (after printing the offending type and the molecule's
    isomeric SMILES) as soon as an atom whose GetType() is in ``types`` is
    found; returns True otherwise.
    """
    for atom in mol.GetAtoms():
        atom_type = atom.GetType()
        if atom_type not in types:
            continue
        print("Found type %s atom in molecule %s" % (atom_type, oechem.OECreateIsoSmiString(mol)))
        return False
    return True

예제 #18

0

파일 보기

파일: testOeDepictCompare.py 프로젝트: rcsb/py-rcsb_utils_chem

    def __testReproduceDescriptors(self, molBuildType, limitPerceptions=True):
        """Rebuild each chemical component and compare its descriptors with the indexed values.

        For every component definition the molecule is built with
        OeMoleculeFactory, the SMILES produced directly by OEChem are checked
        against those reported by the factory, and the factory's SMILES,
        formula, InChIKey and InChI are compared with the archived index
        entry. Differences are logged and tallied; the tallies are logged at
        the end. Nothing is asserted.

        Args:
            molBuildType: build type passed to ``OeMoleculeFactory.build``
            limitPerceptions (bool): passed to ``OeMoleculeFactory.build``
        """
        ccMolD, ccIdxD = self.__getChemCompDefs()
        oemf = OeMoleculeFactory()
        countD = defaultdict(int)
        for ccId, ccDef in ccMolD.items():
            # Skip definitions for which the factory reports another identifier.
            if ccId != oemf.setChemCompDef(ccDef):
                continue
            oemf.build(molBuildType=molBuildType,
                       limitPerceptions=limitPerceptions)
            oeMol = oemf.getMol()

            countD["total components"] += 1
            if ccId not in ccIdxD:
                logger.info("Missing ccIndex entry for %s", ccId)
                continue
            archivedD = ccIdxD[ccId]
            if archivedD["ambiguous"]:
                countD["ambiguous component"] += 1
                continue

            countD["total molecules"] += 1

            # SMILES straight from OEChem versus the values held by the factory
            nativeCanIsoSmiles = oechem.OECreateIsoSmiString(oeMol)
            canIsoSmiles = oechem.OEMolToSmiles(oeMol)
            isoSmiles = oemf.getIsoSMILES()
            canSmiles = oemf.getCanSMILES()

            # internal consistency
            if nativeCanIsoSmiles != isoSmiles:
                logger.error("%s stored and calculated OE smiles differ %s %s",
                             ccId, nativeCanIsoSmiles, isoSmiles)
            if canIsoSmiles != isoSmiles:
                logger.error(
                    "%s calculated OE ISO and canonical smiles differ %s %s",
                    ccId, isoSmiles, canIsoSmiles)

            # comparison with archived values
            archivedIsoSmiles = archivedD["oe-iso-smiles"]
            if isoSmiles != archivedIsoSmiles:
                logger.info("%s ISO SMILES differ \nccd: %r  \nOE:  %r", ccId,
                            archivedIsoSmiles, isoSmiles)
                countD["iso_smiles_diff"] += 1

            archivedSmiles = archivedD["oe-smiles"]
            if canSmiles != archivedSmiles:
                logger.info("%s CAN SMILES differ \nccd: %r  \nOE:  %r", ccId,
                            archivedSmiles, canSmiles)
                countD["smiles_diff"] += 1

            # formulas are compared case-insensitively
            formula = oemf.getFormula()
            archivedFormula = archivedD["formula"]
            if formula.upper() != archivedFormula.upper():
                logger.debug("%s formulas differ \nccd: %r  \nOE:  %r", ccId,
                             archivedFormula, formula)
                countD["formula_diff"] += 1

            inchiKey = oemf.getInChIKey()
            archivedInchiKey = archivedD["inchikey"]
            if inchiKey != archivedInchiKey:
                logger.debug("%s InChI keys differ \nccd: %r  \nOE:  %r", ccId,
                             archivedInchiKey, inchiKey)
                countD["inchikey_diff"] += 1

            inchi = oemf.getInChI()
            archivedInchi = archivedD["inchi"]
            if inchi != archivedInchi:
                logger.debug("%s InChIs differ \nccd: %r  \nOE:  %r", ccId,
                             archivedInchi, inchi)
                countD["inchi_diff"] += 1

        # summary of all tallies
        for label, total in countD.items():
            logger.info("%-12s %6d", label, total)