def sanitizeOEMolecule(molecule):
    """
    Return a sanitized copy of an OpenEye molecule.

    The input molecule is never modified; all checks and fixes are applied
    to a copy. The following steps are performed in order:

    1. Coordinates: unless the molecule has exactly one atom, a dimension
       of zero (as reported by ``OEGetDimensionFromCoords``) is treated as
       an error.
    2. Hydrogens: explicit hydrogens are added if the molecule has none.
    3. Aromaticity: if aromaticity has not been perceived, aromatic flags
       are assigned using the MDL aromaticity model.
    4. Chirality: perceived if it has not been perceived yet.
    5. Atom names: if any atom name is empty or any name is duplicated,
       all atoms are renamed with Tripos atom names.

    Parameters:
    -----------
    molecule: OEMol
        The molecule to be checked

    Return:
    -------
    sanitized: OEMol
        A copy of the input molecule with hydrogens, aromaticity,
        chirality and atom names fixed where they were missing

    Raises:
    -------
    ValueError
        If a molecule that does not have exactly one atom has coordinates
        of dimension zero
    """
    sanitized = oechem.OEMol(molecule)

    # Mono-atomic molecules are exempt from the coordinate check
    if sanitized.NumAtoms() != 1 and not oechem.OEGetDimensionFromCoords(sanitized):
        raise ValueError("The molecule coordinates are set to zero")

    # Make sure hydrogens are explicit
    if not oechem.OEHasExplicitHydrogens(sanitized):
        oechem.OEAddExplicitHydrogens(sanitized)

    # Make sure aromaticity has been assigned (MDL model)
    if not sanitized.HasPerceived(oechem.OEPerceived_Aromaticity):
        oechem.OEAssignAromaticFlags(sanitized, oechem.OEAroModelMDL)

    # Make sure chirality has been perceived
    if not sanitized.HasPerceived(oechem.OEPerceived_Chiral):
        oechem.OEPerceiveChiral(sanitized)

    # Atom names must all be present and unique; otherwise every atom is
    # renamed using Tripos atom names
    names = [atom.GetName() for atom in sanitized.GetAtoms()]
    has_duplicates = len(set(names)) != len(names)
    has_blank = '' in names
    if has_duplicates or has_blank:
        oechem.OETriposAtomNames(sanitized)

    return sanitized
def rdmol_from_oemol(oemol):
    """
    Create an RDKit molecule identical to the input OpenEye molecule.

    For every atom the atomic number, formal charge, aromaticity and
    tetrahedral chirality are transferred; for every bond the order,
    aromaticity and specified cis/trans stereochemistry are transferred.
    Coordinates are copied into a conformer when the OpenEye molecule has
    a non-zero dimension, and the molecule title is stored in the RDKit
    ``_Name`` property.

    Reference
    ---------
    Written by Caitlin Bannan:
    https://gist.github.com/bannanc/810ccc4636b930a4522636baab1965a6

    May not be needed in newer openforcefield versions. See:
    https://github.com/openforcefield/openforcefield/issues/135

    Parameters
    ----------
    oemol : OEMol
        The OpenEye molecule to convert. It is only read, never modified
        by this function's own calls to RDKit.

    Returns
    -------
    rdmol : RDKit molecule
        The converted molecule, as returned by ``RWMol.GetMol()``.
    """
    rdmol = Chem.RWMol()

    # RDKit stores the bond order as a bond type rather than a number.
    # The key 7 -> ONEANDAHALF entry is carried over unchanged from the
    # example this code was adapted from; its purpose is unclear.
    _bondtypes = {
        1: Chem.BondType.SINGLE,
        1.5: Chem.BondType.AROMATIC,
        2: Chem.BondType.DOUBLE,
        3: Chem.BondType.TRIPLE,
        4: Chem.BondType.QUADRUPLE,
        5: Chem.BondType.QUINTUPLE,
        6: Chem.BondType.HEXTUPLE,
        7: Chem.BondType.ONEANDAHALF,
    }

    # Atom map to find atoms again later: {oe_idx: rd_idx}
    map_atoms = dict()
    for oea in oemol.GetAtoms():
        oe_idx = oea.GetIdx()
        rda = Chem.Atom(oea.GetAtomicNum())
        rda.SetFormalCharge(oea.GetFormalCharge())
        rda.SetIsAromatic(oea.IsAromatic())

        # Unlike OpenEye, RDKit lets you set chirality directly.
        # NOTE(review): the CIP label is mapped straight onto a CW/CCW tag
        # here; RDKit chiral tags depend on neighbor ordering, so this
        # mapping may not hold for every molecule - confirm.
        cip = oechem.OEPerceiveCIPStereo(oemol, oea)
        if cip == oechem.OECIPAtomStereo_S:
            rda.SetChiralTag(Chem.CHI_TETRAHEDRAL_CW)
        elif cip == oechem.OECIPAtomStereo_R:
            rda.SetChiralTag(Chem.CHI_TETRAHEDRAL_CCW)

        map_atoms[oe_idx] = rdmol.AddAtom(rda)

    # Setting bond stereochemistry requires the neighboring bonds, which
    # only exist once all bonds have been added, so the information is
    # collected here and applied afterwards.
    # Each entry is a tuple (rda1, rda2, rda3, rda4, is_cis) where rda[n]
    # are atom indices for a double bond of the form 1-2=3-4 and is_cis is
    # True when bonds 1-2 and 3-4 are cis to each other.
    stereo_bonds = list()

    for oeb in oemol.GetBonds():
        # Get the RDKit atoms at both ends of the bond
        rd_a1 = map_atoms[oeb.GetBgnIdx()]
        rd_a2 = map_atoms[oeb.GetEndIdx()]

        # AddBond returns the total number of bonds, so add the bond and
        # then look it up
        rdmol.AddBond(rd_a1, rd_a2)
        rdbond = rdmol.GetBondBetweenAtoms(rd_a1, rd_a2)

        # Assign the bond type, which is based on order unless aromatic
        order = oeb.GetOrder()
        if oeb.IsAromatic():
            rdbond.SetBondType(_bondtypes[1.5])
            rdbond.SetIsAromatic(True)
        else:
            rdbond.SetBondType(_bondtypes[order])
            rdbond.SetIsAromatic(False)

        # If the bond has specified stereo, record what is needed to
        # reproduce it on the RDKit side
        if oeb.HasStereoSpecified(oechem.OEBondStereo_CisTrans):
            # OpenEye determines stereo relative to neighboring atoms, so
            # pick one outside neighbor on each end of the bond
            n1 = [n for n in oeb.GetBgn().GetAtoms() if n != oeb.GetEnd()][0]
            n2 = [n for n in oeb.GetEnd().GetAtoms() if n != oeb.GetBgn()][0]
            rd_n1 = map_atoms[n1.GetIdx()]
            rd_n2 = map_atoms[n2.GetIdx()]

            stereo = oeb.GetStereo([n1, n2], oechem.OEBondStereo_CisTrans)
            if stereo == oechem.OEBondStereo_Cis:
                stereo_bonds.append((rd_n1, rd_a1, rd_a2, rd_n2, True))
            elif stereo == oechem.OEBondStereo_Trans:
                stereo_bonds.append((rd_n1, rd_a1, rd_a2, rd_n2, False))

    # Add bond stereochemistry
    for (rda1, rda2, rda3, rda4, is_cis) in stereo_bonds:
        # Get the neighbor bonds on either side of the double bond
        bond1 = rdmol.GetBondBetweenAtoms(rda1, rda2)
        bond2 = rdmol.GetBondBetweenAtoms(rda3, rda4)

        # Since this is relative, the first bond always goes up.
        # These names come from SMILES slashes, so UP/UP is trans and
        # UP/DOWN is cis.
        bond1.SetBondDir(Chem.BondDir.ENDUPRIGHT)
        if is_cis:
            bond2.SetBondDir(Chem.BondDir.ENDDOWNRIGHT)
        else:
            bond2.SetBondDir(Chem.BondDir.ENDUPRIGHT)

    # If oemol has coordinates (the dimension is non-zero), copy them
    # into a conformer on the rdmol
    if oechem.OEGetDimensionFromCoords(oemol) > 0:
        conformer = Chem.Conformer()
        oecoords = oemol.GetCoords()
        for oe_idx, rd_idx in map_atoms.items():
            (x, y, z) = oecoords[oe_idx]
            conformer.SetAtomPosition(rd_idx, Geometry.Point3D(x, y, z))
        rdmol.AddConformer(conformer)

    # Save the molecule title
    rdmol.SetProp("_Name", oemol.GetTitle())

    # Clean up the rdmol. UpdatePropertyCache and GetSSSR are carried over
    # from the example this code was adapted from.
    rdmol.UpdatePropertyCache(strict=False)
    Chem.GetSSSR(rdmol)

    # AssignStereochemistry takes the bond directions set above and
    # assigns the stereochemistry tags on the double bonds
    Chem.AssignStereochemistry(rdmol, force=False)

    return rdmol.GetMol()