def SetMolProp(self, value, pname, ptype='s', plen=6, floatPoint=3):
    '''Record a molecule-level property in the PMP text.

    A "REMARK 111 MOLP: <pname>: <type>: <formatted value>" line is inserted
    directly before every line that starts with "REMARK 666 LABELS", and
    self.SetMolValue(value, pname, ptype) is called once for each such line.
    All other lines are copied unchanged. The rebuilt text (every line
    terminated by a newline) replaces self._pmp.

    Arguments
      - value: the property value to store
      - pname: the property name
      - ptype: 'str'/'string'/'s', 'float'/'double'/'f' or
               'int'/'long'/'i'/'d'; anything else is treated as a string
      - plen: minimum field width of the right-aligned formatted value
      - floatPoint: number of decimals, used for float properties only

    Returns None. When self._pmp is empty a warning is logged and nothing
    is changed.
    '''
    if not self._pmp:
        # The message used to name SetAtomsProp, which pointed at the wrong
        # method when reading the log.
        logger.warning('SetMolProp: No PMP information!')
        return

    # Normalise the type aliases to a single format code.
    if ptype in ('float', 'double', 'f'):
        ptype = 'f'
    elif ptype in ('int', 'long', 'i', 'd'):
        ptype = 'd'
    else:
        ptype = 's'

    width = str(plen)
    if ptype == 'f':
        sformat = "{:>" + width + "." + str(floatPoint) + ptype + "}"
    else:
        sformat = "{:>" + width + ptype + "}"

    # The 's' format code rejects non-string objects (ints, floats, None),
    # so string properties are converted to text before formatting.
    shown = str(value) if ptype == 's' else value

    pieces = []
    for line in self._pmp.splitlines():
        if line[:17] == "REMARK 666 LABELS":
            pieces.append("REMARK 111 MOLP: " + pname + ": " + ptype + ": " +
                          sformat.format(shown) + "\n")
            self.SetMolValue(value, pname, ptype)
        pieces.append(line + "\n")
    self._pmp = "".join(pieces)
def PMPfromPDBblock(self, PDBblock):
    '''Convert PDB Block to PMP format.

    PDB block can be generated from Chem.MolToPDBBlock(mol)

    The CONECT records are read first to build, for every atom serial, the
    ordered list of distinct bonded serials. The block is then rewritten:

      - "REMARK 666 LABELS|PMP_INFO:80s" is inserted before the first
        ATOM/HETATM record;
      - every ATOM/HETATM record keeps its first 55 characters, followed by
        the bond count, four 5-character neighbour fields (left blank when
        the atom has more than four neighbours or fewer than four are
        known) and the original text from column 76 onwards;
      - every other line is copied unchanged.

    Arguments
      - PDBblock: the PDB text

    Returns the PMP text; every line ends with a newline.
    '''
    lines = PDBblock.splitlines()

    connectDict = {}
    for line in lines:
        if line[:6] != 'CONECT':
            continue
        tmp = line[6:].rstrip()
        # Maybe some bug
        if len(tmp) % 5 != 0:
            logger.warning(
                "PMPfromPDBblock(): Connect length maybe something wrong!")
        # Floor division keeps the field count an int on Python 3 as well.
        nfields = len(tmp) // 5
        if nfields < 2:
            logger.warning(
                "PMPfromPDBblock(): Connect length maybe too short!")
            continue
        fields = [tmp[5 * i:5 * i + 5].strip() for i in range(nfields)]
        atomid = fields[0]
        # An atom may be listed on several CONECT records; accumulate the
        # neighbours instead of letting the last record win, then drop
        # repeated serials while keeping first-seen order.
        partners = connectDict.get(atomid, []) + fields[1:]
        connectDict[atomid] = list(OrderedDict.fromkeys(partners))

    pieces = []
    firstline = False
    for line in lines:
        # The record name occupies six columns, so ATOM carries two
        # trailing blanks.
        if not line.startswith(("ATOM  ", "HETATM")):
            pieces.append(line + "\n")
            continue
        if not firstline:
            pieces.append("REMARK 666 LABELS|PMP_INFO:80s\n")
            firstline = True
        atomid = line[6:11].strip()
        if atomid not in connectDict:
            pieces.append(line[:55] + "0" + " " * 20 + line[76:] + "\n")
            continue
        connects = connectDict[atomid]
        bondnum = len(connects)
        record = line[:55] + str(bondnum)
        if bondnum > 4:
            # Only four neighbour slots exist; leave them blank.
            record += " " * 20
        else:
            for i in range(4):
                if i < bondnum:
                    record += "%5s" % connects[i]
                else:
                    record += " " * 5
        pieces.append(record + line[76:] + "\n")
    return "".join(pieces)
def MolMatchBondByMol2File(self, mol2file, mol=None):
    '''Using ref mol2 file to modify the bond type of given molecule "mol".

    If "mol" not given, using the inner molecule and update it!

    Arguments
      - mol2file: path of the reference mol2 file, read with
        Chem.MolFromMol2File(sanitize=True, removeHs=False)
      - mol: (optional) the molecule to fix; when given, self is not
        modified

    Returns the molecule produced by self.AssignBondOrdersFromTemplate, or
    None (after logging a warning) when the reference file could not be
    read or when there is no molecule to work on. When the inner molecule
    is used, self._mol is replaced by the result and self._pmp is rebuilt
    from its PDB block.
    '''
    refmol = Chem.MolFromMol2File(mol2file, sanitize=True, removeHs=False)
    if refmol is None:
        # Without this guard the None template would only fail later,
        # inside the template matching, with an unrelated-looking error.
        logger.warning(
            "MolMatchBondByMol2File: Could not read the reference mol2 file!")
        return None

    if mol:
        return self.AssignBondOrdersFromTemplate(refmol, mol)

    if self._mol:
        mdone = self.AssignBondOrdersFromTemplate(refmol, self._mol)
        pdbblock = Chem.MolToPDBBlock(mdone, flavor=4)
        self._pmp = self.PMPfromPDBblock(pdbblock)
        self._mol = mdone
        return mdone

    logger.warning(
        "MolMatchBondByMol2File: No input molecule was given!")
    return None
def GetMolsFromSDFile(dataFilename, errFile, nameProp):
    """ generator yielding (name, SMILES, molecule) for the records of an SD file

    Arguments
      - dataFilename: the SD file, opened with Chem.SDMolSupplier
      - errFile: file-like object (or a false value) that receives the text
        of records that could not be read
      - nameProp: molecule property used as the name; molecules without it
        are named 'Mol_<1-based record number>'

    Unreadable records are skipped. A molecule whose name property is
    empty is still yielded, after a warning has been logged.
    """
    supplier = Chem.SDMolSupplier(dataFilename)
    for idx, m in enumerate(supplier):
        if not m:
            # Unreadable record: optionally dump its text, then move on.
            if not errFile:
                continue
            if hasattr(supplier, 'GetItemText'):
                errFile.write(supplier.GetItemText(idx))
            else:
                logger.warning('full error file support not complete')
            continue

        smi = Chem.MolToSmiles(m, True)
        if not m.HasProp(nameProp):
            nm = 'Mol_%d' % (idx + 1)
        else:
            nm = m.GetProp(nameProp)
            if not nm:
                logger.warning('molecule found with empty name property')
        yield nm, smi, m
def GetMolsFromSDFile(dataFilename, errFile, nameProp):
    """ generator yielding (name, SMILES, molecule) for the records of an SD file

    Arguments
      - dataFilename: the SD file, opened with Chem.SDMolSupplier
      - errFile: file-like object (or a false value) that receives the text
        of records that could not be read
      - nameProp: molecule property used as the name; molecules without it
        are named 'Mol_<1-based record number>'

    Unreadable records are skipped. A molecule whose name property is
    empty is still yielded, after a warning has been logged.
    """
    supplier = Chem.SDMolSupplier(dataFilename)
    for position, molecule in enumerate(supplier):
        if molecule:
            smiles = Chem.MolToSmiles(molecule, True)
            if molecule.HasProp(nameProp):
                label = molecule.GetProp(nameProp)
                if not label:
                    logger.warning('molecule found with empty name property')
            else:
                label = 'Mol_%d' % (position + 1)
            yield label, smiles, molecule
        elif errFile:
            # Unreadable record: dump its raw text when the supplier can
            # provide it.
            if hasattr(supplier, 'GetItemText'):
                errFile.write(supplier.GetItemText(position))
            else:
                logger.warning('full error file support not complete')
def MolMatchBondBySmiles(self, smiles, mol=None):
    '''Using ref smiles to modify the bond type of given molecule "mol".

    If "mol" not given, using the inner molecule and update it!

    Arguments
      - smiles: the reference SMILES; it is parsed, written back out with
        Chem.MolToSmiles and parsed again before being used as template
      - mol: (optional) the molecule to fix; when given, self is not
        modified

    Returns the molecule produced by self.AssignBondOrdersFromTemplate, or
    None (after logging a warning) when the SMILES could not be parsed or
    when there is no molecule to work on. When the inner molecule is used,
    self._mol is replaced by the result and self._pmp is rebuilt from its
    PDB block.
    '''
    # TODO: Some bug may be wrong even using
    # "AllChem.RemoveHs(,updateExplicitCount=True)"
    # to remove H. If smiles contain explicit H, can't match!!
    refmol = Chem.MolFromSmiles(smiles, sanitize=True)
    if refmol is not None:
        refmol = Chem.MolFromSmiles(Chem.MolToSmiles(refmol), sanitize=True)
    if refmol is None:
        # Without this guard the None template would only fail later, in
        # MolToSmiles or in the template matching.
        logger.warning(
            "MolMatchBondBySmiles: Could not parse the reference smiles!")
        return None

    if mol:
        return self.AssignBondOrdersFromTemplate(refmol, mol)

    if self._mol:
        mdone = self.AssignBondOrdersFromTemplate(refmol, self._mol)
        pdbblock = Chem.MolToPDBBlock(mdone, flavor=4)
        self._pmp = self.PMPfromPDBblock(pdbblock)
        self._mol = mdone
        return mdone

    logger.warning(
        "MolMatchBondBySmiles: No input molecule was given!")
    return None
def SetAtomsProp(self, values, pname, ptype='s', plen=6, floatPoint=3):
    '''Set Atom property : values, property name, property type, string length

    The first "REMARK 666 LABELS" line of self._pmp gets
    "|<pname>:<plen><type>" appended, and every ATOM/HETATM record gets the
    formatted value of the corresponding entry of "values" appended (records
    are paired with values in order of appearance). For each record
    self.SetAtomValue(index, value, pname, ptype) is called as well. The
    rebuilt text (every line terminated by a newline) replaces self._pmp.

    Arguments
      - values: one value per atom of self._mol
      - pname: the property name
      - ptype: 'str'/'string'/'s', 'float'/'double'/'f' or
               'int'/'long'/'i'/'d'; anything else is treated as a string
      - plen: minimum field width of the right-aligned formatted value
      - floatPoint: number of decimals, used for float properties only

    Returns None. A warning is logged and nothing is changed when the
    number of values differs from the atom count or when self._pmp is
    empty.
    '''
    if len(values) != self._mol.GetNumAtoms():
        logger.warning('SetAtomsProp: ' + self._mol.GetProp('Filename') +
                       'The length of values not equal to atom number!')
        return
    if not self._pmp:
        logger.warning('SetAtomsProp: No PMP information!')
        return

    # Normalise the type aliases to a single format code.
    if ptype in ('float', 'double', 'f'):
        ptype = 'f'
    elif ptype in ('int', 'long', 'i', 'd'):
        ptype = 'd'
    else:
        ptype = 's'

    plen = str(plen)
    if ptype == 'f':
        sformat = "{:>" + plen + "." + str(floatPoint) + ptype + "}"
    else:
        sformat = "{:>" + plen + ptype + "}"

    pieces = []
    findLabel = False
    atomcount = 0
    for line in self._pmp.splitlines():
        if not findLabel and line[:17] == "REMARK 666 LABELS":
            pieces.append(line + "|" + pname + ":" + plen + ptype + '\n')
            # Only the first label line describes the columns.
            findLabel = True
        elif line.startswith(("ATOM  ", "HETATM")):
            # The record name occupies six columns, so ATOM carries two
            # trailing blanks.
            value = values[atomcount]
            # The 's' format code rejects non-string objects, so string
            # properties are converted to text before formatting.
            shown = str(value) if ptype == 's' else value
            pieces.append(line + sformat.format(shown) + '\n')
            self.SetAtomValue(atomcount, value, pname, ptype)
            atomcount += 1
        else:
            pieces.append(line + '\n')
    self._pmp = "".join(pieces)
def AssignBondOrdersFromTemplate(refmol, mol):
    """ copies the bond orders of a template molecule onto another molecule

    Arguments
      - refmol: the template molecule
      - mol: the molecule to assign bond orders to

    Neither argument is modified; a modified copy of mol is returned.
    Raises ValueError if the template cannot be mapped onto the molecule.

    An example, start by generating a template from a SMILES
    and read in the PDB structure of the molecule

    >>> import os
    >>> from rdkit.Chem import AllChem
    >>> template = AllChem.MolFromSmiles("CN1C(=NC(C1=O)(c2ccccc2)c3ccccc3)N")
    >>> mol = AllChem.MolFromPDBFile(os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data', '4DJU_lig.pdb'))
    >>> len([1 for b in template.GetBonds() if b.GetBondTypeAsDouble() == 1.0])
    8
    >>> len([1 for b in mol.GetBonds() if b.GetBondTypeAsDouble() == 1.0])
    22

    Now assign the bond orders based on the template molecule

    >>> newMol = AllChem.AssignBondOrdersFromTemplate(template, mol)
    >>> len([1 for b in newMol.GetBonds() if b.GetBondTypeAsDouble() == 1.0])
    8

    Note that the template molecule should have no explicit hydrogens
    else the algorithm will fail.

    It also works if there are different formal charges (this was github issue 235):

    >>> template=AllChem.MolFromSmiles('CN(C)C(=O)Cc1ccc2c(c1)NC(=O)c3ccc(cc3N2)c4ccc(c(c4)OC)[N+](=O)[O-]')
    >>> mol = AllChem.MolFromMolFile(os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data', '4FTR_lig.mol'))
    >>> AllChem.MolToSmiles(mol)
    'COC1CC(C2CCC3C(O)NC4CC(CC(O)N(C)C)CCC4NC3C2)CCC1N(O)O'
    >>> newMol = AllChem.AssignBondOrdersFromTemplate(template, mol)
    >>> AllChem.MolToSmiles(newMol)
    'COc1cc(-c2ccc3c(c2)Nc2ccc(CC(=O)N(C)C)cc2NC3=O)ccc1[N+](=O)[O-]'

    """
    template = rdchem.Mol(refmol)
    target = rdchem.Mol(mol)

    # When the template already maps onto the molecule there is nothing to do.
    if target.GetSubstructMatch(template):
        return target

    # Reduce both copies to an uncharged single-bond skeleton so that they
    # can be compared on connectivity alone.
    for bond in target.GetBonds():
        if bond.GetBondType() != BondType.SINGLE:
            bond.SetBondType(BondType.SINGLE)
            bond.SetIsAromatic(False)
    for bond in template.GetBonds():
        bond.SetBondType(BondType.SINGLE)
        bond.SetIsAromatic(False)
    for atom in template.GetAtoms():
        atom.SetFormalCharge(0)
    for atom in target.GetAtoms():
        atom.SetFormalCharge(0)

    hits = target.GetSubstructMatches(template, uniquify=False)
    if not hits:
        raise ValueError("No matching found")
    if len(hits) > 1:
        logger.warning("More than one matching pattern found - picking one")
    mapping = hits[0]

    # Transfer the bond properties of the untouched template ...
    for refBond in refmol.GetBonds():
        begin = mapping[refBond.GetBeginAtomIdx()]
        end = mapping[refBond.GetEndAtomIdx()]
        bond = target.GetBondBetweenAtoms(begin, end)
        bond.SetBondType(refBond.GetBondType())
        bond.SetIsAromatic(refBond.GetIsAromatic())
    # ... and then its atom properties.
    for refAtom in refmol.GetAtoms():
        atom = target.GetAtomWithIdx(mapping[refAtom.GetIdx()])
        atom.SetHybridization(refAtom.GetHybridization())
        atom.SetIsAromatic(refAtom.GetIsAromatic())
        atom.SetNumExplicitHs(refAtom.GetNumExplicitHs())
        atom.SetFormalCharge(refAtom.GetFormalCharge())

    SanitizeMol(target)
    if hasattr(target, '__sssAtoms'):
        target.__sssAtoms = None  # we don't want all bonds highlighted
    return target
def AssignBondOrdersFromTemplate(refmol, mol):
    """ copies the bond orders of a template molecule onto another molecule

    Arguments
      - refmol: the template molecule
      - mol: the molecule to assign bond orders to

    Neither argument is modified; a modified copy of mol is returned.
    Raises ValueError if the template cannot be mapped onto the molecule.

    An example, start by generating a template from a SMILES
    and read in the PDB structure of the molecule

    >>> from rdkit.Chem import AllChem
    >>> template = AllChem.MolFromSmiles("CN1C(=NC(C1=O)(c2ccccc2)c3ccccc3)N")
    >>> mol = AllChem.MolFromPDBFile(os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data', '4DJU_lig.pdb'))
    >>> len([1 for b in template.GetBonds() if b.GetBondTypeAsDouble() == 1.0])
    8
    >>> len([1 for b in mol.GetBonds() if b.GetBondTypeAsDouble() == 1.0])
    22

    Now assign the bond orders based on the template molecule

    >>> newMol = AllChem.AssignBondOrdersFromTemplate(template, mol)
    >>> len([1 for b in newMol.GetBonds() if b.GetBondTypeAsDouble() == 1.0])
    8

    Note that the template molecule should have no explicit hydrogens
    else the algorithm will fail.

    It also works if there are different formal charges (this was github issue 235):

    >>> template=AllChem.MolFromSmiles('CN(C)C(=O)Cc1ccc2c(c1)NC(=O)c3ccc(cc3N2)c4ccc(c(c4)OC)[N+](=O)[O-]')
    >>> mol = AllChem.MolFromMolFile(os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data', '4FTR_lig.mol'))
    >>> AllChem.MolToSmiles(mol)
    'COC1CC(C2CCC3C(O)NC4CC(CC(O)N(C)C)CCC4NC3C2)CCC1N(O)O'
    >>> newMol = AllChem.AssignBondOrdersFromTemplate(template, mol)
    >>> AllChem.MolToSmiles(newMol)
    'COc1cc(-c2ccc3c(c2)Nc2ccc(CC(=O)N(C)C)cc2NC3=O)ccc1[N+](=O)[O-]'

    """
    pattern = rdchem.Mol(refmol)
    result = rdchem.Mol(mol)

    already = result.GetSubstructMatch(pattern)
    if already:
        # The template maps onto the molecule as it is; return the copy.
        return result

    # Reduce both copies to an uncharged single-bond skeleton so that they
    # can be compared on connectivity alone.
    for bnd in result.GetBonds():
        if bnd.GetBondType() != BondType.SINGLE:
            bnd.SetBondType(BondType.SINGLE)
            bnd.SetIsAromatic(False)
    for bnd in pattern.GetBonds():
        bnd.SetBondType(BondType.SINGLE)
        bnd.SetIsAromatic(False)
    for atm in pattern.GetAtoms():
        atm.SetFormalCharge(0)
    for atm in result.GetAtoms():
        atm.SetFormalCharge(0)

    candidates = result.GetSubstructMatches(pattern, uniquify=False)
    if not candidates:
        raise ValueError("No matching found")
    if len(candidates) > 1:
        logger.warning("More than one matching pattern found - picking one")
    chosen = candidates[0]

    # Bond properties come from the untouched template.
    for src in refmol.GetBonds():
        dst = result.GetBondBetweenAtoms(chosen[src.GetBeginAtomIdx()],
                                         chosen[src.GetEndAtomIdx()])
        dst.SetBondType(src.GetBondType())
        dst.SetIsAromatic(src.GetIsAromatic())
    # Atom properties likewise.
    for src in refmol.GetAtoms():
        dst = result.GetAtomWithIdx(chosen[src.GetIdx()])
        dst.SetHybridization(src.GetHybridization())
        dst.SetIsAromatic(src.GetIsAromatic())
        dst.SetNumExplicitHs(src.GetNumExplicitHs())
        dst.SetFormalCharge(src.GetFormalCharge())

    SanitizeMol(result)
    if hasattr(result, '__sssAtoms'):
        result.__sssAtoms = None  # we don't want all bonds highlighted
    return result
def Gen2DFingerprint(mol, sigFactory, perms=None, dMat=None):
    """ builds the 2D pharmacophore fingerprint of a molecule

    **Arguments**

      - mol: the molecule for which the signature should be generated

      - sigFactory : the SigFactory object with signature parameters
                NOTE: no preprocessing is carried out for _sigFactory_.
                      It *must* be pre-initialized.

      - perms: (optional) a sequence of permutation indices limiting which
               pharmacophore combinations are allowed

      - dMat: (optional) the distance matrix to be used

    **Returns** the signature obtained from sigFactory.GetSignature(),
    after it has been handed to _ShortestPathsMatch for every feature
    combination (only when sigFactory.shortestPathsOnly is set).

    Raises ValueError if sigFactory is not a SigFactory instance.

    """
    if not isinstance(sigFactory, SigFactory.SigFactory):
        raise ValueError('bad factory')

    families = sigFactory.GetFeatFamilies()
    if _verbose:
        print('* feat famillies:', families)
    nFamilies = len(families)
    lowest = sigFactory.minPointCount
    highest = sigFactory.maxPointCount
    if highest > 3:
        logger.warning(
            ' Pharmacophores with more than 3 points are not currently supported.\nSetting maxCount to 3.'
        )
        highest = 3

    # generate the molecule's distance matrix, if required
    if dMat is None:
        from rdkit import Chem
        dMat = Chem.GetDistanceMatrix(mol, sigFactory.includeBondOrder)

    # generate the permutations, if required
    if perms is None:
        perms = []
        for nPoints in range(lowest, highest + 1):
            perms.extend(Utils.GetIndexCombinations(nFamilies, nPoints))

    # generate the matches:
    featMatches = sigFactory.GetMolFeats(mol)
    if _verbose:
        print(' featMatches:', featMatches)

    sig = sigFactory.GetSignature()
    for perm in perms:
        # Equal neighbouring feature indices share a class number, which
        # increases by one at every change of feature index.
        classes = [0]
        for pos in range(1, len(perm)):
            step = 0 if perm[pos] == perm[pos - 1] else 1
            classes.append(classes[-1] + step)

        # the matches available at each point of the proto-pharmacophore
        perPoint = [featMatches[idx] for idx in perm]
        if _verbose:
            print('\n->Perm: %s' % (str(perm)))
            print(' matchPerms: %s' % (str(perPoint)))

        # all unique combinations of those matches, reduced in place to the
        # second item of every entry member
        combos = Utils.GetUniqueCombinations(perPoint, classes)
        for pos, combo in enumerate(combos):
            combos[pos] = [member[1] for member in combo]
        if _verbose:
            print(' mtM:', combos)

        for match in combos:
            if sigFactory.shortestPathsOnly:
                _ShortestPathsMatch(match, perm, sig, dMat, sigFactory)
    return sig
def Gen2DFingerprint(mol, sigFactory, perms=None, dMat=None, bitInfo=None):
    """ builds the 2D pharmacophore fingerprint of a molecule

    **Arguments**

      - mol: the molecule for which the signature should be generated

      - sigFactory : the SigFactory object with signature parameters
                NOTE: no preprocessing is carried out for _sigFactory_.
                      It *must* be pre-initialized.

      - perms: (optional) a sequence of permutation indices limiting which
               pharmacophore combinations are allowed

      - dMat: (optional) the distance matrix to be used

      - bitInfo: (optional) used to return the atoms involved in the bits;
                 each index returned by _ShortestPathsMatch is mapped to the
                 list of matches that produced it

    **Returns** the signature obtained from sigFactory.GetSignature(),
    after it has been handed to _ShortestPathsMatch for every feature
    combination (only when sigFactory.shortestPathsOnly is set).

    Raises ValueError if sigFactory is not a SigFactory instance.

    """
    if not isinstance(sigFactory, SigFactory.SigFactory):
        raise ValueError('bad factory')

    families = sigFactory.GetFeatFamilies()
    if _verbose:
        print('* feat famillies:', families)
    nFamilies = len(families)
    lowest = sigFactory.minPointCount
    highest = sigFactory.maxPointCount
    if highest > 3:
        logger.warning(' Pharmacophores with more than 3 points are not currently supported.\n' +
                       'Setting maxCount to 3.')
        highest = 3

    # generate the molecule's distance matrix, if required
    if dMat is None:
        from rdkit import Chem
        dMat = Chem.GetDistanceMatrix(mol, sigFactory.includeBondOrder)

    # generate the permutations, if required
    if perms is None:
        perms = []
        for nPoints in range(lowest, highest + 1):
            perms.extend(Utils.GetIndexCombinations(nFamilies, nPoints))

    # generate the matches:
    featMatches = sigFactory.GetMolFeats(mol)
    if _verbose:
        print(' featMatches:', featMatches)

    sig = sigFactory.GetSignature()
    for perm in perms:
        # Equal neighbouring feature indices share a class number, which
        # increases by one at every change of feature index.
        classes = [0]
        for pos in range(1, len(perm)):
            step = 0 if perm[pos] == perm[pos - 1] else 1
            classes.append(classes[-1] + step)

        # the matches available at each point of the proto-pharmacophore
        perPoint = [featMatches[idx] for idx in perm]
        if _verbose:
            print('\n->Perm: %s' % (str(perm)))
            print(' matchPerms: %s' % (str(perPoint)))

        # all unique combinations of those matches, reduced in place to the
        # second item of every entry member
        combos = Utils.GetUniqueCombinations(perPoint, classes)
        for pos, combo in enumerate(combos):
            combos[pos] = [member[1] for member in combo]
        if _verbose:
            print(' mtM:', combos)

        for match in combos:
            if not sigFactory.shortestPathsOnly:
                continue
            idx = _ShortestPathsMatch(match, perm, sig, dMat, sigFactory)
            if idx is None or bitInfo is None:
                continue
            # remember which match produced this index
            recorded = bitInfo.get(idx, [])
            recorded.append(match)
            bitInfo[idx] = recorded
    return sig
ts.append(t2 - t1) if tests[16]: logger.info('Generate topological fingerprints') t1 = time.time() for mol in mols: Chem.RDKFingerprint(mol) t2 = time.time() logger.info('Results16: %.2f seconds' % (t2 - t1)) ts.append(t2 - t1) if tests[17]: logger.info('MMFF optimizing the molecules:') t1 = time.time() for i, mol in enumerate(mols): mol = Chem.Mol(mol) if not mol.GetNumConformers(): continue if not AllChem.MMFFHasAllMoleculeParams(mol): continue needMore = 1 while needMore: try: needMore = AllChem.MMFFOptimizeMolecule(mol, maxIters=200) except ValueError: logger.warning('Problems with MMFF and mol %d' % i) break t2 = time.time() logger.info('Results17: %.2f seconds' % (t2 - t1)) ts.append(t2 - t1) print('times: ', ' || '.join(['%.1f' % x for x in ts]))
logger.info('Generate topological fingerprints') t1=time.time() for mol in mols: Chem.RDKFingerprint(mol) t2 = time.time() logger.info('Results16: %.2f seconds'%(t2-t1)) ts.append(t2-t1) if tests[17]: logger.info('MMFF optimizing the molecules:') t1=time.time() for i,mol in enumerate(mols): mol = Chem.Mol(mol) if not mol.GetNumConformers(): continue if not AllChem.MMFFHasAllMoleculeParams(mol): continue needMore=1 while needMore: try: needMore=AllChem.MMFFOptimizeMolecule(mol,maxIters=200) except ValueError: logger.warning('Problems with MMFF and mol %d'%i) break t2 = time.time() logger.info('Results17: %.2f seconds'%(t2-t1)) ts.append(t2-t1) print('times: ',' || '.join(['%.1f'%x for x in ts]))
def AssignBondOrdersFromTemplate(self, refmol, mol):
    """ assigns bond orders to a molecule based on the
    bond orders in a template molecule

    Revised from AllChem.AssignBondOrderFromTemplate(refmol, mol): when the
    template maps onto the molecule in several ways, the matchings are
    tried in turn until one yields a molecule that passes sanitization.

    Arguments
      - refmol: the template molecule
      - mol: the molecule to assign bond orders to

    Returns a modified copy of mol; neither argument is changed. If the
    template already matches, the plain copy is returned. If no matching
    passes sanitization, the molecule built from the last matching is
    returned as it is (a warning is logged for every failed attempt).

    Raises ValueError if the template cannot be mapped onto the molecule.
    """
    refmol2 = Chem.rdchem.Mol(refmol)
    mol2 = Chem.rdchem.Mol(mol)

    # do the molecules match already?
    if mol2.GetSubstructMatch(refmol2):
        return mol2

    # no, they don't match: reduce both copies to an uncharged single-bond
    # skeleton so that they can be compared on connectivity alone
    for b in mol2.GetBonds():
        if b.GetBondType() != Chem.BondType.SINGLE:
            b.SetBondType(Chem.BondType.SINGLE)
            b.SetIsAromatic(False)
    for b in refmol2.GetBonds():
        b.SetBondType(Chem.BondType.SINGLE)
        b.SetIsAromatic(False)
    for a in refmol2.GetAtoms():
        a.SetFormalCharge(0)
    for a in mol2.GetAtoms():
        a.SetFormalCharge(0)

    # do the molecules match now?
    matchings = mol2.GetSubstructMatches(refmol2, uniquify=False)
    if not matchings:
        raise ValueError("No matching found")

    skeleton = mol2
    for matching in matchings:
        # Every attempt starts from a fresh copy of the skeleton, so the
        # edits of a matching that failed sanitization cannot leak into
        # the next attempt.
        mol2 = Chem.rdchem.Mol(skeleton)

        # apply matching: set bond properties
        for b in refmol.GetBonds():
            atom1 = matching[b.GetBeginAtomIdx()]
            atom2 = matching[b.GetEndAtomIdx()]
            b2 = mol2.GetBondBetweenAtoms(atom1, atom2)
            b2.SetBondType(b.GetBondType())
            b2.SetIsAromatic(b.GetIsAromatic())
        # apply matching: set atom properties
        for a in refmol.GetAtoms():
            a2 = mol2.GetAtomWithIdx(matching[a.GetIdx()])
            a2.SetHybridization(a.GetHybridization())
            a2.SetIsAromatic(a.GetIsAromatic())
            a2.SetNumExplicitHs(a.GetNumExplicitHs())
            a2.SetFormalCharge(a.GetFormalCharge())

        try:
            Chem.SanitizeMol(mol2)
        except ValueError:
            logger.warning(
                "More than one matching pattern, Fail at this matching. Try next."
            )
            continue

        if hasattr(mol2, '__sssAtoms'):
            # we don't want all bonds highlighted; setattr is required
            # because "mol2.__sssAtoms = ..." written inside a class body is
            # name-mangled and would set a different attribute
            setattr(mol2, '__sssAtoms', None)
        break
    return mol2
# Benchmark 16: time RDKit topological fingerprint generation over all molecules.
if tests[16]:
    logger.info("Generate topological fingerprints")
    t1 = time.time()
    for mol in mols:
        Chem.RDKFingerprint(mol)
    t2 = time.time()
    elapsed = t2 - t1
    logger.info("Results16: %.2f seconds" % elapsed)
    ts.append(elapsed)

# Benchmark 17: time MMFF optimization of a copy of every molecule that has
# conformers and complete MMFF parameters.
if tests[17]:
    logger.info("MMFF optimizing the molecules:")
    t1 = time.time()
    for i, mol in enumerate(mols):
        mol = Chem.Mol(mol)
        if not mol.GetNumConformers() or not AllChem.MMFFHasAllMoleculeParams(mol):
            continue
        # Keep optimizing in rounds of 200 iterations while the optimizer
        # returns a true value; give up on the molecule on ValueError.
        needMore = 1
        while needMore:
            try:
                needMore = AllChem.MMFFOptimizeMolecule(mol, maxIters=200)
            except ValueError:
                logger.warning("Problems with MMFF and mol %d" % i)
                break
    t2 = time.time()
    elapsed = t2 - t1
    logger.info("Results17: %.2f seconds" % elapsed)
    ts.append(elapsed)

# Summary of all collected timings.
print("times: ", " || ".join("%.1f" % x for x in ts))