def ClassifyExample(self, example, appendExamples=0):
    """ Classify an example by multiplying the class prior with the
    conditional probability of every attribute.

    The most likely class is the one with the largest resulting probability.

    **Arguments**

      - example: the example to classify. Without signatures the attribute
        values are read as ``example[ai]`` (and binned via ``_getBinId`` when
        ``self._qBounds[ai] > 0``); when ``self._useSigs`` is set, bits are
        read with ``example[1].GetBit(ai)``.

      - appendExamples: if true, ``example`` is appended to ``self._examples``.

    **Returns**

      the key of the winning class, or -1 if no class probability exceeds
      the initial value of -1.0. The winning probability is stored in
      ``self.mprob``.
    """
    if appendExamples:
        self._examples.append(example)

    scores = {}
    for label, prior in iteritems(self._classProbs):
        scores[label] = prior
        condTable = self._condProbs[label]
        for ai in self._attrs:
            if hasattr(self, '_useSigs') and self._useSigs:
                # signature mode: the attribute is a single on/off bit
                binId = 1 if example[1].GetBit(ai) else 0
            else:
                binId = example[ai]
                if self._qBounds[ai] > 0:
                    # quantized attribute: map the raw value onto its bin
                    binId = _getBinId(binId, self._QBoundVals[ai])
            scores[label] *= condTable[ai][binId]

    bestLabel = -1
    self.mprob = -1.0
    for label, score in iteritems(scores):
        if score > self.mprob:
            bestLabel, self.mprob = label, score
    return bestLabel
def ClassifyExample(self, example, appendExamples=0):
    """ Pick the class with the highest (prior x conditionals) probability.

    For every class in ``self._classProbs`` the prior is multiplied by one
    entry of ``self._condProbs[class][attribute]`` per attribute listed in
    ``self._attrs``. The entry is selected by the attribute's bin id:

      - signature mode (``self._useSigs`` present and true): 1 if
        ``example[1].GetBit(attribute)`` is set, otherwise 0
      - otherwise: ``example[attribute]``, passed through ``_getBinId`` when
        ``self._qBounds[attribute] > 0``

    If ``appendExamples`` is true the example is also stored in
    ``self._examples``.

    Returns the winning class key (-1 if nothing beats the initial -1.0) and
    leaves the winning probability in ``self.mprob``.
    """
    if appendExamples:
        self._examples.append(example)

    posterior = {}
    for cls, prior in iteritems(self._classProbs):
        posterior[cls] = prior
        perAttr = self._condProbs[cls]
        for attr in self._attrs:
            sigMode = hasattr(self, '_useSigs') and self._useSigs
            if not sigMode:
                slot = example[attr]
                if self._qBounds[attr] > 0:
                    slot = _getBinId(slot, self._QBoundVals[attr])
            elif example[1].GetBit(attr):
                slot = 1
            else:
                slot = 0
            posterior[cls] *= perAttr[attr][slot]

    winner = -1
    self.mprob = -1.0
    for cls, value in iteritems(posterior):
        if value > self.mprob:
            winner = cls
            self.mprob = value
    return winner
def MolToMPL(mol, size=(300, 300), kekulize=True, wedgeBonds=True, imageType=None,
             fitImage=False, options=None, **kwargs):
    """ Generates a drawing of a molecule on a matplotlib canvas

    Arguments:
      - mol: the molecule to draw; must be truthy.
      - size: (width, height) handed to the Canvas; the figure size in
        inches is set to size / 100.
      - kekulize: if true, a kekulized copy of the molecule is drawn and the
        input molecule's bond representation is left as it was.
      - wedgeBonds: stored in ``options.wedgeDashedBonds``.
      - imageType: not used by this function; kept for interface
        compatibility.
      - fitImage: if true, ``options.dotsPerAngstrom`` is set to
        ``int(min(size) / 10)``.
      - options: drawing options; when None a fresh DrawingOptions with
        ``bgColor = None`` is created. NOTE: a caller-supplied options object
        is modified in place (dotsPerAngstrom / wedgeDashedBonds).
      - kwargs: forwarded to ``MolDrawing.AddMol``.

    Returns the matplotlib figure owned by the canvas.

    Side effect: the rescaled atom positions are stored on the input
    molecule as ``mol._atomPs``.

    Raises ValueError if ``mol`` is falsy.
    """
    if not mol:
        raise ValueError('Null molecule provided')
    from rdkit.Chem.Draw.mplCanvas import Canvas
    canvas = Canvas(size)
    if options is None:
        options = DrawingOptions()
        options.bgColor = None
    if fitImage:
        # Fix: this used to assign to the undefined name `drawingOptions`,
        # which raised NameError whenever fitImage was requested.
        options.dotsPerAngstrom = int(min(size) / 10)
    options.wedgeDashedBonds = wedgeBonds
    drawer = MolDrawing(canvas=canvas, drawingOptions=options)
    # keep a handle on the caller's molecule: the atom positions are attached
    # to it even when a kekulized copy is what actually gets drawn
    omol = mol
    if kekulize:
        from rdkit import Chem
        mol = Chem.Mol(mol.ToBinary())
        Chem.Kekulize(mol)

    if not mol.GetNumConformers():
        from rdkit.Chem import AllChem
        AllChem.Compute2DCoords(mol)

    drawer.AddMol(mol, **kwargs)
    omol._atomPs = drawer.atomPs[mol]
    for k, v in iteritems(omol._atomPs):
        omol._atomPs[k] = canvas.rescalePt(v)
    canvas._figure.set_size_inches(float(size[0]) / 100, float(size[1]) / 100)
    return canvas._figure
def GetAllChildren(self):
    """ returns a dictionary, keyed by SMILES, of all descendants

    Every direct child is added and then asked (via ``_gacRecurse`` with
    ``terminalOnly=False``) to add its own descendants to the same
    dictionary.
    """
    descendants = {}
    for smiles, node in iteritems(self.children):
        descendants[smiles] = node
        node._gacRecurse(descendants, terminalOnly=False)
    return descendants
def GetAllChildren(self):
    """ returns a dictionary, keyed by SMILES, of children

    The result holds the direct children of this node plus whatever each
    child contributes through ``_gacRecurse(..., terminalOnly=False)``.
    """
    collected = {}
    for key, kid in iteritems(self.children):
        collected[key] = kid
        # let the child add everything below it to the shared dictionary
        kid._gacRecurse(collected, terminalOnly=False)
    return collected
def calculateSAScore(m, fscores):
    """ Compute a synthetic accessibility score for a molecule.

    The score is the sum of three terms, rescaled and clamped to [1, 10]:

      - fragment score: count-weighted mean of ``fscores`` over the nonzero
        elements of the radius-2 Morgan fingerprint (unknown elements
        contribute -4)
      - complexity penalty: size, stereo centers, spiro atoms, bridgeheads
        and macrocycles (rings with more than 8 atoms)
      - fingerprint-density correction for molecules with more atoms than
        distinct fingerprint elements

    Arguments:
      - m: the molecule to score
      - fscores: mapping (anything with ``.get``) from fingerprint element id
        to fragment contribution

    Returns the score as a float between 1.0 and 10.0.

    Raises ZeroDivisionError if the fingerprint counts sum to zero.
    """
    # fragment score
    fp = rdMolDescriptors.GetMorganFingerprint(m, 2)
    fps = fp.GetNonzeroElements()
    score1 = 0.
    nf = 0
    for bitId, v in iteritems(fps):
        nf += v
        score1 += fscores.get(bitId, -4) * v
    score1 /= nf

    # features score
    nAtoms = m.GetNumAtoms()
    nChiralCenters = len(Chem.FindMolChiralCenters(m, includeUnassigned=True))
    ri = m.GetRingInfo()
    nBridgeheads, nSpiro = numBridgeheadsAndSpiro(m, ri)
    nMacrocycles = 0
    for x in ri.AtomRings():
        if len(x) > 8:
            nMacrocycles += 1

    sizePenalty = nAtoms**1.005 - nAtoms
    stereoPenalty = math.log10(nChiralCenters + 1)
    spiroPenalty = math.log10(nSpiro + 1)
    bridgePenalty = math.log10(nBridgeheads + 1)
    macrocyclePenalty = 0.
    # ---------------------------------------
    # This differs from the paper, which defines:
    #  macrocyclePenalty = math.log10(nMacrocycles+1)
    # This form generates better results when 2 or more macrocycles are present
    if nMacrocycles > 0:
        macrocyclePenalty = math.log10(2)

    score2 = 0. - sizePenalty - stereoPenalty - spiroPenalty - \
        bridgePenalty - macrocyclePenalty

    # correction for the fingerprint density
    # not in the original publication, added in version 1.1
    # to make highly symmetrical molecules easier to synthetise
    score3 = 0.
    if nAtoms > len(fps):
        score3 = math.log(float(nAtoms) / len(fps)) * .5

    sascore = score1 + score2 + score3

    # need to transform "raw" value into scale between 1 and 10
    # (named rawMin/rawMax so the builtins min/max are not shadowed)
    rawMin = -4.0
    rawMax = 2.5
    sascore = 11. - (sascore - rawMin + 1) / (rawMax - rawMin) * 9.
    # smooth the 10-end
    if sascore > 8.:
        sascore = 8. + math.log(sascore + 1. - 9.)
    if sascore > 10.:
        sascore = 10.0
    elif sascore < 1.:
        sascore = 1.0

    return sascore
def GetLeaves(self):
    """ returns a dictionary, keyed by SMILES, of leaf (terminal) nodes

    A direct child without children of its own is a leaf and is stored
    directly; any other child is asked to contribute its leaves through
    ``_gacRecurse(..., terminalOnly=True)``.
    """
    leaves = {}
    for smiles, node in iteritems(self.children):
        if len(node.children):
            node._gacRecurse(leaves, terminalOnly=True)
        else:
            leaves[smiles] = node
    return leaves
def GetLeaves(self):
    """ returns a dictionary, keyed by SMILES, of leaf (terminal) nodes

    Childless direct children are added as-is; children that have children
    are descended into with ``_gacRecurse`` in terminal-only mode.
    """
    terminal = {}
    for key, kid in iteritems(self.children):
        isLeaf = len(kid.children) == 0
        if isLeaf:
            terminal[key] = kid
            continue
        kid._gacRecurse(terminal, terminalOnly=True)
    return terminal
def calculateScore(m):
    """ Compute a synthetic accessibility score for molecule ``m``.

    Fragment contributions are looked up in the module-level ``_fscores``;
    when that is still None, ``readFragmentScores()`` is called first
    (presumably it populates ``_fscores`` - confirm against its definition).

    The score is the sum of a fragment score, a complexity penalty and a
    fingerprint-density correction, rescaled and clamped to [1, 10].

    Returns the score as a float between 1.0 and 10.0.

    Raises ZeroDivisionError if the fingerprint counts sum to zero.
    """
    if _fscores is None:
        readFragmentScores()

    # fragment score
    fp = rdMolDescriptors.GetMorganFingerprint(m, 2)  # <- 2 is the *radius* of the circular fingerprint
    fps = fp.GetNonzeroElements()
    score1 = 0.
    nf = 0
    for bitId, v in iteritems(fps):
        nf += v
        # elements without a known contribution are penalized with -4
        score1 += _fscores.get(bitId, -4) * v
    score1 /= nf

    # features score
    nAtoms = m.GetNumAtoms()
    nChiralCenters = len(Chem.FindMolChiralCenters(m, includeUnassigned=True))
    ri = m.GetRingInfo()
    nBridgeheads, nSpiro = numBridgeheadsAndSpiro(m, ri)
    nMacrocycles = 0
    for x in ri.AtomRings():
        if len(x) > 8:
            nMacrocycles += 1

    sizePenalty = nAtoms**1.005 - nAtoms
    stereoPenalty = math.log10(nChiralCenters + 1)
    spiroPenalty = math.log10(nSpiro + 1)
    bridgePenalty = math.log10(nBridgeheads + 1)
    macrocyclePenalty = 0.
    # ---------------------------------------
    # This differs from the paper, which defines:
    #  macrocyclePenalty = math.log10(nMacrocycles+1)
    # This form generates better results when 2 or more macrocycles are present
    if nMacrocycles > 0:
        macrocyclePenalty = math.log10(2)

    score2 = 0. - sizePenalty - stereoPenalty - spiroPenalty - bridgePenalty - macrocyclePenalty

    # correction for the fingerprint density
    # not in the original publication, added in version 1.1
    # to make highly symmetrical molecules easier to synthetise
    score3 = 0.
    if nAtoms > len(fps):
        score3 = math.log(float(nAtoms) / len(fps)) * .5

    sascore = score1 + score2 + score3

    # need to transform "raw" value into scale between 1 and 10
    # (named rawMin/rawMax so the builtins min/max are not shadowed)
    rawMin = -4.0
    rawMax = 2.5
    sascore = 11. - (sascore - rawMin + 1) / (rawMax - rawMin) * 9.
    # smooth the 10-end
    if sascore > 8.:
        sascore = 8. + math.log(sascore + 1. - 9.)
    if sascore > 10.:
        sascore = 10.0
    elif sascore < 1.:
        sascore = 1.0

    return sascore
def GetMorganFingerprint(mol, atomId=-1, radius=2, fpType='bv', nBits=2048, useFeatures=False,
                         **kwargs):
    """
    Calculates the Morgan fingerprint with the environments of atomId removed.

    Parameters:
      mol -- the molecule of interest
      radius -- the maximum radius
      fpType -- the type of Morgan fingerprint: 'count' or 'bv'
      atomId -- the atom to remove the environments for (if -1, no environments is removed)
      nBits -- the size of the bit vector (only for fpType = 'bv')
      useFeatures -- if false: ConnectivityMorgan, if true: FeatureMorgan

    any additional keyword arguments will be passed to the fingerprinting function.

    The full fingerprint and the per-atom bit map are cached on the molecule
    as ``mol._fpInfo`` the first time this is called. While that attribute
    exists it is reused as-is, whatever radius / fpType / nBits are passed on
    later calls.

    Raises ValueError for an unknown fpType, for an atomId that is not
    smaller than the number of atoms, or if ``mol._fpInfo`` does not have
    exactly two entries.
    """
    if fpType not in ['bv', 'count']:
        raise ValueError("Unknown Morgan fingerprint type")
    asBitVect = fpType == 'bv'

    if not hasattr(mol, '_fpInfo'):
        info = {}
        # get the fingerprint and an empty per-atom container for its bits
        if asBitVect:
            molFp = rdMD.GetMorganFingerprintAsBitVect(mol, radius, nBits=nBits,
                                                       useFeatures=useFeatures, bitInfo=info,
                                                       **kwargs)
            bitmap = [DataStructs.ExplicitBitVect(nBits) for _ in range(mol.GetNumAtoms())]
        else:
            molFp = rdMD.GetMorganFingerprint(mol, radius, useFeatures=useFeatures,
                                              bitInfo=info, **kwargs)
            bitmap = [[] for _ in range(mol.GetNumAtoms())]

        # construct the bit map: for every bit, mark each atom that is part
        # of one of the environments setting that bit
        for bit, environments in iteritems(info):
            for center, rad in environments:
                if rad == 0:
                    # radius 0: only the central atom is involved
                    involved = [center]
                else:
                    # radii > 0: PathToSubmol fills amap with the atoms of the environment
                    env = Chem.FindAtomEnvironmentOfRadiusN(mol, rad, center)
                    amap = {}
                    Chem.PathToSubmol(mol, env, atomMap=amap)
                    involved = amap.keys()
                for atomIdx in involved:
                    if asBitVect:
                        bitmap[atomIdx][bit] = 1
                    else:
                        bitmap[atomIdx].append(bit)
        mol._fpInfo = (molFp, bitmap)

    if atomId < 0:
        return mol._fpInfo[0]

    # remove the bits of atomId
    if atomId >= mol.GetNumAtoms():
        raise ValueError("atom index greater than number of atoms")
    if len(mol._fpInfo) != 2:
        raise ValueError("_fpInfo not set")
    if asBitVect:
        return mol._fpInfo[0] ^ mol._fpInfo[1][atomId]  # xor

    # count: work on a copy so the cached fingerprint stays untouched
    molFp = copy.deepcopy(mol._fpInfo[0])
    # delete the bits with atomId
    for bit in mol._fpInfo[1][atomId]:
        molFp[bit] -= 1
    return molFp
g2 = re.sub('[a-z,A-Z]', '', g2) sma = '[$(%s):1]%s;!@[$(%s):2]>>[%s*]-[*:1].[%s*]-[*:2]' % (r1, bnd, r2, g1, g2) gp[j] = sma for gp in smartsGps: for defn in gp: try: t = Reactions.ReactionFromSmarts(defn) t.Initialize() except Exception: print(defn) raise environMatchers = {} for env, sma in iteritems(environs): environMatchers[env] = Chem.MolFromSmarts(sma) bondMatchers = [] for i, compats in enumerate(reactionDefs): tmp = [] for i1, i2, bType in compats: e1 = environs['L%s' % i1] e2 = environs['L%s' % i2] patt = '[$(%s)]%s;!@[$(%s)]' % (e1, bType, e2) patt = Chem.MolFromSmarts(patt) tmp.append((i1, i2, bType, patt)) bondMatchers.append(tmp) reactions = tuple([[Reactions.ReactionFromSmarts(y) for y in x] for x in smartsGps])
def FindBRICSBonds(mol, randomizeOrder=False, silent=True):
    """ returns the bonds in a molecule that BRICS would cleave

    Arguments:
      - mol: the molecule to search
      - randomizeOrder: if true, the order in which the bond definitions
        are tried is shuffled using the ``random`` module
      - silent: not used by this function

    This is a generator; each result is ((beginAtomIdx, endAtomIdx),
    (beginLabel, endLabel)). A bond is reported at most once, whichever
    direction it was matched in.

    >>> from rdkit import Chem
    >>> m = Chem.MolFromSmiles('CCCOCC')
    >>> res = list(FindBRICSBonds(m))
    >>> res
    [((3, 2), ('3', '4')), ((3, 4), ('3', '4'))]

    a more complicated case:

    >>> m = Chem.MolFromSmiles('CCCOCCC(=O)c1ccccc1')
    >>> res = list(FindBRICSBonds(m))
    >>> res
    [((3, 2), ('3', '4')), ((3, 4), ('3', '4')), ((6, 8), ('6', '16'))]

    we can also randomize the order of the results:

    >>> random.seed(23)
    >>> res = list(FindBRICSBonds(m,randomizeOrder=True))
    >>> sorted(res)
    [((3, 2), ('3', '4')), ((3, 4), ('3', '4')), ((6, 8), ('6', '16'))]

    Note that this is a generator function :

    >>> res = FindBRICSBonds(m)
    >>> res
    <generator object ...>
    >>> next(res)
    ((3, 2), ('3', '4'))

    >>> m = Chem.MolFromSmiles('CC=CC')
    >>> res = list(FindBRICSBonds(m))
    >>> sorted(res)
    [((1, 2), ('7', '7'))]

    make sure we don't match ring bonds:

    >>> m = Chem.MolFromSmiles('O=C1NCCC1')
    >>> list(FindBRICSBonds(m))
    []

    another nice one, make sure environment 8 doesn't match something connected
    to a ring atom:

    >>> m = Chem.MolFromSmiles('CC1(C)CCCCC1')
    >>> list(FindBRICSBonds(m))
    []

    """
    letter = re.compile('[a-z,A-Z]')
    indices = list(range(len(bondMatchers)))
    bondsDone = set()
    if randomizeOrder:
        # Fix: the `random=` argument of random.shuffle was removed in
        # Python 3.11; the default already uses the module-level generator.
        random.shuffle(indices)

    # check each environment once up front so that bond patterns whose
    # environments cannot match are skipped without a substructure search
    envMatches = {}
    for env, patt in iteritems(environMatchers):
        envMatches[env] = mol.HasSubstructMatch(patt)
    for gpIdx in indices:
        if randomizeOrder:
            # shuffle a copy so the module-level definitions keep their order
            compats = bondMatchers[gpIdx][:]
            random.shuffle(compats)
        else:
            compats = bondMatchers[gpIdx]
        for i1, i2, bType, patt in compats:
            if not envMatches['L' + i1] or not envMatches['L' + i2]:
                continue
            matches = mol.GetSubstructMatches(patt)
            # reported labels are the environment ids with any letters removed
            i1 = letter.sub('', i1)
            i2 = letter.sub('', i2)
            for match in matches:
                if match not in bondsDone and (match[1], match[0]) not in bondsDone:
                    bondsDone.add(match)
                    yield (((match[0], match[1]), (i1, i2)))
def calculate_score(m):
    """Return the synthetic accessibility score of ``m``, clamped to [1, 10].

    Fragment contributions come from the module-level ``_fscores``; if it is
    still None, ``read_fragment_scores()`` is called first. The raw score is
    the sum of a fragment term, a complexity penalty and a
    fingerprint-density correction, which is then rescaled and smoothed.
    """
    if _fscores is None:
        read_fragment_scores()

    # fragment term: count-weighted mean contribution over the radius-2
    # Morgan fingerprint; unknown elements contribute -4
    elements = rdMolDescriptors.GetMorganFingerprint(m, 2).GetNonzeroElements()
    fragment_sum = 0.
    total_count = 0
    for element_id, count in iteritems(elements):
        total_count += count
        fragment_sum += _fscores.get(element_id, -4) * count
    fragment_term = fragment_sum / total_count

    # complexity term
    atom_count = m.GetNumAtoms()
    chiral_count = len(Chem.FindMolChiralCenters(m, includeUnassigned=True))
    ring_info = m.GetRingInfo()
    bridgehead_count, spiro_count = num_bridgeheads_and_spiro(m)
    macrocycle_count = sum(1 for ring in ring_info.AtomRings() if len(ring) > 8)

    size_penalty = atom_count ** 1.005 - atom_count
    stereo_penalty = math.log10(chiral_count + 1)
    spiro_penalty = math.log10(spiro_count + 1)
    bridge_penalty = math.log10(bridgehead_count + 1)
    # The paper defines the macrocycle penalty as log10(n_macrocycles + 1);
    # a fixed log10(2) generates better results when 2 or more macrocycles
    # are present.
    macrocycle_penalty = math.log10(2) if macrocycle_count > 0 else 0.

    complexity_term = (0. - size_penalty - stereo_penalty - spiro_penalty - bridge_penalty -
                       macrocycle_penalty)

    # fingerprint-density correction (not in the original publication, added
    # in version 1.1) to make highly symmetrical molecules easier to synthetise
    if atom_count > len(elements):
        symmetry_term = math.log(float(atom_count) / len(elements)) * .5
    else:
        symmetry_term = 0.

    sascore = fragment_term + complexity_term + symmetry_term

    # transform the "raw" value into a scale between 1 and 10
    lower = -4.0
    upper = 2.5
    sascore = 11. - (sascore - lower + 1) / (upper - lower) * 9.
    # smooth the 10-end
    if sascore > 8.:
        sascore = 8. + math.log(sascore + 1. - 9.)

    if sascore > 10.:
        return 10.0
    if sascore < 1.:
        return 1.0
    return sascore
def FindBRICSBonds(mol, randomizeOrder=False, silent=True):
    """ returns the bonds in a molecule that BRICS would cleave

    Arguments:
      - mol: the molecule to search
      - randomizeOrder: if true, the bond definitions are tried in an order
        shuffled with the ``random`` module
      - silent: not used by this function

    Results are generated lazily as ((beginAtomIdx, endAtomIdx),
    (beginLabel, endLabel)); a bond already reported in either direction is
    not reported again.

    >>> from rdkit import Chem
    >>> m = Chem.MolFromSmiles('CCCOCC')
    >>> res = list(FindBRICSBonds(m))
    >>> res
    [((3, 2), ('3', '4')), ((3, 4), ('3', '4'))]

    a more complicated case:

    >>> m = Chem.MolFromSmiles('CCCOCCC(=O)c1ccccc1')
    >>> res = list(FindBRICSBonds(m))
    >>> res
    [((3, 2), ('3', '4')), ((3, 4), ('3', '4')), ((6, 8), ('6', '16'))]

    we can also randomize the order of the results:

    >>> random.seed(23)
    >>> res = list(FindBRICSBonds(m,randomizeOrder=True))
    >>> sorted(res)
    [((3, 2), ('3', '4')), ((3, 4), ('3', '4')), ((6, 8), ('6', '16'))]

    Note that this is a generator function :

    >>> res = FindBRICSBonds(m)
    >>> res
    <generator object ...>
    >>> next(res)
    ((3, 2), ('3', '4'))

    >>> m = Chem.MolFromSmiles('CC=CC')
    >>> res = list(FindBRICSBonds(m))
    >>> sorted(res)
    [((1, 2), ('7', '7'))]

    make sure we don't match ring bonds:

    >>> m = Chem.MolFromSmiles('O=C1NCCC1')
    >>> list(FindBRICSBonds(m))
    []

    another nice one, make sure environment 8 doesn't match something connected
    to a ring atom:

    >>> m = Chem.MolFromSmiles('CC1(C)CCCCC1')
    >>> list(FindBRICSBonds(m))
    []

    """
    letter = re.compile('[a-z,A-Z]')
    indices = list(range(len(bondMatchers)))
    bondsDone = set()
    if randomizeOrder:
        # Fix: random.shuffle() no longer accepts a `random=` argument
        # (removed in Python 3.11), so rely on the default generator.
        random.shuffle(indices)

    # one substructure test per environment; used below to skip bond
    # patterns that cannot possibly match
    envMatches = {env: mol.HasSubstructMatch(patt) for env, patt in iteritems(environMatchers)}

    for gpIdx in indices:
        compats = bondMatchers[gpIdx]
        if randomizeOrder:
            # work on a copy: the module-level list must keep its order
            compats = compats[:]
            random.shuffle(compats)
        for i1, i2, bType, patt in compats:
            if not (envMatches['L' + i1] and envMatches['L' + i2]):
                continue
            matches = mol.GetSubstructMatches(patt)
            # labels are reported with any letters stripped from the ids
            i1 = letter.sub('', i1)
            i2 = letter.sub('', i2)
            for match in matches:
                if match in bondsDone or (match[1], match[0]) in bondsDone:
                    continue
                bondsDone.add(match)
                yield (((match[0], match[1]), (i1, i2)))
def __call__(self, smile):
    """Score a SMILES string by synthetic accessibility; higher is better.

    The molecule's SA score (1 = easy ... 10 = hard) is computed from a
    fragment term, a complexity penalty and a fingerprint-density
    correction, and is then mapped to ``exp(1 - sascore)``, so the result is
    1.0 for the best SA score and decreases toward exp(-9).

    Fragment contributions are read from the module-level ``_fscores``;
    when that is None, ``self.readFragmentScores()`` is called first.

    Arguments:
      - smile: the SMILES string to score

    Returns 0.0 when the SMILES cannot be parsed into a molecule or when
    any error occurs during scoring.
    """
    if _fscores is None:
        self.readFragmentScores()
    m = Chem.MolFromSmiles(smile)
    if not m:
        return 0.0
    try:
        # fragment score
        fp = rdMolDescriptors.GetMorganFingerprint(
            m, 2)  # <- 2 is the *radius* of the circular fingerprint
        fps = fp.GetNonzeroElements()
        score1 = 0.
        nf = 0
        for bitId, v in iteritems(fps):
            nf += v
            score1 += _fscores.get(bitId, -4) * v
        score1 /= nf

        # features score
        nAtoms = m.GetNumAtoms()
        nChiralCenters = len(Chem.FindMolChiralCenters(m, includeUnassigned=True))
        ri = m.GetRingInfo()
        nBridgeheads = rdMolDescriptors.CalcNumBridgeheadAtoms(m)
        nSpiro = rdMolDescriptors.CalcNumSpiroAtoms(m)
        nMacrocycles = 0
        for x in ri.AtomRings():
            if len(x) > 8:
                nMacrocycles += 1

        sizePenalty = nAtoms**1.005 - nAtoms
        stereoPenalty = math.log10(nChiralCenters + 1)
        spiroPenalty = math.log10(nSpiro + 1)
        bridgePenalty = math.log10(nBridgeheads + 1)
        macrocyclePenalty = 0.
        # ---------------------------------------
        # This differs from the paper, which defines:
        #  macrocyclePenalty = math.log10(nMacrocycles+1)
        # This form generates better results when 2 or more macrocycles are present
        if nMacrocycles > 0:
            macrocyclePenalty = math.log10(2)

        score2 = 0. - sizePenalty - stereoPenalty - spiroPenalty - bridgePenalty - macrocyclePenalty

        # correction for the fingerprint density
        # not in the original publication, added in version 1.1
        # to make highly symmetrical molecules easier to synthetise
        score3 = 0.
        if nAtoms > len(fps):
            score3 = math.log(float(nAtoms) / len(fps)) * .5

        sascore = score1 + score2 + score3

        # need to transform "raw" value into scale between 1 and 10
        min_score = -4.0
        max_score = 2.5
        sascore = 11. - (sascore - min_score + 1) / (max_score - min_score) * 9.
        # smooth the 10-end
        if sascore > 8.:
            sascore = 8. + math.log(sascore + 1. - 9.)
        if sascore > 10.:
            sascore = 10.0
        elif sascore < 1.:
            sascore = 1.0
        sascore = math.exp(1 - sascore)  # minimize the sascore
        return sascore
    except Exception:
        # Best effort: any scoring failure yields the worst score. This was
        # a bare `except:`, which also swallowed KeyboardInterrupt and
        # SystemExit; those now propagate.
        return 0.0
def _gacRecurse(self, res, terminalOnly=False):
    """ adds the descendants of this node to the dictionary ``res``

    Children are stored under their SMILES key. With ``terminalOnly`` set,
    only children that have no children themselves are stored. Every child
    is recursed into either way.
    """
    for smiles, node in iteritems(self.children):
        keep = (not terminalOnly) or len(node.children) == 0
        if keep:
            res[smiles] = node
        node._gacRecurse(res, terminalOnly=terminalOnly)
def CalcSAScore(rmol):
    """Return the synthetic accessibility score of ``rmol``, clamped to [1, 10].

    The calculation runs on a deep copy of ``rmol``. Fragment contributions
    are read from the module-level ``_fscores``; when that is None,
    ``ReadFragScores()`` is called first.
    """
    if _fscores is None:
        ReadFragScores()

    mol = copy.deepcopy(rmol)
    # Chem.SanitizeMol(mol) # gives crashes!

    # fragment score: count-weighted mean contribution over the radius-2
    # Morgan fingerprint; unknown elements contribute -4
    elements = AllChem.GetMorganFingerprint(mol, 2).GetNonzeroElements()
    weighted = 0.0
    occurrences = 0
    for elementId, count in iteritems(elements):
        occurrences += count
        weighted += _fscores.get(elementId, -4) * count
    fragmentScore = weighted / occurrences

    # features score
    atomCount = mol.GetNumAtoms()
    chiralCount = len(Chem.FindMolChiralCenters(mol, includeUnassigned=True))
    ringInfo = mol.GetRingInfo()
    bridgeheadCount, spiroCount = NumBridgeheadsAndSpiro(mol, ringInfo)
    macrocycleCount = sum(1 for ring in ringInfo.AtomRings() if len(ring) > 8)

    sizePenalty = atomCount**1.005 - atomCount
    stereoPenalty = math.log10(chiralCount + 1)
    spiroPenalty = math.log10(spiroCount + 1)
    bridgePenalty = math.log10(bridgeheadCount + 1)
    # The paper defines the macrocycle penalty as log10(nMacrocycles + 1);
    # a fixed log10(2) generates better results when 2 or more macrocycles
    # are present.
    macrocyclePenalty = math.log10(2) if macrocycleCount > 0 else 0.0

    featureScore = (0.0 - sizePenalty - stereoPenalty - spiroPenalty - bridgePenalty -
                    macrocyclePenalty)

    # correction for the fingerprint density (not in the original
    # publication) to make highly symmetrical molecules easier to synthesize
    if atomCount > len(elements):
        densityScore = math.log(float(atomCount) / len(elements)) * 0.5
    else:
        densityScore = 0.0

    sascore = fragmentScore + featureScore + densityScore

    # transform the "raw" value into a scale between 1 and 10
    lowest = -4.0
    highest = 2.5
    sascore = 11.0 - (sascore - lowest + 1) / (highest - lowest) * 9.0
    # smooth the 10-end
    if sascore > 8.0:
        sascore = 8.0 + math.log(sascore - 8.0)

    if sascore > 10.0:
        return 10.0
    if sascore < 1.0:
        return 1.0
    return sascore
def _gacRecurse(self, res, terminalOnly=False):
    """ recursively collects the nodes below this one into ``res``

    ``res`` is a dictionary keyed by SMILES and is filled in place. When
    ``terminalOnly`` is true a child is only stored if it has no children;
    the recursion itself always continues into every child.
    """
    for key, kid in iteritems(self.children):
        if terminalOnly and len(kid.children):
            # inner node in terminal-only mode: skip it, but keep descending
            pass
        else:
            res[key] = kid
        kid._gacRecurse(res, terminalOnly=terminalOnly)
g1 = re.sub('[a-z,A-Z]','',g1) g2 = re.sub('[a-z,A-Z]','',g2) sma='[$(%s):1]%s;!@[$(%s):2]>>[%s*]-[*:1].[%s*]-[*:2]'%(r1,bnd,r2,g1,g2) gp[j] =sma for gp in smartsGps: for defn in gp: try: t=Reactions.ReactionFromSmarts(defn) t.Initialize() except: print(defn) raise environMatchers={} for env,sma in iteritems(environs): environMatchers[env]=Chem.MolFromSmarts(sma) bondMatchers=[] for i,compats in enumerate(reactionDefs): tmp=[] for i1,i2,bType in compats: e1 = environs['L%s'%i1] e2 = environs['L%s'%i2] patt = '[$(%s)]%s;!@[$(%s)]'%(e1,bType,e2) patt = Chem.MolFromSmarts(patt) tmp.append((i1,i2,bType,patt)) bondMatchers.append(tmp) reactions = tuple([[Reactions.ReactionFromSmarts(y) for y in x] for x in smartsGps]) reverseReactions = []
def synthetic_accessibility(mol, _fscores=None):
    '''
    calculation of synthetic accessibility score as described in:

    'Estimation of Synthetic Accessibility Score of Drug-like Molecules
    based on Molecular Complexity and Fragment Contributions'
    Peter Ertl and Ansgar Schuffenhauer
    Journal of Cheminformatics 1:8 (2009)
    http://www.jcheminf.com/content/1/1/8

    several small modifications to the original paper are included
    particularly slightly different formula for macrocyclic penalty
    and taking into account also molecule symmetry (fingerprint density)

    for a set of 10k diverse molecules the agreement between the original
    method as implemented in PipelinePilot and this implementation is
    r2 = 0.97

    peter ertl & greg landrum, september 2013

    Parameters
    ----------
    mol : Mol
    _fscores : mapping, optional
        Fragment contributions keyed by fingerprint element id. When None,
        they are loaded from ``fpscores.pkl.gz`` next to this module on
        every call.

    Returns
    -------
    float : synthetic accessibility score, clamped to [1, 10]

    Raises
    ------
    ZeroDivisionError
        If the fingerprint counts sum to zero.
    '''
    if _fscores is None:
        # NOTE: pickle can execute arbitrary code while loading; this must
        # only ever read the data file shipped alongside this module.
        with gzip.open(os.path.join(os.path.dirname(__file__), 'fpscores.pkl.gz'), 'rb') as f:
            raw_scores = pickle.load(f)
        # each record is [score, id, id, ...]: invert it into id -> score
        _fscores = {}
        for record in raw_scores:
            for element_id in record[1:]:
                _fscores[element_id] = float(record[0])

    # fragment score
    # 2 is the *radius* of the circular fingerprint
    fingerprint = rdMolDescriptors.GetMorganFingerprint(mol, 2)
    fingerprints = fingerprint.GetNonzeroElements()
    score1 = 0.
    nf = 0
    for bit_id, value in iteritems(fingerprints):
        nf += value
        # elements without a known contribution are penalized with -4
        score1 += _fscores.get(bit_id, -4) * value
    score1 /= nf

    # features score
    num_atoms = mol.GetNumAtoms()
    num_chiral_centers = len(Chem.FindMolChiralCenters(mol, includeUnassigned=True))
    ring_info = mol.GetRingInfo()
    num_spiro = rdMolDescriptors.CalcNumSpiroAtoms(mol)
    num_bridgeheads = rdMolDescriptors.CalcNumBridgeheadAtoms(mol)
    num_macrocycles = 0
    for each_ring in ring_info.AtomRings():
        if len(each_ring) > 8:
            num_macrocycles += 1

    size_penalty = num_atoms ** 1.005 - num_atoms
    stereo_penalty = math.log10(num_chiral_centers + 1)
    spiro_penalty = math.log10(num_spiro + 1)
    bridge_penalty = math.log10(num_bridgeheads + 1)
    macrocycle_penalty = 0.
    # ---------------------------------------
    # This differs from the paper, which defines:
    #  macrocycle_penalty = math.log10(num_macrocycles+1)
    # This form generates better results when 2 or more macrocycles are present
    if num_macrocycles > 0:
        macrocycle_penalty = math.log10(2)

    score2 = 0. - size_penalty - stereo_penalty - spiro_penalty - bridge_penalty - macrocycle_penalty

    # correction for the fingerprint density
    # not in the original publication, added in version 1.1
    # to make highly symmetrical molecules easier to synthetise
    score3 = 0.
    if num_atoms > len(fingerprints):
        score3 = math.log(float(num_atoms) / len(fingerprints)) * .5

    sascore = score1 + score2 + score3

    # need to transform "raw" value into scale between 1 and 10
    min_score = -4.0
    max_score = 2.5
    sascore = 11. - (sascore - min_score + 1) / (max_score - min_score) * 9.
    # smooth the 10-end
    if sascore > 8.:
        sascore = 8. + math.log(sascore + 1. - 9.)
    if sascore > 10.:
        sascore = 10.0
    elif sascore < 1.:
        sascore = 1.0

    return sascore