def get_molecular_features(dataframe, mol_list):
    """Fill ``dataframe`` with per-molecule descriptor columns and return it.

    For every position ``i`` in ``mol_list`` the descriptors of
    ``mol_list[i]`` are written into row label ``i`` of ``dataframe`` via
    ``dataframe.at[i, column]``.  The frame is modified in place; the same
    object is returned.

    Columns are written in this order:

    * ``NbrAtoms``, ``NbrBonds``, ``mw``, ``HeavyAtomMolWt``,
      ``NumValenceElectrons``, ``FpDensityMorgan1``..``FpDensityMorgan3``
    * one count per feature family found by the ``BaseFeatures.fdef``
      feature factory: ``Acceptor``, ``Donor``, ``Hydrophobe``,
      ``LumpedHydrophobe``, ``PosIonizable``, ``NegIonizable``
    * ``NumRotatableBonds`` followed by a set of ``rdMolDescriptors.Calc*``
      values, each stored under the name of the function that produced it.

    Args:
        dataframe: Object supporting ``.at[row, column] = value``
            (presumably a pandas DataFrame whose row labels are
            ``0..len(mol_list) - 1`` -- TODO confirm against callers).
        mol_list: Indexable, sized collection of molecules (presumably
            RDKit ``Mol`` objects).

    Returns:
        The ``dataframe`` object that was passed in.
    """
    from collections import Counter

    n_mols = len(mol_list)
    if n_mols == 0:
        # Nothing to compute; avoid loading the feature definitions at all.
        return dataframe

    df = dataframe

    # The feature factory only depends on the fdef file, so build it once
    # instead of re-parsing the file for every molecule.
    fdef_name = os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef')
    factory = ChemicalFeatures.BuildFeatureFactory(fdef_name)

    # NOTE: MaxAbsPartialCharge, MaxPartialCharge, MinAbsPartialCharge and
    # MinPartialCharge are deliberately not computed: these four descriptors
    # produced the value of infinity for refcode_csd = YOLJUF
    # (CCOP(=O)(Cc1ccc(cc1)NC(=S)NP(OC(C)C)(OC(C)C)[S])OCC).
    general_descriptors = (
        ("mw", Descriptors.ExactMolWt),
        ('HeavyAtomMolWt', Chem.Descriptors.HeavyAtomMolWt),
        ('NumValenceElectrons', Chem.Descriptors.NumValenceElectrons),
        ('FpDensityMorgan1', Chem.Descriptors.FpDensityMorgan1),
        ('FpDensityMorgan2', Chem.Descriptors.FpDensityMorgan2),
        ('FpDensityMorgan3', Chem.Descriptors.FpDensityMorgan3),
    )

    # Feature families that get their own count column (any other family
    # reported by the factory is ignored).
    feature_families = (
        "Acceptor",
        "Donor",
        "Hydrophobe",
        "LumpedHydrophobe",
        "PosIonizable",
        "NegIonizable",
    )

    # rdMolDescriptors functions whose column name equals the function name.
    # CalcChi3v and CalcKappa3 were disabled in the original code and are
    # intentionally left out here as well.
    rd_function_names = (
        "CalcChi0n",
        "CalcChi0v",
        "CalcChi1n",
        "CalcChi1v",
        "CalcChi2n",
        "CalcChi2v",
        "CalcChi3n",
        "CalcChi4n",
        "CalcChi4v",
        "CalcFractionCSP3",
        "CalcHallKierAlpha",
        "CalcKappa1",
        "CalcKappa2",
        "CalcLabuteASA",
        "CalcNumAliphaticCarbocycles",
        "CalcNumAliphaticHeterocycles",
        "CalcNumAliphaticRings",
        "CalcNumAmideBonds",
        "CalcNumAromaticCarbocycles",
        "CalcNumAromaticHeterocycles",
        "CalcNumAromaticRings",
        "CalcNumBridgeheadAtoms",
        "CalcNumHBA",
        "CalcNumHBD",
        "CalcNumHeteroatoms",
        "CalcNumHeterocycles",
        "CalcNumLipinskiHBA",
        "CalcNumLipinskiHBD",
        "CalcNumRings",
        "CalcNumSaturatedCarbocycles",
        "CalcNumSaturatedHeterocycles",
        "CalcNumSaturatedRings",
        "CalcNumSpiroAtoms",
        "CalcTPSA",
    )
    rd_descriptors = [
        ("NumRotatableBonds", rdMolDescriptors.CalcNumRotatableBonds)
    ]
    rd_descriptors.extend(
        (name, getattr(rdMolDescriptors, name)) for name in rd_function_names)

    # mol_list is indexed (not iterated) so that row label and molecule
    # lookup keep using the same key ``i`` exactly as before.
    for i in range(n_mols):
        print("Getting molecular features for molecule: ", i)
        mol = mol_list[i]

        df.at[i, "NbrAtoms"] = mol.GetNumAtoms()
        df.at[i, "NbrBonds"] = mol.GetNumBonds()
        for column, compute in general_descriptors:
            df.at[i, column] = compute(mol)

        # Count how many features of each family the factory finds.
        family_counts = Counter(
            feat.GetFamily() for feat in factory.GetFeaturesForMol(mol))
        for family in feature_families:
            df.at[i, family] = family_counts[family]

        # More molecular features from rdMolDescriptors.
        for column, compute in rd_descriptors:
            df.at[i, column] = compute(mol)

    return df
def numBridgeheadsAndSpiro(mol, ri=None):
    """Return ``(bridgehead_count, spiro_count)`` for ``mol``.

    Both counts come straight from ``rdMolDescriptors``
    (``CalcNumBridgeheadAtoms`` and ``CalcNumSpiroAtoms``).

    Args:
        mol: Molecule handed to the two ``rdMolDescriptors`` calls.
        ri: Accepted for call compatibility; not used by this function.

    Returns:
        A 2-tuple: number of bridgehead atoms first, number of spiro
        atoms second.
    """
    spiro_count = rdMolDescriptors.CalcNumSpiroAtoms(mol)
    bridgehead_count = rdMolDescriptors.CalcNumBridgeheadAtoms(mol)
    return (bridgehead_count, spiro_count)
def __call__(self, smile):
    """Score a SMILES string; higher return values mean a lower sascore.

    The raw score is the sum of a fragment score (Morgan fingerprint
    contributions looked up in the module-level ``_fscores`` table), a
    feature penalty (size, stereo centres, spiro atoms, bridgeheads,
    macrocycles) and a fingerprint-density correction.  It is rescaled to
    the range 1..10 and finally mapped through ``exp(1 - sascore)`` so that
    maximising the returned value minimises the sascore.

    Args:
        smile: SMILES string passed to ``Chem.MolFromSmiles``.

    Returns:
        ``math.exp(1 - sascore)`` with ``sascore`` clamped to ``[1, 10]``,
        or ``0.0`` when the SMILES does not yield a molecule or when the
        scoring raises any ``Exception`` (best-effort behaviour).
    """
    if _fscores is None:
        # Presumably populates the module-level ``_fscores`` table --
        # verify against readFragmentScores.
        self.readFragmentScores()

    m = Chem.MolFromSmiles(smile)
    if not m:
        return 0.0

    try:
        # fragment score
        fp = rdMolDescriptors.GetMorganFingerprint(
            m, 2)  # <- 2 is the *radius* of the circular fingerprint
        fps = fp.GetNonzeroElements()
        score1 = 0.0
        nf = 0
        for bit_id, count in fps.items():
            nf += count
            # Fragments missing from the table contribute -4 each.
            score1 += _fscores.get(bit_id, -4) * count
        # nf == 0 raises ZeroDivisionError, handled by the fallback below.
        score1 /= nf

        # features score
        nAtoms = m.GetNumAtoms()
        nChiralCenters = len(
            Chem.FindMolChiralCenters(m, includeUnassigned=True))
        ri = m.GetRingInfo()
        nBridgeheads = rdMolDescriptors.CalcNumBridgeheadAtoms(m)
        nSpiro = rdMolDescriptors.CalcNumSpiroAtoms(m)
        # Rings with more than 8 atoms are treated as macrocycles.
        has_macrocycle = any(len(ring) > 8 for ring in ri.AtomRings())

        sizePenalty = nAtoms**1.005 - nAtoms
        stereoPenalty = math.log10(nChiralCenters + 1)
        spiroPenalty = math.log10(nSpiro + 1)
        bridgePenalty = math.log10(nBridgeheads + 1)
        # ---------------------------------------
        # This differs from the paper, which defines:
        #  macrocyclePenalty = math.log10(nMacrocycles+1)
        # This form generates better results when 2 or more macrocycles
        # are present
        macrocyclePenalty = math.log10(2) if has_macrocycle else 0.0

        score2 = (0.0 - sizePenalty - stereoPenalty - spiroPenalty -
                  bridgePenalty - macrocyclePenalty)

        # correction for the fingerprint density
        # not in the original publication, added in version 1.1
        # to make highly symmetrical molecules easier to synthetise
        score3 = 0.0
        if nAtoms > len(fps):
            score3 = math.log(float(nAtoms) / len(fps)) * 0.5

        sascore = score1 + score2 + score3

        # need to transform "raw" value into scale between 1 and 10
        min_score = -4.0
        max_score = 2.5
        sascore = (11.0 - (sascore - min_score + 1) /
                   (max_score - min_score) * 9.0)
        # smooth the 10-end
        if sascore > 8.0:
            sascore = 8.0 + math.log(sascore + 1.0 - 9.0)
        if sascore > 10.0:
            sascore = 10.0
        elif sascore < 1.0:
            sascore = 1.0

        # minimize the sascore
        return math.exp(1 - sascore)
    except Exception:
        # Deliberate best-effort: any scoring failure yields the worst
        # value.  KeyboardInterrupt/SystemExit are no longer swallowed.
        return 0.0