Ejemplo n.º 1
0
def get_molecular_features(dataframe, mol_list):
    """Write RDKit-derived descriptor columns into ``dataframe``, one row per molecule.

    For the molecule at position ``i`` of ``mol_list`` the values are stored
    with ``dataframe.at[i, <column>]``, so ``i`` is used as the row *label*.
    NOTE(review): this presumably expects ``dataframe`` to have a default
    0..n-1 index aligned with ``mol_list`` -- confirm against the caller.

    Columns written, in order: basic counts and weights (``NbrAtoms``,
    ``NbrBonds``, ``mw``, ``HeavyAtomMolWt``, ``NumValenceElectrons``), Morgan
    fingerprint densities, per-family counts of the features found by the
    ``BaseFeatures.fdef`` feature factory, and a series of
    ``rdMolDescriptors.Calc*`` values.

    Args:
        dataframe: Table that receives the descriptor columns. It is modified
            in place (no copy is made).
        mol_list: Sequence of molecule objects exposing ``GetNumAtoms`` and
            ``GetNumBonds`` and accepted by the RDKit descriptor functions.

    Returns:
        The same ``dataframe`` object that was passed in.
    """
    df = dataframe

    # Nothing to compute; returning here also means the RDKit feature
    # definition file is never touched for empty input. ``len`` is used
    # instead of truthiness so array-like inputs are handled as well.
    if len(mol_list) == 0:
        return df

    # The feature factory depends only on the definition file, not on the
    # molecule, so it is built once instead of once per molecule.
    fdefName = os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef')
    factory = ChemicalFeatures.BuildFeatureFactory(fdefName)

    # Feature families that are counted; any other family is ignored.
    feature_families = (
        "Acceptor",
        "Donor",
        "Hydrophobe",
        "LumpedHydrophobe",
        "PosIonizable",
        "NegIonizable",
    )

    # rdMolDescriptors functions whose result is stored in a column named
    # exactly like the function. Order matters: it fixes the column order.
    # CalcChi3v and CalcKappa3 are deliberately absent (they were disabled
    # in the original implementation).
    calc_names = (
        "CalcChi0n",
        "CalcChi0v",
        "CalcChi1n",
        "CalcChi1v",
        "CalcChi2n",
        "CalcChi2v",
        "CalcChi3n",
        "CalcChi4n",
        "CalcChi4v",
        "CalcFractionCSP3",
        "CalcHallKierAlpha",
        "CalcKappa1",
        "CalcKappa2",
        "CalcLabuteASA",
        "CalcNumAliphaticCarbocycles",
        "CalcNumAliphaticHeterocycles",
        "CalcNumAliphaticRings",
        "CalcNumAmideBonds",
        "CalcNumAromaticCarbocycles",
        "CalcNumAromaticHeterocycles",
        "CalcNumAromaticRings",
        "CalcNumBridgeheadAtoms",
        "CalcNumHBA",
        "CalcNumHBD",
        "CalcNumHeteroatoms",
        "CalcNumHeterocycles",
        "CalcNumLipinskiHBA",
        "CalcNumLipinskiHBD",
        "CalcNumRings",
        "CalcNumSaturatedCarbocycles",
        "CalcNumSaturatedHeterocycles",
        "CalcNumSaturatedRings",
        "CalcNumSpiroAtoms",
        "CalcTPSA",
    )
    # Resolve the functions once, outside the per-molecule loop.
    calculators = [(name, getattr(rdMolDescriptors, name))
                   for name in calc_names]

    for i, mol in enumerate(mol_list):
        print("Getting molecular features for molecule: ", i)

        natoms = mol.GetNumAtoms()
        nbonds = mol.GetNumBonds()
        mw = Descriptors.ExactMolWt(mol)
        df.at[i, "NbrAtoms"] = natoms
        df.at[i, "NbrBonds"] = nbonds
        df.at[i, "mw"] = mw
        df.at[i, 'HeavyAtomMolWt'] = Chem.Descriptors.HeavyAtomMolWt(mol)
        df.at[i, 'NumValenceElectrons'] = Chem.Descriptors.NumValenceElectrons(
            mol)
        # The four partial-charge descriptors (MaxAbsPartialCharge,
        # MaxPartialCharge, MinAbsPartialCharge, MinPartialCharge) are
        # intentionally not computed: they were producing the value of
        # infinity for refcode_csd = YOLJUF
        # (CCOP(=O)(Cc1ccc(cc1)NC(=S)NP(OC(C)C)(OC(C)C)[S])OCC).
        df.at[i, 'FpDensityMorgan1'] = Chem.Descriptors.FpDensityMorgan1(mol)
        df.at[i, 'FpDensityMorgan2'] = Chem.Descriptors.FpDensityMorgan2(mol)
        df.at[i, 'FpDensityMorgan3'] = Chem.Descriptors.FpDensityMorgan3(mol)

        # Count how many features of each tracked family the molecule has.
        family_counts = dict.fromkeys(feature_families, 0)
        for feat in factory.GetFeaturesForMol(mol):
            family = feat.GetFamily()
            if family in family_counts:
                family_counts[family] += 1
        for family in feature_families:
            df.at[i, family] = family_counts[family]

        # Further molecular features from rdMolDescriptors. The rotatable
        # bond count is the only one whose column name differs from the
        # function name, so it is written explicitly.
        df.at[i, "NumRotatableBonds"] = rdMolDescriptors.CalcNumRotatableBonds(
            mol)
        for column, calc in calculators:
            df.at[i, column] = calc(mol)

    return df
Ejemplo n.º 2
0
def numBridgeheadsAndSpiro(mol, ri=None):
    """Return ``(bridgehead_count, spiro_count)`` for ``mol``.

    Both counts come straight from ``rdMolDescriptors``
    (``CalcNumBridgeheadAtoms`` and ``CalcNumSpiroAtoms``).

    Args:
        mol: Molecule object passed to the two ``rdMolDescriptors`` calls.
        ri: Accepted but not used by this function.

    Returns:
        A 2-tuple: number of bridgehead atoms first, number of spiro atoms
        second.
    """
    spiro_count = rdMolDescriptors.CalcNumSpiroAtoms(mol)
    bridgehead_count = rdMolDescriptors.CalcNumBridgeheadAtoms(mol)
    return (bridgehead_count, spiro_count)
Ejemplo n.º 3
0
    def __call__(self, smile):
        """Score a SMILES string from its synthetic-accessibility estimate.

        A raw score is assembled from three parts: a fragment score (mean
        per-fragment contribution looked up in ``_fscores`` over the Morgan
        fingerprint of radius 2), a complexity penalty (size, stereo centres,
        spiro atoms, bridgeheads, macrocycles) and a fingerprint-density
        correction. The raw score is rescaled, smoothed above 8, clamped to
        the range [1, 10] and finally mapped through ``exp(1 - sascore)`` so
        that a lower SA score yields a higher return value.

        Args:
            smile: SMILES string handed to ``Chem.MolFromSmiles``.

        Returns:
            ``exp(1 - sascore)`` as a float, or ``0.0`` when the SMILES does
            not produce a usable molecule or when any ``Exception`` is raised
            while scoring (for example a division by zero when the
            fingerprint has no elements).
        """
        # Load the fragment table on first use.
        # NOTE(review): presumably readFragmentScores() fills the
        # module-level ``_fscores`` -- confirm in its definition.
        if _fscores is None:
            self.readFragmentScores()

        m = Chem.MolFromSmiles(smile)
        if not m:
            return 0.0

        # Only ``Exception`` is caught, so KeyboardInterrupt and SystemExit
        # are no longer silently converted into a score of 0.0.
        try:
            # fragment score
            fp = rdMolDescriptors.GetMorganFingerprint(
                m, 2)  # <- 2 is the *radius* of the circular fingerprint
            fps = fp.GetNonzeroElements()
            score1 = 0.0
            nf = 0
            for bitId, v in fps.items():
                nf += v
                # Fragments missing from the table contribute -4 each.
                score1 += _fscores.get(bitId, -4) * v
            score1 /= nf

            # features score
            nAtoms = m.GetNumAtoms()
            nChiralCenters = len(
                Chem.FindMolChiralCenters(m, includeUnassigned=True))
            ri = m.GetRingInfo()
            nBridgeheads = rdMolDescriptors.CalcNumBridgeheadAtoms(m)
            nSpiro = rdMolDescriptors.CalcNumSpiroAtoms(m)
            # Rings with more than 8 atoms are counted as macrocycles.
            nMacrocycles = sum(1 for ring in ri.AtomRings() if len(ring) > 8)

            sizePenalty = nAtoms**1.005 - nAtoms
            stereoPenalty = math.log10(nChiralCenters + 1)
            spiroPenalty = math.log10(nSpiro + 1)
            bridgePenalty = math.log10(nBridgeheads + 1)
            # ---------------------------------------
            # This differs from the paper, which defines:
            #  macrocyclePenalty = math.log10(nMacrocycles+1)
            # This form generates better results when 2 or more
            # macrocycles are present
            macrocyclePenalty = math.log10(2) if nMacrocycles > 0 else 0.0
            score2 = (0.0 - sizePenalty - stereoPenalty - spiroPenalty -
                      bridgePenalty - macrocyclePenalty)

            # correction for the fingerprint density
            # not in the original publication, added in version 1.1
            # to make highly symmetrical molecules easier to synthetise
            score3 = 0.0
            if nAtoms > len(fps):
                score3 = math.log(float(nAtoms) / len(fps)) * 0.5
            sascore = score1 + score2 + score3

            # need to transform "raw" value into scale between 1 and 10
            min_score = -4.0
            max_score = 2.5
            sascore = (11.0 - (sascore - min_score + 1) /
                       (max_score - min_score) * 9.0)
            # smooth the 10-end
            if sascore > 8.0:
                sascore = 8.0 + math.log(sascore + 1.0 - 9.0)
            # clamp to [1, 10]
            if sascore > 10.0:
                sascore = 10.0
            elif sascore < 1.0:
                sascore = 1.0
            # minimize the sascore: lower SA score -> larger return value
            return math.exp(1 - sascore)
        except Exception:
            # Best-effort scoring: any failure is reported as a zero score.
            return 0.0