예제 #1
0
 def testIssue80(self):
     """Regression test: calling NHOHCount must not change MolLogP.

     ``Crippen.MolLogP`` is evaluated on the same molecule before and after
     a call to ``Lipinski.NHOHCount``; the two values must be exactly equal.
     """
     from rdkit.Chem import Lipinski
     m = Chem.MolFromSmiles('CCOC')
     ref = Crippen.MolLogP(m)
     # The return value is deliberately discarded; only the effect of the
     # call on ``m`` is under test.
     Lipinski.NHOHCount(m)
     probe = Crippen.MolLogP(m)
     # assertEqual reports both values on failure, which
     # assertTrue(probe == ref) cannot do.
     self.assertEqual(probe, ref)
예제 #2
0
def lipinski_rule(mol):
    """Evaluate four Lipinski-style threshold checks for ``mol``.

    :param mol: molecule passed to the RDKit descriptor functions
    :return: list of four booleans, in this order:
        ``NHOHCount <= 5``, ``NOCount <= 10``, ``ExactMolWt <= 500`` and
        ``LogP('logP').run(fingerprint) <= 5``, where ``fingerprint`` is the
        hashed atom-pair bit vector of ``mol``.
    """
    atom_pair_fp = rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect(mol)

    donors_ok = Lipinski.NHOHCount(mol) <= 5
    acceptors_ok = Lipinski.NOCount(mol) <= 10
    weight_ok = Descriptors.ExactMolWt(mol) <= 500
    # The logP value is produced by the LogP model from the fingerprint,
    # not directly from the molecule.
    logp_ok = LogP('logP').run(atom_pair_fp) <= 5

    return [donors_ok, acceptors_ok, weight_ok, logp_ok]
예제 #3
0
def get_descriptors(mol, write=False):
    """Return a fixed-order list of 20 RDKit descriptor values for ``mol``.

    Any value that is NaN is replaced by ``0``.

    :param mol: molecule passed to each descriptor function
    :param write: accepted but not used anywhere in this function
    :return: list of descriptor values, in the order of ``calculators`` below
    """
    calculators = (
        Lipinski.NumAromaticHeterocycles,
        Lipinski.NumAromaticRings,
        Lipinski.NumHDonors,
        Lipinski.RingCount,
        Lipinski.NHOHCount,
        Lipinski.NumHeteroatoms,
        Lipinski.NumAliphaticCarbocycles,
        Lipinski.NumSaturatedCarbocycles,
        Lipinski.NumAliphaticHeterocycles,
        Lipinski.NumHAcceptors,
        Lipinski.NumSaturatedHeterocycles,
        Lipinski.NumAliphaticRings,
        Descriptors.NumRadicalElectrons,
        Descriptors.MaxPartialCharge,
        Descriptors.NumValenceElectrons,
        Lipinski.FractionCSP3,
        Descriptors.MaxAbsPartialCharge,
        Lipinski.NumAromaticCarbocycles,
        Lipinski.NumSaturatedRings,
        Lipinski.NumRotatableBonds,
    )
    values = [calculate(mol) for calculate in calculators]

    # NaN is the only value that compares unequal to itself, so
    # ``value != value`` is a dependency-free NaN test.
    return [0 if value != value else value for value in values]
예제 #4
0
    def test1(self):
        """Compare computed Lipinski descriptors with values stored in the SD file.

        (Original description: "testing first 200 mols from NCI".)

        For every readable molecule in ``self.inFileName`` each descriptor is
        recomputed and compared with the integer stored in the corresponding
        molecule property. ``NonStrict`` and ``Strict`` are property names
        defined outside this method.
        """
        # figure out which rotor version we are using
        m = Chem.MolFromSmiles("CC(C)(C)c1cc(O)c(cc1O)C(C)(C)C")
        if Lipinski.NumRotatableBonds(m) == 2:
            rot_prop = NonStrict
        else:
            rot_prop = Strict

        rotor_options = rdMolDescriptors.NumRotatableBondsOptions
        # (label used in the failure message, calculator, reference property)
        checks = (
            ('h donors', Lipinski.NHOHCount, 'NUM_LIPINSKIHDONORS'),
            ('h acceptors', Lipinski.NOCount, 'NUM_LIPINSKIHACCEPTORS'),
            ('h donors', Lipinski.NumHDonors, 'NUM_HDONORS'),
            ('h acceptors', Lipinski.NumHAcceptors, 'NUM_HACCEPTORS'),
            ('heteroatoms', Lipinski.NumHeteroatoms, 'NUM_HETEROATOMS'),
            ('rotors', Lipinski.NumRotatableBonds, rot_prop),
            # test the underlying numrotatable bonds
            ('rotors',
             lambda mol: rdMolDescriptors.CalcNumRotatableBonds(
                 mol, rotor_options.NonStrict),
             NonStrict),
            ('rotors',
             lambda mol: rdMolDescriptors.CalcNumRotatableBonds(
                 mol, rotor_options.Strict),
             Strict),
        )

        suppl = Chem.SDMolSupplier(self.inFileName)
        # idx counts every supplier entry, including unreadable ones, so the
        # reported number is the 1-based position in the file.
        for idx, mol in enumerate(suppl, 1):
            if not mol:
                continue
            for label, calculate, prop_name in checks:
                calc = calculate(mol)
                orig = int(mol.GetProp(prop_name))
                # Explicit comparison + self.fail instead of a bare ``assert``:
                # assert statements are removed under ``python -O``, which
                # would silently turn this test into a no-op. The message is
                # only built on failure, as before.
                if calc != orig:
                    self.fail('bad num %s for mol %d (%s): %d != %d' % (
                        label, idx, mol.GetProp('SMILES'), calc, orig))
예제 #5
0
def auto_sampling(mult_factor, mol, log):
    """Derive a sample count from the flexibility of ``mol``.

    Three counts are each weighted by 3 and summed: rotatable bonds,
    NH/OH groups and saturated rings. The result is ``mult_factor`` times
    that sum, or ``mult_factor`` itself when the sum is zero.

    :param mult_factor: multiplier applied to the weighted sum
    :param mol: molecule passed to the RDKit ``Lipinski`` functions
    :param log: accepted but not used in this function
    :return: the computed number of samples
    """
    flexible_counts = (
        Lipinski.NumRotatableBonds(mol),  # x3, for C3 rotations
        Lipinski.NHOHCount(mol),  # x3, for OH/NH rotations
        Lipinski.NumSaturatedRings(mol),  # x3, for boat/chair/envelope confs
    )
    auto_samples = sum(3 * count for count in flexible_counts)

    # A molecule with none of these features still gets mult_factor samples.
    return mult_factor if auto_samples == 0 else mult_factor * auto_samples
예제 #6
0
def auto_sampling(mult_factor, mol, args, log):
    """Derive a sample count from the flexibility of ``mol``.

    When ``args.metal_complex`` is set and ``args.metal_idx`` is non-empty,
    ``mult_factor`` is first scaled by ``3 * len(args.metal_idx)``. The
    rotatable-bond, NH/OH and saturated-ring counts are then each weighted
    by 3 and summed; the result is ``mult_factor`` times that sum, or
    ``mult_factor`` itself when the sum is zero.

    :param mult_factor: base multiplier
    :param mol: molecule passed to the RDKit ``Lipinski`` functions
    :param args: object providing ``metal_complex`` and ``metal_idx``
    :param log: accepted but not used in this function
    :return: the computed number of samples
    """
    if args.metal_complex and len(args.metal_idx) > 0:
        # this accounts for possible trans/cis isomers in metal complexes
        mult_factor = mult_factor * 3 * len(args.metal_idx)

    rotor_term = 3 * Lipinski.NumRotatableBonds(mol)  # x3, for C3 rotations
    nhoh_term = 3 * Lipinski.NHOHCount(mol)  # x3, for OH/NH rotations
    # x3, for boat/chair/envelope confs
    ring_term = 3 * Lipinski.NumSaturatedRings(mol)
    auto_samples = rotor_term + nhoh_term + ring_term

    if auto_samples == 0:
        return mult_factor
    return mult_factor * auto_samples
 def descriptors(self, mol):
     """Return an 11-element descriptor list for ``mol``.

     Order of the returned values: aromatic fraction (``self.arofrac``),
     exact molecular weight, valence electron count, H-bond acceptors,
     H-bond donors, N/O count, NH/OH count, rotatable bonds, fraction of
     sp3 carbons, Crippen logP, and the number of matches of the
     single-atom SMARTS ``[^1]``.
     """
     return [
         self.arofrac(mol),
         Descriptors.ExactMolWt(mol, False),
         Descriptors.NumValenceElectrons(mol),
         Lipinski.NumHAcceptors(mol),
         Lipinski.NumHDonors(mol),
         Lipinski.NOCount(mol),
         Lipinski.NHOHCount(mol),
         Lipinski.NumRotatableBonds(mol),
         Lipinski.FractionCSP3(mol),
         Crippen.MolLogP(mol),
         # '[^1]' is an RDKit SMARTS hybridization query (SP atoms); each
         # match is one atom, so this counts atoms rather than bonds.
         len(mol.GetSubstructMatches(Chem.MolFromSmarts('[^1]'))),
     ]
예제 #8
0
def CalculateNumLipinskiHDonors(mol):
    """
    Calculate the number of hydrogen bond donors
    (nitrogen–hydrogen and oxygen–hydrogen bonds).

    --->nLipinskiHD

    :param mol: molecule
    :type mol: rdkit.Chem.rdchem.Mol
    :return: the number of hydrogen bond donors, as given by
        ``Lipinski.NHOHCount``
    :rtype: int

    """
    return Lipinski.NHOHCount(mol)
예제 #9
0
    def test1(self):
        """Compare computed Lipinski descriptors with values stored in the SD file.

        (Original description: "testing first 200 mols from NCI".)

        For every readable molecule in ``self.inFileName`` each descriptor is
        recomputed and compared with the integer stored in the corresponding
        molecule property.
        """
        # (label used in the failure message, calculator, reference property)
        checks = (
            ('h donors', Lipinski.NHOHCount, 'NUM_LIPINSKIHDONORS'),
            ('h acceptors', Lipinski.NOCount, 'NUM_LIPINSKIHACCEPTORS'),
            ('h donors', Lipinski.NumHDonors, 'NUM_HDONORS'),
            ('h acceptors', Lipinski.NumHAcceptors, 'NUM_HACCEPTORS'),
            ('heteroatoms', Lipinski.NumHeteroatoms, 'NUM_HETEROATOMS'),
            ('rotors', Lipinski.NumRotatableBonds, 'NUM_ROTATABLEBONDS'),
        )

        suppl = Chem.SDMolSupplier(self.inFileName)
        # idx counts every supplier entry, including unreadable ones, so the
        # reported number is the 1-based position in the file.
        for idx, mol in enumerate(suppl, 1):
            if not mol:
                continue
            for label, calculate, prop_name in checks:
                calc = calculate(mol)
                orig = int(mol.GetProp(prop_name))
                # Explicit comparison + self.fail instead of a bare ``assert``:
                # assert statements are removed under ``python -O``, which
                # would silently turn this test into a no-op. The message is
                # only built on failure, as before.
                if calc != orig:
                    self.fail('bad num %s for mol %d (%s): %d != %d' % (
                        label, idx, mol.GetProp('SMILES'), calc, orig))
예제 #10
0
    def extract(x, from_smiles):
        """Compute a flat descriptor vector for one molecule.

        ``include_3D`` is read from the enclosing scope. The result has 24
        entries, or 29 when ``include_3D`` is true (five shape descriptors
        computed on an embedded, MMFF-optimised copy with explicit hydrogens
        are appended). If the molecule is ``None`` or has no atoms, a list of
        zeros of the matching length is returned.

        :param x: a SMILES string when ``from_smiles`` is true, otherwise an
            already-built molecule object
        :param from_smiles: whether ``x`` must be parsed with
            ``Chem.MolFromSmiles``
        :return: list of descriptor values
        """
        mol = Chem.MolFromSmiles(x) if from_smiles else x

        if (mol is None) or (len(mol.GetAtoms()) == 0):
            # Invalid or empty molecule: all-zero vector of the expected size.
            return [0] * 29 if include_3D else [0] * 24

        features = [
            Crippen.MolLogP(mol),                      # logP
            Crippen.MolMR(mol),                        # molar refractivity
            Descriptors.MolWt(mol),
            Descriptors.ExactMolWt(mol),
            Descriptors.HeavyAtomMolWt(mol),
            Lipinski.HeavyAtomCount(mol),
            Lipinski.NHOHCount(mol),
            Lipinski.NOCount(mol),
            Lipinski.NumHAcceptors(mol),
            Lipinski.NumHDonors(mol),
            Lipinski.NumHeteroatoms(mol),
            Lipinski.NumRotatableBonds(mol),
            Descriptors.NumValenceElectrons(mol),
            rdMolDescriptors.CalcNumAmideBonds(mol),
            Lipinski.NumAliphaticRings(mol),
            Lipinski.NumAromaticRings(mol),
            Lipinski.NumSaturatedRings(mol),
            Lipinski.NumAliphaticCarbocycles(mol),
            Lipinski.NumAliphaticHeterocycles(mol),
            Lipinski.NumAromaticCarbocycles(mol),
            Lipinski.NumAromaticHeterocycles(mol),
            Lipinski.NumSaturatedCarbocycles(mol),
            Lipinski.NumSaturatedHeterocycles(mol),
            rdMolDescriptors.CalcTPSA(mol),
        ]
        if not include_3D:
            return features

        # The shape descriptors need coordinates: add hydrogens, embed a
        # conformer, then relax it with MMFF.
        # NOTE(review): the return values of EmbedMolecule and
        # MMFFOptimizeMolecule are not checked here - confirm how a failed
        # embedding should be handled.
        mol_3D = Chem.AddHs(mol)
        AllChem.EmbedMolecule(mol_3D)
        AllChem.MMFFOptimizeMolecule(mol_3D)

        features.extend([
            rdMolDescriptors.CalcEccentricity(mol_3D),
            rdMolDescriptors.CalcAsphericity(mol_3D),
            rdMolDescriptors.CalcSpherocityIndex(mol_3D),
            rdMolDescriptors.CalcInertialShapeFactor(mol_3D),
            rdMolDescriptors.CalcRadiusOfGyration(mol_3D),
        ])
        return features
예제 #11
0
def calc_rdkit(mol):
    """Compute a fixed-order vector of RDKit descriptors for ``mol``.

    Every function listed in ``calculators`` is called with ``mol``; the
    results are packed into a NumPy array and returned as a ``pandas.Series``
    with the default integer index.

    :param mol: molecule passed to each descriptor function
    :return: ``pandas.Series`` holding one value per calculator, in order
    """
    calculators = (
        # Crippen
        Crippen.MolLogP, Crippen.MolMR,
        # Descriptors
        Descriptors.FpDensityMorgan1, Descriptors.FpDensityMorgan2,
        Descriptors.FpDensityMorgan3, Descriptors.FractionCSP3,
        Descriptors.HeavyAtomMolWt, Descriptors.MaxAbsPartialCharge,
        Descriptors.MaxPartialCharge, Descriptors.MinAbsPartialCharge,
        Descriptors.MinPartialCharge, Descriptors.MolWt,
        Descriptors.NumRadicalElectrons, Descriptors.NumValenceElectrons,
        # EState indices
        EState.EState.MaxAbsEStateIndex, EState.EState.MaxEStateIndex,
        EState.EState.MinAbsEStateIndex, EState.EState.MinEStateIndex,
        # EState VSA
        EState.EState_VSA.EState_VSA1, EState.EState_VSA.EState_VSA10,
        EState.EState_VSA.EState_VSA11, EState.EState_VSA.EState_VSA2,
        EState.EState_VSA.EState_VSA3, EState.EState_VSA.EState_VSA4,
        EState.EState_VSA.EState_VSA5, EState.EState_VSA.EState_VSA6,
        EState.EState_VSA.EState_VSA7, EState.EState_VSA.EState_VSA8,
        EState.EState_VSA.EState_VSA9,
        # Fragment counts
        Fragments.fr_Al_COO, Fragments.fr_Al_OH, Fragments.fr_Al_OH_noTert,
        Fragments.fr_aldehyde, Fragments.fr_alkyl_carbamate,
        Fragments.fr_alkyl_halide, Fragments.fr_allylic_oxid,
        Fragments.fr_amide, Fragments.fr_amidine, Fragments.fr_aniline,
        Fragments.fr_Ar_COO, Fragments.fr_Ar_N, Fragments.fr_Ar_NH,
        Fragments.fr_Ar_OH, Fragments.fr_ArN, Fragments.fr_aryl_methyl,
        Fragments.fr_azide, Fragments.fr_azo, Fragments.fr_barbitur,
        Fragments.fr_benzene, Fragments.fr_benzodiazepine,
        Fragments.fr_bicyclic, Fragments.fr_C_O, Fragments.fr_C_O_noCOO,
        Fragments.fr_C_S, Fragments.fr_COO, Fragments.fr_COO2,
        Fragments.fr_diazo, Fragments.fr_dihydropyridine,
        Fragments.fr_epoxide, Fragments.fr_ester, Fragments.fr_ether,
        Fragments.fr_furan, Fragments.fr_guanido, Fragments.fr_halogen,
        Fragments.fr_hdrzine, Fragments.fr_hdrzone, Fragments.fr_HOCCN,
        Fragments.fr_imidazole, Fragments.fr_imide, Fragments.fr_Imine,
        Fragments.fr_isocyan, Fragments.fr_isothiocyan, Fragments.fr_ketone,
        Fragments.fr_ketone_Topliss, Fragments.fr_lactam,
        Fragments.fr_lactone, Fragments.fr_methoxy, Fragments.fr_morpholine,
        Fragments.fr_N_O, Fragments.fr_Ndealkylation1,
        Fragments.fr_Ndealkylation2, Fragments.fr_NH0, Fragments.fr_NH1,
        Fragments.fr_NH2, Fragments.fr_Nhpyrrole, Fragments.fr_nitrile,
        Fragments.fr_nitro, Fragments.fr_nitro_arom,
        Fragments.fr_nitro_arom_nonortho, Fragments.fr_nitroso,
        Fragments.fr_oxazole, Fragments.fr_oxime,
        Fragments.fr_para_hydroxylation, Fragments.fr_phenol,
        Fragments.fr_phenol_noOrthoHbond, Fragments.fr_phos_acid,
        Fragments.fr_phos_ester, Fragments.fr_piperdine,
        Fragments.fr_piperzine, Fragments.fr_priamide,
        Fragments.fr_prisulfonamd, Fragments.fr_pyridine, Fragments.fr_quatN,
        Fragments.fr_SH, Fragments.fr_sulfide, Fragments.fr_sulfonamd,
        Fragments.fr_sulfone, Fragments.fr_term_acetylene,
        Fragments.fr_tetrazole, Fragments.fr_thiazole, Fragments.fr_thiocyan,
        Fragments.fr_thiophene, Fragments.fr_unbrch_alkane, Fragments.fr_urea,
        # Graph descriptors
        GraphDescriptors.BalabanJ, GraphDescriptors.BertzCT,
        GraphDescriptors.Chi0, GraphDescriptors.Chi0n, GraphDescriptors.Chi0v,
        GraphDescriptors.Chi1, GraphDescriptors.Chi1n, GraphDescriptors.Chi1v,
        GraphDescriptors.Chi2n, GraphDescriptors.Chi2v,
        GraphDescriptors.Chi3n, GraphDescriptors.Chi3v,
        GraphDescriptors.Chi4n, GraphDescriptors.Chi4v,
        GraphDescriptors.HallKierAlpha, GraphDescriptors.Ipc,
        GraphDescriptors.Kappa1, GraphDescriptors.Kappa2,
        GraphDescriptors.Kappa3,
        # Lipinski counts
        Lipinski.HeavyAtomCount, Lipinski.NHOHCount, Lipinski.NOCount,
        Lipinski.NumAliphaticCarbocycles, Lipinski.NumAliphaticHeterocycles,
        Lipinski.NumAliphaticRings, Lipinski.NumAromaticCarbocycles,
        Lipinski.NumAromaticHeterocycles, Lipinski.NumAromaticRings,
        Lipinski.NumHAcceptors, Lipinski.NumHDonors, Lipinski.NumHeteroatoms,
        Lipinski.NumRotatableBonds, Lipinski.NumSaturatedCarbocycles,
        Lipinski.NumSaturatedHeterocycles, Lipinski.NumSaturatedRings,
        Lipinski.RingCount,
        # Surface-area descriptors
        MolSurf.LabuteASA,
        MolSurf.PEOE_VSA1, MolSurf.PEOE_VSA10, MolSurf.PEOE_VSA11,
        MolSurf.PEOE_VSA12, MolSurf.PEOE_VSA13, MolSurf.PEOE_VSA14,
        MolSurf.PEOE_VSA2, MolSurf.PEOE_VSA3, MolSurf.PEOE_VSA4,
        MolSurf.PEOE_VSA5, MolSurf.PEOE_VSA6, MolSurf.PEOE_VSA7,
        MolSurf.PEOE_VSA8, MolSurf.PEOE_VSA9,
        MolSurf.SlogP_VSA1, MolSurf.SlogP_VSA10, MolSurf.SlogP_VSA11,
        MolSurf.SlogP_VSA12, MolSurf.SlogP_VSA2, MolSurf.SlogP_VSA3,
        MolSurf.SlogP_VSA4, MolSurf.SlogP_VSA5, MolSurf.SlogP_VSA6,
        MolSurf.SlogP_VSA7, MolSurf.SlogP_VSA8, MolSurf.SlogP_VSA9,
        MolSurf.SMR_VSA1, MolSurf.SMR_VSA10, MolSurf.SMR_VSA2,
        MolSurf.SMR_VSA3, MolSurf.SMR_VSA4, MolSurf.SMR_VSA5,
        MolSurf.SMR_VSA6, MolSurf.SMR_VSA7, MolSurf.SMR_VSA8,
        MolSurf.SMR_VSA9,
        MolSurf.TPSA,
    )
    values = np.array([calculate(mol) for calculate in calculators])
    return pd.Series(values)
# ``lipinski_violators`` collects the indices of compounds that violate two of
# Lipinski's rule-of-five criteria; ``counter`` tracks the index number of the
# molecule being examined.
lipinski_violators = []
counter = 0

print("Scanning molecules for Lipinski violations.")
for mol in tqdm(mols):
    # Get the four rule-of-five quantities from RDKit: NH/OH count (donors),
    # N/O count (acceptors), exact molecular weight and Crippen logP.
    hdonors = Lipinski.NHOHCount(mol)
    hacceptors = Lipinski.NOCount(mol)
    mw = rdMolDescriptors.CalcExactMolWt(mol)
    logp = Crippen.MolLogP(mol)

    # Each flag is True exactly when the current mol violates that rule.
    dono_viol = hdonors > 5
    acceptor_viol = hacceptors > 10
    mw_viol = mw > 500
    logp_viol = logp > 5

    # Check if the violation sum is greater than one and assign the molecule
예제 #13
0
# Build one feature row (appended to X) and one target value (appended to Y)
# per line of ``data``. Each line is split on single spaces: field 0 is a
# SMILES string and field 1 is the numeric target.
for line in data.readlines():
    split = line.split(' ')

    mol = Chem.MolFromSmiles(split[0])
    fingerprint = rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect(mol)

    # The acid model receives the Avalon, MACCS and atom-pair fingerprints
    # combined with ``+``.
    pKa = acid_model.run(GetAvalonFP(mol) +
                         MACCSkeys.GenMACCSKeys(mol) +
                         fingerprint)
    sim_pKa = sim_model.run(split[0], acids)

    X.append([pKa,
              sim_pKa,
              Lipinski.NumHDonors(mol),
              Lipinski.NumHAcceptors(mol),
              Lipinski.NHOHCount(mol)])

    # float() already ignores surrounding whitespace, including the trailing
    # newline. Slicing off the last character instead would drop a real digit
    # when the final line has no newline or when the line has a third field.
    Y.append(float(split[1]))

# NOTE(review): the scaler is fitted on all rows before the train/test split,
# so test-set statistics influence the scaling - confirm this is intended.
scaler = preprocessing.StandardScaler()
X = scaler.fit_transform(np.asarray(X))
Y = np.asarray(Y)

# Hold out 10% of the rows for evaluation; fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.1,
                                                    random_state=1)

model = MLPRegressor(solver='adam', alpha=1e-5, hidden_layer_sizes=(1048, 128),
                     random_state=1, verbose=1, max_iter=1000, batch_size=500)
model.fit(X_train, y_train)

# Report the regressor's score on the held-out rows.
print(model.score(X_test, y_test))