def testChi1v(self):
    """Check GraphDescriptors.Chi1v against stored reference values.

    Every SMILES string in the table is parsed with Chem.MolFromSmiles and
    its Chi1v value is compared with the paired reference via the file's
    feq helper, which is called with 1e-3 as its third argument (presumably
    the comparison tolerance).

    When the module-level doLong flag is truthy, the regression file
    'PP_descrs_regress.rest.2.csv' is additionally handed to
    self.__testDesc together with the Chi1v function.
    """
    references = (
        ('CCCCCC', 2.914),
        ('CCC(C)CC', 2.808),
        ('CC(C)CCC', 2.770),
        ('CC(C)C(C)C', 2.643),
        ('CC(C)(C)CC', 2.561),
        ('CCCCCO', 2.523),
        ('CCC(O)CC', 2.489),
        ('CC(O)(C)CC', 2.284),
        ('c1ccccc1O', 2.134),
    )
    for smiles, expected in references:
        mol = Chem.MolFromSmiles(smiles)
        observed = GraphDescriptors.Chi1v(mol)
        assert feq(observed, expected, 1e-3), (
            'mol %s (Chi1v=%f) should have Chi1V=%f' % (smiles, observed, expected))

    # The regression run is opt-in; skip it unless the long tests are enabled.
    if not doLong:
        return
    self.__testDesc('PP_descrs_regress.rest.2.csv', 10, GraphDescriptors.Chi1v)
Beispiel #2
0
  def testChi1v(self):
    """Test the calculation of Chi1v against stored reference values.

    Each SMILES string in the table is parsed with Chem.MolFromSmiles and
    its Chi1v value is compared with the paired reference via the file's
    feq helper, which is called with 1e-3 as its third argument (presumably
    the comparison tolerance).
    """
    references = (
      ('CCCCCC', 2.914),
      ('CCC(C)CC', 2.808),
      ('CC(C)CCC', 2.770),
      ('CC(C)C(C)C', 2.643),
      ('CC(C)(C)CC', 2.561),
      ('CCCCCO', 2.523),
      ('CCC(O)CC', 2.489),
      ('CC(O)(C)CC', 2.284),
      ('c1ccccc1O', 2.134),
    )
    for smiles, expected in references:
      mol = Chem.MolFromSmiles(smiles)
      observed = GraphDescriptors.Chi1v(mol)
      assert feq(observed, expected, 1e-3), (
        'mol %s (Chi1v=%f) should have Chi1V=%f' % (smiles, observed, expected))
Beispiel #3
0
def calc_rdkit(mol):
    """Evaluate a fixed battery of RDKit descriptors for one molecule.

    The descriptor functions are called on ``mol`` in a fixed order, grouped
    by the module that provides them (Crippen, Descriptors, EState,
    EState_VSA, Fragments, GraphDescriptors, Lipinski, MolSurf).  The results
    are packed into a NumPy array and wrapped in a pandas Series with the
    default positional index, so a value is identified only by its position
    in that order.

    Parameters
    ----------
    mol
        The molecule object passed unchanged to every descriptor function
        (presumably an RDKit ``Mol`` -- confirm against the callers).

    Returns
    -------
    pandas.Series
        The descriptor values in the order listed below.
    """
    crippen_values = [
        Crippen.MolLogP(mol),
        Crippen.MolMR(mol),
    ]

    general_values = [
        Descriptors.FpDensityMorgan1(mol),
        Descriptors.FpDensityMorgan2(mol),
        Descriptors.FpDensityMorgan3(mol),
        Descriptors.FractionCSP3(mol),
        Descriptors.HeavyAtomMolWt(mol),
        Descriptors.MaxAbsPartialCharge(mol),
        Descriptors.MaxPartialCharge(mol),
        Descriptors.MinAbsPartialCharge(mol),
        Descriptors.MinPartialCharge(mol),
        Descriptors.MolWt(mol),
        Descriptors.NumRadicalElectrons(mol),
        Descriptors.NumValenceElectrons(mol),
    ]

    estate_values = [
        EState.EState.MaxAbsEStateIndex(mol),
        EState.EState.MaxEStateIndex(mol),
        EState.EState.MinAbsEStateIndex(mol),
        EState.EState.MinEStateIndex(mol),
    ]

    # NOTE: the VSA bins are listed in name-sorted order (1, 10, 11, 2, ...),
    # not numeric order; the output positions depend on it.
    estate_vsa_values = [
        EState.EState_VSA.EState_VSA1(mol),
        EState.EState_VSA.EState_VSA10(mol),
        EState.EState_VSA.EState_VSA11(mol),
        EState.EState_VSA.EState_VSA2(mol),
        EState.EState_VSA.EState_VSA3(mol),
        EState.EState_VSA.EState_VSA4(mol),
        EState.EState_VSA.EState_VSA5(mol),
        EState.EState_VSA.EState_VSA6(mol),
        EState.EState_VSA.EState_VSA7(mol),
        EState.EState_VSA.EState_VSA8(mol),
        EState.EState_VSA.EState_VSA9(mol),
    ]

    fragment_values = [
        Fragments.fr_Al_COO(mol),
        Fragments.fr_Al_OH(mol),
        Fragments.fr_Al_OH_noTert(mol),
        Fragments.fr_aldehyde(mol),
        Fragments.fr_alkyl_carbamate(mol),
        Fragments.fr_alkyl_halide(mol),
        Fragments.fr_allylic_oxid(mol),
        Fragments.fr_amide(mol),
        Fragments.fr_amidine(mol),
        Fragments.fr_aniline(mol),
        Fragments.fr_Ar_COO(mol),
        Fragments.fr_Ar_N(mol),
        Fragments.fr_Ar_NH(mol),
        Fragments.fr_Ar_OH(mol),
        Fragments.fr_ArN(mol),
        Fragments.fr_aryl_methyl(mol),
        Fragments.fr_azide(mol),
        Fragments.fr_azo(mol),
        Fragments.fr_barbitur(mol),
        Fragments.fr_benzene(mol),
        Fragments.fr_benzodiazepine(mol),
        Fragments.fr_bicyclic(mol),
        Fragments.fr_C_O(mol),
        Fragments.fr_C_O_noCOO(mol),
        Fragments.fr_C_S(mol),
        Fragments.fr_COO(mol),
        Fragments.fr_COO2(mol),
        Fragments.fr_diazo(mol),
        Fragments.fr_dihydropyridine(mol),
        Fragments.fr_epoxide(mol),
        Fragments.fr_ester(mol),
        Fragments.fr_ether(mol),
        Fragments.fr_furan(mol),
        Fragments.fr_guanido(mol),
        Fragments.fr_halogen(mol),
        Fragments.fr_hdrzine(mol),
        Fragments.fr_hdrzone(mol),
        Fragments.fr_HOCCN(mol),
        Fragments.fr_imidazole(mol),
        Fragments.fr_imide(mol),
        Fragments.fr_Imine(mol),
        Fragments.fr_isocyan(mol),
        Fragments.fr_isothiocyan(mol),
        Fragments.fr_ketone(mol),
        Fragments.fr_ketone_Topliss(mol),
        Fragments.fr_lactam(mol),
        Fragments.fr_lactone(mol),
        Fragments.fr_methoxy(mol),
        Fragments.fr_morpholine(mol),
        Fragments.fr_N_O(mol),
        Fragments.fr_Ndealkylation1(mol),
        Fragments.fr_Ndealkylation2(mol),
        Fragments.fr_NH0(mol),
        Fragments.fr_NH1(mol),
        Fragments.fr_NH2(mol),
        Fragments.fr_Nhpyrrole(mol),
        Fragments.fr_nitrile(mol),
        Fragments.fr_nitro(mol),
        Fragments.fr_nitro_arom(mol),
        Fragments.fr_nitro_arom_nonortho(mol),
        Fragments.fr_nitroso(mol),
        Fragments.fr_oxazole(mol),
        Fragments.fr_oxime(mol),
        Fragments.fr_para_hydroxylation(mol),
        Fragments.fr_phenol(mol),
        Fragments.fr_phenol_noOrthoHbond(mol),
        Fragments.fr_phos_acid(mol),
        Fragments.fr_phos_ester(mol),
        Fragments.fr_piperdine(mol),
        Fragments.fr_piperzine(mol),
        Fragments.fr_priamide(mol),
        Fragments.fr_prisulfonamd(mol),
        Fragments.fr_pyridine(mol),
        Fragments.fr_quatN(mol),
        Fragments.fr_SH(mol),
        Fragments.fr_sulfide(mol),
        Fragments.fr_sulfonamd(mol),
        Fragments.fr_sulfone(mol),
        Fragments.fr_term_acetylene(mol),
        Fragments.fr_tetrazole(mol),
        Fragments.fr_thiazole(mol),
        Fragments.fr_thiocyan(mol),
        Fragments.fr_thiophene(mol),
        Fragments.fr_unbrch_alkane(mol),
        Fragments.fr_urea(mol),
    ]

    graph_values = [
        GraphDescriptors.BalabanJ(mol),
        GraphDescriptors.BertzCT(mol),
        GraphDescriptors.Chi0(mol),
        GraphDescriptors.Chi0n(mol),
        GraphDescriptors.Chi0v(mol),
        GraphDescriptors.Chi1(mol),
        GraphDescriptors.Chi1n(mol),
        GraphDescriptors.Chi1v(mol),
        GraphDescriptors.Chi2n(mol),
        GraphDescriptors.Chi2v(mol),
        GraphDescriptors.Chi3n(mol),
        GraphDescriptors.Chi3v(mol),
        GraphDescriptors.Chi4n(mol),
        GraphDescriptors.Chi4v(mol),
        GraphDescriptors.HallKierAlpha(mol),
        GraphDescriptors.Ipc(mol),
        GraphDescriptors.Kappa1(mol),
        GraphDescriptors.Kappa2(mol),
        GraphDescriptors.Kappa3(mol),
    ]

    lipinski_values = [
        Lipinski.HeavyAtomCount(mol),
        Lipinski.NHOHCount(mol),
        Lipinski.NOCount(mol),
        Lipinski.NumAliphaticCarbocycles(mol),
        Lipinski.NumAliphaticHeterocycles(mol),
        Lipinski.NumAliphaticRings(mol),
        Lipinski.NumAromaticCarbocycles(mol),
        Lipinski.NumAromaticHeterocycles(mol),
        Lipinski.NumAromaticRings(mol),
        Lipinski.NumHAcceptors(mol),
        Lipinski.NumHDonors(mol),
        Lipinski.NumHeteroatoms(mol),
        Lipinski.NumRotatableBonds(mol),
        Lipinski.NumSaturatedCarbocycles(mol),
        Lipinski.NumSaturatedHeterocycles(mol),
        Lipinski.NumSaturatedRings(mol),
        Lipinski.RingCount(mol),
    ]

    surface_values = [
        MolSurf.LabuteASA(mol),
        MolSurf.PEOE_VSA1(mol),
        MolSurf.PEOE_VSA10(mol),
        MolSurf.PEOE_VSA11(mol),
        MolSurf.PEOE_VSA12(mol),
        MolSurf.PEOE_VSA13(mol),
        MolSurf.PEOE_VSA14(mol),
        MolSurf.PEOE_VSA2(mol),
        MolSurf.PEOE_VSA3(mol),
        MolSurf.PEOE_VSA4(mol),
        MolSurf.PEOE_VSA5(mol),
        MolSurf.PEOE_VSA6(mol),
        MolSurf.PEOE_VSA7(mol),
        MolSurf.PEOE_VSA8(mol),
        MolSurf.PEOE_VSA9(mol),
        MolSurf.SlogP_VSA1(mol),
        MolSurf.SlogP_VSA10(mol),
        MolSurf.SlogP_VSA11(mol),
        MolSurf.SlogP_VSA12(mol),
        MolSurf.SlogP_VSA2(mol),
        MolSurf.SlogP_VSA3(mol),
        MolSurf.SlogP_VSA4(mol),
        MolSurf.SlogP_VSA5(mol),
        MolSurf.SlogP_VSA6(mol),
        MolSurf.SlogP_VSA7(mol),
        MolSurf.SlogP_VSA8(mol),
        MolSurf.SlogP_VSA9(mol),
        MolSurf.SMR_VSA1(mol),
        MolSurf.SMR_VSA10(mol),
        MolSurf.SMR_VSA2(mol),
        MolSurf.SMR_VSA3(mol),
        MolSurf.SMR_VSA4(mol),
        MolSurf.SMR_VSA5(mol),
        MolSurf.SMR_VSA6(mol),
        MolSurf.SMR_VSA7(mol),
        MolSurf.SMR_VSA8(mol),
        MolSurf.SMR_VSA9(mol),
        MolSurf.TPSA(mol),
    ]

    # Concatenate in the same order the groups were evaluated so every value
    # keeps its position in the resulting Series.
    all_values = (
        crippen_values
        + general_values
        + estate_values
        + estate_vsa_values
        + fragment_values
        + graph_values
        + lipinski_values
        + surface_values
    )
    return pd.Series(np.array(all_values))
Beispiel #4
0
def get_descriptors(smiles):
    """
    Get a dictionary of RDKit descriptors from a SMILES string.

    The SMILES string is parsed, explicit hydrogens are added, and a 3-D
    conformer is embedded with the ETKDG parameters before the descriptors
    are evaluated on the resulting molecule.

    Parameters
    ----------
    smiles : str
        The SMILES string of the chemical of interest

    Returns
    -------
    descriptors : dict
        A collection of molecular descriptors keyed by lower-case name.
        Note that 'molwt' is computed with ``Descriptors.ExactMolWt``, not
        ``Descriptors.MolWt``.

    Raises
    ------
    ValueError
        If no 3-D conformer could be generated for the molecule; the 3-D
        descriptors cannot be evaluated without one.

    Notes: Developed with RDKit 2019.03.4, although doc pages listed 2019.03.1
    """

    # NOTE(review): MolFromSmiles presumably returns None for an unparseable
    # SMILES, in which case AddHs fails; that case is not handled here --
    # confirm whether callers need a clearer error.
    mol = Chem.MolFromSmiles(smiles)
    mol = Chem.AddHs(mol)

    # EmbedMolecule signals failure through its return value (a negative
    # conformer id) instead of raising.  Check it here so the caller gets an
    # error naming the offending SMILES rather than an opaque failure from
    # the first Descriptors3D call further down.
    conformer_id = Chem.EmbedMolecule(mol, Chem.ETKDG())
    if conformer_id < 0:
        raise ValueError(
            'could not generate a 3-D conformer for SMILES %r' % (smiles,))

    descriptors = {}

    # Starting with simple descriptors:
    # https://www.rdkit.org/docs/source/rdkit.Chem.Descriptors.html

    # Molecular weight
    descriptors['molwt'] = Descriptors.ExactMolWt(mol)

    # Partial charge metrics
    descriptors['max_abs_partial_charge'] = Descriptors.MaxAbsPartialCharge(mol)
    descriptors['max_partial_charge'] = Descriptors.MaxPartialCharge(mol)
    descriptors['min_abs_partial_charge'] = Descriptors.MinAbsPartialCharge(mol)
    descriptors['min_partial_charge'] = Descriptors.MinPartialCharge(mol)

    # Basic electron counts
    descriptors['num_radical_electrons'] = Descriptors.NumRadicalElectrons(mol)
    descriptors['num_valence_electrons'] = Descriptors.NumValenceElectrons(mol)

    # 3-D descriptors
    # https://www.rdkit.org/docs/source/rdkit.Chem.Descriptors3D.html
    # According to the original author's basic tests, the rdMolDescriptors
    # equivalents (CalcAsphericity, CalcEccentricity) produce the same
    # results as the Descriptors3D functions used here.
    descriptors['asphericity'] = Descriptors3D.Asphericity(mol)
    descriptors['eccentricity'] = Descriptors3D.Eccentricity(mol)

    descriptors['inertial_shape_factor'] = Descriptors3D.InertialShapeFactor(mol)

    descriptors['radius_of_gyration'] = Descriptors3D.RadiusOfGyration(mol)
    descriptors['spherocity_index'] = Descriptors3D.SpherocityIndex(mol)

    # Graph descriptors
    # https://www.rdkit.org/docs/source/rdkit.Chem.GraphDescriptors.html
    descriptors['balaban_j'] = GraphDescriptors.BalabanJ(mol)
    descriptors['bertz_ct'] = GraphDescriptors.BertzCT(mol)

    descriptors['chi0'] = GraphDescriptors.Chi0(mol)
    descriptors['chi0n'] = GraphDescriptors.Chi0n(mol)
    descriptors['chi0v'] = GraphDescriptors.Chi0v(mol)
    descriptors['chi1'] = GraphDescriptors.Chi1(mol)
    descriptors['chi1n'] = GraphDescriptors.Chi1n(mol)
    descriptors['chi1v'] = GraphDescriptors.Chi1v(mol)
    descriptors['chi2n'] = GraphDescriptors.Chi2n(mol)
    descriptors['chi2v'] = GraphDescriptors.Chi2v(mol)
    descriptors['chi3n'] = GraphDescriptors.Chi3n(mol)
    descriptors['chi3v'] = GraphDescriptors.Chi3v(mol)
    descriptors['chi4n'] = GraphDescriptors.Chi4n(mol)
    descriptors['chi4v'] = GraphDescriptors.Chi4v(mol)

    descriptors['hall_kier_alpha'] = GraphDescriptors.HallKierAlpha(mol)

    descriptors['kappa1'] = GraphDescriptors.Kappa1(mol)
    descriptors['kappa2'] = GraphDescriptors.Kappa2(mol)
    descriptors['kappa3'] = GraphDescriptors.Kappa3(mol)

    # Predicted properties from Wildman and Crippen
    descriptors['log_p'] = Descriptors.MolLogP(mol)
    descriptors['refractivity'] = Descriptors.MolMR(mol)

    return descriptors