Exemple #1
0
  def testOrderDepend(self):
    """ check that BalabanJ and BertzCT do not depend on evaluation order

      Every molecule is re-parsed from its SMILES before each sequence of
      descriptor calls; the descriptors are computed alone and in both
      orders (always passing forceDMat=1) and each result is compared to
      its reference value via feq with a 1e-4 argument.
    """
    # (SMILES, reference BertzCT, reference BalabanJ)
    references = [
      ('C=CC=C', 21.01955, 2.73205),
      ('O=CC=O', 25.01955, 2.73205),
      ('FCC(=O)CF', 46.7548875, 2.98816),
      ('O=C1C=CC(=O)C=C1', 148.705216, 2.8265),
      ('C12C(F)=C(O)C(F)C1C(F)=C(O)C(F)2', 315.250442, 2.4509),
      ('C12CC=CCC1C(=O)C3CC=CCC3C(=O)2', 321.539522, 1.95986),
    ]

    # Descriptor sequences; each one is run on a freshly parsed molecule.
    sequences = (
      ('bal',),
      ('ct',),
      ('ct', 'bal'),
      ('bal', 'ct'),
    )

    for smi, CT, bal in references:
      for sequence in sequences:
        m = Chem.MolFromSmiles(smi)
        for which in sequence:
          if which == 'bal':
            newBal = GraphDescriptors.BalabanJ(m, forceDMat=1)
            assert feq(newBal, bal, 1e-4), 'mol %s %f!=%f' % (smi, newBal, bal)
          else:
            newCT = GraphDescriptors.BertzCT(m, forceDMat=1)
            assert feq(newCT, CT, 1e-4), \
              'mol %s (CT calc = %f) should have CT = %f' % (smi, newCT, CT)
Exemple #2
0
 def testIssue125(self):
     # Regression check for an issue with calculating BalabanJ: two
     # molecules built from the same SMILES must give the same J value
     # even though MolToSmiles has been called on only one of them.
     smi = 'O=C(OC)C1=C(C)NC(C)=C(C(OC)=O)C1C2=CC=CC=C2[N+]([O-])=O'
     touched = Chem.MolFromSmiles(smi)
     untouched = Chem.MolFromSmiles(smi)
     # Return value deliberately discarded; only the call itself matters.
     Chem.MolToSmiles(touched)
     jTouched = GraphDescriptors.BalabanJ(touched)
     jUntouched = GraphDescriptors.BalabanJ(untouched)
     assert feq(jTouched, jUntouched)
Exemple #3
0
  def testBalabanJ(self):
    """ verify BalabanJ against published reference values

      J values are from Balaban's paper and have had roundoff
      errors and typos corrected.

      Each molecule is evaluated twice: first with forceDMat=1, then
      again on the same molecule object without it.
    """
    alkanes = [
      ('CC', 1.0),
      ('CCC', 1.6330),
      ('CCCC', 1.9747),
      ('CC(C)C', 2.3238),
      ('CCCCC', 2.1906),
      ('CC(C)CC', 2.5396),
      ('CC(C)(C)C', 3.0237),
      ('CCCCCC', 2.3391),
      ('CC(C)CCC', 2.6272),
      ('CCC(C)CC', 2.7542),
      ('CC(C)(C)CC', 3.1685),
      ('CC(C)C(C)C', 2.9935),
    ]
    cycloalkanes = [
      ('C1CCCCC1', 2.0000),
      ('C1C(C)CCCC1', 2.1229),
      ('C1C(CC)CCCC1', 2.1250),
      ('C1C(C)C(C)CCC1', 2.2794),
      ('C1C(C)CC(C)CC1', 2.2307),
      ('C1C(C)CCC(C)C1', 2.1924),
      ('C1C(CCC)CCCC1', 2.0779),
      ('C1C(C(C)C)CCCC1', 2.2284),
      ('C1C(CC)C(C)CCC1', 2.2973),
      ('C1C(CC)CC(C)CC1', 2.2317),
      ('C1C(CC)CCC(C)C1', 2.1804),
      ('C1C(C)C(C)C(C)CC1', 2.4133),
      ('C1C(C)C(C)CC(C)C1', 2.3462),
      ('C1C(C)CC(C)CC1(C)', 2.3409),
    ]
    aromatics = [
      ('c1ccccc1', 3.0000),
      ('c1c(C)cccc1', 3.0215),
      ('c1c(CC)cccc1', 2.8321),
      ('c1c(C)c(C)ccc1', 3.1349),
      ('c1c(C)cc(C)cc1', 3.0777),
      ('c1c(C)ccc(C)c1', 3.0325),
      ('c1c(CCC)cccc1', 2.6149),
      ('c1c(C(C)C)cccc1', 2.8483),
      ('c1c(CC)c(C)ccc1', 3.0065),
      ('c1c(CC)cc(C)cc1', 2.9369),
      ('c1c(CC)ccc(C)c1', 2.8816),
      ('c1c(C)c(C)c(C)cc1', 3.2478),
      ('c1c(C)c(C)cc(C)c1', 3.1717),
      ('c1c(C)cc(C)cc1(C)', 3.1657),
    ]

    for smi, expected in alkanes + cycloalkanes + aromatics:
      mol = Chem.MolFromSmiles(smi)

      # First pass: forceDMat=1.
      firstJ = GraphDescriptors.BalabanJ(mol, forceDMat=1)
      assert feq(firstJ, expected), \
        'mol %s (J=%f) should have J=%f' % (smi, firstJ, expected)

      # Second pass on the same molecule object, default arguments.
      secondJ = GraphDescriptors.BalabanJ(mol)
      assert feq(secondJ, expected), \
        'second pass: mol %s (J=%f) should have J=%f' % (smi, secondJ, expected)
Exemple #4
0
    'HBD', 'jIndex'
]
# Start every property column as an empty list.
for name in prop_names:
    d[name] = []

# Compute one row of descriptors per usable SMILES string.
for i, s in enumerate(smiles):
    if i % 10000 == 0:
        print(i)  # progress indicator
    m = Chem.MolFromSmiles(s)
    # Skip entries whose parse result is None, and SMILES containing the
    # character 'i' or a '.'.
    if m is None or 'i' in s or '.' in s:
        # Drop the row labelled i from DUD and report what was skipped.
        # NOTE(review): assumes DUD's index labels match the enumeration
        # position of `smiles` -- confirm against where DUD is loaded.
        DUD = DUD.drop(i)
        print(s, i)
    else:
        d['QED'].append(QED.default(m))
        d['logP'].append(Crippen.MolLogP(m))
        d['molWt'].append(Descriptors.MolWt(m))
        d['maxCharge'].append(Descriptors.MaxPartialCharge(m))
        d['minCharge'].append(Descriptors.MinPartialCharge(m))
        d['valence'].append(Descriptors.NumValenceElectrons(m))
        d['TPSA'].append(rdMolDescriptors.CalcTPSA(m))
        d['HBA'].append(rdMolDescriptors.CalcNumHBA(m))
        d['HBD'].append(rdMolDescriptors.CalcNumHBD(m))
        d['jIndex'].append(GraphDescriptors.BalabanJ(m))

df = pd.DataFrame.from_dict(d)

# df's index is passed as the join key for both frames.
# NOTE(review): presumably intended to pair rows positionally -- confirm.
df_merge = pd.merge(df, DUD, on=df.index)

#df_merge.to_csv('/home/mcb/jboitr/data/DUD_full.csv')
df_merge.to_csv('C:/Users/jacqu/Documents/data/DUD_full.csv')
Exemple #5
0
def calc_rdkit(mol):
    """Evaluate a fixed, ordered battery of RDKit descriptors on ``mol``.

    Every callable listed below is invoked with ``mol`` as its only
    argument, in the order given.  The results are collected into a NumPy
    array and wrapped in a ``pandas.Series`` with the default index, so a
    descriptor is identified only by its position in the list.

    Parameters
    ----------
    mol
        The molecule object handed to each descriptor function.

    Returns
    -------
    pandas.Series
        One value per descriptor, in the order of ``calculators``.
    """
    calculators = (
        # Crippen
        Crippen.MolLogP,
        Crippen.MolMR,
        # Descriptors
        Descriptors.FpDensityMorgan1,
        Descriptors.FpDensityMorgan2,
        Descriptors.FpDensityMorgan3,
        Descriptors.FractionCSP3,
        Descriptors.HeavyAtomMolWt,
        Descriptors.MaxAbsPartialCharge,
        Descriptors.MaxPartialCharge,
        Descriptors.MinAbsPartialCharge,
        Descriptors.MinPartialCharge,
        Descriptors.MolWt,
        Descriptors.NumRadicalElectrons,
        Descriptors.NumValenceElectrons,
        # EState
        EState.EState.MaxAbsEStateIndex,
        EState.EState.MaxEStateIndex,
        EState.EState.MinAbsEStateIndex,
        EState.EState.MinEStateIndex,
        EState.EState_VSA.EState_VSA1,
        EState.EState_VSA.EState_VSA10,
        EState.EState_VSA.EState_VSA11,
        EState.EState_VSA.EState_VSA2,
        EState.EState_VSA.EState_VSA3,
        EState.EState_VSA.EState_VSA4,
        EState.EState_VSA.EState_VSA5,
        EState.EState_VSA.EState_VSA6,
        EState.EState_VSA.EState_VSA7,
        EState.EState_VSA.EState_VSA8,
        EState.EState_VSA.EState_VSA9,
        # Fragments
        Fragments.fr_Al_COO,
        Fragments.fr_Al_OH,
        Fragments.fr_Al_OH_noTert,
        Fragments.fr_aldehyde,
        Fragments.fr_alkyl_carbamate,
        Fragments.fr_alkyl_halide,
        Fragments.fr_allylic_oxid,
        Fragments.fr_amide,
        Fragments.fr_amidine,
        Fragments.fr_aniline,
        Fragments.fr_Ar_COO,
        Fragments.fr_Ar_N,
        Fragments.fr_Ar_NH,
        Fragments.fr_Ar_OH,
        Fragments.fr_ArN,
        Fragments.fr_aryl_methyl,
        Fragments.fr_azide,
        Fragments.fr_azo,
        Fragments.fr_barbitur,
        Fragments.fr_benzene,
        Fragments.fr_benzodiazepine,
        Fragments.fr_bicyclic,
        Fragments.fr_C_O,
        Fragments.fr_C_O_noCOO,
        Fragments.fr_C_S,
        Fragments.fr_COO,
        Fragments.fr_COO2,
        Fragments.fr_diazo,
        Fragments.fr_dihydropyridine,
        Fragments.fr_epoxide,
        Fragments.fr_ester,
        Fragments.fr_ether,
        Fragments.fr_furan,
        Fragments.fr_guanido,
        Fragments.fr_halogen,
        Fragments.fr_hdrzine,
        Fragments.fr_hdrzone,
        Fragments.fr_HOCCN,
        Fragments.fr_imidazole,
        Fragments.fr_imide,
        Fragments.fr_Imine,
        Fragments.fr_isocyan,
        Fragments.fr_isothiocyan,
        Fragments.fr_ketone,
        Fragments.fr_ketone_Topliss,
        Fragments.fr_lactam,
        Fragments.fr_lactone,
        Fragments.fr_methoxy,
        Fragments.fr_morpholine,
        Fragments.fr_N_O,
        Fragments.fr_Ndealkylation1,
        Fragments.fr_Ndealkylation2,
        Fragments.fr_NH0,
        Fragments.fr_NH1,
        Fragments.fr_NH2,
        Fragments.fr_Nhpyrrole,
        Fragments.fr_nitrile,
        Fragments.fr_nitro,
        Fragments.fr_nitro_arom,
        Fragments.fr_nitro_arom_nonortho,
        Fragments.fr_nitroso,
        Fragments.fr_oxazole,
        Fragments.fr_oxime,
        Fragments.fr_para_hydroxylation,
        Fragments.fr_phenol,
        Fragments.fr_phenol_noOrthoHbond,
        Fragments.fr_phos_acid,
        Fragments.fr_phos_ester,
        Fragments.fr_piperdine,
        Fragments.fr_piperzine,
        Fragments.fr_priamide,
        Fragments.fr_prisulfonamd,
        Fragments.fr_pyridine,
        Fragments.fr_quatN,
        Fragments.fr_SH,
        Fragments.fr_sulfide,
        Fragments.fr_sulfonamd,
        Fragments.fr_sulfone,
        Fragments.fr_term_acetylene,
        Fragments.fr_tetrazole,
        Fragments.fr_thiazole,
        Fragments.fr_thiocyan,
        Fragments.fr_thiophene,
        Fragments.fr_unbrch_alkane,
        Fragments.fr_urea,
        # GraphDescriptors
        GraphDescriptors.BalabanJ,
        GraphDescriptors.BertzCT,
        GraphDescriptors.Chi0,
        GraphDescriptors.Chi0n,
        GraphDescriptors.Chi0v,
        GraphDescriptors.Chi1,
        GraphDescriptors.Chi1n,
        GraphDescriptors.Chi1v,
        GraphDescriptors.Chi2n,
        GraphDescriptors.Chi2v,
        GraphDescriptors.Chi3n,
        GraphDescriptors.Chi3v,
        GraphDescriptors.Chi4n,
        GraphDescriptors.Chi4v,
        GraphDescriptors.HallKierAlpha,
        GraphDescriptors.Ipc,
        GraphDescriptors.Kappa1,
        GraphDescriptors.Kappa2,
        GraphDescriptors.Kappa3,
        # Lipinski
        Lipinski.HeavyAtomCount,
        Lipinski.NHOHCount,
        Lipinski.NOCount,
        Lipinski.NumAliphaticCarbocycles,
        Lipinski.NumAliphaticHeterocycles,
        Lipinski.NumAliphaticRings,
        Lipinski.NumAromaticCarbocycles,
        Lipinski.NumAromaticHeterocycles,
        Lipinski.NumAromaticRings,
        Lipinski.NumHAcceptors,
        Lipinski.NumHDonors,
        Lipinski.NumHeteroatoms,
        Lipinski.NumRotatableBonds,
        Lipinski.NumSaturatedCarbocycles,
        Lipinski.NumSaturatedHeterocycles,
        Lipinski.NumSaturatedRings,
        Lipinski.RingCount,
        # MolSurf
        MolSurf.LabuteASA,
        MolSurf.PEOE_VSA1,
        MolSurf.PEOE_VSA10,
        MolSurf.PEOE_VSA11,
        MolSurf.PEOE_VSA12,
        MolSurf.PEOE_VSA13,
        MolSurf.PEOE_VSA14,
        MolSurf.PEOE_VSA2,
        MolSurf.PEOE_VSA3,
        MolSurf.PEOE_VSA4,
        MolSurf.PEOE_VSA5,
        MolSurf.PEOE_VSA6,
        MolSurf.PEOE_VSA7,
        MolSurf.PEOE_VSA8,
        MolSurf.PEOE_VSA9,
        MolSurf.SlogP_VSA1,
        MolSurf.SlogP_VSA10,
        MolSurf.SlogP_VSA11,
        MolSurf.SlogP_VSA12,
        MolSurf.SlogP_VSA2,
        MolSurf.SlogP_VSA3,
        MolSurf.SlogP_VSA4,
        MolSurf.SlogP_VSA5,
        MolSurf.SlogP_VSA6,
        MolSurf.SlogP_VSA7,
        MolSurf.SlogP_VSA8,
        MolSurf.SlogP_VSA9,
        MolSurf.SMR_VSA1,
        MolSurf.SMR_VSA10,
        MolSurf.SMR_VSA2,
        MolSurf.SMR_VSA3,
        MolSurf.SMR_VSA4,
        MolSurf.SMR_VSA5,
        MolSurf.SMR_VSA6,
        MolSurf.SMR_VSA7,
        MolSurf.SMR_VSA8,
        MolSurf.SMR_VSA9,
        MolSurf.TPSA,
    )
    values = [calculate(mol) for calculate in calculators]
    return pd.Series(np.array(values))
Exemple #6
0
 def calculate(self, D):
     """Return the BalabanJ value of ``self.mol`` as a float.

     ``D`` is forwarded to ``RDKit.BalabanJ`` as its ``dMat`` keyword
     argument.
     """
     balaban = RDKit.BalabanJ(self.mol, dMat=D)
     return float(balaban)
def get_descriptors(smiles):
    """
    Get a dictionary of RDKit descriptors from a SMILES string.

    The SMILES is parsed, explicit hydrogens are added, and a 3-D
    conformer is embedded using ETKDG parameters.  Every descriptor is
    then computed on that hydrogen-added, embedded molecule.

    Parameters
    ----------
    smiles : str
        The SMILES string of the chemical of interest

    Returns
    -------
    descriptors : dict
        A collection of molecular descriptors

    Raises
    ------
    ValueError
        If the SMILES string cannot be parsed, or if no 3-D conformer
        could be embedded for the molecule.

    Notes
    -----
    Developed with RDKit 2019.03.4, although doc pages listed 2019.03.1.

    The ``'molwt'`` entry holds the value of ``Descriptors.ExactMolWt``.
    """

    mol = Chem.MolFromSmiles(smiles)
    # MolFromSmiles signals a parse failure by returning None rather than
    # raising; fail here with a clear message instead of inside AddHs.
    if mol is None:
        raise ValueError('Could not parse SMILES string: %r' % (smiles,))
    mol = Chem.AddHs(mol)

    # EmbedMolecule returns the new conformer id, or a negative value when
    # embedding fails; the 3-D descriptors below need that conformer.
    conformer_id = Chem.EmbedMolecule(mol, Chem.ETKDG())
    if conformer_id < 0:
        raise ValueError(
            'Could not embed a 3-D conformer for SMILES: %r' % (smiles,))

    descriptors = {}

    # Starting with simple descriptors:
    # https://www.rdkit.org/docs/source/rdkit.Chem.Descriptors.html

    # Molecular weight
    descriptors['molwt'] = Descriptors.ExactMolWt(mol)

    # Partial charge metrics
    descriptors['max_abs_partial_charge'] = Descriptors.MaxAbsPartialCharge(mol)
    descriptors['max_partial_charge'] = Descriptors.MaxPartialCharge(mol)
    descriptors['min_abs_partial_charge'] = Descriptors.MinAbsPartialCharge(mol)
    descriptors['min_partial_charge'] = Descriptors.MinPartialCharge(mol)

    # Basic electron counts
    descriptors['num_radical_electrons'] = Descriptors.NumRadicalElectrons(mol)
    descriptors['num_valence_electrons'] = Descriptors.NumValenceElectrons(mol)

    # 3-D descriptors
    # https://www.rdkit.org/docs/source/rdkit.Chem.Descriptors3D.html

    # According to some basic tests, rdMolDescriptors.CalcAsphericity and
    # rdMolDescriptors.CalcEccentricity should produce the same results as
    # the Descriptors3D functions used here.
    descriptors['asphericity'] = Descriptors3D.Asphericity(mol)
    descriptors['eccentricity'] = Descriptors3D.Eccentricity(mol)

    descriptors['inertial_shape_factor'] = Descriptors3D.InertialShapeFactor(mol)

    descriptors['radius_of_gyration'] = Descriptors3D.RadiusOfGyration(mol)
    descriptors['spherocity_index'] = Descriptors3D.SpherocityIndex(mol)

    # Graph descriptors
    # https://www.rdkit.org/docs/source/rdkit.Chem.GraphDescriptors.html
    descriptors['balaban_j'] = GraphDescriptors.BalabanJ(mol)
    descriptors['bertz_ct'] = GraphDescriptors.BertzCT(mol)

    descriptors['chi0'] = GraphDescriptors.Chi0(mol)
    descriptors['chi0n'] = GraphDescriptors.Chi0n(mol)
    descriptors['chi0v'] = GraphDescriptors.Chi0v(mol)
    descriptors['chi1'] = GraphDescriptors.Chi1(mol)
    descriptors['chi1n'] = GraphDescriptors.Chi1n(mol)
    descriptors['chi1v'] = GraphDescriptors.Chi1v(mol)
    descriptors['chi2n'] = GraphDescriptors.Chi2n(mol)
    descriptors['chi2v'] = GraphDescriptors.Chi2v(mol)
    descriptors['chi3n'] = GraphDescriptors.Chi3n(mol)
    descriptors['chi3v'] = GraphDescriptors.Chi3v(mol)
    descriptors['chi4n'] = GraphDescriptors.Chi4n(mol)
    descriptors['chi4v'] = GraphDescriptors.Chi4v(mol)

    descriptors['hall_kier_alpha'] = GraphDescriptors.HallKierAlpha(mol)

    descriptors['kappa1'] = GraphDescriptors.Kappa1(mol)
    descriptors['kappa2'] = GraphDescriptors.Kappa2(mol)
    descriptors['kappa3'] = GraphDescriptors.Kappa3(mol)

    # Predicted properties from Wildman and Crippen
    descriptors['log_p'] = Descriptors.MolLogP(mol)
    descriptors['refractivity'] = Descriptors.MolMR(mol)

    return descriptors