def testOrderDepend(self):
    """ check that some descriptors do not depend on calculation order

    For every reference molecule BalabanJ and BertzCT are computed (always
    with forceDMat=1) on their own, then BertzCT followed by BalabanJ on one
    molecule, then BalabanJ followed by BertzCT on another.  Every value
    must agree with the reference to within 1e-4.
    """
    # (SMILES, expected BertzCT, expected BalabanJ)
    reference = [
        ('C=CC=C', 21.01955, 2.73205),
        ('O=CC=O', 25.01955, 2.73205),
        ('FCC(=O)CF', 46.7548875, 2.98816),
        ('O=C1C=CC(=O)C=C1', 148.705216, 2.8265),
        ('C12C(F)=C(O)C(F)C1C(F)=C(O)C(F)2', 315.250442, 2.4509),
        ('C12CC=CCC1C(=O)C3CC=CCC3C(=O)2', 321.539522, 1.95986),
    ]

    def checkBalaban(mol, smi, bal):
        # Recompute BalabanJ on `mol` and compare with the reference value.
        newBal = GraphDescriptors.BalabanJ(mol, forceDMat=1)
        assert feq(newBal, bal, 1e-4), 'mol %s %f!=%f' % (smi, newBal, bal)

    def checkBertz(mol, smi, CT):
        # Recompute BertzCT on `mol` and compare with the reference value.
        newCT = GraphDescriptors.BertzCT(mol, forceDMat=1)
        assert feq(newCT, CT, 1e-4), \
            'mol %s (CT calc = %f) should have CT = %f' % (smi, newCT, CT)

    for smi, CT, bal in reference:
        # Each descriptor alone, on a freshly parsed molecule.
        checkBalaban(Chem.MolFromSmiles(smi), smi, bal)
        checkBertz(Chem.MolFromSmiles(smi), smi, CT)

        # BertzCT first, then BalabanJ, sharing one molecule.
        ctFirst = Chem.MolFromSmiles(smi)
        checkBertz(ctFirst, smi, CT)
        checkBalaban(ctFirst, smi, bal)

        # BalabanJ first, then BertzCT, sharing one molecule.
        balFirst = Chem.MolFromSmiles(smi)
        checkBalaban(balFirst, smi, bal)
        checkBertz(balFirst, smi, CT)
def testIssue125(self):
    # Regression test for an issue with calculating BalabanJ: two molecules
    # parsed from the same SMILES must yield the same J value even though
    # one of them has been passed through MolToSmiles beforehand.
    smiles = 'O=C(OC)C1=C(C)NC(C)=C(C(OC)=O)C1C2=CC=CC=C2[N+]([O-])=O'
    touched = Chem.MolFromSmiles(smiles)
    untouched = Chem.MolFromSmiles(smiles)

    # Only `touched` goes through SMILES generation; the return value is
    # deliberately discarded.
    Chem.MolToSmiles(touched)

    jTouched = GraphDescriptors.BalabanJ(touched)
    jUntouched = GraphDescriptors.BalabanJ(untouched)
    assert feq(jTouched, jUntouched)
def testBalabanJ(self):
    """ test calculation of the Balaban J value

    J values are from Balaban's paper and have had roundoff errors and
    typos corrected.

    Each molecule is checked twice: once with forceDMat=1 and once more
    with the default arguments on the same molecule object.
    """
    alkanes = [
        ('CC', 1.0),
        ('CCC', 1.6330),
        ('CCCC', 1.9747),
        ('CC(C)C', 2.3238),
        ('CCCCC', 2.1906),
        ('CC(C)CC', 2.5396),
        ('CC(C)(C)C', 3.0237),
        ('CCCCCC', 2.3391),
        ('CC(C)CCC', 2.6272),
        ('CCC(C)CC', 2.7542),
        ('CC(C)(C)CC', 3.1685),
        ('CC(C)C(C)C', 2.9935),
    ]
    cycloalkanes = [
        ('C1CCCCC1', 2.0000),
        ('C1C(C)CCCC1', 2.1229),
        ('C1C(CC)CCCC1', 2.1250),
        ('C1C(C)C(C)CCC1', 2.2794),
        ('C1C(C)CC(C)CC1', 2.2307),
        ('C1C(C)CCC(C)C1', 2.1924),
        ('C1C(CCC)CCCC1', 2.0779),
        ('C1C(C(C)C)CCCC1', 2.2284),
        ('C1C(CC)C(C)CCC1', 2.2973),
        ('C1C(CC)CC(C)CC1', 2.2317),
        ('C1C(CC)CCC(C)C1', 2.1804),
        ('C1C(C)C(C)C(C)CC1', 2.4133),
        ('C1C(C)C(C)CC(C)C1', 2.3462),
        ('C1C(C)CC(C)CC1(C)', 2.3409),
    ]
    aromatics = [
        ('c1ccccc1', 3.0000),
        ('c1c(C)cccc1', 3.0215),
        ('c1c(CC)cccc1', 2.8321),
        ('c1c(C)c(C)ccc1', 3.1349),
        ('c1c(C)cc(C)cc1', 3.0777),
        ('c1c(C)ccc(C)c1', 3.0325),
        ('c1c(CCC)cccc1', 2.6149),
        ('c1c(C(C)C)cccc1', 2.8483),
        ('c1c(CC)c(C)ccc1', 3.0065),
        ('c1c(CC)cc(C)cc1', 2.9369),
        ('c1c(CC)ccc(C)c1', 2.8816),
        ('c1c(C)c(C)c(C)cc1', 3.2478),
        ('c1c(C)c(C)cc(C)c1', 3.1717),
        ('c1c(C)cc(C)cc1(C)', 3.1657),
    ]

    # Same overall ordering as a single flat table: alkanes, rings, aromatics.
    for smi, expected in alkanes + cycloalkanes + aromatics:
        mol = Chem.MolFromSmiles(smi)

        # First pass: force the distance matrix to be recalculated.
        jVal = GraphDescriptors.BalabanJ(mol, forceDMat=1)
        assert feq(jVal, expected), \
            'mol %s (J=%f) should have J=%f' % (smi, jVal, expected)

        # Second pass: default arguments on the very same molecule.
        jVal = GraphDescriptors.BalabanJ(mol)
        assert feq(jVal, expected), \
            'second pass: mol %s (J=%f) should have J=%f' % (smi, jVal, expected)
'HBD', 'jIndex' ] for name in prop_names: d[f'{name}'] = [] for i, s in enumerate(smiles): if (i % 10000 == 0): print(i) m = Chem.MolFromSmiles(s) if (m == None or 'i' in s or '.' in s): DUD = DUD.drop(i) print(s, i) else: d['QED'].append(QED.default(m)) d['logP'].append(Crippen.MolLogP(m)) d['molWt'].append(Descriptors.MolWt(m)) d['maxCharge'].append(Descriptors.MaxPartialCharge(m)) d['minCharge'].append(Descriptors.MinPartialCharge(m)) d['valence'].append(Descriptors.NumValenceElectrons(m)) d['TPSA'].append(rdMolDescriptors.CalcTPSA(m)) d['HBA'].append(rdMolDescriptors.CalcNumHBA(m)) d['HBD'].append(rdMolDescriptors.CalcNumHBD(m)) d['jIndex'].append(GraphDescriptors.BalabanJ(m)) df = pd.DataFrame.from_dict(d) df_merge = pd.merge(df, DUD, on=df.index) #df_merge.to_csv('/home/mcb/jboitr/data/DUD_full.csv') df_merge.to_csv('C:/Users/jacqu/Documents/data/DUD_full.csv')
def calc_rdkit(mol):
    """Evaluate a fixed, ordered battery of RDKit descriptors on ``mol``.

    Every callable listed below is invoked with ``mol`` as its only
    argument, in the order shown.  The collected values are packed into a
    NumPy array and wrapped in a ``pandas.Series`` with the default integer
    index, so position ``i`` of the result corresponds to entry ``i`` of
    the table.
    """
    descriptor_fns = (
        # Crippen / general descriptors
        Crippen.MolLogP,
        Crippen.MolMR,
        Descriptors.FpDensityMorgan1,
        Descriptors.FpDensityMorgan2,
        Descriptors.FpDensityMorgan3,
        Descriptors.FractionCSP3,
        Descriptors.HeavyAtomMolWt,
        Descriptors.MaxAbsPartialCharge,
        Descriptors.MaxPartialCharge,
        Descriptors.MinAbsPartialCharge,
        Descriptors.MinPartialCharge,
        Descriptors.MolWt,
        Descriptors.NumRadicalElectrons,
        Descriptors.NumValenceElectrons,
        # EState indices
        EState.EState.MaxAbsEStateIndex,
        EState.EState.MaxEStateIndex,
        EState.EState.MinAbsEStateIndex,
        EState.EState.MinEStateIndex,
        # EState VSA bins
        EState.EState_VSA.EState_VSA1,
        EState.EState_VSA.EState_VSA10,
        EState.EState_VSA.EState_VSA11,
        EState.EState_VSA.EState_VSA2,
        EState.EState_VSA.EState_VSA3,
        EState.EState_VSA.EState_VSA4,
        EState.EState_VSA.EState_VSA5,
        EState.EState_VSA.EState_VSA6,
        EState.EState_VSA.EState_VSA7,
        EState.EState_VSA.EState_VSA8,
        EState.EState_VSA.EState_VSA9,
        # Fragment counts
        Fragments.fr_Al_COO,
        Fragments.fr_Al_OH,
        Fragments.fr_Al_OH_noTert,
        Fragments.fr_aldehyde,
        Fragments.fr_alkyl_carbamate,
        Fragments.fr_alkyl_halide,
        Fragments.fr_allylic_oxid,
        Fragments.fr_amide,
        Fragments.fr_amidine,
        Fragments.fr_aniline,
        Fragments.fr_Ar_COO,
        Fragments.fr_Ar_N,
        Fragments.fr_Ar_NH,
        Fragments.fr_Ar_OH,
        Fragments.fr_ArN,
        Fragments.fr_aryl_methyl,
        Fragments.fr_azide,
        Fragments.fr_azo,
        Fragments.fr_barbitur,
        Fragments.fr_benzene,
        Fragments.fr_benzodiazepine,
        Fragments.fr_bicyclic,
        Fragments.fr_C_O,
        Fragments.fr_C_O_noCOO,
        Fragments.fr_C_S,
        Fragments.fr_COO,
        Fragments.fr_COO2,
        Fragments.fr_diazo,
        Fragments.fr_dihydropyridine,
        Fragments.fr_epoxide,
        Fragments.fr_ester,
        Fragments.fr_ether,
        Fragments.fr_furan,
        Fragments.fr_guanido,
        Fragments.fr_halogen,
        Fragments.fr_hdrzine,
        Fragments.fr_hdrzone,
        Fragments.fr_HOCCN,
        Fragments.fr_imidazole,
        Fragments.fr_imide,
        Fragments.fr_Imine,
        Fragments.fr_isocyan,
        Fragments.fr_isothiocyan,
        Fragments.fr_ketone,
        Fragments.fr_ketone_Topliss,
        Fragments.fr_lactam,
        Fragments.fr_lactone,
        Fragments.fr_methoxy,
        Fragments.fr_morpholine,
        Fragments.fr_N_O,
        Fragments.fr_Ndealkylation1,
        Fragments.fr_Ndealkylation2,
        Fragments.fr_NH0,
        Fragments.fr_NH1,
        Fragments.fr_NH2,
        Fragments.fr_Nhpyrrole,
        Fragments.fr_nitrile,
        Fragments.fr_nitro,
        Fragments.fr_nitro_arom,
        Fragments.fr_nitro_arom_nonortho,
        Fragments.fr_nitroso,
        Fragments.fr_oxazole,
        Fragments.fr_oxime,
        Fragments.fr_para_hydroxylation,
        Fragments.fr_phenol,
        Fragments.fr_phenol_noOrthoHbond,
        Fragments.fr_phos_acid,
        Fragments.fr_phos_ester,
        Fragments.fr_piperdine,
        Fragments.fr_piperzine,
        Fragments.fr_priamide,
        Fragments.fr_prisulfonamd,
        Fragments.fr_pyridine,
        Fragments.fr_quatN,
        Fragments.fr_SH,
        Fragments.fr_sulfide,
        Fragments.fr_sulfonamd,
        Fragments.fr_sulfone,
        Fragments.fr_term_acetylene,
        Fragments.fr_tetrazole,
        Fragments.fr_thiazole,
        Fragments.fr_thiocyan,
        Fragments.fr_thiophene,
        Fragments.fr_unbrch_alkane,
        Fragments.fr_urea,
        # Topological / graph descriptors
        GraphDescriptors.BalabanJ,
        GraphDescriptors.BertzCT,
        GraphDescriptors.Chi0,
        GraphDescriptors.Chi0n,
        GraphDescriptors.Chi0v,
        GraphDescriptors.Chi1,
        GraphDescriptors.Chi1n,
        GraphDescriptors.Chi1v,
        GraphDescriptors.Chi2n,
        GraphDescriptors.Chi2v,
        GraphDescriptors.Chi3n,
        GraphDescriptors.Chi3v,
        GraphDescriptors.Chi4n,
        GraphDescriptors.Chi4v,
        GraphDescriptors.HallKierAlpha,
        GraphDescriptors.Ipc,
        GraphDescriptors.Kappa1,
        GraphDescriptors.Kappa2,
        GraphDescriptors.Kappa3,
        # Lipinski-style counts
        Lipinski.HeavyAtomCount,
        Lipinski.NHOHCount,
        Lipinski.NOCount,
        Lipinski.NumAliphaticCarbocycles,
        Lipinski.NumAliphaticHeterocycles,
        Lipinski.NumAliphaticRings,
        Lipinski.NumAromaticCarbocycles,
        Lipinski.NumAromaticHeterocycles,
        Lipinski.NumAromaticRings,
        Lipinski.NumHAcceptors,
        Lipinski.NumHDonors,
        Lipinski.NumHeteroatoms,
        Lipinski.NumRotatableBonds,
        Lipinski.NumSaturatedCarbocycles,
        Lipinski.NumSaturatedHeterocycles,
        Lipinski.NumSaturatedRings,
        Lipinski.RingCount,
        # Surface-area descriptors
        MolSurf.LabuteASA,
        MolSurf.PEOE_VSA1,
        MolSurf.PEOE_VSA10,
        MolSurf.PEOE_VSA11,
        MolSurf.PEOE_VSA12,
        MolSurf.PEOE_VSA13,
        MolSurf.PEOE_VSA14,
        MolSurf.PEOE_VSA2,
        MolSurf.PEOE_VSA3,
        MolSurf.PEOE_VSA4,
        MolSurf.PEOE_VSA5,
        MolSurf.PEOE_VSA6,
        MolSurf.PEOE_VSA7,
        MolSurf.PEOE_VSA8,
        MolSurf.PEOE_VSA9,
        MolSurf.SlogP_VSA1,
        MolSurf.SlogP_VSA10,
        MolSurf.SlogP_VSA11,
        MolSurf.SlogP_VSA12,
        MolSurf.SlogP_VSA2,
        MolSurf.SlogP_VSA3,
        MolSurf.SlogP_VSA4,
        MolSurf.SlogP_VSA5,
        MolSurf.SlogP_VSA6,
        MolSurf.SlogP_VSA7,
        MolSurf.SlogP_VSA8,
        MolSurf.SlogP_VSA9,
        MolSurf.SMR_VSA1,
        MolSurf.SMR_VSA10,
        MolSurf.SMR_VSA2,
        MolSurf.SMR_VSA3,
        MolSurf.SMR_VSA4,
        MolSurf.SMR_VSA5,
        MolSurf.SMR_VSA6,
        MolSurf.SMR_VSA7,
        MolSurf.SMR_VSA8,
        MolSurf.SMR_VSA9,
        MolSurf.TPSA,
    )

    # Evaluate in table order, then wrap exactly as array -> Series.
    values = [compute(mol) for compute in descriptor_fns]
    descriptors = pd.Series(np.array(values))
    return descriptors
def calculate(self, D):
    """Return the Balaban J value of ``self.mol`` as a plain ``float``.

    ``D`` is forwarded to ``RDKit.BalabanJ`` as its ``dMat`` keyword
    argument.
    """
    balaban_j = RDKit.BalabanJ(self.mol, dMat=D)
    return float(balaban_j)
def get_descriptors(smiles):
    """
    Get a dictionary of RDKit descriptors from a SMILES string.

    The molecule is parsed, explicit hydrogens are added and a 3-D
    conformer is embedded (ETKDG parameters) before any descriptor is
    computed, so every value below is calculated on the hydrogen-added,
    embedded molecule.

    Parameters
    ----------
    smiles : str
        The SMILES string of the chemical of interest

    Returns
    -------
    descriptors : dict
        A collection of molecular descriptors, keyed by descriptor name.
        Note that the ``'molwt'`` entry is filled from
        ``Descriptors.ExactMolWt``.

    Raises
    ------
    ValueError
        If ``smiles`` cannot be parsed into a molecule, or if no 3-D
        conformer could be embedded (the 3-D descriptors need one).

    Notes:
        Developed with RDKit 2019.03.4, although doc pages listed 2019.03.1
    """
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        # MolFromSmiles signals a parse failure by returning None; fail here
        # with a clear message instead of an obscure error inside AddHs.
        raise ValueError('Could not parse SMILES string: %r' % (smiles,))

    mol = Chem.AddHs(mol)

    # EmbedMolecule returns the new conformer id, or -1 when embedding fails.
    conformer_id = Chem.EmbedMolecule(mol, Chem.ETKDG())
    if conformer_id < 0:
        raise ValueError(
            'Could not embed a 3-D conformer for SMILES string: %r' % (smiles,))

    descriptors = {}

    # Starting with simple descriptors:
    # https://www.rdkit.org/docs/source/rdkit.Chem.Descriptors.html

    # Molecular weight
    descriptors['molwt'] = Descriptors.ExactMolWt(mol)

    # Partial charge metrics
    descriptors['max_abs_partial_charge'] = Descriptors.MaxAbsPartialCharge(mol)
    descriptors['max_partial_charge'] = Descriptors.MaxPartialCharge(mol)
    descriptors['min_abs_partial_charge'] = Descriptors.MinAbsPartialCharge(mol)
    descriptors['min_partial_charge'] = Descriptors.MinPartialCharge(mol)

    # Basic electron counts
    descriptors['num_radical_electrons'] = Descriptors.NumRadicalElectrons(mol)
    descriptors['num_valence_electrons'] = Descriptors.NumValenceElectrons(mol)

    # 3-D descriptors
    # https://www.rdkit.org/docs/source/rdkit.Chem.Descriptors3D.html
    # According to some basic tests, rdMolDescriptors.CalcAsphericity and
    # rdMolDescriptors.CalcEccentricity produce the same results as the
    # Descriptors3D calls used here.
    descriptors['asphericity'] = Descriptors3D.Asphericity(mol)
    descriptors['eccentricity'] = Descriptors3D.Eccentricity(mol)
    descriptors['inertial_shape_factor'] = Descriptors3D.InertialShapeFactor(mol)
    descriptors['radius_of_gyration'] = Descriptors3D.RadiusOfGyration(mol)
    descriptors['spherocity_index'] = Descriptors3D.SpherocityIndex(mol)

    # Graph descriptors
    # https://www.rdkit.org/docs/source/rdkit.Chem.GraphDescriptors.html
    descriptors['balaban_j'] = GraphDescriptors.BalabanJ(mol)
    descriptors['bertz_ct'] = GraphDescriptors.BertzCT(mol)

    descriptors['chi0'] = GraphDescriptors.Chi0(mol)
    descriptors['chi0n'] = GraphDescriptors.Chi0n(mol)
    descriptors['chi0v'] = GraphDescriptors.Chi0v(mol)
    descriptors['chi1'] = GraphDescriptors.Chi1(mol)
    descriptors['chi1n'] = GraphDescriptors.Chi1n(mol)
    descriptors['chi1v'] = GraphDescriptors.Chi1v(mol)
    descriptors['chi2n'] = GraphDescriptors.Chi2n(mol)
    descriptors['chi2v'] = GraphDescriptors.Chi2v(mol)
    descriptors['chi3n'] = GraphDescriptors.Chi3n(mol)
    descriptors['chi3v'] = GraphDescriptors.Chi3v(mol)
    descriptors['chi4n'] = GraphDescriptors.Chi4n(mol)
    descriptors['chi4v'] = GraphDescriptors.Chi4v(mol)

    descriptors['hall_kier_alpha'] = GraphDescriptors.HallKierAlpha(mol)

    descriptors['kappa1'] = GraphDescriptors.Kappa1(mol)
    descriptors['kappa2'] = GraphDescriptors.Kappa2(mol)
    descriptors['kappa3'] = GraphDescriptors.Kappa3(mol)

    # Predicted properties from Wildman and Crippen
    descriptors['log_p'] = Descriptors.MolLogP(mol)
    descriptors['refractivity'] = Descriptors.MolMR(mol)

    return descriptors