def get_descriptors(mols=None):
    """Compute per-molecule descriptors, save them to CSV and plot the result.

    For every truthy entry of ``mols`` one table row is filled with the
    canonical SMILES and a set of 2D descriptors. The molecule is then
    embedded and MMFF94s-optimised in place (the calls are made on the input
    molecule itself) and the conformer-based descriptors are added to the
    same row. If that second stage raises, the row keeps whatever columns
    were already written and processing moves on to the next molecule.

    **Arguments**

      - mols: a sized iterable of RDKit molecules; falsy entries (e.g.
        ``None`` from a failed parse) are skipped. ``None`` is treated as an
        empty collection.

    **Returns**

      Whatever ``plot_NPR(table=table)`` returns.

    **Side effects**

      Writes ``Descriptors.csv`` to the current working directory and prints
      progress messages.
    """
    if mols is None:
        # The declared default used to fail on len(None); behave like the
        # empty-input case instead.
        mols = []

    table = pd.DataFrame()
    i = 0  # index of the next table row; only advanced for truthy molecules
    print('Computing descriptors for {} molecules'.format(len(mols)))

    for mol in mols:
        if not mol:
            continue

        table.loc[i, 'Smiles'] = Chem.MolToSmiles(mol, True)
        table.loc[i, 'MolWt'] = Descriptors.MolWt(mol)
        table.loc[i, 'LogP'] = Descriptors.MolLogP(mol)
        table.loc[i, 'NumHAcceptors'] = Descriptors.NumHAcceptors(mol)
        table.loc[i, 'NumHDonors'] = Descriptors.NumHDonors(mol)
        table.loc[i, 'NumHeteroatoms'] = Descriptors.NumHeteroatoms(mol)
        # NOTE: the column name is misspelt ('Rotable'); it is kept as-is
        # because it is part of the CSV/table schema consumers may rely on.
        table.loc[i, 'NumRotableBonds'] = Descriptors.NumRotatableBonds(mol)

        try:
            AllChem.EmbedMolecule(mol, useRandomCoords=True)
            AllChem.MMFFOptimizeMolecule(mol, mmffVariant='MMFF94s')
            table.loc[i, 'TPSA'] = Descriptors.TPSA(mol)
            table.loc[i, 'PMI1'] = Descriptors3D.PMI1(mol)
            table.loc[i, 'PMI2'] = Descriptors3D.PMI2(mol)
            table.loc[i, 'PMI3'] = Descriptors3D.PMI3(mol)
            table.loc[i, 'PBF'] = rdMolDescriptors.CalcPBF(mol)
            table.loc[i, 'NPR1'] = rdMolDescriptors.CalcNPR1(mol)
            table.loc[i, 'NPR2'] = rdMolDescriptors.CalcNPR2(mol)
            table.loc[i, 'ISF'] = Descriptors3D.InertialShapeFactor(mol)
        except Exception as err:
            # Best effort: keep the partially filled row, but say so instead
            # of dropping the failure silently.
            print('Could not compute 3D descriptors for molecule {}: {}'
                  .format(i, err))
        finally:
            # The row is consumed whether or not the 3D stage succeeded.
            i = i + 1

    table.to_csv('Descriptors.csv')
    print('Computed descriptors in file Descriptors.csv')
    return (plot_NPR(table=table))
def testGithub2037(self):
    # Checks NPR1/NPR2 over ten embedded conformers of heptane: values differ
    # between conformers with the default arguments and are identical once
    # force=False lets the cached result be reused.
    heptane = Chem.AddHs(Chem.MolFromSmiles("CCCCCCC"))
    conf_ids = AllChem.EmbedMultipleConfs(heptane, 10)

    def npr_pairs(**options):
        # One (NPR1, NPR2) tuple per conformer; NPR1 is evaluated before NPR2
        # for each conformer so the call sequence is NPR1, NPR2, NPR1, ...
        return [(rdMD.CalcNPR1(heptane, confId=conf_id, **options),
                 rdMD.CalcNPR2(heptane, confId=conf_id, **options))
                for conf_id in conf_ids]

    # start with defaults (which does not cache results):
    fresh = npr_pairs()
    for npr1, npr2 in fresh[1:]:
        self.assertNotAlmostEqual(fresh[0][0], npr1)
        self.assertNotAlmostEqual(fresh[0][1], npr2)

    # now ensure that we can cache:
    reused = npr_pairs(force=False)
    for npr1, npr2 in reused[1:]:
        self.assertAlmostEqual(reused[0][0], npr1)
        self.assertAlmostEqual(reused[0][1], npr2)
def calculate_scalar_descriptors(molecule, symbols):
    """Build a flat feature vector of scalar descriptors for one molecule.

    The vector is laid out, in order, as:

      1. asphericity
      2. the values returned by ``CalcCrippenDescriptors`` (unpacked in place)
      3. exact molecular weight, eccentricity, fraction of sp3 carbons,
         Labute ASA, NPR1, NPR2, Hall-Kier alpha
      4. number of entries of ``symbols`` equal to 'H', 'C', 'N', 'O', 'F'
      5. ring counts: aliphatic/aromatic/saturated carbo- and heterocycles,
         spiro atoms, bridgehead atoms
      6. amide bonds, hydrogen-bond acceptors, hydrogen-bond donors

    Args:
        molecule: molecule object passed unchanged to every ``rdMD.Calc*``
            call. NOTE(review): the shape descriptors presumably need a 3D
            conformer on the molecule -- confirm with the caller.
        symbols: sequence of element symbols, one per atom (assumed to
            belong to ``molecule`` -- verify against caller).

    Returns:
        ``np.array`` holding the features in the order listed above.
    """
    features = [rdMD.CalcAsphericity(molecule)]
    features.extend(rdMD.CalcCrippenDescriptors(molecule))

    global_descriptors = (
        rdMD.CalcExactMolWt,
        rdMD.CalcEccentricity,
        rdMD.CalcFractionCSP3,
        rdMD.CalcLabuteASA,
        rdMD.CalcNPR1,
        rdMD.CalcNPR2,
        rdMD.CalcHallKierAlpha,
    )
    for descriptor in global_descriptors:
        features.append(descriptor(molecule))

    # elemental distribution
    symbol_array = np.array(symbols)
    for element in ('H', 'C', 'N', 'O', 'F'):
        features.append(np.sum(symbol_array == element))

    count_descriptors = (
        # ring features
        rdMD.CalcNumAliphaticCarbocycles,
        rdMD.CalcNumAliphaticHeterocycles,
        rdMD.CalcNumAromaticCarbocycles,
        rdMD.CalcNumAromaticHeterocycles,
        rdMD.CalcNumSaturatedCarbocycles,
        rdMD.CalcNumSaturatedHeterocycles,
        rdMD.CalcNumSpiroAtoms,  # atom shared between rings with one bond
        rdMD.CalcNumBridgeheadAtoms,  # shared between rings, >= two bonds
        # other counts
        rdMD.CalcNumAmideBonds,
        rdMD.CalcNumHBA,  # number of hydrogen acceptors
        rdMD.CalcNumHBD,  # number of hydrogen donors
    )
    for descriptor in count_descriptors:
        features.append(descriptor(molecule))

    return np.array(features)
def main(args):
    """Compute NPR1/NPR2 for every ``<smiles> <cid>`` line of an input file.

    ``args[1]`` is the path of a whitespace-separated input file whose lines
    hold a SMILES string followed by an identifier. Results are appended to
    ``output_<input stem>`` in the current working directory as
    space-separated ``smiles cid npr1 npr2`` rows, with both ratios rounded
    to four decimals.

    Lines that cannot be parsed, converted to a molecule or embedded are
    skipped. Molecules whose NPR calculation fails are reported on stdout and
    handed to ``record_error``. An ``IOError`` while opening or writing the
    files is printed rather than raised.

    Args:
        args: argv-style sequence; only ``args[1]`` is read.
    """
    print(rdBase.rdkitVersion)
    input_filename = os.path.basename(args[1]).split('.')[0]
    result_file = "output_" + input_filename
    print(result_file)
    header = ['smiles', 'cid', 'npr1', 'npr2']

    try:
        # The output is opened in append mode, so writing the header on every
        # run would scatter header rows through the data. Only a new or
        # empty file gets one.
        needs_header = (not os.path.exists(result_file)
                        or os.path.getsize(result_file) == 0)
        with open(result_file, 'a') as output:
            if needs_header:
                output.write((' '.join(header)) + '\n')
            with open(args[1]) as f:
                for line in f:
                    try:
                        smiles, cid = line.strip().split(None, 1)
                        mol = C.MolFromSmiles(smiles)
                        mol = C.AddHs(mol)
                        AllChem.EmbedMolecule(mol,
                                              useExpTorsionAnglePrefs=True,
                                              useBasicKnowledge=True)
                    except Exception:
                        # Deliberate best effort: blank/malformed lines and
                        # molecules that cannot be built are skipped.
                        continue

                    try:
                        npr1 = round(CD.CalcNPR1(mol), 4)
                        npr2 = round(CD.CalcNPR2(mol), 4)
                    except Exception:
                        print("Failed molecules:" + cid)
                        record_error(input_filename, smiles, cid)
                        continue

                    row = [smiles, cid, npr1, npr2]
                    output.write(' '.join(str(value) for value in row) + '\n')
        # Both files are closed by their ``with`` blocks; no explicit close()
        # calls are needed.
    except IOError as err:
        print(err)
from Sauer and Schwarz JCIM 43:987-1003 (2003) https://doi.org/10.1021/ci025599w **Arguments** - inMol: a molecule - confId: (optional) the conformation ID to use - useAtomicMasses: (optional) toggles use of atomic masses in the calculation. Defaults to True """ NPR2 = lambda *x, **y: rdMolDescriptors.CalcNPR2(*x, **y) NPR2.version = rdMolDescriptors._CalcNPR2_version NPR2.__doc__ = """ Normalized principal moments ratio 2 (=I2/I3) from Sauer and Schwarz JCIM 43:987-1003 (2003) https://doi.org/10.1021/ci025599w **Arguments** - inMol: a molecule - confId: (optional) the conformation ID to use - useAtomicMasses: (optional) toggles use of atomic masses in the calculation. Defaults to True