def get_descriptors(df):
    """Compute a table of chemical descriptors for the molecules in ``df``.

    Args:
        df: DataFrame with a ``'ROMol'`` column holding RDKit molecules.

    Returns:
        A DataFrame sharing ``df``'s index, with the columns MW, LogP, TPSA,
        LabuteASA (each rounded to one decimal), HBA, HBD, FCSP3, MQN8,
        MQN10, NAR and NRB.
    """
    PandasTools.ChangeMoleculeRendering(renderer='String')

    columns = [
        'MW', 'LogP', 'TPSA', 'LabuteASA', 'HBA', 'HBD', 'FCSP3', 'MQN8',
        'MQN10', 'NAR', 'NRB'
    ]

    rows = []
    for mol in df['ROMol']:
        # Compute the MQN vector once; MQN8 and MQN10 are its 8th and 10th
        # entries.
        mqns = rdMolDescriptors.MQNs_(mol)
        rows.append([
            round(Descriptors.ExactMolWt(mol), 1),
            round(Descriptors.MolLogP(mol), 1),
            round(Descriptors.TPSA(mol), 1),
            round(Descriptors.LabuteASA(mol), 1),
            Descriptors.NumHAcceptors(mol),
            Descriptors.NumHDonors(mol),
            Lipinski.FractionCSP3(mol),
            mqns[7],
            mqns[9],
            Lipinski.NumAromaticRings(mol),
            Descriptors.NumRotatableBonds(mol),
        ])

    # Handing columns and index to the constructor keeps the result
    # well-formed when ``df`` has no rows (assigning 11 column names to an
    # empty frame afterwards raises a length-mismatch error).
    return pd.DataFrame(rows, columns=columns, index=df.index)
def _MolQuantumNumbers(m):
    """Return the MQN (molecular quantum numbers) of ``m`` keyed by name.

    Reference: Nguyen et al. ChemMedChem 4:1803-5 (2009)
    """
    mqn_values = rdMolDescriptors.MQNs_(m)
    return {name: value for name, value in zip(_MQN_NAMES, mqn_values)}
def testMQNDetails(self):
    """Compare every molecule's MQN vector with the pickled reference data."""
    data_dir = os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data')
    refFile = os.path.join(data_dir, 'MQNs_regress.pkl')
    # figure out which definition we are currently using
    m = Chem.MolFromSmiles("CC(C)(C)c1cc(O)c(cc1O)C(C)(C)C")
    if Lipinski.NumRotatableBonds(m) == 2:
        refFile = os.path.join(data_dir, 'MQNs_non_strict_regress.pkl')

    # The pickle is read as text so that CRLF line endings are normalised
    # to LF before the bytes are handed to the unpickler.
    with open(refFile, 'r') as intf:
        buf = intf.read().replace('\r\n', '\n').encode('utf-8')
    refData = cPickle.loads(buf, encoding='bytes')

    fn = os.path.join(data_dir, 'aromat_regress.txt')
    ms = [x for x in Chem.SmilesMolSupplier(fn, delimiter='\t')]
    for i, m in enumerate(ms):
        mqns = rdMolDescriptors.MQNs_(m)
        expected = refData[i][1]
        if mqns != expected:
            # Show which positions differ before the assertion fires.
            indices = [(j, x, y)
                       for j, (x, y) in enumerate(zip(mqns, expected))
                       if x != y]
            print(i, Chem.MolToSmiles(m), indices)
        self.assertEqual(mqns, expected)
def testMQNDetails(self):
    """Check per-molecule MQN values against the pickled regression data."""
    data_dir = os.path.join(os.path.dirname(__file__), 'test_data')

    # figure out which definition we are currently using
    probe = Chem.MolFromSmiles("CC(C)(C)c1cc(O)c(cc1O)C(C)(C)C")
    if Lipinski.NumRotatableBonds(probe) == 2:
        pkl_name = 'MQNs_non_strict_regress.pkl'
    else:
        pkl_name = 'MQNs_regress.pkl'

    with open(os.path.join(data_dir, pkl_name), 'rb') as handle:
        reference = pickle.load(handle)

    smiles_path = os.path.join(data_dir, 'aromat_regress.txt')
    molecules = list(Chem.SmilesMolSupplier(smiles_path, delimiter='\t'))

    for idx, mol in enumerate(molecules):
        observed = rdMolDescriptors.MQNs_(mol)
        expected = reference[idx][1]
        if observed != expected:
            # Report the positions that disagree before failing.
            mismatches = []
            for pos, (got, want) in enumerate(zip(observed, expected)):
                if got != want:
                    mismatches.append((pos, got, want))
            print(idx, Chem.MolToSmiles(mol), mismatches)
        self.assertEqual(observed, expected)
def testMQN(self):
    """Check the MQN totals summed over the aromatic regression set."""
    # NOTE(review): this target used to be preceded by an if/else on
    # Lipinski.NumRotatableBonds that picked between two other lists
    # (differing only at index 18), but both were overwritten by the list
    # below before being used. Only the effective values are kept; confirm
    # whether a per-definition target was intended.
    tgt = [
        42917, 274, 870, 621, 135, 1582, 29, 3147, 5463, 6999, 470, 62588,
        19055, 4424, 309, 24059, 17822, 1, 8314, 24146, 16076, 5560, 4262,
        646, 746, 13725, 5430, 2629, 362, 24211, 15939, 292, 41, 20, 1852,
        5642, 31, 9, 1, 2, 3060, 1750
    ]
    fn = os.path.join(os.path.dirname(__file__), 'test_data',
                      'aromat_regress.txt')
    ms = [x for x in Chem.SmilesMolSupplier(fn, delimiter='\t')]
    # Accumulate the 42 MQN counts over all molecules.
    vs = np.zeros((42, ), np.int32)
    for m in ms:
        vs += rdMolDescriptors.MQNs_(m)
    self.assertEqual(list(vs), tgt)
def get_MQNs(x, from_smiles):
    """Return the MQN vector for ``x``, or 42 zeros when there is no molecule.

    Args:
        x: a SMILES string when ``from_smiles`` is true, otherwise a molecule
            object (or None).
        from_smiles: whether ``x`` must first be parsed with
            ``Chem.MolFromSmiles``.

    Returns:
        ``[0] * 42`` if the molecule is None or has no atoms, otherwise the
        result of ``rdMolDescriptors.MQNs_``.
    """
    molecule = Chem.MolFromSmiles(x) if from_smiles else x
    usable = molecule is not None and len(molecule.GetAtoms()) != 0
    if not usable:
        return [0] * 42
    return rdMolDescriptors.MQNs_(molecule)
def _calculateDescriptors(mol):
    """Compute a one-row DataFrame of RDKit descriptors for ``mol``.

    Args:
        mol: the RDKit molecule to describe.

    Returns:
        A DataFrame with the single index label ``0`` and one column per
        descriptor: the scalar descriptors listed below, followed by
        ``slogp_VSA1..12``, ``smr_VSA1..10``, ``peoe_VSA1..14`` and
        ``MQN1..42``.
    """
    # Both Crippen values come from one call; compute it once.
    crippen = rdMolDescriptors.CalcCrippenDescriptors(mol)

    # Collect everything in an (insertion-ordered) dict and build the frame
    # in one step instead of inserting ~120 columns one at a time.
    # NOTE(review): Chi0n is not part of this set; left out so the column
    # layout stays exactly as before.
    values = {
        "SlogP": crippen[0],
        "SMR": crippen[1],
        "LabuteASA": rdMolDescriptors.CalcLabuteASA(mol),
        "TPSA": Descriptors.TPSA(mol),
        "AMW": Descriptors.MolWt(mol),
        "ExactMW": rdMolDescriptors.CalcExactMolWt(mol),
        "NumLipinskiHBA": rdMolDescriptors.CalcNumLipinskiHBA(mol),
        "NumLipinskiHBD": rdMolDescriptors.CalcNumLipinskiHBD(mol),
        "NumRotatableBonds": rdMolDescriptors.CalcNumRotatableBonds(mol),
        "NumHBD": rdMolDescriptors.CalcNumHBD(mol),
        "NumHBA": rdMolDescriptors.CalcNumHBA(mol),
        "NumAmideBonds": rdMolDescriptors.CalcNumAmideBonds(mol),
        "NumHeteroAtoms": rdMolDescriptors.CalcNumHeteroatoms(mol),
        "NumHeavyAtoms": Chem.rdchem.Mol.GetNumHeavyAtoms(mol),
        "NumAtoms": Chem.rdchem.Mol.GetNumAtoms(mol),
        "NumRings": rdMolDescriptors.CalcNumRings(mol),
        "NumAromaticRings": rdMolDescriptors.CalcNumAromaticRings(mol),
        "NumSaturatedRings": rdMolDescriptors.CalcNumSaturatedRings(mol),
        "NumAliphaticRings": rdMolDescriptors.CalcNumAliphaticRings(mol),
        "NumAromaticHeterocycles":
            rdMolDescriptors.CalcNumAromaticHeterocycles(mol),
        "NumSaturatedHeterocycles":
            rdMolDescriptors.CalcNumSaturatedHeterocycles(mol),
        "NumAliphaticHeterocycles":
            rdMolDescriptors.CalcNumAliphaticHeterocycles(mol),
        "NumAromaticCarbocycles":
            rdMolDescriptors.CalcNumAromaticCarbocycles(mol),
        "NumSaturatedCarbocycles":
            rdMolDescriptors.CalcNumSaturatedCarbocycles(mol),
        "NumAliphaticCarbocycles":
            rdMolDescriptors.CalcNumAliphaticCarbocycles(mol),
        "FractionCSP3": rdMolDescriptors.CalcFractionCSP3(mol),
        "Chi0v": rdMolDescriptors.CalcChi0v(mol),
        "Chi1v": rdMolDescriptors.CalcChi1v(mol),
        "Chi2v": rdMolDescriptors.CalcChi2v(mol),
        "Chi3v": rdMolDescriptors.CalcChi3v(mol),
        "Chi4v": rdMolDescriptors.CalcChi4v(mol),
        "Chi1n": rdMolDescriptors.CalcChi1n(mol),
        "Chi2n": rdMolDescriptors.CalcChi2n(mol),
        "Chi3n": rdMolDescriptors.CalcChi3n(mol),
        "Chi4n": rdMolDescriptors.CalcChi4n(mol),
        "HallKierAlpha": rdMolDescriptors.CalcHallKierAlpha(mol),
        "kappa1": rdMolDescriptors.CalcKappa1(mol),
        "kappa2": rdMolDescriptors.CalcKappa2(mol),
        "kappa3": rdMolDescriptors.CalcKappa3(mol),
    }

    # Vector-valued descriptors: (column prefix, number of columns, values).
    vector_families = (
        ("slogp_VSA", 12, rdMolDescriptors.SlogP_VSA_(mol)),
        ("smr_VSA", 10, rdMolDescriptors.SMR_VSA_(mol)),
        ("peoe_VSA", 14, rdMolDescriptors.PEOE_VSA_(mol)),
        ("MQN", 42, rdMolDescriptors.MQNs_(mol)),
    )
    for prefix, count, family_values in vector_families:
        names = [prefix + str(i) for i in range(1, count + 1)]
        values.update(zip(names, family_values))

    return pd.DataFrame(values, index=[0])
def testMQNDetails(self):
    """Compare every molecule's MQN vector with the pickled reference data."""
    refFile = os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data',
                           'MQNs_regress.pkl')
    # Open in binary mode inside a context manager so the handle is closed;
    # encoding='bytes' lets Python 3 read a pickle written by Python 2.
    with open(refFile, 'rb') as inf:
        refData = cPickle.load(inf, encoding='bytes')

    fn = os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data',
                      'aromat_regress.txt')
    ms = [x for x in Chem.SmilesMolSupplier(fn, delimiter='\t')]
    for i, m in enumerate(ms):
        mqns = rdMolDescriptors.MQNs_(m)
        expected = refData[i][1]
        if mqns != expected:
            # Show which positions differ before the assertion fires.
            indices = [(j, x, y)
                       for j, (x, y) in enumerate(zip(mqns, expected))
                       if x != y]
            print(Chem.MolToSmiles(m), indices)
        self.assertEqual(mqns, expected)
def testMQN(self):
    """Check the MQN totals summed over the aromatic regression set."""
    tgt = np.array([
        42917, 274, 870, 621, 135, 1582, 29, 3147, 5463, 6999, 470, 81,
        19055, 4424, 309, 24061, 17820, 1, 9303, 24146, 16076, 5560, 4262,
        646, 746, 13725, 5430, 2629, 362, 24211, 15939, 292, 41, 20, 1852,
        5642, 31, 9, 1, 2, 3060, 1750
    ])
    fn = os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data',
                      'aromat_regress.txt')
    ms = [x for x in Chem.SmilesMolSupplier(fn, delimiter='\t')]
    # Accumulate the 42 MQN counts over all molecules.
    vs = np.zeros((42, ), np.int32)
    for m in ms:
        vs += rdMolDescriptors.MQNs_(m)
    # assertEqual on plain lists reports the differing entries on failure;
    # the failIf alias used previously is gone from unittest in Python 3.12.
    self.assertEqual(vs.tolist(), tgt.tolist())
def get_fingerprint(in_smiles):
    """Compute MQN fingerprints for the SMILES strings that can be used.

    A SMILES is skipped when it does not parse to a molecule or when the
    molecule has more than 100 atoms.

    Args:
        in_smiles: iterable of SMILES strings.

    Returns:
        ``(results, smiles)``: a list of MQN arrays and the list of SMILES
        strings they were computed from, in matching order.
    """
    fingerprints = []
    kept_smiles = []
    for smi in in_smiles:
        mol = AllChem.MolFromSmiles(smi)
        if mol is None or mol.GetNumAtoms() > 100:
            continue
        fingerprints.append(np.array(Descriptors.MQNs_(mol)))
        kept_smiles.append(smi)
    return fingerprints, kept_smiles
def testMQNDetails(self):
    """Check per-molecule MQN values against the pickled regression data."""
    test_data = os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data')

    with open(os.path.join(test_data, 'MQNs_regress.pkl'), 'rb') as inf:
        reference = cPickle.loads(inf.read(), encoding='bytes')

    smiles_file = os.path.join(test_data, 'aromat_regress.txt')
    molecules = list(Chem.SmilesMolSupplier(smiles_file, delimiter='\t'))

    for idx, mol in enumerate(molecules):
        computed = rdMolDescriptors.MQNs_(mol)
        wanted = reference[idx][1]
        if computed != wanted:
            # Report the positions that disagree before failing.
            differing = [(pos, got, ref)
                         for pos, (got, ref) in enumerate(zip(computed, wanted))
                         if got != ref]
            print(Chem.MolToSmiles(mol), differing)
        self.assertEqual(computed, wanted)
def compute_MQN_descriptors(self):
    """Compute the MQN-type descriptors of ``self.Molecule``.

    Ref: Nguyen et al. ChemMedChem 4:1803-5 (2009)

    Returns:
        MQN_dict: dictionary mapping 'MQN1' .. 'MQN42' to the corresponding
        entries of ``rdDesc.MQNs_`` (data type: int).
    """
    # isinstance, unlike an exact type comparison, also accepts subclasses
    # of Chem.rdchem.Mol.
    assert isinstance(self.Molecule, Chem.rdchem.Mol)
    MQN_names = ['MQN' + str(i) for i in range(1, 43)]
    MQN_dict = dict(zip(MQN_names, rdDesc.MQNs_(self.Molecule)))
    return MQN_dict
def feature_fp(smiles):
    """Build a numeric feature vector for a SMILES string.

    The vector is the MQN descriptor list followed by sixteen further
    RDKit descriptors, in the fixed order given below.

    Args:
        smiles: SMILES string of the molecule.

    Returns:
        numpy array holding the concatenated features.
    """
    mol = Chem.MolFromSmiles(smiles)
    # NOTE: the rotatable-bond count appears in two slots (first and third);
    # the layout is kept as-is so the vector length and positions are stable.
    extra_calculators = (
        rdMolDescriptors.CalcNumRotatableBonds,
        rdMolDescriptors.CalcExactMolWt,
        rdMolDescriptors.CalcNumRotatableBonds,
        rdMolDescriptors.CalcFractionCSP3,
        rdMolDescriptors.CalcNumAliphaticCarbocycles,
        rdMolDescriptors.CalcNumAliphaticHeterocycles,
        rdMolDescriptors.CalcNumAliphaticRings,
        rdMolDescriptors.CalcNumAromaticCarbocycles,
        rdMolDescriptors.CalcNumAromaticHeterocycles,
        rdMolDescriptors.CalcNumAromaticRings,
        rdMolDescriptors.CalcNumBridgeheadAtoms,
        rdMolDescriptors.CalcNumRings,
        rdMolDescriptors.CalcNumAmideBonds,
        rdMolDescriptors.CalcNumHeterocycles,
        rdMolDescriptors.CalcNumSpiroAtoms,
        rdMolDescriptors.CalcTPSA,
    )
    features = rdMolDescriptors.MQNs_(mol)
    for calculate in extra_calculators:
        features.append(calculate(mol))
    return np.array(features)
def CalculateStandAloneDescriptor(molObject):
    """Collect the stand-alone descriptors of a molecule into a flat list.

    Args:
        molObject: the RDKit molecule to describe.

    Returns:
        List made of, in order: a 0.0/1.0 flag derived from the return value
        of ``AllChem.ComputeGasteigerCharges``, the amide-bond count, the
        spiro-atom count, the bridgehead-atom count, and then the values of
        ``rdMolDescriptors.MQNs_``.
    """
    value_list = []
    # NOTE(review): the flag is 0.0 whenever ComputeGasteigerCharges returns
    # None. If that call only annotates the atoms and never returns a value,
    # this flag is constant -- confirm against the RDKit documentation.
    if AllChem.ComputeGasteigerCharges(molObject) is None:
        value_list.append(0.0)
    else:
        value_list.append(1.0)
    value_list.append(rdMolDescriptors.CalcNumAmideBonds(molObject))
    value_list.append(rdMolDescriptors.CalcNumSpiroAtoms(molObject))
    value_list.append(rdMolDescriptors.CalcNumBridgeheadAtoms(molObject))
    value_list += rdMolDescriptors.MQNs_(molObject)
    return value_list
#structure embed_fn = np.nan_to_num(fngroups.values) embed_graph = graph.values #molecular fingerprint #https://www.rdkit.org/UGM/2012/Landrum_RDKit_UGM.Fingerprints.Final.pptx.pdf finger_mqn = [] finger_morgan = [] finger_maccs = [] finger_ap = [] for i in smiles: mol = AllChem.MolFromSmiles(i) finger_mqn.append(np.array(Descriptors.MQNs_(mol))) finger_maccs.append(np.array(Descriptors.GetMACCSKeysFingerprint((mol)))) #finger_morgan.append(np.array(Descriptors.GetMorganFingerprint((mol)))) finger_ap.append(np.array(Descriptors.GetAtomPairFingerprint((mol)))) ### names = 'vec_spec,vec_smiles,embed_fn,finger_mqn,finger_maccs,finger_ap,embed_graph'.split( ',') data = [ vec_spec, vec_smiles, embed_fn, finger_mqn, finger_maccs, finger_ap, embed_graph ] counter = 0 for i in data: try: res = do_pca(i)
def getMQN(mol):
    """Return the MQN descriptor values that RDKit computes for ``mol``."""
    return rdMolDescriptors.MQNs_(mol)