Esempio n. 1
0
def get_descriptors(df):
    """Compute a table of RDKit descriptors for every molecule in ``df``.

    As a side effect, the PandasTools molecule renderer is switched to
    ``'String'`` before the molecules are read.

    Args:
        df: DataFrame with a ``'ROMol'`` column holding RDKit molecules.

    Returns:
        DataFrame carrying the same index as ``df`` and one column per
        descriptor: MW, LogP, TPSA, LabuteASA, HBA, HBD, FCSP3, MQN8,
        MQN10, NAR, NRB. An empty ``df`` yields an empty frame that still
        has these columns.
    """
    PandasTools.ChangeMoleculeRendering(renderer='String')

    columns = [
        'MW', 'LogP', 'TPSA', 'LabuteASA', 'HBA', 'HBD', 'FCSP3', 'MQN8',
        'MQN10', 'NAR', 'NRB',
    ]

    rows = []
    for m in df['ROMol']:
        # Compute the MQN vector once; entries 8 and 10 (1-based) are both
        # read from it.
        mqns = rdMolDescriptors.MQNs_(m)

        rows.append([
            round(Descriptors.ExactMolWt(m), 1),   # MW
            round(Descriptors.MolLogP(m), 1),      # LogP
            round(Descriptors.TPSA(m), 1),         # TPSA
            round(Descriptors.LabuteASA(m), 1),    # LabuteASA
            Descriptors.NumHAcceptors(m),          # HBA
            Descriptors.NumHDonors(m),             # HBD
            Lipinski.FractionCSP3(m),              # FCSP3
            mqns[7],                               # MQN8
            mqns[9],                               # MQN10
            Lipinski.NumAromaticRings(m),          # NAR
            Descriptors.NumRotatableBonds(m),      # NRB
        ])

    # Passing columns/index to the constructor (rather than assigning them
    # afterwards) keeps the empty-input case from failing on a column-count
    # mismatch.
    return pd.DataFrame(rows, columns=columns, index=df.index)
Esempio n. 2
0
def _MolQuantumNumbers(m):
    '''
    Return a dict mapping each MQN name to its value for molecule ``m``.

    MQN : (molecular quantum numbers) Nguyen et al. ChemMedChem 4:1803-5 (2009)

    Names are taken from the module-level ``_MQN_NAMES`` and values from
    ``rdMolDescriptors.MQNs_``; they are paired positionally.
    '''
    mqn_values = rdMolDescriptors.MQNs_(m)
    return {name: value for name, value in zip(_MQN_NAMES, mqn_values)}
Esempio n. 3
0
    def testMQNDetails(self):
        """Check per-molecule MQN vectors against pickled reference data.

        Every molecule of ``aromat_regress.txt`` must produce exactly the
        MQN vector stored at the same position in the reference pickle.
        On a mismatch the differing ``(index, actual, expected)`` triples
        are printed before the assertion fails.
        """
        refFile = os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data',
                               'MQNs_regress.pkl')
        refFile2 = os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data',
                                'MQNs_non_strict_regress.pkl')
        # figure out which definition we are currently using: the probe
        # molecule reporting 2 rotatable bonds selects the non-strict file
        m = Chem.MolFromSmiles("CC(C)(C)c1cc(O)c(cc1O)C(C)(C)C")
        if Lipinski.NumRotatableBonds(m) == 2:
            refFile = refFile2

        # The pickle is read as text so that '\r\n' line endings can be
        # normalised, then re-encoded to bytes for unpickling. The `with`
        # block closes the file; no explicit close() is needed.
        with open(refFile, 'r') as intf:
            buf = intf.read().replace('\r\n', '\n').encode('utf-8')
        refData = cPickle.loads(buf, encoding='bytes')

        fn = os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data',
                          'aromat_regress.txt')
        ms = [x for x in Chem.SmilesMolSupplier(fn, delimiter='\t')]
        for i, m in enumerate(ms):
            mqns = rdMolDescriptors.MQNs_(m)
            expected = refData[i][1]
            if mqns != expected:
                indices = [
                    (j, x, y)
                    for j, (x, y) in enumerate(zip(mqns, expected))
                    if x != y
                ]
                print(i, Chem.MolToSmiles(m), indices)
            self.assertEqual(mqns, expected)
Esempio n. 4
0
    def testMQNDetails(self):
        """Compare each molecule's MQN vector with the pickled reference.

        The reference file is chosen by probing how many rotatable bonds
        the current build reports for a fixed molecule. Mismatching
        positions are printed as ``(index, actual, expected)`` before the
        assertion fails.
        """
        data_dir = os.path.join(os.path.dirname(__file__), 'test_data')

        # figure out which definition we are currently using
        probe = Chem.MolFromSmiles("CC(C)(C)c1cc(O)c(cc1O)C(C)(C)C")
        if Lipinski.NumRotatableBonds(probe) == 2:
            ref_name = 'MQNs_non_strict_regress.pkl'
        else:
            ref_name = 'MQNs_regress.pkl'

        with open(os.path.join(data_dir, ref_name), 'rb') as handle:
            reference = pickle.load(handle)

        supplier = Chem.SmilesMolSupplier(
            os.path.join(data_dir, 'aromat_regress.txt'), delimiter='\t')
        for idx, mol in enumerate(list(supplier)):
            actual = rdMolDescriptors.MQNs_(mol)
            expected = reference[idx][1]
            if actual != expected:
                diffs = [
                    (pos, got, want)
                    for pos, (got, want) in enumerate(zip(actual, expected))
                    if got != want
                ]
                print(idx, Chem.MolToSmiles(mol), diffs)
            self.assertEqual(actual, expected)
Esempio n. 5
0
    def testMQN(self):
        """Check the element-wise sum of MQN vectors over the regression set.

        The MQN vectors of all molecules in ``aromat_regress.txt`` are
        summed and compared with a hard-coded target. Which target applies
        depends on how many rotatable bonds the current build reports for
        a fixed probe molecule.
        """
        m = Chem.MolFromSmiles("CC(C)(C)c1cc(O)c(cc1O)C(C)(C)C")
        if Lipinski.NumRotatableBonds(m) == 2:
            tgt = [
                42917, 274, 870, 621, 135, 1582, 29, 3147, 5463, 6999, 470,
                62588, 19055, 4424, 309, 24061, 17820, 1, 9303, 24146, 16076,
                5560, 4262, 646, 746, 13725, 5430, 2629, 362, 24211, 15939,
                292, 41, 20, 1852, 5642, 31, 9, 1, 2, 3060, 1750
            ]
        else:
            # Only the effective target is kept: an earlier list that was
            # assigned here and immediately overwritten has been dropped.
            tgt = [
                42917, 274, 870, 621, 135, 1582, 29, 3147, 5463, 6999, 470,
                62588, 19055, 4424, 309, 24059, 17822, 1, 8314, 24146, 16076,
                5560, 4262, 646, 746, 13725, 5430, 2629, 362, 24211, 15939,
                292, 41, 20, 1852, 5642, 31, 9, 1, 2, 3060, 1750
            ]
        fn = os.path.join(os.path.dirname(__file__), 'test_data',
                          'aromat_regress.txt')
        ms = [x for x in Chem.SmilesMolSupplier(fn, delimiter='\t')]
        vs = np.zeros((42, ), np.int32)

        for m in ms:
            vs += rdMolDescriptors.MQNs_(m)
        self.assertEqual(list(vs), tgt)
Esempio n. 6
0
 def get_MQNs(x, from_smiles):
     """Return the MQN vector for ``x``, or 42 zeros when there is no molecule.

     Args:
         x: a SMILES string when ``from_smiles`` is true, otherwise a
             molecule object (or ``None``).
         from_smiles: whether ``x`` must first be parsed with
             ``Chem.MolFromSmiles``.

     Returns:
         ``[0] * 42`` if the molecule is ``None`` or has no atoms,
         otherwise the result of ``rdMolDescriptors.MQNs_``.
     """
     mol = Chem.MolFromSmiles(x) if from_smiles else x
     # Guard clause: missing or empty molecules get an all-zero vector.
     if mol is None or len(mol.GetAtoms()) == 0:
         return [0] * 42
     return rdMolDescriptors.MQNs_(mol)
Esempio n. 7
0
def _calculateDescriptors(mol):
    """Compute a one-row DataFrame of RDKit descriptors for ``mol``.

    Args:
        mol: the RDKit molecule to describe.

    Returns:
        DataFrame with the single index label ``0`` and one column per
        descriptor, in this order: the named scalar descriptors, then
        ``slogp_VSA1..12``, ``smr_VSA1..10``, ``peoe_VSA1..14`` and
        ``MQN1..42``.
    """
    # Both Crippen values come from a single call; index 0 is stored as
    # SlogP and index 1 as SMR.
    crippen = rdMolDescriptors.CalcCrippenDescriptors(mol)

    values = {
        "SlogP": crippen[0],
        "SMR": crippen[1],
        "LabuteASA": rdMolDescriptors.CalcLabuteASA(mol),
        "TPSA": Descriptors.TPSA(mol),
        "AMW": Descriptors.MolWt(mol),
        "ExactMW": rdMolDescriptors.CalcExactMolWt(mol),
        "NumLipinskiHBA": rdMolDescriptors.CalcNumLipinskiHBA(mol),
        "NumLipinskiHBD": rdMolDescriptors.CalcNumLipinskiHBD(mol),
        "NumRotatableBonds": rdMolDescriptors.CalcNumRotatableBonds(mol),
        "NumHBD": rdMolDescriptors.CalcNumHBD(mol),
        "NumHBA": rdMolDescriptors.CalcNumHBA(mol),
        "NumAmideBonds": rdMolDescriptors.CalcNumAmideBonds(mol),
        "NumHeteroAtoms": rdMolDescriptors.CalcNumHeteroatoms(mol),
        "NumHeavyAtoms": Chem.rdchem.Mol.GetNumHeavyAtoms(mol),
        "NumAtoms": Chem.rdchem.Mol.GetNumAtoms(mol),
        "NumRings": rdMolDescriptors.CalcNumRings(mol),
        "NumAromaticRings": rdMolDescriptors.CalcNumAromaticRings(mol),
        "NumSaturatedRings": rdMolDescriptors.CalcNumSaturatedRings(mol),
        "NumAliphaticRings": rdMolDescriptors.CalcNumAliphaticRings(mol),
        "NumAromaticHeterocycles":
            rdMolDescriptors.CalcNumAromaticHeterocycles(mol),
        "NumSaturatedHeterocycles":
            rdMolDescriptors.CalcNumSaturatedHeterocycles(mol),
        "NumAliphaticHeterocycles":
            rdMolDescriptors.CalcNumAliphaticHeterocycles(mol),
        "NumAromaticCarbocycles":
            rdMolDescriptors.CalcNumAromaticCarbocycles(mol),
        "NumSaturatedCarbocycles":
            rdMolDescriptors.CalcNumSaturatedCarbocycles(mol),
        "NumAliphaticCarbocycles":
            rdMolDescriptors.CalcNumAliphaticCarbocycles(mol),
        "FractionCSP3": rdMolDescriptors.CalcFractionCSP3(mol),
        "Chi0v": rdMolDescriptors.CalcChi0v(mol),
        "Chi1v": rdMolDescriptors.CalcChi1v(mol),
        "Chi2v": rdMolDescriptors.CalcChi2v(mol),
        "Chi3v": rdMolDescriptors.CalcChi3v(mol),
        "Chi4v": rdMolDescriptors.CalcChi4v(mol),
        "Chi1n": rdMolDescriptors.CalcChi1n(mol),
        "Chi2n": rdMolDescriptors.CalcChi2n(mol),
        "Chi3n": rdMolDescriptors.CalcChi3n(mol),
        "Chi4n": rdMolDescriptors.CalcChi4n(mol),
        "HallKierAlpha": rdMolDescriptors.CalcHallKierAlpha(mol),
        "kappa1": rdMolDescriptors.CalcKappa1(mol),
        "kappa2": rdMolDescriptors.CalcKappa2(mol),
        "kappa3": rdMolDescriptors.CalcKappa3(mol),
    }

    # Vector-valued descriptors: each entry becomes its own column named
    # <prefix><1-based position>. zip() pairs names and values positionally,
    # so at most `count` entries of each vector are used.
    vector_descriptors = (
        ("slogp_VSA", 12, rdMolDescriptors.SlogP_VSA_(mol)),
        ("smr_VSA", 10, rdMolDescriptors.SMR_VSA_(mol)),
        ("peoe_VSA", 14, rdMolDescriptors.PEOE_VSA_(mol)),
        ("MQN", 42, rdMolDescriptors.MQNs_(mol)),
    )
    for prefix, count, vector in vector_descriptors:
        names = [prefix + str(i) for i in range(1, count + 1)]
        values.update(zip(names, vector))

    # Build the frame in one go; dict insertion order fixes column order.
    return pd.DataFrame(values, index=[0])
Esempio n. 8
0
 def testMQNDetails(self):
   """Check per-molecule MQN vectors against the pickled reference data.

   Each molecule of ``aromat_regress.txt`` must yield the MQN vector
   stored at the same position in ``MQNs_regress.pkl``. Differing
   ``(index, actual, expected)`` triples are printed before the
   assertion fails.
   """
   refFile = os.path.join(RDConfig.RDCodeDir,'Chem','test_data','MQNs_regress.pkl')
   # Open with a context manager so the handle is closed; the default
   # (text) mode of the original file() call is kept.
   with open(refFile) as inf:
     refData = cPickle.load(inf)
   fn = os.path.join(RDConfig.RDCodeDir,'Chem','test_data','aromat_regress.txt')
   ms = [x for x in Chem.SmilesMolSupplier(fn,delimiter='\t')]
   for i,m in enumerate(ms):
     mqns = rdMolDescriptors.MQNs_(m)
     if mqns!=refData[i][1]:
       indices=[(j,x,y) for j,x,y in zip(range(len(mqns)),mqns,refData[i][1]) if x!=y]
       # Single formatted string: same output as the old print statement,
       # and valid under both Python 2 and Python 3.
       print('%s %s' % (Chem.MolToSmiles(m), indices))
     # assertEqual replaces the deprecated failUnlessEqual alias.
     self.assertEqual(mqns,refData[i][1])
Esempio n. 9
0
 def testMQN(self):
   """Check the element-wise sum of MQN vectors over the regression set.

   The MQN vectors of every molecule in ``aromat_regress.txt`` are summed
   and the totals compared with the hard-coded target below.
   """
   tgt = [42917,   274,   870,   621,   135,  1582,    29,  3147,  5463,
       6999,   470,    81, 19055,  4424,   309, 24061, 17820,     1,
       9303, 24146, 16076,  5560,  4262,   646,   746, 13725,  5430,
       2629,   362, 24211, 15939,   292,    41,    20,  1852,  5642,
         31,     9,     1,     2,  3060,  1750]
   fn = os.path.join(RDConfig.RDCodeDir,'Chem','test_data','aromat_regress.txt')
   ms = [x for x in Chem.SmilesMolSupplier(fn,delimiter='\t')]
   vs = np.zeros((42,),np.int32)
   for m in ms:
     vs += rdMolDescriptors.MQNs_(m)
   # assertEqual on plain lists replaces the deprecated failIf alias and
   # reports which positions differ when the check fails.
   self.assertEqual(list(vs), tgt)
Esempio n. 10
0
def get_fingerprint(in_smiles):
    """Compute MQN vectors for the usable SMILES in ``in_smiles``.

    SMILES that fail to parse (``MolFromSmiles`` returns ``None``) and
    molecules with more than 100 atoms are skipped.

    Args:
        in_smiles: iterable of SMILES strings.

    Returns:
        ``(results, smiles)``: a list of numpy arrays, one per kept
        molecule, and the list of SMILES strings they were computed from,
        in matching order.
    """
    fingerprints = []
    kept_smiles = []
    for smi in in_smiles:
        mol = AllChem.MolFromSmiles(smi)
        # Skip unparsable input and oversized molecules.
        if mol is None or mol.GetNumAtoms() > 100:
            continue
        fingerprints.append(np.array(Descriptors.MQNs_(mol)))
        kept_smiles.append(smi)

    return fingerprints, kept_smiles
Esempio n. 11
0
 def testMQNDetails(self):
   """Compare each molecule's MQN vector with the pickled reference.

   The reference pickle is read as raw bytes and loaded with
   ``encoding='bytes'``. Mismatching positions are printed as
   ``(index, actual, expected)`` before the assertion fails.
   """
   data_dir = os.path.join(RDConfig.RDCodeDir,'Chem','test_data')
   with open(os.path.join(data_dir,'MQNs_regress.pkl'),'rb') as inf:
     raw = inf.read()
   reference = cPickle.loads(raw,encoding='bytes')
   supplier = Chem.SmilesMolSupplier(os.path.join(data_dir,'aromat_regress.txt'),delimiter='\t')
   for idx,mol in enumerate(list(supplier)):
     actual = rdMolDescriptors.MQNs_(mol)
     expected = reference[idx][1]
     if actual!=expected:
       diffs = [(pos,got,want) for pos,(got,want) in enumerate(zip(actual,expected)) if got!=want]
       print(Chem.MolToSmiles(mol),diffs)
     self.assertEqual(actual,expected)
Esempio n. 12
0
    def compute_MQN_descriptors(self):
        """Compute the MQN-type descriptors.

        Ref: Nguyen et al. ChemMedChem 4:1803-5 (2009)

        Returns:
            MQN_dict: dict mapping the names 'MQN1' ... 'MQN42' to the
                values ``rdDesc.MQNs_`` returns for ``self.Molecule``,
                paired positionally.

        Raises:
            AssertionError: if ``self.Molecule`` is not an RDKit ``Mol``.
        """
        # isinstance (not an exact type comparison) so that subclasses of
        # Mol are accepted as well.
        assert isinstance(self.Molecule, Chem.rdchem.Mol)

        MQN_names = ['MQN' + str(i) for i in range(1, 43)]

        return dict(zip(MQN_names, rdDesc.MQNs_(self.Molecule)))
Esempio n. 13
0
def feature_fp(smiles):
    """Build a feature vector for a SMILES string.

    The vector is the MQN list from ``rdMolDescriptors.MQNs_`` followed by
    16 additional ``rdMolDescriptors`` values, in the order listed below.

    Args:
        smiles: SMILES string, parsed with ``Chem.MolFromSmiles``.

    Returns:
        numpy array holding the concatenated features.
    """
    mol = Chem.MolFromSmiles(smiles)
    fp = rdMolDescriptors.MQNs_(mol)

    extra_descriptors = (
        rdMolDescriptors.CalcNumRotatableBonds,
        rdMolDescriptors.CalcExactMolWt,
        # NOTE(review): rotatable bonds appear a second time here; kept so
        # the length and layout of the feature vector stay unchanged.
        rdMolDescriptors.CalcNumRotatableBonds,
        rdMolDescriptors.CalcFractionCSP3,
        rdMolDescriptors.CalcNumAliphaticCarbocycles,
        rdMolDescriptors.CalcNumAliphaticHeterocycles,
        rdMolDescriptors.CalcNumAliphaticRings,
        rdMolDescriptors.CalcNumAromaticCarbocycles,
        rdMolDescriptors.CalcNumAromaticHeterocycles,
        rdMolDescriptors.CalcNumAromaticRings,
        rdMolDescriptors.CalcNumBridgeheadAtoms,
        rdMolDescriptors.CalcNumRings,
        rdMolDescriptors.CalcNumAmideBonds,
        rdMolDescriptors.CalcNumHeterocycles,
        rdMolDescriptors.CalcNumSpiroAtoms,
        rdMolDescriptors.CalcTPSA,
    )
    for descriptor in extra_descriptors:
        fp.append(descriptor(mol))

    return np.array(fp)
Esempio n. 14
0
def CalculateStandAloneDescriptor(molObject):
    """Collect the stand-alone descriptors of a molecule into one list.

    Args:
        molObject: the RDKit molecule to describe.

    Returns:
        list: in order, a 0.0/1.0 flag derived from the return value of
        ``AllChem.ComputeGasteigerCharges``, the amide-bond count, the
        spiro-atom count, the bridgehead-atom count, and then every entry
        of the MQN vector.

    Raises:
        Whatever the underlying RDKit calls raise; nothing is caught here.
    """
    # NOTE(review): the flag is 0.0 exactly when ComputeGasteigerCharges
    # returns None. If that call never returns a value, this feature is
    # constant -- confirm what was intended.
    charges_result = AllChem.ComputeGasteigerCharges(molObject)
    value_list = [0.0 if charges_result is None else 1.0]

    value_list.append(rdMolDescriptors.CalcNumAmideBonds(molObject))
    value_list.append(rdMolDescriptors.CalcNumSpiroAtoms(molObject))
    value_list.append(rdMolDescriptors.CalcNumBridgeheadAtoms(molObject))
    value_list += rdMolDescriptors.MQNs_(molObject)

    return value_list
Esempio n. 15
0
# Structure-based embeddings: functional-group values (NaNs replaced via
# np.nan_to_num) and graph values, both taken from objects defined
# outside this fragment.
embed_fn = np.nan_to_num(fngroups.values)
embed_graph = graph.values

# Molecular fingerprints, one entry per SMILES string.
# Background: https://www.rdkit.org/UGM/2012/Landrum_RDKit_UGM.Fingerprints.Final.pptx.pdf
finger_mqn = []
finger_morgan = []  # stays empty: the Morgan computation below is disabled
finger_maccs = []
finger_ap = []

# NOTE(review): `Descriptors` is used here for MQNs_ and the fingerprint
# getters -- presumably an alias bound outside this fragment; confirm
# against the file's imports.
for i in smiles:
    # NOTE(review): a SMILES that fails to parse would make `mol` None and
    # is not guarded against here.
    mol = AllChem.MolFromSmiles(i)

    finger_mqn.append(np.array(Descriptors.MQNs_(mol)))
    finger_maccs.append(np.array(Descriptors.GetMACCSKeysFingerprint((mol))))
    # Disabled: Morgan fingerprint
    #finger_morgan.append(np.array(Descriptors.GetMorganFingerprint((mol))))
    finger_ap.append(np.array(Descriptors.GetAtomPairFingerprint((mol))))

###
names = 'vec_spec,vec_smiles,embed_fn,finger_mqn,finger_maccs,finger_ap,embed_graph'.split(
    ',')
data = [
    vec_spec, vec_smiles, embed_fn, finger_mqn, finger_maccs, finger_ap,
    embed_graph
]
counter = 0
for i in data:
    try:
        res = do_pca(i)
Esempio n. 16
0
def getMQN(mol):
    """Return the MQN descriptor vector ``rdMolDescriptors.MQNs_`` gives for ``mol``."""
    return rdMolDescriptors.MQNs_(mol)