def testCustomVSA(self):
    """Check that CustomProp_VSA_ fed with Gasteiger charges matches PEOE_VSA_.

    The same molecule (SMILES ``c1ccccc1O``) is binned twice: once with the
    built-in ``PEOE_VSA_`` descriptor and once with the generic
    ``CustomProp_VSA_`` entry point, using the ``_GasteigerCharge`` atom
    property and an explicit list of bin edges. The two results must have
    the same number of bins and agree bin by bin within 0.001.
    """
    mol = Chem.MolFromSmiles("c1ccccc1O")
    peoe_vsa = rdMD.PEOE_VSA_(mol)

    # The custom-property call below reads '_GasteigerCharge' from the atoms,
    # so the charges are computed on the molecule first.
    AllChem.ComputeGasteigerCharges(mol)
    # NOTE(review): these edges presumably mirror the default PEOE_VSA bins
    # -- confirm against the RDKit implementation.
    bins = [-.3, -.25, -.20, -.15, -.10, -.05, 0, .05, .10, .15, .20, .25, .30]
    custom_vsa = rdMD.CustomProp_VSA_(mol, customPropName='_GasteigerCharge',
                                      bins=bins)

    # zip() stops at the shorter sequence, so without this check a truncated
    # (or empty) result would let the loop below pass without comparing
    # every bin.
    self.assertEqual(len(peoe_vsa), len(custom_vsa))
    for p, c in zip(peoe_vsa, custom_vsa):
        self.assertTrue(feq(p, c, .001))
def _calculateDescriptors(mol):
    """Compute a one-row DataFrame of RDKit descriptors for ``mol``.

    Args:
        mol: the molecule handed to every RDKit descriptor function below
            (presumably an ``rdkit.Chem.rdchem.Mol`` -- the atom counts are
            read through its ``GetNumHeavyAtoms``/``GetNumAtoms`` methods).

    Returns:
        A ``pandas.DataFrame`` with a single row (index ``0``) and one
        column per descriptor. Scalar descriptors come first, in the order
        listed below, followed by the binned families ``slogp_VSA1..12``,
        ``smr_VSA1..10``, ``peoe_VSA1..14`` and ``MQN1..42``.
    """
    # CalcCrippenDescriptors returns both values in one result; compute it
    # once instead of once per element.
    crippen = rdMolDescriptors.CalcCrippenDescriptors(mol)

    # Collect everything in an (insertion-ordered) dict and build the frame
    # in a single call rather than inserting columns one at a time.
    values = {
        "SlogP": crippen[0],
        "SMR": crippen[1],
        "LabuteASA": rdMolDescriptors.CalcLabuteASA(mol),
        "TPSA": Descriptors.TPSA(mol),
        "AMW": Descriptors.MolWt(mol),
        "ExactMW": rdMolDescriptors.CalcExactMolWt(mol),
        "NumLipinskiHBA": rdMolDescriptors.CalcNumLipinskiHBA(mol),
        "NumLipinskiHBD": rdMolDescriptors.CalcNumLipinskiHBD(mol),
        "NumRotatableBonds": rdMolDescriptors.CalcNumRotatableBonds(mol),
        "NumHBD": rdMolDescriptors.CalcNumHBD(mol),
        "NumHBA": rdMolDescriptors.CalcNumHBA(mol),
        "NumAmideBonds": rdMolDescriptors.CalcNumAmideBonds(mol),
        "NumHeteroAtoms": rdMolDescriptors.CalcNumHeteroatoms(mol),
        "NumHeavyAtoms": mol.GetNumHeavyAtoms(),
        "NumAtoms": mol.GetNumAtoms(),
        "NumRings": rdMolDescriptors.CalcNumRings(mol),
        "NumAromaticRings": rdMolDescriptors.CalcNumAromaticRings(mol),
        "NumSaturatedRings": rdMolDescriptors.CalcNumSaturatedRings(mol),
        "NumAliphaticRings": rdMolDescriptors.CalcNumAliphaticRings(mol),
        "NumAromaticHeterocycles":
            rdMolDescriptors.CalcNumAromaticHeterocycles(mol),
        "NumSaturatedHeterocycles":
            rdMolDescriptors.CalcNumSaturatedHeterocycles(mol),
        "NumAliphaticHeterocycles":
            rdMolDescriptors.CalcNumAliphaticHeterocycles(mol),
        "NumAromaticCarbocycles":
            rdMolDescriptors.CalcNumAromaticCarbocycles(mol),
        "NumSaturatedCarbocycles":
            rdMolDescriptors.CalcNumSaturatedCarbocycles(mol),
        "NumAliphaticCarbocycles":
            rdMolDescriptors.CalcNumAliphaticCarbocycles(mol),
        "FractionCSP3": rdMolDescriptors.CalcFractionCSP3(mol),
        "Chi0v": rdMolDescriptors.CalcChi0v(mol),
        "Chi1v": rdMolDescriptors.CalcChi1v(mol),
        "Chi2v": rdMolDescriptors.CalcChi2v(mol),
        "Chi3v": rdMolDescriptors.CalcChi3v(mol),
        "Chi4v": rdMolDescriptors.CalcChi4v(mol),
        "Chi1n": rdMolDescriptors.CalcChi1n(mol),
        "Chi2n": rdMolDescriptors.CalcChi2n(mol),
        "Chi3n": rdMolDescriptors.CalcChi3n(mol),
        "Chi4n": rdMolDescriptors.CalcChi4n(mol),
        "HallKierAlpha": rdMolDescriptors.CalcHallKierAlpha(mol),
        "kappa1": rdMolDescriptors.CalcKappa1(mol),
        "kappa2": rdMolDescriptors.CalcKappa2(mol),
        "kappa3": rdMolDescriptors.CalcKappa3(mol),
    }

    # (column prefix, number of column names, function returning the values)
    binned_families = (
        ("slogp_VSA", 12, rdMolDescriptors.SlogP_VSA_),
        ("smr_VSA", 10, rdMolDescriptors.SMR_VSA_),
        ("peoe_VSA", 14, rdMolDescriptors.PEOE_VSA_),
        ("MQN", 42, rdMolDescriptors.MQNs_),
    )
    for prefix, count, calculator in binned_families:
        names = [prefix + str(i) for i in range(1, count + 1)]
        # Names and values are paired positionally; zip() stops at the
        # shorter of the two sequences.
        values.update(zip(names, calculator(mol)))

    return pd.DataFrame(values, index=[0])
def compute_MOE_descriptors(self):
    """Compute the MOE-type (VSA-binned) descriptors of ``self.Molecule``.

    Five descriptor families are evaluated -- SlogP_VSA, SMR_VSA, PEOE_VSA,
    EState_VSA and VSA_EState -- and every returned value is stored under
    the family name suffixed with its 1-based position, e.g.
    ``'SlogP_VSA1'``.

    Returns:
        MOE_dict: dictionary mapping descriptor name to descriptor value
            (data type: float).

    Raises:
        AssertionError: if ``self.Molecule`` is not an RDKit ``Mol``.
    """
    # isinstance (rather than an exact type comparison) also accepts
    # subclasses of Mol.
    assert isinstance(self.Molecule, Chem.rdchem.Mol)

    # (name prefix, number of names generated, function returning the values)
    families = (
        ('SlogP_VSA', 12, rdDesc.SlogP_VSA_),
        ('SMR_VSA', 10, rdDesc.SMR_VSA_),
        ('PEOE_VSA', 14, rdDesc.PEOE_VSA_),
        ('EState_VSA', 11, EState.EState_VSA_),
        ('VSA_EState', 11, EState.VSA_EState_),
    )

    MOE_dict = {}
    for prefix, n_names, calculator in families:
        names = [prefix + str(i) for i in range(1, n_names + 1)]
        # Names and values are paired positionally; zip() stops at the
        # shorter sequence, so a surplus name never produces a key.
        MOE_dict.update(zip(names, calculator(self.Molecule)))
    return MOE_dict
def extractFeatureData(mol):
    """Flatten the VSA descriptor vectors and H-bond counts of ``mol``.

    The SMR_VSA, SlogP_VSA and PEOE_VSA results and the H-bond donor and
    acceptor counts are visited in that order. A value that is an ``int``
    is appended as a single entry; anything else is iterated and its
    elements are appended one by one.

    Returns:
        A plain Python list of the collected values (no NumPy conversion is
        performed here).
    """
    descriptors = (
        rdMolDescriptors.SMR_VSA_(mol),
        rdMolDescriptors.SlogP_VSA_(mol),
        rdMolDescriptors.PEOE_VSA_(mol),
        rdMolDescriptors.CalcNumHBD(mol),
        rdMolDescriptors.CalcNumHBA(mol),
    )

    flattened = []
    for item in descriptors:
        if not isinstance(item, int):
            # Vector-valued descriptor: splice its elements in.
            flattened.extend(item)
        else:
            # Scalar count: keep it as one entry.
            flattened.append(item)
    return flattened
def extractFeatureData(mol):
    """Flatten the VSA descriptor vectors and H-bond counts of ``mol``.

    The SMR_VSA, SlogP_VSA and PEOE_VSA results and the H-bond donor and
    acceptor counts are visited in that order. A value that is an ``int``
    is appended as a single entry; anything else is iterated and its
    elements are appended one by one.

    Side effect:
        Sets the module-level global ``index_of_1d_feature`` to ``-1``.

    Returns:
        A plain Python list of the collected values (no NumPy conversion is
        performed here).
    """
    global index_of_1d_feature

    descriptors = (
        rdMolDescriptors.SMR_VSA_(mol),
        rdMolDescriptors.SlogP_VSA_(mol),
        rdMolDescriptors.PEOE_VSA_(mol),
        rdMolDescriptors.CalcNumHBD(mol),
        rdMolDescriptors.CalcNumHBA(mol),
    )

    # This must point at a 1D (scalar) feature. A negative index counts
    # backwards from the end of a list, so -1 designates the last entry.
    index_of_1d_feature = -1

    flattened = []
    for item in descriptors:
        if not isinstance(item, int):
            # Vector-valued descriptor: splice its elements in.
            flattened.extend(item)
        else:
            # Scalar count: keep it as one entry.
            flattened.append(item)
    return flattened