예제 #1
0
 def test_RDKit(self):
     """Check the first three RDKit descriptors computed for propane.

     Descriptor values that are NaN are replaced by '?' before the result
     is compared with the expected dictionary.
     """
     from cinfony import rdk
     propane = rdk.readstring("smi", "CCC")
     computed = propane.calcdesc(rdk.descs[0:3])
     # A value that is not equal to itself is NaN; collect those keys first,
     # then overwrite them with the placeholder.
     nanKeys = [key for key in computed if computed[key] != computed[key]]
     for key in nanKeys:
         computed[key] = '?'
     reference = {'fr_Ar_COO': 0, 'Chi4v': 0.0, 'fr_C_O_noCOO': 0}
     self.assertEqual(computed, reference)
예제 #2
0
 def test_RDKit(self):
     """Verify the first three RDKit descriptors of the SMILES "CCC".

     Any NaN descriptor value is turned into '?' so that the computed
     dictionary can be compared with the expected one.
     """
     from cinfony import rdk
     molecule = rdk.readstring("smi", "CCC")
     values = molecule.calcdesc(rdk.descs[0:3])
     # Only NaN fails the self-equality check below.
     for name in list(values):
         current = values[name]
         if current != current:
             values[name] = '?'
     wanted = {'fr_Ar_COO': 0, 'Chi4v': 0.0, 'fr_C_O_noCOO': 0}
     self.assertEqual(values, wanted)
예제 #3
0
def validate(fn, typ):
    """Write the molecules of an SDF file that RDKit can parse to a new file.

    Every molecule read from ``fn`` with pybel is retitled as
    ``<typ>_<original title>_<index>`` (index starting at 0), converted to a
    MOL block and re-read with RDKit. Molecules that survive are written to
    the output file; failures are printed and skipped. A summary line with
    the percentage of good molecules is printed at the end.

    The output path is ``fn`` with ``.sdf.gz`` replaced by ``_sanitized.sdf``.
    Nothing is done when that path already exists as a file (which is also
    the case when ``fn`` does not contain ``.sdf.gz`` and itself exists).

    Args:
        fn: path of the input SDF file.
        typ: prefix prepended to every molecule title.

    Returns:
        None.
    """
    ofn = fn.replace('.sdf.gz', '_sanitized.sdf')
    if os.path.isfile(ofn):
        return

    good = 0
    total = 0
    o = rdk.Outputfile('sdf', ofn, overwrite=True)
    try:
        for n, mol in enumerate(pybel.readfile('sdf', fn)):
            mol.title = typ + "_" + mol.title + "_" + str(n)
            total += 1
            try:
                o.write(rdk.readstring('mol', mol.write('mol')))
                good += 1
            except Exception as e:
                # Best effort: report the molecule RDKit rejected and go on.
                print(e)
    finally:
        # Close the writer even if reading the input fails half-way.
        o.close()

    # An empty input would otherwise raise ZeroDivisionError here.
    percent = good * 100. / total if total else 0.0
    print("%s %% good mols (%s/%s)" % (percent, good, total))
예제 #4
0
def clean(fn):
    """Filter out of an SDF file the molecules already present in the ligand set.

    The reference ligands are read from ``../PDB/ligands_allgood.sdf``
    relative to the directory of ``fn``; if that file is missing the function
    returns without doing anything. Molecules of ``fn`` whose InChIKey is not
    among the reference InChIKeys, and which RDKit can re-read from their MOL
    block, are written to the output file. A summary line with the percentage
    of remaining molecules is printed at the end.

    The output path is ``fn`` with ``.sdf`` replaced by ``_filtered.sdf``;
    nothing is done when it already exists as a file.

    Args:
        fn: path of the input SDF file.

    Returns:
        None.
    """
    ligands = os.path.join(os.path.dirname(fn), "..", "PDB",
                           "ligands_allgood.sdf")
    if not os.path.isfile(ligands):
        return
    ofn = fn.replace('.sdf', '_filtered.sdf')
    if os.path.isfile(ofn):
        # Checked before reading the ligands: the key set is only needed
        # when there is actually something to filter.
        return

    inchikeys = set(
        mol.write('inchikey') for mol in pybel.readfile('sdf', ligands))

    good = 0
    total = 0
    o = rdk.Outputfile('sdf', ofn, overwrite=True)
    try:
        for mol in pybel.readfile('sdf', fn):
            total += 1
            if mol.write('inchikey') in inchikeys:
                continue
            try:
                o.write(rdk.readstring('mol', mol.write('mol')))
                good += 1
            except Exception:
                # Deliberately silent: molecules RDKit cannot handle are
                # simply left out of the filtered file.
                pass
    finally:
        # Close the writer even if reading the input fails half-way.
        o.close()

    # An empty input would otherwise raise ZeroDivisionError here.
    percent = good * 100. / total if total else 0.0
    print("%s %% remaining mols (%s/%s)" % (percent, good, total))
예제 #5
0
def getRdkDescResult(data, descList, radius=1):
    """Calculate RDKit descriptors, and optionally Morgan fingerprints, for a dataset.

    Only the entries of ``descList`` that contain ``rdkTag`` are handled; the
    tag is stripped before the names are passed to the toolkit. The special
    name ``FingerPrints`` requests Morgan fingerprint counts: every non-zero
    fingerprint ID found in the data becomes one ``<rdkTag>FP_<ID>`` attribute,
    set to 0 for the compounds that do not have it.

    It expects an attribute containing smiles with a name defined in
    AZOrangeConfig.SMILESNAMES (the attribute is located by ``getSMILESAttr``).

    Args:
        data: dataset whose examples carry the SMILES attribute.
        descList: descriptor names; those without ``rdkTag`` are ignored.
        radius: radius passed to ``GetMorganFingerprint``.

    Returns:
        An ``orange.ExampleTable`` holding the SMILES attribute followed by
        one float attribute per descriptor and per fingerprint ID. Compounds
        for which the calculation fails are left out and counted as ignored.
        ``None`` is returned when the rdk toolkit is not enabled, when no
        SMILES attribute is found, or when ``descList`` holds no rdk
        descriptor.
    """
    if "rdk" not in toolkitsEnabled:
        return None
    smilesName = getSMILESAttr(data)
    if not smilesName:
        return None

    myDescList = [desc.replace(rdkTag, "") for desc in descList if rdkTag in desc]
    if not myDescList:
        return None

    FingerPrints = "FingerPrints" in myDescList
    if FingerPrints:
        myDescList.remove("FingerPrints")

    def _molFromSmiles(smiles):
        # Second argument is presumably RDKit's sanitize flag (TODO confirm):
        # retry with it switched off when the first parse gives nothing.
        chemMol = rdk.Chem.MolFromSmiles(smiles, True)
        if not chemMol:
            chemMol = rdk.Chem.MolFromSmiles(smiles, False)
        return chemMol

    # Get fingerprints in advance, so that the full set of fingerprint
    # attributes is known before the result domain is built.
    fingerPrintsAttrs = []
    fingerPrintsNames = set()   # names already in fingerPrintsAttrs (O(1) lookup)
    fingerPrintsRes = {}
    if FingerPrints:
        for ex in data:
            mol = str(ex[smilesName].value)
            try:
                fingerPrint = rdk.AllChem.GetMorganFingerprint(_molFromSmiles(mol), radius)
                resDict = fingerPrint.GetNonzeroElements()
            except Exception:
                continue
            fingerPrintsRes[mol] = {}
            for ID in resDict:
                name = rdkTag + "FP_" + str(ID)
                if name not in fingerPrintsNames:
                    fingerPrintsNames.add(name)
                    fingerPrintsAttrs.append(orange.FloatVariable(name))
                fingerPrintsRes[mol][name] = int(resDict[ID])

    descAttrs = [orange.FloatVariable(rdkTag + name) for name in myDescList]
    resData = orange.ExampleTable(
        orange.Domain([data.domain[smilesName]] + descAttrs + fingerPrintsAttrs, 0))

    badCompounds = 0
    for ex in data:
        newEx = orange.Example(resData.domain)
        newEx[smilesName] = ex[smilesName]
        molStr = str(newEx[smilesName].value)
        try:
            mol = rdk.readstring("mol", rdk.Chem.MolToMolBlock(_molFromSmiles(molStr)))
            moldesc = mol.calcdesc(myDescList)
            for desc in myDescList:
                newEx[rdkTag + desc] = moldesc[desc]

            # Process fingerprints: a compound whose fingerprint could not be
            # computed has no entry here and ends up in the ignored count.
            if FingerPrints:
                for attr in fingerPrintsAttrs:
                    newEx[attr.name] = fingerPrintsRes[molStr].get(attr.name, 0)
            resData.append(newEx)
        except Exception:
            badCompounds += 1

    print("%s %s" % ("Compounds in original data:       ", len(data)))
    print("%s %s" % ("Compounds able to calculate descs:", len(resData)))
    print("%s %s" % ("Ignored Compounds:                ", badCompounds))

    return resData
예제 #6
0
def get_RMSD_value(refmol, probemol):
    """Return the best RMSD between two molecules given as MOL data.

    Both arguments are converted with ``str`` and read as 'mol' format.
    """
    # NOTE(review): RDKit documents GetBestRMS as (probe, reference) and the
    # reference molecule is passed first here -- confirm this is intended.
    reference, probe = [rdk.readstring('mol', str(block))
                        for block in (refmol, probemol)]
    return rdk.Chem.AllChem.GetBestRMS(reference.Mol, probe.Mol)
예제 #7
0
def getRdkDescResult(data, descList, radius=1):
    """Calculate RDKit descriptors, and optionally Morgan fingerprints, for a dataset.

    Only the entries of ``descList`` that contain the rdk tag
    (``toolkitsDef["rdk"]["tag"]``) are handled; the tag is stripped before
    the names are passed to the toolkit. Fingerprints are requested either
    with the special name ``FingerPrints`` or by naming individual
    ``FP_<ID>`` attributes; the latter are always present in the result,
    with value 0.0 when a compound does not have that fingerprint.

    It expects an attribute containing smiles with a name defined in
    AZOrangeConfig.SMILESNAMES (the attribute is located by ``getSMILESAttr``).

    Args:
        data: dataset whose examples carry the SMILES attribute.
        descList: descriptor names; those without the rdk tag are ignored.
        radius: radius passed to ``GetMorganFingerprint``.

    Returns:
        An ``orange.ExampleTable`` holding the SMILES attribute followed by
        the descriptor attributes (string or float, as found on the first
        compound that can be processed) and the fingerprint attributes.
        Compounds for which the calculation fails are left out and counted
        as ignored. ``None`` is returned when the rdk toolkit is not enabled,
        when no SMILES attribute is found, or when ``descList`` holds no rdk
        descriptor.
    """
    if "rdk" not in toolkitsEnabled:
        return None
    smilesName = getSMILESAttr(data)
    if not smilesName:
        return None

    tag = toolkitsDef["rdk"]["tag"]
    myDescList = [desc.replace(tag, "") for desc in descList if tag in desc]
    if not myDescList:
        return None

    FingerPrints = False
    FP_desc = []
    if "FingerPrints" in myDescList:
        FingerPrints = True
        myDescList.remove("FingerPrints")
    if any("FP_" in name for name in myDescList):
        # Explicitly named fingerprint attributes are split from the regular
        # descriptors; they are not known to calcdesc.
        FingerPrints = True
        FP_desc = [name for name in myDescList if "FP_" in name]
        myDescList = [name for name in myDescList if "FP_" not in name]

    def _molFromSmiles(smiles):
        # Second argument is presumably RDKit's sanitize flag (TODO confirm):
        # retry with it switched off when the first parse gives nothing.
        chemMol = rdk.Chem.MolFromSmiles(smiles, True)
        if not chemMol:
            chemMol = rdk.Chem.MolFromSmiles(smiles, False)
        return chemMol

    # Get fingerprints in advance, so that the full set of fingerprint
    # attributes is known before the result domain is built.
    fingerPrintsAttrs = []
    fingerPrintsNames = set()   # names already in fingerPrintsAttrs (O(1) lookup)
    fingerPrintsRes = {}
    if FingerPrints:
        for ex in data:
            mol = str(ex[smilesName].value)
            try:
                fingerPrint = rdk.AllChem.GetMorganFingerprint(_molFromSmiles(mol), radius)
                resDict = fingerPrint.GetNonzeroElements()
            except Exception:
                continue
            fingerPrintsRes[mol] = {}
            for ID in resDict:
                name = tag + "FP_" + str(ID)
                if name not in fingerPrintsNames:
                    fingerPrintsNames.add(name)
                    fingerPrintsAttrs.append(orange.FloatVariable(name))
                fingerPrintsRes[mol][name] = float(resDict[ID])
        # Add FP attributes even if there was no reference to it. Models will
        # need it as FP not present, i.e. equal 0.0 !
        for fpDesc in FP_desc:
            name = tag + fpDesc
            if name not in fingerPrintsNames:
                fingerPrintsNames.add(name)
                fingerPrintsAttrs.append(orange.FloatVariable(name))

    # Test attrTypes: the first compound that can be fully processed decides
    # whether each descriptor becomes a string or a float attribute. Starting
    # from an empty list keeps the domain valid for empty data, and building
    # into a scratch list avoids keeping a half-filled one after a failure.
    attrObj = []
    for ex in data:
        candidate = []
        try:
            molStr = str(ex[smilesName].value)
            mol = rdk.readstring("mol", rdk.Chem.MolToMolBlock(_molFromSmiles(molStr)))
            moldesc = mol.calcdesc(myDescList)
            for desc in myDescList:
                if isinstance(moldesc[desc], str):
                    candidate.append(orange.StringVariable(tag + desc))
                else:
                    candidate.append(orange.FloatVariable(tag + desc))
        except Exception:
            continue
        attrObj = candidate
        # Process fingerprints: append those not already present by name.
        if FingerPrints:
            presentNames = set(attr.name for attr in attrObj)
            attrObj.extend(fp for fp in fingerPrintsAttrs
                           if fp.name not in presentNames)
        break

    resData = orange.ExampleTable(orange.Domain([data.domain[smilesName]] + attrObj, 0))
    badCompounds = 0
    for ex in data:
        newEx = orange.Example(resData.domain)   # All attrs: ?, ?, ?, ..., ?
        newEx[smilesName] = ex[smilesName]
        molStr = str(newEx[smilesName].value)
        try:
            mol = rdk.readstring("mol", rdk.Chem.MolToMolBlock(_molFromSmiles(molStr)))
            moldesc = mol.calcdesc(myDescList)
            for desc in myDescList:
                newEx[tag + desc] = moldesc[desc]
            # Process fingerprints: a compound whose fingerprint could not be
            # computed has no entry here and ends up in the ignored count.
            if FingerPrints:
                for attr in fingerPrintsAttrs:
                    newEx[attr.name] = fingerPrintsRes[molStr].get(attr.name, 0.0)
            resData.append(newEx)
        except Exception:
            badCompounds += 1

    print("%s %s" % ("Compounds in original data:       ", len(data)))
    print("%s %s" % ("Compounds able to calculate descs:", len(resData)))
    print("%s %s" % ("Ignored Compounds:                ", badCompounds))

    return resData
예제 #8
0
def getRdkDescResult(data, descList, radius=1):
    """Calculate RDKit descriptors, and optionally Morgan fingerprints, for a dataset.

    Only the entries of ``descList`` that contain the rdk tag
    (``toolkitsDef["rdk"]["tag"]``) are handled; the tag is stripped before
    the names are passed to the toolkit. Fingerprints are requested either
    with the special name ``FingerPrints`` or by naming individual
    ``FP_<ID>`` attributes; the latter are always present in the result,
    with value 0.0 when a compound does not have that fingerprint.

    It expects an attribute containing smiles with a name defined in
    AZOrangeConfig.SMILESNAMES (the attribute is located by ``getSMILESAttr``).

    Args:
        data: dataset whose examples carry the SMILES attribute.
        descList: descriptor names; those without the rdk tag are ignored.
        radius: radius passed to ``GetMorganFingerprint``.

    Returns:
        An ``orange.ExampleTable`` holding the SMILES attribute followed by
        the descriptor attributes (string or float, as found on the first
        compound that can be processed) and the fingerprint attributes.
        Compounds for which the calculation fails are left out and counted
        as ignored. ``None`` is returned when the rdk toolkit is not enabled,
        when no SMILES attribute is found, or when ``descList`` holds no rdk
        descriptor.
    """
    if "rdk" not in toolkitsEnabled:
        return None
    smilesName = getSMILESAttr(data)
    if not smilesName:
        return None

    tag = toolkitsDef["rdk"]["tag"]
    myDescList = [desc.replace(tag, "") for desc in descList if tag in desc]
    if not myDescList:
        return None

    FingerPrints = False
    FP_desc = []
    if "FingerPrints" in myDescList:
        FingerPrints = True
        myDescList.remove("FingerPrints")
    if any("FP_" in name for name in myDescList):
        # Explicitly named fingerprint attributes are split from the regular
        # descriptors; they are not known to calcdesc.
        FingerPrints = True
        FP_desc = [name for name in myDescList if "FP_" in name]
        myDescList = [name for name in myDescList if "FP_" not in name]

    def _molFromSmiles(smiles):
        # Second argument is presumably RDKit's sanitize flag (TODO confirm):
        # retry with it switched off when the first parse gives nothing.
        chemMol = rdk.Chem.MolFromSmiles(smiles, True)
        if not chemMol:
            chemMol = rdk.Chem.MolFromSmiles(smiles, False)
        return chemMol

    # Get fingerprints in advance, so that the full set of fingerprint
    # attributes is known before the result domain is built.
    fingerPrintsAttrs = []
    fingerPrintsNames = set()   # names already in fingerPrintsAttrs (O(1) lookup)
    fingerPrintsRes = {}
    if FingerPrints:
        for ex in data:
            mol = str(ex[smilesName].value)
            try:
                fingerPrint = rdk.AllChem.GetMorganFingerprint(_molFromSmiles(mol), radius)
                resDict = fingerPrint.GetNonzeroElements()
            except Exception:
                continue
            fingerPrintsRes[mol] = {}
            for ID in resDict:
                name = tag + "FP_" + str(ID)
                if name not in fingerPrintsNames:
                    fingerPrintsNames.add(name)
                    fingerPrintsAttrs.append(orange.FloatVariable(name))
                fingerPrintsRes[mol][name] = float(resDict[ID])
        # Add FP attributes even if there was no reference to it. Models will
        # need it as FP not present, i.e. equal 0.0 !
        for fpDesc in FP_desc:
            name = tag + fpDesc
            if name not in fingerPrintsNames:
                fingerPrintsNames.add(name)
                fingerPrintsAttrs.append(orange.FloatVariable(name))

    # Test attrTypes: the first compound that can be fully processed decides
    # whether each descriptor becomes a string or a float attribute. Starting
    # from an empty list keeps the domain valid for empty data, and building
    # into a scratch list avoids keeping a half-filled one after a failure.
    attrObj = []
    for ex in data:
        candidate = []
        try:
            molStr = str(ex[smilesName].value)
            mol = rdk.readstring("mol", rdk.Chem.MolToMolBlock(_molFromSmiles(molStr)))
            moldesc = mol.calcdesc(myDescList)
            for desc in myDescList:
                if isinstance(moldesc[desc], str):
                    candidate.append(orange.StringVariable(tag + desc))
                else:
                    candidate.append(orange.FloatVariable(tag + desc))
        except Exception:
            continue
        attrObj = candidate
        # Process fingerprints: append those not already present by name.
        if FingerPrints:
            presentNames = set(attr.name for attr in attrObj)
            attrObj.extend(fp for fp in fingerPrintsAttrs
                           if fp.name not in presentNames)
        break

    resData = orange.ExampleTable(orange.Domain([data.domain[smilesName]] + attrObj, 0))
    badCompounds = 0
    for ex in data:
        newEx = orange.Example(resData.domain)   # All attrs: ?, ?, ?, ..., ?
        newEx[smilesName] = ex[smilesName]
        molStr = str(newEx[smilesName].value)
        try:
            mol = rdk.readstring("mol", rdk.Chem.MolToMolBlock(_molFromSmiles(molStr)))
            moldesc = mol.calcdesc(myDescList)
            for desc in myDescList:
                newEx[tag + desc] = moldesc[desc]

            # Process fingerprints: a compound whose fingerprint could not be
            # computed has no entry here and ends up in the ignored count.
            if FingerPrints:
                for attr in fingerPrintsAttrs:
                    newEx[attr.name] = fingerPrintsRes[molStr].get(attr.name, 0.0)
            resData.append(newEx)
        except Exception:
            badCompounds += 1

    print("%s %s" % ("Compounds in original data:       ", len(data)))
    print("%s %s" % ("Compounds able to calculate descs:", len(resData)))
    print("%s %s" % ("Ignored Compounds:                ", badCompounds))

    return resData