Beispiel #1
0
    def getDescriptors(self, smiles):
        self.getSmilesData(smiles)

        # Calculate descriptors defined in the model files
        descList = self.model.varNames
   
        savedSmilesData = dataUtilities.DataTable(self.smilesData)

        #Try 3 time to get All compounds descriptors
        nTry = 3       
        errorDesc = "" 
        while nTry > 0:
           try:
                traceLog = "Model Location:"+str(self.modelLocation)+"\n"
                nBadEx = 0        
                # Determine Signature and non-Signature descriptor names
                cinfonyDesc, clabDesc, signatureHeight, bbrcDesc, signDesc = descUtilities.getDescTypes(descList)
                # Signatures
                if "sign" in DescMethodsAvailable and signatureHeight:
                    traceLog += "Calculating signatures...\n"
                    print "Calculating signatures...."
                    preCalcData = dataUtilities.DataTable(self.preDefSignatureFile)
                    startHeight = 0                # Not used desc ignored in model prediction
                    endHeight = signatureHeight  
                    self.smilesData  = getSignatures.getSignatures(self.smilesData, startHeight, endHeight, preCalcData)

                # C-Lab desc
                if "clab" in DescMethodsAvailable and clabDesc:
                    traceLog += "Calculating C-Lab...\n"
                    print "Calculating C-Lab desc...."
                    self.smilesData = ClabUtilities.appendCLabDesc(clabDesc, self.smilesData)

                # Cinfony
                if cinfonyDesc:
                    traceLog += "Calculating Cinfony...\n"
                    print "Calculating Cinfony desc..."
                    self.smilesData = getCinfonyDesc.getCinfonyDescResults(self.smilesData, cinfonyDesc, radius = 5)

                # bbrcDesc
                if "bbrc" in DescMethodsAvailable and bbrcDesc:
                    traceLog += "Calculating BBRC...\n"
                    print "Calculating BBRC desc..."
                    self.smilesData = getBBRCDesc.getBBRCDescResult(self.smilesData, algo = "FTM", minSupPar = 1, descList = bbrcDesc)

                # Detect if the descripts calaculation or something else went wrong!
                for ex in self.smilesData:
                   if sum([ex[attr].isSpecial() for attr in self.smilesData.domain.attributes]) == len(self.smilesData.domain.attributes):
                        nBadEx +=1
                if nBadEx:
                    traceLog += "WARNING: Desc. Calculation: From the "+str(len(self.smilesData))+" compounds, "+str(nBadEx)+" could not be calculated!\n"
                    print "WARNING: Desc. Calculation: From the "+str(len(self.smilesData))+" compounds, "+str(nBadEx)+" could not be calculated!"
                    print "WARNING:   Tying again..."
                    self.smilesData = dataUtilities.DataTable(savedSmilesData)
                    nTry -= 1
                else:
                    nTry = 0
           except Exception, e:
                errorDesc = "Error Calculating Descriptors:;"+traceLog+str(e)+";"
                nTry -= 1
    def getClabTasksAndSignatures(self, smiles):
        """Set ``self.exToPred`` from the signatures of ``smiles`` plus the C-Lab tasks.

        Nothing is done (and None is returned) unless both "clab" and "sign"
        are listed in ``DescMethodsAvailable``.

        smiles -- passed unchanged to ``self.getSmilesData``.
        """
        bothAvailable = "clab" in DescMethodsAvailable and "sign" in DescMethodsAvailable
        if not bothAvailable:
            return
        self.getSmilesData(smiles)

        # Signatures for heights 0 to 1; the table loaded from
        # self.preDefSignatureFile is handed to getSignatures as its 4th argument.
        knownSignatures = dataUtilities.DataTable(self.preDefSignatureFile)
        firstHeight, lastHeight = 0, 1
        signatureData = getSignatures.getSignatures(
            self.smilesData, firstHeight, lastHeight, knownSignatures)

        # C-Lab tasks are appended on top of the signature data
        self.exToPred = ClabUtilities.appendCLabTasks(self.clabTasks, signatureData)
Beispiel #3
0
    def getClabTasksAndSignatures(self, smiles):
        """Compute signatures for ``smiles`` and append the C-Lab tasks to them.

        The result is stored in ``self.exToPred``.  The method returns early,
        leaving the object untouched, when either the "clab" or the "sign"
        method is missing from ``DescMethodsAvailable``.

        smiles -- passed unchanged to ``self.getSmilesData``.
        """
        for requiredMethod in ("clab", "sign"):
            if requiredMethod not in DescMethodsAvailable:
                return

        self.getSmilesData(smiles)

        # Signatures, heights 0 through 1
        preCalculated = dataUtilities.DataTable(self.preDefSignatureFile)
        withSignatures = getSignatures.getSignatures(self.smilesData, 0, 1, preCalculated)

        # C-Lab tasks
        self.exToPred = ClabUtilities.appendCLabTasks(self.clabTasks, withSignatures)
Beispiel #4
0
    def getClabDescSignList(self, smiles, getMolFile=False):
        """Return the C-Lab descriptor names and the signature list for one SMILES.

        The descriptor names are taken from ``self.model.varNames`` or, when the
        model has no such attribute, from the names of
        ``self.model.domain.variables``.  Signatures are only calculated when
        "sign" is in ``DescMethodsAvailable`` and the model uses signatures.

        smiles     -- the SMILES string to process.
        getMolFile -- when true, also write the first SDF record to a new file.

        Returns
            (clabDesc, signList)                   if getMolFile is false
            (clabDesc, signList, "", "")           if no SDF text is available
            (clabDesc, signList, molFile, molStr)  otherwise; molFile is the path
                                                   of the written file and molStr
                                                   its content.
        """
        # Create an Orange ExampleTable with a smiles attribute
        smilesAttr = orange.EnumVariable("SMILEStoPred", values=[smiles])
        myDomain = orange.Domain([smilesAttr], 0)
        smilesData = dataUtilities.DataTable(myDomain, [[smiles]])

        # Descriptors defined in the model files
        try:
            descList = self.model.varNames
        except AttributeError:  # Consensus object different
            descList = [attr.name for attr in self.model.domain.variables]

        # Determine Signature and non-Signature descriptor names
        cinfonyDesc, clabDesc, signatureHeight, bbrcDesc, signDesc = \
            descUtilities.getDescTypes(descList)

        # Signatures (defaults apply when they are not calculated)
        cmpdSignList = [[]]
        sdfStr = ""
        if "sign" in DescMethodsAvailable and signatureHeight:
            print("Calculating signatures...")
            preCalcData = dataUtilities.DataTable(self.preDefSignatureFile)
            startHeight = 0  # Not used desc ignored in model prediction
            endHeight = signatureHeight
            dataSign, cmpdSignDict, cmpdSignList, sdfStr = getSignatures.getSignatures(
                smilesData, startHeight, endHeight, preCalcData, returnAtomID=True)

        if not getMolFile:
            return (clabDesc, cmpdSignList[0])
        if not sdfStr:
            return (clabDesc, cmpdSignList[0], "", "")

        # Keep only the first record: everything before the "$$$$" line
        molLines = []
        for line in sdfStr[0]:
            if "$$$$" in line:
                break
            molLines.append(line)
        molStr = "".join(molLines)

        # Create the mol file; the handle is closed even if the write fails
        molFile = miscUtilities.generateUniqueFile(desc="NN", ext="mol")
        molHandle = open(molFile, "w")
        try:
            molHandle.write(molStr)
        finally:
            molHandle.close()

        return (clabDesc, cmpdSignList[0], molFile, molStr)
Beispiel #5
0
    def getClabDescSignList(self, smiles, getMolFile=False):
        """Return the C-Lab descriptor names and the signature list for one SMILES.

        Descriptor names come from ``self.model.varNames``; if the model does
        not have that attribute, the names of ``self.model.domain.variables``
        are used instead.  Signatures are calculated only when "sign" is in
        ``DescMethodsAvailable`` and the model uses signatures.

        smiles     -- the SMILES string to process.
        getMolFile -- when true, the first SDF record is also written to a file.

        Returns
            (clabDesc, signList)                   if getMolFile is false
            (clabDesc, signList, "", "")           if no SDF text is available
            (clabDesc, signList, molFile, molStr)  otherwise, with the path of
                                                   the written file and its text.
        """
        # Create an Orange ExampleTable with a smiles attribute
        smilesAttr = orange.EnumVariable("SMILEStoPred", values=[smiles])
        myDomain = orange.Domain([smilesAttr], 0)
        smilesData = dataUtilities.DataTable(myDomain, [[smiles]])

        # Descriptors defined in the model files
        try:
            descList = self.model.varNames
        except AttributeError:  # Consensus object different
            descList = [attr.name for attr in self.model.domain.variables]

        # Determine Signature and non-Signature descriptor names
        cinfonyDesc, clabDesc, signatureHeight, bbrcDesc, signDesc = \
            descUtilities.getDescTypes(descList)

        # Signatures
        if "sign" in DescMethodsAvailable and signatureHeight:
            print("Calculating signatures...")
            preCalcData = dataUtilities.DataTable(self.preDefSignatureFile)
            startHeight = 0  # Not used desc ignored in model prediction
            endHeight = signatureHeight
            dataSign, cmpdSignDict, cmpdSignList, sdfStr = getSignatures.getSignatures(
                smilesData, startHeight, endHeight, preCalcData, returnAtomID=True)
        else:
            cmpdSignList = [[]]
            sdfStr = ""

        if not getMolFile:
            return (clabDesc, cmpdSignList[0])
        if not sdfStr:
            return (clabDesc, cmpdSignList[0], "", "")

        # Collect the first record only (stop at the "$$$$" record separator)
        firstRecord = []
        for line in sdfStr[0]:
            if "$$$$" in line:
                break
            firstRecord.append(line)
        molStr = "".join(firstRecord)

        # Write the mol file, making sure the handle is released on failure too
        molFile = miscUtilities.generateUniqueFile(desc="NN", ext="mol")
        out = open(molFile, "w")
        try:
            out.write(molStr)
        finally:
            out.close()

        return (clabDesc, cmpdSignList[0], molFile, molStr)
Beispiel #6
0
    def processSignificance(self,
                            smi,
                            prediction,
                            orderedDesc,
                            res,
                            resultsPath,
                            exWithDesc=None,
                            idx=0,
                            topN=1,
                            regMinIsDesired=True):
        """Fill ``res`` with the most significant descriptors of a prediction.

        res is updated in place; the keys written here are:
            "signature"           : most significant signature (a list if topN != 1)
            "signarure_deriv_val" : its derivative value (a list if topN != 1)
            "non-signature"       : text listing the non-signature descriptors
            "imgPath"             : path of the generated image, or ""
            "molStr"              : mol text
            "atoms"               : atoms to highlight
            "color"               : one color per highlighted atom

        It uses for Classification:
            self.predictionOutcomes, which must define [BADlabel, GOODlabel]
            in this same order
        and for Regression:
            self.significanceThreshold; a GOOD prediction is BELOW the threshold

        orderedDesc = { "molStr": '...',       # only on specialType=1
                        "height": 2,           # only on specialType=1
                        "atoms": "[1,2,3]",    # only on specialType=1
                        'Continuous': {
                            'DOWN': [[('[F]', -0.0088), ..., ('[F]', -0.0001)],
                                     [('[O3]', -0.0073)],
                                     [('[C3]([C3][C3])', -0.0047)]],
                            'UP':   [[('[Car]([Car][Nar])', 0.0097)],
                                     [('[HC]([C3])', 0.0091)]]},
                        'Discrete': {'DOWN': [], 'UP': []}}
        Each inner list is one rank; its items are (descriptor name, derivative).

        smi             -- SMILES of the compound.
        prediction      -- predicted value, compared with the GOOD label
                           (classification) or the threshold (regression).
        resultsPath     -- directory for the image; no image if not a directory.
        exWithDesc      -- example with precalculated descriptors; when given,
                           signatures are read from it and no image is created.
        idx             -- number used in the image file name.
        topN            -- how many top descriptors to report.
        regMinIsDesired -- regression only: when false, the UP/DOWN directions
                           are swapped for the non-signature report.

        Returns res, or None when the end point definition is missing or no
        significant signature was found (res is still updated in the latter
        case).
        """
        atomColor = None
        isSpecial = hasattr(self.model, "specialType") and self.model.specialType == 1
        classIsDiscrete = self.model.classVar.varType == orange.VarTypes.Discrete

        orderedDesc_sign = {
            'Continuous': {'DOWN': [], 'UP': []},
            'Discrete': {'DOWN': [], 'UP': []}
        }
        orderedDesc_nonSign = {
            'Continuous': {'DOWN': [], 'UP': []},
            'Discrete': {'DOWN': [], 'UP': []}
        }

        if isSpecial:
            print("This is a special model nr 1: It calculates itself the Significant Signatures")
            endHeight = orderedDesc["height"]
            try:
                molStr = orderedDesc["molStr"]
                # NOTE(review): eval() runs whatever expression is stored under
                # "atoms"; confirm orderedDesc never carries untrusted text.
                atoms = eval(orderedDesc["atoms"])
            except Exception:
                atoms = None
                molStr = None
            if not molStr or not isinstance(atoms, list) or not atoms:
                atoms = None
                molStr = None

            orderedDesc_sign = orderedDesc
        else:
            atoms = None
            endHeight = None
            molStr = None
            cinfonyDesc, clabDesc, signatureHeight, bbrcDesc, signDesc = descUtilities.getDescTypes(
                [attr.name for attr in self.model.domain.attributes])
            # Split every rank into its signature and non-signature members,
            # dropping ranks that end up empty on either side.
            for attrType in ['Continuous', 'Discrete']:
                for vector in ['UP', 'DOWN']:
                    for rank in orderedDesc[attrType][vector]:
                        signPart = [attr for attr in rank if attr[0] in signDesc]
                        nonSignPart = [attr for attr in rank if attr[0] not in signDesc]
                        if signPart:
                            orderedDesc_sign[attrType][vector].append(signPart)
                        if nonSignPart:
                            orderedDesc_nonSign[attrType][vector].append(nonSignPart)

        # Process color to use if highlight is used
        outComeIsRev = None
        if classIsDiscrete:
            if self.predictionOutcomes is None:
                print("WARNING: Cannot process Significance, Missing definition of predictionOutcomes for the EndPoint")
                return
            theGoodPred = str(self.predictionOutcomes[1])
            modelValues = [str(p) for p in self.model.classVar.values]
            if modelValues == self.predictionOutcomes:
                outComeIsRev = False
            elif modelValues[::-1] == self.predictionOutcomes:
                outComeIsRev = True
            else:
                print("ERROR: User outcome ordered list is not consistens toth model:  %s <--> %s"
                      % (self.predictionOutcomes, self.model.classVar.values))

            if prediction == theGoodPred:
                atomColor = 'g'
            else:
                atomColor = 'r'
        else:
            if self.significanceThreshold is None:
                print("WARNING: Cannot process Significance, Missing definition of significanceThreshold for the EndPoint")
                return
            if prediction < self.significanceThreshold:  # It is a GOOD prediction
                atomColor = 'g'
            else:
                atomColor = 'r'

        # Process Signatures
        if exWithDesc:  # Precalculated signatures
            # cmpdSignList differ from when it is calc from smiles. However, not used.
            dataSign, cmpdSignDict, cmpdSignList, sdfStr = self.getSignDataStruct(exWithDesc)
        else:
            # OBS Hard coded for signatures 0 to 1.
            smilesData = self.getAZOdata(smi)
            if "sign" in DescMethodsAvailable:
                dataSign, cmpdSignDict, cmpdSignList, sdfStr = getSignatures.getSignatures(
                    smilesData, 0, 1, returnAtomID=True, useClabSmiles=False)
            else:
                nCmpds = len(smilesData)
                dataSign, cmpdSignDict, cmpdSignList, sdfStr = \
                    None, [{}] * nCmpds, [[]] * nCmpds, ""

        def strongestEntry(ranking):
            """Return (abs(derivative), rank index, element index) of the entry to use.

            A special (signSVM) model already reports its single most
            significant signature first.  Otherwise the first entry whose
            signature exists in the molecule is used; the search goes on to the
            next rank while the derivative found so far is exactly 0.0.
            """
            if isSpecial:
                if len(ranking):
                    return abs(ranking[0][0][1]), 0, 0
                return 0.0, 0, 0
            absVal, rankPos, elemPos = 0.0, 0, 0
            for rankIdx, rank in enumerate(ranking):
                if absVal != 0.0:
                    break
                for elemIdx, entry in enumerate(rank):
                    # Test that the signature exists in the molecule
                    if entry[0] in cmpdSignDict[0].keys():
                        absVal = abs(entry[1])
                        rankPos = rankIdx
                        elemPos = elemIdx
                        break
            return absVal, rankPos, elemPos

        signDown = orderedDesc_sign["Continuous"]["DOWN"]
        signUp = orderedDesc_sign["Continuous"]["UP"]
        downAbs, rankIdxDown, elemIdxDown = strongestEntry(signDown)
        upAbs, rankIdxUp, elemIdxUp = strongestEntry(signUp)

        # Defaults cover "no significant signature": it could happen that all
        # derivatives are smaller than epsilon.
        MSDsign = None
        MSDdv = 0
        if signUp or signDown:
            if topN == 1:  # Preserve syntax for Plato, only one significant signature
                if upAbs > downAbs:
                    top = signUp[rankIdxUp][elemIdxUp]
                elif downAbs != 0.0:  # DOWN is larger, or a non-zero tie
                    top = signDown[rankIdxDown][elemIdxDown]
                else:
                    top = None
                if top is not None:
                    MSDsign = top[0]
                    MSDdv = top[1]
            else:
                topSigns = []
                topDerivs = []

                def takeTop(ranking):
                    """Record the first entry of ranking's first rank and pop that rank."""
                    topSigns.append(ranking[0][0][0])
                    topDerivs.append(ranking.pop(0)[0][1])

                # The loop variable must not be called idx: idx is the
                # parameter used further down for the image file name.
                for _ in range(topN):
                    try:
                        if abs(signDown[0][0][1]) > abs(signUp[0][0][1]):
                            takeTop(signDown)
                        else:
                            takeTop(signUp)
                    except Exception:
                        # The comparison is not possible (e.g. one side is
                        # exhausted): take from DOWN, then UP, else nothing.
                        try:
                            takeTop(signDown)
                        except Exception:
                            try:
                                takeTop(signUp)
                            except Exception:
                                pass
                MSDsign = topSigns
                MSDdv = topDerivs

        # Process non-signatures
        if classIsDiscrete and outComeIsRev:
            UP = "DOWN"
            DOWN = "UP"
        elif not classIsDiscrete and not regMinIsDesired:
            UP = "DOWN"
            DOWN = "UP"
        else:
            UP = "UP"
            DOWN = "DOWN"

        # Process Discrete attributes
        MSDnonSign = ""
        discreteDown = orderedDesc_nonSign["Discrete"][DOWN]
        for n in range(min(topN, len(discreteDown))):
            if topN > 1:
                MSDnonSign += str(n + 1) + ": "
            MSDnonSign += '\n'.join(["Change " + x[0] for x in discreteDown[n]]) + '\n'

        # Process Continuous attributes
        contUp = orderedDesc_nonSign["Continuous"][UP]
        contDown = orderedDesc_nonSign["Continuous"][DOWN]
        for n in range(topN):
            if topN > 1:
                order = str(n + 1) + ": "
            else:
                order = ""
            if len(contDown):
                downAbs = abs(contDown[0][0][1])
            else:
                downAbs = 0.0
            if len(contUp):
                upAbs = abs(contUp[0][0][1])
            else:
                upAbs = 0.0

            # Both branches may run in the same pass when the two heads tie
            if contUp and upAbs >= downAbs:
                TOPmsd = contUp.pop(0)
                MSDnonSign += order + '\n'.join(["Decrease " + x[0] for x in TOPmsd]) + '\n'
            if contDown and downAbs >= upAbs:
                TOPmsd = contDown.pop(0)
                MSDnonSign += order + '\n'.join(["Increase " + x[0] for x in TOPmsd]) + '\n'

        res["non-signature"] = MSDnonSign

        # Most probably Signatures will always be associated with Discrete
        # attributes. Nevertheless, it happens that some are Continuous, and
        # therefore we will be using signatures reported as Continuous if any
        if not MSDsign:
            res["imgPath"] = ""
            res["signature"] = ""
            res["signarure_deriv_val"] = 0
            return

        if resultsPath and os.path.isdir(resultsPath):
            imgPath = os.path.join(
                resultsPath, "significance_" + str(idx) + "_" +
                str(time.time()).replace(".", '') + ".png")
        else:
            imgPath = ""

        # Call the method to create the image/mol specifying the color of the highlighted atoms
        if exWithDesc is None:  # Don't use with precalc desc
            if molStr and atoms and endHeight is not None and not imgPath:
                print("Using molStr and atoms from Learner Significant Signature")
                res["imgPath"] = ''
                res["molStr"] = molStr
                allAtoms = self.getNNAtoms(molStr, atoms, endHeight)
                res["atoms"] = allAtoms
                res["color"] = [atomColor] * len(allAtoms)
            else:
                res["imgPath"], res["molStr"], res["atoms"], res["color"] = \
                    self.createSignImg(smi, MSDsign, atomColor, imgPath, endHeight)

        res["signature"] = MSDsign
        res["signarure_deriv_val"] = MSDdv

        return res
Beispiel #7
0
    def getDescriptors(self, smiles):
        self.getSmilesData(smiles)

        # Calculate descriptors defined in the model files
        descList = self.model.varNames

        savedSmilesData = dataUtilities.DataTable(self.smilesData)

        #Try 3 time to get All compounds descriptors
        nTry = 3
        errorDesc = ""
        while nTry > 0:
            try:
                #if True:
                traceLog = "Model Location:" + str(self.modelLocation) + "\n"
                nBadEx = 0
                # Determine Signature and non-Signature descriptor names
                cinfonyDesc, clabDesc, signatureHeight, bbrcDesc, signDesc = descUtilities.getDescTypes(
                    descList)
                # Signatures
                if "sign" in DescMethodsAvailable and signatureHeight:
                    traceLog += "Calculating signatures...\n"
                    print "Calculating signatures...."
                    preCalcData = dataUtilities.DataTable(
                        self.preDefSignatureFile)
                    startHeight = 0  # Not used desc ignored in model prediction
                    endHeight = signatureHeight
                    self.smilesData = getSignatures.getSignatures(
                        self.smilesData, startHeight, endHeight, preCalcData)

                # C-Lab desc
                if "clab" in DescMethodsAvailable and clabDesc:
                    traceLog += "Calculating C-Lab...\n"
                    print "Calculating C-Lab desc...."
                    self.smilesData = ClabUtilities.appendCLabDesc(
                        clabDesc, self.smilesData)

                # Cinfony
                if cinfonyDesc:
                    traceLog += "Calculating Cinfony...\n"
                    print "Calculating Cinfony desc..."
                    self.smilesData = getCinfonyDesc.getCinfonyDescResults(
                        self.smilesData, cinfonyDesc, radius=5)

                # bbrcDesc
                if "bbrc" in DescMethodsAvailable and bbrcDesc:
                    traceLog += "Calculating BBRC...\n"
                    print "Calculating BBRC desc..."
                    self.smilesData = getBBRCDesc.getBBRCDescResult(
                        self.smilesData,
                        algo="FTM",
                        minSupPar=1,
                        descList=bbrcDesc)

                # Detect if the descripts calaculation or something else went wrong!
                for ex in self.smilesData:
                    if sum([
                            ex[attr].isSpecial()
                            for attr in self.smilesData.domain.attributes
                    ]) == len(self.smilesData.domain.attributes):
                        nBadEx += 1
                if nBadEx:
                    traceLog += "WARNING: Desc. Calculation: From the " + str(
                        len(self.smilesData)) + " compounds, " + str(
                            nBadEx) + " could not be calculated!\n"
                    print "WARNING: Desc. Calculation: From the " + str(
                        len(self.smilesData)) + " compounds, " + str(
                            nBadEx) + " could not be calculated!"
                    print "WARNING:   Tying again..."
                    self.smilesData = dataUtilities.DataTable(savedSmilesData)
                    nTry -= 1
                else:
                    nTry = 0
            #else:
            except Exception, e:
                errorDesc = "Error Calculating Descriptors:;" + traceLog + str(
                    e) + ";"
                nTry -= 1
Beispiel #8
0
    def createSignImg(self,
                      smi,
                      signature,
                      atomColor,
                      imgPath,
                      endHeight=None):
        """Locate the atoms covered by ``signature`` and optionally draw them.

        The molecule is read from a temporary mol file, which is removed as
        soon as it has been loaded, on every exit path.  The atoms carrying the
        signature are collected and extended with their neighbours, one shell
        per signature height.  If ``imgPath`` is given, an image with those
        atoms highlighted in ``atomColor`` is written there.

        smi       -- SMILES of the compound.
        signature -- signature to look for among the compound signatures.
        atomColor -- 'r' (red) or 'g' (green); any other value draws blue.
        imgPath   -- file to write the image to; "" skips the drawing.
        endHeight -- signature height passed to getSignatures; only used when
                     self.model.specialType == 1.

        Returns (imgPath, molStr, atoms, colors).  imgPath is "" when no image
        was produced and "signatureNOTfound" when the signature is not in the
        molecule; ("", "", [], []) is returned for missing inputs or when the
        molecule could not be built.
        """
        print("Creating signature image...")
        if not signature or not atomColor or not smi:
            print("Missing inputs: %s" % str([smi, signature, atomColor]))
            return "", "", [], []

        if hasattr(self.model, "specialType") and self.model.specialType == 1:
            # Create an Orange ExampleTable with a smiles attribute
            smilesAttr = orange.EnumVariable("SMILEStoPred", values=[smi])
            myDomain = orange.Domain([smilesAttr], 0)
            smilesData = dataUtilities.DataTable(myDomain, [[smi]])
            preCalcData = None
            startHeight = 0
            dataSign, cmpdSignDict, cmpdSignList, sdfStr = getSignatures.getSignatures(
                smilesData,
                startHeight,
                endHeight,
                preCalcData,
                returnAtomID=True)
            cmpdSignList = cmpdSignList[0]

            # Keep only the first record: everything before the "$$$$" line
            molLines = []
            for line in sdfStr[0]:
                if "$$$$" in line:
                    break
                molLines.append(line)
            molStr = "".join(molLines)

            # Create the mol file; the handle is closed even if the write fails
            tmpFile = miscUtilities.generateUniqueFile(desc="NN", ext="mol")
            molHandle = open(tmpFile, "w")
            try:
                molHandle.write(molStr)
            finally:
                molHandle.close()
        else:
            CLabDesc, cmpdSignList, tmpFile, molStr = self.getClabDescSignList(
                smi, getMolFile=True)

        # The mol file is only needed to build the RDKit molecule; remove it
        # right afterwards so that no exit path leaves it behind.
        try:
            if not cmpdSignList or not tmpFile:
                print("Couldn't get the cmpd list or the mol file")
                return "", "", [], []
            # create an RDKit mol
            mol = Chem.MolFromMolFile(tmpFile, True, False)
            if not mol:
                mol = Chem.MolFromMolFile(tmpFile, False, False)
        finally:
            if tmpFile and os.path.isfile(tmpFile):
                try:
                    os.remove(tmpFile)
                except OSError:
                    pass  # best-effort cleanup; a leftover temp file is harmless
        if not mol:
            print("Could not create mol for:  %s" % (smi, ))
            return "", "", [], []
        adj = GetAdjacencyMatrix(mol)

        # Split the flat signature list into one chunk of nAtoms per height
        nAtoms = mol.GetNumAtoms()
        hights = []
        for i in miscUtilities.Range(0, len(cmpdSignList), nAtoms):
            hList = cmpdSignList[i:i + nAtoms]
            if len(hList):
                hights.append(hList)

        # Atoms carrying the signature, at the first height where it appears
        atoms = []
        hight = None
        for level, levelSigns in enumerate(hights):
            if signature in levelSigns:
                atoms = [i for i, a in enumerate(levelSigns) if a == signature]
                hight = level
                break
        if len(atoms) == 0:
            print("ERROR: Could not find the atom for  %s" % (signature, ))
            return "signatureNOTfound", "", [], []

        # Add one shell of neighbours per height level
        visitedAtoms = set()
        for n in range(hight):
            for atom in list(atoms):  # snapshot: atoms grows inside the loop
                if atom not in visitedAtoms:
                    visitedAtoms.add(atom)
                    for lnn in findNeighbors(atom, adj):
                        if lnn not in atoms:
                            atoms.append(lnn)
        atoms.sort()

        # Specify the atom colors
        colors = [atomColor] * len(atoms)

        if not imgPath:
            return "", molStr, atoms, colors
        try:
            # Draw the image
            MolDrawing.elemDict = defaultdict(lambda: (0, 0, 0))
            Draw.MolToImageFile(mol,
                                imgPath,
                                size=(300, 300),
                                kekulize=True,
                                wedgeBonds=True,
                                highlightAtoms=atoms)
            # Color the highlighted atoms with the chosen atomColor.
            # Only using one color
            if atomColor == 'r':
                rgb = (255, 0, 0)
            elif atomColor == 'g':
                rgb = (0, 255, 0)
            else:
                rgb = (0, 0, 255)  # Blue

            # Every pure-red pixel of the drawing is repainted with rgb
            img = Image.open(imgPath)
            img = img.convert("RGBA")
            newData = []
            for item in img.getdata():
                if item[0] == 255 and item[1] == 0 and item[2] == 0:
                    newData.append(rgb + (255, ))
                else:
                    newData.append(item)
            img.putdata(newData)
            img.save(imgPath)

            if os.path.isfile(imgPath):
                return imgPath, molStr, atoms, colors
            return "", molStr, atoms, colors
        except Exception:
            # Drawing is best effort: the atoms and colors are still returned
            return "", molStr, atoms, colors
Beispiel #9
0
    def processSignificance(self, smi, prediction, orderedDesc, res, resultsPath, idx = 0, topN = 1):
        """descs* = [(1.3, ["LogP"]), (0.2, ["[So2]", ...]), ...]
           res =  { "signature"     : "",       
                    "imgPath"       : "",      for placing the results 
                    "non-signature" : "",
                    "molStr"        : "",
                    "atoms"         : []
                    "color"         : [(r,g,b),(),...]}
        
           It uses for Classificartion: 
                        self.predictionOutcomes that must define [BADlabel, GOODlabel] in this same order
            and for Regression:
                        self.significanceThreshold for which a GOOD prediction is BELOW the threshold
        
        orderedDesc = { "molStr":''...,                                                              # only on specialType=1
                        "height":2,                                                                  # only on specialType=1
                        "atoms":[1,2,3],                                                             # only on specialType=1
                          'Continuous': {
                          'DOWN':[ [('[F]', -0.008885456983609475), ... ('[F]',-0,0001)],
                                 [('[O3]', -0.007324209285573964)],
                                 [('[C3]([C3][C3])', -0.0047175657931883405)],
                                 [('[C3]', -0.00389763719161594)]],
                           'UP': [[('[Car]([Car][Nar])', 0.009768981717302358)],
                                 [('[HC]([C3])', 0.009135563633559857)]]},
                       'Discrete': {'DOWN': [], 'UP': []}}
 
        """
        atomColor = None
      
        orderedDesc_sign =    {'Continuous': {'DOWN': [], 'UP': []},
                               'Discrete': {'DOWN': [], 'UP': []}}

        orderedDesc_nonSign = {'Continuous': {'DOWN': [], 'UP': []},
                               'Discrete': {'DOWN': [], 'UP': []}}
        

        if hasattr(self.model, "specialType") and self.model.specialType == 1:
            print "This is a special model nr 1: It calculates itself the Significant Signatures"
            endHeight = orderedDesc["height"]
            try:
                molStr = orderedDesc["molStr"]
                atoms = eval(orderedDesc["atoms"])
            except:
                atoms = None
                molStr = None
            if not molStr or type(atoms)!=list or not atoms:
                atoms = None
                molStr = None

            orderedDesc_sign = orderedDesc
        else:
            atoms = None
            endHeight = None
            molStr = None
            cinfonyDesc, clabDesc, signatureHeight, bbrcDesc, signDesc = descUtilities.getDescTypes([attr.name for attr in self.model.domain.attributes])
            for attrType in ['Continuous', 'Discrete']:
                for vector in ['UP','DOWN']:
                    for ord in range(len(orderedDesc[attrType][vector])):
                        signEmpty=True
                        nonSignEmpty=True
                        for attr in orderedDesc[attrType][vector][ord]:
                            if attr[0] in signDesc:
                                if signEmpty:
                                    signEmpty = False
                                    orderedDesc_sign[attrType][vector].append([])
                                orderedDesc_sign[attrType][vector][-1].append(attr)
                            else:
                                if nonSignEmpty:
                                    nonSignEmpty = False
                                    orderedDesc_nonSign[attrType][vector].append([])
                                orderedDesc_nonSign[attrType][vector][-1].append(attr)


        #Process color to use if highlight is used
        outComeIsRev = None
        if self.model.classVar.varType == orange.VarTypes.Discrete:
            if self.predictionOutcomes is None:
                print "WARNING: Cannot process Significance, Missing definition of predictionOutcomes for the EndPoint"
                return
            theGoodPred = str(self.predictionOutcomes[1])
            theBadPred = str(self.predictionOutcomes[0])
            if [str(p) for p in self.model.classVar.values] == self.predictionOutcomes:
                outComeIsRev = False
            elif [str(p) for p in self.model.classVar.values][::-1] == self.predictionOutcomes:
                outComeIsRev = True
            else:
                print "ERROR: User outcome ordered list is not consistens toth model: ",\
                      self.predictionOutcomes, "<-->",self.model.classVar.values

            if prediction == theGoodPred:
                atomColor = 'g'
            else:
                atomColor = 'r'
        else:
            if self.significanceThreshold is None:
                print "WARNING: Cannot process Significance, Missing definition of significanceThreshold for the EndPoint"
                return
            if prediction < self.significanceThreshold:    # It is a GOOD prediction
                atomColor = 'g'
            else:
                atomColor = 'r'
        #Process Signatures
        # OBS Hard coded for signatures 0 to 1.  
        smilesData = self.getAZOdata(smi)
        dataSign, cmpdSignDict, cmpdSignList, sdfStr  = getSignatures.getSignatures(smilesData, 0, 1, returnAtomID = True, useClabSmiles = False)
        # If signSVM model already returning one sign as the most significant
        if not (hasattr(self.model, "specialType") and self.model.specialType == 1):
            downAbs = 0.0
            rankIdxDown = 0
            elemIdxDown = 0
            if len(orderedDesc_sign["Continuous"]["DOWN"]):
                for rankIdx in range(len(orderedDesc_sign["Continuous"]["DOWN"])):
                    if downAbs != 0.0:
                        break
                    for elemIdx in range(len(orderedDesc_sign["Continuous"]["DOWN"][rankIdx])):
                        # Test that the signature exists in the molecule
                        if orderedDesc_sign["Continuous"]["DOWN"][rankIdx][elemIdx][0] in cmpdSignDict[0].keys():
                            downAbs = abs(orderedDesc_sign["Continuous"]["DOWN"][rankIdx][elemIdx][1])
                            rankIdxDown = rankIdx
                            elemIdxDown = elemIdx
                            break
        else: 
            if len(orderedDesc_sign["Continuous"]["DOWN"]):
                downAbs = abs(orderedDesc_sign["Continuous"]["DOWN"][0][0][1])
            else:
                downAbs = 0.0
                        
        # If signSVM model already returning one sign as the most significant
        if not (hasattr(self.model, "specialType") and self.model.specialType == 1):
            upAbs = 0.0
            rankIdxUp = 0
            elemIdxUp = 0
            if len(orderedDesc_sign["Continuous"]["UP"]):
                for rankIdx in range(len(orderedDesc_sign["Continuous"]["UP"])):
                    if upAbs != 0.0:
                        break
                    for elemIdx in range(len(orderedDesc_sign["Continuous"]["UP"][rankIdx])):
                        # Test that the signature exists in the molecule
                        if orderedDesc_sign["Continuous"]["UP"][rankIdx][elemIdx][0] in cmpdSignDict[0].keys():
                            upAbs = abs(orderedDesc_sign["Continuous"]["UP"][rankIdx][elemIdx][1])
                            rankIdxUp = rankIdx
                            elemIdxUp = elemIdx
                            break
        else: 
            if len(orderedDesc_sign["Continuous"]["UP"]):
                upAbs = abs(orderedDesc_sign["Continuous"]["UP"][0][0][1])
            else:
                upAbs = 0.0

        if upAbs > downAbs:
            if not (hasattr(self.model, "specialType") and self.model.specialType == 1):
                MSDsign = orderedDesc_sign["Continuous"]["UP"][rankIdxUp][elemIdxUp][0]
                MSDdv = orderedDesc_sign["Continuous"]["UP"][rankIdxUp][elemIdxUp][1]
            else:
                MSDsign = orderedDesc_sign["Continuous"]["UP"][0][0][0]
                MSDdv = orderedDesc_sign["Continuous"]["UP"][0][0][1]
        elif downAbs > upAbs:
            if not (hasattr(self.model, "specialType") and self.model.specialType == 1):
                MSDsign = orderedDesc_sign["Continuous"]["DOWN"][rankIdxDown][elemIdxDown][0]
                MSDdv = orderedDesc_sign["Continuous"]["DOWN"][rankIdxDown][elemIdxDown][1]
            else:
                MSDsign = orderedDesc_sign["Continuous"]["DOWN"][0][0][0]
                MSDdv = orderedDesc_sign["Continuous"]["DOWN"][0][0][1]
        elif downAbs != 0.0:
            if not (hasattr(self.model, "specialType") and self.model.specialType == 1):
                MSDsign = orderedDesc_sign["Continuous"]["DOWN"][rankIdxDown][elemIdxDown][0]
                MSDdv = orderedDesc_sign["Continuous"]["DOWN"][rankIdxDown][elemIdxDown][1]
            else:
                MSDsign = orderedDesc_sign["Continuous"]["DOWN"][0][0][0]
                MSDdv = orderedDesc_sign["Continuous"]["DOWN"][0][0][1]
        else:
            MSDsign = None
            MSDsign = 0

        #Process non-signatures
        if self.model.classVar.varType == orange.VarTypes.Discrete and outComeIsRev:
            UP = "DOWN"
            DOWN = "UP"
        else:
            UP = "UP"
            DOWN = "DOWN"
        #Process DiscreteAttrs
        MSDnonSign = ""
        nD_DOWN = len(orderedDesc_nonSign["Discrete"][DOWN])
        if nD_DOWN: 
            for n in range(min(topN,nD_DOWN)):
                if topN > 1:
                    MSDnonSign += str(n+1)+": "
                MSDnonSign += string.join(["Change "+x[0] for x in orderedDesc_nonSign["Discrete"][DOWN][n]],'\n')+'\n'

        #Process Continuous attributes 
        for n in range(topN):
                if topN > 1:
                    order = str(n+1)+": "
                else:
                    order = ""
                if len(orderedDesc_nonSign["Continuous"][DOWN]):
                    downAbs = abs(orderedDesc_nonSign["Continuous"][DOWN][0][0][1])
                else:
                    downAbs = 0.0
                if len(orderedDesc_nonSign["Continuous"][UP]):
                    upAbs = abs(orderedDesc_nonSign["Continuous"][UP][0][0][1])
                else:
                    upAbs = 0.0

                if orderedDesc_nonSign["Continuous"][UP] and upAbs >= downAbs:
                    TOPmsd = orderedDesc_nonSign["Continuous"][UP].pop(0)
                    MSDnonSign += order + string.join(["Decrease "+x[0] for x in TOPmsd],'\n')+'\n'
                if orderedDesc_nonSign["Continuous"][DOWN] and downAbs >= upAbs:
                    TOPmsd = orderedDesc_nonSign["Continuous"][DOWN].pop(0)
                    MSDnonSign += order + string.join(["Increase "+x[0] for x in TOPmsd],'\n')+'\n'

        
        res["non-signature"] = MSDnonSign



        # Most probably Signatures will always be associated with Discrete attributes. Nevertheless, it happens that some are Continuous, and therefore
        #  we will be using signatures reported as Continuous if any
        if not MSDsign:
            res["imgPath"] = ""
            res["signature"] = ""
            res["signarure_deriv_val"] = 0
            return
        if resultsPath and os.path.isdir(resultsPath):
            imgPath = os.path.join(resultsPath,"significance_"+str(idx)+"_"+str(time.time()).replace(".",'')+".png")
        else:
            imgPath = ""
        # Call the method to create the image/mol specifying the color of the hilighted atoms  
        if molStr and atoms and endHeight is not None and not imgPath:
            print "Using molStr and atoms from Learner Significant Signature"
            res["imgPath"]=''
            res["molStr"] = molStr
            allAtoms = self.getNNAtoms(molStr, atoms, endHeight)
            res["atoms"] = allAtoms
            res["color"] = [atomColor]*len(allAtoms)
        else:
            res["imgPath"] , res["molStr"], res["atoms"], res["color"] = self.createSignImg(smi,MSDsign,atomColor,imgPath,endHeight)
        #Fix the significant descriptors so that it is a formated string
        res["signature"] = MSDsign
        res["signarure_deriv_val"] = MSDdv 
Beispiel #10
0
    def createSignImg(self,smi,signature,atomColor,imgPath, endHeight = None):
        colors = []
        print "Creating signature image..."
        if not signature or not atomColor or not smi:
            print "Missing inputs:",str([smi,signature,atomColor])
            return "","",[], []
        if hasattr(self.model, "specialType") and self.model.specialType == 1:
            # Create an Orange ExampleTable with a smiles attribute
            smilesAttr = orange.EnumVariable("SMILEStoPred", values = [smi])
            myDomain = orange.Domain([smilesAttr], 0)
            smilesData = dataUtilities.DataTable(myDomain, [[smi]])
            preCalcData = None
            startHeight = 0
            dataSign,cmpdSignDict, cmpdSignList, sdfStr  = getSignatures.getSignatures(smilesData, startHeight, endHeight, preCalcData, returnAtomID=True)
            cmpdSignList = cmpdSignList[0]
            CLabDesc = []
            # create a mol file
            tmpFile = miscUtilities.generateUniqueFile(desc="NN", ext = "mol")
            file= open(tmpFile,"w")
            molStr=""
            for line in sdfStr[0]:
                if "$$$$" in line:
                    break
                molStr += line
                file.write(line)
            file.close()
        else: 
            CLabDesc,cmpdSignList, tmpFile, molStr  =  self.getClabDescSignList(smi, getMolFile=True)
        if not cmpdSignList or not tmpFile:
            print "Couldn't get the cmpd list or the mol file"
            return "","",[], []
        # create an RDKit mol
        mol = Chem.MolFromMolFile(tmpFile,True,False)
        if not mol:
            mol = Chem.MolFromMolFile(tmpFile,False,False)
        if not mol:
            print "Could not create mol for: ",smi
            return "","",[], []
        adj = GetAdjacencyMatrix(mol)
        # find the NN
        hights = []
        for i in miscUtilities.Range(0,len(cmpdSignList),mol.GetNumAtoms()):
            hList = cmpdSignList[i:i+mol.GetNumAtoms()]
            if len(hList):
                hights.append(cmpdSignList[i:i+mol.GetNumAtoms()])
       
        atoms = []
        hight = None
        for idx,h in enumerate(hights):
            if signature in h:
                for i,a in enumerate(h):
                    if a == signature:
                        atoms.append(i)
                hight = idx
                break
        if len(atoms) == 0:
            print "ERROR: Could not find the atom for ",signature
            return "signatureNOTfound","",[],[]
        #print "IniAtoms: ",atoms
        visitedAtoms = []
        for n in range(hight):
          for atom in copy.deepcopy(atoms):
             if atom not in visitedAtoms:    
                lNN = findNeighbors(atom,adj)
                visitedAtoms.append(atom)
                for lnn in lNN:
                    if lnn not in atoms: 
                        atoms.append(lnn)
        atoms.sort()
        os.system("rm " + tmpFile)
        #Specify the atom colors
        colors=[atomColor]*len(atoms)

        if not imgPath:
            return "",molStr,atoms,colors 
        try:
                #Draw the image
                MolDrawing.elemDict=defaultdict(lambda : (0,0,0))
                Draw.MolToImageFile(mol,imgPath,size=(300, 300), kekulize=True, wedgeBonds=True, highlightAtoms=atoms)
                #Color the Highlighted atoms with the choosen atomColor.
                # Only using one color
                if atomColor == 'r':
                    rgb = (255,0,0)
                elif atomColor == 'g':
                    rgb = (0,255,0)
                else:
                    rgb = (0,0,255)    #Blue
                    
                img = Image.open(imgPath)
                img = img.convert("RGBA")
                pixdata = img.getdata()
                newData = list()
                for item in pixdata:
                  if item[0] == 255 and item[1] == 0 and item[2] == 0:
                    newData.append(rgb + (255,) )
                  else:
                    newData.append(item)
                img.putdata(newData)
                img.save(imgPath)

                if os.path.isfile(imgPath):
                    return imgPath,molStr,atoms,colors
                else:
                    return "",molStr,atoms,colors
        except:
                return "",molStr,atoms,colors