Ejemplo n.º 1
0
    def getDescriptors(self, smiles):
        """Calculate the model's descriptors for ``smiles`` into ``self.smilesData``.

        ``self.getSmilesData(smiles)`` is called first.  The descriptor names in
        ``self.model.varNames`` are split by ``descUtilities.getDescTypes`` and
        each family that is both requested and available (signatures, C-Lab,
        Cinfony, BBRC) is calculated in turn, each result replacing
        ``self.smilesData``.

        Up to three attempts are made:

        * if at least one example ends up with every attribute flagged by
          ``isSpecial()``, ``self.smilesData`` is restored from the copy taken
          before the first attempt and the calculation is repeated;
        * if an exception is raised, its text (prefixed with the trace log) is
          stored in the local ``errorDesc`` and the calculation is repeated.
          ``self.smilesData`` is NOT restored in this case.

        Nothing is returned by the code visible here.
        """
        self.getSmilesData(smiles)

        # Calculate descriptors defined in the model files
        descList = self.model.varNames

        # Copy used to restart from scratch when an attempt yields bad examples
        savedSmilesData = dataUtilities.DataTable(self.smilesData)

        # Try 3 times to get all compounds descriptors
        nTry = 3
        errorDesc = ""
        # Bound before the loop so the except handler can always read it, even
        # when the very first statement of the try block is the one that fails.
        traceLog = ""
        while nTry > 0:
            try:
                traceLog = "Model Location:" + str(self.modelLocation) + "\n"
                nBadEx = 0
                # Determine Signature and non-Signature descriptor names
                cinfonyDesc, clabDesc, signatureHeight, bbrcDesc, signDesc = \
                    descUtilities.getDescTypes(descList)

                # Signatures
                if "sign" in DescMethodsAvailable and signatureHeight:
                    traceLog += "Calculating signatures...\n"
                    print("Calculating signatures....")
                    preCalcData = dataUtilities.DataTable(self.preDefSignatureFile)
                    startHeight = 0  # Not used desc ignored in model prediction
                    endHeight = signatureHeight
                    self.smilesData = getSignatures.getSignatures(
                        self.smilesData, startHeight, endHeight, preCalcData)

                # C-Lab desc
                if "clab" in DescMethodsAvailable and clabDesc:
                    traceLog += "Calculating C-Lab...\n"
                    print("Calculating C-Lab desc....")
                    self.smilesData = ClabUtilities.appendCLabDesc(
                        clabDesc, self.smilesData)

                # Cinfony
                if cinfonyDesc:
                    traceLog += "Calculating Cinfony...\n"
                    print("Calculating Cinfony desc...")
                    self.smilesData = getCinfonyDesc.getCinfonyDescResults(
                        self.smilesData, cinfonyDesc, radius=5)

                # bbrcDesc
                if "bbrc" in DescMethodsAvailable and bbrcDesc:
                    traceLog += "Calculating BBRC...\n"
                    print("Calculating BBRC desc...")
                    self.smilesData = getBBRCDesc.getBBRCDescResult(
                        self.smilesData, algo="FTM", minSupPar=1,
                        descList=bbrcDesc)

                # Detect if the descriptor calculation or something else went
                # wrong: an example is "bad" when all its attributes are special.
                attributes = self.smilesData.domain.attributes
                for ex in self.smilesData:
                    if all([ex[attr].isSpecial() for attr in attributes]):
                        nBadEx += 1

                if nBadEx:
                    warning = ("WARNING: Desc. Calculation: From the " +
                               str(len(self.smilesData)) + " compounds, " +
                               str(nBadEx) + " could not be calculated!")
                    traceLog += warning + "\n"
                    print(warning)
                    print("WARNING:   Tying again...")
                    self.smilesData = dataUtilities.DataTable(savedSmilesData)
                    nTry -= 1
                else:
                    # Every example has at least one usable value: stop retrying
                    nTry = 0
            except Exception as e:
                errorDesc = ("Error Calculating Descriptors:;" + traceLog +
                             str(e) + ";")
                nTry -= 1
Ejemplo n.º 2
0
    def getClabDescSignList(self, smiles, getMolFile=False):
        """Return the C-Lab descriptor names and the signature list for ``smiles``.

        A one-example table holding ``smiles`` is built and the model's
        descriptor names are classified with ``descUtilities.getDescTypes``.
        When signatures are available and the model uses them,
        ``getSignatures.getSignatures`` is run with ``returnAtomID=True``;
        otherwise the signature list is empty and no SDF text exists.

        Returns:
            * ``(clabDesc, signList)`` when ``getMolFile`` is false;
            * ``(clabDesc, signList, "", "")`` when ``getMolFile`` is true but
              no SDF text was produced;
            * ``(clabDesc, signList, molFile, molStr)`` otherwise, where
              ``molFile`` is the path of a newly written file and ``molStr``
              its content: the items of ``sdfStr[0]`` that precede the first
              one containing ``$$$$``.
        """
        # Create an Orange ExampleTable with a smiles attribute
        smilesAttr = orange.EnumVariable("SMILEStoPred", values=[smiles])
        myDomain = orange.Domain([smilesAttr], 0)
        smilesData = dataUtilities.DataTable(myDomain, [[smiles]])

        # Calculate descriptors defined in the model files
        try:
            descList = self.model.varNames
        except Exception:  # Consensus object different: use the domain names
            descList = [attr.name for attr in self.model.domain.variables]

        # Determine Signature and non-Signature descriptor names
        cinfonyDesc, clabDesc, signatureHeight, bbrcDesc, signDesc = \
            descUtilities.getDescTypes(descList)

        # Signatures
        if "sign" in DescMethodsAvailable and signatureHeight:
            print("Calculating signatures...")
            preCalcData = dataUtilities.DataTable(self.preDefSignatureFile)
            startHeight = 0  # Not used desc ignored in model prediction
            endHeight = signatureHeight
            dataSign, cmpdSignDict, cmpdSignList, sdfStr = \
                getSignatures.getSignatures(smilesData,
                                            startHeight,
                                            endHeight,
                                            preCalcData,
                                            returnAtomID=True)
        else:
            cmpdSignList = [[]]
            sdfStr = ""

        if not getMolFile:
            return (clabDesc, cmpdSignList[0])
        if not sdfStr:
            return (clabDesc, cmpdSignList[0], "", "")

        # Create a mol file; "with" guarantees the handle is closed even if
        # iterating or writing fails part-way.
        molFile = miscUtilities.generateUniqueFile(desc="NN", ext="mol")
        molLines = []
        with open(molFile, "w") as molHandle:
            for line in sdfStr[0]:
                if "$$$$" in line:  # SDF record terminator: stop before it
                    break
                molLines.append(line)
                molHandle.write(line)
        molStr = "".join(molLines)

        return (clabDesc, cmpdSignList[0], molFile, molStr)
Ejemplo n.º 3
0
    def getClabDescSignList(self, smiles, getMolFile=False):
        """Return the C-Lab descriptor names and the signature list for ``smiles``.

        A one-example table holding ``smiles`` is built and the model's
        descriptor names are classified with ``descUtilities.getDescTypes``.
        When signatures are available and the model uses them,
        ``getSignatures.getSignatures`` is run with ``returnAtomID=True``;
        otherwise the signature list is empty and no SDF text exists.

        Returns:
            * ``(clabDesc, signList)`` when ``getMolFile`` is false;
            * ``(clabDesc, signList, "", "")`` when ``getMolFile`` is true but
              no SDF text was produced;
            * ``(clabDesc, signList, molFile, molStr)`` otherwise, where
              ``molFile`` is the path of a newly written file and ``molStr``
              its content: the items of ``sdfStr[0]`` that precede the first
              one containing ``$$$$``.
        """
        # Create an Orange ExampleTable with a smiles attribute
        smilesAttr = orange.EnumVariable("SMILEStoPred", values=[smiles])
        myDomain = orange.Domain([smilesAttr], 0)
        smilesData = dataUtilities.DataTable(myDomain, [[smiles]])

        # Calculate descriptors defined in the model files
        try:
            descList = self.model.varNames
        except Exception:  # Consensus object different: use the domain names
            descList = [attr.name for attr in self.model.domain.variables]

        # Determine Signature and non-Signature descriptor names
        cinfonyDesc, clabDesc, signatureHeight, bbrcDesc, signDesc = \
            descUtilities.getDescTypes(descList)

        # Signatures
        if "sign" in DescMethodsAvailable and signatureHeight:
            print("Calculating signatures...")
            preCalcData = dataUtilities.DataTable(self.preDefSignatureFile)
            startHeight = 0  # Not used desc ignored in model prediction
            endHeight = signatureHeight
            dataSign, cmpdSignDict, cmpdSignList, sdfStr = \
                getSignatures.getSignatures(smilesData, startHeight, endHeight,
                                            preCalcData, returnAtomID=True)
        else:
            cmpdSignList = [[]]
            sdfStr = ""

        if not getMolFile:
            return (clabDesc, cmpdSignList[0])
        if not sdfStr:
            return (clabDesc, cmpdSignList[0], "", "")

        # Create a mol file; "with" guarantees the handle is closed even if
        # iterating or writing fails part-way.
        molFile = miscUtilities.generateUniqueFile(desc="NN", ext="mol")
        molLines = []
        with open(molFile, "w") as molHandle:
            for line in sdfStr[0]:
                if "$$$$" in line:  # SDF record terminator: stop before it
                    break
                molLines.append(line)
                molHandle.write(line)
        molStr = "".join(molLines)

        return (clabDesc, cmpdSignList[0], molFile, molStr)
Ejemplo n.º 4
0
    def processSignificance(self,
                            smi,
                            prediction,
                            orderedDesc,
                            res,
                            resultsPath,
                            exWithDesc=None,
                            idx=0,
                            topN=1,
                            regMinIsDesired=True):
        """Fill ``res`` with the most significant descriptors of a prediction.

        ``res`` is updated in place; the keys written here are::

            res = { "signature"           : most significant signature(s),
                    "signarure_deriv_val" : its/their derivative value(s),
                    "imgPath"             : "",   for placing the results
                    "non-signature"       : text advice for other descriptors,
                    "molStr"              : "",
                    "atoms"               : [],
                    "color"               : [...] }

        It uses for Classification:
            ``self.predictionOutcomes`` that must define
            ``[BADlabel, GOODlabel]`` in this same order
        and for Regression:
            ``self.significanceThreshold`` for which a GOOD prediction is
            BELOW the threshold.

        Expected layout of ``orderedDesc`` (the first three keys only exist
        for models with ``specialType == 1``)::

            { "molStr": '...',
              "height": 2,
              "atoms": "[1,2,3]",      # passed through eval()
              'Continuous': {
                  'DOWN': [[('[F]', -0.0088), ..., ('[F]', -0.0001)],
                           [('[O3]', -0.0073)]],
                  'UP':   [[('[Car]([Car][Nar])', 0.0097)],
                           [('[HC]([C3])', 0.0091)]]},
              'Discrete': {'DOWN': [], 'UP': []}}

        Each inner list groups ``(descriptorName, derivative)`` entries that
        share a rank.  For ``topN > 1`` the ``Continuous`` lists of the
        signature descriptors are consumed with ``pop(0)``; for special models
        those are the caller's own lists.

        Args:
            smi: SMILES used when signatures must be calculated here.
            prediction: predicted label (classification) or value (regression).
            orderedDesc: ranked descriptors, see above.
            res: dictionary receiving the results.
            resultsPath: directory for the image; ignored unless it exists.
            exWithDesc: example with precalculated descriptors, if any.
            idx: number embedded in the image file name.
            topN: how many top descriptors to report.
            regMinIsDesired: for regression, whether low values are desired.

        Returns:
            ``res`` when a significant signature was found.  ``None`` when the
            endpoint definition (``predictionOutcomes`` /
            ``significanceThreshold``) is missing, in which case ``res`` is
            untouched, or when no significant signature exists, in which case
            ``res`` holds the non-signature text and empty signature fields.
        """
        specialModel = (hasattr(self.model, "specialType")
                        and self.model.specialType == 1)
        atomColor = None

        orderedDesc_sign = {
            'Continuous': {'DOWN': [], 'UP': []},
            'Discrete': {'DOWN': [], 'UP': []},
        }
        orderedDesc_nonSign = {
            'Continuous': {'DOWN': [], 'UP': []},
            'Discrete': {'DOWN': [], 'UP': []},
        }

        if specialModel:
            print("This is a special model nr 1: It calculates itself the Significant Signatures")
            endHeight = orderedDesc["height"]
            try:
                molStr = orderedDesc["molStr"]
                # NOTE(review): eval() executes arbitrary code; confirm that
                # orderedDesc["atoms"] can only come from a trusted learner.
                atoms = eval(orderedDesc["atoms"])
            except Exception:
                atoms = None
                molStr = None
            if not molStr or not isinstance(atoms, list) or not atoms:
                atoms = None
                molStr = None

            orderedDesc_sign = orderedDesc
        else:
            atoms = None
            endHeight = None
            molStr = None
            cinfonyDesc, clabDesc, signatureHeight, bbrcDesc, signDesc = \
                descUtilities.getDescTypes(
                    [attr.name for attr in self.model.domain.attributes])
            # Split every rank group into its signature and non-signature
            # members, keeping rank order and dropping groups left empty.
            for attrType in ['Continuous', 'Discrete']:
                for vector in ['UP', 'DOWN']:
                    for group in orderedDesc[attrType][vector]:
                        signGroup = []
                        nonSignGroup = []
                        for attr in group:
                            if attr[0] in signDesc:
                                signGroup.append(attr)
                            else:
                                nonSignGroup.append(attr)
                        if signGroup:
                            orderedDesc_sign[attrType][vector].append(
                                signGroup)
                        if nonSignGroup:
                            orderedDesc_nonSign[attrType][vector].append(
                                nonSignGroup)

        # Process color to use if highlight is used
        outComeIsRev = None
        isDiscreteClass = (
            self.model.classVar.varType == orange.VarTypes.Discrete)
        if isDiscreteClass:
            if self.predictionOutcomes is None:
                print("WARNING: Cannot process Significance, Missing definition of predictionOutcomes for the EndPoint")
                return
            theGoodPred = str(self.predictionOutcomes[1])
            modelOutcomes = [str(p) for p in self.model.classVar.values]
            if modelOutcomes == self.predictionOutcomes:
                outComeIsRev = False
            elif modelOutcomes[::-1] == self.predictionOutcomes:
                outComeIsRev = True
            else:
                # Joined with single spaces to reproduce the output of the
                # original comma-separated print statement.
                print(" ".join([
                    "ERROR: User outcome ordered list is not consistens toth model: ",
                    str(self.predictionOutcomes), "<-->",
                    str(self.model.classVar.values)]))

            atomColor = 'g' if prediction == theGoodPred else 'r'
        else:
            if self.significanceThreshold is None:
                print("WARNING: Cannot process Significance, Missing definition of significanceThreshold for the EndPoint")
                return
            # Below the threshold it is a GOOD prediction
            atomColor = 'g' if prediction < self.significanceThreshold else 'r'

        # Process Signatures
        if exWithDesc:  # Precalculated signatures
            # cmpdSignList differ from when it is calc from smiles. However, not used.
            dataSign, cmpdSignDict, cmpdSignList, sdfStr = \
                self.getSignDataStruct(exWithDesc)
        else:
            # OBS Hard coded for signatures 0 to 1.
            smilesData = self.getAZOdata(smi)
            if "sign" in DescMethodsAvailable:
                dataSign, cmpdSignDict, cmpdSignList, sdfStr = \
                    getSignatures.getSignatures(
                        smilesData, 0, 1, returnAtomID=True,
                        useClabSmiles=False)
            else:
                dataSign = None
                cmpdSignDict = [{}] * len(smilesData)
                cmpdSignList = [[]] * len(smilesData)
                sdfStr = ""

        signUp = orderedDesc_sign["Continuous"]["UP"]
        signDown = orderedDesc_sign["Continuous"]["DOWN"]

        def firstPresent(ranked):
            """Locate the best ranked signature that exists in the molecule.

            Returns ``(abs(derivative), rankIdx, elemIdx)``; ``(0.0, 0, 0)``
            when nothing matches.  Ranks keep being scanned while the
            derivative found so far is exactly zero.
            """
            absVal, rankPos, elemPos = 0.0, 0, 0
            for rankIdx, group in enumerate(ranked):
                if absVal != 0.0:
                    break
                for elemIdx, entry in enumerate(group):
                    # Test that the signature exists in the molecule
                    if entry[0] in cmpdSignDict[0]:
                        absVal = abs(entry[1])
                        rankPos = rankIdx
                        elemPos = elemIdx
                        break
            return absVal, rankPos, elemPos

        if specialModel:
            # signSVM model already returning one sign as the most significant
            rankIdxDown = elemIdxDown = rankIdxUp = elemIdxUp = 0
            downAbs = abs(signDown[0][0][1]) if signDown else 0.0
            upAbs = abs(signUp[0][0][1]) if signUp else 0.0
        else:
            downAbs, rankIdxDown, elemIdxDown = firstPresent(signDown)
            upAbs, rankIdxUp, elemIdxUp = firstPresent(signUp)

        MSDsign = None
        MSDdv = 0
        # Could happen that all derivatives are smaller than epsilon
        if signUp or signDown:
            if topN == 1:  # Preserve syntax for Plato, only one significant signature
                if upAbs > downAbs:
                    entry = signUp[rankIdxUp][elemIdxUp]
                    MSDsign, MSDdv = entry[0], entry[1]
                elif downAbs > upAbs or downAbs != 0.0:
                    # DOWN also wins ties between non-zero derivatives
                    entry = signDown[rankIdxDown][elemIdxDown]
                    MSDsign, MSDdv = entry[0], entry[1]
                # else: nothing significant, keep MSDsign=None / MSDdv=0
            else:
                MSDsign = []
                MSDdv = []

                def head(groups):
                    """First entry of the first group, or None if missing."""
                    if groups and groups[0]:
                        return groups[0][0]
                    return None

                # "_" rather than "idx": the idx parameter is still needed
                # below to build the image file name.
                for _ in range(topN):
                    downHead = head(signDown)
                    upHead = head(signUp)
                    if downHead is not None and (
                            upHead is None
                            or abs(downHead[1]) > abs(upHead[1])):
                        source, top = signDown, downHead
                    elif upHead is not None:
                        source, top = signUp, upHead
                    else:
                        break  # nothing left to report
                    MSDsign.append(top[0])
                    MSDdv.append(top[1])
                    source.pop(0)

        # Process non-signatures
        if isDiscreteClass and outComeIsRev:
            UP = "DOWN"
            DOWN = "UP"
        elif not isDiscreteClass and not regMinIsDesired:
            UP = "DOWN"
            DOWN = "UP"
        else:
            UP = "UP"
            DOWN = "DOWN"

        # Process DiscreteAttrs
        MSDnonSign = ""
        discDown = orderedDesc_nonSign["Discrete"][DOWN]
        for n in range(min(topN, len(discDown))):
            if topN > 1:
                MSDnonSign += str(n + 1) + ": "
            MSDnonSign += '\n'.join(
                ["Change " + x[0] for x in discDown[n]]) + '\n'

        # Process Continuous attributes
        contUp = orderedDesc_nonSign["Continuous"][UP]
        contDown = orderedDesc_nonSign["Continuous"][DOWN]
        for n in range(topN):
            order = str(n + 1) + ": " if topN > 1 else ""
            downAbs = abs(contDown[0][0][1]) if contDown else 0.0
            upAbs = abs(contUp[0][0][1]) if contUp else 0.0

            # On a tie both directions are reported
            if contUp and upAbs >= downAbs:
                TOPmsd = contUp.pop(0)
                MSDnonSign += order + '\n'.join(
                    ["Decrease " + x[0] for x in TOPmsd]) + '\n'
            if contDown and downAbs >= upAbs:
                TOPmsd = contDown.pop(0)
                MSDnonSign += order + '\n'.join(
                    ["Increase " + x[0] for x in TOPmsd]) + '\n'

        res["non-signature"] = MSDnonSign

        # Most probably Signatures will always be associated with Discrete
        # attributes. Nevertheless, it happens that some are Continuous, and
        # therefore we will be using signatures reported as Continuous if any
        if not MSDsign:
            res["imgPath"] = ""
            res["signature"] = ""
            res["signarure_deriv_val"] = 0
            return

        if resultsPath and os.path.isdir(resultsPath):
            imgPath = os.path.join(
                resultsPath, "significance_" + str(idx) + "_" +
                str(time.time()).replace(".", '') + ".png")
        else:
            imgPath = ""

        # Call the method to create the image/mol specifying the color of the
        # highlighted atoms
        if exWithDesc is None:  # Don't use with precalc desc
            if molStr and atoms and endHeight is not None and not imgPath:
                print("Using molStr and atoms from Learner Significant Signature")
                res["imgPath"] = ''
                res["molStr"] = molStr
                allAtoms = self.getNNAtoms(molStr, atoms, endHeight)
                res["atoms"] = allAtoms
                res["color"] = [atomColor] * len(allAtoms)
            else:
                res["imgPath"], res["molStr"], res["atoms"], res["color"] = \
                    self.createSignImg(smi, MSDsign, atomColor, imgPath,
                                       endHeight)

        # Fix the significant descriptors so that it is a formated string
        res["signature"] = MSDsign
        res["signarure_deriv_val"] = MSDdv

        return res
Ejemplo n.º 5
0
    def getDescriptors(self, smiles):
        """Calculate the model's descriptors for ``smiles`` into ``self.smilesData``.

        ``self.getSmilesData(smiles)`` is called first.  The descriptor names in
        ``self.model.varNames`` are split by ``descUtilities.getDescTypes`` and
        each family that is both requested and available (signatures, C-Lab,
        Cinfony, BBRC) is calculated in turn, each result replacing
        ``self.smilesData``.

        Up to three attempts are made:

        * if at least one example ends up with every attribute flagged by
          ``isSpecial()``, ``self.smilesData`` is restored from the copy taken
          before the first attempt and the calculation is repeated;
        * if an exception is raised, its text (prefixed with the trace log) is
          stored in the local ``errorDesc`` and the calculation is repeated.
          ``self.smilesData`` is NOT restored in this case.

        Nothing is returned by the code visible here.
        """
        self.getSmilesData(smiles)

        # Calculate descriptors defined in the model files
        descList = self.model.varNames

        # Copy used to restart from scratch when an attempt yields bad examples
        savedSmilesData = dataUtilities.DataTable(self.smilesData)

        # Try 3 times to get all compounds descriptors
        nTry = 3
        errorDesc = ""
        # Bound before the loop so the except handler can always read it, even
        # when the very first statement of the try block is the one that fails.
        traceLog = ""
        while nTry > 0:
            try:
                traceLog = "Model Location:" + str(self.modelLocation) + "\n"
                nBadEx = 0
                # Determine Signature and non-Signature descriptor names
                cinfonyDesc, clabDesc, signatureHeight, bbrcDesc, signDesc = \
                    descUtilities.getDescTypes(descList)

                # Signatures
                if "sign" in DescMethodsAvailable and signatureHeight:
                    traceLog += "Calculating signatures...\n"
                    print("Calculating signatures....")
                    preCalcData = dataUtilities.DataTable(
                        self.preDefSignatureFile)
                    startHeight = 0  # Not used desc ignored in model prediction
                    endHeight = signatureHeight
                    self.smilesData = getSignatures.getSignatures(
                        self.smilesData, startHeight, endHeight, preCalcData)

                # C-Lab desc
                if "clab" in DescMethodsAvailable and clabDesc:
                    traceLog += "Calculating C-Lab...\n"
                    print("Calculating C-Lab desc....")
                    self.smilesData = ClabUtilities.appendCLabDesc(
                        clabDesc, self.smilesData)

                # Cinfony
                if cinfonyDesc:
                    traceLog += "Calculating Cinfony...\n"
                    print("Calculating Cinfony desc...")
                    self.smilesData = getCinfonyDesc.getCinfonyDescResults(
                        self.smilesData, cinfonyDesc, radius=5)

                # bbrcDesc
                if "bbrc" in DescMethodsAvailable and bbrcDesc:
                    traceLog += "Calculating BBRC...\n"
                    print("Calculating BBRC desc...")
                    self.smilesData = getBBRCDesc.getBBRCDescResult(
                        self.smilesData,
                        algo="FTM",
                        minSupPar=1,
                        descList=bbrcDesc)

                # Detect if the descriptor calculation or something else went
                # wrong: an example is "bad" when all its attributes are special.
                attributes = self.smilesData.domain.attributes
                for ex in self.smilesData:
                    if all([ex[attr].isSpecial() for attr in attributes]):
                        nBadEx += 1

                if nBadEx:
                    warning = ("WARNING: Desc. Calculation: From the " +
                               str(len(self.smilesData)) + " compounds, " +
                               str(nBadEx) + " could not be calculated!")
                    traceLog += warning + "\n"
                    print(warning)
                    print("WARNING:   Tying again...")
                    self.smilesData = dataUtilities.DataTable(savedSmilesData)
                    nTry -= 1
                else:
                    # Every example has at least one usable value: stop retrying
                    nTry = 0
            except Exception as e:
                errorDesc = ("Error Calculating Descriptors:;" + traceLog +
                             str(e) + ";")
                nTry -= 1
Ejemplo n.º 6
0
    def getTopImportantVars(self,
                            inEx,
                            nVars=1,
                            gradRef=None,
                            absGradient=True,
                            c_step=None,
                            getGrad=False):
        """Return the n top important variables (n = nVars) for the given example
            if nVars is 0, it returns all variables ordered by importance
            if c_step (costume step) is passed, force it instead of hardcoded
        """
        #    Determine Signature and non-Signature descriptor names
        #signDesc = []   # This Disable distinction from signatures ans non-signatures
        cinfonyDesc, clabDesc, signatureHeight, bbrcDesc, signDesc = descUtilities.getDescTypes(
            [attr.name for attr in self.domain.attributes])

        varGrad = []

        ExFix = dataUtilities.ExFix()
        ExFix.set_domain(self.domain)
        ex = ExFix.fixExample(inEx)
        if self.basicStat == None or not self.NTrainEx or (
                self.domain.classVar.varType == orange.VarTypes.Discrete
                and len(self.domain.classVar.values) != 2):
            return None
        if gradRef == None:
            gradRef = self(ex, returnDFV=True)[1]

        def calcDiscVarGrad(var, ex, gradRef):
            step = 1  # MUST be 1!!
            if ex[var].isSpecial():
                return ([gradRef, gradRef], step)
            localMaxDiff = 0
            localMaxPred = gradRef
            #Uncomment next line to skip discrete variables
            #return localMaxPred
            for val in self.domain[var].values:
                localEx = orange.Example(ex)
                localEx[var] = val
                pred = self(localEx, returnDFV=True)[1]
                if abs(pred - gradRef) > localMaxDiff:
                    localMaxDiff = abs(pred - gradRef)
                    localMaxPred = pred
            return ([localMaxPred, gradRef], step)

        def calcContVarGrad(var, ex, gradRef):
            localEx = orange.Example(ex)
            if c_step is None:
                if self.domain.classVar.varType == orange.VarTypes.Discrete:  # Classification
                    coef_step = 1.0
                else:
                    coef_step = 0.08  # Needs confirmation! Coefficient step: c
            else:
                #  used for testing significance: comment next and uncomment next-next
                raise (Exception(
                    "This mode should only be used for debugging! Comment this line if debugging."
                ))
                #coef_step = float(c_step)

            if var in signDesc:
                step = 1  # Set step to one in case od signatures
            else:
                #   dev - Standard deviation:  http://orange.biolab.si/doc/reference/Orange.statistics.basic/
                if "dev" in self.basicStat[var]:
                    step = self.basicStat[var]["dev"] * coef_step
                else:
                    return ([gradRef, gradRef], 0)

            if ex[var].isSpecial():
                return ([gradRef, gradRef], step)
            # step UP
            localEx[var] = ex[var] + step
            ResUp = self(localEx, returnDFV=True)[1]
            # step DOWN
            localEx[var] = ex[var] - step
            ResDown = self(localEx, returnDFV=True)[1]
            return ([ResUp, ResDown], step)

        def calcVarGrad(var, ex, gradRef):
            if attr.varType == orange.VarTypes.Discrete:
                res, step = calcDiscVarGrad(attr.name, ex, gradRef)
                #          f(a)   f(x)
                _grad = (res[0] - res[1])  # /step   ... but step MUST be 1!!
                _faMax = res[0]
            else:
                res, step = calcContVarGrad(attr.name, ex, gradRef)
                if step == 0:
                    _grad = 0
                else:
                    _grad = (res[0] - res[1]) / (2.0 * step)
                _faMax = None
            return (_grad, _faMax)

        def compareABS(x, y):
            if abs(x) > abs(y):
                return 1
            elif abs(x) < abs(y):
                return -1
            else:
                return 0

        eps = 1E-5  # epsilon: amplitude of derivatives that will be considered 0. Attributes with derivative amplitude less than epsilon will not be considered.
        # Print used for algorithm final confirmation
        #print "  %s  " % (str(gradRef)),

        for attr in self.domain.attributes:
            grad = calcVarGrad(attr.name, ex, gradRef)
            # Print used for testing significance
            #print  "  %s  " % (str(grad[0])),

            # Print used for algorithm final confirmation
            #print "  %s  " % (str(grad[1])),

            if attr.name in signDesc:
                actualEps = 0
            else:
                actualEps = eps
            if abs(
                    grad[0]
            ) > actualEps:  # only consider attributes with derivative greatest than epsilon
                #                  f'(x)                  x             f(a)
                #                derivative value     direction      f(a) farest away from f(x) only setted for classification
                varGrad.append((grad[0], attr.name, grad[1]))

        #Separate continuous from categorical variables
        contVars = []
        discVars = []
        for var in varGrad:
            if self.domain[var[1]].varType == orange.VarTypes.Discrete:
                discVars.append(var)
            else:
                contVars.append(var)

        if nVars == 0:
            nRet = None
        else:
            nRet = nVars

        #Order the vars in terms of importance
        if absGradient:
            contVars.sort(reverse=1, cmp=lambda x, y: compareABS(x[0], y[0]))
            contVars = getVarNames(groupTiedScores(contVars, 0),
                                   getGrad=getGrad)
            discVars.sort(reverse=1, cmp=lambda x, y: compareABS(x[0], y[0]))
            discVars = getVarNames(groupTiedScores(discVars, 0),
                                   getGrad=getGrad)
            return {"Continuous":contVars[0:min(nRet,len(contVars))] ,\
                    "Discrete"  :discVars[0:min(nRet,len(discVars))] }

        if self.domain.classVar.varType == orange.VarTypes.Discrete:  # Classificatio
            # We will be looking to the max f(a) [2]
            # Will be excluding attributes for which f(a) was between 0 and f(x):  |f(a)| < |f(x)| AND f(x)*f(a)>0
            idx4Rem = []
            for idx, v in enumerate(discVars):
                fx = gradRef
                fa = v[2]
                if abs(fa) < abs(fx) and (fx * fa) > 0:
                    idx4Rem.append(idx)
            idx4Rem.sort(reverse=True)
            for idx in idx4Rem:
                discVars.pop(idx)

        # (3 lines) Print used for algorithm final confirmation
        #        print "   %s   " % (idx4Rem),
        #else:
        #        print "   %s   " % ([]),

        # Now we will be looking only to the actual derivative value; [0]
        UPd = [v for v in discVars if v[0] > 0]
        UPd.sort(reverse=1, cmp=lambda x, y: compareABS(x[0], y[0]))
        UPd = getVarNames(groupTiedScores(UPd, 0), getGrad=getGrad)

        DOWNd = [v for v in discVars if v[0] < 0]
        DOWNd.sort(reverse=1, cmp=lambda x, y: compareABS(x[0], y[0]))
        DOWNd = getVarNames(groupTiedScores(DOWNd, 0), getGrad=getGrad)

        UPc = [v for v in contVars if v[0] > 0]
        UPc.sort(reverse=1, cmp=lambda x, y: compareABS(x[0], y[0]))
        UPc = getVarNames(groupTiedScores(UPc, 0), getGrad=getGrad)
        DOWNc = [v for v in contVars if v[0] < 0]
        DOWNc.sort(reverse=1, cmp=lambda x, y: compareABS(x[0], y[0]))
        DOWNc = getVarNames(groupTiedScores(DOWNc, 0), getGrad=getGrad)


        return {"Continuous":{"UP":   UPc[0:min(nRet,len(  UPc))],\
                              "DOWN": DOWNc[0:min(nRet,len(DOWNc))]},\
                "Discrete":  {"UP":   UPd[0:min(nRet,len(  UPd))],\
                              "DOWN": DOWNd[0:min(nRet,len(DOWNd))]}   }
Ejemplo n.º 7
0
    def getTopImportantVars(self, inEx, nVars = 1, gradRef = None, absGradient = True, c_step = None, getGrad = False):
        """Return the nVars most important variables for the prediction of the given example.

        Importance is the numerical derivative of the decision function value, i.e. the
        second element of self(ex, returnDFV = True), with respect to each attribute:
          - Discrete attribute:   f(a) - f(x), where f(a) is the prediction farthest away from
                                  f(x) among all the values of the attribute (step is 1).
          - Continuous attribute: central difference (f(x+step) - f(x-step)) / (2*step), where
                                  step is 1 for signature descriptors and
                                  0.5 * self.basicStat[var]["dev"] otherwise. The derivative is
                                  0 when no "dev" statistic is available.
        Attributes for which |derivative| is not greater than epsilon (1E-5, or 0 for
        signature descriptors) are not reported. Missing values give a derivative of 0.

        Parameters:
            inEx        - example to analyze; converted to self.domain by dataUtilities.ExFix
            nVars       - number of entries kept in each returned list; 0 keeps all of them
            gradRef     - reference value f(x); computed from the example when None
            absGradient - True: rank by |derivative| only; False: split in UP and DOWN
            c_step      - debugging only: any value other than None raises an Exception as
                          soon as a continuous attribute is evaluated
            getGrad     - forwarded to getVarNames
        Returns:
            None if self.basicStat is None, if self.NTrainEx is false, or if the class is
            discrete with a number of values different from 2. Otherwise:
              absGradient True : {"Continuous": [...], "Discrete": [...]}
              absGradient False: {"Continuous": {"UP": [...], "DOWN": [...]},
                                  "Discrete":   {"UP": [...], "DOWN": [...]}}
            Each list is the output of getVarNames(groupTiedScores(...)) applied to the
            attributes ordered by decreasing |derivative|, cut to nVars entries.
        """
        #    Determine Signature and non-Signature descriptor names
        cinfonyDesc, clabDesc, signatureHeight, bbrcDesc, signDesc = descUtilities.getDescTypes([attr.name for attr in self.domain.attributes])

        ExFix = dataUtilities.ExFix()
        ExFix.set_domain(self.domain)
        ex = ExFix.fixExample(inEx)
        if self.basicStat is None or not self.NTrainEx or (self.domain.classVar.varType == orange.VarTypes.Discrete and len(self.domain.classVar.values) != 2):
            return None
        if gradRef is None:
            gradRef = self(ex, returnDFV = True)[1]

        def calcDiscVarGrad(var, ex, gradRef):
            """Return ([f(a), f(x)], step) where f(a) is the prediction farthest away from f(x)."""
            step = 1   # MUST be 1!!
            if ex[var].isSpecial():
                return ([gradRef, gradRef], step)
            localMaxDiff = 0
            localMaxPred = gradRef
            for val in self.domain[var].values:
                localEx = orange.Example(ex)
                localEx[var] = val
                pred = self(localEx, returnDFV = True)[1]
                if abs(pred - gradRef) > localMaxDiff:
                    localMaxDiff = abs(pred - gradRef)
                    localMaxPred = pred
            return ([localMaxPred, gradRef], step)

        def calcContVarGrad(var, ex, gradRef):
            """Return ([f(x+step), f(x-step)], step); step is 0 if it cannot be determined."""
            if c_step is None:
                coef_step = 0.5   # Needs confirmation! Coefficient step: c
            else:
                #  used for testing significance: comment next and uncomment next-next
                raise Exception("This mode should only be used for debugging! Comment this line if debugging.")
                #coef_step = float(c_step)

            if var in signDesc:
                step = 1           # Set step to one in case of signatures
            elif "dev" in self.basicStat[var]:
                #   dev - Standard deviation:  http://orange.biolab.si/doc/reference/Orange.statistics.basic/
                step = self.basicStat[var]["dev"] * coef_step
            else:
                return ([gradRef, gradRef], 0)

            if ex[var].isSpecial():
                return ([gradRef, gradRef], step)
            localEx = orange.Example(ex)
            # step UP
            localEx[var] = ex[var] + step
            ResUp = self(localEx, returnDFV = True)[1]
            # step DOWN
            localEx[var] = ex[var] - step
            ResDown = self(localEx, returnDFV = True)[1]
            return ([ResUp, ResDown], step)

        def calcVarGrad(attr, ex, gradRef):
            """Return (derivative, f(a)) for one attribute; f(a) is None for continuous ones."""
            if attr.varType == orange.VarTypes.Discrete:
                res, step = calcDiscVarGrad(attr.name, ex, gradRef)
                #          f(a)   f(x)
                _grad = (res[0] - res[1])  # /step   ... but step MUST be 1!!
                _faMax = res[0]
            else:
                res, step = calcContVarGrad(attr.name, ex, gradRef)
                if step == 0:
                    _grad = 0
                else:
                    _grad = (res[0] - res[1]) / (2.0 * step)
                _faMax = None
            return (_grad, _faMax)

        def rankVars(candidates):
            """Order by decreasing |derivative| (stable for ties) and format with getVarNames."""
            ordered = sorted(candidates, key = lambda v: abs(v[0]), reverse = True)
            return getVarNames(groupTiedScores(ordered, 0), getGrad = getGrad)

        eps = 1E-5   # epsilon: amplitude of derivatives that will be considered 0. Attributes with derivative amplitude less than epsilon will not be considered.

        # Each entry is:  (f'(x) derivative value,  attribute name,  f(a) farthest away from f(x); only set for discrete attributes)
        contVars = []
        discVars = []
        for attr in self.domain.attributes:
            grad = calcVarGrad(attr, ex, gradRef)
            if attr.name in signDesc:
                actualEps = 0
            else:
                actualEps = eps
            if abs(grad[0]) > actualEps:  # only consider attributes with derivative greater than epsilon
                entry = (grad[0], attr.name, grad[1])
                #Separate continuous from categorical variables
                if self.domain[attr.name].varType == orange.VarTypes.Discrete:
                    discVars.append(entry)
                else:
                    contVars.append(entry)

        # A slice end of None keeps every element, so nVars == 0 means "return all"
        if nVars == 0:
            nRet = None
        else:
            nRet = nVars

        #Order the vars in terms of importance
        if absGradient:
            contRank = rankVars(contVars)
            discRank = rankVars(discVars)
            return {"Continuous": contRank[:nRet],
                    "Discrete":   discRank[:nRet]}

        if self.domain.classVar.varType == orange.VarTypes.Discrete:  # Classification
            # We will be looking to the max f(a) [2]
            # Will be excluding attributes for which f(a) was between 0 and f(x):  |f(a)| < |f(x)| AND f(x)*f(a)>0
            discVars = [v for v in discVars
                        if not (abs(v[2]) < abs(gradRef) and (gradRef * v[2]) > 0)]

        # Now we will be looking only to the actual derivative value; [0]
        UPd = rankVars([v for v in discVars if v[0] > 0])
        DOWNd = rankVars([v for v in discVars if v[0] < 0])
        UPc = rankVars([v for v in contVars if v[0] > 0])
        DOWNc = rankVars([v for v in contVars if v[0] < 0])

        return {"Continuous": {"UP":   UPc[:nRet],
                               "DOWN": DOWNc[:nRet]},
                "Discrete":   {"UP":   UPd[:nRet],
                               "DOWN": DOWNd[:nRet]}}
Ejemplo n.º 8
0
    def processSignificance(self, smi, prediction, orderedDesc, res, resultsPath, idx = 0, topN = 1):
        """Select the most significant descriptors of a prediction and store them in res.

        Nothing is returned; the results are written to the res dictionary:
           res =  { "signature"     : "",       most significant signature
                    "signarure_deriv_val" : 0,  derivative of that signature (key spelling is
                                                kept as is since it is part of the interface)
                    "imgPath"       : "",       for placing the results
                    "non-signature" : "",       text listing the most significant non-signatures
                    "molStr"        : "",
                    "atoms"         : []
                    "color"         : [(r,g,b),(),...]}
        res is left untouched when the colour cannot be decided (see below), and only
        "non-signature", "imgPath", "signature" and "signarure_deriv_val" are set when no
        significant signature is found.

        It uses for Classification:
                        self.predictionOutcomes that must define [BADlabel, GOODlabel] in this same order
            and for Regression:
                        self.significanceThreshold for which a GOOD prediction is BELOW the threshold
        If the required one is None, a warning is printed and the method returns.

        orderedDesc = { "molStr":''...,                                                              # only on specialType=1
                        "height":2,                                                                  # only on specialType=1
                        "atoms":[1,2,3],                                                             # only on specialType=1
                          'Continuous': {
                          'DOWN':[ [('[F]', -0.008885456983609475), ... ('[F]',-0,0001)],
                                 [('[O3]', -0.007324209285573964)],
                                 [('[C3]([C3][C3])', -0.0047175657931883405)],
                                 [('[C3]', -0.00389763719161594)]],
                           'UP': [[('[Car]([Car][Nar])', 0.009768981717302358)],
                                 [('[HC]([C3])', 0.009135563633559857)]]},
                       'Discrete': {'DOWN': [], 'UP': []}}

        Other parameters:
            smi         - SMILES of the compound; passed to self.getAZOdata and self.createSignImg
            prediction  - predicted value, compared with the GOOD label or with the threshold
            resultsPath - if it is an existing directory, the image path is created inside it
            idx         - index used in the image file name
            topN        - number of ranks of non-signature descriptors to report
        """
        atomColor = None
        # Models with specialType 1 already report their own most significant signature
        isSpecialModel = hasattr(self.model, "specialType") and self.model.specialType == 1

        orderedDesc_sign =    {'Continuous': {'DOWN': [], 'UP': []},
                               'Discrete': {'DOWN': [], 'UP': []}}

        orderedDesc_nonSign = {'Continuous': {'DOWN': [], 'UP': []},
                               'Discrete': {'DOWN': [], 'UP': []}}

        if isSpecialModel:
            print("This is a special model nr 1: It calculates itself the Significant Signatures")
            endHeight = orderedDesc["height"]
            try:
                molStr = orderedDesc["molStr"]
                # NOTE(review): eval() runs arbitrary code; if "atoms" can ever come from an
                # untrusted source this should become ast.literal_eval - confirm with the model owners
                atoms = eval(orderedDesc["atoms"])
            except Exception:
                atoms = None
                molStr = None
            if not molStr or not isinstance(atoms, list) or not atoms:
                atoms = None
                molStr = None

            orderedDesc_sign = orderedDesc
        else:
            atoms = None
            endHeight = None
            molStr = None
            cinfonyDesc, clabDesc, signatureHeight, bbrcDesc, signDesc = descUtilities.getDescTypes([attr.name for attr in self.model.domain.attributes])
            # Split every group of tied descriptors in signatures and non-signatures keeping the rank order
            for attrType in ['Continuous', 'Discrete']:
                for vector in ['UP', 'DOWN']:
                    for tiedGroup in orderedDesc[attrType][vector]:
                        signGroup = []
                        nonSignGroup = []
                        for attr in tiedGroup:
                            if attr[0] in signDesc:
                                signGroup.append(attr)
                            else:
                                nonSignGroup.append(attr)
                        if signGroup:
                            orderedDesc_sign[attrType][vector].append(signGroup)
                        if nonSignGroup:
                            orderedDesc_nonSign[attrType][vector].append(nonSignGroup)

        #Process color to use if highlight is used
        outComeIsRev = None
        if self.model.classVar.varType == orange.VarTypes.Discrete:
            if self.predictionOutcomes is None:
                print("WARNING: Cannot process Significance, Missing definition of predictionOutcomes for the EndPoint")
                return
            theGoodPred = str(self.predictionOutcomes[1])
            modelOutcomes = [str(p) for p in self.model.classVar.values]
            if modelOutcomes == self.predictionOutcomes:
                outComeIsRev = False
            elif modelOutcomes[::-1] == self.predictionOutcomes:
                outComeIsRev = True
            else:
                # Same text as the comma separated print statement: items joined by one space
                print("%s %s %s %s" % ("ERROR: User outcome ordered list is not consistens toth model: ",
                                       self.predictionOutcomes, "<-->", self.model.classVar.values))

            if prediction == theGoodPred:
                atomColor = 'g'
            else:
                atomColor = 'r'
        else:
            if self.significanceThreshold is None:
                print("WARNING: Cannot process Significance, Missing definition of significanceThreshold for the EndPoint")
                return
            if prediction < self.significanceThreshold:    # It is a GOOD prediction
                atomColor = 'g'
            else:
                atomColor = 'r'

        #Process Signatures
        # OBS Hard coded for signatures 0 to 1.
        smilesData = self.getAZOdata(smi)
        dataSign, cmpdSignDict, cmpdSignList, sdfStr = getSignatures.getSignatures(smilesData, 0, 1, returnAtomID = True, useClabSmiles = False)

        def topSignature(vector):
            """Return (|derivative|, signature, derivative) of the top signature in one direction.

            (0.0, None, 0) is returned when there is no candidate.
            """
            ranked = orderedDesc_sign["Continuous"][vector]
            if isSpecialModel:
                # signSVM model is already returning one sign as the most significant
                if len(ranked):
                    best = ranked[0][0]
                    return (abs(best[1]), best[0], best[1])
                return (0.0, None, 0)
            found = (0.0, None, 0)
            for tiedGroup in ranked:
                for attr in tiedGroup:
                    # Test that the signature exists in the molecule
                    if attr[0] in cmpdSignDict[0].keys():
                        found = (abs(attr[1]), attr[0], attr[1])
                        break
                # Keep looking in lower ranks while no signature with non-zero derivative was found
                if found[0] != 0.0:
                    break
            return found

        downAbs, downSign, downDv = topSignature("DOWN")
        upAbs, upSign, upDv = topSignature("UP")

        if upAbs > downAbs:
            MSDsign = upSign
            MSDdv = upDv
        elif downAbs != 0.0:
            # DOWN is used when it is the strongest and also when UP and DOWN are tied
            MSDsign = downSign
            MSDdv = downDv
        else:
            MSDsign = None
            MSDdv = 0

        #Process non-signatures
        if self.model.classVar.varType == orange.VarTypes.Discrete and outComeIsRev:
            UP = "DOWN"
            DOWN = "UP"
        else:
            UP = "UP"
            DOWN = "DOWN"
        #Process DiscreteAttrs
        MSDnonSign = ""
        discDown = orderedDesc_nonSign["Discrete"][DOWN]
        for n in range(min(topN, len(discDown))):
            if topN > 1:
                MSDnonSign += str(n+1)+": "
            MSDnonSign += '\n'.join(["Change "+x[0] for x in discDown[n]])+'\n'

        #Process Continuous attributes
        contUp = orderedDesc_nonSign["Continuous"][UP]
        contDown = orderedDesc_nonSign["Continuous"][DOWN]
        for n in range(topN):
            if topN > 1:
                order = str(n+1)+": "
            else:
                order = ""
            if len(contDown):
                nonSignDownAbs = abs(contDown[0][0][1])
            else:
                nonSignDownAbs = 0.0
            if len(contUp):
                nonSignUpAbs = abs(contUp[0][0][1])
            else:
                nonSignUpAbs = 0.0

            # The top rank is consumed (pop) so that the next iteration reports the following one.
            # On a tie both directions are reported under the same order number.
            if contUp and nonSignUpAbs >= nonSignDownAbs:
                TOPmsd = contUp.pop(0)
                MSDnonSign += order + '\n'.join(["Decrease "+x[0] for x in TOPmsd])+'\n'
            if contDown and nonSignDownAbs >= nonSignUpAbs:
                TOPmsd = contDown.pop(0)
                MSDnonSign += order + '\n'.join(["Increase "+x[0] for x in TOPmsd])+'\n'

        res["non-signature"] = MSDnonSign

        # Most probably Signatures will always be associated with Discrete attributes. Nevertheless, it happens that some are Continuous, and therefore
        #  we will be using signatures reported as Continuous if any
        if not MSDsign:
            res["imgPath"] = ""
            res["signature"] = ""
            res["signarure_deriv_val"] = 0
            return
        if resultsPath and os.path.isdir(resultsPath):
            imgPath = os.path.join(resultsPath, "significance_"+str(idx)+"_"+str(time.time()).replace(".", '')+".png")
        else:
            imgPath = ""
        # Call the method to create the image/mol specifying the color of the highlighted atoms
        if molStr and atoms and endHeight is not None and not imgPath:
            print("Using molStr and atoms from Learner Significant Signature")
            res["imgPath"] = ''
            res["molStr"] = molStr
            allAtoms = self.getNNAtoms(molStr, atoms, endHeight)
            res["atoms"] = allAtoms
            res["color"] = [atomColor]*len(allAtoms)
        else:
            res["imgPath"], res["molStr"], res["atoms"], res["color"] = self.createSignImg(smi, MSDsign, atomColor, imgPath, endHeight)
        #Fix the significant descriptors so that it is a formatted string
        res["signature"] = MSDsign
        res["signarure_deriv_val"] = MSDdv