コード例 #1
0
 def hide_test_getCinfonyDesc(self):
     allDescs = getCinfonyDesc.getAvailableDescs()
     print "\nN Retrieved desc: ",len(allDescs)
     self.assert_(len(allDescs) > 200)  # Actualy 220
     descs = ['cdk.BCUT', 'cdk.weight','webel.AtomCountDescriptor', 'webel.AutocorrelationDescriptorCharge', 'webel.AutocorrelationDescriptorMass',"obabel.TPSA","obabel.HBA2" , "obabel.MW","rdk.TPSA","rdk.Chi0n","rdk.MolWt"]
     resD = getCinfonyDesc.getCinfonyDescResults(self.smiData,descs)
     self.assertEqual(len(resD),len(self.smiData))
コード例 #2
0
 def test_getCinfonyDesc(self):
     allDescs = getCinfonyDesc.getAvailableDescs()
     print "\nN Retrieved desc: ",len(allDescs)
     self.assert_(len(allDescs) > 200)  # Actualy 220
     descs = ['cdk.BCUT', 'cdk.weight','webel.AtomCountDescriptor', 'webel.AutocorrelationDescriptorCharge', 'webel.AutocorrelationDescriptorMass',"obabel.TPSA","obabel.HBA2" , "obabel.MW","rdk.TPSA","rdk.Chi0n","rdk.MolWt"]
     resD = getCinfonyDesc.getCinfonyDescResults(self.smiData,descs)
     self.assertEqual(len(resD),len(self.smiData))
コード例 #3
0
    def getDescriptors(self, smiles):
        self.getSmilesData(smiles)

        # Calculate descriptors defined in the model files
        descList = self.model.varNames
   
        savedSmilesData = dataUtilities.DataTable(self.smilesData)

        #Try 3 time to get All compounds descriptors
        nTry = 3       
        errorDesc = "" 
        while nTry > 0:
           try:
                traceLog = "Model Location:"+str(self.modelLocation)+"\n"
                nBadEx = 0        
                # Determine Signature and non-Signature descriptor names
                cinfonyDesc, clabDesc, signatureHeight, bbrcDesc, signDesc = descUtilities.getDescTypes(descList)
                # Signatures
                if "sign" in DescMethodsAvailable and signatureHeight:
                    traceLog += "Calculating signatures...\n"
                    print "Calculating signatures...."
                    preCalcData = dataUtilities.DataTable(self.preDefSignatureFile)
                    startHeight = 0                # Not used desc ignored in model prediction
                    endHeight = signatureHeight  
                    self.smilesData  = getSignatures.getSignatures(self.smilesData, startHeight, endHeight, preCalcData)

                # C-Lab desc
                if "clab" in DescMethodsAvailable and clabDesc:
                    traceLog += "Calculating C-Lab...\n"
                    print "Calculating C-Lab desc...."
                    self.smilesData = ClabUtilities.appendCLabDesc(clabDesc, self.smilesData)

                # Cinfony
                if cinfonyDesc:
                    traceLog += "Calculating Cinfony...\n"
                    print "Calculating Cinfony desc..."
                    self.smilesData = getCinfonyDesc.getCinfonyDescResults(self.smilesData, cinfonyDesc, radius = 5)

                # bbrcDesc
                if "bbrc" in DescMethodsAvailable and bbrcDesc:
                    traceLog += "Calculating BBRC...\n"
                    print "Calculating BBRC desc..."
                    self.smilesData = getBBRCDesc.getBBRCDescResult(self.smilesData, algo = "FTM", minSupPar = 1, descList = bbrcDesc)

                # Detect if the descripts calaculation or something else went wrong!
                for ex in self.smilesData:
                   if sum([ex[attr].isSpecial() for attr in self.smilesData.domain.attributes]) == len(self.smilesData.domain.attributes):
                        nBadEx +=1
                if nBadEx:
                    traceLog += "WARNING: Desc. Calculation: From the "+str(len(self.smilesData))+" compounds, "+str(nBadEx)+" could not be calculated!\n"
                    print "WARNING: Desc. Calculation: From the "+str(len(self.smilesData))+" compounds, "+str(nBadEx)+" could not be calculated!"
                    print "WARNING:   Tying again..."
                    self.smilesData = dataUtilities.DataTable(savedSmilesData)
                    nTry -= 1
                else:
                    nTry = 0
           except Exception, e:
                errorDesc = "Error Calculating Descriptors:;"+traceLog+str(e)+";"
                nTry -= 1
コード例 #4
0
def getDesc(trainDataFile):

    # Read the SAR for which to calculate descriptors
    sarData = dataUtilities.DataTable(trainDataFile) 

    # Get names of RDK descriptors
    rdkDescs = getCinfonyDesc.getAvailableDescs("rdk")
   
    # Calculate the descriptors 
    trainData = getCinfonyDesc.getCinfonyDescResults(sarData, rdkDescs)

    # Deselect the SMILES attribute
    attrList = ["SMILES"]
    trainData = dataUtilities.attributeDeselectionData(trainData, attrList)
     
    # Save the trainData set
    trainData.save("trainData.tab") 

    return trainData
コード例 #5
0
ファイル: OWCinfonyDesc.py プロジェクト: AZCompTox/AZOrange
    def __retrieve(self,descList):
        self.error(0)
        self.warning(0)
        resultsData = None
        if not self.data:
            self.warning("Missing input data!")
            self.send("Examples",None)
            return
        else:
            progressSteps = 5 
            progress1 = QProgressDialog("Calculating selected descriptors with Cinfony", "Cancel", 0, progressSteps , None, Qt.Dialog )
            progress1.setWindowModality(Qt.WindowModal)
            progress1.setMinimumDuration(0)
            progress1.forceShow()
            progress1.setValue(1)
            time.sleep(0.1)
            progress1.setValue(2)
            try: 
                resultsData = getCinfonyDesc.getCinfonyDescResults(self.data , descList)
                progress1.setValue(3)
            except:
                typeEx, val, traceback = sys.exc_info()
                progress1.close()
                self.error("Error calculating the descriptors with Cinfony.\n")
                print typeEx
                print "\t",val
                self.send("Examples",None)
                return
            time.sleep(0.1)
            progress1.setValue(4)
            time.sleep(0.1)
            progress1.setValue(5)
            progress1.close()

        if resultsData:
            self.send("Examples",resultsData)
        else:
            self.warning("Nothing retrieved from Cinfony.")
            self.send("Examples",None)
コード例 #6
0
def getDesc(trainDataFile):

    # Read the SAR for which to calculate descriptors
    sarData = dataUtilities.DataTable(trainDataFile)

    # Get names of RDK descriptors
    rdkDescs = getCinfonyDesc.getAvailableDescs("rdk")

    # Calculate the descriptors
    trainData = getCinfonyDesc.getCinfonyDescResults(sarData, rdkDescs)

    # Deselect the SMILES attribute
    attrList = [
        attr.name for attr in trainData.domain.attributes
        if attr.varType == orange.Variable.String
    ]

    trainData = dataUtilities.attributeDeselectionData(trainData, attrList)

    # Save the trainData set
    trainData.save("trainData.tab")

    return trainData
コード例 #7
0
    def getAcc(self, callBack=None, algorithm=None, params=None, atts=None, holdout=None):
        """ For regression problems, it returns the RMSE and the Q2 
            For Classification problems, it returns CA and the ConfMat
            The return is made in a Dict: {"RMSE":0.2,"Q2":0.1,"CA":0.98,"CM":[[TP, FP],[FN,TN]]}
            For the EvalResults not supported for a specific learner/datase, the respective result will be None

            if the learner is a dict {"LearnerName":learner, ...} the results will be a dict with results for all Learners and for a consensus
                made out of those that were stable

            It some error occurred, the respective values in the Dict will be None
                
			parameters:
                algorithm - list of feature generation algorithms (set dependent features that have to be calculated inside the crossvalidation)
                params - dictionary of parameters
                atts - attributes to be removed before learning (e.g. meta etc...)
        """
        self.__log("Starting Calculating MLStatistics")
        statistics = {}
        if not self.__areInputsOK():
            return None

        if holdout:
            self.nExtFolds = 1

        if algorithm:
            self.__log(" Additional features to be calculated inside of cross-validation")
            for i in algorithm:
                self.__log(" Algorithm: " + str(i))
            for j, v in params.iteritems():
                self.__log(" Parameter: " + str(j) + " = " + str(v))

        # Set the response type
        self.responseType = (
            self.data.domain.classVar.varType == orange.VarTypes.Discrete and "Classification" or "Regression"
        )
        self.__log("  " + str(self.responseType))

        # Create the Train and test sets
        DataIdxs = None
        if holdout:
            self.__log("Using hold out evaluation with " + str(holdout) + "*100 % of data for training")
            DataIdxs = dataUtilities.SeedDataSampler_holdOut(self.data, holdout)
        else:
            DataIdxs = dataUtilities.SeedDataSampler(self.data, self.nExtFolds)

        # Var for saving each Fols result
        optAcc = {}
        results = {}
        exp_pred = {}
        nTrainEx = {}
        nTestEx = {}

        # Set a dict of learners
        MLmethods = {}
        if type(self.learner) == dict:
            for ml in self.learner:
                MLmethods[ml] = self.learner[ml]
        else:
            MLmethods[self.learner.name] = self.learner

        models = {}
        rocs = {}
        self.__log("Calculating Statistics for MLmethods:")
        self.__log("  " + str([x for x in MLmethods]))

        # Check data in advance so that, by chance, it will not fail at the last fold!
        for foldN in range(self.nExtFolds):
            trainData = self.data.select(DataIdxs[foldN], negate=1)
            self.__checkTrainData(trainData)

        # Optional!!
        # Order Learners so that PLS is the first
        sortedML = [ml for ml in MLmethods]
        if "PLS" in sortedML:
            sortedML.remove("PLS")
            sortedML.insert(0, "PLS")

        stepsDone = 0
        nTotalSteps = len(sortedML) * self.nExtFolds
        for ml in sortedML:
            self.__log("    > " + str(ml) + "...")
            try:
                # Var for saving each Fols result
                results[ml] = []
                exp_pred[ml] = []
                models[ml] = []
                rocs[ml] = []
                nTrainEx[ml] = []
                nTestEx[ml] = []
                optAcc[ml] = []
                logTxt = ""

                for foldN in range(self.nExtFolds):
                    if type(self.learner) == dict:
                        self.paramList = None

                    trainData = self.data.select(DataIdxs[foldN], negate=1)
                    orig_len = len(trainData.domain.attributes)
                    refs = None
                    methods = [
                        "rdk_MACCS_keys",
                        "rdk_topo_fps",
                        "rdk_morgan_fps",
                        "rdk_morgan_features_fps",
                        "rdk_atompair_fps",
                    ]
                    train_domain = None
                    # add structural descriptors to the training data (TG)
                    if algorithm:
                        for i in range(len(algorithm)):
                            if algorithm[i] == "structClust":
                                self.__log("Algorithm " + str(i) + ": " + str(algorithm[i]))
                                actData = orange.ExampleTable(trainData.domain)
                                for d in trainData:
                                    # only valid for simboosted qsar paper experiments!?
                                    if d.getclass() == "2":
                                        actData.append(d)

                                refs = structuralClustering.getReferenceStructures(
                                    actData,
                                    threshold=params["threshold"],
                                    minClusterSize=params["minClusterSize"],
                                    numThreads=2,
                                )
                                self.__log(
                                    " found "
                                    + str(len(refs))
                                    + " reference structures in "
                                    + str(len(actData))
                                    + " active structures"
                                )
                                orig_len = orig_len + (len(refs) * len(methods))
                                trainData_sim = SimBoostedQSAR.getSimDescriptors(refs, trainData, methods)

                                if i == (len(algorithm) - 1):
                                    trainData = dataUtilities.attributeDeselectionData(trainData_sim, atts)
                                else:
                                    trainData = dataUtilities.attributeDeselectionData(trainData_sim, [])

                            elif algorithm[i] == "ECFP":
                                self.__log("Algorithm " + str(i) + ": " + str(algorithm[i]))
                                trainData_ecfp = getCinfonyDesc.getCinfonyDescResults(trainData, ["rdk.FingerPrints"])
                                train_domain = trainData_ecfp.domain
                                if i == (len(algorithm) - 1):
                                    trainData = dataUtilities.attributeDeselectionData(trainData_ecfp, atts)
                                else:
                                    trainData = dataUtilities.attributeDeselectionData(trainData_ecfp, [])

                            else:
                                self.__log("Algorithm " + str(i) + ": " + str(algorithm[i]))
                                trainData_structDesc = getStructuralDesc.getStructuralDescResult(
                                    trainData, algorithm[i], params["minsup"]
                                )
                                if i == (len(algorithm) - 1):
                                    trainData = dataUtilities.attributeDeselectionData(trainData_structDesc, atts)
                                else:
                                    trainData = dataUtilities.attributeDeselectionData(trainData_structDesc, [])

                                    # trainData.save("/home/girschic/proj/AZ/ProjDev/train.tab")
                    testData = self.data.select(DataIdxs[foldN])
                    # calculate the feature values for the test data (TG)
                    if algorithm:
                        for i in range(len(algorithm)):
                            if algorithm[i] == "structClust":
                                self.__log(str(algorithm[i]))
                                testData_sim = SimBoostedQSAR.getSimDescriptors(refs, testData, methods)
                                if i == (len(algorithm) - 1):
                                    testData = dataUtilities.attributeDeselectionData(testData_sim, atts)
                                else:
                                    testData = dataUtilities.attributeDeselectionData(testData_sim, [])
                            elif algorithm[i] == "ECFP":
                                self.__log(str(algorithm[i]))
                                # testData_ecfp = orange.ExampleTable(train_domain)
                                tmp_dat = []
                                for d in testData:
                                    tmp = getCinfonyDesc.getRdkFPforTestInstance(train_domain, d)
                                    tmp_dat.append(tmp)
                                testData_ecfp = orange.ExampleTable(tmp_dat[0].domain, tmp_dat)
                                if i == (len(algorithm) - 1):
                                    # 						print "removing atts"
                                    testData = dataUtilities.attributeDeselectionData(testData_ecfp, atts)
                                else:
                                    # 						print "removing no atts"
                                    testData = dataUtilities.attributeDeselectionData(testData_ecfp, [])

                            else:
                                cut_off = orig_len - len(atts)
                                smarts = trainData.domain.attributes[cut_off:]
                                self.__log("  Number of structural features added: " + str(len(smarts)))
                                testData_structDesc = getStructuralDesc.getSMARTSrecalcDesc(testData, smarts)
                                if i == (len(algorithm) - 1):
                                    testData = dataUtilities.attributeDeselectionData(testData_structDesc, atts)
                                else:
                                    testData = dataUtilities.attributeDeselectionData(testData_structDesc, [])

                    #                testData.save("/home/girschic/proj/AZ/ProjDev/test.tab")
                    nTrainEx[ml].append(len(trainData))
                    nTestEx[ml].append(len(testData))
                    # Test if trainsets inside optimizer will respect dataSize criterias.
                    #  if not, don't optimize, but still train the model
                    dontOptimize = False
                    if self.responseType != "Classification" and (len(trainData) * (1 - 1.0 / self.nInnerFolds) < 20):
                        dontOptimize = True
                    else:
                        tmpDataIdxs = dataUtilities.SeedDataSampler(trainData, self.nInnerFolds)
                        tmpTrainData = trainData.select(tmpDataIdxs[0], negate=1)
                        if not self.__checkTrainData(tmpTrainData, False):
                            dontOptimize = True

                    if dontOptimize:
                        logTxt += (
                            "       Fold " + str(foldN) + ": Too few compounds to optimize model hyper-parameters\n"
                        )
                        self.__log(logTxt)
                        if trainData.domain.classVar.varType == orange.VarTypes.Discrete:
                            res = orngTest.crossValidation(
                                [MLmethods[ml]],
                                trainData,
                                folds=5,
                                strat=orange.MakeRandomIndices.StratifiedIfPossible,
                                randomGenerator=random.randint(0, 100),
                            )
                            CA = evalUtilities.CA(res)[0]
                            optAcc[ml].append(CA)
                        else:
                            res = orngTest.crossValidation(
                                [MLmethods[ml]],
                                trainData,
                                folds=5,
                                strat=orange.MakeRandomIndices.StratifiedIfPossible,
                                randomGenerator=random.randint(0, 100),
                            )
                            R2 = evalUtilities.R2(res)[0]
                            optAcc[ml].append(R2)
                    else:
                        runPath = miscUtilities.createScratchDir(
                            baseDir=AZOC.NFS_SCRATCHDIR, desc="AccWOptParam", seed=id(trainData)
                        )
                        # 		    self.__log("	run path:"+str(runPath))
                        trainData.save(os.path.join(runPath, "trainData.tab"))

                        tunedPars = paramOptUtilities.getOptParam(
                            learner=MLmethods[ml],
                            trainDataFile=os.path.join(runPath, "trainData.tab"),
                            paramList=self.paramList,
                            useGrid=False,
                            verbose=self.verbose,
                            queueType=self.queueType,
                            runPath=runPath,
                            nExtFolds=None,
                            nFolds=self.nInnerFolds,
                            logFile=self.logFile,
                            getTunedPars=True,
                        )
                        if not MLmethods[ml] or not MLmethods[ml].optimized:
                            self.__log(
                                "       WARNING: GETACCWOPTPARAM: The learner " + str(ml) + " was not optimized."
                            )
                            self.__log("                It will be ignored")
                            # self.__log("                It will be set to default parameters")
                            self.__log("                    DEBUG can be done in: " + runPath)
                            # Set learner back to default
                            # MLmethods[ml] = MLmethods[ml].__class__()
                            raise Exception("The learner " + str(ml) + " was not optimized.")
                        else:
                            if trainData.domain.classVar.varType == orange.VarTypes.Discrete:
                                optAcc[ml].append(tunedPars[0])
                            else:
                                res = orngTest.crossValidation(
                                    [MLmethods[ml]],
                                    trainData,
                                    folds=5,
                                    strat=orange.MakeRandomIndices.StratifiedIfPossible,
                                    randomGenerator=random.randint(0, 100),
                                )
                                R2 = evalUtilities.R2(res)[0]
                                optAcc[ml].append(R2)

                            miscUtilities.removeDir(runPath)
                    # Train the model
                    model = MLmethods[ml](trainData)
                    models[ml].append(model)
                    # Test the model
                    if self.responseType == "Classification":
                        results[ml].append(
                            (
                                evalUtilities.getClassificationAccuracy(testData, model),
                                evalUtilities.getConfMat(testData, model),
                            )
                        )
                        roc = self.aroc(testData, [model])
                        rocs[ml].append(roc)
                    else:
                        local_exp_pred = []
                        for ex in testData:
                            local_exp_pred.append((ex.getclass(), model(ex)))
                        results[ml].append(
                            (evalUtilities.calcRMSE(local_exp_pred), evalUtilities.calcRsqrt(local_exp_pred))
                        )
                        # Save the experimental value and correspondent predicted value
                        exp_pred[ml] += local_exp_pred
                    if callBack:
                        stepsDone += 1
                        if not callBack((100 * stepsDone) / nTotalSteps):
                            return None

                res = self.createStatObj(
                    results[ml],
                    exp_pred[ml],
                    nTrainEx[ml],
                    nTestEx[ml],
                    self.responseType,
                    self.nExtFolds,
                    logTxt,
                    rocs[ml],
                )

                if self.verbose > 0:
                    print "UnbiasedAccuracyGetter!Results  " + ml + ":\n"
                    pprint(res)
                if not res:
                    raise Exception("No results available!")
                statistics[ml] = copy.deepcopy(res)
                self.__writeResults(statistics)
                self.__log("       OK")
            except:
                print "Unexpected error:",
                print sys.exc_info()[0]
                print sys.exc_info()[1]
                self.__log("       Learner " + str(ml) + " failed to create/optimize the model!")
                res = self.createStatObj(
                    results[ml],
                    exp_pred[ml],
                    nTrainEx[ml],
                    nTestEx[ml],
                    self.responseType,
                    self.nExtFolds,
                    logTxt,
                    rocs[ml],
                )
                statistics[ml] = copy.deepcopy(res)
                self.__writeResults(statistics)

        if not statistics or len(statistics) < 1:
            self.__log("ERROR: No statistics to return!")
            return None
        elif len(statistics) > 1:
            # We still need to build a consensus model out of the stable models
            #   ONLY if there are more that one model stable!
            #   When only one or no stable models, build a consensus based on all models
            consensusMLs = {}
            for modelName in statistics:
                StabilityValue = statistics[modelName]["StabilityValue"]
                if StabilityValue is not None and statistics[modelName]["stable"]:
                    consensusMLs[modelName] = copy.deepcopy(statistics[modelName])

            self.__log(
                "Found " + str(len(consensusMLs)) + " stable MLmethods out of " + str(len(statistics)) + " MLmethods."
            )

            if len(consensusMLs) <= 1:  # we need more models to build a consensus!
                consensusMLs = {}
                for modelName in statistics:
                    consensusMLs[modelName] = copy.deepcopy(statistics[modelName])

            if len(consensusMLs) >= 2:
                # Var for saving each Fols result
                Cresults = []
                Cexp_pred = []
                CnTrainEx = []
                CnTestEx = []
                self.__log(
                    "Calculating the statistics for a Consensus model based on " + str([ml for ml in consensusMLs])
                )
                for foldN in range(self.nExtFolds):
                    if self.responseType == "Classification":
                        CLASS0 = str(self.data.domain.classVar.values[0])
                        CLASS1 = str(self.data.domain.classVar.values[1])
                        exprTest0 = "(0"
                        for ml in consensusMLs:
                            exprTest0 += "+( " + ml + " == " + CLASS0 + " )*" + str(optAcc[ml][foldN]) + " "
                        exprTest0 += ")/IF0(sum([False"
                        for ml in consensusMLs:
                            exprTest0 += ", " + ml + " == " + CLASS0 + " "
                        exprTest0 += "]),1)"
                        exprTest1 = exprTest0.replace(CLASS0, CLASS1)
                        expression = [exprTest0 + " >= " + exprTest1 + " -> " + CLASS0, " -> " + CLASS1]
                    else:
                        Q2sum = sum([optAcc[ml][foldN] for ml in consensusMLs])
                        expression = "(1 / " + str(Q2sum) + ") * (0"
                        for ml in consensusMLs:
                            expression += " + " + str(optAcc[ml][foldN]) + " * " + ml + " "
                        expression += ")"

                    testData = self.data.select(DataIdxs[foldN])
                    CnTestEx.append(len(testData))
                    consensusClassifiers = {}
                    for learnerName in consensusMLs:
                        consensusClassifiers[learnerName] = models[learnerName][foldN]

                    model = AZorngConsensus.ConsensusClassifier(classifiers=consensusClassifiers, expression=expression)
                    CnTrainEx.append(model.NTrainEx)
                    # Test the model
                    if self.responseType == "Classification":
                        Cresults.append(
                            (
                                evalUtilities.getClassificationAccuracy(testData, model),
                                evalUtilities.getConfMat(testData, model),
                            )
                        )
                    else:
                        local_exp_pred = []
                        for ex in testData:
                            local_exp_pred.append((ex.getclass(), model(ex)))
                        Cresults.append(
                            (evalUtilities.calcRMSE(local_exp_pred), evalUtilities.calcRsqrt(local_exp_pred))
                        )
                        # Save the experimental value and correspondent predicted value
                        Cexp_pred += local_exp_pred

                res = self.createStatObj(Cresults, Cexp_pred, CnTrainEx, CnTestEx, self.responseType, self.nExtFolds)
                statistics["Consensus"] = copy.deepcopy(res)
                statistics["Consensus"]["IndividualStatistics"] = copy.deepcopy(consensusMLs)
                self.__writeResults(statistics)
            self.__log("Returned multiple ML methods statistics.")
            return statistics

        # By default return the only existing statistics!
        self.__writeResults(statistics)
        self.__log("Returned only one ML method statistics.")
        return statistics[statistics.keys()[0]]
コード例 #8
0
    def getDescriptors(self, smiles):
        self.getSmilesData(smiles)

        # Calculate descriptors defined in the model files
        descList = self.model.varNames

        savedSmilesData = dataUtilities.DataTable(self.smilesData)

        #Try 3 time to get All compounds descriptors
        nTry = 3
        errorDesc = ""
        while nTry > 0:
            try:
                #if True:
                traceLog = "Model Location:" + str(self.modelLocation) + "\n"
                nBadEx = 0
                # Determine Signature and non-Signature descriptor names
                cinfonyDesc, clabDesc, signatureHeight, bbrcDesc, signDesc = descUtilities.getDescTypes(
                    descList)
                # Signatures
                if "sign" in DescMethodsAvailable and signatureHeight:
                    traceLog += "Calculating signatures...\n"
                    print "Calculating signatures...."
                    preCalcData = dataUtilities.DataTable(
                        self.preDefSignatureFile)
                    startHeight = 0  # Not used desc ignored in model prediction
                    endHeight = signatureHeight
                    self.smilesData = getSignatures.getSignatures(
                        self.smilesData, startHeight, endHeight, preCalcData)

                # C-Lab desc
                if "clab" in DescMethodsAvailable and clabDesc:
                    traceLog += "Calculating C-Lab...\n"
                    print "Calculating C-Lab desc...."
                    self.smilesData = ClabUtilities.appendCLabDesc(
                        clabDesc, self.smilesData)

                # Cinfony
                if cinfonyDesc:
                    traceLog += "Calculating Cinfony...\n"
                    print "Calculating Cinfony desc..."
                    self.smilesData = getCinfonyDesc.getCinfonyDescResults(
                        self.smilesData, cinfonyDesc, radius=5)

                # bbrcDesc
                if "bbrc" in DescMethodsAvailable and bbrcDesc:
                    traceLog += "Calculating BBRC...\n"
                    print "Calculating BBRC desc..."
                    self.smilesData = getBBRCDesc.getBBRCDescResult(
                        self.smilesData,
                        algo="FTM",
                        minSupPar=1,
                        descList=bbrcDesc)

                # Detect if the descripts calaculation or something else went wrong!
                for ex in self.smilesData:
                    if sum([
                            ex[attr].isSpecial()
                            for attr in self.smilesData.domain.attributes
                    ]) == len(self.smilesData.domain.attributes):
                        nBadEx += 1
                if nBadEx:
                    traceLog += "WARNING: Desc. Calculation: From the " + str(
                        len(self.smilesData)) + " compounds, " + str(
                            nBadEx) + " could not be calculated!\n"
                    print "WARNING: Desc. Calculation: From the " + str(
                        len(self.smilesData)) + " compounds, " + str(
                            nBadEx) + " could not be calculated!"
                    print "WARNING:   Tying again..."
                    self.smilesData = dataUtilities.DataTable(savedSmilesData)
                    nTry -= 1
                else:
                    nTry = 0
            #else:
            except Exception, e:
                errorDesc = "Error Calculating Descriptors:;" + traceLog + str(
                    e) + ";"
                nTry -= 1
コード例 #9
0
def parseToFile(assay, data):
    descList = getCinfonyDesc.getAvailableDescs("rdkPhysChem")
    print descList
    data = getCinfonyDesc.getCinfonyDescResults(data, descList)
    data.save(assay + "AZOdesc.txt")