Example #1
    def test_MetaDataHandleForSavingModel(self):
        """Test the handling of SaveModel for Data with Meta Atributes
        """
        expectedAccWMeta = 1.0  # Ver 0.3
        expectedAccNoMetaValues = [
            0.56666666700000001,  # Ver 0.3
            0.563636364
        ]

        # Test saving a model created from training data with meta attributes
        self.assert_(
            len(self.WMetaTest.domain.getmetas()) >= 1,
            "The dataset WMetaTest should have Meta Attributes")
        RFlearner = AZorngRF.RFLearner(
            NumThreads=1, maxDepth="20", minSample="5", useSurrogates="false",
            getVarVariance="false", nActVars="0", nTrees="100",
            forestAcc="0.1", termCrit="0")
        rfM = RFlearner(self.WMetaTest)
        AccNoMetaBefore = evalUtilities.getClassificationAccuracy(
            self.NoMetaTrain, rfM)
        AccWMetaBefore = evalUtilities.getClassificationAccuracy(
            self.WMetaTest, rfM)

        # Save the model
        scratchdir = os.path.join(AZOC.SCRATCHDIR,
                                  "scratchdirTest" + str(time.time()))
        os.mkdir(scratchdir)
        modelPath = os.path.join(scratchdir, "RFModel.RF")
        rfM.write(modelPath)

        # Read in the model
        rfR = AZorngRF.RFread(modelPath)
        self.assert_(
            len(rfR.domain.getmetas()) == 0,
            "There shouldn't be any Meta data now!")

        # Calculate classification accuracy
        AccNoMetaAfter = evalUtilities.getClassificationAccuracy(
            self.NoMetaTrain, rfR)
        AccWMetaAfter = evalUtilities.getClassificationAccuracy(
            self.WMetaTest, rfR)

        # Test that the accuracy of the model is the same before and after saving
        self.assertEqual(
            AccNoMetaBefore, AccNoMetaAfter,
            "NoMeta: Predictions after loading saved model were different")
        self.assertEqual(
            AccWMetaBefore, AccWMetaAfter,
            "WMeta: Predictions after loading saved model were different")
        self.assertEqual(round(AccWMetaAfter, 9), round(expectedAccWMeta, 9))

        self.assertRoundedToExpectedArray(AccNoMetaAfter,
                                          expectedAccNoMetaValues, 9)

        # Remove the scratch directory
        os.system("/bin/rm -rf " + scratchdir)
Example #2
    def test_save_load_Regression_D_Attr(self):
        """ Test Save/Load Regression model with Discrete Attribute"""

        # Create a selector to select just the correct attributes
        selector = range(len(self.RegDAttr.domain))
        # Remove the second attribute (idx=1)
        selector.pop(1)
        # Apply the selector to self.RegDAttr
        data = self.RegDAttr.select(selector)

        RFsign = AZorngRF.RFLearner(data,
                                    nTrees=200,
                                    nActVars=155,
                                    maxDepth=100)

        res1 = []
        for ex in self.RegDAttr:
            res1.append(str(RFsign(ex)))

        scratchdir = os.path.join(AZOC.SCRATCHDIR,
                                  "scratchdirTest" + str(time.time()))
        os.mkdir(scratchdir)
        modelPath = os.path.join(scratchdir, "RFModel")
        RFsign.write(modelPath)

        loadedRFmodel = AZorngRF.RFread(modelPath)

        res2 = []
        for ex in self.RegDAttr:
            res2.append(str(loadedRFmodel(ex)))

        self.assertEqual(res1, res2)
        self.assertEqual(res1, [
            '5.404782', '2.568249', '2.979486', '4.287185', '5.335753',
            '4.439877', '3.682451', '8.054751', '6.511803', '5.760388',
            '7.771009', '2.328262', '6.062288', '5.577081', '3.639579',
            '6.862591', '3.793468', '2.865258', '3.531777', '6.833398',
            '6.376686', '3.338588', '7.002612', '7.137580', '7.258987',
            '6.899173', '7.547265', '8.708020', '6.262212', '7.563741',
            '8.166364', '6.614120', '7.865033', '9.060866', '8.057292',
            '4.877943', '7.993115', '9.198319', '9.428467', '8.537990',
            '9.130789', '6.328936', '8.247712', '7.605743', '8.755456',
            '6.983065', '7.712387', '9.972745', '9.763152', '7.934700',
            '8.447981', '7.272462', '8.824869', '7.654151', '7.795481',
            '7.229007', '8.680950', '9.439033', '9.130064', '8.505672',
            '8.082146', '6.086042', '7.493593', '8.981513', '8.880632',
            '6.548739'
        ])

        # Remove the scratch directory
        os.system("/bin/rm -rf " + scratchdir)
Example #3
    def test_Priors(self):
        """Test to assure that priors are set correcly."""

        # Create a RF model
        RFlearner = AZorngRF.RFLearner(
            NumThreads=1, maxDepth="20", minSample="5", useSurrogates="false",
            getVarVariance="false", nActVars="0", nTrees="100",
            forestAcc="0.1", termCrit="0",
            priors={"Iris-versicolor": 0.35, "Iris-virginica": 0.13, "Iris-setosa": 0.52})
        RFmodel = RFlearner(self.irisData)

        # Calculate classification accuracy
        Acc = evalUtilities.getClassificationAccuracy(self.irisData, RFmodel)

        # Save the model
        scratchdir = os.path.join(AZOC.SCRATCHDIR,
                                  "scratchdirTest" + str(time.time()))
        os.mkdir(scratchdir)
        modelPath = os.path.join(scratchdir, "modelPriors.RF")
        RFmodel.write(modelPath)

        # Read in the model
        newRFmodel = AZorngRF.RFread(modelPath)

        # Calculate classification accuracy
        savedAcc = evalUtilities.getClassificationAccuracy(
            self.irisData, newRFmodel)

        # Test that the accuracy of the two classifiers is the exact same
        self.assertEqual(Acc, savedAcc)

        # Check the priors saved in the model
        f = open(os.path.join(modelPath, "model.rf"), "r")
        lines = f.readlines()
        f.close()
        priors = [
            round(x, 2) for x in eval((lines[22].strip() +
                                       lines[23].strip()).replace("data:", ""))
        ]
        self.assertEqual(len(priors), 3)
        self.assertEqual(
            priors[self.irisData.domain.classVar.values.index("Iris-setosa")],
            0.52)
        self.assertEqual(
            priors[self.irisData.domain.classVar.values.index(
                "Iris-versicolor")], 0.35)
        self.assertEqual(
            priors[self.irisData.domain.classVar.values.index(
                "Iris-virginica")], 0.13)

        # Remove the scratch directory
        os.system("/bin/rm -rf " + scratchdir)
Example #4
    def test_SavedModel(self):
        """Test to assure that a saved RF model gives the same predictions as before saving."""

        # Create a RF model
        RFlearner = AZorngRF.RFLearner(
            maxDepth="20", minSample="5", useSurrogates="false",
            getVarVariance="false", nActVars="0", nTrees="100",
            forestAcc="0.1", termCrit="0")
        RFmodel = RFlearner(self.trainData)

        # Calculate classification accuracy
        Acc = evalUtilities.getClassificationAccuracy(self.testData, RFmodel)

        # Save the model
        scratchdir = os.path.join(AZOC.SCRATCHDIR,
                                  "scratchdirTest" + str(time.time()))
        os.mkdir(scratchdir)
        modelPath = os.path.join(scratchdir, "model.RF")
        RFmodel.write(modelPath)

        # Read in the model
        newRFmodel = AZorngRF.RFread(modelPath)

        # Calculate classification accuracy
        savedAcc = evalUtilities.getClassificationAccuracy(
            self.testData, newRFmodel)

        # Test that the accuracy of the two classifiers is the exact same
        self.assertEqual(Acc, savedAcc)

        # Check the priors saved in the model
        f = open(os.path.join(modelPath, "model.rf"), "r")
        lines = f.readlines()
        f.close()
        priors = [
            round(x, 2) for x in eval((lines[22].strip()).replace("data:", ""))
        ]
        self.assertEqual(len(priors), 2)
        self.assertEqual(
            priors[self.testData.domain.classVar.values.index("POS")], 0.50)
        self.assertEqual(
            priors[self.testData.domain.classVar.values.index("NEG")], 0.50)

        # Remove the scratch directory
        os.system("/bin/rm -rf " + scratchdir)
Example #5
        def TopVarImportanceTest(data, expectNone=False):
            resA = []
            resB = []
            RF = AZorngRF.RFLearner(data)

            for ex in data:
                resA.append(RF.getTopImportantVars(ex, 1))

            scratchdir = miscUtilities.createScratchDir(
                desc="TopVarImportanceTest")
            modelPath = os.path.join(scratchdir, "CvRFModel")
            RF.write(modelPath)
            LoadedRF = AZorngRF.RFread(modelPath)
            miscUtilities.removeDir(scratchdir)

            for ex in data:
                resB.append(LoadedRF.getTopImportantVars(ex, 1))
            if expectNone:
                return resA == resB == [None] * len(data)
            else:
                return resA == resB and None not in resA and resA.count(
                    resA[0]) != len(resA)
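TopVarImportanceTest is defined inside an enclosing test method (note the extra indentation), which then calls it on its own datasets. A minimal sketch of such a call, placed after the nested definition above and assuming self.irisData is a loaded classification DataTable as in Example #3:

        # Classification data: the top-ranked variable should be stable across save/load
        self.assert_(TopVarImportanceTest(self.irisData),
                     "Top important variables changed after save/load")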
Example #6
    def test_BuiltIn_Impute(self):
        """Test RF BuiltIn missing values imputation
        Assure that imputation works for the rf models. Test on data with missing values
        """
        # This data is loaded here, rather than in setUp, to speed up the test suite since it is too big
        contTestDataPath = os.path.join(AZOC.AZORANGEHOME,
                                        "tests/source/data/linearTest.tab")
        contTrainDataPath = os.path.join(AZOC.AZORANGEHOME,
                                         "tests/source/data/linearTrain.tab")
        contTrain = dataUtilities.DataTable(contTrainDataPath)
        contTest = dataUtilities.DataTable(contTestDataPath)

        ex1 = contTest[5]
        ex2 = contTest[2]
        AttrEx1 = "Desc 71"
        AttrEx2 = "Desc 72"
        self.assert_(ex1[AttrEx1] != "?",
                     "The var " + AttrEx1 + " shouldn't be missing!")
        self.assert_(ex2[AttrEx2] != "?",
                     "The var " + AttrEx2 + " shouldn't be missing!")

        imputer = orange.ImputerConstructor_average(contTrain)
        RFlearner = AZorngRF.RFLearner(
            NumThreads=1, maxDepth="20", minSample="5", useSurrogates="false",
            getVarVariance="false", nActVars="0", nTrees="100",
            forestAcc="0.001", termCrit="0", useBuiltInMissValHandling=True)
        rf = RFlearner(contTrain)

        # Prediction for data as it is
        P1 = rf(ex1)
        P2 = rf(ex2)

        # Predictions changing one continuous and one discrete variable to 0
        ex1[AttrEx1] = 0
        ex2[AttrEx2] = 0
        P1_0 = rf(ex1)
        P2_0 = rf(ex2)

        # Predictions changing the same continuous and discrete variable to its corresponding imputation value
        #ex1["Desc 71"]=imputer.defaults["Desc 71"]
        #ex2["Desc 138"]=imputer.defaults["Desc 138"]
        #P1_imp=rf(ex1)
        #P2_imp=rf(ex2)

        # Predictions changing the same continuous and discrete variable to '?', which means that the same imputation
        # as in the last case will have to be made inside the classifier. So, the predicted value must be the same
        ex1[AttrEx1] = "?"
        ex2[AttrEx2] = "?"
        self.assert_(ex1[AttrEx1] == "?",
                     "The var " + AttrEx1 + " should be missing now!")
        self.assert_(ex2[AttrEx2] == "?",
                     "The var " + AttrEx2 + " should be missing now!")
        P1Miss = rf(ex1)
        P2Miss = rf(ex2)

        # Test if the prediction made for the example with missing values is the same as the one
        # for the example whose missing values were substituted using the same method the classifier uses.
        #self.assert_(P1_imp==P1Miss,"Imputation was not made correctly inside the classifier")
        #self.assert_(P2_imp==P2Miss,"Imputation was not made correctly inside the classifier")

        # Assure that if other substitutions were made on those variables, the predicted value would be different,
        # and so this is a valid method for testing the imputation

        # Just to assure that we are not comparing equal examples
        self.assert_(P1.value != P2.value)
        self.assert_(
            P1.value != P1Miss.value,
            "The imputed 1 was the same as the original ... try another example")
        self.assert_(
            P1_0.value != P1Miss.value,
            "The imputed 1 was the same as with the value replaced by 0. The classifier may be replacing missing values by 0"
        )
        self.assert_(
            P2.value != P2Miss.value,
            "The missing imputed 2 was the same as the original ... try another example"
        )
        #self.assert_(P2_0.value!=P2Miss.value,"The missing imputed 2 was the same as the replaced by 0. The classifier may be replacing missing values by 0")

        self.assert_(rf.useBuiltInMissValHandling == True)
        #Test the imputer for saved models
        # Save the model
        scratchdir = os.path.join(AZOC.SCRATCHDIR,
                                  "scratchdirTest" + str(time.time()))
        os.mkdir(scratchdir)
        modelPath = os.path.join(scratchdir, "RFModel")
        rf.write(modelPath)

        # Read in the model
        rfM = AZorngRF.RFread(modelPath)
        self.assert_(rfM.useBuiltInMissValHandling == True)
        # Predict the ex1 and ex2 which are still the examples with missing values '?'
        self.assert_(ex1[AttrEx1] == "?",
                     "Value of var " + AttrEx1 + " should be missing!")
        self.assert_(ex2[AttrEx2] == "?",
                     "Value of var " + AttrEx2 + " should be missing!")
        self.assert_(
            rfM(ex1) == P1Miss, "Imputation on loaded model is not correct")
        self.assert_(
            rfM(ex2) == P2Miss, "Imputation on loaded model is not correct")
        # Remove the scratch directory
        os.system("/bin/rm -rf " + scratchdir)
Example #7
def getFps(data):
    """Build RDKit fingerprints for the molecules in the 'Smiles' column of 'data'."""
    molList = []
    for ex in data:
        mol = Chem.MolFromSmiles(ex["Smiles"].value)
        if mol:
            molList.append(mol)
        else:
            print ex["Smiles"].value
            print ex["Leonumber"].value
    fps = [FingerprintMols.FingerprintMol(x) for x in molList]  # Topological
    #fps = [AllChem.GetMorganFingerprint(x, 2) for x in molList]
    #print "Length of data and fp ", len(data), len(fps)
    return fps


THRS = 0.75

model = AZorngRF.RFread("OI_RFmodel")
predictor = AZOrangePredictor.AZOrangePredictor("OI_RFmodel")

train = dataUtilities.DataTable("BioActivityAZOdesc.txt")

# Calculate fingerprints for train and test sets
fps = getFps(train)

#smiles = test[idx]["Smiles"].value
smiles = "CC(C)n1c(/C=C/[C@H](O)C[C@H](O)CC(=O)O)c(-c2ccc(F)cc2)c2ccccc21"
smiles = "Cc1cc(=Nc2cc(CN3CCOCC3)c3nc(C)c(Cc4ccc(Cl)cc4F)n3n2)[nH][nH]1"  # Train set
#smiles = "Cc1nc2c(CN3CCOCC3)cc(NC3=CC(C)NN3)nn2c1Cc1ccc(Cl)cc1F"  # From Drawing - Wrong no tautomer
smiles = "Cc1cc(Nc2cc(CN3CCOCC3)c3nc(C)c(Cc4ccc(Cl)cc4F)n3n2)[nH]n1"  #From drawing of Galilei structure
#smiles = "Cc1cc(=Nc2cc(CN3CCOCC3)c3nc(C)c(Cc4ccc(Cl)cc4F)n3n2)[nH][nH]1" # Canonicalized from drawing in Galilei
cmd = "env -i HOME='$HOME' bash -l -c './cleanSmiles.sh " + '"' + smiles + '"' + "'"
print cmd
Example #8
def modelRead(modelFile=None, verbose=0, retrunClassifier=True):
    """Get the type of model saved in 'modelPath' and loads the respective model
       Returns the Classifier saved in the respective model path
       If called without parameters, it returns a list of known classifier types
       It can returns the classifier, or just a string with the Type

            modelRead (modelFile [, verbose = 0] [, retrunClassifier = True] )"""

    if not modelFile:
        return ("SignSVM", "CvSVM", "CvANN", "PLS", "CvRF", "CvBoost",
                "CvBayes", "Consensus")

    modelType = None
    loadedModel = None
    if os.path.isfile(os.path.join(modelFile, "model.svm")):
        modelType = "CvSVM"
        if not retrunClassifier: return modelType
        from trainingMethods import AZorngCvSVM
        loadedModel = AZorngCvSVM.CvSVMread(modelFile, verbose)
    elif os.path.isdir(os.path.join(modelFile, "model.SignSvm")):
        modelType = "SignSVM"
        if not retrunClassifier: return modelType
        from trainingMethods import AZorngSignSVM
        loadedModel = AZorngSignSVM.SignSVMread(modelFile, verbose)
    elif os.path.isfile(os.path.join(modelFile, "model.ann")):
        modelType = "CvANN"
        if not retrunClassifier: return modelType
        from trainingMethods import AZorngCvANN
        loadedModel = AZorngCvANN.CvANNread(modelFile, verbose)
    elif os.path.isfile(os.path.join(modelFile, "Model.pls")):
        modelType = "PLS"
        if not retrunClassifier: return modelType
        from trainingMethods import AZorngPLS
        loadedModel = AZorngPLS.PLSread(modelFile, verbose)
    elif os.path.isfile(os.path.join(modelFile, "model.rf")):
        modelType = "RF"
        if not retrunClassifier: return modelType
        from trainingMethods import AZorngRF
        loadedModel = AZorngRF.RFread(modelFile, verbose)
    elif os.path.isdir(os.path.join(modelFile, "C0.model")):
        modelType = "Consensus"
        if not retrunClassifier: return modelType
        from trainingMethods import AZorngConsensus
        loadedModel = AZorngConsensus.Consensusread(modelFile, verbose)
    elif os.path.isfile(os.path.join(modelFile, "model.boost")):
        modelType = "CvBoost"
        if not retrunClassifier: return modelType
        from trainingMethods import AZorngCvBoost
        loadedModel = AZorngCvBoost.CvBoostread(modelFile, verbose)
    elif os.path.isfile(os.path.join(modelFile, "model.bayes")):
        modelType = "CvBayes"
        if not retrunClassifier: return modelType
        from trainingMethods import AZorngCvBayes
        loadedModel = AZorngCvBayes.CvBayesread(modelFile, verbose)
    else:  # Assume an old RF format for backward compatibility
        try:
            if os.path.isdir(modelFile):
                modelType = "RF"
                if not retrunClassifier: return modelType
                from trainingMethods import AZorngRF
                loadedModel = AZorngRF.RFread(modelFile, verbose)
            else:
                modelType = None
                loadedModel = None
        except:
            modelType = None
            loadedModel = None

    return loadedModel
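A brief usage sketch of modelRead, assuming a saved RF model directory exists on disk, such as the "OI_RFmodel" path used in Example #7:

# With no arguments, modelRead() lists the known classifier types
print modelRead()

# Ask only for the model type string, without loading the classifier
print modelRead("OI_RFmodel", retrunClassifier=False)

# Load the classifier itself (the default) and use it for prediction
classifier = modelRead("OI_RFmodel", verbose=0)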