def __init__(self, YList, X, featuresName, ages, args):
        """
        Set up the experiment: store the data and metadata, the tree and
        cross-validation parameters, the leaf-rank generators, and the
        indicator labels for each hormone.
        """
        super(MetabolomicsExpRunner, self).__init__(args=args)
        self.X = X
        self.YList = YList  # the list of concentrations
        self.featuresName = featuresName
        self.args = args
        self.ages = ages

        # Tree and cross-validation parameters
        self.maxDepth = 10
        self.numTrees = 10
        self.sampleSize = 1.0
        self.sampleReplace = True
        self.folds = 5
        self.resultsDir = PathDefaults.getOutputDir() + "metabolomics/"

        # Plain leaf-rank learners: (generator, display name) pairs
        self.leafRankGenerators = [
            (LinearSvmGS.generate(), "SVM"),
            (SvcGS.generate(), "RBF-SVM"),
            (DecisionTree.generate(), "CART")]

        # PCA-based leaf-rank learners
        self.pcaLeafRankGenerators = [(LinearSvmPca.generate(), "LinearSVM-PCA")]

        # Functional leaf-rank learners (generator functions, not instances)
        self.funcLeafRankGenerators = [
            (LinearSvmFGs.generate, "SVMF"),
            (SvcFGs.generate, "RBF-SVMF"),
            (DecisionTreeF.generate, "CARTF")]

        #Store all the label vectors and their missing values
        igf1Inds, cortisolInds, testoInds = MetabolomicsUtils.createIndicatorLabels(YList)
        self.hormoneInds = [igf1Inds, cortisolInds, testoInds]
        self.hormoneNames = MetabolomicsUtils.getLabelNames()
# Beispiel #2
# 0
# NOTE(review): the two lines above were plain-text separators left by the
# scraper that concatenated these examples; commented out so the file parses.
class DecisionTreeF(AbstractFunctionalPredictor):
    """
    A functional decision-tree predictor: it selects the best number of
    wavelet features via stratified cross validation, then trains a CART
    model on the filtered, standardised data.
    """
    def __init__(self):
        super(DecisionTreeF, self).__init__()
        # Underlying CART learner; training/prediction are delegated to it
        self.decisionTree = DecisionTree()

    def learnModel(self, X, y, folds=3):
        """
        Train using the given examples and labels, however first conduct grid
        search in conjunction with cross validation to find the best parameters.
        We also conduct filtering with a variety of values.

        :param X: the example matrix, one row per example
        :param y: the label vector
        :param folds: the number of stratified cross-validation folds
        """
        # BUGFIX: use `is None` rather than `== None`. waveletInds is later
        # used as a numpy index array, and `array == None` is an elementwise
        # comparison that raises in a boolean context.
        if self.waveletInds is None:
            self.waveletInds = numpy.arange(X.shape[1])

        nonWaveletInds = numpy.setdiff1d(numpy.arange(X.shape[1]),  self.waveletInds)

        Xw = X[:, self.waveletInds]
        Xo = X[:, nonWaveletInds]

        # Rank wavelet features by decreasing energy (column sum of squares)
        featureInds = numpy.flipud(numpy.argsort(numpy.sum(Xw**2, 0)))
        meanAUCs = numpy.zeros(self.candidatesN.shape[0])
        stdAUCs = numpy.zeros(self.candidatesN.shape[0])

        #Standardise the data
        Xw = Standardiser().standardiseArray(Xw)
        Xo = Standardiser().standardiseArray(Xo)

        # Evaluate each candidate number of retained wavelet features by
        # stratified cross-validated AUC
        for i in range(self.candidatesN.shape[0]):
            newX = numpy.c_[Xw[:, featureInds[0:self.candidatesN[i]]], Xo]
            meanAUCs[i], stdAUCs[i] = self.decisionTree.evaluateStratifiedCv(newX, y, folds, metricMethod=Evaluator.auc)

        bestI = numpy.argmax(meanAUCs)
        # Keep the best wavelet subset plus all non-wavelet features
        self.featureInds = numpy.r_[self.waveletInds[featureInds[0:self.candidatesN[bestI]]], nonWaveletInds]
        # Lazy %-formatting: arguments are only rendered if DEBUG is enabled
        logging.debug("Best learner found: %s N:%s", self.decisionTree, self.candidatesN[bestI])

        # Refit on the full data set using the selected features
        self.standardiser = Standardiser()
        newX = self.standardiser.standardiseArray(X[:, self.featureInds])
        self.decisionTree.learnModel(newX, y)

    def predict(self, X):
        """
        Predict labels for X using the feature subset and standardiser
        learnt in learnModel.
        """
        newX = self.standardiser.standardiseArray(X[:, self.featureInds])
        return self.decisionTree.predict(newX)

    @staticmethod
    def generate(waveletInds=None):
        """
        Generate a classifier which does a grid search.

        :param waveletInds: optional indices of the wavelet columns of X;
            when None, all columns are treated as wavelet features.
        :return: a zero-argument factory producing configured DecisionTreeF objects
        """
        def generatorFunc():
            decisionTree = DecisionTreeF()
            decisionTree.setWaveletInds(waveletInds)
            return decisionTree
        return generatorFunc

    def setWeight(self, weight):
        # Delegate the class weight to the underlying CART learner
        self.decisionTree.setWeight(weight)
# Beispiel #3
# 0
# NOTE(review): scraper separator lines, commented out so the file parses.
 def __init__(self):
     """
     Construct the predictor with an underlying DecisionTree model.

     NOTE(review): this fragment duplicates DecisionTreeF.__init__ above;
     its enclosing class header is outside this chunk.
     """
     super(DecisionTreeF, self).__init__()
     self.decisionTree = DecisionTree()