def __init__(self, YList, X, featuresName, ages, args):
    """Set up the metabolomics experiment: store the data and configure the
    tree/cross-validation parameters and the leaf-rank learner generators."""
    super(MetabolomicsExpRunner, self).__init__(args=args)
    self.X = X
    self.YList = YList  # The list of concentrations
    self.featuresName = featuresName
    self.args = args
    self.ages = ages

    # Tree / sampling / cross-validation settings
    self.maxDepth = 10
    self.numTrees = 10
    self.sampleSize = 1.0
    self.sampleReplace = True
    self.folds = 5
    self.resultsDir = PathDefaults.getOutputDir() + "metabolomics/"

    # Grid-search leaf-rank learners (instantiated generators)
    self.leafRankGenerators = [
        (LinearSvmGS.generate(), "SVM"),
        (SvcGS.generate(), "RBF-SVM"),
        (DecisionTree.generate(), "CART"),
    ]

    self.pcaLeafRankGenerators = [(LinearSvmPca.generate(), "LinearSVM-PCA")]

    # Functional (wavelet-filtering) leaf-rank learners; note these are the
    # generate callables themselves, not their results, matching the original.
    self.funcLeafRankGenerators = [
        (LinearSvmFGs.generate, "SVMF"),
        (SvcFGs.generate, "RBF-SVMF"),
        (DecisionTreeF.generate, "CARTF"),
    ]

    # Store all the label vectors and their missing values
    YIgf1Inds, YICortisolInds, YTestoInds = MetabolomicsUtils.createIndicatorLabels(YList)
    self.hormoneInds = [YIgf1Inds, YICortisolInds, YTestoInds]
    self.hormoneNames = MetabolomicsUtils.getLabelNames()
class DecisionTreeF(AbstractFunctionalPredictor):
    """Functional decision-tree predictor.

    Ranks the wavelet feature columns by energy (sum of squares), uses
    stratified cross-validated AUC to pick how many to retain, then trains
    a CART decision tree on the standardised, filtered feature matrix.
    """
    def __init__(self):
        super(DecisionTreeF, self).__init__()
        # Underlying classifier trained on the filtered features.
        self.decisionTree = DecisionTree()

    def learnModel(self, X, y, folds=3):
        """
        Train using the given examples and labels, however first conduct grid
        search in conjunction with cross validation to find the best
        parameters. We also conduct filtering with a variety of values.

        :param X: example matrix; columns indexed by self.waveletInds are
            treated as wavelet features, the rest are passed through.
        :param y: label vector.
        :param folds: number of stratified CV folds used to choose the
            number of retained wavelet features.
        """
        # Fix: use "is None" rather than "== None" — if waveletInds is a
        # numpy array, "== None" is an elementwise comparison, not an
        # identity test, and its truth value is ambiguous.
        if self.waveletInds is None:
            self.waveletInds = numpy.arange(X.shape[1])

        nonWaveletInds = numpy.setdiff1d(numpy.arange(X.shape[1]), self.waveletInds)
        Xw = X[:, self.waveletInds]
        Xo = X[:, nonWaveletInds]

        # Rank wavelet features by descending energy.
        featureInds = numpy.flipud(numpy.argsort(numpy.sum(Xw**2, 0)))
        meanAUCs = numpy.zeros(self.candidatesN.shape[0])
        stdAUCs = numpy.zeros(self.candidatesN.shape[0])

        # Standardise the data
        Xw = Standardiser().standardiseArray(Xw)
        Xo = Standardiser().standardiseArray(Xo)

        # Evaluate each candidate feature count by stratified CV AUC.
        for i in range(self.candidatesN.shape[0]):
            newX = numpy.c_[Xw[:, featureInds[0:self.candidatesN[i]]], Xo]
            meanAUCs[i], stdAUCs[i] = self.decisionTree.evaluateStratifiedCv(newX, y, folds, metricMethod=Evaluator.auc)

        bestI = numpy.argmax(meanAUCs)
        self.featureInds = numpy.r_[self.waveletInds[featureInds[0:self.candidatesN[bestI]]], nonWaveletInds]
        logging.debug("Best learner found: " + str(self.decisionTree) + " N:" + str(self.candidatesN[bestI]))

        # Refit a fresh standardiser on the selected columns, then train
        # the final tree on the full (filtered, standardised) data.
        self.standardiser = Standardiser()
        newX = self.standardiser.standardiseArray(X[:, self.featureInds])
        self.decisionTree.learnModel(newX, y)

    def predict(self, X):
        """Predict labels for X using the stored feature subset and standardiser."""
        newX = self.standardiser.standardiseArray(X[:, self.featureInds])
        return self.decisionTree.predict(newX)

    @staticmethod
    def generate(waveletInds=None):
        """
        Generate a classifier which does a grid search.

        :param waveletInds: column indices of the wavelet features, or None
            to treat every column as a wavelet feature at learn time.
        :return: a zero-argument callable producing a configured DecisionTreeF.
        """
        def generatorFunc():
            decisionTree = DecisionTreeF()
            decisionTree.setWaveletInds(waveletInds)
            return decisionTree
        return generatorFunc

    def setWeight(self, weight):
        # Delegate the weight parameter to the underlying tree.
        self.decisionTree.setWeight(weight)
def __init__(self):
    """Initialise the functional predictor and its underlying CART model."""
    super(DecisionTreeF, self).__init__()
    # Underlying classifier; trained later on the filtered feature matrix.
    self.decisionTree = DecisionTree()