def predict(self, X):
    """
    Score the examples given as the rows of the matrix X using the learned tree.

    All row indices of X are first attached to the root vertex (0, 0). The
    vertices are then visited level by level; ``classifyNode`` is called on
    every vertex that exists, and each leaf writes its score into the entries
    of the result selected by its test indices.

    :param X: A matrix with examples as rows
    :type X: :class:`ndarray`

    :return: A vector of scores corresponding to each example.
    """
    Parameter.checkClass(X, numpy.ndarray)
    Parameter.checkArray(X)

    numExamples = X.shape[0]
    scores = numpy.zeros(numExamples)

    # Every example starts at the root
    rootVertex = self.tree.getVertex((0, 0))
    rootVertex.setTestInds(numpy.arange(numExamples))

    # Walk the tree one depth level at a time; level `depth` has at most 2**depth vertices
    for depth in range(self.maxDepth + 1):
        for position in range(2 ** depth):
            vertexId = (depth, position)
            if not self.tree.vertexExists(vertexId):
                continue

            self.classifyNode(self.tree, X, depth, position)
            vertex = self.tree.getVertex(vertexId)

            if vertex.isLeafNode():
                leafInds = vertex.getTestInds()
                scores[leafInds] = vertex.getScore()

    return scores
def learnModel(self, X, Y):
    """
    Learn a model from the examples in X and the labels in Y by building a
    data frame and fitting the R formula ``class ~ .`` on it.

    :param X: The examples, passed to ``_getDataFrame``
    :type X: :class:`ndarray`

    :param Y: The labels; a 1D array is converted to a single-column 2D array
    :type Y: :class:`ndarray`

    :raises ValueError: if Y contains fewer than two distinct values
    """
    Parameter.checkClass(X, numpy.ndarray)
    Parameter.checkClass(Y, numpy.ndarray)
    Parameter.checkArray(X)
    Parameter.checkArray(Y)

    # NOTE(review): the message says "binary" but only the case of fewer than
    # two distinct labels is rejected; more than two distinct labels pass.
    if numpy.unique(Y).shape[0] < 2:
        raise ValueError("Vector of labels must be binary, currently numpy.unique(Y) = " + str(numpy.unique(Y)))

    #If Y is 1D make it 2D
    if Y.ndim == 1:
        Y = numpy.array([Y]).T

    XY = self._getDataFrame(X, Y)
    formula = robjects.Formula('class ~ .')
    self.learnModelDataFrame(formula, XY)

    # Trigger garbage collection on both the Python and the R side
    gc.collect()
    robjects.r('gc(verbose=TRUE)')
    robjects.r('memory.profile()')
    gc.collect()

    if self.printMemStats:
        logging.debug(self.getLsos()())
        # locals() is displayed here, so the set of local names in this
        # method is part of the debug output
        logging.debug(ProfileUtils.memDisplay(locals()))
def learnModel(self, X, Y):
    """
    Learn a model from the examples in X and the labels in Y by building a
    data frame and fitting the R formula ``class ~ .`` on it.

    :param X: The examples, passed to ``_getDataFrame``
    :type X: :class:`ndarray`

    :param Y: The labels; a 1D array is converted to a single-column 2D array
    :type Y: :class:`ndarray`

    :raises ValueError: if Y contains fewer than two distinct values
    """
    Parameter.checkClass(X, numpy.ndarray)
    Parameter.checkClass(Y, numpy.ndarray)
    Parameter.checkArray(X)
    Parameter.checkArray(Y)

    # NOTE(review): the message says "binary" but only the case of fewer than
    # two distinct labels is rejected; more than two distinct labels pass.
    if numpy.unique(Y).shape[0] < 2:
        raise ValueError(
            "Vector of labels must be binary, currently numpy.unique(Y) = " + str(numpy.unique(Y)))

    #If Y is 1D make it 2D
    if Y.ndim == 1:
        Y = numpy.array([Y]).T

    XY = self._getDataFrame(X, Y)
    formula = robjects.Formula('class ~ .')
    self.learnModelDataFrame(formula, XY)

    # Trigger garbage collection on both the Python and the R side
    gc.collect()
    robjects.r('gc(verbose=TRUE)')
    robjects.r('memory.profile()')
    gc.collect()

    if self.printMemStats:
        logging.debug(self.getLsos()())
        # locals() is displayed here, so the set of local names in this
        # method is part of the debug output
        logging.debug(ProfileUtils.memDisplay(locals()))
def evaluateLearn(X, y, idx, learnModel, predict, metricMethod, progress=True):
    """
    Evaluate a learning algorithm using the given list of training/test splits.
    For every split the model is learnt on the training rows, predictions are
    made for the test rows and metricMethod(predictedY, realY) is recorded.

    :param X: A matrix with examples as rows
    :type X: :class:`ndarray`

    :param y: A vector of labels
    :type y: :class:`ndarray`

    :param idx: A list of training/test splits; must support ``len``
    :type idx: :class:`list`

    :param learnModel: A function such that learnModel(X, y) finds a mapping from X to y
    :type learnModel: :class:`function`

    :param predict: A function such that predict(X) makes predictions for X
    :type predict: :class:`function`

    :param metricMethod: A function such that metricMethod(predY, testY) returns the quality of predicted labels predY
    :type metricMethod: :class:`function`

    :param progress: If True, report the iteration via Util.printConciseIteration
    :type progress: :class:`bool`

    :return: A 1D array holding one metric value per split, in the order of idx.

    :raises ValueError: if y is not one dimensional
    """
    Parameter.checkClass(X, numpy.ndarray)
    Parameter.checkArray(X, softCheck=True)
    Parameter.checkInt(X.shape[0], 1, float('inf'))
    Parameter.checkClass(y, numpy.ndarray)
    Parameter.checkArray(y, softCheck=True)

    if y.ndim != 1:
        raise ValueError("Dimention of y must be 1")

    numSplits = len(idx)
    metrics = numpy.zeros(numSplits)
    logging.debug("EvaluateLearn: Using " + str(numSplits) + " splits on " + str(X.shape[0]) + " examples")

    for i, (idxtr, idxts) in enumerate(idx):
        if progress:
            Util.printConciseIteration(i, 1, numSplits)

        trainX, testX = X[idxtr, :], X[idxts, :]
        trainY, testY = y[idxtr], y[idxts]

        learnModel(trainX, trainY)
        predY = predict(testX)
        # Release memory held by the learner before the next split
        gc.collect()

        metrics[i] = metricMethod(predY, testY)

    return metrics
def learnModel(self, X, y):
    """
    Learn a forest of TreeRank models from the examples given as the rows of
    the matrix X and the labels given in the 1D array y. Each tree is trained
    on a random sample of the rows, drawn with or without replacement
    according to ``self.sampleReplace``.

    :param X: A matrix with examples as rows
    :type X: :class:`ndarray`

    :param y: A vector of labels, which must be exactly -1 and +1
    :type y: :class:`ndarray`

    :raises ValueError: if the labels are not binary or are not -1/+1
    """
    Parameter.checkClass(X, numpy.ndarray)
    Parameter.checkClass(y, numpy.ndarray)
    Parameter.checkArray(X)
    Parameter.checkArray(y)

    labels = numpy.unique(y)
    if labels.shape[0] != 2:
        raise ValueError("Can only accept binary labelled data")
    if not (labels == numpy.array([-1, 1])).all():
        raise ValueError("Labels must be -1/+1: " + str(labels))

    numExamples = X.shape[0]
    numSampledExamples = int(numpy.round(self.sampleSize * numExamples))

    # Build into local lists so the attributes are only replaced once every tree is learnt
    trees = []
    sampledInds = []

    for treeIndex in range(self.numTrees):
        Util.printConciseIteration(treeIndex, 1, self.numTrees, "Tree: ")

        if self.sampleReplace:
            inds = numpy.random.randint(0, numExamples, numSampledExamples)
        else:
            inds = numpy.random.permutation(numExamples)[:numSampledExamples]

        treeRank = TreeRank(self.leafRanklearner)
        treeRank.setMaxDepth(self.maxDepth)
        treeRank.setMinSplit(self.minSplit)
        treeRank.setFeatureSize(self.featureSize)
        treeRank.setBestResponse(self.bestResponse)
        treeRank.learnModel(X[inds, :], y[inds])

        trees.append(treeRank)
        sampledInds.append(inds)

    self.forestList = trees
    self.indList = sampledInds
def learnModel(self, X, y):
    """
    Learn a forest of TreeRank models from the examples given as the rows of
    the matrix X and the labels given in the 1D array y. Each tree is trained
    on a random sample of the rows, drawn with or without replacement
    according to ``self.sampleReplace``.

    :param X: A matrix with examples as rows
    :type X: :class:`ndarray`

    :param y: A vector of labels, which must be exactly -1 and +1
    :type y: :class:`ndarray`

    :raises ValueError: if the labels are not binary or are not -1/+1
    """
    Parameter.checkClass(X, numpy.ndarray)
    Parameter.checkClass(y, numpy.ndarray)
    Parameter.checkArray(X)
    Parameter.checkArray(y)

    labels = numpy.unique(y)
    if labels.shape[0] != 2:
        raise ValueError("Can only accept binary labelled data")
    if not (labels == numpy.array([-1, 1])).all():
        raise ValueError("Labels must be -1/+1: " + str(labels))

    numExamples = X.shape[0]
    numSampledExamples = int(numpy.round(self.sampleSize * numExamples))

    # Build into local lists so the attributes are only replaced once every tree is learnt
    trees = []
    sampledInds = []

    for treeIndex in range(self.numTrees):
        Util.printConciseIteration(treeIndex, 1, self.numTrees, "Tree: ")

        if self.sampleReplace:
            inds = numpy.random.randint(0, numExamples, numSampledExamples)
        else:
            inds = numpy.random.permutation(numExamples)[:numSampledExamples]

        treeRank = TreeRank(self.leafRanklearner)
        treeRank.setMaxDepth(self.maxDepth)
        treeRank.setMinSplit(self.minSplit)
        treeRank.setFeatureSize(self.featureSize)
        treeRank.setBestResponse(self.bestResponse)
        treeRank.learnModel(X[inds, :], y[inds])

        trees.append(treeRank)
        sampledInds.append(inds)

    self.forestList = trees
    self.indList = sampledInds
def learnModel(self, X, Y):
    """
    Learn a model for a set of examples given as the rows of the matrix X,
    with corresponding labels given in the elements of 1D array Y. The tree
    is grown level by level up to ``self.maxDepth`` and stored in ``self.tree``.

    :param X: A matrix with examples as rows
    :type X: :class:`ndarray`

    :param Y: A vector of binary labels as a 1D array; the labels must be -1 and +1
    :type Y: :class:`ndarray`

    :raises ValueError: if the labels are not binary or are not -1/+1
    """
    Parameter.checkClass(X, numpy.ndarray)
    Parameter.checkClass(Y, numpy.ndarray)
    Parameter.checkArray(X)
    Parameter.checkArray(Y)

    labels = numpy.unique(Y)
    if labels.shape[0] != 2:
        raise ValueError("Can only accept binary labelled data: " + str(labels))
    if (labels != numpy.array([-1, 1])).any():
        raise ValueError("Labels must be -1/+1: " + str(labels))

    numFeatures = X.shape[1]

    # Identity test: "== None" would go through the operand's __eq__
    if self.featureSize is None:
        # Default to a fraction that selects about sqrt(numFeatures) features
        featureSize = numpy.sqrt(numFeatures)/float(numFeatures)
    else:
        featureSize = self.featureSize

    tree = DictTree()
    trainInds = numpy.arange(Y.shape[0])
    numSelected = int(numpy.round(numFeatures*featureSize))
    featureInds = numpy.sort(numpy.random.permutation(numFeatures)[0:numSelected])

    #Seed the tree
    node = RankNode(trainInds, featureInds)
    tree.setVertex((0, 0), node)

    # Level d holds at most 2**d vertices, addressed as (d, k)
    for d in range(self.maxDepth):
        for k in range(2**d):
            if tree.vertexExists((d, k)):
                node = tree.getVertex((d, k))

                if not node.isPure() and not node.isLeafNode():
                    self.splitNode(tree, X, Y, d, k)

    self.tree = tree
def predict(self, X):
    """
    Score the examples given as the rows of the matrix X. The score of an
    example is the sum of the predictions of the first ``self.numTrees``
    trees in ``self.forestList`` divided by ``self.numTrees``.

    :param X: A matrix with examples as rows
    :type X: :class:`ndarray`

    :return: A vector of scores corresponding to each example.
    """
    Parameter.checkClass(X, numpy.ndarray)
    Parameter.checkArray(X)

    total = numpy.zeros(X.shape[0])

    for treeIndex in range(self.numTrees):
        tree = self.forestList[treeIndex]
        total += tree.predict(X)

    return total/self.numTrees
def predict(self, X):
    """
    Score the examples given as the rows of the matrix X. The score of an
    example is the sum of the predictions of the first ``self.numTrees``
    trees in ``self.forestList`` divided by ``self.numTrees``.

    :param X: A matrix with examples as rows
    :type X: :class:`ndarray`

    :return: A vector of scores corresponding to each example.
    """
    Parameter.checkClass(X, numpy.ndarray)
    Parameter.checkArray(X)

    total = numpy.zeros(X.shape[0])

    for treeIndex in range(self.numTrees):
        tree = self.forestList[treeIndex]
        total += tree.predict(X)

    return total / self.numTrees
def evaluateLearn2(X, Y, indexList, learnModel, predict, metricMethods):
    """
    Evaluate a learner given the functions (learnModel, predict) over the
    train/test splits in indexList, recording every metric in metricMethods
    on both the training and the test rows of each split.

    Each metric is called as metricMethod(trueY, predictedY).

    :param X: A matrix with examples as rows
    :type X: :class:`ndarray`

    :param Y: A 1D vector of labels
    :type Y: :class:`ndarray`

    :param indexList: An iterable of (trainInds, testInds) pairs

    :param metricMethods: A list of metric functions

    :return: (trainMetrics, testMetrics), two lists with one inner list per
        metric; each inner list holds one value per split.

    :raises ValueError: if Y is not one dimensional
    """
    #Could combine this with evaluateLearn
    Parameter.checkClass(X, numpy.ndarray)
    Parameter.checkArray(X, softCheck=True)
    Parameter.checkClass(Y, numpy.ndarray)
    Parameter.checkArray(Y, softCheck=True)

    if Y.ndim != 1:
        raise ValueError("Expecting Y to be 1D")

    numMetrics = len(metricMethods)
    trainMetrics = [[] for _ in range(numMetrics)]
    testMetrics = [[] for _ in range(numMetrics)]

    for trainInds, testInds in indexList:
        trainX, trainY = X[trainInds, :], Y[trainInds]
        testX, testY = X[testInds, :], Y[testInds]

        learnModel(trainX, trainY)
        predTrainY = predict(trainX)
        predTestY = predict(testX)

        #Now compute all metrics
        for position, metricMethod in enumerate(metricMethods):
            trainMetrics[position].append(metricMethod(trainY, predTrainY))
            testMetrics[position].append(metricMethod(testY, predTestY))

        gc.collect()

    logging.debug("All done")
    return trainMetrics, testMetrics
def svd_from_eigh(A, eps=10 ** -8, tol=10 ** -8):
    """
    Find the SVD of an ill conditioned matrix A. This uses scipy.linalg.eigh
    on A^*A + eps*I, so it is not as precise as a direct SVD, but can be
    useful if the SVD does not converge. Only the singular vectors
    corresponding to eigenvalues of A^*A greater than tol are returned.

    Note: This is slightly different to linalg.svd which returns zero singular
    values.

    :param A: The matrix to decompose
    :type A: :class:`ndarray`

    :param eps: The multiple of the identity added to A^*A before the
        eigendecomposition and subtracted from the eigenvalues afterwards

    :param tol: Eigenvalues of A^*A not greater than tol are discarded; also
        used as the tolerance of the debug checks

    :return: (P, sigma, Qh) with P = A Q / sigma, sigma the square roots of the
        retained eigenvalues and Qh the conjugate transpose of the retained
        eigenvectors Q.
    """
    AA = A.conj().T.dot(A)
    lmbda, Q = scipy.linalg.eigh(AA + eps * numpy.eye(A.shape[1]))
    lmbda = lmbda - eps

    inds = numpy.arange(lmbda.shape[0])[lmbda > tol]
    lmbda, Q = Util.indEig(lmbda, Q, inds)

    sigma = lmbda ** 0.5
    P = A.dot(Q) / sigma
    Qh = Q.conj().T

    if __debug__:
        # numpy.allclose replaces the deprecated scipy.allclose alias
        if not numpy.allclose(A, (P * sigma).dot(Qh), atol=tol):
            logging.warning(" SVD obtained from EVD is too poor")
        Parameter.checkArray(P, softCheck=True, arrayInfo="P in svd_from_eigh()")
        if not Parameter.checkOrthogonal(
            P, tol=tol, softCheck=True, arrayInfo="P in svd_from_eigh()", investigate=True
        ):
            print("corresponding sigma: ", sigma)
        Parameter.checkArray(sigma, softCheck=True, arrayInfo="sigma in svd_from_eigh()")
        Parameter.checkArray(Qh, softCheck=True, arrayInfo="Qh in svd_from_eigh()")
        if not Parameter.checkOrthogonal(Qh.conj().T, tol=tol, softCheck=True, arrayInfo="Qh.H in svd_from_eigh()"):
            print("corresponding sigma: ", sigma)

    return P, sigma, Qh
def evaluateLearners(X, Y, indexList, splitFunction, learnerIterator, metricMethods, progress=True):
    """
    Perform model selection over a number of train/test splits as defined by
    indexList. For each split, every learner in learnerIterator is evaluated
    on the training rows with AbstractPredictor.evaluateLearn using the inner
    splits returned by splitFunction(trainX, trainY). The learner with the
    *minimum* mean of metricMethods[0] is retrained on the whole training set
    and all of metricMethods are computed on its test predictions.

    The variable metricMethods is a list of functions to call
    metricMethod(predY, trueY) of which the first is used in model selection.

    :param X: A matrix with examples as rows
    :type X: :class:`ndarray`

    :param Y: A 1D vector of labels
    :type Y: :class:`ndarray`

    :param indexList: An iterable of (trainInds, testInds) pairs

    :param splitFunction: A function such that splitFunction(trainX, trainY)
        returns the inner training/test splits

    :param learnerIterator: An iterable of learners; it is iterated once per
        outer split, so it must be re-iterable (e.g. a list, not a generator)

    :param metricMethods: A list of metric functions

    :param progress: Passed through to AbstractPredictor.evaluateLearn

    :return: (allMetrics, bestLearners) where allMetrics[j] is the list of
        metric values of split j and bestLearners[j] the learner selected for it.

    :raises ValueError: if Y is not one dimensional, or if no learner could be
        selected for a split (no learners produced, or no mean metric below infinity)
    """
    Parameter.checkClass(X, numpy.ndarray)
    Parameter.checkArray(X, softCheck=True)
    Parameter.checkClass(Y, numpy.ndarray)
    Parameter.checkArray(Y, softCheck=True)

    if Y.ndim != 1:
        raise ValueError("Expecting Y to be 1D")

    mainMetricMethod = metricMethods[0]
    bestLearners = []
    allMetrics = []

    for trainInds, testInds in indexList:
        trainX = X[trainInds, :]
        trainY = Y[trainInds]

        testX = X[testInds, :]
        testY = Y[testInds]

        # Reset per split so that a winner from an earlier split is never
        # silently reused when this split selects nothing
        bestLearner = None
        minMetric = float('inf')

        for learner in learnerIterator:
            logging.debug("Learning with " + str(learner))
            idx = splitFunction(trainX, trainY)
            metrics = AbstractPredictor.evaluateLearn(
                trainX, trainY, idx, learner.learnModel, learner.predict, mainMetricMethod, progress)

            meanMetric = numpy.mean(metrics)

            if meanMetric < minMetric:
                bestLearner = learner
                minMetric = meanMetric

            #Try to get some memory back
            gc.collect()

        if bestLearner is None:
            raise ValueError("No learner could be selected: learnerIterator produced no learner with a mean metric below infinity")

        bestLearner.learnModel(trainX, trainY)
        predY = bestLearner.predict(testX)
        bestLearners.append(bestLearner)

        #Now compute all metrics
        currentMetrics = [metricMethod(predY, testY) for metricMethod in metricMethods]

        allMetrics.append(currentMetrics)
        logging.debug("Outer metric(s): " + str(currentMetrics))

    for selectedLearner, learnerMetrics in zip(bestLearners, allMetrics):
        logging.debug("Learner = " + str(selectedLearner) + " error= " + str(learnerMetrics))

    logging.debug("All done")
    return allMetrics, bestLearners
def eigenAdd2(omega, Q, Y1, Y2, k, debug=False):
    """
    Compute an approximation of the eigendecomposition A^*A + Y1Y2^* + Y2Y1^*
    in which Y1, Y2 are low rank matrices, Y1^*Y2=0 and A^*A = Q Omega Q*. We
    use the rank-k approximation of A^*A: Q_k Omega_k Q_k^* and then find
    [A^*A_k + Y1Y2^* + Y2Y1^*].

    :param omega: The eigenvalues of A^*A as a 1D array
    :type omega: :class:`ndarray`

    :param Q: The eigenvectors of A^*A, with as many columns as omega has entries
    :type Q: :class:`ndarray`

    :param Y1: A matrix with the same number of rows as Q
    :type Y1: :class:`ndarray`

    :param Y2: A matrix with the same shape as Y1
    :type Y2: :class:`ndarray`

    :param k: The number of largest eigenvalues of A^*A to retain
    :type k: :class:`int`

    :param debug: If True, additionally return D and the symmetric update
        DStarY1Y2StarD + DStarY1Y2StarD^*

    :return: pi, V, the eigenvalues/eigenvectors of [A^*A_k + Y1Y2^* + Y2Y1^*]
        whose absolute eigenvalue exceeds EigenUpdater.tol. If debug is True
        the tuple (pi, V, D, DStarY1Y2StarD + DStarY1Y2StarD^*) is returned.

    :raises ValueError: if omega is not 1D or the shapes of omega, Q, Y1 and Y2
        do not agree
    """
    #logging.debug("< eigenAdd2 >")
    Parameter.checkInt(k, 0, float('inf'))
    Parameter.checkClass(omega, numpy.ndarray)
    Parameter.checkClass(Q, numpy.ndarray)
    Parameter.checkClass(Y1, numpy.ndarray)
    Parameter.checkClass(Y2, numpy.ndarray)
    if not numpy.isrealobj(omega) or not numpy.isrealobj(Q):
        logging.warning("Eigenvalues or eigenvectors are not real")
    if not numpy.isrealobj(Y1) or not numpy.isrealobj(Y2):
        logging.warning("Y1 or Y2 are not real")
    if omega.ndim != 1:
        raise ValueError("omega must be 1-d array")
    if omega.shape[0] != Q.shape[1]:
        raise ValueError("Must have same number of eigenvalues and eigenvectors")
    if Q.shape[0] != Y1.shape[0]:
        raise ValueError("Q must have the same number of rows as Y1 rows")
    if Q.shape[0] != Y2.shape[0]:
        raise ValueError("Q must have the same number of rows as Y2 rows")
    if Y1.shape[1] != Y2.shape[1]:
        raise ValueError("Y1 must have the same number of columns as Y2 columns")

    if __debug__:
        Parameter.checkArray(omega, softCheck=True, arrayInfo="omega as input in eigenAdd2()")
        Parameter.checkArray(Q, softCheck=True, arrayInfo="Q as input in eigenAdd2()")
        Parameter.checkOrthogonal(Q, tol=EigenUpdater.tol, softCheck=True, arrayInfo="Q as input in eigenAdd2()")
        Parameter.checkArray(Y1, softCheck=True, arrayInfo="Y1 as input in eigenAdd2()")
        Parameter.checkArray(Y2, softCheck=True, arrayInfo="Y2 as input in eigenAdd2()")

    #Get first k eigenvectors/values of A^*A
    omega, Q = Util.indEig(omega, Q, numpy.flipud(numpy.argsort(omega))[0:k])

    # Part of Y1 outside the span of Q, and its SVD restricted to the
    # singular values above the tolerance
    QY1 = Q.conj().T.dot(Y1)
    Y1bar = Y1 - Q.dot(QY1)

    P1bar, sigma1Bar, Q1bar = Util.safeSvd(Y1bar)
    inds = numpy.arange(sigma1Bar.shape[0])[numpy.abs(sigma1Bar)>EigenUpdater.tol]
    P1bar, sigma1Bar, Q1bar = Util.indSvd(P1bar, sigma1Bar, Q1bar, inds)

    # checks on SVD decomposition of Y1bar
    if __debug__:
        Parameter.checkArray(QY1, softCheck=True, arrayInfo="QY1 in eigenAdd2()")
        Parameter.checkArray(Y1bar, softCheck=True, arrayInfo="Y1bar in eigenAdd2()")
        Parameter.checkArray(P1bar, softCheck=True, arrayInfo="P1bar in eigenAdd2()")
        if not Parameter.checkOrthogonal(P1bar, tol=EigenUpdater.tol, softCheck=True, arrayInfo="P1bar in eigenAdd2()", investigate=True):
            print ("corresponding sigma: ", sigma1Bar)
        Parameter.checkArray(sigma1Bar, softCheck=True, arrayInfo="sigma1Bar in eigenAdd2()")
        Parameter.checkArray(Q1bar, softCheck=True, arrayInfo="Q1bar in eigenAdd2()")
        if not Parameter.checkOrthogonal(Q1bar, tol=EigenUpdater.tol, softCheck=True, arrayInfo="Q1bar in eigenAdd2()"):
            print ("corresponding sigma: ", sigma1Bar)

    del Y1bar

    # Part of Y2 outside the span of both Q and P1bar, and its truncated SVD
    P1barY2 = P1bar.conj().T.dot(Y2)
    QY2 = Q.conj().T.dot(Y2)
    Y2bar = Y2 - Q.dot(QY2) - P1bar.dot(P1barY2)

    P2bar, sigma2Bar, Q2bar = Util.safeSvd(Y2bar)
    inds = numpy.arange(sigma2Bar.shape[0])[numpy.abs(sigma2Bar)>EigenUpdater.tol]
    P2bar, sigma2Bar, Q2bar = Util.indSvd(P2bar, sigma2Bar, Q2bar, inds)

    # checks on SVD decomposition of Y2bar
    if __debug__:
        Parameter.checkArray(P1barY2, softCheck=True, arrayInfo="P1barY2 in eigenAdd2()")
        Parameter.checkArray(QY2, softCheck=True, arrayInfo="QY2 in eigenAdd2()")
        Parameter.checkArray(Y2bar, softCheck=True, arrayInfo="Y2bar in eigenAdd2()")
        Parameter.checkArray(P2bar, softCheck=True, arrayInfo="P2bar in eigenAdd2()")
        Parameter.checkOrthogonal(P2bar, tol=EigenUpdater.tol, softCheck=True, arrayInfo="P2bar in eigenAdd2()")
        Parameter.checkArray(sigma2Bar, softCheck=True, arrayInfo="sigma2Bar in eigenAdd2()")
        Parameter.checkArray(Q2bar, softCheck=True, arrayInfo="Q2bar in eigenAdd2()")
        Parameter.checkOrthogonal(Q2bar, tol=EigenUpdater.tol, softCheck=True, arrayInfo="Q2bar in eigenAdd2()")

    del Y2bar

    r = omega.shape[0]
    p = Y1.shape[1]
    p1 = sigma1Bar.shape[0]
    p2 = sigma2Bar.shape[0]

    # D stacks the three sets of basis vectors column-wise
    D = numpy.c_[Q, P1bar, P2bar]
    del P1bar
    del P2bar

    # rem: A*s = A.dot(diag(s)) ; A*s[:,new] = diag(s).dot(A)
    DStarY1 = numpy.r_[QY1, sigma1Bar[:,numpy.newaxis] * Q1bar.conj().T, numpy.zeros((p2, p))]
    DStarY2 = numpy.r_[QY2, P1barY2, sigma2Bar[:,numpy.newaxis] * Q2bar.conj().T]
    DStarY1Y2StarD = DStarY1.dot(DStarY2.conj().T)

    del DStarY1
    del DStarY2

    # F has omega on the first r diagonal entries plus the symmetric update
    F = numpy.zeros((r+p1+p2, r+p1+p2))
    F[range(r),range(r)] = omega
    F = F + DStarY1Y2StarD + DStarY1Y2StarD.conj().T

    #A check to make sure DFD^T is AA_k + Y1Y2 + Y2Y1
    #assert numpy.linalg.norm(D.dot(F).dot(D.T) - Q.dot(numpy.diag(omega).dot(Q.T)) - Y1.dot(Y2.T) - Y2.dot(Y1.T)) < 10**-6

    # checks on F
    if __debug__:
        #Parameter.checkArray(DStarY1, softCheck=True, arrayInfo="DStarY1 in eigenAdd2()")
        #Parameter.checkArray(DStarY2, softCheck=True, arrayInfo="DStarY2 in eigenAdd2()")
        Parameter.checkArray(DStarY1Y2StarD, softCheck=True, arrayInfo="DStarY1Y2StarD in eigenAdd2()")
        Parameter.checkArray(F, softCheck=True, arrayInfo="F in eigenAdd2()")
        Parameter.checkSymmetric(F, tol=EigenUpdater.tol, softCheck=True, arrayInfo="F in eigenAdd2()")

    pi, H = scipy.linalg.eigh(F)
    # remove too small eigenvalues
    pi, H = Util.indEig(pi, H, numpy.arange(pi.shape[0])[numpy.abs(pi)>EigenUpdater.tol])
    # keep greatest eigenvalues
    #pi, H = Util.indEig(pi, H, numpy.flipud(numpy.argsort(pi))[:min(k,pi.shape[0])])

    # Map the eigenvectors of F back through the basis D
    V = D.dot(H)

    if __debug__:
        if not Parameter.checkOrthogonal(D, tol=EigenUpdater.tol, softCheck=True, investigate=True, arrayInfo="D in eigenAdd2()"):
            print("pi:\n", pi)
        if not Parameter.checkOrthogonal(H, tol=EigenUpdater.tol, softCheck=True, investigate=True, arrayInfo="H in eigenAdd2()"):
            print("pi:\n", pi)

    if ProfileUtils.memory() > 10**9:
        ProfileUtils.memDisplay(locals())

    #logging.debug("</ eigenAdd2 >")
    if debug:
        return pi, V, D, DStarY1Y2StarD + DStarY1Y2StarD.conj().T
    else:
        return pi, V
def safeSvd(A, eps=10 ** -8, tol=10 ** -8):
    """
    Compute the SVD of a matrix using scipy.linalg.svd, and if convergence
    fails or the result is unusable revert to Util.svd.

    The result of scipy.linalg.svd is cleaned by keeping only the singular
    values greater than tol. The fallback to Util.svd(A, eps, tol) is taken
    when scipy.linalg.svd raises LinAlgError, when the cleaned decomposition
    no longer reproduces A, or (in debug mode) when Parameter.checkArray
    rejects one of the output arrays. Any other exception is propagated.

    :param A: The matrix to decompose
    :type A: :class:`ndarray`

    :param eps: Passed to Util.svd in the fallback path

    :param tol: Threshold on the singular values; also passed to Util.svd

    :return: (P, sigma, Qh)
    """
    # check input matrix
    if __debug__:
        if not Parameter.checkArray(A, softCheck=True):
            logging.info("... in Util.safeSvd")

    # The fallback is signalled by an Exception carrying exactly this message;
    # the three raise sites and the comparison below must stay in sync.
    try:
        # run scipy.linalg.svd
        try:
            P, sigma, Qh = scipy.linalg.svd(A, full_matrices=False)
        except scipy.linalg.LinAlgError as e:
            logging.warning(str(e))
            raise Exception("SVD decomposition has to be computed from EVD decomposition")

        # --- only when the SVD decomposition comes from scipy.linalg.svd ---
        # clean output singular values (sometimes scipy.linalg.svd returns NaN or negative singular values, let's remove them)
        inds = numpy.arange(sigma.shape[0])[sigma > tol]
        if inds.shape[0] < sigma.shape[0]:
            P, sigma, Q = Util.indSvd(P, sigma, Qh, inds)
            Qh = Q.conj().T
            # an expensive check but we really need it
            # rem: A*s = A.dot(diag(s)) ; A*s[:,new] = diag(s).dot(A)
            # numpy.allclose replaces the deprecated scipy.allclose alias
            if not numpy.allclose(A, (P * sigma).dot(Qh)):
                logging.warning(
                    " After cleaning singular values from scipy.linalg.svd, the SVD decomposition is too far from the original matrix"
                )
                # numpy.savez("matrix_leading_to_bad_SVD.npz", A)
                raise Exception("SVD decomposition has to be computed from EVD decomposition")

        # check scipy.linalg.svd output matrices (expensive)
        if __debug__:
            badAnswerFromScipySvd = False
            if not Parameter.checkArray(P, softCheck=True, arrayInfo="P in Util.safeSvd()"):
                badAnswerFromScipySvd = True
            if not Parameter.checkArray(sigma, softCheck=True, arrayInfo="sigma in Util.safeSvd()"):
                badAnswerFromScipySvd = True
            if not Parameter.checkArray(Qh, softCheck=True, arrayInfo="Qh in Util.safeSvd()"):
                badAnswerFromScipySvd = True
            if badAnswerFromScipySvd:
                logging.warning(
                    " After cleaning singular values from scipy.linalg.svd, the SVD decomposition still contains 'NaN', 'inf' or complex values"
                )
                raise Exception("SVD decomposition has to be computed from EVD decomposition")

    except Exception as inst:
        # Anything other than the fallback signal is a genuine error
        if inst.args != ("SVD decomposition has to be computed from EVD decomposition",):
            raise
        logging.warning(" Using EVD method to compute the SVD.")
        P, sigma, Qh = Util.svd(A, eps, tol)

        # check Util.svd output matrices (expensive)
        if __debug__:
            badAnswerFromUtilSvd = False
            if not Parameter.checkArray(P, softCheck=True):
                logging.info("... in P in Util.safeSvd")
                badAnswerFromUtilSvd = True
                # print nan_rows in P: numpy.isnan(P).sum(0).nonzero()
            if not Parameter.checkArray(sigma, softCheck=True):
                logging.info("... in sigma in Util.safeSvd")
                badAnswerFromUtilSvd = True
                # print numpy.isnan(sigma).nonzero()
            if not Parameter.checkArray(Qh, softCheck=True):
                logging.info("... in Q in Util.safeSvd")
                badAnswerFromUtilSvd = True
                # blop = numpy.isnan(Qh).sum(1)
                # print blop.nonzero()
                # print blop[blop.nonzero()]
            if badAnswerFromUtilSvd:
                # NOTE(review): the parallel message above says "complex values";
                # "real values" here looks like a typo - confirm before changing.
                logging.warning(
                    " SVD decomposition obtained from EVD decomposition contains 'NaN', 'inf' or real values"
                )

    from sandbox.util.ProfileUtils import ProfileUtils
    if ProfileUtils.memory() > 10 ** 9:
        ProfileUtils.memDisplay(locals())

    return P, sigma, Qh
def evaluateLearners(X, Y, indexList, splitFunction, learnerIterator, metricMethods, progress=True):
    """
    Perform model selection over a number of train/test splits as defined by
    indexList. For each split, every learner in learnerIterator is evaluated
    on the training rows with AbstractPredictor.evaluateLearn using the inner
    splits returned by splitFunction(trainX, trainY). The learner with the
    *minimum* mean of metricMethods[0] is retrained on the whole training set
    and all of metricMethods are computed on its test predictions.

    The variable metricMethods is a list of functions to call
    metricMethod(predY, trueY) of which the first is used in model selection.

    :param X: A matrix with examples as rows
    :type X: :class:`ndarray`

    :param Y: A 1D vector of labels
    :type Y: :class:`ndarray`

    :param indexList: An iterable of (trainInds, testInds) pairs

    :param splitFunction: A function such that splitFunction(trainX, trainY)
        returns the inner training/test splits

    :param learnerIterator: An iterable of learners; it is iterated once per
        outer split, so it must be re-iterable (e.g. a list, not a generator)

    :param metricMethods: A list of metric functions

    :param progress: Passed through to AbstractPredictor.evaluateLearn

    :return: (allMetrics, bestLearners) where allMetrics[j] is the list of
        metric values of split j and bestLearners[j] the learner selected for it.

    :raises ValueError: if Y is not one dimensional, or if no learner could be
        selected for a split (no learners produced, or no mean metric below infinity)
    """
    Parameter.checkClass(X, numpy.ndarray)
    Parameter.checkArray(X, softCheck=True)
    Parameter.checkClass(Y, numpy.ndarray)
    Parameter.checkArray(Y, softCheck=True)

    if Y.ndim != 1:
        raise ValueError("Expecting Y to be 1D")

    mainMetricMethod = metricMethods[0]
    bestLearners = []
    allMetrics = []

    for trainInds, testInds in indexList:
        trainX = X[trainInds, :]
        trainY = Y[trainInds]

        testX = X[testInds, :]
        testY = Y[testInds]

        # Reset per split so that a winner from an earlier split is never
        # silently reused when this split selects nothing
        bestLearner = None
        minMetric = float('inf')

        for learner in learnerIterator:
            logging.debug("Learning with " + str(learner))
            idx = splitFunction(trainX, trainY)
            metrics = AbstractPredictor.evaluateLearn(trainX, trainY, idx, learner.learnModel, learner.predict, mainMetricMethod, progress)

            meanMetric = numpy.mean(metrics)

            if meanMetric < minMetric:
                bestLearner = learner
                minMetric = meanMetric

            #Try to get some memory back
            gc.collect()

        if bestLearner is None:
            raise ValueError("No learner could be selected: learnerIterator produced no learner with a mean metric below infinity")

        bestLearner.learnModel(trainX, trainY)
        predY = bestLearner.predict(testX)
        bestLearners.append(bestLearner)

        #Now compute all metrics
        currentMetrics = [metricMethod(predY, testY) for metricMethod in metricMethods]

        allMetrics.append(currentMetrics)
        logging.debug("Outer metric(s): " + str(currentMetrics))

    for selectedLearner, learnerMetrics in zip(bestLearners, allMetrics):
        logging.debug("Learner = " + str(selectedLearner) + " error= " + str(learnerMetrics))

    logging.debug("All done")
    return allMetrics, bestLearners