예제 #1
0
    def predict(self, X):
        """
        Score every example (row of X) by passing it down the learned tree.

        All examples are first attached to the root vertex (0, 0). The tree is
        then walked level by level; whenever a visited vertex is a leaf, the
        score of that leaf is written to the examples stored on it.

        :param X: A matrix with examples as rows
        :type X: :class:`ndarray`

        :return: A vector of scores, one per row of X. Entries that never
            reach a leaf keep their initial value of zero.
        """
        Parameter.checkClass(X, numpy.ndarray)
        Parameter.checkArray(X)

        numExamples = X.shape[0]
        scores = numpy.zeros(numExamples)

        # Every example starts out at the root of the tree
        rootNode = self.tree.getVertex((0, 0))
        rootNode.setTestInds(numpy.arange(numExamples))

        # Level d can hold at most 2**d vertices, addressed as (d, 0..2**d - 1)
        for depth in range(self.maxDepth + 1):
            for position in range(2 ** depth):
                vertexId = (depth, position)
                if not self.tree.vertexExists(vertexId):
                    continue

                # classifyNode is defined elsewhere; presumably it forwards the
                # vertex's test indices to its children -- TODO confirm
                self.classifyNode(self.tree, X, depth, position)

                vertex = self.tree.getVertex(vertexId)
                if vertex.isLeafNode():
                    leafInds = vertex.getTestInds()
                    scores[leafInds] = vertex.getScore()

        return scores
예제 #2
0
    def learnModel(self, X, Y):
        """
        Learn a model from the examples in the rows of X and the labels Y.

        The inputs are validated, packed into a data frame with
        self._getDataFrame and handed to self.learnModelDataFrame together
        with the formula 'class ~ .'. Afterwards garbage collection is
        triggered in Python and, through robjects.r, on the R side.

        :param X: A matrix with examples as rows
        :type X: :class:`ndarray`

        :param Y: The labels; a 1D array is converted to a single column
        :type Y: :class:`ndarray`

        :raises ValueError: if Y has fewer than two distinct values
        """
        Parameter.checkClass(X, numpy.ndarray)
        Parameter.checkClass(Y, numpy.ndarray)
        Parameter.checkArray(X)
        Parameter.checkArray(Y)

        # NOTE(review): the message says "binary" but only fewer than two
        # distinct labels is rejected; more than two passes this check.
        if numpy.unique(Y).shape[0] < 2:
            raise ValueError("Vector of labels must be binary, currently numpy.unique(Y) = " + str(numpy.unique(Y)))

        #If Y is 1D make it 2D: shape (n,) becomes a column of shape (n, 1)
        if Y.ndim == 1:
            Y = numpy.array([Y]).T
        
        XY = self._getDataFrame(X, Y)
        # Formula 'class ~ .' -- presumably the data frame built above names
        # its label column 'class'; verify against _getDataFrame
        formula = robjects.Formula('class ~ .')
        self.learnModelDataFrame(formula, XY)

        # Free memory on both sides: Python collector, then the R statements
        # gc(verbose=TRUE) and memory.profile(), then Python again
        gc.collect()
        robjects.r('gc(verbose=TRUE)')
        robjects.r('memory.profile()')
        gc.collect()

        if self.printMemStats:
            logging.debug(self.getLsos()())
            # memDisplay receives this function's local variables, so the
            # names of the locals above are part of the logged output
            logging.debug(ProfileUtils.memDisplay(locals()))
예제 #3
0
    def learnModel(self, X, Y):
        """
        Learn a model from the examples in the rows of X and the labels Y.

        The inputs are validated, packed into a data frame with
        self._getDataFrame and handed to self.learnModelDataFrame together
        with the formula 'class ~ .'. Afterwards garbage collection is
        triggered in Python and, through robjects.r, on the R side.

        :param X: A matrix with examples as rows
        :type X: :class:`ndarray`

        :param Y: The labels; a 1D array is converted to a single column
        :type Y: :class:`ndarray`

        :raises ValueError: if Y has fewer than two distinct values
        """
        Parameter.checkClass(X, numpy.ndarray)
        Parameter.checkClass(Y, numpy.ndarray)
        Parameter.checkArray(X)
        Parameter.checkArray(Y)

        # NOTE(review): the message says "binary" but only fewer than two
        # distinct labels is rejected; more than two passes this check.
        if numpy.unique(Y).shape[0] < 2:
            raise ValueError(
                "Vector of labels must be binary, currently numpy.unique(Y) = "
                + str(numpy.unique(Y)))

        #If Y is 1D make it 2D: shape (n,) becomes a column of shape (n, 1)
        if Y.ndim == 1:
            Y = numpy.array([Y]).T

        XY = self._getDataFrame(X, Y)
        # Formula 'class ~ .' -- presumably the data frame built above names
        # its label column 'class'; verify against _getDataFrame
        formula = robjects.Formula('class ~ .')
        self.learnModelDataFrame(formula, XY)

        # Free memory on both sides: Python collector, then the R statements
        # gc(verbose=TRUE) and memory.profile(), then Python again
        gc.collect()
        robjects.r('gc(verbose=TRUE)')
        robjects.r('memory.profile()')
        gc.collect()

        if self.printMemStats:
            logging.debug(self.getLsos()())
            # memDisplay receives this function's local variables, so the
            # names of the locals above are part of the logged output
            logging.debug(ProfileUtils.memDisplay(locals()))
예제 #4
0
    def evaluateLearn(X, y, idx, learnModel, predict, metricMethod, progress=True):
        """
        Train and test a learner on each of the given training/test splits and
        score the test predictions of every split with metricMethod.

        :param X: A matrix with examples as rows
        :type X: :class:`ndarray`

        :param y: A vector of labels (must be 1D)
        :type y: :class:`ndarray`

        :param idx: A sized collection of (trainIndices, testIndices) pairs
        :type idx: :class:`list`

        :param learnModel: A function such that learnModel(X, y) finds a mapping from X to y
        :type learnModel: :class:`function`

        :param predict: A function such that predict(X) makes predictions for X
        :type predict: :class:`function`

        :param metricMethod: A function called as metricMethod(predY, testY) on each split
        :type metricMethod: :class:`function`

        :param progress: If true, report the split number via Util.printConciseIteration
        :type progress: :class:`bool`

        :return: A 1D array holding one metric value per split, in the order of idx.
        """
        Parameter.checkClass(X, numpy.ndarray)
        Parameter.checkArray(X, softCheck=True)
        Parameter.checkInt(X.shape[0], 1, float('inf'))
        Parameter.checkClass(y, numpy.ndarray)
        Parameter.checkArray(y, softCheck=True)

        if y.ndim != 1:
            raise ValueError("Dimention of y must be 1")

        numSplits = len(idx)
        metrics = numpy.zeros(numSplits)
        logging.debug("EvaluateLearn: Using " + str(numSplits) + " splits on " + str(X.shape[0]) + " examples")

        for splitIndex, (trainInds, testInds) in enumerate(idx):
            if progress:
                Util.printConciseIteration(splitIndex, 1, numSplits)

            # Slice out both parts of the split before any training happens
            trainX, testX = X[trainInds, :], X[testInds, :]
            trainY, testY = y[trainInds], y[testInds]

            learnModel(trainX, trainY)
            predY = predict(testX)
            gc.collect()

            metrics[splitIndex] = metricMethod(predY, testY)

        return metrics
예제 #5
0
    def learnModel(self, X, y):
        """
        Train a forest of TreeRank models, each one on a random sample of the
        examples (rows of X) and their labels in the 1D array y.

        The trained trees are stored in self.forestList and the example
        indices used for each tree in self.indList.

        :param X: A matrix with examples as rows
        :type X: :class:`ndarray`

        :param y: A vector of labels taking exactly the values -1 and +1
        :type y: :class:`ndarray`

        :raises ValueError: if y does not contain exactly the labels -1 and +1
        """
        Parameter.checkClass(X, numpy.ndarray)
        Parameter.checkClass(y, numpy.ndarray)
        Parameter.checkArray(X)
        Parameter.checkArray(y)

        labels = numpy.unique(y)
        if labels.shape[0] != 2:
            raise ValueError("Can only accept binary labelled data")
        expectedLabels = numpy.array([-1, 1])
        if (labels != expectedLabels).any():
            raise ValueError("Labels must be -1/+1: " + str(labels))

        numExamples = X.shape[0]
        # sampleSize is applied as a fraction of the number of examples
        numSampledExamples = int(numpy.round(self.sampleSize * numExamples))

        trees = []
        sampledInds = []

        for treeIndex in range(self.numTrees):
            Util.printConciseIteration(treeIndex, 1, self.numTrees, "Tree: ")

            if not self.sampleReplace:
                # Without replacement: the head of a random permutation
                inds = numpy.random.permutation(numExamples)[:numSampledExamples]
            else:
                # With replacement: independent uniform draws
                inds = numpy.random.randint(0, numExamples, numSampledExamples)

            tree = TreeRank(self.leafRanklearner)
            tree.setMaxDepth(self.maxDepth)
            tree.setMinSplit(self.minSplit)
            tree.setFeatureSize(self.featureSize)
            tree.setBestResponse(self.bestResponse)
            tree.learnModel(X[inds, :], y[inds])

            trees.append(tree)
            sampledInds.append(inds)

        self.forestList = trees
        self.indList = sampledInds
예제 #6
0
    def learnModel(self, X, y):
        """
        Train a forest of TreeRank models, each one on a random sample of the
        examples (rows of X) and their labels in the 1D array y.

        The trained trees are stored in self.forestList and the example
        indices used for each tree in self.indList.

        :param X: A matrix with examples as rows
        :type X: :class:`ndarray`

        :param y: A vector of labels taking exactly the values -1 and +1
        :type y: :class:`ndarray`

        :raises ValueError: if y does not contain exactly the labels -1 and +1
        """
        Parameter.checkClass(X, numpy.ndarray)
        Parameter.checkClass(y, numpy.ndarray)
        Parameter.checkArray(X)
        Parameter.checkArray(y)

        labels = numpy.unique(y)
        if labels.shape[0] != 2:
            raise ValueError("Can only accept binary labelled data")
        expectedLabels = numpy.array([-1, 1])
        if (labels != expectedLabels).any():
            raise ValueError("Labels must be -1/+1: " + str(labels))

        numExamples = X.shape[0]
        # sampleSize is applied as a fraction of the number of examples
        numSampledExamples = int(numpy.round(self.sampleSize * numExamples))

        trees = []
        sampledInds = []

        for treeIndex in range(self.numTrees):
            Util.printConciseIteration(treeIndex, 1, self.numTrees, "Tree: ")

            if not self.sampleReplace:
                # Without replacement: the head of a random permutation
                inds = numpy.random.permutation(numExamples)[:numSampledExamples]
            else:
                # With replacement: independent uniform draws
                inds = numpy.random.randint(0, numExamples, numSampledExamples)

            tree = TreeRank(self.leafRanklearner)
            tree.setMaxDepth(self.maxDepth)
            tree.setMinSplit(self.minSplit)
            tree.setFeatureSize(self.featureSize)
            tree.setBestResponse(self.bestResponse)
            tree.learnModel(X[inds, :], y[inds])

            trees.append(tree)
            sampledInds.append(inds)

        self.forestList = trees
        self.indList = sampledInds
예제 #7
0
    def learnModel(self, X, Y):
        """
        Learn a model for a set of examples given as the rows of the matrix X,
        with corresponding labels given in the elements of 1D array Y.

        A DictTree is seeded with a root RankNode holding all training indices
        and a random subset of feature indices. Vertices are then visited
        level by level and every vertex that is neither pure nor a leaf is
        split with self.splitNode. The result is stored in self.tree.

        :param X: A matrix with examples as rows
        :type X: :class:`ndarray`

        :param Y: A vector of binary labels as a 1D array
        :type Y: :class:`ndarray`

        :raises ValueError: if Y does not contain exactly the labels -1 and +1
        """
        Parameter.checkClass(X, numpy.ndarray)
        Parameter.checkClass(Y, numpy.ndarray)
        Parameter.checkArray(X)
        Parameter.checkArray(Y)
        labels = numpy.unique(Y)
        if labels.shape[0] != 2:
            raise ValueError("Can only accept binary labelled data: " + str(labels))
        if (labels != numpy.array([-1, 1])).any():
            raise ValueError("Labels must be -1/+1: " + str(labels))

        # Identity test on purpose: "== None" goes through __eq__ and would be
        # evaluated elementwise if featureSize were ever an array-like value.
        if self.featureSize is None:
            # Default fraction that selects about sqrt(numFeatures) features
            featureSize = numpy.sqrt(X.shape[1])/float(X.shape[1])
        else:
            featureSize = self.featureSize

        tree = DictTree()
        trainInds = numpy.arange(Y.shape[0])
        # featureSize is a fraction of the columns of X; take that many
        # distinct random column indices and keep them sorted
        numSelectedFeatures = int(numpy.round(X.shape[1]*featureSize))
        featureInds = numpy.sort(numpy.random.permutation(X.shape[1])[0:numSelectedFeatures])

        #Seed the tree
        node = RankNode(trainInds, featureInds)
        tree.setVertex((0, 0), node)

        # Vertices are addressed as (depth, position) with up to 2**depth
        # positions per level; only levels 0..maxDepth-1 are candidates for
        # splitting
        for d in range(self.maxDepth):
            for k in range(2**d):
                if tree.vertexExists((d, k)):
                    node = tree.getVertex((d, k))

                    if not node.isPure() and not node.isLeafNode():
                        self.splitNode(tree, X, Y, d, k)

        self.tree = tree
예제 #8
0
    def predict(self, X):
        """
        Score the examples in the rows of X with the forest: the score of an
        example is the average of the scores given by the first numTrees
        entries of self.forestList.

        :param X: A matrix with examples as rows
        :type X: :class:`ndarray`

        :return: A vector of scores corresponding to each example.
        """
        Parameter.checkClass(X, numpy.ndarray)
        Parameter.checkArray(X)

        totals = numpy.zeros(X.shape[0])

        # Accumulate the per-tree scores, then average
        for treeIndex in range(self.numTrees):
            tree = self.forestList[treeIndex]
            totals += tree.predict(X)

        return totals / self.numTrees
예제 #9
0
    def predict(self, X):
        """
        Score the examples in the rows of X with the forest: the score of an
        example is the average of the scores given by the first numTrees
        entries of self.forestList.

        :param X: A matrix with examples as rows
        :type X: :class:`ndarray`

        :return: A vector of scores corresponding to each example.
        """
        Parameter.checkClass(X, numpy.ndarray)
        Parameter.checkArray(X)

        totals = numpy.zeros(X.shape[0])

        # Accumulate the per-tree scores, then average
        for treeIndex in range(self.numTrees):
            tree = self.forestList[treeIndex]
            totals += tree.predict(X)

        return totals / self.numTrees
예제 #10
0
    def evaluateLearn2(X, Y, indexList, learnModel, predict, metricMethods):
        """
        Train a learner on each training/test split and record every metric
        on both the training part and the test part of the split.

        Note that each metric is called as metricMethod(trueY, predictedY).

        :param X: A matrix with examples as rows
        :param Y: A 1D array of labels
        :param indexList: An iterable of (trainIndices, testIndices) pairs
        :param learnModel: A function such that learnModel(X, Y) trains the learner
        :param predict: A function such that predict(X) returns predictions for X
        :param metricMethods: A sized sequence of metric functions

        :return: (trainMetrics, testMetrics); entry i of each is the list of
            values of metricMethods[i] over the splits, in split order.

        #Could combine this with evaluateLearn
        """
        Parameter.checkClass(X, numpy.ndarray)
        Parameter.checkArray(X, softCheck=True)
        Parameter.checkClass(Y, numpy.ndarray)
        Parameter.checkArray(Y, softCheck=True)

        if Y.ndim != 1:
            raise ValueError("Expecting Y to be 1D")

        # One result list per metric, for the training and the test side
        numMetrics = len(metricMethods)
        trainMetrics = [[] for _ in range(numMetrics)]
        testMetrics = [[] for _ in range(numMetrics)]

        for trainInds, testInds in indexList:
            trainX, trainY = X[trainInds, :], Y[trainInds]
            testX, testY = X[testInds, :], Y[testInds]

            learnModel(trainX, trainY)
            predTrainY = predict(trainX)
            predTestY = predict(testX)

            #Now compute all metrics
            for metricIndex, metricMethod in enumerate(metricMethods):
                trainMetrics[metricIndex].append(metricMethod(trainY, predTrainY))
                testMetrics[metricIndex].append(metricMethod(testY, predTestY))

            gc.collect()

        logging.debug("All done")

        return trainMetrics, testMetrics
예제 #11
0
    def evaluateLearn2(X, Y, indexList, learnModel, predict, metricMethods):
        """
        Train a learner on each training/test split and record every metric
        on both the training part and the test part of the split.

        Note that each metric is called as metricMethod(trueY, predictedY).

        :param X: A matrix with examples as rows
        :param Y: A 1D array of labels
        :param indexList: An iterable of (trainIndices, testIndices) pairs
        :param learnModel: A function such that learnModel(X, Y) trains the learner
        :param predict: A function such that predict(X) returns predictions for X
        :param metricMethods: A sized sequence of metric functions

        :return: (trainMetrics, testMetrics); entry i of each is the list of
            values of metricMethods[i] over the splits, in split order.

        #Could combine this with evaluateLearn
        """
        Parameter.checkClass(X, numpy.ndarray)
        Parameter.checkArray(X, softCheck=True)
        Parameter.checkClass(Y, numpy.ndarray)
        Parameter.checkArray(Y, softCheck=True)

        if Y.ndim != 1:
            raise ValueError("Expecting Y to be 1D")

        # One result list per metric, for the training and the test side
        numMetrics = len(metricMethods)
        trainMetrics = [[] for _ in range(numMetrics)]
        testMetrics = [[] for _ in range(numMetrics)]

        for trainInds, testInds in indexList:
            trainX, trainY = X[trainInds, :], Y[trainInds]
            testX, testY = X[testInds, :], Y[testInds]

            learnModel(trainX, trainY)
            predTrainY = predict(trainX)
            predTestY = predict(testX)

            #Now compute all metrics
            for metricIndex, metricMethod in enumerate(metricMethods):
                trainMetrics[metricIndex].append(metricMethod(trainY, predTrainY))
                testMetrics[metricIndex].append(metricMethod(testY, predTestY))

            gc.collect()

        logging.debug("All done")

        return trainMetrics, testMetrics
예제 #12
0
파일: Util.py 프로젝트: kentwang/sandbox
    def svd_from_eigh(A, eps=10 ** -8, tol=10 ** -8):
        """
        Find the SVD of an ill conditioned matrix A. This uses
        scipy.linalg.eigh on the regularised matrix A^*A + eps*I, so it is not
        as precise as numpy.linalg.svd, but can be useful if svd does not
        converge. The right singular vectors are the eigenvectors of A^*A and
        the singular values are the square roots of its eigenvalues.

        Note: This is slightly different to linalg.svd which returns zero singular
        values. Here only components whose eigenvalue of A^*A (i.e. the
        squared singular value) is greater than tol are kept.

        :param A: The matrix to decompose
        :param eps: Multiple of the identity added to A^*A before the
            eigendecomposition and subtracted from the eigenvalues afterwards
        :param tol: Threshold on the eigenvalues of A^*A; also used as the
            tolerance of the debug-mode consistency checks

        :return: (P, sigma, Qh) such that A is approximately (P * sigma).dot(Qh)
        """
        AA = A.conj().T.dot(A)
        lmbda, Q = scipy.linalg.eigh(AA + eps * numpy.eye(A.shape[1]))
        # Undo the shift introduced by the eps regularisation
        lmbda = lmbda - eps

        inds = numpy.arange(lmbda.shape[0])[lmbda > tol]
        lmbda, Q = Util.indEig(lmbda, Q, inds)

        sigma = lmbda ** 0.5
        # A = P diag(sigma) Q^*  =>  P = A Q diag(1/sigma); dividing by the
        # 1D sigma scales the columns of A.dot(Q)
        P = A.dot(Q) / sigma
        Qh = Q.conj().T

        if __debug__:
            # numpy.allclose is the function formerly re-exported as
            # scipy.allclose, an alias no longer present in current SciPy
            if not numpy.allclose(A, (P * sigma).dot(Qh), atol=tol):
                # logging.warn is a deprecated alias of logging.warning
                logging.warning(" SVD obtained from EVD is too poor")
            Parameter.checkArray(P, softCheck=True, arrayInfo="P in svd_from_eigh()")
            if not Parameter.checkOrthogonal(
                P, tol=tol, softCheck=True, arrayInfo="P in svd_from_eigh()", investigate=True
            ):
                print("corresponding sigma: ", sigma)
            Parameter.checkArray(sigma, softCheck=True, arrayInfo="sigma in svd_from_eigh()")
            Parameter.checkArray(Qh, softCheck=True, arrayInfo="Qh in svd_from_eigh()")
            if not Parameter.checkOrthogonal(Qh.conj().T, tol=tol, softCheck=True, arrayInfo="Qh.H in svd_from_eigh()"):
                print("corresponding sigma: ", sigma)

        return P, sigma, Qh
예제 #13
0
    def evaluateLearners(X,
                         Y,
                         indexList,
                         splitFunction,
                         learnerIterator,
                         metricMethods,
                         progress=True):
        """
        Perform model selection on each outer train/test split in indexList.

        For every outer split, each learner from learnerIterator is evaluated
        on inner splits of the training part (produced by splitFunction) with
        AbstractPredictor.evaluateLearn and metricMethods[0]. The learner with
        the *minimum* mean inner metric is retrained on the whole training
        part and all of metricMethods are computed on its test predictions,
        each called as metricMethod(predY, trueY).

        :param X: A matrix with examples as rows
        :param Y: A 1D array of labels
        :param indexList: An iterable of (trainIndices, testIndices) pairs
        :param splitFunction: Called as splitFunction(trainX, trainY) to get the inner splits
        :param learnerIterator: An iterable of learners with learnModel and predict methods
        :param metricMethods: A list of metric functions; the first drives model selection
        :param progress: Passed on to AbstractPredictor.evaluateLearn

        :return: (allMetrics, bestLearners): per outer split, the list of
            metric values and the learner that was selected.

        :raises ValueError: if Y is not 1D, or if no learner could be selected
        """
        Parameter.checkClass(X, numpy.ndarray)
        Parameter.checkArray(X, softCheck=True)
        Parameter.checkClass(Y, numpy.ndarray)
        Parameter.checkArray(Y, softCheck=True)

        if Y.ndim != 1:
            raise ValueError("Expecting Y to be 1D")

        mainMetricMethod = metricMethods[0]

        bestLearners = []
        allMetrics = []
        # NOTE(review): deliberately initialised once, outside the split loop.
        # If learnerIterator is a one-shot iterator it is exhausted after the
        # first split and later splits reuse the previously selected learner,
        # as before -- confirm callers pass a re-iterable collection.
        bestLearner = None

        for trainInds, testInds in indexList:
            trainX = X[trainInds, :]
            trainY = Y[trainInds]

            testX = X[testInds, :]
            testY = Y[testInds]

            minMetric = float('inf')

            for learner in learnerIterator:
                logging.debug("Learning with " + str(learner))
                idx = splitFunction(trainX, trainY)
                metrics = AbstractPredictor.evaluateLearn(
                    trainX, trainY, idx, learner.learnModel, learner.predict,
                    mainMetricMethod, progress)

                meanMetric = numpy.mean(metrics)

                if meanMetric < minMetric:
                    bestLearner = learner
                    minMetric = meanMetric

                #Try to get some memory back
                gc.collect()

            # Fail with a clear message instead of an UnboundLocalError
            if bestLearner is None:
                raise ValueError(
                    "No learner could be selected: learnerIterator is empty "
                    "or no learner has a mean metric below infinity")

            bestLearner.learnModel(trainX, trainY)
            predY = bestLearner.predict(testX)

            bestLearners.append(bestLearner)

            #Now compute all metrics
            currentMetrics = [
                metricMethod(predY, testY) for metricMethod in metricMethods
            ]

            allMetrics.append(currentMetrics)
            logging.debug("Outer metric(s): " + str(currentMetrics))

        for selectedLearner, splitMetrics in zip(bestLearners, allMetrics):
            logging.debug("Learner = " + str(selectedLearner) + " error= " +
                          str(splitMetrics))
        logging.debug("All done")

        return allMetrics, bestLearners
예제 #14
0
    def evaluateLearn(X,
                      y,
                      idx,
                      learnModel,
                      predict,
                      metricMethod,
                      progress=True):
        """
        Train and test a learner on each of the given training/test splits and
        score the test predictions of every split with metricMethod.

        :param X: A matrix with examples as rows
        :type X: :class:`ndarray`

        :param y: A vector of labels (must be 1D)
        :type y: :class:`ndarray`

        :param idx: A sized collection of (trainIndices, testIndices) pairs
        :type idx: :class:`list`

        :param learnModel: A function such that learnModel(X, y) finds a mapping from X to y
        :type learnModel: :class:`function`

        :param predict: A function such that predict(X) makes predictions for X
        :type predict: :class:`function`

        :param metricMethod: A function called as metricMethod(predY, testY) on each split
        :type metricMethod: :class:`function`

        :param progress: If true, report the split number via Util.printConciseIteration
        :type progress: :class:`bool`

        :return: A 1D array holding one metric value per split, in the order of idx.
        """
        Parameter.checkClass(X, numpy.ndarray)
        Parameter.checkArray(X, softCheck=True)
        Parameter.checkInt(X.shape[0], 1, float('inf'))
        Parameter.checkClass(y, numpy.ndarray)
        Parameter.checkArray(y, softCheck=True)

        if y.ndim != 1:
            raise ValueError("Dimention of y must be 1")

        numSplits = len(idx)
        metrics = numpy.zeros(numSplits)
        logging.debug("EvaluateLearn: Using " + str(numSplits) + " splits on " +
                      str(X.shape[0]) + " examples")

        for splitIndex, (trainInds, testInds) in enumerate(idx):
            if progress:
                Util.printConciseIteration(splitIndex, 1, numSplits)

            # Slice out both parts of the split before any training happens
            trainX, testX = X[trainInds, :], X[testInds, :]
            trainY, testY = y[trainInds], y[testInds]

            learnModel(trainX, trainY)
            predY = predict(testX)
            gc.collect()

            metrics[splitIndex] = metricMethod(predY, testY)

        return metrics
예제 #15
0
    def eigenAdd2(omega, Q, Y1, Y2, k, debug= False):
        """
        Compute an approximation of the eigendecomposition A^*A + Y1Y2^* +Y2Y1^*
        in which Y1, Y2 are low rank matrices, Y1^*Y2=0 and A^*A = Q Omega Q*. We 
        use the rank-k approximation of A^*A: Q_k Omega_k Q_k^* and then find
        [A^*A_k + Y1Y2^* + Y2Y1^*].

        The update is expressed in a basis D = [Q_k, P1bar, P2bar], where
        P1bar and P2bar span the parts of Y1 and Y2 not already covered by the
        preceding columns. The small matrix F = D^* [A^*A_k + Y1Y2^* + Y2Y1^*] D
        is diagonalised and its eigenvectors are mapped back through D.
        Eigenvalues with absolute value not above EigenUpdater.tol are
        dropped; the result is not truncated to k eigenpairs.

        :param omega: 1D array of eigenvalues of A^*A
        :param Q: Matrix whose columns are the corresponding eigenvectors
        :param Y1: Low rank matrix with as many rows as Q
        :param Y2: Low rank matrix with the same shape as Y1
        :param k: Number of largest eigenvalues of A^*A to keep
        :param debug: If true, additionally return D and Y1Y2^* + Y2Y1^*
            expressed in the basis D

        :return: (pi, V), the eigenvalues/eigenvectors of
            [A^*A_k + Y1Y2^* + Y2Y1^*], or (pi, V, D, D^*(Y1Y2^* + Y2Y1^*)D)
            if debug is true.

        :raises ValueError: if the shapes of omega, Q, Y1 and Y2 are inconsistent
        """
        Parameter.checkInt(k, 0, float('inf'))
        Parameter.checkClass(omega, numpy.ndarray)
        Parameter.checkClass(Q, numpy.ndarray)
        Parameter.checkClass(Y1, numpy.ndarray)
        Parameter.checkClass(Y2, numpy.ndarray)
        # logging.warning replaces the deprecated alias logging.warn
        if not numpy.isrealobj(omega) or not numpy.isrealobj(Q):
            logging.warning("Eigenvalues or eigenvectors are not real")
        if not numpy.isrealobj(Y1) or not numpy.isrealobj(Y2):
            logging.warning("Y1 or Y2 are not real")
        if omega.ndim != 1:
            raise ValueError("omega must be 1-d array")
        if omega.shape[0] != Q.shape[1]:
            raise ValueError("Must have same number of eigenvalues and eigenvectors")
        if Q.shape[0] != Y1.shape[0]:
            raise ValueError("Q must have the same number of rows as Y1 rows")
        if Q.shape[0] != Y2.shape[0]:
            raise ValueError("Q must have the same number of rows as Y2 rows")
        if Y1.shape[1] != Y2.shape[1]:
            raise ValueError("Y1 must have the same number of columns as Y2 columns")

        if __debug__:
            Parameter.checkArray(omega, softCheck=True, arrayInfo="omega as input in eigenAdd2()")
            Parameter.checkArray(Q, softCheck=True, arrayInfo="Q as input in eigenAdd2()")
            Parameter.checkOrthogonal(Q, tol=EigenUpdater.tol, softCheck=True, arrayInfo="Q as input in eigenAdd2()")
            Parameter.checkArray(Y1, softCheck=True, arrayInfo="Y1 as input in eigenAdd2()")
            Parameter.checkArray(Y2, softCheck=True, arrayInfo="Y2 as input in eigenAdd2()")

        #Get first k eigenvectors/values of A^*A (the k largest, in descending order)
        omega, Q = Util.indEig(omega, Q, numpy.flipud(numpy.argsort(omega))[0:k])

        # Part of Y1 that lies outside the span of the columns of Q
        QY1 = Q.conj().T.dot(Y1)
        Y1bar = Y1 - Q.dot(QY1)

        P1bar, sigma1Bar, Q1bar = Util.safeSvd(Y1bar)
        inds = numpy.arange(sigma1Bar.shape[0])[numpy.abs(sigma1Bar)>EigenUpdater.tol]
        P1bar, sigma1Bar, Q1bar = Util.indSvd(P1bar, sigma1Bar, Q1bar, inds)
        # checks on SVD decomposition of Y1bar
        if __debug__:
            Parameter.checkArray(QY1, softCheck=True, arrayInfo="QY1 in eigenAdd2()")
            Parameter.checkArray(Y1bar, softCheck=True, arrayInfo="Y1bar in eigenAdd2()")
            Parameter.checkArray(P1bar, softCheck=True, arrayInfo="P1bar in eigenAdd2()")
            if not Parameter.checkOrthogonal(P1bar, tol=EigenUpdater.tol, softCheck=True, arrayInfo="P1bar in eigenAdd2()", investigate=True):
                print ("corresponding sigma: ", sigma1Bar)
            Parameter.checkArray(sigma1Bar, softCheck=True, arrayInfo="sigma1Bar in eigenAdd2()")
            Parameter.checkArray(Q1bar, softCheck=True, arrayInfo="Q1bar in eigenAdd2()")
            if not Parameter.checkOrthogonal(Q1bar, tol=EigenUpdater.tol, softCheck=True, arrayInfo="Q1bar in eigenAdd2()"):
                print ("corresponding sigma: ", sigma1Bar)

        del Y1bar

        # Part of Y2 that lies outside the span of the columns of [Q, P1bar]
        P1barY2 = P1bar.conj().T.dot(Y2)
        QY2 = Q.conj().T.dot(Y2)
        Y2bar = Y2 - Q.dot(QY2) - P1bar.dot(P1barY2)
        
        P2bar, sigma2Bar, Q2bar = Util.safeSvd(Y2bar)
        inds = numpy.arange(sigma2Bar.shape[0])[numpy.abs(sigma2Bar)>EigenUpdater.tol]
        P2bar, sigma2Bar, Q2bar = Util.indSvd(P2bar, sigma2Bar, Q2bar, inds)
        # checks on SVD decomposition of Y2bar
        if __debug__:
            Parameter.checkArray(P1barY2, softCheck=True, arrayInfo="P1barY2 in eigenAdd2()")
            Parameter.checkArray(QY2, softCheck=True, arrayInfo="QY2 in eigenAdd2()")
            Parameter.checkArray(Y2bar, softCheck=True, arrayInfo="Y2bar in eigenAdd2()")
            Parameter.checkArray(P2bar, softCheck=True, arrayInfo="P2bar in eigenAdd2()")
            Parameter.checkOrthogonal(P2bar, tol=EigenUpdater.tol, softCheck=True, arrayInfo="P2bar in eigenAdd2()")
            Parameter.checkArray(sigma2Bar, softCheck=True, arrayInfo="sigma2Bar in eigenAdd2()")
            Parameter.checkArray(Q2bar, softCheck=True, arrayInfo="Q2bar in eigenAdd2()")
            Parameter.checkOrthogonal(Q2bar, tol=EigenUpdater.tol, softCheck=True, arrayInfo="Q2bar in eigenAdd2()")

        del Y2bar 

        # Block sizes of the basis D = [Q, P1bar, P2bar]
        r = omega.shape[0]
        p = Y1.shape[1]
        p1 = sigma1Bar.shape[0]
        p2 = sigma2Bar.shape[0]

        D = numpy.c_[Q, P1bar, P2bar]
        del P1bar
        del P2bar 
        # rem: A*s = A.dot(diag(s)) ; A*s[:,new] = diag(s).dot(A)
        # D^*Y1 and D^*Y2 assembled block by block; the P2bar block of D^*Y1
        # is set to zero
        DStarY1 = numpy.r_[QY1, sigma1Bar[:,numpy.newaxis] * Q1bar.conj().T, numpy.zeros((p2, p))]
        DStarY2 = numpy.r_[QY2, P1barY2, sigma2Bar[:,numpy.newaxis] * Q2bar.conj().T]
        DStarY1Y2StarD = DStarY1.dot(DStarY2.conj().T)

        del DStarY1
        del DStarY2
        
        # F = diag(omega, 0, 0) + D^*(Y1Y2^* + Y2Y1^*)D
        F = numpy.zeros((r+p1+p2, r+p1+p2))
        F[range(r),range(r)] = omega
        F = F + DStarY1Y2StarD + DStarY1Y2StarD.conj().T

        #A check to make sure DFD^T is AA_k + Y1Y2 + Y2Y1
        #assert numpy.linalg.norm(D.dot(F).dot(D.T) - Q.dot(numpy.diag(omega).dot(Q.T)) - Y1.dot(Y2.T) - Y2.dot(Y1.T)) < 10**-6
        
        # checks on F
        if __debug__:
            Parameter.checkArray(DStarY1Y2StarD, softCheck=True, arrayInfo="DStarY1Y2StarD in eigenAdd2()")
            Parameter.checkArray(F, softCheck=True, arrayInfo="F in eigenAdd2()")
            Parameter.checkSymmetric(F, tol=EigenUpdater.tol, softCheck=True, arrayInfo="F in eigenAdd2()")

        pi, H = scipy.linalg.eigh(F)
        # remove too small eigenvalues
        pi, H = Util.indEig(pi, H, numpy.arange(pi.shape[0])[numpy.abs(pi)>EigenUpdater.tol])
        # keep greatest eigenvalues
        #pi, H = Util.indEig(pi, H, numpy.flipud(numpy.argsort(pi))[:min(k,pi.shape[0])])

        # Map the eigenvectors of F back to the original space
        V = D.dot(H)

        if __debug__:
            if not Parameter.checkOrthogonal(D, tol=EigenUpdater.tol, softCheck=True, investigate=True, arrayInfo="D in eigenAdd2()"):
                print("pi:\n", pi)
            if not Parameter.checkOrthogonal(H, tol=EigenUpdater.tol, softCheck=True, investigate=True, arrayInfo="H in eigenAdd2()"):
                print("pi:\n", pi)

        # memDisplay receives this function's locals, so local names matter
        if ProfileUtils.memory() > 10**9:
            ProfileUtils.memDisplay(locals())
            
        if debug:
            return pi, V, D, DStarY1Y2StarD + DStarY1Y2StarD.conj().T
        else:
            return pi, V
예제 #16
0
파일: Util.py 프로젝트: kentwang/sandbox
    def safeSvd(A, eps=10 ** -8, tol=10 ** -8):
        """
        Compute the SVD of a matrix using scipy.linalg.svd, and if convergence fails
        revert to Util.svd.

        The fallback to Util.svd(A, eps, tol) is taken when scipy.linalg.svd
        raises LinAlgError, when dropping singular values not above tol leaves
        a decomposition that no longer reproduces A, or (debug mode only) when
        the factors fail Parameter.checkArray. The fallback is signalled
        internally by an Exception carrying a fixed message; any other
        exception is re-raised unchanged.

        :param A: The matrix to decompose
        :param eps: Passed on to Util.svd in the fallback path
        :param tol: Singular values not above tol are removed; also passed on
            to Util.svd in the fallback path

        :return: (P, sigma, Qh) with A approximately (P * sigma).dot(Qh)
        """
        # check input matrix
        if __debug__:
            if not Parameter.checkArray(A, softCheck=True):
                logging.info("... in Util.safeSvd")

        try:
            # run scipy.linalg.svd
            try:
                P, sigma, Qh = scipy.linalg.svd(A, full_matrices=False)
            except scipy.linalg.LinAlgError as e:
                # logging.warning replaces the deprecated alias logging.warn
                logging.warning(str(e))
                raise Exception("SVD decomposition has to be computed from EVD decomposition")

            # --- only when the SVD decomposition comes from scipy.linalg.svd ---
            # clean output singular values (sometimes scipy.linalg.svd returns NaN or negative singular values, let's remove them)
            # (a NaN compares false against tol, so it is dropped here too)
            inds = numpy.arange(sigma.shape[0])[sigma > tol]
            if inds.shape[0] < sigma.shape[0]:
                P, sigma, Q = Util.indSvd(P, sigma, Qh, inds)
                Qh = Q.conj().T
                # an expensive check but we really need it
                # rem: A*s = A.dot(diag(s)) ; A*s[:,new] = diag(s).dot(A)
                # numpy.allclose is the function formerly re-exported as
                # scipy.allclose, an alias no longer present in current SciPy
                if not numpy.allclose(A, (P * sigma).dot(Qh)):
                    logging.warning(
                        " After cleaning singular values from scipy.linalg.svd, the SVD decomposition is too far from the original matrix"
                    )
                    raise Exception("SVD decomposition has to be computed from EVD decomposition")

            # check scipy.linalg.svd output matrices (expensive)
            if __debug__:
                badAnswerFromScipySvd = False
                if not Parameter.checkArray(P, softCheck=True, arrayInfo="P in Util.safeSvd()"):
                    badAnswerFromScipySvd = True
                if not Parameter.checkArray(sigma, softCheck=True, arrayInfo="sigma in Util.safeSvd()"):
                    badAnswerFromScipySvd = True
                if not Parameter.checkArray(Qh, softCheck=True, arrayInfo="Qh in Util.safeSvd()"):
                    badAnswerFromScipySvd = True
                if badAnswerFromScipySvd:
                    logging.warning(
                        " After cleaning singular values from scipy.linalg.svd, the SVD decomposition still contains 'NaN', 'inf' or complex values"
                    )
                    raise Exception("SVD decomposition has to be computed from EVD decomposition")

        except Exception as inst:
            # Only the fallback signal raised above is handled here
            if inst.args != ("SVD decomposition has to be computed from EVD decomposition",):
                raise
            logging.warning(" Using EVD method to compute the SVD.")
            P, sigma, Qh = Util.svd(A, eps, tol)

            # check Util.svd output matrices (expensive)
            if __debug__:
                badAnswerFromUtilSvd = False
                if not Parameter.checkArray(P, softCheck=True):
                    logging.info("... in P in Util.safeSvd")
                    badAnswerFromUtilSvd = True
                if not Parameter.checkArray(sigma, softCheck=True):
                    logging.info("... in sigma in Util.safeSvd")
                    badAnswerFromUtilSvd = True
                if not Parameter.checkArray(Qh, softCheck=True):
                    logging.info("... in Q in Util.safeSvd")
                    badAnswerFromUtilSvd = True
                if badAnswerFromUtilSvd:
                    # NOTE(review): "real values" is probably meant to read
                    # "complex values", as in the scipy branch -- left as is
                    logging.warning(
                        " SVD decomposition obtained from EVD decomposition contains 'NaN', 'inf' or real values"
                    )

        from sandbox.util.ProfileUtils import ProfileUtils

        # memDisplay receives this function's locals, so local names matter
        if ProfileUtils.memory() > 10 ** 9:
            ProfileUtils.memDisplay(locals())

        return P, sigma, Qh
예제 #17
0
    def evaluateLearners(X, Y, indexList, splitFunction, learnerIterator, metricMethods, progress=True):
        """
        Perform model selection on each outer train/test split in indexList.

        For every outer split, each learner from learnerIterator is evaluated
        on inner splits of the training part (produced by splitFunction) with
        AbstractPredictor.evaluateLearn and metricMethods[0]. The learner with
        the *minimum* mean inner metric is retrained on the whole training
        part and all of metricMethods are computed on its test predictions,
        each called as metricMethod(predY, trueY).

        :param X: A matrix with examples as rows
        :param Y: A 1D array of labels
        :param indexList: An iterable of (trainIndices, testIndices) pairs
        :param splitFunction: Called as splitFunction(trainX, trainY) to get the inner splits
        :param learnerIterator: An iterable of learners with learnModel and predict methods
        :param metricMethods: A list of metric functions; the first drives model selection
        :param progress: Passed on to AbstractPredictor.evaluateLearn

        :return: (allMetrics, bestLearners): per outer split, the list of
            metric values and the learner that was selected.

        :raises ValueError: if Y is not 1D, or if no learner could be selected
        """
        Parameter.checkClass(X, numpy.ndarray)
        Parameter.checkArray(X, softCheck=True)
        Parameter.checkClass(Y, numpy.ndarray)
        Parameter.checkArray(Y, softCheck=True)

        if Y.ndim != 1:
            raise ValueError("Expecting Y to be 1D")

        mainMetricMethod = metricMethods[0]

        bestLearners = []
        allMetrics = []
        # NOTE(review): deliberately initialised once, outside the split loop.
        # If learnerIterator is a one-shot iterator it is exhausted after the
        # first split and later splits reuse the previously selected learner,
        # as before -- confirm callers pass a re-iterable collection.
        bestLearner = None

        for trainInds, testInds in indexList:
            trainX = X[trainInds, :]
            trainY = Y[trainInds]

            testX = X[testInds, :]
            testY = Y[testInds]

            minMetric = float('inf')

            for learner in learnerIterator:
                logging.debug("Learning with " + str(learner))
                idx = splitFunction(trainX, trainY)
                metrics = AbstractPredictor.evaluateLearn(trainX, trainY, idx, learner.learnModel, learner.predict, mainMetricMethod, progress)

                meanMetric = numpy.mean(metrics)

                if meanMetric < minMetric:
                    bestLearner = learner
                    minMetric = meanMetric

                #Try to get some memory back
                gc.collect()

            # Fail with a clear message instead of an UnboundLocalError
            if bestLearner is None:
                raise ValueError("No learner could be selected: learnerIterator is empty or no learner has a mean metric below infinity")

            bestLearner.learnModel(trainX, trainY)
            predY = bestLearner.predict(testX)

            bestLearners.append(bestLearner)

            #Now compute all metrics
            currentMetrics = [metricMethod(predY, testY) for metricMethod in metricMethods]

            allMetrics.append(currentMetrics)
            logging.debug("Outer metric(s): " + str(currentMetrics))

        for selectedLearner, splitMetrics in zip(bestLearners, allMetrics):
            logging.debug("Learner = " + str(selectedLearner) + " error= " + str(splitMetrics))
        logging.debug("All done")

        return allMetrics, bestLearners