Example #1
 def setBestResponse(self, bestResponse):
     """
     :param bestResponse: the label corresponding to "positive"
     :type bestResponse: :class:`int`
     """
     Parameter.checkInt(bestResponse, -float("inf"), float("inf"))
     self.bestResponse = bestResponse
Example #2
    def shuffleSplit(repetitions, numExamples, trainProportion=None):
        """
        Random permutation cross-validation iterator. The training set is sampled
        without replacement and has size (repetitions-1)/repetitions of the examples,
        and the test set represents the remaining examples. Each repetition is
        sampled independently.

        :param repetitions: The number of repetitions to perform.
        :type repetitions: :class:`int`

        :param numExamples: The number of examples.
        :type numExamples: :class:`int`

        :param trainProportion: The size of the training set relative to numExamples, between 0 and 1, or None to use (repetitions-1)/repetitions
        :type trainProportion: :class:`float`
        """
        Parameter.checkInt(numExamples, 2, float('inf'))
        Parameter.checkInt(repetitions, 1, float('inf'))
        if trainProportion is not None:
            Parameter.checkFloat(trainProportion, 0.0, 1.0)

        if trainProportion is None:
            trainSize = int((repetitions-1)*numExamples/repetitions)
        else:
            trainSize = int(trainProportion*numExamples)

        idx = [] 
        for i in range(repetitions):
            inds = numpy.random.permutation(numExamples)
            trainInds = inds[0:trainSize]
            testInds = inds[trainSize:]
            idx.append((trainInds, testInds))
        return idx 
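A standalone sketch of the same splitting logic, using only numpy (the Parameter checks belong to the surrounding library and are omitted):

    import numpy

    def shuffleSplitDemo(repetitions, numExamples, trainProportion=None):
        if trainProportion is None:
            trainSize = int((repetitions - 1)*numExamples/repetitions)
        else:
            trainSize = int(trainProportion*numExamples)

        idx = []
        for i in range(repetitions):
            inds = numpy.random.permutation(numExamples)
            idx.append((inds[0:trainSize], inds[trainSize:]))
        return idx

    #3 repetitions over 10 examples, 80% used for training
    for trainInds, testInds in shuffleSplitDemo(3, 10, 0.8):
        print(sorted(trainInds), sorted(testInds))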
Example #3
    def bootstrap2(repetitions, numExamples):
        """
        Perform 0.632 bootstrap in which we take a sample with replacement from
        the dataset of size numExamples. The examples not present in the training
        set are used to form the test set. (Oversampling the test set with 0.368
        of the training examples is left commented out below.) Returns a list of
        tuples of the form (trainIndices, testIndices).

        :param repetitions: The number of repetitions of bootstrap to perform.
        :type repetitions: :class:`int`

        :param numExamples: The number of examples.
        :type numExamples: :class:`int`

        """
        Parameter.checkInt(numExamples, 2, float('inf'))
        Parameter.checkInt(repetitions, 1, float('inf'))

        inds = []
        for i in range(repetitions):
            trainInds = numpy.random.randint(numExamples, size=numExamples)
            testInds = numpy.setdiff1d(numpy.arange(numExamples), numpy.unique(trainInds))
            #testInds = numpy.r_[testInds, trainInds[0:(numExamples*0.368)]]

            inds.append((trainInds, testInds))

        return inds
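A sketch of a single bootstrap split with plain numpy, showing how the examples never drawn for training form the test set:

    import numpy

    numExamples = 10
    trainInds = numpy.random.randint(numExamples, size=numExamples)   #sample with replacement
    testInds = numpy.setdiff1d(numpy.arange(numExamples), numpy.unique(trainInds))
    print("train:", trainInds)
    print("test: ", testInds)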
Example #4
    def randCrossValidation(folds, numExamples, dtype=numpy.int32):
        """
        Returns a list of tuples (trainIndices, testIndices) using k-fold cross
        validation. In this case we randomise the indices and then split into 
        folds. 

        :param folds: The number of cross validation folds.
        :type folds: :class:`int`

        :param numExamples: The number of examples.
        :type numExamples: :class:`int`
        """
        Parameter.checkInt(folds, 1, numExamples)
        Parameter.checkInt(numExamples, 2, float('inf'))

        foldSize = float(numExamples)/folds
        indexList = []

        inds = numpy.array(numpy.random.permutation(numExamples), dtype)

        for i in range(0, folds):
            testIndices = inds[int(foldSize*i): int(foldSize*(i+1))]
            trainIndices = numpy.setdiff1d(numpy.arange(0, numExamples), testIndices)
            indexList.append((trainIndices, testIndices))

        return indexList 
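A sketch of the randomised fold construction with plain numpy; each example lands in exactly one test fold:

    import numpy

    folds, numExamples = 3, 10
    foldSize = float(numExamples)/folds
    inds = numpy.random.permutation(numExamples)

    for i in range(folds):
        testIndices = inds[int(foldSize*i): int(foldSize*(i + 1))]
        trainIndices = numpy.setdiff1d(numpy.arange(numExamples), testIndices)
        print(sorted(testIndices))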
Example #5
    def generateGraph(self, alpha, p, dim):
        Parameter.checkFloat(alpha, 0.0, float('inf'))
        Parameter.checkFloat(p, 0.0, 1.0)
        Parameter.checkInt(dim, 0, float('inf'))
        
        numVertices = self.graph.getNumVertices()
        self.X = numpy.random.rand(numVertices, dim)

        D = KernelUtils.computeDistanceMatrix(numpy.dot(self.X, self.X.T))
        P = numpy.exp(-alpha * D)
        diagIndices = numpy.array(list(range(0, numVertices)))
        P[(diagIndices, diagIndices)] = numpy.zeros(numVertices)

        B = numpy.random.rand(numVertices, numVertices) <= P 

        #Note that B is symmetric - could just go through e.g. upper triangle 
        rows, cols = numpy.nonzero(B)
        for v1, v2 in zip(rows, cols):
            self.graph.addEdge(v1, v2)

        erdosRenyiGenerator = ErdosRenyiGenerator(p)
        self.graph = erdosRenyiGenerator.generate(self.graph, False)

        return self.graph
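A sketch of the edge-probability construction with plain numpy/scipy; as an assumption, scipy's pdist stands in for KernelUtils.computeDistanceMatrix:

    import numpy
    from scipy.spatial.distance import pdist, squareform

    numVertices, dim, alpha = 5, 2, 2.0
    X = numpy.random.rand(numVertices, dim)               #random positions in [0, 1]^dim
    D = squareform(pdist(X))                              #pairwise Euclidean distances
    P = numpy.exp(-alpha*D)                               #closer vertices connect more often
    numpy.fill_diagonal(P, 0)                             #no self-loops
    B = numpy.random.rand(numVertices, numVertices) <= P  #sample edges
    print(B.astype(int))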
Example #6
    def evaluateCvOuter(self, X, y, folds):
        """
        Compute the average AUC using k-fold cross validation and the linear kernel. 
        """
        Parameter.checkInt(folds, 2, float('inf'))
        idx = cross_val.StratifiedKFold(y, folds)
        metricMethods = [Evaluator.auc2, Evaluator.roc]

        if self.kernel == "linear":
            logging.debug("Running linear rank SVM ")
            trainMetrics, testMetrics = AbstractPredictor.evaluateLearn2(X, y, idx, self.modelSelectLinear, self.predict, metricMethods)
        elif self.kernel == "rbf":
            logging.debug("Running RBF rank SVM")
            trainMetrics, testMetrics = AbstractPredictor.evaluateLearn2(X, y, idx, self.modelSelectRBF, self.predict, metricMethods)
        else:
            raise ValueError("Invalid kernel: " + str(self.kernel))

        bestTrainAUCs = trainMetrics[0]
        bestTrainROCs = trainMetrics[1]
        bestTestAUCs = testMetrics[0]
        bestTestROCs = testMetrics[1]

        bestParams = {}
        bestMetaDicts = {}
        allMetrics = [bestTrainAUCs, bestTrainROCs, bestTestAUCs, bestTestROCs]

        return (bestParams, allMetrics, bestMetaDicts)
Example #7
 def setNumTrees(self, numTrees):
     """
     :param numTrees: The number of trees to generate in the forest.
     :type numTrees: :class:`int`
     """
     Parameter.checkInt(numTrees, 1, float('inf'))
     self.numTrees = numTrees
Example #8
 def setMinSplit(self, minSplit):
     """
     :param minSplit: the minimum number of examples in a node for it to be split. 
     :type minSplit: :class:`int`
     """
     Parameter.checkInt(minSplit, 2, float("inf"))
     self.minSplit = minSplit
Example #9
 def setMaxDepth(self, maxDepth):
     """
     :param maxDepth: the maximum depth of the learnt tree. 
     :type maxDepth: :class:`int`
     """
     Parameter.checkInt(maxDepth, 1, float("inf"))
     self.maxDepth = int(maxDepth)
Example #10
    def predictEdges(self, vertexIndices):
        """
        This makes a prediction for a series of edges using the following score:
        \sum_{z \in n(x) \cap n(y)} 1/\log(|n(z)|)
        Returns a matrix whose rows are ranked lists of vertices of length self.windowSize.
        """

        Parameter.checkInt(self.windowSize, 1, self.graph.getNumVertices())
        logging.info("Running predictEdges in " + str(self.__class__.__name__))

        P = numpy.zeros((vertexIndices.shape[0], self.windowSize))
        S = numpy.zeros((vertexIndices.shape[0], self.windowSize))
        W = self.graph.getWeightMatrix()


        for i in range(vertexIndices.shape[0]):
            Util.printIteration(i, self.printStep, vertexIndices.shape[0])
            scores = numpy.zeros(self.graph.getNumVertices())

            for j in range(0, self.graph.getNumVertices()):
                commonNeighbours = numpy.nonzero(W[vertexIndices[i], :] * W[j, :])[0]

                for k in commonNeighbours:
                    q = numpy.log(numpy.nonzero(W[k, :])[0].shape[0])
                    if q != 0:
                        scores[j] = scores[j] + 1/q


            P[i, :], S[i, :] = self.indicesFromScores(vertexIndices[i], scores)

        return P, S
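A minimal sketch of the score for a single vertex pair, using a plain numpy adjacency matrix in place of the graph object:

    import numpy

    W = numpy.array([[0, 1, 1, 0],
                     [1, 0, 1, 1],
                     [1, 1, 0, 0],
                     [0, 1, 0, 0]], dtype=float)

    x, y = 0, 3
    commonNeighbours = numpy.nonzero(W[x, :] * W[y, :])[0]   #z adjacent to both x and y
    score = 0.0
    for z in commonNeighbours:
        q = numpy.log(numpy.count_nonzero(W[z, :]))          #log degree of z
        if q != 0:
            score += 1/q
    print(score)   #vertex 1 is the only common neighbour, degree 3, so 1/log(3)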
Example #11
File: Latex.py  Project: kentwang/sandbox
    def array1DToRow(X, precision=3):
        """
        Take a 1D numpy array and print it in LaTeX table row format, i.e. x1 & x2 & ... & xn

        :param X: The array to print
        :type X: :class:`ndarray`

        :param precision: The precision of the printed floating point numbers.
        :type precision: :class:`int`
        """
        Parameter.checkInt(precision, 0, 10)
        if X.ndim != 1:
            raise ValueError("Array must be one dimensional")

        n = X.shape[0]
        outputStr = ""

        if X.dtype == float:
            fmtStr = "%." + str(precision) + "f & "
            endFmtStr = "%." + str(precision) + "f"
        else:
            fmtStr = "%d & "
            endFmtStr = "%d"

        for i in range(0, n):
            if i != n - 1:
                outputStr += fmtStr % X[i]
            else:
                outputStr += endFmtStr % X[i]

        return outputStr
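A usage sketch with a standalone copy of the formatting logic (the Parameter check is dropped so the snippet runs on its own):

    import numpy

    def array1DToRowDemo(X, precision=3):
        fmt = "%." + str(precision) + "f" if X.dtype == float else "%d"
        return " & ".join(fmt % x for x in X)

    print(array1DToRowDemo(numpy.array([1.23456, 2.0, 3.14159])))   #1.235 & 2.000 & 3.142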
Example #12
    def eigenAdd(omega, Q, Y, k):
        """
        Perform an eigen update of the form A^*A + Y^*Y in which Y is a low-rank matrix
        and A^*A = Q Omega Q^*. We use the rank-k approximation of A^*A: Q_k Omega_k Q_k^*
        and then approximate [A^*A_k + Y^*Y]_k.
        """
        #logging.debug("< eigenAdd >")
        Parameter.checkInt(k, 0, omega.shape[0])
        #if not numpy.isrealobj(omega) or not numpy.isrealobj(Q):
        #    raise ValueError("Eigenvalues and eigenvectors must be real")
        if omega.ndim != 1:
            raise ValueError("omega must be 1-d array")
        if omega.shape[0] != Q.shape[1]:
            raise ValueError("Must have same number of eigenvalues and eigenvectors")

        if __debug__:
            Parameter.checkOrthogonal(Q, tol=EigenUpdater.tol, softCheck=True, arrayInfo="input Q in eigenAdd()")

        #Taking the abs of the eigenvalues is correct
        inds = numpy.flipud(numpy.argsort(numpy.abs(omega)))

        omega, Q = Util.indEig(omega, Q, inds[numpy.abs(omega)>EigenUpdater.tol])
        Omega = numpy.diag(omega)

        YY = Y.conj().T.dot(Y)
        QQ = Q.dot(Q.conj().T)
        Ybar = Y - Y.dot(QQ)

        Pbar, sigmaBar, Qbar = numpy.linalg.svd(Ybar, full_matrices=False)
        inds = numpy.flipud(numpy.argsort(numpy.abs(sigmaBar)))
        inds = inds[numpy.abs(sigmaBar)>EigenUpdater.tol]
        Pbar, sigmaBar, Qbar = Util.indSvd(Pbar, sigmaBar, Qbar, inds)
        
        SigmaBar = numpy.diag(sigmaBar)
        Qbar = Ybar.T.dot(Pbar)
        Qbar = Qbar.dot(numpy.diag(numpy.diag(Qbar.T.dot(Qbar))**-0.5))

        r = sigmaBar.shape[0]

        YQ = Y.dot(Q)
        Zeros = numpy.zeros((r, omega.shape[0]))
        D = numpy.c_[Q, Qbar]

        YYQQ = YY.dot(QQ)
        Z = D.conj().T.dot(YYQQ + YYQQ.conj().T).dot(D)
        F = numpy.c_[numpy.r_[Omega - YQ.conj().T.dot(YQ), Zeros], numpy.r_[Zeros.T, SigmaBar.conj().dot(SigmaBar)]]
        F = F + Z 

        pi, H = scipy.linalg.eigh(F)
        inds = numpy.flipud(numpy.argsort(numpy.abs(pi)))

        H = H[:, inds[0:k]]
        pi = pi[inds[0:k]]

        V = D.dot(H)
        #logging.debug("</ eigenAdd >")
        return pi, V
Example #13
 def setPosteriorSampleSize(self, posteriorSampleSize):
     """
     Set the sample size of the posterior distribution (population size).
     
     :param posteriorSampleSize: The size of the population 
     :type posteriorSampleSize: :class:`int`
     """
     Parameter.checkInt(posteriorSampleSize, 0, float('inf'))
     self.N = posteriorSampleSize
Example #14
    def evaluateLearn(X, y, idx, learnModel, predict, metricMethod, progress=True):
        """
        Evaluate this learning algorithm using the given list of training/test splits.
        The metricMethod is a method which takes (predictedY, realY) as input
        and returns a metric about the quality of the evaluation.

        :param X: A matrix with examples as rows 
        :type X: :class:`ndarray`

        :param y: A vector of labels 
        :type y: :class:`ndarray`

        :param idx: A list of training/test splits 
        :type idx: :class:`list`

        :param learnModel: A function such that learnModel(X, y) finds a mapping from X to y 
        :type learnModel: :class:`function`

        :param predict: A function such that predict(X) makes predictions for X
        :type predict: :class:`function`

        :param metricMethod: A function such that metricMethod(predY, testY) returns the quality of predicted labels predY
        :type metricMethod: :class:`function`

        Output: an array containing the metric for each fold. 
        """
        #Parameter.checkClass(idx, list)
        Parameter.checkClass(X, numpy.ndarray)
        Parameter.checkArray(X, softCheck=True)
        Parameter.checkInt(X.shape[0], 1, float('inf'))
        Parameter.checkClass(y, numpy.ndarray)
        Parameter.checkArray(y, softCheck=True)

        if y.ndim != 1:
            raise ValueError("Dimention of y must be 1")
        
        i = 0
        metrics = numpy.zeros(len(idx))
        logging.debug("EvaluateLearn: Using " + str(len(idx)) + " splits on " + str(X.shape[0]) + " examples")

        for idxtr, idxts in idx:
            if progress:
                Util.printConciseIteration(i, 1, len(idx))

            trainX, testX = X[idxtr, :], X[idxts, :]
            trainY, testY = y[idxtr], y[idxts]
            #logging.debug("Distribution of labels in evaluateLearn train: " + str(numpy.bincount(trainY)))
            #logging.debug("Distribution of labels in evaluateLearn test: " + str(numpy.bincount(testY)))

            learnModel(trainX, trainY)
            predY = predict(testX)
            gc.collect()

            metrics[i] = metricMethod(predY, testY)
            i += 1

        return metrics
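A self-contained sketch of the evaluation loop, with a trivial majority-vote model standing in for the learnModel/predict callables:

    import numpy

    X = numpy.random.randn(20, 3)
    y = numpy.array([1]*12 + [-1]*8)

    state = {}
    def learnModel(trainX, trainY):
        state["label"] = 1 if (trainY == 1).sum() >= (trainY == -1).sum() else -1
    def predict(testX):
        return numpy.repeat(state["label"], testX.shape[0])
    def metricMethod(predY, testY):
        return numpy.mean(predY == testY)   #accuracy

    idx = [(numpy.arange(0, 15), numpy.arange(15, 20))]   #one train/test split
    metrics = []
    for idxtr, idxts in idx:
        learnModel(X[idxtr, :], y[idxtr])
        metrics.append(metricMethod(predict(X[idxts, :]), y[idxts]))
    print(metrics)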
Example #15
    def evaluateCvOuter(self, X, Y, folds):
        """
        Run cross validation and output some ROC curves. In this case Y is a 1D array.

        :param X: A matrix with examples as rows
        :type X: :class:`ndarray`

        :param Y: A vector of labels
        :type Y: :class:`ndarray`

        :param folds: The number of cross validation folds
        :type folds: :class:`int`
        """
        Parameter.checkClass(X, numpy.ndarray)
        Parameter.checkClass(Y, numpy.ndarray)
        Parameter.checkInt(folds, 2, float('inf'))
        if Y.ndim != 1:
            raise ValueError("Expecting Y to be 1D")

        indexList = cross_val.StratifiedKFold(Y, folds)

        bestParams = []
        bestTrainAUCs = numpy.zeros(folds)
        bestTrainROCs = []
        bestTestAUCs = numpy.zeros(folds)
        bestTestROCs = []
        bestMetaDicts = []
        i = 0

        for trainInds, testInds in indexList:
            Util.printIteration(i, 1, folds, "Outer CV: ")
            trainX, trainY = X[trainInds, :], Y[trainInds]
            testX, testY = X[testInds, :], Y[testInds]

            self.learnModel(trainX, trainY)
            #self.learnModelCut(trainX, trainY)

            predTrainY = self.predict(trainX)
            predTestY = self.predict(testX)
            bestTrainAUCs[i] = Evaluator.auc(predTrainY, trainY)
            bestTestAUCs[i] = Evaluator.auc(predTestY, testY)

            #Store the parameters and ROC curves
            bestTrainROCs.append(Evaluator.roc(trainY, predTrainY))
            bestTestROCs.append(Evaluator.roc(testY, predTestY))

            metaDict = {}
            bestMetaDicts.append(metaDict)

            i += 1

        logging.debug("Mean test AUC = " + str(numpy.mean(bestTestAUCs)))
        logging.debug("Std test AUC = " + str(numpy.std(bestTestAUCs)))
        allMetrics = [bestTrainAUCs, bestTrainROCs, bestTestAUCs, bestTestROCs]

        return (bestParams, allMetrics, bestMetaDicts)
Example #16
    def setDegree(self, degree):
        """
        Set the degree parameter.

        :param degree: kernel degree parameter.
        :type degree: :class:`int`
        """
        Parameter.checkInt(degree, 1, float('inf'))

        self.degree = degree
Example #17
    def evaluateCv(self, X, y, folds, metricMethod=Evaluator.binaryError):
        """
        Compute the cross validation according to a given metric. 
        """
        Parameter.checkInt(folds, 2, float('inf'))
        idx = Sampling.crossValidation(folds, y.shape[0])
        metrics = AbstractPredictor.evaluateLearn(X, y, idx, self.learnModel, self.predict, metricMethod)

        mean = numpy.mean(metrics, 0)
        var = numpy.var(metrics, 0)

        return (mean, var)
Example #18
 def eigWeight(W, m, k, orthogonalise=True): 
     """
     Find the k largest eigenvectors and eigenvalues of M = D^-1/2 W D^-1/2 using a weight 
     matrix. This is the same as I - L where L is the normalised Laplacian. 
     
     :param W: A sparse weight matrix 
     
     :param m: The number of columns to sample 
     
     :param k: The number of eigenvectors/eigenvalues to find. 
     
     :param orthogonalise: Whether to orthogonalise the final eigenvectors 
     """
     Parameter.checkInt(k, 1, W.shape[0])   
     #This constraint is due to MStar being rank m and only being able to find m-1 eigenvalues 
     m = min(W.shape[0], m)
     Parameter.checkInt(m, k+1, W.shape[0])
     
     if isinstance(m, int):
         inds = numpy.sort(numpy.random.permutation(W.shape[0])[0:m])
     else:
         inds = m      
     
     W11 = W[:, inds][inds, :]
     dStar = numpy.array(W11.sum(0)).ravel()
     dStar[dStar!=0] = dStar[dStar!=0]**-0.5
     DStar = scipy.sparse.spdiags(dStar, 0, dStar.shape[0], dStar.shape[0], format='csr')
     
     MStar = DStar.dot(W11).dot(DStar)
     
     lmbda, V = scipy.sparse.linalg.eigsh(MStar, min(k, MStar.shape[0]-1), which="LM", ncv = min(10*k, MStar.shape[0]))
     
     Lmbda = scipy.sparse.spdiags(lmbda, 0, k, k, format='csr')
     InvLmbda = scipy.sparse.spdiags(lmbda**-1, 0, k, k, format='csr')
     V = scipy.sparse.csr_matrix(V)
     B = DStar.dot(V).dot(InvLmbda)
     
     Q = W[:, inds].dot(B)
     dHat = numpy.array((Q.dot(Lmbda).dot(Q.sum(0).transpose()))).ravel()
     #Note that W[:, inds] may have all zero rows (even when full W doesn't) and hence
     #Q can have zero columns meaning dHat can have zero elements and DHat is no longer valid. 
     #There is no answer to this in the paper 
     
     DHat = scipy.sparse.spdiags(dHat**-0.5, 0, dHat.shape[0], dHat.shape[0], format='csr')
             
     U = DHat.dot(Q)
     U = numpy.asarray(U.todense())
     
     if not orthogonalise: 
         return lmbda, U 
     else: 
         return EfficientNystrom.orthogonalise(lmbda, U) 
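The relation to the normalised Laplacian stated in the docstring can be checked directly with small dense arrays (a sketch; none of the sparse machinery above is needed):

    import numpy

    W = numpy.array([[0., 1., 1.],
                     [1., 0., 0.],
                     [1., 0., 0.]])
    d = W.sum(0)
    DInvSqrt = numpy.diag(d**-0.5)
    M = DInvSqrt.dot(W).dot(DInvSqrt)   #M = D^-1/2 W D^-1/2
    L = numpy.eye(3) - M                #normalised Laplacian

    #eigenvalues satisfy lambda(M) = 1 - lambda(L), so the largest of M are the smallest of L
    print(numpy.allclose(numpy.sort(1 - numpy.linalg.eigvalsh(M)),
                         numpy.sort(numpy.linalg.eigvalsh(L))))   #True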
Example #19
    def eigenConcat(omega, Q, AB, BB, k):
        """
        Find the eigen update of a matrix [A, B]'[A B] where A'A = Q diag(omega) Q*
        and AB = A'B, BB = B'B. Q is the set of eigenvectors of A'A and omega is the
        vector of eigenvalues. 
        """
        #logging.debug("< eigenConcat >")
        Parameter.checkInt(k, 0, omega.shape[0])
        if not numpy.isrealobj(omega) or not numpy.isrealobj(Q):
            raise ValueError("Eigenvalues and eigenvectors must be real")
        if not numpy.isrealobj(AB) or not numpy.isrealobj(BB):
            raise ValueError("AB and BB must be real")
        if omega.ndim != 1:
            raise ValueError("omega must be 1-d array")
        if omega.shape[0] != Q.shape[1]:
            raise ValueError("Must have same number of eigenvalues and eigenvectors")
        if Q.shape[0] != AB.shape[0]:
            raise ValueError("Q must have the same number of rows as AB")
        if AB.shape[1] != BB.shape[0] or  BB.shape[0]!=BB.shape[1]:
            raise ValueError("AB must have the same number of cols/rows as BB")

        #Check Q is orthogonal
        if __debug__:
            Parameter.checkOrthogonal(Q, tol=EigenUpdater.tol, softCheck=True, arrayInfo = "input Q in eigenConcat()")

        m = Q.shape[0]
        p = BB.shape[0]

        inds = numpy.flipud(numpy.argsort(numpy.abs(omega)))
        Q = Q[:, inds[0:k]]
        omega = omega[inds[0:k]]
        Omega = numpy.diag(omega)

        QAB = Q.conj().T.dot(AB)

        F = numpy.c_[numpy.r_[Omega, QAB.conj().T], numpy.r_[QAB, BB]]
        D = numpy.c_[numpy.r_[Q, numpy.zeros((p, Q.shape[1]))], numpy.r_[numpy.zeros((m, p)), numpy.eye(p)]]

        pi, H = scipy.linalg.eigh(F)

        inds = numpy.flipud(numpy.argsort(numpy.abs(pi)))
        inds = inds[numpy.abs(pi)>EigenUpdater.tol]

        H = H[:, inds[0:k]]
        pi = pi[inds[0:k]]

        V = numpy.dot(D, H)

        #logging.debug("</ eigenConcat >")
        return pi, V
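The update relies on the block structure of the concatenated Gram matrix; a small numpy check of that identity, independent of the EigenUpdater class:

    import numpy

    A = numpy.random.randn(6, 3)
    B = numpy.random.randn(6, 2)
    C = numpy.c_[A, B]

    #[A B]'[A B] is assembled from A'A (carried by omega, Q), AB = A'B and BB = B'B
    full = C.T.dot(C)
    blocks = numpy.r_[numpy.c_[A.T.dot(A), A.T.dot(B)],
                      numpy.c_[B.T.dot(A), B.T.dot(B)]]
    print(numpy.allclose(full, blocks))   #True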
Example #20
    def evaluateCvOuter(self, X, Y, folds, leafRank):
        """
        Run cross validation and output some ROC curves. In this case Y is a 1D array.
        """
        Parameter.checkClass(X, numpy.ndarray)
        Parameter.checkClass(Y, numpy.ndarray)
        Parameter.checkInt(folds, 2, float('inf'))
        if Y.ndim != 1:
            raise ValueError("Expecting Y to be 1D")

        indexList = cross_val.StratifiedKFold(Y, folds)
        self.setLeafRank(leafRank)

        bestParams = []
        bestTrainAUCs = numpy.zeros(folds)
        bestTrainROCs = []
        bestTestAUCs = numpy.zeros(folds)
        bestTestROCs = []
        bestMetaDicts = []
        i = 0

        for trainInds, testInds in indexList:
            Util.printIteration(i, 1, folds)
            trainX, trainY = X[trainInds, :], Y[trainInds]
            testX, testY = X[testInds, :], Y[testInds]

            logging.debug("Distribution of labels in train: " + str(numpy.bincount(trainY)))
            logging.debug("Distribution of labels in test: " + str(numpy.bincount(testY)))

            self.learnModel(trainX, trainY)
            predTrainY = self.predict(trainX)
            predTestY = self.predict(testX)
            bestTrainAUCs[i] = Evaluator.auc(predTrainY, trainY)
            bestTestAUCs[i] = Evaluator.auc(predTestY, testY)

            #Store the parameters and ROC curves
            bestTrainROCs.append(Evaluator.roc(trainY, predTrainY))
            bestTestROCs.append(Evaluator.roc(testY, predTestY))

            metaDict = {}
            bestMetaDicts.append(metaDict)

            i += 1

        logging.debug("Mean test AUC = " + str(numpy.mean(bestTestAUCs)))
        logging.debug("Std test AUC = " + str(numpy.std(bestTestAUCs)))
        allMetrics = [bestTrainAUCs, bestTrainROCs, bestTestAUCs, bestTestROCs]

        return (bestParams, allMetrics, bestMetaDicts)
Example #21
 def updateSvd(A, U, s, V, E, k, p=10): 
     """
     Given a matrix A whose approximate SVD is U s V.T, compute the SVD 
     of the new matrix A + E, using previous info. A and E are sparse 
     matrices. The rank of the approximation is k, and p is an oversampling
     parameter. 
     """
     Parameter.checkInt(k, 1, float("inf"))
     Parameter.checkInt(p, 0, float("inf"))     
                
     if isinstance(A, GeneralLinearOperator): 
         L = A 
     else: 
         L = GeneralLinearOperator.asLinearOperator(A)                    
                
     if isinstance(E, GeneralLinearOperator): 
         M = E 
     else: 
         M = GeneralLinearOperator.asLinearOperator(E) 
         
     N = GeneralLinearOperator.asLinearOperatorSum(L, M)
     
     n = A.shape[1]
     omega = numpy.random.randn(n, p)
     
     Y = U*s + M.matmat(V)
     Y = numpy.c_[Y, N.matmat(omega)]
     
     Q, R = numpy.linalg.qr(Y)
     del omega 
         
     del Y 
     del R 
     gc.collect() 
     
     B = N.rmatmat(Q).T 
     U, s, V = numpy.linalg.svd(B, full_matrices=False)
     del B 
     V = V.T
     U = Q.dot(U)
 
     U = U[:, 0:k]
     s = s[0:k]
     V = V[:, 0:k]        
     
     return U, s, V 
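A sketch of the randomised range-finder idea this update builds on, using dense numpy arrays and a synthetic low-rank matrix in place of the sparse A and E (GeneralLinearOperator is not needed here):

    import numpy

    n, k, p = 50, 5, 10
    A = numpy.random.randn(n, k).dot(numpy.random.randn(k, n))   #a rank-k matrix
    E = 0.01*numpy.random.randn(n, n)                            #a small update
    N = A + E

    omega = numpy.random.randn(n, k + p)    #random test matrix with p extra columns
    Q, R = numpy.linalg.qr(N.dot(omega))    #orthonormal basis for the range of N
    B = Q.T.dot(N)                          #small matrix capturing the action of N
    Uhat, s, Vt = numpy.linalg.svd(B, full_matrices=False)
    U = Q.dot(Uhat)[:, 0:k]                 #approximate top-k left singular vectors

    sExact = numpy.linalg.svd(N, compute_uv=False)
    print(numpy.allclose(s[0:k], sExact[0:k], rtol=0.1))   #leading values roughly agree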
     
     
Example #22
    def cluster(self, XList, k, tau):
        """
        Take a set of zero mean and unit variance examples in the rows of X (the
        entries of XList), and find clusters. Each matrix X must have the same
        number of rows, but can have differing numbers of columns. 
        """
        Parameter.checkInt(k, 1, float('inf'))
        Parameter.checkFloat(tau, 0.0, 1.0)

        n = XList[0].shape[0]
        m = len(XList)

        muList = []

        #Randomly assign initial means
        for i in range(m):
            numFeatures = XList[i].shape[1]
            mu = numpy.random.randn(k, numFeatures)
            muList.append(mu)

        #Each column represents class membership of all examples at a time point
        #Each row is the class membership of an example for all times 
        C = numpy.zeros((n, m), int)
        CLast = C+1

        while (C != CLast).any():
            CLast = C.copy()   #copy, otherwise CLast aliases C and the loop exits after one pass

            #Need centered class membership 
            for i in range(m):
                for j in range(n):
                    dists = numpy.zeros(k)
                    for s in range(k):
                        dists[s] = (1-tau)*numpy.linalg.norm(XList[i][j, :] - muList[i][s, :])

                        tempCRow = C[j, :]
                        tempCRow[i] = s
                        dists[s] += tau*numpy.var(tempCRow)

                    #print(dists)
                    C[j, i] = numpy.argmin(dists)

            #Update means
            for i in range(m):
                for s in range(k):
                    muList[i][s, :] = numpy.mean(XList[i][C[:, i]==s, :], 0)

        return C, muList
Example #23
    def learnModel(self, graph):
        """
        Learn a prediction model based on all of the edges of the input graph.
        For each ego, X contains a list of neighbours and non-neighbours in the same
        ratio, and y = 1 for a neighbour and -1 otherwise. We then find the set of
        primal weights w for each ego network and then regress onto the set of weights
        using the ego labels.

        One can either learn by comparing neighbours and non-neighbours, or alternatively
        use the labels of edges and make predictions on unlabelled edges.

        :param graph: The input graph to learn from.
        :type graph: :class:`apgl.graph.AbstractSingleGraph`
        """

        Parameter.checkInt(self.windowSize, 1, graph.getNumVertices())
        self.graph = graph
        logging.info("Learning model on graph of size " + str(graph.getNumVertices()))

        allIndices = numpy.arange(0, graph.getNumVertices())
        V = graph.getVertexList().getVertices(allIndices)
        W = numpy.zeros((0, graph.getVertexList().getNumFeatures()))
        Xe  =  numpy.zeros((0, graph.getVertexList().getNumFeatures()))
        printStep = numpy.floor(graph.getNumVertices()/10)

        for i in range(graph.getNumVertices()):
            Util.printIteration(i, printStep, graph.getNumVertices())
            neighbours = graph.neighbours(i)

            if neighbours.shape[0] != 0:
                compNeighbours = numpy.setdiff1d(allIndices, neighbours)
                perm = numpy.random.permutation(compNeighbours.shape[0])[0:neighbours.shape[0]]
                negativeVertices = V[compNeighbours[perm], :]
                X = numpy.r_[V[neighbours, :], negativeVertices]
                y = numpy.ones(X.shape[0])
                y[neighbours.shape[0]:] = -1
 
                w = self.alterRegressor.learnModel(X, y)
                W = numpy.r_[W, numpy.array([w])]
                Xe = numpy.r_[Xe, numpy.array([V[i, :]])]

        #Now we need to solve a least squares problem to regress Xe onto W
        self.egoRegressor.learnModel(Xe, W)
Example #24
    def eigenRemove(omega, Q, n, k, debug=False):
        """
        Remove a set of rows and columns from a matrix whose eigen-decomposition
        is Q diag(omega) Q^T. Keep the first n rows/cols i.e. the rows/cols starting
        from n to the end are removed and k is the number of eigenvectors/values
        to return for the new matrix. We could generalise this to delete a given
        list of rows/cols.
        """
        #logging.debug("< eigenRemove >")
        Parameter.checkClass(omega, numpy.ndarray)
        Parameter.checkClass(Q, numpy.ndarray)
        Parameter.checkInt(k, 0, float('inf'))
        Parameter.checkInt(n, 0, Q.shape[0])
        if omega.ndim != 1:
            raise ValueError("omega must be 1-d array")
        if omega.shape[0] != Q.shape[1]:
            raise ValueError("Must have same number of eigenvalues and eigenvectors")

        if __debug__:
            Parameter.checkOrthogonal(Q, tol=EigenUpdater.tol, softCheck=True, arrayInfo="input Q in eigenRemove()")

        inds = numpy.flipud(numpy.argsort(numpy.abs(omega)))
        inds = inds[omega[inds]>EigenUpdater.tol]
        
        omega, Q = Util.indEig(omega, Q, inds[0:k])
        AB = (Q[0:n, :]*omega).dot(Q[n:, :].T)
        BB = (Q[n:, :]*omega).dot(Q[n:, :].T)

        p = BB.shape[0]
        Y1 = numpy.r_[numpy.zeros((n, p)), numpy.eye(p)]
        Y2 = -numpy.r_[AB, 0.5*BB]
        pi, V = EigenUpdater.eigenAdd2(omega, Q, Y1, Y2, k)

        #check last rows are zero
        if numpy.linalg.norm(V[n:, :]) >= EigenUpdater.tol:
            logging.warn("numpy.linalg.norm(V[n:, :])= %s" % str(numpy.linalg.norm(V[n:, :])))

        #logging.debug("</ eigenRemove >")
        if not debug:
            return pi, V[0:n, :]
        else:
            C = (Q*omega).dot(Q.T)
            K = C + Y1.dot(Y2.T) + Y2.dot(Y1.T)
            assert numpy.linalg.norm(BB- C[n:, n:]) <= EigenUpdater.tol
            assert numpy.linalg.norm(AB - C[0:n, n:]) <= EigenUpdater.tol, "%s \n %s" % (AB, C[0:n, n:])
            return pi, V[0:n, :], K, Y1, Y2, omega
Example #25
    def evaluateStratifiedCv(self, X, y, folds, metricMethod=Evaluator.binaryError):
        """
        Compute the stratified cross validation according to a given metric.
        """
        try:
            from sklearn.cross_validation import StratifiedKFold
            Parameter.checkInt(folds, 2, float('inf'))
            idx = StratifiedKFold(y, folds)
            metrics = AbstractPredictor.evaluateLearn(X, y, idx, self.learnModel, self.predict, metricMethod)

            mean = numpy.mean(metrics, 0)
            var = numpy.var(metrics, 0)

            return (mean, var)

        except ImportError:
            logging.warn("Failed to import scikits")
            raise 
Example #26
    def evaluateCvOuter(self, X, y, folds):
        """
        Compute the average AUC using k-fold cross validation and the linear kernel.
        """
        Parameter.checkInt(folds, 2, float('inf'))
        idx = StratifiedKFold(y, folds)
        metricMethods = [Evaluator.auc2, Evaluator.roc]
        trainMetrics, testMetrics = AbstractPredictor.evaluateLearn2(X, y, idx, self.modelSelect, self.predict, metricMethods)

        bestTrainAUCs = trainMetrics[0]
        bestTrainROCs = trainMetrics[1]
        bestTestAUCs = testMetrics[0]
        bestTestROCs = testMetrics[1]

        bestParams = {}
        bestMetaDicts = {}
        allMetrics = [bestTrainAUCs, bestTrainROCs, bestTestAUCs, bestTestROCs]

        return (bestParams, allMetrics, bestMetaDicts)
Example #27
    def predictEdges(self, vertexIndices):
        """
        Make predictions for all possible edges given the vertex indices 
        """
        Parameter.checkInt(self.graph.getVertexList().getNumFeatures(), 1, float('inf'))
        logging.info("Making prediction over " + str(vertexIndices.shape[0]) + " vertices")

        testX = self.graph.getVertexList().getVertices(vertexIndices)
        testW = self.egoRegressor.predict(testX)

        #Output scores of resulting vertices
        V = self.graph.getVertexList().getVertices(list(range(0, self.graph.getNumVertices())))
        P = numpy.zeros((vertexIndices.shape[0], self.windowSize))
        S = numpy.zeros((vertexIndices.shape[0], self.windowSize))

        for i in range(testX.shape[0]):
            scores = numpy.dot(V, testW[i, :])
            P[i, :], S[i, :] = self.indicesFromScores(vertexIndices[i], scores) 

        return P, S
Example #28
    def learnModel(self, X, Y):
        """
        Learn the weight matrix which matches X and Y.
        """
        Parameter.checkClass(X, numpy.ndarray)
        Parameter.checkClass(Y, numpy.ndarray)
        Parameter.checkInt(X.shape[0], 1, float('inf'))
        Parameter.checkInt(X.shape[1], 1, float('inf'))

        self.pdcca = PrimalDualCCA(self.kernel, self.tau1, self.tau2)
        alpha, V, lmbdas = self.pdcca.learnModel(X, Y)

        a = 10**-5
        I = numpy.eye(V.shape[0])
        VV = numpy.dot(V, V.T) + a*I

        self.A = Util.mdot(alpha, V.T, numpy.linalg.inv(VV))
        self.X = X

        return self.A
Example #29
    def lazyEigenConcatAsUpdate(omega, Q, AB, BB, k, debug= False):
        """
        Find the eigen update of a matrix [A, B]'[A B] where
        A'A = Q diag(omega) Q* and AB = A*B, BB = B*B. Q is the set of
        eigenvectors of A*A and omega is the vector of eigenvalues.
        
        Simply expand Q, and update the eigen decomposition using EigenAdd2.
        Computation could be upgraded a bit because of the particular update
        type (Y1Bar = Y1 = [0,I]',  Y2Bar = [(I-QQ')A'B, 0]').
        """
        #logging.debug("< lazyEigenConcatAsUpdate >")
        Parameter.checkClass(omega, numpy.ndarray)
        Parameter.checkClass(Q, numpy.ndarray)
        Parameter.checkClass(AB, numpy.ndarray)
        Parameter.checkClass(BB, numpy.ndarray)
        Parameter.checkInt(k, 0, AB.shape[0] + BB.shape[0])
        if not numpy.isrealobj(omega) or not numpy.isrealobj(Q):
            logging.info("Eigenvalues or eigenvectors are not real")
        if not numpy.isrealobj(AB) or not numpy.isrealobj(BB):
            logging.info("AB or BB are not real")
        if omega.ndim != 1:
            raise ValueError("omega must be 1-d array")
        if omega.shape[0] != Q.shape[1]:
            raise ValueError("Must have same number of eigenvalues and eigenvectors")
        if Q.shape[0] != AB.shape[0]:
            raise ValueError("Q must have the same number of rows as AB")
        if AB.shape[1] != BB.shape[0] or  BB.shape[0]!=BB.shape[1]:
            raise ValueError("AB must have the same number of cols/rows as BB")

        if __debug__:
            if not Parameter.checkOrthogonal(Q, tol=EigenUpdater.tol, softCheck=True, investigate=True, arrayInfo="input Q in lazyEigenConcatAsUpdate()"):
                print("omega:\n", omega)


        m = Q.shape[0]
        p = BB.shape[0]
        
        Q = numpy.r_[Q, numpy.zeros((p, Q.shape[1]))]
        Y1 = numpy.r_[numpy.zeros((m,p)), numpy.eye(p)]
        Y2 = numpy.r_[AB, 0.5*BB]
        return EigenUpdater.eigenAdd2(omega, Q, Y1, Y2, k, debug=debug)
Example #30
    def __init__(self, k1, k2=20, k3=100, k4=100, alg="exact", T=10, computeBound=False, logStep=1, computeSinTheta=False):
        """
        Initialise this object with integer k1 which is the number of clusters to
        find, and k2 which is the maximum rank of the approximation of the shift
        Laplacian. When using the Nystrom approximation, k3 is the number of rows/cols
        used to find the approximate eigenvalues. 
        
        :param k1: The number of clusters 
        
        :param k2: The number of eigenvectors to keep for IASC 
        
        :param k3: The number of columns to sample for Nystrom approximation 
        
        :param k4: The number of random projections to use with randomised SVD 
        
        :param alg: The algorithm to use: "exact", "IASC", "nystrom", "randomisedSvd" or "efficientNystrom" clustering
        
        :param T: The number of iterations before eigenvectors are recomputed in IASC 
        """
        Parameter.checkInt(k1, 1, float('inf'))
        Parameter.checkInt(k2, 1, float('inf'))
        Parameter.checkInt(k3, 1, float('inf'))
        Parameter.checkInt(k4, 1, float('inf'))
        Parameter.checkInt(T, 1, float('inf'))
        
        if alg not in ["exact", "IASC", "nystrom", "efficientNystrom", "randomisedSvd"]: 
            raise ValueError("Invalid algorithm : " + str(alg))

        self.k1 = k1
        self.k2 = k2
        self.k3 = k3
        self.k4 = k4
        self.T = T
        
        logging.debug("IterativeSpectralClustering(" + str((k1, k2, k3, k4, T)) + ") with algorithm " + alg)

        self.nb_iter_kmeans = 100
        self.alg = alg
        self.computeBound = computeBound 
        self.computeSinTheta = computeSinTheta 
        self.logStep = logStep
Example #31
 def setK(self, k):
     Parameter.checkInt(k, 1, float('inf'))
     
     self.k = k 
Example #32
 def setMaxIter(self, maxIter):
     Parameter.checkInt(maxIter, 1, float("inf"))
     self.maxIter = maxIter
Example #33
 def setIterations(self, iterations):
     Parameter.checkInt(iterations, 0, float('inf'))
     self.iterations = iterations
Example #34
 def setLearners(self, learners):
     Parameter.checkInt(learners, 0, float('inf'))
     self.learners = learners
Example #35
 def learnModel(self, graph):
     Parameter.checkInt(self.windowSize, 1, graph.getNumVertices())
     self.graph = graph
Example #36
 def setSampleSize(self, sampleSize):
     Parameter.checkInt(sampleSize, 1, float("inf"))
     self.sampleSize = sampleSize
Example #37
 def setNumTrees(self, numTrees):
     Parameter.checkInt(numTrees, 1, float('inf'))
     self.numTrees = numTrees
Example #38
 def setMaxDepth(self, maxDepth):
     Parameter.checkInt(maxDepth, 1, float('inf'))
     self.maxDepth = maxDepth
Example #39
 def setRank(self, rank):
     Parameter.checkInt(rank, 1, float("inf"))
     self.rank = rank
Example #40
 def setMinSplit(self, minSplit):
     Parameter.checkInt(minSplit, 0, float('inf'))
     self.minSplit = minSplit
Example #41
 def setPruneCV(self, folds):
     Parameter.checkInt(folds, 1, float("inf"))
     self.folds = folds
Example #42
    def eigWeight(W, m, k, orthogonalise=True):
        """
        Find the k largest eigenvectors and eigenvalues of M = D^-1/2 W D^-1/2 using a weight 
        matrix. This is the same as I - L where L is the normalised Laplacian. 
        
        :param W: A sparse weight matrix 
        
        :param m: The number of columns to sample 
        
        :param k: The number of eigenvectors/eigenvalues to find. 
        
        :param orthogonalise: Whether to orthogonalise the final eigenvectors 
        """
        Parameter.checkInt(k, 1, W.shape[0])
        #This constraint is due to MStar being rank m and only being able to find m-1 eigenvalues
        m = min(W.shape[0], m)
        Parameter.checkInt(m, k + 1, W.shape[0])

        if isinstance(m, int):
            inds = numpy.sort(numpy.random.permutation(W.shape[0])[0:m])
        else:
            inds = m

        W11 = W[:, inds][inds, :]
        dStar = numpy.array(W11.sum(0)).ravel()
        dStar[dStar != 0] = dStar[dStar != 0]**-0.5
        DStar = scipy.sparse.spdiags(dStar,
                                     0,
                                     dStar.shape[0],
                                     dStar.shape[0],
                                     format='csr')

        MStar = DStar.dot(W11).dot(DStar)

        lmbda, V = scipy.sparse.linalg.eigsh(MStar,
                                             min(k, MStar.shape[0] - 1),
                                             which="LM",
                                             ncv=min(10 * k, MStar.shape[0]))

        Lmbda = scipy.sparse.spdiags(lmbda, 0, k, k, format='csr')
        InvLmbda = scipy.sparse.spdiags(lmbda**-1, 0, k, k, format='csr')
        V = scipy.sparse.csr_matrix(V)
        B = DStar.dot(V).dot(InvLmbda)

        Q = W[:, inds].dot(B)
        dHat = numpy.array((Q.dot(Lmbda).dot(Q.sum(0).transpose()))).ravel()
        #Note that W[:, inds] may have all zero rows (even when full W doesn't) and hence
        #Q can have zero columns meaning dHat can have zero elements and DHat is no longer valid.
        #There is no answer to this in the paper

        DHat = scipy.sparse.spdiags(dHat**-0.5,
                                    0,
                                    dHat.shape[0],
                                    dHat.shape[0],
                                    format='csr')

        U = DHat.dot(Q)
        U = numpy.asarray(U.todense())

        if not orthogonalise:
            return lmbda, U
        else:
            return EfficientNystrom.orthogonalise(lmbda, U)
Example #43
 def setGamma(self, gamma):
     """
     Gamma is an upper bound on the number of nodes in the tree. 
     """
     Parameter.checkInt(gamma, 1, float("inf"))
     self.gamma = gamma
Example #44
    def evaluateCvOuter(self, X, Y, folds, leafRank, innerFolds=3):
        """
        Run model selection and output some ROC curves. In this case Y is a 1D array. 
        """
        Parameter.checkClass(X, numpy.ndarray)
        Parameter.checkClass(Y, numpy.ndarray)
        Parameter.checkInt(folds, 2, float('inf'))
        if Y.ndim != 1:
            raise ValueError("Expecting Y to be 1D")

        indexList = cross_val.StratifiedKFold(Y, folds)

        maxDepths = numpy.flipud(numpy.arange(1, 12, 1))
        if leafRank == self.getTreeRankLib().LRforest:
            varSplits = numpy.arange(0.6, 1.01, 0.2)
        else:
            varSplits = numpy.array([1])
        #According to Nicolas nfcv>1 doesn't help
        nfcvs = [1]
        #This is tied in with depth 
        mincrit = 0.00
        #If minsplit is too low sometimes get a node with no positive labels
        minSplits = numpy.array([50])

        self.setLeafRank(leafRank)

        bestParams = []
        bestTrainAUCs = numpy.zeros(folds)
        bestTrainROCs = []
        bestTestAUCs = numpy.zeros(folds)
        bestTestROCs = []
        bestMetaDicts = []
        i = 0 

        for trainInds, testInds in indexList:
            trainX, trainY = X[trainInds, :], Y[trainInds]
            testX, testY = X[testInds, :], Y[testInds]

            meanParamAUCs = []
            paramList = [] 

            logging.debug("Distribution of labels in train: " + str(numpy.bincount(trainY)))
            logging.debug("Distribution of labels in test: " + str(numpy.bincount(testY)))

            for varSplit in varSplits:
                for nfcv in nfcvs:
                    for minSplit in minSplits:

                        self.setMaxDepth(maxDepths[0])
                        self.setVarSplit(varSplit)
                        self.setNfcv(nfcv)
                        self.setMinSplit(minSplit)
                        logging.debug(self)
                        idx = cross_val.StratifiedKFold(trainY, innerFolds)

                        j = 0
                        metrics = numpy.zeros((len(idx), maxDepths.shape[0]))

                        for idxtr, idxts in idx:
                            Util.printIteration(j, 1, innerFolds)

                            innerTrainX, innerTestX = trainX[idxtr, :], trainX[idxts, :]
                            innerTrainY, innerTestY = trainY[idxtr], trainY[idxts]

                            self.learnModel(innerTrainX, innerTrainY)

                            for k in range(maxDepths.shape[0]):
                                maxDepth = maxDepths[k]

                                robjects.globalenv["maxDepth"] = maxDepth
                                robjects.globalenv["tree"] = self.tree
                                nodeList = robjects.r('tree$nodes[tree$depth>=maxDepth]')
                                self.tree = self.treeRankLib.subTreeRank(self.tree, nodeList)

                                predY = self.predict(innerTestX)
                                gc.collect()

                                metrics[j, k] = Evaluator.auc(predY, innerTestY)
                                
                            j += 1

                        meanAUC = numpy.mean(metrics, 0)
                        varAUC = numpy.var(metrics, 0)
                        logging.warn(self.baseLib.warnings())
                        logging.debug("Mean AUCs and variances at each depth " + str((meanAUC, varAUC)))

                        for k in range(maxDepths.shape[0]):
                            maxDepth = maxDepths[k]
                            meanParamAUCs.append(meanAUC[k])
                            paramList.append((maxDepth, varSplit, nfcv, minSplit))

                        #Try to get some memory back
                        gc.collect()
                        robjects.r('gc(verbose=TRUE)')
                        robjects.r('memory.profile()')

                        #print(self.hp.heap())

            #Now choose best params
            bestInd = numpy.argmax(numpy.array(meanParamAUCs))

            self.setMaxDepth(paramList[bestInd][0])
            self.setVarSplit(paramList[bestInd][1])
            self.setNfcv(paramList[bestInd][2])
            self.setMinSplit(paramList[bestInd][3])

            self.learnModel(trainX, trainY)
            predTrainY = self.predict(trainX)
            predTestY = self.predict(testX)
            bestTrainAUCs[i] = Evaluator.auc(predTrainY, trainY)
            bestTestAUCs[i] = Evaluator.auc(predTestY, testY)

            #Store the parameters and ROC curves
            bestParams.append(paramList[bestInd])
            bestTrainROCs.append(Evaluator.roc(trainY, predTrainY))
            bestTestROCs.append(Evaluator.roc(testY, predTestY))

            metaDict = {}
            metaDict["size"] = self.getTreeSize()
            metaDict["depth"] = self.getTreeDepth()
            bestMetaDicts.append(metaDict)

            i += 1

        allMetrics = [bestTrainAUCs, bestTrainROCs, bestTestAUCs, bestTestROCs]

        return (bestParams, allMetrics, bestMetaDicts)
Example #45
    def eigenAdd2(omega, Q, Y1, Y2, k, debug= False):
        """
        Compute an approximation of the eigendecomposition A^*A + Y1Y2^* +Y2Y1^*
        in which Y1, Y2 are low rank matrices, Y1^*Y2=0 and A^*A = Q Omega Q*. We 
        use the rank-k approximation of A^*A: Q_k Omega_k Q_k^* and then find
        [A^*A_k + Y1Y2^* + Y2Y1^*]. If debug=False then pi, V are returned which 
        respectively correspond to all the eigenvalues/eigenvectors of 
        [A^*A_k + Y1Y2^* + Y2Y1^*]. 
        """
        #logging.debug("< eigenAdd2 >")
        Parameter.checkInt(k, 0, float('inf'))
        Parameter.checkClass(omega, numpy.ndarray)
        Parameter.checkClass(Q, numpy.ndarray)
        Parameter.checkClass(Y1, numpy.ndarray)
        Parameter.checkClass(Y2, numpy.ndarray)
        if not numpy.isrealobj(omega) or not numpy.isrealobj(Q):
            logging.warn("Eigenvalues or eigenvectors are not real")
        if not numpy.isrealobj(Y1) or not numpy.isrealobj(Y2):
            logging.warn("Y1 or Y2 are not real")
        if omega.ndim != 1:
            raise ValueError("omega must be 1-d array")
        if omega.shape[0] != Q.shape[1]:
            raise ValueError("Must have same number of eigenvalues and eigenvectors")
        if Q.shape[0] != Y1.shape[0]:
            raise ValueError("Q must have the same number of rows as Y1 rows")
        if Q.shape[0] != Y2.shape[0]:
            raise ValueError("Q must have the same number of rows as Y2 rows")
        if Y1.shape[1] != Y2.shape[1]:
            raise ValueError("Y1 must have the same number of columns as Y2 columns")

        if __debug__:
            Parameter.checkArray(omega, softCheck=True, arrayInfo="omega as input in eigenAdd2()")
            Parameter.checkArray(Q, softCheck=True, arrayInfo="Q as input in eigenAdd2()")
            Parameter.checkOrthogonal(Q, tol=EigenUpdater.tol, softCheck=True, arrayInfo="Q as input in eigenAdd2()")
            Parameter.checkArray(Y1, softCheck=True, arrayInfo="Y1 as input in eigenAdd2()")
            Parameter.checkArray(Y2, softCheck=True, arrayInfo="Y2 as input in eigenAdd2()")
            


        #Get first k eigenvectors/values of A^*A
        omega, Q = Util.indEig(omega, Q, numpy.flipud(numpy.argsort(omega))[0:k])

        QY1 = Q.conj().T.dot(Y1)
        Y1bar = Y1 - Q.dot(QY1)

        P1bar, sigma1Bar, Q1bar = Util.safeSvd(Y1bar)
        inds = numpy.arange(sigma1Bar.shape[0])[numpy.abs(sigma1Bar)>EigenUpdater.tol]
        P1bar, sigma1Bar, Q1bar = Util.indSvd(P1bar, sigma1Bar, Q1bar, inds)
        # checks on SVD decomposition of Y1bar
        if __debug__:
            Parameter.checkArray(QY1, softCheck=True, arrayInfo="QY1 in eigenAdd2()")
            Parameter.checkArray(Y1bar, softCheck=True, arrayInfo="Y1bar in eigenAdd2()")
            Parameter.checkArray(P1bar, softCheck=True, arrayInfo="P1bar in eigenAdd2()")
            if not Parameter.checkOrthogonal(P1bar, tol=EigenUpdater.tol, softCheck=True, arrayInfo="P1bar in eigenAdd2()", investigate=True):
                print ("corresponding sigma: ", sigma1Bar)
            Parameter.checkArray(sigma1Bar, softCheck=True, arrayInfo="sigma1Bar in eigenAdd2()")
            Parameter.checkArray(Q1bar, softCheck=True, arrayInfo="Q1bar in eigenAdd2()")
            if not Parameter.checkOrthogonal(Q1bar, tol=EigenUpdater.tol, softCheck=True, arrayInfo="Q1bar in eigenAdd2()"):
                print ("corresponding sigma: ", sigma1Bar)

        del Y1bar

        P1barY2 = P1bar.conj().T.dot(Y2)
        QY2 = Q.conj().T.dot(Y2)
        Y2bar = Y2 - Q.dot(QY2) - P1bar.dot(P1barY2)
        
        P2bar, sigma2Bar, Q2bar = Util.safeSvd(Y2bar)
        inds = numpy.arange(sigma2Bar.shape[0])[numpy.abs(sigma2Bar)>EigenUpdater.tol]
        P2bar, sigma2Bar, Q2bar = Util.indSvd(P2bar, sigma2Bar, Q2bar, inds)
        # checks on SVD decomposition of Y2bar
        if __debug__:
            Parameter.checkArray(P1barY2, softCheck=True, arrayInfo="P1barY2 in eigenAdd2()")
            Parameter.checkArray(QY2, softCheck=True, arrayInfo="QY2 in eigenAdd2()")
            Parameter.checkArray(Y2bar, softCheck=True, arrayInfo="Y2bar in eigenAdd2()")
            Parameter.checkArray(P2bar, softCheck=True, arrayInfo="P2bar in eigenAdd2()")
            Parameter.checkOrthogonal(P2bar, tol=EigenUpdater.tol, softCheck=True, arrayInfo="P2bar in eigenAdd2()")
            Parameter.checkArray(sigma2Bar, softCheck=True, arrayInfo="sigma2Bar in eigenAdd2()")
            Parameter.checkArray(Q2bar, softCheck=True, arrayInfo="Q2bar in eigenAdd2()")
            Parameter.checkOrthogonal(Q2bar, tol=EigenUpdater.tol, softCheck=True, arrayInfo="Q2bar in eigenAdd2()")

        del Y2bar 

        r = omega.shape[0]
        p = Y1.shape[1]
        p1 = sigma1Bar.shape[0]
        p2 = sigma2Bar.shape[0]

        D = numpy.c_[Q, P1bar, P2bar]
        del P1bar
        del P2bar 
        # rem: A*s = A.dot(diag(s)) ; A*s[:,new] = diag(s).dot(A)
        DStarY1 = numpy.r_[QY1, sigma1Bar[:,numpy.newaxis] * Q1bar.conj().T, numpy.zeros((p2, p))]
        DStarY2 = numpy.r_[QY2, P1barY2, sigma2Bar[:,numpy.newaxis] * Q2bar.conj().T]
        DStarY1Y2StarD = DStarY1.dot(DStarY2.conj().T)

        del DStarY1
        del DStarY2
        
        r = omega.shape[0]
        F = numpy.zeros((r+p1+p2, r+p1+p2))
        F[range(r),range(r)] = omega
        F = F + DStarY1Y2StarD + DStarY1Y2StarD.conj().T

        #A check to make sure DFD^T is AA_k + Y1Y2 + Y2Y1
        #assert numpy.linalg.norm(D.dot(F).dot(D.T) - Q.dot(numpy.diag(omega).dot(Q.T)) - Y1.dot(Y2.T) - Y2.dot(Y1.T)) < 10**-6
        
        # checks on F
        if __debug__:
            #Parameter.checkArray(DStarY1, softCheck=True, arrayInfo="DStarY1 in eigenAdd2()")
            #Parameter.checkArray(DStarY2, softCheck=True, arrayInfo="DStarY2 in eigenAdd2()")
            Parameter.checkArray(DStarY1Y2StarD, softCheck=True, arrayInfo="DStarY1Y2StarD in eigenAdd2()")
            Parameter.checkArray(F, softCheck=True, arrayInfo="F in eigenAdd2()")
            Parameter.checkSymmetric(F, tol=EigenUpdater.tol, softCheck=True, arrayInfo="F in eigenAdd2()")

        pi, H = scipy.linalg.eigh(F)
        # remove too small eigenvalues
        pi, H = Util.indEig(pi, H, numpy.arange(pi.shape[0])[numpy.abs(pi)>EigenUpdater.tol])
        # keep greatest eigenvalues
        #pi, H = Util.indEig(pi, H, numpy.flipud(numpy.argsort(pi))[:min(k,pi.shape[0])])


        V = D.dot(H)

        if __debug__:
            if not Parameter.checkOrthogonal(D, tol=EigenUpdater.tol, softCheck=True, investigate=True, arrayInfo="D in eigenAdd2()"):
                print("pi:\n", pi)
            if not Parameter.checkOrthogonal(H, tol=EigenUpdater.tol, softCheck=True, investigate=True, arrayInfo="H in eigenAdd2()"):
                print("pi:\n", pi)

        if ProfileUtils.memory() > 10**9:
            ProfileUtils.memDisplay(locals())
            
        #logging.debug("</ eigenAdd2 >")
        if debug:
            return pi, V, D, DStarY1Y2StarD + DStarY1Y2StarD.conj().T
        else:
            return pi, V
Example #46
 def __init__(self, k):
     """
     Initialise this object with integer k which is the number of clusters.
     """
     Parameter.checkInt(k, 0, float('inf'))
     self.k = k