예제 #1
0
 def setEll(self, ell):
     """
     Store the initial number of vertices after checking that it is an
     integer no smaller than 2.

     :param ell: the initial number of vertices.
     :type ell: :class:`int`
     """
     smallest, unbounded = 2, float('inf')
     Parameter.checkInt(ell, smallest, unbounded)
     self.ell = ell
    def cut(self, d):
        """
        Return a new tree containing all the vertices of the current one up to
        a depth of d. The edge and vertex labels are copied by reference only.

        Vertices at depth d are not expanded: nothing below the cut depth can
        add a vertex or an edge to the result, so the traversal stops there
        instead of walking the whole tree.

        :param d: The depth of the new cut tree
        :type d: :class:`int`

        :returns: A new :class:`DictTree` with the vertices at depth <= d and the edges between them.
        """
        Parameter.checkInt(d, 0, float("inf"))

        root = self.getRootId()
        newTree = DictTree()
        newTree.setVertex(root)

        # Depth-first traversal; each entry is (vertex id, depth of that vertex).
        # Only vertices with depth <= d are ever pushed.
        stack = [(root, 0)]

        while stack:
            vertexId, depth = stack.pop()
            newTree.setVertex(vertexId, self.getVertex(vertexId))

            if depth == d:
                continue

            for neighbour in self.neighbours(vertexId):
                stack.append((neighbour, depth + 1))
                newTree.addEdge(vertexId, neighbour, self.getEdge(vertexId, neighbour))

        return newTree
예제 #3
0
 def setM(self, m):
     """
     Store the number of edges added at each step. The value is checked to
     be an integer between 0 and ``self.ell``, so ``self.ell`` has to be set
     before this method is called.

     :param m: the number of edges to be added at each step
     :type m: :class:`int`
     """
     upperLimit = self.ell
     Parameter.checkInt(m, 0, upperLimit)
     self.m = m
 def setBestResponse(self, bestResponse):
     """
     Store the label that is treated as "positive". The value only has to be
     an integer; no finite range is imposed.

     :param bestResponse: the label corresponding to "positive"
     :type bestResponse: :class:`int`
     """
     unbounded = float('inf')
     Parameter.checkInt(bestResponse, -unbounded, unbounded)
     self.bestResponse = bestResponse
 def setM(self, m):
     """
     Set how many edges are added at each step, after validating the value
     as an integer in the range 0 to ``self.ell`` (which must already be set).

     :param m: the number of edges to be added at each step
     :type m: :class:`int`
     """
     lowest, highest = 0, self.ell
     Parameter.checkInt(m, lowest, highest)
     self.m = m
예제 #6
0
    def evaluateCvOuter(self, X, y, folds):
        """
        Compute AUC values and ROC curves over stratified k-fold cross
        validation, using the model selection routine that matches
        ``self.kernel`` ("linear" or "rbf").

        :param X: The examples, passed through to AbstractPredictor.evaluateLearn2
        :param y: The labels, also used to stratify the folds

        :param folds: The number of cross validation folds, at least 2
        :type folds: :class:`int`

        :returns: A tuple (bestParams, allMetrics, bestMetaDicts) in which allMetrics is
         [train AUCs, train ROCs, test AUCs, test ROCs]; bestParams and bestMetaDicts are always empty dicts.
        :raises ValueError: if ``self.kernel`` is neither "linear" nor "rbf"
        """
        Parameter.checkInt(folds, 2, float('inf'))
        idx = cross_val.StratifiedKFold(y, folds)
        metricMethods = [Evaluator.auc2, Evaluator.roc]

        if self.kernel == "linear":
            logging.debug("Running linear rank SVM ")
            modelSelect = self.modelSelectLinear
        elif self.kernel == "rbf":
            logging.debug("Running RBF rank SVM")
            modelSelect = self.modelSelectRBF
        else:
            # Previously an unknown kernel fell through and failed later with an
            # unbound local variable; fail here with a clear message instead
            raise ValueError("Unknown kernel: " + str(self.kernel))

        trainMetrics, testMetrics = AbstractPredictor.evaluateLearn2(X, y, idx, modelSelect, self.predict, metricMethods)

        # The metrics come back in the same order as metricMethods: AUC first, ROC second
        bestTrainAUCs, bestTrainROCs = trainMetrics[0], trainMetrics[1]
        bestTestAUCs, bestTestROCs = testMetrics[0], testMetrics[1]

        bestParams = {}
        bestMetaDicts = {}
        allMetrics = [bestTrainAUCs, bestTrainROCs, bestTestAUCs, bestTestROCs]

        return (bestParams, allMetrics, bestMetaDicts)
예제 #7
0
    def cut(self, d):
        """
        Return a new tree containing all the vertices of the current one up to
        a depth of d. The edge and vertex labels are copied by reference only.

        The traversal never descends below depth d, because vertices and edges
        beyond that depth are not part of the result.

        :param d: The depth of the new cut tree
        :type d: :class:`int`

        :returns: A new :class:`DictTree` with the vertices at depth <= d and the edges between them.
        """
        Parameter.checkInt(d, 0, float("inf"))

        root = self.getRootId()
        newTree = DictTree()
        newTree.setVertex(root)

        # Stack of (vertex id, depth); every entry satisfies depth <= d
        stack = [(root, 0)]

        while stack:
            vertexId, depth = stack.pop()
            newTree.setVertex(vertexId, self.getVertex(vertexId))

            if depth < d:
                for neighbour in self.neighbours(vertexId):
                    stack.append((neighbour, depth + 1))
                    newTree.addEdge(vertexId, neighbour,
                                    self.getEdge(vertexId, neighbour))

        return newTree
예제 #8
0
    def shuffleSplit(repetitions, numExamples, trainProportion=None):
        """
        Random permutation cross-validation iterator. The training set is sampled
        without replacement and of size (repetitions-1)/repetitions of the examples,
        and the test set represents the remaining examples. Each repetition is
        sampled independently.

        Note that with the default training size and repetitions=1 the training
        set is empty.

        :param repetitions: The number of repetitions to perform.
        :type repetitions: :class:`int`

        :param numExamples: The number of examples.
        :type numExamples: :class:`int`

        :param trainProportion: The size of the training set relative to numExamples, between 0 and 1 or None to use (repetitions-1)/repetitions
        :type trainProportion: :class:`float`

        :returns: A list with one (trainIndices, testIndices) tuple per repetition.
        """
        Parameter.checkInt(numExamples, 2, float('inf'))
        Parameter.checkInt(repetitions, 1, float('inf'))

        # The training size is used as a slice bound, so it has to be an integer;
        # true division would produce a float under Python 3
        if trainProportion is None:
            trainSize = (repetitions - 1) * numExamples // repetitions
        else:
            Parameter.checkFloat(trainProportion, 0.0, 1.0)
            trainSize = int(trainProportion * numExamples)

        idx = []
        for _ in range(repetitions):
            inds = numpy.random.permutation(numExamples)
            idx.append((inds[:trainSize], inds[trainSize:]))
        return idx
예제 #9
0
파일: Latex.py 프로젝트: omosola/APGL
    def array1DToRow(X, precision=3):
        """
        Take a 1D numpy array and return it in latex table row format i.e. x1 & x2 .. xn.
        Floating point arrays are formatted with the given precision, all other
        arrays with the integer format "%d". An empty array gives an empty string.

        :param X: The array to print
        :type X: :class:`ndarray`

        :param precision: The precision of the printed floating point numbers.
        :type precision: :class:`int`

        :returns: The formatted row as a string.
        :raises ValueError: if X is not one dimensional
        """
        Parameter.checkInt(precision, 0, 10)
        if X.ndim != 1:
            raise ValueError("Array must be one dimensional")

        # dtype.kind covers every floating point width; comparing the dtype with
        # ``float`` only matched float64, so float32 values were printed as integers
        if X.dtype.kind == "f":
            fmtStr = "%." + str(precision) + "f"
        else:
            fmtStr = "%d"

        return " & ".join(fmtStr % value for value in X)
예제 #10
0
 def svd(A, k, q=2):
     """
     Compute the SVD of a sparse or dense matrix A, finding the first k
     singular vectors/values, using exponent q. Returns the left and right singular
     vectors, and the singular values. The resulting matrix can be approximated
     using A ~ U s V.T.

     :param A: The matrix to decompose; it must provide ``shape``, ``dot`` and ``T``.

     :param k: The number of random projection directions, at least 1.
     :type k: :class:`int`

     :param q: The number of power iterations applied to the projection, at least 1.
     :type q: :class:`int`

     :returns: A tuple (U, s, V).
     """
     Parameter.checkInt(k, 1, float("inf"))
     Parameter.checkInt(q, 1, float("inf"))

     # The random test matrix needs one row per *column* of A for A.dot(omega)
     # to be defined; sizing it from the row count only worked for square A
     numCols = A.shape[1]
     omega = numpy.random.randn(numCols, k)
     Y = A.dot(omega)

     # Power iterations: Y <- (A A^T) Y
     for _ in range(q):
         Y = A.T.dot(Y)
         Y = A.dot(Y)

     # Orthonormal basis of the sampled range, then an SVD of the small matrix Q^T A
     Q, _ = numpy.linalg.qr(Y)
     B = A.T.dot(Q).T
     U, s, V = numpy.linalg.svd(B, full_matrices=False)
     V = V.T
     U = Q.dot(U)

     return U, s, V
예제 #11
0
    def learnModel(self, X, Y):
        """
        Learn the weight matrix which matches X and Y by computing
        U = (X'X + lmbda I)^-1 X'Y. The result is stored in ``self.U`` and
        returned.

        :param X: The examples as rows, with at least one row and one column
        :type X: :class:`ndarray`

        :param Y: The targets
        :type Y: :class:`ndarray`
        """
        Parameter.checkClass(X, numpy.ndarray)
        Parameter.checkClass(Y, numpy.ndarray)
        numExamples = X.shape[0]
        Parameter.checkInt(numExamples, 1, float('inf'))
        numFeatures = X.shape[1]
        Parameter.checkInt(numFeatures, 1, float('inf'))

        logging.debug("Training with " + str(numExamples) + " examples and " + str(numFeatures) + " features")

        gram = numpy.dot(X.T, X)
        crossProduct = numpy.dot(X.T, Y)
        regulariser = self.lmbda*numpy.eye(numFeatures)

        self.U = numpy.dot(numpy.linalg.inv(gram + regulariser), crossProduct)

        residual = numpy.dot(X, self.U) - Y
        logging.debug("Trace of X'X " + str(numpy.trace(gram)))
        logging.debug("Error " + str(numpy.linalg.norm(residual)))

        return self.U
 def setEll(self, ell):
     """
     Set the initial number of vertices, which is validated as an integer
     of at least 2.

     :param ell: the initial number of vertices.
     :type ell: :class:`int`
     """
     noUpperLimit = float("inf")
     Parameter.checkInt(ell, 2, noUpperLimit)
     self.ell = ell
예제 #13
0
    def randCrossValidation(folds, numExamples):
        """
        Build k-fold cross validation splits over a random permutation of the
        example indices. The permutation is cut into ``folds`` consecutive
        slices; each slice is the test set of one split and the remaining
        indices form its training set.

        :param folds: The number of cross validation folds.
        :type folds: :class:`int`

        :param numExamples: The number of examples.
        :type numExamples: :class:`int`

        :returns: A list of tuples (trainIndices, testIndices), one per fold.
        """
        Parameter.checkInt(folds, 1, numExamples)
        Parameter.checkInt(numExamples, 2, float('inf'))

        foldSize = float(numExamples)/folds
        shuffled = numpy.random.permutation(numExamples)
        everyIndex = numpy.arange(0, numExamples)

        # Slice boundaries of the folds within the shuffled index array
        boundaries = [int(foldSize*i) for i in range(folds + 1)]

        indexList = []
        for start, stop in zip(boundaries[:-1], boundaries[1:]):
            testIndices = shuffled[start:stop]
            trainIndices = numpy.setdiff1d(everyIndex, testIndices)
            indexList.append((trainIndices, testIndices))

        return indexList
예제 #14
0
    def setRandomInfected(self, numInitialInfected, proportionHetero, t=0.0):
        """
        Infect randomly chosen people at time t. int(numInitialInfected*proportionHetero)
        of them are drawn from the heterosexual vertices and the remainder from
        the bisexual vertices; if a group has fewer members than requested, all
        of its members are infected.

        :param numInitialInfected: The number of people to infect, between 0 and self.size
        :type numInitialInfected: :class:`int`

        :param proportionHetero: The proportion of infections given to heterosexual vertices, in [0, 1]
        :type proportionHetero: :class:`float`

        :param t: The infection time passed to ``self.vlist.setInfected``
        """
        Parameter.checkInt(numInitialInfected, 0, self.size)
        Parameter.checkFloat(proportionHetero, 0.0, 1.0)

        everyone = numpy.arange(self.size)
        orientation = self.vlist.V[:, HIVVertices.orientationIndex]
        heteroInds = everyone[orientation == HIVVertices.hetero]
        biInds = everyone[orientation == HIVVertices.bi]

        numHetero = int(numInitialInfected*proportionHetero)
        numBi = numInitialInfected - numHetero

        # The heterosexual permutation is drawn first, then the bisexual one
        heteroChosen = heteroInds[numpy.random.permutation(heteroInds.shape[0])[0:numHetero]]
        biChosen = biInds[numpy.random.permutation(biInds.shape[0])[0:numBi]]

        for j in heteroChosen:
            self.vlist.setInfected(j, t)

        for j in biChosen:
            self.vlist.setInfected(j, t)
예제 #15
0
 def setNumTrees(self, numTrees):
     """
     Store the number of trees in the forest after checking that it is an
     integer of at least 1.

     :param numTrees: The number of trees to generate in the forest.
     :type numTrees: :class:`int`
     """
     atLeast, unbounded = 1, float('inf')
     Parameter.checkInt(numTrees, atLeast, unbounded)
     self.numTrees = numTrees
    def generateGraph(self, alpha, p, dim):
        """
        Add random edges to ``self.graph`` and return it. Each vertex is given a
        uniformly random point in ``dim`` dimensions (stored in ``self.X``), and
        an edge (i, j) with i != j is added with probability exp(-alpha * D[i, j]),
        where D is computed by KernelUtils.computeDistanceMatrix from the Gram
        matrix of the points. The graph is then passed through an
        ErdosRenyiGenerator with parameter p.

        :param alpha: Decay rate applied to D, non-negative
        :type alpha: :class:`float`

        :param p: The parameter given to ErdosRenyiGenerator, in [0, 1]
        :type p: :class:`float`

        :param dim: The dimension of the random points, non-negative
        :type dim: :class:`int`

        :returns: The updated ``self.graph``.
        """
        Parameter.checkFloat(alpha, 0.0, float('inf'))
        Parameter.checkFloat(p, 0.0, 1.0)
        Parameter.checkInt(dim, 0, float('inf'))

        numVertices = self.graph.getNumVertices()
        self.X = numpy.random.rand(numVertices, dim)

        D = KernelUtils.computeDistanceMatrix(numpy.dot(self.X, self.X.T))
        P = numpy.exp(-alpha * D)

        # No self edges: zero the diagonal of the probability matrix
        diagIndices = numpy.arange(numVertices)
        P[diagIndices, diagIndices] = 0

        B = numpy.random.rand(numVertices, numVertices) <= P

        # NOTE(review): an earlier comment described B as symmetric, but every
        # entry is compared against an independent random draw, so (i, j) and
        # (j, i) can differ - confirm whether symmetric sampling was intended.
        # The nonzero entries are located once rather than on every iteration.
        rows, cols = numpy.nonzero(B)
        for v1, v2 in zip(rows, cols):
            self.graph.addEdge(v1, v2)

        erdosRenyiGenerator = ErdosRenyiGenerator(p)
        self.graph = erdosRenyiGenerator.generate(self.graph, False)

        return self.graph
예제 #17
0
    def bootstrap2(repetitions, numExamples):
        """
        Perform bootstrap sampling in which the training set is a sample with
        replacement of size numExamples from the dataset, and the test set is
        made of the examples which are not present in the training set. Returns
        a list of tuples of the form (trainIndices, testIndices).

        The 0.632 bootstrap step that oversamples the test set with 0.368 of the
        training examples is not performed by this implementation.

        :param repetitions: The number of repetitions of bootstrap to perform.
        :type repetitions: :class:`int`

        :param numExamples: The number of examples.
        :type numExamples: :class:`int`

        """
        Parameter.checkInt(numExamples, 2, float('inf'))
        Parameter.checkInt(repetitions, 1, float('inf'))

        everyIndex = numpy.arange(numExamples)
        inds = []

        for _ in range(repetitions):
            trainInds = numpy.random.randint(numExamples, size=numExamples)
            drawn = numpy.unique(trainInds)
            testInds = numpy.setdiff1d(everyIndex, drawn)
            inds.append((trainInds, testInds))

        return inds
예제 #18
0
    def predictEdges(self, vertexIndices):
        r"""
        Make a prediction for a series of vertices using the following score
        between a vertex x and a candidate vertex y:

            \sum_{z \in n(x) \cap n(y)} 1/\log |n(z)|

        where n(v) is the set of vertices with a nonzero weight to v. Common
        neighbours z with \log |n(z)| = 0 are skipped.

        :param vertexIndices: The indices of the vertices to make predictions for
        :type vertexIndices: :class:`ndarray`

        :returns: A tuple (P, S) of matrices with one row per entry of vertexIndices and
         self.windowSize columns, filled from ``self.indicesFromScores``.
        """
        numVertices = self.graph.getNumVertices()

        Parameter.checkInt(self.windowSize, 1, numVertices)
        logging.info("Running predictEdges in " + str(self.__class__.__name__))

        P = numpy.zeros((vertexIndices.shape[0], self.windowSize))
        S = numpy.zeros((vertexIndices.shape[0], self.windowSize))
        W = self.graph.getWeightMatrix()

        for i in range(vertexIndices.shape[0]):
            Util.printIteration(i, self.printStep, vertexIndices.shape[0])
            scores = numpy.zeros(numVertices)

            for j in range(numVertices):
                # A vertex is a common neighbour when both weights to it are nonzero
                commonNeighbours = numpy.nonzero(W[vertexIndices[i], :] * W[j, :])[0]

                for k in commonNeighbours:
                    q = numpy.log(numpy.nonzero(W[k, :])[0].shape[0])
                    if q != 0:
                        scores[j] = scores[j] + 1/q

            P[i, :], S[i, :] = self.indicesFromScores(vertexIndices[i], scores)

        return P, S
예제 #19
0
    def eigenAdd(omega, Q, Y, k):
        """
        Perform an eigen update of the form A*A + Y*Y in which Y is a low-rank matrix
        and A^*A = Q Omega Q*. We use the rank-k approximation of A:  Q_k Omega_k Q_k^*
        and then approximate [A^*A_k Y^*Y]_k.

        Eigenvalues of the input and singular values of the residual of Y whose
        absolute value does not exceed EigenUpdater.tol are discarded before the
        update is formed.

        :param omega: The eigenvalues of A^*A as a 1-d array
        :type omega: :class:`ndarray`

        :param Q: The eigenvectors, one column per entry of omega
        :type Q: :class:`ndarray`

        :param Y: The low-rank matrix to add

        :param k: The number of eigenvalues/vectors to return, between 0 and omega.shape[0]
        :type k: :class:`int`

        :returns: A tuple (pi, V) holding at most k eigenvalues, largest in absolute value first, and the matching vectors as columns.
        :raises ValueError: if omega is not 1-d or its length differs from the number of columns of Q
        """
        Parameter.checkInt(k, 0, omega.shape[0])
        if omega.ndim != 1:
            raise ValueError("omega must be 1-d array")
        if omega.shape[0] != Q.shape[1]:
            raise ValueError("Must have same number of eigenvalues and eigenvectors")

        if __debug__:
            Parameter.checkOrthogonal(Q, tol=EigenUpdater.tol, softCheck=True, arrayInfo="input Q in eigenAdd()")

        #Taking the abs of the eigenvalues is correct
        inds = numpy.flipud(numpy.argsort(numpy.abs(omega)))
        # The tolerance mask has to be evaluated in the same (sorted) order as
        # inds; a mask in the original order picks the wrong entries whenever
        # omega is not already sorted by decreasing absolute value
        inds = inds[numpy.abs(omega[inds]) > EigenUpdater.tol]

        omega, Q = Util.indEig(omega, Q, inds)
        Omega = numpy.diag(omega)

        YY = Y.conj().T.dot(Y)
        QQ = Q.dot(Q.conj().T)
        # Part of Y that is not captured by the retained eigenvectors
        Ybar = Y - Y.dot(QQ)

        Pbar, sigmaBar, Qbar = numpy.linalg.svd(Ybar, full_matrices=False)
        inds = numpy.flipud(numpy.argsort(numpy.abs(sigmaBar)))
        inds = inds[numpy.abs(sigmaBar[inds]) > EigenUpdater.tol]
        Pbar, sigmaBar, Qbar = Util.indSvd(Pbar, sigmaBar, Qbar, inds)

        SigmaBar = numpy.diag(sigmaBar)
        # Qbar is recomputed from Ybar and Pbar and its columns rescaled to unit norm
        Qbar = Ybar.T.dot(Pbar)
        Qbar = Qbar.dot(numpy.diag(numpy.diag(Qbar.T.dot(Qbar))**-0.5))

        r = sigmaBar.shape[0]

        YQ = Y.dot(Q)
        Zeros = numpy.zeros((r, omega.shape[0]))
        D = numpy.c_[Q, Qbar]

        YYQQ = YY.dot(QQ)
        Z = D.conj().T.dot(YYQQ + YYQQ.conj().T).dot(D)
        F = numpy.c_[numpy.r_[Omega - YQ.conj().T.dot(YQ), Zeros], numpy.r_[Zeros.T, SigmaBar.conj().dot(SigmaBar)]]
        F = F + Z

        pi, H = scipy.linalg.eigh(F)
        inds = numpy.flipud(numpy.argsort(numpy.abs(pi)))

        H = H[:, inds[0:k]]
        pi = pi[inds[0:k]]

        V = D.dot(H)
        return pi, V
    def setK(self, k):
        """
        Set the number of iterations k, which is validated as an integer of
        at least 1.

        :param k: The number of iterations.
        :type k: :class:`int`
        """
        unbounded = float('inf')
        Parameter.checkInt(k, 1, unbounded)
        self.k = k
예제 #21
0
    def evaluateLearn(X, y, idx, learnModel, predict, metricMethod, progress=True):
        """
        Evaluate a learning algorithm using the given list of training/test splits.
        For every split the model is learnt on the training examples, predictions
        are made on the test examples and metricMethod(predictedY, realY) measures
        their quality.

        :param X: A matrix with examples as rows
        :type X: :class:`ndarray`

        :param y: A vector of labels
        :type y: :class:`ndarray`

        :param idx: A list of training/test splits
        :type idx: :class:`list`

        :param learnModel: A function such that learnModel(X, y) finds a mapping from X to y
        :type learnModel: :class:`function`

        :param predict: A function such that predict(X) makes predictions for X
        :type predict: :class:`function`

        :param metricMethod: A function such that metricMethod(predY, testY) returns the quality of predicted labels predY
        :type metricMethod: :class:`function`

        :param progress: Whether to print the iteration number for each split
        :type progress: :class:`bool`

        Output: an array with the metric value of each split.
        """
        Parameter.checkClass(X, numpy.ndarray)
        Parameter.checkArray(X, softCheck=True)
        Parameter.checkInt(X.shape[0], 1, float('inf'))
        Parameter.checkClass(y, numpy.ndarray)
        Parameter.checkArray(y, softCheck=True)

        if y.ndim != 1:
            raise ValueError("Dimention of y must be 1")

        numSplits = len(idx)
        metrics = numpy.zeros(numSplits)
        logging.debug("EvaluateLearn: Using " + str(numSplits) + " splits on " + str(X.shape[0]) + " examples")

        for i, (idxtr, idxts) in enumerate(idx):
            if progress:
                Util.printConciseIteration(i, 1, numSplits)

            trainX, trainY = X[idxtr, :], y[idxtr]
            testX, testY = X[idxts, :], y[idxts]

            learnModel(trainX, trainY)
            predY = predict(testX)
            gc.collect()

            metrics[i] = metricMethod(predY, testY)

        return metrics
    def setK(self, k):
        """
        Store the number of iterations k after checking that it is an integer
        greater than or equal to 1.

        :param k: The number of iterations.
        :type k: :class:`int`
        """
        minIterations, noLimit = 1, float('inf')
        Parameter.checkInt(k, minIterations, noLimit)
        self.k = k
예제 #23
0
 def setPosteriorSampleSize(self, posteriorSampleSize):
     """
     Set the sample size of the posterior distribution (population size).
     The value is validated as a non-negative integer and stored in ``self.N``.

     :param posteriorSampleSize: The size of the population
     :type posteriorSampleSize: `int`
     """
     # The builtin float is used because the numpy.float alias no longer
     # exists in current NumPy releases
     Parameter.checkInt(posteriorSampleSize, 0, float('inf'))
     self.N = posteriorSampleSize
    def __init__(self, vList, maxEdgeTypes, undirected=True):
        """
        Create one SparseGraph per edge type, all built from the same vertex
        list and direction setting, and keep them in ``self.sparseGraphs``.

        :param vList: The vertex list handed to every SparseGraph

        :param maxEdgeTypes: The number of edge types, at least 1
        :type maxEdgeTypes: :class:`int`

        :param undirected: Passed to every SparseGraph as its second argument
        :type undirected: :class:`bool`
        """
        Parameter.checkInt(maxEdgeTypes, 1, float('inf'))

        self.vList = vList
        self.undirected = undirected
        self.maxEdgeTypes = maxEdgeTypes
        self.sparseGraphs = [SparseGraph(vList, undirected) for _ in range(maxEdgeTypes)]
예제 #25
0
    def setDegree(self, degree):
        """
        Set the degree parameter, which is validated as an integer of at least 1.

        :param degree: kernel degree parameter.
        :type degree: :class:`int`
        """
        unbounded = float('inf')
        Parameter.checkInt(degree, 1, unbounded)
        self.degree = degree
예제 #26
0
    def setRandomInfected(self, numInitialInfected, t=0.0):
        """
        Pick a number of people randomly from the susceptible set and mark them
        as infected at time t.

        :param numInitialInfected: The number of people to infect, at most the size of the susceptible set
        :type numInitialInfected: :class:`int`

        :param t: The infection time passed to ``self.vlist.setInfected``
        """
        susceptibleList = list(self.getSusceptibleSet())
        numSusceptible = len(susceptibleList)

        Parameter.checkInt(numInitialInfected, 0, numSusceptible)
        chosen = numpy.random.permutation(numSusceptible)[0:numInitialInfected]

        for position in chosen:
            self.vlist.setInfected(susceptibleList[position], t)
예제 #27
0
    def evaluateCv(self, X, y, folds, metricMethod=Evaluator.binaryError):
        """
        Run cross validation with this predictor's learnModel and predict
        methods and summarise the chosen metric over the folds.

        :param X: The examples
        :param y: The labels; y.shape[0] gives the number of examples

        :param folds: The number of cross validation folds, at least 2
        :type folds: :class:`int`

        :param metricMethod: The metric passed to AbstractPredictor.evaluateLearn

        :returns: A tuple (mean, variance) of the metric over the folds.
        """
        Parameter.checkInt(folds, 2, float('inf'))

        splits = Sampling.crossValidation(folds, y.shape[0])
        foldMetrics = AbstractPredictor.evaluateLearn(X, y, splits, self.learnModel, self.predict, metricMethod)

        return (numpy.mean(foldMetrics, 0), numpy.var(foldMetrics, 0))
    def __init__(self, numVertices):
        """
        Create a vertex list with the specified number of vertices. The
        vertices are kept in the dict ``self.V``, keyed 0 to numVertices-1,
        and every one is initialised as None.

        :param numVertices: The number of vertices.
        :type numVertices: :class:`int`
        """
        Parameter.checkInt(numVertices, 0, float('inf'))

        self.V = dict.fromkeys(range(numVertices))
    def __init__(self, numVertices):
        """
        Create a vertex list holding numVertices vertices, stored in the dict
        ``self.V`` under the keys 0 to numVertices-1 with an initial value of
        None for each vertex.

        :param numVertices: The number of vertices.
        :type numVertices: :class:`int`
        """
        Parameter.checkInt(numVertices, 0, float('inf'))

        self.V = {vertexId: None for vertexId in range(numVertices)}
예제 #30
0
    def eigenConcat(omega, Q, AB, BB, k):
        """
        Find the eigen update of a matrix [A, B]'[A B] where  A'A = V diag(s) V*
        and AB = A*B, BB = B*B. Q is the set of eigenvectors of A*A and omega is the
        vector of eigenvalues. Only the k eigenvalues of A*A largest in absolute
        value are used, and eigenvalues of the updated matrix whose absolute value
        does not exceed EigenUpdater.tol are discarded.

        :param omega: The eigenvalues as a real 1-d array
        :type omega: :class:`ndarray`

        :param Q: The real eigenvectors, one column per entry of omega
        :type Q: :class:`ndarray`

        :param AB: A real matrix with as many rows as Q and as many columns as BB
        :type AB: :class:`ndarray`

        :param BB: A real square matrix
        :type BB: :class:`ndarray`

        :param k: The number of eigenvalues/vectors to keep, between 0 and omega.shape[0]
        :type k: :class:`int`

        :returns: A tuple (pi, V) holding at most k eigenvalues, largest in absolute value first, and the matching vectors as columns.
        :raises ValueError: if an input is complex or the shapes are inconsistent
        """
        Parameter.checkInt(k, 0, omega.shape[0])
        if not numpy.isrealobj(omega) or not numpy.isrealobj(Q):
            raise ValueError("Eigenvalues and eigenvectors must be real")
        if not numpy.isrealobj(AB) or not numpy.isrealobj(BB):
            raise ValueError("AB and BB must be real")
        if omega.ndim != 1:
            raise ValueError("omega must be 1-d array")
        if omega.shape[0] != Q.shape[1]:
            raise ValueError("Must have same number of eigenvalues and eigenvectors")
        if Q.shape[0] != AB.shape[0]:
            raise ValueError("Q must have the same number of rows as AB")
        if AB.shape[1] != BB.shape[0] or  BB.shape[0]!=BB.shape[1]:
            raise ValueError("AB must have the same number of cols/rows as BB")

        #Check Q is orthogonal
        if __debug__:
            Parameter.checkOrthogonal(Q, tol=EigenUpdater.tol, softCheck=True, arrayInfo = "input Q in eigenConcat()")

        m = Q.shape[0]
        p = BB.shape[0]

        inds = numpy.flipud(numpy.argsort(numpy.abs(omega)))
        Q = Q[:, inds[0:k]]
        omega = omega[inds[0:k]]
        Omega = numpy.diag(omega)

        QAB = Q.conj().T.dot(AB)

        # F is the updated matrix expressed in the basis D = [[Q, 0], [0, I]]
        F = numpy.c_[numpy.r_[Omega, QAB.conj().T], numpy.r_[QAB, BB]]
        D = numpy.c_[numpy.r_[Q, numpy.zeros((p, Q.shape[1]))], numpy.r_[numpy.zeros((m, p)), numpy.eye(p)]]

        pi, H = scipy.linalg.eigh(F)

        inds = numpy.flipud(numpy.argsort(numpy.abs(pi)))
        # The mask must follow the sorted order of inds; applying a mask in the
        # original order of pi would keep or drop the wrong eigenvalues
        inds = inds[numpy.abs(pi[inds]) > EigenUpdater.tol]

        H = H[:, inds[0:k]]
        pi = pi[inds[0:k]]

        V = numpy.dot(D, H)

        return pi, V
예제 #31
0
    def evaluateCvOuter(self, X, Y, folds, leafRank):
        """
        Run stratified cross validation, recording the AUC and the ROC curve on
        the training and test examples of every fold. In this case Y is a 1D array.

        :param X: The examples as rows
        :type X: :class:`ndarray`

        :param Y: The labels
        :type Y: :class:`ndarray`

        :param folds: The number of cross validation folds, at least 2
        :type folds: :class:`int`

        :param leafRank: Passed to ``self.setLeafRank`` before learning

        :returns: A tuple (bestParams, allMetrics, bestMetaDicts) where allMetrics is
         [train AUCs, train ROCs, test AUCs, test ROCs], bestParams is an empty list and
         bestMetaDicts holds one empty dict per fold.
        :raises ValueError: if Y is not 1D
        """
        Parameter.checkClass(X, numpy.ndarray)
        Parameter.checkClass(Y, numpy.ndarray)
        Parameter.checkInt(folds, 2, float('inf'))
        if Y.ndim != 1:
            raise ValueError("Expecting Y to be 1D")

        indexList = cross_val.StratifiedKFold(Y, folds)
        self.setLeafRank(leafRank)

        bestParams, bestMetaDicts = [], []
        bestTrainROCs, bestTestROCs = [], []
        bestTrainAUCs = numpy.zeros(folds)
        bestTestAUCs = numpy.zeros(folds)

        for i, (trainInds, testInds) in enumerate(indexList):
            Util.printIteration(i, 1, folds)
            trainX, testX = X[trainInds, :], X[testInds, :]
            trainY, testY = Y[trainInds], Y[testInds]

            logging.debug("Distribution of labels in train: " + str(numpy.bincount(trainY)))
            logging.debug("Distribution of labels in test: " + str(numpy.bincount(testY)))

            self.learnModel(trainX, trainY)
            predTrainY = self.predict(trainX)
            predTestY = self.predict(testX)
            bestTrainAUCs[i] = Evaluator.auc(predTrainY, trainY)
            bestTestAUCs[i] = Evaluator.auc(predTestY, testY)

            # Keep the ROC curves of this fold
            bestTrainROCs.append(Evaluator.roc(trainY, predTrainY))
            bestTestROCs.append(Evaluator.roc(testY, predTestY))

            bestMetaDicts.append({})

        logging.debug("Mean test AUC = " + str(numpy.mean(bestTestAUCs)))
        logging.debug("Std test AUC = " + str(numpy.std(bestTestAUCs)))

        allMetrics = [bestTrainAUCs, bestTrainROCs, bestTestAUCs, bestTestROCs]
        return (bestParams, allMetrics, bestMetaDicts)
예제 #32
0
    def learnModel(self, graph):
        """
        Learn a prediction model based on all of the edges of the input graph.
        For each ego with at least one neighbour, X contains its neighbours and
        an equal number (where available) of randomly chosen non-neighbours,
        with y = 1 for a neighbour and -1 otherwise. ``self.alterRegressor``
        learns a set of weights w for each such ego network, and
        ``self.egoRegressor`` is then learnt from the ego vertex features onto
        those weights.

        :param graph: The input graph to learn from.
        :type graph: class:`apgl.graph.AbstractSingleGraph`
        """
        numVertices = graph.getNumVertices()

        Parameter.checkInt(self.windowSize, 1, numVertices)
        self.graph = graph
        logging.info("Learning model on graph of size " + str(numVertices))

        allIndices = numpy.arange(0, numVertices)
        V = graph.getVertexList().getVertices(allIndices)
        numFeatures = graph.getVertexList().getNumFeatures()
        W = numpy.zeros((0, numFeatures))
        Xe = numpy.zeros((0, numFeatures))
        printStep = numpy.floor(numVertices/10)

        for ego in range(numVertices):
            Util.printIteration(ego, printStep, numVertices)
            neighbours = graph.neighbours(ego)
            numNeighbours = neighbours.shape[0]

            # Egos without neighbours contribute nothing to the model
            if numNeighbours == 0:
                continue

            nonNeighbours = numpy.setdiff1d(allIndices, neighbours)
            chosen = numpy.random.permutation(nonNeighbours.shape[0])[0:numNeighbours]
            X = numpy.r_[V[neighbours, :], V[nonNeighbours[chosen], :]]
            y = numpy.ones(X.shape[0])
            y[numNeighbours:] = -1

            w = self.alterRegressor.learnModel(X, y)
            W = numpy.r_[W, numpy.array([w])]
            Xe = numpy.r_[Xe, numpy.array([V[ego, :]])]

        #Now regress the ego features Xe onto the learnt weights W
        self.egoRegressor.learnModel(Xe, W)
    def __init__(self, numVertices, numFeatures=0, dtype=float):
        """
        Create an empty (zeroed) VertexList with the specified number of features
        for each vertex and number of vertices. The vertices are stored in the
        numVertices x numFeatures matrix ``self.V``.

        :param numVertices: The number of vertices.
        :type numVertices: :class:`int`

        :param numFeatures: The number of features for each vertex.
        :type numFeatures: :class:`int`

        :param dtype: the data type for the vertex matrix, e.g numpy.int8. The default is
         the builtin float, which is what the removed ``numpy.float`` alias referred to.
        """
        Parameter.checkInt(numVertices, 0, float('inf'))
        Parameter.checkInt(numFeatures, 0, float('inf'))

        self.V = numpy.zeros((numVertices, numFeatures), dtype)
예제 #34
0
    def eigenRemove(omega, Q, n, k, debug=False):
        """
        Remove a set of rows and columns from a matrix whose eigen-decomposition
        is Q diag(omega) Q^T. Keep the first n rows/cols i.e. the rows/cols starting
        from n to the end are removed and k is the number of eigenvectors/values
        to return for the new matrix. We could generalise this to delete a given
        list of rows/cols.

        :param omega: The eigenvalues as a 1-d array
        :type omega: :class:`ndarray`

        :param Q: The eigenvectors, one column per entry of omega
        :type Q: :class:`ndarray`

        :param n: The number of leading rows/cols to keep, between 0 and Q.shape[0]
        :type n: :class:`int`

        :param k: The number of eigenvectors/values to use and return
        :type k: :class:`int`

        :param debug: If True, also return the intermediate matrices K, Y1, Y2 and the retained omega
        :type debug: :class:`bool`

        :returns: (pi, V[0:n, :]), or (pi, V[0:n, :], K, Y1, Y2, omega) when debug is True.
        :raises ValueError: if omega is not 1-d or its length differs from the number of columns of Q
        """
        Parameter.checkClass(omega, numpy.ndarray)
        Parameter.checkClass(Q, numpy.ndarray)
        Parameter.checkInt(k, 0, float('inf'))
        Parameter.checkInt(n, 0, Q.shape[0])
        if omega.ndim != 1:
            raise ValueError("omega must be 1-d array")
        if omega.shape[0] != Q.shape[1]:
            raise ValueError("Must have same number of eigenvalues and eigenvectors")

        if __debug__:
            Parameter.checkOrthogonal(Q, tol=EigenUpdater.tol, softCheck=True, arrayInfo="input Q in eigenRemove()")

        inds = numpy.flipud(numpy.argsort(numpy.abs(omega)))
        # NOTE(review): the threshold is applied to the signed eigenvalues, so
        # negative eigenvalues are dropped here - confirm this is intended
        inds = inds[omega[inds]>EigenUpdater.tol]

        omega, Q = Util.indEig(omega, Q, inds[0:k])
        # Blocks of Q diag(omega) Q^T that involve the removed rows/cols
        AB = (Q[0:n, :]*omega).dot(Q[n:, :].T)
        BB = (Q[n:, :]*omega).dot(Q[n:, :].T)

        # The removal is written as the symmetric update Y1 Y2^T + Y2 Y1^T
        p = BB.shape[0]
        Y1 = numpy.r_[numpy.zeros((n, p)), numpy.eye(p)]
        Y2 = -numpy.r_[AB, 0.5*BB]
        pi, V = EigenUpdater.eigenAdd2(omega, Q, Y1, Y2, k)

        #check last rows are zero
        tailNorm = numpy.linalg.norm(V[n:, :])
        if tailNorm >= EigenUpdater.tol:
            logging.warning("numpy.linalg.norm(V[n:, :])= %s" % str(tailNorm))

        if not debug:
            return pi, V[0:n, :]
        else:
            C = (Q*omega).dot(Q.T)
            K = C + Y1.dot(Y2.T) + Y2.dot(Y1.T)
            assert numpy.linalg.norm(BB- C[n:, n:]) <= EigenUpdater.tol
            assert numpy.linalg.norm(AB - C[0:n, n:]) <= EigenUpdater.tol, "%s \n %s" % (AB, C[0:n, n:])
            return pi, V[0:n, :], K, Y1, Y2, omega
예제 #35
0
    def __init__(self, numVertices, numFeatures=0, dtype=float):
        """
        Create an empty (zeroed) VertexList with the specified number of features
        for each vertex and number of vertices. ``self.V`` holds the vertices as
        a zero matrix with numVertices rows and numFeatures columns.

        :param numVertices: The number of vertices.
        :type numVertices: :class:`int`

        :param numFeatures: The number of features for each vertex.
        :type numFeatures: :class:`int`

        :param dtype: the data type for the vertex matrix, e.g numpy.int8. Defaults to the
         builtin float; the ``numpy.float`` alias for it is gone from current NumPy releases.
        """
        Parameter.checkInt(numVertices, 0, float('inf'))
        Parameter.checkInt(numFeatures, 0, float('inf'))

        self.V = numpy.zeros((numVertices, numFeatures), dtype)
예제 #36
0
    def evaluateStratifiedCv(self, X, y, folds, metricMethod=Evaluator.binaryError):
        """
        Compute the stratified cross validation according to a given metric.

        :param X: The examples
        :param y: The labels, used to stratify the folds

        :param folds: The number of cross validation folds, at least 2
        :type folds: :class:`int`

        :param metricMethod: The metric passed to AbstractPredictor.evaluateLearn

        :returns: A tuple (mean, variance) of the metric over the folds.
        :raises ImportError: if the scikit-learn cross validation module cannot be imported
        """
        # Only the import is guarded, so that the warning is not logged for an
        # unrelated ImportError raised while learning or predicting.
        # NOTE(review): this import path looks unlikely to resolve on current
        # scikit-learn releases - confirm and migrate if needed.
        try:
            from sklearn.cross_validation import cross_val
        except ImportError:
            logging.warning("Failed to import scikits")
            raise

        Parameter.checkInt(folds, 2, float('inf'))
        idx = cross_val.StratifiedKFold(y, folds)
        metrics = AbstractPredictor.evaluateLearn(X, y, idx, self.learnModel, self.predict, metricMethod)

        mean = numpy.mean(metrics, 0)
        var = numpy.var(metrics, 0)

        return (mean, var)
    def __init__(self, initialGraph, k):
        """
        Store a starting graph and the number of iterations k. Every vertex of
        the starting graph must have a self edge, i.e. the diagonal of its
        weight matrix may not contain a zero.

        :param initialGraph: The intial graph to use.
        :type initialGraph: :class:`apgl.graph.AbstractMatrixGraph`

        :param k: The number of iterations.
        :type k: :class:`int`

        :raises ValueError: if any diagonal entry of the weight matrix is zero
        """
        Parameter.checkInt(k, 1, float('inf'))

        W = initialGraph.getWeightMatrix()
        selfWeights = numpy.diag(W)
        noWeight = numpy.zeros(W.shape[0])

        if (selfWeights == noWeight).any():
            raise ValueError("Initial graph must have all self-edges")

        self.initialGraph, self.k = initialGraph, k
    def __init__(self, initialGraph, k):
        """
        Initialise with a starting graph and a number of iterations k. The
        weight matrix of the starting graph is checked for self edges: a zero
        anywhere on its diagonal is rejected.

        :param initialGraph: The intial graph to use.
        :type initialGraph: :class:`apgl.graph.AbstractMatrixGraph`

        :param k: The number of iterations.
        :type k: :class:`int`

        :raises ValueError: if a vertex of the starting graph has no self edge
        """
        Parameter.checkInt(k, 1, float('inf'))

        W = initialGraph.getWeightMatrix()
        missingSelfEdge = numpy.diag(W) == numpy.zeros(W.shape[0])
        if missingSelfEdge.any():
            raise ValueError("Initial graph must have all self-edges")

        self.initialGraph = initialGraph
        self.k = k
예제 #39
0
    def __init__(self, initialGraph, k):
        """
        Initialise with a starting graph, and number of iterations k. The edge
        values of the initial graph correspond to probabilities, so each one is
        checked to be a float in [0, 1], and every vertex must have a self edge.

        :param initialGraph: The intial graph to use.
        :type initialGraph: :class:`apgl.graph.AbstractMatrixGraph`

        :param k: The number of iterations.
        :type k: :class:`int`

        :raises ValueError: if a vertex of the starting graph has no self edge
        """
        Parameter.checkInt(k, 1, float('inf'))

        allEdges = initialGraph.getAllEdges()
        edgeVals = initialGraph.getEdgeValues(allEdges)
        Parameter.checkList(edgeVals, Parameter.checkFloat, [0.0, 1.0])

        W = initialGraph.getWeightMatrix()
        selfWeights = numpy.diag(W)
        if (selfWeights == numpy.zeros(W.shape[0])).any():
            raise ValueError("Initial graph must have all self-edges")

        self.initialGraph, self.k = initialGraph, k
예제 #40
0
    def testCheckInt(self):
        """
        Check that Parameter.checkInt accepts integers inside the closed range
        [minVal, maxVal], including half-infinite ranges and numpy int32 values,
        and raises ValueError for out of range values, reversed or float bounds
        and single element arrays.
        """
        # Named minVal/maxVal so that the builtins min and max are not shadowed
        minVal = 0
        maxVal = 5
        i = 2

        Parameter.checkInt(i, minVal, maxVal)
        Parameter.checkInt(minVal, minVal, maxVal)
        Parameter.checkInt(maxVal, minVal, maxVal)
        Parameter.checkInt(i, i, i)

        self.assertRaises(ValueError, Parameter.checkInt, i, maxVal, minVal)
        self.assertRaises(ValueError, Parameter.checkInt, i, float(minVal), maxVal)
        self.assertRaises(ValueError, Parameter.checkInt, i, minVal, float(maxVal))
        self.assertRaises(ValueError, Parameter.checkInt, -1, minVal, maxVal)
        self.assertRaises(ValueError, Parameter.checkInt, 6, minVal, maxVal)

        #Check half ranges such as [0, inf]
        Parameter.checkInt(i, minVal, float("inf"))
        Parameter.checkInt(i, float("-inf"), maxVal)

        #Check use of numpy int32
        minVal = numpy.int32(0)
        maxVal = numpy.int32(5)
        i = numpy.int32(2)

        Parameter.checkInt(i, minVal, maxVal)
        Parameter.checkInt(minVal, minVal, maxVal)
        Parameter.checkInt(maxVal, minVal, maxVal)
        Parameter.checkInt(i, i, i)

        #Test using an array with 1 int; the builtin int replaces the removed numpy.int alias
        i = numpy.array([1], int)
        logging.debug((type(i)))
        self.assertRaises(ValueError, Parameter.checkInt, i, minVal, maxVal)