def setEll(self, ell):
    """
    :param ell: the initial number of vertices.
    :type ell: :class:`int`
    """
    Parameter.checkInt(ell, 2, float('inf'))
    self.ell = ell
def cut(self, d):
    """
    Return a new tree containing all the vertices of the current one up to a
    depth of d. The edge and vertex labels are copied by reference only.

    :param d: The depth of the new cut tree
    :type d: :class:`int`
    """
    Parameter.checkInt(d, 0, float("inf"))
    root = self.getRootId()
    newTree = DictTree()
    stack = [(root, 0)]
    newTree.setVertex(root)

    while len(stack) != 0:
        (vertexId, depth) = stack.pop()
        neighbours = self.neighbours(vertexId)

        if depth <= d:
            newTree.setVertex(vertexId, self.getVertex(vertexId))

            for neighbour in neighbours:
                stack.append((neighbour, depth+1))

                if depth+1 <= d:
                    newTree.addEdge(vertexId, neighbour, self.getEdge(vertexId, neighbour))

    return newTree
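# A minimal usage sketch for cut(). It assumes DictTree is the class defined
# in this module and that addEdge creates missing vertices and fixes the root;
# both are assumptions about the surrounding API rather than confirmed facts.
exampleTree = DictTree()
exampleTree.addEdge(0, 1)          #root 0 with a child at depth 1
exampleTree.addEdge(1, 2)          #grandchild at depth 2
prunedTree = exampleTree.cut(1)    #retains vertices 0 and 1; vertex 2 is dropped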
def setM(self, m):
    """
    :param m: the number of edges to be added at each step
    :type m: :class:`int`
    """
    Parameter.checkInt(m, 0, self.ell)
    self.m = m
def setBestResponse(self, bestResponse):
    """
    :param bestResponse: the label corresponding to "positive"
    :type bestResponse: :class:`int`
    """
    Parameter.checkInt(bestResponse, -float('inf'), float('inf'))
    self.bestResponse = bestResponse
def evaluateCvOuter(self, X, y, folds):
    """
    Compute the average AUC using stratified k-fold cross validation with the
    chosen kernel (linear or RBF).
    """
    Parameter.checkInt(folds, 2, float('inf'))
    idx = cross_val.StratifiedKFold(y, folds)
    metricMethods = [Evaluator.auc2, Evaluator.roc]

    if self.kernel == "linear":
        logging.debug("Running linear rank SVM")
        trainMetrics, testMetrics = AbstractPredictor.evaluateLearn2(X, y, idx, self.modelSelectLinear, self.predict, metricMethods)
    elif self.kernel == "rbf":
        logging.debug("Running RBF rank SVM")
        trainMetrics, testMetrics = AbstractPredictor.evaluateLearn2(X, y, idx, self.modelSelectRBF, self.predict, metricMethods)
    else:
        raise ValueError("Invalid kernel: " + str(self.kernel))

    bestTrainAUCs = trainMetrics[0]
    bestTrainROCs = trainMetrics[1]
    bestTestAUCs = testMetrics[0]
    bestTestROCs = testMetrics[1]

    bestParams = {}
    bestMetaDicts = {}
    allMetrics = [bestTrainAUCs, bestTrainROCs, bestTestAUCs, bestTestROCs]

    return (bestParams, allMetrics, bestMetaDicts)
def shuffleSplit(repetitions, numExamples, trainProportion=None):
    """
    Random permutation cross-validation iterator. The training set is sampled
    without replacement and of size (repetitions-1)/repetitions of the examples,
    and the test set represents the remaining examples. Each repetition is
    sampled independently.

    :param repetitions: The number of repetitions to perform.
    :type repetitions: :class:`int`

    :param numExamples: The number of examples.
    :type numExamples: :class:`int`

    :param trainProportion: The size of the training set relative to numExamples,
        between 0 and 1, or None to use (repetitions-1)/repetitions
    :type trainProportion: :class:`float`
    """
    Parameter.checkInt(numExamples, 2, float('inf'))
    Parameter.checkInt(repetitions, 1, float('inf'))
    if trainProportion is not None:
        Parameter.checkFloat(trainProportion, 0.0, 1.0)

    if trainProportion is None:
        trainSize = int((repetitions-1)*numExamples/repetitions)
    else:
        trainSize = int(trainProportion*numExamples)

    idx = []

    for i in range(repetitions):
        inds = numpy.random.permutation(numExamples)
        trainInds = inds[0:trainSize]
        testInds = inds[trainSize:]
        idx.append((trainInds, testInds))

    return idx
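# Sketch: three independent 80/20 splits of ten examples. This assumes numpy
# is imported and that shuffleSplit is callable as the plain function above.
for trainInds, testInds in shuffleSplit(3, 10, trainProportion=0.8):
    print(trainInds.shape[0], testInds.shape[0])   #prints "8 2" each repetition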
def array1DToRow(X, precision=3):
    """
    Take a 1D numpy array and print in latex table row format i.e. x1 & x2 .. xn

    :param X: The array to print
    :type X: :class:`ndarray`

    :param precision: The precision of the printed floating point numbers.
    :type precision: :class:`int`
    """
    Parameter.checkInt(precision, 0, 10)
    if X.ndim != 1:
        raise ValueError("Array must be one dimensional")

    n = X.shape[0]
    outputStr = ""

    if X.dtype == float:
        fmtStr = "%." + str(precision) + "f & "
        endFmtStr = "%." + str(precision) + "f"
    else:
        fmtStr = "%d & "
        endFmtStr = "%d"

    for i in range(0, n):
        if i != n-1:
            outputStr += fmtStr % X[i]
        else:
            outputStr += endFmtStr % X[i]

    return outputStr
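# Sketch: format a float vector as a LaTeX table row (assumes numpy imported).
rowStr = array1DToRow(numpy.array([1.2345, 2.0, 3.5]), precision=2)
print(rowStr)   #-> "1.23 & 2.00 & 3.50"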
def svd(A, k, q=2):
    """
    Compute the SVD of a sparse or dense matrix A, finding the first k
    singular vectors/values, using exponent q for the power iteration.
    Returns the left and right singular vectors, and the singular values.
    The resulting matrix can be approximated using A ~ U s V.T.
    """
    Parameter.checkInt(k, 1, float("inf"))
    Parameter.checkInt(q, 1, float("inf"))

    #The random test matrix must have as many rows as A has columns so that
    #A.dot(omega) is defined for rectangular A
    omega = numpy.random.randn(A.shape[1], k)
    Y = A.dot(omega)

    for i in range(q):
        Y = A.T.dot(Y)
        Y = A.dot(Y)

    Q, R = numpy.linalg.qr(Y)

    B = A.T.dot(Q).T
    U, s, V = numpy.linalg.svd(B, full_matrices=False)
    V = V.T
    U = Q.dot(U)

    return U, s, V
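# Sketch: randomised SVD of a random 100x50 matrix keeping k=5 factors, and
# the error of the resulting rank-5 reconstruction. Assumes numpy is imported
# and svd is callable as the plain function above.
A = numpy.random.randn(100, 50)
U, s, V = svd(A, 5)
approxA = (U*s).dot(V.T)                 #A ~ U diag(s) V.T
print(numpy.linalg.norm(A - approxA))    #reconstruction error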
def learnModel(self, X, Y):
    """
    Learn the weight matrix which matches X and Y.
    """
    Parameter.checkClass(X, numpy.ndarray)
    Parameter.checkClass(Y, numpy.ndarray)
    Parameter.checkInt(X.shape[0], 1, float('inf'))
    Parameter.checkInt(X.shape[1], 1, float('inf'))

    numExamples = X.shape[0]
    numFeatures = X.shape[1]

    logging.debug("Training with " + str(numExamples) + " examples and " + str(numFeatures) + " features")

    I = numpy.eye(numFeatures)
    XX = numpy.dot(X.T, X)
    XY = numpy.dot(X.T, Y)

    invXX = numpy.linalg.inv(XX + self.lmbda*I)

    self.U = numpy.dot(invXX, XY)
    logging.debug("Trace of X'X " + str(numpy.trace(XX)))
    logging.debug("Error " + str(numpy.linalg.norm(numpy.dot(X, self.U) - Y)))

    return self.U
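# The closed form used above, as a standalone sketch: ridge regression
# U = (X'X + lmbda I)^{-1} X'Y, with lmbda standing in for self.lmbda.
# numpy.linalg.solve is used here instead of an explicit inverse.
X = numpy.random.randn(20, 5)
Y = numpy.random.randn(20, 3)
lmbda = 0.1
U = numpy.linalg.solve(X.T.dot(X) + lmbda*numpy.eye(5), X.T.dot(Y))
print(numpy.linalg.norm(X.dot(U) - Y))   #training residual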
def randCrossValidation(folds, numExamples):
    """
    Returns a list of tuples (trainIndices, testIndices) using k-fold cross
    validation. In this case we randomise the indices and then split into folds.

    :param folds: The number of cross validation folds.
    :type folds: :class:`int`

    :param numExamples: The number of examples.
    :type numExamples: :class:`int`
    """
    Parameter.checkInt(folds, 1, numExamples)
    Parameter.checkInt(numExamples, 2, float('inf'))

    foldSize = float(numExamples)/folds
    indexList = []

    inds = numpy.random.permutation(numExamples)

    for i in range(0, folds):
        testIndices = inds[int(foldSize*i): int(foldSize*(i+1))]
        trainIndices = numpy.setdiff1d(numpy.arange(0, numExamples), testIndices)
        indexList.append((trainIndices, testIndices))

    return indexList
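# Sketch: 3-fold randomised cross validation over 9 examples; every test fold
# holds 3 indices and its training complement holds the other 6.
for trainInds, testInds in randCrossValidation(3, 9):
    assert trainInds.shape[0] == 6 and testInds.shape[0] == 3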
def setRandomInfected(self, numInitialInfected, proportionHetero, t=0.0):
    """
    Pick a number of people randomly to be infected at time t. Of that set,
    proportionHetero are selected to be heterosexual and the remainder are
    bisexual.
    """
    Parameter.checkInt(numInitialInfected, 0, self.size)
    Parameter.checkFloat(proportionHetero, 0.0, 1.0)

    heteroInds = numpy.arange(self.size)[self.vlist.V[:, HIVVertices.orientationIndex] == HIVVertices.hetero]
    biInds = numpy.arange(self.size)[self.vlist.V[:, HIVVertices.orientationIndex] == HIVVertices.bi]

    numHetero = int(numInitialInfected*proportionHetero)
    numBi = numInitialInfected-numHetero

    heteroInfectInds = numpy.random.permutation(heteroInds.shape[0])[0:numHetero]
    biInfectInds = numpy.random.permutation(biInds.shape[0])[0:numBi]

    for i in heteroInfectInds:
        j = heteroInds[i]
        self.vlist.setInfected(j, t)

    for i in biInfectInds:
        j = biInds[i]
        self.vlist.setInfected(j, t)
def setNumTrees(self, numTrees):
    """
    :param numTrees: The number of trees to generate in the forest.
    :type numTrees: :class:`int`
    """
    Parameter.checkInt(numTrees, 1, float('inf'))
    self.numTrees = numTrees
def generateGraph(self, alpha, p, dim):
    Parameter.checkFloat(alpha, 0.0, float('inf'))
    Parameter.checkFloat(p, 0.0, 1.0)
    Parameter.checkInt(dim, 0, float('inf'))

    numVertices = self.graph.getNumVertices()

    #Place vertices uniformly at random in the unit cube
    self.X = numpy.random.rand(numVertices, dim)

    #Connect pairs with probability exp(-alpha * distance), excluding self-edges
    D = KernelUtils.computeDistanceMatrix(numpy.dot(self.X, self.X.T))
    P = numpy.exp(-alpha * D)
    diagIndices = numpy.array(list(range(0, numVertices)))
    P[(diagIndices, diagIndices)] = numpy.zeros(numVertices)

    B = numpy.random.rand(numVertices, numVertices) <= P

    #Note that B is symmetric - could just go through e.g. upper triangle
    rows, cols = numpy.nonzero(B)
    for i in range(rows.shape[0]):
        self.graph.addEdge(rows[i], cols[i])

    #Overlay an Erdos-Renyi graph with edge probability p
    erdosRenyiGenerator = ErdosRenyiGenerator(p)
    self.graph = erdosRenyiGenerator.generate(self.graph, False)

    return self.graph
def bootstrap2(repetitions, numExamples):
    """
    Perform 0.632 bootstrap in which we take a sample with replacement from
    the dataset of size numExamples. The examples not present in the training
    set are used to form the test set. (The oversampling of the test set with
    0.368 of the training examples is left commented out below.) Returns a
    list of tuples of the form (trainIndices, testIndices).

    :param repetitions: The number of repetitions of bootstrap to perform.
    :type repetitions: :class:`int`

    :param numExamples: The number of examples.
    :type numExamples: :class:`int`
    """
    Parameter.checkInt(numExamples, 2, float('inf'))
    Parameter.checkInt(repetitions, 1, float('inf'))

    inds = []
    for i in range(repetitions):
        trainInds = numpy.random.randint(numExamples, size=numExamples)
        testInds = numpy.setdiff1d(numpy.arange(numExamples), numpy.unique(trainInds))
        #testInds = numpy.r_[testInds, trainInds[0:(numExamples*0.368)]]
        inds.append((trainInds, testInds))

    return inds
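# Sketch: two bootstrap repetitions over 10 examples. Each training set is
# drawn with replacement, and the test set is whatever was left out (about
# 36.8% of the examples on average).
for trainInds, testInds in bootstrap2(2, 10):
    print(trainInds.shape[0], testInds.shape[0])   #10 and typically 3-4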
def predictEdges(self, vertexIndices):
    """
    This makes a prediction for a series of edges using the Adamic-Adar score

        s(x, y) = \sum_{z \in n(x) \cap n(y)} 1/\log(|n(z)|)

    where n(x) is the set of neighbours of x. Returns a matrix whose rows are
    ranked lists of vertices of length self.windowSize.
    """
    Parameter.checkInt(self.windowSize, 1, self.graph.getNumVertices())
    logging.info("Running predictEdges in " + str(self.__class__.__name__))

    P = numpy.zeros((vertexIndices.shape[0], self.windowSize))
    S = numpy.zeros((vertexIndices.shape[0], self.windowSize))
    W = self.graph.getWeightMatrix()

    for i in range(vertexIndices.shape[0]):
        Util.printIteration(i, self.printStep, vertexIndices.shape[0])
        scores = numpy.zeros(self.graph.getNumVertices())

        for j in range(0, self.graph.getNumVertices()):
            commonNeighbours = numpy.nonzero(W[vertexIndices[i], :] * W[j, :])[0]

            for k in commonNeighbours:
                q = numpy.log(numpy.nonzero(W[k, :])[0].shape[0])
                if q != 0:
                    scores[j] = scores[j] + 1/q

        P[i, :], S[i, :] = self.indicesFromScores(vertexIndices[i], scores)

    return P, S
def eigenAdd(omega, Q, Y, k):
    """
    Perform an eigen update of the form A^*A + Y^*Y in which Y is a low-rank
    matrix and A^*A = Q Omega Q^*. We use the rank-k approximation of A:
    Q_k Omega_k Q_k^* and then approximate [A^*A_k + Y^*Y]_k.
    """
    #logging.debug("< eigenAdd >")
    Parameter.checkInt(k, 0, omega.shape[0])
    #if not numpy.isrealobj(omega) or not numpy.isrealobj(Q):
    #    raise ValueError("Eigenvalues and eigenvectors must be real")
    if omega.ndim != 1:
        raise ValueError("omega must be 1-d array")
    if omega.shape[0] != Q.shape[1]:
        raise ValueError("Must have same number of eigenvalues and eigenvectors")

    if __debug__:
        Parameter.checkOrthogonal(Q, tol=EigenUpdater.tol, softCheck=True, arrayInfo="input Q in eigenAdd()")

    #Taking the abs of the eigenvalues is correct
    inds = numpy.flipud(numpy.argsort(numpy.abs(omega)))

    omega, Q = Util.indEig(omega, Q, inds[numpy.abs(omega)>EigenUpdater.tol])
    Omega = numpy.diag(omega)

    YY = Y.conj().T.dot(Y)
    QQ = Q.dot(Q.conj().T)
    Ybar = Y - Y.dot(QQ)

    Pbar, sigmaBar, Qbar = numpy.linalg.svd(Ybar, full_matrices=False)
    inds = numpy.flipud(numpy.argsort(numpy.abs(sigmaBar)))
    inds = inds[numpy.abs(sigmaBar)>EigenUpdater.tol]
    Pbar, sigmaBar, Qbar = Util.indSvd(Pbar, sigmaBar, Qbar, inds)
    SigmaBar = numpy.diag(sigmaBar)
    Qbar = Ybar.T.dot(Pbar)
    Qbar = Qbar.dot(numpy.diag(numpy.diag(Qbar.T.dot(Qbar))**-0.5))

    r = sigmaBar.shape[0]

    YQ = Y.dot(Q)
    Zeros = numpy.zeros((r, omega.shape[0]))
    D = numpy.c_[Q, Qbar]

    YYQQ = YY.dot(QQ)
    Z = D.conj().T.dot(YYQQ + YYQQ.conj().T).dot(D)
    F = numpy.c_[numpy.r_[Omega - YQ.conj().T.dot(YQ), Zeros], numpy.r_[Zeros.T, SigmaBar.conj().dot(SigmaBar)]]
    F = F + Z

    pi, H = scipy.linalg.eigh(F)

    inds = numpy.flipud(numpy.argsort(numpy.abs(pi)))
    H = H[:, inds[0:k]]
    pi = pi[inds[0:k]]

    V = D.dot(H)
    #logging.debug("</ eigenAdd >")
    return pi, V
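# Sketch: compare eigenAdd against a direct eigendecomposition on a low-rank
# example. This assumes the function is the static method EigenUpdater.eigenAdd,
# as the references to EigenUpdater.tol above suggest; the leading eigenvalues
# should closely match those of C + Y^*Y.
G = numpy.random.randn(10, 4)
C = G.dot(G.T)                     #rank-4 PSD matrix playing the role of A^*A
omega, Q = numpy.linalg.eigh(C)
Y = numpy.random.randn(3, 10)      #low-rank update Y^*Y
pi, V = EigenUpdater.eigenAdd(omega, Q, Y, k=7)
directPi = numpy.linalg.eigvalsh(C + Y.T.dot(Y))
print(numpy.sort(numpy.abs(pi))[-3:])        #top updated eigenvalues...
print(numpy.sort(numpy.abs(directPi))[-3:])  #...versus the direct ones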
def setK(self, k):
    """
    Set the number of iterations k.

    :param k: The number of iterations.
    :type k: :class:`int`
    """
    Parameter.checkInt(k, 1, float('inf'))
    self.k = k
def evaluateLearn(X, y, idx, learnModel, predict, metricMethod, progress=True):
    """
    Evaluate this learning algorithm using the given list of training/test
    splits. The metricMethod is a method which takes (predictedY, realY) as
    input and returns a metric about the quality of the evaluation.

    :param X: A matrix with examples as rows
    :type X: :class:`ndarray`

    :param y: A vector of labels
    :type y: :class:`ndarray`

    :param idx: A list of training/test splits
    :type idx: :class:`list`

    :param learnModel: A function such that learnModel(X, y) finds a mapping from X to y
    :type learnModel: :class:`function`

    :param predict: A function such that predict(X) makes predictions for X
    :type predict: :class:`function`

    :param metricMethod: A function such that metricMethod(predY, testY) returns the quality of predicted labels predY
    :type metricMethod: :class:`function`

    Output: an array containing the metric for each split.
    """
    #Parameter.checkClass(idx, list)
    Parameter.checkClass(X, numpy.ndarray)
    Parameter.checkArray(X, softCheck=True)
    Parameter.checkInt(X.shape[0], 1, float('inf'))
    Parameter.checkClass(y, numpy.ndarray)
    Parameter.checkArray(y, softCheck=True)

    if y.ndim != 1:
        raise ValueError("Dimension of y must be 1")

    i = 0
    metrics = numpy.zeros(len(idx))
    logging.debug("EvaluateLearn: Using " + str(len(idx)) + " splits on " + str(X.shape[0]) + " examples")

    for idxtr, idxts in idx:
        if progress:
            Util.printConciseIteration(i, 1, len(idx))

        trainX, testX = X[idxtr, :], X[idxts, :]
        trainY, testY = y[idxtr], y[idxts]
        #logging.debug("Distribution of labels in evaluateLearn train: " + str(numpy.bincount(trainY)))
        #logging.debug("Distribution of labels in evaluateLearn test: " + str(numpy.bincount(testY)))

        learnModel(trainX, trainY)
        predY = predict(testX)
        gc.collect()

        metrics[i] = metricMethod(predY, testY)
        i += 1

    return metrics
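# Sketch: evaluate a trivial constant classifier with evaluateLearn using one
# hand-made train/test split. The learnModel/predict lambdas are stand-ins, and
# the direct call assumes evaluateLearn is reachable as a plain function
# (elsewhere it is invoked as AbstractPredictor.evaluateLearn).
X = numpy.random.randn(30, 4)
y = numpy.array([1, -1]*15)
idx = [(numpy.arange(0, 20), numpy.arange(20, 30))]
state = {}
learnModel = lambda trainX, trainY: state.update(label=1)
predict = lambda testX: numpy.repeat(state["label"], testX.shape[0])
metricMethod = lambda predY, testY: numpy.mean(predY != testY)
print(evaluateLearn(X, y, idx, learnModel, predict, metricMethod, progress=False))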
def setPosteriorSampleSize(self, posteriorSampleSize):
    """
    Set the sample size of the posterior distribution (population size).

    :param posteriorSampleSize: The size of the population
    :type posteriorSampleSize: :class:`int`
    """
    Parameter.checkInt(posteriorSampleSize, 0, float('inf'))
    self.N = posteriorSampleSize
def __init__(self, vList, maxEdgeTypes, undirected=True):
    """
    Create a multi-graph with the given vertex list and maxEdgeTypes edge
    types, stored as one SparseGraph per edge type.
    """
    Parameter.checkInt(maxEdgeTypes, 1, float('inf'))

    self.vList = vList
    self.undirected = undirected
    self.maxEdgeTypes = maxEdgeTypes
    self.sparseGraphs = []

    for i in range(maxEdgeTypes):
        self.sparseGraphs.append(SparseGraph(vList, undirected))
def setDegree(self, degree):
    """
    Set the degree parameter.

    :param degree: kernel degree parameter.
    :type degree: :class:`int`
    """
    Parameter.checkInt(degree, 1, float('inf'))
    self.degree = degree
def setRandomInfected(self, numInitialInfected, t=0.0):
    """
    Pick a number of people randomly to be infected at time t.
    """
    Parameter.checkInt(numInitialInfected, 0, len(self.getSusceptibleSet()))

    infectInds = numpy.random.permutation(len(self.getSusceptibleSet()))[0:numInitialInfected]
    susceptibleList = list(self.getSusceptibleSet())

    for i in infectInds:
        self.vlist.setInfected(susceptibleList[i], t)
def evaluateCv(self, X, y, folds, metricMethod=Evaluator.binaryError):
    """
    Compute the cross validation according to a given metric.
    """
    Parameter.checkInt(folds, 2, float('inf'))
    idx = Sampling.crossValidation(folds, y.shape[0])
    metrics = AbstractPredictor.evaluateLearn(X, y, idx, self.learnModel, self.predict, metricMethod)

    mean = numpy.mean(metrics, 0)
    var = numpy.var(metrics, 0)

    return (mean, var)
def __init__(self, numVertices):
    """
    Create an empty GeneralVertexList with the specified number of vertices;
    each vertex value is initialised as None.

    :param numVertices: The number of vertices.
    :type numVertices: :class:`int`
    """
    Parameter.checkInt(numVertices, 0, float('inf'))
    self.V = {}

    for i in range(numVertices):
        self.V[i] = None
def eigenConcat(omega, Q, AB, BB, k):
    """
    Find the eigen update of a matrix [A, B]^*[A, B] where A^*A = Q diag(omega) Q^*
    and AB = A^*B, BB = B^*B. Q is the matrix of eigenvectors of A^*A and omega
    is the vector of eigenvalues.
    """
    #logging.debug("< eigenConcat >")
    Parameter.checkInt(k, 0, omega.shape[0])
    if not numpy.isrealobj(omega) or not numpy.isrealobj(Q):
        raise ValueError("Eigenvalues and eigenvectors must be real")
    if not numpy.isrealobj(AB) or not numpy.isrealobj(BB):
        raise ValueError("AB and BB must be real")
    if omega.ndim != 1:
        raise ValueError("omega must be 1-d array")
    if omega.shape[0] != Q.shape[1]:
        raise ValueError("Must have same number of eigenvalues and eigenvectors")
    if Q.shape[0] != AB.shape[0]:
        raise ValueError("Q must have the same number of rows as AB")
    if AB.shape[1] != BB.shape[0] or BB.shape[0] != BB.shape[1]:
        raise ValueError("AB must have the same number of cols/rows as BB")

    #Check Q is orthogonal
    if __debug__:
        Parameter.checkOrthogonal(Q, tol=EigenUpdater.tol, softCheck=True, arrayInfo="input Q in eigenConcat()")

    m = Q.shape[0]
    p = BB.shape[0]

    inds = numpy.flipud(numpy.argsort(numpy.abs(omega)))
    Q = Q[:, inds[0:k]]
    omega = omega[inds[0:k]]
    Omega = numpy.diag(omega)

    QAB = Q.conj().T.dot(AB)

    F = numpy.c_[numpy.r_[Omega, QAB.conj().T], numpy.r_[QAB, BB]]
    D = numpy.c_[numpy.r_[Q, numpy.zeros((p, Q.shape[1]))], numpy.r_[numpy.zeros((m, p)), numpy.eye(p)]]

    pi, H = scipy.linalg.eigh(F)
    inds = numpy.flipud(numpy.argsort(numpy.abs(pi)))
    inds = inds[numpy.abs(pi)>EigenUpdater.tol]
    H = H[:, inds[0:k]]
    pi = pi[inds[0:k]]

    V = numpy.dot(D, H)
    #logging.debug("</ eigenConcat >")
    return pi, V
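# Sketch: check eigenConcat on [A, B]^*[A, B] built from random blocks, again
# assuming the static method lives on EigenUpdater. pi should hold the k
# largest-magnitude eigenvalues of the concatenated Gram matrix.
A = numpy.random.randn(8, 5)
B = numpy.random.randn(8, 3)
omega, Q = numpy.linalg.eigh(A.T.dot(A))
pi, V = EigenUpdater.eigenConcat(omega, Q, A.T.dot(B), B.T.dot(B), k=5)
full = numpy.c_[A, B]
directPi = numpy.linalg.eigvalsh(full.T.dot(full))
print(numpy.sort(numpy.abs(pi)))              #k updated eigenvalues...
print(numpy.sort(numpy.abs(directPi))[-5:])   #...versus the direct top 5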
def evaluateCvOuter(self, X, Y, folds, leafRank):
    """
    Run cross validation and output some ROC curves. In this case Y is a 1D array.
    """
    Parameter.checkClass(X, numpy.ndarray)
    Parameter.checkClass(Y, numpy.ndarray)
    Parameter.checkInt(folds, 2, float('inf'))
    if Y.ndim != 1:
        raise ValueError("Expecting Y to be 1D")

    indexList = cross_val.StratifiedKFold(Y, folds)
    self.setLeafRank(leafRank)

    bestParams = []
    bestTrainAUCs = numpy.zeros(folds)
    bestTrainROCs = []
    bestTestAUCs = numpy.zeros(folds)
    bestTestROCs = []
    bestMetaDicts = []
    i = 0

    for trainInds, testInds in indexList:
        Util.printIteration(i, 1, folds)
        trainX, trainY = X[trainInds, :], Y[trainInds]
        testX, testY = X[testInds, :], Y[testInds]

        logging.debug("Distribution of labels in train: " + str(numpy.bincount(trainY)))
        logging.debug("Distribution of labels in test: " + str(numpy.bincount(testY)))

        self.learnModel(trainX, trainY)
        predTrainY = self.predict(trainX)
        predTestY = self.predict(testX)
        bestTrainAUCs[i] = Evaluator.auc(predTrainY, trainY)
        bestTestAUCs[i] = Evaluator.auc(predTestY, testY)

        #Store the parameters and ROC curves
        bestTrainROCs.append(Evaluator.roc(trainY, predTrainY))
        bestTestROCs.append(Evaluator.roc(testY, predTestY))

        metaDict = {}
        bestMetaDicts.append(metaDict)

        i += 1

    logging.debug("Mean test AUC = " + str(numpy.mean(bestTestAUCs)))
    logging.debug("Std test AUC = " + str(numpy.std(bestTestAUCs)))
    allMetrics = [bestTrainAUCs, bestTrainROCs, bestTestAUCs, bestTestROCs]

    return (bestParams, allMetrics, bestMetaDicts)
def learnModel(self, graph):
    """
    Learn a prediction model based on all of the edges of the input graph.
    For each ego, X contains a list of neighbours and non-neighbours in the
    same ratio, and y = 1 for a neighbour and -1 otherwise. We then find the
    set of primal weights w for each ego network and then regress onto the
    set of weights using the ego labels. One can either learn by comparing
    neighbours and non-neighbours, or alternatively using the labels of edges
    and making predictions on unlabelled edges.

    :param graph: The input graph to learn from.
    :type graph: class:`apgl.graph.AbstractSingleGraph`
    """
    Parameter.checkInt(self.windowSize, 1, graph.getNumVertices())
    self.graph = graph
    logging.info("Learning model on graph of size " + str(graph.getNumVertices()))

    allIndices = numpy.arange(0, graph.getNumVertices())
    V = graph.getVertexList().getVertices(allIndices)
    W = numpy.zeros((0, graph.getVertexList().getNumFeatures()))
    Xe = numpy.zeros((0, graph.getVertexList().getNumFeatures()))
    printStep = numpy.floor(graph.getNumVertices()/10)

    for i in range(graph.getNumVertices()):
        Util.printIteration(i, printStep, graph.getNumVertices())
        neighbours = graph.neighbours(i)

        if neighbours.shape[0] != 0:
            compNeighbours = numpy.setdiff1d(allIndices, neighbours)
            perm = numpy.random.permutation(compNeighbours.shape[0])[0:neighbours.shape[0]]
            negativeVertices = V[compNeighbours[perm], :]
            X = numpy.r_[V[neighbours, :], negativeVertices]
            y = numpy.ones(X.shape[0])
            y[neighbours.shape[0]:] = -1

            w = self.alterRegressor.learnModel(X, y)
            W = numpy.r_[W, numpy.array([w])]
            Xe = numpy.r_[Xe, numpy.array([V[i, :]])]

    #Now solve a least squares problem to regress Xe onto W
    self.egoRegressor.learnModel(Xe, W)
def __init__(self, numVertices, numFeatures=0, dtype=numpy.float64):
    """
    Create an empty (zeroed) VertexList with the specified number of features
    for each vertex and number of vertices.

    :param numVertices: The number of vertices.
    :type numVertices: :class:`int`

    :param numFeatures: The number of features for each vertex.
    :type numFeatures: :class:`int`

    :param dtype: the data type for the vertex matrix, e.g numpy.int8.
    """
    Parameter.checkInt(numVertices, 0, float('inf'))
    Parameter.checkInt(numFeatures, 0, float('inf'))
    self.V = numpy.zeros((numVertices, numFeatures), dtype)
def eigenRemove(omega, Q, n, k, debug=False):
    """
    Remove a set of rows and columns from a matrix whose eigen-decomposition
    is Q diag(omega) Q^T. Keep the first n rows/cols, i.e. the rows/cols from
    n to the end are removed, and k is the number of eigenvectors/values to
    return for the new matrix. We could generalise this to delete a given
    list of rows/cols.
    """
    #logging.debug("< eigenRemove >")
    Parameter.checkClass(omega, numpy.ndarray)
    Parameter.checkClass(Q, numpy.ndarray)
    Parameter.checkInt(k, 0, float('inf'))
    Parameter.checkInt(n, 0, Q.shape[0])
    if omega.ndim != 1:
        raise ValueError("omega must be 1-d array")
    if omega.shape[0] != Q.shape[1]:
        raise ValueError("Must have same number of eigenvalues and eigenvectors")

    if __debug__:
        Parameter.checkOrthogonal(Q, tol=EigenUpdater.tol, softCheck=True, arrayInfo="input Q in eigenRemove()")

    inds = numpy.flipud(numpy.argsort(numpy.abs(omega)))
    inds = inds[omega[inds]>EigenUpdater.tol]

    omega, Q = Util.indEig(omega, Q, inds[0:k])

    AB = (Q[0:n, :]*omega).dot(Q[n:, :].T)
    BB = (Q[n:, :]*omega).dot(Q[n:, :].T)

    p = BB.shape[0]
    Y1 = numpy.r_[numpy.zeros((n, p)), numpy.eye(p)]
    Y2 = -numpy.r_[AB, 0.5*BB]

    pi, V = EigenUpdater.eigenAdd2(omega, Q, Y1, Y2, k)

    #Check the last rows of V are zero
    if numpy.linalg.norm(V[n:, :]) >= EigenUpdater.tol:
        logging.warning("numpy.linalg.norm(V[n:, :])= %s" % str(numpy.linalg.norm(V[n:, :])))

    #logging.debug("</ eigenRemove >")
    if not debug:
        return pi, V[0:n, :]
    else:
        C = (Q*omega).dot(Q.T)
        K = C + Y1.dot(Y2.T) + Y2.dot(Y1.T)
        assert numpy.linalg.norm(BB - C[n:, n:]) <= EigenUpdater.tol
        assert numpy.linalg.norm(AB - C[0:n, n:]) <= EigenUpdater.tol, "%s \n %s" % (AB, C[0:n, n:])
        return pi, V[0:n, :], K, Y1, Y2, omega
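# Sketch: remove the last two rows/cols of C = Q diag(omega) Q^T and compare
# against the direct eigendecomposition of the retained 5x5 block. As above,
# the EigenUpdater.eigenRemove call is an assumption about where this static
# method lives; the results are approximate since only k eigenpairs are kept.
C = numpy.random.randn(7, 7)
C = C.dot(C.T)
omega, Q = numpy.linalg.eigh(C)
pi, V = EigenUpdater.eigenRemove(omega, Q, n=5, k=5)
directPi = numpy.linalg.eigvalsh(C[0:5, 0:5])
print(numpy.sort(pi))
print(numpy.sort(directPi))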
def evaluateStratifiedCv(self, X, y, folds, metricMethod=Evaluator.binaryError):
    """
    Compute the stratified cross validation according to a given metric.
    """
    try:
        from sklearn import cross_validation as cross_val
        Parameter.checkInt(folds, 2, float('inf'))
        idx = cross_val.StratifiedKFold(y, folds)
        metrics = AbstractPredictor.evaluateLearn(X, y, idx, self.learnModel, self.predict, metricMethod)

        mean = numpy.mean(metrics, 0)
        var = numpy.var(metrics, 0)

        return (mean, var)
    except ImportError:
        logging.warning("Failed to import scikit-learn")
        raise
def __init__(self, initialGraph, k):
    """
    Initialise with a starting graph, and number of iterations k. Note that
    the starting graph must have self edges on every vertex. Only the
    adjacency matrix of the starting graph is used.

    :param initialGraph: The initial graph to use.
    :type initialGraph: :class:`apgl.graph.AbstractMatrixGraph`

    :param k: The number of iterations.
    :type k: :class:`int`
    """
    Parameter.checkInt(k, 1, float('inf'))
    W = initialGraph.getWeightMatrix()
    if (numpy.diag(W) == numpy.zeros(W.shape[0])).any():
        raise ValueError("Initial graph must have all self-edges")

    self.initialGraph = initialGraph
    self.k = k
def __init__(self, initialGraph, k):
    """
    Initialise with a starting graph, and number of iterations k. The weights
    of the initial graph correspond to probabilities.

    :param initialGraph: The initial graph to use.
    :type initialGraph: :class:`apgl.graph.AbstractMatrixGraph`

    :param k: The number of iterations.
    :type k: :class:`int`
    """
    Parameter.checkInt(k, 1, float('inf'))
    edgeVals = initialGraph.getEdgeValues(initialGraph.getAllEdges())
    Parameter.checkList(edgeVals, Parameter.checkFloat, [0.0, 1.0])

    W = initialGraph.getWeightMatrix()
    if (numpy.diag(W) == numpy.zeros(W.shape[0])).any():
        raise ValueError("Initial graph must have all self-edges")

    self.initialGraph = initialGraph
    self.k = k
def testCheckInt(self):
    min = 0
    max = 5
    i = 2

    Parameter.checkInt(i, min, max)
    Parameter.checkInt(min, min, max)
    Parameter.checkInt(max, min, max)
    Parameter.checkInt(i, i, i)

    self.assertRaises(ValueError, Parameter.checkInt, i, max, min)
    self.assertRaises(ValueError, Parameter.checkInt, i, float(min), max)
    self.assertRaises(ValueError, Parameter.checkInt, i, min, float(max))
    #self.assertRaises(ValueError, Parameter.checkInt, 2.0, min, max)
    self.assertRaises(ValueError, Parameter.checkInt, -1, min, max)
    self.assertRaises(ValueError, Parameter.checkInt, 6, min, max)

    #Check half ranges such as [0, inf]
    Parameter.checkInt(i, min, float("inf"))
    Parameter.checkInt(i, float("-inf"), max)

    #Check use of numpy int32
    min = numpy.int32(0)
    max = numpy.int32(5)
    i = numpy.int32(2)

    Parameter.checkInt(i, min, max)
    Parameter.checkInt(min, min, max)
    Parameter.checkInt(max, min, max)
    Parameter.checkInt(i, i, i)

    #Test using an array with 1 int
    i = numpy.array([1], numpy.int64)
    logging.debug(type(i))
    self.assertRaises(ValueError, Parameter.checkInt, i, min, max)