def multiply(self, graph):
    """
    Form the element-wise product of this graph's edge weights with those of
    the input graph, so that only edges present in both graphs survive.

    :param graph: the input graph.
    :type graph: :class:`apgl.graph.PySparseGraph`

    :returns: A new graph whose edge weights are the products of the corresponding weights of this graph and the input graph.
    """
    Parameter.checkClass(graph, PySparseGraph)
    sameSize = graph.getNumVertices() == self.getNumVertices()
    if not sameSize:
        raise ValueError("Can only add edges from graph with same number of vertices")
    if self.undirected != graph.undirected:
        raise ValueError("Both graphs must be either undirected or directed")

    # A product entry can only be non-zero where the matrix with fewer stored
    # entries is non-zero, so only those positions are visited.
    sparser = self.W if self.W.nnz < graph.W.nnz else graph.W
    (rowInds, colInds) = PySparseUtils.nonzero(sparser)

    numEntries = len(rowInds)
    ownVals = numpy.zeros(numEntries)
    otherVals = numpy.zeros(numEntries)
    self.W.take(ownVals, rowInds, colInds)
    graph.W.take(otherVals, rowInds, colInds)

    product = PySparseGraph(self.vList, self.undirected)
    product.W.put(ownVals * otherVals, rowInds, colInds)
    return product
def setDiff(self, graph):
    """
    Compute the edges of this graph that do not occur in the input graph.

    :param graph: the input graph.
    :type graph: :class:`apgl.graph.PySparseGraph`

    :returns: A new graph with edges from the current graph and not in the input graph.
    """
    Parameter.checkClass(graph, PySparseGraph)
    if graph.getNumVertices() != self.getNumVertices():
        raise ValueError("Can only add edges from graph with same number of vertices")
    if self.undirected != graph.undirected:
        raise ValueError("Both graphs must be either undirected or directed")

    ownAdj = self.nativeAdjacencyMatrix()
    otherAdj = graph.nativeAdjacencyMatrix()
    (rowInds, colInds) = PySparseUtils.nonzero(ownAdj)

    numEntries = len(rowInds)
    ownVals = numpy.zeros(numEntries)
    otherVals = numpy.zeros(numEntries)
    ownAdj.take(ownVals, rowInds, colInds)
    otherAdj.take(otherVals, rowInds, colInds)

    # Subtract the other graph's adjacency at the positions where this graph has an edge
    ownAdj.put(ownVals - otherVals, rowInds, colInds)

    difference = PySparseGraph(self.vList, self.undirected)
    difference.W = ownAdj
    return difference
def randomChoice(V, n=1):
    """
    Make a random choice from a vector V of values which are unnormalised
    probabilities and return the corresponding index. For example if
    v = [1, 2, 4] then the probabilities of the indices are respectively
    [1/7, 2/7, 4/7]. If V is a matrix, each row is taken as a set of
    probabilities and choices are made independently for each row.

    :param V: 1D or 2D array of unnormalised probabilities.
    :type V: :class:`numpy.ndarray`

    :param n: The number of random choices to make (per row for a matrix).
    :type n: :class:`int`

    :returns: -1 if V has no rows/elements; an array of n indices for 1D input; an array of shape (V.shape[0], n) of indices for 2D input.
    :raises ValueError: if V has more than two dimensions.
    """
    Parameter.checkClass(V, numpy.ndarray)

    if V.shape[0] == 0:
        return -1

    if V.ndim == 1:
        cumV = numpy.cumsum(V)
        # Scale uniform samples to the total mass and locate them in the cumulative sum
        p = numpy.random.rand(n) * cumV[-1]
        return numpy.searchsorted(cumV, p)
    elif V.ndim == 2:
        cumV = numpy.cumsum(V, 1)
        P = numpy.random.rand(V.shape[0], n) * numpy.array([cumV[:, -1]]).T

        # The numpy.int alias was removed from NumPy; the builtin int is the same dtype
        inds = numpy.zeros(P.shape, int)
        for i in range(P.shape[0]):
            inds[i, :] = numpy.searchsorted(cumV[i, :], P[i, :])

        return inds
    else:
        raise ValueError("Invalid number of dimensions")
def random2Choice(V, n=1):
    """
    Make a random binary choice from a vector V of two values which are
    unnormalised probabilities and return the corresponding index. For
    example if v = [1, 2] then the probabilities of the indices are
    respectively [1/3, 2/3]. If V is a matrix with two columns, each row is
    taken as a pair of probabilities and choices are made for each row.

    :param V: Array of shape (2,) or (m, 2) of unnormalised probabilities.
    :type V: :class:`numpy.ndarray`

    :param n: The number of random choices to make (per row for a matrix).
    :type n: :class:`int`

    :returns: An integer array of 0/1 choices, of shape (n,) for 1D input or (m, n) for 2D input.
    :raises ValueError: if V is not binary or has more than two dimensions.
    """
    Parameter.checkClass(V, numpy.ndarray)

    if V.ndim == 1 and V.shape[0] != 2:
        raise ValueError("Function only works on binary probabilities")
    if V.ndim == 2 and V.shape[1] != 2:
        raise ValueError("Function only works on binary probabilities")

    if V.ndim == 1:
        cumV = numpy.cumsum(V)
        p = numpy.random.rand(n) * cumV[-1]
        # Index 1 is chosen when the sample falls at or beyond the first value's mass
        cumV2 = numpy.ones(n) * cumV[0] - p
        # The numpy.int alias was removed from NumPy; the builtin int is the same dtype
        return numpy.array(cumV2 <= 0, int)
    elif V.ndim == 2:
        cumV = numpy.cumsum(V, 1)
        P = numpy.random.rand(V.shape[0], n) * numpy.array([cumV[:, -1]]).T
        cumV2 = numpy.outer(cumV[:, 0], numpy.ones(n)) - P
        return numpy.array(cumV2 <= 0, int)
    else:
        raise ValueError("Invalid number of dimensions")
def concat(self, graph):
    """
    Take a new graph and concatenate it to the current one. Returns a new
    graph of the concatenated graphs, with this graph's vertices first in
    the new list of vertices. The weight matrix of the result is block
    diagonal, so no edges are created between the two input graphs.

    :param graph: the input graph.
    :type graph: :class:`apgl.graph.SparseGraph`

    :returns: A new graph with the vertices and edges of both graphs.
    :raises ValueError: if the vertex list types or the directedness of the two graphs differ.
    """
    Parameter.checkClass(graph, SparseGraph)
    if type(graph.getVertexList()) != type(self.getVertexList()):
        raise ValueError("Vertex lists must be of same type")
    if graph.isUndirected() != self.isUndirected():
        raise ValueError("Graphs must be of the same directed type")

    numOwnVertices = self.getNumVertices()
    numVertices = numOwnVertices + graph.getNumVertices()
    vList = GeneralVertexList(numVertices)
    vList.setVertices(self.getVertexList().getVertices(), list(range(numOwnVertices)))
    vList.setVertices(graph.getVertexList().getVertices(), list(range(numOwnVertices, numVertices)))

    # Both inputs share the same directedness (checked above); carry it over
    # instead of relying on the constructor default.
    newGraph = SparseGraph(vList, self.isUndirected())

    W = scipy.sparse.bmat([[self.W, None], [None, graph.W]], format="csr")
    newGraph.setWeightMatrixSparse(W)

    return newGraph
def setDiff(self, graph):
    """
    Compute the edges of this graph that do not occur in the input graph.

    :param graph: the input graph.
    :type graph: :class:`apgl.graph.SparseGraph`

    :returns: A new graph with edges from the current graph and not in the input graph.
    """
    Parameter.checkClass(graph, SparseGraph)
    if graph.getNumVertices() != self.getNumVertices():
        raise ValueError("Can only add edges from graph with same number of vertices")
    if self.undirected != graph.undirected:
        raise ValueError("Both graphs must be either undirected or directed")

    ownAdj = self.nativeAdjacencyMatrix()
    otherAdj = graph.nativeAdjacencyMatrix()
    diff = ownAdj - otherAdj

    # (x + x*x)/2 maps 1 to 1 and both 0 and -1 to 0
    # (assumes the adjacency entries are 0/1 -- TODO confirm)
    diffSquared = diff.multiply(diff)
    A = (diff + diffSquared) / 2
    A.prune()

    difference = SparseGraph(self.vList, self.undirected)
    difference.W = A
    return difference
def add(self, graph):
    """
    Sum the edge weights of this graph and the input graph, which gives the
    union of their edges.

    :param graph: the input graph.
    :type graph: :class:`apgl.graph.SparseGraph`

    :returns: A new graph with same vertex list and addition of edge weights
    """
    Parameter.checkClass(graph, SparseGraph)
    if graph.getNumVertices() != self.getNumVertices():
        raise ValueError("Can only add edges from graph with same number of vertices")
    if self.undirected != graph.undirected:
        raise ValueError("Both graphs must be either undirected or directed")

    # Adding the two weight matrices directly would produce a csr matrix,
    # so the entries of the input graph are added one at a time instead.
    (rowInds, colInds) = numpy.nonzero(graph.W)

    total = SparseGraph(self.vList, self.undirected)
    total.W = self.W.copy()

    for (row, col) in zip(rowInds, colInds):
        total.W[row, col] = self.W[row, col] + graph.W[row, col]

    return total
def addEdge(self, vertexIndex1, vertexIndex2, edge=1):
    """
    Insert a non-zero edge between a pair of vertices. For an undirected
    graph the weight is stored in both directions.

    :param vertexIndex1: The index of the first vertex.
    :type vertexIndex1: :class:`int`

    :param vertexIndex2: The index of the second vertex.
    :type vertexIndex2: :class:`int`

    :param edge: The value of the edge.
    :type edge: :class:`float`

    :raises ValueError: if the edge value is zero or infinite.
    """
    Parameter.checkIndex(vertexIndex1, 0, self.vList.getNumVertices())
    Parameter.checkIndex(vertexIndex2, 0, self.vList.getNumVertices())
    row = int(vertexIndex1)
    col = int(vertexIndex2)

    if edge == 0 or edge == float('inf'):
        raise ValueError("Cannot add a zero or infinite edge")

    self.W[row, col] = edge
    if self.undirected:
        # Mirror the entry so the weight matrix stays symmetric
        self.W[col, row] = edge
def setNumTrees(self, numTrees):
    """
    Store the number of trees after validating it with
    ``Parameter.checkInt`` against the bounds 1 and infinity.

    :param numTrees: The number of trees to generate in the forest.
    :type numTrees: :class:`int`
    """
    lowerBound, upperBound = 1, float('inf')
    Parameter.checkInt(numTrees, lowerBound, upperBound)
    self.numTrees = numTrees
def random2Choice(V, n=1):
    """
    Make a random binary choice from a vector V of two values which are
    unnormalised probabilities and return the corresponding index. For
    example if v = [1, 2] then the probabilities of the indices are
    respectively [1/3, 2/3]. If V is a matrix with two columns, each row is
    taken as a pair of probabilities and choices are made for each row.

    :param V: Array of shape (2,) or (m, 2) of unnormalised probabilities.
    :type V: :class:`numpy.ndarray`

    :param n: The number of random choices to make (per row for a matrix).
    :type n: :class:`int`

    :returns: An integer array of 0/1 choices, of shape (n,) for 1D input or (m, n) for 2D input.
    :raises ValueError: if V is not binary or has more than two dimensions.
    """
    Parameter.checkClass(V, numpy.ndarray)

    if V.ndim == 1 and V.shape[0] != 2:
        raise ValueError("Function only works on binary probabilities")
    if V.ndim == 2 and V.shape[1] != 2:
        raise ValueError("Function only works on binary probabilities")

    if V.ndim == 1:
        cumV = numpy.cumsum(V)
        p = numpy.random.rand(n)*cumV[-1]
        # Index 1 is chosen when the sample falls at or beyond the first value's mass
        cumV2 = numpy.ones(n)*cumV[0] - p
        # The numpy.int alias was removed from NumPy; the builtin int is the same dtype
        return numpy.array(cumV2 <= 0, int)
    elif V.ndim == 2:
        cumV = numpy.cumsum(V, 1)
        P = numpy.random.rand(V.shape[0], n)*numpy.array([cumV[:, -1]]).T
        cumV2 = numpy.outer(cumV[:, 0], numpy.ones(n)) - P
        return numpy.array(cumV2 <= 0, int)
    else:
        raise ValueError("Invalid number of dimensions")
def array1DToRow(X, precision=3):
    """
    Format a 1D numpy array as a latex table row, i.e. x1 & x2 .. xn, and
    return the resulting string. Arrays of dtype float are written with a
    fixed number of decimal places; all other arrays are written with "%d".

    :param X: The array to print
    :type X: :class:`ndarray`

    :param precision: The precision of the printed floating point numbers.
    :type precision: :class:`int`

    :returns: The formatted row (empty for an empty array).
    :raises ValueError: if X is not one dimensional.
    """
    Parameter.checkInt(precision, 0, 10)

    if X.ndim != 1:
        raise ValueError("Array must be one dimensional")

    if X.dtype == float:
        cellFmt = "%." + str(precision) + "f"
    else:
        cellFmt = "%d"

    # Cells are separated by " & " with no trailing separator
    return " & ".join(cellFmt % X[i] for i in range(X.shape[0]))
def setP(self, p):
    """
    Store the edge probability after validating it with
    ``Parameter.checkFloat`` against the bounds 0.0 and 1.0.

    :param p: the probability of an edge
    :type p: :class:`float`
    """
    lowerBound, upperBound = 0.0, 1.0
    Parameter.checkFloat(p, lowerBound, upperBound)
    self.p = p
def setErrorCost(self, errorCost):
    """
    Set the penalty on errors on positive labels (the penalty for negative
    labels is 1). The value is validated with ``Parameter.checkFloat``
    against the bounds 0.0 and 1.0.

    :param errorCost: the penalty on errors on positive labels.
    :type errorCost: :class:`float`
    """
    lowerBound, upperBound = 0.0, 1.0
    Parameter.checkFloat(errorCost, lowerBound, upperBound)
    self.errorCost = errorCost
def evaluate(self, X1, X2):
    """
    Evaluate the kernel exp(-||x1 - x2||^2 / (2 sigma^2)) between every row
    of X1 and every row of X2. The rows of both matrices are examples and
    they must have an identical number of columns.

    :param X1: First set of examples.
    :type X1: :class:`numpy.ndarray`

    :param X2: Second set of examples.
    :type X2: :class:`numpy.ndarray`

    :returns: A matrix of shape (X1.shape[0], X2.shape[0]) of kernel values.
    :raises ValueError: if the numbers of columns differ.
    """
    Parameter.checkClass(X1, numpy.ndarray)
    Parameter.checkClass(X2, numpy.ndarray)

    if X1.shape[1] != X2.shape[1]:
        raise ValueError("Invalid matrix dimentions: " + str(X1.shape) + " " + str(X2.shape))

    ones1 = numpy.ones((X1.shape[0], 1))
    ones2 = numpy.ones((X2.shape[0], 1))

    sqNorms1 = numpy.sum(X1**2, 1)
    sqNorms2 = numpy.sum(X2**2, 1)

    # 2<x1, x2> - ||x1||^2 - ||x2||^2 equals -||x1 - x2||^2
    crossTerms = 2*numpy.dot(X1, X2.T)
    rowNorms = numpy.outer(sqNorms1, ones2)
    colNorms = numpy.outer(ones1, sqNorms2)
    negSqDists = crossTerms - rowNorms - colNorms

    return numpy.exp(negSqDists / (2*self.sigma**2))
def multiply(self, graph):
    """
    Form the element-wise product of this graph's edge weights with those of
    the input graph, so that only edges present in both graphs survive.

    :param graph: the input graph.
    :type graph: :class:`apgl.graph.PySparseGraph`

    :returns: A new graph whose edge weights are the products of the corresponding weights of this graph and the input graph.
    """
    Parameter.checkClass(graph, PySparseGraph)
    sameSize = graph.getNumVertices() == self.getNumVertices()
    if not sameSize:
        raise ValueError("Can only add edges from graph with same number of vertices")
    if self.undirected != graph.undirected:
        raise ValueError("Both graphs must be either undirected or directed")

    # A product entry can only be non-zero where the matrix with fewer stored
    # entries is non-zero, so only those positions are visited.
    sparser = self.W if self.W.nnz < graph.W.nnz else graph.W
    (rowInds, colInds) = PySparseUtils.nonzero(sparser)

    numEntries = len(rowInds)
    ownVals = numpy.zeros(numEntries)
    otherVals = numpy.zeros(numEntries)
    self.W.take(ownVals, rowInds, colInds)
    graph.W.take(otherVals, rowInds, colInds)

    product = PySparseGraph(self.vList, self.undirected)
    product.W.put(ownVals * otherVals, rowInds, colInds)
    return product
def setDiff(self, graph):
    """
    Compute the edges of this graph that do not occur in the input graph.

    :param graph: the input graph.
    :type graph: :class:`apgl.graph.PySparseGraph`

    :returns: A new graph with edges from the current graph and not in the input graph.
    """
    Parameter.checkClass(graph, PySparseGraph)
    if graph.getNumVertices() != self.getNumVertices():
        raise ValueError("Can only add edges from graph with same number of vertices")
    if self.undirected != graph.undirected:
        raise ValueError("Both graphs must be either undirected or directed")

    ownAdj = self.nativeAdjacencyMatrix()
    otherAdj = graph.nativeAdjacencyMatrix()
    (rowInds, colInds) = PySparseUtils.nonzero(ownAdj)

    numEntries = len(rowInds)
    ownVals = numpy.zeros(numEntries)
    otherVals = numpy.zeros(numEntries)
    ownAdj.take(ownVals, rowInds, colInds)
    otherAdj.take(otherVals, rowInds, colInds)

    # Subtract the other graph's adjacency at the positions where this graph has an edge
    ownAdj.put(ownVals - otherVals, rowInds, colInds)

    difference = PySparseGraph(self.vList, self.undirected)
    difference.W = ownAdj
    return difference
def __init__(self, vertex1Indices, vertex2Indices, converters, undirected=True):
    """
    Store the field indices describing the two vertices of an edge.

    vertex1Indices is a list of fields for the first vertex, with the 1st
    index being the ID; vertex2Indices is the corresponding list for the
    second vertex. The ID index is kept separately and removed from the
    stored copies of the lists, which are not modified in the caller.

    :param vertex1Indices: field indices for the first vertex, ID first.
    :type vertex1Indices: :class:`list`

    :param vertex2Indices: field indices for the second vertex, ID first.
    :type vertex2Indices: :class:`list`

    :param converters: stored unchanged as ``self.converters``.

    :param undirected: stored unchanged as ``self.undirected``.
    :type undirected: :class:`bool`

    :raises ValueError: if either list is empty or the lists differ in length.
    """
    # Both lists must be checked (the second one used to be skipped)
    if len(vertex1Indices) < 1 or len(vertex2Indices) < 1:
        raise ValueError("vertexIndices must have at least 1 index")
    if len(vertex1Indices) != len(vertex2Indices):
        # str() is required: concatenating the int length raised TypeError
        raise ValueError("len(vertex1Indices)=" + str(len(vertex1Indices)) + " and len(vertex2Indices)=" + str(len(vertex2Indices)))

    Parameter.checkList(vertex1Indices, Parameter.checkInt, [0, float('inf')])
    Parameter.checkList(vertex2Indices, Parameter.checkInt, [0, float('inf')])

    self.vertex1IdIndex = vertex1Indices[0]
    self.vertex2IdIndex = vertex2Indices[0]

    # Work on copies so the caller's lists keep their ID entries
    self.vertex1Indices = copy.copy(vertex1Indices)
    self.vertex2Indices = copy.copy(vertex2Indices)
    self.vertex1Indices.remove(self.vertex1IdIndex)
    self.vertex2Indices.remove(self.vertex2IdIndex)

    self.converters = converters
    self.undirected = undirected
    self.edgeWeight = 1
def evaluateCvOuter(self, X, y, folds):
    """
    Compute AUC and ROC metrics using stratified k-fold cross validation,
    with model selection for the linear or RBF kernel according to
    ``self.kernel``.

    :param X: The examples.

    :param y: The labels, used to stratify the folds.

    :param folds: The number of cross validation folds.
    :type folds: :class:`int`

    :returns: A tuple (bestParams, allMetrics, bestMetaDicts) where allMetrics is [train AUCs, train ROCs, test AUCs, test ROCs]; bestParams and bestMetaDicts are always empty dicts here.
    :raises ValueError: if ``self.kernel`` is neither "linear" nor "rbf".
    """
    Parameter.checkInt(folds, 2, float('inf'))
    idx = cross_val.StratifiedKFold(y, folds)
    metricMethods = [Evaluator.auc2, Evaluator.roc]

    if self.kernel == "linear":
        logging.debug("Running linear rank SVM ")
        modelSelect = self.modelSelectLinear
    elif self.kernel == "rbf":
        logging.debug("Running RBF rank SVM")
        modelSelect = self.modelSelectRBF
    else:
        # Previously this fell through to an UnboundLocalError below
        raise ValueError("Unsupported kernel: " + str(self.kernel))

    trainMetrics, testMetrics = AbstractPredictor.evaluateLearn2(X, y, idx, modelSelect, self.predict, metricMethods)

    bestTrainAUCs = trainMetrics[0]
    bestTrainROCs = trainMetrics[1]
    bestTestAUCs = testMetrics[0]
    bestTestROCs = testMetrics[1]

    bestParams = {}
    bestMetaDicts = {}
    allMetrics = [bestTrainAUCs, bestTrainROCs, bestTestAUCs, bestTestROCs]

    return (bestParams, allMetrics, bestMetaDicts)
def cut(self, d):
    """
    Return a new tree containing all the vertices of the current one up to
    a depth of d. The edge and vertex labels are copied by reference only.

    :param d: The depth of the new cut tree
    :type d: :class:`int`

    :returns: A new :class:`DictTree` with the vertices at depth <= d and the edges between them.
    """
    Parameter.checkInt(d, 0, float("inf"))

    root = self.getRootId()
    newTree = DictTree()
    stack = [(root, 0)]
    newTree.setVertex(root)

    while stack:
        (vertexId, depth) = stack.pop()
        # Every stacked vertex has depth <= d: the root has depth 0 and
        # children are only pushed while depth + 1 <= d.
        newTree.setVertex(vertexId, self.getVertex(vertexId))

        if depth + 1 > d:
            # Nothing below this vertex is copied, so do not descend further
            continue

        for neighbour in self.neighbours(vertexId):
            stack.append((neighbour, depth + 1))
            newTree.addEdge(vertexId, neighbour, self.getEdge(vertexId, neighbour))

    return newTree
def parallelVfcvRbf(self, X, y, idx, type="C_SVC"):
    """
    Perform parallel cross validation model selection using the RBF kernel
    by delegating to ``self.parallelModelSelect`` with a grid over C and
    gamma (and additionally epsilon when type is not "C_SVC").

    :param X: The examples as rows
    :type X: :class:`numpy.ndarray`

    :param y: The binary -1/+1 labels
    :type y: :class:`numpy.ndarray`

    :param idx: A list of train/test splits

    :param type: "C_SVC" to search over C and gamma only; any other value also searches over epsilon.
    :type type: :class:`str`

    :returns: Whatever ``self.parallelModelSelect`` returns for the parameter grid.
    """
    Parameter.checkClass(X, numpy.ndarray)
    Parameter.checkClass(y, numpy.ndarray)

    self.setKernel("gaussian")

    # Maps setter names to the candidate values to try for each parameter
    paramDict = {}
    paramDict["setC"] = self.getCs()
    paramDict["setGamma"] = self.getGammas()
    if type != "C_SVC":
        paramDict["setEpsilon"] = self.getEpsilons()

    return self.parallelModelSelect(X, y, idx, paramDict)
def cut(self, d):
    """
    Return a new tree containing all the vertices of the current one up to
    a depth of d. The edge and vertex labels are copied by reference only.

    :param d: The depth of the new cut tree
    :type d: :class:`int`

    :returns: A new :class:`DictTree` with the vertices at depth <= d and the edges between them.
    """
    Parameter.checkInt(d, 0, float("inf"))

    root = self.getRootId()
    newTree = DictTree()
    stack = [(root, 0)]
    newTree.setVertex(root)

    while stack:
        (vertexId, depth) = stack.pop()
        # Every stacked vertex has depth <= d: the root has depth 0 and
        # children are only pushed while depth + 1 <= d.
        newTree.setVertex(vertexId, self.getVertex(vertexId))

        if depth + 1 > d:
            # Nothing below this vertex is copied, so do not descend further
            continue

        for neighbour in self.neighbours(vertexId):
            stack.append((neighbour, depth + 1))
            newTree.addEdge(vertexId, neighbour, self.getEdge(vertexId, neighbour))

    return newTree
def diameter(self, useWeights=False, P=None):
    """
    Finds the diameter of a graph i.e. the longest shortest path. If
    useWeights is True then the weights in the adjacency matrix are used if
    P is not provided. Infinite entries of P are ignored.

    :param useWeights: Whether to use edge weights to compute a diameter.
    :type useWeights: :class:`bool`

    :param P: An optional nxn matrix whose ijth entry is the shortest path from i to j.
    :type P: :class:`ndarray`

    :returns: The diameter of this graph (0 if there are no edges); an int when useWeights is False and a float otherwise.
    :raises ValueError: if P is given but is not an array of shape (n, n).
    """
    Parameter.checkBoolean(useWeights)

    # Identity tests are required here: comparing an ndarray with None using
    # != / == is elementwise and cannot be used as a condition.
    if P is not None:
        expectedShape = (self.getNumVertices(), self.getNumVertices())
        if not isinstance(P, numpy.ndarray) or P.shape != expectedShape:
            logging.debug("P.shape = " + str(getattr(P, "shape", None)) + " W.shape = " + str(self.W.shape))
            raise ValueError("P must be array of same size as weight matrix of graph")

    if self.getNumEdges() == 0:
        return 0

    if P is None:
        P = self.floydWarshall(useWeights)

    # Boolean indexing builds a new array, so P itself is never modified
    longest = numpy.max(P[P != float('inf')])
    if useWeights:
        return float(longest)
    return int(longest)
def breadthFirstSearch(self, root):
    """
    Breadth first search starting from a particular vertex. Returns a list
    of connected vertices in the order they were found. Neighbours of a
    vertex are explored in sorted order.

    :param root: The index of the root vertex.
    :type root: :class:`int`

    :returns: A list of vertices connected to the input one via a path in the graph.
    """
    from collections import deque

    Parameter.checkIndex(root, 0, self.size)

    # A deque gives O(1) removal from the front of the queue
    toVisit = deque([root])
    visited = set()
    searchPath = []

    while toVisit:
        currentVertex = toVisit.popleft()
        if currentVertex in visited:
            # Queued more than once; its neighbours were enqueued on the first visit
            continue

        visited.add(currentVertex)
        searchPath.append(currentVertex)

        neighbours = self.neighbours(currentVertex)
        toVisit.extend(sorted(set(neighbours).difference(visited)))

    return searchPath
def setEll(self, ell):
    """
    Store the initial number of vertices after validating it with
    ``Parameter.checkInt`` against the bounds 2 and infinity.

    :param ell: the initial number of vertices.
    :type ell: :class:`int`
    """
    lowerBound, upperBound = 2, float('inf')
    Parameter.checkInt(ell, lowerBound, upperBound)
    self.ell = ell
def setSelfEdges(self, selfEdges):
    """
    Store the self-edge flag after validating it with
    ``Parameter.checkBoolean``.

    :param selfEdges: whether to allow self edges
    :type selfEdges: :class:`bool`
    """
    allowSelfEdges = selfEdges
    Parameter.checkBoolean(allowSelfEdges)
    self.selfEdges = allowSelfEdges
def parallelPenaltyGridRbf(svm, X, y, fullX, gridPoints, pdfX, pdfY1X, pdfYminus1X):
    """
    Find out the "ideal" penalty for every (C, gamma) pair of the given svm
    by evaluating ``computeIdealPenalty`` in a multiprocessing pool.

    :param svm: object providing the candidate values ``Cs`` and ``gammas``.

    :param X: The examples.
    :type X: :class:`numpy.ndarray`

    :param y: The labels.
    :type y: :class:`numpy.ndarray`

    The remaining arguments (fullX, gridPoints, pdfX, pdfY1X, pdfYminus1X)
    are passed through unchanged to ``computeIdealPenalty``.

    :returns: An array of shape (len(Cs), len(gammas)) whose (i, j) entry is the result for Cs[i] and gammas[j].
    """
    Parameter.checkClass(X, numpy.ndarray)
    Parameter.checkClass(y, numpy.ndarray)

    chunkSize = 10
    numCs = svm.Cs.shape[0]
    numGammas = svm.gammas.shape[0]
    idealPenalties = numpy.zeros((numCs, numGammas))

    # One task per grid cell, in row-major (C, gamma) order
    paramList = [
        (X, y, fullX, svm.Cs[i], svm.gammas[j], gridPoints, pdfX, pdfY1X, pdfYminus1X)
        for i in range(numCs)
        for j in range(numGammas)
    ]

    pool = multiprocessing.Pool()
    try:
        # imap yields results in task order, matching the grid layout above
        resultsIterator = pool.imap(computeIdealPenalty, paramList, chunkSize)
        for i in range(numCs):
            for j in range(numGammas):
                idealPenalties[i, j] = next(resultsIterator)
    finally:
        # Release the worker processes even if a task raised
        pool.terminate()

    return idealPenalties
def setRandomInfected(self, numInitialInfected, proportionHetero, t=0.0):
    """
    Infect randomly chosen people at time t. int(numInitialInfected *
    proportionHetero) of them are drawn from the heterosexual vertices and
    the remainder from the bisexual vertices. If a group has fewer members
    than requested, only the available members are infected.

    :param numInitialInfected: the total number of people to infect.
    :type numInitialInfected: :class:`int`

    :param proportionHetero: the proportion of the infected who are heterosexual.
    :type proportionHetero: :class:`float`

    :param t: the time of infection.
    :type t: :class:`float`
    """
    Parameter.checkInt(numInitialInfected, 0, self.size)
    Parameter.checkFloat(proportionHetero, 0.0, 1.0)

    allInds = numpy.arange(self.size)
    orientations = self.vlist.V[:, HIVVertices.orientationIndex]
    heteroInds = allInds[orientations == HIVVertices.hetero]
    biInds = allInds[orientations == HIVVertices.bi]

    numHetero = int(numInitialInfected*proportionHetero)
    numBi = numInitialInfected - numHetero

    # Shuffle positions within each group and keep the leading ones
    heteroChosen = numpy.random.permutation(heteroInds.shape[0])[0:numHetero]
    biChosen = numpy.random.permutation(biInds.shape[0])[0:numBi]

    for vertexInd in heteroInds[heteroChosen]:
        self.vlist.setInfected(vertexInd, t)

    for vertexInd in biInds[biChosen]:
        self.vlist.setInfected(vertexInd, t)
def setSampleReplace(self, sampleReplace):
    """
    Store the sampling mode after validating it with
    ``Parameter.checkBoolean``.

    :param sampleReplace: A boolean to decide whether to sample with replacement.
    :type sampleReplace: :class:`bool`
    """
    withReplacement = sampleReplace
    Parameter.checkBoolean(withReplacement)
    self.sampleReplace = withReplacement
def setWeight(self, weight):
    """
    Store the weight on the positive examples after validating it with
    ``Parameter.checkFloat`` against the bounds 0.0 and 1.0.

    :param weight: the weight on the positive examples between 0 and 1 (the negative weight is 1-weight)
    :type weight: :class:`float`
    """
    lowerBound, upperBound = 0.0, 1.0
    Parameter.checkFloat(weight, lowerBound, upperBound)
    self.weight = weight
def randomChoice(V, n=1):
    """
    Make a random choice from a vector V of values which are unnormalised
    probabilities and return the corresponding index. For example if
    v = [1, 2, 4] then the probabilities of the indices are respectively
    [1/7, 2/7, 4/7]. If V is a matrix, each row is taken as a set of
    probabilities and choices are made independently for each row.

    :param V: 1D or 2D array of unnormalised probabilities.
    :type V: :class:`numpy.ndarray`

    :param n: The number of random choices to make (per row for a matrix).
    :type n: :class:`int`

    :returns: -1 if V has no rows/elements; an array of n indices for 1D input; an array of shape (V.shape[0], n) of indices for 2D input.
    :raises ValueError: if V has more than two dimensions.
    """
    Parameter.checkClass(V, numpy.ndarray)

    if V.shape[0] == 0:
        return -1

    if V.ndim == 1:
        cumV = numpy.cumsum(V)
        # Scale uniform samples to the total mass and locate them in the cumulative sum
        p = numpy.random.rand(n)*cumV[-1]
        return numpy.searchsorted(cumV, p)
    elif V.ndim == 2:
        cumV = numpy.cumsum(V, 1)
        P = numpy.random.rand(V.shape[0], n)*numpy.array([cumV[:, -1]]).T

        # The numpy.int alias was removed from NumPy; the builtin int is the same dtype
        inds = numpy.zeros(P.shape, int)
        for i in range(P.shape[0]):
            inds[i, :] = numpy.searchsorted(cumV[i, :], P[i, :])

        return inds
    else:
        raise ValueError("Invalid number of dimensions")
def setSampleSize(self, sampleSize):
    """
    Store the sample size used for each tree. The value is validated with
    ``Parameter.checkFloat`` against the bounds 0.0 and 1.0, so it is
    presumably a proportion of the examples rather than a count -- TODO
    confirm against the code that reads ``self.sampleSize``.

    :param sampleSize: The amount of examples to randomly sample for each tree.
    :type sampleSize: :class:`float`
    """
    lowerBound, upperBound = 0.0, 1.0
    Parameter.checkFloat(sampleSize, lowerBound, upperBound)
    self.sampleSize = sampleSize
def setEll(self, ell):
    """
    Store the initial number of vertices after validating it with
    ``Parameter.checkInt`` against the bounds 2 and infinity.

    :param ell: the initial number of vertices.
    :type ell: :class:`int`
    """
    lowerBound, upperBound = 2, float("inf")
    Parameter.checkInt(ell, lowerBound, upperBound)
    self.ell = ell
def scalarStatistics(self, graph):
    """
    Compute a vector of scalar statistics for the input graph based on
    harmonic geodesic distances: between bisexual ('HB') vertices, within
    the subgraph of male vertices, within the subgraph of contact-traced
    vertices, and between the most connected vertices. Statistics which
    cannot be computed because the corresponding vertex set is empty are
    left at -1.

    :param graph: the input graph.
    :type graph: :class:`AbstractMatrixGraph`

    :returns: An array of length ``self.numStats`` of statistics.
    """
    Parameter.checkClass(graph, AbstractMatrixGraph)

    statsArray = numpy.ones(self.numStats)*-1

    #Find geodesic distance between MSMs
    logging.debug("Running Floyd-Warshall")
    P = graph.floydWarshall(False)
    V = graph.getVertexList().getVertices(list(range(graph.getNumVertices())))

    bisexual = CsvConverters.orientConv('HB')
    msmIndices = list(numpy.nonzero(V[:, self.fInds["orient"]]==bisexual)[0])
    if len(msmIndices) != 0:
        statsArray[self.msmGeodesicIndex] = graph.harmonicGeodesicDistance(P, msmIndices)

    male = CsvConverters.genderConv('M')
    menIndices = list(numpy.nonzero(V[:, self.fInds["gender"]]==male)[0])
    if len(menIndices) != 0:
        menGraph = graph.subgraph(menIndices)
        statsArray[self.menSubgraphGeodesicIndex] = menGraph.harmonicGeodesicDistance()

    contactTrIndices = list(numpy.nonzero(V[:, self.fInds["contactTrace"]]==1)[0])
    if len(contactTrIndices) != 0:
        ctGraph = graph.subgraph(contactTrIndices)
        statsArray[self.ctSubgraphGeodesicIndex] = ctGraph.harmonicGeodesicDistance()

    degreeSequence = graph.outDegreeSequence()
    sortedInds = numpy.argsort(degreeSequence)
    numInds = int(float(graph.getNumVertices())*self.topConnect)
    # sortedInds[-0:] would select every vertex rather than none, so the
    # statistic is skipped when no vertices are requested.
    if numInds > 0:
        topConnectInds = sortedInds[-numInds:]
        statsArray[self.mostConnectedGeodesicIndex] = graph.harmonicGeodesicDistance(P, topConnectInds)

    return statsArray
def setVertices(self, vertices, indices=None):
    """
    Set the vertices to the given list of vertices. If indices = None then
    all vertices are replaced, and if not the given indices are used.

    :param vertices: a list of vertices.
    :type vertices: :class:`list`

    :param indices: a list of indices of the same length as vertices or None for all indices in this object.
    :type indices: :class:`list`

    :raises ValueError: if the number of vertices does not match the number of indices (or the size of this object when indices is None).
    """
    # Identity test: "indices != None" is elementwise for array-like indices
    if indices is None:
        if len(vertices) != len(self.V):
            raise ValueError("Incorrect number of vertices " + str(len(vertices)) + ", expecting " + str(len(self.V)))

        for i, vertex in enumerate(vertices):
            self.V[i] = vertex
        return

    Parameter.checkList(indices, Parameter.checkIndex, [0, len(self.V)])
    if len(vertices) != len(indices):
        raise ValueError("Length of indices list must be same as that of vertices list")

    for index, vertex in zip(indices, vertices):
        self.V[index] = vertex
def bootstrap2(repetitions, numExamples):
    """
    Generate bootstrap train/test splits. For each repetition a training
    sample of size numExamples is drawn with replacement from the indices
    0..numExamples-1, and the indices which were not drawn form the test
    set. Returns a list of tuples of the form (trainIndices, testIndices).

    NOTE: the test set is not oversampled with training examples (the 0.632
    bootstrap correction); only the out-of-sample indices are returned.

    :param repetitions: The number of repetitions of bootstrap to perform.
    :type repetitions: :class:`int`

    :param numExamples: The number of examples.
    :type numExamples: :class:`int`
    """
    Parameter.checkInt(numExamples, 2, float('inf'))
    Parameter.checkInt(repetitions, 1, float('inf'))

    splits = []
    for _ in range(repetitions):
        trainInds = numpy.random.randint(numExamples, size=numExamples)
        drawn = numpy.unique(trainInds)
        testInds = numpy.setdiff1d(numpy.arange(numExamples), drawn)
        splits.append((trainInds, testInds))

    return splits
def predictEdges(self, vertexIndices):
    r"""
    Make a prediction for a series of edges using the score

        score(x, y) = \sum_{z \in n(x) \cap n(y)} 1/\log|n(z)|

    where n(v) is the set of vertices with a non-zero weight in the row of v
    in the weight matrix. Common neighbours z for which \log|n(z)| is zero
    are skipped.

    :param vertexIndices: the indices of the vertices to make predictions for.
    :type vertexIndices: :class:`numpy.ndarray`

    :returns: A pair (P, S) of matrices with one row per input vertex and self.windowSize columns, filled from ``self.indicesFromScores``.
    """
    numVertices = self.graph.getNumVertices()
    Parameter.checkInt(self.windowSize, 1, numVertices)
    logging.info("Running predictEdges in " + str(self.__class__.__name__))

    P = numpy.zeros((vertexIndices.shape[0], self.windowSize))
    S = numpy.zeros((vertexIndices.shape[0], self.windowSize))
    W = self.graph.getWeightMatrix()

    # log-degree of each vertex, computed on first use: it only depends on W
    # and costs a scan of a whole row.
    logDegrees = {}

    for i in range(vertexIndices.shape[0]):
        Util.printIteration(i, self.printStep, vertexIndices.shape[0])
        scores = numpy.zeros(numVertices)
        sourceRow = W[vertexIndices[i], :]

        for j in range(numVertices):
            commonNeighbours = numpy.nonzero(sourceRow * W[j, :])[0]

            for k in commonNeighbours:
                k = int(k)
                if k not in logDegrees:
                    logDegrees[k] = numpy.log(numpy.nonzero(W[k, :])[0].shape[0])
                q = logDegrees[k]
                if q != 0:
                    scores[j] = scores[j] + 1/q

        P[i, :], S[i, :] = self.indicesFromScores(vertexIndices[i], scores)

    return P, S
def svd(A, k, q=2):
    """
    Compute an approximate SVD of a sparse or dense matrix A using a random
    projection onto k directions followed by q power iterations. Returns the
    left singular vectors U, the singular values s and the right singular
    vectors V, so that A can be approximated by U diag(s) V.T.

    :param A: the matrix to decompose; must provide ``shape``, ``dot`` and ``T``.

    :param k: the number of singular vectors/values to find.
    :type k: :class:`int`

    :param q: the number of power iterations.
    :type q: :class:`int`

    :returns: A tuple (U, s, V).
    """
    Parameter.checkInt(k, 1, float("inf"))
    Parameter.checkInt(q, 1, float("inf"))

    # The random test matrix needs one row per column of A so that
    # A.dot(omega) is defined for non-square A as well.
    omega = numpy.random.randn(A.shape[1], k)
    Y = A.dot(omega)

    # Power iterations: Y = (A A^T)^q A omega
    for i in range(q):
        Y = A.T.dot(Y)
        Y = A.dot(Y)

    # Orthonormal basis for the range of Y, then an exact SVD of the
    # projection of A onto that basis
    Q, R = numpy.linalg.qr(Y)
    B = A.T.dot(Q).T

    U, s, V = numpy.linalg.svd(B, full_matrices=False)
    V = V.T
    U = Q.dot(U)

    return U, s, V
def randCrossValidation(folds, numExamples):
    """
    Build k-fold cross validation splits from a random permutation of the
    example indices. Each fold takes a contiguous slice of the permutation
    as its test set and all remaining indices as its training set. Returns
    a list of tuples (trainIndices, testIndices).

    :param folds: The number of cross validation folds.
    :type folds: :class:`int`

    :param numExamples: The number of examples.
    :type numExamples: :class:`int`
    """
    Parameter.checkInt(folds, 1, numExamples)
    Parameter.checkInt(numExamples, 2, float('inf'))

    foldSize = float(numExamples)/folds
    shuffled = numpy.random.permutation(numExamples)

    splits = []
    for fold in range(folds):
        # Fractional fold boundaries are truncated to integer positions
        start = int(foldSize*fold)
        end = int(foldSize*(fold+1))
        testIndices = shuffled[start:end]
        trainIndices = numpy.setdiff1d(numpy.arange(0, numExamples), testIndices)
        splits.append((trainIndices, testIndices))

    return splits
def setBestResponse(self, bestResponse):
    """
    Store the label treated as "positive" after validating it with
    ``Parameter.checkInt`` against the bounds -infinity and infinity.

    :param bestResponse: the label corresponding to "positive"
    :type bestResponse: :class:`int`
    """
    unbounded = float('inf')
    Parameter.checkInt(bestResponse, -unbounded, unbounded)
    self.bestResponse = bestResponse
def setM(self, m):
    """
    Store the number of edges added at each step after validating it with
    ``Parameter.checkInt`` against the bounds 0 and ``self.ell``.

    :param m: the number of edges to be added at each step
    :type m: :class:`int`
    """
    upperBound = self.ell
    Parameter.checkInt(m, 0, upperBound)
    self.m = m
def depthFirstSearch(self, root):
    """
    Traverse the graph depth first from the given vertex and return the
    connected vertices in the order in which they were first reached.

    :param root: The index of the root vertex.
    :type root: :class:`int`

    :returns: A list of vertices connected to the input one via a path in the graph.
    """
    Parameter.checkIndex(root, 0, self.size)

    path = [root]
    seen = set()
    order = []

    while path:
        top = path[-1]

        if top not in seen:
            seen.add(top)
            order.append(top)

        candidates = set(self.neighbours(top)).difference(seen)

        if candidates:
            # Descend into an arbitrary unvisited neighbour
            path.append(candidates.pop())
        else:
            # Dead end: backtrack to the previous vertex on the path
            path.pop()

    return order
def setDiff(self, graph):
    """
    Compute the edges of this graph that do not occur in the input graph.
    The result is built from the adjacency matrices of the two graphs and
    returned as a new graph.

    :param graph: the input graph.
    :type graph: :class:`apgl.graph.DenseGraph`

    :returns: The graph which is the set difference of the edges of this graph and graph.
    """
    Parameter.checkClass(graph, DenseGraph)
    if graph.getNumVertices() != self.getNumVertices():
        raise ValueError("Can only add edges from graph with same number of vertices")
    if self.undirected != graph.undirected:
        raise ValueError("Both graphs must be either undirected or directed")

    diff = self.adjacencyMatrix() - graph.adjacencyMatrix()
    # (x + |x**2|)/2 maps 1 to 1 and both 0 and -1 to 0
    # (assumes the adjacency entries are 0/1 -- TODO confirm)
    A = (diff + numpy.abs(diff**2)) / 2

    difference = DenseGraph(self.vList, self.undirected)
    difference.W = A
    return difference
def removeEdge(self, vertexIndex1, vertexIndex2, edgeTypeIndex):
    """
    Remove the edge of the given type between two vertices by delegating to
    the sparse graph stored for that edge type.

    @param vertexIndex1: The index of the first vertex.
    @param vertexIndex2: The index of the second vertex.
    @param edgeTypeIndex: The index of the edge type, checked against 0 and self.maxEdgeTypes.
    """
    Parameter.checkIndex(edgeTypeIndex, 0, self.maxEdgeTypes)
    typedGraph = self.sparseGraphs[edgeTypeIndex]
    typedGraph.removeEdge(vertexIndex1, vertexIndex2)
def setK(self, k):
    """
    Store the number of iterations k after validating it with
    ``Parameter.checkInt`` against the bounds 1 and infinity.

    :param k: The number of iterations.
    :type k: :class:`int`
    """
    lowerBound, upperBound = 1, float('inf')
    Parameter.checkInt(k, lowerBound, upperBound)
    self.k = k
def clearVertex(self, index):
    """
    Reset the vertex at the given index to None.

    :param index: the index of the vertex to assign a value.
    :type index: :class:`int`
    """
    numVertices = len(self.V)
    Parameter.checkIndex(index, 0, numVertices)
    self.V[index] = None
def getVertex(self, index):
    """
    Look up the value stored for a vertex.

    :param index: the index of the vertex.
    :type index: :class:`int`

    :returns: The value of the vertex at the given index.
    """
    numVertices = len(self.V)
    Parameter.checkIndex(index, 0, numVertices)
    vertex = self.V[index]
    return vertex
def setK(self, k):
    """
    Store the number of neighbours of each vertex after validating it with
    ``Parameter.checkIndex`` against the bounds 0 and infinity.

    :param k: the number of neighbours in the regular lattice.
    :type k: :class:`int`
    """
    lowerBound, upperBound = 0, float('inf')
    Parameter.checkIndex(k, lowerBound, upperBound)
    self.k = k
def clearVertex(self, index):
    """
    Reset the vertex at the given index to the all-zeros array.

    :param index: the index of the vertex to assign a value.
    :type index: :class:`int`
    """
    (numVertices, numFeatures) = (self.V.shape[0], self.V.shape[1])
    Parameter.checkIndex(index, 0, numVertices)
    zeroRow = numpy.zeros((1, numFeatures))
    self.V[index, :] = zeroRow
def vectorStatistics(self, graph, treeStats=False, eigenStats=True):
    """
    Find a series of statistics for the given input graph which can be
    represented as vector values.

    :param graph: the input graph.
    :type graph: :class:`AbstractMatrixGraph`

    :param treeStats: whether to compute the distributions of tree sizes and depths.
    :type treeStats: :class:`bool`

    :param eigenStats: whether to compute the eigenvalues and leading eigenvector of the symmetrised weight matrix.
    :type eigenStats: :class:`bool`

    :returns: A dict with keys "inDegreeDist", "outDegreeDist", "hopCount", "triangleDist", "maxEigVector" and "eigenDist"; "componentsDist" for undirected graphs with at least one component; "treeSizesDist" and "treeDepthsDist" when treeStats is True.
    """
    Parameter.checkClass(graph, AbstractMatrixGraph)
    Parameter.checkBoolean(treeStats)
    statsDict = {}

    statsDict["inDegreeDist"] = graph.inDegreeDistribution()
    statsDict["outDegreeDist"] = graph.degreeDistribution()
    logging.debug("Computing hop counts")
    P = graph.findAllDistances(False)
    statsDict["hopCount"] = graph.hopCount(P)
    logging.debug("Computing triangle count")
    if graph.getNumVertices() != 0:
        statsDict["triangleDist"] = numpy.bincount(graph.triangleSequence())
    else:
        statsDict["triangleDist"] = numpy.array([])

    #Get the distribution of component sizes
    logging.debug("Finding distribution of component sizes")

    if graph.isUndirected():
        components = graph.findConnectedComponents()
        if len(components) != 0:
            # The numpy.int alias was removed from NumPy; the builtin int is the same dtype
            componentSizes = numpy.array([len(c) for c in components], int)
            statsDict["componentsDist"] = numpy.bincount(componentSizes)

    #Make sure weight matrix is symmetric
    if graph.getNumVertices() != 0 and eigenStats:
        logging.debug("Computing eigenvalues/vectors")
        W = graph.getWeightMatrix()
        W = (W + W.T) / 2
        eigenDistribution, V = numpy.linalg.eig(W)
        i = numpy.argmax(eigenDistribution)
        statsDict["maxEigVector"] = V[:, i]
        # Positive eigenvalues in decreasing order
        statsDict["eigenDist"] = numpy.flipud(numpy.sort(eigenDistribution[eigenDistribution > 0]))
        gc.collect()
    else:
        statsDict["maxEigVector"] = numpy.array([])
        statsDict["eigenDist"] = numpy.array([])

    if treeStats:
        logging.debug("Computing statistics on trees")
        trees = graph.findTrees()
        statsDict["treeSizesDist"] = numpy.bincount([len(x) for x in trees])
        treeDepths = [GraphUtils.treeDepth((graph.subgraph(x))) for x in trees]
        statsDict["treeDepthsDist"] = numpy.bincount(treeDepths)

    return statsDict
def setP(self, p):
    """
    Store the rewiring probability after validating it with
    ``Parameter.checkFloat`` against the bounds 0.0 and 1.0.

    :param p: the probability of rewiring an edge.
    :type p: :class:`float`
    """
    lowerBound, upperBound = 0.0, 1.0
    Parameter.checkFloat(p, lowerBound, upperBound)
    self.p = p
def addEdge(self, vertexIndex1, vertexIndex2, edgeTypeIndex, edge=1):
    """
    Add an edge of the given type between two vertices by delegating to the
    sparse graph stored for that edge type.

    @param vertexIndex1: The index of the first vertex.
    @param vertexIndex2: The index of the second vertex.
    @param edgeTypeIndex: The index of the edge type, checked against 0 and self.maxEdgeTypes.
    @param edge: The value to assign to the edge.
    """
    Parameter.checkIndex(edgeTypeIndex, 0, self.maxEdgeTypes)
    typedGraph = self.sparseGraphs[edgeTypeIndex]
    typedGraph.addEdge(vertexIndex1, vertexIndex2, edge)
def subList(self, indices):
    """
    Build a new vertex list holding only the vertices of this object at the
    given indices, in the order given.

    :param indices: the indices of the vertices to keep.
    :type indices: :class:`list`

    :returns: A new :class:`GeneralVertexList` of length len(indices).
    """
    indexBounds = (0, self.getNumVertices())
    Parameter.checkList(indices, Parameter.checkIndex, indexBounds)

    selected = self.getVertices(indices)
    subset = GeneralVertexList(len(indices))
    subset.setVertices(selected)
    return subset