def multiply(self, graph):
        """
        Compute the element-wise product of the edge weights of this graph and
        the input graph. An entry can only be non-zero in the product if it is
        non-zero in both graphs, so the result is an intersection of the edges.

        :param graph: the input graph.
        :type graph: :class:`apgl.graph.PySparseGraph`

        :returns: A new graph whose edge weights are the products of the corresponding weights of this graph and the input graph.

        :raises ValueError: if the graphs differ in number of vertices or in directedness.
        """
        Parameter.checkClass(graph, PySparseGraph)
        if self.getNumVertices() != graph.getNumVertices():
            raise ValueError(
                "Can only add edges from graph with same number of vertices")
        if graph.undirected != self.undirected:
            raise ValueError(
                "Both graphs must be either undirected or directed")

        # Only the non-zero positions of the matrix with fewer entries can
        # survive the product, so those are the only ones visited.
        sparser = self.W if self.W.nnz < graph.W.nnz else graph.W
        (rowInds, colInds) = PySparseUtils.nonzero(sparser)

        numEntries = len(rowInds)
        ownVals = numpy.zeros(numEntries)
        otherVals = numpy.zeros(numEntries)
        self.W.take(ownVals, rowInds, colInds)
        graph.W.take(otherVals, rowInds, colInds)

        product = ownVals * otherVals

        result = PySparseGraph(self.vList, self.undirected)
        result.W.put(product, rowInds, colInds)
        return result
    def setDiff(self, graph):
        """
        Compute the edge set difference: the edges of this graph which do not
        appear in the input graph.

        :param graph: the input graph.
        :type graph: :class:`apgl.graph.PySparseGraph`

        :returns: A new graph with edges from the current graph and not in the input graph.

        :raises ValueError: if the graphs differ in number of vertices or in directedness.
        """
        Parameter.checkClass(graph, PySparseGraph)
        if self.getNumVertices() != graph.getNumVertices():
            raise ValueError(
                "Can only add edges from graph with same number of vertices")
        if graph.undirected != self.undirected:
            raise ValueError(
                "Both graphs must be either undirected or directed")

        ownAdj = self.nativeAdjacencyMatrix()
        otherAdj = graph.nativeAdjacencyMatrix()

        # Only positions which are edges in this graph need to be examined
        (rowInds, colInds) = PySparseUtils.nonzero(ownAdj)
        numEntries = len(rowInds)
        ownVals = numpy.zeros(numEntries)
        otherVals = numpy.zeros(numEntries)
        ownAdj.take(ownVals, rowInds, colInds)
        otherAdj.take(otherVals, rowInds, colInds)

        # Subtracting cancels the entries that are also set in the input graph
        ownAdj.put(ownVals - otherVals, rowInds, colInds)

        result = PySparseGraph(self.vList, self.undirected)
        result.W = ownAdj
        return result
Exemple #3
0
    def randomChoice(V, n=1):
        """
        Make a random choice from a vector V of values which are unnormalised
        probabilities. Return the corresponding index. For example if v = [1, 2, 4]
        then the probability of the indices respectively are [1/7, 2/7, 4/7]. The
        parameter n is the number of random choices to make. If V is a matrix,
        then the rows are taken as probabilities, and a choice is made for each
        row.

        :param V: A 1D or 2D array of unnormalised probabilities.
        :type V: :class:`numpy.ndarray`

        :param n: The number of random choices to make (per row if V is 2D).
        :type n: :class:`int`

        :returns: -1 if the first dimension of V is empty; an array of n indices if V is 1D; an integer array of shape (V.shape[0], n) if V is 2D.

        :raises ValueError: if V has more than 2 dimensions.
        """
        Parameter.checkClass(V, numpy.ndarray)

        if V.shape[0] == 0:
            return -1

        if V.ndim == 1:
            cumV = numpy.cumsum(V)
            # Scale uniform samples to the total mass, then locate them in the CDF
            p = numpy.random.rand(n) * cumV[-1]
            return numpy.searchsorted(cumV, p)
        elif V.ndim == 2:
            cumV = numpy.cumsum(V, 1)
            # Each row of uniform samples is scaled by that row's total mass
            P = numpy.random.rand(V.shape[0], n) * numpy.array([cumV[:, -1]]).T

            # The builtin int is used because the numpy.int alias was removed
            # from NumPy (deprecated in 1.20, removed in 1.24).
            inds = numpy.zeros(P.shape, int)
            for i in range(P.shape[0]):
                inds[i, :] = numpy.searchsorted(cumV[i, :], P[i, :])

            return inds
        else:
            raise ValueError("Invalid number of dimensions")
Exemple #4
0
    def random2Choice(V, n=1):
        """
        Make a random binary choice from a vector V of values which are unnormalised
        probabilities. Return the corresponding index. For example if v = [1, 2]
        then the probability of the indices respectively are [1/3, 2/3]. The
        parameter n is the number of random choices to make. If V is a matrix,
        then the rows are taken as probabilities, and a choice is made for each
        row.

        :param V: A 1D array of length 2, or a 2D array with 2 columns, of unnormalised probabilities.
        :type V: :class:`numpy.ndarray`

        :param n: The number of random choices to make (per row if V is 2D).
        :type n: :class:`int`

        :returns: An integer array of 0/1 indices, of length n for 1D input or shape (V.shape[0], n) for 2D input.

        :raises ValueError: if V is not binary along its last axis or has more than 2 dimensions.
        """
        Parameter.checkClass(V, numpy.ndarray)

        if V.ndim == 1 and V.shape[0] != 2:
            raise ValueError("Function only works on binary probabilities")
        if V.ndim == 2 and V.shape[1] != 2:
            raise ValueError("Function only works on binary probabilities")

        # The builtin int replaces the numpy.int alias, which was deprecated in
        # NumPy 1.20 and removed in 1.24.
        if V.ndim == 1:
            cumV = numpy.cumsum(V)
            p = numpy.random.rand(n) * cumV[-1]
            # Index 1 is chosen when the sample falls at or beyond the mass of index 0
            cumV2 = numpy.ones(n) * cumV[0] - p
            return numpy.array(cumV2 <= 0, int)
        elif V.ndim == 2:
            cumV = numpy.cumsum(V, 1)
            P = numpy.random.rand(V.shape[0], n) * numpy.array([cumV[:, -1]]).T
            cumV2 = numpy.outer(cumV[:, 0], numpy.ones(n)) - P
            return numpy.array(cumV2 <= 0, int)
        else:
            raise ValueError("Invalid number of dimensions")
    def concat(self, graph):
        """
        Concatenate the input graph to this one. The vertices of this graph come
        first in the vertex list of the result, followed by those of the input
        graph, and the weight matrix is block diagonal so no edges join the two
        parts.

        :param graph: the input graph.
        :type graph: :class:`apgl.graph.SparseGraph`

        :returns: A new graph containing the vertices and edges of both graphs.

        :raises ValueError: if the vertex list types or the directedness of the graphs differ.
        """
        Parameter.checkClass(graph, SparseGraph)
        if type(self.getVertexList()) != type(graph.getVertexList()):
            raise ValueError("Vertex lists must be of same type")
        if self.isUndirected() != graph.isUndirected():
            raise ValueError("Graphs must be of the same directed type")

        numOwn = self.getNumVertices()
        numTotal = numOwn + graph.getNumVertices()

        combined = GeneralVertexList(numTotal)
        ownPositions = list(range(numOwn))
        otherPositions = list(range(numOwn, numTotal))
        combined.setVertices(self.getVertexList().getVertices(), ownPositions)
        combined.setVertices(graph.getVertexList().getVertices(), otherPositions)

        # NOTE(review): the directedness of the inputs is not passed to the
        # constructor here - confirm the default is what is intended.
        result = SparseGraph(combined)

        blockW = scipy.sparse.bmat([[self.W, None], [None, graph.W]], format="csr")
        result.setWeightMatrixSparse(blockW)

        return result
    def setDiff(self, graph):
        """
        Compute the edge set difference: the edges of this graph which do not
        appear in the input graph.

        :param graph: the input graph.
        :type graph: :class:`apgl.graph.SparseGraph`

        :returns: A new graph with edges from the current graph and not in the input graph.

        :raises ValueError: if the graphs differ in number of vertices or in directedness.
        """
        Parameter.checkClass(graph, SparseGraph)
        if self.getNumVertices() != graph.getNumVertices():
            raise ValueError(
                "Can only add edges from graph with same number of vertices")
        if graph.undirected != self.undirected:
            raise ValueError(
                "Both graphs must be either undirected or directed")

        ownAdj = self.nativeAdjacencyMatrix()
        otherAdj = graph.nativeAdjacencyMatrix()
        diff = ownAdj - otherAdj

        # With d an entry of diff, (d + d*d)/2 maps 1 -> 1 and both 0 and -1 -> 0
        # (assuming the adjacency matrices hold only 0/1 entries - TODO confirm).
        squared = diff.multiply(diff)
        A = (diff + squared) / 2
        A.prune()

        result = SparseGraph(self.vList, self.undirected)
        result.W = A
        return result
    def add(self, graph):
        """
        Sum the edge weights of this graph and the input graph. An edge is
        present in the result if it is present in either graph, so this is a
        union of the edges.

        :param graph: the input graph.
        :type graph: :class:`apgl.graph.SparseGraph`

        :returns: A new graph with same vertex list and addition of edge weights

        :raises ValueError: if the graphs differ in number of vertices or in directedness.
        """
        Parameter.checkClass(graph, SparseGraph)
        if self.getNumVertices() != graph.getNumVertices():
            raise ValueError(
                "Can only add edges from graph with same number of vertices")
        if graph.undirected != self.undirected:
            raise ValueError(
                "Both graphs must be either undirected or directed")

        # Adding the two weight matrices directly would give a csr matrix, so
        # start from a copy of this graph's weights and update entry by entry.
        result = SparseGraph(self.vList, self.undirected)
        result.W = self.W.copy()

        (rowInds, colInds) = numpy.nonzero(graph.W)
        for (row, col) in zip(rowInds, colInds):
            result.W[row, col] = self.W[row, col] + graph.W[row, col]

        return result
    def addEdge(self, vertexIndex1, vertexIndex2, edge=1):
        """
        Set a non-zero edge value between a pair of vertices. For an undirected
        graph the value is written in both directions.

        :param vertexIndex1: The index of the first vertex.
        :type vertexIndex1: :class:`int`

        :param vertexIndex2: The index of the second vertex.
        :type vertexIndex2: :class:`int`

        :param edge: The value of the edge.
        :type edge: :class:`float`

        :raises ValueError: if edge is zero or positive infinity.
        """
        numVertices = self.vList.getNumVertices()
        Parameter.checkIndex(vertexIndex1, 0, numVertices)
        Parameter.checkIndex(vertexIndex2, 0, numVertices)
        source = int(vertexIndex1)
        target = int(vertexIndex2)

        if edge == 0 or edge == float('inf'):
            raise ValueError("Cannot add a zero or infinite edge")

        self.W[source, target] = edge
        if self.undirected:
            # Mirror the entry so the weight matrix stays symmetric
            self.W[target, source] = edge
 def setNumTrees(self, numTrees):
     """
     Set the number of trees to generate in the forest.

     The value is passed to ``Parameter.checkInt`` with bounds 1 and infinity
     before being stored on ``self.numTrees``.

     :param numTrees: The number of trees to generate in the forest.
     :type numTrees: :class:`int`
     """
     Parameter.checkInt(numTrees, 1, float('inf'))
     self.numTrees = numTrees
Exemple #10
0
    def random2Choice(V, n=1):
        """
        Make a random binary choice from a vector V of values which are unnormalised
        probabilities. Return the corresponding index. For example if v = [1, 2]
        then the probability of the indices respectively are [1/3, 2/3]. The
        parameter n is the number of random choices to make. If V is a matrix,
        then the rows are taken as probabilities, and a choice is made for each
        row.

        :param V: A 1D array of length 2, or a 2D array with 2 columns, of unnormalised probabilities.
        :type V: :class:`numpy.ndarray`

        :param n: The number of random choices to make (per row if V is 2D).
        :type n: :class:`int`

        :returns: An integer array of 0/1 indices, of length n for 1D input or shape (V.shape[0], n) for 2D input.

        :raises ValueError: if V is not binary along its last axis or has more than 2 dimensions.
        """
        Parameter.checkClass(V, numpy.ndarray)

        if V.ndim == 1 and V.shape[0] != 2:
            raise ValueError("Function only works on binary probabilities")
        if V.ndim == 2 and V.shape[1] != 2:
            raise ValueError("Function only works on binary probabilities")

        # numpy.int was deprecated in NumPy 1.20 and removed in 1.24; the
        # builtin int gives the same dtype.
        if V.ndim == 1:
            cumV = numpy.cumsum(V)
            p = numpy.random.rand(n)*cumV[-1]
            # A sample at or past the mass of index 0 selects index 1
            cumV2 = numpy.ones(n)*cumV[0] - p
            return numpy.array(cumV2 <= 0, int)
        elif V.ndim == 2:
            cumV = numpy.cumsum(V, 1)
            P = numpy.random.rand(V.shape[0], n)*numpy.array([cumV[:, -1]]).T
            cumV2 = numpy.outer(cumV[:, 0], numpy.ones(n)) - P
            return numpy.array(cumV2 <= 0, int)
        else:
            raise ValueError("Invalid number of dimensions")
Exemple #11
0
    def array1DToRow(X, precision=3):
        """
        Format a 1D numpy array as a latex table row, i.e. x1 & x2 .. xn, and
        return it as a string. Arrays of float dtype are written with the given
        number of decimal places; all other dtypes are written as integers.

        :param X: The array to format
        :type X: :class:`ndarray`

        :param precision: The precision of the printed floating point numbers.
        :type precision: :class:`int`

        :returns: The entries of X separated by " & ".

        :raises ValueError: if X is not one dimensional.
        """
        Parameter.checkInt(precision, 0, 10)
        if X.ndim != 1:
            raise ValueError("Array must be one dimensional")

        if X.dtype == float:
            entryFmt = "%." + str(precision) + "f"
        else:
            entryFmt = "%d"

        # Joining places the separator between entries only, so there is no
        # trailing " & " after the last one.
        return " & ".join([entryFmt % X[i] for i in range(X.shape[0])])
 def setP(self, p):
     '''
     Set the probability of an edge.

     The value is passed to ``Parameter.checkFloat`` with bounds 0.0 and 1.0
     before being stored on ``self.p``.

     :param p: the probability of an edge
     :type p: :class:`float`
     '''
     Parameter.checkFloat(p, 0.0, 1.0)
     self.p = p
Exemple #13
0
 def setErrorCost(self, errorCost):
     """
     Set the penalty on errors on positive labels. The penalty for negative
     labels is 1.

     The value is passed to ``Parameter.checkFloat`` with bounds 0.0 and 1.0
     before being stored on ``self.errorCost``.

     :param errorCost: The penalty on errors on positive labels.
     :type errorCost: :class:`float`
     """
     Parameter.checkFloat(errorCost, 0.0, 1.0)
     self.errorCost = errorCost
Exemple #14
0
    def evaluate(self, X1, X2):
        """
        Evaluate the kernel between every row of X1 and every row of X2. The
        rows of each matrix are examples and both must have the same number of
        columns. The (i, j) entry of the result is
        exp(-||X1[i] - X2[j]||^2 / (2*sigma^2)).

        :param X1: First set of examples.
        :type X1: :class:`numpy.ndarray`

        :param X2: Second set of examples.
        :type X2: :class:`numpy.ndarray`

        :returns: A matrix of kernel values with one row per example of X1 and one column per example of X2.

        :raises ValueError: if X1 and X2 have a different number of columns.
        """
        Parameter.checkClass(X1, numpy.ndarray)
        Parameter.checkClass(X2, numpy.ndarray)

        if X1.shape[1] != X2.shape[1]:
            raise ValueError("Invalid matrix dimentions: " + str(X1.shape) + " " + str(X2.shape))

        ones1 = numpy.ones((X1.shape[0], 1))
        ones2 = numpy.ones((X2.shape[0], 1))

        sqNorms1 = numpy.sum(X1**2, 1)
        sqNorms2 = numpy.sum(X2**2, 1)

        innerProducts = numpy.dot(X1, X2.T)

        # -||x1 - x2||^2 expands to 2<x1, x2> - ||x1||^2 - ||x2||^2; the outer
        # products replicate the squared norms across columns and rows.
        negSqDists = 2*innerProducts - numpy.outer(sqNorms1, ones2) - numpy.outer(ones1, sqNorms2)
        exponent = negSqDists / (2*self.sigma**2)

        return numpy.exp(exponent)
    def addEdge(self, vertexIndex1, vertexIndex2, edge=1):
        """
        Write a non-zero edge value into the weight matrix for a pair of
        vertices. When the graph is undirected the reverse entry is written too.

        :param vertexIndex1: The index of the first vertex.
        :type vertexIndex1: :class:`int`

        :param vertexIndex2: The index of the second vertex.
        :type vertexIndex2: :class:`int`

        :param edge: The value of the edge.
        :type edge: :class:`float`

        :raises ValueError: if edge is zero or positive infinity.
        """
        vertexCount = self.vList.getNumVertices()
        Parameter.checkIndex(vertexIndex1, 0, vertexCount)
        Parameter.checkIndex(vertexIndex2, 0, vertexCount)
        first = int(vertexIndex1)
        second = int(vertexIndex2)

        if edge == 0 or edge == float('inf'):
            raise ValueError("Cannot add a zero or infinite edge")

        # Directed graphs only get the (first, second) entry
        positions = [(first, second)]
        if self.undirected:
            positions.append((second, first))

        for (row, col) in positions:
            self.W[row, col] = edge
    def multiply(self, graph):
        """
        Form the element-wise product of the edge weights of this graph with
        those of the input graph. Only edges present in both graphs can be
        non-zero in the product, giving an intersection of the edges.

        :param graph: the input graph.
        :type graph: :class:`apgl.graph.PySparseGraph`

        :returns: A new graph whose edge weights are the products of the corresponding weights of this graph and the input graph.

        :raises ValueError: if the graphs differ in number of vertices or in directedness.
        """
        Parameter.checkClass(graph, PySparseGraph)
        if self.getNumVertices() != graph.getNumVertices():
            raise ValueError("Can only add edges from graph with same number of vertices")
        if graph.undirected != self.undirected:
            raise ValueError("Both graphs must be either undirected or directed")

        # Visit only the non-zero positions of whichever matrix has fewer of
        # them; everywhere else the product is zero.
        if self.W.nnz < graph.W.nnz:
            smaller = self.W
        else:
            smaller = graph.W
        (rowInds, colInds) = PySparseUtils.nonzero(smaller)

        ownVals = numpy.zeros(len(rowInds))
        otherVals = numpy.zeros(len(rowInds))
        self.W.take(ownVals, rowInds, colInds)
        graph.W.take(otherVals, rowInds, colInds)

        result = PySparseGraph(self.vList, self.undirected)
        result.W.put(ownVals * otherVals, rowInds, colInds)
        return result
    def setDiff(self, graph):
        """
        Return the edges of this graph that are absent from the input graph.

        :param graph: the input graph.
        :type graph: :class:`apgl.graph.PySparseGraph`

        :returns: A new graph with edges from the current graph and not in the input graph.

        :raises ValueError: if the graphs differ in number of vertices or in directedness.
        """
        Parameter.checkClass(graph, PySparseGraph)
        if self.getNumVertices() != graph.getNumVertices():
            raise ValueError("Can only add edges from graph with same number of vertices")
        if graph.undirected != self.undirected:
            raise ValueError("Both graphs must be either undirected or directed")

        ownAdj = self.nativeAdjacencyMatrix()
        otherAdj = graph.nativeAdjacencyMatrix()

        # Restrict attention to the positions that are edges of this graph
        (rowInds, colInds) = PySparseUtils.nonzero(ownAdj)
        ownVals = numpy.zeros(len(rowInds))
        otherVals = numpy.zeros(len(rowInds))
        ownAdj.take(ownVals, rowInds, colInds)
        otherAdj.take(otherVals, rowInds, colInds)

        # Entries shared with the input graph cancel in the subtraction
        remaining = ownVals - otherVals
        ownAdj.put(remaining, rowInds, colInds)

        result = PySparseGraph(self.vList, self.undirected)
        result.W = ownAdj
        return result
    def __init__(self, vertex1Indices, vertex2Indices, converters, undirected=True):
        """
        Store the field indices used to read the two vertices of each edge.

        vertex1Indices is a list of fields for the first vertex, with the 1st index
        being the ID; vertex2Indices is the equivalent list for the second vertex.
        The ID index is stored separately and removed from the stored copy of
        each list.

        :param vertex1Indices: field indices of the first vertex, ID index first.
        :type vertex1Indices: :class:`list`

        :param vertex2Indices: field indices of the second vertex, ID index first.
        :type vertex2Indices: :class:`list`

        :param converters: stored unchanged on ``self.converters``.

        :param undirected: stored unchanged on ``self.undirected``.
        :type undirected: :class:`bool`

        :raises ValueError: if either index list is empty or the lists differ in length.
        """
        # Both lists are checked: the original tested vertex1Indices twice
        if len(vertex1Indices) < 1 or len(vertex2Indices) < 1:
            raise ValueError("vertexIndices must have at least 1 index")
        if len(vertex1Indices) != len(vertex2Indices):
            # str() is required here, concatenating the int raised a TypeError
            raise ValueError("len(vertex1Indices)=" + str(len(vertex1Indices)) + " and len(vertex2Indices)=" + str(len(vertex2Indices)))

        Parameter.checkList(vertex1Indices, Parameter.checkInt, [0, float('inf')])
        Parameter.checkList(vertex2Indices, Parameter.checkInt, [0, float('inf')])

        self.vertex1IdIndex = vertex1Indices[0]
        self.vertex2IdIndex = vertex2Indices[0]

        # Work on copies so the caller's lists are not modified by remove()
        self.vertex1Indices = copy.copy(vertex1Indices)
        self.vertex2Indices = copy.copy(vertex2Indices)
        self.vertex1Indices.remove(self.vertex1IdIndex)
        self.vertex2Indices.remove(self.vertex2IdIndex)
        self.converters = converters
        self.undirected = undirected
        self.edgeWeight = 1
Exemple #19
0
    def evaluateCvOuter(self, X, y, folds):
        """
        Compute AUC and ROC metrics using stratified k-fold cross validation,
        with model selection for the kernel configured on this object ("linear"
        or "rbf").

        :param X: The examples, passed through to AbstractPredictor.evaluateLearn2.

        :param y: The labels, used to build the stratified folds.

        :param folds: The number of cross validation folds, at least 2.
        :type folds: :class:`int`

        :returns: A tuple (bestParams, allMetrics, bestMetaDicts) in which allMetrics is the list [train AUCs, train ROCs, test AUCs, test ROCs] and the other two entries are empty dicts.

        :raises ValueError: if self.kernel is neither "linear" nor "rbf".
        """
        Parameter.checkInt(folds, 2, float('inf'))
        idx = cross_val.StratifiedKFold(y, folds)
        metricMethods = [Evaluator.auc2, Evaluator.roc]

        if self.kernel == "linear":
            logging.debug("Running linear rank SVM ")
            modelSelect = self.modelSelectLinear
        elif self.kernel == "rbf":
            logging.debug("Running RBF rank SVM")
            modelSelect = self.modelSelectRBF
        else:
            # Previously an unknown kernel fell through and failed later with an
            # UnboundLocalError on trainMetrics
            raise ValueError("Unknown kernel: " + str(self.kernel))

        trainMetrics, testMetrics = AbstractPredictor.evaluateLearn2(X, y, idx, modelSelect, self.predict, metricMethods)

        # The metric lists are indexed in the same order as metricMethods
        bestTrainAUCs, bestTrainROCs = trainMetrics[0], trainMetrics[1]
        bestTestAUCs, bestTestROCs = testMetrics[0], testMetrics[1]

        bestParams = {}
        bestMetaDicts = {}
        allMetrics = [bestTrainAUCs, bestTrainROCs, bestTestAUCs, bestTestROCs]

        return (bestParams, allMetrics, bestMetaDicts)
    def evaluate(self, X1, X2):
        """
        Compute the kernel matrix between the rows of X1 and the rows of X2.
        Each row is an example and the two matrices must have an identical
        number of columns. Entry (i, j) of the output is
        exp(-||X1[i] - X2[j]||^2 / (2*sigma^2)).

        :param X1: First set of examples.
        :type X1: :class:`numpy.ndarray`

        :param X2: Second set of examples.
        :type X2: :class:`numpy.ndarray`

        :returns: A matrix of kernel values with one row per example of X1 and one column per example of X2.

        :raises ValueError: if X1 and X2 have a different number of columns.
        """
        Parameter.checkClass(X1, numpy.ndarray)
        Parameter.checkClass(X2, numpy.ndarray)

        if X1.shape[1] != X2.shape[1]:
            raise ValueError("Invalid matrix dimentions: " + str(X1.shape) + " " + str(X2.shape))

        onesCol1 = numpy.ones((X1.shape[0], 1))
        onesCol2 = numpy.ones((X2.shape[0], 1))

        rowSqNorms1 = numpy.sum(X1**2, 1)
        rowSqNorms2 = numpy.sum(X2**2, 1)

        crossTerms = numpy.dot(X1, X2.T)

        # Negative squared distances: 2<x1, x2> - ||x1||^2 - ||x2||^2
        numerator = 2*crossTerms - numpy.outer(rowSqNorms1, onesCol2) - numpy.outer(onesCol1, rowSqNorms2)
        Q = numerator / (2*self.sigma**2)

        return numpy.exp(Q)
    def cut(self, d):
        """
        Return a new tree containing all the vertices of the current one up to
        a depth of d. The edge and vertex labels are copied by reference only.

        Only vertices at depth at most d are visited: the children of a vertex
        at depth d are never pushed, so the part of the tree below the cut is
        not traversed.

        :param d: The depth of the new cut tree
        :type d: :class:`int`

        :returns: A new DictTree holding the vertices and edges down to depth d.
        """
        Parameter.checkInt(d, 0, float("inf"))

        root = self.getRootId()
        newTree = DictTree()
        stack = [(root, 0)]

        newTree.setVertex(root)

        while stack:
            (vertexId, depth) = stack.pop()
            # Every vertex on the stack has depth <= d, so it is always copied
            newTree.setVertex(vertexId, self.getVertex(vertexId))

            if depth + 1 > d:
                # Children would fall below the cut, nothing more to do here
                continue

            for neighbour in self.neighbours(vertexId):
                stack.append((neighbour, depth+1))
                newTree.addEdge(vertexId, neighbour, self.getEdge(vertexId, neighbour))

        return newTree
Exemple #22
0
    def parallelVfcvRbf(self, X, y, idx, type="C_SVC"):
        """
        Perform parallel cross validation model selection using the RBF kernel
        and then pick the best one. The kernel is set to "gaussian", a grid of
        parameters is built and the search itself is delegated to
        parallelModelSelect.

        :param X: The examples as rows
        :type X: :class:`numpy.ndarray`

        :param y: The binary -1/+1 labels
        :type y: :class:`numpy.ndarray`

        :param idx: A list of train/test splits

        :param type: The SVM type. For "C_SVC" the grid covers C and gamma; for any other value epsilon is searched as well.
        :type type: :class:`str`

        :returns: The result of parallelModelSelect on the parameter grid.
        """
        Parameter.checkClass(X, numpy.ndarray)
        Parameter.checkClass(y, numpy.ndarray)

        self.setKernel("gaussian")

        # C and gamma are searched for every SVM type
        paramDict = {}
        paramDict["setC"] = self.getCs()
        paramDict["setGamma"] = self.getGammas()
        if type != "C_SVC":
            paramDict["setEpsilon"] = self.getEpsilons()

        return self.parallelModelSelect(X, y, idx, paramDict)
Exemple #23
0
    def cut(self, d):
        """
        Return a new tree containing all the vertices of the current one up to
        a depth of d. The edge and vertex labels are copied by reference only.

        The traversal stops at depth d: children of a vertex at depth d are not
        pushed onto the stack, so vertices below the cut are never visited.

        :param d: The depth of the new cut tree
        :type d: :class:`int`

        :returns: A new DictTree holding the vertices and edges down to depth d.
        """
        Parameter.checkInt(d, 0, float("inf"))

        root = self.getRootId()
        newTree = DictTree()
        stack = [(root, 0)]

        newTree.setVertex(root)

        while stack:
            (vertexId, depth) = stack.pop()
            # Only vertices with depth <= d are ever pushed, so always copy
            newTree.setVertex(vertexId, self.getVertex(vertexId))

            childDepth = depth + 1
            if childDepth <= d:
                for neighbour in self.neighbours(vertexId):
                    stack.append((neighbour, childDepth))
                    newTree.addEdge(vertexId, neighbour,
                                    self.getEdge(vertexId, neighbour))

        return newTree
Exemple #24
0
    def diameter(self, useWeights=False, P=None):
        """
        Finds the diameter of a graph i.e. the longest shortest path. If useWeights
        is True then the weights in the adjacency matrix are used if P is not
        provided. Infinite entries of the shortest path matrix are ignored.

        :param useWeights: Whether to use edge weights to compute a diameter.
        :type useWeights: :class:`bool`

        :param P: An optional nxn matrix whose ijth entry is the shortest path from i to j.
        :type P: :class:`ndarray`

        :returns:  The diameter of this graph: an int if useWeights is False, otherwise a float. It is 0 for a graph with no edges.

        :raises ValueError: if P is given but is not an array of shape (numVertices, numVertices).
        """
        Parameter.checkBoolean(useWeights)

        # Identity comparison is required: "P != None" on an ndarray compares
        # elementwise and cannot be used as a truth value.
        if P is not None:
            expectedShape = (self.getNumVertices(), self.getNumVertices())
            if not isinstance(P, numpy.ndarray) or P.shape != expectedShape:
                logging.debug("P.shape = " + str(getattr(P, "shape", None)) + " W.shape = " + str(self.W.shape))
                raise ValueError("P must be array of same size as weight matrix of graph")

        if self.getNumEdges() == 0:
            return 0

        if P is None:
            P = self.floydWarshall(useWeights)

        # Boolean indexing copies the finite entries, so P itself is not modified
        longestPath = numpy.max(P[P != float('inf')])

        if useWeights == False:
            return int(longestPath)
        else:
            return float(longestPath)
    def __init__(self,
                 vertex1Indices,
                 vertex2Indices,
                 converters,
                 undirected=True):
        """
        Store the field indices used to read the two vertices of each edge.

        vertex1Indices is a list of fields for the first vertex, with the 1st index
        being the ID; vertex2Indices is the equivalent list for the second vertex.
        The ID index is stored separately and removed from the stored copy of
        each list.

        :param vertex1Indices: field indices of the first vertex, ID index first.
        :type vertex1Indices: :class:`list`

        :param vertex2Indices: field indices of the second vertex, ID index first.
        :type vertex2Indices: :class:`list`

        :param converters: stored unchanged on ``self.converters``.

        :param undirected: stored unchanged on ``self.undirected``.
        :type undirected: :class:`bool`

        :raises ValueError: if either index list is empty or the lists differ in length.
        """
        # The second operand previously re-tested vertex1Indices
        if len(vertex1Indices) < 1 or len(vertex2Indices) < 1:
            raise ValueError("vertexIndices must have at least 1 index")
        if len(vertex1Indices) != len(vertex2Indices):
            # Both lengths go through str(); adding the raw int raised TypeError
            raise ValueError("len(vertex1Indices)=" +
                             str(len(vertex1Indices)) +
                             " and len(vertex2Indices)=" +
                             str(len(vertex2Indices)))

        Parameter.checkList(vertex1Indices, Parameter.checkInt,
                            [0, float('inf')])
        Parameter.checkList(vertex2Indices, Parameter.checkInt,
                            [0, float('inf')])

        self.vertex1IdIndex = vertex1Indices[0]
        self.vertex2IdIndex = vertex2Indices[0]

        # Copies are taken so remove() does not alter the caller's lists
        self.vertex1Indices = copy.copy(vertex1Indices)
        self.vertex2Indices = copy.copy(vertex2Indices)
        self.vertex1Indices.remove(self.vertex1IdIndex)
        self.vertex2Indices.remove(self.vertex2IdIndex)
        self.converters = converters
        self.undirected = undirected
        self.edgeWeight = 1
Exemple #26
0
    def breadthFirstSearch(self, root):
        """
        Breadth first search starting from a particular vertex. Returns a list of
        connected vertices in the order they were found. The unvisited
        neighbours of each vertex are explored in sorted order.

        :param root: The index of the root vertex.
        :type root: :class:`int`

        :returns: A list of vertices connected to the input one via a path in the graph.
        """
        from collections import deque

        Parameter.checkIndex(root, 0, self.size)

        # A deque gives O(1) removal from the front, unlike list.pop(0)
        toVisit = deque([root])
        visited = set()
        searchPath = []

        while toVisit:
            currentVertex = toVisit.popleft()

            # A vertex can be queued more than once before it is reached;
            # expanding it again would only re-queue vertices already queued.
            if currentVertex in visited:
                continue

            visited.add(currentVertex)
            searchPath.append(currentVertex)

            neighbours = self.neighbours(currentVertex)
            unvisited = sorted(set(neighbours).difference(visited))
            toVisit.extend(unvisited)

        return searchPath
Exemple #27
0
 def setEll(self, ell):
     """
     Set the initial number of vertices.

     The value is passed to ``Parameter.checkInt`` with bounds 2 and infinity
     before being stored on ``self.ell``.

     :param ell: the initial number of vertices.
     :type ell: :class:`int`
     """
     Parameter.checkInt(ell, 2, float('inf'))
     self.ell = ell
Exemple #28
0
 def setSelfEdges(self, selfEdges):
     """
     Set whether self edges are allowed.

     The value is passed to ``Parameter.checkBoolean`` before being stored on
     ``self.selfEdges``.

     :param selfEdges: whether to allow self edges
     :type selfEdges: :class:`bool`
     """
     Parameter.checkBoolean(selfEdges)
     self.selfEdges = selfEdges
def parallelPenaltyGridRbf(svm, X, y, fullX, gridPoints, pdfX, pdfY1X, pdfYminus1X):
    """
    Find out the "ideal" penalty for every (C, gamma) pair of the given svm.

    One call to computeIdealPenalty is made per pair, distributed over a
    multiprocessing pool.

    :param svm: an object exposing the arrays ``Cs`` and ``gammas`` which define the grid.

    :param X: the examples, must be a :class:`numpy.ndarray`.

    :param y: the labels, must be a :class:`numpy.ndarray`.

    :param fullX, gridPoints, pdfX, pdfY1X, pdfYminus1X: passed unchanged to computeIdealPenalty.

    :returns: An array of shape (number of Cs, number of gammas) in which entry (i, j) is the result for ``svm.Cs[i]`` and ``svm.gammas[j]``.
    """
    Parameter.checkClass(X, numpy.ndarray)
    Parameter.checkClass(y, numpy.ndarray)
    chunkSize = 10

    numCs = svm.Cs.shape[0]
    numGammas = svm.gammas.shape[0]
    idealPenalties = numpy.zeros((numCs, numGammas))

    # Row-major order over the grid; the results are read back in the same order
    paramList = [
        (X, y, fullX, svm.Cs[i], svm.gammas[j], gridPoints, pdfX, pdfY1X, pdfYminus1X)
        for i in range(numCs)
        for j in range(numGammas)
    ]

    pool = multiprocessing.Pool()
    try:
        resultsIterator = pool.imap(computeIdealPenalty, paramList, chunkSize)

        for i in range(numCs):
            for j in range(numGammas):
                # Built-in next() works on Python 2.6+ and Python 3
                idealPenalties[i, j] = next(resultsIterator)
    finally:
        # Release the worker processes even if a task raised
        pool.terminate()

    return idealPenalties
Exemple #30
0
    def setRandomInfected(self, numInitialInfected, proportionHetero, t=0.0):
        """
        Mark randomly chosen people as infected at time t. At most
        int(numInitialInfected*proportionHetero) of them are drawn from the
        heterosexual vertices and at most the remainder from the bisexual
        vertices; fewer are infected when a group is too small.
        """
        Parameter.checkInt(numInitialInfected, 0, self.size)
        Parameter.checkFloat(proportionHetero, 0.0, 1.0)

        orientations = self.vlist.V[:, HIVVertices.orientationIndex]
        allInds = numpy.arange(self.size)
        heteroInds = allInds[orientations == HIVVertices.hetero]
        biInds = allInds[orientations == HIVVertices.bi]

        numHetero = int(numInitialInfected*proportionHetero)
        numBi = numInitialInfected - numHetero

        # The heterosexual permutation is drawn before the bisexual one
        heteroChosen = heteroInds[numpy.random.permutation(heteroInds.shape[0])[:numHetero]]
        biChosen = biInds[numpy.random.permutation(biInds.shape[0])[:numBi]]

        for personInd in heteroChosen:
            self.vlist.setInfected(personInd, t)

        for personInd in biChosen:
            self.vlist.setInfected(personInd, t)
 def setSampleReplace(self, sampleReplace):
     """
     Store the sampling-with-replacement flag.

     :param sampleReplace: A boolean to decide whether to sample with replacement; validated with Parameter.checkBoolean.
     :type sampleReplace: :class:`bool`
     """
     Parameter.checkBoolean(sampleReplace)
     useReplacement = sampleReplace
     self.sampleReplace = useReplacement
 def setWeight(self, weight):
     """
     Store the weight on the positive examples.

     :param weight: the positive-example weight, validated with Parameter.checkFloat against the bounds 0.0 and 1.0 (the negative weight is 1-weight)
     :type weight: :class:`float`
     """
     minWeight, maxWeight = 0.0, 1.0
     Parameter.checkFloat(weight, minWeight, maxWeight)
     self.weight = weight
Exemple #33
0
    def randomChoice(V, n=1):
        """
        Make a random choice from a vector V of values which are unnormalised
        probabilities. Return the corresponding index. For example if v = [1, 2, 4]
        then the probability of the indices respectively are [1/7, 2/7, 4/7]. The
        parameter n is the number of random choices to make. If V is a matrix,
        then the rows are taken as probabilities, and a choice is made for each
        row.

        :param V: a 1 or 2 dimensional array of unnormalised probabilities.
        :type V: :class:`numpy.ndarray`

        :param n: the number of choices to make (per row when V is a matrix).
        :type n: :class:`int`

        :returns: -1 if V has no entries along its first axis, an array of n indices for a vector, or an array of shape (V.shape[0], n) for a matrix.

        :raises ValueError: if V has more than 2 dimensions.
        """
        Parameter.checkClass(V, numpy.ndarray)

        if V.shape[0] == 0:
            return -1

        if V.ndim == 1:
            cumV = numpy.cumsum(V)
            # Scale uniform samples to the total mass and find their bucket
            p = numpy.random.rand(n)*cumV[-1]
            return numpy.searchsorted(cumV, p)
        elif V.ndim == 2:
            cumV = numpy.cumsum(V, 1)
            P = numpy.random.rand(V.shape[0], n)*numpy.array([cumV[:, -1]]).T

            # The numpy.int alias was removed in NumPy 1.24; the builtin is equivalent
            inds = numpy.zeros(P.shape, int)
            for i in range(P.shape[0]):
                inds[i, :] = numpy.searchsorted(cumV[i, :], P[i, :])

            return inds
        else:
            raise ValueError("Invalid number of dimensions")
 def setSampleSize(self, sampleSize):
     """
     Store the sample size used for each tree.

     :param sampleSize: the sample size, validated with Parameter.checkFloat against the bounds 0.0 and 1.0. NOTE(review): earlier documentation described this as an integer count of examples; the validation suggests a proportion - confirm.
     :type sampleSize: :class:`float`
     """
     minSize, maxSize = 0.0, 1.0
     Parameter.checkFloat(sampleSize, minSize, maxSize)
     self.sampleSize = sampleSize
 def setEll(self, ell):
     """
     Store the initial number of vertices.

     :param ell: the initial number of vertices, validated with Parameter.checkInt against the bounds 2 and infinity.
     :type ell: :class:`int`
     """
     unbounded = float("inf")
     Parameter.checkInt(ell, 2, unbounded)
     self.ell = ell
    def scalarStatistics(self, graph):
        """
        Compute harmonic geodesic distance statistics for several vertex
        subsets of the graph: the bisexual vertices, the male subgraph, the
        contact traced subgraph and the most connected vertices.

        :param graph: the input graph.
        :type graph: :class:`AbstractMatrixGraph`

        :returns: An array of length self.numStats; entries which are not computed are left at -1.
        """
        Parameter.checkClass(graph, AbstractMatrixGraph)
        statsArray = -numpy.ones(self.numStats)

        #Find geodesic distance between MSMs
        logging.debug("Running Floyd-Warshall")
        P = graph.floydWarshall(False)
        numVertices = graph.getNumVertices()
        V = graph.getVertexList().getVertices(list(range(numVertices)))

        def matchingIndices(field, value):
            # Indices of the vertices whose given feature equals value
            return list(numpy.nonzero(V[:, self.fInds[field]] == value)[0])

        msmIndices = matchingIndices("orient", CsvConverters.orientConv('HB'))
        if msmIndices:
            statsArray[self.msmGeodesicIndex] = graph.harmonicGeodesicDistance(P, msmIndices)

        menIndices = matchingIndices("gender", CsvConverters.genderConv('M'))
        if menIndices:
            menGraph = graph.subgraph(menIndices)
            statsArray[self.menSubgraphGeodesicIndex] = menGraph.harmonicGeodesicDistance()

        contactTrIndices = matchingIndices("contactTrace", 1)
        if contactTrIndices:
            ctGraph = graph.subgraph(contactTrIndices)
            statsArray[self.ctSubgraphGeodesicIndex] = ctGraph.harmonicGeodesicDistance()

        sortedInds = numpy.argsort(graph.outDegreeSequence())
        numInds = int(float(graph.getNumVertices())*self.topConnect)
        # NOTE(review): when numInds is 0 the slice [-0:] selects every
        # vertex rather than none - confirm this is intended.
        topConnectInds = sortedInds[-numInds:]

        statsArray[self.mostConnectedGeodesicIndex] = graph.harmonicGeodesicDistance(P, topConnectInds)

        return statsArray
    def setVertices(self, vertices, indices=None):
        """
        Set the vertices to the given list of vertices. If indices is None then
        all vertices are replaced, and if not the given indices are used.

        :param vertices: a list of vertices.
        :type vertices: :class:`list`

        :param indices: a list of indices of the same length as vertices or None for all indices in this object.
        :type indices: :class:`list`

        :raises ValueError: if the number of vertices does not match the number of indices, or the size of this object when indices is None.
        """
        # Identity test: "indices != None" is evaluated elementwise for
        # array-like arguments and cannot be used as a condition.
        if indices is None:
            if len(vertices) != len(self.V):
                raise ValueError("Incorrect number of vertices " +
                                 str(len(vertices)) + ", expecting " +
                                 str(len(self.V)))
            indices = range(len(vertices))
        else:
            Parameter.checkList(indices, Parameter.checkIndex,
                                [0, len(self.V)])
            if len(vertices) != len(indices):
                raise ValueError(
                    "Length of indices list must be same as that of vertices list"
                )

        for index, vertex in zip(indices, vertices):
            self.V[index] = vertex
Exemple #38
0
    def bootstrap2(repetitions, numExamples):
        """
        Generate bootstrap splits: for each repetition a training sample of
        size numExamples is drawn with replacement, and the examples which
        were not drawn form the test set. The 0.368 oversampling of the test
        set associated with the 0.632 bootstrap is not performed here.
        Returns a list of tuples of the form (trainIndices, testIndices).

        :param repetitions: The number of repetitions of bootstrap to perform.
        :type repetitions: :class:`int`

        :param numExamples: The number of examples.
        :type numExamples: :class:`int`

        """
        Parameter.checkInt(numExamples, 2, float('inf'))
        Parameter.checkInt(repetitions, 1, float('inf'))

        allInds = numpy.arange(numExamples)
        splits = []

        for _ in range(repetitions):
            trainInds = numpy.random.randint(numExamples, size=numExamples)
            # setdiff1d removes duplicates from both of its inputs itself
            testInds = numpy.setdiff1d(allInds, trainInds)
            splits.append((trainInds, testInds))

        return splits
    def predictEdges(self, vertexIndices):
        r"""
        This makes a prediction for a series of edges using the following score
        \sum_{z \in n(x) \cap n(y)} 1/\log|n(z)|, i.e. each common neighbour z
        of x and y contributes the inverse log of its number of nonzero
        entries in the weight matrix row.
        Returns a matrix with rows are a ranked list of vertices of length self.windowSize.

        :param vertexIndices: an array of the vertex indices to make predictions for.

        :returns: The pair (P, S) filled row by row from self.indicesFromScores.
        """
        numVertices = self.graph.getNumVertices()

        Parameter.checkInt(self.windowSize, 1, numVertices)
        logging.info("Running predictEdges in " + str(self.__class__.__name__))

        P = numpy.zeros((vertexIndices.shape[0], self.windowSize))
        S = numpy.zeros((vertexIndices.shape[0], self.windowSize))
        W = self.graph.getWeightMatrix()

        # log(number of nonzeros in row k), computed once per vertex instead
        # of once per (i, j, k) triple
        logDegrees = {}

        for i in range(vertexIndices.shape[0]):
            Util.printIteration(i, self.printStep, vertexIndices.shape[0])
            scores = numpy.zeros(numVertices)
            sourceRow = W[vertexIndices[i], :]

            for j in range(0, numVertices):
                commonNeighbours = numpy.nonzero(sourceRow * W[j, :])[0]

                for k in commonNeighbours:
                    if k not in logDegrees:
                        logDegrees[k] = numpy.log(numpy.nonzero(W[k, :])[0].shape[0])
                    q = logDegrees[k]

                    # log(1) == 0 would give a division by zero
                    if q != 0:
                        scores[j] = scores[j] + 1/q

            P[i, :], S[i, :] = self.indicesFromScores(vertexIndices[i], scores)

        return P, S
Exemple #40
0
 def svd(A, k, q=2): 
     """
     Compute the SVD of a sparse or dense matrix A, finding the first k 
     singular vectors/values, using exponent q. Returns the left and right singular 
     vectors, and the singular values. The resulting matrix can be approximated 
     using A ~ U diag(s) V.T. 

     :param A: the matrix to decompose; it must provide ``shape``, ``dot`` and ``T``.

     :param k: the number of singular vectors/values to find.
     :type k: :class:`int`

     :param q: the number of power iterations applied to the random projection.
     :type q: :class:`int`

     :returns: The tuple (U, s, V).
     """
     Parameter.checkInt(k, 1, float("inf"))
     Parameter.checkInt(q, 1, float("inf"))        
     
     # The random test matrix multiplies A from the right, so its row count 
     # must equal the number of columns of A (not rows), otherwise A.dot(omega) 
     # fails for non-square A. 
     n = A.shape[1]
     omega = numpy.random.randn(n, k)
     Y = A.dot(omega)
     
     # Power iterations: Y = (A A^T)^q A omega
     for i in range(q): 
         Y = A.T.dot(Y)
         Y = A.dot(Y)
     
     # Orthonormal basis Q for the range of Y, then SVD of B = Q^T A
     Q, R = numpy.linalg.qr(Y)
     B = A.T.dot(Q).T   
     U, s, V = numpy.linalg.svd(B, full_matrices=False)
     V = V.T
     U = Q.dot(U)
     
     return U, s, V 
Exemple #41
0
    def randCrossValidation(folds, numExamples):
        """
        Build k-fold cross validation splits over a random permutation of the
        example indices. Returns a list of tuples (trainIndices, testIndices),
        one per fold.

        :param folds: The number of cross validation folds.
        :type folds: :class:`int`

        :param numExamples: The number of examples.
        :type numExamples: :class:`int`
        """
        Parameter.checkInt(folds, 1, numExamples)
        Parameter.checkInt(numExamples, 2, float('inf'))

        foldSize = float(numExamples)/folds
        shuffledInds = numpy.random.permutation(numExamples)
        allInds = numpy.arange(0, numExamples)

        splits = []
        for fold in range(folds):
            # Fold boundaries are truncated from the fractional fold size
            start = int(foldSize*fold)
            end = int(foldSize*(fold+1))
            testIndices = shuffledInds[start:end]
            splits.append((numpy.setdiff1d(allInds, testIndices), testIndices))

        return splits
 def setBestResponse(self, bestResponse):
     """
     Store the label treated as "positive".

     :param bestResponse: the label corresponding to "positive", validated with Parameter.checkInt against the bounds -infinity and infinity.
     :type bestResponse: :class:`int`
     """
     unbounded = float('inf')
     Parameter.checkInt(bestResponse, -unbounded, unbounded)
     self.bestResponse = bestResponse
 def setM(self, m):
     """
     Store the number of edges added at each step.

     :param m: the number of edges to be added at each step, validated with Parameter.checkInt against the bounds 0 and self.ell.
     :type m: :class:`int`
     """
     maxEdges = self.ell
     Parameter.checkInt(m, 0, maxEdges)
     self.m = m
Exemple #44
0
 def setM(self, m):
     """
     Store the number of edges added at each step.

     :param m: the number of edges to be added at each step, validated with Parameter.checkInt against the bounds 0 and self.ell.
     :type m: :class:`int`
     """
     upperBound = self.ell
     Parameter.checkInt(m, 0, upperBound)
     self.m = m
Exemple #45
0
    def depthFirstSearch(self, root):
        """
        Depth first search starting from a particular vertex. Returns a list of
        connected vertices in the order they were found.

        :param root: The index of the root vertex.
        :type root: :class:`int`

        :returns: A list of vertices connected to the input one via a path in the graph.
        """
        Parameter.checkIndex(root, 0, self.size)

        # Vertices on the path from the root to the vertex being explored
        pathStack = [root]
        seen = set()
        discovered = []

        while pathStack:
            top = pathStack[-1]

            if top not in seen:
                seen.add(top)
                discovered.append(top)

            candidates = set(self.neighbours(top)).difference(seen)

            if not candidates:
                # Dead end: backtrack to the previous vertex on the path
                pathStack.pop()
                continue

            pathStack.append(candidates.pop())

        return discovered
    def setDiff(self, graph):
        """
        Find the edges in the current graph which are not present in the input
        graph. The result is built from the adjacency matrices of both graphs
        and returned as a new graph; the current graph is not modified here.

        :param graph: the input graph.
        :type graph: :class:`apgl.graph.DenseGraph`

        :returns: The graph which is the set difference of the edges of this graph and graph.
        """
        Parameter.checkClass(graph, DenseGraph)

        sameSize = graph.getNumVertices() == self.getNumVertices()
        if not sameSize:
            raise ValueError(
                "Can only add edges from graph with same number of vertices")

        sameDirection = self.undirected == graph.undirected
        if not sameDirection:
            raise ValueError(
                "Both graphs must be either undirected or directed")

        difference = self.adjacencyMatrix() - graph.adjacencyMatrix()
        # x -> (x + |x**2|)/2 sends 1 to 1 and both 0 and -1 to 0, keeping
        # only the entries present here and absent from graph
        onlyHere = (difference + numpy.abs(difference**2)) / 2

        newGraph = DenseGraph(self.vList, self.undirected)
        newGraph.W = onlyHere
        return newGraph
Exemple #47
0
 def setP(self, p):
     '''
     Store the edge probability.

     :param p: the probability of an edge, validated with Parameter.checkFloat against the bounds 0.0 and 1.0.
     :type p: :class:`float`
     '''
     minProb, maxProb = 0.0, 1.0
     Parameter.checkFloat(p, minProb, maxProb)
     self.p = p
    def removeEdge(self, vertexIndex1, vertexIndex2, edgeTypeIndex):
        """ Remove an edge of a given type between two vertices.

        @param vertexIndex1: The index of the first vertex.
        @param vertexIndex2: The index of the second vertex.
        @param edgeTypeIndex: The index of the edge type, validated with Parameter.checkIndex against 0 and self.maxEdgeTypes.
        """
        Parameter.checkIndex(edgeTypeIndex, 0, self.maxEdgeTypes)
        typedGraph = self.sparseGraphs[edgeTypeIndex]
        typedGraph.removeEdge(vertexIndex1, vertexIndex2)
    def setK(self, k):
        """
        Store the number of iterations k.

        :param k: The number of iterations, validated with Parameter.checkInt against the bounds 1 and infinity.
        :type k: :class:`int`
        """
        minIterations, maxIterations = 1, float('inf')
        Parameter.checkInt(k, minIterations, maxIterations)
        self.k = k
    def clearVertex(self, index):
        """
        Reset the vertex at the given position to None.

        :param index: the index of the vertex to clear, validated with Parameter.checkIndex against 0 and len(self.V).
        :type index: :class:`int`
        """
        numVertices = len(self.V)
        Parameter.checkIndex(index, 0, numVertices)
        self.V[index] = None
    def getVertex(self, index):
        """
        Look up the value stored for a vertex.

        :param index: the index of the vertex, validated with Parameter.checkIndex against 0 and len(self.V).
        :type index: :class:`int`

        :returns: The entry of self.V at the given index.
        """
        Parameter.checkIndex(index, 0, len(self.V))
        vertex = self.V[index]
        return vertex
    def setK(self, k):
        """
        Store the number of neighbours of each vertex.

        :param k: the number of neighbours in the regular lattice, validated with Parameter.checkIndex against the bounds 0 and infinity.
        :type k: :class:`int`
        """
        unbounded = float('inf')
        Parameter.checkIndex(k, 0, unbounded)
        self.k = k
    def clearVertex(self, index):
        """
        Overwrite the row of a vertex with zeros.

        :param index: the index of the vertex to clear, validated with Parameter.checkIndex against 0 and the number of rows of self.V.
        :type index: :class:`int`
        """
        numVertices, numFeatures = self.V.shape[0], self.V.shape[1]
        Parameter.checkIndex(index, 0, numVertices)
        zeroRow = numpy.zeros((1, numFeatures))
        self.V[index, :] = zeroRow
    def vectorStatistics(self, graph, treeStats=False, eigenStats=True):
        """
        Find a series of statistics for the given input graph which can be represented
        as vector values.

        :param graph: the input graph.
        :type graph: :class:`AbstractMatrixGraph`

        :param treeStats: whether to compute the tree size and depth distributions.
        :type treeStats: :class:`bool`

        :param eigenStats: whether to compute the eigenvalue statistics; when False (or the graph has no vertices) empty arrays are stored instead.
        :type eigenStats: :class:`bool`

        :returns: A dict mapping statistic names to arrays. "componentsDist" is only present for undirected graphs with at least one component, and the tree entries only when treeStats is True.
        """
        Parameter.checkClass(graph, AbstractMatrixGraph)
        Parameter.checkBoolean(treeStats)
        statsDict = {}

        statsDict["inDegreeDist"] = graph.inDegreeDistribution()
        statsDict["outDegreeDist"] = graph.degreeDistribution()
        logging.debug("Computing hop counts")
        P = graph.findAllDistances(False)
        statsDict["hopCount"] = graph.hopCount(P)
        logging.debug("Computing triangle count")
        if graph.getNumVertices() != 0:
            statsDict["triangleDist"] = numpy.bincount(
                graph.triangleSequence())
        else:
            statsDict["triangleDist"] = numpy.array([])

        #Get the distribution of component sizes
        logging.debug("Finding distribution of component sizes")

        if graph.isUndirected():
            components = graph.findConnectedComponents()
            if len(components) != 0:
                # The numpy.int alias was removed in NumPy 1.24; the builtin
                # int selects the same dtype
                statsDict["componentsDist"] = numpy.bincount(
                    numpy.array([len(c) for c in components], int))

        if graph.getNumVertices() != 0 and eigenStats:
            logging.debug("Computing eigenvalues/vectors")
            W = graph.getWeightMatrix()
            #Make sure weight matrix is symmetric
            W = (W + W.T) / 2
            eigenDistribution, V = numpy.linalg.eig(W)
            i = numpy.argmax(eigenDistribution)
            statsDict["maxEigVector"] = V[:, i]
            # Positive eigenvalues in descending order
            statsDict["eigenDist"] = numpy.flipud(
                numpy.sort(eigenDistribution[eigenDistribution > 0]))
            gc.collect()
        else:
            statsDict["maxEigVector"] = numpy.array([])
            statsDict["eigenDist"] = numpy.array([])

        if treeStats:
            logging.debug("Computing statistics on trees")
            trees = graph.findTrees()
            statsDict["treeSizesDist"] = numpy.bincount(
                [len(x) for x in trees])
            treeDepths = [
                GraphUtils.treeDepth((graph.subgraph(x))) for x in trees
            ]
            statsDict["treeDepthsDist"] = numpy.bincount(treeDepths)

        return statsDict
    def setP(self, p):
        """
        Store the rewiring probability.

        :param p: the probability of rewiring an edge, validated with Parameter.checkFloat against the bounds 0.0 and 1.0.
        :type p: :class:`float`
        """
        minProb, maxProb = 0.0, 1.0
        Parameter.checkFloat(p, minProb, maxProb)
        self.p = p
    def addEdge(self, vertexIndex1, vertexIndex2, edgeTypeIndex, edge=1):
        """ Add an edge of a given type to the graph between two vertices.

        @param vertexIndex1: The index of the first vertex.
        @param vertexIndex2: The index of the second vertex.
        @param edgeTypeIndex: The index of the edge type, validated with Parameter.checkIndex against 0 and self.maxEdgeTypes.
        @param edge: The value to assign to the edge.
        """
        Parameter.checkIndex(edgeTypeIndex, 0, self.maxEdgeTypes)
        typedGraph = self.sparseGraphs[edgeTypeIndex]
        typedGraph.addEdge(vertexIndex1, vertexIndex2, edge)
    def subList(self, indices):
        """
        Build a new GeneralVertexList holding the vertices of this object at
        the given indices. Each index is validated with Parameter.checkIndex
        against 0 and the number of vertices.
        """
        indexBounds = (0, self.getNumVertices())
        Parameter.checkList(indices, Parameter.checkIndex, indexBounds)

        subset = GeneralVertexList(len(indices))
        selectedVertices = self.getVertices(indices)
        subset.setVertices(selectedVertices)

        return subset