예제 #1
0
    def evaluateCvOuter(self, X, y, folds):
        """
        Run stratified k-fold cross validation with model selection and collect
        the AUC and ROC metrics of every fold. The model selection routine is
        chosen from self.kernel, which must be "linear" or "rbf".

        :param X: The examples, passed through to AbstractPredictor.evaluateLearn2.

        :param y: The labels, used to build the stratified folds.

        :param folds: The number of cross validation folds, checked with Parameter.checkInt against the bounds 2 and infinity.
        :type folds: :class:`int`

        :return: A tuple (bestParams, allMetrics, bestMetaDicts) in which allMetrics is [trainAUCs, trainROCs, testAUCs, testROCs] and both dicts are always empty.

        :raises ValueError: If self.kernel is neither "linear" nor "rbf".
        """
        Parameter.checkInt(folds, 2, float('inf'))
        idx = cross_val.StratifiedKFold(y, folds)
        metricMethods = [Evaluator.auc2, Evaluator.roc]

        if self.kernel == "linear":
            logging.debug("Running linear rank SVM ")
            modelSelect = self.modelSelectLinear
        elif self.kernel == "rbf":
            logging.debug("Running RBF rank SVM")
            modelSelect = self.modelSelectRBF
        else:
            #Without this branch the metrics were never assigned and the method
            #failed further down with an UnboundLocalError
            raise ValueError("Unknown kernel: " + str(self.kernel))

        trainMetrics, testMetrics = AbstractPredictor.evaluateLearn2(
            X, y, idx, modelSelect, self.predict, metricMethods)

        #Metrics come back in the order of metricMethods: AUC first, then ROC
        bestTrainAUCs, bestTrainROCs = trainMetrics[0], trainMetrics[1]
        bestTestAUCs, bestTestROCs = testMetrics[0], testMetrics[1]

        bestParams = {}
        bestMetaDicts = {}
        allMetrics = [bestTrainAUCs, bestTrainROCs, bestTestAUCs, bestTestROCs]

        return (bestParams, allMetrics, bestMetaDicts)
예제 #2
0
 def __init__(self, fileName):
     """
     Set up a lock for a job whose results are saved as fileName. The lock file
     name is fileName with ".lock" appended.

     :param fileName: The name of the results file, checked to be a str.
     :type fileName: :class:`str`
     """
     Parameter.checkClass(fileName, str)
     lockSuffix = ".lock"
     self.fileName = fileName
     self.lockFileName = self.fileName + lockSuffix
예제 #3
0
    def bootstrap(repetitions, numExamples):
        """
        Generate train/test splits with the 0.632 bootstrap. For every
        repetition numExamples indices are drawn with replacement to form the
        training set, and the indices that were never drawn form the test set.

        :param repetitions: The number of bootstrap repetitions to perform.
        :type repetitions: :class:`int`

        :param numExamples: The number of examples.
        :type numExamples: :class:`int`

        :return: A list of tuples of the form (trainIndices, testIndices), one per repetition.
        """
        Parameter.checkInt(numExamples, 2, float('inf'))
        Parameter.checkInt(repetitions, 1, float('inf'))

        allIndices = numpy.arange(numExamples)
        splits = []

        for _ in range(repetitions):
            sampled = numpy.random.randint(numExamples, size=numExamples)
            #Everything that was not sampled is held out for testing
            heldOut = numpy.setdiff1d(allIndices, numpy.unique(sampled))
            splits.append((sampled, heldOut))

        return splits
예제 #4
0
 def setMaxDepth(self, maxDepth):
     """
     Set the maximum depth of the learnt tree.

     :param maxDepth: the maximum depth, checked with Parameter.checkInt against the bounds 1 and infinity.
     :type maxDepth: :class:`int`
     """
     minDepth, noLimit = 1, float("inf")
     Parameter.checkInt(maxDepth, minDepth, noLimit)
     #Stored as a builtin int whichever integer type was passed in
     self.maxDepth = int(maxDepth)
예제 #5
0
    def evaluateCvOuter(self, X, y, folds):
        """
        Run stratified k-fold cross validation with model selection and collect
        the AUC and ROC metrics of every fold. The model selection routine is
        chosen from self.kernel, which must be "linear" or "rbf".

        :param X: The examples, passed through to AbstractPredictor.evaluateLearn2.

        :param y: The labels, used to build the stratified folds.

        :param folds: The number of cross validation folds, checked with Parameter.checkInt against the bounds 2 and infinity.
        :type folds: :class:`int`

        :return: A tuple (bestParams, allMetrics, bestMetaDicts) in which allMetrics is [trainAUCs, trainROCs, testAUCs, testROCs] and both dicts are always empty.

        :raises ValueError: If self.kernel is neither "linear" nor "rbf".
        """
        Parameter.checkInt(folds, 2, float('inf'))
        idx = cross_val.StratifiedKFold(y, folds)
        metricMethods = [Evaluator.auc2, Evaluator.roc]

        if self.kernel == "linear":
            logging.debug("Running linear rank SVM ")
            modelSelect = self.modelSelectLinear
        elif self.kernel == "rbf":
            logging.debug("Running RBF rank SVM")
            modelSelect = self.modelSelectRBF
        else:
            #Without this branch the metrics were never assigned and the method
            #failed further down with an UnboundLocalError
            raise ValueError("Unknown kernel: " + str(self.kernel))

        trainMetrics, testMetrics = AbstractPredictor.evaluateLearn2(X, y, idx, modelSelect, self.predict, metricMethods)

        #Metrics come back in the order of metricMethods: AUC first, then ROC
        bestTrainAUCs, bestTrainROCs = trainMetrics[0], trainMetrics[1]
        bestTestAUCs, bestTestROCs = testMetrics[0], testMetrics[1]

        bestParams = {}
        bestMetaDicts = {}
        allMetrics = [bestTrainAUCs, bestTrainROCs, bestTestAUCs, bestTestROCs]

        return (bestParams, allMetrics, bestMetaDicts)
예제 #6
0
    def predictEdges(self, graph, edges):
        """
        Predict a value for every edge in edges. For each (ego, alter) pair the
        ego vertex is passed to self.egoRegressor and the prediction is the dot
        product of the alter vertex with the flattened regressor output.

        :param graph: The graph whose vertices are read with getVertex.

        :param edges: A numpy array with one edge per row; column 0 is the ego index and column 1 the alter index.

        :return: A 1D array holding one prediction per row of edges.
        """
        numFeatures = graph.getVertexList().getNumFeatures()
        Parameter.checkInt(numFeatures, 1, float('inf'))

        numEdges = edges.shape[0]
        logging.info("Making prediction over " + str(numEdges) + " edges")

        predictions = numpy.zeros(numEdges)

        for row in range(numEdges):
            egoInd, alterInd = edges[row, 0], edges[row, 1]

            #The ego vertex is wrapped in a list so that it is passed to the
            #regressor as a single-row array
            egoFeatures = numpy.array([graph.getVertex(egoInd)])
            coefficients = self.egoRegressor.predict(egoFeatures)
            predictions[row] = numpy.dot(graph.getVertex(alterInd), coefficients.ravel())

        return predictions
예제 #7
0
파일: Util.py 프로젝트: kentwang/sandbox
    def randomChoice(V, n=1):
        """
        Make a random choice from a vector V of values which are unnormalised
        probabilities and return the corresponding index. For example if
        V = [1, 2, 4] then the probabilities of the indices are respectively
        [1/7, 2/7, 4/7]. If V is a matrix, each row is taken as a separate set of
        probabilities and n choices are made for every row.

        :param V: A 1D or 2D array of unnormalised probabilities.
        :type V: :class:`ndarray`

        :param n: The number of random choices to make.
        :type n: :class:`int`

        :return: -1 if V has no rows or elements along its first axis, an array of n indices for 1D input, or an integer array of shape (V.shape[0], n) for 2D input.

        :raises ValueError: If V has more than 2 dimensions.
        """
        Parameter.checkClass(V, numpy.ndarray)

        if V.shape[0] == 0:
            return -1

        if V.ndim == 1:
            cumV = numpy.cumsum(V)
            #Scale uniform samples to the total mass and find their bin
            p = numpy.random.rand(n) * cumV[-1]
            return numpy.searchsorted(cumV, p)
        elif V.ndim == 2:
            cumV = numpy.cumsum(V, 1)
            P = numpy.random.rand(V.shape[0], n) * numpy.array([cumV[:, -1]]).T

            #The numpy.int alias was removed in NumPy 1.24; the builtin int
            #selects the same dtype
            inds = numpy.zeros(P.shape, int)
            for i in range(P.shape[0]):
                inds[i, :] = numpy.searchsorted(cumV[i, :], P[i, :])

            return inds
        else:
            raise ValueError("Invalid number of dimensions")
예제 #8
0
    def predictEdges(self, vertexIndices):
        r"""
        Make a prediction for each vertex x in vertexIndices by scoring every
        vertex y of the graph with

            score(x, y) = \sum_{z \in n(x) \cap n(y)} 1/log|n(z)|

        where n(v) is the set of neighbours of v, i.e. the nonzero entries of
        row v of the weight matrix. Common neighbours for which log|n(z)| is 0
        are skipped.

        :param vertexIndices: An array of the vertex indices to make predictions for.

        :return: A tuple (P, S) of matrices with one row of length self.windowSize per entry of vertexIndices, filled from self.indicesFromScores; P holds the ranked list of vertices and S presumably the matching scores.
        """

        Parameter.checkInt(self.windowSize, 1, self.graph.getNumVertices())
        logging.info("Running predictEdges in " + str(self.__class__.__name__))

        P = numpy.zeros((vertexIndices.shape[0], self.windowSize))
        S = numpy.zeros((vertexIndices.shape[0], self.windowSize))
        W = self.graph.getWeightMatrix()


        for i in range(vertexIndices.shape[0]):
            Util.printIteration(i, self.printStep, vertexIndices.shape[0])
            scores = numpy.zeros(self.graph.getNumVertices())

            for j in range(0, self.graph.getNumVertices()):
                #A vertex is a common neighbour when both weights are nonzero
                commonNeighbours = numpy.nonzero(W[vertexIndices[i], :] * W[j, :])[0]

                for k in commonNeighbours:
                    #q is the log of the number of neighbours of k
                    q = numpy.log(numpy.nonzero(W[k, :])[0].shape[0])
                    if q != 0:
                        scores[j] = scores[j] + 1/q


            P[i, :], S[i, :] = self.indicesFromScores(vertexIndices[i], scores)

        return P, S
예제 #9
0
파일: Latex.py 프로젝트: rezaarmand/sandbox
    def array1DToRow(X, precision=3):
        """
        Take a 1D numpy array and return it in latex table row format
        i.e. x1 & x2 .. xn. Floating point arrays are written with the given
        precision and all other arrays with the %d format.

        :param X: The array to print
        :type X: :class:`ndarray`

        :param precision: The precision of the printed floating point numbers, checked with Parameter.checkInt against the bounds 0 and 10.
        :type precision: :class:`int`

        :return: The row as a string, which is empty for an empty array.

        :raises ValueError: If X is not one dimensional.
        """
        Parameter.checkInt(precision, 0, 10)
        if X.ndim != 1:
            raise ValueError("Array must be one dimensional")

        #Test the dtype kind instead of equality with float, which only matches
        #float64 and printed float16/float32 arrays with %d, dropping the fraction
        if X.dtype.kind == "f":
            fmtStr = "%." + str(precision) + "f"
        else:
            fmtStr = "%d"

        return " & ".join([fmtStr % x for x in X])
예제 #10
0
 def setSampleSize(self, sampleSize):
     """
     Set the sample size used for each tree. The value is checked with
     Parameter.checkFloat against the bounds 0.0 and 1.0, so it appears to be a
     fraction of the examples rather than a count (to be confirmed).

     :param sampleSize: The size of the random sample for each tree.
     :type sampleSize: :class:`float`
     """
     lowest, highest = 0.0, 1.0
     Parameter.checkFloat(sampleSize, lowest, highest)
     self.sampleSize = sampleSize
예제 #11
0
 def setErrorCost(self, errorCost):
     """
     Set the penalty on errors on positive labels. The penalty for negative
     labels is 1.

     :param errorCost: The penalty, checked with Parameter.checkFloat against the bounds 0.0 and 1.0.
     :type errorCost: :class:`float`
     """
     lowest, highest = 0.0, 1.0
     Parameter.checkFloat(errorCost, lowest, highest)
     self.errorCost = errorCost
예제 #12
0
    def predict(self, X):
        """
        Score a set of examples given as the rows of the matrix X by passing
        them down the learnt tree.

        :param X: A matrix with examples as rows
        :type X: :class:`ndarray`

        :return: A vector with one score per example, taken from the leaf node the example ends up in.
        """
        Parameter.checkClass(X, numpy.ndarray)
        Parameter.checkArray(X)

        numExamples = X.shape[0]
        scores = numpy.zeros(numExamples)

        #Every example starts at the root
        rootNode = self.tree.getVertex((0, 0))
        rootNode.setTestInds(numpy.arange(numExamples))

        #Visit the nodes level by level, classifying at each one that exists
        for depth in range(self.maxDepth+1):
            for position in range(2**depth):
                nodeId = (depth, position)
                if not self.tree.vertexExists(nodeId):
                    continue

                self.classifyNode(self.tree, X, depth, position)

                node = self.tree.getVertex(nodeId)
                if node.isLeafNode():
                    leafInds = node.getTestInds()
                    scores[leafInds] = node.getScore()

        return scores
예제 #13
0
    def evaluateStratifiedCv(self,
                             X,
                             y,
                             folds,
                             metricMethod=Evaluator.binaryError):
        """
        Compute the stratified cross validation according to a given metric.

        :param X: The examples, passed through to AbstractPredictor.evaluateLearn.

        :param y: The labels, used to build the stratified folds.

        :param folds: The number of folds, checked with Parameter.checkInt against the bounds 2 and infinity.
        :type folds: :class:`int`

        :param metricMethod: The metric function handed to AbstractPredictor.evaluateLearn.

        :return: A tuple (mean, var) of the metrics taken along axis 0.

        :raises ImportError: If sklearn.cross_validation cannot be imported.
        """
        #Only the import is guarded, so an ImportError raised while learning or
        #predicting is no longer misreported as a missing scikit
        try:
            from sklearn.cross_validation import StratifiedKFold
        except ImportError:
            #logging.warn is a deprecated alias of logging.warning
            logging.warning("Failed to import scikits")
            raise

        Parameter.checkInt(folds, 2, float('inf'))
        idx = StratifiedKFold(y, folds)
        metrics = AbstractPredictor.evaluateLearn(X, y, idx,
                                                  self.learnModel,
                                                  self.predict,
                                                  metricMethod)

        mean = numpy.mean(metrics, 0)
        var = numpy.var(metrics, 0)

        return (mean, var)
예제 #14
0
 def setMaxDepth(self, maxDepth):
     """
     Set the maximum depth of the learnt tree.

     :param maxDepth: the maximum depth, checked with Parameter.checkInt against the bounds 1 and infinity.
     :type maxDepth: :class:`int`
     """
     minDepth, noLimit = 1, float("inf")
     Parameter.checkInt(maxDepth, minDepth, noLimit)
     #Stored as a builtin int whichever integer type was passed in
     self.maxDepth = int(maxDepth)
예제 #15
0
 def setBestResponse(self, bestResponse):
     """
     Set the label which corresponds to "positive".

     :param bestResponse: the label corresponding to "positive", checked with Parameter.checkInt with infinite bounds on both sides.
     :type bestResponse: :class:`int`
     """
     unbounded = float("inf")
     Parameter.checkInt(bestResponse, -unbounded, unbounded)
     self.bestResponse = bestResponse
예제 #16
0
 def setWeight(self, weight):
     """
     Set the weight on the positive examples; the negative weight is 1-weight.

     :param weight: the positive weight, checked with Parameter.checkFloat against the bounds 0.0 and 1.0.
     :type weight: :class:`float`
     """
     lowest, highest = 0.0, 1.0
     Parameter.checkFloat(weight, lowest, highest)
     self.weight = weight
예제 #17
0
    def auc(predY, trueY):
        """
        Compute the area under the ROC curve of the scores predY with respect
        to the true labels trueY. Can be used in conjunction with evaluateCV.
        Note the order of parameters.

        :param predY: The predicted scores as a 1D array.
        :type predY: :class:`ndarray`

        :param trueY: The true labels as a 1D array with at most two distinct values.
        :type trueY: :class:`ndarray`

        :return: The AUC, or 0.5 when trueY contains a single label value.

        :raises ValueError: If either array is not 1D or trueY has more than two distinct values.
        :raises ImportError: If sklearn.metrics is not available.
        """
        #A failed import simply propagates; the old try/except only re-raised
        import sklearn.metrics

        Parameter.checkClass(predY, numpy.ndarray)
        Parameter.checkClass(trueY, numpy.ndarray)
        if predY.ndim != 1:
            raise ValueError("Expecting predY to be 1D")
        if trueY.ndim != 1:
            raise ValueError("Expecting trueY to be 1D")

        numLabels = numpy.unique(trueY).shape[0]
        if numLabels > 2:
            raise ValueError("Found more than two label types in trueY")

        #With a single label value there is no ranking to measure
        if numLabels == 1:
            return 0.5

        fpr, tpr, threshold = sklearn.metrics.roc_curve(
            trueY.ravel(), predY.ravel())
        #Use the public sklearn.metrics.auc; the private sklearn.metrics.metrics
        #module no longer exists in current scikit-learn releases
        return sklearn.metrics.auc(fpr, tpr)
예제 #18
0
 def setNumTrees(self, numTrees):
     """
     Set the number of trees to generate in the forest.

     :param numTrees: The number of trees, checked with Parameter.checkInt against the bounds 1 and infinity.
     :type numTrees: :class:`int`
     """
     minTrees, noLimit = 1, float('inf')
     Parameter.checkInt(numTrees, minTrees, noLimit)
     self.numTrees = numTrees
예제 #19
0
    def parallelVfcvRbf(self, X, y, idx, type="C_SVC"):
        """
        Run parallel cross validation model selection with the RBF kernel. The
        kernel is set to "gaussian" and a parameter grid is handed to
        self.parallelModelSelect, whose result is returned.

        :param X: The examples as rows
        :type X: :class:`numpy.ndarray`

        :param y: The binary -1/+1 labels
        :type y: :class:`numpy.ndarray`

        :param idx: A list of train/test splits

        :param type: "C_SVC" to search over C and gamma only; any other value also searches over epsilon.
        :type type: :class:`str`
        """
        Parameter.checkClass(X, numpy.ndarray)
        Parameter.checkClass(y, numpy.ndarray)

        self.setKernel("gaussian")

        #C and gamma are always searched; epsilon only outside C_SVC
        paramDict = {"setC": self.getCs(), "setGamma": self.getGammas()}
        if type != "C_SVC":
            paramDict["setEpsilon"] = self.getEpsilons()

        return self.parallelModelSelect(X, y, idx, paramDict)
예제 #20
0
 def setMinSplit(self, minSplit):
     """
     Set the minimum number of examples in a node for it to be split.

     :param minSplit: the minimum node size, checked with Parameter.checkInt against the bounds 2 and infinity.
     :type minSplit: :class:`int`
     """
     smallestSplit, noLimit = 2, float("inf")
     Parameter.checkInt(minSplit, smallestSplit, noLimit)
     self.minSplit = minSplit
예제 #21
0
 def setMinSplit(self, minSplit):
     """
     Set the minimum number of examples in a node for it to be split.

     :param minSplit: the minimum node size, checked with Parameter.checkInt against the bounds 2 and infinity.
     :type minSplit: :class:`int`
     """
     smallestSplit, noLimit = 2, float("inf")
     Parameter.checkInt(minSplit, smallestSplit, noLimit)
     self.minSplit = minSplit
예제 #22
0
파일: Latex.py 프로젝트: kentwang/sandbox
    def array1DToRow(X, precision=3):
        """
        Take a 1D numpy array and return it in latex table row format
        i.e. x1 & x2 .. xn. Floating point arrays are written with the given
        precision and all other arrays with the %d format.

        :param X: The array to print
        :type X: :class:`ndarray`

        :param precision: The precision of the printed floating point numbers, checked with Parameter.checkInt against the bounds 0 and 10.
        :type precision: :class:`int`

        :return: The row as a string, which is empty for an empty array.

        :raises ValueError: If X is not one dimensional.
        """
        Parameter.checkInt(precision, 0, 10)
        if X.ndim != 1:
            raise ValueError("Array must be one dimensional")

        #Test the dtype kind instead of equality with float, which only matches
        #float64 and printed float16/float32 arrays with %d, dropping the fraction
        if X.dtype.kind == "f":
            fmtStr = "%." + str(precision) + "f"
        else:
            fmtStr = "%d"

        return " & ".join([fmtStr % x for x in X])
예제 #23
0
def parallelPenaltyGridRbf(svm, X, y, fullX, gridPoints, pdfX, pdfY1X, pdfYminus1X):
    """
    Find out the "ideal" penalty for every combination of svm.Cs and
    svm.gammas by running computeIdealPenalty in a multiprocessing pool.

    :param svm: An object providing the arrays Cs and gammas.

    :param X: The examples
    :type X: :class:`numpy.ndarray`

    :param y: The labels
    :type y: :class:`numpy.ndarray`

    :param fullX: Passed through unchanged to computeIdealPenalty.

    :param gridPoints: Passed through unchanged to computeIdealPenalty.

    :param pdfX: Passed through unchanged to computeIdealPenalty.

    :param pdfY1X: Passed through unchanged to computeIdealPenalty.

    :param pdfYminus1X: Passed through unchanged to computeIdealPenalty.

    :return: An array of shape (number of Cs, number of gammas) of ideal penalties.
    """
    Parameter.checkClass(X, numpy.ndarray)
    Parameter.checkClass(y, numpy.ndarray)
    chunkSize = 10

    numCs = svm.Cs.shape[0]
    numGammas = svm.gammas.shape[0]
    idealPenalties = numpy.zeros((numCs, numGammas))

    #One task per (C, gamma) pair, in row-major order of the result grid
    paramList = [(X, y, fullX, svm.Cs[i], svm.gammas[j], gridPoints, pdfX, pdfY1X, pdfYminus1X)
                 for i in range(numCs) for j in range(numGammas)]

    pool = multiprocessing.Pool()
    try:
        resultsIterator = pool.imap(computeIdealPenalty, paramList, chunkSize)

        #imap preserves the task order, so results are read back grid-wise
        for i in range(numCs):
            for j in range(numGammas):
                idealPenalties[i, j] = next(resultsIterator)
    finally:
        #Always release the worker processes, even if a task raised
        pool.terminate()

    return idealPenalties
예제 #24
0
 def setNumTrees(self, numTrees):
     """
     Set the number of trees to generate in the forest.

     :param numTrees: The number of trees, checked with Parameter.checkInt against the bounds 1 and infinity.
     :type numTrees: :class:`int`
     """
     minTrees, noLimit = 1, float('inf')
     Parameter.checkInt(numTrees, minTrees, noLimit)
     self.numTrees = numTrees
 def setWeight(self, weight):
     """
     Set the weight on the positive examples; the negative weight is 1-weight.

     :param weight: the positive weight, checked with Parameter.checkFloat against the bounds 0.0 and 1.0.
     :type weight: :class:`float`
     """
     lowest, highest = 0.0, 1.0
     Parameter.checkFloat(weight, lowest, highest)
     self.weight = weight
예제 #26
0
 def setSampleReplace(self, sampleReplace):
     """
     Choose whether to sample with replacement.

     :param sampleReplace: The flag, checked with Parameter.checkBoolean before it is stored.
     :type sampleReplace: :class:`bool`
     """
     flag = sampleReplace
     Parameter.checkBoolean(flag)
     self.sampleReplace = flag
 def setBestResponse(self, bestResponse):
     """
     Set the label which corresponds to "positive".

     :param bestResponse: the label corresponding to "positive", checked with Parameter.checkInt with infinite bounds on both sides.
     :type bestResponse: :class:`int`
     """
     unbounded = float('inf')
     Parameter.checkInt(bestResponse, -unbounded, unbounded)
     self.bestResponse = bestResponse
예제 #28
0
파일: Util.py 프로젝트: kentwang/sandbox
    def random2Choice(V, n=1):
        """
        Make a random binary choice from a vector V of values which are
        unnormalised probabilities and return the corresponding index. For
        example if V = [1, 2] then the probabilities of the indices are
        respectively [1/3, 2/3]. If V is a matrix, each row is taken as a
        separate pair of probabilities and n choices are made for every row.

        :param V: A 1D array of length 2 or a 2D array with 2 columns.
        :type V: :class:`ndarray`

        :param n: The number of random choices to make.
        :type n: :class:`int`

        :return: An integer array of 0/1 choices, of length n for 1D input or of shape (V.shape[0], n) for 2D input.

        :raises ValueError: If V is not binary along its last axis or has more than 2 dimensions.
        """
        Parameter.checkClass(V, numpy.ndarray)

        if V.ndim == 1 and V.shape[0] != 2:
            raise ValueError("Function only works on binary probabilities")
        if V.ndim == 2 and V.shape[1] != 2:
            raise ValueError("Function only works on binary probabilities")

        #The numpy.int alias was removed in NumPy 1.24; the builtin int selects
        #the same dtype
        if V.ndim == 1:
            cumV = numpy.cumsum(V)
            p = numpy.random.rand(n) * cumV[-1]
            #Index 1 is chosen when the sample falls at or beyond the first mass
            cumV2 = numpy.ones(n) * cumV[0] - p
            return numpy.array(cumV2 <= 0, int)
        elif V.ndim == 2:
            cumV = numpy.cumsum(V, 1)
            P = numpy.random.rand(V.shape[0], n) * numpy.array([cumV[:, -1]]).T
            cumV2 = numpy.outer(cumV[:, 0], numpy.ones(n)) - P
            return numpy.array(cumV2 <= 0, int)
        else:
            raise ValueError("Invalid number of dimensions")
예제 #29
0
    def crossValidation(folds, numExamples):
        """
        Returns a list of tuples (trainIndices, testIndices) using k-fold cross
        validation. The dataset is split into folds contiguous subsamples of
        approximately equal size, and every example belongs to exactly one
        test set.

        :param folds: The number of cross validation folds.
        :type folds: :class:`int`

        :param numExamples: The number of examples.
        :type numExamples: :class:`int`

        :return: A list with one (trainIndices, testIndices) tuple per fold.
        """
        Parameter.checkInt(folds, 1, numExamples)
        Parameter.checkInt(numExamples, 2, float('inf'))

        #Builtin ints so that the products below cannot overflow
        numFolds = int(folds)
        total = int(numExamples)
        allIndices = numpy.arange(0, numExamples)
        indexList = []

        for i in range(numFolds):
            #Exact integer boundaries: the former float arithmetic could round
            #a boundary down (64 examples in 49 folds gave a last boundary of
            #63) and leave the final example out of every test set
            start = (total * i) // numFolds
            end = (total * (i + 1)) // numFolds

            testIndices = numpy.arange(start, end)
            trainIndices = numpy.setdiff1d(allIndices, testIndices)
            indexList.append((trainIndices, testIndices))

        return indexList
예제 #30
0
    def bootstrap2(repetitions, numExamples):
        """
        Generate train/test splits with the 0.632 bootstrap. For every
        repetition numExamples indices are drawn with replacement to form the
        training set, and the indices that were never drawn form the test set.
        Oversampling the test set with 0.368 of the training examples was
        planned for this variant but is not performed by the current code.

        :param repetitions: The number of bootstrap repetitions to perform.
        :type repetitions: :class:`int`

        :param numExamples: The number of examples.
        :type numExamples: :class:`int`

        :return: A list of tuples of the form (trainIndices, testIndices), one per repetition.
        """
        Parameter.checkInt(numExamples, 2, float('inf'))
        Parameter.checkInt(repetitions, 1, float('inf'))

        allIndices = numpy.arange(numExamples)
        splits = []

        for _ in range(repetitions):
            sampled = numpy.random.randint(numExamples, size=numExamples)
            #Everything that was not sampled is held out for testing
            heldOut = numpy.setdiff1d(allIndices, numpy.unique(sampled))
            splits.append((sampled, heldOut))

        return splits
예제 #31
0
    def randCrossValidation(folds, numExamples, dtype=numpy.int32):
        """
        Returns a list of tuples (trainIndices, testIndices) using k-fold cross
        validation. In this case the indices are randomly permuted and then
        split into folds, so every example belongs to exactly one test set.

        :param folds: The number of cross validation folds.
        :type folds: :class:`int`

        :param numExamples: The number of examples.
        :type numExamples: :class:`int`

        :param dtype: The dtype of the permuted index array the test indices are sliced from.

        :return: A list with one (trainIndices, testIndices) tuple per fold.
        """
        Parameter.checkInt(folds, 1, numExamples)
        Parameter.checkInt(numExamples, 2, float('inf'))

        #Builtin ints so that the products below cannot overflow
        numFolds = int(folds)
        total = int(numExamples)
        allIndices = numpy.arange(0, numExamples)
        indexList = []

        inds = numpy.array(numpy.random.permutation(numExamples), dtype)

        for i in range(numFolds):
            #Exact integer boundaries: the former float arithmetic could round
            #a boundary down (64 examples in 49 folds gave a last boundary of
            #63) and leave one example out of every test set
            start = (total * i) // numFolds
            end = (total * (i + 1)) // numFolds

            testIndices = inds[start:end]
            trainIndices = numpy.setdiff1d(allIndices, testIndices)
            indexList.append((trainIndices, testIndices))

        return indexList
예제 #32
0
 def setSampleSize(self, sampleSize):
     """
     Set the sample size used for each tree. The value is checked with
     Parameter.checkFloat against the bounds 0.0 and 1.0, so it appears to be a
     fraction of the examples rather than a count (to be confirmed).

     :param sampleSize: The size of the random sample for each tree.
     :type sampleSize: :class:`float`
     """
     lowest, highest = 0.0, 1.0
     Parameter.checkFloat(sampleSize, lowest, highest)
     self.sampleSize = sampleSize
예제 #33
0
파일: LibSVM.py 프로젝트: kentwang/sandbox
 def setErrorCost(self, errorCost):
     """
     Set the penalty on errors on positive labels. The penalty for negative
     labels is 1.

     :param errorCost: The penalty, checked with Parameter.checkFloat against the bounds 0.0 and 1.0.
     :type errorCost: :class:`float`
     """
     lowest, highest = 0.0, 1.0
     Parameter.checkFloat(errorCost, lowest, highest)
     self.errorCost = errorCost
예제 #34
0
    def evaluate(self, X1, X2):
        """
        Evaluate the kernel between every row of X1 and every row of X2, i.e.
        exp(-||x1 - x2||^2 / (2 sigma^2)) with sigma taken from self.sigma. Both
        matrices hold examples as rows and must have the same number of columns.

        :param X1: First set of examples.
        :type X1: :class:`numpy.ndarray`

        :param X2: Second set of examples.
        :type X2: :class:`numpy.ndarray`

        :return: A matrix with one row per example of X1 and one column per example of X2.

        :raises ValueError: If X1 and X2 have a different number of columns.
        """
        Parameter.checkClass(X1, numpy.ndarray)
        Parameter.checkClass(X2, numpy.ndarray)

        if X1.shape[1] != X2.shape[1]:
            raise ValueError("Invalid matrix dimentions: " + str(X1.shape) + " " + str(X2.shape))

        onesCol1 = numpy.ones((X1.shape[0], 1))
        onesCol2 = numpy.ones((X2.shape[0], 1))

        #Squared norms of the rows
        sqNorms1 = numpy.sum(X1**2, 1)
        sqNorms2 = numpy.sum(X2**2, 1)

        #-||x1 - x2||^2 expanded as 2<x1, x2> - ||x1||^2 - ||x2||^2
        negSqDists = 2*numpy.dot(X1, X2.T) - numpy.outer(sqNorms1, onesCol2) - numpy.outer(onesCol1, sqNorms2)
        exponent = negSqDists / (2*self.sigma**2)

        return numpy.exp(exponent)
예제 #35
0
 def setSampleReplace(self, sampleReplace):
     """
     Choose whether to sample with replacement.

     :param sampleReplace: The flag, checked with Parameter.checkBoolean before it is stored.
     :type sampleReplace: :class:`bool`
     """
     flag = sampleReplace
     Parameter.checkBoolean(flag)
     self.sampleReplace = flag
예제 #36
0
파일: LibSVM.py 프로젝트: kentwang/sandbox
    def parallelVfcvRbf(self, X, y, idx, type="C_SVC"):
        """
        Run parallel cross validation model selection with the RBF kernel. The
        kernel is set to "gaussian" and a parameter grid is handed to
        self.parallelModelSelect, whose result is returned.

        :param X: The examples as rows
        :type X: :class:`numpy.ndarray`

        :param y: The binary -1/+1 labels
        :type y: :class:`numpy.ndarray`

        :param idx: A list of train/test splits

        :param type: "C_SVC" to search over C and gamma only; any other value also searches over epsilon.
        :type type: :class:`str`
        """
        Parameter.checkClass(X, numpy.ndarray)
        Parameter.checkClass(y, numpy.ndarray)

        self.setKernel("gaussian")

        #C and gamma are always searched; epsilon only outside C_SVC
        paramDict = {"setC": self.getCs(), "setGamma": self.getGammas()}
        if type != "C_SVC":
            paramDict["setEpsilon"] = self.getEpsilons()

        return self.parallelModelSelect(X, y, idx, paramDict)
예제 #37
0
    def __init__(self,
                 algorithm="PATH",
                 alpha=0.5,
                 featureInds=None,
                 useWeightM=True):
        """
        Initialise the matching object with an algorithm name, the trade-off
        alpha between matching adjacency matrices and vertex labels, and
        featureInds, an optional array of indices to use for label matching.

        :param algorithm: The name of the matching algorithm, checked to be a str.
        :type algorithm: :class:`str`

        :param alpha: A value in [0, 1] which is smaller to match graph structure, larger to match the labels more

        :param featureInds: An optional array of indices to use for label matching.

        :param useWeightM: Stored unchanged as self.useWeightM.
        """
        Parameter.checkFloat(alpha, 0.0, 1.0)
        Parameter.checkClass(algorithm, str)

        #Settings supplied by the caller
        self.algorithm, self.alpha = algorithm, alpha
        self.featureInds, self.useWeightM = featureInds, useWeightM

        #Fixed defaults
        self.maxInt = 10**9
        self.init = "rand"
        self.lambdaM = 50
        #Gamma is the same as dummy_nodes_c_coef for costing added vertex labels
        self.gamma = 0.0
        #Rho is the same as dummy_nodes_fill
        self.rho = 0.5
예제 #38
0
    def randCrossValidation(folds, numExamples, dtype=numpy.int32):
        """
        Returns a list of tuples (trainIndices, testIndices) using k-fold cross
        validation. In this case the indices are randomly permuted and then
        split into folds, so every example belongs to exactly one test set.

        :param folds: The number of cross validation folds.
        :type folds: :class:`int`

        :param numExamples: The number of examples.
        :type numExamples: :class:`int`

        :param dtype: The dtype of the permuted index array the test indices are sliced from.

        :return: A list with one (trainIndices, testIndices) tuple per fold.
        """
        Parameter.checkInt(folds, 1, numExamples)
        Parameter.checkInt(numExamples, 2, float('inf'))

        #Builtin ints so that the products below cannot overflow
        numFolds = int(folds)
        total = int(numExamples)
        allIndices = numpy.arange(0, numExamples)
        indexList = []

        inds = numpy.array(numpy.random.permutation(numExamples), dtype)

        for i in range(numFolds):
            #Exact integer boundaries: the former float arithmetic could round
            #a boundary down (64 examples in 49 folds gave a last boundary of
            #63) and leave one example out of every test set
            start = (total * i) // numFolds
            end = (total * (i + 1)) // numFolds

            testIndices = inds[start:end]
            trainIndices = numpy.setdiff1d(allIndices, testIndices)
            indexList.append((trainIndices, testIndices))

        return indexList
예제 #39
0
    def predict(self, X):
        """
        Return the scores computed by self.predictScores for X.

        :param X: The examples, checked to be a numpy array.
        :type X: :class:`numpy.ndarray`

        :return: The result of self.predictScores(X).
        """
        Parameter.checkClass(X, numpy.ndarray)
        return self.predictScores(X)
예제 #40
0
    def predict(self, X):
        """
        Return the scores computed by self.predictScores for X.

        :param X: The examples, checked to be a numpy array.
        :type X: :class:`numpy.ndarray`

        :return: The result of self.predictScores(X).
        """
        Parameter.checkClass(X, numpy.ndarray)
        return self.predictScores(X)
예제 #41
0
    def binaryBootstrapError(testY, predTestY, trainY, predTrainY, weight):
        """
        Combine the test and training binary errors for use with a bootstrap
        method, computing w*testErr + (1-w)*trainErr.

        :param testY: The test labels, passed to Evaluator.binaryError.

        :param predTestY: The predicted test labels.

        :param trainY: The training labels, passed to Evaluator.binaryError.

        :param predTrainY: The predicted training labels.

        :param weight: The weight w on the test error, checked with Parameter.checkFloat against the bounds 0.0 and 1.0.
        :type weight: :class:`float`

        :return: The weighted combination of the two errors.
        """
        Parameter.checkFloat(weight, 0.0, 1.0)

        testErr = Evaluator.binaryError(testY, predTestY)
        trainErr = Evaluator.binaryError(trainY, predTrainY)
        return weight*testErr + (1-weight)*trainErr
예제 #42
0
파일: LibSVM.py 프로젝트: kentwang/sandbox
    def setC(self, C):
        """
        Set the parameter C and then call self.__updateParams().

        :param C: The value of C, checked with Parameter.checkFloat against the bounds 0.0 and infinity.
        :type C: :class:`float`

        :raises ImportError: If sklearn.svm cannot be imported.
        """
        #SVC itself is not used here: the import only makes the call fail when
        #scikit-learn is unavailable, and any error propagates unchanged
        from sklearn.svm import SVC

        noLimit = float("inf")
        Parameter.checkFloat(C, 0.0, noLimit)

        self.C = C
        self.__updateParams()
예제 #43
0
 def setPosteriorSampleSize(self, posteriorSampleSize):
     """
     Set the sample size of the posterior distribution (population size). The
     value is stored as self.N.

     :param posteriorSampleSize: The size of the population, checked with Parameter.checkInt against the bounds 0 and infinity.
     :type posteriorSampleSize: `int`
     """
     #The numpy.float alias was removed in NumPy 1.24; the builtin float gives
     #the same infinity
     Parameter.checkInt(posteriorSampleSize, 0, float('inf'))
     self.N = posteriorSampleSize
예제 #44
0
    def setBestResponse(self, bestResponse):
        """
        Set the best response, which is the label corresponding to "positive".

        :param bestResponse: the label corresponding to "positive", checked with Parameter.checkInt with infinite bounds on both sides.
        :type bestResponse: :class:`int`
        """
        unbounded = float("inf")
        Parameter.checkInt(bestResponse, -unbounded, unbounded)
        self.bestResponse = bestResponse
예제 #45
0
    def binaryBootstrapError(testY, predTestY, trainY, predTrainY, weight):
        """
        Combine the test and training binary errors for use with a bootstrap
        method, computing w*testErr + (1-w)*trainErr.

        :param testY: The test labels, passed to Evaluator.binaryError.

        :param predTestY: The predicted test labels.

        :param trainY: The training labels, passed to Evaluator.binaryError.

        :param predTrainY: The predicted training labels.

        :param weight: The weight w on the test error, checked with Parameter.checkFloat against the bounds 0.0 and 1.0.
        :type weight: :class:`float`

        :return: The weighted combination of the two errors.
        """
        Parameter.checkFloat(weight, 0.0, 1.0)

        testErr = Evaluator.binaryError(testY, predTestY)
        trainErr = Evaluator.binaryError(trainY, predTrainY)
        return weight * testErr + (1 - weight) * trainErr
예제 #46
0
 def setC(self, C):
     """
     Set the parameter C and then call self.__updateParams().

     :param C: The value of C, checked with Parameter.checkFloat against the bounds 0.0 and infinity.
     :type C: :class:`float`

     :raises ImportError: If sklearn.svm cannot be imported.
     """
     #SVC itself is not used here: the import only makes the call fail when
     #scikit-learn is unavailable, and any error propagates unchanged
     from sklearn.svm import SVC

     noLimit = float('inf')
     Parameter.checkFloat(C, 0.0, noLimit)

     self.C = C
     self.__updateParams()
예제 #47
0
    def evaluateLearn(X, y, idx, learnModel, predict, metricMethod, progress=True):
        """
        Evaluate a learning algorithm over a list of training/test splits. For
        each split the model is learnt on the training part, predictions are
        made on the test part and metricMethod measures their quality.

        :param X: A matrix with examples as rows
        :type X: :class:`ndarray`

        :param y: A vector of labels
        :type y: :class:`ndarray`

        :param idx: A list of training/test splits
        :type idx: :class:`list`

        :param learnModel: A function such that learnModel(X, y) finds a mapping from X to y
        :type learnModel: :class:`function`

        :param predict: A function such that predict(X) makes predictions for X
        :type predict: :class:`function`

        :param metricMethod: A function such that metricMethod(predY, testY) returns the quality of predicted labels predY
        :type metricMethod: :class:`function`

        :param progress: Whether to report progress with Util.printConciseIteration.
        :type progress: :class:`bool`

        :return: An array holding the metric of each split, in the order of idx.

        :raises ValueError: If y is not one dimensional.
        """
        Parameter.checkClass(X, numpy.ndarray)
        Parameter.checkArray(X, softCheck=True)
        Parameter.checkInt(X.shape[0], 1, float('inf'))
        Parameter.checkClass(y, numpy.ndarray)
        Parameter.checkArray(y, softCheck=True)

        if y.ndim != 1:
            raise ValueError("Dimention of y must be 1")

        numSplits = len(idx)
        metrics = numpy.zeros(numSplits)
        logging.debug("EvaluateLearn: Using " + str(numSplits) + " splits on " + str(X.shape[0]) + " examples")

        for i, (idxtr, idxts) in enumerate(idx):
            if progress:
                Util.printConciseIteration(i, 1, numSplits)

            trainX, testX = X[idxtr, :], X[idxts, :]
            trainY, testY = y[idxtr], y[idxts]

            learnModel(trainX, trainY)
            predY = predict(testX)
            #Collect garbage after every split, before the metric is computed
            gc.collect()

            metrics[i] = metricMethod(predY, testY)

        return metrics
예제 #48
0
 def setPosteriorSampleSize(self, posteriorSampleSize):
     """
     Set the sample size of the posterior distribution (population size). The
     value is stored as self.N.

     :param posteriorSampleSize: The size of the population, checked with Parameter.checkInt against the bounds 0 and infinity.
     :type posteriorSampleSize: `int`
     """
     #The numpy.float alias was removed in NumPy 1.24; the builtin float gives
     #the same infinity
     Parameter.checkInt(posteriorSampleSize, 0, float('inf'))
     self.N = posteriorSampleSize
예제 #49
0
    def eigenAdd(omega, Q, Y, k):
        """
        Perform an eigen update of the form A^*A + Y^*Y in which Y is a low-rank matrix
        and A^*A = Q Omega Q*. We use the rank-k approximation of A:  Q_k Omega_k Q_k^*
        and then approximate [A^*A_k Y^*Y]_k.

        :param omega: A 1D array of eigenvalues, one per column of Q.
        :type omega: :class:`ndarray`

        :param Q: A matrix whose columns are the eigenvectors paired with omega.
        :type Q: :class:`ndarray`

        :param Y: The low-rank matrix of the update.
        :type Y: :class:`ndarray`

        :param k: The number of eigenpairs to keep, checked with Parameter.checkInt against the bounds 0 and omega.shape[0].
        :type k: :class:`int`

        :return: A tuple (pi, V) of at most k eigenvalues ordered by decreasing magnitude and the matching vectors as the columns of V.

        :raises ValueError: If omega is not 1D or its length differs from the number of columns of Q.
        """
        #logging.debug("< eigenAdd >")
        Parameter.checkInt(k, 0, omega.shape[0])
        #if not numpy.isrealobj(omega) or not numpy.isrealobj(Q):
        #    raise ValueError("Eigenvalues and eigenvectors must be real")
        if omega.ndim != 1:
            raise ValueError("omega must be 1-d array")
        if omega.shape[0] != Q.shape[1]:
            raise ValueError("Must have same number of eigenvalues and eigenvectors")

        if __debug__:
            Parameter.checkOrthogonal(Q, tol=EigenUpdater.tol, softCheck=True, arrayInfo="input Q in eigenAdd()")

        #Taking the abs of the eigenvalues is correct
        inds = numpy.flipud(numpy.argsort(numpy.abs(omega)))
        #The tolerance mask has to be built in sorted order: the former mask
        #was in the original order of omega, which dropped the wrong
        #eigenpairs whenever omega was not already sorted by decreasing magnitude
        inds = inds[numpy.abs(omega[inds])>EigenUpdater.tol]

        omega, Q = Util.indEig(omega, Q, inds)
        Omega = numpy.diag(omega)

        YY = Y.conj().T.dot(Y)
        QQ = Q.dot(Q.conj().T)
        #Part of Y which lies outside the span of the columns of Q
        Ybar = Y - Y.dot(QQ)

        Pbar, sigmaBar, Qbar = numpy.linalg.svd(Ybar, full_matrices=False)
        inds = numpy.flipud(numpy.argsort(numpy.abs(sigmaBar)))
        #Same sorted-order mask as for omega above
        inds = inds[numpy.abs(sigmaBar[inds])>EigenUpdater.tol]
        Pbar, sigmaBar, Qbar = Util.indSvd(Pbar, sigmaBar, Qbar, inds)
        
        SigmaBar = numpy.diag(sigmaBar)
        #Qbar from the SVD is replaced: it is recomputed from Ybar and Pbar and
        #each column is rescaled by the inverse square root of its sum of squares
        Qbar = Ybar.T.dot(Pbar)
        Qbar = Qbar.dot(numpy.diag(numpy.diag(Qbar.T.dot(Qbar))**-0.5))

        r = sigmaBar.shape[0]

        YQ = Y.dot(Q)
        Zeros = numpy.zeros((r, omega.shape[0]))
        #Basis in which the small eigenproblem below is expressed
        D = numpy.c_[Q, Qbar]

        YYQQ = YY.dot(QQ)
        Z = D.conj().T.dot(YYQQ + YYQQ.conj().T).dot(D)
        #Block diagonal matrix of size (number of kept eigenvalues + r)
        F = numpy.c_[numpy.r_[Omega - YQ.conj().T.dot(YQ), Zeros], numpy.r_[Zeros.T, SigmaBar.conj().dot(SigmaBar)]]
        F = F + Z 

        pi, H = scipy.linalg.eigh(F)
        #Keep the k eigenpairs of largest magnitude
        inds = numpy.flipud(numpy.argsort(numpy.abs(pi)))

        H = H[:, inds[0:k]]
        pi = pi[inds[0:k]]

        #Map the eigenvectors of F back through the basis D
        V = D.dot(H)
        #logging.debug("</ eigenAdd >")
        return pi, V
예제 #50
0
    def subList(self, indices):
        """
        Build a new HIVVertices object containing only the vertices selected
        by the given indices.

        :param indices: the vertex indices to copy; each one is validated
            against the range from 0 to the number of vertices of this object.
        :return: a HIVVertices object of size len(indices).
        """
        indexBounds = (0, self.getNumVertices())
        Parameter.checkList(indices, Parameter.checkIndex, indexBounds)

        subset = HIVVertices(len(indices))
        selectedVertices = self.getVertices(indices)
        subset.setVertices(selectedVertices)
        return subset
예제 #51
0
    def setInfected(self, vertexInd, time):
        """
        Mark the given vertex as infected and record the infection time.

        :param vertexInd: index of the vertex, validated against the number of vertices.
        :param time: the infection time, validated as a float between 0.0 and infinity.
        :raises ValueError: if the vertex state is already HIVVertices.infected.
        """
        Parameter.checkIndex(vertexInd, 0, self.getNumVertices())
        Parameter.checkFloat(time, 0.0, float('inf'))

        stateColumn = HIVVertices.stateIndex
        alreadyInfected = self.V[vertexInd, stateColumn] == HIVVertices.infected
        if alreadyInfected:
            raise ValueError("Person is already infected")

        # Update the state first, then the time at which it changed
        self.V[vertexInd, stateColumn] = HIVVertices.infected
        self.V[vertexInd, HIVVertices.infectionTimeIndex] = time
예제 #52
0
    def setFeatureSize(self, featureSize):
        """
        Set the proportion of features to use for node computation.

        :param featureSize: the proportion of features to randomly select to compute each node. If None then use sqrt(X.shape[1]) features.
        :type featureSize: :class:`float`
        """
        # Identity test: None is a singleton, and "!= None" would invoke any
        # custom __ne__ on the argument instead of a plain None check.
        if featureSize is not None:
            Parameter.checkFloat(featureSize, 0.0, 1.0)
        self.featureSize = featureSize
예제 #53
0
    def predictScores(self, X):
        """
        Score the examples in X with the learnt tree.

        :param X: the examples to score; must be a numpy.ndarray.
        :return: the scores as a flattened numpy array.
        """
        Parameter.checkClass(X, numpy.ndarray)

        rPredict = robjects.r['predict']
        frame = self.baseLib.data_frame(X)
        rawScores = rPredict(self.getModel(), frame)
        scoreMatrix = self.baseLib.matrix(rawScores)
        return numpy.asarray(scoreMatrix).ravel()
예제 #54
0
    def standardiseArray(self, X):
        """
        Standardise an array by applying centreArray followed by normaliseArray.

        :param X: the array to standardise; must be a numpy.ndarray.
        :return: the centred and then normalised array.
        """
        Parameter.checkClass(X, numpy.ndarray)

        centred = self.centreArray(X)
        return self.normaliseArray(centred)
예제 #55
0
    def setB(self, b):
        """
        Store the kernel bias parameter b after validating it.

        :param b: kernel bias parameter.
        :type b: :class:`float`
        """
        lowerBound, upperBound = 0.0, float('inf')
        Parameter.checkFloat(b, lowerBound, upperBound)
        self.b = b
예제 #56
0
    def setDegree(self, degree):
        """
        Store the kernel degree parameter after validating it.

        :param degree: kernel degree parameter.
        :type degree: :class:`int`
        """
        lowerBound, upperBound = 1, float('inf')
        Parameter.checkInt(degree, lowerBound, upperBound)
        self.degree = degree
예제 #57
0
    def __init__(self, alterRegressor, egoRegressor):
        """
        Store the two regressors after checking that each is an AbstractPredictor.

        The alterRegressor must be a primal method, since the number of alters
        for each ego vary, and hence the dual vectors are not constant in size.

        :param alterRegressor: the predictor used for the alters.
        :param egoRegressor: the predictor used for the egos.
        """
        # Validate in the same order as the arguments are given
        for regressor in (alterRegressor, egoRegressor):
            Parameter.checkClass(regressor, AbstractPredictor)

        self.alterRegressor, self.egoRegressor = alterRegressor, egoRegressor
예제 #58
0
    def setFeatureSize(self, featureSize):
        """
        Set the proportion of features to use for node computation.

        :param featureSize: the proportion of features to randomly select to compute each node. If None then use sqrt(X.shape[1]) features.
        :type featureSize: :class:`float`
        """
        # Identity test: None is a singleton, and "!= None" would invoke any
        # custom __ne__ on the argument instead of a plain None check.
        if featureSize is not None:
            Parameter.checkFloat(featureSize, 0.0, 1.0)
        self.featureSize = featureSize