예제 #1
0
    def __generateRandomVertices(self, n):
        """
        Build an (n x numFeatures) array of vertex features. The date of birth
        column is filled with numpy.random.rand, gender and orientation are
        drawn with Util.randomChoice, and every other field written here gets
        a constant initial value.
        """
        vertices = numpy.zeros((n, self.numFeatures))

        vertices[:, self.dobIndex] = numpy.random.rand(n)

        #Both gender codes get the same weight
        genderWeights = numpy.array([1, 1])
        vertices[:, self.genderIndex] = Util.randomChoice(genderWeights, n)

        #Note in reality females cannot be recorded as bisexual but we model the real scenario
        #We assume that 5% of the population is gay or bisexual
        orientationWeights = numpy.array([19, 1])
        vertices[:, self.orientationIndex] = Util.randomChoice(orientationWeights, n)

        vertices[:, self.stateIndex] = 0.0

        #Presumably -1 marks a value that has not been set yet
        unsetColumns = (self.infectionTimeIndex, self.detectionTimeIndex,
                        self.detectionTypeIndex, self.hiddenDegreeIndex)
        for column in unsetColumns:
            vertices[:, column] = -1.0

        return vertices
예제 #2
0
    def simulateModel(theta):
        """
        Run one simulation of the HIV epidemic model with the parameters theta
        and return the value of model.objective().

        Everything other than theta (targetGraph, startDate, endDate, M, alpha,
        zeroVal, matchAlg, matchAlpha, breakSize, numRecordSteps) is not a
        parameter and is resolved from the surrounding scope.

        :param theta: the parameter vector handed to model.setParams.

        :return: the objective reported by the model after simulation.
        """
        logging.debug("theta=" + str(theta))

        #We start with the observed graph at the start date
        graph = targetGraph.subgraph(targetGraph.removedIndsAt(startDate))
        graph.addVertices(M-graph.size)

        p = Util.powerLawProbs(alpha, zeroVal)
        hiddenDegSeq = Util.randomChoice(p, graph.getNumVertices())

        #Match on every vertex feature except the four excluded below.
        #The builtin bool is used since the numpy.bool alias was removed from NumPy.
        featureInds = numpy.ones(graph.vlist.getNumFeatures(), bool)
        featureInds[HIVVertices.dobIndex] = False
        featureInds[HIVVertices.infectionTimeIndex] = False
        featureInds[HIVVertices.hiddenDegreeIndex] = False
        featureInds[HIVVertices.stateIndex] = False
        #Convert the boolean mask into the integer indices of the kept features
        featureInds = numpy.arange(featureInds.shape[0])[featureInds]
        matcher = GraphMatch(matchAlg, alpha=matchAlpha, featureInds=featureInds, useWeightM=False)
        graphMetrics = HIVGraphMetrics2(targetGraph, breakSize, matcher, float(endDate))

        recordStep = (endDate-startDate)/float(numRecordSteps)
        rates = HIVRates(graph, hiddenDegSeq)
        model = HIVEpidemicModel(graph, rates, T=float(endDate), T0=float(startDate), metrics=graphMetrics)
        model.setRecordStep(recordStep)
        model.setParams(theta)

        model.simulate()

        return model.objective()
예제 #3
0
    def growTree(self, X, y, argsortX, startId):
        """
        Grow a tree using a stack. Starting from the vertex startId, we
        repeatedly pop a node, pick a feature and threshold to split on, and
        add a left and a right child labelled with Util.mode of their labels.
        A node is left unsplit if either side of the split would be empty or
        if the tree depth has reached maxDepth. Children with fewer than
        minSplit examples are not expanded further.

        :param X: the examples, indexed as X[:, featureInd][exampleInds].

        :param y: the labels, indexed by example index.

        :param argsortX: passed through unchanged to findBestSplitRisk.

        :param startId: the id of the tree vertex at which growing starts.
        """
        eps = 10**-4
        idStack = [startId]

        while idStack:
            nodeId = idStack.pop()
            node = self.tree.getVertex(nodeId)
            nodeInds = node.getTrainInds()
            accuracies, thresholds = findBestSplitRisk(self.minSplit, X, y,
                                                       nodeInds, argsortX)

            #The feature is drawn with Util.randomChoice using the accuracies
            #as weights; eps is added so that no weight is exactly zero
            #(assumes the accuracies are non-negative)
            accuracies += eps
            bestFeatureInd = Util.randomChoice(accuracies)[0]
            bestThreshold = thresholds[bestFeatureInd]

            featureVals = X[:, bestFeatureInd][nodeInds]
            bestLeftInds = numpy.sort(nodeInds[featureVals < bestThreshold])
            bestRightInds = numpy.sort(nodeInds[featureVals >= bestThreshold])

            #The split may have 0 items in one set, so don't split. We test
            #the number of examples, not the sum of their indices, because a
            #set holding only example 0 sums to zero but is not empty.
            if (bestLeftInds.shape[0] != 0 and bestRightInds.shape[0] != 0
                    and self.tree.depth() < self.maxDepth):
                node.setError(1 - accuracies[bestFeatureInd])
                node.setFeatureInd(bestFeatureInd)
                node.setThreshold(bestThreshold)

                leftChildId = self.getLeftChildId(nodeId)
                leftChild = DecisionNode(bestLeftInds,
                                         Util.mode(y[bestLeftInds]))
                self.tree.addChild(nodeId, leftChildId, leftChild)

                if leftChild.getTrainInds().shape[0] >= self.minSplit:
                    idStack.append(leftChildId)

                rightChildId = self.getRightChildId(nodeId)
                rightChild = DecisionNode(bestRightInds,
                                          Util.mode(y[bestRightInds]))
                self.tree.addChild(nodeId, rightChildId, rightChild)

                if rightChild.getTrainInds().shape[0] >= self.minSplit:
                    idStack.append(rightChildId)
예제 #4
0
    def findThetas(self, lastTheta, lastWeights, t):
        """
        Collect accepted thetas for iteration t.

        Batches of batchSize candidate parameters are evaluated with runModel
        in a process pool until loadThetas(t) returns at least N thetas, or
        until the number of runs for this iteration reaches maxRuns. For
        t == 0 candidates come from abcParams.sampleParams(). Otherwise a row
        of lastTheta is picked (uniformly, or with Util.randomChoice on
        lastWeights) and perturbed until its prior density is non-zero.

        :param lastTheta: thetas of the previous iteration, one per row (only read when t != 0).

        :param lastWeights: weights used to pick a row of lastTheta (only read when t != 0 and thetaUniformChoice is false).

        :param t: the index of the current iteration.

        :return: the thetas returned by the most recent call to loadThetas(t).
        """
        #This value is overwritten before it is used; the call is kept in
        #case sampleParams advances the random state
        tempTheta = self.abcParams.sampleParams()
        currentTheta, dists = self.loadThetas(t)

        while len(currentTheta) < self.N:
            paramList = []

            for i in range(self.batchSize):
                if t == 0:
                    tempTheta = self.abcParams.sampleParams()
                else:
                    #Resample until the perturbed theta has non-zero prior density
                    while True:
                        if self.thetaUniformChoice:
                            tempTheta = lastTheta[numpy.random.randint(self.N), :]
                        else:
                            tempTheta = lastTheta[Util.randomChoice(lastWeights)[0], :]
                        tempTheta = self.abcParams.perturbationKernel(tempTheta, numpy.std(lastTheta, 0)/self.pertScale)
                        if self.abcParams.priorDensity(tempTheta) != 0:
                            break
                paramList.append((tempTheta.copy(), self.createModel, t, self.epsilonArray[t], self.N, self.thetaDir))

            pool = multiprocessing.Pool(processes=self.numProcesses)
            try:
                resultsIterator = pool.map(runModel, paramList)
                #resultsIterator = map(runModel, paramList)
            finally:
                #map blocks until every job is done, so the workers can be
                #released here; this also covers the early break below and
                #the case where map raises
                pool.terminate()

            for result in resultsIterator:
                self.numRuns[t] += result[0]
                self.numAccepts[t] += result[1]

            if self.numRuns[t] >= self.maxRuns:
                logging.debug("Maximum number of runs exceeded.")
                break

            currentTheta, dists = self.loadThetas(t)

        if self.autoEpsilon and t!=self.T-1:
            self.epsilonArray[t+1] = numpy.mean(dists)
            logging.debug("Found new epsilon: " + str(self.epsilonArray[0:t+2]))

        logging.debug("Num accepts: " + str(self.numAccepts))
        logging.debug("Num runs: " + str(self.numRuns))
        #Adding 1 where numRuns is zero avoids a division by zero. The builtin
        #int is used since the numpy.int alias was removed from NumPy.
        logging.debug("Acceptance rate: " + str(self.numAccepts/(self.numRuns + numpy.array(self.numRuns==0, int))))

        return currentTheta
예제 #5
0
 def growTree(self, X, y, argsortX, startId):
     """
     Grow a tree using a stack. Starting from the vertex startId, we
     repeatedly pop a node, pick a feature and threshold to split on, and
     add a left and a right child labelled with Util.mode of their labels.
     A node is left unsplit if either side of the split would be empty or
     if the tree depth has reached maxDepth. Children with fewer than
     minSplit examples are not expanded further.

     :param X: the examples, indexed as X[:, featureInd][exampleInds].

     :param y: the labels, indexed by example index.

     :param argsortX: passed through unchanged to findBestSplitRisk.

     :param startId: the id of the tree vertex at which growing starts.
     """
     eps = 10**-4
     idStack = [startId]

     while idStack:
         nodeId = idStack.pop()
         node = self.tree.getVertex(nodeId)
         nodeInds = node.getTrainInds()
         accuracies, thresholds = findBestSplitRisk(self.minSplit, X, y, nodeInds, argsortX)

         #The feature is drawn with Util.randomChoice using the accuracies
         #as weights; eps is added so that no weight is exactly zero
         #(assumes the accuracies are non-negative)
         accuracies += eps
         bestFeatureInd = Util.randomChoice(accuracies)[0]
         bestThreshold = thresholds[bestFeatureInd]

         featureVals = X[:, bestFeatureInd][nodeInds]
         bestLeftInds = numpy.sort(nodeInds[featureVals < bestThreshold])
         bestRightInds = numpy.sort(nodeInds[featureVals >= bestThreshold])

         #The split may have 0 items in one set, so don't split. We test
         #the number of examples, not the sum of their indices, because a
         #set holding only example 0 sums to zero but is not empty.
         if bestLeftInds.shape[0] != 0 and bestRightInds.shape[0] != 0 and self.tree.depth() < self.maxDepth:
             node.setError(1-accuracies[bestFeatureInd])
             node.setFeatureInd(bestFeatureInd)
             node.setThreshold(bestThreshold)

             leftChildId = self.getLeftChildId(nodeId)
             leftChild = DecisionNode(bestLeftInds, Util.mode(y[bestLeftInds]))
             self.tree.addChild(nodeId, leftChildId, leftChild)

             if leftChild.getTrainInds().shape[0] >= self.minSplit:
                 idStack.append(leftChildId)

             rightChildId = self.getRightChildId(nodeId)
             rightChild = DecisionNode(bestRightInds, Util.mode(y[bestRightInds]))
             self.tree.addChild(nodeId, rightChildId, rightChild)

             if rightChild.getTrainInds().shape[0] >= self.minSplit:
                 idStack.append(rightChildId)
예제 #6
0
    def setUp(self):
        """
        Configure numpy and logging, seed the random generator, and create
        the graph and ABC parameter fixtures used by the tests.
        """
        numpy.seterr(invalid='raise')
        logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
        numpy.set_printoptions(suppress=True, precision=4, linewidth=100)
        numpy.random.seed(21)

        #An undirected graph with 1000 vertices
        numVertices = 1000
        hivGraph = HIVGraph(numVertices, True)

        #Hidden degrees are drawn from a power law with exponent 2 and zeroVal 0.9
        degreeProbs = Util.powerLawProbs(2, 0.9)
        hiddenDegSeq = Util.randomChoice(degreeProbs, hivGraph.getNumVertices())
        #The rates object is not stored; it is still constructed as before
        rates = HIVRates(hivGraph, hiddenDegSeq)

        self.numParams = 6
        self.graph = hivGraph
        self.meanTheta = numpy.array([100, 0.9, 0.05, 0.001, 0.1, 0.005])
        sigmaTheta = self.meanTheta/2
        self.hivAbcParams = HIVABCParameters(self.meanTheta, sigmaTheta)
예제 #7
0
    def testRandomChoice(self):
        """
        Check that Util.randomChoice draws indices with frequencies close to
        the normalised weights: for repeated single draws, for n draws from a
        vector and for n draws from each row of a matrix. An empty vector is
        expected to give -1.
        """
        #With 500 draws the standard deviation of an empirical frequency is
        #about 0.02, so a tolerance of 0.1 avoids spurious failures
        tol = 10**-1
        numSamples = 500

        def empiricalFreqs(weights):
            #Draw numSamples single indices and return their relative frequencies
            counts = numpy.zeros(weights.shape[0])
            for i in range(numSamples):
                j = Util.randomChoice(weights)
                #logging.debug(j)
                counts[j] += 1
            return counts/numSamples

        uniform = numpy.ones(3)/3

        #The old check "(a == b).all() < tol" compared a boolean with tol and
        #so never tested closeness; compare the absolute differences instead
        v = numpy.array([0.25, 0.25, 0.25])
        self.assertTrue((numpy.abs(empiricalFreqs(v) - uniform) < tol).all())

        #Scaling the weights should not change the distribution
        v = v * 20
        self.assertTrue((numpy.abs(empiricalFreqs(v) - uniform) < tol).all())

        #Now try different distribution
        v = numpy.array([0.2, 0.6, 0.2])
        self.assertTrue((numpy.abs(empiricalFreqs(v) - v) < tol).all())

        #Test empty vector
        v = numpy.array([])
        self.assertEqual(Util.randomChoice(v), -1)

        #Test case where we want multiple random choices
        n = 1000
        v = numpy.array([0.2, 0.6, 0.2])
        j = Util.randomChoice(v, n)

        self.assertEqual(j.shape[0], n)
        self.assertAlmostEqual(numpy.sum(j==0)/float(n), v[0], places=1)
        self.assertAlmostEqual(numpy.sum(j==1)/float(n), v[1], places=1)

        #Now test the 2D case
        n = 2000
        V = numpy.array([[0.1, 0.3, 0.6], [0.6, 0.3, 0.1]])

        J = Util.randomChoice(V, n)

        self.assertEqual(J.shape[0], V.shape[0])
        self.assertEqual(J.shape[1], n)

        #Each row of J should follow the weights in the same row of V
        for row in range(V.shape[0]):
            for col in range(V.shape[1]):
                self.assertAlmostEqual(numpy.sum(J[row, :]==col)/float(n), V[row, col], places=1)
예제 #8
0
 def runRandom2Choice():
     """
     Call Util.randomChoice(V, m) one hundred times and discard the results.
     V and m are resolved from the surrounding scope.
     """
     numRepetitions = 100
     for _ in range(numRepetitions):
         Util.randomChoice(V, m)
예제 #9
0
    def findThetas(self, lastTheta, lastWeights, t):
        """
        Collect accepted thetas for iteration t.

        Batches of batchSize candidate parameters are evaluated with runModel
        in a process pool until loadThetas(t) returns at least N thetas, or
        until the number of runs for this iteration reaches maxRuns. For
        t == 0 candidates come from abcParams.sampleParams(). Otherwise a row
        of lastTheta is picked (uniformly, or with Util.randomChoice on
        lastWeights) and perturbed until its prior density is non-zero.

        :param lastTheta: thetas of the previous iteration, one per row
            (only read when t != 0).

        :param lastWeights: weights used to pick a row of lastTheta (only
            read when t != 0 and thetaUniformChoice is false).

        :param t: the index of the current iteration.

        :return: the thetas returned by the most recent call to
            loadThetas(t).
        """
        #This value is overwritten before it is used; the call is kept in
        #case sampleParams advances the random state
        tempTheta = self.abcParams.sampleParams()
        currentTheta, dists = self.loadThetas(t)

        while len(currentTheta) < self.N:
            paramList = []

            for i in range(self.batchSize):
                if t == 0:
                    tempTheta = self.abcParams.sampleParams()
                else:
                    #Resample until the perturbed theta has non-zero prior
                    #density
                    while True:
                        if self.thetaUniformChoice:
                            tempTheta = lastTheta[
                                numpy.random.randint(self.N), :]
                        else:
                            tempTheta = lastTheta[
                                Util.randomChoice(lastWeights)[0], :]
                        tempTheta = self.abcParams.perturbationKernel(
                            tempTheta,
                            numpy.std(lastTheta, 0) / self.pertScale)
                        if self.abcParams.priorDensity(tempTheta) != 0:
                            break
                paramList.append(
                    (tempTheta.copy(), self.createModel, t,
                     self.epsilonArray[t], self.N, self.thetaDir))

            pool = multiprocessing.Pool(processes=self.numProcesses)
            try:
                resultsIterator = pool.map(runModel, paramList)
                #resultsIterator = map(runModel, paramList)
            finally:
                #map blocks until every job is done, so the workers can be
                #released here; this also covers the early break below and
                #the case where map raises
                pool.terminate()

            for result in resultsIterator:
                self.numRuns[t] += result[0]
                self.numAccepts[t] += result[1]

            if self.numRuns[t] >= self.maxRuns:
                logging.debug("Maximum number of runs exceeded.")
                break

            currentTheta, dists = self.loadThetas(t)

        if self.autoEpsilon and t != self.T - 1:
            self.epsilonArray[t + 1] = numpy.mean(dists)
            logging.debug("Found new epsilon: " +
                          str(self.epsilonArray[0:t + 2]))

        logging.debug("Num accepts: " + str(self.numAccepts))
        logging.debug("Num runs: " + str(self.numRuns))
        #Adding 1 where numRuns is zero avoids a division by zero. The builtin
        #int is used since the numpy.int alias was removed from NumPy.
        logging.debug(
            "Acceptance rate: " +
            str(self.numAccepts /
                (self.numRuns + numpy.array(self.numRuns == 0, int))))

        return currentTheta
예제 #10
0
 def runRandom2Choice():
     """
     Call Util.randomChoice(V, m) one hundred times and discard the results.
     V and m are resolved from the surrounding scope.
     """
     numRepetitions = 100
     for _ in range(numRepetitions):
         Util.randomChoice(V, m)
예제 #11
0
    def testRandomChoice(self):
        """
        Check that Util.randomChoice draws indices with frequencies close to
        the normalised weights: for repeated single draws, for n draws from a
        vector and for n draws from each row of a matrix. An empty vector is
        expected to give -1.
        """
        #With 500 draws the standard deviation of an empirical frequency is
        #about 0.02, so a tolerance of 0.1 avoids spurious failures
        tol = 10**-1
        numSamples = 500

        def empiricalFreqs(weights):
            #Draw numSamples single indices and return their relative
            #frequencies
            counts = numpy.zeros(weights.shape[0])
            for i in range(numSamples):
                j = Util.randomChoice(weights)
                #logging.debug(j)
                counts[j] += 1
            return counts / numSamples

        uniform = numpy.ones(3) / 3

        #The old check "(a == b).all() < tol" compared a boolean with tol and
        #so never tested closeness; compare the absolute differences instead
        v = numpy.array([0.25, 0.25, 0.25])
        self.assertTrue(
            (numpy.abs(empiricalFreqs(v) - uniform) < tol).all())

        #Scaling the weights should not change the distribution
        v = v * 20
        self.assertTrue(
            (numpy.abs(empiricalFreqs(v) - uniform) < tol).all())

        #Now try different distribution
        v = numpy.array([0.2, 0.6, 0.2])
        self.assertTrue((numpy.abs(empiricalFreqs(v) - v) < tol).all())

        #Test empty vector
        v = numpy.array([])
        self.assertEqual(Util.randomChoice(v), -1)

        #Test case where we want multiple random choices
        n = 1000
        v = numpy.array([0.2, 0.6, 0.2])
        j = Util.randomChoice(v, n)

        self.assertEqual(j.shape[0], n)
        self.assertAlmostEqual(numpy.sum(j == 0) / float(n), v[0], places=1)
        self.assertAlmostEqual(numpy.sum(j == 1) / float(n), v[1], places=1)

        #Now test the 2D case
        n = 2000
        V = numpy.array([[0.1, 0.3, 0.6], [0.6, 0.3, 0.1]])

        J = Util.randomChoice(V, n)

        self.assertEqual(J.shape[0], V.shape[0])
        self.assertEqual(J.shape[1], n)

        #Each row of J should follow the weights in the same row of V
        for row in range(V.shape[0]):
            for col in range(V.shape[1]):
                self.assertAlmostEqual(numpy.sum(J[row, :] == col) / float(n),
                                       V[row, col],
                                       places=1)