def profileSimulateGraphMatch(self):
    """
    Profile one toy simulation whose graph metrics are computed by graph matching.

    A GraphMatch instance is configured on a subset of the vertex features,
    wrapped in HIVGraphMetrics2 and handed to HIVModelUtils.simulate. The
    simulation is executed through ProfileUtils.profile, and the mean distance
    reported by the metrics object is printed when it finishes.
    """
    N, matchAlpha, breakDist, purtScale = HIVModelUtils.toyABCParams()
    startDate, endDate, recordStep, M, targetGraph = HIVModelUtils.toySimulationParams()
    theta, stdTheta = HIVModelUtils.toyTheta()

    # Start with every vertex feature selected, then switch off the ones that
    # must not take part in the matching. The builtin bool is used as dtype:
    # the numpy.bool alias was deprecated in NumPy 1.20 and removed in 1.24.
    featureInds = numpy.ones(targetGraph.vlist.getNumFeatures(), dtype=bool)
    featureInds[HIVVertices.dobIndex] = False
    featureInds[HIVVertices.infectionTimeIndex] = False
    featureInds[HIVVertices.hiddenDegreeIndex] = False
    featureInds[HIVVertices.stateIndex] = False
    # Convert the boolean mask into the integer indices of the kept features.
    featureInds = numpy.arange(featureInds.shape[0])[featureInds]

    # QCV is fastest and most accurate
    # PATH is slowest but quite accurate
    # RANK is very fast but less accurate than PATH
    # U is fastest but least accurate
    matcher = GraphMatch("QCV", alpha=matchAlpha, featureInds=featureInds, useWeightM=False)
    matcher.lambdaM = 50
    matcher.init = "rand"
    graphMetrics = HIVGraphMetrics2(targetGraph, breakDist, matcher, float(endDate))

    def run():
        """Run the simulation once and print the resulting mean distance."""
        times, infectedIndices, removedIndices, graph = HIVModelUtils.simulate(theta, startDate, endDate, recordStep, M, graphMetrics)
        print("Mean distance " + str(graphMetrics.meanDistance()))

    # The profiled statement is evaluated by name, so run must be in locals().
    ProfileUtils.profile('run()', globals(), locals())
def runModel(meanTheta):
    """
    Simulate the epidemic model repeatedly with parameters meanTheta and
    average the recorded statistics over the repetitions.

    The start date and population size M come from
    HIVModelUtils.toySimulationParams(); the end date (1000.0) and record step
    (50) are overridden here. Ten independent graphs are simulated.

    Returns a tuple (statsTimes, mean infected counts, mean removed counts,
    mean contact-graph series, mean removed-graph series, vertexArray[:, 6]),
    where vertexArray is the one produced by the last repetition.
    """
    startDate, endDate, recordStep, M, targetGraph = HIVModelUtils.toySimulationParams()
    endDate = 1000.0
    recordStep = 50
    undirected = True

    logging.debug("MeanTheta=" + str(meanTheta))
    numReps = 10

    # One entry per repetition; each entry is a series over statsTimes.
    infectedCounts = []
    removedCounts = []
    removedSeries = []
    contactSeries = []

    statistics = GraphStatistics()
    statsTimes = numpy.arange(0, endDate, recordStep)

    for rep in range(numReps):
        graph = HIVGraph(M, undirected)
        logging.info("Created graph at index " + str(rep) + ": " + str(graph))

        alpha = 2
        zeroVal = 0.9
        hiddenDegSeq = Util.randomChoice(Util.powerLawProbs(alpha, zeroVal), graph.getNumVertices())

        model = HIVEpidemicModel(graph, HIVRates(graph, hiddenDegSeq))
        model.setT0(startDate)
        model.setT(endDate)
        model.setRecordStep(recordStep)
        model.setParams(meanTheta)

        times, infectedIndices, removedIndices, graph = model.simulate(True)

        # The index lists returned by simulate() are replaced by the ones
        # computed at the fixed statsTimes grid.
        generated = HIVModelUtils.generateStatistics(graph, statsTimes)
        vertexArray, infectedIndices, removedIndices, contactGraphStats, removedGraphStats = generated

        infectedCounts.append(list(map(len, infectedIndices)))
        removedCounts.append(list(map(len, removedIndices)))
        # NOTE(review): the names suggest edge counts but the column read is
        # numVerticesIndex - confirm which statistic is intended.
        contactSeries.append(contactGraphStats[:, statistics.numVerticesIndex])
        removedSeries.append(removedGraphStats[:, statistics.numVerticesIndex])

    # Average each statistic across repetitions (axis 0).
    meanInfected = numpy.mean(numpy.array(infectedCounts), 0)
    meanRemoved = numpy.mean(numpy.array(removedCounts), 0)
    meanContact = numpy.mean(numpy.array(contactSeries), 0)
    meanRemovedSeries = numpy.mean(numpy.array(removedSeries), 0)

    return statsTimes, meanInfected, meanRemoved, meanContact, meanRemovedSeries, vertexArray[:, 6]
def saveStats(args):
    """
    Simulate one parameter vector and pickle the resulting statistics.

    args is a pair (i, theta): i is the index used to name the results file
    (outputDir + "SimStats<i>.pkl") and theta is the parameter vector passed to
    the model. Nothing is done if the results file already exists or is
    currently locked.

    The file lock is always released, even when the simulation or the save
    raises, so a failed run does not leave the index locked.
    """
    i, theta = args

    resultsFileName = outputDir + "SimStats" + str(i) + ".pkl"
    lock = FileLock(resultsFileName)

    if lock.fileExists() or lock.isLocked():
        logging.debug("Results already computed: " + str(resultsFileName))
        return

    lock.lock()
    try:
        # Use the theta handed to this call rather than indexing a
        # module-level array, so the function only depends on its arguments
        # for the parameters.
        model = HIVModelUtils.createModel(targetGraph, startDate, endDate, recordStep, M, matchAlpha, breakSize, matchAlg, theta=theta)
        times, infectedIndices, removedIndices, graph, compTimes, graphMetrics = HIVModelUtils.simulate(model)

        # Statistics are regenerated on a fixed time grid, replacing the
        # times and index lists returned by simulate().
        times = numpy.arange(startDate, endDate+1, recordStep)
        vertexArray, infectedIndices, removedIndices, contactGraphStats, removedGraphStats, finalRemovedDegrees = HIVModelUtils.generateStatistics(graph, times)
        stats = times, vertexArray, infectedIndices, removedGraphStats, finalRemovedDegrees, graphMetrics.objectives, compTimes

        Util.savePickle(stats, resultsFileName)
    finally:
        lock.unlock()
def testSimulate(self):
    """
    Check that two simulations built from the same inputs give the same
    number of edges and the same final set of removed indices.
    """
    # We want to see if we can get the same simulation twice
    N, matchAlpha, breakScale, numEpsilons, epsilon, minEpsilon, matchAlg, abcMaxRuns, batchSize, pertScale = HIVModelUtils.toyABCParams()
    startDate, endDate, recordStep, M, targetGraph = HIVModelUtils.toySimulationParams(test=True)
    breakSize = (targetGraph.subgraph(targetGraph.removedIndsAt(endDate)).size - targetGraph.subgraph(targetGraph.removedIndsAt(startDate)).size) * breakScale
    theta, sigmaTheta, pertTheta = HIVModelUtils.toyTheta()

    def simulateOnce():
        """Create a fresh model, simulate it and return (numEdges, last removed indices)."""
        model = HIVModelUtils.createModel(theta, targetGraph, startDate, endDate, recordStep, M, matchAlpha, breakSize, matchAlg)
        model.setParams(theta)
        times, infectedIndices, removedIndices, graph, compTimes, graphMetrics = HIVModelUtils.simulate(model)
        return graph.getNumEdges(), removedIndices[-1]

    numEdges, lastRemovedIndices = simulateOnce()

    # Simulate again
    numEdges2, lastRemovedIndices2 = simulateOnce()

    # assertEqual replaces the assertEquals alias, which was removed in Python 3.12.
    self.assertEqual(numEdges, numEdges2)
    self.assertEqual(lastRemovedIndices, lastRemovedIndices2)
def loadParams(ind):
    """
    Load the directories and simulation/ABC parameters for one epidemic index.

    The module-level flag processReal selects between the real-data setup
    (results under "viroscopy/real/theta<ind>/", prefix "Real", numInds fixed
    to 2) and the toy setup (results under "viroscopy/toy/theta/", prefix
    "Toy", numInds 1; ind is not used in that case).

    Returns the tuple (N, resultsDir, outputDir, recordStep, startDate,
    endDate, prefix, targetGraph, breakSize, numEpsilons, M, matchAlpha,
    matchAlg, numInds).
    """
    if not processReal:
        resultsDir = PathDefaults.getOutputDir() + "viroscopy/toy/theta/"
        N, matchAlpha, breakScale, numEpsilons, epsilon, minEpsilon, matchAlg, abcMaxRuns, batchSize, pertScale = HIVModelUtils.toyABCParams()
        startDate, endDate, recordStep, M, targetGraph = HIVModelUtils.toySimulationParams(test=True)
        realTheta, sigmaTheta, pertTheta = HIVModelUtils.toyTheta()
        prefix, numInds = "Toy", 1
    else:
        resultsDir = PathDefaults.getOutputDir() + "viroscopy/real/theta" + str(ind) + "/"
        N, matchAlpha, breakScale, numEpsilons, epsilon, minEpsilon, matchAlg, abcMaxRuns, batchSize, pertScale = HIVModelUtils.realABCParams(True)
        # The numInds value reported by realSimulationParams is discarded in
        # favour of the fixed value below.
        startDate, endDate, recordStep, M, targetGraph, reportedNumInds = HIVModelUtils.realSimulationParams(test=True, ind=ind)
        realTheta, sigmaTheta, pertTheta = HIVModelUtils.estimatedRealTheta(ind)
        prefix, numInds = "Real", 2

    # Statistics always live in a "stats/" folder below the results directory.
    outputDir = resultsDir + "stats/"

    # Growth of the removed subgraph between the two dates, scaled by breakScale.
    removedSizeAtEnd = targetGraph.subgraph(targetGraph.removedIndsAt(endDate)).size
    removedSizeAtStart = targetGraph.subgraph(targetGraph.removedIndsAt(startDate)).size
    breakSize = (removedSizeAtEnd - removedSizeAtStart) * breakScale

    return (N, resultsDir, outputDir, recordStep, startDate, endDate, prefix,
            targetGraph, breakSize, numEpsilons, M, matchAlpha, matchAlg, numInds)
def profileSimulate(self):
    """
    Profile a single run of HIVEpidemicModel.simulate().

    The population size and start date come from
    HIVModelUtils.realSimulationParams() and the parameters from
    HIVModelUtils.estimatedRealTheta(). The model runs from startDate to
    startDate + 1000 with a record step of 10.
    """
    startDate, endDates, numRecordSteps, M, targetGraph = HIVModelUtils.realSimulationParams()
    meanTheta, sigmaTheta = HIVModelUtils.estimatedRealTheta()

    undirected = True
    graph = HIVGraph(M, undirected)
    logging.info("Created graph: " + str(graph))

    alpha, zeroVal = 2, 0.9
    degreeProbs = Util.powerLawProbs(alpha, zeroVal)
    rates = HIVRates(graph, Util.randomChoice(degreeProbs, graph.getNumVertices()))

    # Keep the name "model": the profiled statement below looks it up in locals().
    model = HIVEpidemicModel(graph, rates)
    model.setT0(startDate)
    model.setT(startDate+1000)
    model.setRecordStep(10)
    model.setParams(meanTheta)

    logging.debug("MeanTheta=" + str(meanTheta))

    ProfileUtils.profile('model.simulate()', globals(), locals())
def testSimulate2(self):
    """
    Check that seeding numpy's random generator with the same value makes two
    independent simulations produce graphs with the same number of vertices
    and edges.
    """
    alpha = 2
    zeroVal = 0.9
    startDate = 0.0
    endDate = 200.0
    M = 1000
    undirected = True

    theta, sigmaTheta, pertTheta = HIVModelUtils.toyTheta()

    def simulateWithSeed(seed):
        """Seed numpy, build a fresh graph and model, simulate, and return (size, numEdges)."""
        numpy.random.seed(seed)
        graph = HIVGraph(M, undirected)
        p = Util.powerLawProbs(alpha, zeroVal)
        hiddenDegSeq = Util.randomChoice(p, graph.getNumVertices())

        rates = HIVRates(graph, hiddenDegSeq)
        model = HIVEpidemicModel(graph, rates, endDate, startDate, metrics=None)
        model.setParams(theta)

        times, infectedIndices, removedIndices, graph = model.simulate(True)
        return graph.size, graph.getNumEdges()

    numVertices, numEdges = simulateWithSeed(21)

    # Try again
    numVertices2, numEdges2 = simulateWithSeed(21)

    # assertEqual replaces the assertEquals alias, which was removed in Python 3.12.
    self.assertEqual(numVertices2, numVertices)
    self.assertEqual(numEdges2, numEdges)
def createModel(t):
    """
    Build a model from the module-level simulation settings.

    The parameter t is the particle index; it is not used when constructing
    the model.
    """
    modelArgs = (targetGraph, startDate, endDate, recordStep, M, matchAlpha, breakSize, matchAlg)
    return HIVModelUtils.createModel(*modelArgs)
# Script setup for a simulation batch on the real data set.
numProcesses = multiprocessing.cpu_count()

# The epidemic period index is read from the second command line argument
# and defaults to 0.
if len(sys.argv) > 2:
    i = int(sys.argv[2])
else:
    i = 0

# Include the process id in every log record.
FORMAT = "%(levelname)s:root:%(process)d:%(message)s"
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG, format=FORMAT)
logging.debug("Number of processes: " + str(numProcesses))
logging.debug("Epidemic period index " + str(i))
numpy.set_printoptions(suppress=True, precision=4, linewidth=150)
# Raise an exception on invalid floating point operations.
numpy.seterr(invalid='raise')

resultsDir = PathDefaults.getOutputDir() + "viroscopy/real/"
startDate, endDate, recordStep, M, targetGraph, numInds = HIVModelUtils.realSimulationParams(ind=i)
N, matchAlpha, breakScale, numEpsilons, epsilon, minEpsilon, matchAlg, abcMaxRuns, batchSize, pertScale = HIVModelUtils.realABCParams(i)

logging.debug("Posterior sample size " + str(N))
logging.debug("Matching algorithm " + str(matchAlg))
logging.debug("="*10 + "Starting new simulation batch with index " + str(i) + "="*10)
logging.debug("Total time of simulation is " + str(endDate-startDate))

# Difference in size of the removed subgraph of the target graph between
# endDate and startDate, scaled by breakScale.
breakSize = (targetGraph.subgraph(targetGraph.removedIndsAt(endDate)).size - targetGraph.subgraph(targetGraph.removedIndsAt(startDate)).size) * breakScale
logging.debug("Largest acceptable graph is " + str(breakSize))

def createModel(t):
    """
    The parameter t is the particle index.
    """
thetaArray = loadThetaArray(N, resultsDir, t)[0] logging.debug(thetaArray) paramList = [] for i in range(thetaArray.shape[0]): paramList.append((i, thetaArray[i, :])) pool = multiprocessing.Pool(multiprocessing.cpu_count()) resultIterator = pool.map(saveStats, paramList) #resultIterator = map(saveStats, paramList) pool.terminate() #Now save the statistics on the target graph times = numpy.arange(startDate, endDate+1, recordStep) vertexArray, infectedIndices, removedIndices, contactGraphStats, removedGraphStats, finalRemovedDegrees = HIVModelUtils.generateStatistics(targetGraph, times) stats = vertexArray, infectedIndices, removedIndices, contactGraphStats, removedGraphStats, finalRemovedDegrees resultsFileName = outputDir + "IdealStats.pkl" Util.savePickle(stats, resultsFileName) else: import matplotlib matplotlib.use("GTK3Agg") import matplotlib.pyplot as plt plotStyles = ['k-', 'kx-', 'k+-', 'k.-', 'k*-'] N, resultsDir, outputDir, recordStep, startDate, endDate, prefix, targetGraph, breakSize, numEpsilons, M, matchAlpha, matchAlg, numInds = loadParams(0) inds = range(numInds) numRecordSteps = int((endDate-startDate)/recordStep)+1
def run():
    """
    Run one simulation with the enclosing scope's settings and print the mean
    distance reported by graphMetrics.
    """
    simulationOutput = HIVModelUtils.simulate(theta, startDate, endDate, recordStep, M, graphMetrics)
    # Unpacking keeps the check that simulate() returns exactly four values.
    times, infectedIndices, removedIndices, graph = simulationOutput

    meanDistance = graphMetrics.meanDistance()
    print("Mean distance " + str(meanDistance))
from wallhack.viroscopy.model.HIVEpidemicModel import HIVEpidemicModel
from wallhack.viroscopy.model.HIVRates import HIVRates
from wallhack.viroscopy.model.HIVModelUtils import HIVModelUtils

""" This is the epidemic model for the HIV spread in cuba. We repeat the simulation a number of times and average the results. The purpose is to test the ABC model selection by using a known value of theta. """

logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
# Raise on every kind of floating point error.
numpy.seterr(all="raise")
# Fixed seed for numpy's global random generator.
numpy.random.seed(24)
numpy.set_printoptions(suppress=True, precision=4, linewidth=100)

startDate, endDate, recordStep, M = HIVModelUtils.toySimulationParams(False, test=True)
numRepetitions = 10
undirected = True
outputDir = PathDefaults.getOutputDir() + "viroscopy/toy/"
theta, sigmaTheta, purtTheta = HIVModelUtils.toyTheta()

# Per-repetition containers: one slot per repetition.
graphList = []
numInfected = numpy.zeros(numRepetitions)
numRemoved = numpy.zeros(numRepetitions)

# One new graph is created per repetition.
for j in range(numRepetitions):
    graph = HIVGraph(M, undirected)
    logging.debug("Created graph: " + str(graph))
    alpha = 2
# The backend is selected before pyplot is imported.
matplotlib.use("GTK3Agg")
import matplotlib.pyplot as plt

""" This is the epidemic model for the HIV spread in cuba. Let's try to get an exponential infection. """

# assert statements are removed when Python runs with -O, so this line stops
# the script unless that flag is given.
assert False, "Must run with -O flag"

logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
# Raise on every kind of floating point error.
numpy.seterr(all='raise')
# Fixed seed for numpy's global random generator.
numpy.random.seed(24)
numpy.set_printoptions(suppress=True, precision=4, linewidth=100)

startDate, endDate, recordStep, M, targetGraph = HIVModelUtils.realSimulationParams()
# Override the population size and the end date returned above.
M = 100
endDate = startDate + 1000

meanTheta, sigmaTheta = HIVModelUtils.estimatedRealTheta()
# The estimated parameters are replaced by this hand-written vector.
meanTheta = numpy.array([ 1, 0.1, 0.0, 0.00, 0.5, 0.1])
outputDir = PathDefaults.getOutputDir() + "viroscopy/"

undirected = True
graph = HIVGraph(M, undirected)
logging.info("Created graph: " + str(graph))

# Draw one hidden-degree value per vertex from the power-law probabilities.
alpha = 2
zeroVal = 0.9
p = Util.powerLawProbs(alpha, zeroVal)
hiddenDegSeq = Util.randomChoice(p, graph.getNumVertices())
from wallhack.viroscopy.model.HIVGraphMetrics2 import HIVGraphMetrics2
import matplotlib.pyplot as plt

""" This is the epidemic model for the HIV spread in cuba. We want to see how different graphs can get under the same params but different seeds. """

# assert statements are removed when Python runs with -O, so this line stops
# the script unless that flag is given.
assert False, "Must run with -O flag"

logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
# Raise on every kind of floating point error.
numpy.seterr(all='raise')
# Fixed seed for numpy's global random generator.
numpy.random.seed(24)
numpy.set_printoptions(suppress=True, precision=4, linewidth=100)

startDate, endDate, recordStep, M, targetGraph = HIVModelUtils.toySimulationParams()
meanTheta, sigmaTheta = HIVModelUtils.toyTheta()

#In this case, we'll extend the end date
endDate *= 2
epsilon = 5.0
reps = 10

# Result lists filled by the repetitions below.
graphDists = []
removedArray = []
biArray = []
maleArray = []
femaleArray = []

for i in range(reps):
# assert statements are removed when Python runs with -O, so this line stops
# the script unless that flag is given.
assert False, "Must run with -O flag"

# The number of processes is read from the first command line argument and
# defaults to the CPU count.
if len(sys.argv) > 1:
    numProcesses = int(sys.argv[1])
else:
    numProcesses = multiprocessing.cpu_count()

# Include the process id in every log record.
FORMAT = "%(levelname)s:root:%(process)d:%(message)s"
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG, format=FORMAT)
logging.debug("Number of processes: " + str(numProcesses))
numpy.set_printoptions(suppress=True, precision=4, linewidth=150)
# Raise an exception on invalid floating point operations.
numpy.seterr(invalid='raise')

resultsDir = PathDefaults.getOutputDir() + "viroscopy/real/"
startDates, endDates, numRecordSteps, M, targetGraph = HIVModelUtils.realSimulationParams()
N, matchAlpha, breakScale, numEpsilons, epsilon, minEpsilon, matchAlg, abcMaxRuns, batchSize = HIVModelUtils.realABCParams()

logging.debug("Posterior sample size " + str(N))

alpha = 2
zeroVal = 0.9

# One batch per (startDate, endDate) pair; the two sequences are indexed together.
for i, endDate in enumerate(endDates):
    startDate = startDates[i]
    logging.debug("="*10 + "Starting new simulation batch with index " + str(i) + "="*10)
    logging.debug("Total time of simulation is " + str(endDate-startDate))

    # Size of the removed subgraph of the target graph at endDate, scaled by breakScale.
    breakSize = targetGraph.subgraph(targetGraph.removedIndsAt(endDate)).size * breakScale
    logging.debug("Largest acceptable graph is " + str(breakSize))
# assert statements are removed when Python runs with -O, so this line stops
# the script unless that flag is given.
assert False, "Must run with -O flag"

# First command line argument, when present, gives the number of processes;
# otherwise use every available CPU.
numProcesses = int(sys.argv[1]) if len(sys.argv) > 1 else multiprocessing.cpu_count()

# Include the process id in every log record.
FORMAT = "%(levelname)s:root:%(process)d:%(message)s"
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG, format=FORMAT)
logging.debug("Number of processes: " + str(numProcesses))
numpy.set_printoptions(suppress=True, precision=4, linewidth=150)
# Raise an exception on invalid floating point operations.
numpy.seterr(invalid='raise')

resultsDir = PathDefaults.getOutputDir() + "viroscopy/toy/"
startDate, endDate, recordStep, M, targetGraph = HIVModelUtils.toySimulationParams()
(N, matchAlpha, breakScale, numEpsilons, epsilon, minEpsilon,
 matchAlg, abcMaxRuns, batchSize, pertScale) = HIVModelUtils.toyABCParams()

logging.debug("Total time of simulation is " + str(endDate-startDate))
logging.debug("Posterior sample size " + str(N))

# The same epsilon value is used for each of the numEpsilons entries.
epsilonArray = numpy.ones(numEpsilons)*epsilon

# Difference in size of the removed subgraph of the target graph between
# endDate and startDate, scaled by breakScale.
breakSize = (
    targetGraph.subgraph(targetGraph.removedIndsAt(endDate)).size
    - targetGraph.subgraph(targetGraph.removedIndsAt(startDate)).size
) * breakScale
logging.debug("Largest acceptable graph is " + str(breakSize))


def createModel(t):
    """
    Build a model from the module-level simulation settings.

    The parameter t is the particle index; it is not used when constructing
    the model.
    """
    modelArgs = (targetGraph, startDate, endDate, recordStep, M, matchAlpha, breakSize, matchAlg)
    return HIVModelUtils.createModel(*modelArgs)