def saveStats(args):
    """
    Run one simulation and pickle its statistics, unless the results file can
    already be opened.

    :param args: A tuple (i, theta, startDate, endDate, recordStep). i selects
        both the output file name and the entry of the module-level thetaArray
        that is simulated; theta is unpacked but not otherwise used here.
    """
    i, theta, startDate, endDate, recordStep = args

    resultsFileName = outputDir + "SimStats" + str(i) + ".pkl"

    # If the results file can be opened there is nothing left to do
    try:
        with open(resultsFileName):
            return
    except IOError:
        pass

    # Use every vertex feature for matching except the four excluded below.
    # The builtin bool is used because the numpy.bool alias no longer exists
    # in current NumPy releases.
    featureMask = numpy.ones(targetGraph.vlist.getNumFeatures(), bool)
    featureMask[HIVVertices.dobIndex] = False
    featureMask[HIVVertices.infectionTimeIndex] = False
    featureMask[HIVVertices.hiddenDegreeIndex] = False
    featureMask[HIVVertices.stateIndex] = False
    featureInds = numpy.arange(featureMask.shape[0])[featureMask]

    matcher = GraphMatch("PATH", alpha=0.5, featureInds=featureInds, useWeightM=False)
    graphMetrics = HIVGraphMetrics2(targetGraph, 1.0, matcher, float(endDate))

    times, infectedIndices, removedIndices, graph = HIVModelUtils.simulate(thetaArray[i], startDate, endDate, recordStep, M, graphMetrics)
    times, vertexArray, removedGraphStats = HIVModelUtils.generateStatistics(graph, startDate, endDate, recordStep)

    stats = times, vertexArray, removedGraphStats, graphMetrics.dists, graphMetrics.graphDists, graphMetrics.labelDists

    Util.savePickle(stats, resultsFileName)
예제 #2
0
    def saveResult(self, X, Y, learner, fileName):
        """
        Save a single result to file, checking if the results have already been computed.

        A lock file, named after fileName with the text following the last "."
        replaced by "lock", marks the computation as in progress. Nothing is
        computed when either the results file or the lock file exists. The lock
        file is removed when the computation finishes, whether it succeeded or
        raised, so that a failure does not leave the result locked forever.

        :param X: The examples passed to learner.evaluateCvOuter.
        :param Y: The labels passed to learner.evaluateCvOuter.
        :param learner: An object providing evaluateCvOuter(X, Y, folds).
        :param fileName: The name of the results file to write.
        """
        fileBaseName, sep, ext = fileName.rpartition(".")
        lockFileName = fileBaseName + ".lock"
        gc.collect()

        if os.path.isfile(fileName) or os.path.isfile(lockFileName):
            logging.debug("File exists, or is locked: " + fileName)
            return

        try:
            with open(lockFileName, 'w'):
                pass
            logging.debug("Created lock file " + lockFileName)

            logging.debug("Computing file " + fileName)
            logging.debug(learner)
            (bestParams, allMetrics, bestMetaDicts) = learner.evaluateCvOuter(X, Y, self.folds)
            cvResults = {"bestParams":bestParams, "allMetrics":allMetrics, "metaDicts":bestMetaDicts}
            Util.savePickle(cvResults, fileName)
        except BaseException:
            # Only log here; the error is still propagated to the caller
            logging.debug("Caught an error in the code ... skipping")
            raise
        finally:
            # Release the lock on both the success and the failure path
            if os.path.isfile(lockFileName):
                os.remove(lockFileName)
                logging.debug("Deleted lock file " + lockFileName)
    def save(self, filename):
        """
        Pickle self.V to the file named filename followed by self.ext, passing
        overwrite=True to the pickling helper.

        :param filename: The name of the file to save to, without the extension.
        :type filename: :class:`str`

        :returns: The name of the saved file including extension.
        """
        savedFilename = filename + self.ext
        Util.savePickle(self.V, savedFilename, overwrite=True)
        return savedFilename
    def save(self, filename):
        """
        Write self.V as a pickle to filename + self.ext (overwrite=True is
        passed to the pickling helper).

        :param filename: The base name of the output file; self.ext is appended.
        :type filename: :class:`str`

        :returns: The full name of the written file, extension included.
        """
        target = filename + self.ext
        Util.savePickle(self.V, target, overwrite=True)
        return target
예제 #5
0
    def coauthorsGraph(self, field, relevantExperts): 
        """
        Build the coauthor graph of the relevant experts and cache it on disk.

        The graph is rebuilt when its file is missing or self.overwriteGraph is
        set; in every case the result returned is the one read back from the
        file.

        :param field: The field used to look up the coauthors file name.
        :param relevantExperts: The authors whose coauthors are found.

        :returns: The graph and the author indexer loaded from the file.
        """
        coauthorsFilename = self.getCoauthorsFilename(field)
        alreadyGenerated = os.path.exists(coauthorsFilename) and not self.overwriteGraph

        if alreadyGenerated:
            logging.debug("Files already generated: " + coauthorsFilename)
        else:
            logging.debug("Finding coauthors of relevant experts")
            graph, authorIndexer = self.coauthorsGraphFromAuthors(set(relevantExperts))
            logging.debug(graph.summary())
            Util.savePickle([graph, authorIndexer], coauthorsFilename, debug=True)

        graph, authorIndexer = Util.loadPickle(coauthorsFilename)
        return graph, authorIndexer
예제 #6
0
    def save(self, filename):
        """
        Save the graph object to the corresponding filename under the .zip extension. The
        adjacency matrix is stored in matrix market format and the AbstractVertexList
        decides how to store the vertex labels.

        The component files are written inside a temporary directory, zipped
        there, and the zip file is then moved to its destination. The
        destination is resolved against the working directory of the caller
        before changing into the temporary directory, and the temporary
        directory is removed even if saving fails.

        :param filename: The name of the file to save.
        :type filename: :class:`str`

        :returns: The name of the saved zip file.
        """
        Parameter.checkClass(filename, str)
        import zipfile

        (path, filename) = os.path.split(filename)
        if path == "":
            path = "./"

        zipFilename = filename + '.zip'
        savedFilename = path + "/" + zipFilename
        # Must be made absolute now: once the working directory changes, a
        # relative destination would point inside the temporary directory
        destination = os.path.abspath(savedFilename)

        tempPath = tempfile.mkdtemp()
        originalPath = os.getcwd()
        try:
            # The component files are written using names relative to the
            # working directory, hence the change of directory
            os.chdir(tempPath)

            self.saveMatrix(self.W, self._wFilename)
            vListFilename = self.vList.save(self._verticesFilename)

            metaDict = {}
            metaDict["version"] = apgl.__version__
            metaDict["undirected"] = self.undirected
            metaDict["vListType"] = self.vList.__class__.__name__
            Util.savePickle(metaDict, self._metaFilename)

            with zipfile.ZipFile(zipFilename, 'w') as myzip:
                myzip.write(self._wFilename)
                myzip.write(vListFilename)
                myzip.write(self._metaFilename)

            shutil.move(zipFilename, destination)
        finally:
            os.chdir(originalPath)
            # Removes the temporary directory together with the component files
            shutil.rmtree(tempPath, ignore_errors=True)

        return savedFilename
def computeConfigScalarStats():
    """
    Compute scalar statistics for each of the numConfigGraphs configuration
    model graphs and pickle them, skipping any graph whose results file is
    already present.
    """
    logging.info("Computing configuration model scalar stats")

    graphFileNameBase = resultsDir + "ConfigInfectGraph"
    resultsFileNameBase = resultsDir + "ConfigInfectGraphScalarStats"

    for graphInd in range(numConfigGraphs):
        suffix = str(graphInd)
        resultsFileName = resultsFileNameBase + suffix

        # Results for this graph are already on disk
        if os.path.isfile(resultsFileName):
            continue

        configGraph = SparseGraph.load(graphFileNameBase + suffix)
        statsArray = graphStats.sequenceScalarStats(configGraph, subgraphIndicesList, slowStats, treeStats=True)
        Util.savePickle(statsArray, resultsFileName, True)
        gc.collect()

    logging.info("All done")
예제 #8
0
 def computeLDA(self):
     """
     Fit a gensim LDA model on the stored document-term matrix and pickle it
     together with a similarity index. Nothing is computed when the model file
     exists and self.overwriteModel is not set.
     """
     if os.path.exists(self.modelFilename) and not self.overwriteModel:
         logging.debug("File already exists: " + self.modelFilename)
         return

     self.vectoriseDocuments()
     self.loadVectoriser()
     corpus = gensim.corpora.mmcorpus.MmCorpus(self.docTermMatrixFilename + ".mtx")

     # Map each feature index to the corresponding feature name
     featureNames = self.vectoriser.get_feature_names()
     id2WordDict = dict(enumerate(featureNames))

     logging.getLogger('gensim').setLevel(logging.INFO)
     lda = LdaModel(corpus, num_topics=self.k, id2word=id2WordDict, chunksize=self.chunksize, distributed=False)
     # An alternative index is gensim.similarities.docsim.SparseMatrixSimilarity(lda[corpus], num_features=self.k)
     index = gensim.similarities.docsim.Similarity(self.indexFilename, lda[corpus], num_features=self.k)

     Util.savePickle([lda, index], self.modelFilename, debug=True)
     gc.collect()
def computeConfigVectorStats():
    """
    Compute vector statistics for each of the numConfigGraphs configuration
    model graphs and pickle them, skipping any graph whose results file is
    already present.
    """
    #Note: We can make this multithreaded
    logging.info("Computing configuration model vector stats")

    graphFileNameBase = resultsDir + "ConfigInfectGraph"
    resultsFileNameBase = resultsDir + "ConfigInfectGraphVectorStats"

    for graphInd in range(numConfigGraphs):
        suffix = str(graphInd)
        resultsFileName = resultsFileNameBase + suffix

        # Results for this graph are already on disk
        if os.path.isfile(resultsFileName):
            continue

        configGraph = SparseGraph.load(graphFileNameBase + suffix)
        statsDictList = graphStats.sequenceVectorStats(configGraph, subgraphIndicesList2, eigenStats=False, treeStats=True)
        Util.savePickle(statsDictList, resultsFileName, False)
        gc.collect()

    logging.info("All done")
def plotOtherStats():
    """
    Compute and pickle the geodesic statistics of subgraphs when saveResults is
    set; otherwise load them and save two plots of harmonic geodesic distances,
    advancing the module-level plotInd once per figure.
    """
    global plotInd

    #Let's look at geodesic distances in subgraphs and communities
    logging.info("Computing other stats")

    resultsFileName = resultsDir + "ContactGrowthOtherStats.pkl"
    hivGraphStats = HIVGraphStatistics(fInds)

    if saveResults:
        statsArray = hivGraphStats.sequenceScalarStats(sGraph, subgraphIndicesList)
        Util.savePickle(statsArray, resultsFileName, True)
        return

    def finishFigure(legendLabels, figureName):
        # Axis labels, legend and output file shared by both figures
        plt.xlabel("Year")
        plt.ylabel("Mean harmonic geodesic distance")
        plt.legend(legendLabels, loc="upper right")
        plt.savefig(figureDir + figureName + ".eps")

    statsArray = Util.loadPickle(resultsFileName)
    #Just load the harmonic geodesic distances of the full graph
    scalarStatsArray = Util.loadPickle(resultsDir + "ContactGrowthScalarStats.pkl")

    # Negative and infinite distances are replaced with zero before plotting
    msmGeodesic = statsArray[:, hivGraphStats.msmGeodesicIndex]
    msmGeodesic[msmGeodesic < 0] = 0
    msmGeodesic[msmGeodesic == float('inf')] = 0

    #Output all the results into plots
    mostConnectedGeodesic = statsArray[:, hivGraphStats.mostConnectedGeodesicIndex]
    plt.figure(plotInd)
    plt.plot(absDayList, msmGeodesic, 'k-', absDayList, mostConnectedGeodesic, 'k--')
    plt.xticks(locs, labels)
    finishFigure(("MSM individuals", "Top 10% degree"), "MSM10Geodesic")
    plotInd += 1

    allGeodesic = scalarStatsArray[:, graphStats.harmonicGeoDistanceIndex]
    menGeodesic = statsArray[:, hivGraphStats.menSubgraphGeodesicIndex]
    plt.figure(plotInd)
    plt.plot(absDayList, allGeodesic, 'k-', absDayList, menGeodesic, 'k--')
    plt.xticks(locs, labels)
    plt.ylim([0, 200.0])
    finishFigure(("All individuals", "Men subgraph"), "MenSubgraphGeodesic")
    plotInd += 1
예제 #11
0
def saveStats(args):
    """
    Simulate the model for one parameter vector and pickle the resulting
    statistics, guarded by a FileLock on the results file.

    Nothing is computed when the lock reports that the results file exists or
    is locked. The lock is released even if the simulation or the statistics
    raise, so a failed run does not leave the results locked.

    :param args: A tuple (i, theta). i selects both the output file name and
        the entry of the module-level thetaArray that is simulated; theta is
        unpacked but not otherwise used here.
    """
    i, theta = args

    resultsFileName = outputDir + "SimStats" + str(i) + ".pkl"
    lock = FileLock(resultsFileName)

    if lock.fileExists() or lock.isLocked():
        logging.debug("Results already computed: " + str(resultsFileName))
        return

    lock.lock()
    try:
        model = HIVModelUtils.createModel(targetGraph, startDate, endDate, recordStep, M, matchAlpha, breakSize, matchAlg, theta=thetaArray[i])
        times, infectedIndices, removedIndices, graph, compTimes, graphMetrics = HIVModelUtils.simulate(model)
        # Statistics are generated on a regular grid of times rather than the
        # times returned by the simulation
        times = numpy.arange(startDate, endDate+1, recordStep)
        vertexArray, infectedIndices, removedIndices, contactGraphStats, removedGraphStats, finalRemovedDegrees = HIVModelUtils.generateStatistics(graph, times)
        stats = times, vertexArray, infectedIndices, removedGraphStats, finalRemovedDegrees, graphMetrics.objectives, compTimes

        Util.savePickle(stats, resultsFileName)
    finally:
        lock.unlock()
예제 #12
0
 def computeLSI(self):
     """
     Fit the gensim LSI model on the stored document-term matrix and pickle it
     together with a similarity index. Nothing is computed when the model file
     exists and self.overwriteModel is not set.
     """
     if os.path.exists(self.modelFilename) and not self.overwriteModel:
         logging.debug("File already exists: " + self.modelFilename)
         return

     self.vectoriseDocuments()
     self.loadVectoriser()
     # The corpus is streamed from the matrix market file written by
     # vectoriseDocuments rather than loaded with scipy.io.mmread
     corpus = gensim.corpora.mmcorpus.MmCorpus(self.docTermMatrixFilename + ".mtx")

     # Map each feature index to the corresponding feature name
     featureNames = self.vectoriser.get_feature_names()
     id2WordDict = dict(enumerate(featureNames))

     logging.getLogger('gensim').setLevel(logging.INFO)
     lsi = LsiModel(corpus, num_topics=self.k, id2word=id2WordDict, chunksize=self.chunksize, distributed=False)
     index = gensim.similarities.docsim.Similarity(self.indexFilename, lsi[corpus], num_features=self.k)

     Util.savePickle([lsi, index], self.modelFilename, debug=True)
     gc.collect()
예제 #13
0
    def saveResult(self, X, Y, indexList, splitFunction, learnerIterator, metricMethods, fileName, labelIndex, standardiserY):
        """
        Evaluate the learners and pickle the metrics to fileName, unless that
        file already exists. Any error is logged at debug level and re-raised.

        The pickled object is a tuple (allMetrics, rankMetrics, paramStrList),
        where paramStrList holds the string form of each best learner.
        """
        gc.collect()

        try:
            if os.path.isfile(fileName):
                logging.debug("File exists: " + fileName)
                return

            logging.debug("Computing file " + fileName)
            allMetrics, bestLearners = AbstractPredictor.evaluateLearners(X, Y, indexList, splitFunction, learnerIterator, metricMethods)
            rankMetrics = self.computeRankMetrics(X, Y, indexList, bestLearners, standardiserY, labelIndex)

            #Create objects we can serialise
            paramStrList = [str(bestLearner) for bestLearner in bestLearners]

            Util.savePickle((allMetrics, rankMetrics, paramStrList), fileName)
        except BaseException:
            logging.debug("Caught an error in the code ... skipping")
            raise
예제 #14
0
 def vectoriseDocuments(self):
     """
     We want to go through the dataset and vectorise all the title+abstracts.
     The results are saved in TFIDF format in a matrix X.

     The author list, the citation list, the document-term matrix and the
     vectoriser are written to their files. Nothing is done when the
     document-term matrix, author list and vectoriser files all exist and
     self.overwriteVectoriser is not set.
     """
     outputsExist = (os.path.exists(self.docTermMatrixFilename + ".mtx")
                     and os.path.exists(self.authorListFilename)
                     and os.path.exists(self.vectoriserFilename))

     if outputsExist and not self.overwriteVectoriser:
         logging.debug("Author list, document-term matrix and vectoriser already generated: ")
         return

     logging.debug("Vectorising documents")

     authorList, documentList, citationList = self.readAuthorsAndDocuments()
     Util.savePickle(authorList, self.authorListFilename, debug=True)
     Util.savePickle(citationList, self.citationListFilename, debug=True)

     logging.debug("Generating TFIDF features")
     # The builtin float is used because the numpy.float alias no longer exists
     # in current NumPy releases
     vectoriser = text.TfidfVectorizer(min_df=self.minDf, ngram_range=(1,self.ngram), binary=self.binary, sublinear_tf=self.sublinearTf, norm="l2", max_df=0.95, stop_words="english", tokenizer=PorterTokeniser(), max_features=self.numFeatures, dtype=float)

     X = vectoriser.fit_transform(documentList)
     # Large objects are released as soon as they are no longer needed
     del documentList
     scipy.io.mmwrite(self.docTermMatrixFilename, X)
     logging.debug("Wrote X with shape " + str(X.shape) + " and " + str(X.nnz) + " nonzeros to file " + self.docTermMatrixFilename + ".mtx")
     del X

     #Save vectoriser - note that we can't pickle the tokeniser so it needs to be reset when loaded
     vectoriser.tokenizer = None
     Util.savePickle(vectoriser, self.vectoriserFilename, debug=True)
     del vectoriser
     gc.collect()
예제 #15
0
def plotTreeStats():
    """
    Compute and pickle per-tree statistics when saveResults is set; otherwise
    load them and save histograms of the location entropy, orientation entropy
    and detection range of the trees.

    For every entry of subgraphIndicesList2 the trees of the corresponding
    subgraph are found, and for each tree with more than one vertex the entropy
    of the location and orientation columns and the range of the detection
    column of its vertex array are recorded.
    """
    logging.info("Computing tree stats")
    resultsFileName = resultsDir + "InfectGrowthTreeStats.pkl"

    if saveResults:
        statsDictList = []
        numSubgraphs = len(subgraphIndicesList2)

        for j, subgraphIndices in enumerate(subgraphIndicesList2):
            Util.printIteration(j, 1, numSubgraphs)
            subgraph = sGraph.subgraph(subgraphIndices)
            logging.info("Finding trees")
            trees = subgraph.findTrees()
            logging.info("Computing tree statistics")

            locationEntropy = []
            orientEntropy = []
            detectionRanges = []

            for tree in trees:
                # Trees consisting of a single vertex are ignored
                if len(tree) <= 1:
                    continue

                treeGraph = subgraph.subgraph(tree)
                vertexArray = treeGraph.getVertexList().getVertices(list(range(treeGraph.getNumVertices())))

                locationEntropy.append(Util.entropy(vertexArray[:, locationIndex]))
                orientEntropy.append(Util.entropy(vertexArray[:, orientationIndex]))

                detections = vertexArray[:, detectionIndex]
                detectionRanges.append(numpy.max(detections) - numpy.min(detections))

            statsDict = {}
            statsDict["locationEnt"] = numpy.array(locationEntropy)
            statsDict["orientEnt"] = numpy.array(orientEntropy)
            statsDict["detectRanges"] = numpy.array(detectionRanges)
            statsDictList.append(statsDict)

        Util.savePickle(statsDictList, resultsFileName, True)
        return

    statsDictList = Util.loadPickle(resultsFileName)

    locBins = numpy.arange(0, 2.4, 0.2)
    detectBins = numpy.arange(0, 6500, 500)
    locationEntDists = []
    orientEntDists = []
    detectionDists = []

    for j, day in enumerate(dayList2):
        dateStr = str(DateUtils.getDateStrFromDay(day, startYear))
        logging.info(dateStr)
        statsDict = statsDictList[j]

        locationEntDists.append(statsDict["locationEnt"])
        orientEntDists.append(statsDict["orientEnt"])
        detectionDists.append(statsDict["detectRanges"])

    # Bound outside the loop so that it is defined even when dayList2 is empty
    plotInd2 = plotInd

    # NOTE(review): the normed argument of plt.hist was replaced by density in
    # later Matplotlib releases - confirm the Matplotlib version in use
    plt.figure(plotInd2)
    plt.hist(locationEntDists, locBins, normed=True)
    plt.xlabel("Location Entropy")
    plt.ylabel("Probability Density")
    plt.savefig(figureDir + "LocationEnt" +  ".eps")
    plotInd2 += 1

    plt.figure(plotInd2)
    plt.hist(orientEntDists, normed=True)
    plt.xlabel("Orientation Entropy")
    plt.ylabel("Probability Density")
    plt.savefig(figureDir + "OrientEnt" +  ".eps")
    plotInd2 += 1

    plt.figure(plotInd2)
    plt.hist(detectionDists, detectBins, normed=True)
    plt.xlabel("Detection Range (days)")
    plt.ylabel("Probability Density")
    plt.savefig(figureDir + "DetectionRanges" +  ".eps")
def plotVectorStats():
    """
    Compute and pickle the vector statistics of the subgraph sequence when
    saveResults is set; otherwise load them, average the statistics of the
    configuration model graphs, save several plots and print LaTeX-formatted
    tables.

    In the reporting branch, for every day in dayList2 the degree distribution,
    hop counts and leading eigenvector coefficients are plotted, and a number
    of attribute counts are accumulated over the 10% of vertices with the
    largest absolute eigenvector coefficient.
    """
    #Finally, compute some vector stats at various points in the graph
    logging.info("Computing vector stats")
    global plotInd
    resultsFileName = resultsDir + "ContactGrowthVectorStats.pkl"

    if saveResults:
        statsDictList = graphStats.sequenceVectorStats(sGraph, subgraphIndicesList2)
        Util.savePickle(statsDictList, resultsFileName, False)
    else:
        statsDictList = Util.loadPickle(resultsFileName)

        #Load up configuration model results
        configStatsDictList = []
        resultsFileNameBase = resultsDir + "ConfigGraphVectorStats"

        for j in range(numConfigGraphs):
            resultsFileName = resultsFileNameBase + str(j)
            configStatsDictList.append(Util.loadPickle(resultsFileName))

        #Now need to take mean of 1st element of list
        # The first list is used as the accumulator, so it is modified in place.
        # Arrays of different lengths are padded with zeros before being added.
        meanConfigStatsDictList = configStatsDictList[0]
        for i in range(len(configStatsDictList[0])):
            for k in range(1, numConfigGraphs):
                for key in configStatsDictList[k][i].keys():
                    if configStatsDictList[k][i][key].shape[0] > meanConfigStatsDictList[i][key].shape[0]:
                        meanConfigStatsDictList[i][key] = numpy.r_[meanConfigStatsDictList[i][key], numpy.zeros(configStatsDictList[k][i][key].shape[0] - meanConfigStatsDictList[i][key].shape[0])]
                    elif configStatsDictList[k][i][key].shape[0] < meanConfigStatsDictList[i][key].shape[0]:
                        configStatsDictList[k][i][key] = numpy.r_[configStatsDictList[k][i][key], numpy.zeros(meanConfigStatsDictList[i][key].shape[0] - configStatsDictList[k][i][key].shape[0])]

                    meanConfigStatsDictList[i][key] += configStatsDictList[k][i][key]

            for key in configStatsDictList[0][i].keys():
                meanConfigStatsDictList[i][key] = meanConfigStatsDictList[i][key]/numConfigGraphs


        triangleDistArray = numpy.zeros((len(dayList2), 100))
        configTriangleDistArray = numpy.zeros((len(dayList2), 100))
        hopPlotArray = numpy.zeros((len(dayList2), 27))
        # NOTE(review): this array is never filled (the assignment in the loop
        # below is commented out), so the table printed from it is all zeros
        configHopPlotArray = numpy.zeros((len(dayList2), 30))
        componentsDistArray = numpy.zeros((len(dayList2), 3000))
        configComponentsDistArray = numpy.zeros((len(dayList2), 3000))
        # The builtin int and float are used below because the numpy.int and
        # numpy.float aliases no longer exist in current NumPy releases
        numVerticesEdgesArray = numpy.zeros((len(dayList2), 2), int)
        numVerticesEdgesArray[:, 0] = [len(sgl) for sgl in subgraphIndicesList2]
        numVerticesEdgesArray[:, 1] = [sGraph.subgraph(sgl).getNumEdges() for sgl in subgraphIndicesList2]

        binWidths = numpy.arange(0, 0.50, 0.05)
        eigVectorDists = numpy.zeros((len(dayList2), binWidths.shape[0]-1), int)

        femaleSums = numpy.zeros(len(dayList2))
        maleSums = numpy.zeros(len(dayList2))
        heteroSums = numpy.zeros(len(dayList2))
        biSums = numpy.zeros(len(dayList2))

        contactSums = numpy.zeros(len(dayList2))
        nonContactSums = numpy.zeros(len(dayList2))
        donorSums = numpy.zeros(len(dayList2))
        randomTestSums = numpy.zeros(len(dayList2))
        stdSums = numpy.zeros(len(dayList2))
        prisonerSums = numpy.zeros(len(dayList2))
        recommendSums = numpy.zeros(len(dayList2))
        
        meanAges = numpy.zeros(len(dayList2))
        degrees = numpy.zeros((len(dayList2), 20))

        provinces = numpy.zeros((len(dayList2), 15))

        havanaSums = numpy.zeros(len(dayList2))
        villaClaraSums = numpy.zeros(len(dayList2))
        pinarSums = numpy.zeros(len(dayList2))
        holguinSums = numpy.zeros(len(dayList2))
        habanaSums = numpy.zeros(len(dayList2))
        sanctiSums = numpy.zeros(len(dayList2))

        meanDegrees = numpy.zeros(len(dayList2))
        stdDegrees = numpy.zeros(len(dayList2))

        #Note that death has a lot of missing values
        for j in range(len(dayList2)):
            dateStr = (str(DateUtils.getDateStrFromDay(dayList2[j], startYear)))
            logging.info(dateStr)
            statsDict = statsDictList[j]
            configStatsDict = meanConfigStatsDictList[j]

            degreeDist = statsDict["outDegreeDist"]
            degreeDist = degreeDist/float(numpy.sum(degreeDist))
            #Note that degree distribution for configuration graph will be identical 

            eigenDist = statsDict["eigenDist"]
            eigenDist = numpy.log(eigenDist[eigenDist>=10**-1])
            #configEigenDist = configStatsDict["eigenDist"]
            #configEigenDist = numpy.log(configEigenDist[configEigenDist>=10**-1])

            hopCount = statsDict["hopCount"]
            hopCount = numpy.log10(hopCount)
            hopPlotArray[j, 0:hopCount.shape[0]] = hopCount
            configHopCount = configStatsDict["hopCount"]
            configHopCount = numpy.log10(configHopCount)
            #configHopPlotArray[j, 0:configHopCount.shape[0]] = configHopCount

            triangleDist = statsDict["triangleDist"]
            #triangleDist = numpy.array(triangleDist, numpy.float64)/numpy.sum(triangleDist)
            triangleDist = numpy.array(triangleDist, numpy.float64)
            triangleDistArray[j, 0:triangleDist.shape[0]] = triangleDist
            configTriangleDist = configStatsDict["triangleDist"]
            configTriangleDist = numpy.array(configTriangleDist, numpy.float64)/numpy.sum(configTriangleDist)
            configTriangleDistArray[j, 0:configTriangleDist.shape[0]] = configTriangleDist

            maxEigVector = statsDict["maxEigVector"]
            # Indices sorted by decreasing absolute eigenvector coefficient
            eigenvectorInds = numpy.flipud(numpy.argsort(numpy.abs(maxEigVector)))
            # numpy.round returns a float, which must be converted before it
            # can be used as a slice bound
            top10eigenvectorInds = eigenvectorInds[0:int(numpy.round(eigenvectorInds.shape[0]/10.0))]
            maxEigVector = numpy.abs(maxEigVector[eigenvectorInds])
            #print(maxEigVector)
            eigVectorDists[j, :] = numpy.histogram(maxEigVector, binWidths)[0]

            componentsDist = statsDict["componentsDist"]
            componentsDist = numpy.array(componentsDist, numpy.float64)/numpy.sum(componentsDist)
            componentsDistArray[j, 0:componentsDist.shape[0]] = componentsDist
            configComponentsDist = configStatsDict["componentsDist"]
            configComponentsDist = numpy.array(configComponentsDist, numpy.float64)/numpy.sum(configComponentsDist)
            configComponentsDistArray[j, 0:configComponentsDist.shape[0]] = configComponentsDist

            # The same figure numbers are reused on every iteration, so each
            # figure accumulates one curve per day
            plotInd2 = plotInd

            plt.figure(plotInd2)
            plt.plot(numpy.arange(degreeDist.shape[0]), degreeDist, plotStyles2[j], label=dateStr)
            plt.xlabel("Degree")
            plt.ylabel("Probability")
            plt.ylim((0, 0.5))
            plt.savefig(figureDir + "DegreeDist" +  ".eps")
            plt.legend()
            plotInd2 += 1

            """
            plt.figure(plotInd2)
            plt.plot(numpy.arange(eigenDist.shape[0]), eigenDist, label=dateStr)
            plt.xlabel("Eigenvalue rank")
            plt.ylabel("log(Eigenvalue)")
            plt.savefig(figureDir + "EigenDist" +  ".eps")
            plt.legend()
            plotInd2 += 1
            """

            #How does kleinberg do the hop plots 
            plt.figure(plotInd2)
            plt.plot(numpy.arange(hopCount.shape[0]), hopCount, plotStyles[j], label=dateStr)
            plt.xlabel("k")
            plt.ylabel("log10(pairs)")
            plt.ylim( (2.5, 7) )
            plt.legend(loc="lower right")
            plt.savefig(figureDir + "HopCount" + ".eps")
            plotInd2 += 1
            
            plt.figure(plotInd2)
            plt.plot(numpy.arange(maxEigVector.shape[0]), maxEigVector, plotStyles2[j], label=dateStr)
            plt.xlabel("Rank")
            plt.ylabel("log(eigenvector coefficient)")
            plt.savefig(figureDir + "MaxEigVector" +  ".eps")
            plt.legend()
            plotInd2 += 1

            #Compute some information the 10% most central vertices
            
            subgraphIndices = numpy.nonzero(detections <= dayList2[j])[0]
            subgraph = sGraph.subgraph(subgraphIndices)
            subgraphVertexArray = subgraph.getVertexList().getVertices()

            femaleSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, genderIndex]==1)
            maleSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, genderIndex]==0)
            heteroSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, orientationIndex]==0)
            biSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, orientationIndex]==1)

            contactSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, contactIndex])
            donorSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, donorIndex])
            randomTestSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, randomTestIndex])
            stdSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, stdIndex])
            prisonerSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, prisonerIndex])
            recommendSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, doctorIndex])

            meanAges[j] = numpy.mean(subgraphVertexArray[top10eigenvectorInds, detectionIndex] - subgraphVertexArray[top10eigenvectorInds, dobIndex])/daysInYear

            havanaSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, havanaIndex])
            villaClaraSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, villaClaraIndex])
            pinarSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, pinarIndex])
            holguinSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, holguinIndex])
            habanaSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, habanaIndex])
            sanctiSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, sanctiIndex])

            provinces[j, :] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, 22:37], 0)

            ddist = numpy.bincount(subgraph.outDegreeSequence()[top10eigenvectorInds])
            degrees[j, 0:ddist.shape[0]] = numpy.array(ddist, float)/numpy.sum(ddist)

            meanDegrees[j] = numpy.mean(subgraph.outDegreeSequence()[top10eigenvectorInds])
            stdDegrees[j] = numpy.std(subgraph.outDegreeSequence()[top10eigenvectorInds])


            plt.figure(plotInd2)
            plt.plot(numpy.arange(degrees[j, :].shape[0]), degrees[j, :], plotStyles2[j], label=dateStr)
            plt.xlabel("Degree")
            plt.ylabel("Probability")
            #plt.ylim((0, 0.5))
            plt.savefig(figureDir + "DegreeDistCentral" +  ".eps")
            plt.legend()
            plotInd2 += 1

        precision = 4
        dateStrList = [DateUtils.getDateStrFromDay(day, startYear) for day in dayList2]

        print("Hop counts")
        print(Latex.listToRow(dateStrList))
        print(Latex.array2DToRows(hopPlotArray.T))

        print("\nHop counts for configuration graphs")
        print(Latex.listToRow(dateStrList))
        print(Latex.array2DToRows(configHopPlotArray.T))

        print("\n\nEdges and vertices")
        print((Latex.listToRow(dateStrList)))
        print((Latex.array2DToRows(numVerticesEdgesArray.T, precision)))

        print("\n\nEigenvector distribution")
        print((Latex.array1DToRow(binWidths[1:]) + "\\\\"))
        print((Latex.array2DToRows(eigVectorDists)))

        # In the tables below the arrays are truncated to the length of the
        # distribution from the last day, the all-zero columns are dropped and
        # the remaining column indices are prepended as a first row
        print("\n\nDistribution of component sizes")
        componentsDistArray = componentsDistArray[:, 0:componentsDist.shape[0]]
        nonZeroCols = numpy.sum(componentsDistArray, 0)!=0
        componentsDistArray = numpy.r_[numpy.array([numpy.arange(componentsDistArray.shape[1])[nonZeroCols]]), componentsDistArray[:, nonZeroCols]]
        print((Latex.listToRow(dateStrList)))
        print((Latex.array2DToRows(componentsDistArray.T, precision)))

        print("\n\nDistribution of component sizes in configuration graphs")
        configComponentsDistArray = configComponentsDistArray[:, 0:configComponentsDist.shape[0]]
        nonZeroCols = numpy.sum(configComponentsDistArray, 0)!=0
        configComponentsDistArray = numpy.r_[numpy.array([numpy.arange(configComponentsDistArray.shape[1])[nonZeroCols]]), configComponentsDistArray[:, nonZeroCols]]
        print((Latex.listToRow(dateStrList)))
        print((Latex.array2DToRows(configComponentsDistArray.T, precision)))

        print("\n\nDistribution of triangle participations")
        triangleDistArray = triangleDistArray[:, 0:triangleDist.shape[0]]
        nonZeroCols = numpy.sum(triangleDistArray, 0)!=0
        triangleDistArray = numpy.r_[numpy.array([numpy.arange(triangleDistArray.shape[1])[nonZeroCols]])/2, triangleDistArray[:, nonZeroCols]]
        print((Latex.listToRow(dateStrList)))
        print((Latex.array2DToRows(triangleDistArray.T, precision)))

        configTriangleDistArray = configTriangleDistArray[:, 0:configTriangleDist.shape[0]]
        nonZeroCols = numpy.sum(configTriangleDistArray, 0)!=0
        configTriangleDistArray = numpy.r_[numpy.array([numpy.arange(configTriangleDistArray.shape[1])[nonZeroCols]])/2, configTriangleDistArray[:, nonZeroCols]]
        configTriangleDistArray = numpy.c_[configTriangleDistArray, numpy.zeros((configTriangleDistArray.shape[0], triangleDistArray.shape[1]-configTriangleDistArray.shape[1]))]

        print("\n\nDistribution of central vertices")
        print((Latex.listToRow(dateStrList)))
        # Percentages below are taken relative to the number of central
        # vertices counted as male or female
        subgraphSizes = numpy.array(maleSums + femaleSums, float)
        print("Female & " + Latex.array1DToRow(femaleSums*100/subgraphSizes, 1) + "\\\\")
        print("Male & " + Latex.array1DToRow(maleSums*100/subgraphSizes, 1) + "\\\\")
        print("\\hline")
        print("Heterosexual & " + Latex.array1DToRow(heteroSums*100/subgraphSizes, 1) + "\\\\")
        print("Bisexual & " + Latex.array1DToRow(biSums*100/subgraphSizes, 1) + "\\\\")
        print("\\hline")
        print("Contact traced & " + Latex.array1DToRow(contactSums*100/subgraphSizes, 1) + "\\\\")
        print("Blood donor & " + Latex.array1DToRow(donorSums*100/subgraphSizes, 1) + "\\\\")
        print("RandomTest & " + Latex.array1DToRow(randomTestSums*100/subgraphSizes, 1) + "\\\\")
        print("STD & " + Latex.array1DToRow(stdSums*100/subgraphSizes, 1) + "\\\\")
        print("Prisoner & " + Latex.array1DToRow(prisonerSums*100/subgraphSizes, 1) + "\\\\")
        print("Doctor recommendation & " + Latex.array1DToRow(recommendSums*100/subgraphSizes, 1) + "\\\\")
        print("\\hline")
        print("Mean ages (years) & " + Latex.array1DToRow(meanAges, 2) + "\\\\")
        print("\\hline")
        print("Holguin & " + Latex.array1DToRow(holguinSums*100/subgraphSizes, 1) + "\\\\")
        print("La Habana & " + Latex.array1DToRow(habanaSums*100/subgraphSizes, 1) + "\\\\")
        print("Havana City & " + Latex.array1DToRow(havanaSums*100/subgraphSizes, 1) + "\\\\")
        print("Pinar del Rio & " + Latex.array1DToRow(pinarSums*100/subgraphSizes, 1) + "\\\\")
        print("Sancti Spiritus & " + Latex.array1DToRow(sanctiSums*100/subgraphSizes, 1) + "\\\\")
        print("Villa Clara & " + Latex.array1DToRow(villaClaraSums*100/subgraphSizes, 1) + "\\\\")
        print("\\hline")
        print("Mean degrees & " + Latex.array1DToRow(meanDegrees, 2) + "\\\\")
        print("Std degrees & " + Latex.array1DToRow(stdDegrees, 2) + "\\\\")
        
        print("\n\nProvinces")
        print(Latex.array2DToRows(provinces))

        print("\n\nDegree distribution")
        print(Latex.array2DToRows(degrees))
def plotScalarStats():
    """
    Compute or plot the scalar statistics of the sequence of contact subgraphs.

    If the module-level flag saveResults is set, the statistics of the subgraphs
    given by subgraphIndicesList are computed and pickled to
    "ContactGrowthScalarStats.pkl" and nothing is plotted. Otherwise the pickled
    statistics are loaded together with the numConfigGraphs configuration model
    (CM) results, the real and CM curves are plotted and saved into figureDir,
    and a LaTeX table of effective diameters and geodesic distances is printed.

    The function reads its inputs from module-level names and increments the
    global figure counter plotInd once per figure.
    """
    global plotInd

    logging.info("Computing scalar stats")

    resultsFileName = resultsDir + "ContactGrowthScalarStats.pkl"

    if saveResults:
        statsArray = graphStats.sequenceScalarStats(sGraph, subgraphIndicesList, slowStats)
        Util.savePickle(statsArray, resultsFileName, True)
    else:
        statsArray = Util.loadPickle(resultsFileName)

        #Take the mean of the results over the configuration model graphs
        resultsFileNameBase = resultsDir + "ConfigGraphScalarStats"
        numGraphs = len(subgraphIndicesList)
        #NOTE(review): the CM arrays are allocated with two fewer columns than getNumStats() - confirm against the code that writes them
        configStatsArrays = numpy.zeros((numGraphs, graphStats.getNumStats()-2, numConfigGraphs))

        for j in range(numConfigGraphs):
            resultsFileName = resultsFileNameBase + str(j)
            configStatsArrays[:, :, j] = Util.loadPickle(resultsFileName)

        configStatsArray = numpy.mean(configStatsArrays, 2)
        configStatsStd = numpy.std(configStatsArrays, 2)

        def plotRealConfigError(index, styleReal, styleConfig, realLabel, configLabel):
            """
            Plot column index of the real statistics and of the mean CM statistics
            on the current figure, with one standard deviation error bars on the
            CM curve, and clamp the lower axis limits to 0.
            """
            plt.hold(True)
            plt.plot(absDayList, statsArray[:, index], styleReal, label=realLabel)
            #Symmetric error bars: one standard deviation below and above the CM mean
            errors = numpy.c_[configStatsStd[:, index], configStatsStd[:, index]].T
            plt.plot(absDayList, configStatsArray[:, index], styleConfig, label=configLabel)
            plt.errorbar(absDayList, configStatsArray[:, index], errors, linewidth=0, elinewidth=1, label="_nolegend_", ecolor="red")

            _, xmax = plt.xlim()
            plt.xlim((0, xmax))
            _, ymax = plt.ylim()
            plt.ylim((0, ymax))

        #Output all the results into plots
        plt.figure(plotInd)
        plt.hold(True)
        plotRealConfigError(graphStats.maxComponentSizeIndex, plotStyleBW[0], plotStyles4[0], "Max comp. vertices", "CM max comp. vertices")
        plotRealConfigError(graphStats.maxComponentEdgesIndex, plotStyleBW[1], plotStyles4[1], "Max comp. edges", "CM max comp. edges")
        plt.xticks(locs, labels)
        plt.xlabel("Year")
        plt.ylabel("No. vertices/edges")
        plt.legend(loc="upper left")
        plt.savefig(figureDir + "MaxComponentSizeGrowth.eps")
        plotInd += 1

        for k, day in enumerate(dayList):
            print(str(DateUtils.getDateStrFromDay(day, startYear)) + ": " + str(statsArray[k, graphStats.maxComponentEdgesIndex]))

        plt.figure(plotInd)
        plotRealConfigError(graphStats.numComponentsIndex, plotStyleBW[0], plotStyles4[0], "Size >= 1", "CM size >= 1")
        plotRealConfigError(graphStats.numNonSingletonComponentsIndex, plotStyleBW[1], plotStyles4[1], "Size >= 2", "CM size >= 2")
        plotRealConfigError(graphStats.numTriOrMoreComponentsIndex, plotStyleBW[2], plotStyles4[2], "Size >= 3", "CM size >= 3")

        plt.xticks(locs, labels)
        plt.xlabel("Year")
        plt.ylabel("No. components")
        plt.legend(loc="upper left")
        plt.savefig(figureDir + "NumComponentsGrowth.eps")
        plotInd += 1

        plt.figure(plotInd)
        plotRealConfigError(graphStats.meanComponentSizeIndex, plotStyleBW[0], plotStyles4[0], "Real graph", "CM")
        plt.xticks(locs, labels)
        plt.xlabel("Year")
        plt.ylabel("Mean component size")
        plt.legend(loc="lower right")
        plt.savefig(figureDir + "MeanComponentSizeGrowth.eps")
        plotInd += 1

        plt.figure(plotInd)
        plotRealConfigError(graphStats.diameterIndex, plotStyleBW[0], plotStyles4[0], "Real graph", "CM")
        plt.xticks(locs, labels)
        plt.xlabel("Year")
        plt.ylabel("Max component diameter")
        plt.legend(loc="lower right")
        plt.savefig(figureDir + "MaxComponentDiameterGrowth.eps")
        plotInd += 1

        plt.figure(plotInd)
        plotRealConfigError(graphStats.effectiveDiameterIndex, plotStyleBW[0], plotStyles4[0], "Real graph", "CM")
        plt.xticks(locs, labels)
        plt.xlabel("Year")
        plt.ylabel("Effective diameter")
        plt.legend(loc="lower right")
        plt.savefig(figureDir + "MaxComponentEffDiameterGrowth.eps")
        plotInd += 1

        plt.figure(plotInd)
        plotRealConfigError(graphStats.meanDegreeIndex, plotStyleBW[0], plotStyles4[0], "All vertices", "CM all vertices")
        plotRealConfigError(graphStats.maxCompMeanDegreeIndex, plotStyleBW[1], plotStyles4[1], "Max component", "CM max component")
        plt.xticks(locs, labels)
        plt.xlabel("Year")
        plt.ylabel("Mean degree")
        plt.legend(loc="lower right")
        plt.savefig(figureDir + "MeanDegrees.eps")
        plotInd += 1

        plt.figure(plotInd)
        plotRealConfigError(graphStats.densityIndex, plotStyleBW[0], plotStyles4[0], "Real Graph", "Config Model")
        plt.xticks(locs, labels)
        plt.xlabel("Year")
        plt.ylabel("Density")
        plt.legend()
        plt.savefig(figureDir + "DensityGrowth.eps")
        plotInd += 1

        #The power law exponent is only plotted for the real graph
        plt.figure(plotInd)
        plt.plot(absDayList, statsArray[:, graphStats.powerLawIndex], plotStyleBW[0])
        plt.xticks(locs, labels)
        plt.xlabel("Year")
        plt.ylabel("Alpha")
        plt.savefig(figureDir + "PowerLawGrowth.eps")
        plotInd += 1

        plt.figure(plotInd)
        plotRealConfigError(graphStats.geodesicDistanceIndex, plotStyleBW[0], plotStyles4[0], "Real Graph", "Config Model")
        plt.xticks(locs, labels)
        plt.xlabel("Year")
        plt.ylabel("Geodesic distance")
        plt.legend(loc="lower right")
        plt.savefig(figureDir + "GeodesicGrowth.eps")
        plotInd += 1

        plt.figure(plotInd)
        plotRealConfigError(graphStats.harmonicGeoDistanceIndex, plotStyleBW[0], plotStyles4[0], "Real Graph", "Config Model")
        plt.xticks(locs, labels)
        plt.xlabel("Year")
        plt.ylabel("Mean harmonic geodesic distance")
        plt.legend(loc="upper right")
        plt.savefig(figureDir + "HarmonicGeodesicGrowth.eps")
        plotInd += 1

        plt.figure(plotInd)
        plotRealConfigError(graphStats.geodesicDistMaxCompIndex, plotStyleBW[0], plotStyles4[0], "Real graph", "Config model")
        plt.xticks(locs, labels)
        plt.xlabel("Year")
        plt.ylabel("Max component mean geodesic distance")
        plt.legend(loc="lower right")
        plt.savefig(figureDir + "MaxCompGeodesicGrowth.eps")
        plotInd += 1

        #Find the number of edges in the infection graph
        resultsFileName = resultsDir + "InfectGrowthScalarStats.pkl"
        infectStatsArray = Util.loadPickle(resultsFileName)

        #Make sure we don't include 0 in the array (log(0) is undefined)
        vertexIndex = numpy.argmax(statsArray[:, graphStats.numVerticesIndex] > 0)
        edgeIndex = numpy.argmax(infectStatsArray[:, graphStats.numEdgesIndex] > 0)
        minIndex = numpy.maximum(vertexIndex, edgeIndex)

        plt.figure(plotInd)
        plt.plot(numpy.log(statsArray[minIndex:, graphStats.numVerticesIndex]), numpy.log(statsArray[minIndex:, graphStats.numEdgesIndex]), plotStyleBW[0])
        plt.plot(numpy.log(infectStatsArray[minIndex:, graphStats.numVerticesIndex]), numpy.log(infectStatsArray[minIndex:, graphStats.numEdgesIndex]), plotStyleBW[1])
        plt.plot(numpy.log(statsArray[minIndex:, graphStats.maxComponentSizeIndex]), numpy.log(statsArray[minIndex:, graphStats.maxComponentEdgesIndex]), plotStyleBW[2])
        plt.xlabel("log(|V|)")
        plt.ylabel("log(|E|)/log(|D|)")
        plt.legend(("Contact graph", "Infection graph", "Max component"), loc="upper left")
        plt.savefig(figureDir + "LogVerticesEdgesGrowth.eps")
        plotInd += 1

        #The table needs the configuration model results, which are only loaded
        #in this branch, so it is printed here rather than after the if/else
        results = statsArray[:, graphStats.effectiveDiameterIndex]
        results = numpy.c_[results, configStatsArray[:, graphStats.effectiveDiameterIndex]]
        results = numpy.c_[results, statsArray[:, graphStats.geodesicDistMaxCompIndex]]
        results = numpy.c_[results, configStatsArray[:, graphStats.geodesicDistMaxCompIndex]]

        print("\n\n")
        print(Latex.listToRow(["Diameter", "CM Diameter", "Mean Geodesic", "CM Mean Geodesic"]))
        print("\\hline")
        #Print every fourth date only
        for i in range(0, len(dayList), 4):
            day = dayList[i]
            print(str(DateUtils.getDateStrFromDay(day, startYear)) + " & " + Latex.array1DToRow(results[i, :]) + "\\\\")
def plotVertexStats():
    """
    Compute per-subgraph vertex statistics of the contact graph and plot them.

    For every index set in subgraphIndicesList the corresponding subgraphs of
    sGraph and sGraphInfect are extracted and counts/means of the vertex
    features (gender, orientation, detection type, province, contact counts,
    degrees) are collected. The MSM and heterosexual counts are pickled to
    "ContactGrowthVertexStats.pkl", a series of figures is written to
    figureDir, and two LaTeX tables are printed.

    The function reads its inputs from module-level names and increments the
    global figure counter plotInd once per figure.
    """
    global plotInd

    #Calculate all vertex statistics
    logging.info("Computing vertex stats")

    #Indices
    numContactsIndex = fInds["numContacts"]
    numTestedIndex = fInds["numTested"]
    numPositiveIndex = fInds["numPositive"]

    #Properties of vertex values
    detectionAges = []
    deathAfterInfectAges = []
    deathAges = []
    homoMeans = []

    maleSums = []
    femaleSums = []
    heteroSums = []
    biSums = []

    contactMaleSums = []
    contactFemaleSums = []
    contactHeteroSums = []
    contactBiSums = []

    doctorMaleSums = []
    doctorFemaleSums = []
    doctorHeteroSums = []
    doctorBiSums = []

    contactSums = []
    nonContactSums = []
    donorSums = []
    randomTestSums = []
    stdSums = []
    prisonerSums = []
    recommendSums = []
    #This is: all detections - contact, donor, randomTest, std, recommend
    otherSums = []

    havanaSums = []
    villaClaraSums = []
    pinarSums = []
    holguinSums = []
    habanaSums = []
    sanctiSums = []

    #Despite the "Sums" suffix the num* lists below hold per-subgraph means
    numContactSums = []
    numTestedSums = []
    numPositiveSums = []

    #Mean number of sexual contacts per group
    numContactMaleSums = []
    numContactFemaleSums = []
    numContactHeteroSums = []
    numContactBiSums = []

    numTestedMaleSums = []
    numTestedFemaleSums = []
    numTestedHeteroSums = []
    numTestedBiSums = []

    numPositiveMaleSums = []
    numPositiveFemaleSums = []
    numPositiveHeteroSums = []
    numPositiveBiSums = []

    propPositiveMaleSums = []
    propPositiveFemaleSums = []
    propPositiveHeteroSums = []
    propPositiveBiSums = []

    numContactVertices = []
    numContactEdges = []
    numInfectEdges = []

    #Mean proportion of degree at end of epidemic
    meanPropDegree = []
    #numpy.float was only an alias of the builtin float and has been removed from NumPy
    finalDegreeSequence = numpy.array(sGraph.outDegreeSequence(), float)

    degreeOneSums = []
    degreeTwoSums = []
    degreeThreePlusSums = []

    numProvinces = 15
    provinceArray = numpy.zeros((len(subgraphIndicesList), numProvinces))

    for m, subgraphIndices in enumerate(subgraphIndicesList):
        subgraph = sGraph.subgraph(subgraphIndices)
        infectSubGraph = sGraphInfect.subgraph(subgraphIndices)

        subgraphVertexArray = subgraph.getVertexList().getVertices(range(subgraph.getNumVertices()))

        detectionAges.append(numpy.mean((subgraphVertexArray[:, detectionIndex] - subgraphVertexArray[:, dobIndex]))/daysInYear)
        deathAfterInfectAges.append((numpy.mean(subgraphVertexArray[:, deathIndex] - subgraphVertexArray[:, detectionIndex]))/daysInYear)
        deathAges.append(numpy.mean((subgraphVertexArray[:, deathIndex] - subgraphVertexArray[:, dobIndex]))/daysInYear)
        homoMeans.append(numpy.mean(subgraphVertexArray[:, orientationIndex]))

        nonContactSums.append(subgraphVertexArray.shape[0] - numpy.sum(subgraphVertexArray[:, contactIndex]))
        contactSums.append(numpy.sum(subgraphVertexArray[:, contactIndex]))
        donorSums.append(numpy.sum(subgraphVertexArray[:, donorIndex]))
        randomTestSums.append(numpy.sum(subgraphVertexArray[:, randomTestIndex]))
        stdSums.append(numpy.sum(subgraphVertexArray[:, stdIndex]))
        prisonerSums.append(numpy.sum(subgraphVertexArray[:, prisonerIndex]))
        recommendSums.append(numpy.sum(subgraphVertexArray[:, doctorIndex]))
        otherSums.append(subgraphVertexArray.shape[0] - numpy.sum(subgraphVertexArray[:, [contactIndex, donorIndex, randomTestIndex, stdIndex, doctorIndex]]))

        heteroSums.append(numpy.sum(subgraphVertexArray[:, orientationIndex]==0))
        biSums.append(numpy.sum(subgraphVertexArray[:, orientationIndex]==1))

        femaleSums.append(numpy.sum(subgraphVertexArray[:, genderIndex]==1))
        maleSums.append(numpy.sum(subgraphVertexArray[:, genderIndex]==0))

        contactHeteroSums.append(numpy.sum(numpy.logical_and(subgraphVertexArray[:, orientationIndex]==0, subgraphVertexArray[:, contactIndex])))
        contactBiSums.append(numpy.sum(numpy.logical_and(subgraphVertexArray[:, orientationIndex]==1, subgraphVertexArray[:, contactIndex])))
        contactFemaleSums.append(numpy.sum(numpy.logical_and(subgraphVertexArray[:, genderIndex]==1, subgraphVertexArray[:, contactIndex])))
        contactMaleSums.append(numpy.sum(numpy.logical_and(subgraphVertexArray[:, genderIndex]==0, subgraphVertexArray[:, contactIndex])))

        doctorHeteroSums.append(numpy.sum(numpy.logical_and(subgraphVertexArray[:, orientationIndex]==0, subgraphVertexArray[:, doctorIndex])))
        doctorBiSums.append(numpy.sum(numpy.logical_and(subgraphVertexArray[:, orientationIndex]==1, subgraphVertexArray[:, doctorIndex])))
        doctorFemaleSums.append(numpy.sum(numpy.logical_and(subgraphVertexArray[:, genderIndex]==1, subgraphVertexArray[:, doctorIndex])))
        doctorMaleSums.append(numpy.sum(numpy.logical_and(subgraphVertexArray[:, genderIndex]==0, subgraphVertexArray[:, doctorIndex])))

        havanaSums.append(numpy.sum(subgraphVertexArray[:, havanaIndex]==1))
        villaClaraSums.append(numpy.sum(subgraphVertexArray[:, villaClaraIndex]==1))
        pinarSums.append(numpy.sum(subgraphVertexArray[:, pinarIndex]==1))
        holguinSums.append(numpy.sum(subgraphVertexArray[:, holguinIndex]==1))
        habanaSums.append(numpy.sum(subgraphVertexArray[:, habanaIndex]==1))
        sanctiSums.append(numpy.sum(subgraphVertexArray[:, sanctiIndex]==1))

        numContactSums.append(numpy.mean(subgraphVertexArray[:, numContactsIndex]))
        numTestedSums.append(numpy.mean(subgraphVertexArray[:, numTestedIndex]))
        numPositiveSums.append(numpy.mean(subgraphVertexArray[:, numPositiveIndex]))

        numContactMaleSums.append(numpy.mean(subgraphVertexArray[subgraphVertexArray[:, genderIndex]==0, numContactsIndex]))
        numContactFemaleSums.append(numpy.mean(subgraphVertexArray[subgraphVertexArray[:, genderIndex]==1, numContactsIndex]))
        numContactHeteroSums.append(numpy.mean(subgraphVertexArray[subgraphVertexArray[:, orientationIndex]==0, numContactsIndex]))
        numContactBiSums.append(numpy.mean(subgraphVertexArray[subgraphVertexArray[:, orientationIndex]==1, numContactsIndex]))

        numTestedMaleSums.append(numpy.mean(subgraphVertexArray[subgraphVertexArray[:, genderIndex]==0, numTestedIndex]))
        numTestedFemaleSums.append(numpy.mean(subgraphVertexArray[subgraphVertexArray[:, genderIndex]==1, numTestedIndex]))
        numTestedHeteroSums.append(numpy.mean(subgraphVertexArray[subgraphVertexArray[:, orientationIndex]==0, numTestedIndex]))
        numTestedBiSums.append(numpy.mean(subgraphVertexArray[subgraphVertexArray[:, orientationIndex]==1, numTestedIndex]))

        numPositiveMaleSums.append(numpy.mean(subgraphVertexArray[subgraphVertexArray[:, genderIndex]==0, numPositiveIndex]))
        numPositiveFemaleSums.append(numpy.mean(subgraphVertexArray[subgraphVertexArray[:, genderIndex]==1, numPositiveIndex]))
        numPositiveHeteroSums.append(numpy.mean(subgraphVertexArray[subgraphVertexArray[:, orientationIndex]==0, numPositiveIndex]))
        numPositiveBiSums.append(numpy.mean(subgraphVertexArray[subgraphVertexArray[:, orientationIndex]==1, numPositiveIndex]))

        #Each proportion divides the positives of a group by the tested contacts of the same group
        propPositiveMaleSums.append(numPositiveMaleSums[m]/float(numTestedMaleSums[m]))
        propPositiveFemaleSums.append(numPositiveFemaleSums[m]/float(numTestedFemaleSums[m]))
        propPositiveHeteroSums.append(numPositiveHeteroSums[m]/float(numTestedHeteroSums[m]))
        propPositiveBiSums.append(numPositiveBiSums[m]/float(numTestedBiSums[m]))

        numContactVertices.append(subgraph.getNumVertices())
        numContactEdges.append(subgraph.getNumEdges())
        numInfectEdges.append(infectSubGraph.getNumEdges())

        outDegrees = subgraph.outDegreeSequence()

        #Vertices with a final degree of 0 are skipped to avoid dividing by zero
        nonZeroInds = finalDegreeSequence[subgraphIndices]!=0
        meanPropDegree.append(numpy.mean(outDegrees[nonZeroInds]/finalDegreeSequence[subgraphIndices][nonZeroInds]))

        degreeOneSums.append(numpy.sum(outDegrees==1))
        degreeTwoSums.append(numpy.sum(outDegrees==2))
        degreeThreePlusSums.append(numpy.sum(outDegrees>=3))

        #Sum the feature columns from "CA" up to and including "VC"
        provinceArray[m, :] = numpy.sum(subgraphVertexArray[:, fInds["CA"]:fInds['VC']+1], 0)

    #Save some of the results for the ABC work
    numStats = 2
    vertexStatsArray = numpy.zeros((len(subgraphIndicesList), numStats))
    vertexStatsArray[:, 0] = numpy.array(biSums)
    vertexStatsArray[:, 1] = numpy.array(heteroSums)

    resultsFileName = resultsDir + "ContactGrowthVertexStats.pkl"
    Util.savePickle(vertexStatsArray, resultsFileName)

    plt.figure(plotInd)
    plt.plot(absDayList, detectionAges)
    plt.xticks(locs, labels)
    plt.xlabel("Year")
    plt.ylabel("Detection Age (years)")
    plt.savefig(figureDir + "DetectionMeansGrowth.eps")
    plotInd += 1

    plt.figure(plotInd)
    plt.plot(absDayList, heteroSums, 'k-', absDayList, biSums, 'k--', absDayList, femaleSums, 'k-.', absDayList, maleSums, 'k:')
    plt.xticks(locs, labels)
    plt.xlabel("Year")
    plt.ylabel("Detections")
    plt.legend(("Heterosexual", "MSM", "Female", "Male"), loc="upper left")
    plt.savefig(figureDir + "OrientationGenderGrowth.eps")
    plotInd += 1

    plt.figure(plotInd)
    plt.plot(absDayList, contactHeteroSums, 'k-', absDayList, contactBiSums, 'k--', absDayList, contactFemaleSums, 'k-.', absDayList, contactMaleSums, 'k:')
    plt.xticks(locs, labels)
    plt.xlabel("Year")
    plt.ylabel("Contact tracing detections")
    plt.legend(("Heterosexual", "MSM", "Female", "Male"), loc="upper left")
    plt.savefig(figureDir + "OrientationGenderContact.eps")
    plotInd += 1

    plt.figure(plotInd)
    plt.plot(absDayList, doctorHeteroSums, 'k-', absDayList, doctorBiSums, 'k--', absDayList, doctorFemaleSums, 'k-.', absDayList, doctorMaleSums, 'k:')
    plt.xticks(locs, labels)
    plt.xlabel("Year")
    plt.ylabel("Doctor recommendation detections")
    plt.legend(("Heterosexual", "MSM", "Female", "Male"), loc="upper left")
    plt.savefig(figureDir + "OrientationGenderDoctor.eps")
    plotInd += 1

    #Plot all the provinces (this figure is not saved to disk)
    plt.figure(plotInd)
    plt.hold(True)
    for k in range(provinceArray.shape[1]):
        plt.plot(absDayList, provinceArray[:, k], label=str(k))
    plt.xticks(locs, labels)
    plt.xlabel("Year")
    plt.ylabel("Detections")
    plt.legend(loc="upper left")
    plotInd += 1

    #Plot of detection types
    plt.figure(plotInd)
    plt.plot(absDayList, contactSums, plotStyles2[0], absDayList, donorSums, plotStyles2[1], absDayList, randomTestSums, plotStyles2[2], absDayList, stdSums, plotStyles2[3], absDayList, otherSums, plotStyles2[4], absDayList, recommendSums, plotStyles2[5])
    plt.xticks(locs, labels)
    plt.xlabel("Year")
    plt.ylabel("Detections")
    plt.legend(("Contact tracing", "Blood donation", "Random test", "STD", "Other test", "Doctor recommendation"), loc="upper left")
    plt.savefig(figureDir + "DetectionGrowth.eps")
    plotInd += 1

    plt.figure(plotInd)
    plt.plot(absDayList, numContactSums, plotStyleBW[0], absDayList, numTestedSums, plotStyleBW[1], absDayList, numPositiveSums, plotStyleBW[2])
    plt.xticks(locs, labels)
    plt.xlabel("Year")
    plt.ylabel("Contacts")
    plt.legend(("No. contacts", "No. tested", "No. positive"), loc="center left")
    plt.savefig(figureDir + "ContactsGrowth.eps")
    plotInd += 1

    plt.figure(plotInd)
    plt.plot(absDayList, numContactHeteroSums, plotStyleBW[0], absDayList, numContactBiSums, plotStyleBW[1], absDayList, numContactFemaleSums, plotStyleBW[2], absDayList, numContactMaleSums, plotStyleBW[3])
    plt.xticks(locs, labels)
    plt.xlabel("Year")
    plt.ylabel("Total contacts")
    plt.legend(("Heterosexual", "MSM", "Female", "Male"), loc="upper right")
    plt.savefig(figureDir + "ContactsGrowthOrientGen.eps")
    plotInd += 1

    plt.figure(plotInd)
    plt.plot(absDayList, numTestedHeteroSums, plotStyleBW[0], absDayList, numTestedBiSums, plotStyleBW[1], absDayList, numTestedFemaleSums, plotStyleBW[2], absDayList, numTestedMaleSums, plotStyleBW[3])
    plt.xticks(locs, labels)
    plt.xlabel("Year")
    plt.ylabel("Tested contacts")
    plt.legend(("Heterosexual", "MSM", "Female", "Male"), loc="upper right")
    plt.savefig(figureDir + "TestedGrowthOrientGen.eps")
    plotInd += 1

    plt.figure(plotInd)
    plt.plot(absDayList, numPositiveHeteroSums, plotStyleBW[0], absDayList, numPositiveBiSums, plotStyleBW[1], absDayList, numPositiveFemaleSums, plotStyleBW[2], absDayList, numPositiveMaleSums, plotStyleBW[3])
    plt.xticks(locs, labels)
    plt.xlabel("Year")
    plt.ylabel("Positive contacts")
    plt.legend(("Heterosexual", "MSM", "Female", "Male"), loc="upper right")
    plt.savefig(figureDir + "PositiveGrowthOrientGen.eps")
    plotInd += 1

    #Proportion positive versus tested
    plt.figure(plotInd)
    plt.plot(absDayList, propPositiveHeteroSums, plotStyleBW[0], absDayList, propPositiveBiSums, plotStyleBW[1], absDayList, propPositiveFemaleSums, plotStyleBW[2], absDayList, propPositiveMaleSums, plotStyleBW[3])
    plt.xticks(locs, labels)
    plt.xlabel("Year")
    plt.ylabel("Proportion positive contacts")
    plt.legend(("Heterosexual", "MSM", "Female", "Male"), loc="upper right")
    plt.savefig(figureDir + "PercentPositiveGrowthOrientGen.eps")
    plotInd += 1

    plt.figure(plotInd)
    plt.hold(True)
    plt.plot(absDayList, havanaSums, plotStyles2[0])
    plt.plot(absDayList, villaClaraSums, plotStyles2[1])
    plt.plot(absDayList, pinarSums, plotStyles2[2])
    plt.plot(absDayList, holguinSums, plotStyles2[3])
    plt.plot(absDayList, habanaSums, plotStyles2[4])
    plt.plot(absDayList, sanctiSums, plotStyles2[5])
    plt.xticks(locs, labels)
    plt.xlabel("Year")
    plt.ylabel("Detections")
    plt.legend(("Havana City", "Villa Clara", "Pinar del Rio", "Holguin", "La Habana", "Sancti Spiritus"), loc="upper left")
    plt.savefig(figureDir + "ProvinceGrowth.eps")
    plotInd += 1

    plt.figure(plotInd)
    plt.plot(absDayList, numContactVertices, plotStyleBW[0], absDayList, numContactEdges, plotStyleBW[1], absDayList, numInfectEdges, plotStyleBW[2])
    plt.xticks(locs, labels)
    plt.xlabel("Year")
    plt.ylabel("Vertices/edges")
    plt.legend(("Contact vertices", "Contact edges", "Infect edges"), loc="upper left")
    plt.savefig(figureDir + "VerticesEdges.eps")
    plotInd += 1

    plt.figure(plotInd)
    plt.plot(absDayList, meanPropDegree, plotStyleBW[0])
    plt.xticks(locs, labels)
    plt.xlabel("Year")
    plt.ylabel("Proportion of final degree")
    plt.savefig(figureDir + "MeanPropDegree.eps")
    plotInd += 1

    #Degree counts (this figure is not saved to disk)
    plt.figure(plotInd)
    plt.plot(absDayList, degreeOneSums, plotStyleBW[0], absDayList, degreeTwoSums, plotStyleBW[1], absDayList, degreeThreePlusSums, plotStyleBW[2])
    plt.xticks(locs, labels)
    plt.xlabel("Year")
    plt.ylabel("Detections")
    plt.legend(("Degree = 1", "Degree = 2", "Degree >= 3"), loc="upper left")
    plotInd += 1

    #Print a table of interesting stats: one row per province, one column per date
    results = numpy.array([havanaSums])
    results = numpy.r_[results, numpy.array([villaClaraSums])]
    results = numpy.r_[results, numpy.array([pinarSums])]
    results = numpy.r_[results, numpy.array([holguinSums])]
    results = numpy.r_[results, numpy.array([habanaSums])]
    results = numpy.r_[results, numpy.array([sanctiSums])]

    print(Latex.listToRow(["Havana City", "Villa Clara", "Pinar del Rio", "Holguin", "La Habana", "Sancti Spiritus"]))
    print("\\hline")
    #Print every fourth date only
    for i in range(0, len(dayList), 4):
        day = dayList[i]
        print(str(DateUtils.getDateStrFromDay(day, startYear)) + " & " + Latex.array1DToRow(results[:, i].T) + "\\\\")

    results = numpy.array([heteroSums])
    results = numpy.r_[results, numpy.array([biSums])]
    results = numpy.r_[results, numpy.array([femaleSums])]
    results = numpy.r_[results, numpy.array([maleSums])]

    print("\n\n")
    print(Latex.listToRow(["Heterosexual", "MSM", "Female", "Male"]))
    print("\\hline")
    for i in range(0, len(dayList), 4):
        day = dayList[i]
        print(str(DateUtils.getDateStrFromDay(day, startYear)) + " & " + Latex.array1DToRow(results[:, i].T) + "\\\\")
예제 #19
0
def plotScalarStats():
    """
    Compute or plot the scalar statistics of the sequence of infection subgraphs.

    If the module-level flag saveResults is set, the statistics (including tree
    statistics) of the subgraphs given by subgraphIndicesList are computed and
    pickled to "InfectGrowthScalarStats.pkl" and nothing is plotted. Otherwise
    the pickled statistics and the numConfigGraphs configuration model (CM)
    results are loaded, the tree depth/size columns are overwritten with the
    arrays stored in "treeSizesDepths.npz", and the figures are written to
    figureDir.

    The function reads its inputs from module-level names and increments the
    global figure counter plotInd once per figure.
    """
    global plotInd

    logging.info("Computing scalar stats")
    resultsFileName = resultsDir + "InfectGrowthScalarStats.pkl"

    if saveResults:
        statsArray = graphStats.sequenceScalarStats(sGraph, subgraphIndicesList, treeStats=True)
        Util.savePickle(statsArray, resultsFileName, True)
    else:
        statsArray = Util.loadPickle(resultsFileName)

        #Output all the results into plots
        #Take the mean of the results over the configuration model graphs
        resultsFileNameBase = resultsDir + "ConfigInfectGraphScalarStats"
        numGraphs = len(subgraphIndicesList)
        configStatsArrays = numpy.zeros((numGraphs, graphStats.getNumStats(), numConfigGraphs))

        for j in range(numConfigGraphs):
            resultsFileName = resultsFileNameBase + str(j)
            configStatsArrays[:, :, j] = Util.loadPickle(resultsFileName)

        configStatsArray = numpy.mean(configStatsArrays, 2)
        configStatsStd = numpy.std(configStatsArrays, 2)

        #Make sure we don't include 0 in the array (log(0) is undefined)
        vertexIndex = numpy.argmax(statsArray[:, graphStats.numVerticesIndex] > 0)
        edgeIndex = numpy.argmax(statsArray[:, graphStats.numEdgesIndex] > 0)
        minIndex = numpy.maximum(vertexIndex, edgeIndex)

        def plotRealConfigError(index, styleReal, styleConfig, realLabel, configLabel):
            """
            Plot column index of the real statistics and of the mean CM statistics
            on the current figure, attach one standard deviation error bars to the
            CM curve, and clamp the lower axis limits to 0.
            """
            plt.hold(True)
            plt.plot(absDayList, statsArray[:, index], styleReal, label=realLabel)
            #Symmetric error bars: one standard deviation below and above the CM mean
            errors = numpy.c_[configStatsStd[:, index], configStatsStd[:, index]].T
            plt.plot(absDayList, configStatsArray[:, index], styleConfig, label=configLabel)
            #The first character of the style string is passed as the error bar colour
            plt.errorbar(absDayList, configStatsArray[:, index], errors, linewidth=0, elinewidth=0, label="_nolegend_", ecolor=styleConfig[0])

            _, xmax = plt.xlim()
            plt.xlim((0, xmax))
            _, ymax = plt.ylim()
            plt.ylim((0, ymax))

        plt.figure(plotInd)
        plt.plot(numpy.log(statsArray[minIndex:, graphStats.numVerticesIndex]), numpy.log(statsArray[minIndex:, graphStats.numEdgesIndex]))
        plt.xlabel("log(|V|)")
        plt.ylabel("log(|E|)")
        plt.savefig(figureDir + "LogVerticesEdgesGrowth.eps")
        plotInd += 1

        plt.figure(plotInd)
        plotRealConfigError(graphStats.numTreesIndex, plotStyles3[0], plotStyles5[0], "Trees size >= 1", "CM trees size >= 1")
        #Use the second style so this series can be told apart from the first one
        plotRealConfigError(graphStats.numNonSingletonTreesIndex, plotStyles3[1], plotStyles5[1], "Trees size >= 2", "CM trees size >= 2")
        plt.xticks(locs, labels)
        plt.xlabel("Year")
        plt.ylabel("No. trees")
        plt.legend(loc="upper left")
        plt.savefig(figureDir + "NumTreesGrowth.eps")
        plotInd += 1

        for k, day in enumerate(dayList):
            print(str(DateUtils.getDateStrFromDay(day, startYear)) + ": " + str(statsArray[k, graphStats.numTreesIndex]))
            print(str(DateUtils.getDateStrFromDay(day, startYear)) + ": " + str(configStatsArray[k, graphStats.numTreesIndex]))

        #Load stats from a file to get the max tree from its root
        resultsFilename = resultsDir + "treeSizesDepths.npz"
        #An .npz archive is binary, so open it in 'rb' mode and close it once the arrays are copied
        with open(resultsFilename, 'rb') as treeStatsFile:
            arrayDict = numpy.load(treeStatsFile)
            statsArray[:, graphStats.maxTreeDepthIndex] = arrayDict["arr_0"]
            statsArray[:, graphStats.maxTreeSizeIndex] = arrayDict["arr_1"]
            statsArray[:, graphStats.secondTreeDepthIndex] = arrayDict["arr_2"]
            statsArray[:, graphStats.secondTreeSizeIndex] = arrayDict["arr_3"]

        plt.figure(plotInd)
        plotRealConfigError(graphStats.maxTreeSizeIndex, plotStyles3[0], plotStyles5[0], "Max tree", "CM max tree")
        plotRealConfigError(graphStats.secondTreeSizeIndex, plotStyles3[1], plotStyles5[1], "2nd tree", "CM 2nd tree")
        plt.xticks(locs, labels)
        plt.xlabel("Year")
        plt.ylabel("Size")
        plt.legend(loc="upper left")
        plt.savefig(figureDir + "MaxTreeGrowth.eps")
        plotInd += 1

        plt.figure(plotInd)
        plotRealConfigError(graphStats.maxTreeDepthIndex, plotStyles3[0], plotStyles5[0], "Max tree", "CM max tree")
        plotRealConfigError(graphStats.secondTreeDepthIndex, plotStyles3[1], plotStyles5[1], "2nd tree", "CM 2nd tree")
        plt.xticks(locs, labels)
        plt.xlabel("Year")
        plt.ylabel("Depth")
        plt.legend(loc="lower right")
        plt.savefig(figureDir + "MaxTreeDepthGrowth.eps")

        plotInd += 1
예제 #20
0
 
 if len(testExpertMatches) != 0: 
     #Rank the graph vertices with every method offered by GraphRanker 
     computeInfluence = True
     graphRanker = GraphRanker(
         k=100,
         numRuns=100,
         computeInfluence=computeInfluence,
         p=0.05,
         inputRanking=[relevantAuthorInds1, relevantAuthorInds2],
     )
     outputLists = graphRanker.vertexRankings(graph, relevantAuthorsInds)
          
     itemList = RankAggregator.generateItemList(outputLists)
     methodNames = graphRanker.getNames()
     
     #The rankings are stored under a file name that depends on the topic model in use 
     outputFilename = dataset.getOutputFieldDir(field) + ("outputListsLSI.npz" if runLSI else "outputListsLDA.npz")
         
     Util.savePickle([outputLists, trainExpertMatchesInds, testExpertMatchesInds], outputFilename, debug=True)
     
     numMethods = len(outputLists)
     precisions = numpy.zeros((len(ns), numMethods))
     averagePrecisions = numpy.zeros(numMethods)
     
     #Average precision of each method over its first averagePrecisionN ranked items 
     for j in range(numMethods):                 
         averagePrecisions[j] = Evaluator.averagePrecisionFromLists(testExpertMatchesInds, outputLists[j][0:averagePrecisionN], averagePrecisionN) 
     
     #Precision of each method (column) at each cut-off n (row) 
     for i, n in enumerate(ns):     
         for j in range(numMethods): 
             precisions[i, j] = Evaluator.precisionFromIndLists(testExpertMatchesInds, outputLists[j][0:n]) 
     
     #Put the cut-offs in front of the precisions as the first column 
     precisions2 = numpy.c_[numpy.array(ns), precisions]
     
     logging.debug(Latex.listToRow(methodNames))
예제 #21
0
def plotVectorStats():
    """
    Compute, or load and plot, vector statistics over the subgraph sequence.

    If the module-level flag saveResults is set, the statistics are computed
    with graphStats.sequenceVectorStats and pickled to
    resultsDir + "InfectGrowthVectorStats.pkl"; nothing is plotted on that run.
    Otherwise the pickled list is loaded and, for each entry of dayList2, the
    out-degree, tree size and tree depth distributions are drawn onto three
    figures (numbered from plotInd) which are saved as EPS files in figureDir.
    Finally LaTeX rows for the tree size/depth distributions and the number of
    vertices and edges of each subgraph are printed.
    """
    logging.info("Computing vector stats")
    # NOTE(review): plotInd is only read here; it is not advanced past the
    # three figures used below -- confirm whether later plots rely on that.
    global plotInd
    resultsFileName = resultsDir + "InfectGrowthVectorStats.pkl"

    if saveResults:
        statsDictList = graphStats.sequenceVectorStats(sGraph, subgraphIndicesList2, True)
        Util.savePickle(statsDictList, resultsFileName, True)
        return

    statsDictList = Util.loadPickle(resultsFileName)
    numDays = len(dayList2)

    # Zero-padded buffers: each row is filled up to the length of that day's
    # distribution and trimmed to the widest one after the loop.
    treeSizesDistArray = numpy.zeros((numDays, 3000))
    treeDepthsDistArray = numpy.zeros((numDays, 100))
    # The numpy.int alias was removed in NumPy 1.24; builtin int is equivalent.
    numVerticesEdgesArray = numpy.zeros((numDays, 2), int)
    numVerticesEdgesArray[:, 0] = [len(sgl) for sgl in subgraphIndicesList2]
    numVerticesEdgesArray[:, 1] = [sGraph.subgraph(sgl).getNumEdges() for sgl in subgraphIndicesList2]

    # Widest distribution seen so far, so that trimming never drops data even
    # if the last day's distribution is not the longest one.
    maxTreeSizeBins = 0
    maxTreeDepthBins = 0

    for j in range(numDays):
        dateStr = str(DateUtils.getDateStrFromDay(dayList2[j], startYear))
        logging.info(dateStr)
        statsDict = statsDictList[j]

        degreeDist = statsDict["outDegreeDist"]
        degreeDist = degreeDist/float(numpy.sum(degreeDist))

        treeSizesDist = statsDict["treeSizesDist"]
        treeSizesDist = numpy.array(treeSizesDist, numpy.float64)/numpy.sum(treeSizesDist)
        treeSizesDistArray[j, 0:treeSizesDist.shape[0]] = treeSizesDist
        maxTreeSizeBins = max(maxTreeSizeBins, treeSizesDist.shape[0])

        # Unlike the tree sizes, the tree depths are not normalised by their sum.
        treeDepthsDist = numpy.array(statsDict["treeDepthsDist"], numpy.float64)
        treeDepthsDistArray[j, 0:treeDepthsDist.shape[0]] = treeDepthsDist
        maxTreeDepthBins = max(maxTreeDepthBins, treeDepthsDist.shape[0])

        # Every day is drawn onto the same three figures, one curve per day.
        plotInd2 = plotInd

        plt.figure(plotInd2)
        plt.plot(numpy.arange(degreeDist.shape[0]), degreeDist, label=dateStr)
        plt.xlabel("Degree")
        plt.ylabel("Probability")
        plt.ylim((0, 0.8))
        plt.legend()
        plt.savefig(figureDir + "DegreeDist" + ".eps")
        plotInd2 += 1

        # Only non-zero entries are plotted because the y axis is logarithmic.
        nonZeroSizes = treeSizesDist != 0
        plt.figure(plotInd2)
        plt.scatter(numpy.arange(treeSizesDist.shape[0])[nonZeroSizes], numpy.log(treeSizesDist[nonZeroSizes]), s=30, c=plotStyles2[j][0], label=dateStr)
        plt.xlabel("Size")
        plt.ylabel("log(probability)")
        plt.xlim((0, 125))
        plt.legend()
        plt.savefig(figureDir + "TreeSizeDist" + ".eps")
        plotInd2 += 1

        nonZeroDepths = treeDepthsDist != 0
        plt.figure(plotInd2)
        plt.scatter(numpy.arange(treeDepthsDist.shape[0])[nonZeroDepths], numpy.log(treeDepthsDist[nonZeroDepths]), s=30, c=plotStyles2[j][0], label=dateStr)
        plt.xlabel("Depth")
        plt.ylabel("log(probability)")
        plt.xlim((0, 15))
        plt.legend()
        plt.savefig(figureDir + "TreeDepthDist" + ".eps")
        plotInd2 += 1

    dateStrList = [DateUtils.getDateStrFromDay(day, startYear) for day in dayList2]
    precision = 4

    # Drop the unused padding, then print only the columns with some mass.
    treeSizesDistArray = treeSizesDistArray[:, 0:maxTreeSizeBins]
    nonZeroCols = numpy.sum(treeSizesDistArray, 0) != 0
    print(Latex.array1DToRow(numpy.arange(treeSizesDistArray.shape[1])[nonZeroCols]))
    print(Latex.array2DToRows(treeSizesDistArray[:, nonZeroCols]))

    print("Tree depths")
    treeDepthsDistArray = treeDepthsDistArray[:, 0:maxTreeDepthBins]
    nonZeroCols = numpy.sum(treeDepthsDistArray, 0) != 0
    print(Latex.array1DToRow(numpy.arange(treeDepthsDistArray.shape[1])[nonZeroCols]))
    print(Latex.array2DToRows(treeDepthsDistArray[:, nonZeroCols]))

    # Per-day total of the first three depth columns.
    print(numpy.sum(treeDepthsDistArray[:, 0:3], 1))

    print("Edges and verticies")
    print(Latex.listToRow(dateStrList))
    print(Latex.array2DToRows(numVerticesEdgesArray.T, precision))
예제 #22
0
        # Build one (row index, parameter row) task per row of thetaArray.
        paramList = []
        
        for i in range(thetaArray.shape[0]): 
            paramList.append((i, thetaArray[i, :]))
    
        # Run saveStats over every task in a process pool with one worker per
        # CPU. pool.map blocks until all tasks are done, so the pool can be
        # terminated immediately afterwards. The returned list is not used here.
        pool = multiprocessing.Pool(multiprocessing.cpu_count())               
        resultIterator = pool.map(saveStats, paramList)  
        # Serial alternative to the pool call above:
        #resultIterator = map(saveStats, paramList)  
        pool.terminate()
    
        #Now save the statistics on the target graph 
        # Sample times run from startDate up to (but excluding) endDate+1 in
        # steps of recordStep.
        times = numpy.arange(startDate, endDate+1, recordStep)
        vertexArray, infectedIndices, removedIndices, contactGraphStats, removedGraphStats, finalRemovedDegrees = HIVModelUtils.generateStatistics(targetGraph, times)
        # The six results are pickled together as a single tuple.
        stats = vertexArray, infectedIndices, removedIndices, contactGraphStats, removedGraphStats, finalRemovedDegrees
        resultsFileName = outputDir + "IdealStats.pkl"
        Util.savePickle(stats, resultsFileName)
else:
    import matplotlib 
    matplotlib.use("GTK3Agg")
    import matplotlib.pyplot as plt     
    
    plotStyles = ['k-', 'kx-', 'k+-', 'k.-', 'k*-']
    
    N, resultsDir, outputDir, recordStep, startDate, endDate, prefix, targetGraph, breakSize, numEpsilons, M, matchAlpha, matchAlg, numInds = loadParams(0) 

    inds = range(numInds)
    numRecordSteps = int((endDate-startDate)/recordStep)+1
    
    #We store: number of detections, CT detections, rand detections, infectives, max componnent size, num components, edges, objectives
    numMeasures = 12
    numTimings = 2