예제 #1
0
 def computeLDA(self):
     """
     Fit a gensim LDA model on the document-term matrix and index the result.

     Nothing is done (apart from a debug message) when self.modelFilename
     already exists and self.overwriteModel is not set. Otherwise
     self.vectoriseDocuments() and self.loadVectoriser() are called, the corpus
     is read from self.docTermMatrixFilename + ".mtx", an LdaModel with self.k
     topics is fitted and a similarity index over lda[corpus] is built. The
     pair [lda, index] is pickled to self.modelFilename. Returns None.
     """
     if os.path.exists(self.modelFilename) and not self.overwriteModel:
         logging.debug("File already exists: " + self.modelFilename)
         return

     self.vectoriseDocuments()
     self.loadVectoriser()
     corpus = gensim.corpora.mmcorpus.MmCorpus(self.docTermMatrixFilename + ".mtx")
     #Ask the vectoriser for its vocabulary only once and map column index -> term
     id2WordDict = dict(enumerate(self.vectoriser.get_feature_names()))

     logging.getLogger('gensim').setLevel(logging.INFO)
     lda = LdaModel(corpus, num_topics=self.k, id2word=id2WordDict, chunksize=self.chunksize, distributed=False)
     index = gensim.similarities.docsim.Similarity(self.indexFilename, lda[corpus], num_features=self.k)

     Util.savePickle([lda, index], self.modelFilename, debug=True)
     gc.collect()
예제 #2
0
def computeConfigScalarStats():
    """
    Compute the scalar statistics of every configuration model graph.

    For each j in range(numConfigGraphs) the graph stored at
    resultsDir + "ConfigInfectGraph<j>" is loaded, its scalar statistics are
    computed over subgraphIndicesList and pickled to
    resultsDir + "ConfigInfectGraphScalarStats<j>". Graphs whose results file
    already exists are skipped.
    """
    logging.info("Computing configuration model scalar stats")

    graphPrefix = resultsDir + "ConfigInfectGraph"
    statsPrefix = resultsDir + "ConfigInfectGraphScalarStats"

    for graphInd in range(numConfigGraphs):
        suffix = str(graphInd)
        statsPath = statsPrefix + suffix

        #Results from a previous run are reused
        if os.path.isfile(statsPath):
            continue

        graph = SparseGraph.load(graphPrefix + suffix)
        scalarStats = graphStats.sequenceScalarStats(graph, subgraphIndicesList, slowStats, treeStats=True)
        Util.savePickle(scalarStats, statsPath, True)
        gc.collect()

    logging.info("All done")
예제 #3
0
def computeConfigVectorStats():
    """
    Compute the vector statistics of every configuration model graph.

    For each j in range(numConfigGraphs) the graph stored at
    resultsDir + "ConfigInfectGraph<j>" is loaded, its vector statistics are
    computed over subgraphIndicesList2 (tree statistics on, eigen statistics
    off) and pickled to resultsDir + "ConfigInfectGraphVectorStats<j>". Graphs
    whose results file already exists are skipped.
    """
    #Note: We can make this multithreaded
    logging.info("Computing configuration model vector stats")

    graphPrefix = resultsDir + "ConfigInfectGraph"
    statsPrefix = resultsDir + "ConfigInfectGraphVectorStats"

    for graphInd in range(numConfigGraphs):
        suffix = str(graphInd)
        statsPath = statsPrefix + suffix

        #Results from a previous run are reused
        if os.path.isfile(statsPath):
            continue

        graph = SparseGraph.load(graphPrefix + suffix)
        vectorStats = graphStats.sequenceVectorStats(graph, subgraphIndicesList2, eigenStats=False, treeStats=True)
        Util.savePickle(vectorStats, statsPath, False)
        gc.collect()

    logging.info("All done")
예제 #4
0
    def coauthorsGraph(self, field, relevantExperts):
        """
        Return the coauthor graph grown from the relevant experts of a field.

        The graph and its author indexer are pickled to the file named by
        self.getCoauthorsFilename(field). They are only rebuilt when that file
        is missing or self.overwriteGraph is set. In either case the pair is
        read back from the pickle and returned as (graph, authorIndexer).
        """
        if os.path.exists(self.getCoauthorsFilename(field)) and not self.overwriteGraph:
            logging.debug("Files already generated: " + self.getCoauthorsFilename(field))
        else:
            logging.debug("Finding coauthors of relevant experts")
            #Two construction routes depending on whether the authors are known in advance
            if self.knownAuthors:
                builtGraph, builtIndexer = self.coauthorsGraphFromAuthors2(set(relevantExperts), field)
            else:
                builtGraph, builtIndexer = self.coauthorsGraphFromAuthors(set(relevantExperts))
            logging.debug(builtGraph.summary())
            Util.savePickle([builtGraph, builtIndexer], self.getCoauthorsFilename(field), debug=True)

        #Always hand back what is stored on disk
        coauthorGraph, indexer = Util.loadPickle(self.getCoauthorsFilename(field))
        return coauthorGraph, indexer
def plotOtherStats():
    """
    Compute or plot geodesic distance statistics of subgraphs of the contact graph.

    When saveResults is set, the scalar statistics of sGraph given by
    HIVGraphStatistics are computed over subgraphIndicesList and pickled to
    resultsDir + "ContactGrowthOtherStats.pkl". Otherwise that pickle and
    "ContactGrowthScalarStats.pkl" are loaded and two figures of mean harmonic
    geodesic distance are saved as .eps files in figureDir, advancing the
    global plotInd by two.
    """
    global plotInd

    #Let's look at geodesic distances in subgraphs and communities
    logging.info("Computing other stats")

    otherStatsPath = resultsDir + "ContactGrowthOtherStats.pkl"
    hivGraphStats = HIVGraphStatistics(fInds)

    if saveResults:
        computedStats = hivGraphStats.sequenceScalarStats(sGraph, subgraphIndicesList)
        Util.savePickle(computedStats, otherStatsPath, True)
        return

    otherStats = Util.loadPickle(otherStatsPath)
    #Just load the harmonic geodesic distances of the full graph
    scalarStats = Util.loadPickle(resultsDir + "ContactGrowthScalarStats.pkl")

    #Zero out negative and infinite distances before plotting
    msmGeodesic = otherStats[:, hivGraphStats.msmGeodesicIndex]
    msmGeodesic[msmGeodesic < 0] = 0
    msmGeodesic[msmGeodesic == float('inf')] = 0

    #Figure 1: MSM individuals against the most connected vertices
    plt.figure(plotInd)
    topDegreeGeodesic = otherStats[:, hivGraphStats.mostConnectedGeodesicIndex]
    plt.plot(absDayList, msmGeodesic, 'k-', absDayList, topDegreeGeodesic, 'k--')
    plt.xticks(locs, labels)
    plt.xlabel("Year")
    plt.ylabel("Mean harmonic geodesic distance")
    plt.legend(("MSM individuals", "Top 10% degree"), loc="upper right")
    plt.savefig(figureDir + "MSM10Geodesic" + ".eps")
    plotInd += 1

    #Figure 2: the full graph against the men subgraph
    plt.figure(plotInd)
    allGeodesic = scalarStats[:, graphStats.harmonicGeoDistanceIndex]
    menGeodesic = otherStats[:, hivGraphStats.menSubgraphGeodesicIndex]
    plt.plot(absDayList, allGeodesic, 'k-', absDayList, menGeodesic, 'k--')
    plt.xticks(locs, labels)
    plt.ylim([0, 200.0])
    plt.xlabel("Year")
    plt.ylabel("Mean harmonic geodesic distance")
    plt.legend(("All individuals", "Men subgraph"), loc="upper right")
    plt.savefig(figureDir + "MenSubgraphGeodesic" + ".eps")
    plotInd += 1
예제 #6
0
def saveStats(args):
    """
    Simulate the HIV model for one parameter index and pickle its statistics.

    args is an (i, theta) pair. The results are written to
    outputDir + "SimStats<i>.pkl". The simulation is skipped when the FileLock
    on that file reports that the file already exists or is currently locked
    (presumably by another worker process). The lock is released even when
    the simulation or the pickling raises, so a failed run does not leave the
    index locked. Returns None.
    """
    i, theta = args
    #NOTE(review): theta is unpacked but not used; the model below is built from
    #the module-level thetaArray[i] - confirm that the two always agree

    resultsFileName = outputDir + "SimStats" + str(i) + ".pkl"
    lock = FileLock(resultsFileName)

    if lock.fileExists() or lock.isLocked():
        logging.debug("Results already computed: " + str(resultsFileName))
        return

    lock.lock()
    try:
        model = HIVModelUtils.createModel(targetGraph, startDate, endDate, recordStep, M, matchAlpha, breakSize, matchAlg, theta=thetaArray[i])
        times, infectedIndices, removedIndices, graph, compTimes, graphMetrics = HIVModelUtils.simulate(model)
        #The statistics are evaluated on a regular time grid rather than the simulation times
        times = numpy.arange(startDate, endDate+1, recordStep)
        vertexArray, infectedIndices, removedIndices, contactGraphStats, removedGraphStats, finalRemovedDegrees = HIVModelUtils.generateStatistics(graph, times)
        stats = times, vertexArray, infectedIndices, removedGraphStats, finalRemovedDegrees, graphMetrics.objectives, compTimes

        Util.savePickle(stats, resultsFileName)
    finally:
        lock.unlock()
예제 #7
0
 def computeLSI(self):
     """
     Compute using the LSI version in gensim.

     Nothing is done (apart from a debug message) when self.modelFilename
     already exists and self.overwriteModel is not set. Otherwise
     self.vectoriseDocuments() and self.loadVectoriser() are called, the corpus
     is read from self.docTermMatrixFilename + ".mtx", an LsiModel with self.k
     topics is fitted and a similarity index over lsi[corpus] is built. The
     pair [lsi, index] is pickled to self.modelFilename. Returns None.
     """
     if os.path.exists(self.modelFilename) and not self.overwriteModel:
         logging.debug("File already exists: " + self.modelFilename)
         return

     self.vectoriseDocuments()
     self.loadVectoriser()
     corpus = gensim.corpora.mmcorpus.MmCorpus(self.docTermMatrixFilename + ".mtx")
     #Ask the vectoriser for its vocabulary only once and map column index -> term
     id2WordDict = dict(enumerate(self.vectoriser.get_feature_names()))

     logging.getLogger('gensim').setLevel(logging.INFO)
     lsi = LsiModel(corpus, num_topics=self.k, id2word=id2WordDict, chunksize=self.chunksize, distributed=False)
     index = gensim.similarities.docsim.Similarity(self.indexFilename, lsi[corpus], num_features=self.k)

     Util.savePickle([lsi, index], self.modelFilename, debug=True)
     gc.collect()
예제 #8
0
 def vectoriseDocuments(self):
     """
     We want to go through the dataset and vectorise all the title+abstracts.
     The results are saved in TFIDF format in a matrix X.

     The matrix is written in Matrix Market format using
     self.docTermMatrixFilename, and the author list, citation list and fitted
     vectoriser are pickled to their respective filenames. Everything is
     regenerated when the ".mtx" file, the author list file or the vectoriser
     file is missing, or when self.overwriteVectoriser is set; otherwise only
     a debug message is logged. Returns None.
     """
     outputsExist = (os.path.exists(self.docTermMatrixFilename + ".mtx")
                     and os.path.exists(self.authorListFilename)
                     and os.path.exists(self.vectoriserFilename))

     if outputsExist and not self.overwriteVectoriser:
         logging.debug("Author list, document-term matrix and vectoriser already generated: ")
         return

     logging.debug("Vectorising documents")

     authorList, documentList, citationList = self.readAuthorsAndDocuments()
     Util.savePickle(authorList, self.authorListFilename, debug=True)
     Util.savePickle(citationList, self.citationListFilename, debug=True)

     logging.debug("Generating TFIDF features")
     #numpy.float was only an alias of the builtin float and has been removed from
     #NumPy; numpy.float64 gives the same dtype on every version
     vectoriser = text.TfidfVectorizer(min_df=self.minDf, ngram_range=(1,self.ngram), binary=self.binary, sublinear_tf=self.sublinearTf, norm="l2", max_df=0.95, stop_words="english", tokenizer=PorterTokeniser(), max_features=self.numFeatures, dtype=numpy.float64)

     #Large intermediates are deleted as soon as they are no longer needed
     X = vectoriser.fit_transform(documentList)
     del documentList
     scipy.io.mmwrite(self.docTermMatrixFilename, X)
     logging.debug("Wrote X with shape " + str(X.shape) + " and " + str(X.nnz) + " nonzeros to file " + self.docTermMatrixFilename + ".mtx")
     del X

     #Save vectoriser - note that we can't pickle the tokeniser so it needs to be reset when loaded
     vectoriser.tokenizer = None
     Util.savePickle(vectoriser, self.vectoriserFilename, debug=True)
     del vectoriser
     gc.collect()
예제 #9
0
        #Build one (index, parameter vector) task per row of thetaArray
        paramList = []
        
        for i in range(thetaArray.shape[0]): 
            paramList.append((i, thetaArray[i, :]))
    
        #Run saveStats over all tasks with one worker process per CPU
        pool = multiprocessing.Pool(multiprocessing.cpu_count())               
        resultIterator = pool.map(saveStats, paramList)  
        #Serial alternative to the pool:
        #resultIterator = map(saveStats, paramList)  
        pool.terminate()
    
        #Now save the statistics on the target graph 
        times = numpy.arange(startDate, endDate+1, recordStep)
        vertexArray, infectedIndices, removedIndices, contactGraphStats, removedGraphStats, finalRemovedDegrees = HIVModelUtils.generateStatistics(targetGraph, times)
        stats = vertexArray, infectedIndices, removedIndices, contactGraphStats, removedGraphStats, finalRemovedDegrees
        resultsFileName = outputDir + "IdealStats.pkl"
        Util.savePickle(stats, resultsFileName)
else:
    #Plotting branch: matplotlib is imported here and its backend is chosen
    #before pyplot is loaded
    import matplotlib 
    matplotlib.use("GTK3Agg")
    import matplotlib.pyplot as plt     
    
    plotStyles = ['k-', 'kx-', 'k+-', 'k.-', 'k*-']
    
    N, resultsDir, outputDir, recordStep, startDate, endDate, prefix, targetGraph, breakSize, numEpsilons, M, matchAlpha, matchAlg, numInds = loadParams(0) 

    inds = range(numInds)
    #Number of points on the recording grid from startDate to endDate
    numRecordSteps = int((endDate-startDate)/recordStep)+1
    
    #We store: number of detections, CT detections, rand detections, infectives, max component size, num components, edges, objectives
    numMeasures = 12
    numTimings = 2
예제 #10
0
def plotVectorStats():
    """
    Compute or plot the vector statistics of the infection graph over time.

    When saveResults is set, the vector statistics of sGraph are computed for
    each entry of subgraphIndicesList2 and pickled to
    resultsDir + "InfectGrowthVectorStats.pkl". Otherwise the pickled list is
    loaded and, for every day in dayList2, the degree, tree size and tree
    depth distributions are added to three figures (numbered from the global
    plotInd) which are saved as .eps files in figureDir. Finally the
    distributions and the vertex/edge counts are printed as LaTeX rows.
    """
    #Finally, compute some vector stats at various points in the graph
    logging.info("Computing vector stats")
    global plotInd
    resultsFileName = resultsDir + "InfectGrowthVectorStats.pkl"

    if saveResults:
        statsDictList = graphStats.sequenceVectorStats(sGraph, subgraphIndicesList2, True)
        Util.savePickle(statsDictList, resultsFileName, True)
        return

    statsDictList = Util.loadPickle(resultsFileName)
    numDays = len(dayList2)

    treeSizesDistArray = numpy.zeros((numDays, 3000))
    treeDepthsDistArray = numpy.zeros((numDays, 100))
    #numpy.int was only an alias of the builtin int and has been removed from NumPy
    numVerticesEdgesArray = numpy.zeros((numDays, 2), int)
    numVerticesEdgesArray[:, 0] = [len(sgl) for sgl in subgraphIndicesList2]
    numVerticesEdgesArray[:, 1] = [sGraph.subgraph(sgl).getNumEdges() for sgl in subgraphIndicesList2]

    for j, day in enumerate(dayList2):
        dateStr = str(DateUtils.getDateStrFromDay(day, startYear))
        logging.info(dateStr)
        statsDict = statsDictList[j]

        degreeDist = statsDict["outDegreeDist"]
        degreeDist = degreeDist/float(numpy.sum(degreeDist))

        treeSizesDist = statsDict["treeSizesDist"]
        treeSizesDist = numpy.array(treeSizesDist, numpy.float64)/numpy.sum(treeSizesDist)
        treeSizesDistArray[j, 0:treeSizesDist.shape[0]] = treeSizesDist

        #Unlike the tree sizes, the tree depths are not normalised
        treeDepthsDist = numpy.array(statsDict["treeDepthsDist"], numpy.float64)
        treeDepthsDistArray[j, 0:treeDepthsDist.shape[0]] = treeDepthsDist

        #Every day is drawn onto the same three figures
        plotInd2 = plotInd

        plt.figure(plotInd2)
        plt.plot(numpy.arange(degreeDist.shape[0]), degreeDist, label=dateStr)
        plt.xlabel("Degree")
        plt.ylabel("Probability")
        plt.ylim((0, 0.8))
        plt.legend()
        plt.savefig(figureDir + "DegreeDist" +  ".eps")
        plotInd2 += 1

        plt.figure(plotInd2)
        plt.scatter(numpy.arange(treeSizesDist.shape[0])[treeSizesDist!=0], numpy.log(treeSizesDist[treeSizesDist!=0]), s=30, c=plotStyles2[j][0], label=dateStr)
        plt.xlabel("Size")
        plt.ylabel("log(probability)")
        plt.xlim((0, 125))
        plt.legend()
        plt.savefig(figureDir + "TreeSizeDist" +  ".eps")
        plotInd2 += 1

        plt.figure(plotInd2)
        plt.scatter(numpy.arange(treeDepthsDist.shape[0])[treeDepthsDist!=0], numpy.log(treeDepthsDist[treeDepthsDist!=0]), s=30, c=plotStyles2[j][0], label=dateStr)
        plt.xlabel("Depth")
        plt.ylabel("log(probability)")
        plt.xlim((0, 15))
        plt.legend()
        plt.savefig(figureDir + "TreeDepthDist" +  ".eps")
        plotInd2 += 1

    dateStrList = [DateUtils.getDateStrFromDay(day, startYear) for day in dayList2]
    precision = 4

    #The arrays are trimmed to the length of the distribution of the last day
    #processed - NOTE(review): this assumes the last day has the longest one
    treeSizesDistArray = treeSizesDistArray[:, 0:treeSizesDist.shape[0]]
    nonZeroCols = numpy.sum(treeSizesDistArray, 0)!=0
    print(Latex.array1DToRow(numpy.arange(treeSizesDistArray.shape[1])[nonZeroCols]))
    print(Latex.array2DToRows(treeSizesDistArray[:, nonZeroCols]))

    print("Tree depths")
    treeDepthsDistArray = treeDepthsDistArray[:, 0:treeDepthsDist.shape[0]]
    nonZeroCols = numpy.sum(treeDepthsDistArray, 0)!=0
    print(Latex.array1DToRow(numpy.arange(treeDepthsDistArray.shape[1])[nonZeroCols]))
    print(Latex.array2DToRows(treeDepthsDistArray[:, nonZeroCols]))

    print(numpy.sum(treeDepthsDistArray[:, 0:3], 1))

    print("Edges and verticies")
    print(Latex.listToRow(dateStrList))
    print(Latex.array2DToRows(numVerticesEdgesArray.T, precision))
예제 #11
0
def plotScalarStats():
    """
    Compute or plot the scalar statistics of the infection graph over time.

    When saveResults is set, the scalar statistics of sGraph (including tree
    statistics) are computed over subgraphIndicesList and pickled to
    resultsDir + "InfectGrowthScalarStats.pkl". Otherwise the pickle is
    loaded together with the statistics of the numConfigGraphs configuration
    model graphs, and four figures comparing the real graph with the mean of
    the configuration model (with standard deviation error bars) are saved as
    .eps files in figureDir, advancing the global plotInd by four.
    """
    global plotInd

    logging.info("Computing scalar stats")
    resultsFileName = resultsDir + "InfectGrowthScalarStats.pkl"

    if saveResults:
        statsArray = graphStats.sequenceScalarStats(sGraph, subgraphIndicesList, treeStats=True)
        Util.savePickle(statsArray, resultsFileName, True)
        return

    statsArray = Util.loadPickle(resultsFileName)

    #Output all the results into plots
    #Take the mean of the results over the configuration model graphs
    resultsFileNameBase = resultsDir + "ConfigInfectGraphScalarStats"
    numGraphs = len(subgraphIndicesList)
    configStatsArrays = numpy.zeros((numGraphs, graphStats.getNumStats(), numConfigGraphs))

    for j in range(numConfigGraphs):
        resultsFileName = resultsFileNameBase + str(j)
        configStatsArrays[:, :, j] = Util.loadPickle(resultsFileName)

    configStatsArray = numpy.mean(configStatsArrays, 2)
    configStatsStd = numpy.std(configStatsArrays, 2)

    #Make sure we don't include 0 in the array
    vertexIndex = numpy.argmax(statsArray[:, graphStats.numVerticesIndex] > 0)
    edgeIndex = numpy.argmax(statsArray[:, graphStats.numEdgesIndex] > 0)
    minIndex = numpy.maximum(vertexIndex, edgeIndex)

    def plotRealConfigError(index, styleReal, styleConfig, realLabel, configLabel):
        """
        Plot column index of the real statistics and of the configuration model
        mean on the current figure, with +/- one standard deviation error bars
        on the latter, and anchor both axes at zero.
        """
        #plt.hold(True) is not needed: pyplot always adds to the current axes
        #and the function no longer exists in current Matplotlib
        plt.plot(absDayList, statsArray[:, index], styleReal, label=realLabel)
        errors = numpy.c_[configStatsStd[:, index], configStatsStd[:, index]].T
        plt.plot(absDayList, configStatsArray[:, index], styleConfig, label=configLabel)
        plt.errorbar(absDayList, configStatsArray[:, index], errors, linewidth=0, elinewidth=0, label="_nolegend_", ecolor=styleConfig[0])

        plt.xlim((0, plt.xlim()[1]))
        plt.ylim((0, plt.ylim()[1]))

    plt.figure(plotInd)
    plt.plot(numpy.log(statsArray[minIndex:, graphStats.numVerticesIndex]), numpy.log(statsArray[minIndex:, graphStats.numEdgesIndex]))
    plt.xlabel("log(|V|)")
    plt.ylabel("log(|E|)")
    plt.savefig(figureDir + "LogVerticesEdgesGrowth.eps")
    plotInd += 1

    plt.figure(plotInd)
    plotRealConfigError(graphStats.numTreesIndex, plotStyles3[0], plotStyles5[0], "Trees size >= 1", "CM trees size >= 1")
    #The second pair of curves uses the second style so that it can be told apart from the first
    plotRealConfigError(graphStats.numNonSingletonTreesIndex, plotStyles3[1], plotStyles5[1], "Trees size >= 2", "CM trees size >= 2")
    plt.xticks(locs, labels)
    plt.xlabel("Year")
    plt.ylabel("No. trees")
    plt.legend(loc="upper left")
    plt.savefig(figureDir + "NumTreesGrowth.eps")
    plotInd += 1

    for k, day in enumerate(dayList):
        dayStr = str(DateUtils.getDateStrFromDay(day, startYear))
        print(dayStr + ": " + str(statsArray[k, graphStats.numTreesIndex]))
        print(dayStr + ": " + str(configStatsArray[k, graphStats.numTreesIndex]))

    #Load stats from a file to get the max tree from its root
    #The archive is opened by path (an .npz file is binary) and closed once the columns are copied
    resultsFilename = resultsDir + "treeSizesDepths.npz"
    with numpy.load(resultsFilename) as arrayDict:
        statsArray[:, graphStats.maxTreeDepthIndex] = arrayDict["arr_0"]
        statsArray[:, graphStats.maxTreeSizeIndex] = arrayDict["arr_1"]
        statsArray[:, graphStats.secondTreeDepthIndex] = arrayDict["arr_2"]
        statsArray[:, graphStats.secondTreeSizeIndex] = arrayDict["arr_3"]

    plt.figure(plotInd)
    plotRealConfigError(graphStats.maxTreeSizeIndex, plotStyles3[0], plotStyles5[0], "Max tree", "CM max tree")
    plotRealConfigError(graphStats.secondTreeSizeIndex, plotStyles3[1], plotStyles5[1], "2nd tree", "CM 2nd tree")
    plt.xticks(locs, labels)
    plt.xlabel("Year")
    plt.ylabel("Size")
    plt.legend(loc="upper left")
    plt.savefig(figureDir + "MaxTreeGrowth.eps")
    plotInd += 1

    plt.figure(plotInd)
    plotRealConfigError(graphStats.maxTreeDepthIndex, plotStyles3[0], plotStyles5[0], "Max tree", "CM max tree")
    plotRealConfigError(graphStats.secondTreeDepthIndex, plotStyles3[1], plotStyles5[1], "2nd tree", "CM 2nd tree")
    plt.xticks(locs, labels)
    plt.xlabel("Year")
    plt.ylabel("Depth")
    plt.legend(loc="lower right")
    plt.savefig(figureDir + "MaxTreeDepthGrowth.eps")

    plotInd += 1
예제 #12
0
def plotTreeStats():
    """
    Compute or plot per-tree statistics of the infection graph over time.

    When saveResults is set, the trees of each subgraph in
    subgraphIndicesList2 are found and, for every tree with more than one
    vertex, the entropy of the location and orientation columns and the range
    of the detection column of its vertices are recorded. The resulting list
    of dicts is pickled to resultsDir + "InfectGrowthTreeStats.pkl".
    Otherwise the pickle is loaded and histograms of the three quantities
    (one data set per day in dayList2) are saved as .eps files in figureDir,
    using figure numbers starting at the global plotInd.
    """
    logging.info("Computing tree stats")
    resultsFileName = resultsDir + "InfectGrowthTreeStats.pkl"

    if saveResults:
        statsDictList = []

        for j, subgraphIndices in enumerate(subgraphIndicesList2):
            Util.printIteration(j, 1, len(subgraphIndicesList2))
            subgraph = sGraph.subgraph(subgraphIndices)
            logging.info("Finding trees")
            trees = subgraph.findTrees()
            logging.info("Computing tree statistics")

            locationEntropy = []
            orientEntropy = []
            detectionRanges = []

            for tree in trees:
                #Only trees with more than one vertex contribute
                if len(tree) > 1:
                    treeGraph = subgraph.subgraph(tree)
                    vertexArray = treeGraph.getVertexList().getVertices(list(range(treeGraph.getNumVertices())))

                    locationEntropy.append(Util.entropy(vertexArray[:, locationIndex]))
                    orientEntropy.append(Util.entropy(vertexArray[:, orientationIndex]))

                    detections = vertexArray[:, detectionIndex]
                    detectionRanges.append(numpy.max(detections) - numpy.min(detections))

            statsDict = {}
            statsDict["locationEnt"] = numpy.array(locationEntropy)
            statsDict["orientEnt"] = numpy.array(orientEntropy)
            statsDict["detectRanges"] = numpy.array(detectionRanges)
            statsDictList.append(statsDict)

        Util.savePickle(statsDictList, resultsFileName, True)
        return

    statsDictList = Util.loadPickle(resultsFileName)

    locBins = numpy.arange(0, 2.4, 0.2)
    detectBins = numpy.arange(0, 6500, 500)
    locationEntDists = []
    orientEntDists = []
    detectionDists = []

    for j in range(0, len(dayList2)):
        dateStr = (str(DateUtils.getDateStrFromDay(dayList2[j], startYear)))
        logging.info(dateStr)
        statsDict = statsDictList[j]

        locationEntDists.append(statsDict["locationEnt"])
        orientEntDists.append(statsDict["orientEnt"])
        detectionDists.append(statsDict["detectRanges"])

    #Bound before the figures so that it exists even when dayList2 is empty
    plotInd2 = plotInd

    #density=True replaces the normed=True keyword that Matplotlib has removed
    plt.figure(plotInd2)
    plt.hist(locationEntDists, locBins, density=True)
    plt.xlabel("Location Entropy")
    plt.ylabel("Probability Density")
    plt.savefig(figureDir + "LocationEnt" +  ".eps")
    plotInd2 += 1

    plt.figure(plotInd2)
    plt.hist(orientEntDists, density=True)
    plt.xlabel("Orientation Entropy")
    plt.ylabel("Probability Density")
    plt.savefig(figureDir + "OrientEnt" +  ".eps")
    plotInd2 += 1

    plt.figure(plotInd2)
    plt.hist(detectionDists, detectBins, density=True)
    plt.xlabel("Detection Range (days)")
    plt.ylabel("Probability Density")
    plt.savefig(figureDir + "DetectionRanges" +  ".eps")
    plotInd2 += 1
예제 #13
0
          
        #for line in outputLists:  
         #fich.write(line[i]) 
        #Add the expertise score
        #outputLists.append(expertAuthorsInds)

         
        itemList = RankAggregator.generateItemList(outputLists)
        methodNames = graphRanker.getNames()
        
        #The output file name records which topic model produced the rankings
        if runLSI: 
            outputFilename = dataset.getOutputFieldDir(field) + "outputListsLSI.npz"
        else: 
            outputFilename = dataset.getOutputFieldDir(field) + "outputListsLDA.npz"
            
        #NOTE: despite the .npz extension the file is written with Util.savePickle
        Util.savePickle([outputLists, trainExpertMatchesInds, testExpertMatchesInds], outputFilename, debug=True)
        
        #precisions has one row per cut-off in ns and one column per ranking method
        numMethods = len(outputLists)
        precisions = numpy.zeros((len(ns), numMethods))
        averagePrecisions = numpy.zeros(numMethods)
        
        #Precision of the first n items of each ranking against the test expert matches
        for i, n in enumerate(ns):     
            for j in range(len(outputLists)): 
                precisions[i, j] = Evaluator.precisionFromIndLists(testExpertMatchesInds, outputLists[j][0:n]) 
            
        #Average precision of the first averagePrecisionN items of each ranking
        for j in range(len(outputLists)):                 
            averagePrecisions[j] = Evaluator.averagePrecisionFromLists(testExpertMatchesInds, outputLists[j][0:averagePrecisionN], averagePrecisionN) 
        
        #Prepend the cut-off values as the first column
        precisions2 = numpy.c_[numpy.array(ns), precisions]
        
        logging.debug(Latex.listToRow(methodNames))
예제 #14
0
def plotVectorStats():
    #Finally, compute some vector stats at various points in the graph
    logging.info("Computing vector stats")
    global plotInd
    resultsFileName = resultsDir + "ContactGrowthVectorStats.pkl"

    if saveResults:
        statsDictList = graphStats.sequenceVectorStats(sGraph, subgraphIndicesList2)
        Util.savePickle(statsDictList, resultsFileName, False)
    else:
        statsDictList = Util.loadPickle(resultsFileName)

        #Load up configuration model results
        configStatsDictList = []
        resultsFileNameBase = resultsDir + "ConfigGraphVectorStats"

        for j in range(numConfigGraphs):
            resultsFileName = resultsFileNameBase + str(j)
            configStatsDictList.append(Util.loadPickle(resultsFileName))

        #Now need to take mean of 1st element of list
        meanConfigStatsDictList = configStatsDictList[0]
        for i in range(len(configStatsDictList[0])):
            for k in range(1, numConfigGraphs):
                for key in configStatsDictList[k][i].keys():
                    if configStatsDictList[k][i][key].shape[0] > meanConfigStatsDictList[i][key].shape[0]:
                        meanConfigStatsDictList[i][key] = numpy.r_[meanConfigStatsDictList[i][key], numpy.zeros(configStatsDictList[k][i][key].shape[0] - meanConfigStatsDictList[i][key].shape[0])]
                    elif configStatsDictList[k][i][key].shape[0] < meanConfigStatsDictList[i][key].shape[0]:
                        configStatsDictList[k][i][key] = numpy.r_[configStatsDictList[k][i][key], numpy.zeros(meanConfigStatsDictList[i][key].shape[0] - configStatsDictList[k][i][key].shape[0])]

                    meanConfigStatsDictList[i][key] += configStatsDictList[k][i][key]

            for key in configStatsDictList[0][i].keys():
                meanConfigStatsDictList[i][key] = meanConfigStatsDictList[i][key]/numConfigGraphs


        triangleDistArray = numpy.zeros((len(dayList2), 100))
        configTriangleDistArray = numpy.zeros((len(dayList2), 100))
        hopPlotArray = numpy.zeros((len(dayList2), 27))
        configHopPlotArray = numpy.zeros((len(dayList2), 30))
        componentsDistArray = numpy.zeros((len(dayList2), 3000))
        configComponentsDistArray = numpy.zeros((len(dayList2), 3000))
        numVerticesEdgesArray = numpy.zeros((len(dayList2), 2), numpy.int)
        numVerticesEdgesArray[:, 0] = [len(sgl) for sgl in subgraphIndicesList2]
        numVerticesEdgesArray[:, 1] = [sGraph.subgraph(sgl).getNumEdges() for sgl in subgraphIndicesList2]

        binWidths = numpy.arange(0, 0.50, 0.05)
        eigVectorDists = numpy.zeros((len(dayList2), binWidths.shape[0]-1), numpy.int)

        femaleSums = numpy.zeros(len(dayList2))
        maleSums = numpy.zeros(len(dayList2))
        heteroSums = numpy.zeros(len(dayList2))
        biSums = numpy.zeros(len(dayList2))

        contactSums = numpy.zeros(len(dayList2))
        nonContactSums = numpy.zeros(len(dayList2))
        donorSums = numpy.zeros(len(dayList2))
        randomTestSums = numpy.zeros(len(dayList2))
        stdSums = numpy.zeros(len(dayList2))
        prisonerSums = numpy.zeros(len(dayList2))
        recommendSums = numpy.zeros(len(dayList2))
        
        meanAges = numpy.zeros(len(dayList2))
        degrees = numpy.zeros((len(dayList2), 20))

        provinces = numpy.zeros((len(dayList2), 15))

        havanaSums = numpy.zeros(len(dayList2))
        villaClaraSums = numpy.zeros(len(dayList2))
        pinarSums = numpy.zeros(len(dayList2))
        holguinSums = numpy.zeros(len(dayList2))
        habanaSums = numpy.zeros(len(dayList2))
        sanctiSums = numpy.zeros(len(dayList2))

        meanDegrees = numpy.zeros(len(dayList2))
        stdDegrees = numpy.zeros(len(dayList2))

        #Note that death has a lot of missing values
        for j in range(len(dayList2)):
            dateStr = (str(DateUtils.getDateStrFromDay(dayList2[j], startYear)))
            logging.info(dateStr)
            statsDict = statsDictList[j]
            configStatsDict = meanConfigStatsDictList[j]

            degreeDist = statsDict["outDegreeDist"]
            degreeDist = degreeDist/float(numpy.sum(degreeDist))
            #Note that degree distribution for configuration graph will be identical 

            eigenDist = statsDict["eigenDist"]
            eigenDist = numpy.log(eigenDist[eigenDist>=10**-1])
            #configEigenDist = configStatsDict["eigenDist"]
            #configEigenDist = numpy.log(configEigenDist[configEigenDist>=10**-1])

            hopCount = statsDict["hopCount"]
            hopCount = numpy.log10(hopCount)
            hopPlotArray[j, 0:hopCount.shape[0]] = hopCount
            configHopCount = configStatsDict["hopCount"]
            configHopCount = numpy.log10(configHopCount)
            #configHopPlotArray[j, 0:configHopCount.shape[0]] = configHopCount

            triangleDist = statsDict["triangleDist"]
            #triangleDist = numpy.array(triangleDist, numpy.float64)/numpy.sum(triangleDist)
            triangleDist = numpy.array(triangleDist, numpy.float64)
            triangleDistArray[j, 0:triangleDist.shape[0]] = triangleDist
            configTriangleDist = configStatsDict["triangleDist"]
            configTriangleDist = numpy.array(configTriangleDist, numpy.float64)/numpy.sum(configTriangleDist)
            configTriangleDistArray[j, 0:configTriangleDist.shape[0]] = configTriangleDist

            maxEigVector = statsDict["maxEigVector"]
            eigenvectorInds = numpy.flipud(numpy.argsort(numpy.abs(maxEigVector)))
            top10eigenvectorInds = eigenvectorInds[0:numpy.round(eigenvectorInds.shape[0]/10.0)]
            maxEigVector = numpy.abs(maxEigVector[eigenvectorInds])
            #print(maxEigVector)
            eigVectorDists[j, :] = numpy.histogram(maxEigVector, binWidths)[0]

            componentsDist = statsDict["componentsDist"]
            componentsDist = numpy.array(componentsDist, numpy.float64)/numpy.sum(componentsDist)
            componentsDistArray[j, 0:componentsDist.shape[0]] = componentsDist
            configComponentsDist = configStatsDict["componentsDist"]
            configComponentsDist = numpy.array(configComponentsDist, numpy.float64)/numpy.sum(configComponentsDist)
            configComponentsDistArray[j, 0:configComponentsDist.shape[0]] = configComponentsDist

            plotInd2 = plotInd

            plt.figure(plotInd2)
            plt.plot(numpy.arange(degreeDist.shape[0]), degreeDist, plotStyles2[j], label=dateStr)
            plt.xlabel("Degree")
            plt.ylabel("Probability")
            plt.ylim((0, 0.5))
            plt.savefig(figureDir + "DegreeDist" +  ".eps")
            plt.legend()
            plotInd2 += 1

            """
            plt.figure(plotInd2)
            plt.plot(numpy.arange(eigenDist.shape[0]), eigenDist, label=dateStr)
            plt.xlabel("Eigenvalue rank")
            plt.ylabel("log(Eigenvalue)")
            plt.savefig(figureDir + "EigenDist" +  ".eps")
            plt.legend()
            plotInd2 += 1
            """

            #How does kleinberg do the hop plots 
            plt.figure(plotInd2)
            plt.plot(numpy.arange(hopCount.shape[0]), hopCount, plotStyles[j], label=dateStr)
            plt.xlabel("k")
            plt.ylabel("log10(pairs)")
            plt.ylim( (2.5, 7) )
            plt.legend(loc="lower right")
            plt.savefig(figureDir + "HopCount" + ".eps")
            plotInd2 += 1
            
            plt.figure(plotInd2)
            plt.plot(numpy.arange(maxEigVector.shape[0]), maxEigVector, plotStyles2[j], label=dateStr)
            plt.xlabel("Rank")
            plt.ylabel("log(eigenvector coefficient)")
            plt.savefig(figureDir + "MaxEigVector" +  ".eps")
            plt.legend()
            plotInd2 += 1

            #Compute some information the 10% most central vertices
            
            subgraphIndices = numpy.nonzero(detections <= dayList2[j])[0]
            subgraph = sGraph.subgraph(subgraphIndices)
            subgraphVertexArray = subgraph.getVertexList().getVertices()

            femaleSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, genderIndex]==1)
            maleSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, genderIndex]==0)
            heteroSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, orientationIndex]==0)
            biSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, orientationIndex]==1)

            contactSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, contactIndex])
            donorSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, donorIndex])
            randomTestSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, randomTestIndex])
            stdSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, stdIndex])
            prisonerSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, prisonerIndex])
            recommendSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, doctorIndex])

            meanAges[j] = numpy.mean(subgraphVertexArray[top10eigenvectorInds, detectionIndex] - subgraphVertexArray[top10eigenvectorInds, dobIndex])/daysInYear

            havanaSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, havanaIndex])
            villaClaraSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, villaClaraIndex])
            pinarSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, pinarIndex])
            holguinSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, holguinIndex])
            habanaSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, habanaIndex])
            sanctiSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, sanctiIndex])

            provinces[j, :] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, 22:37], 0)

            ddist = numpy.bincount(subgraph.outDegreeSequence()[top10eigenvectorInds])
            degrees[j, 0:ddist.shape[0]] = numpy.array(ddist, numpy.float)/numpy.sum(ddist)

            meanDegrees[j] = numpy.mean(subgraph.outDegreeSequence()[top10eigenvectorInds])
            stdDegrees[j] = numpy.std(subgraph.outDegreeSequence()[top10eigenvectorInds])


            plt.figure(plotInd2)
            plt.plot(numpy.arange(degrees[j, :].shape[0]), degrees[j, :], plotStyles2[j], label=dateStr)
            plt.xlabel("Degree")
            plt.ylabel("Probability")
            #plt.ylim((0, 0.5))
            plt.savefig(figureDir + "DegreeDistCentral" +  ".eps")
            plt.legend()
            plotInd2 += 1

        precision = 4
        dateStrList = [DateUtils.getDateStrFromDay(day, startYear) for day in dayList2]

        print("Hop counts")
        print(Latex.listToRow(dateStrList))
        print(Latex.array2DToRows(hopPlotArray.T))

        print("\nHop counts for configuration graphs")
        print(Latex.listToRow(dateStrList))
        print(Latex.array2DToRows(configHopPlotArray.T))

        print("\n\nEdges and vertices")
        print((Latex.listToRow(dateStrList)))
        print((Latex.array2DToRows(numVerticesEdgesArray.T, precision)))

        print("\n\nEigenvector distribution")
        print((Latex.array1DToRow(binWidths[1:]) + "\\\\"))
        print((Latex.array2DToRows(eigVectorDists)))

        print("\n\nDistribution of component sizes")
        componentsDistArray = componentsDistArray[:, 0:componentsDist.shape[0]]
        nonZeroCols = numpy.sum(componentsDistArray, 0)!=0
        componentsDistArray = numpy.r_[numpy.array([numpy.arange(componentsDistArray.shape[1])[nonZeroCols]]), componentsDistArray[:, nonZeroCols]]
        print((Latex.listToRow(dateStrList)))
        print((Latex.array2DToRows(componentsDistArray.T, precision)))

        print("\n\nDistribution of component sizes in configuration graphs")
        configComponentsDistArray = configComponentsDistArray[:, 0:configComponentsDist.shape[0]]
        nonZeroCols = numpy.sum(configComponentsDistArray, 0)!=0
        configComponentsDistArray = numpy.r_[numpy.array([numpy.arange(configComponentsDistArray.shape[1])[nonZeroCols]]), configComponentsDistArray[:, nonZeroCols]]
        print((Latex.listToRow(dateStrList)))
        print((Latex.array2DToRows(configComponentsDistArray.T, precision)))

        print("\n\nDistribution of triangle participations")
        triangleDistArray = triangleDistArray[:, 0:triangleDist.shape[0]]
        nonZeroCols = numpy.sum(triangleDistArray, 0)!=0
        triangleDistArray = numpy.r_[numpy.array([numpy.arange(triangleDistArray.shape[1])[nonZeroCols]])/2, triangleDistArray[:, nonZeroCols]]
        print((Latex.listToRow(dateStrList)))
        print((Latex.array2DToRows(triangleDistArray.T, precision)))

        configTriangleDistArray = configTriangleDistArray[:, 0:configTriangleDist.shape[0]]
        nonZeroCols = numpy.sum(configTriangleDistArray, 0)!=0
        configTriangleDistArray = numpy.r_[numpy.array([numpy.arange(configTriangleDistArray.shape[1])[nonZeroCols]])/2, configTriangleDistArray[:, nonZeroCols]]
        configTriangleDistArray = numpy.c_[configTriangleDistArray, numpy.zeros((configTriangleDistArray.shape[0], triangleDistArray.shape[1]-configTriangleDistArray.shape[1]))]

        print("\n\nDistribution of central vertices")
        print((Latex.listToRow(dateStrList)))
        subgraphSizes = numpy.array(maleSums + femaleSums, numpy.float)
        print("Female & " + Latex.array1DToRow(femaleSums*100/subgraphSizes, 1) + "\\\\")
        print("Male & " + Latex.array1DToRow(maleSums*100/subgraphSizes, 1) + "\\\\")
        print("\hline")
        print("Heterosexual & " + Latex.array1DToRow(heteroSums*100/subgraphSizes, 1) + "\\\\")
        print("Bisexual & " + Latex.array1DToRow(biSums*100/subgraphSizes, 1) + "\\\\")
        print("\hline")
        print("Contact traced & " + Latex.array1DToRow(contactSums*100/subgraphSizes, 1) + "\\\\")
        print("Blood donor & " + Latex.array1DToRow(donorSums*100/subgraphSizes, 1) + "\\\\")
        print("RandomTest & " + Latex.array1DToRow(randomTestSums*100/subgraphSizes, 1) + "\\\\")
        print("STD & " + Latex.array1DToRow(stdSums*100/subgraphSizes, 1) + "\\\\")
        print("Prisoner & " + Latex.array1DToRow(prisonerSums*100/subgraphSizes, 1) + "\\\\")
        print("Doctor recommendation & " + Latex.array1DToRow(recommendSums*100/subgraphSizes, 1) + "\\\\")
        print("\hline")
        print("Mean ages (years) & " + Latex.array1DToRow(meanAges, 2) + "\\\\")
        print("\hline")
        print("Holguin & " + Latex.array1DToRow(holguinSums*100/subgraphSizes, 1) + "\\\\")
        print("La Habana & " + Latex.array1DToRow(habanaSums*100/subgraphSizes, 1) + "\\\\")
        print("Havana City & " + Latex.array1DToRow(havanaSums*100/subgraphSizes, 1) + "\\\\")
        print("Pinar del Rio & " + Latex.array1DToRow(pinarSums*100/subgraphSizes, 1) + "\\\\")
        print("Sancti Spiritus & " + Latex.array1DToRow(sanctiSums*100/subgraphSizes, 1) + "\\\\")
        print("Villa Clara & " + Latex.array1DToRow(villaClaraSums*100/subgraphSizes, 1) + "\\\\")
        print("\hline")
        print("Mean degrees & " + Latex.array1DToRow(meanDegrees, 2) + "\\\\")
        print("Std degrees & " + Latex.array1DToRow(stdDegrees, 2) + "\\\\")
        
        print("\n\nProvinces")
        print(Latex.array2DToRows(provinces))

        print("\n\nDegree distribution")
        print(Latex.array2DToRows(degrees))
예제 #15
0
def plotScalarStats():
    """
    Compute, or load and plot, the scalar statistics of the contact graph.

    If the module-level flag saveResults is set, the statistics of sGraph over
    subgraphIndicesList are computed with graphStats, pickled to
    resultsDir + "ContactGrowthScalarStats.pkl", and the function returns
    without plotting.

    Otherwise the pickled statistics are loaded together with the statistics
    of the numConfigGraphs configuration model (CM) graphs. One figure per
    statistic is written to figureDir as an .eps file; each CM curve is the
    mean over the configuration graphs with error bars of one standard
    deviation. Finally a LaTeX table comparing the effective diameter and the
    max component mean geodesic distance of the real and CM graphs is printed
    for every fourth entry of dayList.

    The global figure counter plotInd is incremented once per figure.
    """
    global plotInd

    logging.info("Computing scalar stats")

    resultsFileName = resultsDir + "ContactGrowthScalarStats.pkl"

    if saveResults:
        statsArray = graphStats.sequenceScalarStats(sGraph, subgraphIndicesList, slowStats)
        Util.savePickle(statsArray, resultsFileName, True)
        #The figures and the table need the configuration model statistics,
        #which are only loaded in plotting mode. Falling through to the table
        #used to raise a NameError on configStatsArray, so stop here.
        return

    statsArray = Util.loadPickle(resultsFileName)

    #Take the mean of the results over the configuration model graphs
    resultsFileNameBase = resultsDir + "ConfigGraphScalarStats"
    numGraphs = len(subgraphIndicesList)
    #NOTE(review): the "-2" presumably matches two statistics that were not
    #computed for the configuration graphs - confirm against the code that
    #writes the ConfigGraphScalarStats files.
    configStatsArrays = numpy.zeros((numGraphs, graphStats.getNumStats()-2, numConfigGraphs))

    for j in range(numConfigGraphs):
        configStatsArrays[:, :, j] = Util.loadPickle(resultsFileNameBase + str(j))

    configStatsArray = numpy.mean(configStatsArrays, 2)
    configStatsStd = numpy.std(configStatsArrays, 2)

    def holdOn():
        #pyplot.hold was removed in matplotlib 3.0, where successive plot
        #calls always draw onto the same axes. Only call it where it exists.
        if hasattr(plt, "hold"):
            plt.hold(True)

    def plotRealConfigError(index, styleReal, styleConfig, realLabel, configLabel):
        #Plot column "index" of the real statistics and of the CM mean against
        #absDayList, with +/- one standard deviation error bars on the CM curve.
        holdOn()
        plt.plot(absDayList, statsArray[:, index], styleReal, label=realLabel)
        errors = numpy.c_[configStatsStd[:, index], configStatsStd[:, index]].T
        plt.plot(absDayList, configStatsArray[:, index], styleConfig, label=configLabel)
        plt.errorbar(absDayList, configStatsArray[:, index], errors, linewidth=0, elinewidth=1, label="_nolegend_", ecolor="red")

        #Anchor both axes at zero and keep the automatically chosen upper limits
        plt.xlim((0, plt.xlim()[1]))
        plt.ylim((0, plt.ylim()[1]))

    def finishYearFigure(yLabel, fileName, showLegend=True, legendLoc=None):
        #Label the current figure (x axis is always the year), optionally add
        #a legend, save it under figureDir and advance the figure counter.
        global plotInd
        plt.xticks(locs, labels)
        plt.xlabel("Year")
        plt.ylabel(yLabel)
        if showLegend:
            if legendLoc is None:
                plt.legend()
            else:
                plt.legend(loc=legendLoc)
        plt.savefig(figureDir + fileName)
        plotInd += 1

    #Output all the results into plots
    plt.figure(plotInd)
    holdOn()
    plotRealConfigError(graphStats.maxComponentSizeIndex, plotStyleBW[0], plotStyles4[0], "Max comp. vertices", "CM max comp. vertices")
    plotRealConfigError(graphStats.maxComponentEdgesIndex, plotStyleBW[1], plotStyles4[1], "Max comp. edges", "CM max comp. edges")
    finishYearFigure("No. vertices/edges", "MaxComponentSizeGrowth.eps", legendLoc="upper left")

    for k, day in enumerate(dayList):
        print(str(DateUtils.getDateStrFromDay(day, startYear)) + ": " + str(statsArray[k, graphStats.maxComponentEdgesIndex]))

    plt.figure(plotInd)
    plotRealConfigError(graphStats.numComponentsIndex, plotStyleBW[0], plotStyles4[0], "Size >= 1", "CM size >= 1")
    plotRealConfigError(graphStats.numNonSingletonComponentsIndex, plotStyleBW[1], plotStyles4[1], "Size >= 2", "CM size >= 2")
    plotRealConfigError(graphStats.numTriOrMoreComponentsIndex, plotStyleBW[2], plotStyles4[2], "Size >= 3", "CM size >= 3")
    finishYearFigure("No. components", "NumComponentsGrowth.eps", legendLoc="upper left")

    plt.figure(plotInd)
    plotRealConfigError(graphStats.meanComponentSizeIndex, plotStyleBW[0], plotStyles4[0], "Real graph", "CM")
    finishYearFigure("Mean component size", "MeanComponentSizeGrowth.eps", legendLoc="lower right")

    plt.figure(plotInd)
    plotRealConfigError(graphStats.diameterIndex, plotStyleBW[0], plotStyles4[0], "Real graph", "CM")
    finishYearFigure("Max component diameter", "MaxComponentDiameterGrowth.eps", legendLoc="lower right")

    plt.figure(plotInd)
    plotRealConfigError(graphStats.effectiveDiameterIndex, plotStyleBW[0], plotStyles4[0], "Real graph", "CM")
    finishYearFigure("Effective diameter", "MaxComponentEffDiameterGrowth.eps", legendLoc="lower right")

    plt.figure(plotInd)
    plotRealConfigError(graphStats.meanDegreeIndex, plotStyleBW[0], plotStyles4[0], "All vertices", "CM all vertices")
    plotRealConfigError(graphStats.maxCompMeanDegreeIndex, plotStyleBW[1], plotStyles4[1], "Max component", "CM max component")
    finishYearFigure("Mean degree", "MeanDegrees.eps", legendLoc="lower right")

    plt.figure(plotInd)
    plotRealConfigError(graphStats.densityIndex, plotStyleBW[0], plotStyles4[0], "Real Graph", "Config Model")
    finishYearFigure("Density", "DensityGrowth.eps")

    #The power law exponent is plotted for the real graph only, without a legend
    plt.figure(plotInd)
    plt.plot(absDayList, statsArray[:, graphStats.powerLawIndex], plotStyleBW[0])
    finishYearFigure("Alpha", "PowerLawGrowth.eps", showLegend=False)

    plt.figure(plotInd)
    plotRealConfigError(graphStats.geodesicDistanceIndex, plotStyleBW[0], plotStyles4[0], "Real Graph", "Config Model")
    finishYearFigure("Geodesic distance", "GeodesicGrowth.eps", legendLoc="lower right")

    plt.figure(plotInd)
    plotRealConfigError(graphStats.harmonicGeoDistanceIndex, plotStyleBW[0], plotStyles4[0], "Real Graph", "Config Model")
    finishYearFigure("Mean harmonic geodesic distance", "HarmonicGeodesicGrowth.eps", legendLoc="upper right")

    plt.figure(plotInd)
    plotRealConfigError(graphStats.geodesicDistMaxCompIndex, plotStyleBW[0], plotStyles4[0], "Real graph", "Config model")
    finishYearFigure("Max component mean geodesic distance", "MaxCompGeodesicGrowth.eps", legendLoc="lower right")

    #Find the number of edges in the infection graph
    infectStatsArray = Util.loadPickle(resultsDir + "InfectGrowthScalarStats.pkl")

    #Make sure we don't include 0 in the array: argmax of a boolean array is
    #the index of its first True entry, so start where both counts are positive
    vertexIndex = numpy.argmax(statsArray[:, graphStats.numVerticesIndex] > 0)
    edgeIndex = numpy.argmax(infectStatsArray[:, graphStats.numEdgesIndex] > 0)
    minIndex = numpy.maximum(vertexIndex, edgeIndex)

    plt.figure(plotInd)
    plt.plot(numpy.log(statsArray[minIndex:, graphStats.numVerticesIndex]), numpy.log(statsArray[minIndex:, graphStats.numEdgesIndex]), plotStyleBW[0])
    plt.plot(numpy.log(infectStatsArray[minIndex:, graphStats.numVerticesIndex]), numpy.log(infectStatsArray[minIndex:, graphStats.numEdgesIndex]), plotStyleBW[1])
    plt.plot(numpy.log(statsArray[minIndex:, graphStats.maxComponentSizeIndex]), numpy.log(statsArray[minIndex:, graphStats.maxComponentEdgesIndex]), plotStyleBW[2])
    plt.xlabel("log(|V|)")
    plt.ylabel("log(|E|)/log(|D|)")
    plt.legend(("Contact graph", "Infection graph", "Max component"), loc="upper left")
    plt.savefig(figureDir + "LogVerticesEdgesGrowth.eps")
    plotInd += 1

    #One column per table heading: real and CM values side by side
    results = numpy.c_[
        statsArray[:, graphStats.effectiveDiameterIndex],
        configStatsArray[:, graphStats.effectiveDiameterIndex],
        statsArray[:, graphStats.geodesicDistMaxCompIndex],
        configStatsArray[:, graphStats.geodesicDistMaxCompIndex]]

    print("\n\n")
    print(Latex.listToRow(["Diameter", "CM Diameter", "Mean Geodesic", "CM Mean Geodesic"]))
    print("\\hline")
    #Only every fourth date is tabulated
    for i in range(0, len(dayList), 4):
        day = dayList[i]
        print(str(DateUtils.getDateStrFromDay(day, startYear)) + " & " + Latex.array1DToRow(results[i, :]) + "\\\\")
예제 #16
0
def plotVertexStats():
    #Calculate all vertex statistics
    logging.info("Computing vertex stats")
    
    #Indices
    numContactsIndex = fInds["numContacts"]
    numTestedIndex = fInds["numTested"]
    numPositiveIndex = fInds["numPositive"]

    #Properties of vertex values
    detectionAges = []
    deathAfterInfectAges = []
    deathAges = []
    homoMeans = []

    maleSums = []
    femaleSums = []
    heteroSums = []
    biSums = []

    contactMaleSums = []
    contactFemaleSums = []
    contactHeteroSums = []
    contactBiSums = []

    doctorMaleSums = []
    doctorFemaleSums = []
    doctorHeteroSums = []
    doctorBiSums = []

    contactSums = []
    nonContactSums = []
    donorSums = []
    randomTestSums = []
    stdSums = []
    prisonerSums = []
    recommendSums = []
    #This is: all detections - contact, donor, randomTest, str, recommend
    otherSums = []

    havanaSums = []
    villaClaraSums = []
    pinarSums = []
    holguinSums = []
    habanaSums = []
    sanctiSums = []

    numContactSums = []
    numTestedSums = []
    numPositiveSums = []

    #Total number of sexual contacts 
    numContactMaleSums = []
    numContactFemaleSums = []
    numContactHeteroSums = []
    numContactBiSums = []

    numTestedMaleSums = []
    numTestedFemaleSums = []
    numTestedHeteroSums = []
    numTestedBiSums = []

    numPositiveMaleSums = []
    numPositiveFemaleSums = []
    numPositiveHeteroSums = []
    numPositiveBiSums = []

    propPositiveMaleSums = []
    propPositiveFemaleSums = []
    propPositiveHeteroSums = []
    propPositiveBiSums = []

    numContactVertices = []
    numContactEdges = []
    numInfectEdges = []

    #Mean proportion of degree at end of epidemic 
    meanPropDegree = []
    finalDegreeSequence = numpy.array(sGraph.outDegreeSequence(), numpy.float) 

    degreeOneSums = []
    degreeTwoSums = []
    degreeThreePlusSums = []

    numProvinces = 15
    provinceArray = numpy.zeros((len(subgraphIndicesList), numProvinces))
    m = 0 

    for subgraphIndices in subgraphIndicesList: 
        subgraph = sGraph.subgraph(subgraphIndices)
        infectSubGraph = sGraphInfect.subgraph(subgraphIndices)

        subgraphVertexArray = subgraph.getVertexList().getVertices(range(subgraph.getNumVertices()))

        detectionAges.append(numpy.mean((subgraphVertexArray[:, detectionIndex] - subgraphVertexArray[:, dobIndex]))/daysInYear)
        deathAfterInfectAges.append((numpy.mean(subgraphVertexArray[:, deathIndex] - subgraphVertexArray[:, detectionIndex]))/daysInYear)
        deathAges.append(numpy.mean((subgraphVertexArray[:, deathIndex] - subgraphVertexArray[:, dobIndex]))/daysInYear)
        homoMeans.append(numpy.mean(subgraphVertexArray[:, orientationIndex]))

        nonContactSums.append(subgraphVertexArray.shape[0] - numpy.sum(subgraphVertexArray[:, contactIndex]))
        contactSums.append(numpy.sum(subgraphVertexArray[:, contactIndex]))
        donorSums.append(numpy.sum(subgraphVertexArray[:, donorIndex]))
        randomTestSums.append(numpy.sum(subgraphVertexArray[:, randomTestIndex]))
        stdSums.append(numpy.sum(subgraphVertexArray[:, stdIndex]))
        prisonerSums.append(numpy.sum(subgraphVertexArray[:, prisonerIndex]))
        recommendSums.append(numpy.sum(subgraphVertexArray[:, doctorIndex]))
        otherSums.append(subgraphVertexArray.shape[0] - numpy.sum(subgraphVertexArray[:, [contactIndex, donorIndex, randomTestIndex, stdIndex, doctorIndex]]))

        heteroSums.append(numpy.sum(subgraphVertexArray[:, orientationIndex]==0))
        biSums.append(numpy.sum(subgraphVertexArray[:, orientationIndex]==1))

        femaleSums.append(numpy.sum(subgraphVertexArray[:, genderIndex]==1))
        maleSums.append(numpy.sum(subgraphVertexArray[:, genderIndex]==0))

        contactHeteroSums.append(numpy.sum(numpy.logical_and(subgraphVertexArray[:, orientationIndex]==0, subgraphVertexArray[:, contactIndex])))
        contactBiSums.append(numpy.sum(numpy.logical_and(subgraphVertexArray[:, orientationIndex]==1, subgraphVertexArray[:, contactIndex])))
        contactFemaleSums.append(numpy.sum(numpy.logical_and(subgraphVertexArray[:, genderIndex]==1, subgraphVertexArray[:, contactIndex])))
        contactMaleSums.append(numpy.sum(numpy.logical_and(subgraphVertexArray[:, genderIndex]==0, subgraphVertexArray[:, contactIndex])))

        doctorHeteroSums.append(numpy.sum(numpy.logical_and(subgraphVertexArray[:, orientationIndex]==0, subgraphVertexArray[:, doctorIndex])))
        doctorBiSums.append(numpy.sum(numpy.logical_and(subgraphVertexArray[:, orientationIndex]==1, subgraphVertexArray[:, doctorIndex])))
        doctorFemaleSums.append(numpy.sum(numpy.logical_and(subgraphVertexArray[:, genderIndex]==1, subgraphVertexArray[:, doctorIndex])))
        doctorMaleSums.append(numpy.sum(numpy.logical_and(subgraphVertexArray[:, genderIndex]==0, subgraphVertexArray[:, doctorIndex])))

        havanaSums.append(numpy.sum(subgraphVertexArray[:, havanaIndex]==1))
        villaClaraSums.append(numpy.sum(subgraphVertexArray[:, villaClaraIndex]==1))
        pinarSums.append(numpy.sum(subgraphVertexArray[:, pinarIndex]==1))
        holguinSums.append(numpy.sum(subgraphVertexArray[:, holguinIndex]==1))
        habanaSums.append(numpy.sum(subgraphVertexArray[:, habanaIndex]==1))
        sanctiSums.append(numpy.sum(subgraphVertexArray[:, sanctiIndex]==1))

        numContactSums.append(numpy.mean(subgraphVertexArray[:, numContactsIndex]))
        numTestedSums.append(numpy.mean(subgraphVertexArray[:, numTestedIndex]))
        numPositiveSums.append(numpy.mean(subgraphVertexArray[:, numPositiveIndex]))

        numContactMaleSums.append(numpy.mean(subgraphVertexArray[subgraphVertexArray[:, genderIndex]==0, numContactsIndex]))
        numContactFemaleSums.append(numpy.mean(subgraphVertexArray[subgraphVertexArray[:, genderIndex]==1, numContactsIndex]))
        numContactHeteroSums.append(numpy.mean(subgraphVertexArray[subgraphVertexArray[:, orientationIndex]==0, numContactsIndex]))
        numContactBiSums.append(numpy.mean(subgraphVertexArray[subgraphVertexArray[:, orientationIndex]==1, numContactsIndex]))

        numTestedMaleSums.append(numpy.mean(subgraphVertexArray[subgraphVertexArray[:, genderIndex]==0, numTestedIndex]))
        numTestedFemaleSums.append(numpy.mean(subgraphVertexArray[subgraphVertexArray[:, genderIndex]==1, numTestedIndex]))
        numTestedHeteroSums.append(numpy.mean(subgraphVertexArray[subgraphVertexArray[:, orientationIndex]==0, numTestedIndex]))
        numTestedBiSums.append(numpy.mean(subgraphVertexArray[subgraphVertexArray[:, orientationIndex]==1, numTestedIndex]))

        numPositiveMaleSums.append(numpy.mean(subgraphVertexArray[subgraphVertexArray[:, genderIndex]==0, numPositiveIndex]))
        numPositiveFemaleSums.append(numpy.mean(subgraphVertexArray[subgraphVertexArray[:, genderIndex]==1, numPositiveIndex]))
        numPositiveHeteroSums.append(numpy.mean(subgraphVertexArray[subgraphVertexArray[:, orientationIndex]==0, numPositiveIndex]))
        numPositiveBiSums.append(numpy.mean(subgraphVertexArray[subgraphVertexArray[:, orientationIndex]==1, numPositiveIndex]))

        propPositiveMaleSums.append(numPositiveMaleSums[m]/float(numTestedMaleSums[m]))
        propPositiveFemaleSums.append(numPositiveFemaleSums[m]/float(numTestedFemaleSums[m]))
        propPositiveHeteroSums.append(numPositiveHeteroSums[m]/float(numTestedHeteroSums[m]))
        propPositiveBiSums.append(numPositiveBiSums[m]/float(numTestedMaleSums[m]))

        numContactVertices.append(subgraph.getNumVertices())
        numContactEdges.append(subgraph.getNumEdges())
        numInfectEdges.append(infectSubGraph.getNumEdges())

        nonZeroInds = finalDegreeSequence[subgraphIndices]!=0
        propDegrees = numpy.mean(subgraph.outDegreeSequence()[nonZeroInds]/finalDegreeSequence[subgraphIndices][nonZeroInds])
        meanPropDegree.append(numpy.mean(propDegrees)) 

        degreeOneSums.append(numpy.sum(subgraph.outDegreeSequence()==1))
        degreeTwoSums.append(numpy.sum(subgraph.outDegreeSequence()==2))
        degreeThreePlusSums.append(numpy.sum(subgraph.outDegreeSequence()>=3))

        provinceArray[m, :] = numpy.sum(subgraphVertexArray[:, fInds["CA"]:fInds['VC']+1], 0)
        m += 1 

    #Save some of the results for the ABC work
    numStats = 2 
    vertexStatsArray = numpy.zeros((len(subgraphIndicesList), numStats))
    vertexStatsArray[:, 0] = numpy.array(biSums)
    vertexStatsArray[:, 1] = numpy.array(heteroSums)

    resultsFileName = resultsDir + "ContactGrowthVertexStats.pkl"
    Util.savePickle(vertexStatsArray, resultsFileName)

    global plotInd 

    plt.figure(plotInd)
    plt.plot(absDayList, detectionAges)
    plt.xticks(locs, labels)
    plt.xlabel("Year")
    plt.ylabel("Detection Age (years)")
    plt.savefig(figureDir + "DetectionMeansGrowth.eps")
    plotInd += 1

    plt.figure(plotInd)
    plt.plot(absDayList, heteroSums, 'k-', absDayList, biSums, 'k--', absDayList, femaleSums, 'k-.', absDayList, maleSums, 'k:')
    plt.xticks(locs, labels)
    plt.xlabel("Year")
    plt.ylabel("Detections")
    plt.legend(("Heterosexual", "MSM", "Female", "Male"), loc="upper left")
    plt.savefig(figureDir + "OrientationGenderGrowth.eps")
    plotInd += 1

    plt.figure(plotInd)
    plt.plot(absDayList, contactHeteroSums, 'k-', absDayList, contactBiSums, 'k--', absDayList, contactFemaleSums, 'k-.', absDayList, contactMaleSums, 'k:')
    plt.xticks(locs, labels)
    plt.xlabel("Year")
    plt.ylabel("Contact tracing detections")
    plt.legend(("Heterosexual", "MSM", "Female", "Male"), loc="upper left")
    plt.savefig(figureDir + "OrientationGenderContact.eps")
    plotInd += 1

    plt.figure(plotInd)
    plt.plot(absDayList, doctorHeteroSums, 'k-', absDayList, doctorBiSums, 'k--', absDayList, doctorFemaleSums, 'k-.', absDayList, doctorMaleSums, 'k:')
    plt.xticks(locs, labels)
    plt.xlabel("Year")
    plt.ylabel("Doctor recommendation detections")
    plt.legend(("Heterosexual", "MSM", "Female", "Male"), loc="upper left")
    plt.savefig(figureDir + "OrientationGenderDoctor.eps")
    plotInd += 1



    #Plot all the provinces 
    plt.figure(plotInd)
    plt.hold(True)
    for k in range(provinceArray.shape[1]):
        plt.plot(absDayList, provinceArray[:, k], label=str(k))
    plt.xticks(locs, labels)
    plt.xlabel("Year")
    plt.ylabel("Detections")
    plt.legend(loc="upper left")
    plotInd += 1 

    #Plot of detection types
    plt.figure(plotInd)
    plt.plot(absDayList, contactSums, plotStyles2[0], absDayList, donorSums, plotStyles2[1], absDayList, randomTestSums, plotStyles2[2], absDayList, stdSums, plotStyles2[3], absDayList, otherSums, plotStyles2[4], absDayList, recommendSums, plotStyles2[5])
    plt.xticks(locs, labels)
    plt.xlabel("Year")
    plt.ylabel("Detections")
    plt.legend(("Contact tracing", "Blood donation", "Random test", "STD", "Other test", "Doctor recommendation"), loc="upper left")
    plt.savefig(figureDir + "DetectionGrowth.eps")
    plotInd += 1

    plt.figure(plotInd)
    plt.plot(absDayList, numContactSums, plotStyleBW[0], absDayList, numTestedSums, plotStyleBW[1], absDayList, numPositiveSums, plotStyleBW[2])
    plt.xticks(locs, labels)
    plt.xlabel("Year")
    plt.ylabel("Contacts")
    plt.legend(("No. contacts", "No. tested", "No. positive"), loc="center left")
    plt.savefig(figureDir + "ContactsGrowth.eps")
    plotInd += 1

    plt.figure(plotInd)
    plt.plot(absDayList, numContactHeteroSums, plotStyleBW[0], absDayList, numContactBiSums, plotStyleBW[1], absDayList, numContactFemaleSums, plotStyleBW[2], absDayList, numContactMaleSums, plotStyleBW[3])
    plt.xticks(locs, labels)
    plt.xlabel("Year")
    plt.ylabel("Total contacts")
    plt.legend(("Heterosexual", "MSM", "Female", "Male"), loc="upper right")
    plt.savefig(figureDir + "ContactsGrowthOrientGen.eps")
    plotInd += 1

    plt.figure(plotInd)
    plt.plot(absDayList, numTestedHeteroSums, plotStyleBW[0], absDayList, numTestedBiSums, plotStyleBW[1], absDayList, numTestedFemaleSums, plotStyleBW[2], absDayList, numTestedMaleSums, plotStyleBW[3])
    plt.xticks(locs, labels)
    plt.xlabel("Year")
    plt.ylabel("Tested contacts")
    plt.legend(("Heterosexual", "MSM", "Female", "Male"), loc="upper right")
    plt.savefig(figureDir + "TestedGrowthOrientGen.eps")
    plotInd += 1

    plt.figure(plotInd)
    plt.plot(absDayList, numPositiveHeteroSums, plotStyleBW[0], absDayList, numPositiveBiSums, plotStyleBW[1], absDayList, numPositiveFemaleSums, plotStyleBW[2], absDayList, numPositiveMaleSums, plotStyleBW[3])
    plt.xticks(locs, labels)
    plt.xlabel("Year")
    plt.ylabel("Positive contacts")
    plt.legend(("Heterosexual", "MSM", "Female", "Male"), loc="upper right")
    plt.savefig(figureDir + "PositiveGrowthOrientGen.eps")
    plotInd += 1

    #Proportion positive versus tested
    plt.figure(plotInd)
    plt.plot(absDayList, propPositiveHeteroSums, plotStyleBW[0], absDayList, propPositiveBiSums, plotStyleBW[1], absDayList, propPositiveFemaleSums, plotStyleBW[2], absDayList, propPositiveMaleSums, plotStyleBW[3])
    plt.xticks(locs, labels)
    plt.xlabel("Year")
    plt.ylabel("Proportion positive contacts")
    plt.legend(("Heterosexual", "MSM", "Female", "Male"), loc="upper right")
    plt.savefig(figureDir + "PercentPositiveGrowthOrientGen.eps")
    plotInd += 1

    plt.figure(plotInd)
    plt.hold(True)
    plt.plot(absDayList, havanaSums, plotStyles2[0])
    plt.plot(absDayList, villaClaraSums, plotStyles2[1])
    plt.plot(absDayList, pinarSums, plotStyles2[2])
    plt.plot(absDayList, holguinSums, plotStyles2[3])
    plt.plot(absDayList, habanaSums, plotStyles2[4])
    plt.plot(absDayList, sanctiSums, plotStyles2[5])
    plt.xticks(locs, labels)
    plt.xlabel("Year")
    plt.ylabel("Detections")
    plt.legend(("Havana City", "Villa Clara", "Pinar del Rio", "Holguin", "La Habana", "Sancti Spiritus"), loc="upper left")
    plt.savefig(figureDir + "ProvinceGrowth.eps")
    plotInd += 1

    plt.figure(plotInd)
    plt.plot(absDayList, numContactVertices, plotStyleBW[0], absDayList, numContactEdges, plotStyleBW[1], absDayList, numInfectEdges, plotStyleBW[2])
    plt.xticks(locs, labels)
    plt.xlabel("Year")
    plt.ylabel("Vertices/edges")
    plt.legend(("Contact vertices", "Contact edges", "Infect edges"), loc="upper left")
    plt.savefig(figureDir + "VerticesEdges.eps")
    plotInd += 1

    plt.figure(plotInd)
    plt.plot(absDayList, meanPropDegree, plotStyleBW[0])
    plt.xticks(locs, labels)
    plt.xlabel("Year")
    plt.ylabel("Proportion of final degree")
    plt.savefig(figureDir + "MeanPropDegree.eps")
    plotInd += 1

    plt.figure(plotInd)
    plt.plot(absDayList, degreeOneSums, plotStyleBW[0], absDayList, degreeTwoSums, plotStyleBW[1], absDayList, degreeThreePlusSums, plotStyleBW[2])
    plt.xticks(locs, labels)
    plt.xlabel("Year")
    plt.ylabel("Detections")
    plt.legend(("Degree = 1", "Degree = 2", "Degree >= 3"), loc="upper left")
    plotInd += 1

    #Print a table of interesting stats
    results = numpy.array([havanaSums])
    results = numpy.r_[results, numpy.array([villaClaraSums])]
    results = numpy.r_[results, numpy.array([pinarSums])]
    results = numpy.r_[results, numpy.array([holguinSums])]
    results = numpy.r_[results, numpy.array([habanaSums])]
    results = numpy.r_[results, numpy.array([sanctiSums])]

    print(Latex.listToRow(["Havana City", "Villa Clara", "Pinar del Rio", "Holguin", "La Habana", "Sancti Spiritus"]))
    print("\\hline")
    for i in range(0, len(dayList), 4):
        day = dayList[i]
        print(str(DateUtils.getDateStrFromDay(day, startYear)) + " & " + Latex.array1DToRow(results[:, i].T) + "\\\\")

    results = numpy.array([heteroSums])
    results = numpy.r_[results, numpy.array([biSums])]
    results = numpy.r_[results, numpy.array([femaleSums])]
    results = numpy.r_[results, numpy.array([maleSums])]

    print("\n\n")
    print(Latex.listToRow(["Heterosexual", "MSM", "Female", "Male"]))
    print("\\hline")
    for i in range(0, len(dayList), 4):
        day = dayList[i]
        print(str(DateUtils.getDateStrFromDay(day, startYear)) + " & " + Latex.array1DToRow(results[:, i].T) + "\\\\")