def testListToLatex(self):
    """
    Check the output of Latex.listToRow for a three-element list and for an
    empty list.
    """
    lst = ["one", "two", "three"]
    # assertEqual replaces the deprecated assertEquals alias, which no longer
    # exists in recent Python versions.
    self.assertEqual(Latex.listToRow(lst), "one & two & three\\\\")

    lst = []
    self.assertEqual(Latex.listToRow(lst), "")
def summary(datasetNames, sampleSizes, foldsSet, cvScalings, sampleMethods, fileNameSuffix, gridResultsSuffix="GridResults"):
    """
    Print the errors for all results plus a summary.

    For every dataset and sample method the results are loaded from
    outputDir + datasetName + sampleMethod + fileNameSuffix + ".npz" and the
    ideal errors from outputDir + datasetName + gridResultsSuffix + ".npz"
    (outputDir is a module-level name). A LaTeX error table is printed per
    file, then the sliced tables, then the win and excess error tables for
    each sample method and finally the mean errors over all datasets. A file
    which cannot be opened is reported and skipped.

    :param datasetNames: sequence of dataset names used to build the file names.
    :param sampleSizes: array of sample sizes (its shape[0] and len are used).
    :param foldsSet: array of fold counts (its shape[0] is used).
    :param cvScalings: array of scalings; the number of methods is its length plus 2.
    :param sampleMethods: sequence of sample method names used in file names and headings.
    :param fileNameSuffix: string appended to the name of each results file.
    :param gridResultsSuffix: string appended to the dataset name for the ideal results file.
    """
    numMethods = (1+(cvScalings.shape[0]+1))
    numDatasets = len(datasetNames)
    overallErrors = numpy.zeros((numDatasets, len(sampleMethods), sampleSizes.shape[0], foldsSet.shape[0], numMethods))
    # The builtin int/float are used as dtypes since the numpy.int and
    # numpy.float aliases no longer exist in current NumPy releases.
    overallStdWins = numpy.zeros((len(sampleMethods), len(sampleSizes), foldsSet.shape[0], numMethods+1, 3), int)
    overallErrorsPerSampMethod = numpy.zeros((numDatasets, len(sampleMethods), len(sampleSizes), numMethods), float)

    table1 = ""
    table2 = ""
    table3 = ""

    for i in range(len(datasetNames)):
        table3Error = numpy.zeros((2, len(sampleMethods)))
        table3Stds = numpy.zeros((2, len(sampleMethods)))

        for j in range(len(sampleMethods)):
            print("="*50 + "\n" + datasetNames[i] + "-" + sampleMethods[j] + "\n" + "="*50 )

            outfileName = outputDir + datasetNames[i] + sampleMethods[j] + fileNameSuffix + ".npz"

            try:
                data = numpy.load(outfileName)

                errors = data["arr_0"]
                params = data["arr_1"]
                meanErrorGrids = data["arr_2"]
                stdErrorGrids = data["arr_3"]
                meanApproxGrids = data["arr_4"]
                stdApproxGrids = data["arr_5"]

                #Load ideal results
                # outfileName is rebound so that the failure message below
                # names whichever file could not be opened.
                outfileName = outputDir + datasetNames[i] + gridResultsSuffix + ".npz"
                data = numpy.load(outfileName)
                idealErrors = data["arr_0"]

                errorTable, meanErrors, stdErrors = getLatexTable(errors, cvScalings, idealErrors)

                wins = getWins(errors)
                idealWins = getIdealWins(errors, idealErrors)
                excessError = numpy.zeros(errors.shape)

                # Excess error is the error minus that of method 0 for the
                # same index along axis 1.
                for k in range(errors.shape[1]):
                    excessError[:, k, :, :] = errors[:, k, :, :] - numpy.tile(errors[:, k, :, 0, numpy.newaxis], (1, 1, numMethods))

                meanExcessError = numpy.mean(excessError, 0)
                stdExcessError = numpy.std(excessError, 0)
                excessErrorTable, meanExcessErrors, stdExcessErrors = getLatexTable(excessError, cvScalings, idealErrors)

                overallErrorsPerSampMethod[i, j, :, :] = numpy.mean(meanErrors, 1)
                overallErrors[i, j, :, :, :] = meanExcessError
                overallStdWins[j, :, :, 0:-1, :] += wins
                overallStdWins[j, :, :, -1, :] += idealWins
                print(errorTable)
                #print("Min error is: " + str(numpy.min(meanErrors)))
                #print("Max error is: " + str(numpy.max(meanErrors)))
                #print("Mean error is: " + str(numpy.mean(meanErrors)) + "\n")

                #This is a table with V=10, alpha=1 and CV sampling
                sliceFoldIndex = 0

                print(meanErrors[0, 1, 0])
                numSliceMethods = 3
                table1Error = numpy.zeros(len(sampleSizes)*numSliceMethods)
                table1Std = numpy.zeros(len(sampleSizes)*numSliceMethods)
                for k in range(len(sampleSizes)):
                    table1Error[k*numSliceMethods] = meanErrors[k, sliceFoldIndex, 0]
                    table1Error[k*numSliceMethods+1] = meanErrors[k, sliceFoldIndex, 1]
                    table1Error[k*numSliceMethods+2] = meanErrors[k, sliceFoldIndex, 4]

                    table1Std[k*numSliceMethods] = stdErrors[k, sliceFoldIndex, 0]
                    table1Std[k*numSliceMethods+1] = stdErrors[k, sliceFoldIndex, 1]
                    table1Std[k*numSliceMethods+2] = stdErrors[k, sliceFoldIndex, 4]

                # Only the first sample method contributes rows to table1 and table2.
                if j == 0:
                    table1 += datasetNames[i] + " & " + Latex.array2DToRows(numpy.array([table1Error]), numpy.array([table1Std])) + "\n"

                #See how alpha varies with V=10, CV sampling
                # The shape is a plain length; the previous code passed a
                # range object as the shape, which is not a 1D allocation.
                table2Error = numpy.zeros(numMethods-2)
                table2Std = numpy.zeros(numMethods-2)
                for s in range(len(sampleSizes)):
                    table2Error = meanErrors[s, sliceFoldIndex, 2:]
                    table2Std = stdErrors[s, sliceFoldIndex, 2:]

                    if j == 0:
                        table2 += datasetNames[i] + " $m=" + str(sampleSizes[s]) + "$ & " + Latex.array2DToRows(numpy.array([table2Error]), numpy.array([table2Std])) + "\n"

                # Disabled code: while it stays disabled table3Error and
                # table3Stds remain all zeros.
                ##See how each sample method effects CV and pen alpha=1
                #fourFoldIndex = 4
                #hundredMIndex = 1
                #
                #table3Error[0, j] = meanErrors[hundredMIndex, fourFoldIndex, 0]
                #table3Error[1, j] = meanErrors[hundredMIndex, fourFoldIndex, 3]
                #table3Stds[0, j] = stdErrors[hundredMIndex, fourFoldIndex, 0]
                #table3Stds[1, j] = stdErrors[hundredMIndex, fourFoldIndex, 3]
            except IOError:
                print("Failed to open file: " + outfileName)

        table3 += Latex.addRowNames([datasetNames[i] + " Std ", datasetNames[i] + " PenVF "], Latex.array2DToRows(table3Error, table3Stds))

        datasetMeanErrors = Latex.listToRow(sampleMethods) + "\n"

        for j in range(len(sampleSizes)):
            datasetMeanErrors += Latex.array2DToRows(overallErrorsPerSampMethod[i, :, j, :].T) + "\n"

        datasetMeanErrors = Latex.addRowNames(getRowNames(cvScalings), datasetMeanErrors)
        print(datasetMeanErrors)

    print("="*50 + "\n" + "Sliced Tables" + "\n" + "="*50)

    print(table1 + "\n")
    print(table2 + "\n")
    print(table3)

    print("="*50 + "\n" + "Overall" + "\n" + "="*50)

    # Mean and standard deviation of the excess errors over the datasets.
    overallMeanErrors = numpy.mean(overallErrors, 0)
    overallStdErrors = numpy.std(overallErrors, 0)

    for i in range(len(sampleMethods)):
        print("-"*20 + sampleMethods[i] + "-"*20)
        overallErrorTable = Latex.array1DToRow(foldsSet) + "\\\\ \n"
        overallWinsTable = Latex.array1DToRow(foldsSet) + " & Total & " +Latex.array1DToRow(foldsSet) + " & Total \\\\ \n"

        rowNames = getRowNames(cvScalings)

        for j in range(sampleSizes.shape[0]):
            # Negative mean excess errors are flagged through the bold argument.
            overallErrorTable += Latex.array2DToRows(overallMeanErrors[i, j, :, :].T, overallStdErrors[i, j, :, :].T, bold=overallMeanErrors[i, j, :, :].T<0) + "\n"

            tiesWins = numpy.r_[overallStdWins[i, j, :, :, 0], overallStdWins[i, j, :, :, 1], overallStdWins[i, j, :, :, 2]]
            overallWinsTable += Latex.array2DToRows(tiesWins.T) + "\n"

        overallErrorTable = Latex.addRowNames(rowNames, overallErrorTable)
        rowNames = getRowNames(cvScalings, True)
        overallWinsTable = Latex.addRowNames(rowNames, overallWinsTable)

        print(Latex.latexTable(overallWinsTable, "Wins for " + sampleMethods[i], True))
        print(Latex.latexTable(overallErrorTable.replace("0.", "."), "Excess errors for " + sampleMethods[i], True))
        #print(overallWinsTable)
        #print(overallErrorTable)

    #Now print the mean errors for all datasets
    datasetMeanErrors = Latex.listToRow(sampleMethods) + "\n"
    overallErrorsPerSampMethod = numpy.mean(overallErrorsPerSampMethod[:, :, :, :], 0)

    for j in range(len(sampleSizes)):
        datasetMeanErrors += Latex.array2DToRows(overallErrorsPerSampMethod[:, j, :].T) + "\n"

    datasetMeanErrors = Latex.addRowNames(getRowNames(cvScalings), datasetMeanErrors)
    print(datasetMeanErrors)
def plotVectorStats():
    """
    Plot and print vector statistics for the graphs in subgraphIndicesList2.

    The statistics are either computed with graphStats.sequenceVectorStats and
    pickled, or loaded from the pickle file, depending on the module-level
    saveResults flag. For each day in dayList2 the out-degree, tree size and
    tree depth distributions are added to three figures which are saved as
    .eps files in figureDir. LaTeX rows for the tree sizes, the tree depths
    and the vertex/edge counts are then printed.

    Takes no arguments and returns nothing; all inputs are module-level names
    (resultsDir, saveResults, sGraph, subgraphIndicesList2, dayList2,
    startYear, plotInd, plotStyles2, figureDir).
    """
    global plotInd

    #Finally, compute some vector stats at various points in the graph
    logging.info("Computing vector stats")
    resultsFileName = resultsDir + "InfectGrowthVectorStats.pkl"

    if saveResults:
        statsDictList = graphStats.sequenceVectorStats(sGraph, subgraphIndicesList2, True)
        Util.savePickle(statsDictList, resultsFileName, True)
    else:
        statsDictList = Util.loadPickle(resultsFileName)

    # NOTE(review): the original indentation was lost; the code below is
    # assumed to run after both branches of the if/else - confirm.
    treeSizesDistArray = numpy.zeros((len(dayList2), 3000))
    treeDepthsDistArray = numpy.zeros((len(dayList2), 100))
    # The builtin int is used as the dtype since the numpy.int alias no
    # longer exists in current NumPy releases.
    numVerticesEdgesArray = numpy.zeros((len(dayList2), 2), int)
    numVerticesEdgesArray[:, 0] = [len(sgl) for sgl in subgraphIndicesList2]
    numVerticesEdgesArray[:, 1] = [sGraph.subgraph(sgl).getNumEdges() for sgl in subgraphIndicesList2]

    for j in range(len(dayList2)):
        dateStr = (str(DateUtils.getDateStrFromDay(dayList2[j], startYear)))
        logging.info(dateStr)
        statsDict = statsDictList[j]

        # Normalise the out-degree counts so that they sum to one.
        degreeDist = statsDict["outDegreeDist"]
        degreeDist = degreeDist/float(numpy.sum(degreeDist))

        maxEigVector = statsDict["maxEigVector"]
        maxEigVector = numpy.flipud(numpy.sort(numpy.abs(maxEigVector)))
        maxEigVector = numpy.log(maxEigVector[maxEigVector>0])

        treeSizesDist = statsDict["treeSizesDist"]
        treeSizesDist = numpy.array(treeSizesDist, numpy.float64)/numpy.sum(treeSizesDist)
        treeSizesDistArray[j, 0:treeSizesDist.shape[0]] = treeSizesDist

        # Unlike the tree sizes, the tree depths are kept unnormalised.
        treeDepthsDist = statsDict["treeDepthsDist"]
        #treeDepthsDist = numpy.array(treeDepthsDist, numpy.float64)/numpy.sum(treeDepthsDist)
        treeDepthsDist = numpy.array(treeDepthsDist, numpy.float64)
        treeDepthsDistArray[j, 0:treeDepthsDist.shape[0]] = treeDepthsDist

        # Figure numbers restart from plotInd on each iteration so that every
        # day is drawn onto the same three figures.
        plotInd2 = plotInd

        plt.figure(plotInd2)
        plt.plot(numpy.arange(degreeDist.shape[0]), degreeDist, label=dateStr)
        plt.xlabel("Degree")
        plt.ylabel("Probability")
        plt.ylim((0, 0.8))
        plt.legend()
        plt.savefig(figureDir + "DegreeDist" + ".eps")
        plotInd2 += 1

        plt.figure(plotInd2)
        plt.scatter(numpy.arange(treeSizesDist.shape[0])[treeSizesDist!=0], numpy.log(treeSizesDist[treeSizesDist!=0]), s=30, c=plotStyles2[j][0], label=dateStr)
        plt.xlabel("Size")
        plt.ylabel("log(probability)")
        plt.xlim((0, 125))
        plt.legend()
        plt.savefig(figureDir + "TreeSizeDist" + ".eps")
        plotInd2 += 1

        plt.figure(plotInd2)
        plt.scatter(numpy.arange(treeDepthsDist.shape[0])[treeDepthsDist!=0], numpy.log(treeDepthsDist[treeDepthsDist!=0]), s=30, c=plotStyles2[j][0], label=dateStr)
        plt.xlabel("Depth")
        plt.ylabel("log(probability)")
        plt.xlim((0, 15))
        plt.legend()
        plt.savefig(figureDir + "TreeDepthDist" + ".eps")
        plotInd2 += 1

    dateStrList = [DateUtils.getDateStrFromDay(day, startYear) for day in dayList2]
    precision = 4

    # The arrays are truncated to the length of the distribution of the last
    # day processed in the loop above.
    treeSizesDistArray = treeSizesDistArray[:, 0:treeSizesDist.shape[0]]
    nonZeroCols = numpy.sum(treeSizesDistArray, 0)!=0
    print((Latex.array1DToRow(numpy.arange(treeSizesDistArray.shape[1])[nonZeroCols])))
    print((Latex.array2DToRows(treeSizesDistArray[:, nonZeroCols])))

    print("Tree depths")
    treeDepthsDistArray = treeDepthsDistArray[:, 0:treeDepthsDist.shape[0]]
    nonZeroCols = numpy.sum(treeDepthsDistArray, 0)!=0
    print((Latex.array1DToRow(numpy.arange(treeDepthsDistArray.shape[1])[nonZeroCols])))
    print((Latex.array2DToRows(treeDepthsDistArray[:, nonZeroCols])))

    print(numpy.sum(treeDepthsDistArray[:, 0:3], 1))

    print("Edges and verticies")
    print(Latex.listToRow(dateStrList))
    print(Latex.array2DToRows(numVerticesEdgesArray.T, precision))
def plotVectorStats():
    """
    Plot and print vector statistics for the graphs in subgraphIndicesList2,
    together with the mean statistics of the configuration model graphs.

    The statistics are either computed with graphStats.sequenceVectorStats and
    pickled, or loaded from the pickle file, depending on the module-level
    saveResults flag. The configuration graph statistics are loaded from
    numConfigGraphs pickle files and averaged. For each day in dayList2 the
    degree distribution, hop counts, leading eigenvector coefficients and the
    degree distribution of the top tenth of vertices ranked by eigenvector
    coefficient are plotted and saved as .eps files in figureDir. LaTeX rows
    summarising the results are then printed.

    Takes no arguments and returns nothing; all inputs are module-level names.
    """
    global plotInd

    #Finally, compute some vector stats at various points in the graph
    logging.info("Computing vector stats")
    resultsFileName = resultsDir + "ContactGrowthVectorStats.pkl"

    if saveResults:
        statsDictList = graphStats.sequenceVectorStats(sGraph, subgraphIndicesList2)
        Util.savePickle(statsDictList, resultsFileName, False)
    else:
        statsDictList = Util.loadPickle(resultsFileName)

    # NOTE(review): the original indentation was lost; the code below is
    # assumed to run after both branches of the if/else - confirm.

    #Load up configuration model results
    configStatsDictList = []
    resultsFileNameBase = resultsDir + "ConfigGraphVectorStats"

    for j in range(numConfigGraphs):
        resultsFileName = resultsFileNameBase + str(j)
        configStatsDictList.append(Util.loadPickle(resultsFileName))

    #Now need to take mean of 1st element of list
    # The sums are accumulated into the entries of the first list, which is
    # therefore modified in place.
    meanConfigStatsDictList = configStatsDictList[0]
    for i in range(len(configStatsDictList[0])):
        for k in range(1, numConfigGraphs):
            for key in configStatsDictList[k][i].keys():
                # Zero-pad the shorter of the two arrays so that they can be added.
                if configStatsDictList[k][i][key].shape[0] > meanConfigStatsDictList[i][key].shape[0]:
                    meanConfigStatsDictList[i][key] = numpy.r_[meanConfigStatsDictList[i][key], numpy.zeros(configStatsDictList[k][i][key].shape[0] - meanConfigStatsDictList[i][key].shape[0])]
                elif configStatsDictList[k][i][key].shape[0] < meanConfigStatsDictList[i][key].shape[0]:
                    configStatsDictList[k][i][key] = numpy.r_[configStatsDictList[k][i][key], numpy.zeros(meanConfigStatsDictList[i][key].shape[0] - configStatsDictList[k][i][key].shape[0])]

                meanConfigStatsDictList[i][key] += configStatsDictList[k][i][key]

        for key in configStatsDictList[0][i].keys():
            meanConfigStatsDictList[i][key] = meanConfigStatsDictList[i][key]/numConfigGraphs

    triangleDistArray = numpy.zeros((len(dayList2), 100))
    configTriangleDistArray = numpy.zeros((len(dayList2), 100))
    hopPlotArray = numpy.zeros((len(dayList2), 27))
    configHopPlotArray = numpy.zeros((len(dayList2), 30))
    componentsDistArray = numpy.zeros((len(dayList2), 3000))
    configComponentsDistArray = numpy.zeros((len(dayList2), 3000))
    # The builtin int/float are used as dtypes since the numpy.int and
    # numpy.float aliases no longer exist in current NumPy releases.
    numVerticesEdgesArray = numpy.zeros((len(dayList2), 2), int)
    numVerticesEdgesArray[:, 0] = [len(sgl) for sgl in subgraphIndicesList2]
    numVerticesEdgesArray[:, 1] = [sGraph.subgraph(sgl).getNumEdges() for sgl in subgraphIndicesList2]

    binWidths = numpy.arange(0, 0.50, 0.05)
    eigVectorDists = numpy.zeros((len(dayList2), binWidths.shape[0]-1), int)

    femaleSums = numpy.zeros(len(dayList2))
    maleSums = numpy.zeros(len(dayList2))
    heteroSums = numpy.zeros(len(dayList2))
    biSums = numpy.zeros(len(dayList2))

    contactSums = numpy.zeros(len(dayList2))
    nonContactSums = numpy.zeros(len(dayList2))
    donorSums = numpy.zeros(len(dayList2))
    randomTestSums = numpy.zeros(len(dayList2))
    stdSums = numpy.zeros(len(dayList2))
    prisonerSums = numpy.zeros(len(dayList2))
    recommendSums = numpy.zeros(len(dayList2))

    meanAges = numpy.zeros(len(dayList2))
    degrees = numpy.zeros((len(dayList2), 20))

    provinces = numpy.zeros((len(dayList2), 15))

    havanaSums = numpy.zeros(len(dayList2))
    villaClaraSums = numpy.zeros(len(dayList2))
    pinarSums = numpy.zeros(len(dayList2))
    holguinSums = numpy.zeros(len(dayList2))
    habanaSums = numpy.zeros(len(dayList2))
    sanctiSums = numpy.zeros(len(dayList2))

    meanDegrees = numpy.zeros(len(dayList2))
    stdDegrees = numpy.zeros(len(dayList2))

    #Note that death has a lot of missing values
    for j in range(len(dayList2)):
        dateStr = (str(DateUtils.getDateStrFromDay(dayList2[j], startYear)))
        logging.info(dateStr)
        statsDict = statsDictList[j]
        configStatsDict = meanConfigStatsDictList[j]

        # Normalise the out-degree counts so that they sum to one.
        degreeDist = statsDict["outDegreeDist"]
        degreeDist = degreeDist/float(numpy.sum(degreeDist))
        #Note that degree distribution for configuration graph will be identical

        eigenDist = statsDict["eigenDist"]
        eigenDist = numpy.log(eigenDist[eigenDist>=10**-1])
        #configEigenDist = configStatsDict["eigenDist"]
        #configEigenDist = numpy.log(configEigenDist[configEigenDist>=10**-1])

        hopCount = statsDict["hopCount"]
        hopCount = numpy.log10(hopCount)
        hopPlotArray[j, 0:hopCount.shape[0]] = hopCount
        configHopCount = configStatsDict["hopCount"]
        configHopCount = numpy.log10(configHopCount)
        #configHopPlotArray[j, 0:configHopCount.shape[0]] = configHopCount

        # The triangle counts of the real graph are kept unnormalised, while
        # those of the configuration graphs are normalised.
        triangleDist = statsDict["triangleDist"]
        #triangleDist = numpy.array(triangleDist, numpy.float64)/numpy.sum(triangleDist)
        triangleDist = numpy.array(triangleDist, numpy.float64)
        triangleDistArray[j, 0:triangleDist.shape[0]] = triangleDist
        configTriangleDist = configStatsDict["triangleDist"]
        configTriangleDist = numpy.array(configTriangleDist, numpy.float64)/numpy.sum(configTriangleDist)
        configTriangleDistArray[j, 0:configTriangleDist.shape[0]] = configTriangleDist

        maxEigVector = statsDict["maxEigVector"]
        eigenvectorInds = numpy.flipud(numpy.argsort(numpy.abs(maxEigVector)))
        # numpy.round returns a float, which is not a valid slice bound, so
        # the count is converted to an int.
        top10eigenvectorInds = eigenvectorInds[0:int(numpy.round(eigenvectorInds.shape[0]/10.0))]
        maxEigVector = numpy.abs(maxEigVector[eigenvectorInds])
        #print(maxEigVector)
        eigVectorDists[j, :] = numpy.histogram(maxEigVector, binWidths)[0]

        componentsDist = statsDict["componentsDist"]
        componentsDist = numpy.array(componentsDist, numpy.float64)/numpy.sum(componentsDist)
        componentsDistArray[j, 0:componentsDist.shape[0]] = componentsDist
        configComponentsDist = configStatsDict["componentsDist"]
        configComponentsDist = numpy.array(configComponentsDist, numpy.float64)/numpy.sum(configComponentsDist)
        configComponentsDistArray[j, 0:configComponentsDist.shape[0]] = configComponentsDist

        # Figure numbers restart from plotInd on each iteration so that every
        # day is drawn onto the same figures.
        plotInd2 = plotInd

        plt.figure(plotInd2)
        plt.plot(numpy.arange(degreeDist.shape[0]), degreeDist, plotStyles2[j], label=dateStr)
        plt.xlabel("Degree")
        plt.ylabel("Probability")
        plt.ylim((0, 0.5))
        # The legend is drawn before saving so that the saved figure includes
        # the entry for the current day.
        plt.legend()
        plt.savefig(figureDir + "DegreeDist" + ".eps")
        plotInd2 += 1

        # Disabled eigenvalue plot:
        #plt.figure(plotInd2)
        #plt.plot(numpy.arange(eigenDist.shape[0]), eigenDist, label=dateStr)
        #plt.xlabel("Eigenvalue rank")
        #plt.ylabel("log(Eigenvalue)")
        #plt.savefig(figureDir + "EigenDist" + ".eps")
        #plt.legend()
        #plotInd2 += 1

        #How does kleinberg do the hop plots
        plt.figure(plotInd2)
        plt.plot(numpy.arange(hopCount.shape[0]), hopCount, plotStyles[j], label=dateStr)
        plt.xlabel("k")
        plt.ylabel("log10(pairs)")
        plt.ylim( (2.5, 7) )
        plt.legend(loc="lower right")
        plt.savefig(figureDir + "HopCount" + ".eps")
        plotInd2 += 1

        plt.figure(plotInd2)
        plt.plot(numpy.arange(maxEigVector.shape[0]), maxEigVector, plotStyles2[j], label=dateStr)
        plt.xlabel("Rank")
        plt.ylabel("log(eigenvector coefficient)")
        plt.legend()
        plt.savefig(figureDir + "MaxEigVector" + ".eps")
        plotInd2 += 1

        #Compute some information the 10% most central vertices
        subgraphIndices = numpy.nonzero(detections <= dayList2[j])[0]
        subgraph = sGraph.subgraph(subgraphIndices)
        subgraphVertexArray = subgraph.getVertexList().getVertices()

        femaleSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, genderIndex]==1)
        maleSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, genderIndex]==0)
        heteroSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, orientationIndex]==0)
        biSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, orientationIndex]==1)

        contactSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, contactIndex])
        donorSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, donorIndex])
        randomTestSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, randomTestIndex])
        stdSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, stdIndex])
        prisonerSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, prisonerIndex])
        recommendSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, doctorIndex])

        meanAges[j] = numpy.mean(subgraphVertexArray[top10eigenvectorInds, detectionIndex] - subgraphVertexArray[top10eigenvectorInds, dobIndex])/daysInYear

        havanaSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, havanaIndex])
        villaClaraSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, villaClaraIndex])
        pinarSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, pinarIndex])
        holguinSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, holguinIndex])
        habanaSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, habanaIndex])
        sanctiSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, sanctiIndex])

        # NOTE(review): columns 22:37 are presumably the province indicator
        # columns (15 of them, matching the provinces array) - confirm.
        provinces[j, :] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, 22:37], 0)

        ddist = numpy.bincount(subgraph.outDegreeSequence()[top10eigenvectorInds])
        degrees[j, 0:ddist.shape[0]] = numpy.array(ddist, float)/numpy.sum(ddist)

        meanDegrees[j] = numpy.mean(subgraph.outDegreeSequence()[top10eigenvectorInds])
        stdDegrees[j] = numpy.std(subgraph.outDegreeSequence()[top10eigenvectorInds])

        plt.figure(plotInd2)
        plt.plot(numpy.arange(degrees[j, :].shape[0]), degrees[j, :], plotStyles2[j], label=dateStr)
        plt.xlabel("Degree")
        plt.ylabel("Probability")
        #plt.ylim((0, 0.5))
        plt.legend()
        plt.savefig(figureDir + "DegreeDistCentral" + ".eps")
        plotInd2 += 1

    precision = 4
    dateStrList = [DateUtils.getDateStrFromDay(day, startYear) for day in dayList2]

    print("Hop counts")
    print(Latex.listToRow(dateStrList))
    print(Latex.array2DToRows(hopPlotArray.T))

    # configHopPlotArray is never filled above (the assignment is commented
    # out), so this prints zeros.
    print("\nHop counts for configuration graphs")
    print(Latex.listToRow(dateStrList))
    print(Latex.array2DToRows(configHopPlotArray.T))

    print("\n\nEdges and vertices")
    print((Latex.listToRow(dateStrList)))
    print((Latex.array2DToRows(numVerticesEdgesArray.T, precision)))

    print("\n\nEigenvector distribution")
    print((Latex.array1DToRow(binWidths[1:]) + "\\\\"))
    print((Latex.array2DToRows(eigVectorDists)))

    # The arrays below are truncated to the length of the distribution of the
    # last day processed in the loop above, and the indices of the non-zero
    # columns are prepended as the first row.
    print("\n\nDistribution of component sizes")
    componentsDistArray = componentsDistArray[:, 0:componentsDist.shape[0]]
    nonZeroCols = numpy.sum(componentsDistArray, 0)!=0
    componentsDistArray = numpy.r_[numpy.array([numpy.arange(componentsDistArray.shape[1])[nonZeroCols]]), componentsDistArray[:, nonZeroCols]]
    print((Latex.listToRow(dateStrList)))
    print((Latex.array2DToRows(componentsDistArray.T, precision)))

    print("\n\nDistribution of component sizes in configuration graphs")
    configComponentsDistArray = configComponentsDistArray[:, 0:configComponentsDist.shape[0]]
    nonZeroCols = numpy.sum(configComponentsDistArray, 0)!=0
    configComponentsDistArray = numpy.r_[numpy.array([numpy.arange(configComponentsDistArray.shape[1])[nonZeroCols]]), configComponentsDistArray[:, nonZeroCols]]
    print((Latex.listToRow(dateStrList)))
    print((Latex.array2DToRows(configComponentsDistArray.T, precision)))

    print("\n\nDistribution of triangle participations")
    triangleDistArray = triangleDistArray[:, 0:triangleDist.shape[0]]
    nonZeroCols = numpy.sum(triangleDistArray, 0)!=0
    triangleDistArray = numpy.r_[numpy.array([numpy.arange(triangleDistArray.shape[1])[nonZeroCols]])/2, triangleDistArray[:, nonZeroCols]]
    print((Latex.listToRow(dateStrList)))
    print((Latex.array2DToRows(triangleDistArray.T, precision)))

    configTriangleDistArray = configTriangleDistArray[:, 0:configTriangleDist.shape[0]]
    nonZeroCols = numpy.sum(configTriangleDistArray, 0)!=0
    configTriangleDistArray = numpy.r_[numpy.array([numpy.arange(configTriangleDistArray.shape[1])[nonZeroCols]])/2, configTriangleDistArray[:, nonZeroCols]]
    configTriangleDistArray = numpy.c_[configTriangleDistArray, numpy.zeros((configTriangleDistArray.shape[0], triangleDistArray.shape[1]-configTriangleDistArray.shape[1]))]

    print("\n\nDistribution of central vertices")
    print((Latex.listToRow(dateStrList)))
    # Percentages below are relative to the number of male plus female
    # vertices among the selected central vertices.
    subgraphSizes = numpy.array(maleSums + femaleSums, float)
    print("Female & " + Latex.array1DToRow(femaleSums*100/subgraphSizes, 1) + "\\\\")
    print("Male & " + Latex.array1DToRow(maleSums*100/subgraphSizes, 1) + "\\\\")
    # "\\hline" is the same text as before, written with an escaped backslash
    # to avoid an invalid escape sequence.
    print("\\hline")
    print("Heterosexual & " + Latex.array1DToRow(heteroSums*100/subgraphSizes, 1) + "\\\\")
    print("Bisexual & " + Latex.array1DToRow(biSums*100/subgraphSizes, 1) + "\\\\")
    print("\\hline")
    print("Contact traced & " + Latex.array1DToRow(contactSums*100/subgraphSizes, 1) + "\\\\")
    print("Blood donor & " + Latex.array1DToRow(donorSums*100/subgraphSizes, 1) + "\\\\")
    print("RandomTest & " + Latex.array1DToRow(randomTestSums*100/subgraphSizes, 1) + "\\\\")
    print("STD & " + Latex.array1DToRow(stdSums*100/subgraphSizes, 1) + "\\\\")
    print("Prisoner & " + Latex.array1DToRow(prisonerSums*100/subgraphSizes, 1) + "\\\\")
    print("Doctor recommendation & " + Latex.array1DToRow(recommendSums*100/subgraphSizes, 1) + "\\\\")
    print("\\hline")
    print("Mean ages (years) & " + Latex.array1DToRow(meanAges, 2) + "\\\\")
    print("\\hline")
    print("Holguin & " + Latex.array1DToRow(holguinSums*100/subgraphSizes, 1) + "\\\\")
    print("La Habana & " + Latex.array1DToRow(habanaSums*100/subgraphSizes, 1) + "\\\\")
    print("Havana City & " + Latex.array1DToRow(havanaSums*100/subgraphSizes, 1) + "\\\\")
    print("Pinar del Rio & " + Latex.array1DToRow(pinarSums*100/subgraphSizes, 1) + "\\\\")
    print("Sancti Spiritus & " + Latex.array1DToRow(sanctiSums*100/subgraphSizes, 1) + "\\\\")
    print("Villa Clara & " + Latex.array1DToRow(villaClaraSums*100/subgraphSizes, 1) + "\\\\")
    print("\\hline")
    print("Mean degrees & " + Latex.array1DToRow(meanDegrees, 2) + "\\\\")
    print("Std degrees & " + Latex.array1DToRow(stdDegrees, 2) + "\\\\")

    print("\n\nProvinces")
    print(Latex.array2DToRows(provinces))

    print("\n\nDegree distribution")
    print(Latex.array2DToRows(degrees))
def plotScalarStats():
    """
    Plot scalar statistics of the real graphs against the mean statistics of
    the configuration model graphs, and print a summary table.

    The real graph statistics are either computed with
    graphStats.sequenceScalarStats and pickled, or loaded from the pickle
    file, depending on the module-level saveResults flag. The configuration
    model statistics are loaded from numConfigGraphs pickle files and their
    mean and standard deviation are taken. Each figure is saved as an .eps
    file in figureDir and the module-level plotInd counter is incremented
    after each one. Finally LaTeX rows comparing the effective diameter and
    the max component mean geodesic distance are printed for every fourth
    entry of dayList.

    Takes no arguments and returns nothing; all inputs are module-level names.
    """
    global plotInd

    logging.info("Computing scalar stats")

    resultsFileName = resultsDir + "ContactGrowthScalarStats.pkl"

    if saveResults:
        statsArray = graphStats.sequenceScalarStats(sGraph, subgraphIndicesList, slowStats)
        Util.savePickle(statsArray, resultsFileName, True)

        #Now compute statistics on the configuration graphs
    else:
        statsArray = Util.loadPickle(resultsFileName)

    # NOTE(review): the original indentation was lost; the code below is
    # assumed to run after both branches of the if/else - confirm.

    #Take the mean of the results over the configuration model graphs
    resultsFileNameBase = resultsDir + "ConfigGraphScalarStats"
    numGraphs = len(subgraphIndicesList)
    #configStatsArrays = numpy.zeros((numGraphs, graphStats.getNumStats(), numConfigGraphs))
    configStatsArrays = numpy.zeros((numGraphs, graphStats.getNumStats()-2, numConfigGraphs))

    for j in range(numConfigGraphs):
        resultsFileName = resultsFileNameBase + str(j)
        configStatsArrays[:, :, j] = Util.loadPickle(resultsFileName)

    configStatsArray = numpy.mean(configStatsArrays, 2)
    configStatsStd = numpy.std(configStatsArrays, 2)

    def plotRealConfigError(index, styleReal, styleConfig, realLabel, configLabel):
        """
        Plot column index of the real statistics and of the mean configuration
        model statistics against absDayList on the current figure, with error
        bars of one standard deviation on the configuration model curve, and
        set the lower axis limits to zero.
        """
        # plt.hold(True) is not called since pyplot.hold no longer exists in
        # current Matplotlib; successive plot calls draw on the same axes.
        plt.plot(absDayList, statsArray[:, index], styleReal, label=realLabel)
        #errors = numpy.c_[configStatsArray[:, index]-configStatsMinArray[:, index] , configStatsMaxArray[:, index]-configStatsArray[:, index]].T
        errors = numpy.c_[configStatsStd[:, index], configStatsStd[:, index]].T
        plt.plot(absDayList, configStatsArray[:, index], styleConfig, label=configLabel)
        plt.errorbar(absDayList, configStatsArray[:, index], errors, linewidth=0, elinewidth=1, label="_nolegend_", ecolor="red")

        xmin, xmax = plt.xlim()
        plt.xlim((0, xmax))
        ymin, ymax = plt.ylim()
        plt.ylim((0, ymax))

    #Output all the results into plots
    plt.figure(plotInd)
    plotRealConfigError(graphStats.maxComponentSizeIndex, plotStyleBW[0], plotStyles4[0], "Max comp. vertices", "CM max comp. vertices")
    plotRealConfigError(graphStats.maxComponentEdgesIndex, plotStyleBW[1], plotStyles4[1], "Max comp. edges", "CM max comp. edges")
    plt.xticks(locs, labels)
    plt.xlabel("Year")
    plt.ylabel("No. vertices/edges")
    plt.legend(loc="upper left")
    plt.savefig(figureDir + "MaxComponentSizeGrowth.eps")
    plotInd += 1

    for k in range(len(dayList)):
        day = dayList[k]
        print(str(DateUtils.getDateStrFromDay(day, startYear)) + ": " + str(statsArray[k, graphStats.maxComponentEdgesIndex]))
        #print(str(DateUtils.getDateStrFromDay(day, startYear)) + ": " + str(configStatsArray[k, graphStats.numComponentsIndex]))

    plt.figure(plotInd)
    plotRealConfigError(graphStats.numComponentsIndex, plotStyleBW[0], plotStyles4[0], "Size >= 1", "CM size >= 1")
    plotRealConfigError(graphStats.numNonSingletonComponentsIndex, plotStyleBW[1], plotStyles4[1], "Size >= 2", "CM size >= 2")
    plotRealConfigError(graphStats.numTriOrMoreComponentsIndex, plotStyleBW[2], plotStyles4[2], "Size >= 3", "CM size >= 3")
    plt.xticks(locs, labels)
    plt.xlabel("Year")
    plt.ylabel("No. components")
    plt.legend(loc="upper left")
    plt.savefig(figureDir + "NumComponentsGrowth.eps")
    plotInd += 1

    plt.figure(plotInd)
    plotRealConfigError(graphStats.meanComponentSizeIndex, plotStyleBW[0], plotStyles4[0], "Real graph", "CM")
    plt.xticks(locs, labels)
    plt.xlabel("Year")
    plt.ylabel("Mean component size")
    plt.legend(loc="lower right")
    plt.savefig(figureDir + "MeanComponentSizeGrowth.eps")
    plotInd += 1

    plt.figure(plotInd)
    plotRealConfigError(graphStats.diameterIndex, plotStyleBW[0], plotStyles4[0], "Real graph", "CM")
    plt.xticks(locs, labels)
    plt.xlabel("Year")
    plt.ylabel("Max component diameter")
    plt.legend(loc="lower right")
    plt.savefig(figureDir + "MaxComponentDiameterGrowth.eps")
    plotInd += 1

    plt.figure(plotInd)
    plotRealConfigError(graphStats.effectiveDiameterIndex, plotStyleBW[0], plotStyles4[0], "Real graph", "CM")
    plt.xticks(locs, labels)
    plt.xlabel("Year")
    plt.ylabel("Effective diameter")
    plt.legend(loc="lower right")
    plt.savefig(figureDir + "MaxComponentEffDiameterGrowth.eps")
    plotInd += 1

    plt.figure(plotInd)
    plotRealConfigError(graphStats.meanDegreeIndex, plotStyleBW[0], plotStyles4[0], "All vertices", "CM all vertices")
    plotRealConfigError(graphStats.maxCompMeanDegreeIndex, plotStyleBW[1], plotStyles4[1], "Max component", "CM max component")
    #plt.plot(absDayList, statsArray[:, graphStats.meanDegreeIndex], plotStyleBW[0], absDayList, statsArray[:, graphStats.maxCompMeanDegreeIndex], plotStyleBW[1], absDayList, configStatsArray[:, graphStats.meanDegreeIndex], plotStyles4[0], absDayList, configStatsArray[:, graphStats.maxCompMeanDegreeIndex], plotStyles4[1])
    plt.xticks(locs, labels)
    plt.xlabel("Year")
    plt.ylabel("Mean degree")
    plt.legend(loc="lower right")
    plt.savefig(figureDir + "MeanDegrees.eps")
    plotInd += 1

    plt.figure(plotInd)
    plotRealConfigError(graphStats.densityIndex, plotStyleBW[0], plotStyles4[0], "Real Graph", "Config Model")
    #plt.plot(absDayList, statsArray[:, graphStats.densityIndex], plotStyleBW[0], absDayList, configStatsArray[:, graphStats.densityIndex], plotStyles4[0])
    plt.xticks(locs, labels)
    plt.xlabel("Year")
    plt.ylabel("Density")
    plt.legend()
    plt.savefig(figureDir + "DensityGrowth.eps")
    plotInd += 1

    # The power law exponent is plotted for the real graph only.
    plt.figure(plotInd)
    plt.plot(absDayList, statsArray[:, graphStats.powerLawIndex], plotStyleBW[0])
    plt.xticks(locs, labels)
    plt.xlabel("Year")
    plt.ylabel("Alpha")
    plt.savefig(figureDir + "PowerLawGrowth.eps")
    plotInd += 1

    plt.figure(plotInd)
    plotRealConfigError(graphStats.geodesicDistanceIndex, plotStyleBW[0], plotStyles4[0], "Real Graph", "Config Model")
    #plt.plot(absDayList, statsArray[:, graphStats.geodesicDistanceIndex], plotStyleBW[0], absDayList, configStatsArray[:, graphStats.geodesicDistanceIndex], plotStyles4[0])
    plt.xticks(locs, labels)
    plt.xlabel("Year")
    plt.ylabel("Geodesic distance")
    plt.legend(loc="lower right")
    plt.savefig(figureDir + "GeodesicGrowth.eps")
    plotInd += 1

    plt.figure(plotInd)
    plotRealConfigError(graphStats.harmonicGeoDistanceIndex, plotStyleBW[0], plotStyles4[0], "Real Graph", "Config Model")
    #plt.plot(absDayList, statsArray[:, graphStats.harmonicGeoDistanceIndex], plotStyleBW[0], absDayList, configStatsArray[:, graphStats.harmonicGeoDistanceIndex], plotStyles4[0])
    plt.xticks(locs, labels)
    plt.xlabel("Year")
    plt.ylabel("Mean harmonic geodesic distance")
    plt.legend(loc="upper right")
    plt.savefig(figureDir + "HarmonicGeodesicGrowth.eps")
    plotInd += 1

    #print(statsArray[:, graphStats.harmonicGeoDistanceIndex])

    plt.figure(plotInd)
    plotRealConfigError(graphStats.geodesicDistMaxCompIndex, plotStyleBW[0], plotStyles4[0], "Real graph", "Config model")
    #plt.plot(absDayList, statsArray[:, graphStats.geodesicDistMaxCompIndex], plotStyleBW[0], absDayList, configStatsArray[:, graphStats.geodesicDistMaxCompIndex], plotStyles4[0])
    plt.xticks(locs, labels)
    plt.xlabel("Year")
    plt.ylabel("Max component mean geodesic distance")
    plt.legend(loc="lower right")
    plt.savefig(figureDir + "MaxCompGeodesicGrowth.eps")
    plotInd += 1

    #Find the number of edges in the infection graph
    resultsFileName = resultsDir + "InfectGrowthScalarStats.pkl"
    infectStatsArray = Util.loadPickle(resultsFileName)

    #Make sure we don't include 0 in the array
    # argmax of a boolean array gives the index of its first True entry, so
    # minIndex is the first row at which both counts are positive.
    vertexIndex = numpy.argmax(statsArray[:, graphStats.numVerticesIndex] > 0)
    edgeIndex = numpy.argmax(infectStatsArray[:, graphStats.numEdgesIndex] > 0)
    minIndex = numpy.maximum(vertexIndex, edgeIndex)

    plt.figure(plotInd)
    plt.plot(numpy.log(statsArray[minIndex:, graphStats.numVerticesIndex]), numpy.log(statsArray[minIndex:, graphStats.numEdgesIndex]), plotStyleBW[0])
    plt.plot(numpy.log(infectStatsArray[minIndex:, graphStats.numVerticesIndex]), numpy.log(infectStatsArray[minIndex:, graphStats.numEdgesIndex]), plotStyleBW[1])
    plt.plot(numpy.log(statsArray[minIndex:, graphStats.maxComponentSizeIndex]), numpy.log(statsArray[minIndex:, graphStats.maxComponentEdgesIndex]), plotStyleBW[2])
    plt.xlabel("log(|V|)")
    plt.ylabel("log(|E|)/log(|D|)")
    plt.legend(("Contact graph", "Infection graph", "Max component"), loc="upper left")
    plt.savefig(figureDir + "LogVerticesEdgesGrowth.eps")
    plotInd += 1

    # Columns: effective diameter (real, config model) followed by the max
    # component mean geodesic distance (real, config model).
    results = statsArray[:, graphStats.effectiveDiameterIndex]
    results = numpy.c_[results, configStatsArray[:, graphStats.effectiveDiameterIndex]]
    results = numpy.c_[results, statsArray[:, graphStats.geodesicDistMaxCompIndex]]
    results = numpy.c_[results, configStatsArray[:, graphStats.geodesicDistMaxCompIndex]]

    print("\n\n")
    print(Latex.listToRow(["Diameter", "CM Diameter", "Mean Geodesic", "CM Mean Geodesic"]))
    print("\\hline")
    for i in range(0, len(dayList), 4):
        day = dayList[i]
        print(str(DateUtils.getDateStrFromDay(day, startYear)) + " & " + Latex.array1DToRow(results[i, :]) + "\\\\")
def plotVertexStats():
    """
    Compute per-subgraph vertex statistics, plot them and print LaTeX tables.

    For every index set in subgraphIndicesList the corresponding subgraph of
    sGraph (and of sGraphInfect) is extracted and a number of counts and means
    are taken over its vertex array: detection age, orientation/gender counts,
    detection-type counts, per-province counts, mean numbers of
    contacts/tested/positive, graph sizes and degree statistics.

    Side effects:
      * the MSM (orientation == 1) and heterosexual (orientation == 0) counts
        are pickled to resultsDir + "ContactGrowthVertexStats.pkl";
      * one matplotlib figure is created per statistic, most of which are
        written as .eps files under figureDir;
      * the global figure counter plotInd is advanced once per figure;
      * two LaTeX tables (provinces, orientation/gender) are printed for every
        4th entry of dayList.

    The function takes no arguments and returns nothing; all of its inputs
    (graphs, column indices, plot styles, tick locations, output directories)
    are names defined outside this function.
    """
    global plotInd

    #Calculate all vertex statistics
    logging.info("Computing vertex stats")

    #Indices
    numContactsIndex = fInds["numContacts"]
    numTestedIndex = fInds["numTested"]
    numPositiveIndex = fInds["numPositive"]

    #Properties of vertex values
    detectionAges = []

    maleSums = []
    femaleSums = []
    heteroSums = []
    biSums = []

    contactMaleSums = []
    contactFemaleSums = []
    contactHeteroSums = []
    contactBiSums = []

    doctorMaleSums = []
    doctorFemaleSums = []
    doctorHeteroSums = []
    doctorBiSums = []

    contactSums = []
    donorSums = []
    randomTestSums = []
    stdSums = []
    recommendSums = []
    #This is: all detections - contact, donor, randomTest, std, recommend
    otherSums = []

    havanaSums = []
    villaClaraSums = []
    pinarSums = []
    holguinSums = []
    habanaSums = []
    sanctiSums = []

    #Despite the "Sums" suffix, the num*/prop* lists below hold per-subgraph
    #means (they are filled with numpy.mean)
    numContactSums = []
    numTestedSums = []
    numPositiveSums = []

    numContactMaleSums = []
    numContactFemaleSums = []
    numContactHeteroSums = []
    numContactBiSums = []

    numTestedMaleSums = []
    numTestedFemaleSums = []
    numTestedHeteroSums = []
    numTestedBiSums = []

    numPositiveMaleSums = []
    numPositiveFemaleSums = []
    numPositiveHeteroSums = []
    numPositiveBiSums = []

    propPositiveMaleSums = []
    propPositiveFemaleSums = []
    propPositiveHeteroSums = []
    propPositiveBiSums = []

    numContactVertices = []
    numContactEdges = []
    numInfectEdges = []

    #Mean proportion of degree at end of epidemic
    meanPropDegree = []
    #The builtin float is used because the numpy.float alias no longer exists
    #in recent NumPy releases; it denotes the same type
    finalDegreeSequence = numpy.array(sGraph.outDegreeSequence(), float)

    degreeOneSums = []
    degreeTwoSums = []
    degreeThreePlusSums = []

    numProvinces = 15
    provinceArray = numpy.zeros((len(subgraphIndicesList), numProvinces))

    for m, subgraphIndices in enumerate(subgraphIndicesList):
        subgraph = sGraph.subgraph(subgraphIndices)
        infectSubGraph = sGraphInfect.subgraph(subgraphIndices)
        vertexArray = subgraph.getVertexList().getVertices(range(subgraph.getNumVertices()))

        #Row masks and columns which are reused by several statistics
        isHetero = vertexArray[:, orientationIndex] == 0
        isBi = vertexArray[:, orientationIndex] == 1
        isFemale = vertexArray[:, genderIndex] == 1
        isMale = vertexArray[:, genderIndex] == 0
        contactColumn = vertexArray[:, contactIndex]
        doctorColumn = vertexArray[:, doctorIndex]

        detectionAges.append(numpy.mean(vertexArray[:, detectionIndex] - vertexArray[:, dobIndex])/daysInYear)

        #Detection types
        contactSums.append(numpy.sum(contactColumn))
        donorSums.append(numpy.sum(vertexArray[:, donorIndex]))
        randomTestSums.append(numpy.sum(vertexArray[:, randomTestIndex]))
        stdSums.append(numpy.sum(vertexArray[:, stdIndex]))
        recommendSums.append(numpy.sum(doctorColumn))
        detectionTypeInds = [contactIndex, donorIndex, randomTestIndex, stdIndex, doctorIndex]
        otherSums.append(vertexArray.shape[0] - numpy.sum(vertexArray[:, detectionTypeInds]))

        #Orientation and gender counts, overall and per detection type
        heteroSums.append(numpy.sum(isHetero))
        biSums.append(numpy.sum(isBi))
        femaleSums.append(numpy.sum(isFemale))
        maleSums.append(numpy.sum(isMale))

        contactHeteroSums.append(numpy.sum(numpy.logical_and(isHetero, contactColumn)))
        contactBiSums.append(numpy.sum(numpy.logical_and(isBi, contactColumn)))
        contactFemaleSums.append(numpy.sum(numpy.logical_and(isFemale, contactColumn)))
        contactMaleSums.append(numpy.sum(numpy.logical_and(isMale, contactColumn)))

        doctorHeteroSums.append(numpy.sum(numpy.logical_and(isHetero, doctorColumn)))
        doctorBiSums.append(numpy.sum(numpy.logical_and(isBi, doctorColumn)))
        doctorFemaleSums.append(numpy.sum(numpy.logical_and(isFemale, doctorColumn)))
        doctorMaleSums.append(numpy.sum(numpy.logical_and(isMale, doctorColumn)))

        #Provinces
        havanaSums.append(numpy.sum(vertexArray[:, havanaIndex] == 1))
        villaClaraSums.append(numpy.sum(vertexArray[:, villaClaraIndex] == 1))
        pinarSums.append(numpy.sum(vertexArray[:, pinarIndex] == 1))
        holguinSums.append(numpy.sum(vertexArray[:, holguinIndex] == 1))
        habanaSums.append(numpy.sum(vertexArray[:, habanaIndex] == 1))
        sanctiSums.append(numpy.sum(vertexArray[:, sanctiIndex] == 1))

        #Mean number of contacts, tested contacts and positive contacts
        numContactSums.append(numpy.mean(vertexArray[:, numContactsIndex]))
        numTestedSums.append(numpy.mean(vertexArray[:, numTestedIndex]))
        numPositiveSums.append(numpy.mean(vertexArray[:, numPositiveIndex]))

        numContactMaleSums.append(numpy.mean(vertexArray[isMale, numContactsIndex]))
        numContactFemaleSums.append(numpy.mean(vertexArray[isFemale, numContactsIndex]))
        numContactHeteroSums.append(numpy.mean(vertexArray[isHetero, numContactsIndex]))
        numContactBiSums.append(numpy.mean(vertexArray[isBi, numContactsIndex]))

        numTestedMaleSums.append(numpy.mean(vertexArray[isMale, numTestedIndex]))
        numTestedFemaleSums.append(numpy.mean(vertexArray[isFemale, numTestedIndex]))
        numTestedHeteroSums.append(numpy.mean(vertexArray[isHetero, numTestedIndex]))
        numTestedBiSums.append(numpy.mean(vertexArray[isBi, numTestedIndex]))

        numPositiveMaleSums.append(numpy.mean(vertexArray[isMale, numPositiveIndex]))
        numPositiveFemaleSums.append(numpy.mean(vertexArray[isFemale, numPositiveIndex]))
        numPositiveHeteroSums.append(numpy.mean(vertexArray[isHetero, numPositiveIndex]))
        numPositiveBiSums.append(numpy.mean(vertexArray[isBi, numPositiveIndex]))

        #Each proportion divides by the tested mean of the *same* group
        propPositiveMaleSums.append(numPositiveMaleSums[m]/float(numTestedMaleSums[m]))
        propPositiveFemaleSums.append(numPositiveFemaleSums[m]/float(numTestedFemaleSums[m]))
        propPositiveHeteroSums.append(numPositiveHeteroSums[m]/float(numTestedHeteroSums[m]))
        propPositiveBiSums.append(numPositiveBiSums[m]/float(numTestedBiSums[m]))

        numContactVertices.append(subgraph.getNumVertices())
        numContactEdges.append(subgraph.getNumEdges())
        numInfectEdges.append(infectSubGraph.getNumEdges())

        #Degree statistics: vertices with a zero final degree are excluded
        #from the ratio to avoid dividing by zero
        outDegrees = subgraph.outDegreeSequence()
        finalDegrees = finalDegreeSequence[subgraphIndices]
        nonZeroInds = finalDegrees != 0
        meanPropDegree.append(numpy.mean(outDegrees[nonZeroInds]/finalDegrees[nonZeroInds]))

        degreeOneSums.append(numpy.sum(outDegrees == 1))
        degreeTwoSums.append(numpy.sum(outDegrees == 2))
        degreeThreePlusSums.append(numpy.sum(outDegrees >= 3))

        provinceArray[m, :] = numpy.sum(vertexArray[:, fInds["CA"]:fInds['VC']+1], 0)

    #Save some of the results for the ABC work
    numStats = 2
    vertexStatsArray = numpy.zeros((len(subgraphIndicesList), numStats))
    vertexStatsArray[:, 0] = numpy.array(biSums)
    vertexStatsArray[:, 1] = numpy.array(heteroSums)

    resultsFileName = resultsDir + "ContactGrowthVertexStats.pkl"
    Util.savePickle(vertexStatsArray, resultsFileName)

    orientGenderLabels = ("Heterosexual", "MSM", "Female", "Male")
    provinceLabels = ("Havana City", "Villa Clara", "Pinar del Rio", "Holguin", "La Habana", "Sancti Spiritus")

    def finishFigure(yLabel, fileName=None, legendLabels=None, legendLoc=None):
        #Apply the shared year axis, the y label, an optional legend and
        #optionally save the current figure under figureDir
        plt.xticks(locs, labels)
        plt.xlabel("Year")
        plt.ylabel(yLabel)
        if legendLoc is not None:
            if legendLabels is None:
                plt.legend(loc=legendLoc)
            else:
                plt.legend(legendLabels, loc=legendLoc)
        if fileName is not None:
            plt.savefig(figureDir + fileName)

    def printGrowthTable(columnNames, results):
        #Print a LaTeX header row followed by one row for every 4th day
        print(Latex.listToRow(columnNames))
        print("\\hline")
        for i in range(0, len(dayList), 4):
            day = dayList[i]
            print(str(DateUtils.getDateStrFromDay(day, startYear)) + " & " + Latex.array1DToRow(results[:, i].T) + "\\\\")

    plt.figure(plotInd)
    plt.plot(absDayList, detectionAges)
    finishFigure("Detection Age (years)", "DetectionMeansGrowth.eps")
    plotInd += 1

    plt.figure(plotInd)
    plt.plot(absDayList, heteroSums, 'k-', absDayList, biSums, 'k--', absDayList, femaleSums, 'k-.', absDayList, maleSums, 'k:')
    finishFigure("Detections", "OrientationGenderGrowth.eps", orientGenderLabels, "upper left")
    plotInd += 1

    plt.figure(plotInd)
    plt.plot(absDayList, contactHeteroSums, 'k-', absDayList, contactBiSums, 'k--', absDayList, contactFemaleSums, 'k-.', absDayList, contactMaleSums, 'k:')
    finishFigure("Contact tracing detections", "OrientationGenderContact.eps", orientGenderLabels, "upper left")
    plotInd += 1

    plt.figure(plotInd)
    plt.plot(absDayList, doctorHeteroSums, 'k-', absDayList, doctorBiSums, 'k--', absDayList, doctorFemaleSums, 'k-.', absDayList, doctorMaleSums, 'k:')
    finishFigure("Doctor recommendation detections", "OrientationGenderDoctor.eps", orientGenderLabels, "upper left")
    plotInd += 1

    #Plot all the provinces (this figure is not written to disk). No
    #plt.hold(True) call is made: it was removed in Matplotlib 3.0 and
    #successive plot calls draw on the same axes anyway
    plt.figure(plotInd)
    for k in range(provinceArray.shape[1]):
        plt.plot(absDayList, provinceArray[:, k], label=str(k))
    finishFigure("Detections", legendLoc="upper left")
    plotInd += 1

    #Plot of detection types
    plt.figure(plotInd)
    plt.plot(absDayList, contactSums, plotStyles2[0], absDayList, donorSums, plotStyles2[1], absDayList, randomTestSums, plotStyles2[2], absDayList, stdSums, plotStyles2[3], absDayList, otherSums, plotStyles2[4], absDayList, recommendSums, plotStyles2[5])
    finishFigure("Detections", "DetectionGrowth.eps", ("Contact tracing", "Blood donation", "Random test", "STD", "Other test", "Doctor recommendation"), "upper left")
    plotInd += 1

    plt.figure(plotInd)
    plt.plot(absDayList, numContactSums, plotStyleBW[0], absDayList, numTestedSums, plotStyleBW[1], absDayList, numPositiveSums, plotStyleBW[2])
    finishFigure("Contacts", "ContactsGrowth.eps", ("No. contacts", "No. tested", "No. positive"), "center left")
    plotInd += 1

    plt.figure(plotInd)
    plt.plot(absDayList, numContactHeteroSums, plotStyleBW[0], absDayList, numContactBiSums, plotStyleBW[1], absDayList, numContactFemaleSums, plotStyleBW[2], absDayList, numContactMaleSums, plotStyleBW[3])
    finishFigure("Total contacts", "ContactsGrowthOrientGen.eps", orientGenderLabels, "upper right")
    plotInd += 1

    plt.figure(plotInd)
    plt.plot(absDayList, numTestedHeteroSums, plotStyleBW[0], absDayList, numTestedBiSums, plotStyleBW[1], absDayList, numTestedFemaleSums, plotStyleBW[2], absDayList, numTestedMaleSums, plotStyleBW[3])
    finishFigure("Tested contacts", "TestedGrowthOrientGen.eps", orientGenderLabels, "upper right")
    plotInd += 1

    plt.figure(plotInd)
    plt.plot(absDayList, numPositiveHeteroSums, plotStyleBW[0], absDayList, numPositiveBiSums, plotStyleBW[1], absDayList, numPositiveFemaleSums, plotStyleBW[2], absDayList, numPositiveMaleSums, plotStyleBW[3])
    finishFigure("Positive contacts", "PositiveGrowthOrientGen.eps", orientGenderLabels, "upper right")
    plotInd += 1

    #Proportion positive versus tested
    plt.figure(plotInd)
    plt.plot(absDayList, propPositiveHeteroSums, plotStyleBW[0], absDayList, propPositiveBiSums, plotStyleBW[1], absDayList, propPositiveFemaleSums, plotStyleBW[2], absDayList, propPositiveMaleSums, plotStyleBW[3])
    finishFigure("Proportion positive contacts", "PercentPositiveGrowthOrientGen.eps", orientGenderLabels, "upper right")
    plotInd += 1

    plt.figure(plotInd)
    plt.plot(absDayList, havanaSums, plotStyles2[0])
    plt.plot(absDayList, villaClaraSums, plotStyles2[1])
    plt.plot(absDayList, pinarSums, plotStyles2[2])
    plt.plot(absDayList, holguinSums, plotStyles2[3])
    plt.plot(absDayList, habanaSums, plotStyles2[4])
    plt.plot(absDayList, sanctiSums, plotStyles2[5])
    finishFigure("Detections", "ProvinceGrowth.eps", provinceLabels, "upper left")
    plotInd += 1

    plt.figure(plotInd)
    plt.plot(absDayList, numContactVertices, plotStyleBW[0], absDayList, numContactEdges, plotStyleBW[1], absDayList, numInfectEdges, plotStyleBW[2])
    finishFigure("Vertices/edges", "VerticesEdges.eps", ("Contact vertices", "Contact edges", "Infect edges"), "upper left")
    plotInd += 1

    plt.figure(plotInd)
    plt.plot(absDayList, meanPropDegree, plotStyleBW[0])
    finishFigure("Proportion of final degree", "MeanPropDegree.eps")
    plotInd += 1

    #Degree counts (this figure is not written to disk)
    plt.figure(plotInd)
    plt.plot(absDayList, degreeOneSums, plotStyleBW[0], absDayList, degreeTwoSums, plotStyleBW[1], absDayList, degreeThreePlusSums, plotStyleBW[2])
    finishFigure("Detections", legendLabels=("Degree = 1", "Degree = 2", "Degree >= 3"), legendLoc="upper left")
    plotInd += 1

    #Print a table of interesting stats: one row of the array per statistic,
    #one column per subgraph
    results = numpy.array([havanaSums, villaClaraSums, pinarSums, holguinSums, habanaSums, sanctiSums])
    printGrowthTable(list(provinceLabels), results)

    results = numpy.array([heteroSums, biSums, femaleSums, maleSums])
    print("\n\n")
    printGrowthTable(list(orientGenderLabels), results)
plt.plot(testROCs[m][0], testROCs[m][1], "r") plt.xlabel("False Positive Rate") plt.ylabel("True Positive Rate") plt.savefig(figureDir + labelName.replace(".", "_") + "-ROC.eps") plotInd += 1 #The last column is the mean value meanMeanAUCs = numpy.mean(meanAUCs, 1) meanStdAUCs = numpy.mean(stdAUCs, 1) meanAUCs = numpy.c_[meanAUCs, meanMeanAUCs] stdAUCs = numpy.c_[stdAUCs, meanStdAUCs] print("\n") print(Latex.listToRow(labelNames)) print(Latex.addRowNames(rowNames, Latex.array2DsToRows(meanAUCs, stdAUCs, 2))) #----------------- Results for RankBoost and RankSVM ---------------------------------- algorithmNames = ["RankBoost", "RankSVM"] numMethods = len(dataTypes)*len(algorithmNames) rowNames = numpy.zeros(numMethods, "a20") meanAUCs = numpy.zeros((numMethods, len(labelNames))) stdAUCs = numpy.zeros((numMethods, len(labelNames))) for m in range(len(algorithmNames)): algorithmName = algorithmNames[m] for i in range(len(labelNames)):
outputLists = graphRanker.vertexRankings(graph, relevantAuthorsInds) itemList = RankAggregator.generateItemList(outputLists) methodNames = graphRanker.getNames() if runLSI: outputFilename = dataset.getOutputFieldDir(field) + "outputListsLSI.npz" else: outputFilename = dataset.getOutputFieldDir(field) + "outputListsLDA.npz" Util.savePickle([outputLists, trainExpertMatchesInds, testExpertMatchesInds], outputFilename, debug=True) numMethods = len(outputLists) precisions = numpy.zeros((len(ns), numMethods)) averagePrecisions = numpy.zeros(numMethods) for i, n in enumerate(ns): for j in range(len(outputLists)): precisions[i, j] = Evaluator.precisionFromIndLists(testExpertMatchesInds, outputLists[j][0:n]) for j in range(len(outputLists)): averagePrecisions[j] = Evaluator.averagePrecisionFromLists(testExpertMatchesInds, outputLists[j][0:averagePrecisionN], averagePrecisionN) precisions2 = numpy.c_[numpy.array(ns), precisions] logging.debug(Latex.listToRow(methodNames)) logging.debug(Latex.array2DToRows(precisions2)) logging.debug(Latex.array1DToRow(averagePrecisions)) logging.debug("All done!")
#logging.debug("Read file: " + fileName) except: logging.debug("File not found : " + str(fileName)) numMissingFiles += 1 logging.debug("Number of missing files: " + str(numMissingFiles)) for i, dataName in enumerate(dataNames): print("-"*10 + dataName + "-"*10) algorithms = [x.ljust(20) for x in algorithmsAbbr] currentTestAucsMean = testAucsMean[:, i, :].T maxAUCs = numpy.zeros(currentTestAucsMean.shape, numpy.bool) maxAUCs[numpy.argmax(currentTestAucsMean, 0), numpy.arange(currentTestAucsMean.shape[1])] = 1 table = Latex.array2DToRows(testAucsMean[:, i, :].T, testAucsStd[:, i, :].T, precision=2, bold=maxAUCs) print(Latex.listToRow(hormoneNameIndicators)) print(Latex.addRowNames(algorithms, table)) #Now looks at the features for the raw spectra algorithm = "L1SvmTreeRankForest" dataName = "raw" numMissingFiles = 0 numFeatures = 100 numIndicators = 6 featureInds = numpy.zeros((numFeatures, numIndicators)) for i, (hormoneName, hormoneConc) in enumerate(helper.hormoneDict.items()): try: fileName = resultsDir + "Weights" + algorithm + "-" + hormoneName + "-0" + "-" + dataName + ".npy"