def testArray2DToRows(self):
    """Check Latex.array2DToRows: plain "mean (std)" rows, bold/italic
    highlighting via boolean masks, and the Y=None form that omits the
    bracketed std values."""
    numpy.random.seed(21)
    X = numpy.random.rand(2, 2)
    Y = numpy.random.rand(2, 2)

    # Plain "mean (std)" rows.
    outputStr = "0.049 (0.206) & 0.289 (0.051)\\\\\n"
    outputStr += "0.721 (0.302) & 0.022 (0.664)\\\\"
    # assertEqual rather than assertTrue(a == b) so a failure shows both strings.
    self.assertEqual(Latex.array2DToRows(X, Y), outputStr)

    #Now test using highlights
    Z = X > 0.2
    A = X > 0.7
    outputStr = "0.049 (0.206) & \\textbf{0.289} (0.051)\\\\\n"
    outputStr += "\\emph{\\textbf{0.721}} (0.302) & 0.022 (0.664)\\\\"
    self.assertEqual(Latex.array2DToRows(X, Y, bold=Z, italic=A), outputStr)

    #Now test leaving out the Y values
    outputStr = "0.049 & 0.289\\\\\n"
    outputStr += "0.721 & 0.022\\\\"
    self.assertEqual(Latex.array2DToRows(X, Y=None), outputStr)

    #Now put in bold
    # TODO(review): this final case builds the expected string but never calls
    # array2DToRows nor asserts — the bold-without-Y combination is untested.
    outputStr = "0.049 & 0.289\\\\\n"
    outputStr += "0.721 & 0.022\\\\"
def getLatexTable(measures, cvScalings, idealMeasures):
    """
    Build a LaTeX table body of "mean (std)" rows for each sample size,
    interleaving the measured results with the ideal results.

    :param measures: array of measures, axis 0 is the repetition axis;
        remaining axes are indexed as [sampleSize, fold, method] — TODO confirm.
    :param cvScalings: scalings passed through to getRowNames for row labels.
    :param idealMeasures: ideal measures, axis 0 again the repetition axis.
    :return: (table string, mean of measures over axis 0, std over axis 0).

    NOTE(review): relies on module-level names ``foldsSet``, ``sampleSizes``,
    ``getRowNames`` and ``Latex`` — verify these are in scope at call time.
    """
    rowNames = getRowNames(cvScalings, True)
    table = Latex.array1DToRow(foldsSet) + "\\\\ \n"

    # The means/stds over the repetition axis do not depend on the loop
    # variable, so compute them once instead of on every iteration (the
    # original recomputed all four inside the loop).
    meanMeasures = numpy.mean(measures, 0)
    stdMeasures = numpy.std(measures, 0)
    meanIdealMeasures = numpy.mean(idealMeasures, 0)
    stdIdealMeasures = numpy.std(idealMeasures, 0)

    for j in range(sampleSizes.shape[0]):
        # Measured results for this sample size.
        table += Latex.array2DToRows(meanMeasures[j, :, :].T, stdMeasures[j, :, :].T) + "\n"
        # Ideal result repeated across the folds row.
        table += Latex.array2DToRows(numpy.ones((1, len(foldsSet)))*meanIdealMeasures[j], numpy.ones((1, len(foldsSet)))*stdIdealMeasures[j]) + "\n"

    table = Latex.addRowNames(rowNames, table)

    return table, meanMeasures, stdMeasures
def testAddRowNames(self):
    """Check Latex.addRowNames prefixes each table row with its name, and
    raises ValueError when the name count does not match the row count."""
    numpy.random.seed(21)
    X = numpy.random.rand(2, 2)
    Y = numpy.random.rand(2, 2)

    latexTable = Latex.array2DToRows(X, Y)
    rowNames = ["a", "b"]
    latexTable = Latex.addRowNames(rowNames, latexTable)

    outputStr = "a & 0.049 (0.206) & 0.289 (0.051)\\\\\n"
    outputStr += "b & 0.721 (0.302) & 0.022 (0.664)\\\\\n"
    # assertEqual rather than assertTrue(a == b) for a diagnostic diff on failure.
    self.assertEqual(latexTable, outputStr)

    #Now test error method: too many names for the two table rows
    rowNames = ["a", "b", "c"]
    self.assertRaises(ValueError, Latex.addRowNames, rowNames, latexTable)

    #Now test error method: too few names
    rowNames = ["a"]
    self.assertRaises(ValueError, Latex.addRowNames, rowNames, latexTable)
def summary(datasetNames, sampleSizes, foldsSet, cvScalings, sampleMethods, fileNameSuffix, gridResultsSuffix="GridResults"):
    """
    Print the errors for all results plus a summary.

    Loads per-dataset/per-sample-method .npz result files, prints LaTeX error
    tables, accumulates win counts and excess errors, and finally prints
    overall tables aggregated across datasets.

    NOTE(review): relies on module-level names ``outputDir``, ``getLatexTable``,
    ``getWins``, ``getIdealWins``, ``getRowNames`` and ``Latex`` — confirm they
    are in scope.
    """
    # One method per CV scaling plus the plain and ideal baselines — TODO confirm.
    numMethods = (1+(cvScalings.shape[0]+1))
    numDatasets = len(datasetNames)
    # NOTE(review): numpy.int / numpy.float were removed in NumPy 1.24; these
    # calls fail on modern NumPy (use int / float or numpy.int64 etc.).
    overallErrors = numpy.zeros((numDatasets, len(sampleMethods), sampleSizes.shape[0], foldsSet.shape[0], numMethods))
    overallStdWins = numpy.zeros((len(sampleMethods), len(sampleSizes), foldsSet.shape[0], numMethods+1, 3), numpy.int)
    overallErrorsPerSampMethod = numpy.zeros((numDatasets, len(sampleMethods), len(sampleSizes), numMethods), numpy.float)

    table1 = ""
    table2 = ""
    table3 = ""

    for i in range(len(datasetNames)):
        table3Error = numpy.zeros((2, len(sampleMethods)))
        table3Stds = numpy.zeros((2, len(sampleMethods)))

        for j in range(len(sampleMethods)):
            print("="*50 + "\n" + datasetNames[i] + "-" + sampleMethods[j] + "\n" + "="*50 )
            outfileName = outputDir + datasetNames[i] + sampleMethods[j] + fileNameSuffix + ".npz"

            # Missing result files are reported and skipped (see except below).
            try:
                data = numpy.load(outfileName)
                errors = data["arr_0"]
                params = data["arr_1"]
                meanErrorGrids = data["arr_2"]
                stdErrorGrids = data["arr_3"]
                meanApproxGrids = data["arr_4"]
                stdApproxGrids = data["arr_5"]

                #Load ideal results
                outfileName = outputDir + datasetNames[i] + gridResultsSuffix + ".npz"
                data = numpy.load(outfileName)
                idealErrors = data["arr_0"]

                errorTable, meanErrors, stdErrors = getLatexTable(errors, cvScalings, idealErrors)

                wins = getWins(errors)
                idealWins = getIdealWins(errors, idealErrors)
                excessError = numpy.zeros(errors.shape)

                # Excess error = error relative to method 0 (the baseline column).
                for k in range(errors.shape[1]):
                    excessError[:, k, :, :] = errors[:, k, :, :] - numpy.tile(errors[:, k, :, 0, numpy.newaxis], (1, 1, numMethods))

                meanExcessError = numpy.mean(excessError, 0)
                stdExcessError = numpy.std(excessError, 0)
                excessErrorTable, meanExcessErrors, stdExcessErrors = getLatexTable(excessError, cvScalings, idealErrors)

                overallErrorsPerSampMethod[i, j, :, :] = numpy.mean(meanErrors, 1)
                overallErrors[i, j, :, :, :] = meanExcessError
                # Last slot of axis 3 holds the ideal wins; the rest are per-method.
                overallStdWins[j, :, :, 0:-1, :] += wins
                overallStdWins[j, :, :, -1, :] += idealWins
                print(errorTable)
                #print("Min error is: " + str(numpy.min(meanErrors)))
                #print("Max error is: " + str(numpy.max(meanErrors)))
                #print("Mean error is: " + str(numpy.mean(meanErrors)) + "\n")

                #This is a table with V=10, alpha=1 and CV sampling
                sliceFoldIndex = 0

                print(meanErrors[0, 1, 0])
                numSliceMethods = 3
                table1Error = numpy.zeros(len(sampleSizes)*numSliceMethods)
                table1Std = numpy.zeros(len(sampleSizes)*numSliceMethods)
                # Interleave methods 0, 1 and 4 per sample size — TODO confirm
                # these column indices against the method ordering.
                for k in range(len(sampleSizes)):
                    table1Error[k*numSliceMethods] = meanErrors[k, sliceFoldIndex, 0]
                    table1Error[k*numSliceMethods+1] = meanErrors[k, sliceFoldIndex, 1]
                    table1Error[k*numSliceMethods+2] = meanErrors[k, sliceFoldIndex, 4]
                    table1Std[k*numSliceMethods] = stdErrors[k, sliceFoldIndex, 0]
                    table1Std[k*numSliceMethods+1] = stdErrors[k, sliceFoldIndex, 1]
                    table1Std[k*numSliceMethods+2] = stdErrors[k, sliceFoldIndex, 4]

                if j == 0:
                    table1 += datasetNames[i] + " & " + Latex.array2DToRows(numpy.array([table1Error]), numpy.array([table1Std])) + "\n"

                #See how alpha varies with V=10, CV sampling
                # NOTE(review): numpy.zeros(range(n)) passes a range object as
                # the shape, giving a multi-dimensional array — almost certainly
                # meant numpy.zeros(numMethods-2). Harmless here only because
                # both names are rebound inside the loop below.
                table2Error = numpy.zeros(range(numMethods-2))
                table2Std = numpy.zeros(range(numMethods-2))
                for s in range(len(sampleSizes)):
                    table2Error = meanErrors[s, sliceFoldIndex, 2:]
                    table2Std = stdErrors[s, sliceFoldIndex, 2:]

                    if j == 0:
                        table2 += datasetNames[i] + " $m=" + str(sampleSizes[s]) + "$ & " + Latex.array2DToRows(numpy.array([table2Error]), numpy.array([table2Std])) + "\n"

                """
                #See how each sample method effects CV and pen alpha=1
                fourFoldIndex = 4
                hundredMIndex = 1

                table3Error[0, j] = meanErrors[hundredMIndex, fourFoldIndex, 0]
                table3Error[1, j] = meanErrors[hundredMIndex, fourFoldIndex, 3]
                table3Stds[0, j] = stdErrors[hundredMIndex, fourFoldIndex, 0]
                table3Stds[1, j] = stdErrors[hundredMIndex, fourFoldIndex, 3]
                """
            except IOError:
                print("Failed to open file: " + outfileName)

        # NOTE(review): table3Error/table3Stds are only filled by the
        # commented-out block above, so this row is currently all zeros.
        table3 += Latex.addRowNames([datasetNames[i] + " Std ", datasetNames[i] + " PenVF "], Latex.array2DToRows(table3Error, table3Stds))

        datasetMeanErrors = Latex.listToRow(sampleMethods) + "\n"

        for j in range(len(sampleSizes)):
            datasetMeanErrors += Latex.array2DToRows(overallErrorsPerSampMethod[i, :, j, :].T) + "\n"

        datasetMeanErrors = Latex.addRowNames(getRowNames(cvScalings), datasetMeanErrors)
        print(datasetMeanErrors)

    print("="*50 + "\n" + "Sliced Tables" + "\n" + "="*50)
    print(table1 + "\n")
    print(table2 + "\n")
    print(table3)

    print("="*50 + "\n" + "Overall" + "\n" + "="*50)

    overallMeanErrors = numpy.mean(overallErrors, 0)
    overallStdErrors = numpy.std(overallErrors, 0)

    for i in range(len(sampleMethods)):
        print("-"*20 + sampleMethods[i] + "-"*20)
        overallErrorTable = Latex.array1DToRow(foldsSet) + "\\\\ \n"
        overallWinsTable = Latex.array1DToRow(foldsSet) + " & Total & " +Latex.array1DToRow(foldsSet) + " & Total \\\\ \n"

        rowNames = getRowNames(cvScalings)

        for j in range(sampleSizes.shape[0]):
            # Negative excess errors (better than baseline) are rendered bold.
            overallErrorTable += Latex.array2DToRows(overallMeanErrors[i, j, :, :].T, overallStdErrors[i, j, :, :].T, bold=overallMeanErrors[i, j, :, :].T<0) + "\n"

            # Stack losses/ties/wins side by side — TODO confirm slot meanings.
            tiesWins = numpy.r_[overallStdWins[i, j, :, :, 0], overallStdWins[i, j, :, :, 1], overallStdWins[i, j, :, :, 2]]
            overallWinsTable += Latex.array2DToRows(tiesWins.T) + "\n"

        overallErrorTable = Latex.addRowNames(rowNames, overallErrorTable)

        rowNames = getRowNames(cvScalings, True)
        overallWinsTable = Latex.addRowNames(rowNames, overallWinsTable)

        print(Latex.latexTable(overallWinsTable, "Wins for " + sampleMethods[i], True))
        # Strip leading zeros ("0.12" -> ".12") for compact LaTeX output.
        print(Latex.latexTable(overallErrorTable.replace("0.", "."), "Excess errors for " + sampleMethods[i], True))
        #print(overallWinsTable)
        #print(overallErrorTable)

    #Now print the mean errors for all datasets
    datasetMeanErrors = Latex.listToRow(sampleMethods) + "\n"
    overallErrorsPerSampMethod = numpy.mean(overallErrorsPerSampMethod[:, :, :, :], 0)

    for j in range(len(sampleSizes)):
        datasetMeanErrors += Latex.array2DToRows(overallErrorsPerSampMethod[:, j, :].T) + "\n"

    datasetMeanErrors = Latex.addRowNames(getRowNames(cvScalings), datasetMeanErrors)
    print(datasetMeanErrors)
# Scatter features 2 vs 3, positives in red (negatives presumably plotted just
# before this fragment — TODO confirm against the preceding lines).
plt.scatter(X[y == posLabel, 2], X[y == posLabel, 3], c="r")
plt.xlabel("X_(2)")
plt.ylabel("X_(3)")
plt.savefig("../Lecture2/Figures/X34.eps")
k += 1  # next matplotlib figure number

n = X.shape[0]
d = X.shape[1]
# Pairwise Pearson correlation matrix of the d features.
correlations = numpy.zeros((X.shape[1], X.shape[1]))

for i in range(d):
    for j in range(d):
        correlations[i, j] = numpy.corrcoef(X[:, i], X[:, j])[0, 1]

print(Latex.array2DToRows(correlations))

#Run PCA and find first directions
pca = PCA(n_components=3)
newX = pca.fit_transform(X)

# Cross-check PCA against the eigendecomposition of (1/n) X^T X.
# NOTE(review): X is not mean-centred here, so these eigenvalues only match
# PCA's explained variance if X was centred earlier — verify upstream.
s, V = numpy.linalg.eigh(1 / float(n) * X.T.dot(X))
s = numpy.sort(s)
print(s)
print(pca.explained_variance_ratio_)
print(pca.components_[0, :])

# Projection onto the first two principal components, coloured by class.
plt.figure(k)
plt.scatter(newX[y != 1, 0], newX[y != 1, 1], c="b")
plt.scatter(newX[y == 1, 0], newX[y == 1, 1], c="r")
def plotVectorStats():
    """Compute and plot per-date vector statistics (degree, tree size/depth
    distributions) for the infection graph, then print LaTeX summary tables.

    Reads/writes module-level state: plotInd, resultsDir, figureDir, sGraph,
    graphStats, subgraphIndicesList2, dayList2, startYear, saveResults,
    plotStyles2 — all assumed defined at module scope.
    """
    #Finally, compute some vector stats at various points in the graph
    logging.info("Computing vector stats")
    global plotInd
    resultsFileName = resultsDir + "InfectGrowthVectorStats.pkl"

    # Either recompute the stats and cache them, or reload the cached pickle.
    if saveResults:
        statsDictList = graphStats.sequenceVectorStats(sGraph, subgraphIndicesList2, True)
        Util.savePickle(statsDictList, resultsFileName, True)
    else:
        statsDictList = Util.loadPickle(resultsFileName)

    # Over-sized buffers; each row is filled up to that date's distribution length.
    treeSizesDistArray = numpy.zeros((len(dayList2), 3000))
    treeDepthsDistArray = numpy.zeros((len(dayList2), 100))
    # NOTE(review): numpy.int was removed in NumPy 1.24 — use int/numpy.int64.
    numVerticesEdgesArray = numpy.zeros((len(dayList2), 2), numpy.int)
    numVerticesEdgesArray[:, 0] = [len(sgl) for sgl in subgraphIndicesList2]
    numVerticesEdgesArray[:, 1] = [sGraph.subgraph(sgl).getNumEdges() for sgl in subgraphIndicesList2]

    for j in range(len(dayList2)):
        dateStr = (str(DateUtils.getDateStrFromDay(dayList2[j], startYear)))
        logging.info(dateStr)
        statsDict = statsDictList[j]

        # Normalise the out-degree counts to a probability distribution.
        degreeDist = statsDict["outDegreeDist"]
        degreeDist = degreeDist/float(numpy.sum(degreeDist))
        # Largest-eigenvector coefficients, sorted descending, log of positives.
        maxEigVector = statsDict["maxEigVector"]
        maxEigVector = numpy.flipud(numpy.sort(numpy.abs(maxEigVector)))
        maxEigVector = numpy.log(maxEigVector[maxEigVector>0])
        treeSizesDist = statsDict["treeSizesDist"]
        treeSizesDist = numpy.array(treeSizesDist, numpy.float64)/numpy.sum(treeSizesDist)
        treeSizesDistArray[j, 0:treeSizesDist.shape[0]] = treeSizesDist
        treeDepthsDist = statsDict["treeDepthsDist"]
        # Depths are kept as raw counts (normalisation deliberately disabled).
        #treeDepthsDist = numpy.array(treeDepthsDist, numpy.float64)/numpy.sum(treeDepthsDist)
        treeDepthsDist = numpy.array(treeDepthsDist, numpy.float64)
        treeDepthsDistArray[j, 0:treeDepthsDist.shape[0]] = treeDepthsDist

        plotInd2 = plotInd

        plt.figure(plotInd2)
        plt.plot(numpy.arange(degreeDist.shape[0]), degreeDist, label=dateStr)
        plt.xlabel("Degree")
        plt.ylabel("Probability")
        plt.ylim((0, 0.8))
        plt.legend()
        plt.savefig(figureDir + "DegreeDist" + ".eps")
        plotInd2 += 1

        # Log-probability of tree sizes, zero-probability sizes omitted.
        plt.figure(plotInd2)
        plt.scatter(numpy.arange(treeSizesDist.shape[0])[treeSizesDist!=0], numpy.log(treeSizesDist[treeSizesDist!=0]), s=30, c=plotStyles2[j][0], label=dateStr)
        plt.xlabel("Size")
        plt.ylabel("log(probability)")
        plt.xlim((0, 125))
        plt.legend()
        plt.savefig(figureDir + "TreeSizeDist" + ".eps")
        plotInd2 += 1

        plt.figure(plotInd2)
        plt.scatter(numpy.arange(treeDepthsDist.shape[0])[treeDepthsDist!=0], numpy.log(treeDepthsDist[treeDepthsDist!=0]), s=30, c=plotStyles2[j][0], label=dateStr)
        plt.xlabel("Depth")
        plt.ylabel("log(probability)")
        plt.xlim((0, 15))
        plt.legend()
        plt.savefig(figureDir + "TreeDepthDist" + ".eps")
        plotInd2 += 1

    dateStrList = [DateUtils.getDateStrFromDay(day, startYear) for day in dayList2]
    precision = 4

    # Trim the over-sized buffers to the last date's distribution length and
    # drop all-zero columns before printing the LaTeX tables.
    # NOTE(review): trimming to the LAST iteration's treeSizesDist length may
    # discard columns used by earlier dates — confirm lengths are monotone.
    treeSizesDistArray = treeSizesDistArray[:, 0:treeSizesDist.shape[0]]
    nonZeroCols = numpy.sum(treeSizesDistArray, 0)!=0
    print((Latex.array1DToRow(numpy.arange(treeSizesDistArray.shape[1])[nonZeroCols])))
    print((Latex.array2DToRows(treeSizesDistArray[:, nonZeroCols])))

    print("Tree depths")
    treeDepthsDistArray = treeDepthsDistArray[:, 0:treeDepthsDist.shape[0]]
    nonZeroCols = numpy.sum(treeDepthsDistArray, 0)!=0
    print((Latex.array1DToRow(numpy.arange(treeDepthsDistArray.shape[1])[nonZeroCols])))
    print((Latex.array2DToRows(treeDepthsDistArray[:, nonZeroCols])))
    print(numpy.sum(treeDepthsDistArray[:, 0:3], 1))

    print("Edges and verticies")
    print(Latex.listToRow(dateStrList))
    print(Latex.array2DToRows(numVerticesEdgesArray.T, precision))
def plotVectorStats():
    """Compute and plot per-date vector statistics for the contact graph,
    compare against configuration-model graphs, and print LaTeX tables of
    distributions and demographics of the most central vertices.

    Reads/writes module-level state: plotInd, resultsDir, figureDir, sGraph,
    graphStats, subgraphIndicesList2, dayList2, startYear, saveResults,
    numConfigGraphs, detections, the various *Index column constants,
    daysInYear, plotStyles, plotStyles2 — all assumed defined at module scope.
    """
    #Finally, compute some vector stats at various points in the graph
    logging.info("Computing vector stats")
    global plotInd
    resultsFileName = resultsDir + "ContactGrowthVectorStats.pkl"

    # Either recompute the stats and cache them, or reload the cached pickle.
    if saveResults:
        statsDictList = graphStats.sequenceVectorStats(sGraph, subgraphIndicesList2)
        Util.savePickle(statsDictList, resultsFileName, False)
    else:
        statsDictList = Util.loadPickle(resultsFileName)

    #Load up configuration model results
    configStatsDictList = []
    resultsFileNameBase = resultsDir + "ConfigGraphVectorStats"

    for j in range(numConfigGraphs):
        resultsFileName = resultsFileNameBase + str(j)
        configStatsDictList.append(Util.loadPickle(resultsFileName))

    #Now need to take mean of 1st element of list
    # NOTE(review): this aliases (does not copy) configStatsDictList[0], so
    # the accumulation below also mutates the first entry in place.
    meanConfigStatsDictList = configStatsDictList[0]

    for i in range(len(configStatsDictList[0])):
        for k in range(1, numConfigGraphs):
            for key in configStatsDictList[k][i].keys():
                # Pad the shorter array with zeros so the shapes match before summing.
                if configStatsDictList[k][i][key].shape[0] > meanConfigStatsDictList[i][key].shape[0]:
                    meanConfigStatsDictList[i][key] = numpy.r_[meanConfigStatsDictList[i][key], numpy.zeros(configStatsDictList[k][i][key].shape[0] - meanConfigStatsDictList[i][key].shape[0])]
                elif configStatsDictList[k][i][key].shape[0] < meanConfigStatsDictList[i][key].shape[0]:
                    configStatsDictList[k][i][key] = numpy.r_[configStatsDictList[k][i][key], numpy.zeros(meanConfigStatsDictList[i][key].shape[0] - configStatsDictList[k][i][key].shape[0])]

                meanConfigStatsDictList[i][key] += configStatsDictList[k][i][key]

        for key in configStatsDictList[0][i].keys():
            meanConfigStatsDictList[i][key] = meanConfigStatsDictList[i][key]/numConfigGraphs

    # Over-sized buffers, filled per date up to each distribution's length.
    triangleDistArray = numpy.zeros((len(dayList2), 100))
    configTriangleDistArray = numpy.zeros((len(dayList2), 100))
    hopPlotArray = numpy.zeros((len(dayList2), 27))
    configHopPlotArray = numpy.zeros((len(dayList2), 30))
    componentsDistArray = numpy.zeros((len(dayList2), 3000))
    configComponentsDistArray = numpy.zeros((len(dayList2), 3000))
    # NOTE(review): numpy.int / numpy.float / numpy.bool were removed in
    # NumPy 1.24 — these dtype aliases fail on modern NumPy.
    numVerticesEdgesArray = numpy.zeros((len(dayList2), 2), numpy.int)
    numVerticesEdgesArray[:, 0] = [len(sgl) for sgl in subgraphIndicesList2]
    numVerticesEdgesArray[:, 1] = [sGraph.subgraph(sgl).getNumEdges() for sgl in subgraphIndicesList2]

    binWidths = numpy.arange(0, 0.50, 0.05)
    eigVectorDists = numpy.zeros((len(dayList2), binWidths.shape[0]-1), numpy.int)

    # Per-date counts over the top-10% most central vertices.
    femaleSums = numpy.zeros(len(dayList2))
    maleSums = numpy.zeros(len(dayList2))
    heteroSums = numpy.zeros(len(dayList2))
    biSums = numpy.zeros(len(dayList2))
    contactSums = numpy.zeros(len(dayList2))
    nonContactSums = numpy.zeros(len(dayList2))
    donorSums = numpy.zeros(len(dayList2))
    randomTestSums = numpy.zeros(len(dayList2))
    stdSums = numpy.zeros(len(dayList2))
    prisonerSums = numpy.zeros(len(dayList2))
    recommendSums = numpy.zeros(len(dayList2))
    meanAges = numpy.zeros(len(dayList2))
    degrees = numpy.zeros((len(dayList2), 20))
    provinces = numpy.zeros((len(dayList2), 15))
    havanaSums = numpy.zeros(len(dayList2))
    villaClaraSums = numpy.zeros(len(dayList2))
    pinarSums = numpy.zeros(len(dayList2))
    holguinSums = numpy.zeros(len(dayList2))
    habanaSums = numpy.zeros(len(dayList2))
    sanctiSums = numpy.zeros(len(dayList2))
    meanDegrees = numpy.zeros(len(dayList2))
    stdDegrees = numpy.zeros(len(dayList2))

    #Note that death has a lot of missing values
    for j in range(len(dayList2)):
        dateStr = (str(DateUtils.getDateStrFromDay(dayList2[j], startYear)))
        logging.info(dateStr)
        statsDict = statsDictList[j]
        configStatsDict = meanConfigStatsDictList[j]

        degreeDist = statsDict["outDegreeDist"]
        degreeDist = degreeDist/float(numpy.sum(degreeDist))
        #Note that degree distribution for configuration graph will be identical
        eigenDist = statsDict["eigenDist"]
        eigenDist = numpy.log(eigenDist[eigenDist>=10**-1])
        #configEigenDist = configStatsDict["eigenDist"]
        #configEigenDist = numpy.log(configEigenDist[configEigenDist>=10**-1])
        hopCount = statsDict["hopCount"]
        hopCount = numpy.log10(hopCount)
        hopPlotArray[j, 0:hopCount.shape[0]] = hopCount
        configHopCount = configStatsDict["hopCount"]
        configHopCount = numpy.log10(configHopCount)
        # NOTE(review): the fill of configHopPlotArray is commented out, yet
        # the array is printed below — the config hop table is all zeros.
        #configHopPlotArray[j, 0:configHopCount.shape[0]] = configHopCount
        triangleDist = statsDict["triangleDist"]
        # Triangle counts kept raw (normalisation deliberately disabled).
        #triangleDist = numpy.array(triangleDist, numpy.float64)/numpy.sum(triangleDist)
        triangleDist = numpy.array(triangleDist, numpy.float64)
        triangleDistArray[j, 0:triangleDist.shape[0]] = triangleDist
        configTriangleDist = configStatsDict["triangleDist"]
        configTriangleDist = numpy.array(configTriangleDist, numpy.float64)/numpy.sum(configTriangleDist)
        configTriangleDistArray[j, 0:configTriangleDist.shape[0]] = configTriangleDist

        # Indices of vertices sorted by descending eigenvector centrality;
        # keep the top 10%.
        maxEigVector = statsDict["maxEigVector"]
        eigenvectorInds = numpy.flipud(numpy.argsort(numpy.abs(maxEigVector)))
        # NOTE(review): numpy.round returns a float, which is invalid as a
        # slice bound on Python 3 / modern NumPy — should be wrapped in int().
        top10eigenvectorInds = eigenvectorInds[0:numpy.round(eigenvectorInds.shape[0]/10.0)]
        maxEigVector = numpy.abs(maxEigVector[eigenvectorInds])
        #print(maxEigVector)
        eigVectorDists[j, :] = numpy.histogram(maxEigVector, binWidths)[0]

        componentsDist = statsDict["componentsDist"]
        componentsDist = numpy.array(componentsDist, numpy.float64)/numpy.sum(componentsDist)
        componentsDistArray[j, 0:componentsDist.shape[0]] = componentsDist
        configComponentsDist = configStatsDict["componentsDist"]
        configComponentsDist = numpy.array(configComponentsDist, numpy.float64)/numpy.sum(configComponentsDist)
        configComponentsDistArray[j, 0:configComponentsDist.shape[0]] = configComponentsDist

        plotInd2 = plotInd

        plt.figure(plotInd2)
        plt.plot(numpy.arange(degreeDist.shape[0]), degreeDist, plotStyles2[j], label=dateStr)
        plt.xlabel("Degree")
        plt.ylabel("Probability")
        plt.ylim((0, 0.5))
        plt.savefig(figureDir + "DegreeDist" + ".eps")
        plt.legend()
        plotInd2 += 1

        """
        plt.figure(plotInd2)
        plt.plot(numpy.arange(eigenDist.shape[0]), eigenDist, label=dateStr)
        plt.xlabel("Eigenvalue rank")
        plt.ylabel("log(Eigenvalue)")
        plt.savefig(figureDir + "EigenDist" + ".eps")
        plt.legend()
        plotInd2 += 1
        """

        #How does kleinberg do the hop plots
        plt.figure(plotInd2)
        plt.plot(numpy.arange(hopCount.shape[0]), hopCount, plotStyles[j], label=dateStr)
        plt.xlabel("k")
        plt.ylabel("log10(pairs)")
        plt.ylim( (2.5, 7) )
        plt.legend(loc="lower right")
        plt.savefig(figureDir + "HopCount" + ".eps")
        plotInd2 += 1

        plt.figure(plotInd2)
        plt.plot(numpy.arange(maxEigVector.shape[0]), maxEigVector, plotStyles2[j], label=dateStr)
        plt.xlabel("Rank")
        plt.ylabel("log(eigenvector coefficient)")
        plt.savefig(figureDir + "MaxEigVector" + ".eps")
        plt.legend()
        plotInd2 += 1

        #Compute some information the 10% most central vertices
        subgraphIndices = numpy.nonzero(detections <= dayList2[j])[0]
        subgraph = sGraph.subgraph(subgraphIndices)
        subgraphVertexArray = subgraph.getVertexList().getVertices()

        # NOTE(review): top10eigenvectorInds was derived from the stats-dict
        # graph; confirm its indices are valid rows of this subgraph's vertex
        # array (ordering/size must agree).
        femaleSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, genderIndex]==1)
        maleSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, genderIndex]==0)
        heteroSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, orientationIndex]==0)
        biSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, orientationIndex]==1)
        contactSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, contactIndex])
        donorSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, donorIndex])
        randomTestSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, randomTestIndex])
        stdSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, stdIndex])
        prisonerSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, prisonerIndex])
        recommendSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, doctorIndex])

        # Age at detection, converted from days to years.
        meanAges[j] = numpy.mean(subgraphVertexArray[top10eigenvectorInds, detectionIndex] - subgraphVertexArray[top10eigenvectorInds, dobIndex])/daysInYear

        havanaSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, havanaIndex])
        villaClaraSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, villaClaraIndex])
        pinarSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, pinarIndex])
        holguinSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, holguinIndex])
        habanaSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, habanaIndex])
        sanctiSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, sanctiIndex])

        # Columns 22:37 presumably hold the per-province indicator flags —
        # TODO confirm against the vertex schema.
        provinces[j, :] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, 22:37], 0)

        ddist = numpy.bincount(subgraph.outDegreeSequence()[top10eigenvectorInds])
        degrees[j, 0:ddist.shape[0]] = numpy.array(ddist, numpy.float)/numpy.sum(ddist)

        meanDegrees[j] = numpy.mean(subgraph.outDegreeSequence()[top10eigenvectorInds])
        stdDegrees[j] = numpy.std(subgraph.outDegreeSequence()[top10eigenvectorInds])

        plt.figure(plotInd2)
        plt.plot(numpy.arange(degrees[j, :].shape[0]), degrees[j, :], plotStyles2[j], label=dateStr)
        plt.xlabel("Degree")
        plt.ylabel("Probability")
        #plt.ylim((0, 0.5))
        plt.savefig(figureDir + "DegreeDistCentral" + ".eps")
        plt.legend()
        plotInd2 += 1

    precision = 4
    dateStrList = [DateUtils.getDateStrFromDay(day, startYear) for day in dayList2]

    print("Hop counts")
    print(Latex.listToRow(dateStrList))
    print(Latex.array2DToRows(hopPlotArray.T))

    print("\nHop counts for configuration graphs")
    print(Latex.listToRow(dateStrList))
    print(Latex.array2DToRows(configHopPlotArray.T))

    print("\n\nEdges and vertices")
    print((Latex.listToRow(dateStrList)))
    print((Latex.array2DToRows(numVerticesEdgesArray.T, precision)))

    print("\n\nEigenvector distribution")
    print((Latex.array1DToRow(binWidths[1:]) + "\\\\"))
    print((Latex.array2DToRows(eigVectorDists)))

    # For each distribution: trim buffer to the last date's length, drop
    # all-zero columns, and prepend a header row of column indices.
    print("\n\nDistribution of component sizes")
    componentsDistArray = componentsDistArray[:, 0:componentsDist.shape[0]]
    nonZeroCols = numpy.sum(componentsDistArray, 0)!=0
    componentsDistArray = numpy.r_[numpy.array([numpy.arange(componentsDistArray.shape[1])[nonZeroCols]]), componentsDistArray[:, nonZeroCols]]
    print((Latex.listToRow(dateStrList)))
    print((Latex.array2DToRows(componentsDistArray.T, precision)))

    print("\n\nDistribution of component sizes in configuration graphs")
    configComponentsDistArray = configComponentsDistArray[:, 0:configComponentsDist.shape[0]]
    nonZeroCols = numpy.sum(configComponentsDistArray, 0)!=0
    configComponentsDistArray = numpy.r_[numpy.array([numpy.arange(configComponentsDistArray.shape[1])[nonZeroCols]]), configComponentsDistArray[:, nonZeroCols]]
    print((Latex.listToRow(dateStrList)))
    print((Latex.array2DToRows(configComponentsDistArray.T, precision)))

    print("\n\nDistribution of triangle participations")
    triangleDistArray = triangleDistArray[:, 0:triangleDist.shape[0]]
    nonZeroCols = numpy.sum(triangleDistArray, 0)!=0
    # Header indices halved: each triangle is counted twice — TODO confirm.
    triangleDistArray = numpy.r_[numpy.array([numpy.arange(triangleDistArray.shape[1])[nonZeroCols]])/2, triangleDistArray[:, nonZeroCols]]
    print((Latex.listToRow(dateStrList)))
    print((Latex.array2DToRows(triangleDistArray.T, precision)))

    configTriangleDistArray = configTriangleDistArray[:, 0:configTriangleDist.shape[0]]
    nonZeroCols = numpy.sum(configTriangleDistArray, 0)!=0
    configTriangleDistArray = numpy.r_[numpy.array([numpy.arange(configTriangleDistArray.shape[1])[nonZeroCols]])/2, configTriangleDistArray[:, nonZeroCols]]
    # Zero-pad to the same width as triangleDistArray for side-by-side use.
    configTriangleDistArray = numpy.c_[configTriangleDistArray, numpy.zeros((configTriangleDistArray.shape[0], triangleDistArray.shape[1]-configTriangleDistArray.shape[1]))]

    print("\n\nDistribution of central vertices")
    print((Latex.listToRow(dateStrList)))
    subgraphSizes = numpy.array(maleSums + femaleSums, numpy.float)
    print("Female & " + Latex.array1DToRow(femaleSums*100/subgraphSizes, 1) + "\\\\")
    print("Male & " + Latex.array1DToRow(maleSums*100/subgraphSizes, 1) + "\\\\")
    print("\hline")
    print("Heterosexual & " + Latex.array1DToRow(heteroSums*100/subgraphSizes, 1) + "\\\\")
    print("Bisexual & " + Latex.array1DToRow(biSums*100/subgraphSizes, 1) + "\\\\")
    print("\hline")
    print("Contact traced & " + Latex.array1DToRow(contactSums*100/subgraphSizes, 1) + "\\\\")
    print("Blood donor & " + Latex.array1DToRow(donorSums*100/subgraphSizes, 1) + "\\\\")
    print("RandomTest & " + Latex.array1DToRow(randomTestSums*100/subgraphSizes, 1) + "\\\\")
    print("STD & " + Latex.array1DToRow(stdSums*100/subgraphSizes, 1) + "\\\\")
    print("Prisoner & " + Latex.array1DToRow(prisonerSums*100/subgraphSizes, 1) + "\\\\")
    print("Doctor recommendation & " + Latex.array1DToRow(recommendSums*100/subgraphSizes, 1) + "\\\\")
    print("\hline")
    print("Mean ages (years) & " + Latex.array1DToRow(meanAges, 2) + "\\\\")
    print("\hline")
    print("Holguin & " + Latex.array1DToRow(holguinSums*100/subgraphSizes, 1) + "\\\\")
    print("La Habana & " + Latex.array1DToRow(habanaSums*100/subgraphSizes, 1) + "\\\\")
    print("Havana City & " + Latex.array1DToRow(havanaSums*100/subgraphSizes, 1) + "\\\\")
    print("Pinar del Rio & " + Latex.array1DToRow(pinarSums*100/subgraphSizes, 1) + "\\\\")
    print("Sancti Spiritus & " + Latex.array1DToRow(sanctiSums*100/subgraphSizes, 1) + "\\\\")
    print("Villa Clara & " + Latex.array1DToRow(villaClaraSums*100/subgraphSizes, 1) + "\\\\")
    print("\hline")
    print("Mean degrees & " + Latex.array1DToRow(meanDegrees, 2) + "\\\\")
    print("Std degrees & " + Latex.array1DToRow(stdDegrees, 2) + "\\\\")

    print("\n\nProvinces")
    print(Latex.array2DToRows(provinces))

    print("\n\nDegree distribution")
    print(Latex.array2DToRows(degrees))
# Mean/std of the degree distributions over the repetition axis (axis 2),
# plotted with error bars against the ideal distribution for index `ind`
# (presumably bound by an enclosing loop — TODO confirm surrounding context).
meanDegreeDists = numpy.mean(degreeDists, 2)
stdDegreeDists = numpy.std(degreeDists, 2)
plt.figure(plotInd)
plt.errorbar(numpy.arange(numDegrees), meanDegreeDists[ind, :], yerr=stdDegreeDists[ind, :], color="k")
plt.plot(numpy.arange(numDegrees), idealDegreeDists[ind, :], "k--")
plt.xlabel("degree")
plt.ylabel("frequency")
plotInd += 1

#Print the table of thetas
thetas = numpy.array(thetas)
meanThetas = numpy.mean(thetas, 1)
stdThetas = numpy.std(thetas, 1)
# NOTE(review): meanThetas/stdThetas are 1-D here, so .T is a no-op —
# confirm array2DToRows accepts/expects this shape.
table = Latex.array2DToRows(meanThetas.T, stdThetas.T, precision=4)
# Row labels are the model parameters, in theta order.
rowNames = ["$|\\mathcal{I}_0 |$", "$\\alpha$", "$\\gamma$", "$\\beta$", "$\\lambda$", "$\\sigma$"]
table = Latex.addRowNames(rowNames, table)
print(table)

#Now print the graph properties
idealTable = []
tableMeanArray = []
tableStdArray = []
for ind in inds:
    idealTable.append(idealMeasures[ind, :, timeInds])
    tableMeanArray.append(meanMeasures[ind, :, timeInds])
    tableStdArray.append(stdMeasures[ind, :, timeInds])
idealTable = numpy.vstack(idealTable).T
# NOTE(review): this fragment is a near-duplicate of an earlier scatter/
# correlation/PCA block in this file — consider extracting a shared helper.
plt.scatter(X[y==posLabel, 2], X[y==posLabel, 3], c="r")
plt.xlabel("X_(2)")
plt.ylabel("X_(3)")
plt.savefig("../Lecture2/Figures/X34.eps")
k += 1  # next matplotlib figure number

n = X.shape[0]
d = X.shape[1]
# Pairwise Pearson correlation matrix of the d features.
correlations = numpy.zeros((X.shape[1], X.shape[1]))

for i in range(d):
    for j in range(d):
        correlations[i, j] = numpy.corrcoef(X[:, i], X[:, j])[0,1]

print(Latex.array2DToRows(correlations))

#Run PCA and find first directions
pca = PCA(n_components=3)
newX = pca.fit_transform(X)

# Cross-check PCA against the eigendecomposition of (1/n) X^T X.
# NOTE(review): X is not mean-centred here — only matches PCA's explained
# variance if X was centred earlier; verify upstream.
s, V = numpy.linalg.eigh(1/float(n)*X.T.dot(X))
s = numpy.sort(s)
print(s)
print(pca.explained_variance_ratio_)
print(pca.components_[0, :])

# Projection onto the first two principal components, coloured by class.
plt.figure(k)
plt.scatter(newX[y!=1, 0], newX[y!=1, 1], c="b")
plt.scatter(newX[y==1, 0], newX[y==1, 1], c="r")
# Print the posterior theta table for one experiment, then load the ideal
# simulation statistics (i/j presumably bound by enclosing loops — TODO
# confirm surrounding context).
t = i
logging.debug(resultsDir)
newNumRecordSteps = numRecordSteps + 5
endDate += HIVModelUtils.realTestPeriods[j]
recordStep = (endDate-startDate)/float(newNumRecordSteps)

thetaArray = loadThetaArray(N, resultsDir, t)[0]
print(thetaArray)

# Column vectors of per-parameter mean/std across the N theta samples.
meanTable = numpy.array([thetaArray.mean(0)]).T
print(meanTable)
stdTable = numpy.array([thetaArray.std(0)]).T
table = Latex.array2DToRows(meanTable, stdTable, precision=4)
# Row labels are the HIV model parameters, in theta order — TODO confirm
# order matches loadThetaArray's column layout.
rowNames = ["$\\|\\mathcal{I}_0 \\|$", "$\\rho_B$", "$\\alpha$", "$C$", "$\\gamma$", "$\\beta$", "$\\kappa_{max}$", "$\\lambda_H$", "$\\lambda_B$", "$\\sigma_{WM}$", "$\\sigma_{MW}$","$\\sigma_{MB}$"]
table = Latex.addRowNames(rowNames, table)
print(table)

resultsFileName = outputDir + "IdealStats.pkl"
stats = Util.loadPickle(resultsFileName)
times, vertexArray, removedGraphStats = stats

# Shift both time axes so they are relative to the simulation start date;
# drop the first record point from the regular grid.
times = numpy.array(times) - startDate
times2 = numpy.arange(startDate, endDate+1, recordStep)
times2 = times2[1:]
times2 = numpy.array(times2) - startDate

graphStats = GraphStatistics()
# Rank candidate experts with each graph-ranking method, persist the raw
# rankings, then score precision@n and average precision per method.
outputLists = graphRanker.vertexRankings(graph, relevantAuthorsInds)
itemList = RankAggregator.generateItemList(outputLists)
methodNames = graphRanker.getNames()

# Output filename depends on which topic model produced the graph.
if runLSI:
    outputFilename = dataset.getOutputFieldDir(field) + "outputListsLSI.npz"
else:
    outputFilename = dataset.getOutputFieldDir(field) + "outputListsLDA.npz"

Util.savePickle([outputLists, trainExpertMatchesInds, testExpertMatchesInds], outputFilename, debug=True)

numMethods = len(outputLists)
precisions = numpy.zeros((len(ns), numMethods))
averagePrecisions = numpy.zeros(numMethods)

# Precision@n against the held-out expert matches, for each cutoff n.
for i, n in enumerate(ns):
    for j in range(len(outputLists)):
        precisions[i, j] = Evaluator.precisionFromIndLists(testExpertMatchesInds, outputLists[j][0:n])

for j in range(len(outputLists)):
    averagePrecisions[j] = Evaluator.averagePrecisionFromLists(testExpertMatchesInds, outputLists[j][0:averagePrecisionN], averagePrecisionN)

# Prepend the cutoff column so each row reads "n, precision per method".
precisions2 = numpy.c_[numpy.array(ns), precisions]

logging.debug(Latex.listToRow(methodNames))
logging.debug(Latex.array2DToRows(precisions2))
logging.debug(Latex.array1DToRow(averagePrecisions))
logging.debug("All done!")
testAucsStd[i, j, k] = numpy.std(errors) #logging.debug("Read file: " + fileName) except: logging.debug("File not found : " + str(fileName)) numMissingFiles += 1 logging.debug("Number of missing files: " + str(numMissingFiles)) for i, dataName in enumerate(dataNames): print("-"*10 + dataName + "-"*10) algorithms = [x.ljust(20) for x in algorithmsAbbr] currentTestAucsMean = testAucsMean[:, i, :].T maxAUCs = numpy.zeros(currentTestAucsMean.shape, numpy.bool) maxAUCs[numpy.argmax(currentTestAucsMean, 0), numpy.arange(currentTestAucsMean.shape[1])] = 1 table = Latex.array2DToRows(testAucsMean[:, i, :].T, testAucsStd[:, i, :].T, precision=2, bold=maxAUCs) print(Latex.listToRow(hormoneNameIndicators)) print(Latex.addRowNames(algorithms, table)) #Now looks at the features for the raw spectra algorithm = "L1SvmTreeRankForest" dataName = "raw" numMissingFiles = 0 numFeatures = 100 numIndicators = 6 featureInds = numpy.zeros((numFeatures, numIndicators)) for i, (hormoneName, hormoneConc) in enumerate(helper.hormoneDict.items()): try: