Example #1
    def testArray2DToRows(self):
        numpy.random.seed(21)
        X = numpy.random.rand(2, 2)
        Y = numpy.random.rand(2, 2)

        outputStr = "0.049 (0.206) & 0.289 (0.051)\\\\\n"
        outputStr += "0.721 (0.302) & 0.022 (0.664)\\\\"

        self.assertEqual(Latex.array2DToRows(X, Y), outputStr)

        #Now test using highlights
        Z = X > 0.2
        A = X > 0.7
        outputStr = "0.049 (0.206) & \\textbf{0.289} (0.051)\\\\\n"
        outputStr += "\\emph{\\textbf{0.721}} (0.302) & 0.022 (0.664)\\\\"
        self.assertEqual(Latex.array2DToRows(X, Y, bold=Z, italic=A), outputStr)

        #Now test leaving out the Y values
        outputStr = "0.049 & 0.289\\\\\n"
        outputStr += "0.721 & 0.022\\\\"
        self.assertEqual(Latex.array2DToRows(X, Y=None), outputStr)

        #Now put in bold 
        outputStr = "0.049 & 0.289\\\\\n"
        outputStr += "0.721 & 0.022\\\\"
Example #2
def getLatexTable(measures, cvScalings, idealMeasures):
    #Note: foldsSet and sampleSizes are assumed to be defined at module level
    rowNames = getRowNames(cvScalings, True)
    table = Latex.array1DToRow(foldsSet) + "\\\\ \n"

    for j in range(sampleSizes.shape[0]):
        meanMeasures = numpy.mean(measures, 0)
        stdMeasures = numpy.std(measures, 0)
        table += Latex.array2DToRows(meanMeasures[j, :, :].T, stdMeasures[j, :, :].T) + "\n"
        
        meanIdealMeasures = numpy.mean(idealMeasures, 0)
        stdIdealMeasures = numpy.std(idealMeasures, 0)
        table += Latex.array2DToRows(numpy.ones((1, len(foldsSet)))*meanIdealMeasures[j], numpy.ones((1, len(foldsSet)))*stdIdealMeasures[j]) + "\n"

    table = Latex.addRowNames(rowNames, table)
    return table, meanMeasures, stdMeasures
Example #3
    def testAddRowNames(self):
        numpy.random.seed(21)
        X = numpy.random.rand(2, 2)
        Y = numpy.random.rand(2, 2)

        latexTable = Latex.array2DToRows(X, Y)
        rowNames = ["a", "b"]
        latexTable = Latex.addRowNames(rowNames, latexTable)

        outputStr = "a & 0.049 (0.206) & 0.289 (0.051)\\\\\n"
        outputStr += "b & 0.721 (0.302) & 0.022 (0.664)\\\\\n"

        self.assertEqual(latexTable, outputStr)

        #Too many row names should raise a ValueError
        rowNames = ["a", "b", "c"]
        self.assertRaises(ValueError, Latex.addRowNames, rowNames, latexTable)

        #Too few row names should raise a ValueError
        rowNames = ["a"]
        self.assertRaises(ValueError, Latex.addRowNames, rowNames, latexTable)
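
These tests also fix the contract of Latex.addRowNames: each row gains a "name & " prefix and a trailing newline, and a ValueError is raised when the number of names does not match the number of rows. A sketch consistent with the tests above (again, not the library's own code):

def addRowNames(rowNames, latexTable):
    #Prefix each LaTeX row with its name; the row count must match exactly
    rows = latexTable.strip().split("\n")
    if len(rowNames) != len(rows):
        raise ValueError("Wrong number of row names: " + str(len(rowNames)))
    return "".join(name + " & " + row + "\n" for name, row in zip(rowNames, rows))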
Example #4
def summary(datasetNames, sampleSizes, foldsSet, cvScalings, sampleMethods, fileNameSuffix, gridResultsSuffix="GridResults"):
    """
    Print the errors for all results plus a summary. 
    """
    numMethods = (1+(cvScalings.shape[0]+1))
    numDatasets = len(datasetNames)
    overallErrors = numpy.zeros((numDatasets, len(sampleMethods), sampleSizes.shape[0], foldsSet.shape[0], numMethods))
    overallStdWins = numpy.zeros((len(sampleMethods), len(sampleSizes), foldsSet.shape[0], numMethods+1, 3), int)
    overallErrorsPerSampMethod = numpy.zeros((numDatasets, len(sampleMethods), len(sampleSizes), numMethods), float)
    
    table1 = ""
    table2 = ""
    table3 = ""

    for i in range(len(datasetNames)):
        table3Error = numpy.zeros((2, len(sampleMethods)))   
        table3Stds = numpy.zeros((2, len(sampleMethods)))   
        
        for j in range(len(sampleMethods)):
            print("="*50 + "\n" + datasetNames[i] + "-" + sampleMethods[j] + "\n" + "="*50 )
            
            #outputDir is assumed to be defined at module level
            outfileName = outputDir + datasetNames[i] + sampleMethods[j] + fileNameSuffix + ".npz"
            try: 
                
                data = numpy.load(outfileName)
    
                errors = data["arr_0"]
                params = data["arr_1"]
                meanErrorGrids = data["arr_2"]
                stdErrorGrids = data["arr_3"]
                meanApproxGrids = data["arr_4"]
                stdApproxGrids = data["arr_5"]      
                
                #Load ideal results 
                outfileName = outputDir + datasetNames[i]  + gridResultsSuffix + ".npz"
                data = numpy.load(outfileName)
                idealErrors = data["arr_0"]
                
                errorTable, meanErrors, stdErrors = getLatexTable(errors, cvScalings, idealErrors)
    
                wins = getWins(errors)
                idealWins = getIdealWins(errors, idealErrors)
                excessError = numpy.zeros(errors.shape)
    
                for k in range(errors.shape[1]):
                    excessError[:, k, :, :] = errors[:, k, :, :] - numpy.tile(errors[:, k, :, 0, numpy.newaxis], (1, 1, numMethods))
    
                meanExcessError = numpy.mean(excessError, 0)
                stdExcessError = numpy.std(excessError, 0)
                excessErrorTable, meanExcessErrors, stdExcessErrors = getLatexTable(excessError, cvScalings, idealErrors)
    
                overallErrorsPerSampMethod[i, j, :, :] = numpy.mean(meanErrors, 1)
                overallErrors[i, j, :, :, :] = meanExcessError
                overallStdWins[j, :, :, 0:-1, :] += wins
                overallStdWins[j, :, :, -1, :] += idealWins
                print(errorTable)
                #print("Min error is: " + str(numpy.min(meanErrors)))
                #print("Max error is: " + str(numpy.max(meanErrors)))
                #print("Mean error is: " + str(numpy.mean(meanErrors)) + "\n")
                
                #This is a table with V=10, alpha=1 and CV sampling 
                
                sliceFoldIndex = 0  
                
                print(meanErrors[0, 1, 0])
                numSliceMethods = 3
                table1Error = numpy.zeros(len(sampleSizes)*numSliceMethods)
                table1Std = numpy.zeros(len(sampleSizes)*numSliceMethods)
                for  k in range(len(sampleSizes)):
                    table1Error[k*numSliceMethods] = meanErrors[k, sliceFoldIndex, 0]
                    table1Error[k*numSliceMethods+1] = meanErrors[k, sliceFoldIndex, 1]
                    table1Error[k*numSliceMethods+2] = meanErrors[k, sliceFoldIndex, 4]

                    table1Std[k*numSliceMethods] = stdErrors[k, sliceFoldIndex, 0]
                    table1Std[k*numSliceMethods+1] = stdErrors[k, sliceFoldIndex, 1]
                    table1Std[k*numSliceMethods+2] = stdErrors[k, sliceFoldIndex, 4]
                    
                if j == 0: 
                    table1 += datasetNames[i] + " & " + Latex.array2DToRows(numpy.array([table1Error]), numpy.array([table1Std])) + "\n"

                #See how alpha varies with V=10, CV sampling 
                table2Error = numpy.zeros(numMethods-2)
                table2Std = numpy.zeros(numMethods-2)
                for s in range(len(sampleSizes)): 
                    table2Error = meanErrors[s, sliceFoldIndex, 2:]
                    table2Std = stdErrors[s, sliceFoldIndex, 2:]
                
                    if j == 0: 
                        table2 += datasetNames[i] + " $m=" + str(sampleSizes[s]) + "$ & " + Latex.array2DToRows(numpy.array([table2Error]), numpy.array([table2Std])) + "\n"
    
                """
                #See how each sample method effects CV and pen alpha=1
                fourFoldIndex = 4  
                hundredMIndex = 1            
                
                table3Error[0, j] = meanErrors[hundredMIndex, fourFoldIndex, 0]
                table3Error[1, j] = meanErrors[hundredMIndex, fourFoldIndex, 3]
                table3Stds[0, j] = stdErrors[hundredMIndex, fourFoldIndex, 0]
                table3Stds[1, j] = stdErrors[hundredMIndex, fourFoldIndex, 3]
                """
            except IOError: 
                print("Failed to open file: " + outfileName)

        table3 +=  Latex.addRowNames([datasetNames[i] + " Std ", datasetNames[i] + " PenVF "], Latex.array2DToRows(table3Error, table3Stds))            
            
        datasetMeanErrors = Latex.listToRow(sampleMethods) + "\n"

        for j in range(len(sampleSizes)):
            datasetMeanErrors += Latex.array2DToRows(overallErrorsPerSampMethod[i, :, j, :].T) + "\n"

        datasetMeanErrors = Latex.addRowNames(getRowNames(cvScalings), datasetMeanErrors)
        print(datasetMeanErrors)
     
    print("="*50 + "\n" + "Sliced Tables" + "\n" + "="*50)   
    
    print(table1 + "\n")
    print(table2 + "\n")
    print(table3)
     
    print("="*50 + "\n" + "Overall" + "\n" + "="*50)

    overallMeanErrors = numpy.mean(overallErrors, 0)
    overallStdErrors = numpy.std(overallErrors, 0)

    for i in range(len(sampleMethods)):
        print("-"*20 + sampleMethods[i] + "-"*20)
        overallErrorTable = Latex.array1DToRow(foldsSet) + "\\\\ \n"
        overallWinsTable = Latex.array1DToRow(foldsSet) + " & Total & "  +Latex.array1DToRow(foldsSet) + " & Total \\\\ \n"

        rowNames = getRowNames(cvScalings)

        for j in range(sampleSizes.shape[0]):
            overallErrorTable += Latex.array2DToRows(overallMeanErrors[i, j, :, :].T, overallStdErrors[i, j, :, :].T, bold=overallMeanErrors[i, j, :, :].T<0) + "\n"

            tiesWins = numpy.r_[overallStdWins[i, j, :, :, 0], overallStdWins[i, j, :, :, 1], overallStdWins[i, j, :, :, 2]]            
            
            overallWinsTable += Latex.array2DToRows(tiesWins.T) + "\n"

        overallErrorTable = Latex.addRowNames(rowNames, overallErrorTable)
        
        rowNames = getRowNames(cvScalings, True)
        overallWinsTable = Latex.addRowNames(rowNames, overallWinsTable)

        print(Latex.latexTable(overallWinsTable, "Wins for " + sampleMethods[i], True))
        print(Latex.latexTable(overallErrorTable.replace("0.", "."), "Excess errors for " + sampleMethods[i], True))
        #print(overallWinsTable)
        #print(overallErrorTable)

    #Now print the mean errors for all datasets
    datasetMeanErrors = Latex.listToRow(sampleMethods) + "\n"
    overallErrorsPerSampMethod = numpy.mean(overallErrorsPerSampMethod[:, :, :, :], 0)

    for j in range(len(sampleSizes)):
        datasetMeanErrors += Latex.array2DToRows(overallErrorsPerSampMethod[:, j, :].T) + "\n"

    datasetMeanErrors = Latex.addRowNames(getRowNames(cvScalings), datasetMeanErrors)
    print(datasetMeanErrors)
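
Example #4 chains the helpers in a recurring pattern: format mean/std arrays into rows, then label the rows. A minimal self-contained illustration of that pattern using the sketches above (the numbers and row names are illustrative only):

import numpy

means = numpy.array([[0.12, 0.34], [0.56, 0.78]])
stds = numpy.array([[0.01, 0.02], [0.03, 0.04]])
table = array2DToRows(means, stds)            #"0.120 (0.010) & 0.340 (0.020)\\" ...
table = addRowNames(["Std", "PenVF"], table)  #"Std & 0.120 (0.010) & ..."
print(table)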
Example #5
plt.scatter(X[y == posLabel, 2], X[y == posLabel, 3], c="r")
plt.xlabel("X_(2)")
plt.ylabel("X_(3)")
plt.savefig("../Lecture2/Figures/X34.eps")
k += 1

n = X.shape[0]
d = X.shape[1]

correlations = numpy.zeros((X.shape[1], X.shape[1]))

for i in range(d):
    for j in range(d):
        correlations[i, j] = numpy.corrcoef(X[:, i], X[:, j])[0, 1]

print(Latex.array2DToRows(correlations))
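
#Note: the nested loop above fills the correlation matrix one entry at a time;
#numpy.corrcoef can compute the same (d, d) matrix in a single vectorized call:
#    correlations = numpy.corrcoef(X, rowvar=False)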

#Run PCA and find first directions
pca = PCA(n_components=3)
newX = pca.fit_transform(X)

s, V = numpy.linalg.eigh(1 / float(n) * X.T.dot(X))
s = numpy.sort(s)
print(s)

print(pca.explained_variance_ratio_)
print(pca.components_[0, :])

plt.figure(k)
plt.scatter(newX[y != 1, 0], newX[y != 1, 1], c="b")
plt.scatter(newX[y == 1, 0], newX[y == 1, 1], c="r")
Example #6
def plotVectorStats():
    #Finally, compute some vector stats at various points in the graph
    logging.info("Computing vector stats")
    global plotInd
    resultsFileName = resultsDir + "InfectGrowthVectorStats.pkl"

    if saveResults:
        statsDictList = graphStats.sequenceVectorStats(sGraph, subgraphIndicesList2, True)
        Util.savePickle(statsDictList, resultsFileName, True)
    else:
        statsDictList = Util.loadPickle(resultsFileName)

        treeSizesDistArray = numpy.zeros((len(dayList2), 3000))
        treeDepthsDistArray = numpy.zeros((len(dayList2), 100))
        numVerticesEdgesArray = numpy.zeros((len(dayList2), 2), int)
        numVerticesEdgesArray[:, 0] = [len(sgl) for sgl in subgraphIndicesList2]
        numVerticesEdgesArray[:, 1] = [sGraph.subgraph(sgl).getNumEdges() for sgl in subgraphIndicesList2]

        for j in range(len(dayList2)):
            dateStr = (str(DateUtils.getDateStrFromDay(dayList2[j], startYear)))
            logging.info(dateStr)
            statsDict = statsDictList[j]

            degreeDist = statsDict["outDegreeDist"]
            degreeDist = degreeDist/float(numpy.sum(degreeDist))

            maxEigVector = statsDict["maxEigVector"]
            maxEigVector = numpy.flipud(numpy.sort(numpy.abs(maxEigVector)))
            maxEigVector = numpy.log(maxEigVector[maxEigVector>0])

            treeSizesDist = statsDict["treeSizesDist"]
            treeSizesDist = numpy.array(treeSizesDist, numpy.float64)/numpy.sum(treeSizesDist)
            treeSizesDistArray[j, 0:treeSizesDist.shape[0]] = treeSizesDist

            treeDepthsDist = statsDict["treeDepthsDist"]
            #treeDepthsDist = numpy.array(treeDepthsDist, numpy.float64)/numpy.sum(treeDepthsDist)
            treeDepthsDist = numpy.array(treeDepthsDist, numpy.float64)
            treeDepthsDistArray[j, 0:treeDepthsDist.shape[0]] = treeDepthsDist

            plotInd2 = plotInd

            plt.figure(plotInd2)
            plt.plot(numpy.arange(degreeDist.shape[0]), degreeDist, label=dateStr)
            plt.xlabel("Degree")
            plt.ylabel("Probability")
            plt.ylim((0, 0.8))
            plt.legend()
            plt.savefig(figureDir + "DegreeDist" +  ".eps")
            plotInd2 += 1

            plt.figure(plotInd2)
            plt.scatter(numpy.arange(treeSizesDist.shape[0])[treeSizesDist!=0], numpy.log(treeSizesDist[treeSizesDist!=0]), s=30, c=plotStyles2[j][0], label=dateStr)
            plt.xlabel("Size")
            plt.ylabel("log(probability)")
            plt.xlim((0, 125))
            plt.legend()
            plt.savefig(figureDir + "TreeSizeDist" +  ".eps")
            plotInd2 += 1

            plt.figure(plotInd2)
            plt.scatter(numpy.arange(treeDepthsDist.shape[0])[treeDepthsDist!=0], numpy.log(treeDepthsDist[treeDepthsDist!=0]), s=30, c=plotStyles2[j][0], label=dateStr)
            plt.xlabel("Depth")
            plt.ylabel("log(probability)")
            plt.xlim((0, 15))
            plt.legend()
            plt.savefig(figureDir + "TreeDepthDist" +  ".eps")
            plotInd2 += 1

        dateStrList = [DateUtils.getDateStrFromDay(day, startYear) for day in dayList2]
        precision = 4 

        treeSizesDistArray = treeSizesDistArray[:, 0:treeSizesDist.shape[0]]
        nonZeroCols = numpy.sum(treeSizesDistArray, 0)!=0
        print((Latex.array1DToRow(numpy.arange(treeSizesDistArray.shape[1])[nonZeroCols])))
        print((Latex.array2DToRows(treeSizesDistArray[:, nonZeroCols])))

        print("Tree depths")
        treeDepthsDistArray = treeDepthsDistArray[:, 0:treeDepthsDist.shape[0]]
        nonZeroCols = numpy.sum(treeDepthsDistArray, 0)!=0
        print((Latex.array1DToRow(numpy.arange(treeDepthsDistArray.shape[1])[nonZeroCols])))
        print((Latex.array2DToRows(treeDepthsDistArray[:, nonZeroCols])))

        print(numpy.sum(treeDepthsDistArray[:, 0:3], 1))

        print("Edges and verticies")
        print(Latex.listToRow(dateStrList))
        print(Latex.array2DToRows(numVerticesEdgesArray.T, precision))
Example #7
def plotVectorStats():
    #Finally, compute some vector stats at various points in the graph
    logging.info("Computing vector stats")
    global plotInd
    resultsFileName = resultsDir + "ContactGrowthVectorStats.pkl"

    if saveResults:
        statsDictList = graphStats.sequenceVectorStats(sGraph, subgraphIndicesList2)
        Util.savePickle(statsDictList, resultsFileName, False)
    else:
        statsDictList = Util.loadPickle(resultsFileName)

        #Load up configuration model results
        configStatsDictList = []
        resultsFileNameBase = resultsDir + "ConfigGraphVectorStats"

        for j in range(numConfigGraphs):
            resultsFileName = resultsFileNameBase + str(j)
            configStatsDictList.append(Util.loadPickle(resultsFileName))

        #Now average the stats dictionaries over the numConfigGraphs runs, key by key
        meanConfigStatsDictList = configStatsDictList[0]
        for i in range(len(configStatsDictList[0])):
            for k in range(1, numConfigGraphs):
                for key in configStatsDictList[k][i].keys():
                    if configStatsDictList[k][i][key].shape[0] > meanConfigStatsDictList[i][key].shape[0]:
                        meanConfigStatsDictList[i][key] = numpy.r_[meanConfigStatsDictList[i][key], numpy.zeros(configStatsDictList[k][i][key].shape[0] - meanConfigStatsDictList[i][key].shape[0])]
                    elif configStatsDictList[k][i][key].shape[0] < meanConfigStatsDictList[i][key].shape[0]:
                        configStatsDictList[k][i][key] = numpy.r_[configStatsDictList[k][i][key], numpy.zeros(meanConfigStatsDictList[i][key].shape[0] - configStatsDictList[k][i][key].shape[0])]

                    meanConfigStatsDictList[i][key] += configStatsDictList[k][i][key]

            for key in configStatsDictList[0][i].keys():
                meanConfigStatsDictList[i][key] = meanConfigStatsDictList[i][key]/numConfigGraphs
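
        #The loop above zero-pads each per-graph stats vector to a common length
        #and then averages over the numConfigGraphs runs; in essence (hypothetical
        #helper, shown only to clarify the logic):
        #    def meanPadded(vectors):
        #        maxLen = max(v.shape[0] for v in vectors)
        #        total = numpy.zeros(maxLen)
        #        for v in vectors:
        #            total[0:v.shape[0]] += v
        #        return total/len(vectors)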


        triangleDistArray = numpy.zeros((len(dayList2), 100))
        configTriangleDistArray = numpy.zeros((len(dayList2), 100))
        hopPlotArray = numpy.zeros((len(dayList2), 27))
        configHopPlotArray = numpy.zeros((len(dayList2), 30))
        componentsDistArray = numpy.zeros((len(dayList2), 3000))
        configComponentsDistArray = numpy.zeros((len(dayList2), 3000))
        numVerticesEdgesArray = numpy.zeros((len(dayList2), 2), int)
        numVerticesEdgesArray[:, 0] = [len(sgl) for sgl in subgraphIndicesList2]
        numVerticesEdgesArray[:, 1] = [sGraph.subgraph(sgl).getNumEdges() for sgl in subgraphIndicesList2]

        binWidths = numpy.arange(0, 0.50, 0.05)
        eigVectorDists = numpy.zeros((len(dayList2), binWidths.shape[0]-1), int)

        femaleSums = numpy.zeros(len(dayList2))
        maleSums = numpy.zeros(len(dayList2))
        heteroSums = numpy.zeros(len(dayList2))
        biSums = numpy.zeros(len(dayList2))

        contactSums = numpy.zeros(len(dayList2))
        nonContactSums = numpy.zeros(len(dayList2))
        donorSums = numpy.zeros(len(dayList2))
        randomTestSums = numpy.zeros(len(dayList2))
        stdSums = numpy.zeros(len(dayList2))
        prisonerSums = numpy.zeros(len(dayList2))
        recommendSums = numpy.zeros(len(dayList2))
        
        meanAges = numpy.zeros(len(dayList2))
        degrees = numpy.zeros((len(dayList2), 20))

        provinces = numpy.zeros((len(dayList2), 15))

        havanaSums = numpy.zeros(len(dayList2))
        villaClaraSums = numpy.zeros(len(dayList2))
        pinarSums = numpy.zeros(len(dayList2))
        holguinSums = numpy.zeros(len(dayList2))
        habanaSums = numpy.zeros(len(dayList2))
        sanctiSums = numpy.zeros(len(dayList2))

        meanDegrees = numpy.zeros(len(dayList2))
        stdDegrees = numpy.zeros(len(dayList2))

        #Note that death has a lot of missing values
        for j in range(len(dayList2)):
            dateStr = (str(DateUtils.getDateStrFromDay(dayList2[j], startYear)))
            logging.info(dateStr)
            statsDict = statsDictList[j]
            configStatsDict = meanConfigStatsDictList[j]

            degreeDist = statsDict["outDegreeDist"]
            degreeDist = degreeDist/float(numpy.sum(degreeDist))
            #Note that degree distribution for configuration graph will be identical 

            eigenDist = statsDict["eigenDist"]
            eigenDist = numpy.log(eigenDist[eigenDist>=10**-1])
            #configEigenDist = configStatsDict["eigenDist"]
            #configEigenDist = numpy.log(configEigenDist[configEigenDist>=10**-1])

            hopCount = statsDict["hopCount"]
            hopCount = numpy.log10(hopCount)
            hopPlotArray[j, 0:hopCount.shape[0]] = hopCount
            configHopCount = configStatsDict["hopCount"]
            configHopCount = numpy.log10(configHopCount)
            #configHopPlotArray[j, 0:configHopCount.shape[0]] = configHopCount

            triangleDist = statsDict["triangleDist"]
            #triangleDist = numpy.array(triangleDist, numpy.float64)/numpy.sum(triangleDist)
            triangleDist = numpy.array(triangleDist, numpy.float64)
            triangleDistArray[j, 0:triangleDist.shape[0]] = triangleDist
            configTriangleDist = configStatsDict["triangleDist"]
            configTriangleDist = numpy.array(configTriangleDist, numpy.float64)/numpy.sum(configTriangleDist)
            configTriangleDistArray[j, 0:configTriangleDist.shape[0]] = configTriangleDist

            maxEigVector = statsDict["maxEigVector"]
            eigenvectorInds = numpy.flipud(numpy.argsort(numpy.abs(maxEigVector)))
            top10eigenvectorInds = eigenvectorInds[0:int(numpy.round(eigenvectorInds.shape[0]/10.0))]
            maxEigVector = numpy.abs(maxEigVector[eigenvectorInds])
            #print(maxEigVector)
            eigVectorDists[j, :] = numpy.histogram(maxEigVector, binWidths)[0]

            componentsDist = statsDict["componentsDist"]
            componentsDist = numpy.array(componentsDist, numpy.float64)/numpy.sum(componentsDist)
            componentsDistArray[j, 0:componentsDist.shape[0]] = componentsDist
            configComponentsDist = configStatsDict["componentsDist"]
            configComponentsDist = numpy.array(configComponentsDist, numpy.float64)/numpy.sum(configComponentsDist)
            configComponentsDistArray[j, 0:configComponentsDist.shape[0]] = configComponentsDist

            plotInd2 = plotInd

            plt.figure(plotInd2)
            plt.plot(numpy.arange(degreeDist.shape[0]), degreeDist, plotStyles2[j], label=dateStr)
            plt.xlabel("Degree")
            plt.ylabel("Probability")
            plt.ylim((0, 0.5))
            plt.savefig(figureDir + "DegreeDist" +  ".eps")
            plt.legend()
            plotInd2 += 1

            """
            plt.figure(plotInd2)
            plt.plot(numpy.arange(eigenDist.shape[0]), eigenDist, label=dateStr)
            plt.xlabel("Eigenvalue rank")
            plt.ylabel("log(Eigenvalue)")
            plt.savefig(figureDir + "EigenDist" +  ".eps")
            plt.legend()
            plotInd2 += 1
            """

            #How does Kleinberg do the hop plots?
            plt.figure(plotInd2)
            plt.plot(numpy.arange(hopCount.shape[0]), hopCount, plotStyles[j], label=dateStr)
            plt.xlabel("k")
            plt.ylabel("log10(pairs)")
            plt.ylim( (2.5, 7) )
            plt.legend(loc="lower right")
            plt.savefig(figureDir + "HopCount" + ".eps")
            plotInd2 += 1
            
            plt.figure(plotInd2)
            plt.plot(numpy.arange(maxEigVector.shape[0]), maxEigVector, plotStyles2[j], label=dateStr)
            plt.xlabel("Rank")
            plt.ylabel("log(eigenvector coefficient)")
            plt.savefig(figureDir + "MaxEigVector" +  ".eps")
            plt.legend()
            plotInd2 += 1

            #Compute some information about the 10% most central vertices
            
            subgraphIndices = numpy.nonzero(detections <= dayList2[j])[0]
            subgraph = sGraph.subgraph(subgraphIndices)
            subgraphVertexArray = subgraph.getVertexList().getVertices()

            femaleSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, genderIndex]==1)
            maleSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, genderIndex]==0)
            heteroSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, orientationIndex]==0)
            biSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, orientationIndex]==1)

            contactSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, contactIndex])
            donorSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, donorIndex])
            randomTestSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, randomTestIndex])
            stdSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, stdIndex])
            prisonerSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, prisonerIndex])
            recommendSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, doctorIndex])

            meanAges[j] = numpy.mean(subgraphVertexArray[top10eigenvectorInds, detectionIndex] - subgraphVertexArray[top10eigenvectorInds, dobIndex])/daysInYear

            havanaSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, havanaIndex])
            villaClaraSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, villaClaraIndex])
            pinarSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, pinarIndex])
            holguinSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, holguinIndex])
            habanaSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, habanaIndex])
            sanctiSums[j] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, sanctiIndex])

            provinces[j, :] = numpy.sum(subgraphVertexArray[top10eigenvectorInds, 22:37], 0)

            ddist = numpy.bincount(subgraph.outDegreeSequence()[top10eigenvectorInds])
            degrees[j, 0:ddist.shape[0]] = numpy.array(ddist, float)/numpy.sum(ddist)

            meanDegrees[j] = numpy.mean(subgraph.outDegreeSequence()[top10eigenvectorInds])
            stdDegrees[j] = numpy.std(subgraph.outDegreeSequence()[top10eigenvectorInds])


            plt.figure(plotInd2)
            plt.plot(numpy.arange(degrees[j, :].shape[0]), degrees[j, :], plotStyles2[j], label=dateStr)
            plt.xlabel("Degree")
            plt.ylabel("Probability")
            #plt.ylim((0, 0.5))
            plt.savefig(figureDir + "DegreeDistCentral" +  ".eps")
            plt.legend()
            plotInd2 += 1

        precision = 4
        dateStrList = [DateUtils.getDateStrFromDay(day, startYear) for day in dayList2]

        print("Hop counts")
        print(Latex.listToRow(dateStrList))
        print(Latex.array2DToRows(hopPlotArray.T))

        print("\nHop counts for configuration graphs")
        print(Latex.listToRow(dateStrList))
        print(Latex.array2DToRows(configHopPlotArray.T))

        print("\n\nEdges and vertices")
        print((Latex.listToRow(dateStrList)))
        print((Latex.array2DToRows(numVerticesEdgesArray.T, precision)))

        print("\n\nEigenvector distribution")
        print((Latex.array1DToRow(binWidths[1:]) + "\\\\"))
        print((Latex.array2DToRows(eigVectorDists)))

        print("\n\nDistribution of component sizes")
        componentsDistArray = componentsDistArray[:, 0:componentsDist.shape[0]]
        nonZeroCols = numpy.sum(componentsDistArray, 0)!=0
        componentsDistArray = numpy.r_[numpy.array([numpy.arange(componentsDistArray.shape[1])[nonZeroCols]]), componentsDistArray[:, nonZeroCols]]
        print((Latex.listToRow(dateStrList)))
        print((Latex.array2DToRows(componentsDistArray.T, precision)))

        print("\n\nDistribution of component sizes in configuration graphs")
        configComponentsDistArray = configComponentsDistArray[:, 0:configComponentsDist.shape[0]]
        nonZeroCols = numpy.sum(configComponentsDistArray, 0)!=0
        configComponentsDistArray = numpy.r_[numpy.array([numpy.arange(configComponentsDistArray.shape[1])[nonZeroCols]]), configComponentsDistArray[:, nonZeroCols]]
        print((Latex.listToRow(dateStrList)))
        print((Latex.array2DToRows(configComponentsDistArray.T, precision)))

        print("\n\nDistribution of triangle participations")
        triangleDistArray = triangleDistArray[:, 0:triangleDist.shape[0]]
        nonZeroCols = numpy.sum(triangleDistArray, 0)!=0
        triangleDistArray = numpy.r_[numpy.array([numpy.arange(triangleDistArray.shape[1])[nonZeroCols]])/2, triangleDistArray[:, nonZeroCols]]
        print((Latex.listToRow(dateStrList)))
        print((Latex.array2DToRows(triangleDistArray.T, precision)))

        configTriangleDistArray = configTriangleDistArray[:, 0:configTriangleDist.shape[0]]
        nonZeroCols = numpy.sum(configTriangleDistArray, 0)!=0
        configTriangleDistArray = numpy.r_[numpy.array([numpy.arange(configTriangleDistArray.shape[1])[nonZeroCols]])/2, configTriangleDistArray[:, nonZeroCols]]
        configTriangleDistArray = numpy.c_[configTriangleDistArray, numpy.zeros((configTriangleDistArray.shape[0], triangleDistArray.shape[1]-configTriangleDistArray.shape[1]))]

        print("\n\nDistribution of central vertices")
        print((Latex.listToRow(dateStrList)))
        subgraphSizes = numpy.array(maleSums + femaleSums, float)
        print("Female & " + Latex.array1DToRow(femaleSums*100/subgraphSizes, 1) + "\\\\")
        print("Male & " + Latex.array1DToRow(maleSums*100/subgraphSizes, 1) + "\\\\")
        print("\hline")
        print("Heterosexual & " + Latex.array1DToRow(heteroSums*100/subgraphSizes, 1) + "\\\\")
        print("Bisexual & " + Latex.array1DToRow(biSums*100/subgraphSizes, 1) + "\\\\")
        print("\hline")
        print("Contact traced & " + Latex.array1DToRow(contactSums*100/subgraphSizes, 1) + "\\\\")
        print("Blood donor & " + Latex.array1DToRow(donorSums*100/subgraphSizes, 1) + "\\\\")
        print("RandomTest & " + Latex.array1DToRow(randomTestSums*100/subgraphSizes, 1) + "\\\\")
        print("STD & " + Latex.array1DToRow(stdSums*100/subgraphSizes, 1) + "\\\\")
        print("Prisoner & " + Latex.array1DToRow(prisonerSums*100/subgraphSizes, 1) + "\\\\")
        print("Doctor recommendation & " + Latex.array1DToRow(recommendSums*100/subgraphSizes, 1) + "\\\\")
        print("\hline")
        print("Mean ages (years) & " + Latex.array1DToRow(meanAges, 2) + "\\\\")
        print("\hline")
        print("Holguin & " + Latex.array1DToRow(holguinSums*100/subgraphSizes, 1) + "\\\\")
        print("La Habana & " + Latex.array1DToRow(habanaSums*100/subgraphSizes, 1) + "\\\\")
        print("Havana City & " + Latex.array1DToRow(havanaSums*100/subgraphSizes, 1) + "\\\\")
        print("Pinar del Rio & " + Latex.array1DToRow(pinarSums*100/subgraphSizes, 1) + "\\\\")
        print("Sancti Spiritus & " + Latex.array1DToRow(sanctiSums*100/subgraphSizes, 1) + "\\\\")
        print("Villa Clara & " + Latex.array1DToRow(villaClaraSums*100/subgraphSizes, 1) + "\\\\")
        print("\hline")
        print("Mean degrees & " + Latex.array1DToRow(meanDegrees, 2) + "\\\\")
        print("Std degrees & " + Latex.array1DToRow(stdDegrees, 2) + "\\\\")
        
        print("\n\nProvinces")
        print(Latex.array2DToRows(provinces))

        print("\n\nDegree distribution")
        print(Latex.array2DToRows(degrees))
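
A recurring idiom in the two plotVectorStats functions above is dropping all-zero columns from a distribution array before printing, while keeping the surviving column indices as a header row. In isolation (the array is illustrative):

import numpy

dist = numpy.array([[0.0, 0.5, 0.0, 0.5], [0.0, 0.25, 0.0, 0.75]])
nonZeroCols = numpy.sum(dist, 0) != 0                            #mask of informative columns
header = numpy.arange(dist.shape[1])[nonZeroCols]                #their original indices
compact = numpy.r_[numpy.array([header]), dist[:, nonZeroCols]]  #header row stacked on top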
Example #8
        meanDegreeDists = numpy.mean(degreeDists, 2)
        stdDegreeDists = numpy.std(degreeDists, 2)        
        
        plt.figure(plotInd)
        plt.errorbar(numpy.arange(numDegrees), meanDegreeDists[ind, :], yerr=stdDegreeDists[ind, :], color="k") 
        plt.plot(numpy.arange(numDegrees), idealDegreeDists[ind,  :], "k--")
        plt.xlabel("degree")
        plt.ylabel("frequency")
        plotInd += 1
        

    #Print the table of thetas 
    thetas = numpy.array(thetas)
    meanThetas = numpy.mean(thetas, 1)
    stdThetas = numpy.std(thetas, 1)
    table = Latex.array2DToRows(meanThetas.T, stdThetas.T, precision=4)
    rowNames = ["$|\\mathcal{I}_0 |$", "$\\alpha$", "$\\gamma$", "$\\beta$", "$\\lambda$",  "$\\sigma$"]
    table = Latex.addRowNames(rowNames, table)
    print(table)    
    
    #Now print the graph properties 
    idealTable = []
    tableMeanArray = [] 
    tableStdArray = [] 
    for ind in inds: 
        idealTable.append(idealMeasures[ind, :, timeInds])
        tableMeanArray.append(meanMeasures[ind, :, timeInds])
        tableStdArray.append(stdMeasures[ind, :, timeInds])
       
       
    idealTable = numpy.vstack(idealTable).T
Example #9
plt.scatter(X[y==posLabel, 2], X[y==posLabel, 3], c="r") 
plt.xlabel("X_(2)")
plt.ylabel("X_(3)")
plt.savefig("../Lecture2/Figures/X34.eps")
k += 1

n = X.shape[0]
d = X.shape[1]

correlations = numpy.zeros((X.shape[1], X.shape[1]))

for i in range(d): 
	for j in range(d): 
		correlations[i, j] = numpy.corrcoef(X[:, i], X[:, j])[0,1]
		
print(Latex.array2DToRows(correlations))

#Run PCA and find first directions 
pca = PCA(n_components=3)
newX = pca.fit_transform(X)

s, V = numpy.linalg.eigh(1/float(n)*X.T.dot(X))
s = numpy.sort(s)
print(s)

print(pca.explained_variance_ratio_) 
print(pca.components_[0, :])

plt.figure(k)
plt.scatter(newX[y!=1, 0], newX[y!=1, 1], c="b") 
plt.scatter(newX[y==1, 0], newX[y==1, 1], c="r") 
Example #10
            t = i
    

    
    logging.debug(resultsDir)
    newNumRecordSteps = numRecordSteps + 5        
    endDate += HIVModelUtils.realTestPeriods[j]
    recordStep = (endDate-startDate)/float(newNumRecordSteps)

    thetaArray = loadThetaArray(N, resultsDir, t)[0]
    print(thetaArray)    
    
    meanTable = numpy.array([thetaArray.mean(0)]).T
    print(meanTable)
    stdTable = numpy.array([thetaArray.std(0)]).T
    table = Latex.array2DToRows(meanTable, stdTable, precision=4)
    rowNames = ["$\\|\\mathcal{I}_0 \\|$", "$\\rho_B$", "$\\alpha$", "$C$", "$\\gamma$", "$\\beta$", "$\\kappa_{max}$", "$\\lambda_H$", "$\\lambda_B$", "$\\sigma_{WM}$",  "$\\sigma_{MW}$","$\\sigma_{MB}$"]
    table = Latex.addRowNames(rowNames, table)
    print(table)
    
    resultsFileName = outputDir + "IdealStats.pkl"
    stats = Util.loadPickle(resultsFileName)  
    times, vertexArray, removedGraphStats = stats 
    
    times = numpy.array(times) - startDate
    times2 = numpy.arange(startDate, endDate+1, recordStep)  
    times2 = times2[1:]
    times2 = numpy.array(times2) - startDate
    
    graphStats = GraphStatistics()
    
Example #11
        outputLists = graphRanker.vertexRankings(graph, relevantAuthorsInds)
             
        itemList = RankAggregator.generateItemList(outputLists)
        methodNames = graphRanker.getNames()
        
        if runLSI: 
            outputFilename = dataset.getOutputFieldDir(field) + "outputListsLSI.npz"
        else: 
            outputFilename = dataset.getOutputFieldDir(field) + "outputListsLDA.npz"
            
        Util.savePickle([outputLists, trainExpertMatchesInds, testExpertMatchesInds], outputFilename, debug=True)
        
        numMethods = len(outputLists)
        precisions = numpy.zeros((len(ns), numMethods))
        averagePrecisions = numpy.zeros(numMethods)
        
        for i, n in enumerate(ns):     
            for j in range(len(outputLists)): 
                precisions[i, j] = Evaluator.precisionFromIndLists(testExpertMatchesInds, outputLists[j][0:n]) 
            
        for j in range(len(outputLists)):                 
            averagePrecisions[j] = Evaluator.averagePrecisionFromLists(testExpertMatchesInds, outputLists[j][0:averagePrecisionN], averagePrecisionN) 
        
        precisions2 = numpy.c_[numpy.array(ns), precisions] #prepend the cutoff n as the first column
        
        logging.debug(Latex.listToRow(methodNames))
        logging.debug(Latex.array2DToRows(precisions2))
        logging.debug(Latex.array1DToRow(averagePrecisions))
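
        #Evaluator.precisionFromIndLists receives the list already truncated to n,
        #so precision at n presumably reduces to the following (a sketch, not the
        #library's code):
        #    def precisionAtN(relevant, predicted):
        #        return len(set(relevant).intersection(predicted))/float(len(predicted))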

logging.debug("All done!")
Example #12
                testAucsStd[i, j, k] = numpy.std(errors)
                #logging.debug("Read file: " + fileName)
            except IOError:
                logging.debug("File not found : " + str(fileName))
                numMissingFiles += 1 
                
logging.debug("Number of missing files: " + str(numMissingFiles))
    
for i, dataName in enumerate(dataNames): 
    print("-"*10 + dataName + "-"*10)

    algorithms = [x.ljust(20) for x in algorithmsAbbr]
    currentTestAucsMean = testAucsMean[:, i, :].T
    maxAUCs = numpy.zeros(currentTestAucsMean.shape, bool) #marks the best AUC in each column, for bolding
    maxAUCs[numpy.argmax(currentTestAucsMean, 0), numpy.arange(currentTestAucsMean.shape[1])] = 1
    table = Latex.array2DToRows(testAucsMean[:, i, :].T, testAucsStd[:, i, :].T, precision=2, bold=maxAUCs)
    print(Latex.listToRow(hormoneNameIndicators))
    print(Latex.addRowNames(algorithms, table))
    

#Now looks at the features for the raw spectra 
algorithm = "L1SvmTreeRankForest" 
dataName = "raw"
numMissingFiles = 0 
numFeatures = 100

numIndicators = 6 
featureInds = numpy.zeros((numFeatures, numIndicators))

for i, (hormoneName, hormoneConc) in enumerate(helper.hormoneDict.items()):
    try: