Esempio n. 1
0
def summary(datasetNames, sampleSizes, foldsSet, cvScalings, sampleMethods, fileNameSuffix, gridResultsSuffix="GridResults"):
    """
    Print the errors for all results plus a summary.

    For every (dataset, sample method) pair the results file
    ``outputDir + datasetName + sampleMethod + fileNameSuffix + ".npz"`` and
    the ideal-results file ``outputDir + datasetName + gridResultsSuffix +
    ".npz"`` are loaded (``outputDir`` is a module-level name). Only the
    ``"arr_0"`` entry of each file is used. A pair whose files cannot be
    opened is reported on stdout and skipped, leaving zeros in the
    accumulators for that pair.

    Printed output, in order: one LaTeX error table per pair, a per-dataset
    table of mean errors, the "Sliced Tables" (table1, table2, table3), the
    per-sample-method "Wins" and "Excess errors" LaTeX tables, and finally the
    mean errors averaged over all datasets.

    :param datasetNames: sequence of dataset name strings (used in file names and row labels)
    :param sampleSizes: array of sample sizes; its length sizes the accumulators
    :param foldsSet: array of fold counts; its length sizes the accumulators
    :param cvScalings: array of scalings; the number of methods is ``cvScalings.shape[0] + 2``
    :param sampleMethods: sequence of sample method name strings
    :param fileNameSuffix: suffix of the per-(dataset, sample method) results files
    :param gridResultsSuffix: suffix of the per-dataset ideal results files
    :return: None; everything is written to stdout
    """
    numMethods = 1 + (cvScalings.shape[0] + 1)
    numDatasets = len(datasetNames)
    numSampleMethods = len(sampleMethods)
    numSampleSizes = len(sampleSizes)
    numFolds = foldsSet.shape[0]

    overallErrors = numpy.zeros((numDatasets, numSampleMethods, numSampleSizes, numFolds, numMethods))
    # The builtin int replaces the numpy.int alias, which was removed in NumPy 1.24.
    # The extra slot on the method axis holds the counts returned by getIdealWins.
    overallStdWins = numpy.zeros((numSampleMethods, numSampleSizes, numFolds, numMethods + 1, 3), int)
    overallErrorsPerSampMethod = numpy.zeros((numDatasets, numSampleMethods, numSampleSizes, numMethods))

    table1 = ""
    table2 = ""
    table3 = ""

    # Fold index and method indices used for the sliced tables
    sliceFoldIndex = 0
    sliceMethodInds = (0, 1, 4)
    numSliceMethods = len(sliceMethodInds)

    for i, datasetName in enumerate(datasetNames):
        table3Error = numpy.zeros((2, numSampleMethods))
        table3Stds = numpy.zeros((2, numSampleMethods))

        for j, sampleMethod in enumerate(sampleMethods):
            print("="*50 + "\n" + datasetName + "-" + sampleMethod + "\n" + "="*50)

            outfileName = outputDir + datasetName + sampleMethod + fileNameSuffix + ".npz"
            try:
                # Context managers make sure the .npz archives are closed again
                with numpy.load(outfileName) as data:
                    errors = data["arr_0"]

                #Load ideal results
                # outfileName is rebound so that the failure message names the right file
                outfileName = outputDir + datasetName + gridResultsSuffix + ".npz"
                with numpy.load(outfileName) as data:
                    idealErrors = data["arr_0"]

                errorTable, meanErrors, stdErrors = getLatexTable(errors, cvScalings, idealErrors)

                wins = getWins(errors)
                idealWins = getIdealWins(errors, idealErrors)

                # Error of every method relative to the method at index 0 of the last axis
                excessError = errors - errors[:, :, :, 0:1]
                meanExcessError = numpy.mean(excessError, 0)
                # NOTE(review): the result of this call is unused; it is kept only in case
                # getLatexTable has side effects - drop the call if it is pure.
                getLatexTable(excessError, cvScalings, idealErrors)

                overallErrorsPerSampMethod[i, j, :, :] = numpy.mean(meanErrors, 1)
                overallErrors[i, j, :, :, :] = meanExcessError
                overallStdWins[j, :, :, 0:-1, :] += wins
                overallStdWins[j, :, :, -1, :] += idealWins
                print(errorTable)

                print(meanErrors[0, 1, 0])

                # The sliced tables are only reported for the first sample method
                if j == 0:
                    #This is a table with V=10, alpha=1 and CV sampling
                    table1Error = numpy.zeros(numSampleSizes*numSliceMethods)
                    table1Std = numpy.zeros(numSampleSizes*numSliceMethods)
                    for k in range(numSampleSizes):
                        for m, methodInd in enumerate(sliceMethodInds):
                            table1Error[k*numSliceMethods + m] = meanErrors[k, sliceFoldIndex, methodInd]
                            table1Std[k*numSliceMethods + m] = stdErrors[k, sliceFoldIndex, methodInd]

                    table1 += datasetName + " & " + Latex.array2DToRows(numpy.array([table1Error]), numpy.array([table1Std])) + "\n"

                    #See how alpha varies with V=10, CV sampling
                    for s in range(numSampleSizes):
                        table2Error = meanErrors[s, sliceFoldIndex, 2:]
                        table2Std = stdErrors[s, sliceFoldIndex, 2:]
                        table2 += datasetName + " $m=" + str(sampleSizes[s]) + "$ & " + Latex.array2DToRows(numpy.array([table2Error]), numpy.array([table2Std])) + "\n"

                # Disabled: table3Error/table3Stds are never filled, so table3 prints zeros.
                # #See how each sample method effects CV and pen alpha=1
                # fourFoldIndex = 4
                # hundredMIndex = 1
                #
                # table3Error[0, j] = meanErrors[hundredMIndex, fourFoldIndex, 0]
                # table3Error[1, j] = meanErrors[hundredMIndex, fourFoldIndex, 3]
                # table3Stds[0, j] = stdErrors[hundredMIndex, fourFoldIndex, 0]
                # table3Stds[1, j] = stdErrors[hundredMIndex, fourFoldIndex, 3]
            except IOError:
                print("Failed to open file: " + outfileName)

        table3 += Latex.addRowNames([datasetName + " Std ", datasetName + " PenVF "], Latex.array2DToRows(table3Error, table3Stds))

        # Mean errors of this dataset: one group of rows per sample size
        datasetMeanErrors = Latex.listToRow(sampleMethods) + "\n"

        for j in range(numSampleSizes):
            datasetMeanErrors += Latex.array2DToRows(overallErrorsPerSampMethod[i, :, j, :].T) + "\n"

        datasetMeanErrors = Latex.addRowNames(getRowNames(cvScalings), datasetMeanErrors)
        print(datasetMeanErrors)

    print("="*50 + "\n" + "Sliced Tables" + "\n" + "="*50)

    print(table1 + "\n")
    print(table2 + "\n")
    print(table3)

    print("="*50 + "\n" + "Overall" + "\n" + "="*50)

    # Aggregate the mean excess errors over the dataset axis
    overallMeanErrors = numpy.mean(overallErrors, 0)
    overallStdErrors = numpy.std(overallErrors, 0)

    for i, sampleMethod in enumerate(sampleMethods):
        print("-"*20 + sampleMethod + "-"*20)
        overallErrorTable = Latex.array1DToRow(foldsSet) + "\\\\ \n"
        overallWinsTable = Latex.array1DToRow(foldsSet) + " & Total & " + Latex.array1DToRow(foldsSet) + " & Total \\\\ \n"

        for j in range(numSampleSizes):
            meanBlock = overallMeanErrors[i, j, :, :].T
            stdBlock = overallStdErrors[i, j, :, :].T
            # Negative excess errors are passed as the bold mask
            overallErrorTable += Latex.array2DToRows(meanBlock, stdBlock, bold=meanBlock < 0) + "\n"

            # Stack the three slices of the last axis on top of each other
            tiesWins = numpy.r_[overallStdWins[i, j, :, :, 0], overallStdWins[i, j, :, :, 1], overallStdWins[i, j, :, :, 2]]

            overallWinsTable += Latex.array2DToRows(tiesWins.T) + "\n"

        overallErrorTable = Latex.addRowNames(getRowNames(cvScalings), overallErrorTable)
        overallWinsTable = Latex.addRowNames(getRowNames(cvScalings, True), overallWinsTable)

        print(Latex.latexTable(overallWinsTable, "Wins for " + sampleMethod, True))
        # Leading zeros are stripped from the printed numbers ("0." -> ".")
        print(Latex.latexTable(overallErrorTable.replace("0.", "."), "Excess errors for " + sampleMethod, True))

    #Now print the mean errors for all datasets
    datasetMeanErrors = Latex.listToRow(sampleMethods) + "\n"
    overallErrorsPerSampMethod = numpy.mean(overallErrorsPerSampMethod, 0)

    for j in range(numSampleSizes):
        datasetMeanErrors += Latex.array2DToRows(overallErrorsPerSampMethod[:, j, :].T) + "\n"

    datasetMeanErrors = Latex.addRowNames(getRowNames(cvScalings), datasetMeanErrors)
    print(datasetMeanErrors)