def summary(datasetNames, sampleSizes, foldsSet, cvScalings, sampleMethods, fileNameSuffix, gridResultsSuffix="GridResults"):
    """
    Print the errors for all results plus a summary.

    For every (dataset, sample method) pair two files are loaded:
    ``outputDir + dataset + sampleMethod + fileNameSuffix + ".npz"`` (the
    errors, stored under key ``"arr_0"``) and
    ``outputDir + dataset + gridResultsSuffix + ".npz"`` (the ideal errors,
    also ``"arr_0"``). ``outputDir`` is a module-level name. A pair whose
    files cannot be opened is reported and skipped, which leaves its entries
    in the accumulated arrays at zero.

    Printed output, in order: the per-pair error table, a per-dataset table
    of mean errors, the "Sliced Tables" section (table1, table2, table3) and
    the "Overall" section holding a wins table and an excess error table per
    sample method, followed by the mean errors over all datasets.

    :param datasetNames: sequence of dataset name strings.
    :param sampleSizes: array of sample sizes (only its length and the values
        used as labels are read here).
    :param foldsSet: array of fold counts, used for sizing and header rows.
    :param cvScalings: array of scalings; the number of methods is taken to
        be ``cvScalings.shape[0] + 2``.
    :param sampleMethods: sequence of sample method name strings.
    :param fileNameSuffix: suffix of the per-pair results file names.
    :param gridResultsSuffix: suffix of the ideal (grid) results file names.

    NOTE(review): table1 reads method index 4, so at least 5 methods (i.e.
    ``cvScalings.shape[0] >= 3``) are required - confirm against callers.
    """
    numMethods = 1 + (cvScalings.shape[0] + 1)
    numDatasets = len(datasetNames)
    numSampleMethods = len(sampleMethods)
    numSampleSizes = sampleSizes.shape[0]
    numFolds = foldsSet.shape[0]

    overallErrors = numpy.zeros((numDatasets, numSampleMethods, numSampleSizes, numFolds, numMethods))
    # numpy.int / numpy.float were removed in NumPy 1.24; the builtins are
    # the documented replacements and give the same dtypes.
    overallStdWins = numpy.zeros((numSampleMethods, numSampleSizes, numFolds, numMethods + 1, 3), int)
    overallErrorsPerSampMethod = numpy.zeros((numDatasets, numSampleMethods, numSampleSizes, numMethods), float)

    table1 = ""
    table2 = ""
    table3 = ""

    # Fold index and method indices used for the sliced tables.
    # Original note: "V=10, alpha=1 and CV sampling" - the mapping of these
    # indices to those settings depends on the caller's arrays; verify.
    sliceFoldIndex = 0
    sliceMethodInds = [0, 1, 4]
    numSliceMethods = len(sliceMethodInds)

    for i, datasetName in enumerate(datasetNames):
        # The code that filled these two arrays is disabled, so table3 is
        # currently built from zeros.
        table3Error = numpy.zeros((2, numSampleMethods))
        table3Stds = numpy.zeros((2, numSampleMethods))

        for j, sampleMethod in enumerate(sampleMethods):
            print("="*50 + "\n" + datasetName + "-" + sampleMethod + "\n" + "="*50)

            outfileName = outputDir + datasetName + sampleMethod + fileNameSuffix + ".npz"
            try:
                # The archives are closed as soon as the arrays are read.
                with numpy.load(outfileName) as data:
                    errors = data["arr_0"]

                # Load ideal results
                outfileName = outputDir + datasetName + gridResultsSuffix + ".npz"
                with numpy.load(outfileName) as data:
                    idealErrors = data["arr_0"]
            except IOError:
                print("Failed to open file: " + outfileName)
                continue

            errorTable, meanErrors, stdErrors = getLatexTable(errors, cvScalings, idealErrors)
            wins = getWins(errors)
            idealWins = getIdealWins(errors, idealErrors)

            # Error of each method minus the error of method 0 (last axis),
            # averaged over the first axis.
            excessError = errors - errors[:, :, :, 0:1]
            meanExcessError = numpy.mean(excessError, 0)

            overallErrorsPerSampMethod[i, j, :, :] = numpy.mean(meanErrors, 1)
            overallErrors[i, j, :, :, :] = meanExcessError
            overallStdWins[j, :, :, 0:-1, :] += wins
            overallStdWins[j, :, :, -1, :] += idealWins
            print(errorTable)

            print(meanErrors[0, 1, 0])

            # The sliced tables only report the first sample method.
            if j != 0:
                continue

            # table1: for each sample size, the selected methods side by side.
            table1Error = numpy.zeros(numSampleSizes * numSliceMethods)
            table1Std = numpy.zeros(numSampleSizes * numSliceMethods)
            for k in range(numSampleSizes):
                for m, methodInd in enumerate(sliceMethodInds):
                    table1Error[k * numSliceMethods + m] = meanErrors[k, sliceFoldIndex, methodInd]
                    table1Std[k * numSliceMethods + m] = stdErrors[k, sliceFoldIndex, methodInd]

            table1 += datasetName + " & " + Latex.array2DToRows(numpy.array([table1Error]), numpy.array([table1Std])) + "\n"

            # table2: one row per sample size with methods from index 2 on.
            for s in range(numSampleSizes):
                table2Error = meanErrors[s, sliceFoldIndex, 2:]
                table2Std = stdErrors[s, sliceFoldIndex, 2:]
                table2 += datasetName + " $m=" + str(sampleSizes[s]) + "$ & " + Latex.array2DToRows(numpy.array([table2Error]), numpy.array([table2Std])) + "\n"

        table3 += Latex.addRowNames([datasetName + " Std ", datasetName + " PenVF "], Latex.array2DToRows(table3Error, table3Stds))

        print(_meanErrorsTable(overallErrorsPerSampMethod[i], sampleMethods, cvScalings))

    print("="*50 + "\n" + "Sliced Tables" + "\n" + "="*50)

    print(table1 + "\n")
    print(table2 + "\n")
    print(table3)

    print("="*50 + "\n" + "Overall" + "\n" + "="*50)

    # Aggregate the mean excess errors over datasets.
    overallMeanErrors = numpy.mean(overallErrors, 0)
    overallStdErrors = numpy.std(overallErrors, 0)
    foldsRow = Latex.array1DToRow(foldsSet)

    for i, sampleMethod in enumerate(sampleMethods):
        print("-"*20 + sampleMethod + "-"*20)
        overallErrorTable = foldsRow + "\\\\ \n"
        overallWinsTable = foldsRow + " & Total & " + foldsRow + " & Total \\\\ \n"

        for j in range(numSampleSizes):
            meanRows = overallMeanErrors[i, j, :, :].T
            # Negative mean excess errors are passed as the bold mask.
            overallErrorTable += Latex.array2DToRows(meanRows, overallStdErrors[i, j, :, :].T, bold=meanRows < 0) + "\n"

            # Stack the three slices of the last axis of the win counts.
            tiesWins = numpy.r_[overallStdWins[i, j, :, :, 0], overallStdWins[i, j, :, :, 1], overallStdWins[i, j, :, :, 2]]
            overallWinsTable += Latex.array2DToRows(tiesWins.T) + "\n"

        overallErrorTable = Latex.addRowNames(getRowNames(cvScalings), overallErrorTable)
        overallWinsTable = Latex.addRowNames(getRowNames(cvScalings, True), overallWinsTable)

        print(Latex.latexTable(overallWinsTable, "Wins for " + sampleMethod, True))
        print(Latex.latexTable(_stripLeadingZeros(overallErrorTable), "Excess errors for " + sampleMethod, True))

    # Now print the mean errors for all datasets
    overallErrorsPerSampMethod = numpy.mean(overallErrorsPerSampMethod, 0)
    print(_meanErrorsTable(overallErrorsPerSampMethod, sampleMethods, cvScalings))


def _meanErrorsTable(errorsPerSampMethod, sampleMethods, cvScalings):
    """
    Build the mean errors table: a header row made from sampleMethods, then
    for each index j along axis 1 the rows of errorsPerSampMethod[:, j, :].T,
    with row names taken from getRowNames(cvScalings).
    """
    header = Latex.listToRow(sampleMethods) + "\n"
    rows = [Latex.array2DToRows(errorsPerSampMethod[:, j, :].T) + "\n" for j in range(errorsPerSampMethod.shape[1])]
    return Latex.addRowNames(getRowNames(cvScalings), header + "".join(rows))


def _stripLeadingZeros(text):
    """
    Replace every "0." that is not preceded by a digit with ".", so "0.25"
    becomes ".25" while "10.5" and "100.0" are left unchanged.
    """
    import re

    return re.sub(r"(?<!\d)0\.", ".", text)