def getLatexTable(measures, cvScalings, idealMeasures):
    rowNames = getRowNames(cvScalings, True)
    table = Latex.array1DToRow(foldsSet) + "\\\\ \n"

    for j in range(sampleSizes.shape[0]):
        meanMeasures = numpy.mean(measures, 0)
        stdMeasures = numpy.std(measures, 0)
        table += Latex.array2DToRows(meanMeasures[j, :, :].T, stdMeasures[j, :, :].T) + "\n"

        meanIdealMeasures = numpy.mean(idealMeasures, 0)
        stdIdealMeasures = numpy.std(idealMeasures, 0)
        table += Latex.array2DToRows(numpy.ones((1, len(foldsSet)))*meanIdealMeasures[j], numpy.ones((1, len(foldsSet)))*stdIdealMeasures[j]) + "\n"

    table = Latex.addRowNames(rowNames, table)
    return table, meanMeasures, stdMeasures
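#The sketch below is illustrative and not part of the original script: it shows how
#the Latex helpers that getLatexTable relies on (array2DToRows and addRowNames) fit
#together on small made-up arrays. Note that getLatexTable itself also depends on the
#module-level foldsSet and sampleSizes arrays and on getRowNames(), defined elsewhere
#in this script. The function name and row names here are placeholders.
def latexHelpersSketch():
    meanExample = numpy.random.rand(2, 3)
    stdExample = numpy.random.rand(2, 3)
    #One table row per array row, with cells formatted as "mean (std)"
    tableExample = Latex.array2DToRows(meanExample, stdExample)
    #Prepend a name to each row; the number of names must match the number of rows
    tableExample = Latex.addRowNames(["CV", "PenVF"], tableExample)
    print(tableExample)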
def testAddRowNames(self):
    numpy.random.seed(21)
    X = numpy.random.rand(2, 2)
    Y = numpy.random.rand(2, 2)

    latexTable = Latex.array2DToRows(X, Y)
    rowNames = ["a", "b"]
    latexTable = Latex.addRowNames(rowNames, latexTable)

    outputStr = "a & 0.049 (0.206) & 0.289 (0.051)\\\\\n"
    outputStr += "b & 0.721 (0.302) & 0.022 (0.664)\\\\\n"
    self.assertEqual(latexTable, outputStr)

    #Too many row names should raise an error
    rowNames = ["a", "b", "c"]
    self.assertRaises(ValueError, Latex.addRowNames, rowNames, latexTable)

    #Too few row names should also raise an error
    rowNames = ["a"]
    self.assertRaises(ValueError, Latex.addRowNames, rowNames, latexTable)
def summary(datasetNames, sampleSizes, foldsSet, cvScalings, sampleMethods, fileNameSuffix, gridResultsSuffix="GridResults"):
    """
    Print the errors for all results plus a summary.
    """
    numMethods = (1+(cvScalings.shape[0]+1))
    numDatasets = len(datasetNames)
    overallErrors = numpy.zeros((numDatasets, len(sampleMethods), sampleSizes.shape[0], foldsSet.shape[0], numMethods))
    overallStdWins = numpy.zeros((len(sampleMethods), len(sampleSizes), foldsSet.shape[0], numMethods+1, 3), numpy.int)
    overallErrorsPerSampMethod = numpy.zeros((numDatasets, len(sampleMethods), len(sampleSizes), numMethods), numpy.float)

    table1 = ""
    table2 = ""
    table3 = ""

    for i in range(len(datasetNames)):
        table3Error = numpy.zeros((2, len(sampleMethods)))
        table3Stds = numpy.zeros((2, len(sampleMethods)))

        for j in range(len(sampleMethods)):
            print("="*50 + "\n" + datasetNames[i] + "-" + sampleMethods[j] + "\n" + "="*50)

            outfileName = outputDir + datasetNames[i] + sampleMethods[j] + fileNameSuffix + ".npz"

            try:
                data = numpy.load(outfileName)
                errors = data["arr_0"]
                params = data["arr_1"]
                meanErrorGrids = data["arr_2"]
                stdErrorGrids = data["arr_3"]
                meanApproxGrids = data["arr_4"]
                stdApproxGrids = data["arr_5"]

                #Load ideal results
                outfileName = outputDir + datasetNames[i] + gridResultsSuffix + ".npz"
                data = numpy.load(outfileName)
                idealErrors = data["arr_0"]

                errorTable, meanErrors, stdErrors = getLatexTable(errors, cvScalings, idealErrors)

                wins = getWins(errors)
                idealWins = getIdealWins(errors, idealErrors)

                excessError = numpy.zeros(errors.shape)
                for k in range(errors.shape[1]):
                    excessError[:, k, :, :] = errors[:, k, :, :] - numpy.tile(errors[:, k, :, 0, numpy.newaxis], (1, 1, numMethods))

                meanExcessError = numpy.mean(excessError, 0)
                stdExcessError = numpy.std(excessError, 0)
                excessErrorTable, meanExcessErrors, stdExcessErrors = getLatexTable(excessError, cvScalings, idealErrors)

                overallErrorsPerSampMethod[i, j, :, :] = numpy.mean(meanErrors, 1)
                overallErrors[i, j, :, :, :] = meanExcessError
                overallStdWins[j, :, :, 0:-1, :] += wins
                overallStdWins[j, :, :, -1, :] += idealWins
                print(errorTable)
                #print("Min error is: " + str(numpy.min(meanErrors)))
                #print("Max error is: " + str(numpy.max(meanErrors)))
                #print("Mean error is: " + str(numpy.mean(meanErrors)) + "\n")

                #This is a table with V=10, alpha=1 and CV sampling
                sliceFoldIndex = 0
                print(meanErrors[0, 1, 0])

                numSliceMethods = 3
                table1Error = numpy.zeros(len(sampleSizes)*numSliceMethods)
                table1Std = numpy.zeros(len(sampleSizes)*numSliceMethods)

                for k in range(len(sampleSizes)):
                    table1Error[k*numSliceMethods] = meanErrors[k, sliceFoldIndex, 0]
                    table1Error[k*numSliceMethods+1] = meanErrors[k, sliceFoldIndex, 1]
                    table1Error[k*numSliceMethods+2] = meanErrors[k, sliceFoldIndex, 4]

                    table1Std[k*numSliceMethods] = stdErrors[k, sliceFoldIndex, 0]
                    table1Std[k*numSliceMethods+1] = stdErrors[k, sliceFoldIndex, 1]
                    table1Std[k*numSliceMethods+2] = stdErrors[k, sliceFoldIndex, 4]

                if j == 0:
                    table1 += datasetNames[i] + " & " + Latex.array2DToRows(numpy.array([table1Error]), numpy.array([table1Std])) + "\n"

                #See how alpha varies with V=10, CV sampling
                table2Error = numpy.zeros(numMethods-2)
                table2Std = numpy.zeros(numMethods-2)

                for s in range(len(sampleSizes)):
                    table2Error = meanErrors[s, sliceFoldIndex, 2:]
                    table2Std = stdErrors[s, sliceFoldIndex, 2:]

                    if j == 0:
                        table2 += datasetNames[i] + " $m=" + str(sampleSizes[s]) + "$ & " + Latex.array2DToRows(numpy.array([table2Error]), numpy.array([table2Std])) + "\n"

                """
                #See how each sample method effects CV and pen alpha=1
                fourFoldIndex = 4
                hundredMIndex = 1

                table3Error[0, j] = meanErrors[hundredMIndex, fourFoldIndex, 0]
                table3Error[1, j] = meanErrors[hundredMIndex, fourFoldIndex, 3]
                table3Stds[0, j] = stdErrors[hundredMIndex, fourFoldIndex, 0]
                table3Stds[1, j] = stdErrors[hundredMIndex, fourFoldIndex, 3]
                """
            except IOError:
                print("Failed to open file: " + outfileName)

        table3 += Latex.addRowNames([datasetNames[i] + " Std ", datasetNames[i] + " PenVF "], Latex.array2DToRows(table3Error, table3Stds))

        datasetMeanErrors = Latex.listToRow(sampleMethods) + "\n"
        for j in range(len(sampleSizes)):
            datasetMeanErrors += Latex.array2DToRows(overallErrorsPerSampMethod[i, :, j, :].T) + "\n"

        datasetMeanErrors = Latex.addRowNames(getRowNames(cvScalings), datasetMeanErrors)
        print(datasetMeanErrors)

    print("="*50 + "\n" + "Sliced Tables" + "\n" + "="*50)
    print(table1 + "\n")
    print(table2 + "\n")
    print(table3)

    print("="*50 + "\n" + "Overall" + "\n" + "="*50)

    overallMeanErrors = numpy.mean(overallErrors, 0)
    overallStdErrors = numpy.std(overallErrors, 0)

    for i in range(len(sampleMethods)):
        print("-"*20 + sampleMethods[i] + "-"*20)
        overallErrorTable = Latex.array1DToRow(foldsSet) + "\\\\ \n"
        overallWinsTable = Latex.array1DToRow(foldsSet) + " & Total & " + Latex.array1DToRow(foldsSet) + " & Total \\\\ \n"
        rowNames = getRowNames(cvScalings)

        for j in range(sampleSizes.shape[0]):
            overallErrorTable += Latex.array2DToRows(overallMeanErrors[i, j, :, :].T, overallStdErrors[i, j, :, :].T, bold=overallMeanErrors[i, j, :, :].T < 0) + "\n"

            tiesWins = numpy.r_[overallStdWins[i, j, :, :, 0], overallStdWins[i, j, :, :, 1], overallStdWins[i, j, :, :, 2]]
            overallWinsTable += Latex.array2DToRows(tiesWins.T) + "\n"

        overallErrorTable = Latex.addRowNames(rowNames, overallErrorTable)

        rowNames = getRowNames(cvScalings, True)
        overallWinsTable = Latex.addRowNames(rowNames, overallWinsTable)

        print(Latex.latexTable(overallWinsTable, "Wins for " + sampleMethods[i], True))
        print(Latex.latexTable(overallErrorTable.replace("0.", "."), "Excess errors for " + sampleMethods[i], True))
        #print(overallWinsTable)
        #print(overallErrorTable)

    #Now print the mean errors for all datasets
    datasetMeanErrors = Latex.listToRow(sampleMethods) + "\n"
    overallErrorsPerSampMethod = numpy.mean(overallErrorsPerSampMethod[:, :, :, :], 0)

    for j in range(len(sampleSizes)):
        datasetMeanErrors += Latex.array2DToRows(overallErrorsPerSampMethod[:, j, :].T) + "\n"

    datasetMeanErrors = Latex.addRowNames(getRowNames(cvScalings), datasetMeanErrors)
    print(datasetMeanErrors)
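#A hypothetical invocation sketch, not taken from the original script: the dataset
#names, sizes and sampling-method labels below are placeholders. summary() also
#relies on outputDir, getWins, getIdealWins and getRowNames being defined elsewhere
#in this script, and on the corresponding .npz result files existing under outputDir.
if __name__ == "__main__":
    datasetNames = ["abalone", "housing"]
    sampleSizes = numpy.array([50, 100, 200])
    foldsSet = numpy.arange(2, 13, 2)
    cvScalings = numpy.arange(0.6, 1.61, 0.2)
    sampleMethods = ["CV", "SS"]
    summary(datasetNames, sampleSizes, foldsSet, cvScalings, sampleMethods, "Results")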
plt.figure(plotInd)
plt.errorbar(numpy.arange(numDegrees), meanDegreeDists[ind, :], yerr=stdDegreeDists[ind, :], color="k")
plt.plot(numpy.arange(numDegrees), idealDegreeDists[ind, :], "k--")
plt.xlabel("degree")
plt.ylabel("frequency")
plotInd += 1

#Print the table of thetas
thetas = numpy.array(thetas)
meanThetas = numpy.mean(thetas, 1)
stdThetas = numpy.std(thetas, 1)
table = Latex.array2DToRows(meanThetas.T, stdThetas.T, precision=4)
rowNames = ["$|\\mathcal{I}_0 |$", "$\\alpha$", "$\\gamma$", "$\\beta$", "$\\lambda$", "$\\sigma$"]
table = Latex.addRowNames(rowNames, table)
print(table)

#Now print the graph properties
idealTable = []
tableMeanArray = []
tableStdArray = []

for ind in inds:
    idealTable.append(idealMeasures[ind, :, timeInds])
    tableMeanArray.append(meanMeasures[ind, :, timeInds])
    tableStdArray.append(stdMeasures[ind, :, timeInds])

idealTable = numpy.vstack(idealTable).T
tableMeanArray = numpy.vstack(tableMeanArray).T
tableStdArray = numpy.vstack(tableStdArray).T
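#Sketch (not in the original script): the stacked mean/std arrays above would
#typically be rendered with the same Latex helpers used for the theta table. The row
#names here are placeholders, since the real labels depend on which graph measures
#inds selects.
measuresTable = Latex.array2DToRows(tableMeanArray, tableStdArray, precision=4)
measuresRowNames = ["measure " + str(k) for k in range(tableMeanArray.shape[0])]
measuresTable = Latex.addRowNames(measuresRowNames, measuresTable)
print(measuresTable)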
plt.xlabel("False Positive Rate") plt.ylabel("True Positive Rate") plt.savefig(figureDir + labelName.replace(".", "_") + "-ROC.eps") plotInd += 1 #The last column is the mean value meanMeanAUCs = numpy.mean(meanAUCs, 1) meanStdAUCs = numpy.mean(stdAUCs, 1) meanAUCs = numpy.c_[meanAUCs, meanMeanAUCs] stdAUCs = numpy.c_[stdAUCs, meanStdAUCs] print("\n") print(Latex.listToRow(labelNames)) print(Latex.addRowNames(rowNames, Latex.array2DsToRows(meanAUCs, stdAUCs, 2))) #----------------- Results for RankBoost and RankSVM ---------------------------------- algorithmNames = ["RankBoost", "RankSVM"] numMethods = len(dataTypes)*len(algorithmNames) rowNames = numpy.zeros(numMethods, "a20") meanAUCs = numpy.zeros((numMethods, len(labelNames))) stdAUCs = numpy.zeros((numMethods, len(labelNames))) for m in range(len(algorithmNames)): algorithmName = algorithmNames[m] for i in range(len(labelNames)): labelName = labelNames[i]
logging.debug(resultsDir)

newNumRecordSteps = numRecordSteps + 5
endDate += HIVModelUtils.realTestPeriods[j]
recordStep = (endDate-startDate)/float(newNumRecordSteps)

thetaArray = loadThetaArray(N, resultsDir, t)[0]
print(thetaArray)

meanTable = numpy.array([thetaArray.mean(0)]).T
print(meanTable)
stdTable = numpy.array([thetaArray.std(0)]).T
table = Latex.array2DToRows(meanTable, stdTable, precision=4)
rowNames = ["$\\|\\mathcal{I}_0 \\|$", "$\\rho_B$", "$\\alpha$", "$C$", "$\\gamma$", "$\\beta$", "$\\kappa_{max}$", "$\\lambda_H$", "$\\lambda_B$", "$\\sigma_{WM}$", "$\\sigma_{MW}$", "$\\sigma_{MB}$"]
table = Latex.addRowNames(rowNames, table)
print(table)

resultsFileName = outputDir + "IdealStats.pkl"
stats = Util.loadPickle(resultsFileName)
times, vertexArray, removedGraphStats = stats

times = numpy.array(times) - startDate
times2 = numpy.arange(startDate, endDate+1, recordStep)
times2 = times2[1:]
times2 = numpy.array(times2) - startDate

graphStats = GraphStatistics()

#First plot graphs for ideal theta
plotInd = 0
    except:
        logging.debug("File not found : " + str(fileName))
        numMissingFiles += 1

logging.debug("Number of missing files: " + str(numMissingFiles))

for i, dataName in enumerate(dataNames):
    print("-"*10 + dataName + "-"*10)
    algorithms = [x.ljust(20) for x in algorithmsAbbr]

    currentTestAucsMean = testAucsMean[:, i, :].T
    maxAUCs = numpy.zeros(currentTestAucsMean.shape, numpy.bool)
    maxAUCs[numpy.argmax(currentTestAucsMean, 0), numpy.arange(currentTestAucsMean.shape[1])] = 1
    table = Latex.array2DToRows(testAucsMean[:, i, :].T, testAucsStd[:, i, :].T, precision=2, bold=maxAUCs)
    print(Latex.listToRow(hormoneNameIndicators))
    print(Latex.addRowNames(algorithms, table))

#Now look at the features for the raw spectra
algorithm = "L1SvmTreeRankForest"
dataName = "raw"
numMissingFiles = 0
numFeatures = 100
numIndicators = 6

featureInds = numpy.zeros((numFeatures, numIndicators))

for i, (hormoneName, hormoneConc) in enumerate(helper.hormoneDict.items()):
    try:
        fileName = resultsDir + "Weights" + algorithm + "-" + hormoneName + "-0" + "-" + dataName + ".npy"
        weights = numpy.load(fileName)