def worstCaseDistancePlot(trials=20):
    """Plot how ``checkTheorem``'s result varies with the projected dimension.

    For every target dimension in a fixed list, the thrombin training data is
    passed through ``jlt`` ``trials`` times and ``checkTheorem`` is evaluated
    on each (original, projected) pair with ``epsilon = 0.1``. The
    per-dimension mean and standard deviation of those results are drawn as
    an error-bar plot and saved to ``thrombin-worst-case.png`` in the current
    working directory.

    Args:
        trials: How many times ``jlt`` is re-run for each dimension
            (default 20).

    NOTE(review): repeating the projection only makes sense if ``jlt`` is
    randomized -- confirm against its definition.
    """
    import matplotlib.pyplot as plt
    from data import thrombin

    # Only the first value returned by load() is used in this function.
    train, _ = thrombin.load()
    dims = [1000, 750, 500, 250, 100, 75, 50, 25, 10, 5, 2]
    epsilon = 0.1

    means = []
    stds = []

    for dim in dims:
        # Fresh sample list per dimension so the statistics are not mixed.
        dataPoints = []
        for i in range(trials):
            print("%d, trial %d" % (dim, i))
            newData = jlt(train, dim)
            dataPoints.append(checkTheorem(train, newData, epsilon))

        means.append(numpy.mean(dataPoints))
        stds.append(numpy.std(dataPoints))

    plt.clf()
    plt.errorbar(dims, means, yerr=stds, fmt="-o")
    plt.savefig("thrombin-worst-case.png")
def worstCaseDistancePlot(trials=20):
    """Plot the spread of ``checkTheorem`` results per projected dimension.

    The thrombin training data is run through ``jlt`` ``trials`` times for
    each dimension in a fixed list; each projection is scored by
    ``checkTheorem`` against the original data with ``epsilon = 0.1``. The
    mean and standard deviation per dimension are plotted with error bars and
    written to ``thrombin-worst-case.png`` in the current working directory.

    Args:
        trials: Number of ``jlt`` runs per dimension (default 20).

    NOTE(review): presumably ``jlt`` draws a new random projection on every
    call -- confirm against its definition.
    """
    import matplotlib.pyplot as plt
    from data import thrombin

    # The second value returned by load() is not needed here.
    train, _ = thrombin.load()
    dims = [1000, 750, 500, 250, 100, 75, 50, 25, 10, 5, 2]
    epsilon = 0.1

    means = []
    stds = []

    for dim in dims:
        # Samples are collected per dimension, then summarized.
        dataPoints = []
        for i in range(trials):
            print("%d, trial %d" % (dim, i))
            newData = jlt(train, dim)
            dataPoints.append(checkTheorem(train, newData, epsilon))

        means.append(numpy.mean(dataPoints))
        stds.append(numpy.std(dataPoints))

    plt.clf()
    plt.errorbar(dims, means, yerr=stds, fmt='-o')
    plt.savefig('thrombin-worst-case.png')
def knnThrombinAccuracyPlot(trials=50):
    """Plot ``nearestNeighborsAccuracy`` results against projected dimension.

    ``nearestNeighborsAccuracy`` is first evaluated on the unprojected
    thrombin data to obtain three baseline values, which this function
    unpacks as accuracy, false-positive and false-negative figures. Then, for
    each dimension in a fixed list, the data is passed through ``jlt``
    ``trials`` times and the same three values are collected per run.

    Three error-bar plots (mean with standard deviation per dimension, plus a
    horizontal line at the corresponding baseline) are saved in the current
    working directory as ``thrombin-knn-accuracy.png``,
    ``thrombin-knn-fp.png`` and ``thrombin-knn-fn.png``.

    Args:
        trials: Number of ``jlt`` runs per dimension (default 50).
    """
    import matplotlib.pyplot as plt
    from data import thrombin

    train, labels = thrombin.load()
    dims = [1000, 750, 500, 250, 100, 75, 50, 25, 10, 5, 2]

    print("original data")
    baseAccuracy, baseFP, baseFN = nearestNeighborsAccuracy(train, labels)
    print((baseAccuracy, baseFP, baseFN))

    accuracyMeans, accuracyStds = [], []
    falsePosMeans, falsePosStds = [], []
    falseNegMeans, falseNegStds = [], []

    for dim in dims:
        accuracyPts = []
        falsePosPts = []
        falseNegPts = []

        for i in range(trials):
            print("dim %d, trial %d" % (dim, i))
            newData = jlt(train, dim)
            acc, fp, fn = nearestNeighborsAccuracy(newData, labels)
            accuracyPts.append(acc)
            falsePosPts.append(fp)
            falseNegPts.append(fn)

        accuracyMeans.append(numpy.mean(accuracyPts))
        falsePosMeans.append(numpy.mean(falsePosPts))
        falseNegMeans.append(numpy.mean(falseNegPts))
        accuracyStds.append(numpy.std(accuracyPts))
        falsePosStds.append(numpy.std(falsePosPts))
        falseNegStds.append(numpy.std(falseNegPts))

    # Each figure has the same layout; only the series, baseline and output
    # file differ, so they are drawn from one table instead of three copies.
    figures = [
        (accuracyMeans, accuracyStds, baseAccuracy, "thrombin-knn-accuracy.png"),
        (falsePosMeans, falsePosStds, baseFP, "thrombin-knn-fp.png"),
        (falseNegMeans, falseNegStds, baseFN, "thrombin-knn-fn.png"),
    ]
    for means, stds, baseline, filename in figures:
        plt.clf()
        plt.errorbar(dims, means, yerr=stds, fmt="-o")
        plt.axhline(y=baseline)
        plt.savefig(filename)
def knnThrombinAccuracyPlot(trials=50):
    """Compare ``nearestNeighborsAccuracy`` before and after ``jlt``.

    The three values returned by ``nearestNeighborsAccuracy`` on the original
    thrombin data (unpacked here as accuracy, false-positive and
    false-negative figures) serve as baselines. For each dimension in a fixed
    list the data is passed through ``jlt`` ``trials`` times, and the same
    three values are recorded for every run.

    For each of the three quantities an error-bar plot of the per-dimension
    mean and standard deviation, with a horizontal line at the baseline, is
    saved in the current working directory: ``thrombin-knn-accuracy.png``,
    ``thrombin-knn-fp.png`` and ``thrombin-knn-fn.png``.

    Args:
        trials: Number of ``jlt`` runs per dimension (default 50).
    """
    import matplotlib.pyplot as plt
    from data import thrombin

    train, labels = thrombin.load()
    dims = [1000, 750, 500, 250, 100, 75, 50, 25, 10, 5, 2]

    print("original data")
    baseAccuracy, baseFP, baseFN = nearestNeighborsAccuracy(train, labels)
    print((baseAccuracy, baseFP, baseFN))

    accuracyMeans, accuracyStds = [], []
    falsePosMeans, falsePosStds = [], []
    falseNegMeans, falseNegStds = [], []

    for dim in dims:
        accuracyPts = []
        falsePosPts = []
        falseNegPts = []

        for i in range(trials):
            print("dim %d, trial %d" % (dim, i))
            newData = jlt(train, dim)
            acc, fp, fn = nearestNeighborsAccuracy(newData, labels)
            accuracyPts.append(acc)
            falsePosPts.append(fp)
            falseNegPts.append(fn)

        accuracyMeans.append(numpy.mean(accuracyPts))
        falsePosMeans.append(numpy.mean(falsePosPts))
        falseNegMeans.append(numpy.mean(falseNegPts))
        accuracyStds.append(numpy.std(accuracyPts))
        falsePosStds.append(numpy.std(falsePosPts))
        falseNegStds.append(numpy.std(falseNegPts))

    # One row per output figure: (means, stds, baseline value, file name).
    figures = [
        (accuracyMeans, accuracyStds, baseAccuracy, 'thrombin-knn-accuracy.png'),
        (falsePosMeans, falsePosStds, baseFP, 'thrombin-knn-fp.png'),
        (falseNegMeans, falseNegStds, baseFN, 'thrombin-knn-fn.png'),
    ]
    for means, stds, baseline, filename in figures:
        plt.clf()
        plt.errorbar(dims, means, yerr=stds, fmt='-o')
        plt.axhline(y=baseline)
        plt.savefig(filename)
def thrombinSubpsaceDistanceHistograms():
    """Save one histogram of ``distances`` per ``jlt`` target dimension.

    For each dimension in a fixed list, the thrombin training data is passed
    through ``jlt`` and a 100-bin histogram of ``distances(newData)`` is
    written to ``thrombin-animation/<dim, zero-padded to 5 digits>.png``.
    The output directory is created if it does not already exist.
    """
    import os

    import matplotlib.pyplot as plt
    from data import thrombin

    # Only the first value returned by load() is used in this function.
    train, _ = thrombin.load()

    # savefig does not create missing directories, so ensure the target
    # exists before the first image is written.
    os.makedirs('thrombin-animation', exist_ok=True)

    for subspaceDim in [5000, 1000, 750, 500, 250, 100, 75, 50, 10, 5, 2]:
        newData = jlt(train, subspaceDim)

        plt.clf()
        # The same axis limits are applied on every iteration, so all saved
        # images share one scale.
        plt.ylim(0, 200000)
        plt.xlim(0, 250)
        plt.hist(distances(newData), bins=100)
        plt.savefig('thrombin-animation/%05d.png' % subspaceDim, bbox_inches='tight')
def checkThrombin():
    """Run ``checkTheorem`` on thrombin data at the ``theoreticalBound`` size.

    The target dimension comes from ``theoreticalBound`` given the number of
    loaded rows and a tolerance of 0.2. The function prints the tuple
    ``(target dimension, length of the first row)`` and then the result of
    ``checkTheorem`` for the original data versus its ``jlt`` output.
    """
    from data import thrombin

    # The second value returned by load() is not used here.
    points, _ = thrombin.load()
    tolerance = 0.2

    targetDim = theoreticalBound(len(points), tolerance)
    originalDim = len(points[0])
    print((targetDim, originalDim))

    projected = jlt(points, targetDim)
    print(checkTheorem(points, projected, tolerance))
def thrombinSubpsaceDistanceHistograms():
    """Write a ``distances`` histogram image for each ``jlt`` dimension.

    The thrombin training data is passed through ``jlt`` once per dimension
    in a fixed list. For each result a 100-bin histogram of
    ``distances(newData)`` is saved as
    ``thrombin-animation/<dim, zero-padded to 5 digits>.png``. The output
    directory is created first if it is missing.
    """
    import os

    import matplotlib.pyplot as plt
    from data import thrombin

    # The second value returned by load() is not needed here.
    train, _ = thrombin.load()

    # Without this, savefig raises when the directory does not exist yet.
    os.makedirs("thrombin-animation", exist_ok=True)

    for subspaceDim in [5000, 1000, 750, 500, 250, 100, 75, 50, 10, 5, 2]:
        newData = jlt(train, subspaceDim)

        plt.clf()
        # Identical limits on each pass keep the saved images on one scale.
        plt.ylim(0, 200000)
        plt.xlim(0, 250)
        plt.hist(distances(newData), bins=100)
        plt.savefig("thrombin-animation/%05d.png" % subspaceDim, bbox_inches="tight")
def thrombinTheoreticalBoundReduction():
    """Save ``distances`` histograms before and after ``jlt``.

    The target dimension is ``theoreticalBound(number of rows, 0.2)``. The
    function prints ``(target dimension, length of the first row)``, then
    writes two 100-bin histograms to the current working directory:
    ``thrombin-original.png`` for ``distances`` of the loaded data and
    ``thrombin-<dim, zero-padded to 5 digits>.png`` for ``distances`` of the
    ``jlt`` output.
    """
    import matplotlib.pyplot as plt
    from data import thrombin

    def saveHistogram(values, filename):
        # Clear the current figure, draw the histogram, write it out.
        plt.clf()
        plt.hist(values, bins=100)
        plt.savefig(filename, bbox_inches='tight')

    # The second value returned by load() is not used here.
    points, _ = thrombin.load()

    targetDim = theoreticalBound(len(points), 0.2)
    originalDim = len(points[0])
    print((targetDim, originalDim))

    saveHistogram(distances(points), 'thrombin-original.png')
    saveHistogram(distances(jlt(points, targetDim)), 'thrombin-%05d.png' % targetDim)
def thrombinTheoreticalBoundReduction():
    """Histogram ``distances`` for the raw data and for its ``jlt`` output.

    ``theoreticalBound`` is called with the number of loaded rows and 0.2 to
    pick the target dimension. After printing
    ``(target dimension, length of the first row)``, two 100-bin histograms
    are saved in the current working directory: ``thrombin-original.png``
    for the loaded data and ``thrombin-<dim, zero-padded to 5 digits>.png``
    for the data returned by ``jlt``.
    """
    import matplotlib.pyplot as plt
    from data import thrombin

    # Only the first value returned by load() is needed.
    samples, _ = thrombin.load()
    sampleCount = len(samples)

    reducedDim = theoreticalBound(sampleCount, 0.2)
    fullDim = len(samples[0])
    print((reducedDim, fullDim))

    # Histogram for the data as loaded.
    rawDistances = distances(samples)
    plt.clf()
    plt.hist(rawDistances, bins=100)
    plt.savefig("thrombin-original.png", bbox_inches="tight")

    # Histogram for the data after jlt at the computed dimension.
    reducedSamples = jlt(samples, reducedDim)
    reducedDistances = distances(reducedSamples)
    plt.clf()
    plt.hist(reducedDistances, bins=100)
    plt.savefig("thrombin-%05d.png" % reducedDim, bbox_inches="tight")