def worstCaseDistancePlot():
    """Plot the mean and spread of ``checkTheorem`` results per target dimension.

    Loads the thrombin data set, then for every target dimension in a fixed
    list runs ``numTrials`` independent trials. Each trial projects the data
    with ``jlt(train, dim)`` and records ``checkTheorem(train, projected,
    epsilon)``. The per-dimension mean and standard deviation of those
    results are drawn as an error-bar plot and written to
    ``thrombin-worst-case.png`` in the current working directory.

    NOTE(review): ``jlt`` is presumably a random Johnson-Lindenstrauss
    projection and ``checkTheorem`` is assumed to return a number that
    ``numpy.mean``/``numpy.std`` can aggregate -- confirm against their
    definitions.
    """
    import matplotlib.pyplot as plt
    from data import thrombin

    # The labels are not needed here; only the feature vectors are projected.
    train, _labels = thrombin.load()

    dims = [1000, 750, 500, 250, 100, 75, 50, 25, 10, 5, 2]
    epsilon = 0.1
    numTrials = 20

    means = []
    stds = []

    for dim in dims:
        # Fresh result list per dimension so trials never mix across dims.
        trialResults = []

        for i in range(numTrials):
            print("%d, trial %d" % (dim, i))
            newData = jlt(train, dim)
            trialResults.append(checkTheorem(train, newData, epsilon))

        means.append(numpy.mean(trialResults))
        stds.append(numpy.std(trialResults))

    plt.clf()
    plt.errorbar(dims, means, yerr=stds, fmt="-o")
    plt.savefig("thrombin-worst-case.png")
def worstCaseDistancePlot():
   """Plot the mean and spread of ``checkTheorem`` results per target dimension.

   Loads the thrombin data set, then for every target dimension in a fixed
   list runs ``numTrials`` independent trials. Each trial projects the data
   with ``jlt(train, dim)`` and records ``checkTheorem(train, projected,
   epsilon)``. The per-dimension mean and standard deviation of those
   results are drawn as an error-bar plot and written to
   ``thrombin-worst-case.png`` in the current working directory.

   NOTE(review): ``jlt`` is presumably a random Johnson-Lindenstrauss
   projection and ``checkTheorem`` is assumed to return a number that
   ``numpy.mean``/``numpy.std`` can aggregate -- confirm against their
   definitions.
   """
   import matplotlib.pyplot as plt
   from data import thrombin

   # The labels are not needed here; only the feature vectors are projected.
   train, _labels = thrombin.load()

   dims = [1000, 750, 500, 250, 100, 75, 50, 25, 10, 5, 2]
   epsilon = 0.1
   numTrials = 20

   means = []
   stds = []

   for dim in dims:
      # Fresh result list per dimension so trials never mix across dims.
      trialResults = []

      for i in range(numTrials):
         print("%d, trial %d" % (dim, i))
         newData = jlt(train, dim)
         trialResults.append(checkTheorem(train, newData, epsilon))

      means.append(numpy.mean(trialResults))
      stds.append(numpy.std(trialResults))

   plt.clf()
   plt.errorbar(dims, means, yerr=stds, fmt='-o')
   plt.savefig('thrombin-worst-case.png')
def knnThrombinAccuracyPlot(trials=50):
    """Plot nearest-neighbour metrics on projected thrombin data per dimension.

    First evaluates ``nearestNeighborsAccuracy`` on the unprojected data to
    obtain three baseline values. Then, for each target dimension in a fixed
    list, runs ``trials`` trials; each trial projects the data with
    ``jlt(train, dim)`` and evaluates ``nearestNeighborsAccuracy`` on the
    projection. For each of the three metrics, the per-dimension mean and
    standard deviation are drawn as an error-bar plot with the baseline as a
    horizontal line, and saved to ``thrombin-knn-accuracy.png``,
    ``thrombin-knn-fp.png`` and ``thrombin-knn-fn.png`` respectively.

    NOTE(review): the three returned values are presumably accuracy,
    false-positive and false-negative measures (judging by how they are
    named and plotted here) -- confirm against ``nearestNeighborsAccuracy``.

    Args:
        trials: Number of projections evaluated per target dimension.
    """
    import matplotlib.pyplot as plt
    from data import thrombin

    train, labels = thrombin.load()

    dims = [1000, 750, 500, 250, 100, 75, 50, 25, 10, 5, 2]

    print("original data")
    baseAccuracy, baseFP, baseFN = nearestNeighborsAccuracy(train, labels)
    print((baseAccuracy, baseFP, baseFN))

    accuracyMeans = []
    accuracyStds = []
    falsePosMeans = []
    falsePosStds = []
    falseNegMeans = []
    falseNegStds = []

    for dim in dims:
        # Per-dimension samples; reset so trials never mix across dims.
        accuracyPts = []
        falsePosPts = []
        falseNegPts = []

        for i in range(trials):
            print("dim %d, trial %d" % (dim, i))
            newData = jlt(train, dim)
            acc, fp, fn = nearestNeighborsAccuracy(newData, labels)
            accuracyPts.append(acc)
            falsePosPts.append(fp)
            falseNegPts.append(fn)

        accuracyMeans.append(numpy.mean(accuracyPts))
        falsePosMeans.append(numpy.mean(falsePosPts))
        falseNegMeans.append(numpy.mean(falseNegPts))

        accuracyStds.append(numpy.std(accuracyPts))
        falsePosStds.append(numpy.std(falsePosPts))
        falseNegStds.append(numpy.std(falseNegPts))

    # One figure per metric: (means, stds, baseline on original data, file).
    figures = (
        (accuracyMeans, accuracyStds, baseAccuracy, "thrombin-knn-accuracy.png"),
        (falsePosMeans, falsePosStds, baseFP, "thrombin-knn-fp.png"),
        (falseNegMeans, falseNegStds, baseFN, "thrombin-knn-fn.png"),
    )
    for metricMeans, metricStds, baseline, filename in figures:
        plt.clf()
        plt.errorbar(dims, metricMeans, yerr=metricStds, fmt="-o")
        plt.axhline(y=baseline)
        plt.savefig(filename)
def knnThrombinAccuracyPlot(trials=50):
   """Plot nearest-neighbour metrics on projected thrombin data per dimension.

   First evaluates ``nearestNeighborsAccuracy`` on the unprojected data to
   obtain three baseline values. Then, for each target dimension in a fixed
   list, runs ``trials`` trials; each trial projects the data with
   ``jlt(train, dim)`` and evaluates ``nearestNeighborsAccuracy`` on the
   projection. For each of the three metrics, the per-dimension mean and
   standard deviation are drawn as an error-bar plot with the baseline as a
   horizontal line, and saved to ``thrombin-knn-accuracy.png``,
   ``thrombin-knn-fp.png`` and ``thrombin-knn-fn.png`` respectively.

   NOTE(review): the three returned values are presumably accuracy,
   false-positive and false-negative measures (judging by how they are
   named and plotted here) -- confirm against ``nearestNeighborsAccuracy``.

   Args:
      trials: Number of projections evaluated per target dimension.
   """
   import matplotlib.pyplot as plt
   from data import thrombin

   train, labels = thrombin.load()

   dims = [1000, 750, 500, 250, 100, 75, 50, 25, 10, 5, 2]

   print("original data")
   baseAccuracy, baseFP, baseFN = nearestNeighborsAccuracy(train, labels)
   print((baseAccuracy, baseFP, baseFN))

   accuracyMeans = []
   accuracyStds = []
   falsePosMeans = []
   falsePosStds = []
   falseNegMeans = []
   falseNegStds = []

   for dim in dims:
      # Per-dimension samples; reset so trials never mix across dims.
      accuracyPts = []
      falsePosPts = []
      falseNegPts = []

      for i in range(trials):
         print("dim %d, trial %d" % (dim, i))
         newData = jlt(train, dim)
         acc, fp, fn = nearestNeighborsAccuracy(newData, labels)
         accuracyPts.append(acc)
         falsePosPts.append(fp)
         falseNegPts.append(fn)

      accuracyMeans.append(numpy.mean(accuracyPts))
      falsePosMeans.append(numpy.mean(falsePosPts))
      falseNegMeans.append(numpy.mean(falseNegPts))

      accuracyStds.append(numpy.std(accuracyPts))
      falsePosStds.append(numpy.std(falsePosPts))
      falseNegStds.append(numpy.std(falseNegPts))

   # One figure per metric: (means, stds, baseline on original data, file).
   figures = (
      (accuracyMeans, accuracyStds, baseAccuracy, 'thrombin-knn-accuracy.png'),
      (falsePosMeans, falsePosStds, baseFP, 'thrombin-knn-fp.png'),
      (falseNegMeans, falseNegStds, baseFN, 'thrombin-knn-fn.png'),
   )
   for metricMeans, metricStds, baseline, filename in figures:
      plt.clf()
      plt.errorbar(dims, metricMeans, yerr=metricStds, fmt='-o')
      plt.axhline(y=baseline)
      plt.savefig(filename)
def thrombinSubpsaceDistanceHistograms():
   """Save one distance histogram per target dimension of projected thrombin data.

   For each target dimension in a fixed list, the data is projected with
   ``jlt`` and a 100-bin histogram of ``distances(projected)`` is drawn on
   fixed axes (x: 0-250, y: 0-200000) and written to
   ``thrombin-animation/<dim zero-padded to 5 digits>.png``.

   NOTE(review): the ``thrombin-animation`` directory is not created here;
   presumably it must already exist -- confirm.
   """
   import matplotlib.pyplot as plt
   from data import thrombin

   points, _ = thrombin.load()
   targetDims = (5000, 1000, 750, 500, 250, 100, 75, 50, 10, 5, 2)

   for targetDim in targetDims:
      projected = jlt(points, targetDim)

      # Identical axis limits on every frame keep the images comparable.
      plt.clf()
      plt.ylim(0, 200000)
      plt.xlim(0, 250)
      projectedDistances = distances(projected)
      plt.hist(projectedDistances, bins=100)
      outputPath = 'thrombin-animation/%05d.png' % targetDim
      plt.savefig(outputPath, bbox_inches='tight')
def checkThrombin():
   """Project thrombin data to the ``theoreticalBound`` dimension and print the check.

   Prints the tuple ``(subspace dimension, ambient dimension)``, where the
   subspace dimension is ``theoreticalBound(number of points, 0.2)`` and the
   ambient dimension is the length of the first data row. Then prints the
   result of ``checkTheorem`` on the original versus ``jlt``-projected data
   with the same epsilon.
   """
   from data import thrombin

   points, _ = thrombin.load()
   tolerance = 0.2

   targetDim = theoreticalBound(len(points), tolerance)
   originalDim = len(points[0])
   print((targetDim, originalDim))

   projected = jlt(points, targetDim)
   outcome = checkTheorem(points, projected, tolerance)
   print(outcome)
def thrombinSubpsaceDistanceHistograms():
    """Save one distance histogram per target dimension of projected thrombin data.

    For each target dimension in a fixed list, the data is projected with
    ``jlt`` and a 100-bin histogram of ``distances(projected)`` is drawn on
    fixed axes (x: 0-250, y: 0-200000) and written to
    ``thrombin-animation/<dim zero-padded to 5 digits>.png``.

    NOTE(review): the ``thrombin-animation`` directory is not created here;
    presumably it must already exist -- confirm.
    """
    import matplotlib.pyplot as plt
    from data import thrombin

    points, _ = thrombin.load()
    targetDims = (5000, 1000, 750, 500, 250, 100, 75, 50, 10, 5, 2)

    for targetDim in targetDims:
        projected = jlt(points, targetDim)

        # Identical axis limits on every frame keep the images comparable.
        plt.clf()
        plt.ylim(0, 200000)
        plt.xlim(0, 250)
        projectedDistances = distances(projected)
        plt.hist(projectedDistances, bins=100)
        outputPath = "thrombin-animation/%05d.png" % targetDim
        plt.savefig(outputPath, bbox_inches="tight")
def checkThrombin():
    """Project thrombin data to the ``theoreticalBound`` dimension and print the check.

    Prints the tuple ``(subspace dimension, ambient dimension)``, where the
    subspace dimension is ``theoreticalBound(number of points, 0.2)`` and the
    ambient dimension is the length of the first data row. Then prints the
    result of ``checkTheorem`` on the original versus ``jlt``-projected data
    with the same epsilon.
    """
    from data import thrombin

    points, _ = thrombin.load()
    tolerance = 0.2

    targetDim = theoreticalBound(len(points), tolerance)
    originalDim = len(points[0])
    print((targetDim, originalDim))

    projected = jlt(points, targetDim)
    outcome = checkTheorem(points, projected, tolerance)
    print(outcome)
def thrombinTheoreticalBoundReduction():
   """Save distance histograms of thrombin data before and after projection.

   Prints ``(subspace dimension, ambient dimension)``, where the subspace
   dimension is ``theoreticalBound(number of points, 0.2)``. Writes a 100-bin
   histogram of ``distances`` on the original data to
   ``thrombin-original.png``, then one for the ``jlt``-projected data to
   ``thrombin-<dim zero-padded to 5 digits>.png``.
   """
   import matplotlib.pyplot as plt
   from data import thrombin

   points, _ = thrombin.load()

   targetDim = theoreticalBound(len(points), 0.2)
   originalDim = len(points[0])
   print((targetDim, originalDim))

   # Histogram of distances in the original space.
   originalDistances = distances(points)
   plt.clf()
   plt.hist(originalDistances, bins=100)
   plt.savefig('thrombin-original.png', bbox_inches='tight')

   # Histogram of distances after projecting down to targetDim.
   projected = jlt(points, targetDim)
   reducedDistances = distances(projected)
   plt.clf()
   plt.hist(reducedDistances, bins=100)
   reducedPath = 'thrombin-%05d.png' % targetDim
   plt.savefig(reducedPath, bbox_inches='tight')
# Example no. 10
# 0
def thrombinTheoreticalBoundReduction():
    """Save distance histograms of thrombin data before and after projection.

    Prints ``(subspace dimension, ambient dimension)``, where the subspace
    dimension is ``theoreticalBound(number of points, 0.2)``. Writes a 100-bin
    histogram of ``distances`` on the original data to
    ``thrombin-original.png``, then one for the ``jlt``-projected data to
    ``thrombin-<dim zero-padded to 5 digits>.png``.
    """
    import matplotlib.pyplot as plt
    from data import thrombin

    points, _ = thrombin.load()

    targetDim = theoreticalBound(len(points), 0.2)
    originalDim = len(points[0])
    print((targetDim, originalDim))

    # Histogram of distances in the original space.
    originalDistances = distances(points)
    plt.clf()
    plt.hist(originalDistances, bins=100)
    plt.savefig("thrombin-original.png", bbox_inches="tight")

    # Histogram of distances after projecting down to targetDim.
    projected = jlt(points, targetDim)
    reducedDistances = distances(projected)
    plt.clf()
    plt.hist(reducedDistances, bins=100)
    reducedPath = "thrombin-%05d.png" % targetDim
    plt.savefig(reducedPath, bbox_inches="tight")