exampleData = LazyDiskData("../data/RDR/ALL.CSV",
      columnSlice = slice(3, None))
  query = randomUnitVector(6144)
  numTrees = 1
elif analysis == "sim":
  exampleData = LazyDiskData("../data/testdata.csv",
      columnSlice = slice(1, None))
  query = np.zeros(10)
  numTrees = 10
else:
  raise Exception("Unrecognized analysis: {}".format(analysis))

# Every run gets its own output directory, named after the analysis and the
# wall-clock start time (down to microseconds).
timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S.%f")
outputDir = "_".join(("forests/forest", analysis, timestamp))

# Baseline: nearest neighbor via a linear scan of the data set.
# The print stays inside the timed block, as it does for the forest query.
with time("naive linear scan query"):
  naiveResult = exampleData.linearScanNearestNeighbor(
      query,
      distanceFunction = euclidean)
  print(naiveResult)
# Read immediately after the block above; presumably reports the duration of
# the most recently finished `time` block -- confirm against its definition.
naiveRuntime = getLastElapsedTime()

# Build the forest; it is given `outputDir` as the place for its output.
with time("build trees"):
  forest = makeForest(
      exampleData,
      numTrees = numTrees,
      maxLeafSize = 500,
      depthPerBatch = 3,
      distanceFunction = euclidean,
      outputDir = outputDir)

# Same query as the baseline, answered by the forest this time.
with time("run query"):
  result = forest.nearestNeighbor(query)
  print(result)

print("For comparison, naive result:\n{}\nnaive elapsed = {}".format(
Esempio n. 2
0
#!/usr/bin/env python

import pickle
import numpy as np
import sys
from bigNearestNeighbor import time, loadForest

# Usage: pass the path of a forest file as the first command-line argument.
# The script loads the forest from that file and runs one nearest-neighbor
# query against it, using the all-zeros vector as the query point.

if len(sys.argv) < 2:
  # sys.exit() with a string writes it to stderr and exits with status 1, so
  # the diagnostic is not mixed into stdout, where the query result is printed.
  sys.exit("Please provide a filename")
filename = sys.argv[1]

with time("load forest"):
  forest = loadForest(filename)

# Size the query to match the first tree's split direction vector
# (presumably the dimensionality of the indexed data -- confirm).
query = np.zeros(len(forest.trees[0].rule.direction))

with time("run query"):
  result = forest.nearestNeighbor(query)
print(result)