exampleData = LazyDiskData("../data/RDR/ALL.CSV", columnSlice = slice(3, None)) query = randomUnitVector(6144) numTrees = 1 elif analysis == "sim": exampleData = LazyDiskData("../data/testdata.csv", columnSlice = slice(1, None)) query = np.zeros(10) numTrees = 10 else: raise Exception("Unrecognized analysis: {}".format(analysis)) timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S.%f") outputDir = "forests/forest_" + analysis + "_" + timestamp with time("naive linear scan query"): naiveResult = exampleData.linearScanNearestNeighbor(query, distanceFunction = euclidean) print(naiveResult) naiveRuntime = getLastElapsedTime() with time("build trees"): forest = makeForest(exampleData, maxLeafSize = 500, numTrees = numTrees, distanceFunction = euclidean, depthPerBatch = 3, outputDir = outputDir) with time("run query"): result = forest.nearestNeighbor(query) print(result) print("For comparison, naive result:\n{}\nnaive elapsed = {}".format(
#!/usr/bin/env python
"""Load a saved forest from disk and run one nearest-neighbor query on it.

Usage: pass the forest file as the first command-line argument. The forest
is read with ``loadForest``, queried with an all-zeros vector, and the
result is printed. Loading and querying are each wrapped in the project's
``time(...)`` context manager under the labels "load forest" and
"run query".
"""

import pickle
import numpy as np
import sys

from bigNearestNeighbor import time, loadForest

# Guard clause: without a path argument there is nothing to load, so report
# the problem and exit with a non-zero status.
if len(sys.argv) <= 1:
    print("Please provide a filename")
    sys.exit(1)

filename = sys.argv[1]

with time("load forest"):
    forest = loadForest(filename)

# The query is a zero vector with one entry per element of the first tree's
# rule direction, so its length matches what the forest was built with.
query = np.zeros(len(forest.trees[0].rule.direction))

with time("run query"):
    result = forest.nearestNeighbor(query)

print(result)