def test_find_self():
    """Check that each training point finds itself, before and after pickling.

    For every forest size the forest is fitted on the training set and then
    verified twice: once as built, and once after a pickle round trip. In
    both cases querying a training point must return that point's own index
    first, and every code returned by ``encode`` for the point must name a
    leaf (as listed by ``get_leaf_nodes``) that contains the point's index.
    """

    def check_self_lookup(forest):
        # Map each leaf code to the set of point indices stored in that leaf.
        leaf_members = {
            code: set(members) for code, members in forest.get_leaf_nodes()
        }
        for idx, point in enumerate(X_train):
            neighbours = forest.query(point, 10)[:10]
            assert neighbours[0] == idx
            for leaf_code in forest.encode(point):
                assert idx in leaf_members[leaf_code]

    X_train, X_test = _get_mnist_data()
    # The second element of each pair is not used by this test.
    settings = ((1, 0.05), (5, 0.3), (10, 0.5), (50, 0.9))
    for no_trees, expected_precision in settings:
        tree = RPForest(leaf_size=10, no_trees=no_trees)
        tree.fit(X_train)
        check_self_lookup(tree)

        # The same guarantees must hold for a deserialized copy.
        restored = pickle.loads(pickle.dumps(tree))
        check_self_lookup(restored)
def lvnn(fp, nt=3, k=5, iter=5, leaves=50):
    """Build an approximate k-nearest-neighbour list and refine it iteratively.

    An ``RPForest`` fitted on ``fp`` supplies the initial ``k`` neighbours of
    every row. Each refinement pass then re-selects a row's neighbours from
    its current neighbours plus the neighbours of those neighbours, keeping
    the ``k`` candidates with the smallest Euclidean distance to the row.

    Args:
        fp: 2-D NumPy array with one point per row.
        nt: Number of trees passed to ``RPForest`` as ``no_trees``.
        k: Number of neighbours kept per point.
        iter: Number of refinement passes. (The name shadows the builtin but
            is kept so existing keyword callers continue to work.)
        leaves: Leaf size passed to ``RPForest`` as ``leaf_size``.

    Returns:
        Array of shape ``(fp.shape[0] * k, 3)``. Each row holds
        ``(point index, neighbour index, weight)``, where the weight is the
        negated Euclidean distance between the two points. If no refinement
        pass runs (``iter <= 0``) the weight column keeps its ``-1``
        placeholder because no distance has been computed.
    """
    n_points = fp.shape[0]
    # nn[i, j] = (neighbour index, negated distance); -1 marks "not set yet".
    nn = np.zeros((n_points, k, 2)) - 1
    print(' start Tree build')
    model = RPForest(leaf_size=leaves, no_trees=nt)
    model.fit(fp)
    for i in range(n_points):
        nn[i, :, 0] = model.query(fp[i, ], k)

    t = 0
    while t < iter:
        t += 1
        # Integer snapshot of the neighbour ids at the start of the pass.
        # ``nn`` stores ids as floats, which NumPy refuses to use as indices,
        # and reading from a copy keeps rows rewritten earlier in this pass
        # from leaking into later rows.
        old_idx = nn[:, :, 0].astype(np.intp)
        for i in range(n_points):
            direct = old_idx[i]
            # Candidates: current neighbours plus neighbours of neighbours.
            # Seeding with the current neighbours means a pass always has at
            # least k candidates when those neighbours are distinct.
            candidates = np.unique(
                np.concatenate((direct, old_idx[direct].ravel()))
            )
            dist = np.linalg.norm(fp[candidates] - fp[i], axis=1)
            # Rank by distance (not by index); mergesort is stable, so ties
            # are resolved in favour of the lower point index.
            nearest = np.argsort(dist, kind='mergesort')[:k]
            found = len(nearest)
            # ``found`` is below k only if the current neighbours contained
            # duplicates; slots beyond ``found`` keep their previous content.
            nn[i, :found, 0] = candidates[nearest]
            nn[i, :found, 1] = -dist[nearest]

    # Flatten to (row, column, value) triplets, ordered by point then rank.
    csr = np.zeros((n_points * k, 3))
    csr[:, 0] = np.repeat(np.arange(n_points), k)
    csr[:, 1] = nn[:, :, 0].ravel()
    csr[:, 2] = nn[:, :, 1].ravel()
    return csr
def test_sample_training():
    """Check precision when the forest is fitted on a sample, then re-indexed.

    For every forest size the trees are built from the first quarter of the
    training set, the forest is cleared, and every training point is indexed
    individually. The mean overlap between the forest's 10 returned
    neighbours and the 10 highest dot-product neighbours must reach the
    expected precision for that forest size.
    """
    X_train, X_test = _get_mnist_data()

    for no_trees, expected_precision in ((1, 0.05), (5, 0.3), (10, 0.5), (50, 0.9)):

        tree = RPForest(leaf_size=10, no_trees=no_trees)

        # Fit on quarter of data. Floor division keeps the slice bound an
        # integer; true division yields a float, which cannot be used as a
        # slice index on Python 3.
        X_sample = X_train[:X_train.shape[0] // 4]
        tree.fit(X_sample)

        # Clear and index everything
        tree.clear()
        for i, x in enumerate(X_train):
            tree.index(i, x)
        tree._X = X_train

        precision = 0.0
        # In-place row normalisation; ``tree._X`` refers to the same array.
        X_train /= np.linalg.norm(X_train, axis=1)[:, np.newaxis]

        for x_test in X_test:
            # Exact top-10 by descending dot product against the unit rows.
            true_nns = np.argsort(-np.dot(X_train, x_test))[:10]
            nns = tree.query(x_test, 10)[:10]

            precision += len(set(nns) & set(true_nns)) / 10.0

        precision /= X_test.shape[0]

        assert precision >= expected_precision
def test_serialization_mnist():
    """Check that a pickled-and-restored forest still meets its precision bar.

    For every forest size the forest is fitted on the training set, passed
    through a pickle round trip, and queried with each test vector. Every
    returned index must be smaller than the number of training rows, and the
    mean overlap with the 10 highest dot-product neighbours must reach the
    expected precision for that forest size.
    """
    X_train, X_test = _get_mnist_data()

    top_k = 10
    configurations = ((1, 0.05), (5, 0.3), (10, 0.5), (50, 0.9))

    for no_trees, expected_precision in configurations:
        forest = RPForest(leaf_size=10, no_trees=no_trees)
        forest.fit(X_train)

        # Serialize and deserialize; only the restored object is queried.
        forest = pickle.loads(pickle.dumps(forest))

        # Scale every training row to unit length, in place.
        row_norms = np.linalg.norm(X_train, axis=1)
        X_train /= row_norms[:, np.newaxis]

        n_indexed = X_train.shape[0]
        precision = 0.0
        for query_vector in X_test:
            similarities = np.dot(X_train, query_vector)
            exact = np.argsort(-similarities)[:top_k]
            approx = forest.query(query_vector, top_k)[:top_k]

            # The restored forest must only return indices of training rows.
            assert (approx < n_indexed).all()

            overlap = set(approx) & set(exact)
            precision += len(overlap) / 10.0

        precision /= X_test.shape[0]

        assert precision >= expected_precision
class RPForest(BaseANN):
    """Adapter that drives the ``rpforest`` package through ``fit``/``query``."""

    def __init__(self, leaf_size, n_trees):
        """Create the underlying forest and a descriptive ``name``.

        Args:
            leaf_size: Forwarded to the library as ``leaf_size``.
            n_trees: Forwarded to the library as ``no_trees``.
        """
        # Imported here so the package is only required once an instance is
        # created; the module-qualified name avoids clashing with this class.
        import rpforest

        self.name = 'RPForest(leaf_size=%d, n_trees=%d)' % (leaf_size, n_trees)
        self._model = rpforest.RPForest(leaf_size=leaf_size, no_trees=n_trees)

    def fit(self, X):
        """Build the forest from the data set ``X``."""
        self._model.fit(X)

    def query(self, v, n):
        """Return the underlying model's query result for ``v`` and ``n``."""
        return self._model.query(v, n)
b = [350]
# Benchmark every (leaf_size, no_trees) combination: fit on X, query each
# vector in xq, and append "<accuracy>: <query seconds>," to the results file.
for leaf_size in a:
    for no_trees in b:
        # The context manager closes the results file even when fitting or
        # querying raises, instead of leaking the handle.
        with open('fq_RPForest.txt', 'a') as fq:
            if X.dtype != np.double:
                X = np.array(X).astype(np.double)
            t = RPForest(leaf_size, no_trees)
            t.fit(X)
            start_query = time.time()
            accuracy = 0
            for i in range(len(xq)):
                v = xq[i]
                if v.dtype != np.double:
                    v = np.array(v).astype(np.double)
                ans = t.query(v, k)
                # Count returned ids that appear in the ground truth for
                # query i.
                for x in ans:
                    if x in gt[i]:
                        accuracy += 1
            end_query = time.time()
            print(leaf_size, no_trees)
            # Hits divided by the number of queries and by k.
            print(round(accuracy / len(xq) / k, 4), ": ", round(end_query - start_query, 4), ",", file=fq, sep="")
        print('time query:', end_query - start_query)