def test_candidates_mnist():
    """Check RPForest candidate quality on MNIST for several forest sizes.

    For each (number of trees, minimum precision) pair a forest with
    ``leaf_size=10`` is fit on the training data.  Every test point is then
    queried and the returned candidates are checked for:

    * no duplicates and no ``-1`` entries (large candidate request),
    * every index being a valid row of the training matrix,
    * sufficient overlap between the first 10 candidates and the 10
      training rows with the highest dot product against the query.

    The mean overlap fraction over all test points must reach the minimum
    precision configured for that forest size.
    """
    X_train, X_test = _get_mnist_data()

    # (no_trees, minimum acceptable mean overlap with the true top 10)
    cases = ((1, 0.05), (5, 0.12), (10, 0.2), (50, 0.5), (80, 0.6))

    for no_trees, expected_precision in cases:
        forest = RPForest(leaf_size=10, no_trees=no_trees)
        forest.fit(X_train)

        # Rows are scaled to unit L2 norm in place, *after* fitting, so the
        # dot products below rank training rows on a common scale.  This
        # mutates the array shared across iterations of the outer loop.
        row_norms = np.linalg.norm(X_train, axis=1)
        X_train /= row_norms[:, np.newaxis]

        n_train = X_train.shape[0]
        overlap_total = 0.0

        for query in X_test:
            # Ground truth: the 10 training rows with the largest dot product.
            similarities = np.dot(X_train, query)
            true_nns = np.argsort(-similarities)[:10]

            # Large request (presumably a candidate limit -- TODO confirm
            # against RPForest.get_candidates) used for sanity checks.
            check_nns = forest.get_candidates(query, 100000)
            assert len(check_nns) == len(set(check_nns))
            assert -1 not in check_nns
            assert (check_nns < n_train).all()

            # Small request used for the precision measurement.
            nns = forest.get_candidates(query, 10)[:10]
            assert (nns < n_train).all()

            overlap_total += len(set(nns) & set(true_nns)) / 10.0

        precision = overlap_total / X_test.shape[0]
        assert precision >= expected_precision
name = 'RPForest(leaf_size=%d, n_trees=%d)' % (leaf_size, n_trees) model = RPForest(leaf_size=leaf_size, no_trees=n_trees) #fitting features = features.copy(order='C') #something related to Cython error model.fit(features) model.clear() #indexing for i, x in enumerate(features): t = Timer() with t: model.index(dict_feat[i], x.tolist()) #querying for i in range(features.shape[0]): t = Timer() with t: results = model.get_candidates(features[i]) print 'queried', dict_feat[i], 'results', results import timeit class Timer: def __init__(self, timer=None, disable_gc=False, verbose=True): if timer is None: timer = timeit.default_timer self.timer = timer self.disable_gc = disable_gc self.verbose = verbose self.start = self.end = self.interval = None def __enter__(self):