def test_sample_training():
    """Check query precision when the forest is fitted on a sample only.

    For each ``(no_trees, expected_precision)`` pair the forest is fitted on
    the first quarter of the training rows, cleared, and then every training
    row is indexed. Precision is the mean overlap between the first 10
    results of ``tree.query`` and the 10 training rows with the largest dot
    product against each test point; it must reach the expected threshold.
    """
    X_train, X_test = _get_mnist_data()

    for no_trees, expected_precision in ((1, 0.05), (5, 0.3),
                                         (10, 0.5), (50, 0.9)):
        tree = RPForest(leaf_size=10, no_trees=no_trees)

        # Fit on quarter of data. Floor division keeps the slice bound an
        # integer; true division yields a float on Python 3, which is not a
        # valid slice index.
        X_sample = X_train[:X_train.shape[0] // 4]
        tree.fit(X_sample)

        # Clear and index everything
        tree.clear()
        for i, x in enumerate(X_train):
            tree.index(i, x)
        tree._X = X_train

        precision = 0.0

        # Row-normalise in place. tree._X was bound to this same object just
        # above, so it observes the normalised rows too (assumes X_train is
        # a float ndarray -- TODO confirm against _get_mnist_data).
        X_train /= np.linalg.norm(X_train, axis=1)[:, np.newaxis]

        for x_test in X_test:
            # Reference neighbours: the 10 rows with the largest dot product.
            true_nns = np.argsort(-np.dot(X_train, x_test))[:10]
            nns = tree.query(x_test, 10)[:10]
            precision += len(set(nns) & set(true_nns)) / 10.0

        precision /= X_test.shape[0]

        assert precision >= expected_precision
def test_clear():
    """Verify that ``clear`` empties every leaf of a fitted forest.

    After ``fit`` each leaf reported by ``get_leaf_nodes`` must hold a truthy
    collection of indices; after ``clear`` each must be falsy.
    """
    X_train, _ = _get_mnist_data()

    forest = RPForest(leaf_size=10, no_trees=10)
    forest.fit(X_train)

    # Every leaf is populated once the forest has been fitted.
    assert all(members for _, members in forest.get_leaf_nodes())

    forest.clear()

    # No leaf retains any index after clearing.
    assert not any(members for _, members in forest.get_leaf_nodes())
## RPFOREST TEST
# Builds an RPForest on `features`, re-indexes every row under the key taken
# from `dict_feat`, then queries each row for candidates and prints them.
# `features`, `dict_feat` and `Timer` are expected to be defined earlier in
# the file.
from rpforest import RPForest

leaf_size = 5
n_trees = 20
name = 'RPForest(leaf_size=%d, n_trees=%d)' % (leaf_size, n_trees)
model = RPForest(leaf_size=leaf_size, no_trees=n_trees)

# Fitting.
# C-ordered copy; per the original note this works around a Cython-related
# error (exact cause not visible here).
features = features.copy(order='C')
model.fit(features)
model.clear()

# Indexing: insert each row under its external key from dict_feat.
for i, x in enumerate(features):
    t = Timer()
    with t:
        model.index(dict_feat[i], x.tolist())

# Querying: fetch candidates for each row.
for i in range(features.shape[0]):
    t = Timer()
    with t:
        results = model.get_candidates(features[i])
    # A single pre-formatted string prints the same text under both the
    # Python 2 print statement and the Python 3 print function.
    print('queried %s results %s' % (dict_feat[i], results))