def test_sample_training():
    """Check query precision when the forest is fitted on a sample only.

    For each ``(no_trees, expected_precision)`` pair, an ``RPForest`` is
    fitted on the first quarter of the training rows, cleared, and then
    every training row is indexed into it. Each test point is queried for
    its 10 nearest neighbours and compared against the exact top 10 by dot
    product with the row-normalised training data. The mean overlap
    fraction must be at least ``expected_precision``.
    """
    X_train, X_test = _get_mnist_data()

    n_neighbours = 10

    for no_trees, expected_precision in ((1, 0.05), (5, 0.3), (10, 0.5), (50, 0.9)):
        tree = RPForest(leaf_size=10, no_trees=no_trees)

        # Fit on quarter of data. Floor division keeps the slice bound an
        # int; true division yields a float on Python 3, which is not a
        # valid slice index.
        X_sample = X_train[:X_train.shape[0] // 4]
        tree.fit(X_sample)

        # Clear and index everything
        tree.clear()
        for i, x in enumerate(X_train):
            tree.index(i, x)
        # NOTE(review): presumably replaces the sample stored by fit() with
        # the full data set so indexed ids resolve to rows -- confirm
        # against RPForest.
        tree._X = X_train

        precision = 0.0
        # In-place row normalisation to unit L2 norm. tree._X refers to the
        # same object, so it sees the normalised rows too. Kept inside the
        # loop (as in the original) so the first fit still runs on the
        # data as loaded.
        X_train /= np.linalg.norm(X_train, axis=1)[:, np.newaxis]

        for x_test in X_test:
            # Exact neighbours: largest dot product against unit-norm rows.
            true_nns = np.argsort(-np.dot(X_train, x_test))[:n_neighbours]
            nns = tree.query(x_test, n_neighbours)[:n_neighbours]

            precision += len(set(nns) & set(true_nns)) / float(n_neighbours)

        precision /= X_test.shape[0]

        assert precision >= expected_precision