예제 #1
0
def test_sample_training():
    """Fit on a sample, index everything, then check query precision.

    For several forest sizes, an ``RPForest`` is fitted on the first quarter
    of the training rows, cleared, and then every training row is indexed
    under its row number. Precision is the mean overlap (out of 10) between
    the forest's top-10 query result and the 10 training rows with the
    largest dot product against each test vector; it must reach the
    threshold paired with each forest size.
    """

    X_train, X_test = _get_mnist_data()

    for no_trees, expected_precision in ((1, 0.05),
                                         (5, 0.3),
                                         (10, 0.5),
                                         (50, 0.9)):

        tree = RPForest(leaf_size=10, no_trees=no_trees)
        # Fit on quarter of data. Floor division keeps the slice bound an
        # integer on Python 3 as well as Python 2.
        X_sample = X_train[:X_train.shape[0] // 4]
        tree.fit(X_sample)
        # Clear and index everything
        tree.clear()
        for i, x in enumerate(X_train):
            tree.index(i, x)
        tree._X = X_train

        precision = 0.0
        # NOTE: in-place row normalisation; ``tree._X`` refers to the same
        # object, so (assuming a NumPy array) it sees the normalised rows too.
        X_train /= np.linalg.norm(X_train, axis=1)[:, np.newaxis]
        for x_test in X_test:
            # Brute-force reference: indices of the 10 largest dot products.
            true_nns = np.argsort(-np.dot(X_train, x_test))[:10]
            nns = tree.query(x_test, 10)[:10]

            precision += len(set(nns) & set(true_nns)) / 10.0

        precision /= X_test.shape[0]

        assert precision >= expected_precision
예제 #2
0
def test_sample_training():
    """Check precision of a forest fitted on a sample but indexed in full.

    Each ``(no_trees, expected_precision)`` pair builds an ``RPForest``
    fitted on the first quarter of ``X_train``, clears it, indexes every
    training row by its position, and asserts that the mean top-10 overlap
    with a brute-force dot-product ranking is at least ``expected_precision``.
    """

    X_train, X_test = _get_mnist_data()

    for no_trees, expected_precision in ((1, 0.05), (5, 0.3), (10, 0.5),
                                         (50, 0.9)):

        tree = RPForest(leaf_size=10, no_trees=no_trees)
        # Fit on quarter of data; ``//`` yields an integer slice bound
        # (true division would produce a float and fail on Python 3).
        X_sample = X_train[:X_train.shape[0] // 4]
        tree.fit(X_sample)
        # Clear and index everything
        tree.clear()
        for i, x in enumerate(X_train):
            tree.index(i, x)
        tree._X = X_train

        precision = 0.0
        # Rows are normalised in place, so the object stored in ``tree._X``
        # above is affected as well (presumably a NumPy array -- confirm).
        X_train /= np.linalg.norm(X_train, axis=1)[:, np.newaxis]
        for x_test in X_test:
            # Reference neighbours: 10 rows with the largest dot product.
            true_nns = np.argsort(-np.dot(X_train, x_test))[:10]
            nns = tree.query(x_test, 10)[:10]

            precision += len(set(nns) & set(true_nns)) / 10.0

        precision /= X_test.shape[0]

        assert precision >= expected_precision
예제 #3
0
def test_clear():
    """Verify that ``clear`` empties the leaves of a fitted forest."""

    X_train, _ = _get_mnist_data()

    forest = RPForest(leaf_size=10, no_trees=10)
    forest.fit(X_train)

    # Right after fitting, every leaf's index collection must be truthy.
    assert all(indices for _, indices in forest.get_leaf_nodes())

    forest.clear()

    # After clearing, every leaf's index collection must be falsy.
    assert not any(indices for _, indices in forest.get_leaf_nodes())
예제 #4
0
def test_clear():
    """Fit a forest, confirm its leaves are populated, clear it, and confirm
    the leaves are then empty."""

    data = _get_mnist_data()
    training_rows = data[0]

    rp = RPForest(leaf_size=10, no_trees=10)
    rp.fit(training_rows)

    # One boolean per leaf: does it currently hold any indices?
    populated_before = [bool(members) for _code, members in rp.get_leaf_nodes()]
    assert all(populated_before)

    rp.clear()

    populated_after = [bool(members) for _code, members in rp.get_leaf_nodes()]
    assert not any(populated_after)
예제 #5
0





## RPFOREST TEST
# Fit an RPForest on `features`, clear it, re-index every row under the key
# `dict_feat[i]`, then fetch candidates for every row and print them.
# `features`, `dict_feat` and `Timer` are expected to be defined earlier.
from rpforest import RPForest
leaf_size = 5
n_trees = 20
name = 'RPForest(leaf_size=%d, n_trees=%d)' % (leaf_size, n_trees)
model = RPForest(leaf_size=leaf_size, no_trees=n_trees)
# fitting
# C-ordered copy (original note: needed to avoid a Cython-related error)
features = features.copy(order='C')
model.fit(features)
model.clear()
# indexing
for i, x in enumerate(features):
    t = Timer()
    with t:
        model.index(dict_feat[i], x.tolist())
# querying
for i, x in enumerate(features):
    t = Timer()
    with t:
        results = model.get_candidates(x)
    # Single formatted argument: same output under Python 2 and Python 3
    # (the bare `print a, b` statement is a syntax error on Python 3).
    print('queried %s results %s' % (dict_feat[i], results))